file_with_bom 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,133 @@
1
+ =begin rdoc
2
+ ==Write and read Unicode-Files with Bom
3
+
4
+ Unicode files may start with a byte order mark
5
+ (BOM)[http://en.wikipedia.org/wiki/Byte_order_mark].
6
+ When reading such a file, you must remove the BOM.
7
+ When saving such a file, you must add the BOM at the beginning of the file.
8
+
9
+ This gem extends the File class to provide some additional values.
10
+
11
+ ==Ruby >= 1.9.2
12
+
13
+ Ruby has built-in BOM support (read mode only) since Ruby 1.9.2, revision 28199.
14
+
15
+ You may use:
16
+
17
+ File.open('file.txt', "r:bom|utf-8"){|f|
18
+ content = f.read
19
+ }
20
+
21
+ or
22
+
23
+ File.open('file.txt', "r", :encoding => "BOM|UTF-8"){|f|
24
+ content = f.read
25
+ }
26
+
27
+ For details see http://redmine.ruby-lang.org/issues/show/3407 and http://bugs.ruby-lang.org/projects/ruby-trunk/repository/revisions/28199
28
+
29
+ There is no support for w:bom|...
30
+
31
+ ==History
32
+ This gem is based on
33
+ http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
34
+
35
+ =end
36
+
37
+
38
# Extend File with some BOM handling.
#
# * File::BOM_LIST_hex maps encodings to their byte order mark.
# * File#utf_bom_hex returns the BOM for a file's external encoding.
# * File.open understands a "-bom" suffix in the mode string (for example
#   "w:utf-16le-bom") and a <tt>:bom => true</tt> option.
#
# Options are forwarded to the original File.open with a keyword splat, so
# this code needs Ruby >= 2.0. Ruby 1.8 is not supported (it makes no sense:
# you may store the BOM, but the content will not be Unicode).
class File
  # BOMs for different encodings.
  #
  # Each BOM is stored twice: under the encoding name written with
  # underscores ('UTF_16LE') and under the Encoding object itself
  # (Encoding::UTF_16LE). The stored strings are frozen and already tagged
  # with their encoding, so they can be compared directly with the result of
  # File#utf_bom_hex.
  BOM_LIST_hex = {}
  [
    ['UTF_8',    "\xEF\xBB\xBF"],
    ['UTF_16BE', "\xFE\xFF"],
    ['UTF_16LE', "\xFF\xFE"],
    ['UTF_32BE', "\x00\x00\xFE\xFF"],
    ['UTF_32LE', "\xFF\xFE\x00\x00"], # U+FEFF in little endian byte order
  ].each do |name, bytes|
    encoding = Encoding.find(name.tr('_', '-'))
    bom = bytes.dup.force_encoding(encoding).freeze
    BOM_LIST_hex[name] = bom
    BOM_LIST_hex[encoding] = bom
  end

  # Get the BOM for +encoding+ (default: the 'external_encoding' of the file).
  #
  # +encoding+ may be an Encoding object or an encoding name; names are
  # matched case-insensitively and may use '-' or '_' ("utf-16le", 'UTF_16LE').
  #
  # Returns a fresh String, so the caller may modify it without touching
  # BOM_LIST_hex.
  #
  # Raises ArgumentError if no BOM is known for the encoding. This includes a
  # nil encoding, which external_encoding returns for a file opened in write
  # mode without an explicit encoding.
  #
  # You may use it like this:
  #
  #   File.open(filename, "w:utf-16le"){|f|
  #     f << f.utf_bom_hex  #add the BOM manually
  #     f << 'some content'
  #   }
  def utf_bom_hex(encoding = external_encoding)
    key = encoding.is_a?(String) ? encoding.upcase.tr('-', '_') : encoding
    bom = BOM_LIST_hex[key]
    raise ArgumentError, "no BOM known for encoding #{encoding.inspect}" unless bom

    copy = bom.dup
    # Entries added to BOM_LIST_hex by other code may not be tagged yet.
    copy.force_encoding(key) if key.is_a?(Encoding)
    copy
  end

  # Class methods
  class << File
    # Store the old File.open. The guard keeps a second load of this file
    # from aliasing the redefined open onto itself (endless recursion).
    alias_method :open_old, :open unless method_defined?(:open_old)

    # Redefine open to support BOMs.
    #
    # This modification allows the usage of encodings like "utf-8-bom".
    # These encodings can be used in read and write mode.
    #
    # Examples:
    #
    #   File.open("file.txt", "w:utf-16le-bom"){|f|
    #     f << 'some content'
    #   }
    #   File.open("file.txt", "w:utf-16le", :bom => true ){|f|
    #     f << 'some content'
    #   }
    #
    # Parameters:
    # filename::    path of the file.
    # mode_string:: mode as accepted by the original File.open, optionally
    #               with "-bom" appended to the encoding. nil (the default)
    #               lets the original File.open choose (read mode, or the
    #               :mode option).
    # rest::        remaining positional arguments of the original File.open
    #               (perm). A trailing Hash is treated as options.
    # options::     options of the original File.open plus :bom.
    #
    # With :bom (or "-bom"):
    # * a mode starting with "r" skips a leading BOM; if the file does not
    #   start with the BOM the read position stays at 0.
    # * a mode starting with "w" writes the BOM before anything else.
    # * other modes are opened unchanged.
    #
    # Returns the opened File, or - when a block is given - the value of the
    # block; the file is closed after the block, also when it raises.
    #
    # Neither the mode string nor an options Hash passed by the caller is
    # modified.
    #
    # ==Remark
    # Ruby 1.9.2 already supports BOMs in read mode (e.g. "r:bom|utf-8").
    #
    # The syntactical difference (utf-8-bom instead of bom|utf-8) is wanted
    # to separate the two logics.
    #
    # * This gem also supports BOMs in write mode.
    def open(filename, mode_string = nil, *rest, **options, &block)
      # Ruby >= 3 no longer turns a trailing positional Hash into keywords.
      options = rest.pop.merge(options) if rest.last.is_a?(Hash)
      bom = options.delete(:bom) # must not reach the original File.open

      # Check for the bom flag in the mode string (on a copy).
      mode = mode_string
      if mode.is_a?(String) && mode.include?('-bom')
        mode = mode.sub('-bom', '')
        bom = true
      end

      args = [filename]
      args << mode unless mode.nil? && rest.empty?
      args.concat(rest)
      file = open_old(*args, **options)

      begin
        bom_prepare(file, mode.nil? ? (options[:mode] || 'r') : mode) if bom
      rescue StandardError
        file.close # do not leak the handle if BOM handling fails
        raise
      end
      return file unless block

      begin
        yield file
      ensure
        file.close unless file.closed?
      end
    end

    private

    # Skip (read mode) or write (write mode) the BOM of +file+.
    # Modes that are not Strings starting with "r" or "w" are left alone.
    def bom_prepare(file, mode)
      case mode
      when /\Ar/ # read mode -> remove BOM
        expected = file.utf_bom_hex
        head = file.read(expected.bytesize) # binary String, nil on an empty file
        # Compare bytes: Strings with different encoding tags are never ==.
        file.rewind unless head && head.bytes.to_a == expected.bytes.to_a
      when /\Aw/ # write mode -> attach BOM
        file << file.utf_bom_hex
      end
    end
  end # File - class methods
end # File
133
+
@@ -0,0 +1,81 @@
1
+ #encoding: utf-8
2
+ $:.unshift('../lib')
3
+ require 'file_with_bom'
4
+
5
+ #~ gem 'test-unit'
6
+ require 'test/unit'
7
+
8
+ EXAMPLE_TEXT = 'some content öäü'
9
+ TESTDIR = File.join(File.dirname(File.expand_path(__FILE__)), 'testfiles')
10
+
11
+ #Hash with testfiles
12
+ TESTFILE = {}
13
+ def make_testdata
14
+ Dir.mkdir(TESTDIR) unless File.exist?(TESTDIR)
15
+ %w{utf-8 utf-16le utf-16be utf-32le utf-32be}.each{|encoding|
16
+
17
+ filename = TESTFILE[encoding] = File.join(TESTDIR, "#{encoding}.txt")
18
+ File.open(filename, "w", :encoding => encoding){|f|
19
+ f << EXAMPLE_TEXT
20
+ } unless File.exist?(filename)
21
+
22
+ filename = TESTFILE[encoding+'-bom'] = File.join(TESTDIR, "#{encoding}_bom.txt")
23
+ File.open(filename, "w", :encoding => encoding){|f|
24
+ f << f.utf_bom_hex #add the BOM
25
+ f << EXAMPLE_TEXT
26
+ } unless File.exist?(filename)
27
+ } #encodings
28
+ end #make_testdata
29
+ make_testdata #testdata should be part of the distribution. Call is needed to fill TESTFILE
30
+
31
+
32
# Tests for the BOM extensions of File.
#
# Relies on the reference files registered in TESTFILE and on EXAMPLE_TEXT.
# Files written by a test go to the current directory and are removed again
# in teardown.
class BOM_Test < Test::Unit::TestCase
  def setup
    @files_to_delete = []
  end

  def teardown
    @files_to_delete.each{|filename|
      File.delete(filename) if File.exist?(filename)
    }
  end

  # File#utf_bom_hex must return the entry of File::BOM_LIST_hex matching the
  # external encoding of the file - for the name key and the Encoding key.
  def test_utf_bom_hex
    [
      ['UTF_8',    'utf-8',    'r:utf-8'],
      ['UTF_16LE', 'utf-16le', 'rb:utf-16le'],
      ['UTF_16BE', 'utf-16be', 'rb:utf-16be'],
      ['UTF_32LE', 'utf-32le', 'rb:utf-32le'],
      ['UTF_32BE', 'utf-32be', 'rb:utf-32be'],
    ].each{|name, testfile, mode|
      assert_equal( File::BOM_LIST_hex[name], bom_of(TESTFILE[testfile], mode) )
      assert_equal( File::BOM_LIST_hex[Encoding.const_get(name)], bom_of(TESTFILE[testfile], mode) )
    }
  end

  # Write EXAMPLE_TEXT with both interfaces ("-bom" in the mode string and
  # the :bom option) and compare the result byte by byte with the reference
  # file for +encoding+ (e.g. 'utf-16le' or 'utf-16le-bom').
  def mk_file_test(encoding)
    @files_to_delete << filename = "testfile1_#{encoding}.txt"
    File.open( filename, "w:#{encoding}"){|f| f << EXAMPLE_TEXT }
    assert_equal( File.binread(TESTFILE[encoding]), File.binread(filename))

    #Same test with other interface
    encoding2 = encoding.gsub('-bom','')
    @files_to_delete << filename = "testfile2_#{encoding2}.txt"
    File.open( filename, "w", :encoding => encoding2, :bom => encoding.include?('-bom')){|f| f << EXAMPLE_TEXT }
    assert_equal( File.binread(TESTFILE[encoding]), File.binread(filename))
  end

  def test_utf_8;        mk_file_test('utf-8'); end
  def test_utf_8_bom;    mk_file_test('utf-8-bom'); end
  def test_utf_16le;     mk_file_test('utf-16le'); end
  def test_utf_16le_bom; mk_file_test('utf-16le-bom'); end
  def test_utf_16be;     mk_file_test('utf-16be'); end
  def test_utf_16be_bom; mk_file_test('utf-16be-bom'); end
  def test_utf_32le;     mk_file_test('utf-32le'); end
  def test_utf_32le_bom; mk_file_test('utf-32le-bom'); end
  def test_utf_32be;     mk_file_test('utf-32be'); end
  def test_utf_32be_bom; mk_file_test('utf-32be-bom'); end

  private

  # BOM reported for the file at +path+ opened with +mode+.
  # File.new plus ensure is used so that the handle is always closed and the
  # helper does not depend on the return value of File.open with a block.
  def bom_of(path, mode)
    file = File.new(path, mode)
    file.utf_bom_hex
  ensure
    file.close if file
  end
end #class BOM_Test
@@ -0,0 +1 @@
1
+ some content öäü
@@ -0,0 +1 @@
1
+ some content öäü
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: file_with_bom
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Knut Lickert
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-28 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: ! 'Handle BOM for unicode files
15
+
16
+ '
17
+ email: knut@lickert.net
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - lib/file_with_bom.rb
23
+ - unittest/test_file_with_bom.rb
24
+ - unittest/testfiles/utf-8.txt
25
+ - unittest/testfiles/utf-8_bom.txt
26
+ - unittest/testfiles/utf-16be.txt
27
+ - unittest/testfiles/utf-16be_bom.txt
28
+ - unittest/testfiles/utf-16le.txt
29
+ - unittest/testfiles/utf-16le_bom.txt
30
+ - unittest/testfiles/utf-32be.txt
31
+ - unittest/testfiles/utf-32be_bom.txt
32
+ - unittest/testfiles/utf-32le.txt
33
+ - unittest/testfiles/utf-32le_bom.txt
34
+ homepage: http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --main
39
+ - lib/file_with_bom.rb
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '1.9'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 1.8.11
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Handle BOM for unicode files
60
+ test_files:
61
+ - unittest/test_file_with_bom.rb
62
+ - unittest/testfiles/utf-8.txt
63
+ - unittest/testfiles/utf-8_bom.txt
64
+ - unittest/testfiles/utf-16be.txt
65
+ - unittest/testfiles/utf-16be_bom.txt
66
+ - unittest/testfiles/utf-16le.txt
67
+ - unittest/testfiles/utf-16le_bom.txt
68
+ - unittest/testfiles/utf-32be.txt
69
+ - unittest/testfiles/utf-32be_bom.txt
70
+ - unittest/testfiles/utf-32le.txt
71
+ - unittest/testfiles/utf-32le_bom.txt