file_with_bom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,133 @@
1
+ =begin rdoc
2
+ ==Write and read Unicode-Files with Bom
3
+
4
+ Unicode files may start with a
5
+ {byte order mark (BOM)}[http://en.wikipedia.org/wiki/Byte_order_mark].
6
+ When reading such a file, you must remove the BOM.
7
+ When saving such a file, you must add the BOM at the beginning of the file.
8
+
9
+ This gem extends the File class with BOM-aware reading and writing.
10
+
11
+ ==Ruby >= 1.9.2
12
+
13
+ Ruby has had BOM support since Ruby 1.9.2 (revision 28199).
14
+
15
+ You may use:
16
+
17
+ File.open('file.txt', "r:bom|utf-8"){|f|
18
+ content = f.read
19
+ }
20
+
21
+ or
22
+
23
+ File.open('file.txt', "r", :encoding => "BOM|UTF-8"){|f|
24
+ content = f.read
25
+ }
26
+
27
+ For details, see http://redmine.ruby-lang.org/issues/show/3407 and http://bugs.ruby-lang.org/projects/ruby-trunk/repository/revisions/28199
28
+
29
+ There is no support for w:bom|...
30
+
31
+ ==History
32
+ This gem is based on
33
+ http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
34
+
35
+ =end
36
+
37
+
38
+ =begin rdoc
39
+ Extend File with some BOM-Handling.
40
+ =end
41
class File
  # Byte order marks (BOMs) for the supported Unicode encodings.
  #
  # Every BOM is reachable under two keys: the constant-style name
  # ('UTF_8', 'UTF_16LE', ...) and, when Encoding is available, the matching
  # Encoding object (Encoding::UTF_8, ...). The stored strings are tagged with
  # their own encoding and frozen, so looking one up never changes the table.
  #
  # The hash itself is intentionally left unfrozen, as in the original code.
  BOM_LIST_hex = {
    'UTF_8'    => "\xEF\xBB\xBF",
    'UTF_16BE' => "\xFE\xFF",
    'UTF_16LE' => "\xFF\xFE",
    'UTF_32BE' => "\x00\x00\xFE\xFF",
    # Little-endian U+FEFF in four bytes is FF FE 00 00. Earlier revisions of
    # this table listed FE FF 00 00; fixture files generated with that value
    # have to be regenerated.
    'UTF_32LE' => "\xFF\xFE\x00\x00",
  }
  # Tag each BOM with its encoding and register the Encoding-object aliases.
  if defined? Encoding
    BOM_LIST_hex.keys.each do |name|
      encoding = Encoding.const_get(name)
      bom = BOM_LIST_hex[name].dup.force_encoding(encoding).freeze
      BOM_LIST_hex[name] = BOM_LIST_hex[encoding] = bom
    end
  end

  # Returns the BOM for +encoding+ (default: this file's external encoding).
  #
  # A fresh String is returned on every call; the shared table entries are
  # never modified.
  #
  # Raises ArgumentError when no BOM is known for +encoding+. This includes
  # the case where external_encoding returns nil.
  #
  # Example:
  #
  #   File.open(filename, "w:utf-16le"){|f|
  #     f << f.utf_bom_hex  #add the BOM manually
  #     f << 'some content'
  #   }
  def utf_bom_hex(encoding = external_encoding)
    bom = BOM_LIST_hex[encoding]
    raise ArgumentError, "no BOM defined for encoding #{encoding.inspect}" unless bom

    bom = bom.dup
    # Keeps the original contract for entries a user may have added untagged.
    bom.force_encoding(encoding) if encoding.is_a?(Encoding)
    bom
  end

  # Class-level methods
  class << File
    # Keep the built-in File.open reachable. The guard prevents a second load
    # of this file from aliasing the override to itself (endless recursion).
    alias_method :open_old, :open unless method_defined?(:open_old)

    # BOM-aware replacement for File.open.
    #
    # A BOM is requested either with a "-bom" suffix in the mode string or
    # with the option <tt>:bom => true</tt>:
    #
    #   File.open("file.txt", "w:utf-16le-bom"){|f|
    #     f << 'some content'
    #   }
    #   File.open("file.txt", "w:utf-16le", :bom => true ){|f|
    #     f << 'some content'
    #   }
    #
    # * Modes starting with "r": a leading BOM is skipped. If the file does
    #   not start with the BOM of its external encoding, reading starts at
    #   position 0.
    # * Modes starting with "w": the BOM is written before anything else.
    # * Any other mode: the BOM request is ignored.
    #
    # Without a BOM request the call behaves like the built-in File.open:
    # * Integer modes, a permission argument and options given in place of
    #   the mode are passed through.
    # * Without a block the opened File is returned.
    # * With a block the block's value is returned and the file is closed
    #   afterwards, even if the block raises.
    #
    # The caller's mode string and options hash are never modified.
    #
    # ==Remark
    # Ruby 1.9.2 already supports BOMs in read mode (e.g. "r:bom|utf-8").
    # The different syntax (utf-8-bom instead of bom|utf-8) is intentional,
    # to keep the two mechanisms apart.
    #
    # Options are forwarded as keyword arguments, which needs Ruby 2.0 or
    # newer.
    #
    # Raises ArgumentError if a BOM is requested but the external encoding
    # has no known BOM; the file is closed again in that case.
    def open(filename, mode_string = 'r', options = {}, *extra, &block)
      # Separate option hashes from positional arguments (mode, perm). The
      # hashes are merged into a new one, so the caller's hash stays untouched.
      positional = []
      open_options = {}
      [mode_string, options, *extra].each do |argument|
        if argument.is_a?(Hash)
          open_options.update(argument)
        else
          positional << argument
        end
      end

      # :bom is our own flag; the built-in open must not see it.
      with_bom = open_options.delete(:bom) ? true : false

      # The mode comes from the positional argument or from the :mode option.
      mode = positional.empty? ? open_options[:mode] : positional.first
      if mode.is_a?(String) && mode.include?('-bom')
        mode = mode.sub('-bom', '') # non-destructive: mode may be frozen
        with_bom = true
        if positional.empty?
          open_options[:mode] = mode
        else
          positional[0] = mode
        end
      end
      mode = 'r' if mode.nil?

      file = open_old(filename, *positional, **open_options)

      handed_over = false
      begin
        if with_bom
          case mode
          when /\Ar/ # read mode -> skip a leading BOM
            expected = file.utf_bom_hex
            head = file.read(expected.bytesize)
            # read(length) returns a binary string, so compare raw bytes;
            # String#== is false for non-ASCII strings of different encodings.
            file.rewind unless head && head.bytes.to_a == expected.bytes.to_a
          when /\Aw/ # write mode -> start the file with the BOM
            file << file.utf_bom_hex
          end
        end

        unless block_given?
          handed_over = true # the caller is now responsible for closing
          return file
        end

        yield file
      ensure
        file.close unless handed_over || file.closed?
      end
    end
  end #File - class options
end #File
133
+
@@ -0,0 +1,81 @@
1
+ #encoding: utf-8
2
+ $:.unshift('../lib')
3
+ require 'file_with_bom'
4
+
5
+ #~ gem 'test-unit'
6
+ require 'test/unit'
7
+
8
# Text written into every fixture file.
EXAMPLE_TEXT = 'some content öäü'
# Directory holding the fixture files, located next to this test file.
TESTDIR = File.join(File.dirname(File.expand_path(__FILE__)), 'testfiles')

# Maps an encoding label ('utf-8', 'utf-8-bom', ...) to its fixture file path.
TESTFILE = {}

# Registers all fixture paths in TESTFILE and creates the fixture files
# (and TESTDIR) that do not exist yet. Existing files are never overwritten.
#
# For each encoding there is a plain file and a "_bom" variant that starts
# with the BOM returned by File#utf_bom_hex.
def make_testdata
  Dir.mkdir(TESTDIR) unless File.exist?(TESTDIR)

  %w{utf-8 utf-16le utf-16be utf-32le utf-32be}.each do |encoding|
    plain_file = File.join(TESTDIR, "#{encoding}.txt")
    TESTFILE[encoding] = plain_file
    unless File.exist?(plain_file)
      File.open(plain_file, "w", :encoding => encoding) { |f| f << EXAMPLE_TEXT }
    end

    bom_file = File.join(TESTDIR, "#{encoding}_bom.txt")
    TESTFILE[encoding + '-bom'] = bom_file
    unless File.exist?(bom_file)
      File.open(bom_file, "w", :encoding => encoding) do |f|
        f << f.utf_bom_hex #add the BOM
        f << EXAMPLE_TEXT
      end
    end
  end
end

# The fixture files should ship with the distribution; the call is still
# needed to fill TESTFILE.
make_testdata
30
+
31
+
32
# Tests for the BOM table, File#utf_bom_hex and the BOM-aware File.open.
class BOM_Test < Test::Unit::TestCase
  # [BOM table key, TESTFILE label, open mode] for every supported encoding.
  BOM_CASES = [
    ['UTF_8',    'utf-8',    'r:utf-8'],
    ['UTF_16LE', 'utf-16le', 'rb:utf-16le'],
    ['UTF_16BE', 'utf-16be', 'rb:utf-16be'],
    ['UTF_32LE', 'utf-32le', 'rb:utf-32le'],
    ['UTF_32BE', 'utf-32be', 'rb:utf-32be'],
  ]

  def setup
    @files_to_delete = []
  end

  # Removes the files a test registered in @files_to_delete.
  def teardown
    @files_to_delete.each do |filename|
      File.delete(filename) if File.exist?(filename)
    end
  end

  # Opens +filename+ with File.new, returns its BOM and closes the handle
  # again, so the test does not leave open files behind.
  def bom_of(filename, mode)
    file = File.new(filename, mode)
    file.utf_bom_hex
  ensure
    file.close if file
  end

  # utf_bom_hex must agree with the table, looked up by name and
  # (ruby 1.9) by Encoding object.
  def test_utf_bom_hex
    BOM_CASES.each do |key, label, mode|
      assert_equal(File::BOM_LIST_hex[key], bom_of(TESTFILE[label], mode))
    end

    if defined? Encoding #ruby 1.9
      BOM_CASES.each do |key, label, mode|
        assert_equal(File::BOM_LIST_hex[Encoding.const_get(key)], bom_of(TESTFILE[label], mode))
      end
    end
  end

  # Writes EXAMPLE_TEXT through both File.open interfaces (mode-string suffix
  # and :bom option) and compares the result with the fixture for +encoding+.
  def mk_file_test(encoding)
    @files_to_delete << filename = "testfile1_#{encoding}.txt"
    File.open(filename, "w:#{encoding}") { |f| f << EXAMPLE_TEXT }
    assert_equal(File.read(TESTFILE[encoding]), File.read(filename))

    #Same test with other interface
    encoding2 = encoding.gsub('-bom', '')
    @files_to_delete << filename = "testfile2_#{encoding2}.txt"
    File.open(filename, "w", :encoding => encoding2, :bom => encoding.include?('-bom')) { |f| f << EXAMPLE_TEXT }
    assert_equal(File.read(TESTFILE[encoding]), File.read(filename))
  end

  def test_utf_8; mk_file_test('utf-8'); end
  def test_utf_8_bom; mk_file_test('utf-8-bom'); end
  def test_utf_16le; mk_file_test('utf-16le'); end
  def test_utf_16le_bom; mk_file_test('utf-16le-bom'); end
  def test_utf_16be; mk_file_test('utf-16be'); end
  def test_utf_16be_bom; mk_file_test('utf-16be-bom'); end
  def test_utf_32le; mk_file_test('utf-32le'); end
  def test_utf_32le_bom; mk_file_test('utf-32le-bom'); end
  def test_utf_32be; mk_file_test('utf-32be'); end
  def test_utf_32be_bom; mk_file_test('utf-32be-bom'); end
end #class BOM_Test
@@ -0,0 +1 @@
1
+ some content öäü
@@ -0,0 +1 @@
1
+ some content öäü
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: file_with_bom
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Knut Lickert
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-28 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: ! 'Handle BOM for unicode files
15
+
16
+ '
17
+ email: knut@lickert.net
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - lib/file_with_bom.rb
23
+ - unittest/test_file_with_bom.rb
24
+ - unittest/testfiles/utf-8.txt
25
+ - unittest/testfiles/utf-8_bom.txt
26
+ - unittest/testfiles/utf-16be.txt
27
+ - unittest/testfiles/utf-16be_bom.txt
28
+ - unittest/testfiles/utf-16le.txt
29
+ - unittest/testfiles/utf-16le_bom.txt
30
+ - unittest/testfiles/utf-32be.txt
31
+ - unittest/testfiles/utf-32be_bom.txt
32
+ - unittest/testfiles/utf-32le.txt
33
+ - unittest/testfiles/utf-32le_bom.txt
34
+ homepage: http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --main
39
+ - lib/file_with_bom.rb
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '1.9'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 1.8.11
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Handle BOM for unicode files
60
+ test_files:
61
+ - unittest/test_file_with_bom.rb
62
+ - unittest/testfiles/utf-8.txt
63
+ - unittest/testfiles/utf-8_bom.txt
64
+ - unittest/testfiles/utf-16be.txt
65
+ - unittest/testfiles/utf-16be_bom.txt
66
+ - unittest/testfiles/utf-16le.txt
67
+ - unittest/testfiles/utf-16le_bom.txt
68
+ - unittest/testfiles/utf-32be.txt
69
+ - unittest/testfiles/utf-32be_bom.txt
70
+ - unittest/testfiles/utf-32le.txt
71
+ - unittest/testfiles/utf-32le_bom.txt