file_with_bom 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/file_with_bom.rb +133 -0
- data/unittest/test_file_with_bom.rb +81 -0
- data/unittest/testfiles/utf-16be.txt +0 -0
- data/unittest/testfiles/utf-16be_bom.txt +0 -0
- data/unittest/testfiles/utf-16le.txt +0 -0
- data/unittest/testfiles/utf-16le_bom.txt +0 -0
- data/unittest/testfiles/utf-32be.txt +0 -0
- data/unittest/testfiles/utf-32be_bom.txt +0 -0
- data/unittest/testfiles/utf-32le.txt +0 -0
- data/unittest/testfiles/utf-32le_bom.txt +0 -0
- data/unittest/testfiles/utf-8.txt +1 -0
- data/unittest/testfiles/utf-8_bom.txt +1 -0
- metadata +71 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
==Write and read Unicode-Files with Bom
|
3
|
+
|
4
|
+
Unicode text files may start with a
|
5
|
+
(BOM)[http://en.wikipedia.org/wiki/Byte_order_mark].
|
6
|
+
When reading such a file, you must remove the BOM.
|
7
|
+
When saving such a file, you must add the BOM at the beginning of the file.
|
8
|
+
|
9
|
+
This gem extends the File class to provide some additional values.
|
10
|
+
|
11
|
+
==Ruby >= 1.9.2
|
12
|
+
|
13
|
+
Ruby has had built-in BOM support (for reading) since Ruby 1.9.2, revision 28199.
|
14
|
+
|
15
|
+
You may use:
|
16
|
+
|
17
|
+
File.open('file.txt', "r:bom|utf-8"){|f|
|
18
|
+
content = f.read
|
19
|
+
}
|
20
|
+
|
21
|
+
or
|
22
|
+
|
23
|
+
File.open('file.txt', "r", :encoding => "BOM|UTF-8"){|f|
|
24
|
+
content = f.read
|
25
|
+
}
|
26
|
+
|
27
|
+
For details, see http://redmine.ruby-lang.org/issues/show/3407 and http://bugs.ruby-lang.org/projects/ruby-trunk/repository/revisions/28199
|
28
|
+
|
29
|
+
There is no support for w:bom|...
|
30
|
+
|
31
|
+
==History
|
32
|
+
This gem is based on
|
33
|
+
http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
|
34
|
+
|
35
|
+
=end
|
36
|
+
|
37
|
+
|
38
|
+
=begin rdoc
|
39
|
+
Extends File with BOM handling.
|
40
|
+
=end
|
41
|
+
class File
  # BOMs (byte order marks) for the supported Unicode encodings.
  #
  # Each BOM is registered twice: under its name (e.g. 'UTF_16LE') and under
  # the matching Encoding object (e.g. Encoding::UTF_16LE). The stored strings
  # are tagged with their own encoding and frozen, so a lookup result compares
  # equal to what #utf_bom_hex returns and cannot be modified by accident.
  BOM_LIST_hex = {}
  [
    ['UTF_8',    "\xEF\xBB\xBF"],
    ['UTF_16BE', "\xFE\xFF"],
    ['UTF_16LE', "\xFF\xFE"],
    ['UTF_32BE', "\x00\x00\xFE\xFF"],
    # Little endian: U+FEFF is stored as FF FE 00 00. (The former value
    # FE FF 00 00 was not a BOM; files written with it must be regenerated.)
    ['UTF_32LE', "\xFF\xFE\x00\x00"],
  ].each do |name, bytes|
    encoding = Encoding.const_get(name)
    bom = bytes.dup.force_encoding(encoding).freeze
    BOM_LIST_hex[name] = bom
    BOM_LIST_hex[encoding] = bom
  end

  # Get the BOM for +encoding+ (default: the 'external_encoding' of the file).
  #
  # +encoding+ must be a key of BOM_LIST_hex, i.e. an Encoding object or a
  # name like 'UTF_16LE'. The result is a fresh, unfrozen string tagged with
  # that encoding; the entries of BOM_LIST_hex are never modified.
  #
  # Raises ArgumentError if no BOM is registered for +encoding+ (this includes
  # +nil+, which is what external_encoding returns for a file opened for
  # writing without an encoding).
  #
  # You may use it like this:
  #
  #   File.open(filename, "w:utf-16le"){|f|
  #     f << f.utf_bom_hex  #add the BOM manually
  #     f << 'some content'
  #   }
  def utf_bom_hex(encoding = external_encoding)
    bom = BOM_LIST_hex[encoding]
    raise ArgumentError, "no BOM defined for encoding #{encoding.inspect}" if bom.nil?

    # Name keys use '_' where Ruby's encoding names use '-'.
    target = encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding.to_s.tr('_', '-'))
    bom.dup.force_encoding(target)
  end

  # Class methods
  class << File
    # Keep the original File.open reachable as File.open_old. The guard stops a
    # second load of this file from aliasing the override onto itself, which
    # would end in infinite recursion.
    alias_method :open_old, :open unless method_defined?(:open_old)

    # Redefine open to support BOMs.
    #
    # This modification allows the usage of encodings like "utf-8-bom" in the
    # mode string, or of the option <tt>:bom => true</tt>. Both work in read
    # mode (a leading BOM is skipped) and in write mode (the BOM is written
    # first).
    #
    # Examples:
    #
    #   File.open("file.txt", "w:utf-16le-bom"){|f|
    #     f << 'some content'
    #   }
    #   File.open("file.txt", "w:utf-16le", :bom => true ){|f|
    #     f << 'some content'
    #   }
    #
    # All other arguments (integer modes, the permission argument, options)
    # are handed to the original File.open unchanged. Like the original,
    # the method returns the opened file when called without a block, and
    # the value of the block (closing the file afterwards, even on errors)
    # when called with one. The caller's mode string and option hash are
    # not modified.
    #
    # ==Remark
    # Ruby 1.9.2 already supports BOMs in read mode (e.g. "r:bom|utf-8").
    #
    # The syntactical difference (utf-8-bom instead of bom|utf-8) is wanted to
    # separate the two logics.
    #
    # * This gem does not support ruby 1.8 (makes no sense: you may store the
    #   BOM, but the content will not be unicode).
    # * This gem also supports BOMs in write mode.
    # * The implementation uses the <tt>**</tt> operator and therefore needs
    #   Ruby 2.0 or newer.
    def open(filename, mode_string = 'r', options = {}, *rest, &block)
      # Separate option hashes from positional arguments (mode, perm) so that
      # every call shape of the native File.open keeps working.
      hashes, positional = [mode_string, options, *rest].partition { |arg| arg.is_a?(Hash) }
      opts = hashes.inject({}) { |merged, hash| merged.merge(hash) }
      with_bom = opts.delete(:bom) # :bom is our flag; do not pass it on

      # Check for the bom-flag in the mode string (on a copy: the caller's
      # string may be frozen).
      mode = positional.first
      if mode.is_a?(String) && mode.include?('-bom')
        mode = positional[0] = mode.sub('-bom', '')
        with_bom = true
      end

      # Options must be passed as keywords; a positional hash is taken for
      # the permission argument by newer Rubies.
      file = open_old(filename, *positional, **opts)

      if with_bom
        begin
          apply_bom_handling(file, mode.nil? ? opts.fetch(:mode, 'r') : mode)
        rescue StandardError
          file.close # do not leak the handle if the BOM cannot be handled
          raise
        end
      end

      return file unless block

      begin
        yield file
      ensure
        file.close unless file.closed?
      end
    end

    private

    # Skip a leading BOM (read modes) or write the BOM (write modes).
    # Modes that are neither "r..." nor "w..." strings are left alone.
    def apply_bom_handling(file, mode)
      case mode
      when /\Ar/ # read mode -> remove BOM
        bom = file.utf_bom_hex
        head = file.read(bom.bytesize)
        # read(n) returns a binary string (or nil at EOF), so compare raw
        # bytes; return to position 0 if there was no BOM.
        file.rewind unless head && head.bytes.to_a == bom.bytes.to_a
      when /\Aw/ # write mode -> attach BOM
        file << file.utf_bom_hex
      end
    end
  end # File - class methods
end # File
|
133
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
$:.unshift('../lib')
|
3
|
+
require 'file_with_bom'
|
4
|
+
|
5
|
+
#~ gem 'test-unit'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
EXAMPLE_TEXT = 'some content öäü'
|
9
|
+
TESTDIR = File.join(File.dirname(File.expand_path(__FILE__)), 'testfiles')
|
10
|
+
|
11
|
+
#Hash with testfiles
|
12
|
+
TESTFILE = {}
|
13
|
+
# Register the fixture paths in TESTFILE and create any fixture that is missing.
#
# For each encoding two files below TESTDIR are handled:
# "<encoding>.txt" (EXAMPLE_TEXT only, key "<encoding>") and
# "<encoding>_bom.txt" (the BOM returned by f.utf_bom_hex followed by
# EXAMPLE_TEXT, key "<encoding>-bom"). Existing files are left untouched.
def make_testdata
  Dir.mkdir(TESTDIR) unless File.exist?(TESTDIR)

  %w{utf-8 utf-16le utf-16be utf-32le utf-32be}.each do |encoding|
    #File without BOM
    plain_path = File.join(TESTDIR, "#{encoding}.txt")
    TESTFILE[encoding] = plain_path
    unless File.exist?(plain_path)
      File.open(plain_path, "w", :encoding => encoding){|f| f << EXAMPLE_TEXT }
    end

    #File with BOM
    bom_path = File.join(TESTDIR, "#{encoding}_bom.txt")
    TESTFILE[encoding+'-bom'] = bom_path
    next if File.exist?(bom_path)

    File.open(bom_path, "w", :encoding => encoding){|f|
      f << f.utf_bom_hex  #add the BOM
      f << EXAMPLE_TEXT
    }
  end #encodings
end #make_testdata
|
29
|
+
make_testdata #testdata should be part of the distribution. Call is needed to fill TESTFILE
|
30
|
+
|
31
|
+
|
32
|
+
# Tests for the BOM handling added to File.
#
# Relies on the fixtures registered in TESTFILE and on EXAMPLE_TEXT.
class BOM_Test < Test::Unit::TestCase
  #[BOM_LIST_hex key, TESTFILE key, mode used to open the fixture]
  BOM_CASES = [
    ['UTF_8',    'utf-8',    'r:utf-8'],
    ['UTF_16LE', 'utf-16le', 'rb:utf-16le'],
    ['UTF_16BE', 'utf-16be', 'rb:utf-16be'],
    ['UTF_32LE', 'utf-32le', 'rb:utf-32le'],
    ['UTF_32BE', 'utf-32be', 'rb:utf-32be'],
  ]

  def setup
    @files_to_delete = []
  end

  #Remove the files written by the test (they are created in the current directory)
  def teardown
    @files_to_delete.each{|filename|
      File.delete(filename) if File.exist?(filename)
    }
  end

  #utf_bom_hex must return the BOM registered for the file's external encoding,
  #looked up by name and (ruby 1.9) by Encoding object.
  def test_utf_bom_hex
    BOM_CASES.each{|key, fixture, mode|
      assert_equal( File::BOM_LIST_hex[key], bom_of_fixture(fixture, mode) )
    }

    if defined? Encoding  #ruby 1.9
      BOM_CASES.each{|key, fixture, mode|
        assert_equal( File::BOM_LIST_hex[Encoding.const_get(key)], bom_of_fixture(fixture, mode) )
      }
    end
  end

  #Write EXAMPLE_TEXT with both interfaces ("-bom" in the mode string and the
  #:bom option) and compare each result with the fixture for +encoding+.
  def mk_file_test(encoding)
    @files_to_delete << filename = "testfile1_#{encoding}.txt"
    File.open( filename, "w:#{encoding}"){|f| f << EXAMPLE_TEXT }
    assert_equal( File.read(TESTFILE[encoding]), File.read(filename))

    #Same test with other interface
    encoding2 = encoding.gsub('-bom','')
    @files_to_delete << filename = "testfile2_#{encoding2}.txt"
    File.open( filename, "w", :encoding => encoding2, :bom => encoding.include?('-bom')){|f| f << EXAMPLE_TEXT }
    assert_equal( File.read(TESTFILE[encoding]), File.read(filename))
  end

  def test_utf_8;        mk_file_test('utf-8');        end
  def test_utf_8_bom;    mk_file_test('utf-8-bom');    end
  def test_utf_16le;     mk_file_test('utf-16le');     end
  def test_utf_16le_bom; mk_file_test('utf-16le-bom'); end
  def test_utf_16be;     mk_file_test('utf-16be');     end
  def test_utf_16be_bom; mk_file_test('utf-16be-bom'); end
  def test_utf_32le;     mk_file_test('utf-32le');     end
  def test_utf_32le_bom; mk_file_test('utf-32le-bom'); end
  def test_utf_32be;     mk_file_test('utf-32be');     end
  def test_utf_32be_bom; mk_file_test('utf-32be-bom'); end

  private

  #Open the fixture registered under +fixture+, return its BOM and close the
  #file again (the handle is not left open).
  def bom_of_fixture(fixture, mode)
    file = File.new(TESTFILE[fixture], mode)
    file.utf_bom_hex
  ensure
    file.close if file
  end
end #class BOM_Test
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
some content öäü
|
@@ -0,0 +1 @@
|
|
1
|
+
some content öäü
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: file_with_bom
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Knut Lickert
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-28 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! 'Handle BOM for unicode files
|
15
|
+
|
16
|
+
'
|
17
|
+
email: knut@lickert.net
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/file_with_bom.rb
|
23
|
+
- unittest/test_file_with_bom.rb
|
24
|
+
- unittest/testfiles/utf-8.txt
|
25
|
+
- unittest/testfiles/utf-8_bom.txt
|
26
|
+
- unittest/testfiles/utf-16be.txt
|
27
|
+
- unittest/testfiles/utf-16be_bom.txt
|
28
|
+
- unittest/testfiles/utf-16le.txt
|
29
|
+
- unittest/testfiles/utf-16le_bom.txt
|
30
|
+
- unittest/testfiles/utf-32be.txt
|
31
|
+
- unittest/testfiles/utf-32be_bom.txt
|
32
|
+
- unittest/testfiles/utf-32le.txt
|
33
|
+
- unittest/testfiles/utf-32le_bom.txt
|
34
|
+
homepage: http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
|
35
|
+
licenses: []
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --main
|
39
|
+
- lib/file_with_bom.rb
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.9'
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
requirements: []
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.8.11
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Handle BOM for unicode files
|
60
|
+
test_files:
|
61
|
+
- unittest/test_file_with_bom.rb
|
62
|
+
- unittest/testfiles/utf-8.txt
|
63
|
+
- unittest/testfiles/utf-8_bom.txt
|
64
|
+
- unittest/testfiles/utf-16be.txt
|
65
|
+
- unittest/testfiles/utf-16be_bom.txt
|
66
|
+
- unittest/testfiles/utf-16le.txt
|
67
|
+
- unittest/testfiles/utf-16le_bom.txt
|
68
|
+
- unittest/testfiles/utf-32be.txt
|
69
|
+
- unittest/testfiles/utf-32be_bom.txt
|
70
|
+
- unittest/testfiles/utf-32le.txt
|
71
|
+
- unittest/testfiles/utf-32le_bom.txt
|