file_with_bom 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/file_with_bom.rb +133 -0
- data/unittest/test_file_with_bom.rb +81 -0
- data/unittest/testfiles/utf-16be.txt +0 -0
- data/unittest/testfiles/utf-16be_bom.txt +0 -0
- data/unittest/testfiles/utf-16le.txt +0 -0
- data/unittest/testfiles/utf-16le_bom.txt +0 -0
- data/unittest/testfiles/utf-32be.txt +0 -0
- data/unittest/testfiles/utf-32be_bom.txt +0 -0
- data/unittest/testfiles/utf-32le.txt +0 -0
- data/unittest/testfiles/utf-32le_bom.txt +0 -0
- data/unittest/testfiles/utf-8.txt +1 -0
- data/unittest/testfiles/utf-8_bom.txt +1 -0
- metadata +71 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
==Write and read Unicode files with a BOM
|
3
|
+
|
4
|
+
Unicode may include a
|
5
|
+
BOM[http://en.wikipedia.org/wiki/Byte_order_mark] (byte order mark).
|
6
|
+
When reading such a file, you must remove the BOM.
|
7
|
+
When saving such a file, you must add the BOM at the beginning of the file.
|
8
|
+
|
9
|
+
This gem extends the File class with BOM handling for reading and writing files.
|
10
|
+
|
11
|
+
==Ruby >= 1.9.2
|
12
|
+
|
13
|
+
Ruby has supported BOMs in read mode since Ruby 1.9.2 (revision 28199).
|
14
|
+
|
15
|
+
You may use:
|
16
|
+
|
17
|
+
File.open('file.txt', "r:bom|utf-8"){|f|
|
18
|
+
content = f.read
|
19
|
+
}
|
20
|
+
|
21
|
+
or
|
22
|
+
|
23
|
+
File.open('file.txt', "r", :encoding => "BOM|UTF-8"){|f|
|
24
|
+
content = f.read
|
25
|
+
}
|
26
|
+
|
27
|
+
For details, see http://redmine.ruby-lang.org/issues/show/3407 and http://bugs.ruby-lang.org/projects/ruby-trunk/repository/revisions/28199
|
28
|
+
|
29
|
+
There is no built-in support for writing a BOM (w:bom|...).
|
30
|
+
|
31
|
+
==History
|
32
|
+
This gem is based on
|
33
|
+
http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
|
34
|
+
|
35
|
+
=end
|
36
|
+
|
37
|
+
|
38
|
+
=begin rdoc
|
39
|
+
Extend File with some BOM-Handling.
|
40
|
+
=end
|
41
|
+
class File
  # Byte order marks (BOMs) for the supported Unicode encodings.
  #
  # Every BOM is registered under two keys: the name used by this gem
  # ('UTF_8', 'UTF_16LE', ...) and the matching Encoding object
  # (Encoding::UTF_8, ...). The values are frozen strings that hold the raw
  # BOM bytes and are tagged with the encoding they belong to.
  BOM_LIST_hex = {}
  {
    'UTF_8'    => [0xEF, 0xBB, 0xBF],
    'UTF_16BE' => [0xFE, 0xFF],
    'UTF_16LE' => [0xFF, 0xFE],
    'UTF_32BE' => [0x00, 0x00, 0xFE, 0xFF],
    'UTF_32LE' => [0xFF, 0xFE, 0x00, 0x00], # U+FEFF, least significant byte first
  }.each do |name, bytes|
    encoding = Encoding.find(name.tr('_', '-'))
    bom = bytes.pack('C*').force_encoding(encoding).freeze
    BOM_LIST_hex[name] = bom
    BOM_LIST_hex[encoding] = bom
  end
  BOM_LIST_hex.freeze

  # Get the BOM for +encoding+ (default: the file's external encoding).
  #
  # +encoding+ may be an Encoding object or a name such as 'UTF_8' or
  # 'utf-8'. The result is a fresh, unfrozen string tagged with the
  # matching encoding, so it can be written to the file without being
  # transcoded.
  #
  # Raises ArgumentError if no BOM is known for +encoding+ (this includes
  # a file that has no external encoding).
  #
  # You may use it like this:
  #
  #   File.open(filename, "w:utf-16le"){|f|
  #     f << f.utf_bom_hex  #add the BOM manually
  #     f << 'some content'
  #   }
  def utf_bom_hex(encoding = external_encoding)
    bom = BOM_LIST_hex[encoding] || BOM_LIST_hex[encoding.to_s.upcase.tr('-', '_')]
    raise ArgumentError, "no BOM known for encoding #{encoding.inspect}" unless bom

    # Hand out a copy: the table entries are shared and frozen.
    bom.dup
  end

  # Class methods
  class << File
    # Keep the original File.open. The guard prevents the alias from
    # capturing the redefined method if this file is loaded twice.
    alias_method(:open_old, :open) unless method_defined?(:open_old)

    # Redefine open to support BOMs.
    #
    # This modification allows encodings like "utf-8-bom" in the mode
    # string, or alternatively the option <tt>:bom => true</tt>. Both can
    # be used in read and in write mode:
    #
    # * read mode ("r..."): a leading BOM is skipped, a file without BOM
    #   is read from position 0.
    # * write mode ("w..."): the BOM is written before any content.
    #
    # All other modes are passed through unchanged. Apart from the BOM
    # handling the method accepts the same arguments as the original
    # File.open (mode, permissions, options), returns the opened File when
    # called without a block and the block's value otherwise. With a block
    # the file is always closed, even if the block raises.
    #
    # Examples:
    #
    #   File.open("file.txt", "w:utf-16le-bom"){|f|
    #     f << 'some content'
    #   }
    #   File.open("file.txt", "w:utf-16le", :bom => true ){|f|
    #     f << 'some content'
    #   }
    #
    # ==Remark
    # Ruby 1.9.2 already supports BOMs in read mode (e.g. "r:bom|utf-8").
    #
    # The syntactical difference (utf-8-bom instead of bom|utf-8) is wanted
    # to separate the two logics.
    #
    # * This gem does not support ruby 1.8 (makes no sense, you may store
    #   the BOM, but the content will not be Unicode).
    # * This gem also supports BOMs in write mode.
    # * The options are forwarded with the <tt>**</tt> operator, so
    #   Ruby 2.0 or newer is required.
    def open(filename, *args)
      # Work on a copy so the caller's options hash is never modified.
      options = args.last.is_a?(Hash) ? args.pop.dup : {}

      # Check for the bom flag in the mode string. String#sub (not sub!)
      # leaves the caller's, possibly frozen, string untouched.
      mode = args.first
      bom_in_mode = mode.is_a?(String) && mode.include?('-bom')
      mode = args[0] = mode.sub('-bom', '') if bom_in_mode

      # :bom is our own option; the original open must not see it.
      bom_option = options.delete(:bom)

      file = open_old(filename, *args, **options)

      if bom_in_mode || bom_option
        begin
          apply_bom(file, mode.nil? ? (options[:mode] || 'r') : mode)
        rescue StandardError
          file.close # do not leak the handle if the BOM cannot be handled
          raise
        end
      end

      return file unless block_given?

      begin
        yield file
      ensure
        file.close unless file.closed?
      end
    end

    private

    # Skip (read mode) or write (write mode) the BOM of +file+.
    # Modes that start with neither "r" nor "w" are left alone.
    def apply_bom(file, mode)
      case mode.to_s
      when /\Ar/ #read mode -> remove BOM
        bom = file.utf_bom_hex
        head = file.read(bom.bytesize)
        # Compare raw bytes: strings with different encoding tags never
        # compare equal once they contain non-ASCII bytes.
        # head is nil for an empty file.
        file.rewind unless head && head.b == bom.b
      when /\Aw/ #write mode -> attach BOM
        file << file.utf_bom_hex
      end
    end
  end #File - class methods
end #File
|
133
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
$:.unshift('../lib')
|
3
|
+
require 'file_with_bom'
|
4
|
+
|
5
|
+
#~ gem 'test-unit'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
EXAMPLE_TEXT = 'some content öäü'
|
9
|
+
TESTDIR = File.join(File.dirname(File.expand_path(__FILE__)), 'testfiles')
|
10
|
+
|
11
|
+
#Hash with testfiles
|
12
|
+
TESTFILE = {}
|
13
|
+
# Register the reference files in TESTFILE and create the missing ones.
#
# For each encoding two entries are stored:
# * TESTFILE[encoding]:          "<encoding>.txt", EXAMPLE_TEXT only
# * TESTFILE[encoding + '-bom']: "<encoding>_bom.txt", BOM followed by EXAMPLE_TEXT
#
# Files that already exist are left untouched.
def make_testdata
  Dir.mkdir(TESTDIR) unless File.exist?(TESTDIR)

  %w{utf-8 utf-16le utf-16be utf-32le utf-32be}.each do |encoding|
    #Reference file without BOM
    plain_path = File.join(TESTDIR, "#{encoding}.txt")
    TESTFILE[encoding] = plain_path
    unless File.exist?(plain_path)
      File.open(plain_path, "w", :encoding => encoding) do |f|
        f << EXAMPLE_TEXT
      end
    end

    #Reference file with BOM
    bom_path = File.join(TESTDIR, "#{encoding}_bom.txt")
    TESTFILE[encoding+'-bom'] = bom_path
    unless File.exist?(bom_path)
      File.open(bom_path, "w", :encoding => encoding) do |f|
        f << f.utf_bom_hex  #add the BOM
        f << EXAMPLE_TEXT
      end
    end
  end #encodings
end #make_testdata
|
29
|
+
make_testdata #testdata should be part of the distribution. Call is needed to fill TESTFILE
|
30
|
+
|
31
|
+
|
32
|
+
# Tests for the BOM handling added to File.
#
# The reference files are taken from TESTFILE; files written by a test are
# collected in @files_to_delete and removed again in teardown.
class BOM_Test < Test::Unit::TestCase
  # [key in File::BOM_LIST_hex, key in TESTFILE, mode used to open the file]
  BOM_CASES = [
    ['UTF_8',    'utf-8',    'r:utf-8'],
    ['UTF_16LE', 'utf-16le', 'rb:utf-16le'],
    ['UTF_16BE', 'utf-16be', 'rb:utf-16be'],
    ['UTF_32LE', 'utf-32le', 'rb:utf-32le'],
    ['UTF_32BE', 'utf-32be', 'rb:utf-32be'],
  ]

  def setup
    @files_to_delete = []
  end

  def teardown
    @files_to_delete.each{|filename|
      File.delete(filename) if File.exist?(filename)
    }
  end

  # Open a reference file, ask it for its BOM and close it again.
  # File.new is used on purpose: the test must not depend on the
  # redefined File.open.
  def read_bom_hex(testfile, mode)
    file = File.new(TESTFILE[testfile], mode)
    file.utf_bom_hex
  ensure
    file.close if file
  end

  # utf_bom_hex must return the BOM registered for the external encoding,
  # looked up by name and (ruby 1.9) by Encoding object.
  def test_utf_bom_hex
    BOM_CASES.each{|key, testfile, mode|
      assert_equal( File::BOM_LIST_hex[key], read_bom_hex(testfile, mode) )
    }

    if defined? Encoding  #ruby 1.9
      BOM_CASES.each{|_key, testfile, mode|
        assert_equal( File::BOM_LIST_hex[Encoding.find(testfile)], read_bom_hex(testfile, mode) )
      }
    end
  end

  # Write EXAMPLE_TEXT with both interfaces ("-bom" suffix in the mode
  # string and the :bom option) and compare the result with the
  # reference file for +encoding+.
  def mk_file_test(encoding)
    @files_to_delete << filename = "testfile1_#{encoding}.txt"
    File.open( filename, "w:#{encoding}"){|f| f << EXAMPLE_TEXT }
    assert_equal( File.read(TESTFILE[encoding]), File.read(filename))

    #Same test with other interface
    encoding2 = encoding.gsub('-bom','')
    @files_to_delete << filename = "testfile2_#{encoding2}.txt"
    File.open( filename, "w", :encoding => encoding2, :bom => encoding.include?('-bom')){|f| f << EXAMPLE_TEXT }
    assert_equal( File.read(TESTFILE[encoding]), File.read(filename))
  end

  def test_utf_8;         mk_file_test('utf-8');         end
  def test_utf_8_bom;     mk_file_test('utf-8-bom');     end
  def test_utf_16le;      mk_file_test('utf-16le');      end
  def test_utf_16le_bom;  mk_file_test('utf-16le-bom');  end
  def test_utf_16be;      mk_file_test('utf-16be');      end
  def test_utf_16be_bom;  mk_file_test('utf-16be-bom');  end
  def test_utf_32le;      mk_file_test('utf-32le');      end
  def test_utf_32le_bom;  mk_file_test('utf-32le-bom');  end
  def test_utf_32be;      mk_file_test('utf-32be');      end
  def test_utf_32be_bom;  mk_file_test('utf-32be-bom');  end

end #class BOM_Test
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
some content öäü
|
@@ -0,0 +1 @@
|
|
1
|
+
some content öäü
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: file_with_bom
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Knut Lickert
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-28 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! 'Handle BOM for unicode files
|
15
|
+
|
16
|
+
'
|
17
|
+
email: knut@lickert.net
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/file_with_bom.rb
|
23
|
+
- unittest/test_file_with_bom.rb
|
24
|
+
- unittest/testfiles/utf-8.txt
|
25
|
+
- unittest/testfiles/utf-8_bom.txt
|
26
|
+
- unittest/testfiles/utf-16be.txt
|
27
|
+
- unittest/testfiles/utf-16be_bom.txt
|
28
|
+
- unittest/testfiles/utf-16le.txt
|
29
|
+
- unittest/testfiles/utf-16le_bom.txt
|
30
|
+
- unittest/testfiles/utf-32be.txt
|
31
|
+
- unittest/testfiles/utf-32be_bom.txt
|
32
|
+
- unittest/testfiles/utf-32le.txt
|
33
|
+
- unittest/testfiles/utf-32le_bom.txt
|
34
|
+
homepage: http://stackoverflow.com/questions/9886705/how-to-write-bom-marker-to-a-file-in-ruby/9887927#9887927
|
35
|
+
licenses: []
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --main
|
39
|
+
- lib/file_with_bom.rb
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.9'
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
requirements: []
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.8.11
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Handle BOM for unicode files
|
60
|
+
test_files:
|
61
|
+
- unittest/test_file_with_bom.rb
|
62
|
+
- unittest/testfiles/utf-8.txt
|
63
|
+
- unittest/testfiles/utf-8_bom.txt
|
64
|
+
- unittest/testfiles/utf-16be.txt
|
65
|
+
- unittest/testfiles/utf-16be_bom.txt
|
66
|
+
- unittest/testfiles/utf-16le.txt
|
67
|
+
- unittest/testfiles/utf-16le_bom.txt
|
68
|
+
- unittest/testfiles/utf-32be.txt
|
69
|
+
- unittest/testfiles/utf-32be_bom.txt
|
70
|
+
- unittest/testfiles/utf-32le.txt
|
71
|
+
- unittest/testfiles/utf-32le_bom.txt
|