cmess 0.1.1.283 → 0.1.2.288
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +7 -0
- data/README +1 -1
- data/Rakefile +1 -1
- data/bin/bconv +19 -25
- data/bin/cinderella +25 -24
- data/bin/decode_entities +10 -10
- data/bin/guess_encoding +30 -38
- data/lib/cmess.rb +3 -0
- data/lib/cmess/bconv.rb +97 -26
- data/lib/cmess/cinderella.rb +2 -0
- data/lib/cmess/cli.rb +56 -17
- data/lib/cmess/guess_encoding/automatic.rb +4 -4
- data/lib/cmess/guess_encoding/manual.rb +1 -1
- data/lib/cmess/version.rb +1 -1
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
= Revision history for cmess
|
2
2
|
|
3
|
+
== 0.1.2 [2008-09-17]
|
4
|
+
|
5
|
+
* Some refactoring; started to make tools more usable as a library
|
6
|
+
* Make tools accept multiple files for input (via tempfile)
|
7
|
+
* Use ENV.user_encoding from ruby-nuggets
|
8
|
+
* Wrap command execution in a block catching any exceptions
|
9
|
+
|
3
10
|
== 0.1.1 [2008-09-16]
|
4
11
|
|
5
12
|
* Added bconv tool to convert between bibliographic encodings
|
data/README
CHANGED
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ begin
|
|
17
17
|
},
|
18
18
|
:files => FileList['lib/**/*.rb', 'bin/*'].to_a,
|
19
19
|
:extra_files => FileList['[A-Z]*', 'example/**/*', 'data/**/*'].to_a,
|
20
|
-
:dependencies =>
|
20
|
+
:dependencies => [['ruby-nuggets', '>= 0.3.3'], 'htmlentities']
|
21
21
|
}
|
22
22
|
}}
|
23
23
|
rescue LoadError
|
data/bin/bconv
CHANGED
@@ -29,7 +29,6 @@
|
|
29
29
|
###############################################################################
|
30
30
|
#++
|
31
31
|
|
32
|
-
require 'yaml'
|
33
32
|
require 'optparse'
|
34
33
|
|
35
34
|
require 'rubygems'
|
@@ -49,11 +48,11 @@ options = {
|
|
49
48
|
:output => STDOUT,
|
50
49
|
:source_encoding => determine_system_encoding,
|
51
50
|
:target_encoding => determine_system_encoding,
|
52
|
-
:chartab_file =>
|
51
|
+
:chartab_file => CMess::BConv::DEFAULT_CHARTAB_FILE
|
53
52
|
}
|
54
53
|
|
55
54
|
OptionParser.new(nil, 40) { |opts|
|
56
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
55
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
57
56
|
|
58
57
|
opts.separator ''
|
59
58
|
opts.separator 'Options:'
|
@@ -84,7 +83,7 @@ OptionParser.new(nil, 40) { |opts|
|
|
84
83
|
|
85
84
|
opts.separator ''
|
86
85
|
|
87
|
-
opts.on('-c', '--chartab
|
86
|
+
opts.on('-c', '--chartab YAML_FILE', "File containing character mappings, in YAML format.", "[Default: #{options[:chartab_file]}]") { |c|
|
88
87
|
options[:chartab_file] = c
|
89
88
|
}
|
90
89
|
|
@@ -109,28 +108,23 @@ OptionParser.new(nil, 40) { |opts|
|
|
109
108
|
opts.separator "When FILE is -, either STDIN or STDOUT is used (as appropriate)."
|
110
109
|
}.parse!
|
111
110
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
111
|
+
cli do
|
112
|
+
if options[:list_encodings]
|
113
|
+
puts CMess::BConv.encodings(options[:chartab_file])
|
114
|
+
exit
|
115
|
+
end
|
116
116
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
end
|
117
|
+
[:source_encoding, :target_encoding].each { |key|
|
118
|
+
options[key].call if options[key].respond_to?(:call)
|
119
|
+
}
|
121
120
|
|
122
|
-
|
123
|
-
options[key].call if options[key].respond_to?(:call)
|
124
|
-
}
|
121
|
+
trailing_args_as_input(options)
|
125
122
|
|
126
|
-
|
127
|
-
|
123
|
+
CMess::BConv.convert(
|
124
|
+
options[:input],
|
125
|
+
options[:output],
|
126
|
+
options[:source_encoding],
|
127
|
+
options[:target_encoding],
|
128
|
+
options[:chartab_file]
|
129
|
+
)
|
128
130
|
end
|
129
|
-
|
130
|
-
CMess::BConv.convert(
|
131
|
-
options[:input],
|
132
|
-
options[:output],
|
133
|
-
options[:source_encoding],
|
134
|
-
options[:target_encoding],
|
135
|
-
chartab
|
136
|
-
)
|
data/bin/cinderella
CHANGED
@@ -48,12 +48,12 @@ options = {
|
|
48
48
|
:crop => nil,
|
49
49
|
:source_encoding => nil,
|
50
50
|
:target_encoding => determine_system_encoding,
|
51
|
-
:csets => [
|
51
|
+
:csets => [CMess::Cinderella::DEFAULT_CSETS_DIR],
|
52
52
|
:repair => false
|
53
53
|
}
|
54
54
|
|
55
55
|
OptionParser.new(nil, 40) { |opts|
|
56
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
56
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
57
57
|
|
58
58
|
opts.separator ''
|
59
59
|
opts.separator 'Options:'
|
@@ -161,29 +161,30 @@ OptionParser.new(nil, 40) { |opts|
|
|
161
161
|
opts.separator "is ignored. When FILE is -, either STDIN or STDOUT is used (as appropriate)."
|
162
162
|
}.parse!
|
163
163
|
|
164
|
-
|
164
|
+
cli do
|
165
|
+
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
165
166
|
|
166
|
-
abort "No source encoding given! (Use the '-e' switch to do so; see '--help' for more information)" \
|
167
|
-
|
167
|
+
abort "No source encoding given! (Use the '-e' switch to do so; see '--help' for more information)" \
|
168
|
+
unless options[:source_encoding]
|
168
169
|
|
169
|
-
yaml_file = "#{options[:target_encoding].downcase}.yaml"
|
170
|
-
char_file = options[:csets].inject(nil) { |path, cset|
|
171
|
-
|
172
|
-
|
173
|
-
}
|
174
|
-
abort "Char file not found for target encoding: #{options[:target_encoding]}" \
|
175
|
-
unless char_file
|
170
|
+
yaml_file = "#{options[:target_encoding].downcase}.yaml"
|
171
|
+
char_file = options[:csets].inject(nil) { |path, cset|
|
172
|
+
path = File.join(cset, yaml_file)
|
173
|
+
break path if File.readable?(path)
|
174
|
+
}
|
176
175
|
|
177
|
-
|
178
|
-
|
179
|
-
end
|
176
|
+
abort "Char file not found for target encoding: #{options[:target_encoding]}" \
|
177
|
+
unless char_file
|
180
178
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
)
|
179
|
+
trailing_args_as_input(options)
|
180
|
+
|
181
|
+
CMess::Cinderella.pick(
|
182
|
+
options[:input],
|
183
|
+
options[:pot],
|
184
|
+
options[:crop],
|
185
|
+
options[:source_encoding],
|
186
|
+
options[:target_encoding],
|
187
|
+
YAML.load_file(char_file),
|
188
|
+
options[:repair]
|
189
|
+
)
|
190
|
+
end
|
data/bin/decode_entities
CHANGED
@@ -48,7 +48,7 @@ options = {
|
|
48
48
|
}
|
49
49
|
|
50
50
|
OptionParser.new { |opts|
|
51
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
51
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
52
52
|
|
53
53
|
opts.separator ''
|
54
54
|
opts.separator 'Options:'
|
@@ -94,13 +94,13 @@ OptionParser.new { |opts|
|
|
94
94
|
opts.separator "When FILE is -, either STDIN or STDOUT is used (as appropriate)."
|
95
95
|
}.parse!
|
96
96
|
|
97
|
-
|
98
|
-
options
|
99
|
-
end
|
97
|
+
cli do
|
98
|
+
trailing_args_as_input(options)
|
100
99
|
|
101
|
-
CMess::DecodeEntities.decode(
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
)
|
100
|
+
CMess::DecodeEntities.decode(
|
101
|
+
options[:input],
|
102
|
+
options[:output],
|
103
|
+
options[:source_encoding],
|
104
|
+
options[:target_encoding]
|
105
|
+
)
|
106
|
+
end
|
data/bin/guess_encoding
CHANGED
@@ -46,9 +46,6 @@ PROGNAME = File.basename($0)
|
|
46
46
|
# short-cut
|
47
47
|
CGE = CMess::GuessEncoding
|
48
48
|
|
49
|
-
# how to split list of arguments
|
50
|
-
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
51
|
-
|
52
49
|
options = {
|
53
50
|
:input => STDIN,
|
54
51
|
:line => 1,
|
@@ -64,7 +61,7 @@ options = {
|
|
64
61
|
}
|
65
62
|
|
66
63
|
OptionParser.new(nil, 40) { |opts|
|
67
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
64
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
68
65
|
|
69
66
|
opts.separator ''
|
70
67
|
opts.separator 'Options:'
|
@@ -193,39 +190,34 @@ OptionParser.new(nil, 40) { |opts|
|
|
193
190
|
opts.separator "When FILE is -, STDIN is used."
|
194
191
|
}.parse!
|
195
192
|
|
196
|
-
|
197
|
-
options
|
198
|
-
|
193
|
+
cli do
|
194
|
+
trailing_args_as_input(options)
|
195
|
+
|
196
|
+
if options[:manual] || options[:charcodes]
|
197
|
+
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
198
|
+
|
199
|
+
if charcodes = options[:charcodes]
|
200
|
+
base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
|
201
|
+
input = charcodes.map { |c| c.to_i(base).chr }.join
|
202
|
+
else
|
203
|
+
# reset line counter
|
204
|
+
$. = 0
|
205
|
+
|
206
|
+
input = options[:input].each { |line|
|
207
|
+
break line if $. == options[:line]
|
208
|
+
}
|
209
|
+
|
210
|
+
abort "Input was empty!" if $..zero?
|
211
|
+
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
212
|
+
end
|
199
213
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
abort "Input was empty!" if $..zero?
|
210
|
-
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
211
|
-
|
212
|
-
CGE::Manual.display(
|
213
|
-
input,
|
214
|
-
options[:target_encoding],
|
215
|
-
options[:encodings],
|
216
|
-
options[:additional_encodings]
|
217
|
-
)
|
218
|
-
elsif charcodes = options[:charcodes]
|
219
|
-
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
220
|
-
|
221
|
-
base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
|
222
|
-
|
223
|
-
CGE::Manual.display(
|
224
|
-
charcodes.map { |c| c.to_i(base).chr }.join,
|
225
|
-
options[:target_encoding],
|
226
|
-
options[:encodings],
|
227
|
-
options[:additional_encodings]
|
228
|
-
)
|
229
|
-
else # automatic
|
230
|
-
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
214
|
+
CGE::Manual.display(
|
215
|
+
input,
|
216
|
+
options[:target_encoding],
|
217
|
+
options[:encodings],
|
218
|
+
options[:additional_encodings]
|
219
|
+
)
|
220
|
+
else # automatic
|
221
|
+
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
222
|
+
end
|
231
223
|
end
|
data/lib/cmess.rb
CHANGED
data/lib/cmess/bconv.rb
CHANGED
@@ -26,67 +26,101 @@
|
|
26
26
|
###############################################################################
|
27
27
|
#++
|
28
28
|
|
29
|
+
require 'yaml'
|
29
30
|
require 'iconv'
|
30
31
|
require 'cmess'
|
31
32
|
|
32
33
|
# Convert between bibliographic (and other) encodings.
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
extend self
|
35
|
+
class CMess::BConv
|
37
36
|
|
38
37
|
# our version ;-)
|
39
|
-
VERSION = '0.0.
|
38
|
+
VERSION = '0.0.2'
|
40
39
|
|
41
40
|
INTERMEDIATE_ENCODING = 'utf-8'
|
42
41
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
DEFAULT_CHARTAB_FILE = File.join(CMess::DATA_DIR, 'chartab.yaml')
|
43
|
+
|
44
|
+
class << self
|
45
|
+
|
46
|
+
def encodings(chartab = DEFAULT_CHARTAB_FILE)
|
47
|
+
chartab = load_chartab(chartab)
|
48
|
+
|
49
|
+
chartab[chartab.keys.first].keys.map { |encoding|
|
50
|
+
encoding.upcase unless encoding =~ /\A__/
|
51
|
+
}.compact.sort
|
52
|
+
end
|
53
|
+
|
54
|
+
def convert(*args)
|
55
|
+
new(*args).convert
|
56
|
+
end
|
57
|
+
|
58
|
+
def load_chartab(chartab)
|
59
|
+
case chartab
|
60
|
+
when Hash
|
61
|
+
chartab
|
62
|
+
when String
|
63
|
+
raise "chartab file not found: #{chartab}" unless File.readable?(chartab)
|
64
|
+
YAML.load_file(chartab)
|
65
|
+
else
|
66
|
+
raise ArgumentError, "invalid chartab of type #{chartab.class}"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
47
70
|
end
|
48
71
|
|
49
|
-
|
50
|
-
source_encoding.upcase!
|
51
|
-
target_encoding.upcase!
|
72
|
+
attr_reader :input, :output, :source_encoding, :target_encoding, :chartab, :encodings
|
52
73
|
|
53
|
-
|
74
|
+
def initialize(input, output, source_encoding, target_encoding, chartab = DEFAULT_CHARTAB_FILE)
|
75
|
+
@input, @output = input, output
|
54
76
|
|
55
|
-
|
56
|
-
|
57
|
-
|
77
|
+
@source_encoding = source_encoding.upcase
|
78
|
+
@target_encoding = target_encoding.upcase
|
79
|
+
|
80
|
+
@chartab = self.class.load_chartab(chartab)
|
81
|
+
@encodings = self.class.encodings(@chartab)
|
82
|
+
end
|
83
|
+
|
84
|
+
def encoding?(encoding)
|
85
|
+
encodings.include?(encoding)
|
86
|
+
end
|
87
|
+
|
88
|
+
def convert
|
89
|
+
if encoding?(source_encoding)
|
90
|
+
if encoding?(target_encoding)
|
91
|
+
@charmap = chartab.inject({}) { |hash, (code, map)|
|
58
92
|
hash.update(map[source_encoding] => map[target_encoding].pack('U*'))
|
59
93
|
}
|
60
94
|
|
61
|
-
input.each_byte { |
|
62
|
-
output.print
|
95
|
+
input.each_byte { |char|
|
96
|
+
output.print map(char)
|
63
97
|
}
|
64
98
|
else
|
65
|
-
iconv =
|
99
|
+
iconv = iconv_to
|
66
100
|
|
67
|
-
charmap = chartab.inject({}) { |hash, (code, map)|
|
101
|
+
@charmap = chartab.inject({}) { |hash, (code, map)|
|
68
102
|
hash.update(map[source_encoding] => [code.to_i(16)].pack('U*'))
|
69
103
|
}
|
70
104
|
|
71
|
-
input.each_byte { |
|
72
|
-
output.print iconv.iconv(
|
105
|
+
input.each_byte { |char|
|
106
|
+
output.print iconv.iconv(map(char))
|
73
107
|
}
|
74
108
|
end
|
75
109
|
else
|
76
|
-
if
|
77
|
-
iconv =
|
110
|
+
if encoding?(target_encoding)
|
111
|
+
iconv = iconv_from
|
78
112
|
|
79
113
|
charmap = chartab.inject({}) { |hash, (code, map)|
|
80
114
|
hash.update(code.to_i(16) => map[target_encoding].pack('U*'))
|
81
115
|
}
|
82
116
|
|
83
117
|
input.each { |line|
|
84
|
-
iconv.iconv(line).unpack('U*').each { |
|
85
|
-
output.print charmap[
|
118
|
+
iconv.iconv(line).unpack('U*').each { |char|
|
119
|
+
output.print charmap[char]
|
86
120
|
}
|
87
121
|
}
|
88
122
|
else
|
89
|
-
iconv =
|
123
|
+
iconv = iconv_from_to
|
90
124
|
|
91
125
|
input.each { |line|
|
92
126
|
output.puts iconv.iconv(line)
|
@@ -95,4 +129,41 @@ module CMess::BConv
|
|
95
129
|
end
|
96
130
|
end
|
97
131
|
|
132
|
+
private
|
133
|
+
|
134
|
+
def iconv_from_to(from = source_encoding, to = target_encoding)
|
135
|
+
iconv = begin
|
136
|
+
Iconv.new(to, from)
|
137
|
+
rescue Iconv::InvalidEncoding
|
138
|
+
raise ArgumentError, "invalid encoding: source encoding = #{from}, target encoding = #{to}"
|
139
|
+
end
|
140
|
+
|
141
|
+
def iconv.iconv(*args)
|
142
|
+
super
|
143
|
+
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
|
144
|
+
warn "ILLEGAL INPUT SEQUENCE: #{err}"; ''
|
145
|
+
end
|
146
|
+
|
147
|
+
iconv
|
148
|
+
end
|
149
|
+
|
150
|
+
def iconv_from(from = source_encoding)
|
151
|
+
iconv_from_to(from, INTERMEDIATE_ENCODING)
|
152
|
+
end
|
153
|
+
|
154
|
+
def iconv_to(to = target_encoding)
|
155
|
+
iconv_from_to(INTERMEDIATE_ENCODING, to)
|
156
|
+
end
|
157
|
+
|
158
|
+
def map(char, charmap = @charmap)
|
159
|
+
unless map = charmap[[char]]
|
160
|
+
unless map = charmap[[char, c = input.getc]]
|
161
|
+
input.ungetc(c) if c
|
162
|
+
map = ''
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
map
|
167
|
+
end
|
168
|
+
|
98
169
|
end
|
data/lib/cmess/cinderella.rb
CHANGED
@@ -43,6 +43,8 @@ module CMess::Cinderella
|
|
43
43
|
# our version ;-)
|
44
44
|
VERSION = '0.0.3'
|
45
45
|
|
46
|
+
DEFAULT_CSETS_DIR = File.join(CMess::DATA_DIR, 'csets')
|
47
|
+
|
46
48
|
def pick(input, pot, crop, source_encoding, target_encoding, chars, repair = false)
|
47
49
|
iconv = Iconv.new(target_encoding, source_encoding)
|
48
50
|
|
data/lib/cmess/cli.rb
CHANGED
@@ -26,9 +26,15 @@
|
|
26
26
|
###############################################################################
|
27
27
|
#++
|
28
28
|
|
29
|
+
require 'tempfile'
|
30
|
+
|
31
|
+
require 'rubygems'
|
32
|
+
require 'nuggets/env/user_encoding'
|
33
|
+
|
29
34
|
module CMess::CLI
|
30
35
|
|
31
|
-
|
36
|
+
# how to split list of arguments
|
37
|
+
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
32
38
|
|
33
39
|
def ensure_readable(file)
|
34
40
|
abort "Can't find input file: #{file}" unless File.readable?(file)
|
@@ -39,8 +45,7 @@ module CMess::CLI
|
|
39
45
|
end
|
40
46
|
|
41
47
|
def open_file_in_place(file)
|
42
|
-
|
43
|
-
[File.readlines(file), File.open(file, 'w')]
|
48
|
+
[open_temporary_input(file), File.open(file, 'w')]
|
44
49
|
end
|
45
50
|
|
46
51
|
def open_file_or_std(file, mode = 'r')
|
@@ -57,25 +62,59 @@ module CMess::CLI
|
|
57
62
|
end
|
58
63
|
end
|
59
64
|
|
60
|
-
def determine_system_encoding
|
61
|
-
|
62
|
-
ENV['LANG'][/\.(.*)/, 1] ||
|
63
|
-
system_encoding_not_found
|
64
|
-
end
|
65
|
+
def open_temporary_input(*files)
|
66
|
+
temp = Tempfile.new('cmess_cli')
|
65
67
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
68
|
+
files.each { |file|
|
69
|
+
if file == '-'
|
70
|
+
STDIN.each { |line| temp << line }
|
71
|
+
else
|
72
|
+
ensure_readable(file)
|
73
|
+
File.open(file) { |f| f.each { |line| temp << line } }
|
74
|
+
end
|
72
75
|
}
|
73
76
|
|
74
|
-
|
75
|
-
|
77
|
+
# return File, instead of Tempfile
|
78
|
+
temp.close
|
79
|
+
temp.open
|
80
|
+
end
|
81
|
+
|
82
|
+
def trailing_args_as_input(options)
|
83
|
+
unless ARGV.empty? || options[:input_set]
|
84
|
+
options[:input] = if ARGV.size == 1
|
85
|
+
open_file_or_std(ARGV.first)
|
86
|
+
else
|
87
|
+
open_temporary_input(*ARGV)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def determine_system_encoding
|
93
|
+
ENV.user_encoding || begin
|
94
|
+
dummy = lambda {
|
95
|
+
abort <<-EOT
|
96
|
+
Your system's encoding couldn't be determined automatically -- please specify
|
97
|
+
it explicitly via the ENCODING environment variable or via the '-t' option.
|
98
|
+
EOT
|
99
|
+
}
|
100
|
+
|
101
|
+
def dummy.to_s; 'NOT FOUND' end
|
102
|
+
|
103
|
+
dummy
|
76
104
|
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def cli
|
108
|
+
yield
|
109
|
+
rescue => err
|
110
|
+
if $VERBOSE
|
111
|
+
backtrace = err.backtrace
|
112
|
+
fromtrace = backtrace[1..-1].map { |i| "\n from #{i}" }
|
77
113
|
|
78
|
-
|
114
|
+
abort "#{backtrace.first} #{err} (#{err.class})#{fromtrace}"
|
115
|
+
else
|
116
|
+
abort "#{err.to_s.capitalize} [#{err.backtrace.first}]"
|
117
|
+
end
|
79
118
|
end
|
80
119
|
|
81
120
|
end
|
@@ -273,11 +273,11 @@ class CMess::GuessEncoding::Automatic
|
|
273
273
|
# UTF-8, if number of escape-bytes and following bytes
|
274
274
|
# is matching (cf. http://en.wikipedia.org/wiki/UTF-8)
|
275
275
|
encoding UTF_8 do
|
276
|
-
esc_bytes = byte_count_sum(0xc0..0xdf)
|
276
|
+
esc_bytes = byte_count_sum(0xc0..0xdf) +
|
277
277
|
# => 110xxxxx 10xxxxxx
|
278
|
-
|
278
|
+
byte_count_sum(0xe0..0xef) * 2 +
|
279
279
|
# => 1110xxxx 10xxxxxx 10xxxxxx
|
280
|
-
|
280
|
+
byte_count_sum(0xf0..0xf7) * 3
|
281
281
|
# => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
282
282
|
fol_bytes = byte_count_sum(0x80..0xbf)
|
283
283
|
# => 10xxxxxx
|
@@ -287,7 +287,7 @@ class CMess::GuessEncoding::Automatic
|
|
287
287
|
|
288
288
|
# Analyse statistical appearance of German umlauts and other accented
|
289
289
|
# letters (see TEST_CHARS)
|
290
|
-
encodings
|
290
|
+
encodings(*TEST_ENCODINGS) do
|
291
291
|
ratios = {}
|
292
292
|
|
293
293
|
TEST_ENCODINGS.find(lambda {
|
@@ -95,7 +95,7 @@ module CMess::GuessEncoding::Manual
|
|
95
95
|
"ILLEGAL INPUT SEQUENCE: #{err}"
|
96
96
|
rescue Iconv::InvalidEncoding
|
97
97
|
if encoding == target
|
98
|
-
|
98
|
+
raise ArgumentError, "invalid encoding: #{encoding}"
|
99
99
|
else
|
100
100
|
"INVALID ENCODING!"
|
101
101
|
end
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1.283
|
4
|
+
version: 0.1.2.288
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-09-16 00:00:00 +02:00
|
12
|
+
date: 2008-09-17 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 0.3.3
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: htmlentities
|
@@ -102,6 +102,7 @@ has_rdoc: true
|
|
102
102
|
homepage: http://prometheus.rubyforge.org/cmess
|
103
103
|
post_install_message:
|
104
104
|
rdoc_options:
|
105
|
+
- --line-numbers
|
105
106
|
- --inline-source
|
106
107
|
- --title
|
107
108
|
- cmess Application documentation
|
@@ -110,7 +111,6 @@ rdoc_options:
|
|
110
111
|
- --main
|
111
112
|
- README
|
112
113
|
- --all
|
113
|
-
- --line-numbers
|
114
114
|
require_paths:
|
115
115
|
- lib
|
116
116
|
required_ruby_version: !ruby/object:Gem::Requirement
|