cmess 0.1.1.283 → 0.1.2.288
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +7 -0
- data/README +1 -1
- data/Rakefile +1 -1
- data/bin/bconv +19 -25
- data/bin/cinderella +25 -24
- data/bin/decode_entities +10 -10
- data/bin/guess_encoding +30 -38
- data/lib/cmess.rb +3 -0
- data/lib/cmess/bconv.rb +97 -26
- data/lib/cmess/cinderella.rb +2 -0
- data/lib/cmess/cli.rb +56 -17
- data/lib/cmess/guess_encoding/automatic.rb +4 -4
- data/lib/cmess/guess_encoding/manual.rb +1 -1
- data/lib/cmess/version.rb +1 -1
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
= Revision history for cmess
|
2
2
|
|
3
|
+
== 0.1.2 [2008-09-17]
|
4
|
+
|
5
|
+
* Some refactoring; started to make tools more usable as a library
|
6
|
+
* Make tools accept multiple files for input (via tempfile)
|
7
|
+
* Use ENV.user_encoding from ruby-nuggets
|
8
|
+
* Wrap command execution in a block catching any exceptions
|
9
|
+
|
3
10
|
== 0.1.1 [2008-09-16]
|
4
11
|
|
5
12
|
* Added bconv tool to convert between bibliographic encodings
|
data/README
CHANGED
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ begin
|
|
17
17
|
},
|
18
18
|
:files => FileList['lib/**/*.rb', 'bin/*'].to_a,
|
19
19
|
:extra_files => FileList['[A-Z]*', 'example/**/*', 'data/**/*'].to_a,
|
20
|
-
:dependencies =>
|
20
|
+
:dependencies => [['ruby-nuggets', '>= 0.3.3'], 'htmlentities']
|
21
21
|
}
|
22
22
|
}}
|
23
23
|
rescue LoadError
|
data/bin/bconv
CHANGED
@@ -29,7 +29,6 @@
|
|
29
29
|
###############################################################################
|
30
30
|
#++
|
31
31
|
|
32
|
-
require 'yaml'
|
33
32
|
require 'optparse'
|
34
33
|
|
35
34
|
require 'rubygems'
|
@@ -49,11 +48,11 @@ options = {
|
|
49
48
|
:output => STDOUT,
|
50
49
|
:source_encoding => determine_system_encoding,
|
51
50
|
:target_encoding => determine_system_encoding,
|
52
|
-
:chartab_file =>
|
51
|
+
:chartab_file => CMess::BConv::DEFAULT_CHARTAB_FILE
|
53
52
|
}
|
54
53
|
|
55
54
|
OptionParser.new(nil, 40) { |opts|
|
56
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
55
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
57
56
|
|
58
57
|
opts.separator ''
|
59
58
|
opts.separator 'Options:'
|
@@ -84,7 +83,7 @@ OptionParser.new(nil, 40) { |opts|
|
|
84
83
|
|
85
84
|
opts.separator ''
|
86
85
|
|
87
|
-
opts.on('-c', '--chartab
|
86
|
+
opts.on('-c', '--chartab YAML_FILE', "File containing character mappings, in YAML format.", "[Default: #{options[:chartab_file]}]") { |c|
|
88
87
|
options[:chartab_file] = c
|
89
88
|
}
|
90
89
|
|
@@ -109,28 +108,23 @@ OptionParser.new(nil, 40) { |opts|
|
|
109
108
|
opts.separator "When FILE is -, either STDIN or STDOUT is used (as appropriate)."
|
110
109
|
}.parse!
|
111
110
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
111
|
+
cli do
|
112
|
+
if options[:list_encodings]
|
113
|
+
puts CMess::BConv.encodings(options[:chartab_file])
|
114
|
+
exit
|
115
|
+
end
|
116
116
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
end
|
117
|
+
[:source_encoding, :target_encoding].each { |key|
|
118
|
+
options[key].call if options[key].respond_to?(:call)
|
119
|
+
}
|
121
120
|
|
122
|
-
|
123
|
-
options[key].call if options[key].respond_to?(:call)
|
124
|
-
}
|
121
|
+
trailing_args_as_input(options)
|
125
122
|
|
126
|
-
|
127
|
-
|
123
|
+
CMess::BConv.convert(
|
124
|
+
options[:input],
|
125
|
+
options[:output],
|
126
|
+
options[:source_encoding],
|
127
|
+
options[:target_encoding],
|
128
|
+
options[:chartab_file]
|
129
|
+
)
|
128
130
|
end
|
129
|
-
|
130
|
-
CMess::BConv.convert(
|
131
|
-
options[:input],
|
132
|
-
options[:output],
|
133
|
-
options[:source_encoding],
|
134
|
-
options[:target_encoding],
|
135
|
-
chartab
|
136
|
-
)
|
data/bin/cinderella
CHANGED
@@ -48,12 +48,12 @@ options = {
|
|
48
48
|
:crop => nil,
|
49
49
|
:source_encoding => nil,
|
50
50
|
:target_encoding => determine_system_encoding,
|
51
|
-
:csets => [
|
51
|
+
:csets => [CMess::Cinderella::DEFAULT_CSETS_DIR],
|
52
52
|
:repair => false
|
53
53
|
}
|
54
54
|
|
55
55
|
OptionParser.new(nil, 40) { |opts|
|
56
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
56
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
57
57
|
|
58
58
|
opts.separator ''
|
59
59
|
opts.separator 'Options:'
|
@@ -161,29 +161,30 @@ OptionParser.new(nil, 40) { |opts|
|
|
161
161
|
opts.separator "is ignored. When FILE is -, either STDIN or STDOUT is used (as appropriate)."
|
162
162
|
}.parse!
|
163
163
|
|
164
|
-
|
164
|
+
cli do
|
165
|
+
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
165
166
|
|
166
|
-
abort "No source encoding given! (Use the '-e' switch to do so; see '--help' for more information)" \
|
167
|
-
|
167
|
+
abort "No source encoding given! (Use the '-e' switch to do so; see '--help' for more information)" \
|
168
|
+
unless options[:source_encoding]
|
168
169
|
|
169
|
-
yaml_file = "#{options[:target_encoding].downcase}.yaml"
|
170
|
-
char_file = options[:csets].inject(nil) { |path, cset|
|
171
|
-
|
172
|
-
|
173
|
-
}
|
174
|
-
abort "Char file not found for target encoding: #{options[:target_encoding]}" \
|
175
|
-
unless char_file
|
170
|
+
yaml_file = "#{options[:target_encoding].downcase}.yaml"
|
171
|
+
char_file = options[:csets].inject(nil) { |path, cset|
|
172
|
+
path = File.join(cset, yaml_file)
|
173
|
+
break path if File.readable?(path)
|
174
|
+
}
|
176
175
|
|
177
|
-
|
178
|
-
|
179
|
-
end
|
176
|
+
abort "Char file not found for target encoding: #{options[:target_encoding]}" \
|
177
|
+
unless char_file
|
180
178
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
)
|
179
|
+
trailing_args_as_input(options)
|
180
|
+
|
181
|
+
CMess::Cinderella.pick(
|
182
|
+
options[:input],
|
183
|
+
options[:pot],
|
184
|
+
options[:crop],
|
185
|
+
options[:source_encoding],
|
186
|
+
options[:target_encoding],
|
187
|
+
YAML.load_file(char_file),
|
188
|
+
options[:repair]
|
189
|
+
)
|
190
|
+
end
|
data/bin/decode_entities
CHANGED
@@ -48,7 +48,7 @@ options = {
|
|
48
48
|
}
|
49
49
|
|
50
50
|
OptionParser.new { |opts|
|
51
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
51
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
52
52
|
|
53
53
|
opts.separator ''
|
54
54
|
opts.separator 'Options:'
|
@@ -94,13 +94,13 @@ OptionParser.new { |opts|
|
|
94
94
|
opts.separator "When FILE is -, either STDIN or STDOUT is used (as appropriate)."
|
95
95
|
}.parse!
|
96
96
|
|
97
|
-
|
98
|
-
options
|
99
|
-
end
|
97
|
+
cli do
|
98
|
+
trailing_args_as_input(options)
|
100
99
|
|
101
|
-
CMess::DecodeEntities.decode(
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
)
|
100
|
+
CMess::DecodeEntities.decode(
|
101
|
+
options[:input],
|
102
|
+
options[:output],
|
103
|
+
options[:source_encoding],
|
104
|
+
options[:target_encoding]
|
105
|
+
)
|
106
|
+
end
|
data/bin/guess_encoding
CHANGED
@@ -46,9 +46,6 @@ PROGNAME = File.basename($0)
|
|
46
46
|
# short-cut
|
47
47
|
CGE = CMess::GuessEncoding
|
48
48
|
|
49
|
-
# how to split list of arguments
|
50
|
-
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
51
|
-
|
52
49
|
options = {
|
53
50
|
:input => STDIN,
|
54
51
|
:line => 1,
|
@@ -64,7 +61,7 @@ options = {
|
|
64
61
|
}
|
65
62
|
|
66
63
|
OptionParser.new(nil, 40) { |opts|
|
67
|
-
opts.banner = "Usage: #{$0} [options] [FILE]"
|
64
|
+
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
68
65
|
|
69
66
|
opts.separator ''
|
70
67
|
opts.separator 'Options:'
|
@@ -193,39 +190,34 @@ OptionParser.new(nil, 40) { |opts|
|
|
193
190
|
opts.separator "When FILE is -, STDIN is used."
|
194
191
|
}.parse!
|
195
192
|
|
196
|
-
|
197
|
-
options
|
198
|
-
|
193
|
+
cli do
|
194
|
+
trailing_args_as_input(options)
|
195
|
+
|
196
|
+
if options[:manual] || options[:charcodes]
|
197
|
+
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
198
|
+
|
199
|
+
if charcodes = options[:charcodes]
|
200
|
+
base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
|
201
|
+
input = charcodes.map { |c| c.to_i(base).chr }.join
|
202
|
+
else
|
203
|
+
# reset line counter
|
204
|
+
$. = 0
|
205
|
+
|
206
|
+
input = options[:input].each { |line|
|
207
|
+
break line if $. == options[:line]
|
208
|
+
}
|
209
|
+
|
210
|
+
abort "Input was empty!" if $..zero?
|
211
|
+
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
212
|
+
end
|
199
213
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
abort "Input was empty!" if $..zero?
|
210
|
-
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
211
|
-
|
212
|
-
CGE::Manual.display(
|
213
|
-
input,
|
214
|
-
options[:target_encoding],
|
215
|
-
options[:encodings],
|
216
|
-
options[:additional_encodings]
|
217
|
-
)
|
218
|
-
elsif charcodes = options[:charcodes]
|
219
|
-
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
220
|
-
|
221
|
-
base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
|
222
|
-
|
223
|
-
CGE::Manual.display(
|
224
|
-
charcodes.map { |c| c.to_i(base).chr }.join,
|
225
|
-
options[:target_encoding],
|
226
|
-
options[:encodings],
|
227
|
-
options[:additional_encodings]
|
228
|
-
)
|
229
|
-
else # automatic
|
230
|
-
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
214
|
+
CGE::Manual.display(
|
215
|
+
input,
|
216
|
+
options[:target_encoding],
|
217
|
+
options[:encodings],
|
218
|
+
options[:additional_encodings]
|
219
|
+
)
|
220
|
+
else # automatic
|
221
|
+
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
222
|
+
end
|
231
223
|
end
|
data/lib/cmess.rb
CHANGED
data/lib/cmess/bconv.rb
CHANGED
@@ -26,67 +26,101 @@
|
|
26
26
|
###############################################################################
|
27
27
|
#++
|
28
28
|
|
29
|
+
require 'yaml'
|
29
30
|
require 'iconv'
|
30
31
|
require 'cmess'
|
31
32
|
|
32
33
|
# Convert between bibliographic (and other) encodings.
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
extend self
|
35
|
+
class CMess::BConv
|
37
36
|
|
38
37
|
# our version ;-)
|
39
|
-
VERSION = '0.0.
|
38
|
+
VERSION = '0.0.2'
|
40
39
|
|
41
40
|
INTERMEDIATE_ENCODING = 'utf-8'
|
42
41
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
DEFAULT_CHARTAB_FILE = File.join(CMess::DATA_DIR, 'chartab.yaml')
|
43
|
+
|
44
|
+
class << self
|
45
|
+
|
46
|
+
def encodings(chartab = DEFAULT_CHARTAB_FILE)
|
47
|
+
chartab = load_chartab(chartab)
|
48
|
+
|
49
|
+
chartab[chartab.keys.first].keys.map { |encoding|
|
50
|
+
encoding.upcase unless encoding =~ /\A__/
|
51
|
+
}.compact.sort
|
52
|
+
end
|
53
|
+
|
54
|
+
def convert(*args)
|
55
|
+
new(*args).convert
|
56
|
+
end
|
57
|
+
|
58
|
+
def load_chartab(chartab)
|
59
|
+
case chartab
|
60
|
+
when Hash
|
61
|
+
chartab
|
62
|
+
when String
|
63
|
+
raise "chartab file not found: #{chartab}" unless File.readable?(chartab)
|
64
|
+
YAML.load_file(chartab)
|
65
|
+
else
|
66
|
+
raise ArgumentError, "invalid chartab of type #{chartab.class}"
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
47
70
|
end
|
48
71
|
|
49
|
-
|
50
|
-
source_encoding.upcase!
|
51
|
-
target_encoding.upcase!
|
72
|
+
attr_reader :input, :output, :source_encoding, :target_encoding, :chartab, :encodings
|
52
73
|
|
53
|
-
|
74
|
+
def initialize(input, output, source_encoding, target_encoding, chartab = DEFAULT_CHARTAB_FILE)
|
75
|
+
@input, @output = input, output
|
54
76
|
|
55
|
-
|
56
|
-
|
57
|
-
|
77
|
+
@source_encoding = source_encoding.upcase
|
78
|
+
@target_encoding = target_encoding.upcase
|
79
|
+
|
80
|
+
@chartab = self.class.load_chartab(chartab)
|
81
|
+
@encodings = self.class.encodings(@chartab)
|
82
|
+
end
|
83
|
+
|
84
|
+
def encoding?(encoding)
|
85
|
+
encodings.include?(encoding)
|
86
|
+
end
|
87
|
+
|
88
|
+
def convert
|
89
|
+
if encoding?(source_encoding)
|
90
|
+
if encoding?(target_encoding)
|
91
|
+
@charmap = chartab.inject({}) { |hash, (code, map)|
|
58
92
|
hash.update(map[source_encoding] => map[target_encoding].pack('U*'))
|
59
93
|
}
|
60
94
|
|
61
|
-
input.each_byte { |
|
62
|
-
output.print
|
95
|
+
input.each_byte { |char|
|
96
|
+
output.print map(char)
|
63
97
|
}
|
64
98
|
else
|
65
|
-
iconv =
|
99
|
+
iconv = iconv_to
|
66
100
|
|
67
|
-
charmap = chartab.inject({}) { |hash, (code, map)|
|
101
|
+
@charmap = chartab.inject({}) { |hash, (code, map)|
|
68
102
|
hash.update(map[source_encoding] => [code.to_i(16)].pack('U*'))
|
69
103
|
}
|
70
104
|
|
71
|
-
input.each_byte { |
|
72
|
-
output.print iconv.iconv(
|
105
|
+
input.each_byte { |char|
|
106
|
+
output.print iconv.iconv(map(char))
|
73
107
|
}
|
74
108
|
end
|
75
109
|
else
|
76
|
-
if
|
77
|
-
iconv =
|
110
|
+
if encoding?(target_encoding)
|
111
|
+
iconv = iconv_from
|
78
112
|
|
79
113
|
charmap = chartab.inject({}) { |hash, (code, map)|
|
80
114
|
hash.update(code.to_i(16) => map[target_encoding].pack('U*'))
|
81
115
|
}
|
82
116
|
|
83
117
|
input.each { |line|
|
84
|
-
iconv.iconv(line).unpack('U*').each { |
|
85
|
-
output.print charmap[
|
118
|
+
iconv.iconv(line).unpack('U*').each { |char|
|
119
|
+
output.print charmap[char]
|
86
120
|
}
|
87
121
|
}
|
88
122
|
else
|
89
|
-
iconv =
|
123
|
+
iconv = iconv_from_to
|
90
124
|
|
91
125
|
input.each { |line|
|
92
126
|
output.puts iconv.iconv(line)
|
@@ -95,4 +129,41 @@ module CMess::BConv
|
|
95
129
|
end
|
96
130
|
end
|
97
131
|
|
132
|
+
private
|
133
|
+
|
134
|
+
def iconv_from_to(from = source_encoding, to = target_encoding)
|
135
|
+
iconv = begin
|
136
|
+
Iconv.new(to, from)
|
137
|
+
rescue Iconv::InvalidEncoding
|
138
|
+
raise ArgumentError, "invalid encoding: source encoding = #{from}, target encoding = #{to}"
|
139
|
+
end
|
140
|
+
|
141
|
+
def iconv.iconv(*args)
|
142
|
+
super
|
143
|
+
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
|
144
|
+
warn "ILLEGAL INPUT SEQUENCE: #{err}"; ''
|
145
|
+
end
|
146
|
+
|
147
|
+
iconv
|
148
|
+
end
|
149
|
+
|
150
|
+
def iconv_from(from = source_encoding)
|
151
|
+
iconv_from_to(from, INTERMEDIATE_ENCODING)
|
152
|
+
end
|
153
|
+
|
154
|
+
def iconv_to(to = target_encoding)
|
155
|
+
iconv_from_to(INTERMEDIATE_ENCODING, to)
|
156
|
+
end
|
157
|
+
|
158
|
+
def map(char, charmap = @charmap)
|
159
|
+
unless map = charmap[[char]]
|
160
|
+
unless map = charmap[[char, c = input.getc]]
|
161
|
+
input.ungetc(c) if c
|
162
|
+
map = ''
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
map
|
167
|
+
end
|
168
|
+
|
98
169
|
end
|
data/lib/cmess/cinderella.rb
CHANGED
@@ -43,6 +43,8 @@ module CMess::Cinderella
|
|
43
43
|
# our version ;-)
|
44
44
|
VERSION = '0.0.3'
|
45
45
|
|
46
|
+
DEFAULT_CSETS_DIR = File.join(CMess::DATA_DIR, 'csets')
|
47
|
+
|
46
48
|
def pick(input, pot, crop, source_encoding, target_encoding, chars, repair = false)
|
47
49
|
iconv = Iconv.new(target_encoding, source_encoding)
|
48
50
|
|
data/lib/cmess/cli.rb
CHANGED
@@ -26,9 +26,15 @@
|
|
26
26
|
###############################################################################
|
27
27
|
#++
|
28
28
|
|
29
|
+
require 'tempfile'
|
30
|
+
|
31
|
+
require 'rubygems'
|
32
|
+
require 'nuggets/env/user_encoding'
|
33
|
+
|
29
34
|
module CMess::CLI
|
30
35
|
|
31
|
-
|
36
|
+
# how to split list of arguments
|
37
|
+
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
32
38
|
|
33
39
|
def ensure_readable(file)
|
34
40
|
abort "Can't find input file: #{file}" unless File.readable?(file)
|
@@ -39,8 +45,7 @@ module CMess::CLI
|
|
39
45
|
end
|
40
46
|
|
41
47
|
def open_file_in_place(file)
|
42
|
-
|
43
|
-
[File.readlines(file), File.open(file, 'w')]
|
48
|
+
[open_temporary_input(file), File.open(file, 'w')]
|
44
49
|
end
|
45
50
|
|
46
51
|
def open_file_or_std(file, mode = 'r')
|
@@ -57,25 +62,59 @@ module CMess::CLI
|
|
57
62
|
end
|
58
63
|
end
|
59
64
|
|
60
|
-
def
|
61
|
-
|
62
|
-
ENV['LANG'][/\.(.*)/, 1] ||
|
63
|
-
system_encoding_not_found
|
64
|
-
end
|
65
|
+
def open_temporary_input(*files)
|
66
|
+
temp = Tempfile.new('cmess_cli')
|
65
67
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
68
|
+
files.each { |file|
|
69
|
+
if file == '-'
|
70
|
+
STDIN.each { |line| temp << line }
|
71
|
+
else
|
72
|
+
ensure_readable(file)
|
73
|
+
File.open(file) { |f| f.each { |line| temp << line } }
|
74
|
+
end
|
72
75
|
}
|
73
76
|
|
74
|
-
|
75
|
-
|
77
|
+
# return File, instead of Tempfile
|
78
|
+
temp.close
|
79
|
+
temp.open
|
80
|
+
end
|
81
|
+
|
82
|
+
def trailing_args_as_input(options)
|
83
|
+
unless ARGV.empty? || options[:input_set]
|
84
|
+
options[:input] = if ARGV.size == 1
|
85
|
+
open_file_or_std(ARGV.first)
|
86
|
+
else
|
87
|
+
open_temporary_input(*ARGV)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def determine_system_encoding
|
93
|
+
ENV.user_encoding || begin
|
94
|
+
dummy = lambda {
|
95
|
+
abort <<-EOT
|
96
|
+
Your system's encoding couldn't be determined automatically -- please specify
|
97
|
+
it explicitly via the ENCODING environment variable or via the '-t' option.
|
98
|
+
EOT
|
99
|
+
}
|
100
|
+
|
101
|
+
def dummy.to_s; 'NOT FOUND' end
|
102
|
+
|
103
|
+
dummy
|
76
104
|
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def cli
|
108
|
+
yield
|
109
|
+
rescue => err
|
110
|
+
if $VERBOSE
|
111
|
+
backtrace = err.backtrace
|
112
|
+
fromtrace = backtrace[1..-1].map { |i| "\n from #{i}" }
|
77
113
|
|
78
|
-
|
114
|
+
abort "#{backtrace.first} #{err} (#{err.class})#{fromtrace}"
|
115
|
+
else
|
116
|
+
abort "#{err.to_s.capitalize} [#{err.backtrace.first}]"
|
117
|
+
end
|
79
118
|
end
|
80
119
|
|
81
120
|
end
|
@@ -273,11 +273,11 @@ class CMess::GuessEncoding::Automatic
|
|
273
273
|
# UTF-8, if number of escape-bytes and following bytes
|
274
274
|
# is matching (cf. http://en.wikipedia.org/wiki/UTF-8)
|
275
275
|
encoding UTF_8 do
|
276
|
-
esc_bytes = byte_count_sum(0xc0..0xdf)
|
276
|
+
esc_bytes = byte_count_sum(0xc0..0xdf) +
|
277
277
|
# => 110xxxxx 10xxxxxx
|
278
|
-
|
278
|
+
byte_count_sum(0xe0..0xef) * 2 +
|
279
279
|
# => 1110xxxx 10xxxxxx 10xxxxxx
|
280
|
-
|
280
|
+
byte_count_sum(0xf0..0xf7) * 3
|
281
281
|
# => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
282
282
|
fol_bytes = byte_count_sum(0x80..0xbf)
|
283
283
|
# => 10xxxxxx
|
@@ -287,7 +287,7 @@ class CMess::GuessEncoding::Automatic
|
|
287
287
|
|
288
288
|
# Analyse statistical appearance of German umlauts and other accented
|
289
289
|
# letters (see TEST_CHARS)
|
290
|
-
encodings
|
290
|
+
encodings(*TEST_ENCODINGS) do
|
291
291
|
ratios = {}
|
292
292
|
|
293
293
|
TEST_ENCODINGS.find(lambda {
|
@@ -95,7 +95,7 @@ module CMess::GuessEncoding::Manual
|
|
95
95
|
"ILLEGAL INPUT SEQUENCE: #{err}"
|
96
96
|
rescue Iconv::InvalidEncoding
|
97
97
|
if encoding == target
|
98
|
-
|
98
|
+
raise ArgumentError, "invalid encoding: #{encoding}"
|
99
99
|
else
|
100
100
|
"INVALID ENCODING!"
|
101
101
|
end
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1.283
|
4
|
+
version: 0.1.2.288
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-09-
|
12
|
+
date: 2008-09-17 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 0.3.3
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: htmlentities
|
@@ -102,6 +102,7 @@ has_rdoc: true
|
|
102
102
|
homepage: http://prometheus.rubyforge.org/cmess
|
103
103
|
post_install_message:
|
104
104
|
rdoc_options:
|
105
|
+
- --line-numbers
|
105
106
|
- --inline-source
|
106
107
|
- --title
|
107
108
|
- cmess Application documentation
|
@@ -110,7 +111,6 @@ rdoc_options:
|
|
110
111
|
- --main
|
111
112
|
- README
|
112
113
|
- --all
|
113
|
-
- --line-numbers
|
114
114
|
require_paths:
|
115
115
|
- lib
|
116
116
|
required_ruby_version: !ruby/object:Gem::Requirement
|