cmess 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +68 -81
- data/ChangeLog +28 -0
- data/README +23 -21
- data/Rakefile +15 -16
- data/bin/bconv +30 -47
- data/bin/cinderella +51 -68
- data/bin/decode_entities +28 -36
- data/bin/guess_encoding +53 -81
- data/lib/cmess.rb +35 -26
- data/lib/cmess/bconv.rb +23 -25
- data/lib/cmess/cinderella.rb +21 -20
- data/lib/cmess/cli.rb +27 -17
- data/lib/cmess/decode_entities.rb +19 -20
- data/lib/cmess/guess_encoding.rb +20 -18
- data/lib/cmess/guess_encoding/automatic.rb +151 -125
- data/lib/cmess/guess_encoding/encoding.rb +16 -18
- data/lib/cmess/guess_encoding/manual.rb +26 -31
- data/lib/cmess/version.rb +2 -2
- metadata +25 -28
data/bin/cinderella
CHANGED
@@ -6,40 +6,33 @@
|
|
6
6
|
# cinderella -- Handle double encoded characters #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007 University of Cologne,
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Copyright (C) 2007-2011 University of Cologne, #
|
10
|
+
# Albertus-Magnus-Platz, #
|
11
|
+
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
13
|
# Authors: #
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
15
15
|
# #
|
16
16
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
|
-
# terms of the GNU General Public License as published by the Free
|
18
|
-
# Foundation; either version 3 of the License, or (at your option)
|
19
|
-
# version.
|
17
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
18
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
19
|
+
# any later version. #
|
20
20
|
# #
|
21
21
|
# cmess is distributed in the hope that it will be useful, but WITHOUT ANY #
|
22
22
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
23
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
24
|
-
# details.
|
23
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
24
|
+
# more details. #
|
25
25
|
# #
|
26
|
-
# You should have received a copy of the GNU General Public License
|
27
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>.
|
26
|
+
# You should have received a copy of the GNU Affero General Public License #
|
27
|
+
# along with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
28
28
|
# #
|
29
29
|
###############################################################################
|
30
30
|
#++
|
31
31
|
|
32
|
-
require 'optparse'
|
33
|
-
require 'yaml'
|
34
|
-
|
35
|
-
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
36
|
-
|
37
32
|
require 'cmess/cinderella'
|
38
|
-
require 'cmess/cli'
|
39
|
-
|
40
33
|
include CMess::CLI
|
41
34
|
|
42
|
-
|
35
|
+
progname = File.basename($0)
|
43
36
|
|
44
37
|
options = {
|
45
38
|
:input => STDIN,
|
@@ -52,82 +45,82 @@ options = {
|
|
52
45
|
:repair => false
|
53
46
|
}
|
54
47
|
|
55
|
-
|
48
|
+
parse_options { |opts|
|
56
49
|
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
57
50
|
|
58
51
|
opts.separator ''
|
59
52
|
opts.separator 'Options:'
|
60
53
|
|
61
|
-
opts.on('-i', '--input FILE',
|
62
|
-
options[:input] = open_file_or_std(
|
54
|
+
opts.on('-i', '--input FILE', 'Input file to read from [Default: STDIN]') { |input|
|
55
|
+
options[:input] = open_file_or_std(input)
|
63
56
|
options[:input_set] = true
|
64
57
|
}
|
65
58
|
|
66
59
|
opts.separator ''
|
67
60
|
|
68
|
-
opts.on('-p', '--pot FILE',
|
69
|
-
options[:pot] = open_file_or_std(
|
61
|
+
opts.on('-p', '--pot FILE', 'The good into the pot...') { |pot|
|
62
|
+
options[:pot] = open_file_or_std(pot, 'w')
|
70
63
|
}
|
71
64
|
|
72
|
-
opts.on('-c', '--crop FILE',
|
73
|
-
options[:crop] = open_file_or_std(
|
65
|
+
opts.on('-c', '--crop FILE', '...the bad into the crop') { |crop|
|
66
|
+
options[:crop] = open_file_or_std(crop, 'w')
|
74
67
|
}
|
75
68
|
|
76
69
|
opts.separator ''
|
77
70
|
|
78
|
-
opts.on('-o', '--output [FILE]',
|
79
|
-
options[:output] = open_file_or_std(
|
80
|
-
|
81
|
-
options[:pot] = options[:output]
|
82
|
-
options[:crop] = options[:output]
|
71
|
+
opts.on('-o', '--output [FILE]', 'Write both good and bad lines to FILE or', 'default [Default: STDOUT] (Particularly', "useful in combination with the '-r' option)") { |output|
|
72
|
+
options[:output] = open_file_or_std(output, 'w') if output
|
73
|
+
options[:pot] = options[:crop] = options[:output]
|
83
74
|
}
|
84
75
|
|
85
76
|
opts.separator ''
|
86
77
|
|
87
|
-
opts.on('-I', '--in-place FILE', "Modify file in-place; sets '-i' and '-o'",
|
88
|
-
options[:input], options[:output] = open_file_in_place(
|
78
|
+
opts.on('-I', '--in-place FILE', "Modify file in-place; sets '-i' and '-o'", '(Only really useful in combination with', "the '-r' option)") { |file|
|
79
|
+
options[:input], options[:output] = open_file_in_place(file)
|
89
80
|
options[:input_set] = true
|
90
81
|
|
91
|
-
options[:pot]
|
92
|
-
options[:crop] = options[:output]
|
82
|
+
options[:pot] = options[:crop] = options[:output]
|
93
83
|
}
|
94
84
|
|
95
85
|
opts.separator ''
|
96
86
|
|
97
|
-
opts.on('-e', '--source-encoding ENCODING',
|
98
|
-
options[:source_encoding] =
|
87
|
+
opts.on('-e', '--source-encoding ENCODING', 'Source encoding (from) [REQUIRED]') { |encoding|
|
88
|
+
options[:source_encoding] = encoding
|
99
89
|
}
|
100
90
|
|
101
|
-
opts.on('-t', '--target-encoding ENCODING', "Target encoding (to); see '-l' for a list", "of available encodings [Default: #{options[:target_encoding]}]") { |
|
102
|
-
options[:target_encoding] =
|
91
|
+
opts.on('-t', '--target-encoding ENCODING', "Target encoding (to); see '-l' for a list", "of available encodings [Default: #{options[:target_encoding]}]") { |encoding|
|
92
|
+
options[:target_encoding] = encoding
|
103
93
|
}
|
104
94
|
|
105
95
|
opts.separator ''
|
106
96
|
|
107
|
-
opts.on('-T', '--addtl-target-encodings DIRECTORY',
|
108
|
-
ensure_directory(
|
109
|
-
|
110
|
-
options[:csets] |= [File.expand_path(d)]
|
97
|
+
opts.on('-T', '--addtl-target-encodings DIRECTORY', 'Directory providing additional char files', 'for target encoding') { |directory|
|
98
|
+
ensure_directory(directory)
|
99
|
+
options[:csets] |= [File.expand_path(directory)]
|
111
100
|
}
|
112
101
|
|
113
102
|
opts.separator ''
|
114
103
|
|
115
|
-
opts.on('-l', '--list-encodings',
|
116
|
-
csets =
|
104
|
+
opts.on('-l', '--list-encodings', 'Display a list of available target encodings', "and exit; see '-T' on how to add your own") {
|
105
|
+
csets = {}
|
106
|
+
|
107
|
+
options[:csets].each { |cset|
|
117
108
|
encodings = Dir[File.join(cset, '*.yaml')].sort.map { |yaml|
|
118
109
|
File.basename(yaml, '.yaml') unless File.symlink?(yaml)
|
119
110
|
}.compact
|
120
111
|
|
121
|
-
|
122
|
-
hash
|
112
|
+
csets[cset] = encodings unless encodings.empty?
|
123
113
|
}
|
124
114
|
|
125
115
|
if csets.empty?
|
126
|
-
puts "No target encodings available for #{
|
116
|
+
puts "No target encodings available for #{progname}"
|
127
117
|
else
|
128
|
-
puts "Available target encodings for #{
|
118
|
+
puts "Available target encodings for #{progname}:"
|
119
|
+
|
129
120
|
csets.each { |cset, encodings|
|
121
|
+
puts
|
130
122
|
puts "[#{cset}]"
|
123
|
+
|
131
124
|
encodings.each { |encoding|
|
132
125
|
puts " - #{encoding}"
|
133
126
|
}
|
@@ -139,52 +132,42 @@ OptionParser.new(nil, 40) { |opts|
|
|
139
132
|
|
140
133
|
opts.separator ''
|
141
134
|
|
142
|
-
opts.on('-r', '--repair',
|
135
|
+
opts.on('-r', '--repair', 'Try to repair corrupted characters') {
|
143
136
|
options[:repair] = true
|
144
137
|
}
|
145
138
|
|
146
139
|
opts.separator ''
|
147
140
|
opts.separator 'Generic options:'
|
148
141
|
|
149
|
-
opts.on('-h', '--help',
|
142
|
+
opts.on('-h', '--help', 'Print this help message and exit') {
|
150
143
|
puts opts
|
151
144
|
exit
|
152
145
|
}
|
153
146
|
|
154
|
-
opts.on('--version',
|
155
|
-
puts "#{
|
147
|
+
opts.on('--version', 'Print program version and exit') {
|
148
|
+
puts "#{progname} v#{CMess::Cinderella::VERSION} (part of cmess v#{CMess::VERSION})"
|
156
149
|
exit
|
157
150
|
}
|
158
151
|
|
159
152
|
opts.separator ''
|
160
153
|
opts.separator "If '-p' or '-c' is omitted, and '-o' is not given either, that particular output"
|
161
|
-
opts.separator
|
162
|
-
}
|
154
|
+
opts.separator 'is ignored. When FILE is -, either STDIN or STDOUT is used (as appropriate).'
|
155
|
+
}
|
163
156
|
|
164
157
|
cli do
|
165
158
|
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
166
159
|
|
167
|
-
abort "No source encoding given! (Use the '-e' switch to do so; see '--help' for more information)"
|
168
|
-
unless options[:source_encoding]
|
160
|
+
abort "No source encoding given! (Use the '-e' switch to do so; see '--help' for more information)" unless options[:source_encoding]
|
169
161
|
|
170
162
|
yaml_file = "#{options[:target_encoding].downcase}.yaml"
|
171
|
-
char_file = options[:csets].
|
163
|
+
char_file = options[:csets].find { |cset|
|
172
164
|
path = File.join(cset, yaml_file)
|
173
165
|
break path if File.readable?(path)
|
174
166
|
}
|
175
167
|
|
176
|
-
abort "Char file not found for target encoding: #{options[:target_encoding]}"
|
177
|
-
unless char_file
|
168
|
+
abort "Char file not found for target encoding: #{options[:target_encoding]}" unless char_file
|
178
169
|
|
179
170
|
trailing_args_as_input(options)
|
180
171
|
|
181
|
-
CMess::Cinderella.pick(
|
182
|
-
options[:input],
|
183
|
-
options[:pot],
|
184
|
-
options[:crop],
|
185
|
-
options[:source_encoding],
|
186
|
-
options[:target_encoding],
|
187
|
-
YAML.load_file(char_file),
|
188
|
-
options[:repair]
|
189
|
-
)
|
172
|
+
CMess::Cinderella.pick(options.merge(:chars => YAML.load_file(char_file)))
|
190
173
|
end
|
data/bin/decode_entities
CHANGED
@@ -6,40 +6,32 @@
|
|
6
6
|
# decode_entities -- Decode HTML entities #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007 University of Cologne,
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Copyright (C) 2007-2011 University of Cologne, #
|
10
|
+
# Albertus-Magnus-Platz, #
|
11
|
+
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
13
|
# Authors: #
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
15
15
|
# #
|
16
16
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
|
-
# terms of the GNU General Public License as published by the Free
|
18
|
-
# Foundation; either version 3 of the License, or (at your option)
|
19
|
-
# version.
|
17
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
18
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
19
|
+
# any later version. #
|
20
20
|
# #
|
21
21
|
# cmess is distributed in the hope that it will be useful, but WITHOUT ANY #
|
22
22
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
23
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
24
|
-
# details.
|
23
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
24
|
+
# more details. #
|
25
25
|
# #
|
26
|
-
# You should have received a copy of the GNU General Public License
|
27
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>.
|
26
|
+
# You should have received a copy of the GNU Affero General Public License #
|
27
|
+
# along with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
28
28
|
# #
|
29
29
|
###############################################################################
|
30
30
|
#++
|
31
31
|
|
32
|
-
require 'optparse'
|
33
|
-
|
34
|
-
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
35
|
-
|
36
32
|
require 'cmess/decode_entities'
|
37
|
-
require 'cmess/cli'
|
38
|
-
|
39
33
|
include CMess::CLI
|
40
34
|
|
41
|
-
PROGNAME = File.basename($0)
|
42
|
-
|
43
35
|
options = {
|
44
36
|
:input => STDIN,
|
45
37
|
:output => STDOUT,
|
@@ -48,59 +40,59 @@ options = {
|
|
48
40
|
:flavour => CMess::DecodeEntities::DEFAULT_FLAVOUR
|
49
41
|
}
|
50
42
|
|
51
|
-
|
43
|
+
parse_options { |opts|
|
52
44
|
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
53
45
|
|
54
46
|
opts.separator ''
|
55
47
|
opts.separator 'Options:'
|
56
48
|
|
57
|
-
opts.on('-i', '--input FILE',
|
58
|
-
options[:input] = open_file_or_std(
|
49
|
+
opts.on('-i', '--input FILE', 'Input file to read from [Default: STDIN]') { |input|
|
50
|
+
options[:input] = open_file_or_std(input)
|
59
51
|
options[:input_set] = true
|
60
52
|
}
|
61
53
|
|
62
|
-
opts.on('-o', '--output FILE',
|
63
|
-
options[:output] = open_file_or_std(
|
54
|
+
opts.on('-o', '--output FILE', 'Output file to write to [Default: STDOUT]') { |output|
|
55
|
+
options[:output] = open_file_or_std(output, 'w')
|
64
56
|
}
|
65
57
|
|
66
|
-
opts.on('-I', '--in-place FILE', "Modify file in-place; sets '-i' and '-o'") { |
|
67
|
-
options[:input], options[:output] = open_file_in_place(
|
58
|
+
opts.on('-I', '--in-place FILE', "Modify file in-place; sets '-i' and '-o'") { |file|
|
59
|
+
options[:input], options[:output] = open_file_in_place(file)
|
68
60
|
options[:input_set] = true
|
69
61
|
}
|
70
62
|
|
71
63
|
opts.separator ''
|
72
64
|
|
73
|
-
opts.on('-e', '--source-encoding ENCODING', "Encoding of input file [Default: #{options[:source_encoding].upcase}]") { |
|
74
|
-
options[:source_encoding] =
|
65
|
+
opts.on('-e', '--source-encoding ENCODING', "Encoding of input file [Default: #{options[:source_encoding].upcase}]") { |encoding|
|
66
|
+
options[:source_encoding] = encoding.downcase
|
75
67
|
}
|
76
68
|
|
77
|
-
opts.on('-t', '--target-encoding ENCODING',
|
78
|
-
options[:target_encoding] =
|
69
|
+
opts.on('-t', '--target-encoding ENCODING', 'Desired encoding for output file [Default: <source_encoding>]') { |encoding|
|
70
|
+
options[:target_encoding] = encoding.downcase
|
79
71
|
}
|
80
72
|
|
81
73
|
opts.separator ''
|
82
74
|
|
83
75
|
opts.on('-f', '--flavour FLAVOUR', "Flavour to use for the HTMLEntities decoder [Default: #{options[:flavour]}]",
|
84
|
-
"(Available flavours are: #{HTMLEntities::FLAVORS.join(', ')})") { |
|
85
|
-
options[:flavour] =
|
76
|
+
"(Available flavours are: #{HTMLEntities::FLAVORS.join(', ')})") { |flavour|
|
77
|
+
options[:flavour] = flavour
|
86
78
|
}
|
87
79
|
|
88
80
|
opts.separator ''
|
89
81
|
opts.separator 'Generic options:'
|
90
82
|
|
91
|
-
opts.on('-h', '--help',
|
83
|
+
opts.on('-h', '--help', 'Print this help message and exit') {
|
92
84
|
puts opts
|
93
85
|
exit
|
94
86
|
}
|
95
87
|
|
96
|
-
opts.on('--version',
|
97
|
-
puts "#{
|
88
|
+
opts.on('--version', 'Print program version and exit') {
|
89
|
+
puts "#{File.basename($0)} v#{CMess::DecodeEntities::VERSION} (part of cmess v#{CMess::VERSION})"
|
98
90
|
exit
|
99
91
|
}
|
100
92
|
|
101
93
|
opts.separator ''
|
102
|
-
opts.separator
|
103
|
-
}
|
94
|
+
opts.separator 'When FILE is -, either STDIN or STDOUT is used (as appropriate).'
|
95
|
+
}
|
104
96
|
|
105
97
|
cli do
|
106
98
|
trailing_args_as_input(options)
|
data/bin/guess_encoding
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2011 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
@@ -14,38 +14,24 @@
|
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
15
15
|
# #
|
16
16
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
|
-
# terms of the GNU General Public License as published by the Free
|
18
|
-
# Foundation; either version 3 of the License, or (at your option)
|
19
|
-
# version.
|
17
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
18
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
19
|
+
# any later version. #
|
20
20
|
# #
|
21
21
|
# cmess is distributed in the hope that it will be useful, but WITHOUT ANY #
|
22
22
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
23
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
24
|
-
# details.
|
23
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
24
|
+
# more details. #
|
25
25
|
# #
|
26
|
-
# You should have received a copy of the GNU General Public License
|
27
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>.
|
26
|
+
# You should have received a copy of the GNU Affero General Public License #
|
27
|
+
# along with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
28
28
|
# #
|
29
29
|
###############################################################################
|
30
30
|
#++
|
31
31
|
|
32
|
-
require 'optparse'
|
33
|
-
|
34
|
-
require 'rubygems'
|
35
|
-
require 'nuggets/string/word_wrap'
|
36
|
-
|
37
|
-
$: << File.join(File.dirname(__FILE__), '..', 'lib')
|
38
|
-
|
39
32
|
require 'cmess/guess_encoding'
|
40
|
-
require 'cmess/cli'
|
41
|
-
|
42
33
|
include CMess::CLI
|
43
34
|
|
44
|
-
PROGNAME = File.basename($0)
|
45
|
-
|
46
|
-
# short-cut
|
47
|
-
CGE = CMess::GuessEncoding
|
48
|
-
|
49
35
|
options = {
|
50
36
|
:input => STDIN,
|
51
37
|
:line => 1,
|
@@ -56,140 +42,136 @@ options = {
|
|
56
42
|
:chunk_size => nil,
|
57
43
|
:ignore_bom => false,
|
58
44
|
:charcodes => nil,
|
59
|
-
:
|
60
|
-
:octal => false
|
45
|
+
:base => 16
|
61
46
|
}
|
62
47
|
|
63
|
-
|
48
|
+
parse_options { |opts|
|
64
49
|
opts.banner = "Usage: #{$0} [options] [FILE...]"
|
65
50
|
|
66
51
|
opts.separator ''
|
67
52
|
opts.separator 'Options:'
|
68
53
|
|
69
|
-
opts.on('-i', '--input FILE',
|
70
|
-
options[:input] = open_file_or_std(
|
54
|
+
opts.on('-i', '--input FILE', 'Input file to read from [Default: STDIN]') { |input|
|
55
|
+
options[:input] = open_file_or_std(input)
|
71
56
|
options[:input_set] = true
|
72
57
|
}
|
73
58
|
|
74
59
|
opts.separator ''
|
75
60
|
opts.separator ' * Automatic guessing'
|
76
|
-
opts.separator ''
|
77
61
|
|
78
|
-
opts.on('-c', '--chunk-size SIZE', Integer,
|
79
|
-
options[:chunk_size] =
|
62
|
+
opts.on('-c', '--chunk-size SIZE', Integer, 'Size of chunks input will be read in until a valid encoding', 'has been found; by default the whole file will be read') { |size|
|
63
|
+
options[:chunk_size] = size
|
80
64
|
}
|
81
65
|
|
82
66
|
opts.separator ''
|
83
67
|
|
84
|
-
opts.on('-b', '--ignore-bom',
|
68
|
+
opts.on('-b', '--ignore-bom', 'Ignore detected BOM (if any); see below for a list of', 'supported encodings') {
|
85
69
|
options[:ignore_bom] = true
|
86
70
|
}
|
87
71
|
|
88
72
|
opts.separator ''
|
89
73
|
opts.separator ' * Manual guessing'
|
90
|
-
opts.separator ''
|
91
74
|
|
92
|
-
opts.on('-m', '--manual',
|
75
|
+
opts.on('-m', '--manual', 'Present variously encoded input for manual encoding guessing') {
|
93
76
|
options[:manual] = true
|
94
77
|
}
|
95
78
|
|
96
79
|
opts.separator ''
|
97
80
|
|
98
|
-
opts.on('-l', '--line LINE', "Line number of input file to use for testing [Default: #{options[:line]}]") { |
|
99
|
-
options[:line] =
|
81
|
+
opts.on('-l', '--line LINE', Integer, "Line number of input file to use for testing [Default: #{options[:line]}]") { |line|
|
82
|
+
options[:line] = line
|
100
83
|
|
101
84
|
unless options[:line] > 0
|
102
|
-
options[:input].read
|
103
|
-
abort
|
85
|
+
options[:input].read # prevent 'Broken pipe' error
|
86
|
+
abort 'Line number must be greater then 0!'
|
104
87
|
end
|
105
88
|
}
|
106
89
|
|
107
90
|
opts.separator ''
|
108
91
|
|
109
|
-
opts.on('-e', '--encodings ENCODINGS...',
|
92
|
+
opts.on('-e', '--encodings ENCODINGS...', 'List of encodings to try >instead of< default (see below)') { |encodings|
|
110
93
|
options[:encodings] ||= []
|
111
|
-
options[:encodings] +=
|
94
|
+
options[:encodings] += arg_list(encodings)
|
112
95
|
}
|
113
96
|
|
114
|
-
opts.on('-a', '--additional-encodings ENCODINGS...',
|
115
|
-
options[:additional_encodings] +=
|
97
|
+
opts.on('-a', '--additional-encodings ENCODINGS...', 'List of encodings to try >in addition to< default (see below)') { |encodings|
|
98
|
+
options[:additional_encodings] += arg_list(encodings)
|
116
99
|
}
|
117
100
|
|
118
101
|
opts.separator ''
|
119
102
|
|
120
|
-
opts.on('-t', '--target-encoding ENCODING', "Target encoding of your system [Default: #{options[:target_encoding]}]") { |
|
121
|
-
options[:target_encoding] =
|
103
|
+
opts.on('-t', '--target-encoding ENCODING', "Target encoding of your system [Default: #{options[:target_encoding]}]") { |encoding|
|
104
|
+
options[:target_encoding] = encoding
|
122
105
|
}
|
123
106
|
|
124
107
|
opts.separator ''
|
125
108
|
|
126
109
|
opts.on('-L', '--list-encodings', 'Print a list of all available encodings on your system and exit') {
|
127
|
-
puts
|
110
|
+
puts CMess::GuessEncoding::Encoding.all_encodings
|
128
111
|
exit
|
129
112
|
}
|
130
113
|
|
131
114
|
opts.separator ''
|
132
115
|
opts.separator ' * Charcodes'
|
133
|
-
opts.separator ''
|
134
116
|
|
135
|
-
opts.on('-C', '--charcodes CHARCODES',
|
136
|
-
options[:charcodes] =
|
117
|
+
opts.on('-C', '--charcodes CHARCODES', 'Specify a list of character codes (in hexadecimal by default)', "for manual guessing. (Options '-e', '-a', and '-t' apply here", 'as well; see "Manual guessing" for details.)') { |charcodes|
|
118
|
+
options[:charcodes] = arg_list(charcodes)
|
137
119
|
}
|
138
120
|
|
139
121
|
opts.separator ''
|
140
122
|
|
141
|
-
opts.on('-D', '--decimal',
|
142
|
-
options[:
|
123
|
+
opts.on('-D', '--decimal', 'Charcodes are in decimal') {
|
124
|
+
options[:base] = 10
|
143
125
|
}
|
144
126
|
|
145
|
-
opts.on('-O', '--octal',
|
146
|
-
options[:
|
127
|
+
opts.on('-O', '--octal', 'Charcodes are in octal') {
|
128
|
+
options[:base] = 8
|
147
129
|
}
|
148
130
|
|
149
131
|
opts.separator ''
|
150
132
|
opts.separator 'Generic options:'
|
151
133
|
|
152
|
-
opts.on('-h', '--help',
|
134
|
+
opts.on('-h', '--help', 'Print this help message and exit') {
|
153
135
|
puts opts
|
154
136
|
exit
|
155
137
|
}
|
156
138
|
|
157
|
-
opts.on('--version',
|
158
|
-
puts "#{
|
139
|
+
opts.on('--version', 'Print program version and exit') {
|
140
|
+
puts "#{File.basename($0)} v#{CMess::GuessEncoding::VERSION} (part of cmess v#{CMess::VERSION})"
|
159
141
|
exit
|
160
142
|
}
|
161
143
|
|
162
144
|
opts.separator ''
|
163
145
|
opts.separator 'Supported encodings for automatic guessing (will be tried in that order):'
|
164
|
-
|
165
|
-
opts.separator
|
146
|
+
CMess::GuessEncoding::Automatic.supported_encodings.join(', ').word_wrap(110, true).each { |list|
|
147
|
+
opts.separator list
|
166
148
|
}
|
167
149
|
|
168
150
|
opts.separator ''
|
169
151
|
opts.separator 'Supported encodings for BOM detection (will be tried in that order):'
|
170
|
-
|
171
|
-
opts.separator
|
152
|
+
CMess::GuessEncoding::Automatic.supported_boms.join(', ').word_wrap(110, true).each { |list|
|
153
|
+
opts.separator list
|
172
154
|
}
|
173
155
|
|
174
156
|
opts.separator ''
|
175
157
|
opts.separator 'Default encodings for manual guessing:'
|
176
|
-
|
177
|
-
opts.separator
|
158
|
+
CMess::GuessEncoding::Manual::ENCODINGS.join(', ').word_wrap(110, true).each { |list|
|
159
|
+
opts.separator list
|
178
160
|
}
|
179
161
|
|
180
162
|
opts.separator ''
|
181
163
|
opts.separator 'Likely candidates for additional testing:'
|
182
|
-
|
183
|
-
opts.separator
|
164
|
+
CMess::GuessEncoding::Manual::CANDIDATES.join(', ').word_wrap(110, true).each { |list|
|
165
|
+
opts.separator list
|
184
166
|
}
|
185
167
|
|
186
168
|
opts.separator ''
|
187
|
-
opts.separator
|
169
|
+
opts.separator "NOTE: To select all encodings available on your system (see '-L'), specify __ALL__."
|
188
170
|
opts.separator ' To select the likely candidates named above, specify __COMMON__.'
|
189
171
|
|
190
172
|
opts.separator ''
|
191
|
-
opts.separator
|
192
|
-
}
|
173
|
+
opts.separator 'When FILE is -, STDIN is used.'
|
174
|
+
}
|
193
175
|
|
194
176
|
cli do
|
195
177
|
trailing_args_as_input(options)
|
@@ -198,27 +180,17 @@ cli do
|
|
198
180
|
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
199
181
|
|
200
182
|
if charcodes = options[:charcodes]
|
201
|
-
|
202
|
-
input = charcodes.map { |c| c.to_i(base).chr }.join
|
183
|
+
input = charcodes.map { |charcode| charcode.to_i(options[:base]).chr }.join
|
203
184
|
else
|
204
|
-
# reset line counter
|
205
|
-
|
206
|
-
|
207
|
-
input = options[:input].each { |line|
|
208
|
-
break line if $. == options[:line]
|
209
|
-
}
|
185
|
+
$. = 0 # reset line counter
|
186
|
+
input = options[:input].each { |line| break line if $. == options[:line] }
|
210
187
|
|
211
188
|
abort "Input was empty!" if $..zero?
|
212
189
|
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
213
190
|
end
|
214
191
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
options[:encodings],
|
219
|
-
options[:additional_encodings]
|
220
|
-
)
|
221
|
-
else # automatic
|
222
|
-
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
192
|
+
CMess::GuessEncoding.manual(options.merge(:input => input))
|
193
|
+
else
|
194
|
+
puts CMess::GuessEncoding.automatic(options[:input], options[:chunk_size], options[:ignore_bom])
|
223
195
|
end
|
224
196
|
end
|