cmess 0.0.5.186 → 0.0.6.192
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +5 -0
- data/README +2 -2
- data/bin/guess_encoding +44 -10
- data/lib/cmess/guess_encoding.rb +4 -4
- data/lib/cmess/version.rb +4 -4
- metadata +2 -2
data/ChangeLog
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
= Revision history for cmess
|
2
2
|
|
3
|
+
== 0.0.6 [2008-01-30]
|
4
|
+
|
5
|
+
* Added ability to specify charcodes as input for manual guessing
|
6
|
+
* Improved automatic guessing and further enhancements
|
7
|
+
|
3
8
|
== 0.0.5 [2008-01-21]
|
4
9
|
|
5
10
|
* Made automatic guessing the default for guess_encoding
|
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to cmess version 0.0.5
|
5
|
+
This documentation refers to cmess version 0.0.6
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -37,7 +37,7 @@ TODO: well, more of the description... ;-)
|
|
37
37
|
|
38
38
|
== LICENSE AND COPYRIGHT
|
39
39
|
|
40
|
-
Copyright (C) 2007 University of Cologne,
|
40
|
+
Copyright (C) 2007-2008 University of Cologne,
|
41
41
|
Albertus-Magnus-Platz, 50932 Cologne, Germany
|
42
42
|
|
43
43
|
cmess is free software: you can redistribute it and/or modify it under the
|
data/bin/guess_encoding
CHANGED
@@ -6,9 +6,9 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007 University of Cologne,
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Copyright (C) 2007-2008 University of Cologne, #
|
10
|
+
# Albertus-Magnus-Platz, #
|
11
|
+
# 50932 Cologne, Germany #
|
12
12
|
# #
|
13
13
|
# Authors: #
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -47,8 +47,8 @@ PROGNAME = File.basename($0)
|
|
47
47
|
# short-cut
|
48
48
|
CGE = CMess::GuessEncoding
|
49
49
|
|
50
|
-
# how to split list of
|
51
|
-
|
50
|
+
# how to split list of arguments
|
51
|
+
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
52
52
|
|
53
53
|
options = {
|
54
54
|
:input => STDIN,
|
@@ -58,7 +58,10 @@ options = {
|
|
58
58
|
:target_encoding => determine_system_encoding,
|
59
59
|
:manual => false,
|
60
60
|
:chunk_size => nil,
|
61
|
-
:ignore_bom => false
|
61
|
+
:ignore_bom => false,
|
62
|
+
:charcodes => nil,
|
63
|
+
:decimal => false,
|
64
|
+
:octal => false
|
62
65
|
}
|
63
66
|
|
64
67
|
OptionParser.new(nil, 40) { |opts|
|
@@ -94,6 +97,8 @@ OptionParser.new(nil, 40) { |opts|
|
|
94
97
|
options[:manual] = true
|
95
98
|
}
|
96
99
|
|
100
|
+
opts.separator ''
|
101
|
+
|
97
102
|
opts.on('-l', '--line LINE', "Line number of input file to use for testing [Default: #{options[:line]}]") { |l|
|
98
103
|
options[:line] = l.to_i
|
99
104
|
|
@@ -111,7 +116,7 @@ OptionParser.new(nil, 40) { |opts|
|
|
111
116
|
}
|
112
117
|
|
113
118
|
opts.on('-a', '--additional-encodings ENCODINGS...', "List of encodings to try >in addition to< default (see below)") { |e|
|
114
|
-
options[:additional_encodings] += e.split(
|
119
|
+
options[:additional_encodings] += e.split(SPLIT_ARG_LIST_RE)
|
115
120
|
}
|
116
121
|
|
117
122
|
opts.separator ''
|
@@ -120,6 +125,24 @@ OptionParser.new(nil, 40) { |opts|
|
|
120
125
|
options[:target_encoding] = e
|
121
126
|
}
|
122
127
|
|
128
|
+
opts.separator ''
|
129
|
+
opts.separator ' * Charcodes'
|
130
|
+
opts.separator ''
|
131
|
+
|
132
|
+
opts.on('-C', '--charcodes CHARCODES', "Specify a list of character codes (in hexadecimal by default)", "for manual guessing. (Options '-e', '-a', and '-t' apply here", "as well; see under \"Manual guessing\" for details.)") { |c|
|
133
|
+
options[:charcodes] = c.split(SPLIT_ARG_LIST_RE)
|
134
|
+
}
|
135
|
+
|
136
|
+
opts.separator ''
|
137
|
+
|
138
|
+
opts.on('-D', '--decimal', "Charcodes are in decimal") {
|
139
|
+
options[:decimal] = true
|
140
|
+
}
|
141
|
+
|
142
|
+
opts.on('-O', '--octal', "Charcodes are in octal") {
|
143
|
+
options[:octal] = true
|
144
|
+
}
|
145
|
+
|
123
146
|
opts.separator ''
|
124
147
|
opts.separator 'Generic options:'
|
125
148
|
|
@@ -165,9 +188,7 @@ if ARGV[0] && !options[:input_set]
|
|
165
188
|
options[:input] = open_file_or_std(ARGV[0])
|
166
189
|
end
|
167
190
|
|
168
|
-
|
169
|
-
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
170
|
-
else
|
191
|
+
if options[:manual]
|
171
192
|
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
172
193
|
|
173
194
|
# reset line counter
|
@@ -185,4 +206,17 @@ else
|
|
185
206
|
options[:encodings],
|
186
207
|
options[:additional_encodings]
|
187
208
|
)
|
209
|
+
elsif charcodes = options[:charcodes]
|
210
|
+
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
211
|
+
|
212
|
+
base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
|
213
|
+
|
214
|
+
CGE::Manual.display(
|
215
|
+
charcodes.map { |c| c.to_i(base).chr }.join,
|
216
|
+
options[:target_encoding],
|
217
|
+
options[:encodings],
|
218
|
+
options[:additional_encodings]
|
219
|
+
)
|
220
|
+
else # automatic
|
221
|
+
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
188
222
|
end
|
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2007-2008 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50932 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -130,7 +130,7 @@ module CMess::GuessEncoding
|
|
130
130
|
encodings.each { |encoding|
|
131
131
|
converted = begin
|
132
132
|
Iconv.conv(target, encoding, input)
|
133
|
-
rescue Iconv::IllegalSequence => err
|
133
|
+
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
|
134
134
|
"ILLEGAL INPUT SEQUENCE: #{err}"
|
135
135
|
rescue Iconv::InvalidEncoding
|
136
136
|
if encoding == target
|
data/lib/cmess/version.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2007-2008 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50932 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -30,7 +30,7 @@ module CMess::Version
|
|
30
30
|
|
31
31
|
MAJOR = 0
|
32
32
|
MINOR = 0
|
33
|
-
TINY = 5
|
33
|
+
TINY = 6
|
34
34
|
|
35
35
|
class << self
|
36
36
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5.186
|
4
|
+
version: 0.0.6.192
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-01-
|
12
|
+
date: 2008-01-30 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|