cmess 0.0.5.186 → 0.0.6.192
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +5 -0
- data/README +2 -2
- data/bin/guess_encoding +44 -10
- data/lib/cmess/guess_encoding.rb +4 -4
- data/lib/cmess/version.rb +4 -4
- metadata +2 -2
data/ChangeLog
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
= Revision history for cmess
|
2
2
|
|
3
|
+
== 0.0.6 [2008-01-30]
|
4
|
+
|
5
|
+
* Added ability to specify charcodes as input for manual guessing
|
6
|
+
* Improved automatic guessing and further enhancements
|
7
|
+
|
3
8
|
== 0.0.5 [2008-01-21]
|
4
9
|
|
5
10
|
* Made automatic guessing the default for guess_encoding
|
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to cmess version 0.0.5
|
5
|
+
This documentation refers to cmess version 0.0.6
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -37,7 +37,7 @@ TODO: well, more of the description... ;-)
|
|
37
37
|
|
38
38
|
== LICENSE AND COPYRIGHT
|
39
39
|
|
40
|
-
Copyright (C) 2007 University of Cologne,
|
40
|
+
Copyright (C) 2007-2008 University of Cologne,
|
41
41
|
Albertus-Magnus-Platz, 50932 Cologne, Germany
|
42
42
|
|
43
43
|
cmess is free software: you can redistribute it and/or modify it under the
|
data/bin/guess_encoding
CHANGED
@@ -6,9 +6,9 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007 University of Cologne, #
|
10
|
-
# Albertus-Magnus-Platz, #
|
11
|
-
# 50932 Cologne, Germany #
|
9
|
+
# Copyright (C) 2007-2008 University of Cologne, #
|
10
|
+
# Albertus-Magnus-Platz, #
|
11
|
+
# 50932 Cologne, Germany #
|
12
12
|
# #
|
13
13
|
# Authors: #
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -47,8 +47,8 @@ PROGNAME = File.basename($0)
|
|
47
47
|
# short-cut
|
48
48
|
CGE = CMess::GuessEncoding
|
49
49
|
|
50
|
-
# how to split list of
|
51
|
-
|
50
|
+
# how to split list of arguments
|
51
|
+
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
52
52
|
|
53
53
|
options = {
|
54
54
|
:input => STDIN,
|
@@ -58,7 +58,10 @@ options = {
|
|
58
58
|
:target_encoding => determine_system_encoding,
|
59
59
|
:manual => false,
|
60
60
|
:chunk_size => nil,
|
61
|
-
:ignore_bom => false
|
61
|
+
:ignore_bom => false,
|
62
|
+
:charcodes => nil,
|
63
|
+
:decimal => false,
|
64
|
+
:octal => false
|
62
65
|
}
|
63
66
|
|
64
67
|
OptionParser.new(nil, 40) { |opts|
|
@@ -94,6 +97,8 @@ OptionParser.new(nil, 40) { |opts|
|
|
94
97
|
options[:manual] = true
|
95
98
|
}
|
96
99
|
|
100
|
+
opts.separator ''
|
101
|
+
|
97
102
|
opts.on('-l', '--line LINE', "Line number of input file to use for testing [Default: #{options[:line]}]") { |l|
|
98
103
|
options[:line] = l.to_i
|
99
104
|
|
@@ -111,7 +116,7 @@ OptionParser.new(nil, 40) { |opts|
|
|
111
116
|
}
|
112
117
|
|
113
118
|
opts.on('-a', '--additional-encodings ENCODINGS...', "List of encodings to try >in addition to< default (see below)") { |e|
|
114
|
-
options[:additional_encodings] += e.split(
|
119
|
+
options[:additional_encodings] += e.split(SPLIT_ARG_LIST_RE)
|
115
120
|
}
|
116
121
|
|
117
122
|
opts.separator ''
|
@@ -120,6 +125,24 @@ OptionParser.new(nil, 40) { |opts|
|
|
120
125
|
options[:target_encoding] = e
|
121
126
|
}
|
122
127
|
|
128
|
+
opts.separator ''
|
129
|
+
opts.separator ' * Charcodes'
|
130
|
+
opts.separator ''
|
131
|
+
|
132
|
+
opts.on('-C', '--charcodes CHARCODES', "Specify a list of character codes (in hexadecimal by default)", "for manual guessing. (Options '-e', '-a', and '-t' apply here", "as well; see under \"Manual guessing\" for details.)") { |c|
|
133
|
+
options[:charcodes] = c.split(SPLIT_ARG_LIST_RE)
|
134
|
+
}
|
135
|
+
|
136
|
+
opts.separator ''
|
137
|
+
|
138
|
+
opts.on('-D', '--decimal', "Charcodes are in decimal") {
|
139
|
+
options[:decimal] = true
|
140
|
+
}
|
141
|
+
|
142
|
+
opts.on('-O', '--octal', "Charcodes are in octal") {
|
143
|
+
options[:octal] = true
|
144
|
+
}
|
145
|
+
|
123
146
|
opts.separator ''
|
124
147
|
opts.separator 'Generic options:'
|
125
148
|
|
@@ -165,9 +188,7 @@ if ARGV[0] && !options[:input_set]
|
|
165
188
|
options[:input] = open_file_or_std(ARGV[0])
|
166
189
|
end
|
167
190
|
|
168
|
-
|
169
|
-
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
170
|
-
else
|
191
|
+
if options[:manual]
|
171
192
|
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
172
193
|
|
173
194
|
# reset line counter
|
@@ -185,4 +206,17 @@ else
|
|
185
206
|
options[:encodings],
|
186
207
|
options[:additional_encodings]
|
187
208
|
)
|
209
|
+
elsif charcodes = options[:charcodes]
|
210
|
+
options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
|
211
|
+
|
212
|
+
base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
|
213
|
+
|
214
|
+
CGE::Manual.display(
|
215
|
+
charcodes.map { |c| c.to_i(base).chr }.join,
|
216
|
+
options[:target_encoding],
|
217
|
+
options[:encodings],
|
218
|
+
options[:additional_encodings]
|
219
|
+
)
|
220
|
+
else # automatic
|
221
|
+
puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
|
188
222
|
end
|
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne, #
|
7
|
-
# Albertus-Magnus-Platz, #
|
8
|
-
# 50932 Cologne, Germany #
|
6
|
+
# Copyright (C) 2007-2008 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50932 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -130,7 +130,7 @@ module CMess::GuessEncoding
|
|
130
130
|
encodings.each { |encoding|
|
131
131
|
converted = begin
|
132
132
|
Iconv.conv(target, encoding, input)
|
133
|
-
rescue Iconv::IllegalSequence => err
|
133
|
+
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
|
134
134
|
"ILLEGAL INPUT SEQUENCE: #{err}"
|
135
135
|
rescue Iconv::InvalidEncoding
|
136
136
|
if encoding == target
|
data/lib/cmess/version.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne, #
|
7
|
-
# Albertus-Magnus-Platz, #
|
8
|
-
# 50932 Cologne, Germany #
|
6
|
+
# Copyright (C) 2007-2008 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50932 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -30,7 +30,7 @@ module CMess::Version
|
|
30
30
|
|
31
31
|
MAJOR = 0
|
32
32
|
MINOR = 0
|
33
|
-
TINY = 5
|
33
|
+
TINY = 6
|
34
34
|
|
35
35
|
class << self
|
36
36
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5.186
|
4
|
+
version: 0.0.6.192
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-01-
|
12
|
+
date: 2008-01-30 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|