cmess 0.0.5.186 → 0.0.6.192

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -1,5 +1,10 @@
1
1
  = Revision history for cmess
2
2
 
3
+ == 0.0.6 [2008-01-30]
4
+
5
+ * Added ability to specify charcodes as input for manual guessing
6
+ * Improved automatic guessing and further enhancements
7
+
3
8
  == 0.0.5 [2008-01-21]
4
9
 
5
10
  * Made automatic guessing the default for guess_encoding
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to cmess version 0.0.5
5
+ This documentation refers to cmess version 0.0.6
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -37,7 +37,7 @@ TODO: well, more of the description... ;-)
37
37
 
38
38
  == LICENSE AND COPYRIGHT
39
39
 
40
- Copyright (C) 2007 University of Cologne,
40
+ Copyright (C) 2007-2008 University of Cologne,
41
41
  Albertus-Magnus-Platz, 50932 Cologne, Germany
42
42
 
43
43
  cmess is free software: you can redistribute it and/or modify it under the
data/bin/guess_encoding CHANGED
@@ -6,9 +6,9 @@
6
6
  # guess_encoding -- Assist with guessing the encoding of some input at hand #
7
7
  # [A component of cmess, the encoding tool-box] #
8
8
  # #
9
- # Copyright (C) 2007 University of Cologne, #
10
- # Albertus-Magnus-Platz, #
11
- # 50932 Cologne, Germany #
9
+ # Copyright (C) 2007-2008 University of Cologne, #
10
+ # Albertus-Magnus-Platz, #
11
+ # 50932 Cologne, Germany #
12
12
  # #
13
13
  # Authors: #
14
14
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -47,8 +47,8 @@ PROGNAME = File.basename($0)
47
47
  # short-cut
48
48
  CGE = CMess::GuessEncoding
49
49
 
50
- # how to split list of encodings
51
- SPLIT_ENCODING_LIST_RE = /\s*[,\s]\s*/o
50
+ # how to split list of arguments
51
+ SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
52
52
 
53
53
  options = {
54
54
  :input => STDIN,
@@ -58,7 +58,10 @@ options = {
58
58
  :target_encoding => determine_system_encoding,
59
59
  :manual => false,
60
60
  :chunk_size => nil,
61
- :ignore_bom => false
61
+ :ignore_bom => false,
62
+ :charcodes => nil,
63
+ :decimal => false,
64
+ :octal => false
62
65
  }
63
66
 
64
67
  OptionParser.new(nil, 40) { |opts|
@@ -94,6 +97,8 @@ OptionParser.new(nil, 40) { |opts|
94
97
  options[:manual] = true
95
98
  }
96
99
 
100
+ opts.separator ''
101
+
97
102
  opts.on('-l', '--line LINE', "Line number of input file to use for testing [Default: #{options[:line]}]") { |l|
98
103
  options[:line] = l.to_i
99
104
 
@@ -111,7 +116,7 @@ OptionParser.new(nil, 40) { |opts|
111
116
  }
112
117
 
113
118
  opts.on('-a', '--additional-encodings ENCODINGS...', "List of encodings to try >in addition to< default (see below)") { |e|
114
- options[:additional_encodings] += e.split(SPLIT_ENCODING_LIST_RE)
119
+ options[:additional_encodings] += e.split(SPLIT_ARG_LIST_RE)
115
120
  }
116
121
 
117
122
  opts.separator ''
@@ -120,6 +125,24 @@ OptionParser.new(nil, 40) { |opts|
120
125
  options[:target_encoding] = e
121
126
  }
122
127
 
128
+ opts.separator ''
129
+ opts.separator ' * Charcodes'
130
+ opts.separator ''
131
+
132
+ opts.on('-C', '--charcodes CHARCODES', "Specify a list of character codes (in hexadecimal by default)", "for manual guessing. (Options '-e', '-a', and '-t' apply here", "as well; see under \"Manual guessing\" for details.)") { |c|
133
+ options[:charcodes] = c.split(SPLIT_ARG_LIST_RE)
134
+ }
135
+
136
+ opts.separator ''
137
+
138
+ opts.on('-D', '--decimal', "Charcodes are in decimal") {
139
+ options[:decimal] = true
140
+ }
141
+
142
+ opts.on('-O', '--octal', "Charcodes are in octal") {
143
+ options[:octal] = true
144
+ }
145
+
123
146
  opts.separator ''
124
147
  opts.separator 'Generic options:'
125
148
 
@@ -165,9 +188,7 @@ if ARGV[0] && !options[:input_set]
165
188
  options[:input] = open_file_or_std(ARGV[0])
166
189
  end
167
190
 
168
- unless options[:manual]
169
- puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
170
- else
191
+ if options[:manual]
171
192
  options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
172
193
 
173
194
  # reset line counter
@@ -185,4 +206,17 @@ else
185
206
  options[:encodings],
186
207
  options[:additional_encodings]
187
208
  )
209
+ elsif charcodes = options[:charcodes]
210
+ options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
211
+
212
+ base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
213
+
214
+ CGE::Manual.display(
215
+ charcodes.map { |c| c.to_i(base).chr }.join,
216
+ options[:target_encoding],
217
+ options[:encodings],
218
+ options[:additional_encodings]
219
+ )
220
+ else # automatic
221
+ puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
188
222
  end
data/lib/cmess/guess_encoding.rb CHANGED
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of cmess, the encoding tool-box. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -130,7 +130,7 @@ module CMess::GuessEncoding
130
130
  encodings.each { |encoding|
131
131
  converted = begin
132
132
  Iconv.conv(target, encoding, input)
133
- rescue Iconv::IllegalSequence => err
133
+ rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
134
134
  "ILLEGAL INPUT SEQUENCE: #{err}"
135
135
  rescue Iconv::InvalidEncoding
136
136
  if encoding == target
data/lib/cmess/version.rb CHANGED
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of cmess, the encoding tool-box. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -30,7 +30,7 @@ module CMess::Version
30
30
 
31
31
  MAJOR = 0
32
32
  MINOR = 0
33
- TINY = 5
33
+ TINY = 6
34
34
 
35
35
  class << self
36
36
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmess
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5.186
4
+ version: 0.0.6.192
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-24 00:00:00 +01:00
12
+ date: 2008-01-30 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency