cmess 0.0.5.186 → 0.0.6.192

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
data/ChangeLog CHANGED
@@ -1,5 +1,10 @@
1
1
  = Revision history for cmess
2
2
 
3
+ == 0.0.6 [2008-01-30]
4
+
5
+ * Added ability to specify charcodes as input for manual guessing
6
+ * Improved automatic guessing and further enhancements
7
+
3
8
  == 0.0.5 [2008-01-21]
4
9
 
5
10
  * Made automatic guessing the default for guess_encoding
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to cmess version 0.0.5
5
+ This documentation refers to cmess version 0.0.6
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -37,7 +37,7 @@ TODO: well, more of the description... ;-)
37
37
 
38
38
  == LICENSE AND COPYRIGHT
39
39
 
40
- Copyright (C) 2007 University of Cologne,
40
+ Copyright (C) 2007-2008 University of Cologne,
41
41
  Albertus-Magnus-Platz, 50932 Cologne, Germany
42
42
 
43
43
  cmess is free software: you can redistribute it and/or modify it under the
data/bin/guess_encoding CHANGED
@@ -6,9 +6,9 @@
6
6
  # guess_encoding -- Assist with guessing the encoding of some input at hand #
7
7
  # [A component of cmess, the encoding tool-box] #
8
8
  # #
9
- # Copyright (C) 2007 University of Cologne, #
10
- # Albertus-Magnus-Platz, #
11
- # 50932 Cologne, Germany #
9
+ # Copyright (C) 2007-2008 University of Cologne, #
10
+ # Albertus-Magnus-Platz, #
11
+ # 50932 Cologne, Germany #
12
12
  # #
13
13
  # Authors: #
14
14
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -47,8 +47,8 @@ PROGNAME = File.basename($0)
47
47
  # short-cut
48
48
  CGE = CMess::GuessEncoding
49
49
 
50
- # how to split list of encodings
51
- SPLIT_ENCODING_LIST_RE = /\s*[,\s]\s*/o
50
+ # how to split list of arguments
51
+ SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
52
52
 
53
53
  options = {
54
54
  :input => STDIN,
@@ -58,7 +58,10 @@ options = {
58
58
  :target_encoding => determine_system_encoding,
59
59
  :manual => false,
60
60
  :chunk_size => nil,
61
- :ignore_bom => false
61
+ :ignore_bom => false,
62
+ :charcodes => nil,
63
+ :decimal => false,
64
+ :octal => false
62
65
  }
63
66
 
64
67
  OptionParser.new(nil, 40) { |opts|
@@ -94,6 +97,8 @@ OptionParser.new(nil, 40) { |opts|
94
97
  options[:manual] = true
95
98
  }
96
99
 
100
+ opts.separator ''
101
+
97
102
  opts.on('-l', '--line LINE', "Line number of input file to use for testing [Default: #{options[:line]}]") { |l|
98
103
  options[:line] = l.to_i
99
104
 
@@ -111,7 +116,7 @@ OptionParser.new(nil, 40) { |opts|
111
116
  }
112
117
 
113
118
  opts.on('-a', '--additional-encodings ENCODINGS...', "List of encodings to try >in addition to< default (see below)") { |e|
114
- options[:additional_encodings] += e.split(SPLIT_ENCODING_LIST_RE)
119
+ options[:additional_encodings] += e.split(SPLIT_ARG_LIST_RE)
115
120
  }
116
121
 
117
122
  opts.separator ''
@@ -120,6 +125,24 @@ OptionParser.new(nil, 40) { |opts|
120
125
  options[:target_encoding] = e
121
126
  }
122
127
 
128
+ opts.separator ''
129
+ opts.separator ' * Charcodes'
130
+ opts.separator ''
131
+
132
+ opts.on('-C', '--charcodes CHARCODES', "Specify a list of character codes (in hexadecimal by default)", "for manual guessing. (Options '-e', '-a', and '-t' apply here", "as well; see under \"Manual guessing\" for details.)") { |c|
133
+ options[:charcodes] = c.split(SPLIT_ARG_LIST_RE)
134
+ }
135
+
136
+ opts.separator ''
137
+
138
+ opts.on('-D', '--decimal', "Charcodes are in decimal") {
139
+ options[:decimal] = true
140
+ }
141
+
142
+ opts.on('-O', '--octal', "Charcodes are in octal") {
143
+ options[:octal] = true
144
+ }
145
+
123
146
  opts.separator ''
124
147
  opts.separator 'Generic options:'
125
148
 
@@ -165,9 +188,7 @@ if ARGV[0] && !options[:input_set]
165
188
  options[:input] = open_file_or_std(ARGV[0])
166
189
  end
167
190
 
168
- unless options[:manual]
169
- puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
170
- else
191
+ if options[:manual]
171
192
  options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
172
193
 
173
194
  # reset line counter
@@ -185,4 +206,17 @@ else
185
206
  options[:encodings],
186
207
  options[:additional_encodings]
187
208
  )
209
+ elsif charcodes = options[:charcodes]
210
+ options[:target_encoding].call if options[:target_encoding].respond_to?(:call)
211
+
212
+ base = options[:octal] ? 8 : options[:decimal] ? 10 : 16
213
+
214
+ CGE::Manual.display(
215
+ charcodes.map { |c| c.to_i(base).chr }.join,
216
+ options[:target_encoding],
217
+ options[:encodings],
218
+ options[:additional_encodings]
219
+ )
220
+ else # automatic
221
+ puts CGE::Automatic.guess(options[:input], options[:chunk_size], options[:ignore_bom])
188
222
  end
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of cmess, the encoding tool-box. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -130,7 +130,7 @@ module CMess::GuessEncoding
130
130
  encodings.each { |encoding|
131
131
  converted = begin
132
132
  Iconv.conv(target, encoding, input)
133
- rescue Iconv::IllegalSequence => err
133
+ rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
134
134
  "ILLEGAL INPUT SEQUENCE: #{err}"
135
135
  rescue Iconv::InvalidEncoding
136
136
  if encoding == target
data/lib/cmess/version.rb CHANGED
@@ -3,9 +3,9 @@
3
3
  # #
4
4
  # A component of cmess, the encoding tool-box. #
5
5
  # #
6
- # Copyright (C) 2007 University of Cologne, #
7
- # Albertus-Magnus-Platz, #
8
- # 50932 Cologne, Germany #
6
+ # Copyright (C) 2007-2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
9
  # #
10
10
  # Authors: #
11
11
  # Jens Wille <jens.wille@uni-koeln.de> #
@@ -30,7 +30,7 @@ module CMess::Version
30
30
 
31
31
  MAJOR = 0
32
32
  MINOR = 0
33
- TINY = 5
33
+ TINY = 6
34
34
 
35
35
  class << self
36
36
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmess
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5.186
4
+ version: 0.0.6.192
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-24 00:00:00 +01:00
12
+ date: 2008-01-30 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency