cmess 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -2
- data/Rakefile +11 -10
- data/bin/bconv +1 -1
- data/bin/cinderella +1 -1
- data/bin/decode_entities +1 -1
- data/bin/guess_encoding +5 -4
- data/data/chartab.yaml +10 -10
- data/lib/cmess.rb +1 -1
- data/lib/cmess/bconv.rb +7 -5
- data/lib/cmess/cinderella.rb +14 -9
- data/lib/cmess/cli.rb +5 -3
- data/lib/cmess/decode_entities.rb +6 -5
- data/lib/cmess/guess_encoding.rb +5 -5
- data/lib/cmess/guess_encoding/automatic.rb +8 -4
- data/lib/cmess/guess_encoding/encoding.rb +7 -3
- data/lib/cmess/guess_encoding/manual.rb +22 -8
- data/lib/cmess/version.rb +20 -44
- metadata +62 -49
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to cmess version 0.2.
|
5
|
+
This documentation refers to cmess version 0.2.3
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -48,7 +48,7 @@ Rubyforge project:: <http://rubyforge.org/projects/prometheus>
|
|
48
48
|
== LICENSE AND COPYRIGHT
|
49
49
|
|
50
50
|
Copyright (C) 2007-2009 University of Cologne,
|
51
|
-
Albertus-Magnus-Platz,
|
51
|
+
Albertus-Magnus-Platz, 50923 Cologne, Germany
|
52
52
|
|
53
53
|
cmess is free software: you can redistribute it and/or modify it under the
|
54
54
|
terms of the GNU General Public License as published by the Free Software
|
data/Rakefile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
|
2
|
-
require 'cmess'
|
1
|
+
require %q{lib/cmess/version}
|
3
2
|
|
4
3
|
begin
|
5
4
|
require 'hen'
|
6
5
|
|
7
6
|
Hen.lay! {{
|
8
7
|
:rubyforge => {
|
9
|
-
:
|
8
|
+
:project => %q{prometheus},
|
9
|
+
:package => %q{cmess}
|
10
10
|
},
|
11
11
|
|
12
12
|
:gem => {
|
@@ -26,11 +26,12 @@ end
|
|
26
26
|
|
27
27
|
namespace :guess_encoding do
|
28
28
|
|
29
|
-
require 'cmess/guess_encoding'
|
30
|
-
include CMess::GuessEncoding::Encoding
|
31
|
-
|
32
29
|
desc "Compare actual encoding and automatic guess of example files"
|
33
30
|
task :check_examples do
|
31
|
+
require 'lib/cmess/guess_encoding'
|
32
|
+
|
33
|
+
E = CMess::GuessEncoding::Encoding
|
34
|
+
|
34
35
|
Dir[File.join(File.dirname(__FILE__), 'example', 'guess_encoding', '??.*.txt')].sort.each { |example|
|
35
36
|
language, encoding = File.basename(example, '.txt').split('.')
|
36
37
|
encoding.upcase!
|
@@ -38,10 +39,10 @@ namespace :guess_encoding do
|
|
38
39
|
guessed = CMess::GuessEncoding::Automatic.guess(File.open(example))
|
39
40
|
|
40
41
|
match = case guessed
|
41
|
-
when UNKNOWN:
|
42
|
-
when ASCII:
|
43
|
-
when encoding:
|
44
|
-
else
|
42
|
+
when E::UNKNOWN: '?'
|
43
|
+
when E::ASCII: '#'
|
44
|
+
when encoding: '+'
|
45
|
+
else '-'
|
45
46
|
end
|
46
47
|
|
47
48
|
puts '%s %s/%-11s => %s' % [match, language, encoding, guessed]
|
data/bin/bconv
CHANGED
data/bin/cinderella
CHANGED
data/bin/decode_entities
CHANGED
data/bin/guess_encoding
CHANGED
@@ -6,9 +6,9 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
|
-
#
|
11
|
+
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
13
|
# Authors: #
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -123,7 +123,7 @@ OptionParser.new(nil, 40) { |opts|
|
|
123
123
|
|
124
124
|
opts.separator ''
|
125
125
|
|
126
|
-
opts.on('--list-encodings', 'Print a list of all available encodings on your system and exit') {
|
126
|
+
opts.on('-L', '--list-encodings', 'Print a list of all available encodings on your system and exit') {
|
127
127
|
puts CGE::Encoding.all_encodings
|
128
128
|
exit
|
129
129
|
}
|
@@ -184,7 +184,8 @@ OptionParser.new(nil, 40) { |opts|
|
|
184
184
|
}
|
185
185
|
|
186
186
|
opts.separator ''
|
187
|
-
opts.separator 'NOTE: To select all encodings available on your system, specify __ALL__.'
|
187
|
+
opts.separator 'NOTE: To select all encodings available on your system (see \'-L\'), specify __ALL__.'
|
188
|
+
opts.separator ' To select the likely candidates named above, specify __COMMON__.'
|
188
189
|
|
189
190
|
opts.separator ''
|
190
191
|
opts.separator "When FILE is -, STDIN is used."
|
data/data/chartab.yaml
CHANGED
@@ -199,8 +199,8 @@
|
|
199
199
|
"000A":
|
200
200
|
__name__: <control>
|
201
201
|
__table__: Basic Latin
|
202
|
-
ALLEGRO_OSTWEST: []
|
203
|
-
ALLEGRO_WINDOWS: []
|
202
|
+
ALLEGRO_OSTWEST: [10] # XXX
|
203
|
+
ALLEGRO_WINDOWS: [10] # XXX
|
204
204
|
CP1250: [10]
|
205
205
|
CP1252: [10]
|
206
206
|
CP1254: [10]
|
@@ -211,10 +211,10 @@
|
|
211
211
|
ISO_8859_15: [10]
|
212
212
|
ISO_8859_2: [10]
|
213
213
|
ISO_8859_9: [10]
|
214
|
-
MAB2: []
|
215
|
-
MAB_DISKETTE: []
|
214
|
+
MAB2: [10] # XXX
|
215
|
+
MAB_DISKETTE: [10] # XXX
|
216
216
|
PICA: [10]
|
217
|
-
USMARC_ANSEL: []
|
217
|
+
USMARC_ANSEL: [10] # XXX
|
218
218
|
"000B":
|
219
219
|
__name__: <control>
|
220
220
|
__table__: Basic Latin
|
@@ -256,8 +256,8 @@
|
|
256
256
|
"000D":
|
257
257
|
__name__: <control>
|
258
258
|
__table__: Basic Latin
|
259
|
-
ALLEGRO_OSTWEST: []
|
260
|
-
ALLEGRO_WINDOWS: []
|
259
|
+
ALLEGRO_OSTWEST: [13] # XXX
|
260
|
+
ALLEGRO_WINDOWS: [13] # XXX
|
261
261
|
CP1250: [13]
|
262
262
|
CP1252: [13]
|
263
263
|
CP1254: [13]
|
@@ -268,10 +268,10 @@
|
|
268
268
|
ISO_8859_15: [13]
|
269
269
|
ISO_8859_2: [13]
|
270
270
|
ISO_8859_9: [13]
|
271
|
-
MAB2: []
|
272
|
-
MAB_DISKETTE: []
|
271
|
+
MAB2: [13] # XXX
|
272
|
+
MAB_DISKETTE: [13] # XXX
|
273
273
|
PICA: [13]
|
274
|
-
USMARC_ANSEL: []
|
274
|
+
USMARC_ANSEL: [13] # XXX
|
275
275
|
"000E":
|
276
276
|
__name__: <control>
|
277
277
|
__table__: Basic Latin
|
data/lib/cmess.rb
CHANGED
data/lib/cmess/bconv.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2008 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2008-2010 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -32,14 +32,15 @@ require 'cmess'
|
|
32
32
|
|
33
33
|
# Convert between bibliographic (and other) encodings.
|
34
34
|
|
35
|
-
|
35
|
+
module CMess
|
36
|
+
class BConv
|
36
37
|
|
37
38
|
# our version ;-)
|
38
39
|
VERSION = '0.0.2'
|
39
40
|
|
40
41
|
INTERMEDIATE_ENCODING = 'utf-8'
|
41
42
|
|
42
|
-
DEFAULT_CHARTAB_FILE = File.join(
|
43
|
+
DEFAULT_CHARTAB_FILE = File.join(DATA_DIR, 'chartab.yaml')
|
43
44
|
|
44
45
|
class << self
|
45
46
|
|
@@ -166,4 +167,5 @@ class CMess::BConv
|
|
166
167
|
map
|
167
168
|
end
|
168
169
|
|
170
|
+
end
|
169
171
|
end
|
data/lib/cmess/cinderella.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -36,20 +36,24 @@ require 'cmess'
|
|
36
36
|
# containing those doubly encoded characters; if asked to repair doubly
|
37
37
|
# encoded characters, substitutes them with their original character.
|
38
38
|
|
39
|
-
module CMess
|
39
|
+
module CMess
|
40
|
+
module Cinderella
|
40
41
|
|
41
42
|
extend self
|
42
43
|
|
43
44
|
# our version ;-)
|
44
|
-
VERSION = '0.0.
|
45
|
+
VERSION = '0.0.4'
|
45
46
|
|
46
|
-
DEFAULT_CSETS_DIR = File.join(
|
47
|
+
DEFAULT_CSETS_DIR = File.join(DATA_DIR, 'csets')
|
47
48
|
|
48
49
|
def pick(input, pot, crop, source_encoding, target_encoding, chars, repair = false)
|
49
|
-
iconv = Iconv.new(target_encoding, source_encoding)
|
50
|
+
iconv, encoded = Iconv.new(target_encoding, source_encoding), {}
|
50
51
|
|
51
|
-
|
52
|
-
|
52
|
+
chars.each { |char|
|
53
|
+
begin
|
54
|
+
encoded[iconv.iconv(char)] = char
|
55
|
+
rescue Iconv::IllegalSequence
|
56
|
+
end
|
53
57
|
}
|
54
58
|
|
55
59
|
regexp = Regexp.union(*encoded.keys)
|
@@ -63,4 +67,5 @@ module CMess::Cinderella
|
|
63
67
|
}
|
64
68
|
end
|
65
69
|
|
70
|
+
end
|
66
71
|
end
|
data/lib/cmess/cli.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -31,7 +31,8 @@ require 'tempfile'
|
|
31
31
|
require 'rubygems'
|
32
32
|
require 'nuggets/env/user_encoding'
|
33
33
|
|
34
|
-
module CMess
|
34
|
+
module CMess
|
35
|
+
module CLI
|
35
36
|
|
36
37
|
# how to split list of arguments
|
37
38
|
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
@@ -117,4 +118,5 @@ it explicitly via the ENCODING environment variable or via the '-t' option.
|
|
117
118
|
end
|
118
119
|
end
|
119
120
|
|
121
|
+
end
|
120
122
|
end
|
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -27,12 +27,12 @@
|
|
27
27
|
#++
|
28
28
|
|
29
29
|
require 'iconv'
|
30
|
-
require 'cmess'
|
31
30
|
|
32
31
|
require 'rubygems'
|
33
32
|
require 'htmlentities'
|
34
33
|
|
35
|
-
module CMess
|
34
|
+
module CMess
|
35
|
+
module DecodeEntities
|
36
36
|
|
37
37
|
extend self
|
38
38
|
|
@@ -68,4 +68,5 @@ module CMess::DecodeEntities
|
|
68
68
|
}
|
69
69
|
end
|
70
70
|
|
71
|
+
end
|
71
72
|
end
|
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -30,13 +30,12 @@
|
|
30
30
|
###############################################################################
|
31
31
|
#++
|
32
32
|
|
33
|
-
require 'cmess'
|
34
|
-
|
35
33
|
# Allows to guess an input's encoding either manually or automatically.
|
36
34
|
# Works actually pretty good -- for the supported encodings. See Manual
|
37
35
|
# and Automatic for details.
|
38
36
|
|
39
|
-
module CMess
|
37
|
+
module CMess
|
38
|
+
module GuessEncoding
|
40
39
|
|
41
40
|
# our version ;-)
|
42
41
|
VERSION = '0.0.9'
|
@@ -53,6 +52,7 @@ module CMess::GuessEncoding
|
|
53
52
|
|
54
53
|
end
|
55
54
|
|
55
|
+
end
|
56
56
|
end
|
57
57
|
|
58
58
|
%w[encoding manual automatic].each { |lib|
|
@@ -5,9 +5,9 @@
|
|
5
5
|
# #
|
6
6
|
# A component of cmess, the encoding tool-box. #
|
7
7
|
# #
|
8
|
-
# Copyright (C) 2007-
|
8
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
9
9
|
# Albertus-Magnus-Platz, #
|
10
|
-
#
|
10
|
+
# 50923 Cologne, Germany #
|
11
11
|
# #
|
12
12
|
# Authors: #
|
13
13
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -45,14 +45,16 @@ require 'forwardable'
|
|
45
45
|
#
|
46
46
|
# If a BOM is found, it may determine the encoding directly.
|
47
47
|
|
48
|
-
|
48
|
+
module CMess
|
49
|
+
module GuessEncoding
|
50
|
+
class Automatic
|
49
51
|
|
50
52
|
extend Forwardable
|
51
53
|
|
52
54
|
def_delegators self, :encoding_guessers, :supported_encoding?,
|
53
55
|
:bom_guessers, :supported_bom?
|
54
56
|
|
55
|
-
include
|
57
|
+
include Encoding
|
56
58
|
|
57
59
|
# Creates a converter for desired encoding (from UTF-8)
|
58
60
|
ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
|
@@ -353,4 +355,6 @@ class CMess::GuessEncoding::Automatic
|
|
353
355
|
starts_with?(0xfb, 0xee, 0x28)
|
354
356
|
end
|
355
357
|
|
358
|
+
end
|
359
|
+
end
|
356
360
|
end
|
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -32,7 +32,9 @@
|
|
32
32
|
|
33
33
|
# Namespace for our encodings.
|
34
34
|
|
35
|
-
module CMess
|
35
|
+
module CMess
|
36
|
+
module GuessEncoding
|
37
|
+
module Encoding
|
36
38
|
|
37
39
|
extend self
|
38
40
|
|
@@ -81,4 +83,6 @@ module CMess::GuessEncoding::Encoding
|
|
81
83
|
base.extend self
|
82
84
|
end
|
83
85
|
|
86
|
+
end
|
87
|
+
end
|
84
88
|
end
|
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -32,15 +32,21 @@
|
|
32
32
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
|
+
require 'rubygems'
|
36
|
+
require 'nuggets/array/runiq'
|
37
|
+
require 'nuggets/array/in_order'
|
38
|
+
|
35
39
|
# Outputs given string (or line), being encoded in target encoding, encoded in
|
36
40
|
# various test encodings, thus allowing to identify the (seemingly) correct
|
37
41
|
# encoding by visually comparing the input string with its desired appearance.
|
38
42
|
|
39
|
-
module CMess
|
43
|
+
module CMess
|
44
|
+
module GuessEncoding
|
45
|
+
module Manual
|
40
46
|
|
41
47
|
extend self
|
42
48
|
|
43
|
-
include
|
49
|
+
include Encoding
|
44
50
|
|
45
51
|
# default encodings to try
|
46
52
|
ENCODINGS = [
|
@@ -79,12 +85,18 @@ module CMess::GuessEncoding::Manual
|
|
79
85
|
target = target_encoding
|
80
86
|
|
81
87
|
encodings = (encodings || ENCODINGS) + additional_encodings
|
82
|
-
|
83
|
-
|
84
|
-
|
88
|
+
|
89
|
+
if encodings.include?('__ALL__')
|
90
|
+
encodings.replace(all_encodings.dup)
|
91
|
+
elsif encodings.delete('__COMMON__')
|
92
|
+
encodings.concat(CANDIDATES)
|
93
|
+
end
|
94
|
+
|
95
|
+
# uniq with additional encodings staying at the end
|
96
|
+
encodings.runiq!
|
85
97
|
|
86
98
|
# move target encoding to front
|
87
|
-
encodings
|
99
|
+
encodings.in_order!(target)
|
88
100
|
|
89
101
|
max_length = encodings.map { |encoding| encoding.length }.max
|
90
102
|
|
@@ -105,4 +117,6 @@ module CMess::GuessEncoding::Manual
|
|
105
117
|
}
|
106
118
|
end
|
107
119
|
|
120
|
+
end
|
121
|
+
end
|
108
122
|
end
|
data/lib/cmess/version.rb
CHANGED
@@ -1,51 +1,27 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# version.
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
|
21
|
-
# details. #
|
22
|
-
# #
|
23
|
-
# You should have received a copy of the GNU General Public License along #
|
24
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
-
# #
|
26
|
-
###############################################################################
|
27
|
-
#++
|
28
|
-
|
29
|
-
module CMess::Version
|
30
|
-
|
31
|
-
MAJOR = 0
|
32
|
-
MINOR = 2
|
33
|
-
TINY = 2
|
34
|
-
|
35
|
-
class << self
|
36
|
-
|
37
|
-
# Returns array representation.
|
38
|
-
def to_a
|
39
|
-
[MAJOR, MINOR, TINY]
|
40
|
-
end
|
1
|
+
module CMess
|
2
|
+
|
3
|
+
module Version
|
4
|
+
|
5
|
+
MAJOR = 0
|
6
|
+
MINOR = 2
|
7
|
+
TINY = 3
|
8
|
+
|
9
|
+
class << self
|
10
|
+
|
11
|
+
# Returns array representation.
|
12
|
+
def to_a
|
13
|
+
[MAJOR, MINOR, TINY]
|
14
|
+
end
|
15
|
+
|
16
|
+
# Short-cut for version string.
|
17
|
+
def to_s
|
18
|
+
to_a.join('.')
|
19
|
+
end
|
41
20
|
|
42
|
-
# Short-cut for version string.
|
43
|
-
def to_s
|
44
|
-
to_a.join('.')
|
45
21
|
end
|
46
22
|
|
47
23
|
end
|
48
24
|
|
49
|
-
|
25
|
+
VERSION = Version.to_s
|
50
26
|
|
51
27
|
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 2
|
8
|
+
- 3
|
9
|
+
version: 0.2.3
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Jens Wille
|
@@ -9,36 +14,42 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date:
|
17
|
+
date: 2010-04-23 00:00:00 +02:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: ruby-nuggets
|
17
|
-
|
18
|
-
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
24
|
requirements:
|
21
25
|
- - ">="
|
22
26
|
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
- 3
|
30
|
+
- 3
|
23
31
|
version: 0.3.3
|
24
|
-
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
25
34
|
- !ruby/object:Gem::Dependency
|
26
35
|
name: htmlentities
|
27
|
-
|
28
|
-
|
29
|
-
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
38
|
requirements:
|
31
39
|
- - ">="
|
32
40
|
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
33
43
|
version: "0"
|
34
|
-
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
35
46
|
description: "\n Assist with handling messed up encodings (Currently includes the\n following tools: bconv, cinderella, decode_entities, guess_encoding)\n "
|
36
47
|
email: jens.wille@uni-koeln.de
|
37
48
|
executables:
|
38
|
-
-
|
49
|
+
- bconv
|
39
50
|
- decode_entities
|
40
51
|
- guess_encoding
|
41
|
-
-
|
52
|
+
- cinderella
|
42
53
|
extensions: []
|
43
54
|
|
44
55
|
extra_rdoc_files:
|
@@ -46,87 +57,89 @@ extra_rdoc_files:
|
|
46
57
|
- ChangeLog
|
47
58
|
- README
|
48
59
|
files:
|
49
|
-
- lib/cmess
|
50
|
-
- lib/cmess/guess_encoding/encoding.rb
|
60
|
+
- lib/cmess.rb
|
51
61
|
- lib/cmess/guess_encoding/automatic.rb
|
62
|
+
- lib/cmess/guess_encoding/encoding.rb
|
52
63
|
- lib/cmess/guess_encoding/manual.rb
|
53
|
-
- lib/cmess/
|
54
|
-
- lib/cmess/version.rb
|
64
|
+
- lib/cmess/bconv.rb
|
55
65
|
- lib/cmess/cli.rb
|
56
66
|
- lib/cmess/cinderella.rb
|
57
|
-
- lib/cmess/
|
58
|
-
- lib/cmess.rb
|
59
|
-
-
|
67
|
+
- lib/cmess/guess_encoding.rb
|
68
|
+
- lib/cmess/decode_entities.rb
|
69
|
+
- lib/cmess/version.rb
|
70
|
+
- bin/bconv
|
60
71
|
- bin/decode_entities
|
61
72
|
- bin/guess_encoding
|
62
|
-
- bin/
|
63
|
-
- COPYING
|
64
|
-
- Rakefile
|
73
|
+
- bin/cinderella
|
65
74
|
- README
|
66
75
|
- ChangeLog
|
67
|
-
-
|
68
|
-
-
|
69
|
-
- example/cinderella/crop
|
70
|
-
- example/cinderella/crop_repaired
|
71
|
-
- example/cinderella/pot
|
76
|
+
- Rakefile
|
77
|
+
- COPYING
|
72
78
|
- example/guess_encoding/en.utf-8.txt
|
73
|
-
- example/guess_encoding/check_results
|
74
79
|
- example/guess_encoding/de.utf-8.txt
|
75
80
|
- example/guess_encoding/it.utf-8.txt
|
81
|
+
- example/guess_encoding/check_results
|
76
82
|
- example/guess_encoding/fr.utf-8.txt
|
77
|
-
-
|
78
|
-
-
|
79
|
-
-
|
83
|
+
- example/cinderella/empty6-slash_repaired.txt
|
84
|
+
- example/cinderella/empty6-slash.txt
|
85
|
+
- example/cinderella/crop
|
86
|
+
- example/cinderella/pot
|
87
|
+
- example/cinderella/crop_repaired
|
80
88
|
- data/csets/latin1.yaml
|
81
|
-
- data/csets/
|
82
|
-
- data/csets/
|
83
|
-
- data/csets/unicode/
|
84
|
-
- data/csets/unicode/cyrillic-supplement.yaml
|
85
|
-
- data/csets/unicode/latin_extended_additional.yaml
|
86
|
-
- data/csets/unicode/ipa_extensions.yaml
|
87
|
-
- data/csets/unicode/latin-extended-d.yaml
|
89
|
+
- data/csets/iso_8859-15.yaml
|
90
|
+
- data/csets/iso_8859-1.yaml
|
91
|
+
- data/csets/unicode/latin_extended_a.yaml
|
88
92
|
- data/csets/unicode/basic_latin.yaml
|
93
|
+
- data/csets/unicode/ipa_extensions.yaml
|
89
94
|
- data/csets/unicode/latin_extended_b.yaml
|
90
|
-
- data/csets/unicode/
|
95
|
+
- data/csets/unicode/latin-extended-d.yaml
|
96
|
+
- data/csets/unicode/letterlike_symbols.yaml
|
97
|
+
- data/csets/unicode/latin_extended_additional.yaml
|
98
|
+
- data/csets/unicode/greek.yaml
|
91
99
|
- data/csets/unicode/latin-extended-c.yaml
|
92
100
|
- data/csets/unicode/spacing_modifier_letters.yaml
|
101
|
+
- data/csets/unicode/cyrillic-supplement.yaml
|
93
102
|
- data/csets/unicode/cyrillic.yaml
|
94
|
-
- data/csets/unicode/
|
95
|
-
- data/csets/iso_8859-1.yaml
|
103
|
+
- data/csets/unicode/latin_1_supplement.yaml
|
96
104
|
- data/csets/utf-8.yaml
|
105
|
+
- data/csets/utf8.yaml
|
106
|
+
- data/test_chars.yaml
|
107
|
+
- data/chartab.yaml
|
97
108
|
has_rdoc: true
|
98
109
|
homepage: http://prometheus.rubyforge.org/cmess
|
99
110
|
licenses: []
|
100
111
|
|
101
112
|
post_install_message:
|
102
113
|
rdoc_options:
|
103
|
-
- --
|
104
|
-
- cmess Application documentation
|
114
|
+
- --line-numbers
|
105
115
|
- --inline-source
|
106
|
-
- --charset
|
107
|
-
- UTF-8
|
108
116
|
- --main
|
109
117
|
- README
|
118
|
+
- --charset
|
119
|
+
- UTF-8
|
120
|
+
- --title
|
121
|
+
- cmess Application documentation
|
110
122
|
- --all
|
111
|
-
- --line-numbers
|
112
123
|
require_paths:
|
113
124
|
- lib
|
114
125
|
required_ruby_version: !ruby/object:Gem::Requirement
|
115
126
|
requirements:
|
116
127
|
- - ">="
|
117
128
|
- !ruby/object:Gem::Version
|
129
|
+
segments:
|
130
|
+
- 0
|
118
131
|
version: "0"
|
119
|
-
version:
|
120
132
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
133
|
requirements:
|
122
134
|
- - ">="
|
123
135
|
- !ruby/object:Gem::Version
|
136
|
+
segments:
|
137
|
+
- 0
|
124
138
|
version: "0"
|
125
|
-
version:
|
126
139
|
requirements: []
|
127
140
|
|
128
141
|
rubyforge_project: prometheus
|
129
|
-
rubygems_version: 1.3.
|
142
|
+
rubygems_version: 1.3.6
|
130
143
|
signing_key:
|
131
144
|
specification_version: 3
|
132
145
|
summary: "Assist with handling messed up encodings (Currently includes the following tools: bconv, cinderella, decode_entities, guess_encoding)"
|