cmess 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +11 -10
- data/bin/bconv +1 -1
- data/bin/cinderella +1 -1
- data/bin/decode_entities +1 -1
- data/bin/guess_encoding +5 -4
- data/data/chartab.yaml +10 -10
- data/lib/cmess.rb +1 -1
- data/lib/cmess/bconv.rb +7 -5
- data/lib/cmess/cinderella.rb +14 -9
- data/lib/cmess/cli.rb +5 -3
- data/lib/cmess/decode_entities.rb +6 -5
- data/lib/cmess/guess_encoding.rb +5 -5
- data/lib/cmess/guess_encoding/automatic.rb +8 -4
- data/lib/cmess/guess_encoding/encoding.rb +7 -3
- data/lib/cmess/guess_encoding/manual.rb +22 -8
- data/lib/cmess/version.rb +20 -44
- metadata +62 -49
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to cmess version 0.2.
|
5
|
+
This documentation refers to cmess version 0.2.3
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -48,7 +48,7 @@ Rubyforge project:: <http://rubyforge.org/projects/prometheus>
|
|
48
48
|
== LICENSE AND COPYRIGHT
|
49
49
|
|
50
50
|
Copyright (C) 2007-2009 University of Cologne,
|
51
|
-
Albertus-Magnus-Platz,
|
51
|
+
Albertus-Magnus-Platz, 50923 Cologne, Germany
|
52
52
|
|
53
53
|
cmess is free software: you can redistribute it and/or modify it under the
|
54
54
|
terms of the GNU General Public License as published by the Free Software
|
data/Rakefile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
|
2
|
-
require 'cmess'
|
1
|
+
require %q{lib/cmess/version}
|
3
2
|
|
4
3
|
begin
|
5
4
|
require 'hen'
|
6
5
|
|
7
6
|
Hen.lay! {{
|
8
7
|
:rubyforge => {
|
9
|
-
:
|
8
|
+
:project => %q{prometheus},
|
9
|
+
:package => %q{cmess}
|
10
10
|
},
|
11
11
|
|
12
12
|
:gem => {
|
@@ -26,11 +26,12 @@ end
|
|
26
26
|
|
27
27
|
namespace :guess_encoding do
|
28
28
|
|
29
|
-
require 'cmess/guess_encoding'
|
30
|
-
include CMess::GuessEncoding::Encoding
|
31
|
-
|
32
29
|
desc "Compare actual encoding and automatic guess of example files"
|
33
30
|
task :check_examples do
|
31
|
+
require 'lib/cmess/guess_encoding'
|
32
|
+
|
33
|
+
E = CMess::GuessEncoding::Encoding
|
34
|
+
|
34
35
|
Dir[File.join(File.dirname(__FILE__), 'example', 'guess_encoding', '??.*.txt')].sort.each { |example|
|
35
36
|
language, encoding = File.basename(example, '.txt').split('.')
|
36
37
|
encoding.upcase!
|
@@ -38,10 +39,10 @@ namespace :guess_encoding do
|
|
38
39
|
guessed = CMess::GuessEncoding::Automatic.guess(File.open(example))
|
39
40
|
|
40
41
|
match = case guessed
|
41
|
-
when UNKNOWN:
|
42
|
-
when ASCII:
|
43
|
-
when encoding:
|
44
|
-
else
|
42
|
+
when E::UNKNOWN: '?'
|
43
|
+
when E::ASCII: '#'
|
44
|
+
when encoding: '+'
|
45
|
+
else '-'
|
45
46
|
end
|
46
47
|
|
47
48
|
puts '%s %s/%-11s => %s' % [match, language, encoding, guessed]
|
data/bin/bconv
CHANGED
data/bin/cinderella
CHANGED
data/bin/decode_entities
CHANGED
data/bin/guess_encoding
CHANGED
@@ -6,9 +6,9 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
|
-
#
|
11
|
+
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
13
|
# Authors: #
|
14
14
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -123,7 +123,7 @@ OptionParser.new(nil, 40) { |opts|
|
|
123
123
|
|
124
124
|
opts.separator ''
|
125
125
|
|
126
|
-
opts.on('--list-encodings', 'Print a list of all available encodings on your system and exit') {
|
126
|
+
opts.on('-L', '--list-encodings', 'Print a list of all available encodings on your system and exit') {
|
127
127
|
puts CGE::Encoding.all_encodings
|
128
128
|
exit
|
129
129
|
}
|
@@ -184,7 +184,8 @@ OptionParser.new(nil, 40) { |opts|
|
|
184
184
|
}
|
185
185
|
|
186
186
|
opts.separator ''
|
187
|
-
opts.separator 'NOTE: To select all encodings available on your system, specify __ALL__.'
|
187
|
+
opts.separator 'NOTE: To select all encodings available on your system (see \'-L\'), specify __ALL__.'
|
188
|
+
opts.separator ' To select the likely candidates named above, specify __COMMON__.'
|
188
189
|
|
189
190
|
opts.separator ''
|
190
191
|
opts.separator "When FILE is -, STDIN is used."
|
data/data/chartab.yaml
CHANGED
@@ -199,8 +199,8 @@
|
|
199
199
|
"000A":
|
200
200
|
__name__: <control>
|
201
201
|
__table__: Basic Latin
|
202
|
-
ALLEGRO_OSTWEST: []
|
203
|
-
ALLEGRO_WINDOWS: []
|
202
|
+
ALLEGRO_OSTWEST: [10] # XXX
|
203
|
+
ALLEGRO_WINDOWS: [10] # XXX
|
204
204
|
CP1250: [10]
|
205
205
|
CP1252: [10]
|
206
206
|
CP1254: [10]
|
@@ -211,10 +211,10 @@
|
|
211
211
|
ISO_8859_15: [10]
|
212
212
|
ISO_8859_2: [10]
|
213
213
|
ISO_8859_9: [10]
|
214
|
-
MAB2: []
|
215
|
-
MAB_DISKETTE: []
|
214
|
+
MAB2: [10] # XXX
|
215
|
+
MAB_DISKETTE: [10] # XXX
|
216
216
|
PICA: [10]
|
217
|
-
USMARC_ANSEL: []
|
217
|
+
USMARC_ANSEL: [10] # XXX
|
218
218
|
"000B":
|
219
219
|
__name__: <control>
|
220
220
|
__table__: Basic Latin
|
@@ -256,8 +256,8 @@
|
|
256
256
|
"000D":
|
257
257
|
__name__: <control>
|
258
258
|
__table__: Basic Latin
|
259
|
-
ALLEGRO_OSTWEST: []
|
260
|
-
ALLEGRO_WINDOWS: []
|
259
|
+
ALLEGRO_OSTWEST: [13] # XXX
|
260
|
+
ALLEGRO_WINDOWS: [13] # XXX
|
261
261
|
CP1250: [13]
|
262
262
|
CP1252: [13]
|
263
263
|
CP1254: [13]
|
@@ -268,10 +268,10 @@
|
|
268
268
|
ISO_8859_15: [13]
|
269
269
|
ISO_8859_2: [13]
|
270
270
|
ISO_8859_9: [13]
|
271
|
-
MAB2: []
|
272
|
-
MAB_DISKETTE: []
|
271
|
+
MAB2: [13] # XXX
|
272
|
+
MAB_DISKETTE: [13] # XXX
|
273
273
|
PICA: [13]
|
274
|
-
USMARC_ANSEL: []
|
274
|
+
USMARC_ANSEL: [13] # XXX
|
275
275
|
"000E":
|
276
276
|
__name__: <control>
|
277
277
|
__table__: Basic Latin
|
data/lib/cmess.rb
CHANGED
data/lib/cmess/bconv.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2008 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2008-2010 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -32,14 +32,15 @@ require 'cmess'
|
|
32
32
|
|
33
33
|
# Convert between bibliographic (and other) encodings.
|
34
34
|
|
35
|
-
|
35
|
+
module CMess
|
36
|
+
class BConv
|
36
37
|
|
37
38
|
# our version ;-)
|
38
39
|
VERSION = '0.0.2'
|
39
40
|
|
40
41
|
INTERMEDIATE_ENCODING = 'utf-8'
|
41
42
|
|
42
|
-
DEFAULT_CHARTAB_FILE = File.join(
|
43
|
+
DEFAULT_CHARTAB_FILE = File.join(DATA_DIR, 'chartab.yaml')
|
43
44
|
|
44
45
|
class << self
|
45
46
|
|
@@ -166,4 +167,5 @@ class CMess::BConv
|
|
166
167
|
map
|
167
168
|
end
|
168
169
|
|
170
|
+
end
|
169
171
|
end
|
data/lib/cmess/cinderella.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -36,20 +36,24 @@ require 'cmess'
|
|
36
36
|
# containing those doubly encoded characters; if asked to repair doubly
|
37
37
|
# encoded characters, substitutes them with their original character.
|
38
38
|
|
39
|
-
module CMess
|
39
|
+
module CMess
|
40
|
+
module Cinderella
|
40
41
|
|
41
42
|
extend self
|
42
43
|
|
43
44
|
# our version ;-)
|
44
|
-
VERSION = '0.0.
|
45
|
+
VERSION = '0.0.4'
|
45
46
|
|
46
|
-
DEFAULT_CSETS_DIR = File.join(
|
47
|
+
DEFAULT_CSETS_DIR = File.join(DATA_DIR, 'csets')
|
47
48
|
|
48
49
|
def pick(input, pot, crop, source_encoding, target_encoding, chars, repair = false)
|
49
|
-
iconv = Iconv.new(target_encoding, source_encoding)
|
50
|
+
iconv, encoded = Iconv.new(target_encoding, source_encoding), {}
|
50
51
|
|
51
|
-
|
52
|
-
|
52
|
+
chars.each { |char|
|
53
|
+
begin
|
54
|
+
encoded[iconv.iconv(char)] = char
|
55
|
+
rescue Iconv::IllegalSequence
|
56
|
+
end
|
53
57
|
}
|
54
58
|
|
55
59
|
regexp = Regexp.union(*encoded.keys)
|
@@ -63,4 +67,5 @@ module CMess::Cinderella
|
|
63
67
|
}
|
64
68
|
end
|
65
69
|
|
70
|
+
end
|
66
71
|
end
|
data/lib/cmess/cli.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -31,7 +31,8 @@ require 'tempfile'
|
|
31
31
|
require 'rubygems'
|
32
32
|
require 'nuggets/env/user_encoding'
|
33
33
|
|
34
|
-
module CMess
|
34
|
+
module CMess
|
35
|
+
module CLI
|
35
36
|
|
36
37
|
# how to split list of arguments
|
37
38
|
SPLIT_ARG_LIST_RE = /\s*[,\s]\s*/o
|
@@ -117,4 +118,5 @@ it explicitly via the ENCODING environment variable or via the '-t' option.
|
|
117
118
|
end
|
118
119
|
end
|
119
120
|
|
121
|
+
end
|
120
122
|
end
|
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007 University of Cologne,
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
|
+
# Albertus-Magnus-Platz, #
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -27,12 +27,12 @@
|
|
27
27
|
#++
|
28
28
|
|
29
29
|
require 'iconv'
|
30
|
-
require 'cmess'
|
31
30
|
|
32
31
|
require 'rubygems'
|
33
32
|
require 'htmlentities'
|
34
33
|
|
35
|
-
module CMess
|
34
|
+
module CMess
|
35
|
+
module DecodeEntities
|
36
36
|
|
37
37
|
extend self
|
38
38
|
|
@@ -68,4 +68,5 @@ module CMess::DecodeEntities
|
|
68
68
|
}
|
69
69
|
end
|
70
70
|
|
71
|
+
end
|
71
72
|
end
|
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -30,13 +30,12 @@
|
|
30
30
|
###############################################################################
|
31
31
|
#++
|
32
32
|
|
33
|
-
require 'cmess'
|
34
|
-
|
35
33
|
# Allows to guess an input's encoding either manually or automatically.
|
36
34
|
# Works actually pretty good -- for the supported encodings. See Manual
|
37
35
|
# and Automatic for details.
|
38
36
|
|
39
|
-
module CMess
|
37
|
+
module CMess
|
38
|
+
module GuessEncoding
|
40
39
|
|
41
40
|
# our version ;-)
|
42
41
|
VERSION = '0.0.9'
|
@@ -53,6 +52,7 @@ module CMess::GuessEncoding
|
|
53
52
|
|
54
53
|
end
|
55
54
|
|
55
|
+
end
|
56
56
|
end
|
57
57
|
|
58
58
|
%w[encoding manual automatic].each { |lib|
|
@@ -5,9 +5,9 @@
|
|
5
5
|
# #
|
6
6
|
# A component of cmess, the encoding tool-box. #
|
7
7
|
# #
|
8
|
-
# Copyright (C) 2007-
|
8
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
9
9
|
# Albertus-Magnus-Platz, #
|
10
|
-
#
|
10
|
+
# 50923 Cologne, Germany #
|
11
11
|
# #
|
12
12
|
# Authors: #
|
13
13
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -45,14 +45,16 @@ require 'forwardable'
|
|
45
45
|
#
|
46
46
|
# If a BOM is found, it may determine the encoding directly.
|
47
47
|
|
48
|
-
|
48
|
+
module CMess
|
49
|
+
module GuessEncoding
|
50
|
+
class Automatic
|
49
51
|
|
50
52
|
extend Forwardable
|
51
53
|
|
52
54
|
def_delegators self, :encoding_guessers, :supported_encoding?,
|
53
55
|
:bom_guessers, :supported_bom?
|
54
56
|
|
55
|
-
include
|
57
|
+
include Encoding
|
56
58
|
|
57
59
|
# Creates a converter for desired encoding (from UTF-8)
|
58
60
|
ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
|
@@ -353,4 +355,6 @@ class CMess::GuessEncoding::Automatic
|
|
353
355
|
starts_with?(0xfb, 0xee, 0x28)
|
354
356
|
end
|
355
357
|
|
358
|
+
end
|
359
|
+
end
|
356
360
|
end
|
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -32,7 +32,9 @@
|
|
32
32
|
|
33
33
|
# Namespace for our encodings.
|
34
34
|
|
35
|
-
module CMess
|
35
|
+
module CMess
|
36
|
+
module GuessEncoding
|
37
|
+
module Encoding
|
36
38
|
|
37
39
|
extend self
|
38
40
|
|
@@ -81,4 +83,6 @@ module CMess::GuessEncoding::Encoding
|
|
81
83
|
base.extend self
|
82
84
|
end
|
83
85
|
|
86
|
+
end
|
87
|
+
end
|
84
88
|
end
|
@@ -3,9 +3,9 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2010 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
|
-
#
|
8
|
+
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
10
|
# Authors: #
|
11
11
|
# Jens Wille <jens.wille@uni-koeln.de> #
|
@@ -32,15 +32,21 @@
|
|
32
32
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
|
+
require 'rubygems'
|
36
|
+
require 'nuggets/array/runiq'
|
37
|
+
require 'nuggets/array/in_order'
|
38
|
+
|
35
39
|
# Outputs given string (or line), being encoded in target encoding, encoded in
|
36
40
|
# various test encodings, thus allowing to identify the (seemingly) correct
|
37
41
|
# encoding by visually comparing the input string with its desired appearance.
|
38
42
|
|
39
|
-
module CMess
|
43
|
+
module CMess
|
44
|
+
module GuessEncoding
|
45
|
+
module Manual
|
40
46
|
|
41
47
|
extend self
|
42
48
|
|
43
|
-
include
|
49
|
+
include Encoding
|
44
50
|
|
45
51
|
# default encodings to try
|
46
52
|
ENCODINGS = [
|
@@ -79,12 +85,18 @@ module CMess::GuessEncoding::Manual
|
|
79
85
|
target = target_encoding
|
80
86
|
|
81
87
|
encodings = (encodings || ENCODINGS) + additional_encodings
|
82
|
-
|
83
|
-
|
84
|
-
|
88
|
+
|
89
|
+
if encodings.include?('__ALL__')
|
90
|
+
encodings.replace(all_encodings.dup)
|
91
|
+
elsif encodings.delete('__COMMON__')
|
92
|
+
encodings.concat(CANDIDATES)
|
93
|
+
end
|
94
|
+
|
95
|
+
# uniq with additional encodings staying at the end
|
96
|
+
encodings.runiq!
|
85
97
|
|
86
98
|
# move target encoding to front
|
87
|
-
encodings
|
99
|
+
encodings.in_order!(target)
|
88
100
|
|
89
101
|
max_length = encodings.map { |encoding| encoding.length }.max
|
90
102
|
|
@@ -105,4 +117,6 @@ module CMess::GuessEncoding::Manual
|
|
105
117
|
}
|
106
118
|
end
|
107
119
|
|
120
|
+
end
|
121
|
+
end
|
108
122
|
end
|
data/lib/cmess/version.rb
CHANGED
@@ -1,51 +1,27 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# version.
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
|
21
|
-
# details. #
|
22
|
-
# #
|
23
|
-
# You should have received a copy of the GNU General Public License along #
|
24
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
-
# #
|
26
|
-
###############################################################################
|
27
|
-
#++
|
28
|
-
|
29
|
-
module CMess::Version
|
30
|
-
|
31
|
-
MAJOR = 0
|
32
|
-
MINOR = 2
|
33
|
-
TINY = 2
|
34
|
-
|
35
|
-
class << self
|
36
|
-
|
37
|
-
# Returns array representation.
|
38
|
-
def to_a
|
39
|
-
[MAJOR, MINOR, TINY]
|
40
|
-
end
|
1
|
+
module CMess
|
2
|
+
|
3
|
+
module Version
|
4
|
+
|
5
|
+
MAJOR = 0
|
6
|
+
MINOR = 2
|
7
|
+
TINY = 3
|
8
|
+
|
9
|
+
class << self
|
10
|
+
|
11
|
+
# Returns array representation.
|
12
|
+
def to_a
|
13
|
+
[MAJOR, MINOR, TINY]
|
14
|
+
end
|
15
|
+
|
16
|
+
# Short-cut for version string.
|
17
|
+
def to_s
|
18
|
+
to_a.join('.')
|
19
|
+
end
|
41
20
|
|
42
|
-
# Short-cut for version string.
|
43
|
-
def to_s
|
44
|
-
to_a.join('.')
|
45
21
|
end
|
46
22
|
|
47
23
|
end
|
48
24
|
|
49
|
-
|
25
|
+
VERSION = Version.to_s
|
50
26
|
|
51
27
|
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 2
|
8
|
+
- 3
|
9
|
+
version: 0.2.3
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Jens Wille
|
@@ -9,36 +14,42 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date:
|
17
|
+
date: 2010-04-23 00:00:00 +02:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: ruby-nuggets
|
17
|
-
|
18
|
-
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
24
|
requirements:
|
21
25
|
- - ">="
|
22
26
|
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
- 3
|
30
|
+
- 3
|
23
31
|
version: 0.3.3
|
24
|
-
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
25
34
|
- !ruby/object:Gem::Dependency
|
26
35
|
name: htmlentities
|
27
|
-
|
28
|
-
|
29
|
-
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
38
|
requirements:
|
31
39
|
- - ">="
|
32
40
|
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
33
43
|
version: "0"
|
34
|
-
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
35
46
|
description: "\n Assist with handling messed up encodings (Currently includes the\n following tools: bconv, cinderella, decode_entities, guess_encoding)\n "
|
36
47
|
email: jens.wille@uni-koeln.de
|
37
48
|
executables:
|
38
|
-
-
|
49
|
+
- bconv
|
39
50
|
- decode_entities
|
40
51
|
- guess_encoding
|
41
|
-
-
|
52
|
+
- cinderella
|
42
53
|
extensions: []
|
43
54
|
|
44
55
|
extra_rdoc_files:
|
@@ -46,87 +57,89 @@ extra_rdoc_files:
|
|
46
57
|
- ChangeLog
|
47
58
|
- README
|
48
59
|
files:
|
49
|
-
- lib/cmess
|
50
|
-
- lib/cmess/guess_encoding/encoding.rb
|
60
|
+
- lib/cmess.rb
|
51
61
|
- lib/cmess/guess_encoding/automatic.rb
|
62
|
+
- lib/cmess/guess_encoding/encoding.rb
|
52
63
|
- lib/cmess/guess_encoding/manual.rb
|
53
|
-
- lib/cmess/
|
54
|
-
- lib/cmess/version.rb
|
64
|
+
- lib/cmess/bconv.rb
|
55
65
|
- lib/cmess/cli.rb
|
56
66
|
- lib/cmess/cinderella.rb
|
57
|
-
- lib/cmess/
|
58
|
-
- lib/cmess.rb
|
59
|
-
-
|
67
|
+
- lib/cmess/guess_encoding.rb
|
68
|
+
- lib/cmess/decode_entities.rb
|
69
|
+
- lib/cmess/version.rb
|
70
|
+
- bin/bconv
|
60
71
|
- bin/decode_entities
|
61
72
|
- bin/guess_encoding
|
62
|
-
- bin/
|
63
|
-
- COPYING
|
64
|
-
- Rakefile
|
73
|
+
- bin/cinderella
|
65
74
|
- README
|
66
75
|
- ChangeLog
|
67
|
-
-
|
68
|
-
-
|
69
|
-
- example/cinderella/crop
|
70
|
-
- example/cinderella/crop_repaired
|
71
|
-
- example/cinderella/pot
|
76
|
+
- Rakefile
|
77
|
+
- COPYING
|
72
78
|
- example/guess_encoding/en.utf-8.txt
|
73
|
-
- example/guess_encoding/check_results
|
74
79
|
- example/guess_encoding/de.utf-8.txt
|
75
80
|
- example/guess_encoding/it.utf-8.txt
|
81
|
+
- example/guess_encoding/check_results
|
76
82
|
- example/guess_encoding/fr.utf-8.txt
|
77
|
-
-
|
78
|
-
-
|
79
|
-
-
|
83
|
+
- example/cinderella/empty6-slash_repaired.txt
|
84
|
+
- example/cinderella/empty6-slash.txt
|
85
|
+
- example/cinderella/crop
|
86
|
+
- example/cinderella/pot
|
87
|
+
- example/cinderella/crop_repaired
|
80
88
|
- data/csets/latin1.yaml
|
81
|
-
- data/csets/
|
82
|
-
- data/csets/
|
83
|
-
- data/csets/unicode/
|
84
|
-
- data/csets/unicode/cyrillic-supplement.yaml
|
85
|
-
- data/csets/unicode/latin_extended_additional.yaml
|
86
|
-
- data/csets/unicode/ipa_extensions.yaml
|
87
|
-
- data/csets/unicode/latin-extended-d.yaml
|
89
|
+
- data/csets/iso_8859-15.yaml
|
90
|
+
- data/csets/iso_8859-1.yaml
|
91
|
+
- data/csets/unicode/latin_extended_a.yaml
|
88
92
|
- data/csets/unicode/basic_latin.yaml
|
93
|
+
- data/csets/unicode/ipa_extensions.yaml
|
89
94
|
- data/csets/unicode/latin_extended_b.yaml
|
90
|
-
- data/csets/unicode/
|
95
|
+
- data/csets/unicode/latin-extended-d.yaml
|
96
|
+
- data/csets/unicode/letterlike_symbols.yaml
|
97
|
+
- data/csets/unicode/latin_extended_additional.yaml
|
98
|
+
- data/csets/unicode/greek.yaml
|
91
99
|
- data/csets/unicode/latin-extended-c.yaml
|
92
100
|
- data/csets/unicode/spacing_modifier_letters.yaml
|
101
|
+
- data/csets/unicode/cyrillic-supplement.yaml
|
93
102
|
- data/csets/unicode/cyrillic.yaml
|
94
|
-
- data/csets/unicode/
|
95
|
-
- data/csets/iso_8859-1.yaml
|
103
|
+
- data/csets/unicode/latin_1_supplement.yaml
|
96
104
|
- data/csets/utf-8.yaml
|
105
|
+
- data/csets/utf8.yaml
|
106
|
+
- data/test_chars.yaml
|
107
|
+
- data/chartab.yaml
|
97
108
|
has_rdoc: true
|
98
109
|
homepage: http://prometheus.rubyforge.org/cmess
|
99
110
|
licenses: []
|
100
111
|
|
101
112
|
post_install_message:
|
102
113
|
rdoc_options:
|
103
|
-
- --
|
104
|
-
- cmess Application documentation
|
114
|
+
- --line-numbers
|
105
115
|
- --inline-source
|
106
|
-
- --charset
|
107
|
-
- UTF-8
|
108
116
|
- --main
|
109
117
|
- README
|
118
|
+
- --charset
|
119
|
+
- UTF-8
|
120
|
+
- --title
|
121
|
+
- cmess Application documentation
|
110
122
|
- --all
|
111
|
-
- --line-numbers
|
112
123
|
require_paths:
|
113
124
|
- lib
|
114
125
|
required_ruby_version: !ruby/object:Gem::Requirement
|
115
126
|
requirements:
|
116
127
|
- - ">="
|
117
128
|
- !ruby/object:Gem::Version
|
129
|
+
segments:
|
130
|
+
- 0
|
118
131
|
version: "0"
|
119
|
-
version:
|
120
132
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
133
|
requirements:
|
122
134
|
- - ">="
|
123
135
|
- !ruby/object:Gem::Version
|
136
|
+
segments:
|
137
|
+
- 0
|
124
138
|
version: "0"
|
125
|
-
version:
|
126
139
|
requirements: []
|
127
140
|
|
128
141
|
rubyforge_project: prometheus
|
129
|
-
rubygems_version: 1.3.
|
142
|
+
rubygems_version: 1.3.6
|
130
143
|
signing_key:
|
131
144
|
specification_version: 3
|
132
145
|
summary: "Assist with handling messed up encodings (Currently includes the following tools: bconv, cinderella, decode_entities, guess_encoding)"
|