cmess 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ChangeLog +12 -0
- data/README +13 -8
- data/Rakefile +15 -15
- data/bin/bconv +9 -7
- data/bin/cinderella +13 -11
- data/bin/decode_entities +9 -7
- data/bin/guess_encoding +23 -18
- data/lib/cmess.rb +4 -3
- data/lib/cmess/bconv.rb +37 -56
- data/lib/cmess/cinderella.rb +19 -18
- data/lib/cmess/cli.rb +8 -12
- data/lib/cmess/decode_entities.rb +21 -24
- data/lib/cmess/guess_encoding.rb +5 -3
- data/lib/cmess/guess_encoding/automatic.rb +29 -42
- data/lib/cmess/guess_encoding/encoding.rb +5 -5
- data/lib/cmess/guess_encoding/manual.rb +14 -13
- data/lib/cmess/version.rb +2 -2
- metadata +89 -105
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6388ed1476587af57eb196817003d94d58ecb268
|
4
|
+
data.tar.gz: 41551903b954d2de73b62203467d93955ac2516f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4ffc33d0ed9392797a97a5eb535cb0652687e0b8abcd2b6a30de85d41beb63b65ebfa434a625b6aa91bfa379ad5fe8a2f46286aa3450b930633bea9241ba30e7
|
7
|
+
data.tar.gz: e11d90e25d6b5fb5c0f6d81f7d2824e2c7dd662e5feef1f7978cd66588fcd0a5a18cc0d60fbe2d1ee6b3cac254198ff07a64bd985b1d7bb5f3a1f27aa7d7096a
|
data/ChangeLog
CHANGED
@@ -1,5 +1,17 @@
|
|
1
|
+
# markup: rd
|
2
|
+
|
1
3
|
= Revision history for cmess
|
2
4
|
|
5
|
+
== 0.4.0 [2013-08-02]
|
6
|
+
|
7
|
+
* Updated for Ruby 1.9.2+. <b>Ruby 1.8 no longer supported.</b>
|
8
|
+
* guess_encoding: Added <tt>--reverse</tt> option.
|
9
|
+
* Housekeeping.
|
10
|
+
|
11
|
+
== 0.3.1 [2011-08-16]
|
12
|
+
|
13
|
+
* decode_entities: Fixed regression.
|
14
|
+
|
3
15
|
== 0.3.0 [2011-07-25]
|
4
16
|
|
5
17
|
* Extensive refactoring
|
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to cmess version 0.
|
5
|
+
This documentation refers to cmess version 0.4.0
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -22,21 +22,24 @@ there are:
|
|
22
22
|
(see CMess::BConv)
|
23
23
|
+decode_entities+:: Decode HTML entities in a string. (see CMess::DecodeEntities)
|
24
24
|
|
25
|
-
|
25
|
+
|
26
|
+
== SUPPORTED PLATFORMS
|
27
|
+
|
28
|
+
Requires Ruby version 1.9.2 or higher; use the latest 0.3.x release on older
|
29
|
+
Ruby versions. CMess has been tested with ruby 2.0.0p247 on x86_64-linux.
|
26
30
|
|
27
31
|
|
28
32
|
== LINKS
|
29
33
|
|
30
34
|
<b></b>
|
31
|
-
Documentation::
|
32
|
-
Source code::
|
33
|
-
|
34
|
-
RubyGem:: http://rubygems.org/gems/cmess
|
35
|
+
Documentation:: http://blackwinter.github.com/cmess
|
36
|
+
Source code:: http://github.com/blackwinter/cmess
|
37
|
+
RubyGem:: http://rubygems.org/gems/cmess
|
35
38
|
|
36
39
|
|
37
40
|
== AUTHORS
|
38
41
|
|
39
|
-
* Jens Wille <mailto:jens.wille@
|
42
|
+
* Jens Wille <mailto:jens.wille@gmail.com>
|
40
43
|
|
41
44
|
|
42
45
|
== CREDITS
|
@@ -48,9 +51,11 @@ RubyGem:: http://rubygems.org/gems/cmess
|
|
48
51
|
|
49
52
|
== LICENSE AND COPYRIGHT
|
50
53
|
|
51
|
-
Copyright (C) 2007-
|
54
|
+
Copyright (C) 2007-2012 University of Cologne,
|
52
55
|
Albertus-Magnus-Platz, 50923 Cologne, Germany
|
53
56
|
|
57
|
+
Copyright (C) 2013 Jens Wille
|
58
|
+
|
54
59
|
cmess is free software: you can redistribute it and/or modify it under the
|
55
60
|
terms of the GNU Affero General Public License as published by the Free
|
56
61
|
Software Foundation, either version 3 of the License, or (at your option)
|
data/Rakefile
CHANGED
@@ -6,21 +6,21 @@ begin
|
|
6
6
|
require 'hen'
|
7
7
|
|
8
8
|
Hen.lay! {{
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
:
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
:
|
21
|
-
:
|
22
|
-
|
23
|
-
:
|
9
|
+
gem: {
|
10
|
+
name: %q{cmess},
|
11
|
+
version: CMess::VERSION,
|
12
|
+
summary: <<-EOT,
|
13
|
+
Assist with handling messed up encodings (Currently includes the
|
14
|
+
following tools: #{Dir['bin/*'].map { |e| File.basename(e) }.sort.join(', ')})
|
15
|
+
EOT
|
16
|
+
author: %q{Jens Wille},
|
17
|
+
email: %q{jens.wille@gmail.com},
|
18
|
+
license: %q{AGPL},
|
19
|
+
homepage: :blackwinter,
|
20
|
+
extra_files: FileList['data/**/*'].to_a,
|
21
|
+
dependencies: [['ruby-nuggets', '>= 0.3.3'], 'htmlentities'],
|
22
|
+
|
23
|
+
required_ruby_version: '>= 1.9.2'
|
24
24
|
}
|
25
25
|
}}
|
26
26
|
rescue LoadError => err
|
data/bin/bconv
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# bconv -- Convert between bibliographic (and other) encodings #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2008-
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,11 +35,11 @@ require 'cmess/bconv'
|
|
33
35
|
include CMess::CLI
|
34
36
|
|
35
37
|
options = {
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
38
|
+
input: STDIN,
|
39
|
+
output: STDOUT,
|
40
|
+
source_encoding: determine_system_encoding,
|
41
|
+
target_encoding: determine_system_encoding,
|
42
|
+
chartab_file: CMess::BConv::DEFAULT_CHARTAB_FILE
|
41
43
|
}
|
42
44
|
|
43
45
|
parse_options { |opts|
|
data/bin/cinderella
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# cinderella -- Handle double encoded characters #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C)
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -35,14 +37,14 @@ include CMess::CLI
|
|
35
37
|
progname = File.basename($0)
|
36
38
|
|
37
39
|
options = {
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
43
|
-
:
|
44
|
-
:
|
45
|
-
:
|
40
|
+
input: STDIN,
|
41
|
+
output: STDOUT,
|
42
|
+
pot: nil,
|
43
|
+
crop: nil,
|
44
|
+
source_encoding: nil,
|
45
|
+
target_encoding: determine_system_encoding,
|
46
|
+
csets: [CMess::Cinderella::DEFAULT_CSETS_DIR],
|
47
|
+
repair: false
|
46
48
|
}
|
47
49
|
|
48
50
|
parse_options { |opts|
|
@@ -169,5 +171,5 @@ cli do
|
|
169
171
|
|
170
172
|
trailing_args_as_input(options)
|
171
173
|
|
172
|
-
CMess::Cinderella.pick(options.merge(:
|
174
|
+
CMess::Cinderella.pick(options.merge(chars: YAML.load_file(char_file)))
|
173
175
|
end
|
data/bin/decode_entities
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# decode_entities -- Decode HTML entities #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C)
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,11 +35,11 @@ require 'cmess/decode_entities'
|
|
33
35
|
include CMess::CLI
|
34
36
|
|
35
37
|
options = {
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
38
|
+
input: STDIN,
|
39
|
+
output: STDOUT,
|
40
|
+
source_encoding: CMess::DecodeEntities::ENCODING,
|
41
|
+
target_encoding: nil,
|
42
|
+
flavour: CMess::DecodeEntities::DEFAULT_FLAVOUR
|
41
43
|
}
|
42
44
|
|
43
45
|
parse_options { |opts|
|
data/bin/guess_encoding
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C)
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,16 +35,17 @@ require 'cmess/guess_encoding'
|
|
33
35
|
include CMess::CLI
|
34
36
|
|
35
37
|
options = {
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
43
|
-
:
|
44
|
-
:
|
45
|
-
:
|
38
|
+
input: STDIN,
|
39
|
+
line: 1,
|
40
|
+
encodings: nil,
|
41
|
+
additional_encodings: [],
|
42
|
+
target_encoding: determine_system_encoding,
|
43
|
+
manual: false,
|
44
|
+
reverse: false,
|
45
|
+
chunk_size: nil,
|
46
|
+
ignore_bom: false,
|
47
|
+
charcodes: nil,
|
48
|
+
base: 16
|
46
49
|
}
|
47
50
|
|
48
51
|
parse_options { |opts|
|
@@ -79,12 +82,8 @@ parse_options { |opts|
|
|
79
82
|
opts.separator ''
|
80
83
|
|
81
84
|
opts.on('-l', '--line LINE', Integer, "Line number of input file to use for testing [Default: #{options[:line]}]") { |line|
|
85
|
+
abort 'Line number must be greater than 0!' unless line > 0
|
82
86
|
options[:line] = line
|
83
|
-
|
84
|
-
unless options[:line] > 0
|
85
|
-
options[:input].read # prevent 'Broken pipe' error
|
86
|
-
abort 'Line number must be greater then 0!'
|
87
|
-
end
|
88
87
|
}
|
89
88
|
|
90
89
|
opts.separator ''
|
@@ -111,6 +110,12 @@ parse_options { |opts|
|
|
111
110
|
exit
|
112
111
|
}
|
113
112
|
|
113
|
+
opts.separator ''
|
114
|
+
|
115
|
+
opts.on('-R', '--reverse', 'Reverse encoding direction (FROM target TO encodings)') {
|
116
|
+
options[:reverse] = true
|
117
|
+
}
|
118
|
+
|
114
119
|
opts.separator ''
|
115
120
|
opts.separator ' * Charcodes'
|
116
121
|
|
@@ -189,7 +194,7 @@ cli do
|
|
189
194
|
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
190
195
|
end
|
191
196
|
|
192
|
-
CMess::GuessEncoding.manual(options.merge(:
|
197
|
+
CMess::GuessEncoding.manual(options.merge(input: input))
|
193
198
|
else
|
194
199
|
puts CMess::GuessEncoding.automatic(options[:input], options[:chunk_size], options[:ignore_bom])
|
195
200
|
end
|
data/lib/cmess.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# cmess -- Assist with handling messed up encodings #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -27,7 +29,6 @@
|
|
27
29
|
#++
|
28
30
|
|
29
31
|
require 'cmess/version'
|
30
|
-
require 'iconv'
|
31
32
|
|
32
33
|
# See README for more information.
|
33
34
|
|
data/lib/cmess/bconv.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2008-
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,9 +35,9 @@ require 'yaml'
|
|
33
35
|
|
34
36
|
class CMess::BConv
|
35
37
|
|
36
|
-
VERSION = '0.0
|
38
|
+
VERSION = '0.1.0'
|
37
39
|
|
38
|
-
|
40
|
+
ENCODING = 'utf-8'
|
39
41
|
|
40
42
|
DEFAULT_CHARTAB_FILE = File.join(CMess::DATA_DIR, 'chartab.yaml')
|
41
43
|
|
@@ -67,8 +69,6 @@ class CMess::BConv
|
|
67
69
|
|
68
70
|
end
|
69
71
|
|
70
|
-
attr_reader :input, :output, :source_encoding, :target_encoding, :chartab, :encodings
|
71
|
-
|
72
72
|
def initialize(options)
|
73
73
|
@input, @output, _ = CMess.ensure_options!(options,
|
74
74
|
:input, :output, :source_encoding, :target_encoding
|
@@ -79,83 +79,64 @@ class CMess::BConv
|
|
79
79
|
|
80
80
|
@chartab = self.class.load_chartab(options[:chartab] || DEFAULT_CHARTAB_FILE)
|
81
81
|
@encodings = self.class.encodings(@chartab)
|
82
|
-
end
|
83
82
|
|
84
|
-
|
85
|
-
|
83
|
+
[:source_encoding, :target_encoding].each { |key|
|
84
|
+
instance_variable_set("@#{key}", encoding = options[key].upcase)
|
85
|
+
instance_variable_set("@have_#{key}", encodings.include?(encoding))
|
86
|
+
}
|
86
87
|
end
|
87
88
|
|
89
|
+
attr_reader :input, :output, :source_encoding, :target_encoding, :chartab, :encodings
|
90
|
+
|
88
91
|
def convert
|
89
|
-
|
90
|
-
if encoding?(target_encoding)
|
91
|
-
@charmap = chartab.inject({}) { |hash, (code, map)|
|
92
|
-
hash.update(map[source_encoding] => map[target_encoding].pack('U*'))
|
93
|
-
}
|
92
|
+
source, target, out, charmap = source_encoding, target_encoding, output, {}
|
94
93
|
|
95
|
-
|
96
|
-
|
94
|
+
if @have_source_encoding
|
95
|
+
if @have_target_encoding
|
96
|
+
chartab.each { |code, map|
|
97
|
+
charmap[map[source]] = map[target].pack('U*')
|
97
98
|
}
|
98
|
-
else
|
99
|
-
iconv = iconv_to
|
100
99
|
|
101
|
-
|
102
|
-
|
100
|
+
input.each_byte { |char| out.print(map(char, charmap)) }
|
101
|
+
else
|
102
|
+
chartab.each { |code, map|
|
103
|
+
charmap[map[source]] = [code.to_i(16)].pack('U*')
|
103
104
|
}
|
104
105
|
|
106
|
+
source = ENCODING
|
107
|
+
|
105
108
|
input.each_byte { |char|
|
106
|
-
|
109
|
+
out.print(encode(map(char, charmap), source, target))
|
107
110
|
}
|
108
111
|
end
|
109
112
|
else
|
110
|
-
if
|
111
|
-
|
112
|
-
|
113
|
-
charmap = chartab.inject({}) { |hash, (code, map)|
|
114
|
-
hash.update(code.to_i(16) => map[target_encoding].pack('U*'))
|
113
|
+
if @have_target_encoding
|
114
|
+
chartab.each { |code, map|
|
115
|
+
charmap[code.to_i(16)] = map[target].pack('U*')
|
115
116
|
}
|
116
117
|
|
118
|
+
target = ENCODING
|
119
|
+
|
117
120
|
input.each { |line|
|
118
|
-
|
119
|
-
|
121
|
+
encode(line, source, target).unpack('U*').each { |char|
|
122
|
+
out.print(charmap[char])
|
120
123
|
}
|
121
124
|
}
|
122
125
|
else
|
123
|
-
|
124
|
-
|
125
|
-
input.each { |line|
|
126
|
-
output.puts iconv.iconv(line)
|
127
|
-
}
|
126
|
+
input.each { |line| out.print(encode(line, source, target)) }
|
128
127
|
end
|
129
128
|
end
|
130
129
|
end
|
131
130
|
|
132
131
|
private
|
133
132
|
|
134
|
-
def
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
raise ArgumentError, "invalid encoding: source encoding = #{from}, target encoding = #{to}"
|
139
|
-
end
|
140
|
-
|
141
|
-
def iconv.iconv(*args)
|
142
|
-
super
|
143
|
-
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
|
144
|
-
warn "ILLEGAL INPUT SEQUENCE: #{err}"; ''
|
145
|
-
end
|
146
|
-
|
147
|
-
iconv
|
148
|
-
end
|
149
|
-
|
150
|
-
def iconv_from(from = source_encoding)
|
151
|
-
iconv_from_to(from, INTERMEDIATE_ENCODING)
|
152
|
-
end
|
153
|
-
|
154
|
-
def iconv_to(to = target_encoding)
|
155
|
-
iconv_from_to(INTERMEDIATE_ENCODING, to)
|
133
|
+
def encode(string, source, target)
|
134
|
+
string.encode(target, source)
|
135
|
+
rescue Encoding::UndefinedConversionError => err
|
136
|
+
warn "ILLEGAL INPUT SEQUENCE: #{err.error_char}"
|
156
137
|
end
|
157
138
|
|
158
|
-
def map(char, charmap
|
139
|
+
def map(char, charmap)
|
159
140
|
unless map = charmap[[char]]
|
160
141
|
unless map = charmap[[char, c = input.getc]]
|
161
142
|
input.ungetc(c) if c
|
data/lib/cmess/cinderella.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -39,34 +41,33 @@ module CMess::Cinderella
|
|
39
41
|
|
40
42
|
extend self
|
41
43
|
|
42
|
-
VERSION = '0.0
|
44
|
+
VERSION = '0.1.0'
|
43
45
|
|
44
46
|
DEFAULT_CSETS_DIR = File.join(CMess::DATA_DIR, 'csets')
|
45
47
|
|
46
48
|
def pick(options)
|
47
|
-
CMess.ensure_options!(options,
|
49
|
+
input, pot, crop, source, target, chars = CMess.ensure_options!(options,
|
48
50
|
:input, :pot, :crop, :source_encoding, :target_encoding, :chars
|
49
51
|
)
|
50
52
|
|
51
53
|
encoded = {}
|
52
|
-
|
53
|
-
|
54
|
-
options[:chars].each { |char|
|
55
|
-
begin
|
56
|
-
encoded[iconv.iconv(char)] = char
|
57
|
-
rescue Iconv::IllegalSequence
|
58
|
-
end
|
59
|
-
}
|
54
|
+
chars.each { |char| encoded[encode(char, source, target)] = char }
|
60
55
|
|
61
56
|
regexp = Regexp.union(*encoded.keys)
|
62
|
-
pot, crop, repair = options.values_at(:pot, :crop, :repair)
|
63
57
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
58
|
+
input.each { |line|
|
59
|
+
out = line =~ regexp ? crop : pot or next
|
60
|
+
|
61
|
+
line.gsub!(regexp) { |m| encoded[m] } if repair
|
62
|
+
out.puts(line)
|
69
63
|
}
|
70
64
|
end
|
71
65
|
|
66
|
+
private
|
67
|
+
|
68
|
+
def encode(string, source, target)
|
69
|
+
string.encode(target, source)
|
70
|
+
rescue Encoding::UndefinedConversionError
|
71
|
+
end
|
72
|
+
|
72
73
|
end
|
data/lib/cmess/cli.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -102,18 +104,12 @@ module CMess::CLI
|
|
102
104
|
end
|
103
105
|
|
104
106
|
def determine_system_encoding
|
105
|
-
ENV.user_encoding ||
|
106
|
-
|
107
|
-
abort <<-EOT
|
107
|
+
ENV.user_encoding || lambda {
|
108
|
+
abort <<-EOT
|
108
109
|
Your system's encoding couldn't be determined automatically -- please specify
|
109
110
|
it explicitly via the ENCODING environment variable or via the '-t' option.
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
def dummy.to_s; 'NOT FOUND' end
|
114
|
-
|
115
|
-
dummy
|
116
|
-
end
|
111
|
+
EOT
|
112
|
+
}.tap { |dummy| def dummy.to_s; 'NOT FOUND'; end }
|
117
113
|
end
|
118
114
|
|
119
115
|
def cli
|
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,43 +35,38 @@ module CMess::DecodeEntities
|
|
33
35
|
|
34
36
|
extend self
|
35
37
|
|
36
|
-
VERSION = '0.0
|
38
|
+
VERSION = '0.1.0'
|
37
39
|
|
38
40
|
# HTMLEntities requires UTF-8
|
39
|
-
|
40
|
-
|
41
|
-
ICONV_DUMMY = begin
|
42
|
-
dummy = Object.new
|
43
|
-
|
44
|
-
def dummy.iconv(string)
|
45
|
-
string
|
46
|
-
end
|
47
|
-
|
48
|
-
dummy
|
49
|
-
end
|
41
|
+
ENCODING = 'UTF-8'
|
50
42
|
|
51
43
|
DEFAULT_FLAVOUR = 'xml-safe'
|
52
44
|
|
53
45
|
def decode(options)
|
54
|
-
input, output,
|
46
|
+
input, output, source = CMess.ensure_options!(options,
|
55
47
|
:input, :output, :source_encoding
|
56
48
|
)
|
57
49
|
|
58
|
-
|
59
|
-
|
60
|
-
iconv_in = source_encoding != INTERMEDIATE_ENCODING ?
|
61
|
-
Iconv.new(INTERMEDIATE_ENCODING, source_encoding) : ICONV_DUMMY
|
50
|
+
target, entities, encoding = options[:target_encoding] || source,
|
51
|
+
HTMLEntities.new(options[:flavour] || DEFAULT_FLAVOUR), ENCODING
|
62
52
|
|
63
|
-
|
64
|
-
Iconv.new(target_encoding, INTERMEDIATE_ENCODING) : ICONV_DUMMY
|
65
|
-
|
66
|
-
html_entities = HTMLEntities.new(options[:flavour] || DEFAULT_FLAVOUR)
|
53
|
+
skip_source, skip_target = source == encoding, target == encoding
|
67
54
|
|
68
55
|
input.each { |line|
|
69
|
-
|
56
|
+
line = encode(line, source, encoding) unless skip_source
|
57
|
+
line = entities.decode(line)
|
58
|
+
line = encode(line, encoding, target) unless skip_target
|
59
|
+
|
60
|
+
output.puts(line)
|
70
61
|
}
|
71
62
|
end
|
72
63
|
|
64
|
+
private
|
65
|
+
|
66
|
+
def encode(string, source, target)
|
67
|
+
string.encode(target, source)
|
68
|
+
end
|
69
|
+
|
73
70
|
end
|
74
71
|
|
75
72
|
class HTMLEntities # :nodoc:
|
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# Contributors: #
|
14
16
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -38,7 +40,7 @@ require 'cmess'
|
|
38
40
|
|
39
41
|
module CMess::GuessEncoding
|
40
42
|
|
41
|
-
VERSION = '0.
|
43
|
+
VERSION = '0.2.0'
|
42
44
|
|
43
45
|
autoload :Encoding, 'cmess/guess_encoding/encoding'
|
44
46
|
autoload :Manual, 'cmess/guess_encoding/manual'
|
@@ -5,12 +5,14 @@
|
|
5
5
|
# #
|
6
6
|
# A component of cmess, the encoding tool-box. #
|
7
7
|
# #
|
8
|
-
# Copyright (C)
|
8
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
9
9
|
# Albertus-Magnus-Platz, #
|
10
10
|
# 50923 Cologne, Germany #
|
11
11
|
# #
|
12
|
+
# Copyright (C) 2013 Jens Wille #
|
13
|
+
# #
|
12
14
|
# Authors: #
|
13
|
-
# Jens Wille <jens.wille@
|
15
|
+
# Jens Wille <jens.wille@gmail.com> #
|
14
16
|
# #
|
15
17
|
# Contributors: #
|
16
18
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -32,8 +34,6 @@
|
|
32
34
|
###############################################################################
|
33
35
|
#++
|
34
36
|
|
35
|
-
$KCODE = 'u' if RUBY_VERSION < '1.9'
|
36
|
-
|
37
37
|
require 'cmess/guess_encoding'
|
38
38
|
|
39
39
|
require 'yaml'
|
@@ -57,9 +57,6 @@ class CMess::GuessEncoding::Automatic
|
|
57
57
|
|
58
58
|
include CMess::GuessEncoding::Encoding
|
59
59
|
|
60
|
-
# Creates a converter for desired encoding (from UTF-8).
|
61
|
-
ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
|
62
|
-
|
63
60
|
# Single-byte encodings to test statistically by TEST_CHARS.
|
64
61
|
TEST_ENCODINGS = [
|
65
62
|
MACINTOSH,
|
@@ -87,22 +84,13 @@ class CMess::GuessEncoding::Automatic
|
|
87
84
|
CHARS_TO_TEST = (
|
88
85
|
'€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂ' <<
|
89
86
|
'ÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
|
90
|
-
).
|
87
|
+
).chars.to_a
|
91
88
|
|
92
89
|
# Map TEST_ENCODINGS to respectively encoded CHARS_TO_TEST.
|
93
|
-
TEST_CHARS = Hash.new { |
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
begin
|
98
|
-
byte = *ICONV_FOR[encoding].iconv(char).unpack('C')
|
99
|
-
rescue Iconv::IllegalSequence
|
100
|
-
end
|
101
|
-
}.compact
|
102
|
-
|
103
|
-
TEST_ENCODINGS << encoding unless TEST_ENCODINGS.include?(encoding)
|
104
|
-
|
105
|
-
hash[encoding] = encchars
|
90
|
+
TEST_CHARS = Hash.new { |h, k|
|
91
|
+
e, f = self[k], UTF_8
|
92
|
+
TEST_ENCODINGS << e unless TEST_ENCODINGS.include?(e)
|
93
|
+
h[e] = CHARS_TO_TEST.flat_map { |c| c.encode(e, f).unpack('C') }
|
106
94
|
}.update(YAML.load_file(File.join(CMess::DATA_DIR, 'test_chars.yaml')))
|
107
95
|
|
108
96
|
# Relative count of TEST_CHARS must exceed this threshold to yield
|
@@ -134,10 +122,10 @@ class CMess::GuessEncoding::Automatic
|
|
134
122
|
|
135
123
|
def encoding(*encodings, &block)
|
136
124
|
encodings.flatten.each { |encoding|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
125
|
+
unless @supported_encodings.include?(encoding)
|
126
|
+
@supported_encodings << encoding
|
127
|
+
@encoding_guessers << block
|
128
|
+
end
|
141
129
|
}
|
142
130
|
end
|
143
131
|
|
@@ -146,10 +134,10 @@ class CMess::GuessEncoding::Automatic
|
|
146
134
|
end
|
147
135
|
|
148
136
|
def bom_encoding(encoding, &block)
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
137
|
+
unless @supported_boms.include?(encoding)
|
138
|
+
@supported_boms << encoding
|
139
|
+
@bom_guessers << lambda { |*| encoding if instance_eval(&block) }
|
140
|
+
end
|
153
141
|
end
|
154
142
|
|
155
143
|
def supported_bom?(encoding)
|
@@ -158,30 +146,30 @@ class CMess::GuessEncoding::Automatic
|
|
158
146
|
|
159
147
|
end
|
160
148
|
|
161
|
-
attr_reader :input, :chunk_size, :byte_count, :byte_total, :first_byte
|
162
|
-
|
163
149
|
def initialize(input, chunk_size = nil)
|
164
150
|
@input = case input
|
165
151
|
when IO then input
|
166
152
|
when String then StringIO.new(input)
|
167
|
-
else
|
168
|
-
|
153
|
+
else raise ArgumentError,
|
154
|
+
"don't know how to handle input of type #{input.class}"
|
169
155
|
end
|
170
156
|
|
171
157
|
@chunk_size = chunk_size
|
172
158
|
end
|
173
159
|
|
160
|
+
attr_reader :input, :chunk_size, :byte_count, :byte_total, :first_byte
|
161
|
+
|
174
162
|
def guess(ignore_bom = false)
|
175
163
|
return bom if bom && !ignore_bom
|
176
164
|
|
177
165
|
while read
|
178
166
|
encoding_guessers.each { |block|
|
179
|
-
encoding = instance_eval(&block)
|
180
|
-
|
167
|
+
if encoding = instance_eval(&block) and supported_encoding?(encoding)
|
168
|
+
return encoding
|
169
|
+
end
|
181
170
|
}
|
182
171
|
end
|
183
172
|
|
184
|
-
# nothing suitable found :-(
|
185
173
|
UNKNOWN
|
186
174
|
end
|
187
175
|
|
@@ -206,14 +194,13 @@ class CMess::GuessEncoding::Automatic
|
|
206
194
|
end
|
207
195
|
|
208
196
|
bom_guessers.each { |block|
|
209
|
-
encoding = instance_eval(&block)
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
197
|
+
if encoding = instance_eval(&block) and supported_encoding?(encoding)
|
198
|
+
return encoding
|
199
|
+
else
|
200
|
+
input.rewind
|
201
|
+
end
|
214
202
|
}
|
215
203
|
|
216
|
-
# nothing suitable found :-(
|
217
204
|
nil
|
218
205
|
end
|
219
206
|
|
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# Contributors: #
|
14
16
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -50,9 +52,7 @@ module CMess::GuessEncoding::Encoding
|
|
50
52
|
private
|
51
53
|
|
52
54
|
def get_all_encodings
|
53
|
-
|
54
|
-
get_or_set_encoding_const(encoding.sub(%r{/*\z}, ''))
|
55
|
-
}
|
55
|
+
Encoding.name_list.map { |encoding| get_or_set_encoding_const(encoding) }
|
56
56
|
end
|
57
57
|
|
58
58
|
def const_name_for(encoding)
|
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# Contributors: #
|
14
16
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -57,13 +59,13 @@ module CMess::GuessEncoding::Manual
|
|
57
59
|
CP1252,
|
58
60
|
CP850,
|
59
61
|
CP852,
|
60
|
-
CP856,
|
61
62
|
UTF_8
|
62
63
|
]
|
63
64
|
|
64
65
|
# Likely candidates to suggest to the user
|
65
66
|
CANDIDATES = [
|
66
67
|
ANSI_X34,
|
68
|
+
CP856,
|
67
69
|
EBCDIC_AT_DE,
|
68
70
|
EBCDIC_US,
|
69
71
|
EUC_JP,
|
@@ -95,19 +97,18 @@ module CMess::GuessEncoding::Manual
|
|
95
97
|
# move target encoding to front
|
96
98
|
encodings.in_order!(target)
|
97
99
|
|
98
|
-
max_length = encodings.max(:length)
|
100
|
+
max_length, reverse = encodings.max(:length), options[:reverse]
|
99
101
|
|
100
102
|
encodings.each { |encoding|
|
103
|
+
args = [target, encoding]
|
104
|
+
args.reverse! if reverse
|
105
|
+
|
101
106
|
converted = begin
|
102
|
-
|
103
|
-
rescue
|
104
|
-
"ILLEGAL INPUT SEQUENCE: #{err}"
|
105
|
-
rescue
|
106
|
-
|
107
|
-
raise ArgumentError, "invalid encoding: #{encoding}"
|
108
|
-
else
|
109
|
-
'INVALID ENCODING!'
|
110
|
-
end
|
107
|
+
input.encode(*args)
|
108
|
+
rescue Encoding::UndefinedConversionError => err
|
109
|
+
"ILLEGAL INPUT SEQUENCE: #{err.error_char}"
|
110
|
+
rescue Encoding::ConverterNotFoundError => err
|
111
|
+
err.to_s
|
111
112
|
end
|
112
113
|
|
113
114
|
puts "%-#{max_length}s : %s" % [encoding, converted]
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,153 +1,137 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
- 1
|
10
|
-
version: 0.3.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Jens Wille
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2013-08-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: ruby-nuggets
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 21
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
- 3
|
32
|
-
- 3
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
33
19
|
version: 0.3.3
|
34
20
|
type: :runtime
|
35
|
-
version_requirements: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: htmlentities
|
38
21
|
prerelease: false
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: htmlentities
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
48
34
|
type: :runtime
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: |
|
42
|
+
Assist with handling messed up encodings (Currently includes the
|
43
|
+
following tools: bconv, cinderella, decode_entities, guess_encoding)
|
44
|
+
email: jens.wille@gmail.com
|
45
|
+
executables:
|
53
46
|
- bconv
|
47
|
+
- cinderella
|
54
48
|
- decode_entities
|
55
49
|
- guess_encoding
|
56
|
-
- cinderella
|
57
50
|
extensions: []
|
58
|
-
|
59
|
-
extra_rdoc_files:
|
51
|
+
extra_rdoc_files:
|
60
52
|
- README
|
61
53
|
- COPYING
|
62
54
|
- ChangeLog
|
63
|
-
files:
|
55
|
+
files:
|
64
56
|
- lib/cmess.rb
|
65
|
-
- lib/cmess/guess_encoding/automatic.rb
|
66
|
-
- lib/cmess/guess_encoding/encoding.rb
|
67
|
-
- lib/cmess/guess_encoding/manual.rb
|
68
57
|
- lib/cmess/bconv.rb
|
69
|
-
- lib/cmess/cli.rb
|
70
58
|
- lib/cmess/cinderella.rb
|
71
|
-
- lib/cmess/
|
59
|
+
- lib/cmess/cli.rb
|
72
60
|
- lib/cmess/decode_entities.rb
|
61
|
+
- lib/cmess/guess_encoding.rb
|
62
|
+
- lib/cmess/guess_encoding/automatic.rb
|
63
|
+
- lib/cmess/guess_encoding/encoding.rb
|
64
|
+
- lib/cmess/guess_encoding/manual.rb
|
73
65
|
- lib/cmess/version.rb
|
74
66
|
- bin/bconv
|
67
|
+
- bin/cinderella
|
75
68
|
- bin/decode_entities
|
76
69
|
- bin/guess_encoding
|
77
|
-
-
|
78
|
-
- data/csets/latin1.yaml
|
79
|
-
- data/csets/iso_8859-15.yaml
|
70
|
+
- data/chartab.yaml
|
80
71
|
- data/csets/iso_8859-1.yaml
|
81
|
-
- data/csets/
|
72
|
+
- data/csets/iso_8859-15.yaml
|
73
|
+
- data/csets/latin1.yaml
|
82
74
|
- data/csets/unicode/basic_latin.yaml
|
75
|
+
- data/csets/unicode/cyrillic-supplement.yaml
|
76
|
+
- data/csets/unicode/cyrillic.yaml
|
77
|
+
- data/csets/unicode/greek.yaml
|
83
78
|
- data/csets/unicode/ipa_extensions.yaml
|
84
|
-
- data/csets/unicode/
|
79
|
+
- data/csets/unicode/latin-extended-c.yaml
|
85
80
|
- data/csets/unicode/latin-extended-d.yaml
|
86
|
-
- data/csets/unicode/
|
81
|
+
- data/csets/unicode/latin_1_supplement.yaml
|
82
|
+
- data/csets/unicode/latin_extended_a.yaml
|
87
83
|
- data/csets/unicode/latin_extended_additional.yaml
|
88
|
-
- data/csets/unicode/
|
89
|
-
- data/csets/unicode/
|
84
|
+
- data/csets/unicode/latin_extended_b.yaml
|
85
|
+
- data/csets/unicode/letterlike_symbols.yaml
|
90
86
|
- data/csets/unicode/spacing_modifier_letters.yaml
|
91
|
-
- data/csets/unicode/cyrillic-supplement.yaml
|
92
|
-
- data/csets/unicode/cyrillic.yaml
|
93
|
-
- data/csets/unicode/latin_1_supplement.yaml
|
94
87
|
- data/csets/utf-8.yaml
|
95
88
|
- data/csets/utf8.yaml
|
96
89
|
- data/test_chars.yaml
|
97
|
-
-
|
98
|
-
- README
|
90
|
+
- COPYING
|
99
91
|
- ChangeLog
|
92
|
+
- README
|
100
93
|
- Rakefile
|
101
|
-
- COPYING
|
102
|
-
- example/guess_encoding/en.utf-8.txt
|
103
|
-
- example/guess_encoding/de.utf-8.txt
|
104
|
-
- example/guess_encoding/it.utf-8.txt
|
105
|
-
- example/guess_encoding/check_results
|
106
|
-
- example/guess_encoding/fr.utf-8.txt
|
107
|
-
- example/cinderella/empty6-slash_repaired.txt
|
108
|
-
- example/cinderella/empty6-slash.txt
|
109
94
|
- example/cinderella/crop
|
110
|
-
- example/cinderella/pot
|
111
95
|
- example/cinderella/crop_repaired
|
112
|
-
|
113
|
-
|
114
|
-
|
96
|
+
- example/cinderella/empty6-slash.txt
|
97
|
+
- example/cinderella/empty6-slash_repaired.txt
|
98
|
+
- example/cinderella/pot
|
99
|
+
- example/guess_encoding/check_results
|
100
|
+
- example/guess_encoding/de.utf-8.txt
|
101
|
+
- example/guess_encoding/en.utf-8.txt
|
102
|
+
- example/guess_encoding/fr.utf-8.txt
|
103
|
+
- example/guess_encoding/it.utf-8.txt
|
104
|
+
homepage: http://github.com/blackwinter/cmess
|
105
|
+
licenses:
|
106
|
+
- AGPL
|
107
|
+
metadata: {}
|
115
108
|
post_install_message:
|
116
|
-
rdoc_options:
|
117
|
-
- --main
|
118
|
-
- README
|
109
|
+
rdoc_options:
|
119
110
|
- --charset
|
120
111
|
- UTF-8
|
112
|
+
- --line-numbers
|
121
113
|
- --all
|
122
114
|
- --title
|
123
|
-
- cmess Application documentation (v0.
|
124
|
-
- --
|
125
|
-
|
115
|
+
- cmess Application documentation (v0.4.0)
|
116
|
+
- --main
|
117
|
+
- README
|
118
|
+
require_paths:
|
126
119
|
- lib
|
127
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
none: false
|
138
|
-
requirements:
|
139
|
-
- - ">="
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
hash: 3
|
142
|
-
segments:
|
143
|
-
- 0
|
144
|
-
version: "0"
|
120
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.9.2
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - '>='
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
145
130
|
requirements: []
|
146
|
-
|
147
|
-
|
148
|
-
rubygems_version: 1.8.8
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.0.6
|
149
133
|
signing_key:
|
150
|
-
specification_version:
|
151
|
-
summary:
|
134
|
+
specification_version: 4
|
135
|
+
summary: 'Assist with handling messed up encodings (Currently includes the following
|
136
|
+
tools: bconv, cinderella, decode_entities, guess_encoding)'
|
152
137
|
test_files: []
|
153
|
-
|