cmess 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ChangeLog +12 -0
- data/README +13 -8
- data/Rakefile +15 -15
- data/bin/bconv +9 -7
- data/bin/cinderella +13 -11
- data/bin/decode_entities +9 -7
- data/bin/guess_encoding +23 -18
- data/lib/cmess.rb +4 -3
- data/lib/cmess/bconv.rb +37 -56
- data/lib/cmess/cinderella.rb +19 -18
- data/lib/cmess/cli.rb +8 -12
- data/lib/cmess/decode_entities.rb +21 -24
- data/lib/cmess/guess_encoding.rb +5 -3
- data/lib/cmess/guess_encoding/automatic.rb +29 -42
- data/lib/cmess/guess_encoding/encoding.rb +5 -5
- data/lib/cmess/guess_encoding/manual.rb +14 -13
- data/lib/cmess/version.rb +2 -2
- metadata +89 -105
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6388ed1476587af57eb196817003d94d58ecb268
|
4
|
+
data.tar.gz: 41551903b954d2de73b62203467d93955ac2516f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4ffc33d0ed9392797a97a5eb535cb0652687e0b8abcd2b6a30de85d41beb63b65ebfa434a625b6aa91bfa379ad5fe8a2f46286aa3450b930633bea9241ba30e7
|
7
|
+
data.tar.gz: e11d90e25d6b5fb5c0f6d81f7d2824e2c7dd662e5feef1f7978cd66588fcd0a5a18cc0d60fbe2d1ee6b3cac254198ff07a64bd985b1d7bb5f3a1f27aa7d7096a
|
data/ChangeLog
CHANGED
@@ -1,5 +1,17 @@
|
|
1
|
+
# markup: rd
|
2
|
+
|
1
3
|
= Revision history for cmess
|
2
4
|
|
5
|
+
== 0.4.0 [2013-08-02]
|
6
|
+
|
7
|
+
* Updated for Ruby 1.9.2+. <b>Ruby 1.8 no longer supported.</b>
|
8
|
+
* guess_encoding: Added <tt>--reverse</tt> option.
|
9
|
+
* Housekeeping.
|
10
|
+
|
11
|
+
== 0.3.1 [2011-08-16]
|
12
|
+
|
13
|
+
* decode_entities: Fixed regression.
|
14
|
+
|
3
15
|
== 0.3.0 [2011-07-25]
|
4
16
|
|
5
17
|
* Extensive refactoring
|
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to cmess version 0.3.1
|
5
|
+
This documentation refers to cmess version 0.4.0
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -22,21 +22,24 @@ there are:
|
|
22
22
|
(see CMess::BConv)
|
23
23
|
+decode_entities+:: Decode HTML entities in a string. (see CMess::DecodeEntities)
|
24
24
|
|
25
|
-
|
25
|
+
|
26
|
+
== SUPPORTED PLATFORMS
|
27
|
+
|
28
|
+
Requires Ruby version 1.9.2 or higher; use the latest 0.3.x release on older
|
29
|
+
Ruby versions. CMess has been tested with ruby 2.0.0p247 on x86_64-linux.
|
26
30
|
|
27
31
|
|
28
32
|
== LINKS
|
29
33
|
|
30
34
|
<b></b>
|
31
|
-
Documentation::
|
32
|
-
Source code::
|
33
|
-
|
34
|
-
RubyGem:: http://rubygems.org/gems/cmess
|
35
|
+
Documentation:: http://blackwinter.github.com/cmess
|
36
|
+
Source code:: http://github.com/blackwinter/cmess
|
37
|
+
RubyGem:: http://rubygems.org/gems/cmess
|
35
38
|
|
36
39
|
|
37
40
|
== AUTHORS
|
38
41
|
|
39
|
-
* Jens Wille <mailto:jens.wille@
|
42
|
+
* Jens Wille <mailto:jens.wille@gmail.com>
|
40
43
|
|
41
44
|
|
42
45
|
== CREDITS
|
@@ -48,9 +51,11 @@ RubyGem:: http://rubygems.org/gems/cmess
|
|
48
51
|
|
49
52
|
== LICENSE AND COPYRIGHT
|
50
53
|
|
51
|
-
Copyright (C) 2007-
|
54
|
+
Copyright (C) 2007-2012 University of Cologne,
|
52
55
|
Albertus-Magnus-Platz, 50923 Cologne, Germany
|
53
56
|
|
57
|
+
Copyright (C) 2013 Jens Wille
|
58
|
+
|
54
59
|
cmess is free software: you can redistribute it and/or modify it under the
|
55
60
|
terms of the GNU Affero General Public License as published by the Free
|
56
61
|
Software Foundation, either version 3 of the License, or (at your option)
|
data/Rakefile
CHANGED
@@ -6,21 +6,21 @@ begin
|
|
6
6
|
require 'hen'
|
7
7
|
|
8
8
|
Hen.lay! {{
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
:
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
:
|
21
|
-
:
|
22
|
-
|
23
|
-
:
|
9
|
+
gem: {
|
10
|
+
name: %q{cmess},
|
11
|
+
version: CMess::VERSION,
|
12
|
+
summary: <<-EOT,
|
13
|
+
Assist with handling messed up encodings (Currently includes the
|
14
|
+
following tools: #{Dir['bin/*'].map { |e| File.basename(e) }.sort.join(', ')})
|
15
|
+
EOT
|
16
|
+
author: %q{Jens Wille},
|
17
|
+
email: %q{jens.wille@gmail.com},
|
18
|
+
license: %q{AGPL},
|
19
|
+
homepage: :blackwinter,
|
20
|
+
extra_files: FileList['data/**/*'].to_a,
|
21
|
+
dependencies: [['ruby-nuggets', '>= 0.3.3'], 'htmlentities'],
|
22
|
+
|
23
|
+
required_ruby_version: '>= 1.9.2'
|
24
24
|
}
|
25
25
|
}}
|
26
26
|
rescue LoadError => err
|
data/bin/bconv
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# bconv -- Convert between bibliographic (and other) encodings #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C) 2008-
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,11 +35,11 @@ require 'cmess/bconv'
|
|
33
35
|
include CMess::CLI
|
34
36
|
|
35
37
|
options = {
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
38
|
+
input: STDIN,
|
39
|
+
output: STDOUT,
|
40
|
+
source_encoding: determine_system_encoding,
|
41
|
+
target_encoding: determine_system_encoding,
|
42
|
+
chartab_file: CMess::BConv::DEFAULT_CHARTAB_FILE
|
41
43
|
}
|
42
44
|
|
43
45
|
parse_options { |opts|
|
data/bin/cinderella
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# cinderella -- Handle double encoded characters #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C)
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -35,14 +37,14 @@ include CMess::CLI
|
|
35
37
|
progname = File.basename($0)
|
36
38
|
|
37
39
|
options = {
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
43
|
-
:
|
44
|
-
:
|
45
|
-
:
|
40
|
+
input: STDIN,
|
41
|
+
output: STDOUT,
|
42
|
+
pot: nil,
|
43
|
+
crop: nil,
|
44
|
+
source_encoding: nil,
|
45
|
+
target_encoding: determine_system_encoding,
|
46
|
+
csets: [CMess::Cinderella::DEFAULT_CSETS_DIR],
|
47
|
+
repair: false
|
46
48
|
}
|
47
49
|
|
48
50
|
parse_options { |opts|
|
@@ -169,5 +171,5 @@ cli do
|
|
169
171
|
|
170
172
|
trailing_args_as_input(options)
|
171
173
|
|
172
|
-
CMess::Cinderella.pick(options.merge(:
|
174
|
+
CMess::Cinderella.pick(options.merge(chars: YAML.load_file(char_file)))
|
173
175
|
end
|
data/bin/decode_entities
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# decode_entities -- Decode HTML entities #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C)
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,11 +35,11 @@ require 'cmess/decode_entities'
|
|
33
35
|
include CMess::CLI
|
34
36
|
|
35
37
|
options = {
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
38
|
+
input: STDIN,
|
39
|
+
output: STDOUT,
|
40
|
+
source_encoding: CMess::DecodeEntities::ENCODING,
|
41
|
+
target_encoding: nil,
|
42
|
+
flavour: CMess::DecodeEntities::DEFAULT_FLAVOUR
|
41
43
|
}
|
42
44
|
|
43
45
|
parse_options { |opts|
|
data/bin/guess_encoding
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# guess_encoding -- Assist with guessing the encoding of some input at hand #
|
7
7
|
# [A component of cmess, the encoding tool-box] #
|
8
8
|
# #
|
9
|
-
# Copyright (C)
|
9
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
10
10
|
# Albertus-Magnus-Platz, #
|
11
11
|
# 50923 Cologne, Germany #
|
12
12
|
# #
|
13
|
+
# Copyright (C) 2013 Jens Wille #
|
14
|
+
# #
|
13
15
|
# Authors: #
|
14
|
-
# Jens Wille <jens.wille@
|
16
|
+
# Jens Wille <jens.wille@gmail.com> #
|
15
17
|
# #
|
16
18
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
17
19
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,16 +35,17 @@ require 'cmess/guess_encoding'
|
|
33
35
|
include CMess::CLI
|
34
36
|
|
35
37
|
options = {
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
43
|
-
:
|
44
|
-
:
|
45
|
-
:
|
38
|
+
input: STDIN,
|
39
|
+
line: 1,
|
40
|
+
encodings: nil,
|
41
|
+
additional_encodings: [],
|
42
|
+
target_encoding: determine_system_encoding,
|
43
|
+
manual: false,
|
44
|
+
reverse: false,
|
45
|
+
chunk_size: nil,
|
46
|
+
ignore_bom: false,
|
47
|
+
charcodes: nil,
|
48
|
+
base: 16
|
46
49
|
}
|
47
50
|
|
48
51
|
parse_options { |opts|
|
@@ -79,12 +82,8 @@ parse_options { |opts|
|
|
79
82
|
opts.separator ''
|
80
83
|
|
81
84
|
opts.on('-l', '--line LINE', Integer, "Line number of input file to use for testing [Default: #{options[:line]}]") { |line|
|
85
|
+
abort 'Line number must be greater than 0!' unless line > 0
|
82
86
|
options[:line] = line
|
83
|
-
|
84
|
-
unless options[:line] > 0
|
85
|
-
options[:input].read # prevent 'Broken pipe' error
|
86
|
-
abort 'Line number must be greater then 0!'
|
87
|
-
end
|
88
87
|
}
|
89
88
|
|
90
89
|
opts.separator ''
|
@@ -111,6 +110,12 @@ parse_options { |opts|
|
|
111
110
|
exit
|
112
111
|
}
|
113
112
|
|
113
|
+
opts.separator ''
|
114
|
+
|
115
|
+
opts.on('-R', '--reverse', 'Reverse encoding direction (FROM target TO encodings)') {
|
116
|
+
options[:reverse] = true
|
117
|
+
}
|
118
|
+
|
114
119
|
opts.separator ''
|
115
120
|
opts.separator ' * Charcodes'
|
116
121
|
|
@@ -189,7 +194,7 @@ cli do
|
|
189
194
|
abort "Line not found -- input has only #{$.} line#{'s' if $. != 1}" unless input.is_a?(String)
|
190
195
|
end
|
191
196
|
|
192
|
-
CMess::GuessEncoding.manual(options.merge(:
|
197
|
+
CMess::GuessEncoding.manual(options.merge(input: input))
|
193
198
|
else
|
194
199
|
puts CMess::GuessEncoding.automatic(options[:input], options[:chunk_size], options[:ignore_bom])
|
195
200
|
end
|
data/lib/cmess.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# cmess -- Assist with handling messed up encodings #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -27,7 +29,6 @@
|
|
27
29
|
#++
|
28
30
|
|
29
31
|
require 'cmess/version'
|
30
|
-
require 'iconv'
|
31
32
|
|
32
33
|
# See README for more information.
|
33
34
|
|
data/lib/cmess/bconv.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2008-
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,9 +35,9 @@ require 'yaml'
|
|
33
35
|
|
34
36
|
class CMess::BConv
|
35
37
|
|
36
|
-
VERSION = '0.0
|
38
|
+
VERSION = '0.1.0'
|
37
39
|
|
38
|
-
|
40
|
+
ENCODING = 'utf-8'
|
39
41
|
|
40
42
|
DEFAULT_CHARTAB_FILE = File.join(CMess::DATA_DIR, 'chartab.yaml')
|
41
43
|
|
@@ -67,8 +69,6 @@ class CMess::BConv
|
|
67
69
|
|
68
70
|
end
|
69
71
|
|
70
|
-
attr_reader :input, :output, :source_encoding, :target_encoding, :chartab, :encodings
|
71
|
-
|
72
72
|
def initialize(options)
|
73
73
|
@input, @output, _ = CMess.ensure_options!(options,
|
74
74
|
:input, :output, :source_encoding, :target_encoding
|
@@ -79,83 +79,64 @@ class CMess::BConv
|
|
79
79
|
|
80
80
|
@chartab = self.class.load_chartab(options[:chartab] || DEFAULT_CHARTAB_FILE)
|
81
81
|
@encodings = self.class.encodings(@chartab)
|
82
|
-
end
|
83
82
|
|
84
|
-
|
85
|
-
|
83
|
+
[:source_encoding, :target_encoding].each { |key|
|
84
|
+
instance_variable_set("@#{key}", encoding = options[key].upcase)
|
85
|
+
instance_variable_set("@have_#{key}", encodings.include?(encoding))
|
86
|
+
}
|
86
87
|
end
|
87
88
|
|
89
|
+
attr_reader :input, :output, :source_encoding, :target_encoding, :chartab, :encodings
|
90
|
+
|
88
91
|
def convert
|
89
|
-
|
90
|
-
if encoding?(target_encoding)
|
91
|
-
@charmap = chartab.inject({}) { |hash, (code, map)|
|
92
|
-
hash.update(map[source_encoding] => map[target_encoding].pack('U*'))
|
93
|
-
}
|
92
|
+
source, target, out, charmap = source_encoding, target_encoding, output, {}
|
94
93
|
|
95
|
-
|
96
|
-
|
94
|
+
if @have_source_encoding
|
95
|
+
if @have_target_encoding
|
96
|
+
chartab.each { |code, map|
|
97
|
+
charmap[map[source]] = map[target].pack('U*')
|
97
98
|
}
|
98
|
-
else
|
99
|
-
iconv = iconv_to
|
100
99
|
|
101
|
-
|
102
|
-
|
100
|
+
input.each_byte { |char| out.print(map(char, charmap)) }
|
101
|
+
else
|
102
|
+
chartab.each { |code, map|
|
103
|
+
charmap[map[source]] = [code.to_i(16)].pack('U*')
|
103
104
|
}
|
104
105
|
|
106
|
+
source = ENCODING
|
107
|
+
|
105
108
|
input.each_byte { |char|
|
106
|
-
|
109
|
+
out.print(encode(map(char, charmap), source, target))
|
107
110
|
}
|
108
111
|
end
|
109
112
|
else
|
110
|
-
if
|
111
|
-
|
112
|
-
|
113
|
-
charmap = chartab.inject({}) { |hash, (code, map)|
|
114
|
-
hash.update(code.to_i(16) => map[target_encoding].pack('U*'))
|
113
|
+
if @have_target_encoding
|
114
|
+
chartab.each { |code, map|
|
115
|
+
charmap[code.to_i(16)] = map[target].pack('U*')
|
115
116
|
}
|
116
117
|
|
118
|
+
target = ENCODING
|
119
|
+
|
117
120
|
input.each { |line|
|
118
|
-
|
119
|
-
|
121
|
+
encode(line, source, target).unpack('U*').each { |char|
|
122
|
+
out.print(charmap[char])
|
120
123
|
}
|
121
124
|
}
|
122
125
|
else
|
123
|
-
|
124
|
-
|
125
|
-
input.each { |line|
|
126
|
-
output.puts iconv.iconv(line)
|
127
|
-
}
|
126
|
+
input.each { |line| out.print(encode(line, source, target)) }
|
128
127
|
end
|
129
128
|
end
|
130
129
|
end
|
131
130
|
|
132
131
|
private
|
133
132
|
|
134
|
-
def
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
raise ArgumentError, "invalid encoding: source encoding = #{from}, target encoding = #{to}"
|
139
|
-
end
|
140
|
-
|
141
|
-
def iconv.iconv(*args)
|
142
|
-
super
|
143
|
-
rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => err
|
144
|
-
warn "ILLEGAL INPUT SEQUENCE: #{err}"; ''
|
145
|
-
end
|
146
|
-
|
147
|
-
iconv
|
148
|
-
end
|
149
|
-
|
150
|
-
def iconv_from(from = source_encoding)
|
151
|
-
iconv_from_to(from, INTERMEDIATE_ENCODING)
|
152
|
-
end
|
153
|
-
|
154
|
-
def iconv_to(to = target_encoding)
|
155
|
-
iconv_from_to(INTERMEDIATE_ENCODING, to)
|
133
|
+
def encode(string, source, target)
|
134
|
+
string.encode(target, source)
|
135
|
+
rescue Encoding::UndefinedConversionError => err
|
136
|
+
warn "ILLEGAL INPUT SEQUENCE: #{err.error_char}"
|
156
137
|
end
|
157
138
|
|
158
|
-
def map(char, charmap
|
139
|
+
def map(char, charmap)
|
159
140
|
unless map = charmap[[char]]
|
160
141
|
unless map = charmap[[char, c = input.getc]]
|
161
142
|
input.ungetc(c) if c
|
data/lib/cmess/cinderella.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -39,34 +41,33 @@ module CMess::Cinderella
|
|
39
41
|
|
40
42
|
extend self
|
41
43
|
|
42
|
-
VERSION = '0.0
|
44
|
+
VERSION = '0.1.0'
|
43
45
|
|
44
46
|
DEFAULT_CSETS_DIR = File.join(CMess::DATA_DIR, 'csets')
|
45
47
|
|
46
48
|
def pick(options)
|
47
|
-
CMess.ensure_options!(options,
|
49
|
+
input, pot, crop, source, target, chars = CMess.ensure_options!(options,
|
48
50
|
:input, :pot, :crop, :source_encoding, :target_encoding, :chars
|
49
51
|
)
|
50
52
|
|
51
53
|
encoded = {}
|
52
|
-
|
53
|
-
|
54
|
-
options[:chars].each { |char|
|
55
|
-
begin
|
56
|
-
encoded[iconv.iconv(char)] = char
|
57
|
-
rescue Iconv::IllegalSequence
|
58
|
-
end
|
59
|
-
}
|
54
|
+
chars.each { |char| encoded[encode(char, source, target)] = char }
|
60
55
|
|
61
56
|
regexp = Regexp.union(*encoded.keys)
|
62
|
-
pot, crop, repair = options.values_at(:pot, :crop, :repair)
|
63
57
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
58
|
+
input.each { |line|
|
59
|
+
out = line =~ regexp ? crop : pot or next
|
60
|
+
|
61
|
+
line.gsub!(regexp) { |m| encoded[m] } if repair
|
62
|
+
out.puts(line)
|
69
63
|
}
|
70
64
|
end
|
71
65
|
|
66
|
+
private
|
67
|
+
|
68
|
+
def encode(string, source, target)
|
69
|
+
string.encode(target, source)
|
70
|
+
rescue Encoding::UndefinedConversionError
|
71
|
+
end
|
72
|
+
|
72
73
|
end
|
data/lib/cmess/cli.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -102,18 +104,12 @@ module CMess::CLI
|
|
102
104
|
end
|
103
105
|
|
104
106
|
def determine_system_encoding
|
105
|
-
ENV.user_encoding ||
|
106
|
-
|
107
|
-
abort <<-EOT
|
107
|
+
ENV.user_encoding || lambda {
|
108
|
+
abort <<-EOT
|
108
109
|
Your system's encoding couldn't be determined automatically -- please specify
|
109
110
|
it explicitly via the ENCODING environment variable or via the '-t' option.
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
def dummy.to_s; 'NOT FOUND' end
|
114
|
-
|
115
|
-
dummy
|
116
|
-
end
|
111
|
+
EOT
|
112
|
+
}.tap { |dummy| def dummy.to_s; 'NOT FOUND'; end }
|
117
113
|
end
|
118
114
|
|
119
115
|
def cli
|
data/lib/cmess/decode_entities.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
14
16
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -33,43 +35,38 @@ module CMess::DecodeEntities
|
|
33
35
|
|
34
36
|
extend self
|
35
37
|
|
36
|
-
VERSION = '0.0
|
38
|
+
VERSION = '0.1.0'
|
37
39
|
|
38
40
|
# HTMLEntities requires UTF-8
|
39
|
-
|
40
|
-
|
41
|
-
ICONV_DUMMY = begin
|
42
|
-
dummy = Object.new
|
43
|
-
|
44
|
-
def dummy.iconv(string)
|
45
|
-
string
|
46
|
-
end
|
47
|
-
|
48
|
-
dummy
|
49
|
-
end
|
41
|
+
ENCODING = 'UTF-8'
|
50
42
|
|
51
43
|
DEFAULT_FLAVOUR = 'xml-safe'
|
52
44
|
|
53
45
|
def decode(options)
|
54
|
-
input, output,
|
46
|
+
input, output, source = CMess.ensure_options!(options,
|
55
47
|
:input, :output, :source_encoding
|
56
48
|
)
|
57
49
|
|
58
|
-
|
59
|
-
|
60
|
-
iconv_in = source_encoding != INTERMEDIATE_ENCODING ?
|
61
|
-
Iconv.new(INTERMEDIATE_ENCODING, source_encoding) : ICONV_DUMMY
|
50
|
+
target, entities, encoding = options[:target_encoding] || source,
|
51
|
+
HTMLEntities.new(options[:flavour] || DEFAULT_FLAVOUR), ENCODING
|
62
52
|
|
63
|
-
|
64
|
-
Iconv.new(target_encoding, INTERMEDIATE_ENCODING) : ICONV_DUMMY
|
65
|
-
|
66
|
-
html_entities = HTMLEntities.new(options[:flavour] || DEFAULT_FLAVOUR)
|
53
|
+
skip_source, skip_target = source == encoding, target == encoding
|
67
54
|
|
68
55
|
input.each { |line|
|
69
|
-
|
56
|
+
line = encode(line, source, encoding) unless skip_source
|
57
|
+
line = entities.decode(line)
|
58
|
+
line = encode(line, encoding, target) unless skip_target
|
59
|
+
|
60
|
+
output.puts(line)
|
70
61
|
}
|
71
62
|
end
|
72
63
|
|
64
|
+
private
|
65
|
+
|
66
|
+
def encode(string, source, target)
|
67
|
+
string.encode(target, source)
|
68
|
+
end
|
69
|
+
|
73
70
|
end
|
74
71
|
|
75
72
|
class HTMLEntities # :nodoc:
|
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# Contributors: #
|
14
16
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -38,7 +40,7 @@ require 'cmess'
|
|
38
40
|
|
39
41
|
module CMess::GuessEncoding
|
40
42
|
|
41
|
-
VERSION = '0.
|
43
|
+
VERSION = '0.2.0'
|
42
44
|
|
43
45
|
autoload :Encoding, 'cmess/guess_encoding/encoding'
|
44
46
|
autoload :Manual, 'cmess/guess_encoding/manual'
|
data/lib/cmess/guess_encoding/automatic.rb
CHANGED
@@ -5,12 +5,14 @@
|
|
5
5
|
# #
|
6
6
|
# A component of cmess, the encoding tool-box. #
|
7
7
|
# #
|
8
|
-
# Copyright (C)
|
8
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
9
9
|
# Albertus-Magnus-Platz, #
|
10
10
|
# 50923 Cologne, Germany #
|
11
11
|
# #
|
12
|
+
# Copyright (C) 2013 Jens Wille #
|
13
|
+
# #
|
12
14
|
# Authors: #
|
13
|
-
# Jens Wille <jens.wille@
|
15
|
+
# Jens Wille <jens.wille@gmail.com> #
|
14
16
|
# #
|
15
17
|
# Contributors: #
|
16
18
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -32,8 +34,6 @@
|
|
32
34
|
###############################################################################
|
33
35
|
#++
|
34
36
|
|
35
|
-
$KCODE = 'u' if RUBY_VERSION < '1.9'
|
36
|
-
|
37
37
|
require 'cmess/guess_encoding'
|
38
38
|
|
39
39
|
require 'yaml'
|
@@ -57,9 +57,6 @@ class CMess::GuessEncoding::Automatic
|
|
57
57
|
|
58
58
|
include CMess::GuessEncoding::Encoding
|
59
59
|
|
60
|
-
# Creates a converter for desired encoding (from UTF-8).
|
61
|
-
ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
|
62
|
-
|
63
60
|
# Single-byte encodings to test statistically by TEST_CHARS.
|
64
61
|
TEST_ENCODINGS = [
|
65
62
|
MACINTOSH,
|
@@ -87,22 +84,13 @@ class CMess::GuessEncoding::Automatic
|
|
87
84
|
CHARS_TO_TEST = (
|
88
85
|
'€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂ' <<
|
89
86
|
'ÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
|
90
|
-
).
|
87
|
+
).chars.to_a
|
91
88
|
|
92
89
|
# Map TEST_ENCODINGS to respectively encoded CHARS_TO_TEST.
|
93
|
-
TEST_CHARS = Hash.new { |
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
begin
|
98
|
-
byte = *ICONV_FOR[encoding].iconv(char).unpack('C')
|
99
|
-
rescue Iconv::IllegalSequence
|
100
|
-
end
|
101
|
-
}.compact
|
102
|
-
|
103
|
-
TEST_ENCODINGS << encoding unless TEST_ENCODINGS.include?(encoding)
|
104
|
-
|
105
|
-
hash[encoding] = encchars
|
90
|
+
TEST_CHARS = Hash.new { |h, k|
|
91
|
+
e, f = self[k], UTF_8
|
92
|
+
TEST_ENCODINGS << e unless TEST_ENCODINGS.include?(e)
|
93
|
+
h[e] = CHARS_TO_TEST.flat_map { |c| c.encode(e, f).unpack('C') }
|
106
94
|
}.update(YAML.load_file(File.join(CMess::DATA_DIR, 'test_chars.yaml')))
|
107
95
|
|
108
96
|
# Relative count of TEST_CHARS must exceed this threshold to yield
|
@@ -134,10 +122,10 @@ class CMess::GuessEncoding::Automatic
|
|
134
122
|
|
135
123
|
def encoding(*encodings, &block)
|
136
124
|
encodings.flatten.each { |encoding|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
125
|
+
unless @supported_encodings.include?(encoding)
|
126
|
+
@supported_encodings << encoding
|
127
|
+
@encoding_guessers << block
|
128
|
+
end
|
141
129
|
}
|
142
130
|
end
|
143
131
|
|
@@ -146,10 +134,10 @@ class CMess::GuessEncoding::Automatic
|
|
146
134
|
end
|
147
135
|
|
148
136
|
def bom_encoding(encoding, &block)
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
137
|
+
unless @supported_boms.include?(encoding)
|
138
|
+
@supported_boms << encoding
|
139
|
+
@bom_guessers << lambda { |*| encoding if instance_eval(&block) }
|
140
|
+
end
|
153
141
|
end
|
154
142
|
|
155
143
|
def supported_bom?(encoding)
|
@@ -158,30 +146,30 @@ class CMess::GuessEncoding::Automatic
|
|
158
146
|
|
159
147
|
end
|
160
148
|
|
161
|
-
attr_reader :input, :chunk_size, :byte_count, :byte_total, :first_byte
|
162
|
-
|
163
149
|
def initialize(input, chunk_size = nil)
|
164
150
|
@input = case input
|
165
151
|
when IO then input
|
166
152
|
when String then StringIO.new(input)
|
167
|
-
else
|
168
|
-
|
153
|
+
else raise ArgumentError,
|
154
|
+
"don't know how to handle input of type #{input.class}"
|
169
155
|
end
|
170
156
|
|
171
157
|
@chunk_size = chunk_size
|
172
158
|
end
|
173
159
|
|
160
|
+
attr_reader :input, :chunk_size, :byte_count, :byte_total, :first_byte
|
161
|
+
|
174
162
|
def guess(ignore_bom = false)
|
175
163
|
return bom if bom && !ignore_bom
|
176
164
|
|
177
165
|
while read
|
178
166
|
encoding_guessers.each { |block|
|
179
|
-
encoding = instance_eval(&block)
|
180
|
-
|
167
|
+
if encoding = instance_eval(&block) and supported_encoding?(encoding)
|
168
|
+
return encoding
|
169
|
+
end
|
181
170
|
}
|
182
171
|
end
|
183
172
|
|
184
|
-
# nothing suitable found :-(
|
185
173
|
UNKNOWN
|
186
174
|
end
|
187
175
|
|
@@ -206,14 +194,13 @@ class CMess::GuessEncoding::Automatic
|
|
206
194
|
end
|
207
195
|
|
208
196
|
bom_guessers.each { |block|
|
209
|
-
encoding = instance_eval(&block)
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
197
|
+
if encoding = instance_eval(&block) and supported_encoding?(encoding)
|
198
|
+
return encoding
|
199
|
+
else
|
200
|
+
input.rewind
|
201
|
+
end
|
214
202
|
}
|
215
203
|
|
216
|
-
# nothing suitable found :-(
|
217
204
|
nil
|
218
205
|
end
|
219
206
|
|
data/lib/cmess/guess_encoding/encoding.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# Contributors: #
|
14
16
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -50,9 +52,7 @@ module CMess::GuessEncoding::Encoding
|
|
50
52
|
private
|
51
53
|
|
52
54
|
def get_all_encodings
|
53
|
-
|
54
|
-
get_or_set_encoding_const(encoding.sub(%r{/*\z}, ''))
|
55
|
-
}
|
55
|
+
Encoding.name_list.map { |encoding| get_or_set_encoding_const(encoding) }
|
56
56
|
end
|
57
57
|
|
58
58
|
def const_name_for(encoding)
|
data/lib/cmess/guess_encoding/manual.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C)
|
6
|
+
# Copyright (C) 2008-2012 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
10
|
+
# Copyright (C) 2013 Jens Wille #
|
11
|
+
# #
|
10
12
|
# Authors: #
|
11
|
-
# Jens Wille <jens.wille@
|
13
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
14
|
# #
|
13
15
|
# Contributors: #
|
14
16
|
# John Vorhauer <john@vorhauer.de> (idea and original implementation #
|
@@ -57,13 +59,13 @@ module CMess::GuessEncoding::Manual
|
|
57
59
|
CP1252,
|
58
60
|
CP850,
|
59
61
|
CP852,
|
60
|
-
CP856,
|
61
62
|
UTF_8
|
62
63
|
]
|
63
64
|
|
64
65
|
# Likely candidates to suggest to the user
|
65
66
|
CANDIDATES = [
|
66
67
|
ANSI_X34,
|
68
|
+
CP856,
|
67
69
|
EBCDIC_AT_DE,
|
68
70
|
EBCDIC_US,
|
69
71
|
EUC_JP,
|
@@ -95,19 +97,18 @@ module CMess::GuessEncoding::Manual
|
|
95
97
|
# move target encoding to front
|
96
98
|
encodings.in_order!(target)
|
97
99
|
|
98
|
-
max_length = encodings.max(:length)
|
100
|
+
max_length, reverse = encodings.max(:length), options[:reverse]
|
99
101
|
|
100
102
|
encodings.each { |encoding|
|
103
|
+
args = [target, encoding]
|
104
|
+
args.reverse! if reverse
|
105
|
+
|
101
106
|
converted = begin
|
102
|
-
|
103
|
-
rescue
|
104
|
-
"ILLEGAL INPUT SEQUENCE: #{err}"
|
105
|
-
rescue
|
106
|
-
|
107
|
-
raise ArgumentError, "invalid encoding: #{encoding}"
|
108
|
-
else
|
109
|
-
'INVALID ENCODING!'
|
110
|
-
end
|
107
|
+
input.encode(*args)
|
108
|
+
rescue Encoding::UndefinedConversionError => err
|
109
|
+
"ILLEGAL INPUT SEQUENCE: #{err.error_char}"
|
110
|
+
rescue Encoding::ConverterNotFoundError => err
|
111
|
+
err.to_s
|
111
112
|
end
|
112
113
|
|
113
114
|
puts "%-#{max_length}s : %s" % [encoding, converted]
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,153 +1,137 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
- 1
|
10
|
-
version: 0.3.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Jens Wille
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2013-08-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
21
14
|
name: ruby-nuggets
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 21
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
- 3
|
32
|
-
- 3
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
33
19
|
version: 0.3.3
|
34
20
|
type: :runtime
|
35
|
-
version_requirements: *id001
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: htmlentities
|
38
21
|
prerelease: false
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: htmlentities
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
48
34
|
type: :runtime
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: |
|
42
|
+
Assist with handling messed up encodings (Currently includes the
|
43
|
+
following tools: bconv, cinderella, decode_entities, guess_encoding)
|
44
|
+
email: jens.wille@gmail.com
|
45
|
+
executables:
|
53
46
|
- bconv
|
47
|
+
- cinderella
|
54
48
|
- decode_entities
|
55
49
|
- guess_encoding
|
56
|
-
- cinderella
|
57
50
|
extensions: []
|
58
|
-
|
59
|
-
extra_rdoc_files:
|
51
|
+
extra_rdoc_files:
|
60
52
|
- README
|
61
53
|
- COPYING
|
62
54
|
- ChangeLog
|
63
|
-
files:
|
55
|
+
files:
|
64
56
|
- lib/cmess.rb
|
65
|
-
- lib/cmess/guess_encoding/automatic.rb
|
66
|
-
- lib/cmess/guess_encoding/encoding.rb
|
67
|
-
- lib/cmess/guess_encoding/manual.rb
|
68
57
|
- lib/cmess/bconv.rb
|
69
|
-
- lib/cmess/cli.rb
|
70
58
|
- lib/cmess/cinderella.rb
|
71
|
-
- lib/cmess/
|
59
|
+
- lib/cmess/cli.rb
|
72
60
|
- lib/cmess/decode_entities.rb
|
61
|
+
- lib/cmess/guess_encoding.rb
|
62
|
+
- lib/cmess/guess_encoding/automatic.rb
|
63
|
+
- lib/cmess/guess_encoding/encoding.rb
|
64
|
+
- lib/cmess/guess_encoding/manual.rb
|
73
65
|
- lib/cmess/version.rb
|
74
66
|
- bin/bconv
|
67
|
+
- bin/cinderella
|
75
68
|
- bin/decode_entities
|
76
69
|
- bin/guess_encoding
|
77
|
-
-
|
78
|
-
- data/csets/latin1.yaml
|
79
|
-
- data/csets/iso_8859-15.yaml
|
70
|
+
- data/chartab.yaml
|
80
71
|
- data/csets/iso_8859-1.yaml
|
81
|
-
- data/csets/
|
72
|
+
- data/csets/iso_8859-15.yaml
|
73
|
+
- data/csets/latin1.yaml
|
82
74
|
- data/csets/unicode/basic_latin.yaml
|
75
|
+
- data/csets/unicode/cyrillic-supplement.yaml
|
76
|
+
- data/csets/unicode/cyrillic.yaml
|
77
|
+
- data/csets/unicode/greek.yaml
|
83
78
|
- data/csets/unicode/ipa_extensions.yaml
|
84
|
-
- data/csets/unicode/
|
79
|
+
- data/csets/unicode/latin-extended-c.yaml
|
85
80
|
- data/csets/unicode/latin-extended-d.yaml
|
86
|
-
- data/csets/unicode/
|
81
|
+
- data/csets/unicode/latin_1_supplement.yaml
|
82
|
+
- data/csets/unicode/latin_extended_a.yaml
|
87
83
|
- data/csets/unicode/latin_extended_additional.yaml
|
88
|
-
- data/csets/unicode/
|
89
|
-
- data/csets/unicode/
|
84
|
+
- data/csets/unicode/latin_extended_b.yaml
|
85
|
+
- data/csets/unicode/letterlike_symbols.yaml
|
90
86
|
- data/csets/unicode/spacing_modifier_letters.yaml
|
91
|
-
- data/csets/unicode/cyrillic-supplement.yaml
|
92
|
-
- data/csets/unicode/cyrillic.yaml
|
93
|
-
- data/csets/unicode/latin_1_supplement.yaml
|
94
87
|
- data/csets/utf-8.yaml
|
95
88
|
- data/csets/utf8.yaml
|
96
89
|
- data/test_chars.yaml
|
97
|
-
-
|
98
|
-
- README
|
90
|
+
- COPYING
|
99
91
|
- ChangeLog
|
92
|
+
- README
|
100
93
|
- Rakefile
|
101
|
-
- COPYING
|
102
|
-
- example/guess_encoding/en.utf-8.txt
|
103
|
-
- example/guess_encoding/de.utf-8.txt
|
104
|
-
- example/guess_encoding/it.utf-8.txt
|
105
|
-
- example/guess_encoding/check_results
|
106
|
-
- example/guess_encoding/fr.utf-8.txt
|
107
|
-
- example/cinderella/empty6-slash_repaired.txt
|
108
|
-
- example/cinderella/empty6-slash.txt
|
109
94
|
- example/cinderella/crop
|
110
|
-
- example/cinderella/pot
|
111
95
|
- example/cinderella/crop_repaired
|
112
|
-
|
113
|
-
|
114
|
-
|
96
|
+
- example/cinderella/empty6-slash.txt
|
97
|
+
- example/cinderella/empty6-slash_repaired.txt
|
98
|
+
- example/cinderella/pot
|
99
|
+
- example/guess_encoding/check_results
|
100
|
+
- example/guess_encoding/de.utf-8.txt
|
101
|
+
- example/guess_encoding/en.utf-8.txt
|
102
|
+
- example/guess_encoding/fr.utf-8.txt
|
103
|
+
- example/guess_encoding/it.utf-8.txt
|
104
|
+
homepage: http://github.com/blackwinter/cmess
|
105
|
+
licenses:
|
106
|
+
- AGPL
|
107
|
+
metadata: {}
|
115
108
|
post_install_message:
|
116
|
-
rdoc_options:
|
117
|
-
- --main
|
118
|
-
- README
|
109
|
+
rdoc_options:
|
119
110
|
- --charset
|
120
111
|
- UTF-8
|
112
|
+
- --line-numbers
|
121
113
|
- --all
|
122
114
|
- --title
|
123
|
-
- cmess Application documentation (v0.
|
124
|
-
- --
|
125
|
-
|
115
|
+
- cmess Application documentation (v0.4.0)
|
116
|
+
- --main
|
117
|
+
- README
|
118
|
+
require_paths:
|
126
119
|
- lib
|
127
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
none: false
|
138
|
-
requirements:
|
139
|
-
- - ">="
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
hash: 3
|
142
|
-
segments:
|
143
|
-
- 0
|
144
|
-
version: "0"
|
120
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.9.2
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - '>='
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
145
130
|
requirements: []
|
146
|
-
|
147
|
-
|
148
|
-
rubygems_version: 1.8.8
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.0.6
|
149
133
|
signing_key:
|
150
|
-
specification_version:
|
151
|
-
summary:
|
134
|
+
specification_version: 4
|
135
|
+
summary: 'Assist with handling messed up encodings (Currently includes the following
|
136
|
+
tools: bconv, cinderella, decode_entities, guess_encoding)'
|
152
137
|
test_files: []
|
153
|
-
|