cmess 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/data/test_chars.yaml +26 -0
- data/lib/cmess/decode_entities.rb +1 -1
- data/lib/cmess/guess_encoding.rb +1 -1
- data/lib/cmess/guess_encoding/automatic.rb +14 -1
- data/lib/cmess/guess_encoding/encoding.rb +7 -1
- data/lib/cmess/version.rb +1 -1
- metadata +37 -37
data/README
CHANGED
data/data/test_chars.yaml
CHANGED
@@ -5,8 +5,34 @@
|
|
5
5
|
|
6
6
|
"ISO-8859-1": [129, 141, 143, 144, 157, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]
|
7
7
|
|
8
|
+
"ISO-8859-2": [169, 174, 185, 190, 164, 167, 168, 173, 176, 180, 184, 193, 194, 196, 199, 201, 203, 205, 206, 211, 212, 214, 215, 218, 220, 221, 223, 225, 226, 228, 231, 233, 235, 237, 238, 243, 244, 246, 247, 250, 252, 253]
|
9
|
+
|
10
|
+
"ISO-8859-3": [163, 164, 167, 168, 173, 176, 178, 179, 180, 181, 183, 184, 189, 192, 193, 194, 196, 199, 200, 201, 202, 203, 204, 205, 206, 207, 209, 210, 211, 212, 214, 215, 217, 218, 219, 220, 223, 224, 225, 226, 228, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 246, 247, 249, 250, 251, 252]
|
11
|
+
|
12
|
+
"ISO-8859-4": [169, 174, 185, 190, 164, 167, 168, 173, 175, 176, 180, 184, 193, 194, 195, 196, 197, 198, 201, 203, 205, 206, 212, 213, 214, 215, 216, 218, 219, 220, 223, 225, 226, 227, 228, 229, 230, 233, 235, 237, 238, 244, 245, 246, 247, 248, 250, 251, 252]
|
13
|
+
|
14
|
+
"ISO-8859-5": [253, 173]
|
15
|
+
|
16
|
+
"ISO-8859-6": [164, 173]
|
17
|
+
|
18
|
+
"ISO-8859-7": [164, 161, 162, 163, 166, 167, 168, 169, 171, 172, 173, 176, 177, 178, 179, 183, 187, 189]
|
19
|
+
|
20
|
+
"ISO-8859-8": [162, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 187, 188, 189, 190, 170, 186]
|
21
|
+
|
22
|
+
"ISO-8859-9": [161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 255]
|
23
|
+
|
24
|
+
"ISO-8859-10": [170, 172, 186, 188, 167, 173, 176, 183, 193, 194, 195, 196, 197, 198, 201, 203, 205, 206, 207, 208, 211, 212, 213, 214, 216, 218, 219, 220, 221, 222, 223, 225, 226, 227, 228, 229, 230, 233, 235, 237, 238, 239, 240, 243, 244, 245, 246, 248, 250, 251, 252, 253, 254]
|
25
|
+
|
26
|
+
"ISO-8859-11": []
|
27
|
+
|
28
|
+
"ISO-8859-13": [165, 208, 222, 255, 180, 161, 240, 254, 162, 163, 164, 166, 167, 169, 171, 172, 173, 174, 176, 177, 178, 179, 181, 182, 183, 185, 187, 188, 189, 190, 196, 197, 175, 201, 211, 213, 214, 215, 168, 220, 223, 228, 229, 191, 233, 243, 245, 246, 247, 184, 252]
|
29
|
+
|
30
|
+
"ISO-8859-14": [175, 163, 167, 169, 173, 174, 182, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 245, 246, 248, 249, 250, 251, 252, 253, 255]
|
31
|
+
|
8
32
|
"ISO-8859-15": [164, 129, 166, 188, 141, 180, 143, 144, 168, 189, 157, 184, 190, 161, 162, 163, 165, 167, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 181, 182, 183, 185, 186, 187, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]
|
9
33
|
|
34
|
+
"ISO-8859-16": [164, 165, 166, 188, 180, 181, 168, 189, 184, 190, 167, 169, 171, 173, 176, 177, 182, 183, 187, 192, 193, 194, 196, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 210, 211, 212, 214, 217, 218, 219, 220, 223, 224, 225, 226, 228, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 242, 243, 244, 246, 249, 250, 251, 252, 255]
|
35
|
+
|
10
36
|
"CP1252": [128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 142, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 158, 159, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]
|
11
37
|
|
12
38
|
"CP850": [159, 173, 189, 156, 207, 190, 221, 245, 249, 184, 166, 174, 170, 240, 169, 238, 248, 241, 253, 252, 239, 230, 244, 250, 247, 251, 167, 175, 172, 171, 243, 168, 183, 181, 182, 199, 142, 143, 146, 128, 212, 144, 210, 211, 222, 214, 215, 216, 209, 165, 227, 224, 226, 229, 153, 158, 157, 235, 233, 234, 154, 237, 232, 225, 133, 160, 131, 198, 132, 134, 145, 135, 138, 130, 136, 137, 141, 161, 140, 139, 208, 164, 149, 162, 147, 228, 148, 246, 155, 151, 163, 150, 129, 236, 231, 152]
|
data/lib/cmess/guess_encoding.rb
CHANGED
data/lib/cmess/guess_encoding/automatic.rb
CHANGED
@@ -61,7 +61,20 @@ class CMess::GuessEncoding::Automatic
|
|
61
61
|
TEST_ENCODINGS = [
|
62
62
|
MACINTOSH,
|
63
63
|
ISO_8859_1,
|
64
|
+
ISO_8859_2,
|
65
|
+
ISO_8859_3,
|
66
|
+
ISO_8859_4,
|
67
|
+
ISO_8859_5,
|
68
|
+
ISO_8859_6,
|
69
|
+
ISO_8859_7,
|
70
|
+
ISO_8859_8,
|
71
|
+
ISO_8859_9,
|
72
|
+
ISO_8859_10,
|
73
|
+
ISO_8859_11,
|
74
|
+
ISO_8859_13,
|
75
|
+
ISO_8859_14,
|
64
76
|
ISO_8859_15,
|
77
|
+
ISO_8859_16,
|
65
78
|
CP1252,
|
66
79
|
CP850,
|
67
80
|
MS_ANSI
|
@@ -75,7 +88,7 @@ class CMess::GuessEncoding::Automatic
|
|
75
88
|
|
76
89
|
# Map TEST_ENCODINGS to respectively encoded CHARS_TO_TEST
|
77
90
|
TEST_CHARS = Hash.new { |hash, encoding|
|
78
|
-
encoding =
|
91
|
+
encoding = get_or_set_encoding_const(encoding)
|
79
92
|
encchars = CHARS_TO_TEST.map { |char|
|
80
93
|
begin
|
81
94
|
byte = *ICONV_FOR[encoding].iconv(char).unpack('C')
|
data/lib/cmess/guess_encoding/encoding.rb
CHANGED
@@ -41,6 +41,10 @@ module CMess::GuessEncoding::Encoding
|
|
41
41
|
const_set(:ALL_ENCODINGS, get_all_encodings)
|
42
42
|
end
|
43
43
|
|
44
|
+
def [](encoding)
|
45
|
+
get_or_set_encoding_const(encoding)
|
46
|
+
end
|
47
|
+
|
44
48
|
private
|
45
49
|
|
46
50
|
def get_all_encodings
|
@@ -64,7 +68,9 @@ module CMess::GuessEncoding::Encoding
|
|
64
68
|
|
65
69
|
%w[
|
66
70
|
UNKNOWN ASCII MACINTOSH
|
67
|
-
ISO-8859-1 ISO-8859-2 ISO-8859-
|
71
|
+
ISO-8859-1 ISO-8859-2 ISO-8859-3 ISO-8859-4 ISO-8859-5
|
72
|
+
ISO-8859-6 ISO-8859-7 ISO-8859-8 ISO-8859-9 ISO-8859-10
|
73
|
+
ISO-8859-11 ISO-8859-13 ISO-8859-14 ISO-8859-15 ISO-8859-16
|
68
74
|
CP1250 CP1251 CP1252 CP850 CP852 CP856
|
69
75
|
UTF-8 UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE
|
70
76
|
UTF-7 UTF-EBCDIC SCSU BOCU-1
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-10-24 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -35,10 +35,10 @@ dependencies:
|
|
35
35
|
description: "\n Assist with handling messed up encodings (Currently includes the\n following tools: bconv, cinderella, decode_entities, guess_encoding)\n "
|
36
36
|
email: jens.wille@uni-koeln.de
|
37
37
|
executables:
|
38
|
-
- guess_encoding
|
39
38
|
- cinderella
|
40
|
-
- bconv
|
41
39
|
- decode_entities
|
40
|
+
- guess_encoding
|
41
|
+
- bconv
|
42
42
|
extensions: []
|
43
43
|
|
44
44
|
extra_rdoc_files:
|
@@ -46,69 +46,69 @@ extra_rdoc_files:
|
|
46
46
|
- ChangeLog
|
47
47
|
- README
|
48
48
|
files:
|
49
|
-
- lib/cmess/guess_encoding
|
50
|
-
- lib/cmess/guess_encoding/automatic.rb
|
49
|
+
- lib/cmess/guess_encoding.rb
|
51
50
|
- lib/cmess/guess_encoding/encoding.rb
|
51
|
+
- lib/cmess/guess_encoding/automatic.rb
|
52
|
+
- lib/cmess/guess_encoding/manual.rb
|
52
53
|
- lib/cmess/decode_entities.rb
|
54
|
+
- lib/cmess/version.rb
|
53
55
|
- lib/cmess/cli.rb
|
54
56
|
- lib/cmess/cinderella.rb
|
55
57
|
- lib/cmess/bconv.rb
|
56
|
-
- lib/cmess/guess_encoding.rb
|
57
|
-
- lib/cmess/version.rb
|
58
58
|
- lib/cmess.rb
|
59
|
-
- bin/guess_encoding
|
60
59
|
- bin/cinderella
|
61
|
-
- bin/bconv
|
62
60
|
- bin/decode_entities
|
63
|
-
-
|
61
|
+
- bin/guess_encoding
|
62
|
+
- bin/bconv
|
64
63
|
- COPYING
|
65
|
-
-
|
64
|
+
- Rakefile
|
66
65
|
- README
|
67
|
-
-
|
68
|
-
- example/guess_encoding/fr.utf-8.txt
|
69
|
-
- example/guess_encoding/check_results
|
70
|
-
- example/guess_encoding/it.utf-8.txt
|
71
|
-
- example/guess_encoding/de.utf-8.txt
|
72
|
-
- example/cinderella/pot
|
66
|
+
- ChangeLog
|
73
67
|
- example/cinderella/empty6-slash_repaired.txt
|
74
|
-
- example/cinderella/crop
|
75
68
|
- example/cinderella/empty6-slash.txt
|
69
|
+
- example/cinderella/crop
|
76
70
|
- example/cinderella/crop_repaired
|
71
|
+
- example/cinderella/pot
|
72
|
+
- example/guess_encoding/en.utf-8.txt
|
73
|
+
- example/guess_encoding/check_results
|
74
|
+
- example/guess_encoding/de.utf-8.txt
|
75
|
+
- example/guess_encoding/it.utf-8.txt
|
76
|
+
- example/guess_encoding/fr.utf-8.txt
|
77
|
+
- data/test_chars.yaml
|
77
78
|
- data/chartab.yaml
|
78
|
-
- data/csets/
|
79
|
-
- data/csets/
|
80
|
-
- data/csets/
|
81
|
-
- data/csets/unicode/
|
79
|
+
- data/csets/iso_8859-15.yaml
|
80
|
+
- data/csets/latin1.yaml
|
81
|
+
- data/csets/utf8.yaml
|
82
|
+
- data/csets/unicode/letterlike_symbols.yaml
|
82
83
|
- data/csets/unicode/latin_1_supplement.yaml
|
83
|
-
- data/csets/unicode/
|
84
|
-
- data/csets/unicode/
|
84
|
+
- data/csets/unicode/cyrillic-supplement.yaml
|
85
|
+
- data/csets/unicode/latin_extended_additional.yaml
|
86
|
+
- data/csets/unicode/ipa_extensions.yaml
|
85
87
|
- data/csets/unicode/latin-extended-d.yaml
|
86
88
|
- data/csets/unicode/basic_latin.yaml
|
87
|
-
- data/csets/unicode/
|
89
|
+
- data/csets/unicode/latin_extended_b.yaml
|
90
|
+
- data/csets/unicode/latin_extended_a.yaml
|
91
|
+
- data/csets/unicode/latin-extended-c.yaml
|
92
|
+
- data/csets/unicode/spacing_modifier_letters.yaml
|
88
93
|
- data/csets/unicode/cyrillic.yaml
|
89
|
-
- data/csets/unicode/
|
90
|
-
- data/csets/unicode/letterlike_symbols.yaml
|
94
|
+
- data/csets/unicode/greek.yaml
|
91
95
|
- data/csets/iso_8859-1.yaml
|
92
|
-
- data/csets/utf8.yaml
|
93
96
|
- data/csets/utf-8.yaml
|
94
|
-
- data/csets/latin1.yaml
|
95
|
-
- data/csets/iso_8859-15.yaml
|
96
|
-
- data/test_chars.yaml
|
97
97
|
has_rdoc: true
|
98
98
|
homepage: http://prometheus.rubyforge.org/cmess
|
99
99
|
licenses: []
|
100
100
|
|
101
101
|
post_install_message:
|
102
102
|
rdoc_options:
|
103
|
-
- --main
|
104
|
-
- README
|
105
|
-
- --line-numbers
|
106
|
-
- --inline-source
|
107
103
|
- --title
|
108
104
|
- cmess Application documentation
|
109
|
-
- --all
|
105
|
+
- --inline-source
|
110
106
|
- --charset
|
111
107
|
- UTF-8
|
108
|
+
- --main
|
109
|
+
- README
|
110
|
+
- --all
|
111
|
+
- --line-numbers
|
112
112
|
require_paths:
|
113
113
|
- lib
|
114
114
|
required_ruby_version: !ruby/object:Gem::Requirement
|