cmess 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +68 -81
- data/ChangeLog +28 -0
- data/README +23 -21
- data/Rakefile +15 -16
- data/bin/bconv +30 -47
- data/bin/cinderella +51 -68
- data/bin/decode_entities +28 -36
- data/bin/guess_encoding +53 -81
- data/lib/cmess.rb +35 -26
- data/lib/cmess/bconv.rb +23 -25
- data/lib/cmess/cinderella.rb +21 -20
- data/lib/cmess/cli.rb +27 -17
- data/lib/cmess/decode_entities.rb +19 -20
- data/lib/cmess/guess_encoding.rb +20 -18
- data/lib/cmess/guess_encoding/automatic.rb +151 -125
- data/lib/cmess/guess_encoding/encoding.rb +16 -18
- data/lib/cmess/guess_encoding/manual.rb +26 -31
- data/lib/cmess/version.rb +2 -2
- metadata +25 -28
@@ -3,7 +3,7 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2011 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
@@ -15,40 +15,39 @@
|
|
15
15
|
# for automatic encoding detection) #
|
16
16
|
# #
|
17
17
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
18
|
-
# terms of the GNU General Public License as published by the Free Software #
|
19
|
-
# Foundation; either version 3 of the License, or (at your option) any later #
|
20
|
-
# version. #
|
18
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
19
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
20
|
+
# any later version. #
|
21
21
|
# #
|
22
22
|
# cmess is distributed in the hope that it will be useful, but WITHOUT ANY #
|
23
23
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
24
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
|
25
|
-
# details. #
|
24
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
25
|
+
# more details. #
|
26
26
|
# #
|
27
|
-
# You should have received a copy of the GNU General Public License along #
|
28
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
27
|
+
# You should have received a copy of the GNU Affero General Public License #
|
28
|
+
# along with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
29
29
|
# #
|
30
30
|
###############################################################################
|
31
31
|
#++
|
32
32
|
|
33
|
-
require '
|
33
|
+
require 'cmess/guess_encoding'
|
34
34
|
|
35
|
-
require 'rubygems'
|
36
35
|
require 'nuggets/array/runiq'
|
37
36
|
require 'nuggets/array/in_order'
|
37
|
+
require 'nuggets/enumerable/minmax'
|
38
38
|
|
39
|
-
# Outputs given string (or line), being encoded in target encoding, encoded in
|
40
|
-
# various test encodings, thus allowing to identify the (seemingly) correct
|
41
|
-
# encoding by visually comparing the input string with its desired appearance.
|
39
|
+
# Outputs given string (or line), being encoded in target encoding,
|
40
|
+
# encoded in various test encodings, thus allowing to identify the
|
41
|
+
# (seemingly) correct encoding by visually comparing the input string
|
42
|
+
# with its desired appearance.
|
42
43
|
|
43
|
-
module CMess
|
44
|
-
module GuessEncoding
|
45
|
-
module Manual
|
44
|
+
module CMess::GuessEncoding::Manual
|
46
45
|
|
47
46
|
extend self
|
48
47
|
|
49
|
-
include Encoding
|
48
|
+
include CMess::GuessEncoding::Encoding
|
50
49
|
|
51
|
-
#
|
50
|
+
# Default encodings to try
|
52
51
|
ENCODINGS = [
|
53
52
|
ISO_8859_1,
|
54
53
|
ISO_8859_2,
|
@@ -62,7 +61,7 @@ module CMess
|
|
62
61
|
UTF_8
|
63
62
|
]
|
64
63
|
|
65
|
-
#
|
64
|
+
# Likely candidates to suggest to the user
|
66
65
|
CANDIDATES = [
|
67
66
|
ANSI_X34,
|
68
67
|
EBCDIC_AT_DE,
|
@@ -81,16 +80,14 @@ module CMess
|
|
81
80
|
UTF_32LE
|
82
81
|
]
|
83
82
|
|
84
|
-
def display(
|
85
|
-
target = target_encoding
|
83
|
+
def display(options)
|
84
|
+
input, target = CMess.ensure_options!(options, :input, :target_encoding)
|
86
85
|
|
87
|
-
encodings = (encodings || ENCODINGS) +
|
86
|
+
encodings = (options[:encodings] || ENCODINGS) +
|
87
|
+
(options[:additional_encodings] || [])
|
88
88
|
|
89
|
-
if encodings.
|
90
|
-
|
91
|
-
elsif encodings.delete('__COMMON__')
|
92
|
-
encodings.concat(CANDIDATES)
|
93
|
-
end
|
89
|
+
encodings.concat(all_encodings) if encodings.delete('__ALL__')
|
90
|
+
encodings.concat(CANDIDATES) if encodings.delete('__COMMON__')
|
94
91
|
|
95
92
|
# uniq with additional encodings staying at the end
|
96
93
|
encodings.runiq!
|
@@ -98,7 +95,7 @@ module CMess
|
|
98
95
|
# move target encoding to front
|
99
96
|
encodings.in_order!(target)
|
100
97
|
|
101
|
-
max_length = encodings.
|
98
|
+
max_length = encodings.max(:length)
|
102
99
|
|
103
100
|
encodings.each { |encoding|
|
104
101
|
converted = begin
|
@@ -109,7 +106,7 @@ module CMess
|
|
109
106
|
if encoding == target
|
110
107
|
raise ArgumentError, "invalid encoding: #{encoding}"
|
111
108
|
else
|
112
|
-
|
109
|
+
'INVALID ENCODING!'
|
113
110
|
end
|
114
111
|
end
|
115
112
|
|
@@ -117,6 +114,4 @@ module CMess
|
|
117
114
|
}
|
118
115
|
end
|
119
116
|
|
120
|
-
end
|
121
|
-
end
|
122
117
|
end
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
- 4
|
10
|
-
version: 0.2.4
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 0.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jens Wille
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
19
|
-
default_executable:
|
18
|
+
date: 2011-07-25 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: ruby-nuggets
|
@@ -58,9 +57,9 @@ executables:
|
|
58
57
|
extensions: []
|
59
58
|
|
60
59
|
extra_rdoc_files:
|
60
|
+
- README
|
61
61
|
- COPYING
|
62
62
|
- ChangeLog
|
63
|
-
- README
|
64
63
|
files:
|
65
64
|
- lib/cmess.rb
|
66
65
|
- lib/cmess/guess_encoding/automatic.rb
|
@@ -76,20 +75,6 @@ files:
|
|
76
75
|
- bin/decode_entities
|
77
76
|
- bin/guess_encoding
|
78
77
|
- bin/cinderella
|
79
|
-
- README
|
80
|
-
- ChangeLog
|
81
|
-
- Rakefile
|
82
|
-
- COPYING
|
83
|
-
- example/guess_encoding/en.utf-8.txt
|
84
|
-
- example/guess_encoding/de.utf-8.txt
|
85
|
-
- example/guess_encoding/it.utf-8.txt
|
86
|
-
- example/guess_encoding/check_results
|
87
|
-
- example/guess_encoding/fr.utf-8.txt
|
88
|
-
- example/cinderella/empty6-slash_repaired.txt
|
89
|
-
- example/cinderella/empty6-slash.txt
|
90
|
-
- example/cinderella/crop
|
91
|
-
- example/cinderella/pot
|
92
|
-
- example/cinderella/crop_repaired
|
93
78
|
- data/csets/latin1.yaml
|
94
79
|
- data/csets/iso_8859-15.yaml
|
95
80
|
- data/csets/iso_8859-1.yaml
|
@@ -110,21 +95,33 @@ files:
|
|
110
95
|
- data/csets/utf8.yaml
|
111
96
|
- data/test_chars.yaml
|
112
97
|
- data/chartab.yaml
|
113
|
-
|
98
|
+
- README
|
99
|
+
- ChangeLog
|
100
|
+
- Rakefile
|
101
|
+
- COPYING
|
102
|
+
- example/guess_encoding/en.utf-8.txt
|
103
|
+
- example/guess_encoding/de.utf-8.txt
|
104
|
+
- example/guess_encoding/it.utf-8.txt
|
105
|
+
- example/guess_encoding/check_results
|
106
|
+
- example/guess_encoding/fr.utf-8.txt
|
107
|
+
- example/cinderella/empty6-slash_repaired.txt
|
108
|
+
- example/cinderella/empty6-slash.txt
|
109
|
+
- example/cinderella/crop
|
110
|
+
- example/cinderella/pot
|
111
|
+
- example/cinderella/crop_repaired
|
114
112
|
homepage: http://prometheus.rubyforge.org/cmess
|
115
113
|
licenses: []
|
116
114
|
|
117
115
|
post_install_message:
|
118
116
|
rdoc_options:
|
117
|
+
- --charset
|
118
|
+
- UTF-8
|
119
119
|
- --title
|
120
|
-
- cmess Application documentation
|
120
|
+
- cmess Application documentation (v0.3.0)
|
121
121
|
- --main
|
122
122
|
- README
|
123
123
|
- --line-numbers
|
124
|
-
- --inline-source
|
125
124
|
- --all
|
126
|
-
- --charset
|
127
|
-
- UTF-8
|
128
125
|
require_paths:
|
129
126
|
- lib
|
130
127
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -148,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
145
|
requirements: []
|
149
146
|
|
150
147
|
rubyforge_project: prometheus
|
151
|
-
rubygems_version: 1.
|
148
|
+
rubygems_version: 1.8.5
|
152
149
|
signing_key:
|
153
150
|
specification_version: 3
|
154
151
|
summary: "Assist with handling messed up encodings (Currently includes the following tools: bconv, cinderella, decode_entities, guess_encoding)"
|