cmess 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +68 -81
- data/ChangeLog +28 -0
- data/README +23 -21
- data/Rakefile +15 -16
- data/bin/bconv +30 -47
- data/bin/cinderella +51 -68
- data/bin/decode_entities +28 -36
- data/bin/guess_encoding +53 -81
- data/lib/cmess.rb +35 -26
- data/lib/cmess/bconv.rb +23 -25
- data/lib/cmess/cinderella.rb +21 -20
- data/lib/cmess/cli.rb +27 -17
- data/lib/cmess/decode_entities.rb +19 -20
- data/lib/cmess/guess_encoding.rb +20 -18
- data/lib/cmess/guess_encoding/automatic.rb +151 -125
- data/lib/cmess/guess_encoding/encoding.rb +16 -18
- data/lib/cmess/guess_encoding/manual.rb +26 -31
- data/lib/cmess/version.rb +2 -2
- metadata +25 -28
@@ -3,7 +3,7 @@
|
|
3
3
|
# #
|
4
4
|
# A component of cmess, the encoding tool-box. #
|
5
5
|
# #
|
6
|
-
# Copyright (C) 2007-
|
6
|
+
# Copyright (C) 2007-2011 University of Cologne, #
|
7
7
|
# Albertus-Magnus-Platz, #
|
8
8
|
# 50923 Cologne, Germany #
|
9
9
|
# #
|
@@ -15,40 +15,39 @@
|
|
15
15
|
# for automatic encoding detection) #
|
16
16
|
# #
|
17
17
|
# cmess is free software; you can redistribute it and/or modify it under the #
|
18
|
-
# terms of the GNU General Public License as published by the Free
|
19
|
-
# Foundation; either version 3 of the License, or (at your option)
|
20
|
-
# version.
|
18
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
19
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
20
|
+
# any later version. #
|
21
21
|
# #
|
22
22
|
# cmess is distributed in the hope that it will be useful, but WITHOUT ANY #
|
23
23
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
24
|
-
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
25
|
-
# details.
|
24
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
25
|
+
# more details. #
|
26
26
|
# #
|
27
|
-
# You should have received a copy of the GNU General Public License
|
28
|
-
# with cmess. If not, see <http://www.gnu.org/licenses/>.
|
27
|
+
# You should have received a copy of the GNU Affero General Public License #
|
28
|
+
# along with cmess. If not, see <http://www.gnu.org/licenses/>. #
|
29
29
|
# #
|
30
30
|
###############################################################################
|
31
31
|
#++
|
32
32
|
|
33
|
-
require '
|
33
|
+
require 'cmess/guess_encoding'
|
34
34
|
|
35
|
-
require 'rubygems'
|
36
35
|
require 'nuggets/array/runiq'
|
37
36
|
require 'nuggets/array/in_order'
|
37
|
+
require 'nuggets/enumerable/minmax'
|
38
38
|
|
39
|
-
# Outputs given string (or line), being encoded in target encoding,
|
40
|
-
# various test encodings, thus allowing to identify the
|
41
|
-
# encoding by visually comparing the input string
|
39
|
+
# Outputs given string (or line), being encoded in target encoding,
|
40
|
+
# encoded in various test encodings, thus allowing to identify the
|
41
|
+
# (seemingly) correct encoding by visually comparing the input string
|
42
|
+
# with its desired appearance.
|
42
43
|
|
43
|
-
module CMess
|
44
|
-
module GuessEncoding
|
45
|
-
module Manual
|
44
|
+
module CMess::GuessEncoding::Manual
|
46
45
|
|
47
46
|
extend self
|
48
47
|
|
49
|
-
include Encoding
|
48
|
+
include CMess::GuessEncoding::Encoding
|
50
49
|
|
51
|
-
#
|
50
|
+
# Default encodings to try
|
52
51
|
ENCODINGS = [
|
53
52
|
ISO_8859_1,
|
54
53
|
ISO_8859_2,
|
@@ -62,7 +61,7 @@ module CMess
|
|
62
61
|
UTF_8
|
63
62
|
]
|
64
63
|
|
65
|
-
#
|
64
|
+
# Likely candidates to suggest to the user
|
66
65
|
CANDIDATES = [
|
67
66
|
ANSI_X34,
|
68
67
|
EBCDIC_AT_DE,
|
@@ -81,16 +80,14 @@ module CMess
|
|
81
80
|
UTF_32LE
|
82
81
|
]
|
83
82
|
|
84
|
-
def display(
|
85
|
-
target = target_encoding
|
83
|
+
def display(options)
|
84
|
+
input, target = CMess.ensure_options!(options, :input, :target_encoding)
|
86
85
|
|
87
|
-
encodings = (encodings || ENCODINGS) +
|
86
|
+
encodings = (options[:encodings] || ENCODINGS) +
|
87
|
+
(options[:additional_encodings] || [])
|
88
88
|
|
89
|
-
if encodings.
|
90
|
-
|
91
|
-
elsif encodings.delete('__COMMON__')
|
92
|
-
encodings.concat(CANDIDATES)
|
93
|
-
end
|
89
|
+
encodings.concat(all_encodings) if encodings.delete('__ALL__')
|
90
|
+
encodings.concat(CANDIDATES) if encodings.delete('__COMMON__')
|
94
91
|
|
95
92
|
# uniq with additional encodings staying at the end
|
96
93
|
encodings.runiq!
|
@@ -98,7 +95,7 @@ module CMess
|
|
98
95
|
# move target encoding to front
|
99
96
|
encodings.in_order!(target)
|
100
97
|
|
101
|
-
max_length = encodings.
|
98
|
+
max_length = encodings.max(:length)
|
102
99
|
|
103
100
|
encodings.each { |encoding|
|
104
101
|
converted = begin
|
@@ -109,7 +106,7 @@ module CMess
|
|
109
106
|
if encoding == target
|
110
107
|
raise ArgumentError, "invalid encoding: #{encoding}"
|
111
108
|
else
|
112
|
-
|
109
|
+
'INVALID ENCODING!'
|
113
110
|
end
|
114
111
|
end
|
115
112
|
|
@@ -117,6 +114,4 @@ module CMess
|
|
117
114
|
}
|
118
115
|
end
|
119
116
|
|
120
|
-
end
|
121
|
-
end
|
122
117
|
end
|
data/lib/cmess/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 0.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jens Wille
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
19
|
-
default_executable:
|
18
|
+
date: 2011-07-25 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: ruby-nuggets
|
@@ -58,9 +57,9 @@ executables:
|
|
58
57
|
extensions: []
|
59
58
|
|
60
59
|
extra_rdoc_files:
|
60
|
+
- README
|
61
61
|
- COPYING
|
62
62
|
- ChangeLog
|
63
|
-
- README
|
64
63
|
files:
|
65
64
|
- lib/cmess.rb
|
66
65
|
- lib/cmess/guess_encoding/automatic.rb
|
@@ -76,20 +75,6 @@ files:
|
|
76
75
|
- bin/decode_entities
|
77
76
|
- bin/guess_encoding
|
78
77
|
- bin/cinderella
|
79
|
-
- README
|
80
|
-
- ChangeLog
|
81
|
-
- Rakefile
|
82
|
-
- COPYING
|
83
|
-
- example/guess_encoding/en.utf-8.txt
|
84
|
-
- example/guess_encoding/de.utf-8.txt
|
85
|
-
- example/guess_encoding/it.utf-8.txt
|
86
|
-
- example/guess_encoding/check_results
|
87
|
-
- example/guess_encoding/fr.utf-8.txt
|
88
|
-
- example/cinderella/empty6-slash_repaired.txt
|
89
|
-
- example/cinderella/empty6-slash.txt
|
90
|
-
- example/cinderella/crop
|
91
|
-
- example/cinderella/pot
|
92
|
-
- example/cinderella/crop_repaired
|
93
78
|
- data/csets/latin1.yaml
|
94
79
|
- data/csets/iso_8859-15.yaml
|
95
80
|
- data/csets/iso_8859-1.yaml
|
@@ -110,21 +95,33 @@ files:
|
|
110
95
|
- data/csets/utf8.yaml
|
111
96
|
- data/test_chars.yaml
|
112
97
|
- data/chartab.yaml
|
113
|
-
|
98
|
+
- README
|
99
|
+
- ChangeLog
|
100
|
+
- Rakefile
|
101
|
+
- COPYING
|
102
|
+
- example/guess_encoding/en.utf-8.txt
|
103
|
+
- example/guess_encoding/de.utf-8.txt
|
104
|
+
- example/guess_encoding/it.utf-8.txt
|
105
|
+
- example/guess_encoding/check_results
|
106
|
+
- example/guess_encoding/fr.utf-8.txt
|
107
|
+
- example/cinderella/empty6-slash_repaired.txt
|
108
|
+
- example/cinderella/empty6-slash.txt
|
109
|
+
- example/cinderella/crop
|
110
|
+
- example/cinderella/pot
|
111
|
+
- example/cinderella/crop_repaired
|
114
112
|
homepage: http://prometheus.rubyforge.org/cmess
|
115
113
|
licenses: []
|
116
114
|
|
117
115
|
post_install_message:
|
118
116
|
rdoc_options:
|
117
|
+
- --charset
|
118
|
+
- UTF-8
|
119
119
|
- --title
|
120
|
-
- cmess Application documentation
|
120
|
+
- cmess Application documentation (v0.3.0)
|
121
121
|
- --main
|
122
122
|
- README
|
123
123
|
- --line-numbers
|
124
|
-
- --inline-source
|
125
124
|
- --all
|
126
|
-
- --charset
|
127
|
-
- UTF-8
|
128
125
|
require_paths:
|
129
126
|
- lib
|
130
127
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -148,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
145
|
requirements: []
|
149
146
|
|
150
147
|
rubyforge_project: prometheus
|
151
|
-
rubygems_version: 1.
|
148
|
+
rubygems_version: 1.8.5
|
152
149
|
signing_key:
|
153
150
|
specification_version: 3
|
154
151
|
summary: "Assist with handling messed up encodings (Currently includes the following tools: bconv, cinderella, decode_entities, guess_encoding)"
|