cmess 0.0.5.182 → 0.0.5.184
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cmess/guess_encoding.rb +33 -7
- metadata +7 -7
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -30,6 +30,8 @@
|
|
30
30
|
###############################################################################
|
31
31
|
#++
|
32
32
|
|
33
|
+
$KCODE = 'u'
|
34
|
+
|
33
35
|
require 'iconv'
|
34
36
|
require 'forwardable'
|
35
37
|
|
@@ -145,6 +147,31 @@ module CMess::GuessEncoding
|
|
145
147
|
|
146
148
|
include Encoding
|
147
149
|
|
150
|
+
# Creates a converter for desired encoding (from UTF-8)
|
151
|
+
ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
|
152
|
+
|
153
|
+
# Encodings to test statistically by TEST_CHARS
|
154
|
+
TEST_ENCODINGS = [
|
155
|
+
MACINTOSH,
|
156
|
+
ISO_8859_1,
|
157
|
+
ISO_8859_15,
|
158
|
+
CP1252,
|
159
|
+
CP850,
|
160
|
+
MS_ANSI
|
161
|
+
]
|
162
|
+
|
163
|
+
# Certain chars to test for in TEST_ENCODINGS
|
164
|
+
TEST_CHARS = 'ÁÀÂÄÃÇÉÈÊËÍÌÎÏÑÓÒÔÖÚÙÛÜÆáàâäãçéèêëíìîïñóòôöúùûüæ'.
|
165
|
+
split(//).inject(Hash.new { |h, k| h[k] = [] }) { |hash, char|
|
166
|
+
TEST_ENCODINGS.each { |encoding|
|
167
|
+
hash[encoding] += ICONV_FOR[encoding].iconv(char).unpack('C')
|
168
|
+
}
|
169
|
+
hash
|
170
|
+
}
|
171
|
+
|
172
|
+
# Relative count of TEST_CHARS must exceed this threshold to yield a match
|
173
|
+
TEST_THRESHOLD = 0.0004
|
174
|
+
|
148
175
|
@supported_encodings = []
|
149
176
|
@encoding_guessers = []
|
150
177
|
@supported_boms = []
|
@@ -318,13 +345,12 @@ module CMess::GuessEncoding
|
|
318
345
|
esc_bytes > 0 && esc_bytes == fol_bytes
|
319
346
|
end
|
320
347
|
|
321
|
-
# Analyse statistical appearance of German umlauts
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
break encoding if relative_byte_count(byte_count_sum(umlauts)) > 0.001
|
348
|
+
# Analyse statistical appearance of German umlauts and other accented
|
349
|
+
# letters (see TEST_CHARS)
|
350
|
+
encodings *TEST_ENCODINGS do
|
351
|
+
TEST_ENCODINGS.each { |encoding|
|
352
|
+
break encoding if
|
353
|
+
relative_byte_count(byte_count_sum(TEST_CHARS[encoding])) > TEST_THRESHOLD
|
328
354
|
}
|
329
355
|
end
|
330
356
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5.182
|
4
|
+
version: 0.0.5.184
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-01-
|
12
|
+
date: 2008-01-23 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -85,15 +85,15 @@ has_rdoc: true
|
|
85
85
|
homepage: http://prometheus.rubyforge.org/cmess
|
86
86
|
post_install_message:
|
87
87
|
rdoc_options:
|
88
|
-
- --all
|
89
|
-
- --title
|
90
|
-
- cmess Application documentation
|
91
88
|
- --line-numbers
|
92
|
-
- --main
|
93
|
-
- README
|
94
89
|
- --inline-source
|
95
90
|
- --charset
|
96
91
|
- UTF-8
|
92
|
+
- --main
|
93
|
+
- README
|
94
|
+
- --all
|
95
|
+
- --title
|
96
|
+
- cmess Application documentation
|
97
97
|
require_paths:
|
98
98
|
- lib
|
99
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|