cmess 0.0.5.182 → 0.0.5.184
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cmess/guess_encoding.rb +33 -7
- metadata +7 -7
data/lib/cmess/guess_encoding.rb
CHANGED
@@ -30,6 +30,8 @@
|
|
30
30
|
###############################################################################
|
31
31
|
#++
|
32
32
|
|
33
|
+
$KCODE = 'u'
|
34
|
+
|
33
35
|
require 'iconv'
|
34
36
|
require 'forwardable'
|
35
37
|
|
@@ -145,6 +147,31 @@ module CMess::GuessEncoding
|
|
145
147
|
|
146
148
|
include Encoding
|
147
149
|
|
150
|
+
# Creates a converter for desired encoding (from UTF-8)
|
151
|
+
ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
|
152
|
+
|
153
|
+
# Encodings to test statistically by TEST_CHARS
|
154
|
+
TEST_ENCODINGS = [
|
155
|
+
MACINTOSH,
|
156
|
+
ISO_8859_1,
|
157
|
+
ISO_8859_15,
|
158
|
+
CP1252,
|
159
|
+
CP850,
|
160
|
+
MS_ANSI
|
161
|
+
]
|
162
|
+
|
163
|
+
# Certain chars to test for in TEST_ENCODINGS
|
164
|
+
TEST_CHARS = 'ÁÀÂÄÃÇÉÈÊËÍÌÎÏÑÓÒÔÖÚÙÛÜÆáàâäãçéèêëíìîïñóòôöúùûüæ'.
|
165
|
+
split(//).inject(Hash.new { |h, k| h[k] = [] }) { |hash, char|
|
166
|
+
TEST_ENCODINGS.each { |encoding|
|
167
|
+
hash[encoding] += ICONV_FOR[encoding].iconv(char).unpack('C')
|
168
|
+
}
|
169
|
+
hash
|
170
|
+
}
|
171
|
+
|
172
|
+
# Relative count of TEST_CHARS must exceed this threshold to yield a match
|
173
|
+
TEST_THRESHOLD = 0.0004
|
174
|
+
|
148
175
|
@supported_encodings = []
|
149
176
|
@encoding_guessers = []
|
150
177
|
@supported_boms = []
|
@@ -318,13 +345,12 @@ module CMess::GuessEncoding
|
|
318
345
|
esc_bytes > 0 && esc_bytes == fol_bytes
|
319
346
|
end
|
320
347
|
|
321
|
-
# Analyse statistical appearance of German umlauts
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
break encoding if relative_byte_count(byte_count_sum(umlauts)) > 0.001
|
348
|
+
# Analyse statistical appearance of German umlauts and other accented
|
349
|
+
# letters (see TEST_CHARS)
|
350
|
+
encodings *TEST_ENCODINGS do
|
351
|
+
TEST_ENCODINGS.each { |encoding|
|
352
|
+
break encoding if
|
353
|
+
relative_byte_count(byte_count_sum(TEST_CHARS[encoding])) > TEST_THRESHOLD
|
328
354
|
}
|
329
355
|
end
|
330
356
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmess
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5.182
|
4
|
+
version: 0.0.5.184
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-01-
|
12
|
+
date: 2008-01-23 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -85,15 +85,15 @@ has_rdoc: true
|
|
85
85
|
homepage: http://prometheus.rubyforge.org/cmess
|
86
86
|
post_install_message:
|
87
87
|
rdoc_options:
|
88
|
-
- --all
|
89
|
-
- --title
|
90
|
-
- cmess Application documentation
|
91
88
|
- --line-numbers
|
92
|
-
- --main
|
93
|
-
- README
|
94
89
|
- --inline-source
|
95
90
|
- --charset
|
96
91
|
- UTF-8
|
92
|
+
- --main
|
93
|
+
- README
|
94
|
+
- --all
|
95
|
+
- --title
|
96
|
+
- cmess Application documentation
|
97
97
|
require_paths:
|
98
98
|
- lib
|
99
99
|
required_ruby_version: !ruby/object:Gem::Requirement
|