cmess 0.0.5.182 → 0.0.5.184

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/cmess/guess_encoding.rb +33 -7
  2. metadata +7 -7
@@ -30,6 +30,8 @@
30
30
  ###############################################################################
31
31
  #++
32
32
 
33
+ $KCODE = 'u'
34
+
33
35
  require 'iconv'
34
36
  require 'forwardable'
35
37
 
@@ -145,6 +147,31 @@ module CMess::GuessEncoding
145
147
 
146
148
  include Encoding
147
149
 
150
+ # Lazily creates and caches a converter from UTF-8 to the desired encoding
151
+ ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
152
+
153
+ # Encodings to test statistically by TEST_CHARS
154
+ TEST_ENCODINGS = [
155
+ MACINTOSH,
156
+ ISO_8859_1,
157
+ ISO_8859_15,
158
+ CP1252,
159
+ CP850,
160
+ MS_ANSI
161
+ ]
162
+
163
+ # Byte values of certain accented chars, keyed by each of TEST_ENCODINGS
164
+ TEST_CHARS = 'ÁÀÂÄÃÇÉÈÊËÍÌÎÏÑÓÒÔÖÚÙÛÜÆáàâäãçéèêëíìîïñóòôöúùûüæ'.
165
+ split(//).inject(Hash.new { |h, k| h[k] = [] }) { |hash, char|
166
+ TEST_ENCODINGS.each { |encoding|
167
+ hash[encoding] += ICONV_FOR[encoding].iconv(char).unpack('C')
168
+ }
169
+ hash
170
+ }
171
+
172
+ # Relative count of TEST_CHARS must exceed this threshold to yield a match
173
+ TEST_THRESHOLD = 0.0004
174
+
148
175
  @supported_encodings = []
149
176
  @encoding_guessers = []
150
177
  @supported_boms = []
@@ -318,13 +345,12 @@ module CMess::GuessEncoding
318
345
  esc_bytes > 0 && esc_bytes == fol_bytes
319
346
  end
320
347
 
321
- # Analyse statistical appearance of German umlauts (=> ÄäÖöÜüß)
322
- encodings MACINTOSH, ISO_8859_1 do
323
- {
324
- MACINTOSH => [0x80, 0x8a, 0x85, 0x9a, 0x86, 0x9f, 0xa7],
325
- ISO_8859_1 => [0xc4, 0xe4, 0xd6, 0xf6, 0xdc, 0xfc, 0xdf]
326
- }.each { |encoding, umlauts|
327
- break encoding if relative_byte_count(byte_count_sum(umlauts)) > 0.001
348
+ # Analyse statistical appearance of German umlauts and other accented
349
+ # letters (see TEST_CHARS)
350
+ encodings *TEST_ENCODINGS do
351
+ TEST_ENCODINGS.each { |encoding|
352
+ break encoding if
353
+ relative_byte_count(byte_count_sum(TEST_CHARS[encoding])) > TEST_THRESHOLD
328
354
  }
329
355
  end
330
356
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmess
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5.182
4
+ version: 0.0.5.184
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-21 00:00:00 +01:00
12
+ date: 2008-01-23 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -85,15 +85,15 @@ has_rdoc: true
85
85
  homepage: http://prometheus.rubyforge.org/cmess
86
86
  post_install_message:
87
87
  rdoc_options:
88
- - --all
89
- - --title
90
- - cmess Application documentation
91
88
  - --line-numbers
92
- - --main
93
- - README
94
89
  - --inline-source
95
90
  - --charset
96
91
  - UTF-8
92
+ - --main
93
+ - README
94
+ - --all
95
+ - --title
96
+ - cmess Application documentation
97
97
  require_paths:
98
98
  - lib
99
99
  required_ruby_version: !ruby/object:Gem::Requirement