cmess 0.0.5.182 → 0.0.5.184

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/cmess/guess_encoding.rb +33 -7
  2. metadata +7 -7
@@ -30,6 +30,8 @@
30
30
  ###############################################################################
31
31
  #++
32
32
 
33
+ $KCODE = 'u'
34
+
33
35
  require 'iconv'
34
36
  require 'forwardable'
35
37
 
@@ -145,6 +147,31 @@ module CMess::GuessEncoding
145
147
 
146
148
  include Encoding
147
149
 
150
+ # Creates a converter for desired encoding (from UTF-8)
151
+ ICONV_FOR = Hash.new { |h, k| h[k] = Iconv.new(k, UTF_8) }
152
+
153
+ # Encodings to test statistically by TEST_CHARS
154
+ TEST_ENCODINGS = [
155
+ MACINTOSH,
156
+ ISO_8859_1,
157
+ ISO_8859_15,
158
+ CP1252,
159
+ CP850,
160
+ MS_ANSI
161
+ ]
162
+
163
+ # Certain chars to test for in TEST_ENCODINGS
164
+ TEST_CHARS = 'ÁÀÂÄÃÇÉÈÊËÍÌÎÏÑÓÒÔÖÚÙÛÜÆáàâäãçéèêëíìîïñóòôöúùûüæ'.
165
+ split(//).inject(Hash.new { |h, k| h[k] = [] }) { |hash, char|
166
+ TEST_ENCODINGS.each { |encoding|
167
+ hash[encoding] += ICONV_FOR[encoding].iconv(char).unpack('C')
168
+ }
169
+ hash
170
+ }
171
+
172
+ # Relative count of TEST_CHARS must exceed this threshold to yield a match
173
+ TEST_THRESHOLD = 0.0004
174
+
148
175
  @supported_encodings = []
149
176
  @encoding_guessers = []
150
177
  @supported_boms = []
@@ -318,13 +345,12 @@ module CMess::GuessEncoding
318
345
  esc_bytes > 0 && esc_bytes == fol_bytes
319
346
  end
320
347
 
321
- # Analyse statistical appearance of German umlauts (=> ÄäÖöÜüß)
322
- encodings MACINTOSH, ISO_8859_1 do
323
- {
324
- MACINTOSH => [0x80, 0x8a, 0x85, 0x9a, 0x86, 0x9f, 0xa7],
325
- ISO_8859_1 => [0xc4, 0xe4, 0xd6, 0xf6, 0xdc, 0xfc, 0xdf]
326
- }.each { |encoding, umlauts|
327
- break encoding if relative_byte_count(byte_count_sum(umlauts)) > 0.001
348
+ # Analyse statistical appearance of German umlauts and other accented
349
+ # letters (see TEST_CHARS)
350
+ encodings *TEST_ENCODINGS do
351
+ TEST_ENCODINGS.each { |encoding|
352
+ break encoding if
353
+ relative_byte_count(byte_count_sum(TEST_CHARS[encoding])) > TEST_THRESHOLD
328
354
  }
329
355
  end
330
356
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmess
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5.182
4
+ version: 0.0.5.184
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-21 00:00:00 +01:00
12
+ date: 2008-01-23 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -85,15 +85,15 @@ has_rdoc: true
85
85
  homepage: http://prometheus.rubyforge.org/cmess
86
86
  post_install_message:
87
87
  rdoc_options:
88
- - --all
89
- - --title
90
- - cmess Application documentation
91
88
  - --line-numbers
92
- - --main
93
- - README
94
89
  - --inline-source
95
90
  - --charset
96
91
  - UTF-8
92
+ - --main
93
+ - README
94
+ - --all
95
+ - --title
96
+ - cmess Application documentation
97
97
  require_paths:
98
98
  - lib
99
99
  required_ruby_version: !ruby/object:Gem::Requirement