edouard-rchardet 1.3.4.0 → 1.3.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -3,26 +3,21 @@ rchardet
3
3
 
4
4
  rchardet is an encoding auto-detection library in Ruby. This library is a port of the auto-detection code in Mozilla. It means taking a sequence of bytes in an unknown character encoding, and attempting to determine the encoding so you can read the text. It’s like cracking a code when you don’t have the decryption key.
5
5
 
6
- This fork is compatible with ruby 1.9.
6
+ This fork is compatible with Ruby 1.9 and runs in production at [webtranslateit.com](https://webtranslateit.com). Here’s an [introductory blog post about our encoding detection strategy](http://blog.webtranslateit.com/post/6380685137).
7
7
 
8
8
  Usage
9
9
  -----
10
10
 
11
- require 'rubygems'
12
- require 'rchardet'
11
+ ```ruby
12
+ require 'rubygems'
13
+ require 'rchardet'
13
14
 
14
- cd = CharDet.detect(some_data)
15
- encoding = cd['encoding']
16
- confidence = cd['confidence'] # 0.0 <= confidence <= 1.0
15
+ cd = CharDet.detect(some_data)
16
+ encoding = cd['encoding']
17
+ confidence = cd['confidence'] # 0.0 <= confidence <= 1.0
18
+ ```
17
19
 
18
20
  Running tests
19
21
  -------------
20
22
 
21
23
  ruby spec/all.rb
22
-
23
- Project page
24
- ------------
25
-
26
- http://rubyforge.org/projects/rchardet
27
-
28
- Made for rFeedParser <http://rfeedparser.rubyforge.org>.
@@ -216,7 +216,15 @@ module CharDet
216
216
  'charsetName' => "ISO-8859-2"
217
217
  }
218
218
 
219
- Win1250HungarianModel = {
219
+ Latin1HungarianModel = {
220
+ 'charToOrderMap' => Latin2_HungarianCharToOrderMap,
221
+ 'precedenceMatrix' => HungarianLangModel,
222
+ 'mTypicalPositiveRatio' => 0.930605,
223
+ 'keepEnglishLetter' => true,
224
+ 'charsetName' => "ISO-8859-1"
225
+ }
226
+
227
+ Win1250HungarianModel = {
220
228
  'charToOrderMap' => Win1250HungarianCharToOrderMap,
221
229
  'precedenceMatrix' => HungarianLangModel,
222
230
  'mTypicalPositiveRatio' => 0.947368,
@@ -31,20 +31,21 @@ module CharDet
31
31
  class SBCSGroupProber < CharSetGroupProber
32
32
  def initialize
33
33
  super
34
- @_mProbers = [
35
- SingleByteCharSetProber.new(Win1251CyrillicModel),
36
- SingleByteCharSetProber.new(Koi8rModel),
37
- SingleByteCharSetProber.new(Latin5CyrillicModel),
38
- SingleByteCharSetProber.new(MacCyrillicModel),
39
- SingleByteCharSetProber.new(Ibm866Model),
40
- SingleByteCharSetProber.new(Ibm855Model),
41
- SingleByteCharSetProber.new(Latin7GreekModel),
42
- SingleByteCharSetProber.new(Win1253GreekModel),
43
- SingleByteCharSetProber.new(Latin5BulgarianModel),
44
- SingleByteCharSetProber.new(Win1251BulgarianModel),
45
- SingleByteCharSetProber.new(Latin2HungarianModel),
46
- SingleByteCharSetProber.new(Win1250HungarianModel),
47
- SingleByteCharSetProber.new(TIS620ThaiModel),
34
+ @_mProbers = [
35
+ SingleByteCharSetProber.new(Win1251CyrillicModel),
36
+ SingleByteCharSetProber.new(Koi8rModel),
37
+ SingleByteCharSetProber.new(Latin5CyrillicModel),
38
+ SingleByteCharSetProber.new(MacCyrillicModel),
39
+ SingleByteCharSetProber.new(Ibm866Model),
40
+ SingleByteCharSetProber.new(Ibm855Model),
41
+ SingleByteCharSetProber.new(Latin7GreekModel),
42
+ SingleByteCharSetProber.new(Win1253GreekModel),
43
+ SingleByteCharSetProber.new(Latin5BulgarianModel),
44
+ SingleByteCharSetProber.new(Win1251BulgarianModel),
45
+ SingleByteCharSetProber.new(Latin1HungarianModel),
46
+ SingleByteCharSetProber.new(Latin2HungarianModel),
47
+ SingleByteCharSetProber.new(Win1250HungarianModel),
48
+ SingleByteCharSetProber.new(TIS620ThaiModel),
48
49
  ]
49
50
  hebrewProber = HebrewProber.new()
50
51
  logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
metadata CHANGED
@@ -1,48 +1,39 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: edouard-rchardet
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 3
8
- - 4
9
- - 0
10
- version: 1.3.4.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.3.4.1
5
+ prerelease:
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Jeff Hodges
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-05-26 00:00:00 +02:00
12
+ date: 2011-08-01 00:00:00.000000000 +02:00
19
13
  default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: bacon
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &2157473800 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
19
+ requirements:
27
20
  - - ~>
28
- - !ruby/object:Gem::Version
29
- segments:
30
- - 1
31
- - 1
32
- - 0
21
+ - !ruby/object:Gem::Version
33
22
  version: 1.1.0
34
23
  type: :development
35
- version_requirements: *id001
36
- description: "Character encoding auto-detection in Ruby. This library is a port of the auto-detection code in Mozilla. It means taking a sequence of bytes in an unknown character encoding, and attempting to determine the encoding so you can read the text. It\xE2\x80\x99s like cracking a code when you don\xE2\x80\x99t have the decryption key."
24
+ prerelease: false
25
+ version_requirements: *2157473800
26
+ description: Character encoding auto-detection in Ruby. This library is a port of
27
+ the auto-detection code in Mozilla. It means taking a sequence of bytes in an unknown
28
+ character encoding, and attempting to determine the encoding so you can read the
29
+ text. It’s like cracking a code when you don’t have the decryption key.
37
30
  email: jeff at somethingsimilar dot com
38
31
  executables: []
39
-
40
32
  extensions: []
41
-
42
- extra_rdoc_files:
33
+ extra_rdoc_files:
43
34
  - README.md
44
35
  - COPYING
45
- files:
36
+ files:
46
37
  - COPYING
47
38
  - Rakefile
48
39
  - README.md
@@ -84,34 +75,26 @@ files:
84
75
  has_rdoc: true
85
76
  homepage: http://github.com/mcommons/rchardet/tree/master
86
77
  licenses: []
87
-
88
78
  post_install_message:
89
79
  rdoc_options: []
90
-
91
- require_paths:
80
+ require_paths:
92
81
  - lib
93
- required_ruby_version: !ruby/object:Gem::Requirement
82
+ required_ruby_version: !ruby/object:Gem::Requirement
94
83
  none: false
95
- requirements:
96
- - - ">="
97
- - !ruby/object:Gem::Version
98
- segments:
99
- - 0
100
- version: "0"
101
- required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
89
  none: false
103
- requirements:
104
- - - ">="
105
- - !ruby/object:Gem::Version
106
- segments:
107
- - 0
108
- version: "0"
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
109
94
  requirements: []
110
-
111
95
  rubyforge_project: rchardet
112
- rubygems_version: 1.3.7
96
+ rubygems_version: 1.6.2
113
97
  signing_key:
114
98
  specification_version: 3
115
- summary: Character encoding auto-detection in Ruby. As smart as your browser. Open source.
99
+ summary: Character encoding detection in Ruby. Ruby 1.9 compatible.
116
100
  test_files: []
117
-