edouard-rchardet 1.3.4.0 → 1.3.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +8 -13
- data/lib/rchardet/langhungarianmodel.rb +9 -1
- data/lib/rchardet/sbcsgroupprober.rb +15 -14
- metadata +32 -49
data/README.md
CHANGED
@@ -3,26 +3,21 @@ rchardet
|
|
3
3
|
|
4
4
|
rchardet is an encoding auto-detection library for Ruby. It is a port of the auto-detection code in Mozilla. Encoding auto-detection means taking a sequence of bytes in an unknown character encoding and attempting to determine the encoding so you can read the text. It’s like cracking a code when you don’t have the decryption key.
|
5
5
|
|
6
|
-
This fork is compatible with ruby 1.9.
|
6
|
+
This fork is compatible with Ruby 1.9 and runs in production at [webtranslateit.com](https://webtranslateit.com). Here’s an [introductory blog post about our encoding detection strategy](http://blog.webtranslateit.com/post/6380685137).
|
7
7
|
|
8
8
|
Usage
|
9
9
|
-----
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
```ruby
|
12
|
+
require 'rubygems'
|
13
|
+
require 'rchardet'
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
15
|
+
cd = CharDet.detect(some_data)
|
16
|
+
encoding = cd['encoding']
|
17
|
+
confidence = cd['confidence'] # 0.0 <= confidence <= 1.0
|
18
|
+
```
|
17
19
|
|
18
20
|
Running tests
|
19
21
|
-------------
|
20
22
|
|
21
23
|
ruby spec/all.rb
|
22
|
-
|
23
|
-
Project page
|
24
|
-
------------
|
25
|
-
|
26
|
-
http://rubyforge.org/projects/rchardet
|
27
|
-
|
28
|
-
Made for rFeedParser <http://rfeedparser.rubyforge.org>.
|
@@ -216,7 +216,15 @@ module CharDet
|
|
216
216
|
'charsetName' => "ISO-8859-2"
|
217
217
|
}
|
218
218
|
|
219
|
-
|
219
|
+
Latin1HungarianModel = {
|
220
|
+
'charToOrderMap' => Latin2_HungarianCharToOrderMap,
|
221
|
+
'precedenceMatrix' => HungarianLangModel,
|
222
|
+
'mTypicalPositiveRatio' => 0.930605,
|
223
|
+
'keepEnglishLetter' => true,
|
224
|
+
'charsetName' => "ISO-8859-1"
|
225
|
+
}
|
226
|
+
|
227
|
+
Win1250HungarianModel = {
|
220
228
|
'charToOrderMap' => Win1250HungarianCharToOrderMap,
|
221
229
|
'precedenceMatrix' => HungarianLangModel,
|
222
230
|
'mTypicalPositiveRatio' => 0.947368,
|
@@ -31,20 +31,21 @@ module CharDet
|
|
31
31
|
class SBCSGroupProber < CharSetGroupProber
|
32
32
|
def initialize
|
33
33
|
super
|
34
|
-
@_mProbers = [
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
34
|
+
@_mProbers = [
|
35
|
+
SingleByteCharSetProber.new(Win1251CyrillicModel),
|
36
|
+
SingleByteCharSetProber.new(Koi8rModel),
|
37
|
+
SingleByteCharSetProber.new(Latin5CyrillicModel),
|
38
|
+
SingleByteCharSetProber.new(MacCyrillicModel),
|
39
|
+
SingleByteCharSetProber.new(Ibm866Model),
|
40
|
+
SingleByteCharSetProber.new(Ibm855Model),
|
41
|
+
SingleByteCharSetProber.new(Latin7GreekModel),
|
42
|
+
SingleByteCharSetProber.new(Win1253GreekModel),
|
43
|
+
SingleByteCharSetProber.new(Latin5BulgarianModel),
|
44
|
+
SingleByteCharSetProber.new(Win1251BulgarianModel),
|
45
|
+
SingleByteCharSetProber.new(Latin1HungarianModel),
|
46
|
+
SingleByteCharSetProber.new(Latin2HungarianModel),
|
47
|
+
SingleByteCharSetProber.new(Win1250HungarianModel),
|
48
|
+
SingleByteCharSetProber.new(TIS620ThaiModel),
|
48
49
|
]
|
49
50
|
hebrewProber = HebrewProber.new()
|
50
51
|
logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
|
metadata
CHANGED
@@ -1,48 +1,39 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: edouard-rchardet
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 3
|
8
|
-
- 4
|
9
|
-
- 0
|
10
|
-
version: 1.3.4.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.3.4.1
|
5
|
+
prerelease:
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Jeff Hodges
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2011-05-26 00:00:00 +02:00
|
12
|
+
date: 2011-08-01 00:00:00.000000000 +02:00
|
19
13
|
default_executable:
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
22
16
|
name: bacon
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &2157473800 !ruby/object:Gem::Requirement
|
25
18
|
none: false
|
26
|
-
requirements:
|
19
|
+
requirements:
|
27
20
|
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
segments:
|
30
|
-
- 1
|
31
|
-
- 1
|
32
|
-
- 0
|
21
|
+
- !ruby/object:Gem::Version
|
33
22
|
version: 1.1.0
|
34
23
|
type: :development
|
35
|
-
|
36
|
-
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *2157473800
|
26
|
+
description: Character encoding auto-detection in Ruby. This library is a port of
|
27
|
+
the auto-detection code in Mozilla. It means taking a sequence of bytes in an unknown
|
28
|
+
character encoding, and attempting to determine the encoding so you can read the
|
29
|
+
text. It’s like cracking a code when you don’t have the decryption key.
|
37
30
|
email: jeff at somethingsimilar dot com
|
38
31
|
executables: []
|
39
|
-
|
40
32
|
extensions: []
|
41
|
-
|
42
|
-
extra_rdoc_files:
|
33
|
+
extra_rdoc_files:
|
43
34
|
- README.md
|
44
35
|
- COPYING
|
45
|
-
files:
|
36
|
+
files:
|
46
37
|
- COPYING
|
47
38
|
- Rakefile
|
48
39
|
- README.md
|
@@ -84,34 +75,26 @@ files:
|
|
84
75
|
has_rdoc: true
|
85
76
|
homepage: http://github.com/mcommons/rchardet/tree/master
|
86
77
|
licenses: []
|
87
|
-
|
88
78
|
post_install_message:
|
89
79
|
rdoc_options: []
|
90
|
-
|
91
|
-
require_paths:
|
80
|
+
require_paths:
|
92
81
|
- lib
|
93
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
83
|
none: false
|
95
|
-
requirements:
|
96
|
-
- -
|
97
|
-
- !ruby/object:Gem::Version
|
98
|
-
|
99
|
-
|
100
|
-
version: "0"
|
101
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
89
|
none: false
|
103
|
-
requirements:
|
104
|
-
- -
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
|
107
|
-
- 0
|
108
|
-
version: "0"
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
109
94
|
requirements: []
|
110
|
-
|
111
95
|
rubyforge_project: rchardet
|
112
|
-
rubygems_version: 1.
|
96
|
+
rubygems_version: 1.6.2
|
113
97
|
signing_key:
|
114
98
|
specification_version: 3
|
115
|
-
summary: Character encoding
|
99
|
+
summary: Character encoding detection in Ruby. Ruby 1.9 compatible.
|
116
100
|
test_files: []
|
117
|
-
|