rchardet 1.1 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +0 -0
- data/README +12 -0
- data/lib/rchardet.rb +14 -8
- data/lib/rchardet/big5freq.rb +0 -0
- data/lib/rchardet/big5prober.rb +0 -0
- data/lib/rchardet/chardistribution.rb +0 -0
- data/lib/rchardet/charsetgroupprober.rb +0 -0
- data/lib/rchardet/charsetprober.rb +0 -0
- data/lib/rchardet/codingstatemachine.rb +0 -0
- data/lib/rchardet/constants.rb +0 -0
- data/lib/rchardet/escprober.rb +0 -0
- data/lib/rchardet/escsm.rb +0 -0
- data/lib/rchardet/eucjpprober.rb +0 -0
- data/lib/rchardet/euckrfreq.rb +0 -0
- data/lib/rchardet/euckrprober.rb +0 -0
- data/lib/rchardet/euctwfreq.rb +0 -0
- data/lib/rchardet/euctwprober.rb +0 -0
- data/lib/rchardet/gb2312freq.rb +0 -0
- data/lib/rchardet/gb2312prober.rb +0 -0
- data/lib/rchardet/hebrewprober.rb +0 -0
- data/lib/rchardet/jisfreq.rb +0 -0
- data/lib/rchardet/jpcntx.rb +0 -0
- data/lib/rchardet/langbulgarianmodel.rb +0 -0
- data/lib/rchardet/langcyrillicmodel.rb +0 -0
- data/lib/rchardet/langgreekmodel.rb +0 -0
- data/lib/rchardet/langhebrewmodel.rb +0 -0
- data/lib/rchardet/langhungarianmodel.rb +0 -0
- data/lib/rchardet/langthaimodel.rb +0 -0
- data/lib/rchardet/latin1prober.rb +0 -0
- data/lib/rchardet/mbcharsetprober.rb +0 -0
- data/lib/rchardet/mbcsgroupprober.rb +0 -0
- data/lib/rchardet/mbcssm.rb +0 -0
- data/lib/rchardet/sbcharsetprober.rb +0 -0
- data/lib/rchardet/sbcsgroupprober.rb +0 -0
- data/lib/rchardet/sjisprober.rb +0 -0
- data/lib/rchardet/universaldetector.rb +2 -2
- data/lib/rchardet/utf8prober.rb +0 -0
- metadata +42 -33
data/COPYING
CHANGED
File without changes
|
data/README
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
Usage:
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rchardet'
|
4
|
+
|
5
|
+
cd = CharDet.detect(some_data)
|
6
|
+
encoding = cd['encoding']
|
7
|
+
confidence = cd['confidence'] # 0.0 <= confidence <= 1.0
|
8
|
+
|
9
|
+
Project page:
|
10
|
+
http://rubyforge.org/projects/rchardet
|
11
|
+
|
12
|
+
Made for rFeedParser <http://rfeedparser.rubyforge.org>.
|
data/lib/rchardet.rb
CHANGED
@@ -14,6 +14,9 @@
|
|
14
14
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
15
15
|
# 02110-1301 USA
|
16
16
|
######################### END LICENSE BLOCK #########################
|
17
|
+
|
18
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
|
19
|
+
|
17
20
|
require 'rchardet/charsetprober'
|
18
21
|
require 'rchardet/mbcharsetprober'
|
19
22
|
|
@@ -51,11 +54,14 @@ require 'rchardet/sbcsgroupprober'
|
|
51
54
|
require 'rchardet/sjisprober'
|
52
55
|
require 'rchardet/universaldetector'
|
53
56
|
require 'rchardet/utf8prober'
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
57
|
+
|
58
|
+
module CharDet
|
59
|
+
VERSION = "1.2"
|
60
|
+
def CharDet.detect(aBuf)
|
61
|
+
u = UniversalDetector.new
|
62
|
+
u.reset
|
63
|
+
u.feed(aBuf)
|
64
|
+
u.close
|
65
|
+
u.result
|
66
|
+
end
|
67
|
+
end
|
data/lib/rchardet/big5freq.rb
CHANGED
File without changes
|
data/lib/rchardet/big5prober.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/constants.rb
CHANGED
File without changes
|
data/lib/rchardet/escprober.rb
CHANGED
File without changes
|
data/lib/rchardet/escsm.rb
CHANGED
File without changes
|
data/lib/rchardet/eucjpprober.rb
CHANGED
File without changes
|
data/lib/rchardet/euckrfreq.rb
CHANGED
File without changes
|
data/lib/rchardet/euckrprober.rb
CHANGED
File without changes
|
data/lib/rchardet/euctwfreq.rb
CHANGED
File without changes
|
data/lib/rchardet/euctwprober.rb
CHANGED
File without changes
|
data/lib/rchardet/gb2312freq.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/jisfreq.rb
CHANGED
File without changes
|
data/lib/rchardet/jpcntx.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/mbcssm.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/sjisprober.rb
CHANGED
File without changes
|
@@ -81,10 +81,10 @@ module CharDet
|
|
81
81
|
elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
|
82
82
|
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
83
83
|
@result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
|
84
|
-
elsif aBuf[0...2] ==
|
84
|
+
elsif aBuf[0...2] == "\xFF\xFE"
|
85
85
|
# FF FE UTF-16, little endian BOM
|
86
86
|
@result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
|
87
|
-
elsif aBuf[0...2] ==
|
87
|
+
elsif aBuf[0...2] == "\xFE\xFF"
|
88
88
|
# FE FF UTF-16, big endian BOM
|
89
89
|
@result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
|
90
90
|
end
|
data/lib/rchardet/utf8prober.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,33 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.2
|
3
|
-
specification_version: 1
|
4
2
|
name: rchardet
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "1.1"
|
7
|
-
date: 2007-07-05 00:00:00 -07:00
|
8
|
-
summary: Character encoding auto-detection in Ruby
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: jeff at somethingsimilar dot com
|
12
|
-
homepage: http://rubyforge.org/projects/rchardet
|
13
|
-
rubyforge_project: rchardet
|
14
|
-
description:
|
15
|
-
autorequire: chardet
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: false
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: "1.2"
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Jeff Hodges
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-08-22 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: jeff at somethingsimilar dot com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
- COPYING
|
31
25
|
files:
|
32
26
|
- lib/rchardet
|
33
27
|
- lib/rchardet/big5freq.rb
|
@@ -65,18 +59,33 @@ files:
|
|
65
59
|
- lib/rchardet/universaldetector.rb
|
66
60
|
- lib/rchardet/utf8prober.rb
|
67
61
|
- lib/rchardet.rb
|
62
|
+
- README
|
68
63
|
- COPYING
|
69
|
-
|
70
|
-
|
64
|
+
has_rdoc: false
|
65
|
+
homepage: http://github.com/jmhodges/rchardet/tree/master
|
66
|
+
post_install_message:
|
71
67
|
rdoc_options: []
|
72
68
|
|
73
|
-
|
74
|
-
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: "0"
|
76
|
+
version:
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: "0"
|
82
|
+
version:
|
79
83
|
requirements: []
|
80
84
|
|
81
|
-
|
85
|
+
rubyforge_project: rchardet
|
86
|
+
rubygems_version: 1.2.0
|
87
|
+
signing_key:
|
88
|
+
specification_version: 2
|
89
|
+
summary: Character encoding auto-detection in Ruby. As smart as your browser. Open source.
|
90
|
+
test_files: []
|
82
91
|
|