rchardet 1.1 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +0 -0
- data/README +12 -0
- data/lib/rchardet.rb +14 -8
- data/lib/rchardet/big5freq.rb +0 -0
- data/lib/rchardet/big5prober.rb +0 -0
- data/lib/rchardet/chardistribution.rb +0 -0
- data/lib/rchardet/charsetgroupprober.rb +0 -0
- data/lib/rchardet/charsetprober.rb +0 -0
- data/lib/rchardet/codingstatemachine.rb +0 -0
- data/lib/rchardet/constants.rb +0 -0
- data/lib/rchardet/escprober.rb +0 -0
- data/lib/rchardet/escsm.rb +0 -0
- data/lib/rchardet/eucjpprober.rb +0 -0
- data/lib/rchardet/euckrfreq.rb +0 -0
- data/lib/rchardet/euckrprober.rb +0 -0
- data/lib/rchardet/euctwfreq.rb +0 -0
- data/lib/rchardet/euctwprober.rb +0 -0
- data/lib/rchardet/gb2312freq.rb +0 -0
- data/lib/rchardet/gb2312prober.rb +0 -0
- data/lib/rchardet/hebrewprober.rb +0 -0
- data/lib/rchardet/jisfreq.rb +0 -0
- data/lib/rchardet/jpcntx.rb +0 -0
- data/lib/rchardet/langbulgarianmodel.rb +0 -0
- data/lib/rchardet/langcyrillicmodel.rb +0 -0
- data/lib/rchardet/langgreekmodel.rb +0 -0
- data/lib/rchardet/langhebrewmodel.rb +0 -0
- data/lib/rchardet/langhungarianmodel.rb +0 -0
- data/lib/rchardet/langthaimodel.rb +0 -0
- data/lib/rchardet/latin1prober.rb +0 -0
- data/lib/rchardet/mbcharsetprober.rb +0 -0
- data/lib/rchardet/mbcsgroupprober.rb +0 -0
- data/lib/rchardet/mbcssm.rb +0 -0
- data/lib/rchardet/sbcharsetprober.rb +0 -0
- data/lib/rchardet/sbcsgroupprober.rb +0 -0
- data/lib/rchardet/sjisprober.rb +0 -0
- data/lib/rchardet/universaldetector.rb +2 -2
- data/lib/rchardet/utf8prober.rb +0 -0
- metadata +42 -33
data/COPYING
CHANGED
File without changes
|
data/README
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
Usage:
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rchardet'
|
4
|
+
|
5
|
+
cd = CharDet.detect(some_data)
|
6
|
+
encoding = cd['encoding']
|
7
|
+
confidence = cd['confidence'] # 0.0 <= confidence <= 1.0
|
8
|
+
|
9
|
+
Project page:
|
10
|
+
http://rubyforge.org/projects/rchardet
|
11
|
+
|
12
|
+
Made for rFeedParser <http://rfeedparser.rubyforge.org>.
|
data/lib/rchardet.rb
CHANGED
@@ -14,6 +14,9 @@
|
|
14
14
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
15
15
|
# 02110-1301 USA
|
16
16
|
######################### END LICENSE BLOCK #########################
|
17
|
+
|
18
|
+
$LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__))
|
19
|
+
|
17
20
|
require 'rchardet/charsetprober'
|
18
21
|
require 'rchardet/mbcharsetprober'
|
19
22
|
|
@@ -51,11 +54,14 @@ require 'rchardet/sbcsgroupprober'
|
|
51
54
|
require 'rchardet/sjisprober'
|
52
55
|
require 'rchardet/universaldetector'
|
53
56
|
require 'rchardet/utf8prober'
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
57
|
+
|
58
|
+
module CharDet
|
59
|
+
VERSION = "1.2"
|
60
|
+
def CharDet.detect(aBuf)
|
61
|
+
u = UniversalDetector.new
|
62
|
+
u.reset
|
63
|
+
u.feed(aBuf)
|
64
|
+
u.close
|
65
|
+
u.result
|
66
|
+
end
|
67
|
+
end
|
data/lib/rchardet/big5freq.rb
CHANGED
File without changes
|
data/lib/rchardet/big5prober.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/constants.rb
CHANGED
File without changes
|
data/lib/rchardet/escprober.rb
CHANGED
File without changes
|
data/lib/rchardet/escsm.rb
CHANGED
File without changes
|
data/lib/rchardet/eucjpprober.rb
CHANGED
File without changes
|
data/lib/rchardet/euckrfreq.rb
CHANGED
File without changes
|
data/lib/rchardet/euckrprober.rb
CHANGED
File without changes
|
data/lib/rchardet/euctwfreq.rb
CHANGED
File without changes
|
data/lib/rchardet/euctwprober.rb
CHANGED
File without changes
|
data/lib/rchardet/gb2312freq.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/jisfreq.rb
CHANGED
File without changes
|
data/lib/rchardet/jpcntx.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/mbcssm.rb
CHANGED
File without changes
|
File without changes
|
File without changes
|
data/lib/rchardet/sjisprober.rb
CHANGED
File without changes
|
data/lib/rchardet/universaldetector.rb
CHANGED
@@ -81,10 +81,10 @@ module CharDet
|
|
81
81
|
elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
|
82
82
|
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
83
83
|
@result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
|
84
|
-
elsif aBuf[0...2] ==
|
84
|
+
elsif aBuf[0...2] == "\xFF\xFE"
|
85
85
|
# FF FE UTF-16, little endian BOM
|
86
86
|
@result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
|
87
|
-
elsif aBuf[0...2] ==
|
87
|
+
elsif aBuf[0...2] == "\xFE\xFF"
|
88
88
|
# FE FF UTF-16, big endian BOM
|
89
89
|
@result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
|
90
90
|
end
|
data/lib/rchardet/utf8prober.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,33 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.2
|
3
|
-
specification_version: 1
|
4
2
|
name: rchardet
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "1.
|
7
|
-
date: 2007-07-05 00:00:00 -07:00
|
8
|
-
summary: Character encoding auto-detection in Ruby
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: jeff at somethingsimilar dot com
|
12
|
-
homepage: http://rubyforge.org/projects/rchardet
|
13
|
-
rubyforge_project: rchardet
|
14
|
-
description:
|
15
|
-
autorequire: chardet
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: false
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: "1.2"
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- Jeff Hodges
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-08-22 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: jeff at somethingsimilar dot com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
- COPYING
|
31
25
|
files:
|
32
26
|
- lib/rchardet
|
33
27
|
- lib/rchardet/big5freq.rb
|
@@ -65,18 +59,33 @@ files:
|
|
65
59
|
- lib/rchardet/universaldetector.rb
|
66
60
|
- lib/rchardet/utf8prober.rb
|
67
61
|
- lib/rchardet.rb
|
62
|
+
- README
|
68
63
|
- COPYING
|
69
|
-
|
70
|
-
|
64
|
+
has_rdoc: false
|
65
|
+
homepage: http://github.com/jmhodges/rchardet/tree/master
|
66
|
+
post_install_message:
|
71
67
|
rdoc_options: []
|
72
68
|
|
73
|
-
|
74
|
-
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: "0"
|
76
|
+
version:
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: "0"
|
82
|
+
version:
|
79
83
|
requirements: []
|
80
84
|
|
81
|
-
|
85
|
+
rubyforge_project: rchardet
|
86
|
+
rubygems_version: 1.2.0
|
87
|
+
signing_key:
|
88
|
+
specification_version: 2
|
89
|
+
summary: Character encoding auto-detection in Ruby. As smart as your browser. Open source.
|
90
|
+
test_files: []
|
82
91
|
|