rchardet 1.0 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rchardet/charsetprober.rb +9 -4
- metadata +4 -4
@@ -53,13 +53,18 @@ module CharDet
|
|
53
53
|
end
|
54
54
|
|
55
55
|
def filter_high_bit_only(aBuf)
|
56
|
-
|
57
|
-
|
56
|
+
# DO NOT USE `gsub!`
|
57
|
+
# It will remove all characters from the buffer that is later used by
|
58
|
+
# other probers. This is because gsub! removes data from the instance variable
|
59
|
+
# that will be passed to later probers, while gsub returns a new string
|
60
|
+
# that will not.
|
61
|
+
newBuf = aBuf.gsub(/([\x00-\x7F])+/, ' ')
|
62
|
+
return newBuf
|
58
63
|
end
|
59
64
|
|
60
65
|
def filter_without_english_letters(aBuf)
|
61
|
-
aBuf.gsub
|
62
|
-
return
|
66
|
+
newBuf = aBuf.gsub(/([A-Za-z])+/,' ')
|
67
|
+
return newBuf
|
63
68
|
end
|
64
69
|
|
65
70
|
def filter_with_english_letters(aBuf)
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: rchardet
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "1.0"
|
7
|
-
date: 2007-
|
6
|
+
version: "1.1"
|
7
|
+
date: 2007-07-05 00:00:00 -07:00
|
8
8
|
summary: Character encoding auto-detection in Ruby
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -30,7 +30,6 @@ authors:
|
|
30
30
|
- Jeff Hodges
|
31
31
|
files:
|
32
32
|
- lib/rchardet
|
33
|
-
- lib/rchardet.rb
|
34
33
|
- lib/rchardet/big5freq.rb
|
35
34
|
- lib/rchardet/big5prober.rb
|
36
35
|
- lib/rchardet/chardistribution.rb
|
@@ -65,6 +64,7 @@ files:
|
|
65
64
|
- lib/rchardet/sjisprober.rb
|
66
65
|
- lib/rchardet/universaldetector.rb
|
67
66
|
- lib/rchardet/utf8prober.rb
|
67
|
+
- lib/rchardet.rb
|
68
68
|
- COPYING
|
69
69
|
test_files: []
|
70
70
|
|