rchardet 1.0 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rchardet/charsetprober.rb +9 -4
- metadata +4 -4
@@ -53,13 +53,18 @@ module CharDet
|
|
53
53
|
end
|
54
54
|
|
55
55
|
def filter_high_bit_only(aBuf)
|
56
|
-
|
57
|
-
|
56
|
+
# DO NOT USE `gsub!`
|
57
|
+
# It will remove all characters from the buffer that is later used by
|
58
|
+
# other probers. This is because gsub! removes data from the instance variable
|
59
|
+
# that will be passed to later probers, while gsub makes a new instance variable
|
60
|
+
# that will not.
|
61
|
+
newBuf = aBuf.gsub(/([\x00-\x7F])+/, ' ')
|
62
|
+
return newBuf
|
58
63
|
end
|
59
64
|
|
60
65
|
def filter_without_english_letters(aBuf)
|
61
|
-
aBuf.gsub
|
62
|
-
return
|
66
|
+
newBuf = aBuf.gsub(/([A-Za-z])+/,' ')
|
67
|
+
return newBuf
|
63
68
|
end
|
64
69
|
|
65
70
|
def filter_with_english_letters(aBuf)
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: rchardet
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: "1.0"
|
7
|
-
date: 2007-
|
6
|
+
version: "1.1"
|
7
|
+
date: 2007-07-05 00:00:00 -07:00
|
8
8
|
summary: Character encoding auto-detection in Ruby
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -30,7 +30,6 @@ authors:
|
|
30
30
|
- Jeff Hodges
|
31
31
|
files:
|
32
32
|
- lib/rchardet
|
33
|
-
- lib/rchardet.rb
|
34
33
|
- lib/rchardet/big5freq.rb
|
35
34
|
- lib/rchardet/big5prober.rb
|
36
35
|
- lib/rchardet/chardistribution.rb
|
@@ -65,6 +64,7 @@ files:
|
|
65
64
|
- lib/rchardet/sjisprober.rb
|
66
65
|
- lib/rchardet/universaldetector.rb
|
67
66
|
- lib/rchardet/utf8prober.rb
|
67
|
+
- lib/rchardet.rb
|
68
68
|
- COPYING
|
69
69
|
test_files: []
|
70
70
|
|