rchardet 1.3 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -57,8 +57,8 @@ module CharDet
57
57
  if order >= 0
58
58
  @_mTotalChars += 1
59
59
  # order is valid
60
- if order < @_mTableSize:
61
- if 512 > @_mCharToFreqOrder[order]:
60
+ if order < @_mTableSize
61
+ if 512 > @_mCharToFreqOrder[order]
62
62
  @_mFreqChars += 1
63
63
  end
64
64
  end
@@ -72,7 +72,7 @@ module CharDet
72
72
  return SURE_NO
73
73
  end
74
74
 
75
- if @_mTotalChars != @_mFreqChars:
75
+ if @_mTotalChars != @_mFreqChars
76
76
  r = @_mFreqChars / ((@_mTotalChars - @_mFreqChars) * @_mTypicalDistributionRatio)
77
77
  if r < SURE_YES
78
78
  return r
@@ -227,10 +227,10 @@ module CharDet
227
227
  # first byte range: 0xa0 -- 0xfe
228
228
  # second byte range: 0xa1 -- 0xfe
229
229
  # no validation needed here. State machine has done that
230
- if aStr[0..0] >= "\xA0":
231
- return 94 * (aStr[0] - 0xA1) + aStr[1] - 0xa1
230
+ if aStr[0..0] >= "\xA0"
231
+ return 94 * (aStr[0] - 0xA1) + aStr[1] - 0xa1
232
232
  else
233
- return -1
233
+ return -1
234
234
  end
235
235
  end
236
236
  end
@@ -40,7 +40,7 @@ module CharDet
40
40
  super
41
41
  @_mActiveNum = 0
42
42
 
43
- for prober in @_mProbers:
43
+ for prober in @_mProbers
44
44
  if prober
45
45
  prober.reset()
46
46
  prober.active = true
@@ -28,6 +28,8 @@
28
28
 
29
29
  module CharDet
30
30
  class CodingStateMachine
31
+ attr_accessor :active
32
+
31
33
  def initialize(sm)
32
34
  @_mModel = sm
33
35
  @_mCurrentBytePos = 0
@@ -41,7 +41,7 @@ module CharDet
41
41
 
42
42
  def reset
43
43
  super()
44
- for codingSM in @_mCodingSM:
44
+ for codingSM in @_mCodingSM
45
45
  next if not codingSM
46
46
  codingSM.active = true
47
47
  codingSM.reset()
@@ -56,7 +56,7 @@ module CharDet
56
56
  elsif codingState == EItsMe
57
57
  @_mState = EFoundIt
58
58
  break
59
- elsif codingState == EStart:
59
+ elsif codingState == EStart
60
60
  charLen = @_mCodingSM.get_current_charlen()
61
61
  if i == 0
62
62
  @_mLastChar[1] = aBuf[0..0]
@@ -34,7 +34,7 @@ module CharDet
34
34
  MINIMUM_DATA_THRESHOLD = 4
35
35
 
36
36
  # This is hiragana 2-char sequence table, the number in each cell represents its frequency category
37
- jp2CharContext = [
37
+ JP2_CHAR_CONTEXT = [
38
38
  [0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1],
39
39
  [2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4],
40
40
  [0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2],
@@ -150,13 +150,13 @@ module CharDet
150
150
  @_mNeedToSkipCharNum = i - aLen
151
151
  @_mLastCharOrder = -1
152
152
  else
153
- if (order != -1) and (@_mLastCharOrder != -1):
153
+ if (order != -1) and (@_mLastCharOrder != -1)
154
154
  @_mTotalRel += 1
155
- if @_mTotalRel > MAX_REL_THRESHOLD:
155
+ if @_mTotalRel > MAX_REL_THRESHOLD
156
156
  @_mDone = true
157
157
  break
158
158
  end
159
- @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
159
+ @_mRelSample[JP2_CHAR_CONTEXT[@_mLastCharOrder][order]] += 1
160
160
  end
161
161
  @_mLastCharOrder = order
162
162
  end
@@ -169,7 +169,7 @@ module CharDet
169
169
 
170
170
  def get_confidence
171
171
  # This is just one way to calculate confidence. It works well for me.
172
- if @_mTotalRel > MINIMUM_DATA_THRESHOLD:
172
+ if @_mTotalRel > MINIMUM_DATA_THRESHOLD
173
173
  return (@_mTotalRel - @_mRelSample[0]) / @_mTotalRel
174
174
  else
175
175
  return DONT_KNOW
@@ -208,7 +208,7 @@ module CharDet
208
208
  return -1, 1 unless aStr
209
209
  # find out current char's byte length
210
210
  aStr = aStr[0..1].join if aStr.class == Array
211
- if (aStr[0..0] == "\x8E") or ((aStr[0..0] >= "\xA1") and (aStr[0..0] <= "\xFE")):
211
+ if (aStr[0..0] == "\x8E") or ((aStr[0..0] >= "\xA1") and (aStr[0..0] <= "\xFE"))
212
212
  charLen = 2
213
213
  elsif aStr[0..0] == "\x8F"
214
214
  charLen = 3
@@ -71,7 +71,7 @@ module CharDet
71
71
 
72
72
  @_mLastChar[0] = aBuf[aLen - 1.. aLen-1]
73
73
 
74
- if get_state() == EDetecting:
74
+ if get_state() == EDetecting
75
75
  if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
76
  @_mState = EFoundIt
77
77
  end
@@ -91,11 +91,11 @@ module CharDet
91
91
  end
92
92
 
93
93
  @_mGotData = true
94
- if @result['encoding'] and (@result['confidence'] > 0.0):
94
+ if @result['encoding'] and (@result['confidence'] > 0.0)
95
95
  @done = true
96
96
  return
97
97
  end
98
- if @_mInputState == EPureAscii:
98
+ if @_mInputState == EPureAscii
99
99
  if @_highBitDetector =~ (aBuf)
100
100
  @_mInputState = EHighbyte
101
101
  elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
@@ -109,7 +109,7 @@ module CharDet
109
109
  @_mEscCharSetProber = EscCharSetProber.new()
110
110
  end
111
111
  if @_mEscCharSetProber.feed(aBuf) == EFoundIt
112
- @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
112
+ @result = {'encoding' => @_mEscCharSetProber.get_charset_name(),
113
113
  'confidence' => @_mEscCharSetProber.get_confidence()
114
114
  }
115
115
  @done = true
@@ -138,12 +138,12 @@ module CharDet
138
138
  end
139
139
  @done = true
140
140
 
141
- if @_mInputState == EPureAscii:
141
+ if @_mInputState == EPureAscii
142
142
  @result = {'encoding' => 'ascii', 'confidence' => 1.0}
143
143
  return @result
144
144
  end
145
145
 
146
- if @_mInputState == EHighbyte:
146
+ if @_mInputState == EHighbyte
147
147
  confidences = {}
148
148
  @_mCharSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
149
149
  maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
@@ -63,7 +63,7 @@ module CharDet
63
63
  end
64
64
  end
65
65
 
66
- if get_state() == EDetecting:
66
+ if get_state() == EDetecting
67
67
  if get_confidence() > SHORTCUT_THRESHOLD
68
68
  @_mState = EFoundIt
69
69
  end
data/lib/rchardet.rb CHANGED
@@ -56,7 +56,7 @@ require 'rchardet/universaldetector'
56
56
  require 'rchardet/utf8prober'
57
57
 
58
58
  module CharDet
59
- VERSION = "1.3"
59
+ VERSION = "1.3.1"
60
60
  def CharDet.detect(aBuf)
61
61
  u = UniversalDetector.new
62
62
  u.reset
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rchardet
3
3
  version: !ruby/object:Gem::Version
4
- version: "1.3"
4
+ hash: 25
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 3
9
+ - 1
10
+ version: 1.3.1
5
11
  platform: ruby
6
12
  authors:
7
13
  - Jeff Hodges
@@ -9,8 +15,7 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2009-07-19 00:00:00 -07:00
13
- default_executable:
18
+ date: 2012-12-02 00:00:00 Z
14
19
  dependencies: []
15
20
 
16
21
  description:
@@ -60,7 +65,6 @@ files:
60
65
  - lib/rchardet.rb
61
66
  - README
62
67
  - COPYING
63
- has_rdoc: true
64
68
  homepage: http://github.com/jmhodges/rchardet/tree/master
65
69
  licenses: []
66
70
 
@@ -70,21 +74,27 @@ rdoc_options: []
70
74
  require_paths:
71
75
  - lib
72
76
  required_ruby_version: !ruby/object:Gem::Requirement
77
+ none: false
73
78
  requirements:
74
79
  - - ">="
75
80
  - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
76
84
  version: "0"
77
- version:
78
85
  required_rubygems_version: !ruby/object:Gem::Requirement
86
+ none: false
79
87
  requirements:
80
88
  - - ">="
81
89
  - !ruby/object:Gem::Version
90
+ hash: 3
91
+ segments:
92
+ - 0
82
93
  version: "0"
83
- version:
84
94
  requirements: []
85
95
 
86
96
  rubyforge_project: rchardet
87
- rubygems_version: 1.3.4
97
+ rubygems_version: 1.8.15
88
98
  signing_key:
89
99
  specification_version: 3
90
100
  summary: Character encoding auto-detection in Ruby. As smart as your browser. Open source.