rchardet 1.3.1 → 1.4.0

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. data/lib/rchardet.rb +1 -3
  2. data/lib/rchardet/big5freq.rb +2 -2
  3. data/lib/rchardet/big5prober.rb +2 -2
  4. data/lib/rchardet/chardistribution.rb +74 -69
  5. data/lib/rchardet/charsetgroupprober.rb +50 -52
  6. data/lib/rchardet/charsetprober.rb +2 -7
  7. data/lib/rchardet/codingstatemachine.rb +14 -13
  8. data/lib/rchardet/constants.rb +0 -0
  9. data/lib/rchardet/escprober.rb +34 -34
  10. data/lib/rchardet/escsm.rb +33 -32
  11. data/lib/rchardet/eucjpprober.rb +28 -28
  12. data/lib/rchardet/euckrfreq.rb +2 -1
  13. data/lib/rchardet/euckrprober.rb +2 -2
  14. data/lib/rchardet/euctwfreq.rb +2 -1
  15. data/lib/rchardet/euctwprober.rb +2 -2
  16. data/lib/rchardet/gb2312freq.rb +2 -2
  17. data/lib/rchardet/gb2312prober.rb +2 -2
  18. data/lib/rchardet/hebrewprober.rb +40 -40
  19. data/lib/rchardet/jisfreq.rb +2 -1
  20. data/lib/rchardet/jpcntx.rb +131 -130
  21. data/lib/rchardet/langbulgarianmodel.rb +6 -6
  22. data/lib/rchardet/langcyrillicmodel.rb +13 -13
  23. data/lib/rchardet/langgreekmodel.rb +5 -5
  24. data/lib/rchardet/langhebrewmodel.rb +3 -3
  25. data/lib/rchardet/langhungarianmodel.rb +5 -5
  26. data/lib/rchardet/langthaimodel.rb +3 -3
  27. data/lib/rchardet/latin1prober.rb +18 -18
  28. data/lib/rchardet/mbcharsetprober.rb +30 -30
  29. data/lib/rchardet/mbcsgroupprober.rb +9 -9
  30. data/lib/rchardet/mbcssm.rb +72 -72
  31. data/lib/rchardet/sbcharsetprober.rb +48 -50
  32. data/lib/rchardet/sbcsgroupprober.rb +16 -16
  33. data/lib/rchardet/sjisprober.rb +28 -28
  34. data/lib/rchardet/universaldetector.rb +92 -90
  35. data/lib/rchardet/utf8prober.rb +25 -25
  36. data/lib/rchardet/version.rb +3 -0
  37. metadata +30 -47
  38. data/COPYING +0 -504
  39. data/README +0 -12
@@ -40,68 +40,68 @@ module CharDet
40
40
  class SingleByteCharSetProber < CharSetProber
41
41
  def initialize(model, reversed=false, nameProber=nil)
42
42
  super()
43
- @_mModel = model
44
- @_mReversed = reversed # TRUE if we need to reverse every pair in the model lookup
45
- @_mNameProber = nameProber # Optional auxiliary prober for name decision
43
+ @model = model
44
+ @reversed = reversed # TRUE if we need to reverse every pair in the model lookup
45
+ @nameProber = nameProber # Optional auxiliary prober for name decision
46
46
  reset()
47
47
  end
48
48
 
49
49
  def reset
50
50
  super()
51
- @_mLastOrder = 255 # char order of last character
52
- @_mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
53
- @_mTotalSeqs = 0
54
- @_mTotalChar = 0
55
- @_mFreqChar = 0 # characters that fall in our sampling range
51
+ @lastOrder = 255 # char order of last character
52
+ @seqCounters = [0] * NUMBER_OF_SEQ_CAT
53
+ @totalSeqs = 0
54
+ @totalChar = 0
55
+ @freqChar = 0 # characters that fall in our sampling range
56
56
  end
57
57
 
58
58
  def get_charset_name
59
- if @_mNameProber
60
- return @_mNameProber.get_charset_name()
59
+ if @nameProber
60
+ return @nameProber.get_charset_name()
61
61
  else
62
- return @_mModel['charsetName']
62
+ return @model['charsetName']
63
63
  end
64
64
  end
65
65
 
66
66
  def feed(aBuf)
67
- if not @_mModel['keepEnglishLetter']
68
- aBuf = filter_without_english_letters(aBuf)
67
+ if !@model['keepEnglishLetter']
68
+ aBuf = filter_without_english_letters(aBuf)
69
69
  end
70
70
  aLen = aBuf.length
71
- if not aLen
72
- return get_state()
71
+ if aLen == 0
72
+ return get_state()
73
73
  end
74
74
  aBuf.each_byte do |b|
75
- c = b.chr
76
- order = @_mModel['charToOrderMap'][c[0]]
77
- if order < SYMBOL_CAT_ORDER
78
- @_mTotalChar += 1
79
- end
80
- if order < SAMPLE_SIZE
81
- @_mFreqChar += 1
82
- if @_mLastOrder < SAMPLE_SIZE
83
- @_mTotalSeqs += 1
84
- if not @_mReversed
85
- @_mSeqCounters[@_mModel['precedenceMatrix'][(@_mLastOrder * SAMPLE_SIZE) + order]] += 1
86
- else # reverse the order of the letters in the lookup
87
- @_mSeqCounters[@_mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + @_mLastOrder]] += 1
88
- end
89
- end
90
- end
91
- @_mLastOrder = order
75
+ c = b.chr
76
+ order = @model['charToOrderMap'][c.bytes.first]
77
+ if order < SYMBOL_CAT_ORDER
78
+ @totalChar += 1
79
+ end
80
+ if order < SAMPLE_SIZE
81
+ @freqChar += 1
82
+ if @lastOrder < SAMPLE_SIZE
83
+ @totalSeqs += 1
84
+ if !@reversed
85
+ @seqCounters[@model['precedenceMatrix'][(@lastOrder * SAMPLE_SIZE) + order]] += 1
86
+ else # reverse the order of the letters in the lookup
87
+ @seqCounters[@model['precedenceMatrix'][(order * SAMPLE_SIZE) + @lastOrder]] += 1
88
+ end
89
+ end
90
+ end
91
+ @lastOrder = order
92
92
  end
93
93
 
94
94
  if get_state() == EDetecting
95
- if @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
96
- cf = get_confidence()
97
- if cf > POSITIVE_SHORTCUT_THRESHOLD
98
- $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
99
- @_mState = EFoundIt
100
- elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
101
- $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
102
- @_mState = ENotMe
103
- end
104
- end
95
+ if @totalSeqs > SB_ENOUGH_REL_THRESHOLD
96
+ cf = get_confidence()
97
+ if cf > POSITIVE_SHORTCUT_THRESHOLD
98
+ $stderr << "#{@model['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
99
+ @state = EFoundIt
100
+ elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
101
+ $stderr << "#{@model['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
102
+ @state = ENotMe
103
+ end
104
+ end
105
105
  end
106
106
 
107
107
  return get_state()
@@ -109,14 +109,12 @@ module CharDet
109
109
 
110
110
  def get_confidence
111
111
  r = 0.01
112
- if @_mTotalSeqs > 0
113
- # print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
114
- r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
115
- # print r, self._mFreqChar, self._mTotalChar
116
- r = r * @_mFreqChar / @_mTotalChar
117
- if r >= 1.0
118
- r = 0.99
119
- end
112
+ if @totalSeqs > 0
113
+ r = (1.0 * @seqCounters[POSITIVE_CAT]) / @totalSeqs / @model['mTypicalPositiveRatio']
114
+ r = r * @freqChar / @totalChar
115
+ if r >= 1.0
116
+ r = 0.99
117
+ end
120
118
  end
121
119
  return r
122
120
  end
@@ -31,26 +31,26 @@ module CharDet
31
31
  class SBCSGroupProber < CharSetGroupProber
32
32
  def initialize
33
33
  super
34
- @_mProbers = [
35
- SingleByteCharSetProber.new(Win1251CyrillicModel),
36
- SingleByteCharSetProber.new(Koi8rModel),
37
- SingleByteCharSetProber.new(Latin5CyrillicModel),
38
- SingleByteCharSetProber.new(MacCyrillicModel),
39
- SingleByteCharSetProber.new(Ibm866Model),
40
- SingleByteCharSetProber.new(Ibm855Model),
41
- SingleByteCharSetProber.new(Latin7GreekModel),
42
- SingleByteCharSetProber.new(Win1253GreekModel),
43
- SingleByteCharSetProber.new(Latin5BulgarianModel),
44
- SingleByteCharSetProber.new(Win1251BulgarianModel),
45
- SingleByteCharSetProber.new(Latin2HungarianModel),
46
- SingleByteCharSetProber.new(Win1250HungarianModel),
47
- SingleByteCharSetProber.new(TIS620ThaiModel),
48
- ]
34
+ @probers = [
35
+ SingleByteCharSetProber.new(Win1251CyrillicModel),
36
+ SingleByteCharSetProber.new(Koi8rModel),
37
+ SingleByteCharSetProber.new(Latin5CyrillicModel),
38
+ SingleByteCharSetProber.new(MacCyrillicModel),
39
+ SingleByteCharSetProber.new(Ibm866Model),
40
+ SingleByteCharSetProber.new(Ibm855Model),
41
+ SingleByteCharSetProber.new(Latin7GreekModel),
42
+ SingleByteCharSetProber.new(Win1253GreekModel),
43
+ SingleByteCharSetProber.new(Latin5BulgarianModel),
44
+ SingleByteCharSetProber.new(Win1251BulgarianModel),
45
+ SingleByteCharSetProber.new(Latin2HungarianModel),
46
+ SingleByteCharSetProber.new(Win1250HungarianModel),
47
+ SingleByteCharSetProber.new(TIS620ThaiModel),
48
+ ]
49
49
  hebrewProber = HebrewProber.new()
50
50
  logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
51
51
  visualHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrewProber)
52
52
  hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
53
- @_mProbers += [hebrewProber, logicalHebrewProber, visualHebrewProber]
53
+ @probers += [hebrewProber, logicalHebrewProber, visualHebrewProber]
54
54
 
55
55
  reset()
56
56
  end
@@ -30,15 +30,15 @@ module CharDet
30
30
  class SJISProber < MultiByteCharSetProber
31
31
  def initialize
32
32
  super()
33
- @_mCodingSM = CodingStateMachine.new(SJISSMModel)
34
- @_mDistributionAnalyzer = SJISDistributionAnalysis.new()
35
- @_mContextAnalyzer = SJISContextAnalysis.new()
33
+ @codingSM = CodingStateMachine.new(SJISSMModel)
34
+ @distributionAnalyzer = SJISDistributionAnalysis.new()
35
+ @contextAnalyzer = SJISContextAnalysis.new()
36
36
  reset()
37
37
  end
38
38
 
39
39
  def reset
40
40
  super()
41
- @_mContextAnalyzer.reset()
41
+ @contextAnalyzer.reset()
42
42
  end
43
43
 
44
44
  def get_charset_name
@@ -48,40 +48,40 @@ module CharDet
48
48
  def feed(aBuf)
49
49
  aLen = aBuf.length
50
50
  for i in (0...aLen)
51
- codingState = @_mCodingSM.next_state(aBuf[i..i])
52
- if codingState == EError
53
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
- @_mState = ENotMe
55
- break
56
- elsif codingState == EItsMe
57
- @_mState = EFoundIt
58
- break
59
- elsif codingState == EStart
60
- charLen = @_mCodingSM.get_current_charlen()
61
- if i == 0
62
- @_mLastChar[1] = aBuf[0..0]
63
- @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
64
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
65
- else
66
- @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
67
- @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
68
- end
69
- end
51
+ codingState = @codingSM.next_state(aBuf[i,1])
52
+ if codingState == EError
53
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
+ @state = ENotMe
55
+ break
56
+ elsif codingState == EItsMe
57
+ @state = EFoundIt
58
+ break
59
+ elsif codingState == EStart
60
+ charLen = @codingSM.get_current_charlen()
61
+ if i == 0
62
+ @lastChar[1] = aBuf[0, 1]
63
+ @contextAnalyzer.feed(@lastChar[2-charLen, 1], charLen)
64
+ @distributionAnalyzer.feed(@lastChar, charLen)
65
+ else
66
+ @contextAnalyzer.feed(aBuf[i+1-charLen, 2], charLen)
67
+ @distributionAnalyzer.feed(aBuf[i-1, 2], charLen)
68
+ end
69
+ end
70
70
  end
71
71
 
72
- @_mLastChar[0] = aBuf[aLen - 1.. aLen-1]
72
+ @lastChar[0] = aBuf[aLen-1, 1]
73
73
 
74
74
  if get_state() == EDetecting
75
- if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
- @_mState = EFoundIt
77
- end
75
+ if @contextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
+ @state = EFoundIt
77
+ end
78
78
  end
79
79
 
80
80
  return get_state()
81
81
  end
82
82
 
83
83
  def get_confidence
84
- l = [@_mContextAnalyzer.get_confidence(), @_mDistributionAnalyzer.get_confidence()]
84
+ l = [@contextAnalyzer.get_confidence(), @distributionAnalyzer.get_confidence()]
85
85
  return l.max
86
86
  end
87
87
  end
@@ -1,3 +1,4 @@
1
+ # encoding: US-ASCII
1
2
  ######################## BEGIN LICENSE BLOCK ########################
2
3
  # The Original Code is Mozilla Universal charset detector code.
3
4
  #
@@ -34,27 +35,28 @@ module CharDet
34
35
  EHighbyte = 2
35
36
 
36
37
  class UniversalDetector
37
- attr_accessor :result
38
+ attr_reader :done, :result
39
+
38
40
  def initialize
39
- @_highBitDetector = /[\x80-\xFF]/
40
- @_escDetector = /(\033|\~\{)/
41
- @_mEscCharSetProber = nil
42
- @_mCharSetProbers = []
41
+ @highBitDetector = /[\x80-\xFF]/
42
+ @escDetector = /(\033|\~\{)/
43
+ @escCharSetProber = nil
44
+ @charSetProbers = []
43
45
  reset()
44
46
  end
45
47
 
46
48
  def reset
47
49
  @result = {'encoding' => nil, 'confidence' => 0.0}
48
50
  @done = false
49
- @_mStart = true
50
- @_mGotData = false
51
- @_mInputState = EPureAscii
52
- @_mLastChar = ''
53
- if @_mEscCharSetProber
54
- @_mEscCharSetProber.reset()
51
+ @start = true
52
+ @gotData = false
53
+ @inputState = EPureAscii
54
+ @lastChar = ''
55
+ if @escCharSetProber
56
+ @escCharSetProber.reset()
55
57
  end
56
- for prober in @_mCharSetProbers
57
- prober.reset()
58
+ for prober in @charSetProbers
59
+ prober.reset()
58
60
  end
59
61
  end
60
62
 
@@ -62,104 +64,104 @@ module CharDet
62
64
  return if @done
63
65
 
64
66
  aLen = aBuf.length
65
- return if not aLen
67
+ return if aLen == 0
66
68
 
67
- if not @_mGotData
68
- # If the data starts with BOM, we know it is UTF
69
- if aBuf[0...3] == "\xEF\xBB\xBF"
70
- # EF BB BF UTF-8 with BOM
71
- @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
72
- elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
73
- # FF FE 00 00 UTF-32, little-endian BOM
74
- @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
75
- elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
76
- # 00 00 FE FF UTF-32, big-endian BOM
77
- @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
78
- elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
79
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
80
- @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
81
- elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
82
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
83
- @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
84
- elsif aBuf[0...2] == "\xFF\xFE"
85
- # FF FE UTF-16, little endian BOM
86
- @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
87
- elsif aBuf[0...2] == "\xFE\xFF"
88
- # FE FF UTF-16, big endian BOM
89
- @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
90
- end
69
+ if !@gotData
70
+ # If the data starts with BOM, we know it is UTF
71
+ if aBuf[0, 3] == "\xEF\xBB\xBF"
72
+ # EF BB BF UTF-8 with BOM
73
+ @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
74
+ elsif aBuf[0, 4] == "\xFF\xFE\x00\x00"
75
+ # FF FE 00 00 UTF-32, little-endian BOM
76
+ @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
77
+ elsif aBuf[0, 4] == "\x00\x00\xFE\xFF"
78
+ # 00 00 FE FF UTF-32, big-endian BOM
79
+ @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
80
+ elsif aBuf[0, 4] == "\xFE\xFF\x00\x00"
81
+ # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
82
+ @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
83
+ elsif aBuf[0, 4] == "\x00\x00\xFF\xFE"
84
+ # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
85
+ @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
86
+ elsif aBuf[0, 2] == "\xFF\xFE"
87
+ # FF FE UTF-16, little endian BOM
88
+ @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
89
+ elsif aBuf[0, 2] == "\xFE\xFF"
90
+ # FE FF UTF-16, big endian BOM
91
+ @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
92
+ end
91
93
  end
92
94
 
93
- @_mGotData = true
95
+ @gotData = true
94
96
  if @result['encoding'] and (@result['confidence'] > 0.0)
95
- @done = true
96
- return
97
+ @done = true
98
+ return
97
99
  end
98
- if @_mInputState == EPureAscii
99
- if @_highBitDetector =~ (aBuf)
100
- @_mInputState = EHighbyte
101
- elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
102
- @_mInputState = EEscAscii
103
- end
100
+ if @inputState == EPureAscii
101
+ if @highBitDetector =~ (aBuf)
102
+ @inputState = EHighbyte
103
+ elsif (@inputState == EPureAscii) and @escDetector =~ (@lastChar + aBuf)
104
+ @inputState = EEscAscii
105
+ end
104
106
  end
105
107
 
106
- @_mLastChar = aBuf[-1..-1]
107
- if @_mInputState == EEscAscii
108
- if not @_mEscCharSetProber
109
- @_mEscCharSetProber = EscCharSetProber.new()
110
- end
111
- if @_mEscCharSetProber.feed(aBuf) == EFoundIt
112
- @result = {'encoding' => @_mEscCharSetProber.get_charset_name(),
113
- 'confidence' => @_mEscCharSetProber.get_confidence()
114
- }
115
- @done = true
116
- end
117
- elsif @_mInputState == EHighbyte
118
- if not @_mCharSetProbers or @_mCharSetProbers.empty?
119
- @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
120
- end
121
- for prober in @_mCharSetProbers
122
- if prober.feed(aBuf) == EFoundIt
123
- @result = {'encoding' => prober.get_charset_name(),
124
- 'confidence' => prober.get_confidence()}
125
- @done = true
126
- break
127
- end
128
- end
108
+ @lastChar = aBuf[-1, 1]
109
+ if @inputState == EEscAscii
110
+ if !@escCharSetProber
111
+ @escCharSetProber = EscCharSetProber.new()
112
+ end
113
+ if @escCharSetProber.feed(aBuf) == EFoundIt
114
+ @result = {'encoding' => @escCharSetProber.get_charset_name(),
115
+ 'confidence' => @escCharSetProber.get_confidence()
116
+ }
117
+ @done = true
118
+ end
119
+ elsif @inputState == EHighbyte
120
+ if @charSetProbers.nil? || @charSetProbers.empty?
121
+ @charSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
122
+ end
123
+ for prober in @charSetProbers
124
+ if prober.feed(aBuf) == EFoundIt
125
+ @result = {'encoding' => prober.get_charset_name(),
126
+ 'confidence' => prober.get_confidence()}
127
+ @done = true
128
+ break
129
+ end
130
+ end
129
131
  end
130
132
 
131
133
  end
132
134
 
133
135
  def close
134
136
  return if @done
135
- if not @_mGotData
136
- $stderr << "no data received!\n" if $debug
137
- return
137
+ if !@gotData
138
+ $stderr << "no data received!\n" if $debug
139
+ return
138
140
  end
139
141
  @done = true
140
142
 
141
- if @_mInputState == EPureAscii
142
- @result = {'encoding' => 'ascii', 'confidence' => 1.0}
143
- return @result
143
+ if @inputState == EPureAscii
144
+ @result = {'encoding' => 'ascii', 'confidence' => 1.0}
145
+ return @result
144
146
  end
145
147
 
146
- if @_mInputState == EHighbyte
147
- confidences = {}
148
- @_mCharSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
149
- maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
150
- if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
151
- @result = {'encoding' => maxProber.get_charset_name(),
152
- 'confidence' => maxProber.get_confidence()}
153
- return @result
154
- end
148
+ if @inputState == EHighbyte
149
+ confidences = {}
150
+ @charSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
151
+ maxProber = @charSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
152
+ if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
153
+ @result = {'encoding' => maxProber.get_charset_name(),
154
+ 'confidence' => maxProber.get_confidence()}
155
+ return @result
156
+ end
155
157
  end
156
158
 
157
159
  if $debug
158
- $stderr << "no probers hit minimum threshhold\n" if $debug
159
- for prober in @_mCharSetProbers[0]._mProbers
160
- next if not prober
161
- $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
162
- end
160
+ $stderr << "no probers hit minimum threshhold\n" if $debug
161
+ for prober in @charSetProbers[0].probers
162
+ next if !prober
163
+ $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
164
+ end
163
165
  end
164
166
  end
165
167
  end