gigo 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/README.md +2 -8
  2. data/gemfiles/activesupport30.gemfile.lock +1 -1
  3. data/gemfiles/activesupport31.gemfile.lock +1 -1
  4. data/gemfiles/activesupport32.gemfile.lock +1 -1
  5. data/gemfiles/activesupport40.gemfile.lock +2 -2
  6. data/lib/gigo.rb +0 -1
  7. data/lib/gigo/version.rb +1 -1
  8. data/test/cases/gigo_test.rb +0 -1
  9. metadata +3 -39
  10. data/lib/gigo/rchardet.rb +0 -67
  11. data/lib/gigo/rchardet/big5freq.rb +0 -927
  12. data/lib/gigo/rchardet/big5prober.rb +0 -43
  13. data/lib/gigo/rchardet/chardistribution.rb +0 -238
  14. data/lib/gigo/rchardet/charsetgroupprober.rb +0 -113
  15. data/lib/gigo/rchardet/charsetprober.rb +0 -76
  16. data/lib/gigo/rchardet/codingstatemachine.rb +0 -66
  17. data/lib/gigo/rchardet/constants.rb +0 -43
  18. data/lib/gigo/rchardet/escprober.rb +0 -90
  19. data/lib/gigo/rchardet/escsm.rb +0 -245
  20. data/lib/gigo/rchardet/eucjpprober.rb +0 -89
  21. data/lib/gigo/rchardet/euckrfreq.rb +0 -598
  22. data/lib/gigo/rchardet/euckrprober.rb +0 -43
  23. data/lib/gigo/rchardet/euctwfreq.rb +0 -431
  24. data/lib/gigo/rchardet/euctwprober.rb +0 -43
  25. data/lib/gigo/rchardet/gb2312freq.rb +0 -475
  26. data/lib/gigo/rchardet/gb2312prober.rb +0 -43
  27. data/lib/gigo/rchardet/hebrewprober.rb +0 -291
  28. data/lib/gigo/rchardet/jisfreq.rb +0 -571
  29. data/lib/gigo/rchardet/jpcntx.rb +0 -230
  30. data/lib/gigo/rchardet/langbulgarianmodel.rb +0 -230
  31. data/lib/gigo/rchardet/langcyrillicmodel.rb +0 -331
  32. data/lib/gigo/rchardet/langgreekmodel.rb +0 -228
  33. data/lib/gigo/rchardet/langhebrewmodel.rb +0 -203
  34. data/lib/gigo/rchardet/langhungarianmodel.rb +0 -227
  35. data/lib/gigo/rchardet/langthaimodel.rb +0 -202
  36. data/lib/gigo/rchardet/latin1prober.rb +0 -148
  37. data/lib/gigo/rchardet/mbcharsetprober.rb +0 -91
  38. data/lib/gigo/rchardet/mbcsgroupprober.rb +0 -48
  39. data/lib/gigo/rchardet/mbcssm.rb +0 -543
  40. data/lib/gigo/rchardet/sbcharsetprober.rb +0 -125
  41. data/lib/gigo/rchardet/sbcsgroupprober.rb +0 -59
  42. data/lib/gigo/rchardet/sjisprober.rb +0 -89
  43. data/lib/gigo/rchardet/universaldetector.rb +0 -169
  44. data/lib/gigo/rchardet/utf8prober.rb +0 -87
  45. data/lib/gigo/transcoders/rchardet.rb +0 -22
@@ -1,125 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
module GIGO
  module CharDet
    # Character orders below SAMPLE_SIZE take part in the pair (sequence)
    # statistics; the precedence matrix is indexed as
    # (first_order * SAMPLE_SIZE) + second_order.
    SAMPLE_SIZE = 64
    # Number of counted sequences required before feed() may shortcut.
    SB_ENOUGH_REL_THRESHOLD = 1024
    # Confidence above which feed() declares EFoundIt.
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    # Confidence below which feed() declares ENotMe.
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05
    # Orders below this value are counted in the total character count.
    SYMBOL_CAT_ORDER = 250
    # Number of sequence categories tracked in @_mSeqCounters.
    NUMBER_OF_SEQ_CAT = 4
    # Index of the category used as the "positive" one by get_confidence.
    POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
    #NEGATIVE_CAT = 0

    # Prober for a single-byte charset, driven by a language model hash.
    #
    # The model is read through these keys: 'keepEnglishLetter',
    # 'charToOrderMap', 'precedenceMatrix', 'charsetName' and
    # 'mTypicalPositiveRatio'.
    class SingleByteCharSetProber < CharSetProber
      # model      - the language model hash described above.
      # reversed   - when true, every pair is looked up in the precedence
      #              matrix with its two orders swapped.
      # nameProber - optional auxiliary prober; when given, its
      #              get_charset_name decides the reported name.
      def initialize(model, reversed=false, nameProber=nil)
        super()
        @_mModel = model
        @_mReversed = reversed
        @_mNameProber = nameProber
        reset()
      end

      # Clears all counters so the prober can be reused on new input.
      def reset
        super()
        @_mLastOrder = 255 # char order of last character
        @_mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        @_mTotalSeqs = 0
        @_mTotalChar = 0
        @_mFreqChar = 0 # characters that fall in our sampling range
      end

      # Returns the charset name, delegating to the name prober when present.
      def get_charset_name
        return @_mNameProber.get_charset_name() if @_mNameProber
        @_mModel['charsetName']
      end

      # Feeds a chunk of bytes into the statistics and returns the prober
      # state afterwards.
      #
      # aBuf - String of raw bytes.
      def feed(aBuf)
        unless @_mModel['keepEnglishLetter']
          aBuf = filter_without_english_letters(aBuf)
        end

        # The original guard was `if not aLen`, which can never fire because
        # 0 is truthy in Ruby. Test for emptiness explicitly instead.
        return get_state() if aBuf.empty?

        char_to_order = @_mModel['charToOrderMap']
        precedence = @_mModel['precedenceMatrix']

        # Walk the buffer byte by byte. each_byte does not depend on the
        # string's encoding tag, unlike split('') which yields whole
        # characters (and would make us skip continuation bytes).
        aBuf.each_byte do |byte|
          order = char_to_order[byte]
          @_mTotalChar += 1 if order < SYMBOL_CAT_ORDER
          if order < SAMPLE_SIZE
            @_mFreqChar += 1
            if @_mLastOrder < SAMPLE_SIZE
              @_mTotalSeqs += 1
              # Reversed models look the pair up with the letters swapped.
              index = if @_mReversed
                        (order * SAMPLE_SIZE) + @_mLastOrder
                      else
                        (@_mLastOrder * SAMPLE_SIZE) + order
                      end
              @_mSeqCounters[precedence[index]] += 1
            end
          end
          @_mLastOrder = order
        end

        # Once enough sequences have been seen, allow an early decision.
        if get_state == EDetecting && @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
          cf = get_confidence
          if cf > POSITIVE_SHORTCUT_THRESHOLD
            $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
            @_mState = EFoundIt
          elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
            $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
            @_mState = ENotMe
          end
        end

        return get_state
      end

      # Returns the current confidence as a Float.
      #
      # With no counted sequences the result is 0.01. Otherwise it is the
      # share of positive-category sequences, divided by the model's typical
      # positive ratio, scaled by the share of frequent characters among all
      # counted characters, and capped at 0.99.
      def get_confidence
        r = 0.01
        if @_mTotalSeqs > 0
          r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
          r = r * @_mFreqChar / @_mTotalChar
          r = 0.99 if r >= 1.0
        end
        return r
      end
    end
  end
end
@@ -1,59 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
module GIGO
  module CharDet
    # Group prober that bundles one SingleByteCharSetProber per single-byte
    # language model, plus the Hebrew prober trio.
    class SBCSGroupProber < CharSetGroupProber
      # Builds the prober list and resets the group.
      def initialize
        super

        # One plain (non-reversed, no name prober) prober per model, in this
        # exact order.
        plain_models = [
          Win1251CyrillicModel,
          Koi8rModel,
          Latin5CyrillicModel,
          MacCyrillicModel,
          Ibm866Model,
          Ibm855Model,
          Latin7GreekModel,
          Win1253GreekModel,
          Latin5BulgarianModel,
          Win1251BulgarianModel,
          Latin2HungarianModel,
          Win1250HungarianModel,
          TIS620ThaiModel
        ]
        @_mProbers = plain_models.map do |language_model|
          SingleByteCharSetProber.new(language_model)
        end

        # Hebrew uses two probers over the same model (second one with the
        # reversed flag set), both naming themselves through a shared
        # HebrewProber that is also told about them.
        name_decider = HebrewProber.new()
        logical = SingleByteCharSetProber.new(Win1255HebrewModel, false, name_decider)
        visual = SingleByteCharSetProber.new(Win1255HebrewModel, true, name_decider)
        name_decider.set_model_probers(logical, visual)
        @_mProbers = @_mProbers + [name_decider, logical, visual]

        reset()
      end
    end
  end
end
@@ -1,89 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is mozilla.org code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
module GIGO
  module CharDet
    # Multi-byte prober for Shift_JIS. It combines a coding state machine
    # with a context analyzer and a distribution analyzer.
    class SJISProber < MultiByteCharSetProber
      def initialize
        super()
        @_mCodingSM = CodingStateMachine.new(SJISSMModel)
        @_mDistributionAnalyzer = SJISDistributionAnalysis.new()
        @_mContextAnalyzer = SJISContextAnalysis.new()
        reset()
      end

      # Resets the inherited state and the context analyzer.
      def reset
        super()
        @_mContextAnalyzer.reset()
      end

      # Returns the charset name reported by this prober.
      def get_charset_name
        return "SHIFT_JIS"
      end

      # Feeds a chunk of bytes through the state machine and analyzers and
      # returns the prober state afterwards.
      #
      # aBuf - String of raw bytes.
      def feed(aBuf)
        aLen = aBuf.length
        (0...aLen).each do |i|
          codingState = @_mCodingSM.next_state(aBuf[i..i])
          if codingState == EError
            $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
            @_mState = ENotMe
            break
          elsif codingState == EItsMe
            @_mState = EFoundIt
            break
          elsif codingState == EStart
            # A complete character just ended at position i.
            charLen = @_mCodingSM.get_current_charlen()
            if i == 0
              # The character may have started in the previous chunk, so it
              # is assembled in @_mLastChar (slot 0 holds the byte saved at
              # the end of the previous feed call).
              @_mLastChar[1] = aBuf[0..0]
              @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
              @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
            else
              @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
              @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
            end
          end
        end

        # Remember the final byte for the next chunk. For an empty buffer the
        # slice is nil, and storing that would corrupt (or, if @_mLastChar is
        # a String, raise on) the carry-over slot, so skip it.
        # NOTE(review): @_mLastChar is initialised in the superclass, which
        # is not visible here - confirm its type.
        @_mLastChar[0] = aBuf[aLen - 1..aLen - 1] if aLen > 0

        if get_state() == EDetecting
          if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
            @_mState = EFoundIt
          end
        end

        return get_state()
      end

      # Returns the larger of the context and distribution confidences.
      def get_confidence
        l = [@_mContextAnalyzer.get_confidence(), @_mDistributionAnalyzer.get_confidence()]
        return l.max
      end
    end
  end
end
@@ -1,169 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
module GIGO
  module CharDet
    # Minimum confidence a prober needs for close() to report its charset.
    MINIMUM_THRESHOLD = 0.20
    # Input states tracked by UniversalDetector.
    EPureAscii = 0
    EEscAscii = 1
    EHighbyte = 2

    # Incremental charset detector. Call feed with successive chunks of
    # bytes, then close; the answer is available through #result as a Hash
    # with the keys 'encoding' and 'confidence'.
    class UniversalDetector
      attr_accessor :result

      # Byte-order marks and the encoding each one implies. Order matters:
      # the four-byte marks must be tried before the two-byte marks that are
      # their prefixes. The literals are retagged as binary so that they
      # compare equal to the binary buffer built in feed (strings with high
      # bytes and different encodings never compare equal on Ruby 1.9+).
      BOM_ENCODINGS = [
        ["\xEF\xBB\xBF",     "UTF-8"],                  # EF BB BF
        ["\xFF\xFE\x00\x00", "UTF-32LE"],               # FF FE 00 00
        ["\x00\x00\xFE\xFF", "UTF-32BE"],               # 00 00 FE FF
        ["\xFE\xFF\x00\x00", "X-ISO-10646-UCS-4-3412"], # unusual octet order
        ["\x00\x00\xFF\xFE", "X-ISO-10646-UCS-4-2143"], # unusual octet order
        ["\xFF\xFE",         "UTF-16LE"],               # FF FE
        ["\xFE\xFF",         "UTF-16BE"]                # FE FF
      ].map do |bom, name|
        bom = bom.dup.force_encoding('ASCII-8BIT') if bom.respond_to?(:force_encoding)
        [bom, name]
      end

      def initialize
        # Regexp literals with the /n flag replace the three-argument
        # Regexp.new(str, nil, 'n') form, which current Ruby rejects.
        @_highBitDetector = /[\x80-\xFF]/n
        @_escDetector = /(\033|\~\{)/n
        @_mEscCharSetProber = nil
        @_mCharSetProbers = []
        reset()
      end

      # Returns the detector (and any probers created so far) to its
      # initial state.
      def reset
        @result = {'encoding' => nil, 'confidence' => 0.0}
        @done = false
        @_mStart = true
        @_mGotData = false
        @_mInputState = EPureAscii
        @_mLastChar = ''
        @_mEscCharSetProber.reset if @_mEscCharSetProber
        @_mCharSetProbers.each { |prober| prober.reset }
      end

      # Feeds one chunk of input. Does nothing once a result has been
      # reached, and ignores nil or empty chunks.
      #
      # aBuf - String of bytes in any encoding; it is not modified.
      #
      # Returns nil.
      def feed(aBuf)
        return if @done
        # The original `return if not aLen` never fired (0 is truthy), so an
        # empty chunk flagged data as received and left @_mLastChar nil.
        return if aBuf.nil? || aBuf.empty?

        # Work on a binary-tagged copy (ruby 1.8 strings are already bytes)
        # so the caller's string keeps its encoding and may be frozen.
        aBuf = aBuf.dup.force_encoding('ASCII-8BIT') if aBuf.respond_to?(:force_encoding)

        # If the very first data starts with a BOM, we know the encoding.
        detect_bom(aBuf) unless @_mGotData
        @_mGotData = true

        if @result['encoding'] and (@result['confidence'] > 0.0)
          @done = true
          return
        end

        if @_mInputState == EPureAscii
          if @_highBitDetector =~ aBuf
            @_mInputState = EHighbyte
          elsif @_escDetector =~ (@_mLastChar + aBuf)
            # The last byte of the previous chunk is prepended so a "~{"
            # split across two chunks is still seen.
            @_mInputState = EEscAscii
          end
        end
        @_mLastChar = aBuf[-1..-1]

        if @_mInputState == EEscAscii
          @_mEscCharSetProber ||= EscCharSetProber.new
          if @_mEscCharSetProber.feed(aBuf) == EFoundIt
            # The original read `self._mEscCharSetProber` here (left over
            # from the Python port), which raises NoMethodError.
            @result = {'encoding' => @_mEscCharSetProber.get_charset_name(),
                       'confidence' => @_mEscCharSetProber.get_confidence()}
            @done = true
          end
        elsif @_mInputState == EHighbyte
          if not @_mCharSetProbers or @_mCharSetProbers.empty?
            @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
          end
          @_mCharSetProbers.each do |prober|
            if prober.feed(aBuf) == EFoundIt
              @result = {'encoding' => prober.get_charset_name(),
                         'confidence' => prober.get_confidence()}
              @done = true
              break
            end
          end
        end

        nil
      end

      # Finishes detection.
      #
      # Returns the result Hash when the input was pure ASCII or when a
      # prober beat MINIMUM_THRESHOLD; returns nil otherwise (already done,
      # no data received, or no prober was confident enough).
      def close
        return if @done
        if not @_mGotData
          $stderr << "no data received!\n" if $debug
          return
        end
        @done = true

        if @_mInputState == EPureAscii
          @result = {'encoding' => 'ascii', 'confidence' => 1.0}
          return @result
        end

        if @_mInputState == EHighbyte
          confidences = {}
          @_mCharSetProbers.each { |prober| confidences[prober] = prober.get_confidence }
          maxProber = @_mCharSetProbers.max { |a, b| confidences[a] <=> confidences[b] }
          if maxProber and confidences[maxProber] > MINIMUM_THRESHOLD
            @result = {'encoding' => maxProber.get_charset_name(),
                       'confidence' => confidences[maxProber]}
            return @result
          end
        end

        if $debug
          $stderr << "no probers hit minimum threshhold\n"
          # Dump the members of the first group prober. The ivar is read
          # directly because no `_mProbers` reader is visible in this file,
          # and the list may be empty (e.g. escape-sequence input).
          group = @_mCharSetProbers.first
          members = group ? group.instance_variable_get(:@_mProbers) : nil
          (members || []).each do |prober|
            next if not prober
            $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n"
          end
        end
        nil
      end

      private

      # Sets @result with confidence 1.0 when aBuf starts with a known BOM.
      def detect_bom(aBuf)
        BOM_ENCODINGS.each do |bom, name|
          if aBuf[0, bom.length] == bom
            @result = {'encoding' => name, 'confidence' => 1.0}
            return
          end
        end
      end
    end
  end
end
@@ -1,87 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is mozilla.org code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
module GIGO
  module CharDet
    # Factor applied to the "not UTF-8" likelihood per multi-byte character.
    ONE_CHAR_PROB = 0.5

    # Prober that recognises UTF-8 by running every character through a
    # coding state machine and counting completed multi-byte characters.
    class UTF8Prober < CharSetProber
      def initialize
        super()
        @_mCodingSM = CodingStateMachine.new(UTF8SMModel)
        reset()
      end

      # Resets inherited state, the state machine and the multi-byte count.
      def reset
        super()
        @_mCodingSM.reset()
        @_mNumOfMBChar = 0
      end

      # Returns the charset name reported by this prober.
      def get_charset_name
        "utf-8"
      end

      # Feeds a chunk into the state machine and returns the prober state.
      #
      # aBuf - String to examine; it is split with split('') and each piece
      #        is handed to the state machine.
      def feed(aBuf)
        aBuf.split('').each do |piece|
          outcome = @_mCodingSM.next_state(piece)
          if outcome == EError
            @_mState = ENotMe
            break
          end
          if outcome == EItsMe
            @_mState = EFoundIt
            break
          end
          # A character has just been completed; count it when it took two
          # or more bytes.
          if outcome == EStart && @_mCodingSM.get_current_charlen() >= 2
            @_mNumOfMBChar += 1
          end
        end

        # Shortcut to a positive verdict once confidence is high enough.
        @_mState = EFoundIt if get_state == EDetecting && get_confidence > SHORTCUT_THRESHOLD

        get_state
      end

      # Returns the confidence as a Float: 0.99 once six or more multi-byte
      # characters were seen, otherwise 1.0 minus 0.99 scaled by
      # ONE_CHAR_PROB once per multi-byte character seen.
      def get_confidence
        ceiling = 0.99
        return ceiling unless @_mNumOfMBChar < 6

        doubt = (0...@_mNumOfMBChar).inject(ceiling) { |acc, _| acc * ONE_CHAR_PROB }
        1.0 - doubt
      end
    end
  end
end