lg_pod_plugin 1.0.8 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/lg +5 -0
- data/lib/command/cache.rb +22 -18
- data/lib/command/command.rb +27 -35
- data/lib/command/install.rb +52 -0
- data/lib/command/update.rb +39 -0
- data/lib/lg_pod_plugin/downloader.rb +12 -17
- data/lib/lg_pod_plugin/git_util.rb +150 -106
- data/lib/lg_pod_plugin/install.rb +74 -88
- data/lib/lg_pod_plugin/l_cache.rb +11 -77
- data/lib/lg_pod_plugin/l_util.rb +6 -2
- data/lib/lg_pod_plugin/request.rb +90 -82
- data/lib/lg_pod_plugin/version.rb +1 -1
- data/lib/lg_pod_plugin.rb +9 -5
- metadata +57 -118
- data/lib/git/author.rb +0 -14
- data/lib/git/base/factory.rb +0 -101
- data/lib/git/base.rb +0 -670
- data/lib/git/branch.rb +0 -126
- data/lib/git/branches.rb +0 -71
- data/lib/git/config.rb +0 -22
- data/lib/git/diff.rb +0 -155
- data/lib/git/encoding_utils.rb +0 -33
- data/lib/git/escaped_path.rb +0 -77
- data/lib/git/index.rb +0 -5
- data/lib/git/lib.rb +0 -1215
- data/lib/git/log.rb +0 -135
- data/lib/git/object.rb +0 -312
- data/lib/git/path.rb +0 -31
- data/lib/git/remote.rb +0 -36
- data/lib/git/repository.rb +0 -6
- data/lib/git/stash.rb +0 -27
- data/lib/git/stashes.rb +0 -55
- data/lib/git/status.rb +0 -199
- data/lib/git/url.rb +0 -127
- data/lib/git/version.rb +0 -5
- data/lib/git/working_directory.rb +0 -4
- data/lib/git/worktree.rb +0 -38
- data/lib/git/worktrees.rb +0 -47
- data/lib/git.rb +0 -326
- data/lib/rchardet/big5freq.rb +0 -927
- data/lib/rchardet/big5prober.rb +0 -42
- data/lib/rchardet/chardistribution.rb +0 -250
- data/lib/rchardet/charsetgroupprober.rb +0 -110
- data/lib/rchardet/charsetprober.rb +0 -70
- data/lib/rchardet/codingstatemachine.rb +0 -67
- data/lib/rchardet/constants.rb +0 -42
- data/lib/rchardet/escprober.rb +0 -90
- data/lib/rchardet/escsm.rb +0 -245
- data/lib/rchardet/eucjpprober.rb +0 -88
- data/lib/rchardet/euckrfreq.rb +0 -597
- data/lib/rchardet/euckrprober.rb +0 -42
- data/lib/rchardet/euctwfreq.rb +0 -431
- data/lib/rchardet/euctwprober.rb +0 -42
- data/lib/rchardet/gb18030freq.rb +0 -474
- data/lib/rchardet/gb18030prober.rb +0 -42
- data/lib/rchardet/hebrewprober.rb +0 -289
- data/lib/rchardet/jisfreq.rb +0 -571
- data/lib/rchardet/jpcntx.rb +0 -229
- data/lib/rchardet/langbulgarianmodel.rb +0 -229
- data/lib/rchardet/langcyrillicmodel.rb +0 -330
- data/lib/rchardet/langgreekmodel.rb +0 -227
- data/lib/rchardet/langhebrewmodel.rb +0 -202
- data/lib/rchardet/langhungarianmodel.rb +0 -226
- data/lib/rchardet/langthaimodel.rb +0 -201
- data/lib/rchardet/latin1prober.rb +0 -147
- data/lib/rchardet/mbcharsetprober.rb +0 -89
- data/lib/rchardet/mbcsgroupprober.rb +0 -47
- data/lib/rchardet/mbcssm.rb +0 -542
- data/lib/rchardet/sbcharsetprober.rb +0 -122
- data/lib/rchardet/sbcsgroupprober.rb +0 -58
- data/lib/rchardet/sjisprober.rb +0 -88
- data/lib/rchardet/universaldetector.rb +0 -179
- data/lib/rchardet/utf8prober.rb +0 -87
- data/lib/rchardet/version.rb +0 -3
- data/lib/rchardet.rb +0 -67
- data/lib/zip/central_directory.rb +0 -212
- data/lib/zip/compressor.rb +0 -9
- data/lib/zip/constants.rb +0 -115
- data/lib/zip/crypto/decrypted_io.rb +0 -40
- data/lib/zip/crypto/encryption.rb +0 -11
- data/lib/zip/crypto/null_encryption.rb +0 -43
- data/lib/zip/crypto/traditional_encryption.rb +0 -99
- data/lib/zip/decompressor.rb +0 -31
- data/lib/zip/deflater.rb +0 -34
- data/lib/zip/dos_time.rb +0 -53
- data/lib/zip/entry.rb +0 -719
- data/lib/zip/entry_set.rb +0 -88
- data/lib/zip/errors.rb +0 -19
- data/lib/zip/extra_field/generic.rb +0 -44
- data/lib/zip/extra_field/ntfs.rb +0 -94
- data/lib/zip/extra_field/old_unix.rb +0 -46
- data/lib/zip/extra_field/universal_time.rb +0 -77
- data/lib/zip/extra_field/unix.rb +0 -39
- data/lib/zip/extra_field/zip64.rb +0 -70
- data/lib/zip/extra_field/zip64_placeholder.rb +0 -15
- data/lib/zip/extra_field.rb +0 -103
- data/lib/zip/file.rb +0 -468
- data/lib/zip/filesystem.rb +0 -643
- data/lib/zip/inflater.rb +0 -54
- data/lib/zip/input_stream.rb +0 -180
- data/lib/zip/ioextras/abstract_input_stream.rb +0 -122
- data/lib/zip/ioextras/abstract_output_stream.rb +0 -43
- data/lib/zip/ioextras.rb +0 -36
- data/lib/zip/null_compressor.rb +0 -15
- data/lib/zip/null_decompressor.rb +0 -19
- data/lib/zip/null_input_stream.rb +0 -10
- data/lib/zip/output_stream.rb +0 -198
- data/lib/zip/pass_thru_compressor.rb +0 -23
- data/lib/zip/pass_thru_decompressor.rb +0 -31
- data/lib/zip/streamable_directory.rb +0 -15
- data/lib/zip/streamable_stream.rb +0 -52
- data/lib/zip/version.rb +0 -3
- data/lib/zip.rb +0 -72
data/lib/rchardet/mbcssm.rb
DELETED
@@ -1,542 +0,0 @@
|
|
1
|
-
######################## BEGIN LICENSE BLOCK ########################
|
2
|
-
# The Original Code is mozilla.org code.
|
3
|
-
#
|
4
|
-
# The Initial Developer of the Original Code is
|
5
|
-
# Netscape Communications Corporation.
|
6
|
-
# Portions created by the Initial Developer are Copyright (C) 1998
|
7
|
-
# the Initial Developer. All Rights Reserved.
|
8
|
-
#
|
9
|
-
# Contributor(s):
|
10
|
-
# Jeff Hodges - port to Ruby
|
11
|
-
# Mark Pilgrim - port to Python
|
12
|
-
#
|
13
|
-
# This library is free software; you can redistribute it and/or
|
14
|
-
# modify it under the terms of the GNU Lesser General Public
|
15
|
-
# License as published by the Free Software Foundation; either
|
16
|
-
# version 2.1 of the License, or (at your option) any later version.
|
17
|
-
#
|
18
|
-
# This library is distributed in the hope that it will be useful,
|
19
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
20
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
21
|
-
# Lesser General Public License for more details.
|
22
|
-
#
|
23
|
-
# You should have received a copy of the GNU Lesser General Public
|
24
|
-
# License along with this library; if not, write to the Free Software
|
25
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
26
|
-
# 02110-1301 USA
|
27
|
-
######################### END LICENSE BLOCK #########################
|
28
|
-
|
29
|
-
module CharDet
|
30
|
-
# BIG5
|
31
|
-
|
32
|
-
BIG5_cls = [
|
33
|
-
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
34
|
-
1,1,1,1,1,1,0,0, # 08 - 0f
|
35
|
-
1,1,1,1,1,1,1,1, # 10 - 17
|
36
|
-
1,1,1,0,1,1,1,1, # 18 - 1f
|
37
|
-
1,1,1,1,1,1,1,1, # 20 - 27
|
38
|
-
1,1,1,1,1,1,1,1, # 28 - 2f
|
39
|
-
1,1,1,1,1,1,1,1, # 30 - 37
|
40
|
-
1,1,1,1,1,1,1,1, # 38 - 3f
|
41
|
-
2,2,2,2,2,2,2,2, # 40 - 47
|
42
|
-
2,2,2,2,2,2,2,2, # 48 - 4f
|
43
|
-
2,2,2,2,2,2,2,2, # 50 - 57
|
44
|
-
2,2,2,2,2,2,2,2, # 58 - 5f
|
45
|
-
2,2,2,2,2,2,2,2, # 60 - 67
|
46
|
-
2,2,2,2,2,2,2,2, # 68 - 6f
|
47
|
-
2,2,2,2,2,2,2,2, # 70 - 77
|
48
|
-
2,2,2,2,2,2,2,1, # 78 - 7f
|
49
|
-
4,4,4,4,4,4,4,4, # 80 - 87
|
50
|
-
4,4,4,4,4,4,4,4, # 88 - 8f
|
51
|
-
4,4,4,4,4,4,4,4, # 90 - 97
|
52
|
-
4,4,4,4,4,4,4,4, # 98 - 9f
|
53
|
-
4,3,3,3,3,3,3,3, # a0 - a7
|
54
|
-
3,3,3,3,3,3,3,3, # a8 - af
|
55
|
-
3,3,3,3,3,3,3,3, # b0 - b7
|
56
|
-
3,3,3,3,3,3,3,3, # b8 - bf
|
57
|
-
3,3,3,3,3,3,3,3, # c0 - c7
|
58
|
-
3,3,3,3,3,3,3,3, # c8 - cf
|
59
|
-
3,3,3,3,3,3,3,3, # d0 - d7
|
60
|
-
3,3,3,3,3,3,3,3, # d8 - df
|
61
|
-
3,3,3,3,3,3,3,3, # e0 - e7
|
62
|
-
3,3,3,3,3,3,3,3, # e8 - ef
|
63
|
-
3,3,3,3,3,3,3,3, # f0 - f7
|
64
|
-
3,3,3,3,3,3,3,0 # f8 - ff
|
65
|
-
].freeze
|
66
|
-
|
67
|
-
BIG5_st = [
|
68
|
-
EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
|
69
|
-
EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
|
70
|
-
EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
|
71
|
-
].freeze
|
72
|
-
|
73
|
-
Big5CharLenTable = [0, 1, 1, 2, 0].freeze
|
74
|
-
|
75
|
-
Big5SMModel = {'classTable' => BIG5_cls,
|
76
|
-
'classFactor' => 5,
|
77
|
-
'stateTable' => BIG5_st,
|
78
|
-
'charLenTable' => Big5CharLenTable,
|
79
|
-
'name' => 'Big5'
|
80
|
-
}.freeze
|
81
|
-
|
82
|
-
# EUC-JP
|
83
|
-
|
84
|
-
EUCJP_cls = [
|
85
|
-
4,4,4,4,4,4,4,4, # 00 - 07
|
86
|
-
4,4,4,4,4,4,5,5, # 08 - 0f
|
87
|
-
4,4,4,4,4,4,4,4, # 10 - 17
|
88
|
-
4,4,4,5,4,4,4,4, # 18 - 1f
|
89
|
-
4,4,4,4,4,4,4,4, # 20 - 27
|
90
|
-
4,4,4,4,4,4,4,4, # 28 - 2f
|
91
|
-
4,4,4,4,4,4,4,4, # 30 - 37
|
92
|
-
4,4,4,4,4,4,4,4, # 38 - 3f
|
93
|
-
4,4,4,4,4,4,4,4, # 40 - 47
|
94
|
-
4,4,4,4,4,4,4,4, # 48 - 4f
|
95
|
-
4,4,4,4,4,4,4,4, # 50 - 57
|
96
|
-
4,4,4,4,4,4,4,4, # 58 - 5f
|
97
|
-
4,4,4,4,4,4,4,4, # 60 - 67
|
98
|
-
4,4,4,4,4,4,4,4, # 68 - 6f
|
99
|
-
4,4,4,4,4,4,4,4, # 70 - 77
|
100
|
-
4,4,4,4,4,4,4,4, # 78 - 7f
|
101
|
-
5,5,5,5,5,5,5,5, # 80 - 87
|
102
|
-
5,5,5,5,5,5,1,3, # 88 - 8f
|
103
|
-
5,5,5,5,5,5,5,5, # 90 - 97
|
104
|
-
5,5,5,5,5,5,5,5, # 98 - 9f
|
105
|
-
5,2,2,2,2,2,2,2, # a0 - a7
|
106
|
-
2,2,2,2,2,2,2,2, # a8 - af
|
107
|
-
2,2,2,2,2,2,2,2, # b0 - b7
|
108
|
-
2,2,2,2,2,2,2,2, # b8 - bf
|
109
|
-
2,2,2,2,2,2,2,2, # c0 - c7
|
110
|
-
2,2,2,2,2,2,2,2, # c8 - cf
|
111
|
-
2,2,2,2,2,2,2,2, # d0 - d7
|
112
|
-
2,2,2,2,2,2,2,2, # d8 - df
|
113
|
-
0,0,0,0,0,0,0,0, # e0 - e7
|
114
|
-
0,0,0,0,0,0,0,0, # e8 - ef
|
115
|
-
0,0,0,0,0,0,0,0, # f0 - f7
|
116
|
-
0,0,0,0,0,0,0,5 # f8 - ff
|
117
|
-
].freeze
|
118
|
-
|
119
|
-
EUCJP_st = [
|
120
|
-
3, 4, 3, 5,EStart,EError,EError,EError,#00-07
|
121
|
-
EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
|
122
|
-
EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
|
123
|
-
EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
|
124
|
-
3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
|
125
|
-
].freeze
|
126
|
-
|
127
|
-
EUCJPCharLenTable = [2, 2, 2, 3, 1, 0].freeze
|
128
|
-
|
129
|
-
EUCJPSMModel = {'classTable' => EUCJP_cls,
|
130
|
-
'classFactor' => 6,
|
131
|
-
'stateTable' => EUCJP_st,
|
132
|
-
'charLenTable' => EUCJPCharLenTable,
|
133
|
-
'name' => 'EUC-JP'
|
134
|
-
}.freeze
|
135
|
-
|
136
|
-
# EUC-KR
|
137
|
-
|
138
|
-
EUCKR_cls = [
|
139
|
-
1,1,1,1,1,1,1,1, # 00 - 07
|
140
|
-
1,1,1,1,1,1,0,0, # 08 - 0f
|
141
|
-
1,1,1,1,1,1,1,1, # 10 - 17
|
142
|
-
1,1,1,0,1,1,1,1, # 18 - 1f
|
143
|
-
1,1,1,1,1,1,1,1, # 20 - 27
|
144
|
-
1,1,1,1,1,1,1,1, # 28 - 2f
|
145
|
-
1,1,1,1,1,1,1,1, # 30 - 37
|
146
|
-
1,1,1,1,1,1,1,1, # 38 - 3f
|
147
|
-
1,1,1,1,1,1,1,1, # 40 - 47
|
148
|
-
1,1,1,1,1,1,1,1, # 48 - 4f
|
149
|
-
1,1,1,1,1,1,1,1, # 50 - 57
|
150
|
-
1,1,1,1,1,1,1,1, # 58 - 5f
|
151
|
-
1,1,1,1,1,1,1,1, # 60 - 67
|
152
|
-
1,1,1,1,1,1,1,1, # 68 - 6f
|
153
|
-
1,1,1,1,1,1,1,1, # 70 - 77
|
154
|
-
1,1,1,1,1,1,1,1, # 78 - 7f
|
155
|
-
0,0,0,0,0,0,0,0, # 80 - 87
|
156
|
-
0,0,0,0,0,0,0,0, # 88 - 8f
|
157
|
-
0,0,0,0,0,0,0,0, # 90 - 97
|
158
|
-
0,0,0,0,0,0,0,0, # 98 - 9f
|
159
|
-
0,2,2,2,2,2,2,2, # a0 - a7
|
160
|
-
2,2,2,2,2,3,3,3, # a8 - af
|
161
|
-
2,2,2,2,2,2,2,2, # b0 - b7
|
162
|
-
2,2,2,2,2,2,2,2, # b8 - bf
|
163
|
-
2,2,2,2,2,2,2,2, # c0 - c7
|
164
|
-
2,3,2,2,2,2,2,2, # c8 - cf
|
165
|
-
2,2,2,2,2,2,2,2, # d0 - d7
|
166
|
-
2,2,2,2,2,2,2,2, # d8 - df
|
167
|
-
2,2,2,2,2,2,2,2, # e0 - e7
|
168
|
-
2,2,2,2,2,2,2,2, # e8 - ef
|
169
|
-
2,2,2,2,2,2,2,2, # f0 - f7
|
170
|
-
2,2,2,2,2,2,2,0 # f8 - ff
|
171
|
-
].freeze
|
172
|
-
|
173
|
-
EUCKR_st = [
|
174
|
-
EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
|
175
|
-
EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
|
176
|
-
].freeze
|
177
|
-
|
178
|
-
EUCKRCharLenTable = [0, 1, 2, 0].freeze
|
179
|
-
|
180
|
-
EUCKRSMModel = {'classTable' => EUCKR_cls,
|
181
|
-
'classFactor' => 4,
|
182
|
-
'stateTable' => EUCKR_st,
|
183
|
-
'charLenTable' => EUCKRCharLenTable,
|
184
|
-
'name' => 'EUC-KR'
|
185
|
-
}.freeze
|
186
|
-
|
187
|
-
# EUC-TW
|
188
|
-
|
189
|
-
EUCTW_cls = [
|
190
|
-
2,2,2,2,2,2,2,2, # 00 - 07
|
191
|
-
2,2,2,2,2,2,0,0, # 08 - 0f
|
192
|
-
2,2,2,2,2,2,2,2, # 10 - 17
|
193
|
-
2,2,2,0,2,2,2,2, # 18 - 1f
|
194
|
-
2,2,2,2,2,2,2,2, # 20 - 27
|
195
|
-
2,2,2,2,2,2,2,2, # 28 - 2f
|
196
|
-
2,2,2,2,2,2,2,2, # 30 - 37
|
197
|
-
2,2,2,2,2,2,2,2, # 38 - 3f
|
198
|
-
2,2,2,2,2,2,2,2, # 40 - 47
|
199
|
-
2,2,2,2,2,2,2,2, # 48 - 4f
|
200
|
-
2,2,2,2,2,2,2,2, # 50 - 57
|
201
|
-
2,2,2,2,2,2,2,2, # 58 - 5f
|
202
|
-
2,2,2,2,2,2,2,2, # 60 - 67
|
203
|
-
2,2,2,2,2,2,2,2, # 68 - 6f
|
204
|
-
2,2,2,2,2,2,2,2, # 70 - 77
|
205
|
-
2,2,2,2,2,2,2,2, # 78 - 7f
|
206
|
-
0,0,0,0,0,0,0,0, # 80 - 87
|
207
|
-
0,0,0,0,0,0,6,0, # 88 - 8f
|
208
|
-
0,0,0,0,0,0,0,0, # 90 - 97
|
209
|
-
0,0,0,0,0,0,0,0, # 98 - 9f
|
210
|
-
0,3,4,4,4,4,4,4, # a0 - a7
|
211
|
-
5,5,1,1,1,1,1,1, # a8 - af
|
212
|
-
1,1,1,1,1,1,1,1, # b0 - b7
|
213
|
-
1,1,1,1,1,1,1,1, # b8 - bf
|
214
|
-
1,1,3,1,3,3,3,3, # c0 - c7
|
215
|
-
3,3,3,3,3,3,3,3, # c8 - cf
|
216
|
-
3,3,3,3,3,3,3,3, # d0 - d7
|
217
|
-
3,3,3,3,3,3,3,3, # d8 - df
|
218
|
-
3,3,3,3,3,3,3,3, # e0 - e7
|
219
|
-
3,3,3,3,3,3,3,3, # e8 - ef
|
220
|
-
3,3,3,3,3,3,3,3, # f0 - f7
|
221
|
-
3,3,3,3,3,3,3,0 # f8 - ff
|
222
|
-
].freeze
|
223
|
-
|
224
|
-
EUCTW_st = [
|
225
|
-
EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
|
226
|
-
EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
|
227
|
-
EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EStart,EError,#10-17
|
228
|
-
EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
|
229
|
-
5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
|
230
|
-
EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
|
231
|
-
].freeze
|
232
|
-
|
233
|
-
EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3].freeze
|
234
|
-
|
235
|
-
EUCTWSMModel = {'classTable' => EUCTW_cls,
|
236
|
-
'classFactor' => 7,
|
237
|
-
'stateTable' => EUCTW_st,
|
238
|
-
'charLenTable' => EUCTWCharLenTable,
|
239
|
-
'name' => 'x-euc-tw'
|
240
|
-
}.freeze
|
241
|
-
|
242
|
-
# GB18030
|
243
|
-
|
244
|
-
GB18030_cls = [
|
245
|
-
1,1,1,1,1,1,1,1, # 00 - 07
|
246
|
-
1,1,1,1,1,1,0,0, # 08 - 0f
|
247
|
-
1,1,1,1,1,1,1,1, # 10 - 17
|
248
|
-
1,1,1,0,1,1,1,1, # 18 - 1f
|
249
|
-
1,1,1,1,1,1,1,1, # 20 - 27
|
250
|
-
1,1,1,1,1,1,1,1, # 28 - 2f
|
251
|
-
3,3,3,3,3,3,3,3, # 30 - 37
|
252
|
-
3,3,1,1,1,1,1,1, # 38 - 3f
|
253
|
-
2,2,2,2,2,2,2,2, # 40 - 47
|
254
|
-
2,2,2,2,2,2,2,2, # 48 - 4f
|
255
|
-
2,2,2,2,2,2,2,2, # 50 - 57
|
256
|
-
2,2,2,2,2,2,2,2, # 58 - 5f
|
257
|
-
2,2,2,2,2,2,2,2, # 60 - 67
|
258
|
-
2,2,2,2,2,2,2,2, # 68 - 6f
|
259
|
-
2,2,2,2,2,2,2,2, # 70 - 77
|
260
|
-
2,2,2,2,2,2,2,4, # 78 - 7f
|
261
|
-
5,6,6,6,6,6,6,6, # 80 - 87
|
262
|
-
6,6,6,6,6,6,6,6, # 88 - 8f
|
263
|
-
6,6,6,6,6,6,6,6, # 90 - 97
|
264
|
-
6,6,6,6,6,6,6,6, # 98 - 9f
|
265
|
-
6,6,6,6,6,6,6,6, # a0 - a7
|
266
|
-
6,6,6,6,6,6,6,6, # a8 - af
|
267
|
-
6,6,6,6,6,6,6,6, # b0 - b7
|
268
|
-
6,6,6,6,6,6,6,6, # b8 - bf
|
269
|
-
6,6,6,6,6,6,6,6, # c0 - c7
|
270
|
-
6,6,6,6,6,6,6,6, # c8 - cf
|
271
|
-
6,6,6,6,6,6,6,6, # d0 - d7
|
272
|
-
6,6,6,6,6,6,6,6, # d8 - df
|
273
|
-
6,6,6,6,6,6,6,6, # e0 - e7
|
274
|
-
6,6,6,6,6,6,6,6, # e8 - ef
|
275
|
-
6,6,6,6,6,6,6,6, # f0 - f7
|
276
|
-
6,6,6,6,6,6,6,0 # f8 - ff
|
277
|
-
].freeze
|
278
|
-
|
279
|
-
GB18030_st = [
|
280
|
-
EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
|
281
|
-
EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
|
282
|
-
EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
|
283
|
-
4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
|
284
|
-
EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
|
285
|
-
EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
|
286
|
-
].freeze
|
287
|
-
|
288
|
-
# To be accurate, the length of class 6 can be either 2 or 4.
|
289
|
-
# But it is not necessary to discriminate between the two since
|
290
|
-
# it is used for frequency analysis only, and we are validating
|
291
|
-
# each code range there as well. So it is safe to set it to be
|
292
|
-
# 2 here.
|
293
|
-
GB18030CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
|
294
|
-
|
295
|
-
GB18030SMModel = {'classTable' => GB18030_cls,
|
296
|
-
'classFactor' => 7,
|
297
|
-
'stateTable' => GB18030_st,
|
298
|
-
'charLenTable' => GB18030CharLenTable,
|
299
|
-
'name' => 'GB18030'
|
300
|
-
}.freeze
|
301
|
-
|
302
|
-
# Shift_JIS
|
303
|
-
|
304
|
-
SJIS_cls = [
|
305
|
-
1,1,1,1,1,1,1,1, # 00 - 07
|
306
|
-
1,1,1,1,1,1,0,0, # 08 - 0f
|
307
|
-
1,1,1,1,1,1,1,1, # 10 - 17
|
308
|
-
1,1,1,0,1,1,1,1, # 18 - 1f
|
309
|
-
1,1,1,1,1,1,1,1, # 20 - 27
|
310
|
-
1,1,1,1,1,1,1,1, # 28 - 2f
|
311
|
-
1,1,1,1,1,1,1,1, # 30 - 37
|
312
|
-
1,1,1,1,1,1,1,1, # 38 - 3f
|
313
|
-
2,2,2,2,2,2,2,2, # 40 - 47
|
314
|
-
2,2,2,2,2,2,2,2, # 48 - 4f
|
315
|
-
2,2,2,2,2,2,2,2, # 50 - 57
|
316
|
-
2,2,2,2,2,2,2,2, # 58 - 5f
|
317
|
-
2,2,2,2,2,2,2,2, # 60 - 67
|
318
|
-
2,2,2,2,2,2,2,2, # 68 - 6f
|
319
|
-
2,2,2,2,2,2,2,2, # 70 - 77
|
320
|
-
2,2,2,2,2,2,2,1, # 78 - 7f
|
321
|
-
3,3,3,3,3,3,3,3, # 80 - 87
|
322
|
-
3,3,3,3,3,3,3,3, # 88 - 8f
|
323
|
-
3,3,3,3,3,3,3,3, # 90 - 97
|
324
|
-
3,3,3,3,3,3,3,3, # 98 - 9f
|
325
|
-
#0xa0 is illegal in sjis encoding, but some pages do
|
326
|
-
#contain such bytes. We need to be more error-forgiving.
|
327
|
-
2,2,2,2,2,2,2,2, # a0 - a7
|
328
|
-
2,2,2,2,2,2,2,2, # a8 - af
|
329
|
-
2,2,2,2,2,2,2,2, # b0 - b7
|
330
|
-
2,2,2,2,2,2,2,2, # b8 - bf
|
331
|
-
2,2,2,2,2,2,2,2, # c0 - c7
|
332
|
-
2,2,2,2,2,2,2,2, # c8 - cf
|
333
|
-
2,2,2,2,2,2,2,2, # d0 - d7
|
334
|
-
2,2,2,2,2,2,2,2, # d8 - df
|
335
|
-
3,3,3,3,3,3,3,3, # e0 - e7
|
336
|
-
3,3,3,3,3,4,4,4, # e8 - ef
|
337
|
-
4,4,4,4,4,4,4,4, # f0 - f7
|
338
|
-
4,4,4,4,4,0,0,0 # f8 - ff
|
339
|
-
].freeze
|
340
|
-
|
341
|
-
SJIS_st = [
|
342
|
-
EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
|
343
|
-
EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
|
344
|
-
EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
|
345
|
-
].freeze
|
346
|
-
|
347
|
-
SJISCharLenTable = [0, 1, 1, 2, 0, 0].freeze
|
348
|
-
|
349
|
-
SJISSMModel = {'classTable' => SJIS_cls,
|
350
|
-
'classFactor' => 6,
|
351
|
-
'stateTable' => SJIS_st,
|
352
|
-
'charLenTable' => SJISCharLenTable,
|
353
|
-
'name' => 'Shift_JIS'
|
354
|
-
}.freeze
|
355
|
-
|
356
|
-
# UCS2-BE
|
357
|
-
|
358
|
-
UCS2BE_cls = [
|
359
|
-
0,0,0,0,0,0,0,0, # 00 - 07
|
360
|
-
0,0,1,0,0,2,0,0, # 08 - 0f
|
361
|
-
0,0,0,0,0,0,0,0, # 10 - 17
|
362
|
-
0,0,0,3,0,0,0,0, # 18 - 1f
|
363
|
-
0,0,0,0,0,0,0,0, # 20 - 27
|
364
|
-
0,3,3,3,3,3,0,0, # 28 - 2f
|
365
|
-
0,0,0,0,0,0,0,0, # 30 - 37
|
366
|
-
0,0,0,0,0,0,0,0, # 38 - 3f
|
367
|
-
0,0,0,0,0,0,0,0, # 40 - 47
|
368
|
-
0,0,0,0,0,0,0,0, # 48 - 4f
|
369
|
-
0,0,0,0,0,0,0,0, # 50 - 57
|
370
|
-
0,0,0,0,0,0,0,0, # 58 - 5f
|
371
|
-
0,0,0,0,0,0,0,0, # 60 - 67
|
372
|
-
0,0,0,0,0,0,0,0, # 68 - 6f
|
373
|
-
0,0,0,0,0,0,0,0, # 70 - 77
|
374
|
-
0,0,0,0,0,0,0,0, # 78 - 7f
|
375
|
-
0,0,0,0,0,0,0,0, # 80 - 87
|
376
|
-
0,0,0,0,0,0,0,0, # 88 - 8f
|
377
|
-
0,0,0,0,0,0,0,0, # 90 - 97
|
378
|
-
0,0,0,0,0,0,0,0, # 98 - 9f
|
379
|
-
0,0,0,0,0,0,0,0, # a0 - a7
|
380
|
-
0,0,0,0,0,0,0,0, # a8 - af
|
381
|
-
0,0,0,0,0,0,0,0, # b0 - b7
|
382
|
-
0,0,0,0,0,0,0,0, # b8 - bf
|
383
|
-
0,0,0,0,0,0,0,0, # c0 - c7
|
384
|
-
0,0,0,0,0,0,0,0, # c8 - cf
|
385
|
-
0,0,0,0,0,0,0,0, # d0 - d7
|
386
|
-
0,0,0,0,0,0,0,0, # d8 - df
|
387
|
-
0,0,0,0,0,0,0,0, # e0 - e7
|
388
|
-
0,0,0,0,0,0,0,0, # e8 - ef
|
389
|
-
0,0,0,0,0,0,0,0, # f0 - f7
|
390
|
-
0,0,0,0,0,0,4,5 # f8 - ff
|
391
|
-
].freeze
|
392
|
-
|
393
|
-
UCS2BE_st = [
|
394
|
-
5, 7, 7,EError, 4, 3,EError,EError,#00-07
|
395
|
-
EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
|
396
|
-
EItsMe,EItsMe, 6, 6, 6, 6,EError,EError,#10-17
|
397
|
-
6, 6, 6, 6, 6,EItsMe, 6, 6,#18-1f
|
398
|
-
6, 6, 6, 6, 5, 7, 7,EError,#20-27
|
399
|
-
5, 8, 6, 6,EError, 6, 6, 6,#28-2f
|
400
|
-
6, 6, 6, 6,EError,EError,EStart,EStart#30-37
|
401
|
-
].freeze
|
402
|
-
|
403
|
-
UCS2BECharLenTable = [2, 2, 2, 0, 2, 2].freeze
|
404
|
-
|
405
|
-
UCS2BESMModel = {'classTable' => UCS2BE_cls,
|
406
|
-
'classFactor' => 6,
|
407
|
-
'stateTable' => UCS2BE_st,
|
408
|
-
'charLenTable' => UCS2BECharLenTable,
|
409
|
-
'name' => 'UTF-16BE'
|
410
|
-
}.freeze
|
411
|
-
|
412
|
-
# UCS2-LE
|
413
|
-
|
414
|
-
UCS2LE_cls = [
|
415
|
-
0,0,0,0,0,0,0,0, # 00 - 07
|
416
|
-
0,0,1,0,0,2,0,0, # 08 - 0f
|
417
|
-
0,0,0,0,0,0,0,0, # 10 - 17
|
418
|
-
0,0,0,3,0,0,0,0, # 18 - 1f
|
419
|
-
0,0,0,0,0,0,0,0, # 20 - 27
|
420
|
-
0,3,3,3,3,3,0,0, # 28 - 2f
|
421
|
-
0,0,0,0,0,0,0,0, # 30 - 37
|
422
|
-
0,0,0,0,0,0,0,0, # 38 - 3f
|
423
|
-
0,0,0,0,0,0,0,0, # 40 - 47
|
424
|
-
0,0,0,0,0,0,0,0, # 48 - 4f
|
425
|
-
0,0,0,0,0,0,0,0, # 50 - 57
|
426
|
-
0,0,0,0,0,0,0,0, # 58 - 5f
|
427
|
-
0,0,0,0,0,0,0,0, # 60 - 67
|
428
|
-
0,0,0,0,0,0,0,0, # 68 - 6f
|
429
|
-
0,0,0,0,0,0,0,0, # 70 - 77
|
430
|
-
0,0,0,0,0,0,0,0, # 78 - 7f
|
431
|
-
0,0,0,0,0,0,0,0, # 80 - 87
|
432
|
-
0,0,0,0,0,0,0,0, # 88 - 8f
|
433
|
-
0,0,0,0,0,0,0,0, # 90 - 97
|
434
|
-
0,0,0,0,0,0,0,0, # 98 - 9f
|
435
|
-
0,0,0,0,0,0,0,0, # a0 - a7
|
436
|
-
0,0,0,0,0,0,0,0, # a8 - af
|
437
|
-
0,0,0,0,0,0,0,0, # b0 - b7
|
438
|
-
0,0,0,0,0,0,0,0, # b8 - bf
|
439
|
-
0,0,0,0,0,0,0,0, # c0 - c7
|
440
|
-
0,0,0,0,0,0,0,0, # c8 - cf
|
441
|
-
0,0,0,0,0,0,0,0, # d0 - d7
|
442
|
-
0,0,0,0,0,0,0,0, # d8 - df
|
443
|
-
0,0,0,0,0,0,0,0, # e0 - e7
|
444
|
-
0,0,0,0,0,0,0,0, # e8 - ef
|
445
|
-
0,0,0,0,0,0,0,0, # f0 - f7
|
446
|
-
0,0,0,0,0,0,4,5 # f8 - ff
|
447
|
-
].freeze
|
448
|
-
|
449
|
-
UCS2LE_st = [
|
450
|
-
6, 6, 7, 6, 4, 3,EError,EError,#00-07
|
451
|
-
EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
|
452
|
-
EItsMe,EItsMe, 5, 5, 5,EError,EItsMe,EError,#10-17
|
453
|
-
5, 5, 5,EError, 5,EError, 6, 6,#18-1f
|
454
|
-
7, 6, 8, 8, 5, 5, 5,EError,#20-27
|
455
|
-
5, 5, 5,EError,EError,EError, 5, 5,#28-2f
|
456
|
-
5, 5, 5,EError, 5,EError,EStart,EStart#30-37
|
457
|
-
].freeze
|
458
|
-
|
459
|
-
UCS2LECharLenTable = [2, 2, 2, 2, 2, 2].freeze
|
460
|
-
|
461
|
-
UCS2LESMModel = {'classTable' => UCS2LE_cls,
|
462
|
-
'classFactor' => 6,
|
463
|
-
'stateTable' => UCS2LE_st,
|
464
|
-
'charLenTable' => UCS2LECharLenTable,
|
465
|
-
'name' => 'UTF-16LE'
|
466
|
-
}.freeze
|
467
|
-
|
468
|
-
# UTF-8
|
469
|
-
|
470
|
-
UTF8_cls = [
|
471
|
-
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
472
|
-
1,1,1,1,1,1,0,0, # 08 - 0f
|
473
|
-
1,1,1,1,1,1,1,1, # 10 - 17
|
474
|
-
1,1,1,0,1,1,1,1, # 18 - 1f
|
475
|
-
1,1,1,1,1,1,1,1, # 20 - 27
|
476
|
-
1,1,1,1,1,1,1,1, # 28 - 2f
|
477
|
-
1,1,1,1,1,1,1,1, # 30 - 37
|
478
|
-
1,1,1,1,1,1,1,1, # 38 - 3f
|
479
|
-
1,1,1,1,1,1,1,1, # 40 - 47
|
480
|
-
1,1,1,1,1,1,1,1, # 48 - 4f
|
481
|
-
1,1,1,1,1,1,1,1, # 50 - 57
|
482
|
-
1,1,1,1,1,1,1,1, # 58 - 5f
|
483
|
-
1,1,1,1,1,1,1,1, # 60 - 67
|
484
|
-
1,1,1,1,1,1,1,1, # 68 - 6f
|
485
|
-
1,1,1,1,1,1,1,1, # 70 - 77
|
486
|
-
1,1,1,1,1,1,1,1, # 78 - 7f
|
487
|
-
2,2,2,2,3,3,3,3, # 80 - 87
|
488
|
-
4,4,4,4,4,4,4,4, # 88 - 8f
|
489
|
-
4,4,4,4,4,4,4,4, # 90 - 97
|
490
|
-
4,4,4,4,4,4,4,4, # 98 - 9f
|
491
|
-
5,5,5,5,5,5,5,5, # a0 - a7
|
492
|
-
5,5,5,5,5,5,5,5, # a8 - af
|
493
|
-
5,5,5,5,5,5,5,5, # b0 - b7
|
494
|
-
5,5,5,5,5,5,5,5, # b8 - bf
|
495
|
-
0,0,6,6,6,6,6,6, # c0 - c7
|
496
|
-
6,6,6,6,6,6,6,6, # c8 - cf
|
497
|
-
6,6,6,6,6,6,6,6, # d0 - d7
|
498
|
-
6,6,6,6,6,6,6,6, # d8 - df
|
499
|
-
7,8,8,8,8,8,8,8, # e0 - e7
|
500
|
-
8,8,8,8,8,9,8,8, # e8 - ef
|
501
|
-
10,11,11,11,11,11,11,11, # f0 - f7
|
502
|
-
12,13,13,13,14,15,0,0 # f8 - ff
|
503
|
-
].freeze
|
504
|
-
|
505
|
-
UTF8_st = [
|
506
|
-
EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
|
507
|
-
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
508
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#10-17
|
509
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#18-1f
|
510
|
-
EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#20-27
|
511
|
-
EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#28-2f
|
512
|
-
EError,EError, 5, 5, 5, 5,EError,EError,#30-37
|
513
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#38-3f
|
514
|
-
EError,EError,EError, 5, 5, 5,EError,EError,#40-47
|
515
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#48-4f
|
516
|
-
EError,EError, 7, 7, 7, 7,EError,EError,#50-57
|
517
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#58-5f
|
518
|
-
EError,EError,EError,EError, 7, 7,EError,EError,#60-67
|
519
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#68-6f
|
520
|
-
EError,EError, 9, 9, 9, 9,EError,EError,#70-77
|
521
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#78-7f
|
522
|
-
EError,EError,EError,EError,EError, 9,EError,EError,#80-87
|
523
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#88-8f
|
524
|
-
EError,EError, 12, 12, 12, 12,EError,EError,#90-97
|
525
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#98-9f
|
526
|
-
EError,EError,EError,EError,EError, 12,EError,EError,#a0-a7
|
527
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#a8-af
|
528
|
-
EError,EError, 12, 12, 12,EError,EError,EError,#b0-b7
|
529
|
-
EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
|
530
|
-
EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
|
531
|
-
EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
|
532
|
-
].freeze
|
533
|
-
|
534
|
-
UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6].freeze
|
535
|
-
|
536
|
-
UTF8SMModel = {'classTable' => UTF8_cls,
|
537
|
-
'classFactor' => 16,
|
538
|
-
'stateTable' => UTF8_st,
|
539
|
-
'charLenTable' => UTF8CharLenTable,
|
540
|
-
'name' => 'UTF-8'
|
541
|
-
}.freeze
|
542
|
-
end
|
@@ -1,122 +0,0 @@
|
|
1
|
-
######################## BEGIN LICENSE BLOCK ########################
|
2
|
-
# The Original Code is Mozilla Universal charset detector code.
|
3
|
-
#
|
4
|
-
# The Initial Developer of the Original Code is
|
5
|
-
# Netscape Communications Corporation.
|
6
|
-
# Portions created by the Initial Developer are Copyright (C) 2001
|
7
|
-
# the Initial Developer. All Rights Reserved.
|
8
|
-
#
|
9
|
-
# Contributor(s):
|
10
|
-
# Jeff Hodges - port to Ruby
|
11
|
-
# Mark Pilgrim - port to Python
|
12
|
-
# Shy Shalom - original C code
|
13
|
-
#
|
14
|
-
# This library is free software; you can redistribute it and/or
|
15
|
-
# modify it under the terms of the GNU Lesser General Public
|
16
|
-
# License as published by the Free Software Foundation; either
|
17
|
-
# version 2.1 of the License, or (at your option) any later version.
|
18
|
-
#
|
19
|
-
# This library is distributed in the hope that it will be useful,
|
20
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
21
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
22
|
-
# Lesser General Public License for more details.
|
23
|
-
#
|
24
|
-
# You should have received a copy of the GNU Lesser General Public
|
25
|
-
# License along with this library; if not, write to the Free Software
|
26
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
27
|
-
# 02110-1301 USA
|
28
|
-
######################### END LICENSE BLOCK #########################
|
29
|
-
|
30
|
-
module CharDet
|
31
|
-
SAMPLE_SIZE = 64 # orders below this are sampled by feed; also the row width used to index the precedence matrix
|
32
|
-
SB_ENOUGH_REL_THRESHOLD = 1024 # feed only tries the confidence shortcuts once @totalSeqs exceeds this
|
33
|
-
POSITIVE_SHORTCUT_THRESHOLD = 0.95 # confidence above this makes feed set the state to EFoundIt
|
34
|
-
NEGATIVE_SHORTCUT_THRESHOLD = 0.05 # confidence below this makes feed set the state to ENotMe
|
35
|
-
SYMBOL_CAT_ORDER = 250 # orders below this are counted in @totalChar by feed
|
36
|
-
NUMBER_OF_SEQ_CAT = 4 # number of counters reset allocates in @seqCounters
|
37
|
-
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 # index of the @seqCounters entry read by get_confidence
|
38
|
-
#NEGATIVE_CAT = 0
|
39
|
-
|
40
|
-
class SingleByteCharSetProber < CharSetProber
|
41
|
-
# Builds a single-byte charset prober around a language model.
#
# model      - model hash; kept in @model and read by the other methods
# reversed   - when true, every character pair is looked up in reversed order
# nameProber - optional auxiliary prober that supplies the charset name
def initialize(model, reversed=false, nameProber=nil)
  super()
  @model, @reversed, @nameProber = model, reversed, nameProber
  reset
end
|
48
|
-
|
49
|
-
# Runs the parent class's reset, then clears every statistic this prober keeps.
def reset
  super()
  @lastOrder = 255 # order of the previous character; 255 lies outside the sampling range
  @seqCounters = Array.new(NUMBER_OF_SEQ_CAT, 0) # one counter per sequence category
  @totalSeqs = @totalChar = 0
  @freqChar = 0 # characters that fall in our sampling range
end
|
57
|
-
|
58
|
-
# Returns the charset name for this prober.
#
# When an auxiliary name prober was supplied, its answer is used;
# otherwise the name comes from the model's 'charsetName' entry.
def get_charset_name
  @nameProber ? @nameProber.get_charset_name : @model['charsetName']
end
|
65
|
-
|
66
|
-
# Feeds a chunk of bytes into the prober and updates its statistics.
#
# aBuf - the data to analyse; it is passed through
#        filter_without_english_letters first unless the model sets
#        'keepEnglishLetter'.
#
# For every byte the model's 'charToOrderMap' gives an order. Orders below
# SYMBOL_CAT_ORDER are counted in @totalChar, orders below SAMPLE_SIZE in
# @freqChar, and two consecutive sampled orders are counted as a sequence
# whose category comes from the model's 'precedenceMatrix'.
#
# Returns the prober state (get_state) after the chunk has been processed.
def feed(aBuf)
  aBuf = filter_without_english_letters(aBuf) unless @model['keepEnglishLetter']
  return get_state if aBuf.empty?

  # Both tables are loop-invariant, so fetch them once.
  order_map = @model['charToOrderMap']
  precedence = @model['precedenceMatrix']

  aBuf.each_byte do |byte|
    # each_byte already yields the integer byte value; index the map directly.
    order = order_map[byte]
    @totalChar += 1 if order < SYMBOL_CAT_ORDER

    if order < SAMPLE_SIZE
      @freqChar += 1
      if @lastOrder < SAMPLE_SIZE
        @totalSeqs += 1
        # A reversed model looks the pair up with the two orders swapped.
        pair_index = if @reversed
                       (order * SAMPLE_SIZE) + @lastOrder
                     else
                       (@lastOrder * SAMPLE_SIZE) + order
                     end
        @seqCounters[precedence[pair_index]] += 1
      end
    end

    @lastOrder = order
  end

  # Once enough sequences have been seen, a very high or very low confidence
  # settles the outcome early.
  # NOTE(review): $debug is a custom global, not Ruby's built-in $DEBUG;
  # presumably it is set elsewhere in the library - confirm.
  if get_state == EDetecting && @totalSeqs > SB_ENOUGH_REL_THRESHOLD
    confidence = get_confidence
    if confidence > POSITIVE_SHORTCUT_THRESHOLD
      $stderr << "#{@model['charsetName']} confidence = #{confidence}, we have a winner\n" if $debug
      @state = EFoundIt
    elsif confidence < NEGATIVE_SHORTCUT_THRESHOLD
      $stderr << "#{@model['charsetName']} confidence = #{confidence}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
      @state = ENotMe
    end
  end

  get_state
end
|
109
|
-
|
110
|
-
# Estimates how well the data fed so far matches the model.
#
# Returns 0.01 while no character sequence has been counted. Otherwise the
# share of sequences in the positive category is divided by the model's
# 'mTypicalPositiveRatio' and scaled by @freqChar / @totalChar; a result of
# 1.0 or more is reported as 0.99.
def get_confidence
  return 0.01 unless @totalSeqs > 0

  positive_share = (1.0 * @seqCounters[POSITIVE_CAT]) / @totalSeqs
  score = positive_share / @model['mTypicalPositiveRatio']
  score = score * @freqChar / @totalChar
  score >= 1.0 ? 0.99 : score
end
|
121
|
-
end
|
122
|
-
end
|