lg_pod_plugin 1.0.8 → 1.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/bin/lg +5 -0
  3. data/lib/command/cache.rb +22 -18
  4. data/lib/command/command.rb +27 -35
  5. data/lib/command/install.rb +52 -0
  6. data/lib/command/update.rb +39 -0
  7. data/lib/lg_pod_plugin/downloader.rb +12 -17
  8. data/lib/lg_pod_plugin/git_util.rb +150 -106
  9. data/lib/lg_pod_plugin/install.rb +74 -88
  10. data/lib/lg_pod_plugin/l_cache.rb +11 -77
  11. data/lib/lg_pod_plugin/l_util.rb +6 -2
  12. data/lib/lg_pod_plugin/request.rb +90 -82
  13. data/lib/lg_pod_plugin/version.rb +1 -1
  14. data/lib/lg_pod_plugin.rb +9 -5
  15. metadata +57 -118
  16. data/lib/git/author.rb +0 -14
  17. data/lib/git/base/factory.rb +0 -101
  18. data/lib/git/base.rb +0 -670
  19. data/lib/git/branch.rb +0 -126
  20. data/lib/git/branches.rb +0 -71
  21. data/lib/git/config.rb +0 -22
  22. data/lib/git/diff.rb +0 -155
  23. data/lib/git/encoding_utils.rb +0 -33
  24. data/lib/git/escaped_path.rb +0 -77
  25. data/lib/git/index.rb +0 -5
  26. data/lib/git/lib.rb +0 -1215
  27. data/lib/git/log.rb +0 -135
  28. data/lib/git/object.rb +0 -312
  29. data/lib/git/path.rb +0 -31
  30. data/lib/git/remote.rb +0 -36
  31. data/lib/git/repository.rb +0 -6
  32. data/lib/git/stash.rb +0 -27
  33. data/lib/git/stashes.rb +0 -55
  34. data/lib/git/status.rb +0 -199
  35. data/lib/git/url.rb +0 -127
  36. data/lib/git/version.rb +0 -5
  37. data/lib/git/working_directory.rb +0 -4
  38. data/lib/git/worktree.rb +0 -38
  39. data/lib/git/worktrees.rb +0 -47
  40. data/lib/git.rb +0 -326
  41. data/lib/rchardet/big5freq.rb +0 -927
  42. data/lib/rchardet/big5prober.rb +0 -42
  43. data/lib/rchardet/chardistribution.rb +0 -250
  44. data/lib/rchardet/charsetgroupprober.rb +0 -110
  45. data/lib/rchardet/charsetprober.rb +0 -70
  46. data/lib/rchardet/codingstatemachine.rb +0 -67
  47. data/lib/rchardet/constants.rb +0 -42
  48. data/lib/rchardet/escprober.rb +0 -90
  49. data/lib/rchardet/escsm.rb +0 -245
  50. data/lib/rchardet/eucjpprober.rb +0 -88
  51. data/lib/rchardet/euckrfreq.rb +0 -597
  52. data/lib/rchardet/euckrprober.rb +0 -42
  53. data/lib/rchardet/euctwfreq.rb +0 -431
  54. data/lib/rchardet/euctwprober.rb +0 -42
  55. data/lib/rchardet/gb18030freq.rb +0 -474
  56. data/lib/rchardet/gb18030prober.rb +0 -42
  57. data/lib/rchardet/hebrewprober.rb +0 -289
  58. data/lib/rchardet/jisfreq.rb +0 -571
  59. data/lib/rchardet/jpcntx.rb +0 -229
  60. data/lib/rchardet/langbulgarianmodel.rb +0 -229
  61. data/lib/rchardet/langcyrillicmodel.rb +0 -330
  62. data/lib/rchardet/langgreekmodel.rb +0 -227
  63. data/lib/rchardet/langhebrewmodel.rb +0 -202
  64. data/lib/rchardet/langhungarianmodel.rb +0 -226
  65. data/lib/rchardet/langthaimodel.rb +0 -201
  66. data/lib/rchardet/latin1prober.rb +0 -147
  67. data/lib/rchardet/mbcharsetprober.rb +0 -89
  68. data/lib/rchardet/mbcsgroupprober.rb +0 -47
  69. data/lib/rchardet/mbcssm.rb +0 -542
  70. data/lib/rchardet/sbcharsetprober.rb +0 -122
  71. data/lib/rchardet/sbcsgroupprober.rb +0 -58
  72. data/lib/rchardet/sjisprober.rb +0 -88
  73. data/lib/rchardet/universaldetector.rb +0 -179
  74. data/lib/rchardet/utf8prober.rb +0 -87
  75. data/lib/rchardet/version.rb +0 -3
  76. data/lib/rchardet.rb +0 -67
  77. data/lib/zip/central_directory.rb +0 -212
  78. data/lib/zip/compressor.rb +0 -9
  79. data/lib/zip/constants.rb +0 -115
  80. data/lib/zip/crypto/decrypted_io.rb +0 -40
  81. data/lib/zip/crypto/encryption.rb +0 -11
  82. data/lib/zip/crypto/null_encryption.rb +0 -43
  83. data/lib/zip/crypto/traditional_encryption.rb +0 -99
  84. data/lib/zip/decompressor.rb +0 -31
  85. data/lib/zip/deflater.rb +0 -34
  86. data/lib/zip/dos_time.rb +0 -53
  87. data/lib/zip/entry.rb +0 -719
  88. data/lib/zip/entry_set.rb +0 -88
  89. data/lib/zip/errors.rb +0 -19
  90. data/lib/zip/extra_field/generic.rb +0 -44
  91. data/lib/zip/extra_field/ntfs.rb +0 -94
  92. data/lib/zip/extra_field/old_unix.rb +0 -46
  93. data/lib/zip/extra_field/universal_time.rb +0 -77
  94. data/lib/zip/extra_field/unix.rb +0 -39
  95. data/lib/zip/extra_field/zip64.rb +0 -70
  96. data/lib/zip/extra_field/zip64_placeholder.rb +0 -15
  97. data/lib/zip/extra_field.rb +0 -103
  98. data/lib/zip/file.rb +0 -468
  99. data/lib/zip/filesystem.rb +0 -643
  100. data/lib/zip/inflater.rb +0 -54
  101. data/lib/zip/input_stream.rb +0 -180
  102. data/lib/zip/ioextras/abstract_input_stream.rb +0 -122
  103. data/lib/zip/ioextras/abstract_output_stream.rb +0 -43
  104. data/lib/zip/ioextras.rb +0 -36
  105. data/lib/zip/null_compressor.rb +0 -15
  106. data/lib/zip/null_decompressor.rb +0 -19
  107. data/lib/zip/null_input_stream.rb +0 -10
  108. data/lib/zip/output_stream.rb +0 -198
  109. data/lib/zip/pass_thru_compressor.rb +0 -23
  110. data/lib/zip/pass_thru_decompressor.rb +0 -31
  111. data/lib/zip/streamable_directory.rb +0 -15
  112. data/lib/zip/streamable_stream.rb +0 -52
  113. data/lib/zip/version.rb +0 -3
  114. data/lib/zip.rb +0 -72
@@ -1,542 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is mozilla.org code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
- module CharDet
30
- # BIG5
31
-
32
- BIG5_cls = [
33
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
34
- 1,1,1,1,1,1,0,0, # 08 - 0f
35
- 1,1,1,1,1,1,1,1, # 10 - 17
36
- 1,1,1,0,1,1,1,1, # 18 - 1f
37
- 1,1,1,1,1,1,1,1, # 20 - 27
38
- 1,1,1,1,1,1,1,1, # 28 - 2f
39
- 1,1,1,1,1,1,1,1, # 30 - 37
40
- 1,1,1,1,1,1,1,1, # 38 - 3f
41
- 2,2,2,2,2,2,2,2, # 40 - 47
42
- 2,2,2,2,2,2,2,2, # 48 - 4f
43
- 2,2,2,2,2,2,2,2, # 50 - 57
44
- 2,2,2,2,2,2,2,2, # 58 - 5f
45
- 2,2,2,2,2,2,2,2, # 60 - 67
46
- 2,2,2,2,2,2,2,2, # 68 - 6f
47
- 2,2,2,2,2,2,2,2, # 70 - 77
48
- 2,2,2,2,2,2,2,1, # 78 - 7f
49
- 4,4,4,4,4,4,4,4, # 80 - 87
50
- 4,4,4,4,4,4,4,4, # 88 - 8f
51
- 4,4,4,4,4,4,4,4, # 90 - 97
52
- 4,4,4,4,4,4,4,4, # 98 - 9f
53
- 4,3,3,3,3,3,3,3, # a0 - a7
54
- 3,3,3,3,3,3,3,3, # a8 - af
55
- 3,3,3,3,3,3,3,3, # b0 - b7
56
- 3,3,3,3,3,3,3,3, # b8 - bf
57
- 3,3,3,3,3,3,3,3, # c0 - c7
58
- 3,3,3,3,3,3,3,3, # c8 - cf
59
- 3,3,3,3,3,3,3,3, # d0 - d7
60
- 3,3,3,3,3,3,3,3, # d8 - df
61
- 3,3,3,3,3,3,3,3, # e0 - e7
62
- 3,3,3,3,3,3,3,3, # e8 - ef
63
- 3,3,3,3,3,3,3,3, # f0 - f7
64
- 3,3,3,3,3,3,3,0 # f8 - ff
65
- ].freeze
66
-
67
- BIG5_st = [
68
- EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
69
- EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
70
- EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
71
- ].freeze
72
-
73
- Big5CharLenTable = [0, 1, 1, 2, 0].freeze
74
-
75
- Big5SMModel = {'classTable' => BIG5_cls,
76
- 'classFactor' => 5,
77
- 'stateTable' => BIG5_st,
78
- 'charLenTable' => Big5CharLenTable,
79
- 'name' => 'Big5'
80
- }.freeze
81
-
82
- # EUC-JP
83
-
84
- EUCJP_cls = [
85
- 4,4,4,4,4,4,4,4, # 00 - 07
86
- 4,4,4,4,4,4,5,5, # 08 - 0f
87
- 4,4,4,4,4,4,4,4, # 10 - 17
88
- 4,4,4,5,4,4,4,4, # 18 - 1f
89
- 4,4,4,4,4,4,4,4, # 20 - 27
90
- 4,4,4,4,4,4,4,4, # 28 - 2f
91
- 4,4,4,4,4,4,4,4, # 30 - 37
92
- 4,4,4,4,4,4,4,4, # 38 - 3f
93
- 4,4,4,4,4,4,4,4, # 40 - 47
94
- 4,4,4,4,4,4,4,4, # 48 - 4f
95
- 4,4,4,4,4,4,4,4, # 50 - 57
96
- 4,4,4,4,4,4,4,4, # 58 - 5f
97
- 4,4,4,4,4,4,4,4, # 60 - 67
98
- 4,4,4,4,4,4,4,4, # 68 - 6f
99
- 4,4,4,4,4,4,4,4, # 70 - 77
100
- 4,4,4,4,4,4,4,4, # 78 - 7f
101
- 5,5,5,5,5,5,5,5, # 80 - 87
102
- 5,5,5,5,5,5,1,3, # 88 - 8f
103
- 5,5,5,5,5,5,5,5, # 90 - 97
104
- 5,5,5,5,5,5,5,5, # 98 - 9f
105
- 5,2,2,2,2,2,2,2, # a0 - a7
106
- 2,2,2,2,2,2,2,2, # a8 - af
107
- 2,2,2,2,2,2,2,2, # b0 - b7
108
- 2,2,2,2,2,2,2,2, # b8 - bf
109
- 2,2,2,2,2,2,2,2, # c0 - c7
110
- 2,2,2,2,2,2,2,2, # c8 - cf
111
- 2,2,2,2,2,2,2,2, # d0 - d7
112
- 2,2,2,2,2,2,2,2, # d8 - df
113
- 0,0,0,0,0,0,0,0, # e0 - e7
114
- 0,0,0,0,0,0,0,0, # e8 - ef
115
- 0,0,0,0,0,0,0,0, # f0 - f7
116
- 0,0,0,0,0,0,0,5 # f8 - ff
117
- ].freeze
118
-
119
- EUCJP_st = [
120
- 3, 4, 3, 5,EStart,EError,EError,EError,#00-07
121
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
122
- EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
123
- EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
124
- 3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
125
- ].freeze
126
-
127
- EUCJPCharLenTable = [2, 2, 2, 3, 1, 0].freeze
128
-
129
- EUCJPSMModel = {'classTable' => EUCJP_cls,
130
- 'classFactor' => 6,
131
- 'stateTable' => EUCJP_st,
132
- 'charLenTable' => EUCJPCharLenTable,
133
- 'name' => 'EUC-JP'
134
- }.freeze
135
-
136
- # EUC-KR
137
-
138
- EUCKR_cls = [
139
- 1,1,1,1,1,1,1,1, # 00 - 07
140
- 1,1,1,1,1,1,0,0, # 08 - 0f
141
- 1,1,1,1,1,1,1,1, # 10 - 17
142
- 1,1,1,0,1,1,1,1, # 18 - 1f
143
- 1,1,1,1,1,1,1,1, # 20 - 27
144
- 1,1,1,1,1,1,1,1, # 28 - 2f
145
- 1,1,1,1,1,1,1,1, # 30 - 37
146
- 1,1,1,1,1,1,1,1, # 38 - 3f
147
- 1,1,1,1,1,1,1,1, # 40 - 47
148
- 1,1,1,1,1,1,1,1, # 48 - 4f
149
- 1,1,1,1,1,1,1,1, # 50 - 57
150
- 1,1,1,1,1,1,1,1, # 58 - 5f
151
- 1,1,1,1,1,1,1,1, # 60 - 67
152
- 1,1,1,1,1,1,1,1, # 68 - 6f
153
- 1,1,1,1,1,1,1,1, # 70 - 77
154
- 1,1,1,1,1,1,1,1, # 78 - 7f
155
- 0,0,0,0,0,0,0,0, # 80 - 87
156
- 0,0,0,0,0,0,0,0, # 88 - 8f
157
- 0,0,0,0,0,0,0,0, # 90 - 97
158
- 0,0,0,0,0,0,0,0, # 98 - 9f
159
- 0,2,2,2,2,2,2,2, # a0 - a7
160
- 2,2,2,2,2,3,3,3, # a8 - af
161
- 2,2,2,2,2,2,2,2, # b0 - b7
162
- 2,2,2,2,2,2,2,2, # b8 - bf
163
- 2,2,2,2,2,2,2,2, # c0 - c7
164
- 2,3,2,2,2,2,2,2, # c8 - cf
165
- 2,2,2,2,2,2,2,2, # d0 - d7
166
- 2,2,2,2,2,2,2,2, # d8 - df
167
- 2,2,2,2,2,2,2,2, # e0 - e7
168
- 2,2,2,2,2,2,2,2, # e8 - ef
169
- 2,2,2,2,2,2,2,2, # f0 - f7
170
- 2,2,2,2,2,2,2,0 # f8 - ff
171
- ].freeze
172
-
173
- EUCKR_st = [
174
- EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
175
- EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
176
- ].freeze
177
-
178
- EUCKRCharLenTable = [0, 1, 2, 0].freeze
179
-
180
- EUCKRSMModel = {'classTable' => EUCKR_cls,
181
- 'classFactor' => 4,
182
- 'stateTable' => EUCKR_st,
183
- 'charLenTable' => EUCKRCharLenTable,
184
- 'name' => 'EUC-KR'
185
- }.freeze
186
-
187
- # EUC-TW
188
-
189
- EUCTW_cls = [
190
- 2,2,2,2,2,2,2,2, # 00 - 07
191
- 2,2,2,2,2,2,0,0, # 08 - 0f
192
- 2,2,2,2,2,2,2,2, # 10 - 17
193
- 2,2,2,0,2,2,2,2, # 18 - 1f
194
- 2,2,2,2,2,2,2,2, # 20 - 27
195
- 2,2,2,2,2,2,2,2, # 28 - 2f
196
- 2,2,2,2,2,2,2,2, # 30 - 37
197
- 2,2,2,2,2,2,2,2, # 38 - 3f
198
- 2,2,2,2,2,2,2,2, # 40 - 47
199
- 2,2,2,2,2,2,2,2, # 48 - 4f
200
- 2,2,2,2,2,2,2,2, # 50 - 57
201
- 2,2,2,2,2,2,2,2, # 58 - 5f
202
- 2,2,2,2,2,2,2,2, # 60 - 67
203
- 2,2,2,2,2,2,2,2, # 68 - 6f
204
- 2,2,2,2,2,2,2,2, # 70 - 77
205
- 2,2,2,2,2,2,2,2, # 78 - 7f
206
- 0,0,0,0,0,0,0,0, # 80 - 87
207
- 0,0,0,0,0,0,6,0, # 88 - 8f
208
- 0,0,0,0,0,0,0,0, # 90 - 97
209
- 0,0,0,0,0,0,0,0, # 98 - 9f
210
- 0,3,4,4,4,4,4,4, # a0 - a7
211
- 5,5,1,1,1,1,1,1, # a8 - af
212
- 1,1,1,1,1,1,1,1, # b0 - b7
213
- 1,1,1,1,1,1,1,1, # b8 - bf
214
- 1,1,3,1,3,3,3,3, # c0 - c7
215
- 3,3,3,3,3,3,3,3, # c8 - cf
216
- 3,3,3,3,3,3,3,3, # d0 - d7
217
- 3,3,3,3,3,3,3,3, # d8 - df
218
- 3,3,3,3,3,3,3,3, # e0 - e7
219
- 3,3,3,3,3,3,3,3, # e8 - ef
220
- 3,3,3,3,3,3,3,3, # f0 - f7
221
- 3,3,3,3,3,3,3,0 # f8 - ff
222
- ].freeze
223
-
224
- EUCTW_st = [
225
- EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
226
- EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
227
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EStart,EError,#10-17
228
- EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
229
- 5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
230
- EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
231
- ].freeze
232
-
233
- EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3].freeze
234
-
235
- EUCTWSMModel = {'classTable' => EUCTW_cls,
236
- 'classFactor' => 7,
237
- 'stateTable' => EUCTW_st,
238
- 'charLenTable' => EUCTWCharLenTable,
239
- 'name' => 'x-euc-tw'
240
- }.freeze
241
-
242
- # GB18030
243
-
244
- GB18030_cls = [
245
- 1,1,1,1,1,1,1,1, # 00 - 07
246
- 1,1,1,1,1,1,0,0, # 08 - 0f
247
- 1,1,1,1,1,1,1,1, # 10 - 17
248
- 1,1,1,0,1,1,1,1, # 18 - 1f
249
- 1,1,1,1,1,1,1,1, # 20 - 27
250
- 1,1,1,1,1,1,1,1, # 28 - 2f
251
- 3,3,3,3,3,3,3,3, # 30 - 37
252
- 3,3,1,1,1,1,1,1, # 38 - 3f
253
- 2,2,2,2,2,2,2,2, # 40 - 47
254
- 2,2,2,2,2,2,2,2, # 48 - 4f
255
- 2,2,2,2,2,2,2,2, # 50 - 57
256
- 2,2,2,2,2,2,2,2, # 58 - 5f
257
- 2,2,2,2,2,2,2,2, # 60 - 67
258
- 2,2,2,2,2,2,2,2, # 68 - 6f
259
- 2,2,2,2,2,2,2,2, # 70 - 77
260
- 2,2,2,2,2,2,2,4, # 78 - 7f
261
- 5,6,6,6,6,6,6,6, # 80 - 87
262
- 6,6,6,6,6,6,6,6, # 88 - 8f
263
- 6,6,6,6,6,6,6,6, # 90 - 97
264
- 6,6,6,6,6,6,6,6, # 98 - 9f
265
- 6,6,6,6,6,6,6,6, # a0 - a7
266
- 6,6,6,6,6,6,6,6, # a8 - af
267
- 6,6,6,6,6,6,6,6, # b0 - b7
268
- 6,6,6,6,6,6,6,6, # b8 - bf
269
- 6,6,6,6,6,6,6,6, # c0 - c7
270
- 6,6,6,6,6,6,6,6, # c8 - cf
271
- 6,6,6,6,6,6,6,6, # d0 - d7
272
- 6,6,6,6,6,6,6,6, # d8 - df
273
- 6,6,6,6,6,6,6,6, # e0 - e7
274
- 6,6,6,6,6,6,6,6, # e8 - ef
275
- 6,6,6,6,6,6,6,6, # f0 - f7
276
- 6,6,6,6,6,6,6,0 # f8 - ff
277
- ].freeze
278
-
279
- GB18030_st = [
280
- EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
281
- EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
282
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
283
- 4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
284
- EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
285
- EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
286
- ].freeze
287
-
288
- # To be accurate, the length of class 6 can be either 2 or 4.
289
- # But it is not necessary to discriminate between the two since
290
- # it is used for frequency analysis only, and we are validating
291
- # each code range there as well. So it is safe to set it to be
292
- # 2 here.
293
- GB18030CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
294
-
295
- GB18030SMModel = {'classTable' => GB18030_cls,
296
- 'classFactor' => 7,
297
- 'stateTable' => GB18030_st,
298
- 'charLenTable' => GB18030CharLenTable,
299
- 'name' => 'GB18030'
300
- }.freeze
301
-
302
- # Shift_JIS
303
-
304
- SJIS_cls = [
305
- 1,1,1,1,1,1,1,1, # 00 - 07
306
- 1,1,1,1,1,1,0,0, # 08 - 0f
307
- 1,1,1,1,1,1,1,1, # 10 - 17
308
- 1,1,1,0,1,1,1,1, # 18 - 1f
309
- 1,1,1,1,1,1,1,1, # 20 - 27
310
- 1,1,1,1,1,1,1,1, # 28 - 2f
311
- 1,1,1,1,1,1,1,1, # 30 - 37
312
- 1,1,1,1,1,1,1,1, # 38 - 3f
313
- 2,2,2,2,2,2,2,2, # 40 - 47
314
- 2,2,2,2,2,2,2,2, # 48 - 4f
315
- 2,2,2,2,2,2,2,2, # 50 - 57
316
- 2,2,2,2,2,2,2,2, # 58 - 5f
317
- 2,2,2,2,2,2,2,2, # 60 - 67
318
- 2,2,2,2,2,2,2,2, # 68 - 6f
319
- 2,2,2,2,2,2,2,2, # 70 - 77
320
- 2,2,2,2,2,2,2,1, # 78 - 7f
321
- 3,3,3,3,3,3,3,3, # 80 - 87
322
- 3,3,3,3,3,3,3,3, # 88 - 8f
323
- 3,3,3,3,3,3,3,3, # 90 - 97
324
- 3,3,3,3,3,3,3,3, # 98 - 9f
325
- #0xa0 is illegal in sjis encoding, but some pages do
325
- #contain such bytes. We need to be more error-forgiving.
327
- 2,2,2,2,2,2,2,2, # a0 - a7
328
- 2,2,2,2,2,2,2,2, # a8 - af
329
- 2,2,2,2,2,2,2,2, # b0 - b7
330
- 2,2,2,2,2,2,2,2, # b8 - bf
331
- 2,2,2,2,2,2,2,2, # c0 - c7
332
- 2,2,2,2,2,2,2,2, # c8 - cf
333
- 2,2,2,2,2,2,2,2, # d0 - d7
334
- 2,2,2,2,2,2,2,2, # d8 - df
335
- 3,3,3,3,3,3,3,3, # e0 - e7
336
- 3,3,3,3,3,4,4,4, # e8 - ef
337
- 4,4,4,4,4,4,4,4, # f0 - f7
338
- 4,4,4,4,4,0,0,0 # f8 - ff
339
- ].freeze
340
-
341
- SJIS_st = [
342
- EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
343
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
344
- EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
345
- ].freeze
346
-
347
- SJISCharLenTable = [0, 1, 1, 2, 0, 0].freeze
348
-
349
- SJISSMModel = {'classTable' => SJIS_cls,
350
- 'classFactor' => 6,
351
- 'stateTable' => SJIS_st,
352
- 'charLenTable' => SJISCharLenTable,
353
- 'name' => 'Shift_JIS'
354
- }.freeze
355
-
356
- # UCS2-BE
357
-
358
- UCS2BE_cls = [
359
- 0,0,0,0,0,0,0,0, # 00 - 07
360
- 0,0,1,0,0,2,0,0, # 08 - 0f
361
- 0,0,0,0,0,0,0,0, # 10 - 17
362
- 0,0,0,3,0,0,0,0, # 18 - 1f
363
- 0,0,0,0,0,0,0,0, # 20 - 27
364
- 0,3,3,3,3,3,0,0, # 28 - 2f
365
- 0,0,0,0,0,0,0,0, # 30 - 37
366
- 0,0,0,0,0,0,0,0, # 38 - 3f
367
- 0,0,0,0,0,0,0,0, # 40 - 47
368
- 0,0,0,0,0,0,0,0, # 48 - 4f
369
- 0,0,0,0,0,0,0,0, # 50 - 57
370
- 0,0,0,0,0,0,0,0, # 58 - 5f
371
- 0,0,0,0,0,0,0,0, # 60 - 67
372
- 0,0,0,0,0,0,0,0, # 68 - 6f
373
- 0,0,0,0,0,0,0,0, # 70 - 77
374
- 0,0,0,0,0,0,0,0, # 78 - 7f
375
- 0,0,0,0,0,0,0,0, # 80 - 87
376
- 0,0,0,0,0,0,0,0, # 88 - 8f
377
- 0,0,0,0,0,0,0,0, # 90 - 97
378
- 0,0,0,0,0,0,0,0, # 98 - 9f
379
- 0,0,0,0,0,0,0,0, # a0 - a7
380
- 0,0,0,0,0,0,0,0, # a8 - af
381
- 0,0,0,0,0,0,0,0, # b0 - b7
382
- 0,0,0,0,0,0,0,0, # b8 - bf
383
- 0,0,0,0,0,0,0,0, # c0 - c7
384
- 0,0,0,0,0,0,0,0, # c8 - cf
385
- 0,0,0,0,0,0,0,0, # d0 - d7
386
- 0,0,0,0,0,0,0,0, # d8 - df
387
- 0,0,0,0,0,0,0,0, # e0 - e7
388
- 0,0,0,0,0,0,0,0, # e8 - ef
389
- 0,0,0,0,0,0,0,0, # f0 - f7
390
- 0,0,0,0,0,0,4,5 # f8 - ff
391
- ].freeze
392
-
393
- UCS2BE_st = [
394
- 5, 7, 7,EError, 4, 3,EError,EError,#00-07
395
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
396
- EItsMe,EItsMe, 6, 6, 6, 6,EError,EError,#10-17
397
- 6, 6, 6, 6, 6,EItsMe, 6, 6,#18-1f
398
- 6, 6, 6, 6, 5, 7, 7,EError,#20-27
399
- 5, 8, 6, 6,EError, 6, 6, 6,#28-2f
400
- 6, 6, 6, 6,EError,EError,EStart,EStart#30-37
401
- ].freeze
402
-
403
- UCS2BECharLenTable = [2, 2, 2, 0, 2, 2].freeze
404
-
405
- UCS2BESMModel = {'classTable' => UCS2BE_cls,
406
- 'classFactor' => 6,
407
- 'stateTable' => UCS2BE_st,
408
- 'charLenTable' => UCS2BECharLenTable,
409
- 'name' => 'UTF-16BE'
410
- }.freeze
411
-
412
- # UCS2-LE
413
-
414
- UCS2LE_cls = [
415
- 0,0,0,0,0,0,0,0, # 00 - 07
416
- 0,0,1,0,0,2,0,0, # 08 - 0f
417
- 0,0,0,0,0,0,0,0, # 10 - 17
418
- 0,0,0,3,0,0,0,0, # 18 - 1f
419
- 0,0,0,0,0,0,0,0, # 20 - 27
420
- 0,3,3,3,3,3,0,0, # 28 - 2f
421
- 0,0,0,0,0,0,0,0, # 30 - 37
422
- 0,0,0,0,0,0,0,0, # 38 - 3f
423
- 0,0,0,0,0,0,0,0, # 40 - 47
424
- 0,0,0,0,0,0,0,0, # 48 - 4f
425
- 0,0,0,0,0,0,0,0, # 50 - 57
426
- 0,0,0,0,0,0,0,0, # 58 - 5f
427
- 0,0,0,0,0,0,0,0, # 60 - 67
428
- 0,0,0,0,0,0,0,0, # 68 - 6f
429
- 0,0,0,0,0,0,0,0, # 70 - 77
430
- 0,0,0,0,0,0,0,0, # 78 - 7f
431
- 0,0,0,0,0,0,0,0, # 80 - 87
432
- 0,0,0,0,0,0,0,0, # 88 - 8f
433
- 0,0,0,0,0,0,0,0, # 90 - 97
434
- 0,0,0,0,0,0,0,0, # 98 - 9f
435
- 0,0,0,0,0,0,0,0, # a0 - a7
436
- 0,0,0,0,0,0,0,0, # a8 - af
437
- 0,0,0,0,0,0,0,0, # b0 - b7
438
- 0,0,0,0,0,0,0,0, # b8 - bf
439
- 0,0,0,0,0,0,0,0, # c0 - c7
440
- 0,0,0,0,0,0,0,0, # c8 - cf
441
- 0,0,0,0,0,0,0,0, # d0 - d7
442
- 0,0,0,0,0,0,0,0, # d8 - df
443
- 0,0,0,0,0,0,0,0, # e0 - e7
444
- 0,0,0,0,0,0,0,0, # e8 - ef
445
- 0,0,0,0,0,0,0,0, # f0 - f7
446
- 0,0,0,0,0,0,4,5 # f8 - ff
447
- ].freeze
448
-
449
- UCS2LE_st = [
450
- 6, 6, 7, 6, 4, 3,EError,EError,#00-07
451
- EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
452
- EItsMe,EItsMe, 5, 5, 5,EError,EItsMe,EError,#10-17
453
- 5, 5, 5,EError, 5,EError, 6, 6,#18-1f
454
- 7, 6, 8, 8, 5, 5, 5,EError,#20-27
455
- 5, 5, 5,EError,EError,EError, 5, 5,#28-2f
456
- 5, 5, 5,EError, 5,EError,EStart,EStart#30-37
457
- ].freeze
458
-
459
- UCS2LECharLenTable = [2, 2, 2, 2, 2, 2].freeze
460
-
461
- UCS2LESMModel = {'classTable' => UCS2LE_cls,
462
- 'classFactor' => 6,
463
- 'stateTable' => UCS2LE_st,
464
- 'charLenTable' => UCS2LECharLenTable,
465
- 'name' => 'UTF-16LE'
466
- }.freeze
467
-
468
- # UTF-8
469
-
470
- UTF8_cls = [
471
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
472
- 1,1,1,1,1,1,0,0, # 08 - 0f
473
- 1,1,1,1,1,1,1,1, # 10 - 17
474
- 1,1,1,0,1,1,1,1, # 18 - 1f
475
- 1,1,1,1,1,1,1,1, # 20 - 27
476
- 1,1,1,1,1,1,1,1, # 28 - 2f
477
- 1,1,1,1,1,1,1,1, # 30 - 37
478
- 1,1,1,1,1,1,1,1, # 38 - 3f
479
- 1,1,1,1,1,1,1,1, # 40 - 47
480
- 1,1,1,1,1,1,1,1, # 48 - 4f
481
- 1,1,1,1,1,1,1,1, # 50 - 57
482
- 1,1,1,1,1,1,1,1, # 58 - 5f
483
- 1,1,1,1,1,1,1,1, # 60 - 67
484
- 1,1,1,1,1,1,1,1, # 68 - 6f
485
- 1,1,1,1,1,1,1,1, # 70 - 77
486
- 1,1,1,1,1,1,1,1, # 78 - 7f
487
- 2,2,2,2,3,3,3,3, # 80 - 87
488
- 4,4,4,4,4,4,4,4, # 88 - 8f
489
- 4,4,4,4,4,4,4,4, # 90 - 97
490
- 4,4,4,4,4,4,4,4, # 98 - 9f
491
- 5,5,5,5,5,5,5,5, # a0 - a7
492
- 5,5,5,5,5,5,5,5, # a8 - af
493
- 5,5,5,5,5,5,5,5, # b0 - b7
494
- 5,5,5,5,5,5,5,5, # b8 - bf
495
- 0,0,6,6,6,6,6,6, # c0 - c7
496
- 6,6,6,6,6,6,6,6, # c8 - cf
497
- 6,6,6,6,6,6,6,6, # d0 - d7
498
- 6,6,6,6,6,6,6,6, # d8 - df
499
- 7,8,8,8,8,8,8,8, # e0 - e7
500
- 8,8,8,8,8,9,8,8, # e8 - ef
501
- 10,11,11,11,11,11,11,11, # f0 - f7
502
- 12,13,13,13,14,15,0,0 # f8 - ff
503
- ].freeze
504
-
505
- UTF8_st = [
506
- EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
507
- 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
508
- EError,EError,EError,EError,EError,EError,EError,EError,#10-17
509
- EError,EError,EError,EError,EError,EError,EError,EError,#18-1f
510
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#20-27
511
- EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,#28-2f
512
- EError,EError, 5, 5, 5, 5,EError,EError,#30-37
513
- EError,EError,EError,EError,EError,EError,EError,EError,#38-3f
514
- EError,EError,EError, 5, 5, 5,EError,EError,#40-47
515
- EError,EError,EError,EError,EError,EError,EError,EError,#48-4f
516
- EError,EError, 7, 7, 7, 7,EError,EError,#50-57
517
- EError,EError,EError,EError,EError,EError,EError,EError,#58-5f
518
- EError,EError,EError,EError, 7, 7,EError,EError,#60-67
519
- EError,EError,EError,EError,EError,EError,EError,EError,#68-6f
520
- EError,EError, 9, 9, 9, 9,EError,EError,#70-77
521
- EError,EError,EError,EError,EError,EError,EError,EError,#78-7f
522
- EError,EError,EError,EError,EError, 9,EError,EError,#80-87
523
- EError,EError,EError,EError,EError,EError,EError,EError,#88-8f
524
- EError,EError, 12, 12, 12, 12,EError,EError,#90-97
525
- EError,EError,EError,EError,EError,EError,EError,EError,#98-9f
526
- EError,EError,EError,EError,EError, 12,EError,EError,#a0-a7
527
- EError,EError,EError,EError,EError,EError,EError,EError,#a8-af
528
- EError,EError, 12, 12, 12,EError,EError,EError,#b0-b7
529
- EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
530
- EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
531
- EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
532
- ].freeze
533
-
534
- UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6].freeze
535
-
536
- UTF8SMModel = {'classTable' => UTF8_cls,
537
- 'classFactor' => 16,
538
- 'stateTable' => UTF8_st,
539
- 'charLenTable' => UTF8CharLenTable,
540
- 'name' => 'UTF-8'
541
- }.freeze
542
- end
@@ -1,122 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
29
-
30
- module CharDet
31
- SAMPLE_SIZE = 64
32
- SB_ENOUGH_REL_THRESHOLD = 1024
33
- POSITIVE_SHORTCUT_THRESHOLD = 0.95
34
- NEGATIVE_SHORTCUT_THRESHOLD = 0.05
35
- SYMBOL_CAT_ORDER = 250
36
- NUMBER_OF_SEQ_CAT = 4
37
- POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
38
- #NEGATIVE_CAT = 0
39
-
40
- class SingleByteCharSetProber < CharSetProber
41
- def initialize(model, reversed=false, nameProber=nil)
42
- super()
43
- @model = model
44
- @reversed = reversed # TRUE if we need to reverse every pair in the model lookup
45
- @nameProber = nameProber # Optional auxiliary prober for name decision
46
- reset()
47
- end
48
-
49
- def reset
50
- super()
51
- @lastOrder = 255 # char order of last character
52
- @seqCounters = [0] * NUMBER_OF_SEQ_CAT
53
- @totalSeqs = 0
54
- @totalChar = 0
55
- @freqChar = 0 # characters that fall in our sampling range
56
- end
57
-
58
- def get_charset_name
59
- if @nameProber
60
- return @nameProber.get_charset_name()
61
- else
62
- return @model['charsetName']
63
- end
64
- end
65
-
66
- def feed(aBuf)
67
- if !@model['keepEnglishLetter']
68
- aBuf = filter_without_english_letters(aBuf)
69
- end
70
- aLen = aBuf.length
71
- if aLen == 0
72
- return get_state()
73
- end
74
- aBuf.each_byte do |b|
75
- c = b.chr
76
- order = @model['charToOrderMap'][c.bytes.first]
77
- if order < SYMBOL_CAT_ORDER
78
- @totalChar += 1
79
- end
80
- if order < SAMPLE_SIZE
81
- @freqChar += 1
82
- if @lastOrder < SAMPLE_SIZE
83
- @totalSeqs += 1
84
- if !@reversed
85
- @seqCounters[@model['precedenceMatrix'][(@lastOrder * SAMPLE_SIZE) + order]] += 1
86
- else # reverse the order of the letters in the lookup
87
- @seqCounters[@model['precedenceMatrix'][(order * SAMPLE_SIZE) + @lastOrder]] += 1
88
- end
89
- end
90
- end
91
- @lastOrder = order
92
- end
93
-
94
- if get_state() == EDetecting
95
- if @totalSeqs > SB_ENOUGH_REL_THRESHOLD
96
- cf = get_confidence()
97
- if cf > POSITIVE_SHORTCUT_THRESHOLD
98
- $stderr << "#{@model['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
99
- @state = EFoundIt
100
- elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
101
- $stderr << "#{@model['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
102
- @state = ENotMe
103
- end
104
- end
105
- end
106
-
107
- return get_state()
108
- end
109
-
110
- def get_confidence
111
- r = 0.01
112
- if @totalSeqs > 0
113
- r = (1.0 * @seqCounters[POSITIVE_CAT]) / @totalSeqs / @model['mTypicalPositiveRatio']
114
- r = r * @freqChar / @totalChar
115
- if r >= 1.0
116
- r = 0.99
117
- end
118
- end
119
- return r
120
- end
121
- end
122
- end