lg_pod_plugin 1.0.8 → 1.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/bin/lg +5 -0
  3. data/lib/command/cache.rb +22 -18
  4. data/lib/command/command.rb +27 -35
  5. data/lib/command/install.rb +52 -0
  6. data/lib/command/update.rb +39 -0
  7. data/lib/lg_pod_plugin/downloader.rb +12 -17
  8. data/lib/lg_pod_plugin/git_util.rb +150 -106
  9. data/lib/lg_pod_plugin/install.rb +74 -88
  10. data/lib/lg_pod_plugin/l_cache.rb +11 -77
  11. data/lib/lg_pod_plugin/l_util.rb +6 -2
  12. data/lib/lg_pod_plugin/request.rb +90 -82
  13. data/lib/lg_pod_plugin/version.rb +1 -1
  14. data/lib/lg_pod_plugin.rb +9 -5
  15. metadata +57 -118
  16. data/lib/git/author.rb +0 -14
  17. data/lib/git/base/factory.rb +0 -101
  18. data/lib/git/base.rb +0 -670
  19. data/lib/git/branch.rb +0 -126
  20. data/lib/git/branches.rb +0 -71
  21. data/lib/git/config.rb +0 -22
  22. data/lib/git/diff.rb +0 -155
  23. data/lib/git/encoding_utils.rb +0 -33
  24. data/lib/git/escaped_path.rb +0 -77
  25. data/lib/git/index.rb +0 -5
  26. data/lib/git/lib.rb +0 -1215
  27. data/lib/git/log.rb +0 -135
  28. data/lib/git/object.rb +0 -312
  29. data/lib/git/path.rb +0 -31
  30. data/lib/git/remote.rb +0 -36
  31. data/lib/git/repository.rb +0 -6
  32. data/lib/git/stash.rb +0 -27
  33. data/lib/git/stashes.rb +0 -55
  34. data/lib/git/status.rb +0 -199
  35. data/lib/git/url.rb +0 -127
  36. data/lib/git/version.rb +0 -5
  37. data/lib/git/working_directory.rb +0 -4
  38. data/lib/git/worktree.rb +0 -38
  39. data/lib/git/worktrees.rb +0 -47
  40. data/lib/git.rb +0 -326
  41. data/lib/rchardet/big5freq.rb +0 -927
  42. data/lib/rchardet/big5prober.rb +0 -42
  43. data/lib/rchardet/chardistribution.rb +0 -250
  44. data/lib/rchardet/charsetgroupprober.rb +0 -110
  45. data/lib/rchardet/charsetprober.rb +0 -70
  46. data/lib/rchardet/codingstatemachine.rb +0 -67
  47. data/lib/rchardet/constants.rb +0 -42
  48. data/lib/rchardet/escprober.rb +0 -90
  49. data/lib/rchardet/escsm.rb +0 -245
  50. data/lib/rchardet/eucjpprober.rb +0 -88
  51. data/lib/rchardet/euckrfreq.rb +0 -597
  52. data/lib/rchardet/euckrprober.rb +0 -42
  53. data/lib/rchardet/euctwfreq.rb +0 -431
  54. data/lib/rchardet/euctwprober.rb +0 -42
  55. data/lib/rchardet/gb18030freq.rb +0 -474
  56. data/lib/rchardet/gb18030prober.rb +0 -42
  57. data/lib/rchardet/hebrewprober.rb +0 -289
  58. data/lib/rchardet/jisfreq.rb +0 -571
  59. data/lib/rchardet/jpcntx.rb +0 -229
  60. data/lib/rchardet/langbulgarianmodel.rb +0 -229
  61. data/lib/rchardet/langcyrillicmodel.rb +0 -330
  62. data/lib/rchardet/langgreekmodel.rb +0 -227
  63. data/lib/rchardet/langhebrewmodel.rb +0 -202
  64. data/lib/rchardet/langhungarianmodel.rb +0 -226
  65. data/lib/rchardet/langthaimodel.rb +0 -201
  66. data/lib/rchardet/latin1prober.rb +0 -147
  67. data/lib/rchardet/mbcharsetprober.rb +0 -89
  68. data/lib/rchardet/mbcsgroupprober.rb +0 -47
  69. data/lib/rchardet/mbcssm.rb +0 -542
  70. data/lib/rchardet/sbcharsetprober.rb +0 -122
  71. data/lib/rchardet/sbcsgroupprober.rb +0 -58
  72. data/lib/rchardet/sjisprober.rb +0 -88
  73. data/lib/rchardet/universaldetector.rb +0 -179
  74. data/lib/rchardet/utf8prober.rb +0 -87
  75. data/lib/rchardet/version.rb +0 -3
  76. data/lib/rchardet.rb +0 -67
  77. data/lib/zip/central_directory.rb +0 -212
  78. data/lib/zip/compressor.rb +0 -9
  79. data/lib/zip/constants.rb +0 -115
  80. data/lib/zip/crypto/decrypted_io.rb +0 -40
  81. data/lib/zip/crypto/encryption.rb +0 -11
  82. data/lib/zip/crypto/null_encryption.rb +0 -43
  83. data/lib/zip/crypto/traditional_encryption.rb +0 -99
  84. data/lib/zip/decompressor.rb +0 -31
  85. data/lib/zip/deflater.rb +0 -34
  86. data/lib/zip/dos_time.rb +0 -53
  87. data/lib/zip/entry.rb +0 -719
  88. data/lib/zip/entry_set.rb +0 -88
  89. data/lib/zip/errors.rb +0 -19
  90. data/lib/zip/extra_field/generic.rb +0 -44
  91. data/lib/zip/extra_field/ntfs.rb +0 -94
  92. data/lib/zip/extra_field/old_unix.rb +0 -46
  93. data/lib/zip/extra_field/universal_time.rb +0 -77
  94. data/lib/zip/extra_field/unix.rb +0 -39
  95. data/lib/zip/extra_field/zip64.rb +0 -70
  96. data/lib/zip/extra_field/zip64_placeholder.rb +0 -15
  97. data/lib/zip/extra_field.rb +0 -103
  98. data/lib/zip/file.rb +0 -468
  99. data/lib/zip/filesystem.rb +0 -643
  100. data/lib/zip/inflater.rb +0 -54
  101. data/lib/zip/input_stream.rb +0 -180
  102. data/lib/zip/ioextras/abstract_input_stream.rb +0 -122
  103. data/lib/zip/ioextras/abstract_output_stream.rb +0 -43
  104. data/lib/zip/ioextras.rb +0 -36
  105. data/lib/zip/null_compressor.rb +0 -15
  106. data/lib/zip/null_decompressor.rb +0 -19
  107. data/lib/zip/null_input_stream.rb +0 -10
  108. data/lib/zip/output_stream.rb +0 -198
  109. data/lib/zip/pass_thru_compressor.rb +0 -23
  110. data/lib/zip/pass_thru_decompressor.rb +0 -31
  111. data/lib/zip/streamable_directory.rb +0 -15
  112. data/lib/zip/streamable_stream.rb +0 -52
  113. data/lib/zip/version.rb +0 -3
  114. data/lib/zip.rb +0 -72
@@ -1,42 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Communicator client code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
module CharDet
  # Multi-byte prober specialised for the Big5 charset.
  #
  # It supplies the two Big5-specific collaborators (a coding state machine
  # built from Big5SMModel and a Big5DistributionAnalysis); everything else is
  # inherited from MultiByteCharSetProber.
  class Big5Prober < MultiByteCharSetProber
    # Builds the Big5 collaborators and then calls #reset.
    def initialize
      super()
      @distributionAnalyzer = Big5DistributionAnalysis.new
      @codingSM = CodingStateMachine.new(Big5SMModel)
      reset
    end

    # @return [String] the charset name this prober reports
    def get_charset_name
      "Big5"
    end
  end
end
@@ -1,250 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Communicator client code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
module CharDet
  # #got_enough_data reports true once more than this many characters were counted.
  ENOUGH_DATA_THRESHOLD = 1024
  # Upper bound returned by #get_confidence (we never want to be 100% sure).
  SURE_YES = 0.99
  # Confidence returned when no usable character has been seen.
  SURE_NO = 0.01

  # Base class for two-byte character frequency analysis.
  #
  # Subclasses provide a frequency table, its size, a typical distribution
  # ratio, and a #get_order implementation that maps a two-byte character to
  # an index ("order") into that table.
  class CharDistributionAnalysis
    def initialize
      @charToFreqOrder = nil # Mapping table to get frequency order from char order (get from get_order)
      @tableSize = nil # Size of above table
      # Constant that varies from language to language, used in calculating
      # confidence. See
      # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
      @typicalDistributionRatio = nil
      reset
    end

    # Resets the analyser, clearing all counters.
    def reset
      @done = false # Set to true once detection is done and a conclusion has been made
      @totalChars = 0 # Total characters encountered
      @freqChars = 0 # The number of characters whose frequency order is less than 512
    end

    # Feeds one character of known byte length into the counters.
    #
    # @param aStr [String] the raw bytes of the character
    # @param aCharLen [Integer] byte length of the character; only 2 is analysed
    def feed(aStr, aCharLen)
      # we only care about 2-byte characters in our distribution analysis
      order = aCharLen == 2 ? get_order(aStr) : -1
      return unless order >= 0

      # order is valid
      @totalChars += 1
      @freqChars += 1 if order < @tableSize && @charToFreqOrder[order] < 512
    end

    # Returns a confidence value based on the data seen so far.
    #
    # @return [Numeric] SURE_NO when nothing was counted, otherwise the ratio
    #   of frequent to non-frequent characters scaled by the typical
    #   distribution ratio, capped at SURE_YES
    def get_confidence
      # if we didn't receive any character in our consideration range, return negative answer
      return SURE_NO if @totalChars <= 0

      if @totalChars != @freqChars
        r = @freqChars / ((@totalChars - @freqChars) * @typicalDistributionRatio)
        return r if r < SURE_YES
      end

      # normalize confidence (we don't want to be 100% sure)
      SURE_YES
    end

    # It is not necessary to receive all data to draw a conclusion. For charset
    # detection, a certain amount of data is enough.
    #
    # @return [Boolean]
    def got_enough_data
      @totalChars > ENOUGH_DATA_THRESHOLD
    end

    # We do not handle characters based on the original encoding string, but
    # convert this encoding string to a number, here called order.
    # This allows multiple encodings of a language to share one frequency table.
    #
    # @param aStr [String] the raw bytes of one character
    # @return [Integer] table index, or -1 when the character is not of interest
    def get_order(aStr)
      -1
    end

    private

    # Returns the first two bytes of +aStr+ as integers, or nil when the string
    # holds fewer than two bytes.
    #
    # Working on byte values keeps the range checks in the subclasses
    # independent of the string's encoding tag: comparing one-character
    # substrings against "\xNN" literals depends on the encodings of both
    # operands and indexes characters instead of bytes.
    def lead_and_trail_bytes(aStr)
      lead = aStr.getbyte(0)
      trail = aStr.getbyte(1)
      [lead, trail] if lead && trail
    end
  end

  class EUCTWDistributionAnalysis < CharDistributionAnalysis
    def initialize
      super()
      @charToFreqOrder = EUCTWCharToFreqOrder
      @tableSize = EUCTW_TABLE_SIZE
      @typicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
    end

    # for euc-TW encoding, we are interested
    # first byte range: 0xc4 -- 0xfe
    # second byte range: 0xa1 -- 0xfe
    # no validation needed here. State machine has done that
    def get_order(aStr)
      lead, trail = lead_and_trail_bytes(aStr)
      return -1 unless lead && lead >= 0xC4

      94 * (lead - 0xC4) + trail - 0xA1
    end

    # Same as the base implementation, but answers SURE_NO until more than
    # MINIMUM_DATA_THRESHOLD frequent characters have been seen.
    def get_confidence
      # NOTE(review): MINIMUM_DATA_THRESHOLD is not defined in this file;
      # confirm it is defined elsewhere in CharDet, otherwise this raises
      # NameError.
      return SURE_NO if @freqChars <= MINIMUM_DATA_THRESHOLD

      super
    end
  end

  class EUCKRDistributionAnalysis < CharDistributionAnalysis
    def initialize
      super()
      @charToFreqOrder = EUCKRCharToFreqOrder
      @tableSize = EUCKR_TABLE_SIZE
      @typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
    end

    # for euc-KR encoding, we are interested
    # first byte range: 0xb0 -- 0xfe
    # second byte range: 0xa1 -- 0xfe
    # no validation needed here. State machine has done that
    def get_order(aStr)
      lead, trail = lead_and_trail_bytes(aStr)
      return -1 unless lead && lead >= 0xB0

      94 * (lead - 0xB0) + trail - 0xA1
    end
  end

  class GB18030DistributionAnalysis < CharDistributionAnalysis
    def initialize
      super()
      @charToFreqOrder = GB18030CharToFreqOrder
      @tableSize = GB18030_TABLE_SIZE
      @typicalDistributionRatio = GB18030_TYPICAL_DISTRIBUTION_RATIO
    end

    # for GB18030 encoding, we are interested
    # first byte range: 0xb0 -- 0xfe
    # second byte range: 0xa1 -- 0xfe
    # no validation needed here. State machine has done that
    def get_order(aStr)
      lead, trail = lead_and_trail_bytes(aStr)
      return -1 unless lead && lead >= 0xB0 && trail >= 0xA1

      94 * (lead - 0xB0) + trail - 0xA1
    end
  end

  class Big5DistributionAnalysis < CharDistributionAnalysis
    def initialize
      super()
      @charToFreqOrder = Big5CharToFreqOrder
      @tableSize = BIG5_TABLE_SIZE
      @typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
    end

    # for big5 encoding, we are interested
    # first byte range: 0xa4 -- 0xfe
    # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
    # no validation needed here. State machine has done that
    def get_order(aStr)
      lead, trail = lead_and_trail_bytes(aStr)
      return -1 unless lead && lead >= 0xA4

      row = 157 * (lead - 0xA4)
      if trail >= 0xA1
        row + trail - 0xA1 + 63
      else
        row + trail - 0x40
      end
    end
  end

  class SJISDistributionAnalysis < CharDistributionAnalysis
    def initialize
      super()
      @charToFreqOrder = JISCharToFreqOrder
      @tableSize = JIS_TABLE_SIZE
      @typicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
    end

    # for sjis encoding, we are interested
    # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
    # second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
    # no validation needed here. State machine has done that
    def get_order(aStr)
      lead, trail = lead_and_trail_bytes(aStr)
      return -1 unless lead

      if lead.between?(0x81, 0x9F)
        row = 188 * (lead - 0x81)
      elsif lead.between?(0xE0, 0xEF)
        row = 188 * (lead - 0xE0 + 31)
      else
        return -1
      end

      # A second byte above 0x7F is reported as "not of interest" (the
      # existing behaviour, kept unchanged).
      return -1 if trail > 0x7F

      row + trail - 0x40
    end
  end

  class EUCJPDistributionAnalysis < CharDistributionAnalysis
    def initialize
      super()
      @charToFreqOrder = JISCharToFreqOrder
      @tableSize = JIS_TABLE_SIZE
      @typicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
    end

    # for euc-JP encoding, we are interested
    # first byte range: 0xa0 -- 0xfe
    # second byte range: 0xa1 -- 0xfe
    # no validation needed here. State machine has done that
    def get_order(aStr)
      lead, trail = lead_and_trail_bytes(aStr)
      return -1 unless lead && lead >= 0xA0

      # A lead byte of 0xA0 yields a negative order, which #feed ignores.
      94 * (lead - 0xA1) + trail - 0xA1
    end
  end
end
@@ -1,110 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Communicator client code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
module CharDet
  # A prober that delegates to a list of child probers and reports the one
  # with the highest confidence.
  class CharSetGroupProber < CharSetProber
    # The child probers; entries may be nil and are then skipped.
    attr_accessor :probers

    def initialize
      super
      @activeNum = 0
      @probers = []
      @bestGuessProber = nil
    end

    # Resets this prober and every child, marking each child active again.
    def reset
      super
      @activeNum = 0

      @probers.each do |candidate|
        next unless candidate

        candidate.reset
        candidate.active = true
        @activeNum += 1
      end
      @bestGuessProber = nil
    end

    # @return [String, nil] charset name of the best-guess child, computing
    #   the best guess via #get_confidence first when none is known yet
    def get_charset_name
      get_confidence unless @bestGuessProber
      return nil unless @bestGuessProber

      @bestGuessProber.get_charset_name
    end

    # Feeds the buffer to every active child.
    #
    # Stops at the first child answering EFoundIt. A child answering ENotMe is
    # deactivated; once none is left this prober's state becomes ENotMe.
    #
    # @param aBuf [String] the data to analyse
    # @return the current state of this prober
    def feed(aBuf)
      @probers.each do |candidate|
        next unless candidate
        next unless candidate.active

        verdict = candidate.feed(aBuf)
        next unless verdict

        if verdict == EFoundIt
          @bestGuessProber = candidate
          return get_state
        elsif verdict == ENotMe
          candidate.active = false
          @activeNum -= 1
          if @activeNum <= 0
            @state = ENotMe
            return get_state
          end
        end
      end
      get_state
    end

    # Picks the active child with the highest confidence as the best guess.
    #
    # @return [Float] 0.99 / 0.01 when the state is already EFoundIt / ENotMe,
    #   otherwise the best child confidence (0.0 when no child qualifies)
    def get_confidence()
      current = get_state
      return 0.99 if current == EFoundIt
      return 0.01 if current == ENotMe

      top_score = 0.0
      @bestGuessProber = nil
      @probers.each do |candidate|
        next unless candidate

        unless candidate.active
          $stderr << "#{candidate.get_charset_name()} not active\n" if $debug
          next
        end

        score = candidate.get_confidence
        $stderr << "#{candidate.get_charset_name} confidence = #{score}\n" if $debug
        next unless top_score < score

        top_score = score
        @bestGuessProber = candidate
      end
      @bestGuessProber ? top_score : 0.0
    end
  end
end
@@ -1,70 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
29
-
30
module CharDet
  # Common base for all probers: holds the detection state and provides
  # neutral default answers plus a few buffer filters.
  class CharSetProber
    # Flag used by group probers to skip children that were ruled out.
    attr_accessor :active

    def initialize
    end

    # Puts the prober back into the EDetecting state.
    def reset
      @state = EDetecting
    end

    # @return [nil] the base prober has no charset to report
    def get_charset_name
      nil
    end

    # No-op in the base class.
    #
    # @param aBuf [String] the data to analyse
    def feed(aBuf)
    end

    # @return the current detection state (nil before the first #reset)
    def get_state
      @state
    end

    # @return [Float] always 0.0 in the base class
    def get_confidence
      0.0
    end

    # Replaces every run of bytes in \x00-\x7F with a single space.
    #
    # @param aBuf [String]
    # @return [String] a new, filtered string
    def filter_high_bit_only(aBuf)
      aBuf.gsub(/([\x00-\x7F])+/, ' ')
    end

    # Replaces every run of ASCII letters with a single space.
    #
    # @param aBuf [String]
    # @return [String] a new, filtered string
    def filter_without_english_letters(aBuf)
      aBuf.gsub(/([A-Za-z])+/,' ')
    end

    # Not implemented yet (TODO): currently hands the buffer back untouched.
    #
    # @param aBuf [String]
    # @return [String] the same object that was passed in
    def filter_with_english_letters(aBuf)
      aBuf
    end
  end
end
@@ -1,67 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is mozilla.org code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
module CharDet
  # Table-driven state machine that consumes one byte at a time.
  #
  # The model is a Hash read through the keys 'classTable', 'charLenTable',
  # 'stateTable', 'classFactor' and 'name'.
  class CodingStateMachine
    # Flag for owners that want to switch a machine off.
    attr_accessor :active

    # @param sm [Hash] the state machine model (see class comment)
    def initialize(sm)
      @model = sm
      @currentBytePos = 0
      @currentCharLen = 0
      reset
    end

    # Returns the machine to the EStart state.
    def reset
      @currentState = EStart
    end

    # Advances the machine by one byte.
    #
    # @param c [String] a string whose first byte is the input
    # @return the new state looked up in the model's state table
    def next_state(c)
      # for each byte we get its class
      byte_class = @model['classTable'][c.bytes.first]

      # if it is the first byte of a character, we also get the byte length
      if @currentState == EStart
        @currentBytePos = 0
        @currentCharLen = @model['charLenTable'][byte_class]
      end

      # from the byte's class and the state table, we get the next state
      transition_index = @currentState * @model['classFactor'] + byte_class
      @currentState = @model['stateTable'][transition_index]
      @currentBytePos += 1
      @currentState
    end

    # @return the character length recorded at the last character start
    def get_current_charlen
      @currentCharLen
    end

    # @return the model's 'name' entry
    def get_coding_state_machine
      @model['name']
    end
  end
end
@@ -1,42 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is Mozilla Universal charset detector code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 2001
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- # Shy Shalom - original C code
13
- #
14
- # This library is free software; you can redistribute it and/or
15
- # modify it under the terms of the GNU Lesser General Public
16
- # License as published by the Free Software Foundation; either
17
- # version 2.1 of the License, or (at your option) any later version.
18
- #
19
- # This library is distributed in the hope that it will be useful,
20
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
- # Lesser General Public License for more details.
23
- #
24
- # You should have received a copy of the GNU Lesser General Public
25
- # License along with this library; if not, write to the Free Software
26
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
- # 02110-1301 USA
28
- ######################### END LICENSE BLOCK #########################
29
-
30
module CharDet
  # Global switch; group probers write per-prober diagnostics to $stderr
  # while it is truthy.
  $debug = false

  # Prober states (stored in a prober's @state and returned by its feed).
  EDetecting, EFoundIt, ENotMe = 0, 1, 2

  # Coding state machine states (results of CodingStateMachine#next_state).
  EStart, EError, EItsMe = 0, 1, 2

  SHORTCUT_THRESHOLD = 0.95
end
@@ -1,90 +0,0 @@
1
- ######################## BEGIN LICENSE BLOCK ########################
2
- # The Original Code is mozilla.org code.
3
- #
4
- # The Initial Developer of the Original Code is
5
- # Netscape Communications Corporation.
6
- # Portions created by the Initial Developer are Copyright (C) 1998
7
- # the Initial Developer. All Rights Reserved.
8
- #
9
- # Contributor(s):
10
- # Jeff Hodges - port to Ruby
11
- # Mark Pilgrim - port to Python
12
- #
13
- # This library is free software; you can redistribute it and/or
14
- # modify it under the terms of the GNU Lesser General Public
15
- # License as published by the Free Software Foundation; either
16
- # version 2.1 of the License, or (at your option) any later version.
17
- #
18
- # This library is distributed in the hope that it will be useful,
19
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
- # Lesser General Public License for more details.
22
- #
23
- # You should have received a copy of the GNU Lesser General Public
24
- # License along with this library; if not, write to the Free Software
25
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
- # 02110-1301 USA
27
- ######################### END LICENSE BLOCK #########################
28
-
29
module CharDet
  # Prober that runs four coding state machines (HZ, ISO-2022-CN,
  # ISO-2022-JP, ISO-2022-KR models) side by side over the input.
  class EscCharSetProber < CharSetProber
    def initialize
      super()
      models = [HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel]
      @codingSM = models.map { |model| CodingStateMachine.new(model) }
      reset
    end

    # Reactivates and resets every state machine and forgets any detection.
    def reset
      super()
      @codingSM.each do |machine|
        next unless machine

        machine.active = true
        machine.reset
      end
      @activeSM = @codingSM.length
      @detectedCharset = nil
    end

    # @return the name reported by the machine that matched, or nil
    def get_charset_name
      @detectedCharset
    end

    # @return [Float] 0.99 once a charset was detected, otherwise 0.00
    def get_confidence
      @detectedCharset ? 0.99 : 0.00
    end

    # Pushes every byte of the buffer through all active state machines.
    #
    # A machine answering EError is deactivated; when none is left the state
    # becomes ENotMe. The first machine answering EItsMe decides the charset
    # and sets the state to EFoundIt.
    #
    # @param aBuf [String] the data to analyse
    # @return the current state of this prober
    def feed(aBuf)
      aBuf.each_byte do |byte|
        char = byte.chr
        @codingSM.each do |machine|
          next unless machine
          next unless machine.active

          outcome = machine.next_state(char)
          if outcome == EError
            machine.active = false
            @activeSM -= 1
            if @activeSM <= 0
              @state = ENotMe
              return get_state
            end
          elsif outcome == EItsMe
            @state = EFoundIt
            @detectedCharset = machine.get_coding_state_machine
            return get_state
          end
        end
      end

      get_state
    end
  end
end