lg_pod_plugin 1.0.4 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/lib/git/author.rb +14 -0
  3. data/lib/git/base/factory.rb +101 -0
  4. data/lib/git/base.rb +670 -0
  5. data/lib/git/branch.rb +126 -0
  6. data/lib/git/branches.rb +71 -0
  7. data/lib/git/config.rb +22 -0
  8. data/lib/git/diff.rb +155 -0
  9. data/lib/git/encoding_utils.rb +33 -0
  10. data/lib/git/escaped_path.rb +77 -0
  11. data/lib/git/index.rb +5 -0
  12. data/lib/git/lib.rb +1215 -0
  13. data/lib/git/log.rb +135 -0
  14. data/lib/git/object.rb +312 -0
  15. data/lib/git/path.rb +31 -0
  16. data/lib/git/remote.rb +36 -0
  17. data/lib/git/repository.rb +6 -0
  18. data/lib/git/stash.rb +27 -0
  19. data/lib/git/stashes.rb +55 -0
  20. data/lib/git/status.rb +199 -0
  21. data/lib/git/url.rb +127 -0
  22. data/lib/git/version.rb +5 -0
  23. data/lib/git/working_directory.rb +4 -0
  24. data/lib/git/worktree.rb +38 -0
  25. data/lib/git/worktrees.rb +47 -0
  26. data/lib/git.rb +326 -0
  27. data/lib/lg_pod_plugin/database.rb +104 -104
  28. data/lib/lg_pod_plugin/{download.rb → downloader.rb} +1 -1
  29. data/lib/lg_pod_plugin/file_path.rb +1 -1
  30. data/lib/lg_pod_plugin/git_util.rb +154 -50
  31. data/lib/lg_pod_plugin/install.rb +27 -21
  32. data/lib/lg_pod_plugin/l_cache.rb +13 -14
  33. data/lib/lg_pod_plugin/l_util.rb +39 -0
  34. data/lib/lg_pod_plugin/request.rb +9 -10
  35. data/lib/lg_pod_plugin/version.rb +1 -1
  36. data/lib/lg_pod_plugin.rb +1 -3
  37. data/lib/rchardet/big5freq.rb +927 -0
  38. data/lib/rchardet/big5prober.rb +42 -0
  39. data/lib/rchardet/chardistribution.rb +250 -0
  40. data/lib/rchardet/charsetgroupprober.rb +110 -0
  41. data/lib/rchardet/charsetprober.rb +70 -0
  42. data/lib/rchardet/codingstatemachine.rb +67 -0
  43. data/lib/rchardet/constants.rb +42 -0
  44. data/lib/rchardet/escprober.rb +90 -0
  45. data/lib/rchardet/escsm.rb +245 -0
  46. data/lib/rchardet/eucjpprober.rb +88 -0
  47. data/lib/rchardet/euckrfreq.rb +597 -0
  48. data/lib/rchardet/euckrprober.rb +42 -0
  49. data/lib/rchardet/euctwfreq.rb +431 -0
  50. data/lib/rchardet/euctwprober.rb +42 -0
  51. data/lib/rchardet/gb18030freq.rb +474 -0
  52. data/lib/rchardet/gb18030prober.rb +42 -0
  53. data/lib/rchardet/hebrewprober.rb +289 -0
  54. data/lib/rchardet/jisfreq.rb +571 -0
  55. data/lib/rchardet/jpcntx.rb +229 -0
  56. data/lib/rchardet/langbulgarianmodel.rb +229 -0
  57. data/lib/rchardet/langcyrillicmodel.rb +330 -0
  58. data/lib/rchardet/langgreekmodel.rb +227 -0
  59. data/lib/rchardet/langhebrewmodel.rb +202 -0
  60. data/lib/rchardet/langhungarianmodel.rb +226 -0
  61. data/lib/rchardet/langthaimodel.rb +201 -0
  62. data/lib/rchardet/latin1prober.rb +147 -0
  63. data/lib/rchardet/mbcharsetprober.rb +89 -0
  64. data/lib/rchardet/mbcsgroupprober.rb +47 -0
  65. data/lib/rchardet/mbcssm.rb +542 -0
  66. data/lib/rchardet/sbcharsetprober.rb +122 -0
  67. data/lib/rchardet/sbcsgroupprober.rb +58 -0
  68. data/lib/rchardet/sjisprober.rb +88 -0
  69. data/lib/rchardet/universaldetector.rb +179 -0
  70. data/lib/rchardet/utf8prober.rb +87 -0
  71. data/lib/rchardet/version.rb +3 -0
  72. data/lib/rchardet.rb +67 -0
  73. data/lib/zip/central_directory.rb +212 -0
  74. data/lib/zip/compressor.rb +9 -0
  75. data/lib/zip/constants.rb +115 -0
  76. data/lib/zip/crypto/decrypted_io.rb +40 -0
  77. data/lib/zip/crypto/encryption.rb +11 -0
  78. data/lib/zip/crypto/null_encryption.rb +43 -0
  79. data/lib/zip/crypto/traditional_encryption.rb +99 -0
  80. data/lib/zip/decompressor.rb +31 -0
  81. data/lib/zip/deflater.rb +34 -0
  82. data/lib/zip/dos_time.rb +53 -0
  83. data/lib/zip/entry.rb +719 -0
  84. data/lib/zip/entry_set.rb +88 -0
  85. data/lib/zip/errors.rb +19 -0
  86. data/lib/zip/extra_field/generic.rb +44 -0
  87. data/lib/zip/extra_field/ntfs.rb +94 -0
  88. data/lib/zip/extra_field/old_unix.rb +46 -0
  89. data/lib/zip/extra_field/universal_time.rb +77 -0
  90. data/lib/zip/extra_field/unix.rb +39 -0
  91. data/lib/zip/extra_field/zip64.rb +70 -0
  92. data/lib/zip/extra_field/zip64_placeholder.rb +15 -0
  93. data/lib/zip/extra_field.rb +103 -0
  94. data/lib/zip/file.rb +468 -0
  95. data/lib/zip/filesystem.rb +643 -0
  96. data/lib/zip/inflater.rb +54 -0
  97. data/lib/zip/input_stream.rb +180 -0
  98. data/lib/zip/ioextras/abstract_input_stream.rb +122 -0
  99. data/lib/zip/ioextras/abstract_output_stream.rb +43 -0
  100. data/lib/zip/ioextras.rb +36 -0
  101. data/lib/zip/null_compressor.rb +15 -0
  102. data/lib/zip/null_decompressor.rb +19 -0
  103. data/lib/zip/null_input_stream.rb +10 -0
  104. data/lib/zip/output_stream.rb +198 -0
  105. data/lib/zip/pass_thru_compressor.rb +23 -0
  106. data/lib/zip/pass_thru_decompressor.rb +31 -0
  107. data/lib/zip/streamable_directory.rb +15 -0
  108. data/lib/zip/streamable_stream.rb +52 -0
  109. data/lib/zip/version.rb +3 -0
  110. data/lib/zip.rb +72 -0
  111. metadata +103 -31
@@ -0,0 +1,42 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Communicator client code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ class Big5Prober < MultiByteCharSetProber # Prober for the Big5 charset; feed/get_confidence are not defined here (presumably inherited from MultiByteCharSetProber - confirm)
31
+ def initialize # Wires up the Big5 state machine and the Big5 distribution analyser, then resets the prober
32
+ super
33
+ @codingSM = CodingStateMachine.new(Big5SMModel) # Big5SMModel is defined outside this file
34
+ @distributionAnalyzer = Big5DistributionAnalysis.new()
35
+ reset()
36
+ end
37
+
38
+ def get_charset_name # Returns the fixed charset label for this prober
39
+ return "Big5"
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,250 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Communicator client code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ ENOUGH_DATA_THRESHOLD = 1024 # got_enough_data is true once more than this many characters were counted
31
+ SURE_YES = 0.99 # Upper cap on the confidence returned by get_confidence
32
+ SURE_NO = 0.01 # Confidence returned when no usable characters were counted
33
+
34
+ class CharDistributionAnalysis # Base class: counts 2-byte characters and how many of them are "frequent" according to a per-language table
35
+ def initialize # Subclasses call this and then assign the three table attributes below
36
+ @charToFreqOrder = nil # Mapping table to get frequency order from char order (the char order comes from get_order)
37
+ @tableSize = nil # Size of above table
38
+ @typicalDistributionRatio = nil # This is a constant value which varies from language to language, used in calculating confidence. See http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html for further detail.
39
+ reset()
40
+ end
41
+
42
+ def reset
43
+ # Reset the analyser, clearing any accumulated state
44
+ @done = false # Intended to mark that detection is done and a conclusion has been made; it is only ever assigned here in this class
45
+ @totalChars = 0 # Total characters encountered
46
+ @freqChars = 0 # The number of characters whose frequency order is less than 512
47
+ end
48
+
49
+ def feed(aStr, aCharLen)
50
+ # Feed one character (aStr) whose length in bytes (aCharLen) is already known
51
+ if aCharLen == 2
52
+ # we only care about 2-byte characters in our distribution analysis
53
+ order = get_order(aStr)
54
+ else
55
+ order = -1
56
+ end
57
+ if order >= 0
58
+ @totalChars += 1
59
+ # order is valid
60
+ if order < @tableSize # orders beyond the table still count towards @totalChars but never towards @freqChars
61
+ if 512 > @charToFreqOrder[order]
62
+ @freqChars += 1
63
+ end
64
+ end
65
+ end
66
+ end
67
+
68
+ def get_confidence
69
+ # Return a confidence value based on the data seen so far, capped at SURE_YES
70
+ # if we didn't receive any character in our consideration range, return negative answer
71
+ if @totalChars <= 0
72
+ return SURE_NO
73
+ end
74
+
75
+ if @totalChars != @freqChars # guard: when every counted character is frequent the divisor below would be zero
76
+ r = @freqChars / ((@totalChars - @freqChars) * @typicalDistributionRatio) # NOTE(review): a fractional result relies on @typicalDistributionRatio being a Float; the ratio constants are defined outside this file - confirm
77
+ if r < SURE_YES
78
+ return r
79
+ end
80
+ end
81
+
82
+ # normalize confidence (we don't want to be 100% sure)
83
+ return SURE_YES
84
+ end
85
+
86
+ def got_enough_data
87
+ # It is not necessary to receive all data to draw a conclusion. For charset detection,
88
+ # a certain amount of data is enough
89
+ return @totalChars > ENOUGH_DATA_THRESHOLD
90
+ end
91
+
92
+ def get_order(aStr)
93
+ # We do not handle characters based on the original encoding string, but
94
+ # convert this encoding string to a number, here called order.
95
+ # This allows multiple encodings of a language to share one frequency table.
96
+ return -1 # base implementation accepts nothing; subclasses override with an encoding-specific mapping
97
+ end
98
+ end
99
+
100
+ class EUCTWDistributionAnalysis < CharDistributionAnalysis # Distribution analysis for EUC-TW
101
+ def initialize # Installs the EUC-TW frequency table; the EUCTW* constants are defined outside this file
102
+ super()
103
+ @charToFreqOrder = EUCTWCharToFreqOrder
104
+ @tableSize = EUCTW_TABLE_SIZE
105
+ @typicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
106
+ end
107
+
108
+ def get_order(aStr) # Maps a 2-byte character to its table index, or -1 when the first byte is below 0xc4
109
+ # for euc-TW encoding, we are interested
110
+ # first byte range: 0xc4 -- 0xfe
111
+ # second byte range: 0xa1 -- 0xfe
112
+ # no validation needed here. State machine has done that
113
+ if aStr[0, 1] >= "\xC4" # NOTE(review): presumably aStr is a binary string so aStr[0, 1] is its first byte - confirm against caller
114
+ bytes = aStr.bytes.to_a
115
+ return 94 * (bytes[0] - 0xC4) + bytes[1] - 0xA1 # 94 second-byte values (0xa1..0xfe) per first byte
116
+ else
117
+ return -1
118
+ end
119
+ end
120
+
121
+ def get_confidence # Returns SURE_NO until enough frequent characters were seen, then defers to the base class
122
+ if @freqChars <= MINIMUM_DATA_THRESHOLD # NOTE(review): MINIMUM_DATA_THRESHOLD is not among the constants defined at the top of this file - confirm it is defined elsewhere, otherwise this raises NameError
123
+ return SURE_NO
124
+ end
125
+
126
+ super
127
+ end
128
+ end
129
+
130
+ class EUCKRDistributionAnalysis < CharDistributionAnalysis # Distribution analysis for EUC-KR
131
+ def initialize # Installs the EUC-KR frequency table; the EUCKR* constants are defined outside this file
132
+ super()
133
+ @charToFreqOrder = EUCKRCharToFreqOrder
134
+ @tableSize = EUCKR_TABLE_SIZE
135
+ @typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
136
+ end
137
+
138
+ def get_order(aStr) # Maps a 2-byte character to its table index, or -1 when the first byte is below 0xb0
139
+ # for euc-KR encoding, we are interested
140
+ # first byte range: 0xb0 -- 0xfe
141
+ # second byte range: 0xa1 -- 0xfe
142
+ # no validation needed here. State machine has done that
143
+ if aStr[0, 1] >= "\xB0" # NOTE(review): presumably aStr is a binary string so aStr[0, 1] is its first byte - confirm against caller
144
+ bytes = aStr.bytes.to_a
145
+ return 94 * (bytes[0] - 0xB0) + bytes[1] - 0xA1 # 94 second-byte values (0xa1..0xfe) per first byte
146
+ else
147
+ return -1
148
+ end
149
+ end
150
+ end
151
+
152
+ class GB18030DistributionAnalysis < CharDistributionAnalysis # Distribution analysis for GB18030
153
+ def initialize # Installs the GB18030 frequency table; the GB18030* constants are defined outside this file
154
+ super()
155
+ @charToFreqOrder = GB18030CharToFreqOrder
156
+ @tableSize = GB18030_TABLE_SIZE
157
+ @typicalDistributionRatio = GB18030_TYPICAL_DISTRIBUTION_RATIO
158
+ end
159
+
160
+ def get_order(aStr) # Maps a 2-byte character to its table index, or -1 when either byte is below its range
161
+ # for GB18030 encoding, we are interested
162
+ # first byte range: 0xb0 -- 0xfe
163
+ # second byte range: 0xa1 -- 0xfe
164
+ # no validation needed here. State machine has done that
165
+ if (aStr[0, 1] >= "\xB0") and (aStr[1, 1] >= "\xA1") # unlike the EUC-KR/EUC-TW variants in this file, the second byte is checked as well
166
+ bytes = aStr.bytes.to_a
167
+ return 94 * (bytes[0] - 0xB0) + bytes[1] - 0xA1 # 94 second-byte values (0xa1..0xfe) per first byte
168
+ else
169
+ return -1
170
+ end
171
+ end
172
+ end
173
+
174
+ class Big5DistributionAnalysis < CharDistributionAnalysis # Distribution analysis for Big5
175
+ def initialize # Installs the Big5 frequency table; the Big5/BIG5* constants are defined outside this file
176
+ super
177
+ @charToFreqOrder = Big5CharToFreqOrder
178
+ @tableSize = BIG5_TABLE_SIZE
179
+ @typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
180
+ end
181
+
182
+ def get_order(aStr) # Maps a 2-byte character to its table index, or -1 when the first byte is below 0xa4
183
+ # for big5 encoding, we are interested
184
+ # first byte range: 0xa4 -- 0xfe
185
+ # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
186
+ # no validation needed here. State machine has done that
187
+ if aStr[0, 1] >= "\xA4" # NOTE(review): presumably aStr is a binary string so aStr[0, 1] is its first byte - confirm against caller
188
+ bytes = aStr.bytes.to_a
189
+ if aStr[1, 1] >= "\xA1"
190
+ return 157 * (bytes[0] - 0xA4) + bytes[1] - 0xA1 + 63 # upper second-byte range follows the 63 values of 0x40..0x7e; 157 = 63 + 94 values per first byte
191
+ else
192
+ return 157 * (bytes[0] - 0xA4) + bytes[1] - 0x40 # lower second-byte range starts each row
193
+ end
194
+ else
195
+ return -1
196
+ end
197
+ end
198
+ end
199
+
200
+ class SJISDistributionAnalysis < CharDistributionAnalysis # Distribution analysis for Shift_JIS, using the shared JIS frequency table
201
+ def initialize # Installs the JIS frequency table; the JIS* constants are defined outside this file
202
+ super()
203
+ @charToFreqOrder = JISCharToFreqOrder
204
+ @tableSize = JIS_TABLE_SIZE
205
+ @typicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
206
+ end
207
+
208
+ def get_order(aStr) # Maps a 2-byte character to its table index, or -1 when it is outside the ranges handled below
209
+ # for sjis encoding, we are interested
210
+ # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe (NOTE(review): the code below only accepts 0xe0 -- 0xef)
211
+ # second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
212
+ # no validation needed here. State machine has done that
213
+ bytes = aStr.bytes.to_a
214
+ if (aStr[0, 1] >= "\x81") and (aStr[0, 1] <= "\x9F")
215
+ order = 188 * (bytes[0] - 0x81)
216
+ elsif (aStr[0, 1] >= "\xE0") and (aStr[0, 1] <= "\xEF")
217
+ order = 188 * (bytes[0] - 0xE0 + 31) # + 31 skips the 31 rows used by first bytes 0x81..0x9f
218
+ else
219
+ return -1
220
+ end
221
+ order = order + bytes[1] - 0x40
222
+ if aStr[1, 1] > "\x7F" # characters whose second byte is above 0x7f are rejected
223
+ order =- 1 # parsed as `order = -1` (assignment of minus one), not a decrement
224
+ end
225
+ return order
226
+ end
227
+ end
228
+
229
+ class EUCJPDistributionAnalysis < CharDistributionAnalysis # Distribution analysis for EUC-JP, using the shared JIS frequency table
230
+ def initialize # Installs the JIS frequency table; the JIS* constants are defined outside this file
231
+ super()
232
+ @charToFreqOrder = JISCharToFreqOrder
233
+ @tableSize = JIS_TABLE_SIZE
234
+ @typicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
235
+ end
236
+
237
+ def get_order(aStr) # Maps a 2-byte character to its table index, or -1 when the first byte is below 0xa0
238
+ # for euc-JP encoding, we are interested
239
+ # first byte range: 0xa0 -- 0xfe
240
+ # second byte range: 0xa1 -- 0xfe
241
+ # no validation needed here. State machine has done that
242
+ if aStr[0, 1] >= "\xA0" # NOTE(review): the test admits 0xa0 but the offset below is 0xA1, so a 0xa0 first byte yields a negative order, which feed treats as invalid
243
+ bytes = aStr.bytes.to_a
244
+ return 94 * (bytes[0] - 0xA1) + bytes[1] - 0xa1 # 94 second-byte values (0xa1..0xfe) per first byte
245
+ else
246
+ return -1
247
+ end
248
+ end
249
+ end
250
+ end
@@ -0,0 +1,110 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Communicator client code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ class CharSetGroupProber < CharSetProber # Fans input out to a list of child probers and reports the most confident one
31
+ attr_accessor :probers # Array of child probers; nil entries are skipped everywhere below
32
+ def initialize # Starts with no child probers; does not call reset itself
33
+ super
34
+ @activeNum = 0
35
+ @probers = []
36
+ @bestGuessProber = nil
37
+ end
38
+
39
+ def reset # Resets and re-activates every child prober and recounts the active ones
40
+ super
41
+ @activeNum = 0
42
+
43
+ for prober in @probers
44
+ if prober
45
+ prober.reset()
46
+ prober.active = true
47
+ @activeNum += 1
48
+ end
49
+ end
50
+ @bestGuessProber = nil
51
+ end
52
+
53
+ def get_charset_name # Returns the best child prober's charset name, or nil when no child qualifies
54
+ if !@bestGuessProber
55
+ get_confidence() # called for its side effect of selecting @bestGuessProber
56
+ if !@bestGuessProber
57
+ return nil
58
+ end
59
+ end
60
+ return @bestGuessProber.get_charset_name()
61
+ end
62
+
63
+ def feed(aBuf) # Feeds aBuf to each active child prober and returns this prober's state
64
+ for prober in @probers
65
+ next unless prober
66
+ next unless prober.active
67
+ st = prober.feed(aBuf)
68
+ next unless st # a child that returns nil/false from feed is ignored for this round
69
+ if st == EFoundIt
70
+ @bestGuessProber = prober
71
+ return get_state() # NOTE(review): @state is not updated in this branch, so the returned state is whatever was set earlier - confirm this is intended
72
+ elsif st == ENotMe
73
+ prober.active = false
74
+ @activeNum -= 1
75
+ if @activeNum <= 0 # every child has ruled itself out
76
+ @state = ENotMe
77
+ return get_state()
78
+ end
79
+ end
80
+ end
81
+ return get_state()
82
+ end
83
+
84
+ def get_confidence() # Returns 0.99 / 0.01 for a decided state, otherwise the highest confidence among active children (0.0 if none)
85
+ st = get_state()
86
+ if st == EFoundIt
87
+ return 0.99
88
+ elsif st == ENotMe
89
+ return 0.01
90
+ end
91
+ bestConf = 0.0
92
+ @bestGuessProber = nil
93
+ for prober in @probers
94
+ next unless prober
95
+ unless prober.active
96
+ $stderr << "#{prober.get_charset_name()} not active\n" if $debug
97
+ next
98
+ end
99
+ cf = prober.get_confidence()
100
+ $stderr << "#{prober.get_charset_name} confidence = #{cf}\n" if $debug
101
+ if bestConf < cf # strict comparison: a child reporting 0.0 is never selected
102
+ bestConf = cf
103
+ @bestGuessProber = prober
104
+ end
105
+ end
106
+ return 0.0 unless @bestGuessProber
107
+ return bestConf
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,70 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2.1 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ # 02110-1301 USA
28
+ ######################### END LICENSE BLOCK #########################
29
+
30
+ module CharDet
31
+ class CharSetProber # Base class for probers: holds the detection state and provides buffer-filter helpers
32
+ attr_accessor :active # Flag read and written by CharSetGroupProber to enable or disable a child prober
33
+ def initialize # Intentionally empty; @state stays unset until reset is called
34
+ end
35
+
36
+ def reset # Puts the prober back into the EDetecting state
37
+ @state = EDetecting
38
+ end
39
+
40
+ def get_charset_name # Base implementation knows no charset; subclasses override
41
+ return nil
42
+ end
43
+
44
+ def feed(aBuf) # Base implementation does nothing and returns nil; subclasses override
45
+ end
46
+
47
+ def get_state # Returns the current value of @state
48
+ return @state
49
+ end
50
+
51
+ def get_confidence # Base implementation reports no confidence; subclasses override
52
+ return 0.0
53
+ end
54
+
55
+ def filter_high_bit_only(aBuf) # Returns a copy of aBuf with each run of characters in \x00-\x7F replaced by one space
56
+ newBuf = aBuf.gsub(/([\x00-\x7F])+/, ' ')
57
+ return newBuf
58
+ end
59
+
60
+ def filter_without_english_letters(aBuf) # Returns a copy of aBuf with each run of A-Z / a-z letters replaced by one space
61
+ newBuf = aBuf.gsub(/([A-Za-z])+/,' ')
62
+ return newBuf
63
+ end
64
+
65
+ def filter_with_english_letters(aBuf) # Not implemented yet: returns aBuf unchanged
66
+ # TODO
67
+ return aBuf
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,67 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ class CodingStateMachine # Table-driven state machine that consumes one byte at a time according to a model hash
31
+ attr_accessor :active # Flag toggled by EscCharSetProber to drop a machine once it reports EError
32
+
33
+ def initialize(sm) # sm is the model; it is indexed below with the keys 'classTable', 'charLenTable', 'stateTable', 'classFactor' and 'name'
34
+ @model = sm
35
+ @currentBytePos = 0
36
+ @currentCharLen = 0
37
+ reset()
38
+ end
39
+
40
+ def reset # Returns the machine to the EStart state
41
+ @currentState = EStart
42
+ end
43
+
44
+ def next_state(c) # Advances the machine by the first byte of c and returns the new state
45
+ # for each byte we get its class
46
+ # if it is first byte, we also get byte length
47
+ b = c.bytes.first
48
+ byteCls = @model['classTable'][b]
49
+ if @currentState == EStart
50
+ @currentBytePos = 0
51
+ @currentCharLen = @model['charLenTable'][byteCls]
52
+ end
53
+ # from byte's class and stateTable, we get its next state
54
+ @currentState = @model['stateTable'][@currentState * @model['classFactor'] + byteCls] # stateTable is a flat array addressed as state * classFactor + byte class
55
+ @currentBytePos += 1
56
+ return @currentState
57
+ end
58
+
59
+ def get_current_charlen # Character length looked up when the current character started
60
+ return @currentCharLen
61
+ end
62
+
63
+ def get_coding_state_machine # Returns the model's 'name' entry
64
+ return @model['name']
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,42 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is Mozilla Universal charset detector code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 2001
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ # Shy Shalom - original C code
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2.1 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ # 02110-1301 USA
28
+ ######################### END LICENSE BLOCK #########################
29
+
30
+ module CharDet
31
+ $debug = false # Global switch; when truthy CharSetGroupProber#get_confidence writes diagnostics to $stderr
32
+
33
+ EDetecting = 0 # Prober states: stored in a prober's @state and returned by get_state
34
+ EFoundIt = 1
35
+ ENotMe = 2
36
+
37
+ EStart = 0 # Coding state machine states: EStart is the reset state; EError and EItsMe are compared against next_state results
38
+ EError = 1
39
+ EItsMe = 2
40
+
41
+ SHORTCUT_THRESHOLD = 0.95 # NOTE(review): not referenced in the files shown here; presumably a confidence cut-off used by other probers - confirm
42
+ end
@@ -0,0 +1,90 @@
1
+ ######################## BEGIN LICENSE BLOCK ########################
2
+ # The Original Code is mozilla.org code.
3
+ #
4
+ # The Initial Developer of the Original Code is
5
+ # Netscape Communications Corporation.
6
+ # Portions created by the Initial Developer are Copyright (C) 1998
7
+ # the Initial Developer. All Rights Reserved.
8
+ #
9
+ # Contributor(s):
10
+ # Jeff Hodges - port to Ruby
11
+ # Mark Pilgrim - port to Python
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2.1 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26
+ # 02110-1301 USA
27
+ ######################### END LICENSE BLOCK #########################
28
+
29
+ module CharDet
30
+ class EscCharSetProber < CharSetProber # Runs four coding state machines (HZ, ISO-2022-CN/JP/KR models) in parallel over the input
31
+ def initialize # Builds one CodingStateMachine per model, then resets; the *SMModel constants are defined outside this file
32
+ super()
33
+ @codingSM = [
34
+ CodingStateMachine.new(HZSMModel),
35
+ CodingStateMachine.new(ISO2022CNSMModel),
36
+ CodingStateMachine.new(ISO2022JPSMModel),
37
+ CodingStateMachine.new(ISO2022KRSMModel)
38
+ ]
39
+ reset()
40
+ end
41
+
42
+ def reset # Re-activates and resets every state machine and clears any detected charset
43
+ super()
44
+ for codingSM in @codingSM
45
+ next if !codingSM
46
+ codingSM.active = true
47
+ codingSM.reset()
48
+ end
49
+ @activeSM = @codingSM.length
50
+ @detectedCharset = nil
51
+ end
52
+
53
+ def get_charset_name # Returns the detected charset name, or nil if none was detected
54
+ return @detectedCharset
55
+ end
56
+
57
+ def get_confidence # 0.99 once a charset was detected, otherwise 0.00
58
+ if @detectedCharset
59
+ return 0.99
60
+ else
61
+ return 0.00
62
+ end
63
+ end
64
+
65
+ def feed(aBuf) # Pushes every byte of aBuf through each active state machine and returns the prober state
66
+ aBuf.each_byte do |b|
67
+ c = b.chr # next_state expects a string, so the byte is wrapped back into a 1-byte string
68
+ for codingSM in @codingSM
69
+ next unless codingSM
70
+ next unless codingSM.active
71
+ codingState = codingSM.next_state(c)
72
+ if codingState == EError
73
+ codingSM.active = false
74
+ @activeSM -= 1
75
+ if @activeSM <= 0 # every machine has rejected the input
76
+ @state = ENotMe
77
+ return get_state()
78
+ end
79
+ elsif codingState == EItsMe
80
+ @state = EFoundIt
81
+ @detectedCharset = codingSM.get_coding_state_machine() # the model's 'name' entry becomes the reported charset
82
+ return get_state()
83
+ end
84
+ end
85
+ end
86
+
87
+ return get_state()
88
+ end
89
+ end
90
+ end