lg_pod_plugin 1.0.4 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/git/author.rb +14 -0
- data/lib/git/base/factory.rb +101 -0
- data/lib/git/base.rb +670 -0
- data/lib/git/branch.rb +126 -0
- data/lib/git/branches.rb +71 -0
- data/lib/git/config.rb +22 -0
- data/lib/git/diff.rb +155 -0
- data/lib/git/encoding_utils.rb +33 -0
- data/lib/git/escaped_path.rb +77 -0
- data/lib/git/index.rb +5 -0
- data/lib/git/lib.rb +1215 -0
- data/lib/git/log.rb +135 -0
- data/lib/git/object.rb +312 -0
- data/lib/git/path.rb +31 -0
- data/lib/git/remote.rb +36 -0
- data/lib/git/repository.rb +6 -0
- data/lib/git/stash.rb +27 -0
- data/lib/git/stashes.rb +55 -0
- data/lib/git/status.rb +199 -0
- data/lib/git/url.rb +127 -0
- data/lib/git/version.rb +5 -0
- data/lib/git/working_directory.rb +4 -0
- data/lib/git/worktree.rb +38 -0
- data/lib/git/worktrees.rb +47 -0
- data/lib/git.rb +326 -0
- data/lib/lg_pod_plugin/database.rb +104 -104
- data/lib/lg_pod_plugin/{download.rb → downloader.rb} +1 -1
- data/lib/lg_pod_plugin/file_path.rb +1 -1
- data/lib/lg_pod_plugin/git_util.rb +154 -50
- data/lib/lg_pod_plugin/install.rb +27 -21
- data/lib/lg_pod_plugin/l_cache.rb +13 -14
- data/lib/lg_pod_plugin/l_util.rb +39 -0
- data/lib/lg_pod_plugin/request.rb +9 -10
- data/lib/lg_pod_plugin/version.rb +1 -1
- data/lib/lg_pod_plugin.rb +1 -3
- data/lib/rchardet/big5freq.rb +927 -0
- data/lib/rchardet/big5prober.rb +42 -0
- data/lib/rchardet/chardistribution.rb +250 -0
- data/lib/rchardet/charsetgroupprober.rb +110 -0
- data/lib/rchardet/charsetprober.rb +70 -0
- data/lib/rchardet/codingstatemachine.rb +67 -0
- data/lib/rchardet/constants.rb +42 -0
- data/lib/rchardet/escprober.rb +90 -0
- data/lib/rchardet/escsm.rb +245 -0
- data/lib/rchardet/eucjpprober.rb +88 -0
- data/lib/rchardet/euckrfreq.rb +597 -0
- data/lib/rchardet/euckrprober.rb +42 -0
- data/lib/rchardet/euctwfreq.rb +431 -0
- data/lib/rchardet/euctwprober.rb +42 -0
- data/lib/rchardet/gb18030freq.rb +474 -0
- data/lib/rchardet/gb18030prober.rb +42 -0
- data/lib/rchardet/hebrewprober.rb +289 -0
- data/lib/rchardet/jisfreq.rb +571 -0
- data/lib/rchardet/jpcntx.rb +229 -0
- data/lib/rchardet/langbulgarianmodel.rb +229 -0
- data/lib/rchardet/langcyrillicmodel.rb +330 -0
- data/lib/rchardet/langgreekmodel.rb +227 -0
- data/lib/rchardet/langhebrewmodel.rb +202 -0
- data/lib/rchardet/langhungarianmodel.rb +226 -0
- data/lib/rchardet/langthaimodel.rb +201 -0
- data/lib/rchardet/latin1prober.rb +147 -0
- data/lib/rchardet/mbcharsetprober.rb +89 -0
- data/lib/rchardet/mbcsgroupprober.rb +47 -0
- data/lib/rchardet/mbcssm.rb +542 -0
- data/lib/rchardet/sbcharsetprober.rb +122 -0
- data/lib/rchardet/sbcsgroupprober.rb +58 -0
- data/lib/rchardet/sjisprober.rb +88 -0
- data/lib/rchardet/universaldetector.rb +179 -0
- data/lib/rchardet/utf8prober.rb +87 -0
- data/lib/rchardet/version.rb +3 -0
- data/lib/rchardet.rb +67 -0
- data/lib/zip/central_directory.rb +212 -0
- data/lib/zip/compressor.rb +9 -0
- data/lib/zip/constants.rb +115 -0
- data/lib/zip/crypto/decrypted_io.rb +40 -0
- data/lib/zip/crypto/encryption.rb +11 -0
- data/lib/zip/crypto/null_encryption.rb +43 -0
- data/lib/zip/crypto/traditional_encryption.rb +99 -0
- data/lib/zip/decompressor.rb +31 -0
- data/lib/zip/deflater.rb +34 -0
- data/lib/zip/dos_time.rb +53 -0
- data/lib/zip/entry.rb +719 -0
- data/lib/zip/entry_set.rb +88 -0
- data/lib/zip/errors.rb +19 -0
- data/lib/zip/extra_field/generic.rb +44 -0
- data/lib/zip/extra_field/ntfs.rb +94 -0
- data/lib/zip/extra_field/old_unix.rb +46 -0
- data/lib/zip/extra_field/universal_time.rb +77 -0
- data/lib/zip/extra_field/unix.rb +39 -0
- data/lib/zip/extra_field/zip64.rb +70 -0
- data/lib/zip/extra_field/zip64_placeholder.rb +15 -0
- data/lib/zip/extra_field.rb +103 -0
- data/lib/zip/file.rb +468 -0
- data/lib/zip/filesystem.rb +643 -0
- data/lib/zip/inflater.rb +54 -0
- data/lib/zip/input_stream.rb +180 -0
- data/lib/zip/ioextras/abstract_input_stream.rb +122 -0
- data/lib/zip/ioextras/abstract_output_stream.rb +43 -0
- data/lib/zip/ioextras.rb +36 -0
- data/lib/zip/null_compressor.rb +15 -0
- data/lib/zip/null_decompressor.rb +19 -0
- data/lib/zip/null_input_stream.rb +10 -0
- data/lib/zip/output_stream.rb +198 -0
- data/lib/zip/pass_thru_compressor.rb +23 -0
- data/lib/zip/pass_thru_decompressor.rb +31 -0
- data/lib/zip/streamable_directory.rb +15 -0
- data/lib/zip/streamable_stream.rb +52 -0
- data/lib/zip/version.rb +3 -0
- data/lib/zip.rb +72 -0
- metadata +103 -31
@@ -0,0 +1,58 @@
|
|
1
|
+
######################## BEGIN LICENSE BLOCK ########################
|
2
|
+
# The Original Code is Mozilla Universal charset detector code.
|
3
|
+
#
|
4
|
+
# The Initial Developer of the Original Code is
|
5
|
+
# Netscape Communications Corporation.
|
6
|
+
# Portions created by the Initial Developer are Copyright (C) 2001
|
7
|
+
# the Initial Developer. All Rights Reserved.
|
8
|
+
#
|
9
|
+
# Contributor(s):
|
10
|
+
# Jeff Hodges - port to Ruby
|
11
|
+
# Mark Pilgrim - port to Python
|
12
|
+
# Shy Shalom - original C code
|
13
|
+
#
|
14
|
+
# This library is free software; you can redistribute it and/or
|
15
|
+
# modify it under the terms of the GNU Lesser General Public
|
16
|
+
# License as published by the Free Software Foundation; either
|
17
|
+
# version 2.1 of the License, or (at your option) any later version.
|
18
|
+
#
|
19
|
+
# This library is distributed in the hope that it will be useful,
|
20
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
21
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
22
|
+
# Lesser General Public License for more details.
|
23
|
+
#
|
24
|
+
# You should have received a copy of the GNU Lesser General Public
|
25
|
+
# License along with this library; if not, write to the Free Software
|
26
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
27
|
+
# 02110-1301 USA
|
28
|
+
######################### END LICENSE BLOCK #########################
|
29
|
+
|
30
|
+
module CharDet
  # Group prober for single-byte character sets.
  #
  # It owns one SingleByteCharSetProber per language model listed below, plus a
  # HebrewProber together with the two Win1255 probers that the Hebrew prober
  # is wired to.
  class SBCSGroupProber < CharSetGroupProber
    # Builds the full prober list and then resets the group.
    def initialize
      super

      language_models = [
        Win1251CyrillicModel,
        Koi8rModel,
        Latin5CyrillicModel,
        MacCyrillicModel,
        Ibm866Model,
        Ibm855Model,
        Latin7GreekModel,
        Win1253GreekModel,
        Latin5BulgarianModel,
        Win1251BulgarianModel,
        Latin2HungarianModel,
        Win1250HungarianModel,
        TIS620ThaiModel
      ]
      @probers = language_models.map { |model| SingleByteCharSetProber.new(model) }

      # The Hebrew prober arbitrates between two probers that share the same
      # Win1255 model. NOTE(review): the false/true second argument presumably
      # selects logical vs. visual (reversed) text order, judging by the
      # names used here - confirm against SingleByteCharSetProber.
      hebrew  = HebrewProber.new
      logical = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrew)
      visual  = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrew)
      hebrew.set_model_probers(logical, visual)
      @probers.push(hebrew, logical, visual)

      reset
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
######################## BEGIN LICENSE BLOCK ########################
|
2
|
+
# The Original Code is mozilla.org code.
|
3
|
+
#
|
4
|
+
# The Initial Developer of the Original Code is
|
5
|
+
# Netscape Communications Corporation.
|
6
|
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
7
|
+
# the Initial Developer. All Rights Reserved.
|
8
|
+
#
|
9
|
+
# Contributor(s):
|
10
|
+
# Jeff Hodges - port to Ruby
|
11
|
+
# Mark Pilgrim - port to Python
|
12
|
+
#
|
13
|
+
# This library is free software; you can redistribute it and/or
|
14
|
+
# modify it under the terms of the GNU Lesser General Public
|
15
|
+
# License as published by the Free Software Foundation; either
|
16
|
+
# version 2.1 of the License, or (at your option) any later version.
|
17
|
+
#
|
18
|
+
# This library is distributed in the hope that it will be useful,
|
19
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
20
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
21
|
+
# Lesser General Public License for more details.
|
22
|
+
#
|
23
|
+
# You should have received a copy of the GNU Lesser General Public
|
24
|
+
# License along with this library; if not, write to the Free Software
|
25
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
26
|
+
# 02110-1301 USA
|
27
|
+
######################### END LICENSE BLOCK #########################
|
28
|
+
|
29
|
+
module CharDet
  # Prober for Shift_JIS text. It drives a coding state machine over the input
  # and feeds completed characters to a distribution analyzer and a context
  # analyzer; the reported confidence is the larger of the two analyzers'.
  class SJISProber < MultiByteCharSetProber
    def initialize
      super()
      @codingSM = CodingStateMachine.new(SJISSMModel)
      @distributionAnalyzer = SJISDistributionAnalysis.new
      @contextAnalyzer = SJISContextAnalysis.new
      reset
    end

    # Resets the inherited prober state and the context analyzer.
    def reset
      super()
      @contextAnalyzer.reset
    end

    # @return [String] the charset name reported by this prober
    def get_charset_name
      "SHIFT_JIS"
    end

    # Consumes a chunk of input, one byte-sized slice at a time.
    #
    # @param aBuf [String] input chunk (indexed with aBuf[i, 1], so it is
    #   presumably expected to be a binary string - confirm with callers)
    # @return the prober state after this chunk (value of get_state)
    def feed(aBuf)
      byte_count = aBuf.length

      (0...byte_count).each do |pos|
        sm_state = @codingSM.next_state(aBuf[pos, 1])

        if sm_state == EError
          $stderr << "#{get_charset_name} prober hit error at byte #{pos}\n" if $debug
          @state = ENotMe
          break
        end

        if sm_state == EItsMe
          @state = EFoundIt
          break
        end

        # Only a return to the start state marks a completed character.
        next unless sm_state == EStart

        char_len = @codingSM.get_current_charlen
        if pos.zero?
          # The character may have started in the previous chunk: combine the
          # byte remembered in @lastChar[0] with the first byte of this chunk.
          @lastChar[1] = aBuf[0, 1]
          @contextAnalyzer.feed(@lastChar[2 - char_len, 1], char_len)
          @distributionAnalyzer.feed(@lastChar, char_len)
        else
          @contextAnalyzer.feed(aBuf[pos + 1 - char_len, 2], char_len)
          @distributionAnalyzer.feed(aBuf[pos - 1, 2], char_len)
        end
      end

      # Remember the final byte so the next chunk can complete a split character.
      @lastChar[0] = aBuf[byte_count - 1, 1]

      # Shortcut: stop detecting once there is enough data and high confidence.
      if get_state == EDetecting && @contextAnalyzer.got_enough_data && get_confidence > SHORTCUT_THRESHOLD
        @state = EFoundIt
      end

      get_state
    end

    # @return the larger of the context and distribution analyzer confidences
    def get_confidence
      context_confidence = @contextAnalyzer.get_confidence
      distribution_confidence = @distributionAnalyzer.get_confidence
      [context_confidence, distribution_confidence].max
    end
  end
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# encoding: US-ASCII
|
2
|
+
######################## BEGIN LICENSE BLOCK ########################
|
3
|
+
# The Original Code is Mozilla Universal charset detector code.
|
4
|
+
#
|
5
|
+
# The Initial Developer of the Original Code is
|
6
|
+
# Netscape Communications Corporation.
|
7
|
+
# Portions created by the Initial Developer are Copyright (C) 2001
|
8
|
+
# the Initial Developer. All Rights Reserved.
|
9
|
+
#
|
10
|
+
# Contributor(s):
|
11
|
+
# Jeff Hodges - port to Ruby
|
12
|
+
# Mark Pilgrim - port to Python
|
13
|
+
# Shy Shalom - original C code
|
14
|
+
#
|
15
|
+
# This library is free software; you can redistribute it and/or
|
16
|
+
# modify it under the terms of the GNU Lesser General Public
|
17
|
+
# License as published by the Free Software Foundation; either
|
18
|
+
# version 2.1 of the License, or (at your option) any later version.
|
19
|
+
#
|
20
|
+
# This library is distributed in the hope that it will be useful,
|
21
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
22
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
23
|
+
# Lesser General Public License for more details.
|
24
|
+
#
|
25
|
+
# You should have received a copy of the GNU Lesser General Public
|
26
|
+
# License along with this library; if not, write to the Free Software
|
27
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
28
|
+
# 02110-1301 USA
|
29
|
+
######################### END LICENSE BLOCK #########################
|
30
|
+
|
31
|
+
module CharDet
  MINIMUM_DATA_THRESHOLD = 4
  MINIMUM_THRESHOLD = 0.20

  # Input states tracked by UniversalDetector#feed.
  EPureAscii = 0 # only 7-bit bytes and no escape sequence seen so far
  EEscAscii = 1  # 7-bit bytes with an ESC or "~{" sequence seen
  EHighbyte = 2  # at least one byte >= 0x80 seen

  # Incremental charset detector.
  #
  # Typical use: call #feed with one or more chunks of bytes, then #close, then
  # read #result, a Hash with the keys 'encoding' and 'confidence'.
  class UniversalDetector
    # Byte-order marks checked against the very first chunk, in this order
    # (the 4-byte marks must be tried before the 2-byte marks they start with).
    # The literals are forced to binary so the comparison does not depend on
    # the encoding magic comment of the file this class lives in.
    BOM_ENCODINGS = [
      ["\xEF\xBB\xBF".b,     'UTF-8'],                   # EF BB BF
      ["\xFF\xFE\x00\x00".b, 'UTF-32LE'],                # FF FE 00 00
      ["\x00\x00\xFE\xFF".b, 'UTF-32BE'],                # 00 00 FE FF
      ["\xFE\xFF\x00\x00".b, 'X-ISO-10646-UCS-4-3412'],  # unusual octet order (3412)
      ["\x00\x00\xFF\xFE".b, 'X-ISO-10646-UCS-4-2143'],  # unusual octet order (2143)
      ["\xFF\xFE".b,         'UTF-16LE'],                # FF FE
      ["\xFE\xFF".b,         'UTF-16BE']                 # FE FF
    ].freeze

    # UTF-7 signature: "+/v" followed by one of "8", "9", "+" or "/".
    UTF7_PREFIX = "\x2B\x2F\x76".b.freeze
    UTF7_FOURTH_BYTES = ["\x38", "\x39", "\x2B", "\x2F"].freeze

    attr_reader :done, :result

    def initialize
      @highBitDetector = /[\x80-\xFF]/n
      @escDetector = /(\033|\~\{)/n
      @escCharSetProber = nil
      @charSetProbers = []
      reset
    end

    # Returns the detector (and any probers already created) to its initial state.
    def reset
      @result = {'encoding' => nil, 'confidence' => 0.0}
      @done = false
      @start = true
      @gotData = false
      @inputState = EPureAscii
      @lastChar = ''
      @escCharSetProber.reset if @escCharSetProber
      @charSetProbers.each { |prober| prober.reset }
    end

    # Feeds one chunk of input to the detector.
    #
    # The chunk is treated as raw bytes: a string in any other encoding is
    # re-tagged as binary (on a copy) first, so BOM checks and the /n regexps
    # always operate on bytes. Does nothing once detection is done or when the
    # chunk is empty.
    #
    # @param aBuf [String] next chunk of the data being analysed
    # @return [nil]
    def feed(aBuf)
      return if @done
      return if aBuf.length == 0

      aBuf = aBuf.b unless aBuf.encoding == Encoding::BINARY

      # If the data starts with a BOM, we know which UTF it is.
      unless @gotData
        bom_result = detect_bom(aBuf)
        @result = bom_result if bom_result
      end

      @gotData = true
      if @result['encoding'] && @result['confidence'] > 0.0
        @done = true
        return
      end

      if @inputState == EPureAscii
        if @highBitDetector =~ aBuf
          @inputState = EHighbyte
        elsif @escDetector =~ (@lastChar + aBuf)
          # The previous chunk's last byte is prepended so a "~{" split across
          # two chunks is still seen.
          @inputState = EEscAscii
        end
      end

      @lastChar = aBuf[-1, 1]

      case @inputState
      when EEscAscii
        @escCharSetProber ||= EscCharSetProber.new
        if @escCharSetProber.feed(aBuf) == EFoundIt
          @result = {'encoding' => @escCharSetProber.get_charset_name,
                     'confidence' => @escCharSetProber.get_confidence}
          @done = true
        end
      when EHighbyte
        if @charSetProbers.nil? || @charSetProbers.empty?
          @charSetProbers = [MBCSGroupProber.new, SBCSGroupProber.new, Latin1Prober.new]
        end
        # Probers are fed in order; feeding stops at the first one that is sure.
        winner = @charSetProbers.find { |prober| prober.feed(aBuf) == EFoundIt }
        if winner
          @result = {'encoding' => winner.get_charset_name,
                     'confidence' => winner.get_confidence}
          @done = true
        end
      end

      nil
    end

    # Finishes detection and settles #result from whatever has been seen.
    #
    # @return [Hash, nil] the result hash when a charset was settled here
    #   (pure ASCII, or a prober above MINIMUM_THRESHOLD); nil when detection
    #   was already done, no data was fed, or no prober was confident enough.
    def close
      return if @done

      unless @gotData
        $stderr << "no data received!\n" if $debug
        return
      end
      @done = true

      if @inputState == EPureAscii
        @result = {'encoding' => 'ascii', 'confidence' => 1.0}
        return @result
      end

      if @inputState == EHighbyte
        # Ask each prober for its confidence once and keep the best one.
        best, confidence = @charSetProbers.map { |prober| [prober, prober.get_confidence] }
                                          .max_by { |_, score| score }
        if best && confidence > MINIMUM_THRESHOLD
          @result = {'encoding' => best.get_charset_name, 'confidence' => confidence}
          return @result
        end
      end

      debug_dump_confidences if $debug
      nil
    end

    private

    # Checks the start of +buf+ (a binary string) for a known signature.
    #
    # @return [Hash, nil] a fresh result hash, or nil when no signature matches
    def detect_bom(buf)
      _, name = BOM_ENCODINGS.find { |bom, _| buf.start_with?(bom) }
      return {'encoding' => name.dup, 'confidence' => 1.0} if name

      # NOTE: Ruby only includes "dummy" support for UTF-7. A Ruby UTF-7 string
      # can't have methods called on it, nor can it be converted to anything
      # but "BINARY"/"ASCII-8BIT". Detection is still useful, as UTF-7 data
      # exists in the wild and the scenario may need to be handled.
      #   2B 2F 76 38 | 2B 2F 76 39 | 2B 2F 76 2B | 2B 2F 76 2F
      #   2B 2F 76 38 2D  (UTF-7 with no following character, i.e. empty string)
      if buf.start_with?(UTF7_PREFIX) && UTF7_FOURTH_BYTES.include?(buf[3, 1])
        return {'encoding' => "UTF-7", 'confidence' => 0.99}
      end

      nil
    end

    # Debug aid for #close: prints the confidence of every sub-prober of the
    # first prober group. Does nothing beyond the headline when no probers
    # were created (e.g. the input never contained a high byte) or when the
    # first prober does not expose #probers.
    def debug_dump_confidences
      $stderr << "no probers hit minimum threshhold\n"
      group = @charSetProbers && @charSetProbers.first
      return unless group.respond_to?(:probers)

      group.probers.each do |prober|
        next unless prober

        $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n"
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
######################## BEGIN LICENSE BLOCK ########################
|
2
|
+
# The Original Code is mozilla.org code.
|
3
|
+
#
|
4
|
+
# The Initial Developer of the Original Code is
|
5
|
+
# Netscape Communications Corporation.
|
6
|
+
# Portions created by the Initial Developer are Copyright (C) 1998
|
7
|
+
# the Initial Developer. All Rights Reserved.
|
8
|
+
#
|
9
|
+
# Contributor(s):
|
10
|
+
# Jeff Hodges - port to Ruby
|
11
|
+
# Mark Pilgrim - port to Python
|
12
|
+
#
|
13
|
+
# This library is free software; you can redistribute it and/or
|
14
|
+
# modify it under the terms of the GNU Lesser General Public
|
15
|
+
# License as published by the Free Software Foundation; either
|
16
|
+
# version 2.1 of the License, or (at your option) any later version.
|
17
|
+
#
|
18
|
+
# This library is distributed in the hope that it will be useful,
|
19
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
20
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
21
|
+
# Lesser General Public License for more details.
|
22
|
+
#
|
23
|
+
# You should have received a copy of the GNU Lesser General Public
|
24
|
+
# License along with this library; if not, write to the Free Software
|
25
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
26
|
+
# 02110-1301 USA
|
27
|
+
######################### END LICENSE BLOCK #########################
|
28
|
+
|
29
|
+
module CharDet
  # Factor applied to the "not UTF-8" likelihood for each multi-byte character seen.
  ONE_CHAR_PROB = 0.5

  # Prober for UTF-8: runs the UTF-8 coding state machine over the input and
  # counts how many multi-byte characters were completed.
  class UTF8Prober < CharSetProber
    def initialize
      super()
      @codingSM = CodingStateMachine.new(UTF8SMModel)
      reset
    end

    # Resets the inherited state, the state machine and the multi-byte counter.
    def reset
      super()
      @codingSM.reset
      @numOfMBChar = 0
    end

    # @return [String] the charset name reported by this prober
    def get_charset_name
      "utf-8"
    end

    # Consumes a chunk of input byte by byte.
    #
    # @param aBuf [String] input chunk
    # @return the prober state after this chunk (value of get_state)
    def feed(aBuf)
      aBuf.each_byte do |byte|
        sm_state = @codingSM.next_state(byte.chr)

        if sm_state == EError
          @state = ENotMe
          break
        end

        if sm_state == EItsMe
          @state = EFoundIt
          break
        end

        # Back at the start state: a character was completed. Count it when it
        # took two or more bytes.
        next unless sm_state == EStart

        @numOfMBChar += 1 if @codingSM.get_current_charlen >= 2
      end

      # Shortcut: stop detecting as soon as confidence is high enough.
      @state = EFoundIt if get_state == EDetecting && get_confidence > SHORTCUT_THRESHOLD

      get_state
    end

    # Confidence grows with the number of multi-byte characters seen: with fewer
    # than six, it is 1 - 0.99 * ONE_CHAR_PROB**count; from six on it is 0.99.
    #
    # @return [Float]
    def get_confidence
      return 0.99 if @numOfMBChar >= 6

      unlikelihood = @numOfMBChar.times.inject(0.99) { |acc, _| acc * ONE_CHAR_PROB }
      1.0 - unlikelihood
    end
  end
end
|
data/lib/rchardet.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
######################## BEGIN LICENSE BLOCK ########################
|
2
|
+
# This library is free software; you can redistribute it and/or
|
3
|
+
# modify it under the terms of the GNU Lesser General Public
|
4
|
+
# License as published by the Free Software Foundation; either
|
5
|
+
# version 2.1 of the License, or (at your option) any later version.
|
6
|
+
#
|
7
|
+
# This library is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
10
|
+
# Lesser General Public License for more details.
|
11
|
+
#
|
12
|
+
# You should have received a copy of the GNU Lesser General Public
|
13
|
+
# License along with this library; if not, write to the Free Software
|
14
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
15
|
+
# 02110-1301 USA
|
16
|
+
######################### END LICENSE BLOCK #########################
|
17
|
+
|
18
|
+
require 'rchardet/version'
|
19
|
+
require 'rchardet/charsetprober'
|
20
|
+
require 'rchardet/mbcharsetprober'
|
21
|
+
|
22
|
+
require 'rchardet/big5freq'
|
23
|
+
require 'rchardet/big5prober'
|
24
|
+
require 'rchardet/chardistribution'
|
25
|
+
require 'rchardet/charsetgroupprober'
|
26
|
+
|
27
|
+
require 'rchardet/codingstatemachine'
|
28
|
+
require 'rchardet/constants'
|
29
|
+
require 'rchardet/escprober'
|
30
|
+
require 'rchardet/escsm'
|
31
|
+
require 'rchardet/eucjpprober'
|
32
|
+
require 'rchardet/euckrfreq'
|
33
|
+
require 'rchardet/euckrprober'
|
34
|
+
require 'rchardet/euctwfreq'
|
35
|
+
require 'rchardet/euctwprober'
|
36
|
+
require 'rchardet/gb18030freq'
|
37
|
+
require 'rchardet/gb18030prober'
|
38
|
+
require 'rchardet/hebrewprober'
|
39
|
+
require 'rchardet/jisfreq'
|
40
|
+
require 'rchardet/jpcntx'
|
41
|
+
require 'rchardet/langbulgarianmodel'
|
42
|
+
require 'rchardet/langcyrillicmodel'
|
43
|
+
require 'rchardet/langgreekmodel'
|
44
|
+
require 'rchardet/langhebrewmodel'
|
45
|
+
require 'rchardet/langhungarianmodel'
|
46
|
+
require 'rchardet/langthaimodel'
|
47
|
+
require 'rchardet/latin1prober'
|
48
|
+
|
49
|
+
require 'rchardet/mbcsgroupprober'
|
50
|
+
require 'rchardet/mbcssm'
|
51
|
+
require 'rchardet/sbcharsetprober'
|
52
|
+
require 'rchardet/sbcsgroupprober'
|
53
|
+
require 'rchardet/sjisprober'
|
54
|
+
require 'rchardet/universaldetector'
|
55
|
+
require 'rchardet/utf8prober'
|
56
|
+
|
57
|
+
module CharDet
  # One-shot charset detection for a complete string.
  #
  # The input is duplicated and the copy is tagged as binary, so the caller's
  # string is left untouched. The copy is run through a fresh
  # UniversalDetector (reset, feed, close).
  #
  # @param aBuf [String] the data to analyse
  # @return the detector's #result after closing
  def self.detect(aBuf)
    raw_bytes = aBuf.dup.force_encoding(Encoding::BINARY)

    detector = UniversalDetector.new.tap do |d|
      d.reset
      d.feed(raw_bytes)
      d.close
    end
    detector.result
  end
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
module Zip
  # The central directory of a ZIP archive: the set of entries plus the archive
  # comment, with the code to write and read the end-of-central-directory
  # (EOCD) record and its ZIP64 counterparts.
  class CentralDirectory
    include Enumerable

    # Record signatures, as little-endian 32-bit values.
    END_OF_CDS = 0x06054b50
    ZIP64_END_OF_CDS = 0x06064b50
    ZIP64_EOCD_LOCATOR = 0x07064b50
    # How many trailing bytes of the archive are scanned for the EOCD records.
    MAX_END_OF_CDS_SIZE = 65_536 + 18
    # Size of the fixed part of the EOCD record (matches the 'VvvvvVVv' layout).
    STATIC_EOCD_SIZE = 22

    attr_reader :comment

    # Returns an Enumerable containing the entries.
    def entries
      @entry_set.entries
    end

    # @param entries [EntrySet, Enumerable] entries; anything that is not an
    #   EntrySet is wrapped in one
    # @param comment [String] archive comment
    def initialize(entries = EntrySet.new, comment = '') #:nodoc:
      super()
      @entry_set = entries.kind_of?(EntrySet) ? entries : EntrySet.new(entries)
      @comment = comment
    end

    # Writes every central directory entry followed by the EOCD record(s).
    # The ZIP64 EOCD record and locator are written first when ZIP64 writing is
    # enabled and either a 32-bit/16-bit field would overflow or any entry
    # carries a 'Zip64' extra field.
    def write_to_stream(io) #:nodoc:
      cdir_offset = io.tell
      @entry_set.each { |entry| entry.write_c_dir_entry(io) }
      eocd_offset = io.tell
      cdir_size = eocd_offset - cdir_offset
      if ::Zip.write_zip64_support
        need_zip64_eocd = cdir_offset > 0xFFFFFFFF || cdir_size > 0xFFFFFFFF || @entry_set.size > 0xFFFF
        need_zip64_eocd ||= @entry_set.any? { |entry| entry.extra['Zip64'] }
        if need_zip64_eocd
          write_64_e_o_c_d(io, cdir_offset, cdir_size)
          write_64_eocd_locator(io, eocd_offset)
        end
      end
      write_e_o_c_d(io, cdir_offset, cdir_size)
    end

    # Writes the classic EOCD record followed by the comment. Values that do
    # not fit their field are clamped to the field's maximum.
    def write_e_o_c_d(io, offset, cdir_size) #:nodoc:
      entry_count = @entry_set ? [@entry_set.size, 0xFFFF].min : 0
      tmp = [
        END_OF_CDS,
        0, # @numberOfThisDisk
        0, # @numberOfDiskWithStartOfCDir
        entry_count,
        entry_count,
        [cdir_size, 0xFFFFFFFF].min,
        [offset, 0xFFFFFFFF].min,
        @comment ? @comment.bytesize : 0
      ]
      io << tmp.pack('VvvvvVVv')
      io << @comment
    end

    private :write_e_o_c_d

    # Writes the ZIP64 EOCD record (no extensible data sector).
    def write_64_e_o_c_d(io, offset, cdir_size) #:nodoc:
      entry_count = @entry_set ? @entry_set.size : 0
      tmp = [
        ZIP64_END_OF_CDS,
        44, # size of zip64 end of central directory record (excludes signature and field itself)
        VERSION_MADE_BY,
        VERSION_NEEDED_TO_EXTRACT_ZIP64,
        0, # @numberOfThisDisk
        0, # @numberOfDiskWithStartOfCDir
        entry_count, # number of entries on this disk
        entry_count, # number of entries total
        cdir_size, # size of central directory
        offset # offset of start of central directory in its disk
      ]
      io << tmp.pack('VQ<vvVVQ<Q<Q<Q<')
    end

    private :write_64_e_o_c_d

    # Writes the ZIP64 EOCD locator pointing at +zip64_eocd_offset+.
    def write_64_eocd_locator(io, zip64_eocd_offset)
      tmp = [
        ZIP64_EOCD_LOCATOR,
        0, # number of disk containing the start of zip64 eocd record
        zip64_eocd_offset, # offset of the start of zip64 eocd record in its disk
        1 # total number of disks
      ]
      io << tmp.pack('VVQ<V')
    end

    private :write_64_eocd_locator

    # Parses the ZIP64 EOCD record found in +buf+ into instance variables.
    # Whatever follows the fixed fields is kept as the extensible data sector.
    def read_64_e_o_c_d(buf) #:nodoc:
      buf = get_64_e_o_c_d(buf)
      @size_of_zip64_e_o_c_d = Entry.read_zip_64_long(buf)
      @version_made_by = Entry.read_zip_short(buf)
      @version_needed_for_extract = Entry.read_zip_short(buf)
      @number_of_this_disk = Entry.read_zip_long(buf)
      @number_of_disk_with_start_of_cdir = Entry.read_zip_long(buf)
      @total_number_of_entries_in_cdir_on_this_disk = Entry.read_zip_64_long(buf)
      @size = Entry.read_zip_64_long(buf)
      @size_in_bytes = Entry.read_zip_64_long(buf)
      @cdir_offset = Entry.read_zip_64_long(buf)
      @zip_64_extensible = buf.slice!(0, buf.bytesize)
      raise Error, 'Zip consistency problem while reading eocd structure' unless buf.empty?
    end

    # Parses the classic EOCD record found in +buf+ into instance variables,
    # including the archive comment.
    #
    # @raise [Error] when bytes remain after the declared comment length
    def read_e_o_c_d(buf) #:nodoc:
      buf = get_e_o_c_d(buf)
      @number_of_this_disk = Entry.read_zip_short(buf)
      @number_of_disk_with_start_of_cdir = Entry.read_zip_short(buf)
      @total_number_of_entries_in_cdir_on_this_disk = Entry.read_zip_short(buf)
      @size = Entry.read_zip_short(buf)
      @size_in_bytes = Entry.read_zip_long(buf)
      @cdir_offset = Entry.read_zip_long(buf)
      comment_length = Entry.read_zip_short(buf)
      @comment = if comment_length.to_i <= 0
                   # No declared comment: take whatever is left.
                   buf.slice!(0, buf.size)
                 else
                   buf.read(comment_length)
                 end
      raise Error, 'Zip consistency problem while reading eocd structure' unless buf.empty?
    end

    # Seeks to the central directory and reads @size entries into a new entry set.
    #
    # @raise [Error] when the recorded offset cannot be seeked to
    def read_central_directory_entries(io) #:nodoc:
      begin
        io.seek(@cdir_offset, IO::SEEK_SET)
      rescue Errno::EINVAL
        raise Error, 'Zip consistency problem while reading central directory entry'
      end
      @entry_set = EntrySet.new
      @size.times do
        @entry_set << Entry.read_c_dir_entry(io)
      end
    end

    # Reads the EOCD record and then the central directory entries from +io+.
    # When both ZIP64 signatures are present only the ZIP64 record is parsed,
    # so in that case @comment is not read from the archive.
    def read_from_stream(io) #:nodoc:
      buf = start_buf(io)
      if zip64_file?(buf)
        read_64_e_o_c_d(buf)
      else
        read_e_o_c_d(buf)
      end
      read_central_directory_entries(io)
    end

    # Cuts everything after the last EOCD signature out of +buf+ (mutating it)
    # and returns that tail as a string that also responds to #read(count).
    #
    # @raise [Error] when the signature is not found
    def get_e_o_c_d(buf) #:nodoc:
      sig_index = buf.rindex([END_OF_CDS].pack('V'))
      raise Error, 'Zip end of central directory signature not found' unless sig_index

      with_reader(buf.slice!((sig_index + 4)..(buf.bytesize)))
    end

    # True-ish when +buf+ contains both the ZIP64 EOCD and the ZIP64 locator signatures.
    def zip64_file?(buf)
      buf.rindex([ZIP64_END_OF_CDS].pack('V')) && buf.rindex([ZIP64_EOCD_LOCATOR].pack('V'))
    end

    # Returns the last MAX_END_OF_CDS_SIZE bytes of +io+, or the whole stream
    # when it is too short for that seek.
    def start_buf(io)
      begin
        io.seek(-MAX_END_OF_CDS_SIZE, IO::SEEK_END)
      rescue Errno::EINVAL
        io.seek(0, IO::SEEK_SET)
      end
      io.read
    end

    # Cuts the ZIP64 EOCD record out of +buf+ (mutating it): the bytes after
    # the record signature up to, but not including, the ZIP64 locator
    # signature. The returned string also responds to #read(count).
    #
    # @raise [Error] when either ZIP64 signature is missing
    def get_64_e_o_c_d(buf) #:nodoc:
      zip_64_start = buf.rindex([ZIP64_END_OF_CDS].pack('V'))
      raise Error, 'Zip64 end of central directory signature not found' unless zip_64_start

      zip_64_locator = buf.rindex([ZIP64_EOCD_LOCATOR].pack('V'))
      raise Error, 'Zip64 end of central directory signature locator not found' unless zip_64_locator

      # Exclusive range: the byte at zip_64_locator is the first byte of the
      # locator signature and is not part of the record.
      with_reader(buf.slice!((zip_64_start + 4)...zip_64_locator))
    end

    # Gives +buf+ an IO-like #read(count) that consumes bytes from its front,
    # which is what the Entry.read_zip_* helpers are called with above.
    def with_reader(buf)
      def buf.read(count)
        slice!(0, count)
      end

      buf
    end

    private :with_reader

    # For iterating over the entries.
    def each(&a_proc)
      @entry_set.each(&a_proc)
    end

    # Returns the number of entries in the central directory (and
    # consequently in the zip archive).
    def size
      @entry_set.size
    end

    # Builds a CentralDirectory from +io+; returns nil when reading raises Error.
    def self.read_from_stream(io) #:nodoc:
      cdir = new
      cdir.read_from_stream(io)
      cdir
    rescue Error
      nil
    end

    # Two central directories are equal when their sorted entries and their
    # comments are equal.
    def ==(other) #:nodoc:
      return false unless other.kind_of?(CentralDirectory)

      @entry_set.entries.sort == other.entries.sort && comment == other.comment
    end
  end
end
|
209
|
+
|
210
|
+
# Copyright (C) 2002, 2003 Thomas Sondergaard
|
211
|
+
# rubyzip is free software; you can redistribute it and/or
|
212
|
+
# modify it under the terms of the ruby license.
|