lg_pod_plugin 1.0.7 → 1.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/lg +5 -0
- data/lib/command/cache.rb +22 -18
- data/lib/command/command.rb +27 -35
- data/lib/command/install.rb +52 -0
- data/lib/command/update.rb +39 -0
- data/lib/lg_pod_plugin/downloader.rb +12 -17
- data/lib/lg_pod_plugin/git_util.rb +150 -106
- data/lib/lg_pod_plugin/install.rb +74 -88
- data/lib/lg_pod_plugin/l_cache.rb +11 -77
- data/lib/lg_pod_plugin/l_util.rb +6 -2
- data/lib/lg_pod_plugin/request.rb +90 -82
- data/lib/lg_pod_plugin/version.rb +1 -1
- data/lib/lg_pod_plugin.rb +11 -7
- metadata +57 -118
- data/lib/git/author.rb +0 -14
- data/lib/git/base/factory.rb +0 -101
- data/lib/git/base.rb +0 -670
- data/lib/git/branch.rb +0 -126
- data/lib/git/branches.rb +0 -71
- data/lib/git/config.rb +0 -22
- data/lib/git/diff.rb +0 -155
- data/lib/git/encoding_utils.rb +0 -33
- data/lib/git/escaped_path.rb +0 -77
- data/lib/git/index.rb +0 -5
- data/lib/git/lib.rb +0 -1215
- data/lib/git/log.rb +0 -135
- data/lib/git/object.rb +0 -312
- data/lib/git/path.rb +0 -31
- data/lib/git/remote.rb +0 -36
- data/lib/git/repository.rb +0 -6
- data/lib/git/stash.rb +0 -27
- data/lib/git/stashes.rb +0 -55
- data/lib/git/status.rb +0 -199
- data/lib/git/url.rb +0 -127
- data/lib/git/version.rb +0 -5
- data/lib/git/working_directory.rb +0 -4
- data/lib/git/worktree.rb +0 -38
- data/lib/git/worktrees.rb +0 -47
- data/lib/git.rb +0 -326
- data/lib/rchardet/big5freq.rb +0 -927
- data/lib/rchardet/big5prober.rb +0 -42
- data/lib/rchardet/chardistribution.rb +0 -250
- data/lib/rchardet/charsetgroupprober.rb +0 -110
- data/lib/rchardet/charsetprober.rb +0 -70
- data/lib/rchardet/codingstatemachine.rb +0 -67
- data/lib/rchardet/constants.rb +0 -42
- data/lib/rchardet/escprober.rb +0 -90
- data/lib/rchardet/escsm.rb +0 -245
- data/lib/rchardet/eucjpprober.rb +0 -88
- data/lib/rchardet/euckrfreq.rb +0 -597
- data/lib/rchardet/euckrprober.rb +0 -42
- data/lib/rchardet/euctwfreq.rb +0 -431
- data/lib/rchardet/euctwprober.rb +0 -42
- data/lib/rchardet/gb18030freq.rb +0 -474
- data/lib/rchardet/gb18030prober.rb +0 -42
- data/lib/rchardet/hebrewprober.rb +0 -289
- data/lib/rchardet/jisfreq.rb +0 -571
- data/lib/rchardet/jpcntx.rb +0 -229
- data/lib/rchardet/langbulgarianmodel.rb +0 -229
- data/lib/rchardet/langcyrillicmodel.rb +0 -330
- data/lib/rchardet/langgreekmodel.rb +0 -227
- data/lib/rchardet/langhebrewmodel.rb +0 -202
- data/lib/rchardet/langhungarianmodel.rb +0 -226
- data/lib/rchardet/langthaimodel.rb +0 -201
- data/lib/rchardet/latin1prober.rb +0 -147
- data/lib/rchardet/mbcharsetprober.rb +0 -89
- data/lib/rchardet/mbcsgroupprober.rb +0 -47
- data/lib/rchardet/mbcssm.rb +0 -542
- data/lib/rchardet/sbcharsetprober.rb +0 -122
- data/lib/rchardet/sbcsgroupprober.rb +0 -58
- data/lib/rchardet/sjisprober.rb +0 -88
- data/lib/rchardet/universaldetector.rb +0 -179
- data/lib/rchardet/utf8prober.rb +0 -87
- data/lib/rchardet/version.rb +0 -3
- data/lib/rchardet.rb +0 -67
- data/lib/zip/central_directory.rb +0 -212
- data/lib/zip/compressor.rb +0 -9
- data/lib/zip/constants.rb +0 -115
- data/lib/zip/crypto/decrypted_io.rb +0 -40
- data/lib/zip/crypto/encryption.rb +0 -11
- data/lib/zip/crypto/null_encryption.rb +0 -43
- data/lib/zip/crypto/traditional_encryption.rb +0 -99
- data/lib/zip/decompressor.rb +0 -31
- data/lib/zip/deflater.rb +0 -34
- data/lib/zip/dos_time.rb +0 -53
- data/lib/zip/entry.rb +0 -719
- data/lib/zip/entry_set.rb +0 -88
- data/lib/zip/errors.rb +0 -19
- data/lib/zip/extra_field/generic.rb +0 -44
- data/lib/zip/extra_field/ntfs.rb +0 -94
- data/lib/zip/extra_field/old_unix.rb +0 -46
- data/lib/zip/extra_field/universal_time.rb +0 -77
- data/lib/zip/extra_field/unix.rb +0 -39
- data/lib/zip/extra_field/zip64.rb +0 -70
- data/lib/zip/extra_field/zip64_placeholder.rb +0 -15
- data/lib/zip/extra_field.rb +0 -103
- data/lib/zip/file.rb +0 -468
- data/lib/zip/filesystem.rb +0 -643
- data/lib/zip/inflater.rb +0 -54
- data/lib/zip/input_stream.rb +0 -180
- data/lib/zip/ioextras/abstract_input_stream.rb +0 -122
- data/lib/zip/ioextras/abstract_output_stream.rb +0 -43
- data/lib/zip/ioextras.rb +0 -36
- data/lib/zip/null_compressor.rb +0 -15
- data/lib/zip/null_decompressor.rb +0 -19
- data/lib/zip/null_input_stream.rb +0 -10
- data/lib/zip/output_stream.rb +0 -198
- data/lib/zip/pass_thru_compressor.rb +0 -23
- data/lib/zip/pass_thru_decompressor.rb +0 -31
- data/lib/zip/streamable_directory.rb +0 -15
- data/lib/zip/streamable_stream.rb +0 -52
- data/lib/zip/version.rb +0 -3
- data/lib/zip.rb +0 -72
@@ -1,58 +0,0 @@
|
|
1
|
-
######################## BEGIN LICENSE BLOCK ########################
|
2
|
-
# The Original Code is Mozilla Universal charset detector code.
|
3
|
-
#
|
4
|
-
# The Initial Developer of the Original Code is
|
5
|
-
# Netscape Communications Corporation.
|
6
|
-
# Portions created by the Initial Developer are Copyright (C) 2001
|
7
|
-
# the Initial Developer. All Rights Reserved.
|
8
|
-
#
|
9
|
-
# Contributor(s):
|
10
|
-
# Jeff Hodges - port to Ruby
|
11
|
-
# Mark Pilgrim - port to Python
|
12
|
-
# Shy Shalom - original C code
|
13
|
-
#
|
14
|
-
# This library is free software; you can redistribute it and/or
|
15
|
-
# modify it under the terms of the GNU Lesser General Public
|
16
|
-
# License as published by the Free Software Foundation; either
|
17
|
-
# version 2.1 of the License, or (at your option) any later version.
|
18
|
-
#
|
19
|
-
# This library is distributed in the hope that it will be useful,
|
20
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
21
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
22
|
-
# Lesser General Public License for more details.
|
23
|
-
#
|
24
|
-
# You should have received a copy of the GNU Lesser General Public
|
25
|
-
# License along with this library; if not, write to the Free Software
|
26
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
27
|
-
# 02110-1301 USA
|
28
|
-
######################### END LICENSE BLOCK #########################
|
29
|
-
|
30
|
-
module CharDet
|
31
|
-
class SBCSGroupProber < CharSetGroupProber
|
32
|
-
def initialize
|
33
|
-
super
|
34
|
-
@probers = [
|
35
|
-
SingleByteCharSetProber.new(Win1251CyrillicModel),
|
36
|
-
SingleByteCharSetProber.new(Koi8rModel),
|
37
|
-
SingleByteCharSetProber.new(Latin5CyrillicModel),
|
38
|
-
SingleByteCharSetProber.new(MacCyrillicModel),
|
39
|
-
SingleByteCharSetProber.new(Ibm866Model),
|
40
|
-
SingleByteCharSetProber.new(Ibm855Model),
|
41
|
-
SingleByteCharSetProber.new(Latin7GreekModel),
|
42
|
-
SingleByteCharSetProber.new(Win1253GreekModel),
|
43
|
-
SingleByteCharSetProber.new(Latin5BulgarianModel),
|
44
|
-
SingleByteCharSetProber.new(Win1251BulgarianModel),
|
45
|
-
SingleByteCharSetProber.new(Latin2HungarianModel),
|
46
|
-
SingleByteCharSetProber.new(Win1250HungarianModel),
|
47
|
-
SingleByteCharSetProber.new(TIS620ThaiModel),
|
48
|
-
]
|
49
|
-
hebrewProber = HebrewProber.new()
|
50
|
-
logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
|
51
|
-
visualHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrewProber)
|
52
|
-
hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
|
53
|
-
@probers += [hebrewProber, logicalHebrewProber, visualHebrewProber]
|
54
|
-
|
55
|
-
reset()
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
data/lib/rchardet/sjisprober.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
######################## BEGIN LICENSE BLOCK ########################
|
2
|
-
# The Original Code is mozilla.org code.
|
3
|
-
#
|
4
|
-
# The Initial Developer of the Original Code is
|
5
|
-
# Netscape Communications Corporation.
|
6
|
-
# Portions created by the Initial Developer are Copyright (C) 1998
|
7
|
-
# the Initial Developer. All Rights Reserved.
|
8
|
-
#
|
9
|
-
# Contributor(s):
|
10
|
-
# Jeff Hodges - port to Ruby
|
11
|
-
# Mark Pilgrim - port to Python
|
12
|
-
#
|
13
|
-
# This library is free software; you can redistribute it and/or
|
14
|
-
# modify it under the terms of the GNU Lesser General Public
|
15
|
-
# License as published by the Free Software Foundation; either
|
16
|
-
# version 2.1 of the License, or (at your option) any later version.
|
17
|
-
#
|
18
|
-
# This library is distributed in the hope that it will be useful,
|
19
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
20
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
21
|
-
# Lesser General Public License for more details.
|
22
|
-
#
|
23
|
-
# You should have received a copy of the GNU Lesser General Public
|
24
|
-
# License along with this library; if not, write to the Free Software
|
25
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
26
|
-
# 02110-1301 USA
|
27
|
-
######################### END LICENSE BLOCK #########################
|
28
|
-
|
29
|
-
module CharDet
|
30
|
-
class SJISProber < MultiByteCharSetProber
|
31
|
-
def initialize
|
32
|
-
super()
|
33
|
-
@codingSM = CodingStateMachine.new(SJISSMModel)
|
34
|
-
@distributionAnalyzer = SJISDistributionAnalysis.new()
|
35
|
-
@contextAnalyzer = SJISContextAnalysis.new()
|
36
|
-
reset()
|
37
|
-
end
|
38
|
-
|
39
|
-
def reset
|
40
|
-
super()
|
41
|
-
@contextAnalyzer.reset()
|
42
|
-
end
|
43
|
-
|
44
|
-
def get_charset_name
|
45
|
-
return "SHIFT_JIS"
|
46
|
-
end
|
47
|
-
|
48
|
-
def feed(aBuf)
|
49
|
-
aLen = aBuf.length
|
50
|
-
for i in (0...aLen)
|
51
|
-
codingState = @codingSM.next_state(aBuf[i,1])
|
52
|
-
if codingState == EError
|
53
|
-
$stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
|
54
|
-
@state = ENotMe
|
55
|
-
break
|
56
|
-
elsif codingState == EItsMe
|
57
|
-
@state = EFoundIt
|
58
|
-
break
|
59
|
-
elsif codingState == EStart
|
60
|
-
charLen = @codingSM.get_current_charlen()
|
61
|
-
if i == 0
|
62
|
-
@lastChar[1] = aBuf[0, 1]
|
63
|
-
@contextAnalyzer.feed(@lastChar[2-charLen, 1], charLen)
|
64
|
-
@distributionAnalyzer.feed(@lastChar, charLen)
|
65
|
-
else
|
66
|
-
@contextAnalyzer.feed(aBuf[i+1-charLen, 2], charLen)
|
67
|
-
@distributionAnalyzer.feed(aBuf[i-1, 2], charLen)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
@lastChar[0] = aBuf[aLen-1, 1]
|
73
|
-
|
74
|
-
if get_state() == EDetecting
|
75
|
-
if @contextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
|
76
|
-
@state = EFoundIt
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
return get_state()
|
81
|
-
end
|
82
|
-
|
83
|
-
def get_confidence
|
84
|
-
l = [@contextAnalyzer.get_confidence(), @distributionAnalyzer.get_confidence()]
|
85
|
-
return l.max
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
@@ -1,179 +0,0 @@
|
|
1
|
-
# encoding: US-ASCII
|
2
|
-
######################## BEGIN LICENSE BLOCK ########################
|
3
|
-
# The Original Code is Mozilla Universal charset detector code.
|
4
|
-
#
|
5
|
-
# The Initial Developer of the Original Code is
|
6
|
-
# Netscape Communications Corporation.
|
7
|
-
# Portions created by the Initial Developer are Copyright (C) 2001
|
8
|
-
# the Initial Developer. All Rights Reserved.
|
9
|
-
#
|
10
|
-
# Contributor(s):
|
11
|
-
# Jeff Hodges - port to Ruby
|
12
|
-
# Mark Pilgrim - port to Python
|
13
|
-
# Shy Shalom - original C code
|
14
|
-
#
|
15
|
-
# This library is free software; you can redistribute it and/or
|
16
|
-
# modify it under the terms of the GNU Lesser General Public
|
17
|
-
# License as published by the Free Software Foundation; either
|
18
|
-
# version 2.1 of the License, or (at your option) any later version.
|
19
|
-
#
|
20
|
-
# This library is distributed in the hope that it will be useful,
|
21
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
22
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
23
|
-
# Lesser General Public License for more details.
|
24
|
-
#
|
25
|
-
# You should have received a copy of the GNU Lesser General Public
|
26
|
-
# License along with this library; if not, write to the Free Software
|
27
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
28
|
-
# 02110-1301 USA
|
29
|
-
######################### END LICENSE BLOCK #########################
|
30
|
-
|
31
|
-
module CharDet
|
32
|
-
MINIMUM_DATA_THRESHOLD = 4
|
33
|
-
MINIMUM_THRESHOLD = 0.20
|
34
|
-
EPureAscii = 0
|
35
|
-
EEscAscii = 1
|
36
|
-
EHighbyte = 2
|
37
|
-
|
38
|
-
class UniversalDetector
|
39
|
-
attr_reader :done, :result
|
40
|
-
|
41
|
-
def initialize
|
42
|
-
@highBitDetector = /[\x80-\xFF]/n
|
43
|
-
@escDetector = /(\033|\~\{)/n
|
44
|
-
@escCharSetProber = nil
|
45
|
-
@charSetProbers = []
|
46
|
-
reset()
|
47
|
-
end
|
48
|
-
|
49
|
-
def reset
|
50
|
-
@result = {'encoding' => nil, 'confidence' => 0.0}
|
51
|
-
@done = false
|
52
|
-
@start = true
|
53
|
-
@gotData = false
|
54
|
-
@inputState = EPureAscii
|
55
|
-
@lastChar = ''
|
56
|
-
if @escCharSetProber
|
57
|
-
@escCharSetProber.reset()
|
58
|
-
end
|
59
|
-
for prober in @charSetProbers
|
60
|
-
prober.reset()
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
def feed(aBuf)
|
65
|
-
return if @done
|
66
|
-
|
67
|
-
aLen = aBuf.length
|
68
|
-
return if aLen == 0
|
69
|
-
|
70
|
-
if !@gotData
|
71
|
-
# If the data starts with BOM, we know it is UTF
|
72
|
-
if aBuf[0, 3] == "\xEF\xBB\xBF"
|
73
|
-
# EF BB BF UTF-8 with BOM
|
74
|
-
@result = {'encoding' => "UTF-8", 'confidence' => 1.0}
|
75
|
-
elsif aBuf[0, 4] == "\xFF\xFE\x00\x00"
|
76
|
-
# FF FE 00 00 UTF-32, little-endian BOM
|
77
|
-
@result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
|
78
|
-
elsif aBuf[0, 4] == "\x00\x00\xFE\xFF"
|
79
|
-
# 00 00 FE FF UTF-32, big-endian BOM
|
80
|
-
@result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
|
81
|
-
elsif aBuf[0, 4] == "\xFE\xFF\x00\x00"
|
82
|
-
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
83
|
-
@result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
|
84
|
-
elsif aBuf[0, 4] == "\x00\x00\xFF\xFE"
|
85
|
-
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
86
|
-
@result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
|
87
|
-
elsif aBuf[0, 2] == "\xFF\xFE"
|
88
|
-
# FF FE UTF-16, little endian BOM
|
89
|
-
@result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
|
90
|
-
elsif aBuf[0, 2] == "\xFE\xFF"
|
91
|
-
# FE FF UTF-16, big endian BOM
|
92
|
-
@result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
|
93
|
-
elsif aBuf[0, 3] == "\x2B\x2F\x76" && ["\x38", "\x39", "\x2B", "\x2F"].include?(aBuf[3, 1])
|
94
|
-
# NOTE: Ruby only includes "dummy" support for UTF-7.
|
95
|
-
# A Ruby UTF-7 string can't have methods called on it, nor can it be converted to anything else, but "BINARY"/"ASCII-8BIT".
|
96
|
-
# Still, this doesn't make detection useless, as UTF-7 encodings exist in the wild, and the scenario may need to be handled.
|
97
|
-
# 2B 2F 76 38 UTF-7
|
98
|
-
# 2B 2F 76 39 UTF-7
|
99
|
-
# 2B 2F 76 2B UTF-7
|
100
|
-
# 2B 2F 76 2F UTF-7
|
101
|
-
# 2B 2F 76 38 2D UTF-7 with no following character (empty string)
|
102
|
-
@result = {'encoding' => "UTF-7", 'confidence' => 0.99}
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
@gotData = true
|
107
|
-
if @result['encoding'] and (@result['confidence'] > 0.0)
|
108
|
-
@done = true
|
109
|
-
return
|
110
|
-
end
|
111
|
-
if @inputState == EPureAscii
|
112
|
-
if @highBitDetector =~ (aBuf)
|
113
|
-
@inputState = EHighbyte
|
114
|
-
elsif (@inputState == EPureAscii) and @escDetector =~ (@lastChar + aBuf)
|
115
|
-
@inputState = EEscAscii
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
@lastChar = aBuf[-1, 1]
|
120
|
-
if @inputState == EEscAscii
|
121
|
-
if !@escCharSetProber
|
122
|
-
@escCharSetProber = EscCharSetProber.new()
|
123
|
-
end
|
124
|
-
if @escCharSetProber.feed(aBuf) == EFoundIt
|
125
|
-
@result = {'encoding' => @escCharSetProber.get_charset_name(),
|
126
|
-
'confidence' => @escCharSetProber.get_confidence()
|
127
|
-
}
|
128
|
-
@done = true
|
129
|
-
end
|
130
|
-
elsif @inputState == EHighbyte
|
131
|
-
if @charSetProbers.nil? || @charSetProbers.empty?
|
132
|
-
@charSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
|
133
|
-
end
|
134
|
-
for prober in @charSetProbers
|
135
|
-
if prober.feed(aBuf) == EFoundIt
|
136
|
-
@result = {'encoding' => prober.get_charset_name(),
|
137
|
-
'confidence' => prober.get_confidence()}
|
138
|
-
@done = true
|
139
|
-
break
|
140
|
-
end
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
end
|
145
|
-
|
146
|
-
def close
|
147
|
-
return if @done
|
148
|
-
if !@gotData
|
149
|
-
$stderr << "no data received!\n" if $debug
|
150
|
-
return
|
151
|
-
end
|
152
|
-
@done = true
|
153
|
-
|
154
|
-
if @inputState == EPureAscii
|
155
|
-
@result = {'encoding' => 'ascii', 'confidence' => 1.0}
|
156
|
-
return @result
|
157
|
-
end
|
158
|
-
|
159
|
-
if @inputState == EHighbyte
|
160
|
-
confidences = {}
|
161
|
-
@charSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
|
162
|
-
maxProber = @charSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
|
163
|
-
if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
|
164
|
-
@result = {'encoding' => maxProber.get_charset_name(),
|
165
|
-
'confidence' => maxProber.get_confidence()}
|
166
|
-
return @result
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
if $debug
|
171
|
-
$stderr << "no probers hit minimum threshhold\n" if $debug
|
172
|
-
for prober in @charSetProbers[0].probers
|
173
|
-
next if !prober
|
174
|
-
$stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
data/lib/rchardet/utf8prober.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
######################## BEGIN LICENSE BLOCK ########################
|
2
|
-
# The Original Code is mozilla.org code.
|
3
|
-
#
|
4
|
-
# The Initial Developer of the Original Code is
|
5
|
-
# Netscape Communications Corporation.
|
6
|
-
# Portions created by the Initial Developer are Copyright (C) 1998
|
7
|
-
# the Initial Developer. All Rights Reserved.
|
8
|
-
#
|
9
|
-
# Contributor(s):
|
10
|
-
# Jeff Hodges - port to Ruby
|
11
|
-
# Mark Pilgrim - port to Python
|
12
|
-
#
|
13
|
-
# This library is free software; you can redistribute it and/or
|
14
|
-
# modify it under the terms of the GNU Lesser General Public
|
15
|
-
# License as published by the Free Software Foundation; either
|
16
|
-
# version 2.1 of the License, or (at your option) any later version.
|
17
|
-
#
|
18
|
-
# This library is distributed in the hope that it will be useful,
|
19
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
20
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
21
|
-
# Lesser General Public License for more details.
|
22
|
-
#
|
23
|
-
# You should have received a copy of the GNU Lesser General Public
|
24
|
-
# License along with this library; if not, write to the Free Software
|
25
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
26
|
-
# 02110-1301 USA
|
27
|
-
######################### END LICENSE BLOCK #########################
|
28
|
-
|
29
|
-
module CharDet
|
30
|
-
ONE_CHAR_PROB = 0.5
|
31
|
-
|
32
|
-
class UTF8Prober < CharSetProber
|
33
|
-
def initialize
|
34
|
-
super()
|
35
|
-
@codingSM = CodingStateMachine.new(UTF8SMModel)
|
36
|
-
reset()
|
37
|
-
end
|
38
|
-
|
39
|
-
def reset
|
40
|
-
super()
|
41
|
-
@codingSM.reset()
|
42
|
-
@numOfMBChar = 0
|
43
|
-
end
|
44
|
-
|
45
|
-
def get_charset_name
|
46
|
-
return "utf-8"
|
47
|
-
end
|
48
|
-
|
49
|
-
def feed(aBuf)
|
50
|
-
aBuf.each_byte do |b|
|
51
|
-
c = b.chr
|
52
|
-
codingState = @codingSM.next_state(c)
|
53
|
-
if codingState == EError
|
54
|
-
@state = ENotMe
|
55
|
-
break
|
56
|
-
elsif codingState == EItsMe
|
57
|
-
@state = EFoundIt
|
58
|
-
break
|
59
|
-
elsif codingState == EStart
|
60
|
-
if @codingSM.get_current_charlen() >= 2
|
61
|
-
@numOfMBChar += 1
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
if get_state() == EDetecting
|
67
|
-
if get_confidence() > SHORTCUT_THRESHOLD
|
68
|
-
@state = EFoundIt
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
return get_state()
|
73
|
-
end
|
74
|
-
|
75
|
-
def get_confidence
|
76
|
-
unlike = 0.99
|
77
|
-
if @numOfMBChar < 6
|
78
|
-
for i in (0...@numOfMBChar)
|
79
|
-
unlike = unlike * ONE_CHAR_PROB
|
80
|
-
end
|
81
|
-
return 1.0 - unlike
|
82
|
-
else
|
83
|
-
return unlike
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
data/lib/rchardet/version.rb
DELETED
data/lib/rchardet.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
######################## BEGIN LICENSE BLOCK ########################
|
2
|
-
# This library is free software; you can redistribute it and/or
|
3
|
-
# modify it under the terms of the GNU Lesser General Public
|
4
|
-
# License as published by the Free Software Foundation; either
|
5
|
-
# version 2.1 of the License, or (at your option) any later version.
|
6
|
-
#
|
7
|
-
# This library is distributed in the hope that it will be useful,
|
8
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
10
|
-
# Lesser General Public License for more details.
|
11
|
-
#
|
12
|
-
# You should have received a copy of the GNU Lesser General Public
|
13
|
-
# License along with this library; if not, write to the Free Software
|
14
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
15
|
-
# 02110-1301 USA
|
16
|
-
######################### END LICENSE BLOCK #########################
|
17
|
-
|
18
|
-
require 'rchardet/version'
|
19
|
-
require 'rchardet/charsetprober'
|
20
|
-
require 'rchardet/mbcharsetprober'
|
21
|
-
|
22
|
-
require 'rchardet/big5freq'
|
23
|
-
require 'rchardet/big5prober'
|
24
|
-
require 'rchardet/chardistribution'
|
25
|
-
require 'rchardet/charsetgroupprober'
|
26
|
-
|
27
|
-
require 'rchardet/codingstatemachine'
|
28
|
-
require 'rchardet/constants'
|
29
|
-
require 'rchardet/escprober'
|
30
|
-
require 'rchardet/escsm'
|
31
|
-
require 'rchardet/eucjpprober'
|
32
|
-
require 'rchardet/euckrfreq'
|
33
|
-
require 'rchardet/euckrprober'
|
34
|
-
require 'rchardet/euctwfreq'
|
35
|
-
require 'rchardet/euctwprober'
|
36
|
-
require 'rchardet/gb18030freq'
|
37
|
-
require 'rchardet/gb18030prober'
|
38
|
-
require 'rchardet/hebrewprober'
|
39
|
-
require 'rchardet/jisfreq'
|
40
|
-
require 'rchardet/jpcntx'
|
41
|
-
require 'rchardet/langbulgarianmodel'
|
42
|
-
require 'rchardet/langcyrillicmodel'
|
43
|
-
require 'rchardet/langgreekmodel'
|
44
|
-
require 'rchardet/langhebrewmodel'
|
45
|
-
require 'rchardet/langhungarianmodel'
|
46
|
-
require 'rchardet/langthaimodel'
|
47
|
-
require 'rchardet/latin1prober'
|
48
|
-
|
49
|
-
require 'rchardet/mbcsgroupprober'
|
50
|
-
require 'rchardet/mbcssm'
|
51
|
-
require 'rchardet/sbcharsetprober'
|
52
|
-
require 'rchardet/sbcsgroupprober'
|
53
|
-
require 'rchardet/sjisprober'
|
54
|
-
require 'rchardet/universaldetector'
|
55
|
-
require 'rchardet/utf8prober'
|
56
|
-
|
57
|
-
module CharDet
|
58
|
-
def CharDet.detect(aBuf)
|
59
|
-
aBuf = aBuf.dup.force_encoding(Encoding::BINARY)
|
60
|
-
|
61
|
-
u = UniversalDetector.new
|
62
|
-
u.reset
|
63
|
-
u.feed(aBuf)
|
64
|
-
u.close
|
65
|
-
u.result
|
66
|
-
end
|
67
|
-
end
|
@@ -1,212 +0,0 @@
|
|
1
|
-
module Zip
|
2
|
-
class CentralDirectory
|
3
|
-
include Enumerable
|
4
|
-
|
5
|
-
END_OF_CDS = 0x06054b50
|
6
|
-
ZIP64_END_OF_CDS = 0x06064b50
|
7
|
-
ZIP64_EOCD_LOCATOR = 0x07064b50
|
8
|
-
MAX_END_OF_CDS_SIZE = 65_536 + 18
|
9
|
-
STATIC_EOCD_SIZE = 22
|
10
|
-
|
11
|
-
attr_reader :comment
|
12
|
-
|
13
|
-
# Returns an Enumerable containing the entries.
|
14
|
-
def entries
|
15
|
-
@entry_set.entries
|
16
|
-
end
|
17
|
-
|
18
|
-
def initialize(entries = EntrySet.new, comment = '') #:nodoc:
|
19
|
-
super()
|
20
|
-
@entry_set = entries.kind_of?(EntrySet) ? entries : EntrySet.new(entries)
|
21
|
-
@comment = comment
|
22
|
-
end
|
23
|
-
|
24
|
-
def write_to_stream(io) #:nodoc:
|
25
|
-
cdir_offset = io.tell
|
26
|
-
@entry_set.each { |entry| entry.write_c_dir_entry(io) }
|
27
|
-
eocd_offset = io.tell
|
28
|
-
cdir_size = eocd_offset - cdir_offset
|
29
|
-
if ::Zip.write_zip64_support
|
30
|
-
need_zip64_eocd = cdir_offset > 0xFFFFFFFF || cdir_size > 0xFFFFFFFF || @entry_set.size > 0xFFFF
|
31
|
-
need_zip64_eocd ||= @entry_set.any? { |entry| entry.extra['Zip64'] }
|
32
|
-
if need_zip64_eocd
|
33
|
-
write_64_e_o_c_d(io, cdir_offset, cdir_size)
|
34
|
-
write_64_eocd_locator(io, eocd_offset)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
write_e_o_c_d(io, cdir_offset, cdir_size)
|
38
|
-
end
|
39
|
-
|
40
|
-
def write_e_o_c_d(io, offset, cdir_size) #:nodoc:
|
41
|
-
tmp = [
|
42
|
-
END_OF_CDS,
|
43
|
-
0, # @numberOfThisDisk
|
44
|
-
0, # @numberOfDiskWithStartOfCDir
|
45
|
-
@entry_set ? [@entry_set.size, 0xFFFF].min : 0,
|
46
|
-
@entry_set ? [@entry_set.size, 0xFFFF].min : 0,
|
47
|
-
[cdir_size, 0xFFFFFFFF].min,
|
48
|
-
[offset, 0xFFFFFFFF].min,
|
49
|
-
@comment ? @comment.bytesize : 0
|
50
|
-
]
|
51
|
-
io << tmp.pack('VvvvvVVv')
|
52
|
-
io << @comment
|
53
|
-
end
|
54
|
-
|
55
|
-
private :write_e_o_c_d
|
56
|
-
|
57
|
-
def write_64_e_o_c_d(io, offset, cdir_size) #:nodoc:
|
58
|
-
tmp = [
|
59
|
-
ZIP64_END_OF_CDS,
|
60
|
-
44, # size of zip64 end of central directory record (excludes signature and field itself)
|
61
|
-
VERSION_MADE_BY,
|
62
|
-
VERSION_NEEDED_TO_EXTRACT_ZIP64,
|
63
|
-
0, # @numberOfThisDisk
|
64
|
-
0, # @numberOfDiskWithStartOfCDir
|
65
|
-
@entry_set ? @entry_set.size : 0, # number of entries on this disk
|
66
|
-
@entry_set ? @entry_set.size : 0, # number of entries total
|
67
|
-
cdir_size, # size of central directory
|
68
|
-
offset # offset of start of central directory in its disk
|
69
|
-
]
|
70
|
-
io << tmp.pack('VQ<vvVVQ<Q<Q<Q<')
|
71
|
-
end
|
72
|
-
|
73
|
-
private :write_64_e_o_c_d
|
74
|
-
|
75
|
-
def write_64_eocd_locator(io, zip64_eocd_offset)
|
76
|
-
tmp = [
|
77
|
-
ZIP64_EOCD_LOCATOR,
|
78
|
-
0, # number of disk containing the start of zip64 eocd record
|
79
|
-
zip64_eocd_offset, # offset of the start of zip64 eocd record in its disk
|
80
|
-
1 # total number of disks
|
81
|
-
]
|
82
|
-
io << tmp.pack('VVQ<V')
|
83
|
-
end
|
84
|
-
|
85
|
-
private :write_64_eocd_locator
|
86
|
-
|
87
|
-
def read_64_e_o_c_d(buf) #:nodoc:
|
88
|
-
buf = get_64_e_o_c_d(buf)
|
89
|
-
@size_of_zip64_e_o_c_d = Entry.read_zip_64_long(buf)
|
90
|
-
@version_made_by = Entry.read_zip_short(buf)
|
91
|
-
@version_needed_for_extract = Entry.read_zip_short(buf)
|
92
|
-
@number_of_this_disk = Entry.read_zip_long(buf)
|
93
|
-
@number_of_disk_with_start_of_cdir = Entry.read_zip_long(buf)
|
94
|
-
@total_number_of_entries_in_cdir_on_this_disk = Entry.read_zip_64_long(buf)
|
95
|
-
@size = Entry.read_zip_64_long(buf)
|
96
|
-
@size_in_bytes = Entry.read_zip_64_long(buf)
|
97
|
-
@cdir_offset = Entry.read_zip_64_long(buf)
|
98
|
-
@zip_64_extensible = buf.slice!(0, buf.bytesize)
|
99
|
-
raise Error, 'Zip consistency problem while reading eocd structure' unless buf.empty?
|
100
|
-
end
|
101
|
-
|
102
|
-
def read_e_o_c_d(buf) #:nodoc:
|
103
|
-
buf = get_e_o_c_d(buf)
|
104
|
-
@number_of_this_disk = Entry.read_zip_short(buf)
|
105
|
-
@number_of_disk_with_start_of_cdir = Entry.read_zip_short(buf)
|
106
|
-
@total_number_of_entries_in_cdir_on_this_disk = Entry.read_zip_short(buf)
|
107
|
-
@size = Entry.read_zip_short(buf)
|
108
|
-
@size_in_bytes = Entry.read_zip_long(buf)
|
109
|
-
@cdir_offset = Entry.read_zip_long(buf)
|
110
|
-
comment_length = Entry.read_zip_short(buf)
|
111
|
-
@comment = if comment_length.to_i <= 0
|
112
|
-
buf.slice!(0, buf.size)
|
113
|
-
else
|
114
|
-
buf.read(comment_length)
|
115
|
-
end
|
116
|
-
raise Error, 'Zip consistency problem while reading eocd structure' unless buf.empty?
|
117
|
-
end
|
118
|
-
|
119
|
-
def read_central_directory_entries(io) #:nodoc:
|
120
|
-
begin
|
121
|
-
io.seek(@cdir_offset, IO::SEEK_SET)
|
122
|
-
rescue Errno::EINVAL
|
123
|
-
raise Error, 'Zip consistency problem while reading central directory entry'
|
124
|
-
end
|
125
|
-
@entry_set = EntrySet.new
|
126
|
-
@size.times do
|
127
|
-
@entry_set << Entry.read_c_dir_entry(io)
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def read_from_stream(io) #:nodoc:
|
132
|
-
buf = start_buf(io)
|
133
|
-
if zip64_file?(buf)
|
134
|
-
read_64_e_o_c_d(buf)
|
135
|
-
else
|
136
|
-
read_e_o_c_d(buf)
|
137
|
-
end
|
138
|
-
read_central_directory_entries(io)
|
139
|
-
end
|
140
|
-
|
141
|
-
def get_e_o_c_d(buf) #:nodoc:
|
142
|
-
sig_index = buf.rindex([END_OF_CDS].pack('V'))
|
143
|
-
raise Error, 'Zip end of central directory signature not found' unless sig_index
|
144
|
-
|
145
|
-
buf = buf.slice!((sig_index + 4)..(buf.bytesize))
|
146
|
-
|
147
|
-
def buf.read(count)
|
148
|
-
slice!(0, count)
|
149
|
-
end
|
150
|
-
|
151
|
-
buf
|
152
|
-
end
|
153
|
-
|
154
|
-
def zip64_file?(buf)
|
155
|
-
buf.rindex([ZIP64_END_OF_CDS].pack('V')) && buf.rindex([ZIP64_EOCD_LOCATOR].pack('V'))
|
156
|
-
end
|
157
|
-
|
158
|
-
def start_buf(io)
|
159
|
-
begin
|
160
|
-
io.seek(-MAX_END_OF_CDS_SIZE, IO::SEEK_END)
|
161
|
-
rescue Errno::EINVAL
|
162
|
-
io.seek(0, IO::SEEK_SET)
|
163
|
-
end
|
164
|
-
io.read
|
165
|
-
end
|
166
|
-
|
167
|
-
def get_64_e_o_c_d(buf) #:nodoc:
|
168
|
-
zip_64_start = buf.rindex([ZIP64_END_OF_CDS].pack('V'))
|
169
|
-
raise Error, 'Zip64 end of central directory signature not found' unless zip_64_start
|
170
|
-
|
171
|
-
zip_64_locator = buf.rindex([ZIP64_EOCD_LOCATOR].pack('V'))
|
172
|
-
raise Error, 'Zip64 end of central directory signature locator not found' unless zip_64_locator
|
173
|
-
|
174
|
-
buf = buf.slice!((zip_64_start + 4)..zip_64_locator)
|
175
|
-
|
176
|
-
def buf.read(count)
|
177
|
-
slice!(0, count)
|
178
|
-
end
|
179
|
-
|
180
|
-
buf
|
181
|
-
end
|
182
|
-
|
183
|
-
# For iterating over the entries.
|
184
|
-
def each(&a_proc)
|
185
|
-
@entry_set.each(&a_proc)
|
186
|
-
end
|
187
|
-
|
188
|
-
# Returns the number of entries in the central directory (and
|
189
|
-
# consequently in the zip archive).
|
190
|
-
def size
|
191
|
-
@entry_set.size
|
192
|
-
end
|
193
|
-
|
194
|
-
def self.read_from_stream(io) #:nodoc:
|
195
|
-
cdir = new
|
196
|
-
cdir.read_from_stream(io)
|
197
|
-
cdir
|
198
|
-
rescue Error
|
199
|
-
nil
|
200
|
-
end
|
201
|
-
|
202
|
-
def ==(other) #:nodoc:
|
203
|
-
return false unless other.kind_of?(CentralDirectory)
|
204
|
-
|
205
|
-
@entry_set.entries.sort == other.entries.sort && comment == other.comment
|
206
|
-
end
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
# Copyright (C) 2002, 2003 Thomas Sondergaard
|
211
|
-
# rubyzip is free software; you can redistribute it and/or
|
212
|
-
# modify it under the terms of the ruby license.
|