zip_tricks 3.1.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/zip_tricks.rb +1 -1
- data/lib/zip_tricks/file_reader.rb +104 -114
- data/lib/zip_tricks/file_reader/inflating_reader.rb +30 -0
- data/lib/zip_tricks/file_reader/stored_reader.rb +29 -0
- data/lib/zip_tricks/remote_io.rb +1 -2
- data/lib/zip_tricks/remote_uncap.rb +10 -40
- data/lib/zip_tricks/streamer/writable.rb +1 -1
- data/spec/spec_helper.rb +12 -1
- data/spec/zip_tricks/file_reader_spec.rb +126 -32
- data/spec/zip_tricks/remote_uncap_spec.rb +24 -52
- data/spec/zip_tricks/streamer/writable_spec.rb +27 -0
- data/zip_tricks.gemspec +6 -3
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b478a7ffbae2dbb20270a1e4c4b63f495feef87
|
4
|
+
data.tar.gz: d9a8ca3a1596ee653fcf2e2b9153a9e59bd912fc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 689d25f85a79987750fe3bbdf845cfe76c987713709a3d7fc8d2bc944a570958168c2b6c38a6f657b254d499052c079aad84c85b6421b9c228740dd3c9f79903
|
7
|
+
data.tar.gz: 434ffe473e5b0339b40ead5437e070cc1bd6dcce910db58e516a207806cd0eab16f86cc04d342201e2f31f121438308351239d923f4b4e5311850755c77ce339
|
data/lib/zip_tricks.rb
CHANGED
@@ -47,71 +47,18 @@ require 'stringio'
|
|
47
47
|
# and so on, and sets these entries up with the absolute _offsets_ into the source file/IO object.
|
48
48
|
# These offsets can then be used to extract the actual compressed data of the files and to expand it.
|
49
49
|
class ZipTricks::FileReader
|
50
|
+
require_relative 'file_reader/stored_reader'
|
51
|
+
require_relative 'file_reader/inflating_reader'
|
52
|
+
|
50
53
|
ReadError = Class.new(StandardError)
|
51
54
|
UnsupportedFeature = Class.new(StandardError)
|
52
55
|
InvalidStructure = Class.new(ReadError)
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
@io = from_io
|
57
|
-
@compressed_data_size = compressed_data_size
|
58
|
-
@already_read = 0
|
59
|
-
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
60
|
-
end
|
61
|
-
|
62
|
-
def extract(n_bytes=nil)
|
63
|
-
n_bytes ||= (@compressed_data_size - @already_read)
|
64
|
-
|
65
|
-
return if eof?
|
66
|
-
|
67
|
-
available = @compressed_data_size - @already_read
|
68
|
-
|
69
|
-
return if available.zero?
|
70
|
-
|
71
|
-
n_bytes = available if n_bytes > available
|
72
|
-
|
73
|
-
return '' if n_bytes.zero?
|
74
|
-
|
75
|
-
compressed_chunk = @io.read(n_bytes)
|
76
|
-
@already_read += compressed_chunk.bytesize
|
77
|
-
@zlib_inflater.inflate(compressed_chunk)
|
78
|
-
end
|
79
|
-
|
80
|
-
def eof?
|
81
|
-
@zlib_inflater.finished?
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
class StoredReader
|
86
|
-
def initialize(from_io, compressed_data_size)
|
87
|
-
@io = from_io
|
88
|
-
@compressed_data_size = compressed_data_size
|
89
|
-
@already_read = 0
|
90
|
-
end
|
91
|
-
|
92
|
-
def extract(n_bytes=nil)
|
93
|
-
n_bytes ||= (@compressed_data_size - @already_read)
|
94
|
-
|
95
|
-
return if eof?
|
96
|
-
|
97
|
-
available = @compressed_data_size - @already_read
|
98
|
-
|
99
|
-
return if available.zero?
|
100
|
-
|
101
|
-
n_bytes = available if n_bytes > available
|
102
|
-
|
103
|
-
return '' if n_bytes.zero?
|
104
|
-
|
105
|
-
compressed_chunk = @io.read(n_bytes)
|
106
|
-
@already_read += compressed_chunk.bytesize
|
107
|
-
compressed_chunk
|
108
|
-
end
|
109
|
-
|
110
|
-
def eof?
|
111
|
-
@already_read >= @compressed_data_size
|
56
|
+
LocalHeaderPending = Class.new(StandardError) do
|
57
|
+
def message
|
58
|
+
"The compressed data offset is not available (local header has not been read)"
|
112
59
|
end
|
113
60
|
end
|
114
|
-
|
61
|
+
|
115
62
|
private_constant :StoredReader, :InflatingReader
|
116
63
|
|
117
64
|
# Represents a file within the ZIP archive being read
|
@@ -162,13 +109,9 @@ class ZipTricks::FileReader
|
|
162
109
|
# @return [String] the file comment
|
163
110
|
attr_accessor :comment
|
164
111
|
|
165
|
-
# @return [Fixnum] at what offset you should start reading
|
166
|
-
# for the compressed data in your original IO object
|
167
|
-
attr_accessor :compressed_data_offset
|
168
|
-
|
169
112
|
# Returns a reader for the actual compressed data of the entry.
|
170
113
|
#
|
171
|
-
# reader = entry.
|
114
|
+
# reader = entry.extractor_from(source_file)
|
172
115
|
# outfile << reader.extract(512 * 1024) until reader.eof?
|
173
116
|
#
|
174
117
|
# @return [#extract(n_bytes), #eof?] the reader for the data
|
@@ -180,17 +123,50 @@ class ZipTricks::FileReader
|
|
180
123
|
when 0
|
181
124
|
StoredReader.new(from_io, compressed_size)
|
182
125
|
else
|
183
|
-
raise "Unsupported storage mode for reading
|
126
|
+
raise UnsupportedFeature, "Unsupported storage mode for reading - %d" % storage_mode
|
184
127
|
end
|
185
128
|
end
|
129
|
+
|
130
|
+
# @return [Fixnum] at what offset you should start reading
|
131
|
+
# for the compressed data in your original IO object
|
132
|
+
def compressed_data_offset
|
133
|
+
@compressed_data_offset or raise LocalHeaderPending
|
134
|
+
end
|
135
|
+
|
136
|
+
# Tells whether the compressed data offset is already known for this entry
|
137
|
+
# @return [Boolean]
|
138
|
+
def known_offset?
|
139
|
+
!@compressed_data_offset.nil?
|
140
|
+
end
|
141
|
+
|
142
|
+
# Sets the offset at which the compressed data for this file starts in the ZIP.
|
143
|
+
# By default, the value will be set by the Reader for you. If you use delayed
|
144
|
+
# reading, you need to set it yourself by calling `get_compressed_data_offset` on the Reader:
|
145
|
+
#
|
146
|
+
# entry.compressed_data_offset = reader.get_compressed_data_offset(io: file,
|
147
|
+
# local_file_header_offset: entry.local_file_header_offset)
|
148
|
+
def compressed_data_offset=(offset)
|
149
|
+
@compressed_data_offset = offset.to_i
|
150
|
+
end
|
186
151
|
end
|
187
152
|
|
188
153
|
# Parse an IO handle to a ZIP archive into an array of Entry objects.
|
189
154
|
#
|
190
155
|
# @param io[#tell, #seek, #read, #size] an IO-ish object
|
191
|
-
# @param read_local_headers[Boolean] whether
|
156
|
+
# @param read_local_headers[Boolean] whether the local headers must be read upfront. When reading
|
157
|
+
# a locally available ZIP file this option will not have much use since the small reads from
|
158
|
+
# the file handle are not going to be that important. However, if you are using remote reads
|
159
|
+
# to decipher a ZIP file located on an HTTP server, the operation _must_ perform an HTTP
|
160
|
+
# request for _each entry in the ZIP file_ to determine where the actual file data starts.
|
161
|
+
# This, for a ZIP archive of 1000 files, will incur 1000 extra HTTP requests - which you might
|
162
|
+
# not want to perform upfront, or - at least - not want to perform _at once_. When the option is
|
163
|
+
# set to `false`, the entries you get back will not have their compressed data offset set. Those objects
|
164
|
+
# will raise an exception when you attempt to access their compressed data offset in the ZIP
|
165
|
+
# (since the reads have not been performed yet). As a rule, this option can be left in its
|
166
|
+
# default setting (`true`) unless you want to _only_ read the central directory, or you need
|
167
|
+
# to limit the number of HTTP requests.
|
192
168
|
# @return [Array<Entry>] an array of entries within the ZIP being parsed
|
193
|
-
def read_zip_structure(io
|
169
|
+
def read_zip_structure(io:, read_local_headers: true)
|
194
170
|
zip_file_size = io.size
|
195
171
|
eocd_offset = get_eocd_offset(io, zip_file_size)
|
196
172
|
|
@@ -213,24 +189,68 @@ class ZipTricks::FileReader
|
|
213
189
|
read_cdir_entry(central_directory_io)
|
214
190
|
end
|
215
191
|
|
216
|
-
entries
|
217
|
-
|
218
|
-
|
219
|
-
entry.compressed_data_offset = find_compressed_data_start_offset(io, entry.local_file_header_offset)
|
220
|
-
end
|
221
|
-
end
|
192
|
+
read_local_headers(entries, io) if read_local_headers
|
193
|
+
|
194
|
+
entries
|
222
195
|
end
|
223
196
|
|
197
|
+
# Get the offset in the IO at which the actual compressed data of the file starts within the ZIP.
|
198
|
+
# The method will eager-read the entire local header for the file (the maximum size the local header may use),
|
199
|
+
# starting at the given offset, and will then compute its size. That size plus the local header offset
|
200
|
+
# given will be the compressed data offset of the entry (read starting at this offset to get the data).
|
201
|
+
#
|
202
|
+
# @param io[#seek, #read] an IO-ish object the ZIP file can be read from
|
203
|
+
# @param local_file_header_offset[Fixnum] absolute offset (0-based) where the local file header is supposed to begin
|
204
|
+
# @return [Fixnum] absolute offset (0-based) of where the compressed data begins for this file within the ZIP
|
205
|
+
def get_compressed_data_offset(io:, local_file_header_offset:)
|
206
|
+
seek(io, local_file_header_offset)
|
207
|
+
|
208
|
+
# Reading in bulk is cheaper - grab the maximum length of the local header,
|
209
|
+
# including any headroom
|
210
|
+
local_file_header_str_plus_headroom = io.read(MAX_LOCAL_HEADER_SIZE)
|
211
|
+
io_starting_at_local_header = StringIO.new(local_file_header_str_plus_headroom)
|
212
|
+
|
213
|
+
assert_signature(io_starting_at_local_header, 0x04034b50)
|
214
|
+
|
215
|
+
# The rest is unreliable, and we have that information from the central directory already.
|
216
|
+
# So just skip over it to get at the offset where the compressed data begins
|
217
|
+
skip_ahead_2(io_starting_at_local_header) # Version needed to extract
|
218
|
+
skip_ahead_2(io_starting_at_local_header) # gp flags
|
219
|
+
skip_ahead_2(io_starting_at_local_header) # storage mode
|
220
|
+
skip_ahead_2(io_starting_at_local_header) # dos time
|
221
|
+
skip_ahead_2(io_starting_at_local_header) # dos date
|
222
|
+
skip_ahead_4(io_starting_at_local_header) # CRC32
|
223
|
+
|
224
|
+
skip_ahead_4(io_starting_at_local_header) # Comp size
|
225
|
+
skip_ahead_4(io_starting_at_local_header) # Uncomp size
|
226
|
+
|
227
|
+
filename_size = read_2b(io_starting_at_local_header)
|
228
|
+
extra_size = read_2b(io_starting_at_local_header)
|
229
|
+
|
230
|
+
skip_ahead_n(io_starting_at_local_header, filename_size)
|
231
|
+
skip_ahead_n(io_starting_at_local_header, extra_size)
|
232
|
+
|
233
|
+
local_file_header_offset + io_starting_at_local_header.tell
|
234
|
+
end
|
235
|
+
|
224
236
|
# Parse an IO handle to a ZIP archive into an array of Entry objects.
|
225
237
|
#
|
226
|
-
# @param
|
238
|
+
# @param options[Hash] any options the instance method of the same name accepts
|
227
239
|
# @return [Array<Entry>] an array of entries within the ZIP being parsed
|
228
|
-
def self.read_zip_structure(
|
229
|
-
new.read_zip_structure(
|
240
|
+
def self.read_zip_structure(**options)
|
241
|
+
new.read_zip_structure(**options)
|
230
242
|
end
|
231
243
|
|
232
244
|
private
|
233
245
|
|
246
|
+
def read_local_headers(entries, io)
|
247
|
+
entries.each_with_index do |entry, i|
|
248
|
+
log { 'Reading the local header for entry %d at offset %d' % [i, entry.local_file_header_offset] }
|
249
|
+
off = get_compressed_data_offset(io: io, local_file_header_offset: entry.local_file_header_offset)
|
250
|
+
entry.compressed_data_offset = off
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
234
254
|
def skip_ahead_2(io)
|
235
255
|
skip_ahead_n(io, 2)
|
236
256
|
end
|
@@ -287,40 +307,7 @@ class ZipTricks::FileReader
|
|
287
307
|
read_n(io, 8).unpack(C_Qe).shift
|
288
308
|
end
|
289
309
|
|
290
|
-
def find_compressed_data_start_offset(file_io, local_header_offset)
|
291
|
-
seek(file_io, local_header_offset)
|
292
|
-
|
293
|
-
# Reading in bulk is cheaper - grab the maximum length of the local header, including
|
294
|
-
# any headroom
|
295
|
-
local_file_header_str_plus_headroom = file_io.read(MAX_LOCAL_HEADER_SIZE)
|
296
|
-
io = StringIO.new(local_file_header_str_plus_headroom)
|
297
|
-
|
298
|
-
assert_signature(io, 0x04034b50)
|
299
|
-
|
300
|
-
# The rest is unreliable, and we have that information from the central directory already.
|
301
|
-
# So just skip over it to get at the offset where the compressed data begins
|
302
|
-
skip_ahead_2(io) # Version needed to extract
|
303
|
-
skip_ahead_2(io) # gp flags
|
304
|
-
skip_ahead_2(io) # storage mode
|
305
|
-
skip_ahead_2(io) # dos time
|
306
|
-
skip_ahead_2(io) # dos date
|
307
|
-
skip_ahead_4(io) # CRC32
|
308
|
-
|
309
|
-
skip_ahead_4(io) # Comp size
|
310
|
-
skip_ahead_4(io) # Uncomp size
|
311
|
-
|
312
|
-
filename_size = read_2b(io)
|
313
|
-
extra_size = read_2b(io)
|
314
|
-
|
315
|
-
skip_ahead_n(io, filename_size)
|
316
|
-
skip_ahead_n(io, extra_size)
|
317
|
-
|
318
|
-
local_header_offset + io.tell
|
319
|
-
end
|
320
|
-
|
321
|
-
|
322
310
|
def read_cdir_entry(io)
|
323
|
-
expected_at = io.tell
|
324
311
|
assert_signature(io, 0x02014b50)
|
325
312
|
ZipEntry.new.tap do |e|
|
326
313
|
e.made_by = read_2b(io)
|
@@ -357,7 +344,9 @@ class ZipTricks::FileReader
|
|
357
344
|
end
|
358
345
|
|
359
346
|
# ...of which we really only need the Zip64 extra
|
360
|
-
if zip64_extra_contents = extra_table[1]
|
347
|
+
if zip64_extra_contents = extra_table[1]
|
348
|
+
# If the Zip64 extra is present, we let it override all
|
349
|
+
# the values fetched from the conventional header
|
361
350
|
zip64_extra = StringIO.new(zip64_extra_contents)
|
362
351
|
log { 'Will read Zip64 extra data for %s, %d bytes' % [e.filename, zip64_extra.size] }
|
363
352
|
# Now here be dragons. The APPNOTE specifies that
|
@@ -533,10 +522,11 @@ class ZipTricks::FileReader
|
|
533
522
|
def num_files_and_central_directory_offset(file_io, eocd_offset)
|
534
523
|
seek(file_io, eocd_offset)
|
535
524
|
|
536
|
-
|
537
|
-
|
538
|
-
|
525
|
+
# The size of the EOCD record is known upfront, so use a strict read
|
526
|
+
eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
|
527
|
+
io = StringIO.new(eocd_record_str)
|
539
528
|
|
529
|
+
assert_signature(io, 0x06054b50)
|
540
530
|
skip_ahead_2(io) # number_of_this_disk
|
541
531
|
skip_ahead_2(io) # number of the disk with the EOCD record
|
542
532
|
skip_ahead_2(io) # number of entries in the central directory of this disk
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class ZipTricks::FileReader::InflatingReader
|
2
|
+
def initialize(from_io, compressed_data_size)
|
3
|
+
@io = from_io
|
4
|
+
@compressed_data_size = compressed_data_size
|
5
|
+
@already_read = 0
|
6
|
+
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
7
|
+
end
|
8
|
+
|
9
|
+
def extract(n_bytes=nil)
|
10
|
+
n_bytes ||= (@compressed_data_size - @already_read)
|
11
|
+
|
12
|
+
return if eof?
|
13
|
+
|
14
|
+
available = @compressed_data_size - @already_read
|
15
|
+
|
16
|
+
return if available.zero?
|
17
|
+
|
18
|
+
n_bytes = available if n_bytes > available
|
19
|
+
|
20
|
+
return '' if n_bytes.zero?
|
21
|
+
|
22
|
+
compressed_chunk = @io.read(n_bytes)
|
23
|
+
@already_read += compressed_chunk.bytesize
|
24
|
+
@zlib_inflater.inflate(compressed_chunk)
|
25
|
+
end
|
26
|
+
|
27
|
+
def eof?
|
28
|
+
@zlib_inflater.finished?
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class ZipTricks::FileReader::StoredReader
|
2
|
+
def initialize(from_io, compressed_data_size)
|
3
|
+
@io = from_io
|
4
|
+
@compressed_data_size = compressed_data_size
|
5
|
+
@already_read = 0
|
6
|
+
end
|
7
|
+
|
8
|
+
def extract(n_bytes=nil)
|
9
|
+
n_bytes ||= (@compressed_data_size - @already_read)
|
10
|
+
|
11
|
+
return if eof?
|
12
|
+
|
13
|
+
available = @compressed_data_size - @already_read
|
14
|
+
|
15
|
+
return if available.zero?
|
16
|
+
|
17
|
+
n_bytes = available if n_bytes > available
|
18
|
+
|
19
|
+
return '' if n_bytes.zero?
|
20
|
+
|
21
|
+
compressed_chunk = @io.read(n_bytes)
|
22
|
+
@already_read += compressed_chunk.bytesize
|
23
|
+
compressed_chunk
|
24
|
+
end
|
25
|
+
|
26
|
+
def eof?
|
27
|
+
@already_read >= @compressed_data_size
|
28
|
+
end
|
29
|
+
end
|
data/lib/zip_tricks/remote_io.rb
CHANGED
@@ -29,8 +29,7 @@ class ZipTricks::RemoteIO
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# Emulates IO#read, but requires the number of bytes to read
|
32
|
-
# The
|
33
|
-
# not match the number requested. The read will be limited to the
|
32
|
+
# The read will be limited to the
|
34
33
|
# size of the remote resource relative to the current offset in the IO,
|
35
34
|
# so if you are at offset 0 in the IO of size 10, doing a `read(20)`
|
36
35
|
# will only return you 10 bytes of result, and not raise any exceptions.
|
@@ -7,55 +7,23 @@
|
|
7
7
|
# before you use this module.
|
8
8
|
class ZipTricks::RemoteUncap
|
9
9
|
|
10
|
-
# Represents a file embedded within a remote ZIP archive
|
11
|
-
class RemoteZipEntry
|
12
|
-
|
13
|
-
# @return [String] filename of the file in the remote ZIP
|
14
|
-
attr_accessor :name
|
15
|
-
|
16
|
-
# @return [Fixnum] size in bytes of the file when uncompressed
|
17
|
-
attr_accessor :size_uncompressed
|
18
|
-
|
19
|
-
# @return [Fixnum] size in bytes of the file when compressed (the segment in the ZIP)
|
20
|
-
attr_accessor :size_compressed
|
21
|
-
|
22
|
-
# @return [Fixnum] compression method (0 for stored, 8 for deflate)
|
23
|
-
attr_accessor :compression_method
|
24
|
-
|
25
|
-
# @return [Fixnum] where the file data starts within the ZIP
|
26
|
-
attr_accessor :starts_at_offset
|
27
|
-
|
28
|
-
# @return [Fixnum] where the file data ends within the zip.
|
29
|
-
# Will be equal to starts_at_offset if the file is empty
|
30
|
-
attr_accessor :ends_at_offset
|
31
|
-
|
32
|
-
# Yields the object during initialization
|
33
|
-
def initialize
|
34
|
-
yield self
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
10
|
# @param uri[String] the HTTP(S) URL to read the ZIP footer from
|
39
|
-
# @
|
40
|
-
|
11
|
+
# @param reader_class[Class] which class to use for reading
|
12
|
+
# @param options_for_zip_reader[Hash] any additional options to give to {ZipTricks::FileReader} when reading
|
13
|
+
# @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the files within the remote archive
|
14
|
+
def self.files_within_zip_at(uri, reader_class: ZipTricks::FileReader, **options_for_zip_reader)
|
41
15
|
fetcher = new(uri)
|
42
16
|
fake_io = ZipTricks::RemoteIO.new(fetcher)
|
43
|
-
|
44
|
-
|
45
|
-
RemoteZipEntry.new do | entry |
|
46
|
-
entry.name = remote_entry.filename
|
47
|
-
entry.starts_at_offset = remote_entry.compressed_data_offset
|
48
|
-
entry.size_uncompressed = remote_entry.uncompressed_size
|
49
|
-
entry.size_compressed = remote_entry.compressed_size
|
50
|
-
entry.compression_method = remote_entry.storage_mode
|
51
|
-
end
|
52
|
-
end
|
17
|
+
reader = reader_class.new
|
18
|
+
reader.read_zip_structure(io: fake_io, **options_for_zip_reader)
|
53
19
|
end
|
54
20
|
|
55
21
|
def initialize(uri)
|
56
22
|
@uri = URI(uri)
|
57
23
|
end
|
58
24
|
|
25
|
+
# Only used internally when reading the remote ZIP.
|
26
|
+
#
|
59
27
|
# @param range[Range] the HTTP range of data to fetch from remote
|
60
28
|
# @return [String] the response body of the ranged request
|
61
29
|
def request_range(range)
|
@@ -65,6 +33,8 @@ class ZipTricks::RemoteUncap
|
|
65
33
|
http.request(request).body
|
66
34
|
end
|
67
35
|
|
36
|
+
# Only used internally when reading the remote ZIP.
|
37
|
+
#
|
68
38
|
# @return [Fixnum] the byte size of the ranged request
|
69
39
|
def request_object_size
|
70
40
|
http = Net::HTTP.start(@uri.hostname, @uri.port)
|
data/spec/spec_helper.rb
CHANGED
@@ -7,6 +7,17 @@ require 'digest'
|
|
7
7
|
require 'fileutils'
|
8
8
|
require 'shellwords'
|
9
9
|
require 'zip'
|
10
|
+
require 'delegate'
|
11
|
+
|
12
|
+
class ReadMonitor < SimpleDelegator
|
13
|
+
def read(*)
|
14
|
+
super.tap { @num_reads ||= 0; @num_reads += 1 }
|
15
|
+
end
|
16
|
+
|
17
|
+
def num_reads
|
18
|
+
@num_reads || 0
|
19
|
+
end
|
20
|
+
end
|
10
21
|
|
11
22
|
module Keepalive
|
12
23
|
# Travis-CI kills the build if it does not receive output on standard out or standard error
|
@@ -45,7 +56,7 @@ module ZipInspection
|
|
45
56
|
$zip_inspection_buf ||= StringIO.new
|
46
57
|
$zip_inspection_buf.puts "\n"
|
47
58
|
$zip_inspection_buf.puts "Inspecting ZIP output of #{inspect}." # The only way to get at the RSpec example without using the block argument
|
48
|
-
$zip_inspection_buf.puts "Be aware that the zipinfo version on OSX is too old to deal with
|
59
|
+
$zip_inspection_buf.puts "Be aware that the zipinfo version on OSX is too old to deal with Zip64."
|
49
60
|
escaped_cmd = Shellwords.join([zipinfo_path, '-tlhvz', path_to_zip])
|
50
61
|
$zip_inspection_buf.puts `#{escaped_cmd}`
|
51
62
|
end
|
@@ -1,48 +1,142 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
|
3
2
|
describe ZipTricks::FileReader do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
|
4
|
+
describe 'with an end-to-end ZIP file to read' do
|
5
|
+
it 'reads and uncompresses the file written deflated with data descriptors' do
|
6
|
+
zipfile = StringIO.new
|
7
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
8
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
9
|
+
|
10
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
11
|
+
zip.write_deflated_file('war-and-peace.txt') do |sink|
|
12
|
+
sink << tolstoy
|
13
|
+
end
|
12
14
|
end
|
15
|
+
|
16
|
+
entries = described_class.read_zip_structure(io: zipfile)
|
17
|
+
expect(entries.length).to eq(1)
|
18
|
+
|
19
|
+
entry = entries.first
|
20
|
+
|
21
|
+
readback = ''
|
22
|
+
reader = entry.extractor_from(zipfile)
|
23
|
+
readback << reader.extract(10) until reader.eof?
|
24
|
+
|
25
|
+
expect(readback.bytesize).to eq(tolstoy.bytesize)
|
26
|
+
expect(readback[0..10]).to eq(tolstoy[0..10])
|
27
|
+
expect(readback[-10..-1]).to eq(tolstoy[-10..-1])
|
13
28
|
end
|
14
29
|
|
15
|
-
|
16
|
-
|
30
|
+
it 'performs local file header reads by default' do
|
31
|
+
zipfile = StringIO.new
|
32
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
33
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
17
34
|
|
18
|
-
|
35
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
36
|
+
40.times do |i|
|
37
|
+
zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
zipfile.rewind
|
41
|
+
|
42
|
+
read_monitor = ReadMonitor.new(zipfile)
|
43
|
+
entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: true)
|
44
|
+
expect(read_monitor.num_reads).to eq(44)
|
45
|
+
end
|
19
46
|
|
20
|
-
|
21
|
-
|
22
|
-
|
47
|
+
it 'performs local file header reads when `read_local_headers` is set to true' do
|
48
|
+
zipfile = StringIO.new
|
49
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
50
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
23
51
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
52
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
53
|
+
40.times do |i|
|
54
|
+
zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
|
55
|
+
end
|
56
|
+
end
|
57
|
+
zipfile.rewind
|
58
|
+
|
59
|
+
read_monitor = ReadMonitor.new(zipfile)
|
60
|
+
entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: true)
|
61
|
+
expect(read_monitor.num_reads).to eq(44)
|
28
62
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
63
|
+
expect(entries.length).to eq(40)
|
64
|
+
entry = entries.first
|
65
|
+
expect(entry).to be_known_offset
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'performs a limited number of reads when `read_local_headers` is set to false' do
|
69
|
+
zipfile = StringIO.new
|
70
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
71
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
72
|
+
|
73
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
74
|
+
40.times do |i|
|
75
|
+
zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
|
76
|
+
end
|
35
77
|
end
|
78
|
+
zipfile.rewind
|
79
|
+
read_monitor = ReadMonitor.new(zipfile)
|
80
|
+
|
81
|
+
entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: false)
|
82
|
+
|
83
|
+
expect(read_monitor.num_reads).to eq(4)
|
84
|
+
expect(entries.length).to eq(40)
|
85
|
+
entry = entries.first
|
86
|
+
expect(entry).not_to be_known_offset
|
87
|
+
expect {
|
88
|
+
entry.compressed_data_offset
|
89
|
+
}.to raise_error(/read/)
|
36
90
|
end
|
37
91
|
|
38
|
-
|
39
|
-
|
92
|
+
it 'reads the file written stored with data descriptors' do
|
93
|
+
zipfile = StringIO.new
|
94
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
95
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
96
|
+
zip.write_stored_file('war-and-peace.txt') do |sink|
|
97
|
+
sink << tolstoy
|
98
|
+
end
|
99
|
+
end
|
40
100
|
|
41
|
-
|
101
|
+
entries = described_class.read_zip_structure(io: zipfile)
|
102
|
+
expect(entries.length).to eq(1)
|
42
103
|
|
43
|
-
|
44
|
-
|
45
|
-
|
104
|
+
entry = entries.first
|
105
|
+
|
106
|
+
readback = entry.extractor_from(zipfile).extract
|
107
|
+
expect(readback.bytesize).to eq(tolstoy.bytesize)
|
108
|
+
expect(readback[0..10]).to eq(tolstoy[0..10])
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe '#get_compressed_data_offset' do
|
113
|
+
it 'reads the offset for an entry having Zip64 extra fields' do
|
114
|
+
w = ZipTricks::ZipWriter.new
|
115
|
+
out = StringIO.new
|
116
|
+
out << Random.new.bytes(7656177)
|
117
|
+
w.write_local_file_header(io: out, filename: 'some file',
|
118
|
+
compressed_size: 0xFFFFFFFF + 5, uncompressed_size: 0xFFFFFFFFF, crc32: 123, gp_flags: 4,
|
119
|
+
mtime: Time.now, storage_mode: 8)
|
120
|
+
|
121
|
+
out.rewind
|
122
|
+
|
123
|
+
compressed_data_offset = subject.get_compressed_data_offset(io: out, local_file_header_offset: 7656177)
|
124
|
+
expect(compressed_data_offset).to eq(7656236)
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'reads the offset for an entry having a long name' do
|
128
|
+
w = ZipTricks::ZipWriter.new
|
129
|
+
out = StringIO.new
|
130
|
+
out << Random.new.bytes(7)
|
131
|
+
w.write_local_file_header(io: out, filename: 'This is a file with a ridiculously long name.doc',
|
132
|
+
compressed_size: 10, uncompressed_size: 15, crc32: 123, gp_flags: 4,
|
133
|
+
mtime: Time.now, storage_mode: 8)
|
134
|
+
|
135
|
+
out.rewind
|
136
|
+
|
137
|
+
compressed_data_offset = subject.get_compressed_data_offset(io: out, local_file_header_offset: 7)
|
138
|
+
expect(compressed_data_offset).to eq(85)
|
139
|
+
end
|
46
140
|
end
|
47
141
|
|
48
142
|
it 'is able to latch to the EOCD location even if the signature for the EOCD record appears all over the ZIP' do
|
@@ -63,7 +157,7 @@ describe ZipTricks::FileReader do
|
|
63
157
|
central_directory_size: z.tell - where, num_files_in_archive: 1, comment: evil_str)
|
64
158
|
|
65
159
|
z.rewind
|
66
|
-
entries = described_class.read_zip_structure(z)
|
160
|
+
entries = described_class.read_zip_structure(io: z)
|
67
161
|
expect(entries.length).to eq(1)
|
68
162
|
end
|
69
163
|
|
@@ -16,19 +16,10 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
16
16
|
payload2 << Random.new.bytes(1024 * 1024 * 3)
|
17
17
|
payload2.flush; payload2.rewind
|
18
18
|
|
19
|
-
payload1_crc = Zlib.crc32(payload1.read).tap { payload1.rewind }
|
20
|
-
payload2_crc = Zlib.crc32(payload2.read).tap { payload2.rewind }
|
21
|
-
|
22
19
|
File.open('temp.zip', 'wb') do |f|
|
23
20
|
ZipTricks::Streamer.open(f) do | zip |
|
24
|
-
zip.
|
25
|
-
|
26
|
-
zip << blob
|
27
|
-
end
|
28
|
-
zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
|
29
|
-
while blob = payload2.read(1024 * 5)
|
30
|
-
zip << blob
|
31
|
-
end
|
21
|
+
zip.write_stored_file('first-file.bin') { |w| IO.copy_stream(payload1, w) }
|
22
|
+
zip.write_stored_file('second-file.bin') { |w| IO.copy_stream(payload2, w) }
|
32
23
|
end
|
33
24
|
end
|
34
25
|
payload1.rewind; payload2.rewind
|
@@ -53,17 +44,17 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
53
44
|
|
54
45
|
first, second = *files
|
55
46
|
|
56
|
-
expect(first.
|
57
|
-
expect(first.
|
47
|
+
expect(first.filename).to eq('first-file.bin')
|
48
|
+
expect(first.uncompressed_size).to eq(payload1.size)
|
58
49
|
File.open('temp.zip', 'rb') do |readback|
|
59
|
-
readback.seek(first.
|
50
|
+
readback.seek(first.compressed_data_offset, IO::SEEK_SET)
|
60
51
|
expect(readback.read(12)).to eq(payload1.read(12))
|
61
52
|
end
|
62
53
|
|
63
|
-
expect(second.
|
64
|
-
expect(second.
|
54
|
+
expect(second.filename).to eq('second-file.bin')
|
55
|
+
expect(second.uncompressed_size).to eq(payload2.size)
|
65
56
|
File.open('temp.zip', 'rb') do |readback|
|
66
|
-
readback.seek(second.
|
57
|
+
readback.seek(second.compressed_data_offset, IO::SEEK_SET)
|
67
58
|
expect(readback.read(12)).to eq(payload2.read(12))
|
68
59
|
end
|
69
60
|
end
|
@@ -79,50 +70,31 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
79
70
|
payload1_crc = Zlib.crc32(payload1.read).tap { payload1.rewind }
|
80
71
|
payload2_crc = Zlib.crc32(payload2.read).tap { payload2.rewind }
|
81
72
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
|
87
|
-
while blob = payload2.read(1024 * 5)
|
88
|
-
zip << blob
|
89
|
-
end
|
90
|
-
end
|
73
|
+
readable_zip = Tempfile.new 'somezip'
|
74
|
+
ZipTricks::Streamer.open(readable_zip) do | zip |
|
75
|
+
zip.add_stored_entry(filename: 'first-file-zero-size.bin', size: payload1.size, crc32: payload1_crc)
|
76
|
+
zip.write_stored_file('second-file.bin') {|w| IO.copy_stream(payload2, w) }
|
91
77
|
end
|
92
|
-
|
93
|
-
|
94
|
-
expect(File).to be_exist('temp.zip')
|
78
|
+
readable_zip.flush; readable_zip.rewind
|
95
79
|
|
96
80
|
allow_any_instance_of(described_class).to receive(:request_object_size) {
|
97
|
-
|
81
|
+
readable_zip.size
|
98
82
|
}
|
99
83
|
allow_any_instance_of(described_class).to receive(:request_range) {|_instance, range|
|
100
|
-
|
101
|
-
|
102
|
-
f.read(range.end - range.begin + 1)
|
103
|
-
end
|
84
|
+
readable_zip.seek(range.begin, IO::SEEK_SET)
|
85
|
+
readable_zip.read(range.end - range.begin + 1)
|
104
86
|
}
|
105
87
|
|
106
88
|
payload1.rewind; payload2.rewind
|
107
89
|
|
108
|
-
|
109
|
-
expect(files).to be_kind_of(Array)
|
110
|
-
expect(files.length).to eq(2)
|
111
|
-
|
112
|
-
first, second = *files
|
113
|
-
|
114
|
-
expect(first.name).to eq('first-file.bin')
|
115
|
-
expect(first.size_uncompressed).to eq(payload1.size)
|
116
|
-
File.open('temp.zip', 'rb') do |readback|
|
117
|
-
readback.seek(first.starts_at_offset, IO::SEEK_SET)
|
118
|
-
expect(readback.read(0)).to eq(payload1.read(0))
|
119
|
-
end
|
90
|
+
first, second = described_class.files_within_zip_at('http://fake.example.com')
|
120
91
|
|
121
|
-
expect(
|
122
|
-
expect(
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
92
|
+
expect(first.filename).to eq('first-file-zero-size.bin')
|
93
|
+
expect(first.compressed_size).to be_zero
|
94
|
+
|
95
|
+
expect(second.filename).to eq('second-file.bin')
|
96
|
+
expect(second.uncompressed_size).to eq(payload2.size)
|
97
|
+
readable_zip.seek(second.compressed_data_offset, IO::SEEK_SET)
|
98
|
+
expect(readable_zip.read(12)).to eq(payload2.read(12))
|
127
99
|
end
|
128
100
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
|
3
|
+
describe ZipTricks::Streamer::Writable do
|
4
|
+
describe '#<<' do
|
5
|
+
it 'writes the given data to the destination and returns self' do
|
6
|
+
buf = StringIO.new
|
7
|
+
subject = described_class.new(buf)
|
8
|
+
|
9
|
+
result = subject << 'hello!'
|
10
|
+
|
11
|
+
expect(buf.string).to eq('hello!')
|
12
|
+
expect(result).to eq(subject)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe '#write' do
|
17
|
+
it 'writes the given data to the destination and returns the number of bytes written' do
|
18
|
+
buf = StringIO.new
|
19
|
+
subject = described_class.new(buf)
|
20
|
+
|
21
|
+
result = subject.write('hello!')
|
22
|
+
|
23
|
+
expect(buf.string).to eq('hello!')
|
24
|
+
expect(result).to eq(6)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/zip_tricks.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: zip_tricks
|
5
|
+
# stub: zip_tricks 4.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "zip_tricks"
|
9
|
-
s.version = "
|
9
|
+
s.version = "4.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Julik Tarkhanov"]
|
14
|
-
s.date = "2016-08-
|
14
|
+
s.date = "2016-08-19"
|
15
15
|
s.description = "Makes rubyzip stream, for real"
|
16
16
|
s.email = "me@julik.nl"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -36,6 +36,8 @@ Gem::Specification.new do |s|
|
|
36
36
|
"lib/zip_tricks/block_deflate.rb",
|
37
37
|
"lib/zip_tricks/block_write.rb",
|
38
38
|
"lib/zip_tricks/file_reader.rb",
|
39
|
+
"lib/zip_tricks/file_reader/inflating_reader.rb",
|
40
|
+
"lib/zip_tricks/file_reader/stored_reader.rb",
|
39
41
|
"lib/zip_tricks/null_writer.rb",
|
40
42
|
"lib/zip_tricks/rack_body.rb",
|
41
43
|
"lib/zip_tricks/remote_io.rb",
|
@@ -59,6 +61,7 @@ Gem::Specification.new do |s|
|
|
59
61
|
"spec/zip_tricks/remote_uncap_spec.rb",
|
60
62
|
"spec/zip_tricks/size_estimator_spec.rb",
|
61
63
|
"spec/zip_tricks/stream_crc32_spec.rb",
|
64
|
+
"spec/zip_tricks/streamer/writable_spec.rb",
|
62
65
|
"spec/zip_tricks/streamer_spec.rb",
|
63
66
|
"spec/zip_tricks/war-and-peace.txt",
|
64
67
|
"spec/zip_tricks/write_and_tell_spec.rb",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-08-
|
11
|
+
date: 2016-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -187,6 +187,8 @@ files:
|
|
187
187
|
- lib/zip_tricks/block_deflate.rb
|
188
188
|
- lib/zip_tricks/block_write.rb
|
189
189
|
- lib/zip_tricks/file_reader.rb
|
190
|
+
- lib/zip_tricks/file_reader/inflating_reader.rb
|
191
|
+
- lib/zip_tricks/file_reader/stored_reader.rb
|
190
192
|
- lib/zip_tricks/null_writer.rb
|
191
193
|
- lib/zip_tricks/rack_body.rb
|
192
194
|
- lib/zip_tricks/remote_io.rb
|
@@ -210,6 +212,7 @@ files:
|
|
210
212
|
- spec/zip_tricks/remote_uncap_spec.rb
|
211
213
|
- spec/zip_tricks/size_estimator_spec.rb
|
212
214
|
- spec/zip_tricks/stream_crc32_spec.rb
|
215
|
+
- spec/zip_tricks/streamer/writable_spec.rb
|
213
216
|
- spec/zip_tricks/streamer_spec.rb
|
214
217
|
- spec/zip_tricks/war-and-peace.txt
|
215
218
|
- spec/zip_tricks/write_and_tell_spec.rb
|