zip_tricks 4.4.2 → 4.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +79 -0
- data/.rubocop_todo.yml +43 -0
- data/.travis.yml +3 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +7 -4
- data/examples/archive_size_estimate.rb +8 -6
- data/examples/config.ru +3 -1
- data/examples/parallel_compression_with_block_deflate.rb +31 -20
- data/examples/rack_application.rb +25 -17
- data/lib/zip_tricks.rb +4 -2
- data/lib/zip_tricks/block_deflate.rb +43 -25
- data/lib/zip_tricks/block_write.rb +20 -10
- data/lib/zip_tricks/file_reader.rb +241 -145
- data/lib/zip_tricks/file_reader/inflating_reader.rb +4 -1
- data/lib/zip_tricks/file_reader/stored_reader.rb +4 -1
- data/lib/zip_tricks/null_writer.rb +5 -5
- data/lib/zip_tricks/rack_body.rb +7 -4
- data/lib/zip_tricks/rails_streaming.rb +3 -1
- data/lib/zip_tricks/remote_io.rb +9 -5
- data/lib/zip_tricks/remote_uncap.rb +10 -5
- data/lib/zip_tricks/size_estimator.rb +39 -27
- data/lib/zip_tricks/stream_crc32.rb +2 -0
- data/lib/zip_tricks/streamer.rb +254 -98
- data/lib/zip_tricks/streamer/deflated_writer.rb +6 -9
- data/lib/zip_tricks/streamer/entry.rb +11 -3
- data/lib/zip_tricks/streamer/stored_writer.rb +5 -7
- data/lib/zip_tricks/streamer/writable.rb +30 -7
- data/lib/zip_tricks/version.rb +3 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -0
- data/lib/zip_tricks/zip_writer.rb +54 -44
- data/testing/generate_test_files.rb +68 -38
- data/testing/support.rb +21 -16
- data/testing/test-report.txt +28 -0
- data/zip_tricks.gemspec +24 -22
- metadata +23 -5
@@ -1,5 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Stashes a block given by the Rack webserver when calling each() on a body, and calls
|
2
4
|
# that block every time it is written to using :<< (shovel). Poses as an IO for rubyzip.
|
5
|
+
|
3
6
|
class ZipTricks::BlockWrite
|
4
7
|
# The block is the block given to each() of the Rack body, or other block you want
|
5
8
|
# to receive the string chunks written by the zip compressor.
|
@@ -8,26 +11,33 @@ class ZipTricks::BlockWrite
|
|
8
11
|
end
|
9
12
|
|
10
13
|
# Make sure those methods raise outright
|
11
|
-
[
|
12
|
-
define_method(m) do |*
|
14
|
+
%i[seek pos= to_s].each do |m|
|
15
|
+
define_method(m) do |*_args|
|
13
16
|
raise "#{m} not supported - this IO adapter is non-rewindable"
|
14
17
|
end
|
15
18
|
end
|
16
19
|
|
17
|
-
# Every time this object gets written to, call the Rack body each() block
|
20
|
+
# Every time this object gets written to, call the Rack body each() block
|
21
|
+
# with the bytes given instead.
|
18
22
|
def <<(buf)
|
19
23
|
return if buf.nil?
|
20
24
|
|
21
25
|
# Ensure we ALWAYS write in binary encoding.
|
22
|
-
encoded =
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
encoded =
|
27
|
+
if buf.encoding != Encoding::BINARY
|
28
|
+
# If we got a frozen string we can't force_encoding on it
|
29
|
+
begin
|
30
|
+
buf.force_encoding(Encoding::BINARY)
|
31
|
+
rescue
|
32
|
+
buf.dup.force_encoding(Encoding::BINARY)
|
33
|
+
end
|
34
|
+
else
|
35
|
+
buf
|
36
|
+
end
|
28
37
|
|
29
38
|
# buf.dup.force_encoding(Encoding::BINARY)
|
30
|
-
|
39
|
+
# Zero-size output has a special meaning when using chunked encoding
|
40
|
+
return if encoded.bytesize.zero?
|
31
41
|
|
32
42
|
@block.call(encoded)
|
33
43
|
self
|
@@ -1,3 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Naming/ConstantName
|
4
|
+
|
1
5
|
require 'stringio'
|
2
6
|
|
3
7
|
# A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
|
@@ -41,20 +45,25 @@ require 'stringio'
|
|
41
45
|
#
|
42
46
|
# ## Mode of operation
|
43
47
|
#
|
44
|
-
# By default, `FileReader` _ignores_ the data in local file headers (as it is
|
45
|
-
# It reads the ZIP file "from the tail", finds the
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
48
|
+
# By default, `FileReader` _ignores_ the data in local file headers (as it is
|
49
|
+
# often unreliable). It reads the ZIP file "from the tail", finds the
|
50
|
+
# end-of-central-directory signatures, then reads the central directory entries,
|
51
|
+
# reconstitutes the entries with their filenames, attributes and so on, and
|
52
|
+
# sets these entries up with the absolute _offsets_ into the source file/IO object.
|
53
|
+
# These offsets can then be used to extract the actual compressed data of
|
54
|
+
# the files and to expand it.
|
49
55
|
#
|
50
56
|
# ## Recovering damaged or incomplete ZIP files
|
51
57
|
#
|
52
|
-
# If the ZIP file you are trying to read does not contain the central directory
|
53
|
-
# will not work, since it starts the read process
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
58
|
+
# If the ZIP file you are trying to read does not contain the central directory
|
59
|
+
# records `read_zip_structure` will not work, since it starts the read process
|
60
|
+
# from the EOCD marker at the end of the central directory and then crawls
|
61
|
+
# "back" in the IO to figure out the rest. You can explicitly apply a fallback
|
62
|
+
# for reading the archive "straight ahead" instead using `read_zip_straight_ahead`
|
63
|
+
# - the method will instead scan your IO from the very start, skipping over
|
64
|
+
# the actual entry data. This is less efficient than central directory parsing since
|
57
65
|
# it involves a much larger number of reads (1 read from the IO per entry in the ZIP).
|
66
|
+
|
58
67
|
class ZipTricks::FileReader
|
59
68
|
require_relative 'file_reader/stored_reader'
|
60
69
|
require_relative 'file_reader/inflating_reader'
|
@@ -64,17 +73,17 @@ class ZipTricks::FileReader
|
|
64
73
|
InvalidStructure = Class.new(ReadError)
|
65
74
|
LocalHeaderPending = Class.new(StandardError) do
|
66
75
|
def message
|
67
|
-
|
76
|
+
'The compressed data offset is not available (local header has not been read)'
|
68
77
|
end
|
69
78
|
end
|
70
79
|
MissingEOCD = Class.new(StandardError) do
|
71
80
|
def message
|
72
|
-
|
81
|
+
'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
|
73
82
|
end
|
74
83
|
end
|
75
|
-
|
84
|
+
|
76
85
|
private_constant :StoredReader, :InflatingReader
|
77
|
-
|
86
|
+
|
78
87
|
# Represents a file within the ZIP archive being read
|
79
88
|
class ZipEntry
|
80
89
|
# @return [Fixnum] bit-packed version signature of the program that made the archive
|
@@ -137,28 +146,29 @@ class ZipTricks::FileReader
|
|
137
146
|
when 0
|
138
147
|
StoredReader.new(from_io, compressed_size)
|
139
148
|
else
|
140
|
-
raise UnsupportedFeature,
|
149
|
+
raise UnsupportedFeature, format('Unsupported storage mode for reading - %d',
|
150
|
+
storage_mode)
|
141
151
|
end
|
142
152
|
end
|
143
|
-
|
153
|
+
|
144
154
|
# @return [Fixnum] at what offset you should start reading
|
145
155
|
# for the compressed data in your original IO object
|
146
156
|
def compressed_data_offset
|
147
|
-
@compressed_data_offset
|
157
|
+
@compressed_data_offset || raise(LocalHeaderPending)
|
148
158
|
end
|
149
|
-
|
159
|
+
|
150
160
|
# Tells whether the compressed data offset is already known for this entry
|
151
161
|
# @return [Boolean]
|
152
162
|
def known_offset?
|
153
163
|
!@compressed_data_offset.nil?
|
154
164
|
end
|
155
|
-
|
165
|
+
|
156
166
|
# Tells whether the entry uses a data descriptor (this is defined
|
157
167
|
# by bit 3 in the GP flags).
|
158
168
|
def uses_data_descriptor?
|
159
169
|
(gp_flags & 0x0008) == 0x0008
|
160
170
|
end
|
161
|
-
|
171
|
+
|
162
172
|
# Sets the offset at which the compressed data for this file starts in the ZIP.
|
163
173
|
# By default, the value will be set by the Reader for you. If you use delayed
|
164
174
|
# reading, you need to set it by using the `get_compressed_data_offset` on the Reader:
|
@@ -191,12 +201,14 @@ class ZipTricks::FileReader
|
|
191
201
|
eocd_offset = get_eocd_offset(io, zip_file_size)
|
192
202
|
|
193
203
|
zip64_end_of_cdir_location = get_zip64_eocd_location(io, eocd_offset)
|
194
|
-
num_files, cdir_location,
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
204
|
+
num_files, cdir_location, _cdir_size =
|
205
|
+
if zip64_end_of_cdir_location
|
206
|
+
num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
|
207
|
+
else
|
208
|
+
num_files_and_central_directory_offset(io, eocd_offset)
|
209
|
+
end
|
210
|
+
|
211
|
+
log { format('Located the central directory start at %d', cdir_location) }
|
200
212
|
seek(io, cdir_location)
|
201
213
|
|
202
214
|
# Read the entire central directory AND anything behind it, in one fell swoop.
|
@@ -214,55 +226,77 @@ class ZipTricks::FileReader
|
|
214
226
|
# the central directory size altogether.
|
215
227
|
central_directory_str = io.read # and not read_n(io, cdir_size), see above
|
216
228
|
central_directory_io = StringIO.new(central_directory_str)
|
217
|
-
log
|
229
|
+
log do
|
230
|
+
format('Read %d bytes with central directory + EOCD record and locator',
|
231
|
+
central_directory_str.bytesize)
|
232
|
+
end
|
218
233
|
|
219
234
|
entries = (0...num_files).map do |entry_n|
|
220
|
-
|
235
|
+
offset_location = cdir_location + central_directory_io.tell
|
236
|
+
log do
|
237
|
+
format('Reading the central directory entry %d starting at offset %d',
|
238
|
+
entry_n, offset_location)
|
239
|
+
end
|
221
240
|
read_cdir_entry(central_directory_io)
|
222
241
|
end
|
223
|
-
|
242
|
+
|
224
243
|
read_local_headers(entries, io) if read_local_headers
|
225
|
-
|
244
|
+
|
226
245
|
entries
|
227
246
|
end
|
228
247
|
|
229
|
-
# Sometimes you might encounter truncated ZIP files, which do not contain
|
230
|
-
# whatsoever - or where the central directory is
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
248
|
+
# Sometimes you might encounter truncated ZIP files, which do not contain
|
249
|
+
# any central directory whatsoever - or where the central directory is
|
250
|
+
# truncated. In that case, employing the technique of reading the ZIP
|
251
|
+
# "from the end" is impossible, and the only recourse is reading each
|
252
|
+
# local file header in succession. If the entries in such a ZIP use data
|
253
|
+
# descriptors, you would need to scan after the entry until you encounter
|
254
|
+
# the data descriptor signature - and that might be unreliable at best.
|
255
|
+
# Therefore, this reading technique does not support data descriptors.
|
256
|
+
# It can however recover the entries you still can read if these entries
|
257
|
+
# contain all the necessary information about the contained file.
|
236
258
|
#
|
237
|
-
# @param io[#tell, #read, #seek] the IO-ish object to read the local file
|
238
|
-
# @return [Array<ZipEntry>] an array of entries that could be
|
259
|
+
# @param io[#tell, #read, #seek] the IO-ish object to read the local file
|
260
|
+
# headers from @return [Array<ZipEntry>] an array of entries that could be
|
261
|
+
# recovered before hitting EOF
|
239
262
|
def read_zip_straight_ahead(io:)
|
240
263
|
entries = []
|
241
264
|
loop do
|
242
265
|
cur_offset = io.tell
|
243
266
|
entry = read_local_file_header(io: io)
|
244
267
|
if entry.uses_data_descriptor?
|
245
|
-
raise UnsupportedFeature, "The local file header at #{cur_offset} uses
|
268
|
+
raise UnsupportedFeature, "The local file header at #{cur_offset} uses \
|
269
|
+
a data descriptor and the start of next entry \
|
270
|
+
cannot be found"
|
246
271
|
end
|
247
272
|
entries << entry
|
248
273
|
next_local_header_offset = entry.compressed_data_offset + entry.compressed_size
|
249
|
-
log
|
274
|
+
log do
|
275
|
+
format('Recovered a local file file header at offset %d, seeking to the next at %d',
|
276
|
+
cur_offset, next_local_header_offset)
|
277
|
+
end
|
250
278
|
seek(io, next_local_header_offset) # Seek to the next entry, and raise if seek is impossible
|
251
279
|
end
|
252
280
|
entries
|
253
281
|
rescue ReadError
|
254
|
-
log
|
282
|
+
log do
|
283
|
+
format('Got a read/seek error after reaching %d, no more entries can be recovered',
|
284
|
+
cur_offset)
|
285
|
+
end
|
255
286
|
entries
|
256
287
|
end
|
257
|
-
|
258
|
-
# Parse the local header entry and get the offset in the IO at which the
|
259
|
-
# file starts within the ZIP.
|
260
|
-
# The method will eager-read the entire local header for the file
|
261
|
-
#
|
262
|
-
#
|
288
|
+
|
289
|
+
# Parse the local header entry and get the offset in the IO at which the
|
290
|
+
# actual compressed data of the file starts within the ZIP.
|
291
|
+
# The method will eager-read the entire local header for the file
|
292
|
+
# (the maximum size the local header may use), starting at the given offset,
|
293
|
+
# and will then compute its size. That size plus the local header offset
|
294
|
+
# given will be the compressed data offset of the entry (read starting at
|
295
|
+
# this offset to get the data).
|
263
296
|
#
|
264
297
|
# @param io[#read] an IO-ish object the ZIP file can be read from
|
265
|
-
# @return [Array<ZipEntry, Fixnum>] the parsed local header entry and
|
298
|
+
# @return [Array<ZipEntry, Fixnum>] the parsed local header entry and
|
299
|
+
# the compressed data offset
|
266
300
|
def read_local_file_header(io:)
|
267
301
|
local_file_header_offset = io.tell
|
268
302
|
|
@@ -270,7 +304,7 @@ class ZipTricks::FileReader
|
|
270
304
|
# including any headroom for extra fields etc.
|
271
305
|
local_file_header_str_plus_headroom = io.read(MAX_LOCAL_HEADER_SIZE)
|
272
306
|
raise ReadError if local_file_header_str_plus_headroom.nil? # reached EOF
|
273
|
-
|
307
|
+
|
274
308
|
io_starting_at_local_header = StringIO.new(local_file_header_str_plus_headroom)
|
275
309
|
|
276
310
|
assert_signature(io_starting_at_local_header, 0x04034b50)
|
@@ -288,19 +322,22 @@ class ZipTricks::FileReader
|
|
288
322
|
extra_size = read_2b(io_starting_at_local_header)
|
289
323
|
e.filename = read_n(io_starting_at_local_header, filename_size)
|
290
324
|
extra_fields_str = read_n(io_starting_at_local_header, extra_size)
|
291
|
-
|
325
|
+
|
292
326
|
# Parse out the extra fields
|
293
327
|
extra_table = parse_out_extra_fields(extra_fields_str)
|
294
|
-
|
328
|
+
|
295
329
|
# ...of which we really only need the Zip64 extra
|
296
330
|
if zip64_extra_contents = extra_table[1]
|
297
331
|
# If the Zip64 extra is present, we let it override all
|
298
332
|
# the values fetched from the conventional header
|
299
333
|
zip64_extra = StringIO.new(zip64_extra_contents)
|
300
|
-
log
|
334
|
+
log do
|
335
|
+
format('Will read Zip64 extra data from local header field for %s, %d bytes',
|
336
|
+
e.filename, zip64_extra.size)
|
337
|
+
end
|
301
338
|
# Now here be dragons. The APPNOTE specifies that
|
302
339
|
#
|
303
|
-
# > The order of the fields in the ZIP64 extended
|
340
|
+
# > The order of the fields in the ZIP64 extended
|
304
341
|
# > information record is fixed, but the fields will
|
305
342
|
# > only appear if the corresponding Local or Central
|
306
343
|
# > directory record field is set to 0xFFFF or 0xFFFFFFFF.
|
@@ -317,14 +354,17 @@ class ZipTricks::FileReader
|
|
317
354
|
e
|
318
355
|
end
|
319
356
|
|
320
|
-
# Get the offset in the IO at which the actual compressed data of the file
|
321
|
-
# The method will eager-read the entire local header
|
322
|
-
#
|
323
|
-
# given
|
357
|
+
# Get the offset in the IO at which the actual compressed data of the file
|
358
|
+
# starts within the ZIP. The method will eager-read the entire local header
|
359
|
+
# for the file (the maximum size the local header may use), starting at the
|
360
|
+
# given offset, and will then compute its size. That size plus the local
|
361
|
+
# header offset given will be the compressed data offset of the entry
|
362
|
+
# (read starting at this offset to get the data).
|
324
363
|
#
|
325
364
|
# @param io[#seek, #read] an IO-ish object the ZIP file can be read from
|
326
|
-
# @param local_header_offset[Fixnum] absolute offset (0-based) where the
|
327
|
-
# @return [Fixnum] absolute offset
|
365
|
+
# @param local_header_offset[Fixnum] absolute offset (0-based) where the
|
366
|
+
# local file header is supposed to begin @return [Fixnum] absolute offset
|
367
|
+
# (0-based) of where the compressed data begins for this file within the ZIP
|
328
368
|
def get_compressed_data_offset(io:, local_file_header_offset:)
|
329
369
|
seek(io, local_file_header_offset)
|
330
370
|
entry_recovered_from_local_file_header = read_local_file_header(io: io)
|
@@ -350,17 +390,21 @@ class ZipTricks::FileReader
|
|
350
390
|
def self.read_zip_straight_ahead(**options)
|
351
391
|
new.read_zip_straight_ahead(**options)
|
352
392
|
end
|
353
|
-
|
393
|
+
|
354
394
|
private
|
355
395
|
|
356
396
|
def read_local_headers(entries, io)
|
357
397
|
entries.each_with_index do |entry, i|
|
358
|
-
log
|
359
|
-
|
398
|
+
log do
|
399
|
+
format('Reading the local header for entry %d at offset %d',
|
400
|
+
i, entry.local_file_header_offset)
|
401
|
+
end
|
402
|
+
off = get_compressed_data_offset(io: io,
|
403
|
+
local_file_header_offset: entry.local_file_header_offset)
|
360
404
|
entry.compressed_data_offset = off
|
361
405
|
end
|
362
406
|
end
|
363
|
-
|
407
|
+
|
364
408
|
def skip_ahead_2(io)
|
365
409
|
skip_ahead_n(io, 2)
|
366
410
|
end
|
@@ -375,13 +419,17 @@ class ZipTricks::FileReader
|
|
375
419
|
|
376
420
|
def seek(io, absolute_pos)
|
377
421
|
io.seek(absolute_pos, IO::SEEK_SET)
|
378
|
-
|
422
|
+
unless absolute_pos == io.tell
|
423
|
+
raise ReadError,
|
424
|
+
"Expected to seek to #{absolute_pos} but only \
|
425
|
+
got to #{io.tell}"
|
426
|
+
end
|
379
427
|
nil
|
380
428
|
end
|
381
429
|
|
382
430
|
def assert_signature(io, signature_magic_number)
|
383
|
-
packed = [signature_magic_number].pack(C_V)
|
384
431
|
readback = read_4b(io)
|
432
|
+
# Rubocop: Use a guard clause instead of wrapping the code inside a conditional expression
|
385
433
|
if readback != signature_magic_number
|
386
434
|
expected = '0x0' + signature_magic_number.to_s(16)
|
387
435
|
actual = '0x0' + readback.to_s(16)
|
@@ -394,15 +442,21 @@ class ZipTricks::FileReader
|
|
394
442
|
io.seek(io.tell + n, IO::SEEK_SET)
|
395
443
|
pos_after = io.tell
|
396
444
|
delta = pos_after - pos_before
|
397
|
-
|
445
|
+
unless delta == n
|
446
|
+
raise ReadError, "Expected to seek #{n} bytes ahead, but could \
|
447
|
+
only seek #{delta} bytes ahead"
|
448
|
+
end
|
398
449
|
nil
|
399
450
|
end
|
400
451
|
|
401
452
|
def read_n(io, n_bytes)
|
402
|
-
io.read(n_bytes).tap
|
453
|
+
io.read(n_bytes).tap do |d|
|
403
454
|
raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
|
404
|
-
|
405
|
-
|
455
|
+
unless d.bytesize == n_bytes
|
456
|
+
raise ReadError, "Expected to read #{n_bytes} bytes, \
|
457
|
+
read #{d.bytesize}"
|
458
|
+
end
|
459
|
+
end
|
406
460
|
end
|
407
461
|
|
408
462
|
def read_2b(io)
|
@@ -418,8 +472,12 @@ class ZipTricks::FileReader
|
|
418
472
|
end
|
419
473
|
|
420
474
|
def read_cdir_entry(io)
|
475
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
476
|
+
# read_cdir_entry is too high. [45.66/15]
|
477
|
+
# Rubocop: convention: Method has too many lines. [30/10]
|
421
478
|
assert_signature(io, 0x02014b50)
|
422
479
|
ZipEntry.new.tap do |e|
|
480
|
+
# Rubocop: convention: Block has too many lines. [27/25]
|
423
481
|
e.made_by = read_2b(io)
|
424
482
|
e.version_needed_to_extract = read_2b(io)
|
425
483
|
e.gp_flags = read_2b(io)
|
@@ -447,24 +505,35 @@ class ZipTricks::FileReader
|
|
447
505
|
extra_table = parse_out_extra_fields(extras)
|
448
506
|
|
449
507
|
# ...of which we really only need the Zip64 extra
|
450
|
-
if zip64_extra_contents
|
508
|
+
if zip64_extra_contents ||= extra_table[1]
|
451
509
|
# If the Zip64 extra is present, we let it override all
|
452
510
|
# the values fetched from the conventional header
|
453
511
|
zip64_extra = StringIO.new(zip64_extra_contents)
|
454
|
-
log
|
512
|
+
log do
|
513
|
+
format('Will read Zip64 extra data for %s, %d bytes',
|
514
|
+
e.filename, zip64_extra.size)
|
515
|
+
end
|
455
516
|
# Now here be dragons. The APPNOTE specifies that
|
456
517
|
#
|
457
|
-
# > The order of the fields in the ZIP64 extended
|
518
|
+
# > The order of the fields in the ZIP64 extended
|
458
519
|
# > information record is fixed, but the fields will
|
459
520
|
# > only appear if the corresponding Local or Central
|
460
521
|
# > directory record field is set to 0xFFFF or 0xFFFFFFFF.
|
461
522
|
#
|
462
523
|
# It means that before we read this stuff we need to check if the previously-read
|
463
524
|
# values are at overflow, and only _then_ proceed to read them. Bah.
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
525
|
+
# Rubocop: convention: Line is too long.
|
526
|
+
if e.uncompressed_size == 0xFFFFFFFF
|
527
|
+
e.uncompressed_size = read_8b(zip64_extra)
|
528
|
+
end
|
529
|
+
if e.compressed_size == 0xFFFFFFFF
|
530
|
+
e.compressed_size = read_8b(zip64_extra)
|
531
|
+
end
|
532
|
+
if e.local_file_header_offset == 0xFFFFFFFF
|
533
|
+
e.local_file_header_offset = read_8b(zip64_extra)
|
534
|
+
end
|
535
|
+
# Disk number comes last and we can skip it anyway, since we do
|
536
|
+
# not support multi-disk archives
|
468
537
|
end
|
469
538
|
end
|
470
539
|
end
|
@@ -480,25 +549,30 @@ class ZipTricks::FileReader
|
|
480
549
|
file_io.seek(implied_position_of_eocd_record, IO::SEEK_SET)
|
481
550
|
str_containing_eocd_record = file_io.read(MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE)
|
482
551
|
eocd_idx_in_buf = locate_eocd_signature(str_containing_eocd_record)
|
483
|
-
|
552
|
+
|
484
553
|
raise MissingEOCD unless eocd_idx_in_buf
|
485
|
-
|
554
|
+
|
486
555
|
eocd_offset = implied_position_of_eocd_record + eocd_idx_in_buf
|
487
|
-
log { 'Found EOCD signature at offset %d'
|
488
|
-
|
556
|
+
log { format('Found EOCD signature at offset %d', eocd_offset) }
|
557
|
+
|
489
558
|
eocd_offset
|
490
559
|
end
|
491
560
|
|
492
|
-
# This is tricky. Essentially, we have to scan the maximum possible number
|
493
|
-
# theoretically occupy including the comment),
|
494
|
-
#
|
495
|
-
#
|
561
|
+
# This is tricky. Essentially, we have to scan the maximum possible number
|
562
|
+
# of bytes (that the EOCD can theoretically occupy including the comment),
|
563
|
+
# and we have to find a combination of:
|
564
|
+
# [EOCD signature, <some ZIP metadata>, comment byte size, the comment of
|
565
|
+
# that size, eof].
|
566
|
+
# The only way I could find to do this was with a sliding window, but
|
567
|
+
# there probably is a better way.
|
568
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
569
|
+
# locate_eocd_signature is too high. [17.49/15]
|
570
|
+
# Rubocop: convention: Method has too many lines. [14/10]
|
496
571
|
def locate_eocd_signature(in_str)
|
497
572
|
# We have to scan from the _very_ tail. We read the very minimum size
|
498
573
|
# the EOCD record can have (up to and including the comment size), using
|
499
574
|
# a sliding window. Once our end offset matches the comment size we found our
|
500
575
|
# EOCD marker.
|
501
|
-
eocd_signature_int = 0x06054b50
|
502
576
|
unpack_pattern = 'VvvvvVVv'
|
503
577
|
minimum_record_size = 22
|
504
578
|
end_location = minimum_record_size * -1
|
@@ -507,23 +581,26 @@ class ZipTricks::FileReader
|
|
507
581
|
# We use negative values because if we used positive slice indices
|
508
582
|
# we would have to detect the rollover ourselves
|
509
583
|
break unless window = in_str[end_location, minimum_record_size]
|
510
|
-
|
584
|
+
|
511
585
|
window_location = in_str.bytesize + end_location
|
512
586
|
unpacked = window.unpack(unpack_pattern)
|
513
587
|
# If we found the signature, pick up the comment size, and check if the size of the window
|
514
588
|
# plus that comment size is where we are in the string. If we are - bingo.
|
515
|
-
if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
|
589
|
+
if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
|
516
590
|
assumed_eocd_location = in_str.bytesize - comment_size - minimum_record_size
|
517
591
|
# if the comment size is where we should be at - we found our EOCD
|
518
592
|
return assumed_eocd_location if assumed_eocd_location == window_location
|
519
593
|
end
|
520
|
-
|
594
|
+
|
521
595
|
end_location -= 1 # Shift the window back, by one byte, and try again.
|
522
596
|
end
|
523
597
|
end
|
524
|
-
|
598
|
+
|
525
599
|
# Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
|
526
600
|
# EOCD record in the archive by fixed offsets
|
601
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
602
|
+
# get_zip64_eocd_location is too high. [15.17/15]
|
603
|
+
# Rubocop: convention: Method has too many lines. [15/10]
|
527
604
|
def get_zip64_eocd_location(file_io, eocd_offset)
|
528
605
|
zip64_eocd_loc_offset = eocd_offset
|
529
606
|
zip64_eocd_loc_offset -= 4 # The signature
|
@@ -531,28 +608,34 @@ class ZipTricks::FileReader
|
|
531
608
|
zip64_eocd_loc_offset -= 8 # Offset of the zip64 central directory record
|
532
609
|
zip64_eocd_loc_offset -= 4 # Total number of disks
|
533
610
|
|
534
|
-
log
|
611
|
+
log do
|
612
|
+
format('Will look for the Zip64 EOCD locator signature at offset %d',
|
613
|
+
zip64_eocd_loc_offset)
|
614
|
+
end
|
535
615
|
|
536
616
|
# If the offset is negative there is certainly no Zip64 EOCD locator here
|
537
617
|
return unless zip64_eocd_loc_offset >= 0
|
538
618
|
|
539
619
|
file_io.seek(zip64_eocd_loc_offset, IO::SEEK_SET)
|
540
620
|
assert_signature(file_io, 0x07064b50)
|
541
|
-
|
542
|
-
log { 'Found Zip64 EOCD locator at offset %d'
|
621
|
+
|
622
|
+
log { format('Found Zip64 EOCD locator at offset %d', zip64_eocd_loc_offset) }
|
543
623
|
|
544
624
|
disk_num = read_4b(file_io) # number of the disk
|
545
|
-
raise UnsupportedFeature,
|
625
|
+
raise UnsupportedFeature, 'The archive spans multiple disks' if disk_num != 0
|
546
626
|
read_8b(file_io)
|
547
627
|
rescue ReadError
|
548
628
|
nil
|
549
629
|
end
|
550
630
|
|
631
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
632
|
+
# num_files_and_central_directory_offset_zip64 is too high. [21.12/15]
|
633
|
+
# Rubocop: convention: Method has too many lines. [17/10]
|
551
634
|
def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
|
552
635
|
seek(io, zip64_end_of_cdir_location)
|
553
|
-
|
636
|
+
|
554
637
|
assert_signature(io, 0x06064b50)
|
555
|
-
|
638
|
+
|
556
639
|
zip64_eocdr_size = read_8b(io)
|
557
640
|
zip64_eocdr = read_n(io, zip64_eocdr_size) # Reading in bulk is cheaper
|
558
641
|
zip64_eocdr = StringIO.new(zip64_eocdr)
|
@@ -561,13 +644,21 @@ class ZipTricks::FileReader
|
|
561
644
|
|
562
645
|
disk_n = read_4b(zip64_eocdr) # number of this disk
|
563
646
|
disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
|
564
|
-
|
647
|
+
if disk_n != disk_n_with_eocdr
|
648
|
+
raise UnsupportedFeature, 'The archive spans multiple disks'
|
649
|
+
end
|
565
650
|
|
566
651
|
num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
|
567
652
|
num_files_total = read_8b(zip64_eocdr) # files total in the central directory
|
568
|
-
raise UnsupportedFeature, "The archive spans multiple disks" if num_files_this_disk != num_files_total
|
569
653
|
|
570
|
-
|
654
|
+
if num_files_this_disk != num_files_total
|
655
|
+
raise UnsupportedFeature, 'The archive spans multiple disks'
|
656
|
+
end
|
657
|
+
|
658
|
+
log do
|
659
|
+
format('Zip64 EOCD record states there are %d files in the archive',
|
660
|
+
num_files_total)
|
661
|
+
end
|
571
662
|
|
572
663
|
central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
|
573
664
|
central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
|
@@ -575,58 +666,63 @@ class ZipTricks::FileReader
|
|
575
666
|
[num_files_total, central_dir_offset, central_dir_size]
|
576
667
|
end
|
577
668
|
|
578
|
-
C_V = 'V'
|
579
|
-
C_v = 'v'
|
580
|
-
C_Qe = 'Q<'
|
581
|
-
|
582
|
-
# To prevent too many tiny reads, read the maximum possible size of end of
|
583
|
-
# upfront (all the fixed fields + at most 0xFFFF
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
669
|
+
C_V = 'V'
|
670
|
+
C_v = 'v'
|
671
|
+
C_Qe = 'Q<'
|
672
|
+
|
673
|
+
# To prevent too many tiny reads, read the maximum possible size of end of
|
674
|
+
# central directory record upfront (all the fixed fields + at most 0xFFFF
|
675
|
+
# bytes of the archive comment)
|
676
|
+
MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
|
677
|
+
begin
|
678
|
+
4 + # Offset of the start of central directory
|
679
|
+
4 + # Size of the central directory
|
680
|
+
2 + # Number of files in the cdir
|
681
|
+
4 + # End-of-central-directory signature
|
682
|
+
2 + # Number of this disk
|
683
|
+
2 + # Number of disk with the start of cdir
|
684
|
+
2 + # Number of files in the cdir of this disk
|
685
|
+
2 + # The comment size
|
686
|
+
0xFFFF # Maximum comment size
|
687
|
+
end
|
595
688
|
|
596
689
|
# To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
|
597
690
|
# The maximum size is all the usual items, plus the maximum size
|
598
691
|
# of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
|
599
|
-
MAX_LOCAL_HEADER_SIZE =
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
692
|
+
MAX_LOCAL_HEADER_SIZE =
|
693
|
+
begin
|
694
|
+
4 + # signature
|
695
|
+
2 + # Version needed to extract
|
696
|
+
2 + # gp flags
|
697
|
+
2 + # storage mode
|
698
|
+
2 + # dos time
|
699
|
+
2 + # dos date
|
700
|
+
4 + # CRC32
|
701
|
+
4 + # Comp size
|
702
|
+
4 + # Uncomp size
|
703
|
+
2 + # Filename size
|
704
|
+
2 + # Extra fields size
|
705
|
+
0xFFFF + # Maximum filename size
|
706
|
+
0xFFFF # Maximum extra fields size
|
707
|
+
end
|
614
708
|
|
615
|
-
SIZE_OF_USABLE_EOCD_RECORD =
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
709
|
+
SIZE_OF_USABLE_EOCD_RECORD =
|
710
|
+
begin
|
711
|
+
4 + # Signature
|
712
|
+
2 + # Number of this disk
|
713
|
+
2 + # Number of the disk with the EOCD record
|
714
|
+
2 + # Number of entries in the central directory of this disk
|
715
|
+
2 + # Number of entries in the central directory total
|
716
|
+
4 + # Size of the central directory
|
717
|
+
4 # Start of the central directory offset
|
718
|
+
end
|
624
719
|
|
720
|
+
# Rubocop: convention: Method has too many lines. [11/10]
|
625
721
|
def num_files_and_central_directory_offset(file_io, eocd_offset)
|
626
722
|
seek(file_io, eocd_offset)
|
627
723
|
|
628
724
|
# The size of the EOCD record is known upfront, so use a strict read
|
629
|
-
eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
|
725
|
+
eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
|
630
726
|
io = StringIO.new(eocd_record_str)
|
631
727
|
|
632
728
|
assert_signature(io, 0x06054b50)
|
@@ -640,8 +736,8 @@ class ZipTricks::FileReader
|
|
640
736
|
end
|
641
737
|
|
642
738
|
private_constant :C_V, :C_v, :C_Qe, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
|
643
|
-
|
644
|
-
|
739
|
+
:MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
|
740
|
+
|
645
741
|
# Is provided as a stub to be overridden in a subclass if you need it. Will report
|
646
742
|
# during various stages of reading. The log message is contained in the return value
|
647
743
|
# of `yield` in the method (the log messages are lazy-evaluated).
|
@@ -649,11 +745,11 @@ class ZipTricks::FileReader
|
|
649
745
|
# The most minimal implementation for the method is just this:
|
650
746
|
# $stderr.puts(yield)
|
651
747
|
end
|
652
|
-
|
748
|
+
|
653
749
|
def parse_out_extra_fields(extra_fields_str)
|
654
750
|
extra_table = {}
|
655
751
|
extras_buf = StringIO.new(extra_fields_str)
|
656
|
-
until extras_buf.eof?
|
752
|
+
until extras_buf.eof?
|
657
753
|
extra_id = read_2b(extras_buf)
|
658
754
|
extra_size = read_2b(extras_buf)
|
659
755
|
extra_contents = read_n(extras_buf, extra_size)
|