zip_tricks 4.4.2 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +79 -0
- data/.rubocop_todo.yml +43 -0
- data/.travis.yml +3 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +7 -4
- data/examples/archive_size_estimate.rb +8 -6
- data/examples/config.ru +3 -1
- data/examples/parallel_compression_with_block_deflate.rb +31 -20
- data/examples/rack_application.rb +25 -17
- data/lib/zip_tricks.rb +4 -2
- data/lib/zip_tricks/block_deflate.rb +43 -25
- data/lib/zip_tricks/block_write.rb +20 -10
- data/lib/zip_tricks/file_reader.rb +241 -145
- data/lib/zip_tricks/file_reader/inflating_reader.rb +4 -1
- data/lib/zip_tricks/file_reader/stored_reader.rb +4 -1
- data/lib/zip_tricks/null_writer.rb +5 -5
- data/lib/zip_tricks/rack_body.rb +7 -4
- data/lib/zip_tricks/rails_streaming.rb +3 -1
- data/lib/zip_tricks/remote_io.rb +9 -5
- data/lib/zip_tricks/remote_uncap.rb +10 -5
- data/lib/zip_tricks/size_estimator.rb +39 -27
- data/lib/zip_tricks/stream_crc32.rb +2 -0
- data/lib/zip_tricks/streamer.rb +254 -98
- data/lib/zip_tricks/streamer/deflated_writer.rb +6 -9
- data/lib/zip_tricks/streamer/entry.rb +11 -3
- data/lib/zip_tricks/streamer/stored_writer.rb +5 -7
- data/lib/zip_tricks/streamer/writable.rb +30 -7
- data/lib/zip_tricks/version.rb +3 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -0
- data/lib/zip_tricks/zip_writer.rb +54 -44
- data/testing/generate_test_files.rb +68 -38
- data/testing/support.rb +21 -16
- data/testing/test-report.txt +28 -0
- data/zip_tricks.gemspec +24 -22
- metadata +23 -5
@@ -1,5 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Stashes a block given by the Rack webserver when calling each() on a body, and calls
|
2
4
|
# that block every time it is written to using :<< (shovel). Poses as an IO for rubyzip.
|
5
|
+
|
3
6
|
class ZipTricks::BlockWrite
|
4
7
|
# The block is the block given to each() of the Rack body, or other block you want
|
5
8
|
# to receive the string chunks written by the zip compressor.
|
@@ -8,26 +11,33 @@ class ZipTricks::BlockWrite
|
|
8
11
|
end
|
9
12
|
|
10
13
|
# Make sure those methods raise outright
|
11
|
-
[
|
12
|
-
define_method(m) do |*
|
14
|
+
%i[seek pos= to_s].each do |m|
|
15
|
+
define_method(m) do |*_args|
|
13
16
|
raise "#{m} not supported - this IO adapter is non-rewindable"
|
14
17
|
end
|
15
18
|
end
|
16
19
|
|
17
|
-
# Every time this object gets written to, call the Rack body each() block
|
20
|
+
# Every time this object gets written to, call the Rack body each() block
|
21
|
+
# with the bytes given instead.
|
18
22
|
def <<(buf)
|
19
23
|
return if buf.nil?
|
20
24
|
|
21
25
|
# Ensure we ALWAYS write in binary encoding.
|
22
|
-
encoded =
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
encoded =
|
27
|
+
if buf.encoding != Encoding::BINARY
|
28
|
+
# If we got a frozen string we can't force_encoding on it
|
29
|
+
begin
|
30
|
+
buf.force_encoding(Encoding::BINARY)
|
31
|
+
rescue
|
32
|
+
buf.dup.force_encoding(Encoding::BINARY)
|
33
|
+
end
|
34
|
+
else
|
35
|
+
buf
|
36
|
+
end
|
28
37
|
|
29
38
|
# buf.dup.force_encoding(Encoding::BINARY)
|
30
|
-
|
39
|
+
# Zero-size output has a special meaning when using chunked encoding
|
40
|
+
return if encoded.bytesize.zero?
|
31
41
|
|
32
42
|
@block.call(encoded)
|
33
43
|
self
|
@@ -1,3 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Naming/ConstantName
|
4
|
+
|
1
5
|
require 'stringio'
|
2
6
|
|
3
7
|
# A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
|
@@ -41,20 +45,25 @@ require 'stringio'
|
|
41
45
|
#
|
42
46
|
# ## Mode of operation
|
43
47
|
#
|
44
|
-
# By default, `FileReader` _ignores_ the data in local file headers (as it is
|
45
|
-
# It reads the ZIP file "from the tail", finds the
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
48
|
+
# By default, `FileReader` _ignores_ the data in local file headers (as it is
|
49
|
+
# often unreliable). It reads the ZIP file "from the tail", finds the
|
50
|
+
# end-of-central-directory signatures, then reads the central directory entries,
|
51
|
+
# reconstitutes the entries with their filenames, attributes and so on, and
|
52
|
+
# sets these entries up with the absolute _offsets_ into the source file/IO object.
|
53
|
+
# These offsets can then be used to extract the actual compressed data of
|
54
|
+
# the files and to expand it.
|
49
55
|
#
|
50
56
|
# ## Recovering damaged or incomplete ZIP files
|
51
57
|
#
|
52
|
-
# If the ZIP file you are trying to read does not contain the central directory
|
53
|
-
# will not work, since it starts the read process
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
58
|
+
# If the ZIP file you are trying to read does not contain the central directory
|
59
|
+
# records `read_zip_structure` will not work, since it starts the read process
|
60
|
+
# from the EOCD marker at the end of the central directory and then crawls
|
61
|
+
# "back" in the IO to figure out the rest. You can explicitly apply a fallback
|
62
|
+
# for reading the archive "straight ahead" instead using `read_zip_straight_ahead`
|
63
|
+
# - the method will instead scan your IO from the very start, skipping over
|
64
|
+
# the actual entry data. This is less efficient than central directory parsing since
|
57
65
|
# it involves a much larger number of reads (1 read from the IO per entry in the ZIP).
|
66
|
+
|
58
67
|
class ZipTricks::FileReader
|
59
68
|
require_relative 'file_reader/stored_reader'
|
60
69
|
require_relative 'file_reader/inflating_reader'
|
@@ -64,17 +73,17 @@ class ZipTricks::FileReader
|
|
64
73
|
InvalidStructure = Class.new(ReadError)
|
65
74
|
LocalHeaderPending = Class.new(StandardError) do
|
66
75
|
def message
|
67
|
-
|
76
|
+
'The compressed data offset is not available (local header has not been read)'
|
68
77
|
end
|
69
78
|
end
|
70
79
|
MissingEOCD = Class.new(StandardError) do
|
71
80
|
def message
|
72
|
-
|
81
|
+
'Could not find the EOCD signature in the buffer - maybe a malformed ZIP file'
|
73
82
|
end
|
74
83
|
end
|
75
|
-
|
84
|
+
|
76
85
|
private_constant :StoredReader, :InflatingReader
|
77
|
-
|
86
|
+
|
78
87
|
# Represents a file within the ZIP archive being read
|
79
88
|
class ZipEntry
|
80
89
|
# @return [Fixnum] bit-packed version signature of the program that made the archive
|
@@ -137,28 +146,29 @@ class ZipTricks::FileReader
|
|
137
146
|
when 0
|
138
147
|
StoredReader.new(from_io, compressed_size)
|
139
148
|
else
|
140
|
-
raise UnsupportedFeature,
|
149
|
+
raise UnsupportedFeature, format('Unsupported storage mode for reading - %d',
|
150
|
+
storage_mode)
|
141
151
|
end
|
142
152
|
end
|
143
|
-
|
153
|
+
|
144
154
|
# @return [Fixnum] at what offset you should start reading
|
145
155
|
# for the compressed data in your original IO object
|
146
156
|
def compressed_data_offset
|
147
|
-
@compressed_data_offset
|
157
|
+
@compressed_data_offset || raise(LocalHeaderPending)
|
148
158
|
end
|
149
|
-
|
159
|
+
|
150
160
|
# Tells whether the compressed data offset is already known for this entry
|
151
161
|
# @return [Boolean]
|
152
162
|
def known_offset?
|
153
163
|
!@compressed_data_offset.nil?
|
154
164
|
end
|
155
|
-
|
165
|
+
|
156
166
|
# Tells whether the entry uses a data descriptor (this is defined
|
157
167
|
# by bit 3 in the GP flags).
|
158
168
|
def uses_data_descriptor?
|
159
169
|
(gp_flags & 0x0008) == 0x0008
|
160
170
|
end
|
161
|
-
|
171
|
+
|
162
172
|
# Sets the offset at which the compressed data for this file starts in the ZIP.
|
163
173
|
# By default, the value will be set by the Reader for you. If you use delayed
|
164
174
|
# reading, you need to set it by using the `get_compressed_data_offset` on the Reader:
|
@@ -191,12 +201,14 @@ class ZipTricks::FileReader
|
|
191
201
|
eocd_offset = get_eocd_offset(io, zip_file_size)
|
192
202
|
|
193
203
|
zip64_end_of_cdir_location = get_zip64_eocd_location(io, eocd_offset)
|
194
|
-
num_files, cdir_location,
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
204
|
+
num_files, cdir_location, _cdir_size =
|
205
|
+
if zip64_end_of_cdir_location
|
206
|
+
num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
|
207
|
+
else
|
208
|
+
num_files_and_central_directory_offset(io, eocd_offset)
|
209
|
+
end
|
210
|
+
|
211
|
+
log { format('Located the central directory start at %d', cdir_location) }
|
200
212
|
seek(io, cdir_location)
|
201
213
|
|
202
214
|
# Read the entire central directory AND anything behind it, in one fell swoop.
|
@@ -214,55 +226,77 @@ class ZipTricks::FileReader
|
|
214
226
|
# the central directory size altogether.
|
215
227
|
central_directory_str = io.read # and not read_n(io, cdir_size), see above
|
216
228
|
central_directory_io = StringIO.new(central_directory_str)
|
217
|
-
log
|
229
|
+
log do
|
230
|
+
format('Read %d bytes with central directory + EOCD record and locator',
|
231
|
+
central_directory_str.bytesize)
|
232
|
+
end
|
218
233
|
|
219
234
|
entries = (0...num_files).map do |entry_n|
|
220
|
-
|
235
|
+
offset_location = cdir_location + central_directory_io.tell
|
236
|
+
log do
|
237
|
+
format('Reading the central directory entry %d starting at offset %d',
|
238
|
+
entry_n, offset_location)
|
239
|
+
end
|
221
240
|
read_cdir_entry(central_directory_io)
|
222
241
|
end
|
223
|
-
|
242
|
+
|
224
243
|
read_local_headers(entries, io) if read_local_headers
|
225
|
-
|
244
|
+
|
226
245
|
entries
|
227
246
|
end
|
228
247
|
|
229
|
-
# Sometimes you might encounter truncated ZIP files, which do not contain
|
230
|
-
# whatsoever - or where the central directory is
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
248
|
+
# Sometimes you might encounter truncated ZIP files, which do not contain
|
249
|
+
# any central directory whatsoever - or where the central directory is
|
250
|
+
# truncated. In that case, employing the technique of reading the ZIP
|
251
|
+
# "from the end" is impossible, and the only recourse is reading each
|
252
|
+
# local file header in succession. If the entries in such a ZIP use data
|
253
|
+
# descriptors, you would need to scan after the entry until you encounter
|
254
|
+
# the data descriptor signature - and that might be unreliable at best.
|
255
|
+
# Therefore, this reading technique does not support data descriptors.
|
256
|
+
# It can however recover the entries you still can read if these entries
|
257
|
+
# contain all the necessary information about the contained file.
|
236
258
|
#
|
237
|
-
# @param io[#tell, #read, #seek] the IO-ish object to read the local file
|
238
|
-
# @return [Array<ZipEntry>] an array of entries that could be
|
259
|
+
# @param io[#tell, #read, #seek] the IO-ish object to read the local file headers from
|
260
|
+
# @return [Array<ZipEntry>] an array of entries that could be
|
261
|
+
# recovered before hitting EOF
|
239
262
|
def read_zip_straight_ahead(io:)
|
240
263
|
entries = []
|
241
264
|
loop do
|
242
265
|
cur_offset = io.tell
|
243
266
|
entry = read_local_file_header(io: io)
|
244
267
|
if entry.uses_data_descriptor?
|
245
|
-
raise UnsupportedFeature, "The local file header at #{cur_offset} uses
|
268
|
+
raise UnsupportedFeature, "The local file header at #{cur_offset} uses \
|
269
|
+
a data descriptor and the start of next entry \
|
270
|
+
cannot be found"
|
246
271
|
end
|
247
272
|
entries << entry
|
248
273
|
next_local_header_offset = entry.compressed_data_offset + entry.compressed_size
|
249
|
-
log
|
274
|
+
log do
|
275
|
+
format('Recovered a local file file header at offset %d, seeking to the next at %d',
|
276
|
+
cur_offset, next_local_header_offset)
|
277
|
+
end
|
250
278
|
seek(io, next_local_header_offset) # Seek to the next entry, and raise if seek is impossible
|
251
279
|
end
|
252
280
|
entries
|
253
281
|
rescue ReadError
|
254
|
-
log
|
282
|
+
log do
|
283
|
+
format('Got a read/seek error after reaching %d, no more entries can be recovered',
|
284
|
+
cur_offset)
|
285
|
+
end
|
255
286
|
entries
|
256
287
|
end
|
257
|
-
|
258
|
-
# Parse the local header entry and get the offset in the IO at which the
|
259
|
-
# file starts within the ZIP.
|
260
|
-
# The method will eager-read the entire local header for the file
|
261
|
-
#
|
262
|
-
#
|
288
|
+
|
289
|
+
# Parse the local header entry and get the offset in the IO at which the
|
290
|
+
# actual compressed data of the file starts within the ZIP.
|
291
|
+
# The method will eager-read the entire local header for the file
|
292
|
+
# (the maximum size the local header may use), starting at the given offset,
|
293
|
+
# and will then compute its size. That size plus the local header offset
|
294
|
+
# given will be the compressed data offset of the entry (read starting at
|
295
|
+
# this offset to get the data).
|
263
296
|
#
|
264
297
|
# @param io[#read] an IO-ish object the ZIP file can be read from
|
265
|
-
# @return [Array<ZipEntry, Fixnum>] the parsed local header entry and
|
298
|
+
# @return [Array<ZipEntry, Fixnum>] the parsed local header entry and
|
299
|
+
# the compressed data offset
|
266
300
|
def read_local_file_header(io:)
|
267
301
|
local_file_header_offset = io.tell
|
268
302
|
|
@@ -270,7 +304,7 @@ class ZipTricks::FileReader
|
|
270
304
|
# including any headroom for extra fields etc.
|
271
305
|
local_file_header_str_plus_headroom = io.read(MAX_LOCAL_HEADER_SIZE)
|
272
306
|
raise ReadError if local_file_header_str_plus_headroom.nil? # reached EOF
|
273
|
-
|
307
|
+
|
274
308
|
io_starting_at_local_header = StringIO.new(local_file_header_str_plus_headroom)
|
275
309
|
|
276
310
|
assert_signature(io_starting_at_local_header, 0x04034b50)
|
@@ -288,19 +322,22 @@ class ZipTricks::FileReader
|
|
288
322
|
extra_size = read_2b(io_starting_at_local_header)
|
289
323
|
e.filename = read_n(io_starting_at_local_header, filename_size)
|
290
324
|
extra_fields_str = read_n(io_starting_at_local_header, extra_size)
|
291
|
-
|
325
|
+
|
292
326
|
# Parse out the extra fields
|
293
327
|
extra_table = parse_out_extra_fields(extra_fields_str)
|
294
|
-
|
328
|
+
|
295
329
|
# ...of which we really only need the Zip64 extra
|
296
330
|
if zip64_extra_contents = extra_table[1]
|
297
331
|
# If the Zip64 extra is present, we let it override all
|
298
332
|
# the values fetched from the conventional header
|
299
333
|
zip64_extra = StringIO.new(zip64_extra_contents)
|
300
|
-
log
|
334
|
+
log do
|
335
|
+
format('Will read Zip64 extra data from local header field for %s, %d bytes',
|
336
|
+
e.filename, zip64_extra.size)
|
337
|
+
end
|
301
338
|
# Now here be dragons. The APPNOTE specifies that
|
302
339
|
#
|
303
|
-
# > The order of the fields in the ZIP64 extended
|
340
|
+
# > The order of the fields in the ZIP64 extended
|
304
341
|
# > information record is fixed, but the fields will
|
305
342
|
# > only appear if the corresponding Local or Central
|
306
343
|
# > directory record field is set to 0xFFFF or 0xFFFFFFFF.
|
@@ -317,14 +354,17 @@ class ZipTricks::FileReader
|
|
317
354
|
e
|
318
355
|
end
|
319
356
|
|
320
|
-
# Get the offset in the IO at which the actual compressed data of the file
|
321
|
-
# The method will eager-read the entire local header
|
322
|
-
#
|
323
|
-
# given
|
357
|
+
# Get the offset in the IO at which the actual compressed data of the file
|
358
|
+
# starts within the ZIP. The method will eager-read the entire local header
|
359
|
+
# for the file (the maximum size the local header may use), starting at the
|
360
|
+
# given offset, and will then compute its size. That size plus the local
|
361
|
+
# header offset given will be the compressed data offset of the entry
|
362
|
+
# (read starting at this offset to get the data).
|
324
363
|
#
|
325
364
|
# @param io[#seek, #read] an IO-ish object the ZIP file can be read from
|
326
|
-
# @param local_header_offset[Fixnum] absolute offset (0-based) where the
|
327
|
-
# @return [Fixnum] absolute offset
|
365
|
+
# @param local_file_header_offset[Fixnum] absolute offset (0-based) where the
|
366
|
+
# local file header is supposed to begin
|
367
|
+
# @return [Fixnum] absolute offset (0-based) of where the compressed data begins for this file within the ZIP
|
328
368
|
def get_compressed_data_offset(io:, local_file_header_offset:)
|
329
369
|
seek(io, local_file_header_offset)
|
330
370
|
entry_recovered_from_local_file_header = read_local_file_header(io: io)
|
@@ -350,17 +390,21 @@ class ZipTricks::FileReader
|
|
350
390
|
def self.read_zip_straight_ahead(**options)
|
351
391
|
new.read_zip_straight_ahead(**options)
|
352
392
|
end
|
353
|
-
|
393
|
+
|
354
394
|
private
|
355
395
|
|
356
396
|
def read_local_headers(entries, io)
|
357
397
|
entries.each_with_index do |entry, i|
|
358
|
-
log
|
359
|
-
|
398
|
+
log do
|
399
|
+
format('Reading the local header for entry %d at offset %d',
|
400
|
+
i, entry.local_file_header_offset)
|
401
|
+
end
|
402
|
+
off = get_compressed_data_offset(io: io,
|
403
|
+
local_file_header_offset: entry.local_file_header_offset)
|
360
404
|
entry.compressed_data_offset = off
|
361
405
|
end
|
362
406
|
end
|
363
|
-
|
407
|
+
|
364
408
|
def skip_ahead_2(io)
|
365
409
|
skip_ahead_n(io, 2)
|
366
410
|
end
|
@@ -375,13 +419,17 @@ class ZipTricks::FileReader
|
|
375
419
|
|
376
420
|
def seek(io, absolute_pos)
|
377
421
|
io.seek(absolute_pos, IO::SEEK_SET)
|
378
|
-
|
422
|
+
unless absolute_pos == io.tell
|
423
|
+
raise ReadError,
|
424
|
+
"Expected to seek to #{absolute_pos} but only \
|
425
|
+
got to #{io.tell}"
|
426
|
+
end
|
379
427
|
nil
|
380
428
|
end
|
381
429
|
|
382
430
|
def assert_signature(io, signature_magic_number)
|
383
|
-
packed = [signature_magic_number].pack(C_V)
|
384
431
|
readback = read_4b(io)
|
432
|
+
# Rubocop: Use a guard clause instead of wrapping the code inside a conditional expression
|
385
433
|
if readback != signature_magic_number
|
386
434
|
expected = '0x0' + signature_magic_number.to_s(16)
|
387
435
|
actual = '0x0' + readback.to_s(16)
|
@@ -394,15 +442,21 @@ class ZipTricks::FileReader
|
|
394
442
|
io.seek(io.tell + n, IO::SEEK_SET)
|
395
443
|
pos_after = io.tell
|
396
444
|
delta = pos_after - pos_before
|
397
|
-
|
445
|
+
unless delta == n
|
446
|
+
raise ReadError, "Expected to seek #{n} bytes ahead, but could \
|
447
|
+
only seek #{delta} bytes ahead"
|
448
|
+
end
|
398
449
|
nil
|
399
450
|
end
|
400
451
|
|
401
452
|
def read_n(io, n_bytes)
|
402
|
-
io.read(n_bytes).tap
|
453
|
+
io.read(n_bytes).tap do |d|
|
403
454
|
raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
|
404
|
-
|
405
|
-
|
455
|
+
unless d.bytesize == n_bytes
|
456
|
+
raise ReadError, "Expected to read #{n_bytes} bytes, \
|
457
|
+
read #{d.bytesize}"
|
458
|
+
end
|
459
|
+
end
|
406
460
|
end
|
407
461
|
|
408
462
|
def read_2b(io)
|
@@ -418,8 +472,12 @@ class ZipTricks::FileReader
|
|
418
472
|
end
|
419
473
|
|
420
474
|
def read_cdir_entry(io)
|
475
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
476
|
+
# read_cdir_entry is too high. [45.66/15]
|
477
|
+
# Rubocop: convention: Method has too many lines. [30/10]
|
421
478
|
assert_signature(io, 0x02014b50)
|
422
479
|
ZipEntry.new.tap do |e|
|
480
|
+
# Rubocop: convention: Block has too many lines. [27/25]
|
423
481
|
e.made_by = read_2b(io)
|
424
482
|
e.version_needed_to_extract = read_2b(io)
|
425
483
|
e.gp_flags = read_2b(io)
|
@@ -447,24 +505,35 @@ class ZipTricks::FileReader
|
|
447
505
|
extra_table = parse_out_extra_fields(extras)
|
448
506
|
|
449
507
|
# ...of which we really only need the Zip64 extra
|
450
|
-
if zip64_extra_contents
|
508
|
+
if zip64_extra_contents ||= extra_table[1]
|
451
509
|
# If the Zip64 extra is present, we let it override all
|
452
510
|
# the values fetched from the conventional header
|
453
511
|
zip64_extra = StringIO.new(zip64_extra_contents)
|
454
|
-
log
|
512
|
+
log do
|
513
|
+
format('Will read Zip64 extra data for %s, %d bytes',
|
514
|
+
e.filename, zip64_extra.size)
|
515
|
+
end
|
455
516
|
# Now here be dragons. The APPNOTE specifies that
|
456
517
|
#
|
457
|
-
# > The order of the fields in the ZIP64 extended
|
518
|
+
# > The order of the fields in the ZIP64 extended
|
458
519
|
# > information record is fixed, but the fields will
|
459
520
|
# > only appear if the corresponding Local or Central
|
460
521
|
# > directory record field is set to 0xFFFF or 0xFFFFFFFF.
|
461
522
|
#
|
462
523
|
# It means that before we read this stuff we need to check if the previously-read
|
463
524
|
# values are at overflow, and only _then_ proceed to read them. Bah.
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
525
|
+
# Rubocop: convention: Line is too long.
|
526
|
+
if e.uncompressed_size == 0xFFFFFFFF
|
527
|
+
e.uncompressed_size = read_8b(zip64_extra)
|
528
|
+
end
|
529
|
+
if e.compressed_size == 0xFFFFFFFF
|
530
|
+
e.compressed_size = read_8b(zip64_extra)
|
531
|
+
end
|
532
|
+
if e.local_file_header_offset == 0xFFFFFFFF
|
533
|
+
e.local_file_header_offset = read_8b(zip64_extra)
|
534
|
+
end
|
535
|
+
# Disk number comes last and we can skip it anyway, since we do
|
536
|
+
# not support multi-disk archives
|
468
537
|
end
|
469
538
|
end
|
470
539
|
end
|
@@ -480,25 +549,30 @@ class ZipTricks::FileReader
|
|
480
549
|
file_io.seek(implied_position_of_eocd_record, IO::SEEK_SET)
|
481
550
|
str_containing_eocd_record = file_io.read(MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE)
|
482
551
|
eocd_idx_in_buf = locate_eocd_signature(str_containing_eocd_record)
|
483
|
-
|
552
|
+
|
484
553
|
raise MissingEOCD unless eocd_idx_in_buf
|
485
|
-
|
554
|
+
|
486
555
|
eocd_offset = implied_position_of_eocd_record + eocd_idx_in_buf
|
487
|
-
log { 'Found EOCD signature at offset %d'
|
488
|
-
|
556
|
+
log { format('Found EOCD signature at offset %d', eocd_offset) }
|
557
|
+
|
489
558
|
eocd_offset
|
490
559
|
end
|
491
560
|
|
492
|
-
# This is tricky. Essentially, we have to scan the maximum possible number
|
493
|
-
# theoretically occupy including the comment),
|
494
|
-
#
|
495
|
-
#
|
561
|
+
# This is tricky. Essentially, we have to scan the maximum possible number
|
562
|
+
# of bytes (that the EOCD can theoretically occupy including the comment),
|
563
|
+
# and we have to find a combination of:
|
564
|
+
# [EOCD signature, <some ZIP metadata>, comment byte size, the comment of
|
565
|
+
# that size, eof].
|
566
|
+
# The only way I could find to do this was with a sliding window, but
|
567
|
+
# there probably is a better way.
|
568
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
569
|
+
# locate_eocd_signature is too high. [17.49/15]
|
570
|
+
# Rubocop: convention: Method has too many lines. [14/10]
|
496
571
|
def locate_eocd_signature(in_str)
|
497
572
|
# We have to scan from the _very_ tail. We read the very minimum size
|
498
573
|
# the EOCD record can have (up to and including the comment size), using
|
499
574
|
# a sliding window. Once our end offset matches the comment size we found our
|
500
575
|
# EOCD marker.
|
501
|
-
eocd_signature_int = 0x06054b50
|
502
576
|
unpack_pattern = 'VvvvvVVv'
|
503
577
|
minimum_record_size = 22
|
504
578
|
end_location = minimum_record_size * -1
|
@@ -507,23 +581,26 @@ class ZipTricks::FileReader
|
|
507
581
|
# We use negative values because if we used positive slice indices
|
508
582
|
# we would have to detect the rollover ourselves
|
509
583
|
break unless window = in_str[end_location, minimum_record_size]
|
510
|
-
|
584
|
+
|
511
585
|
window_location = in_str.bytesize + end_location
|
512
586
|
unpacked = window.unpack(unpack_pattern)
|
513
587
|
# If we found the signature, pick up the comment size, and check if the size of the window
|
514
588
|
# plus that comment size is where we are in the string. If we are - bingo.
|
515
|
-
if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
|
589
|
+
if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
|
516
590
|
assumed_eocd_location = in_str.bytesize - comment_size - minimum_record_size
|
517
591
|
# if the comment size is where we should be at - we found our EOCD
|
518
592
|
return assumed_eocd_location if assumed_eocd_location == window_location
|
519
593
|
end
|
520
|
-
|
594
|
+
|
521
595
|
end_location -= 1 # Shift the window back, by one byte, and try again.
|
522
596
|
end
|
523
597
|
end
|
524
|
-
|
598
|
+
|
525
599
|
# Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
|
526
600
|
# EOCD record in the archive by fixed offsets
|
601
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
602
|
+
# get_zip64_eocd_location is too high. [15.17/15]
|
603
|
+
# Rubocop: convention: Method has too many lines. [15/10]
|
527
604
|
def get_zip64_eocd_location(file_io, eocd_offset)
|
528
605
|
zip64_eocd_loc_offset = eocd_offset
|
529
606
|
zip64_eocd_loc_offset -= 4 # The signature
|
@@ -531,28 +608,34 @@ class ZipTricks::FileReader
|
|
531
608
|
zip64_eocd_loc_offset -= 8 # Offset of the zip64 central directory record
|
532
609
|
zip64_eocd_loc_offset -= 4 # Total number of disks
|
533
610
|
|
534
|
-
log
|
611
|
+
log do
|
612
|
+
format('Will look for the Zip64 EOCD locator signature at offset %d',
|
613
|
+
zip64_eocd_loc_offset)
|
614
|
+
end
|
535
615
|
|
536
616
|
# If the offset is negative there is certainly no Zip64 EOCD locator here
|
537
617
|
return unless zip64_eocd_loc_offset >= 0
|
538
618
|
|
539
619
|
file_io.seek(zip64_eocd_loc_offset, IO::SEEK_SET)
|
540
620
|
assert_signature(file_io, 0x07064b50)
|
541
|
-
|
542
|
-
log { 'Found Zip64 EOCD locator at offset %d'
|
621
|
+
|
622
|
+
log { format('Found Zip64 EOCD locator at offset %d', zip64_eocd_loc_offset) }
|
543
623
|
|
544
624
|
disk_num = read_4b(file_io) # number of the disk
|
545
|
-
raise UnsupportedFeature,
|
625
|
+
raise UnsupportedFeature, 'The archive spans multiple disks' if disk_num != 0
|
546
626
|
read_8b(file_io)
|
547
627
|
rescue ReadError
|
548
628
|
nil
|
549
629
|
end
|
550
630
|
|
631
|
+
# Rubocop: convention: Assignment Branch Condition size for
|
632
|
+
# num_files_and_central_directory_offset_zip64 is too high. [21.12/15]
|
633
|
+
# Rubocop: convention: Method has too many lines. [17/10]
|
551
634
|
def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
|
552
635
|
seek(io, zip64_end_of_cdir_location)
|
553
|
-
|
636
|
+
|
554
637
|
assert_signature(io, 0x06064b50)
|
555
|
-
|
638
|
+
|
556
639
|
zip64_eocdr_size = read_8b(io)
|
557
640
|
zip64_eocdr = read_n(io, zip64_eocdr_size) # Reading in bulk is cheaper
|
558
641
|
zip64_eocdr = StringIO.new(zip64_eocdr)
|
@@ -561,13 +644,21 @@ class ZipTricks::FileReader
|
|
561
644
|
|
562
645
|
disk_n = read_4b(zip64_eocdr) # number of this disk
|
563
646
|
disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
|
564
|
-
|
647
|
+
if disk_n != disk_n_with_eocdr
|
648
|
+
raise UnsupportedFeature, 'The archive spans multiple disks'
|
649
|
+
end
|
565
650
|
|
566
651
|
num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
|
567
652
|
num_files_total = read_8b(zip64_eocdr) # files total in the central directory
|
568
|
-
raise UnsupportedFeature, "The archive spans multiple disks" if num_files_this_disk != num_files_total
|
569
653
|
|
570
|
-
|
654
|
+
if num_files_this_disk != num_files_total
|
655
|
+
raise UnsupportedFeature, 'The archive spans multiple disks'
|
656
|
+
end
|
657
|
+
|
658
|
+
log do
|
659
|
+
format('Zip64 EOCD record states there are %d files in the archive',
|
660
|
+
num_files_total)
|
661
|
+
end
|
571
662
|
|
572
663
|
central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
|
573
664
|
central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
|
@@ -575,58 +666,63 @@ class ZipTricks::FileReader
|
|
575
666
|
[num_files_total, central_dir_offset, central_dir_size]
|
576
667
|
end
|
577
668
|
|
578
|
-
C_V = 'V'
|
579
|
-
C_v = 'v'
|
580
|
-
C_Qe = 'Q<'
|
581
|
-
|
582
|
-
# To prevent too many tiny reads, read the maximum possible size of end of
|
583
|
-
# upfront (all the fixed fields + at most 0xFFFF
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
669
|
+
C_V = 'V'
|
670
|
+
C_v = 'v'
|
671
|
+
C_Qe = 'Q<'
|
672
|
+
|
673
|
+
# To prevent too many tiny reads, read the maximum possible size of end of
|
674
|
+
# central directory record upfront (all the fixed fields + at most 0xFFFF
|
675
|
+
# bytes of the archive comment)
|
676
|
+
MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
|
677
|
+
begin
|
678
|
+
4 + # Offset of the start of central directory
|
679
|
+
4 + # Size of the central directory
|
680
|
+
2 + # Number of files in the cdir
|
681
|
+
4 + # End-of-central-directory signature
|
682
|
+
2 + # Number of this disk
|
683
|
+
2 + # Number of disk with the start of cdir
|
684
|
+
2 + # Number of files in the cdir of this disk
|
685
|
+
2 + # The comment size
|
686
|
+
0xFFFF # Maximum comment size
|
687
|
+
end
|
595
688
|
|
596
689
|
# To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
|
597
690
|
# The maximum size is all the usual items, plus the maximum size
|
598
691
|
# of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
|
599
|
-
MAX_LOCAL_HEADER_SIZE =
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
692
|
+
MAX_LOCAL_HEADER_SIZE =
|
693
|
+
begin
|
694
|
+
4 + # signature
|
695
|
+
2 + # Version needed to extract
|
696
|
+
2 + # gp flags
|
697
|
+
2 + # storage mode
|
698
|
+
2 + # dos time
|
699
|
+
2 + # dos date
|
700
|
+
4 + # CRC32
|
701
|
+
4 + # Comp size
|
702
|
+
4 + # Uncomp size
|
703
|
+
2 + # Filename size
|
704
|
+
2 + # Extra fields size
|
705
|
+
0xFFFF + # Maximum filename size
|
706
|
+
0xFFFF # Maximum extra fields size
|
707
|
+
end
|
614
708
|
|
615
|
-
SIZE_OF_USABLE_EOCD_RECORD =
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
709
|
+
SIZE_OF_USABLE_EOCD_RECORD =
|
710
|
+
begin
|
711
|
+
4 + # Signature
|
712
|
+
2 + # Number of this disk
|
713
|
+
2 + # Number of the disk with the EOCD record
|
714
|
+
2 + # Number of entries in the central directory of this disk
|
715
|
+
2 + # Number of entries in the central directory total
|
716
|
+
4 + # Size of the central directory
|
717
|
+
4 # Start of the central directory offset
|
718
|
+
end
|
624
719
|
|
720
|
+
# Rubocop: convention: Method has too many lines. [11/10]
|
625
721
|
def num_files_and_central_directory_offset(file_io, eocd_offset)
|
626
722
|
seek(file_io, eocd_offset)
|
627
723
|
|
628
724
|
# The size of the EOCD record is known upfront, so use a strict read
|
629
|
-
eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
|
725
|
+
eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
|
630
726
|
io = StringIO.new(eocd_record_str)
|
631
727
|
|
632
728
|
assert_signature(io, 0x06054b50)
|
@@ -640,8 +736,8 @@ class ZipTricks::FileReader
|
|
640
736
|
end
|
641
737
|
|
642
738
|
private_constant :C_V, :C_v, :C_Qe, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
|
643
|
-
|
644
|
-
|
739
|
+
:MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
|
740
|
+
|
645
741
|
# Is provided as a stub to be overridden in a subclass if you need it. Will report
|
646
742
|
# during various stages of reading. The log message is contained in the return value
|
647
743
|
# of `yield` in the method (the log messages are lazy-evaluated).
|
@@ -649,11 +745,11 @@ class ZipTricks::FileReader
|
|
649
745
|
# The most minimal implementation for the method is just this:
|
650
746
|
# $stderr.puts(yield)
|
651
747
|
end
|
652
|
-
|
748
|
+
|
653
749
|
def parse_out_extra_fields(extra_fields_str)
|
654
750
|
extra_table = {}
|
655
751
|
extras_buf = StringIO.new(extra_fields_str)
|
656
|
-
until extras_buf.eof?
|
752
|
+
until extras_buf.eof?
|
657
753
|
extra_id = read_2b(extras_buf)
|
658
754
|
extra_size = read_2b(extras_buf)
|
659
755
|
extra_contents = read_n(extras_buf, extra_size)
|