zip_tricks 2.8.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/IMPLEMENTATION_DETAILS.md +2 -10
- data/README.md +62 -59
- data/examples/archive_size_estimate.rb +4 -4
- data/examples/rack_application.rb +3 -5
- data/lib/zip_tricks/block_deflate.rb +21 -0
- data/lib/zip_tricks/file_reader.rb +491 -0
- data/lib/zip_tricks/null_writer.rb +7 -2
- data/lib/zip_tricks/rack_body.rb +3 -3
- data/lib/zip_tricks/remote_io.rb +30 -20
- data/lib/zip_tricks/remote_uncap.rb +10 -10
- data/lib/zip_tricks/size_estimator.rb +64 -0
- data/lib/zip_tricks/stream_crc32.rb +2 -2
- data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
- data/lib/zip_tricks/streamer/entry.rb +21 -0
- data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
- data/lib/zip_tricks/streamer/writable.rb +20 -0
- data/lib/zip_tricks/streamer.rb +172 -66
- data/lib/zip_tricks/zip_writer.rb +346 -0
- data/lib/zip_tricks.rb +1 -4
- data/spec/spec_helper.rb +1 -38
- data/spec/zip_tricks/file_reader_spec.rb +47 -0
- data/spec/zip_tricks/rack_body_spec.rb +2 -2
- data/spec/zip_tricks/remote_io_spec.rb +8 -20
- data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
- data/spec/zip_tricks/size_estimator_spec.rb +31 -0
- data/spec/zip_tricks/streamer_spec.rb +59 -36
- data/spec/zip_tricks/zip_writer_spec.rb +408 -0
- data/zip_tricks.gemspec +20 -14
- metadata +33 -16
- data/lib/zip_tricks/manifest.rb +0 -85
- data/lib/zip_tricks/microzip.rb +0 -339
- data/lib/zip_tricks/stored_size_estimator.rb +0 -44
- data/spec/zip_tricks/manifest_spec.rb +0 -60
- data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
- data/spec/zip_tricks/microzip_spec.rb +0 -546
- data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -0,0 +1,491 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
|
4
|
+
# time we attempt to keep it somewhat concise.
|
5
|
+
#
|
6
|
+
# ## REALLY CRAZY IMPORTANT STUFF: SECURITY IMPLICATIONS
|
7
|
+
#
|
8
|
+
# Please **BEWARE** - using this is a security risk if you are reading files that have been
|
9
|
+
# supplied by users. This implementation has _not_ been formally verified for correctness. As
|
10
|
+
# ZIP files contain relative offsets in lots of places it might be possible for a maliciously
|
11
|
+
# crafted ZIP file to put the decode procedure in an endless loop, make it attempt huge reads
|
12
|
+
# from the input file and so on. Additionally, the reader module for deflated data has
|
13
|
+
# no support for ZIP bomb protection. So either limit the `FileReader` usage to the files you
|
14
|
+
# trust, or triple-check all the inputs upfront. Patches to make this reader more secure
|
15
|
+
# are welcome of course.
|
16
|
+
#
|
17
|
+
# ## Usage
|
18
|
+
#
|
19
|
+
# File.open('zipfile.zip', 'rb') do |f|
|
20
|
+
# entries = FileReader.read_zip_structure(f)
|
21
|
+
# entries.each do |e|
|
22
|
+
# File.open(e.filename, 'wb') do |extracted_file|
|
23
|
+
# ex = e.extractor_from(f)
|
24
|
+
# extracted_file << ex.extract(1024 * 1024) until ex.eof?
|
25
|
+
# end
|
26
|
+
# end
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# ## Supported features
|
30
|
+
#
|
31
|
+
# * Deflate and stored storage modes
|
32
|
+
# * Zip64 (extra fields and offsets)
|
33
|
+
# * Data descriptors
|
34
|
+
#
|
35
|
+
# ## Unsupported features
|
36
|
+
#
|
37
|
+
# * Archives split over multiple disks/files
|
38
|
+
# * Any ZIP encryption
|
39
|
+
# * EFS language flag and InfoZIP filename extra field
|
40
|
+
# * CRC32 checksums are _not_ verified
|
41
|
+
#
|
42
|
+
# ## Mode of operation
|
43
|
+
#
|
44
|
+
# Basically, `FileReader` _ignores_ the data in local file headers (as it is often unreliable).
|
45
|
+
# It reads the ZIP file "from the tail", finds the end-of-central-directory signatures, then
|
46
|
+
# reads the central directory entries, reconstitutes the entries with their filenames, attributes
|
47
|
+
# and so on, and sets these entries up with the absolute _offsets_ into the source file/IO object.
|
48
|
+
# These offsets can then be used to extract the actual compressed data of the files and to expand it.
|
49
|
+
class ZipTricks::FileReader
|
50
|
+
ReadError = Class.new(StandardError)
|
51
|
+
UnsupportedFeature = Class.new(StandardError)
|
52
|
+
InvalidStructure = Class.new(ReadError)
|
53
|
+
|
54
|
+
# Reads the deflated data of a single entry from an IO and inflates it
# piece by piece.
class InflatingReader
  # @param from_io [#read] the IO to read the compressed bytes from
  # @param compressed_data_size [Integer] how many compressed bytes belong to the entry
  def initialize(from_io, compressed_data_size)
    @io = from_io
    @compressed_data_size = compressed_data_size
    @already_read = 0
    # Negative window bits make zlib expect a raw deflate stream (no zlib header)
    @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
  end

  # Reads up to `n_bytes` of _compressed_ data from the IO and returns whatever
  # the inflater produces for that input.
  #
  # @param n_bytes [Integer, nil] how many compressed bytes to consume at most,
  #   or `nil` to consume everything that is left
  # @return [String, nil] the inflated data, an empty String when `n_bytes` is 0,
  #   or `nil` once `eof?` is true
  # @raise [ReadError] if the IO runs dry before `compressed_data_size` bytes were read
  def extract(n_bytes = nil)
    return if eof?

    available = @compressed_data_size - @already_read
    n_bytes = available if n_bytes.nil? || n_bytes > available
    return '' if n_bytes.zero?

    compressed_chunk = @io.read(n_bytes)
    # A nil/empty read at this point means the source is shorter than the entry claims
    if compressed_chunk.nil? || compressed_chunk.empty?
      raise ReadError, "Expected to read #{n_bytes} bytes of compressed data, but the IO was at the end"
    end

    @already_read += compressed_chunk.bytesize
    @zlib_inflater.inflate(compressed_chunk)
  end

  # Tells whether there is nothing more to extract: either the deflate stream has
  # ended, or all the compressed bytes of the entry have been consumed already
  # (so a stream that never terminates cannot keep the caller looping forever).
  #
  # @return [Boolean]
  def eof?
    @zlib_inflater.finished? || @already_read >= @compressed_data_size
  end
end
|
84
|
+
|
85
|
+
# Reads the data of a single stored (uncompressed) entry from an IO,
# piece by piece.
class StoredReader
  # @param from_io [#read] the IO to read the entry bytes from
  # @param compressed_data_size [Integer] how many bytes belong to the entry
  def initialize(from_io, compressed_data_size)
    @io = from_io
    @compressed_data_size = compressed_data_size
    @already_read = 0
  end

  # Reads up to `n_bytes` of the entry from the IO.
  #
  # @param n_bytes [Integer, nil] how many bytes to read at most,
  #   or `nil` to read everything that is left
  # @return [String, nil] the bytes read, an empty String when `n_bytes` is 0,
  #   or `nil` once `eof?` is true
  # @raise [ReadError] if the IO runs dry before `compressed_data_size` bytes were read
  def extract(n_bytes = nil)
    return if eof?

    available = @compressed_data_size - @already_read
    n_bytes = available if n_bytes.nil? || n_bytes > available
    return '' if n_bytes.zero?

    chunk = @io.read(n_bytes)
    # A nil/empty read at this point means the source is shorter than the entry claims
    if chunk.nil? || chunk.empty?
      raise ReadError, "Expected to read #{n_bytes} bytes of stored data, but the IO was at the end"
    end

    @already_read += chunk.bytesize
    chunk
  end

  # @return [Boolean] whether all the bytes of the entry have been read
  def eof?
    @already_read >= @compressed_data_size
  end
end
|
114
|
+
|
115
|
+
private_constant :StoredReader, :InflatingReader
|
116
|
+
|
117
|
+
# A single file entry of the ZIP archive being read, populated from
# the central directory.
class ZipEntry
  # @return [Integer] bit-packed version signature of the program that made the archive
  attr_accessor :made_by

  # @return [Integer] ZIP version support needed to extract this file
  attr_accessor :version_needed_to_extract

  # @return [Integer] bit-packed general purpose flags
  attr_accessor :gp_flags

  # @return [Integer] storage mode (0 for stored, 8 for deflate)
  attr_accessor :storage_mode

  # @return [Integer] the bit-packed DOS time
  attr_accessor :dos_time

  # @return [Integer] the bit-packed DOS date
  attr_accessor :dos_date

  # @return [Integer] the CRC32 checksum of this file
  attr_accessor :crc32

  # @return [Integer] size of the compressed file data in the ZIP
  attr_accessor :compressed_size

  # @return [Integer] size of the file once uncompressed
  attr_accessor :uncompressed_size

  # @return [String] the filename
  attr_accessor :filename

  # @return [Integer] disk number where this file starts
  attr_accessor :disk_number_start

  # @return [Integer] internal attributes of the file
  attr_accessor :internal_attrs

  # @return [Integer] external attributes of the file
  attr_accessor :external_attrs

  # @return [Integer] offset of the local file header within the source IO
  attr_accessor :local_file_header_offset

  # @return [String] the file comment
  attr_accessor :comment

  # @return [Integer] offset within the source IO at which the compressed
  #   data of the entry starts
  attr_accessor :compressed_data_offset

  # Positions the given IO at the start of the compressed data and returns
  # a reader suitable for the storage mode of the entry.
  #
  #     extractor = entry.extractor_from(source_file)
  #     outfile << extractor.extract(512 * 1024) until extractor.eof?
  #
  # @param from_io [#seek, #read] the IO the archive is being read from
  # @return [#extract(n_bytes), #eof?] the reader for the data
  # @raise [RuntimeError] if the storage mode is neither 0 nor 8
  def extractor_from(from_io)
    # The seek happens first, even if the storage mode turns out to be unsupported
    from_io.seek(compressed_data_offset, IO::SEEK_SET)
    return InflatingReader.new(from_io, compressed_size) if storage_mode == 8
    return StoredReader.new(from_io, compressed_size) if storage_mode == 0

    raise "Unsupported storage mode for reading (#{storage_mode})"
  end
end
|
187
|
+
|
188
|
+
# Reads the central directory of a ZIP archive and returns its entries,
# each one set up with the offset of its compressed data in the IO.
#
# @param io[#tell, #seek, #read, #size] an IO-ish object
# @return [Array<ZipEntry>] the entries within the ZIP being parsed
def read_zip_structure(io)
  eocd_offset = get_eocd_offset(io, io.size)

  # A Zip64 locator, when present, takes precedence over the classic EOCD record
  zip64_eocd_offset = get_zip64_eocd_locator_offset(io, eocd_offset)
  num_files, cdir_location, cdir_size =
    if zip64_eocd_offset
      num_files_and_central_directory_offset_zip64(io, zip64_eocd_offset)
    else
      num_files_and_central_directory_offset(io, eocd_offset)
    end

  # Pull the whole central directory into memory with a single read
  seek(io, cdir_location)
  cdir_io = StringIO.new(read_n(io, cdir_size))

  entries = num_files.times.map { read_cdir_entry(cdir_io) }
  entries.each do |entry|
    entry.compressed_data_offset = find_compressed_data_start_offset(io, entry.local_file_header_offset)
  end
end
|
213
|
+
|
214
|
+
# Convenience entry point: creates a reader and lets it parse the given IO.
#
# @param io[#tell, #seek, #read, #size] an IO-ish object
# @return [Array<ZipEntry>] the entries within the ZIP being parsed
def self.read_zip_structure(io)
  reader = new
  reader.read_zip_structure(io)
end
|
221
|
+
|
222
|
+
private
|
223
|
+
|
224
|
+
# Moves the IO 2 bytes forward (the width of a 16-bit field).
def skip_ahead_2(io)
  skip_ahead_n(io, 2)
end
|
227
|
+
|
228
|
+
# Moves the IO 4 bytes forward (the width of a 32-bit field).
def skip_ahead_4(io)
  skip_ahead_n(io, 4)
end
|
231
|
+
|
232
|
+
# Moves the IO 8 bytes forward (the width of a 64-bit field).
def skip_ahead_8(io)
  skip_ahead_n(io, 8)
end
|
235
|
+
|
236
|
+
# Seeks the IO to an absolute position and verifies that it got there.
#
# @param io [#seek, #tell] the IO to reposition
# @param absolute_pos [Integer] the offset to seek to
# @return [nil]
# @raise [ReadError] if the IO reports a different position after the seek
def seek(io, absolute_pos)
  io.seek(absolute_pos, IO::SEEK_SET)
  return if absolute_pos == io.tell

  raise ReadError, "Expected to seek to #{absolute_pos} but only got to #{io.tell}"
end
|
241
|
+
|
242
|
+
# Reads the next 4 bytes of the IO as a little-endian integer and checks
# that they equal the expected record signature.
#
# @param io [#read] the IO positioned at the signature
# @param signature_magic_number [Integer] the signature that is expected
# @return [nil]
# @raise [InvalidStructure] if a different value was read
def assert_signature(io, signature_magic_number)
  readback = read_4b(io)
  return if readback == signature_magic_number

  # Signatures are 32 bits wide, so always print 8 hex digits
  expected = format('0x%08x', signature_magic_number)
  actual = format('0x%08x', readback)
  raise InvalidStructure, "Expected signature #{expected}, but read #{actual}"
end
|
251
|
+
|
252
|
+
# Moves the IO `n` bytes forward from its current position and verifies,
# using the positions the IO reports, that it moved by exactly that much.
#
# @param io [#seek, #tell] the IO to advance
# @param n [Integer] how many bytes to skip
# @return [nil]
# @raise [ReadError] if the reported position changed by a different amount
def skip_ahead_n(io, n)
  started_at = io.tell
  io.seek(io.tell + n, IO::SEEK_SET)
  moved_by = io.tell - started_at
  return if moved_by == n

  raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{moved_by} bytes ahead"
end
|
260
|
+
|
261
|
+
# Reads exactly `n_bytes` from the IO.
#
# @param io [#read] the IO to read from
# @param n_bytes [Integer] how many bytes are required
# @return [String] the bytes read
# @raise [ReadError] if the IO returned nil or a different number of bytes
def read_n(io, n_bytes)
  data = io.read(n_bytes)
  raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if data.nil?
  raise ReadError, "Expected to read #{n_bytes} bytes, read #{data.bytesize}" unless data.bytesize == n_bytes

  data
end
|
267
|
+
|
268
|
+
# Reads 2 bytes from the IO and decodes them as a little-endian unsigned integer.
def read_2b(io)
  raw = read_n(io, 2)
  raw.unpack(C_v).first
end
|
271
|
+
|
272
|
+
# Reads 4 bytes from the IO and decodes them as a little-endian unsigned integer.
def read_4b(io)
  raw = read_n(io, 4)
  raw.unpack(C_V).first
end
|
275
|
+
|
276
|
+
# Reads 8 bytes from the IO and decodes them as a little-endian unsigned integer.
def read_8b(io)
  raw = read_n(io, 8)
  raw.unpack(C_Qe).first
end
|
279
|
+
|
280
|
+
# Finds the absolute offset at which the compressed data of an entry starts,
# by walking over the local file header located at `local_header_offset`.
#
# @param file_io [#seek, #tell, #read] the IO the archive is being read from
# @param local_header_offset [Integer] where the local file header of the entry starts
# @return [Integer] absolute offset of the first byte of the compressed data
# @raise [ReadError] if there is nothing to read at `local_header_offset`
def find_compressed_data_start_offset(file_io, local_header_offset)
  seek(file_io, local_header_offset)

  # Reading in bulk is cheaper - grab the maximum length of the local header, including
  # any headroom. This is a soft read: fewer bytes may come back near the end of the IO.
  local_file_header_str_plus_headroom = file_io.read(MAX_LOCAL_HEADER_SIZE)
  if local_file_header_str_plus_headroom.nil?
    raise ReadError, "Expected a local file header at offset #{local_header_offset}, but the IO was at the end"
  end

  io = StringIO.new(local_file_header_str_plus_headroom)

  assert_signature(io, 0x04034b50)

  # The rest is unreliable, and we have that information from the central directory already.
  # So just skip over it to get at the offset where the compressed data begins
  skip_ahead_2(io) # Version needed to extract
  skip_ahead_2(io) # gp flags
  skip_ahead_2(io) # storage mode
  skip_ahead_2(io) # dos time
  skip_ahead_2(io) # dos date
  skip_ahead_4(io) # CRC32

  skip_ahead_4(io) # Comp size
  skip_ahead_4(io) # Uncomp size

  filename_size = read_2b(io)
  extra_size = read_2b(io)

  skip_ahead_n(io, filename_size)
  skip_ahead_n(io, extra_size)

  # The position within the header buffer is relative to where the header started
  local_header_offset + io.tell
end
|
310
|
+
|
311
|
+
|
312
|
+
# Reads one central directory file header from the IO and turns it into a ZipEntry.
#
# @param io [#read] IO positioned at the signature of a central directory entry
# @return [ZipEntry] the entry, without `compressed_data_offset` set
def read_cdir_entry(io)
  assert_signature(io, 0x02014b50)
  ZipEntry.new.tap do |e|
    e.made_by = read_2b(io)
    e.version_needed_to_extract = read_2b(io)
    e.gp_flags = read_2b(io)
    e.storage_mode = read_2b(io)
    e.dos_time = read_2b(io)
    e.dos_date = read_2b(io)
    e.crc32 = read_4b(io)
    e.compressed_size = read_4b(io)
    e.uncompressed_size = read_4b(io)
    filename_size = read_2b(io)
    extra_size = read_2b(io)
    comment_len = read_2b(io)
    e.disk_number_start = read_2b(io)
    e.internal_attrs = read_2b(io)
    e.external_attrs = read_4b(io)
    e.local_file_header_offset = read_4b(io)
    e.filename = read_n(io, filename_size)

    # Extra fields
    extras = read_n(io, extra_size)
    # Comment
    e.comment = read_n(io, comment_len)

    # Parse out the extra fields: a sequence of (id, size, contents) records
    extra_table = {}
    extras_buf = StringIO.new(extras)
    until extras_buf.eof?
      extra_id = read_2b(extras_buf)
      extra_field_size = read_2b(extras_buf)
      extra_table[extra_id] = read_n(extras_buf, extra_field_size)
    end

    # ...of which we really only need the Zip64 extra (header ID 1)
    zip64_extra_contents = extra_table[1]
    if zip64_extra_contents
      zip64_extra = StringIO.new(zip64_extra_contents)
      # The Zip64 extra only carries a value for the fields that are saturated
      # (set to 0xFFFFFFFF) in the fixed part of the record, in this fixed order.
      # Reading all of them unconditionally would misassign or overrun the data.
      e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
      e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
      e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
    end
  end
end
|
358
|
+
|
359
|
+
# Finds the absolute offset of the end-of-central-directory (EOCD) record by
# scanning the tail of the file for its signature.
#
# @param file_io [#seek, #read] the IO the archive is being read from
# @param zip_file_size [Integer] total size of the archive
# @return [Integer] absolute offset of the EOCD signature
# @raise [RuntimeError] if no EOCD signature can be found
def get_eocd_offset(file_io, zip_file_size)
  # Start reading from the _comment_ of the zip file (from the very end).
  # The maximum size of the comment is 0xFFFF (what fits in 2 bytes)
  implied_position_of_eocd_record = [zip_file_size - MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE, 0].max

  # Use a soft seek (we might not be able to get as far behind in the IO as we want)
  # and a soft read (we might not be able to read as many bytes as we want).
  # An empty IO yields nil from the read, which is treated as an empty buffer.
  file_io.seek(implied_position_of_eocd_record, IO::SEEK_SET)
  str_containing_eocd_record = file_io.read(MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE) || ''

  eocd_sig = [0x06054b50].pack(C_V)
  eocd_idx_in_buf = locate_eocd_signature(str_containing_eocd_record, eocd_sig)

  raise "Could not find the EOCD signature in the buffer - maybe a malformed ZIP file" unless eocd_idx_in_buf

  implied_position_of_eocd_record + eocd_idx_in_buf
end

# Picks the EOCD signature occurrence to use when the buffer contains several
# (for instance when a stored entry is itself a ZIP). Scans from the end and
# prefers the occurrence whose comment-length field accounts exactly for the
# bytes that follow the record; falls back to the first occurrence otherwise.
def locate_eocd_signature(buf, eocd_sig)
  search_from = buf.bytesize
  loop do
    idx = buf.rindex(eocd_sig, search_from)
    break unless idx

    # The 2-byte comment length sits 20 bytes into the record; the fixed part is 22 bytes
    comment_size_bytes = buf.byteslice(idx + 20, 2)
    if comment_size_bytes && comment_size_bytes.bytesize == 2
      return idx if idx + 22 + comment_size_bytes.unpack(C_v).first == buf.bytesize
    end

    break if idx.zero?
    search_from = idx - 1
  end
  buf.index(eocd_sig)
end
|
378
|
+
|
379
|
+
# Looks for the Zip64 EOCD locator, which sits at a fixed distance right
# before the EOCD record, and reads from it the offset of the Zip64 EOCD record.
#
# @param file_io [#seek, #read] the IO the archive is being read from
# @param eocd_offset [Integer] absolute offset of the EOCD record
# @return [Integer, nil] the offset of the Zip64 EOCD record, or nil when there is
#   no locator (not enough room for it, or a read/signature failure)
# @raise [UnsupportedFeature] if the locator refers to a disk other than 0
def get_zip64_eocd_locator_offset(file_io, eocd_offset)
  locator_size =
    4 + # The signature
    4 + # Which disk has the Zip64 end of central directory record
    8 + # Offset of the zip64 central directory record
    4   # Total number of disks
  zip64_eocd_loc_offset = eocd_offset - locator_size

  # If the offset is negative there is certainly no Zip64 EOCD locator here
  return if zip64_eocd_loc_offset < 0

  begin
    file_io.seek(zip64_eocd_loc_offset, IO::SEEK_SET)
    assert_signature(file_io, 0x07064b50)
    raise UnsupportedFeature, "The archive spans multiple disks" unless read_4b(file_io) == 0

    read_8b(file_io)
  rescue ReadError
    # No (readable) locator at this position - this is not a Zip64 archive
    nil
  end
end
|
399
|
+
|
400
|
+
# Reads the Zip64 end-of-central-directory record and extracts from it
# the number of entries and the location/size of the central directory.
#
# @param io [#seek, #tell, #read] the IO the archive is being read from
# @param zip64_end_of_cdir_location [Integer] absolute offset of the Zip64 EOCD record
# @return [Array<Integer>] number of files, central directory offset, central directory size
# @raise [UnsupportedFeature] if the disk numbers or the per-disk/total entry counts differ
def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
  seek(io, zip64_end_of_cdir_location)
  assert_signature(io, 0x06064b50)

  # The record states its own size; read that many bytes in one go (cheaper than tiny reads)
  record_size = read_8b(io)
  record = StringIO.new(read_n(io, record_size))

  skip_ahead_2(record) # version made by
  skip_ahead_2(record) # version needed to extract

  this_disk = read_4b(record) # number of this disk
  disk_with_eocdr = read_4b(record) # number of the disk with the EOCDR
  raise UnsupportedFeature, "The archive spans multiple disks" unless this_disk == disk_with_eocdr

  entries_on_this_disk = read_8b(record) # number of files on this disk
  entries_total = read_8b(record) # files total in the central directory
  raise UnsupportedFeature, "The archive spans multiple disks" unless entries_on_this_disk == entries_total

  cdir_size = read_8b(record) # Size of the central directory
  cdir_offset = read_8b(record) # Where the central directory starts

  [entries_total, cdir_offset, cdir_size]
end
|
425
|
+
|
426
|
+
C_V = 'V'.freeze
|
427
|
+
C_v = 'v'.freeze
|
428
|
+
C_Qe = 'Q<'.freeze
|
429
|
+
|
430
|
+
# Upper bound for the size of the end of central directory record: all the
# fixed fields listed below plus at most 0xFFFF bytes of archive comment.
# Reading this much from the tail upfront avoids many tiny reads.
MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = [
  4,      # Offset of the start of central directory
  4,      # Size of the central directory
  2,      # Number of files in the cdir
  4,      # End-of-central-directory signature
  2,      # Number of this disk
  2,      # Number of disk with the start of cdir
  2,      # Number of files in the cdir of this disk
  2,      # The comment size
  0xFFFF, # Maximum comment size
].inject(:+)
|
443
|
+
|
444
|
+
# Upper bound for the size of a local file header: all the fixed fields plus
# the maximum size of the filename (0xFFFF bytes) and of the extras (0xFFFF bytes).
# Reading this much upfront avoids many tiny reads.
MAX_LOCAL_HEADER_SIZE = [
  4,      # signature
  2,      # Version needed to extract
  2,      # gp flags
  2,      # storage mode
  2,      # dos time
  2,      # dos date
  4,      # CRC32
  4,      # Comp size
  4,      # Uncomp size
  2,      # Filename size
  2,      # Extra fields size
  0xFFFF, # Maximum filename size
  0xFFFF, # Maximum extra fields size
].inject(:+)
|
462
|
+
|
463
|
+
# Size of the part of the end of central directory record that gets parsed:
# everything from the signature up to and including the central directory offset.
SIZE_OF_USABLE_EOCD_RECORD = [
  4, # Signature
  2, # Number of this disk
  2, # Number of the disk with the EOCD record
  2, # Number of entries in the central directory of this disk
  2, # Number of entries in the central directory total
  4, # Size of the central directory
  4, # Start of the central directory offset
].inject(:+)
|
472
|
+
|
473
|
+
# Reads the (non-Zip64) end-of-central-directory record and extracts from it
# the number of entries and the location/size of the central directory.
#
# @param file_io [#seek, #tell, #read] the IO the archive is being read from
# @param eocd_offset [Integer] absolute offset of the EOCD record
# @return [Array<Integer>] number of files, central directory offset, central directory size
def num_files_and_central_directory_offset(file_io, eocd_offset)
  seek(file_io, eocd_offset)
  record = StringIO.new(read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD))

  assert_signature(record, 0x06054b50)

  # Three 16-bit fields that are not used: number of this disk, number of the
  # disk with the EOCD record, number of entries in the central directory of this disk
  3.times { skip_ahead_2(record) }

  entries_total = read_2b(record) # number of entries in the central directory total
  cdir_size = read_4b(record) # size of the central directory
  cdir_offset = read_4b(record) # start of central directory offset
  [entries_total, cdir_offset, cdir_size]
end
|
488
|
+
|
489
|
+
private_constant :C_V, :C_v, :C_Qe, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
|
490
|
+
:MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
|
491
|
+
end
|
@@ -2,6 +2,11 @@
|
|
2
2
|
# write operations, but want to discard the data (like when
|
3
3
|
# estimating the size of a ZIP)
|
4
4
|
module ZipTricks::NullWriter
|
5
|
-
|
6
|
-
|
5
|
+
# Accepts the data and discards it.
#
# @param data[String] the data to write
# @return [self]
def self.<<(data)
  self
end
|
8
|
+
|
9
|
+
# Accepts the data and discards it, reporting its size in bytes.
#
# @param data[String] the data to write
# @return [Integer] the amount of data that was supposed to be written
def self.write(data)
  data.bytesize
end
|
7
12
|
end
|
data/lib/zip_tricks/rack_body.rb
CHANGED
@@ -9,13 +9,13 @@ class ZipTricks::RackBody
|
|
9
9
|
# The archive will be automatically closed at the end of the block.
|
10
10
|
#
|
11
11
|
# # Precompute the Content-Length ahead of time
|
12
|
-
# content_length = ZipTricks::
|
13
|
-
# estimator.add_stored_entry('large.tif', size
|
12
|
+
# content_length = ZipTricks::SizeEstimator.estimate do | estimator |
|
13
|
+
# estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
|
14
14
|
# end
|
15
15
|
#
|
16
16
|
# # Prepare the response body. The block will only be called when the response starts to be written.
|
17
17
|
# body = ZipTricks::RackBody.new do | streamer |
|
18
|
-
# streamer.add_stored_entry('large.tif', size
|
18
|
+
# streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
|
19
19
|
# streamer << large_file.read(1024*1024) until large_file.eof?
|
20
20
|
# ...
|
21
21
|
# end
|
data/lib/zip_tricks/remote_io.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# An object that fakes just-enough of an IO to be dangerous
|
2
|
-
# - or, more precisely, to be useful as a source for the
|
3
|
-
# central directory parser
|
2
|
+
# - or, more precisely, to be useful as a source for the FileReader
|
3
|
+
# central directory parser. Effectively we substitute an IO object
|
4
|
+
# for an object that fetches parts of the remote file over HTTP using `Range:`
|
5
|
+
# headers. The `RemoteIO` acts as an adapter between an object that performs the
|
6
|
+
# actual fetches over HTTP and an object that expects a handful of IO methods to be
|
7
|
+
# available.
|
4
8
|
class ZipTricks::RemoteIO
|
5
|
-
|
6
|
-
# @param fetcher[#request_object_size, #request_range] an object that can fetch
|
9
|
+
# @param fetcher[#request_object_size, #request_range] an object that performs fetches
|
7
10
|
def initialize(fetcher = :NOT_SET)
|
8
11
|
@pos = 0
|
9
12
|
@fetcher = fetcher
|
@@ -12,21 +15,29 @@ class ZipTricks::RemoteIO
|
|
12
15
|
|
13
16
|
# Emulates IO#seek
|
14
17
|
# Moves the read position to the given absolute offset. The size of the remote
# object is requested (once) if it is not known yet, and the offset is passed
# through `clamp` together with 0 and that size.
#
# @param offset [Integer] the absolute offset to seek to
# @param mode [Integer] must be `IO::SEEK_SET`, other seek modes are not supported
# @return [Integer] always 0
# @raise [RuntimeError] if a mode other than `IO::SEEK_SET` is given
def seek(offset, mode = IO::SEEK_SET)
  raise "Unsupported seek mode #{mode}" unless mode == IO::SEEK_SET
  @remote_size ||= request_object_size
  @pos = clamp(0, offset, @remote_size)
  0 # always return 0!
end
|
27
|
-
|
28
|
-
# Emulates IO#
|
29
|
-
|
23
|
+
|
24
|
+
# Emulates IO#size. The size is requested on first use and remembered afterwards.
#
# @return [Integer] the size of the remote resource
def size
  @remote_size = request_object_size unless @remote_size
  @remote_size
end
|
30
|
+
|
31
|
+
# Emulates IO#read, but requires the number of bytes to read
|
32
|
+
# The method will raise if the number of bytes read from remote does
|
33
|
+
# not match the number requested. The read will be limited to the
|
34
|
+
# size of the remote resource relative to the current offset in the IO,
|
35
|
+
# so if you are at offset 0 in the IO of size 10, doing a `read(20)`
|
36
|
+
# will only return you 10 bytes of result, and not raise any exceptions.
|
37
|
+
#
|
38
|
+
# @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
|
39
|
+
# @return [String] the read bytes
|
40
|
+
def read(n_bytes=nil)
|
30
41
|
@remote_size ||= request_object_size
|
31
42
|
|
32
43
|
# If the resource is empty there is nothing to read
|
@@ -47,11 +58,10 @@ class ZipTricks::RemoteIO
|
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
50
|
-
# Returns the current pointer position within the IO
|
51
|
-
# Not used by RubyZip but used in tests of our own
|
61
|
+
# Returns the current pointer position within the IO
|
52
62
|
#
|
53
63
|
# @return [Fixnum]
|
54
|
-
def
|
64
|
+
def tell
|
55
65
|
@pos
|
56
66
|
end
|
57
67
|
|
@@ -2,6 +2,9 @@
|
|
2
2
|
# downloading the entire file. The central directory provides the
|
3
3
|
# offsets at which the actual file contents is located. You can then
|
4
4
|
# use the `Range:` HTTP headers to download those entries separately.
|
5
|
+
#
|
6
|
+
# Please read the security warning in `FileReader` _VERY CAREFULLY_
|
7
|
+
# before you use this module.
|
5
8
|
class ZipTricks::RemoteUncap
|
6
9
|
|
7
10
|
# Represents a file embedded within a remote ZIP archive
|
@@ -37,17 +40,14 @@ class ZipTricks::RemoteUncap
|
|
37
40
|
def self.files_within_zip_at(uri)
|
38
41
|
fetcher = new(uri)
|
39
42
|
fake_io = ZipTricks::RemoteIO.new(fetcher)
|
40
|
-
|
41
|
-
|
42
|
-
dir.entries.map do | rubyzip_entry |
|
43
|
+
entries = ZipTricks.const_get(:FileReader).read_zip_structure(fake_io)
|
44
|
+
entries.map do | remote_entry |
|
43
45
|
RemoteZipEntry.new do | entry |
|
44
|
-
entry.name
|
45
|
-
entry.
|
46
|
-
entry.
|
47
|
-
entry.
|
48
|
-
|
49
|
-
entry.starts_at_offset = rubyzip_entry.local_header_offset + rubyzip_entry.calculate_local_header_size
|
50
|
-
entry.ends_at_offset = entry.starts_at_offset + rubyzip_entry.compressed_size
|
46
|
+
entry.name = remote_entry.filename
|
47
|
+
entry.starts_at_offset = remote_entry.compressed_data_offset
|
48
|
+
entry.size_uncompressed = remote_entry.uncompressed_size
|
49
|
+
entry.size_compressed = remote_entry.compressed_size
|
50
|
+
entry.compression_method = remote_entry.storage_mode
|
51
51
|
end
|
52
52
|
end
|
53
53
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Helps to estimate archive sizes
|
2
|
+
class ZipTricks::SizeEstimator
|
3
|
+
require_relative 'streamer'
|
4
|
+
|
5
|
+
# Used to mark a couple of methods public
|
6
|
+
class DetailStreamer < ::ZipTricks::Streamer
|
7
|
+
public :add_file_and_write_local_header, :write_data_descriptor_for_last_entry
|
8
|
+
end
|
9
|
+
private_constant :DetailStreamer
|
10
|
+
|
11
|
+
# Creates a new estimator with a Streamer object. Normally you should use
|
12
|
+
# `estimate` instead and not use this method directly.
|
13
|
+
def initialize(streamer)
|
14
|
+
@streamer = streamer
|
15
|
+
end
|
16
|
+
private :initialize
|
17
|
+
|
18
|
+
# Computes the size of an archive by "writing" it into a sink that discards
# the data and only keeps track of the position. The sizes of the entries
# have to be known upfront. Usage:
#
#     expected_zip_size = SizeEstimator.estimate do | estimator |
#       estimator.add_stored_entry(filename: "file.doc", size: 898291)
#       estimator.add_compressed_entry(filename: "family.tif", uncompressed_size: 89281911, compressed_size: 121908)
#     end
#
# @return [Integer] the size of the resulting archive, in bytes
# @yield [SizeEstimator] the estimator
def self.estimate
  counting_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
  DetailStreamer.open(counting_io) do |streamer|
    estimator = new(streamer)
    yield(estimator)
  end
  counting_io.tell
end
|
33
|
+
|
34
|
+
# Registers a fake stored (uncompressed) entry with the streamer, so that its
# headers and its body size are counted towards the archive size.
#
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
# @param size [Integer] size of the uncompressed entry
# @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
# @return self
def add_stored_entry(filename:, size:, use_data_descriptor: false)
  with_descriptor = use_data_descriptor ? true : false
  @streamer.add_file_and_write_local_header(
    filename: filename, crc32: 0, storage_mode: 0,
    compressed_size: size, uncompressed_size: size, use_data_descriptor: with_descriptor
  )
  @streamer.simulate_write(size)
  @streamer.write_data_descriptor_for_last_entry if with_descriptor
  self
end
|
48
|
+
|
49
|
+
# Registers a fake deflated entry with the streamer, so that its headers and
# its compressed body size are counted towards the archive size.
#
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
# @param uncompressed_size [Integer] size of the uncompressed entry
# @param compressed_size [Integer] size of the compressed entry
# @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
# @return self
def add_compressed_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
  with_descriptor = use_data_descriptor ? true : false
  @streamer.add_file_and_write_local_header(
    filename: filename, crc32: 0, storage_mode: 8,
    compressed_size: compressed_size, uncompressed_size: uncompressed_size,
    use_data_descriptor: with_descriptor
  )
  @streamer.simulate_write(compressed_size)
  @streamer.write_data_descriptor_for_last_entry if with_descriptor
  self
end
|
64
|
+
end
|