zip_tricks 2.8.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/IMPLEMENTATION_DETAILS.md +2 -10
- data/README.md +62 -59
- data/examples/archive_size_estimate.rb +4 -4
- data/examples/rack_application.rb +3 -5
- data/lib/zip_tricks/block_deflate.rb +21 -0
- data/lib/zip_tricks/file_reader.rb +491 -0
- data/lib/zip_tricks/null_writer.rb +7 -2
- data/lib/zip_tricks/rack_body.rb +3 -3
- data/lib/zip_tricks/remote_io.rb +30 -20
- data/lib/zip_tricks/remote_uncap.rb +10 -10
- data/lib/zip_tricks/size_estimator.rb +64 -0
- data/lib/zip_tricks/stream_crc32.rb +2 -2
- data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
- data/lib/zip_tricks/streamer/entry.rb +21 -0
- data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
- data/lib/zip_tricks/streamer/writable.rb +20 -0
- data/lib/zip_tricks/streamer.rb +172 -66
- data/lib/zip_tricks/zip_writer.rb +346 -0
- data/lib/zip_tricks.rb +1 -4
- data/spec/spec_helper.rb +1 -38
- data/spec/zip_tricks/file_reader_spec.rb +47 -0
- data/spec/zip_tricks/rack_body_spec.rb +2 -2
- data/spec/zip_tricks/remote_io_spec.rb +8 -20
- data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
- data/spec/zip_tricks/size_estimator_spec.rb +31 -0
- data/spec/zip_tricks/streamer_spec.rb +59 -36
- data/spec/zip_tricks/zip_writer_spec.rb +408 -0
- data/zip_tricks.gemspec +20 -14
- metadata +33 -16
- data/lib/zip_tricks/manifest.rb +0 -85
- data/lib/zip_tricks/microzip.rb +0 -339
- data/lib/zip_tricks/stored_size_estimator.rb +0 -44
- data/spec/zip_tricks/manifest_spec.rb +0 -60
- data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
- data/spec/zip_tricks/microzip_spec.rb +0 -546
- data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -0,0 +1,346 @@
|
|
1
|
+
# A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
# separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
#
# The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
# is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
# without depending on data available elsewhere. This makes the writer very easy to test, since each of
# its methods outputs something that only depends on the method's arguments. For example, we use this
# to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
# to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
# with given offsets as arguments.
#
# Since some methods need a lot of data about the entity being written, everything is passed via
# keyword arguments - this way it is much less likely that you can make a mistake writing something.
#
# Another reason for having a separate Writer is that most ZIP libraries attach the methods for
# writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
# However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
# absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
# one screen. Altering or checking that code then becomes much, much easier. The methods doing the
# writing are also intentionally left very verbose - so that you can follow what is happening at
# all times.
#
# All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
# that to output to String objects, or to output to Arrays that you can later join together.
class ZipTricks::ZipWriter
  FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
  TWO_BYTE_MAX_UINT = 0xFFFF
  ZIP_TRICKS_COMMENT = 'Written using ZipTricks %s' % ZipTricks::VERSION
  VERSION_MADE_BY = 52
  VERSION_NEEDED_TO_EXTRACT = 20
  VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
  DEFAULT_EXTERNAL_ATTRS = begin
    # These need to be set so that the unarchived files do not become executable on UNIX, for
    # security purposes. Strictly speaking we would want to make this user-customizable,
    # but for now just putting in sane defaults will do. For example, Trac with zipinfo does this:
    # zipinfo.external_attr = 0644 << 16L # permissions -r-wr--r--.
    # We snatch the incantations from Rubyzip for this.
    unix_perms = 0644
    file_type_file = 010
    (file_type_file << 12 | (unix_perms & 07777)) << 16
  end
  MADE_BY_SIGNATURE = begin
    # A combination of the VERSION_MADE_BY low byte and the OS type high byte
    os_type = 3 # UNIX
    [VERSION_MADE_BY, os_type].pack('CC')
  end

  C_V = 'V'.freeze # Encode a 4-byte little-endian uint
  C_v = 'v'.freeze # Encode a 2-byte little-endian uint
  C_Qe = 'Q<'.freeze # Encode an 8-byte little-endian uint

  private_constant :FOUR_BYTE_MAX_UINT, :TWO_BYTE_MAX_UINT,
    :VERSION_MADE_BY, :VERSION_NEEDED_TO_EXTRACT, :VERSION_NEEDED_TO_EXTRACT_ZIP64,
    :DEFAULT_EXTERNAL_ATTRS, :MADE_BY_SIGNATURE,
    :C_V, :C_v, :C_Qe, :ZIP_TRICKS_COMMENT

  # Writes the local file header, that precedes the actual file _data_.
  # If either size exceeds 0xFFFFFFFF the 4-byte size fields are set to the overflow value
  # and the real sizes are written into a Zip64 extra field that follows the filename.
  #
  # @param io[#<<] the buffer to write the local file header to
  # @param filename[String] the name of the file in the archive
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @param crc32[Integer] The CRC32 checksum of the file
  # @param mtime[Time] the modification time to be recorded in the ZIP
  # @param gp_flags[Integer] bit-packed general purpose flags
  # @param storage_mode[Integer] 8 for deflated, 0 for stored...
  # @return [void]
  def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
    requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)

    io << [0x04034b50].pack(C_V)                        # local file header signature     4 bytes  (0x04034b50)
    if requires_zip64                                   # version needed to extract       2 bytes
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v)
    else
      io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)
    end

    io << [gp_flags].pack(C_v)                          # general purpose bit flag        2 bytes
    io << [storage_mode].pack(C_v)                      # compression method              2 bytes
    io << [to_binary_dos_time(mtime)].pack(C_v)         # last mod file time              2 bytes
    io << [to_binary_dos_date(mtime)].pack(C_v)         # last mod file date              2 bytes
    io << [crc32].pack(C_V)                             # crc-32                          4 bytes

    if requires_zip64
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # compressed size                 4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # uncompressed size               4 bytes
    else
      io << [compressed_size].pack(C_V)                 # compressed size                 4 bytes
      io << [uncompressed_size].pack(C_V)               # uncompressed size               4 bytes
    end

    # Filename should not be longer than 0xFFFF otherwise this won't fit here
    io << [filename.bytesize].pack(C_v)                 # file name length                2 bytes

    extra_size = 0
    if requires_zip64
      # Zeroes are fine here: only the byte length of the extra field is needed
      extra_size += bytesize_of {|buf| write_zip_64_extra_for_local_file_header(io: buf, compressed_size: 0, uncompressed_size: 0) }
    end
    io << [extra_size].pack(C_v)                        # extra field length              2 bytes

    io << filename                                      # file name (variable size)

    # Interesting tidbit:
    # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
    # TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
    # Zip64 extra field as _the first_ extra field. If we decide to add the Info-ZIP UTF-8 field...
    if requires_zip64
      write_zip_64_extra_for_local_file_header(io: io, compressed_size: compressed_size, uncompressed_size: uncompressed_size)
    end
  end

  # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
  # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
  #
  # @param io[#<<] the buffer to write the central directory file header to
  # @param local_file_header_location[Integer] byte offset of the entry's local file header from the start of the ZIP
  # @param gp_flags[Integer] bit-packed general purpose flags
  # @param storage_mode[Integer] 8 for deflated, 0 for stored...
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @param mtime[Time] the modification time to be recorded in the ZIP
  # @param crc32[Integer] The CRC32 checksum of the file
  # @param filename[String] the name of the file in the archive
  # @param external_attrs[Integer] bit-packed external attributes (defaults to UNIX file with 0644 permissions set)
  # @return [void]
  def write_central_directory_file_header(io:, local_file_header_location:, gp_flags:, storage_mode:, compressed_size:, uncompressed_size:, mtime:, crc32:,
    filename:, external_attrs: DEFAULT_EXTERNAL_ATTRS)
    # At this point if the header begins somewhere beyond 0xFFFFFFFF we _have_ to record the offset
    # of the local file header as a zip64 extra field, so we give up, give in, you lose, love will always win...
    add_zip64 = (local_file_header_location > FOUR_BYTE_MAX_UINT) ||
      (compressed_size > FOUR_BYTE_MAX_UINT) || (uncompressed_size > FOUR_BYTE_MAX_UINT)

    io << [0x02014b50].pack(C_V)                        # central file header signature   4 bytes  (0x02014b50)
    io << MADE_BY_SIGNATURE                             # version made by                 2 bytes
    if add_zip64
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract       2 bytes
    else
      io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)       # version needed to extract       2 bytes
    end

    io << [gp_flags].pack(C_v)                          # general purpose bit flag        2 bytes
    io << [storage_mode].pack(C_v)                      # compression method              2 bytes
    io << [to_binary_dos_time(mtime)].pack(C_v)         # last mod file time              2 bytes
    io << [to_binary_dos_date(mtime)].pack(C_v)         # last mod file date              2 bytes
    io << [crc32].pack(C_V)                             # crc-32                          4 bytes

    if add_zip64
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # compressed size                 4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # uncompressed size               4 bytes
    else
      io << [compressed_size].pack(C_V)                 # compressed size                 4 bytes
      io << [uncompressed_size].pack(C_V)               # uncompressed size               4 bytes
    end

    # Filename should not be longer than 0xFFFF otherwise this won't fit here
    io << [filename.bytesize].pack(C_v)                 # file name length                2 bytes

    extra_size = 0
    if add_zip64
      extra_size += bytesize_of {|buf|
        # Supply zeroes for most values as we only care about the size of the data written
        write_zip_64_extra_for_central_directory_file_header(io: buf, compressed_size: 0, uncompressed_size: 0, local_file_header_location: 0)
      }
    end
    io << [extra_size].pack(C_v)                        # extra field length              2 bytes

    io << [0].pack(C_v)                                 # file comment length             2 bytes

    # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
    # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
    # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
    if add_zip64                                        # disk number start               2 bytes
      io << [TWO_BYTE_MAX_UINT].pack(C_v)
    else
      io << [0].pack(C_v)
    end
    io << [0].pack(C_v)                                 # internal file attributes        2 bytes
    # Honor the caller-supplied attributes; the keyword defaults to DEFAULT_EXTERNAL_ATTRS
    io << [external_attrs].pack(C_V)                    # external file attributes        4 bytes

    if add_zip64                                        # relative offset of local header 4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)
    else
      io << [local_file_header_location].pack(C_V)
    end
    io << filename                                      # file name (variable size)

    if add_zip64                                        # extra field (variable size)
      write_zip_64_extra_for_central_directory_file_header(io: io, local_file_header_location: local_file_header_location,
        compressed_size: compressed_size, uncompressed_size: uncompressed_size)
    end
    #(empty)                                            # file comment (variable size)
  end

  # Writes the data descriptor following the file data for a file whose local file header
  # was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold,
  # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
  #
  # @param io[#<<] the buffer to write the data descriptor to
  # @param crc32[Integer] The CRC32 checksum of the file
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @return [void]
  def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
    io << [0x08074b50].pack(C_V)  # Although not originally assigned a signature, the value
                                  # 0x08074b50 has commonly been adopted as a signature value
                                  # for the data descriptor record.
    io << [crc32].pack(C_V)                             # crc-32                          4 bytes

    # If one of the sizes is above 0xFFFFFFFF use ZIP64 lengths (8 bytes) instead. A good unarchiver
    # will decide to unpack it as such if it finds the Zip64 extra for the file in the central directory.
    # So also use the opportune moment to switch the entry to Zip64 if needed
    requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
    pack_spec = requires_zip64 ? C_Qe : C_V

    io << [compressed_size].pack(pack_spec)             # compressed size                 4 bytes, or 8 bytes for ZIP64
    io << [uncompressed_size].pack(pack_spec)           # uncompressed size               4 bytes, or 8 bytes for ZIP64
  end

  # Writes the "end of central directory record" (including the Zip64 salient bits if necessary).
  # The Zip64 record and locator are emitted first when the central directory size, its offset,
  # the offset of the Zip64 EOCD record or the number of files do not fit the classic fields.
  #
  # @param io[#<<] the buffer to write the central directory to.
  # @param start_of_central_directory_location[Integer] byte offset of the start of central directory from the beginning of ZIP file
  # @param central_directory_size[Integer] the size of the central directory (only file headers) in bytes
  # @param num_files_in_archive[Integer] How many files the archive contains
  # @return [void]
  def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:)
    # The Zip64 EOCD record (if any) is written directly after the central directory
    zip64_eocdr_offset = start_of_central_directory_location + central_directory_size

    zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
      start_of_central_directory_location > FOUR_BYTE_MAX_UINT ||
      zip64_eocdr_offset > FOUR_BYTE_MAX_UINT ||
      num_files_in_archive > TWO_BYTE_MAX_UINT

    # Then, if zip64 is used
    if zip64_required
      # [zip64 end of central directory record]
                                                        # zip64 end of central dir
      io << [0x06064b50].pack(C_V)                      # signature                       4 bytes  (0x06064b50)
      io << [44].pack(C_Qe)                             # size of zip64 end of central
                                                        # directory record                8 bytes
                                                        # (this is ex. the 12 bytes of the signature and the size value itself).
                                                        # Without the extensible data sector (which we are not using)
                                                        # it is always 44 bytes.
      io << MADE_BY_SIGNATURE                           # version made by                 2 bytes
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract       2 bytes
      io << [0].pack(C_V)                               # number of this disk             4 bytes
      io << [0].pack(C_V)                               # number of the disk with the
                                                        # start of the central directory  4 bytes
      io << [num_files_in_archive].pack(C_Qe)           # total number of entries in the
                                                        # central directory on this disk  8 bytes
      io << [num_files_in_archive].pack(C_Qe)           # total number of entries in the
                                                        # central directory               8 bytes
      io << [central_directory_size].pack(C_Qe)         # size of the central directory   8 bytes
                                                        # offset of start of central
                                                        # directory with respect to
      io << [start_of_central_directory_location].pack(C_Qe) # the starting disk number   8 bytes
      # zip64 extensible data sector    (variable size), blank for us

      # [zip64 end of central directory locator]
      io << [0x07064b50].pack(C_V)                      # zip64 end of central dir locator
                                                        # signature                       4 bytes  (0x07064b50)
      io << [0].pack(C_V)                               # number of the disk with the
                                                        # start of the zip64 end of
                                                        # central directory               4 bytes
      io << [zip64_eocdr_offset].pack(C_Qe)             # relative offset of the zip64
                                                        # end of central directory record 8 bytes
                                                        # (note: "relative" is actually "from the start of the file")
      io << [1].pack(C_V)                               # total number of disks           4 bytes
    end

    # Then the end of central directory record:
    io << [0x06054b50].pack(C_V)                        # end of central dir signature     4 bytes  (0x06054b50)
    io << [0].pack(C_v)                                 # number of this disk              2 bytes
    io << [0].pack(C_v)                                 # number of the disk with the
                                                        # start of the central directory   2 bytes

    if zip64_required # the number of entries will be read from the zip64 part of the central directory
      io << [TWO_BYTE_MAX_UINT].pack(C_v)               # total number of entries in the
                                                        # central directory on this disk   2 bytes
      io << [TWO_BYTE_MAX_UINT].pack(C_v)               # total number of entries in
                                                        # the central directory            2 bytes
    else
      io << [num_files_in_archive].pack(C_v)            # total number of entries in the
                                                        # central directory on this disk   2 bytes
      io << [num_files_in_archive].pack(C_v)            # total number of entries in
                                                        # the central directory            2 bytes
    end

    if zip64_required
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # size of the central directory    4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # offset of start of central
                                                        # directory with respect to
                                                        # the starting disk number         4 bytes
    else
      io << [central_directory_size].pack(C_V)          # size of the central directory    4 bytes
      io << [start_of_central_directory_location].pack(C_V) # offset of start of central
                                                        # directory with respect to
                                                        # the starting disk number         4 bytes
    end
    io << [ZIP_TRICKS_COMMENT.bytesize].pack(C_v)       # .ZIP file comment length         2 bytes
    io << ZIP_TRICKS_COMMENT                            # .ZIP file comment                (variable size)
  end

  private

  # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
  #
  # @param io[#<<] the buffer to write the extra field to
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @return [void]
  def write_zip_64_extra_for_local_file_header(io:, compressed_size:, uncompressed_size:)
    io << [0x0001].pack(C_v)                            # 2 bytes    Tag for this "extra" block type
    io << [16].pack(C_v)                                # 2 bytes    Size of this "extra" block. For us it will always be 16 (2x8)
    io << [uncompressed_size].pack(C_Qe)                # 8 bytes    Original uncompressed file size
    io << [compressed_size].pack(C_Qe)                  # 8 bytes    Size of compressed data
  end

  # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
  # also contains the location of the local file header in the ZIP as an 8-byte int.
  #
  # @param io[#<<] the buffer to write the extra field to
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @param local_file_header_location[Integer] Byte offset of the start of the local file header from the beginning of the ZIP archive
  # @return [void]
  def write_zip_64_extra_for_central_directory_file_header(io:, compressed_size:, uncompressed_size:, local_file_header_location:)
    io << [0x0001].pack(C_v)                            # 2 bytes    Tag for this "extra" block type
    io << [28].pack(C_v)                                # 2 bytes    Size of this "extra" block. For us it will always be 28
    io << [uncompressed_size].pack(C_Qe)                # 8 bytes    Original uncompressed file size
    io << [compressed_size].pack(C_Qe)                  # 8 bytes    Size of compressed data
    io << [local_file_header_location].pack(C_Qe)       # 8 bytes    Offset of local header record
    io << [0].pack(C_V)                                 # 4 bytes    Number of the disk on which this file starts
  end

  # Yields an empty binary String to the block and returns how many bytes the block appended to it.
  # Used to measure the size of a variable-length field before it is written for real.
  #
  # @yield [String] a scratch buffer that responds to `<<`
  # @return [Integer] the bytesize of the scratch buffer after the block ran
  def bytesize_of
    ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
  end

  # Packs hour, minute and second/2 of the given time into the 16-bit DOS time layout.
  #
  # @param t[Time] the time to encode
  # @return [Integer] seconds/2 in the low bits, minutes shifted by 5, hours shifted by 11
  def to_binary_dos_time(t)
    (t.sec/2) + (t.min << 5) + (t.hour << 11)
  end

  # Packs day, month and years-since-1980 of the given time into the 16-bit DOS date layout.
  #
  # @param t[Time] the time to encode
  # @return [Integer] day in the low bits, month shifted by 5, (year - 1980) shifted by 9
  def to_binary_dos_date(t)
    (t.day) + (t.month << 5) + ((t.year - 1980) << 9)
  end
end
|
data/lib/zip_tricks.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -6,6 +6,7 @@ require 'zip_tricks'
|
|
6
6
|
require 'digest'
|
7
7
|
require 'fileutils'
|
8
8
|
require 'shellwords'
|
9
|
+
require 'zip'
|
9
10
|
|
10
11
|
module Keepalive
|
11
12
|
# Travis-CI kills the build if it does not receive output on standard out or standard error
|
@@ -22,7 +23,6 @@ module Keepalive
|
|
22
23
|
extend self
|
23
24
|
end
|
24
25
|
|
25
|
-
|
26
26
|
class ManagedTempfile < Tempfile
|
27
27
|
@@managed_tempfiles = []
|
28
28
|
|
@@ -39,38 +39,6 @@ class ManagedTempfile < Tempfile
|
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
42
|
-
# A Tempfile filled with N bytes of random data, that also knows the CRC32 of that data
|
43
|
-
class RandomFile < ManagedTempfile
|
44
|
-
attr_reader :crc32
|
45
|
-
RANDOM_MEG = Random.new.bytes(1024 * 1024) # Allocate it once to prevent heap churn
|
46
|
-
def initialize(size)
|
47
|
-
super('random-bin')
|
48
|
-
binmode
|
49
|
-
crc = ZipTricks::StreamCRC32.new
|
50
|
-
bytes = size % (1024 * 1024)
|
51
|
-
megs = size / (1024 * 1024)
|
52
|
-
megs.times do
|
53
|
-
Keepalive.still_alive!
|
54
|
-
self << RANDOM_MEG
|
55
|
-
crc << RANDOM_MEG
|
56
|
-
end
|
57
|
-
random_blob = Random.new.bytes(bytes)
|
58
|
-
self << random_blob
|
59
|
-
crc << random_blob
|
60
|
-
@crc32 = crc.to_i
|
61
|
-
rewind
|
62
|
-
end
|
63
|
-
|
64
|
-
def copy_to(io)
|
65
|
-
rewind
|
66
|
-
while data = read(10*1024*1024)
|
67
|
-
io << data
|
68
|
-
Keepalive.still_alive!
|
69
|
-
end
|
70
|
-
rewind
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
42
|
module ZipInspection
|
75
43
|
def inspect_zip_with_external_tool(path_to_zip)
|
76
44
|
zipinfo_path = 'zipinfo'
|
@@ -96,11 +64,6 @@ module ZipInspection
|
|
96
64
|
au_path = '/System/Library/CoreServices/Applications/Archive Utility.app/Contents/MacOS/Archive Utility'
|
97
65
|
open_with_external_app(au_path, path_to_zip, skip_if_missing)
|
98
66
|
end
|
99
|
-
|
100
|
-
def open_zip_with_unarchiver(path_to_zip, skip_if_missing: false)
|
101
|
-
ua_path = '/Applications/The Unarchiver.app/Contents/MacOS/The Unarchiver'
|
102
|
-
open_with_external_app(ua_path, path_to_zip, skip_if_missing)
|
103
|
-
end
|
104
67
|
end
|
105
68
|
|
106
69
|
RSpec.configure do |config|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ZipTricks::FileReader do
  # Writes the text fixture into an in-memory ZIP via Streamer#write_deflated_file, reads the
  # structure back and extracts the single entry in 10-byte steps.
  # NOTE(review): the example name says data descriptors are used - that depends on Streamer,
  # which is not visible from this spec.
  it 'reads and uncompresses the file written deflated with data descriptors' do
    source_text = File.read(__dir__ + '/war-and-peace.txt')
    source_text.force_encoding(Encoding::BINARY)

    archive = StringIO.new
    ZipTricks::Streamer.open(archive) do |streamer|
      streamer.write_deflated_file('war-and-peace.txt') { |sink| sink << source_text }
    end

    parsed_entries = described_class.read_zip_structure(archive)
    expect(parsed_entries.length).to eq(1)

    extractor = parsed_entries.first.extractor_from(archive)
    readback = ''
    until extractor.eof?
      readback << extractor.extract(10)
    end

    # Compare the total size plus the head and the tail of the text
    expect(readback.bytesize).to eq(source_text.bytesize)
    expect(readback[0..10]).to eq(source_text[0..10])
    expect(readback[-10..-1]).to eq(source_text[-10..-1])
  end

  # Same round trip through Streamer#write_stored_file, extracted with a single #extract call.
  it 'reads the file written stored with data descriptors' do
    source_text = File.read(__dir__ + '/war-and-peace.txt')

    archive = StringIO.new
    ZipTricks::Streamer.open(archive) do |streamer|
      streamer.write_stored_file('war-and-peace.txt') { |sink| sink << source_text }
    end

    parsed_entries = described_class.read_zip_structure(archive)
    expect(parsed_entries.length).to eq(1)

    readback = parsed_entries.first.extractor_from(archive).extract
    expect(readback.bytesize).to eq(source_text.bytesize)
    expect(readback[0..10]).to eq(source_text[0..10])
  end
end
|
@@ -7,7 +7,7 @@ describe ZipTricks::RackBody do
|
|
7
7
|
file_body = SecureRandom.random_bytes(1024 * 1024 + 8981)
|
8
8
|
|
9
9
|
body = described_class.new do | zip |
|
10
|
-
zip.add_stored_entry("A file", file_body.bytesize, Zlib.crc32(file_body))
|
10
|
+
zip.add_stored_entry(filename: "A file", size: file_body.bytesize, crc32: Zlib.crc32(file_body))
|
11
11
|
zip << file_body
|
12
12
|
end
|
13
13
|
|
@@ -17,7 +17,7 @@ describe ZipTricks::RackBody do
|
|
17
17
|
body.close
|
18
18
|
|
19
19
|
output_buf.rewind
|
20
|
-
expect(output_buf.size).to eq(
|
20
|
+
expect(output_buf.size).to eq(1057696)
|
21
21
|
|
22
22
|
per_filename = {}
|
23
23
|
Zip::File.open(output_buf.path) do |zip_file|
|
@@ -38,7 +38,7 @@ describe ZipTricks::RemoteIO do
|
|
38
38
|
uncap = described_class.new
|
39
39
|
expect {
|
40
40
|
uncap.seek(123, :UNSUPPORTED)
|
41
|
-
}.to raise_error(
|
41
|
+
}.to raise_error(/unsupported/i)
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
@@ -50,18 +50,6 @@ describe ZipTricks::RemoteIO do
|
|
50
50
|
expect(uncap.seek(10, mode)).to eq(0)
|
51
51
|
end
|
52
52
|
end
|
53
|
-
|
54
|
-
context 'with SEEK_END mode' do
|
55
|
-
it 'seens to 10 bytes to the end of the IO' do
|
56
|
-
uncap = described_class.new
|
57
|
-
expect(uncap).to receive(:request_object_size).and_return(100)
|
58
|
-
|
59
|
-
mode = IO::SEEK_END
|
60
|
-
offset = -10
|
61
|
-
expect(uncap.seek(-10, IO::SEEK_END)).to eq(0)
|
62
|
-
expect(uncap.pos).to eq(90)
|
63
|
-
end
|
64
|
-
end
|
65
53
|
end
|
66
54
|
|
67
55
|
describe '#read' do
|
@@ -87,10 +75,10 @@ describe ZipTricks::RemoteIO do
|
|
87
75
|
|
88
76
|
context 'without arguments' do
|
89
77
|
it 'reads the entire buffer and alters the position pointer' do
|
90
|
-
expect(@subject.
|
78
|
+
expect(@subject.tell).to eq(0)
|
91
79
|
read = @subject.read
|
92
80
|
expect(read.bytesize).to eq(@buf.size)
|
93
|
-
expect(@subject.
|
81
|
+
expect(@subject.tell).to eq(@buf.size)
|
94
82
|
end
|
95
83
|
end
|
96
84
|
|
@@ -105,7 +93,7 @@ describe ZipTricks::RemoteIO do
|
|
105
93
|
|
106
94
|
it 'returns exact amount of bytes at the start of the buffer' do
|
107
95
|
bytes_read = @subject.read(10)
|
108
|
-
expect(@subject.
|
96
|
+
expect(@subject.tell).to eq(10)
|
109
97
|
@buf.seek(0)
|
110
98
|
expect(bytes_read).to eq(@buf.read(10))
|
111
99
|
end
|
@@ -114,7 +102,7 @@ describe ZipTricks::RemoteIO do
|
|
114
102
|
@subject.seek(456, IO::SEEK_SET)
|
115
103
|
|
116
104
|
bytes_read = @subject.read(10)
|
117
|
-
expect(@subject.
|
105
|
+
expect(@subject.tell).to eq(456+10)
|
118
106
|
|
119
107
|
@buf.seek(456)
|
120
108
|
expect(bytes_read).to eq(@buf.read(10))
|
@@ -124,13 +112,13 @@ describe ZipTricks::RemoteIO do
|
|
124
112
|
at_end = @buf.size - 4
|
125
113
|
@subject.seek(at_end, IO::SEEK_SET)
|
126
114
|
|
127
|
-
expect(@subject.
|
115
|
+
expect(@subject.tell).to eq(15728636)
|
128
116
|
bytes_read = @subject.read(10)
|
129
|
-
expect(@subject.
|
117
|
+
expect(@subject.tell).to eq(@buf.size) # Should have moved the pos pointer to the end
|
130
118
|
|
131
119
|
expect(bytes_read.bytesize).to eq(4)
|
132
120
|
|
133
|
-
expect(@subject.
|
121
|
+
expect(@subject.tell).to eq(@buf.size)
|
134
122
|
end
|
135
123
|
end
|
136
124
|
end
|
@@ -21,11 +21,11 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
21
21
|
|
22
22
|
File.open('temp.zip', 'wb') do |f|
|
23
23
|
ZipTricks::Streamer.open(f) do | zip |
|
24
|
-
zip.add_stored_entry('first-file.bin', payload1.size, payload1_crc)
|
24
|
+
zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
|
25
25
|
while blob = payload1.read(1024 * 5)
|
26
26
|
zip << blob
|
27
27
|
end
|
28
|
-
zip.add_stored_entry('second-file.bin', payload2.size, payload2_crc)
|
28
|
+
zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
|
29
29
|
while blob = payload2.read(1024 * 5)
|
30
30
|
zip << blob
|
31
31
|
end
|
@@ -81,9 +81,9 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
81
81
|
|
82
82
|
File.open('temp.zip', 'wb') do |f|
|
83
83
|
ZipTricks::Streamer.open(f) do | zip |
|
84
|
-
zip.add_stored_entry('first-file.bin', payload1.size, payload1_crc)
|
84
|
+
zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
|
85
85
|
zip << '' # It is empty, so a read() would return nil
|
86
|
-
zip.add_stored_entry('second-file.bin', payload2.size, payload2_crc)
|
86
|
+
zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
|
87
87
|
while blob = payload2.read(1024 * 5)
|
88
88
|
zip << blob
|
89
89
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
|
3
|
+
describe ZipTricks::SizeEstimator do
  it 'accurately predicts the output zip size' do
    # Random payloads of three different lengths; only their sizes are passed to the estimator
    small_blob = SecureRandom.random_bytes(1024 * 20)
    medium_blob = SecureRandom.random_bytes(1024 * 128)
    large_blob = SecureRandom.random_bytes(1258695)

    predicted_size = described_class.estimate do |est|
      returned = est.add_stored_entry(filename: "first-file.bin", size: small_blob.size)
      expect(returned).to eq(est), "add_stored_entry should return self"

      est.add_stored_entry(filename: "second-file.bin", size: medium_blob.size)

      returned = est.add_compressed_entry(
        filename: "second-flie.bin",
        compressed_size: large_blob.size,
        uncompressed_size: medium_blob.size
      )
      expect(returned).to eq(est), "add_compressed_entry should return self"

      # The same two kinds of entries again, this time flagged as using a data descriptor
      returned = est.add_stored_entry(
        filename: "first-file-with-descriptor.bin",
        size: small_blob.size,
        use_data_descriptor: true
      )
      expect(returned).to eq(est), "add_stored_entry should return self"

      returned = est.add_compressed_entry(
        filename: "second-file-with-descriptor.bin",
        compressed_size: large_blob.size,
        uncompressed_size: medium_blob.size,
        use_data_descriptor: true
      )
      expect(returned).to eq(est), "add_compressed_entry should return self"
    end

    expect(predicted_size).to eq(2690095)
  end
end
|