zip_tricks 2.8.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/IMPLEMENTATION_DETAILS.md +2 -10
  4. data/README.md +62 -59
  5. data/examples/archive_size_estimate.rb +4 -4
  6. data/examples/rack_application.rb +3 -5
  7. data/lib/zip_tricks/block_deflate.rb +21 -0
  8. data/lib/zip_tricks/file_reader.rb +491 -0
  9. data/lib/zip_tricks/null_writer.rb +7 -2
  10. data/lib/zip_tricks/rack_body.rb +3 -3
  11. data/lib/zip_tricks/remote_io.rb +30 -20
  12. data/lib/zip_tricks/remote_uncap.rb +10 -10
  13. data/lib/zip_tricks/size_estimator.rb +64 -0
  14. data/lib/zip_tricks/stream_crc32.rb +2 -2
  15. data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
  16. data/lib/zip_tricks/streamer/entry.rb +21 -0
  17. data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
  18. data/lib/zip_tricks/streamer/writable.rb +20 -0
  19. data/lib/zip_tricks/streamer.rb +172 -66
  20. data/lib/zip_tricks/zip_writer.rb +346 -0
  21. data/lib/zip_tricks.rb +1 -4
  22. data/spec/spec_helper.rb +1 -38
  23. data/spec/zip_tricks/file_reader_spec.rb +47 -0
  24. data/spec/zip_tricks/rack_body_spec.rb +2 -2
  25. data/spec/zip_tricks/remote_io_spec.rb +8 -20
  26. data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
  27. data/spec/zip_tricks/size_estimator_spec.rb +31 -0
  28. data/spec/zip_tricks/streamer_spec.rb +59 -36
  29. data/spec/zip_tricks/zip_writer_spec.rb +408 -0
  30. data/zip_tricks.gemspec +20 -14
  31. metadata +33 -16
  32. data/lib/zip_tricks/manifest.rb +0 -85
  33. data/lib/zip_tricks/microzip.rb +0 -339
  34. data/lib/zip_tricks/stored_size_estimator.rb +0 -44
  35. data/spec/zip_tricks/manifest_spec.rb +0 -60
  36. data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
  37. data/spec/zip_tricks/microzip_spec.rb +0 -546
  38. data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -0,0 +1,346 @@
1
+ # A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
2
+ # separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
3
+ #
4
+ # The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
5
+ # is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
6
+ # without depending on data available elsewhere. This makes the writer very easy to test, since each of
7
+ # its methods outputs something that only depends on the method's arguments. For example, we use this
8
+ # to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
9
+ # to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
10
+ # with given offsets as arguments.
11
+ #
12
+ # Since some methods need a lot of data about the entity being written, everything is passed via
13
+ # keyword arguments - this way it is much less likely that you can make a mistake writing something.
14
+ #
15
+ # Another reason for having a separate Writer is that most ZIP libraries attach the methods for
16
+ # writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
17
+ # However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
18
+ # absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
19
+ # one screen. Altering or checking that code then becomes much, much easier. The methods doing the
20
+ # writing are also intentionally left very verbose - so that you can follow what is happening at
21
+ # all times.
22
+ #
23
+ # All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
24
+ # that to output to String objects, or to output to Arrays that you can later join together.
25
+ class ZipTricks::ZipWriter
26
+ FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
27
+ TWO_BYTE_MAX_UINT = 0xFFFF
28
+ ZIP_TRICKS_COMMENT = 'Written using ZipTricks %s' % ZipTricks::VERSION
29
+ VERSION_MADE_BY = 52
30
+ VERSION_NEEDED_TO_EXTRACT = 20
31
+ VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
32
+ DEFAULT_EXTERNAL_ATTRS = begin
33
+ # These need to be set so that the unarchived files do not become executable on UNIX, for
34
+ # security purposes. Strictly speaking we would want to make this user-customizable,
35
+ # but for now just putting in sane defaults will do. For example, Trac with zipinfo does this:
36
+ # zipinfo.external_attr = 0644 << 16L # permissions -rw-r--r--.
37
+ # We snatch the incantations from Rubyzip for this.
38
+ unix_perms = 0644
39
+ file_type_file = 010
40
+ external_attrs = (file_type_file << 12 | (unix_perms & 07777)) << 16
41
+ end
42
+ MADE_BY_SIGNATURE = begin
43
+ # A combination of the VERSION_MADE_BY low byte and the OS type high byte
44
+ os_type = 3 # UNIX
45
+ [VERSION_MADE_BY, os_type].pack('CC')
46
+ end
47
+
48
+ C_V = 'V'.freeze # Encode a 4-byte little-endian uint
49
+ C_v = 'v'.freeze # Encode a 2-byte little-endian uint
50
+ C_Qe = 'Q<'.freeze # Encode an 8-byte little-endian uint
51
+
52
+ private_constant :FOUR_BYTE_MAX_UINT, :TWO_BYTE_MAX_UINT,
53
+ :VERSION_MADE_BY, :VERSION_NEEDED_TO_EXTRACT, :VERSION_NEEDED_TO_EXTRACT_ZIP64,
54
+ :DEFAULT_EXTERNAL_ATTRS, :MADE_BY_SIGNATURE,
55
+ :C_V, :C_v, :C_Qe, :ZIP_TRICKS_COMMENT
56
+
57
+ # Writes the local file header, that precedes the actual file _data_.
58
+ #
59
+ # @param io[#<<] the buffer to write the local file header to
60
+ # @param filename[String] the name of the file in the archive
61
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
62
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
63
+ # @param crc32[Fixnum] The CRC32 checksum of the file
64
+ # @param mtime[Time] the modification time to be recorded in the ZIP
65
+ # @param gp_flags[Fixnum] bit-packed general purpose flags
66
+ # @param storage_mode[Fixnum] 8 for deflated, 0 for stored...
67
+ # @return [void]
68
+ def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
69
+ requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
70
+
71
+ io << [0x04034b50].pack(C_V) # local file header signature 4 bytes (0x04034b50)
72
+ if requires_zip64 # version needed to extract 2 bytes
73
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v)
74
+ else
75
+ io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)
76
+ end
77
+
78
+ io << [gp_flags].pack(C_v) # general purpose bit flag 2 bytes
79
+ io << [storage_mode].pack(C_v) # compression method 2 bytes
80
+ io << [to_binary_dos_time(mtime)].pack(C_v) # last mod file time 2 bytes
81
+ io << [to_binary_dos_date(mtime)].pack(C_v) # last mod file date 2 bytes
82
+ io << [crc32].pack(C_V) # crc-32 4 bytes
83
+
84
+ if requires_zip64
85
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # compressed size 4 bytes
86
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # uncompressed size 4 bytes
87
+ else
88
+ io << [compressed_size].pack(C_V) # compressed size 4 bytes
89
+ io << [uncompressed_size].pack(C_V) # uncompressed size 4 bytes
90
+ end
91
+
92
+ # Filename should not be longer than 0xFFFF otherwise this won't fit here
93
+ io << [filename.bytesize].pack(C_v) # file name length 2 bytes
94
+
95
+ extra_size = 0
96
+ if requires_zip64
97
+ extra_size += bytesize_of {|buf| write_zip_64_extra_for_local_file_header(io: buf, compressed_size: 0, uncompressed_size: 0) }
98
+ end
99
+ io << [extra_size].pack(C_v) # extra field length 2 bytes
100
+
101
+ io << filename # file name (variable size)
102
+
103
+ # Interesting tidbit:
104
+ # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
105
+ # TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
106
+ # Zip64 extra field as _the first_ extra field. If we decide to add the Info-ZIP UTF-8 field...
107
+ if requires_zip64
108
+ write_zip_64_extra_for_local_file_header(io: io, compressed_size: compressed_size, uncompressed_size: uncompressed_size)
109
+ end
110
+ end
111
+
112
+ # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
113
+ # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
114
+ #
115
+ # @param io[#<<] the buffer to write the local file header to
116
+ # @param filename[String] the name of the file in the archive
117
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
118
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
119
+ # @param crc32[Fixnum] The CRC32 checksum of the file
120
+ # @param mtime[Time] the modification time to be recorded in the ZIP
121
+ # @param external_attrs[Fixnum] bit-packed external attributes (defaults to UNIX file with 0644 permissions set)
122
+ # @param gp_flags[Fixnum] bit-packed general purpose flags
123
+ # @return [void]
124
+ def write_central_directory_file_header(io:, local_file_header_location:, gp_flags:, storage_mode:, compressed_size:, uncompressed_size:, mtime:, crc32:,
125
+ filename:, external_attrs: DEFAULT_EXTERNAL_ATTRS)
126
+ # At this point if the header begins somewhere beyound 0xFFFFFFFF we _have_ to record the offset
127
+ # of the local file header as a zip64 extra field, so we give up, give in, you loose, love will always win...
128
+ add_zip64 = (local_file_header_location > FOUR_BYTE_MAX_UINT) ||
129
+ (compressed_size > FOUR_BYTE_MAX_UINT) || (uncompressed_size > FOUR_BYTE_MAX_UINT)
130
+
131
+ io << [0x02014b50].pack(C_V) # central file header signature 4 bytes (0x02014b50)
132
+ io << MADE_BY_SIGNATURE # version made by 2 bytes
133
+ if add_zip64
134
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract 2 bytes
135
+ else
136
+ io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v) # version needed to extract 2 bytes
137
+ end
138
+
139
+ io << [gp_flags].pack(C_v) # general purpose bit flag 2 bytes
140
+ io << [storage_mode].pack(C_v) # compression method 2 bytes
141
+ io << [to_binary_dos_time(mtime)].pack(C_v) # last mod file time 2 bytes
142
+ io << [to_binary_dos_date(mtime)].pack(C_v) # last mod file date 2 bytes
143
+ io << [crc32].pack(C_V) # crc-32 4 bytes
144
+
145
+ if add_zip64
146
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # compressed size 4 bytes
147
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # uncompressed size 4 bytes
148
+ else
149
+ io << [compressed_size].pack(C_V) # compressed size 4 bytes
150
+ io << [uncompressed_size].pack(C_V) # uncompressed size 4 bytes
151
+ end
152
+
153
+ # Filename should not be longer than 0xFFFF otherwise this wont fit here
154
+ io << [filename.bytesize].pack(C_v) # file name length 2 bytes
155
+
156
+ extra_size = 0
157
+ if add_zip64
158
+ extra_size += bytesize_of {|buf|
159
+ # Supply zeroes for most values as we obnly care about the size of the data written
160
+ write_zip_64_extra_for_central_directory_file_header(io: buf, compressed_size: 0, uncompressed_size: 0, local_file_header_location: 0)
161
+ }
162
+ end
163
+ io << [extra_size].pack(C_v) # extra field length 2 bytes
164
+
165
+ io << [0].pack(C_v) # file comment length 2 bytes
166
+
167
+ # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
168
+ # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
169
+ # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
170
+ if add_zip64 # disk number start 2 bytes
171
+ io << [TWO_BYTE_MAX_UINT].pack(C_v)
172
+ else
173
+ io << [0].pack(C_v)
174
+ end
175
+ io << [0].pack(C_v) # internal file attributes 2 bytes
176
+ io << [DEFAULT_EXTERNAL_ATTRS].pack(C_V) # external file attributes 4 bytes
177
+
178
+ if add_zip64 # relative offset of local header 4 bytes
179
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V)
180
+ else
181
+ io << [local_file_header_location].pack(C_V)
182
+ end
183
+ io << filename # file name (variable size)
184
+
185
+ if add_zip64 # extra field (variable size)
186
+ write_zip_64_extra_for_central_directory_file_header(io: io, local_file_header_location: local_file_header_location,
187
+ compressed_size: compressed_size, uncompressed_size: uncompressed_size)
188
+ end
189
+ #(empty) # file comment (variable size)
190
+ end
191
+
192
+ # Writes the data descriptor following the file data for a file whose local file header
193
+ # was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold,
194
+ # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
195
+ #
196
+ # @param io[#<<] the buffer to write the local file header to
197
+ # @param crc32[Fixnum] The CRC32 checksum of the file
198
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
199
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
200
+ # @return [void]
201
+ def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
202
+ io << [0x08074b50].pack(C_V) # Although not originally assigned a signature, the value
203
+ # 0x08074b50 has commonly been adopted as a signature value
204
+ # for the data descriptor record.
205
+ io << [crc32].pack(C_V) # crc-32 4 bytes
206
+
207
+
208
+ # If one of the sizes is above 0xFFFFFFFF use ZIP64 lengths (8 bytes) instead. A good unarchiver
209
+ # will decide to unpack it as such if it finds the Zip64 extra for the file in the central directory.
210
+ # So also use the opportune moment to switch the entry to Zip64 if needed
211
+ requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
212
+ pack_spec = requires_zip64 ? C_Qe : C_V
213
+
214
+ io << [compressed_size].pack(pack_spec) # compressed size 4 bytes, or 8 bytes for ZIP64
215
+ io << [uncompressed_size].pack(pack_spec) # uncompressed size 4 bytes, or 8 bytes for ZIP64
216
+ end
217
+
218
+ # Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
219
+ #
220
+ # @param io[#<<] the buffer to write the central directory to.
221
+ # @param start_of_central_directory_location[Fixnum] byte offset of the start of central directory from the beginning of ZIP file
222
+ # @param central_directory_size[Fixnum] the size of the central directory (only file headers) in bytes
223
+ # @param num_files_in_archive[Fixnum] How many files the archive contains
224
+ # @return [void]
225
+ def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:)
226
+ zip64_eocdr_offset = start_of_central_directory_location + central_directory_size
227
+
228
+ zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
229
+ start_of_central_directory_location > FOUR_BYTE_MAX_UINT ||
230
+ zip64_eocdr_offset > FOUR_BYTE_MAX_UINT ||
231
+ num_files_in_archive > TWO_BYTE_MAX_UINT
232
+
233
+ # Then, if zip64 is used
234
+ if zip64_required
235
+ # [zip64 end of central directory record]
236
+ # zip64 end of central dir
237
+ io << [0x06064b50].pack(C_V) # signature 4 bytes (0x06064b50)
238
+ io << [44].pack(C_Qe) # size of zip64 end of central
239
+ # directory record 8 bytes
240
+ # (this is ex. the 12 bytes of the signature and the size value itself).
241
+ # Without the extensible data sector (which we are not using)
242
+ # it is always 44 bytes.
243
+ io << MADE_BY_SIGNATURE # version made by 2 bytes
244
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract 2 bytes
245
+ io << [0].pack(C_V) # number of this disk 4 bytes
246
+ io << [0].pack(C_V) # number of the disk with the
247
+ # start of the central directory 4 bytes
248
+ io << [num_files_in_archive].pack(C_Qe) # total number of entries in the
249
+ # central directory on this disk 8 bytes
250
+ io << [num_files_in_archive].pack(C_Qe) # total number of entries in the
251
+ # central directory 8 bytes
252
+ io << [central_directory_size].pack(C_Qe) # size of the central directory 8 bytes
253
+ # offset of start of central
254
+ # directory with respect to
255
+ io << [start_of_central_directory_location].pack(C_Qe) # the starting disk number 8 bytes
256
+ # zip64 extensible data sector (variable size), blank for us
257
+
258
+ # [zip64 end of central directory locator]
259
+ io << [0x07064b50].pack(C_V) # zip64 end of central dir locator
260
+ # signature 4 bytes (0x07064b50)
261
+ io << [0].pack(C_V) # number of the disk with the
262
+ # start of the zip64 end of
263
+ # central directory 4 bytes
264
+ io << [zip64_eocdr_offset].pack(C_Qe) # relative offset of the zip64
265
+ # end of central directory record 8 bytes
266
+ # (note: "relative" is actually "from the start of the file")
267
+ io << [1].pack(C_V) # total number of disks 4 bytes
268
+ end
269
+
270
+ # Then the end of central directory record:
271
+ io << [0x06054b50].pack(C_V) # end of central dir signature 4 bytes (0x06054b50)
272
+ io << [0].pack(C_v) # number of this disk 2 bytes
273
+ io << [0].pack(C_v) # number of the disk with the
274
+ # start of the central directory 2 bytes
275
+
276
+ if zip64_required # the number of entries will be read from the zip64 part of the central directory
277
+ io << [TWO_BYTE_MAX_UINT].pack(C_v) # total number of entries in the
278
+ # central directory on this disk 2 bytes
279
+ io << [TWO_BYTE_MAX_UINT].pack(C_v) # total number of entries in
280
+ # the central directory 2 bytes
281
+ else
282
+ io << [num_files_in_archive].pack(C_v) # total number of entries in the
283
+ # central directory on this disk 2 bytes
284
+ io << [num_files_in_archive].pack(C_v) # total number of entries in
285
+ # the central directory 2 bytes
286
+ end
287
+
288
+ if zip64_required
289
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # size of the central directory 4 bytes
290
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # offset of start of central
291
+ # directory with respect to
292
+ # the starting disk number 4 bytes
293
+ else
294
+ io << [central_directory_size].pack(C_V) # size of the central directory 4 bytes
295
+ io << [start_of_central_directory_location].pack(C_V) # offset of start of central
296
+ # directory with respect to
297
+ # the starting disk number 4 bytes
298
+ end
299
+ io << [ZIP_TRICKS_COMMENT.bytesize].pack(C_v) # .ZIP file comment length 2 bytes
300
+ io << ZIP_TRICKS_COMMENT # .ZIP file comment (variable size)
301
+ end
302
+
303
+ private
304
+
305
+ # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
306
+ #
307
+ # @param io[#<<] the buffer to write the local file header to
308
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
309
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
310
+ # @return [void]
311
+ def write_zip_64_extra_for_local_file_header(io:, compressed_size:, uncompressed_size:)
312
+ io << [0x0001].pack(C_v) # 2 bytes Tag for this "extra" block type
313
+ io << [16].pack(C_v) # 2 bytes Size of this "extra" block. For us it will always be 16 (2x8)
314
+ io << [uncompressed_size].pack(C_Qe) # 8 bytes Original uncompressed file size
315
+ io << [compressed_size].pack(C_Qe) # 8 bytes Size of compressed data
316
+ end
317
+
318
+ # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
319
+ # also contains the location of the local file header in the ZIP as an 8-byte int.
320
+ #
321
+ # @param io[#<<] the buffer to write the local file header to
322
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
323
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
324
+ # @param local_file_header_location[Fixnum] Byte offset of the start of the local file header from the beginning of the ZIP archive
325
+ # @return [void]
326
+ def write_zip_64_extra_for_central_directory_file_header(io:, compressed_size:, uncompressed_size:, local_file_header_location:)
327
+ io << [0x0001].pack(C_v) # 2 bytes Tag for this "extra" block type
328
+ io << [28].pack(C_v) # 2 bytes Size of this "extra" block. For us it will always be 28
329
+ io << [uncompressed_size].pack(C_Qe) # 8 bytes Original uncompressed file size
330
+ io << [compressed_size].pack(C_Qe) # 8 bytes Size of compressed data
331
+ io << [local_file_header_location].pack(C_Qe) # 8 bytes Offset of local header record
332
+ io << [0].pack(C_V) # 4 bytes Number of the disk on which this file starts
333
+ end
334
+
335
+ def bytesize_of
336
+ ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
337
+ end
338
+
339
+ def to_binary_dos_time(t)
340
+ (t.sec/2) + (t.min << 5) + (t.hour << 11)
341
+ end
342
+
343
+ def to_binary_dos_date(t)
344
+ (t.day) + (t.month << 5) + ((t.year - 1980) << 9)
345
+ end
346
+ end
data/lib/zip_tricks.rb CHANGED
@@ -1,8 +1,5 @@
1
- require 'zip'
2
- require 'very_tiny_state_machine'
3
-
4
1
  module ZipTricks
5
- VERSION = '2.8.1'
2
+ VERSION = '3.0.0'
6
3
 
7
4
  # Require all the sub-components except myself
8
5
  Dir.glob(__dir__ + '/**/*.rb').sort.each {|p| require p unless p == __FILE__ }
data/spec/spec_helper.rb CHANGED
@@ -6,6 +6,7 @@ require 'zip_tricks'
6
6
  require 'digest'
7
7
  require 'fileutils'
8
8
  require 'shellwords'
9
+ require 'zip'
9
10
 
10
11
  module Keepalive
11
12
  # Travis-CI kills the build if it does not receive output on standard out or standard error
@@ -22,7 +23,6 @@ module Keepalive
22
23
  extend self
23
24
  end
24
25
 
25
-
26
26
  class ManagedTempfile < Tempfile
27
27
  @@managed_tempfiles = []
28
28
 
@@ -39,38 +39,6 @@ class ManagedTempfile < Tempfile
39
39
  end
40
40
  end
41
41
 
42
- # A Tempfile filled with N bytes of random data, that also knows the CRC32 of that data
43
- class RandomFile < ManagedTempfile
44
- attr_reader :crc32
45
- RANDOM_MEG = Random.new.bytes(1024 * 1024) # Allocate it once to prevent heap churn
46
- def initialize(size)
47
- super('random-bin')
48
- binmode
49
- crc = ZipTricks::StreamCRC32.new
50
- bytes = size % (1024 * 1024)
51
- megs = size / (1024 * 1024)
52
- megs.times do
53
- Keepalive.still_alive!
54
- self << RANDOM_MEG
55
- crc << RANDOM_MEG
56
- end
57
- random_blob = Random.new.bytes(bytes)
58
- self << random_blob
59
- crc << random_blob
60
- @crc32 = crc.to_i
61
- rewind
62
- end
63
-
64
- def copy_to(io)
65
- rewind
66
- while data = read(10*1024*1024)
67
- io << data
68
- Keepalive.still_alive!
69
- end
70
- rewind
71
- end
72
- end
73
-
74
42
  module ZipInspection
75
43
  def inspect_zip_with_external_tool(path_to_zip)
76
44
  zipinfo_path = 'zipinfo'
@@ -96,11 +64,6 @@ module ZipInspection
96
64
  au_path = '/System/Library/CoreServices/Applications/Archive Utility.app/Contents/MacOS/Archive Utility'
97
65
  open_with_external_app(au_path, path_to_zip, skip_if_missing)
98
66
  end
99
-
100
- def open_zip_with_unarchiver(path_to_zip, skip_if_missing: false)
101
- ua_path = '/Applications/The Unarchiver.app/Contents/MacOS/The Unarchiver'
102
- open_with_external_app(ua_path, path_to_zip, skip_if_missing)
103
- end
104
67
  end
105
68
 
106
69
  RSpec.configure do |config|
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
+ describe ZipTricks::FileReader do
4
+ it 'reads and uncompresses the file written deflated with data descriptors' do
5
+ zipfile = StringIO.new
6
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
7
+ tolstoy.force_encoding(Encoding::BINARY)
8
+
9
+ ZipTricks::Streamer.open(zipfile) do |zip|
10
+ zip.write_deflated_file('war-and-peace.txt') do |sink|
11
+ sink << tolstoy
12
+ end
13
+ end
14
+
15
+ entries = described_class.read_zip_structure(zipfile)
16
+ expect(entries.length).to eq(1)
17
+
18
+ entry = entries.first
19
+
20
+ readback = ''
21
+ reader = entry.extractor_from(zipfile)
22
+ readback << reader.extract(10) until reader.eof?
23
+
24
+ expect(readback.bytesize).to eq(tolstoy.bytesize)
25
+ expect(readback[0..10]).to eq(tolstoy[0..10])
26
+ expect(readback[-10..-1]).to eq(tolstoy[-10..-1])
27
+ end
28
+
29
+ it 'reads the file written stored with data descriptors' do
30
+ zipfile = StringIO.new
31
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
32
+ ZipTricks::Streamer.open(zipfile) do |zip|
33
+ zip.write_stored_file('war-and-peace.txt') do |sink|
34
+ sink << tolstoy
35
+ end
36
+ end
37
+
38
+ entries = described_class.read_zip_structure(zipfile)
39
+ expect(entries.length).to eq(1)
40
+
41
+ entry = entries.first
42
+
43
+ readback = entry.extractor_from(zipfile).extract
44
+ expect(readback.bytesize).to eq(tolstoy.bytesize)
45
+ expect(readback[0..10]).to eq(tolstoy[0..10])
46
+ end
47
+ end
@@ -7,7 +7,7 @@ describe ZipTricks::RackBody do
7
7
  file_body = SecureRandom.random_bytes(1024 * 1024 + 8981)
8
8
 
9
9
  body = described_class.new do | zip |
10
- zip.add_stored_entry("A file", file_body.bytesize, Zlib.crc32(file_body))
10
+ zip.add_stored_entry(filename: "A file", size: file_body.bytesize, crc32: Zlib.crc32(file_body))
11
11
  zip << file_body
12
12
  end
13
13
 
@@ -17,7 +17,7 @@ describe ZipTricks::RackBody do
17
17
  body.close
18
18
 
19
19
  output_buf.rewind
20
- expect(output_buf.size).to eq(1057667)
20
+ expect(output_buf.size).to eq(1057696)
21
21
 
22
22
  per_filename = {}
23
23
  Zip::File.open(output_buf.path) do |zip_file|
@@ -38,7 +38,7 @@ describe ZipTricks::RemoteIO do
38
38
  uncap = described_class.new
39
39
  expect {
40
40
  uncap.seek(123, :UNSUPPORTED)
41
- }.to raise_error(Errno::ENOTSUP)
41
+ }.to raise_error(/unsupported/i)
42
42
  end
43
43
  end
44
44
 
@@ -50,18 +50,6 @@ describe ZipTricks::RemoteIO do
50
50
  expect(uncap.seek(10, mode)).to eq(0)
51
51
  end
52
52
  end
53
-
54
- context 'with SEEK_END mode' do
55
- it 'seens to 10 bytes to the end of the IO' do
56
- uncap = described_class.new
57
- expect(uncap).to receive(:request_object_size).and_return(100)
58
-
59
- mode = IO::SEEK_END
60
- offset = -10
61
- expect(uncap.seek(-10, IO::SEEK_END)).to eq(0)
62
- expect(uncap.pos).to eq(90)
63
- end
64
- end
65
53
  end
66
54
 
67
55
  describe '#read' do
@@ -87,10 +75,10 @@ describe ZipTricks::RemoteIO do
87
75
 
88
76
  context 'without arguments' do
89
77
  it 'reads the entire buffer and alters the position pointer' do
90
- expect(@subject.pos).to eq(0)
78
+ expect(@subject.tell).to eq(0)
91
79
  read = @subject.read
92
80
  expect(read.bytesize).to eq(@buf.size)
93
- expect(@subject.pos).to eq(@buf.size)
81
+ expect(@subject.tell).to eq(@buf.size)
94
82
  end
95
83
  end
96
84
 
@@ -105,7 +93,7 @@ describe ZipTricks::RemoteIO do
105
93
 
106
94
  it 'returns exact amount of bytes at the start of the buffer' do
107
95
  bytes_read = @subject.read(10)
108
- expect(@subject.pos).to eq(10)
96
+ expect(@subject.tell).to eq(10)
109
97
  @buf.seek(0)
110
98
  expect(bytes_read).to eq(@buf.read(10))
111
99
  end
@@ -114,7 +102,7 @@ describe ZipTricks::RemoteIO do
114
102
  @subject.seek(456, IO::SEEK_SET)
115
103
 
116
104
  bytes_read = @subject.read(10)
117
- expect(@subject.pos).to eq(456+10)
105
+ expect(@subject.tell).to eq(456+10)
118
106
 
119
107
  @buf.seek(456)
120
108
  expect(bytes_read).to eq(@buf.read(10))
@@ -124,13 +112,13 @@ describe ZipTricks::RemoteIO do
124
112
  at_end = @buf.size - 4
125
113
  @subject.seek(at_end, IO::SEEK_SET)
126
114
 
127
- expect(@subject.pos).to eq(15728636)
115
+ expect(@subject.tell).to eq(15728636)
128
116
  bytes_read = @subject.read(10)
129
- expect(@subject.pos).to eq(@buf.size) # Should have moved the pos pointer to the end
117
+ expect(@subject.tell).to eq(@buf.size) # Should have moved the pos pointer to the end
130
118
 
131
119
  expect(bytes_read.bytesize).to eq(4)
132
120
 
133
- expect(@subject.pos).to eq(@buf.size)
121
+ expect(@subject.tell).to eq(@buf.size)
134
122
  end
135
123
  end
136
124
  end
@@ -21,11 +21,11 @@ describe ZipTricks::RemoteUncap, webmock: true do
21
21
 
22
22
  File.open('temp.zip', 'wb') do |f|
23
23
  ZipTricks::Streamer.open(f) do | zip |
24
- zip.add_stored_entry('first-file.bin', payload1.size, payload1_crc)
24
+ zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
25
25
  while blob = payload1.read(1024 * 5)
26
26
  zip << blob
27
27
  end
28
- zip.add_stored_entry('second-file.bin', payload2.size, payload2_crc)
28
+ zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
29
29
  while blob = payload2.read(1024 * 5)
30
30
  zip << blob
31
31
  end
@@ -81,9 +81,9 @@ describe ZipTricks::RemoteUncap, webmock: true do
81
81
 
82
82
  File.open('temp.zip', 'wb') do |f|
83
83
  ZipTricks::Streamer.open(f) do | zip |
84
- zip.add_stored_entry('first-file.bin', payload1.size, payload1_crc)
84
+ zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
85
85
  zip << '' # It is empty, so a read() would return nil
86
- zip.add_stored_entry('second-file.bin', payload2.size, payload2_crc)
86
+ zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
87
87
  while blob = payload2.read(1024 * 5)
88
88
  zip << blob
89
89
  end
@@ -0,0 +1,31 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe ZipTricks::SizeEstimator do
4
+ it 'accurately predicts the output zip size' do
5
+ # Generate a couple of random files
6
+ raw_file_1 = SecureRandom.random_bytes(1024 * 20)
7
+ raw_file_2 = SecureRandom.random_bytes(1024 * 128)
8
+ raw_file_3 = SecureRandom.random_bytes(1258695)
9
+
10
+ predicted_size = described_class.estimate do | estimator |
11
+ r = estimator.add_stored_entry(filename: "first-file.bin", size: raw_file_1.size)
12
+ expect(r).to eq(estimator), "add_stored_entry should return self"
13
+
14
+ estimator.add_stored_entry(filename: "second-file.bin", size: raw_file_2.size)
15
+
16
+ r = estimator.add_compressed_entry(filename: "second-flie.bin", compressed_size: raw_file_3.size,
17
+ uncompressed_size: raw_file_2.size, )
18
+ expect(r).to eq(estimator), "add_compressed_entry should return self"
19
+
20
+ r = estimator.add_stored_entry(filename: "first-file-with-descriptor.bin", size: raw_file_1.size,
21
+ use_data_descriptor: true)
22
+ expect(r).to eq(estimator), "add_stored_entry should return self"
23
+
24
+ r = estimator.add_compressed_entry(filename: "second-file-with-descriptor.bin", compressed_size: raw_file_3.size,
25
+ uncompressed_size: raw_file_2.size, use_data_descriptor: true)
26
+ expect(r).to eq(estimator), "add_compressed_entry should return self"
27
+ end
28
+
29
+ expect(predicted_size).to eq(2690095)
30
+ end
31
+ end