zip_tricks 2.8.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/IMPLEMENTATION_DETAILS.md +2 -10
  4. data/README.md +62 -59
  5. data/examples/archive_size_estimate.rb +4 -4
  6. data/examples/rack_application.rb +3 -5
  7. data/lib/zip_tricks/block_deflate.rb +21 -0
  8. data/lib/zip_tricks/file_reader.rb +491 -0
  9. data/lib/zip_tricks/null_writer.rb +7 -2
  10. data/lib/zip_tricks/rack_body.rb +3 -3
  11. data/lib/zip_tricks/remote_io.rb +30 -20
  12. data/lib/zip_tricks/remote_uncap.rb +10 -10
  13. data/lib/zip_tricks/size_estimator.rb +64 -0
  14. data/lib/zip_tricks/stream_crc32.rb +2 -2
  15. data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
  16. data/lib/zip_tricks/streamer/entry.rb +21 -0
  17. data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
  18. data/lib/zip_tricks/streamer/writable.rb +20 -0
  19. data/lib/zip_tricks/streamer.rb +172 -66
  20. data/lib/zip_tricks/zip_writer.rb +346 -0
  21. data/lib/zip_tricks.rb +1 -4
  22. data/spec/spec_helper.rb +1 -38
  23. data/spec/zip_tricks/file_reader_spec.rb +47 -0
  24. data/spec/zip_tricks/rack_body_spec.rb +2 -2
  25. data/spec/zip_tricks/remote_io_spec.rb +8 -20
  26. data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
  27. data/spec/zip_tricks/size_estimator_spec.rb +31 -0
  28. data/spec/zip_tricks/streamer_spec.rb +59 -36
  29. data/spec/zip_tricks/zip_writer_spec.rb +408 -0
  30. data/zip_tricks.gemspec +20 -14
  31. metadata +33 -16
  32. data/lib/zip_tricks/manifest.rb +0 -85
  33. data/lib/zip_tricks/microzip.rb +0 -339
  34. data/lib/zip_tricks/stored_size_estimator.rb +0 -44
  35. data/spec/zip_tricks/manifest_spec.rb +0 -60
  36. data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
  37. data/spec/zip_tricks/microzip_spec.rb +0 -546
  38. data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -0,0 +1,346 @@
1
+ # A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
2
+ # separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
3
+ #
4
+ # The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
5
+ # is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
6
+ # without depending on data available elsewhere. This makes the writer very easy to test, since each of
7
+ # its methods outputs something that only depends on the method's arguments. For example, we use this
8
+ # to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
9
+ # to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
10
+ # with given offsets as arguments.
11
+ #
12
+ # Since some methods need a lot of data about the entity being written, everything is passed via
13
+ # keyword arguments - this way it is much less likely that you can make a mistake writing something.
14
+ #
15
+ # Another reason for having a separate Writer is that most ZIP libraries attach the methods for
16
+ # writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
17
+ # However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
18
+ # absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
19
+ # one screen. Altering or checking that code then becomes much, much easier. The methods doing the
20
+ # writing are also intentionally left very verbose - so that you can follow what is happening at
21
+ # all times.
22
+ #
23
+ # All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
24
+ # that to output to String objects, or to output to Arrays that you can later join together.
25
+ class ZipTricks::ZipWriter
26
+ FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
27
+ TWO_BYTE_MAX_UINT = 0xFFFF
28
+ ZIP_TRICKS_COMMENT = 'Written using ZipTricks %s' % ZipTricks::VERSION
29
+ VERSION_MADE_BY = 52
30
+ VERSION_NEEDED_TO_EXTRACT = 20
31
+ VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
32
+ DEFAULT_EXTERNAL_ATTRS = begin
33
+ # These need to be set so that the unarchived files do not become executable on UNIX, for
34
+ # security purposes. Strictly speaking we would want to make this user-customizable,
35
+ # but for now just putting in sane defaults will do. For example, Trac with zipinfo does this:
36
+ # zipinfo.external_attr = 0644 << 16L # permissions -rw-r--r--.
37
+ # We snatch the incantations from Rubyzip for this.
38
+ unix_perms = 0644
39
+ file_type_file = 010
40
+ external_attrs = (file_type_file << 12 | (unix_perms & 07777)) << 16
41
+ end
42
+ MADE_BY_SIGNATURE = begin
43
+ # A combination of the VERSION_MADE_BY low byte and the OS type high byte
44
+ os_type = 3 # UNIX
45
+ [VERSION_MADE_BY, os_type].pack('CC')
46
+ end
47
+
48
+ C_V = 'V'.freeze # Encode a 4-byte little-endian uint
49
+ C_v = 'v'.freeze # Encode a 2-byte little-endian uint
50
+ C_Qe = 'Q<'.freeze # Encode an 8-byte little-endian uint
51
+
52
+ private_constant :FOUR_BYTE_MAX_UINT, :TWO_BYTE_MAX_UINT,
53
+ :VERSION_MADE_BY, :VERSION_NEEDED_TO_EXTRACT, :VERSION_NEEDED_TO_EXTRACT_ZIP64,
54
+ :DEFAULT_EXTERNAL_ATTRS, :MADE_BY_SIGNATURE,
55
+ :C_V, :C_v, :C_Qe, :ZIP_TRICKS_COMMENT
56
+
57
+ # Writes the local file header, that precedes the actual file _data_.
58
+ #
59
+ # @param io[#<<] the buffer to write the local file header to
60
+ # @param filename[String] the name of the file in the archive
61
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
62
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
63
+ # @param crc32[Fixnum] The CRC32 checksum of the file
64
+ # @param mtime[Time] the modification time to be recorded in the ZIP
65
+ # @param gp_flags[Fixnum] bit-packed general purpose flags
66
+ # @param storage_mode[Fixnum] 8 for deflated, 0 for stored...
67
+ # @return [void]
68
+ def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
69
+ requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
70
+
71
+ io << [0x04034b50].pack(C_V) # local file header signature 4 bytes (0x04034b50)
72
+ if requires_zip64 # version needed to extract 2 bytes
73
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v)
74
+ else
75
+ io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)
76
+ end
77
+
78
+ io << [gp_flags].pack(C_v) # general purpose bit flag 2 bytes
79
+ io << [storage_mode].pack(C_v) # compression method 2 bytes
80
+ io << [to_binary_dos_time(mtime)].pack(C_v) # last mod file time 2 bytes
81
+ io << [to_binary_dos_date(mtime)].pack(C_v) # last mod file date 2 bytes
82
+ io << [crc32].pack(C_V) # crc-32 4 bytes
83
+
84
+ if requires_zip64
85
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # compressed size 4 bytes
86
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # uncompressed size 4 bytes
87
+ else
88
+ io << [compressed_size].pack(C_V) # compressed size 4 bytes
89
+ io << [uncompressed_size].pack(C_V) # uncompressed size 4 bytes
90
+ end
91
+
92
+ # Filename should not be longer than 0xFFFF otherwise this wont fit here
93
+ io << [filename.bytesize].pack(C_v) # file name length 2 bytes
94
+
95
+ extra_size = 0
96
+ if requires_zip64
97
+ extra_size += bytesize_of {|buf| write_zip_64_extra_for_local_file_header(io: buf, compressed_size: 0, uncompressed_size: 0) }
98
+ end
99
+ io << [extra_size].pack(C_v) # extra field length 2 bytes
100
+
101
+ io << filename # file name (variable size)
102
+
103
+ # Interesting tidbit:
104
+ # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
105
+ # TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
106
+ # Zip64 extra field as _the first_ extra field. If we decide to add the Info-ZIP UTF-8 field...
107
+ if requires_zip64
108
+ write_zip_64_extra_for_local_file_header(io: io, compressed_size: compressed_size, uncompressed_size: uncompressed_size)
109
+ end
110
+ end
111
+
112
+ # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
113
+ # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
114
+ #
115
+ # @param io[#<<] the buffer to write the local file header to
116
+ # @param filename[String] the name of the file in the archive
117
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
118
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
119
+ # @param crc32[Fixnum] The CRC32 checksum of the file
120
+ # @param mtime[Time] the modification time to be recorded in the ZIP
121
+ # @param external_attrs[Fixnum] bit-packed external attributes (defaults to UNIX file with 0644 permissions set)
122
+ # @param gp_flags[Fixnum] bit-packed general purpose flags
123
+ # @return [void]
124
+ def write_central_directory_file_header(io:, local_file_header_location:, gp_flags:, storage_mode:, compressed_size:, uncompressed_size:, mtime:, crc32:,
125
+ filename:, external_attrs: DEFAULT_EXTERNAL_ATTRS)
126
+ # At this point if the header begins somewhere beyound 0xFFFFFFFF we _have_ to record the offset
127
+ # of the local file header as a zip64 extra field, so we give up, give in, you loose, love will always win...
128
+ add_zip64 = (local_file_header_location > FOUR_BYTE_MAX_UINT) ||
129
+ (compressed_size > FOUR_BYTE_MAX_UINT) || (uncompressed_size > FOUR_BYTE_MAX_UINT)
130
+
131
+ io << [0x02014b50].pack(C_V) # central file header signature 4 bytes (0x02014b50)
132
+ io << MADE_BY_SIGNATURE # version made by 2 bytes
133
+ if add_zip64
134
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract 2 bytes
135
+ else
136
+ io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v) # version needed to extract 2 bytes
137
+ end
138
+
139
+ io << [gp_flags].pack(C_v) # general purpose bit flag 2 bytes
140
+ io << [storage_mode].pack(C_v) # compression method 2 bytes
141
+ io << [to_binary_dos_time(mtime)].pack(C_v) # last mod file time 2 bytes
142
+ io << [to_binary_dos_date(mtime)].pack(C_v) # last mod file date 2 bytes
143
+ io << [crc32].pack(C_V) # crc-32 4 bytes
144
+
145
+ if add_zip64
146
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # compressed size 4 bytes
147
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # uncompressed size 4 bytes
148
+ else
149
+ io << [compressed_size].pack(C_V) # compressed size 4 bytes
150
+ io << [uncompressed_size].pack(C_V) # uncompressed size 4 bytes
151
+ end
152
+
153
+ # Filename should not be longer than 0xFFFF otherwise this wont fit here
154
+ io << [filename.bytesize].pack(C_v) # file name length 2 bytes
155
+
156
+ extra_size = 0
157
+ if add_zip64
158
+ extra_size += bytesize_of {|buf|
159
+ # Supply zeroes for most values as we obnly care about the size of the data written
160
+ write_zip_64_extra_for_central_directory_file_header(io: buf, compressed_size: 0, uncompressed_size: 0, local_file_header_location: 0)
161
+ }
162
+ end
163
+ io << [extra_size].pack(C_v) # extra field length 2 bytes
164
+
165
+ io << [0].pack(C_v) # file comment length 2 bytes
166
+
167
+ # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
168
+ # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
169
+ # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
170
+ if add_zip64 # disk number start 2 bytes
171
+ io << [TWO_BYTE_MAX_UINT].pack(C_v)
172
+ else
173
+ io << [0].pack(C_v)
174
+ end
175
+ io << [0].pack(C_v) # internal file attributes 2 bytes
176
+ io << [DEFAULT_EXTERNAL_ATTRS].pack(C_V) # external file attributes 4 bytes
177
+
178
+ if add_zip64 # relative offset of local header 4 bytes
179
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V)
180
+ else
181
+ io << [local_file_header_location].pack(C_V)
182
+ end
183
+ io << filename # file name (variable size)
184
+
185
+ if add_zip64 # extra field (variable size)
186
+ write_zip_64_extra_for_central_directory_file_header(io: io, local_file_header_location: local_file_header_location,
187
+ compressed_size: compressed_size, uncompressed_size: uncompressed_size)
188
+ end
189
+ #(empty) # file comment (variable size)
190
+ end
191
+
192
+ # Writes the data descriptor following the file data for a file whose local file header
193
+ # was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold (0xFFFFFFFF),
194
+ # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
195
+ #
196
+ # @param io[#<<] the buffer to write the data descriptor to
197
+ # @param crc32[Fixnum] The CRC32 checksum of the file
198
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
199
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
200
+ # @return [void]
201
+ def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
202
+ io << [0x08074b50].pack(C_V) # Although not originally assigned a signature, the value
203
+ # 0x08074b50 has commonly been adopted as a signature value
204
+ # for the data descriptor record.
205
+ io << [crc32].pack(C_V) # crc-32 4 bytes
206
+
207
+
208
+ # If one of the sizes is above 0xFFFFFFFF use ZIP64 lengths (8 bytes) instead. A good unarchiver
209
+ # will decide to unpack it as such if it finds the Zip64 extra for the file in the central directory.
210
+ # So also use the opportune moment to switch the entry to Zip64 if needed
211
+ requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
212
+ pack_spec = requires_zip64 ? C_Qe : C_V
213
+
214
+ io << [compressed_size].pack(pack_spec) # compressed size 4 bytes, or 8 bytes for ZIP64
215
+ io << [uncompressed_size].pack(pack_spec) # uncompressed size 4 bytes, or 8 bytes for ZIP64
216
+ end
217
+
218
+ # Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
219
+ #
220
+ # @param io[#<<] the buffer to write the central directory to.
221
+ # @param start_of_central_directory_location[Fixnum] byte offset of the start of central directory from the beginning of ZIP file
222
+ # @param central_directory_size[Fixnum] the size of the central directory (only file headers) in bytes
223
+ # @param num_files_in_archive[Fixnum] How many files the archive contains
224
+ # @return [void]
225
+ def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:)
226
+ zip64_eocdr_offset = start_of_central_directory_location + central_directory_size
227
+
228
+ zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
229
+ start_of_central_directory_location > FOUR_BYTE_MAX_UINT ||
230
+ zip64_eocdr_offset > FOUR_BYTE_MAX_UINT ||
231
+ num_files_in_archive > TWO_BYTE_MAX_UINT
232
+
233
+ # Then, if zip64 is used
234
+ if zip64_required
235
+ # [zip64 end of central directory record]
236
+ # zip64 end of central dir
237
+ io << [0x06064b50].pack(C_V) # signature 4 bytes (0x06064b50)
238
+ io << [44].pack(C_Qe) # size of zip64 end of central
239
+ # directory record 8 bytes
240
+ # (this is ex. the 12 bytes of the signature and the size value itself).
241
+ # Without the extensible data sector (which we are not using)
242
+ # it is always 44 bytes.
243
+ io << MADE_BY_SIGNATURE # version made by 2 bytes
244
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract 2 bytes
245
+ io << [0].pack(C_V) # number of this disk 4 bytes
246
+ io << [0].pack(C_V) # number of the disk with the
247
+ # start of the central directory 4 bytes
248
+ io << [num_files_in_archive].pack(C_Qe) # total number of entries in the
249
+ # central directory on this disk 8 bytes
250
+ io << [num_files_in_archive].pack(C_Qe) # total number of entries in the
251
+ # central directory 8 bytes
252
+ io << [central_directory_size].pack(C_Qe) # size of the central directory 8 bytes
253
+ # offset of start of central
254
+ # directory with respect to
255
+ io << [start_of_central_directory_location].pack(C_Qe) # the starting disk number 8 bytes
256
+ # zip64 extensible data sector (variable size), blank for us
257
+
258
+ # [zip64 end of central directory locator]
259
+ io << [0x07064b50].pack(C_V) # zip64 end of central dir locator
260
+ # signature 4 bytes (0x07064b50)
261
+ io << [0].pack(C_V) # number of the disk with the
262
+ # start of the zip64 end of
263
+ # central directory 4 bytes
264
+ io << [zip64_eocdr_offset].pack(C_Qe) # relative offset of the zip64
265
+ # end of central directory record 8 bytes
266
+ # (note: "relative" is actually "from the start of the file")
267
+ io << [1].pack(C_V) # total number of disks 4 bytes
268
+ end
269
+
270
+ # Then the end of central directory record:
271
+ io << [0x06054b50].pack(C_V) # end of central dir signature 4 bytes (0x06054b50)
272
+ io << [0].pack(C_v) # number of this disk 2 bytes
273
+ io << [0].pack(C_v) # number of the disk with the
274
+ # start of the central directory 2 bytes
275
+
276
+ if zip64_required # the number of entries will be read from the zip64 part of the central directory
277
+ io << [TWO_BYTE_MAX_UINT].pack(C_v) # total number of entries in the
278
+ # central directory on this disk 2 bytes
279
+ io << [TWO_BYTE_MAX_UINT].pack(C_v) # total number of entries in
280
+ # the central directory 2 bytes
281
+ else
282
+ io << [num_files_in_archive].pack(C_v) # total number of entries in the
283
+ # central directory on this disk 2 bytes
284
+ io << [num_files_in_archive].pack(C_v) # total number of entries in
285
+ # the central directory 2 bytes
286
+ end
287
+
288
+ if zip64_required
289
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # size of the central directory 4 bytes
290
+ io << [FOUR_BYTE_MAX_UINT].pack(C_V) # offset of start of central
291
+ # directory with respect to
292
+ # the starting disk number 4 bytes
293
+ else
294
+ io << [central_directory_size].pack(C_V) # size of the central directory 4 bytes
295
+ io << [start_of_central_directory_location].pack(C_V) # offset of start of central
296
+ # directory with respect to
297
+ # the starting disk number 4 bytes
298
+ end
299
+ io << [ZIP_TRICKS_COMMENT.bytesize].pack(C_v) # .ZIP file comment length 2 bytes
300
+ io << ZIP_TRICKS_COMMENT # .ZIP file comment (variable size)
301
+ end
302
+
303
+ private
304
+
305
+ # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
306
+ #
307
+ # @param io[#<<] the buffer to write the local file header to
308
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
309
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
310
+ # @return [void]
311
+ def write_zip_64_extra_for_local_file_header(io:, compressed_size:, uncompressed_size:)
312
+ io << [0x0001].pack(C_v) # 2 bytes Tag for this "extra" block type
313
+ io << [16].pack(C_v) # 2 bytes Size of this "extra" block. For us it will always be 16 (2x8)
314
+ io << [uncompressed_size].pack(C_Qe) # 8 bytes Original uncompressed file size
315
+ io << [compressed_size].pack(C_Qe) # 8 bytes Size of compressed data
316
+ end
317
+
318
+ # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
319
+ # also contains the location of the local file header in the ZIP as an 8-byte int.
320
+ #
321
+ # @param io[#<<] the buffer to write the local file header to
322
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
323
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
324
+ # @param local_file_header_location[Fixnum] Byte offset of the start of the local file header from the beginning of the ZIP archive
325
+ # @return [void]
326
+ def write_zip_64_extra_for_central_directory_file_header(io:, compressed_size:, uncompressed_size:, local_file_header_location:)
327
+ io << [0x0001].pack(C_v) # 2 bytes Tag for this "extra" block type
328
+ io << [28].pack(C_v) # 2 bytes Size of this "extra" block. For us it will always be 28
329
+ io << [uncompressed_size].pack(C_Qe) # 8 bytes Original uncompressed file size
330
+ io << [compressed_size].pack(C_Qe) # 8 bytes Size of compressed data
331
+ io << [local_file_header_location].pack(C_Qe) # 8 bytes Offset of local header record
332
+ io << [0].pack(C_V) # 4 bytes Number of the disk on which this file starts
333
+ end
334
+
335
+ def bytesize_of
336
+ ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
337
+ end
338
+
339
+ def to_binary_dos_time(t)
340
+ (t.sec/2) + (t.min << 5) + (t.hour << 11)
341
+ end
342
+
343
+ def to_binary_dos_date(t)
344
+ (t.day) + (t.month << 5) + ((t.year - 1980) << 9)
345
+ end
346
+ end
data/lib/zip_tricks.rb CHANGED
@@ -1,8 +1,5 @@
1
- require 'zip'
2
- require 'very_tiny_state_machine'
3
-
4
1
  module ZipTricks
5
- VERSION = '2.8.1'
2
+ VERSION = '3.0.0'
6
3
 
7
4
  # Require all the sub-components except myself
8
5
  Dir.glob(__dir__ + '/**/*.rb').sort.each {|p| require p unless p == __FILE__ }
data/spec/spec_helper.rb CHANGED
@@ -6,6 +6,7 @@ require 'zip_tricks'
6
6
  require 'digest'
7
7
  require 'fileutils'
8
8
  require 'shellwords'
9
+ require 'zip'
9
10
 
10
11
  module Keepalive
11
12
  # Travis-CI kills the build if it does not receive output on standard out or standard error
@@ -22,7 +23,6 @@ module Keepalive
22
23
  extend self
23
24
  end
24
25
 
25
-
26
26
  class ManagedTempfile < Tempfile
27
27
  @@managed_tempfiles = []
28
28
 
@@ -39,38 +39,6 @@ class ManagedTempfile < Tempfile
39
39
  end
40
40
  end
41
41
 
42
- # A Tempfile filled with N bytes of random data, that also knows the CRC32 of that data
43
- class RandomFile < ManagedTempfile
44
- attr_reader :crc32
45
- RANDOM_MEG = Random.new.bytes(1024 * 1024) # Allocate it once to prevent heap churn
46
- def initialize(size)
47
- super('random-bin')
48
- binmode
49
- crc = ZipTricks::StreamCRC32.new
50
- bytes = size % (1024 * 1024)
51
- megs = size / (1024 * 1024)
52
- megs.times do
53
- Keepalive.still_alive!
54
- self << RANDOM_MEG
55
- crc << RANDOM_MEG
56
- end
57
- random_blob = Random.new.bytes(bytes)
58
- self << random_blob
59
- crc << random_blob
60
- @crc32 = crc.to_i
61
- rewind
62
- end
63
-
64
- def copy_to(io)
65
- rewind
66
- while data = read(10*1024*1024)
67
- io << data
68
- Keepalive.still_alive!
69
- end
70
- rewind
71
- end
72
- end
73
-
74
42
  module ZipInspection
75
43
  def inspect_zip_with_external_tool(path_to_zip)
76
44
  zipinfo_path = 'zipinfo'
@@ -96,11 +64,6 @@ module ZipInspection
96
64
  au_path = '/System/Library/CoreServices/Applications/Archive Utility.app/Contents/MacOS/Archive Utility'
97
65
  open_with_external_app(au_path, path_to_zip, skip_if_missing)
98
66
  end
99
-
100
- def open_zip_with_unarchiver(path_to_zip, skip_if_missing: false)
101
- ua_path = '/Applications/The Unarchiver.app/Contents/MacOS/The Unarchiver'
102
- open_with_external_app(ua_path, path_to_zip, skip_if_missing)
103
- end
104
67
  end
105
68
 
106
69
  RSpec.configure do |config|
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
+ describe ZipTricks::FileReader do
4
+ it 'reads and uncompresses the file written deflated with data descriptors' do
5
+ zipfile = StringIO.new
6
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
7
+ tolstoy.force_encoding(Encoding::BINARY)
8
+
9
+ ZipTricks::Streamer.open(zipfile) do |zip|
10
+ zip.write_deflated_file('war-and-peace.txt') do |sink|
11
+ sink << tolstoy
12
+ end
13
+ end
14
+
15
+ entries = described_class.read_zip_structure(zipfile)
16
+ expect(entries.length).to eq(1)
17
+
18
+ entry = entries.first
19
+
20
+ readback = ''
21
+ reader = entry.extractor_from(zipfile)
22
+ readback << reader.extract(10) until reader.eof?
23
+
24
+ expect(readback.bytesize).to eq(tolstoy.bytesize)
25
+ expect(readback[0..10]).to eq(tolstoy[0..10])
26
+ expect(readback[-10..-1]).to eq(tolstoy[-10..-1])
27
+ end
28
+
29
+ it 'reads the file written stored with data descriptors' do
30
+ zipfile = StringIO.new
31
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
32
+ ZipTricks::Streamer.open(zipfile) do |zip|
33
+ zip.write_stored_file('war-and-peace.txt') do |sink|
34
+ sink << tolstoy
35
+ end
36
+ end
37
+
38
+ entries = described_class.read_zip_structure(zipfile)
39
+ expect(entries.length).to eq(1)
40
+
41
+ entry = entries.first
42
+
43
+ readback = entry.extractor_from(zipfile).extract
44
+ expect(readback.bytesize).to eq(tolstoy.bytesize)
45
+ expect(readback[0..10]).to eq(tolstoy[0..10])
46
+ end
47
+ end
@@ -7,7 +7,7 @@ describe ZipTricks::RackBody do
7
7
  file_body = SecureRandom.random_bytes(1024 * 1024 + 8981)
8
8
 
9
9
  body = described_class.new do | zip |
10
- zip.add_stored_entry("A file", file_body.bytesize, Zlib.crc32(file_body))
10
+ zip.add_stored_entry(filename: "A file", size: file_body.bytesize, crc32: Zlib.crc32(file_body))
11
11
  zip << file_body
12
12
  end
13
13
 
@@ -17,7 +17,7 @@ describe ZipTricks::RackBody do
17
17
  body.close
18
18
 
19
19
  output_buf.rewind
20
- expect(output_buf.size).to eq(1057667)
20
+ expect(output_buf.size).to eq(1057696)
21
21
 
22
22
  per_filename = {}
23
23
  Zip::File.open(output_buf.path) do |zip_file|
@@ -38,7 +38,7 @@ describe ZipTricks::RemoteIO do
38
38
  uncap = described_class.new
39
39
  expect {
40
40
  uncap.seek(123, :UNSUPPORTED)
41
- }.to raise_error(Errno::ENOTSUP)
41
+ }.to raise_error(/unsupported/i)
42
42
  end
43
43
  end
44
44
 
@@ -50,18 +50,6 @@ describe ZipTricks::RemoteIO do
50
50
  expect(uncap.seek(10, mode)).to eq(0)
51
51
  end
52
52
  end
53
-
54
- context 'with SEEK_END mode' do
55
- it 'seens to 10 bytes to the end of the IO' do
56
- uncap = described_class.new
57
- expect(uncap).to receive(:request_object_size).and_return(100)
58
-
59
- mode = IO::SEEK_END
60
- offset = -10
61
- expect(uncap.seek(-10, IO::SEEK_END)).to eq(0)
62
- expect(uncap.pos).to eq(90)
63
- end
64
- end
65
53
  end
66
54
 
67
55
  describe '#read' do
@@ -87,10 +75,10 @@ describe ZipTricks::RemoteIO do
87
75
 
88
76
  context 'without arguments' do
89
77
  it 'reads the entire buffer and alters the position pointer' do
90
- expect(@subject.pos).to eq(0)
78
+ expect(@subject.tell).to eq(0)
91
79
  read = @subject.read
92
80
  expect(read.bytesize).to eq(@buf.size)
93
- expect(@subject.pos).to eq(@buf.size)
81
+ expect(@subject.tell).to eq(@buf.size)
94
82
  end
95
83
  end
96
84
 
@@ -105,7 +93,7 @@ describe ZipTricks::RemoteIO do
105
93
 
106
94
  it 'returns exact amount of bytes at the start of the buffer' do
107
95
  bytes_read = @subject.read(10)
108
- expect(@subject.pos).to eq(10)
96
+ expect(@subject.tell).to eq(10)
109
97
  @buf.seek(0)
110
98
  expect(bytes_read).to eq(@buf.read(10))
111
99
  end
@@ -114,7 +102,7 @@ describe ZipTricks::RemoteIO do
114
102
  @subject.seek(456, IO::SEEK_SET)
115
103
 
116
104
  bytes_read = @subject.read(10)
117
- expect(@subject.pos).to eq(456+10)
105
+ expect(@subject.tell).to eq(456+10)
118
106
 
119
107
  @buf.seek(456)
120
108
  expect(bytes_read).to eq(@buf.read(10))
@@ -124,13 +112,13 @@ describe ZipTricks::RemoteIO do
124
112
  at_end = @buf.size - 4
125
113
  @subject.seek(at_end, IO::SEEK_SET)
126
114
 
127
- expect(@subject.pos).to eq(15728636)
115
+ expect(@subject.tell).to eq(15728636)
128
116
  bytes_read = @subject.read(10)
129
- expect(@subject.pos).to eq(@buf.size) # Should have moved the pos pointer to the end
117
+ expect(@subject.tell).to eq(@buf.size) # Should have moved the pos pointer to the end
130
118
 
131
119
  expect(bytes_read.bytesize).to eq(4)
132
120
 
133
- expect(@subject.pos).to eq(@buf.size)
121
+ expect(@subject.tell).to eq(@buf.size)
134
122
  end
135
123
  end
136
124
  end
@@ -21,11 +21,11 @@ describe ZipTricks::RemoteUncap, webmock: true do
21
21
 
22
22
  File.open('temp.zip', 'wb') do |f|
23
23
  ZipTricks::Streamer.open(f) do | zip |
24
- zip.add_stored_entry('first-file.bin', payload1.size, payload1_crc)
24
+ zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
25
25
  while blob = payload1.read(1024 * 5)
26
26
  zip << blob
27
27
  end
28
- zip.add_stored_entry('second-file.bin', payload2.size, payload2_crc)
28
+ zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
29
29
  while blob = payload2.read(1024 * 5)
30
30
  zip << blob
31
31
  end
@@ -81,9 +81,9 @@ describe ZipTricks::RemoteUncap, webmock: true do
81
81
 
82
82
  File.open('temp.zip', 'wb') do |f|
83
83
  ZipTricks::Streamer.open(f) do | zip |
84
- zip.add_stored_entry('first-file.bin', payload1.size, payload1_crc)
84
+ zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
85
85
  zip << '' # It is empty, so a read() would return nil
86
- zip.add_stored_entry('second-file.bin', payload2.size, payload2_crc)
86
+ zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
87
87
  while blob = payload2.read(1024 * 5)
88
88
  zip << blob
89
89
  end
@@ -0,0 +1,31 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe ZipTricks::SizeEstimator do
4
+ it 'accurately predicts the output zip size' do
5
+ # Generate a couple of random files
6
+ raw_file_1 = SecureRandom.random_bytes(1024 * 20)
7
+ raw_file_2 = SecureRandom.random_bytes(1024 * 128)
8
+ raw_file_3 = SecureRandom.random_bytes(1258695)
9
+
10
+ predicted_size = described_class.estimate do | estimator |
11
+ r = estimator.add_stored_entry(filename: "first-file.bin", size: raw_file_1.size)
12
+ expect(r).to eq(estimator), "add_stored_entry should return self"
13
+
14
+ estimator.add_stored_entry(filename: "second-file.bin", size: raw_file_2.size)
15
+
16
+ r = estimator.add_compressed_entry(filename: "second-flie.bin", compressed_size: raw_file_3.size,
17
+ uncompressed_size: raw_file_2.size, )
18
+ expect(r).to eq(estimator), "add_compressed_entry should return self"
19
+
20
+ r = estimator.add_stored_entry(filename: "first-file-with-descriptor.bin", size: raw_file_1.size,
21
+ use_data_descriptor: true)
22
+ expect(r).to eq(estimator), "add_stored_entry should return self"
23
+
24
+ r = estimator.add_compressed_entry(filename: "second-file-with-descriptor.bin", compressed_size: raw_file_3.size,
25
+ uncompressed_size: raw_file_2.size, use_data_descriptor: true)
26
+ expect(r).to eq(estimator), "add_compressed_entry should return self"
27
+ end
28
+
29
+ expect(predicted_size).to eq(2690095)
30
+ end
31
+ end