zip_kit 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +7 -0
  3. data/.document +5 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +61 -0
  6. data/.rspec +1 -0
  7. data/.standard.yml +8 -0
  8. data/.yardopts +1 -0
  9. data/CHANGELOG.md +255 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +153 -0
  12. data/Gemfile +4 -0
  13. data/IMPLEMENTATION_DETAILS.md +97 -0
  14. data/LICENSE.txt +20 -0
  15. data/README.md +234 -0
  16. data/Rakefile +21 -0
  17. data/bench/buffered_crc32_bench.rb +109 -0
  18. data/examples/archive_size_estimate.rb +15 -0
  19. data/examples/config.ru +7 -0
  20. data/examples/deferred_write.rb +58 -0
  21. data/examples/parallel_compression_with_block_deflate.rb +86 -0
  22. data/examples/rack_application.rb +63 -0
  23. data/examples/s3_upload.rb +23 -0
  24. data/lib/zip_kit/block_deflate.rb +130 -0
  25. data/lib/zip_kit/block_write.rb +47 -0
  26. data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
  27. data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
  28. data/lib/zip_kit/file_reader.rb +740 -0
  29. data/lib/zip_kit/null_writer.rb +12 -0
  30. data/lib/zip_kit/output_enumerator.rb +150 -0
  31. data/lib/zip_kit/path_set.rb +163 -0
  32. data/lib/zip_kit/rack_chunked_body.rb +32 -0
  33. data/lib/zip_kit/rack_tempfile_body.rb +61 -0
  34. data/lib/zip_kit/rails_streaming.rb +37 -0
  35. data/lib/zip_kit/remote_io.rb +114 -0
  36. data/lib/zip_kit/remote_uncap.rb +22 -0
  37. data/lib/zip_kit/size_estimator.rb +84 -0
  38. data/lib/zip_kit/stream_crc32.rb +60 -0
  39. data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
  40. data/lib/zip_kit/streamer/entry.rb +37 -0
  41. data/lib/zip_kit/streamer/filler.rb +9 -0
  42. data/lib/zip_kit/streamer/heuristic.rb +68 -0
  43. data/lib/zip_kit/streamer/stored_writer.rb +39 -0
  44. data/lib/zip_kit/streamer/writable.rb +36 -0
  45. data/lib/zip_kit/streamer.rb +614 -0
  46. data/lib/zip_kit/uniquify_filename.rb +39 -0
  47. data/lib/zip_kit/version.rb +5 -0
  48. data/lib/zip_kit/write_and_tell.rb +40 -0
  49. data/lib/zip_kit/write_buffer.rb +71 -0
  50. data/lib/zip_kit/write_shovel.rb +22 -0
  51. data/lib/zip_kit/zip_writer.rb +436 -0
  52. data/lib/zip_kit.rb +24 -0
  53. data/zip_kit.gemspec +41 -0
  54. metadata +335 -0
@@ -0,0 +1,614 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ # Is used to write streamed ZIP archives into the provided IO-ish object.
6
+ # The output IO is never going to be rewound or seeked, so the output
7
+ # of this object can be coupled directly to, say, a Rack output. The
8
+ # output can also be a String, Array or anything that responds to `<<`.
9
+ #
10
+ # Allows for splicing raw files (for "stored" entries without compression)
11
+ # and splicing of deflated files (for "deflated" storage mode).
12
+ #
13
+ # For stored entries, you need to know the CRC32 (as a uint) and the filesize upfront,
14
+ # before the writing of the entry body starts.
15
+ #
16
+ # Any object that responds to `<<` can be used as the Streamer target - you can use
17
+ # a String, an Array, a Socket or a File, at your leisure.
18
+ #
19
+ # ## Using the Streamer with runtime compression
20
+ #
21
+ # You can use the Streamer with data descriptors (the CRC32 and the sizes will be
22
+ # written after the file data). This allows non-rewinding on-the-fly compression.
23
+ # The streamer will pick the optimum compression method ("stored" or "deflated")
24
+ # depending on the nature of the byte stream you send into it (by using a small buffer).
25
+ # If you are compressing large files, the Deflater object that the Streamer controls
26
+ # will be regularly flushed to prevent memory inflation.
27
+ #
28
+ # ZipKit::Streamer.open(file_socket_or_string) do |zip|
29
+ # zip.write_file('mov.mp4') do |sink|
30
+ # File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
31
+ # end
32
+ # zip.write_file('long-novel.txt') do |sink|
33
+ # File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
34
+ # end
35
+ # end
36
+ #
37
+ # The central directory will be written automatically at the end of the block.
38
+ #
39
+ # ## Using the Streamer with entries of known size and having a known CRC32 checksum
40
+ #
41
+ # Streamer allows "IO splicing" - in this mode it will only control the metadata output,
42
+ # but you can write the data to the socket/file outside of the Streamer. For example, when
43
+ # using the sendfile gem:
44
+ #
45
+ # ZipKit::Streamer.open(socket) do | zip |
46
+ # zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
47
+ # socket.sendfile(tempfile1)
48
+ # zip.simulate_write(tempfile1.size)
49
+ #
50
+ # zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
51
+ # socket.sendfile(tempfile2)
52
+ # zip.simulate_write(tempfile2.size)
53
+ # end
54
+ #
55
+ # Note that you need to use `simulate_write` in this case. This needs to happen since Streamer
56
+ # writes absolute offsets into the ZIP (local file header offsets and the like),
57
+ # and it relies on the output object to tell it how many bytes have been written
58
+ # so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
59
+ # offsets in the IO will not be updated - which will result in an invalid ZIP.
60
+ #
61
+ #
62
+ # ## On-the-fly deflate -using the Streamer with async/suspended writes and data descriptors
63
+ #
64
+ # If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
65
+ # there is an option to use a separate writer object. It gets returned from `write_deflated_file`
66
+ # and `write_stored_file` if you do not provide them with a block, and will accept data writes.
67
+ # Do note that you _must_ call `#close` on that object yourself:
68
+ #
69
+ # ZipKit::Streamer.open(socket) do | zip |
70
+ # w = zip.write_stored_file('mov.mp4')
71
+ # IO.copy_stream(source_io, w)
72
+ # w.close
73
+ # end
74
+ #
75
+ # The central directory will be written automatically at the end of the `open` block. If you need
76
+ # to manage the Streamer manually, or defer the central directory write until appropriate, use
77
+ # the constructor instead and call `Streamer#close`:
78
+ #
79
+ # zip = ZipKit::Streamer.new(out_io)
80
+ # .....
81
+ # zip.close
82
+ #
83
+ # Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
84
+ class ZipKit::Streamer
85
+ autoload :DeflatedWriter, File.dirname(__FILE__) + "/streamer/deflated_writer.rb"
86
+ autoload :Writable, File.dirname(__FILE__) + "/streamer/writable.rb"
87
+ autoload :StoredWriter, File.dirname(__FILE__) + "/streamer/stored_writer.rb"
88
+ autoload :Entry, File.dirname(__FILE__) + "/streamer/entry.rb"
89
+ autoload :Filler, File.dirname(__FILE__) + "/streamer/filler.rb"
90
+ autoload :Heuristic, File.dirname(__FILE__) + "/streamer/heuristic.rb"
91
+
92
+ include ZipKit::WriteShovel
93
+
94
+ STORED = 0
95
+ DEFLATED = 8
96
+
97
+ EntryBodySizeMismatch = Class.new(StandardError)
98
+ InvalidOutput = Class.new(ArgumentError)
99
+ Overflow = Class.new(StandardError)
100
+ UnknownMode = Class.new(StandardError)
101
+ OffsetOutOfSync = Class.new(StandardError)
102
+
103
+ private_constant :STORED, :DEFLATED
104
+
105
+ # Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
106
+ # returns, the Streamer will have it's `close` method called, which will write out the central
107
+ # directory of the archive to the output.
108
+ #
109
+ # @param stream [IO] the destination IO for the ZIP (should respond to `tell` and `<<`)
110
+ # @param kwargs_for_new [Hash] keyword arguments for #initialize
111
+ # @yield [Streamer] the streamer that can be written to
112
+ def self.open(stream, **kwargs_for_new)
113
+ archive = new(stream, **kwargs_for_new)
114
+ yield(archive)
115
+ archive.close
116
+ end
117
+
118
+ # Creates a new Streamer on top of the given IO-ish object.
119
+ #
120
+ # @param writable[#<<] the destination IO for the ZIP. Anything that responds to `<<` can be used.
121
+ # @param writer[ZipKit::ZipWriter] the object to be used as the writer.
122
+ # Defaults to an instance of ZipKit::ZipWriter, normally you won't need to override it
123
+ # @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
124
+ # should be suffixed with (1), (2) etc. Default value is `false` - if
125
+ # dupliate names are used an exception will be raised
126
+ def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false)
127
+ raise InvalidOutput, "The writable must respond to #<< or #write" unless writable.respond_to?(:<<) || writable.respond_to?(:write)
128
+
129
+ @out = ZipKit::WriteAndTell.new(writable)
130
+ @files = []
131
+ @path_set = ZipKit::PathSet.new
132
+ @writer = writer
133
+ @dedupe_filenames = auto_rename_duplicate_filenames
134
+ end
135
+
136
+ # Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
137
+ #
138
+ # @param binary_data [String] a String in binary encoding
139
+ # @return self
140
+ def <<(binary_data)
141
+ @out << binary_data
142
+ self
143
+ end
144
+
145
+ # Advances the internal IO pointer to keep the offsets of the ZIP file in
146
+ # check. Use this if you are going to use accelerated writes to the socket
147
+ # (like the `sendfile()` call) after writing the headers, or if you
148
+ # just need to figure out the size of the archive.
149
+ #
150
+ # @param num_bytes [Integer] how many bytes are going to be written bypassing the Streamer
151
+ # @return [Integer] position in the output stream / ZIP archive
152
+ def simulate_write(num_bytes)
153
+ @out.advance_position_by(num_bytes)
154
+ @out.tell
155
+ end
156
+
157
+ # Writes out the local header for an entry (file in the ZIP) that is using
158
+ # the deflated storage model (is compressed). Once this method is called,
159
+ # the `<<` method has to be called to write the actual contents of the body.
160
+ #
161
+ # Note that the deflated body that is going to be written into the output
162
+ # has to be _precompressed_ (pre-deflated) before writing it into the
163
+ # Streamer, because otherwise it is impossible to know it's size upfront.
164
+ #
165
+ # @param filename [String] the name of the file in the entry
166
+ # @param modification_time [Time] the modification time of the file in the archive
167
+ # @param compressed_size [Integer] the size of the compressed entry that
168
+ # is going to be written into the archive
169
+ # @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
170
+ # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
171
+ # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
172
+ # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
173
+ # @return [Integer] the offset the output IO is at after writing the entry header
174
+ def add_deflated_entry(filename:, modification_time: Time.now.utc, compressed_size: 0, uncompressed_size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false)
175
+ add_file_and_write_local_header(filename: filename,
176
+ modification_time: modification_time,
177
+ crc32: crc32,
178
+ storage_mode: DEFLATED,
179
+ compressed_size: compressed_size,
180
+ uncompressed_size: uncompressed_size,
181
+ unix_permissions: unix_permissions,
182
+ use_data_descriptor: use_data_descriptor)
183
+ @out.tell
184
+ end
185
+
186
+ # Writes out the local header for an entry (file in the ZIP) that is using
187
+ # the stored storage model (is stored as-is).
188
+ # Once this method is called, the `<<` method has to be called one or more
189
+ # times to write the actual contents of the body.
190
+ #
191
+ # @param filename [String] the name of the file in the entry
192
+ # @param modification_time [Time] the modification time of the file in the archive
193
+ # @param size [Integer] the size of the file when uncompressed, in bytes
194
+ # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
195
+ # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor. When in use
196
+ # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
197
+ # @return [Integer] the offset the output IO is at after writing the entry header
198
+ def add_stored_entry(filename:, modification_time: Time.now.utc, size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false)
199
+ add_file_and_write_local_header(filename: filename,
200
+ modification_time: modification_time,
201
+ crc32: crc32,
202
+ storage_mode: STORED,
203
+ compressed_size: size,
204
+ uncompressed_size: size,
205
+ unix_permissions: unix_permissions,
206
+ use_data_descriptor: use_data_descriptor)
207
+ @out.tell
208
+ end
209
+
210
+ # Adds an empty directory to the archive with a size of 0 and permissions of 755.
211
+ #
212
+ # @param dirname [String] the name of the directory in the archive
213
+ # @param modification_time [Time] the modification time of the directory in the archive
214
+ # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
215
+ # @return [Integer] the offset the output IO is at after writing the entry header
216
+ def add_empty_directory(dirname:, modification_time: Time.now.utc, unix_permissions: nil)
217
+ add_file_and_write_local_header(filename: dirname.to_s + "/",
218
+ modification_time: modification_time,
219
+ crc32: 0,
220
+ storage_mode: STORED,
221
+ compressed_size: 0,
222
+ uncompressed_size: 0,
223
+ unix_permissions: unix_permissions,
224
+ use_data_descriptor: false)
225
+ @out.tell
226
+ end
227
+
228
+ # Opens the stream for a file stored in the archive, and yields a writer
229
+ # for that file to the block.
230
+ # The writer will buffer a small amount of data and see whether compression is
231
+ # effective for the data being output. If compression turns out to work well -
232
+ # for instance, if the output is mostly text - it is going to create a deflated
233
+ # file inside the zip. If the compression benefits are negligible, it will
234
+ # create a stored file inside the zip. It will delegate either to `write_deflated_file`
235
+ # or to `write_stored_file`.
236
+ #
237
+ # Using a block, the write will be terminated with a data descriptor outright.
238
+ #
239
+ # zip.write_file("foo.txt") do |sink|
240
+ # IO.copy_stream(source_file, sink)
241
+ # end
242
+ #
243
+ # If deferred writes are desired (for example - to integrate with an API that
244
+ # does not support blocks, or to work with non-blocking environments) the method
245
+ # has to be called without a block. In that case it returns the sink instead,
246
+ # permitting to write to it in a deferred fashion. When `close` is called on
247
+ # the sink, any remanining compression output will be flushed and the data
248
+ # descriptor is going to be written.
249
+ #
250
+ # Note that even though it does not have to happen within the same call stack,
251
+ # call sequencing still must be observed. It is therefore not possible to do
252
+ # this:
253
+ #
254
+ # writer_for_file1 = zip.write_file("somefile.jpg")
255
+ # writer_for_file2 = zip.write_file("another.tif")
256
+ # writer_for_file1 << data
257
+ # writer_for_file2 << data
258
+ #
259
+ # because it is likely to result in an invalid ZIP file structure later on.
260
+ # So using this facility in async scenarios is certainly possible, but care
261
+ # and attention is recommended.
262
+ #
263
+ # @param filename[String] the name of the file in the archive
264
+ # @param modification_time [Time] the modification time of the file in the archive
265
+ # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
266
+ # @yield
267
+ # sink[#<<, #write]
268
+ # an object that the file contents must be written to.
269
+ # Do not call `#close` on it - Streamer will do it for you. Write in chunks to achieve proper streaming
270
+ # output (using `IO.copy_stream` is a good approach).
271
+ # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
272
+ def write_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk)
273
+ writable = ZipKit::Streamer::Heuristic.new(self, filename, modification_time: modification_time, unix_permissions: unix_permissions)
274
+ yield_or_return_writable(writable, &blk)
275
+ end
276
+
277
+ # Opens the stream for a stored file in the archive, and yields a writer
278
+ # for that file to the block.
279
+ # Once the write completes, a data descriptor will be written with the
280
+ # actual compressed/uncompressed sizes and the CRC32 checksum.
281
+ #
282
+ # Using a block, the write will be terminated with a data descriptor outright.
283
+ #
284
+ # zip.write_stored_file("foo.txt") do |sink|
285
+ # IO.copy_stream(source_file, sink)
286
+ # end
287
+ #
288
+ # If deferred writes are desired (for example - to integrate with an API that
289
+ # does not support blocks, or to work with non-blocking environments) the method
290
+ # has to be called without a block. In that case it returns the sink instead,
291
+ # permitting to write to it in a deferred fashion. When `close` is called on
292
+ # the sink, any remanining compression output will be flushed and the data
293
+ # descriptor is going to be written.
294
+ #
295
+ # Note that even though it does not have to happen within the same call stack,
296
+ # call sequencing still must be observed. It is therefore not possible to do
297
+ # this:
298
+ #
299
+ # writer_for_file1 = zip.write_stored_file("somefile.jpg")
300
+ # writer_for_file2 = zip.write_stored_file("another.tif")
301
+ # writer_for_file1 << data
302
+ # writer_for_file2 << data
303
+ #
304
+ # because it is likely to result in an invalid ZIP file structure later on.
305
+ # So using this facility in async scenarios is certainly possible, but care
306
+ # and attention is recommended.
307
+ #
308
+ # If an exception is raised inside the block that is passed to the method, a `rollback!` call
309
+ # will be performed automatically and the entry just written will be omitted from the ZIP
310
+ # central directory. This can be useful if you want to rescue the exception and reattempt
311
+ # adding the ZIP file. Note that you will need to call `write_deflated_file` again to start a
312
+ # new file - you can't keep writing to the one that failed.
313
+ #
314
+ # @param filename[String] the name of the file in the archive
315
+ # @param modification_time [Time] the modification time of the file in the archive
316
+ # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
317
+ # @yield
318
+ # sink[#<<, #write]
319
+ # an object that the file contents must be written to.
320
+ # Do not call `#close` on it - Streamer will do it for you. Write in chunks to achieve proper streaming
321
+ # output (using `IO.copy_stream` is a good approach).
322
+ # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
323
+ def write_stored_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk)
324
+ add_stored_entry(filename: filename,
325
+ modification_time: modification_time,
326
+ use_data_descriptor: true,
327
+ crc32: 0,
328
+ size: 0,
329
+ unix_permissions: unix_permissions)
330
+
331
+ writable = Writable.new(self, StoredWriter.new(@out))
332
+ yield_or_return_writable(writable, &blk)
333
+ end
334
+
335
+ # Opens the stream for a deflated file in the archive, and yields a writer
336
+ # for that file to the block. Once the write completes, a data descriptor
337
+ # will be written with the actual compressed/uncompressed sizes and the
338
+ # CRC32 checksum.
339
+ #
340
+ # Using a block, the write will be terminated with a data descriptor outright.
341
+ #
342
+ # zip.write_stored_file("foo.txt") do |sink|
343
+ # IO.copy_stream(source_file, sink)
344
+ # end
345
+ #
346
+ # If deferred writes are desired (for example - to integrate with an API that
347
+ # does not support blocks, or to work with non-blocking environments) the method
348
+ # has to be called without a block. In that case it returns the sink instead,
349
+ # permitting to write to it in a deferred fashion. When `close` is called on
350
+ # the sink, any remanining compression output will be flushed and the data
351
+ # descriptor is going to be written.
352
+ #
353
+ # Note that even though it does not have to happen within the same call stack,
354
+ # call sequencing still must be observed. It is therefore not possible to do
355
+ # this:
356
+ #
357
+ # writer_for_file1 = zip.write_deflated_file("somefile.jpg")
358
+ # writer_for_file2 = zip.write_deflated_file("another.tif")
359
+ # writer_for_file1 << data
360
+ # writer_for_file2 << data
361
+ # writer_for_file1.close
362
+ # writer_for_file2.close
363
+ #
364
+ # because it is likely to result in an invalid ZIP file structure later on.
365
+ # So using this facility in async scenarios is certainly possible, but care
366
+ # and attention is recommended.
367
+ #
368
+ # If an exception is raised inside the block that is passed to the method, a `rollback!` call
369
+ # will be performed automatically and the entry just written will be omitted from the ZIP
370
+ # central directory. This can be useful if you want to rescue the exception and reattempt
371
+ # adding the ZIP file. Note that you will need to call `write_deflated_file` again to start a
372
+ # new file - you can't keep writing to the one that failed.
373
+ #
374
+ # @param filename[String] the name of the file in the archive
375
+ # @param modification_time [Time] the modification time of the file in the archive
376
+ # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
377
+ # @yield
378
+ # sink[#<<, #write]
379
+ # an object that the file contents must be written to.
380
+ # Do not call `#close` on it - Streamer will do it for you. Write in chunks to achieve proper streaming
381
+ # output (using `IO.copy_stream` is a good approach).
382
+ # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
383
+ def write_deflated_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk)
384
+ add_deflated_entry(filename: filename,
385
+ modification_time: modification_time,
386
+ use_data_descriptor: true,
387
+ crc32: 0,
388
+ compressed_size: 0,
389
+ uncompressed_size: 0,
390
+ unix_permissions: unix_permissions)
391
+
392
+ writable = Writable.new(self, DeflatedWriter.new(@out))
393
+ yield_or_return_writable(writable, &blk)
394
+ end
395
+
396
+ # Closes the archive. Writes the central directory, and switches the writer into
397
+ # a state where it can no longer be written to.
398
+ #
399
+ # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
400
+ #
401
+ # @return [Integer] the offset the output IO is at after closing the archive
402
+ def close
403
+ # Make sure offsets are in order
404
+ verify_offsets!
405
+
406
+ # Record the central directory offset, so that it can be written into the EOCD record
407
+ cdir_starts_at = @out.tell
408
+
409
+ # Write out the central directory entries, one for each file
410
+ @files.each do |entry|
411
+ # Skip fillers which are standing in for broken/incomplete files
412
+ next if entry.filler?
413
+
414
+ @writer.write_central_directory_file_header(io: @out,
415
+ local_file_header_location: entry.local_header_offset,
416
+ gp_flags: entry.gp_flags,
417
+ storage_mode: entry.storage_mode,
418
+ compressed_size: entry.compressed_size,
419
+ uncompressed_size: entry.uncompressed_size,
420
+ mtime: entry.mtime,
421
+ crc32: entry.crc32,
422
+ filename: entry.filename,
423
+ unix_permissions: entry.unix_permissions)
424
+ end
425
+
426
+ # Record the central directory size, for the EOCDR
427
+ cdir_size = @out.tell - cdir_starts_at
428
+
429
+ # Write out the EOCDR
430
+ @writer.write_end_of_central_directory(io: @out,
431
+ start_of_central_directory_location: cdir_starts_at,
432
+ central_directory_size: cdir_size,
433
+ num_files_in_archive: @files.length)
434
+
435
+ # Clear the files so that GC will not have to trace all the way to here to deallocate them
436
+ @files.clear
437
+ @path_set.clear
438
+
439
+ # and return the final offset
440
+ @out.tell
441
+ end
442
+
443
+ # Sets up the ZipWriter with wrappers if necessary. The method is called once, when the Streamer
444
+ # gets instantiated - the Writer then gets reused. This method is primarily there so that you
445
+ # can override it.
446
+ #
447
+ # @return [ZipKit::ZipWriter] the writer to perform writes with
448
+ def create_writer
449
+ ZipKit::ZipWriter.new
450
+ end
451
+
452
+ # Updates the last entry written with the CRC32 checksum and compressed/uncompressed
453
+ # sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
454
+ # After updating the entry will immediately write the data descriptor bytes
455
+ # to the output.
456
+ #
457
+ # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
458
+ # @param compressed_size [Integer] the size of the compressed segment within the ZIP
459
+ # @param uncompressed_size [Integer] the size of the entry once uncompressed
460
+ # @return [Integer] the offset the output IO is at after writing the data descriptor
461
+ def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:)
462
+ # Save the information into the entry for when the time comes to write
463
+ # out the central directory
464
+ last_entry = @files.fetch(-1)
465
+ last_entry.crc32 = crc32
466
+ last_entry.compressed_size = compressed_size
467
+ last_entry.uncompressed_size = uncompressed_size
468
+
469
+ offset_before_data_descriptor = @out.tell
470
+ @writer.write_data_descriptor(io: @out,
471
+ crc32: last_entry.crc32,
472
+ compressed_size: last_entry.compressed_size,
473
+ uncompressed_size: last_entry.uncompressed_size)
474
+ last_entry.bytes_used_for_data_descriptor = @out.tell - offset_before_data_descriptor
475
+
476
+ @out.tell
477
+ end
478
+
479
+ # Removes the buffered local entry for the last file written. This can be used when rescuing from exceptions
480
+ # when you want to skip the file that failed writing into the ZIP from getting written out into the
481
+ # ZIP central directory. This is useful when, for example, you encounter errors retrieving the file
482
+ # that you want to place inside the ZIP from a remote storage location and some network exception
483
+ # gets raised. `write_deflated_file` and `write_stored_file` will rollback for you automatically.
484
+ # Of course it is not possible to remove the failed entry from the ZIP file entirely, as the data
485
+ # is likely already on the wire. However, excluding the entry from the central directory of the ZIP
486
+ # file will allow better-behaved ZIP unarchivers to extract the entries which did store correctly,
487
+ # provided they read the ZIP from the central directory and not straight-ahead.
488
+ #
489
+ # @example
490
+ # zip.add_stored_entry(filename: "data.bin", size: 4.megabytes, crc32: the_crc)
491
+ # while chunk = remote.read(65*2048)
492
+ # zip << chunk
493
+ # rescue Timeout::Error
494
+ # zip.rollback!
495
+ # # and proceed to the next file
496
+ # end
497
+ # @return [Integer] position in the output stream / ZIP archive
498
+ def rollback!
499
+ removed_entry = @files.pop
500
+ return @out.tell unless removed_entry
501
+
502
+ @path_set.clear
503
+ @files.each do |e|
504
+ @path_set.add_directory_or_file_path(e.filename) unless e.filler?
505
+ end
506
+ @files << Filler.new(@out.tell - removed_entry.local_header_offset)
507
+
508
+ @out.tell
509
+ end
510
+
511
+ private
512
+
513
+ def yield_or_return_writable(writable, &block_to_pass_writable_to)
514
+ if block_to_pass_writable_to
515
+ begin
516
+ yield(writable)
517
+ writable.close
518
+ rescue
519
+ writable.close
520
+ rollback!
521
+ raise
522
+ end
523
+ end
524
+
525
+ writable
526
+ end
527
+
528
+ def verify_offsets!
529
+ # We need to check whether the offsets noted for the entries actually make sense
530
+ computed_offset = @files.map(&:total_bytes_used).inject(0, &:+)
531
+ actual_offset = @out.tell
532
+ if computed_offset != actual_offset
533
+ message = <<~EMS
534
+ The offset of the Streamer output IO is out of sync with the expected value. All entries written so far,
535
+ including their compressed bodies, local headers and data descriptors, add up to a certain offset,
536
+ but this offset does not match the actual offset of the IO.
537
+
538
+ Entries add up to #{computed_offset} bytes and the IO is at #{actual_offset} bytes.
539
+
540
+ This can happen if you write local headers for an entry, write the "body" of the entry directly to the IO
541
+ object which is your destination, but do not adjust the offset known to the Streamer object. To adjust
542
+ the offfset you need to call `Streamer#simulate_write(body_size)` after outputting the entry. Otherwise
543
+ the local header offsets of the entries you write are going to be incorrect and some ZIP applications
544
+ are going to have problems opening your archive.
545
+ EMS
546
+ raise OffsetOutOfSync, message
547
+ end
548
+ end
549
+
550
+ def add_file_and_write_local_header(
551
+ filename:,
552
+ modification_time:,
553
+ crc32:,
554
+ storage_mode:,
555
+ compressed_size:,
556
+ uncompressed_size:,
557
+ use_data_descriptor:,
558
+ unix_permissions:
559
+ )
560
+
561
+ # Clean backslashes
562
+ filename = remove_backslash(filename)
563
+ raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
564
+ raise Overflow, "Filename is too long" if filename.bytesize > 0xFFFF
565
+
566
+ # If we need to massage filenames to enforce uniqueness,
567
+ # do so before we check for file/directory conflicts
568
+ filename = ZipKit::UniquifyFilename.call(filename, @path_set) if @dedupe_filenames
569
+
570
+ # Make sure there is no file/directory clobbering (conflicts), or - if deduping is disabled -
571
+ # no duplicate filenames/paths
572
+ if filename.end_with?("/")
573
+ @path_set.add_directory_path(filename)
574
+ else
575
+ @path_set.add_file_path(filename)
576
+ end
577
+
578
+ if use_data_descriptor
579
+ crc32 = 0
580
+ compressed_size = 0
581
+ uncompressed_size = 0
582
+ end
583
+
584
+ local_header_starts_at = @out.tell
585
+
586
+ e = Entry.new(filename,
587
+ crc32,
588
+ compressed_size,
589
+ uncompressed_size,
590
+ storage_mode,
591
+ modification_time,
592
+ use_data_descriptor,
593
+ _local_file_header_offset = local_header_starts_at,
594
+ _bytes_used_for_local_header = 0,
595
+ _bytes_used_for_data_descriptor = 0,
596
+ unix_permissions)
597
+
598
+ @writer.write_local_file_header(io: @out,
599
+ gp_flags: e.gp_flags,
600
+ crc32: e.crc32,
601
+ compressed_size: e.compressed_size,
602
+ uncompressed_size: e.uncompressed_size,
603
+ mtime: e.mtime,
604
+ filename: e.filename,
605
+ storage_mode: e.storage_mode)
606
+ e.bytes_used_for_local_header = @out.tell - e.local_header_offset
607
+
608
+ @files << e
609
+ end
610
+
611
+ def remove_backslash(filename)
612
+ filename.tr("\\", "_")
613
+ end
614
+ end
@@ -0,0 +1,39 @@
1
# frozen_string_literal: true

module ZipKit
  # Makes filenames unique within an archive by appending an "(n)" counter.
  module UniquifyFilename
    # Makes a given filename unique by inserting a " (n)" suffix just before
    # the filename extension, so "file.txt" becomes "file (1).txt". The
    # counter is incremented as long as the generated name is still present
    # in the `while_included_in` object. Compound extensions ending in "gz"
    # or "zip" (such as ".tar.gz") are kept intact.
    #
    # @param path[String] the path to make unique
    # @param while_included_in[#include?] an object that stores the list of already used paths
    # @return [String] the path as is, or with the suffix required to make it unique
    def self.call(path, while_included_in)
      return path unless while_included_in.include?(path)

      # The "(n)" goes at the end of the base name, before the extension,
      # and only when there actually is a duplicate
      counter_pattern = /\((\d+)\)$/
      segments = path.split(".")
      extension = if segments.last =~ /gz|zip/ && segments.size > 2
        segments.pop(2) # keep compound extensions like ".tar.gz" together
      elsif segments.size > 1
        segments.pop
      end
      base = segments.pop

      attempt = 1
      loop do
        base = if base&.match?(counter_pattern)
          base.sub(counter_pattern, "(#{attempt})")
        else
          "#{base} (#{attempt})"
        end
        candidate = (segments + [base, extension]).compact.join(".")
        return candidate unless while_included_in.include?(candidate)
        attempt += 1
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

module ZipKit
  # The released version of the zip_kit gem.
  VERSION = "6.0.0"
end