zip_kit 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.document +5 -0
- data/.github/workflows/ci.yml +29 -0
- data/.gitignore +61 -0
- data/.rspec +1 -0
- data/.standard.yml +8 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +255 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +153 -0
- data/Gemfile +4 -0
- data/IMPLEMENTATION_DETAILS.md +97 -0
- data/LICENSE.txt +20 -0
- data/README.md +234 -0
- data/Rakefile +21 -0
- data/bench/buffered_crc32_bench.rb +109 -0
- data/examples/archive_size_estimate.rb +15 -0
- data/examples/config.ru +7 -0
- data/examples/deferred_write.rb +58 -0
- data/examples/parallel_compression_with_block_deflate.rb +86 -0
- data/examples/rack_application.rb +63 -0
- data/examples/s3_upload.rb +23 -0
- data/lib/zip_kit/block_deflate.rb +130 -0
- data/lib/zip_kit/block_write.rb +47 -0
- data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
- data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
- data/lib/zip_kit/file_reader.rb +740 -0
- data/lib/zip_kit/null_writer.rb +12 -0
- data/lib/zip_kit/output_enumerator.rb +150 -0
- data/lib/zip_kit/path_set.rb +163 -0
- data/lib/zip_kit/rack_chunked_body.rb +32 -0
- data/lib/zip_kit/rack_tempfile_body.rb +61 -0
- data/lib/zip_kit/rails_streaming.rb +37 -0
- data/lib/zip_kit/remote_io.rb +114 -0
- data/lib/zip_kit/remote_uncap.rb +22 -0
- data/lib/zip_kit/size_estimator.rb +84 -0
- data/lib/zip_kit/stream_crc32.rb +60 -0
- data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
- data/lib/zip_kit/streamer/entry.rb +37 -0
- data/lib/zip_kit/streamer/filler.rb +9 -0
- data/lib/zip_kit/streamer/heuristic.rb +68 -0
- data/lib/zip_kit/streamer/stored_writer.rb +39 -0
- data/lib/zip_kit/streamer/writable.rb +36 -0
- data/lib/zip_kit/streamer.rb +614 -0
- data/lib/zip_kit/uniquify_filename.rb +39 -0
- data/lib/zip_kit/version.rb +5 -0
- data/lib/zip_kit/write_and_tell.rb +40 -0
- data/lib/zip_kit/write_buffer.rb +71 -0
- data/lib/zip_kit/write_shovel.rb +22 -0
- data/lib/zip_kit/zip_writer.rb +436 -0
- data/lib/zip_kit.rb +24 -0
- data/zip_kit.gemspec +41 -0
- metadata +335 -0
# frozen_string_literal: true

require "set"

# Is used to write streamed ZIP archives into the provided IO-ish object.
# The output IO is never going to be rewound or seeked, so the output
# of this object can be coupled directly to, say, a Rack output. The
# output can also be a String, Array or anything that responds to `<<`.
#
# Allows for splicing raw files (for "stored" entries without compression)
# and splicing of deflated files (for "deflated" storage mode).
#
# For stored entries, you need to know the CRC32 (as a uint) and the filesize upfront,
# before the writing of the entry body starts.
#
# Any object that responds to `<<` can be used as the Streamer target - you can use
# a String, an Array, a Socket or a File, at your leisure.
#
# ## Using the Streamer with runtime compression
#
# You can use the Streamer with data descriptors (the CRC32 and the sizes will be
# written after the file data). This allows non-rewinding on-the-fly compression.
# The streamer will pick the optimum compression method ("stored" or "deflated")
# depending on the nature of the byte stream you send into it (by using a small buffer).
# If you are compressing large files, the Deflater object that the Streamer controls
# will be regularly flushed to prevent memory inflation.
#
#     ZipKit::Streamer.open(file_socket_or_string) do |zip|
#       zip.write_file('mov.mp4') do |sink|
#         File.open('mov.mp4', 'rb') { |source| IO.copy_stream(source, sink) }
#       end
#       zip.write_file('long-novel.txt') do |sink|
#         File.open('novel.txt', 'rb') { |source| IO.copy_stream(source, sink) }
#       end
#     end
#
# The central directory will be written automatically at the end of the block.
#
# ## Using the Streamer with entries of known size and having a known CRC32 checksum
#
# Streamer allows "IO splicing" - in this mode it will only control the metadata output,
# but you can write the data to the socket/file outside of the Streamer. For example, when
# using the sendfile gem:
#
#     ZipKit::Streamer.open(socket) do |zip|
#       zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
#       socket.sendfile(tempfile1)
#       zip.simulate_write(tempfile1.size)
#
#       zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
#       socket.sendfile(tempfile2)
#       zip.simulate_write(tempfile2.size)
#     end
#
# Note that you need to use `simulate_write` in this case. This needs to happen since Streamer
# writes absolute offsets into the ZIP (local file header offsets and the like),
# and it relies on the output object to tell it how many bytes have been written
# so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
# offsets in the IO will not be updated - which will result in an invalid ZIP.
#
# ## On-the-fly deflate - using the Streamer with async/suspended writes and data descriptors
#
# If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
# there is an option to use a separate writer object. It gets returned from `write_deflated_file`
# and `write_stored_file` if you do not provide them with a block, and will accept data writes.
# Do note that you _must_ call `#close` on that object yourself:
#
#     ZipKit::Streamer.open(socket) do |zip|
#       w = zip.write_stored_file('mov.mp4')
#       IO.copy_stream(source_io, w)
#       w.close
#     end
#
# The central directory will be written automatically at the end of the `open` block. If you need
# to manage the Streamer manually, or defer the central directory write until appropriate, use
# the constructor instead and call `Streamer#close`:
#
#     zip = ZipKit::Streamer.new(out_io)
#     .....
#     zip.close
#
# Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
class ZipKit::Streamer
  # Collaborators are loaded lazily, resolved relative to this file.
  autoload :DeflatedWriter, File.join(__dir__, "streamer/deflated_writer.rb")
  autoload :Writable, File.join(__dir__, "streamer/writable.rb")
  autoload :StoredWriter, File.join(__dir__, "streamer/stored_writer.rb")
  autoload :Entry, File.join(__dir__, "streamer/entry.rb")
  autoload :Filler, File.join(__dir__, "streamer/filler.rb")
  autoload :Heuristic, File.join(__dir__, "streamer/heuristic.rb")

  include ZipKit::WriteShovel

  # ZIP storage modes (see APPNOTE.TXT section 4.4.5 "compression method")
  STORED = 0
  DEFLATED = 8

  EntryBodySizeMismatch = Class.new(StandardError)
  InvalidOutput = Class.new(ArgumentError)
  Overflow = Class.new(StandardError)
  UnknownMode = Class.new(StandardError)
  OffsetOutOfSync = Class.new(StandardError)

  private_constant :STORED, :DEFLATED

  # Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
  # returns, the Streamer will have its `close` method called, which will write out the central
  # directory of the archive to the output.
  #
  # @param stream [IO] the destination IO for the ZIP (should respond to `tell` and `<<`)
  # @param kwargs_for_new [Hash] keyword arguments for #initialize
  # @yield [Streamer] the streamer that can be written to
  # @return [Integer] the offset the output IO is at after closing the archive
  def self.open(stream, **kwargs_for_new)
    archive = new(stream, **kwargs_for_new)
    yield(archive)
    archive.close
  end

  # Creates a new Streamer on top of the given IO-ish object.
  #
  # @param writable[#<<] the destination IO for the ZIP. Anything that responds to `<<` can be used.
  # @param writer[ZipKit::ZipWriter] the object to be used as the writer.
  #    Defaults to an instance of ZipKit::ZipWriter, normally you won't need to override it
  # @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
  #    should be suffixed with (1), (2) etc. Default value is `false` - if
  #    duplicate names are used an exception will be raised
  def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false)
    raise InvalidOutput, "The writable must respond to #<< or #write" unless writable.respond_to?(:<<) || writable.respond_to?(:write)

    # WriteAndTell tracks the byte offset of everything that passes through,
    # which is what all local-header offsets are derived from.
    @out = ZipKit::WriteAndTell.new(writable)
    @files = []
    @path_set = ZipKit::PathSet.new
    @writer = writer
    @dedupe_filenames = auto_rename_duplicate_filenames
  end

  # Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
  #
  # @param binary_data [String] a String in binary encoding
  # @return self
  def <<(binary_data)
    @out << binary_data
    self
  end

  # Advances the internal IO pointer to keep the offsets of the ZIP file in
  # check. Use this if you are going to use accelerated writes to the socket
  # (like the `sendfile()` call) after writing the headers, or if you
  # just need to figure out the size of the archive.
  #
  # @param num_bytes [Integer] how many bytes are going to be written bypassing the Streamer
  # @return [Integer] position in the output stream / ZIP archive
  def simulate_write(num_bytes)
    @out.advance_position_by(num_bytes)
    @out.tell
  end

  # Writes out the local header for an entry (file in the ZIP) that is using
  # the deflated storage model (is compressed). Once this method is called,
  # the `<<` method has to be called to write the actual contents of the body.
  #
  # Note that the deflated body that is going to be written into the output
  # has to be _precompressed_ (pre-deflated) before writing it into the
  # Streamer, because otherwise it is impossible to know its size upfront.
  #
  # @param filename [String] the name of the file in the entry
  # @param modification_time [Time] the modification time of the file in the archive
  # @param compressed_size [Integer] the size of the compressed entry that
  #    is going to be written into the archive
  # @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
  # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
  # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
  # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
  # @return [Integer] the offset the output IO is at after writing the entry header
  def add_deflated_entry(filename:, modification_time: Time.now.utc, compressed_size: 0, uncompressed_size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false)
    add_file_and_write_local_header(filename: filename,
      modification_time: modification_time,
      crc32: crc32,
      storage_mode: DEFLATED,
      compressed_size: compressed_size,
      uncompressed_size: uncompressed_size,
      unix_permissions: unix_permissions,
      use_data_descriptor: use_data_descriptor)
    @out.tell
  end

  # Writes out the local header for an entry (file in the ZIP) that is using
  # the stored storage model (is stored as-is).
  # Once this method is called, the `<<` method has to be called one or more
  # times to write the actual contents of the body.
  #
  # @param filename [String] the name of the file in the entry
  # @param modification_time [Time] the modification time of the file in the archive
  # @param size [Integer] the size of the file when uncompressed, in bytes
  # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
  # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor.
  #    When in use, the size and CRC32 given here will be ignored and zeroed out in the local header.
  # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
  # @return [Integer] the offset the output IO is at after writing the entry header
  def add_stored_entry(filename:, modification_time: Time.now.utc, size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false)
    add_file_and_write_local_header(filename: filename,
      modification_time: modification_time,
      crc32: crc32,
      storage_mode: STORED,
      compressed_size: size,
      uncompressed_size: size,
      unix_permissions: unix_permissions,
      use_data_descriptor: use_data_descriptor)
    @out.tell
  end

  # Adds an empty directory to the archive with a size of 0 and permissions of 755.
  #
  # @param dirname [String] the name of the directory in the archive
  # @param modification_time [Time] the modification time of the directory in the archive
  # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
  # @return [Integer] the offset the output IO is at after writing the entry header
  def add_empty_directory(dirname:, modification_time: Time.now.utc, unix_permissions: nil)
    # A trailing slash is what marks a ZIP entry as a directory
    add_file_and_write_local_header(filename: dirname.to_s + "/",
      modification_time: modification_time,
      crc32: 0,
      storage_mode: STORED,
      compressed_size: 0,
      uncompressed_size: 0,
      unix_permissions: unix_permissions,
      use_data_descriptor: false)
    @out.tell
  end

  # Opens the stream for a file stored in the archive, and yields a writer
  # for that file to the block.
  # The writer will buffer a small amount of data and see whether compression is
  # effective for the data being output. If compression turns out to work well -
  # for instance, if the output is mostly text - it is going to create a deflated
  # file inside the zip. If the compression benefits are negligible, it will
  # create a stored file inside the zip. It will delegate either to `write_deflated_file`
  # or to `write_stored_file`.
  #
  # Using a block, the write will be terminated with a data descriptor outright.
  #
  #     zip.write_file("foo.txt") do |sink|
  #       IO.copy_stream(source_file, sink)
  #     end
  #
  # If deferred writes are desired (for example - to integrate with an API that
  # does not support blocks, or to work with non-blocking environments) the method
  # has to be called without a block. In that case it returns the sink instead,
  # permitting to write to it in a deferred fashion. When `close` is called on
  # the sink, any remaining compression output will be flushed and the data
  # descriptor is going to be written.
  #
  # Note that even though it does not have to happen within the same call stack,
  # call sequencing still must be observed: interleaving writes to two sinks
  # obtained from separate `write_file` calls is likely to result in an invalid
  # ZIP file structure.
  #
  # @param filename[String] the name of the file in the archive
  # @param modification_time [Time] the modification time of the file in the archive
  # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
  # @yield [#<<, #write] an object that the file contents must be written to.
  #    Do not call `#close` on it - Streamer will do it for you. Write in chunks to achieve proper streaming
  #    output (using `IO.copy_stream` is a good approach).
  # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
  def write_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk)
    writable = ZipKit::Streamer::Heuristic.new(self, filename, modification_time: modification_time, unix_permissions: unix_permissions)
    yield_or_return_writable(writable, &blk)
  end

  # Opens the stream for a stored file in the archive, and yields a writer
  # for that file to the block.
  # Once the write completes, a data descriptor will be written with the
  # actual compressed/uncompressed sizes and the CRC32 checksum.
  #
  # Using a block, the write will be terminated with a data descriptor outright.
  #
  #     zip.write_stored_file("foo.txt") do |sink|
  #       IO.copy_stream(source_file, sink)
  #     end
  #
  # If deferred writes are desired, call the method without a block - it returns
  # the sink, which you must `#close` yourself once you have written the body
  # (that writes the data descriptor). Call sequencing still must be observed:
  # interleaving writes to two open sinks is likely to result in an invalid ZIP.
  #
  # If an exception is raised inside the block that is passed to the method, a `rollback!` call
  # will be performed automatically and the entry just written will be omitted from the ZIP
  # central directory. This can be useful if you want to rescue the exception and reattempt
  # adding the ZIP file. Note that you will need to call `write_stored_file` again to start a
  # new file - you can't keep writing to the one that failed.
  #
  # @param filename[String] the name of the file in the archive
  # @param modification_time [Time] the modification time of the file in the archive
  # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
  # @yield [#<<, #write] an object that the file contents must be written to.
  #    Do not call `#close` on it - Streamer will do it for you.
  # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
  def write_stored_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk)
    # Sizes and CRC32 are not known upfront, so they are zeroed in the local
    # header and delivered via the data descriptor after the body.
    add_stored_entry(filename: filename,
      modification_time: modification_time,
      use_data_descriptor: true,
      crc32: 0,
      size: 0,
      unix_permissions: unix_permissions)

    writable = Writable.new(self, StoredWriter.new(@out))
    yield_or_return_writable(writable, &blk)
  end

  # Opens the stream for a deflated file in the archive, and yields a writer
  # for that file to the block. Once the write completes, a data descriptor
  # will be written with the actual compressed/uncompressed sizes and the
  # CRC32 checksum.
  #
  # Using a block, the write will be terminated with a data descriptor outright.
  #
  #     zip.write_deflated_file("foo.txt") do |sink|
  #       IO.copy_stream(source_file, sink)
  #     end
  #
  # If deferred writes are desired, call the method without a block - it returns
  # the sink, which you must `#close` yourself once you have written the body
  # (that flushes the deflater and writes the data descriptor). Call sequencing
  # still must be observed: interleaving writes to two open sinks is likely to
  # result in an invalid ZIP.
  #
  # If an exception is raised inside the block that is passed to the method, a `rollback!` call
  # will be performed automatically and the entry just written will be omitted from the ZIP
  # central directory. This can be useful if you want to rescue the exception and reattempt
  # adding the ZIP file. Note that you will need to call `write_deflated_file` again to start a
  # new file - you can't keep writing to the one that failed.
  #
  # @param filename[String] the name of the file in the archive
  # @param modification_time [Time] the modification time of the file in the archive
  # @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
  # @yield [#<<, #write] an object that the file contents must be written to.
  #    Do not call `#close` on it - Streamer will do it for you.
  # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
  def write_deflated_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk)
    add_deflated_entry(filename: filename,
      modification_time: modification_time,
      use_data_descriptor: true,
      crc32: 0,
      compressed_size: 0,
      uncompressed_size: 0,
      unix_permissions: unix_permissions)

    writable = Writable.new(self, DeflatedWriter.new(@out))
    yield_or_return_writable(writable, &blk)
  end

  # Closes the archive. Writes the central directory, and switches the writer into
  # a state where it can no longer be written to.
  #
  # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
  #
  # @return [Integer] the offset the output IO is at after closing the archive
  def close
    # Make sure offsets are in order
    verify_offsets!

    # Record the central directory offset, so that it can be written into the EOCD record
    cdir_starts_at = @out.tell

    # Write out the central directory entries, one for each file
    @files.each do |entry|
      # Skip fillers which are standing in for broken/incomplete files
      next if entry.filler?

      @writer.write_central_directory_file_header(io: @out,
        local_file_header_location: entry.local_header_offset,
        gp_flags: entry.gp_flags,
        storage_mode: entry.storage_mode,
        compressed_size: entry.compressed_size,
        uncompressed_size: entry.uncompressed_size,
        mtime: entry.mtime,
        crc32: entry.crc32,
        filename: entry.filename,
        unix_permissions: entry.unix_permissions)
    end

    # Record the central directory size, for the EOCDR
    cdir_size = @out.tell - cdir_starts_at

    # Write out the EOCDR. Fillers (left behind by `rollback!`) are skipped when
    # writing the central directory above, so they must not be counted here
    # either - otherwise the advertised entry count would not match the number
    # of central directory records actually present.
    @writer.write_end_of_central_directory(io: @out,
      start_of_central_directory_location: cdir_starts_at,
      central_directory_size: cdir_size,
      num_files_in_archive: @files.count { |entry| !entry.filler? })

    # Clear the files so that GC will not have to trace all the way to here to deallocate them
    @files.clear
    @path_set.clear

    # and return the final offset
    @out.tell
  end

  # Sets up the ZipWriter with wrappers if necessary. The method is called once, when the Streamer
  # gets instantiated - the Writer then gets reused. This method is primarily there so that you
  # can override it.
  #
  # @return [ZipKit::ZipWriter] the writer to perform writes with
  def create_writer
    ZipKit::ZipWriter.new
  end

  # Updates the last entry written with the CRC32 checksum and compressed/uncompressed
  # sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
  # After updating the entry will immediately write the data descriptor bytes
  # to the output.
  #
  # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
  # @param compressed_size [Integer] the size of the compressed segment within the ZIP
  # @param uncompressed_size [Integer] the size of the entry once uncompressed
  # @return [Integer] the offset the output IO is at after writing the data descriptor
  def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:)
    # Save the information into the entry for when the time comes to write
    # out the central directory
    last_entry = @files.fetch(-1)
    last_entry.crc32 = crc32
    last_entry.compressed_size = compressed_size
    last_entry.uncompressed_size = uncompressed_size

    offset_before_data_descriptor = @out.tell
    @writer.write_data_descriptor(io: @out,
      crc32: last_entry.crc32,
      compressed_size: last_entry.compressed_size,
      uncompressed_size: last_entry.uncompressed_size)
    last_entry.bytes_used_for_data_descriptor = @out.tell - offset_before_data_descriptor

    @out.tell
  end

  # Removes the buffered local entry for the last file written. This can be used when rescuing from exceptions
  # when you want to skip the file that failed writing into the ZIP from getting written out into the
  # ZIP central directory. This is useful when, for example, you encounter errors retrieving the file
  # that you want to place inside the ZIP from a remote storage location and some network exception
  # gets raised. `write_deflated_file` and `write_stored_file` will rollback for you automatically.
  # Of course it is not possible to remove the failed entry from the ZIP file entirely, as the data
  # is likely already on the wire. However, excluding the entry from the central directory of the ZIP
  # file will allow better-behaved ZIP unarchivers to extract the entries which did store correctly,
  # provided they read the ZIP from the central directory and not straight-ahead.
  #
  # @example
  #   zip.add_stored_entry(filename: "data.bin", size: 4.megabytes, crc32: the_crc)
  #   while chunk = remote.read(65*2048)
  #     zip << chunk
  #   rescue Timeout::Error
  #     zip.rollback!
  #     # and proceed to the next file
  #   end
  # @return [Integer] position in the output stream / ZIP archive
  def rollback!
    removed_entry = @files.pop
    return @out.tell unless removed_entry

    # Rebuild the path set without the removed entry, so that its filename
    # may be reused by a subsequent (re)attempt
    @path_set.clear
    @files.each do |e|
      @path_set.add_directory_or_file_path(e.filename) unless e.filler?
    end
    # A filler stands in for the bytes already emitted for the removed entry,
    # keeping the offset accounting in verify_offsets! consistent
    @files << Filler.new(@out.tell - removed_entry.local_header_offset)

    @out.tell
  end

  private

  # Yields the writable to the block (closing it and rolling back on error),
  # or returns it as-is when no block is given.
  def yield_or_return_writable(writable, &block_to_pass_writable_to)
    if block_to_pass_writable_to
      begin
        yield(writable)
        writable.close
      rescue
        writable.close
        rollback!
        raise
      end
    end

    writable
  end

  # Raises OffsetOutOfSync if the bytes accounted for by the entries written so
  # far do not add up to the actual position of the output IO.
  def verify_offsets!
    # We need to check whether the offsets noted for the entries actually make sense
    computed_offset = @files.sum(&:total_bytes_used)
    actual_offset = @out.tell
    if computed_offset != actual_offset
      message = <<~EMS
        The offset of the Streamer output IO is out of sync with the expected value. All entries written so far,
        including their compressed bodies, local headers and data descriptors, add up to a certain offset,
        but this offset does not match the actual offset of the IO.

        Entries add up to #{computed_offset} bytes and the IO is at #{actual_offset} bytes.

        This can happen if you write local headers for an entry, write the "body" of the entry directly to the IO
        object which is your destination, but do not adjust the offset known to the Streamer object. To adjust
        the offset you need to call `Streamer#simulate_write(body_size)` after outputting the entry. Otherwise
        the local header offsets of the entries you write are going to be incorrect and some ZIP applications
        are going to have problems opening your archive.
      EMS
      raise OffsetOutOfSync, message
    end
  end

  # Validates the entry attributes, registers the path, writes the local file
  # header and records the Entry for the central directory written on close.
  def add_file_and_write_local_header(
    filename:,
    modification_time:,
    crc32:,
    storage_mode:,
    compressed_size:,
    uncompressed_size:,
    use_data_descriptor:,
    unix_permissions:
  )

    # Clean backslashes
    filename = remove_backslash(filename)
    raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
    # The ZIP filename field length is a uint16
    raise Overflow, "Filename is too long" if filename.bytesize > 0xFFFF

    # If we need to massage filenames to enforce uniqueness,
    # do so before we check for file/directory conflicts
    filename = ZipKit::UniquifyFilename.call(filename, @path_set) if @dedupe_filenames

    # Make sure there is no file/directory clobbering (conflicts), or - if deduping is disabled -
    # no duplicate filenames/paths
    if filename.end_with?("/")
      @path_set.add_directory_path(filename)
    else
      @path_set.add_file_path(filename)
    end

    # With a data descriptor the real values come after the body; the local
    # header must carry zeroes
    if use_data_descriptor
      crc32 = 0
      compressed_size = 0
      uncompressed_size = 0
    end

    local_header_starts_at = @out.tell

    e = Entry.new(filename,
      crc32,
      compressed_size,
      uncompressed_size,
      storage_mode,
      modification_time,
      use_data_descriptor,
      _local_file_header_offset = local_header_starts_at,
      _bytes_used_for_local_header = 0,
      _bytes_used_for_data_descriptor = 0,
      unix_permissions)

    @writer.write_local_file_header(io: @out,
      gp_flags: e.gp_flags,
      crc32: e.crc32,
      compressed_size: e.compressed_size,
      uncompressed_size: e.uncompressed_size,
      mtime: e.mtime,
      filename: e.filename,
      storage_mode: e.storage_mode)
    e.bytes_used_for_local_header = @out.tell - e.local_header_offset

    @files << e
  end

  # Backslashes are path separators on Windows and are not allowed in
  # ZIP entry names - replace them with underscores.
  def remove_backslash(filename)
    filename.tr("\\", "_")
  end
end
# frozen_string_literal: true

module ZipKit
  # Generates unique filenames for an archive by suffixing duplicates with " (n)".
  module UniquifyFilename
    # Makes a given filename unique by appending a (n) suffix
    # just before the filename extension. So "file.txt" gets
    # transformed into "file (1).txt". The transformation is applied
    # repeatedly as long as the generated filename is present
    # in the `while_included_in` object.
    #
    # @param path[String] the path to make unique
    # @param while_included_in[#include?] an object that stores the list of already used paths
    # @return [String] the path as is, or with the suffix required to make it unique
    def self.call(path, while_included_in)
      return path unless while_included_in.include?(path)

      # we add (1), (2), (n) at the end of a filename before the filename extension,
      # but only if there is a duplicate
      copy_pattern = /\((\d+)\)$/
      parts = path.split(".")
      # Treat "x.tar.gz" / "x.tar.zip" style double extensions as one extension,
      # so the suffix lands before ".tar.gz" rather than in the middle of it.
      # NOTE(review): splitting on "." means dotfiles like ".gitignore" are
      # treated as having an empty basename plus an extension - confirm this
      # matches the intended behavior for such names.
      ext = if parts.last =~ /gz|zip/ && parts.size > 2
        parts.pop(2)
      elsif parts.size > 1
        parts.pop
      end
      fn_last_part = parts.pop

      duplicate_counter = 1
      loop do
        # Bump an existing "(n)" suffix in place, otherwise append a fresh one
        fn_last_part = if fn_last_part&.match?(copy_pattern)
          fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
        else
          "#{fn_last_part} (#{duplicate_counter})"
        end
        # Array#join flattens the nested double-extension array automatically
        new_path = (parts + [fn_last_part, ext]).compact.join(".")
        return new_path unless while_included_in.include?(new_path)
        duplicate_counter += 1
      end
    end
  end
end