zip_tricks 5.2.0 → 5.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +96 -0
- data/.gitignore +2 -1
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +37 -1
- data/CONTRIBUTING.md +4 -4
- data/README.md +28 -17
- data/lib/zip_tricks/block_write.rb +26 -21
- data/lib/zip_tricks/file_reader.rb +8 -6
- data/lib/zip_tricks/null_writer.rb +1 -1
- data/lib/zip_tricks/output_enumerator.rb +48 -27
- data/lib/zip_tricks/path_set.rb +4 -0
- data/lib/zip_tricks/rails_streaming.rb +7 -9
- data/lib/zip_tricks/stream_crc32.rb +2 -2
- data/lib/zip_tricks/streamer.rb +69 -25
- data/lib/zip_tricks/streamer/deflated_writer.rb +9 -28
- data/lib/zip_tricks/streamer/entry.rb +5 -1
- data/lib/zip_tricks/streamer/stored_writer.rb +4 -3
- data/lib/zip_tricks/version.rb +1 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -12
- data/lib/zip_tricks/write_buffer.rb +37 -17
- data/lib/zip_tricks/zip_writer.rb +24 -24
- data/zip_tricks.gemspec +3 -5
- metadata +8 -44
- data/.travis.yml +0 -11
- data/qa/README_QA.md +0 -16
- data/qa/generate_test_files.rb +0 -126
- data/qa/in/VTYL8830.jpg +0 -0
- data/qa/in/war-and-peace.txt +0 -10810
- data/qa/support.rb +0 -88
- data/qa/test-report-2016-07-28.txt +0 -156
- data/qa/test-report-2016-12-12.txt +0 -156
- data/qa/test-report-2017-04-2.txt +0 -168
- data/qa/test-report.txt +0 -28
@@ -1,43 +1,64 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
3
|
+
# The output enumerator makes it possible to "pull" from a ZipTricks streamer
|
4
|
+
# object instead of having it "push" writes to you. It will "stash" the block which
|
5
|
+
# writes the ZIP archive through the streamer, and when you call `each` on the Enumerator
|
6
|
+
# it will yield you the bytes the block writes. Since it is an enumerator you can
|
7
|
+
# use `next` to take chunks written by the ZipTricks streamer one by one. It can be very
|
8
|
+
# convenient when you need to segment your ZIP output into bigger chunks for, say,
|
9
|
+
# uploading them to a cloud storage provider such as S3.
|
10
|
+
#
|
11
|
+
# Another use of the output enumerator is outputting a ZIP archive from Rails or Rack,
|
12
|
+
# where an object responding to `each` is required which yields Strings. For instance,
|
13
|
+
# you can return a ZIP archive from Rack like so:
|
14
|
+
#
|
15
|
+
# iterable_zip_body = ZipTricks::OutputEnumerator.new do | streamer |
|
16
|
+
# streamer.write_deflated_file('big.csv') do |sink|
|
17
|
+
# CSV(sink) do |csv_writer|
|
18
|
+
# csv_writer << Person.column_names
|
19
|
+
# Person.all.find_each do |person|
|
20
|
+
# csv_writer << person.attributes.values
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
#
|
26
|
+
# [200, {'Content-Type' => 'binary/octet-stream'}, iterable_zip_body]
|
6
27
|
class ZipTricks::OutputEnumerator
|
7
|
-
|
8
|
-
#
|
9
|
-
# body will be read by the webserver, and will receive a {ZipTricks::Streamer}
|
10
|
-
# as it's block argument. You can then add entries to the Streamer as usual.
|
11
|
-
# The archive will be automatically closed at the end of the block.
|
28
|
+
DEFAULT_WRITE_BUFFER_SIZE = 64 * 1024
|
29
|
+
# Creates a new OutputEnumerator.
|
12
30
|
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
# body = ZipTricks::OutputEnumerator.new do | streamer |
|
22
|
-
# streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
|
23
|
-
# streamer << large_file.read(1024*1024) until large_file.eof?
|
24
|
-
# ...
|
25
|
-
# end
|
26
|
-
#
|
27
|
-
# return [200, {'Content-Type' => 'binary/octet-stream',
|
28
|
-
# 'Content-Length' => content_length.to_s}, body]
|
29
|
-
def initialize(**streamer_options, &blk)
|
31
|
+
# @param streamer_options[Hash] options for Streamer, see {ZipTricks::Streamer.new}
|
32
|
+
# @param write_buffer_size[Integer] By default all ZipTricks writes are unbuffered. For output to sockets
|
33
|
+
# it is beneficial to bulkify those writes so that they are roughly sized to a socket buffer chunk. This
|
34
|
+
# object will bulkify writes for you in this way (so `each` will yield not on every call to `<<` from the Streamer
|
35
|
+
# but at block size boundaries or greater). Set it to 0 for unbuffered writes.
|
36
|
+
# @param blk a block that will receive the Streamer object when executing. The block will not be executed
|
37
|
+
# immediately but only once `each` is called on the OutputEnumerator
|
38
|
+
def initialize(write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, **streamer_options, &blk)
|
30
39
|
@streamer_options = streamer_options.to_h
|
40
|
+
@bufsize = write_buffer_size.to_i
|
31
41
|
@archiving_block = blk
|
32
42
|
end
|
33
43
|
|
34
44
|
# Executes the block given to the constructor with a {ZipTricks::Streamer}
|
35
45
|
# and passes each written chunk to the block given to the method. This allows one
|
36
|
-
# to "take" output of the ZIP piecewise.
|
46
|
+
# to "take" output of the ZIP piecewise. If called without a block will return an Enumerator
|
47
|
+
# that you can pull data from using `next`.
|
48
|
+
#
|
49
|
+
# **NOTE** Because the `WriteBuffer` inside this object can reuse the buffer, it is important
|
50
|
+
# that the `String` that is yielded gets consumed eagerly (written byte-by-byte somewhere, or `#dup`-ed)
|
51
|
+
# since the write buffer will clear it after your block returns. If you expand this Enumerator
|
52
|
+
# eagerly into an Array you might notice that a lot of the segments of your ZIP output are
|
53
|
+
# empty - this means that you need to duplicate them.
|
54
|
+
#
|
55
|
+
# @yield [String] a chunk of the ZIP output in binary encoding
|
37
56
|
def each
|
38
57
|
if block_given?
|
39
58
|
block_write = ZipTricks::BlockWrite.new { |chunk| yield(chunk) }
|
40
|
-
ZipTricks::
|
59
|
+
buffer = ZipTricks::WriteBuffer.new(block_write, @bufsize)
|
60
|
+
ZipTricks::Streamer.open(buffer, **@streamer_options, &@archiving_block)
|
61
|
+
buffer.flush
|
41
62
|
else
|
42
63
|
enum_for(:each)
|
43
64
|
end
|
data/lib/zip_tricks/path_set.rb
CHANGED
@@ -32,6 +32,10 @@
|
|
32
32
|
# conflict is avoided. This is not possible to apply to directories, because when one of the
|
33
33
|
# path components is reused in multiple filenames it means those entities should end up in
|
34
34
|
# the same directory (subdirectory) once the archive is opened.
|
35
|
+
#
|
36
|
+
# The `PathSet` keeps track of entries as they get added using 2 Sets (cheap presence checks),
|
37
|
+
# one for directories and one for files. It will raise a `Conflict` exception if there are
|
38
|
+
# files clobbering one another, or in case files collide with directories.
|
35
39
|
class ZipTricks::PathSet
|
36
40
|
class Conflict < StandardError
|
37
41
|
end
|
@@ -1,22 +1,20 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Should be included into a Rails controller
|
4
|
-
# for easy ZIP output from any action.
|
3
|
+
# Should be included into a Rails controller for easy ZIP output from any action.
|
5
4
|
module ZipTricks::RailsStreaming
|
6
5
|
# Opens a {ZipTricks::Streamer} and yields it to the caller. The output of the streamer
|
7
6
|
# gets automatically forwarded to the Rails response stream. When the output completes,
|
8
7
|
# the Rails response stream is going to be closed automatically.
|
8
|
+
# @param zip_streamer_options[Hash] options that will be passed to the Streamer.
|
9
|
+
# See {ZipTricks::Streamer#initialize} for the full list of options.
|
9
10
|
# @yield [Streamer] the streamer that can be written to
|
10
|
-
|
11
|
+
# @return [ZipTricks::OutputEnumerator] The output enumerator assigned to the response body
|
12
|
+
def zip_tricks_stream(**zip_streamer_options, &zip_streaming_blk)
|
11
13
|
# Set a reasonable content type
|
12
14
|
response.headers['Content-Type'] = 'application/zip'
|
13
15
|
# Make sure nginx buffering is suppressed - see https://github.com/WeTransfer/zip_tricks/issues/48
|
14
16
|
response.headers['X-Accel-Buffering'] = 'no'
|
15
|
-
|
16
|
-
|
17
|
-
w = ZipTricks::BlockWrite.new { |chunk| response.stream.write(chunk) }
|
18
|
-
ZipTricks::Streamer.open(w) { |z| yield(z) }
|
19
|
-
ensure
|
20
|
-
response.stream.close
|
17
|
+
response.sending_file = true
|
18
|
+
self.response_body = ZipTricks::OutputEnumerator.new(**zip_streamer_options, &zip_streaming_blk)
|
21
19
|
end
|
22
20
|
end
|
@@ -27,7 +27,7 @@ class ZipTricks::StreamCRC32
|
|
27
27
|
|
28
28
|
# Creates a new streaming CRC32 calculator
|
29
29
|
def initialize
|
30
|
-
@crc = Zlib.crc32
|
30
|
+
@crc = Zlib.crc32
|
31
31
|
end
|
32
32
|
|
33
33
|
# Append data to the CRC32. Updates the contained CRC32 value in place.
|
@@ -35,7 +35,7 @@ class ZipTricks::StreamCRC32
|
|
35
35
|
# @param blob[String] the string to compute the CRC32 from
|
36
36
|
# @return [self]
|
37
37
|
def <<(blob)
|
38
|
-
@crc = Zlib.
|
38
|
+
@crc = Zlib.crc32(blob, @crc)
|
39
39
|
self
|
40
40
|
end
|
41
41
|
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -91,8 +91,9 @@ class ZipTricks::Streamer
|
|
91
91
|
InvalidOutput = Class.new(ArgumentError)
|
92
92
|
Overflow = Class.new(StandardError)
|
93
93
|
UnknownMode = Class.new(StandardError)
|
94
|
+
OffsetOutOfSync = Class.new(StandardError)
|
94
95
|
|
95
|
-
private_constant :
|
96
|
+
private_constant :STORED, :DEFLATED
|
96
97
|
|
97
98
|
# Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
|
98
99
|
# returns, the Streamer will have its `close` method called, which will write out the central
|
@@ -130,28 +131,26 @@ class ZipTricks::Streamer
|
|
130
131
|
# end
|
131
132
|
#
|
132
133
|
# @param kwargs_for_new [Hash] keyword arguments for {Streamer.new}
|
133
|
-
# @return [
|
134
|
+
# @return [ZipTricks::OutputEnumerator] the enumerator you can read bytestrings of the ZIP from by calling `each`
|
134
135
|
def self.output_enum(**kwargs_for_new, &zip_streamer_block)
|
135
136
|
ZipTricks::OutputEnumerator.new(**kwargs_for_new, &zip_streamer_block)
|
136
137
|
end
|
137
138
|
|
138
139
|
# Creates a new Streamer on top of the given IO-ish object.
|
139
140
|
#
|
140
|
-
# @param
|
141
|
+
# @param writable[#<<] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
141
142
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
142
143
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
143
144
|
# @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
|
144
145
|
# should be suffixed with (1), (2) etc. Default value is `false` - if
|
145
146
|
# duplicate names are used, an exception will be raised
|
146
|
-
def initialize(
|
147
|
-
raise InvalidOutput, 'The
|
148
|
-
|
149
|
-
@dedupe_filenames = auto_rename_duplicate_filenames
|
150
|
-
@out = ZipTricks::WriteAndTell.new(stream)
|
147
|
+
def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false)
|
148
|
+
raise InvalidOutput, 'The writable must respond to #<<' unless writable.respond_to?(:<<)
|
149
|
+
@out = ZipTricks::WriteAndTell.new(writable)
|
151
150
|
@files = []
|
152
|
-
@local_header_offsets = []
|
153
151
|
@path_set = ZipTricks::PathSet.new
|
154
152
|
@writer = writer
|
153
|
+
@dedupe_filenames = auto_rename_duplicate_filenames
|
155
154
|
end
|
156
155
|
|
157
156
|
# Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
|
@@ -201,14 +200,16 @@ class ZipTricks::Streamer
|
|
201
200
|
# @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
|
202
201
|
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
203
202
|
# @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
|
203
|
+
# @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
|
204
204
|
# @return [Integer] the offset the output IO is at after writing the entry header
|
205
|
-
def add_deflated_entry(filename:, modification_time: Time.now.utc, compressed_size: 0, uncompressed_size: 0, crc32: 0, use_data_descriptor: false)
|
205
|
+
def add_deflated_entry(filename:, modification_time: Time.now.utc, compressed_size: 0, uncompressed_size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false)
|
206
206
|
add_file_and_write_local_header(filename: filename,
|
207
207
|
modification_time: modification_time,
|
208
208
|
crc32: crc32,
|
209
209
|
storage_mode: DEFLATED,
|
210
210
|
compressed_size: compressed_size,
|
211
211
|
uncompressed_size: uncompressed_size,
|
212
|
+
unix_permissions: unix_permissions,
|
212
213
|
use_data_descriptor: use_data_descriptor)
|
213
214
|
@out.tell
|
214
215
|
end
|
@@ -223,14 +224,16 @@ class ZipTricks::Streamer
|
|
223
224
|
# @param size [Integer] the size of the file when uncompressed, in bytes
|
224
225
|
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
225
226
|
# @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor. When in use
|
227
|
+
# @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
|
226
228
|
# @return [Integer] the offset the output IO is at after writing the entry header
|
227
|
-
def add_stored_entry(filename:, modification_time: Time.now.utc, size: 0, crc32: 0, use_data_descriptor: false)
|
229
|
+
def add_stored_entry(filename:, modification_time: Time.now.utc, size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false)
|
228
230
|
add_file_and_write_local_header(filename: filename,
|
229
231
|
modification_time: modification_time,
|
230
232
|
crc32: crc32,
|
231
233
|
storage_mode: STORED,
|
232
234
|
compressed_size: size,
|
233
235
|
uncompressed_size: size,
|
236
|
+
unix_permissions: unix_permissions,
|
234
237
|
use_data_descriptor: use_data_descriptor)
|
235
238
|
@out.tell
|
236
239
|
end
|
@@ -239,14 +242,16 @@ class ZipTricks::Streamer
|
|
239
242
|
#
|
240
243
|
# @param dirname [String] the name of the directory in the archive
|
241
244
|
# @param modification_time [Time] the modification time of the directory in the archive
|
245
|
+
# @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
|
242
246
|
# @return [Integer] the offset the output IO is at after writing the entry header
|
243
|
-
def add_empty_directory(dirname:, modification_time: Time.now.utc)
|
247
|
+
def add_empty_directory(dirname:, modification_time: Time.now.utc, unix_permissions: nil)
|
244
248
|
add_file_and_write_local_header(filename: dirname.to_s + '/',
|
245
249
|
modification_time: modification_time,
|
246
250
|
crc32: 0,
|
247
251
|
storage_mode: STORED,
|
248
252
|
compressed_size: 0,
|
249
253
|
uncompressed_size: 0,
|
254
|
+
unix_permissions: unix_permissions,
|
250
255
|
use_data_descriptor: false)
|
251
256
|
@out.tell
|
252
257
|
end
|
@@ -284,14 +289,16 @@ class ZipTricks::Streamer
|
|
284
289
|
#
|
285
290
|
# @param filename[String] the name of the file in the archive
|
286
291
|
# @param modification_time [Time] the modification time of the file in the archive
|
292
|
+
# @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
|
287
293
|
# @yield [#<<, #write] an object that the file contents must be written to that will be automatically closed
|
288
294
|
# @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
|
289
|
-
def write_stored_file(filename, modification_time: Time.now.utc)
|
295
|
+
def write_stored_file(filename, modification_time: Time.now.utc, unix_permissions: nil)
|
290
296
|
add_stored_entry(filename: filename,
|
291
297
|
modification_time: modification_time,
|
292
298
|
use_data_descriptor: true,
|
293
299
|
crc32: 0,
|
294
|
-
size: 0
|
300
|
+
size: 0,
|
301
|
+
unix_permissions: unix_permissions)
|
295
302
|
|
296
303
|
writable = Writable.new(self, StoredWriter.new(@out))
|
297
304
|
if block_given?
|
@@ -336,14 +343,16 @@ class ZipTricks::Streamer
|
|
336
343
|
#
|
337
344
|
# @param filename[String] the name of the file in the archive
|
338
345
|
# @param modification_time [Time] the modification time of the file in the archive
|
346
|
+
# @param unix_permissions[Fixnum?] which UNIX permissions to set, normally the default should be used
|
339
347
|
# @yield [#<<, #write] an object that the file contents must be written to
|
340
|
-
def write_deflated_file(filename, modification_time: Time.now.utc)
|
348
|
+
def write_deflated_file(filename, modification_time: Time.now.utc, unix_permissions: nil)
|
341
349
|
add_deflated_entry(filename: filename,
|
342
350
|
modification_time: modification_time,
|
343
351
|
use_data_descriptor: true,
|
344
352
|
crc32: 0,
|
345
353
|
compressed_size: 0,
|
346
|
-
uncompressed_size: 0
|
354
|
+
uncompressed_size: 0,
|
355
|
+
unix_permissions: unix_permissions)
|
347
356
|
|
348
357
|
writable = Writable.new(self, DeflatedWriter.new(@out))
|
349
358
|
if block_given?
|
@@ -360,21 +369,24 @@ class ZipTricks::Streamer
|
|
360
369
|
#
|
361
370
|
# @return [Integer] the offset the output IO is at after closing the archive
|
362
371
|
def close
|
372
|
+
# Make sure offsets are in order
|
373
|
+
verify_offsets!
|
374
|
+
|
363
375
|
# Record the central directory offset, so that it can be written into the EOCD record
|
364
376
|
cdir_starts_at = @out.tell
|
365
377
|
|
366
378
|
# Write out the central directory entries, one for each file
|
367
|
-
@files.
|
368
|
-
header_loc = @local_header_offsets.fetch(i)
|
379
|
+
@files.each do |entry|
|
369
380
|
@writer.write_central_directory_file_header(io: @out,
|
370
|
-
local_file_header_location:
|
381
|
+
local_file_header_location: entry.local_header_offset,
|
371
382
|
gp_flags: entry.gp_flags,
|
372
383
|
storage_mode: entry.storage_mode,
|
373
384
|
compressed_size: entry.compressed_size,
|
374
385
|
uncompressed_size: entry.uncompressed_size,
|
375
386
|
mtime: entry.mtime,
|
376
387
|
crc32: entry.crc32,
|
377
|
-
filename: entry.filename
|
388
|
+
filename: entry.filename,
|
389
|
+
unix_permissions: entry.unix_permissions)
|
378
390
|
end
|
379
391
|
|
380
392
|
# Record the central directory size, for the EOCDR
|
@@ -420,15 +432,40 @@ class ZipTricks::Streamer
|
|
420
432
|
last_entry.compressed_size = compressed_size
|
421
433
|
last_entry.uncompressed_size = uncompressed_size
|
422
434
|
|
435
|
+
offset_before_data_descriptor = @out.tell
|
423
436
|
@writer.write_data_descriptor(io: @out,
|
424
437
|
crc32: last_entry.crc32,
|
425
438
|
compressed_size: last_entry.compressed_size,
|
426
439
|
uncompressed_size: last_entry.uncompressed_size)
|
440
|
+
last_entry.bytes_used_for_data_descriptor = @out.tell - offset_before_data_descriptor
|
441
|
+
|
427
442
|
@out.tell
|
428
443
|
end
|
429
444
|
|
430
445
|
private
|
431
446
|
|
447
|
+
def verify_offsets!
|
448
|
+
# We need to check whether the offsets noted for the entries actually make sense
|
449
|
+
computed_offset = @files.map(&:total_bytes_used).inject(0, &:+)
|
450
|
+
actual_offset = @out.tell
|
451
|
+
if computed_offset != actual_offset
|
452
|
+
message = <<-EMS
|
453
|
+
The offset of the Streamer output IO is out of sync with the expected value. All entries written so far,
|
454
|
+
including their compressed bodies, local headers and data descriptors, add up to a certain offset,
|
455
|
+
but this offset does not match the actual offset of the IO.
|
456
|
+
|
457
|
+
Entries add up to #{computed_offset} bytes and the IO is at #{actual_offset} bytes.
|
458
|
+
|
459
|
+
This can happen if you write local headers for an entry, write the "body" of the entry directly to the IO
|
460
|
+
object which is your destination, but do not adjust the offset known to the Streamer object. To adjust
|
461
|
+
the offfset you need to call `Streamer#simulate_write(body_size)` after outputting the entry. Otherwise
|
462
|
+
the local header offsets of the entries you write are going to be incorrect and some ZIP applications
|
463
|
+
are going to have problems opening your archive.
|
464
|
+
EMS
|
465
|
+
raise OffsetOutOfSync, message
|
466
|
+
end
|
467
|
+
end
|
468
|
+
|
432
469
|
def add_file_and_write_local_header(
|
433
470
|
filename:,
|
434
471
|
modification_time:,
|
@@ -436,7 +473,8 @@ class ZipTricks::Streamer
|
|
436
473
|
storage_mode:,
|
437
474
|
compressed_size:,
|
438
475
|
uncompressed_size:,
|
439
|
-
use_data_descriptor
|
476
|
+
use_data_descriptor:,
|
477
|
+
unix_permissions:)
|
440
478
|
|
441
479
|
# Clean backslashes
|
442
480
|
filename = remove_backslash(filename)
|
@@ -461,16 +499,19 @@ class ZipTricks::Streamer
|
|
461
499
|
uncompressed_size = 0
|
462
500
|
end
|
463
501
|
|
502
|
+
local_header_starts_at = @out.tell
|
503
|
+
|
464
504
|
e = Entry.new(filename,
|
465
505
|
crc32,
|
466
506
|
compressed_size,
|
467
507
|
uncompressed_size,
|
468
508
|
storage_mode,
|
469
509
|
modification_time,
|
470
|
-
use_data_descriptor
|
471
|
-
|
472
|
-
|
473
|
-
|
510
|
+
use_data_descriptor,
|
511
|
+
_local_file_header_offset = local_header_starts_at,
|
512
|
+
_bytes_used_for_local_header = 0,
|
513
|
+
_bytes_used_for_data_descriptor = 0,
|
514
|
+
unix_permissions)
|
474
515
|
|
475
516
|
@writer.write_local_file_header(io: @out,
|
476
517
|
gp_flags: e.gp_flags,
|
@@ -480,6 +521,9 @@ class ZipTricks::Streamer
|
|
480
521
|
mtime: e.mtime,
|
481
522
|
filename: e.filename,
|
482
523
|
storage_mode: e.storage_mode)
|
524
|
+
e.bytes_used_for_local_header = @out.tell - e.local_header_offset
|
525
|
+
|
526
|
+
@files << e
|
483
527
|
end
|
484
528
|
|
485
529
|
def remove_backslash(filename)
|
@@ -4,13 +4,6 @@
|
|
4
4
|
# registers data passing through it in a CRC32 checksum calculator. Is made to be completely
|
5
5
|
# interchangeable with the StoredWriter in terms of interface.
|
6
6
|
class ZipTricks::Streamer::DeflatedWriter
|
7
|
-
# After how many bytes of incoming data the deflater for the
|
8
|
-
# contents must be flushed. This is done to prevent unreasonable
|
9
|
-
# memory use when archiving large files, and to ensure we write to
|
10
|
-
# the socket often enough while still maintaining acceptable
|
11
|
-
# compression
|
12
|
-
FLUSH_EVERY_N_BYTES = 1024 * 1024 * 5
|
13
|
-
|
14
7
|
# The amount of bytes we will buffer before computing the intermediate
|
15
8
|
# CRC32 checksums. Benchmarks show that the optimum is 64KB (see
|
16
9
|
# `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
|
@@ -18,11 +11,10 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
18
11
|
CRC32_BUFFER_SIZE = 64 * 1024
|
19
12
|
|
20
13
|
def initialize(io)
|
21
|
-
@compressed_io =
|
22
|
-
@uncompressed_size = 0
|
14
|
+
@compressed_io = io
|
23
15
|
@deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
|
24
|
-
@crc = ZipTricks::
|
25
|
-
@
|
16
|
+
@crc = ZipTricks::StreamCRC32.new
|
17
|
+
@crc_buf = ZipTricks::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
|
26
18
|
end
|
27
19
|
|
28
20
|
# Writes the given data into the deflater, and flushes the deflater
|
@@ -31,13 +23,8 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
31
23
|
# @param data[String] data to be written
|
32
24
|
# @return self
|
33
25
|
def <<(data)
|
34
|
-
@
|
35
|
-
@
|
36
|
-
@compressed_io << @deflater.deflate(data)
|
37
|
-
@crc << data
|
38
|
-
|
39
|
-
interim_flush
|
40
|
-
|
26
|
+
@deflater.deflate(data) { |chunk| @compressed_io << chunk }
|
27
|
+
@crc_buf << data
|
41
28
|
self
|
42
29
|
end
|
43
30
|
|
@@ -45,18 +32,12 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
45
32
|
# compressed data written and the CRC32 checksum. The return value
|
46
33
|
# can be directly used as the argument to {Streamer#update_last_entry_and_write_data_descriptor}
|
47
34
|
#
|
48
|
-
# @param data[String] data to be written
|
49
35
|
# @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
|
50
36
|
def finish
|
51
37
|
@compressed_io << @deflater.finish until @deflater.finished?
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
def interim_flush
|
58
|
-
return if @bytes_since_last_flush < FLUSH_EVERY_N_BYTES
|
59
|
-
@compressed_io << @deflater.flush
|
60
|
-
@bytes_since_last_flush = 0
|
38
|
+
@crc_buf.flush
|
39
|
+
{crc32: @crc.to_i, compressed_size: @deflater.total_out, uncompressed_size: @deflater.total_in}
|
40
|
+
ensure
|
41
|
+
@deflater.close
|
61
42
|
end
|
62
43
|
end
|