zip_kit 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.document +5 -0
- data/.github/workflows/ci.yml +29 -0
- data/.gitignore +61 -0
- data/.rspec +1 -0
- data/.standard.yml +8 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +255 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +153 -0
- data/Gemfile +4 -0
- data/IMPLEMENTATION_DETAILS.md +97 -0
- data/LICENSE.txt +20 -0
- data/README.md +234 -0
- data/Rakefile +21 -0
- data/bench/buffered_crc32_bench.rb +109 -0
- data/examples/archive_size_estimate.rb +15 -0
- data/examples/config.ru +7 -0
- data/examples/deferred_write.rb +58 -0
- data/examples/parallel_compression_with_block_deflate.rb +86 -0
- data/examples/rack_application.rb +63 -0
- data/examples/s3_upload.rb +23 -0
- data/lib/zip_kit/block_deflate.rb +130 -0
- data/lib/zip_kit/block_write.rb +47 -0
- data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
- data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
- data/lib/zip_kit/file_reader.rb +740 -0
- data/lib/zip_kit/null_writer.rb +12 -0
- data/lib/zip_kit/output_enumerator.rb +150 -0
- data/lib/zip_kit/path_set.rb +163 -0
- data/lib/zip_kit/rack_chunked_body.rb +32 -0
- data/lib/zip_kit/rack_tempfile_body.rb +61 -0
- data/lib/zip_kit/rails_streaming.rb +37 -0
- data/lib/zip_kit/remote_io.rb +114 -0
- data/lib/zip_kit/remote_uncap.rb +22 -0
- data/lib/zip_kit/size_estimator.rb +84 -0
- data/lib/zip_kit/stream_crc32.rb +60 -0
- data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
- data/lib/zip_kit/streamer/entry.rb +37 -0
- data/lib/zip_kit/streamer/filler.rb +9 -0
- data/lib/zip_kit/streamer/heuristic.rb +68 -0
- data/lib/zip_kit/streamer/stored_writer.rb +39 -0
- data/lib/zip_kit/streamer/writable.rb +36 -0
- data/lib/zip_kit/streamer.rb +614 -0
- data/lib/zip_kit/uniquify_filename.rb +39 -0
- data/lib/zip_kit/version.rb +5 -0
- data/lib/zip_kit/write_and_tell.rb +40 -0
- data/lib/zip_kit/write_buffer.rb +71 -0
- data/lib/zip_kit/write_shovel.rb +22 -0
- data/lib/zip_kit/zip_writer.rb +436 -0
- data/lib/zip_kit.rb +24 -0
- data/zip_kit.gemspec +41 -0
- metadata +335 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# A simple stateful class for keeping track of a CRC32 value through multiple writes
|
4
|
+
class ZipKit::StreamCRC32
  include ZipKit::WriteShovel

  # Probed once at load time: can `String.new` be given a `capacity:` keyword?
  # An ArgumentError from the probe is taken to mean that it cannot.
  STRINGS_HAVE_CAPACITY_SUPPORT = begin
    String.new("", capacity: 1)
    true
  rescue ArgumentError
    false
  end
  # The number of bytes requested from the IO per `read` call in `from_io`
  CRC_BUF_SIZE = 1024 * 512
  private_constant :STRINGS_HAVE_CAPACITY_SUPPORT, :CRC_BUF_SIZE

  # Computes the CRC32 of everything that can still be read from an IO object.
  # The object should respond to `read(length, buffer)` and `eof?`
  #
  # @param io[IO] the IO to read the data from
  # @return [Integer] the computed CRC32 value
  def self.from_io(io)
    # Preallocating the read buffer means the string does not have to be
    # resized while reading. This is only done when `String.new` supports `capacity:`.
    read_buffer = if STRINGS_HAVE_CAPACITY_SUPPORT
      String.new("", capacity: CRC_BUF_SIZE)
    else
      +""
    end
    checksum = new
    until io.eof?
      checksum << io.read(CRC_BUF_SIZE, read_buffer)
    end
    checksum.to_i
  end

  # Creates a new streaming CRC32 calculator, starting from the
  # value `Zlib.crc32` returns when called without arguments
  def initialize
    @crc = Zlib.crc32
  end

  # Feeds more data into the checksum. The contained CRC32 value is updated in place.
  #
  # @param blob[String] the string to compute the CRC32 from
  # @return [self]
  def <<(blob)
    @crc = Zlib.crc32(blob, @crc)
    self
  end

  # Returns the CRC32 value computed so far
  #
  # @return [Integer] the updated CRC32 value for all the blobs so far
  def to_i
    @crc
  end

  # Combines an already known CRC32 value with the current one, as if the data
  # it was computed from had been appended. The contained CRC32 value is updated in place.
  #
  # @param crc32[Integer] the CRC32 value to append
  # @param blob_size[Integer] the size of the data the `crc32` is computed from
  # @return [Integer] the updated CRC32 value for all the blobs so far
  def append(crc32, blob_size)
    @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Sends writes to the given `io` compressed using a `Zlib::Deflate`. Also
|
4
|
+
# registers data passing through it in a CRC32 checksum calculator. Is made to be completely
|
5
|
+
# interchangeable with the StoredWriter in terms of interface.
|
6
|
+
class ZipKit::Streamer::DeflatedWriter
  include ZipKit::WriteShovel

  # The amount of bytes we will buffer before computing the intermediate
  # CRC32 checksums. Benchmarks show that the optimum is 64KB (see
  # `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
  # to perform internal CRC combine calls which will make the speed go down again.
  CRC32_BUFFER_SIZE = 64 * 1024

  # @param io[#<<] the destination that receives the compressed data
  def initialize(io)
    @compressed_io = io
    # Negative window bits make zlib produce a raw deflate stream, without the zlib header and trailer
    @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
    @crc = ZipKit::StreamCRC32.new
    # The uncompressed data reaches the CRC32 calculator through a buffer of CRC32_BUFFER_SIZE bytes
    @crc_buf = ZipKit::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
  end

  # Writes the given data into the deflater, forwarding every compressed chunk
  # the deflater yields to the destination IO. The uncompressed data is also
  # registered in the CRC32 buffer.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    @deflater.deflate(data) do |compressed_chunk|
      @compressed_io << compressed_chunk
    end
    @crc_buf << data
    self
  end

  # Finishes the deflate stream and returns the amount of data received for
  # writing, the amount of compressed data written and the CRC32 checksum.
  # The deflater is closed afterwards, even if finishing raises. The return value
  # can be directly used as the argument to {Streamer#update_last_entry_and_write_data_descriptor}
  #
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
  def finish
    until @deflater.finished?
      @compressed_io << @deflater.finish
    end
    # Push whatever is still buffered into the CRC32 calculator before reading it
    @crc_buf.flush
    {crc32: @crc.to_i, compressed_size: @deflater.total_out, uncompressed_size: @deflater.total_in}
  ensure
    @deflater.close
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Is used internally by Streamer to keep track of entries in the archive during writing.
|
4
|
+
# Normally you will not have to use this class directly
|
5
|
+
class ZipKit::Streamer::Entry < Struct.new(:filename, :crc32, :compressed_size,
  :uncompressed_size, :storage_mode, :mtime,
  :use_data_descriptor, :local_header_offset, :bytes_used_for_local_header, :bytes_used_for_data_descriptor, :unix_permissions)
  # Accepts the same arguments as the underlying Struct. The filename gets
  # tagged as UTF-8, and whether it contains anything besides ASCII is
  # recorded for later use in {#gp_flags}.
  def initialize(*)
    super
    # `+filename` is the string itself when it is mutable and an unfrozen copy
    # when it is frozen, so a frozen filename no longer raises a FrozenError
    self.filename = (+filename).force_encoding(Encoding::UTF_8)
    # A string which is not ASCII-only cannot be converted to ASCII, which is
    # the condition under which the EFS flag must be set
    @requires_efs_flag = !filename.ascii_only?
  end

  # The number of bytes the entry occupies in the archive: the local header,
  # the compressed data and the data descriptor
  #
  # @return [Integer]
  def total_bytes_used
    bytes_used_for_local_header + compressed_size + bytes_used_for_data_descriptor
  end

  # Returns the general purpose flags for the entry. The first flag we care about is the EFS
  # bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
  # bit so that the unarchiving application knows that the filename in the archive is UTF-8
  # encoded, and not some DOS default. For ASCII entries it does not matter.
  # Additionally, we care about bit 3 which toggles the use of the postfix data descriptor.
  #
  # @return [Integer] the flags as a bit field
  def gp_flags
    flags = 0
    flags |= 0b100000000000 if @requires_efs_flag # bit 11
    flags |= 0x0008 if use_data_descriptor # bit 3
    flags
  end

  # An Entry is never a filler
  #
  # @return [Boolean] always `false`
  def filler?
    false
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Is used internally by Streamer to keep track of entries in the archive during writing.
|
4
|
+
# Normally you will not have to use this class directly
|
5
|
+
class ZipKit::Streamer::Filler < Struct.new(:total_bytes_used)
  # Always answers in the affirmative, which is what tells a Filler
  # apart from an object whose `filler?` returns `false`
  #
  # @return [Boolean] always `true`
  def filler?
    true
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Will be used to pick whether to store a file in the `stored` or
|
4
|
+
# `deflated` mode, by compressing the first N bytes of the file and
|
5
|
+
# comparing the stored and deflated data sizes. If deflate produces
|
6
|
+
# a sizable compression gain for this data, it will create a deflated
|
7
|
+
# file inside the ZIP archive. If the file doesn't compress well, it
|
8
|
+
# will use the "stored" mode for the entry. About 128KB of the
|
9
|
+
# file will be buffered to pick the appropriate storage mode. The
|
10
|
+
# Heuristic will call either `write_stored_file` or `write_deflated_file`
|
11
|
+
# on the Streamer passed into it once it knows which compression
|
12
|
+
# method should be applied
|
13
|
+
class ZipKit::Streamer::Heuristic
  include ZipKit::WriteShovel

  # Once more than this many bytes have been buffered the storage mode gets picked
  BYTES_WRITTEN_THRESHOLD = 128 * 1024
  # Deflate gets picked if the deflated size is at most this fraction of the buffered size
  MINIMUM_VIABLE_COMPRESSION = 0.75

  # @param streamer the object that will receive `write_stored_file` or `write_deflated_file`
  # @param filename[String] the filename of the entry, passed on to the streamer
  # @param write_file_options[Hash] keyword arguments passed on to the streamer together with the filename
  def initialize(streamer, filename, **write_file_options)
    @streamer = streamer
    @filename = filename
    @write_file_options = write_file_options

    # The uncompressed data gets retained here until the storage mode is known
    @buf = StringIO.new.binmode
    # This deflater is only used to measure how well the data compresses, its output is discarded
    @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
    @bytes_deflated = 0

    # The writable for the chosen storage mode, set by `decide`
    @winner = nil
  end

  # Accepts data for the entry. Until the storage mode has been picked the data is
  # buffered and trial-compressed, afterwards it goes directly to the chosen writable.
  #
  # @param bytes[String] the data to write
  # @return [self]
  def <<(bytes)
    if @winner
      @winner << bytes
    else
      @buf << bytes
      @deflater.deflate(bytes) { |chunk| @bytes_deflated += chunk.bytesize }
      decide if @buf.size > BYTES_WRITTEN_THRESHOLD
    end
    self
  end

  # Same as `<<` but returns the number of bytes accepted
  #
  # @param bytes[String] the data to write
  # @return [Integer] the bytesize of `bytes`
  def write(bytes)
    self << bytes
    bytes.bytesize
  end

  # Picks the storage mode if that has not happened yet (fewer bytes than the
  # threshold were written) and closes the chosen writable.
  def close
    decide unless @winner
    @winner.close
  end

  # Picks the storage mode based on the compression ratio achieved so far, opens the
  # corresponding writable on the streamer and replays the buffered data into it.
  private def decide
    # Finish and then close the deflater - it has likely buffered some data.
    # The guard permits calling `decide` again if a previous call raised after
    # the deflater was already closed.
    unless @deflater.closed?
      @bytes_deflated += @deflater.finish.bytesize until @deflater.finished?
      @deflater.close
    end

    # If the deflated version is sufficiently smaller than the stored one
    # - use deflate, otherwise stored
    ratio = @bytes_deflated / @buf.size.to_f
    @winner = if ratio <= MINIMUM_VIABLE_COMPRESSION
      @streamer.write_deflated_file(@filename, **@write_file_options)
    else
      @streamer.write_stored_file(@filename, **@write_file_options)
    end

    # Copy the buffered uncompressed data into the newly initialized writable
    @buf.rewind
    IO.copy_stream(@buf, @winner)
    @buf.truncate(0)
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Sends writes to the given `io`, and also registers all the data passing
|
4
|
+
# through it in a CRC32 checksum calculator. Is made to be completely
|
5
|
+
# interchangeable with the DeflatedWriter in terms of interface.
|
6
|
+
class ZipKit::Streamer::StoredWriter
  include ZipKit::WriteShovel

  # The amount of bytes we will buffer before computing the intermediate
  # CRC32 checksums. Benchmarks show that the optimum is 64KB (see
  # `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
  # to perform internal CRC combine calls which will make the speed go down again.
  CRC32_BUFFER_SIZE = 64 * 1024

  # @param io[#<<] the destination that receives the data as-is
  def initialize(io)
    # The wrapper is what gets asked (via `tell`) for the sizes reported by `finish`
    @io = ZipKit::WriteAndTell.new(io)
    @crc = ZipKit::StreamCRC32.new
    # The data reaches the CRC32 calculator through a buffer of CRC32_BUFFER_SIZE bytes
    @crc_buf = ZipKit::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
  end

  # Writes the given data to the contained IO object and registers
  # it in the CRC32 buffer.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    @io << data
    @crc_buf << data
    self
  end

  # Returns the amount of data written and the CRC32 checksum. Since the data is
  # stored as-is the compressed and uncompressed sizes are the same. The return value
  # can be directly used as the argument to {Streamer#update_last_entry_and_write_data_descriptor}
  #
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
  def finish
    # Push whatever is still buffered into the CRC32 calculator before reading it
    @crc_buf.flush
    checksum = @crc.to_i
    bytes_written = @io.tell
    {crc32: checksum, compressed_size: bytes_written, uncompressed_size: bytes_written}
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Gets yielded from the writing methods of the Streamer
|
4
|
+
# and accepts the data being written into the ZIP for deflate
|
5
|
+
# or stored modes. Can be used as a destination for `IO.copy_stream`
|
6
|
+
#
|
7
|
+
#     IO.copy_stream(File.open('source.bin', 'rb'), writable)
|
8
|
+
class ZipKit::Streamer::Writable
  include ZipKit::WriteShovel

  # Sets up a Writable which forwards the writes to the given writer.
  # Normally you would not need to use this method directly
  #
  # @param streamer the object that gets notified of the final sizes and CRC32 on `close`
  # @param writer the object receiving the data, must respond to `<<` and `finish`
  def initialize(streamer, writer)
    @streamer = streamer
    @writer = writer
    @closed = false
  end

  # Writes the given data to the output stream. Raises if the Writable
  # has been closed already.
  #
  # @param d[String] the binary string to write (part of the uncompressed file)
  # @return [self]
  def <<(d)
    if @closed
      raise "Trying to write to a closed Writable"
    end
    @writer << d
    self
  end

  # Finishes the writer and recovers the CRC32/size values. It then calls
  # `update_last_entry_and_write_data_descriptor` on the given Streamer.
  # Closing a Writable which is already closed does nothing.
  def close
    unless @closed
      entry_details = @writer.finish
      @streamer.update_last_entry_and_write_data_descriptor(**entry_details)
      @closed = true
    end
  end
end
|