zip_kit 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.document +5 -0
- data/.github/workflows/ci.yml +29 -0
- data/.gitignore +61 -0
- data/.rspec +1 -0
- data/.standard.yml +8 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +255 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +153 -0
- data/Gemfile +4 -0
- data/IMPLEMENTATION_DETAILS.md +97 -0
- data/LICENSE.txt +20 -0
- data/README.md +234 -0
- data/Rakefile +21 -0
- data/bench/buffered_crc32_bench.rb +109 -0
- data/examples/archive_size_estimate.rb +15 -0
- data/examples/config.ru +7 -0
- data/examples/deferred_write.rb +58 -0
- data/examples/parallel_compression_with_block_deflate.rb +86 -0
- data/examples/rack_application.rb +63 -0
- data/examples/s3_upload.rb +23 -0
- data/lib/zip_kit/block_deflate.rb +130 -0
- data/lib/zip_kit/block_write.rb +47 -0
- data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
- data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
- data/lib/zip_kit/file_reader.rb +740 -0
- data/lib/zip_kit/null_writer.rb +12 -0
- data/lib/zip_kit/output_enumerator.rb +150 -0
- data/lib/zip_kit/path_set.rb +163 -0
- data/lib/zip_kit/rack_chunked_body.rb +32 -0
- data/lib/zip_kit/rack_tempfile_body.rb +61 -0
- data/lib/zip_kit/rails_streaming.rb +37 -0
- data/lib/zip_kit/remote_io.rb +114 -0
- data/lib/zip_kit/remote_uncap.rb +22 -0
- data/lib/zip_kit/size_estimator.rb +84 -0
- data/lib/zip_kit/stream_crc32.rb +60 -0
- data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
- data/lib/zip_kit/streamer/entry.rb +37 -0
- data/lib/zip_kit/streamer/filler.rb +9 -0
- data/lib/zip_kit/streamer/heuristic.rb +68 -0
- data/lib/zip_kit/streamer/stored_writer.rb +39 -0
- data/lib/zip_kit/streamer/writable.rb +36 -0
- data/lib/zip_kit/streamer.rb +614 -0
- data/lib/zip_kit/uniquify_filename.rb +39 -0
- data/lib/zip_kit/version.rb +5 -0
- data/lib/zip_kit/write_and_tell.rb +40 -0
- data/lib/zip_kit/write_buffer.rb +71 -0
- data/lib/zip_kit/write_shovel.rb +22 -0
- data/lib/zip_kit/zip_writer.rb +436 -0
- data/lib/zip_kit.rb +24 -0
- data/zip_kit.gemspec +41 -0
- metadata +335 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true

require_relative "../lib/zip_kit"

# An example of how you can create a Rack endpoint for your ZIP downloads.
# NEVER run this in production - it is a huge security risk.
# What this app will do is pick PATH_INFO (your request URL path)
# and grab a file located at this path on your filesystem. The file will then
# be added to a ZIP archive created completely programmatically. No data will
# be cached on disk and the contents of the ZIP file will _not_ be buffered in
# its entirety before sending. Unless you use a buffering Rack server of
# course (WEBrick or Thin).
class ZipDownload
  # Rack entry point. Streams a ZIP archive containing the single file
  # located at the request's PATH_INFO path on the local filesystem.
  #
  # @param env [Hash] the Rack environment
  # @return [Array] the Rack response triplet - status, headers, enumerable body
  def call(env)
    file_path = env["PATH_INFO"] # Should be the absolute path on the filesystem

    # Open the file for binary reading
    f = File.open(file_path, "rb")
    filename = File.basename(file_path)

    # Compute the CRC32 upfront. We do not use local footers for post-computing
    # the CRC32, so you _do_ have to precompute it beforehand. Ideally, you
    # would do that before storing the files you will be sending out later on.
    crc32 = ZipKit::StreamCRC32.from_io(f)
    f.rewind

    # Compute the size of the download, so that a
    # real Content-Length header can be sent. Also, if your download
    # stops at some point, the downloading browser will be able to tell
    # the user that the download stalled or was aborted in-flight.
    # Note that using the size estimator here does _not_ read or compress
    # your original file, so it is very fast. The estimator uses keyword
    # arguments, same as the Streamer methods below.
    size = ZipKit::SizeEstimator.estimate { |ar|
      ar.add_stored_entry(filename: filename, size: f.size)
    }

    # Create a suitable Rack response body, that will support each(),
    # close() and all the other methods. We can then return it up the stack.
    zip_response_body = ZipKit::OutputEnumerator.new do |zip|
      # We are adding only one file to the ZIP here, but you could do that
      # with an arbitrary number of files of course.
      zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
      # Write the contents of the file. It is stored, so the writes go
      # directly to the Rack output, bypassing any RubyZip
      # deflaters/compressors. In fact you are yielding the "blob" string
      # here directly to the Rack server handler.
      IO.copy_stream(f, zip)
    ensure
      f.close # Make sure the opened file we read from gets closed
    end

    # Add a Content-Disposition so that the download has a .zip extension
    # (this will not work well with UTF-8 filenames on Windows, but hey!)
    # The filename is quoted so that names containing spaces or semicolons
    # do not produce a malformed header value.
    content_disposition = "attachment; filename=\"%<filename>s.zip\"" % {filename: filename}

    # and return the response, adding the Content-Length we have computed earlier
    [
      200,
      {"Content-Length" => size.to_s, "Content-Disposition" => content_disposition},
      zip_response_body
    ]
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true

require_relative "../lib/zip_kit"

# The Streamer accepts any writable object as its destination, which makes
# S3 a viable target. Recent releases of the S3 SDK provide `upload_stream`,
# which performs a streaming (multipart) upload: the SDK slices whatever you
# write into correctly-sized parts and PUTs each part to S3 for you.
destination_bucket = Aws::S3::Bucket.new("mybucket")
destination_object = destination_bucket.object("big.zip")
destination_object.upload_stream do |write_stream|
  ZipKit::Streamer.open(write_stream) do |zip|
    zip.write_file("large.csv") do |sink|
      # Generate the CSV straight into the ZIP entry - nothing is buffered.
      CSV(sink) do |csv|
        csv << ["Line", "Item"]
        (0...20_000).each do |row_index|
          csv << [row_index, "Item number #{row_index}"]
        end
      end
    end
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# frozen_string_literal: true

require "zlib"

module ZipKit
  # Permits Deflate compression in independent blocks. The workflow is as follows:
  #
  # * Run every block to compress through deflate_chunk, remove the header,
  #   footer and adler32 from the result
  # * Write out the compressed block bodies (the ones deflate_chunk returns)
  #   to your output, in sequence
  # * Write out the footer (\03\00)
  #
  # The resulting stream is guaranteed to be handled properly by all zip
  # unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
  #
  # You could also build a compressor for Rubyzip using this module quite easily,
  # even though this is outside the scope of the library.
  #
  # When you deflate the chunks separately, you need to write the end marker
  # yourself (using `write_terminator`).
  # If you just want to deflate a large IO's contents, use
  # `deflate_in_blocks_and_terminate` to have the end marker written out for you.
  #
  # Basic usage to compress a file in parts:
  #
  #     source_file = File.open('12_gigs.bin', 'rb')
  #     compressed = Tempfile.new
  #     # Will not compress everything in memory, but do it per chunk to spare
  #     # memory. `compressed` will be written to at the end of each chunk.
  #     ZipKit::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
  #       compressed)
  #
  # You can also do the same to parts that you will later concatenate together
  # elsewhere, in that case you need to skip the end marker:
  #
  #     compressed = Tempfile.new
  #     ZipKit::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
  #       compressed)
  #     ZipKit::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
  #       compressed)
  #     ZipKit::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
  #       compressed)
  #     ZipKit::BlockDeflate.write_terminator(compressed)
  #
  # You can also elect to just compress strings in memory (to splice them later):
  #
  #     compressed_string = ZipKit::BlockDeflate.deflate_chunk(big_string)
  class BlockDeflate
    # Default read/compress block size: 5 MiB per chunk.
    DEFAULT_BLOCKSIZE = 1_024 * 1024 * 5

    # The deflate end-of-stream marker (an empty final stored block).
    END_MARKER = [3, 0].pack("C*")

    # All Zlib compression levels accepted by `deflate_chunk`, from
    # Zlib::DEFAULT_COMPRESSION (-1) up to Zlib::BEST_COMPRESSION (9).
    VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze

    # Write the end marker (\x3\x0) to the given IO.
    #
    # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
    #
    # @param output_io [IO] the stream to write to (should respond to `:<<`)
    # @return [Integer] number of bytes written to `output_io`
    def self.write_terminator(output_io)
      output_io << END_MARKER
      END_MARKER.bytesize
    end

    # Compress a given binary string and flush the deflate stream at byte boundary.
    # The returned string can be spliced into another deflate stream.
    #
    # @param bytes [String] Bytes to compress
    # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
    # @return [String] compressed bytes
    # @raise [RuntimeError] if `level` is not a valid Zlib compression level
    def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION)
      raise "Invalid Zlib compression level #{level}" unless VALID_COMPRESSIONS.include?(level)
      z = Zlib::Deflate.new(level)
      # SYNC_FLUSH aligns the output at a byte boundary so chunks concatenate cleanly
      compressed_blob = z.deflate(bytes, Zlib::SYNC_FLUSH)
      compressed_blob << z.finish
      z.close

      # Remove the header (2 bytes), the [3,0] end marker and the adler (4 bytes)
      compressed_blob[2...-6]
    end

    # Compress the contents of input_io into output_io, in blocks
    # of block_size. Aligns the parts so that they can be concatenated later.
    # Writes deflate end marker (\x3\x0) into `output_io` as the final step, so
    # the contents of `output_io` can be spliced verbatim into a ZIP archive.
    #
    # Once the write completes, no more parts for concatenation should be written to
    # the same stream.
    #
    # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
    #
    # @param input_io [IO] the stream to read from (should respond to `:read`)
    # @param output_io [IO] the stream to write to (should respond to `:<<`)
    # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
    # @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
    # @return [Integer] number of bytes written to `output_io`
    def self.deflate_in_blocks_and_terminate(input_io,
      output_io,
      level: Zlib::DEFAULT_COMPRESSION,
      block_size: DEFAULT_BLOCKSIZE)
      bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
      bytes_written + write_terminator(output_io)
    end

    # Compress the contents of input_io into output_io, in blocks
    # of block_size. Align the parts so that they can be concatenated later.
    # Will not write the deflate end marker (\x3\x0) so more parts can be written
    # later and successfully read back in, provided the end marker will be written.
    #
    # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
    #
    # @param input_io [IO] the stream to read from (should respond to `:read`)
    # @param output_io [IO] the stream to write to (should respond to `:<<`)
    # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
    # @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
    # @return [Integer] number of bytes written to `output_io`
    def self.deflate_in_blocks(input_io,
      output_io,
      level: Zlib::DEFAULT_COMPRESSION,
      block_size: DEFAULT_BLOCKSIZE)
      bytes_written = 0
      while (block = input_io.read(block_size))
        deflated = deflate_chunk(block, level: level)
        output_io << deflated
        bytes_written += deflated.bytesize
      end
      bytes_written
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true

module ZipKit
  # Acts as a converter between callers which send data to the `#<<` method (such as all the ZipKit
  # writer methods, which push onto anything), and a given block. Every time `#<<` gets called on the BlockWrite,
  # the block given to the constructor will be called with the same argument. ZipKit uses this object
  # when integrating with Rack and in the OutputEnumerator. Normally you wouldn't need to use it manually but
  # you always can. BlockWrite will also ensure the binary string encoding is forced onto any string
  # that passes through it.
  #
  # For example, you can create a Rack response body like so:
  #
  #     class MyRackResponse
  #       def each
  #         writer = ZipKit::BlockWrite.new {|chunk| yield(chunk) }
  #         writer << "Hello" << "world" << "!"
  #       end
  #     end
  #     [200, {}, MyRackResponse.new]
  class BlockWrite
    # Creates a new BlockWrite.
    #
    # @param block The block that will be called when this object receives the `<<` message
    def initialize(&block)
      @block = block
    end

    # Make sure those methods raise outright
    %i[seek pos= to_s].each do |m|
      define_method(m) do |*_args|
        raise "#{m} not supported - this IO adapter is non-rewindable"
      end
    end

    # Sends a string through to the block stored in the BlockWrite.
    #
    # @param buf[String] the string to write. Note that a zero-length String
    #   will not be forwarded to the block, as it has special meaning when used
    #   with chunked encoding (it indicates the end of the stream).
    # @return self
    def <<(buf)
      # Zero-size output has a special meaning when using chunked encoding.
      # Return self (not nil) so that chained writes like
      # `writer << "a" << "" << "b"` keep working even around empty chunks.
      return self if buf.nil? || buf.bytesize.zero?

      # Force the binary encoding so downstream consumers see raw bytes
      @block.call(buf.b)
      self
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true

module ZipKit
  class FileReader
    # Reads deflated entry bodies from an IO and inflates them on the fly,
    # never consuming more than the declared compressed size of the entry.
    class InflatingReader
      # @param from_io [IO] the IO positioned at the start of the compressed body
      # @param compressed_data_size [Integer] how many compressed bytes belong to this entry
      def initialize(from_io, compressed_data_size)
        @io = from_io
        @compressed_data_size = compressed_data_size
        @already_read = 0
        # Negative window bits tell Zlib the stream is raw deflate (no zlib header)
        @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
      end

      # Reads up to `n_bytes` of compressed input and returns the inflated result.
      #
      # @param n_bytes [Integer, nil] how many compressed bytes to consume;
      #   defaults to everything still remaining for this entry
      # @return [String, nil] the inflated bytes, "" when asked for zero bytes,
      #   or nil once the stream is exhausted
      def extract(n_bytes = nil)
        remaining = @compressed_data_size - @already_read
        n_bytes ||= remaining

        return if eof?
        return if remaining.zero?

        # Never read past the end of this entry's compressed body
        n_bytes = remaining if n_bytes > remaining
        return "" if n_bytes.zero?

        chunk = @io.read(n_bytes)
        return if chunk.nil?

        @already_read += chunk.bytesize
        @zlib_inflater.inflate(chunk)
      end

      # True once the inflater has seen the end of the deflate stream.
      def eof?
        @zlib_inflater.finished?
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true

module ZipKit
  class FileReader
    # Reads "stored" (uncompressed) entry bodies from an IO, never consuming
    # more than the declared size of the entry.
    class StoredReader
      # @param from_io [IO] the IO positioned at the start of the entry body
      # @param compressed_data_size [Integer] how many bytes belong to this entry
      def initialize(from_io, compressed_data_size)
        @io = from_io
        @compressed_data_size = compressed_data_size
        @already_read = 0
      end

      # Reads up to `n_bytes` of the entry body and returns them verbatim.
      #
      # @param n_bytes [Integer, nil] how many bytes to read;
      #   defaults to everything still remaining for this entry
      # @return [String, nil] the bytes read, "" when asked for zero bytes,
      #   or nil once the entry body is exhausted
      def extract(n_bytes = nil)
        remaining = @compressed_data_size - @already_read
        n_bytes ||= remaining

        return if eof?
        return if remaining.zero?

        # Never read past the end of this entry's body
        n_bytes = remaining if n_bytes > remaining
        return "" if n_bytes.zero?

        chunk = @io.read(n_bytes)
        return if chunk.nil?

        @already_read += chunk.bytesize
        chunk
      end

      # True once the declared number of bytes has been consumed.
      def eof?
        @already_read >= @compressed_data_size
      end
    end
  end
end
|