zip_kit 6.0.0
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.document +5 -0
- data/.github/workflows/ci.yml +29 -0
- data/.gitignore +61 -0
- data/.rspec +1 -0
- data/.standard.yml +8 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +255 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +153 -0
- data/Gemfile +4 -0
- data/IMPLEMENTATION_DETAILS.md +97 -0
- data/LICENSE.txt +20 -0
- data/README.md +234 -0
- data/Rakefile +21 -0
- data/bench/buffered_crc32_bench.rb +109 -0
- data/examples/archive_size_estimate.rb +15 -0
- data/examples/config.ru +7 -0
- data/examples/deferred_write.rb +58 -0
- data/examples/parallel_compression_with_block_deflate.rb +86 -0
- data/examples/rack_application.rb +63 -0
- data/examples/s3_upload.rb +23 -0
- data/lib/zip_kit/block_deflate.rb +130 -0
- data/lib/zip_kit/block_write.rb +47 -0
- data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
- data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
- data/lib/zip_kit/file_reader.rb +740 -0
- data/lib/zip_kit/null_writer.rb +12 -0
- data/lib/zip_kit/output_enumerator.rb +150 -0
- data/lib/zip_kit/path_set.rb +163 -0
- data/lib/zip_kit/rack_chunked_body.rb +32 -0
- data/lib/zip_kit/rack_tempfile_body.rb +61 -0
- data/lib/zip_kit/rails_streaming.rb +37 -0
- data/lib/zip_kit/remote_io.rb +114 -0
- data/lib/zip_kit/remote_uncap.rb +22 -0
- data/lib/zip_kit/size_estimator.rb +84 -0
- data/lib/zip_kit/stream_crc32.rb +60 -0
- data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
- data/lib/zip_kit/streamer/entry.rb +37 -0
- data/lib/zip_kit/streamer/filler.rb +9 -0
- data/lib/zip_kit/streamer/heuristic.rb +68 -0
- data/lib/zip_kit/streamer/stored_writer.rb +39 -0
- data/lib/zip_kit/streamer/writable.rb +36 -0
- data/lib/zip_kit/streamer.rb +614 -0
- data/lib/zip_kit/uniquify_filename.rb +39 -0
- data/lib/zip_kit/version.rb +5 -0
- data/lib/zip_kit/write_and_tell.rb +40 -0
- data/lib/zip_kit/write_buffer.rb +71 -0
- data/lib/zip_kit/write_shovel.rb +22 -0
- data/lib/zip_kit/zip_writer.rb +436 -0
- data/lib/zip_kit.rb +24 -0
- data/zip_kit.gemspec +41 -0
- metadata +335 -0
data/examples/rack_application.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+require_relative "../lib/zip_kit"
+
+# An example of how you can create a Rack endpoint for your ZIP downloads.
+# NEVER run this in production - it is a huge security risk.
+# What this app will do is pick PATH_INFO (your request URL path)
+# and grab a file located at this path on your filesystem. The file will then
+# be added to a ZIP archive created completely programmatically. No data will
+# be cached on disk and the contents of the ZIP file will _not_ be buffered in
+# its entirety before sending. Unless you use a buffering Rack server of
+# course (WEBrick or Thin).
+class ZipDownload
+  def call(env)
+    file_path = env["PATH_INFO"] # Should be the absolute path on the filesystem
+
+    # Open the file for binary reading
+    f = File.open(file_path, "rb")
+    filename = File.basename(file_path)
+
+    # Compute the CRC32 upfront. We do not use local footers for post-computing
+    # the CRC32, so you _do_ have to precompute it beforehand. Ideally, you
+    # would do that before storing the files you will be sending out later on.
+    crc32 = ZipKit::StreamCRC32.from_io(f)
+    f.rewind
+
+    # Compute the size of the download, so that a
+    # real Content-Length header can be sent. Also, if your download
+    # stops at some point, the downloading browser will be able to tell
+    # the user that the download stalled or was aborted in-flight.
+    # Note that using the size estimator here does _not_ read or compress
+    # your original file, so it is very fast.
+    size = ZipKit::SizeEstimator.estimate { |ar|
+      ar.add_stored_entry(filename: filename, size: f.size)
+    }
+
+    # Create a suitable Rack response body, that will support each(),
+    # close() and all the other methods. We can then return it up the stack.
+    zip_response_body = ZipKit::OutputEnumerator.new do |zip|
+      # We are adding only one file to the ZIP here, but you could do that
+      # with an arbitrary number of files of course.
+      zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
+      # Write the contents of the file. It is stored, so the writes go
+      # directly to the Rack output, bypassing any RubyZip
+      # deflaters/compressors. In fact you are yielding the "blob" string
+      # here directly to the Rack server handler.
+      IO.copy_stream(f, zip)
+    ensure
+      f.close # Make sure the opened file we read from gets closed
+    end
+
+    # Add a Content-Disposition so that the download has a .zip extension
+    # (this will not work well with UTF-8 filenames on Windows, but hey!)
+    content_disposition = "attachment; filename=%<filename>s.zip" % {filename: filename}
+
+    # and return the response, adding the Content-Length we have computed earlier
+    [
+      200,
+      {"Content-Length" => size.to_s, "Content-Disposition" => content_disposition},
+      zip_response_body
+    ]
+  end
+end
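The example above is a complete Rack application, so it can be mounted from a rackup file for a quick local test. Below is a minimal sketch of such a `config.ru` (the file list above includes an `examples/config.ru`, but its exact contents are not shown here, so the require path below is an assumption):

```ruby
# config.ru - hypothetical rackup file, placed in the gem root for this sketch.
# Start it with `rackup config.ru`; requesting e.g. /tmp/some_file then streams
# that file back wrapped in a ZIP. As the comments above warn, do NOT expose
# this in production - it serves arbitrary filesystem paths.
require_relative "examples/rack_application"

run ZipDownload.new
```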
data/examples/s3_upload.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require_relative "../lib/zip_kit"
+
+# Any writable object can be used as a destination for the Streamer.
+# For example, you can write to an S3 bucket. Newer versions of the S3 SDK
+# support a method called `upload_stream` which allows streaming uploads. The
+# SDK will split your streamed bytes into appropriately-sized multipart upload
+# parts and PUT them onto S3.
+bucket = Aws::S3::Bucket.new("mybucket")
+obj = bucket.object("big.zip")
+obj.upload_stream do |write_stream|
+  ZipKit::Streamer.open(write_stream) do |zip|
+    zip.write_file("large.csv") do |sink|
+      CSV(sink) do |csv|
+        csv << ["Line", "Item"]
+        20_000.times do |n|
+          csv << [n, "Item number #{n}"]
+        end
+      end
+    end
+  end
+end
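Since the Streamer only needs a destination that responds to `<<`, the same pattern can be tried against a local file before wiring it up to S3. A minimal sketch, with the output path and row count made up for illustration:

```ruby
require "csv"
require "zip_kit"

# Stream the same kind of archive into a local file instead of an S3
# multipart upload - useful for checking the output before involving the SDK.
File.open("/tmp/big.zip", "wb") do |out|
  ZipKit::Streamer.open(out) do |zip|
    zip.write_file("large.csv") do |sink|
      CSV(sink) do |csv|
        csv << ["Line", "Item"]
        200.times { |n| csv << [n, "Item number #{n}"] }
      end
    end
  end
end
```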
data/lib/zip_kit/block_deflate.rb
@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+
+require "zlib"
+
+# Permits Deflate compression in independent blocks. The workflow is as follows:
+#
+# * Run every block to compress through deflate_chunk, remove the header,
+#   footer and adler32 from the result
+# * Write out the compressed block bodies (the ones deflate_chunk returns)
+#   to your output, in sequence
+# * Write out the footer (\03\00)
+#
+# The resulting stream is guaranteed to be handled properly by all zip
+# unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
+#
+# You could also build a compressor for Rubyzip using this module quite easily,
+# even though this is outside the scope of the library.
+#
+# When you deflate the chunks separately, you need to write the end marker
+# yourself (using `write_terminator`).
+# If you just want to deflate a large IO's contents, use
+# `deflate_in_blocks_and_terminate` to have the end marker written out for you.
+#
+# Basic usage to compress a file in parts:
+#
+#     source_file = File.open('12_gigs.bin', 'rb')
+#     compressed = Tempfile.new
+#     # Will not compress everything in memory, but do it per chunk to spare
+#     # memory. `compressed`
+#     # will be written to at the end of each chunk.
+#     ZipKit::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
+#       compressed)
+#
+# You can also do the same to parts that you will later concatenate together
+# elsewhere, in that case you need to skip the end marker:
+#
+#     compressed = Tempfile.new
+#     ZipKit::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
+#       compressed)
+#     ZipKit::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
+#       compressed)
+#     ZipKit::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
+#       compressed)
+#     ZipKit::BlockDeflate.write_terminator(compressed)
+#
+# You can also elect to just compress strings in memory (to splice them later):
+#
+#     compressed_string = ZipKit::BlockDeflate.deflate_chunk(big_string)
+
+class ZipKit::BlockDeflate
+  DEFAULT_BLOCKSIZE = 1_024 * 1024 * 5
+  END_MARKER = [3, 0].pack("C*")
+  # Zlib::NO_COMPRESSION..
+  VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze
+  # Write the end marker (\x3\x0) to the given IO.
+  #
+  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
+  #
+  # @param output_io [IO] the stream to write to (should respond to `:<<`)
+  # @return [Fixnum] number of bytes written to `output_io`
+  def self.write_terminator(output_io)
+    output_io << END_MARKER
+    END_MARKER.bytesize
+  end
+
+  # Compress a given binary string and flush the deflate stream at byte boundary.
+  # The returned string can be spliced into another deflate stream.
+  #
+  # @param bytes [String] Bytes to compress
+  # @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
+  # @return [String] compressed bytes
+  def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION)
+    raise "Invalid Zlib compression level #{level}" unless VALID_COMPRESSIONS.include?(level)
+    z = Zlib::Deflate.new(level)
+    compressed_blob = z.deflate(bytes, Zlib::SYNC_FLUSH)
+    compressed_blob << z.finish
+    z.close
+
+    # Remove the header (2 bytes), the [3,0] end marker and the adler (4 bytes)
+    compressed_blob[2...-6]
+  end
+
+  # Compress the contents of input_io into output_io, in blocks
+  # of block_size. Aligns the parts so that they can be concatenated later.
+  # Writes deflate end marker (\x3\x0) into `output_io` as the final step, so
+  # the contents of `output_io` can be spliced verbatim into a ZIP archive.
+  #
+  # Once the write completes, no more parts for concatenation should be written to
+  # the same stream.
+  #
+  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
+  #
+  # @param input_io [IO] the stream to read from (should respond to `:read`)
+  # @param output_io [IO] the stream to write to (should respond to `:<<`)
+  # @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
+  # @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
+  # @return [Fixnum] number of bytes written to `output_io`
+  def self.deflate_in_blocks_and_terminate(input_io,
+    output_io,
+    level: Zlib::DEFAULT_COMPRESSION,
+    block_size: DEFAULT_BLOCKSIZE)
+    bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
+    bytes_written + write_terminator(output_io)
+  end
+
+  # Compress the contents of input_io into output_io, in blocks
+  # of block_size. Align the parts so that they can be concatenated later.
+  # Will not write the deflate end marker (\x3\x0) so more parts can be written
+  # later and successfully read back in, provided the end marker will be written.
+  #
+  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
+  #
+  # @param input_io [IO] the stream to read from (should respond to `:read`)
+  # @param output_io [IO] the stream to write to (should respond to `:<<`)
+  # @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
+  # @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
+  # @return [Fixnum] number of bytes written to `output_io`
+  def self.deflate_in_blocks(input_io,
+    output_io,
+    level: Zlib::DEFAULT_COMPRESSION,
+    block_size: DEFAULT_BLOCKSIZE)
+    bytes_written = 0
+    while (block = input_io.read(block_size))
+      deflated = deflate_chunk(block, level: level)
+      output_io << deflated
+      bytes_written += deflated.bytesize
+    end
+    bytes_written
+  end
+end
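Because every part is deflated independently, the part-wise workflow from the docstring lends itself to compressing the parts in parallel and only concatenating at the end. A minimal sketch using just `deflate_in_blocks` and `write_terminator` as documented above (the file names and the one-thread-per-part layout are illustrative; the gem's own `examples/parallel_compression_with_block_deflate.rb` presumably shows a fuller version):

```ruby
require "tempfile"
require "zip_kit"

part_paths = ["part1.bin", "part2.bin", "part3.bin"]

# Deflate every part into its own Tempfile, one thread per part.
compressed_parts = part_paths.map { |path|
  Thread.new do
    deflated = Tempfile.new("deflated-part")
    deflated.binmode
    File.open(path, "rb") do |f|
      ZipKit::BlockDeflate.deflate_in_blocks(f, deflated)
    end
    deflated
  end
}.map(&:value)

# Concatenate the deflated parts in order, then close the stream with the
# end marker so the result is one valid, contiguous deflate blob.
joined = Tempfile.new("deflated-joined")
joined.binmode
compressed_parts.each do |part|
  part.rewind
  IO.copy_stream(part, joined)
end
ZipKit::BlockDeflate.write_terminator(joined)
```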
data/lib/zip_kit/block_write.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+# Acts as a converter between callers which send data to the `#<<` method (such as all the ZipKit
+# writer methods, which push onto anything), and a given block. Every time `#<<` gets called on the BlockWrite,
+# the block given to the constructor will be called with the same argument. ZipKit uses this object
+# when integrating with Rack and in the OutputEnumerator. Normally you wouldn't need to use it manually but
+# you always can. BlockWrite will also ensure the binary string encoding is forced onto any string
+# that passes through it.
+#
+# For example, you can create a Rack response body like so:
+#
+#     class MyRackResponse
+#       def each
+#         writer = ZipKit::BlockWrite.new {|chunk| yield(chunk) }
+#         writer << "Hello" << "world" << "!"
+#       end
+#     end
+#     [200, {}, MyRackResponse.new]
+class ZipKit::BlockWrite
+  # Creates a new BlockWrite.
+  #
+  # @param block The block that will be called when this object receives the `<<` message
+  def initialize(&block)
+    @block = block
+  end
+
+  # Make sure those methods raise outright
+  %i[seek pos= to_s].each do |m|
+    define_method(m) do |*_args|
+      raise "#{m} not supported - this IO adapter is non-rewindable"
+    end
+  end
+
+  # Sends a string through to the block stored in the BlockWrite.
+  #
+  # @param buf[String] the string to write. Note that a zero-length String
+  #   will not be forwarded to the block, as it has special meaning when used
+  #   with chunked encoding (it indicates the end of the stream).
+  # @return self
+  def <<(buf)
+    # Zero-size output has a special meaning when using chunked encoding
+    return if buf.nil? || buf.bytesize.zero?
+
+    @block.call(buf.b)
+    self
+  end
+end
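Because a BlockWrite just forwards whatever it receives via `<<`, it also works as a destination for the Streamer when you want to intercept each produced chunk yourself. A minimal sketch that counts output bytes (the entry name and payload are made up):

```ruby
require "zip_kit"

bytes_out = 0
counting_sink = ZipKit::BlockWrite.new { |chunk| bytes_out += chunk.bytesize }

ZipKit::Streamer.open(counting_sink) do |zip|
  zip.write_file("hello.txt") do |sink|
    sink << "Hello from inside the archive"
  end
end

puts "ZIP output was #{bytes_out} bytes"
```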
data/lib/zip_kit/file_reader/inflating_reader.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+# Rubocop: convention: Missing top-level class documentation comment.
+class ZipKit::FileReader::InflatingReader
+  def initialize(from_io, compressed_data_size)
+    @io = from_io
+    @compressed_data_size = compressed_data_size
+    @already_read = 0
+    @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
+  end
+
+  def extract(n_bytes = nil)
+    n_bytes ||= (@compressed_data_size - @already_read)
+
+    return if eof?
+
+    available = @compressed_data_size - @already_read
+
+    return if available.zero?
+
+    n_bytes = available if n_bytes > available
+
+    return "" if n_bytes.zero?
+
+    compressed_chunk = @io.read(n_bytes)
+
+    return if compressed_chunk.nil?
+
+    @already_read += compressed_chunk.bytesize
+    @zlib_inflater.inflate(compressed_chunk)
+  end
+
+  def eof?
+    @zlib_inflater.finished?
+  end
+end
data/lib/zip_kit/file_reader/stored_reader.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+# Rubocop: convention: Missing top-level class documentation comment.
+class ZipKit::FileReader::StoredReader
+  def initialize(from_io, compressed_data_size)
+    @io = from_io
+    @compressed_data_size = compressed_data_size
+    @already_read = 0
+  end
+
+  def extract(n_bytes = nil)
+    n_bytes ||= (@compressed_data_size - @already_read)
+
+    return if eof?
+
+    available = @compressed_data_size - @already_read
+
+    return if available.zero?
+
+    n_bytes = available if n_bytes > available
+
+    return "" if n_bytes.zero?
+
+    compressed_chunk = @io.read(n_bytes)
+
+    return if compressed_chunk.nil?
+
+    @already_read += compressed_chunk.bytesize
+    compressed_chunk
+  end
+
+  def eof?
+    @already_read >= @compressed_data_size
+  end
+end
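Both readers above expose the same two-method surface: `extract(n_bytes)` returns the next chunk of entry data (or `nil` once the input is exhausted) and `eof?` tells you when to stop. A minimal sketch of draining an InflatingReader, assuming you already know where the compressed entry data starts and how long it is (the offsets below are made up; in practice they come from parsing the archive, e.g. with ZipKit::FileReader). The same loop works with StoredReader for entries stored without compression:

```ruby
require "zip_kit"

# Hypothetical values - obtain the real ones from the ZIP central directory.
compressed_data_offset = 4_321
compressed_size = 98_765

File.open("archive.zip", "rb") do |io|
  io.seek(compressed_data_offset)
  reader = ZipKit::FileReader::InflatingReader.new(io, compressed_size)

  File.open("extracted.bin", "wb") do |out|
    until reader.eof?
      # Each call reads up to 1 MiB of *compressed* bytes and inflates them.
      chunk = reader.extract(1024 * 1024)
      out << chunk if chunk
    end
  end
end
```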