zip_kit 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +7 -0
  3. data/.document +5 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +61 -0
  6. data/.rspec +1 -0
  7. data/.standard.yml +8 -0
  8. data/.yardopts +1 -0
  9. data/CHANGELOG.md +255 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +153 -0
  12. data/Gemfile +4 -0
  13. data/IMPLEMENTATION_DETAILS.md +97 -0
  14. data/LICENSE.txt +20 -0
  15. data/README.md +234 -0
  16. data/Rakefile +21 -0
  17. data/bench/buffered_crc32_bench.rb +109 -0
  18. data/examples/archive_size_estimate.rb +15 -0
  19. data/examples/config.ru +7 -0
  20. data/examples/deferred_write.rb +58 -0
  21. data/examples/parallel_compression_with_block_deflate.rb +86 -0
  22. data/examples/rack_application.rb +63 -0
  23. data/examples/s3_upload.rb +23 -0
  24. data/lib/zip_kit/block_deflate.rb +130 -0
  25. data/lib/zip_kit/block_write.rb +47 -0
  26. data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
  27. data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
  28. data/lib/zip_kit/file_reader.rb +740 -0
  29. data/lib/zip_kit/null_writer.rb +12 -0
  30. data/lib/zip_kit/output_enumerator.rb +150 -0
  31. data/lib/zip_kit/path_set.rb +163 -0
  32. data/lib/zip_kit/rack_chunked_body.rb +32 -0
  33. data/lib/zip_kit/rack_tempfile_body.rb +61 -0
  34. data/lib/zip_kit/rails_streaming.rb +37 -0
  35. data/lib/zip_kit/remote_io.rb +114 -0
  36. data/lib/zip_kit/remote_uncap.rb +22 -0
  37. data/lib/zip_kit/size_estimator.rb +84 -0
  38. data/lib/zip_kit/stream_crc32.rb +60 -0
  39. data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
  40. data/lib/zip_kit/streamer/entry.rb +37 -0
  41. data/lib/zip_kit/streamer/filler.rb +9 -0
  42. data/lib/zip_kit/streamer/heuristic.rb +68 -0
  43. data/lib/zip_kit/streamer/stored_writer.rb +39 -0
  44. data/lib/zip_kit/streamer/writable.rb +36 -0
  45. data/lib/zip_kit/streamer.rb +614 -0
  46. data/lib/zip_kit/uniquify_filename.rb +39 -0
  47. data/lib/zip_kit/version.rb +5 -0
  48. data/lib/zip_kit/write_and_tell.rb +40 -0
  49. data/lib/zip_kit/write_buffer.rb +71 -0
  50. data/lib/zip_kit/write_shovel.rb +22 -0
  51. data/lib/zip_kit/zip_writer.rb +436 -0
  52. data/lib/zip_kit.rb +24 -0
  53. data/zip_kit.gemspec +41 -0
  54. metadata +335 -0
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../lib/zip_kit"
4
+
5
+ # An example of how you can create a Rack endpoint for your ZIP downloads.
6
+ # NEVER run this in production - it is a huge security risk.
7
+ # What this app will do is pick PATH_INFO (your request URL path)
8
+ # and grab a file located at this path on your filesystem. The file will then
9
+ # be added to a ZIP archive created completely programmatically. No data will
10
+ # be cached on disk and the contents of the ZIP file will _not_ be buffered in
11
+ # its entirety before sending. Unless you use a buffering Rack server of
12
+ # course (WEBrick or Thin).
13
class ZipDownload
  # Rack entry point. Treats PATH_INFO as a path on the local filesystem and
  # responds with a ZIP archive containing that single file, stored
  # (uncompressed).
  #
  # @param env [Hash] the Rack environment; only "PATH_INFO" is read
  # @return [Array] a Rack response triplet of status, headers and body
  def call(env)
    file_path = env["PATH_INFO"] # Should be the absolute path on the filesystem

    # Open the file for binary reading
    f = File.open(file_path, "rb")
    filename = File.basename(file_path)

    begin
      file_size = f.size

      # Compute the CRC32 upfront. We do not use local footers for post-computing
      # the CRC32, so you _do_ have to precompute it beforehand. Ideally, you
      # would do that before storing the files you will be sending out later on.
      crc32 = ZipKit::StreamCRC32.from_io(f)
      f.rewind

      # Compute the size of the download, so that a
      # real Content-Length header can be sent. Also, if your download
      # stops at some point, the downloading browser will be able to tell
      # the user that the download stalled or was aborted in-flight.
      # Note that using the size estimator here does _not_ read or compress
      # your original file, so it is very fast.
      # The entry is described with keyword arguments, the same way the
      # streamer is called further down.
      size = ZipKit::SizeEstimator.estimate { |ar|
        ar.add_stored_entry(filename: filename, size: file_size)
      }
    rescue StandardError
      # Nothing has been handed to Rack yet, so nobody else is going to
      # close the file for us - do it here before propagating the error.
      f.close
      raise
    end

    # Create a suitable Rack response body, that will support each(),
    # close() and all the other methods. We can then return it up the stack.
    zip_response_body = ZipKit::OutputEnumerator.new do |zip|
      # We are adding only one file to the ZIP here, but you could do that
      # with an arbitrary number of files of course.
      zip.add_stored_entry(filename: filename, size: file_size, crc32: crc32)
      # Write the contents of the file. It is stored, so the writes go
      # directly to the Rack output, bypassing any RubyZip
      # deflaters/compressors. In fact you are yielding the "blob" string
      # here directly to the Rack server handler.
      IO.copy_stream(f, zip)
    ensure
      f.close # Make sure the opened file we read from gets closed
    end

    # Add a Content-Disposition so that the download has a .zip extension
    # (this will not work well with UTF-8 filenames on Windows, but hey!).
    # The name is sent as a quoted-string so that spaces and semicolons in it
    # do not break the header; embedded quotes and backslashes are escaped.
    quoted_name = "#{filename}.zip".gsub(/["\\]/) { |char| "\\#{char}" }
    content_disposition = %(attachment; filename="#{quoted_name}")

    # and return the response, adding the Content-Length we have computed earlier
    [
      200,
      {"Content-Length" => size.to_s, "Content-Disposition" => content_disposition},
      zip_response_body
    ]
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../lib/zip_kit"
4
+
5
+ # Any writable object can be used as a destination for the Streamer.
6
+ # For example, you can write to an S3 bucket. Newer versions of the S3 SDK
7
+ # support a method called `upload_stream` which allows streaming uploads. The
8
+ # SDK will split your streamed bytes into appropriately-sized multipart upload
9
+ # parts and PUT them onto S3.
10
# `CSV(...)` below comes from the "csv" library, which is not loaded
# automatically - without this require the example fails with a NameError.
require "csv"

# NOTE(review): `Aws::S3::Bucket` is expected to come from the aws-sdk-s3 gem,
# which has to be installed and loaded (e.g. `require "aws-sdk-s3"`) before
# this script runs - confirm against your Gemfile.
bucket = Aws::S3::Bucket.new("mybucket")
obj = bucket.object("big.zip")

# Everything the Streamer writes goes into `write_stream`, and every row the
# CSV writer produces goes into `sink`, the writable for the "large.csv" entry.
obj.upload_stream do |write_stream|
  ZipKit::Streamer.open(write_stream) do |zip|
    zip.write_file("large.csv") do |sink|
      CSV(sink) do |csv|
        csv << ["Line", "Item"]
        20_000.times do |n|
          csv << [n, "Item number #{n}"]
        end
      end
    end
  end
end
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+
5
+ # Permits Deflate compression in independent blocks. The workflow is as follows:
6
+ #
7
+ # * Run every block to compress through deflate_chunk, remove the header,
8
+ # footer and adler32 from the result
9
+ # * Write out the compressed block bodies (the ones deflate_chunk returns)
10
+ # to your output, in sequence
11
+ # * Write out the footer (\03\00)
12
+ #
13
+ # The resulting stream is guaranteed to be handled properly by all zip
14
+ # unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
15
+ #
16
+ # You could also build a compressor for Rubyzip using this module quite easily,
17
+ # even though this is outside the scope of the library.
18
+ #
19
+ # When you deflate the chunks separately, you need to write the end marker
20
+ # yourself (using `write_terminator`).
21
+ # If you just want to deflate a large IO's contents, use
22
+ # `deflate_in_blocks_and_terminate` to have the end marker written out for you.
23
+ #
24
+ # Basic usage to compress a file in parts:
25
+ #
26
+ # source_file = File.open('12_gigs.bin', 'rb')
27
+ # compressed = Tempfile.new
28
+ # # Will not compress everything in memory, but do it per chunk
29
+ # # to spare memory. `compressed` will be written to
30
+ # # at the end of each chunk.
31
+ # ZipKit::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
32
+ # compressed)
33
+ #
34
+ # You can also do the same to parts that you will later concatenate together
35
+ # elsewhere, in that case you need to skip the end marker:
36
+ #
37
+ # compressed = Tempfile.new
38
+ # ZipKit::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
39
+ # compressed)
40
+ # ZipKit::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
41
+ # compressed)
42
+ # ZipKit::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
43
+ # compressed)
44
+ # ZipKit::BlockDeflate.write_terminator(compressed)
45
+ #
46
+ # You can also elect to just compress strings in memory (to splice them later):
47
+ #
48
+ # compressed_string = ZipKit::BlockDeflate.deflate_chunk(big_string)
49
+
50
class ZipKit::BlockDeflate
  # Number of bytes read from the input per block when no `block_size` is given (5 MiB).
  DEFAULT_BLOCKSIZE = 1_024 * 1024 * 5

  # The two bytes (\x3\x0) that terminate a deflate stream assembled from chunks.
  END_MARKER = [3, 0].pack("C*").freeze

  # Every level from Zlib::DEFAULT_COMPRESSION up to Zlib::BEST_COMPRESSION.
  # Zlib::DEFAULT_COMPRESSION is the lowest value of that range, so
  # Zlib::NO_COMPRESSION is accepted as well.
  VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze

  # Write the end marker (\x3\x0) to the given IO.
  #
  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
  #
  # @param output_io [IO] the stream to write to (should respond to `:<<`)
  # @return [Integer] number of bytes written to `output_io`
  def self.write_terminator(output_io)
    output_io << END_MARKER
    END_MARKER.bytesize
  end

  # Compress a given binary string and flush the deflate stream at byte boundary.
  # The returned string can be spliced into another deflate stream.
  #
  # @param bytes [String] Bytes to compress
  # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
  # @return [String] compressed bytes
  # @raise [RuntimeError] if `level` is not one of `VALID_COMPRESSIONS`
  def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION)
    raise "Invalid Zlib compression level #{level}" unless VALID_COMPRESSIONS.include?(level)

    z = Zlib::Deflate.new(level)
    begin
      compressed_blob = z.deflate(bytes, Zlib::SYNC_FLUSH)
      compressed_blob << z.finish
    ensure
      # Release the deflater even when compressing raises (for example
      # because `bytes` is not a String), instead of waiting for the GC.
      z.close
    end

    # Remove the header (2 bytes), the [3,0] end marker and the adler (4 bytes)
    compressed_blob[2...-6]
  end

  # Compress the contents of input_io into output_io, in blocks
  # of block_size. Aligns the parts so that they can be concatenated later.
  # Writes deflate end marker (\x3\x0) into `output_io` as the final step, so
  # the contents of `output_io` can be spliced verbatim into a ZIP archive.
  #
  # Once the write completes, no more parts for concatenation should be written to
  # the same stream.
  #
  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
  #
  # @param input_io [IO] the stream to read from (should respond to `:read`)
  # @param output_io [IO] the stream to write to (should respond to `:<<`)
  # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
  # @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
  # @return [Integer] number of bytes written to `output_io`
  # @raise [ArgumentError] if `block_size` is not a positive number
  def self.deflate_in_blocks_and_terminate(input_io,
    output_io,
    level: Zlib::DEFAULT_COMPRESSION,
    block_size: DEFAULT_BLOCKSIZE)
    bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
    bytes_written + write_terminator(output_io)
  end

  # Compress the contents of input_io into output_io, in blocks
  # of block_size. Align the parts so that they can be concatenated later.
  # Will not write the deflate end marker (\x3\x0) so more parts can be written
  # later and successfully read back in, provided the end marker will be written.
  #
  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
  #
  # @param input_io [IO] the stream to read from (should respond to `:read`)
  # @param output_io [IO] the stream to write to (should respond to `:<<`)
  # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
  # @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
  # @return [Integer] number of bytes written to `output_io`
  # @raise [ArgumentError] if `block_size` is not a positive number
  def self.deflate_in_blocks(input_io,
    output_io,
    level: Zlib::DEFAULT_COMPRESSION,
    block_size: DEFAULT_BLOCKSIZE)
    # The loop below only stops when `read` returns nil. With a zero or nil
    # length `read` keeps returning an empty String at end of input, which
    # would make the loop spin forever - refuse such block sizes upfront.
    unless block_size.is_a?(Numeric) && block_size.positive?
      raise ArgumentError, "block_size must be a positive number, got #{block_size.inspect}"
    end

    bytes_written = 0
    while (block = input_io.read(block_size))
      deflated = deflate_chunk(block, level: level)
      output_io << deflated
      bytes_written += deflated.bytesize
    end
    bytes_written
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Acts as a converter between callers which send data to the `#<<` method (such as all the ZipKit
4
+ # writer methods, which push onto anything), and a given block. Every time `#<<` gets called on the BlockWrite,
5
+ # the block given to the constructor will be called with the same argument. ZipKit uses this object
6
+ # when integrating with Rack and in the OutputEnumerator. Normally you wouldn't need to use it manually but
7
+ # you always can. BlockWrite will also ensure the binary string encoding is forced onto any string
8
+ # that passes through it.
9
+ #
10
+ # For example, you can create a Rack response body like so:
11
+ #
12
+ # class MyRackResponse
13
+ # def each
14
+ # writer = ZipKit::BlockWrite.new {|chunk| yield(chunk) }
15
+ # writer << "Hello" << "world" << "!"
16
+ # end
17
+ # end
18
+ # [200, {}, MyRackResponse.new]
19
class ZipKit::BlockWrite
  # Creates a new BlockWrite.
  #
  # @param block The block that will be called when this object receives the `<<` message
  def initialize(&block)
    @block = block
  end

  # Make sure those methods raise outright
  %i[seek pos= to_s].each do |m|
    define_method(m) do |*_args|
      raise "#{m} not supported - this IO adapter is non-rewindable"
    end
  end

  # Sends a string through to the block stored in the BlockWrite.
  #
  # @param buf [String, nil] the string to write. Note that `nil` and a
  #   zero-length String will not be forwarded to the block, as zero-size output
  #   has special meaning when used with chunked encoding (it indicates the end
  #   of the stream).
  # @return self
  def <<(buf)
    # Zero-size output has a special meaning when using chunked encoding.
    # Still return self, so that a chain like `writer << "" << "data"`
    # keeps working when one of the links happens to be empty.
    return self if buf.nil? || buf.bytesize.zero?

    # Hand the block a binary-encoded copy of the string
    @block.call(buf.b)
    self
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
# Reads the deflated body of an entry from an IO in pieces and returns the
# inflated data. Never reads more than `compressed_data_size` bytes
# from the IO it is given.
class ZipKit::FileReader::InflatingReader
  # @param from_io [#read] the IO to read compressed bytes from; reading
  #   starts at whatever position the IO is at when `extract` is called
  # @param compressed_data_size [Integer] how many compressed bytes may be read
  def initialize(from_io, compressed_data_size)
    @io = from_io
    @compressed_data_size = compressed_data_size
    @already_read = 0
    # Negative window bits: the data is a raw deflate stream, without zlib header
    @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
  end

  # Reads up to `n_bytes` of compressed data and returns what inflating them yields.
  #
  # @param n_bytes [Integer, nil] how many compressed bytes to read at most;
  #   when omitted, all the remaining compressed bytes are read
  # @return [String, nil] the inflated bytes (possibly an empty String), or
  #   `nil` once `eof?` is true or the IO has nothing more to give
  def extract(n_bytes = nil)
    return if eof?

    available = @compressed_data_size - @already_read
    n_bytes ||= available
    n_bytes = available if n_bytes > available

    return "" if n_bytes.zero?

    compressed_chunk = @io.read(n_bytes)

    return if compressed_chunk.nil?

    @already_read += compressed_chunk.bytesize
    @zlib_inflater.inflate(compressed_chunk)
  end

  # Tells whether there is nothing left to extract: either the deflate stream
  # has ended, or all of the declared compressed bytes have been consumed.
  # The second condition matters for truncated or zero-size entries, where the
  # inflater never reports the stream as finished - without it a caller
  # looping `until eof?` would never terminate.
  #
  # @return [Boolean]
  def eof?
    @zlib_inflater.finished? || @already_read >= @compressed_data_size
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
# Reads the body of a stored (uncompressed) entry from an IO in pieces and
# returns the bytes as they are. Never reads more than
# `compressed_data_size` bytes from the IO it is given.
class ZipKit::FileReader::StoredReader
  # @param from_io [#read] the IO to read from; reading starts at whatever
  #   position the IO is at when `extract` is called
  # @param compressed_data_size [Integer] how many bytes may be read
  def initialize(from_io, compressed_data_size)
    @io = from_io
    @compressed_data_size = compressed_data_size
    @already_read = 0
  end

  # Reads up to `n_bytes` from the IO and returns them unchanged.
  #
  # @param n_bytes [Integer, nil] how many bytes to read at most; when
  #   omitted, all the remaining bytes are read
  # @return [String, nil] the bytes read (an empty String when 0 bytes were
  #   asked for), or `nil` once `eof?` is true or the IO has nothing more to give
  def extract(n_bytes = nil)
    return if eof?

    # Not at eof, so there is at least one byte left to hand out
    available = @compressed_data_size - @already_read
    n_bytes ||= available
    n_bytes = available if n_bytes > available

    return "" if n_bytes.zero?

    compressed_chunk = @io.read(n_bytes)

    return if compressed_chunk.nil?

    @already_read += compressed_chunk.bytesize
    compressed_chunk
  end

  # Tells whether all of the declared bytes have been read.
  #
  # @return [Boolean]
  def eof?
    @already_read >= @compressed_data_size
  end
end