zip_kit 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +7 -0
  3. data/.document +5 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +61 -0
  6. data/.rspec +1 -0
  7. data/.standard.yml +8 -0
  8. data/.yardopts +1 -0
  9. data/CHANGELOG.md +255 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +153 -0
  12. data/Gemfile +4 -0
  13. data/IMPLEMENTATION_DETAILS.md +97 -0
  14. data/LICENSE.txt +20 -0
  15. data/README.md +234 -0
  16. data/Rakefile +21 -0
  17. data/bench/buffered_crc32_bench.rb +109 -0
  18. data/examples/archive_size_estimate.rb +15 -0
  19. data/examples/config.ru +7 -0
  20. data/examples/deferred_write.rb +58 -0
  21. data/examples/parallel_compression_with_block_deflate.rb +86 -0
  22. data/examples/rack_application.rb +63 -0
  23. data/examples/s3_upload.rb +23 -0
  24. data/lib/zip_kit/block_deflate.rb +130 -0
  25. data/lib/zip_kit/block_write.rb +47 -0
  26. data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
  27. data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
  28. data/lib/zip_kit/file_reader.rb +740 -0
  29. data/lib/zip_kit/null_writer.rb +12 -0
  30. data/lib/zip_kit/output_enumerator.rb +150 -0
  31. data/lib/zip_kit/path_set.rb +163 -0
  32. data/lib/zip_kit/rack_chunked_body.rb +32 -0
  33. data/lib/zip_kit/rack_tempfile_body.rb +61 -0
  34. data/lib/zip_kit/rails_streaming.rb +37 -0
  35. data/lib/zip_kit/remote_io.rb +114 -0
  36. data/lib/zip_kit/remote_uncap.rb +22 -0
  37. data/lib/zip_kit/size_estimator.rb +84 -0
  38. data/lib/zip_kit/stream_crc32.rb +60 -0
  39. data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
  40. data/lib/zip_kit/streamer/entry.rb +37 -0
  41. data/lib/zip_kit/streamer/filler.rb +9 -0
  42. data/lib/zip_kit/streamer/heuristic.rb +68 -0
  43. data/lib/zip_kit/streamer/stored_writer.rb +39 -0
  44. data/lib/zip_kit/streamer/writable.rb +36 -0
  45. data/lib/zip_kit/streamer.rb +614 -0
  46. data/lib/zip_kit/uniquify_filename.rb +39 -0
  47. data/lib/zip_kit/version.rb +5 -0
  48. data/lib/zip_kit/write_and_tell.rb +40 -0
  49. data/lib/zip_kit/write_buffer.rb +71 -0
  50. data/lib/zip_kit/write_shovel.rb +22 -0
  51. data/lib/zip_kit/zip_writer.rb +436 -0
  52. data/lib/zip_kit.rb +24 -0
  53. data/zip_kit.gemspec +41 -0
  54. metadata +335 -0
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
# Stateful CRC32 accumulator: feed it data piece by piece (or merge in
# checksums computed elsewhere) and read the running value back at any time.
class ZipKit::StreamCRC32
  include ZipKit::WriteShovel

  # Probe once, at load time, whether String.new accepts the `capacity:`
  # keyword on this Ruby. An ArgumentError means it does not.
  STRINGS_HAVE_CAPACITY_SUPPORT = begin
    String.new("", capacity: 1)
    true
  rescue ArgumentError
    false
  end
  # Number of bytes requested from the IO per read in `from_io`
  CRC_BUF_SIZE = 1024 * 512
  private_constant :STRINGS_HAVE_CAPACITY_SUPPORT, :CRC_BUF_SIZE

  # Reads the given IO until `eof?` and returns the CRC32 of everything read.
  # The IO must respond to `read(length, outbuf)` and `eof?`.
  #
  # @param io[IO] the IO to read the data from
  # @return [Integer] the computed CRC32 value
  def self.from_io(io)
    # One reusable read buffer for the whole loop. When the capacity can be
    # given upfront the string never needs to grow while reading.
    read_buffer = if STRINGS_HAVE_CAPACITY_SUPPORT
      String.new("", capacity: CRC_BUF_SIZE)
    else
      +""
    end

    checksum = new
    until io.eof?
      checksum << io.read(CRC_BUF_SIZE, read_buffer)
    end
    checksum.to_i
  end

  # Creates a new streaming CRC32 calculator. The initial value is whatever
  # `Zlib.crc32` returns when called without data.
  def initialize
    @crc = Zlib.crc32
  end

  # Folds the given string into the running CRC32, updating it in place.
  #
  # @param blob[String] the string to compute the CRC32 from
  # @return [self]
  def <<(blob)
    @crc = Zlib.crc32(blob, @crc)
    self
  end

  # Returns the CRC32 value accumulated so far
  #
  # @return [Integer] the CRC32 value for all the data seen so far
  def to_i
    @crc
  end

  # Merges a CRC32 that was computed separately into the running value,
  # as if the data it was computed from had been appended via `<<`.
  #
  # @param crc32[Integer] the CRC32 value to append
  # @param blob_size[Integer] the size of the data the `crc32` is computed from
  # @return [Integer] the updated CRC32 value for all the blobs so far
  def append(crc32, blob_size)
    @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
# Compresses everything written to it with a `Zlib::Deflate` and forwards the
# compressed bytes to the given `io`. The uncompressed data is additionally fed
# to a CRC32 calculator. Offers the same interface as the StoredWriter so the
# two can be swapped freely.
class ZipKit::Streamer::DeflatedWriter
  include ZipKit::WriteShovel

  # The amount of bytes we will buffer before computing the intermediate
  # CRC32 checksums. Benchmarks show that the optimum is 64KB (see
  # `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
  # to perform internal CRC combine calls which will make the speed go down again.
  CRC32_BUFFER_SIZE = 64 * 1024

  # @param io[#<<] the destination that receives the compressed bytes
  def initialize(io)
    @compressed_io = io
    # Negative window bits ask zlib for a raw deflate stream
    @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
    @crc = ZipKit::StreamCRC32.new
    @crc_buf = ZipKit::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
  end

  # Feeds the given data to the deflater, passing any compressed output
  # it produces on to the destination, and registers the uncompressed
  # data with the CRC32 buffer.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    @deflater.deflate(data) do |compressed_chunk|
      @compressed_io << compressed_chunk
    end
    @crc_buf << data
    self
  end

  # Drains the deflater into the destination, flushes the CRC32 buffer and
  # reports the totals. The deflater is closed afterwards in any case.
  # The return value can be directly used as the argument to
  # {Streamer#update_last_entry_and_write_data_descriptor}
  #
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
  def finish
    until @deflater.finished?
      @compressed_io << @deflater.finish
    end
    @crc_buf.flush
    {
      crc32: @crc.to_i,
      compressed_size: @deflater.total_out,
      uncompressed_size: @deflater.total_in
    }
  ensure
    @deflater.close
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# Is used internally by Streamer to keep track of entries in the archive during writing.
# Normally you will not have to use this class directly
class ZipKit::Streamer::Entry < Struct.new(:filename, :crc32, :compressed_size,
  :uncompressed_size, :storage_mode, :mtime,
  :use_data_descriptor, :local_header_offset, :bytes_used_for_local_header, :bytes_used_for_data_descriptor, :unix_permissions)
  # Accepts the struct members positionally, tags the filename as UTF-8 and
  # records whether the filename contains anything beyond plain ASCII.
  def initialize(*)
    super
    # Re-tag a private copy instead of the string the caller handed in:
    # `force_encoding` mutates its receiver, which would silently alter the
    # caller's object and raise FrozenError for a frozen filename.
    self.filename = filename.dup.force_encoding(Encoding::UTF_8)
    # The EFS flag is needed exactly when the name cannot be represented in ASCII
    @requires_efs_flag = !filename.ascii_only?
  end

  # Sum of the byte counts recorded for the local header, the compressed
  # data and the data descriptor of this entry.
  #
  # @return [Integer]
  def total_bytes_used
    bytes_used_for_local_header + compressed_size + bytes_used_for_data_descriptor
  end

  # Set the general purpose flags for the entry. What we care about is the EFS
  # bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
  # bit so that the unarchiving application knows that the filename in the archive is UTF-8
  # encoded, and not some DOS default. For ASCII entries it does not matter.
  # Additionally, we care about bit 3 which toggles the use of the postfix data descriptor.
  #
  # @return [Integer] the flags as a bit field
  def gp_flags
    flag = 0b00000000000
    flag |= 0b100000000000 if @requires_efs_flag # bit 11
    flag |= 0x0008 if use_data_descriptor # bit 3
    flag
  end

  # Tells an Entry apart from a Filler
  #
  # @return [Boolean] always false
  def filler?
    false
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
# Internal bookkeeping record used by Streamer while writing an archive.
# Carries only a `total_bytes_used` value and identifies itself via `filler?`.
# Normally you will not have to use this class directly
class ZipKit::Streamer::Filler < Struct.new(:total_bytes_used)
  # Tells a Filler apart from an Entry
  #
  # @return [Boolean] always true
  def filler?
    true
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
# Will be used to pick whether to store a file in the `stored` or
# `deflated` mode, by compressing the first N bytes of the file and
# comparing the stored and deflated data sizes. If deflate produces
# a sizable compression gain for this data, it will create a deflated
# file inside the ZIP archive. If the file doesn't compress well, it
# will use the "stored" mode for the entry. About 128KB of the
# file will be buffered to pick the appropriate storage mode. The
# Heuristic will call either `write_stored_file` or `write_deflated_file`
# on the Streamer passed into it once it knows which compression
# method should be applied
class ZipKit::Streamer::Heuristic
  include ZipKit::WriteShovel

  # Once more than this many bytes have been buffered the decision is made
  BYTES_WRITTEN_THRESHOLD = 128 * 1024
  # Deflate is chosen when deflated size / stored size is at most this ratio
  MINIMUM_VIABLE_COMPRESSION = 0.75

  # @param streamer the object that will receive `write_deflated_file` or `write_stored_file`
  # @param filename[String] passed through to the chosen write method
  # @param write_file_options[Hash] extra keyword arguments passed through to the chosen write method
  def initialize(streamer, filename, **write_file_options)
    @streamer = streamer
    @filename = filename
    @write_file_options = write_file_options

    # Holds the uncompressed bytes until the storage mode is known
    @buf = StringIO.new.binmode
    # Only used to measure how well the data compresses; its output is discarded
    @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
    @bytes_deflated = 0

    @winner = nil
  end

  # Accepts more data for the file. Before the decision is made the data is
  # buffered and trial-compressed; afterwards it goes straight to the chosen writable.
  #
  # @param bytes[String] data to be written
  # @return [self]
  def <<(bytes)
    if @winner
      @winner << bytes
    else
      @buf << bytes
      @deflater.deflate(bytes) { |chunk| @bytes_deflated += chunk.bytesize }
      decide if @buf.size > BYTES_WRITTEN_THRESHOLD
    end
    self
  end

  # Same as `<<` but returns the number of bytes accepted
  #
  # @param bytes[String] data to be written
  # @return [Integer] the bytesize of `bytes`
  def write(bytes)
    self << bytes
    bytes.bytesize
  end

  # Forces the decision if it has not been made yet, then closes the chosen writable.
  def close
    decide unless @winner
    @winner.close
  end

  # Picks the storage mode based on the trial compression, opens the matching
  # writable on the streamer and replays the buffered data into it.
  private def decide
    # Finish the deflater - it has likely buffered some data
    @bytes_deflated += @deflater.finish.bytesize until @deflater.finished?
    # If the deflated version is smaller than the stored one
    # - use deflate, otherwise stored
    ratio = @bytes_deflated / @buf.size.to_f
    @winner = if ratio <= MINIMUM_VIABLE_COMPRESSION
      @streamer.write_deflated_file(@filename, **@write_file_options)
    else
      @streamer.write_stored_file(@filename, **@write_file_options)
    end
    # With a winner in place the probing deflater is never touched again,
    # so release its zlib stream now instead of leaving that to the GC.
    @deflater.close
    # Copy the buffered uncompressed data into the newly initialized writable
    @buf.rewind
    IO.copy_stream(@buf, @winner)
    @buf.truncate(0)
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
# Passes everything written to it on to the given `io` unchanged, while also
# feeding the same data to a CRC32 calculator. Offers the same interface as
# the DeflatedWriter so the two can be swapped freely.
class ZipKit::Streamer::StoredWriter
  include ZipKit::WriteShovel

  # The amount of bytes we will buffer before computing the intermediate
  # CRC32 checksums. Benchmarks show that the optimum is 64KB (see
  # `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
  # to perform internal CRC combine calls which will make the speed go down again.
  CRC32_BUFFER_SIZE = 64 * 1024

  # @param io[#<<] the destination that receives the data
  def initialize(io)
    # The wrapper supplies the `tell` that `finish` reports sizes from
    @io = ZipKit::WriteAndTell.new(io)
    @crc_compute = ZipKit::StreamCRC32.new
    @crc = ZipKit::WriteBuffer.new(@crc_compute, CRC32_BUFFER_SIZE)
  end

  # Forwards the given data to the destination and registers it
  # with the CRC32 buffer.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    @io << data
    @crc << data
    self
  end

  # Flushes the CRC32 buffer and reports the totals. Since the data is stored
  # as-is, the compressed and uncompressed sizes are the same `tell` value.
  # The return value can be directly used as the argument to
  # {Streamer#update_last_entry_and_write_data_descriptor}
  #
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
  def finish
    @crc.flush
    {
      crc32: @crc_compute.to_i,
      compressed_size: @io.tell,
      uncompressed_size: @io.tell
    }
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
# The object the Streamer's writing methods yield. It takes the data that
# goes into the ZIP, for both deflate and stored modes, and can serve as
# the destination of `IO.copy_stream`:
#
#     IO.copy_stream(File.open('source.bin', 'rb'), writable)
class ZipKit::Streamer::Writable
  include ZipKit::WriteShovel

  # Sets up a Writable around the writer that will receive the data.
  # Normally you would not need to use this method directly
  #
  # @param streamer the object notified with the final values on `close`
  # @param writer the object that receives the data and responds to `finish`
  def initialize(streamer, writer)
    @streamer = streamer
    @writer = writer
    @closed = false
  end

  # Passes the given data on to the writer. Raises once the Writable is closed.
  #
  # @param d[String] the binary string to write (part of the uncompressed file)
  # @return [self]
  def <<(d)
    raise "Trying to write to a closed Writable" if @closed

    @writer << d
    self
  end

  # Finishes the writer and hands the values it returns to
  # `update_last_entry_and_write_data_descriptor` on the given Streamer.
  # Calling it again on an already closed Writable does nothing.
  def close
    unless @closed
      @streamer.update_last_entry_and_write_data_descriptor(**@writer.finish)
      @closed = true
    end
  end
end