zip_kit 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +7 -0
  3. data/.document +5 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +61 -0
  6. data/.rspec +1 -0
  7. data/.standard.yml +8 -0
  8. data/.yardopts +1 -0
  9. data/CHANGELOG.md +255 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +153 -0
  12. data/Gemfile +4 -0
  13. data/IMPLEMENTATION_DETAILS.md +97 -0
  14. data/LICENSE.txt +20 -0
  15. data/README.md +234 -0
  16. data/Rakefile +21 -0
  17. data/bench/buffered_crc32_bench.rb +109 -0
  18. data/examples/archive_size_estimate.rb +15 -0
  19. data/examples/config.ru +7 -0
  20. data/examples/deferred_write.rb +58 -0
  21. data/examples/parallel_compression_with_block_deflate.rb +86 -0
  22. data/examples/rack_application.rb +63 -0
  23. data/examples/s3_upload.rb +23 -0
  24. data/lib/zip_kit/block_deflate.rb +130 -0
  25. data/lib/zip_kit/block_write.rb +47 -0
  26. data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
  27. data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
  28. data/lib/zip_kit/file_reader.rb +740 -0
  29. data/lib/zip_kit/null_writer.rb +12 -0
  30. data/lib/zip_kit/output_enumerator.rb +150 -0
  31. data/lib/zip_kit/path_set.rb +163 -0
  32. data/lib/zip_kit/rack_chunked_body.rb +32 -0
  33. data/lib/zip_kit/rack_tempfile_body.rb +61 -0
  34. data/lib/zip_kit/rails_streaming.rb +37 -0
  35. data/lib/zip_kit/remote_io.rb +114 -0
  36. data/lib/zip_kit/remote_uncap.rb +22 -0
  37. data/lib/zip_kit/size_estimator.rb +84 -0
  38. data/lib/zip_kit/stream_crc32.rb +60 -0
  39. data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
  40. data/lib/zip_kit/streamer/entry.rb +37 -0
  41. data/lib/zip_kit/streamer/filler.rb +9 -0
  42. data/lib/zip_kit/streamer/heuristic.rb +68 -0
  43. data/lib/zip_kit/streamer/stored_writer.rb +39 -0
  44. data/lib/zip_kit/streamer/writable.rb +36 -0
  45. data/lib/zip_kit/streamer.rb +614 -0
  46. data/lib/zip_kit/uniquify_filename.rb +39 -0
  47. data/lib/zip_kit/version.rb +5 -0
  48. data/lib/zip_kit/write_and_tell.rb +40 -0
  49. data/lib/zip_kit/write_buffer.rb +71 -0
  50. data/lib/zip_kit/write_shovel.rb +22 -0
  51. data/lib/zip_kit/zip_writer.rb +436 -0
  52. data/lib/zip_kit.rb +24 -0
  53. data/zip_kit.gemspec +41 -0
  54. metadata +335 -0
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
# Accumulates a CRC32 checksum across any number of successive writes.
# The running value lives in the instance and is updated by `<<` and `append`.
class ZipKit::StreamCRC32
  include ZipKit::WriteShovel

  # How many bytes `from_io` asks the IO for per read
  CRC_BUF_SIZE = 1024 * 512

  # Probe once, at load time, whether `String.new` accepts the `capacity:` keyword
  STRINGS_HAVE_CAPACITY_SUPPORT = begin
    String.new("", capacity: 1)
    true
  rescue ArgumentError
    false
  end

  private_constant :STRINGS_HAVE_CAPACITY_SUPPORT
  private_constant :CRC_BUF_SIZE

  # Reads the given IO until `eof?` and returns the CRC32 of everything read.
  # The object has to respond to `read(length, buffer)` and `eof?`.
  #
  # @param io[IO] the IO to read the data from
  # @return [Integer] the computed CRC32 value
  def self.from_io(io)
    # One buffer is reused for every read. When the capacity can be given
    # upfront the string does not have to be grown while reading.
    read_buffer = if STRINGS_HAVE_CAPACITY_SUPPORT
      String.new("", capacity: CRC_BUF_SIZE)
    else
      +""
    end

    checksum = new
    until io.eof?
      checksum << io.read(CRC_BUF_SIZE, read_buffer)
    end
    checksum.to_i
  end

  # Sets up a calculator holding the initial CRC32 value (`Zlib.crc32` of no data)
  def initialize
    @crc = Zlib.crc32
  end

  # Feeds more data into the checksum, replacing the stored CRC32 value.
  #
  # @param blob[String] the string to compute the CRC32 from
  # @return [self]
  def <<(blob)
    @crc = Zlib.crc32(blob, @crc)
    self
  end

  # The CRC32 of all the data seen so far
  #
  # @return [Integer] the current CRC32 value
  def to_i
    @crc
  end

  # Combines an already computed CRC32 into the stored value, as if the data
  # it was computed from had been written after everything seen so far.
  #
  # @param crc32[Integer] the CRC32 value to append
  # @param blob_size[Integer] the size of the data the `crc32` is computed from
  # @return [Integer] the updated CRC32 value for all the blobs so far
  def append(crc32, blob_size)
    @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
# Compresses everything written to it with a raw `Zlib::Deflate` stream and
# forwards the compressed output to the given `io`. The uncompressed data is
# also fed into a CRC32 calculator. Exposes the same `<<` / `finish` interface
# as the StoredWriter so the two can be swapped.
class ZipKit::Streamer::DeflatedWriter
  include ZipKit::WriteShovel

  # The amount of bytes we will buffer before computing the intermediate
  # CRC32 checksums. Benchmarks show that the optimum is 64KB (see
  # `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
  # to perform internal CRC combine calls which will make the speed go down again.
  CRC32_BUFFER_SIZE = 64 * 1024

  # @param io[#<<] the destination that receives the compressed bytes
  def initialize(io)
    @compressed_io = io
    # Negative window bits select a raw deflate stream (no zlib header/trailer)
    @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
    @crc = ZipKit::StreamCRC32.new
    # Checksum input goes through a buffer of CRC32_BUFFER_SIZE bytes
    @crc_buf = ZipKit::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
  end

  # Pushes the given data through the deflater. Any compressed chunks the
  # deflater produces are written to the destination IO as they appear.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    @deflater.deflate(data) do |compressed_chunk|
      @compressed_io << compressed_chunk
    end
    @crc_buf << data
    self
  end

  # Finishes the deflate stream, writes out the remaining compressed data and
  # flushes the CRC32 buffer. The deflater is closed afterwards, even when an
  # error is raised. The return value can be directly used as the argument to
  # {Streamer#update_last_entry_and_write_data_descriptor}
  #
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
  def finish
    until @deflater.finished?
      @compressed_io << @deflater.finish
    end
    @crc_buf.flush
    {
      crc32: @crc.to_i,
      compressed_size: @deflater.total_out,
      uncompressed_size: @deflater.total_in
    }
  ensure
    @deflater.close
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# Is used internally by Streamer to keep track of entries in the archive during writing.
# Normally you will not have to use this class directly
class ZipKit::Streamer::Entry < Struct.new(:filename, :crc32, :compressed_size,
  :uncompressed_size, :storage_mode, :mtime,
  :use_data_descriptor, :local_header_offset, :bytes_used_for_local_header, :bytes_used_for_data_descriptor, :unix_permissions)
  # Accepts the struct members positionally. The filename is tagged as UTF-8
  # (in place) and it is recorded whether it contains anything outside of ASCII.
  def initialize(*)
    super
    # NOTE: this retags the very String object that was passed in
    filename.force_encoding(Encoding::UTF_8)
    # Any non-ASCII byte means the name can not be represented in plain ASCII,
    # so the archive has to declare the name as UTF-8 via the EFS flag.
    @requires_efs_flag = !filename.ascii_only?
  end

  # The number of bytes the entry takes up: local header, the (compressed)
  # entry body and the data descriptor.
  #
  # @return [Integer]
  def total_bytes_used
    bytes_used_for_local_header + compressed_size + bytes_used_for_data_descriptor
  end

  # Set the general purpose flags for the entry. What we care about is the EFS
  # bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
  # bit so that the unarchiving application knows that the filename in the archive is UTF-8
  # encoded, and not some DOS default. For ASCII entries it does not matter.
  # Additionally, we care about bit 3 which toggles the use of the postfix data descriptor.
  #
  # @return [Integer] the flags as a bit field
  def gp_flags
    flag = 0b00000000000
    flag |= 0b100000000000 if @requires_efs_flag # bit 11
    flag |= 0x0008 if use_data_descriptor # bit 3
    flag
  end

  # Tells a real entry apart from a Filler
  #
  # @return [Boolean] always false
  def filler?
    false
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
# A minimal record that only carries a `total_bytes_used` value. It answers
# `filler?` with `true` (an Entry answers `false`), so the two can be told apart
# while both respond to `total_bytes_used`.
# NOTE(review): presumably used by Streamer to account for bytes in the output
# that do not belong to any entry - confirm against the Streamer.
# Normally you will not have to use this class directly
class ZipKit::Streamer::Filler < Struct.new(:total_bytes_used)
  # @return [Boolean] always true
  def filler?
    true
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
# Will be used to pick whether to store a file in the `stored` or
# `deflated` mode, by compressing the first N bytes of the file and
# comparing the stored and deflated data sizes. If deflate produces
# a sizable compression gain for this data, it will create a deflated
# file inside the ZIP archive. If the file doesn't compress well, it
# will use the "stored" mode for the entry. About 128KB of the
# file will be buffered to pick the appropriate storage mode. The
# Heuristic will call either `write_stored_file` or `write_deflated_file`
# on the Streamer passed into it once it knows which compression
# method should be applied
class ZipKit::Streamer::Heuristic
  include ZipKit::WriteShovel

  # Once more than this many bytes have been buffered the decision gets made
  BYTES_WRITTEN_THRESHOLD = 128 * 1024
  # Deflate is picked when compressed size / original size is at most this value
  MINIMUM_VIABLE_COMPRESSION = 0.75

  # @param streamer the object that receives `write_deflated_file` or `write_stored_file`
  # @param filename[String] the entry name, passed on to the streamer unchanged
  # @param write_file_options keyword arguments passed on to the streamer unchanged
  def initialize(streamer, filename, **write_file_options)
    @streamer = streamer
    @filename = filename
    @write_file_options = write_file_options

    # Holds the uncompressed bytes until the storage mode is known
    @buf = StringIO.new.binmode
    # Trial deflater: its output is only counted, never kept
    @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
    @bytes_deflated = 0

    # The writable returned by the streamer once a decision was made
    @winner = nil
  end

  # Writes the given bytes. Before the decision they are buffered and run
  # through the trial deflater, afterwards they go straight to the chosen writable.
  #
  # @param bytes[String] the data to write
  # @return [self]
  def <<(bytes)
    if @winner
      @winner << bytes
    else
      @buf << bytes
      @deflater.deflate(bytes) { |chunk| @bytes_deflated += chunk.bytesize }
      decide if @buf.size > BYTES_WRITTEN_THRESHOLD
    end
    self
  end

  # Same as `<<` but returns the number of bytes accepted, like `IO#write`
  #
  # @param bytes[String] the data to write
  # @return [Integer] the bytesize of `bytes`
  def write(bytes)
    self << bytes
    bytes.bytesize
  end

  # Makes the decision if it has not been made yet (less data than the
  # threshold was written) and closes the chosen writable.
  #
  # @return whatever `close` of the chosen writable returns
  def close
    decide unless @winner
    @winner.close
  end

  # Picks the storage mode, opens the matching writable on the streamer and
  # replays the buffered data into it.
  private def decide
    # Finish the deflater - it has likely buffered some data
    @bytes_deflated += @deflater.finish.bytesize until @deflater.finished?
    # If the deflated version is smaller than the stored one
    # - use deflate, otherwise stored
    ratio = @bytes_deflated / @buf.size.to_f
    @winner = if ratio <= MINIMUM_VIABLE_COMPRESSION
      @streamer.write_deflated_file(@filename, **@write_file_options)
    else
      @streamer.write_stored_file(@filename, **@write_file_options)
    end
    # The trial deflater is not used past this point (all further writes go to
    # the winner), so release its zlib resources right away instead of waiting
    # for the garbage collector. Done only after the winner is set so that a
    # failed attempt above can still be retried with a usable deflater.
    @deflater.close
    # Copy the buffered uncompressed data into the newly initialized writable
    @buf.rewind
    IO.copy_stream(@buf, @winner)
    @buf.truncate(0)
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
# Passes everything written to it on to the given `io` unchanged, while
# feeding the same data into a CRC32 calculator. Exposes the same
# `<<` / `finish` interface as the DeflatedWriter so the two can be swapped.
class ZipKit::Streamer::StoredWriter
  include ZipKit::WriteShovel

  # The amount of bytes we will buffer before computing the intermediate
  # CRC32 checksums. Benchmarks show that the optimum is 64KB (see
  # `bench/buffered_crc32_bench.rb`), if that is exceeded Zlib is going
  # to perform internal CRC combine calls which will make the speed go down again.
  CRC32_BUFFER_SIZE = 64 * 1024

  # @param io[#<<] the destination that receives the data
  def initialize(io)
    # The wrapper is what gets asked (via `tell`) for the size in `finish`
    @io = ZipKit::WriteAndTell.new(io)
    @crc_compute = ZipKit::StreamCRC32.new
    # Checksum input goes through a buffer of CRC32_BUFFER_SIZE bytes
    @crc = ZipKit::WriteBuffer.new(@crc_compute, CRC32_BUFFER_SIZE)
  end

  # Sends the given data to the contained IO object and to the checksum buffer.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    @io << data
    @crc << data
    self
  end

  # Flushes the checksum buffer and reports the totals. Both sizes are the
  # same, since the data is stored as-is. The return value can be directly
  # used as the argument to {Streamer#update_last_entry_and_write_data_descriptor}
  #
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
  def finish
    @crc.flush
    {
      crc32: @crc_compute.to_i,
      compressed_size: @io.tell,
      uncompressed_size: @io.tell
    }
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
# Gets yielded from the writing methods of the Streamer
# and accepts the data being written into the ZIP for deflate
# or stored modes. Can be used as a destination for `IO.copy_stream`
#
#     IO.copy_stream(File.open('source.bin', 'rb'), writable)
class ZipKit::Streamer::Writable
  include ZipKit::WriteShovel

  # Sets up a Writable in front of the given writer.
  # Normally you would not need to use this method directly
  #
  # @param streamer the object notified on `close`
  # @param writer the object that receives the writes and gets asked to `finish`
  def initialize(streamer, writer)
    @closed = false
    @writer = writer
    @streamer = streamer
  end

  # Passes the given data on to the writer
  #
  # @param d[String] the binary string to write (part of the uncompressed file)
  # @return [self]
  # @raise [RuntimeError] if the Writable has been closed already
  def <<(d)
    if @closed
      raise "Trying to write to a closed Writable"
    end
    @writer << d
    self
  end

  # Finishes the writer and hands the values it reports to
  # `update_last_entry_and_write_data_descriptor` on the given Streamer.
  # Does nothing when called again after a successful close.
  def close
    unless @closed
      @streamer.update_last_entry_and_write_data_descriptor(**@writer.finish)
      @closed = true
    end
  end
end
+ end