RubyGems - zip_kit - Versions diffs - 6.0.0 - Mend

zip_kit 6.0.0

Files changed (54) hide show

checksums.yaml +7 -0
data/.codeclimate.yml +7 -0
data/.document +5 -0
data/.github/workflows/ci.yml +29 -0
data/.gitignore +61 -0
data/.rspec +1 -0
data/.standard.yml +8 -0
data/.yardopts +1 -0
data/CHANGELOG.md +255 -0
data/CODE_OF_CONDUCT.md +46 -0
data/CONTRIBUTING.md +153 -0
data/Gemfile +4 -0
data/IMPLEMENTATION_DETAILS.md +97 -0
data/LICENSE.txt +20 -0
data/README.md +234 -0
data/Rakefile +21 -0
data/bench/buffered_crc32_bench.rb +109 -0
data/examples/archive_size_estimate.rb +15 -0
data/examples/config.ru +7 -0
data/examples/deferred_write.rb +58 -0
data/examples/parallel_compression_with_block_deflate.rb +86 -0
data/examples/rack_application.rb +63 -0
data/examples/s3_upload.rb +23 -0
data/lib/zip_kit/block_deflate.rb +130 -0
data/lib/zip_kit/block_write.rb +47 -0
data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
data/lib/zip_kit/file_reader.rb +740 -0
data/lib/zip_kit/null_writer.rb +12 -0
data/lib/zip_kit/output_enumerator.rb +150 -0
data/lib/zip_kit/path_set.rb +163 -0
data/lib/zip_kit/rack_chunked_body.rb +32 -0
data/lib/zip_kit/rack_tempfile_body.rb +61 -0
data/lib/zip_kit/rails_streaming.rb +37 -0
data/lib/zip_kit/remote_io.rb +114 -0
data/lib/zip_kit/remote_uncap.rb +22 -0
data/lib/zip_kit/size_estimator.rb +84 -0
data/lib/zip_kit/stream_crc32.rb +60 -0
data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
data/lib/zip_kit/streamer/entry.rb +37 -0
data/lib/zip_kit/streamer/filler.rb +9 -0
data/lib/zip_kit/streamer/heuristic.rb +68 -0
data/lib/zip_kit/streamer/stored_writer.rb +39 -0
data/lib/zip_kit/streamer/writable.rb +36 -0
data/lib/zip_kit/streamer.rb +614 -0
data/lib/zip_kit/uniquify_filename.rb +39 -0
data/lib/zip_kit/version.rb +5 -0
data/lib/zip_kit/write_and_tell.rb +40 -0
data/lib/zip_kit/write_buffer.rb +71 -0
data/lib/zip_kit/write_shovel.rb +22 -0
data/lib/zip_kit/zip_writer.rb +436 -0
data/lib/zip_kit.rb +24 -0
data/zip_kit.gemspec +41 -0
metadata +335 -0

data/examples/rack_application.rb ADDED Viewed

@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+require_relative "../lib/zip_kit"
+# An example of how you can create a Rack endpoint for your ZIP downloads.
+# NEVER run this in production - it is a huge security risk.
+# What this app will do is pick PATH_INFO (your request URL path)
+# and grab a file located at this path on your filesystem. The file will then
+# be added to a ZIP archive created completely programmatically. No data will
+# be cached on disk and the contents of the ZIP file will _not_ be buffered in
+# its entirety before sending. Unless you use a buffering Rack server of
+# course (WEBrick or Thin).
+class ZipDownload
+  def call(env)
+    file_path = env["PATH_INFO"] # Should be the absolute path on the filesystem
+    # Open the file for binary reading
+    f = File.open(file_path, "rb")
+    filename = File.basename(file_path)
+    # Compute the CRC32 upfront. We do not use local footers for post-computing
+    # the CRC32, so you _do_ have to precompute it beforehand. Ideally, you
+    # would do that before storing the files you will be sending out later on.
+    crc32 = ZipKit::StreamCRC32.from_io(f)
+    f.rewind
+    # Compute the size of the download, so that a
+    # real Content-Length header can be sent. Also, if your download
+    # stops at some point, the downloading browser will be able to tell
+    # the user that the download stalled or was aborted in-flight.
+    # Note that using the size estimator here does _not_ read or compress
+    # your original file, so it is very fast.
+    size = ZipKit::SizeEstimator.estimate { |ar|
+      ar.add_stored_entry(filename, f.size)
+    }
+    # Create a suitable Rack response body, that will support each(),
+    # close() and all the other methods. We can then return it up the stack.
+    zip_response_body = ZipKit::OutputEnumerator.new do |zip|
+      # We are adding only one file to the ZIP here, but you could do that
+      # with an arbitrary number of files of course.
+      zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
+      # Write the contents of the file. It is stored, so the writes go
+      # directly to the Rack output, bypassing any RubyZip
+      # deflaters/compressors. In fact you are yielding the "blob" string
+      # here directly to the Rack server handler.
+      IO.copy_stream(f, zip)
+    ensure
+      f.close # Make sure the opened file we read from gets closed
+    end
+    # Add a Content-Disposition so that the download has a .zip extension
+    # (this will not work well with UTF-8 filenames on Windows, but hey!)
+    content_disposition = "attachment; filename=%<filename>s.zip" % {filename: filename}
+    # and return the response, adding the Content-Length we have computed earlier
+    [
+      200,
+      {"Content-Length" => size.to_s, "Content-Disposition" => content_disposition},
+      zip_response_body
+    ]
+  end
+end

data/examples/s3_upload.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+require_relative "../lib/zip_kit"
+# Any writable object can be used as a destination for the Streamer.
+# For example, you can write to an S3 bucket. Newer versions of the S3 SDK
+# support a method called `upload_stream` which allows streaming uploads. The
+# SDK will split your streamed bytes into appropriately-sized multipart upload
+# parts and PUT them onto S3.
+bucket = Aws::S3::Bucket.new("mybucket")
+obj = bucket.object("big.zip")
+obj.upload_stream do |write_stream|
+  ZipKit::Streamer.open(write_stream) do |zip|
+    zip.write_file("large.csv") do |sink|
+      CSV(sink) do |csv|
+        csv << ["Line", "Item"]
+        20_000.times do |n|
+          csv << [n, "Item number #{n}"]
+        end
+      end
+    end
+  end
+end

data/lib/zip_kit/block_deflate.rb ADDED Viewed

@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+require "zlib"
+# Permits Deflate compression in independent blocks. The workflow is as follows:
+#
+# * Run every block to compress through deflate_chunk, remove the header,
+#   footer and adler32 from the result
+# * Write out the compressed block bodies (the ones deflate_chunk returns)
+#   to your output, in sequence
+# * Write out the footer (\03\00)
+#
+# The resulting stream is guaranteed to be handled properly by all zip
+# unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
+#
+# You could also build a compressor for Rubyzip using this module quite easily,
+# even though this is outside the scope of the library.
+#
+# When you deflate the chunks separately, you need to write the end marker
+# yourself (using `write_terminator`).
+# If you just want to deflate a large IO's contents, use
+# `deflate_in_blocks_and_terminate` to have the end marker written out for you.
+#
+# Basic usage to compress a file in parts:
+#
+#     source_file = File.open('12_gigs.bin', 'rb')
+#     compressed = Tempfile.new
+#     # Will not compress everything in memory, but do it per chunk to
+#     # spare memory. `compressed` will be written to at the end of each
+#     # chunk.
+#     ZipKit::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
+#                                                             compressed)
+#
+# You can also do the same to parts that you will later concatenate together
+# elsewhere, in that case you need to skip the end marker:
+#
+#     compressed = Tempfile.new
+#     ZipKit::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
+#                                               compressed)
+#     ZipKit::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
+#                                               compressed)
+#     ZipKit::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
+#                                               compressed)
+#     ZipKit::BlockDeflate.write_terminator(compressed)
+#
+# You can also elect to just compress strings in memory (to splice them later):
+#
+#     compressed_string = ZipKit::BlockDeflate.deflate_chunk(big_string)
+class ZipKit::BlockDeflate
+  DEFAULT_BLOCKSIZE = 1_024 * 1024 * 5
+  END_MARKER = [3, 0].pack("C*")
+  # Zlib::NO_COMPRESSION..
+  VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze
+  # Write the end marker (\x3\x0) to the given IO.
+  #
+  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
+  #
+  # @param output_io [IO] the stream to write to (should respond to `:<<`)
+  # @return [Fixnum] number of bytes written to `output_io`
+  def self.write_terminator(output_io)
+    output_io << END_MARKER
+    END_MARKER.bytesize
+  end
+  # Compress a given binary string and flush the deflate stream at byte boundary.
+  # The returned string can be spliced into another deflate stream.
+  #
+  # @param bytes [String] Bytes to compress
+  # @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
+  # @return [String] compressed bytes
+  def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION)
+    raise "Invalid Zlib compression level #{level}" unless VALID_COMPRESSIONS.include?(level)
+    z = Zlib::Deflate.new(level)
+    compressed_blob = z.deflate(bytes, Zlib::SYNC_FLUSH)
+    compressed_blob << z.finish
+    z.close
+    # Remove the header (2 bytes), the [3,0] end marker and the adler (4 bytes)
+    compressed_blob[2...-6]
+  end
+  # Compress the contents of input_io into output_io, in blocks
+  # of block_size. Aligns the parts so that they can be concatenated later.
+  # Writes deflate end marker (\x3\x0) into `output_io` as the final step, so
+  # the contents of `output_io` can be spliced verbatim into a ZIP archive.
+  #
+  # Once the write completes, no more parts for concatenation should be written to
+  # the same stream.
+  #
+  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
+  #
+  # @param input_io [IO] the stream to read from (should respond to `:read`)
+  # @param output_io [IO] the stream to write to (should respond to `:<<`)
+  # @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
+  # @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
+  # @return [Fixnum] number of bytes written to `output_io`
+  def self.deflate_in_blocks_and_terminate(input_io,
+    output_io,
+    level: Zlib::DEFAULT_COMPRESSION,
+    block_size: DEFAULT_BLOCKSIZE)
+    bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
+    bytes_written + write_terminator(output_io)
+  end
+  # Compress the contents of input_io into output_io, in blocks
+  # of block_size. Align the parts so that they can be concatenated later.
+  # Will not write the deflate end marker (\x3\x0) so more parts can be written
+  # later and succesfully read back in provided the end marker wll be written.
+  #
+  # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
+  #
+  # @param input_io [IO] the stream to read from (should respond to `:read`)
+  # @param output_io [IO] the stream to write to (should respond to `:<<`)
+  # @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
+  # @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
+  # @return [Fixnum] number of bytes written to `output_io`
+  def self.deflate_in_blocks(input_io,
+    output_io,
+    level: Zlib::DEFAULT_COMPRESSION,
+    block_size: DEFAULT_BLOCKSIZE)
+    bytes_written = 0
+    while (block = input_io.read(block_size))
+      deflated = deflate_chunk(block, level: level)
+      output_io << deflated
+      bytes_written += deflated.bytesize
+    end
+    bytes_written
+  end
+end

data/lib/zip_kit/block_write.rb ADDED Viewed

@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+# Acts as a converter between callers which send data to the `#<<` method (such as all the ZipKit
+# writer methods, which push onto anything), and a given block. Every time `#<<` gets called on the BlockWrite,
+# the block given to the constructor will be called with the same argument. ZipKit uses this object
+# when integrating with Rack and in the OutputEnumerator. Normally you wouldn't need to use it manually but
+# you always can. BlockWrite will also ensure the binary string encoding is forced onto any string
+# that passes through it.
+#
+# For example, you can create a Rack response body like so:
+#
+#     class MyRackResponse
+#       def each
+#         writer = ZipKit::BlockWrite.new {|chunk| yield(chunk) }
+#         writer << "Hello" << "world" << "!"
+#       end
+#     end
+#     [200, {}, MyRackResponse.new]
+class ZipKit::BlockWrite
+  # Creates a new BlockWrite.
+  #
+  # @param block The block that will be called when this object receives the `<<` message
+  def initialize(&block)
+    @block = block
+  end
+  # Make sure those methods raise outright
+  %i[seek pos= to_s].each do |m|
+    define_method(m) do |*_args|
+      raise "#{m} not supported - this IO adapter is non-rewindable"
+    end
+  end
+  # Sends a string through to the block stored in the BlockWrite.
+  #
+  # @param buf[String] the string to write. Note that a zero-length String
+  #    will not be forwarded to the block, as it has special meaning when used
+  #    with chunked encoding (it indicates the end of the stream).
+  # @return self
+  def <<(buf)
+    # Zero-size output has a special meaning  when using chunked encoding
+    return if buf.nil? || buf.bytesize.zero?
+    @block.call(buf.b)
+    self
+  end
+end

data/lib/zip_kit/file_reader/inflating_reader.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# Rubocop: convention: Missing top-level class documentation comment.
+class ZipKit::FileReader::InflatingReader
+  def initialize(from_io, compressed_data_size)
+    @io = from_io
+    @compressed_data_size = compressed_data_size
+    @already_read = 0
+    @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
+  end
+  def extract(n_bytes = nil)
+    n_bytes ||= (@compressed_data_size - @already_read)
+    return if eof?
+    available = @compressed_data_size - @already_read
+    return if available.zero?
+    n_bytes = available if n_bytes > available
+    return "" if n_bytes.zero?
+    compressed_chunk = @io.read(n_bytes)
+    return if compressed_chunk.nil?
+    @already_read += compressed_chunk.bytesize
+    @zlib_inflater.inflate(compressed_chunk)
+  end
+  def eof?
+    @zlib_inflater.finished?
+  end
+end

data/lib/zip_kit/file_reader/stored_reader.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+# Rubocop: convention: Missing top-level class documentation comment.
+class ZipKit::FileReader::StoredReader
+  def initialize(from_io, compressed_data_size)
+    @io = from_io
+    @compressed_data_size = compressed_data_size
+    @already_read = 0
+  end
+  def extract(n_bytes = nil)
+    n_bytes ||= (@compressed_data_size - @already_read)
+    return if eof?
+    available = @compressed_data_size - @already_read
+    return if available.zero?
+    n_bytes = available if n_bytes > available
+    return "" if n_bytes.zero?
+    compressed_chunk = @io.read(n_bytes)
+    return if compressed_chunk.nil?
+    @already_read += compressed_chunk.bytesize
+    compressed_chunk
+  end
+  def eof?
+    @already_read >= @compressed_data_size
+  end
+end