zip_tricks 2.8.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/IMPLEMENTATION_DETAILS.md +2 -10
  4. data/README.md +62 -59
  5. data/examples/archive_size_estimate.rb +4 -4
  6. data/examples/rack_application.rb +3 -5
  7. data/lib/zip_tricks/block_deflate.rb +21 -0
  8. data/lib/zip_tricks/file_reader.rb +491 -0
  9. data/lib/zip_tricks/null_writer.rb +7 -2
  10. data/lib/zip_tricks/rack_body.rb +3 -3
  11. data/lib/zip_tricks/remote_io.rb +30 -20
  12. data/lib/zip_tricks/remote_uncap.rb +10 -10
  13. data/lib/zip_tricks/size_estimator.rb +64 -0
  14. data/lib/zip_tricks/stream_crc32.rb +2 -2
  15. data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
  16. data/lib/zip_tricks/streamer/entry.rb +21 -0
  17. data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
  18. data/lib/zip_tricks/streamer/writable.rb +20 -0
  19. data/lib/zip_tricks/streamer.rb +172 -66
  20. data/lib/zip_tricks/zip_writer.rb +346 -0
  21. data/lib/zip_tricks.rb +1 -4
  22. data/spec/spec_helper.rb +1 -38
  23. data/spec/zip_tricks/file_reader_spec.rb +47 -0
  24. data/spec/zip_tricks/rack_body_spec.rb +2 -2
  25. data/spec/zip_tricks/remote_io_spec.rb +8 -20
  26. data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
  27. data/spec/zip_tricks/size_estimator_spec.rb +31 -0
  28. data/spec/zip_tricks/streamer_spec.rb +59 -36
  29. data/spec/zip_tricks/zip_writer_spec.rb +408 -0
  30. data/zip_tricks.gemspec +20 -14
  31. metadata +33 -16
  32. data/lib/zip_tricks/manifest.rb +0 -85
  33. data/lib/zip_tricks/microzip.rb +0 -339
  34. data/lib/zip_tricks/stored_size_estimator.rb +0 -44
  35. data/spec/zip_tricks/manifest_spec.rb +0 -60
  36. data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
  37. data/spec/zip_tricks/microzip_spec.rb +0 -546
  38. data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -0,0 +1,491 @@
1
+ require 'stringio'
2
+
3
+ # A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
4
+ # time we attempt to keep it somewhat concise.
5
+ #
6
+ # ## REALLY CRAZY IMPORTANT STUFF: SECURITY IMPLICATIONS
7
+ #
8
+ # Please **BEWARE** - using this is a security risk if you are reading files that have been
9
+ # supplied by users. This implementation has _not_ been formally verified for correctness. As
10
+ # ZIP files contain relative offsets in lots of places it might be possible for a maliciously
11
+ # crafted ZIP file to put the decode procedure in an endless loop, make it attempt huge reads
12
+ # from the input file and so on. Additionally, the reader module for deflated data has
13
+ # no support for ZIP bomb protection. So either limit the `FileReader` usage to the files you
14
+ # trust, or triple-check all the inputs upfront. Patches to make this reader more secure
15
+ # are welcome of course.
16
+ #
17
+ # ## Usage
18
+ #
19
+ # File.open('zipfile.zip', 'rb') do |f|
20
+ # entries = FileReader.read_zip_structure(f)
21
+ # entries.each do |e|
22
+ # File.open(e.filename, 'wb') do |extracted_file|
23
+ # ex = e.extractor_from(f)
24
+ # extracted_file << ex.extract(1024 * 1024) until ex.eof?
25
+ # end
26
+ # end
27
+ # end
28
+ #
29
+ # ## Supported features
30
+ #
31
+ # * Deflate and stored storage modes
32
+ # * Zip64 (extra fields and offsets)
33
+ # * Data descriptors
34
+ #
35
+ # ## Unsupported features
36
+ #
37
+ # * Archives split over multiple disks/files
38
+ # * Any ZIP encryption
39
+ # * EFS language flag and InfoZIP filename extra field
40
+ # * CRC32 checksums are _not_ verified
41
+ #
42
+ # ## Mode of operation
43
+ #
44
+ # Basically, `FileReader` _ignores_ the data in local file headers (as it is often unreliable).
45
+ # It reads the ZIP file "from the tail", finds the end-of-central-directory signatures, then
46
+ # reads the central directory entries, reconstitutes the entries with their filenames, attributes
47
+ # and so on, and sets these entries up with the absolute _offsets_ into the source file/IO object.
48
+ # These offsets can then be used to extract the actual compressed data of the files and to expand it.
49
+ class ZipTricks::FileReader
50
+ ReadError = Class.new(StandardError)
51
+ UnsupportedFeature = Class.new(StandardError)
52
+ InvalidStructure = Class.new(ReadError)
53
+
54
+ class InflatingReader
55
+ def initialize(from_io, compressed_data_size)
56
+ @io = from_io
57
+ @compressed_data_size = compressed_data_size
58
+ @already_read = 0
59
+ @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
60
+ end
61
+
62
+ def extract(n_bytes=nil)
63
+ n_bytes ||= (@compressed_data_size - @already_read)
64
+
65
+ return if eof?
66
+
67
+ available = @compressed_data_size - @already_read
68
+
69
+ return if available.zero?
70
+
71
+ n_bytes = available if n_bytes > available
72
+
73
+ return '' if n_bytes.zero?
74
+
75
+ compressed_chunk = @io.read(n_bytes)
76
+ @already_read += compressed_chunk.bytesize
77
+ @zlib_inflater.inflate(compressed_chunk)
78
+ end
79
+
80
+ def eof?
81
+ @zlib_inflater.finished?
82
+ end
83
+ end
84
+
85
+ class StoredReader
86
+ def initialize(from_io, compressed_data_size)
87
+ @io = from_io
88
+ @compressed_data_size = compressed_data_size
89
+ @already_read = 0
90
+ end
91
+
92
+ def extract(n_bytes=nil)
93
+ n_bytes ||= (@compressed_data_size - @already_read)
94
+
95
+ return if eof?
96
+
97
+ available = @compressed_data_size - @already_read
98
+
99
+ return if available.zero?
100
+
101
+ n_bytes = available if n_bytes > available
102
+
103
+ return '' if n_bytes.zero?
104
+
105
+ compressed_chunk = @io.read(n_bytes)
106
+ @already_read += compressed_chunk.bytesize
107
+ compressed_chunk
108
+ end
109
+
110
+ def eof?
111
+ @already_read >= @compressed_data_size
112
+ end
113
+ end
114
+
115
+ private_constant :StoredReader, :InflatingReader
116
+
117
+ # Represents a file within the ZIP archive being read
118
+ class ZipEntry
119
+ # @return [Fixnum] bit-packed version signature of the program that made the archive
120
+ attr_accessor :made_by
121
+
122
+ # @return [Fixnum] ZIP version support needed to extract this file
123
+ attr_accessor :version_needed_to_extract
124
+
125
+ # @return [Fixnum] bit-packed general purpose flags
126
+ attr_accessor :gp_flags
127
+
128
+ # @return [Fixnum] Storage mode (0 for stored, 8 for deflate)
129
+ attr_accessor :storage_mode
130
+
131
+ # @return [Fixnum] the bit-packed DOS time
132
+ attr_accessor :dos_time
133
+
134
+ # @return [Fixnum] the bit-packed DOS date
135
+ attr_accessor :dos_date
136
+
137
+ # @return [Fixnum] the CRC32 checksum of this file
138
+ attr_accessor :crc32
139
+
140
+ # @return [Fixnum] size of compressed file data in the ZIP
141
+ attr_accessor :compressed_size
142
+
143
+ # @return [Fixnum] size of the file once uncompressed
144
+ attr_accessor :uncompressed_size
145
+
146
+ # @return [String] the filename
147
+ attr_accessor :filename
148
+
149
+ # @return [Fixnum] disk number where this file starts
150
+ attr_accessor :disk_number_start
151
+
152
+ # @return [Fixnum] internal attributes of the file
153
+ attr_accessor :internal_attrs
154
+
155
+ # @return [Fixnum] external attributes of the file
156
+ attr_accessor :external_attrs
157
+
158
+ # @return [Fixnum] at what offset the local file header starts
159
+ # in your original IO object
160
+ attr_accessor :local_file_header_offset
161
+
162
+ # @return [String] the file comment
163
+ attr_accessor :comment
164
+
165
+ # @return [Fixnum] at what offset you should start reading
166
+ # for the compressed data in your original IO object
167
+ attr_accessor :compressed_data_offset
168
+
169
+ # Returns a reader for the actual compressed data of the entry.
170
+ #
171
+ # reader = entry.reader(source_file)
172
+ # outfile << reader.extract(512 * 1024) until reader.eof?
173
+ #
174
+ # @return [#extract(n_bytes), #eof?] the reader for the data
175
+ def extractor_from(from_io)
176
+ from_io.seek(compressed_data_offset, IO::SEEK_SET)
177
+ case storage_mode
178
+ when 8
179
+ InflatingReader.new(from_io, compressed_size)
180
+ when 0
181
+ StoredReader.new(from_io, compressed_size)
182
+ else
183
+ raise "Unsupported storage mode for reading (#{storage_mode})"
184
+ end
185
+ end
186
+ end
187
+
188
+ # Parse an IO handle to a ZIP archive into an array of Entry objects.
189
+ #
190
+ # @param io[#tell, #seek, #read, #size] an IO-ish object
191
+ # @return [Array<Entry>] an array of entries within the ZIP being parsed
192
+ def read_zip_structure(io)
193
+ zip_file_size = io.size
194
+ eocd_offset = get_eocd_offset(io, zip_file_size)
195
+
196
+ zip64_end_of_cdir_location = get_zip64_eocd_locator_offset(io, eocd_offset)
197
+ num_files, cdir_location, cdir_size = if zip64_end_of_cdir_location
198
+ num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
199
+ else
200
+ num_files_and_central_directory_offset(io, eocd_offset)
201
+ end
202
+ seek(io, cdir_location)
203
+
204
+ # Read the entire central directory in one fell swoop
205
+ central_directory_str = read_n(io, cdir_size)
206
+ central_directory_io = StringIO.new(central_directory_str)
207
+
208
+ entries = (1..num_files).map { read_cdir_entry(central_directory_io) }
209
+ entries.each do |entry|
210
+ entry.compressed_data_offset = find_compressed_data_start_offset(io, entry.local_file_header_offset)
211
+ end
212
+ end
213
+
214
+ # Parse an IO handle to a ZIP archive into an array of Entry objects.
215
+ #
216
+ # @param io[#tell, #seek, #read, #size] an IO-ish object
217
+ # @return [Array<Entry>] an array of entries within the ZIP being parsed
218
+ def self.read_zip_structure(io)
219
+ new.read_zip_structure(io)
220
+ end
221
+
222
+ private
223
+
224
+ def skip_ahead_2(io)
225
+ skip_ahead_n(io, 2)
226
+ end
227
+
228
+ def skip_ahead_4(io)
229
+ skip_ahead_n(io, 4)
230
+ end
231
+
232
+ def skip_ahead_8(io)
233
+ skip_ahead_n(io, 8)
234
+ end
235
+
236
+ def seek(io, absolute_pos)
237
+ io.seek(absolute_pos, IO::SEEK_SET)
238
+ raise ReadError, "Expected to seek to #{absolute_pos} but only got to #{io.tell}" unless absolute_pos == io.tell
239
+ nil
240
+ end
241
+
242
+ def assert_signature(io, signature_magic_number)
243
+ packed = [signature_magic_number].pack(C_V)
244
+ readback = read_4b(io)
245
+ if readback != signature_magic_number
246
+ expected = '0x0' + signature_magic_number.to_s(16)
247
+ actual = '0x0' + readback.to_s(16)
248
+ raise InvalidStructure, "Expected signature #{expected}, but read #{actual}"
249
+ end
250
+ end
251
+
252
+ def skip_ahead_n(io, n)
253
+ pos_before = io.tell
254
+ io.seek(io.tell + n, IO::SEEK_SET)
255
+ pos_after = io.tell
256
+ delta = pos_after - pos_before
257
+ raise ReadError, "Expected to seek #{n} bytes ahead, but could only seek #{delta} bytes ahead" unless delta == n
258
+ nil
259
+ end
260
+
261
+ def read_n(io, n_bytes)
262
+ io.read(n_bytes).tap {|d|
263
+ raise ReadError, "Expected to read #{n_bytes} bytes, but the IO was at the end" if d.nil?
264
+ raise ReadError, "Expected to read #{n_bytes} bytes, read #{d.bytesize}" unless d.bytesize == n_bytes
265
+ }
266
+ end
267
+
268
+ def read_2b(io)
269
+ read_n(io, 2).unpack(C_v).shift
270
+ end
271
+
272
+ def read_4b(io)
273
+ read_n(io, 4).unpack(C_V).shift
274
+ end
275
+
276
+ def read_8b(io)
277
+ read_n(io, 8).unpack(C_Qe).shift
278
+ end
279
+
280
+ def find_compressed_data_start_offset(file_io, local_header_offset)
281
+ seek(file_io, local_header_offset)
282
+
283
+ # Reading in bulk is cheaper - grab the maximum length of the local header, including
284
+ # any headroom
285
+ local_file_header_str_plus_headroom = file_io.read(MAX_LOCAL_HEADER_SIZE)
286
+ io = StringIO.new(local_file_header_str_plus_headroom)
287
+
288
+ assert_signature(io, 0x04034b50)
289
+
290
+ # The rest is unreliable, and we have that information from the central directory already.
291
+ # So just skip over it to get at the offset where the compressed data begins
292
+ skip_ahead_2(io) # Version needed to extract
293
+ skip_ahead_2(io) # gp flags
294
+ skip_ahead_2(io) # storage mode
295
+ skip_ahead_2(io) # dos time
296
+ skip_ahead_2(io) # dos date
297
+ skip_ahead_4(io) # CRC32
298
+
299
+ skip_ahead_4(io) # Comp size
300
+ skip_ahead_4(io) # Uncomp size
301
+
302
+ filename_size = read_2b(io)
303
+ extra_size = read_2b(io)
304
+
305
+ skip_ahead_n(io, filename_size)
306
+ skip_ahead_n(io, extra_size)
307
+
308
+ local_header_offset + io.tell
309
+ end
310
+
311
+
312
+ def read_cdir_entry(io)
313
+ expected_at = io.tell
314
+ assert_signature(io, 0x02014b50)
315
+ ZipEntry.new.tap do |e|
316
+ e.made_by = read_2b(io)
317
+ e.version_needed_to_extract = read_2b(io)
318
+ e.gp_flags = read_2b(io)
319
+ e.storage_mode = read_2b(io)
320
+ e.dos_time = read_2b(io)
321
+ e.dos_date = read_2b(io)
322
+ e.crc32 = read_4b(io)
323
+ e.compressed_size = read_4b(io)
324
+ e.uncompressed_size = read_4b(io)
325
+ filename_size = read_2b(io)
326
+ extra_size = read_2b(io)
327
+ comment_len = read_2b(io)
328
+ e.disk_number_start = read_2b(io)
329
+ e.internal_attrs = read_2b(io)
330
+ e.external_attrs = read_4b(io)
331
+ e.local_file_header_offset = read_4b(io)
332
+ e.filename = read_n(io, filename_size)
333
+
334
+ # Extra fields
335
+ extras = read_n(io, extra_size)
336
+ # Comment
337
+ e.comment = read_n(io, comment_len)
338
+
339
+ # Parse out the extra fields
340
+ extra_table = {}
341
+ extras_buf = StringIO.new(extras)
342
+ until extras_buf.eof? do
343
+ extra_id = read_2b(extras_buf)
344
+ extra_size = read_2b(extras_buf)
345
+ extra_contents = read_n(extras_buf, extra_size)
346
+ extra_table[extra_id] = extra_contents
347
+ end
348
+
349
+ # ...of which we really only need the Zip64 extra
350
+ if zip64_extra_contents = extra_table[1] # Zip64 extra
351
+ zip64_extra = StringIO.new(zip64_extra_contents)
352
+ e.uncompressed_size = read_8b(zip64_extra)
353
+ e.compressed_size = read_8b(zip64_extra)
354
+ e.local_file_header_offset = read_8b(zip64_extra)
355
+ end
356
+ end
357
+ end
358
+
359
+ def get_eocd_offset(file_io, zip_file_size)
360
+ # Start reading from the _comment_ of the zip file (from the very end).
361
+ # The maximum size of the comment is 0xFFFF (what fits in 2 bytes)
362
+ implied_position_of_eocd_record = zip_file_size - MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE
363
+ implied_position_of_eocd_record = 0 if implied_position_of_eocd_record < 0
364
+
365
+ # Use a soft seek (we might not be able to get as far behind in the IO as we want)
366
+ # and a soft read (we might not be able to read as many bytes as we want)
367
+ file_io.seek(implied_position_of_eocd_record, IO::SEEK_SET)
368
+ str_containing_eocd_record = file_io.read(MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE)
369
+
370
+ # TODO: what to do if multiple occurrences of the signature are found, somehow?
371
+ eocd_sig = [0x06054b50].pack(C_V)
372
+ eocd_idx_in_buf = str_containing_eocd_record.index(eocd_sig)
373
+
374
+ raise "Could not find the EOCD signature in the buffer - maybe a malformed ZIP file" unless eocd_idx_in_buf
375
+
376
+ implied_position_of_eocd_record + eocd_idx_in_buf
377
+ end
378
+
379
+ # Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
380
+ # EOCD record in the archive by fixed offsets
381
+ def get_zip64_eocd_locator_offset(file_io, eocd_offset)
382
+ zip64_eocd_loc_offset = eocd_offset
383
+ zip64_eocd_loc_offset -= 4 # The signature
384
+ zip64_eocd_loc_offset -= 4 # Which disk has the Zip64 end of central directory record
385
+ zip64_eocd_loc_offset -= 8 # Offset of the zip64 central directory record
386
+ zip64_eocd_loc_offset -= 4 # Total number of disks
387
+
388
+ # If the offset is negative there is certainly no Zip64 EOCD locator here
389
+ return unless zip64_eocd_loc_offset >= 0
390
+
391
+ file_io.seek(zip64_eocd_loc_offset, IO::SEEK_SET)
392
+ assert_signature(file_io, 0x07064b50)
393
+ disk_num = read_4b(file_io) # number of the disk
394
+ raise UnsupportedFeature, "The archive spans multiple disks" if disk_num != 0
395
+ read_8b(file_io)
396
+ rescue ReadError
397
+ nil
398
+ end
399
+
400
+ def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
401
+ seek(io, zip64_end_of_cdir_location)
402
+
403
+ assert_signature(io, 0x06064b50)
404
+
405
+ zip64_eocdr_size = read_8b(io)
406
+ zip64_eocdr = read_n(io, zip64_eocdr_size) # Reading in bulk is cheaper
407
+ zip64_eocdr = StringIO.new(zip64_eocdr)
408
+ skip_ahead_2(zip64_eocdr) # version made by
409
+ skip_ahead_2(zip64_eocdr) # version needed to extract
410
+
411
+ disk_n = read_4b(zip64_eocdr) # number of this disk
412
+ disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
413
+ raise UnsupportedFeature, "The archive spans multiple disks" if disk_n != disk_n_with_eocdr
414
+
415
+ num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
416
+ num_files_total = read_8b(zip64_eocdr) # files total in the central directory
417
+
418
+ raise UnsupportedFeature, "The archive spans multiple disks" if num_files_this_disk != num_files_total
419
+
420
+ central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
421
+ central_dir_offset = read_8b(zip64_eocdr) # Where the central directory starts
422
+
423
+ [num_files_total, central_dir_offset, central_dir_size]
424
+ end
425
+
426
+ C_V = 'V'.freeze
427
+ C_v = 'v'.freeze
428
+ C_Qe = 'Q<'.freeze
429
+
430
+ # To prevent too many tiny reads, read the maximum possible size of end of central directory record
431
+ # upfront (all the fixed fields + at most 0xFFFF bytes of the archive comment)
432
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = begin
433
+ 4 + # Offset of the start of central directory
434
+ 4 + # Size of the central directory
435
+ 2 + # Number of files in the cdir
436
+ 4 + # End-of-central-directory signature
437
+ 2 + # Number of this disk
438
+ 2 + # Number of disk with the start of cdir
439
+ 2 + # Number of files in the cdir of this disk
440
+ 2 + # The comment size
441
+ 0xFFFF # Maximum comment size
442
+ end
443
+
444
+ # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
445
+ # The maximum size is all the usual items, plus the maximum size
446
+ # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
447
+ MAX_LOCAL_HEADER_SIZE = begin
448
+ 4 + # signature
449
+ 2 + # Version needed to extract
450
+ 2 + # gp flags
451
+ 2 + # storage mode
452
+ 2 + # dos time
453
+ 2 + # dos date
454
+ 4 + # CRC32
455
+ 4 + # Comp size
456
+ 4 + # Uncomp size
457
+ 2 + # Filename size
458
+ 2 + # Extra fields size
459
+ 0xFFFF + # Maximum filename size
460
+ 0xFFFF # Maximum extra fields size
461
+ end
462
+
463
+ SIZE_OF_USABLE_EOCD_RECORD = begin
464
+ 4 + # Signature
465
+ 2 + # Number of this disk
466
+ 2 + # Number of the disk with the EOCD record
467
+ 2 + # Number of entries in the central directory of this disk
468
+ 2 + # Number of entries in the central directory total
469
+ 4 + # Size of the central directory
470
+ 4 # Start of the central directory offset
471
+ end
472
+
473
+ def num_files_and_central_directory_offset(file_io, eocd_offset)
474
+ seek(file_io, eocd_offset)
475
+
476
+ io = StringIO.new(read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD))
477
+
478
+ assert_signature(io, 0x06054b50)
479
+
480
+ skip_ahead_2(io) # number_of_this_disk
481
+ skip_ahead_2(io) # number of the disk with the EOCD record
482
+ skip_ahead_2(io) # number of entries in the central directory of this disk
483
+ num_files = read_2b(io) # number of entries in the central directory total
484
+ cdir_size = read_4b(io) # size of the central directory
485
+ cdir_offset = read_4b(io) # start of central directorty offset
486
+ [num_files, cdir_offset, cdir_size]
487
+ end
488
+
489
+ private_constant :C_V, :C_v, :C_Qe, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
490
+ :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
491
+ end
# write operations, but want to discard the data (like when
# estimating the size of a ZIP)
module ZipTricks::NullWriter
  # Discards the given data and returns self so that writes can be chained.
  #
  # @param data[String] the data to write
  # @return [self]
  def self.<<(data)
    self
  end

  # Discards the given data and pretends it has been written in full.
  #
  # @param data[String] the data to write
  # @return [Fixnum] the amount of data that was supposed to be written
  def self.write(data)
    data.bytesize
  end
end
@@ -9,13 +9,13 @@ class ZipTricks::RackBody
9
9
  # The archive will be automatically closed at the end of the block.
10
10
  #
11
11
  # # Precompute the Content-Length ahead of time
12
- # content_length = ZipTricks::StoredSizeEstimator.perform_fake_archiving do | estimator |
13
- # estimator.add_stored_entry('large.tif', size=1289894)
12
+ # content_length = ZipTricks::SizeEstimator.estimate do | estimator |
13
+ # estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
14
14
  # end
15
15
  #
16
16
  # # Prepare the response body. The block will only be called when the response starts to be written.
17
17
  # body = ZipTricks::RackBody.new do | streamer |
18
- # streamer.add_stored_entry('large.tif', size=1289894, crc32=198210)
18
+ # streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
19
19
  # streamer << large_file.read(1024*1024) until large_file.eof?
20
20
  # ...
21
21
  # end
@@ -1,9 +1,12 @@
1
1
  # An object that fakes just-enough of an IO to be dangerous
2
- # - or, more precisely, to be useful as a source for the RubyZip
3
- # central directory parser
2
+ # - or, more precisely, to be useful as a source for the FileReader
3
+ # central directory parser. Effectively we substitute an IO object
4
+ # for an object that fetches parts of the remote file over HTTP using `Range:`
5
+ # headers. The `RemoteIO` acts as an adapter between an object that performs the
6
+ # actual fetches over HTTP and an object that expects a handful of IO methods to be
7
+ # available.
4
8
  class ZipTricks::RemoteIO
5
-
6
- # @param fetcher[#request_object_size, #request_range] an object that can fetch
9
+ # @param fetcher[#request_object_size, #request_range] an object that perform fetches
7
10
  def initialize(fetcher = :NOT_SET)
8
11
  @pos = 0
9
12
  @fetcher = fetcher
@@ -12,21 +15,29 @@ class ZipTricks::RemoteIO
12
15
 
13
16
  # Emulates IO#seek
14
17
  def seek(offset, mode = IO::SEEK_SET)
15
- case mode
16
- when IO::SEEK_SET
17
- @remote_size ||= request_object_size
18
- @pos = clamp(0, offset, @remote_size)
19
- when IO::SEEK_END
20
- @remote_size ||= request_object_size
21
- @pos = clamp(0, @remote_size + offset, @remote_size)
22
- else
23
- raise Errno::ENOTSUP, "Seek mode #{mode.inspect} not supported"
24
- end
18
+ raise "Unsupported read mode #{mode}" unless mode == IO::SEEK_SET
19
+ @remote_size ||= request_object_size
20
+ @pos = clamp(0, offset, @remote_size)
25
21
  0 # always return 0!
26
22
  end
27
-
28
- # Emulates IO#read
29
- def read(n_bytes = nil)
23
+
24
+ # Emulates IO#size.
25
+ #
26
+ # @return [Fixnum] the size of the remote resource
27
+ def size
28
+ @remote_size ||= request_object_size
29
+ end
30
+
31
+ # Emulates IO#read, but requires the number of bytes to read
32
+ # The method will raise if the number of bytes read from remote does
33
+ # not match the number requested. The read will be limited to the
34
+ # size of the remote resource relative to the current offset in the IO,
35
+ # so if you are at offset 0 in the IO of size 10, doing a `read(20)`
36
+ # will only return you 10 bytes of result, and not raise any exceptions.
37
+ #
38
+ # @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
39
+ # @return [String] the read bytes
40
+ def read(n_bytes=nil)
30
41
  @remote_size ||= request_object_size
31
42
 
32
43
  # If the resource is empty there is nothing to read
@@ -47,11 +58,10 @@ class ZipTricks::RemoteIO
47
58
  end
48
59
  end
49
60
 
50
- # Returns the current pointer position within the IO.
51
- # Not used by RubyZip but used in tests of our own
61
+ # Returns the current pointer position within the IO
52
62
  #
53
63
  # @return [Fixnum]
54
- def pos
64
+ def tell
55
65
  @pos
56
66
  end
57
67
 
@@ -2,6 +2,9 @@
2
2
  # downloading the entire file. The central directory provides the
3
3
  # offsets at which the actual file contents is located. You can then
4
4
  # use the `Range:` HTTP headers to download those entries separately.
5
+ #
6
+ # Please read the security warning in `FileReader` _VERY CAREFULLY_
7
+ # before you use this module.
5
8
  class ZipTricks::RemoteUncap
6
9
 
7
10
  # Represents a file embedded within a remote ZIP archive
@@ -37,17 +40,14 @@ class ZipTricks::RemoteUncap
37
40
  def self.files_within_zip_at(uri)
38
41
  fetcher = new(uri)
39
42
  fake_io = ZipTricks::RemoteIO.new(fetcher)
40
- dir = Zip::CentralDirectory.read_from_stream(fake_io)
41
-
42
- dir.entries.map do | rubyzip_entry |
43
+ entries = ZipTricks.const_get(:FileReader).read_zip_structure(fake_io)
44
+ entries.map do | remote_entry |
43
45
  RemoteZipEntry.new do | entry |
44
- entry.name = rubyzip_entry.name
45
- entry.size_uncompressed = rubyzip_entry.size
46
- entry.size_compressed = rubyzip_entry.compressed_size
47
- entry.compression_method = rubyzip_entry.compression_method
48
-
49
- entry.starts_at_offset = rubyzip_entry.local_header_offset + rubyzip_entry.calculate_local_header_size
50
- entry.ends_at_offset = entry.starts_at_offset + rubyzip_entry.compressed_size
46
+ entry.name = remote_entry.filename
47
+ entry.starts_at_offset = remote_entry.compressed_data_offset
48
+ entry.size_uncompressed = remote_entry.uncompressed_size
49
+ entry.size_compressed = remote_entry.compressed_size
50
+ entry.compression_method = remote_entry.storage_mode
51
51
  end
52
52
  end
53
53
  end
# Helps to estimate archive sizes
class ZipTricks::SizeEstimator
  require_relative 'streamer'

  # Subclass used solely to mark a couple of the Streamer methods public
  class DetailStreamer < ::ZipTricks::Streamer
    public :add_file_and_write_local_header, :write_data_descriptor_for_last_entry
  end
  private_constant :DetailStreamer

  # Creates a new estimator with a Streamer object. Normally you should use
  # `estimate` instead and not use this method directly.
  #
  # @param streamer [ZipTricks::Streamer] the streamer that performs the fake writes
  def initialize(streamer)
    @streamer = streamer
  end
  private :initialize

  # Performs the estimate using fake archiving. It needs to know the sizes of the
  # entries upfront. Usage:
  #
  #     expected_zip_size = SizeEstimator.estimate do | estimator |
  #       estimator.add_stored_entry(filename: "file.doc", size: 898291)
  #       estimator.add_compressed_entry(filename: "family.tif", uncompressed_size: 89281911, compressed_size: 121908)
  #     end
  #
  # @return [Fixnum] the size of the resulting archive, in bytes
  # @yield [SizeEstimator] the estimator
  def self.estimate
    # Count the bytes that would have been written, without writing anything
    output_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
    DetailStreamer.open(output_io) { |zip| yield(new(zip)) }
    output_io.tell
  end

  # Add a fake entry to the archive, to see how big it is going to be in the end.
  #
  # @param filename [String] the name of the file (filenames are variable-width in the ZIP)
  # @param size [Fixnum] size of the uncompressed entry
  # @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
  # @return self
  def add_stored_entry(filename:, size:, use_data_descriptor: false)
    with_descriptor = use_data_descriptor ? true : false
    @streamer.add_file_and_write_local_header(filename: filename, crc32: 0, storage_mode: 0,
      compressed_size: size, uncompressed_size: size, use_data_descriptor: with_descriptor)
    @streamer.simulate_write(size)
    @streamer.write_data_descriptor_for_last_entry if with_descriptor
    self
  end

  # Add a fake entry to the archive, to see how big it is going to be in the end.
  #
  # @param filename [String] the name of the file (filenames are variable-width in the ZIP)
  # @param uncompressed_size [Fixnum] size of the uncompressed entry
  # @param compressed_size [Fixnum] size of the compressed entry
  # @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
  # @return self
  def add_compressed_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
    with_descriptor = use_data_descriptor ? true : false
    @streamer.add_file_and_write_local_header(filename: filename, crc32: 0, storage_mode: 8,
      compressed_size: compressed_size, uncompressed_size: uncompressed_size, use_data_descriptor: with_descriptor)
    @streamer.simulate_write(compressed_size)
    @streamer.write_data_descriptor_for_last_entry if with_descriptor
    self
  end
end