RubyGems - zip_tricks - Versions diffs - 2.8.1 → 3.0.0 - Mend

zip_tricks 2.8.1 → 3.0.0

Files changed (38) hide show

checksums.yaml +4 -4
data/Gemfile +3 -3
data/IMPLEMENTATION_DETAILS.md +2 -10
data/README.md +62 -59
data/examples/archive_size_estimate.rb +4 -4
data/examples/rack_application.rb +3 -5
data/lib/zip_tricks/block_deflate.rb +21 -0
data/lib/zip_tricks/file_reader.rb +491 -0
data/lib/zip_tricks/null_writer.rb +7 -2
data/lib/zip_tricks/rack_body.rb +3 -3
data/lib/zip_tricks/remote_io.rb +30 -20
data/lib/zip_tricks/remote_uncap.rb +10 -10
data/lib/zip_tricks/size_estimator.rb +64 -0
data/lib/zip_tricks/stream_crc32.rb +2 -2
data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
data/lib/zip_tricks/streamer/entry.rb +21 -0
data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
data/lib/zip_tricks/streamer/writable.rb +20 -0
data/lib/zip_tricks/streamer.rb +172 -66
data/lib/zip_tricks/zip_writer.rb +346 -0
data/lib/zip_tricks.rb +1 -4
data/spec/spec_helper.rb +1 -38
data/spec/zip_tricks/file_reader_spec.rb +47 -0
data/spec/zip_tricks/rack_body_spec.rb +2 -2
data/spec/zip_tricks/remote_io_spec.rb +8 -20
data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
data/spec/zip_tricks/size_estimator_spec.rb +31 -0
data/spec/zip_tricks/streamer_spec.rb +59 -36
data/spec/zip_tricks/zip_writer_spec.rb +408 -0
data/zip_tricks.gemspec +20 -14
metadata +33 -16
data/lib/zip_tricks/manifest.rb +0 -85
data/lib/zip_tricks/microzip.rb +0 -339
data/lib/zip_tricks/stored_size_estimator.rb +0 -44
data/spec/zip_tricks/manifest_spec.rb +0 -60
data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
data/spec/zip_tricks/microzip_spec.rb +0 -546
data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 05fb68904433a8f06dd50d668c29d3c0cf8780f2
-  data.tar.gz: af0dfe83697cc9cd6bb4ed1a8714cffbb3ceedd7
+  metadata.gz: 2bc3917bd654f3fca15ae6bce1769eb0fa45dd12
+  data.tar.gz: 12f495895ed59e23cc89bf7b86b91bd9422c1608
 SHA512:
-  metadata.gz: 4b583b966eb87b502f428bd43d7bd9d9c800112bb3c256aa63a8cda6dcfb02052cf653de2df4022e23f49db37e157d81d336b0db8c8527c88f6e2de2e8974172
-  data.tar.gz: 22f263d209256d28c552c6fe04b9981d1181eb7f062233f7bc407146987faf2d1f14db9b8902ddf89d4038680839971291c5e47d8b57cbc8ed248a9360bf431b
+  metadata.gz: 66fe048eddc9c00ed459e02e30ba8607a76cf297de88bf595694d14eec42e7b1075752b42fae8e86dcfe06ca2814db985452ed0244f803207491b100baed1041
+  data.tar.gz: 717bd4d51c597fefdd77bfe0f7df3c41f2ea8ae0122f0b5330ab6cf63b6183370e5dd8bbc19b868a6f52ae53a84147bfdb8bee54bbc44b63df563074192f7ec8

data/Gemfile CHANGED Viewed

@@ -1,13 +1,13 @@
 source "http://rubygems.org"
-gem 'rubyzip', '~> 1.1', '>= 1.1.7'
-gem 'very_tiny_state_machine', '~> 2'
 group :development do
+  gem 'rubyzip', '~> 1.1', '>= 1.1.7'
+  gem 'terminal-table'
   gem 'range_utils'
   gem 'rack', '~> 1.6' # For Jeweler
   gem 'rake', '~> 10.4'
   gem "rspec", "~> 3.2.0", '< 3.3'
+  gem 'coderay'
   gem "yard", "~> 0.8"
   gem "bundler", "~> 1.0"
   gem "jeweler", "~> 2.0.1"

data/IMPLEMENTATION_DETAILS.md CHANGED Viewed

@@ -24,16 +24,8 @@ Data descriptors permit you to generate "postfix" ZIP files (where you write the
 know the CRC32 and the file size upfront, then write the compressed file data, and only then - once you know what your CRC32,
 compressed and uncompressed sizes are etc. - write them into a data descriptor that follows the file data.
-The streamer does _not_ use data descriptors, because their use [is problematic](https://github.com/thejoshwolfe/yazl/issues/13)
-with the 7Zip version that we want to support. Or rather - not the use of data descriptors themselves, but the use of the GP flag
-bit 3 that trips up that version of 7Zip. If we were to use data descriptors, we would have to up the minimum supported version
-of 7Zip.
-That means, in turn, that **to use the ZipTricks streamer you have to know the CRC32 and the sizes of the compressed/uncompressed
-file upfront.** So you have to precompute them in some way. To do that, you can use `BlockDeflate` to precompress the file in
-parallel, and `StreamCRC32` to compute the CRC checksum, before feeding them to the ZIP writer.
-This approach might be reconsidered in the future.
+The streamer has optional support for data descriptors. Their use can apparently [be problematic](https://github.com/thejoshwolfe/yazl/issues/13)
+with the 7Zip version that we want to support, but in our tests everything worked fine.
 For more info see https://github.com/thejoshwolfe/yazl#general-purpose-bit-flag

data/README.md CHANGED Viewed

@@ -2,77 +2,81 @@
 [![Build Status](https://travis-ci.org/WeTransfer/zip_tricks.svg?branch=master)](https://travis-ci.org/WeTransfer/zip_tricks)
-Makes Rubyzip sing, dance and play saxophone for streaming applications.
+Allows streaming, non-rewinding ZIP file output from Ruby.
 Spiritual successor to [zipline](https://github.com/fringd/zipline)
-Requires Ruby 2.1+, rubyzip and a couple of other gems (all available to jRuby as well).
-The library is composed of a loose set of modules.
+Requires Ruby 2.1+ syntax support and a working zlib (all available to jRuby as well).
-## Usage by example
+## Create a ZIP file without size estimation (compress on-the-fly)
-Check out the `examples/` directory at the root of the project. This will give you a good idea
-of various use cases the library supports.
+When you compress on the fly and use data descriptors it is not really possible to compute the file size upfront.
+But it is very likely to yield good compression - especially if you send things like CSV files.
-## BlockDeflate
+    out = my_tempfile # can also be a socket
+    ZipTricks::Streamer.open(out) do |zip|
+      zip.write_stored_file('mov.mp4.txt') do |sink|
+        File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
+      end
+      zip.write_deflated_file('long-novel.txt') do |sink|
+        File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
+      end
+    end
-Deflate a byte stream in blocks of N bytes, optionally writing a terminator marker. This can be used to
-compress a file in parts.
+## Send the same ZIP file from a Rack response
-    source_file = File.open('12_gigs.bin', 'rb')
-    compressed = Tempfile.new
-    # Will not compress everything in memory, but do it per chunk to spare memory. `compressed`
-    # will be written to at the end of each chunk.
-    ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
+Create a `RackBody` object and give its constructor a block that adds files.
+The block will only be called when actually sending the response to the client
+(unless you are using a buffering Rack webserver, such as Webrick).
-You can also do the same to parts that you will later concatenate together elsewhere, in that case
-you need to skip the end marker:
+    body = ZipTricks::RackBody.new do | zip |
+      zip.write_stored_file('mov.mp4') do |sink| # Those MPEG4 files do not compress that well
+        File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
+      end
+      zip.write_deflated_file('long-novel.txt') do |sink|
+        File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
+      end
+    end
+    [200, {'Transfer-Encoding' => 'chunked'}, body]
-    compressed = Tempfile.new
-    ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb), compressed)
-    ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb), compressed)
-    ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb), compressed)
-    ZipTricks::BlockDeflate.write_terminator(compressed)
+## Send a ZIP file of known size, with correct headers
-You can also elect to just compress strings in memory (to splice them later):
+Use the `SizeEstimator` to compute the correct size of the resulting archive.
-    compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
+    zip_body = ZipTricks::RackBody.new do | zip |
+      zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
+      zip << read_file('myfile1.bin')
+      zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
+      zip << read_file('myfile2.bin')
+    end
+    bytesize = ZipTricks::SizeEstimator.estimate do |z|
+     z.add_stored_entry(filename: 'myfile1.bin', size: 9090821)
+     z.add_stored_entry(filename: 'myfile2.bin', size: 458678)
+    end
+    [200, {'Content-Length' => bytesize.to_s}, zip_body]
-## Streamer
+## Other usage examples
-Is used to write a streaming ZIP file when you know the CRC32 for the raw files
-and the sizes of these files upfront. This writes the local headers immediately, without having to
-rewind the output IO. It also avoids using the local footers instead of headers, therefore permitting
-Zip64-sized entries to be stored easily.
+Check out the `examples/` directory at the root of the project. This will give you a good idea
+of various use cases the library supports.
-    # io has to be an object that supports #<< and #tell
-    io = ... # can be a Tempfile, but can also be a BlockWrite adapter for, say, Rack
+## Writing ZIP files using the Streamer bypass
+You do not have to "feed" all the contents of the files you put in the archive through the Streamer object.
+If the write destination for your use case is a `Socket` (say, you are writing using Rack hijack) and you know
+the metadata of the file upfront (the CRC32 of the uncompressed file and the sizes), you can write directly
+to that socket using some accelerated writing technique, and only use the Streamer to write out the ZIP metadata.
+    # io has to be an object that supports #<<
     ZipTricks::Streamer.open(io) do | zip |
-      # raw_file is written "as is" (STORED mode)
+      # raw_file is written "as is" (STORED mode).
+      # Write the local file header first..
       zip.add_stored_entry("first-file.bin", raw_file.size, raw_file_crc32)
-      while blob = raw_file.read(2048)
-        zip << blob
-      end
-      # another_file is assumed to be block-deflated (DEFLATE mode)
-      zip.add_compressed_entry("another-file.bin", another_file_size, another_file_crc32, compressed_file.size)
-      while blob = compressed_file.read(2048)
-        zip << blob
-      end
-      # If you are storing block-deflated parts of a single file, you have to terminate the output
-      # with an end marker manually
-      zip.add_compressed_entry("compressed-in-parts.bin", another_file_size, another_file_crc32, deflated_size)
-      while blob = part1.read(2048)
-        zip << blob
-      end
-      while blob = part2.read(2048)
-        zip << blob
-      end
-      ZipTricks::BlockDeflate.write_terminator(zip)
+      # then send the actual file contents bypassing the Streamer interface
+      io.sendfile(my_temp_file)
-      ... # more file writes etc.
+      # ...and then adjust the ZIP offsets within the Streamer
+      zip.simulate_write(my_temp_file.size)
     end
 ## RackBody
@@ -83,7 +87,7 @@ and will receive a {ZipTricks::Streamer} as it's block argument. You can then ad
 The archive will be automatically closed at the end of the block.
     # Precompute the Content-Length ahead of time
-    content_length = ZipTricks::StoredSizeEstimator.perform_fake_archiving do | estimator |
+    content_length = ZipTricks::SizeEstimator.estimate do | estimator |
       estimator.add_stored_entry('large.tif', size=1289894)
     end
@@ -94,7 +98,7 @@ The archive will be automatically closed at the end of the block.
       ...
     end
-    return [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
+    [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
 ## BlockWrite
@@ -108,13 +112,13 @@ destination. For Rack/Rails just use RackBody since it sets this up for you.
       ....
     end
-## StoredSizeEstimator
+## SizeEstimator
 Is used to predict the size of the ZIP archive after output. This can be used to generate, say, a `Content-Length` header,
 or to predict the size of the resulting archive on the storage device. The size is estimated using a very fast "fake archiving"
 procedure, so it computes the sizes of all the headers and the central directory very accurately.
-    expected_zip_archive_size = StoredSizeEstimator.perform_fake_archiving do | estimator |
+    expected_zip_archive_size = SizeEstimator.estimate do | estimator |
       estimator.add_stored_entry("file.doc", size=898291)
       estimator.add_compressed_entry("family.JPG", size=89281911, compressed_size=89218)
     end
@@ -146,5 +150,4 @@ library functions.
 ## Copyright
-Copyright (c) 2015 WeTransfer. See LICENSE.txt for
-further details.
+Copyright (c) 2016 WeTransfer. See LICENSE.txt for further details.

data/examples/archive_size_estimate.rb CHANGED Viewed

@@ -3,11 +3,11 @@ require_relative '../lib/zip_tricks'
 # Predict how large a ZIP file is going to be without having access to the actual
 # file contents, but using just the filenames (influences the file size) and the size
 # of the files
-zip_archive_size_in_bytes = ZipTricks::StoredSizeEstimator.perform_fake_archiving do |zip|
+zip_archive_size_in_bytes = ZipTricks::SizeEstimator.estimate do |zip|
   # Pretend we are going to make a ZIP file which contains a few
   # MP4 files (those do not compress all too well)
-  zip.add_stored_entry("MOV_1234.MP4", 898090)
-  zip.add_stored_entry("MOV_1235.MP4", 7855126)
+  zip.add_stored_entry(filename: "MOV_1234.MP4", size: 898090)
+  zip.add_stored_entry(filename: "MOV_1235.MP4", size: 7855126)
 end
-zip_archive_size_in_bytes #=> 8753438
+puts zip_archive_size_in_bytes #=> 8753467

data/examples/rack_application.rb CHANGED Viewed

@@ -27,7 +27,7 @@ class ZipDownload
     # the user that the download stalled or was aborted in-flight.
     # Note that using the size estimator here does _not_ read or compress
     # your original file, so it is very fast.
-    size = ZipTricks::StoredSizeEstimator.perform_fake_archiving do |ar|
+    size = ZipTricks::SizeEstimator.estimate do |ar|
       ar.add_stored_entry(filename, f.size)
     end
@@ -37,13 +37,11 @@ class ZipDownload
       begin
         # We are adding only one file to the ZIP here, but you could do that
         # with an arbitrary number of files of course.
-        zip.add_stored_entry(filename, f.size, crc32)
+        zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
         # Write the contents of the file. It is stored, so the writes go directly
         # to the Rack output, bypassing any RubyZip deflaters/compressors. In fact you
         # are yielding the "blob" string here directly to the Rack server handler.
-        while blob = f.read(1024 * 128)
-          zip << blob
-        end
+        IO.copy_stream(f, zip)
       ensure
         f.close # Make sure the opened file we read from gets closed
       end

data/lib/zip_tricks/block_deflate.rb CHANGED Viewed

@@ -13,6 +13,27 @@
 # When you deflate the chunks separately, you need to write the end marker yourself (using `write_terminator`).
 # If you just want to deflate a large IO's contents, use `deflate_in_blocks_and_terminate` to have the end marker
 # written out for you.
+#
+# Basic usage to compress a file in parts:
+#
+#     source_file = File.open('12_gigs.bin', 'rb')
+#     compressed = Tempfile.new
+#     # Will not compress everything in memory, but do it per chunk to spare memory. `compressed`
+#     # will be written to at the end of each chunk.
+#     ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
+#
+# You can also do the same to parts that you will later concatenate together elsewhere, in that case
+# you need to skip the end marker:
+#
+#     compressed = Tempfile.new
+#     ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'), compressed)
+#     ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'), compressed)
+#     ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'), compressed)
+#     ZipTricks::BlockDeflate.write_terminator(compressed)
+#
+# You can also elect to just compress strings in memory (to splice them later):
+#
+#     compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
 module ZipTricks::BlockDeflate
   DEFAULT_BLOCKSIZE = 1024*1024*5
   END_MARKER = [3, 0].pack("C*")