zip_tricks 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +5 -0
- data/.yardopts +1 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +20 -0
- data/README.md +145 -0
- data/Rakefile +51 -0
- data/lib/zip_tricks/block_deflate.rb +89 -0
- data/lib/zip_tricks/block_write.rb +40 -0
- data/lib/zip_tricks/manifest.rb +85 -0
- data/lib/zip_tricks/null_writer.rb +7 -0
- data/lib/zip_tricks/rack_body.rb +41 -0
- data/lib/zip_tricks/stored_size_estimator.rb +44 -0
- data/lib/zip_tricks/stream_crc32.rb +43 -0
- data/lib/zip_tricks/streamer.rb +175 -0
- data/lib/zip_tricks/write_and_tell.rb +33 -0
- data/lib/zip_tricks.rb +9 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/zip_tricks/block_deflate_spec.rb +111 -0
- data/spec/zip_tricks/block_write_spec.rb +95 -0
- data/spec/zip_tricks/manifest_spec.rb +60 -0
- data/spec/zip_tricks/rack_body_spec.rb +34 -0
- data/spec/zip_tricks/stored_size_estimator_spec.rb +22 -0
- data/spec/zip_tricks/stream_crc32_spec.rb +38 -0
- data/spec/zip_tricks/streamer_spec.rb +253 -0
- data/spec/zip_tricks/war-and-peace.txt +10810 -0
- data/spec/zip_tricks/write_and_tell_spec.rb +43 -0
- data/zip_tricks.gemspec +90 -0
- metadata +192 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c0cda66fb0e3453ab3c239126c67686d124fcce5
|
4
|
+
data.tar.gz: 28327929e311c13ad556b85cef2ddf73ee1fc653
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5f5e227ce148f6d7468371f8d8653b8fa15331f0495e674f0e8972fa32d532865757acfb1c0051b147fe44fcbc81e0798821bd3c7cf1dd75e4e3e85852715e90
|
7
|
+
data.tar.gz: 5f455aed3939b423b1459e510a00cfc749701102e1dc714677fc74a2e2de6b9b18f7aebf23a09ffc68b5bd1c51bb627e22c305f1d1ffc0b9c933a4954f111828
|
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.travis.yml
ADDED
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup markdown
|
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Fetch gems over HTTPS so that downloads are encrypted and authenticated in transit.
source "https://rubygems.org"

# Dependencies declared outside of any group
gem 'rubyzip', '~> 1.1.7'
gem 'very_tiny_state_machine', '~> 2'

# Dependencies that are only part of the :development group
group :development do
  gem 'rake', '~> 10.4'
  gem "rspec", "~> 3.2.0" # "~> 3.2.0" already means ">= 3.2.0, < 3.3"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.0"
  gem "jeweler", "~> 2.0.1"
  gem 'range_utils'
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2016 WeTransfer
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# zip_tricks
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/WeTransfer/zip_tricks.svg?branch=master)](https://travis-ci.org/WeTransfer/zip_tricks)
|
4
|
+
|
5
|
+
Makes Rubyzip sing, dance and play saxophone for streaming applications.
|
6
|
+
Spiritual successor to [zipline](https://github.com/fringd/zipline)
|
7
|
+
|
8
|
+
Requires Ruby 2.1+, rubyzip and a couple of other gems (all available to jRuby as well).
|
9
|
+
The library is composed of a loose set of modules which are described below.
|
10
|
+
|
11
|
+
## BlockDeflate
|
12
|
+
|
13
|
+
Deflate a byte stream in blocks of N bytes, optionally writing a terminator marker. This can be used to
|
14
|
+
compress a file in parts.
|
15
|
+
|
16
|
+
source_file = File.open('12_gigs.bin', 'rb')
|
17
|
+
compressed = Tempfile.new
|
18
|
+
# Will not compress everything in memory, but do it per chunk to spare memory. `compressed`
|
19
|
+
# will be written to at the end of each chunk.
|
20
|
+
ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
|
21
|
+
|
22
|
+
You can also do the same to parts that you will later concatenate together elsewhere, in that case
|
23
|
+
you need to skip the end marker:
|
24
|
+
|
25
|
+
compressed = Tempfile.new
|
26
|
+
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'), compressed)
|
27
|
+
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'), compressed)
|
28
|
+
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'), compressed)
|
29
|
+
ZipTricks::BlockDeflate.write_terminator(compressed)
|
30
|
+
|
31
|
+
You can also elect to just compress strings in memory (to splice them later):
|
32
|
+
|
33
|
+
compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
|
34
|
+
|
35
|
+
## Streamer
|
36
|
+
|
37
|
+
Is used to write a streaming ZIP file when you know the CRC32 for the raw files
|
38
|
+
and the sizes of these files upfront. This writes the local headers immediately, without having to
|
39
|
+
rewind the output IO. It also avoids using the local footers instead of headers, therefore permitting
|
40
|
+
Zip64-sized entries to be stored easily.
|
41
|
+
|
42
|
+
# io has to be an object that supports #<< and #tell
|
43
|
+
io = ... # can be a Tempfile, but can also be a BlockWrite adapter for, say, Rack
|
44
|
+
|
45
|
+
ZipTricks::Streamer.open(io) do | zip |
|
46
|
+
|
47
|
+
# raw_file is written "as is" (STORED mode)
|
48
|
+
zip.add_stored_entry("first-file.bin", raw_file.size, raw_file_crc32)
|
49
|
+
while blob = raw_file.read(2048)
|
50
|
+
zip << blob
|
51
|
+
end
|
52
|
+
|
53
|
+
# another_file is assumed to be block-deflated (DEFLATE mode)
|
54
|
+
zip.add_compressed_entry("another-file.bin", another_file_size, another_file_crc32, compressed_file.size)
|
55
|
+
while blob = compressed_file.read(2048)
|
56
|
+
zip << blob
|
57
|
+
end
|
58
|
+
|
59
|
+
# If you are storing block-deflated parts of a single file, you have to terminate the output
|
60
|
+
# with an end marker manually
|
61
|
+
zip.add_compressed_entry("compressed-in-parts.bin", another_file_size, another_file_crc32, deflated_size)
|
62
|
+
while blob = part1.read(2048)
|
63
|
+
zip << blob
|
64
|
+
end
|
65
|
+
while blob = part2.read(2048)
|
66
|
+
zip << blob
|
67
|
+
end
|
68
|
+
ZipTricks::BlockDeflate.write_terminator(zip)
|
69
|
+
|
70
|
+
... # more file writes etc.
|
71
|
+
end
|
72
|
+
|
73
|
+
## RackBody
|
74
|
+
|
75
|
+
Can be used to output a streamed ZIP archive directly through a Rack response body.
|
76
|
+
The block given to the constructor will be called when the response body will be read by the webserver,
|
77
|
+
and will receive a {ZipTricks::Streamer} as its block argument. You can then add entries to the Streamer as usual.
|
78
|
+
The archive will be automatically closed at the end of the block.
|
79
|
+
|
80
|
+
# Precompute the Content-Length ahead of time
|
81
|
+
content_length = ZipTricks::StoredSizeEstimator.perform_fake_archiving do | estimator |
|
82
|
+
estimator.add_stored_entry('large.tif', size=1289894)
|
83
|
+
end
|
84
|
+
|
85
|
+
# Prepare the response body. The block will only be called when the response starts to be written.
|
86
|
+
body = ZipTricks::RackBody.new do | streamer |
|
87
|
+
streamer.add_stored_entry('large.tif', size=1289894, crc32=198210)
|
88
|
+
streamer << large_file.read(1024*1024) until large_file.eof?
|
89
|
+
...
|
90
|
+
end
|
91
|
+
|
92
|
+
return [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
|
93
|
+
|
94
|
+
## BlockWrite
|
95
|
+
|
96
|
+
Can be used as the destination IO, but will call the given block instead on every call to `:<<`.
|
97
|
+
This can be used to attach the output of the zip compressor to the Rack response body, or another
|
98
|
+
destination. For Rack/Rails just use RackBody since it sets this up for you.
|
99
|
+
|
100
|
+
io = ZipTricks::BlockWrite.new{|data| socket << data }
|
101
|
+
ZipTricks::Streamer.open(io) do | zip |
|
102
|
+
zip.add_stored_entry("first-file.bin", raw_file.size, raw_file_crc32)
|
103
|
+
....
|
104
|
+
end
|
105
|
+
|
106
|
+
## StoredSizeEstimator
|
107
|
+
|
108
|
+
Is used to predict the size of the ZIP archive after output. This can be used to generate, say, a `Content-Length` header,
|
109
|
+
or to predict the size of the resulting archive on the storage device. The size is estimated using a very fast "fake archiving"
|
110
|
+
procedure, so it computes the sizes of all the headers and the central directory very accurately.
|
111
|
+
|
112
|
+
expected_zip_archive_size = StoredSizeEstimator.perform_fake_archiving do | estimator |
|
113
|
+
estimator.add_stored_entry("file.doc", size=898291)
|
114
|
+
estimator.add_compressed_entry("family.JPG", size=89281911, compressed_size=89218)
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
## StreamCRC32
|
119
|
+
|
120
|
+
Computes the CRC32 value in a streaming fashion. Is slightly more convenient for the purpose than using the raw Zlib
|
121
|
+
library functions.
|
122
|
+
|
123
|
+
crc = ZipTricks::StreamCRC32.new
|
124
|
+
crc << large_file.read(1024 * 12) until large_file.eof?
|
125
|
+
...
|
126
|
+
|
127
|
+
crc.to_i # Returns the actual CRC32 value computed so far
|
128
|
+
...
|
129
|
+
# Append a known CRC32 value that has been computed previously
|
130
|
+
crc.append(precomputed_crc32, size_of_the_blob_computed_from)
|
131
|
+
|
132
|
+
## Contributing to zip_tricks
|
133
|
+
|
134
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
135
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
136
|
+
* Fork the project.
|
137
|
+
* Start a feature/bugfix branch.
|
138
|
+
* Commit and push until you are happy with your contribution.
|
139
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
140
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
|
141
|
+
|
142
|
+
## Copyright
|
143
|
+
|
144
|
+
Copyright (c) 2015 WeTransfer. See LICENSE.txt for
|
145
|
+
further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the gems from the Gemfile; if that fails, print the reason
# and exit with the status code Bundler supplies.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

require 'rake'
require_relative 'lib/zip_tricks'
require 'jeweler'

Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
  gem.name = "zip_tricks"
  gem.homepage = "http://github.com/wetransfer/zip_tricks"
  gem.license = "MIT"
  gem.version = ZipTricks::VERSION
  gem.summary = %Q{Makes rubyzip stream, for real}
  gem.description = %Q{Makes rubyzip stream, for real}
  gem.email = "me@julik.nl"
  gem.authors = ["Julik Tarkhanov"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

desc "Code coverage detail"
task :simplecov do
  # Sets COVERAGE in the environment before running the :spec task
  ENV['COVERAGE'] = "true"
  Rake::Task['spec'].execute
end

task :default => :spec

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  # The gem version is defined in ZipTricks::VERSION (it is also what the gemspec
  # above uses), so take the title from the same place.
  rdoc.title = "zip_tricks #{ZipTricks::VERSION}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module ZipTricks
  # Permits Deflate compression in independent blocks. The workflow is as follows:
  #
  # * Run every block to compress through `deflate_chunk`, which strips the header, the end marker
  #   and the adler32 checksum from the result
  # * Write out the compressed block bodies (the ones `deflate_chunk` returns) to your output, in sequence
  # * Write out the end marker (\x03\x00)
  #
  # The resulting stream is guaranteed to be handled properly by all zip unarchiving tools, including the
  # BOMArchiveHelper/ArchiveUtility on OSX.
  #
  # You could also build a compressor for Rubyzip using this module quite easily,
  # even though this is outside the scope of the library.
  #
  # When you deflate the chunks separately, you need to write the end marker yourself (using `write_terminator`).
  # If you just want to deflate a large IO's contents, use `deflate_in_blocks_and_terminate` to have the end marker
  # written out for you.
  module BlockDeflate
    DEFAULT_BLOCKSIZE = 1024*1024*5
    END_MARKER = [3, 0].pack("C*")
    # Every level from Zlib::DEFAULT_COMPRESSION (-1) up to Zlib::BEST_COMPRESSION (9),
    # which also covers Zlib::NO_COMPRESSION (0).
    VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze

    # Write the end marker (\x03\x00) to the given IO.
    #
    # `output_io` can also be a {ZipTricks::Streamer} to expedite ops.
    #
    # @param output_io [IO] the stream to write to (should respond to `:<<`)
    # @return [Integer] number of bytes written to `output_io`
    def self.write_terminator(output_io)
      output_io << END_MARKER
      END_MARKER.bytesize
    end

    # Compress a given binary string and flush the deflate stream at byte boundary.
    # The returned string can be spliced into another deflate stream.
    #
    # @param bytes [String] Bytes to compress
    # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
    # @return [String] compressed bytes
    # @raise [RuntimeError] if `level` is not one of `VALID_COMPRESSIONS`
    def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION)
      raise "Invalid Zlib compression level #{level}" unless VALID_COMPRESSIONS.include?(level)

      z = Zlib::Deflate.new(level)
      begin
        compressed_blob = z.deflate(bytes, Zlib::SYNC_FLUSH)
        compressed_blob << z.finish
      ensure
        # Release the zlib stream even when deflating raised
        z.close
      end

      # Remove the header (2 bytes), the [3,0] end marker and the adler (4 bytes)
      compressed_blob[2...-6]
    end

    # Compress the contents of input_io into output_io, in blocks
    # of block_size. Aligns the parts so that they can be concatenated later.
    # Writes deflate end marker (\x03\x00) into `output_io` as the final step, so
    # the contents of `output_io` can be spliced verbatim into a ZIP archive.
    #
    # Once the write completes, no more parts for concatenation should be written to
    # the same stream.
    #
    # `output_io` can also be a {ZipTricks::Streamer} to expedite ops.
    #
    # @param input_io [IO] the stream to read from (should respond to `:read`)
    # @param output_io [IO] the stream to write to (should respond to `:<<`)
    # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
    # @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
    # @return [Integer] number of bytes written to `output_io`
    def self.deflate_in_blocks_and_terminate(input_io, output_io, level: Zlib::DEFAULT_COMPRESSION, block_size: DEFAULT_BLOCKSIZE)
      bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
      bytes_written + write_terminator(output_io)
    end

    # Compress the contents of input_io into output_io, in blocks
    # of block_size. Aligns the parts so that they can be concatenated later.
    # Will not write the deflate end marker (\x03\x00), so more parts can be written
    # later and successfully read back in, provided the end marker is written afterwards.
    #
    # `output_io` can also be a {ZipTricks::Streamer} to expedite ops.
    #
    # @param input_io [IO] the stream to read from (should respond to `:read`)
    # @param output_io [IO] the stream to write to (should respond to `:<<`)
    # @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
    # @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
    # @return [Integer] number of bytes written to `output_io`
    def self.deflate_in_blocks(input_io, output_io, level: Zlib::DEFAULT_COMPRESSION, block_size: DEFAULT_BLOCKSIZE)
      bytes_written = 0
      # The loop stops as soon as `read` returns nil (or false)
      while block = input_io.read(block_size)
        deflated = deflate_chunk(block, level: level)
        output_io << deflated
        bytes_written += deflated.bytesize
      end
      bytes_written
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module ZipTricks
  # Stashes a block given by the Rack webserver when calling each() on a body, and calls
  # that block every time it is written to using :<< (shovel). Poses as an IO for rubyzip.
  class BlockWrite
    # The block is the block given to each() of the Rack body, or other block you want
    # to receive the string chunks written by the zip compressor.
    def initialize(&block)
      @block = block
    end

    # Make sure those methods raise outright
    [:seek, :pos=, :to_s].each do |m|
      define_method(m) do |*args|
        raise "#{m} not supported - this IO adapter is non-rewindable"
      end
    end

    # Every time this object gets written to, call the Rack body each() block with the bytes given instead.
    # Nil and zero-length writes are skipped, the block is not called for them.
    #
    # @param buf [String, nil] the bytes to pass on to the block
    # @return [self] always, so that writes can be chained (`io << a << b`)
    def <<(buf)
      return self if buf.nil?

      # Ensure we ALWAYS write in binary encoding.
      encoded = in_binary_encoding(buf)
      return self if encoded.bytesize.zero? # Zero-size output has a special meaning when using chunked encoding

      @block.call(encoded)
      self
    end

    # Does nothing
    def close
      nil
    end

    private

    # Returns `buf` tagged as binary. A mutable string is re-tagged in place (no copy);
    # a frozen string can't be re-tagged, so a copy of it is re-tagged instead.
    def in_binary_encoding(buf)
      return buf if buf.encoding == Encoding::BINARY
      target = buf.frozen? ? buf.dup : buf
      target.force_encoding(Encoding::BINARY)
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module ZipTricks
  # Helps to estimate archive sizes
  class Manifest < Struct.new(:zip_streamer, :io, :part_list)

    # Describes a span within the ZIP bytestream
    class ZipSpan < Struct.new(:part_type, :byte_range_in_zip, :filename, :additional_metadata)
    end

    # Builds an array of spans within the ZIP file and computes the size of the resulting archive in bytes.
    #
    #     zip_spans, bytesize = Manifest.build do | b |
    #       b.add_stored_entry(name: "file.doc", size_uncompressed: 898291)
    #       b.add_compressed_entry(name: "family.tif", size_uncompressed: 89281911, size_compressed: 121908)
    #     end
    #     bytesize #=> ... (Integer)
    #     zip_spans[0] #=> Manifest::ZipSpan(part_type: :entry_header, byte_range_in_zip: 0..44, ...)
    #     zip_spans[-1] #=> Manifest::ZipSpan(part_type: :central_directory, byte_range_in_zip: 776721..898921, ...)
    #
    # Also works when the block adds no entries at all; the only span returned is then
    # the central directory, starting at offset 0.
    #
    # @return [Array<Array<ZipSpan>, Integer>] an array of byte spans within the final ZIP, and the total size of the archive
    # @yield [Manifest] the manifest object you can add entries to
    def self.build
      output_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
      part_list = []
      central_directory_start = 0
      ZipTricks::Streamer.open(output_io) do | zip_streamer |
        manifest = new(zip_streamer, output_io, part_list)
        yield(manifest)
        # Whatever gets written once this block returns is recorded as the central
        # directory, so remember where the entries end. Asking the IO (instead of the
        # last registered part) keeps this working for a manifest without entries.
        central_directory_start = output_io.tell
      end

      # Record the position of the central directory
      directory_location = central_directory_start..(output_io.tell - 1)
      part_list << ZipSpan.new(:central_directory, directory_location, :central_directory, nil)

      [part_list, output_io.tell]
    end

    # Add a fake entry to the archive, to see how big it is going to be in the end.
    #
    # @param name [String] the name of the file (filenames are variable-width in the ZIP)
    # @param size_uncompressed [Integer] size of the uncompressed entry
    # @param segment_info [Object] if you need to save anything to retrieve later from the Manifest,
    #   pass it here (like the URL of the file)
    # @return self
    def add_stored_entry(name:, size_uncompressed:, segment_info: nil)
      register_part(:entry_header, name, segment_info) do
        zip_streamer.add_stored_entry(name, size_uncompressed, C_fake_crc)
      end

      register_part(:entry_body, name, segment_info) do
        zip_streamer.simulate_write(size_uncompressed)
      end

      self
    end

    # Add a fake entry to the archive, to see how big it is going to be in the end.
    #
    # @param name [String] the name of the file (filenames are variable-width in the ZIP)
    # @param size_uncompressed [Integer] size of the uncompressed entry
    # @param size_compressed [Integer] size of the compressed entry
    # @param segment_info [Object] if you need to save anything to retrieve later from the Manifest,
    #   pass it here (like the URL of the file)
    # @return self
    def add_compressed_entry(name:, size_uncompressed:, size_compressed:, segment_info: nil)
      register_part(:entry_header, name, segment_info) do
        zip_streamer.add_compressed_entry(name, size_uncompressed, C_fake_crc, size_compressed)
      end

      register_part(:entry_body, name, segment_info) do
        zip_streamer.simulate_write(size_compressed)
      end

      self
    end

    private

    # Placeholder checksum handed to the streamer for the fake entries
    C_fake_crc = Zlib.crc32('Mary had a little lamb')
    private_constant :C_fake_crc

    # Runs the given block and records the span of `io` offsets it covered
    # (from `io.tell` before the block up to `io.tell - 1` after it) as a ZipSpan.
    def register_part(span_type, filename, metadata)
      first_byte = io.tell
      yield
      last_byte = io.tell - 1
      part_list << ZipSpan.new(span_type, (first_byte..last_byte), filename, metadata)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# A Rack response body that produces a ZIP archive. The block it is
# constructed with receives a {ZipTricks::Streamer}, which is used to add
# entries to the archive and to write the zip entry bodies.
class ZipTricks::RackBody
  # Stores the block that is going to write the archive. The block is not
  # called here; it is called from {#each} with a {ZipTricks::Streamer}
  # as its block argument, and entries can then be added to that Streamer as usual.
  #
  #     # Precompute the Content-Length ahead of time
  #     content_length = ZipTricks::StoredSizeEstimator.perform_fake_archiving do | estimator |
  #       estimator.add_stored_entry('large.tif', size=1289894)
  #     end
  #
  #     # Prepare the response body. The block will only be called when the response starts to be written.
  #     body = ZipTricks::RackBody.new do | streamer |
  #       streamer.add_stored_entry('large.tif', size=1289894, crc32=198210)
  #       streamer << large_file.read(1024*1024) until large_file.eof?
  #       ...
  #     end
  #
  #     return [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
  def initialize(&archive_writer)
    @archiving_block = archive_writer
  end

  # Wraps the block given by the caller in a {ZipTricks::BlockWrite} and hands
  # that to {ZipTricks::Streamer.open}, together with the block given to the
  # constructor.
  def each(&chunk_receiver)
    ZipTricks::Streamer.open(ZipTricks::BlockWrite.new(&chunk_receiver), &@archiving_block)
  end

  # Intentionally empty: nothing has to be deallocated or canceled
  # even if the zip output is incomplete. The archive gets closed
  # automatically as part of {ZipTricks::Streamer.open}
  def close; end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Predicts the size of an archive. A thin adapter that offers positional
# arguments on top of the keyword-argument API of {ZipTricks::Manifest}.
class ZipTricks::StoredSizeEstimator < Struct.new(:manifest)

  # Runs a fake archiving pass and returns the size it arrived at. The sizes of
  # the entries have to be known upfront. Usage:
  #
  #     expected_zip_size = StoredSizeEstimator.perform_fake_archiving do | estimator |
  #       estimator.add_stored_entry("file.doc", size=898291)
  #       estimator.add_compressed_entry("family.tif", size=89281911, compressed_size=121908)
  #     end
  #
  # @return [Integer] the size of the resulting archive, in bytes
  # @yield [StoredSizeEstimator] the estimator
  def self.perform_fake_archiving
    build_result = ZipTricks::Manifest.build do |manifest|
      # Hand the caller an estimator wrapping the manifest, so that the
      # positional-argument methods below can be used.
      yield(new(manifest))
    end
    # Manifest.build returns two values, only the second one is of interest here
    _spans, archive_size = build_result
    archive_size
  end

  # Registers a fake stored entry with the manifest.
  #
  # @param name [String] the name of the file (filenames are variable-width in the ZIP)
  # @param size_uncompressed [Integer] size of the uncompressed entry
  # @return self
  def add_stored_entry(name, size_uncompressed)
    tap { manifest.add_stored_entry(name: name, size_uncompressed: size_uncompressed) }
  end

  # Registers a fake compressed entry with the manifest.
  #
  # @param name [String] the name of the file (filenames are variable-width in the ZIP)
  # @param size_uncompressed [Integer] size of the uncompressed entry
  # @param size_compressed [Integer] size of the compressed entry
  # @return self
  def add_compressed_entry(name, size_uncompressed, size_compressed)
    tap do
      manifest.add_compressed_entry(name: name, size_uncompressed: size_uncompressed, size_compressed: size_compressed)
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module ZipTricks
  # A simple stateful class for keeping track of a CRC32 value through multiple writes
  class StreamCRC32
    # Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
    #
    # @param io [IO] the IO to read the data from
    # @return [Integer] the computed CRC32 value
    def self.from_io(io)
      crc = new
      # Read in chunks of 512 KB until the IO reports it is exhausted
      crc << io.read(1024 * 512) until io.eof?
      crc.to_i
    end

    # Creates a new streaming CRC32 calculator
    def initialize
      @crc = Zlib.crc32('')
    end

    # Append data to the CRC32. Updates the contained CRC32 value in place.
    #
    # @param blob [String] the string to compute the CRC32 from
    # @return [self]
    def <<(blob)
      # Zlib.crc32 accepts the running checksum as its second argument and returns the
      # updated value, so a separate CRC computation followed by a combine is not needed.
      @crc = Zlib.crc32(blob, @crc)
      self
    end

    # Returns the CRC32 value computed so far
    #
    # @return [Integer] the updated CRC32 value for all the blobs so far
    def to_i
      @crc
    end

    # Appends a known CRC32 value to the current one, and combines the
    # contained CRC32 value in-place.
    #
    # @param crc32 [Integer] the CRC32 value to append
    # @param blob_size [Integer] the size of the data the `crc32` is computed from
    # @return [Integer] the updated CRC32 value for all the blobs so far
    def append(crc32, blob_size)
      @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
    end
  end
end
|