zip_tricks 2.8.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/IMPLEMENTATION_DETAILS.md +2 -10
- data/README.md +62 -59
- data/examples/archive_size_estimate.rb +4 -4
- data/examples/rack_application.rb +3 -5
- data/lib/zip_tricks/block_deflate.rb +21 -0
- data/lib/zip_tricks/file_reader.rb +491 -0
- data/lib/zip_tricks/null_writer.rb +7 -2
- data/lib/zip_tricks/rack_body.rb +3 -3
- data/lib/zip_tricks/remote_io.rb +30 -20
- data/lib/zip_tricks/remote_uncap.rb +10 -10
- data/lib/zip_tricks/size_estimator.rb +64 -0
- data/lib/zip_tricks/stream_crc32.rb +2 -2
- data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
- data/lib/zip_tricks/streamer/entry.rb +21 -0
- data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
- data/lib/zip_tricks/streamer/writable.rb +20 -0
- data/lib/zip_tricks/streamer.rb +172 -66
- data/lib/zip_tricks/zip_writer.rb +346 -0
- data/lib/zip_tricks.rb +1 -4
- data/spec/spec_helper.rb +1 -38
- data/spec/zip_tricks/file_reader_spec.rb +47 -0
- data/spec/zip_tricks/rack_body_spec.rb +2 -2
- data/spec/zip_tricks/remote_io_spec.rb +8 -20
- data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
- data/spec/zip_tricks/size_estimator_spec.rb +31 -0
- data/spec/zip_tricks/streamer_spec.rb +59 -36
- data/spec/zip_tricks/zip_writer_spec.rb +408 -0
- data/zip_tricks.gemspec +20 -14
- metadata +33 -16
- data/lib/zip_tricks/manifest.rb +0 -85
- data/lib/zip_tricks/microzip.rb +0 -339
- data/lib/zip_tricks/stored_size_estimator.rb +0 -44
- data/spec/zip_tricks/manifest_spec.rb +0 -60
- data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
- data/spec/zip_tricks/microzip_spec.rb +0 -546
- data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bc3917bd654f3fca15ae6bce1769eb0fa45dd12
|
4
|
+
data.tar.gz: 12f495895ed59e23cc89bf7b86b91bd9422c1608
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66fe048eddc9c00ed459e02e30ba8607a76cf297de88bf595694d14eec42e7b1075752b42fae8e86dcfe06ca2814db985452ed0244f803207491b100baed1041
|
7
|
+
data.tar.gz: 717bd4d51c597fefdd77bfe0f7df3c41f2ea8ae0122f0b5330ab6cf63b6183370e5dd8bbc19b868a6f52ae53a84147bfdb8bee54bbc44b63df563074192f7ec8
|
data/Gemfile
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
gem 'rubyzip', '~> 1.1', '>= 1.1.7'
|
4
|
-
gem 'very_tiny_state_machine', '~> 2'
|
5
|
-
|
6
3
|
group :development do
|
4
|
+
gem 'rubyzip', '~> 1.1', '>= 1.1.7'
|
5
|
+
gem 'terminal-table'
|
7
6
|
gem 'range_utils'
|
8
7
|
gem 'rack', '~> 1.6' # For Jeweler
|
9
8
|
gem 'rake', '~> 10.4'
|
10
9
|
gem "rspec", "~> 3.2.0", '< 3.3'
|
10
|
+
gem 'coderay'
|
11
11
|
gem "yard", "~> 0.8"
|
12
12
|
gem "bundler", "~> 1.0"
|
13
13
|
gem "jeweler", "~> 2.0.1"
|
data/IMPLEMENTATION_DETAILS.md
CHANGED
@@ -24,16 +24,8 @@ Data descriptors permit you to generate "postfix" ZIP files (where you write the
|
|
24
24
|
know the CRC32 and the file size upfront, then write the compressed file data, and only then - once you know what your CRC32,
|
25
25
|
compressed and uncompressed sizes are etc. - write them into a data descriptor that follows the file data.
|
26
26
|
|
27
|
-
The streamer
|
28
|
-
with the 7Zip version that we want to support
|
29
|
-
bit 3 that trips up that version of 7Zip. If we were to use data descriptors, we would have to up the minimum supported version
|
30
|
-
of 7Zip.
|
31
|
-
|
32
|
-
That means, in turn, that **to use the ZipTricks streamer you have to know the CRC32 and the sizes of the compressed/uncompressed
|
33
|
-
file upfront.** So you have to precompute them in some way. To do that, you can use `BlockDeflate` to precompress the file in
|
34
|
-
parallel, and `StreamCRC32` to compute the CRC checksum, before feeding them to the ZIP writer.
|
35
|
-
|
36
|
-
This approach might be reconsidered in the future.
|
27
|
+
The streamer has optional support for data descriptors. Their use can apparently [be problematic](https://github.com/thejoshwolfe/yazl/issues/13)
|
28
|
+
with the 7Zip version that we want to support, but in our tests everything worked fine.
|
37
29
|
|
38
30
|
For more info see https://github.com/thejoshwolfe/yazl#general-purpose-bit-flag
|
39
31
|
|
data/README.md
CHANGED
@@ -2,77 +2,81 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/WeTransfer/zip_tricks.svg?branch=master)](https://travis-ci.org/WeTransfer/zip_tricks)
|
4
4
|
|
5
|
-
|
5
|
+
Allows streaming, non-rewinding ZIP file output from Ruby.
|
6
6
|
Spiritual successor to [zipline](https://github.com/fringd/zipline)
|
7
7
|
|
8
|
-
Requires Ruby 2.1
|
9
|
-
The library is composed of a loose set of modules.
|
8
|
+
Requires Ruby 2.1+ syntax support and a working zlib (all available to jRuby as well).
|
10
9
|
|
11
|
-
##
|
10
|
+
## Create a ZIP file without size estimation (compress on-the-fly)
|
12
11
|
|
13
|
-
|
14
|
-
|
12
|
+
When you compress on the fly and use data descriptors it is not really possible to compute the file size upfront.
|
13
|
+
But it is very likely to yield good compression - especially if you send things like CSV files.
|
15
14
|
|
16
|
-
|
15
|
+
out = my_tempfile # can also be a socket
|
16
|
+
ZipTricks::Streamer.open(out) do |zip|
|
17
|
+
zip.write_stored_file('mov.mp4.txt') do |sink|
|
18
|
+
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
19
|
+
end
|
20
|
+
zip.write_deflated_file('long-novel.txt') do |sink|
|
21
|
+
File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
|
22
|
+
end
|
23
|
+
end
|
17
24
|
|
18
|
-
|
19
|
-
compress a file in parts.
|
25
|
+
## Send the same ZIP file from a Rack response
|
20
26
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
# will be written to at the end of each chunk.
|
25
|
-
ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
|
27
|
+
Create a `RackBody` object and give its constructor a block that adds files.
|
28
|
+
The block will only be called when actually sending the response to the client
|
29
|
+
(unless you are using a buffering Rack webserver, such as Webrick).
|
26
30
|
|
27
|
-
|
28
|
-
|
31
|
+
body = ZipTricks::RackBody.new do | zip |
|
32
|
+
zip.write_stored_file('mov.mp4') do |sink| # Those MPEG4 files do not compress that well
|
33
|
+
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
34
|
+
end
|
35
|
+
zip.write_deflated_file('long-novel.txt') do |sink|
|
36
|
+
File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
|
37
|
+
end
|
38
|
+
end
|
39
|
+
[200, {'Transfer-Encoding' => 'chunked'}, body]
|
29
40
|
|
30
|
-
|
31
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb), compressed)
|
32
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb), compressed)
|
33
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb), compressed)
|
34
|
-
ZipTricks::BlockDeflate.write_terminator(compressed)
|
41
|
+
## Send a ZIP file of known size, with correct headers
|
35
42
|
|
36
|
-
|
43
|
+
Use the `SizeEstimator` to compute the correct size of the resulting archive.
|
37
44
|
|
38
|
-
|
45
|
+
zip_body = ZipTricks::RackBody.new do | zip |
|
46
|
+
zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
|
47
|
+
zip << read_file('myfile1.bin')
|
48
|
+
zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
|
49
|
+
zip << read_file('myfile2.bin')
|
50
|
+
end
|
51
|
+
bytesize = ZipTricks::SizeEstimator.estimate do |z|
|
52
|
+
z.add_stored_entry(filename: 'myfile1.bin', size: 9090821)
|
53
|
+
z.add_stored_entry(filename: 'myfile2.bin', size: 458678)
|
54
|
+
end
|
55
|
+
[200, {'Content-Length' => bytesize.to_s}, zip_body]
|
39
56
|
|
40
|
-
##
|
57
|
+
## Other usage examples
|
41
58
|
|
42
|
-
|
43
|
-
|
44
|
-
rewind the output IO. It also avoids using the local footers instead of headers, therefore permitting
|
45
|
-
Zip64-sized entries to be stored easily.
|
59
|
+
Check out the `examples/` directory at the root of the project. This will give you a good idea
|
60
|
+
of various use cases the library supports.
|
46
61
|
|
47
|
-
|
48
|
-
|
49
|
-
|
62
|
+
## Writing ZIP files using the Streamer bypass
|
63
|
+
|
64
|
+
You do not have to "feed" all the contents of the files you put in the archive through the Streamer object.
|
65
|
+
If the write destination for your use case is a `Socket` (say, you are writing using Rack hijack) and you know
|
66
|
+
the metadata of the file upfront (the CRC32 of the uncompressed file and the sizes), you can write directly
|
67
|
+
to that socket using some accelerated writing technique, and only use the Streamer to write out the ZIP metadata.
|
68
|
+
|
69
|
+
# io has to be an object that supports #<<
|
50
70
|
ZipTricks::Streamer.open(io) do | zip |
|
51
|
-
|
52
|
-
#
|
71
|
+
# raw_file is written "as is" (STORED mode).
|
72
|
+
# Write the local file header first..
|
53
73
|
zip.add_stored_entry("first-file.bin", raw_file.size, raw_file_crc32)
|
54
|
-
while blob = raw_file.read(2048)
|
55
|
-
zip << blob
|
56
|
-
end
|
57
|
-
|
58
|
-
# another_file is assumed to be block-deflated (DEFLATE mode)
|
59
|
-
zip.add_compressed_entry("another-file.bin", another_file_size, another_file_crc32, compressed_file.size)
|
60
|
-
while blob = compressed_file.read(2048)
|
61
|
-
zip << blob
|
62
|
-
end
|
63
74
|
|
64
|
-
#
|
65
|
-
|
66
|
-
zip.add_compressed_entry("compressed-in-parts.bin", another_file_size, another_file_crc32, deflated_size)
|
67
|
-
while blob = part1.read(2048)
|
68
|
-
zip << blob
|
69
|
-
end
|
70
|
-
while blob = part2.read(2048)
|
71
|
-
zip << blob
|
72
|
-
end
|
73
|
-
ZipTricks::BlockDeflate.write_terminator(zip)
|
75
|
+
# then send the actual file contents bypassing the Streamer interface
|
76
|
+
io.sendfile(my_temp_file)
|
74
77
|
|
75
|
-
...
|
78
|
+
# ...and then adjust the ZIP offsets within the Streamer
|
79
|
+
zip.simulate_write(my_temp_file.size)
|
76
80
|
end
|
77
81
|
|
78
82
|
## RackBody
|
@@ -83,7 +87,7 @@ and will receive a {ZipTricks::Streamer} as it's block argument. You can then ad
|
|
83
87
|
The archive will be automatically closed at the end of the block.
|
84
88
|
|
85
89
|
# Precompute the Content-Length ahead of time
|
86
|
-
content_length = ZipTricks::
|
90
|
+
content_length = ZipTricks::SizeEstimator.estimate do | estimator |
|
87
91
|
estimator.add_stored_entry('large.tif', size=1289894)
|
88
92
|
end
|
89
93
|
|
@@ -94,7 +98,7 @@ The archive will be automatically closed at the end of the block.
|
|
94
98
|
...
|
95
99
|
end
|
96
100
|
|
97
|
-
|
101
|
+
[200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
|
98
102
|
|
99
103
|
## BlockWrite
|
100
104
|
|
@@ -108,13 +112,13 @@ destination. For Rack/Rails just use RackBody since it sets this up for you.
|
|
108
112
|
....
|
109
113
|
end
|
110
114
|
|
111
|
-
##
|
115
|
+
## SizeEstimator
|
112
116
|
|
113
117
|
Is used to predict the size of the ZIP archive after output. This can be used to generate, say, a `Content-Length` header,
|
114
118
|
or to predict the size of the resulting archive on the storage device. The size is estimated using a very fast "fake archiving"
|
115
119
|
procedure, so it computes the sizes of all the headers and the central directory very accurately.
|
116
120
|
|
117
|
-
expected_zip_archive_size =
|
121
|
+
expected_zip_archive_size = SizeEstimator.estimate do | estimator |
|
118
122
|
estimator.add_stored_entry("file.doc", size=898291)
|
119
123
|
estimator.add_compressed_entry("family.JPG", size=89281911, compressed_size=89218)
|
120
124
|
end
|
@@ -146,5 +150,4 @@ library functions.
|
|
146
150
|
|
147
151
|
## Copyright
|
148
152
|
|
149
|
-
Copyright (c)
|
150
|
-
further details.
|
153
|
+
Copyright (c) 2016 WeTransfer. See LICENSE.txt for further details.
|
@@ -3,11 +3,11 @@ require_relative '../lib/zip_tricks'
|
|
3
3
|
# Predict how large a ZIP file is going to be without having access to the actual
|
4
4
|
# file contents, but using just the filenames (influences the file size) and the size
|
5
5
|
# of the files
|
6
|
-
zip_archive_size_in_bytes = ZipTricks::
|
6
|
+
zip_archive_size_in_bytes = ZipTricks::SizeEstimator.estimate do |zip|
|
7
7
|
# Pretend we are going to make a ZIP file which contains a few
|
8
8
|
# MP4 files (those do not compress all too well)
|
9
|
-
zip.add_stored_entry("MOV_1234.MP4", 898090)
|
10
|
-
zip.add_stored_entry("MOV_1235.MP4", 7855126)
|
9
|
+
zip.add_stored_entry(filename: "MOV_1234.MP4", size: 898090)
|
10
|
+
zip.add_stored_entry(filename: "MOV_1235.MP4", size: 7855126)
|
11
11
|
end
|
12
12
|
|
13
|
-
zip_archive_size_in_bytes #=>
|
13
|
+
puts zip_archive_size_in_bytes #=> 8753467
|
@@ -27,7 +27,7 @@ class ZipDownload
|
|
27
27
|
# the user that the download stalled or was aborted in-flight.
|
28
28
|
# Note that using the size estimator here does _not_ read or compress
|
29
29
|
# your original file, so it is very fast.
|
30
|
-
size = ZipTricks::
|
30
|
+
size = ZipTricks::SizeEstimator.estimate do |ar|
|
31
31
|
ar.add_stored_entry(filename, f.size)
|
32
32
|
end
|
33
33
|
|
@@ -37,13 +37,11 @@ class ZipDownload
|
|
37
37
|
begin
|
38
38
|
# We are adding only one file to the ZIP here, but you could do that
|
39
39
|
# with an arbitrary number of files of course.
|
40
|
-
zip.add_stored_entry(filename, f.size, crc32)
|
40
|
+
zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
|
41
41
|
# Write the contents of the file. It is stored, so the writes go directly
|
42
42
|
# to the Rack output, bypassing any RubyZip deflaters/compressors. In fact you
|
43
43
|
# are yielding the "blob" string here directly to the Rack server handler.
|
44
|
-
|
45
|
-
zip << blob
|
46
|
-
end
|
44
|
+
IO.copy_stream(f, zip)
|
47
45
|
ensure
|
48
46
|
f.close # Make sure the opened file we read from gets closed
|
49
47
|
end
|
@@ -13,6 +13,27 @@
|
|
13
13
|
# When you deflate the chunks separately, you need to write the end marker yourself (using `write_terminator`).
|
14
14
|
# If you just want to deflate a large IO's contents, use `deflate_in_blocks_and_terminate` to have the end marker
|
15
15
|
# written out for you.
|
16
|
+
#
|
17
|
+
# Basic usage to compress a file in parts:
|
18
|
+
#
|
19
|
+
# source_file = File.open('12_gigs.bin', 'rb')
|
20
|
+
# compressed = Tempfile.new
|
21
|
+
# # Will not compress everything in memory, but do it per chunk to spare memory. `compressed`
|
22
|
+
# # will be written to at the end of each chunk.
|
23
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
|
24
|
+
#
|
25
|
+
# You can also do the same to parts that you will later concatenate together elsewhere, in that case
|
26
|
+
# you need to skip the end marker:
|
27
|
+
#
|
28
|
+
# compressed = Tempfile.new
|
29
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'), compressed)
|
30
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'), compressed)
|
31
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'), compressed)
|
32
|
+
# ZipTricks::BlockDeflate.write_terminator(compressed)
|
33
|
+
#
|
34
|
+
# You can also elect to just compress strings in memory (to splice them later):
|
35
|
+
#
|
36
|
+
# compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
|
16
37
|
module ZipTricks::BlockDeflate
|
17
38
|
DEFAULT_BLOCKSIZE = 1024*1024*5
|
18
39
|
END_MARKER = [3, 0].pack("C*")
|