zip_tricks 2.8.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/IMPLEMENTATION_DETAILS.md +2 -10
- data/README.md +62 -59
- data/examples/archive_size_estimate.rb +4 -4
- data/examples/rack_application.rb +3 -5
- data/lib/zip_tricks/block_deflate.rb +21 -0
- data/lib/zip_tricks/file_reader.rb +491 -0
- data/lib/zip_tricks/null_writer.rb +7 -2
- data/lib/zip_tricks/rack_body.rb +3 -3
- data/lib/zip_tricks/remote_io.rb +30 -20
- data/lib/zip_tricks/remote_uncap.rb +10 -10
- data/lib/zip_tricks/size_estimator.rb +64 -0
- data/lib/zip_tricks/stream_crc32.rb +2 -2
- data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
- data/lib/zip_tricks/streamer/entry.rb +21 -0
- data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
- data/lib/zip_tricks/streamer/writable.rb +20 -0
- data/lib/zip_tricks/streamer.rb +172 -66
- data/lib/zip_tricks/zip_writer.rb +346 -0
- data/lib/zip_tricks.rb +1 -4
- data/spec/spec_helper.rb +1 -38
- data/spec/zip_tricks/file_reader_spec.rb +47 -0
- data/spec/zip_tricks/rack_body_spec.rb +2 -2
- data/spec/zip_tricks/remote_io_spec.rb +8 -20
- data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
- data/spec/zip_tricks/size_estimator_spec.rb +31 -0
- data/spec/zip_tricks/streamer_spec.rb +59 -36
- data/spec/zip_tricks/zip_writer_spec.rb +408 -0
- data/zip_tricks.gemspec +20 -14
- metadata +33 -16
- data/lib/zip_tricks/manifest.rb +0 -85
- data/lib/zip_tricks/microzip.rb +0 -339
- data/lib/zip_tricks/stored_size_estimator.rb +0 -44
- data/spec/zip_tricks/manifest_spec.rb +0 -60
- data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
- data/spec/zip_tricks/microzip_spec.rb +0 -546
- data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bc3917bd654f3fca15ae6bce1769eb0fa45dd12
|
4
|
+
data.tar.gz: 12f495895ed59e23cc89bf7b86b91bd9422c1608
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66fe048eddc9c00ed459e02e30ba8607a76cf297de88bf595694d14eec42e7b1075752b42fae8e86dcfe06ca2814db985452ed0244f803207491b100baed1041
|
7
|
+
data.tar.gz: 717bd4d51c597fefdd77bfe0f7df3c41f2ea8ae0122f0b5330ab6cf63b6183370e5dd8bbc19b868a6f52ae53a84147bfdb8bee54bbc44b63df563074192f7ec8
|
data/Gemfile
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
gem 'rubyzip', '~> 1.1', '>= 1.1.7'
|
4
|
-
gem 'very_tiny_state_machine', '~> 2'
|
5
|
-
|
6
3
|
group :development do
|
4
|
+
gem 'rubyzip', '~> 1.1', '>= 1.1.7'
|
5
|
+
gem 'terminal-table'
|
7
6
|
gem 'range_utils'
|
8
7
|
gem 'rack', '~> 1.6' # For Jeweler
|
9
8
|
gem 'rake', '~> 10.4'
|
10
9
|
gem "rspec", "~> 3.2.0", '< 3.3'
|
10
|
+
gem 'coderay'
|
11
11
|
gem "yard", "~> 0.8"
|
12
12
|
gem "bundler", "~> 1.0"
|
13
13
|
gem "jeweler", "~> 2.0.1"
|
data/IMPLEMENTATION_DETAILS.md
CHANGED
@@ -24,16 +24,8 @@ Data descriptors permit you to generate "postfix" ZIP files (where you write the
|
|
24
24
|
know the CRC32 and the file size upfront, then write the compressed file data, and only then - once you know what your CRC32,
|
25
25
|
compressed and uncompressed sizes are etc. - write them into a data descriptor that follows the file data.
|
26
26
|
|
27
|
-
The streamer
|
28
|
-
with the 7Zip version that we want to support
|
29
|
-
bit 3 that trips up that version of 7Zip. If we were to use data descriptors, we would have to up the minimum supported version
|
30
|
-
of 7Zip.
|
31
|
-
|
32
|
-
That means, in turn, that **to use the ZipTricks streamer you have to know the CRC32 and the sizes of the compressed/uncompressed
|
33
|
-
file upfront.** So you have to precompute them in some way. To do that, you can use `BlockDeflate` to precompress the file in
|
34
|
-
parallel, and `StreamCRC32` to compute the CRC checksum, before feeding them to the ZIP writer.
|
35
|
-
|
36
|
-
This approach might be reconsidered in the future.
|
27
|
+
The streamer has optional support for data descriptors. Their use can apparently [be problematic](https://github.com/thejoshwolfe/yazl/issues/13)
|
28
|
+
with the 7Zip version that we want to support, but in our tests everything worked fine.
|
37
29
|
|
38
30
|
For more info see https://github.com/thejoshwolfe/yazl#general-purpose-bit-flag
|
39
31
|
|
data/README.md
CHANGED
@@ -2,77 +2,81 @@
|
|
2
2
|
|
3
3
|
[](https://travis-ci.org/WeTransfer/zip_tricks)
|
4
4
|
|
5
|
-
|
5
|
+
Allows streaming, non-rewinding ZIP file output from Ruby.
|
6
6
|
Spiritual successor to [zipline](https://github.com/fringd/zipline)
|
7
7
|
|
8
|
-
Requires Ruby 2.1
|
9
|
-
The library is composed of a loose set of modules.
|
8
|
+
Requires Ruby 2.1+ syntax support and a working zlib (all available to jRuby as well).
|
10
9
|
|
11
|
-
##
|
10
|
+
## Create a ZIP file without size estimation (compress on-the-fly)
|
12
11
|
|
13
|
-
|
14
|
-
|
12
|
+
When you compress on the fly and use data descriptors it is not really possible to compute the file size upfront.
|
13
|
+
But it is very likely to yield good compression - especially if you send things like CSV files.
|
15
14
|
|
16
|
-
|
15
|
+
out = my_tempfile # can also be a socket
|
16
|
+
ZipTricks::Streamer.open(out) do |zip|
|
17
|
+
zip.write_stored_file('mov.mp4.txt') do |sink|
|
18
|
+
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
19
|
+
end
|
20
|
+
zip.write_deflated_file('long-novel.txt') do |sink|
|
21
|
+
File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
|
22
|
+
end
|
23
|
+
end
|
17
24
|
|
18
|
-
|
19
|
-
compress a file in parts.
|
25
|
+
## Send the same ZIP file from a Rack response
|
20
26
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
# will be written to at the end of each chunk.
|
25
|
-
ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
|
27
|
+
Create a `RackBody` object and give its constructor a block that adds files.
|
28
|
+
The block will only be called when actually sending the response to the client
|
29
|
+
(unless you are using a buffering Rack webserver, such as Webrick).
|
26
30
|
|
27
|
-
|
28
|
-
|
31
|
+
body = ZipTricks::RackBody.new do | zip |
|
32
|
+
zip.write_stored_file('mov.mp4') do |sink| # Those MPEG4 files do not compress that well
|
33
|
+
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
34
|
+
end
|
35
|
+
zip.write_deflated_file('long-novel.txt') do |sink|
|
36
|
+
File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
|
37
|
+
end
|
38
|
+
end
|
39
|
+
[200, {'Transfer-Encoding' => 'chunked'}, body]
|
29
40
|
|
30
|
-
|
31
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb), compressed)
|
32
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb), compressed)
|
33
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb), compressed)
|
34
|
-
ZipTricks::BlockDeflate.write_terminator(compressed)
|
41
|
+
## Send a ZIP file of known size, with correct headers
|
35
42
|
|
36
|
-
|
43
|
+
Use the `SizeEstimator` to compute the correct size of the resulting archive.
|
37
44
|
|
38
|
-
|
45
|
+
zip_body = ZipTricks::RackBody.new do | zip |
|
46
|
+
zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
|
47
|
+
zip << read_file('myfile1.bin')
|
48
|
+
zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
|
49
|
+
zip << read_file('myfile2.bin')
|
50
|
+
end
|
51
|
+
bytesize = ZipTricks::SizeEstimator.estimate do |z|
|
52
|
+
z.add_stored_entry(filename: 'myfile1.bin', size: 9090821)
|
53
|
+
z.add_stored_entry(filename: 'myfile2.bin', size: 458678)
|
54
|
+
end
|
55
|
+
[200, {'Content-Length' => bytesize.to_s}, zip_body]
|
39
56
|
|
40
|
-
##
|
57
|
+
## Other usage examples
|
41
58
|
|
42
|
-
|
43
|
-
|
44
|
-
rewind the output IO. It also avoids using the local footers instead of headers, therefore permitting
|
45
|
-
Zip64-sized entries to be stored easily.
|
59
|
+
Check out the `examples/` directory at the root of the project. This will give you a good idea
|
60
|
+
of various use cases the library supports.
|
46
61
|
|
47
|
-
|
48
|
-
|
49
|
-
|
62
|
+
## Writing ZIP files using the Streamer bypass
|
63
|
+
|
64
|
+
You do not have to "feed" all the contents of the files you put in the archive through the Streamer object.
|
65
|
+
If the write destination for your use case is a `Socket` (say, you are writing using Rack hijack) and you know
|
66
|
+
the metadata of the file upfront (the CRC32 of the uncompressed file and the sizes), you can write directly
|
67
|
+
to that socket using some accelerated writing technique, and only use the Streamer to write out the ZIP metadata.
|
68
|
+
|
69
|
+
# io has to be an object that supports #<<
|
50
70
|
ZipTricks::Streamer.open(io) do | zip |
|
51
|
-
|
52
|
-
#
|
71
|
+
# raw_file is written "as is" (STORED mode).
|
72
|
+
# Write the local file header first..
|
53
73
|
zip.add_stored_entry("first-file.bin", raw_file.size, raw_file_crc32)
|
54
|
-
while blob = raw_file.read(2048)
|
55
|
-
zip << blob
|
56
|
-
end
|
57
|
-
|
58
|
-
# another_file is assumed to be block-deflated (DEFLATE mode)
|
59
|
-
zip.add_compressed_entry("another-file.bin", another_file_size, another_file_crc32, compressed_file.size)
|
60
|
-
while blob = compressed_file.read(2048)
|
61
|
-
zip << blob
|
62
|
-
end
|
63
74
|
|
64
|
-
#
|
65
|
-
|
66
|
-
zip.add_compressed_entry("compressed-in-parts.bin", another_file_size, another_file_crc32, deflated_size)
|
67
|
-
while blob = part1.read(2048)
|
68
|
-
zip << blob
|
69
|
-
end
|
70
|
-
while blob = part2.read(2048)
|
71
|
-
zip << blob
|
72
|
-
end
|
73
|
-
ZipTricks::BlockDeflate.write_terminator(zip)
|
75
|
+
# then send the actual file contents bypassing the Streamer interface
|
76
|
+
io.sendfile(my_temp_file)
|
74
77
|
|
75
|
-
...
|
78
|
+
# ...and then adjust the ZIP offsets within the Streamer
|
79
|
+
zip.simulate_write(my_temp_file.size)
|
76
80
|
end
|
77
81
|
|
78
82
|
## RackBody
|
@@ -83,7 +87,7 @@ and will receive a {ZipTricks::Streamer} as it's block argument. You can then ad
|
|
83
87
|
The archive will be automatically closed at the end of the block.
|
84
88
|
|
85
89
|
# Precompute the Content-Length ahead of time
|
86
|
-
content_length = ZipTricks::
|
90
|
+
content_length = ZipTricks::SizeEstimator.estimate do | estimator |
|
87
91
|
estimator.add_stored_entry('large.tif', size=1289894)
|
88
92
|
end
|
89
93
|
|
@@ -94,7 +98,7 @@ The archive will be automatically closed at the end of the block.
|
|
94
98
|
...
|
95
99
|
end
|
96
100
|
|
97
|
-
|
101
|
+
[200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
|
98
102
|
|
99
103
|
## BlockWrite
|
100
104
|
|
@@ -108,13 +112,13 @@ destination. For Rack/Rails just use RackBody since it sets this up for you.
|
|
108
112
|
....
|
109
113
|
end
|
110
114
|
|
111
|
-
##
|
115
|
+
## SizeEstimator
|
112
116
|
|
113
117
|
Is used to predict the size of the ZIP archive after output. This can be used to generate, say, a `Content-Length` header,
|
114
118
|
or to predict the size of the resulting archive on the storage device. The size is estimated using a very fast "fake archiving"
|
115
119
|
procedure, so it computes the sizes of all the headers and the central directory very accurately.
|
116
120
|
|
117
|
-
expected_zip_archive_size =
|
121
|
+
expected_zip_archive_size = SizeEstimator.estimate do | estimator |
|
118
122
|
estimator.add_stored_entry("file.doc", size=898291)
|
119
123
|
estimator.add_compressed_entry("family.JPG", size=89281911, compressed_size=89218)
|
120
124
|
end
|
@@ -146,5 +150,4 @@ library functions.
|
|
146
150
|
|
147
151
|
## Copyright
|
148
152
|
|
149
|
-
Copyright (c)
|
150
|
-
further details.
|
153
|
+
Copyright (c) 2016 WeTransfer. See LICENSE.txt for further details.
|
@@ -3,11 +3,11 @@ require_relative '../lib/zip_tricks'
|
|
3
3
|
# Predict how large a ZIP file is going to be without having access to the actual
|
4
4
|
# file contents, but using just the filenames (influences the file size) and the size
|
5
5
|
# of the files
|
6
|
-
zip_archive_size_in_bytes = ZipTricks::
|
6
|
+
zip_archive_size_in_bytes = ZipTricks::SizeEstimator.estimate do |zip|
|
7
7
|
# Pretend we are going to make a ZIP file which contains a few
|
8
8
|
# MP4 files (those do not compress all too well)
|
9
|
-
zip.add_stored_entry("MOV_1234.MP4", 898090)
|
10
|
-
zip.add_stored_entry("MOV_1235.MP4", 7855126)
|
9
|
+
zip.add_stored_entry(filename: "MOV_1234.MP4", size: 898090)
|
10
|
+
zip.add_stored_entry(filename: "MOV_1235.MP4", size: 7855126)
|
11
11
|
end
|
12
12
|
|
13
|
-
zip_archive_size_in_bytes #=>
|
13
|
+
puts zip_archive_size_in_bytes #=> 8753467
|
@@ -27,7 +27,7 @@ class ZipDownload
|
|
27
27
|
# the user that the download stalled or was aborted in-flight.
|
28
28
|
# Note that using the size estimator here does _not_ read or compress
|
29
29
|
# your original file, so it is very fast.
|
30
|
-
size = ZipTricks::
|
30
|
+
size = ZipTricks::SizeEstimator.estimate do |ar|
|
31
31
|
ar.add_stored_entry(filename, f.size)
|
32
32
|
end
|
33
33
|
|
@@ -37,13 +37,11 @@ class ZipDownload
|
|
37
37
|
begin
|
38
38
|
# We are adding only one file to the ZIP here, but you could do that
|
39
39
|
# with an arbitrary number of files of course.
|
40
|
-
zip.add_stored_entry(filename, f.size, crc32)
|
40
|
+
zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
|
41
41
|
# Write the contents of the file. It is stored, so the writes go directly
|
42
42
|
# to the Rack output, bypassing any RubyZip deflaters/compressors. In fact you
|
43
43
|
# are yielding the "blob" string here directly to the Rack server handler.
|
44
|
-
|
45
|
-
zip << blob
|
46
|
-
end
|
44
|
+
IO.copy_stream(f, zip)
|
47
45
|
ensure
|
48
46
|
f.close # Make sure the opened file we read from gets closed
|
49
47
|
end
|
@@ -13,6 +13,27 @@
|
|
13
13
|
# When you deflate the chunks separately, you need to write the end marker yourself (using `write_terminator`).
|
14
14
|
# If you just want to deflate a large IO's contents, use `deflate_in_blocks_and_terminate` to have the end marker
|
15
15
|
# written out for you.
|
16
|
+
#
|
17
|
+
# Basic usage to compress a file in parts:
|
18
|
+
#
|
19
|
+
# source_file = File.open('12_gigs.bin', 'rb')
|
20
|
+
# compressed = Tempfile.new
|
21
|
+
# # Will not compress everything in memory, but do it per chunk to spare memory. `compressed`
|
22
|
+
# # will be written to at the end of each chunk.
|
23
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file, compressed)
|
24
|
+
#
|
25
|
+
# You can also do the same to parts that you will later concatenate together elsewhere, in that case
|
26
|
+
# you need to skip the end marker:
|
27
|
+
#
|
28
|
+
# compressed = Tempfile.new
|
29
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'), compressed)
|
30
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'), compressed)
|
31
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'), compressed)
|
32
|
+
# ZipTricks::BlockDeflate.write_terminator(compressed)
|
33
|
+
#
|
34
|
+
# You can also elect to just compress strings in memory (to splice them later):
|
35
|
+
#
|
36
|
+
# compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
|
16
37
|
module ZipTricks::BlockDeflate
|
17
38
|
DEFAULT_BLOCKSIZE = 1024*1024*5
|
18
39
|
END_MARKER = [3, 0].pack("C*")
|