zip_tricks 5.4.0 → 5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +12 -8
- data/lib/zip_tricks/output_enumerator.rb +48 -27
- data/lib/zip_tricks/streamer.rb +6 -7
- data/lib/zip_tricks/streamer/deflated_writer.rb +4 -2
- data/lib/zip_tricks/streamer/stored_writer.rb +4 -2
- data/lib/zip_tricks/version.rb +1 -1
- data/lib/zip_tricks/write_buffer.rb +37 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50ba2d6a0b5bde1cf51443c7ff4228a7e99a1cc1ac843e3ef23c0d197878a04b
|
4
|
+
data.tar.gz: 5fdb377cc34fd6d9edb4e7932e79ebe28bc2dc91aa71348e386b8b601e4f06f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c2ae765d7e6c584632b7606d66b970c100b9679cb6b503209be1179bc9582f83727b13154bfce5281dd8dc105f1f5112d2794df7f97ebb987a1e224f67f4840
|
7
|
+
data.tar.gz: e06306028f18fe1eb16abe12c48cd93182d11451298cde6068b9ccb37b78846be469b6bd48848507f38f8796b00281f27391589d20d9163b3845cba4112411c5
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 5.5.0
|
2
|
+
|
3
|
+
* In `OutputEnumerator` apply some amount of buffering to be within a UNIX socket size for metadata writes. This
|
4
|
+
speeds up usage with Puma by about 20 percent, as there won't be as many `syswrite` calls on the socket.
|
5
|
+
* Make `StoredWriter` and `DeflatedWriter` public constants so that standalone tests can be written for them
|
6
|
+
|
1
7
|
## 5.4.0
|
2
8
|
|
3
9
|
* Use block form for zlib Deflater calls to conserve memory
|
data/README.md
CHANGED
@@ -35,9 +35,9 @@ class ZipsController < ActionController::Base
|
|
35
35
|
zip_tricks_stream do |zip|
|
36
36
|
zip.write_deflated_file('report1.csv') do |sink|
|
37
37
|
CSV(sink) do |csv_write|
|
38
|
-
|
38
|
+
csv_write << Person.column_names
|
39
39
|
Person.all.find_each do |person|
|
40
|
-
|
40
|
+
csv_write << person.attributes.values
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -75,12 +75,15 @@ since you do not know how large the compressed data segments are going to be.
|
|
75
75
|
|
76
76
|
## Send a ZIP from a Rack response
|
77
77
|
|
78
|
-
|
79
|
-
|
78
|
+
To "pull" data from ZipTricks you can create an `OutputEnumerator` object which will yield the binary chunks piece
|
79
|
+
by piece, and apply some amount of buffering as well. Since this `OutputEnumerator` responds to `#each` and yields
|
80
|
+
Strings it also can (and should!) be used as a Rack response body. Return it to your webserver and you will
|
81
|
+
have your ZIP streamed. The block that you give to the `OutputEnumerator` will only start executing once your
|
82
|
+
response body starts getting iterated over - when actually sending the response to the client
|
80
83
|
(unless you are using a buffering Rack webserver, such as Webrick).
|
81
84
|
|
82
85
|
```ruby
|
83
|
-
body = ZipTricks::
|
86
|
+
body = ZipTricks::Streamer.output_enum do | zip |
|
84
87
|
zip.write_stored_file('mov.mp4') do |sink| # Those MPEG4 files do not compress that well
|
85
88
|
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
86
89
|
end
|
@@ -127,11 +130,12 @@ ZipTricks::Streamer.open(io) do | zip |
|
|
127
130
|
# Write the local file header first..
|
128
131
|
zip.add_stored_entry(filename: "first-file.bin", size: raw_file.size, crc32: raw_file_crc32)
|
129
132
|
|
130
|
-
#
|
133
|
+
# Adjust the ZIP offsets within the Streamer
|
134
|
+
zip.simulate_write(my_temp_file.size)
|
135
|
+
|
136
|
+
# ...and then send the actual file contents bypassing the Streamer interface
|
131
137
|
io.sendfile(my_temp_file)
|
132
138
|
|
133
|
-
# ...and then adjust the ZIP offsets within the Streamer
|
134
|
-
zip.simulate_write(my_temp_file.size)
|
135
139
|
end
|
136
140
|
```
|
137
141
|
|
@@ -1,43 +1,64 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
3
|
+
# The output enumerator makes it possible to "pull" from a ZipTricks streamer
|
4
|
+
# object instead of having it "push" writes to you. It will "stash" the block which
|
5
|
+
# writes the ZIP archive through the streamer, and when you call `each` on the Enumerator
|
6
|
+
# it will yield you the bytes the block writes. Since it is an enumerator you can
|
7
|
+
# use `next` to take chunks written by the ZipTricks streamer one by one. It can be very
|
8
|
+
# convenient when you need to segment your ZIP output into bigger chunks for, say,
|
9
|
+
# uploading them to a cloud storage provider such as S3.
|
10
|
+
#
|
11
|
+
# Another use of the output enumerator is outputting a ZIP archive from Rails or Rack,
|
12
|
+
# where an object responding to `each` is required which yields Strings. For instance,
|
13
|
+
# you can return a ZIP archive from Rack like so:
|
14
|
+
#
|
15
|
+
# iterable_zip_body = ZipTricks::OutputEnumerator.new do | streamer |
|
16
|
+
# streamer.write_deflated_file('big.csv') do |sink|
|
17
|
+
# CSV(sink) do |csv_writer|
|
18
|
+
# csv_writer << Person.column_names
|
19
|
+
# Person.all.find_each do |person|
|
20
|
+
# csv_writer << person.attributes.values
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
#
|
26
|
+
# [200, {'Content-Type' => 'binary/octet-stream'}, iterable_zip_body]
|
6
27
|
class ZipTricks::OutputEnumerator
|
7
|
-
|
8
|
-
#
|
9
|
-
# body will be read by the webserver, and will receive a {ZipTricks::Streamer}
|
10
|
-
# as it's block argument. You can then add entries to the Streamer as usual.
|
11
|
-
# The archive will be automatically closed at the end of the block.
|
28
|
+
DEFAULT_WRITE_BUFFER_SIZE = 64 * 1024
|
29
|
+
# Creates a new OutputEnumerator.
|
12
30
|
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
# body = ZipTricks::OutputEnumerator.new do | streamer |
|
22
|
-
# streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
|
23
|
-
# streamer << large_file.read(1024*1024) until large_file.eof?
|
24
|
-
# ...
|
25
|
-
# end
|
26
|
-
#
|
27
|
-
# return [200, {'Content-Type' => 'binary/octet-stream',
|
28
|
-
# 'Content-Length' => content_length.to_s}, body]
|
29
|
-
def initialize(**streamer_options, &blk)
|
31
|
+
# @param streamer_options[Hash] options for Streamer, see {ZipTricks::Streamer.new}
|
32
|
+
# @param write_buffer_size[Integer] By default all ZipTricks writes are unbuffered. For output to sockets
|
33
|
+
# it is beneficial to bulkify those writes so that they are roughly sized to a socket buffer chunk. This
|
34
|
+
# object will bulkify writes for you in this way (so `each` will yield not on every call to `<<` from the Streamer
|
35
|
+
# but at block size boundaries or greater). Set it to 0 for unbuffered writes.
|
36
|
+
# @param blk a block that will receive the Streamer object when executing. The block will not be executed
|
37
|
+
# immediately but only once `each` is called on the OutputEnumerator
|
38
|
+
def initialize(write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, **streamer_options, &blk)
|
30
39
|
@streamer_options = streamer_options.to_h
|
40
|
+
@bufsize = write_buffer_size.to_i
|
31
41
|
@archiving_block = blk
|
32
42
|
end
|
33
43
|
|
34
44
|
# Executes the block given to the constructor with a {ZipTricks::Streamer}
|
35
45
|
# and passes each written chunk to the block given to the method. This allows one
|
36
|
-
# to "take" output of the ZIP piecewise.
|
46
|
+
# to "take" output of the ZIP piecewise. If called without a block will return an Enumerator
|
47
|
+
# that you can pull data from using `next`.
|
48
|
+
#
|
49
|
+
# **NOTE** Because the `WriteBuffer` inside this object can reuse the buffer, it is important
|
50
|
+
# that the `String` that is yielded gets consumed eagerly (either written byte-by-byte somewhere, or `#dup`-ed)
|
51
|
+
# since the write buffer will clear it after your block returns. If you expand this Enumerator
|
52
|
+
# eagerly into an Array you might notice that a lot of the segments of your ZIP output are
|
53
|
+
# empty - this means that you need to duplicate them.
|
54
|
+
#
|
55
|
+
# @yield [String] a chunk of the ZIP output in binary encoding
|
37
56
|
def each
|
38
57
|
if block_given?
|
39
58
|
block_write = ZipTricks::BlockWrite.new { |chunk| yield(chunk) }
|
40
|
-
ZipTricks::
|
59
|
+
buffer = ZipTricks::WriteBuffer.new(block_write, @bufsize)
|
60
|
+
ZipTricks::Streamer.open(buffer, **@streamer_options, &@archiving_block)
|
61
|
+
buffer.flush
|
41
62
|
else
|
42
63
|
enum_for(:each)
|
43
64
|
end
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -93,7 +93,7 @@ class ZipTricks::Streamer
|
|
93
93
|
UnknownMode = Class.new(StandardError)
|
94
94
|
OffsetOutOfSync = Class.new(StandardError)
|
95
95
|
|
96
|
-
private_constant :
|
96
|
+
private_constant :STORED, :DEFLATED
|
97
97
|
|
98
98
|
# Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
|
99
99
|
# returns, the Streamer will have its `close` method called, which will write out the central
|
@@ -138,20 +138,19 @@ class ZipTricks::Streamer
|
|
138
138
|
|
139
139
|
# Creates a new Streamer on top of the given IO-ish object.
|
140
140
|
#
|
141
|
-
# @param
|
141
|
+
# @param writable[#<<] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
142
142
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
143
143
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
144
144
|
# @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
|
145
145
|
# should be suffixed with (1), (2) etc. Default value is `false` - if
|
146
146
|
# duplicate names are used an exception will be raised
|
147
|
-
def initialize(
|
148
|
-
raise InvalidOutput, 'The
|
149
|
-
|
150
|
-
@dedupe_filenames = auto_rename_duplicate_filenames
|
151
|
-
@out = ZipTricks::WriteAndTell.new(stream)
|
147
|
+
def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false)
|
148
|
+
raise InvalidOutput, 'The writable must respond to #<<' unless writable.respond_to?(:<<)
|
149
|
+
@out = ZipTricks::WriteAndTell.new(writable)
|
152
150
|
@files = []
|
153
151
|
@path_set = ZipTricks::PathSet.new
|
154
152
|
@writer = writer
|
153
|
+
@dedupe_filenames = auto_rename_duplicate_filenames
|
155
154
|
end
|
156
155
|
|
157
156
|
# Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
|
@@ -13,7 +13,8 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
13
13
|
def initialize(io)
|
14
14
|
@compressed_io = io
|
15
15
|
@deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
|
16
|
-
@crc = ZipTricks::
|
16
|
+
@crc = ZipTricks::StreamCRC32.new
|
17
|
+
@crc_buf = ZipTricks::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
|
17
18
|
end
|
18
19
|
|
19
20
|
# Writes the given data into the deflater, and flushes the deflater
|
@@ -23,7 +24,7 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
23
24
|
# @return self
|
24
25
|
def <<(data)
|
25
26
|
@deflater.deflate(data) { |chunk| @compressed_io << chunk }
|
26
|
-
@
|
27
|
+
@crc_buf << data
|
27
28
|
self
|
28
29
|
end
|
29
30
|
|
@@ -34,6 +35,7 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
34
35
|
# @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
|
35
36
|
def finish
|
36
37
|
@compressed_io << @deflater.finish until @deflater.finished?
|
38
|
+
@crc_buf.flush
|
37
39
|
{crc32: @crc.to_i, compressed_size: @deflater.total_out, uncompressed_size: @deflater.total_in}
|
38
40
|
ensure
|
39
41
|
@deflater.close
|
@@ -12,7 +12,8 @@ class ZipTricks::Streamer::StoredWriter
|
|
12
12
|
|
13
13
|
def initialize(io)
|
14
14
|
@io = ZipTricks::WriteAndTell.new(io)
|
15
|
-
@
|
15
|
+
@crc_compute = ZipTricks::StreamCRC32.new
|
16
|
+
@crc = ZipTricks::WriteBuffer.new(@crc_compute, CRC32_BUFFER_SIZE)
|
16
17
|
end
|
17
18
|
|
18
19
|
# Writes the given data to the contained IO object.
|
@@ -30,6 +31,7 @@ class ZipTricks::Streamer::StoredWriter
|
|
30
31
|
#
|
31
32
|
# @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
|
32
33
|
def finish
|
33
|
-
|
34
|
+
@crc.flush
|
35
|
+
{crc32: @crc_compute.to_i, compressed_size: @io.tell, uncompressed_size: @io.tell}
|
34
36
|
end
|
35
37
|
end
|
data/lib/zip_tricks/version.rb
CHANGED
@@ -7,13 +7,34 @@
|
|
7
7
|
# CRC32 combine operations - and this adds up. Since the CRC32 value
|
8
8
|
# is usually not needed until the complete output has completed
|
9
9
|
# we can buffer at least some amount of data before computing CRC32 over it.
|
10
|
+
# We also use this buffer for output via Rack, where some amount of buffering
|
11
|
+
# helps reduce the number of syscalls made by the webserver. ZipTricks performs
|
12
|
+
# lots of very small writes, and some degree of speedup (about 20%) can be achieved
|
13
|
+
# with a buffer of a few KB.
|
14
|
+
#
|
15
|
+
# Note that there is no guarantee that the write buffer is going to flush at or above
|
16
|
+
# the given `buffer_size`, because for writes which exceed the buffer size it will
|
17
|
+
# first `flush` and then write through the oversized chunk, without buffering it. This
|
18
|
+
# helps conserve memory. Also note that the buffer will *not* duplicate strings for you
|
19
|
+
# and *will* yield the same buffer String over and over, so if you are storing it in an
|
20
|
+
# Array you might need to duplicate it.
|
21
|
+
#
|
22
|
+
# Note also that the WriteBuffer assumes that the object it `<<`-writes into is going
|
23
|
+
# to **consume** in some way the string that it passes in. After the `<<` method returns,
|
24
|
+
# the WriteBuffer will be cleared, and it passes the same String reference on every call
|
25
|
+
# to `<<`. Therefore, if you need to retain the output of the WriteBuffer in, say, an Array,
|
26
|
+
# you might need to `.dup` the `String` it gives you.
|
10
27
|
class ZipTricks::WriteBuffer
|
11
28
|
# Creates a new WriteBuffer bypassing into a given writable object
|
12
29
|
#
|
13
|
-
# @param writable[#<<] An object that responds to `#<<` with
|
30
|
+
# @param writable[#<<] An object that responds to `#<<` with a String as argument
|
14
31
|
# @param buffer_size[Integer] How many bytes to buffer
|
15
32
|
def initialize(writable, buffer_size)
|
16
|
-
|
33
|
+
# Allocating the buffer using a zero-padded String as a variation
|
34
|
+
# on using capacity:, which JRuby apparently does not like very much. The
|
35
|
+
# desire here is that the buffer doesn't have to be resized during the lifetime
|
36
|
+
# of the object.
|
37
|
+
@buf = ("\0".b * (buffer_size * 2)).clear
|
17
38
|
@buffer_size = buffer_size
|
18
39
|
@writable = writable
|
19
40
|
end
|
@@ -24,28 +45,27 @@ class ZipTricks::WriteBuffer
|
|
24
45
|
# @param data[String] data to be written
|
25
46
|
# @return self
|
26
47
|
def <<(data)
|
27
|
-
|
28
|
-
|
48
|
+
if data.bytesize >= @buffer_size
|
49
|
+
flush unless @buf.empty? # <- this is where we can output less than @buffer_size
|
50
|
+
@writable << data
|
51
|
+
else
|
52
|
+
@buf << data
|
53
|
+
flush if @buf.bytesize >= @buffer_size
|
54
|
+
end
|
29
55
|
self
|
30
56
|
end
|
31
57
|
|
32
58
|
# Explicitly flushes the buffer if it contains anything
|
33
59
|
#
|
34
60
|
# @return self
|
35
|
-
def flush
|
36
|
-
|
37
|
-
|
38
|
-
|
61
|
+
def flush
|
62
|
+
unless @buf.empty?
|
63
|
+
@writable << @buf
|
64
|
+
@buf.clear
|
65
|
+
end
|
39
66
|
self
|
40
67
|
end
|
41
68
|
|
42
|
-
#
|
43
|
-
|
44
|
-
# computation by retrieving the CRC as an integer
|
45
|
-
#
|
46
|
-
# @return [Integer] the return value of `writable#to_i`
|
47
|
-
def to_i
|
48
|
-
flush!
|
49
|
-
@writable.to_i
|
50
|
-
end
|
69
|
+
# `flush!` was renamed to `flush` but we preserve this method for backwards compatibility
|
70
|
+
alias_method :flush!, :flush
|
51
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: exe
|
13
13
|
cert_chain: []
|
14
|
-
date: 2020-11-
|
14
|
+
date: 2020-11-23 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: bundler
|