zip_tricks 5.4.0 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +12 -8
- data/lib/zip_tricks/output_enumerator.rb +48 -27
- data/lib/zip_tricks/streamer.rb +6 -7
- data/lib/zip_tricks/streamer/deflated_writer.rb +4 -2
- data/lib/zip_tricks/streamer/stored_writer.rb +4 -2
- data/lib/zip_tricks/version.rb +1 -1
- data/lib/zip_tricks/write_buffer.rb +37 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50ba2d6a0b5bde1cf51443c7ff4228a7e99a1cc1ac843e3ef23c0d197878a04b
|
4
|
+
data.tar.gz: 5fdb377cc34fd6d9edb4e7932e79ebe28bc2dc91aa71348e386b8b601e4f06f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c2ae765d7e6c584632b7606d66b970c100b9679cb6b503209be1179bc9582f83727b13154bfce5281dd8dc105f1f5112d2794df7f97ebb987a1e224f67f4840
|
7
|
+
data.tar.gz: e06306028f18fe1eb16abe12c48cd93182d11451298cde6068b9ccb37b78846be469b6bd48848507f38f8796b00281f27391589d20d9163b3845cba4112411c5
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 5.5.0
|
2
|
+
|
3
|
+
* In `OutputEnumerator` apply some amount of buffering to be within a UNIX socket size for metadata writes. This
|
4
|
+
speeds up usage with Puma by about 20 percent, as there won't be as many `syswrite` calls on the socket.
|
5
|
+
* Make `StoredWriter` and `DeflatedWriter` public constants so that standalone tests can be written for them
|
6
|
+
|
1
7
|
## 5.4.0
|
2
8
|
|
3
9
|
* Use block form for zlib Deflater calls to conserve memory
|
data/README.md
CHANGED
@@ -35,9 +35,9 @@ class ZipsController < ActionController::Base
|
|
35
35
|
zip_tricks_stream do |zip|
|
36
36
|
zip.write_deflated_file('report1.csv') do |sink|
|
37
37
|
CSV(sink) do |csv_write|
|
38
|
-
|
38
|
+
csv_write << Person.column_names
|
39
39
|
Person.all.find_each do |person|
|
40
|
-
|
40
|
+
csv_write << person.attributes.values
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
@@ -75,12 +75,15 @@ since you do not know how large the compressed data segments are going to be.
|
|
75
75
|
|
76
76
|
## Send a ZIP from a Rack response
|
77
77
|
|
78
|
-
|
79
|
-
|
78
|
+
To "pull" data from ZipTricks you can create an `OutputEnumerator` object which will yield the binary chunks piece
|
79
|
+
by piece, and apply some amount of buffering as well. Since this `OutputEnumerator` responds to `#each` and yields
|
80
|
+
Strings it also can (and should!) be used as a Rack response body. Return it to your webserver and you will
|
81
|
+
have your ZIP streamed. The block that you give to the `OutputEnumerator` will only start executing once your
|
82
|
+
response body starts getting iterated over - when actually sending the response to the client
|
80
83
|
(unless you are using a buffering Rack webserver, such as Webrick).
|
81
84
|
|
82
85
|
```ruby
|
83
|
-
body = ZipTricks::
|
86
|
+
body = ZipTricks::Streamer.output_enum do | zip |
|
84
87
|
zip.write_stored_file('mov.mp4') do |sink| # Those MPEG4 files do not compress that well
|
85
88
|
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
86
89
|
end
|
@@ -127,11 +130,12 @@ ZipTricks::Streamer.open(io) do | zip |
|
|
127
130
|
# Write the local file header first..
|
128
131
|
zip.add_stored_entry(filename: "first-file.bin", size: raw_file.size, crc32: raw_file_crc32)
|
129
132
|
|
130
|
-
#
|
133
|
+
# Adjust the ZIP offsets within the Streamer
|
134
|
+
zip.simulate_write(my_temp_file.size)
|
135
|
+
|
136
|
+
# ...and then send the actual file contents bypassing the Streamer interface
|
131
137
|
io.sendfile(my_temp_file)
|
132
138
|
|
133
|
-
# ...and then adjust the ZIP offsets within the Streamer
|
134
|
-
zip.simulate_write(my_temp_file.size)
|
135
139
|
end
|
136
140
|
```
|
137
141
|
|
@@ -1,43 +1,64 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
3
|
+
# The output enumerator makes it possible to "pull" from a ZipTricks streamer
|
4
|
+
# object instead of having it "push" writes to you. It will "stash" the block which
|
5
|
+
# writes the ZIP archive through the streamer, and when you call `each` on the Enumerator
|
6
|
+
# it will yield you the bytes the block writes. Since it is an enumerator you can
|
7
|
+
# use `next` to take chunks written by the ZipTricks streamer one by one. It can be very
|
8
|
+
# convenient when you need to segment your ZIP output into bigger chunks for, say,
|
9
|
+
# uploading them to a cloud storage provider such as S3.
|
10
|
+
#
|
11
|
+
# Another use of the output enumerator is outputting a ZIP archive from Rails or Rack,
|
12
|
+
# where an object responding to `each` is required which yields Strings. For instance,
|
13
|
+
# you can return a ZIP archive from Rack like so:
|
14
|
+
#
|
15
|
+
# iterable_zip_body = ZipTricks::OutputEnumerator.new do | streamer |
|
16
|
+
# streamer.write_deflated_file('big.csv') do |sink|
|
17
|
+
# CSV(sink) do |csv_writer|
|
18
|
+
# csv_writer << Person.column_names
|
19
|
+
# Person.all.find_each do |person|
|
20
|
+
# csv_writer << person.attributes.values
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
#
|
26
|
+
# [200, {'Content-Type' => 'binary/octet-stream'}, iterable_zip_body]
|
6
27
|
class ZipTricks::OutputEnumerator
|
7
|
-
|
8
|
-
#
|
9
|
-
# body will be read by the webserver, and will receive a {ZipTricks::Streamer}
|
10
|
-
# as it's block argument. You can then add entries to the Streamer as usual.
|
11
|
-
# The archive will be automatically closed at the end of the block.
|
28
|
+
DEFAULT_WRITE_BUFFER_SIZE = 64 * 1024
|
29
|
+
# Creates a new OutputEnumerator.
|
12
30
|
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
# body = ZipTricks::OutputEnumerator.new do | streamer |
|
22
|
-
# streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
|
23
|
-
# streamer << large_file.read(1024*1024) until large_file.eof?
|
24
|
-
# ...
|
25
|
-
# end
|
26
|
-
#
|
27
|
-
# return [200, {'Content-Type' => 'binary/octet-stream',
|
28
|
-
# 'Content-Length' => content_length.to_s}, body]
|
29
|
-
def initialize(**streamer_options, &blk)
|
31
|
+
# @param streamer_options[Hash] options for Streamer, see {ZipTricks::Streamer.new}
|
32
|
+
# @param write_buffer_size[Integer] By default all ZipTricks writes are unbuffered. For output to sockets
|
33
|
+
# it is beneficial to bulkify those writes so that they are roughly sized to a socket buffer chunk. This
|
34
|
+
# object will bulkify writes for you in this way (so `each` will yield not on every call to `<<` from the Streamer
|
35
|
+
# but at block size boundaries or greater). Set it to 0 for unbuffered writes.
|
36
|
+
# @param blk a block that will receive the Streamer object when executing. The block will not be executed
|
37
|
+
# immediately but only once `each` is called on the OutputEnumerator
|
38
|
+
def initialize(write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, **streamer_options, &blk)
|
30
39
|
@streamer_options = streamer_options.to_h
|
40
|
+
@bufsize = write_buffer_size.to_i
|
31
41
|
@archiving_block = blk
|
32
42
|
end
|
33
43
|
|
34
44
|
# Executes the block given to the constructor with a {ZipTricks::Streamer}
|
35
45
|
# and passes each written chunk to the block given to the method. This allows one
|
36
|
-
# to "take" output of the ZIP piecewise.
|
46
|
+
# to "take" output of the ZIP piecewise. If called without a block will return an Enumerator
|
47
|
+
# that you can pull data from using `next`.
|
48
|
+
#
|
49
|
+
# **NOTE** Because the `WriteBuffer` inside this object can reuse the buffer, it is important
|
50
|
+
# that the `String` that is yielded gets consumed eagerly (written byte-by-byte somewhere, or `#dup`-ed)
|
51
|
+
# since the write buffer will clear it after your block returns. If you expand this Enumerator
|
52
|
+
# eagerly into an Array you might notice that a lot of the segments of your ZIP output are
|
53
|
+
# empty - this means that you need to duplicate them.
|
54
|
+
#
|
55
|
+
# @yield [String] a chunk of the ZIP output in binary encoding
|
37
56
|
def each
|
38
57
|
if block_given?
|
39
58
|
block_write = ZipTricks::BlockWrite.new { |chunk| yield(chunk) }
|
40
|
-
ZipTricks::
|
59
|
+
buffer = ZipTricks::WriteBuffer.new(block_write, @bufsize)
|
60
|
+
ZipTricks::Streamer.open(buffer, **@streamer_options, &@archiving_block)
|
61
|
+
buffer.flush
|
41
62
|
else
|
42
63
|
enum_for(:each)
|
43
64
|
end
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -93,7 +93,7 @@ class ZipTricks::Streamer
|
|
93
93
|
UnknownMode = Class.new(StandardError)
|
94
94
|
OffsetOutOfSync = Class.new(StandardError)
|
95
95
|
|
96
|
-
private_constant :
|
96
|
+
private_constant :STORED, :DEFLATED
|
97
97
|
|
98
98
|
# Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
|
99
99
|
# returns, the Streamer will have its `close` method called, which will write out the central
|
@@ -138,20 +138,19 @@ class ZipTricks::Streamer
|
|
138
138
|
|
139
139
|
# Creates a new Streamer on top of the given IO-ish object.
|
140
140
|
#
|
141
|
-
# @param
|
141
|
+
# @param writable[#<<] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
142
142
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
143
143
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
144
144
|
# @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
|
145
145
|
# should be suffixed with (1), (2) etc. Default value is `false` - if
|
146
146
|
# duplicate names are used, an exception will be raised
|
147
|
-
def initialize(
|
148
|
-
raise InvalidOutput, 'The
|
149
|
-
|
150
|
-
@dedupe_filenames = auto_rename_duplicate_filenames
|
151
|
-
@out = ZipTricks::WriteAndTell.new(stream)
|
147
|
+
def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false)
|
148
|
+
raise InvalidOutput, 'The writable must respond to #<<' unless writable.respond_to?(:<<)
|
149
|
+
@out = ZipTricks::WriteAndTell.new(writable)
|
152
150
|
@files = []
|
153
151
|
@path_set = ZipTricks::PathSet.new
|
154
152
|
@writer = writer
|
153
|
+
@dedupe_filenames = auto_rename_duplicate_filenames
|
155
154
|
end
|
156
155
|
|
157
156
|
# Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
|
@@ -13,7 +13,8 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
13
13
|
def initialize(io)
|
14
14
|
@compressed_io = io
|
15
15
|
@deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
|
16
|
-
@crc = ZipTricks::
|
16
|
+
@crc = ZipTricks::StreamCRC32.new
|
17
|
+
@crc_buf = ZipTricks::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
|
17
18
|
end
|
18
19
|
|
19
20
|
# Writes the given data into the deflater, and flushes the deflater
|
@@ -23,7 +24,7 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
23
24
|
# @return self
|
24
25
|
def <<(data)
|
25
26
|
@deflater.deflate(data) { |chunk| @compressed_io << chunk }
|
26
|
-
@
|
27
|
+
@crc_buf << data
|
27
28
|
self
|
28
29
|
end
|
29
30
|
|
@@ -34,6 +35,7 @@ class ZipTricks::Streamer::DeflatedWriter
|
|
34
35
|
# @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
|
35
36
|
def finish
|
36
37
|
@compressed_io << @deflater.finish until @deflater.finished?
|
38
|
+
@crc_buf.flush
|
37
39
|
{crc32: @crc.to_i, compressed_size: @deflater.total_out, uncompressed_size: @deflater.total_in}
|
38
40
|
ensure
|
39
41
|
@deflater.close
|
@@ -12,7 +12,8 @@ class ZipTricks::Streamer::StoredWriter
|
|
12
12
|
|
13
13
|
def initialize(io)
|
14
14
|
@io = ZipTricks::WriteAndTell.new(io)
|
15
|
-
@
|
15
|
+
@crc_compute = ZipTricks::StreamCRC32.new
|
16
|
+
@crc = ZipTricks::WriteBuffer.new(@crc_compute, CRC32_BUFFER_SIZE)
|
16
17
|
end
|
17
18
|
|
18
19
|
# Writes the given data to the contained IO object.
|
@@ -30,6 +31,7 @@ class ZipTricks::Streamer::StoredWriter
|
|
30
31
|
#
|
31
32
|
# @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
|
32
33
|
def finish
|
33
|
-
|
34
|
+
@crc.flush
|
35
|
+
{crc32: @crc_compute.to_i, compressed_size: @io.tell, uncompressed_size: @io.tell}
|
34
36
|
end
|
35
37
|
end
|
data/lib/zip_tricks/version.rb
CHANGED
@@ -7,13 +7,34 @@
|
|
7
7
|
# CRC32 combine operations - and this adds up. Since the CRC32 value
|
8
8
|
# is usually not needed until the complete output has completed
|
9
9
|
# we can buffer at least some amount of data before computing CRC32 over it.
|
10
|
+
# We also use this buffer for output via Rack, where some amount of buffering
|
11
|
+
# helps reduce the number of syscalls made by the webserver. ZipTricks performs
|
12
|
+
# lots of very small writes, and some degree of speedup (about 20%) can be achieved
|
13
|
+
# with a buffer of a few KB.
|
14
|
+
#
|
15
|
+
# Note that there is no guarantee that the write buffer is going to flush at or above
|
16
|
+
# the given `buffer_size`, because for writes which exceed the buffer size it will
|
17
|
+
# first `flush` and then write through the oversized chunk, without buffering it. This
|
18
|
+
# helps conserve memory. Also note that the buffer will *not* duplicate strings for you
|
19
|
+
# and *will* yield the same buffer String over and over, so if you are storing it in an
|
20
|
+
# Array you might need to duplicate it.
|
21
|
+
#
|
22
|
+
# Note also that the WriteBuffer assumes that the object it `<<`-writes into is going
|
23
|
+
# to **consume** in some way the string that it passes in. After the `<<` method returns,
|
24
|
+
# the WriteBuffer will be cleared, and it passes the same String reference on every call
|
25
|
+
# to `<<`. Therefore, if you need to retain the output of the WriteBuffer in, say, an Array,
|
26
|
+
# you might need to `.dup` the `String` it gives you.
|
10
27
|
class ZipTricks::WriteBuffer
|
11
28
|
# Creates a new WriteBuffer that writes through to the given writable object
|
12
29
|
#
|
13
|
-
# @param writable[#<<] An object that responds to `#<<` with
|
30
|
+
# @param writable[#<<] An object that responds to `#<<` with a String as argument
|
14
31
|
# @param buffer_size[Integer] How many bytes to buffer
|
15
32
|
def initialize(writable, buffer_size)
|
16
|
-
|
33
|
+
# Allocating the buffer using a zero-padded String as a variation
|
34
|
+
# on using capacity:, which JRuby apparently does not like very much. The
|
35
|
+
# desire here is that the buffer doesn't have to be resized during the lifetime
|
36
|
+
# of the object.
|
37
|
+
@buf = ("\0".b * (buffer_size * 2)).clear
|
17
38
|
@buffer_size = buffer_size
|
18
39
|
@writable = writable
|
19
40
|
end
|
@@ -24,28 +45,27 @@ class ZipTricks::WriteBuffer
|
|
24
45
|
# @param data[String] data to be written
|
25
46
|
# @return self
|
26
47
|
def <<(data)
|
27
|
-
|
28
|
-
|
48
|
+
if data.bytesize >= @buffer_size
|
49
|
+
flush unless @buf.empty? # <- this is where we can output less than @buffer_size
|
50
|
+
@writable << data
|
51
|
+
else
|
52
|
+
@buf << data
|
53
|
+
flush if @buf.bytesize >= @buffer_size
|
54
|
+
end
|
29
55
|
self
|
30
56
|
end
|
31
57
|
|
32
58
|
# Explicitly flushes the buffer if it contains anything
|
33
59
|
#
|
34
60
|
# @return self
|
35
|
-
def flush
|
36
|
-
|
37
|
-
|
38
|
-
|
61
|
+
def flush
|
62
|
+
unless @buf.empty?
|
63
|
+
@writable << @buf
|
64
|
+
@buf.clear
|
65
|
+
end
|
39
66
|
self
|
40
67
|
end
|
41
68
|
|
42
|
-
#
|
43
|
-
|
44
|
-
# computation by retrieving the CRC as an integer
|
45
|
-
#
|
46
|
-
# @return [Integer] the return value of `writable#to_i`
|
47
|
-
def to_i
|
48
|
-
flush!
|
49
|
-
@writable.to_i
|
50
|
-
end
|
69
|
+
# `flush!` was renamed to `flush` but we preserve this method for backwards compatibility
|
70
|
+
alias_method :flush!, :flush
|
51
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: exe
|
13
13
|
cert_chain: []
|
14
|
-
date: 2020-11-
|
14
|
+
date: 2020-11-23 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: bundler
|