zip_tricks 5.4.0 → 5.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 05e8eea8ecf1ad0b9b9cb132c54dde1abd60dc003afd1e5a1ce989786ce89608
4
- data.tar.gz: fbdc2172fc3becefa4dd8720713acb11d838900465ac3cb42de38fec76fd0d90
3
+ metadata.gz: 50ba2d6a0b5bde1cf51443c7ff4228a7e99a1cc1ac843e3ef23c0d197878a04b
4
+ data.tar.gz: 5fdb377cc34fd6d9edb4e7932e79ebe28bc2dc91aa71348e386b8b601e4f06f1
5
5
  SHA512:
6
- metadata.gz: 449c59e898d2b54a089b60d7aebe7633d9f65ad64a5ce014a7a44e1683f6d6cec4997f732fd06746edeec96594d5041b68efcf19571ef92c9798acc05ffda7bd
7
- data.tar.gz: 5ed26109e12373acfb9866531ef03c4302141bbb8a312f759ff768295c8d7926f650745f3f2624ad652333e57cd5bc5f98f932ace41504aeca45a668ef7a4f4a
6
+ metadata.gz: 4c2ae765d7e6c584632b7606d66b970c100b9679cb6b503209be1179bc9582f83727b13154bfce5281dd8dc105f1f5112d2794df7f97ebb987a1e224f67f4840
7
+ data.tar.gz: e06306028f18fe1eb16abe12c48cd93182d11451298cde6068b9ccb37b78846be469b6bd48848507f38f8796b00281f27391589d20d9163b3845cba4112411c5
@@ -1,3 +1,9 @@
1
+ ## 5.5.0
2
+
3
+ * In `OutputEnumerator` apply some amount of buffering to be within a UNIX socket size for metadata writes. This
4
+ speeds up usage with Puma by about 20 percent, as there won't be as many `syswrite` calls on the socket.
5
+ * Make `StoredWriter` and `DeflatedWriter` public constants so that standalone tests can be written for them
6
+
1
7
  ## 5.4.0
2
8
 
3
9
  * Use block form for zlib Deflater calls to conserve memory
data/README.md CHANGED
@@ -35,9 +35,9 @@ class ZipsController < ActionController::Base
35
35
  zip_tricks_stream do |zip|
36
36
  zip.write_deflated_file('report1.csv') do |sink|
37
37
  CSV(sink) do |csv_write|
38
- csv << Person.column_names
38
+ csv_write << Person.column_names
39
39
  Person.all.find_each do |person|
40
- csv << person.attributes.values
40
+ csv_write << person.attributes.values
41
41
  end
42
42
  end
43
43
  end
@@ -75,12 +75,15 @@ since you do not know how large the compressed data segments are going to be.
75
75
 
76
76
  ## Send a ZIP from a Rack response
77
77
 
78
- Create a `RackBody` object and give it's constructor a block that adds files.
79
- The block will only be called when actually sending the response to the client
78
+ To "pull" data from ZipTricks you can create an `OutputEnumerator` object which will yield the binary chunks piece
79
+ by piece, and apply some amount of buffering as well. Since this `OutputEnumerator` responds to `#each` and yields
80
+ Strings it also can (and should!) be used as a Rack response body. Return it to your webserver and you will
81
+ have your ZIP streamed. The block that you give to the `OutputEnumerator` will only start executing once your
82
+ response body starts getting iterated over - when actually sending the response to the client
80
83
  (unless you are using a buffering Rack webserver, such as Webrick).
81
84
 
82
85
  ```ruby
83
- body = ZipTricks::RackBody.new do | zip |
86
+ body = ZipTricks::Streamer.output_enum do | zip |
84
87
  zip.write_stored_file('mov.mp4') do |sink| # Those MPEG4 files do not compress that well
85
88
  File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
86
89
  end
@@ -127,11 +130,12 @@ ZipTricks::Streamer.open(io) do | zip |
127
130
  # Write the local file header first..
128
131
  zip.add_stored_entry(filename: "first-file.bin", size: raw_file.size, crc32: raw_file_crc32)
129
132
 
130
- # then send the actual file contents bypassing the Streamer interface
133
+ # Adjust the ZIP offsets within the Streamer
134
+ zip.simulate_write(my_temp_file.size)
135
+
136
+ # ...and then send the actual file contents bypassing the Streamer interface
131
137
  io.sendfile(my_temp_file)
132
138
 
133
- # ...and then adjust the ZIP offsets within the Streamer
134
- zip.simulate_write(my_temp_file.size)
135
139
  end
136
140
  ```
137
141
 
@@ -1,43 +1,64 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Can be used as a Rack response body directly. Will yield
4
- # a {ZipTricks::Streamer} for adding entries to the archive and writing
5
- # zip entry bodies.
3
+ # The output enumerator makes it possible to "pull" from a ZipTricks streamer
4
+ # object instead of having it "push" writes to you. It will "stash" the block which
5
+ # writes the ZIP archive through the streamer, and when you call `each` on the Enumerator
6
+ # it will yield you the bytes the block writes. Since it is an enumerator you can
7
+ # use `next` to take chunks written by the ZipTricks streamer one by one. It can be very
8
+ # convenient when you need to segment your ZIP output into bigger chunks for, say,
9
+ # uploading them to a cloud storage provider such as S3.
10
+ #
11
+ # Another use of the output enumerator is outputting a ZIP archive from Rails or Rack,
12
+ # where an object responding to `each` is required which yields Strings. For instance,
13
+ # you can return a ZIP archive from Rack like so:
14
+ #
15
+ # iterable_zip_body = ZipTricks::OutputEnumerator.new do | streamer |
16
+ # streamer.write_deflated_file('big.csv') do |sink|
17
+ # CSV(sink) do |csv_writer|
18
+ # csv_writer << Person.column_names
19
+ # Person.all.find_each do |person|
20
+ # csv_writer << person.attributes.values
21
+ # end
22
+ # end
23
+ # end
24
+ # end
25
+ #
26
+ # [200, {'Content-Type' => 'binary/octet-stream'}, iterable_zip_body]
6
27
  class ZipTricks::OutputEnumerator
7
- # Prepares a new Rack response body with a Zip output stream.
8
- # The block given to the constructor will be called when the response
9
- # body will be read by the webserver, and will receive a {ZipTricks::Streamer}
10
- # as it's block argument. You can then add entries to the Streamer as usual.
11
- # The archive will be automatically closed at the end of the block.
28
+ DEFAULT_WRITE_BUFFER_SIZE = 64 * 1024
29
+ # Creates a new OutputEnumerator.
12
30
  #
13
- # # Precompute the Content-Length ahead of time
14
- # content_length = ZipTricks::SizeEstimator.estimate do | estimator |
15
- # estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
16
- # end
17
- #
18
- # # Prepare the response body.
19
- # # The block will only be called when the
20
- # # response starts to be written.
21
- # body = ZipTricks::OutputEnumerator.new do | streamer |
22
- # streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
23
- # streamer << large_file.read(1024*1024) until large_file.eof?
24
- # ...
25
- # end
26
- #
27
- # return [200, {'Content-Type' => 'binary/octet-stream',
28
- # 'Content-Length' => content_length.to_s}, body]
29
- def initialize(**streamer_options, &blk)
31
+ # @param streamer_options[Hash] options for Streamer, see {ZipTricks::Streamer.new}
32
+ # @param write_buffer_size[Integer] By default all ZipTricks writes are unbuffered. For output to sockets
33
+ # it is beneficial to bulkify those writes so that they are roughly sized to a socket buffer chunk. This
34
+ # object will bulkify writes for you in this way (so `each` will yield not on every call to `<<` from the Streamer
35
+ # but at block size boundaries or greater). Set it to 0 for unbuffered writes.
36
+ # @param blk a block that will receive the Streamer object when executing. The block will not be executed
37
+ # immediately but only once `each` is called on the OutputEnumerator
38
+ def initialize(write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, **streamer_options, &blk)
30
39
  @streamer_options = streamer_options.to_h
40
+ @bufsize = write_buffer_size.to_i
31
41
  @archiving_block = blk
32
42
  end
33
43
 
34
44
  # Executes the block given to the constructor with a {ZipTricks::Streamer}
35
45
  # and passes each written chunk to the block given to the method. This allows one
36
- # to "take" output of the ZIP piecewise.
46
+ # to "take" output of the ZIP piecewise. If called without a block will return an Enumerator
47
+ # that you can pull data from using `next`.
48
+ #
49
+ # **NOTE** Because the `WriteBuffer` inside this object can reuse the buffer, it is important
50
+ # that the `String` that is yielded gets consumed eagerly (written byte-by-byte somewhere, or `#dup`-ed)
51
+ # since the write buffer will clear it after your block returns. If you expand this Enumerator
52
+ # eagerly into an Array you might notice that a lot of the segments of your ZIP output are
53
+ # empty - this means that you need to duplicate them.
54
+ #
55
+ # @yield [String] a chunk of the ZIP output in binary encoding
37
56
  def each
38
57
  if block_given?
39
58
  block_write = ZipTricks::BlockWrite.new { |chunk| yield(chunk) }
40
- ZipTricks::Streamer.open(block_write, **@streamer_options, &@archiving_block)
59
+ buffer = ZipTricks::WriteBuffer.new(block_write, @bufsize)
60
+ ZipTricks::Streamer.open(buffer, **@streamer_options, &@archiving_block)
61
+ buffer.flush
41
62
  else
42
63
  enum_for(:each)
43
64
  end
@@ -93,7 +93,7 @@ class ZipTricks::Streamer
93
93
  UnknownMode = Class.new(StandardError)
94
94
  OffsetOutOfSync = Class.new(StandardError)
95
95
 
96
- private_constant :DeflatedWriter, :StoredWriter, :STORED, :DEFLATED
96
+ private_constant :STORED, :DEFLATED
97
97
 
98
98
  # Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
99
99
  # returns, the Streamer will have its `close` method called, which will write out the central
@@ -138,20 +138,19 @@ class ZipTricks::Streamer
138
138
 
139
139
  # Creates a new Streamer on top of the given IO-ish object.
140
140
  #
141
- # @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
141
+ # @param writable[#<<] the destination IO for the ZIP. Anything that responds to `<<` can be used.
142
142
  # @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
143
143
  # Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
144
144
  # @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
145
145
  # should be suffixed with (1), (2) etc. Default value is `false` - if
146
146
  # duplicate names are used an exception will be raised
147
- def initialize(stream, writer: create_writer, auto_rename_duplicate_filenames: false)
148
- raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
149
-
150
- @dedupe_filenames = auto_rename_duplicate_filenames
151
- @out = ZipTricks::WriteAndTell.new(stream)
147
+ def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false)
148
+ raise InvalidOutput, 'The writable must respond to #<<' unless writable.respond_to?(:<<)
149
+ @out = ZipTricks::WriteAndTell.new(writable)
152
150
  @files = []
153
151
  @path_set = ZipTricks::PathSet.new
154
152
  @writer = writer
153
+ @dedupe_filenames = auto_rename_duplicate_filenames
155
154
  end
156
155
 
157
156
  # Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
@@ -13,7 +13,8 @@ class ZipTricks::Streamer::DeflatedWriter
13
13
  def initialize(io)
14
14
  @compressed_io = io
15
15
  @deflater = ::Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -::Zlib::MAX_WBITS)
16
- @crc = ZipTricks::WriteBuffer.new(ZipTricks::StreamCRC32.new, CRC32_BUFFER_SIZE)
16
+ @crc = ZipTricks::StreamCRC32.new
17
+ @crc_buf = ZipTricks::WriteBuffer.new(@crc, CRC32_BUFFER_SIZE)
17
18
  end
18
19
 
19
20
  # Writes the given data into the deflater, and flushes the deflater
@@ -23,7 +24,7 @@ class ZipTricks::Streamer::DeflatedWriter
23
24
  # @return self
24
25
  def <<(data)
25
26
  @deflater.deflate(data) { |chunk| @compressed_io << chunk }
26
- @crc << data
27
+ @crc_buf << data
27
28
  self
28
29
  end
29
30
 
@@ -34,6 +35,7 @@ class ZipTricks::Streamer::DeflatedWriter
34
35
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
35
36
  def finish
36
37
  @compressed_io << @deflater.finish until @deflater.finished?
38
+ @crc_buf.flush
37
39
  {crc32: @crc.to_i, compressed_size: @deflater.total_out, uncompressed_size: @deflater.total_in}
38
40
  ensure
39
41
  @deflater.close
@@ -12,7 +12,8 @@ class ZipTricks::Streamer::StoredWriter
12
12
 
13
13
  def initialize(io)
14
14
  @io = ZipTricks::WriteAndTell.new(io)
15
- @crc = ZipTricks::WriteBuffer.new(ZipTricks::StreamCRC32.new, CRC32_BUFFER_SIZE)
15
+ @crc_compute = ZipTricks::StreamCRC32.new
16
+ @crc = ZipTricks::WriteBuffer.new(@crc_compute, CRC32_BUFFER_SIZE)
16
17
  end
17
18
 
18
19
  # Writes the given data to the contained IO object.
@@ -30,6 +31,7 @@ class ZipTricks::Streamer::StoredWriter
30
31
  #
31
32
  # @return [Hash] a hash of `{crc32, compressed_size, uncompressed_size}`
32
33
  def finish
33
- {crc32: @crc.to_i, compressed_size: @io.tell, uncompressed_size: @io.tell}
34
+ @crc.flush
35
+ {crc32: @crc_compute.to_i, compressed_size: @io.tell, uncompressed_size: @io.tell}
34
36
  end
35
37
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ZipTricks
4
- VERSION = '5.4.0'
4
+ VERSION = '5.5.0'
5
5
  end
@@ -7,13 +7,34 @@
7
7
  # CRC32 combine operations - and this adds up. Since the CRC32 value
8
8
  # is usually not needed until the complete output has completed
9
9
  # we can buffer at least some amount of data before computing CRC32 over it.
10
+ # We also use this buffer for output via Rack, where some amount of buffering
11
+ # helps reduce the number of syscalls made by the webserver. ZipTricks performs
12
+ # lots of very small writes, and some degree of speedup (about 20%) can be achieved
13
+ # with a buffer of a few KB.
14
+ #
15
+ # Note that there is no guarantee that the write buffer is going to flush at or above
16
+ # the given `buffer_size`, because for writes which exceed the buffer size it will
17
+ # first `flush` and then write through the oversized chunk, without buffering it. This
18
+ # helps conserve memory. Also note that the buffer will *not* duplicate strings for you
19
+ # and *will* yield the same buffer String over and over, so if you are storing it in an
20
+ # Array you might need to duplicate it.
21
+ #
22
+ # Note also that the WriteBuffer assumes that the object it `<<`-writes into is going
23
+ # to **consume** in some way the string that it passes in. After the `<<` method returns,
24
+ # the WriteBuffer will be cleared, and it passes the same String reference on every call
25
+ # to `<<`. Therefore, if you need to retain the output of the WriteBuffer in, say, an Array,
26
+ # you might need to `.dup` the `String` it gives you.
10
27
  class ZipTricks::WriteBuffer
11
28
  # Creates a new WriteBuffer which passes buffered writes through to a given writable object
12
29
  #
13
- # @param writable[#<<] An object that responds to `#<<` with string as argument
30
+ # @param writable[#<<] An object that responds to `#<<` with a String as argument
14
31
  # @param buffer_size[Integer] How many bytes to buffer
15
32
  def initialize(writable, buffer_size)
16
- @buf = StringIO.new
33
+ # Allocating the buffer using a zero-padded String as a variation
34
+ # on using capacity:, which JRuby apparently does not like very much. The
35
+ # desire here is that the buffer doesn't have to be resized during the lifetime
36
+ # of the object.
37
+ @buf = ("\0".b * (buffer_size * 2)).clear
17
38
  @buffer_size = buffer_size
18
39
  @writable = writable
19
40
  end
@@ -24,28 +45,27 @@ class ZipTricks::WriteBuffer
24
45
  # @param data[String] data to be written
25
46
  # @return self
26
47
  def <<(data)
27
- @buf << data
28
- flush! if @buf.size > @buffer_size
48
+ if data.bytesize >= @buffer_size
49
+ flush unless @buf.empty? # <- this is where we can output less than @buffer_size
50
+ @writable << data
51
+ else
52
+ @buf << data
53
+ flush if @buf.bytesize >= @buffer_size
54
+ end
29
55
  self
30
56
  end
31
57
 
32
58
  # Explicitly flushes the buffer if it contains anything
33
59
  #
34
60
  # @return self
35
- def flush!
36
- @writable << @buf.string if @buf.size > 0
37
- @buf.truncate(0)
38
- @buf.rewind
61
+ def flush
62
+ unless @buf.empty?
63
+ @writable << @buf
64
+ @buf.clear
65
+ end
39
66
  self
40
67
  end
41
68
 
42
- # Flushes the buffer and returns the result of `#to_i` of the contained `writable`.
43
- # Primarily facilitates working with StreamCRC32 objects where you finish the
44
- # computation by retrieving the CRC as an integer
45
- #
46
- # @return [Integer] the return value of `writable#to_i`
47
- def to_i
48
- flush!
49
- @writable.to_i
50
- end
69
+ # `flush!` was renamed to `flush` but we preserve this method for backwards compatibility
70
+ alias_method :flush!, :flush
51
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zip_tricks
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.4.0
4
+ version: 5.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julik Tarkhanov
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: exe
13
13
  cert_chain: []
14
- date: 2020-11-19 00:00:00.000000000 Z
14
+ date: 2020-11-23 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: bundler