zip_tricks 4.4.2 → 4.5.0

@@ -1,3 +1,6 @@
+ # frozen_string_literal: true
+
+ # Rubocop: convention: Missing top-level class documentation comment.
  class ZipTricks::FileReader::InflatingReader
  def initialize(from_io, compressed_data_size)
  @io = from_io
@@ -6,7 +9,7 @@ class ZipTricks::FileReader::InflatingReader
  @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
  end

- def extract(n_bytes=nil)
+ def extract(n_bytes = nil)
  n_bytes ||= (@compressed_data_size - @already_read)

  return if eof?
@@ -1,3 +1,6 @@
+ # frozen_string_literal: true
+
+ # Rubocop: convention: Missing top-level class documentation comment.
  class ZipTricks::FileReader::StoredReader
  def initialize(from_io, compressed_data_size)
  @io = from_io
@@ -5,7 +8,7 @@ class ZipTricks::FileReader::StoredReader
  @already_read = 0
  end

- def extract(n_bytes=nil)
+ def extract(n_bytes = nil)
  n_bytes ||= (@compressed_data_size - @already_read)

  return if eof?
@@ -1,12 +1,12 @@
+ # frozen_string_literal: true
+
  # Used when you need to supply a destination IO for some
  # write operations, but want to discard the data (like when
  # estimating the size of a ZIP)
  module ZipTricks::NullWriter
  # @param data[String] the data to write
  # @return [self]
- def self.<<(data); self; end
-
- # @param data[String] the data to write
- # @return [Fixnum] the amount of data that was supposed to be written
- def self.write(data); data.bytesize; end
+ def self.<<(_)
+ self
+ end
  end
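
The trimmed-down NullWriter above keeps only `<<`, which is all the 4.5.0 Streamer needs from an output. A minimal sketch of using it as a discard destination for size estimation, mirroring the reworked `SizeEstimator.estimate` further down in this diff (the filename and sizes are illustrative):

    streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
    streamer.add_stored_entry(filename: 'report.pdf', size: 128, crc32: 0)
    streamer.simulate_write(128) # pretend the 128 body bytes were written
    streamer.close               # => archive size in bytes; nothing was actually output
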
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  # Can be used as a Rack response body directly. Will yield
  # a {ZipTricks::Streamer} for adding entries to the archive and writing
  # zip entry bodies.
@@ -13,14 +15,16 @@ class ZipTricks::RackBody
  # estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
  # end
  #
- # # Prepare the response body. The block will only be called when the response starts to be written.
+ # # Prepare the response body. The block will only be called when the
+ # response starts to be written.
  # body = ZipTricks::RackBody.new do | streamer |
  # streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
  # streamer << large_file.read(1024*1024) until large_file.eof?
  # ...
  # end
  #
- # return [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
+ # return [200, {'Content-Type' => 'binary/octet-stream',
+ # 'Content-Length' => content_length.to_s}, body]
  def initialize(&blk)
  @archiving_block = blk
  end
@@ -36,6 +40,5 @@ class ZipTricks::RackBody
  # Does nothing because nothing has to be deallocated or canceled
  # even if the zip output is incomplete. The archive gets closed
  # automatically as part of {ZipTricks::Streamer.open}
- def close
- end
+ def close; end
  end
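
A compact sketch of plugging the body into a bare Rack app, following the usage in the comment above (the filename, contents and the 'application/zip' content type are illustrative):

    # config.ru
    run lambda { |_env|
      body = ZipTricks::RackBody.new do |zip|
        zip.write_deflated_file('hello.txt') { |sink| sink << 'Hello from zip_tricks' }
      end
      [200, {'Content-Type' => 'application/zip'}, body]
    }
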
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  # Should be included into a Rails controller (together with `ActionController::Live`)
  # for easy ZIP output from any action.
  module ZipTricks::RailsStreaming
@@ -10,7 +12,7 @@ module ZipTricks::RailsStreaming
  # Create a wrapper for the write call that quacks like something you
  # can << to, used by ZipTricks
  w = ZipTricks::BlockWrite.new { |chunk| response.stream.write(chunk) }
- ZipTricks::Streamer.open(w){|z| yield(z) }
+ ZipTricks::Streamer.open(w) { |z| yield(z) }
  ensure
  response.stream.close
  end
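
For context, a sketch of how the module is meant to be mixed into a controller. Only the body of the streaming helper is visible in this hunk, so the helper name `zip_tricks_stream` is an assumption here; the rest follows the comment above:

    class ZipsController < ApplicationController
      include ActionController::Live
      include ZipTricks::RailsStreaming

      def download
        # Helper name assumed; it wraps response.stream as shown in the hunk above
        zip_tricks_stream do |zip|
          zip.write_deflated_file('greeting.txt') { |sink| sink << 'Hello from Rails' }
        end
      end
    end
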
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  # An object that fakes just-enough of an IO to be dangerous
  # - or, more precisely, to be useful as a source for the FileReader
  # central directory parser. Effectively we substitute an IO object
@@ -20,23 +22,25 @@ class ZipTricks::RemoteIO
  @pos = clamp(0, offset, @remote_size)
  0 # always return 0!
  end
-
+
  # Emulates IO#size.
  #
  # @return [Fixnum] the size of the remote resource
  def size
  @remote_size ||= request_object_size
  end
-
+
  # Emulates IO#read, but requires the number of bytes to read
  # The read will be limited to the
  # size of the remote resource relative to the current offset in the IO,
  # so if you are at offset 0 in the IO of size 10, doing a `read(20)`
- # will only return you 10 bytes of result, and not raise any exceptions.
+ # will only return you 10 bytes of result, and not raise any exceptions.
  #
  # @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
  # @return [String] the read bytes
- def read(n_bytes=nil)
+ # Rubocop: convention: Assignment Branch Condition size for read is too high. [17.92/15]
+ # Rubocop: convention: Method has too many lines. [13/10]
+ def read(n_bytes = nil)
  @remote_size ||= request_object_size

  # If the resource is empty there is nothing to read
@@ -87,7 +91,7 @@ class ZipTricks::RemoteIO

  private

- def clamp(a,b,c)
+ def clamp(a, b, c)
  return a if b < a
  return c if b > c
  b
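
The reformatted `clamp(a, b, c)` bounds the seek offset to the `[0, remote_size]` range; a quick illustration of the three branches shown above:

    clamp(0, -5, 100)   # => 0   - cannot seek before the start of the resource
    clamp(0, 42, 100)   # => 42  - offsets inside the resource pass through unchanged
    clamp(0, 250, 100)  # => 100 - cannot seek past the end of the resource
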
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  # Alows reading the central directory of a remote ZIP file without
  # downloading the entire file. The central directory provides the
  # offsets at which the actual file contents is located. You can then
@@ -6,12 +8,15 @@
  # Please read the security warning in `FileReader` _VERY CAREFULLY_
  # before you use this module.
  class ZipTricks::RemoteUncap
-
- # @param uri[String] the HTTP(S) URL to read the ZIP footer from
+ # @param uri[String] the HTTP(S) URL to read the ZIP footer from
  # @param reader_class[Class] which class to use for reading
- # @param options_for_zip_reader[Hash] any additional options to give to {ZipTricks::FileReader} when reading
- # @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the files within the remote archive
- def self.files_within_zip_at(uri, reader_class: ZipTricks::FileReader, **options_for_zip_reader)
+ # @param options_for_zip_reader[Hash] any additional options to give to
+ # {ZipTricks::FileReader} when reading
+ # @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the
+ # files within the remote archive
+ def self.files_within_zip_at(uri,
+ reader_class: ZipTricks::FileReader,
+ **options_for_zip_reader)
  fetcher = new(uri)
  fake_io = ZipTricks::RemoteIO.new(fetcher)
  reader = reader_class.new
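
A minimal call sketch for the reflowed signature above (the URL is illustrative; the return value is an Array of `ZipTricks::FileReader::ZipEntry` objects with per-file metadata, and any extra keyword arguments are forwarded to the reader):

    entries = ZipTricks::RemoteUncap.files_within_zip_at('https://example.com/big-archive.zip')
    # or, with an explicit reader class:
    entries = ZipTricks::RemoteUncap.files_within_zip_at('https://example.com/big-archive.zip',
                                                         reader_class: ZipTricks::FileReader)
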
@@ -1,48 +1,50 @@
+ # frozen_string_literal: true
+
  # Helps to estimate archive sizes
  class ZipTricks::SizeEstimator
  require_relative 'streamer'
-
- # Used to mark a couple of methods public
- class DetailStreamer < ::ZipTricks::Streamer
- public :add_file_and_write_local_header, :write_data_descriptor_for_last_entry
- end
- private_constant :DetailStreamer
-
+
  # Creates a new estimator with a Streamer object. Normally you should use
  # `estimate` instead an not use this method directly.
  def initialize(streamer)
  @streamer = streamer
  end
  private :initialize
-
+
  # Performs the estimate using fake archiving. It needs to know the sizes of the
  # entries upfront. Usage:
  #
  # expected_zip_size = SizeEstimator.estimate do | estimator |
  # estimator.add_stored_entry(filename: "file.doc", size: 898291)
- # estimator.add_compressed_entry(filename: "family.tif", uncompressed_size: 89281911, compressed_size: 121908)
+ # estimator.add_deflated_entry(filename: "family.tif",
+ # uncompressed_size: 89281911, compressed_size: 121908)
  # end
  #
- # @return [Fixnum] the size of the resulting archive, in bytes
+ # @return [Integer] the size of the resulting archive, in bytes
  # @yield [SizeEstimator] the estimator
  def self.estimate
- output_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
- DetailStreamer.open(output_io) { |zip| yield(new(zip)) }
- output_io.tell
+ streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
+ estimator = new(streamer)
+ yield(estimator)
+ streamer.close # Returns the .tell of the contained IO
  end

  # Add a fake entry to the archive, to see how big it is going to be in the end.
  #
  # @param filename [String] the name of the file (filenames are variable-width in the ZIP)
  # @param size [Fixnum] size of the uncompressed entry
- # @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
+ # @param use_data_descriptor[Boolean] whether the entry uses a postfix
+ # data descriptor to specify size
  # @return self
  def add_stored_entry(filename:, size:, use_data_descriptor: false)
- udd = !!use_data_descriptor
- @streamer.add_file_and_write_local_header(filename: filename, crc32: 0, storage_mode: 0,
- compressed_size: size, uncompressed_size: size, use_data_descriptor: udd)
+ @streamer.add_stored_entry(filename: filename,
+ crc32: 0,
+ size: size,
+ use_data_descriptor: use_data_descriptor)
  @streamer.simulate_write(size)
- @streamer.write_data_descriptor_for_last_entry if udd
+ if use_data_descriptor
+ @streamer.update_last_entry_and_write_data_descriptor(crc32: 0, compressed_size: size, uncompressed_size: size)
+ end
  self
  end

@@ -51,24 +53,34 @@ class ZipTricks::SizeEstimator
  # @param filename [String] the name of the file (filenames are variable-width in the ZIP)
  # @param uncompressed_size [Fixnum] size of the uncompressed entry
  # @param compressed_size [Fixnum] size of the compressed entry
- # @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
+ # @param use_data_descriptor[Boolean] whether the entry uses a postfix data
+ # descriptor to specify size
  # @return self
- def add_compressed_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
- udd = !!use_data_descriptor
- @streamer.add_file_and_write_local_header(filename: filename, crc32: 0, storage_mode: 8,
- compressed_size: compressed_size, uncompressed_size: uncompressed_size, use_data_descriptor: udd)
+ def add_deflated_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
+ @streamer.add_deflated_entry(filename: filename,
+ crc32: 0,
+ compressed_size: compressed_size,
+ uncompressed_size: uncompressed_size,
+ use_data_descriptor: use_data_descriptor)
+
  @streamer.simulate_write(compressed_size)
- @streamer.write_data_descriptor_for_last_entry if udd
+ if use_data_descriptor
+ @streamer.update_last_entry_and_write_data_descriptor(crc32: 0,
+ compressed_size: compressed_size,
+ uncompressed_size: uncompressed_size)
+ end
  self
  end
-
+
+ # Will be phased out in ZipTricks 5.x
+ alias_method :add_compressed_entry, :add_deflated_entry
+
  # Add an empty directory to the archive.
  #
  # @param dirname [String] the name of the directory
  # @return self
  def add_empty_directory_entry(dirname:)
- @streamer.add_file_and_write_local_header(filename: "#{dirname}" + "/", crc32: 0, storage_mode: 8,
- compressed_size: 0, uncompressed_size: 0)
+ @streamer.add_empty_directory(dirname: dirname)
  self
  end
  end
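
Pulling the changes above together, a usage sketch of the reworked estimator and the renamed `add_deflated_entry` (filenames and sizes are made up; `add_compressed_entry` stays available as an alias until 5.x):

    expected_size = ZipTricks::SizeEstimator.estimate do |estimator|
      estimator.add_stored_entry(filename: 'scan.tif', size: 1_289_894)
      estimator.add_deflated_entry(filename: 'report.csv',
                                   uncompressed_size: 89_281,
                                   compressed_size: 12_190)
      estimator.add_empty_directory_entry(dirname: 'raw')
    end
    # expected_size can be used for a Content-Length header before the real archive is streamed
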
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  # A simple stateful class for keeping track of a CRC32 value through multiple writes
  class ZipTricks::StreamCRC32
  # Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
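
Only the class comment is touched in this hunk. As a reminder of what the class is for, a hedged usage sketch - the `<<` and `to_i` methods are assumed from the wider zip_tricks API and are not shown in this diff:

    crc = ZipTricks::StreamCRC32.new
    crc << 'first chunk'   # feed data as it is written out
    crc << 'second chunk'
    crc.to_i               # => running CRC32 of everything appended so far
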
@@ -1,6 +1,11 @@
+ # frozen_string_literal: true
+
+ require 'set'
+
  # Is used to write streamed ZIP archives into the provided IO-ish object.
  # The output IO is never going to be rewound or seeked, so the output
- # of this object can be coupled directly to, say, a Rack output.
+ # of this object can be coupled directly to, say, a Rack output. The
+ # output can also be a String, Array or anything that responds to `<<`.
  #
  # Allows for splicing raw files (for "stored" entries without compression)
  # and splicing of deflated files (for "deflated" storage mode).
@@ -16,7 +21,7 @@
  # You can use the Streamer with data descriptors (the CRC32 and the sizes will be
  # written after the file data). This allows non-rewinding on-the-fly compression.
  # If you are compressing large files, the Deflater object that the Streamer controls
- # will be regularly flushed to prevent memory inflation.
+ # will be regularly flushed to prevent memory inflation.
  #
  # ZipTricks::Streamer.open(file_socket_or_string) do |zip|
  # zip.write_stored_file('mov.mp4') do |sink|
@@ -51,7 +56,28 @@
  # so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
  # offsets in the IO will not be updated - which will result in an invalid ZIP.
  #
- # The central directory will be written automatically at the end of the `open` block.
+ #
+ # ## On-the-fly deflate -using the Streamer with async/suspended writes and data descriptors
+ #
+ # If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
+ # there is an option to use a separate writer object. It gets returned from `write_deflated_file`
+ # and `write_stored_file` if you do not provide them with a block, and will accept data writes.
+ #
+ # ZipTricks::Streamer.open(socket) do | zip |
+ # w = zip.write_stored_file('mov.mp4')
+ # w << data
+ # w.close
+ # end
+ #
+ # The central directory will be written automatically at the end of the `open` block. If you need
+ # to manage the Streamer manually, or defer the central directory write until appropriate, use
+ # the constructor instead and call `Streamer#close`:
+ #
+ # zip = ZipTricks::Streamer.new(out_io)
+ # .....
+ # zip.close
+ #
+ # Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
  class ZipTricks::Streamer
  require_relative 'streamer/deflated_writer'
  require_relative 'streamer/writable'
@@ -82,16 +108,13 @@ class ZipTricks::Streamer

  # Creates a new Streamer on top of the given IO-ish object.
  #
- # @param stream[IO] the destination IO for the ZIP (should respond to `<<`)
+ # @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
  # @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
  # Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
  def initialize(stream, writer: create_writer)
- raise InvalidOutput, "The stream must respond to #<<" unless stream.respond_to?(:<<)
- unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
- stream = ZipTricks::WriteAndTell.new(stream)
- end
+ raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)

- @out = stream
+ @out = ZipTricks::WriteAndTell.new(stream)
  @files = []
  @local_header_offsets = []
  @filenames_set = Set.new
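
With the constructor change above the stream is now unconditionally wrapped in `ZipTricks::WriteAndTell`, so anything responding to `<<` works as the output. A small sketch using an in-memory String buffer (binary encoding avoids transcoding of the raw ZIP bytes):

    buffer = String.new(encoding: Encoding::BINARY)
    ZipTricks::Streamer.open(buffer) do |zip|
      zip.write_stored_file('hello.txt') { |sink| sink << 'Hello' }
    end
    buffer.bytesize # => size of the finished archive
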
@@ -112,114 +135,189 @@ class ZipTricks::Streamer
  # `IO.copy_stream(from, to)`.
  #
  # @param binary_data [String] a String in binary encoding
- # @return [Fixnum] the number of bytes written
+ # @return [Integer] the number of bytes written
  def write(binary_data)
  @out << binary_data
  binary_data.bytesize
  end

- # Advances the internal IO pointer to keep the offsets of the ZIP file in check. Use this if you are going
- # to use accelerated writes to the socket (like the `sendfile()` call) after writing the headers, or if you
+ # Advances the internal IO pointer to keep the offsets of the ZIP file in
+ # check. Use this if you are going to use accelerated writes to the socket
+ # (like the `sendfile()` call) after writing the headers, or if you
  # just need to figure out the size of the archive.
  #
- # @param num_bytes [Numeric] how many bytes are going to be written bypassing the Streamer
- # @return [Numeric] position in the output stream / ZIP archive
+ # @param num_bytes [Integer] how many bytes are going to be written bypassing the Streamer
+ # @return [Integer] position in the output stream / ZIP archive
  def simulate_write(num_bytes)
  @out.advance_position_by(num_bytes)
  @out.tell
  end

- # Writes out the local header for an entry (file in the ZIP) that is using the deflated storage model (is compressed).
- # Once this method is called, the `<<` method has to be called to write the actual contents of the body.
+ # Writes out the local header for an entry (file in the ZIP) that is using
+ # the deflated storage model (is compressed). Once this method is called,
+ # the `<<` method has to be called to write the actual contents of the body.
  #
- # Note that the deflated body that is going to be written into the output has to be _precompressed_ (pre-deflated)
- # before writing it into the Streamer, because otherwise it is impossible to know it's size upfront.
+ # Note that the deflated body that is going to be written into the output
+ # has to be _precompressed_ (pre-deflated) before writing it into the
+ # Streamer, because otherwise it is impossible to know it's size upfront.
  #
  # @param filename [String] the name of the file in the entry
- # @param compressed_size [Fixnum] the size of the compressed entry that is going to be written into the archive
- # @param uncompressed_size [Fixnum] the size of the entry when uncompressed, in bytes
- # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
- # @return [Fixnum] the offset the output IO is at after writing the entry header
- def add_compressed_entry(filename:, compressed_size:, uncompressed_size:, crc32:)
- add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: DEFLATED,
- compressed_size: compressed_size, uncompressed_size: uncompressed_size)
+ # @param compressed_size [Integer] the size of the compressed entry that
+ # is going to be written into the archive
+ # @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
+ # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
+ # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
+ # @return [Integer] the offset the output IO is at after writing the entry header
+ def add_deflated_entry(filename:, compressed_size: 0, uncompressed_size: 0, crc32: 0, use_data_descriptor: false)
+ add_file_and_write_local_header(filename: filename, crc32: crc32,
+ storage_mode: DEFLATED,
+ compressed_size: compressed_size,
+ uncompressed_size: uncompressed_size,
+ use_data_descriptor: use_data_descriptor)
  @out.tell
  end

- # Writes out the local header for an entry (file in the ZIP) that is using the stored storage model (is stored as-is).
- # Once this method is called, the `<<` method has to be called one or more times to write the actual contents of the body.
+ # Will be phased out in ZipTricks 5.x
+ alias_method :add_compressed_entry, :add_deflated_entry
+
+ # Writes out the local header for an entry (file in the ZIP) that is using
+ # the stored storage model (is stored as-is).
+ # Once this method is called, the `<<` method has to be called one or more
+ # times to write the actual contents of the body.
  #
  # @param filename [String] the name of the file in the entry
- # @param size [Fixnum] the size of the file when uncompressed, in bytes
- # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
- # @return [Fixnum] the offset the output IO is at after writing the entry header
- def add_stored_entry(filename:, size:, crc32:)
- add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: STORED,
- compressed_size: size, uncompressed_size: size)
+ # @param size [Integer] the size of the file when uncompressed, in bytes
+ # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
+ # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor. When in use
+ # @return [Integer] the offset the output IO is at after writing the entry header
+ def add_stored_entry(filename:, size: 0, crc32: 0, use_data_descriptor: false)
+ add_file_and_write_local_header(filename: filename,
+ crc32: crc32,
+ storage_mode: STORED,
+ compressed_size: size,
+ uncompressed_size: size,
+ use_data_descriptor: use_data_descriptor)
  @out.tell
  end

  # Adds an empty directory to the archive with a size of 0 and permissions of 755.
  #
  # @param dirname [String] the name of the directory in the archive
- # @return [Fixnum] the offset the output IO is at after writing the entry header
+ # @return [Integer] the offset the output IO is at after writing the entry header
  def add_empty_directory(dirname:)
- add_file_and_write_local_header(filename: "#{dirname}" + "/", crc32: 0, storage_mode: STORED,
- compressed_size: 0, uncompressed_size: 0)
+ add_file_and_write_local_header(filename: dirname.to_s + '/',
+ crc32: 0,
+ storage_mode: STORED,
+ compressed_size: 0,
+ uncompressed_size: 0,
+ use_data_descriptor: false)
  @out.tell
  end
-
- # Opens the stream for a stored file in the archive, and yields a writer for that file to the block.
- # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
- # sizes and the CRC32 checksum.
+
+ # Opens the stream for a stored file in the archive, and yields a writer
+ # for that file to the block.
+ # Once the write completes, a data descriptor will be written with the
+ # actual compressed/uncompressed sizes and the CRC32 checksum.
+ #
+ # Using a block, the write will be terminated with a data descriptor outright.
+ #
+ # zip.write_stored_file("foo.txt") do |sink|
+ # IO.copy_stream(source_file, sink)
+ # end
+ #
+ # If deferred writes are desired (for example - to integerate with an API that
+ # does not support blocks, or to work with non-blocking environments) the method
+ # has to be called without a block. In that case it returns the sink instead,
+ # permitting to write to it in a deferred fashion. When `close` is called on
+ # the sink, any remanining compression output will be flushed and the data
+ # descriptor is going to be written.
+ #
+ # Note that even though it does not have to happen within the same call stack,
+ # call sequencing still must be observed. It is therefore not possible to do
+ # this:
+ #
+ # writer_for_file1 = zip.write_stored_file("somefile.jpg")
+ # writer_for_file2 = zip.write_stored_file("another.tif")
+ # writer_for_file1 << data
+ # writer_for_file2 << data
+ #
+ # because it is likely to result in an invalid ZIP file structure later on.
+ # So using this facility in async scenarios is certainly possible, but care
+ # and attention is recommended.
  #
  # @param filename[String] the name of the file in the archive
- # @yield [#<<, #write] an object that the file contents must be written to
+ # @yield [#<<, #write] an object that the file contents must be written to that will be automatically closed
+ # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
  def write_stored_file(filename)
- add_file_and_write_local_header(filename: filename, storage_mode: STORED,
- use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)
-
- w = StoredWriter.new(@out)
- yield(Writable.new(w))
- crc, comp, uncomp = w.finish
-
- # Save the information into the entry for when the time comes to write out the central directory
- last_entry = @files.last
- last_entry.crc32 = crc
- last_entry.compressed_size = comp
- last_entry.uncompressed_size = uncomp
+ add_stored_entry(filename: filename,
+ use_data_descriptor: true,
+ crc32: 0,
+ size: 0)

- @writer.write_data_descriptor(io: @out, crc32: crc, compressed_size: comp, uncompressed_size: uncomp)
+ writable = Writable.new(self, StoredWriter.new(@out))
+ if block_given?
+ yield(writable)
+ writable.close
+ end
+ writable
  end

- # Opens the stream for a deflated file in the archive, and yields a writer for that file to the block.
- # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
- # sizes and the CRC32 checksum.
+ # Opens the stream for a deflated file in the archive, and yields a writer
+ # for that file to the block. Once the write completes, a data descriptor
+ # will be written with the actual compressed/uncompressed sizes and the
+ # CRC32 checksum.
+ #
+ # Using a block, the write will be terminated with a data descriptor outright.
+ #
+ # zip.write_stored_file("foo.txt") do |sink|
+ # IO.copy_stream(source_file, sink)
+ # end
+ #
+ # If deferred writes are desired (for example - to integerate with an API that
+ # does not support blocks, or to work with non-blocking environments) the method
+ # has to be called without a block. In that case it returns the sink instead,
+ # permitting to write to it in a deferred fashion. When `close` is called on
+ # the sink, any remanining compression output will be flushed and the data
+ # descriptor is going to be written.
+ #
+ # Note that even though it does not have to happen within the same call stack,
+ # call sequencing still must be observed. It is therefore not possible to do
+ # this:
+ #
+ # writer_for_file1 = zip.write_deflated_file("somefile.jpg")
+ # writer_for_file2 = zip.write_deflated_file("another.tif")
+ # writer_for_file1 << data
+ # writer_for_file2 << data
+ # writer_for_file1.close
+ # writer_for_file2.close
+ #
+ # because it is likely to result in an invalid ZIP file structure later on.
+ # So using this facility in async scenarios is certainly possible, but care
+ # and attention is recommended.
  #
  # @param filename[String] the name of the file in the archive
  # @yield [#<<, #write] an object that the file contents must be written to
  def write_deflated_file(filename)
- add_file_and_write_local_header(filename: filename, storage_mode: DEFLATED,
- use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)
-
- w = DeflatedWriter.new(@out)
- yield(Writable.new(w))
- crc, comp, uncomp = w.finish
-
- # Save the information into the entry for when the time comes to write out the central directory
- last_entry = @files[-1]
- last_entry.crc32 = crc
- last_entry.compressed_size = comp
- last_entry.uncompressed_size = uncomp
- write_data_descriptor_for_last_entry
+ add_deflated_entry(filename: filename,
+ use_data_descriptor: true,
+ crc32: 0,
+ compressed_size: 0,
+ uncompressed_size: 0)
+
+ writable = Writable.new(self, DeflatedWriter.new(@out))
+ if block_given?
+ yield(writable)
+ writable.close
+ end
+ writable
  end
-
+
  # Closes the archive. Writes the central directory, and switches the writer into
  # a state where it can no longer be written to.
  #
  # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
  #
- # @return [Fixnum] the offset the output IO is at after closing the archive
+ # @return [Integer] the offset the output IO is at after closing the archive
  def close
  # Record the central directory offset, so that it can be written into the EOCD record
  cdir_starts_at = @out.tell
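
The block-less variants documented above return the writable sink instead of yielding it. A condensed sketch of the deferred-write flow combined with manual closing of the archive (`out_io` stands for any `<<`-capable destination):

    zip = ZipTricks::Streamer.new(out_io)
    w = zip.write_deflated_file('log.txt') # no block given - the sink is returned
    w << 'first chunk'
    w << 'second chunk'
    w.close   # flushes the deflater and writes the data descriptor
    zip.close # writes the central directory; out_io itself is left open
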
@@ -227,18 +325,31 @@ class ZipTricks::Streamer
  # Write out the central directory entries, one for each file
  @files.each_with_index do |entry, i|
  header_loc = @local_header_offsets.fetch(i)
- @writer.write_central_directory_file_header(io: @out, local_file_header_location: header_loc,
- gp_flags: entry.gp_flags, storage_mode: entry.storage_mode,
- compressed_size: entry.compressed_size, uncompressed_size: entry.uncompressed_size,
- mtime: entry.mtime, crc32: entry.crc32, filename: entry.filename) #, external_attrs: DEFAULT_EXTERNAL_ATTRS)
+ @writer.write_central_directory_file_header(io: @out,
+ local_file_header_location: header_loc,
+ gp_flags: entry.gp_flags,
+ storage_mode: entry.storage_mode,
+ compressed_size: entry.compressed_size,
+ uncompressed_size: entry.uncompressed_size,
+ mtime: entry.mtime,
+ crc32: entry.crc32,
+ filename: entry.filename)
  end

  # Record the central directory size, for the EOCDR
  cdir_size = @out.tell - cdir_starts_at

  # Write out the EOCDR
- @writer. write_end_of_central_directory(io: @out, start_of_central_directory_location: cdir_starts_at,
- central_directory_size: cdir_size, num_files_in_archive: @files.length)
+ @writer.write_end_of_central_directory(io: @out,
+ start_of_central_directory_location: cdir_starts_at,
+ central_directory_size: cdir_size,
+ num_files_in_archive: @files.length)
+
+ # Clear the files so that GC will not have to trace all the way to here to deallocate them
+ @files.clear
+ @filenames_set.clear
+
+ # and return the final offset
  @out.tell
  end

@@ -251,29 +362,73 @@ class ZipTricks::Streamer
  ZipTricks::ZipWriter.new
  end

+ # Updates the last entry written with the CRC32 checksum and compressed/uncompressed
+ # sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
+ # After updating the entry will immediately write the data descriptor bytes
+ # to the output.
+ #
+ # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
+ # @param compressed_size [Integer] the size of the compressed segment within the ZIP
+ # @param uncompressed_size [Integer] the size of the entry once uncompressed
+ # @return [Integer] the offset the output IO is at after writing the data descriptor
+ def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:)
+ # Save the information into the entry for when the time comes to write
+ # out the central directory
+ last_entry = @files.fetch(-1)
+ last_entry.crc32 = crc32
+ last_entry.compressed_size = compressed_size
+ last_entry.uncompressed_size = uncompressed_size
+
+ @writer.write_data_descriptor(io: @out,
+ crc32: last_entry.crc32,
+ compressed_size: last_entry.compressed_size,
+ uncompressed_size: last_entry.uncompressed_size)
+ @out.tell
+ end
+
  private

- def add_file_and_write_local_header(filename:, crc32:, storage_mode:, compressed_size:,
- uncompressed_size:, use_data_descriptor: false)
+ def add_file_and_write_local_header(filename:,
+ crc32:,
+ storage_mode:,
+ compressed_size:,
+ uncompressed_size:,
+ use_data_descriptor:)

  # Clean backslashes and uniqify filenames if there are duplicates
  filename = remove_backslash(filename)
  filename = uniquify_name(filename) if @filenames_set.include?(filename)

- raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
- raise Overflow, "Filename is too long" if filename.bytesize > 0xFFFF
+ unless [STORED, DEFLATED].include?(storage_mode)
+ raise UnknownMode, "Unknown compression mode #{storage_mode}"
+ end
+
+ raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF

- e = Entry.new(filename, crc32, compressed_size, uncompressed_size, storage_mode, mtime=Time.now.utc, use_data_descriptor)
+ if use_data_descriptor
+ crc32 = 0
+ compressed_size = 0
+ uncompressed_size = 0
+ end
+
+ e = Entry.new(filename,
+ crc32,
+ compressed_size,
+ uncompressed_size,
+ storage_mode,
+ mtime = Time.now.utc,
+ use_data_descriptor)
  @files << e
  @filenames_set << e.filename
  @local_header_offsets << @out.tell
- @writer.write_local_file_header(io: @out, gp_flags: e.gp_flags, crc32: e.crc32, compressed_size: e.compressed_size,
- uncompressed_size: e.uncompressed_size, mtime: e.mtime, filename: e.filename, storage_mode: e.storage_mode)
- end
-
- def write_data_descriptor_for_last_entry
- e = @files.fetch(-1)
- @writer.write_data_descriptor(io: @out, crc32: 0, compressed_size: e.compressed_size, uncompressed_size: e.uncompressed_size)
+ @writer.write_local_file_header(io: @out,
+ gp_flags: e.gp_flags,
+ crc32: e.crc32,
+ compressed_size: e.compressed_size,
+ uncompressed_size: e.uncompressed_size,
+ mtime: e.mtime,
+ filename: e.filename,
+ storage_mode: e.storage_mode)
  end

  def remove_backslash(filename)
@@ -281,8 +436,9 @@ class ZipTricks::Streamer
  end

  def uniquify_name(filename)
- copy_pattern = /\((\d+)\)$/ # we add (1), (2), (n) at the end of a filename if there is a duplicate
- parts = filename.split(".")
+ # we add (1), (2), (n) at the end of a filename if there is a duplicate
+ copy_pattern = /\((\d+)\)$/
+ parts = filename.split('.')
  ext = if parts.last =~ /gz|zip/ && parts.size > 2
  parts.pop(2)
  elsif parts.size > 1
@@ -292,12 +448,12 @@ class ZipTricks::Streamer

  duplicate_counter = 1
  loop do
- if fn_last_part =~ copy_pattern
- fn_last_part.sub!(copy_pattern, "(#{duplicate_counter})")
- else
- fn_last_part = "#{fn_last_part} (#{duplicate_counter})"
- end
- new_filename = (parts + [fn_last_part, ext]).compact.join(".")
+ fn_last_part = if fn_last_part =~ copy_pattern
+ fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
+ else
+ "#{fn_last_part} (#{duplicate_counter})"
+ end
+ new_filename = (parts + [fn_last_part, ext]).compact.join('.')
  return new_filename unless @filenames_set.include?(new_filename)
  duplicate_counter += 1
  end
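
Finally, the public `update_last_entry_and_write_data_descriptor` replaces the previously private `write_data_descriptor_for_last_entry` and is what callers (such as the SizeEstimator earlier in this diff) use when the entry body bypasses the Streamer. A sketch of that flow, where `bytes_sent` and `raw_crc32` are placeholder values computed by the caller:

    zip.add_stored_entry(filename: 'raw.bin', use_data_descriptor: true)
    zip.simulate_write(bytes_sent) # e.g. after sendfile() pushed the body straight to the socket
    zip.update_last_entry_and_write_data_descriptor(crc32: raw_crc32,
                                                    compressed_size: bytes_sent,
                                                    uncompressed_size: bytes_sent)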