zip_tricks 4.4.2 → 4.5.0

@@ -1,3 +1,6 @@
+# frozen_string_literal: true
+
+# Rubocop: convention: Missing top-level class documentation comment.
 class ZipTricks::FileReader::InflatingReader
   def initialize(from_io, compressed_data_size)
     @io = from_io
@@ -6,7 +9,7 @@ class ZipTricks::FileReader::InflatingReader
     @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
   end
 
-  def extract(n_bytes=nil)
+  def extract(n_bytes = nil)
     n_bytes ||= (@compressed_data_size - @already_read)
 
     return if eof?
@@ -1,3 +1,6 @@
+# frozen_string_literal: true
+
+# Rubocop: convention: Missing top-level class documentation comment.
 class ZipTricks::FileReader::StoredReader
   def initialize(from_io, compressed_data_size)
     @io = from_io
@@ -5,7 +8,7 @@ class ZipTricks::FileReader::StoredReader
     @already_read = 0
   end
 
-  def extract(n_bytes=nil)
+  def extract(n_bytes = nil)
     n_bytes ||= (@compressed_data_size - @already_read)
 
     return if eof?
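
Both readers above share the same shape: a two-argument constructor and an `extract` call that takes an optional byte budget. A minimal usage sketch (not taken from the gem's docs), assuming `entry_io` is already positioned at the first byte of the entry body and `compressed_size` comes from the central directory:

    require 'zip_tricks'

    # Assumptions: `entry_io` and `compressed_size` are obtained elsewhere,
    # e.g. from a prior ZipTricks::FileReader pass over the archive.
    reader = ZipTricks::FileReader::InflatingReader.new(entry_io, compressed_size)

    File.open('extracted.bin', 'wb') do |out|
      # extract returns nil once eof? is reached (note the `return if eof?` above)
      until reader.eof?
        chunk = reader.extract(64 * 1024)
        out << chunk if chunk
      end
    end

The StoredReader is used the same way for entries that were stored without compression.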
@@ -1,12 +1,12 @@
+# frozen_string_literal: true
+
 # Used when you need to supply a destination IO for some
 # write operations, but want to discard the data (like when
 # estimating the size of a ZIP)
 module ZipTricks::NullWriter
   # @param data[String] the data to write
   # @return [self]
-  def self.<<(data); self; end
-
-  # @param data[String] the data to write
-  # @return [Fixnum] the amount of data that was supposed to be written
-  def self.write(data); data.bytesize; end
+  def self.<<(_)
+    self
+  end
 end
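
As the comment says, NullWriter exists so that size-only passes can throw the bytes away. A small sketch of pairing it with `ZipTricks::WriteAndTell` (the offset-tracking wrapper this release leans on elsewhere) to count output without keeping it:

    require 'zip_tricks'

    # Wrap the discarding writer so offsets still advance as if bytes were written.
    counting_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
    counting_io << 'some bytes that will be discarded'
    counting_io.tell # => 33, the number of bytes "written" so far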
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # Can be used as a Rack response body directly. Will yield
 # a {ZipTricks::Streamer} for adding entries to the archive and writing
 # zip entry bodies.
@@ -13,14 +15,16 @@ class ZipTricks::RackBody
   #      estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
   #    end
   #
-  #    # Prepare the response body. The block will only be called when the response starts to be written.
+  #    # Prepare the response body. The block will only be called when the
+  #    # response starts to be written.
   #    body = ZipTricks::RackBody.new do | streamer |
   #      streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
   #      streamer << large_file.read(1024*1024) until large_file.eof?
   #      ...
   #    end
   #
-  #    return [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
+  #    return [200, {'Content-Type' => 'binary/octet-stream',
+  #      'Content-Length' => content_length.to_s}, body]
   def initialize(&blk)
     @archiving_block = blk
   end
@@ -36,6 +40,5 @@ class ZipTricks::RackBody
   # Does nothing because nothing has to be deallocated or canceled
   # even if the zip output is incomplete. The archive gets closed
   # automatically as part of {ZipTricks::Streamer.open}
-  def close
-  end
+  def close; end
 end
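
The doc comment above already sketches the wiring; condensed into a runnable config.ru, with the served file path being the only assumption:

    # config.ru
    require 'zip_tricks'

    LARGE_FILE_PATH = '/tmp/large.tif' # assumed path to the file being served

    run lambda { |_env|
      body = ZipTricks::RackBody.new do |streamer|
        File.open(LARGE_FILE_PATH, 'rb') do |f|
          # write_stored_file computes sizes and CRC32 for us via a data descriptor
          streamer.write_stored_file('large.tif') { |sink| IO.copy_stream(f, sink) }
        end
      end
      [200, { 'Content-Type' => 'binary/octet-stream' }, body]
    }

If a Content-Length header is wanted, it can be precomputed with ZipTricks::SizeEstimator as the comment shows.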
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # Should be included into a Rails controller (together with `ActionController::Live`)
 # for easy ZIP output from any action.
 module ZipTricks::RailsStreaming
@@ -10,7 +12,7 @@ module ZipTricks::RailsStreaming
     # Create a wrapper for the write call that quacks like something you
     # can << to, used by ZipTricks
     w = ZipTricks::BlockWrite.new { |chunk| response.stream.write(chunk) }
-    ZipTricks::Streamer.open(w){|z| yield(z) }
+    ZipTricks::Streamer.open(w) { |z| yield(z) }
   ensure
     response.stream.close
   end
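
For context, the module is meant to be mixed into a controller next to ActionController::Live. A sketch of that wiring, assuming the streaming helper the module defines is named `zip_tricks_stream` (the method name sits outside this hunk), with `Report.to_csv` standing in as a hypothetical data source:

    class ZipsController < ApplicationController
      include ActionController::Live
      include ZipTricks::RailsStreaming

      def download
        # Assumed helper name; it yields the Streamer wired to response.stream
        # exactly as the hunk above shows.
        zip_tricks_stream do |zip|
          zip.write_deflated_file('report.csv') do |sink|
            sink << Report.to_csv # hypothetical data source
          end
        end
      end
    end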
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # An object that fakes just-enough of an IO to be dangerous
 # - or, more precisely, to be useful as a source for the FileReader
 # central directory parser. Effectively we substitute an IO object
@@ -20,23 +22,25 @@ class ZipTricks::RemoteIO
     @pos = clamp(0, offset, @remote_size)
     0 # always return 0!
   end
-  
+
   # Emulates IO#size.
   #
   # @return [Fixnum] the size of the remote resource
   def size
     @remote_size ||= request_object_size
   end
-  
+
   # Emulates IO#read, but requires the number of bytes to read
   # The read will be limited to the
   # size of the remote resource relative to the current offset in the IO,
   # so if you are at offset 0 in the IO of size 10, doing a `read(20)`
-  # will only return you 10 bytes of result, and not raise any exceptions. 
+  # will only return you 10 bytes of result, and not raise any exceptions.
   #
   # @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
   # @return [String] the read bytes
-  def read(n_bytes=nil)
+  # Rubocop: convention: Assignment Branch Condition size for read is too high. [17.92/15]
+  # Rubocop: convention: Method has too many lines. [13/10]
+  def read(n_bytes = nil)
     @remote_size ||= request_object_size
 
     # If the resource is empty there is nothing to read
@@ -87,7 +91,7 @@ class ZipTricks::RemoteIO
 
   private
 
-  def clamp(a,b,c)
+  def clamp(a, b, c)
     return a if b < a
     return c if b > c
     b
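
The private `clamp(a, b, c)` helper simply confines `b` to the `[a, c]` range, which is what keeps `seek` and `read` inside the bounds of the remote resource. Its three branches, shown with plain values for illustration:

    clamp(0, -5, 100)  # => 0    (b < a, the lower bound wins)
    clamp(0, 250, 100) # => 100  (b > c, the upper bound wins)
    clamp(0, 42, 100)  # => 42   (already in range, returned as-is)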
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # Alows reading the central directory of a remote ZIP file without
 # downloading the entire file. The central directory provides the
 # offsets at which the actual file contents is located. You can then
@@ -6,12 +8,15 @@
 # Please read the security warning in `FileReader` _VERY CAREFULLY_
 # before you use this module.
 class ZipTricks::RemoteUncap
-
-  # @param uri[String] the HTTP(S) URL to read the ZIP footer from
+  # @param uri[String] the HTTP(S) URL to read the ZIP footer from
   # @param reader_class[Class] which class to use for reading
-  # @param options_for_zip_reader[Hash] any additional options to give to {ZipTricks::FileReader} when reading
-  # @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the files within the remote archive
-  def self.files_within_zip_at(uri, reader_class: ZipTricks::FileReader, **options_for_zip_reader)
+  # @param options_for_zip_reader[Hash] any additional options to give to
+  #   {ZipTricks::FileReader} when reading
+  # @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the
+  #   files within the remote archive
+  def self.files_within_zip_at(uri,
+                               reader_class: ZipTricks::FileReader,
+                               **options_for_zip_reader)
     fetcher = new(uri)
     fake_io = ZipTricks::RemoteIO.new(fetcher)
     reader = reader_class.new
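
A usage sketch for the reflowed `files_within_zip_at` signature, assuming the URL points at a ZIP served over HTTP with Range support, and that the returned ZipEntry objects expose `filename` and `uncompressed_size` accessors:

    require 'zip_tricks'

    # Lists the entries of a remote archive by fetching only the central directory.
    entries = ZipTricks::RemoteUncap.files_within_zip_at('https://example.com/big.zip')

    entries.each do |entry|
      puts format('%s (%d bytes uncompressed)', entry.filename, entry.uncompressed_size)
    end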
@@ -1,48 +1,50 @@
+# frozen_string_literal: true
+
 # Helps to estimate archive sizes
 class ZipTricks::SizeEstimator
   require_relative 'streamer'
-
-  # Used to mark a couple of methods public
-  class DetailStreamer < ::ZipTricks::Streamer
-    public :add_file_and_write_local_header, :write_data_descriptor_for_last_entry
-  end
-  private_constant :DetailStreamer
-
+
   # Creates a new estimator with a Streamer object. Normally you should use
   # `estimate` instead an not use this method directly.
   def initialize(streamer)
     @streamer = streamer
   end
   private :initialize
-
+
   # Performs the estimate using fake archiving. It needs to know the sizes of the
   # entries upfront. Usage:
   #
   #    expected_zip_size = SizeEstimator.estimate do | estimator |
   #      estimator.add_stored_entry(filename: "file.doc", size: 898291)
-  #      estimator.add_compressed_entry(filename: "family.tif", uncompressed_size: 89281911, compressed_size: 121908)
+  #      estimator.add_deflated_entry(filename: "family.tif",
+  #        uncompressed_size: 89281911, compressed_size: 121908)
   #    end
   #
-  # @return [Fixnum] the size of the resulting archive, in bytes
+  # @return [Integer] the size of the resulting archive, in bytes
   # @yield [SizeEstimator] the estimator
   def self.estimate
-    output_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
-    DetailStreamer.open(output_io) { |zip| yield(new(zip)) }
-    output_io.tell
+    streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
+    estimator = new(streamer)
+    yield(estimator)
+    streamer.close # Returns the .tell of the contained IO
   end
 
   # Add a fake entry to the archive, to see how big it is going to be in the end.
   #
   # @param filename [String] the name of the file (filenames are variable-width in the ZIP)
   # @param size [Fixnum] size of the uncompressed entry
-  # @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
+  # @param use_data_descriptor[Boolean] whether the entry uses a postfix
+  #   data descriptor to specify size
   # @return self
   def add_stored_entry(filename:, size:, use_data_descriptor: false)
-    udd = !!use_data_descriptor
-    @streamer.add_file_and_write_local_header(filename: filename, crc32: 0, storage_mode: 0,
-      compressed_size: size, uncompressed_size: size, use_data_descriptor: udd)
+    @streamer.add_stored_entry(filename: filename,
+                               crc32: 0,
+                               size: size,
+                               use_data_descriptor: use_data_descriptor)
     @streamer.simulate_write(size)
-    @streamer.write_data_descriptor_for_last_entry if udd
+    if use_data_descriptor
+      @streamer.update_last_entry_and_write_data_descriptor(crc32: 0, compressed_size: size, uncompressed_size: size)
+    end
     self
   end
 
@@ -51,24 +53,34 @@ class ZipTricks::SizeEstimator
   # @param filename [String] the name of the file (filenames are variable-width in the ZIP)
   # @param uncompressed_size [Fixnum] size of the uncompressed entry
   # @param compressed_size [Fixnum] size of the compressed entry
-  # @param use_data_descriptor[Boolean] whether the entry uses a postfix data descriptor to specify size
+  # @param use_data_descriptor[Boolean] whether the entry uses a postfix data
+  #   descriptor to specify size
   # @return self
-  def add_compressed_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
-    udd = !!use_data_descriptor
-    @streamer.add_file_and_write_local_header(filename: filename, crc32: 0, storage_mode: 8,
-      compressed_size: compressed_size, uncompressed_size: uncompressed_size, use_data_descriptor: udd)
+  def add_deflated_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
+    @streamer.add_deflated_entry(filename: filename,
+                                 crc32: 0,
+                                 compressed_size: compressed_size,
+                                 uncompressed_size: uncompressed_size,
+                                 use_data_descriptor: use_data_descriptor)
+
     @streamer.simulate_write(compressed_size)
-    @streamer.write_data_descriptor_for_last_entry if udd
+    if use_data_descriptor
+      @streamer.update_last_entry_and_write_data_descriptor(crc32: 0,
+                                                            compressed_size: compressed_size,
+                                                            uncompressed_size: uncompressed_size)
+    end
     self
   end
-
+
+  # Will be phased out in ZipTricks 5.x
+  alias_method :add_compressed_entry, :add_deflated_entry
+
   # Add an empty directory to the archive.
   #
   # @param dirname [String] the name of the directory
   # @return self
   def add_empty_directory_entry(dirname:)
-    @streamer.add_file_and_write_local_header(filename: "#{dirname}" + "/", crc32: 0, storage_mode: 8,
-      compressed_size: 0, uncompressed_size: 0)
+    @streamer.add_empty_directory(dirname: dirname)
     self
   end
 end
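
Putting the renamed API together, a sketch of estimating a Content-Length before any real compression happens (the sizes mirror the usage block quoted in the diff; `add_compressed_entry` keeps working through the alias until 5.x):

    require 'zip_tricks'

    expected_zip_size = ZipTricks::SizeEstimator.estimate do |estimator|
      # An entry that will be stored as-is
      estimator.add_stored_entry(filename: 'file.doc', size: 898_291)
      # An entry that will be written pre-deflated; both sizes must be known upfront
      estimator.add_deflated_entry(filename: 'family.tif',
                                   uncompressed_size: 89_281_911,
                                   compressed_size: 121_908)
      # Directories occupy header space too
      estimator.add_empty_directory_entry(dirname: 'photos')
    end

    expected_zip_size # => byte size of the future archive, usable as Content-Length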
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 # A simple stateful class for keeping track of a CRC32 value through multiple writes
 class ZipTricks::StreamCRC32
   # Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
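
For reference, the class touched here accumulates a CRC32 across multiple writes. A small sketch, assuming the `<<`/`to_i` instance methods and the `from_io` class method this class is expected to provide:

    require 'zip_tricks'

    crc = ZipTricks::StreamCRC32.new
    crc << 'first chunk'
    crc << 'second chunk'
    crc.to_i # => the CRC32 of both chunks concatenated

    # Or compute it straight from an IO that responds to read/eof?
    File.open('mov.mp4', 'rb') { |f| ZipTricks::StreamCRC32.from_io(f) }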
@@ -1,6 +1,11 @@
+# frozen_string_literal: true
+
+require 'set'
+
 # Is used to write streamed ZIP archives into the provided IO-ish object.
 # The output IO is never going to be rewound or seeked, so the output
-# of this object can be coupled directly to, say, a Rack output.
+# of this object can be coupled directly to, say, a Rack output. The
+# output can also be a String, Array or anything that responds to `<<`.
 #
 # Allows for splicing raw files (for "stored" entries without compression)
 # and splicing of deflated files (for "deflated" storage mode).
@@ -16,7 +21,7 @@
 # You can use the Streamer with data descriptors (the CRC32 and the sizes will be
 # written after the file data). This allows non-rewinding on-the-fly compression.
 # If you are compressing large files, the Deflater object that the Streamer controls
-# will be regularly flushed to prevent memory inflation. 
+# will be regularly flushed to prevent memory inflation.
 #
 #    ZipTricks::Streamer.open(file_socket_or_string) do |zip|
 #      zip.write_stored_file('mov.mp4') do |sink|
@@ -51,7 +56,28 @@
 # so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
 # offsets in the IO will not be updated - which will result in an invalid ZIP.
 #
-# The central directory will be written automatically at the end of the `open` block.
+#
+# ## On-the-fly deflate -using the Streamer with async/suspended writes and data descriptors
+#
+# If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
+# there is an option to use a separate writer object. It gets returned from `write_deflated_file`
+# and `write_stored_file` if you do not provide them with a block, and will accept data writes.
+#
+#    ZipTricks::Streamer.open(socket) do | zip |
+#      w = zip.write_stored_file('mov.mp4')
+#      w << data
+#      w.close
+#    end
+#
+# The central directory will be written automatically at the end of the `open` block. If you need
+# to manage the Streamer manually, or defer the central directory write until appropriate, use
+# the constructor instead and call `Streamer#close`:
+#
+#    zip = ZipTricks::Streamer.new(out_io)
+#    .....
+#    zip.close
+#
+# Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
 class ZipTricks::Streamer
   require_relative 'streamer/deflated_writer'
   require_relative 'streamer/writable'
@@ -82,16 +108,13 @@ class ZipTricks::Streamer
 
   # Creates a new Streamer on top of the given IO-ish object.
   #
-  # @param stream[IO] the destination IO for the ZIP (should respond to `<<`)
+  # @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
   # @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
   #   Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
   def initialize(stream, writer: create_writer)
-    raise InvalidOutput, "The stream must respond to #<<" unless stream.respond_to?(:<<)
-    unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
-      stream = ZipTricks::WriteAndTell.new(stream)
-    end
+    raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
 
-    @out = stream
+    @out = ZipTricks::WriteAndTell.new(stream)
     @files = []
     @local_header_offsets = []
     @filenames_set = Set.new
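
Because the constructor now always wraps its argument in WriteAndTell, the only requirement left on the destination is `#<<`. A sketch of writing an archive straight into an in-memory String buffer, which this change makes possible:

    require 'zip_tricks'

    zip_buffer = ''.b # any object responding to << works; a binary String here

    ZipTricks::Streamer.open(zip_buffer) do |zip|
      zip.write_deflated_file('hello.txt') { |sink| sink << 'Hello, ZIP!' }
    end

    zip_buffer.bytesize # => size of the finished archive held in memory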
@@ -112,114 +135,189 @@ class ZipTricks::Streamer
   # `IO.copy_stream(from, to)`.
   #
   # @param binary_data [String] a String in binary encoding
-  # @return [Fixnum] the number of bytes written
+  # @return [Integer] the number of bytes written
   def write(binary_data)
     @out << binary_data
     binary_data.bytesize
   end
 
-  # Advances the internal IO pointer to keep the offsets of the ZIP file in check. Use this if you are going
-  # to use accelerated writes to the socket (like the `sendfile()` call) after writing the headers, or if you
+  # Advances the internal IO pointer to keep the offsets of the ZIP file in
+  # check. Use this if you are going to use accelerated writes to the socket
+  # (like the `sendfile()` call) after writing the headers, or if you
   # just need to figure out the size of the archive.
   #
-  # @param num_bytes [Numeric] how many bytes are going to be written bypassing the Streamer
-  # @return [Numeric] position in the output stream / ZIP archive
+  # @param num_bytes [Integer] how many bytes are going to be written bypassing the Streamer
+  # @return [Integer] position in the output stream / ZIP archive
   def simulate_write(num_bytes)
     @out.advance_position_by(num_bytes)
     @out.tell
   end
 
-  # Writes out the local header for an entry (file in the ZIP) that is using the deflated storage model (is compressed).
-  # Once this method is called, the `<<` method has to be called to write the actual contents of the body.
+  # Writes out the local header for an entry (file in the ZIP) that is using
+  # the deflated storage model (is compressed). Once this method is called,
+  # the `<<` method has to be called to write the actual contents of the body.
   #
-  # Note that the deflated body that is going to be written into the output has to be _precompressed_ (pre-deflated)
-  # before writing it into the Streamer, because otherwise it is impossible to know it's size upfront.
+  # Note that the deflated body that is going to be written into the output
+  # has to be _precompressed_ (pre-deflated) before writing it into the
+  # Streamer, because otherwise it is impossible to know it's size upfront.
   #
   # @param filename [String] the name of the file in the entry
-  # @param compressed_size [Fixnum] the size of the compressed entry that is going to be written into the archive
-  # @param uncompressed_size [Fixnum] the size of the entry when uncompressed, in bytes
-  # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
-  # @return [Fixnum] the offset the output IO is at after writing the entry header
-  def add_compressed_entry(filename:, compressed_size:, uncompressed_size:, crc32:)
-    add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: DEFLATED,
-      compressed_size: compressed_size, uncompressed_size: uncompressed_size)
+  # @param compressed_size [Integer] the size of the compressed entry that
+  #   is going to be written into the archive
+  # @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
+  # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
+  # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
+  # @return [Integer] the offset the output IO is at after writing the entry header
+  def add_deflated_entry(filename:, compressed_size: 0, uncompressed_size: 0, crc32: 0, use_data_descriptor: false)
+    add_file_and_write_local_header(filename: filename, crc32: crc32,
+                                    storage_mode: DEFLATED,
+                                    compressed_size: compressed_size,
+                                    uncompressed_size: uncompressed_size,
+                                    use_data_descriptor: use_data_descriptor)
     @out.tell
   end
 
-  # Writes out the local header for an entry (file in the ZIP) that is using the stored storage model (is stored as-is).
-  # Once this method is called, the `<<` method has to be called one or more times to write the actual contents of the body.
+  # Will be phased out in ZipTricks 5.x
+  alias_method :add_compressed_entry, :add_deflated_entry
+
+  # Writes out the local header for an entry (file in the ZIP) that is using
+  # the stored storage model (is stored as-is).
+  # Once this method is called, the `<<` method has to be called one or more
+  # times to write the actual contents of the body.
   #
   # @param filename [String] the name of the file in the entry
-  # @param size [Fixnum] the size of the file when uncompressed, in bytes
-  # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
-  # @return [Fixnum] the offset the output IO is at after writing the entry header
-  def add_stored_entry(filename:, size:, crc32:)
-    add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: STORED,
-      compressed_size: size, uncompressed_size: size)
+  # @param size [Integer] the size of the file when uncompressed, in bytes
+  # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
+  # @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor. When in use
+  # @return [Integer] the offset the output IO is at after writing the entry header
+  def add_stored_entry(filename:, size: 0, crc32: 0, use_data_descriptor: false)
+    add_file_and_write_local_header(filename: filename,
+                                    crc32: crc32,
+                                    storage_mode: STORED,
+                                    compressed_size: size,
+                                    uncompressed_size: size,
+                                    use_data_descriptor: use_data_descriptor)
     @out.tell
   end
 
   # Adds an empty directory to the archive with a size of 0 and permissions of 755.
   #
   # @param dirname [String] the name of the directory in the archive
-  # @return [Fixnum] the offset the output IO is at after writing the entry header
+  # @return [Integer] the offset the output IO is at after writing the entry header
   def add_empty_directory(dirname:)
-    add_file_and_write_local_header(filename: "#{dirname}" + "/", crc32: 0, storage_mode: STORED,
-      compressed_size: 0, uncompressed_size: 0)
+    add_file_and_write_local_header(filename: dirname.to_s + '/',
+                                    crc32: 0,
+                                    storage_mode: STORED,
+                                    compressed_size: 0,
+                                    uncompressed_size: 0,
+                                    use_data_descriptor: false)
     @out.tell
   end
-
-  # Opens the stream for a stored file in the archive, and yields a writer for that file to the block.
-  # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
-  # sizes and the CRC32 checksum.
+
+  # Opens the stream for a stored file in the archive, and yields a writer
+  # for that file to the block.
+  # Once the write completes, a data descriptor will be written with the
+  # actual compressed/uncompressed sizes and the CRC32 checksum.
+  #
+  # Using a block, the write will be terminated with a data descriptor outright.
+  #
+  #    zip.write_stored_file("foo.txt") do |sink|
+  #      IO.copy_stream(source_file, sink)
+  #    end
+  #
+  # If deferred writes are desired (for example - to integerate with an API that
+  # does not support blocks, or to work with non-blocking environments) the method
+  # has to be called without a block. In that case it returns the sink instead,
+  # permitting to write to it in a deferred fashion. When `close` is called on
+  # the sink, any remanining compression output will be flushed and the data
+  # descriptor is going to be written.
+  #
+  # Note that even though it does not have to happen within the same call stack,
+  # call sequencing still must be observed. It is therefore not possible to do
+  # this:
+  #
+  #    writer_for_file1 = zip.write_stored_file("somefile.jpg")
+  #    writer_for_file2 = zip.write_stored_file("another.tif")
+  #    writer_for_file1 << data
+  #    writer_for_file2 << data
+  #
+  # because it is likely to result in an invalid ZIP file structure later on.
+  # So using this facility in async scenarios is certainly possible, but care
+  # and attention is recommended.
   #
   # @param filename[String] the name of the file in the archive
-  # @yield [#<<, #write] an object that the file contents must be written to
+  # @yield [#<<, #write] an object that the file contents must be written to that will be automatically closed
+  # @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
   def write_stored_file(filename)
-    add_file_and_write_local_header(filename: filename, storage_mode: STORED,
-      use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)
-
-    w = StoredWriter.new(@out)
-    yield(Writable.new(w))
-    crc, comp, uncomp = w.finish
-
-    # Save the information into the entry for when the time comes to write out the central directory
-    last_entry = @files.last
-    last_entry.crc32 = crc
-    last_entry.compressed_size = comp
-    last_entry.uncompressed_size = uncomp
+    add_stored_entry(filename: filename,
+                     use_data_descriptor: true,
+                     crc32: 0,
+                     size: 0)
 
-    @writer.write_data_descriptor(io: @out, crc32: crc, compressed_size: comp, uncompressed_size: uncomp)
+    writable = Writable.new(self, StoredWriter.new(@out))
+    if block_given?
+      yield(writable)
+      writable.close
+    end
+    writable
   end
 
-  # Opens the stream for a deflated file in the archive, and yields a writer for that file to the block.
-  # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
-  # sizes and the CRC32 checksum.
+  # Opens the stream for a deflated file in the archive, and yields a writer
+  # for that file to the block. Once the write completes, a data descriptor
+  # will be written with the actual compressed/uncompressed sizes and the
+  # CRC32 checksum.
+  #
+  # Using a block, the write will be terminated with a data descriptor outright.
+  #
+  #    zip.write_stored_file("foo.txt") do |sink|
+  #      IO.copy_stream(source_file, sink)
+  #    end
+  #
+  # If deferred writes are desired (for example - to integerate with an API that
+  # does not support blocks, or to work with non-blocking environments) the method
+  # has to be called without a block. In that case it returns the sink instead,
+  # permitting to write to it in a deferred fashion. When `close` is called on
+  # the sink, any remanining compression output will be flushed and the data
+  # descriptor is going to be written.
+  #
+  # Note that even though it does not have to happen within the same call stack,
+  # call sequencing still must be observed. It is therefore not possible to do
+  # this:
+  #
+  #    writer_for_file1 = zip.write_deflated_file("somefile.jpg")
+  #    writer_for_file2 = zip.write_deflated_file("another.tif")
+  #    writer_for_file1 << data
+  #    writer_for_file2 << data
+  #    writer_for_file1.close
+  #    writer_for_file2.close
+  #
+  # because it is likely to result in an invalid ZIP file structure later on.
+  # So using this facility in async scenarios is certainly possible, but care
+  # and attention is recommended.
   #
   # @param filename[String] the name of the file in the archive
   # @yield [#<<, #write] an object that the file contents must be written to
   def write_deflated_file(filename)
-    add_file_and_write_local_header(filename: filename, storage_mode: DEFLATED,
-      use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)
-
-    w = DeflatedWriter.new(@out)
-    yield(Writable.new(w))
-    crc, comp, uncomp = w.finish
-
-    # Save the information into the entry for when the time comes to write out the central directory
-    last_entry = @files[-1]
-    last_entry.crc32 = crc
-    last_entry.compressed_size = comp
-    last_entry.uncompressed_size = uncomp
-    write_data_descriptor_for_last_entry
+    add_deflated_entry(filename: filename,
+                       use_data_descriptor: true,
+                       crc32: 0,
+                       compressed_size: 0,
+                       uncompressed_size: 0)
+
+    writable = Writable.new(self, DeflatedWriter.new(@out))
+    if block_given?
+      yield(writable)
+      writable.close
+    end
+    writable
   end
-
+
   # Closes the archive. Writes the central directory, and switches the writer into
   # a state where it can no longer be written to.
   #
   # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
   #
-  # @return [Fixnum] the offset the output IO is at after closing the archive
+  # @return [Integer] the offset the output IO is at after closing the archive
   def close
     # Record the central directory offset, so that it can be written into the EOCD record
     cdir_starts_at = @out.tell
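
The block-less form documented above returns the Writable sink itself. A sketch mirroring those doc comments, with `out_io` standing in for any `<<`-capable destination, and observing the call-sequencing rule of closing one writer before opening the next:

    require 'zip_tricks'

    ZipTricks::Streamer.open(out_io) do |zip|
      writer = zip.write_deflated_file('log.txt') # no block: the sink is returned
      writer << 'first line'
      writer << 'second line'
      writer.close # flushes the deflater and writes the data descriptor
    end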
@@ -227,18 +325,31 @@ class ZipTricks::Streamer
     # Write out the central directory entries, one for each file
     @files.each_with_index do |entry, i|
       header_loc = @local_header_offsets.fetch(i)
-      @writer.write_central_directory_file_header(io: @out, local_file_header_location: header_loc,
-        gp_flags: entry.gp_flags, storage_mode: entry.storage_mode,
-        compressed_size: entry.compressed_size, uncompressed_size: entry.uncompressed_size,
-        mtime: entry.mtime, crc32: entry.crc32, filename: entry.filename) #, external_attrs: DEFAULT_EXTERNAL_ATTRS)
+      @writer.write_central_directory_file_header(io: @out,
+                                                  local_file_header_location: header_loc,
+                                                  gp_flags: entry.gp_flags,
+                                                  storage_mode: entry.storage_mode,
+                                                  compressed_size: entry.compressed_size,
+                                                  uncompressed_size: entry.uncompressed_size,
+                                                  mtime: entry.mtime,
+                                                  crc32: entry.crc32,
+                                                  filename: entry.filename)
     end
 
     # Record the central directory size, for the EOCDR
     cdir_size = @out.tell - cdir_starts_at
 
     # Write out the EOCDR
-    @writer. write_end_of_central_directory(io: @out, start_of_central_directory_location: cdir_starts_at,
-      central_directory_size: cdir_size, num_files_in_archive: @files.length)
+    @writer.write_end_of_central_directory(io: @out,
+                                           start_of_central_directory_location: cdir_starts_at,
+                                           central_directory_size: cdir_size,
+                                           num_files_in_archive: @files.length)
+
+    # Clear the files so that GC will not have to trace all the way to here to deallocate them
+    @files.clear
+    @filenames_set.clear
+
+    # and return the final offset
     @out.tell
   end
 
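
With `close` now clearing its bookkeeping and returning the final offset, manual lifecycle management (no `open` block) looks like this sketch, where `socket` is an assumed destination that responds to `<<`:

    require 'zip_tricks'
    require 'zlib'

    payload = "hello\n"

    zip = ZipTricks::Streamer.new(socket)
    zip.add_stored_entry(filename: 'greeting.txt',
                         size: payload.bytesize,
                         crc32: Zlib.crc32(payload))
    zip << payload
    final_offset = zip.close # the central directory is only written here;
                             # the underlying IO is left open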
@@ -251,29 +362,73 @@ class ZipTricks::Streamer
     ZipTricks::ZipWriter.new
   end
 
+  # Updates the last entry written with the CRC32 checksum and compressed/uncompressed
+  # sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
+  # After updating the entry will immediately write the data descriptor bytes
+  # to the output.
+  #
+  # @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
+  # @param compressed_size [Integer] the size of the compressed segment within the ZIP
+  # @param uncompressed_size [Integer] the size of the entry once uncompressed
+  # @return [Integer] the offset the output IO is at after writing the data descriptor
+  def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:)
+    # Save the information into the entry for when the time comes to write
+    # out the central directory
+    last_entry = @files.fetch(-1)
+    last_entry.crc32 = crc32
+    last_entry.compressed_size = compressed_size
+    last_entry.uncompressed_size = uncompressed_size
+
+    @writer.write_data_descriptor(io: @out,
+                                  crc32: last_entry.crc32,
+                                  compressed_size: last_entry.compressed_size,
+                                  uncompressed_size: last_entry.uncompressed_size)
+    @out.tell
+  end
+
   private
 
-  def add_file_and_write_local_header(filename:, crc32:, storage_mode:, compressed_size:,
-    uncompressed_size:, use_data_descriptor: false)
+  def add_file_and_write_local_header(filename:,
+                                      crc32:,
+                                      storage_mode:,
+                                      compressed_size:,
+                                      uncompressed_size:,
+                                      use_data_descriptor:)
 
     # Clean backslashes and uniqify filenames if there are duplicates
     filename = remove_backslash(filename)
     filename = uniquify_name(filename) if @filenames_set.include?(filename)
 
-    raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
-    raise Overflow, "Filename is too long" if filename.bytesize > 0xFFFF
+    unless [STORED, DEFLATED].include?(storage_mode)
+      raise UnknownMode, "Unknown compression mode #{storage_mode}"
+    end
+
+    raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
 
-    e = Entry.new(filename, crc32, compressed_size, uncompressed_size, storage_mode, mtime=Time.now.utc, use_data_descriptor)
+    if use_data_descriptor
+      crc32 = 0
+      compressed_size = 0
+      uncompressed_size = 0
+    end
+
+    e = Entry.new(filename,
+                  crc32,
+                  compressed_size,
+                  uncompressed_size,
+                  storage_mode,
+                  mtime = Time.now.utc,
+                  use_data_descriptor)
     @files << e
     @filenames_set << e.filename
     @local_header_offsets << @out.tell
-    @writer.write_local_file_header(io: @out, gp_flags: e.gp_flags, crc32: e.crc32, compressed_size: e.compressed_size,
-      uncompressed_size: e.uncompressed_size, mtime: e.mtime, filename: e.filename, storage_mode: e.storage_mode)
-  end
-
-  def write_data_descriptor_for_last_entry
-    e = @files.fetch(-1)
-    @writer.write_data_descriptor(io: @out, crc32: 0, compressed_size: e.compressed_size, uncompressed_size: e.uncompressed_size)
+    @writer.write_local_file_header(io: @out,
+                                    gp_flags: e.gp_flags,
+                                    crc32: e.crc32,
+                                    compressed_size: e.compressed_size,
+                                    uncompressed_size: e.uncompressed_size,
+                                    mtime: e.mtime,
+                                    filename: e.filename,
+                                    storage_mode: e.storage_mode)
   end
 
   def remove_backslash(filename)
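
The newly public `update_last_entry_and_write_data_descriptor` pairs naturally with `simulate_write` when the entry body bypasses Ruby entirely (the `sendfile` scenario from the class docs). A sketch, with `socket` and `file_path` as assumptions:

    require 'zip_tricks'

    file_size = File.size(file_path)
    crc = File.open(file_path, 'rb') { |f| ZipTricks::StreamCRC32.from_io(f) }

    ZipTricks::Streamer.open(socket) do |zip|
      # Declare the entry with zeroed sizes/CRC and a trailing data descriptor
      zip.add_stored_entry(filename: 'video.mp4', use_data_descriptor: true)

      # ... push the raw bytes with sendfile()/IO.copy_stream straight into
      # `socket`, bypassing the Streamer ...

      # Tell the Streamer how far the output advanced without it, then backfill
      # the real sizes and checksum via the data descriptor.
      zip.simulate_write(file_size)
      zip.update_last_entry_and_write_data_descriptor(crc32: crc,
                                                      compressed_size: file_size,
                                                      uncompressed_size: file_size)
    end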
@@ -281,8 +436,9 @@ class ZipTricks::Streamer
   end
 
   def uniquify_name(filename)
-    copy_pattern = /\((\d+)\)$/ # we add (1), (2), (n) at the end of a filename if there is a duplicate
-    parts = filename.split(".")
+    # we add (1), (2), (n) at the end of a filename if there is a duplicate
+    copy_pattern = /\((\d+)\)$/
+    parts = filename.split('.')
     ext = if parts.last =~ /gz|zip/ && parts.size > 2
             parts.pop(2)
           elsif parts.size > 1
@@ -292,12 +448,12 @@ class ZipTricks::Streamer
 
     duplicate_counter = 1
     loop do
-      if fn_last_part =~ copy_pattern
-        fn_last_part.sub!(copy_pattern, "(#{duplicate_counter})")
-      else
-        fn_last_part = "#{fn_last_part} (#{duplicate_counter})"
-      end
-      new_filename = (parts + [fn_last_part, ext]).compact.join(".")
+      fn_last_part = if fn_last_part =~ copy_pattern
+                       fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
+                     else
+                       "#{fn_last_part} (#{duplicate_counter})"
+                     end
+      new_filename = (parts + [fn_last_part, ext]).compact.join('.')
       return new_filename unless @filenames_set.include?(new_filename)
       duplicate_counter += 1
     end