zip_tricks 2.8.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/IMPLEMENTATION_DETAILS.md +2 -10
  4. data/README.md +62 -59
  5. data/examples/archive_size_estimate.rb +4 -4
  6. data/examples/rack_application.rb +3 -5
  7. data/lib/zip_tricks/block_deflate.rb +21 -0
  8. data/lib/zip_tricks/file_reader.rb +491 -0
  9. data/lib/zip_tricks/null_writer.rb +7 -2
  10. data/lib/zip_tricks/rack_body.rb +3 -3
  11. data/lib/zip_tricks/remote_io.rb +30 -20
  12. data/lib/zip_tricks/remote_uncap.rb +10 -10
  13. data/lib/zip_tricks/size_estimator.rb +64 -0
  14. data/lib/zip_tricks/stream_crc32.rb +2 -2
  15. data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
  16. data/lib/zip_tricks/streamer/entry.rb +21 -0
  17. data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
  18. data/lib/zip_tricks/streamer/writable.rb +20 -0
  19. data/lib/zip_tricks/streamer.rb +172 -66
  20. data/lib/zip_tricks/zip_writer.rb +346 -0
  21. data/lib/zip_tricks.rb +1 -4
  22. data/spec/spec_helper.rb +1 -38
  23. data/spec/zip_tricks/file_reader_spec.rb +47 -0
  24. data/spec/zip_tricks/rack_body_spec.rb +2 -2
  25. data/spec/zip_tricks/remote_io_spec.rb +8 -20
  26. data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
  27. data/spec/zip_tricks/size_estimator_spec.rb +31 -0
  28. data/spec/zip_tricks/streamer_spec.rb +59 -36
  29. data/spec/zip_tricks/zip_writer_spec.rb +408 -0
  30. data/zip_tricks.gemspec +20 -14
  31. metadata +33 -16
  32. data/lib/zip_tricks/manifest.rb +0 -85
  33. data/lib/zip_tricks/microzip.rb +0 -339
  34. data/lib/zip_tricks/stored_size_estimator.rb +0 -44
  35. data/spec/zip_tricks/manifest_spec.rb +0 -60
  36. data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
  37. data/spec/zip_tricks/microzip_spec.rb +0 -546
  38. data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -26,7 +26,7 @@ class ZipTricks::StreamCRC32
26
26
 
27
27
  # Returns the CRC32 value computed so far
28
28
  #
29
- # @return crc[Fixnum] the updated CRC32 value for all the blobs so far
29
+ # @return [Fixnum] the updated CRC32 value for all the blobs so far
30
30
  def to_i
31
31
  @crc
32
32
  end
@@ -36,7 +36,7 @@ class ZipTricks::StreamCRC32
36
36
  #
37
37
  # @param crc32[Fixnum] the CRC32 value to append
38
38
  # @param blob_size[Fixnum] the size of the data the `crc32` is computed from
39
- # @return crc[Fixnum] the updated CRC32 value for all the blobs so far
39
+ # @return [Fixnum] the updated CRC32 value for all the blobs so far
40
40
  def append(crc32, blob_size)
41
41
  @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
42
42
  end
@@ -0,0 +1,26 @@
1
+ class ZipTricks::Streamer::DeflatedWriter
2
+ def initialize(io)
3
+ @io = io
4
+ @uncompressed_size = 0
5
+ @started_at = @io.tell
6
+ @crc = ZipTricks::StreamCRC32.new
7
+ @bytes_since_last_flush = 0
8
+ end
9
+
10
+ def finish
11
+ ZipTricks::BlockDeflate.write_terminator(@io)
12
+ [@crc.to_i, @io.tell - @started_at, @uncompressed_size]
13
+ end
14
+
15
+ def <<(data)
16
+ @uncompressed_size += data.bytesize
17
+ @io << ZipTricks::BlockDeflate.deflate_chunk(data)
18
+ @crc << data
19
+ self
20
+ end
21
+
22
+ def write(data)
23
+ self << data
24
+ data.bytesize
25
+ end
26
+ end
@@ -0,0 +1,21 @@
1
+ # Is used internally by Streamer to keep track of entries in the archive during writing.
2
+ # Normally you will not have to use this class directly
3
+ class ZipTricks::Streamer::Entry < Struct.new(:filename, :crc32, :compressed_size, :uncompressed_size, :storage_mode, :mtime, :use_data_descriptor)
4
+ def initialize(*)
5
+ super
6
+ filename.force_encoding(Encoding::UTF_8)
7
+ @requires_efs_flag = !(filename.encode(Encoding::ASCII) rescue false)
8
+ end
9
+
10
+ # Set the general purpose flags for the entry. What we care about is the EFS
11
+ # bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
12
+ # bit so that the unarchiving application knows that the filename in the archive is UTF-8
13
+ # encoded, and not some DOS default. For ASCII entries it does not matter.
14
+ # Additionally, we care about bit 3 which toggles the use of the postfix data descriptor.
15
+ def gp_flags
16
+ flag = 0b00000000000
17
+ flag |= 0b100000000000 if @requires_efs_flag # bit 11
18
+ flag |= 0x0008 if use_data_descriptor # bit 3
19
+ flag
20
+ end
21
+ end
@@ -0,0 +1,25 @@
1
+ class ZipTricks::Streamer::StoredWriter
2
+ def initialize(io)
3
+ @io = io
4
+ @uncompressed_size = 0
5
+ @compressed_size = 0
6
+ @started_at = @io.tell
7
+ @crc = ZipTricks::StreamCRC32.new
8
+ end
9
+
10
+ def <<(data)
11
+ @io << data
12
+ @crc << data
13
+ self
14
+ end
15
+
16
+ def write(data)
17
+ self << data
18
+ data.bytesize
19
+ end
20
+
21
+ def finish
22
+ size = @io.tell - @started_at
23
+ [@crc.to_i, size, size]
24
+ end
25
+ end
@@ -0,0 +1,20 @@
1
+ # Gets yielded from the writing methods of the Streamer
2
+ # and accepts the data being written into the ZIP
3
+ class ZipTricks::Streamer::Writable
4
+ # Initializes a new Writable with the object it delegates the writes to.
5
+ # Normally you would not need to use this method directly
6
+ def initialize(writer)
7
+ @writer = writer
8
+ end
9
+ # Writes the given data to the output stream
10
+ #
11
+ # @param d[String] the binary string to write (part of the uncompressed file)
12
+ # @return [self]
13
+ def <<(d); @writer << d; self; end
14
+
15
+ # Writes the given data to the output stream
16
+ #
17
+ # @param d[String] the binary string to write (part of the uncompressed file)
18
+ # @return [Fixnum] the number of bytes written
19
+ def write(d); @writer << d; end
20
+ end
@@ -8,17 +8,61 @@
8
8
  # For stored entries, you need to know the CRC32 (as a uint) and the filesize upfront,
9
9
  # before the writing of the entry body starts.
10
10
  #
11
- # For compressed entries, you need to know the bytesize of the precompressed entry
12
- # as well.
11
+ # Any object that responds to `<<` can be used as the Streamer target - you can use
12
+ # a String, an Array, a Socket or a File, at your leisure.
13
+ #
14
+ # ## Using the Streamer with runtime compression
15
+ #
16
+ # You can use the Streamer with data descriptors (the CRC32 and the sizes will be
17
+ # written after the file data). This allows non-rewinding on-the-fly compression.
18
+ # If you are compressing large files, the Deflater object that the Streamer controls
19
+ # will be regularly flushed to prevent memory inflation.
20
+ #
21
+ # ZipTricks::Streamer.open(file_socket_or_string) do |zip|
22
+ # zip.write_stored_file('mov.mp4') do |sink|
23
+ # File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
24
+ # end
25
+ # zip.write_deflated_file('long-novel.txt') do |sink|
26
+ # File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
27
+ # end
28
+ # end
29
+ #
30
+ # The central directory will be written automatically at the end of the block.
31
+ #
32
+ # ## Using the Streamer with entries of known size and having a known CRC32 checksum
33
+ #
34
+ # Streamer allows "IO splicing" - in this mode it will only control the metadata output,
35
+ # but you can write the data to the socket/file outside of the Streamer. For example, when
36
+ # using the sendfile gem:
37
+ #
38
+ # ZipTricks::Streamer.open(socket) do | zip |
39
+ # zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
40
+ # zip.simulate_write(tempfile1.size)
41
+ # socket.sendfile(tempfile1)
42
+ # zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
43
+ # zip.simulate_write(tempfile2.size)
44
+ # socket.sendfile(tempfile2)
45
+ # end
46
+ #
47
+ # Note that you need to use `simulate_write` to let the Streamer know how many bytes were written bypassing it.
48
+ # The central directory will be written automatically at the end of the block.
13
49
  class ZipTricks::Streamer
50
+ require_relative 'streamer/deflated_writer'
51
+ require_relative 'streamer/writable'
52
+ require_relative 'streamer/stored_writer'
53
+ require_relative 'streamer/entry'
54
+
55
+ STORED = 0
56
+ DEFLATED = 8
57
+
14
58
  EntryBodySizeMismatch = Class.new(StandardError)
15
59
  InvalidOutput = Class.new(ArgumentError)
60
+ Overflow = Class.new(StandardError)
61
+ PathError = Class.new(StandardError)
62
+ DuplicateFilenames = Class.new(StandardError)
63
+ UnknownMode = Class.new(StandardError)
16
64
 
17
- # Language encoding flag (EFS) bit (general purpose bit 11)
18
- EFS = 0b100000000000
19
-
20
- # Default general purpose flags for each entry.
21
- DEFAULT_GP_FLAGS = 0b00000000000
65
+ private_constant :DeflatedWriter, :StoredWriter, :STORED, :DEFLATED
22
66
 
23
67
  # Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
24
68
  # returns, the Streamer will have its `close` method called, which will write out the central
@@ -34,21 +78,17 @@ class ZipTricks::Streamer
34
78
 
35
79
  # Creates a new Streamer on top of the given IO-ish object.
36
80
  #
37
- # @param stream [IO] the destination IO for the ZIP (should respond to `tell` and `<<`)
81
+ # @param stream [IO] the destination IO for the ZIP (should respond to `<<`)
38
82
  def initialize(stream)
39
- raise InvalidOutput, "The stream should respond to #<<" unless stream.respond_to?(:<<)
40
- stream = ZipTricks::WriteAndTell.new(stream) unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
41
-
42
- @output_stream = stream
43
- @zip = ZipTricks::Microzip.new
83
+ raise InvalidOutput, "The stream must respond to #<<" unless stream.respond_to?(:<<)
84
+ unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
85
+ stream = ZipTricks::WriteAndTell.new(stream)
86
+ end
44
87
 
45
- @state_monitor = VeryTinyStateMachine.new(:before_entry, callbacks_to=self)
46
- @state_monitor.permit_state :in_entry_header, :in_entry_body, :in_central_directory, :closed
47
- @state_monitor.permit_transition :before_entry => :in_entry_header
48
- @state_monitor.permit_transition :in_entry_header => :in_entry_body
49
- @state_monitor.permit_transition :in_entry_body => :in_entry_header
50
- @state_monitor.permit_transition :in_entry_body => :in_central_directory
51
- @state_monitor.permit_transition :in_central_directory => :closed
88
+ @out = stream
89
+ @files = []
90
+ @local_header_offsets = []
91
+ @writer = create_writer
52
92
  end
53
93
 
54
94
  # Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
@@ -56,9 +96,7 @@ class ZipTricks::Streamer
56
96
  # @param binary_data [String] a String in binary encoding
57
97
  # @return self
58
98
  def <<(binary_data)
59
- @state_monitor.transition_or_maintain! :in_entry_body
60
- @output_stream << binary_data
61
- @bytes_written_for_entry += binary_data.bytesize
99
+ @out << binary_data
62
100
  self
63
101
  end
64
102
 
@@ -69,7 +107,7 @@ class ZipTricks::Streamer
69
107
  # @param binary_data [String] a String in binary encoding
70
108
  # @return [Fixnum] the number of bytes written
71
109
  def write(binary_data)
72
- self << binary_data
110
+ @out << binary_data
73
111
  binary_data.bytesize
74
112
  end
75
113
 
@@ -80,10 +118,8 @@ class ZipTricks::Streamer
80
118
  # @param num_bytes [Numeric] how many bytes are going to be written bypassing the Streamer
81
119
  # @return [Numeric] position in the output stream / ZIP archive
82
120
  def simulate_write(num_bytes)
83
- @state_monitor.transition_or_maintain! :in_entry_body
84
- @output_stream.advance_position_by(num_bytes)
85
- @bytes_written_for_entry += num_bytes
86
- @output_stream.tell
121
+ @out.advance_position_by(num_bytes)
122
+ @out.tell
87
123
  end
88
124
 
89
125
  # Writes out the local header for an entry (file in the ZIP) that is using the deflated storage model (is compressed).
@@ -92,67 +128,137 @@ class ZipTricks::Streamer
92
128
  # Note that the deflated body that is going to be written into the output has to be _precompressed_ (pre-deflated)
93
129
  # before writing it into the Streamer, because otherwise it is impossible to know its size upfront.
94
130
  #
95
- # @param entry_name [String] the name of the file in the entry
131
+ # @param filename [String] the name of the file in the entry
132
+ # @param compressed_size [Fixnum] the size of the compressed entry that is going to be written into the archive
96
133
  # @param uncompressed_size [Fixnum] the size of the entry when uncompressed, in bytes
97
134
  # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
98
- # @param compressed_size [Fixnum] the size of the compressed entry that is going to be written into the archive
99
135
  # @return [Fixnum] the offset the output IO is at after writing the entry header
100
- def add_compressed_entry(entry_name, uncompressed_size, crc32, compressed_size)
101
- @state_monitor.transition! :in_entry_header
102
- @zip.add_local_file_header(io: @output_stream, filename: entry_name, crc32: crc32,
103
- compressed_size: compressed_size, uncompressed_size: uncompressed_size, storage_mode: ZipTricks::Microzip::DEFLATED)
104
- @expected_bytes_for_entry = compressed_size
105
- @bytes_written_for_entry = 0
106
- @output_stream.tell
136
+ def add_compressed_entry(filename:, compressed_size:, uncompressed_size:, crc32:)
137
+ add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: DEFLATED,
138
+ compressed_size: compressed_size, uncompressed_size: uncompressed_size)
139
+ @out.tell
107
140
  end
108
141
 
109
142
  # Writes out the local header for an entry (file in the ZIP) that is using the stored storage model (is stored as-is).
110
143
  # Once this method is called, the `<<` method has to be called one or more times to write the actual contents of the body.
111
144
  #
112
- # @param entry_name [String] the name of the file in the entry
113
- # @param uncompressed_size [Fixnum] the size of the entry when uncompressed, in bytes
145
+ # @param filename [String] the name of the file in the entry
146
+ # @param size [Fixnum] the size of the file when uncompressed, in bytes
114
147
  # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
115
148
  # @return [Fixnum] the offset the output IO is at after writing the entry header
116
- def add_stored_entry(entry_name, uncompressed_size, crc32)
117
- @state_monitor.transition! :in_entry_header
118
- @zip.add_local_file_header(io: @output_stream, filename: entry_name, crc32: crc32,
119
- compressed_size: uncompressed_size, uncompressed_size: uncompressed_size, storage_mode: ZipTricks::Microzip::STORED)
120
- @bytes_written_for_entry = 0
121
- @expected_bytes_for_entry = uncompressed_size
122
- @output_stream.tell
149
+ def add_stored_entry(filename:, size:, crc32:)
150
+ add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: STORED,
151
+ compressed_size: size, uncompressed_size: size)
152
+ @out.tell
123
153
  end
124
154
 
125
- # Writes out the global footer and the directory entry header and the global directory of the ZIP
126
- # archive using the information about the entries added using `add_stored_entry` and `add_compressed_entry`.
155
+ # Opens the stream for a stored file in the archive, and yields a writer for that file to the block.
156
+ # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
157
+ # sizes and the CRC32 checksum.
127
158
  #
128
- # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
159
+ # @param filename[String] the name of the file in the archive
160
+ # @yield [#<<, #write] an object that the file contents must be written to
161
+ def write_stored_file(filename)
162
+ add_file_and_write_local_header(filename: filename, storage_mode: STORED,
163
+ use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)
164
+
165
+ w = StoredWriter.new(@out)
166
+ yield(Writable.new(w))
167
+ crc, comp, uncomp = w.finish
168
+
169
+ # Save the information into the entry for when the time comes to write out the central directory
170
+ last_entry = @files[-1]
171
+ last_entry.crc32 = crc
172
+ last_entry.compressed_size = comp
173
+ last_entry.uncompressed_size = uncomp
174
+
175
+ @writer.write_data_descriptor(io: @out, crc32: crc, compressed_size: comp, uncompressed_size: uncomp)
176
+ end
177
+
178
+ # Opens the stream for a deflated file in the archive, and yields a writer for that file to the block.
179
+ # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
180
+ # sizes and the CRC32 checksum.
129
181
  #
130
- # @return [Fixnum] the offset the output IO is at after writing the central directory
131
- def write_central_directory!
132
- @state_monitor.transition! :in_central_directory
133
- @zip.write_central_directory(@output_stream)
134
- @output_stream.tell
182
+ # @param filename[String] the name of the file in the archive
183
+ # @yield [#<<, #write] an object that the file contents must be written to
184
+ def write_deflated_file(filename)
185
+ add_file_and_write_local_header(filename: filename, storage_mode: DEFLATED,
186
+ use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)
187
+
188
+ w = DeflatedWriter.new(@out)
189
+ yield(Writable.new(w))
190
+ crc, comp, uncomp = w.finish
191
+
192
+ # Save the information into the entry for when the time comes to write out the central directory
193
+ last_entry = @files[-1]
194
+ last_entry.crc32 = crc
195
+ last_entry.compressed_size = comp
196
+ last_entry.uncompressed_size = uncomp
197
+ write_data_descriptor_for_last_entry
135
198
  end
136
199
 
137
- # Closes the archive. Writes the central directory if it has not yet been written.
138
- # Switches the Streamer into a state where it can no longer be written to.
200
+ # Closes the archive. Writes the central directory, and switches the writer into
201
+ # a state where it can no longer be written to.
139
202
  #
140
203
  # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
141
204
  #
142
205
  # @return [Fixnum] the offset the output IO is at after closing the archive
143
206
  def close
144
- write_central_directory! unless @state_monitor.in_state?(:in_central_directory)
145
- @state_monitor.transition! :closed
146
- @output_stream.tell
147
- end
207
+ # Record the central directory offset, so that it can be written into the EOCD record
208
+ cdir_starts_at = @out.tell
209
+
210
+ # Write out the central directory entries, one for each file
211
+ @files.each_with_index do |entry, i|
212
+ header_loc = @local_header_offsets.fetch(i)
213
+ @writer.write_central_directory_file_header(io: @out, local_file_header_location: header_loc,
214
+ gp_flags: entry.gp_flags, storage_mode: entry.storage_mode,
215
+ compressed_size: entry.compressed_size, uncompressed_size: entry.uncompressed_size,
216
+ mtime: entry.mtime, crc32: entry.crc32, filename: entry.filename) #, external_attrs: DEFAULT_EXTERNAL_ATTRS)
217
+ end
218
+
219
+ # Record the central directory size, for the EOCDR
220
+ cdir_size = @out.tell - cdir_starts_at
148
221
 
222
+ # Write out the EOCDR
223
+ @writer. write_end_of_central_directory(io: @out, start_of_central_directory_location: cdir_starts_at,
224
+ central_directory_size: cdir_size, num_files_in_archive: @files.length)
225
+ @out.tell
226
+ end
227
+
228
+ # Sets up the ZipWriter with wrappers if necessary. The method is called once, when the Streamer
229
+ # gets instantiated - the Writer then gets reused. This method is primarily there so that you
230
+ # can override it.
231
+ #
232
+ # @return [ZipTricks::ZipWriter] the writer to perform writes with
233
+ def create_writer
234
+ ZipTricks::ZipWriter.new
235
+ end
236
+
149
237
  private
150
-
151
- # Checks whether the number of bytes written conforms to the declared entry size
152
- def leaving_in_entry_body_state
153
- if @bytes_written_for_entry != @expected_bytes_for_entry
154
- msg = "Wrong number of bytes written for entry (expected %d, got %d)" % [@expected_bytes_for_entry, @bytes_written_for_entry]
155
- raise EntryBodySizeMismatch, msg
238
+
239
+ def add_file_and_write_local_header(filename:, crc32:, storage_mode:, compressed_size:,
240
+ uncompressed_size:, use_data_descriptor: false)
241
+ if @files.any?{|e| e.filename == filename }
242
+ raise DuplicateFilenames, "Filename #{filename.inspect} already used in the archive"
156
243
  end
244
+
245
+ raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
246
+ raise Overflow, "Filename is too long" if filename.bytesize > 0xFFFF
247
+ raise PathError, "Paths in ZIP may only contain forward slashes (UNIX separators)" if filename.include?('\\')
248
+
249
+ @check_compressed_size_after_leaving_body = !use_data_descriptor
250
+ @bytes_written_for_entry = 0
251
+ @expected_bytes_for_entry = compressed_size
252
+
253
+ e = Entry.new(filename, crc32, compressed_size, uncompressed_size, storage_mode, mtime=Time.now.utc, use_data_descriptor)
254
+ @files << e
255
+ @local_header_offsets << @out.tell
256
+ @writer.write_local_file_header(io: @out, gp_flags: e.gp_flags, crc32: e.crc32, compressed_size: e.compressed_size,
257
+ uncompressed_size: e.uncompressed_size, mtime: e.mtime, filename: e.filename, storage_mode: e.storage_mode)
258
+ end
259
+
260
+ def write_data_descriptor_for_last_entry
261
+ e = @files.fetch(-1)
262
+ @writer.write_data_descriptor(io: @out, crc32: 0, compressed_size: e.compressed_size, uncompressed_size: e.uncompressed_size)
157
263
  end
158
264
  end