zip_tricks 2.8.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/IMPLEMENTATION_DETAILS.md +2 -10
  4. data/README.md +62 -59
  5. data/examples/archive_size_estimate.rb +4 -4
  6. data/examples/rack_application.rb +3 -5
  7. data/lib/zip_tricks/block_deflate.rb +21 -0
  8. data/lib/zip_tricks/file_reader.rb +491 -0
  9. data/lib/zip_tricks/null_writer.rb +7 -2
  10. data/lib/zip_tricks/rack_body.rb +3 -3
  11. data/lib/zip_tricks/remote_io.rb +30 -20
  12. data/lib/zip_tricks/remote_uncap.rb +10 -10
  13. data/lib/zip_tricks/size_estimator.rb +64 -0
  14. data/lib/zip_tricks/stream_crc32.rb +2 -2
  15. data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
  16. data/lib/zip_tricks/streamer/entry.rb +21 -0
  17. data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
  18. data/lib/zip_tricks/streamer/writable.rb +20 -0
  19. data/lib/zip_tricks/streamer.rb +172 -66
  20. data/lib/zip_tricks/zip_writer.rb +346 -0
  21. data/lib/zip_tricks.rb +1 -4
  22. data/spec/spec_helper.rb +1 -38
  23. data/spec/zip_tricks/file_reader_spec.rb +47 -0
  24. data/spec/zip_tricks/rack_body_spec.rb +2 -2
  25. data/spec/zip_tricks/remote_io_spec.rb +8 -20
  26. data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
  27. data/spec/zip_tricks/size_estimator_spec.rb +31 -0
  28. data/spec/zip_tricks/streamer_spec.rb +59 -36
  29. data/spec/zip_tricks/zip_writer_spec.rb +408 -0
  30. data/zip_tricks.gemspec +20 -14
  31. metadata +33 -16
  32. data/lib/zip_tricks/manifest.rb +0 -85
  33. data/lib/zip_tricks/microzip.rb +0 -339
  34. data/lib/zip_tricks/stored_size_estimator.rb +0 -44
  35. data/spec/zip_tricks/manifest_spec.rb +0 -60
  36. data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
  37. data/spec/zip_tricks/microzip_spec.rb +0 -546
  38. data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
@@ -26,7 +26,7 @@ class ZipTricks::StreamCRC32
26
26
 
27
27
  # Returns the CRC32 value computed so far
28
28
  #
29
- # @return crc[Fixnum] the updated CRC32 value for all the blobs so far
29
+ # @return [Fixnum] the updated CRC32 value for all the blobs so far
30
30
  def to_i
31
31
  @crc
32
32
  end
@@ -36,7 +36,7 @@ class ZipTricks::StreamCRC32
36
36
  #
37
37
  # @param crc32[Fixnum] the CRC32 value to append
38
38
  # @param blob_size[Fixnum] the size of the data the `crc32` is computed from
39
- # @return crc[Fixnum] the updated CRC32 value for all the blobs so far
39
+ # @return [Fixnum] the updated CRC32 value for all the blobs so far
40
40
  def append(crc32, blob_size)
41
41
  @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
42
42
  end
@@ -0,0 +1,26 @@
1
# Sink for the body of one deflated ZIP entry. Each chunk pushed in is run through
# ZipTricks::BlockDeflate.deflate_chunk and appended to the wrapped IO, while the CRC32
# and the byte count of the uncompressed input are accumulated on the side.
class ZipTricks::Streamer::DeflatedWriter
  # @param io [#<<, #tell] destination for the deflated bytes; its position at
  #   construction time is remembered as the start of the compressed body
  def initialize(io)
    @io = io
    @started_at = @io.tell
    @uncompressed_size = 0
    # Initialised here but not read anywhere in this class
    @bytes_since_last_flush = 0
    @crc = ZipTricks::StreamCRC32.new
  end

  # Writes the deflate terminator and reports the totals for the entry.
  #
  # @return [Array] `[crc32, compressed_size, uncompressed_size]`, where the compressed
  #   size is how far the IO position moved since this writer was created
  def finish
    ZipTricks::BlockDeflate.write_terminator(@io)
    compressed_size = @io.tell - @started_at
    [@crc.to_i, compressed_size, @uncompressed_size]
  end

  # Deflates one chunk into the IO and folds the raw chunk into the size and CRC32 totals.
  #
  # @param data [String] a chunk of the uncompressed entry body
  # @return [self]
  def <<(data)
    @uncompressed_size += data.bytesize
    @io << ZipTricks::BlockDeflate.deflate_chunk(data)
    @crc << data
    self
  end

  # Same as `<<`, but answers with the size of the chunk that was passed in.
  #
  # @param data [String] a chunk of the uncompressed entry body
  # @return [Fixnum] bytesize of `data`
  def write(data)
    self << data
    data.bytesize
  end
end
@@ -0,0 +1,21 @@
1
# Is used internally by Streamer to keep track of entries in the archive during writing.
# Normally you will not have to use this class directly.
class ZipTricks::Streamer::Entry < Struct.new(:filename, :crc32, :compressed_size, :uncompressed_size, :storage_mode, :mtime, :use_data_descriptor)
  # Accepts the struct members positionally, tags the filename as UTF-8 and records
  # whether it contains anything outside of ASCII.
  def initialize(*)
    super
    # force_encoding mutates its receiver and raises on a frozen String, so a frozen
    # filename is swapped for an unfrozen copy first. An unfrozen filename is still
    # retagged in place, exactly as before.
    self.filename = filename.dup if filename.frozen?
    filename.force_encoding(Encoding::UTF_8)
    # Only non-ASCII names need the EFS bit. ascii_only? answers this directly, without
    # transcoding the name and rescuing whatever error that raises.
    @requires_efs_flag = !filename.ascii_only?
  end

  # Computes the general purpose flags for the entry. Two bits matter here:
  #
  # * the EFS bit (bit 11), which is set when the filename is not plain ASCII, so that
  #   the unarchiving application knows the filename in the archive is UTF-8 encoded
  #   and not some DOS default. For ASCII entries it does not matter.
  # * bit 3, which toggles the use of the postfix data descriptor.
  #
  # @return [Fixnum] the flags as an integer bit field
  def gp_flags
    flags = 0
    flags |= 0b100000000000 if @requires_efs_flag # bit 11
    flags |= 0x0008 if use_data_descriptor # bit 3
    flags
  end
end
@@ -0,0 +1,25 @@
1
# Sink for the body of one stored (uncompressed) ZIP entry. Chunks are appended to the
# wrapped IO verbatim while a CRC32 of everything written is accumulated.
class ZipTricks::Streamer::StoredWriter
  # @param io [#<<, #tell] destination for the entry body; its position at
  #   construction time is remembered as the start of the body
  def initialize(io)
    @io = io
    @started_at = @io.tell
    @crc = ZipTricks::StreamCRC32.new
    # Neither counter is updated in this class - `finish` derives the sizes from the IO position
    @uncompressed_size = 0
    @compressed_size = 0
  end

  # Appends one chunk to the IO as-is and folds it into the CRC32.
  #
  # @param data [String] a chunk of the entry body
  # @return [self]
  def <<(data)
    @io << data
    @crc << data
    self
  end

  # Same as `<<`, but answers with the size of the chunk that was passed in.
  #
  # @param data [String] a chunk of the entry body
  # @return [Fixnum] bytesize of `data`
  def write(data)
    self << data
    data.bytesize
  end

  # Reports the totals for the entry. Stored data is not transformed, so the same
  # figure is used for both the compressed and the uncompressed size.
  #
  # @return [Array] `[crc32, compressed_size, uncompressed_size]`
  def finish
    bytes_stored = @io.tell - @started_at
    [@crc.to_i, bytes_stored, bytes_stored]
  end
end
@@ -0,0 +1,20 @@
1
# Gets yielded from the writing methods of the Streamer (`write_stored_file` and
# `write_deflated_file`) and accepts the data being written into the ZIP
class ZipTricks::Streamer::Writable
  # Initializes a new Writable with the object it delegates the writes to.
  # Normally you would not need to use this method directly
  #
  # @param writer [#<<] the object that receives every chunk
  def initialize(writer)
    @writer = writer
  end

  # Writes the given data to the output stream
  #
  # @param d[String] the binary string to write (part of the uncompressed file)
  # @return [self]
  def <<(d)
    @writer << d
    self
  end

  # Writes the given data to the output stream
  #
  # @param d[String] the binary string to write (part of the uncompressed file)
  # @return [Fixnum] the number of bytes written
  def write(d)
    @writer << d
    # Answer with the byte count rather than the result of `<<`: callers that treat
    # this object as an IO (IO.copy_stream, for instance) expect an Integer from `write`
    d.bytesize
  end
end
@@ -8,17 +8,61 @@
8
8
  # For stored entries, you need to know the CRC32 (as a uint) and the filesize upfront,
9
9
  # before the writing of the entry body starts.
10
10
  #
11
- # For compressed entries, you need to know the bytesize of the precompressed entry
12
- # as well.
11
+ # Any object that responds to `<<` can be used as the Streamer target - you can use
12
+ # a String, an Array, a Socket or a File, at your leisure.
13
+ #
14
+ # ## Using the Streamer with runtime compression
15
+ #
16
+ # You can use the Streamer with data descriptors (the CRC32 and the sizes will be
17
+ # written after the file data). This allows non-rewinding on-the-fly compression.
18
+ # If you are compressing large files, the Deflater object that the Streamer controls
19
+ # will be regularly flushed to prevent memory inflation.
20
+ #
21
+ # ZipTricks::Streamer.open(file_socket_or_string) do |zip|
22
+ # zip.write_stored_file('mov.mp4') do |sink|
23
+ # File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
24
+ # end
25
+ # zip.write_deflated_file('long-novel.txt') do |sink|
26
+ # File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
27
+ # end
28
+ # end
29
+ #
30
+ # The central directory will be written automatically at the end of the block.
31
+ #
32
+ # ## Using the Streamer with entries of known size and having a known CRC32 checksum
33
+ #
34
+ # Streamer allows "IO splicing" - in this mode it will only control the metadata output,
35
+ # but you can write the data to the socket/file outside of the Streamer. For example, when
36
+ # using the sendfile gem:
37
+ #
38
+ # ZipTricks::Streamer.open(socket) do | zip |
39
+ # zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
40
+ # zip.simulate_write(tempfile1.size)
41
+ # socket.sendfile(tempfile1)
42
+ # zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
43
+ # zip.simulate_write(tempfile2.size)
44
+ # socket.sendfile(tempfile2)
45
+ # end
46
+ #
47
+ # Note that you need to use `simulate_write` to let the Streamer know how many bytes were written bypassing it.
48
+ # The central directory will be written automatically at the end of the block.
13
49
  class ZipTricks::Streamer
50
+ require_relative 'streamer/deflated_writer'
51
+ require_relative 'streamer/writable'
52
+ require_relative 'streamer/stored_writer'
53
+ require_relative 'streamer/entry'
54
+
55
+ STORED = 0
56
+ DEFLATED = 8
57
+
14
58
  EntryBodySizeMismatch = Class.new(StandardError)
15
59
  InvalidOutput = Class.new(ArgumentError)
60
+ Overflow = Class.new(StandardError)
61
+ PathError = Class.new(StandardError)
62
+ DuplicateFilenames = Class.new(StandardError)
63
+ UnknownMode = Class.new(StandardError)
16
64
 
17
- # Language encoding flag (EFS) bit (general purpose bit 11)
18
- EFS = 0b100000000000
19
-
20
- # Default general purpose flags for each entry.
21
- DEFAULT_GP_FLAGS = 0b00000000000
65
+ private_constant :DeflatedWriter, :StoredWriter, :STORED, :DEFLATED
22
66
 
23
67
  # Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
24
68
  # returns, the Streamer will have its `close` method called, which will write out the central
@@ -34,21 +78,17 @@ class ZipTricks::Streamer
34
78
 
35
79
  # Creates a new Streamer on top of the given IO-ish object.
36
80
  #
37
- # @param stream [IO] the destination IO for the ZIP (should respond to `tell` and `<<`)
81
+ # @param stream [IO] the destination IO for the ZIP (should respond to `<<`)
38
82
  def initialize(stream)
39
- raise InvalidOutput, "The stream should respond to #<<" unless stream.respond_to?(:<<)
40
- stream = ZipTricks::WriteAndTell.new(stream) unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
41
-
42
- @output_stream = stream
43
- @zip = ZipTricks::Microzip.new
83
+ raise InvalidOutput, "The stream must respond to #<<" unless stream.respond_to?(:<<)
84
+ unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
85
+ stream = ZipTricks::WriteAndTell.new(stream)
86
+ end
44
87
 
45
- @state_monitor = VeryTinyStateMachine.new(:before_entry, callbacks_to=self)
46
- @state_monitor.permit_state :in_entry_header, :in_entry_body, :in_central_directory, :closed
47
- @state_monitor.permit_transition :before_entry => :in_entry_header
48
- @state_monitor.permit_transition :in_entry_header => :in_entry_body
49
- @state_monitor.permit_transition :in_entry_body => :in_entry_header
50
- @state_monitor.permit_transition :in_entry_body => :in_central_directory
51
- @state_monitor.permit_transition :in_central_directory => :closed
88
+ @out = stream
89
+ @files = []
90
+ @local_header_offsets = []
91
+ @writer = create_writer
52
92
  end
53
93
 
54
94
  # Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
@@ -56,9 +96,7 @@ class ZipTricks::Streamer
56
96
  # @param binary_data [String] a String in binary encoding
57
97
  # @return self
58
98
  def <<(binary_data)
59
- @state_monitor.transition_or_maintain! :in_entry_body
60
- @output_stream << binary_data
61
- @bytes_written_for_entry += binary_data.bytesize
99
+ @out << binary_data
62
100
  self
63
101
  end
64
102
 
@@ -69,7 +107,7 @@ class ZipTricks::Streamer
69
107
  # @param binary_data [String] a String in binary encoding
70
108
  # @return [Fixnum] the number of bytes written
71
109
  def write(binary_data)
72
- self << binary_data
110
+ @out << binary_data
73
111
  binary_data.bytesize
74
112
  end
75
113
 
@@ -80,10 +118,8 @@ class ZipTricks::Streamer
80
118
  # @param num_bytes [Numeric] how many bytes are going to be written bypassing the Streamer
81
119
  # @return [Numeric] position in the output stream / ZIP archive
82
120
  def simulate_write(num_bytes)
83
- @state_monitor.transition_or_maintain! :in_entry_body
84
- @output_stream.advance_position_by(num_bytes)
85
- @bytes_written_for_entry += num_bytes
86
- @output_stream.tell
121
+ @out.advance_position_by(num_bytes)
122
+ @out.tell
87
123
  end
88
124
 
89
125
  # Writes out the local header for an entry (file in the ZIP) that is using the deflated storage model (is compressed).
@@ -92,67 +128,137 @@ class ZipTricks::Streamer
92
128
  # Note that the deflated body that is going to be written into the output has to be _precompressed_ (pre-deflated)
93
129
  # before writing it into the Streamer, because otherwise it is impossible to know its size upfront.
94
130
  #
95
- # @param entry_name [String] the name of the file in the entry
131
+ # @param filename [String] the name of the file in the entry
132
+ # @param compressed_size [Fixnum] the size of the compressed entry that is going to be written into the archive
96
133
  # @param uncompressed_size [Fixnum] the size of the entry when uncompressed, in bytes
97
134
  # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
98
- # @param compressed_size [Fixnum] the size of the compressed entry that is going to be written into the archive
99
135
  # @return [Fixnum] the offset the output IO is at after writing the entry header
100
- def add_compressed_entry(entry_name, uncompressed_size, crc32, compressed_size)
101
- @state_monitor.transition! :in_entry_header
102
- @zip.add_local_file_header(io: @output_stream, filename: entry_name, crc32: crc32,
103
- compressed_size: compressed_size, uncompressed_size: uncompressed_size, storage_mode: ZipTricks::Microzip::DEFLATED)
104
- @expected_bytes_for_entry = compressed_size
105
- @bytes_written_for_entry = 0
106
- @output_stream.tell
136
+ def add_compressed_entry(filename:, compressed_size:, uncompressed_size:, crc32:)
137
+ add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: DEFLATED,
138
+ compressed_size: compressed_size, uncompressed_size: uncompressed_size)
139
+ @out.tell
107
140
  end
108
141
 
109
142
  # Writes out the local header for an entry (file in the ZIP) that is using the stored storage model (is stored as-is).
110
143
  # Once this method is called, the `<<` method has to be called one or more times to write the actual contents of the body.
111
144
  #
112
- # @param entry_name [String] the name of the file in the entry
113
- # @param uncompressed_size [Fixnum] the size of the entry when uncompressed, in bytes
145
+ # @param filename [String] the name of the file in the entry
146
+ # @param size [Fixnum] the size of the file when uncompressed, in bytes
114
147
  # @param crc32 [Fixnum] the CRC32 checksum of the entry when uncompressed
115
148
  # @return [Fixnum] the offset the output IO is at after writing the entry header
116
- def add_stored_entry(entry_name, uncompressed_size, crc32)
117
- @state_monitor.transition! :in_entry_header
118
- @zip.add_local_file_header(io: @output_stream, filename: entry_name, crc32: crc32,
119
- compressed_size: uncompressed_size, uncompressed_size: uncompressed_size, storage_mode: ZipTricks::Microzip::STORED)
120
- @bytes_written_for_entry = 0
121
- @expected_bytes_for_entry = uncompressed_size
122
- @output_stream.tell
149
+ def add_stored_entry(filename:, size:, crc32:)
150
+ add_file_and_write_local_header(filename: filename, crc32: crc32, storage_mode: STORED,
151
+ compressed_size: size, uncompressed_size: size)
152
+ @out.tell
123
153
  end
124
154
 
125
- # Writes out the global footer and the directory entry header and the global directory of the ZIP
126
- # archive using the information about the entries added using `add_stored_entry` and `add_compressed_entry`.
155
+ # Opens the stream for a stored file in the archive, and yields a writer for that file to the block.
156
+ # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
157
+ # sizes and the CRC32 checksum.
127
158
  #
128
- # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
159
+ # @param filename[String] the name of the file in the archive
160
+ # @yield [#<<, #write] an object that the file contents must be written to
161
def write_stored_file(filename)
  # The checksum and sizes are not known until the block has run, so the local header
  # goes out with zeroes and with the data descriptor requested
  add_file_and_write_local_header(filename: filename, storage_mode: STORED, use_data_descriptor: true,
                                  crc32: 0, compressed_size: 0, uncompressed_size: 0)

  stored_writer = StoredWriter.new(@out)
  yield(Writable.new(stored_writer))
  checksum, packed_size, plain_size = stored_writer.finish

  # Keep the real figures on the entry for when the central directory gets written out
  entry = @files.last
  entry.crc32 = checksum
  entry.compressed_size = packed_size
  entry.uncompressed_size = plain_size

  @writer.write_data_descriptor(io: @out, crc32: checksum,
                                compressed_size: packed_size, uncompressed_size: plain_size)
end
177
+
178
+ # Opens the stream for a deflated file in the archive, and yields a writer for that file to the block.
179
+ # Once the write completes, a data descriptor will be written with the actual compressed/uncompressed
180
+ # sizes and the CRC32 checksum.
129
181
  #
130
- # @return [Fixnum] the offset the output IO is at after writing the central directory
131
- def write_central_directory!
132
- @state_monitor.transition! :in_central_directory
133
- @zip.write_central_directory(@output_stream)
134
- @output_stream.tell
182
+ # @param filename[String] the name of the file in the archive
183
+ # @yield [#<<, #write] an object that the file contents must be written to
184
def write_deflated_file(filename)
  # The checksum and sizes are not known until the block has run, so the local header
  # goes out with zeroes and with the data descriptor requested
  add_file_and_write_local_header(filename: filename, storage_mode: DEFLATED,
                                  use_data_descriptor: true, crc32: 0, compressed_size: 0, uncompressed_size: 0)

  w = DeflatedWriter.new(@out)
  yield(Writable.new(w))
  crc, comp, uncomp = w.finish

  # Save the information into the entry for when the time comes to write out the central directory
  last_entry = @files[-1]
  last_entry.crc32 = crc
  last_entry.compressed_size = comp
  last_entry.uncompressed_size = uncomp

  # Write the descriptor with the CRC32 that was just computed (the same way
  # write_stored_file does), so that it agrees with the central directory entry
  @writer.write_data_descriptor(io: @out, crc32: crc, compressed_size: comp, uncompressed_size: uncomp)
end
136
199
 
137
- # Closes the archive. Writes the central directory if it has not yet been written.
138
- # Switches the Streamer into a state where it can no longer be written to.
200
+ # Closes the archive. Writes the central directory, and switches the writer into
201
+ # a state where it can no longer be written to.
139
202
  #
140
203
  # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
141
204
  #
142
205
  # @return [Fixnum] the offset the output IO is at after closing the archive
143
206
  def close
144
- write_central_directory! unless @state_monitor.in_state?(:in_central_directory)
145
- @state_monitor.transition! :closed
146
- @output_stream.tell
147
- end
207
+ # Record the central directory offset, so that it can be written into the EOCD record
208
+ cdir_starts_at = @out.tell
209
+
210
+ # Write out the central directory entries, one for each file
211
+ @files.each_with_index do |entry, i|
212
+ header_loc = @local_header_offsets.fetch(i)
213
+ @writer.write_central_directory_file_header(io: @out, local_file_header_location: header_loc,
214
+ gp_flags: entry.gp_flags, storage_mode: entry.storage_mode,
215
+ compressed_size: entry.compressed_size, uncompressed_size: entry.uncompressed_size,
216
+ mtime: entry.mtime, crc32: entry.crc32, filename: entry.filename) #, external_attrs: DEFAULT_EXTERNAL_ATTRS)
217
+ end
218
+
219
+ # Record the central directory size, for the EOCDR
220
+ cdir_size = @out.tell - cdir_starts_at
148
221
 
222
+ # Write out the EOCDR
223
+ @writer. write_end_of_central_directory(io: @out, start_of_central_directory_location: cdir_starts_at,
224
+ central_directory_size: cdir_size, num_files_in_archive: @files.length)
225
+ @out.tell
226
+ end
227
+
228
# Builds the ZipWriter that the Streamer performs its structural writes with (local
# headers, data descriptors, central directory records). It is called a single time,
# from the constructor, and the returned object is reused for the lifetime of the
# Streamer. The method exists mainly as an override point - redefine it to supply a
# wrapped or customised writer.
#
# @return [ZipTricks::ZipWriter] the writer to perform writes with
def create_writer
  ZipTricks::ZipWriter.new
end
236
+
149
237
  private
150
-
151
- # Checks whether the number of bytes written conforms to the declared entry size
152
- def leaving_in_entry_body_state
153
- if @bytes_written_for_entry != @expected_bytes_for_entry
154
- msg = "Wrong number of bytes written for entry (expected %d, got %d)" % [@expected_bytes_for_entry, @bytes_written_for_entry]
155
- raise EntryBodySizeMismatch, msg
238
+
239
# Validates the entry parameters, registers the entry together with the offset of its
# local header, and writes that local header into the output.
def add_file_and_write_local_header(filename:, crc32:, storage_mode:, compressed_size:,
                                    uncompressed_size:, use_data_descriptor: false)
  name_taken = @files.any? { |existing| existing.filename == filename }
  raise DuplicateFilenames, "Filename #{filename.inspect} already used in the archive" if name_taken

  known_mode = [STORED, DEFLATED].include?(storage_mode)
  raise UnknownMode, "Unknown compression mode #{storage_mode}" unless known_mode
  raise Overflow, "Filename is too long" if filename.bytesize > 0xFFFF
  raise PathError, "Paths in ZIP may only contain forward slashes (UNIX separators)" if filename.include?('\\')

  # Per-entry bookkeeping; nothing in this method reads these values back
  @check_compressed_size_after_leaving_body = !use_data_descriptor
  @bytes_written_for_entry = 0
  @expected_bytes_for_entry = compressed_size

  # The entry is stamped with the current UTC time as its modification time
  entry = Entry.new(filename, crc32, compressed_size, uncompressed_size, storage_mode,
                    Time.now.utc, use_data_descriptor)
  @files << entry
  # The offset is taken before the header goes out, so it points at the header itself
  @local_header_offsets << @out.tell
  @writer.write_local_file_header(io: @out, gp_flags: entry.gp_flags, crc32: entry.crc32,
                                  compressed_size: entry.compressed_size,
                                  uncompressed_size: entry.uncompressed_size,
                                  mtime: entry.mtime, filename: entry.filename,
                                  storage_mode: entry.storage_mode)
end
259
+
260
# Writes the data descriptor for the most recently added entry, using the CRC32 and
# the sizes stored on that entry.
#
# @raise [IndexError] if no entries have been added yet
def write_data_descriptor_for_last_entry
  entry = @files.fetch(-1)
  # Pass the CRC32 stored on the entry; a hard-coded zero here would make the
  # descriptor disagree with the central directory record of the same entry
  @writer.write_data_descriptor(io: @out, crc32: entry.crc32,
                                compressed_size: entry.compressed_size,
                                uncompressed_size: entry.uncompressed_size)
end
158
264
  end