zip_tricks 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2bc3917bd654f3fca15ae6bce1769eb0fa45dd12
4
- data.tar.gz: 12f495895ed59e23cc89bf7b86b91bd9422c1608
3
+ metadata.gz: d3b168fde79da25b24e3cb4b69caac818f1337f3
4
+ data.tar.gz: e1d7b9995636811c9c662a70e2caffb3a085709b
5
5
  SHA512:
6
- metadata.gz: 66fe048eddc9c00ed459e02e30ba8607a76cf297de88bf595694d14eec42e7b1075752b42fae8e86dcfe06ca2814db985452ed0244f803207491b100baed1041
7
- data.tar.gz: 717bd4d51c597fefdd77bfe0f7df3c41f2ea8ae0122f0b5330ab6cf63b6183370e5dd8bbc19b868a6f52ae53a84147bfdb8bee54bbc44b63df563074192f7ec8
6
+ metadata.gz: 65f27dacbcc5fd09e229f8543497fe3b3ff3da861696b355e01688f8b78dc7d02f4163de11e4a909c83f273a42005f63be7c1c7b7196f8657f0dd4d26cc4cc4f
7
+ data.tar.gz: 66b7db54cdc4702d0f867b8f263ea1bca2016405dd19db43bc4a7368ddb0d4b912b67344b7d0efc3156b0fbfe5868aa78b3cc6b8c22c87cddfd57282c8842dec
data/README.md CHANGED
@@ -42,16 +42,20 @@ The block will only be called when actually sending the response to the client
42
42
 
43
43
  Use the `SizeEstimator` to compute the correct size of the resulting archive.
44
44
 
45
+ # Prepare the response body. The block will only be called when the response starts to be written.
45
46
  zip_body = ZipTricks::RackBody.new do | zip |
46
47
  zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
47
48
  zip << read_file('myfile1.bin')
48
49
  zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
49
50
  zip << read_file('myfile2.bin')
50
51
  end
52
+
53
+ # Precompute the Content-Length ahead of time
51
54
  bytesize = ZipTricks::SizeEstimator.estimate do |z|
52
55
  z.add_stored_entry(filename: 'myfile1.bin', size: 9090821)
53
56
  z.add_stored_entry(filename: 'myfile2.bin', size: 458678)
54
57
  end
58
+
55
59
  [200, {'Content-Length' => bytesize.to_s}, zip_body]
56
60
 
57
61
  ## Other usage examples
@@ -79,55 +83,10 @@ to that socket using some accelerated writing technique, and only use the Stream
79
83
  zip.simulate_write(my_temp_file.size)
80
84
  end
81
85
 
82
- ## RackBody
83
-
84
- Can be used to output a streamed ZIP archive directly through a Rack response body.
85
- The block given to the constructor will be called when the response body will be read by the webserver,
86
- and will receive a {ZipTricks::Streamer} as it's block argument. You can then add entries to the Streamer as usual.
87
- The archive will be automatically closed at the end of the block.
88
-
89
- # Precompute the Content-Length ahead of time
90
- content_length = ZipTricks::SizeEstimator.estimate do | estimator |
91
- estimator.add_stored_entry('large.tif', size=1289894)
92
- end
93
-
94
- # Prepare the response body. The block will only be called when the response starts to be written.
95
- body = ZipTricks::RackBody.new do | streamer |
96
- streamer.add_stored_entry('large.tif', size=1289894, crc32=198210)
97
- streamer << large_file.read(1024*1024) until large_file.eof?
98
- ...
99
- end
100
-
101
- [200, {'Content-Type' => 'binary/octet-stream', 'Content-Length' => content_length.to_s}, body]
102
-
103
- ## BlockWrite
104
-
105
- Can be used as the destination IO, but will call the given block instead on every call to `:<<`.
106
- This can be used to attach the output of the zip compressor to the Rack response body, or another
107
- destination. For Rack/Rails just use RackBody since it sets this up for you.
108
-
109
- io = ZipTricks::BlockWrite.new{|data| socket << data }
110
- ZipTricks::Streamer.open(io) do | zip |
111
- zip.add_stored_entry("first-file.bin", raw_file.size, raw_file_crc32)
112
- ....
113
- end
114
-
115
- ## SizeEstimator
116
-
117
- Is used to predict the size of the ZIP archive after output. This can be used to generate, say, a `Content-Length` header,
118
- or to predict the size of the resulting archive on the storage device. The size is estimated using a very fast "fake archiving"
119
- procedure, so it computes the sizes of all the headers and the central directory very accurately.
120
-
121
- expected_zip_archive_size = SizeEstimator.estimate do | estimator |
122
- estimator.add_stored_entry("file.doc", size=898291)
123
- estimator.add_compressed_entry("family.JPG", size=89281911, compressed_size=89218)
124
- end
125
-
126
-
127
- ## StreamCRC32
86
+ ## Computing the CRC32 value of a large file
128
87
 
129
- Computes the CRC32 value in a streaming fashion. Is slightly more convenient for the purpose than using the raw Zlib
130
- library functions.
88
+ `StreamCRC32` computes the CRC32 checksum of an IO in a streaming fashion. It is slightly more convenient for the purpose
89
+ than using the raw Zlib library functions.
131
90
 
132
91
  crc = ZipTricks::StreamCRC32.new
133
92
  crc << large_file.read(1024 * 12) until large_file.eof?
data/lib/zip_tricks.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module ZipTricks
2
- VERSION = '3.0.0'
2
+ VERSION = '3.1.0'
3
3
 
4
4
  # Require all the sub-components except myself
5
5
  Dir.glob(__dir__ + '/**/*.rb').sort.each {|p| require p unless p == __FILE__ }
@@ -366,16 +366,47 @@ class ZipTricks::FileReader
366
366
  # and a soft read (we might not be able to read as many bytes as we want)
367
367
  file_io.seek(implied_position_of_eocd_record, IO::SEEK_SET)
368
368
  str_containing_eocd_record = file_io.read(MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE)
369
-
370
- # TODO: what to do if multiple occurrences of the signature are found, somehow?
371
- eocd_sig = [0x06054b50].pack(C_V)
372
- eocd_idx_in_buf = str_containing_eocd_record.index(eocd_sig)
373
-
369
+ eocd_idx_in_buf = locate_eocd_signature(str_containing_eocd_record)
370
+
374
371
  raise "Could not find the EOCD signature in the buffer - maybe a malformed ZIP file" unless eocd_idx_in_buf
375
372
 
376
373
  implied_position_of_eocd_record + eocd_idx_in_buf
377
374
  end
378
375
 
376
+ # This is tricky. Essentially, we have to scan the maximum possible number of bytes (that the EOCD can
377
+ # theoretically occupy including the comment), and we have to find a combination of:
378
+ # [EOCD signature, <some ZIP metadata>, comment byte size, the comment of that size, eof].
379
+ # The only way I could find to do this was with a sliding window, but there probably is a better way.
380
+ def locate_eocd_signature(in_str)
381
+ # We have to scan from the _very_ tail. We read the very minimum size
382
+ # the EOCD record can have (up to and including the comment size), using
383
+ # a sliding window. Once our end offset matches the comment size we found our
384
+ # EOCD marker.
385
+ eocd_signature_int = 0x06054b50
386
+ unpack_pattern = 'VvvvvVVv'
387
+ minimum_record_size = 22
388
+ end_location = minimum_record_size * -1
389
+ loop do
390
+ # If the window is nil, we have rolled off the start of the string, nothing to do here.
391
+ # We use negative values because if we used positive slice indices
392
+ # we would have to detect the rollover ourselves
393
+ break unless window = in_str[end_location, minimum_record_size]
394
+
395
+ window_location = in_str.bytesize + end_location
396
+ unpacked = window.unpack(unpack_pattern)
397
+
398
+ # If we found the signature, pick up the comment size, and check if the size of the window
399
+ # plus that comment size is where we are in the string. If we are - bingo.
400
+ if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
401
+ assumed_eocd_location = in_str.bytesize - comment_size - minimum_record_size
402
+ # if the comment size is where we should be at - we found our EOCD
403
+ return assumed_eocd_location if assumed_eocd_location == window_location
404
+ end
405
+
406
+ end_location -= 1 # Shift the window back, by one byte, and try again.
407
+ end
408
+ end
409
+
379
410
  # Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
380
411
  # EOCD record in the archive by fixed offsets
381
412
  def get_zip64_eocd_locator_offset(file_io, eocd_offset)
@@ -221,8 +221,9 @@ class ZipTricks::ZipWriter
221
221
  # @param start_of_central_directory_location[Fixnum] byte offset of the start of central directory from the beginning of ZIP file
222
222
  # @param central_directory_size[Fixnum] the size of the central directory (only file headers) in bytes
223
223
  # @param num_files_in_archive[Fixnum] How many files the archive contains
224
+ # @param comment[String] the comment for the archive (defaults to ZIP_TRICKS_COMMENT)
224
225
  # @return [void]
225
- def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:)
226
+ def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:, comment: ZIP_TRICKS_COMMENT)
226
227
  zip64_eocdr_offset = start_of_central_directory_location + central_directory_size
227
228
 
228
229
  zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
@@ -296,8 +297,8 @@ class ZipTricks::ZipWriter
296
297
  # directory with respect to
297
298
  # the starting disk number 4 bytes
298
299
  end
299
- io << [ZIP_TRICKS_COMMENT.bytesize].pack(C_v) # .ZIP file comment length 2 bytes
300
- io << ZIP_TRICKS_COMMENT # .ZIP file comment (variable size)
300
+ io << [comment.bytesize].pack(C_v) # .ZIP file comment length 2 bytes
301
+ io << comment # .ZIP file comment (variable size)
301
302
  end
302
303
 
303
304
  private
@@ -44,4 +44,26 @@ describe ZipTricks::FileReader do
44
44
  expect(readback.bytesize).to eq(tolstoy.bytesize)
45
45
  expect(readback[0..10]).to eq(tolstoy[0..10])
46
46
  end
47
+
48
+ it 'is able to latch to the EOCD location even if the signature for the EOCD record appears all over the ZIP' do
49
+ # A VERY evil ZIP file which has this signature all over
50
+ eocd_sig = [0x06054b50].pack('V')
51
+ evil_str = "#{eocd_sig} and #{eocd_sig}"
52
+
53
+ z = StringIO.new
54
+ w = ZipTricks::ZipWriter.new
55
+ w.write_local_file_header(io: z, filename: evil_str, compressed_size: evil_str.bytesize,
56
+ uncompressed_size: evil_str.bytesize, crc32: 0x06054b50, gp_flags: 0, mtime: Time.now, storage_mode: 0)
57
+ z << evil_str
58
+ where = z.tell
59
+ w.write_central_directory_file_header(io: z, local_file_header_location: 0, gp_flags: 0, storage_mode: 0,
60
+ filename: evil_str, compressed_size: evil_str.bytesize,
61
+ uncompressed_size: evil_str.bytesize, mtime: Time.now, crc32: 0x06054b50)
62
+ w.write_end_of_central_directory(io: z, start_of_central_directory_location: where,
63
+ central_directory_size: z.tell - where, num_files_in_archive: 1, comment: evil_str)
64
+
65
+ z.rewind
66
+ entries = described_class.read_zip_structure(z)
67
+ expect(entries.length).to eq(1)
68
+ end
47
69
  end
@@ -330,6 +330,17 @@ describe ZipTricks::ZipWriter do
330
330
  expect(br.read_n(comment_length)).to match(/ZipTricks/)
331
331
  end
332
332
 
333
+ it 'writes out the custom comment' do
334
+ buf = ''
335
+ comment = 'Ohai mate'
336
+ subject.write_end_of_central_directory(io: buf, start_of_central_directory_location: 9091211,
337
+ central_directory_size: 9091, num_files_in_archive: 4, comment: comment)
338
+
339
+ size_and_comment = buf[((comment.bytesize + 2) * -1)..-1]
340
+ comment_size = size_and_comment.unpack('v')[0]
341
+ expect(comment_size).to eq(comment.bytesize)
342
+ end
343
+
333
344
  it 'writes out the Zip64 EOCD as well if the central directory is located beyound 4GB in the archive' do
334
345
  buf = StringIO.new
335
346
 
data/zip_tricks.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: zip_tricks 3.0.0 ruby lib
5
+ # stub: zip_tricks 3.1.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "zip_tricks"
9
- s.version = "3.0.0"
9
+ s.version = "3.1.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Julik Tarkhanov"]
14
- s.date = "2016-08-03"
14
+ s.date = "2016-08-16"
15
15
  s.description = "Makes rubyzip stream, for real"
16
16
  s.email = "me@julik.nl"
17
17
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zip_tricks
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julik Tarkhanov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-03 00:00:00.000000000 Z
11
+ date: 2016-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip