zip_tricks 3.1.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3434ce881bfe7fdf3494ed494f8e251082d94510
4
- data.tar.gz: 30062193918dacf4a018d7bde923303f0a14c47f
3
+ metadata.gz: 6b478a7ffbae2dbb20270a1e4c4b63f495feef87
4
+ data.tar.gz: d9a8ca3a1596ee653fcf2e2b9153a9e59bd912fc
5
5
  SHA512:
6
- metadata.gz: 88be7f1bebd8e0faa8906203b594e022b173390039732a3eaafa53ec903770b0a619f7a0895a8203b75c6c09f5908ca400d86611f443e34fdead53554edd8e14
7
- data.tar.gz: 5c4362394b3666ce528e16c657b019a8185875555af2202a8c976df523c309017ac060ec61082617b3573722cb1d2e4eb002502ef9c8b770227859756e0afe5f
6
+ metadata.gz: 689d25f85a79987750fe3bbdf845cfe76c987713709a3d7fc8d2bc944a570958168c2b6c38a6f657b254d499052c079aad84c85b6421b9c228740dd3c9f79903
7
+ data.tar.gz: 434ffe473e5b0339b40ead5437e070cc1bd6dcce910db58e516a207806cd0eab16f86cc04d342201e2f31f121438308351239d923f4b4e5311850755c77ce339
data/lib/zip_tricks.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module ZipTricks
2
- VERSION = '3.1.1'
2
+ VERSION = '4.0.0'
3
3
 
4
4
  # Require all the sub-components except myself
5
5
  Dir.glob(__dir__ + '/**/*.rb').sort.each {|p| require p unless p == __FILE__ }
@@ -47,71 +47,18 @@ require 'stringio'
47
47
  # and so on, and sets these entries up with the absolute _offsets_ into the source file/IO object.
48
48
  # These offsets can then be used to extract the actual compressed data of the files and to expand it.
49
49
  class ZipTricks::FileReader
50
+ require_relative 'file_reader/stored_reader'
51
+ require_relative 'file_reader/inflating_reader'
52
+
50
53
  ReadError = Class.new(StandardError)
51
54
  UnsupportedFeature = Class.new(StandardError)
52
55
  InvalidStructure = Class.new(ReadError)
53
-
54
- class InflatingReader
55
- def initialize(from_io, compressed_data_size)
56
- @io = from_io
57
- @compressed_data_size = compressed_data_size
58
- @already_read = 0
59
- @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
60
- end
61
-
62
- def extract(n_bytes=nil)
63
- n_bytes ||= (@compressed_data_size - @already_read)
64
-
65
- return if eof?
66
-
67
- available = @compressed_data_size - @already_read
68
-
69
- return if available.zero?
70
-
71
- n_bytes = available if n_bytes > available
72
-
73
- return '' if n_bytes.zero?
74
-
75
- compressed_chunk = @io.read(n_bytes)
76
- @already_read += compressed_chunk.bytesize
77
- @zlib_inflater.inflate(compressed_chunk)
78
- end
79
-
80
- def eof?
81
- @zlib_inflater.finished?
82
- end
83
- end
84
-
85
- class StoredReader
86
- def initialize(from_io, compressed_data_size)
87
- @io = from_io
88
- @compressed_data_size = compressed_data_size
89
- @already_read = 0
90
- end
91
-
92
- def extract(n_bytes=nil)
93
- n_bytes ||= (@compressed_data_size - @already_read)
94
-
95
- return if eof?
96
-
97
- available = @compressed_data_size - @already_read
98
-
99
- return if available.zero?
100
-
101
- n_bytes = available if n_bytes > available
102
-
103
- return '' if n_bytes.zero?
104
-
105
- compressed_chunk = @io.read(n_bytes)
106
- @already_read += compressed_chunk.bytesize
107
- compressed_chunk
108
- end
109
-
110
- def eof?
111
- @already_read >= @compressed_data_size
56
+ LocalHeaderPending = Class.new(StandardError) do
57
+ def message
58
+ "The compressed data offset is not available (local header has not been read)"
112
59
  end
113
60
  end
114
-
61
+
115
62
  private_constant :StoredReader, :InflatingReader
116
63
 
117
64
  # Represents a file within the ZIP archive being read
@@ -162,13 +109,9 @@ class ZipTricks::FileReader
162
109
  # @return [String] the file comment
163
110
  attr_accessor :comment
164
111
 
165
- # @return [Fixnum] at what offset you should start reading
166
- # for the compressed data in your original IO object
167
- attr_accessor :compressed_data_offset
168
-
169
112
  # Returns a reader for the actual compressed data of the entry.
170
113
  #
171
- # reader = entry.reader(source_file)
114
+ # reader = entry.extractor_from(source_file)
172
115
  # outfile << reader.extract(512 * 1024) until reader.eof?
173
116
  #
174
117
  # @return [#extract(n_bytes), #eof?] the reader for the data
@@ -180,17 +123,50 @@ class ZipTricks::FileReader
180
123
  when 0
181
124
  StoredReader.new(from_io, compressed_size)
182
125
  else
183
- raise "Unsupported storage mode for reading (#{storage_mode})"
126
+ raise UnsupportedFeature, "Unsupported storage mode for reading - %d" % storage_mode
184
127
  end
185
128
  end
129
+
130
+ # @return [Fixnum] at what offset you should start reading
131
+ # for the compressed data in your original IO object
132
+ def compressed_data_offset
133
+ @compressed_data_offset or raise LocalHeaderPending
134
+ end
135
+
136
+ # Tells whether the compressed data offset is already known for this entry
137
+ # @return [Boolean]
138
+ def known_offset?
139
+ !@compressed_data_offset.nil?
140
+ end
141
+
142
+ # Sets the offset at which the compressed data for this file starts in the ZIP.
143
+ # By default, the value will be set by the Reader for you. If you use delayed
144
+ # reading, you need to set it yourself by calling the `get_compressed_data_offset` method on the Reader:
145
+ #
146
+ # entry.compressed_data_offset = reader.get_compressed_data_offset(io: file,
147
+ # local_file_header_offset: entry.local_file_header_offset)
148
+ def compressed_data_offset=(offset)
149
+ @compressed_data_offset = offset.to_i
150
+ end
186
151
  end
187
152
 
188
153
  # Parse an IO handle to a ZIP archive into an array of Entry objects.
189
154
  #
190
155
  # @param io[#tell, #seek, #read, #size] an IO-ish object
191
- # @param read_local_headers[Boolean] whether to proceed to read the local headers in addition to the central directory
156
+ # @param read_local_headers[Boolean] whether the local headers must be read upfront. When reading
157
+ # a locally available ZIP file this option will not have much use since the small reads from
158
+ # the file handle are not going to be that important. However, if you are using remote reads
159
+ # to decipher a ZIP file located on an HTTP server, the operation _must_ perform an HTTP
160
+ # request for _each entry in the ZIP file_ to determine where the actual file data starts.
161
+ # This, for a ZIP archive of 1000 files, will incur 1000 extra HTTP requests - which you might
162
+ # not want to perform upfront, or - at least - not want to perform _at once_. When the option is
163
+ # set to `false`, you will be getting instances of `LazyEntry` instead of `Entry`. Those objects
164
+ # will raise an exception when you attempt to access their compressed data offset in the ZIP
165
+ # (since the reads have not been performed yet). As a rule, this option can be left in its
166
+ # default setting (`true`) unless you want to _only_ read the central directory, or you need
167
+ # to limit the number of HTTP requests.
192
168
  # @return [Array<Entry>] an array of entries within the ZIP being parsed
193
- def read_zip_structure(io, read_local_headers: true)
169
+ def read_zip_structure(io:, read_local_headers: true)
194
170
  zip_file_size = io.size
195
171
  eocd_offset = get_eocd_offset(io, zip_file_size)
196
172
 
@@ -213,24 +189,68 @@ class ZipTricks::FileReader
213
189
  read_cdir_entry(central_directory_io)
214
190
  end
215
191
 
216
- entries.each_with_index do |entry, i|
217
- if read_local_headers
218
- log { 'Reading the local header for entry %d at offset %d' % [i, entry.local_file_header_offset] }
219
- entry.compressed_data_offset = find_compressed_data_start_offset(io, entry.local_file_header_offset)
220
- end
221
- end
192
+ read_local_headers(entries, io) if read_local_headers
193
+
194
+ entries
222
195
  end
223
196
 
197
+ # Get the offset in the IO at which the actual compressed data of the file starts within the ZIP.
198
+ # The method will eager-read the entire local header for the file (the maximum size the local header may use),
199
+ # starting at the given offset, and will then compute its size. That size plus the local header offset
200
+ # given will be the compressed data offset of the entry (read starting at this offset to get the data).
201
+ #
202
+ # @param io[#seek, #read] an IO-ish object the ZIP file can be read from
203
+ # @param local_file_header_offset[Fixnum] absolute offset (0-based) where the local file header is supposed to begin
204
+ # @return [Fixnum] absolute offset (0-based) of where the compressed data begins for this file within the ZIP
205
+ def get_compressed_data_offset(io:, local_file_header_offset:)
206
+ seek(io, local_file_header_offset)
207
+
208
+ # Reading in bulk is cheaper - grab the maximum length of the local header,
209
+ # including any headroom
210
+ local_file_header_str_plus_headroom = io.read(MAX_LOCAL_HEADER_SIZE)
211
+ io_starting_at_local_header = StringIO.new(local_file_header_str_plus_headroom)
212
+
213
+ assert_signature(io_starting_at_local_header, 0x04034b50)
214
+
215
+ # The rest is unreliable, and we have that information from the central directory already.
216
+ # So just skip over it to get at the offset where the compressed data begins
217
+ skip_ahead_2(io_starting_at_local_header) # Version needed to extract
218
+ skip_ahead_2(io_starting_at_local_header) # gp flags
219
+ skip_ahead_2(io_starting_at_local_header) # storage mode
220
+ skip_ahead_2(io_starting_at_local_header) # dos time
221
+ skip_ahead_2(io_starting_at_local_header) # dos date
222
+ skip_ahead_4(io_starting_at_local_header) # CRC32
223
+
224
+ skip_ahead_4(io_starting_at_local_header) # Comp size
225
+ skip_ahead_4(io_starting_at_local_header) # Uncomp size
226
+
227
+ filename_size = read_2b(io_starting_at_local_header)
228
+ extra_size = read_2b(io_starting_at_local_header)
229
+
230
+ skip_ahead_n(io_starting_at_local_header, filename_size)
231
+ skip_ahead_n(io_starting_at_local_header, extra_size)
232
+
233
+ local_file_header_offset + io_starting_at_local_header.tell
234
+ end
235
+
224
236
  # Parse an IO handle to a ZIP archive into an array of Entry objects.
225
237
  #
226
- # @param io[#tell, #seek, #read, #size] an IO-ish object
238
+ # @param options[Hash] any options the instance method of the same name accepts
227
239
  # @return [Array<Entry>] an array of entries within the ZIP being parsed
228
- def self.read_zip_structure(io)
229
- new.read_zip_structure(io)
240
+ def self.read_zip_structure(**options)
241
+ new.read_zip_structure(**options)
230
242
  end
231
243
 
232
244
  private
233
245
 
246
+ def read_local_headers(entries, io)
247
+ entries.each_with_index do |entry, i|
248
+ log { 'Reading the local header for entry %d at offset %d' % [i, entry.local_file_header_offset] }
249
+ off = get_compressed_data_offset(io: io, local_file_header_offset: entry.local_file_header_offset)
250
+ entry.compressed_data_offset = off
251
+ end
252
+ end
253
+
234
254
  def skip_ahead_2(io)
235
255
  skip_ahead_n(io, 2)
236
256
  end
@@ -287,40 +307,7 @@ class ZipTricks::FileReader
287
307
  read_n(io, 8).unpack(C_Qe).shift
288
308
  end
289
309
 
290
- def find_compressed_data_start_offset(file_io, local_header_offset)
291
- seek(file_io, local_header_offset)
292
-
293
- # Reading in bulk is cheaper - grab the maximum length of the local header, including
294
- # any headroom
295
- local_file_header_str_plus_headroom = file_io.read(MAX_LOCAL_HEADER_SIZE)
296
- io = StringIO.new(local_file_header_str_plus_headroom)
297
-
298
- assert_signature(io, 0x04034b50)
299
-
300
- # The rest is unreliable, and we have that information from the central directory already.
301
- # So just skip over it to get at the offset where the compressed data begins
302
- skip_ahead_2(io) # Version needed to extract
303
- skip_ahead_2(io) # gp flags
304
- skip_ahead_2(io) # storage mode
305
- skip_ahead_2(io) # dos time
306
- skip_ahead_2(io) # dos date
307
- skip_ahead_4(io) # CRC32
308
-
309
- skip_ahead_4(io) # Comp size
310
- skip_ahead_4(io) # Uncomp size
311
-
312
- filename_size = read_2b(io)
313
- extra_size = read_2b(io)
314
-
315
- skip_ahead_n(io, filename_size)
316
- skip_ahead_n(io, extra_size)
317
-
318
- local_header_offset + io.tell
319
- end
320
-
321
-
322
310
  def read_cdir_entry(io)
323
- expected_at = io.tell
324
311
  assert_signature(io, 0x02014b50)
325
312
  ZipEntry.new.tap do |e|
326
313
  e.made_by = read_2b(io)
@@ -357,7 +344,9 @@ class ZipTricks::FileReader
357
344
  end
358
345
 
359
346
  # ...of which we really only need the Zip64 extra
360
- if zip64_extra_contents = extra_table[1] # Zip64 extra
347
+ if zip64_extra_contents = extra_table[1]
348
+ # If the Zip64 extra is present, we let it override all
349
+ # the values fetched from the conventional header
361
350
  zip64_extra = StringIO.new(zip64_extra_contents)
362
351
  log { 'Will read Zip64 extra data for %s, %d bytes' % [e.filename, zip64_extra.size] }
363
352
  # Now here be dragons. The APPNOTE specifies that
@@ -533,10 +522,11 @@ class ZipTricks::FileReader
533
522
  def num_files_and_central_directory_offset(file_io, eocd_offset)
534
523
  seek(file_io, eocd_offset)
535
524
 
536
- io = StringIO.new(read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD))
537
-
538
- assert_signature(io, 0x06054b50)
525
+ # The size of the EOCD record is known upfront, so use a strict read
526
+ eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
527
+ io = StringIO.new(eocd_record_str)
539
528
 
529
+ assert_signature(io, 0x06054b50)
540
530
  skip_ahead_2(io) # number_of_this_disk
541
531
  skip_ahead_2(io) # number of the disk with the EOCD record
542
532
  skip_ahead_2(io) # number of entries in the central directory of this disk
@@ -0,0 +1,30 @@
1
+ class ZipTricks::FileReader::InflatingReader
2
+ def initialize(from_io, compressed_data_size)
3
+ @io = from_io
4
+ @compressed_data_size = compressed_data_size
5
+ @already_read = 0
6
+ @zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
7
+ end
8
+
9
+ def extract(n_bytes=nil)
10
+ n_bytes ||= (@compressed_data_size - @already_read)
11
+
12
+ return if eof?
13
+
14
+ available = @compressed_data_size - @already_read
15
+
16
+ return if available.zero?
17
+
18
+ n_bytes = available if n_bytes > available
19
+
20
+ return '' if n_bytes.zero?
21
+
22
+ compressed_chunk = @io.read(n_bytes)
23
+ @already_read += compressed_chunk.bytesize
24
+ @zlib_inflater.inflate(compressed_chunk)
25
+ end
26
+
27
+ def eof?
28
+ @zlib_inflater.finished?
29
+ end
30
+ end
@@ -0,0 +1,29 @@
1
+ class ZipTricks::FileReader::StoredReader
2
+ def initialize(from_io, compressed_data_size)
3
+ @io = from_io
4
+ @compressed_data_size = compressed_data_size
5
+ @already_read = 0
6
+ end
7
+
8
+ def extract(n_bytes=nil)
9
+ n_bytes ||= (@compressed_data_size - @already_read)
10
+
11
+ return if eof?
12
+
13
+ available = @compressed_data_size - @already_read
14
+
15
+ return if available.zero?
16
+
17
+ n_bytes = available if n_bytes > available
18
+
19
+ return '' if n_bytes.zero?
20
+
21
+ compressed_chunk = @io.read(n_bytes)
22
+ @already_read += compressed_chunk.bytesize
23
+ compressed_chunk
24
+ end
25
+
26
+ def eof?
27
+ @already_read >= @compressed_data_size
28
+ end
29
+ end
@@ -29,8 +29,7 @@ class ZipTricks::RemoteIO
29
29
  end
30
30
 
31
31
  # Emulates IO#read, but requires the number of bytes to read
32
- # The method will raise if the number of bytes read from remote does
33
- # not match the number requested. The read will be limited to the
32
+ # The read will be limited to the
34
33
  # size of the remote resource relative to the current offset in the IO,
35
34
  # so if you are at offset 0 in the IO of size 10, doing a `read(20)`
36
35
  # will only return you 10 bytes of result, and not raise any exceptions.
@@ -7,55 +7,23 @@
7
7
  # before you use this module.
8
8
  class ZipTricks::RemoteUncap
9
9
 
10
- # Represents a file embedded within a remote ZIP archive
11
- class RemoteZipEntry
12
-
13
- # @return [String] filename of the file in the remote ZIP
14
- attr_accessor :name
15
-
16
- # @return [Fixnum] size in bytes of the file when uncompressed
17
- attr_accessor :size_uncompressed
18
-
19
- # @return [Fixnum] size in bytes of the file when compressed (the segment in the ZIP)
20
- attr_accessor :size_compressed
21
-
22
- # @return [Fixnum] compression method (0 for stored, 8 for deflate)
23
- attr_accessor :compression_method
24
-
25
- # @return [Fixnum] where the file data starts within the ZIP
26
- attr_accessor :starts_at_offset
27
-
28
- # @return [Fixnum] where the file data ends within the zip.
29
- # Will be equal to starts_at_offset if the file is empty
30
- attr_accessor :ends_at_offset
31
-
32
- # Yields the object during initialization
33
- def initialize
34
- yield self
35
- end
36
- end
37
-
38
10
  # @param uri[String] the HTTP(S) URL to read the ZIP footer from
39
- # @return [Array<RemoteZipEntry>] metadata about the files within the remote archive
40
- def self.files_within_zip_at(uri)
11
+ # @param reader_class[Class] which class to use for reading
12
+ # @param options_for_zip_reader[Hash] any additional options to give to {ZipTricks::FileReader} when reading
13
+ # @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the files within the remote archive
14
+ def self.files_within_zip_at(uri, reader_class: ZipTricks::FileReader, **options_for_zip_reader)
41
15
  fetcher = new(uri)
42
16
  fake_io = ZipTricks::RemoteIO.new(fetcher)
43
- entries = ZipTricks.const_get(:FileReader).read_zip_structure(fake_io)
44
- entries.map do | remote_entry |
45
- RemoteZipEntry.new do | entry |
46
- entry.name = remote_entry.filename
47
- entry.starts_at_offset = remote_entry.compressed_data_offset
48
- entry.size_uncompressed = remote_entry.uncompressed_size
49
- entry.size_compressed = remote_entry.compressed_size
50
- entry.compression_method = remote_entry.storage_mode
51
- end
52
- end
17
+ reader = reader_class.new
18
+ reader.read_zip_structure(io: fake_io, **options_for_zip_reader)
53
19
  end
54
20
 
55
21
  def initialize(uri)
56
22
  @uri = URI(uri)
57
23
  end
58
24
 
25
+ # Only used internally when reading the remote ZIP.
26
+ #
59
27
  # @param range[Range] the HTTP range of data to fetch from remote
60
28
  # @return [String] the response body of the ranged request
61
29
  def request_range(range)
@@ -65,6 +33,8 @@ class ZipTricks::RemoteUncap
65
33
  http.request(request).body
66
34
  end
67
35
 
36
+ # Only used internally when reading the remote ZIP.
37
+ #
68
38
  # @return [Fixnum] the byte size of the ranged request
69
39
  def request_object_size
70
40
  http = Net::HTTP.start(@uri.hostname, @uri.port)
@@ -16,5 +16,5 @@ class ZipTricks::Streamer::Writable
16
16
  #
17
17
  # @param d[String] the binary string to write (part of the uncompressed file)
18
18
  # @return [Fixnum] the number of bytes written
19
- def write(d); @writer << d; end
19
+ def write(d); @writer << d; d.bytesize; end
20
20
  end
data/spec/spec_helper.rb CHANGED
@@ -7,6 +7,17 @@ require 'digest'
7
7
  require 'fileutils'
8
8
  require 'shellwords'
9
9
  require 'zip'
10
+ require 'delegate'
11
+
12
+ class ReadMonitor < SimpleDelegator
13
+ def read(*)
14
+ super.tap { @num_reads ||= 0; @num_reads += 1 }
15
+ end
16
+
17
+ def num_reads
18
+ @num_reads || 0
19
+ end
20
+ end
10
21
 
11
22
  module Keepalive
12
23
  # Travis-CI kills the build if it does not receive output on standard out or standard error
@@ -45,7 +56,7 @@ module ZipInspection
45
56
  $zip_inspection_buf ||= StringIO.new
46
57
  $zip_inspection_buf.puts "\n"
47
58
  $zip_inspection_buf.puts "Inspecting ZIP output of #{inspect}." # The only way to get at the RSpec example without using the block argument
48
- $zip_inspection_buf.puts "Be aware that the zipinfo version on OSX is too old to deal with Zip6."
59
+ $zip_inspection_buf.puts "Be aware that the zipinfo version on OSX is too old to deal with Zip64."
49
60
  escaped_cmd = Shellwords.join([zipinfo_path, '-tlhvz', path_to_zip])
50
61
  $zip_inspection_buf.puts `#{escaped_cmd}`
51
62
  end
@@ -1,48 +1,142 @@
1
1
  require 'spec_helper'
2
-
3
2
  describe ZipTricks::FileReader do
4
- it 'reads and uncompresses the file written deflated with data descriptors' do
5
- zipfile = StringIO.new
6
- tolstoy = File.read(__dir__ + '/war-and-peace.txt')
7
- tolstoy.force_encoding(Encoding::BINARY)
8
-
9
- ZipTricks::Streamer.open(zipfile) do |zip|
10
- zip.write_deflated_file('war-and-peace.txt') do |sink|
11
- sink << tolstoy
3
+
4
+ describe 'with an end-to-end ZIP file to read' do
5
+ it 'reads and uncompresses the file written deflated with data descriptors' do
6
+ zipfile = StringIO.new
7
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
8
+ tolstoy.force_encoding(Encoding::BINARY)
9
+
10
+ ZipTricks::Streamer.open(zipfile) do |zip|
11
+ zip.write_deflated_file('war-and-peace.txt') do |sink|
12
+ sink << tolstoy
13
+ end
12
14
  end
15
+
16
+ entries = described_class.read_zip_structure(io: zipfile)
17
+ expect(entries.length).to eq(1)
18
+
19
+ entry = entries.first
20
+
21
+ readback = ''
22
+ reader = entry.extractor_from(zipfile)
23
+ readback << reader.extract(10) until reader.eof?
24
+
25
+ expect(readback.bytesize).to eq(tolstoy.bytesize)
26
+ expect(readback[0..10]).to eq(tolstoy[0..10])
27
+ expect(readback[-10..-1]).to eq(tolstoy[-10..-1])
13
28
  end
14
29
 
15
- entries = described_class.read_zip_structure(zipfile)
16
- expect(entries.length).to eq(1)
30
+ it 'performs local file header reads by default' do
31
+ zipfile = StringIO.new
32
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
33
+ tolstoy.force_encoding(Encoding::BINARY)
17
34
 
18
- entry = entries.first
35
+ ZipTricks::Streamer.open(zipfile) do |zip|
36
+ 40.times do |i|
37
+ zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
38
+ end
39
+ end
40
+ zipfile.rewind
41
+
42
+ read_monitor = ReadMonitor.new(zipfile)
43
+ entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: true)
44
+ expect(read_monitor.num_reads).to eq(44)
45
+ end
19
46
 
20
- readback = ''
21
- reader = entry.extractor_from(zipfile)
22
- readback << reader.extract(10) until reader.eof?
47
+ it 'performs local file header reads when `read_local_headers` is set to true' do
48
+ zipfile = StringIO.new
49
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
50
+ tolstoy.force_encoding(Encoding::BINARY)
23
51
 
24
- expect(readback.bytesize).to eq(tolstoy.bytesize)
25
- expect(readback[0..10]).to eq(tolstoy[0..10])
26
- expect(readback[-10..-1]).to eq(tolstoy[-10..-1])
27
- end
52
+ ZipTricks::Streamer.open(zipfile) do |zip|
53
+ 40.times do |i|
54
+ zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
55
+ end
56
+ end
57
+ zipfile.rewind
58
+
59
+ read_monitor = ReadMonitor.new(zipfile)
60
+ entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: true)
61
+ expect(read_monitor.num_reads).to eq(44)
28
62
 
29
- it 'reads the file written stored with data descriptors' do
30
- zipfile = StringIO.new
31
- tolstoy = File.read(__dir__ + '/war-and-peace.txt')
32
- ZipTricks::Streamer.open(zipfile) do |zip|
33
- zip.write_stored_file('war-and-peace.txt') do |sink|
34
- sink << tolstoy
63
+ expect(entries.length).to eq(40)
64
+ entry = entries.first
65
+ expect(entry).to be_known_offset
66
+ end
67
+
68
+ it 'performs a limited number of reads when `read_local_headers` is set to false' do
69
+ zipfile = StringIO.new
70
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
71
+ tolstoy.force_encoding(Encoding::BINARY)
72
+
73
+ ZipTricks::Streamer.open(zipfile) do |zip|
74
+ 40.times do |i|
75
+ zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
76
+ end
35
77
  end
78
+ zipfile.rewind
79
+ read_monitor = ReadMonitor.new(zipfile)
80
+
81
+ entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: false)
82
+
83
+ expect(read_monitor.num_reads).to eq(4)
84
+ expect(entries.length).to eq(40)
85
+ entry = entries.first
86
+ expect(entry).not_to be_known_offset
87
+ expect {
88
+ entry.compressed_data_offset
89
+ }.to raise_error(/read/)
36
90
  end
37
91
 
38
- entries = described_class.read_zip_structure(zipfile)
39
- expect(entries.length).to eq(1)
92
+ it 'reads the file written stored with data descriptors' do
93
+ zipfile = StringIO.new
94
+ tolstoy = File.read(__dir__ + '/war-and-peace.txt')
95
+ ZipTricks::Streamer.open(zipfile) do |zip|
96
+ zip.write_stored_file('war-and-peace.txt') do |sink|
97
+ sink << tolstoy
98
+ end
99
+ end
40
100
 
41
- entry = entries.first
101
+ entries = described_class.read_zip_structure(io: zipfile)
102
+ expect(entries.length).to eq(1)
42
103
 
43
- readback = entry.extractor_from(zipfile).extract
44
- expect(readback.bytesize).to eq(tolstoy.bytesize)
45
- expect(readback[0..10]).to eq(tolstoy[0..10])
104
+ entry = entries.first
105
+
106
+ readback = entry.extractor_from(zipfile).extract
107
+ expect(readback.bytesize).to eq(tolstoy.bytesize)
108
+ expect(readback[0..10]).to eq(tolstoy[0..10])
109
+ end
110
+ end
111
+
112
+ describe '#get_compressed_data_offset' do
113
+ it 'reads the offset for an entry having Zip64 extra fields' do
114
+ w = ZipTricks::ZipWriter.new
115
+ out = StringIO.new
116
+ out << Random.new.bytes(7656177)
117
+ w.write_local_file_header(io: out, filename: 'some file',
118
+ compressed_size: 0xFFFFFFFF + 5, uncompressed_size: 0xFFFFFFFFF, crc32: 123, gp_flags: 4,
119
+ mtime: Time.now, storage_mode: 8)
120
+
121
+ out.rewind
122
+
123
+ compressed_data_offset = subject.get_compressed_data_offset(io: out, local_file_header_offset: 7656177)
124
+ expect(compressed_data_offset).to eq(7656236)
125
+ end
126
+
127
+ it 'reads the offset for an entry having a long name' do
128
+ w = ZipTricks::ZipWriter.new
129
+ out = StringIO.new
130
+ out << Random.new.bytes(7)
131
+ w.write_local_file_header(io: out, filename: 'This is a file with a ridiculously long name.doc',
132
+ compressed_size: 10, uncompressed_size: 15, crc32: 123, gp_flags: 4,
133
+ mtime: Time.now, storage_mode: 8)
134
+
135
+ out.rewind
136
+
137
+ compressed_data_offset = subject.get_compressed_data_offset(io: out, local_file_header_offset: 7)
138
+ expect(compressed_data_offset).to eq(85)
139
+ end
46
140
  end
47
141
 
48
142
  it 'is able to latch to the EOCD location even if the signature for the EOCD record appears all over the ZIP' do
@@ -63,7 +157,7 @@ describe ZipTricks::FileReader do
63
157
  central_directory_size: z.tell - where, num_files_in_archive: 1, comment: evil_str)
64
158
 
65
159
  z.rewind
66
- entries = described_class.read_zip_structure(z)
160
+ entries = described_class.read_zip_structure(io: z)
67
161
  expect(entries.length).to eq(1)
68
162
  end
69
163
 
@@ -16,19 +16,10 @@ describe ZipTricks::RemoteUncap, webmock: true do
16
16
  payload2 << Random.new.bytes(1024 * 1024 * 3)
17
17
  payload2.flush; payload2.rewind
18
18
 
19
- payload1_crc = Zlib.crc32(payload1.read).tap { payload1.rewind }
20
- payload2_crc = Zlib.crc32(payload2.read).tap { payload2.rewind }
21
-
22
19
  File.open('temp.zip', 'wb') do |f|
23
20
  ZipTricks::Streamer.open(f) do | zip |
24
- zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
25
- while blob = payload1.read(1024 * 5)
26
- zip << blob
27
- end
28
- zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
29
- while blob = payload2.read(1024 * 5)
30
- zip << blob
31
- end
21
+ zip.write_stored_file('first-file.bin') { |w| IO.copy_stream(payload1, w) }
22
+ zip.write_stored_file('second-file.bin') { |w| IO.copy_stream(payload2, w) }
32
23
  end
33
24
  end
34
25
  payload1.rewind; payload2.rewind
@@ -53,17 +44,17 @@ describe ZipTricks::RemoteUncap, webmock: true do
53
44
 
54
45
  first, second = *files
55
46
 
56
- expect(first.name).to eq('first-file.bin')
57
- expect(first.size_uncompressed).to eq(payload1.size)
47
+ expect(first.filename).to eq('first-file.bin')
48
+ expect(first.uncompressed_size).to eq(payload1.size)
58
49
  File.open('temp.zip', 'rb') do |readback|
59
- readback.seek(first.starts_at_offset, IO::SEEK_SET)
50
+ readback.seek(first.compressed_data_offset, IO::SEEK_SET)
60
51
  expect(readback.read(12)).to eq(payload1.read(12))
61
52
  end
62
53
 
63
- expect(second.name).to eq('second-file.bin')
64
- expect(second.size_uncompressed).to eq(payload2.size)
54
+ expect(second.filename).to eq('second-file.bin')
55
+ expect(second.uncompressed_size).to eq(payload2.size)
65
56
  File.open('temp.zip', 'rb') do |readback|
66
- readback.seek(second.starts_at_offset, IO::SEEK_SET)
57
+ readback.seek(second.compressed_data_offset, IO::SEEK_SET)
67
58
  expect(readback.read(12)).to eq(payload2.read(12))
68
59
  end
69
60
  end
@@ -79,50 +70,31 @@ describe ZipTricks::RemoteUncap, webmock: true do
79
70
  payload1_crc = Zlib.crc32(payload1.read).tap { payload1.rewind }
80
71
  payload2_crc = Zlib.crc32(payload2.read).tap { payload2.rewind }
81
72
 
82
- File.open('temp.zip', 'wb') do |f|
83
- ZipTricks::Streamer.open(f) do | zip |
84
- zip.add_stored_entry(filename: 'first-file.bin', size: payload1.size, crc32: payload1_crc)
85
- zip << '' # It is empty, so a read() would return nil
86
- zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
87
- while blob = payload2.read(1024 * 5)
88
- zip << blob
89
- end
90
- end
73
+ readable_zip = Tempfile.new 'somezip'
74
+ ZipTricks::Streamer.open(readable_zip) do | zip |
75
+ zip.add_stored_entry(filename: 'first-file-zero-size.bin', size: payload1.size, crc32: payload1_crc)
76
+ zip.write_stored_file('second-file.bin') {|w| IO.copy_stream(payload2, w) }
91
77
  end
92
- payload1.rewind; payload2.rewind
93
-
94
- expect(File).to be_exist('temp.zip')
78
+ readable_zip.flush; readable_zip.rewind
95
79
 
96
80
  allow_any_instance_of(described_class).to receive(:request_object_size) {
97
- File.size('temp.zip')
81
+ readable_zip.size
98
82
  }
99
83
  allow_any_instance_of(described_class).to receive(:request_range) {|_instance, range|
100
- File.open('temp.zip', 'rb') do |f|
101
- f.seek(range.begin)
102
- f.read(range.end - range.begin + 1)
103
- end
84
+ readable_zip.seek(range.begin, IO::SEEK_SET)
85
+ readable_zip.read(range.end - range.begin + 1)
104
86
  }
105
87
 
106
88
  payload1.rewind; payload2.rewind
107
89
 
108
- files = described_class.files_within_zip_at('http://fake.example.com')
109
- expect(files).to be_kind_of(Array)
110
- expect(files.length).to eq(2)
111
-
112
- first, second = *files
113
-
114
- expect(first.name).to eq('first-file.bin')
115
- expect(first.size_uncompressed).to eq(payload1.size)
116
- File.open('temp.zip', 'rb') do |readback|
117
- readback.seek(first.starts_at_offset, IO::SEEK_SET)
118
- expect(readback.read(0)).to eq(payload1.read(0))
119
- end
90
+ first, second = described_class.files_within_zip_at('http://fake.example.com')
120
91
 
121
- expect(second.name).to eq('second-file.bin')
122
- expect(second.size_uncompressed).to eq(payload2.size)
123
- File.open('temp.zip', 'rb') do |readback|
124
- readback.seek(second.starts_at_offset, IO::SEEK_SET)
125
- expect(readback.read(12)).to eq(payload2.read(12))
126
- end
92
+ expect(first.filename).to eq('first-file-zero-size.bin')
93
+ expect(first.compressed_size).to be_zero
94
+
95
+ expect(second.filename).to eq('second-file.bin')
96
+ expect(second.uncompressed_size).to eq(payload2.size)
97
+ readable_zip.seek(second.compressed_data_offset, IO::SEEK_SET)
98
+ expect(readable_zip.read(12)).to eq(payload2.read(12))
127
99
  end
128
100
  end
@@ -0,0 +1,27 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ describe ZipTricks::Streamer::Writable do
4
+ describe '#<<' do
5
+ it 'writes the given data to the destination and returns self' do
6
+ buf = StringIO.new
7
+ subject = described_class.new(buf)
8
+
9
+ result = subject << 'hello!'
10
+
11
+ expect(buf.string).to eq('hello!')
12
+ expect(result).to eq(subject)
13
+ end
14
+ end
15
+
16
+ describe '#write' do
17
+ it 'writes the given data to the destination and returns the number of bytes written' do
18
+ buf = StringIO.new
19
+ subject = described_class.new(buf)
20
+
21
+ result = subject.write('hello!')
22
+
23
+ expect(buf.string).to eq('hello!')
24
+ expect(result).to eq(6)
25
+ end
26
+ end
27
+ end
data/zip_tricks.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: zip_tricks 3.1.1 ruby lib
5
+ # stub: zip_tricks 4.0.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "zip_tricks"
9
- s.version = "3.1.1"
9
+ s.version = "4.0.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Julik Tarkhanov"]
14
- s.date = "2016-08-17"
14
+ s.date = "2016-08-19"
15
15
  s.description = "Makes rubyzip stream, for real"
16
16
  s.email = "me@julik.nl"
17
17
  s.extra_rdoc_files = [
@@ -36,6 +36,8 @@ Gem::Specification.new do |s|
36
36
  "lib/zip_tricks/block_deflate.rb",
37
37
  "lib/zip_tricks/block_write.rb",
38
38
  "lib/zip_tricks/file_reader.rb",
39
+ "lib/zip_tricks/file_reader/inflating_reader.rb",
40
+ "lib/zip_tricks/file_reader/stored_reader.rb",
39
41
  "lib/zip_tricks/null_writer.rb",
40
42
  "lib/zip_tricks/rack_body.rb",
41
43
  "lib/zip_tricks/remote_io.rb",
@@ -59,6 +61,7 @@ Gem::Specification.new do |s|
59
61
  "spec/zip_tricks/remote_uncap_spec.rb",
60
62
  "spec/zip_tricks/size_estimator_spec.rb",
61
63
  "spec/zip_tricks/stream_crc32_spec.rb",
64
+ "spec/zip_tricks/streamer/writable_spec.rb",
62
65
  "spec/zip_tricks/streamer_spec.rb",
63
66
  "spec/zip_tricks/war-and-peace.txt",
64
67
  "spec/zip_tricks/write_and_tell_spec.rb",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zip_tricks
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julik Tarkhanov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-17 00:00:00.000000000 Z
11
+ date: 2016-08-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -187,6 +187,8 @@ files:
187
187
  - lib/zip_tricks/block_deflate.rb
188
188
  - lib/zip_tricks/block_write.rb
189
189
  - lib/zip_tricks/file_reader.rb
190
+ - lib/zip_tricks/file_reader/inflating_reader.rb
191
+ - lib/zip_tricks/file_reader/stored_reader.rb
190
192
  - lib/zip_tricks/null_writer.rb
191
193
  - lib/zip_tricks/rack_body.rb
192
194
  - lib/zip_tricks/remote_io.rb
@@ -210,6 +212,7 @@ files:
210
212
  - spec/zip_tricks/remote_uncap_spec.rb
211
213
  - spec/zip_tricks/size_estimator_spec.rb
212
214
  - spec/zip_tricks/stream_crc32_spec.rb
215
+ - spec/zip_tricks/streamer/writable_spec.rb
213
216
  - spec/zip_tricks/streamer_spec.rb
214
217
  - spec/zip_tricks/war-and-peace.txt
215
218
  - spec/zip_tricks/write_and_tell_spec.rb