zip_tricks 3.1.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/zip_tricks.rb +1 -1
- data/lib/zip_tricks/file_reader.rb +104 -114
- data/lib/zip_tricks/file_reader/inflating_reader.rb +30 -0
- data/lib/zip_tricks/file_reader/stored_reader.rb +29 -0
- data/lib/zip_tricks/remote_io.rb +1 -2
- data/lib/zip_tricks/remote_uncap.rb +10 -40
- data/lib/zip_tricks/streamer/writable.rb +1 -1
- data/spec/spec_helper.rb +12 -1
- data/spec/zip_tricks/file_reader_spec.rb +126 -32
- data/spec/zip_tricks/remote_uncap_spec.rb +24 -52
- data/spec/zip_tricks/streamer/writable_spec.rb +27 -0
- data/zip_tricks.gemspec +6 -3
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b478a7ffbae2dbb20270a1e4c4b63f495feef87
|
4
|
+
data.tar.gz: d9a8ca3a1596ee653fcf2e2b9153a9e59bd912fc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 689d25f85a79987750fe3bbdf845cfe76c987713709a3d7fc8d2bc944a570958168c2b6c38a6f657b254d499052c079aad84c85b6421b9c228740dd3c9f79903
|
7
|
+
data.tar.gz: 434ffe473e5b0339b40ead5437e070cc1bd6dcce910db58e516a207806cd0eab16f86cc04d342201e2f31f121438308351239d923f4b4e5311850755c77ce339
|
data/lib/zip_tricks.rb
CHANGED
@@ -47,71 +47,18 @@ require 'stringio'
|
|
47
47
|
# and so on, and sets these entries up with the absolute _offsets_ into the source file/IO object.
|
48
48
|
# These offsets can then be used to extract the actual compressed data of the files and to expand it.
|
49
49
|
class ZipTricks::FileReader
|
50
|
+
require_relative 'file_reader/stored_reader'
|
51
|
+
require_relative 'file_reader/inflating_reader'
|
52
|
+
|
50
53
|
ReadError = Class.new(StandardError)
|
51
54
|
UnsupportedFeature = Class.new(StandardError)
|
52
55
|
InvalidStructure = Class.new(ReadError)
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
@io = from_io
|
57
|
-
@compressed_data_size = compressed_data_size
|
58
|
-
@already_read = 0
|
59
|
-
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
60
|
-
end
|
61
|
-
|
62
|
-
def extract(n_bytes=nil)
|
63
|
-
n_bytes ||= (@compressed_data_size - @already_read)
|
64
|
-
|
65
|
-
return if eof?
|
66
|
-
|
67
|
-
available = @compressed_data_size - @already_read
|
68
|
-
|
69
|
-
return if available.zero?
|
70
|
-
|
71
|
-
n_bytes = available if n_bytes > available
|
72
|
-
|
73
|
-
return '' if n_bytes.zero?
|
74
|
-
|
75
|
-
compressed_chunk = @io.read(n_bytes)
|
76
|
-
@already_read += compressed_chunk.bytesize
|
77
|
-
@zlib_inflater.inflate(compressed_chunk)
|
78
|
-
end
|
79
|
-
|
80
|
-
def eof?
|
81
|
-
@zlib_inflater.finished?
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
class StoredReader
|
86
|
-
def initialize(from_io, compressed_data_size)
|
87
|
-
@io = from_io
|
88
|
-
@compressed_data_size = compressed_data_size
|
89
|
-
@already_read = 0
|
90
|
-
end
|
91
|
-
|
92
|
-
def extract(n_bytes=nil)
|
93
|
-
n_bytes ||= (@compressed_data_size - @already_read)
|
94
|
-
|
95
|
-
return if eof?
|
96
|
-
|
97
|
-
available = @compressed_data_size - @already_read
|
98
|
-
|
99
|
-
return if available.zero?
|
100
|
-
|
101
|
-
n_bytes = available if n_bytes > available
|
102
|
-
|
103
|
-
return '' if n_bytes.zero?
|
104
|
-
|
105
|
-
compressed_chunk = @io.read(n_bytes)
|
106
|
-
@already_read += compressed_chunk.bytesize
|
107
|
-
compressed_chunk
|
108
|
-
end
|
109
|
-
|
110
|
-
def eof?
|
111
|
-
@already_read >= @compressed_data_size
|
56
|
+
LocalHeaderPending = Class.new(StandardError) do
|
57
|
+
def message
|
58
|
+
"The compressed data offset is not available (local header has not been read)"
|
112
59
|
end
|
113
60
|
end
|
114
|
-
|
61
|
+
|
115
62
|
private_constant :StoredReader, :InflatingReader
|
116
63
|
|
117
64
|
# Represents a file within the ZIP archive being read
|
@@ -162,13 +109,9 @@ class ZipTricks::FileReader
|
|
162
109
|
# @return [String] the file comment
|
163
110
|
attr_accessor :comment
|
164
111
|
|
165
|
-
# @return [Fixnum] at what offset you should start reading
|
166
|
-
# for the compressed data in your original IO object
|
167
|
-
attr_accessor :compressed_data_offset
|
168
|
-
|
169
112
|
# Returns a reader for the actual compressed data of the entry.
|
170
113
|
#
|
171
|
-
# reader = entry.
|
114
|
+
# reader = entry.extractor_from(source_file)
|
172
115
|
# outfile << reader.extract(512 * 1024) until reader.eof?
|
173
116
|
#
|
174
117
|
# @return [#extract(n_bytes), #eof?] the reader for the data
|
@@ -180,17 +123,50 @@ class ZipTricks::FileReader
|
|
180
123
|
when 0
|
181
124
|
StoredReader.new(from_io, compressed_size)
|
182
125
|
else
|
183
|
-
raise "Unsupported storage mode for reading
|
126
|
+
raise UnsupportedFeature, "Unsupported storage mode for reading - %d" % storage_mode
|
184
127
|
end
|
185
128
|
end
|
129
|
+
|
130
|
+
# @return [Fixnum] at what offset you should start reading
|
131
|
+
# for the compressed data in your original IO object
|
132
|
+
def compressed_data_offset
|
133
|
+
@compressed_data_offset or raise LocalHeaderPending
|
134
|
+
end
|
135
|
+
|
136
|
+
# Tells whether the compressed data offset is already known for this entry
|
137
|
+
# @return [Boolean]
|
138
|
+
def known_offset?
|
139
|
+
!@compressed_data_offset.nil?
|
140
|
+
end
|
141
|
+
|
142
|
+
# Sets the offset at which the compressed data for this file starts in the ZIP.
|
143
|
+
# By default, the value will be set by the Reader for you. If you use delayed
|
144
|
+
# reading, you need to set it by using the `get_compressed_data_offset` on the Reader:
|
145
|
+
#
|
146
|
+
# entry.compressed_data_offset = reader.get_compressed_data_offset(io: file,
|
147
|
+
# local_file_header_offset: entry.local_file_header_offset)
|
148
|
+
def compressed_data_offset=(offset)
|
149
|
+
@compressed_data_offset = offset.to_i
|
150
|
+
end
|
186
151
|
end
|
187
152
|
|
188
153
|
# Parse an IO handle to a ZIP archive into an array of Entry objects.
|
189
154
|
#
|
190
155
|
# @param io[#tell, #seek, #read, #size] an IO-ish object
|
191
|
-
# @param read_local_headers[Boolean] whether
|
156
|
+
# @param read_local_headers[Boolean] whether the local headers must be read upfront. When reading
|
157
|
+
# a locally available ZIP file this option will not have much use since the small reads from
|
158
|
+
# the file handle are not going to be that important. However, if you are using remote reads
|
159
|
+
# to decipher a ZIP file located on an HTTP server, the operation _must_ perform an HTTP
|
160
|
+
# request for _each entry in the ZIP file_ to determine where the actual file data starts.
|
161
|
+
# This, for a ZIP archive of 1000 files, will incur 1000 extra HTTP requests - which you might
|
162
|
+
# not want to perform upfront, or - at least - not want to perform _at once_. When the option is
|
163
|
+
# set to `false`, you will be getting instances of `LazyEntry` instead of `Entry`. Those objects
|
164
|
+
# will raise an exception when you attempt to access their compressed data offset in the ZIP
|
165
|
+
# (since the reads have not been performed yet). As a rule, this option can be left in its
|
166
|
+
# default setting (`true`) unless you want to _only_ read the central directory, or you need
|
167
|
+
# to limit the number of HTTP requests.
|
192
168
|
# @return [Array<Entry>] an array of entries within the ZIP being parsed
|
193
|
-
def read_zip_structure(io
|
169
|
+
def read_zip_structure(io:, read_local_headers: true)
|
194
170
|
zip_file_size = io.size
|
195
171
|
eocd_offset = get_eocd_offset(io, zip_file_size)
|
196
172
|
|
@@ -213,24 +189,68 @@ class ZipTricks::FileReader
|
|
213
189
|
read_cdir_entry(central_directory_io)
|
214
190
|
end
|
215
191
|
|
216
|
-
entries
|
217
|
-
|
218
|
-
|
219
|
-
entry.compressed_data_offset = find_compressed_data_start_offset(io, entry.local_file_header_offset)
|
220
|
-
end
|
221
|
-
end
|
192
|
+
read_local_headers(entries, io) if read_local_headers
|
193
|
+
|
194
|
+
entries
|
222
195
|
end
|
223
196
|
|
197
|
+
# Get the offset in the IO at which the actual compressed data of the file starts within the ZIP.
|
198
|
+
# The method will eager-read the entire local header for the file (the maximum size the local header may use),
|
199
|
+
# starting at the given offset, and will then compute its size. That size plus the local header offset
|
200
|
+
# given will be the compressed data offset of the entry (read starting at this offset to get the data).
|
201
|
+
#
|
202
|
+
# @param io[#seek, #read] an IO-ish object the ZIP file can be read from
|
203
|
+
# @param local_header_offset[Fixnum] absolute offset (0-based) where the local file header is supposed to begin
|
204
|
+
# @return [Fixnum] absolute offset (0-based) of where the compressed data begins for this file within the ZIP
|
205
|
+
def get_compressed_data_offset(io:, local_file_header_offset:)
|
206
|
+
seek(io, local_file_header_offset)
|
207
|
+
|
208
|
+
# Reading in bulk is cheaper - grab the maximum length of the local header,
|
209
|
+
# including any headroom
|
210
|
+
local_file_header_str_plus_headroom = io.read(MAX_LOCAL_HEADER_SIZE)
|
211
|
+
io_starting_at_local_header = StringIO.new(local_file_header_str_plus_headroom)
|
212
|
+
|
213
|
+
assert_signature(io_starting_at_local_header, 0x04034b50)
|
214
|
+
|
215
|
+
# The rest is unreliable, and we have that information from the central directory already.
|
216
|
+
# So just skip over it to get at the offset where the compressed data begins
|
217
|
+
skip_ahead_2(io_starting_at_local_header) # Version needed to extract
|
218
|
+
skip_ahead_2(io_starting_at_local_header) # gp flags
|
219
|
+
skip_ahead_2(io_starting_at_local_header) # storage mode
|
220
|
+
skip_ahead_2(io_starting_at_local_header) # dos time
|
221
|
+
skip_ahead_2(io_starting_at_local_header) # dos date
|
222
|
+
skip_ahead_4(io_starting_at_local_header) # CRC32
|
223
|
+
|
224
|
+
skip_ahead_4(io_starting_at_local_header) # Comp size
|
225
|
+
skip_ahead_4(io_starting_at_local_header) # Uncomp size
|
226
|
+
|
227
|
+
filename_size = read_2b(io_starting_at_local_header)
|
228
|
+
extra_size = read_2b(io_starting_at_local_header)
|
229
|
+
|
230
|
+
skip_ahead_n(io_starting_at_local_header, filename_size)
|
231
|
+
skip_ahead_n(io_starting_at_local_header, extra_size)
|
232
|
+
|
233
|
+
local_file_header_offset + io_starting_at_local_header.tell
|
234
|
+
end
|
235
|
+
|
224
236
|
# Parse an IO handle to a ZIP archive into an array of Entry objects.
|
225
237
|
#
|
226
|
-
# @param
|
238
|
+
# @param options[Hash] any options the instance method of the same name accepts
|
227
239
|
# @return [Array<Entry>] an array of entries within the ZIP being parsed
|
228
|
-
def self.read_zip_structure(
|
229
|
-
new.read_zip_structure(
|
240
|
+
def self.read_zip_structure(**options)
|
241
|
+
new.read_zip_structure(**options)
|
230
242
|
end
|
231
243
|
|
232
244
|
private
|
233
245
|
|
246
|
+
def read_local_headers(entries, io)
|
247
|
+
entries.each_with_index do |entry, i|
|
248
|
+
log { 'Reading the local header for entry %d at offset %d' % [i, entry.local_file_header_offset] }
|
249
|
+
off = get_compressed_data_offset(io: io, local_file_header_offset: entry.local_file_header_offset)
|
250
|
+
entry.compressed_data_offset = off
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
234
254
|
def skip_ahead_2(io)
|
235
255
|
skip_ahead_n(io, 2)
|
236
256
|
end
|
@@ -287,40 +307,7 @@ class ZipTricks::FileReader
|
|
287
307
|
read_n(io, 8).unpack(C_Qe).shift
|
288
308
|
end
|
289
309
|
|
290
|
-
def find_compressed_data_start_offset(file_io, local_header_offset)
|
291
|
-
seek(file_io, local_header_offset)
|
292
|
-
|
293
|
-
# Reading in bulk is cheaper - grab the maximum length of the local header, including
|
294
|
-
# any headroom
|
295
|
-
local_file_header_str_plus_headroom = file_io.read(MAX_LOCAL_HEADER_SIZE)
|
296
|
-
io = StringIO.new(local_file_header_str_plus_headroom)
|
297
|
-
|
298
|
-
assert_signature(io, 0x04034b50)
|
299
|
-
|
300
|
-
# The rest is unreliable, and we have that information from the central directory already.
|
301
|
-
# So just skip over it to get at the offset where the compressed data begins
|
302
|
-
skip_ahead_2(io) # Version needed to extract
|
303
|
-
skip_ahead_2(io) # gp flags
|
304
|
-
skip_ahead_2(io) # storage mode
|
305
|
-
skip_ahead_2(io) # dos time
|
306
|
-
skip_ahead_2(io) # dos date
|
307
|
-
skip_ahead_4(io) # CRC32
|
308
|
-
|
309
|
-
skip_ahead_4(io) # Comp size
|
310
|
-
skip_ahead_4(io) # Uncomp size
|
311
|
-
|
312
|
-
filename_size = read_2b(io)
|
313
|
-
extra_size = read_2b(io)
|
314
|
-
|
315
|
-
skip_ahead_n(io, filename_size)
|
316
|
-
skip_ahead_n(io, extra_size)
|
317
|
-
|
318
|
-
local_header_offset + io.tell
|
319
|
-
end
|
320
|
-
|
321
|
-
|
322
310
|
def read_cdir_entry(io)
|
323
|
-
expected_at = io.tell
|
324
311
|
assert_signature(io, 0x02014b50)
|
325
312
|
ZipEntry.new.tap do |e|
|
326
313
|
e.made_by = read_2b(io)
|
@@ -357,7 +344,9 @@ class ZipTricks::FileReader
|
|
357
344
|
end
|
358
345
|
|
359
346
|
# ...of which we really only need the Zip64 extra
|
360
|
-
if zip64_extra_contents = extra_table[1]
|
347
|
+
if zip64_extra_contents = extra_table[1]
|
348
|
+
# If the Zip64 extra is present, we let it override all
|
349
|
+
# the values fetched from the conventional header
|
361
350
|
zip64_extra = StringIO.new(zip64_extra_contents)
|
362
351
|
log { 'Will read Zip64 extra data for %s, %d bytes' % [e.filename, zip64_extra.size] }
|
363
352
|
# Now here be dragons. The APPNOTE specifies that
|
@@ -533,10 +522,11 @@ class ZipTricks::FileReader
|
|
533
522
|
def num_files_and_central_directory_offset(file_io, eocd_offset)
|
534
523
|
seek(file_io, eocd_offset)
|
535
524
|
|
536
|
-
|
537
|
-
|
538
|
-
|
525
|
+
# The size of the EOCD record is known upfront, so use a strict read
|
526
|
+
eocd_record_str = read_n(file_io, SIZE_OF_USABLE_EOCD_RECORD)
|
527
|
+
io = StringIO.new(eocd_record_str)
|
539
528
|
|
529
|
+
assert_signature(io, 0x06054b50)
|
540
530
|
skip_ahead_2(io) # number_of_this_disk
|
541
531
|
skip_ahead_2(io) # number of the disk with the EOCD record
|
542
532
|
skip_ahead_2(io) # number of entries in the central directory of this disk
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class ZipTricks::FileReader::InflatingReader
|
2
|
+
def initialize(from_io, compressed_data_size)
|
3
|
+
@io = from_io
|
4
|
+
@compressed_data_size = compressed_data_size
|
5
|
+
@already_read = 0
|
6
|
+
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
7
|
+
end
|
8
|
+
|
9
|
+
def extract(n_bytes=nil)
|
10
|
+
n_bytes ||= (@compressed_data_size - @already_read)
|
11
|
+
|
12
|
+
return if eof?
|
13
|
+
|
14
|
+
available = @compressed_data_size - @already_read
|
15
|
+
|
16
|
+
return if available.zero?
|
17
|
+
|
18
|
+
n_bytes = available if n_bytes > available
|
19
|
+
|
20
|
+
return '' if n_bytes.zero?
|
21
|
+
|
22
|
+
compressed_chunk = @io.read(n_bytes)
|
23
|
+
@already_read += compressed_chunk.bytesize
|
24
|
+
@zlib_inflater.inflate(compressed_chunk)
|
25
|
+
end
|
26
|
+
|
27
|
+
def eof?
|
28
|
+
@zlib_inflater.finished?
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class ZipTricks::FileReader::StoredReader
|
2
|
+
def initialize(from_io, compressed_data_size)
|
3
|
+
@io = from_io
|
4
|
+
@compressed_data_size = compressed_data_size
|
5
|
+
@already_read = 0
|
6
|
+
end
|
7
|
+
|
8
|
+
def extract(n_bytes=nil)
|
9
|
+
n_bytes ||= (@compressed_data_size - @already_read)
|
10
|
+
|
11
|
+
return if eof?
|
12
|
+
|
13
|
+
available = @compressed_data_size - @already_read
|
14
|
+
|
15
|
+
return if available.zero?
|
16
|
+
|
17
|
+
n_bytes = available if n_bytes > available
|
18
|
+
|
19
|
+
return '' if n_bytes.zero?
|
20
|
+
|
21
|
+
compressed_chunk = @io.read(n_bytes)
|
22
|
+
@already_read += compressed_chunk.bytesize
|
23
|
+
compressed_chunk
|
24
|
+
end
|
25
|
+
|
26
|
+
def eof?
|
27
|
+
@already_read >= @compressed_data_size
|
28
|
+
end
|
29
|
+
end
|
data/lib/zip_tricks/remote_io.rb
CHANGED
@@ -29,8 +29,7 @@ class ZipTricks::RemoteIO
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# Emulates IO#read, but requires the number of bytes to read
|
32
|
-
# The
|
33
|
-
# not match the number requested. The read will be limited to the
|
32
|
+
# The read will be limited to the
|
34
33
|
# size of the remote resource relative to the current offset in the IO,
|
35
34
|
# so if you are at offset 0 in the IO of size 10, doing a `read(20)`
|
36
35
|
# will only return you 10 bytes of result, and not raise any exceptions.
|
@@ -7,55 +7,23 @@
|
|
7
7
|
# before you use this module.
|
8
8
|
class ZipTricks::RemoteUncap
|
9
9
|
|
10
|
-
# Represents a file embedded within a remote ZIP archive
|
11
|
-
class RemoteZipEntry
|
12
|
-
|
13
|
-
# @return [String] filename of the file in the remote ZIP
|
14
|
-
attr_accessor :name
|
15
|
-
|
16
|
-
# @return [Fixnum] size in bytes of the file when uncompressed
|
17
|
-
attr_accessor :size_uncompressed
|
18
|
-
|
19
|
-
# @return [Fixnum] size in bytes of the file when compressed (the segment in the ZIP)
|
20
|
-
attr_accessor :size_compressed
|
21
|
-
|
22
|
-
# @return [Fixnum] compression method (0 for stored, 8 for deflate)
|
23
|
-
attr_accessor :compression_method
|
24
|
-
|
25
|
-
# @return [Fixnum] where the file data starts within the ZIP
|
26
|
-
attr_accessor :starts_at_offset
|
27
|
-
|
28
|
-
# @return [Fixnum] where the file data ends within the zip.
|
29
|
-
# Will be equal to starts_at_offset if the file is empty
|
30
|
-
attr_accessor :ends_at_offset
|
31
|
-
|
32
|
-
# Yields the object during initialization
|
33
|
-
def initialize
|
34
|
-
yield self
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
10
|
# @param uri[String] the HTTP(S) URL to read the ZIP footer from
|
39
|
-
# @
|
40
|
-
|
11
|
+
# @param reader_class[Class] which class to use for reading
|
12
|
+
# @param options_for_zip_reader[Hash] any additional options to give to {ZipTricks::FileReader} when reading
|
13
|
+
# @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the files within the remote archive
|
14
|
+
def self.files_within_zip_at(uri, reader_class: ZipTricks::FileReader, **options_for_zip_reader)
|
41
15
|
fetcher = new(uri)
|
42
16
|
fake_io = ZipTricks::RemoteIO.new(fetcher)
|
43
|
-
|
44
|
-
|
45
|
-
RemoteZipEntry.new do | entry |
|
46
|
-
entry.name = remote_entry.filename
|
47
|
-
entry.starts_at_offset = remote_entry.compressed_data_offset
|
48
|
-
entry.size_uncompressed = remote_entry.uncompressed_size
|
49
|
-
entry.size_compressed = remote_entry.compressed_size
|
50
|
-
entry.compression_method = remote_entry.storage_mode
|
51
|
-
end
|
52
|
-
end
|
17
|
+
reader = reader_class.new
|
18
|
+
reader.read_zip_structure(io: fake_io, **options_for_zip_reader)
|
53
19
|
end
|
54
20
|
|
55
21
|
def initialize(uri)
|
56
22
|
@uri = URI(uri)
|
57
23
|
end
|
58
24
|
|
25
|
+
# Only used internally when reading the remote ZIP.
|
26
|
+
#
|
59
27
|
# @param range[Range] the HTTP range of data to fetch from remote
|
60
28
|
# @return [String] the response body of the ranged request
|
61
29
|
def request_range(range)
|
@@ -65,6 +33,8 @@ class ZipTricks::RemoteUncap
|
|
65
33
|
http.request(request).body
|
66
34
|
end
|
67
35
|
|
36
|
+
# Only used internally when reading the remote ZIP.
|
37
|
+
#
|
68
38
|
# @return [Fixnum] the byte size of the ranged request
|
69
39
|
def request_object_size
|
70
40
|
http = Net::HTTP.start(@uri.hostname, @uri.port)
|
data/spec/spec_helper.rb
CHANGED
@@ -7,6 +7,17 @@ require 'digest'
|
|
7
7
|
require 'fileutils'
|
8
8
|
require 'shellwords'
|
9
9
|
require 'zip'
|
10
|
+
require 'delegate'
|
11
|
+
|
12
|
+
class ReadMonitor < SimpleDelegator
|
13
|
+
def read(*)
|
14
|
+
super.tap { @num_reads ||= 0; @num_reads += 1 }
|
15
|
+
end
|
16
|
+
|
17
|
+
def num_reads
|
18
|
+
@num_reads || 0
|
19
|
+
end
|
20
|
+
end
|
10
21
|
|
11
22
|
module Keepalive
|
12
23
|
# Travis-CI kills the build if it does not receive output on standard out or standard error
|
@@ -45,7 +56,7 @@ module ZipInspection
|
|
45
56
|
$zip_inspection_buf ||= StringIO.new
|
46
57
|
$zip_inspection_buf.puts "\n"
|
47
58
|
$zip_inspection_buf.puts "Inspecting ZIP output of #{inspect}." # The only way to get at the RSpec example without using the block argument
|
48
|
-
$zip_inspection_buf.puts "Be aware that the zipinfo version on OSX is too old to deal with
|
59
|
+
$zip_inspection_buf.puts "Be aware that the zipinfo version on OSX is too old to deal with Zip64."
|
49
60
|
escaped_cmd = Shellwords.join([zipinfo_path, '-tlhvz', path_to_zip])
|
50
61
|
$zip_inspection_buf.puts `#{escaped_cmd}`
|
51
62
|
end
|
@@ -1,48 +1,142 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
|
3
2
|
describe ZipTricks::FileReader do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
|
4
|
+
describe 'with an end-to-end ZIP file to read' do
|
5
|
+
it 'reads and uncompresses the file written deflated with data descriptors' do
|
6
|
+
zipfile = StringIO.new
|
7
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
8
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
9
|
+
|
10
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
11
|
+
zip.write_deflated_file('war-and-peace.txt') do |sink|
|
12
|
+
sink << tolstoy
|
13
|
+
end
|
12
14
|
end
|
15
|
+
|
16
|
+
entries = described_class.read_zip_structure(io: zipfile)
|
17
|
+
expect(entries.length).to eq(1)
|
18
|
+
|
19
|
+
entry = entries.first
|
20
|
+
|
21
|
+
readback = ''
|
22
|
+
reader = entry.extractor_from(zipfile)
|
23
|
+
readback << reader.extract(10) until reader.eof?
|
24
|
+
|
25
|
+
expect(readback.bytesize).to eq(tolstoy.bytesize)
|
26
|
+
expect(readback[0..10]).to eq(tolstoy[0..10])
|
27
|
+
expect(readback[-10..-1]).to eq(tolstoy[-10..-1])
|
13
28
|
end
|
14
29
|
|
15
|
-
|
16
|
-
|
30
|
+
it 'performs local file header reads by default' do
|
31
|
+
zipfile = StringIO.new
|
32
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
33
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
17
34
|
|
18
|
-
|
35
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
36
|
+
40.times do |i|
|
37
|
+
zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
zipfile.rewind
|
41
|
+
|
42
|
+
read_monitor = ReadMonitor.new(zipfile)
|
43
|
+
entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: true)
|
44
|
+
expect(read_monitor.num_reads).to eq(44)
|
45
|
+
end
|
19
46
|
|
20
|
-
|
21
|
-
|
22
|
-
|
47
|
+
it 'performs local file header reads when `read_local_headers` is set to true' do
|
48
|
+
zipfile = StringIO.new
|
49
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
50
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
23
51
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
52
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
53
|
+
40.times do |i|
|
54
|
+
zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
|
55
|
+
end
|
56
|
+
end
|
57
|
+
zipfile.rewind
|
58
|
+
|
59
|
+
read_monitor = ReadMonitor.new(zipfile)
|
60
|
+
entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: true)
|
61
|
+
expect(read_monitor.num_reads).to eq(44)
|
28
62
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
63
|
+
expect(entries.length).to eq(40)
|
64
|
+
entry = entries.first
|
65
|
+
expect(entry).to be_known_offset
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'performs a limited number of reads when `read_local_headers` is set to false' do
|
69
|
+
zipfile = StringIO.new
|
70
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
71
|
+
tolstoy.force_encoding(Encoding::BINARY)
|
72
|
+
|
73
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
74
|
+
40.times do |i|
|
75
|
+
zip.write_deflated_file('war-and-peace-%d.txt' % i) { |sink| sink << tolstoy }
|
76
|
+
end
|
35
77
|
end
|
78
|
+
zipfile.rewind
|
79
|
+
read_monitor = ReadMonitor.new(zipfile)
|
80
|
+
|
81
|
+
entries = described_class.read_zip_structure(io: read_monitor, read_local_headers: false)
|
82
|
+
|
83
|
+
expect(read_monitor.num_reads).to eq(4)
|
84
|
+
expect(entries.length).to eq(40)
|
85
|
+
entry = entries.first
|
86
|
+
expect(entry).not_to be_known_offset
|
87
|
+
expect {
|
88
|
+
entry.compressed_data_offset
|
89
|
+
}.to raise_error(/read/)
|
36
90
|
end
|
37
91
|
|
38
|
-
|
39
|
-
|
92
|
+
it 'reads the file written stored with data descriptors' do
|
93
|
+
zipfile = StringIO.new
|
94
|
+
tolstoy = File.read(__dir__ + '/war-and-peace.txt')
|
95
|
+
ZipTricks::Streamer.open(zipfile) do |zip|
|
96
|
+
zip.write_stored_file('war-and-peace.txt') do |sink|
|
97
|
+
sink << tolstoy
|
98
|
+
end
|
99
|
+
end
|
40
100
|
|
41
|
-
|
101
|
+
entries = described_class.read_zip_structure(io: zipfile)
|
102
|
+
expect(entries.length).to eq(1)
|
42
103
|
|
43
|
-
|
44
|
-
|
45
|
-
|
104
|
+
entry = entries.first
|
105
|
+
|
106
|
+
readback = entry.extractor_from(zipfile).extract
|
107
|
+
expect(readback.bytesize).to eq(tolstoy.bytesize)
|
108
|
+
expect(readback[0..10]).to eq(tolstoy[0..10])
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe '#get_compressed_data_offset' do
|
113
|
+
it 'reads the offset for an entry having Zip64 extra fields' do
|
114
|
+
w = ZipTricks::ZipWriter.new
|
115
|
+
out = StringIO.new
|
116
|
+
out << Random.new.bytes(7656177)
|
117
|
+
w.write_local_file_header(io: out, filename: 'some file',
|
118
|
+
compressed_size: 0xFFFFFFFF + 5, uncompressed_size: 0xFFFFFFFFF, crc32: 123, gp_flags: 4,
|
119
|
+
mtime: Time.now, storage_mode: 8)
|
120
|
+
|
121
|
+
out.rewind
|
122
|
+
|
123
|
+
compressed_data_offset = subject.get_compressed_data_offset(io: out, local_file_header_offset: 7656177)
|
124
|
+
expect(compressed_data_offset).to eq(7656236)
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'reads the offset for an entry having a long name' do
|
128
|
+
w = ZipTricks::ZipWriter.new
|
129
|
+
out = StringIO.new
|
130
|
+
out << Random.new.bytes(7)
|
131
|
+
w.write_local_file_header(io: out, filename: 'This is a file with a ridiculously long name.doc',
|
132
|
+
compressed_size: 10, uncompressed_size: 15, crc32: 123, gp_flags: 4,
|
133
|
+
mtime: Time.now, storage_mode: 8)
|
134
|
+
|
135
|
+
out.rewind
|
136
|
+
|
137
|
+
compressed_data_offset = subject.get_compressed_data_offset(io: out, local_file_header_offset: 7)
|
138
|
+
expect(compressed_data_offset).to eq(85)
|
139
|
+
end
|
46
140
|
end
|
47
141
|
|
48
142
|
it 'is able to latch to the EOCD location even if the signature for the EOCD record appears all over the ZIP' do
|
@@ -63,7 +157,7 @@ describe ZipTricks::FileReader do
|
|
63
157
|
central_directory_size: z.tell - where, num_files_in_archive: 1, comment: evil_str)
|
64
158
|
|
65
159
|
z.rewind
|
66
|
-
entries = described_class.read_zip_structure(z)
|
160
|
+
entries = described_class.read_zip_structure(io: z)
|
67
161
|
expect(entries.length).to eq(1)
|
68
162
|
end
|
69
163
|
|
@@ -16,19 +16,10 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
16
16
|
payload2 << Random.new.bytes(1024 * 1024 * 3)
|
17
17
|
payload2.flush; payload2.rewind
|
18
18
|
|
19
|
-
payload1_crc = Zlib.crc32(payload1.read).tap { payload1.rewind }
|
20
|
-
payload2_crc = Zlib.crc32(payload2.read).tap { payload2.rewind }
|
21
|
-
|
22
19
|
File.open('temp.zip', 'wb') do |f|
|
23
20
|
ZipTricks::Streamer.open(f) do | zip |
|
24
|
-
zip.
|
25
|
-
|
26
|
-
zip << blob
|
27
|
-
end
|
28
|
-
zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
|
29
|
-
while blob = payload2.read(1024 * 5)
|
30
|
-
zip << blob
|
31
|
-
end
|
21
|
+
zip.write_stored_file('first-file.bin') { |w| IO.copy_stream(payload1, w) }
|
22
|
+
zip.write_stored_file('second-file.bin') { |w| IO.copy_stream(payload2, w) }
|
32
23
|
end
|
33
24
|
end
|
34
25
|
payload1.rewind; payload2.rewind
|
@@ -53,17 +44,17 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
53
44
|
|
54
45
|
first, second = *files
|
55
46
|
|
56
|
-
expect(first.
|
57
|
-
expect(first.
|
47
|
+
expect(first.filename).to eq('first-file.bin')
|
48
|
+
expect(first.uncompressed_size).to eq(payload1.size)
|
58
49
|
File.open('temp.zip', 'rb') do |readback|
|
59
|
-
readback.seek(first.
|
50
|
+
readback.seek(first.compressed_data_offset, IO::SEEK_SET)
|
60
51
|
expect(readback.read(12)).to eq(payload1.read(12))
|
61
52
|
end
|
62
53
|
|
63
|
-
expect(second.
|
64
|
-
expect(second.
|
54
|
+
expect(second.filename).to eq('second-file.bin')
|
55
|
+
expect(second.uncompressed_size).to eq(payload2.size)
|
65
56
|
File.open('temp.zip', 'rb') do |readback|
|
66
|
-
readback.seek(second.
|
57
|
+
readback.seek(second.compressed_data_offset, IO::SEEK_SET)
|
67
58
|
expect(readback.read(12)).to eq(payload2.read(12))
|
68
59
|
end
|
69
60
|
end
|
@@ -79,50 +70,31 @@ describe ZipTricks::RemoteUncap, webmock: true do
|
|
79
70
|
payload1_crc = Zlib.crc32(payload1.read).tap { payload1.rewind }
|
80
71
|
payload2_crc = Zlib.crc32(payload2.read).tap { payload2.rewind }
|
81
72
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
zip.add_stored_entry(filename: 'second-file.bin', size: payload2.size, crc32: payload2_crc)
|
87
|
-
while blob = payload2.read(1024 * 5)
|
88
|
-
zip << blob
|
89
|
-
end
|
90
|
-
end
|
73
|
+
readable_zip = Tempfile.new 'somezip'
|
74
|
+
ZipTricks::Streamer.open(readable_zip) do | zip |
|
75
|
+
zip.add_stored_entry(filename: 'first-file-zero-size.bin', size: payload1.size, crc32: payload1_crc)
|
76
|
+
zip.write_stored_file('second-file.bin') {|w| IO.copy_stream(payload2, w) }
|
91
77
|
end
|
92
|
-
|
93
|
-
|
94
|
-
expect(File).to be_exist('temp.zip')
|
78
|
+
readable_zip.flush; readable_zip.rewind
|
95
79
|
|
96
80
|
allow_any_instance_of(described_class).to receive(:request_object_size) {
|
97
|
-
|
81
|
+
readable_zip.size
|
98
82
|
}
|
99
83
|
allow_any_instance_of(described_class).to receive(:request_range) {|_instance, range|
|
100
|
-
|
101
|
-
|
102
|
-
f.read(range.end - range.begin + 1)
|
103
|
-
end
|
84
|
+
readable_zip.seek(range.begin, IO::SEEK_SET)
|
85
|
+
readable_zip.read(range.end - range.begin + 1)
|
104
86
|
}
|
105
87
|
|
106
88
|
payload1.rewind; payload2.rewind
|
107
89
|
|
108
|
-
|
109
|
-
expect(files).to be_kind_of(Array)
|
110
|
-
expect(files.length).to eq(2)
|
111
|
-
|
112
|
-
first, second = *files
|
113
|
-
|
114
|
-
expect(first.name).to eq('first-file.bin')
|
115
|
-
expect(first.size_uncompressed).to eq(payload1.size)
|
116
|
-
File.open('temp.zip', 'rb') do |readback|
|
117
|
-
readback.seek(first.starts_at_offset, IO::SEEK_SET)
|
118
|
-
expect(readback.read(0)).to eq(payload1.read(0))
|
119
|
-
end
|
90
|
+
first, second = described_class.files_within_zip_at('http://fake.example.com')
|
120
91
|
|
121
|
-
expect(
|
122
|
-
expect(
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
92
|
+
expect(first.filename).to eq('first-file-zero-size.bin')
|
93
|
+
expect(first.compressed_size).to be_zero
|
94
|
+
|
95
|
+
expect(second.filename).to eq('second-file.bin')
|
96
|
+
expect(second.uncompressed_size).to eq(payload2.size)
|
97
|
+
readable_zip.seek(second.compressed_data_offset, IO::SEEK_SET)
|
98
|
+
expect(readable_zip.read(12)).to eq(payload2.read(12))
|
127
99
|
end
|
128
100
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
|
3
|
+
describe ZipTricks::Streamer::Writable do
|
4
|
+
describe '#<<' do
|
5
|
+
it 'writes the given data to the destination and returns self' do
|
6
|
+
buf = StringIO.new
|
7
|
+
subject = described_class.new(buf)
|
8
|
+
|
9
|
+
result = subject << 'hello!'
|
10
|
+
|
11
|
+
expect(buf.string).to eq('hello!')
|
12
|
+
expect(result).to eq(subject)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe '#write' do
|
17
|
+
it 'writes the given data to the destination and returns the number of bytes written' do
|
18
|
+
buf = StringIO.new
|
19
|
+
subject = described_class.new(buf)
|
20
|
+
|
21
|
+
result = subject.write('hello!')
|
22
|
+
|
23
|
+
expect(buf.string).to eq('hello!')
|
24
|
+
expect(result).to eq(6)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/zip_tricks.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: zip_tricks
|
5
|
+
# stub: zip_tricks 4.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "zip_tricks"
|
9
|
-
s.version = "
|
9
|
+
s.version = "4.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Julik Tarkhanov"]
|
14
|
-
s.date = "2016-08-
|
14
|
+
s.date = "2016-08-19"
|
15
15
|
s.description = "Makes rubyzip stream, for real"
|
16
16
|
s.email = "me@julik.nl"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -36,6 +36,8 @@ Gem::Specification.new do |s|
|
|
36
36
|
"lib/zip_tricks/block_deflate.rb",
|
37
37
|
"lib/zip_tricks/block_write.rb",
|
38
38
|
"lib/zip_tricks/file_reader.rb",
|
39
|
+
"lib/zip_tricks/file_reader/inflating_reader.rb",
|
40
|
+
"lib/zip_tricks/file_reader/stored_reader.rb",
|
39
41
|
"lib/zip_tricks/null_writer.rb",
|
40
42
|
"lib/zip_tricks/rack_body.rb",
|
41
43
|
"lib/zip_tricks/remote_io.rb",
|
@@ -59,6 +61,7 @@ Gem::Specification.new do |s|
|
|
59
61
|
"spec/zip_tricks/remote_uncap_spec.rb",
|
60
62
|
"spec/zip_tricks/size_estimator_spec.rb",
|
61
63
|
"spec/zip_tricks/stream_crc32_spec.rb",
|
64
|
+
"spec/zip_tricks/streamer/writable_spec.rb",
|
62
65
|
"spec/zip_tricks/streamer_spec.rb",
|
63
66
|
"spec/zip_tricks/war-and-peace.txt",
|
64
67
|
"spec/zip_tricks/write_and_tell_spec.rb",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-08-
|
11
|
+
date: 2016-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -187,6 +187,8 @@ files:
|
|
187
187
|
- lib/zip_tricks/block_deflate.rb
|
188
188
|
- lib/zip_tricks/block_write.rb
|
189
189
|
- lib/zip_tricks/file_reader.rb
|
190
|
+
- lib/zip_tricks/file_reader/inflating_reader.rb
|
191
|
+
- lib/zip_tricks/file_reader/stored_reader.rb
|
190
192
|
- lib/zip_tricks/null_writer.rb
|
191
193
|
- lib/zip_tricks/rack_body.rb
|
192
194
|
- lib/zip_tricks/remote_io.rb
|
@@ -210,6 +212,7 @@ files:
|
|
210
212
|
- spec/zip_tricks/remote_uncap_spec.rb
|
211
213
|
- spec/zip_tricks/size_estimator_spec.rb
|
212
214
|
- spec/zip_tricks/stream_crc32_spec.rb
|
215
|
+
- spec/zip_tricks/streamer/writable_spec.rb
|
213
216
|
- spec/zip_tricks/streamer_spec.rb
|
214
217
|
- spec/zip_tricks/war-and-peace.txt
|
215
218
|
- spec/zip_tricks/write_and_tell_spec.rb
|