zip_tricks 2.8.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/IMPLEMENTATION_DETAILS.md +2 -10
- data/README.md +62 -59
- data/examples/archive_size_estimate.rb +4 -4
- data/examples/rack_application.rb +3 -5
- data/lib/zip_tricks/block_deflate.rb +21 -0
- data/lib/zip_tricks/file_reader.rb +491 -0
- data/lib/zip_tricks/null_writer.rb +7 -2
- data/lib/zip_tricks/rack_body.rb +3 -3
- data/lib/zip_tricks/remote_io.rb +30 -20
- data/lib/zip_tricks/remote_uncap.rb +10 -10
- data/lib/zip_tricks/size_estimator.rb +64 -0
- data/lib/zip_tricks/stream_crc32.rb +2 -2
- data/lib/zip_tricks/streamer/deflated_writer.rb +26 -0
- data/lib/zip_tricks/streamer/entry.rb +21 -0
- data/lib/zip_tricks/streamer/stored_writer.rb +25 -0
- data/lib/zip_tricks/streamer/writable.rb +20 -0
- data/lib/zip_tricks/streamer.rb +172 -66
- data/lib/zip_tricks/zip_writer.rb +346 -0
- data/lib/zip_tricks.rb +1 -4
- data/spec/spec_helper.rb +1 -38
- data/spec/zip_tricks/file_reader_spec.rb +47 -0
- data/spec/zip_tricks/rack_body_spec.rb +2 -2
- data/spec/zip_tricks/remote_io_spec.rb +8 -20
- data/spec/zip_tricks/remote_uncap_spec.rb +4 -4
- data/spec/zip_tricks/size_estimator_spec.rb +31 -0
- data/spec/zip_tricks/streamer_spec.rb +59 -36
- data/spec/zip_tricks/zip_writer_spec.rb +408 -0
- data/zip_tricks.gemspec +20 -14
- metadata +33 -16
- data/lib/zip_tricks/manifest.rb +0 -85
- data/lib/zip_tricks/microzip.rb +0 -339
- data/lib/zip_tricks/stored_size_estimator.rb +0 -44
- data/spec/zip_tricks/manifest_spec.rb +0 -60
- data/spec/zip_tricks/microzip_interop_spec.rb +0 -48
- data/spec/zip_tricks/microzip_spec.rb +0 -546
- data/spec/zip_tricks/stored_size_estimator_spec.rb +0 -22
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.8.1
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
- - "~>"
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: '1.1'
|
23
|
-
type: :
|
23
|
+
type: :development
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
@@ -31,19 +31,19 @@ dependencies:
|
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: '1.1'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
|
-
name:
|
34
|
+
name: terminal-table
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
|
-
- - "
|
37
|
+
- - ">="
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: '
|
40
|
-
type: :
|
39
|
+
version: '0'
|
40
|
+
type: :development
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
|
-
- - "
|
44
|
+
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '0'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: range_utils
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,6 +106,20 @@ dependencies:
|
|
106
106
|
- - "~>"
|
107
107
|
- !ruby/object:Gem::Version
|
108
108
|
version: 3.2.0
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: coderay
|
111
|
+
requirement: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
116
|
+
type: :development
|
117
|
+
prerelease: false
|
118
|
+
version_requirements: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
109
123
|
- !ruby/object:Gem::Dependency
|
110
124
|
name: yard
|
111
125
|
requirement: !ruby/object:Gem::Requirement
|
@@ -172,30 +186,33 @@ files:
|
|
172
186
|
- lib/zip_tricks.rb
|
173
187
|
- lib/zip_tricks/block_deflate.rb
|
174
188
|
- lib/zip_tricks/block_write.rb
|
175
|
-
- lib/zip_tricks/manifest.rb
|
176
|
-
- lib/zip_tricks/microzip.rb
|
189
|
+
- lib/zip_tricks/file_reader.rb
|
177
190
|
- lib/zip_tricks/null_writer.rb
|
178
191
|
- lib/zip_tricks/rack_body.rb
|
179
192
|
- lib/zip_tricks/remote_io.rb
|
180
193
|
- lib/zip_tricks/remote_uncap.rb
|
181
|
-
- lib/zip_tricks/stored_size_estimator.rb
|
194
|
+
- lib/zip_tricks/size_estimator.rb
|
182
195
|
- lib/zip_tricks/stream_crc32.rb
|
183
196
|
- lib/zip_tricks/streamer.rb
|
197
|
+
- lib/zip_tricks/streamer/deflated_writer.rb
|
198
|
+
- lib/zip_tricks/streamer/entry.rb
|
199
|
+
- lib/zip_tricks/streamer/stored_writer.rb
|
200
|
+
- lib/zip_tricks/streamer/writable.rb
|
184
201
|
- lib/zip_tricks/write_and_tell.rb
|
202
|
+
- lib/zip_tricks/zip_writer.rb
|
185
203
|
- spec/spec_helper.rb
|
186
204
|
- spec/zip_tricks/block_deflate_spec.rb
|
187
205
|
- spec/zip_tricks/block_write_spec.rb
|
188
|
-
- spec/zip_tricks/manifest_spec.rb
|
189
|
-
- spec/zip_tricks/microzip_interop_spec.rb
|
190
|
-
- spec/zip_tricks/microzip_spec.rb
|
206
|
+
- spec/zip_tricks/file_reader_spec.rb
|
191
207
|
- spec/zip_tricks/rack_body_spec.rb
|
192
208
|
- spec/zip_tricks/remote_io_spec.rb
|
193
209
|
- spec/zip_tricks/remote_uncap_spec.rb
|
194
|
-
- spec/zip_tricks/stored_size_estimator_spec.rb
|
210
|
+
- spec/zip_tricks/size_estimator_spec.rb
|
195
211
|
- spec/zip_tricks/stream_crc32_spec.rb
|
196
212
|
- spec/zip_tricks/streamer_spec.rb
|
197
213
|
- spec/zip_tricks/war-and-peace.txt
|
198
214
|
- spec/zip_tricks/write_and_tell_spec.rb
|
215
|
+
- spec/zip_tricks/zip_writer_spec.rb
|
199
216
|
- zip_tricks.gemspec
|
200
217
|
homepage: http://github.com/wetransfer/zip_tricks
|
201
218
|
licenses:
|
data/lib/zip_tricks/manifest.rb
DELETED
@@ -1,85 +0,0 @@
|
|
1
|
-
# Helps to estimate archive sizes
|
2
|
-
class ZipTricks::Manifest < Struct.new(:zip_streamer, :io, :part_list)
|
3
|
-
|
4
|
-
# Describes a span within the ZIP bytestream
|
5
|
-
class ZipSpan < Struct.new(:part_type, :byte_range_in_zip, :filename, :additional_metadata)
|
6
|
-
end
|
7
|
-
|
8
|
-
# Builds an array of spans within the ZIP file and computes the size of the resulting archive in bytes.
|
9
|
-
#
|
10
|
-
# zip_spans, bytesize = Manifest.build do | b |
|
11
|
-
# b.add_stored_entry(name: "file.doc", size_uncompressed: 898291)
|
12
|
-
# b.add_compressed_entry(name: "family.tif", size_uncompressed: 89281911, size_compressed: 121908)
|
13
|
-
# end
|
14
|
-
# bytesize #=> ... (Fixnum or Bignum)
|
15
|
-
# zip_spans[0] #=> Manifest::ZipSpan(part_type: :entry_header, byte_range_in_zip: 0..44, ...)
|
16
|
-
# zip_spans[-1] #=> Manifest::ZipSpan(part_type: :central_directory, byte_range_in_zip: 776721..898921, ...)
|
17
|
-
#
|
18
|
-
# @return [Array<ZipSpan>, Fixnum] an array of byte spans within the final ZIP, and the total size of the archive
|
19
|
-
# @yield [Manifest] the manifest object you can add entries to
|
20
|
-
def self.build
|
21
|
-
output_io = ZipTricks::WriteAndTell.new(ZipTricks::NullWriter)
|
22
|
-
part_list = []
|
23
|
-
last_range_end = 0
|
24
|
-
ZipTricks::Streamer.open(output_io) do | zip_streamer |
|
25
|
-
manifest = new(zip_streamer, output_io, part_list)
|
26
|
-
yield(manifest)
|
27
|
-
last_range_end = part_list[-1].byte_range_in_zip.end
|
28
|
-
end
|
29
|
-
|
30
|
-
# Record the position of the central directory
|
31
|
-
directory_location = (last_range_end + 1)..(output_io.tell - 1)
|
32
|
-
part_list << ZipSpan.new(:central_directory, directory_location, :central_directory, nil)
|
33
|
-
|
34
|
-
[part_list, output_io.tell]
|
35
|
-
end
|
36
|
-
|
37
|
-
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
38
|
-
#
|
39
|
-
# @param name [String] the name of the file (filenames are variable-width in the ZIP)
|
40
|
-
# @param size_uncompressed [Fixnum] size of the uncompressed entry
|
41
|
-
# @param segment_info[Object] if you need to save anything to retrieve later from the Manifest,
|
42
|
-
# pass it here (like the URL of the file)
|
43
|
-
# @return self
|
44
|
-
def add_stored_entry(name:, size_uncompressed:, segment_info: nil)
|
45
|
-
register_part(:entry_header, name, segment_info) do
|
46
|
-
zip_streamer.add_stored_entry(name, size_uncompressed, C_fake_crc)
|
47
|
-
end
|
48
|
-
|
49
|
-
register_part(:entry_body, name, segment_info) do
|
50
|
-
zip_streamer.simulate_write(size_uncompressed)
|
51
|
-
end
|
52
|
-
|
53
|
-
self
|
54
|
-
end
|
55
|
-
|
56
|
-
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
57
|
-
#
|
58
|
-
# @param name [String] the name of the file (filenames are variable-width in the ZIP)
|
59
|
-
# @param size_uncompressed [Fixnum] size of the uncompressed entry
|
60
|
-
# @param size_compressed [Fixnum] size of the compressed entry
|
61
|
-
# @param segment_info[Object] if you need to save anything to retrieve later from the Manifest,
|
62
|
-
# pass it here (like the URL of the file)
|
63
|
-
# @return self
|
64
|
-
def add_compressed_entry(name:, size_uncompressed:, size_compressed:, segment_info: nil)
|
65
|
-
register_part(:entry_header, name, segment_info) do
|
66
|
-
zip_streamer.add_compressed_entry(name, size_uncompressed, C_fake_crc, size_compressed)
|
67
|
-
end
|
68
|
-
|
69
|
-
register_part(:entry_body, name, segment_info) do
|
70
|
-
zip_streamer.simulate_write(size_compressed)
|
71
|
-
end
|
72
|
-
|
73
|
-
self
|
74
|
-
end
|
75
|
-
|
76
|
-
private
|
77
|
-
|
78
|
-
C_fake_crc = Zlib.crc32('Mary had a little lamb')
|
79
|
-
private_constant :C_fake_crc
|
80
|
-
|
81
|
-
def register_part(span_type, filename, metadata)
|
82
|
-
before, _, after = io.tell, yield, (io.tell - 1)
|
83
|
-
part_list << ZipSpan.new(span_type, (before..after), filename, metadata)
|
84
|
-
end
|
85
|
-
end
|
data/lib/zip_tricks/microzip.rb
DELETED
@@ -1,339 +0,0 @@
|
|
1
|
-
# A replacement for RubyZip for streaming, with a couple of small differences.
|
2
|
-
# The first difference is that it is verbosely-written-to-the-spec and you can actually
|
3
|
-
# follow what is happening. It does not support quite a few fancy features of Rubyzip,
|
4
|
-
# but instead it can be digested in one reading, and has solid Zip64 support. It also does
|
5
|
-
# not attempt any tricks with Zip64 placeholder extra fields because the ZipTricks streaming
|
6
|
-
# engine assumes you _know_ how large your file is (both compressed and uncompressed) _and_
|
7
|
-
# you have the file's CRC32 checksum upfront.
|
8
|
-
#
|
9
|
-
# Just like Rubyzip it will switch to Zip64 automatically if required, but there is no global
|
10
|
-
# setting to enable that behavior - it is always on.
|
11
|
-
class ZipTricks::Microzip
|
12
|
-
STORED = 0
|
13
|
-
DEFLATED = 8
|
14
|
-
|
15
|
-
TooMuch = Class.new(StandardError)
|
16
|
-
PathError = Class.new(StandardError)
|
17
|
-
DuplicateFilenames = Class.new(StandardError)
|
18
|
-
UnknownMode = Class.new(StandardError)
|
19
|
-
|
20
|
-
FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
|
21
|
-
TWO_BYTE_MAX_UINT = 0xFFFF
|
22
|
-
|
23
|
-
VERSION_MADE_BY = 52
|
24
|
-
VERSION_NEEDED_TO_EXTRACT = 20
|
25
|
-
VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
|
26
|
-
DEFAULT_EXTERNAL_ATTRS = begin
|
27
|
-
# These need to be set so that the unarchived files do not become executable on UNIX, for
|
28
|
-
# security purposes. Strictly speaking we would want to make this user-customizable,
|
29
|
-
# but for now just putting in sane defaults will do. For example, Trac with zipinfo does this:
|
30
|
-
# zipinfo.external_attr = 0644 << 16L # permissions -rw-r--r--.
|
31
|
-
# We snatch the incantations from Rubyzip for this.
|
32
|
-
unix_perms = 0644
|
33
|
-
file_type_file = 010
|
34
|
-
external_attrs = (file_type_file << 12 | (unix_perms & 07777)) << 16
|
35
|
-
end
|
36
|
-
MADE_BY_SIGNATURE = begin
|
37
|
-
# A combination of the VERSION_MADE_BY low byte and the OS type high byte
|
38
|
-
os_type = 3 # UNIX
|
39
|
-
[VERSION_MADE_BY, os_type].pack('CC')
|
40
|
-
end
|
41
|
-
|
42
|
-
C_V = 'V'.freeze
|
43
|
-
C_v = 'v'.freeze
|
44
|
-
C_Qe = 'Q<'.freeze
|
45
|
-
|
46
|
-
class Entry < Struct.new(:filename, :crc32, :compressed_size, :uncompressed_size, :storage_mode, :mtime)
|
47
|
-
def initialize(*)
|
48
|
-
super
|
49
|
-
filename.force_encoding(Encoding::UTF_8)
|
50
|
-
@requires_efs_flag = !(filename.encode(Encoding::ASCII) rescue false)
|
51
|
-
@requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
|
52
|
-
raise TooMuch, "Filename is too long" if filename.bytesize > TWO_BYTE_MAX_UINT
|
53
|
-
raise PathError, "Paths in ZIP may only contain forward slashes (UNIX separators)" if filename.include?('\\')
|
54
|
-
end
|
55
|
-
|
56
|
-
def requires_zip64?
|
57
|
-
@requires_zip64
|
58
|
-
end
|
59
|
-
|
60
|
-
# Set the general purpose flags for the entry. The only flag we care about is the EFS
|
61
|
-
# bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
|
62
|
-
# bit so that the unarchiving application knows that the filename in the archive is UTF-8
|
63
|
-
# encoded, and not some DOS default. For ASCII entries it does not matter.
|
64
|
-
def gp_flags_based_on_filename
|
65
|
-
@requires_efs_flag ? (0b00000000000 | 0b100000000000) : 0b00000000000
|
66
|
-
end
|
67
|
-
|
68
|
-
def write_local_file_header(io)
|
69
|
-
# TBD: caveat. If this entry _does_ fit into a standard zip segment (both compressed and
|
70
|
-
# uncompressed size at or below 0xFFFF etc), but it is _located_ at an offset that requires
|
71
|
-
# Zip64 to be used (beyond 4GB), we are going to be omitting the Zip64 extras in the local
|
72
|
-
# file header, but we will be enabling them when writing the central directory. Then the
|
73
|
-
# CD record for the file _will_ have Zip64 extra, but the local file header won't. In theory,
|
74
|
-
# this should not pose a problem, but then again... life in this world can be harsh.
|
75
|
-
#
|
76
|
-
# If it turns out that it _does_ pose a problem, we can always do:
|
77
|
-
#
|
78
|
-
# @requires_zip64 = true if io.tell > FOUR_BYTE_MAX_UINT
|
79
|
-
#
|
80
|
-
# right here, and have the data written regardless even if the file fits.
|
81
|
-
io << [0x04034b50].pack(C_V) # local file header signature 4 bytes (0x04034b50)
|
82
|
-
|
83
|
-
if @requires_zip64 # version needed to extract 2 bytes
|
84
|
-
io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v)
|
85
|
-
else
|
86
|
-
io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)
|
87
|
-
end
|
88
|
-
|
89
|
-
io << [gp_flags_based_on_filename].pack("v") # general purpose bit flag 2 bytes
|
90
|
-
io << [storage_mode].pack("v") # compression method 2 bytes
|
91
|
-
io << [to_binary_dos_time(mtime)].pack(C_v) # last mod file time 2 bytes
|
92
|
-
io << [to_binary_dos_date(mtime)].pack(C_v) # last mod file date 2 bytes
|
93
|
-
io << [crc32].pack(C_V) # crc-32 4 bytes
|
94
|
-
|
95
|
-
if @requires_zip64
|
96
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # compressed size 4 bytes
|
97
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # uncompressed size 4 bytes
|
98
|
-
else
|
99
|
-
io << [compressed_size].pack(C_V) # compressed size 4 bytes
|
100
|
-
io << [uncompressed_size].pack(C_V) # uncompressed size 4 bytes
|
101
|
-
end
|
102
|
-
|
103
|
-
# Filename should not be longer than 0xFFFF otherwise this wont fit here
|
104
|
-
io << [filename.bytesize].pack(C_v) # file name length 2 bytes
|
105
|
-
|
106
|
-
extra_size = 0
|
107
|
-
if @requires_zip64
|
108
|
-
extra_size += bytesize_of {|buf| write_zip_64_extra_for_local_file_header(buf) }
|
109
|
-
end
|
110
|
-
io << [extra_size].pack(C_v) # extra field length 2 bytes
|
111
|
-
|
112
|
-
io << filename # file name (variable size)
|
113
|
-
|
114
|
-
# Interesting tidbit:
|
115
|
-
# https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
|
116
|
-
# TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
|
117
|
-
# Zip64 extra field as _the first_ extra field. If we decide to add the Info-ZIP UTF-8 field...
|
118
|
-
write_zip_64_extra_for_local_file_header(io) if @requires_zip64
|
119
|
-
end
|
120
|
-
|
121
|
-
def write_zip_64_extra_for_local_file_header(io)
|
122
|
-
io << [0x0001].pack(C_v) # 2 bytes Tag for this "extra" block type
|
123
|
-
io << [16].pack(C_v) # 2 bytes Size of this "extra" block. For us it will always be 16 (2x8)
|
124
|
-
io << [uncompressed_size].pack(C_Qe) # 8 bytes Original uncompressed file size
|
125
|
-
io << [compressed_size].pack(C_Qe) # 8 bytes Size of compressed data
|
126
|
-
end
|
127
|
-
|
128
|
-
def write_zip_64_extra_for_central_directory_file_header(io, local_file_header_location)
|
129
|
-
io << [0x0001].pack(C_v) # 2 bytes Tag for this "extra" block type
|
130
|
-
io << [28].pack(C_v) # 2 bytes Size of this "extra" block. For us it will always be 28
|
131
|
-
io << [uncompressed_size].pack(C_Qe) # 8 bytes Original uncompressed file size
|
132
|
-
io << [compressed_size].pack(C_Qe) # 8 bytes Size of compressed data
|
133
|
-
io << [local_file_header_location].pack(C_Qe) # 8 bytes Offset of local header record
|
134
|
-
io << [0].pack(C_V) # 4 bytes Number of the disk on which this file starts
|
135
|
-
end
|
136
|
-
|
137
|
-
def write_central_directory_file_header(io, local_file_header_location)
|
138
|
-
# At this point if the header begins somewhere beyond 0xFFFFFFFF we _have_ to record the offset
|
139
|
-
# of the local file header as a zip64 extra field, so we give up, give in, you lose, love will always win...
|
140
|
-
@requires_zip64 = true if local_file_header_location > FOUR_BYTE_MAX_UINT
|
141
|
-
|
142
|
-
io << [0x02014b50].pack(C_V) # central file header signature 4 bytes (0x02014b50)
|
143
|
-
io << MADE_BY_SIGNATURE # version made by 2 bytes
|
144
|
-
if @requires_zip64
|
145
|
-
io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract 2 bytes
|
146
|
-
else
|
147
|
-
io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v) # version needed to extract 2 bytes
|
148
|
-
end
|
149
|
-
|
150
|
-
io << [gp_flags_based_on_filename].pack(C_v) # general purpose bit flag 2 bytes
|
151
|
-
io << [storage_mode].pack(C_v) # compression method 2 bytes
|
152
|
-
io << [to_binary_dos_time(mtime)].pack(C_v) # last mod file time 2 bytes
|
153
|
-
io << [to_binary_dos_date(mtime)].pack(C_v) # last mod file date 2 bytes
|
154
|
-
io << [crc32].pack(C_V) # crc-32 4 bytes
|
155
|
-
|
156
|
-
if @requires_zip64
|
157
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # compressed size 4 bytes
|
158
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # uncompressed size 4 bytes
|
159
|
-
else
|
160
|
-
io << [compressed_size].pack(C_V) # compressed size 4 bytes
|
161
|
-
io << [uncompressed_size].pack(C_V) # uncompressed size 4 bytes
|
162
|
-
end
|
163
|
-
|
164
|
-
# Filename should not be longer than 0xFFFF otherwise this wont fit here
|
165
|
-
io << [filename.bytesize].pack(C_v) # file name length 2 bytes
|
166
|
-
|
167
|
-
extra_size = 0
|
168
|
-
if @requires_zip64
|
169
|
-
extra_size += bytesize_of {|buf|
|
170
|
-
write_zip_64_extra_for_central_directory_file_header(buf, local_file_header_location)
|
171
|
-
}
|
172
|
-
end
|
173
|
-
io << [extra_size].pack(C_v) # extra field length 2 bytes
|
174
|
-
|
175
|
-
io << [0].pack(C_v) # file comment length 2 bytes
|
176
|
-
|
177
|
-
# For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
|
178
|
-
# because otherwise it does not properly advance the pointer when reading the Zip64 extra field
|
179
|
-
# https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
|
180
|
-
if @requires_zip64
|
181
|
-
io << [TWO_BYTE_MAX_UINT].pack(C_v) # disk number start 2 bytes
|
182
|
-
else
|
183
|
-
io << [0].pack(C_v) # disk number start 2 bytes
|
184
|
-
end
|
185
|
-
io << [0].pack(C_v) # internal file attributes 2 bytes
|
186
|
-
io << [DEFAULT_EXTERNAL_ATTRS].pack(C_V) # external file attributes 4 bytes
|
187
|
-
|
188
|
-
if @requires_zip64
|
189
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # relative offset of local header 4 bytes
|
190
|
-
else
|
191
|
-
io << [local_file_header_location].pack(C_V) # relative offset of local header 4 bytes
|
192
|
-
end
|
193
|
-
io << filename # file name (variable size)
|
194
|
-
|
195
|
-
if @requires_zip64 # extra field (variable size)
|
196
|
-
write_zip_64_extra_for_central_directory_file_header(io, local_file_header_location)
|
197
|
-
end
|
198
|
-
# file comment (variable size)
|
199
|
-
end
|
200
|
-
|
201
|
-
private
|
202
|
-
|
203
|
-
def bytesize_of
|
204
|
-
''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
|
205
|
-
end
|
206
|
-
|
207
|
-
def to_binary_dos_time(t)
|
208
|
-
(t.sec/2) + (t.min << 5) + (t.hour << 11)
|
209
|
-
end
|
210
|
-
|
211
|
-
def to_binary_dos_date(t)
|
212
|
-
(t.day) + (t.month << 5) + ((t.year - 1980) << 9)
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
# Creates a new streaming writer.
|
217
|
-
# The writer is stateful and knows its list of ZIP file entries as they are being added.
|
218
|
-
def initialize
|
219
|
-
@files = []
|
220
|
-
@local_header_offsets = []
|
221
|
-
end
|
222
|
-
|
223
|
-
# Adds a file to the entry list and immediately writes out its local file header into the
|
224
|
-
# output stream.
|
225
|
-
#
|
226
|
-
# @param io[#<<, #tell] the buffer to write the local file header to
|
227
|
-
# @param filename[String] The name of the file
|
228
|
-
# @param crc32[Fixnum] The CRC32 checksum of the file
|
229
|
-
# @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
|
230
|
-
# @param uncompressed_size[Fixnum] The size of the file once extracted
|
231
|
-
# @param storage_mode[Fixnum] Either 0 for "stored" or 8 for "deflated"
|
232
|
-
# @param mtime[Time] What modification time to record for the file
|
233
|
-
# @return [void]
|
234
|
-
def add_local_file_header(io:, filename:, crc32:, compressed_size:, uncompressed_size:, storage_mode:, mtime: Time.now.utc)
|
235
|
-
if @files.any?{|e| e.filename == filename }
|
236
|
-
raise DuplicateFilenames, "Filename #{filename.inspect} already used in the archive"
|
237
|
-
end
|
238
|
-
raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
|
239
|
-
e = Entry.new(filename, crc32, compressed_size, uncompressed_size, storage_mode, mtime)
|
240
|
-
@files << e
|
241
|
-
@local_header_offsets << io.tell
|
242
|
-
e.write_local_file_header(io)
|
243
|
-
end
|
244
|
-
|
245
|
-
# Writes the central directory (including the Zip64 salient bits if necessary)
|
246
|
-
#
|
247
|
-
# @param io[#<<, #tell] the buffer to write the central directory to.
|
248
|
-
# The method will use `tell` on the buffer since it has to know where the central directory is located
|
249
|
-
# @return [void]
|
250
|
-
def write_central_directory(io)
|
251
|
-
start_of_central_directory = io.tell
|
252
|
-
|
253
|
-
# Central directory file headers, per file in order
|
254
|
-
@files.each_with_index do |file, i|
|
255
|
-
local_file_header_offset_from_start_of_file = @local_header_offsets.fetch(i)
|
256
|
-
file.write_central_directory_file_header(io, local_file_header_offset_from_start_of_file)
|
257
|
-
end
|
258
|
-
central_dir_size = io.tell - start_of_central_directory
|
259
|
-
|
260
|
-
zip64_required = central_dir_size > FOUR_BYTE_MAX_UINT ||
|
261
|
-
start_of_central_directory > FOUR_BYTE_MAX_UINT ||
|
262
|
-
@files.length > TWO_BYTE_MAX_UINT ||
|
263
|
-
@files.any?(&:requires_zip64?)
|
264
|
-
|
265
|
-
# Then, if zip64 is used
|
266
|
-
if zip64_required
|
267
|
-
# [zip64 end of central directory record]
|
268
|
-
zip64_eocdr_offset = io.tell
|
269
|
-
# zip64 end of central dir
|
270
|
-
io << [0x06064b50].pack(C_V) # signature 4 bytes (0x06064b50)
|
271
|
-
io << [44].pack(C_Qe) # size of zip64 end of central
|
272
|
-
# directory record 8 bytes
|
273
|
-
# (this is ex. the 12 bytes of the signature and the size value itself).
|
274
|
-
# Without the extensible data sector it is always 44.
|
275
|
-
io << MADE_BY_SIGNATURE # version made by 2 bytes
|
276
|
-
io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract 2 bytes
|
277
|
-
io << [0].pack(C_V) # number of this disk 4 bytes
|
278
|
-
io << [0].pack(C_V) # number of the disk with the
|
279
|
-
# start of the central directory 4 bytes
|
280
|
-
io << [@files.length].pack(C_Qe) # total number of entries in the
|
281
|
-
# central directory on this disk 8 bytes
|
282
|
-
io << [@files.length].pack(C_Qe) # total number of entries in the
|
283
|
-
# central directory 8 bytes
|
284
|
-
io << [central_dir_size].pack(C_Qe) # size of the central directory 8 bytes
|
285
|
-
# offset of start of central
|
286
|
-
# directory with respect to
|
287
|
-
io << [start_of_central_directory].pack(C_Qe) # the starting disk number 8 bytes
|
288
|
-
# zip64 extensible data sector (variable size), blank for us
|
289
|
-
|
290
|
-
# [zip64 end of central directory locator]
|
291
|
-
io << [0x07064b50].pack(C_V) # zip64 end of central dir locator
|
292
|
-
# signature 4 bytes (0x07064b50)
|
293
|
-
io << [0].pack(C_V) # number of the disk with the
|
294
|
-
# start of the zip64 end of
|
295
|
-
# central directory 4 bytes
|
296
|
-
io << [zip64_eocdr_offset].pack(C_Qe) # relative offset of the zip64
|
297
|
-
# end of central directory record 8 bytes
|
298
|
-
# (note: "relative" is actually "from the start of the file")
|
299
|
-
io << [1].pack(C_V) # total number of disks 4 bytes
|
300
|
-
end
|
301
|
-
|
302
|
-
# Then the end of central directory record:
|
303
|
-
io << [0x06054b50].pack(C_V) # end of central dir signature 4 bytes (0x06054b50)
|
304
|
-
io << [0].pack(C_v) # number of this disk 2 bytes
|
305
|
-
io << [0].pack(C_v) # number of the disk with the
|
306
|
-
# start of the central directory 2 bytes
|
307
|
-
|
308
|
-
if zip64_required # the number of entries will be read from the zip64 part of the central directory
|
309
|
-
io << [TWO_BYTE_MAX_UINT].pack(C_v) # total number of entries in the
|
310
|
-
# central directory on this disk 2 bytes
|
311
|
-
io << [TWO_BYTE_MAX_UINT].pack(C_v) # total number of entries in
|
312
|
-
# the central directory 2 bytes
|
313
|
-
else
|
314
|
-
io << [@files.length].pack(C_v) # total number of entries in the
|
315
|
-
# central directory on this disk 2 bytes
|
316
|
-
io << [@files.length].pack(C_v) # total number of entries in
|
317
|
-
# the central directory 2 bytes
|
318
|
-
end
|
319
|
-
|
320
|
-
if zip64_required
|
321
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # size of the central directory 4 bytes
|
322
|
-
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # offset of start of central
|
323
|
-
# directory with respect to
|
324
|
-
# the starting disk number 4 bytes
|
325
|
-
else
|
326
|
-
io << [central_dir_size].pack(C_V) # size of the central directory 4 bytes
|
327
|
-
io << [start_of_central_directory].pack(C_V) # offset of start of central
|
328
|
-
# directory with respect to
|
329
|
-
# the starting disk number 4 bytes
|
330
|
-
end
|
331
|
-
io << [0].pack(C_v) # .ZIP file comment length 2 bytes
|
332
|
-
# .ZIP file comment (variable size)
|
333
|
-
end
|
334
|
-
|
335
|
-
private_constant :FOUR_BYTE_MAX_UINT, :TWO_BYTE_MAX_UINT,
|
336
|
-
:VERSION_MADE_BY, :VERSION_NEEDED_TO_EXTRACT, :VERSION_NEEDED_TO_EXTRACT_ZIP64,
|
337
|
-
:DEFAULT_EXTERNAL_ATTRS, :MADE_BY_SIGNATURE,
|
338
|
-
:Entry, :C_V, :C_v, :C_Qe
|
339
|
-
end
|
data/lib/zip_tricks/stored_size_estimator.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
# Helps to estimate archive sizes
|
2
|
-
class ZipTricks::StoredSizeEstimator < Struct.new(:manifest)
|
3
|
-
|
4
|
-
# Performs the estimate using fake archiving. It needs to know the sizes of the
|
5
|
-
# entries upfront. Usage:
|
6
|
-
#
|
7
|
-
# expected_zip_size = StoredSizeEstimator.perform_fake_archiving do | estimator |
|
8
|
-
# estimator.add_stored_entry("file.doc", size=898291)
|
9
|
-
# estimator.add_compressed_entry("family.tif", size=89281911, compressed_size=121908)
|
10
|
-
# end
|
11
|
-
#
|
12
|
-
# @return [Fixnum] the size of the resulting archive, in bytes
|
13
|
-
# @yield [StoredSizeEstimator] the estimator
|
14
|
-
def self.perform_fake_archiving
|
15
|
-
_, bytes = ZipTricks::Manifest.build do |manifest|
|
16
|
-
# The API for this class uses positional arguments. The Manifest API
|
17
|
-
# uses keyword arguments.
|
18
|
-
call_adapter = new(manifest)
|
19
|
-
yield(call_adapter)
|
20
|
-
end
|
21
|
-
bytes
|
22
|
-
end
|
23
|
-
|
24
|
-
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
25
|
-
#
|
26
|
-
# @param name [String] the name of the file (filenames are variable-width in the ZIP)
|
27
|
-
# @param size_uncompressed [Fixnum] size of the uncompressed entry
|
28
|
-
# @return self
|
29
|
-
def add_stored_entry(name, size_uncompressed)
|
30
|
-
manifest.add_stored_entry(name: name, size_uncompressed: size_uncompressed)
|
31
|
-
self
|
32
|
-
end
|
33
|
-
|
34
|
-
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
35
|
-
#
|
36
|
-
# @param name [String] the name of the file (filenames are variable-width in the ZIP)
|
37
|
-
# @param size_uncompressed [Fixnum] size of the uncompressed entry
|
38
|
-
# @param size_compressed [Fixnum] size of the compressed entry
|
39
|
-
# @return self
|
40
|
-
def add_compressed_entry(name, size_uncompressed, size_compressed)
|
41
|
-
manifest.add_compressed_entry(name: name, size_uncompressed: size_uncompressed, size_compressed: size_compressed)
|
42
|
-
self
|
43
|
-
end
|
44
|
-
end
|
data/spec/zip_tricks/manifest_spec.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
|
3
|
-
describe ZipTricks::Manifest do
|
4
|
-
it 'builds a map of the contained ranges, and has its cumulative size match the predicted archive size exactly' do
|
5
|
-
# Generate a couple of random files
|
6
|
-
raw_file_1 = SecureRandom.random_bytes(1024 * 20)
|
7
|
-
raw_file_2 = SecureRandom.random_bytes(1024 * 128)
|
8
|
-
raw_file_3 = SecureRandom.random_bytes(1258695)
|
9
|
-
|
10
|
-
manifest, bytesize = described_class.build do | builder |
|
11
|
-
r = builder.add_stored_entry(name: "first-file.bin", size_uncompressed: raw_file_1.size)
|
12
|
-
expect(r).to eq(builder), "add_stored_entry should return self"
|
13
|
-
|
14
|
-
builder.add_stored_entry(name: "second-file.bin", size_uncompressed: raw_file_2.size)
|
15
|
-
|
16
|
-
r = builder.add_compressed_entry(name: "second-file-comp.bin", size_uncompressed: raw_file_2.size,
|
17
|
-
size_compressed: raw_file_3.size, segment_info: 'http://example.com/second-file-deflated-segment.bin')
|
18
|
-
expect(r).to eq(builder), "add_compressed_entry should return self"
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'range_utils'
|
22
|
-
|
23
|
-
expect(manifest).to be_kind_of(Array)
|
24
|
-
total_size_of_all_parts = manifest.inject(0) do | total_bytes, span |
|
25
|
-
total_bytes + RangeUtils.size_from_range(span.byte_range_in_zip)
|
26
|
-
end
|
27
|
-
expect(total_size_of_all_parts).to eq(1410595)
|
28
|
-
expect(bytesize).to eq(1410595)
|
29
|
-
|
30
|
-
expect(manifest.length).to eq(7)
|
31
|
-
|
32
|
-
first_header = manifest[0]
|
33
|
-
expect(first_header.part_type).to eq(:entry_header)
|
34
|
-
expect(first_header.byte_range_in_zip).to eq(0..43)
|
35
|
-
expect(first_header.filename).to eq("first-file.bin")
|
36
|
-
expect(first_header.additional_metadata).to be_nil
|
37
|
-
|
38
|
-
first_body = manifest[1]
|
39
|
-
expect(first_body.part_type).to eq(:entry_body)
|
40
|
-
expect(first_body.byte_range_in_zip).to eq(44..20523)
|
41
|
-
expect(first_body.filename).to eq("first-file.bin")
|
42
|
-
expect(first_body.additional_metadata).to be_nil
|
43
|
-
|
44
|
-
third_header = manifest[4]
|
45
|
-
expect(third_header.part_type).to eq(:entry_header)
|
46
|
-
expect(third_header.byte_range_in_zip).to eq(151641..151690)
|
47
|
-
expect(third_header.filename).to eq("second-file-comp.bin")
|
48
|
-
expect(third_header.additional_metadata).to eq("http://example.com/second-file-deflated-segment.bin")
|
49
|
-
|
50
|
-
third_body = manifest[5]
|
51
|
-
expect(third_body.part_type).to eq(:entry_body)
|
52
|
-
expect(third_body.byte_range_in_zip).to eq(151691..1410385)
|
53
|
-
expect(third_body.filename).to eq("second-file-comp.bin")
|
54
|
-
expect(third_body.additional_metadata).to eq("http://example.com/second-file-deflated-segment.bin")
|
55
|
-
|
56
|
-
cd = manifest[-1]
|
57
|
-
expect(cd.part_type).to eq(:central_directory)
|
58
|
-
expect(cd.byte_range_in_zip).to eq(1410386..1410594)
|
59
|
-
end
|
60
|
-
end
|