zip_kit 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.document +5 -0
- data/.github/workflows/ci.yml +29 -0
- data/.gitignore +61 -0
- data/.rspec +1 -0
- data/.standard.yml +8 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +255 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +153 -0
- data/Gemfile +4 -0
- data/IMPLEMENTATION_DETAILS.md +97 -0
- data/LICENSE.txt +20 -0
- data/README.md +234 -0
- data/Rakefile +21 -0
- data/bench/buffered_crc32_bench.rb +109 -0
- data/examples/archive_size_estimate.rb +15 -0
- data/examples/config.ru +7 -0
- data/examples/deferred_write.rb +58 -0
- data/examples/parallel_compression_with_block_deflate.rb +86 -0
- data/examples/rack_application.rb +63 -0
- data/examples/s3_upload.rb +23 -0
- data/lib/zip_kit/block_deflate.rb +130 -0
- data/lib/zip_kit/block_write.rb +47 -0
- data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
- data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
- data/lib/zip_kit/file_reader.rb +740 -0
- data/lib/zip_kit/null_writer.rb +12 -0
- data/lib/zip_kit/output_enumerator.rb +150 -0
- data/lib/zip_kit/path_set.rb +163 -0
- data/lib/zip_kit/rack_chunked_body.rb +32 -0
- data/lib/zip_kit/rack_tempfile_body.rb +61 -0
- data/lib/zip_kit/rails_streaming.rb +37 -0
- data/lib/zip_kit/remote_io.rb +114 -0
- data/lib/zip_kit/remote_uncap.rb +22 -0
- data/lib/zip_kit/size_estimator.rb +84 -0
- data/lib/zip_kit/stream_crc32.rb +60 -0
- data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
- data/lib/zip_kit/streamer/entry.rb +37 -0
- data/lib/zip_kit/streamer/filler.rb +9 -0
- data/lib/zip_kit/streamer/heuristic.rb +68 -0
- data/lib/zip_kit/streamer/stored_writer.rb +39 -0
- data/lib/zip_kit/streamer/writable.rb +36 -0
- data/lib/zip_kit/streamer.rb +614 -0
- data/lib/zip_kit/uniquify_filename.rb +39 -0
- data/lib/zip_kit/version.rb +5 -0
- data/lib/zip_kit/write_and_tell.rb +40 -0
- data/lib/zip_kit/write_buffer.rb +71 -0
- data/lib/zip_kit/write_shovel.rb +22 -0
- data/lib/zip_kit/zip_writer.rb +436 -0
- data/lib/zip_kit.rb +24 -0
- data/zip_kit.gemspec +41 -0
- metadata +335 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# A tiny wrapper over any object that supports :<<.
|
4
|
+
# Adds :tell and :advance_position_by. This is needed for write destinations
|
5
|
+
# which do not respond to `#pos` or `#tell`. A lot of ZIP archive format parts
|
6
|
+
# include "offsets in archive" - a byte offset from the start of file. Keeping
|
7
|
+
# track of this value is what this object will do. It also allows "advancing"
|
8
|
+
# this value if data gets written using a bypass (such as `IO#sendfile`)
|
9
|
+
class ZipKit::WriteAndTell
  include ZipKit::WriteShovel

  # Wraps a write destination and starts the byte counter at zero.
  #
  # @param io[#<<, #write] the destination that receives the bytes
  def initialize(io)
    @io = io
    @pos = 0
    # Certain destinations (the ActionController::Live `stream` object, for one) offer
    # only `write` and no `<<`. Remember which of the two calls to use so that callers
    # do not have to wrap such objects themselves.
    @use_write = !io.respond_to?(:<<)
  end

  # Forwards a binary copy of `bytes` to the destination and advances the
  # tracked position by the byte size of `bytes`. A `nil` argument is ignored.
  #
  # @param bytes[String, nil] the data to write
  # @return self
  def <<(bytes)
    return self if bytes.nil?

    binary = bytes.b
    @use_write ? @io.write(binary) : (@io << binary)

    @pos += bytes.bytesize
    self
  end

  # Moves the tracked position forward without writing anything through this
  # object - for data that reached the destination via a bypass.
  #
  # @param num_bytes[Integer] how many bytes were written elsewhere
  # @return [Integer] the new position
  def advance_position_by(num_bytes)
    @pos += num_bytes
  end

  # @return [Integer] the number of bytes accounted for so far
  def tell
    @pos
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Some operations (such as CRC32) benefit when they are performed
|
4
|
+
# on larger chunks of data. In certain use cases, it is possible that
|
5
|
+
# the consumer of ZipKit is going to be writing small chunks
|
6
|
+
# in rapid succession, so CRC32 is going to have to perform a lot of
|
7
|
+
# CRC32 combine operations - and this adds up. Since the CRC32 value
|
8
|
+
# is usually not needed until the complete output has completed
|
9
|
+
# we can buffer at least some amount of data before computing CRC32 over it.
|
10
|
+
# We also use this buffer for output via Rack, where some amount of buffering
|
11
|
+
# helps reduce the number of syscalls made by the webserver. ZipKit performs
|
12
|
+
# lots of very small writes, and some degree of speedup (about 20%) can be achieved
|
13
|
+
# with a buffer of a few KB.
|
14
|
+
#
|
15
|
+
# Note that there is no guarantee that the write buffer is going to flush at or above
|
16
|
+
# the given `buffer_size`, because for writes which exceed the buffer size it will
|
17
|
+
# first `flush` and then write through the oversized chunk, without buffering it. This
|
18
|
+
# helps conserve memory. Also note that the buffer will *not* duplicate strings for you
|
19
|
+
# and *will* yield the same buffer String over and over, so if you are storing it in an
|
20
|
+
# Array you might need to duplicate it.
|
21
|
+
#
|
22
|
+
# Note also that the WriteBuffer assumes that the object it `<<`-writes into is going
|
23
|
+
# to **consume** in some way the string that it passes in. After the `<<` method returns,
|
24
|
+
# the WriteBuffer will be cleared, and it passes the same String reference on every call
|
25
|
+
# to `<<`. Therefore, if you need to retain the output of the WriteBuffer in, say, an Array,
|
26
|
+
# you might need to `.dup` the `String` it gives you.
|
27
|
+
class ZipKit::WriteBuffer
  # Sets up a buffer that forwards its contents into `writable`.
  #
  # @param writable[#<<] An object that responds to `#<<` with a String as argument
  # @param buffer_size[Integer] How many bytes to buffer
  def initialize(writable, buffer_size)
    @writable = writable
    @buffer_size = buffer_size
    # The buffer is created from a zero-padded String which is then emptied. This is a
    # variation on using `capacity:` (which JRuby apparently does not like very much);
    # the hope is that the buffer never has to be resized while this object lives.
    @buf = ("\0".b * (buffer_size * 2)).clear
  end

  # Adds `data` to the buffer. Once the buffered amount reaches `buffer_size`
  # the buffer gets flushed into the writable. Data that is itself at least
  # `buffer_size` bytes long is never buffered: the pending buffer gets flushed
  # and the data is then handed to the writable as-is.
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    if data.bytesize < @buffer_size
      @buf << data
      flush if @buf.bytesize >= @buffer_size
    else
      # This is where a chunk smaller than @buffer_size can be output
      flush unless @buf.empty?
      @writable << data
    end
    self
  end

  # Pushes the buffered bytes (if there are any) into the writable and
  # empties the buffer afterwards.
  #
  # @return self
  def flush
    return self if @buf.empty?

    @writable << @buf
    @buf.clear
    self
  end

  # `flush` used to be called `flush!` - the old name is kept for backwards compatibility
  alias_method :flush!, :flush
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# A lot of objects in ZipKit accept bytes that may be sent
|
4
|
+
# to the `<<` operator (the "shovel" operator). This is in the tradition
|
5
|
+
# of late Jim Weirich and his Builder gem. In [this presentation](https://youtu.be/1BVFlvRPZVM?t=2403)
|
6
|
+
# he justifies this design very eloquently. In ZipKit we follow this example.
|
7
|
+
# However, there is a number of methods in Ruby - including the standard library -
|
8
|
+
# which expect your object to implement the `write` method instead. Since the `write`
|
9
|
+
# method can be expressed in terms of the `<<` method, why not allow all ZipKit
|
10
|
+
# "IO-ish" things to also respond to `write`? This is what this module does.
|
11
|
+
# Jim would be proud. We miss you, Jim.
|
12
|
+
module ZipKit::WriteShovel
  # Sends the given data to `<<` of the including object and reports how many
  # bytes that was. This lets the object act as the destination
  # of `IO.copy_stream(from, to)`.
  #
  # @param bytes[String] the binary string to write (part of the uncompressed file)
  # @return [Integer] the number of bytes written
  def write(bytes)
    bytes.tap { |chunk| self << chunk }.bytesize
  end
end
|
@@ -0,0 +1,436 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
|
4
|
+
# separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
|
5
|
+
#
|
6
|
+
# The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
|
7
|
+
# is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
|
8
|
+
# without depending on data available elsewhere. This makes the writer very easy to test, since each of
|
9
|
+
# its methods outputs something that only depends on the method's arguments. For example, we use this
|
10
|
+
# to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
|
11
|
+
# to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
|
12
|
+
# with given offsets as arguments.
|
13
|
+
#
|
14
|
+
# Since some methods need a lot of data about the entity being written, everything is passed via
|
15
|
+
# keyword arguments - this way it is much less likely that you can make a mistake writing something.
|
16
|
+
#
|
17
|
+
# Another reason for having a separate Writer is that most ZIP libraries attach the methods for
|
18
|
+
# writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
|
19
|
+
# However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
|
20
|
+
# absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
|
21
|
+
# one screen. Altering or checking that code then becomes much, much easier. The methods doing the
|
22
|
+
# writing are also intentionally left very verbose - so that you can follow what is happening at
|
23
|
+
# all times.
|
24
|
+
#
|
25
|
+
# All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
|
26
|
+
# that to output to String objects, or to output to Arrays that you can later join together.
|
27
|
+
class ZipKit::ZipWriter
|
28
|
+
# Largest values that fit into the 4-byte and 2-byte unsigned fields
FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
TWO_BYTE_MAX_UINT = 0xFFFF
# Default archive comment
ZIP_TRICKS_COMMENT = format("Written using ZipKit %<version>s", version: ZipKit::VERSION)
VERSION_MADE_BY = 52
VERSION_NEEDED_TO_EXTRACT = 20
VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
DEFAULT_FILE_UNIX_PERMISSIONS = 0o644
DEFAULT_DIRECTORY_UNIX_PERMISSIONS = 0o755
FILE_TYPE_FILE = 0o10
FILE_TYPE_DIRECTORY = 0o04
# Two bytes: VERSION_MADE_BY in the low byte, the OS type (3 stands for UNIX) in the high byte
MADE_BY_SIGNATURE = [VERSION_MADE_BY, 3].pack("CC")

# Pack specifiers
C_UINT4 = "V" # 4-byte unsigned little-endian uint
C_UINT2 = "v" # 2-byte unsigned little-endian uint
C_UINT8 = "Q<" # 8-byte unsigned little-endian uint
C_CHAR = "C" # For bit-encoded strings
C_INT4 = "l<" # 4-byte signed little-endian int

private_constant(
  :FOUR_BYTE_MAX_UINT, :TWO_BYTE_MAX_UINT,
  :VERSION_MADE_BY, :VERSION_NEEDED_TO_EXTRACT, :VERSION_NEEDED_TO_EXTRACT_ZIP64,
  :FILE_TYPE_FILE, :FILE_TYPE_DIRECTORY,
  :MADE_BY_SIGNATURE,
  :C_UINT4, :C_UINT2, :C_UINT8,
  :ZIP_TRICKS_COMMENT
)
|
62
|
+
|
63
|
+
# Writes the local file header, that precedes the actual file _data_.
|
64
|
+
#
|
65
|
+
# @param io[#<<] the buffer to write the local file header to
|
66
|
+
# @param filename[String] the name of the file in the archive
|
67
|
+
# @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
|
68
|
+
# @param uncompressed_size[Fixnum] The size of the file once extracted
|
69
|
+
# @param crc32[Fixnum] The CRC32 checksum of the file
|
70
|
+
# @param mtime[Time] the modification time to be recorded in the ZIP
|
71
|
+
# @param gp_flags[Fixnum] bit-packed general purpose flags
|
72
|
+
# @param storage_mode[Fixnum] 8 for deflated, 0 for stored...
|
73
|
+
# @return [void]
|
74
|
+
def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
  # Emits the local file header field by field. Every field is pushed into `io`
  # with a separate `<<` call, in the order the header lays them out.
  zip64 = [compressed_size, uncompressed_size].any? { |size| size > FOUR_BYTE_MAX_UINT }
  put = ->(value, packspec) { io << [value].pack(packspec) }

  put.call(0x04034b50, C_UINT4) # local file header signature     4 bytes
  put.call(zip64 ? VERSION_NEEDED_TO_EXTRACT_ZIP64 : VERSION_NEEDED_TO_EXTRACT, C_UINT2) # version needed to extract 2 bytes
  put.call(gp_flags, C_UINT2) # general purpose bit flag        2 bytes
  put.call(storage_mode, C_UINT2) # compression method              2 bytes
  put.call(to_binary_dos_time(mtime), C_UINT2) # last mod file time              2 bytes
  put.call(to_binary_dos_date(mtime), C_UINT2) # last mod file date              2 bytes
  put.call(crc32, C_UINT4) # crc-32                          4 bytes

  # For Zip64 both 4-byte size slots hold the overflow value,
  # and the actual sizes travel in the Zip64 extra field below.
  put.call(zip64 ? FOUR_BYTE_MAX_UINT : compressed_size, C_UINT4) # compressed size     4 bytes
  put.call(zip64 ? FOUR_BYTE_MAX_UINT : uncompressed_size, C_UINT4) # uncompressed size   4 bytes

  # The length field is only 2 bytes wide, so a filename longer than 0xFFFF bytes will not fit
  put.call(filename.bytesize, C_UINT2) # file name length                2 bytes

  # Windows 7 Explorer is able to open Zip64 entries but wants the Zip64 extra field to
  # be _the first_ extra field, hence it goes in before the timestamp extra.
  # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
  extras = StringIO.new
  extras << zip_64_extra_for_local_file_header(compressed_size: compressed_size, uncompressed_size: uncompressed_size) if zip64
  extras << timestamp_extra_for_local_file_header(mtime)

  put.call(extras.size, C_UINT2) # extra field length              2 bytes

  io << filename # file name (variable size)
  io << extras.string # extra field (variable size)
end
|
117
|
+
|
118
|
+
# Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
|
119
|
+
# ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
|
120
|
+
#
|
121
|
+
# @param io[#<<] the buffer to write the local file header to
|
122
|
+
# @param filename[String] the name of the file in the archive
|
123
|
+
# @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
|
124
|
+
# @param uncompressed_size[Fixnum] The size of the file once extracted
|
125
|
+
# @param crc32[Fixnum] The CRC32 checksum of the file
|
126
|
+
# @param mtime[Time] the modification time to be recorded in the ZIP
|
127
|
+
# @param gp_flags[Fixnum] bit-packed general purpose flags
|
128
|
+
# @param unix_permissions[Fixnum?] the permissions for the file, or nil for the default to be used
|
129
|
+
# @return [void]
|
130
|
+
def write_central_directory_file_header(io:,
  local_file_header_location:,
  gp_flags:,
  storage_mode:,
  compressed_size:,
  uncompressed_size:,
  mtime:,
  crc32:,
  filename:,
  unix_permissions: nil)
  # When the local header sits beyond 0xFFFFFFFF its offset _has_ to go into a Zip64 extra
  # field, so an oversized offset switches the entry to Zip64 just like an oversized size does.
  zip64 = [local_file_header_location, compressed_size, uncompressed_size].any? { |value| value > FOUR_BYTE_MAX_UINT }
  put = ->(value, packspec) { io << [value].pack(packspec) }

  put.call(0x02014b50, C_UINT4) # central file header signature   4 bytes
  io << MADE_BY_SIGNATURE # version made by                 2 bytes
  put.call(zip64 ? VERSION_NEEDED_TO_EXTRACT_ZIP64 : VERSION_NEEDED_TO_EXTRACT, C_UINT2) # version needed to extract 2 bytes
  put.call(gp_flags, C_UINT2) # general purpose bit flag        2 bytes
  put.call(storage_mode, C_UINT2) # compression method              2 bytes
  put.call(to_binary_dos_time(mtime), C_UINT2) # last mod file time              2 bytes
  put.call(to_binary_dos_date(mtime), C_UINT2) # last mod file date              2 bytes
  put.call(crc32, C_UINT4) # crc-32                          4 bytes

  # With Zip64 the 4-byte size slots carry the overflow value instead of the sizes
  put.call(zip64 ? FOUR_BYTE_MAX_UINT : compressed_size, C_UINT4) # compressed size     4 bytes
  put.call(zip64 ? FOUR_BYTE_MAX_UINT : uncompressed_size, C_UINT4) # uncompressed size   4 bytes

  # The length field is only 2 bytes wide, so a filename longer than 0xFFFF bytes will not fit
  put.call(filename.bytesize, C_UINT2) # file name length                2 bytes

  extras = StringIO.new
  if zip64
    extras << zip_64_extra_for_central_directory_file_header(local_file_header_location: local_file_header_location,
      compressed_size: compressed_size,
      uncompressed_size: uncompressed_size)
  end
  extras << timestamp_extra_for_central_directory_entry(mtime)

  put.call(extras.size, C_UINT2) # extra field length              2 bytes
  put.call(0, C_UINT2) # file comment length             2 bytes

  # The Unarchiver < 3.11.1 needs the overflow value here when Zip64 is used, because
  # otherwise it does not properly advance the pointer when reading the Zip64 extra field
  # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
  put.call(zip64 ? TWO_BYTE_MAX_UINT : 0, C_UINT2) # disk number start               2 bytes
  put.call(0, C_UINT2) # internal file attributes        2 bytes

  # A filename with a trailing "/" is treated as a directory, which selects
  # both the file type bits and the default permissions.
  directory = filename.end_with?("/")
  fallback_permissions = directory ? DEFAULT_DIRECTORY_UNIX_PERMISSIONS : DEFAULT_FILE_UNIX_PERMISSIONS
  file_type = directory ? FILE_TYPE_DIRECTORY : FILE_TYPE_FILE
  put.call(generate_external_attrs(unix_permissions || fallback_permissions, file_type), C_UINT4) # external file attributes 4 bytes

  put.call(zip64 ? FOUR_BYTE_MAX_UINT : local_file_header_location, C_UINT4) # relative offset of local header 4 bytes

  io << filename # file name (variable size)
  io << extras.string # extra field (variable size)
  # (empty)                                          # file comment (variable size)
end
|
214
|
+
|
215
|
+
# Writes the data descriptor following the file data for a file whose local file header
|
216
|
+
# was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold,
|
217
|
+
# the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
|
218
|
+
#
|
219
|
+
# @param io[#<<] the buffer to write the local file header to
|
220
|
+
# @param crc32[Fixnum] The CRC32 checksum of the file
|
221
|
+
# @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
|
222
|
+
# @param uncompressed_size[Fixnum] The size of the file once extracted
|
223
|
+
# @return [void]
|
224
|
+
def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
  # The record opens with two 4-byte values: 0x08074b50 (originally no signature was
  # assigned, but this value has commonly been adopted as one) and the CRC32.
  [0x08074b50, crc32].each { |field| io << [field].pack(C_UINT4) }

  # Should either size be above 0xFFFFFFFF, both are written as 8-byte Zip64 values. A good
  # unarchiver will decide to read them as such once it finds the Zip64 extra for the
  # file in the central directory.
  wide_sizes = [compressed_size, uncompressed_size].any? { |size| size > FOUR_BYTE_MAX_UINT }
  size_packspec = wide_sizes ? C_UINT8 : C_UINT4

  io << [compressed_size].pack(size_packspec) # compressed size     4 bytes, or 8 bytes for ZIP64
  io << [uncompressed_size].pack(size_packspec) # uncompressed size   4 bytes, or 8 bytes for ZIP64
end
|
239
|
+
|
240
|
+
# Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
|
241
|
+
#
|
242
|
+
# @param io[#<<] the buffer to write the central directory to.
|
243
|
+
# @param start_of_central_directory_location[Fixnum] byte offset of the start of central directory from the beginning of ZIP file
|
244
|
+
# @param central_directory_size[Fixnum] the size of the central directory (only file headers) in bytes
|
245
|
+
# @param num_files_in_archive[Fixnum] How many files the archive contains
|
246
|
+
# @param comment[String] the comment for the archive (defaults to ZIP_TRICKS_COMMENT)
|
247
|
+
# @return [void]
|
248
|
+
def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:, comment: ZIP_TRICKS_COMMENT)
  # The Zip64 end of central directory record is placed right after the central directory
  zip64_eocdr_offset = start_of_central_directory_location + central_directory_size

  offsets_overflow = [central_directory_size, start_of_central_directory_location, zip64_eocdr_offset].any? do |value|
    value > FOUR_BYTE_MAX_UINT
  end
  zip64_required = offsets_overflow || num_files_in_archive > TWO_BYTE_MAX_UINT

  put = ->(value, packspec) { io << [value].pack(packspec) }

  if zip64_required
    # [zip64 end of central directory record]
    put.call(0x06064b50, C_UINT4) # zip64 end of central dir signature       4 bytes
    # Size of the record excluding the 12 bytes of the signature and of the size value itself.
    # Without the extensible data sector (which we are not using) it is always 44 bytes.
    put.call(44, C_UINT8) # size of zip64 end of central dir record  8 bytes
    io << MADE_BY_SIGNATURE # version made by                          2 bytes
    put.call(VERSION_NEEDED_TO_EXTRACT_ZIP64, C_UINT2) # version needed to extract        2 bytes
    put.call(0, C_UINT4) # number of this disk                      4 bytes
    put.call(0, C_UINT4) # number of the disk with the start of the central directory  4 bytes
    put.call(num_files_in_archive, C_UINT8) # total number of entries in the central directory on this disk  8 bytes
    put.call(num_files_in_archive, C_UINT8) # total number of entries in the central directory  8 bytes
    put.call(central_directory_size, C_UINT8) # size of the central directory    8 bytes
    put.call(start_of_central_directory_location, C_UINT8) # offset of start of central directory with respect to the starting disk number  8 bytes
    # zip64 extensible data sector (variable size), blank for us

    # [zip64 end of central directory locator]
    put.call(0x07064b50, C_UINT4) # zip64 end of central dir locator signature  4 bytes
    put.call(0, C_UINT4) # number of the disk with the start of the zip64 end of central directory  4 bytes
    # (note: "relative" is actually "from the start of the file")
    put.call(zip64_eocdr_offset, C_UINT8) # relative offset of the zip64 end of central directory record  8 bytes
    put.call(1, C_UINT4) # total number of disks                    4 bytes
  end

  # Then the end of central directory record:
  put.call(0x06054b50, C_UINT4) # end of central dir signature             4 bytes
  put.call(0, C_UINT2) # number of this disk                      2 bytes
  put.call(0, C_UINT2) # number of the disk with the start of the central directory  2 bytes

  # With Zip64 the overflow values are stored here, and the number of entries
  # will be read from the zip64 part of the central directory
  entry_count = zip64_required ? TWO_BYTE_MAX_UINT : num_files_in_archive
  put.call(entry_count, C_UINT2) # total number of entries in the central directory on this disk  2 bytes
  put.call(entry_count, C_UINT2) # total number of entries in the central directory  2 bytes

  put.call(zip64_required ? FOUR_BYTE_MAX_UINT : central_directory_size, C_UINT4) # size of the central directory  4 bytes
  put.call(zip64_required ? FOUR_BYTE_MAX_UINT : start_of_central_directory_location, C_UINT4) # offset of start of central directory with respect to the starting disk number  4 bytes

  put.call(comment.bytesize, C_UINT2) # .ZIP file comment length                 2 bytes
  io << comment # .ZIP file comment (variable size)
end
|
325
|
+
|
326
|
+
private
|
327
|
+
|
328
|
+
# Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
|
329
|
+
#
|
330
|
+
# @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
|
331
|
+
# @param uncompressed_size[Fixnum] The size of the file once extracted
|
332
|
+
# @return [String]
|
333
|
+
def zip_64_extra_for_local_file_header(compressed_size:, uncompressed_size:)
  # Layout of the block:
  #   2 bytes  tag for this "extra" block type (0x0001)
  #   2 bytes  size of the payload - always 16 here (2x8)
  #   8 bytes  original uncompressed file size
  #   8 bytes  size of compressed data
  packspec = [C_UINT2, C_UINT2, C_UINT8, C_UINT8].join
  [0x0001, 16, uncompressed_size, compressed_size].pack(packspec)
end
|
342
|
+
|
343
|
+
# Writes the extended timestamp information field for local headers.
|
344
|
+
#
|
345
|
+
# The spec defines 2
|
346
|
+
# different formats - the one for the local file header can also accommodate the
|
347
|
+
# atime and ctime, whereas the one for the central directory can only take
|
348
|
+
# the mtime - and refers the reader to the local header extra to obtain the
|
349
|
+
# remaining times
|
350
|
+
def timestamp_extra_for_local_file_header(mtime)
  # Builds the extended timestamp ("UT") extra field carrying only the modification time.
  #
  # @param mtime[Time] the modification time; it is only read, never modified
  # @return [String] the packed extra field (9 bytes)
  #
  # Local-header version:
  #
  #          Value         Size        Description
  #          -----         ----        -----------
  #  (time)  0x5455        Short       tag for this extra block type ("UT")
  #          TSize         Short       total data size for this block
  #          Flags         Byte        info bits
  #          (ModTime)     Long        time of last modification (UTC/GMT)
  #          (AcTime)      Long        time of last access (UTC/GMT)
  #          (CrTime)      Long        time of original creation (UTC/GMT)
  #
  # Central-header version:
  #
  #          Value         Size        Description
  #          -----         ----        -----------
  #  (time)  0x5455        Short       tag for this extra block type ("UT")
  #          TSize         Short       total data size for this block
  #          Flags         Byte        info bits (refers to local header!)
  #          (ModTime)     Long        time of last modification (UTC/GMT)
  #
  # The lower three bits of Flags in both headers indicate which time-
  # stamps are present in the LOCAL extra field:
  #
  #        bit 0           if set, modification time is present
  #        bit 1           if set, access time is present
  #        bit 2           if set, creation time is present
  #        bits 3-7        reserved for additional timestamps; not set
  flags = 0b00000001 # Set the lowest bit only, to indicate that only mtime is present
  data_and_packspecs = [
    0x5455, C_UINT2, # tag for this extra block type ("UT")
    (1 + 4), C_UINT2, # the size of this block (1 byte used for the Flag + 1 long used for the mtime)
    flags, C_CHAR, # encode a single byte
    # Seconds since the epoch do not depend on the time zone, so no conversion to UTC is
    # needed. `Time#utc` is deliberately avoided: it converts the receiver in place, which
    # would alter the caller's Time object (and raise on a frozen one).
    mtime.to_i, C_INT4 # Use a signed int, not the unsigned one used by the rest of the ZIP spec.
  ]
  # The atime and ctime can be omitted if not present
  pack_array(data_and_packspecs)
end
|
388
|
+
|
389
|
+
# Since we do not supply atime or ctime, the contents of the two extra fields (central dir and local header)
|
390
|
+
# is exactly the same, so we can use a method alias.
|
391
|
+
alias_method :timestamp_extra_for_central_directory_entry, :timestamp_extra_for_local_file_header
|
392
|
+
|
393
|
+
# Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
|
394
|
+
# also contains the location of the local file header in the ZIP as an 8-byte int.
|
395
|
+
#
|
396
|
+
# @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
|
397
|
+
# @param uncompressed_size[Fixnum] The size of the file once extracted
|
398
|
+
# @param local_file_header_location[Fixnum] Byte offset of the start of the local file header from the beginning of the ZIP archive
|
399
|
+
# @return [String]
|
400
|
+
def zip_64_extra_for_central_directory_file_header(compressed_size:, uncompressed_size:, local_file_header_location:)
  # Layout of the block:
  #   2 bytes  tag for this "extra" block type (0x0001)
  #   2 bytes  size of the payload - always 28 here
  #   8 bytes  original uncompressed file size
  #   8 bytes  size of compressed data
  #   8 bytes  offset of local header record
  #   4 bytes  number of the disk on which this file starts
  packspec = [C_UINT2, C_UINT2, C_UINT8, C_UINT8, C_UINT8, C_UINT4].join
  [0x0001, 28, uncompressed_size, compressed_size, local_file_header_location, 0].pack(packspec)
end
|
411
|
+
|
412
|
+
def to_binary_dos_time(t)
  # Combines seconds halved, minutes shifted left by 5 and hours shifted left by 11
  halved_seconds = t.sec / 2
  shifted_minutes = t.min << 5
  shifted_hours = t.hour << 11
  halved_seconds + shifted_minutes + shifted_hours
end
|
415
|
+
|
416
|
+
# Encodes the date part of a timestamp as a single integer:
# day of month in the low bits, month shifted left by 5,
# years since 1980 shifted left by 9.
#
# With the year offset starting at bit 9, only 7 bits remain before the value
# exceeds 16 bits, so only the years 1980..2107 are representable. Years outside
# that range are clamped to the nearest representable year instead of producing
# a negative or oversized number that would be truncated into a garbage date
# when packed into a 2-byte field.
#
# @param t [#day, #month, #year] the timestamp to encode (e.g. a Time)
# @return [Integer] a value in 0..0xFFFF for any valid calendar date
def to_binary_dos_date(t)
  years_since_1980 = (t.year - 1980).clamp(0, 0x7F)
  t.day + (t.month << 5) + (years_since_1980 << 9)
end
|
419
|
+
|
420
|
+
# Splits a flat array of alternating "numeric value, pack specifier" items and then packs the values
|
421
|
+
# (1st, 3rd, 5th... items) using the specifiers (2nd, 4th, 6th... items). It is harder to explain than to show:
|
422
|
+
#
|
423
|
+
# pack_array([1, 'V', 2, 'v', 148, 'v']) #=> "\x01\x00\x00\x00\x02\x00\x94\x00"
|
424
|
+
#
|
425
|
+
# will do the following two transforms:
|
426
|
+
#
|
427
|
+
# [1, 'V', 2, 'v', 148, 'v'] -> [1,2,148], ['V','v','v'] -> [1,2,148].pack('Vvv') -> "\x01\x00\x00\x00\x02\x00\x94\x00"
|
428
|
+
def pack_array(values_to_packspecs)
  values = []
  packspecs = []
  # Items at even positions (0, 2, 4...) are values, the ones following them are pack specifiers.
  values_to_packspecs.each_with_index do |item, position|
    bucket = position.even? ? values : packspecs
    bucket << item
  end
  # Concatenating the specifiers yields one format string covering all values.
  values.pack(packspecs.join)
end
|
432
|
+
|
433
|
+
# Combines a file type and UNIX permission bits into one integer: the file type
# is placed above the 12 permission bits, and the resulting mode is then moved
# into the upper half of the value by shifting it left 16 bits.
#
# @param unix_permissions_int [Integer] permission bits; anything outside 0o7777 is masked off
# @param file_type_int [Integer] file type code placed directly above the permission bits
# @return [Integer]
def generate_external_attrs(unix_permissions_int, file_type_int)
  permission_bits = unix_permissions_int & 0o7777
  unix_mode = (file_type_int << 12) | permission_bits
  unix_mode << 16
end
|
436
|
+
end
|
data/lib/zip_kit.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Top-level namespace of the library. Every component is registered with
# `autoload`, so its file is only read the first time the constant is referenced.
#
# Each constant is registered exactly once. A second `autoload` call for the same
# constant replaces the earlier registration, so an earlier entry that mapped
# :OutputEnumerator to "zip_kit/rack_body.rb" was dead code and has been dropped
# in favour of the "zip_kit/output_enumerator.rb" entry that actually took effect.
module ZipKit
  autoload :RailsStreaming, File.dirname(__FILE__) + "/zip_kit/rails_streaming.rb"
  autoload :ZipWriter, File.dirname(__FILE__) + "/zip_kit/zip_writer.rb"
  autoload :RemoteIO, File.dirname(__FILE__) + "/zip_kit/remote_io.rb"
  autoload :NullWriter, File.dirname(__FILE__) + "/zip_kit/null_writer.rb"
  autoload :OutputEnumerator, File.dirname(__FILE__) + "/zip_kit/output_enumerator.rb"
  autoload :BlockDeflate, File.dirname(__FILE__) + "/zip_kit/block_deflate.rb"
  autoload :WriteAndTell, File.dirname(__FILE__) + "/zip_kit/write_and_tell.rb"
  autoload :RemoteUncap, File.dirname(__FILE__) + "/zip_kit/remote_uncap.rb"
  autoload :FileReader, File.dirname(__FILE__) + "/zip_kit/file_reader.rb"
  autoload :UniquifyFilename, File.dirname(__FILE__) + "/zip_kit/uniquify_filename.rb"
  autoload :SizeEstimator, File.dirname(__FILE__) + "/zip_kit/size_estimator.rb"
  autoload :Streamer, File.dirname(__FILE__) + "/zip_kit/streamer.rb"
  autoload :PathSet, File.dirname(__FILE__) + "/zip_kit/path_set.rb"
  autoload :StreamCRC32, File.dirname(__FILE__) + "/zip_kit/stream_crc32.rb"
  autoload :BlockWrite, File.dirname(__FILE__) + "/zip_kit/block_write.rb"
  autoload :WriteBuffer, File.dirname(__FILE__) + "/zip_kit/write_buffer.rb"
  autoload :WriteShovel, File.dirname(__FILE__) + "/zip_kit/write_shovel.rb"
  autoload :RackChunkedBody, File.dirname(__FILE__) + "/zip_kit/rack_chunked_body.rb"
  autoload :RackTempfileBody, File.dirname(__FILE__) + "/zip_kit/rack_tempfile_body.rb"
end
|
data/zip_kit.gemspec
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Put lib/ on the load path (once) so the version file can be required from a checkout.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "zip_kit/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name = "zip_kit"
  spec.version = ZipKit::VERSION
  spec.authors = ["Julik Tarkhanov", "Noah Berman", "Dmitry Tymchuk", "David Bosveld", "Felix Bünemann"]
  spec.email = ["me@julik.nl"]
  spec.required_ruby_version = ">= 2.6.0"

  # Description
  spec.summary = "Stream out ZIP files from Ruby"
  spec.description = "Stream out ZIP files from Ruby"
  spec.homepage = "https://github.com/julik/zip_kit"

  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  # Package every file tracked by git except test-related directories.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features|gemfiles)/}) }
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development-only dependencies
  spec.add_development_dependency("bundler")
  spec.add_development_dependency("rubyzip", "~> 1")

  spec.add_development_dependency("rack") # For tests where we spin up a server
  spec.add_development_dependency("rake", "~> 12.2")
  spec.add_development_dependency("rspec", "~> 3")
  spec.add_development_dependency("rspec-mocks", "~> 3.10", ">= 3.10.2") # ruby 3 compatibility
  spec.add_development_dependency("complexity_assert")
  spec.add_development_dependency("coderay")
  spec.add_development_dependency("benchmark-ips")
  spec.add_development_dependency("allocation_stats", "~> 0.1.5")
  spec.add_development_dependency("yard", "~> 0.9")
  spec.add_development_dependency("standard", "1.28.5") # Very specific version of standard for 2.6 with _known_ settings
  spec.add_development_dependency("magic_frozen_string_literal")
  spec.add_development_dependency("puma")
  spec.add_development_dependency("actionpack", "~> 5") # For testing RailsStreaming against an actual Rails controller
  spec.add_development_dependency("nokogiri", "~> 1", ">= 1.13") # Rails 5 does by mistake use an older Nokogiri otherwise
end
|