zip_kit 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +7 -0
  3. data/.document +5 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +61 -0
  6. data/.rspec +1 -0
  7. data/.standard.yml +8 -0
  8. data/.yardopts +1 -0
  9. data/CHANGELOG.md +255 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +153 -0
  12. data/Gemfile +4 -0
  13. data/IMPLEMENTATION_DETAILS.md +97 -0
  14. data/LICENSE.txt +20 -0
  15. data/README.md +234 -0
  16. data/Rakefile +21 -0
  17. data/bench/buffered_crc32_bench.rb +109 -0
  18. data/examples/archive_size_estimate.rb +15 -0
  19. data/examples/config.ru +7 -0
  20. data/examples/deferred_write.rb +58 -0
  21. data/examples/parallel_compression_with_block_deflate.rb +86 -0
  22. data/examples/rack_application.rb +63 -0
  23. data/examples/s3_upload.rb +23 -0
  24. data/lib/zip_kit/block_deflate.rb +130 -0
  25. data/lib/zip_kit/block_write.rb +47 -0
  26. data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
  27. data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
  28. data/lib/zip_kit/file_reader.rb +740 -0
  29. data/lib/zip_kit/null_writer.rb +12 -0
  30. data/lib/zip_kit/output_enumerator.rb +150 -0
  31. data/lib/zip_kit/path_set.rb +163 -0
  32. data/lib/zip_kit/rack_chunked_body.rb +32 -0
  33. data/lib/zip_kit/rack_tempfile_body.rb +61 -0
  34. data/lib/zip_kit/rails_streaming.rb +37 -0
  35. data/lib/zip_kit/remote_io.rb +114 -0
  36. data/lib/zip_kit/remote_uncap.rb +22 -0
  37. data/lib/zip_kit/size_estimator.rb +84 -0
  38. data/lib/zip_kit/stream_crc32.rb +60 -0
  39. data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
  40. data/lib/zip_kit/streamer/entry.rb +37 -0
  41. data/lib/zip_kit/streamer/filler.rb +9 -0
  42. data/lib/zip_kit/streamer/heuristic.rb +68 -0
  43. data/lib/zip_kit/streamer/stored_writer.rb +39 -0
  44. data/lib/zip_kit/streamer/writable.rb +36 -0
  45. data/lib/zip_kit/streamer.rb +614 -0
  46. data/lib/zip_kit/uniquify_filename.rb +39 -0
  47. data/lib/zip_kit/version.rb +5 -0
  48. data/lib/zip_kit/write_and_tell.rb +40 -0
  49. data/lib/zip_kit/write_buffer.rb +71 -0
  50. data/lib/zip_kit/write_shovel.rb +22 -0
  51. data/lib/zip_kit/zip_writer.rb +436 -0
  52. data/lib/zip_kit.rb +24 -0
  53. data/zip_kit.gemspec +41 -0
  54. metadata +335 -0
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
# Wraps any write destination and keeps count of how many bytes went through it.
#
# ZIP structures frequently need "offsets in archive" (byte offsets from the start
# of the file), but many destinations do not respond to `#pos` or `#tell`. This
# object provides `#tell` by counting the bytes given to `#<<`, and lets the counter
# be moved forward with `#advance_position_by` when data reaches the destination via
# a bypass (such as `IO#sendfile`).
class ZipKit::WriteAndTell
  include ZipKit::WriteShovel

  # @param io[#<<, #write] the destination to forward the bytes to
  def initialize(io)
    @io = io
    @pos = 0
    # Some destinations (such as the ActionController::Live `stream` object) only
    # support `write` and cannot be "pushed" into using `<<`. Detect that once here
    # so that the user does not have to write an adapter themselves.
    @use_write = !io.respond_to?(:<<)
  end

  # Forwards a binary copy of the given bytes to the destination and advances
  # the position by their bytesize. A `nil` argument is ignored.
  #
  # @param bytes[String, nil] the data to write
  # @return self
  def <<(bytes)
    unless bytes.nil?
      binary_copy = bytes.b
      @use_write ? @io.write(binary_copy) : (@io << binary_copy)
      @pos += bytes.bytesize
    end
    self
  end

  # Moves the position forward without writing anything.
  #
  # @param num_bytes[Integer] how many bytes to add to the position
  # @return [Integer] the position after advancing
  def advance_position_by(num_bytes)
    @pos += num_bytes
  end

  # @return [Integer] the number of bytes counted so far
  def tell
    @pos
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
# A small buffer placed in front of any object that accepts `<<`.
#
# Some operations (such as CRC32) are cheaper when performed on larger chunks
# of data. A consumer of ZipKit may write many small chunks in rapid succession,
# which would otherwise cause a lot of CRC32 combine operations. Since the CRC32
# value is usually not needed until the output is complete, some data can be
# buffered before the checksum is computed over it. The same buffer is used for
# output via Rack, where ZipKit's many very small writes benefit from coalescing
# (a speedup of about 20% can be achieved with a buffer of a few KB).
#
# There is no guarantee that the buffer flushes at or above the given `buffer_size`:
# a write which is itself at least `buffer_size` bytes long causes a `flush` of
# whatever is buffered, and is then written through without being buffered. This
# helps conserve memory.
#
# The WriteBuffer assumes that the object it `<<`-writes into **consumes** the
# String it is given. The very same String object is passed on every flush and it
# is cleared right after `<<` returns, so if you need to retain the output (in an
# Array, for example) you need to `.dup` the String you receive.
class ZipKit::WriteBuffer
  # Creates a new WriteBuffer which forwards into the given writable object
  #
  # @param writable[#<<] An object that responds to `#<<` with a String as argument
  # @param buffer_size[Integer] How many bytes to buffer
  def initialize(writable, buffer_size)
    @writable = writable
    @buffer_size = buffer_size
    # The buffer is created from a zero-padded String which is then cleared, as a
    # variation on using capacity: (which JRuby apparently does not like very much).
    # The desire is that the buffer does not have to be resized during the lifetime
    # of the object.
    @buf = ("\0".b * (buffer_size * 2)).clear
  end

  # Appends the given data to the write buffer, and flushes the buffer into the
  # writable once the buffered amount reaches the `buffer_size` given at initialization
  #
  # @param data[String] data to be written
  # @return self
  def <<(data)
    if data.bytesize < @buffer_size
      @buf << data
      flush if @buf.bytesize >= @buffer_size
    else
      # This is where less than @buffer_size may be output: the pending bytes go
      # out first (flush is a no-op on an empty buffer), then the oversized chunk
      # is written through as-is.
      flush
      @writable << data
    end
    self
  end

  # Explicitly flushes the buffer if it contains anything
  #
  # @return self
  def flush
    return self if @buf.empty?

    @writable << @buf
    @buf.clear
    self
  end

  # `flush!` was renamed to `flush` but we preserve this method for backwards compatibility
  alias_method :flush!, :flush
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
# A lot of objects in ZipKit accept bytes via the `<<` operator (the "shovel"
# operator). This is in the tradition of the late Jim Weirich and his Builder gem.
# In [this presentation](https://youtu.be/1BVFlvRPZVM?t=2403) he justifies this
# design very eloquently, and in ZipKit we follow this example.
#
# However, a number of methods in Ruby - including the standard library - expect
# an object to implement `write` instead. Since `write` can be expressed in terms
# of `<<`, this module lets every ZipKit "IO-ish" object respond to `write` too.
# Jim would be proud. We miss you, Jim.
module ZipKit::WriteShovel
  # Writes the given data to the output stream by delegating to `<<`. Allows the
  # object to be used as a target for `IO.copy_stream(from, to)`
  #
  # @param bytes[String] the binary string to write (part of the uncompressed file)
  # @return [Integer] the number of bytes written (the bytesize of `bytes`)
  def write(bytes)
    self << bytes
    written = bytes.bytesize
    written
  end
end
@@ -0,0 +1,436 @@
1
+ # frozen_string_literal: true
2
+
3
# A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
# separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
#
# The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
# is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
# without depending on data available elsewhere. This makes the writer very easy to test, since each of
# its methods outputs something that only depends on the method's arguments. For example, we use this
# to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
# to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
# with given offsets as arguments.
#
# Since some methods need a lot of data about the entity being written, everything is passed via
# keyword arguments - this way it is much less likely that you can make a mistake writing something.
#
# Another reason for having a separate Writer is that most ZIP libraries attach the methods for
# writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
# However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
# absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
# one screen. Altering or checking that code then becomes much, much easier. The methods doing the
# writing are also intentionally left very verbose - so that you can follow what is happening at
# all times.
#
# All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
# that to output to String objects, or to output to Arrays that you can later join together.
class ZipKit::ZipWriter
  FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
  TWO_BYTE_MAX_UINT = 0xFFFF
  ZIP_TRICKS_COMMENT = "Written using ZipKit %<version>s" % {version: ZipKit::VERSION}
  VERSION_MADE_BY = 52
  VERSION_NEEDED_TO_EXTRACT = 20
  VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
  DEFAULT_FILE_UNIX_PERMISSIONS = 0o644
  DEFAULT_DIRECTORY_UNIX_PERMISSIONS = 0o755
  FILE_TYPE_FILE = 0o10
  FILE_TYPE_DIRECTORY = 0o04
  MADE_BY_SIGNATURE = begin
    # A combination of the VERSION_MADE_BY low byte and the OS type high byte
    os_type = 3 # UNIX
    [VERSION_MADE_BY, os_type].pack("CC")
  end

  C_UINT4 = "V" # Encode a 4-byte unsigned little-endian uint
  C_UINT2 = "v" # Encode a 2-byte unsigned little-endian uint
  C_UINT8 = "Q<" # Encode an 8-byte unsigned little-endian uint
  C_CHAR = "C" # For bit-encoded strings
  C_INT4 = "l<" # Encode a 4-byte signed little-endian int

  private_constant :FOUR_BYTE_MAX_UINT,
    :TWO_BYTE_MAX_UINT,
    :VERSION_MADE_BY,
    :VERSION_NEEDED_TO_EXTRACT,
    :VERSION_NEEDED_TO_EXTRACT_ZIP64,
    :FILE_TYPE_FILE,
    :FILE_TYPE_DIRECTORY,
    :MADE_BY_SIGNATURE,
    :C_UINT4,
    :C_UINT2,
    :C_UINT8,
    :ZIP_TRICKS_COMMENT

  # Writes the local file header, that precedes the actual file _data_.
  #
  # @param io[#<<] the buffer to write the local file header to
  # @param filename[String] the name of the file in the archive
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @param crc32[Integer] The CRC32 checksum of the file
  # @param mtime[Time] the modification time to be recorded in the ZIP
  # @param gp_flags[Integer] bit-packed general purpose flags
  # @param storage_mode[Integer] 8 for deflated, 0 for stored...
  # @return [void]
  def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
    requires_zip64 = compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT

    io << [0x04034b50].pack(C_UINT4) # local file header signature     4 bytes  (0x04034b50)
    io << if requires_zip64 # version needed to extract       2 bytes
      [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_UINT2)
    else
      [VERSION_NEEDED_TO_EXTRACT].pack(C_UINT2)
    end

    io << [gp_flags].pack(C_UINT2) # general purpose bit flag        2 bytes
    io << [storage_mode].pack(C_UINT2) # compression method              2 bytes
    io << [to_binary_dos_time(mtime)].pack(C_UINT2) # last mod file time              2 bytes
    io << [to_binary_dos_date(mtime)].pack(C_UINT2) # last mod file date              2 bytes
    io << [crc32].pack(C_UINT4) # crc-32                          4 bytes

    if requires_zip64
      # The real sizes go into the Zip64 extra field, the fixed fields hold the overflow marker
      io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # compressed size              4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # uncompressed size            4 bytes
    else
      io << [compressed_size].pack(C_UINT4) # compressed size              4 bytes
      io << [uncompressed_size].pack(C_UINT4) # uncompressed size            4 bytes
    end

    # Filename should not be longer than 0xFFFF otherwise this wont fit here
    io << [filename.bytesize].pack(C_UINT2) # file name length             2 bytes

    extra_fields = StringIO.new

    # Interesting tidbit:
    # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
    # TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
    # Zip64 extra field as _the first_ extra field.
    if requires_zip64
      extra_fields << zip_64_extra_for_local_file_header(compressed_size: compressed_size, uncompressed_size: uncompressed_size)
    end
    extra_fields << timestamp_extra_for_local_file_header(mtime)

    io << [extra_fields.size].pack(C_UINT2) # extra field length              2 bytes

    io << filename # file name (variable size)
    io << extra_fields.string
  end

  # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
  # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
  #
  # @param io[#<<] the buffer to write the central directory file header to
  # @param local_file_header_location[Integer] byte offset of the local file header of this entry from the start of the archive
  # @param gp_flags[Integer] bit-packed general purpose flags
  # @param storage_mode[Integer] 8 for deflated, 0 for stored...
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @param mtime[Time] the modification time to be recorded in the ZIP
  # @param crc32[Integer] The CRC32 checksum of the file
  # @param filename[String] the name of the file in the archive
  # @param unix_permissions[Integer?] the permissions for the file, or nil for the default to be used
  # @return [void]
  def write_central_directory_file_header(io:,
    local_file_header_location:,
    gp_flags:,
    storage_mode:,
    compressed_size:,
    uncompressed_size:,
    mtime:,
    crc32:,
    filename:,
    unix_permissions: nil)
    # At this point if the header begins somewhere beyond 0xFFFFFFFF we _have_ to record the offset
    # of the local file header as a zip64 extra field, so we give up, give in, you lose, love will always win...
    add_zip64 = (local_file_header_location > FOUR_BYTE_MAX_UINT) ||
      (compressed_size > FOUR_BYTE_MAX_UINT) || (uncompressed_size > FOUR_BYTE_MAX_UINT)

    io << [0x02014b50].pack(C_UINT4) # central file header signature   4 bytes  (0x02014b50)
    io << MADE_BY_SIGNATURE # version made by                 2 bytes
    io << if add_zip64
      [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_UINT2) # version needed to extract       2 bytes
    else
      [VERSION_NEEDED_TO_EXTRACT].pack(C_UINT2) # version needed to extract       2 bytes
    end

    io << [gp_flags].pack(C_UINT2) # general purpose bit flag        2 bytes
    io << [storage_mode].pack(C_UINT2) # compression method              2 bytes
    io << [to_binary_dos_time(mtime)].pack(C_UINT2) # last mod file time              2 bytes
    io << [to_binary_dos_date(mtime)].pack(C_UINT2) # last mod file date              2 bytes
    io << [crc32].pack(C_UINT4) # crc-32                          4 bytes

    if add_zip64
      io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # compressed size              4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # uncompressed size            4 bytes
    else
      io << [compressed_size].pack(C_UINT4) # compressed size              4 bytes
      io << [uncompressed_size].pack(C_UINT4) # uncompressed size            4 bytes
    end

    # Filename should not be longer than 0xFFFF otherwise this wont fit here
    io << [filename.bytesize].pack(C_UINT2) # file name length                2 bytes

    extra_fields = StringIO.new
    if add_zip64
      extra_fields << zip_64_extra_for_central_directory_file_header(local_file_header_location: local_file_header_location,
        compressed_size: compressed_size,
        uncompressed_size: uncompressed_size)
    end
    extra_fields << timestamp_extra_for_central_directory_entry(mtime)

    io << [extra_fields.size].pack(C_UINT2) # extra field length              2 bytes

    io << [0].pack(C_UINT2) # file comment length             2 bytes

    # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
    # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
    # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
    io << if add_zip64 # disk number start               2 bytes
      [TWO_BYTE_MAX_UINT].pack(C_UINT2)
    else
      [0].pack(C_UINT2)
    end
    io << [0].pack(C_UINT2) # internal file attributes        2 bytes

    # Because the add_empty_directory method will create a directory with a trailing "/",
    # this check can be used to assign proper permissions to the created directory.
    # external file attributes        4 bytes
    external_attrs = if filename.end_with?("/")
      unix_permissions ||= DEFAULT_DIRECTORY_UNIX_PERMISSIONS
      generate_external_attrs(unix_permissions, FILE_TYPE_DIRECTORY)
    else
      unix_permissions ||= DEFAULT_FILE_UNIX_PERMISSIONS
      generate_external_attrs(unix_permissions, FILE_TYPE_FILE)
    end
    io << [external_attrs].pack(C_UINT4)

    io << if add_zip64 # relative offset of local header 4 bytes
      [FOUR_BYTE_MAX_UINT].pack(C_UINT4)
    else
      [local_file_header_location].pack(C_UINT4)
    end

    io << filename # file name (variable size)
    io << extra_fields.string # extra field (variable size)
    # (empty)                                          # file comment (variable size)
  end

  # Writes the data descriptor following the file data for a file whose local file header
  # was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold,
  # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
  #
  # @param io[#<<] the buffer to write the data descriptor to
  # @param crc32[Integer] The CRC32 checksum of the file
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @return [void]
  def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
    # Although not originally assigned a signature, the value 0x08074b50 has commonly
    # been adopted as a signature value for the data descriptor record.
    io << [0x08074b50].pack(C_UINT4)
    io << [crc32].pack(C_UINT4) # crc-32                          4 bytes

    # If one of the sizes is above 0xFFFFFFFF use ZIP64 lengths (8 bytes) instead. A good unarchiver
    # will decide to unpack it as such if it finds the Zip64 extra for the file in the central directory.
    # So also use the opportune moment to switch the entry to Zip64 if needed
    requires_zip64 = compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT
    pack_spec = requires_zip64 ? C_UINT8 : C_UINT4

    io << [compressed_size].pack(pack_spec) # compressed size                 4 bytes, or 8 bytes for ZIP64
    io << [uncompressed_size].pack(pack_spec) # uncompressed size               4 bytes, or 8 bytes for ZIP64
  end

  # Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
  #
  # @param io[#<<] the buffer to write the central directory to.
  # @param start_of_central_directory_location[Integer] byte offset of the start of central directory from the beginning of ZIP file
  # @param central_directory_size[Integer] the size of the central directory (only file headers) in bytes
  # @param num_files_in_archive[Integer] How many files the archive contains
  # @param comment[String] the comment for the archive (defaults to ZIP_TRICKS_COMMENT)
  # @return [void]
  def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:, comment: ZIP_TRICKS_COMMENT)
    # The Zip64 EOCD record (if any) is written right after the central directory
    zip64_eocdr_offset = start_of_central_directory_location + central_directory_size

    zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
      start_of_central_directory_location > FOUR_BYTE_MAX_UINT ||
      zip64_eocdr_offset > FOUR_BYTE_MAX_UINT ||
      num_files_in_archive > TWO_BYTE_MAX_UINT

    # Then, if zip64 is used
    if zip64_required
      # [zip64 end of central directory record]
      # zip64 end of central dir
      io << [0x06064b50].pack(C_UINT4) # signature                       4 bytes  (0x06064b50)
      # size of zip64 end of central directory record                    8 bytes
      # (this is ex. the 12 bytes of the signature and the size value itself).
      # Without the extensible data sector (which we are not using)
      # it is always 44 bytes.
      io << [44].pack(C_UINT8)
      io << MADE_BY_SIGNATURE # version made by                 2 bytes
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_UINT2) # version needed to extract       2 bytes
      io << [0].pack(C_UINT4) # number of this disk             4 bytes
      # number of the disk with the start of the central directory       4 bytes
      io << [0].pack(C_UINT4)
      # total number of entries in the central directory on this disk    8 bytes
      io << [num_files_in_archive].pack(C_UINT8)
      # total number of entries in the central directory                 8 bytes
      io << [num_files_in_archive].pack(C_UINT8)
      io << [central_directory_size].pack(C_UINT8) # size of the central directory   8 bytes
      # offset of start of central directory with respect to
      # the starting disk number                                         8 bytes
      io << [start_of_central_directory_location].pack(C_UINT8)
      # zip64 extensible data sector    (variable size), blank for us

      # [zip64 end of central directory locator]
      # zip64 end of central dir locator signature                       4 bytes  (0x07064b50)
      io << [0x07064b50].pack(C_UINT4)
      # number of the disk with the start of the zip64 end of
      # central directory                                                4 bytes
      io << [0].pack(C_UINT4)
      # relative offset of the zip64 end of central directory record     8 bytes
      # (note: "relative" is actually "from the start of the file")
      io << [zip64_eocdr_offset].pack(C_UINT8)
      io << [1].pack(C_UINT4) # total number of disks           4 bytes
    end

    # Then the end of central directory record:
    io << [0x06054b50].pack(C_UINT4) # end of central dir signature     4 bytes  (0x06054b50)
    io << [0].pack(C_UINT2) # number of this disk              2 bytes
    # number of the disk with the start of the central directory         2 bytes
    io << [0].pack(C_UINT2)

    if zip64_required # the number of entries will be read from the zip64 part of the central directory
      # total number of entries in the central directory on this disk    2 bytes
      io << [TWO_BYTE_MAX_UINT].pack(C_UINT2)
      # total number of entries in the central directory                 2 bytes
      io << [TWO_BYTE_MAX_UINT].pack(C_UINT2)
    else
      # total number of entries in the central directory on this disk    2 bytes
      io << [num_files_in_archive].pack(C_UINT2)
      # total number of entries in the central directory                 2 bytes
      io << [num_files_in_archive].pack(C_UINT2)
    end

    if zip64_required
      io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # size of the central directory    4 bytes
      # offset of start of central directory with respect to
      # the starting disk number                                         4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4)
    else
      io << [central_directory_size].pack(C_UINT4) # size of the central directory    4 bytes
      # offset of start of central directory with respect to
      # the starting disk number                                         4 bytes
      io << [start_of_central_directory_location].pack(C_UINT4)
    end
    io << [comment.bytesize].pack(C_UINT2) # .ZIP file comment length        2 bytes
    io << comment # .ZIP file comment       (variable size)
  end

  private

  # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
  #
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @return [String]
  def zip_64_extra_for_local_file_header(compressed_size:, uncompressed_size:)
    data_and_packspecs = [
      0x0001, C_UINT2, # 2 bytes    Tag for this "extra" block type
      16, C_UINT2, # 2 bytes    Size of this "extra" block. For us it will always be 16 (2x8)
      uncompressed_size, C_UINT8, # 8 bytes    Original uncompressed file size
      compressed_size, C_UINT8 # 8 bytes    Size of compressed data
    ]
    pack_array(data_and_packspecs)
  end

  # Writes the extended timestamp information field for local headers.
  #
  # The spec defines 2
  # different formats - the one for the local file header can also accommodate the
  # atime and ctime, whereas the one for the central directory can only take
  # the mtime - and refers the reader to the local header extra to obtain the
  # remaining times
  #
  # @param mtime[Time] the modification time to encode. It is only read, never modified.
  # @return [String]
  def timestamp_extra_for_local_file_header(mtime)
    # Local-header version:
    #
    #          Value         Size        Description
    #          -----         ----        -----------
    #  (time)  0x5455        Short       tag for this extra block type ("UT")
    #          TSize         Short       total data size for this block
    #          Flags         Byte        info bits
    #          (ModTime)     Long        time of last modification (UTC/GMT)
    #          (AcTime)      Long        time of last access (UTC/GMT)
    #          (CrTime)      Long        time of original creation (UTC/GMT)
    #
    # Central-header version:
    #
    #          Value         Size        Description
    #          -----         ----        -----------
    #  (time)  0x5455        Short       tag for this extra block type ("UT")
    #          TSize         Short       total data size for this block
    #          Flags         Byte        info bits (refers to local header!)
    #          (ModTime)     Long        time of last modification (UTC/GMT)
    #
    # The lower three bits of Flags in both headers indicate which time-
    #       stamps are present in the LOCAL extra field:
    #
    #       bit 0           if set, modification time is present
    #       bit 1           if set, access time is present
    #       bit 2           if set, creation time is present
    #       bits 3-7        reserved for additional timestamps; not set
    flags = 0b00000001 # Set the lowest bit only, to indicate that only mtime is present
    data_and_packspecs = [
      0x5455, C_UINT2, # tag for this extra block type ("UT")
      (1 + 4), C_UINT2, # the size of this block (1 byte used for the Flag + one 4-byte long used for the mtime)
      flags, C_CHAR, # encode a single byte
      # Seconds since the epoch do not depend on the time zone, so no conversion to UTC is
      # needed. `Time#utc` is deliberately NOT called here: it converts the receiver in place,
      # which would alter the caller's Time object (changing the DOS time computed from it
      # for any header written afterwards) and raise on a frozen Time.
      # Use a signed int, not the unsigned one used by the rest of the ZIP spec.
      mtime.to_i, C_INT4
    ]
    # The atime and ctime can be omitted if not present
    pack_array(data_and_packspecs)
  end

  # Since we do not supply atime or ctime, the contents of the two extra fields (central dir and local header)
  # is exactly the same, so we can use a method alias.
  alias_method :timestamp_extra_for_central_directory_entry, :timestamp_extra_for_local_file_header

  # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
  # also contains the location of the local file header in the ZIP as an 8-byte int.
  #
  # @param compressed_size[Integer] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Integer] The size of the file once extracted
  # @param local_file_header_location[Integer] Byte offset of the start of the local file header from the beginning of the ZIP archive
  # @return [String]
  def zip_64_extra_for_central_directory_file_header(compressed_size:, uncompressed_size:, local_file_header_location:)
    data_and_packspecs = [
      0x0001, C_UINT2, # 2 bytes    Tag for this "extra" block type
      28, C_UINT2, # 2 bytes    Size of this "extra" block. For us it will always be 28
      uncompressed_size, C_UINT8, # 8 bytes    Original uncompressed file size
      compressed_size, C_UINT8, # 8 bytes    Size of compressed data
      local_file_header_location, C_UINT8, # 8 bytes    Offset of local header record
      0, C_UINT4 # 4 bytes    Number of the disk on which this file starts
    ]
    pack_array(data_and_packspecs)
  end

  # Packs the time-of-day of `t` into a single integer: seconds halved in the lowest bits,
  # minutes shifted left by 5 and hours shifted left by 11.
  #
  # @param t[Time] the time to encode, read in whatever zone the object is in
  # @return [Integer]
  def to_binary_dos_time(t)
    (t.sec / 2) + (t.min << 5) + (t.hour << 11)
  end

  # Packs the calendar date of `t` into a single integer: day in the lowest bits,
  # month shifted left by 5 and the number of years since 1980 shifted left by 9.
  #
  # @param t[Time] the time to encode, read in whatever zone the object is in
  # @return [Integer]
  def to_binary_dos_date(t)
    t.day + (t.month << 5) + ((t.year - 1980) << 9)
  end

  # Takes a flat array of alternating "numeric value, pack specifier" items, separates the values
  # (items at even indices) from the specifiers (items at odd indices) and packs all the values
  # using the joined specifiers. It is harder to explain than to show:
  #
  #   pack_array([1, 'V', 2, 'v', 148, 'v']) #=> "\x01\x00\x00\x00\x02\x00\x94\x00"
  #
  # will do the following two transforms:
  #
  #  [1, 'V', 2, 'v', 148, 'v'] -> [1,2,148], ['V','v','v'] -> [1,2,148].pack('Vvv') -> "\x01\x00\x00\x00\x02\x00\x94\x00"
  #
  # @param values_to_packspecs[Array] alternating values and pack specifiers
  # @return [String]
  def pack_array(values_to_packspecs)
    values, packspecs = values_to_packspecs.partition.each_with_index { |_, i| i.even? }
    values.pack(packspecs.join)
  end

  # Combines the file type and the permission bits (masked to 0o7777) into the value
  # written to the "external file attributes" field: the file type is shifted above the
  # permission bits, and the combined value is shifted into the upper 16 bits.
  #
  # @param unix_permissions_int[Integer] the permission bits
  # @param file_type_int[Integer] FILE_TYPE_FILE or FILE_TYPE_DIRECTORY
  # @return [Integer]
  def generate_external_attrs(unix_permissions_int, file_type_int)
    (file_type_int << 12 | (unix_permissions_int & 0o7777)) << 16
  end
end
data/lib/zip_kit.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the library. Every constant below is registered for
# autoloading, so the file defining it is only required the first time the
# constant is referenced.
module ZipKit
  # NOTE: :OutputEnumerator used to be registered twice - first pointing at
  # "zip_kit/rack_body.rb" and then at "zip_kit/output_enumerator.rb". The later
  # registration replaces the earlier one, so only the effective registration is kept.
  autoload :RailsStreaming, File.dirname(__FILE__) + "/zip_kit/rails_streaming.rb"
  autoload :ZipWriter, File.dirname(__FILE__) + "/zip_kit/zip_writer.rb"
  autoload :RemoteIO, File.dirname(__FILE__) + "/zip_kit/remote_io.rb"
  autoload :NullWriter, File.dirname(__FILE__) + "/zip_kit/null_writer.rb"
  autoload :OutputEnumerator, File.dirname(__FILE__) + "/zip_kit/output_enumerator.rb"
  autoload :BlockDeflate, File.dirname(__FILE__) + "/zip_kit/block_deflate.rb"
  autoload :WriteAndTell, File.dirname(__FILE__) + "/zip_kit/write_and_tell.rb"
  autoload :RemoteUncap, File.dirname(__FILE__) + "/zip_kit/remote_uncap.rb"
  autoload :FileReader, File.dirname(__FILE__) + "/zip_kit/file_reader.rb"
  autoload :UniquifyFilename, File.dirname(__FILE__) + "/zip_kit/uniquify_filename.rb"
  autoload :SizeEstimator, File.dirname(__FILE__) + "/zip_kit/size_estimator.rb"
  autoload :Streamer, File.dirname(__FILE__) + "/zip_kit/streamer.rb"
  autoload :PathSet, File.dirname(__FILE__) + "/zip_kit/path_set.rb"
  autoload :StreamCRC32, File.dirname(__FILE__) + "/zip_kit/stream_crc32.rb"
  autoload :BlockWrite, File.dirname(__FILE__) + "/zip_kit/block_write.rb"
  autoload :WriteBuffer, File.dirname(__FILE__) + "/zip_kit/write_buffer.rb"
  autoload :WriteShovel, File.dirname(__FILE__) + "/zip_kit/write_shovel.rb"
  autoload :RackChunkedBody, File.dirname(__FILE__) + "/zip_kit/rack_chunked_body.rb"
  autoload :RackTempfileBody, File.dirname(__FILE__) + "/zip_kit/rack_tempfile_body.rb"
end
data/zip_kit.gemspec ADDED
@@ -0,0 +1,41 @@
1
# Gem specification for zip_kit. The version is read from lib/zip_kit/version.rb,
# so lib/ is put on the load path before that file is required.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "zip_kit/version"

Gem::Specification.new do |s|
  s.name = "zip_kit"
  s.version = ZipKit::VERSION
  s.authors = ["Julik Tarkhanov", "Noah Berman", "Dmitry Tymchuk", "David Bosveld", "Felix Bünemann"]
  s.email = ["me@julik.nl"]
  s.required_ruby_version = ">= 2.6.0"

  s.summary = "Stream out ZIP files from Ruby"
  s.description = "Stream out ZIP files from Ruby"
  s.homepage = "https://github.com/julik/zip_kit"

  s.metadata["allowed_push_host"] = "https://rubygems.org"

  # Package every file tracked by git, except for the test-related directories
  tracked_files = `git ls-files -z`.split("\x0")
  s.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features|gemfiles)/}) }
  s.bindir = "exe"
  s.executables = s.files.grep(%r{^exe/}) { |path| File.basename(path) }
  s.require_paths = ["lib"]

  # Each row is a gem name followed by its (optional) version constraints.
  # The rows are registered in the order they are listed.
  development_dependencies = [
    ["bundler"],
    ["rubyzip", "~> 1"],
    ["rack"], # For tests where we spin up a server
    ["rake", "~> 12.2"],
    ["rspec", "~> 3"],
    ["rspec-mocks", "~> 3.10", ">= 3.10.2"], # ruby 3 compatibility
    ["complexity_assert"],
    ["coderay"],
    ["benchmark-ips"],
    ["allocation_stats", "~> 0.1.5"],
    ["yard", "~> 0.9"],
    ["standard", "1.28.5"], # Very specific version of standard for 2.6 with _known_ settings
    ["magic_frozen_string_literal"],
    ["puma"],
    ["actionpack", "~> 5"], # For testing RailsStreaming against an actual Rails controller
    ["nokogiri", "~> 1", ">= 1.13"] # Rails 5 does by mistake use an older Nokogiri otherwise
  ]
  development_dependencies.each do |gem_name, *version_constraints|
    s.add_development_dependency(gem_name, *version_constraints)
  end
end