zip_kit 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +7 -0
  3. data/.document +5 -0
  4. data/.github/workflows/ci.yml +29 -0
  5. data/.gitignore +61 -0
  6. data/.rspec +1 -0
  7. data/.standard.yml +8 -0
  8. data/.yardopts +1 -0
  9. data/CHANGELOG.md +255 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +153 -0
  12. data/Gemfile +4 -0
  13. data/IMPLEMENTATION_DETAILS.md +97 -0
  14. data/LICENSE.txt +20 -0
  15. data/README.md +234 -0
  16. data/Rakefile +21 -0
  17. data/bench/buffered_crc32_bench.rb +109 -0
  18. data/examples/archive_size_estimate.rb +15 -0
  19. data/examples/config.ru +7 -0
  20. data/examples/deferred_write.rb +58 -0
  21. data/examples/parallel_compression_with_block_deflate.rb +86 -0
  22. data/examples/rack_application.rb +63 -0
  23. data/examples/s3_upload.rb +23 -0
  24. data/lib/zip_kit/block_deflate.rb +130 -0
  25. data/lib/zip_kit/block_write.rb +47 -0
  26. data/lib/zip_kit/file_reader/inflating_reader.rb +36 -0
  27. data/lib/zip_kit/file_reader/stored_reader.rb +35 -0
  28. data/lib/zip_kit/file_reader.rb +740 -0
  29. data/lib/zip_kit/null_writer.rb +12 -0
  30. data/lib/zip_kit/output_enumerator.rb +150 -0
  31. data/lib/zip_kit/path_set.rb +163 -0
  32. data/lib/zip_kit/rack_chunked_body.rb +32 -0
  33. data/lib/zip_kit/rack_tempfile_body.rb +61 -0
  34. data/lib/zip_kit/rails_streaming.rb +37 -0
  35. data/lib/zip_kit/remote_io.rb +114 -0
  36. data/lib/zip_kit/remote_uncap.rb +22 -0
  37. data/lib/zip_kit/size_estimator.rb +84 -0
  38. data/lib/zip_kit/stream_crc32.rb +60 -0
  39. data/lib/zip_kit/streamer/deflated_writer.rb +45 -0
  40. data/lib/zip_kit/streamer/entry.rb +37 -0
  41. data/lib/zip_kit/streamer/filler.rb +9 -0
  42. data/lib/zip_kit/streamer/heuristic.rb +68 -0
  43. data/lib/zip_kit/streamer/stored_writer.rb +39 -0
  44. data/lib/zip_kit/streamer/writable.rb +36 -0
  45. data/lib/zip_kit/streamer.rb +614 -0
  46. data/lib/zip_kit/uniquify_filename.rb +39 -0
  47. data/lib/zip_kit/version.rb +5 -0
  48. data/lib/zip_kit/write_and_tell.rb +40 -0
  49. data/lib/zip_kit/write_buffer.rb +71 -0
  50. data/lib/zip_kit/write_shovel.rb +22 -0
  51. data/lib/zip_kit/zip_writer.rb +436 -0
  52. data/lib/zip_kit.rb +24 -0
  53. data/zip_kit.gemspec +41 -0
  54. metadata +335 -0
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A tiny wrapper over any object that supports :<<.
4
+ # Adds :tell and :advance_position_by. This is needed for write destinations
5
+ # which do not respond to `#pos` or `#tell`. A lot of ZIP archive format parts
6
+ # include "offsets in archive" - a byte offset from the start of file. Keeping
7
+ # track of this value is what this object will do. It also allows "advancing"
8
+ # this value if data gets written using a bypass (such as `IO#sendfile`)
9
+ class ZipKit::WriteAndTell
10
+ include ZipKit::WriteShovel
11
+
12
+ def initialize(io)
13
+ @io = io
14
+ @pos = 0
15
+ # Some objects (such as ActionController::Live `stream` object) cannot be "pushed" into
16
+ # using the :<< operator, but only support `write`. For ease we add a small shim in that case instead of having
17
+ # the user abstract it themselves.
18
+ @use_write = !io.respond_to?(:<<)
19
+ end
20
+
21
+ def <<(bytes)
22
+ return self if bytes.nil?
23
+ if @use_write
24
+ @io.write(bytes.b)
25
+ else
26
+ @io << bytes.b
27
+ end
28
+
29
+ @pos += bytes.bytesize
30
+ self
31
+ end
32
+
33
+ def advance_position_by(num_bytes)
34
+ @pos += num_bytes
35
+ end
36
+
37
+ def tell
38
+ @pos
39
+ end
40
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Some operations (such as CRC32) benefit when they are performed
4
+ # on larger chunks of data. In certain use cases, it is possible that
5
+ # the consumer of ZipKit is going to be writing small chunks
6
+ # in rapid succession, so CRC32 is going to have to perform a lot of
7
+ # CRC32 combine operations - and this adds up. Since the CRC32 value
8
+ # is usually not needed until the complete output has completed
9
+ # we can buffer at least some amount of data before computing CRC32 over it.
10
+ # We also use this buffer for output via Rack, where some amount of buffering
11
+ # helps reduce the number of syscalls made by the webserver. ZipKit performs
12
+ # lots of very small writes, and some degree of speedup (about 20%) can be achieved
13
+ # with a buffer of a few KB.
14
+ #
15
+ # Note that there is no guarantee that the write buffer is going to flush at or above
16
+ # the given `buffer_size`, because for writes which exceed the buffer size it will
17
+ # first `flush` and then write through the oversized chunk, without buffering it. This
18
+ # helps conserve memory. Also note that the buffer will *not* duplicate strings for you
19
+ # and *will* yield the same buffer String over and over, so if you are storing it in an
20
+ # Array you might need to duplicate it.
21
+ #
22
+ # Note also that the WriteBuffer assumes that the object it `<<`-writes into is going
23
+ # to **consume** in some way the string that it passes in. After the `<<` method returns,
24
+ # the WriteBuffer will be cleared, and it passes the same String reference on every call
25
+ # to `<<`. Therefore, if you need to retain the output of the WriteBuffer in, say, an Array,
26
+ # you might need to `.dup` the `String` it gives you.
27
+ class ZipKit::WriteBuffer
28
+ # Creates a new WriteBuffer that writes through into a given writable object
29
+ #
30
+ # @param writable[#<<] An object that responds to `#<<` with a String as argument
31
+ # @param buffer_size[Integer] How many bytes to buffer
32
+ def initialize(writable, buffer_size)
33
+ # Allocating the buffer using a zero-padded String as a variation
34
+ # on using capacity:, which JRuby apparently does not like very much. The
35
+ # desire here is that the buffer doesn't have to be resized during the lifetime
36
+ # of the object.
37
+ @buf = ("\0".b * (buffer_size * 2)).clear
38
+ @buffer_size = buffer_size
39
+ @writable = writable
40
+ end
41
+
42
+ # Appends the given data to the write buffer, and flushes the buffer into the
43
+ # writable if the buffer size exceeds the `buffer_size` given at initialization
44
+ #
45
+ # @param data[String] data to be written
46
+ # @return self
47
+ def <<(data)
48
+ if data.bytesize >= @buffer_size
49
+ flush unless @buf.empty? # <- this is where we can output less than @buffer_size
50
+ @writable << data # oversized chunk is written through directly, without being buffered
51
+ else
52
+ @buf << data
53
+ flush if @buf.bytesize >= @buffer_size # buffer reached the threshold, hand it to the writable
54
+ end
55
+ self
56
+ end
57
+
58
+ # Explicitly flushes the buffer if it contains anything
59
+ #
60
+ # @return self
61
+ def flush
62
+ unless @buf.empty?
63
+ @writable << @buf
64
+ @buf.clear
65
+ end
66
+ self
67
+ end
68
+
69
+ # `flush!` was renamed to `flush` but we preserve this method for backwards compatibility
70
+ alias_method :flush!, :flush
71
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A lot of objects in ZipKit accept bytes that may be sent
4
+ # to the `<<` operator (the "shovel" operator). This is in the tradition
5
+ # of the late Jim Weirich and his Builder gem. In [this presentation](https://youtu.be/1BVFlvRPZVM?t=2403)
6
+ # he justifies this design very eloquently. In ZipKit we follow this example.
7
+ # However, there are a number of methods in Ruby - including the standard library -
8
+ # which expect your object to implement the `write` method instead. Since the `write`
9
+ # method can be expressed in terms of the `<<` method, why not allow all ZipKit
10
+ # "IO-ish" things to also respond to `write`? This is what this module does.
11
+ # Jim would be proud. We miss you, Jim.
12
+ module ZipKit::WriteShovel
13
+ # Writes the given data to the output stream. Allows the object to be used as
14
+ # a target for `IO.copy_stream(from, to)`
15
+ #
16
+ # @param bytes[String] the binary string to write
17
+ # @return [Fixnum] the number of bytes written (the bytesize of `bytes`)
18
+ def write(bytes)
19
+ self << bytes # delegate to the `<<` method of the object this module is mixed into
20
+ bytes.bytesize
21
+ end
22
+ end
@@ -0,0 +1,436 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
4
+ # separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
5
+ #
6
+ # The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
7
+ # is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
8
+ # without depending on data available elsewhere. This makes the writer very easy to test, since each of
9
+ # its methods outputs something that only depends on the method's arguments. For example, we use this
10
+ # to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
11
+ # to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
12
+ # with given offsets as arguments.
13
+ #
14
+ # Since some methods need a lot of data about the entity being written, everything is passed via
15
+ # keyword arguments - this way it is much less likely that you can make a mistake writing something.
16
+ #
17
+ # Another reason for having a separate Writer is that most ZIP libraries attach the methods for
18
+ # writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
19
+ # However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
20
+ # absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
21
+ # one screen. Altering or checking that code then becomes much, much easier. The methods doing the
22
+ # writing are also intentionally left very verbose - so that you can follow what is happening at
23
+ # all times.
24
+ #
25
+ # All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
26
+ # that to output to String objects, or to output to Arrays that you can later join together.
27
+ class ZipKit::ZipWriter
28
+ FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
29
+ TWO_BYTE_MAX_UINT = 0xFFFF
30
+ ZIP_TRICKS_COMMENT = "Written using ZipKit %<version>s" % {version: ZipKit::VERSION}
31
+ VERSION_MADE_BY = 52
32
+ VERSION_NEEDED_TO_EXTRACT = 20
33
+ VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
34
+ DEFAULT_FILE_UNIX_PERMISSIONS = 0o644
35
+ DEFAULT_DIRECTORY_UNIX_PERMISSIONS = 0o755
36
+ FILE_TYPE_FILE = 0o10
37
+ FILE_TYPE_DIRECTORY = 0o04
38
+ MADE_BY_SIGNATURE = begin
39
+ # A combination of the VERSION_MADE_BY low byte and the OS type high byte
40
+ os_type = 3 # UNIX
41
+ [VERSION_MADE_BY, os_type].pack("CC")
42
+ end
43
+
44
+ C_UINT4 = "V" # Encode a 4-byte unsigned little-endian uint
45
+ C_UINT2 = "v" # Encode a 2-byte unsigned little-endian uint
46
+ C_UINT8 = "Q<" # Encode an 8-byte unsigned little-endian uint
47
+ C_CHAR = "C" # For bit-encoded strings
48
+ C_INT4 = "l<" # Encode a 4-byte signed little-endian int
49
+
50
+ private_constant :FOUR_BYTE_MAX_UINT,
51
+ :TWO_BYTE_MAX_UINT,
52
+ :VERSION_MADE_BY,
53
+ :VERSION_NEEDED_TO_EXTRACT,
54
+ :VERSION_NEEDED_TO_EXTRACT_ZIP64,
55
+ :FILE_TYPE_FILE,
56
+ :FILE_TYPE_DIRECTORY,
57
+ :MADE_BY_SIGNATURE,
58
+ :C_UINT4,
59
+ :C_UINT2,
60
+ :C_UINT8,
61
+ :ZIP_TRICKS_COMMENT
62
+
63
+ # Writes the local file header, that precedes the actual file _data_.
64
+ #
65
+ # @param io[#<<] the buffer to write the local file header to
66
+ # @param filename[String] the name of the file in the archive
67
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
68
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
69
+ # @param crc32[Fixnum] The CRC32 checksum of the file
70
+ # @param mtime[Time] the modification time to be recorded in the ZIP
71
+ # @param gp_flags[Fixnum] bit-packed general purpose flags
72
+ # @param storage_mode[Fixnum] 8 for deflated, 0 for stored...
73
+ # @return [void]
74
+ def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
75
+ requires_zip64 = compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT
76
+
77
+ io << [0x04034b50].pack(C_UINT4) # local file header signature 4 bytes (0x04034b50)
78
+ io << if requires_zip64 # version needed to extract 2 bytes
79
+ [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_UINT2)
80
+ else
81
+ [VERSION_NEEDED_TO_EXTRACT].pack(C_UINT2)
82
+ end
83
+
84
+ io << [gp_flags].pack(C_UINT2) # general purpose bit flag 2 bytes
85
+ io << [storage_mode].pack(C_UINT2) # compression method 2 bytes
86
+ io << [to_binary_dos_time(mtime)].pack(C_UINT2) # last mod file time 2 bytes
87
+ io << [to_binary_dos_date(mtime)].pack(C_UINT2) # last mod file date 2 bytes
88
+ io << [crc32].pack(C_UINT4) # crc-32 4 bytes
89
+
90
+ if requires_zip64
91
+ io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # compressed size 4 bytes
92
+ io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # uncompressed size 4 bytes
93
+ else
94
+ io << [compressed_size].pack(C_UINT4) # compressed size 4 bytes
95
+ io << [uncompressed_size].pack(C_UINT4) # uncompressed size 4 bytes
96
+ end
97
+
98
+ # Filename should not be longer than 0xFFFF otherwise this wont fit here
99
+ io << [filename.bytesize].pack(C_UINT2) # file name length 2 bytes
100
+
101
+ extra_fields = StringIO.new
102
+
103
+ # Interesting tidbit:
104
+ # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
105
+ # TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
106
+ # Zip64 extra field as _the first_ extra field.
107
+ if requires_zip64
108
+ extra_fields << zip_64_extra_for_local_file_header(compressed_size: compressed_size, uncompressed_size: uncompressed_size)
109
+ end
110
+ extra_fields << timestamp_extra_for_local_file_header(mtime)
111
+
112
+ io << [extra_fields.size].pack(C_UINT2) # extra field length 2 bytes
113
+
114
+ io << filename # file name (variable size)
115
+ io << extra_fields.string
116
+ end
117
+
118
+ # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
119
+ # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
120
+ #
121
+ # @param io[#<<] the buffer to write the central directory file header to
122
+ # @param filename[String] the name of the file in the archive
123
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
124
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
125
+ # @param crc32[Fixnum] The CRC32 checksum of the file
126
+ # @param mtime[Time] the modification time to be recorded in the ZIP
127
+ # @param gp_flags[Fixnum] bit-packed general purpose flags
128
+ # @param unix_permissions[Fixnum?] the permissions for the file, or nil for the default to be used
129
+ # @return [void]
130
+ def write_central_directory_file_header(io:,
131
+ local_file_header_location:,
132
+ gp_flags:,
133
+ storage_mode:,
134
+ compressed_size:,
135
+ uncompressed_size:,
136
+ mtime:,
137
+ crc32:,
138
+ filename:,
139
+ unix_permissions: nil)
140
+ # At this point if the header begins somewhere beyound 0xFFFFFFFF we _have_ to record the offset
141
+ # of the local file header as a zip64 extra field, so we give up, give in, you loose, love will always win...
142
+ add_zip64 = (local_file_header_location > FOUR_BYTE_MAX_UINT) ||
143
+ (compressed_size > FOUR_BYTE_MAX_UINT) || (uncompressed_size > FOUR_BYTE_MAX_UINT)
144
+
145
+ io << [0x02014b50].pack(C_UINT4) # central file header signature 4 bytes (0x02014b50)
146
+ io << MADE_BY_SIGNATURE # version made by 2 bytes
147
+ io << if add_zip64
148
+ [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_UINT2) # version needed to extract 2 bytes
149
+ else
150
+ [VERSION_NEEDED_TO_EXTRACT].pack(C_UINT2) # version needed to extract 2 bytes
151
+ end
152
+
153
+ io << [gp_flags].pack(C_UINT2) # general purpose bit flag 2 bytes
154
+ io << [storage_mode].pack(C_UINT2) # compression method 2 bytes
155
+ io << [to_binary_dos_time(mtime)].pack(C_UINT2) # last mod file time 2 bytes
156
+ io << [to_binary_dos_date(mtime)].pack(C_UINT2) # last mod file date 2 bytes
157
+ io << [crc32].pack(C_UINT4) # crc-32 4 bytes
158
+
159
+ if add_zip64
160
+ io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # compressed size 4 bytes
161
+ io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # uncompressed size 4 bytes
162
+ else
163
+ io << [compressed_size].pack(C_UINT4) # compressed size 4 bytes
164
+ io << [uncompressed_size].pack(C_UINT4) # uncompressed size 4 bytes
165
+ end
166
+
167
+ # Filename should not be longer than 0xFFFF otherwise this wont fit here
168
+ io << [filename.bytesize].pack(C_UINT2) # file name length 2 bytes
169
+
170
+ extra_fields = StringIO.new
171
+ if add_zip64
172
+ extra_fields << zip_64_extra_for_central_directory_file_header(local_file_header_location: local_file_header_location,
173
+ compressed_size: compressed_size,
174
+ uncompressed_size: uncompressed_size)
175
+ end
176
+ extra_fields << timestamp_extra_for_central_directory_entry(mtime)
177
+
178
+ io << [extra_fields.size].pack(C_UINT2) # extra field length 2 bytes
179
+
180
+ io << [0].pack(C_UINT2) # file comment length 2 bytes
181
+
182
+ # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
183
+ # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
184
+ # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
185
+ io << if add_zip64 # disk number start 2 bytes
186
+ [TWO_BYTE_MAX_UINT].pack(C_UINT2)
187
+ else
188
+ [0].pack(C_UINT2)
189
+ end
190
+ io << [0].pack(C_UINT2) # internal file attributes 2 bytes
191
+
192
+ # Because the add_empty_directory method will create a directory with a trailing "/",
193
+ # this check can be used to assign proper permissions to the created directory.
194
+ # external file attributes 4 bytes
195
+ external_attrs = if filename.end_with?("/")
196
+ unix_permissions ||= DEFAULT_DIRECTORY_UNIX_PERMISSIONS
197
+ generate_external_attrs(unix_permissions, FILE_TYPE_DIRECTORY)
198
+ else
199
+ unix_permissions ||= DEFAULT_FILE_UNIX_PERMISSIONS
200
+ generate_external_attrs(unix_permissions, FILE_TYPE_FILE)
201
+ end
202
+ io << [external_attrs].pack(C_UINT4)
203
+
204
+ io << if add_zip64 # relative offset of local header 4 bytes
205
+ [FOUR_BYTE_MAX_UINT].pack(C_UINT4)
206
+ else
207
+ [local_file_header_location].pack(C_UINT4)
208
+ end
209
+
210
+ io << filename # file name (variable size)
211
+ io << extra_fields.string # extra field (variable size)
212
+ # (empty) # file comment (variable size)
213
+ end
214
+
215
+ # Writes the data descriptor following the file data for a file whose local file header
216
+ # was written with general-purpose flag bit 3 set. If the one of the sizes exceeds the Zip64 threshold,
217
+ # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
218
+ #
219
+ # @param io[#<<] the buffer to write the data descriptor to
220
+ # @param crc32[Fixnum] The CRC32 checksum of the file
221
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
222
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
223
+ # @return [void]
224
+ def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
225
+ io << [0x08074b50].pack(C_UINT4) # Although not originally assigned a signature, the value
226
+ # 0x08074b50 has commonly been adopted as a signature value
227
+ # for the data descriptor record.
228
+ io << [crc32].pack(C_UINT4) # crc-32 4 bytes
229
+
230
+ # If one of the sizes is above 0xFFFFFFFF use ZIP64 lengths (8 bytes) instead. A good unarchiver
231
+ # will decide to unpack it as such if it finds the Zip64 extra for the file in the central directory.
232
+ # So also use the opportune moment to switch the entry to Zip64 if needed
233
+ requires_zip64 = compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT
234
+ pack_spec = requires_zip64 ? C_UINT8 : C_UINT4
235
+
236
+ io << [compressed_size].pack(pack_spec) # compressed size 4 bytes, or 8 bytes for ZIP64
237
+ io << [uncompressed_size].pack(pack_spec) # uncompressed size 4 bytes, or 8 bytes for ZIP64
238
+ end
239
+
240
+ # Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
241
+ #
242
+ # @param io[#<<] the buffer to write the central directory to.
243
+ # @param start_of_central_directory_location[Fixnum] byte offset of the start of central directory from the beginning of ZIP file
244
+ # @param central_directory_size[Fixnum] the size of the central directory (only file headers) in bytes
245
+ # @param num_files_in_archive[Fixnum] How many files the archive contains
246
+ # @param comment[String] the comment for the archive (defaults to ZIP_TRICKS_COMMENT)
247
+ # @return [void]
248
+ def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:, comment: ZIP_TRICKS_COMMENT)
249
+ zip64_eocdr_offset = start_of_central_directory_location + central_directory_size
250
+
251
+ zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
252
+ start_of_central_directory_location > FOUR_BYTE_MAX_UINT ||
253
+ zip64_eocdr_offset > FOUR_BYTE_MAX_UINT ||
254
+ num_files_in_archive > TWO_BYTE_MAX_UINT
255
+
256
+ # Then, if zip64 is used
257
+ if zip64_required
258
+ # [zip64 end of central directory record]
259
+ # zip64 end of central dir
260
+ io << [0x06064b50].pack(C_UINT4) # signature 4 bytes (0x06064b50)
261
+ io << [44].pack(C_UINT8) # size of zip64 end of central
262
+ # directory record 8 bytes
263
+ # (this is ex. the 12 bytes of the signature and the size value itself).
264
+ # Without the extensible data sector (which we are not using)
265
+ # it is always 44 bytes.
266
+ io << MADE_BY_SIGNATURE # version made by 2 bytes
267
+ io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_UINT2) # version needed to extract 2 bytes
268
+ io << [0].pack(C_UINT4) # number of this disk 4 bytes
269
+ io << [0].pack(C_UINT4) # number of the disk with the
270
+ # start of the central directory 4 bytes
271
+ io << [num_files_in_archive].pack(C_UINT8) # total number of entries in the
272
+ # central directory on this disk 8 bytes
273
+ io << [num_files_in_archive].pack(C_UINT8) # total number of entries in the
274
+ # central directory 8 bytes
275
+ io << [central_directory_size].pack(C_UINT8) # size of the central directory 8 bytes
276
+ # offset of start of central
277
+ # directory with respect to
278
+ io << [start_of_central_directory_location].pack(C_UINT8) # the starting disk number 8 bytes
279
+ # zip64 extensible data sector (variable size), blank for us
280
+
281
+ # [zip64 end of central directory locator]
282
+ io << [0x07064b50].pack(C_UINT4) # zip64 end of central dir locator
283
+ # signature 4 bytes (0x07064b50)
284
+ io << [0].pack(C_UINT4) # number of the disk with the
285
+ # start of the zip64 end of
286
+ # central directory 4 bytes
287
+ io << [zip64_eocdr_offset].pack(C_UINT8) # relative offset of the zip64
288
+ # end of central directory record 8 bytes
289
+ # (note: "relative" is actually "from the start of the file")
290
+ io << [1].pack(C_UINT4) # total number of disks 4 bytes
291
+ end
292
+
293
+ # Then the end of central directory record:
294
+ io << [0x06054b50].pack(C_UINT4) # end of central dir signature 4 bytes (0x06054b50)
295
+ io << [0].pack(C_UINT2) # number of this disk 2 bytes
296
+ io << [0].pack(C_UINT2) # number of the disk with the
297
+ # start of the central directory 2 bytes
298
+
299
+ if zip64_required # the number of entries will be read from the zip64 part of the central directory
300
+ io << [TWO_BYTE_MAX_UINT].pack(C_UINT2) # total number of entries in the
301
+ # central directory on this disk 2 bytes
302
+ io << [TWO_BYTE_MAX_UINT].pack(C_UINT2) # total number of entries in
303
+ # the central directory 2 bytes
304
+ else
305
+ io << [num_files_in_archive].pack(C_UINT2) # total number of entries in the
306
+ # central directory on this disk 2 bytes
307
+ io << [num_files_in_archive].pack(C_UINT2) # total number of entries in
308
+ # the central directory 2 bytes
309
+ end
310
+
311
+ if zip64_required
312
+ io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # size of the central directory 4 bytes
313
+ io << [FOUR_BYTE_MAX_UINT].pack(C_UINT4) # offset of start of central
314
+ # directory with respect to
315
+ # the starting disk number 4 bytes
316
+ else
317
+ io << [central_directory_size].pack(C_UINT4) # size of the central directory 4 bytes
318
+ io << [start_of_central_directory_location].pack(C_UINT4) # offset of start of central
319
+ # directory with respect to
320
+ # the starting disk number 4 bytes
321
+ end
322
+ io << [comment.bytesize].pack(C_UINT2) # .ZIP file comment length 2 bytes
323
+ io << comment # .ZIP file comment (variable size)
324
+ end
325
+
326
+ private
327
+
328
+ # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
329
+ #
330
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
331
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
332
+ # @return [String]
333
+ def zip_64_extra_for_local_file_header(compressed_size:, uncompressed_size:)
334
+ data_and_packspecs = [
335
+ 0x0001, C_UINT2, # 2 bytes Tag for this "extra" block type
336
+ 16, C_UINT2, # 2 bytes Size of this "extra" block. For us it will always be 16 (2x8)
337
+ uncompressed_size, C_UINT8, # 8 bytes Original uncompressed file size
338
+ compressed_size, C_UINT8 # 8 bytes Size of compressed data
339
+ ]
340
+ pack_array(data_and_packspecs)
341
+ end
342
+
343
+ # Writes the extended timestamp information field for local headers.
344
+ #
345
+ # The spec defines 2
346
+ # different formats - the one for the local file header can also accommodate the
347
+ # atime and ctime, whereas the one for the central directory can only take
348
+ # the mtime - and refers the reader to the local header extra to obtain the
349
+ # remaining times
350
def timestamp_extra_for_local_file_header(mtime)
  # Builds the "UT" (0x5455) extended timestamp extra field carrying only the mtime,
  # and returns it as a packed binary String. The given Time object is left untouched.
  #
  # Local-header version:
  #
  #          Value         Size        Description
  #          -----         ----        -----------
  #  (time)  0x5455        Short       tag for this extra block type ("UT")
  #          TSize         Short       total data size for this block
  #          Flags         Byte        info bits
  #          (ModTime)     Long        time of last modification (UTC/GMT)
  #          (AcTime)      Long        time of last access (UTC/GMT)
  #          (CrTime)      Long        time of original creation (UTC/GMT)
  #
  # Central-header version:
  #
  #          Value         Size        Description
  #          -----         ----        -----------
  #  (time)  0x5455        Short       tag for this extra block type ("UT")
  #          TSize         Short       total data size for this block
  #          Flags         Byte        info bits (refers to local header!)
  #          (ModTime)     Long        time of last modification (UTC/GMT)
  #
  # The lower three bits of Flags in both headers indicate which time-
  # stamps are present in the LOCAL extra field:
  #
  #    bit 0           if set, modification time is present
  #    bit 1           if set, access time is present
  #    bit 2           if set, creation time is present
  #    bits 3-7        reserved for additional timestamps; not set
  flags = 0b00000001 # Set the lowest bit only, to indicate that only mtime is present
  data_and_packspecs = [
    0x5455, C_UINT2, # tag for this extra block type ("UT")
    (1 + 4), C_UINT2, # the size of this block (1 byte for the flags + one 4-byte long for the mtime)
    flags, C_CHAR, # encode a single byte
    # Epoch seconds do not depend on the time zone, so no UTC conversion is needed.
    # `Time#utc` must not be used here: it converts the receiver in place, which would
    # change the caller's Time object and raise on a frozen one.
    mtime.to_i, C_INT4 # Use a signed int, not the unsigned one used by the rest of the ZIP spec.
  ]
  # The atime and ctime can be omitted if not present
  pack_array(data_and_packspecs)
end
388
+
389
+ # Since we do not supply atime or ctime, the contents of the two extra fields (central dir and local header)
390
+ # is exactly the same, so we can use a method alias.
391
+ alias_method :timestamp_extra_for_central_directory_entry, :timestamp_extra_for_local_file_header
392
+
393
+ # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
394
+ # also contains the location of the local file header in the ZIP as an 8-byte int.
395
+ #
396
+ # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
397
+ # @param uncompressed_size[Fixnum] The size of the file once extracted
398
+ # @param local_file_header_location[Fixnum] Byte offset of the start of the local file header from the beginning of the ZIP archive
399
+ # @return [String]
400
+ def zip_64_extra_for_central_directory_file_header(compressed_size:, uncompressed_size:, local_file_header_location:)
401
+ data_and_packspecs = [
402
+ 0x0001, C_UINT2, # 2 bytes Tag for this "extra" block type
403
+ 28, C_UINT2, # 2 bytes Size of this "extra" block. For us it will always be 28
404
+ uncompressed_size, C_UINT8, # 8 bytes Original uncompressed file size
405
+ compressed_size, C_UINT8, # 8 bytes Size of compressed data
406
+ local_file_header_location, C_UINT8, # 8 bytes Offset of local header record
407
+ 0, C_UINT4 # 4 bytes Number of the disk on which this file starts
408
+ ]
409
+ pack_array(data_and_packspecs)
410
+ end
411
+
412
+ def to_binary_dos_time(t)
413
+ (t.sec / 2) + (t.min << 5) + (t.hour << 11)
414
+ end
415
+
416
+ def to_binary_dos_date(t)
417
+ t.day + (t.month << 5) + ((t.year - 1980) << 9)
418
+ end
419
+
420
+ # Takes a flat array of alternating "numeric value, pack specifier" elements, separates the values
421
+ # from the specifiers and packs all the values using the joined specifiers. It is harder to explain than to show:
422
+ #
423
+ # pack_array([1, 'V', 2, 'v', 148, 'v']) #=> "\x01\x00\x00\x00\x02\x00\x94\x00"
424
+ #
425
+ # will do the following two transforms:
426
+ #
427
+ # [1, 'V', 2, 'v', 148, 'v'] -> [1,2,148], ['V','v','v'] -> [1,2,148].pack('Vvv') -> "\x01\x00\x00\x00\x02\x00\x94\x00"
428
+ def pack_array(values_to_packspecs)
429
+ values, packspecs = values_to_packspecs.partition.each_with_index { |_, i| i.even? }
430
+ values.pack(packspecs.join)
431
+ end
432
+
433
+ def generate_external_attrs(unix_permissions_int, file_type_int)
434
+ (file_type_int << 12 | (unix_permissions_int & 0o7777)) << 16
435
+ end
436
+ end
data/lib/zip_kit.rb ADDED
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the gem. Every component is registered with `autoload`,
# so its file is only loaded when the constant is first referenced.
# Each constant is registered exactly once, pointing at the file that defines it.
module ZipKit
  autoload :RailsStreaming, File.dirname(__FILE__) + "/zip_kit/rails_streaming.rb"
  autoload :ZipWriter, File.dirname(__FILE__) + "/zip_kit/zip_writer.rb"
  autoload :RemoteIO, File.dirname(__FILE__) + "/zip_kit/remote_io.rb"
  autoload :NullWriter, File.dirname(__FILE__) + "/zip_kit/null_writer.rb"
  autoload :OutputEnumerator, File.dirname(__FILE__) + "/zip_kit/output_enumerator.rb"
  autoload :BlockDeflate, File.dirname(__FILE__) + "/zip_kit/block_deflate.rb"
  autoload :WriteAndTell, File.dirname(__FILE__) + "/zip_kit/write_and_tell.rb"
  autoload :RemoteUncap, File.dirname(__FILE__) + "/zip_kit/remote_uncap.rb"
  autoload :FileReader, File.dirname(__FILE__) + "/zip_kit/file_reader.rb"
  autoload :UniquifyFilename, File.dirname(__FILE__) + "/zip_kit/uniquify_filename.rb"
  autoload :SizeEstimator, File.dirname(__FILE__) + "/zip_kit/size_estimator.rb"
  autoload :Streamer, File.dirname(__FILE__) + "/zip_kit/streamer.rb"
  autoload :PathSet, File.dirname(__FILE__) + "/zip_kit/path_set.rb"
  autoload :StreamCRC32, File.dirname(__FILE__) + "/zip_kit/stream_crc32.rb"
  autoload :BlockWrite, File.dirname(__FILE__) + "/zip_kit/block_write.rb"
  autoload :WriteBuffer, File.dirname(__FILE__) + "/zip_kit/write_buffer.rb"
  autoload :WriteShovel, File.dirname(__FILE__) + "/zip_kit/write_shovel.rb"
  autoload :RackChunkedBody, File.dirname(__FILE__) + "/zip_kit/rack_chunked_body.rb"
  autoload :RackTempfileBody, File.dirname(__FILE__) + "/zip_kit/rack_tempfile_body.rb"
end
data/zip_kit.gemspec ADDED
@@ -0,0 +1,41 @@
1
+ lib = File.expand_path("../lib", __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require "zip_kit/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "zip_kit"
7
+ spec.version = ZipKit::VERSION
8
+ spec.authors = ["Julik Tarkhanov", "Noah Berman", "Dmitry Tymchuk", "David Bosveld", "Felix Bünemann"]
9
+ spec.email = ["me@julik.nl"]
10
+ spec.required_ruby_version = ">= 2.6.0"
11
+
12
+ spec.summary = "Stream out ZIP files from Ruby"
13
+ spec.description = "Stream out ZIP files from Ruby"
14
+ spec.homepage = "https://github.com/julik/zip_kit"
15
+
16
+ spec.metadata["allowed_push_host"] = "https://rubygems.org"
17
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
18
+ f.match(%r{^(test|spec|features|gemfiles)/})
19
+ end
20
+ spec.bindir = "exe"
21
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
+ spec.require_paths = ["lib"]
23
+
24
+ spec.add_development_dependency "bundler"
25
+ spec.add_development_dependency "rubyzip", "~> 1"
26
+
27
+ spec.add_development_dependency "rack" # For tests where we spin up a server
28
+ spec.add_development_dependency "rake", "~> 12.2"
29
+ spec.add_development_dependency "rspec", "~> 3"
30
+ spec.add_development_dependency "rspec-mocks", "~> 3.10", ">= 3.10.2" # ruby 3 compatibility
31
+ spec.add_development_dependency "complexity_assert"
32
+ spec.add_development_dependency "coderay"
33
+ spec.add_development_dependency "benchmark-ips"
34
+ spec.add_development_dependency "allocation_stats", "~> 0.1.5"
35
+ spec.add_development_dependency "yard", "~> 0.9"
36
+ spec.add_development_dependency "standard", "1.28.5" # Very specific version of standard for 2.6 with _known_ settings
37
+ spec.add_development_dependency "magic_frozen_string_literal"
38
+ spec.add_development_dependency "puma"
39
+ spec.add_development_dependency "actionpack", "~> 5" # For testing RailsStreaming against an actual Rails controller
40
+ spec.add_development_dependency "nokogiri", "~> 1", ">= 1.13" # Rails 5 does by mistake use an older Nokogiri otherwise
41
+ end