zip_kit 6.0.1 → 6.2.1

data/rbi/zip_kit.rbi ADDED
@@ -0,0 +1,2181 @@
+ # typed: strong
+ module ZipKit
+   VERSION = T.let("6.2.1", T.untyped)
+
+   # A ZIP archive contains a flat list of entries. These entries can implicitly
+   # create directories when the archive is expanded. For example, an entry with
+   # the filename of "some folder/file.docx" will make the unarchiving application
+   # create a directory called "some folder" automatically, and then deposit the
+   # file "file.docx" in that directory. These "implicit" directories can be
+   # arbitrarily nested, and create a tree structure of directories. That structure
+   # however is implicit as the archive contains a flat list.
+   #
+   # This creates opportunities for conflicts. For example, imagine the following
+   # structure:
+   #
+   # * `something/` - specifies an empty directory with the name "something"
+   # * `something` - specifies a file, creates a conflict
+   #
+   # This can be prevented with filename uniqueness checks. It does get funkier however
+   # as the rabbit hole goes down:
+   #
+   # * `dir/subdir/another_subdir/yet_another_subdir/file.bin` - declares a file and directories
+   # * `dir/subdir/another_subdir/yet_another_subdir` - declares a file at one of the levels, creates a conflict
+   #
+   # The results of this ZIP structure aren't very easy to predict as they depend on the
+   # application that opens the archive. For example, BOMArchiveHelper on macOS will expand files
+   # as they are declared in the ZIP, but once a conflict occurs it will fail with "error -21". It
+   # is not very transparent to the user why unarchiving fails, and it can only be prevented
+   # reliably when the archive gets created.
+   #
+   # Unfortunately that conflicts with another "magical" feature of ZipKit which automatically
+   # "fixes" duplicate filenames - filenames (paths) which have already been added to the archive.
+   # This fix is performed by appending (1), then (2) and so forth to the filename so that the
+   # conflict is avoided. This is not possible to apply to directories, because when one of the
+   # path components is reused in multiple filenames it means those entities should end up in
+   # the same directory (subdirectory) once the archive is opened.
+   #
+   # The `PathSet` keeps track of entries as they get added using 2 Sets (cheap presence checks),
+   # one for directories and one for files. It will raise a `Conflict` exception if there are
+   # files clobbering one another, or in case files collide with directories.
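+   #
+   # A short sketch of typical use (paths here are hypothetical):
+   #
+   #     set = ZipKit::PathSet.new
+   #     set.add_file_path("docs/readme.txt") # also registers "docs" as a directory
+   #     set.include?("docs") # => true
+   #     set.add_directory_path("docs/readme.txt") # would raise DirectoryClobbersFile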
+   class PathSet
+     sig { void }
+     def initialize; end
+
+     # Adds a directory path to the set of known paths, including
+     # all the directories that contain it. So, calling
+     #     add_directory_path("dir/dir2/dir3")
+     # will add "dir", "dir/dir2", "dir/dir2/dir3".
+     #
+     # _@param_ `path` — the path to the directory to add
+     sig { params(path: String).void }
+     def add_directory_path(path); end
+
+     # Adds a file path to the set of known paths, including
+     # all the directories that contain it. Once a file has been added,
+     # it is no longer possible to add a directory having the same path
+     # as this would cause a conflict.
+     #
+     # The operation also adds all the containing directories for the file, so
+     #     add_file_path("dir/dir2/file.doc")
+     # will add "dir" and "dir/dir2" as directories, and "dir/dir2/file.doc" as a file.
+     #
+     # _@param_ `file_path` — the path to the file to add
+     sig { params(file_path: String).void }
+     def add_file_path(file_path); end
+
+     # Tells whether a specific full path is already known to the PathSet.
+     # Can be a path for a directory or for a file.
+     #
+     # _@param_ `path_in_archive` — the path to check for inclusion
+     sig { params(path_in_archive: String).returns(T::Boolean) }
+     def include?(path_in_archive); end
+
+     # Clears the contained sets
+     sig { void }
+     def clear; end
+
+     # sord omit - no YARD type given for "path_in_archive", using untyped
+     # Adds the directory or file path to the path set
+     sig { params(path_in_archive: T.untyped).void }
+     def add_directory_or_file_path(path_in_archive); end
+
+     # sord omit - no YARD type given for "path", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(path: T.untyped).returns(T.untyped) }
+     def non_empty_path_components(path); end
+
+     # sord omit - no YARD type given for "path", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(path: T.untyped).returns(T.untyped) }
+     def path_and_ancestors(path); end
+
+     class Conflict < StandardError
+     end
+
+     class FileClobbersDirectory < ZipKit::PathSet::Conflict
+     end
+
+     class DirectoryClobbersFile < ZipKit::PathSet::Conflict
+     end
+   end
+
+   # Is used to write ZIP archives without having to read them back or to overwrite
+   # data. It outputs into any object that supports `<<` or `write`, namely:
+   #
+   # An `Array`, `File`, `IO`, `Socket` and even `String` all can be output destinations
+   # for the `Streamer`.
+   #
+   # You can also combine output through the `Streamer` with direct output to the destination,
+   # all while preserving the correct offsets in the ZIP file structures. This allows usage
+   # of `sendfile()` or socket `splice()` calls for "through" proxying.
+   #
+   # If you want to avoid data descriptors - or write data bypassing the Streamer -
+   # you need to know the CRC32 (as a uint) and the filesize upfront,
+   # before the writing of the entry body starts.
+   #
+   # ## Using the Streamer with runtime compression
+   #
+   # You can use the Streamer with data descriptors (the CRC32 and the sizes will be
+   # written after the file data). This allows non-rewinding on-the-fly compression.
+   # The streamer will pick the optimum compression method ("stored" or "deflated")
+   # depending on the nature of the byte stream you send into it (by using a small buffer).
+   # If you are compressing large files, the Deflater object that the Streamer controls
+   # will be regularly flushed to prevent memory inflation.
+   #
+   #     ZipKit::Streamer.open(file_socket_or_string) do |zip|
+   #       zip.write_file('mov.mp4') do |sink|
+   #         File.open('mov.mp4', 'rb') { |source| IO.copy_stream(source, sink) }
+   #       end
+   #       zip.write_file('long-novel.txt') do |sink|
+   #         File.open('novel.txt', 'rb') { |source| IO.copy_stream(source, sink) }
+   #       end
+   #     end
+   #
+   # The central directory will be written automatically at the end of the `open` block.
+   #
+   # ## Using the Streamer with entries of known size and having a known CRC32 checksum
+   #
+   # Streamer allows "IO splicing" - in this mode it will only control the metadata output,
+   # but you can write the data to the socket/file outside of the Streamer. For example, when
+   # using the sendfile gem:
+   #
+   #     ZipKit::Streamer.open(socket) do |zip|
+   #       zip.add_stored_entry(filename: "myfile1.bin", size: 9090821, crc32: 12485)
+   #       socket.sendfile(tempfile1)
+   #       zip.simulate_write(tempfile1.size)
+   #
+   #       zip.add_stored_entry(filename: "myfile2.bin", size: 458678, crc32: 89568)
+   #       socket.sendfile(tempfile2)
+   #       zip.simulate_write(tempfile2.size)
+   #     end
+   #
+   # Note that you need to use `simulate_write` in this case. This needs to happen since Streamer
+   # writes absolute offsets into the ZIP (local file header offsets and the like),
+   # and it relies on the output object to tell it how many bytes have been written
+   # so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
+   # offsets in the IO will not be updated - which will result in an invalid ZIP.
+   #
+   # ## On-the-fly deflate - using the Streamer with async/suspended writes and data descriptors
+   #
+   # If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
+   # there is an option to use a separate writer object. It gets returned from `write_deflated_file`
+   # and `write_stored_file` if you do not provide them with a block, and will accept data writes.
+   # Do note that you _must_ call `#close` on that object yourself:
+   #
+   #     ZipKit::Streamer.open(socket) do |zip|
+   #       w = zip.write_stored_file('mov.mp4')
+   #       IO.copy_stream(source_io, w)
+   #       w.close
+   #     end
+   #
+   # The central directory will be written automatically at the end of the `open` block. If you need
+   # to manage the Streamer manually, or defer the central directory write until appropriate, use
+   # the constructor instead and call `Streamer#close`:
+   #
+   #     zip = ZipKit::Streamer.new(out_io)
+   #     .....
+   #     zip.close
+   #
+   # Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
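+   #
+   # As a minimal end-to-end sketch (contents are hypothetical; any `tell`/`<<`-capable
+   # destination such as a StringIO works):
+   #
+   #     out = StringIO.new
+   #     ZipKit::Streamer.open(out) do |zip|
+   #       zip.write_file("hello.txt") { |sink| sink << "Hello, world" }
+   #     end
+   #     out.string # => the complete ZIP archive as a binary string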
+   class Streamer
+     include ZipKit::WriteShovel
+     STORED = T.let(0, T.untyped)
+     DEFLATED = T.let(8, T.untyped)
+     EntryBodySizeMismatch = T.let(Class.new(StandardError), T.untyped)
+     InvalidOutput = T.let(Class.new(ArgumentError), T.untyped)
+     Overflow = T.let(Class.new(StandardError), T.untyped)
+     UnknownMode = T.let(Class.new(StandardError), T.untyped)
+     OffsetOutOfSync = T.let(Class.new(StandardError), T.untyped)
+
+     # sord omit - no YARD return type given, using untyped
+     # Creates a new Streamer on top of the given IO-ish object and yields it. Once the given block
+     # returns, the Streamer will have its `close` method called, which will write out the central
+     # directory of the archive to the output.
+     #
+     # _@param_ `stream` — the destination IO for the ZIP (should respond to `tell` and `<<`)
+     #
+     # _@param_ `kwargs_for_new` — keyword arguments for #initialize
+     sig { params(stream: IO, kwargs_for_new: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
+     def self.open(stream, **kwargs_for_new); end
+
+     # sord duck - #<< looks like a duck type, replacing with untyped
+     # Creates a new Streamer on top of the given IO-ish object.
+     #
+     # _@param_ `writable` — the destination IO for the ZIP. Anything that responds to `<<` can be used.
+     #
+     # _@param_ `writer` — the object to be used as the writer. Defaults to an instance of ZipKit::ZipWriter, normally you won't need to override it
+     #
+     # _@param_ `auto_rename_duplicate_filenames` — whether duplicate filenames, when encountered, should be suffixed with (1), (2) etc. Default value is `false` - if duplicate names are used an exception will be raised
+     sig { params(writable: T.untyped, writer: ZipKit::ZipWriter, auto_rename_duplicate_filenames: T::Boolean).void }
+     def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false); end
+
+     # Writes a part of a zip entry body (actual binary data of the entry) into the output stream.
+     #
+     # _@param_ `binary_data` — a String in binary encoding
+     #
+     # _@return_ — self
+     sig { params(binary_data: String).returns(T.untyped) }
+     def <<(binary_data); end
+
+     # Advances the internal IO pointer to keep the offsets of the ZIP file in
+     # check. Use this if you are going to use accelerated writes to the socket
+     # (like the `sendfile()` call) after writing the headers, or if you
+     # just need to figure out the size of the archive.
+     #
+     # _@param_ `num_bytes` — how many bytes are going to be written bypassing the Streamer
+     #
+     # _@return_ — position in the output stream / ZIP archive
+     sig { params(num_bytes: Integer).returns(Integer) }
+     def simulate_write(num_bytes); end
+
+     # Writes out the local header for an entry (file in the ZIP) that is using
+     # the deflated storage model (is compressed). Once this method is called,
+     # the `<<` method has to be called to write the actual contents of the body.
+     #
+     # Note that the deflated body that is going to be written into the output
+     # has to be _precompressed_ (pre-deflated) before writing it into the
+     # Streamer, because otherwise it is impossible to know its size upfront.
+     #
+     # _@param_ `filename` — the name of the file in the entry
+     #
+     # _@param_ `modification_time` — the modification time of the file in the archive
+     #
+     # _@param_ `compressed_size` — the size of the compressed entry that is going to be written into the archive
+     #
+     # _@param_ `uncompressed_size` — the size of the entry when uncompressed, in bytes
+     #
+     # _@param_ `crc32` — the CRC32 checksum of the entry when uncompressed
+     #
+     # _@param_ `use_data_descriptor` — whether the entry body will be followed by a data descriptor
+     #
+     # _@param_ `unix_permissions` — which UNIX permissions to set, normally the default should be used
+     #
+     # _@return_ — the offset the output IO is at after writing the entry header
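+     #
+     # A sketch of producing the pre-deflated body (the `blob` String is hypothetical).
+     # ZIP entries carry raw deflate data, so the zlib wrapper is suppressed by passing
+     # a negative window size to Zlib::Deflate:
+     #
+     #     deflater = Zlib::Deflate.new(Zlib::DEFAULT_COMPRESSION, -Zlib::MAX_WBITS)
+     #     deflated = deflater.deflate(blob, Zlib::FINISH)
+     #     deflater.close
+     #     zip.add_deflated_entry(filename: "blob.bin", compressed_size: deflated.bytesize,
+     #       uncompressed_size: blob.bytesize, crc32: Zlib.crc32(blob))
+     #     zip << deflated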
+     sig do
+       params(
+         filename: String,
+         modification_time: Time,
+         compressed_size: Integer,
+         uncompressed_size: Integer,
+         crc32: Integer,
+         unix_permissions: T.nilable(Integer),
+         use_data_descriptor: T::Boolean
+       ).returns(Integer)
+     end
+     def add_deflated_entry(filename:, modification_time: Time.now.utc, compressed_size: 0, uncompressed_size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false); end
+
+     # Writes out the local header for an entry (file in the ZIP) that is using
+     # the stored storage model (is stored as-is).
+     # Once this method is called, the `<<` method has to be called one or more
+     # times to write the actual contents of the body.
+     #
+     # _@param_ `filename` — the name of the file in the entry
+     #
+     # _@param_ `modification_time` — the modification time of the file in the archive
+     #
+     # _@param_ `size` — the size of the file when uncompressed, in bytes
+     #
+     # _@param_ `crc32` — the CRC32 checksum of the entry when uncompressed
+     #
+     # _@param_ `use_data_descriptor` — whether the entry body will be followed by a data descriptor
+     #
+     # _@param_ `unix_permissions` — which UNIX permissions to set, normally the default should be used
+     #
+     # _@return_ — the offset the output IO is at after writing the entry header
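+     #
+     # A sketch with an in-memory payload (the file name is hypothetical); for the
+     # `sendfile`-style splicing variant see the class-level documentation:
+     #
+     #     data = File.binread("report.pdf")
+     #     zip.add_stored_entry(filename: "report.pdf", size: data.bytesize, crc32: Zlib.crc32(data))
+     #     zip << data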
+     sig do
+       params(
+         filename: String,
+         modification_time: Time,
+         size: Integer,
+         crc32: Integer,
+         unix_permissions: T.nilable(Integer),
+         use_data_descriptor: T::Boolean
+       ).returns(Integer)
+     end
+     def add_stored_entry(filename:, modification_time: Time.now.utc, size: 0, crc32: 0, unix_permissions: nil, use_data_descriptor: false); end
+
+     # Adds an empty directory to the archive with a size of 0 and permissions of 755.
+     #
+     # _@param_ `dirname` — the name of the directory in the archive
+     #
+     # _@param_ `modification_time` — the modification time of the directory in the archive
+     #
+     # _@param_ `unix_permissions` — which UNIX permissions to set, normally the default should be used
+     #
+     # _@return_ — the offset the output IO is at after writing the entry header
+     sig { params(dirname: String, modification_time: Time, unix_permissions: T.nilable(Integer)).returns(Integer) }
+     def add_empty_directory(dirname:, modification_time: Time.now.utc, unix_permissions: nil); end
+
+     # Opens the stream for a file stored in the archive, and yields a writer
+     # for that file to the block.
+     # The writer will buffer a small amount of data and see whether compression is
+     # effective for the data being output. If compression turns out to work well -
+     # for instance, if the output is mostly text - it is going to create a deflated
+     # file inside the zip. If the compression benefits are negligible, it will
+     # create a stored file inside the zip. It will delegate either to `write_deflated_file`
+     # or to `write_stored_file`.
+     #
+     # Using a block, the write will be terminated with a data descriptor outright.
+     #
+     #     zip.write_file("foo.txt") do |sink|
+     #       IO.copy_stream(source_file, sink)
+     #     end
+     #
+     # If deferred writes are desired (for example - to integrate with an API that
+     # does not support blocks, or to work with non-blocking environments) the method
+     # has to be called without a block. In that case it returns the sink instead,
+     # permitting you to write to it in a deferred fashion. When `close` is called on
+     # the sink, any remaining compression output will be flushed and the data
+     # descriptor is going to be written.
+     #
+     # Note that even though it does not have to happen within the same call stack,
+     # call sequencing still must be observed. It is therefore not possible to do
+     # this:
+     #
+     #     writer_for_file1 = zip.write_file("somefile.jpg")
+     #     writer_for_file2 = zip.write_file("another.tif")
+     #     writer_for_file1 << data
+     #     writer_for_file2 << data
+     #
+     # because it is likely to result in an invalid ZIP file structure later on.
+     # So using this facility in async scenarios is certainly possible, but care
+     # and attention is recommended.
+     #
+     # _@param_ `filename` — the name of the file in the archive
+     #
+     # _@param_ `modification_time` — the modification time of the file in the archive
+     #
+     # _@param_ `unix_permissions` — which UNIX permissions to set, normally the default should be used
+     #
+     # _@return_ — without a block - the Writable sink which has to be closed manually
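+     #
+     # The deferred (block-less) form described above could be used like this
+     # (data is hypothetical):
+     #
+     #     sink = zip.write_file("streamed.txt")
+     #     sink << "part one"
+     #     sink << "part two"
+     #     sink.close # flushes and writes out the data descriptor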
+     sig do
+       params(
+         filename: String,
+         modification_time: Time,
+         unix_permissions: T.nilable(Integer),
+         blk: T.proc.params(sink: ZipKit::Streamer::Writable).void
+       ).returns(ZipKit::Streamer::Writable)
+     end
+     def write_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk); end
+
+     # Opens the stream for a stored file in the archive, and yields a writer
+     # for that file to the block.
+     # Once the write completes, a data descriptor will be written with the
+     # actual compressed/uncompressed sizes and the CRC32 checksum.
+     #
+     # Using a block, the write will be terminated with a data descriptor outright.
+     #
+     #     zip.write_stored_file("foo.txt") do |sink|
+     #       IO.copy_stream(source_file, sink)
+     #     end
+     #
+     # If deferred writes are desired (for example - to integrate with an API that
+     # does not support blocks, or to work with non-blocking environments) the method
+     # has to be called without a block. In that case it returns the sink instead,
+     # permitting you to write to it in a deferred fashion. When `close` is called on
+     # the sink, any remaining compression output will be flushed and the data
+     # descriptor is going to be written.
+     #
+     # Note that even though it does not have to happen within the same call stack,
+     # call sequencing still must be observed. It is therefore not possible to do
+     # this:
+     #
+     #     writer_for_file1 = zip.write_stored_file("somefile.jpg")
+     #     writer_for_file2 = zip.write_stored_file("another.tif")
+     #     writer_for_file1 << data
+     #     writer_for_file2 << data
+     #
+     # because it is likely to result in an invalid ZIP file structure later on.
+     # So using this facility in async scenarios is certainly possible, but care
+     # and attention is recommended.
+     #
+     # If an exception is raised inside the block that is passed to the method, a `rollback!` call
+     # will be performed automatically and the entry just written will be omitted from the ZIP
+     # central directory. This can be useful if you want to rescue the exception and reattempt
+     # adding the file. Note that you will need to call `write_stored_file` again to start a
+     # new file - you can't keep writing to the one that failed.
+     #
+     # _@param_ `filename` — the name of the file in the archive
+     #
+     # _@param_ `modification_time` — the modification time of the file in the archive
+     #
+     # _@param_ `unix_permissions` — which UNIX permissions to set, normally the default should be used
+     #
+     # _@return_ — without a block - the Writable sink which has to be closed manually
+     sig do
+       params(
+         filename: String,
+         modification_time: Time,
+         unix_permissions: T.nilable(Integer),
+         blk: T.proc.params(sink: ZipKit::Streamer::Writable).void
+       ).returns(ZipKit::Streamer::Writable)
+     end
+     def write_stored_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk); end
+
+     # Opens the stream for a deflated file in the archive, and yields a writer
+     # for that file to the block. Once the write completes, a data descriptor
+     # will be written with the actual compressed/uncompressed sizes and the
+     # CRC32 checksum.
+     #
+     # Using a block, the write will be terminated with a data descriptor outright.
+     #
+     #     zip.write_deflated_file("foo.txt") do |sink|
+     #       IO.copy_stream(source_file, sink)
+     #     end
+     #
+     # If deferred writes are desired (for example - to integrate with an API that
+     # does not support blocks, or to work with non-blocking environments) the method
+     # has to be called without a block. In that case it returns the sink instead,
+     # permitting you to write to it in a deferred fashion. When `close` is called on
+     # the sink, any remaining compression output will be flushed and the data
+     # descriptor is going to be written.
+     #
+     # Note that even though it does not have to happen within the same call stack,
+     # call sequencing still must be observed. It is therefore not possible to do
+     # this:
+     #
+     #     writer_for_file1 = zip.write_deflated_file("somefile.jpg")
+     #     writer_for_file2 = zip.write_deflated_file("another.tif")
+     #     writer_for_file1 << data
+     #     writer_for_file2 << data
+     #     writer_for_file1.close
+     #     writer_for_file2.close
+     #
+     # because it is likely to result in an invalid ZIP file structure later on.
+     # So using this facility in async scenarios is certainly possible, but care
+     # and attention is recommended.
+     #
+     # If an exception is raised inside the block that is passed to the method, a `rollback!` call
+     # will be performed automatically and the entry just written will be omitted from the ZIP
+     # central directory. This can be useful if you want to rescue the exception and reattempt
+     # adding the file. Note that you will need to call `write_deflated_file` again to start a
+     # new file - you can't keep writing to the one that failed.
+     #
+     # _@param_ `filename` — the name of the file in the archive
+     #
+     # _@param_ `modification_time` — the modification time of the file in the archive
+     #
+     # _@param_ `unix_permissions` — which UNIX permissions to set, normally the default should be used
+     #
+     # _@return_ — without a block - the Writable sink which has to be closed manually
+     sig do
+       params(
+         filename: String,
+         modification_time: Time,
+         unix_permissions: T.nilable(Integer),
+         blk: T.proc.params(sink: ZipKit::Streamer::Writable).void
+       ).returns(ZipKit::Streamer::Writable)
+     end
+     def write_deflated_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk); end
+
+     # Closes the archive. Writes the central directory, and switches the writer into
+     # a state where it can no longer be written to.
+     #
+     # Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
+     #
+     # _@return_ — the offset the output IO is at after closing the archive
+     sig { returns(Integer) }
+     def close; end
+
+     # Sets up the ZipWriter with wrappers if necessary. The method is called once, when the Streamer
+     # gets instantiated - the Writer then gets reused. This method is primarily there so that you
+     # can override it.
+     #
+     # _@return_ — the writer to perform writes with
+     sig { returns(ZipKit::ZipWriter) }
+     def create_writer; end
+
+     # Updates the last entry written with the CRC32 checksum and compressed/uncompressed
+     # sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
+     # After updating the entry it will immediately write the data descriptor bytes
+     # to the output.
+     #
+     # _@param_ `crc32` — the CRC32 checksum of the entry when uncompressed
+     #
+     # _@param_ `compressed_size` — the size of the compressed segment within the ZIP
+     #
+     # _@param_ `uncompressed_size` — the size of the entry once uncompressed
+     #
+     # _@return_ — the offset the output IO is at after writing the data descriptor
+     sig { params(crc32: Integer, compressed_size: Integer, uncompressed_size: Integer).returns(Integer) }
+     def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:); end
+
+     # Removes the buffered local entry for the last file written. This can be used when rescuing from exceptions
+     # when you want to skip the file that failed writing into the ZIP from getting written out into the
+     # ZIP central directory. This is useful when, for example, you encounter errors retrieving the file
+     # that you want to place inside the ZIP from a remote storage location and some network exception
+     # gets raised. `write_deflated_file` and `write_stored_file` will rollback for you automatically.
+     # Of course it is not possible to remove the failed entry from the ZIP file entirely, as the data
+     # is likely already on the wire. However, excluding the entry from the central directory of the ZIP
+     # file will allow better-behaved ZIP unarchivers to extract the entries which did store correctly,
+     # provided they read the ZIP from the central directory and not straight-ahead.
+     #
+     # _@return_ — position in the output stream / ZIP archive
+     #
+     # ```ruby
+     # zip.add_stored_entry(filename: "data.bin", size: 4.megabytes, crc32: the_crc)
+     # begin
+     #   while chunk = remote.read(65 * 2048)
+     #     zip << chunk
+     #   end
+     # rescue Timeout::Error
+     #   zip.rollback!
+     #   # and proceed to the next file
+     # end
+     # ```
+     sig { returns(Integer) }
+     def rollback!; end
+
+     # sord omit - no YARD type given for "writable", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(writable: T.untyped, block_to_pass_writable_to: T.untyped).returns(T.untyped) }
+     def yield_or_return_writable(writable, &block_to_pass_writable_to); end
+
+     # sord omit - no YARD return type given, using untyped
+     sig { returns(T.untyped) }
+     def verify_offsets!; end
+
+     # sord omit - no YARD type given for "filename:", using untyped
+     # sord omit - no YARD type given for "modification_time:", using untyped
+     # sord omit - no YARD type given for "crc32:", using untyped
+     # sord omit - no YARD type given for "storage_mode:", using untyped
+     # sord omit - no YARD type given for "compressed_size:", using untyped
+     # sord omit - no YARD type given for "uncompressed_size:", using untyped
+     # sord omit - no YARD type given for "use_data_descriptor:", using untyped
+     # sord omit - no YARD type given for "unix_permissions:", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig do
+       params(
+         filename: T.untyped,
+         modification_time: T.untyped,
+         crc32: T.untyped,
+         storage_mode: T.untyped,
+         compressed_size: T.untyped,
+         uncompressed_size: T.untyped,
+         use_data_descriptor: T.untyped,
+         unix_permissions: T.untyped
+       ).returns(T.untyped)
+     end
+     def add_file_and_write_local_header(filename:, modification_time:, crc32:, storage_mode:, compressed_size:, uncompressed_size:, use_data_descriptor:, unix_permissions:); end
+
+     # sord omit - no YARD type given for "filename", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(filename: T.untyped).returns(T.untyped) }
+     def remove_backslash(filename); end
+
+     # Writes the given data to the output stream. Allows the object to be used as
+     # a target for `IO.copy_stream(from, to)`
+     #
+     # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
+     #
+     # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
+     sig { params(bytes: String).returns(Fixnum) }
+     def write(bytes); end
+
+     # Is used internally by Streamer to keep track of entries in the archive during writing.
+     # Normally you will not have to use this class directly
+     class Entry < Struct
+       sig { void }
+       def initialize; end
+
+       # sord omit - no YARD return type given, using untyped
+       sig { returns(T.untyped) }
+       def total_bytes_used; end
+
+       # sord omit - no YARD return type given, using untyped
+       # Sets the general purpose flags for the entry. The one we care about is the EFS
+       # bit (bit 11), which should be set if the filename is UTF-8. If it is, we need to set the
+       # bit so that the unarchiving application knows that the filename in the archive is UTF-8
+       # encoded, and not some DOS default. For ASCII entries it does not matter.
+       # Additionally, we care about bit 3 which toggles the use of the postfix data descriptor.
+       sig { returns(T.untyped) }
+       def gp_flags; end
+
+       sig { returns(T::Boolean) }
+       def filler?; end
+
+       # Returns the value of attribute filename
+       sig { returns(Object) }
+       attr_accessor :filename
+
+       # Returns the value of attribute crc32
+       sig { returns(Object) }
+       attr_accessor :crc32
+
+       # Returns the value of attribute compressed_size
+       sig { returns(Object) }
+       attr_accessor :compressed_size
+
+       # Returns the value of attribute uncompressed_size
+       sig { returns(Object) }
+       attr_accessor :uncompressed_size
+
+       # Returns the value of attribute storage_mode
+       sig { returns(Object) }
+       attr_accessor :storage_mode
+
+       # Returns the value of attribute mtime
+       sig { returns(Object) }
+       attr_accessor :mtime
+
+       # Returns the value of attribute use_data_descriptor
+       sig { returns(Object) }
+       attr_accessor :use_data_descriptor
+
+       # Returns the value of attribute local_header_offset
+       sig { returns(Object) }
+       attr_accessor :local_header_offset
+
+       # Returns the value of attribute bytes_used_for_local_header
+       sig { returns(Object) }
+       attr_accessor :bytes_used_for_local_header
+
+       # Returns the value of attribute bytes_used_for_data_descriptor
+       sig { returns(Object) }
+       attr_accessor :bytes_used_for_data_descriptor
+
+       # Returns the value of attribute unix_permissions
+       sig { returns(Object) }
+       attr_accessor :unix_permissions
+     end
+
+     # Is used internally by Streamer to keep track of entries in the archive during writing.
+     # Normally you will not have to use this class directly
+     class Filler < Struct
+       sig { returns(T::Boolean) }
+       def filler?; end
+
+       # Returns the value of attribute total_bytes_used
+       sig { returns(Object) }
+       attr_accessor :total_bytes_used
+     end
+
+     # Gets yielded from the writing methods of the Streamer
+     # and accepts the data being written into the ZIP for deflate
+     # or stored modes. Can be used as a destination for `IO.copy_stream`
+     #
+     #     IO.copy_stream(File.open('source.bin', 'rb'), writable)
+     class Writable
+       include ZipKit::WriteShovel
+
+       # sord omit - no YARD type given for "streamer", using untyped
+       # sord omit - no YARD type given for "writer", using untyped
+       # Initializes a new Writable with the object it delegates the writes to.
+       # Normally you would not need to use this method directly
+       sig { params(streamer: T.untyped, writer: T.untyped).void }
+       def initialize(streamer, writer); end
+
+       # Writes the given data to the output stream
+       #
+       # _@param_ `d` — the binary string to write (part of the uncompressed file)
+       sig { params(d: String).returns(T.self_type) }
+       def <<(d); end
+
+       # sord omit - no YARD return type given, using untyped
+       # Flushes the writer and recovers the CRC32/size values. It then calls
+       # `update_last_entry_and_write_data_descriptor` on the given Streamer.
+       sig { returns(T.untyped) }
+       def close; end
+
+       # Writes the given data to the output stream. Allows the object to be used as
+       # a target for `IO.copy_stream(from, to)`
+       #
+       # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
+       #
+       # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
+       sig { params(bytes: String).returns(Fixnum) }
+       def write(bytes); end
+     end
+
+     # Will be used to pick whether to store a file in the `stored` or
+     # `deflated` mode, by compressing the first N bytes of the file and
+     # comparing the stored and deflated data sizes. If deflate produces
+     # a sizable compression gain for this data, it will create a deflated
+     # file inside the ZIP archive. If the file doesn't compress well, it
+     # will use the "stored" mode for the entry. About 128KB of the
+     # file will be buffered to pick the appropriate storage mode. The
+     # Heuristic will call either `write_stored_file` or `write_deflated_file`
+     # on the Streamer passed into it once it knows which compression
+     # method should be applied
+     class Heuristic < ZipKit::Streamer::Writable
+       BYTES_WRITTEN_THRESHOLD = T.let(128 * 1024, T.untyped)
+       MINIMUM_VIABLE_COMPRESSION = T.let(0.75, T.untyped)
+
+       # sord omit - no YARD type given for "streamer", using untyped
+       # sord omit - no YARD type given for "filename", using untyped
+       # sord omit - no YARD type given for "**write_file_options", using untyped
+       sig { params(streamer: T.untyped, filename: T.untyped, write_file_options: T.untyped).void }
+       def initialize(streamer, filename, **write_file_options); end
+
+       # sord infer - argument name in single @param inferred as "bytes"
+       sig { params(bytes: String).returns(T.self_type) }
+       def <<(bytes); end
+
+       # sord omit - no YARD return type given, using untyped
+       sig { returns(T.untyped) }
+       def close; end
+
+       # sord omit - no YARD return type given, using untyped
+       sig { returns(T.untyped) }
+       def decide; end
+     end
+
+     # Sends writes to the given `io`, and also registers all the data passing
+     # through it in a CRC32 checksum calculator. Is made to be completely
+     # interchangeable with the DeflatedWriter in terms of interface.
+     class StoredWriter
+       include ZipKit::WriteShovel
+       CRC32_BUFFER_SIZE = T.let(64 * 1024, T.untyped)
+
+       # sord omit - no YARD type given for "io", using untyped
+       sig { params(io: T.untyped).void }
+       def initialize(io); end
+
+       # Writes the given data to the contained IO object.
+       #
+       # _@param_ `data` — data to be written
+       #
+       # _@return_ — self
+       sig { params(data: String).returns(T.untyped) }
+       def <<(data); end
+
+       # Returns the amount of data written and the CRC32 checksum. The return value
+       # can be directly used as the argument to {Streamer#update_last_entry_and_write_data_descriptor}
+       #
+       # _@return_ — a hash of `{crc32, compressed_size, uncompressed_size}`
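+       #
+       # A sketch of how that result is typically consumed (assuming the hash uses
+       # symbol keys, which the keyword splat requires):
+       #
+       #     result = writer.finish
+       #     streamer.update_last_entry_and_write_data_descriptor(**result)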
+       sig { returns(T::Hash[T.untyped, T.untyped]) }
+       def finish; end
+
+       # Writes the given data to the output stream. Allows the object to be used as
+       # a target for `IO.copy_stream(from, to)`
+       #
+       # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
+       #
+       # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
+       sig { params(bytes: String).returns(Fixnum) }
+       def write(bytes); end
+     end
+
+     # Sends writes to the given `io` compressed using a `Zlib::Deflate`. Also
+     # registers data passing through it in a CRC32 checksum calculator. Is made to be completely
+     # interchangeable with the StoredWriter in terms of interface.
+     class DeflatedWriter
+       include ZipKit::WriteShovel
+       CRC32_BUFFER_SIZE = T.let(64 * 1024, T.untyped)
+
+       # sord omit - no YARD type given for "io", using untyped
+       sig { params(io: T.untyped).void }
+       def initialize(io); end
+
+       # Writes the given data into the deflater, and flushes the deflater
+       # after having written more than FLUSH_EVERY_N_BYTES bytes of data
+       #
+       # _@param_ `data` — data to be written
+       #
+       # _@return_ — self
+       sig { params(data: String).returns(T.untyped) }
+       def <<(data); end
+
+       # Returns the amount of data received for writing, the amount of
+       # compressed data written and the CRC32 checksum. The return value
+       # can be directly used as the argument to {Streamer#update_last_entry_and_write_data_descriptor}
+       #
+       # _@return_ — a hash of `{crc32, compressed_size, uncompressed_size}`
+       sig { returns(T::Hash[T.untyped, T.untyped]) }
+       def finish; end
+
+       # Writes the given data to the output stream. Allows the object to be used as
+       # a target for `IO.copy_stream(from, to)`
+       #
+       # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
+       #
+       # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
+       sig { params(bytes: String).returns(Fixnum) }
+       def write(bytes); end
+     end
+   end
+
+   # An object that fakes just-enough of an IO to be dangerous
+   # - or, more precisely, to be useful as a source for the FileReader
+   # central directory parser. Effectively we substitute an IO object
+   # for an object that fetches parts of the remote file over HTTP using `Range:`
+   # headers. The `RemoteIO` acts as an adapter between an object that performs the
+   # actual fetches over HTTP and an object that expects a handful of IO methods to be
+   # available.
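+   #
+   # A sketch of pairing it with the FileReader (the URL is hypothetical and the
+   # server must support `Range:` requests):
+   #
+   #     io = ZipKit::RemoteIO.new("https://example.com/large.zip")
+   #     entries = ZipKit::FileReader.read_zip_structure(io: io)
+   #     entries.each { |entry| puts entry.filename }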
+   class RemoteIO
+     # sord warn - URI wasn't able to be resolved to a constant in this project
+     # _@param_ `url` — the HTTP/HTTPS URL of the object to be retrieved
+     sig { params(url: T.any(String, URI)).void }
+     def initialize(url); end
+
+     # sord omit - no YARD return type given, using untyped
+     # Emulates IO#seek
+     #
+     # _@param_ `offset` — absolute offset in the remote resource to seek to
+     #
+     # _@param_ `mode` — The seek mode (only SEEK_SET is supported)
+     sig { params(offset: Integer, mode: Integer).returns(T.untyped) }
+     def seek(offset, mode = IO::SEEK_SET); end
+
+     # Emulates IO#size.
+     #
+     # _@return_ — the size of the remote resource
+     sig { returns(Integer) }
+     def size; end
+
+     # Emulates IO#read, but requires the number of bytes to read.
+     # The read will be limited to the
+     # size of the remote resource relative to the current offset in the IO,
+     # so if you are at offset 0 in the IO of size 10, doing a `read(20)`
+     # will only return you 10 bytes of result, and not raise any exceptions.
+     #
+     # _@param_ `n_bytes` — how many bytes to read, or `nil` to read all the way to the end
+     #
+     # _@return_ — the read bytes
+     sig { params(n_bytes: T.nilable(Fixnum)).returns(String) }
+     def read(n_bytes = nil); end
+
+     # Returns the current pointer position within the IO
+     sig { returns(Fixnum) }
+     def tell; end
+
+     # Only used internally when reading the remote ZIP.
+     #
+     # _@param_ `range` — the HTTP range of data to fetch from remote
+     #
+     # _@return_ — the response body of the ranged request
+     sig { params(range: T::Range[T.untyped]).returns(String) }
+     def request_range(range); end
+
+     # For working with S3 it is a better idea to perform a GET request for one byte, since doing a HEAD
+     # request needs a different permission - and standard GET presigned URLs are not allowed to perform it
+     #
+     # _@return_ — the size of the remote resource, parsed either from Content-Length or Content-Range header
+     sig { returns(Integer) }
+     def request_object_size; end
+
+     # sord omit - no YARD type given for "a", using untyped
+     # sord omit - no YARD type given for "b", using untyped
+     # sord omit - no YARD type given for "c", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(a: T.untyped, b: T.untyped, c: T.untyped).returns(T.untyped) }
+     def clamp(a, b, c); end
+   end
+
+   # A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
+   # separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
+   #
+   # The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
+   # is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
+   # without depending on data available elsewhere. This makes the writer very easy to test, since each of
+   # its methods outputs something that only depends on the method's arguments. For example, we use this
+   # to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
+   # to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
+   # with given offsets as arguments.
+   #
+   # Since some methods need a lot of data about the entity being written, everything is passed via
+   # keyword arguments - this way it is much less likely that you can make a mistake writing something.
+   #
+   # Another reason for having a separate Writer is that most ZIP libraries attach the methods for
+   # writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
+   # However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
+   # as much as possible of the code responsible for writing the actual encoded bytes available to you on
+   # one screen. Altering or checking that code then becomes much, much easier. The methods doing the
+   # writing are also intentionally left very verbose - so that you can follow what is happening at
+   # all times.
+   #
+   # All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
+   # that to output to String objects, or to output to Arrays that you can later join together.
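+   #
+   # For instance, a local file header could be written into a String buffer like this
+   # (the sizes, CRC32 and name are made up for illustration):
+   #
+   #     buf = "".b
+   #     ZipKit::ZipWriter.new.write_local_file_header(io: buf,
+   #       filename: "hello.txt", compressed_size: 5, uncompressed_size: 5,
+   #       crc32: 907060870, gp_flags: 0, mtime: Time.now.utc, storage_mode: 0)
+   #     buf.bytesize # => the size of the encoded header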
+   class ZipWriter
+     FOUR_BYTE_MAX_UINT = T.let(0xFFFFFFFF, T.untyped)
+     TWO_BYTE_MAX_UINT = T.let(0xFFFF, T.untyped)
+     ZIP_KIT_COMMENT = T.let("Written using ZipKit %<version>s" % {version: ZipKit::VERSION}, T.untyped)
+     VERSION_MADE_BY = T.let(52, T.untyped)
+     VERSION_NEEDED_TO_EXTRACT = T.let(20, T.untyped)
+     VERSION_NEEDED_TO_EXTRACT_ZIP64 = T.let(45, T.untyped)
+     DEFAULT_FILE_UNIX_PERMISSIONS = T.let(0o644, T.untyped)
+     DEFAULT_DIRECTORY_UNIX_PERMISSIONS = T.let(0o755, T.untyped)
+     FILE_TYPE_FILE = T.let(0o10, T.untyped)
+     FILE_TYPE_DIRECTORY = T.let(0o04, T.untyped)
+     MADE_BY_SIGNATURE = T.let(begin
+       # A combination of the VERSION_MADE_BY low byte and the OS type high byte
+       os_type = 3 # UNIX
+       [VERSION_MADE_BY, os_type].pack("CC")
+     end, T.untyped)
+     C_UINT4 = T.let("V", T.untyped)
+     C_UINT2 = T.let("v", T.untyped)
+     C_UINT8 = T.let("Q<", T.untyped)
+     C_CHAR = T.let("C", T.untyped)
+     C_INT4 = T.let("l<", T.untyped)
+
+     # sord duck - #<< looks like a duck type, replacing with untyped
+     # Writes the local file header, which precedes the actual file _data_.
+     #
+     # _@param_ `io` — the buffer to write the local file header to
+     #
+     # _@param_ `filename` — the name of the file in the archive
+     #
+     # _@param_ `compressed_size` — The size of the compressed (or stored) data - how much space it uses in the ZIP
+     #
+     # _@param_ `uncompressed_size` — The size of the file once extracted
+     #
+     # _@param_ `crc32` — The CRC32 checksum of the file
+     #
+     # _@param_ `mtime` — the modification time to be recorded in the ZIP
+     #
+     # _@param_ `gp_flags` — bit-packed general purpose flags
+     #
+     # _@param_ `storage_mode` — 8 for deflated, 0 for stored...
+     sig do
+       params(
+         io: T.untyped,
+         filename: String,
+         compressed_size: Fixnum,
+         uncompressed_size: Fixnum,
+         crc32: Fixnum,
+         gp_flags: Fixnum,
+         mtime: Time,
+         storage_mode: Fixnum
+       ).void
+     end
+     def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:); end
+
+     # sord duck - #<< looks like a duck type, replacing with untyped
+     # sord omit - no YARD type given for "local_file_header_location:", using untyped
+     # sord omit - no YARD type given for "storage_mode:", using untyped
+     # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
+     # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
+     #
+     # _@param_ `io` — the buffer to write the local file header to
+     #
+     # _@param_ `filename` — the name of the file in the archive
+     #
+     # _@param_ `compressed_size` — The size of the compressed (or stored) data - how much space it uses in the ZIP
+     #
+     # _@param_ `uncompressed_size` — The size of the file once extracted
+     #
+     # _@param_ `crc32` — The CRC32 checksum of the file
+     #
+     # _@param_ `mtime` — the modification time to be recorded in the ZIP
+     #
+     # _@param_ `gp_flags` — bit-packed general purpose flags
+     #
+     # _@param_ `unix_permissions` — the permissions for the file, or nil for the default to be used
+     sig do
+       params(
+         io: T.untyped,
+         local_file_header_location: T.untyped,
+         gp_flags: Fixnum,
+         storage_mode: T.untyped,
+         compressed_size: Fixnum,
+         uncompressed_size: Fixnum,
+         mtime: Time,
+         crc32: Fixnum,
+         filename: String,
+         unix_permissions: T.nilable(Integer)
+       ).void
+     end
+     def write_central_directory_file_header(io:, local_file_header_location:, gp_flags:, storage_mode:, compressed_size:, uncompressed_size:, mtime:, crc32:, filename:, unix_permissions: nil); end
+
+     # sord duck - #<< looks like a duck type, replacing with untyped
+     # Writes the data descriptor following the file data for a file whose local file header
+     # was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold,
+     # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
+     #
+     # _@param_ `io` — the buffer to write the local file header to
+     #
+     # _@param_ `crc32` — The CRC32 checksum of the file
+     #
+     # _@param_ `compressed_size` — The size of the compressed (or stored) data - how much space it uses in the ZIP
+     #
+     # _@param_ `uncompressed_size` — The size of the file once extracted
+     sig do
+       params(
+         io: T.untyped,
+         compressed_size: Fixnum,
+         uncompressed_size: Fixnum,
+         crc32: Fixnum
+       ).void
+     end
+     def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:); end
+
+     # sord duck - #<< looks like a duck type, replacing with untyped
+     # Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
+     #
+     # _@param_ `io` — the buffer to write the central directory to.
+     #
+     # _@param_ `start_of_central_directory_location` — byte offset of the start of central directory from the beginning of ZIP file
+     #
+     # _@param_ `central_directory_size` — the size of the central directory (only file headers) in bytes
+     #
+     # _@param_ `num_files_in_archive` — How many files the archive contains
+     #
+     # _@param_ `comment` — the comment for the archive (defaults to ZIP_KIT_COMMENT)
+     sig do
+       params(
+         io: T.untyped,
+         start_of_central_directory_location: Fixnum,
+         central_directory_size: Fixnum,
+         num_files_in_archive: Fixnum,
+         comment: String
+       ).void
+     end
+     def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:, comment: ZIP_KIT_COMMENT); end
+
+     # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
+     #
+     # _@param_ `compressed_size` — The size of the compressed (or stored) data - how much space it uses in the ZIP
+     #
+     # _@param_ `uncompressed_size` — The size of the file once extracted
+     sig { params(compressed_size: Fixnum, uncompressed_size: Fixnum).returns(String) }
+     def zip_64_extra_for_local_file_header(compressed_size:, uncompressed_size:); end
+
+     # sord omit - no YARD type given for "mtime", using untyped
+     # sord omit - no YARD return type given, using untyped
+     # Writes the extended timestamp information field for local headers.
+     #
+     # The spec defines 2
+     # different formats - the one for the local file header can also accommodate the
+     # atime and ctime, whereas the one for the central directory can only take
+     # the mtime - and refers the reader to the local header extra to obtain the
+     # remaining times
+     sig { params(mtime: T.untyped).returns(T.untyped) }
+     def timestamp_extra_for_local_file_header(mtime); end
+
+     # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
+     # also contains the location of the local file header in the ZIP as an 8-byte int.
+     #
+     # _@param_ `compressed_size` — The size of the compressed (or stored) data - how much space it uses in the ZIP
+     #
+     # _@param_ `uncompressed_size` — The size of the file once extracted
+     #
+     # _@param_ `local_file_header_location` — Byte offset of the start of the local file header from the beginning of the ZIP archive
+     sig { params(compressed_size: Fixnum, uncompressed_size: Fixnum, local_file_header_location: Fixnum).returns(String) }
+     def zip_64_extra_for_central_directory_file_header(compressed_size:, uncompressed_size:, local_file_header_location:); end
+
+     # sord omit - no YARD type given for "t", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(t: T.untyped).returns(T.untyped) }
+     def to_binary_dos_time(t); end
+
+     # sord omit - no YARD type given for "t", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(t: T.untyped).returns(T.untyped) }
+     def to_binary_dos_date(t); end
+
+     # sord omit - no YARD type given for "values_to_packspecs", using untyped
+     # sord omit - no YARD return type given, using untyped
+     # Unzips a given array of tuples of "numeric value, pack specifier" and then packs all the odd
+     # values using specifiers from all the even values. It is harder to explain than to show:
+     #
+     #     pack_array([1, 'V', 2, 'v', 148, 'v']) #=> "\x01\x00\x00\x00\x02\x00\x94\x00"
+     #
+     # will do the following two transforms:
+     #
+     #     [1, 'V', 2, 'v', 148, 'v'] -> [1, 2, 148], ['V', 'v', 'v'] -> [1, 2, 148].pack('Vvv') -> "\x01\x00\x00\x00\x02\x00\x94\x00"
+     #
+     # This might seem like a "clever optimisation" but the issue is that `pack` needs an array allocated per call, and
+     # we output very verbosely - value-by-value. This might be quite a few array allocs. Using something like this
+     # helps us save the array allocs
+     sig { params(values_to_packspecs: T.untyped).returns(T.untyped) }
+     def pack_array(values_to_packspecs); end
+
+     # sord omit - no YARD type given for "unix_permissions_int", using untyped
+     # sord omit - no YARD type given for "file_type_int", using untyped
+     # sord omit - no YARD return type given, using untyped
+     sig { params(unix_permissions_int: T.untyped, file_type_int: T.untyped).returns(T.untyped) }
+     def generate_external_attrs(unix_permissions_int, file_type_int); end
+   end
1088
+
1089
+ # Acts as a converter between callers which send data to the `#<<` method (such as all the ZipKit
1090
+ # writer methods, which push onto anything), and a given block. Every time `#<<` gets called on the BlockWrite,
1091
+ # the block given to the constructor will be called with the same argument. ZipKit uses this object
1092
+ # when integrating with Rack and in the OutputEnumerator. Normally you wouldn't need to use it manually but
1093
+ # you always can. BlockWrite will also ensure the binary string encoding is forced onto any string
1094
+ # that passes through it.
1095
+ #
1096
+ # For example, you can create a Rack response body like so:
1097
+ #
1098
+ # class MyRackResponse
1099
+ # def each
1100
+ # writer = ZipKit::BlockWrite.new {|chunk| yield(chunk) }
1101
+ # writer << "Hello" << "world" << "!"
1102
+ # end
1103
+ # end
1104
+ # [200, {}, MyRackResponse.new]
1105
+ class BlockWrite
1106
+ include ZipKit::WriteShovel
1107
+
1108
+ # Creates a new BlockWrite.
1109
+ #
1110
+ # _@param_ `block` — The block that will be called when this object receives the `<<` message
1111
+ sig { params(block: T.proc.params(bytes: String).void).void }
1112
+ def initialize(&block); end
1113
+
1114
+ # Sends a string through to the block stored in the BlockWrite.
1115
+ #
1116
+ # _@param_ `buf` — the string to write. Note that a zero-length String will not be forwarded to the block, as it has special meaning when used with chunked encoding (it indicates the end of the stream).
1117
+ sig { params(buf: String).returns(ZipKit::BlockWrite) }
1118
+ def <<(buf); end
1119
+
1120
+ # Writes the given data to the output stream. Allows the object to be used as
1121
+ # a target for `IO.copy_stream(from, to)`
1122
+ #
1123
+ # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
1124
+ #
1125
+ # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
1126
+ sig { params(bytes: String).returns(Fixnum) }
1127
+ def write(bytes); end
1128
+ end
1129
+
1130
+ # A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
1131
+ # time we attempt to keep it somewhat concise.
1132
+ #
1133
+ # ## REALLY CRAZY IMPORTANT STUFF: SECURITY IMPLICATIONS
1134
+ #
1135
+ # Please **BEWARE** - using this is a security risk if you are reading files that have been
1136
+ # supplied by users. This implementation has _not_ been formally verified for correctness. As
1137
+ # ZIP files contain relative offsets in lots of places it might be possible for a maliciously
1138
+ # crafted ZIP file to put the decode procedure in an endless loop, make it attempt huge reads
1139
+ # from the input file and so on. Additionally, the reader module for deflated data has
1140
+ # no support for ZIP bomb protection. So either limit the `FileReader` usage to the files you
1141
+ # trust, or triple-check all the inputs upfront. Patches to make this reader more secure
1142
+ # are welcome of course.
1143
+ #
1144
+ # ## Usage
1145
+ #
1146
+ # File.open('zipfile.zip', 'rb') do |f|
1147
+ # entries = ZipKit::FileReader.read_zip_structure(io: f)
1148
+ # entries.each do |e|
1149
+ # File.open(e.filename, 'wb') do |extracted_file|
1150
+ # ex = e.extractor_from(f)
1151
+ # extracted_file << ex.extract(1024 * 1024) until ex.eof?
1152
+ # end
1153
+ # end
1154
+ # end
1155
+ #
1156
+ # ## Supported features
1157
+ #
1158
+ # * Deflate and stored storage modes
1159
+ # * Zip64 (extra fields and offsets)
1160
+ # * Data descriptors
1161
+ #
1162
+ # ## Unsupported features
1163
+ #
1164
+ # * Archives split over multiple disks/files
1165
+ # * Any ZIP encryption
1166
+ # * EFS language flag and InfoZIP filename extra field
1167
+ # * CRC32 checksums are _not_ verified
1168
+ #
1169
+ # ## Mode of operation
1170
+ #
1171
+ # By default, `FileReader` _ignores_ the data in local file headers (as it is
1172
+ # often unreliable). It reads the ZIP file "from the tail", finds the
1173
+ # end-of-central-directory signatures, then reads the central directory entries,
1174
+ # reconstitutes the entries with their filenames, attributes and so on, and
1175
+ # sets these entries up with the absolute _offsets_ into the source file/IO object.
1176
+ # These offsets can then be used to extract the actual compressed data of
1177
+ # the files and to expand it.
1178
+ #
1179
+ # ## Recovering damaged or incomplete ZIP files
1180
+ #
1181
+ # If the ZIP file you are trying to read does not contain the central directory
1182
+ # records, `read_zip_structure` will not work, since it starts the read process
1183
+ # from the EOCD marker at the end of the central directory and then crawls
1184
+ # "back" in the IO to figure out the rest. You can explicitly apply a fallback
1185
+ # for reading the archive "straight ahead" instead, using `read_zip_straight_ahead`
1186
+ # - the method will instead scan your IO from the very start, skipping over
1187
+ # the actual entry data. This is less efficient than central directory parsing since
1188
+ # it involves a much larger number of reads (1 read from the IO per entry in the ZIP).
1189
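+ #
+ # For example, a minimal sketch of that fallback (assuming `truncated.zip` is a
+ # hypothetical archive with a damaged or missing central directory):
+ #
+ #    entries = File.open('truncated.zip', 'rb') do |f|
+ #      ZipKit::FileReader.read_zip_straight_ahead(io: f)
+ #    end
+ #    entries.map(&:filename) # the entries recovered before hitting EOF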
+ class FileReader
1190
+ ReadError = T.let(Class.new(StandardError), T.untyped)
1191
+ UnsupportedFeature = T.let(Class.new(StandardError), T.untyped)
1192
+ InvalidStructure = T.let(Class.new(ReadError), T.untyped)
1193
+ LocalHeaderPending = T.let(Class.new(StandardError) do
1194
+ def message
1195
+ "The compressed data offset is not available (local header has not been read)"
1196
+ end
1197
+ end, T.untyped)
1198
+ MissingEOCD = T.let(Class.new(StandardError) do
1199
+ def message
1200
+ "Could not find the EOCD signature in the buffer - maybe a malformed ZIP file"
1201
+ end
1202
+ end, T.untyped)
1203
+ C_UINT4 = T.let("V", T.untyped)
1204
+ C_UINT2 = T.let("v", T.untyped)
1205
+ C_UINT8 = T.let("Q<", T.untyped)
1206
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = T.let(4 + # Offset of the start of central directory
1207
+ 4 + # Size of the central directory
1208
+ 2 + # Number of files in the cdir
1209
+ 4 + # End-of-central-directory signature
1210
+ 2 + # Number of this disk
1211
+ 2 + # Number of disk with the start of cdir
1212
+ 2 + # Number of files in the cdir of this disk
1213
+ 2 + # The comment size
1214
+ 0xFFFF, T.untyped)
1215
+ MAX_LOCAL_HEADER_SIZE = T.let(4 + # signature
1216
+ 2 + # Version needed to extract
1217
+ 2 + # gp flags
1218
+ 2 + # storage mode
1219
+ 2 + # dos time
1220
+ 2 + # dos date
1221
+ 4 + # CRC32
1222
+ 4 + # Comp size
1223
+ 4 + # Uncomp size
1224
+ 2 + # Filename size
1225
+ 2 + # Extra fields size
1226
+ 0xFFFF + # Maximum filename size
1227
+ 0xFFFF, T.untyped)
1228
+ SIZE_OF_USABLE_EOCD_RECORD = T.let(4 + # Signature
1229
+ 2 + # Number of this disk
1230
+ 2 + # Number of the disk with the EOCD record
1231
+ 2 + # Number of entries in the central directory of this disk
1232
+ 2 + # Number of entries in the central directory total
1233
+ 4 + # Size of the central directory
1234
+ 4, T.untyped)
1235
+
1236
+ # sord duck - #tell looks like a duck type, replacing with untyped
1237
+ # sord duck - #seek looks like a duck type, replacing with untyped
1238
+ # sord duck - #read looks like a duck type, replacing with untyped
1239
+ # sord duck - #size looks like a duck type, replacing with untyped
1240
+ # Parse an IO handle to a ZIP archive into an array of Entry objects.
1241
+ #
1242
+ # _@param_ `io` — an IO-ish object
1243
+ #
1244
+ # _@param_ `read_local_headers` — whether the local headers must be read upfront. When reading a locally available ZIP file this option will not have much use since the small reads from the file handle are not going to be that important. However, if you are using remote reads to decipher a ZIP file located on an HTTP server, the operation _must_ perform an HTTP request for _each entry in the ZIP file_ to determine where the actual file data starts. This, for a ZIP archive of 1000 files, will incur 1000 extra HTTP requests - which you might not want to perform upfront, or - at least - not want to perform _at once_. When the option is set to `false`, you will be getting instances of `LazyEntry` instead of `Entry`. Those objects will raise an exception when you attempt to access their compressed data offset in the ZIP (since the reads have not been performed yet). As a rule, this option can be left in its default setting (`true`) unless you want to _only_ read the central directory, or you need to limit the number of HTTP requests.
1245
+ #
1246
+ # _@return_ — an array of entries within the ZIP being parsed
1247
+ sig { params(io: T.untyped, read_local_headers: T::Boolean).returns(T::Array[ZipEntry]) }
1248
+ def read_zip_structure(io:, read_local_headers: true); end
1249
+
1250
+ # sord duck - #tell looks like a duck type, replacing with untyped
1251
+ # sord duck - #read looks like a duck type, replacing with untyped
1252
+ # sord duck - #seek looks like a duck type, replacing with untyped
1253
+ # sord omit - no YARD return type given, using untyped
1254
+ # Sometimes you might encounter truncated ZIP files, which do not contain
1255
+ # any central directory whatsoever - or where the central directory is
1256
+ # truncated. In that case, employing the technique of reading the ZIP
1257
+ # "from the end" is impossible, and the only recourse is reading each
1258
+ # local file header in succession. If the entries in such a ZIP use data
1259
+ # descriptors, you would need to scan after the entry until you encounter
1260
+ # the data descriptor signature - and that might be unreliable at best.
1261
+ # Therefore, this reading technique does not support data descriptors.
1262
+ # It can however recover the entries you still can read if these entries
1263
+ # contain all the necessary information about the contained file.
1264
+ #
1265
+ # _@param_ `io` — the IO-ish object to read the local file
1266
+ # headers from
1267
+ #
1268
+ # _@return_ — an array of entries that could be recovered before hitting EOF
1269
+ sig { params(io: T.untyped).returns(T.untyped) }
1270
+ def read_zip_straight_ahead(io:); end
1271
+
1272
+ # sord duck - #read looks like a duck type, replacing with untyped
1273
+ # Parse the local header entry and get the offset in the IO at which the
1274
+ # actual compressed data of the file starts within the ZIP.
1275
+ # The method will eager-read the entire local header for the file
1276
+ # (the maximum size the local header may use), starting at the given offset,
1277
+ # and will then compute its size. That size plus the local header offset
1278
+ # given will be the compressed data offset of the entry (read starting at
1279
+ # this offset to get the data).
1280
+ #
1281
+ # _@param_ `io` — an IO-ish object the ZIP file can be read from
1282
+ #
1283
+ # _@return_ — the parsed local header entry and
1284
+ # the compressed data offset
1286
+ sig { params(io: T.untyped).returns(T::Array[T.any(ZipEntry, Fixnum)]) }
1287
+ def read_local_file_header(io:); end
1288
+
1289
+ # sord duck - #seek looks like a duck type, replacing with untyped
1290
+ # sord duck - #read looks like a duck type, replacing with untyped
1291
+ # sord omit - no YARD return type given, using untyped
1292
+ # Get the offset in the IO at which the actual compressed data of the file
1293
+ # starts within the ZIP. The method will eager-read the entire local header
1294
+ # for the file (the maximum size the local header may use), starting at the
1295
+ # given offset, and will then compute its size. That size plus the local
1296
+ # header offset given will be the compressed data offset of the entry
1297
+ # (read starting at this offset to get the data).
1298
+ #
1299
+ # _@param_ `io` — an IO-ish object the ZIP file can be read from
1300
+ #
1301
+ # _@param_ `local_file_header_offset` — absolute offset (0-based) where the
1302
+ # local file header is supposed to begin
1303
+ #
1304
+ # _@return_ — absolute offset (0-based) of where the compressed data begins for this file within the ZIP
1305
+ sig { params(io: T.untyped, local_file_header_offset: Fixnum).returns(T.untyped) }
1306
+ def get_compressed_data_offset(io:, local_file_header_offset:); end
1307
+
1308
+ # Parse an IO handle to a ZIP archive into an array of Entry objects, reading from the end
1309
+ # of the IO object.
1310
+ #
1311
+ # _@param_ `options` — any options the instance method of the same name accepts
1312
+ #
1313
+ # _@return_ — an array of entries within the ZIP being parsed
1314
+ #
1315
+ # _@see_ `#read_zip_structure`
1316
+ sig { params(options: T::Hash[T.untyped, T.untyped]).returns(T::Array[ZipEntry]) }
1317
+ def self.read_zip_structure(**options); end
1318
+
1319
+ # Parse an IO handle to a ZIP archive into an array of Entry objects, reading from the start of
1320
+ # the file and parsing local file headers one-by-one
1321
+ #
1322
+ # _@param_ `options` — any options the instance method of the same name accepts
1323
+ #
1324
+ # _@return_ — an array of entries within the ZIP being parsed
1325
+ #
1326
+ # _@see_ `#read_zip_straight_ahead`
1327
+ sig { params(options: T::Hash[T.untyped, T.untyped]).returns(T::Array[ZipEntry]) }
1328
+ def self.read_zip_straight_ahead(**options); end
1329
+
1330
+ # sord omit - no YARD type given for "entries", using untyped
1331
+ # sord omit - no YARD type given for "io", using untyped
1332
+ # sord omit - no YARD return type given, using untyped
1333
+ sig { params(entries: T.untyped, io: T.untyped).returns(T.untyped) }
1334
+ def read_local_headers(entries, io); end
1335
+
1336
+ # sord omit - no YARD type given for "io", using untyped
1337
+ # sord omit - no YARD return type given, using untyped
1338
+ sig { params(io: T.untyped).returns(T.untyped) }
1339
+ def skip_ahead_2(io); end
1340
+
1341
+ # sord omit - no YARD type given for "io", using untyped
1342
+ # sord omit - no YARD return type given, using untyped
1343
+ sig { params(io: T.untyped).returns(T.untyped) }
1344
+ def skip_ahead_4(io); end
1345
+
1346
+ # sord omit - no YARD type given for "io", using untyped
1347
+ # sord omit - no YARD return type given, using untyped
1348
+ sig { params(io: T.untyped).returns(T.untyped) }
1349
+ def skip_ahead_8(io); end
1350
+
1351
+ # sord omit - no YARD type given for "io", using untyped
1352
+ # sord omit - no YARD type given for "absolute_pos", using untyped
1353
+ # sord omit - no YARD return type given, using untyped
1354
+ sig { params(io: T.untyped, absolute_pos: T.untyped).returns(T.untyped) }
1355
+ def seek(io, absolute_pos); end
1356
+
1357
+ # sord omit - no YARD type given for "io", using untyped
1358
+ # sord omit - no YARD type given for "signature_magic_number", using untyped
1359
+ # sord omit - no YARD return type given, using untyped
1360
+ sig { params(io: T.untyped, signature_magic_number: T.untyped).returns(T.untyped) }
1361
+ def assert_signature(io, signature_magic_number); end
1362
+
1363
+ # sord omit - no YARD type given for "io", using untyped
1364
+ # sord omit - no YARD type given for "n", using untyped
1365
+ # sord omit - no YARD return type given, using untyped
1366
+ sig { params(io: T.untyped, n: T.untyped).returns(T.untyped) }
1367
+ def skip_ahead_n(io, n); end
1368
+
1369
+ # sord omit - no YARD type given for "io", using untyped
1370
+ # sord omit - no YARD type given for "n_bytes", using untyped
1371
+ # sord omit - no YARD return type given, using untyped
1372
+ sig { params(io: T.untyped, n_bytes: T.untyped).returns(T.untyped) }
1373
+ def read_n(io, n_bytes); end
1374
+
1375
+ # sord omit - no YARD type given for "io", using untyped
1376
+ # sord omit - no YARD return type given, using untyped
1377
+ sig { params(io: T.untyped).returns(T.untyped) }
1378
+ def read_2b(io); end
1379
+
1380
+ # sord omit - no YARD type given for "io", using untyped
1381
+ # sord omit - no YARD return type given, using untyped
1382
+ sig { params(io: T.untyped).returns(T.untyped) }
1383
+ def read_4b(io); end
1384
+
1385
+ # sord omit - no YARD type given for "io", using untyped
1386
+ # sord omit - no YARD return type given, using untyped
1387
+ sig { params(io: T.untyped).returns(T.untyped) }
1388
+ def read_8b(io); end
1389
+
1390
+ # sord omit - no YARD type given for "io", using untyped
1391
+ # sord omit - no YARD return type given, using untyped
1392
+ sig { params(io: T.untyped).returns(T.untyped) }
1393
+ def read_cdir_entry(io); end
1394
+
1395
+ # sord omit - no YARD type given for "file_io", using untyped
1396
+ # sord omit - no YARD type given for "zip_file_size", using untyped
1397
+ # sord omit - no YARD return type given, using untyped
1398
+ sig { params(file_io: T.untyped, zip_file_size: T.untyped).returns(T.untyped) }
1399
+ def get_eocd_offset(file_io, zip_file_size); end
1400
+
1401
+ # sord omit - no YARD type given for "of_substring", using untyped
1402
+ # sord omit - no YARD type given for "in_string", using untyped
1403
+ # sord omit - no YARD return type given, using untyped
1404
+ sig { params(of_substring: T.untyped, in_string: T.untyped).returns(T.untyped) }
1405
+ def all_indices_of_substr_in_str(of_substring, in_string); end
1406
+
1407
+ # sord omit - no YARD type given for "in_str", using untyped
1408
+ # sord omit - no YARD return type given, using untyped
1409
+ # We have to scan the maximum possible number
1410
+ # of bytes that the EOCD can theoretically occupy including the comment after it,
1411
+ # and we have to find a combination of:
1412
+ # [EOCD signature, <some ZIP metadata>, comment byte size, comment of that size]
1413
+ # at the end. To do so, we first find all indices of the signature in the trailer
1414
+ # string, and then check whether the bytestring starting at the signature and
1415
+ # ending at the end of string satisfies that given pattern.
1416
+ sig { params(in_str: T.untyped).returns(T.untyped) }
1417
+ def locate_eocd_signature(in_str); end
1418
+
1419
+ # sord omit - no YARD type given for "file_io", using untyped
1420
+ # sord omit - no YARD type given for "eocd_offset", using untyped
1421
+ # sord omit - no YARD return type given, using untyped
1422
+ # Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
1423
+ # EOCD record in the archive by fixed offsets
1424
+ # get_zip64_eocd_location is too high. [15.17/15]
1425
+ sig { params(file_io: T.untyped, eocd_offset: T.untyped).returns(T.untyped) }
1426
+ def get_zip64_eocd_location(file_io, eocd_offset); end
1427
+
1428
+ # sord omit - no YARD type given for "io", using untyped
1429
+ # sord omit - no YARD type given for "zip64_end_of_cdir_location", using untyped
1430
+ # sord omit - no YARD return type given, using untyped
1431
+ # num_files_and_central_directory_offset_zip64 is too high. [21.12/15]
1432
+ sig { params(io: T.untyped, zip64_end_of_cdir_location: T.untyped).returns(T.untyped) }
1433
+ def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location); end
1434
+
1435
+ # sord omit - no YARD type given for "file_io", using untyped
1436
+ # sord omit - no YARD type given for "eocd_offset", using untyped
1437
+ # sord omit - no YARD return type given, using untyped
1438
+ # Start of the central directory offset
1439
+ sig { params(file_io: T.untyped, eocd_offset: T.untyped).returns(T.untyped) }
1440
+ def num_files_and_central_directory_offset(file_io, eocd_offset); end
1441
+
1442
+ # sord omit - no YARD return type given, using untyped
1443
+ # It is provided as a stub to be overridden in a subclass if you need it. It will report
1444
+ # during various stages of reading. The log message is contained in the return value
1445
+ # of `yield` in the method (the log messages are lazy-evaluated).
1446
+ sig { returns(T.untyped) }
1447
+ def log; end
1448
+
1449
+ # sord omit - no YARD type given for "extra_fields_str", using untyped
1450
+ # sord omit - no YARD return type given, using untyped
1451
+ sig { params(extra_fields_str: T.untyped).returns(T.untyped) }
1452
+ def parse_out_extra_fields(extra_fields_str); end
1453
+
1454
+ # Rubocop: convention: Missing top-level class documentation comment.
1455
+ class StoredReader
1456
+ # sord omit - no YARD type given for "from_io", using untyped
1457
+ # sord omit - no YARD type given for "compressed_data_size", using untyped
1458
+ sig { params(from_io: T.untyped, compressed_data_size: T.untyped).void }
1459
+ def initialize(from_io, compressed_data_size); end
1460
+
1461
+ # sord omit - no YARD type given for "n_bytes", using untyped
1462
+ # sord omit - no YARD return type given, using untyped
1463
+ sig { params(n_bytes: T.untyped).returns(T.untyped) }
1464
+ def extract(n_bytes = nil); end
1465
+
1466
+ sig { returns(T::Boolean) }
1467
+ def eof?; end
1468
+ end
1469
+
1470
+ # Rubocop: convention: Missing top-level class documentation comment.
1471
+ class InflatingReader
1472
+ # sord omit - no YARD type given for "from_io", using untyped
1473
+ # sord omit - no YARD type given for "compressed_data_size", using untyped
1474
+ sig { params(from_io: T.untyped, compressed_data_size: T.untyped).void }
1475
+ def initialize(from_io, compressed_data_size); end
1476
+
1477
+ # sord omit - no YARD type given for "n_bytes", using untyped
1478
+ # sord omit - no YARD return type given, using untyped
1479
+ sig { params(n_bytes: T.untyped).returns(T.untyped) }
1480
+ def extract(n_bytes = nil); end
1481
+
1482
+ sig { returns(T::Boolean) }
1483
+ def eof?; end
1484
+ end
1485
+
1486
+ # Represents a file within the ZIP archive being read. This is different from
1487
+ # the Entry object used in Streamer for ZIP writing, since during writing more
1488
+ # data can be kept in memory for immediate use.
1489
+ class ZipEntry
1490
+ # sord omit - no YARD type given for "from_io", using untyped
1491
+ # Returns a reader for the actual compressed data of the entry.
1492
+ #
1493
+ # reader = entry.extractor_from(source_file)
1494
+ # outfile << reader.extract(512 * 1024) until reader.eof?
1495
+ #
1496
+ # _@return_ — the reader for the data
1497
+ sig { params(from_io: T.untyped).returns(T.any(StoredReader, InflatingReader)) }
1498
+ def extractor_from(from_io); end
1499
+
1500
+ # _@return_ — at what offset you should start reading
1501
+ # for the compressed data in your original IO object
1502
+ sig { returns(Fixnum) }
1503
+ def compressed_data_offset; end
1504
+
1505
+ # Tells whether the compressed data offset is already known for this entry
1506
+ sig { returns(T::Boolean) }
1507
+ def known_offset?; end
1508
+
1509
+ # Tells whether the entry uses a data descriptor (this is defined
1510
+ # by bit 3 in the GP flags).
1511
+ sig { returns(T::Boolean) }
1512
+ def uses_data_descriptor?; end
1513
+
1514
+ # sord infer - inferred type of parameter "offset" as Fixnum using getter's return type
1515
+ # sord omit - no YARD return type given, using untyped
1516
+ # Sets the offset at which the compressed data for this file starts in the ZIP.
1517
+ # By default, the value will be set by the Reader for you. If you use delayed
1518
+ # reading, you need to set it by using the `get_compressed_data_offset` on the Reader:
1519
+ #
1520
+ # entry.compressed_data_offset = reader.get_compressed_data_offset(io: file,
1521
+ # local_file_header_offset: entry.local_header_offset)
1522
+ sig { params(offset: Fixnum).returns(T.untyped) }
1523
+ def compressed_data_offset=(offset); end
1524
+
1525
+ # _@return_ — bit-packed version signature of the program that made the archive
1526
+ sig { returns(Fixnum) }
1527
+ attr_accessor :made_by
1528
+
1529
+ # _@return_ — ZIP version support needed to extract this file
1530
+ sig { returns(Fixnum) }
1531
+ attr_accessor :version_needed_to_extract
1532
+
1533
+ # _@return_ — bit-packed general purpose flags
1534
+ sig { returns(Fixnum) }
1535
+ attr_accessor :gp_flags
1536
+
1537
+ # _@return_ — Storage mode (0 for stored, 8 for deflate)
1538
+ sig { returns(Fixnum) }
1539
+ attr_accessor :storage_mode
1540
+
1541
+ # _@return_ — the bit-packed DOS time
1542
+ sig { returns(Fixnum) }
1543
+ attr_accessor :dos_time
1544
+
1545
+ # _@return_ — the bit-packed DOS date
1546
+ sig { returns(Fixnum) }
1547
+ attr_accessor :dos_date
1548
+
1549
+ # _@return_ — the CRC32 checksum of this file
1550
+ sig { returns(Fixnum) }
1551
+ attr_accessor :crc32
1552
+
1553
+ # _@return_ — size of compressed file data in the ZIP
1554
+ sig { returns(Fixnum) }
1555
+ attr_accessor :compressed_size
1556
+
1557
+ # _@return_ — size of the file once uncompressed
1558
+ sig { returns(Fixnum) }
1559
+ attr_accessor :uncompressed_size
1560
+
1561
+ # _@return_ — the filename
1562
+ sig { returns(String) }
1563
+ attr_accessor :filename
1564
+
1565
+ # _@return_ — disk number where this file starts
1566
+ sig { returns(Fixnum) }
1567
+ attr_accessor :disk_number_start
1568
+
1569
+ # _@return_ — internal attributes of the file
1570
+ sig { returns(Fixnum) }
1571
+ attr_accessor :internal_attrs
1572
+
1573
+ # _@return_ — external attributes of the file
1574
+ sig { returns(Fixnum) }
1575
+ attr_accessor :external_attrs
1576
+
1577
+ # _@return_ — at what offset the local file header starts
1578
+ # in your original IO object
1579
+ sig { returns(Fixnum) }
1580
+ attr_accessor :local_file_header_offset
1581
+
1582
+ # _@return_ — the file comment
1583
+ sig { returns(String) }
1584
+ attr_accessor :comment
1585
+ end
1586
+ end
1587
+
1588
+ # Used when you need to supply a destination IO for some
1589
+ # write operations, but want to discard the data (like when
1590
+ # estimating the size of a ZIP)
1591
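+ #
+ # For example (the `<<` method returns the module itself, so writes chain):
+ #
+ #    ZipKit::NullWriter << "one" << "two" # both strings are discarded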
+ module NullWriter
1592
+ # _@param_ `_` — the data to write
1593
+ sig { params(_: String).returns(T.self_type) }
1594
+ def self.<<(_); end
1595
+ end
1596
+
1597
+ # Allows reading the central directory of a remote ZIP file without
1598
+ # downloading the entire file. The central directory provides the
1599
+ # offsets at which the actual file contents are located. You can then
1600
+ # use the `Range:` HTTP headers to download those entries separately.
1601
+ #
1602
+ # Please read the security warning in `FileReader` _VERY CAREFULLY_
1603
+ # before you use this module.
1604
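+ #
+ # For example, a minimal sketch (the URL is hypothetical, and the actual
+ # file data would still have to be fetched separately with `Range:` requests):
+ #
+ #    entries = ZipKit::RemoteUncap.files_within_zip_at('https://example.com/big.zip')
+ #    entries.each do |entry|
+ #      puts "#{entry.filename} - #{entry.compressed_size} bytes compressed"
+ #    end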
+ module RemoteUncap
1605
+ # _@param_ `uri` — the HTTP(S) URL to read the ZIP footer from
1606
+ #
1607
+ # _@param_ `reader_class` — which class to use for reading
1608
+ #
1609
+ # _@param_ `options_for_zip_reader` — any additional options to give to
1610
+ # {ZipKit::FileReader} when reading
1611
+ #
1612
+ # _@return_ — metadata about the
1613
+ # files within the remote archive
1615
+ sig { params(uri: String, reader_class: Class, options_for_zip_reader: T::Hash[T.untyped, T.untyped]).returns(T::Array[ZipKit::FileReader::ZipEntry]) }
1616
+ def self.files_within_zip_at(uri, reader_class: ZipKit::FileReader, **options_for_zip_reader); end
1617
+ end
1618
+
1619
+ # A simple stateful class for keeping track of a CRC32 value through multiple writes
1620
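+ #
+ # For example, a minimal sketch of combining incremental writes with a
+ # precomputed checksum (assumes `require "zlib"`):
+ #
+ #    crc = ZipKit::StreamCRC32.new
+ #    crc << "first chunk"
+ #    crc.append(Zlib.crc32("second chunk"), "second chunk".bytesize)
+ #    crc.to_i # => the CRC32 of both chunks combined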
+ class StreamCRC32
1621
+ include ZipKit::WriteShovel
1622
+ STRINGS_HAVE_CAPACITY_SUPPORT = T.let(begin
1623
+ String.new("", capacity: 1)
1624
+ true
1625
+ rescue ArgumentError
1626
+ false
1627
+ end, T.untyped)
1628
+ CRC_BUF_SIZE = T.let(1024 * 512, T.untyped)
1629
+
1630
+ # Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
1631
+ #
1632
+ # _@param_ `io` — the IO to read the data from
1633
+ #
1634
+ # _@return_ — the computed CRC32 value
1635
+ sig { params(io: IO).returns(Fixnum) }
1636
+ def self.from_io(io); end
1637
+
1638
+ # Creates a new streaming CRC32 calculator
1639
+ sig { void }
1640
+ def initialize; end
1641
+
1642
+ # Append data to the CRC32. Updates the contained CRC32 value in place.
1643
+ #
1644
+ # _@param_ `blob` — the string to compute the CRC32 from
1645
+ sig { params(blob: String).returns(T.self_type) }
1646
+ def <<(blob); end
1647
+
1648
+ # Returns the CRC32 value computed so far
1649
+ #
1650
+ # _@return_ — the updated CRC32 value for all the blobs so far
1651
+ sig { returns(Fixnum) }
1652
+ def to_i; end
1653
+
1654
+ # Appends a known CRC32 value to the current one, and combines the
1655
+ # contained CRC32 value in-place.
1656
+ #
1657
+ # _@param_ `crc32` — the CRC32 value to append
1658
+ #
1659
+ # _@param_ `blob_size` — the size of the data the `crc32` is computed from
1660
+ #
1661
+ # _@return_ — the updated CRC32 value for all the blobs so far
1662
+ sig { params(crc32: Fixnum, blob_size: Fixnum).returns(Fixnum) }
1663
+ def append(crc32, blob_size); end
1664
+
1665
+ # Writes the given data to the output stream. Allows the object to be used as
1666
+ # a target for `IO.copy_stream(from, to)`
1667
+ #
1668
+ # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
1669
+ #
1670
+ # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
1671
+ sig { params(bytes: String).returns(Fixnum) }
1672
+ def write(bytes); end
1673
+ end
1674
+
1675
+ # Some operations (such as CRC32) benefit when they are performed
1676
+ # on larger chunks of data. In certain use cases, it is possible that
1677
+ # the consumer of ZipKit is going to be writing small chunks
1678
+ # in rapid succession, so CRC32 is going to have to perform a lot of
1679
+ # CRC32 combine operations - and this adds up. Since the CRC32 value
1680
+ # is usually not needed until the complete output has been produced,
1681
+ # we can buffer at least some amount of data before computing CRC32 over it.
1682
+ # We also use this buffer for output via Rack, where some amount of buffering
1683
+ # helps reduce the number of syscalls made by the webserver. ZipKit performs
1684
+ # lots of very small writes, and some degree of speedup (about 20%) can be achieved
1685
+ # with a buffer of a few KB.
1686
+ #
1687
+ # Note that there is no guarantee that the write buffer is going to flush at or above
1688
+ # the given `buffer_size`, because for writes which exceed the buffer size it will
1689
+ # first `flush` and then write through the oversized chunk, without buffering it. This
1690
+ # helps conserve memory. Also note that the buffer will *not* duplicate strings for you
1691
+ # and *will* yield the same buffer String over and over, so if you are storing it in an
1692
+ # Array you might need to duplicate it.
1693
+ #
1694
+ # Note also that the WriteBuffer assumes that the object it `<<`-writes into is going
1695
+ # to **consume** in some way the string that it passes in. After the `<<` method returns,
1696
+ # the WriteBuffer will be cleared, and it passes the same String reference on every call
1697
+ # to `<<`. Therefore, if you need to retain the output of the WriteBuffer in, say, an Array,
1698
+ # you might need to `.dup` the `String` it gives you.
1699
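+ #
+ # For example, a minimal sketch (`destination` and `many_small_strings` are
+ # stand-ins: any object responding to `#<<` and any collection of Strings):
+ #
+ #    buffer = ZipKit::WriteBuffer.new(destination, 64 * 1024)
+ #    many_small_strings.each { |s| buffer << s }
+ #    buffer.flush # pushes out whatever is still held below the buffer size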
+ class WriteBuffer
1700
+ # sord duck - #<< looks like a duck type, replacing with untyped
1701
+ # Creates a new WriteBuffer that buffers writes into a given writable object
1702
+ #
1703
+ # _@param_ `writable` — An object that responds to `#<<` with a String as argument
1704
+ #
1705
+ # _@param_ `buffer_size` — How many bytes to buffer
1706
+ sig { params(writable: T.untyped, buffer_size: Integer).void }
1707
+ def initialize(writable, buffer_size); end
1708
+
1709
+ # Appends the given data to the write buffer, and flushes the buffer into the
1710
+ # writable if the buffer size exceeds the `buffer_size` given at initialization
1711
+ #
1712
+ # _@param_ `data` — data to be written
1713
+ #
1714
+ # _@return_ — self
1715
+ sig { params(data: String).returns(T.untyped) }
1716
+ def <<(data); end
1717
+
1718
+ # Explicitly flushes the buffer if it contains anything
1719
+ #
1720
+ # _@return_ — self
1721
+ sig { returns(T.untyped) }
1722
+ def flush; end
1723
+ end
1724
+
1725
+ # A lot of objects in ZipKit accept bytes that may be sent
1726
+ # to the `<<` operator (the "shovel" operator). This is in the tradition
1727
+ # of late Jim Weirich and his Builder gem. In [this presentation](https://youtu.be/1BVFlvRPZVM?t=2403)
1728
+ # he justifies this design very eloquently. In ZipKit we follow this example.
1729
+ # However, there are a number of methods in Ruby - including in the standard library -
1730
+ # which expect your object to implement the `write` method instead. Since the `write`
1731
+ # method can be expressed in terms of the `<<` method, why not allow all ZipKit
1732
+ # "IO-ish" things to also respond to `write`? This is what this module does.
1733
+ # Jim would be proud. We miss you, Jim.
1734
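+ #
+ # For example, since {ZipKit::StreamCRC32} includes this module it can be used
+ # as a copy destination (a minimal sketch, `source.bin` being any readable file):
+ #
+ #    crc = ZipKit::StreamCRC32.new
+ #    File.open('source.bin', 'rb') { |f| IO.copy_stream(f, crc) }
+ #    crc.to_i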
+ module WriteShovel
1735
+ # Writes the given data to the output stream. Allows the object to be used as
1736
+ # a target for `IO.copy_stream(from, to)`
1737
+ #
1738
+ # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
1739
+ #
1740
+ # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
1741
+ sig { params(bytes: String).returns(Fixnum) }
1742
+ def write(bytes); end
1743
+ end
1744
+
1745
+ # Permits Deflate compression in independent blocks. The workflow is as follows:
1746
+ #
1747
+ # * Run every block to compress through deflate_chunk, remove the header,
1748
+ # footer and adler32 from the result
1749
+ # * Write out the compressed block bodies (the ones deflate_chunk returns)
1750
+ # to your output, in sequence
1751
+ # * Write out the footer (\03\00)
1752
+ #
1753
+ # The resulting stream is guaranteed to be handled properly by all zip
1754
+ # unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
1755
+ #
1756
+ # You could also build a compressor for Rubyzip using this module quite easily,
1757
+ # even though this is outside the scope of the library.
1758
+ #
1759
+ # When you deflate the chunks separately, you need to write the end marker
1760
+ # yourself (using `write_terminator`).
1761
+ # If you just want to deflate a large IO's contents, use
1762
+ # `deflate_in_blocks_and_terminate` to have the end marker written out for you.
1763
+ #
1764
+ # Basic usage to compress a file in parts:
1765
+ #
1766
+ # source_file = File.open('12_gigs.bin', 'rb')
1767
+ # compressed = Tempfile.new
1768
+ # # Will not compress everything in memory, but do it per chunk to spare
1769
+ # # memory. `compressed` will be written to at the end of each chunk.
1771
+ # ZipKit::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
1772
+ # compressed)
1773
+ #
1774
+ # You can also do the same to parts that you will later concatenate together
1775
+ # elsewhere, in that case you need to skip the end marker:
1776
+ #
1777
+ # compressed = Tempfile.new
1778
+ # ZipKit::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
1779
+ # compressed)
1780
+ # ZipKit::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
1781
+ # compressed)
1782
+ # ZipKit::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
1783
+ # compressed)
1784
+ # ZipKit::BlockDeflate.write_terminator(compressed)
1785
+ #
1786
+ # You can also elect to just compress strings in memory (to splice them later):
1787
+ #
1788
+ # compressed_string = ZipKit::BlockDeflate.deflate_chunk(big_string)
1789
+ class BlockDeflate
1790
+ DEFAULT_BLOCKSIZE = T.let(1_024 * 1024 * 5, T.untyped)
1791
+ END_MARKER = T.let([3, 0].pack("C*"), T.untyped)
1792
+ VALID_COMPRESSIONS = T.let((Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze, T.untyped)
1793
+
1794
+ # Write the end marker (\x3\x0) to the given IO.
1795
+ #
1796
+ # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
1797
+ #
1798
+ # _@param_ `output_io` — the stream to write to (should respond to `:<<`)
1799
+ #
1800
+ # _@return_ — number of bytes written to `output_io`
1801
+ sig { params(output_io: IO).returns(Fixnum) }
1802
+ def self.write_terminator(output_io); end
1803
+
1804
+ # Compress a given binary string and flush the deflate stream at byte boundary.
1805
+ # The returned string can be spliced into another deflate stream.
1806
+ #
1807
+ # _@param_ `bytes` — Bytes to compress
1808
+ #
1809
+ # _@param_ `level` — Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
1810
+ #
1811
+ # _@return_ — compressed bytes
1812
+ sig { params(bytes: String, level: Fixnum).returns(String) }
1813
+ def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION); end
1814
+
1815
+ # Compress the contents of input_io into output_io, in blocks
1816
+ # of block_size. Aligns the parts so that they can be concatenated later.
1817
+ # Writes deflate end marker (\x3\x0) into `output_io` as the final step, so
1818
+ # the contents of `output_io` can be spliced verbatim into a ZIP archive.
1819
+ #
1820
+ # Once the write completes, no more parts for concatenation should be written to
1821
+ # the same stream.
1822
+ #
1823
+ # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
1824
+ #
1825
+ # _@param_ `input_io` — the stream to read from (should respond to `:read`)
1826
+ #
1827
+ # _@param_ `output_io` — the stream to write to (should respond to `:<<`)
1828
+ #
1829
+ # _@param_ `level` — Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
1830
+ #
1831
+ # _@param_ `block_size` — The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
1832
+ #
1833
+ # _@return_ — number of bytes written to `output_io`
1834
+ sig do
1835
+ params(
1836
+ input_io: IO,
1837
+ output_io: IO,
1838
+ level: Fixnum,
1839
+ block_size: Fixnum
1840
+ ).returns(Fixnum)
1841
+ end
1842
+ def self.deflate_in_blocks_and_terminate(input_io, output_io, level: Zlib::DEFAULT_COMPRESSION, block_size: DEFAULT_BLOCKSIZE); end
1843
+
1844
+ # Compress the contents of input_io into output_io, in blocks
1845
+ # of block_size. Align the parts so that they can be concatenated later.
1846
+ # Will not write the deflate end marker (\x3\x0) so more parts can be written
1847
+ # later and successfully read back in, provided the end marker will be written.
1848
+ #
1849
+ # `output_io` can also be a {ZipKit::Streamer} to expedite ops.
1850
+ #
1851
+ # _@param_ `input_io` — the stream to read from (should respond to `:read`)
1852
+ #
1853
+ # _@param_ `output_io` — the stream to write to (should respond to `:<<`)
1854
+ #
1855
+ # _@param_ `level` — Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
1856
+ #
1857
+ # _@param_ `block_size` — The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
1858
+ #
1859
+ # _@return_ — number of bytes written to `output_io`
1860
+ sig do
1861
+ params(
1862
+ input_io: IO,
1863
+ output_io: IO,
1864
+ level: Fixnum,
1865
+ block_size: Fixnum
1866
+ ).returns(Fixnum)
1867
+ end
1868
+ def self.deflate_in_blocks(input_io, output_io, level: Zlib::DEFAULT_COMPRESSION, block_size: DEFAULT_BLOCKSIZE); end
1869
+ end
1870
+
1871
+ # Helps to estimate archive sizes
1872
+ class SizeEstimator
1873
+ # Creates a new estimator with a Streamer object. Normally you should use
1874
+ # `estimate` instead and not use this method directly.
1875
+ #
1876
+ # _@param_ `streamer`
1877
+ sig { params(streamer: ZipKit::Streamer).void }
1878
+ def initialize(streamer); end
1879
+
1880
+ # Performs the estimate using fake archiving. It needs to know the sizes of the
1881
+ # entries upfront. Usage:
1882
+ #
1883
+ # expected_zip_size = SizeEstimator.estimate do | estimator |
1884
+ # estimator.add_stored_entry(filename: "file.doc", size: 898291)
1885
+ # estimator.add_deflated_entry(filename: "family.tif",
1886
+ # uncompressed_size: 89281911, compressed_size: 121908)
1887
+ # end
1888
+ #
1889
+ # _@param_ `kwargs_for_streamer_new` — Any options to pass to Streamer, see {Streamer#initialize}
1890
+ #
1891
+ # _@return_ — the size of the resulting archive, in bytes
1892
+ sig { params(kwargs_for_streamer_new: T.untyped, blk: T.proc.params(the: SizeEstimator).void).returns(Integer) }
1893
+ def self.estimate(**kwargs_for_streamer_new, &blk); end
1894
+
1895
+ # Add a fake entry to the archive, to see how big it is going to be in the end.
1896
+ #
1897
+ # _@param_ `filename` — the name of the file (filenames are variable-width in the ZIP)
1898
+ #
1899
+ # _@param_ `size` — size of the uncompressed entry
1900
+ #
1901
+ # _@param_ `use_data_descriptor` — whether the entry uses a postfix
1902
+ # data descriptor to specify size
1904
+ #
1905
+ # _@return_ — self
1906
+ sig { params(filename: String, size: Fixnum, use_data_descriptor: T::Boolean).returns(T.untyped) }
1907
+ def add_stored_entry(filename:, size:, use_data_descriptor: false); end
1908
+
1909
+ # Add a fake entry to the archive, to see how big it is going to be in the end.
1910
+ #
1911
+ # _@param_ `filename` — the name of the file (filenames are variable-width in the ZIP)
1912
+ #
1913
+ # _@param_ `uncompressed_size` — size of the uncompressed entry
1914
+ #
1915
+ # _@param_ `compressed_size` — size of the compressed entry
1916
+ #
1917
+ # _@param_ `use_data_descriptor` — whether the entry uses a postfix data descriptor to specify size
1918
+ #
1919
+ # _@return_ — self
1920
+ sig do
1921
+ params(
1922
+ filename: String,
1923
+ uncompressed_size: Fixnum,
1924
+ compressed_size: Fixnum,
1925
+ use_data_descriptor: T::Boolean
1926
+ ).returns(T.untyped)
1927
+ end
1928
+ def add_deflated_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false); end
1929
+
1930
+ # Add an empty directory to the archive.
1931
+ #
1932
+ # _@param_ `dirname` — the name of the directory
1933
+ #
1934
+ # _@return_ — self
1935
+ sig { params(dirname: String).returns(T.untyped) }
1936
+ def add_empty_directory_entry(dirname:); end
1937
+ end
1938
+
1939
+ # A tiny wrapper over any object that supports :<<.
1940
+ # Adds :tell and :advance_position_by. This is needed for write destinations
1941
+ # which do not respond to `#pos` or `#tell`. A lot of ZIP archive format parts
1942
+ # include "offsets in archive" - a byte offset from the start of file. Keeping
1943
+ # track of this value is what this object will do. It also allows "advancing"
1944
+ # this value if data gets written using a bypass (such as `IO#sendfile`)
1945
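+ #
+ # For example, a minimal sketch of offset tracking (assumes `require "stringio"`):
+ #
+ #    out = ZipKit::WriteAndTell.new(StringIO.new)
+ #    out << "PK"                  # 2 bytes pass through to the StringIO
+ #    out.advance_position_by(100) # data written via a bypass
+ #    out.tell # => 102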
+ class WriteAndTell
1946
+ include ZipKit::WriteShovel
1947
+
1948
+ # sord omit - no YARD type given for "io", using untyped
1949
+ sig { params(io: T.untyped).void }
1950
+ def initialize(io); end
1951
+
1952
+ # sord omit - no YARD type given for "bytes", using untyped
1953
+ # sord omit - no YARD return type given, using untyped
1954
+ sig { params(bytes: T.untyped).returns(T.untyped) }
1955
+ def <<(bytes); end
1956
+
1957
+ # sord omit - no YARD type given for "num_bytes", using untyped
1958
+ # sord omit - no YARD return type given, using untyped
1959
+ sig { params(num_bytes: T.untyped).returns(T.untyped) }
1960
+ def advance_position_by(num_bytes); end
1961
+
1962
+ # sord omit - no YARD return type given, using untyped
1963
+ sig { returns(T.untyped) }
1964
+ def tell; end
1965
+
1966
+ # Writes the given data to the output stream. Allows the object to be used as
1967
+ # a target for `IO.copy_stream(from, to)`
1968
+ #
1969
+ # _@param_ `bytes` — the binary string to write (part of the uncompressed file)
1970
+ #
1971
+ # _@return_ — the number of bytes written (will always be the bytesize of `bytes`)
1972
+ sig { params(bytes: String).returns(Fixnum) }
1973
+ def write(bytes); end
1974
+ end
1975
+
1976
+ # Should be included into a Rails controller for easy ZIP output from any action.
1977
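+ #
+ # For example, a sketch of a controller action (`ReportsController` and the
+ # file contents are illustrative only):
+ #
+ #    class ReportsController < ApplicationController
+ #      include ZipKit::RailsStreaming
+ #
+ #      def download
+ #        zip_kit_stream(filename: "reports.zip") do |zip|
+ #          zip.write_file("report.txt") { |sink| sink << "Hello!" }
+ #        end
+ #      end
+ #    end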
+ module RailsStreaming
1978
+ # Opens a {ZipKit::Streamer} and yields it to the caller. The output of the streamer
1979
+ # gets automatically forwarded to the Rails response stream. When the output completes,
1980
+ # the Rails response stream is going to be closed automatically.
1981
+ #
1982
+ # _@param_ `filename` — name of the file for the Content-Disposition header
1983
+ #
1984
+ # _@param_ `type` — the content type (MIME type) of the archive being output
1985
+ #
1986
+ # _@param_ `use_chunked_transfer_encoding` — whether to forcibly encode output as chunked. Normally you should not need this.
1987
+ #
1988
+ # _@param_ `zip_streamer_options` — options that will be passed to the Streamer. See {ZipKit::Streamer#initialize} for the full list of options.
1989
+ #
1990
+ # _@return_ — The output enumerator assigned to the response body
1991
+ sig do
1992
+ params(
1993
+ filename: String,
1994
+ type: String,
1995
+ use_chunked_transfer_encoding: T::Boolean,
1996
+ zip_streamer_options: T::Hash[T.untyped, T.untyped],
1997
+ zip_streaming_blk: T.proc.params(the: ZipKit::Streamer).void
1998
+ ).returns(ZipKit::OutputEnumerator)
1999
+ end
2000
+ def zip_kit_stream(filename: "download.zip", type: "application/zip", use_chunked_transfer_encoding: false, **zip_streamer_options, &zip_streaming_blk); end
2001
+ end
2002
+
2003
+ # The output enumerator makes it possible to "pull" from a ZipKit streamer
2004
+ # object instead of having it "push" writes to you. It will "stash" the block which
2005
+ # writes the ZIP archive through the streamer, and when you call `each` on the Enumerator
2006
+ # it will yield you the bytes the block writes. Since it is an enumerator you can
2007
+ # use `next` to take chunks written by the ZipKit streamer one by one. It can be very
2008
+ # convenient when you need to segment your ZIP output into bigger chunks for, say,
2009
+ # uploading them to a cloud storage provider such as S3.
2010
+ #
2011
+ # Another use of the `OutputEnumerator` is as a Rack response body - since a Rack
2012
+ # response body object must support `#each` yielding successive binary strings.
2013
+ # Which is exactly what `OutputEnumerator` does.
2014
+ #
2015
+ # The enumerator can provide you with some more conveniences for HTTP output - correct streaming
2016
+ # headers and a body with chunked transfer encoding.
2017
+ #
2018
+ # iterable_zip_body = ZipKit::OutputEnumerator.new do | streamer |
2019
+ # streamer.write_file('big.csv') do |sink|
2020
+ # CSV(sink) do |csv_writer|
2021
+ # csv_writer << Person.column_names
2022
+ # Person.all.find_each do |person|
2023
+ # csv_writer << person.attributes.values
2024
+ # end
2025
+ # end
2026
+ # end
2027
+ # end
2028
+ #
2029
+ # You can grab the headers one usually needs for streaming from `#streaming_http_headers`:
2030
+ #
2031
+ # [200, iterable_zip_body.streaming_http_headers, iterable_zip_body]
2032
+ #
2033
+ # to bypass things like `Rack::ETag` and the nginx buffering.
2034
+ class OutputEnumerator
2035
+ DEFAULT_WRITE_BUFFER_SIZE = T.let(64 * 1024, T.untyped)
2036
+
2037
+ # Creates a new OutputEnumerator enumerator. The enumerator can be read from using `each`,
2038
+ # and the creation of the ZIP is in lockstep with the caller calling `each` on the returned
2039
+ # output enumerator object. This can be used when the calling program wants to stream the
2040
+ # output of the ZIP archive and throttle that output, or split it into chunks, or use it
2041
+ # as a generator.
2042
+ #
2043
+ # For example:
2044
+ #
2045
+ # # The block given to {output_enum} won't be executed immediately - rather it
2046
+ # # will only start to execute when the caller starts to read from the output
2047
+ # # by calling `each`
2048
+ # body = ::ZipKit::OutputEnumerator.new(writer: CustomWriter) do |streamer|
2049
+ # streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
2050
+ # streamer << large_file.read(1024*1024) until large_file.eof?
2051
+ # ...
2052
+ # end
2053
+ #
2054
+ # body.each do |bin_string|
2055
+ # # Send the output somewhere, buffer it in a file etc.
2056
+ # # The block passed into `initialize` will only start executing once `#each`
2057
+ # # is called
2058
+ # ...
2059
+ # end
2060
+ #
2061
+ # _@param_ `streamer_options` — options for Streamer, see {ZipKit::Streamer.new}
2062
+ #
2063
+ # _@param_ `write_buffer_size` — By default all ZipKit writes are unbuffered. For output to sockets it is beneficial to bulkify those writes so that they are roughly sized to a socket buffer chunk. This object will bulkify writes for you in this way (so `each` will yield not on every call to `<<` from the Streamer but at block size boundaries or greater). Set the parameter to 0 for unbuffered writes.
2064
+ #
2065
+ # _@param_ `blk` — a block that will receive the Streamer object when executing. The block will not be executed immediately but only once `each` is called on the OutputEnumerator
2066
+ sig { params(write_buffer_size: Integer, streamer_options: T::Hash[T.untyped, T.untyped], blk: T.untyped).void }
2067
+ def initialize(write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE, **streamer_options, &blk); end
2068
+
2069
+ # sord omit - no YARD return type given, using untyped
2070
+ # Executes the block given to the constructor with a {ZipKit::Streamer}
2071
+ # and passes each written chunk to the block given to the method. This allows one
2072
+ # to "take" output of the ZIP piecewise. If called without a block will return an Enumerator
2073
+ # that you can pull data from using `next`.
2074
+ #
2075
+ # **NOTE** Because the `WriteBuffer` inside this object can reuse the buffer, it is important
2076
+ # that the `String` that is yielded gets consumed eagerly (written byte-by-byte somewhere, or `#dup`-ed),
2077
+ # since the write buffer will clear it after your block returns. If you expand this Enumerator
2078
+ # eagerly into an Array you might notice that a lot of the segments of your ZIP output are
2079
+ # empty - this means that you need to duplicate them.
2080
+ sig { returns(T.untyped) }
2081
+ def each; end
2082
+
2083
+ # Returns a Hash of HTTP response headers you are likely to need to have your response stream correctly.
2084
+ # This is on the {ZipKit::OutputEnumerator} class since those headers are common, independent of the
2085
+ # particular response body getting served. You might want to override the headers with your particular
2086
+ # ones - for example, specific content types are needed for files which are, technically, ZIP files
2087
+ # but are of a file format built "on top" of ZIPs - such as ODTs, [pkpass files](https://developer.apple.com/documentation/walletpasses/building_a_pass)
2088
+ # and ePubs.
2089
+ sig { returns(T::Hash[T.untyped, T.untyped]) }
2090
+ def self.streaming_http_headers; end
2091
+
2092
+ # Returns a Hash of HTTP response headers for this particular response. This used to contain "Content-Length" for
2093
+ # presized responses, but is now effectively a no-op.
2094
+ #
2095
+ # _@see_ `[ZipKit::OutputEnumerator.streaming_http_headers]`
2096
+ sig { returns(T::Hash[T.untyped, T.untyped]) }
2097
+ def streaming_http_headers; end
2098
+
2099
+ # Returns a tuple of `headers, body` - headers are a `Hash` and the body is
2100
+ # an object that can be used as a Rack response body. This method used to accept arguments
2101
+ # but will now just ignore them.
2102
+ sig { returns(T::Array[T.untyped]) }
2103
+ def to_headers_and_rack_response_body; end
2104
+ end
2105
+
2106
+ # A body wrapper that emits chunked responses, creating a valid
2107
+ # `Transfer-Encoding: chunked` HTTP response body. This is copied from Rack::Chunked::Body,
2108
+ # because Rack is not going to include that class after version 3.x
2109
+ # Rails has a substitute class for this inside ActionController::Streaming,
2110
+ # but that module is a private constant in the Rails codebase, and is thus
2111
+ # considered "private" from the Rails standpoint. It is not that much code to
2112
+ # carry, so we copy it into our code.
2113
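+ #
+ # For example, a minimal sketch of wrapping an enumerable body:
+ #
+ #    body = ZipKit::OutputEnumerator.new do |zip|
+ #      zip.write_file("x.txt") { |sink| sink << "x" }
+ #    end
+ #    [200, {"Transfer-Encoding" => "chunked"}, ZipKit::RackChunkedBody.new(body)]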
+ class RackChunkedBody
2114
+ TERM = T.let("\r\n", T.untyped)
2115
+ TAIL = T.let("0#{TERM}", T.untyped)
2116
+
2117
+ # sord duck - #each looks like a duck type, replacing with untyped
2118
+ # _@param_ `body` — the enumerable that yields bytes, usually a `OutputEnumerator`
2119
+ sig { params(body: T.untyped).void }
2120
+ def initialize(body); end
2121
+
2122
+ # sord omit - no YARD return type given, using untyped
2123
+ # For each string yielded by the response body, yield
2124
+ # the element in chunked encoding - and finish off with a terminator
2125
+ sig { returns(T.untyped) }
2126
+ def each; end
2127
+ end
2128
+
2129
+ module UniquifyFilename
2130
+ # sord duck - #include? looks like a duck type, replacing with untyped
2131
+ # Makes a given filename unique by appending a (n) suffix
2132
+ # just before the filename extension. So "file.txt" gets
2133
+ # transformed into "file (1).txt". The transformation is applied
2134
+ # repeatedly as long as the generated filename is present
2135
+ # in `while_included_in` object
2136
+ #
2137
+ # _@param_ `path` — the path to make unique
2138
+ #
2139
+ # _@param_ `while_included_in` — an object that stores the list of already used paths
2140
+ #
2141
+ # _@return_ — the path as is, or with the suffix required to make it unique
2142
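+ #
+ # For example, using a `Set` (or any object responding to `#include?`)
+ # as the store of already used paths:
+ #
+ #    taken = Set.new(["file.txt"])
+ #    ZipKit::UniquifyFilename.call("file.txt", taken) # => "file (1).txt"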
+ sig { params(path: String, while_included_in: T.untyped).returns(String) }
2143
+ def self.call(path, while_included_in); end
2144
+ end
2145
+
2146
+ # Contains a file handle which can be closed once the response finishes sending.
2147
+ # It supports `to_path` so that `Rack::Sendfile` can intercept it.
2148
+ # This class is deprecated and is going to be removed in zip_kit 7.x
2149
+ # @api deprecated
2150
+ class RackTempfileBody
2151
+ TEMPFILE_NAME_PREFIX = T.let("zip-tricks-tf-body-", T.untyped)
2152
+
2153
+ # sord omit - no YARD type given for "env", using untyped
2154
+ # sord duck - #each looks like a duck type, replacing with untyped
2155
+ # _@param_ `body` — the enumerable that yields bytes, usually a `OutputEnumerator`. The `body` will be read in full immediately and closed.
2156
+ sig { params(env: T.untyped, body: T.untyped).void }
2157
+ def initialize(env, body); end
2158
+
2159
+ # Returns the size of the contained `Tempfile` so that a correct
2160
+ # Content-Length header can be set
2161
+ sig { returns(Integer) }
2162
+ def size; end
2163
+
2164
+ # Returns the path to the `Tempfile`, so that Rack::Sendfile can send this response
2165
+ # using the downstream webserver
2166
+ sig { returns(String) }
2167
+ def to_path; end
2168
+
2169
+ # Stream the file's contents if `Rack::Sendfile` isn't present.
2170
+ sig { void }
2171
+ def each; end
2172
+
2173
+ # sord omit - no YARD return type given, using untyped
2174
+ sig { returns(T.untyped) }
2175
+ def flush; end
2176
+
2177
+ # sord omit - no YARD type given for :tempfile, using untyped
2178
+ sig { returns(T.untyped) }
2179
+ attr_reader :tempfile
2180
+ end
2181
+ end