zip_kit 6.3.0 → 6.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +31 -8
- data/CHANGELOG.md +9 -0
- data/IMPLEMENTATION_DETAILS.md +9 -35
- data/README.md +25 -11
- data/RUBYZIP_DIFFERENCES.md +1 -4
- data/lib/zip_kit/block_deflate.rb +8 -8
- data/lib/zip_kit/file_reader.rb +17 -17
- data/lib/zip_kit/output_enumerator.rb +11 -0
- data/lib/zip_kit/rails_streaming.rb +1 -1
- data/lib/zip_kit/railtie.rb +3 -1
- data/lib/zip_kit/remote_io.rb +4 -4
- data/lib/zip_kit/size_estimator.rb +14 -8
- data/lib/zip_kit/stream_crc32.rb +5 -5
- data/lib/zip_kit/streamer/heuristic.rb +8 -0
- data/lib/zip_kit/streamer.rb +23 -5
- data/lib/zip_kit/version.rb +1 -1
- data/lib/zip_kit/write_shovel.rb +1 -1
- data/lib/zip_kit/zip_writer.rb +20 -20
- data/rbi/zip_kit.rbi +75 -66
- data/zip_kit.gemspec +2 -0
- metadata +31 -9
- data/.document +0 -5
- data/.rspec +0 -1
- data/bench/buffered_crc32_bench.rb +0 -109
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ff5f4284066004d435d36f28020e6ef2279fe0abe36c520436db3bd39d7608a
|
4
|
+
data.tar.gz: ac9f9c4312c632410cee6ffb98c2ec471f9de16e5260793a30df8ac28639218d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34389f0a2d38a532af341c7694fcd10c2bbfb2f819ce9d1168afd51fbfc815b8c4ef0b36929f286f3ad31ead98479b07b871ec35d1d8f5adefbf2bb2717f3f58
|
7
|
+
data.tar.gz: 821151643cc5adafd9fe3446412e9d9de60ff049f6bab3ca1e9a7c0dc24dbcef0688b8bc0ab1fa51d88feb85892b3707ccbf8483e41c532680c2e9ddb073e823
|
data/.github/workflows/ci.yml
CHANGED
@@ -7,23 +7,46 @@ env:
|
|
7
7
|
BUNDLE_PATH: vendor/bundle
|
8
8
|
|
9
9
|
jobs:
|
10
|
-
|
11
|
-
name: Tests
|
10
|
+
test_baseline_ruby:
|
11
|
+
name: "Tests (Ruby 2.6 baseline)"
|
12
12
|
runs-on: ubuntu-22.04
|
13
|
-
strategy:
|
14
|
-
matrix:
|
15
|
-
ruby:
|
16
|
-
- '2.6'
|
17
|
-
- '3.2'
|
18
13
|
steps:
|
19
14
|
- name: Checkout
|
20
15
|
uses: actions/checkout@v4
|
21
16
|
- name: Setup Ruby
|
22
17
|
uses: ruby/setup-ruby@v1
|
23
18
|
with:
|
24
|
-
ruby-version:
|
19
|
+
ruby-version: '2.6'
|
25
20
|
bundler-cache: true
|
26
21
|
- name: "Tests"
|
27
22
|
run: bundle exec rspec --backtrace --fail-fast
|
23
|
+
|
24
|
+
test_newest_ruby:
|
25
|
+
name: "Tests (Ruby 3.4 with frozen string literals)"
|
26
|
+
runs-on: ubuntu-22.04
|
27
|
+
steps:
|
28
|
+
- name: Checkout
|
29
|
+
uses: actions/checkout@v4
|
30
|
+
- name: Setup Ruby
|
31
|
+
uses: ruby/setup-ruby@v1
|
32
|
+
with:
|
33
|
+
ruby-version: '3.4.1'
|
34
|
+
bundler-cache: true
|
35
|
+
- name: "Tests" # Make the test suite hard-crash on frozen string literal violations
|
36
|
+
env:
|
37
|
+
RUBYOPT: "--enable=frozen-string-literal --debug=frozen-string-literal"
|
38
|
+
run: "bundle exec rspec --backtrace --fail-fast"
|
39
|
+
|
40
|
+
lint_baseline_ruby: # We need to use syntax appropriate for the minimum supported Ruby version
|
41
|
+
name: Lint (Ruby 2.6 syntax)
|
42
|
+
runs-on: ubuntu-22.04
|
43
|
+
steps:
|
44
|
+
- name: Checkout
|
45
|
+
uses: actions/checkout@v4
|
46
|
+
- name: Setup Ruby
|
47
|
+
uses: ruby/setup-ruby@v1
|
48
|
+
with:
|
49
|
+
ruby-version: '2.6'
|
50
|
+
bundler-cache: true
|
28
51
|
- name: "Lint"
|
29
52
|
run: bundle exec rake standard
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 6.3.2
|
2
|
+
|
3
|
+
* Make sure `rollback!` correctly works with `write_file` and the original exception gets re-raised from `write_file` if
|
4
|
+
closing the current entry happens in `Writable#close`
|
5
|
+
|
6
|
+
## 6.3.1
|
7
|
+
|
8
|
+
* Include `RailsStreaming` in a Rails loader callback, so that ActionController does not need to be in the namespace.
|
9
|
+
|
1
10
|
## 6.3.0
|
2
11
|
|
3
12
|
* Include `RailsStreaming` automatically via a Railtie. It is not really necessary to force people to manage it manually.
|
data/IMPLEMENTATION_DETAILS.md
CHANGED
@@ -5,6 +5,7 @@ The ZipKit streaming implementation is designed around the following requirement
|
|
5
5
|
* Only ahead-writes (no IO seek or rewind)
|
6
6
|
* Automatic switching to Zip64 as the files get written (no IO seeks), but not requiring Zip64 support if the archive can do without
|
7
7
|
* Make use of the fact that CRC32 checksums and the sizes of the files (compressed _and_ uncompressed) are known upfront
|
8
|
+
* Make it possible to output "sparse" ZIP archives (manifests that can be resolved into a ZIP via edge includes)
|
8
9
|
|
9
10
|
It strives to be compatible with the following unzip programs _at the minimum:_
|
10
11
|
|
@@ -14,9 +15,6 @@ It strives to be compatible with the following unzip programs _at the minimum:_
|
|
14
15
|
* Windows 7 - 7Zip 9.20
|
15
16
|
|
16
17
|
Below is the list of _specific_ decisions taken when writing the implementation, with an explanation for each.
|
17
|
-
We specifically _omit_ a number of things that we could do, but that are not necessary to satisfy our objectives.
|
18
|
-
The omissions are _intentional_ since we do not want to have things of which we _assume_ they work, or have things
|
19
|
-
that work only for one obscure unarchiver in one obscure case (like WinRAR with chinese filenames).
|
20
18
|
|
21
19
|
## Data descriptors (postfix CRC32/file sizes)
|
22
20
|
|
@@ -53,38 +51,14 @@ field, any other extra fields should come after.
|
|
53
51
|
|
54
52
|
If a diacritic-containing character (such as å) does fit into the DOS-437
|
55
53
|
codepage, it should be encodable as such. This would, in theory, let older Windows tools
|
56
|
-
decode the filename correctly. However, this
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
it will (very rightfully) ask us how we should decode each filename that does not have the EFS bit,
|
65
|
-
but does contain something non-ASCII-decodable. This is horrible UX for users.
|
66
|
-
|
67
|
-
So, basically, we have 2 choices, for filenames containing diacritics (for bona-fide UTF-8 you do not
|
68
|
-
even get those choices, you _have_ to use UTF-8):
|
69
|
-
|
70
|
-
* Make life easier for Windows users by setting stuff to DOS, not care about the standard _and_ make
|
71
|
-
most of Mac users upset
|
72
|
-
* Make life easy for Mac users and conform to the standard, and tell Windows users to get a _decent_
|
73
|
-
ZIP unarchiving tool.
|
74
|
-
|
75
|
-
We are going with option 2, and this is well-thought-out. Trust me. If you want the crazytown
|
76
|
-
filename encoding scheme that is described here http://stackoverflow.com/questions/13261347
|
77
|
-
you can try this:
|
78
|
-
|
79
|
-
[Encoding::CP437, Encoding::ISO_8859_1, Encoding::UTF_8]
|
80
|
-
|
81
|
-
While this could work, we found it to be broken in practice as the decoding of the filename
|
82
|
-
also depends on the system locale.
|
83
|
-
|
84
|
-
Additionally, the tests with the unarchivers we _do_ support have shown that including the InfoZIP
|
85
|
-
extra field does not actually help any of them recognize the file name correctly. And the use of
|
86
|
-
those fields for the UTF-8 filename, per spec, tells us we should not set the EFS bit - which ruins
|
87
|
-
the unarchiving for all other solutions. As any other, this decision may be changed in the future.
|
54
|
+
decode the filename correctly. However, this only works under the following circumstances:
|
55
|
+
|
56
|
+
* All the filenames in the archive are within the same "super-ASCII" encoding
|
57
|
+
* The Windows locale on the computer opening the archive is set to the same locale as the filename in the archive
|
58
|
+
|
59
|
+
A better approach is to use the EFS flag, which we enable when a filename does not encode cleanly
|
60
|
+
into base ASCII. The extended filename extra field did not work well for us - and it does not
|
61
|
+
combine correctly with the EFS flag.
|
88
62
|
|
89
63
|
There are some interesting notes about the Info-ZIP/EFS combination here
|
90
64
|
https://commons.apache.org/proper/commons-compress/zip.html
|
data/README.md
CHANGED
@@ -5,23 +5,38 @@
|
|
5
5
|
|
6
6
|
Allows streaming, non-rewinding ZIP file output from Ruby.
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
to develop zip_tricks and for sharing it with the community.
|
8
|
+
> [!IMPORTANT]
|
9
|
+
> `zip_kit` is a successor to and continuation of [zip_tricks.](https://github.com/WeTransfer/zip_tricks)
|
10
|
+
> I am grateful to WeTransfer for allowing me to develop zip_tricks and for sharing it with the community.
|
11
11
|
|
12
12
|
Allows you to write a ZIP archive out to a `File`, `Socket`, `String` or `Array` without having to rewind it at any
|
13
13
|
point. Usable for creating very large ZIP archives for immediate sending out to clients, or for writing
|
14
14
|
large ZIP archives without memory inflation.
|
15
15
|
|
16
|
-
The
|
17
|
-
|
16
|
+
The gem handled all the zipping needs for WeTransfer for half a decade, with hundreds of millions
|
17
|
+
of correct ZIP files generated. It is compatible with most end-user applications for opening archives.
|
18
|
+
|
19
|
+
The files output with zip_kit will be valid [OCF containers](https://www.w3.org/TR/epub-33/#sec-container-zip),
|
20
|
+
the library can be used to generate JAR files, EPUBs, OpenOffice/Office documents etc.
|
18
21
|
|
19
22
|
## How does it work? How is it different from Rubyzip?
|
20
23
|
|
24
|
+
zip_kit outputs the metadata of the ZIP file as it becomes available. Same for the content of the ZIP
|
25
|
+
entries. This allows nearly-unbuffered, streaming output. When reading ZIP files, zip_kit only reads
|
26
|
+
the metadata and does so in an accelerated, efficient way - permitting ZIP unarchiving directly from
|
27
|
+
a resource on HTTP (provided that the server supports HTTP ranges).
|
28
|
+
|
21
29
|
Check out [the implementation details](IMPLEMENTATION_DETAILS.md) on the design of the library, and
|
22
30
|
we have a separate [reference](RUBYZIP_DIFFERENCES.md) on why you might want to use ZipKit over
|
23
31
|
Rubyzip and vice versa.
|
24
32
|
|
33
|
+
## Migrating from zip_tricks
|
34
|
+
|
35
|
+
If you want to migrate your code from zip_tricks to zip_kit, all you need to do is a blanket replacement in your code.
|
36
|
+
Swap out the `ZipTricks` constant for `ZipKit` and you should be in business. All of the API available in ZipTricks 5.x
|
37
|
+
still works as of ZipKit 6.x and will stay working. If something in your project still depends on zip_tricks you can use
|
38
|
+
both gems inside of the same "apex" project - there will be no conflicts.
|
39
|
+
|
25
40
|
## Requirements
|
26
41
|
|
27
42
|
Ruby 2.6+ syntax support is required, as well as a working zlib (all available to jRuby as well).
|
@@ -60,9 +75,8 @@ If you want some more conveniences you can also use [zipline](https://github.com
|
|
60
75
|
will automatically process and stream attachments (Carrierwave, Shrine, ActiveStorage) and remote objects
|
61
76
|
via HTTP.
|
62
77
|
|
63
|
-
`
|
64
|
-
and will stream without it.
|
65
|
-
together with `Live` just fine if you need to.
|
78
|
+
`zip_kit_stream` does *not* require [ActionController::Live](https://api.rubyonrails.org/classes/ActionController/Live.html)
|
79
|
+
and will stream without it. It will work inside `Live` controllers just fine though.
|
66
80
|
|
67
81
|
## Writing into streaming destinations
|
68
82
|
|
@@ -128,10 +142,10 @@ output direct to STDOUT (so that you can run `$ ruby archive.rb > file.zip` in y
|
|
128
142
|
|
129
143
|
```ruby
|
130
144
|
ZipKit::Streamer.open($stdout) do |zip|
|
131
|
-
zip.write_file('mov.mp4
|
145
|
+
zip.write_file('mov.mp4') do |sink| # Will use "stored" mode
|
132
146
|
File.open('mov.mp4', 'rb'){|source| IO.copy_stream(source, sink) }
|
133
147
|
end
|
134
|
-
zip.write_file('long-novel.txt') do |sink|
|
148
|
+
zip.write_file('long-novel.txt') do |sink| # Will use "deflated" mode
|
135
149
|
File.open('novel.txt', 'rb'){|source| IO.copy_stream(source, sink) }
|
136
150
|
end
|
137
151
|
end
|
@@ -166,7 +180,7 @@ end
|
|
166
180
|
Sending a file with data descriptors is not always desirable - you don't really know how large your ZIP is going to be.
|
167
181
|
If you want to present your users with proper download progress, you would need to set a `Content-Length` header - and
|
168
182
|
know ahead of time how large your download is going to be. This can be done with ZipKit, provided you know how large
|
169
|
-
the compressed versions of your file are going to be. Use the
|
183
|
+
the compressed versions of your file are going to be. Use the `ZipKit::SizeEstimator` to do the pre-calculation - it
|
170
184
|
is not going to produce any large amounts of output, and will give you a to-the-byte value for your future archive:
|
171
185
|
|
172
186
|
```ruby
|
data/RUBYZIP_DIFFERENCES.md
CHANGED
@@ -16,10 +16,6 @@ differences in supported features which may be important for you when choosing.
|
|
16
16
|
and Zip64, and is economical enough to enable "remote uncapping" where pieces of a ZIP file get read over HTTP to reconstruct
|
17
17
|
the archive structure. Actual reading can then be done on a per-entry basis. Rubyzip reads entry data from local entries, which
|
18
18
|
is error prone and much less economical than using the central directory
|
19
|
-
* ZipKit deliberately _does not_ allow you to crawl directories to add to an archive, as this has been used for security exploits
|
20
|
-
in Rubyzip.
|
21
|
-
* ZipKit deliberately _does not_ allow you to extract a ZIP archive directly to the filesystem, as this has been used for security
|
22
|
-
exploits in Rubyzip.
|
23
19
|
* When writing, ZipKit applies careful buffering to speed up CRC32 calculations. Rubyzip combines CRC32 values at every write, which
|
24
20
|
can be slow if there are many small writes.
|
25
21
|
* ZipKit comes with a Rails helper and a Rack-compatible response body for facilitating streaming. Rubyzip has no Rails integration
|
@@ -29,6 +25,7 @@ differences in supported features which may be important for you when choosing.
|
|
29
25
|
* ZipKit requires components using autoloading, which means that your application will likely boot faster as you will almost never
|
30
26
|
need all of the features in one codebase. Rubyzip requires its components eagerly.
|
31
27
|
* ZipKit comes with exhaustive YARD documentation and `.rbi` typedefs for [Sorbet/Tapioca](https://sorbet.org/blog/2022/07/27/srb-tapioca)
|
28
|
+
* ZipKit allows you to compose "sparse" ZIP files where the contents of the files inside the archive come from an external source, and do not have to be passed through the library (or be turned into Ruby strings), which enables interesting use cases such as download proxies with random access and resume.
|
32
29
|
|
33
30
|
## What Rubyzip supports and ZipKit does not
|
34
31
|
|
@@ -55,7 +55,7 @@ class ZipKit::BlockDeflate
|
|
55
55
|
# `output_io` can also be a {ZipKit::Streamer} to expedite ops.
|
56
56
|
#
|
57
57
|
# @param output_io [IO] the stream to write to (should respond to `:<<`)
|
58
|
-
# @return [
|
58
|
+
# @return [Integer] number of bytes written to `output_io`
|
59
59
|
def self.write_terminator(output_io)
|
60
60
|
output_io << END_MARKER
|
61
61
|
END_MARKER.bytesize
|
@@ -65,7 +65,7 @@ class ZipKit::BlockDeflate
|
|
65
65
|
# The returned string can be spliced into another deflate stream.
|
66
66
|
#
|
67
67
|
# @param bytes [String] Bytes to compress
|
68
|
-
# @param level [
|
68
|
+
# @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
69
69
|
# @return [String] compressed bytes
|
70
70
|
def self.deflate_chunk(bytes, level: Zlib::DEFAULT_COMPRESSION)
|
71
71
|
raise "Invalid Zlib compression level #{level}" unless VALID_COMPRESSIONS.include?(level)
|
@@ -90,9 +90,9 @@ class ZipKit::BlockDeflate
|
|
90
90
|
#
|
91
91
|
# @param input_io [IO] the stream to read from (should respond to `:read`)
|
92
92
|
# @param output_io [IO] the stream to write to (should respond to `:<<`)
|
93
|
-
# @param level [
|
94
|
-
# @param block_size [
|
95
|
-
# @return [
|
93
|
+
# @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
94
|
+
# @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
|
95
|
+
# @return [Integer] number of bytes written to `output_io`
|
96
96
|
def self.deflate_in_blocks_and_terminate(input_io,
|
97
97
|
output_io,
|
98
98
|
level: Zlib::DEFAULT_COMPRESSION,
|
@@ -110,9 +110,9 @@ class ZipKit::BlockDeflate
|
|
110
110
|
#
|
111
111
|
# @param input_io [IO] the stream to read from (should respond to `:read`)
|
112
112
|
# @param output_io [IO] the stream to write to (should respond to `:<<`)
|
113
|
-
# @param level [
|
114
|
-
# @param block_size [
|
115
|
-
# @return [
|
113
|
+
# @param level [Integer] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
114
|
+
# @param block_size [Integer] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
|
115
|
+
# @return [Integer] number of bytes written to `output_io`
|
116
116
|
def self.deflate_in_blocks(input_io,
|
117
117
|
output_io,
|
118
118
|
level: Zlib::DEFAULT_COMPRESSION,
|
data/lib/zip_kit/file_reader.rb
CHANGED
@@ -86,46 +86,46 @@ class ZipKit::FileReader
|
|
86
86
|
# the Entry object used in Streamer for ZIP writing, since during writing more
|
87
87
|
# data can be kept in memory for immediate use.
|
88
88
|
class ZipEntry
|
89
|
-
# @return [
|
89
|
+
# @return [Integer] bit-packed version signature of the program that made the archive
|
90
90
|
attr_accessor :made_by
|
91
91
|
|
92
|
-
# @return [
|
92
|
+
# @return [Integer] ZIP version support needed to extract this file
|
93
93
|
attr_accessor :version_needed_to_extract
|
94
94
|
|
95
|
-
# @return [
|
95
|
+
# @return [Integer] bit-packed general purpose flags
|
96
96
|
attr_accessor :gp_flags
|
97
97
|
|
98
|
-
# @return [
|
98
|
+
# @return [Integer] Storage mode (0 for stored, 8 for deflate)
|
99
99
|
attr_accessor :storage_mode
|
100
100
|
|
101
|
-
# @return [
|
101
|
+
# @return [Integer] the bit-packed DOS time
|
102
102
|
attr_accessor :dos_time
|
103
103
|
|
104
|
-
# @return [
|
104
|
+
# @return [Integer] the bit-packed DOS date
|
105
105
|
attr_accessor :dos_date
|
106
106
|
|
107
|
-
# @return [
|
107
|
+
# @return [Integer] the CRC32 checksum of this file
|
108
108
|
attr_accessor :crc32
|
109
109
|
|
110
|
-
# @return [
|
110
|
+
# @return [Integer] size of compressed file data in the ZIP
|
111
111
|
attr_accessor :compressed_size
|
112
112
|
|
113
|
-
# @return [
|
113
|
+
# @return [Integer] size of the file once uncompressed
|
114
114
|
attr_accessor :uncompressed_size
|
115
115
|
|
116
116
|
# @return [String] the filename
|
117
117
|
attr_accessor :filename
|
118
118
|
|
119
|
-
# @return [
|
119
|
+
# @return [Integer] disk number where this file starts
|
120
120
|
attr_accessor :disk_number_start
|
121
121
|
|
122
|
-
# @return [
|
122
|
+
# @return [Integer] internal attributes of the file
|
123
123
|
attr_accessor :internal_attrs
|
124
124
|
|
125
|
-
# @return [
|
125
|
+
# @return [Integer] external attributes of the file
|
126
126
|
attr_accessor :external_attrs
|
127
127
|
|
128
|
-
# @return [
|
128
|
+
# @return [Integer] at what offset the local file header starts
|
129
129
|
# in your original IO object
|
130
130
|
attr_accessor :local_file_header_offset
|
131
131
|
|
@@ -151,7 +151,7 @@ class ZipKit::FileReader
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
# @return [
|
154
|
+
# @return [Integer] at what offset you should start reading
|
155
155
|
# for the compressed data in your original IO object
|
156
156
|
def compressed_data_offset
|
157
157
|
@compressed_data_offset || raise(LocalHeaderPending)
|
@@ -298,7 +298,7 @@ class ZipKit::FileReader
|
|
298
298
|
# this offset to get the data).
|
299
299
|
#
|
300
300
|
# @param io[#read] an IO-ish object the ZIP file can be read from
|
301
|
-
# @return [Array<ZipEntry,
|
301
|
+
# @return [Array<ZipEntry, Integer>] the parsed local header entry and
|
302
302
|
# the compressed data offset
|
303
303
|
def read_local_file_header(io:)
|
304
304
|
local_file_header_offset = io.tell
|
@@ -365,8 +365,8 @@ class ZipKit::FileReader
|
|
365
365
|
# (read starting at this offset to get the data).
|
366
366
|
#
|
367
367
|
# @param io[#seek, #read] an IO-ish object the ZIP file can be read from
|
368
|
-
# @param local_file_header_offset[
|
369
|
-
# local file header is supposed to begin @return [
|
368
|
+
# @param local_file_header_offset[Integer] absolute offset (0-based) where the
|
369
|
+
# local file header is supposed to begin @return [Integer] absolute offset
|
370
370
|
# (0-based) of where the compressed data begins for this file within the ZIP
|
371
371
|
def get_compressed_data_offset(io:, local_file_header_offset:)
|
372
372
|
seek(io, local_file_header_offset)
|
@@ -112,6 +112,17 @@ class ZipKit::OutputEnumerator
|
|
112
112
|
# but are of a file format built "on top" of ZIPs - such as ODTs, [pkpass files](https://developer.apple.com/documentation/walletpasses/building_a_pass)
|
113
113
|
# and ePubs.
|
114
114
|
#
|
115
|
+
# More value, however, is in the "technical" headers this method will provide. It will take the following steps to make sure streaming works correctly.
|
116
|
+
#
|
117
|
+
# * `Last-Modified` will be set to "now" so that the response is considered "fresh" by `Rack::ETag`. This is done so that `Rack::ETag` won't try to
|
118
|
+
# calculate a lax ETag value and thus won't start buffering your response out of nowhere
|
119
|
+
# * `Content-Encoding` will be set to `identity`. This is so that proxies or the Rack middleware that applies compression to the response (like gzip)
|
120
|
+
# is not going to try to compress your response. It also tells the receiving browsers (or downstream proxies) that they should not attempt to
|
121
|
+
# open or uncompress the response before saving it or passing it onwards.
|
122
|
+
# * `X-Accel-Buffering` will be set to `no` - this tells both nginx and the Google Cloud load balancer that the response should not be buffered
|
123
|
+
#
|
124
|
+
# These header values are known to get as close as possible to guaranteeing streaming on most environments where Ruby web applications may be hosted.
|
125
|
+
#
|
115
126
|
# @return [Hash]
|
116
127
|
def self.streaming_http_headers
|
117
128
|
_headers = {
|
@@ -24,7 +24,7 @@ module ZipKit::RailsStreaming
|
|
24
24
|
# @param use_chunked_transfer_encoding[Boolean] whether to forcibly encode output as chunked. Normally you should not need this.
|
25
25
|
# @param output_enumerator_options[Hash] options that will be passed to the OutputEnumerator - these include
|
26
26
|
# options for the Streamer. See {ZipKit::OutputEnumerator#initialize} for the full list of options.
|
27
|
-
# @yieldparam [ZipKit::Streamer]
|
27
|
+
# @yieldparam zip[ZipKit::Streamer] the {ZipKit::Streamer} that can be written to
|
28
28
|
# @return [Boolean] always returns true
|
29
29
|
def zip_kit_stream(filename: "download.zip", type: "application/zip", use_chunked_transfer_encoding: false, **output_enumerator_options, &zip_streaming_blk)
|
30
30
|
# We want some common headers for file sending. Rails will also set
|
data/lib/zip_kit/railtie.rb
CHANGED
data/lib/zip_kit/remote_io.rb
CHANGED
@@ -40,7 +40,7 @@ class ZipKit::RemoteIO
|
|
40
40
|
# so if you are at offset 0 in the IO of size 10, doing a `read(20)`
|
41
41
|
# will only return you 10 bytes of result, and not raise any exceptions.
|
42
42
|
#
|
43
|
-
# @param n_bytes[
|
43
|
+
# @param n_bytes[Integer, nil] how many bytes to read, or `nil` to read all the way to the end
|
44
44
|
# @return [String] the read bytes
|
45
45
|
def read(n_bytes = nil)
|
46
46
|
# If the resource is empty there is nothing to read
|
@@ -62,7 +62,7 @@ class ZipKit::RemoteIO
|
|
62
62
|
|
63
63
|
# Returns the current pointer position within the IO
|
64
64
|
#
|
65
|
-
# @return [
|
65
|
+
# @return [Integer]
|
66
66
|
def tell
|
67
67
|
@pos
|
68
68
|
end
|
@@ -74,7 +74,7 @@ class ZipKit::RemoteIO
|
|
74
74
|
# @param range[Range] the HTTP range of data to fetch from remote
|
75
75
|
# @return [String] the response body of the ranged request
|
76
76
|
def request_range(range)
|
77
|
-
http = Net::HTTP.start(@uri.hostname, @uri.port)
|
77
|
+
http = Net::HTTP.start(@uri.hostname, @uri.port, use_ssl: @uri.scheme == "https")
|
78
78
|
request = Net::HTTP::Get.new(@uri)
|
79
79
|
request.range = range
|
80
80
|
response = http.request(request)
|
@@ -91,7 +91,7 @@ class ZipKit::RemoteIO
|
|
91
91
|
#
|
92
92
|
# @return [Integer] the size of the remote resource, parsed either from Content-Length or Content-Range header
|
93
93
|
def request_object_size
|
94
|
-
http = Net::HTTP.start(@uri.hostname, @uri.port)
|
94
|
+
http = Net::HTTP.start(@uri.hostname, @uri.port, use_ssl: @uri.scheme == "https")
|
95
95
|
request = Net::HTTP::Get.new(@uri)
|
96
96
|
request.range = 0..0
|
97
97
|
response = http.request(request)
|
@@ -24,7 +24,7 @@ class ZipKit::SizeEstimator
|
|
24
24
|
#
|
25
25
|
# @param kwargs_for_streamer_new Any options to pass to Streamer, see {Streamer#initialize}
|
26
26
|
# @return [Integer] the size of the resulting archive, in bytes
|
27
|
-
# @yieldparam [SizeEstimator] the estimator
|
27
|
+
# @yieldparam estimator[SizeEstimator] the estimator
|
28
28
|
def self.estimate(**kwargs_for_streamer_new)
|
29
29
|
streamer = ZipKit::Streamer.new(ZipKit::NullWriter, **kwargs_for_streamer_new)
|
30
30
|
estimator = new(streamer)
|
@@ -35,9 +35,12 @@ class ZipKit::SizeEstimator
|
|
35
35
|
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
36
36
|
#
|
37
37
|
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
|
38
|
-
# @param size [
|
39
|
-
# @param use_data_descriptor[Boolean] whether
|
40
|
-
#
|
38
|
+
# @param size [Integer] size of the uncompressed entry
|
39
|
+
# @param use_data_descriptor[Boolean] whether there is going to be a data descriptor written
|
40
|
+
# after the entry body, to specify size.
|
41
|
+
# You must enable this if you are going to be
|
42
|
+
# using {Streamer#write_stored_file} as otherwise your
|
43
|
+
# estimated size is not going to be accurate
|
41
44
|
# @return self
|
42
45
|
def add_stored_entry(filename:, size:, use_data_descriptor: false)
|
43
46
|
@streamer.add_stored_entry(filename: filename,
|
@@ -54,10 +57,13 @@ class ZipKit::SizeEstimator
|
|
54
57
|
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
55
58
|
#
|
56
59
|
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
|
57
|
-
# @param uncompressed_size [
|
58
|
-
# @param compressed_size [
|
59
|
-
# @param use_data_descriptor[Boolean] whether
|
60
|
-
#
|
60
|
+
# @param uncompressed_size [Integer] size of the uncompressed entry
|
61
|
+
# @param compressed_size [Integer] size of the compressed entry
|
62
|
+
# @param use_data_descriptor[Boolean] whether there is going to be a data descriptor written
|
63
|
+
# after the entry body, to specify size.
|
64
|
+
# You must enable this if you are going to be
|
65
|
+
# using {Streamer#write_deflated_file} as otherwise your
|
66
|
+
# estimated size is not going to be accurate
|
61
67
|
# @return self
|
62
68
|
def add_deflated_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
|
63
69
|
@streamer.add_deflated_entry(filename: filename,
|
data/lib/zip_kit/stream_crc32.rb
CHANGED
@@ -16,7 +16,7 @@ class ZipKit::StreamCRC32
|
|
16
16
|
# Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
|
17
17
|
#
|
18
18
|
# @param io[IO] the IO to read the data from
|
19
|
-
# @return [
|
19
|
+
# @return [Integer] the computed CRC32 value
|
20
20
|
def self.from_io(io)
|
21
21
|
# If we can specify the string capacity upfront we will not have to resize
|
22
22
|
# the string during operation. This saves time but is only available on
|
@@ -43,7 +43,7 @@ class ZipKit::StreamCRC32
|
|
43
43
|
|
44
44
|
# Returns the CRC32 value computed so far
|
45
45
|
#
|
46
|
-
# @return [
|
46
|
+
# @return [Integer] the updated CRC32 value for all the blobs so far
|
47
47
|
def to_i
|
48
48
|
@crc
|
49
49
|
end
|
@@ -51,9 +51,9 @@ class ZipKit::StreamCRC32
|
|
51
51
|
# Appends a known CRC32 value to the current one, and combines the
|
52
52
|
# contained CRC32 value in-place.
|
53
53
|
#
|
54
|
-
# @param crc32[
|
55
|
-
# @param blob_size[
|
56
|
-
# @return [
|
54
|
+
# @param crc32[Integer] the CRC32 value to append
|
55
|
+
# @param blob_size[Integer] the size of the data the `crc32` is computed from
|
56
|
+
# @return [Integer] the updated CRC32 value for all the blobs so far
|
57
57
|
def append(crc32, blob_size)
|
58
58
|
@crc = Zlib.crc32_combine(@crc, crc32, blob_size)
|
59
59
|
end
|
@@ -26,6 +26,7 @@ class ZipKit::Streamer::Heuristic < ZipKit::Streamer::Writable
|
|
26
26
|
@bytes_deflated = 0
|
27
27
|
|
28
28
|
@winner = nil
|
29
|
+
@started_closing = false
|
29
30
|
end
|
30
31
|
|
31
32
|
def <<(bytes)
|
@@ -40,6 +41,9 @@ class ZipKit::Streamer::Heuristic < ZipKit::Streamer::Writable
|
|
40
41
|
end
|
41
42
|
|
42
43
|
def close
|
44
|
+
return if @started_closing
|
45
|
+
@started_closing = true # started_closing because an exception may get raised inside close(), as we add an entry there
|
46
|
+
|
43
47
|
decide unless @winner
|
44
48
|
@winner.close
|
45
49
|
end
|
@@ -47,6 +51,7 @@ class ZipKit::Streamer::Heuristic < ZipKit::Streamer::Writable
|
|
47
51
|
private def decide
|
48
52
|
# Finish and then close the deflater - it has likely buffered some data
|
49
53
|
@bytes_deflated += @deflater.finish.bytesize until @deflater.finished?
|
54
|
+
|
50
55
|
# If the deflated version is smaller than the stored one
|
51
56
|
# - use deflate, otherwise stored
|
52
57
|
ratio = @bytes_deflated / @buf.size.to_f
|
@@ -55,9 +60,12 @@ class ZipKit::Streamer::Heuristic < ZipKit::Streamer::Writable
|
|
55
60
|
else
|
56
61
|
@streamer.write_stored_file(@filename, **@write_file_options)
|
57
62
|
end
|
63
|
+
|
58
64
|
# Copy the buffered uncompressed data into the newly initialized writable
|
59
65
|
@buf.rewind
|
60
66
|
IO.copy_stream(@buf, @winner)
|
61
67
|
@buf.truncate(0)
|
68
|
+
ensure
|
69
|
+
@deflater.close
|
62
70
|
end
|
63
71
|
end
|
data/lib/zip_kit/streamer.rb
CHANGED
@@ -5,8 +5,12 @@ require "set"
|
|
5
5
|
# Is used to write ZIP archives without having to read them back or to overwrite
|
6
6
|
# data. It outputs into any object that supports `<<` or `write`, namely:
|
7
7
|
#
|
8
|
-
#
|
9
|
-
#
|
8
|
+
# * `Array` - will contain binary strings
|
9
|
+
# * `File` - data will be written to it as it gets generated
|
10
|
+
# * `IO` (`Socket`, `StringIO`) - data gets written into it
|
11
|
+
# * `String` - in binary encoding and unfrozen - also makes a decent output target
|
12
|
+
#
|
13
|
+
# or anything else that responds to `#<<` or `#write`.
|
10
14
|
#
|
11
15
|
# You can also combine output through the `Streamer` with direct output to the destination,
|
12
16
|
# all while preserving the correct offsets in the ZIP file structures. This allows usage
|
@@ -482,6 +486,10 @@ class ZipKit::Streamer
|
|
482
486
|
# is likely already on the wire. However, excluding the entry from the central directory of the ZIP
|
483
487
|
# file will allow better-behaved ZIP unarchivers to extract the entries which did store correctly,
|
484
488
|
# provided they read the ZIP from the central directory and not straight-ahead.
|
489
|
+
# Rolling back does not perform any writes.
|
490
|
+
#
|
491
|
+
# `rollback!` gets called for you if an exception is raised inside the block of `write_file`,
|
492
|
+
# `write_deflated_file` and `write_stored_file`.
|
485
493
|
#
|
486
494
|
# @example
|
487
495
|
# zip.add_stored_entry(filename: "data.bin", size: 4.megabytes, crc32: the_crc)
|
@@ -493,14 +501,17 @@ class ZipKit::Streamer
|
|
493
501
|
# end
|
494
502
|
# @return [Integer] position in the output stream / ZIP archive
|
495
503
|
def rollback!
|
496
|
-
|
497
|
-
return @out.tell unless removed_entry
|
504
|
+
@files.pop if @remove_last_file_at_rollback
|
498
505
|
|
506
|
+
# Recreate the path set from remaining entries (PathSet does not support cheap deletes yet)
|
499
507
|
@path_set.clear
|
500
508
|
@files.each do |e|
|
501
509
|
@path_set.add_directory_or_file_path(e.filename) unless e.filler?
|
502
510
|
end
|
503
|
-
|
511
|
+
|
512
|
+
# Create filler for the truncated or unusable local file entry that did get written into the output
|
513
|
+
filler_size_bytes = @out.tell - @offset_before_last_local_file_header
|
514
|
+
@files << Filler.new(filler_size_bytes)
|
504
515
|
|
505
516
|
@out.tell
|
506
517
|
end
|
@@ -554,6 +565,11 @@ class ZipKit::Streamer
|
|
554
565
|
use_data_descriptor:,
|
555
566
|
unix_permissions:
|
556
567
|
)
|
568
|
+
# Set state needed for proper rollback later. If write_local_file_header
|
569
|
+
# does manage to write _some_ bytes, but fails later (we write in tiny bits sometimes)
|
570
|
+
# we should be able to create a filler from this offset on when we roll back.
|
571
|
+
@offset_before_last_local_file_header = @out.tell
|
572
|
+
@remove_last_file_at_rollback = false
|
557
573
|
|
558
574
|
# Clean backslashes
|
559
575
|
filename = remove_backslash(filename)
|
@@ -600,9 +616,11 @@ class ZipKit::Streamer
|
|
600
616
|
mtime: e.mtime,
|
601
617
|
filename: e.filename,
|
602
618
|
storage_mode: e.storage_mode)
|
619
|
+
|
603
620
|
e.bytes_used_for_local_header = @out.tell - e.local_header_offset
|
604
621
|
|
605
622
|
@files << e
|
623
|
+
@remove_last_file_at_rollback = true
|
606
624
|
end
|
607
625
|
|
608
626
|
def remove_backslash(filename)
|
data/lib/zip_kit/version.rb
CHANGED