zip_tricks 4.4.2 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +79 -0
- data/.rubocop_todo.yml +43 -0
- data/.travis.yml +3 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +7 -4
- data/examples/archive_size_estimate.rb +8 -6
- data/examples/config.ru +3 -1
- data/examples/parallel_compression_with_block_deflate.rb +31 -20
- data/examples/rack_application.rb +25 -17
- data/lib/zip_tricks.rb +4 -2
- data/lib/zip_tricks/block_deflate.rb +43 -25
- data/lib/zip_tricks/block_write.rb +20 -10
- data/lib/zip_tricks/file_reader.rb +241 -145
- data/lib/zip_tricks/file_reader/inflating_reader.rb +4 -1
- data/lib/zip_tricks/file_reader/stored_reader.rb +4 -1
- data/lib/zip_tricks/null_writer.rb +5 -5
- data/lib/zip_tricks/rack_body.rb +7 -4
- data/lib/zip_tricks/rails_streaming.rb +3 -1
- data/lib/zip_tricks/remote_io.rb +9 -5
- data/lib/zip_tricks/remote_uncap.rb +10 -5
- data/lib/zip_tricks/size_estimator.rb +39 -27
- data/lib/zip_tricks/stream_crc32.rb +2 -0
- data/lib/zip_tricks/streamer.rb +254 -98
- data/lib/zip_tricks/streamer/deflated_writer.rb +6 -9
- data/lib/zip_tricks/streamer/entry.rb +11 -3
- data/lib/zip_tricks/streamer/stored_writer.rb +5 -7
- data/lib/zip_tricks/streamer/writable.rb +30 -7
- data/lib/zip_tricks/version.rb +3 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -0
- data/lib/zip_tricks/zip_writer.rb +54 -44
- data/testing/generate_test_files.rb +68 -38
- data/testing/support.rb +21 -16
- data/testing/test-report.txt +28 -0
- data/zip_tricks.gemspec +24 -22
- metadata +23 -5
@@ -1,3 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rubocop: convention: Missing top-level class documentation comment.
|
1
4
|
class ZipTricks::FileReader::InflatingReader
|
2
5
|
def initialize(from_io, compressed_data_size)
|
3
6
|
@io = from_io
|
@@ -6,7 +9,7 @@ class ZipTricks::FileReader::InflatingReader
|
|
6
9
|
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
7
10
|
end
|
8
11
|
|
9
|
-
def extract(n_bytes=nil)
|
12
|
+
def extract(n_bytes = nil)
|
10
13
|
n_bytes ||= (@compressed_data_size - @already_read)
|
11
14
|
|
12
15
|
return if eof?
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rubocop: convention: Missing top-level class documentation comment.
|
1
4
|
class ZipTricks::FileReader::StoredReader
|
2
5
|
def initialize(from_io, compressed_data_size)
|
3
6
|
@io = from_io
|
@@ -5,7 +8,7 @@ class ZipTricks::FileReader::StoredReader
|
|
5
8
|
@already_read = 0
|
6
9
|
end
|
7
10
|
|
8
|
-
def extract(n_bytes=nil)
|
11
|
+
def extract(n_bytes = nil)
|
9
12
|
n_bytes ||= (@compressed_data_size - @already_read)
|
10
13
|
|
11
14
|
return if eof?
|
@@ -1,12 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Used when you need to supply a destination IO for some
|
2
4
|
# write operations, but want to discard the data (like when
|
3
5
|
# estimating the size of a ZIP)
|
4
6
|
module ZipTricks::NullWriter
|
5
7
|
# @param data[String] the data to write
|
6
8
|
# @return [self]
|
7
|
-
def self.<<(
|
8
|
-
|
9
|
-
|
10
|
-
# @return [Fixnum] the amount of data that was supposed to be written
|
11
|
-
def self.write(data); data.bytesize; end
|
9
|
+
def self.<<(_)
|
10
|
+
self
|
11
|
+
end
|
12
12
|
end
|
data/lib/zip_tricks/rack_body.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Can be used as a Rack response body directly. Will yield
|
2
4
|
# a {ZipTricks::Streamer} for adding entries to the archive and writing
|
3
5
|
# zip entry bodies.
|
@@ -13,14 +15,16 @@ class ZipTricks::RackBody
|
|
13
15
|
# estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
|
14
16
|
# end
|
15
17
|
#
|
16
|
-
# # Prepare the response body. The block will only be called when the
|
18
|
+
# # Prepare the response body. The block will only be called when the
|
19
|
+
# response starts to be written.
|
17
20
|
# body = ZipTricks::RackBody.new do | streamer |
|
18
21
|
# streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
|
19
22
|
# streamer << large_file.read(1024*1024) until large_file.eof?
|
20
23
|
# ...
|
21
24
|
# end
|
22
25
|
#
|
23
|
-
# return [200, {'Content-Type' => 'binary/octet-stream',
|
26
|
+
# return [200, {'Content-Type' => 'binary/octet-stream',
|
27
|
+
# 'Content-Length' => content_length.to_s}, body]
|
24
28
|
def initialize(&blk)
|
25
29
|
@archiving_block = blk
|
26
30
|
end
|
@@ -36,6 +40,5 @@ class ZipTricks::RackBody
|
|
36
40
|
# Does nothing because nothing has to be deallocated or canceled
|
37
41
|
# even if the zip output is incomplete. The archive gets closed
|
38
42
|
# automatically as part of {ZipTricks::Streamer.open}
|
39
|
-
def close
|
40
|
-
end
|
43
|
+
def close; end
|
41
44
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Should be included into a Rails controller (together with `ActionController::Live`)
|
2
4
|
# for easy ZIP output from any action.
|
3
5
|
module ZipTricks::RailsStreaming
|
@@ -10,7 +12,7 @@ module ZipTricks::RailsStreaming
|
|
10
12
|
# Create a wrapper for the write call that quacks like something you
|
11
13
|
# can << to, used by ZipTricks
|
12
14
|
w = ZipTricks::BlockWrite.new { |chunk| response.stream.write(chunk) }
|
13
|
-
ZipTricks::Streamer.open(w){|z| yield(z) }
|
15
|
+
ZipTricks::Streamer.open(w) { |z| yield(z) }
|
14
16
|
ensure
|
15
17
|
response.stream.close
|
16
18
|
end
|
data/lib/zip_tricks/remote_io.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# An object that fakes just-enough of an IO to be dangerous
|
2
4
|
# - or, more precisely, to be useful as a source for the FileReader
|
3
5
|
# central directory parser. Effectively we substitute an IO object
|
@@ -20,23 +22,25 @@ class ZipTricks::RemoteIO
|
|
20
22
|
@pos = clamp(0, offset, @remote_size)
|
21
23
|
0 # always return 0!
|
22
24
|
end
|
23
|
-
|
25
|
+
|
24
26
|
# Emulates IO#size.
|
25
27
|
#
|
26
28
|
# @return [Fixnum] the size of the remote resource
|
27
29
|
def size
|
28
30
|
@remote_size ||= request_object_size
|
29
31
|
end
|
30
|
-
|
32
|
+
|
31
33
|
# Emulates IO#read, but requires the number of bytes to read
|
32
34
|
# The read will be limited to the
|
33
35
|
# size of the remote resource relative to the current offset in the IO,
|
34
36
|
# so if you are at offset 0 in the IO of size 10, doing a `read(20)`
|
35
|
-
# will only return you 10 bytes of result, and not raise any exceptions.
|
37
|
+
# will only return you 10 bytes of result, and not raise any exceptions.
|
36
38
|
#
|
37
39
|
# @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
|
38
40
|
# @return [String] the read bytes
|
39
|
-
|
41
|
+
# Rubocop: convention: Assignment Branch Condition size for read is too high. [17.92/15]
|
42
|
+
# Rubocop: convention: Method has too many lines. [13/10]
|
43
|
+
def read(n_bytes = nil)
|
40
44
|
@remote_size ||= request_object_size
|
41
45
|
|
42
46
|
# If the resource is empty there is nothing to read
|
@@ -87,7 +91,7 @@ class ZipTricks::RemoteIO
|
|
87
91
|
|
88
92
|
private
|
89
93
|
|
90
|
-
def clamp(a,b,c)
|
94
|
+
def clamp(a, b, c)
|
91
95
|
return a if b < a
|
92
96
|
return c if b > c
|
93
97
|
b
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Allows reading the central directory of a remote ZIP file without
|
2
4
|
# downloading the entire file. The central directory provides the
|
3
5
|
# offsets at which the actual file contents is located. You can then
|
@@ -6,12 +8,15 @@
|
|
6
8
|
# Please read the security warning in `FileReader` _VERY CAREFULLY_
|
7
9
|
# before you use this module.
|
8
10
|
class ZipTricks::RemoteUncap
|
9
|
-
|
10
|
-
# @param uri[String] the HTTP(S) URL to read the ZIP footer from
|
11
|
+
# @param uri[String] the HTTP(S) URL to read the ZIP footer from
|
11
12
|
# @param reader_class[Class] which class to use for reading
|
12
|
-
# @param options_for_zip_reader[Hash] any additional options to give to
|
13
|
-
#
|
14
|
-
|
13
|
+
# @param options_for_zip_reader[Hash] any additional options to give to
|
14
|
+
# {ZipTricks::FileReader} when reading
|
15
|
+
# @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the
|
16
|
+
# files within the remote archive
|
17
|
+
def self.files_within_zip_at(uri,
|
18
|
+
reader_class: ZipTricks::FileReader,
|
19
|
+
**options_for_zip_reader)
|
15
20
|
fetcher = new(uri)
|
16
21
|
fake_io = ZipTricks::RemoteIO.new(fetcher)
|
17
22
|
reader = reader_class.new
|
@@ -1,48 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Helps to estimate archive sizes
|
2
4
|
class ZipTricks::SizeEstimator
|
3
5
|
require_relative 'streamer'
|
4
|
-
|
5
|
-
# Used to mark a couple of methods public
|
6
|
-
class DetailStreamer < ::ZipTricks::Streamer
|
7
|
-
public :add_file_and_write_local_header, :write_data_descriptor_for_last_entry
|
8
|
-
end
|
9
|
-
private_constant :DetailStreamer
|
10
|
-
|
6
|
+
|
11
7
|
# Creates a new estimator with a Streamer object. Normally you should use
|
12
8
|
# `estimate` instead and not use this method directly.
|
13
9
|
def initialize(streamer)
|
14
10
|
@streamer = streamer
|
15
11
|
end
|
16
12
|
private :initialize
|
17
|
-
|
13
|
+
|
18
14
|
# Performs the estimate using fake archiving. It needs to know the sizes of the
|
19
15
|
# entries upfront. Usage:
|
20
16
|
#
|
21
17
|
# expected_zip_size = SizeEstimator.estimate do | estimator |
|
22
18
|
# estimator.add_stored_entry(filename: "file.doc", size: 898291)
|
23
|
-
# estimator.
|
19
|
+
# estimator.add_deflated_entry(filename: "family.tif",
|
20
|
+
# uncompressed_size: 89281911, compressed_size: 121908)
|
24
21
|
# end
|
25
22
|
#
|
26
|
-
# @return [
|
23
|
+
# @return [Integer] the size of the resulting archive, in bytes
|
27
24
|
# @yield [SizeEstimator] the estimator
|
28
25
|
def self.estimate
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
|
27
|
+
estimator = new(streamer)
|
28
|
+
yield(estimator)
|
29
|
+
streamer.close # Returns the .tell of the contained IO
|
32
30
|
end
|
33
31
|
|
34
32
|
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
35
33
|
#
|
36
34
|
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
|
37
35
|
# @param size [Fixnum] size of the uncompressed entry
|
38
|
-
# @param use_data_descriptor[Boolean] whether the entry uses a postfix
|
36
|
+
# @param use_data_descriptor[Boolean] whether the entry uses a postfix
|
37
|
+
# data descriptor to specify size
|
39
38
|
# @return self
|
40
39
|
def add_stored_entry(filename:, size:, use_data_descriptor: false)
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
@streamer.add_stored_entry(filename: filename,
|
41
|
+
crc32: 0,
|
42
|
+
size: size,
|
43
|
+
use_data_descriptor: use_data_descriptor)
|
44
44
|
@streamer.simulate_write(size)
|
45
|
-
|
45
|
+
if use_data_descriptor
|
46
|
+
@streamer.update_last_entry_and_write_data_descriptor(crc32: 0, compressed_size: size, uncompressed_size: size)
|
47
|
+
end
|
46
48
|
self
|
47
49
|
end
|
48
50
|
|
@@ -51,24 +53,34 @@ class ZipTricks::SizeEstimator
|
|
51
53
|
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
|
52
54
|
# @param uncompressed_size [Fixnum] size of the uncompressed entry
|
53
55
|
# @param compressed_size [Fixnum] size of the compressed entry
|
54
|
-
# @param use_data_descriptor[Boolean] whether the entry uses a postfix data
|
56
|
+
# @param use_data_descriptor[Boolean] whether the entry uses a postfix data
|
57
|
+
# descriptor to specify size
|
55
58
|
# @return self
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
59
|
+
def add_deflated_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
|
60
|
+
@streamer.add_deflated_entry(filename: filename,
|
61
|
+
crc32: 0,
|
62
|
+
compressed_size: compressed_size,
|
63
|
+
uncompressed_size: uncompressed_size,
|
64
|
+
use_data_descriptor: use_data_descriptor)
|
65
|
+
|
60
66
|
@streamer.simulate_write(compressed_size)
|
61
|
-
|
67
|
+
if use_data_descriptor
|
68
|
+
@streamer.update_last_entry_and_write_data_descriptor(crc32: 0,
|
69
|
+
compressed_size: compressed_size,
|
70
|
+
uncompressed_size: uncompressed_size)
|
71
|
+
end
|
62
72
|
self
|
63
73
|
end
|
64
|
-
|
74
|
+
|
75
|
+
# Will be phased out in ZipTricks 5.x
|
76
|
+
alias_method :add_compressed_entry, :add_deflated_entry
|
77
|
+
|
65
78
|
# Add an empty directory to the archive.
|
66
79
|
#
|
67
80
|
# @param dirname [String] the name of the directory
|
68
81
|
# @return self
|
69
82
|
def add_empty_directory_entry(dirname:)
|
70
|
-
@streamer.
|
71
|
-
compressed_size: 0, uncompressed_size: 0)
|
83
|
+
@streamer.add_empty_directory(dirname: dirname)
|
72
84
|
self
|
73
85
|
end
|
74
86
|
end
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
1
5
|
# Is used to write streamed ZIP archives into the provided IO-ish object.
|
2
6
|
# The output IO is never going to be rewound or seeked, so the output
|
3
|
-
# of this object can be coupled directly to, say, a Rack output.
|
7
|
+
# of this object can be coupled directly to, say, a Rack output. The
|
8
|
+
# output can also be a String, Array or anything that responds to `<<`.
|
4
9
|
#
|
5
10
|
# Allows for splicing raw files (for "stored" entries without compression)
|
6
11
|
# and splicing of deflated files (for "deflated" storage mode).
|
@@ -16,7 +21,7 @@
|
|
16
21
|
# You can use the Streamer with data descriptors (the CRC32 and the sizes will be
|
17
22
|
# written after the file data). This allows non-rewinding on-the-fly compression.
|
18
23
|
# If you are compressing large files, the Deflater object that the Streamer controls
|
19
|
-
# will be regularly flushed to prevent memory inflation.
|
24
|
+
# will be regularly flushed to prevent memory inflation.
|
20
25
|
#
|
21
26
|
# ZipTricks::Streamer.open(file_socket_or_string) do |zip|
|
22
27
|
# zip.write_stored_file('mov.mp4') do |sink|
|
@@ -51,7 +56,28 @@
|
|
51
56
|
# so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
|
52
57
|
# offsets in the IO will not be updated - which will result in an invalid ZIP.
|
53
58
|
#
|
54
|
-
#
|
59
|
+
#
|
60
|
+
# ## On-the-fly deflate - using the Streamer with async/suspended writes and data descriptors
|
61
|
+
#
|
62
|
+
# If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
|
63
|
+
# there is an option to use a separate writer object. It gets returned from `write_deflated_file`
|
64
|
+
# and `write_stored_file` if you do not provide them with a block, and will accept data writes.
|
65
|
+
#
|
66
|
+
# ZipTricks::Streamer.open(socket) do | zip |
|
67
|
+
# w = zip.write_stored_file('mov.mp4')
|
68
|
+
# w << data
|
69
|
+
# w.close
|
70
|
+
# end
|
71
|
+
#
|
72
|
+
# The central directory will be written automatically at the end of the `open` block. If you need
|
73
|
+
# to manage the Streamer manually, or defer the central directory write until appropriate, use
|
74
|
+
# the constructor instead and call `Streamer#close`:
|
75
|
+
#
|
76
|
+
# zip = ZipTricks::Streamer.new(out_io)
|
77
|
+
# .....
|
78
|
+
# zip.close
|
79
|
+
#
|
80
|
+
# Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
|
55
81
|
class ZipTricks::Streamer
|
56
82
|
require_relative 'streamer/deflated_writer'
|
57
83
|
require_relative 'streamer/writable'
|
@@ -82,16 +108,13 @@ class ZipTricks::Streamer
|
|
82
108
|
|
83
109
|
# Creates a new Streamer on top of the given IO-ish object.
|
84
110
|
#
|
85
|
-
# @param stream[IO] the destination IO for the ZIP
|
111
|
+
# @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
86
112
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
87
113
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
88
114
|
def initialize(stream, writer: create_writer)
|
89
|
-
raise InvalidOutput,
|
90
|
-
unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
|
91
|
-
stream = ZipTricks::WriteAndTell.new(stream)
|
92
|
-
end
|
115
|
+
raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
|
93
116
|
|
94
|
-
@out = stream
|
117
|
+
@out = ZipTricks::WriteAndTell.new(stream)
|
95
118
|
@files = []
|
96
119
|
@local_header_offsets = []
|
97
120
|
@filenames_set = Set.new
|
@@ -112,114 +135,189 @@ class ZipTricks::Streamer
|
|
112
135
|
# `IO.copy_stream(from, to)`.
|
113
136
|
#
|
114
137
|
# @param binary_data [String] a String in binary encoding
|
115
|
-
# @return [
|
138
|
+
# @return [Integer] the number of bytes written
|
116
139
|
def write(binary_data)
|
117
140
|
@out << binary_data
|
118
141
|
binary_data.bytesize
|
119
142
|
end
|
120
143
|
|
121
|
-
# Advances the internal IO pointer to keep the offsets of the ZIP file in
|
122
|
-
#
|
144
|
+
# Advances the internal IO pointer to keep the offsets of the ZIP file in
|
145
|
+
# check. Use this if you are going to use accelerated writes to the socket
|
146
|
+
# (like the `sendfile()` call) after writing the headers, or if you
|
123
147
|
# just need to figure out the size of the archive.
|
124
148
|
#
|
125
|
-
# @param num_bytes [
|
126
|
-
# @return [
|
149
|
+
# @param num_bytes [Integer] how many bytes are going to be written bypassing the Streamer
|
150
|
+
# @return [Integer] position in the output stream / ZIP archive
|
127
151
|
def simulate_write(num_bytes)
|
128
152
|
@out.advance_position_by(num_bytes)
|
129
153
|
@out.tell
|
130
154
|
end
|
131
155
|
|
132
|
-
# Writes out the local header for an entry (file in the ZIP) that is using
|
133
|
-
#
|
156
|
+
# Writes out the local header for an entry (file in the ZIP) that is using
|
157
|
+
# the deflated storage model (is compressed). Once this method is called,
|
158
|
+
# the `<<` method has to be called to write the actual contents of the body.
|
134
159
|
#
|
135
|
-
# Note that the deflated body that is going to be written into the output
|
136
|
-
#
|
160
|
+
# Note that the deflated body that is going to be written into the output
|
161
|
+
# has to be _precompressed_ (pre-deflated) before writing it into the
|
162
|
+
# Streamer, because otherwise it is impossible to know its size upfront.
|
137
163
|
#
|
138
164
|
# @param filename [String] the name of the file in the entry
|
139
|
-
# @param compressed_size [
|
140
|
-
#
|
141
|
-
# @param
|
142
|
-
# @
|
143
|
-
|
144
|
-
|
145
|
-
|
165
|
+
# @param compressed_size [Integer] the size of the compressed entry that
|
166
|
+
# is going to be written into the archive
|
167
|
+
# @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
|
168
|
+
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
169
|
+
# @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
|
170
|
+
# @return [Integer] the offset the output IO is at after writing the entry header
|
171
|
+
def add_deflated_entry(filename:, compressed_size: 0, uncompressed_size: 0, crc32: 0, use_data_descriptor: false)
|
172
|
+
add_file_and_write_local_header(filename: filename, crc32: crc32,
|
173
|
+
storage_mode: DEFLATED,
|
174
|
+
compressed_size: compressed_size,
|
175
|
+
uncompressed_size: uncompressed_size,
|
176
|
+
use_data_descriptor: use_data_descriptor)
|
146
177
|
@out.tell
|
147
178
|
end
|
148
179
|
|
149
|
-
#
|
150
|
-
|
180
|
+
# Will be phased out in ZipTricks 5.x
|
181
|
+
alias_method :add_compressed_entry, :add_deflated_entry
|
182
|
+
|
183
|
+
# Writes out the local header for an entry (file in the ZIP) that is using
|
184
|
+
# the stored storage model (is stored as-is).
|
185
|
+
# Once this method is called, the `<<` method has to be called one or more
|
186
|
+
# times to write the actual contents of the body.
|
151
187
|
#
|
152
188
|
# @param filename [String] the name of the file in the entry
|
153
|
-
# @param size [
|
154
|
-
# @param crc32 [
|
155
|
-
# @
|
156
|
-
|
157
|
-
|
158
|
-
|
189
|
+
# @param size [Integer] the size of the file when uncompressed, in bytes
|
190
|
+
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
191
|
+
# @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor. When in use
|
192
|
+
# @return [Integer] the offset the output IO is at after writing the entry header
|
193
|
+
def add_stored_entry(filename:, size: 0, crc32: 0, use_data_descriptor: false)
|
194
|
+
add_file_and_write_local_header(filename: filename,
|
195
|
+
crc32: crc32,
|
196
|
+
storage_mode: STORED,
|
197
|
+
compressed_size: size,
|
198
|
+
uncompressed_size: size,
|
199
|
+
use_data_descriptor: use_data_descriptor)
|
159
200
|
@out.tell
|
160
201
|
end
|
161
202
|
|
162
203
|
# Adds an empty directory to the archive with a size of 0 and permissions of 755.
|
163
204
|
#
|
164
205
|
# @param dirname [String] the name of the directory in the archive
|
165
|
-
# @return [
|
206
|
+
# @return [Integer] the offset the output IO is at after writing the entry header
|
166
207
|
def add_empty_directory(dirname:)
|
167
|
-
add_file_and_write_local_header(filename:
|
168
|
-
|
208
|
+
add_file_and_write_local_header(filename: dirname.to_s + '/',
|
209
|
+
crc32: 0,
|
210
|
+
storage_mode: STORED,
|
211
|
+
compressed_size: 0,
|
212
|
+
uncompressed_size: 0,
|
213
|
+
use_data_descriptor: false)
|
169
214
|
@out.tell
|
170
215
|
end
|
171
|
-
|
172
|
-
# Opens the stream for a stored file in the archive, and yields a writer
|
173
|
-
#
|
174
|
-
#
|
216
|
+
|
217
|
+
# Opens the stream for a stored file in the archive, and yields a writer
|
218
|
+
# for that file to the block.
|
219
|
+
# Once the write completes, a data descriptor will be written with the
|
220
|
+
# actual compressed/uncompressed sizes and the CRC32 checksum.
|
221
|
+
#
|
222
|
+
# Using a block, the write will be terminated with a data descriptor outright.
|
223
|
+
#
|
224
|
+
# zip.write_stored_file("foo.txt") do |sink|
|
225
|
+
# IO.copy_stream(source_file, sink)
|
226
|
+
# end
|
227
|
+
#
|
228
|
+
# If deferred writes are desired (for example - to integrate with an API that
|
229
|
+
# does not support blocks, or to work with non-blocking environments) the method
|
230
|
+
# has to be called without a block. In that case it returns the sink instead,
|
231
|
+
# permitting to write to it in a deferred fashion. When `close` is called on
|
232
|
+
# the sink, any remaining compression output will be flushed and the data
|
233
|
+
# descriptor is going to be written.
|
234
|
+
#
|
235
|
+
# Note that even though it does not have to happen within the same call stack,
|
236
|
+
# call sequencing still must be observed. It is therefore not possible to do
|
237
|
+
# this:
|
238
|
+
#
|
239
|
+
# writer_for_file1 = zip.write_stored_file("somefile.jpg")
|
240
|
+
# writer_for_file2 = zip.write_stored_file("another.tif")
|
241
|
+
# writer_for_file1 << data
|
242
|
+
# writer_for_file2 << data
|
243
|
+
#
|
244
|
+
# because it is likely to result in an invalid ZIP file structure later on.
|
245
|
+
# So using this facility in async scenarios is certainly possible, but care
|
246
|
+
# and attention is recommended.
|
175
247
|
#
|
176
248
|
# @param filename[String] the name of the file in the archive
|
177
|
-
# @yield [#<<, #write] an object that the file contents must be written to
|
249
|
+
# @yield [#<<, #write] an object that the file contents must be written to that will be automatically closed
|
250
|
+
# @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
|
178
251
|
def write_stored_file(filename)
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
yield(Writable.new(w))
|
184
|
-
crc, comp, uncomp = w.finish
|
185
|
-
|
186
|
-
# Save the information into the entry for when the time comes to write out the central directory
|
187
|
-
last_entry = @files.last
|
188
|
-
last_entry.crc32 = crc
|
189
|
-
last_entry.compressed_size = comp
|
190
|
-
last_entry.uncompressed_size = uncomp
|
252
|
+
add_stored_entry(filename: filename,
|
253
|
+
use_data_descriptor: true,
|
254
|
+
crc32: 0,
|
255
|
+
size: 0)
|
191
256
|
|
192
|
-
|
257
|
+
writable = Writable.new(self, StoredWriter.new(@out))
|
258
|
+
if block_given?
|
259
|
+
yield(writable)
|
260
|
+
writable.close
|
261
|
+
end
|
262
|
+
writable
|
193
263
|
end
|
194
264
|
|
195
|
-
# Opens the stream for a deflated file in the archive, and yields a writer
|
196
|
-
# Once the write completes, a data descriptor
|
197
|
-
# sizes and the
|
265
|
+
# Opens the stream for a deflated file in the archive, and yields a writer
|
266
|
+
# for that file to the block. Once the write completes, a data descriptor
|
267
|
+
# will be written with the actual compressed/uncompressed sizes and the
|
268
|
+
# CRC32 checksum.
|
269
|
+
#
|
270
|
+
# Using a block, the write will be terminated with a data descriptor outright.
|
271
|
+
#
|
272
|
+
# zip.write_deflated_file("foo.txt") do |sink|
|
273
|
+
# IO.copy_stream(source_file, sink)
|
274
|
+
# end
|
275
|
+
#
|
276
|
+
# If deferred writes are desired (for example - to integrate with an API that
|
277
|
+
# does not support blocks, or to work with non-blocking environments) the method
|
278
|
+
# has to be called without a block. In that case it returns the sink instead,
|
279
|
+
# permitting to write to it in a deferred fashion. When `close` is called on
|
280
|
+
# the sink, any remaining compression output will be flushed and the data
|
281
|
+
# descriptor is going to be written.
|
282
|
+
#
|
283
|
+
# Note that even though it does not have to happen within the same call stack,
|
284
|
+
# call sequencing still must be observed. It is therefore not possible to do
|
285
|
+
# this:
|
286
|
+
#
|
287
|
+
# writer_for_file1 = zip.write_deflated_file("somefile.jpg")
|
288
|
+
# writer_for_file2 = zip.write_deflated_file("another.tif")
|
289
|
+
# writer_for_file1 << data
|
290
|
+
# writer_for_file2 << data
|
291
|
+
# writer_for_file1.close
|
292
|
+
# writer_for_file2.close
|
293
|
+
#
|
294
|
+
# because it is likely to result in an invalid ZIP file structure later on.
|
295
|
+
# So using this facility in async scenarios is certainly possible, but care
|
296
|
+
# and attention is recommended.
|
198
297
|
#
|
199
298
|
# @param filename[String] the name of the file in the archive
|
200
299
|
# @yield [#<<, #write] an object that the file contents must be written to
|
201
300
|
def write_deflated_file(filename)
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
write_data_descriptor_for_last_entry
|
301
|
+
add_deflated_entry(filename: filename,
|
302
|
+
use_data_descriptor: true,
|
303
|
+
crc32: 0,
|
304
|
+
compressed_size: 0,
|
305
|
+
uncompressed_size: 0)
|
306
|
+
|
307
|
+
writable = Writable.new(self, DeflatedWriter.new(@out))
|
308
|
+
if block_given?
|
309
|
+
yield(writable)
|
310
|
+
writable.close
|
311
|
+
end
|
312
|
+
writable
|
215
313
|
end
|
216
|
-
|
314
|
+
|
217
315
|
# Closes the archive. Writes the central directory, and switches the writer into
|
218
316
|
# a state where it can no longer be written to.
|
219
317
|
#
|
220
318
|
# Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
|
221
319
|
#
|
222
|
-
# @return [
|
320
|
+
# @return [Integer] the offset the output IO is at after closing the archive
|
223
321
|
def close
|
224
322
|
# Record the central directory offset, so that it can be written into the EOCD record
|
225
323
|
cdir_starts_at = @out.tell
|
@@ -227,18 +325,31 @@ class ZipTricks::Streamer
|
|
227
325
|
# Write out the central directory entries, one for each file
|
228
326
|
@files.each_with_index do |entry, i|
|
229
327
|
header_loc = @local_header_offsets.fetch(i)
|
230
|
-
@writer.write_central_directory_file_header(io: @out,
|
231
|
-
|
232
|
-
|
233
|
-
|
328
|
+
@writer.write_central_directory_file_header(io: @out,
|
329
|
+
local_file_header_location: header_loc,
|
330
|
+
gp_flags: entry.gp_flags,
|
331
|
+
storage_mode: entry.storage_mode,
|
332
|
+
compressed_size: entry.compressed_size,
|
333
|
+
uncompressed_size: entry.uncompressed_size,
|
334
|
+
mtime: entry.mtime,
|
335
|
+
crc32: entry.crc32,
|
336
|
+
filename: entry.filename)
|
234
337
|
end
|
235
338
|
|
236
339
|
# Record the central directory size, for the EOCDR
|
237
340
|
cdir_size = @out.tell - cdir_starts_at
|
238
341
|
|
239
342
|
# Write out the EOCDR
|
240
|
-
@writer.
|
241
|
-
|
343
|
+
@writer.write_end_of_central_directory(io: @out,
|
344
|
+
start_of_central_directory_location: cdir_starts_at,
|
345
|
+
central_directory_size: cdir_size,
|
346
|
+
num_files_in_archive: @files.length)
|
347
|
+
|
348
|
+
# Clear the files so that GC will not have to trace all the way to here to deallocate them
|
349
|
+
@files.clear
|
350
|
+
@filenames_set.clear
|
351
|
+
|
352
|
+
# and return the final offset
|
242
353
|
@out.tell
|
243
354
|
end
|
244
355
|
|
@@ -251,29 +362,73 @@ class ZipTricks::Streamer
|
|
251
362
|
ZipTricks::ZipWriter.new
|
252
363
|
end
|
253
364
|
|
365
|
+
# Updates the last entry written with the CRC32 checksum and compressed/uncompressed
|
366
|
+
# sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
|
367
|
+
# After updating the entry will immediately write the data descriptor bytes
|
368
|
+
# to the output.
|
369
|
+
#
|
370
|
+
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
371
|
+
# @param compressed_size [Integer] the size of the compressed segment within the ZIP
|
372
|
+
# @param uncompressed_size [Integer] the size of the entry once uncompressed
|
373
|
+
# @return [Integer] the offset the output IO is at after writing the data descriptor
|
374
|
+
def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:)
|
375
|
+
# Save the information into the entry for when the time comes to write
|
376
|
+
# out the central directory
|
377
|
+
last_entry = @files.fetch(-1)
|
378
|
+
last_entry.crc32 = crc32
|
379
|
+
last_entry.compressed_size = compressed_size
|
380
|
+
last_entry.uncompressed_size = uncompressed_size
|
381
|
+
|
382
|
+
@writer.write_data_descriptor(io: @out,
|
383
|
+
crc32: last_entry.crc32,
|
384
|
+
compressed_size: last_entry.compressed_size,
|
385
|
+
uncompressed_size: last_entry.uncompressed_size)
|
386
|
+
@out.tell
|
387
|
+
end
|
388
|
+
|
254
389
|
private
|
255
390
|
|
256
|
-
def add_file_and_write_local_header(filename:,
|
257
|
-
|
391
|
+
def add_file_and_write_local_header(filename:,
|
392
|
+
crc32:,
|
393
|
+
storage_mode:,
|
394
|
+
compressed_size:,
|
395
|
+
uncompressed_size:,
|
396
|
+
use_data_descriptor:)
|
258
397
|
|
259
398
|
# Clean backslashes and uniqify filenames if there are duplicates
|
260
399
|
filename = remove_backslash(filename)
|
261
400
|
filename = uniquify_name(filename) if @filenames_set.include?(filename)
|
262
401
|
|
263
|
-
|
264
|
-
|
402
|
+
unless [STORED, DEFLATED].include?(storage_mode)
|
403
|
+
raise UnknownMode, "Unknown compression mode #{storage_mode}"
|
404
|
+
end
|
405
|
+
|
406
|
+
raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
|
265
407
|
|
266
|
-
|
408
|
+
if use_data_descriptor
|
409
|
+
crc32 = 0
|
410
|
+
compressed_size = 0
|
411
|
+
uncompressed_size = 0
|
412
|
+
end
|
413
|
+
|
414
|
+
e = Entry.new(filename,
|
415
|
+
crc32,
|
416
|
+
compressed_size,
|
417
|
+
uncompressed_size,
|
418
|
+
storage_mode,
|
419
|
+
mtime = Time.now.utc,
|
420
|
+
use_data_descriptor)
|
267
421
|
@files << e
|
268
422
|
@filenames_set << e.filename
|
269
423
|
@local_header_offsets << @out.tell
|
270
|
-
@writer.write_local_file_header(io: @out,
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
424
|
+
@writer.write_local_file_header(io: @out,
|
425
|
+
gp_flags: e.gp_flags,
|
426
|
+
crc32: e.crc32,
|
427
|
+
compressed_size: e.compressed_size,
|
428
|
+
uncompressed_size: e.uncompressed_size,
|
429
|
+
mtime: e.mtime,
|
430
|
+
filename: e.filename,
|
431
|
+
storage_mode: e.storage_mode)
|
277
432
|
end
|
278
433
|
|
279
434
|
def remove_backslash(filename)
|
@@ -281,8 +436,9 @@ class ZipTricks::Streamer
|
|
281
436
|
end
|
282
437
|
|
283
438
|
def uniquify_name(filename)
|
284
|
-
|
285
|
-
|
439
|
+
# we add (1), (2), (n) at the end of a filename if there is a duplicate
|
440
|
+
copy_pattern = /\((\d+)\)$/
|
441
|
+
parts = filename.split('.')
|
286
442
|
ext = if parts.last =~ /gz|zip/ && parts.size > 2
|
287
443
|
parts.pop(2)
|
288
444
|
elsif parts.size > 1
|
@@ -292,12 +448,12 @@ class ZipTricks::Streamer
|
|
292
448
|
|
293
449
|
duplicate_counter = 1
|
294
450
|
loop do
|
295
|
-
if fn_last_part =~ copy_pattern
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
new_filename = (parts + [fn_last_part, ext]).compact.join(
|
451
|
+
fn_last_part = if fn_last_part =~ copy_pattern
|
452
|
+
fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
|
453
|
+
else
|
454
|
+
"#{fn_last_part} (#{duplicate_counter})"
|
455
|
+
end
|
456
|
+
new_filename = (parts + [fn_last_part, ext]).compact.join('.')
|
301
457
|
return new_filename unless @filenames_set.include?(new_filename)
|
302
458
|
duplicate_counter += 1
|
303
459
|
end
|