zip_tricks 4.4.2 → 4.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +79 -0
- data/.rubocop_todo.yml +43 -0
- data/.travis.yml +3 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +7 -4
- data/examples/archive_size_estimate.rb +8 -6
- data/examples/config.ru +3 -1
- data/examples/parallel_compression_with_block_deflate.rb +31 -20
- data/examples/rack_application.rb +25 -17
- data/lib/zip_tricks.rb +4 -2
- data/lib/zip_tricks/block_deflate.rb +43 -25
- data/lib/zip_tricks/block_write.rb +20 -10
- data/lib/zip_tricks/file_reader.rb +241 -145
- data/lib/zip_tricks/file_reader/inflating_reader.rb +4 -1
- data/lib/zip_tricks/file_reader/stored_reader.rb +4 -1
- data/lib/zip_tricks/null_writer.rb +5 -5
- data/lib/zip_tricks/rack_body.rb +7 -4
- data/lib/zip_tricks/rails_streaming.rb +3 -1
- data/lib/zip_tricks/remote_io.rb +9 -5
- data/lib/zip_tricks/remote_uncap.rb +10 -5
- data/lib/zip_tricks/size_estimator.rb +39 -27
- data/lib/zip_tricks/stream_crc32.rb +2 -0
- data/lib/zip_tricks/streamer.rb +254 -98
- data/lib/zip_tricks/streamer/deflated_writer.rb +6 -9
- data/lib/zip_tricks/streamer/entry.rb +11 -3
- data/lib/zip_tricks/streamer/stored_writer.rb +5 -7
- data/lib/zip_tricks/streamer/writable.rb +30 -7
- data/lib/zip_tricks/version.rb +3 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -0
- data/lib/zip_tricks/zip_writer.rb +54 -44
- data/testing/generate_test_files.rb +68 -38
- data/testing/support.rb +21 -16
- data/testing/test-report.txt +28 -0
- data/zip_tricks.gemspec +24 -22
- metadata +23 -5
@@ -1,3 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rubocop: convention: Missing top-level class documentation comment.
|
1
4
|
class ZipTricks::FileReader::InflatingReader
|
2
5
|
def initialize(from_io, compressed_data_size)
|
3
6
|
@io = from_io
|
@@ -6,7 +9,7 @@ class ZipTricks::FileReader::InflatingReader
|
|
6
9
|
@zlib_inflater = ::Zlib::Inflate.new(-Zlib::MAX_WBITS)
|
7
10
|
end
|
8
11
|
|
9
|
-
def extract(n_bytes=nil)
|
12
|
+
def extract(n_bytes = nil)
|
10
13
|
n_bytes ||= (@compressed_data_size - @already_read)
|
11
14
|
|
12
15
|
return if eof?
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rubocop: convention: Missing top-level class documentation comment.
|
1
4
|
class ZipTricks::FileReader::StoredReader
|
2
5
|
def initialize(from_io, compressed_data_size)
|
3
6
|
@io = from_io
|
@@ -5,7 +8,7 @@ class ZipTricks::FileReader::StoredReader
|
|
5
8
|
@already_read = 0
|
6
9
|
end
|
7
10
|
|
8
|
-
def extract(n_bytes=nil)
|
11
|
+
def extract(n_bytes = nil)
|
9
12
|
n_bytes ||= (@compressed_data_size - @already_read)
|
10
13
|
|
11
14
|
return if eof?
|
@@ -1,12 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Used when you need to supply a destination IO for some
|
2
4
|
# write operations, but want to discard the data (like when
|
3
5
|
# estimating the size of a ZIP)
|
4
6
|
module ZipTricks::NullWriter
|
5
7
|
# @param data[String] the data to write
|
6
8
|
# @return [self]
|
7
|
-
def self.<<(
|
8
|
-
|
9
|
-
|
10
|
-
# @return [Fixnum] the amount of data that was supposed to be written
|
11
|
-
def self.write(data); data.bytesize; end
|
9
|
+
def self.<<(_)
|
10
|
+
self
|
11
|
+
end
|
12
12
|
end
|
data/lib/zip_tricks/rack_body.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Can be used as a Rack response body directly. Will yield
|
2
4
|
# a {ZipTricks::Streamer} for adding entries to the archive and writing
|
3
5
|
# zip entry bodies.
|
@@ -13,14 +15,16 @@ class ZipTricks::RackBody
|
|
13
15
|
# estimator.add_stored_entry(filename: 'large.tif', size: 1289894)
|
14
16
|
# end
|
15
17
|
#
|
16
|
-
# # Prepare the response body. The block will only be called when the
|
18
|
+
# # Prepare the response body. The block will only be called when the
|
19
|
+
# response starts to be written.
|
17
20
|
# body = ZipTricks::RackBody.new do | streamer |
|
18
21
|
# streamer.add_stored_entry(filename: 'large.tif', size: 1289894, crc32: 198210)
|
19
22
|
# streamer << large_file.read(1024*1024) until large_file.eof?
|
20
23
|
# ...
|
21
24
|
# end
|
22
25
|
#
|
23
|
-
# return [200, {'Content-Type' => 'binary/octet-stream',
|
26
|
+
# return [200, {'Content-Type' => 'binary/octet-stream',
|
27
|
+
# 'Content-Length' => content_length.to_s}, body]
|
24
28
|
def initialize(&blk)
|
25
29
|
@archiving_block = blk
|
26
30
|
end
|
@@ -36,6 +40,5 @@ class ZipTricks::RackBody
|
|
36
40
|
# Does nothing because nothing has to be deallocated or canceled
|
37
41
|
# even if the zip output is incomplete. The archive gets closed
|
38
42
|
# automatically as part of {ZipTricks::Streamer.open}
|
39
|
-
def close
|
40
|
-
end
|
43
|
+
def close; end
|
41
44
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Should be included into a Rails controller (together with `ActionController::Live`)
|
2
4
|
# for easy ZIP output from any action.
|
3
5
|
module ZipTricks::RailsStreaming
|
@@ -10,7 +12,7 @@ module ZipTricks::RailsStreaming
|
|
10
12
|
# Create a wrapper for the write call that quacks like something you
|
11
13
|
# can << to, used by ZipTricks
|
12
14
|
w = ZipTricks::BlockWrite.new { |chunk| response.stream.write(chunk) }
|
13
|
-
ZipTricks::Streamer.open(w){|z| yield(z) }
|
15
|
+
ZipTricks::Streamer.open(w) { |z| yield(z) }
|
14
16
|
ensure
|
15
17
|
response.stream.close
|
16
18
|
end
|
data/lib/zip_tricks/remote_io.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# An object that fakes just-enough of an IO to be dangerous
|
2
4
|
# - or, more precisely, to be useful as a source for the FileReader
|
3
5
|
# central directory parser. Effectively we substitute an IO object
|
@@ -20,23 +22,25 @@ class ZipTricks::RemoteIO
|
|
20
22
|
@pos = clamp(0, offset, @remote_size)
|
21
23
|
0 # always return 0!
|
22
24
|
end
|
23
|
-
|
25
|
+
|
24
26
|
# Emulates IO#size.
|
25
27
|
#
|
26
28
|
# @return [Fixnum] the size of the remote resource
|
27
29
|
def size
|
28
30
|
@remote_size ||= request_object_size
|
29
31
|
end
|
30
|
-
|
32
|
+
|
31
33
|
# Emulates IO#read, but requires the number of bytes to read
|
32
34
|
# The read will be limited to the
|
33
35
|
# size of the remote resource relative to the current offset in the IO,
|
34
36
|
# so if you are at offset 0 in the IO of size 10, doing a `read(20)`
|
35
|
-
# will only return you 10 bytes of result, and not raise any exceptions.
|
37
|
+
# will only return you 10 bytes of result, and not raise any exceptions.
|
36
38
|
#
|
37
39
|
# @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
|
38
40
|
# @return [String] the read bytes
|
39
|
-
|
41
|
+
# Rubocop: convention: Assignment Branch Condition size for read is too high. [17.92/15]
|
42
|
+
# Rubocop: convention: Method has too many lines. [13/10]
|
43
|
+
def read(n_bytes = nil)
|
40
44
|
@remote_size ||= request_object_size
|
41
45
|
|
42
46
|
# If the resource is empty there is nothing to read
|
@@ -87,7 +91,7 @@ class ZipTricks::RemoteIO
|
|
87
91
|
|
88
92
|
private
|
89
93
|
|
90
|
-
def clamp(a,b,c)
|
94
|
+
def clamp(a, b, c)
|
91
95
|
return a if b < a
|
92
96
|
return c if b > c
|
93
97
|
b
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Allows reading the central directory of a remote ZIP file without
|
2
4
|
# downloading the entire file. The central directory provides the
|
3
5
|
# offsets at which the actual file contents is located. You can then
|
@@ -6,12 +8,15 @@
|
|
6
8
|
# Please read the security warning in `FileReader` _VERY CAREFULLY_
|
7
9
|
# before you use this module.
|
8
10
|
class ZipTricks::RemoteUncap
|
9
|
-
|
10
|
-
# @param uri[String] the HTTP(S) URL to read the ZIP footer from
|
11
|
+
# @param uri[String] the HTTP(S) URL to read the ZIP footer from
|
11
12
|
# @param reader_class[Class] which class to use for reading
|
12
|
-
# @param options_for_zip_reader[Hash] any additional options to give to
|
13
|
-
#
|
14
|
-
|
13
|
+
# @param options_for_zip_reader[Hash] any additional options to give to
|
14
|
+
# {ZipTricks::FileReader} when reading
|
15
|
+
# @return [Array<ZipTricks::FileReader::ZipEntry>] metadata about the
|
16
|
+
# files within the remote archive
|
17
|
+
def self.files_within_zip_at(uri,
|
18
|
+
reader_class: ZipTricks::FileReader,
|
19
|
+
**options_for_zip_reader)
|
15
20
|
fetcher = new(uri)
|
16
21
|
fake_io = ZipTricks::RemoteIO.new(fetcher)
|
17
22
|
reader = reader_class.new
|
@@ -1,48 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Helps to estimate archive sizes
|
2
4
|
class ZipTricks::SizeEstimator
|
3
5
|
require_relative 'streamer'
|
4
|
-
|
5
|
-
# Used to mark a couple of methods public
|
6
|
-
class DetailStreamer < ::ZipTricks::Streamer
|
7
|
-
public :add_file_and_write_local_header, :write_data_descriptor_for_last_entry
|
8
|
-
end
|
9
|
-
private_constant :DetailStreamer
|
10
|
-
|
6
|
+
|
11
7
|
# Creates a new estimator with a Streamer object. Normally you should use
|
12
8
|
# `estimate` instead and not use this method directly.
|
13
9
|
def initialize(streamer)
|
14
10
|
@streamer = streamer
|
15
11
|
end
|
16
12
|
private :initialize
|
17
|
-
|
13
|
+
|
18
14
|
# Performs the estimate using fake archiving. It needs to know the sizes of the
|
19
15
|
# entries upfront. Usage:
|
20
16
|
#
|
21
17
|
# expected_zip_size = SizeEstimator.estimate do | estimator |
|
22
18
|
# estimator.add_stored_entry(filename: "file.doc", size: 898291)
|
23
|
-
# estimator.
|
19
|
+
# estimator.add_deflated_entry(filename: "family.tif",
|
20
|
+
# uncompressed_size: 89281911, compressed_size: 121908)
|
24
21
|
# end
|
25
22
|
#
|
26
|
-
# @return [
|
23
|
+
# @return [Integer] the size of the resulting archive, in bytes
|
27
24
|
# @yield [SizeEstimator] the estimator
|
28
25
|
def self.estimate
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
|
27
|
+
estimator = new(streamer)
|
28
|
+
yield(estimator)
|
29
|
+
streamer.close # Returns the .tell of the contained IO
|
32
30
|
end
|
33
31
|
|
34
32
|
# Add a fake entry to the archive, to see how big it is going to be in the end.
|
35
33
|
#
|
36
34
|
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
|
37
35
|
# @param size [Fixnum] size of the uncompressed entry
|
38
|
-
# @param use_data_descriptor[Boolean] whether the entry uses a postfix
|
36
|
+
# @param use_data_descriptor[Boolean] whether the entry uses a postfix
|
37
|
+
# data descriptor to specify size
|
39
38
|
# @return self
|
40
39
|
def add_stored_entry(filename:, size:, use_data_descriptor: false)
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
@streamer.add_stored_entry(filename: filename,
|
41
|
+
crc32: 0,
|
42
|
+
size: size,
|
43
|
+
use_data_descriptor: use_data_descriptor)
|
44
44
|
@streamer.simulate_write(size)
|
45
|
-
|
45
|
+
if use_data_descriptor
|
46
|
+
@streamer.update_last_entry_and_write_data_descriptor(crc32: 0, compressed_size: size, uncompressed_size: size)
|
47
|
+
end
|
46
48
|
self
|
47
49
|
end
|
48
50
|
|
@@ -51,24 +53,34 @@ class ZipTricks::SizeEstimator
|
|
51
53
|
# @param filename [String] the name of the file (filenames are variable-width in the ZIP)
|
52
54
|
# @param uncompressed_size [Fixnum] size of the uncompressed entry
|
53
55
|
# @param compressed_size [Fixnum] size of the compressed entry
|
54
|
-
# @param use_data_descriptor[Boolean] whether the entry uses a postfix data
|
56
|
+
# @param use_data_descriptor[Boolean] whether the entry uses a postfix data
|
57
|
+
# descriptor to specify size
|
55
58
|
# @return self
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
59
|
+
def add_deflated_entry(filename:, uncompressed_size:, compressed_size:, use_data_descriptor: false)
|
60
|
+
@streamer.add_deflated_entry(filename: filename,
|
61
|
+
crc32: 0,
|
62
|
+
compressed_size: compressed_size,
|
63
|
+
uncompressed_size: uncompressed_size,
|
64
|
+
use_data_descriptor: use_data_descriptor)
|
65
|
+
|
60
66
|
@streamer.simulate_write(compressed_size)
|
61
|
-
|
67
|
+
if use_data_descriptor
|
68
|
+
@streamer.update_last_entry_and_write_data_descriptor(crc32: 0,
|
69
|
+
compressed_size: compressed_size,
|
70
|
+
uncompressed_size: uncompressed_size)
|
71
|
+
end
|
62
72
|
self
|
63
73
|
end
|
64
|
-
|
74
|
+
|
75
|
+
# Will be phased out in ZipTricks 5.x
|
76
|
+
alias_method :add_compressed_entry, :add_deflated_entry
|
77
|
+
|
65
78
|
# Add an empty directory to the archive.
|
66
79
|
#
|
67
80
|
# @param dirname [String] the name of the directory
|
68
81
|
# @return self
|
69
82
|
def add_empty_directory_entry(dirname:)
|
70
|
-
@streamer.
|
71
|
-
compressed_size: 0, uncompressed_size: 0)
|
83
|
+
@streamer.add_empty_directory(dirname: dirname)
|
72
84
|
self
|
73
85
|
end
|
74
86
|
end
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
1
5
|
# Is used to write streamed ZIP archives into the provided IO-ish object.
|
2
6
|
# The output IO is never going to be rewound or seeked, so the output
|
3
|
-
# of this object can be coupled directly to, say, a Rack output.
|
7
|
+
# of this object can be coupled directly to, say, a Rack output. The
|
8
|
+
# output can also be a String, Array or anything that responds to `<<`.
|
4
9
|
#
|
5
10
|
# Allows for splicing raw files (for "stored" entries without compression)
|
6
11
|
# and splicing of deflated files (for "deflated" storage mode).
|
@@ -16,7 +21,7 @@
|
|
16
21
|
# You can use the Streamer with data descriptors (the CRC32 and the sizes will be
|
17
22
|
# written after the file data). This allows non-rewinding on-the-fly compression.
|
18
23
|
# If you are compressing large files, the Deflater object that the Streamer controls
|
19
|
-
# will be regularly flushed to prevent memory inflation.
|
24
|
+
# will be regularly flushed to prevent memory inflation.
|
20
25
|
#
|
21
26
|
# ZipTricks::Streamer.open(file_socket_or_string) do |zip|
|
22
27
|
# zip.write_stored_file('mov.mp4') do |sink|
|
@@ -51,7 +56,28 @@
|
|
51
56
|
# so far. When using `sendfile` the Ruby write methods get bypassed entirely, and the
|
52
57
|
# offsets in the IO will not be updated - which will result in an invalid ZIP.
|
53
58
|
#
|
54
|
-
#
|
59
|
+
#
|
60
|
+
# ## On-the-fly deflate -using the Streamer with async/suspended writes and data descriptors
|
61
|
+
#
|
62
|
+
# If you are unable to use the block versions of `write_deflated_file` and `write_stored_file`
|
63
|
+
# there is an option to use a separate writer object. It gets returned from `write_deflated_file`
|
64
|
+
# and `write_stored_file` if you do not provide them with a block, and will accept data writes.
|
65
|
+
#
|
66
|
+
# ZipTricks::Streamer.open(socket) do | zip |
|
67
|
+
# w = zip.write_stored_file('mov.mp4')
|
68
|
+
# w << data
|
69
|
+
# w.close
|
70
|
+
# end
|
71
|
+
#
|
72
|
+
# The central directory will be written automatically at the end of the `open` block. If you need
|
73
|
+
# to manage the Streamer manually, or defer the central directory write until appropriate, use
|
74
|
+
# the constructor instead and call `Streamer#close`:
|
75
|
+
#
|
76
|
+
# zip = ZipTricks::Streamer.new(out_io)
|
77
|
+
# .....
|
78
|
+
# zip.close
|
79
|
+
#
|
80
|
+
# Calling {Streamer#close} **will not** call `#close` on the underlying IO object.
|
55
81
|
class ZipTricks::Streamer
|
56
82
|
require_relative 'streamer/deflated_writer'
|
57
83
|
require_relative 'streamer/writable'
|
@@ -82,16 +108,13 @@ class ZipTricks::Streamer
|
|
82
108
|
|
83
109
|
# Creates a new Streamer on top of the given IO-ish object.
|
84
110
|
#
|
85
|
-
# @param stream[IO] the destination IO for the ZIP
|
111
|
+
# @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
86
112
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
87
113
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
88
114
|
def initialize(stream, writer: create_writer)
|
89
|
-
raise InvalidOutput,
|
90
|
-
unless stream.respond_to?(:tell) && stream.respond_to?(:advance_position_by)
|
91
|
-
stream = ZipTricks::WriteAndTell.new(stream)
|
92
|
-
end
|
115
|
+
raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
|
93
116
|
|
94
|
-
@out = stream
|
117
|
+
@out = ZipTricks::WriteAndTell.new(stream)
|
95
118
|
@files = []
|
96
119
|
@local_header_offsets = []
|
97
120
|
@filenames_set = Set.new
|
@@ -112,114 +135,189 @@ class ZipTricks::Streamer
|
|
112
135
|
# `IO.copy_stream(from, to)`.
|
113
136
|
#
|
114
137
|
# @param binary_data [String] a String in binary encoding
|
115
|
-
# @return [
|
138
|
+
# @return [Integer] the number of bytes written
|
116
139
|
def write(binary_data)
|
117
140
|
@out << binary_data
|
118
141
|
binary_data.bytesize
|
119
142
|
end
|
120
143
|
|
121
|
-
# Advances the internal IO pointer to keep the offsets of the ZIP file in
|
122
|
-
#
|
144
|
+
# Advances the internal IO pointer to keep the offsets of the ZIP file in
|
145
|
+
# check. Use this if you are going to use accelerated writes to the socket
|
146
|
+
# (like the `sendfile()` call) after writing the headers, or if you
|
123
147
|
# just need to figure out the size of the archive.
|
124
148
|
#
|
125
|
-
# @param num_bytes [
|
126
|
-
# @return [
|
149
|
+
# @param num_bytes [Integer] how many bytes are going to be written bypassing the Streamer
|
150
|
+
# @return [Integer] position in the output stream / ZIP archive
|
127
151
|
def simulate_write(num_bytes)
|
128
152
|
@out.advance_position_by(num_bytes)
|
129
153
|
@out.tell
|
130
154
|
end
|
131
155
|
|
132
|
-
# Writes out the local header for an entry (file in the ZIP) that is using
|
133
|
-
#
|
156
|
+
# Writes out the local header for an entry (file in the ZIP) that is using
|
157
|
+
# the deflated storage model (is compressed). Once this method is called,
|
158
|
+
# the `<<` method has to be called to write the actual contents of the body.
|
134
159
|
#
|
135
|
-
# Note that the deflated body that is going to be written into the output
|
136
|
-
#
|
160
|
+
# Note that the deflated body that is going to be written into the output
|
161
|
+
# has to be _precompressed_ (pre-deflated) before writing it into the
|
162
|
+
# Streamer, because otherwise it is impossible to know its size upfront.
|
137
163
|
#
|
138
164
|
# @param filename [String] the name of the file in the entry
|
139
|
-
# @param compressed_size [
|
140
|
-
#
|
141
|
-
# @param
|
142
|
-
# @
|
143
|
-
|
144
|
-
|
145
|
-
|
165
|
+
# @param compressed_size [Integer] the size of the compressed entry that
|
166
|
+
# is going to be written into the archive
|
167
|
+
# @param uncompressed_size [Integer] the size of the entry when uncompressed, in bytes
|
168
|
+
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
169
|
+
# @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor
|
170
|
+
# @return [Integer] the offset the output IO is at after writing the entry header
|
171
|
+
def add_deflated_entry(filename:, compressed_size: 0, uncompressed_size: 0, crc32: 0, use_data_descriptor: false)
|
172
|
+
add_file_and_write_local_header(filename: filename, crc32: crc32,
|
173
|
+
storage_mode: DEFLATED,
|
174
|
+
compressed_size: compressed_size,
|
175
|
+
uncompressed_size: uncompressed_size,
|
176
|
+
use_data_descriptor: use_data_descriptor)
|
146
177
|
@out.tell
|
147
178
|
end
|
148
179
|
|
149
|
-
#
|
150
|
-
|
180
|
+
# Will be phased out in ZipTricks 5.x
|
181
|
+
alias_method :add_compressed_entry, :add_deflated_entry
|
182
|
+
|
183
|
+
# Writes out the local header for an entry (file in the ZIP) that is using
|
184
|
+
# the stored storage model (is stored as-is).
|
185
|
+
# Once this method is called, the `<<` method has to be called one or more
|
186
|
+
# times to write the actual contents of the body.
|
151
187
|
#
|
152
188
|
# @param filename [String] the name of the file in the entry
|
153
|
-
# @param size [
|
154
|
-
# @param crc32 [
|
155
|
-
# @
|
156
|
-
|
157
|
-
|
158
|
-
|
189
|
+
# @param size [Integer] the size of the file when uncompressed, in bytes
|
190
|
+
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
191
|
+
# @param use_data_descriptor [Boolean] whether the entry body will be followed by a data descriptor. When in use, the crc32 and size given here are written as 0 in the entry header
|
192
|
+
# @return [Integer] the offset the output IO is at after writing the entry header
|
193
|
+
def add_stored_entry(filename:, size: 0, crc32: 0, use_data_descriptor: false)
|
194
|
+
add_file_and_write_local_header(filename: filename,
|
195
|
+
crc32: crc32,
|
196
|
+
storage_mode: STORED,
|
197
|
+
compressed_size: size,
|
198
|
+
uncompressed_size: size,
|
199
|
+
use_data_descriptor: use_data_descriptor)
|
159
200
|
@out.tell
|
160
201
|
end
|
161
202
|
|
162
203
|
# Adds an empty directory to the archive with a size of 0 and permissions of 755.
|
163
204
|
#
|
164
205
|
# @param dirname [String] the name of the directory in the archive
|
165
|
-
# @return [
|
206
|
+
# @return [Integer] the offset the output IO is at after writing the entry header
|
166
207
|
def add_empty_directory(dirname:)
|
167
|
-
add_file_and_write_local_header(filename:
|
168
|
-
|
208
|
+
add_file_and_write_local_header(filename: dirname.to_s + '/',
|
209
|
+
crc32: 0,
|
210
|
+
storage_mode: STORED,
|
211
|
+
compressed_size: 0,
|
212
|
+
uncompressed_size: 0,
|
213
|
+
use_data_descriptor: false)
|
169
214
|
@out.tell
|
170
215
|
end
|
171
|
-
|
172
|
-
# Opens the stream for a stored file in the archive, and yields a writer
|
173
|
-
#
|
174
|
-
#
|
216
|
+
|
217
|
+
# Opens the stream for a stored file in the archive, and yields a writer
|
218
|
+
# for that file to the block.
|
219
|
+
# Once the write completes, a data descriptor will be written with the
|
220
|
+
# actual compressed/uncompressed sizes and the CRC32 checksum.
|
221
|
+
#
|
222
|
+
# Using a block, the write will be terminated with a data descriptor outright.
|
223
|
+
#
|
224
|
+
# zip.write_stored_file("foo.txt") do |sink|
|
225
|
+
# IO.copy_stream(source_file, sink)
|
226
|
+
# end
|
227
|
+
#
|
228
|
+
# If deferred writes are desired (for example - to integrate with an API that
|
229
|
+
# does not support blocks, or to work with non-blocking environments) the method
|
230
|
+
# has to be called without a block. In that case it returns the sink instead,
|
231
|
+
# permitting to write to it in a deferred fashion. When `close` is called on
|
232
|
+
# the sink, any remaining compression output will be flushed and the data
|
233
|
+
# descriptor is going to be written.
|
234
|
+
#
|
235
|
+
# Note that even though it does not have to happen within the same call stack,
|
236
|
+
# call sequencing still must be observed. It is therefore not possible to do
|
237
|
+
# this:
|
238
|
+
#
|
239
|
+
# writer_for_file1 = zip.write_stored_file("somefile.jpg")
|
240
|
+
# writer_for_file2 = zip.write_stored_file("another.tif")
|
241
|
+
# writer_for_file1 << data
|
242
|
+
# writer_for_file2 << data
|
243
|
+
#
|
244
|
+
# because it is likely to result in an invalid ZIP file structure later on.
|
245
|
+
# So using this facility in async scenarios is certainly possible, but care
|
246
|
+
# and attention is recommended.
|
175
247
|
#
|
176
248
|
# @param filename[String] the name of the file in the archive
|
177
|
-
# @yield [#<<, #write] an object that the file contents must be written to
|
249
|
+
# @yield [#<<, #write] an object that the file contents must be written to that will be automatically closed
|
250
|
+
# @return [#<<, #write, #close] an object that the file contents must be written to, has to be closed manually
|
178
251
|
def write_stored_file(filename)
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
yield(Writable.new(w))
|
184
|
-
crc, comp, uncomp = w.finish
|
185
|
-
|
186
|
-
# Save the information into the entry for when the time comes to write out the central directory
|
187
|
-
last_entry = @files.last
|
188
|
-
last_entry.crc32 = crc
|
189
|
-
last_entry.compressed_size = comp
|
190
|
-
last_entry.uncompressed_size = uncomp
|
252
|
+
add_stored_entry(filename: filename,
|
253
|
+
use_data_descriptor: true,
|
254
|
+
crc32: 0,
|
255
|
+
size: 0)
|
191
256
|
|
192
|
-
|
257
|
+
writable = Writable.new(self, StoredWriter.new(@out))
|
258
|
+
if block_given?
|
259
|
+
yield(writable)
|
260
|
+
writable.close
|
261
|
+
end
|
262
|
+
writable
|
193
263
|
end
|
194
264
|
|
195
|
-
# Opens the stream for a deflated file in the archive, and yields a writer
|
196
|
-
# Once the write completes, a data descriptor
|
197
|
-
# sizes and the
|
265
|
+
# Opens the stream for a deflated file in the archive, and yields a writer
|
266
|
+
# for that file to the block. Once the write completes, a data descriptor
|
267
|
+
# will be written with the actual compressed/uncompressed sizes and the
|
268
|
+
# CRC32 checksum.
|
269
|
+
#
|
270
|
+
# Using a block, the write will be terminated with a data descriptor outright.
|
271
|
+
#
|
272
|
+
# zip.write_deflated_file("foo.txt") do |sink|
|
273
|
+
# IO.copy_stream(source_file, sink)
|
274
|
+
# end
|
275
|
+
#
|
276
|
+
# If deferred writes are desired (for example - to integrate with an API that
|
277
|
+
# does not support blocks, or to work with non-blocking environments) the method
|
278
|
+
# has to be called without a block. In that case it returns the sink instead,
|
279
|
+
# permitting to write to it in a deferred fashion. When `close` is called on
|
280
|
+
# the sink, any remaining compression output will be flushed and the data
|
281
|
+
# descriptor is going to be written.
|
282
|
+
#
|
283
|
+
# Note that even though it does not have to happen within the same call stack,
|
284
|
+
# call sequencing still must be observed. It is therefore not possible to do
|
285
|
+
# this:
|
286
|
+
#
|
287
|
+
# writer_for_file1 = zip.write_deflated_file("somefile.jpg")
|
288
|
+
# writer_for_file2 = zip.write_deflated_file("another.tif")
|
289
|
+
# writer_for_file1 << data
|
290
|
+
# writer_for_file2 << data
|
291
|
+
# writer_for_file1.close
|
292
|
+
# writer_for_file2.close
|
293
|
+
#
|
294
|
+
# because it is likely to result in an invalid ZIP file structure later on.
|
295
|
+
# So using this facility in async scenarios is certainly possible, but care
|
296
|
+
# and attention is recommended.
|
198
297
|
#
|
199
298
|
# @param filename[String] the name of the file in the archive
|
200
299
|
# @yield [#<<, #write] an object that the file contents must be written to
|
201
300
|
def write_deflated_file(filename)
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
write_data_descriptor_for_last_entry
|
301
|
+
add_deflated_entry(filename: filename,
|
302
|
+
use_data_descriptor: true,
|
303
|
+
crc32: 0,
|
304
|
+
compressed_size: 0,
|
305
|
+
uncompressed_size: 0)
|
306
|
+
|
307
|
+
writable = Writable.new(self, DeflatedWriter.new(@out))
|
308
|
+
if block_given?
|
309
|
+
yield(writable)
|
310
|
+
writable.close
|
311
|
+
end
|
312
|
+
writable
|
215
313
|
end
|
216
|
-
|
314
|
+
|
217
315
|
# Closes the archive. Writes the central directory, and switches the writer into
|
218
316
|
# a state where it can no longer be written to.
|
219
317
|
#
|
220
318
|
# Once this method is called, the `Streamer` should be discarded (the ZIP archive is complete).
|
221
319
|
#
|
222
|
-
# @return [
|
320
|
+
# @return [Integer] the offset the output IO is at after closing the archive
|
223
321
|
def close
|
224
322
|
# Record the central directory offset, so that it can be written into the EOCD record
|
225
323
|
cdir_starts_at = @out.tell
|
@@ -227,18 +325,31 @@ class ZipTricks::Streamer
|
|
227
325
|
# Write out the central directory entries, one for each file
|
228
326
|
@files.each_with_index do |entry, i|
|
229
327
|
header_loc = @local_header_offsets.fetch(i)
|
230
|
-
@writer.write_central_directory_file_header(io: @out,
|
231
|
-
|
232
|
-
|
233
|
-
|
328
|
+
@writer.write_central_directory_file_header(io: @out,
|
329
|
+
local_file_header_location: header_loc,
|
330
|
+
gp_flags: entry.gp_flags,
|
331
|
+
storage_mode: entry.storage_mode,
|
332
|
+
compressed_size: entry.compressed_size,
|
333
|
+
uncompressed_size: entry.uncompressed_size,
|
334
|
+
mtime: entry.mtime,
|
335
|
+
crc32: entry.crc32,
|
336
|
+
filename: entry.filename)
|
234
337
|
end
|
235
338
|
|
236
339
|
# Record the central directory size, for the EOCDR
|
237
340
|
cdir_size = @out.tell - cdir_starts_at
|
238
341
|
|
239
342
|
# Write out the EOCDR
|
240
|
-
@writer.
|
241
|
-
|
343
|
+
@writer.write_end_of_central_directory(io: @out,
|
344
|
+
start_of_central_directory_location: cdir_starts_at,
|
345
|
+
central_directory_size: cdir_size,
|
346
|
+
num_files_in_archive: @files.length)
|
347
|
+
|
348
|
+
# Clear the files so that GC will not have to trace all the way to here to deallocate them
|
349
|
+
@files.clear
|
350
|
+
@filenames_set.clear
|
351
|
+
|
352
|
+
# and return the final offset
|
242
353
|
@out.tell
|
243
354
|
end
|
244
355
|
|
@@ -251,29 +362,73 @@ class ZipTricks::Streamer
|
|
251
362
|
ZipTricks::ZipWriter.new
|
252
363
|
end
|
253
364
|
|
365
|
+
# Updates the last entry written with the CRC32 checksum and compressed/uncompressed
|
366
|
+
# sizes. For stored entries, `compressed_size` and `uncompressed_size` are the same.
|
367
|
+
# After updating the entry will immediately write the data descriptor bytes
|
368
|
+
# to the output.
|
369
|
+
#
|
370
|
+
# @param crc32 [Integer] the CRC32 checksum of the entry when uncompressed
|
371
|
+
# @param compressed_size [Integer] the size of the compressed segment within the ZIP
|
372
|
+
# @param uncompressed_size [Integer] the size of the entry once uncompressed
|
373
|
+
# @return [Integer] the offset the output IO is at after writing the data descriptor
|
374
|
+
def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncompressed_size:)
|
375
|
+
# Save the information into the entry for when the time comes to write
|
376
|
+
# out the central directory
|
377
|
+
last_entry = @files.fetch(-1)
|
378
|
+
last_entry.crc32 = crc32
|
379
|
+
last_entry.compressed_size = compressed_size
|
380
|
+
last_entry.uncompressed_size = uncompressed_size
|
381
|
+
|
382
|
+
@writer.write_data_descriptor(io: @out,
|
383
|
+
crc32: last_entry.crc32,
|
384
|
+
compressed_size: last_entry.compressed_size,
|
385
|
+
uncompressed_size: last_entry.uncompressed_size)
|
386
|
+
@out.tell
|
387
|
+
end
|
388
|
+
|
254
389
|
private
|
255
390
|
|
256
|
-
def add_file_and_write_local_header(filename:,
|
257
|
-
|
391
|
+
def add_file_and_write_local_header(filename:,
|
392
|
+
crc32:,
|
393
|
+
storage_mode:,
|
394
|
+
compressed_size:,
|
395
|
+
uncompressed_size:,
|
396
|
+
use_data_descriptor:)
|
258
397
|
|
259
398
|
# Clean backslashes and uniqify filenames if there are duplicates
|
260
399
|
filename = remove_backslash(filename)
|
261
400
|
filename = uniquify_name(filename) if @filenames_set.include?(filename)
|
262
401
|
|
263
|
-
|
264
|
-
|
402
|
+
unless [STORED, DEFLATED].include?(storage_mode)
|
403
|
+
raise UnknownMode, "Unknown compression mode #{storage_mode}"
|
404
|
+
end
|
405
|
+
|
406
|
+
raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
|
265
407
|
|
266
|
-
|
408
|
+
if use_data_descriptor
|
409
|
+
crc32 = 0
|
410
|
+
compressed_size = 0
|
411
|
+
uncompressed_size = 0
|
412
|
+
end
|
413
|
+
|
414
|
+
e = Entry.new(filename,
|
415
|
+
crc32,
|
416
|
+
compressed_size,
|
417
|
+
uncompressed_size,
|
418
|
+
storage_mode,
|
419
|
+
mtime = Time.now.utc,
|
420
|
+
use_data_descriptor)
|
267
421
|
@files << e
|
268
422
|
@filenames_set << e.filename
|
269
423
|
@local_header_offsets << @out.tell
|
270
|
-
@writer.write_local_file_header(io: @out,
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
424
|
+
@writer.write_local_file_header(io: @out,
|
425
|
+
gp_flags: e.gp_flags,
|
426
|
+
crc32: e.crc32,
|
427
|
+
compressed_size: e.compressed_size,
|
428
|
+
uncompressed_size: e.uncompressed_size,
|
429
|
+
mtime: e.mtime,
|
430
|
+
filename: e.filename,
|
431
|
+
storage_mode: e.storage_mode)
|
277
432
|
end
|
278
433
|
|
279
434
|
def remove_backslash(filename)
|
@@ -281,8 +436,9 @@ class ZipTricks::Streamer
|
|
281
436
|
end
|
282
437
|
|
283
438
|
def uniquify_name(filename)
|
284
|
-
|
285
|
-
|
439
|
+
# we add (1), (2), (n) at the end of a filename if there is a duplicate
|
440
|
+
copy_pattern = /\((\d+)\)$/
|
441
|
+
parts = filename.split('.')
|
286
442
|
ext = if parts.last =~ /gz|zip/ && parts.size > 2
|
287
443
|
parts.pop(2)
|
288
444
|
elsif parts.size > 1
|
@@ -292,12 +448,12 @@ class ZipTricks::Streamer
|
|
292
448
|
|
293
449
|
duplicate_counter = 1
|
294
450
|
loop do
|
295
|
-
if fn_last_part =~ copy_pattern
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
new_filename = (parts + [fn_last_part, ext]).compact.join(
|
451
|
+
fn_last_part = if fn_last_part =~ copy_pattern
|
452
|
+
fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
|
453
|
+
else
|
454
|
+
"#{fn_last_part} (#{duplicate_counter})"
|
455
|
+
end
|
456
|
+
new_filename = (parts + [fn_last_part, ext]).compact.join('.')
|
301
457
|
return new_filename unless @filenames_set.include?(new_filename)
|
302
458
|
duplicate_counter += 1
|
303
459
|
end
|