zip_tricks 4.4.2 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +79 -0
- data/.rubocop_todo.yml +43 -0
- data/.travis.yml +3 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +7 -4
- data/examples/archive_size_estimate.rb +8 -6
- data/examples/config.ru +3 -1
- data/examples/parallel_compression_with_block_deflate.rb +31 -20
- data/examples/rack_application.rb +25 -17
- data/lib/zip_tricks.rb +4 -2
- data/lib/zip_tricks/block_deflate.rb +43 -25
- data/lib/zip_tricks/block_write.rb +20 -10
- data/lib/zip_tricks/file_reader.rb +241 -145
- data/lib/zip_tricks/file_reader/inflating_reader.rb +4 -1
- data/lib/zip_tricks/file_reader/stored_reader.rb +4 -1
- data/lib/zip_tricks/null_writer.rb +5 -5
- data/lib/zip_tricks/rack_body.rb +7 -4
- data/lib/zip_tricks/rails_streaming.rb +3 -1
- data/lib/zip_tricks/remote_io.rb +9 -5
- data/lib/zip_tricks/remote_uncap.rb +10 -5
- data/lib/zip_tricks/size_estimator.rb +39 -27
- data/lib/zip_tricks/stream_crc32.rb +2 -0
- data/lib/zip_tricks/streamer.rb +254 -98
- data/lib/zip_tricks/streamer/deflated_writer.rb +6 -9
- data/lib/zip_tricks/streamer/entry.rb +11 -3
- data/lib/zip_tricks/streamer/stored_writer.rb +5 -7
- data/lib/zip_tricks/streamer/writable.rb +30 -7
- data/lib/zip_tricks/version.rb +3 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -0
- data/lib/zip_tricks/zip_writer.rb +54 -44
- data/testing/generate_test_files.rb +68 -38
- data/testing/support.rb +21 -16
- data/testing/test-report.txt +28 -0
- data/zip_tricks.gemspec +24 -22
- metadata +23 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd1865613940a7038642206baf6cd58457b87ff5
|
4
|
+
data.tar.gz: 3ebda3c665c9671d9f033d6f768ccdc800d7f2f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c16f15ab10788d90fe96e61e8ecf20a20eee6a7e94d2d22538c34964d51eb9ee4cc64459dd46997f2b994212944e34e8162d7880ff4d79343c86a5d1ed09fd77
|
7
|
+
data.tar.gz: da1d6e24031365ea67fc2fa0fbf3096f57b007c6ac35449b57b1783eecfa703d530c1c695429335bee252a841da6722f3bca341bd6fc8c5e9e24bf6379edee76
|
data/.codeclimate.yml
ADDED
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
Metrics/LineLength:
|
4
|
+
Max: 120
|
5
|
+
|
6
|
+
AllCops:
|
7
|
+
Exclude:
|
8
|
+
- spec/**/*.rb
|
9
|
+
- zip_tricks.gemspec
|
10
|
+
- Gemfile
|
11
|
+
- Rakefile
|
12
|
+
- vendor/**/*
|
13
|
+
- julik_scratchpad/**/*
|
14
|
+
|
15
|
+
Style/ClassAndModuleChildren:
|
16
|
+
Enabled:
|
17
|
+
false
|
18
|
+
|
19
|
+
# "begin; ... rescue; end " without saying that we only rescue StandardError and below.
|
20
|
+
# Can be debated, but I think it is reasonable to expect any Rubyist to know what errors
|
21
|
+
# the standard rescue clause covers.
|
22
|
+
Lint/RescueWithoutErrorClass:
|
23
|
+
Enabled: false
|
24
|
+
|
25
|
+
Lint/AssignmentInCondition:
|
26
|
+
Enabled: false
|
27
|
+
|
28
|
+
# We use % all over and nothing ever came out of it
|
29
|
+
Style/FormatString:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
# We have classes with methods having long bodies. We apply a lot of literal small
|
33
|
+
# operations which have to be applied in one locality specifically. Additionally,
|
34
|
+
# class length is a bit of a boilerplate metric
|
35
|
+
Metrics/ClassLength:
|
36
|
+
Enabled: false
|
37
|
+
|
38
|
+
# Idem. We have quite some methods that do a lot of small writes/reads in rapid
|
39
|
+
# succession - not because those methods have high cyclomatic complexity, but
|
40
|
+
# because they perform a single operation consisting of many sequential small
|
41
|
+
# ones.
|
42
|
+
Metrics/MethodLength:
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Metrics/AbcSize:
|
46
|
+
Exclude:
|
47
|
+
- examples/rack_application.rb
|
48
|
+
- lib/zip_tricks/file_reader.rb
|
49
|
+
- lib/zip_tricks/remote_io.rb
|
50
|
+
- lib/zip_tricks/streamer.rb
|
51
|
+
- testing/support.rb
|
52
|
+
|
53
|
+
Metrics/BlockLength:
|
54
|
+
Exclude:
|
55
|
+
- lib/zip_tricks/file_reader.rb
|
56
|
+
|
57
|
+
Layout/MultilineOperationIndentation:
|
58
|
+
Exclude:
|
59
|
+
- lib/zip_tricks/file_reader.rb
|
60
|
+
|
61
|
+
Metrics/ParameterLists:
|
62
|
+
Exclude:
|
63
|
+
- lib/zip_tricks/streamer.rb
|
64
|
+
|
65
|
+
Style/GlobalVars:
|
66
|
+
Exclude:
|
67
|
+
- testing/generate_test_files.rb
|
68
|
+
- testing/support.rb
|
69
|
+
|
70
|
+
# The advice this cop gives varies depending on the Ruby version,
|
71
|
+
# so no - I will not be following two conflicting recommendations.
|
72
|
+
Style/MutableConstant:
|
73
|
+
Enabled: false
|
74
|
+
|
75
|
+
Layout/SpaceInsideHashLiteralBraces:
|
76
|
+
EnforcedStyle: no_space
|
77
|
+
|
78
|
+
Style/Alias:
|
79
|
+
EnforcedStyle: prefer_alias_method
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-09-11 13:38:55 +0200 using RuboCop version 0.49.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 6
|
10
|
+
Lint/UselessAssignment:
|
11
|
+
Exclude:
|
12
|
+
- 'lib/zip_tricks/streamer.rb'
|
13
|
+
- 'spec/zip_tricks/block_deflate_spec.rb'
|
14
|
+
- 'spec/zip_tricks/file_reader_spec.rb'
|
15
|
+
- 'spec/zip_tricks/remote_uncap_spec.rb'
|
16
|
+
- 'spec/zip_tricks/stream_crc32_spec.rb'
|
17
|
+
- 'spec/zip_tricks/streamer_spec.rb'
|
18
|
+
|
19
|
+
# Offense count: 1
|
20
|
+
Metrics/PerceivedComplexity:
|
21
|
+
Max: 8
|
22
|
+
|
23
|
+
# Offense count: 4
|
24
|
+
Style/Documentation:
|
25
|
+
Exclude:
|
26
|
+
- 'spec/**/*'
|
27
|
+
- 'test/**/*'
|
28
|
+
- 'lib/zip_tricks/block_deflate.rb'
|
29
|
+
- 'lib/zip_tricks/block_write.rb'
|
30
|
+
- 'lib/zip_tricks/file_reader.rb'
|
31
|
+
- 'lib/zip_tricks/streamer/deflated_writer.rb'
|
32
|
+
|
33
|
+
# Offense count: 1
|
34
|
+
# Configuration parameters: MinBodyLength.
|
35
|
+
Style/GuardClause:
|
36
|
+
Exclude:
|
37
|
+
- 'lib/zip_tricks/file_reader.rb'
|
38
|
+
|
39
|
+
# Offense count: 2
|
40
|
+
Style/StructInheritance:
|
41
|
+
Exclude:
|
42
|
+
- 'lib/zip_tricks/streamer/entry.rb'
|
43
|
+
- 'spec/zip_tricks/zip_writer_spec.rb'
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 4.5.0
|
2
|
+
|
3
|
+
* Rename `Streamer#add_compressed_entry` and `SizeEstimator#add_compressed_entry` to `add_deflated_entry`
|
4
|
+
to indicate the type of compression that is going to get used.
|
5
|
+
* Make `Streamer#write_(deflated|stored)_file` return a writable object that can be `.close`d, to
|
6
|
+
permit usage of those methods in situations where suspending a block is inconvenient (make deferred writing possible).
|
7
|
+
* Fix CRC32 checksums in `Streamer#write_deflated_file`
|
8
|
+
* Add `Streamer#update_last_entry_and_write_data_descriptor` to permit externally-driven flows that use data descriptors
|
9
|
+
|
1
10
|
## 4.4.2
|
2
11
|
|
3
12
|
* Add 2.4 to Travis rubies
|
data/Rakefile
CHANGED
@@ -1,12 +1,15 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
require 'yard'
|
4
|
+
require 'rubocop/rake_task'
|
4
5
|
|
5
6
|
YARD::Rake::YardocTask.new(:doc) do |t|
|
6
|
-
# The dash has to be between the two to "divide" the source files and
|
7
|
+
# The dash has to be between the two to "divide" the source files and
|
7
8
|
# miscellaneous documentation files that contain no code
|
8
9
|
t.files = ['lib/**/*.rb', '-', 'LICENSE.txt', 'IMPLEMENTATION_DETAILS.md']
|
9
10
|
end
|
10
11
|
|
11
12
|
RSpec::Core::RakeTask.new(:spec)
|
12
|
-
task :
|
13
|
+
task default: :spec
|
14
|
+
|
15
|
+
RuboCop::RakeTask.new
|
@@ -1,13 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../lib/zip_tricks'
|
2
4
|
|
3
|
-
# Predict how large a ZIP file is going to be without having access to
|
4
|
-
# file contents, but using just the filenames (influences the
|
5
|
-
# of the files
|
5
|
+
# Predict how large a ZIP file is going to be without having access to
|
6
|
+
# the actual file contents, but using just the filenames (influences the
|
7
|
+
# file size) and the size of the files
|
6
8
|
zip_archive_size_in_bytes = ZipTricks::SizeEstimator.estimate do |zip|
|
7
9
|
# Pretend we are going to make a ZIP file which contains a few
|
8
10
|
# MP4 files (those do not compress all too well)
|
9
|
-
zip.add_stored_entry(filename:
|
10
|
-
zip.add_stored_entry(filename:
|
11
|
+
zip.add_stored_entry(filename: 'MOV_1234.MP4', size: 898_090)
|
12
|
+
zip.add_stored_entry(filename: 'MOV_1235.MP4', size: 7_855_126)
|
11
13
|
end
|
12
14
|
|
13
|
-
puts zip_archive_size_in_bytes #=>
|
15
|
+
puts zip_archive_size_in_bytes #=> 8_753_467
|
data/examples/config.ru
CHANGED
@@ -1,50 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../lib/zip_tricks'
|
2
4
|
require 'tempfile'
|
3
5
|
|
4
6
|
# This shows how to perform compression in parallel (a-la pigz, but in a less
|
5
|
-
# advanced fashion since the compression tables are not shared - to
|
7
|
+
# advanced fashion since the compression tables are not shared - to
|
8
|
+
# minimize shared state).
|
6
9
|
#
|
7
|
-
# When using this approach, compressing a large file can be performed as a
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# and
|
11
|
-
#
|
10
|
+
# When using this approach, compressing a large file can be performed as a
|
11
|
+
# map-reduce operation.
|
12
|
+
# First you prepare all the data per part of your (potentially very large) file,
|
13
|
+
# and then you use the reduce task to combine that data into one linear zip.
|
14
|
+
# In this example we will generate threads and collect their return values in
|
15
|
+
# the order the threads were launched, which guarantees a consistent reduce.
|
12
16
|
#
|
13
17
|
# So, let each thread generate a part of the file, and also
|
14
18
|
# compute the CRC32 of it. The thread will compress its own part
|
15
|
-
# as well, in an independent deflate segment - the threads do not share
|
16
|
-
# multiplex this over multiple processes or
|
19
|
+
# as well, in an independent deflate segment - the threads do not share
|
20
|
+
# anything. You could also multiplex this over multiple processes or
|
21
|
+
# even machines.
|
17
22
|
threads = (0..12).map do
|
18
23
|
Thread.new do
|
19
24
|
source_tempfile = Tempfile.new 't'
|
20
25
|
source_tempfile.binmode
|
21
|
-
|
26
|
+
|
22
27
|
# Fill the part with random content
|
23
28
|
12.times { source_tempfile << Random.new.bytes(1 * 1024 * 1024) }
|
24
29
|
source_tempfile.rewind
|
25
|
-
|
30
|
+
|
26
31
|
# Compute the CRC32 of the source file
|
27
32
|
part_crc = ZipTricks::StreamCRC32.from_io(source_tempfile)
|
28
33
|
source_tempfile.rewind
|
29
|
-
|
34
|
+
|
30
35
|
# Create a compressed part
|
31
36
|
compressed_tempfile = Tempfile.new('tc')
|
32
37
|
compressed_tempfile.binmode
|
33
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(source_tempfile,
|
34
|
-
|
38
|
+
ZipTricks::BlockDeflate.deflate_in_blocks(source_tempfile,
|
39
|
+
compressed_tempfile)
|
40
|
+
|
35
41
|
source_tempfile.close!
|
36
42
|
# The data that the splicing process needs.
|
37
43
|
[compressed_tempfile, part_crc, source_tempfile.size]
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
|
-
# Threads return us a tuple with [compressed_tempfile, source_part_size,
|
47
|
+
# Threads return us a tuple with [compressed_tempfile, source_part_crc,
|
48
|
+
# source_part_size]
|
42
49
|
compressed_tempfiles_and_crc_of_parts = threads.map(&:join).map(&:value)
|
43
50
|
|
44
|
-
# Now we need to compute the CRC32 of the _entire_ file, and it has to be
|
45
|
-
# of the _source_ file (uncompressed), not of the compressed variant.
|
51
|
+
# Now we need to compute the CRC32 of the _entire_ file, and it has to be
|
52
|
+
# the CRC32 of the _source_ file (uncompressed), not of the compressed variant.
|
53
|
+
# Handily we know
|
46
54
|
entire_file_crc = ZipTricks::StreamCRC32.new
|
47
|
-
compressed_tempfiles_and_crc_of_parts.each do |
|
55
|
+
compressed_tempfiles_and_crc_of_parts.each do |_, source_part_crc, source_part_size|
|
48
56
|
entire_file_crc.append(source_part_crc, source_part_size)
|
49
57
|
end
|
50
58
|
|
@@ -56,7 +64,7 @@ ZipTricks::BlockDeflate.write_terminator(last_compressed_part)
|
|
56
64
|
# To figure that out we just sum the sizes of the files
|
57
65
|
compressed_part_files = compressed_tempfiles_and_crc_of_parts.map(&:first)
|
58
66
|
size_of_deflated_segment = compressed_part_files.map(&:size).inject(&:+)
|
59
|
-
size_of_uncompressed_file = compressed_tempfiles_and_crc_of_parts.map{|e| e[2]}.inject(&:+)
|
67
|
+
size_of_uncompressed_file = compressed_tempfiles_and_crc_of_parts.map { |e| e[2] }.inject(&:+)
|
60
68
|
|
61
69
|
# And now we can create a ZIP with our compressed file in its entirety.
|
62
70
|
# We use a File as a destination here, but you can also use a socket or a
|
@@ -64,8 +72,11 @@ size_of_uncompressed_file = compressed_tempfiles_and_crc_of_parts.map{|e| e[2]}.
|
|
64
72
|
# made for streaming.
|
65
73
|
output = File.open('zip_created_in_parallel.zip', 'wb')
|
66
74
|
|
67
|
-
ZipTricks::Streamer.open(output) do |
|
68
|
-
zip.
|
75
|
+
ZipTricks::Streamer.open(output) do |zip|
|
76
|
+
zip.add_deflated_entry('parallel.bin',
|
77
|
+
size_of_uncompressed_file,
|
78
|
+
entire_file_crc.to_i,
|
79
|
+
size_of_deflated_segment)
|
69
80
|
compressed_part_files.each do |part_file|
|
70
81
|
part_file.rewind
|
71
82
|
while blob = part_file.read(2048)
|
@@ -1,26 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../lib/zip_tricks'
|
2
4
|
|
3
5
|
# An example of how you can create a Rack endpoint for your ZIP downloads.
|
4
6
|
# NEVER run this in production - it is a huge security risk.
|
5
7
|
# What this app will do is pick PATH_INFO (your request URL path)
|
6
8
|
# and grab a file located at this path on your filesystem. The file will then
|
7
|
-
# be added to a ZIP archive created completely programmatically. No data will
|
8
|
-
# on disk and the contents of the ZIP file will _not_ be buffered in
|
9
|
-
# before sending. Unless you use a buffering Rack server of
|
9
|
+
# be added to a ZIP archive created completely programmatically. No data will
|
10
|
+
# be cached on disk and the contents of the ZIP file will _not_ be buffered in
|
11
|
+
# its entirety before sending. Unless you use a buffering Rack server of
|
12
|
+
# course (WEBrick or Thin).
|
10
13
|
class ZipDownload
|
11
14
|
def call(env)
|
12
15
|
file_path = env['PATH_INFO'] # Should be the absolute path on the filesystem
|
13
|
-
|
16
|
+
|
14
17
|
# Open the file for binary reading
|
15
18
|
f = File.open(file_path, 'rb')
|
16
19
|
filename = File.basename(file_path)
|
17
|
-
|
18
|
-
# Compute the CRC32 upfront. We do not use local footers for post-computing
|
19
|
-
# so you _do_ have to precompute it beforehand. Ideally, you
|
20
|
-
# storing the files you will be sending out later on.
|
20
|
+
|
21
|
+
# Compute the CRC32 upfront. We do not use local footers for post-computing
|
22
|
+
# the CRC32, so you _do_ have to precompute it beforehand. Ideally, you
|
23
|
+
# would do that before storing the files you will be sending out later on.
|
21
24
|
crc32 = ZipTricks::StreamCRC32.from_io(f)
|
22
25
|
f.rewind
|
23
|
-
|
26
|
+
|
24
27
|
# Compute the size of the download, so that a
|
25
28
|
# real Content-Length header can be sent. Also, if your download
|
26
29
|
# stops at some point, the downloading browser will be able to tell
|
@@ -30,7 +33,7 @@ class ZipDownload
|
|
30
33
|
size = ZipTricks::SizeEstimator.estimate do |ar|
|
31
34
|
ar.add_stored_entry(filename, f.size)
|
32
35
|
end
|
33
|
-
|
36
|
+
|
34
37
|
# Create a suitable Rack response body, that will support each(),
|
35
38
|
# close() and all the other methods. We can then return it up the stack.
|
36
39
|
zip_response_body = ZipTricks::RackBody.new do |zip|
|
@@ -38,20 +41,25 @@ class ZipDownload
|
|
38
41
|
# We are adding only one file to the ZIP here, but you could do that
|
39
42
|
# with an arbitrary number of files of course.
|
40
43
|
zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
|
41
|
-
# Write the contents of the file. It is stored, so the writes go
|
42
|
-
# to the Rack output, bypassing any RubyZip
|
43
|
-
# are yielding the "blob" string
|
44
|
+
# Write the contents of the file. It is stored, so the writes go
|
45
|
+
# directly to the Rack output, bypassing any RubyZip
|
46
|
+
# deflaters/compressors. In fact you are yielding the "blob" string
|
47
|
+
# here directly to the Rack server handler.
|
44
48
|
IO.copy_stream(f, zip)
|
45
49
|
ensure
|
46
50
|
f.close # Make sure the opened file we read from gets closed
|
47
51
|
end
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
# Add a Content-Disposition so that the download has a .zip extension
|
51
55
|
# (this will not work well with UTF-8 filenames on Windows, but hey!)
|
52
|
-
content_disposition = 'attachment; filename=%s.zip'
|
53
|
-
|
56
|
+
content_disposition = format('attachment; filename=%s.zip', filename)
|
57
|
+
|
54
58
|
# and return the response, adding the Content-Length we have computed earlier
|
55
|
-
[
|
59
|
+
[
|
60
|
+
200,
|
61
|
+
{'Content-Length' => size.to_s, 'Content-Disposition' => content_disposition},
|
62
|
+
zip_response_body
|
63
|
+
]
|
56
64
|
end
|
57
65
|
end
|
data/lib/zip_tricks.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Require all the sub-components except myself
|
1
4
|
module ZipTricks
|
2
|
-
|
3
|
-
Dir.glob(__dir__ + '/**/*.rb').sort.each {|p| require p unless p == __FILE__ }
|
5
|
+
Dir.glob(__dir__ + '/**/*.rb').sort.each { |p| require p unless p == __FILE__ }
|
4
6
|
end
|
@@ -1,45 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'zlib'
|
2
4
|
|
3
5
|
# Permits Deflate compression in independent blocks. The workflow is as follows:
|
4
6
|
#
|
5
|
-
# * Run every block to compress through deflate_chunk, remove the header,
|
6
|
-
#
|
7
|
+
# * Run every block to compress through deflate_chunk, remove the header,
|
8
|
+
# footer and adler32 from the result
|
9
|
+
# * Write out the compressed block bodies (the ones deflate_chunk returns)
|
10
|
+
# to your output, in sequence
|
7
11
|
# * Write out the footer (\03\00)
|
8
12
|
#
|
9
|
-
# The resulting stream is guaranteed to be handled properly by all zip
|
10
|
-
# BOMArchiveHelper/ArchiveUtility on OSX.
|
13
|
+
# The resulting stream is guaranteed to be handled properly by all zip
|
14
|
+
# unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
|
11
15
|
#
|
12
16
|
# You could also build a compressor for Rubyzip using this module quite easily,
|
13
17
|
# even though this is outside the scope of the library.
|
14
18
|
#
|
15
|
-
# When you deflate the chunks separately, you need to write the end marker
|
16
|
-
#
|
17
|
-
#
|
19
|
+
# When you deflate the chunks separately, you need to write the end marker
|
20
|
+
# yourself (using `write_terminator`).
|
21
|
+
# If you just want to deflate a large IO's contents, use
|
22
|
+
# `deflate_in_blocks_and_terminate` to have the end marker written out for you.
|
18
23
|
#
|
19
24
|
# Basic usage to compress a file in parts:
|
20
|
-
#
|
25
|
+
#
|
21
26
|
# source_file = File.open('12_gigs.bin', 'rb')
|
22
27
|
# compressed = Tempfile.new
|
23
|
-
# # Will not compress everything in memory, but do it per chunk to spare
|
28
|
+
# # Will not compress everything in memory, but do it per chunk to spare
|
29
|
+
# memory. `compressed`
|
24
30
|
# # will be written to at the end of each chunk.
|
25
|
-
# ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
31
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
|
32
|
+
# compressed)
|
33
|
+
#
|
34
|
+
# You can also do the same to parts that you will later concatenate together
|
35
|
+
# elsewhere, in that case you need to skip the end marker:
|
36
|
+
#
|
30
37
|
# compressed = Tempfile.new
|
31
|
-
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb),
|
32
|
-
#
|
33
|
-
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('
|
38
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
|
39
|
+
# compressed)
|
40
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
|
41
|
+
# compressed)
|
42
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
|
43
|
+
# compressed)
|
34
44
|
# ZipTricks::BlockDeflate.write_terminator(compressed)
|
35
|
-
#
|
45
|
+
#
|
36
46
|
# You can also elect to just compress strings in memory (to splice them later):
|
37
|
-
#
|
47
|
+
#
|
38
48
|
# compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
49
|
+
|
50
|
+
class ZipTricks::BlockDeflate
|
51
|
+
DEFAULT_BLOCKSIZE = 1_024 * 1024 * 5
|
52
|
+
END_MARKER = [3, 0].pack('C*')
|
53
|
+
# Zlib::NO_COMPRESSION..
|
54
|
+
VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze
|
43
55
|
# Write the end marker (\x3\x0) to the given IO.
|
44
56
|
#
|
45
57
|
# `output_io` can also be a {ZipTricks::Streamer} to expedite ops.
|
@@ -83,7 +95,10 @@ module ZipTricks::BlockDeflate
|
|
83
95
|
# @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
84
96
|
# @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
|
85
97
|
# @return [Fixnum] number of bytes written to `output_io`
|
86
|
-
def self.deflate_in_blocks_and_terminate(input_io,
|
98
|
+
def self.deflate_in_blocks_and_terminate(input_io,
|
99
|
+
output_io,
|
100
|
+
level: Zlib::DEFAULT_COMPRESSION,
|
101
|
+
block_size: DEFAULT_BLOCKSIZE)
|
87
102
|
bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
|
88
103
|
bytes_written + write_terminator(output_io)
|
89
104
|
end
|
@@ -100,7 +115,10 @@ module ZipTricks::BlockDeflate
|
|
100
115
|
# @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
101
116
|
# @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
|
102
117
|
# @return [Fixnum] number of bytes written to `output_io`
|
103
|
-
def self.deflate_in_blocks(input_io,
|
118
|
+
def self.deflate_in_blocks(input_io,
|
119
|
+
output_io,
|
120
|
+
level: Zlib::DEFAULT_COMPRESSION,
|
121
|
+
block_size: DEFAULT_BLOCKSIZE)
|
104
122
|
bytes_written = 0
|
105
123
|
while block = input_io.read(block_size)
|
106
124
|
deflated = deflate_chunk(block, level: level)
|