zip_tricks 4.4.2 → 4.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +79 -0
- data/.rubocop_todo.yml +43 -0
- data/.travis.yml +3 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +7 -4
- data/examples/archive_size_estimate.rb +8 -6
- data/examples/config.ru +3 -1
- data/examples/parallel_compression_with_block_deflate.rb +31 -20
- data/examples/rack_application.rb +25 -17
- data/lib/zip_tricks.rb +4 -2
- data/lib/zip_tricks/block_deflate.rb +43 -25
- data/lib/zip_tricks/block_write.rb +20 -10
- data/lib/zip_tricks/file_reader.rb +241 -145
- data/lib/zip_tricks/file_reader/inflating_reader.rb +4 -1
- data/lib/zip_tricks/file_reader/stored_reader.rb +4 -1
- data/lib/zip_tricks/null_writer.rb +5 -5
- data/lib/zip_tricks/rack_body.rb +7 -4
- data/lib/zip_tricks/rails_streaming.rb +3 -1
- data/lib/zip_tricks/remote_io.rb +9 -5
- data/lib/zip_tricks/remote_uncap.rb +10 -5
- data/lib/zip_tricks/size_estimator.rb +39 -27
- data/lib/zip_tricks/stream_crc32.rb +2 -0
- data/lib/zip_tricks/streamer.rb +254 -98
- data/lib/zip_tricks/streamer/deflated_writer.rb +6 -9
- data/lib/zip_tricks/streamer/entry.rb +11 -3
- data/lib/zip_tricks/streamer/stored_writer.rb +5 -7
- data/lib/zip_tricks/streamer/writable.rb +30 -7
- data/lib/zip_tricks/version.rb +3 -1
- data/lib/zip_tricks/write_and_tell.rb +2 -0
- data/lib/zip_tricks/zip_writer.rb +54 -44
- data/testing/generate_test_files.rb +68 -38
- data/testing/support.rb +21 -16
- data/testing/test-report.txt +28 -0
- data/zip_tricks.gemspec +24 -22
- metadata +23 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dd1865613940a7038642206baf6cd58457b87ff5
|
4
|
+
data.tar.gz: 3ebda3c665c9671d9f033d6f768ccdc800d7f2f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c16f15ab10788d90fe96e61e8ecf20a20eee6a7e94d2d22538c34964d51eb9ee4cc64459dd46997f2b994212944e34e8162d7880ff4d79343c86a5d1ed09fd77
|
7
|
+
data.tar.gz: da1d6e24031365ea67fc2fa0fbf3096f57b007c6ac35449b57b1783eecfa703d530c1c695429335bee252a841da6722f3bca341bd6fc8c5e9e24bf6379edee76
|
data/.codeclimate.yml
ADDED
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
Metrics/LineLength:
|
4
|
+
Max: 120
|
5
|
+
|
6
|
+
AllCops:
|
7
|
+
Exclude:
|
8
|
+
- spec/**/*.rb
|
9
|
+
- zip_tricks.gemspec
|
10
|
+
- Gemfile
|
11
|
+
- Rakefile
|
12
|
+
- vendor/**/*
|
13
|
+
- julik_scratchpad/**/*
|
14
|
+
|
15
|
+
Style/ClassAndModuleChildren:
|
16
|
+
Enabled:
|
17
|
+
false
|
18
|
+
|
19
|
+
# "begin; ... rescue; end " without saying that we only rescue StandardError and below.
|
20
|
+
# Can be debated, but I think it is reasonable to expect any Rubyist to know what errors
|
21
|
+
# the standard rescue clause covers.
|
22
|
+
Lint/RescueWithoutErrorClass:
|
23
|
+
Enabled: false
|
24
|
+
|
25
|
+
Lint/AssignmentInCondition:
|
26
|
+
Enabled: false
|
27
|
+
|
28
|
+
# We use % all over and nothing ever came out of it
|
29
|
+
Style/FormatString:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
# We have classes with methods having long bodies. We apply a lot of literal small
|
33
|
+
# operations which have to be applied in one locality specifically. Additionally,
|
34
|
+
# class length is a bit of a boilerplate metric
|
35
|
+
Metrics/ClassLength:
|
36
|
+
Enabled: false
|
37
|
+
|
38
|
+
# Idem. We have quite some methods that do a lot of small writes/reads in rapid
|
39
|
+
# succession - not because those methods have high cyclomatic complexity, but
|
40
|
+
# because they perform a single operation consisting of many sequential small
|
41
|
+
# ones.
|
42
|
+
Metrics/MethodLength:
|
43
|
+
Enabled: false
|
44
|
+
|
45
|
+
Metrics/AbcSize:
|
46
|
+
Exclude:
|
47
|
+
- examples/rack_application.rb
|
48
|
+
- lib/zip_tricks/file_reader.rb
|
49
|
+
- lib/zip_tricks/remote_io.rb
|
50
|
+
- lib/zip_tricks/streamer.rb
|
51
|
+
- testing/support.rb
|
52
|
+
|
53
|
+
Metrics/BlockLength:
|
54
|
+
Exclude:
|
55
|
+
- lib/zip_tricks/file_reader.rb
|
56
|
+
|
57
|
+
Layout/MultilineOperationIndentation:
|
58
|
+
Exclude:
|
59
|
+
- lib/zip_tricks/file_reader.rb
|
60
|
+
|
61
|
+
Metrics/ParameterLists:
|
62
|
+
Exclude:
|
63
|
+
- lib/zip_tricks/streamer.rb
|
64
|
+
|
65
|
+
Style/GlobalVars:
|
66
|
+
Exclude:
|
67
|
+
- testing/generate_test_files.rb
|
68
|
+
- testing/support.rb
|
69
|
+
|
70
|
+
# The advice this cop gives varies depending on the Ruby version,
|
71
|
+
# so no - I will not be following two conflicting recommendations.
|
72
|
+
Style/MutableConstant:
|
73
|
+
Enabled: false
|
74
|
+
|
75
|
+
Layout/SpaceInsideHashLiteralBraces:
|
76
|
+
EnforcedStyle: no_space
|
77
|
+
|
78
|
+
Style/Alias:
|
79
|
+
EnforcedStyle: prefer_alias_method
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2017-09-11 13:38:55 +0200 using RuboCop version 0.49.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 6
|
10
|
+
Lint/UselessAssignment:
|
11
|
+
Exclude:
|
12
|
+
- 'lib/zip_tricks/streamer.rb'
|
13
|
+
- 'spec/zip_tricks/block_deflate_spec.rb'
|
14
|
+
- 'spec/zip_tricks/file_reader_spec.rb'
|
15
|
+
- 'spec/zip_tricks/remote_uncap_spec.rb'
|
16
|
+
- 'spec/zip_tricks/stream_crc32_spec.rb'
|
17
|
+
- 'spec/zip_tricks/streamer_spec.rb'
|
18
|
+
|
19
|
+
# Offense count: 1
|
20
|
+
Metrics/PerceivedComplexity:
|
21
|
+
Max: 8
|
22
|
+
|
23
|
+
# Offense count: 4
|
24
|
+
Style/Documentation:
|
25
|
+
Exclude:
|
26
|
+
- 'spec/**/*'
|
27
|
+
- 'test/**/*'
|
28
|
+
- 'lib/zip_tricks/block_deflate.rb'
|
29
|
+
- 'lib/zip_tricks/block_write.rb'
|
30
|
+
- 'lib/zip_tricks/file_reader.rb'
|
31
|
+
- 'lib/zip_tricks/streamer/deflated_writer.rb'
|
32
|
+
|
33
|
+
# Offense count: 1
|
34
|
+
# Configuration parameters: MinBodyLength.
|
35
|
+
Style/GuardClause:
|
36
|
+
Exclude:
|
37
|
+
- 'lib/zip_tricks/file_reader.rb'
|
38
|
+
|
39
|
+
# Offense count: 2
|
40
|
+
Style/StructInheritance:
|
41
|
+
Exclude:
|
42
|
+
- 'lib/zip_tricks/streamer/entry.rb'
|
43
|
+
- 'spec/zip_tricks/zip_writer_spec.rb'
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 4.5.0
|
2
|
+
|
3
|
+
* Rename `Streamer#add_compressed_entry` and `SizeEstimator#add_compressed_entry` to `add_deflated_entry`
|
4
|
+
to indicate the type of compression that is going to get used.
|
5
|
+
* Make `Streamer#write_(deflated|stored)_file` return a writable object that can be `.close`d, to
|
6
|
+
permit usage of those methods in situations where suspending a block is inconvenient (make deferred writing possible).
|
7
|
+
* Fix CRC32 checksums in `Streamer#write_deflated_file`
|
8
|
+
* Add `Streamer#update_last_entry_and_write_data_descriptor` to permit externally-driven flows that use data descriptors
|
9
|
+
|
1
10
|
## 4.4.2
|
2
11
|
|
3
12
|
* Add 2.4 to Travis rubies
|
data/Rakefile
CHANGED
@@ -1,12 +1,15 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
require 'yard'
|
4
|
+
require 'rubocop/rake_task'
|
4
5
|
|
5
6
|
YARD::Rake::YardocTask.new(:doc) do |t|
|
6
|
-
# The dash has to be between the two to "divide" the source files and
|
7
|
+
# The dash has to be between the two to "divide" the source files and
|
7
8
|
# miscellaneous documentation files that contain no code
|
8
9
|
t.files = ['lib/**/*.rb', '-', 'LICENSE.txt', 'IMPLEMENTATION_DETAILS.md']
|
9
10
|
end
|
10
11
|
|
11
12
|
RSpec::Core::RakeTask.new(:spec)
|
12
|
-
task :
|
13
|
+
task default: :spec
|
14
|
+
|
15
|
+
RuboCop::RakeTask.new
|
@@ -1,13 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../lib/zip_tricks'
|
2
4
|
|
3
|
-
# Predict how large a ZIP file is going to be without having access to
|
4
|
-
# file contents, but using just the filenames (influences the
|
5
|
-
# of the files
|
5
|
+
# Predict how large a ZIP file is going to be without having access to
|
6
|
+
# the actual file contents, but using just the filenames (influences the
|
7
|
+
# file size) and the size of the files
|
6
8
|
zip_archive_size_in_bytes = ZipTricks::SizeEstimator.estimate do |zip|
|
7
9
|
# Pretend we are going to make a ZIP file which contains a few
|
8
10
|
# MP4 files (those do not compress all too well)
|
9
|
-
zip.add_stored_entry(filename:
|
10
|
-
zip.add_stored_entry(filename:
|
11
|
+
zip.add_stored_entry(filename: 'MOV_1234.MP4', size: 898_090)
|
12
|
+
zip.add_stored_entry(filename: 'MOV_1235.MP4', size: 7_855_126)
|
11
13
|
end
|
12
14
|
|
13
|
-
puts zip_archive_size_in_bytes #=>
|
15
|
+
puts zip_archive_size_in_bytes #=> 8_753_467
|
data/examples/config.ru
CHANGED
@@ -1,50 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../lib/zip_tricks'
|
2
4
|
require 'tempfile'
|
3
5
|
|
4
6
|
# This shows how to perform compression in parallel (a-la pigz, but in a less
|
5
|
-
# advanced fashion since the compression tables are not shared - to
|
7
|
+
# advanced fashion since the compression tables are not shared - to
|
8
|
+
# minimize shared state).
|
6
9
|
#
|
7
|
-
# When using this approach, compressing a large file can be performed as a
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# and
|
11
|
-
#
|
10
|
+
# When using this approach, compressing a large file can be performed as a
|
11
|
+
# map-reduce operation.
|
12
|
+
# First you prepare all the data per part of your (potentially very large) file,
|
13
|
+
# and then you use the reduce task to combine that data into one linear zip.
|
14
|
+
# In this example we will generate threads and collect their return values in
|
15
|
+
# the order the threads were launched, which guarantees a consistent reduce.
|
12
16
|
#
|
13
17
|
# So, let each thread generate a part of the file, and also
|
14
18
|
# compute the CRC32 of it. The thread will compress its own part
|
15
|
-
# as well, in an independent deflate segment - the threads do not share
|
16
|
-
# multiplex this over multiple processes or
|
19
|
+
# as well, in an independent deflate segment - the threads do not share
|
20
|
+
# anything. You could also multiplex this over multiple processes or
|
21
|
+
# even machines.
|
17
22
|
threads = (0..12).map do
|
18
23
|
Thread.new do
|
19
24
|
source_tempfile = Tempfile.new 't'
|
20
25
|
source_tempfile.binmode
|
21
|
-
|
26
|
+
|
22
27
|
# Fill the part with random content
|
23
28
|
12.times { source_tempfile << Random.new.bytes(1 * 1024 * 1024) }
|
24
29
|
source_tempfile.rewind
|
25
|
-
|
30
|
+
|
26
31
|
# Compute the CRC32 of the source file
|
27
32
|
part_crc = ZipTricks::StreamCRC32.from_io(source_tempfile)
|
28
33
|
source_tempfile.rewind
|
29
|
-
|
34
|
+
|
30
35
|
# Create a compressed part
|
31
36
|
compressed_tempfile = Tempfile.new('tc')
|
32
37
|
compressed_tempfile.binmode
|
33
|
-
ZipTricks::BlockDeflate.deflate_in_blocks(source_tempfile,
|
34
|
-
|
38
|
+
ZipTricks::BlockDeflate.deflate_in_blocks(source_tempfile,
|
39
|
+
compressed_tempfile)
|
40
|
+
|
35
41
|
source_tempfile.close!
|
36
42
|
# The data that the splicing process needs.
|
37
43
|
[compressed_tempfile, part_crc, source_tempfile.size]
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
|
-
# Threads return us a tuple with [compressed_tempfile, source_part_size,
|
47
|
+
# Threads return us a tuple with [compressed_tempfile, source_part_crc,
|
48
|
+
# source_part_size]
|
42
49
|
compressed_tempfiles_and_crc_of_parts = threads.map(&:join).map(&:value)
|
43
50
|
|
44
|
-
# Now we need to compute the CRC32 of the _entire_ file, and it has to be
|
45
|
-
# of the _source_ file (uncompressed), not of the compressed variant.
|
51
|
+
# Now we need to compute the CRC32 of the _entire_ file, and it has to be
|
52
|
+
# the CRC32 of the _source_ file (uncompressed), not of the compressed variant.
|
53
|
+
# Handily we know
|
46
54
|
entire_file_crc = ZipTricks::StreamCRC32.new
|
47
|
-
compressed_tempfiles_and_crc_of_parts.each do |
|
55
|
+
compressed_tempfiles_and_crc_of_parts.each do |_, source_part_crc, source_part_size|
|
48
56
|
entire_file_crc.append(source_part_crc, source_part_size)
|
49
57
|
end
|
50
58
|
|
@@ -56,7 +64,7 @@ ZipTricks::BlockDeflate.write_terminator(last_compressed_part)
|
|
56
64
|
# To figure that out we just sum the sizes of the files
|
57
65
|
compressed_part_files = compressed_tempfiles_and_crc_of_parts.map(&:first)
|
58
66
|
size_of_deflated_segment = compressed_part_files.map(&:size).inject(&:+)
|
59
|
-
size_of_uncompressed_file = compressed_tempfiles_and_crc_of_parts.map{|e| e[2]}.inject(&:+)
|
67
|
+
size_of_uncompressed_file = compressed_tempfiles_and_crc_of_parts.map { |e| e[2] }.inject(&:+)
|
60
68
|
|
61
69
|
# And now we can create a ZIP with our compressed file in its entirety.
|
62
70
|
# We use a File as a destination here, but you can also use a socket or a
|
@@ -64,8 +72,11 @@ size_of_uncompressed_file = compressed_tempfiles_and_crc_of_parts.map{|e| e[2]}.
|
|
64
72
|
# made for streaming.
|
65
73
|
output = File.open('zip_created_in_parallel.zip', 'wb')
|
66
74
|
|
67
|
-
ZipTricks::Streamer.open(output) do |
|
68
|
-
zip.
|
75
|
+
ZipTricks::Streamer.open(output) do |zip|
|
76
|
+
zip.add_deflated_entry('parallel.bin',
|
77
|
+
size_of_uncompressed_file,
|
78
|
+
entire_file_crc.to_i,
|
79
|
+
size_of_deflated_segment)
|
69
80
|
compressed_part_files.each do |part_file|
|
70
81
|
part_file.rewind
|
71
82
|
while blob = part_file.read(2048)
|
@@ -1,26 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative '../lib/zip_tricks'
|
2
4
|
|
3
5
|
# An example of how you can create a Rack endpoint for your ZIP downloads.
|
4
6
|
# NEVER run this in production - it is a huge security risk.
|
5
7
|
# What this app will do is pick PATH_INFO (your request URL path)
|
6
8
|
# and grab a file located at this path on your filesystem. The file will then
|
7
|
-
# be added to a ZIP archive created completely programmatically. No data will
|
8
|
-
# on disk and the contents of the ZIP file will _not_ be buffered in
|
9
|
-
# before sending. Unless you use a buffering Rack server of
|
9
|
+
# be added to a ZIP archive created completely programmatically. No data will
|
10
|
+
# be cached on disk and the contents of the ZIP file will _not_ be buffered in
|
11
|
+
# its entirety before sending. Unless you use a buffering Rack server of
|
12
|
+
# course (WEBrick or Thin).
|
10
13
|
class ZipDownload
|
11
14
|
def call(env)
|
12
15
|
file_path = env['PATH_INFO'] # Should be the absolute path on the filesystem
|
13
|
-
|
16
|
+
|
14
17
|
# Open the file for binary reading
|
15
18
|
f = File.open(file_path, 'rb')
|
16
19
|
filename = File.basename(file_path)
|
17
|
-
|
18
|
-
# Compute the CRC32 upfront. We do not use local footers for post-computing
|
19
|
-
# so you _do_ have to precompute it beforehand. Ideally, you
|
20
|
-
# storing the files you will be sending out later on.
|
20
|
+
|
21
|
+
# Compute the CRC32 upfront. We do not use local footers for post-computing
|
22
|
+
# the CRC32, so you _do_ have to precompute it beforehand. Ideally, you
|
23
|
+
# would do that before storing the files you will be sending out later on.
|
21
24
|
crc32 = ZipTricks::StreamCRC32.from_io(f)
|
22
25
|
f.rewind
|
23
|
-
|
26
|
+
|
24
27
|
# Compute the size of the download, so that a
|
25
28
|
# real Content-Length header can be sent. Also, if your download
|
26
29
|
# stops at some point, the downloading browser will be able to tell
|
@@ -30,7 +33,7 @@ class ZipDownload
|
|
30
33
|
size = ZipTricks::SizeEstimator.estimate do |ar|
|
31
34
|
ar.add_stored_entry(filename, f.size)
|
32
35
|
end
|
33
|
-
|
36
|
+
|
34
37
|
# Create a suitable Rack response body, that will support each(),
|
35
38
|
# close() and all the other methods. We can then return it up the stack.
|
36
39
|
zip_response_body = ZipTricks::RackBody.new do |zip|
|
@@ -38,20 +41,25 @@ class ZipDownload
|
|
38
41
|
# We are adding only one file to the ZIP here, but you could do that
|
39
42
|
# with an arbitrary number of files of course.
|
40
43
|
zip.add_stored_entry(filename: filename, size: f.size, crc32: crc32)
|
41
|
-
# Write the contents of the file. It is stored, so the writes go
|
42
|
-
# to the Rack output, bypassing any RubyZip
|
43
|
-
# are yielding the "blob" string
|
44
|
+
# Write the contents of the file. It is stored, so the writes go
|
45
|
+
# directly to the Rack output, bypassing any RubyZip
|
46
|
+
# deflaters/compressors. In fact you are yielding the "blob" string
|
47
|
+
# here directly to the Rack server handler.
|
44
48
|
IO.copy_stream(f, zip)
|
45
49
|
ensure
|
46
50
|
f.close # Make sure the opened file we read from gets closed
|
47
51
|
end
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
# Add a Content-Disposition so that the download has a .zip extension
|
51
55
|
# (this will not work well with UTF-8 filenames on Windows, but hey!)
|
52
|
-
content_disposition = 'attachment; filename=%s.zip'
|
53
|
-
|
56
|
+
content_disposition = format('attachment; filename=%s.zip', filename)
|
57
|
+
|
54
58
|
# and return the response, adding the Content-Length we have computed earlier
|
55
|
-
[
|
59
|
+
[
|
60
|
+
200,
|
61
|
+
{'Content-Length' => size.to_s, 'Content-Disposition' => content_disposition},
|
62
|
+
zip_response_body
|
63
|
+
]
|
56
64
|
end
|
57
65
|
end
|
data/lib/zip_tricks.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Require all the sub-components except myself
|
1
4
|
module ZipTricks
|
2
|
-
|
3
|
-
Dir.glob(__dir__ + '/**/*.rb').sort.each {|p| require p unless p == __FILE__ }
|
5
|
+
Dir.glob(__dir__ + '/**/*.rb').sort.each { |p| require p unless p == __FILE__ }
|
4
6
|
end
|
@@ -1,45 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'zlib'
|
2
4
|
|
3
5
|
# Permits Deflate compression in independent blocks. The workflow is as follows:
|
4
6
|
#
|
5
|
-
# * Run every block to compress through deflate_chunk, remove the header,
|
6
|
-
#
|
7
|
+
# * Run every block to compress through deflate_chunk, remove the header,
|
8
|
+
# footer and adler32 from the result
|
9
|
+
# * Write out the compressed block bodies (the ones deflate_chunk returns)
|
10
|
+
# to your output, in sequence
|
7
11
|
# * Write out the footer (\03\00)
|
8
12
|
#
|
9
|
-
# The resulting stream is guaranteed to be handled properly by all zip
|
10
|
-
# BOMArchiveHelper/ArchiveUtility on OSX.
|
13
|
+
# The resulting stream is guaranteed to be handled properly by all zip
|
14
|
+
# unarchiving tools, including the BOMArchiveHelper/ArchiveUtility on OSX.
|
11
15
|
#
|
12
16
|
# You could also build a compressor for Rubyzip using this module quite easily,
|
13
17
|
# even though this is outside the scope of the library.
|
14
18
|
#
|
15
|
-
# When you deflate the chunks separately, you need to write the end marker
|
16
|
-
#
|
17
|
-
#
|
19
|
+
# When you deflate the chunks separately, you need to write the end marker
|
20
|
+
# yourself (using `write_terminator`).
|
21
|
+
# If you just want to deflate a large IO's contents, use
|
22
|
+
# `deflate_in_blocks_and_terminate` to have the end marker written out for you.
|
18
23
|
#
|
19
24
|
# Basic usage to compress a file in parts:
|
20
|
-
#
|
25
|
+
#
|
21
26
|
# source_file = File.open('12_gigs.bin', 'rb')
|
22
27
|
# compressed = Tempfile.new
|
23
|
-
# # Will not compress everything in memory, but do it per chunk to spare
|
28
|
+
# # Will not compress everything in memory, but do it per chunk to spare
|
29
|
+
# memory. `compressed`
|
24
30
|
# # will be written to at the end of each chunk.
|
25
|
-
# ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
31
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks_and_terminate(source_file,
|
32
|
+
# compressed)
|
33
|
+
#
|
34
|
+
# You can also do the same to parts that you will later concatenate together
|
35
|
+
# elsewhere, in that case you need to skip the end marker:
|
36
|
+
#
|
30
37
|
# compressed = Tempfile.new
|
31
|
-
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb),
|
32
|
-
#
|
33
|
-
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('
|
38
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part1.bin', 'rb'),
|
39
|
+
# compressed)
|
40
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('part2.bin', 'rb'),
|
41
|
+
# compressed)
|
42
|
+
# ZipTricks::BlockDeflate.deflate_in_blocks(File.open('partN.bin', 'rb'),
|
43
|
+
# compressed)
|
34
44
|
# ZipTricks::BlockDeflate.write_terminator(compressed)
|
35
|
-
#
|
45
|
+
#
|
36
46
|
# You can also elect to just compress strings in memory (to splice them later):
|
37
|
-
#
|
47
|
+
#
|
38
48
|
# compressed_string = ZipTricks::BlockDeflate.deflate_chunk(big_string)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
49
|
+
|
50
|
+
class ZipTricks::BlockDeflate
|
51
|
+
DEFAULT_BLOCKSIZE = 1_024 * 1024 * 5
|
52
|
+
END_MARKER = [3, 0].pack('C*')
|
53
|
+
# Zlib::NO_COMPRESSION..
|
54
|
+
VALID_COMPRESSIONS = (Zlib::DEFAULT_COMPRESSION..Zlib::BEST_COMPRESSION).to_a.freeze
|
43
55
|
# Write the end marker (\x3\x0) to the given IO.
|
44
56
|
#
|
45
57
|
# `output_io` can also be a {ZipTricks::Streamer} to expedite ops.
|
@@ -83,7 +95,10 @@ module ZipTricks::BlockDeflate
|
|
83
95
|
# @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
84
96
|
# @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
|
85
97
|
# @return [Fixnum] number of bytes written to `output_io`
|
86
|
-
def self.deflate_in_blocks_and_terminate(input_io,
|
98
|
+
def self.deflate_in_blocks_and_terminate(input_io,
|
99
|
+
output_io,
|
100
|
+
level: Zlib::DEFAULT_COMPRESSION,
|
101
|
+
block_size: DEFAULT_BLOCKSIZE)
|
87
102
|
bytes_written = deflate_in_blocks(input_io, output_io, level: level, block_size: block_size)
|
88
103
|
bytes_written + write_terminator(output_io)
|
89
104
|
end
|
@@ -100,7 +115,10 @@ module ZipTricks::BlockDeflate
|
|
100
115
|
# @param level [Fixnum] Zlib compression level (defaults to `Zlib::DEFAULT_COMPRESSION`)
|
101
116
|
# @param block_size [Fixnum] The block size to use (defaults to `DEFAULT_BLOCKSIZE`)
|
102
117
|
# @return [Fixnum] number of bytes written to `output_io`
|
103
|
-
def self.deflate_in_blocks(input_io,
|
118
|
+
def self.deflate_in_blocks(input_io,
|
119
|
+
output_io,
|
120
|
+
level: Zlib::DEFAULT_COMPRESSION,
|
121
|
+
block_size: DEFAULT_BLOCKSIZE)
|
104
122
|
bytes_written = 0
|
105
123
|
while block = input_io.read(block_size)
|
106
124
|
deflated = deflate_chunk(block, level: level)
|