http_zip 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +57 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/http_zip.gemspec +36 -0
- data/lib/http_zip/entry.rb +89 -0
- data/lib/http_zip/errors.rb +7 -0
- data/lib/http_zip/file.rb +81 -0
- data/lib/http_zip/parser/central_directory.rb +92 -0
- data/lib/http_zip/parser/central_directory_file_header.rb +115 -0
- data/lib/http_zip/range_request.rb +70 -0
- data/lib/http_zip/version.rb +5 -0
- data/lib/http_zip.rb +9 -0
- metadata +148 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c5c21ac606a405c8527ce48a94f760ecd195104a4ac84a25c84bbb095a748c21
|
4
|
+
data.tar.gz: 3b9c5d83fcbbbf09b858411fe10f56fc582738914bfbd5b54626571705eff72c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6770e900fbf8a657716f0426e394646ba1bcd796cce64317c9d34996bd792847d1cd147ac0c7a49a64641787519a15c4d2abcc8933f01379e75f9607359049c2
|
7
|
+
data.tar.gz: 1d85ea79fd3e03302e242c4fce86a4fdf45ead85e663f3a5b4b91cf5cb735081a5d50aecd2e7931de6ef7b1064dcc6d174c2b25222689cee24ba7d26cbe8c2e9
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2022 Marvin Killing, Peter Retzlaff
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# HttpZip
|
2
|
+
|
3
|
+
HttpZip is a Ruby gem to extract individual files from a remote ZIP archive, without the need to download the entire file.
|
4
|
+
|
5
|
+
If your ZIP file is hosted on a server that supports HTTP Range requests and you only want to extract individual files, you don't need to download
|
6
|
+
the entire archive to do that. HttpZip uses Range requests to first read only the Central Directory of your archive and builds a list of entries
|
7
|
+
from that. You can then download and extract individual entries without downloading the entire archive.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'http_zip'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle install
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install http_zip
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
# Create a new HttpZip::File referencing your remote archive.
|
29
|
+
# This only makes a HEAD request to check the server for
|
30
|
+
# Range request support.
|
31
|
+
zip = HttpZip::File.new("https://www.example.org/archive.zip")
|
32
|
+
|
33
|
+
# Get a reference to a specific file.
|
34
|
+
# This only requests the archive's Central Directory Entry.
|
35
|
+
entry = zip.entries.find { |e| e.name == 'compressed.txt' }
|
36
|
+
|
37
|
+
# Read the extracted file contents into memory.
|
38
|
+
# This downloads the entry's compressed contents and uncompresses
|
39
|
+
# them locally.
|
40
|
+
content = entry.read
|
41
|
+
# You can also write the extracted entry directly to a local file.
|
42
|
+
entry.write_to_file('/path/extracted.txt')
|
43
|
+
```
|
44
|
+
|
45
|
+
If the server that the zip file is hosted on doesn't support Range requests, HttpZip will raise `HttpZip::ContentRangeError`. If you want, you can check this beforehand by calling:
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
HttpZip::RangeRequest.server_supports_content_range?(url)
|
49
|
+
```
|
50
|
+
|
51
|
+
## Contributing
|
52
|
+
|
53
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/peret/http_zip.
|
54
|
+
|
55
|
+
## License
|
56
|
+
|
57
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'http_zip'
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require 'irb'
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/http_zip.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'http_zip/version'
|
6
|
+
|
7
|
+
Gem::Specification.new do |spec|
  spec.name = 'http_zip'
  spec.version = HttpZip::VERSION
  spec.authors = ['Marvin Killing', 'Peter Retzlaff']
  spec.email = ['pe.retzlaff@gmail.com']

  spec.summary = 'HttpZip is a gem to extract individual files from a remote ZIP archive, without the need to download the entire file.'
  spec.homepage = 'https://github.com/peret/http_zip'
  spec.license = 'MIT'

  # The library calls String#unpack1, which is only available from Ruby 2.4 on.
  spec.required_ruby_version = '>= 2.4.0'

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = 'https://github.com/peret/http_zip'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    # \A anchors at the start of the path (^ would also match after an embedded newline).
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(test|spec|features)/}) }
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'httparty', '~> 0.20'

  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'minitest', '~> 5.15'
  # rake < 12.3.3 is affected by CVE-2020-8130, so do not pin the 10.x series.
  spec.add_development_dependency 'rake', '~> 13.0'
  spec.add_development_dependency 'simplecov', '~> 0.21'
  spec.add_development_dependency 'webmock', '~> 3.14'
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'zlib'

module HttpZip
  # Describes one entry of a ZIP archive that is served over HTTP.
  #
  # An entry knows where its local file header starts inside the remote archive and how
  # many compressed bytes belong to it. The header and the payload are only requested
  # when they are needed.
  #
  # @attr_reader [String] name filename of the entry
  class Entry
    # Size in bytes of the fixed-length part of a local file header.
    LOCAL_FILE_HEADER_SIZE = 30
    # Magic bytes every local file header starts with.
    LOCAL_FILE_HEADER_IDENTIFIER = "\x50\x4B\x03\x04"

    attr_reader :name

    # @param [String] url URL of the remote archive
    # @param [String] name file name of the entry
    # @param [Integer] header_offset offset of the entry's local file header in the archive
    # @param [Integer] central_directory_file_compressed_size compressed size of the entry
    #   as recorded in the central directory
    def initialize(url, name, header_offset, central_directory_file_compressed_size)
      @range_request = HttpZip::RangeRequest.new(url)
      @name = name
      @header_offset = header_offset
      @compressed_size = central_directory_file_compressed_size
    end

    # Get the decompressed content of the file entry.
    # Makes up to 2 HTTP requests (GET for the local header, GET for the payload);
    # entries without payload (empty files, directories) need no request at all.
    #
    # @return [String] the decompressed content
    # @raise [ZipError] if the local header is invalid or the compression method is unsupported
    def read
      # A zero-length payload cannot be expressed as a byte range, so never ask for it.
      return String.new if @compressed_size.zero?

      first_byte, end_byte = content_range
      with_decompressor do |decompress, finish|
        contents = decompress.call(@range_request.get(first_byte, end_byte))
        # finish flushes whatever the decompressor still buffers
        remainder = finish.call
        remainder.empty? ? contents : contents + remainder
      end
    end

    # Write the decompressed content of the file entry to a local file. The payload is
    # streamed, so it is never held in memory as a whole.
    # Makes up to 2 HTTP requests (GET for the local header, GET for the payload).
    #
    # @param [String] filename path of the file to write
    # @return [nil]
    # @raise [ZipError] if the local header is invalid or the compression method is unsupported
    def write_to_file(filename)
      if @compressed_size.zero?
        # nothing to download, just create the (empty) file
        ::File.binwrite(filename, '')
        return
      end

      first_byte, end_byte = content_range
      with_decompressor do |decompress, finish|
        ::File.open(filename, 'wb') do |out_file|
          @range_request.get(first_byte, end_byte) do |chunk|
            out_file.write(decompress.call(chunk))
          end
          # do not lose output the decompressor only releases when it is finished
          out_file.write(finish.call)
        end
      end
      nil
    end

    private

    # Fixed-length part of the entry's local file header; requested once and cached.
    def header
      @header ||= fetch_header
    end

    # Downloads the fixed-length part of the local file header and checks that it
    # really is one.
    def fetch_header
      bytes = @range_request.get(@header_offset, @header_offset + LOCAL_FILE_HEADER_SIZE)
      if bytes.nil? || bytes.bytesize < LOCAL_FILE_HEADER_SIZE ||
         !bytes.start_with?(LOCAL_FILE_HEADER_IDENTIFIER)
        raise HttpZip::ZipError, 'Local file header seems to be corrupt'
      end

      bytes
    end

    # Total size of the local file header, including its variable-length parts.
    def header_size
      # the file name and the extra field follow the fixed-length part
      file_name_length = header.byteslice(26, 2).unpack1('v')
      extra_field_length = header.byteslice(28, 2).unpack1('v')
      LOCAL_FILE_HEADER_SIZE + file_name_length + extra_field_length
    end

    # Absolute byte range of the compressed payload as [from, to), i.e. `to` is exclusive.
    def content_range
      from = @header_offset + header_size
      [from, from + @compressed_size]
    end

    # Yields two lambdas: the first one decompresses a chunk of input, the second one
    # returns the output that is still pending once all input has been fed.
    # The inflater is closed when the block returns or raises.
    def with_decompressor
      # which compression method is used?
      compression_method = header.byteslice(8, 2).unpack1('v')

      case compression_method
      when 0
        # STORED content, doesn't require decompression
        yield(->(input) { input }, -> { '' })
      when 8
        # DEFLATED content, inflate it (negative window bits = raw deflate stream)
        inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
        begin
          yield(->(input) { inflater.inflate(input) }, -> { inflater.finish })
        ensure
          inflater.close
        end
      else
        raise HttpZip::ZipError,
              "Unsupported compression method #{compression_method}. HttpZip only supports compression methods 0 (STORED) and 8 (DEFLATE)."
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HttpZip
  # 256x256 bytes is an upper bound for the length of the EOCD comment
  # (its length field is 2 bytes wide),
  # 22 bytes is the remaining EOCD size
  # 20 bytes is the EOCD64 locator size
  MAXIMUM_EOCD_AND_EOCD64_LOCATOR_SIZE = (256 * 256) + 22 + 20
  EOCD64_SIZE_WITHOUT_COMMENT = 56

  # HttpZip reads ZIP files over an HTTP connection that supports the Range header.
  # It is a helpful tool to extract single files from large HTTP archives without having to
  # download them fully.
  #
  # Resources regarding the ZIP file format:
  # https://en.wikipedia.org/wiki/ZIP_(file_format)
  # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
  class File
    # Create a HttpZip file object that is located at url.
    #
    # @param [String] url where the file is hosted
    # @raise [ContentRangeError] if the server does not support the Range header
    def initialize(url)
      @url = url
      @entries = nil
      @range_request = RangeRequest.new(url)
      @range_request.check_server_supports_content_range!
    end

    # Get all entries in the zip archive as an array of HttpZip::Entry.
    # The first call makes up to 3 GET requests (in addition to the HEAD request made by
    # the constructor); the result is cached afterwards.
    #
    # The list is only cached once it has been parsed completely, so a failed attempt
    # never leaves a partial list behind.
    #
    # @return [Array<HttpZip::Entry>] all entries of the archive
    # @raise [ZipError] if the central directory is invalid
    def entries
      @entries ||= load_entries
    end

    private

    # Downloads the central directory and turns each of its file headers into an Entry.
    def load_entries
      last_bytes_of_file = @range_request.last(MAXIMUM_EOCD_AND_EOCD64_LOCATOR_SIZE)
      remaining_bytes = get_central_directory(last_bytes_of_file)

      parsed_entries = []
      # iterate through central directory and spit out file entries
      until remaining_bytes.empty?
        # get information about the current file entry
        file_header = HttpZip::Parser::CentralDirectoryFileHeader.new(remaining_bytes)
        parsed_entries << HttpZip::Entry.new(
          @url,
          file_header.file_name,
          file_header.header_offset,
          file_header.compressed_size
        )

        # skip ahead to next file entry; byteslice is nil if the entry claims to be
        # longer than the bytes that are left
        remaining_bytes = remaining_bytes.byteslice(file_header.end_of_entry..-1)
        raise ZipError, 'Central directory is truncated' if remaining_bytes.nil?
      end

      parsed_entries
    end

    # The central directory contains all file names within the archive as well as
    # their offsets to the beginning of the archive file.
    # Get the whole central directory so the client can traverse it and find the
    # file entry they are looking for.
    #
    # makes 1 GET request for non-Zip64 files, 2 GET requests for Zip64 files,
    # and skips the download of an empty central directory
    def get_central_directory(last_bytes_of_file)
      central_directory = HttpZip::Parser::CentralDirectory.new(last_bytes_of_file)
      if central_directory.eocd64_offset
        # This is a Zip64 archive, so parse the EOCD64 block to find out where the central directory
        # is located
        eocd64_block = @range_request.get(
          central_directory.eocd64_offset,
          central_directory.eocd64_offset + EOCD64_SIZE_WITHOUT_COMMENT
        )
        central_directory.parse_eocd64!(eocd64_block)
      end

      # an archive without entries has nothing to download, and an empty byte range
      # cannot be requested
      return '' if central_directory.size.zero?

      # get the actual central directory
      central_directory_end = central_directory.offset + central_directory.size
      @range_request.get(central_directory.offset, central_directory_end)
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HttpZip
  module Parser
    # Parses the End Of Central Directory (EOCD) block of a zip file.
    class CentralDirectory
      EOCD_BLOCK_IDENTIFIER = "\x50\x4B\x05\x06"
      EOCD64_LOCATOR_BLOCK_IDENTIFER = "\x50\x4b\x06\x07"
      EOCD64_BLOCK_IDENTIFER = "\x50\x4b\x06\x06"

      # Size of the EOCD block without its trailing comment.
      EOCD_SIZE_WITHOUT_COMMENT = 22
      # Size of the Zip64 EOCD locator block.
      EOCD64_LOCATOR_SIZE = 20
      # Size of the fixed part of the Zip64 EOCD block.
      EOCD64_SIZE = 56

      attr_reader :size, :offset, :eocd64_offset

      # Create a new instance of CentralDirectory.
      #
      # @param [String] end_of_central_directory_bytes the byte string including the EOCD block
      # @raise [ZipError] if no valid EOCD block is found, or see #parse!
      def initialize(end_of_central_directory_bytes)
        # work on a binary copy so that every index below is a byte offset
        @bytes = end_of_central_directory_bytes.b

        parse!
      end

      # Read the size and offset of the central directory from a Zip64 EOCD block.
      #
      # @param [String] eocd64_block the byte string including the EOCD block for a zip64 archive
      # @raise [ZipError] if the byte stream does not contain a valid EOCD64 block
      def parse_eocd64!(eocd64_block)
        block = eocd64_block.b
        unless block.start_with?(EOCD64_BLOCK_IDENTIFER) && block.bytesize >= EOCD64_SIZE
          raise ZipError, 'EOCD64 record not found'
        end

        # size and offset of the central directory are the last two fields of the fixed part
        @size, @offset = block.byteslice(40, 16).unpack('Q<Q<')
      end

      private

      # Parses the size and offset of the central directory from the EOCD block.
      # If this is a zip64 archive, the `eocd64_offset` will be set.
      # @raise [ZipError] if this is a zip64 archive and the EOCD64 locator block is not found or
      #   the archive is split on multiple disks.
      def parse!
        eocd_block_index = get_eocd_block_index(@bytes)
        @size, @offset = @bytes.byteslice(eocd_block_index + 12, 8).unpack('VV')
        return if @size != 0xFFFFFFFF && @offset != 0xFFFFFFFF

        # there will be a zip64 EOCD locator block before the EOCD block
        # parse the EOCD locator to find out where the EOCD64 block starts
        locator_index = eocd_block_index - EOCD64_LOCATOR_SIZE
        # a negative index would wrap around to the end of the string
        eocd64_locator_block = locator_index.negative? ? '' : @bytes.byteslice(locator_index, EOCD64_LOCATOR_SIZE)
        unless eocd64_locator_block.start_with?(EOCD64_LOCATOR_BLOCK_IDENTIFER)
          raise ZipError, 'Could not locate the EOCD64 locator block'
        end

        @eocd64_offset, total_num_disks = eocd64_locator_block.byteslice(8, 12).unpack('Q<V')
        return if total_num_disks == 1

        raise ZipError, 'Multi-disk archives are not supported'
      end

      # In order to find the central directory, we have to first find the EOCD block.
      # The EOCD block (End Of Central Directory) identifies the end of the central directory
      # of the zip file and contains the offset where the central directory is located and its length.
      # The EOCD block is always at the end of the file.
      #
      # @param [String] last_bytes_of_file binary string holding the end of the archive
      # @return [Integer] index at which the EOCD block starts
      # @raise [ZipError] if no valid EOCD block is found
      def get_eocd_block_index(last_bytes_of_file)
        total_length = last_bytes_of_file.bytesize

        # Scan the downloaded bytes from right to left to find the magic EOCD block
        # identifier. A complete EOCD block cannot start any later than this position.
        search_end_position = total_length - EOCD_SIZE_WITHOUT_COMMENT
        while search_end_position >= 0
          candidate_index = last_bytes_of_file.rindex(EOCD_BLOCK_IDENTIFIER, search_end_position)
          break if candidate_index.nil?

          # we have a candidate, verify that we found the actual eocd block start by
          # checking whether its position + length matches the end of the file
          comment_length = last_bytes_of_file.byteslice(candidate_index + 20, 2).unpack1('v')
          return candidate_index if candidate_index + EOCD_SIZE_WITHOUT_COMMENT + comment_length == total_length

          # rindex also matches at the given position itself, so continue strictly
          # before the rejected candidate
          search_end_position = candidate_index - 1
        end

        raise ZipError, 'Could not locate valid EOCD block'
      end
    end
  end
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HttpZip
  module Parser
    # Parses the Central Directory File Header.
    class CentralDirectoryFileHeader
      ZIP64_EXTRA_FIELD_HEADER_ID = "\x01\x00"
      CENTRAL_DIRECTORY_FILE_HEADER_IDENTIFIER = "\x50\x4B\x01\x02"

      # Size in bytes of the fixed-length part of a central directory file header.
      FIXED_HEADER_SIZE = 46
      # Size in bytes of the id + length prefix of an extra field record.
      EXTRA_FIELD_RECORD_HEADER_SIZE = 4

      attr_reader(
        :compressed_size,
        :uncompressed_size,
        :file_name_length,
        :extra_field_length,
        :file_comment_length,
        :disk_number,
        :internal_file_attributes,
        :external_file_attributes,
        :header_offset,
        :file_name,
        :end_of_entry
      )

      # Create a new instance of CentralDirectoryFileHeader.
      #
      # @param [String] file_header_bytes the byte string starting with the file header;
      #   bytes following the header (e.g. the next headers) are ignored
      # @raise [ZipError] if the byte string does not represent a valid file header
      def initialize(file_header_bytes)
        @bytes = file_header_bytes
        unless @bytes.start_with?(CENTRAL_DIRECTORY_FILE_HEADER_IDENTIFIER)
          raise ZipError, 'Central Directory File Header seems to be corrupt'
        end

        parse!
      end

      private

      # Parses the fields from the Central Directory File Header,
      # including data in Zip64 extra fields
      #
      # @raise [ZipError] if the header is shorter than its own length fields claim
      def parse!
        raise ZipError, 'Central Directory File Header is truncated' if @bytes.bytesize < FIXED_HEADER_SIZE

        @compressed_size,
        @uncompressed_size,
        @file_name_length,
        @extra_field_length,
        @file_comment_length,
        @disk_number,
        @internal_file_attributes,
        @external_file_attributes,
        @header_offset = @bytes.byteslice(20, 26).unpack('VVvvvvvVV')

        file_name_end = FIXED_HEADER_SIZE + @file_name_length
        @end_of_entry = file_name_end + @extra_field_length + @file_comment_length
        raise ZipError, 'Central Directory File Header is truncated' if @bytes.bytesize < @end_of_entry

        @file_name = @bytes.byteslice(FIXED_HEADER_SIZE, @file_name_length)

        # check if any of the values could not be represented by standard zip and will be stored in a
        # Zip64 extra field
        parse_zip64_extra_field_if_present!(@bytes.byteslice(file_name_end, @extra_field_length))
      end

      # Parses the extra fields section of a Central Directory File Header in order to extract
      # the larger values for uncompressed size, compressed size, header offset, and disk number
      # of the ZIP file if they weren't specified in the Central Directory File Header already.
      #
      # @param [String] full_extra_field_bytes the byte stream of the full extra fields
      #   section of this Central Directory File Header
      def parse_zip64_extra_field_if_present!(full_extra_field_bytes)
        remaining_bytes = full_extra_field_bytes
        # a record needs at least an id and a length; byteslice returns nil once a
        # record claims to be longer than the bytes that are left
        while remaining_bytes && remaining_bytes.bytesize >= EXTRA_FIELD_RECORD_HEADER_SIZE
          # zipalign might fill up the extra fields with all zero characters,
          # so we need to abort if there's nothing of value in the extra fields
          break if remaining_bytes.each_byte.all?(&:zero?)

          # did we find the Zip64 extra field?
          if remaining_bytes.start_with?(ZIP64_EXTRA_FIELD_HEADER_ID)
            read_values_from_extra_field_bytes!(remaining_bytes.byteslice(2..-1))
            break
          end

          record_length = remaining_bytes.byteslice(2, 2).unpack1('v')
          remaining_bytes = remaining_bytes.byteslice((EXTRA_FIELD_RECORD_HEADER_SIZE + record_length)..-1)
        end
      end

      # Sets values for uncompressed size, compressed size, header offset, and disk number
      # according to the values stored in the extra field.
      #
      # @param [String] extra_field_bytes the byte stream of the extra fields, starting right after
      #   the extra field header identifier
      # @raise [ZipError] if a value that has to be present is cut off
      def read_values_from_extra_field_bytes!(extra_field_bytes)
        # the zip64 extra field tries to store as little information as possible,
        # so only the values too large for the non-zip64 file header will be stored here
        ptr = 2 # ignore the size field, since it seems to be incorrect in some cases
        if @uncompressed_size == 0xFFFFFFFF
          @uncompressed_size = read_zip64_value(extra_field_bytes, ptr, 8, 'Q<')
          ptr += 8
        end
        if @compressed_size == 0xFFFFFFFF
          @compressed_size = read_zip64_value(extra_field_bytes, ptr, 8, 'Q<')
          ptr += 8
        end
        if @header_offset == 0xFFFFFFFF
          @header_offset = read_zip64_value(extra_field_bytes, ptr, 8, 'Q<')
          ptr += 8
        end
        return unless @disk_number == 0xFFFF

        @disk_number = read_zip64_value(extra_field_bytes, ptr, 4, 'V')
      end

      # Unpacks one little-endian integer of `width` bytes at byte position `ptr`.
      def read_zip64_value(extra_field_bytes, ptr, width, format)
        value_bytes = extra_field_bytes.byteslice(ptr, width)
        if value_bytes.nil? || value_bytes.bytesize < width
          raise ZipError, 'Zip64 extra field is truncated'
        end

        value_bytes.unpack1(format)
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'httparty'
|
4
|
+
|
5
|
+
module HttpZip
  # Class to make Range requests to a HTTP server
  class RangeRequest
    # HTTP status code of a response that carries only the requested byte range.
    PARTIAL_CONTENT = 206

    # Create a new RangeRequest object
    #
    # @param [String] url remote file URL
    def initialize(url)
      @url = url
    end

    # Request a partial object via HTTP. If a block is given, yields the response body in chunks.
    # No request is made for an empty range (from == to), since the Range header cannot
    # express it.
    #
    # @param [Integer] from start byte of the range to request. Inclusive.
    # @param [Integer] to end byte of the range to request. Exclusive.
    # @yield [chunk] yields a chunk of data to the block
    # @return [String] the response body as returned by HTTParty
    # @raise [ArgumentError] if `to` is smaller than `from`
    # @raise [ContentRangeError] if the server responds with anything other than 206 Partial Content
    def get(from, to, &block)
      raise ArgumentError, "Invalid byte range #{from}...#{to}" if to < from
      return String.new if to == from

      options = { headers: { 'Range' => "bytes=#{from}-#{to - 1}" } }
      response = block ? stream(options, &block) : HTTParty.get(@url, options)
      ensure_partial_content!(response)

      response.body
    end

    # Request the last `num_bytes` bytes of the remote file via HTTP.
    #
    # @param [Integer] num_bytes number of bytes to request
    # @return [String] the response body
    # @raise [ContentRangeError] if the server responds with anything other than 206 Partial Content
    def last(num_bytes)
      response = HTTParty.get(@url, headers: { 'Range' => "bytes=-#{num_bytes}" })
      ensure_partial_content!(response)

      response.body
    end

    # Tests if the server supports the Range header by checking the "Accept-Ranges" header,
    # otherwise raises an exception.
    #
    # @raise [ContentRangeError] if the server does not support the Range header
    def check_server_supports_content_range!
      return if self.class.server_supports_content_range?(@url)

      raise ContentRangeError, 'Server does not support the Range header'
    end

    # Tests if the server supports the Range header by checking the "Accept-Ranges" header
    # of the response to a HEAD request.
    #
    # @param [String] url remote file URL
    # @return [Boolean] true if the server supports the Range header
    def self.server_supports_content_range?(url)
      accept_ranges = HTTParty.head(url).headers['Accept-Ranges']
      !accept_ranges.nil? && accept_ranges.downcase != 'none'
    end

    private

    # Performs the request in streaming mode and hands every fragment of a 206 response
    # to the block. HTTParty reports the status code on each fragment, so a response
    # that is not partial content is rejected before any of its bytes reach the caller.
    def stream(options)
      HTTParty.get(@url, options.merge(stream_body: true)) do |fragment|
        case fragment.code
        when PARTIAL_CONTENT
          yield fragment
        when 300..399
          # body of a redirect response; the final response is still to come
          next
        else
          raise ContentRangeError, 'Server does not support the Range header'
        end
      end
    end

    # @raise [ContentRangeError] unless the response is 206 Partial Content
    def ensure_partial_content!(response)
      return if response.code == PARTIAL_CONTENT

      # oops, we downloaded the whole file
      raise ContentRangeError, 'Server does not support the Range header'
    end
  end
end
|
data/lib/http_zip.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'http_zip/version'
|
4
|
+
require 'http_zip/errors'
|
5
|
+
require 'http_zip/range_request'
|
6
|
+
require 'http_zip/entry'
|
7
|
+
require 'http_zip/file'
|
8
|
+
require 'http_zip/parser/central_directory_file_header'
|
9
|
+
require 'http_zip/parser/central_directory'
|
metadata
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: http_zip
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Marvin Killing
|
8
|
+
- Peter Retzlaff
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2022-04-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: httparty
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0.20'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0.20'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bundler
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '2.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '2.0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: minitest
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '5.15'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '5.15'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '10.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '10.0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: simplecov
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0.21'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0.21'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: webmock
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '3.14'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - "~>"
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '3.14'
|
98
|
+
description:
|
99
|
+
email:
|
100
|
+
- pe.retzlaff@gmail.com
|
101
|
+
executables: []
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".travis.yml"
|
107
|
+
- Gemfile
|
108
|
+
- LICENSE.txt
|
109
|
+
- README.md
|
110
|
+
- Rakefile
|
111
|
+
- bin/console
|
112
|
+
- bin/setup
|
113
|
+
- http_zip.gemspec
|
114
|
+
- lib/http_zip.rb
|
115
|
+
- lib/http_zip/entry.rb
|
116
|
+
- lib/http_zip/errors.rb
|
117
|
+
- lib/http_zip/file.rb
|
118
|
+
- lib/http_zip/parser/central_directory.rb
|
119
|
+
- lib/http_zip/parser/central_directory_file_header.rb
|
120
|
+
- lib/http_zip/range_request.rb
|
121
|
+
- lib/http_zip/version.rb
|
122
|
+
homepage: https://github.com/peret/http_zip
|
123
|
+
licenses:
|
124
|
+
- MIT
|
125
|
+
metadata:
|
126
|
+
homepage_uri: https://github.com/peret/http_zip
|
127
|
+
source_code_uri: https://github.com/peret/http_zip
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options: []
|
130
|
+
require_paths:
|
131
|
+
- lib
|
132
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ">="
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
requirements: []
|
143
|
+
rubygems_version: 3.0.9
|
144
|
+
signing_key:
|
145
|
+
specification_version: 4
|
146
|
+
summary: HttpZip is a gem to extract individual files from a remote ZIP archive, without
|
147
|
+
the need to download the entire file.
|
148
|
+
test_files: []
|