http_zip 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c5c21ac606a405c8527ce48a94f760ecd195104a4ac84a25c84bbb095a748c21
4
+ data.tar.gz: 3b9c5d83fcbbbf09b858411fe10f56fc582738914bfbd5b54626571705eff72c
5
+ SHA512:
6
+ metadata.gz: 6770e900fbf8a657716f0426e394646ba1bcd796cce64317c9d34996bd792847d1cd147ac0c7a49a64641787519a15c4d2abcc8933f01379e75f9607359049c2
7
+ data.tar.gz: 1d85ea79fd3e03302e242c4fce86a4fdf45ead85e663f3a5b4b91cf5cb735081a5d50aecd2e7931de6ef7b1064dcc6d174c2b25222689cee24ba7d26cbe8c2e9
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ .DS_Store
10
+ Gemfile.lock
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.6.9
7
+ before_install: gem install bundler -v 2.0.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in http_zip.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2022 Marvin Killing, Peter Retzlaff
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,57 @@
1
+ # HttpZip
2
+
3
+ HttpZip is a Ruby gem to extract individual files from a remote ZIP archive, without the need to download the entire file.
4
+
5
+ If your ZIP file is hosted on a server that supports HTTP Range requests and you only want to extract individual files, you don't need to download
6
+ the entire archive to do that. HttpZip uses Range requests to first read only the Central Directory of your archive and build a list of entries
7
+ from that. You can then download and extract individual entries without downloading the entire archive.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ ```ruby
14
+ gem 'http_zip'
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ $ bundle install
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install http_zip
24
+
25
+ ## Usage
26
+
27
+ ```ruby
28
+ # Create a new HttpZip::File referencing your remote archive.
29
+ # This only makes a HEAD request to check the server for
30
+ # Range request support.
31
+ zip = HttpZip::File.new("https://www.example.org/archive.zip")
32
+
33
+ # Get a reference to a specific file.
34
+ # This only requests the archive's Central Directory Entry.
35
+ entry = zip.entries.find { |e| e.name == 'compressed.txt' }
36
+
37
+ # Read the extracted file contents into memory.
38
+ # This downloads the entry's compressed contents and uncompresses
39
+ # them locally.
40
+ content = entry.read
41
+ # You can also write the extracted entry directly to a local file.
42
+ entry.write_to_file('/path/extracted.txt')
43
+ ```
44
+
45
+ If the server that the zip file is hosted on doesn't support Range requests, HttpZip will raise `HttpZip::ContentRangeError`. If you want, you can check this beforehand by calling:
46
+
47
+ ```ruby
48
+ HttpZip::RangeRequest.server_supports_content_range?(url)
49
+ ```
50
+
51
+ ## Contributing
52
+
53
+ Bug reports and pull requests are welcome on GitHub at https://github.com/peret/http_zip.
54
+
55
+ ## License
56
+
57
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << 'test'
8
+ t.libs << 'lib'
9
+ t.test_files = FileList['test/**/*_test.rb']
10
+ end
11
+
12
+ task default: :test
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'http_zip'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/http_zip.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'http_zip/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'http_zip'
9
+ spec.version = HttpZip::VERSION
10
+ spec.authors = ['Marvin Killing', 'Peter Retzlaff']
11
+ spec.email = ['pe.retzlaff@gmail.com']
12
+
13
+ spec.summary = 'HttpZip is a gem to extract individual files from a remote ZIP archive, without the need to download the entire file.'
14
+ spec.homepage = 'https://github.com/peret/http_zip'
15
+ spec.license = 'MIT'
16
+
17
+ spec.metadata['homepage_uri'] = spec.homepage
18
+ spec.metadata['source_code_uri'] = 'https://github.com/peret/http_zip'
19
+
20
+ # Specify which files should be added to the gem when it is released.
21
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
22
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
24
+ end
25
+ spec.bindir = 'exe'
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ['lib']
28
+
29
+ spec.add_runtime_dependency 'httparty', '~> 0.20'
30
+
31
+ spec.add_development_dependency 'bundler', '~> 2.0'
32
+ spec.add_development_dependency 'minitest', '~> 5.15'
33
+ spec.add_development_dependency 'rake', '~> 10.0'
34
+ spec.add_development_dependency 'simplecov', '~> 0.21'
35
+ spec.add_development_dependency 'webmock', '~> 3.14'
36
+ end
@@ -0,0 +1,89 @@
1
# frozen_string_literal: true

require 'zlib'

module HttpZip
  # Describes one entry in an HTTP zip archive and knows how to download and
  # decompress its contents using Range requests.
  #
  # @attr_reader [String] name filename of the entry
  class Entry
    # Size in bytes of the fixed-length part of a local file header.
    LOCAL_FILE_HEADER_SIZE = 30
    # Magic bytes every local file header starts with.
    LOCAL_FILE_HEADER_IDENTIFIER = "\x50\x4B\x03\x04"

    attr_reader :name

    # @param [String] url URL of the remote zip archive
    # @param [String] name filename of the entry
    # @param [Integer] header_offset offset of the entry's local file header
    #   from the beginning of the archive
    # @param [Integer] central_directory_file_compressed_size compressed size of
    #   the entry as recorded in the central directory
    def initialize(url, name, header_offset, central_directory_file_compressed_size)
      @range_request = HttpZip::RangeRequest.new(url)
      @name = name
      @header_offset = header_offset
      @compressed_size = central_directory_file_compressed_size
    end

    # Get the decompressed content of the file entry.
    # Makes 2 HTTP requests (GET, GET); entries without any stored bytes
    # (empty files, directories) are answered without a request.
    #
    # @return [String] the decompressed contents
    # @raise [ZipError] if the local file header is invalid or the compression
    #   method is not supported
    def read
      # A zero-length byte range cannot be expressed in a Range header, so do
      # not even try to download "nothing".
      return String.new if empty_entry?

      from, to = content_range
      decompress, finish = decompress_funcs

      compressed_contents = @range_request.get(from, to)
      decompress.call(compressed_contents) + finish.call
    end

    # Write the decompressed content of the file entry to a local file,
    # decompressing chunk by chunk while downloading.
    # Makes 2 HTTP requests (GET, GET); entries without any stored bytes
    # only create an empty file.
    #
    # @param [String] filename path of the file to write to
    # @return [nil]
    # @raise [ZipError] if the local file header is invalid or the compression
    #   method is not supported
    def write_to_file(filename)
      if empty_entry?
        ::File.binwrite(filename, '')
        return
      end

      # resolve header and decompressor first, so no file is created when
      # the entry cannot be read at all
      from, to = content_range
      decompress, finish = decompress_funcs

      ::File.open(filename, 'wb') do |out_file|
        @range_request.get(from, to) do |chunk|
          out_file.write(decompress.call(chunk))
        end
        # do not lose output that is only released when the stream is finished
        out_file.write(finish.call)
      end
      nil
    end

    private

    # @return [Boolean] true if the central directory records no stored bytes
    def empty_entry?
      @compressed_size.zero?
    end

    # Byte range of the entry's (compressed) contents within the archive.
    #
    # @return [Array<Integer>] start (inclusive) and end (exclusive) offsets
    def content_range
      start = @header_offset + header_size
      [start, start + @compressed_size]
    end

    # Downloads (once) the fixed-length part of the entry's local file header.
    #
    # @raise [ZipError] if the downloaded bytes are not a local file header
    def header
      @header ||= begin
        bytes = @range_request.get(@header_offset, @header_offset + LOCAL_FILE_HEADER_SIZE)
        unless bytes && bytes.bytesize >= LOCAL_FILE_HEADER_SIZE &&
               bytes.start_with?(LOCAL_FILE_HEADER_IDENTIFIER)
          raise HttpZip::ZipError, "Could not find the local file header of entry #{@name}"
        end

        bytes
      end
    end

    # Total size of the local file header, including the variable-length
    # file name and extra field that precede the file contents.
    def header_size
      file_name_length = header.byteslice(26, 2).unpack1('v')
      extra_field_length = header.byteslice(28, 2).unpack1('v')
      LOCAL_FILE_HEADER_SIZE + file_name_length + extra_field_length
    end

    # Builds the pair of callables used to decompress the entry.
    #
    # @return [Array<Proc>] `decompress` takes a chunk of compressed bytes and
    #   returns the decompressed bytes; `finish` releases the decompressor and
    #   returns any remaining decompressed bytes
    # @raise [ZipError] if the compression method is neither STORED nor DEFLATE
    def decompress_funcs
      # which compression method is used?
      compression_method = header.byteslice(8, 2).unpack1('v')

      case compression_method
      when 0
        # STORED content, doesn't require decompression
        decompress = ->(input) { input }
        finish = -> { '' }
      when 8
        # DEFLATED content, inflate it (negative window bits = raw deflate stream)
        inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
        decompress = ->(input) { inflater.inflate(input) }
        finish = lambda do
          remainder = inflater.finish
          inflater.close
          remainder
        end
      else
        raise HttpZip::ZipError,
              "Unsupported compression method #{compression_method}. HttpZip only supports compression methods 0 (STORED) and 8 (DEFLATE)."
      end

      [decompress, finish]
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module HttpZip
  # Common ancestor of all HttpZip exceptions, so callers can rescue
  # everything raised by this library with a single class.
  class Error < StandardError
  end

  # Signals that a Range request was not answered as expected.
  class ContentRangeError < Error
  end

  # Signals that the archive could not be parsed or uses unsupported features.
  class ZipError < Error
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module HttpZip
  # 256x256 bytes is the maximum length of the EOCD comment,
  # 22 bytes is the remaining EOCD size
  # 20 bytes is the EOCD64 locator size
  MAXIMUM_EOCD_AND_EOCD64_LOCATOR_SIZE = (256 * 256) + 22 + 20
  EOCD64_SIZE_WITHOUT_COMMENT = 56

  # HttpZip reads ZIP-files over a HTTP connection that supports the Range header.
  # It is a helpful tool to extract single files from large HTTP archives without having to
  # download them fully.
  #
  # Resources regarding the ZIP file format:
  # https://en.wikipedia.org/wiki/ZIP_(file_format)
  # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
  class File
    # Create a HttpZip file object that is located at url.
    # Asks the range request helper to verify Range support right away.
    #
    # @param [String] url where the file is hosted
    # @raise [ContentRangeError] if the server does not support the Range header
    def initialize(url)
      @url = url
      @entries = nil
      @range_request = RangeRequest.new(url)
      @range_request.check_server_supports_content_range!
    end

    # Get all entries in the zip archive as an array of HttpZip::Entry.
    # The first successful call makes up to 3 GET requests (end of the archive,
    # EOCD64 block for Zip64 archives, central directory); the result is cached.
    #
    # @return [Array<HttpZip::Entry>]
    # @raise [ZipError] if the central directory cannot be parsed
    def entries
      # Only memoize a completely parsed list: when parsing raises, nothing is
      # cached and the next call starts over instead of returning a partial list.
      @entries ||= load_entries
    end

    private

    # Downloads the central directory and turns every file header in it
    # into an HttpZip::Entry.
    def load_entries
      last_bytes_of_file = @range_request.last(MAXIMUM_EOCD_AND_EOCD64_LOCATOR_SIZE)
      remaining_bytes = get_central_directory(last_bytes_of_file)

      parsed_entries = []
      # iterate through central directory and spit out file entries
      until remaining_bytes.empty?
        # get information about the current file entry
        file_header = HttpZip::Parser::CentralDirectoryFileHeader.new(remaining_bytes)
        parsed_entries << HttpZip::Entry.new(
          @url,
          file_header.file_name,
          file_header.header_offset,
          file_header.compressed_size
        )

        # skip ahead to next file entry; a nil slice means the header claims
        # to be longer than the bytes we actually have
        remaining_bytes = remaining_bytes.byteslice(file_header.end_of_entry..-1)
        raise ZipError, 'Central directory is truncated' if remaining_bytes.nil?
      end

      parsed_entries
    end

    # The central directory contains all file names within the archive as well as
    # their offsets to the beginning of the archive file.
    # Get the whole central directory so the client can traverse it and find the
    # file entry they are looking for.
    #
    # makes 1 GET request for non-Zip64 files, 2 GET requests for Zip64 files
    #
    # @param [String] last_bytes_of_file the end of the archive, including the EOCD block
    # @return [String] the raw bytes of the central directory
    def get_central_directory(last_bytes_of_file)
      central_directory = HttpZip::Parser::CentralDirectory.new(last_bytes_of_file)
      if central_directory.eocd64_offset
        # This is a Zip64 archive, so parse the EOCD64 block to find out where the central directory
        # is located
        eocd64_block = @range_request.get(
          central_directory.eocd64_offset,
          central_directory.eocd64_offset + EOCD64_SIZE_WITHOUT_COMMENT
        )
        central_directory.parse_eocd64!(eocd64_block)
      end

      # an archive without entries has an empty central directory; an empty
      # byte range cannot be requested, so there is nothing to download
      return String.new if central_directory.size.zero?

      # get the actual central directory
      central_directory_end = central_directory.offset + central_directory.size
      @range_request.get(central_directory.offset, central_directory_end)
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module HttpZip
  module Parser
    # Parses the End Of Central Directory (EOCD) block of a zip file.
    class CentralDirectory
      EOCD_BLOCK_IDENTIFIER = "\x50\x4B\x05\x06"
      EOCD64_LOCATOR_BLOCK_IDENTIFER = "\x50\x4b\x06\x07"
      EOCD64_BLOCK_IDENTIFER = "\x50\x4b\x06\x06"

      # Size in bytes of an EOCD block without its trailing comment.
      EOCD_SIZE_WITHOUT_COMMENT = 22
      # Size in bytes of the Zip64 EOCD locator that precedes the EOCD block.
      EOCD64_LOCATOR_SIZE = 20
      # Size in bytes of the fixed part of a Zip64 EOCD block.
      EOCD64_SIZE_WITHOUT_COMMENT = 56

      attr_reader :size, :offset, :eocd64_offset

      # Create a new instance of CentralDirectory.
      #
      # @param [String] end_of_central_directory_bytes the byte string including the EOCD block
      # @raise [ZipError] if no valid EOCD block can be found, or if this is a zip64 archive
      #   and the EOCD64 locator block is missing or the archive spans multiple disks
      def initialize(end_of_central_directory_bytes)
        # work on a binary copy so that all indices below are byte offsets,
        # no matter which encoding the caller's string is tagged with
        @bytes = end_of_central_directory_bytes.b

        parse!
      end

      # Read the size and offset of the central directory from a Zip64 EOCD block.
      #
      # @param [String] eocd64_block the byte string including the EOCD block for a zip64 archive
      # @raise [ZipError] if the byte stream does not contain a valid EOCD64 block
      def parse_eocd64!(eocd64_block)
        unless eocd64_block.start_with?(EOCD64_BLOCK_IDENTIFER)
          raise ZipError, 'EOCD64 record not found'
        end
        if eocd64_block.bytesize < EOCD64_SIZE_WITHOUT_COMMENT
          raise ZipError, 'EOCD64 record is truncated'
        end

        # size and offset of the central directory are the last two 64 bit
        # values of the fixed part of the record
        @size, @offset = eocd64_block.byteslice(40, 16).unpack('Q<Q<')
      end

      private

      # Parses the size and offset of the central directory from the EOCD block.
      # If this is a zip64 archive, the `eocd64_offset` will be set.
      # @raise [ZipError] if this is a zip64 archive and the EOCD64 locator block is not found or
      #   the archive is split on multiple disks.
      def parse!
        eocd_block_index = get_eocd_block_index(@bytes)
        @size, @offset = @bytes.byteslice(eocd_block_index + 12, 8).unpack('VV')
        return if @size != 0xFFFFFFFF && @offset != 0xFFFFFFFF

        # there will be a zip64 EOCD locator block before the EOCD block
        # parse the EOCD locator to find out where the EOCD64 block starts
        locator_index = eocd_block_index - EOCD64_LOCATOR_SIZE
        # a negative index would silently wrap around to the end of the string
        eocd64_locator_block =
          locator_index.negative? ? '' : @bytes.byteslice(locator_index, EOCD64_LOCATOR_SIZE)
        unless eocd64_locator_block.start_with?(EOCD64_LOCATOR_BLOCK_IDENTIFER)
          raise ZipError, 'Could not locate the EOCD64 locator block'
        end

        @eocd64_offset, total_num_disks = eocd64_locator_block.byteslice(8, 12).unpack('Q<V')
        return if total_num_disks == 1

        raise ZipError, 'Multi-disk archives are not supported'
      end

      # In order to find the central directory, we have to first find the EOCD block.
      # The EOCD block (End Of Central Directory) identifies the end of the central directory
      # of the zip file and contains the offset where the central directory is located and its length.
      # The EOCD block is always at the end of the file.
      #
      # @param [String] last_bytes_of_file the end of the archive
      # @return [Integer] index of the first byte of the EOCD block within `last_bytes_of_file`
      # @raise [ZipError] if no valid EOCD block is found
      def get_eocd_block_index(last_bytes_of_file)
        total_length = last_bytes_of_file.length

        # An EOCD block is at least 22 bytes long, so it cannot start any later
        # than this. Starting here also guarantees that the comment length
        # field of every candidate lies within the string.
        search_end_position = total_length - EOCD_SIZE_WITHOUT_COMMENT

        # Scan the downloaded bytes from right to left to find the magic EOCD
        # block identifier
        while search_end_position >= 0
          candidate_index = last_bytes_of_file.rindex(EOCD_BLOCK_IDENTIFIER, search_end_position)
          break if candidate_index.nil?

          # we have a candidate, verify that we found the actual eocd block start by
          # checking whether its position + length matches the end of the file
          comment_length = last_bytes_of_file[(candidate_index + 20)...(candidate_index + 22)].unpack1('v')
          if candidate_index + EOCD_SIZE_WITHOUT_COMMENT + comment_length == total_length
            return candidate_index
          end

          # rindex includes the given position in its search, so continue
          # strictly to the left of the rejected candidate
          search_end_position = candidate_index - 1
        end

        raise ZipError, 'Could not locate valid EOCD block'
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
module HttpZip
  module Parser
    # Parses the Central Directory File Header.
    class CentralDirectoryFileHeader
      ZIP64_EXTRA_FIELD_HEADER_ID = "\x01\x00"
      CENTRAL_DIRECTORY_FILE_HEADER_IDENTIFIER = "\x50\x4B\x01\x02"

      # Size in bytes of the fixed-length part of the header; file name,
      # extra field and file comment follow after it.
      FIXED_HEADER_SIZE = 46
      # Size in bytes of the id + length prefix of every extra field record.
      EXTRA_FIELD_RECORD_HEADER_SIZE = 4

      attr_reader(
        :compressed_size,
        :uncompressed_size,
        :file_name_length,
        :extra_field_length,
        :file_comment_length,
        :disk_number,
        :internal_file_attributes,
        :external_file_attributes,
        :header_offset,
        :file_name,
        :end_of_entry
      )

      # Create a new instance of CentralDirectoryFileHeader.
      #
      # @param [String] file_header_bytes the byte string of the file header; bytes following
      #   the header (e.g. the next headers of the central directory) are ignored
      # @raise [ZipError] if the byte string does not represent a valid file header
      def initialize(file_header_bytes)
        @bytes = file_header_bytes
        unless @bytes.start_with?(CENTRAL_DIRECTORY_FILE_HEADER_IDENTIFIER) &&
               @bytes.bytesize >= FIXED_HEADER_SIZE
          raise ZipError, 'Central Directory File Header seems to be corrupt'
        end

        parse!
      end

      private

      # Parses the fields from the Central Directory File Header,
      # including data in Zip64 extra fields
      #
      # @raise [ZipError] if the header announces more bytes than are available
      def parse!
        @compressed_size,
        @uncompressed_size,
        @file_name_length,
        @extra_field_length,
        @file_comment_length,
        @disk_number,
        @internal_file_attributes,
        @external_file_attributes,
        @header_offset = @bytes.byteslice(20, 26).unpack('VVvvvvvVV')

        file_name_end = FIXED_HEADER_SIZE + @file_name_length
        @file_name = @bytes.byteslice(FIXED_HEADER_SIZE, @file_name_length)
        @end_of_entry = file_name_end + @extra_field_length + @file_comment_length
        if @bytes.bytesize < @end_of_entry
          raise ZipError, 'Central Directory File Header is truncated'
        end

        # check if any of the values could not be represented by standard zip and will be stored in a
        # Zip64 extra field
        extra_field_bytes = @bytes.byteslice(file_name_end, @extra_field_length)
        parse_zip64_extra_field_if_present!(extra_field_bytes)
      end

      # Parses the extra fields section of a Central Directory File Header in order to extract
      # the larger values for uncompressed size, compressed size, header offset, and disk number
      # of the ZIP file if they weren't specified in the Central Directory File Header already.
      #
      # @param [String] full_extra_field_bytes the byte stream of the full extra fields
      #   section of this Central Directory File Header
      def parse_zip64_extra_field_if_present!(full_extra_field_bytes)
        remaining_extra_field_bytes = full_extra_field_bytes
        # every record starts with a 2 byte id and a 2 byte length; anything
        # shorter than that cannot be a record and is ignored
        while remaining_extra_field_bytes.bytesize >= EXTRA_FIELD_RECORD_HEADER_SIZE
          # zipalign might fill up the extra fields with all zero characters,
          # so we need to abort if there's nothing of value in the extra fields
          break if remaining_extra_field_bytes.each_byte.all?(&:zero?)

          # did we find the Zip64 extra field?
          if remaining_extra_field_bytes.start_with?(ZIP64_EXTRA_FIELD_HEADER_ID)
            read_values_from_extra_field_bytes!(remaining_extra_field_bytes.byteslice(2..-1))
            break
          end

          record_length = remaining_extra_field_bytes.byteslice(2, 2).unpack1('v')
          total_extra_field_length = EXTRA_FIELD_RECORD_HEADER_SIZE + record_length
          # a record that claims to be longer than the remaining bytes yields
          # nil here; treat that as the end of the extra fields
          remaining_extra_field_bytes =
            remaining_extra_field_bytes.byteslice(total_extra_field_length..-1) || ''
        end
      end

      # Sets values for uncompressed size, compressed size, header offset, and disk number
      # according to the values stored in the extra field.
      #
      # @param [String] extra_field_bytes the byte stream of the extra fields, starting right after
      #   the extra field header identifier
      # @raise [ZipError] if a value that should be present is cut off
      def read_values_from_extra_field_bytes!(extra_field_bytes)
        # the zip64 extra field tries to store as little information as possible,
        # so only the values too large for the non-zip64 file header will be stored here
        ptr = 2 # ignore the size field, since it seems to be incorrect in some cases
        if @uncompressed_size == 0xFFFFFFFF
          @uncompressed_size = read_extra_field_value(extra_field_bytes, ptr, 8, 'Q<')
          ptr += 8
        end
        if @compressed_size == 0xFFFFFFFF
          @compressed_size = read_extra_field_value(extra_field_bytes, ptr, 8, 'Q<')
          ptr += 8
        end
        if @header_offset == 0xFFFFFFFF
          @header_offset = read_extra_field_value(extra_field_bytes, ptr, 8, 'Q<')
          ptr += 8
        end
        return unless @disk_number == 0xFFFF

        @disk_number = read_extra_field_value(extra_field_bytes, ptr, 4, 'V')
      end

      # Reads a single integer from the extra field bytes.
      #
      # @param [String] bytes the extra field bytes
      # @param [Integer] position byte offset of the value within `bytes`
      # @param [Integer] length size of the value in bytes
      # @param [String] format `String#unpack1` directive matching `length`
      # @raise [ZipError] if fewer than `length` bytes are available at `position`
      def read_extra_field_value(bytes, position, length, format)
        value = bytes.byteslice(position, length)
        if value.nil? || value.bytesize < length
          raise ZipError, 'Zip64 extra field is truncated'
        end

        value.unpack1(format)
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'httparty'
4
+
5
module HttpZip
  # Class to make Range requests to a HTTP server
  class RangeRequest
    # HTTP status code of a successfully answered Range request.
    PARTIAL_CONTENT = 206

    # Create a new RangeRequest object
    #
    # @param [String] url remote file URL
    def initialize(url)
      @url = url
    end

    # Request a partial object via HTTP. If a block is given, yields the response body in chunks.
    #
    # @param [Integer] from start byte of the range to request. Inclusive.
    # @param [Integer] to end byte of the range to request. Exclusive.
    # @yield [chunk] yields a chunk of data to the block
    # @return [String, nil] the response body as reported by HTTParty
    # @raise [ContentRangeError] if the server responds with anything other than 206 Partial Content
    def get(from, to, &block)
      options = { headers: { 'Range' => "bytes=#{from}-#{to - 1}" } }

      response =
        if block
          stream(options, &block)
        else
          HTTParty.get(@url, options)
        end

      ensure_partial_content!(response.code)
      response.body
    end

    # Request the last `num_bytes` bytes of the remote file via HTTP.
    #
    # @param [Integer] num_bytes number of bytes to request
    # @return [String] the response body
    # @raise [ContentRangeError] if the server responds with anything other than 206 Partial Content
    def last(num_bytes)
      response = HTTParty.get(@url, headers: { 'Range' => "bytes=-#{num_bytes}" })
      ensure_partial_content!(response.code)

      response.body
    end

    # Tests if the server supports the Range header by checking the "Accept-Ranges" header,
    # otherwise raises an exception.
    #
    # @raise [ContentRangeError] if the server does not support the Range header
    def check_server_supports_content_range!
      return if self.class.server_supports_content_range?(@url)

      raise ContentRangeError, 'Server does not support the Range header'
    end

    # Tests if the server supports the Range header by checking the "Accept-Ranges" header
    # of the response to a HEAD request.
    #
    # @param [String] url remote file URL
    # @return [Boolean] true if the server supports the Range header
    def self.server_supports_content_range?(url)
      accept_ranges = HTTParty.head(url).headers['Accept-Ranges']
      !accept_ranges.nil? && accept_ranges.to_s.strip.downcase != 'none'
    end

    private

    # Performs a streaming GET request and yields every body fragment of a
    # 206 response to the block.
    #
    # The status is checked before a fragment is handed out, so the body of a
    # full (200) or error response never reaches the caller's block.
    #
    # @param [Hash] options HTTParty request options
    # @raise [ContentRangeError] if a fragment belongs to a non-206, non-redirect response
    def stream(options)
      HTTParty.get(@url, options.merge(stream_body: true)) do |fragment|
        status = fragment.respond_to?(:code) ? fragment.code : nil
        # body of a redirect response; HTTParty's streaming example skips these
        next if status && (300..399).cover?(status)

        ensure_partial_content!(status) if status
        yield fragment
      end
    end

    # @param [Integer] status HTTP status code of the response
    # @raise [ContentRangeError] unless the status is 206 Partial Content
    def ensure_partial_content!(status)
      return if status == PARTIAL_CONTENT

      # oops, we downloaded the whole file
      raise ContentRangeError, 'Server does not support the Range header'
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HttpZip
4
+ VERSION = '1.0.0'
5
+ end
data/lib/http_zip.rb ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'http_zip/version'
4
+ require 'http_zip/errors'
5
+ require 'http_zip/range_request'
6
+ require 'http_zip/entry'
7
+ require 'http_zip/file'
8
+ require 'http_zip/parser/central_directory_file_header'
9
+ require 'http_zip/parser/central_directory'
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: http_zip
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Marvin Killing
8
+ - Peter Retzlaff
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2022-04-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '0.20'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '0.20'
28
+ - !ruby/object:Gem::Dependency
29
+ name: bundler
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '2.0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '2.0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: minitest
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '5.15'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '5.15'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '10.0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '10.0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: simplecov
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0.21'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '0.21'
84
+ - !ruby/object:Gem::Dependency
85
+ name: webmock
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '3.14'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '3.14'
98
+ description:
99
+ email:
100
+ - pe.retzlaff@gmail.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".travis.yml"
107
+ - Gemfile
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - bin/console
112
+ - bin/setup
113
+ - http_zip.gemspec
114
+ - lib/http_zip.rb
115
+ - lib/http_zip/entry.rb
116
+ - lib/http_zip/errors.rb
117
+ - lib/http_zip/file.rb
118
+ - lib/http_zip/parser/central_directory.rb
119
+ - lib/http_zip/parser/central_directory_file_header.rb
120
+ - lib/http_zip/range_request.rb
121
+ - lib/http_zip/version.rb
122
+ homepage: https://github.com/peret/http_zip
123
+ licenses:
124
+ - MIT
125
+ metadata:
126
+ homepage_uri: https://github.com/peret/http_zip
127
+ source_code_uri: https://github.com/peret/http_zip
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubygems_version: 3.0.9
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: HttpZip is a gem to extract individual files from a remote ZIP archive, without
147
+ the need to download the entire file.
148
+ test_files: []