format_parser 1.7.0 → 2.0.0.pre.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +6 -11
- data/CHANGELOG.md +6 -0
- data/format_parser.gemspec +10 -11
- data/lib/care.rb +5 -11
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +8 -11
- data/lib/io_utils.rb +2 -6
- data/lib/parsers/aac_parser/adts_header_info.rb +3 -9
- data/lib/parsers/dpx_parser/dpx_structs.rb +1 -1
- data/lib/parsers/exif_parser.rb +2 -4
- data/lib/parsers/fdx_parser.rb +2 -2
- data/lib/parsers/flac_parser.rb +2 -6
- data/lib/parsers/jpeg_parser.rb +2 -2
- data/lib/parsers/moov_parser.rb +5 -7
- data/lib/parsers/mp3_parser.rb +2 -6
- data/lib/parsers/mpeg_parser.rb +1 -3
- data/lib/parsers/wav_parser.rb +9 -12
- data/lib/parsers/zip_parser/file_reader.rb +45 -70
- data/lib/parsers/zip_parser.rb +1 -1
- data/lib/read_limiter.rb +8 -16
- data/lib/remote_io.rb +67 -34
- data/lib/string.rb +9 -0
- data/spec/attributes_json_spec.rb +0 -3
- data/spec/remote_fetching_spec.rb +3 -8
- data/spec/remote_io_spec.rb +231 -92
- data/spec/spec_helper.rb +4 -0
- metadata +47 -72
data/lib/read_limiter.rb
CHANGED
|
@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
|
|
|
45
45
|
# @return Integer
|
|
46
46
|
def seek(to)
|
|
47
47
|
@seeks += 1
|
|
48
|
-
if @max_seeks && @seeks > @max_seeks
|
|
49
|
-
raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
|
|
50
|
-
end
|
|
48
|
+
raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
|
|
51
49
|
@io.seek(to)
|
|
52
50
|
end
|
|
53
51
|
|
|
@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
|
|
|
60
58
|
@bytes += n_bytes
|
|
61
59
|
@reads += 1
|
|
62
60
|
|
|
63
|
-
if @max_bytes && @bytes > @max_bytes
|
|
64
|
-
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
if @max_reads && @reads > @max_reads
|
|
68
|
-
raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
|
|
69
|
-
end
|
|
61
|
+
raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
|
|
62
|
+
raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads
|
|
70
63
|
|
|
71
64
|
@io.read(n_bytes)
|
|
72
65
|
end
|
|
73
66
|
|
|
74
67
|
# Sends the metrics about the state of this ReadLimiter to a Measurometer
|
|
75
68
|
#
|
|
76
|
-
# @param
|
|
77
|
-
# `format_parser.TIFF.read_limiter.num_seeks` and so forth
|
|
69
|
+
# @param parser[String] the parser to add as a tag.
|
|
78
70
|
# @return void
|
|
79
|
-
def send_metrics(
|
|
80
|
-
Measurometer.add_distribution_value('format_parser
|
|
81
|
-
Measurometer.add_distribution_value('format_parser
|
|
82
|
-
Measurometer.add_distribution_value('format_parser
|
|
71
|
+
def send_metrics(parser)
|
|
72
|
+
Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
|
|
73
|
+
Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
|
|
74
|
+
Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
|
|
83
75
|
end
|
|
84
76
|
|
|
85
77
|
# Resets all the recorded call counters so that the object can be reused for the next parser,
|
data/lib/remote_io.rb
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
+
require 'net/http'
|
|
2
|
+
|
|
1
3
|
# Acts as a wrapper for turning a given URL into an IO object
|
|
2
|
-
# you can read from and seek in.
|
|
3
|
-
# to perform fetches, so if you apply Faraday configuration
|
|
4
|
-
# tweaks using `Faraday.default_connection = ...` these will
|
|
5
|
-
# take effect for these RemoteIO objects as well
|
|
4
|
+
# you can read from and seek in.
|
|
6
5
|
class FormatParser::RemoteIO
|
|
7
6
|
class UpstreamError < StandardError
|
|
8
7
|
# @return Integer
|
|
9
8
|
attr_reader :status_code
|
|
9
|
+
|
|
10
10
|
def initialize(status_code, message)
|
|
11
|
-
@status_code = status_code
|
|
11
|
+
@status_code = Integer(status_code)
|
|
12
12
|
super(message)
|
|
13
13
|
end
|
|
14
14
|
end
|
|
@@ -23,13 +23,19 @@ class FormatParser::RemoteIO
|
|
|
23
23
|
class InvalidRequest < UpstreamError
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
#
|
|
26
|
+
# Represents a failure where the maximum number of
|
|
27
|
+
# redirect requests is exceeded.
|
|
28
|
+
class RedirectLimitReached < UpstreamError
|
|
29
|
+
def initialize(uri)
|
|
30
|
+
super(504, "Too many redirects; last one to: #{uri}")
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# @param uri[String, URI::Generic] the remote URL to obtain
|
|
27
35
|
# @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
|
|
28
36
|
def initialize(uri, headers: {})
|
|
29
|
-
require 'faraday'
|
|
30
|
-
require 'faraday_middleware/response/follow_redirects'
|
|
31
37
|
@headers = headers
|
|
32
|
-
@uri = uri
|
|
38
|
+
@uri = URI(uri)
|
|
33
39
|
@pos = 0
|
|
34
40
|
@remote_size = false
|
|
35
41
|
end
|
|
@@ -63,7 +69,7 @@ class FormatParser::RemoteIO
|
|
|
63
69
|
# @return [String] the read bytes
|
|
64
70
|
def read(n_bytes)
|
|
65
71
|
http_range = (@pos..(@pos + n_bytes - 1))
|
|
66
|
-
maybe_size, maybe_body = Measurometer.instrument('format_parser.
|
|
72
|
+
maybe_size, maybe_body = Measurometer.instrument('format_parser.remote_io.read') { request_range(http_range) }
|
|
67
73
|
if maybe_size && maybe_body
|
|
68
74
|
@remote_size = maybe_size
|
|
69
75
|
@pos += maybe_body.bytesize
|
|
@@ -73,23 +79,39 @@ class FormatParser::RemoteIO
|
|
|
73
79
|
|
|
74
80
|
protected
|
|
75
81
|
|
|
82
|
+
REDIRECT_LIMIT = 3
|
|
83
|
+
UNSAFE_URI_CHARS = %r{[^\-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]%]}
|
|
84
|
+
|
|
85
|
+
# Generate the URI to fetch from following a redirect response.
|
|
86
|
+
#
|
|
87
|
+
# @param location[String] The new URI reference, as provided by the Location header of the previous response.
|
|
88
|
+
# @param previous_uri[URI] The URI used in the previous request.
|
|
89
|
+
def redirect_uri(location, previous_uri)
|
|
90
|
+
# Escape unsafe characters in location. Use location as new URI if absolute, otherwise use it to replace the path of
|
|
91
|
+
# the previous URI.
|
|
92
|
+
new_uri = previous_uri.merge(location.to_s.gsub(UNSAFE_URI_CHARS) do |unsafe_char|
|
|
93
|
+
"%#{unsafe_char.unpack('H2' * unsafe_char.bytesize).join('%').upcase}"
|
|
94
|
+
end)
|
|
95
|
+
# Keep previous URI's fragment if not present in location (https://www.rfc-editor.org/rfc/rfc9110.html#section-10.2.2-5)
|
|
96
|
+
new_uri.fragment = previous_uri.fragment unless new_uri.fragment
|
|
97
|
+
new_uri
|
|
98
|
+
end
|
|
99
|
+
|
|
76
100
|
# Only used internally when reading the remote file
|
|
77
101
|
#
|
|
78
|
-
# @param range[Range]
|
|
79
|
-
# @
|
|
80
|
-
|
|
102
|
+
# @param range[Range] The HTTP range of data to fetch from remote
|
|
103
|
+
# @param uri[URI] The URI to fetch from
|
|
104
|
+
# @param redirects[Integer] The number of remaining permitted redirects
|
|
105
|
+
# @return [[Integer, String], nil] The size of the remote resource and the response body of the ranged request, or nil if the requested range is not satisfiable
|
|
106
|
+
def request_range(range, uri = @uri, redirects = REDIRECT_LIMIT)
|
|
81
107
|
# We use a GET and not a HEAD request followed by a GET because
|
|
82
108
|
# S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
|
|
83
109
|
# combine the first GET of a segment and retrieving the size of the resource
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
# we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
|
|
87
|
-
faraday.adapter Faraday.default_adapter
|
|
110
|
+
response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
|
|
111
|
+
http.request_get(uri, @headers.merge({ 'range' => 'bytes=%d-%d' % [range.begin, range.end] }))
|
|
88
112
|
end
|
|
89
|
-
response
|
|
90
|
-
|
|
91
|
-
case response.status
|
|
92
|
-
when 200
|
|
113
|
+
case response
|
|
114
|
+
when Net::HTTPOK
|
|
93
115
|
# S3 returns 200 when you request a Range that is fully satisfied by the entire object,
|
|
94
116
|
# we take that into account here. Also, for very tiny responses (and also for empty responses)
|
|
95
117
|
# the responses are going to be 200 which does not mean we cannot proceed
|
|
@@ -100,16 +122,16 @@ class FormatParser::RemoteIO
|
|
|
100
122
|
error_message = [
|
|
101
123
|
"We requested #{requested_range_size} bytes, but the server sent us more",
|
|
102
124
|
"(#{response_size} bytes) - it likely has no `Range:` support.",
|
|
103
|
-
"The error occurred when talking to #{
|
|
125
|
+
"The error occurred when talking to #{uri}"
|
|
104
126
|
]
|
|
105
|
-
raise InvalidRequest.new(response.
|
|
127
|
+
raise InvalidRequest.new(response.code, error_message.join("\n"))
|
|
106
128
|
end
|
|
107
129
|
[response_size, response.body]
|
|
108
|
-
when
|
|
130
|
+
when Net::HTTPPartialContent
|
|
109
131
|
# Figure out if the server supports content ranges; if it doesn't, we have no
|
|
110
132
|
# business working with that server
|
|
111
|
-
range_header = response
|
|
112
|
-
raise InvalidRequest.new(response.
|
|
133
|
+
range_header = response['Content-Range']
|
|
134
|
+
raise InvalidRequest.new(response.code, "The server replied with 206 status but no Content-Range at #{uri}") unless range_header
|
|
113
135
|
|
|
114
136
|
# "Content-Range: bytes 0-0/307404381" is how the response header is structured
|
|
115
137
|
size = range_header[/\/(\d+)$/, 1].to_i
|
|
@@ -117,19 +139,30 @@ class FormatParser::RemoteIO
|
|
|
117
139
|
# If we request a _larger_ range than what can be satisfied by the server,
|
|
118
140
|
# the response is going to only contain what _can_ be sent and the status is also going
|
|
119
141
|
# to be 206
|
|
120
|
-
|
|
121
|
-
when
|
|
142
|
+
[size, response.body]
|
|
143
|
+
when Net::HTTPMovedPermanently, Net::HTTPFound, Net::HTTPSeeOther, Net::HTTPTemporaryRedirect, Net::HTTPPermanentRedirect
|
|
144
|
+
raise RedirectLimitReached, uri if redirects == 0
|
|
145
|
+
location = response['location']
|
|
146
|
+
if location
|
|
147
|
+
new_uri = redirect_uri(location, uri)
|
|
148
|
+
# Clear the Authorization header if the new URI has a different origin (scheme, host or port).
|
|
149
|
+
@headers.delete('Authorization') unless [@uri.scheme, @uri.host, @uri.port] == [new_uri.scheme, new_uri.host, new_uri.port]
|
|
150
|
+
request_range(range, new_uri, redirects - 1)
|
|
151
|
+
else
|
|
152
|
+
raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code}, indicating redirection; however, the location header was empty.")
|
|
153
|
+
end
|
|
154
|
+
when Net::HTTPRangeNotSatisfiable
|
|
122
155
|
# We return `nil` if we tried to read past the end of the IO,
|
|
123
156
|
# which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
|
|
124
157
|
# S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
|
|
125
158
|
# cannot hint size with this response - at least not when working with S3
|
|
126
|
-
|
|
127
|
-
when
|
|
128
|
-
Measurometer.increment_counter('format_parser.
|
|
129
|
-
raise IntermittentFailure.new(response.
|
|
159
|
+
nil
|
|
160
|
+
when Net::HTTPServerError
|
|
161
|
+
Measurometer.increment_counter('format_parser.remote_io.upstream50x_errors', 1)
|
|
162
|
+
raise IntermittentFailure.new(response.code, "Server at #{uri} replied with a #{response.code} and we might want to retry")
|
|
130
163
|
else
|
|
131
|
-
Measurometer.increment_counter('format_parser.
|
|
132
|
-
raise InvalidRequest.new(response.
|
|
164
|
+
Measurometer.increment_counter('format_parser.remote_io.invalid_request_errors', 1)
|
|
165
|
+
raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code} and refused our request")
|
|
133
166
|
end
|
|
134
167
|
end
|
|
135
168
|
end
|
data/lib/string.rb
ADDED
|
@@ -106,9 +106,6 @@ describe FormatParser::AttributesJSON do
|
|
|
106
106
|
struct: Struct.new(:key).new('Value'),
|
|
107
107
|
content: "\x01\xFF\xFEb\x00i\x00r\x00d\x00s\x00 \x005\x00 \x00m\x00o\x00r\x00e\x00 \x00c\x00o\x00m\x00p\x00".b
|
|
108
108
|
}
|
|
109
|
-
expect {
|
|
110
|
-
JSON.pretty_generate(nasty_hash) # Should not raise an error
|
|
111
|
-
}.to raise_error(Encoding::UndefinedConversionError)
|
|
112
109
|
|
|
113
110
|
anon_class = Struct.new(:evil)
|
|
114
111
|
anon_class.include FormatParser::AttributesJSON
|
|
@@ -124,14 +124,9 @@ describe 'Fetching data from HTTP remotes' do
|
|
|
124
124
|
end
|
|
125
125
|
|
|
126
126
|
it 'sends provided HTTP headers in the request' do
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
FormatParser.parse_http('invalid_url') rescue nil
|
|
131
|
-
|
|
132
|
-
expect(Faraday)
|
|
133
|
-
.to receive(:new)
|
|
134
|
-
.with(headers: {'test-header' => 'test-value'})
|
|
127
|
+
expect_any_instance_of(Net::HTTP)
|
|
128
|
+
.to receive(:request_get)
|
|
129
|
+
.with(anything, a_hash_including('test-header' => 'test-value'))
|
|
135
130
|
.and_call_original
|
|
136
131
|
|
|
137
132
|
file_information = FormatParser.parse_http(
|
data/spec/remote_io_spec.rb
CHANGED
|
@@ -3,131 +3,270 @@ require 'spec_helper'
|
|
|
3
3
|
describe FormatParser::RemoteIO do
|
|
4
4
|
it_behaves_like 'an IO object compatible with IOConstraint'
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
# 2XX
|
|
7
|
+
|
|
8
|
+
context 'when the response code is 200 (OK)' do
|
|
9
|
+
context 'when the response size does not exceed the requested range' do
|
|
10
|
+
it 'returns the entire response body' do
|
|
11
|
+
url = 'https://images.invalid/img.jpg'
|
|
12
|
+
body = 'response body'
|
|
13
|
+
|
|
14
|
+
stub = stub_request(:get, url)
|
|
15
|
+
.with(headers: { 'range' => 'bytes=10-109' })
|
|
16
|
+
.to_return(body: body, status: 200)
|
|
17
|
+
|
|
18
|
+
rio = described_class.new(url)
|
|
19
|
+
rio.seek(10)
|
|
20
|
+
read_result = rio.read(100)
|
|
21
|
+
|
|
22
|
+
expect(read_result).to eq(body)
|
|
23
|
+
expect(stub).to have_been_requested
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
context 'when the response size exceeds the requested range' do
|
|
28
|
+
it 'raises an error' do
|
|
29
|
+
url = 'https://images.invalid/img.jpg'
|
|
30
|
+
body = 'This response is way longer than 10 bytes.'
|
|
31
|
+
|
|
32
|
+
stub = stub_request(:get, url)
|
|
33
|
+
.with(headers: { 'range' => 'bytes=10-19' })
|
|
34
|
+
.to_return(body: body, status: 200)
|
|
35
|
+
|
|
36
|
+
rio = described_class.new(url)
|
|
37
|
+
rio.seek(10)
|
|
38
|
+
|
|
39
|
+
expect { rio.read(10) }.to raise_error(
|
|
40
|
+
"We requested 10 bytes, but the server sent us more\n"\
|
|
41
|
+
"(42 bytes) - it likely has no `Range:` support.\n"\
|
|
42
|
+
"The error occurred when talking to #{url}"
|
|
43
|
+
)
|
|
44
|
+
expect(stub).to have_been_requested
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
8
48
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
49
|
+
context 'when the response status code is 206 (Partial Content)' do
|
|
50
|
+
context 'when the Content-Range header is present' do
|
|
51
|
+
it 'returns the response body' do
|
|
52
|
+
url = 'https://images.invalid/img.jpg'
|
|
53
|
+
body = 'response body'
|
|
13
54
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
end
|
|
55
|
+
stub = stub_request(:get, url)
|
|
56
|
+
.with(headers: { 'range' => 'bytes=10-109' })
|
|
57
|
+
.to_return(body: body, headers: { 'Content-Range' => '10-109/2577' }, status: 206)
|
|
18
58
|
|
|
19
|
-
|
|
20
|
-
|
|
59
|
+
rio = described_class.new(url)
|
|
60
|
+
rio.seek(10)
|
|
61
|
+
read_result = rio.read(100)
|
|
21
62
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
|
|
63
|
+
expect(read_result).to eq(body)
|
|
64
|
+
expect(stub).to have_been_requested
|
|
65
|
+
end
|
|
26
66
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
expect(read_result).to eq('This is the response')
|
|
30
|
-
end
|
|
67
|
+
it 'maintains and exposes pos' do
|
|
68
|
+
url = 'https://images.invalid/img.jpg'
|
|
31
69
|
|
|
32
|
-
|
|
33
|
-
|
|
70
|
+
stub = stub_request(:get, url)
|
|
71
|
+
.with(headers: { 'range' => 'bytes=0-0' })
|
|
72
|
+
.to_return(body: 'a', headers: { 'Content-Range' => '0-0/13' }, status: 206)
|
|
34
73
|
|
|
35
|
-
|
|
36
|
-
faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
|
|
37
|
-
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
|
38
|
-
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
|
74
|
+
rio = described_class.new(url)
|
|
39
75
|
|
|
40
|
-
|
|
41
|
-
expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
|
|
42
|
-
end
|
|
76
|
+
expect(rio.pos).to eq(0)
|
|
43
77
|
|
|
44
|
-
|
|
45
|
-
rio = described_class.new('https://images.invalid/img.jpg')
|
|
78
|
+
rio.read(1)
|
|
46
79
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
80
|
+
expect(rio.pos).to eq(1)
|
|
81
|
+
expect(stub).to have_been_requested
|
|
82
|
+
end
|
|
83
|
+
end
|
|
51
84
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
85
|
+
context 'when the Content-Range header is not present' do
|
|
86
|
+
it 'raises an error' do
|
|
87
|
+
url = 'https://images.invalid/img.jpg'
|
|
88
|
+
|
|
89
|
+
stub = stub_request(:get, url)
|
|
90
|
+
.with(headers: { 'range' => 'bytes=10-109' })
|
|
91
|
+
.to_return(status: 206)
|
|
55
92
|
|
|
56
|
-
|
|
57
|
-
|
|
93
|
+
rio = described_class.new(url)
|
|
94
|
+
rio.seek(10)
|
|
58
95
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
96
|
+
expect { rio.read(100) }.to raise_error("The server replied with 206 status but no Content-Range at #{url}")
|
|
97
|
+
expect(stub).to have_been_requested
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
63
101
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
102
|
+
# 3XX
|
|
103
|
+
|
|
104
|
+
[301, 302, 303, 307, 308].each do |code|
|
|
105
|
+
context "when the response code is #{code}" do
|
|
106
|
+
context 'when the location header is present and the redirect limit is not exceeded' do
|
|
107
|
+
context 'when the location is absolute' do
|
|
108
|
+
it 'redirects to the specified location, without the Authorization header' do
|
|
109
|
+
redirecting_url = 'https://my_images.invalid/my_image'
|
|
110
|
+
destination_url = 'https://images.invalid/img.jpg'
|
|
111
|
+
body = 'response body'
|
|
112
|
+
|
|
113
|
+
redirect_stub = stub_request(:get, redirecting_url)
|
|
114
|
+
.with(headers: { 'Authorization' => 'token', 'range' => 'bytes=10-109' })
|
|
115
|
+
.to_return(headers: { 'location' => destination_url }, status: code)
|
|
116
|
+
destination_stub = stub_request(:get, destination_url)
|
|
117
|
+
.with { |request| request.headers['Range'] == 'bytes=10-109' && !request.headers.key?('Authorization') }
|
|
118
|
+
.to_return(body: body, status: 200)
|
|
119
|
+
|
|
120
|
+
rio = described_class.new(redirecting_url, headers: { 'Authorization' => 'token' })
|
|
121
|
+
rio.seek(10)
|
|
122
|
+
read_result = rio.read(100)
|
|
123
|
+
|
|
124
|
+
expect(read_result).to eq(body)
|
|
125
|
+
expect(redirect_stub).to have_been_requested
|
|
126
|
+
expect(destination_stub).to have_been_requested
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
context 'when the location is relative' do
|
|
131
|
+
it 'redirects to the specified location under the same host, with the same Authorization header' do
|
|
132
|
+
host = 'https://images.invalid'
|
|
133
|
+
redirecting_path = '/my_image'
|
|
134
|
+
redirecting_url = host + redirecting_path
|
|
135
|
+
destination_path = '/img.jpg'
|
|
136
|
+
destination_url = host + destination_path
|
|
137
|
+
body = 'response body'
|
|
138
|
+
|
|
139
|
+
redirect_stub = stub_request(:get, redirecting_url)
|
|
140
|
+
.with(headers: { 'Authorization' => 'token', 'range' => 'bytes=10-109' })
|
|
141
|
+
.to_return(headers: { 'location' => destination_path }, status: code)
|
|
142
|
+
destination_stub = stub_request(:get, destination_url)
|
|
143
|
+
.with(headers: { 'Authorization' => 'token', 'range' => 'bytes=10-109' })
|
|
144
|
+
.to_return(body: body, status: 200)
|
|
145
|
+
|
|
146
|
+
rio = described_class.new(redirecting_url, headers: { 'Authorization' => 'token' })
|
|
147
|
+
rio.seek(10)
|
|
148
|
+
read_result = rio.read(100)
|
|
149
|
+
|
|
150
|
+
expect(read_result).to eq(body)
|
|
151
|
+
expect(redirect_stub).to have_been_requested
|
|
152
|
+
expect(destination_stub).to have_been_requested
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
context 'when the location header is not present' do
|
|
158
|
+
it 'raises an error' do
|
|
159
|
+
url = 'https://images.invalid/my_image'
|
|
160
|
+
|
|
161
|
+
stub = stub_request(:get, url)
|
|
162
|
+
.with(headers: { 'range' => 'bytes=10-109' })
|
|
163
|
+
.to_return(status: code)
|
|
164
|
+
|
|
165
|
+
rio = described_class.new(url)
|
|
166
|
+
rio.seek(10)
|
|
167
|
+
|
|
168
|
+
expect { rio.read(100) }.to raise_error("Server at #{url} replied with a #{code}, indicating redirection; however, the location header was empty.")
|
|
169
|
+
expect(stub).to have_been_requested
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
context 'when the redirect limit is exceeded' do
|
|
174
|
+
it 'raises an error' do
|
|
175
|
+
redirecting_url = 'https://images.invalid/my_image'
|
|
176
|
+
destination_url = 'https://images.invalid/img.jpg'
|
|
177
|
+
|
|
178
|
+
stub = stub_request(:get, /https:\/\/images\.invalid.*/)
|
|
179
|
+
.with(headers: { 'range' => 'bytes=10-109' })
|
|
180
|
+
.to_return(headers: { 'location' => destination_url }, status: code)
|
|
181
|
+
|
|
182
|
+
rio = described_class.new(redirecting_url)
|
|
183
|
+
rio.seek(10)
|
|
184
|
+
|
|
185
|
+
expect { rio.read(100) }.to raise_error("Too many redirects; last one to: #{destination_url}")
|
|
186
|
+
expect(stub).to have_been_requested.times(4)
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
67
190
|
end
|
|
68
191
|
|
|
69
|
-
|
|
70
|
-
rio = described_class.new('https://images.invalid/img.jpg')
|
|
192
|
+
# 4XX
|
|
71
193
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
|
194
|
+
context 'when the response status code is 416 (Range Not Satisfiable)' do
|
|
195
|
+
it 'returns nil' do
|
|
196
|
+
url = 'https://images.invalid/img.jpg'
|
|
76
197
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
198
|
+
stub = stub_request(:get, url)
|
|
199
|
+
.with(headers: { 'range' => 'bytes=100-199' })
|
|
200
|
+
.to_return(status: 416)
|
|
201
|
+
|
|
202
|
+
rio = described_class.new(url)
|
|
203
|
+
rio.seek(100)
|
|
80
204
|
|
|
81
|
-
|
|
82
|
-
|
|
205
|
+
expect(rio.read(100)).to be_nil
|
|
206
|
+
expect(stub).to have_been_requested
|
|
207
|
+
end
|
|
83
208
|
|
|
84
|
-
|
|
85
|
-
|
|
209
|
+
it 'does not change pos or size' do
|
|
210
|
+
url = 'https://images.invalid/img.jpg'
|
|
86
211
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
.with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
|
91
|
-
.ordered
|
|
92
|
-
.and_return(fake_resp1)
|
|
93
|
-
expect(faraday_conn).to receive(:get)
|
|
94
|
-
.with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
|
95
|
-
.ordered
|
|
96
|
-
.and_return(fake_resp2)
|
|
212
|
+
stub = stub_request(:get, url)
|
|
213
|
+
.with(headers: { 'range' => 'bytes=0-0' })
|
|
214
|
+
.to_return(body: 'response body', headers: { 'Content-Range' => 'bytes 0-0/13' }, status: 206)
|
|
97
215
|
|
|
98
|
-
|
|
216
|
+
rio = described_class.new(url)
|
|
217
|
+
rio.read(1)
|
|
99
218
|
|
|
100
|
-
|
|
219
|
+
expect(rio.size).to eq(13)
|
|
220
|
+
expect(stub).to have_been_requested
|
|
101
221
|
|
|
102
|
-
|
|
103
|
-
|
|
222
|
+
stub = stub_request(:get, url)
|
|
223
|
+
.with(headers: { 'range' => 'bytes=100-199' })
|
|
224
|
+
.to_return(status: 416)
|
|
104
225
|
|
|
105
|
-
|
|
226
|
+
rio.seek(100)
|
|
227
|
+
rio.read(100)
|
|
228
|
+
|
|
229
|
+
expect(rio.pos).to eq(100)
|
|
230
|
+
expect(rio.size).to eq(13)
|
|
231
|
+
expect(stub).to have_been_requested
|
|
232
|
+
end
|
|
106
233
|
end
|
|
107
234
|
|
|
108
|
-
|
|
109
|
-
|
|
235
|
+
[*400..415, *417..499].each do |code|
|
|
236
|
+
context "when the response status code is #{code}" do
|
|
237
|
+
it 'raises an error' do
|
|
238
|
+
url = 'https://images.invalid/img.jpg'
|
|
239
|
+
|
|
240
|
+
stub = stub_request(:get, url)
|
|
241
|
+
.with(headers: { 'range' => 'bytes=100-199' })
|
|
242
|
+
.to_return(status: code)
|
|
110
243
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
|
114
|
-
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
|
|
244
|
+
rio = described_class.new(url)
|
|
245
|
+
rio.seek(100)
|
|
115
246
|
|
|
116
|
-
|
|
117
|
-
|
|
247
|
+
expect { rio.read(100) }.to raise_error("Server at #{url} replied with a #{code} and refused our request")
|
|
248
|
+
expect(stub).to have_been_requested
|
|
249
|
+
end
|
|
250
|
+
end
|
|
118
251
|
end
|
|
119
252
|
|
|
120
|
-
|
|
121
|
-
|
|
253
|
+
# 5XX
|
|
254
|
+
|
|
255
|
+
(500..599).each do |code|
|
|
256
|
+
context "when the response status code is #{code}" do
|
|
257
|
+
it 'raises an error' do
|
|
258
|
+
url = 'https://images.invalid/img.jpg'
|
|
122
259
|
|
|
123
|
-
|
|
260
|
+
stub = stub_request(:get, url)
|
|
261
|
+
.with(headers: { 'range' => 'bytes=100-199' })
|
|
262
|
+
.to_return(status: code)
|
|
124
263
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
allow(Faraday).to receive(:new).and_return(faraday_conn)
|
|
128
|
-
expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
|
|
129
|
-
rio.read(1)
|
|
264
|
+
rio = described_class.new(url)
|
|
265
|
+
rio.seek(100)
|
|
130
266
|
|
|
131
|
-
|
|
267
|
+
expect { rio.read(100) }.to raise_error("Server at #{url} replied with a #{code} and we might want to retry")
|
|
268
|
+
expect(stub).to have_been_requested
|
|
269
|
+
end
|
|
270
|
+
end
|
|
132
271
|
end
|
|
133
272
|
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -7,6 +7,10 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
|
|
7
7
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
8
8
|
|
|
9
9
|
require 'rspec'
|
|
10
|
+
require 'webmock/rspec'
|
|
11
|
+
|
|
12
|
+
WebMock.disable_net_connect!(allow_localhost: true)
|
|
13
|
+
|
|
10
14
|
require 'format_parser'
|
|
11
15
|
|
|
12
16
|
module SpecHelpers
|