format_parser 1.7.0 → 2.0.0.pre.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/read_limiter.rb CHANGED
@@ -45,9 +45,7 @@ class FormatParser::ReadLimiter
45
45
  # @return Integer
46
46
  def seek(to)
47
47
  @seeks += 1
48
- if @max_seeks && @seeks > @max_seeks
49
- raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks
50
- end
48
+ raise BudgetExceeded, 'Seek budget exceeded (%d seeks performed)' % @max_seeks if @max_seeks && @seeks > @max_seeks
51
49
  @io.seek(to)
52
50
  end
53
51
 
@@ -60,26 +58,20 @@ class FormatParser::ReadLimiter
60
58
  @bytes += n_bytes
61
59
  @reads += 1
62
60
 
63
- if @max_bytes && @bytes > @max_bytes
64
- raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes
65
- end
66
-
67
- if @max_reads && @reads > @max_reads
68
- raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads
69
- end
61
+ raise BudgetExceeded, 'Read bytes budget (%d) exceeded' % @max_bytes if @max_bytes && @bytes > @max_bytes
62
+ raise BudgetExceeded, 'Number of read() calls exceeded (%d max)' % @max_reads if @max_reads && @reads > @max_reads
70
63
 
71
64
  @io.read(n_bytes)
72
65
  end
73
66
 
74
67
  # Sends the metrics about the state of this ReadLimiter to a Measurometer
75
68
  #
76
- # @param prefix[String] the prefix to set. For example, with prefix "TIFF" the metrics will be called
77
- # `format_parser.TIFF.read_limiter.num_seeks` and so forth
69
+ # @param parser[String] the parser to add as a tag.
78
70
  # @return void
79
- def send_metrics(prefix)
80
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
81
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
82
- Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
71
+ def send_metrics(parser)
72
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_seeks', @seeks, parser: parser)
73
+ Measurometer.add_distribution_value('format_parser.read_limiter.num_reads', @reads, parser: parser)
74
+ Measurometer.add_distribution_value('format_parser.read_limiter.read_bytes', @bytes, parser: parser)
83
75
  end
84
76
 
85
77
  # Resets all the recorded call counters so that the object can be reused for the next parser,
data/lib/remote_io.rb CHANGED
@@ -1,14 +1,14 @@
1
+ require 'net/http'
2
+
1
3
  # Acts as a wrapper for turning a given URL into an IO object
2
- # you can read from and seek in. Uses Faraday under the hood
3
- # to perform fetches, so if you apply Faraday configuration
4
- # tweaks using `Faraday.default_connection = ...` these will
5
- # take effect for these RemoteIO objects as well
4
+ # you can read from and seek in.
6
5
  class FormatParser::RemoteIO
7
6
  class UpstreamError < StandardError
8
7
  # @return Integer
9
8
  attr_reader :status_code
9
+
10
10
  def initialize(status_code, message)
11
- @status_code = status_code
11
+ @status_code = Integer(status_code)
12
12
  super(message)
13
13
  end
14
14
  end
@@ -23,13 +23,19 @@ class FormatParser::RemoteIO
23
23
  class InvalidRequest < UpstreamError
24
24
  end
25
25
 
26
- # @param uri[URI, String] the remote URL to obtain
26
+ # Represents a failure where the maximum number of
27
+ # redirects is exceeded.
28
+ class RedirectLimitReached < UpstreamError
29
+ def initialize(uri)
30
+ super(504, "Too many redirects; last one to: #{uri}")
31
+ end
32
+ end
33
+
34
+ # @param uri[String, URI::Generic] the remote URL to obtain
27
35
  # @param headers[Hash] (optional) the HTTP headers to be used in the HTTP request
28
36
  def initialize(uri, headers: {})
29
- require 'faraday'
30
- require 'faraday_middleware/response/follow_redirects'
31
37
  @headers = headers
32
- @uri = uri
38
+ @uri = URI(uri)
33
39
  @pos = 0
34
40
  @remote_size = false
35
41
  end
@@ -63,7 +69,7 @@ class FormatParser::RemoteIO
63
69
  # @return [String] the read bytes
64
70
  def read(n_bytes)
65
71
  http_range = (@pos..(@pos + n_bytes - 1))
66
- maybe_size, maybe_body = Measurometer.instrument('format_parser.RemoteIO.read') { request_range(http_range) }
72
+ maybe_size, maybe_body = Measurometer.instrument('format_parser.remote_io.read') { request_range(http_range) }
67
73
  if maybe_size && maybe_body
68
74
  @remote_size = maybe_size
69
75
  @pos += maybe_body.bytesize
@@ -73,23 +79,39 @@ class FormatParser::RemoteIO
73
79
 
74
80
  protected
75
81
 
82
+ REDIRECT_LIMIT = 3
83
+ UNSAFE_URI_CHARS = %r{[^\-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]%]}
84
+
85
+ # Generate the URI to fetch from following a redirect response.
86
+ #
87
+ # @param location[String] The new URI reference, as provided by the Location header of the previous response.
88
+ # @param previous_uri[URI] The URI used in the previous request.
89
+ def redirect_uri(location, previous_uri)
90
+ # Escape unsafe characters in location. Use location as new URI if absolute, otherwise use it to replace the path of
91
+ # the previous URI.
92
+ new_uri = previous_uri.merge(location.to_s.gsub(UNSAFE_URI_CHARS) do |unsafe_char|
93
+ "%#{unsafe_char.unpack('H2' * unsafe_char.bytesize).join('%').upcase}"
94
+ end)
95
+ # Keep previous URI's fragment if not present in location (https://www.rfc-editor.org/rfc/rfc9110.html#section-10.2.2-5)
96
+ new_uri.fragment = previous_uri.fragment unless new_uri.fragment
97
+ new_uri
98
+ end
99
+
76
100
  # Only used internally when reading the remote file
77
101
  #
78
- # @param range[Range] the HTTP range of data to fetch from remote
79
- # @return [String] the response body of the ranged request
80
- def request_range(range)
102
+ # @param range[Range] The HTTP range of data to fetch from remote
103
+ # @param uri[URI] The URI to fetch from
104
+ # @param redirects[Integer] The number of remaining permitted redirects
105
+ # @return [[Integer, String], nil] A [size, body] pair for the ranged request, or nil when the server replies with 416
106
+ def request_range(range, uri = @uri, redirects = REDIRECT_LIMIT)
81
107
  # We use a GET and not a HEAD request followed by a GET because
82
108
  # S3 does not allow HEAD requests if you only presigned your URL for GETs, so we
83
109
  # combine the first GET of a segment and retrieving the size of the resource
84
- conn = Faraday.new(headers: @headers) do |faraday|
85
- faraday.use FaradayMiddleware::FollowRedirects
86
- # we still need the default adapter, more details: https://blog.thecodewhisperer.com/permalink/losing-time-to-faraday
87
- faraday.adapter Faraday.default_adapter
110
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
111
+ http.request_get(uri, @headers.merge({ 'range' => 'bytes=%d-%d' % [range.begin, range.end] }))
88
112
  end
89
- response = conn.get(@uri, nil, range: 'bytes=%d-%d' % [range.begin, range.end])
90
-
91
- case response.status
92
- when 200
113
+ case response
114
+ when Net::HTTPOK
93
115
  # S3 returns 200 when you request a Range that is fully satisfied by the entire object,
94
116
  # we take that into account here. Also, for very tiny responses (and also for empty responses)
95
117
  # the responses are going to be 200 which does not mean we cannot proceed
@@ -100,16 +122,16 @@ class FormatParser::RemoteIO
100
122
  error_message = [
101
123
  "We requested #{requested_range_size} bytes, but the server sent us more",
102
124
  "(#{response_size} bytes) - it likely has no `Range:` support.",
103
- "The error occurred when talking to #{@uri})"
125
+ "The error occurred when talking to #{uri}"
104
126
  ]
105
- raise InvalidRequest.new(response.status, error_message.join("\n"))
127
+ raise InvalidRequest.new(response.code, error_message.join("\n"))
106
128
  end
107
129
  [response_size, response.body]
108
- when 206
130
+ when Net::HTTPPartialContent
109
131
  # Figure out if the server supports content ranges, if it doesn't we have no
110
132
  # business working with that server
111
- range_header = response.headers['Content-Range']
112
- raise InvalidRequest.new(response.status, "The server replied with 206 status but no Content-Range at #{@uri}") unless range_header
133
+ range_header = response['Content-Range']
134
+ raise InvalidRequest.new(response.code, "The server replied with 206 status but no Content-Range at #{uri}") unless range_header
113
135
 
114
136
  # "Content-Range: bytes 0-0/307404381" is how the response header is structured
115
137
  size = range_header[/\/(\d+)$/, 1].to_i
@@ -117,19 +139,30 @@ class FormatParser::RemoteIO
117
139
  # If we request a _larger_ range than what can be satisfied by the server,
118
140
  # the response is going to only contain what _can_ be sent and the status is also going
119
141
  # to be 206
120
- return [size, response.body]
121
- when 416
142
+ [size, response.body]
143
+ when Net::HTTPMovedPermanently, Net::HTTPFound, Net::HTTPSeeOther, Net::HTTPTemporaryRedirect, Net::HTTPPermanentRedirect
144
+ raise RedirectLimitReached, uri if redirects == 0
145
+ location = response['location']
146
+ if location
147
+ new_uri = redirect_uri(location, uri)
148
+ # Clear the Authorization header if the new URI's scheme, host or port differs from the original URI's.
149
+ @headers.delete('Authorization') unless [@uri.scheme, @uri.host, @uri.port] == [new_uri.scheme, new_uri.host, new_uri.port]
150
+ request_range(range, new_uri, redirects - 1)
151
+ else
152
+ raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code}, indicating redirection; however, the location header was empty.")
153
+ end
154
+ when Net::HTTPRangeNotSatisfiable
122
155
  # We return `nil` if we tried to read past the end of the IO,
123
156
  # which satisfies the Ruby IO convention. The caller should deal with `nil` being the result of a read()
124
157
  # S3 will also handily _not_ supply us with the Content-Range of the actual resource, so we
125
158
  # cannot hint size with this response - at least not when working with S3
126
- return
127
- when 500..599
128
- Measurometer.increment_counter('format_parser.RemoteIO.upstream50x_errors', 1)
129
- raise IntermittentFailure.new(response.status, "Server at #{@uri} replied with a #{response.status} and we might want to retry")
159
+ nil
160
+ when Net::HTTPServerError
161
+ Measurometer.increment_counter('format_parser.remote_io.upstream50x_errors', 1)
162
+ raise IntermittentFailure.new(response.code, "Server at #{uri} replied with a #{response.code} and we might want to retry")
130
163
  else
131
- Measurometer.increment_counter('format_parser.RemoteIO.invalid_request_errors', 1)
132
- raise InvalidRequest.new(response.status, "Server at #{@uri} replied with a #{response.status} and refused our request")
164
+ Measurometer.increment_counter('format_parser.remote_io.invalid_request_errors', 1)
165
+ raise InvalidRequest.new(response.code, "Server at #{uri} replied with a #{response.code} and refused our request")
133
166
  end
134
167
  end
135
168
  end
data/lib/string.rb ADDED
@@ -0,0 +1,9 @@
1
+ class String
2
+ def underscore
3
+ gsub(/::/, '/').
4
+ gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
5
+ gsub(/([a-z\d])([A-Z])/, '\1_\2').
6
+ tr('-', '_').
7
+ downcase
8
+ end
9
+ end
@@ -106,9 +106,6 @@ describe FormatParser::AttributesJSON do
106
106
  struct: Struct.new(:key).new('Value'),
107
107
  content: "\x01\xFF\xFEb\x00i\x00r\x00d\x00s\x00 \x005\x00 \x00m\x00o\x00r\x00e\x00 \x00c\x00o\x00m\x00p\x00".b
108
108
  }
109
- expect {
110
- JSON.pretty_generate(nasty_hash) # Should not raise an error
111
- }.to raise_error(Encoding::UndefinedConversionError)
112
109
 
113
110
  anon_class = Struct.new(:evil)
114
111
  anon_class.include FormatParser::AttributesJSON
@@ -124,14 +124,9 @@ describe 'Fetching data from HTTP remotes' do
124
124
  end
125
125
 
126
126
  it 'sends provided HTTP headers in the request' do
127
- # Faraday is required only after calling .parse_http
128
- # This line is just to trigger this require, then it's possible to
129
- # add an expectation of how Faraday is initialized after.
130
- FormatParser.parse_http('invalid_url') rescue nil
131
-
132
- expect(Faraday)
133
- .to receive(:new)
134
- .with(headers: {'test-header' => 'test-value'})
127
+ expect_any_instance_of(Net::HTTP)
128
+ .to receive(:request_get)
129
+ .with(anything, a_hash_including('test-header' => 'test-value'))
135
130
  .and_call_original
136
131
 
137
132
  file_information = FormatParser.parse_http(
@@ -3,131 +3,270 @@ require 'spec_helper'
3
3
  describe FormatParser::RemoteIO do
4
4
  it_behaves_like 'an IO object compatible with IOConstraint'
5
5
 
6
- it 'returns the partial content when the server supplies a 206 status' do
7
- rio = described_class.new('https://images.invalid/img.jpg')
6
+ # 2XX
7
+
8
+ context 'when the response code is 200 (OK)' do
9
+ context 'when the response size does not exceed the requested range' do
10
+ it 'returns the entire response body' do
11
+ url = 'https://images.invalid/img.jpg'
12
+ body = 'response body'
13
+
14
+ stub = stub_request(:get, url)
15
+ .with(headers: { 'range' => 'bytes=10-109' })
16
+ .to_return(body: body, status: 200)
17
+
18
+ rio = described_class.new(url)
19
+ rio.seek(10)
20
+ read_result = rio.read(100)
21
+
22
+ expect(read_result).to eq(body)
23
+ expect(stub).to have_been_requested
24
+ end
25
+ end
26
+
27
+ context 'when the response size exceeds the requested range' do
28
+ it 'raises an error' do
29
+ url = 'https://images.invalid/img.jpg'
30
+ body = 'This response is way longer than 10 bytes.'
31
+
32
+ stub = stub_request(:get, url)
33
+ .with(headers: { 'range' => 'bytes=10-19' })
34
+ .to_return(body: body, status: 200)
35
+
36
+ rio = described_class.new(url)
37
+ rio.seek(10)
38
+
39
+ expect { rio.read(10) }.to raise_error(
40
+ "We requested 10 bytes, but the server sent us more\n"\
41
+ "(42 bytes) - it likely has no `Range:` support.\n"\
42
+ "The error occurred when talking to #{url}"
43
+ )
44
+ expect(stub).to have_been_requested
45
+ end
46
+ end
47
+ end
8
48
 
9
- fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 206, body: 'This is the response')
10
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
11
- allow(Faraday).to receive(:new).and_return(faraday_conn)
12
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
49
+ context 'when the response status code is 206 (Partial Content)' do
50
+ context 'when the Content-Range header is present' do
51
+ it 'returns the response body' do
52
+ url = 'https://images.invalid/img.jpg'
53
+ body = 'response body'
13
54
 
14
- rio.seek(10)
15
- read_result = rio.read(100)
16
- expect(read_result).to eq('This is the response')
17
- end
55
+ stub = stub_request(:get, url)
56
+ .with(headers: { 'range' => 'bytes=10-109' })
57
+ .to_return(body: body, headers: { 'Content-Range' => '10-109/2577' }, status: 206)
18
58
 
19
- it 'returns the entire content when the server supplies the Content-Range response but sends a 200 status' do
20
- rio = described_class.new('https://images.invalid/img.jpg')
59
+ rio = described_class.new(url)
60
+ rio.seek(10)
61
+ read_result = rio.read(100)
21
62
 
22
- fake_resp = double(headers: {'Content-Range' => '10-109/2577'}, status: 200, body: 'This is the response')
23
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
24
- allow(Faraday).to receive(:new).and_return(faraday_conn)
25
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=10-109')
63
+ expect(read_result).to eq(body)
64
+ expect(stub).to have_been_requested
65
+ end
26
66
 
27
- rio.seek(10)
28
- read_result = rio.read(100)
29
- expect(read_result).to eq('This is the response')
30
- end
67
+ it 'maintains and exposes pos' do
68
+ url = 'https://images.invalid/img.jpg'
31
69
 
32
- it 'raises a specific error for all 4xx responses except 416' do
33
- rio = described_class.new('https://images.invalid/img.jpg')
70
+ stub = stub_request(:get, url)
71
+ .with(headers: { 'range' => 'bytes=0-0' })
72
+ .to_return(body: 'a', headers: { 'Content-Range' => '0-0/13' }, status: 206)
34
73
 
35
- fake_resp = double(headers: {}, status: 403, body: 'Please log in')
36
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
37
- allow(Faraday).to receive(:new).and_return(faraday_conn)
38
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
74
+ rio = described_class.new(url)
39
75
 
40
- rio.seek(100)
41
- expect { rio.read(100) }.to raise_error(/replied with a 403 and refused/)
42
- end
76
+ expect(rio.pos).to eq(0)
43
77
 
44
- it 'returns nil on a 416 response' do
45
- rio = described_class.new('https://images.invalid/img.jpg')
78
+ rio.read(1)
46
79
 
47
- fake_resp = double(headers: {}, status: 416, body: 'You stepped off the ledge of the range')
48
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
49
- allow(Faraday).to receive(:new).and_return(faraday_conn)
50
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
80
+ expect(rio.pos).to eq(1)
81
+ expect(stub).to have_been_requested
82
+ end
83
+ end
51
84
 
52
- rio.seek(100)
53
- expect(rio.read(100)).to be_nil
54
- end
85
+ context 'when the Content-Range header is not present' do
86
+ it 'raises an error' do
87
+ url = 'https://images.invalid/img.jpg'
88
+
89
+ stub = stub_request(:get, url)
90
+ .with(headers: { 'range' => 'bytes=10-109' })
91
+ .to_return(status: 206)
55
92
 
56
- it 'sets the status_code of the exception on a 4xx response from upstream' do
57
- rio = described_class.new('https://images.invalid/img.jpg')
93
+ rio = described_class.new(url)
94
+ rio.seek(10)
58
95
 
59
- fake_resp = double(headers: {}, status: 403, body: 'Please log in')
60
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
61
- allow(Faraday).to receive(:new).and_return(faraday_conn)
62
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
96
+ expect { rio.read(100) }.to raise_error("The server replied with 206 status but no Content-Range at #{url}")
97
+ expect(stub).to have_been_requested
98
+ end
99
+ end
100
+ end
63
101
 
64
- rio.seek(100)
65
- # rubocop: disable Lint/AmbiguousBlockAssociation
66
- expect { rio.read(100) }.to raise_error { |e| expect(e.status_code).to eq(403) }
102
+ # 3XX
103
+
104
+ [301, 302, 303, 307, 308].each do |code|
105
+ context "when the response code is #{code}" do
106
+ context 'when the location header is present and the redirect limit is not exceeded' do
107
+ context 'when the location is absolute' do
108
+ it 'redirects to the specified location, without the Authorization header' do
109
+ redirecting_url = 'https://my_images.invalid/my_image'
110
+ destination_url = 'https://images.invalid/img.jpg'
111
+ body = 'response body'
112
+
113
+ redirect_stub = stub_request(:get, redirecting_url)
114
+ .with(headers: { 'Authorization' => 'token', 'range' => 'bytes=10-109' })
115
+ .to_return(headers: { 'location' => destination_url }, status: code)
116
+ destination_stub = stub_request(:get, destination_url)
117
+ .with { |request| request.headers['Range'] == 'bytes=10-109' && !request.headers.key?('Authorization') }
118
+ .to_return(body: body, status: 200)
119
+
120
+ rio = described_class.new(redirecting_url, headers: { 'Authorization' => 'token' })
121
+ rio.seek(10)
122
+ read_result = rio.read(100)
123
+
124
+ expect(read_result).to eq(body)
125
+ expect(redirect_stub).to have_been_requested
126
+ expect(destination_stub).to have_been_requested
127
+ end
128
+ end
129
+
130
+ context 'when the location is relative' do
131
+ it 'redirects to the specified location under the same host, with the same Authorization header' do
132
+ host = 'https://images.invalid'
133
+ redirecting_path = '/my_image'
134
+ redirecting_url = host + redirecting_path
135
+ destination_path = '/img.jpg'
136
+ destination_url = host + destination_path
137
+ body = 'response body'
138
+
139
+ redirect_stub = stub_request(:get, redirecting_url)
140
+ .with(headers: { 'Authorization' => 'token', 'range' => 'bytes=10-109' })
141
+ .to_return(headers: { 'location' => destination_path }, status: code)
142
+ destination_stub = stub_request(:get, destination_url)
143
+ .with(headers: { 'Authorization' => 'token', 'range' => 'bytes=10-109' })
144
+ .to_return(body: body, status: 200)
145
+
146
+ rio = described_class.new(redirecting_url, headers: { 'Authorization' => 'token' })
147
+ rio.seek(10)
148
+ read_result = rio.read(100)
149
+
150
+ expect(read_result).to eq(body)
151
+ expect(redirect_stub).to have_been_requested
152
+ expect(destination_stub).to have_been_requested
153
+ end
154
+ end
155
+ end
156
+
157
+ context 'when the location header is not present' do
158
+ it 'raises an error' do
159
+ url = 'https://images.invalid/my_image'
160
+
161
+ stub = stub_request(:get, url)
162
+ .with(headers: { 'range' => 'bytes=10-109' })
163
+ .to_return(status: code)
164
+
165
+ rio = described_class.new(url)
166
+ rio.seek(10)
167
+
168
+ expect { rio.read(100) }.to raise_error("Server at #{url} replied with a #{code}, indicating redirection; however, the location header was empty.")
169
+ expect(stub).to have_been_requested
170
+ end
171
+ end
172
+
173
+ context 'when the redirect limit is exceeded' do
174
+ it 'raises an error' do
175
+ redirecting_url = 'https://images.invalid/my_image'
176
+ destination_url = 'https://images.invalid/img.jpg'
177
+
178
+ stub = stub_request(:get, /https:\/\/images\.invalid.*/)
179
+ .with(headers: { 'range' => 'bytes=10-109' })
180
+ .to_return(headers: { 'location' => destination_url }, status: code)
181
+
182
+ rio = described_class.new(redirecting_url)
183
+ rio.seek(10)
184
+
185
+ expect { rio.read(100) }.to raise_error("Too many redirects; last one to: #{destination_url}")
186
+ expect(stub).to have_been_requested.times(4)
187
+ end
188
+ end
189
+ end
67
190
  end
68
191
 
69
- it 'returns a nil when the range cannot be satisfied and the response is 416' do
70
- rio = described_class.new('https://images.invalid/img.jpg')
192
+ # 4XX
71
193
 
72
- fake_resp = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
73
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
74
- allow(Faraday).to receive(:new).and_return(faraday_conn)
75
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
194
+ context 'when the response status code is 416 (Range Not Satisfiable)' do
195
+ it 'returns nil' do
196
+ url = 'https://images.invalid/img.jpg'
76
197
 
77
- rio.seek(100)
78
- expect(rio.read(100)).to be_nil
79
- end
198
+ stub = stub_request(:get, url)
199
+ .with(headers: { 'range' => 'bytes=100-199' })
200
+ .to_return(status: 416)
201
+
202
+ rio = described_class.new(url)
203
+ rio.seek(100)
80
204
 
81
- it 'does not overwrite size when the range cannot be satisfied and the response is 416' do
82
- rio = described_class.new('https://images.invalid/img.jpg')
205
+ expect(rio.read(100)).to be_nil
206
+ expect(stub).to have_been_requested
207
+ end
83
208
 
84
- fake_resp1 = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
85
- fake_resp2 = double(headers: {}, status: 416, body: 'You jumped off the end of the file maam')
209
+ it 'does not change pos or size' do
210
+ url = 'https://images.invalid/img.jpg'
86
211
 
87
- faraday_conn = instance_double(Faraday::Connection)
88
- allow(Faraday).to receive(:new).and_return(faraday_conn)
89
- expect(faraday_conn).to receive(:get)
90
- .with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
91
- .ordered
92
- .and_return(fake_resp1)
93
- expect(faraday_conn).to receive(:get)
94
- .with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
95
- .ordered
96
- .and_return(fake_resp2)
212
+ stub = stub_request(:get, url)
213
+ .with(headers: { 'range' => 'bytes=0-0' })
214
+ .to_return(body: 'response body', headers: { 'Content-Range' => 'bytes 0-0/13' }, status: 206)
97
215
 
98
- rio.read(1)
216
+ rio = described_class.new(url)
217
+ rio.read(1)
99
218
 
100
- expect(rio.size).to eq(13)
219
+ expect(rio.size).to eq(13)
220
+ expect(stub).to have_been_requested
101
221
 
102
- rio.seek(100)
103
- expect(rio.read(100)).to be_nil
222
+ stub = stub_request(:get, url)
223
+ .with(headers: { 'range' => 'bytes=100-199' })
224
+ .to_return(status: 416)
104
225
 
105
- expect(rio.size).to eq(13)
226
+ rio.seek(100)
227
+ rio.read(100)
228
+
229
+ expect(rio.pos).to eq(100)
230
+ expect(rio.size).to eq(13)
231
+ expect(stub).to have_been_requested
232
+ end
106
233
  end
107
234
 
108
- it 'raises a specific error for all 5xx responses' do
109
- rio = described_class.new('https://images.invalid/img.jpg')
235
+ [*400..415, *417..499].each do |code|
236
+ context "when the response status code is #{code}" do
237
+ it 'raises an error' do
238
+ url = 'https://images.invalid/img.jpg'
239
+
240
+ stub = stub_request(:get, url)
241
+ .with(headers: { 'range' => 'bytes=100-199' })
242
+ .to_return(status: code)
110
243
 
111
- fake_resp = double(headers: {}, status: 502, body: 'Guru meditation')
112
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
113
- allow(Faraday).to receive(:new).and_return(faraday_conn)
114
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=100-199')
244
+ rio = described_class.new(url)
245
+ rio.seek(100)
115
246
 
116
- rio.seek(100)
117
- expect { rio.read(100) }.to raise_error(/replied with a 502 and we might want to retry/)
247
+ expect { rio.read(100) }.to raise_error("Server at #{url} replied with a #{code} and refused our request")
248
+ expect(stub).to have_been_requested
249
+ end
250
+ end
118
251
  end
119
252
 
120
- it 'maintains and exposes #pos' do
121
- rio = described_class.new('https://images.invalid/img.jpg')
253
+ # 5XX
254
+
255
+ (500..599).each do |code|
256
+ context "when the response status code is #{code}" do
257
+ it 'raises an error' do
258
+ url = 'https://images.invalid/img.jpg'
122
259
 
123
- expect(rio.pos).to eq(0)
260
+ stub = stub_request(:get, url)
261
+ .with(headers: { 'range' => 'bytes=100-199' })
262
+ .to_return(status: code)
124
263
 
125
- fake_resp = double(headers: {'Content-Range' => 'bytes 0-0/13'}, status: 206, body: 'a')
126
- faraday_conn = instance_double(Faraday::Connection, get: fake_resp)
127
- allow(Faraday).to receive(:new).and_return(faraday_conn)
128
- expect(faraday_conn).to receive(:get).with('https://images.invalid/img.jpg', nil, range: 'bytes=0-0')
129
- rio.read(1)
264
+ rio = described_class.new(url)
265
+ rio.seek(100)
130
266
 
131
- expect(rio.pos).to eq(1)
267
+ expect { rio.read(100) }.to raise_error("Server at #{url} replied with a #{code} and we might want to retry")
268
+ expect(stub).to have_been_requested
269
+ end
270
+ end
132
271
  end
133
272
  end
data/spec/spec_helper.rb CHANGED
@@ -7,6 +7,10 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
7
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
8
8
 
9
9
  require 'rspec'
10
+ require 'webmock/rspec'
11
+
12
+ WebMock.disable_net_connect!(allow_localhost: true)
13
+
10
14
  require 'format_parser'
11
15
 
12
16
  module SpecHelpers