down 4.5.0 → 4.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -7
- data/down.gemspec +2 -0
- data/lib/down.rb +2 -0
- data/lib/down/backend.rb +8 -7
- data/lib/down/http.rb +13 -17
- data/lib/down/net_http.rb +90 -20
- data/lib/down/utils.rb +2 -0
- data/lib/down/version.rb +1 -1
- data/lib/down/wget.rb +39 -16
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1976aa6977c5e4161c2f13e0259840edae016dea49087ce8ea3bf46918b0eaee
|
4
|
+
data.tar.gz: 24d875754c8174a2bf5030e9f35a14bda4d2f2a30b143e002eecdc128f8a8a6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b4102338d79a8ecfb5c643aa387aec15b1af79cb4c0ef7eda29828c79e454b9037bf388b82e52fbefcfe3ab22792e1a06a46f7ed8356228c5c8f49c203972c3
|
7
|
+
data.tar.gz: 25c901d8e9f6d447ff2332c69e957a189baaa3960938ce1f3560ef11723ff209162c3c82b608dda7da2d0120469b8a59f8b9bf6577709f7e81f28f081544f852
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 4.6.0 (2018-09-29)
|
2
|
+
|
3
|
+
* Ensure URLs are properly encoded in `NetHttp#download` and `#open` using Addressable (@linyaoli)
|
4
|
+
|
5
|
+
* Raise `ResponseError` with clear message when redirect URI was invalid in Down::NetHttp (@janko-m)
|
6
|
+
|
1
7
|
## 4.5.0 (2018-05-11)
|
2
8
|
|
3
9
|
* Deprecate passing an `HTTP::Client` object to `Down::Http#initialize` (@janko-m)
|
data/README.md
CHANGED
@@ -24,8 +24,8 @@ tempfile #=> #<Tempfile:/var/folders/k7/6zx6dx6x7ys3rv3srh0nyfj00000gn/T/2015092
|
|
24
24
|
|
25
25
|
### Metadata
|
26
26
|
|
27
|
-
The returned Tempfile has
|
28
|
-
|
27
|
+
The returned Tempfile has some additional attributes extracted from the
|
28
|
+
response data:
|
29
29
|
|
30
30
|
```rb
|
31
31
|
tempfile.content_type #=> "text/plain"
|
@@ -345,9 +345,8 @@ Some features that give the HTTP.rb backend an advantage over `open-uri` +
|
|
345
345
|
|
346
346
|
* Low memory usage (**10x less** than `open-uri`/`Net::HTTP`)
|
347
347
|
* Correct URI parsing with [Addressable::URI]
|
348
|
-
* Proper support
|
349
|
-
*
|
350
|
-
* Chainable HTTP client builder API for setting default options
|
348
|
+
* Proper SSL support
|
349
|
+
* Chainable builder API for setting default options
|
351
350
|
* Support for persistent connections
|
352
351
|
|
353
352
|
#### Additional options
|
@@ -359,11 +358,12 @@ Down::Http.download("http://example.org/image.jpg", headers: { "Foo" => "Bar" })
|
|
359
358
|
Down::Http.open("http://example.org/image.jpg", follow: { max_hops: 0 })
|
360
359
|
```
|
361
360
|
|
362
|
-
|
361
|
+
However, it's recommended to configure request options using http.rb's
|
362
|
+
chainable API, as it's more convenient than passing raw options.
|
363
363
|
|
364
364
|
```rb
|
365
365
|
Down::Http.open("http://example.org/image.jpg") do |client|
|
366
|
-
client.timeout(connect: 3)
|
366
|
+
client.timeout(connect: 3, read: 3)
|
367
367
|
end
|
368
368
|
```
|
369
369
|
|
data/down.gemspec
CHANGED
@@ -15,6 +15,8 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.files = Dir["README.md", "LICENSE.txt", "CHANGELOG.md", "*.gemspec", "lib/**/*.rb"]
|
16
16
|
spec.require_path = "lib"
|
17
17
|
|
18
|
+
spec.add_dependency "addressable", "~> 2.5"
|
19
|
+
|
18
20
|
spec.add_development_dependency "minitest", "~> 5.8"
|
19
21
|
spec.add_development_dependency "mocha", "~> 1.5"
|
20
22
|
spec.add_development_dependency "rake"
|
data/lib/down.rb
CHANGED
@@ -14,6 +14,7 @@ module Down
|
|
14
14
|
backend.open(*args, &block)
|
15
15
|
end
|
16
16
|
|
17
|
+
# Allows setting a backend via a symbol or a downloader object.
|
17
18
|
def backend(value = nil)
|
18
19
|
if value.is_a?(Symbol)
|
19
20
|
require "down/#{value}"
|
@@ -26,4 +27,5 @@ module Down
|
|
26
27
|
end
|
27
28
|
end
|
28
29
|
|
30
|
+
# Set Net::HTTP as the default backend
|
29
31
|
Down.backend Down::NetHttp
|
data/lib/down/backend.rb
CHANGED
@@ -19,14 +19,15 @@ module Down
|
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
+
# If destination path is defined, move tempfile to the destination,
|
23
|
+
# otherwise return the tempfile unchanged.
|
22
24
|
def download_result(tempfile, destination)
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
25
|
+
return tempfile unless destination
|
26
|
+
|
27
|
+
tempfile.close # required for Windows
|
28
|
+
FileUtils.mv tempfile.path, destination
|
29
|
+
|
30
|
+
nil
|
30
31
|
end
|
31
32
|
end
|
32
33
|
end
|
data/lib/down/http.rb
CHANGED
@@ -7,11 +7,11 @@ require "http"
|
|
7
7
|
require "down/backend"
|
8
8
|
|
9
9
|
require "tempfile"
|
10
|
-
require "cgi"
|
11
|
-
require "base64"
|
12
10
|
|
13
11
|
module Down
|
12
|
+
# Provides streaming downloads implemented with HTTP.rb.
|
14
13
|
class Http < Backend
|
14
|
+
# Initializes the backend with common defaults.
|
15
15
|
def initialize(options = {}, &block)
|
16
16
|
if options.is_a?(HTTP::Client)
|
17
17
|
warn "[Down] Passing an HTTP::Client object to Down::Http#initialize is deprecated and won't be supported in Down 5. Use the block initialization instead."
|
@@ -28,6 +28,8 @@ module Down
|
|
28
28
|
@client = block.call(@client) if block
|
29
29
|
end
|
30
30
|
|
31
|
+
# Downloads the remote file to disk. Accepts HTTP.rb options via a hash or a
|
32
|
+
# block, and some additional options as well.
|
31
33
|
def download(url, max_size: nil, progress_proc: nil, content_length_proc: nil, destination: nil, **options, &block)
|
32
34
|
response = request(url, **options, &block)
|
33
35
|
|
@@ -63,6 +65,9 @@ module Down
|
|
63
65
|
raise
|
64
66
|
end
|
65
67
|
|
68
|
+
# Starts retrieving the remote file and returns an IO-like object which
|
69
|
+
# downloads the response body on-demand. Accepts HTTP.rb options via a hash
|
70
|
+
# or a block.
|
66
71
|
def open(url, rewindable: true, **options, &block)
|
67
72
|
response = request(url, **options, &block)
|
68
73
|
|
@@ -84,9 +89,10 @@ module Down
|
|
84
89
|
end
|
85
90
|
|
86
91
|
def send_request(method, url, **options, &block)
|
87
|
-
|
92
|
+
uri = HTTP::URI.parse(url)
|
88
93
|
|
89
94
|
client = @client
|
95
|
+
client = client.basic_auth(user: uri.user, pass: uri.password) if uri.user || uri.password
|
90
96
|
client = block.call(client) if block
|
91
97
|
|
92
98
|
client.request(method, url, options)
|
@@ -94,6 +100,7 @@ module Down
|
|
94
100
|
request_error!(exception)
|
95
101
|
end
|
96
102
|
|
103
|
+
# Yields chunks of the response body to the block.
|
97
104
|
def stream_body(response, &block)
|
98
105
|
response.body.each(&block)
|
99
106
|
rescue => exception
|
@@ -102,20 +109,7 @@ module Down
|
|
102
109
|
response.connection.close unless @client.persistent?
|
103
110
|
end
|
104
111
|
|
105
|
-
|
106
|
-
uri = HTTP::URI.parse(url)
|
107
|
-
|
108
|
-
if uri.user || uri.password
|
109
|
-
user, pass = uri.user, uri.password
|
110
|
-
authorization = "Basic #{Base64.strict_encode64("#{user}:#{pass}")}"
|
111
|
-
options[:headers] ||= {}
|
112
|
-
options[:headers].merge!("Authorization" => authorization)
|
113
|
-
uri.user = uri.password = nil
|
114
|
-
end
|
115
|
-
|
116
|
-
uri.to_s
|
117
|
-
end
|
118
|
-
|
112
|
+
# Raises non-successful response as a Down::ResponseError.
|
119
113
|
def response_error!(response)
|
120
114
|
args = [response.status.to_s, response: response]
|
121
115
|
|
@@ -126,6 +120,7 @@ module Down
|
|
126
120
|
end
|
127
121
|
end
|
128
122
|
|
123
|
+
# Re-raise HTTP.rb exceptions as Down::Error exceptions.
|
129
124
|
def request_error!(exception)
|
130
125
|
case exception
|
131
126
|
when HTTP::Request::UnsupportedSchemeError, Addressable::URI::InvalidURIError
|
@@ -143,6 +138,7 @@ module Down
|
|
143
138
|
end
|
144
139
|
end
|
145
140
|
|
141
|
+
# Defines some additional attributes for the returned Tempfile.
|
146
142
|
module DownloadedFile
|
147
143
|
attr_accessor :url, :headers
|
148
144
|
|
data/lib/down/net_http.rb
CHANGED
@@ -2,15 +2,17 @@
|
|
2
2
|
|
3
3
|
require "open-uri"
|
4
4
|
require "net/https"
|
5
|
+
require "addressable/uri"
|
5
6
|
|
6
7
|
require "down/backend"
|
7
8
|
|
8
9
|
require "tempfile"
|
9
10
|
require "fileutils"
|
10
|
-
require "cgi"
|
11
11
|
|
12
12
|
module Down
|
13
|
+
# Provides streaming downloads implemented with Net::HTTP and open-uri.
|
13
14
|
class NetHttp < Backend
|
15
|
+
# Initializes the backend with common defaults.
|
14
16
|
def initialize(options = {})
|
15
17
|
@options = {
|
16
18
|
"User-Agent" => "Down/#{Down::VERSION}",
|
@@ -20,6 +22,8 @@ module Down
|
|
20
22
|
}.merge(options)
|
21
23
|
end
|
22
24
|
|
25
|
+
# Downloads a remote file to disk using open-uri. Accepts any open-uri
|
26
|
+
# options, and a few more.
|
23
27
|
def download(url, options = {})
|
24
28
|
options = @options.merge(options)
|
25
29
|
|
@@ -29,6 +33,11 @@ module Down
|
|
29
33
|
content_length_proc = options.delete(:content_length_proc)
|
30
34
|
destination = options.delete(:destination)
|
31
35
|
|
36
|
+
# Use open-uri's :content_length_proc or :progress_proc to raise an
|
37
|
+
# exception early if the file is too large.
|
38
|
+
#
|
39
|
+
# Also disable following redirects, as we'll provide our own
|
40
|
+
# implementation that has the ability to limit the number of redirects.
|
32
41
|
open_uri_options = {
|
33
42
|
content_length_proc: proc { |size|
|
34
43
|
if size && max_size && size > max_size
|
@@ -45,6 +54,7 @@ module Down
|
|
45
54
|
redirect: false,
|
46
55
|
}
|
47
56
|
|
57
|
+
# Handle basic authentication in the :proxy option.
|
48
58
|
if options[:proxy]
|
49
59
|
proxy = URI(options.delete(:proxy))
|
50
60
|
user = proxy.user
|
@@ -62,8 +72,9 @@ module Down
|
|
62
72
|
|
63
73
|
open_uri_options.merge!(options)
|
64
74
|
|
65
|
-
uri = ensure_uri(url)
|
75
|
+
uri = ensure_uri(addressable_normalize(url))
|
66
76
|
|
77
|
+
# Handle basic authentication in the remote URL.
|
67
78
|
if uri.user || uri.password
|
68
79
|
open_uri_options[:http_basic_authentication] ||= [uri.user, uri.password]
|
69
80
|
uri.user = nil
|
@@ -72,6 +83,7 @@ module Down
|
|
72
83
|
|
73
84
|
open_uri_file = open_uri(uri, open_uri_options, follows_remaining: max_redirects)
|
74
85
|
|
86
|
+
# Handle the fact that open-uri returns StringIOs for small files.
|
75
87
|
tempfile = ensure_tempfile(open_uri_file, File.extname(open_uri_file.base_uri.path))
|
76
88
|
OpenURI::Meta.init tempfile, open_uri_file # add back open-uri methods
|
77
89
|
tempfile.extend Down::NetHttp::DownloadedFile
|
@@ -79,11 +91,13 @@ module Down
|
|
79
91
|
download_result(tempfile, destination)
|
80
92
|
end
|
81
93
|
|
94
|
+
# Starts retrieving the remote file using Net::HTTP and returns an IO-like
|
95
|
+
# object which downloads the response body on-demand.
|
82
96
|
def open(url, options = {})
|
97
|
+
uri = ensure_uri(addressable_normalize(url))
|
83
98
|
options = @options.merge(options)
|
84
99
|
|
85
|
-
|
86
|
-
|
100
|
+
# Create a Fiber that halts when response headers are received.
|
87
101
|
request = Fiber.new do
|
88
102
|
net_http_request(uri, options) do |response|
|
89
103
|
Fiber.yield response
|
@@ -94,6 +108,7 @@ module Down
|
|
94
108
|
|
95
109
|
response_error!(response) unless response.is_a?(Net::HTTPSuccess)
|
96
110
|
|
111
|
+
# Build an IO-like object that will retrieve response body on-demand.
|
97
112
|
Down::ChunkedIO.new(
|
98
113
|
chunks: enum_for(:stream_body, response),
|
99
114
|
size: response["Content-Length"] && response["Content-Length"].to_i,
|
@@ -113,13 +128,22 @@ module Down
|
|
113
128
|
|
114
129
|
private
|
115
130
|
|
131
|
+
# Calls open-uri's URI::HTTP#open method. Additionally handles redirects.
|
116
132
|
def open_uri(uri, options, follows_remaining: 0)
|
117
|
-
|
133
|
+
uri.open(options)
|
118
134
|
rescue OpenURI::HTTPRedirect => exception
|
119
135
|
raise Down::TooManyRedirects, "too many redirects" if follows_remaining == 0
|
120
136
|
|
121
|
-
|
137
|
+
# fail if redirect URI scheme is not http or https
|
138
|
+
begin
|
139
|
+
uri = ensure_uri(exception.uri)
|
140
|
+
rescue Down::InvalidUrl
|
141
|
+
response = rebuild_response_from_open_uri_exception(exception)
|
142
|
+
|
143
|
+
raise ResponseError.new("Invalid Redirect URI: #{exception.uri}", response: response)
|
144
|
+
end
|
122
145
|
|
146
|
+
# forward cookies on the redirect
|
123
147
|
if !exception.io.meta["set-cookie"].to_s.empty?
|
124
148
|
options["Cookie"] = exception.io.meta["set-cookie"]
|
125
149
|
end
|
@@ -127,11 +151,11 @@ module Down
|
|
127
151
|
follows_remaining -= 1
|
128
152
|
retry
|
129
153
|
rescue OpenURI::HTTPError => exception
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
exception.
|
134
|
-
|
154
|
+
response = rebuild_response_from_open_uri_exception(exception)
|
155
|
+
|
156
|
+
# open-uri attempts to parse the redirect URI, so we re-raise that exception
|
157
|
+
if exception.message.include?("(Invalid Location URI)")
|
158
|
+
raise ResponseError.new("Invalid Redirect URI: #{response["Location"]}", response: response)
|
135
159
|
end
|
136
160
|
|
137
161
|
response_error!(response)
|
@@ -159,6 +183,7 @@ module Down
|
|
159
183
|
tempfile
|
160
184
|
end
|
161
185
|
|
186
|
+
# Makes a Net::HTTP request and follows redirects.
|
162
187
|
def net_http_request(uri, options, follows_remaining: options.fetch(:max_redirects, 2), &block)
|
163
188
|
http, request = create_net_http(uri, options)
|
164
189
|
|
@@ -167,7 +192,10 @@ module Down
|
|
167
192
|
http.request(request) do |response|
|
168
193
|
unless response.is_a?(Net::HTTPRedirection)
|
169
194
|
yield response
|
170
|
-
|
195
|
+
# In certain cases the caller wants to download only one portion
|
196
|
+
# of the file and close the connection, so we tell Net::HTTP that
|
197
|
+
# it shouldn't continue retrieving it.
|
198
|
+
response.instance_variable_set("@read", true)
|
171
199
|
end
|
172
200
|
end
|
173
201
|
end
|
@@ -178,13 +206,21 @@ module Down
|
|
178
206
|
if response.is_a?(Net::HTTPRedirection)
|
179
207
|
raise Down::TooManyRedirects if follows_remaining == 0
|
180
208
|
|
181
|
-
|
209
|
+
# fail if redirect URI is not a valid http or https URL
|
210
|
+
begin
|
211
|
+
location = ensure_uri(response["Location"], allow_relative: true)
|
212
|
+
rescue Down::InvalidUrl
|
213
|
+
raise ResponseError.new("Invalid Redirect URI: #{response["Location"]}", response: response)
|
214
|
+
end
|
215
|
+
|
216
|
+
# handle relative redirects
|
182
217
|
location = uri + location if location.relative?
|
183
218
|
|
184
219
|
net_http_request(location, options, follows_remaining: follows_remaining - 1, &block)
|
185
220
|
end
|
186
221
|
end
|
187
222
|
|
223
|
+
# Build a Net::HTTP object for making a request.
|
188
224
|
def create_net_http(uri, options)
|
189
225
|
http_class = Net::HTTP
|
190
226
|
|
@@ -195,7 +231,7 @@ module Down
|
|
195
231
|
|
196
232
|
http = http_class.new(uri.host, uri.port)
|
197
233
|
|
198
|
-
# taken from open-uri implementation
|
234
|
+
# Handle SSL parameters (taken from the open-uri implementation).
|
199
235
|
if uri.is_a?(URI::HTTPS)
|
200
236
|
http.use_ssl = true
|
201
237
|
http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
|
@@ -214,7 +250,7 @@ module Down
|
|
214
250
|
http.open_timeout = options[:open_timeout] if options.key?(:open_timeout)
|
215
251
|
|
216
252
|
request_headers = options.select { |key, value| key.is_a?(String) }
|
217
|
-
request_headers["Accept-Encoding"] = "" #
|
253
|
+
request_headers["Accept-Encoding"] = "" # Net::HTTP's inflater causes FiberErrors
|
218
254
|
|
219
255
|
get = Net::HTTP::Get.new(uri.request_uri, request_headers)
|
220
256
|
get.basic_auth(uri.user, uri.password) if uri.user || uri.password
|
@@ -222,20 +258,51 @@ module Down
|
|
222
258
|
[http, get]
|
223
259
|
end
|
224
260
|
|
261
|
+
# Yields chunks of the response body to the block.
|
225
262
|
def stream_body(response, &block)
|
226
263
|
response.read_body(&block)
|
227
264
|
rescue => exception
|
228
265
|
request_error!(exception)
|
229
266
|
end
|
230
267
|
|
231
|
-
|
232
|
-
|
233
|
-
|
268
|
+
# Checks that the url is a valid URI and that its scheme is http or https.
|
269
|
+
def ensure_uri(url, allow_relative: false)
|
270
|
+
begin
|
271
|
+
uri = URI(url)
|
272
|
+
rescue URI::InvalidURIError => exception
|
273
|
+
raise Down::InvalidUrl, exception.message
|
274
|
+
end
|
275
|
+
|
276
|
+
unless allow_relative && uri.relative?
|
277
|
+
raise Down::InvalidUrl, "URL scheme needs to be http or https: #{uri}" unless uri.is_a?(URI::HTTP)
|
278
|
+
end
|
279
|
+
|
234
280
|
uri
|
235
|
-
rescue URI::InvalidURIError => exception
|
236
|
-
raise Down::InvalidUrl, exception.message
|
237
281
|
end
|
238
282
|
|
283
|
+
# Makes sure that the URL is properly encoded.
|
284
|
+
def addressable_normalize(url)
|
285
|
+
addressable_uri = Addressable::URI.parse(url)
|
286
|
+
addressable_uri.normalize.to_s
|
287
|
+
end
|
288
|
+
|
289
|
+
# When open-uri raises an exception, it doesn't expose the response object.
|
290
|
+
# Fortunately, the exception object holds response data that can be used to
|
291
|
+
# rebuild the Net::HTTP response object.
|
292
|
+
def rebuild_response_from_open_uri_exception(exception)
|
293
|
+
code, message = exception.io.status
|
294
|
+
|
295
|
+
response_class = Net::HTTPResponse::CODE_TO_OBJ.fetch(code)
|
296
|
+
response = response_class.new(nil, code, message)
|
297
|
+
|
298
|
+
exception.io.metas.each do |name, values|
|
299
|
+
values.each { |value| response.add_field(name, value) }
|
300
|
+
end
|
301
|
+
|
302
|
+
response
|
303
|
+
end
|
304
|
+
|
305
|
+
# Raises non-successful response as a Down::ResponseError.
|
239
306
|
def response_error!(response)
|
240
307
|
code = response.code.to_i
|
241
308
|
message = response.message.split(" ").map(&:capitalize).join(" ")
|
@@ -249,6 +316,7 @@ module Down
|
|
249
316
|
end
|
250
317
|
end
|
251
318
|
|
319
|
+
# Re-raise Net::HTTP exceptions as Down::Error exceptions.
|
252
320
|
def request_error!(exception)
|
253
321
|
case exception
|
254
322
|
when Net::OpenTimeout
|
@@ -264,6 +332,8 @@ module Down
|
|
264
332
|
end
|
265
333
|
end
|
266
334
|
|
335
|
+
# Defines some additional attributes for the returned Tempfile (on top of what
|
336
|
+
# OpenURI::Meta already defines).
|
267
337
|
module DownloadedFile
|
268
338
|
def original_filename
|
269
339
|
Utils.filename_from_content_disposition(meta["content-disposition"]) ||
|
data/lib/down/utils.rb
CHANGED
@@ -4,6 +4,7 @@ module Down
|
|
4
4
|
module Utils
|
5
5
|
module_function
|
6
6
|
|
7
|
+
# Retrieves potential filename from the "Content-Disposition" header.
|
7
8
|
def filename_from_content_disposition(content_disposition)
|
8
9
|
content_disposition = content_disposition.to_s
|
9
10
|
|
@@ -13,6 +14,7 @@ module Down
|
|
13
14
|
filename unless filename.empty?
|
14
15
|
end
|
15
16
|
|
17
|
+
# Retrieves potential filename from the URL path.
|
16
18
|
def filename_from_path(path)
|
17
19
|
filename = path.split("/").last
|
18
20
|
CGI.unescape(filename) if filename
|
data/lib/down/version.rb
CHANGED
data/lib/down/wget.rb
CHANGED
@@ -11,10 +11,12 @@ require "down/backend"
|
|
11
11
|
|
12
12
|
require "tempfile"
|
13
13
|
require "uri"
|
14
|
-
require "cgi"
|
15
14
|
|
16
15
|
module Down
|
16
|
+
# Provides streaming downloads implemented with the wget command-line tool.
|
17
|
+
# The design is very similar to Down::Http.
|
17
18
|
class Wget < Backend
|
19
|
+
# Initializes the backend with common defaults.
|
18
20
|
def initialize(*arguments)
|
19
21
|
@arguments = [
|
20
22
|
user_agent: "Down/#{Down::VERSION}",
|
@@ -25,6 +27,8 @@ module Down
|
|
25
27
|
] + arguments
|
26
28
|
end
|
27
29
|
|
30
|
+
# Downloads the remote file to disk. Accepts wget command-line options and
|
31
|
+
# some additional options as well.
|
28
32
|
def download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, destination: nil, **options)
|
29
33
|
io = open(url, *args, **options, rewindable: false)
|
30
34
|
|
@@ -63,10 +67,13 @@ module Down
|
|
63
67
|
io.close if io
|
64
68
|
end
|
65
69
|
|
70
|
+
# Starts retrieving the remote file and returns an IO-like object which
|
71
|
+
# downloads the response body on-demand. Accepts wget command-line options.
|
66
72
|
def open(url, *args, rewindable: true, **options)
|
67
73
|
arguments = generate_command(url, *args, **options)
|
68
74
|
|
69
75
|
command = Down::Wget::Command.execute(arguments)
|
76
|
+
# Wrap the wget command output in an IO-like object.
|
70
77
|
output = Down::ChunkedIO.new(
|
71
78
|
chunks: command.enum_for(:output),
|
72
79
|
on_close: command.method(:terminate),
|
@@ -78,6 +85,7 @@ module Down
|
|
78
85
|
header_string << output.readpartial until header_string.include?("\r\n\r\n")
|
79
86
|
header_string, first_chunk = header_string.split("\r\n\r\n", 2)
|
80
87
|
|
88
|
+
# Use an HTTP parser to parse out the response headers.
|
81
89
|
parser = HTTP::Parser.new
|
82
90
|
parser << header_string
|
83
91
|
|
@@ -92,6 +100,7 @@ module Down
|
|
92
100
|
content_length = headers["Content-Length"].to_i if headers["Content-Length"]
|
93
101
|
charset = headers["Content-Type"][/;\s*charset=([^;]+)/i, 1] if headers["Content-Type"]
|
94
102
|
|
103
|
+
# Create an Enumerator which will lazily retrieve chunks of response body.
|
95
104
|
chunks = Enumerator.new do |yielder|
|
96
105
|
yielder << first_chunk if first_chunk
|
97
106
|
yielder << output.readpartial until output.eof?
|
@@ -109,6 +118,7 @@ module Down
|
|
109
118
|
|
110
119
|
private
|
111
120
|
|
121
|
+
# Generates the wget command.
|
112
122
|
def generate_command(url, *args, **options)
|
113
123
|
command = %W[wget --no-verbose --save-headers -O -]
|
114
124
|
|
@@ -131,10 +141,12 @@ module Down
|
|
131
141
|
command
|
132
142
|
end
|
133
143
|
|
144
|
+
# Handles executing the wget command.
|
134
145
|
class Command
|
135
146
|
PIPE_BUFFER_SIZE = 64*1024
|
136
147
|
|
137
148
|
def self.execute(arguments)
|
149
|
+
# posix-spawn gem has better performance, so we use it if it's available
|
138
150
|
if defined?(POSIX::Spawn)
|
139
151
|
pid, stdin_pipe, stdout_pipe, stderr_pipe = POSIX::Spawn.popen4(*arguments)
|
140
152
|
status_reaper = Process.detach(pid)
|
@@ -156,6 +168,7 @@ module Down
|
|
156
168
|
@stderr_pipe = stderr_pipe
|
157
169
|
end
|
158
170
|
|
171
|
+
# Yields chunks of stdout. At the end handles the exit status.
|
159
172
|
def output
|
160
173
|
# Keep emptying the stderr buffer, to allow the subprocess to send more
|
161
174
|
# than 64KB if it wants to.
|
@@ -165,8 +178,32 @@ module Down
|
|
165
178
|
|
166
179
|
status = @status_reaper.value
|
167
180
|
stderr = stderr_reader.value
|
181
|
+
|
168
182
|
close
|
169
183
|
|
184
|
+
handle_status(status, stderr)
|
185
|
+
end
|
186
|
+
|
187
|
+
def terminate
|
188
|
+
begin
|
189
|
+
Process.kill("TERM", @status_reaper[:pid])
|
190
|
+
Process.waitpid(@status_reaper[:pid])
|
191
|
+
rescue Errno::ESRCH
|
192
|
+
# process has already terminated
|
193
|
+
end
|
194
|
+
|
195
|
+
close
|
196
|
+
end
|
197
|
+
|
198
|
+
def close
|
199
|
+
@stdout_pipe.close unless @stdout_pipe.closed?
|
200
|
+
@stderr_pipe.close unless @stderr_pipe.closed?
|
201
|
+
end
|
202
|
+
|
203
|
+
private
|
204
|
+
|
205
|
+
# Translates nonzero wget exit statuses into exceptions.
|
206
|
+
def handle_status(status, stderr)
|
170
207
|
case status.exitstatus
|
171
208
|
when 0 # No problems occurred
|
172
209
|
# success
|
@@ -188,23 +225,9 @@ module Down
|
|
188
225
|
raise Down::ResponseError, stderr
|
189
226
|
end
|
190
227
|
end
|
191
|
-
|
192
|
-
def terminate
|
193
|
-
begin
|
194
|
-
Process.kill("TERM", @status_reaper[:pid])
|
195
|
-
rescue Errno::ESRCH
|
196
|
-
# process has already terminated
|
197
|
-
end
|
198
|
-
|
199
|
-
close
|
200
|
-
end
|
201
|
-
|
202
|
-
def close
|
203
|
-
@stdout_pipe.close unless @stdout_pipe.closed?
|
204
|
-
@stderr_pipe.close unless @stderr_pipe.closed?
|
205
|
-
end
|
206
228
|
end
|
207
229
|
|
230
|
+
# Adds additional attributes to the Tempfile returned in #download.
|
208
231
|
module DownloadedFile
|
209
232
|
attr_accessor :url, :headers
|
210
233
|
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: down
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janko Marohnić
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: addressable
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.5'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: minitest
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|