down 4.5.0 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -7
- data/down.gemspec +2 -0
- data/lib/down.rb +2 -0
- data/lib/down/backend.rb +8 -7
- data/lib/down/http.rb +13 -17
- data/lib/down/net_http.rb +90 -20
- data/lib/down/utils.rb +2 -0
- data/lib/down/version.rb +1 -1
- data/lib/down/wget.rb +39 -16
- metadata +16 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1976aa6977c5e4161c2f13e0259840edae016dea49087ce8ea3bf46918b0eaee
|
|
4
|
+
data.tar.gz: 24d875754c8174a2bf5030e9f35a14bda4d2f2a30b143e002eecdc128f8a8a6d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b4102338d79a8ecfb5c643aa387aec15b1af79cb4c0ef7eda29828c79e454b9037bf388b82e52fbefcfe3ab22792e1a06a46f7ed8356228c5c8f49c203972c3
|
|
7
|
+
data.tar.gz: 25c901d8e9f6d447ff2332c69e957a189baaa3960938ce1f3560ef11723ff209162c3c82b608dda7da2d0120469b8a59f8b9bf6577709f7e81f28f081544f852
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
## 4.6.0 (2018-09-29)
|
|
2
|
+
|
|
3
|
+
* Ensure URLs are properly encoded in `NetHttp#download` and `#open` using Addressable (@linyaoli)
|
|
4
|
+
|
|
5
|
+
* Raise `ResponseError` with clear message when redirect URI was invalid in Down::NetHttp (@janko-m)
|
|
6
|
+
|
|
1
7
|
## 4.5.0 (2018-05-11)
|
|
2
8
|
|
|
3
9
|
* Deprecate passing an `HTTP::Client` object to `Down::Http#initialize` (@janko-m)
|
data/README.md
CHANGED
|
@@ -24,8 +24,8 @@ tempfile #=> #<Tempfile:/var/folders/k7/6zx6dx6x7ys3rv3srh0nyfj00000gn/T/2015092
|
|
|
24
24
|
|
|
25
25
|
### Metadata
|
|
26
26
|
|
|
27
|
-
The returned Tempfile has
|
|
28
|
-
|
|
27
|
+
The returned Tempfile has some additional attributes extracted from the
|
|
28
|
+
response data:
|
|
29
29
|
|
|
30
30
|
```rb
|
|
31
31
|
tempfile.content_type #=> "text/plain"
|
|
@@ -345,9 +345,8 @@ Some features that give the HTTP.rb backend an advantage over `open-uri` +
|
|
|
345
345
|
|
|
346
346
|
* Low memory usage (**10x less** than `open-uri`/`Net::HTTP`)
|
|
347
347
|
* Correct URI parsing with [Addressable::URI]
|
|
348
|
-
* Proper support
|
|
349
|
-
*
|
|
350
|
-
* Chaninable HTTP client builder API for setting default options
|
|
348
|
+
* Proper SSL support
|
|
349
|
+
* Chainable builder API for setting default options
|
|
351
350
|
* Support for persistent connections
|
|
352
351
|
|
|
353
352
|
#### Additional options
|
|
@@ -359,11 +358,12 @@ Down::Http.download("http://example.org/image.jpg", headers: { "Foo" => "Bar" })
|
|
|
359
358
|
Down::Http.open("http://example.org/image.jpg", follow: { max_hops: 0 })
|
|
360
359
|
```
|
|
361
360
|
|
|
362
|
-
|
|
361
|
+
However, it's recommended to configure request options using http.rb's
|
|
362
|
+
chainable API, as it's more convenient than passing raw options.
|
|
363
363
|
|
|
364
364
|
```rb
|
|
365
365
|
Down::Http.open("http://example.org/image.jpg") do |client|
|
|
366
|
-
client.timeout(connect: 3)
|
|
366
|
+
client.timeout(connect: 3, read: 3)
|
|
367
367
|
end
|
|
368
368
|
```
|
|
369
369
|
|
data/down.gemspec
CHANGED
|
@@ -15,6 +15,8 @@ Gem::Specification.new do |spec|
|
|
|
15
15
|
spec.files = Dir["README.md", "LICENSE.txt", "CHANGELOG.md", "*.gemspec", "lib/**/*.rb"]
|
|
16
16
|
spec.require_path = "lib"
|
|
17
17
|
|
|
18
|
+
spec.add_dependency "addressable", "~> 2.5"
|
|
19
|
+
|
|
18
20
|
spec.add_development_dependency "minitest", "~> 5.8"
|
|
19
21
|
spec.add_development_dependency "mocha", "~> 1.5"
|
|
20
22
|
spec.add_development_dependency "rake"
|
data/lib/down.rb
CHANGED
|
@@ -14,6 +14,7 @@ module Down
|
|
|
14
14
|
backend.open(*args, &block)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
+
# Allows setting a backend via a symbol or a downloader object.
|
|
17
18
|
def backend(value = nil)
|
|
18
19
|
if value.is_a?(Symbol)
|
|
19
20
|
require "down/#{value}"
|
|
@@ -26,4 +27,5 @@ module Down
|
|
|
26
27
|
end
|
|
27
28
|
end
|
|
28
29
|
|
|
30
|
+
# Set Net::HTTP as the default backend
|
|
29
31
|
Down.backend Down::NetHttp
|
data/lib/down/backend.rb
CHANGED
|
@@ -19,14 +19,15 @@ module Down
|
|
|
19
19
|
|
|
20
20
|
private
|
|
21
21
|
|
|
22
|
+
# If destination path is defined, move tempfile to the destination,
|
|
23
|
+
# otherwise return the tempfile unchanged.
|
|
22
24
|
def download_result(tempfile, destination)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
end
|
|
25
|
+
return tempfile unless destination
|
|
26
|
+
|
|
27
|
+
tempfile.close # required for Windows
|
|
28
|
+
FileUtils.mv tempfile.path, destination
|
|
29
|
+
|
|
30
|
+
nil
|
|
30
31
|
end
|
|
31
32
|
end
|
|
32
33
|
end
|
data/lib/down/http.rb
CHANGED
|
@@ -7,11 +7,11 @@ require "http"
|
|
|
7
7
|
require "down/backend"
|
|
8
8
|
|
|
9
9
|
require "tempfile"
|
|
10
|
-
require "cgi"
|
|
11
|
-
require "base64"
|
|
12
10
|
|
|
13
11
|
module Down
|
|
12
|
+
# Provides streaming downloads implemented with HTTP.rb.
|
|
14
13
|
class Http < Backend
|
|
14
|
+
# Initializes the backend with common defaults.
|
|
15
15
|
def initialize(options = {}, &block)
|
|
16
16
|
if options.is_a?(HTTP::Client)
|
|
17
17
|
warn "[Down] Passing an HTTP::Client object to Down::Http#initialize is deprecated and won't be supported in Down 5. Use the block initialization instead."
|
|
@@ -28,6 +28,8 @@ module Down
|
|
|
28
28
|
@client = block.call(@client) if block
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
+
# Downloads the remote file to disk. Accepts HTTP.rb options via a hash or a
|
|
32
|
+
# block, and some additional options as well.
|
|
31
33
|
def download(url, max_size: nil, progress_proc: nil, content_length_proc: nil, destination: nil, **options, &block)
|
|
32
34
|
response = request(url, **options, &block)
|
|
33
35
|
|
|
@@ -63,6 +65,9 @@ module Down
|
|
|
63
65
|
raise
|
|
64
66
|
end
|
|
65
67
|
|
|
68
|
+
# Starts retrieving the remote file and returns an IO-like object which
|
|
69
|
+
# downloads the response body on-demand. Accepts HTTP.rb options via a hash
|
|
70
|
+
# or a block.
|
|
66
71
|
def open(url, rewindable: true, **options, &block)
|
|
67
72
|
response = request(url, **options, &block)
|
|
68
73
|
|
|
@@ -84,9 +89,10 @@ module Down
|
|
|
84
89
|
end
|
|
85
90
|
|
|
86
91
|
def send_request(method, url, **options, &block)
|
|
87
|
-
|
|
92
|
+
uri = HTTP::URI.parse(url)
|
|
88
93
|
|
|
89
94
|
client = @client
|
|
95
|
+
client = client.basic_auth(user: uri.user, pass: uri.password) if uri.user || uri.password
|
|
90
96
|
client = block.call(client) if block
|
|
91
97
|
|
|
92
98
|
client.request(method, url, options)
|
|
@@ -94,6 +100,7 @@ module Down
|
|
|
94
100
|
request_error!(exception)
|
|
95
101
|
end
|
|
96
102
|
|
|
103
|
+
# Yields chunks of the response body to the block.
|
|
97
104
|
def stream_body(response, &block)
|
|
98
105
|
response.body.each(&block)
|
|
99
106
|
rescue => exception
|
|
@@ -102,20 +109,7 @@ module Down
|
|
|
102
109
|
response.connection.close unless @client.persistent?
|
|
103
110
|
end
|
|
104
111
|
|
|
105
|
-
|
|
106
|
-
uri = HTTP::URI.parse(url)
|
|
107
|
-
|
|
108
|
-
if uri.user || uri.password
|
|
109
|
-
user, pass = uri.user, uri.password
|
|
110
|
-
authorization = "Basic #{Base64.strict_encode64("#{user}:#{pass}")}"
|
|
111
|
-
options[:headers] ||= {}
|
|
112
|
-
options[:headers].merge!("Authorization" => authorization)
|
|
113
|
-
uri.user = uri.password = nil
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
uri.to_s
|
|
117
|
-
end
|
|
118
|
-
|
|
112
|
+
# Raises non-successful response as a Down::ResponseError.
|
|
119
113
|
def response_error!(response)
|
|
120
114
|
args = [response.status.to_s, response: response]
|
|
121
115
|
|
|
@@ -126,6 +120,7 @@ module Down
|
|
|
126
120
|
end
|
|
127
121
|
end
|
|
128
122
|
|
|
123
|
+
# Re-raise HTTP.rb exceptions as Down::Error exceptions.
|
|
129
124
|
def request_error!(exception)
|
|
130
125
|
case exception
|
|
131
126
|
when HTTP::Request::UnsupportedSchemeError, Addressable::URI::InvalidURIError
|
|
@@ -143,6 +138,7 @@ module Down
|
|
|
143
138
|
end
|
|
144
139
|
end
|
|
145
140
|
|
|
141
|
+
# Defines some additional attributes for the returned Tempfile.
|
|
146
142
|
module DownloadedFile
|
|
147
143
|
attr_accessor :url, :headers
|
|
148
144
|
|
data/lib/down/net_http.rb
CHANGED
|
@@ -2,15 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
require "open-uri"
|
|
4
4
|
require "net/https"
|
|
5
|
+
require "addressable/uri"
|
|
5
6
|
|
|
6
7
|
require "down/backend"
|
|
7
8
|
|
|
8
9
|
require "tempfile"
|
|
9
10
|
require "fileutils"
|
|
10
|
-
require "cgi"
|
|
11
11
|
|
|
12
12
|
module Down
|
|
13
|
+
# Provides streaming downloads implemented with Net::HTTP and open-uri.
|
|
13
14
|
class NetHttp < Backend
|
|
15
|
+
# Initializes the backend with common defaults.
|
|
14
16
|
def initialize(options = {})
|
|
15
17
|
@options = {
|
|
16
18
|
"User-Agent" => "Down/#{Down::VERSION}",
|
|
@@ -20,6 +22,8 @@ module Down
|
|
|
20
22
|
}.merge(options)
|
|
21
23
|
end
|
|
22
24
|
|
|
25
|
+
# Downloads a remote file to disk using open-uri. Accepts any open-uri
|
|
26
|
+
# options, and a few more.
|
|
23
27
|
def download(url, options = {})
|
|
24
28
|
options = @options.merge(options)
|
|
25
29
|
|
|
@@ -29,6 +33,11 @@ module Down
|
|
|
29
33
|
content_length_proc = options.delete(:content_length_proc)
|
|
30
34
|
destination = options.delete(:destination)
|
|
31
35
|
|
|
36
|
+
# Use open-uri's :content_length_proc or :progress_proc to raise an
|
|
37
|
+
# exception early if the file is too large.
|
|
38
|
+
#
|
|
39
|
+
# Also disable following redirects, as we'll provide our own
|
|
40
|
+
# implementation that has the ability to limit the number of redirects.
|
|
32
41
|
open_uri_options = {
|
|
33
42
|
content_length_proc: proc { |size|
|
|
34
43
|
if size && max_size && size > max_size
|
|
@@ -45,6 +54,7 @@ module Down
|
|
|
45
54
|
redirect: false,
|
|
46
55
|
}
|
|
47
56
|
|
|
57
|
+
# Handle basic authentication in the :proxy option.
|
|
48
58
|
if options[:proxy]
|
|
49
59
|
proxy = URI(options.delete(:proxy))
|
|
50
60
|
user = proxy.user
|
|
@@ -62,8 +72,9 @@ module Down
|
|
|
62
72
|
|
|
63
73
|
open_uri_options.merge!(options)
|
|
64
74
|
|
|
65
|
-
uri = ensure_uri(url)
|
|
75
|
+
uri = ensure_uri(addressable_normalize(url))
|
|
66
76
|
|
|
77
|
+
# Handle basic authentication in the remote URL.
|
|
67
78
|
if uri.user || uri.password
|
|
68
79
|
open_uri_options[:http_basic_authentication] ||= [uri.user, uri.password]
|
|
69
80
|
uri.user = nil
|
|
@@ -72,6 +83,7 @@ module Down
|
|
|
72
83
|
|
|
73
84
|
open_uri_file = open_uri(uri, open_uri_options, follows_remaining: max_redirects)
|
|
74
85
|
|
|
86
|
+
# Handle the fact that open-uri returns StringIOs for small files.
|
|
75
87
|
tempfile = ensure_tempfile(open_uri_file, File.extname(open_uri_file.base_uri.path))
|
|
76
88
|
OpenURI::Meta.init tempfile, open_uri_file # add back open-uri methods
|
|
77
89
|
tempfile.extend Down::NetHttp::DownloadedFile
|
|
@@ -79,11 +91,13 @@ module Down
|
|
|
79
91
|
download_result(tempfile, destination)
|
|
80
92
|
end
|
|
81
93
|
|
|
94
|
+
# Starts retrieving the remote file using Net::HTTP and returns an IO-like
|
|
95
|
+
# object which downloads the response body on-demand.
|
|
82
96
|
def open(url, options = {})
|
|
97
|
+
uri = ensure_uri(addressable_normalize(url))
|
|
83
98
|
options = @options.merge(options)
|
|
84
99
|
|
|
85
|
-
|
|
86
|
-
|
|
100
|
+
# Create a Fiber that halts when response headers are received.
|
|
87
101
|
request = Fiber.new do
|
|
88
102
|
net_http_request(uri, options) do |response|
|
|
89
103
|
Fiber.yield response
|
|
@@ -94,6 +108,7 @@ module Down
|
|
|
94
108
|
|
|
95
109
|
response_error!(response) unless response.is_a?(Net::HTTPSuccess)
|
|
96
110
|
|
|
111
|
+
# Build an IO-like object that will retrieve response body on-demand.
|
|
97
112
|
Down::ChunkedIO.new(
|
|
98
113
|
chunks: enum_for(:stream_body, response),
|
|
99
114
|
size: response["Content-Length"] && response["Content-Length"].to_i,
|
|
@@ -113,13 +128,22 @@ module Down
|
|
|
113
128
|
|
|
114
129
|
private
|
|
115
130
|
|
|
131
|
+
# Calls open-uri's URI::HTTP#open method. Additionally handles redirects.
|
|
116
132
|
def open_uri(uri, options, follows_remaining: 0)
|
|
117
|
-
|
|
133
|
+
uri.open(options)
|
|
118
134
|
rescue OpenURI::HTTPRedirect => exception
|
|
119
135
|
raise Down::TooManyRedirects, "too many redirects" if follows_remaining == 0
|
|
120
136
|
|
|
121
|
-
|
|
137
|
+
# fail if redirect URI scheme is not http or https
|
|
138
|
+
begin
|
|
139
|
+
uri = ensure_uri(exception.uri)
|
|
140
|
+
rescue Down::InvalidUrl
|
|
141
|
+
response = rebuild_response_from_open_uri_exception(exception)
|
|
142
|
+
|
|
143
|
+
raise ResponseError.new("Invalid Redirect URI: #{exception.uri}", response: response)
|
|
144
|
+
end
|
|
122
145
|
|
|
146
|
+
# forward cookies on the redirect
|
|
123
147
|
if !exception.io.meta["set-cookie"].to_s.empty?
|
|
124
148
|
options["Cookie"] = exception.io.meta["set-cookie"]
|
|
125
149
|
end
|
|
@@ -127,11 +151,11 @@ module Down
|
|
|
127
151
|
follows_remaining -= 1
|
|
128
152
|
retry
|
|
129
153
|
rescue OpenURI::HTTPError => exception
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
exception.
|
|
134
|
-
|
|
154
|
+
response = rebuild_response_from_open_uri_exception(exception)
|
|
155
|
+
|
|
156
|
+
# open-uri attempts to parse the redirect URI, so we re-raise that exception
|
|
157
|
+
if exception.message.include?("(Invalid Location URI)")
|
|
158
|
+
raise ResponseError.new("Invalid Redirect URI: #{response["Location"]}", response: response)
|
|
135
159
|
end
|
|
136
160
|
|
|
137
161
|
response_error!(response)
|
|
@@ -159,6 +183,7 @@ module Down
|
|
|
159
183
|
tempfile
|
|
160
184
|
end
|
|
161
185
|
|
|
186
|
+
# Makes a Net::HTTP request and follows redirects.
|
|
162
187
|
def net_http_request(uri, options, follows_remaining: options.fetch(:max_redirects, 2), &block)
|
|
163
188
|
http, request = create_net_http(uri, options)
|
|
164
189
|
|
|
@@ -167,7 +192,10 @@ module Down
|
|
|
167
192
|
http.request(request) do |response|
|
|
168
193
|
unless response.is_a?(Net::HTTPRedirection)
|
|
169
194
|
yield response
|
|
170
|
-
|
|
195
|
+
# In certain cases the caller wants to download only one portion
|
|
196
|
+
# of the file and close the connection, so we tell Net::HTTP that
|
|
197
|
+
# it shouldn't continue retrieving it.
|
|
198
|
+
response.instance_variable_set("@read", true)
|
|
171
199
|
end
|
|
172
200
|
end
|
|
173
201
|
end
|
|
@@ -178,13 +206,21 @@ module Down
|
|
|
178
206
|
if response.is_a?(Net::HTTPRedirection)
|
|
179
207
|
raise Down::TooManyRedirects if follows_remaining == 0
|
|
180
208
|
|
|
181
|
-
|
|
209
|
+
# fail if redirect URI is not a valid http or https URL
|
|
210
|
+
begin
|
|
211
|
+
location = ensure_uri(response["Location"], allow_relative: true)
|
|
212
|
+
rescue Down::InvalidUrl
|
|
213
|
+
raise ResponseError.new("Invalid Redirect URI: #{response["Location"]}", response: response)
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# handle relative redirects
|
|
182
217
|
location = uri + location if location.relative?
|
|
183
218
|
|
|
184
219
|
net_http_request(location, options, follows_remaining: follows_remaining - 1, &block)
|
|
185
220
|
end
|
|
186
221
|
end
|
|
187
222
|
|
|
223
|
+
# Build a Net::HTTP object for making a request.
|
|
188
224
|
def create_net_http(uri, options)
|
|
189
225
|
http_class = Net::HTTP
|
|
190
226
|
|
|
@@ -195,7 +231,7 @@ module Down
|
|
|
195
231
|
|
|
196
232
|
http = http_class.new(uri.host, uri.port)
|
|
197
233
|
|
|
198
|
-
# taken from open-uri implementation
|
|
234
|
+
# Handle SSL parameters (taken from the open-uri implementation).
|
|
199
235
|
if uri.is_a?(URI::HTTPS)
|
|
200
236
|
http.use_ssl = true
|
|
201
237
|
http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
|
|
@@ -214,7 +250,7 @@ module Down
|
|
|
214
250
|
http.open_timeout = options[:open_timeout] if options.key?(:open_timeout)
|
|
215
251
|
|
|
216
252
|
request_headers = options.select { |key, value| key.is_a?(String) }
|
|
217
|
-
request_headers["Accept-Encoding"] = "" #
|
|
253
|
+
request_headers["Accept-Encoding"] = "" # Net::HTTP's inflater causes FiberErrors
|
|
218
254
|
|
|
219
255
|
get = Net::HTTP::Get.new(uri.request_uri, request_headers)
|
|
220
256
|
get.basic_auth(uri.user, uri.password) if uri.user || uri.password
|
|
@@ -222,20 +258,51 @@ module Down
|
|
|
222
258
|
[http, get]
|
|
223
259
|
end
|
|
224
260
|
|
|
261
|
+
# Yields chunks of the response body to the block.
|
|
225
262
|
def stream_body(response, &block)
|
|
226
263
|
response.read_body(&block)
|
|
227
264
|
rescue => exception
|
|
228
265
|
request_error!(exception)
|
|
229
266
|
end
|
|
230
267
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
268
|
+
# Checks that the url is a valid URI and that its scheme is http or https.
|
|
269
|
+
def ensure_uri(url, allow_relative: false)
|
|
270
|
+
begin
|
|
271
|
+
uri = URI(url)
|
|
272
|
+
rescue URI::InvalidURIError => exception
|
|
273
|
+
raise Down::InvalidUrl, exception.message
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
unless allow_relative && uri.relative?
|
|
277
|
+
raise Down::InvalidUrl, "URL scheme needs to be http or https: #{uri}" unless uri.is_a?(URI::HTTP)
|
|
278
|
+
end
|
|
279
|
+
|
|
234
280
|
uri
|
|
235
|
-
rescue URI::InvalidURIError => exception
|
|
236
|
-
raise Down::InvalidUrl, exception.message
|
|
237
281
|
end
|
|
238
282
|
|
|
283
|
+
# Makes sure that the URL is properly encoded.
|
|
284
|
+
def addressable_normalize(url)
|
|
285
|
+
addressable_uri = Addressable::URI.parse(url)
|
|
286
|
+
addressable_uri.normalize.to_s
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
# When open-uri raises an exception, it doesn't expose the response object.
|
|
290
|
+
# Fortunately, the exception object holds response data that can be used to
|
|
291
|
+
# rebuild the Net::HTTP response object.
|
|
292
|
+
def rebuild_response_from_open_uri_exception(exception)
|
|
293
|
+
code, message = exception.io.status
|
|
294
|
+
|
|
295
|
+
response_class = Net::HTTPResponse::CODE_TO_OBJ.fetch(code)
|
|
296
|
+
response = response_class.new(nil, code, message)
|
|
297
|
+
|
|
298
|
+
exception.io.metas.each do |name, values|
|
|
299
|
+
values.each { |value| response.add_field(name, value) }
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
response
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
# Raises non-successful response as a Down::ResponseError.
|
|
239
306
|
def response_error!(response)
|
|
240
307
|
code = response.code.to_i
|
|
241
308
|
message = response.message.split(" ").map(&:capitalize).join(" ")
|
|
@@ -249,6 +316,7 @@ module Down
|
|
|
249
316
|
end
|
|
250
317
|
end
|
|
251
318
|
|
|
319
|
+
# Re-raise Net::HTTP exceptions as Down::Error exceptions.
|
|
252
320
|
def request_error!(exception)
|
|
253
321
|
case exception
|
|
254
322
|
when Net::OpenTimeout
|
|
@@ -264,6 +332,8 @@ module Down
|
|
|
264
332
|
end
|
|
265
333
|
end
|
|
266
334
|
|
|
335
|
+
# Defines some additional attributes for the returned Tempfile (on top of what
|
|
336
|
+
# OpenURI::Meta already defines).
|
|
267
337
|
module DownloadedFile
|
|
268
338
|
def original_filename
|
|
269
339
|
Utils.filename_from_content_disposition(meta["content-disposition"]) ||
|
data/lib/down/utils.rb
CHANGED
|
@@ -4,6 +4,7 @@ module Down
|
|
|
4
4
|
module Utils
|
|
5
5
|
module_function
|
|
6
6
|
|
|
7
|
+
# Retrieves potential filename from the "Content-Disposition" header.
|
|
7
8
|
def filename_from_content_disposition(content_disposition)
|
|
8
9
|
content_disposition = content_disposition.to_s
|
|
9
10
|
|
|
@@ -13,6 +14,7 @@ module Down
|
|
|
13
14
|
filename unless filename.empty?
|
|
14
15
|
end
|
|
15
16
|
|
|
17
|
+
# Retrieves potential filename from the URL path.
|
|
16
18
|
def filename_from_path(path)
|
|
17
19
|
filename = path.split("/").last
|
|
18
20
|
CGI.unescape(filename) if filename
|
data/lib/down/version.rb
CHANGED
data/lib/down/wget.rb
CHANGED
|
@@ -11,10 +11,12 @@ require "down/backend"
|
|
|
11
11
|
|
|
12
12
|
require "tempfile"
|
|
13
13
|
require "uri"
|
|
14
|
-
require "cgi"
|
|
15
14
|
|
|
16
15
|
module Down
|
|
16
|
+
# Provides streaming downloads implemented with the wget command-line tool.
|
|
17
|
+
# The design is very similar to Down::Http.
|
|
17
18
|
class Wget < Backend
|
|
19
|
+
# Initializes the backend with common defaults.
|
|
18
20
|
def initialize(*arguments)
|
|
19
21
|
@arguments = [
|
|
20
22
|
user_agent: "Down/#{Down::VERSION}",
|
|
@@ -25,6 +27,8 @@ module Down
|
|
|
25
27
|
] + arguments
|
|
26
28
|
end
|
|
27
29
|
|
|
30
|
+
# Downloads the remote file to disk. Accepts wget command-line options and
|
|
31
|
+
# some additional options as well.
|
|
28
32
|
def download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, destination: nil, **options)
|
|
29
33
|
io = open(url, *args, **options, rewindable: false)
|
|
30
34
|
|
|
@@ -63,10 +67,13 @@ module Down
|
|
|
63
67
|
io.close if io
|
|
64
68
|
end
|
|
65
69
|
|
|
70
|
+
# Starts retrieving the remote file and returns an IO-like object which
|
|
71
|
+
# downloads the response body on-demand. Accepts wget command-line options.
|
|
66
72
|
def open(url, *args, rewindable: true, **options)
|
|
67
73
|
arguments = generate_command(url, *args, **options)
|
|
68
74
|
|
|
69
75
|
command = Down::Wget::Command.execute(arguments)
|
|
76
|
+
# Wrap the wget command output in an IO-like object.
|
|
70
77
|
output = Down::ChunkedIO.new(
|
|
71
78
|
chunks: command.enum_for(:output),
|
|
72
79
|
on_close: command.method(:terminate),
|
|
@@ -78,6 +85,7 @@ module Down
|
|
|
78
85
|
header_string << output.readpartial until header_string.include?("\r\n\r\n")
|
|
79
86
|
header_string, first_chunk = header_string.split("\r\n\r\n", 2)
|
|
80
87
|
|
|
88
|
+
# Use an HTTP parser to parse out the response headers.
|
|
81
89
|
parser = HTTP::Parser.new
|
|
82
90
|
parser << header_string
|
|
83
91
|
|
|
@@ -92,6 +100,7 @@ module Down
|
|
|
92
100
|
content_length = headers["Content-Length"].to_i if headers["Content-Length"]
|
|
93
101
|
charset = headers["Content-Type"][/;\s*charset=([^;]+)/i, 1] if headers["Content-Type"]
|
|
94
102
|
|
|
103
|
+
# Create an Enumerator which will lazily retrieve chunks of response body.
|
|
95
104
|
chunks = Enumerator.new do |yielder|
|
|
96
105
|
yielder << first_chunk if first_chunk
|
|
97
106
|
yielder << output.readpartial until output.eof?
|
|
@@ -109,6 +118,7 @@ module Down
|
|
|
109
118
|
|
|
110
119
|
private
|
|
111
120
|
|
|
121
|
+
# Generates the wget command.
|
|
112
122
|
def generate_command(url, *args, **options)
|
|
113
123
|
command = %W[wget --no-verbose --save-headers -O -]
|
|
114
124
|
|
|
@@ -131,10 +141,12 @@ module Down
|
|
|
131
141
|
command
|
|
132
142
|
end
|
|
133
143
|
|
|
144
|
+
# Handles executing the wget command.
|
|
134
145
|
class Command
|
|
135
146
|
PIPE_BUFFER_SIZE = 64*1024
|
|
136
147
|
|
|
137
148
|
def self.execute(arguments)
|
|
149
|
+
# posix-spawn gem has better performance, so we use it if it's available
|
|
138
150
|
if defined?(POSIX::Spawn)
|
|
139
151
|
pid, stdin_pipe, stdout_pipe, stderr_pipe = POSIX::Spawn.popen4(*arguments)
|
|
140
152
|
status_reaper = Process.detach(pid)
|
|
@@ -156,6 +168,7 @@ module Down
|
|
|
156
168
|
@stderr_pipe = stderr_pipe
|
|
157
169
|
end
|
|
158
170
|
|
|
171
|
+
# Yields chunks of stdout. At the end handles the exit status.
|
|
159
172
|
def output
|
|
160
173
|
# Keep emptying the stderr buffer, to allow the subprocess to send more
|
|
161
174
|
# than 64KB if it wants to.
|
|
@@ -165,8 +178,32 @@ module Down
|
|
|
165
178
|
|
|
166
179
|
status = @status_reaper.value
|
|
167
180
|
stderr = stderr_reader.value
|
|
181
|
+
|
|
168
182
|
close
|
|
169
183
|
|
|
184
|
+
handle_status(status, stderr)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def terminate
|
|
188
|
+
begin
|
|
189
|
+
Process.kill("TERM", @status_reaper[:pid])
|
|
190
|
+
Process.waitpid(@status_reaper[:pid])
|
|
191
|
+
rescue Errno::ESRCH
|
|
192
|
+
# process has already terminated
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
close
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def close
|
|
199
|
+
@stdout_pipe.close unless @stdout_pipe.closed?
|
|
200
|
+
@stderr_pipe.close unless @stderr_pipe.closed?
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
private
|
|
204
|
+
|
|
205
|
+
# Translates nonzero wget exit statuses into exceptions.
|
|
206
|
+
def handle_status(status, stderr)
|
|
170
207
|
case status.exitstatus
|
|
171
208
|
when 0 # No problems occurred
|
|
172
209
|
# success
|
|
@@ -188,23 +225,9 @@ module Down
|
|
|
188
225
|
raise Down::ResponseError, stderr
|
|
189
226
|
end
|
|
190
227
|
end
|
|
191
|
-
|
|
192
|
-
def terminate
|
|
193
|
-
begin
|
|
194
|
-
Process.kill("TERM", @status_reaper[:pid])
|
|
195
|
-
rescue Errno::ESRCH
|
|
196
|
-
# process has already terminated
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
close
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
def close
|
|
203
|
-
@stdout_pipe.close unless @stdout_pipe.closed?
|
|
204
|
-
@stderr_pipe.close unless @stderr_pipe.closed?
|
|
205
|
-
end
|
|
206
228
|
end
|
|
207
229
|
|
|
230
|
+
# Adds additional attributes to the Tempfile returned in #download.
|
|
208
231
|
module DownloadedFile
|
|
209
232
|
attr_accessor :url, :headers
|
|
210
233
|
|
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: down
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.
|
|
4
|
+
version: 4.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Janko Marohnić
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-
|
|
11
|
+
date: 2018-09-28 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: addressable
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '2.5'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '2.5'
|
|
13
27
|
- !ruby/object:Gem::Dependency
|
|
14
28
|
name: minitest
|
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|