down 3.2.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +94 -33
- data/down.gemspec +2 -0
- data/lib/down.rb +28 -1
- data/lib/down/backend.rb +17 -0
- data/lib/down/chunked_io.rb +38 -26
- data/lib/down/errors.rb +3 -1
- data/lib/down/http.rb +20 -30
- data/lib/down/net_http.rb +15 -21
- data/lib/down/version.rb +3 -1
- data/lib/down/wget.rb +229 -0
- metadata +32 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d150dd7f5e9123d9c89ea2cc982fe154fb2345d
|
4
|
+
data.tar.gz: 3ecf281961282363ae6b9388c0d3d58cb901be76
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93edf1ffb5f96ddc649a907bda723bcdcb82ab908ba8e1ca55cf9360e6cab79b4a8d3bc4957f6445bbce612ce857de7bd4486ccbd0b508455e5fb03899416d63
|
7
|
+
data.tar.gz: 13f2f52396e71eccc5e4f8cd9fcb6a2094ec10ac24beeff109745800d85bbee46fcf6c4f5b2b400836c299657d574a8fa86e72f45c32eb6adb7de6f6643495e8
|
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# Down
|
2
2
|
|
3
3
|
Down is a utility tool for streaming, flexible and safe downloading of remote
|
4
|
-
files. It can use [open-uri] + `Net::HTTP
|
5
|
-
library.
|
4
|
+
files. It can use [open-uri] + `Net::HTTP`, [HTTP.rb] or `wget` as the backend
|
5
|
+
HTTP library.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -27,8 +27,9 @@ The returned Tempfile has `#content_type` and `#original_filename` attributes
|
|
27
27
|
determined from the response headers:
|
28
28
|
|
29
29
|
```rb
|
30
|
-
tempfile.content_type #=> "
|
31
|
-
tempfile.original_filename #=> "
|
30
|
+
tempfile.content_type #=> "text/plain"
|
31
|
+
tempfile.original_filename #=> "document.txt"
|
32
|
+
tempfile.charset #=> "utf-8"
|
32
33
|
```
|
33
34
|
|
34
35
|
### Maximum size
|
@@ -57,6 +58,19 @@ Down.download("http://user:password@example.org")
|
|
57
58
|
Down.open("http://user:password@example.org")
|
58
59
|
```
|
59
60
|
|
61
|
+
### Progress
|
62
|
+
|
63
|
+
`Down.download` supports `:content_length_proc`, which gets called with the
|
64
|
+
value of the `Content-Length` header as soon as it's received, and
|
65
|
+
`:progress_proc`, which gets called with current filesize whenever a new chunk
|
66
|
+
is downloaded.
|
67
|
+
|
68
|
+
```rb
|
69
|
+
Down.download "http://example.com/movie.mp4",
|
70
|
+
content_length_proc: -> (content_length) { ... },
|
71
|
+
progress_proc: -> (progress) { ... }
|
72
|
+
```
|
73
|
+
|
60
74
|
## Streaming
|
61
75
|
|
62
76
|
Down has the ability to retrieve content of the remote file *as it is being
|
@@ -180,24 +194,26 @@ the `Down::Error` subclasses. This is Down's exception hierarchy:
|
|
180
194
|
|
181
195
|
By default Down implements `Down.download` and `Down.open` using the built-in
|
182
196
|
[open-uri] + [Net::HTTP] Ruby standard libraries. However, there are other
|
183
|
-
backends as well
|
197
|
+
backends as well, see the sections below.
|
198
|
+
|
199
|
+
You can use the backend directly:
|
184
200
|
|
185
201
|
```rb
|
186
|
-
require "down/net_http"
|
187
|
-
|
202
|
+
require "down/net_http"
|
203
|
+
|
204
|
+
Down::NetHttp.download("...")
|
205
|
+
Down::NetHttp.open("...")
|
188
206
|
```
|
189
207
|
|
190
|
-
|
191
|
-
but it's recommended you always use the backends explicitly:
|
208
|
+
Or you can set the backend globally (default is `:net_http`):
|
192
209
|
|
193
210
|
```rb
|
194
|
-
|
211
|
+
require "down"
|
212
|
+
|
213
|
+
Down.backend :http # use the Down::Http backend
|
214
|
+
|
195
215
|
Down.download("...")
|
196
216
|
Down.open("...")
|
197
|
-
|
198
|
-
# recommended
|
199
|
-
Down::NetHttp.download("...")
|
200
|
-
Down::NetHttp.open("...")
|
201
217
|
```
|
202
218
|
|
203
219
|
### open-uri + Net::HTTP
|
@@ -282,6 +298,15 @@ as request headers, like with open-uri.
|
|
282
298
|
Down::NetHttp.open("http://example.com/image.jpg", {"Authorization" => "..."})
|
283
299
|
```
|
284
300
|
|
301
|
+
You can also initialize the backend with default options:
|
302
|
+
|
303
|
+
```rb
|
304
|
+
net_http = Down::NetHttp.new(open_timeout: 3)
|
305
|
+
|
306
|
+
net_http.download("http://example.com/image.jpg")
|
307
|
+
net_http.open("http://example.com/image.jpg")
|
308
|
+
```
|
309
|
+
|
285
310
|
### HTTP.rb
|
286
311
|
|
287
312
|
```rb
|
@@ -307,41 +332,77 @@ Net::HTTP include:
|
|
307
332
|
* Chainable HTTP client builder API for setting default options
|
308
333
|
* Support for persistent connections
|
309
334
|
|
310
|
-
|
335
|
+
#### Additional options
|
311
336
|
|
312
|
-
|
313
|
-
via `Down::Http.client`:
|
337
|
+
All additional options will be forwarded to `HTTP::Client#request`:
|
314
338
|
|
315
339
|
```rb
|
316
|
-
|
317
|
-
Down::Http.
|
318
|
-
|
340
|
+
Down::Http.download("http://example.org/image.jpg", timeout: { open: 3 })
|
341
|
+
Down::Http.open("http://example.org/image.jpg", follow: { max_hops: 0 })
|
342
|
+
```
|
343
|
+
|
344
|
+
If you prefer to add options using the chainable API, you can pass a block:
|
319
345
|
|
320
|
-
|
321
|
-
Down::Http.
|
346
|
+
```rb
|
347
|
+
Down::Http.open("http://example.org/image.jpg") do |client|
|
348
|
+
client.timeout(open: 3)
|
349
|
+
end
|
322
350
|
```
|
323
351
|
|
324
|
-
|
352
|
+
You can also initialize the backend with default options:
|
353
|
+
|
354
|
+
```rb
|
355
|
+
http = Down::Http.new(timeout: { open: 3 })
|
356
|
+
# or
|
357
|
+
http = Down::Http.new(HTTP.timeout(open: 3))
|
358
|
+
|
359
|
+
http.download("http://example.com/image.jpg")
|
360
|
+
http.open("http://example.com/image.jpg")
|
361
|
+
```
|
325
362
|
|
326
|
-
|
327
|
-
forwarded to `HTTP::Client#request`:
|
363
|
+
### Wget (experimental)
|
328
364
|
|
329
365
|
```rb
|
330
|
-
|
366
|
+
gem "down", ">= 3.0"
|
367
|
+
gem "posix-spawn" # omit if on JRuby
|
368
|
+
gem "http_parser.rb"
|
331
369
|
```
|
370
|
+
```rb
|
371
|
+
require "down/wget"
|
332
372
|
|
333
|
-
|
373
|
+
tempfile = Down::Wget.download("http://nature.com/forest.jpg")
|
374
|
+
tempfile #=> #<Tempfile:/var/folders/k7/6zx6dx6x7ys3rv3srh0nyfj00000gn/T/20150925-55456-z7vxqz.jpg>
|
375
|
+
|
376
|
+
io = Down::Wget.open("http://nature.com/forest.jpg")
|
377
|
+
io #=> #<Down::ChunkedIO ...>
|
378
|
+
```
|
379
|
+
|
380
|
+
The Wget backend uses the `wget` command line utility for downloading. One
|
381
|
+
major advantage of `wget` is that it automatically resumes downloads that were
|
382
|
+
interrupted due to network failures, which is very useful when you're
|
383
|
+
downloading large files.
|
384
|
+
|
385
|
+
However, the Wget backend should still be considered experimental, as it wasn't
|
386
|
+
easy to implement a CLI wrapper that streams output, so it's possible that I've
|
387
|
+
made mistakes. Let me know how it's working out for you 😉.
|
388
|
+
|
389
|
+
#### Additional arguments
|
390
|
+
|
391
|
+
You can pass additional arguments to the underlying `wget` command via symbols:
|
334
392
|
|
335
393
|
```rb
|
336
|
-
Down::
|
337
|
-
|
338
|
-
end
|
394
|
+
Down::Wget.download("http://nature.com/forest.jpg", :no_proxy, connect_timeout: 3)
|
395
|
+
Down::Wget.open("http://nature.com/forest.jpg", user: "janko", password: "secret")
|
339
396
|
```
|
340
397
|
|
341
|
-
|
398
|
+
You can also initialize the backend with default arguments:
|
342
399
|
|
343
|
-
|
344
|
-
|
400
|
+
```rb
|
401
|
+
wget = Down::Wget.new(:no_proxy, connect_timeout: 3)
|
402
|
+
|
403
|
+
wget.download("http://nature.com/forest.jpg")
|
404
|
+
wget.open("http://nature.com/forest.jpg")
|
405
|
+
```
|
345
406
|
|
346
407
|
## Supported Ruby versions
|
347
408
|
|
data/down.gemspec
CHANGED
@@ -18,5 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.add_development_dependency "minitest", "~> 5.8"
|
19
19
|
spec.add_development_dependency "mocha"
|
20
20
|
spec.add_development_dependency "http", "~> 2.1"
|
21
|
+
spec.add_development_dependency "posix-spawn" unless RUBY_ENGINE == "jruby"
|
22
|
+
spec.add_development_dependency "http_parser.rb"
|
21
23
|
spec.add_development_dependency "docker-api"
|
22
24
|
end
|
data/lib/down.rb
CHANGED
@@ -1,4 +1,31 @@
|
|
1
1
|
# frozen-string-literal: true
|
2
2
|
|
3
3
|
require "down/version"
|
4
|
-
|
4
|
+
|
5
|
+
module Down
|
6
|
+
module_function
|
7
|
+
|
8
|
+
def download(*args, &block)
|
9
|
+
backend.download(*args, &block)
|
10
|
+
end
|
11
|
+
|
12
|
+
def open(*args, &block)
|
13
|
+
backend.open(*args, &block)
|
14
|
+
end
|
15
|
+
|
16
|
+
def backend(value = nil)
|
17
|
+
if value.is_a?(Symbol)
|
18
|
+
require "down/#{value}"
|
19
|
+
@backend = Down.const_get(value.to_s.split("_").map(&:capitalize).join)
|
20
|
+
elsif value
|
21
|
+
@backend = value
|
22
|
+
else
|
23
|
+
backend :net_http if @backend.nil?
|
24
|
+
@backend
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def backend=(value)
|
29
|
+
@backend = value
|
30
|
+
end
|
31
|
+
end
|
data/lib/down/backend.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen-string-literal: true
|
2
|
+
|
3
|
+
require "down/version"
|
4
|
+
require "down/chunked_io"
|
5
|
+
require "down/errors"
|
6
|
+
|
7
|
+
module Down
|
8
|
+
class Backend
|
9
|
+
def self.download(*args, &block)
|
10
|
+
new.download(*args, &block)
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.open(*args, &block)
|
14
|
+
new.open(*args, &block)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/down/chunked_io.rb
CHANGED
@@ -29,23 +29,52 @@ module Down
|
|
29
29
|
def read(length = nil, outbuf = nil)
|
30
30
|
raise IOError, "closed stream" if closed?
|
31
31
|
|
32
|
-
|
32
|
+
remaining_length = length
|
33
|
+
|
34
|
+
begin
|
35
|
+
data = readpartial(remaining_length, outbuf)
|
36
|
+
data = data.dup unless outbuf
|
37
|
+
remaining_length = length - data.bytesize if length
|
38
|
+
rescue EOFError
|
39
|
+
end
|
40
|
+
|
41
|
+
until remaining_length == 0 || eof?
|
42
|
+
data << readpartial(remaining_length)
|
43
|
+
remaining_length = length - data.bytesize if length
|
44
|
+
end
|
45
|
+
|
46
|
+
data.to_s unless length && (data.nil? || data.empty?)
|
47
|
+
end
|
48
|
+
|
49
|
+
def readpartial(length = nil, outbuf = nil)
|
50
|
+
raise IOError, "closed stream" if closed?
|
51
|
+
|
52
|
+
data = outbuf.replace("").force_encoding(@encoding) if outbuf
|
33
53
|
|
34
54
|
if cache && !cache.eof?
|
35
|
-
cache.read(length, outbuf)
|
36
|
-
|
55
|
+
data = cache.read(length, outbuf)
|
56
|
+
data.force_encoding(@encoding)
|
37
57
|
end
|
38
58
|
|
39
|
-
|
40
|
-
|
59
|
+
if @buffer.nil? && (data.nil? || data.empty?)
|
60
|
+
raise EOFError, "end of file reached" if chunks_depleted?
|
61
|
+
@buffer = retrieve_chunk
|
62
|
+
end
|
63
|
+
|
64
|
+
remaining_length = data && length ? length - data.bytesize : length
|
41
65
|
|
42
|
-
|
43
|
-
|
66
|
+
unless @buffer.nil? || remaining_length == 0
|
67
|
+
buffered_data = if remaining_length && remaining_length < @buffer.bytesize
|
68
|
+
@buffer.byteslice(0, remaining_length)
|
44
69
|
else
|
45
70
|
@buffer
|
46
71
|
end
|
47
72
|
|
48
|
-
|
73
|
+
if data
|
74
|
+
data << buffered_data
|
75
|
+
else
|
76
|
+
data = buffered_data
|
77
|
+
end
|
49
78
|
|
50
79
|
cache.write(buffered_data) if cache
|
51
80
|
|
@@ -56,24 +85,7 @@ module Down
|
|
56
85
|
end
|
57
86
|
end
|
58
87
|
|
59
|
-
|
60
|
-
end
|
61
|
-
|
62
|
-
def readpartial(maxlen = nil, outbuf = nil)
|
63
|
-
raise IOError, "closed stream" if closed?
|
64
|
-
|
65
|
-
available_length = 0
|
66
|
-
available_length += cache.size - cache.pos if cache
|
67
|
-
available_length += @buffer.bytesize if @buffer
|
68
|
-
|
69
|
-
if available_length > 0
|
70
|
-
read([available_length, *maxlen].min, outbuf)
|
71
|
-
elsif !chunks_depleted?
|
72
|
-
read([@next_chunk.bytesize, *maxlen].min, outbuf)
|
73
|
-
else
|
74
|
-
outbuf.replace("").force_encoding(@encoding) if outbuf
|
75
|
-
raise EOFError, "end of file reached"
|
76
|
-
end
|
88
|
+
data
|
77
89
|
end
|
78
90
|
|
79
91
|
def eof?
|
data/lib/down/errors.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen-string-literal: true
|
2
|
+
|
1
3
|
module Down
|
2
4
|
# generic error which is a superclass to all other errors
|
3
5
|
class Error < StandardError; end
|
@@ -18,7 +20,7 @@ module Down
|
|
18
20
|
class ResponseError < NotFound
|
19
21
|
attr_reader :response
|
20
22
|
|
21
|
-
def initialize(message, response:)
|
23
|
+
def initialize(message, response: nil)
|
22
24
|
super(message)
|
23
25
|
@response = response
|
24
26
|
end
|
data/lib/down/http.rb
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
require "http"
|
4
4
|
|
5
|
-
require "down/
|
6
|
-
require "down/chunked_io"
|
7
|
-
require "down/errors"
|
5
|
+
require "down/backend"
|
8
6
|
|
9
7
|
require "tempfile"
|
10
8
|
require "cgi"
|
@@ -15,33 +13,31 @@ if Gem::Version.new(HTTP::VERSION) < Gem::Version.new("2.1.0")
|
|
15
13
|
end
|
16
14
|
|
17
15
|
module Down
|
18
|
-
|
16
|
+
class Http < Backend
|
17
|
+
def initialize(client_or_options = nil)
|
18
|
+
options = client_or_options
|
19
|
+
options = client_or_options.default_options if client_or_options.is_a?(HTTP::Client)
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
def open(url, **options, &block)
|
25
|
-
Http.open(url, **options, &block)
|
26
|
-
end
|
27
|
-
|
28
|
-
module Http
|
29
|
-
module_function
|
30
|
-
|
31
|
-
def download(url, **options, &block)
|
32
|
-
max_size = options.delete(:max_size)
|
21
|
+
@client = HTTP.headers("User-Agent" => "Down/#{VERSION}").follow(max_hops: 2)
|
22
|
+
@client = HTTP::Client.new(@client.default_options.merge(options)) if options
|
23
|
+
end
|
33
24
|
|
25
|
+
def download(url, max_size: nil, progress_proc: nil, content_length_proc: nil, **options, &block)
|
34
26
|
io = open(url, **options, rewindable: false, &block)
|
35
27
|
|
28
|
+
content_length_proc.call(io.size) if content_length_proc && io.size
|
29
|
+
|
36
30
|
if max_size && io.size && io.size > max_size
|
37
31
|
raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
|
38
32
|
end
|
39
33
|
|
40
34
|
extname = File.extname(io.data[:response].uri.path)
|
41
|
-
tempfile = Tempfile.new(["down", extname], binmode: true)
|
35
|
+
tempfile = Tempfile.new(["down-http", extname], binmode: true)
|
42
36
|
|
43
|
-
io.
|
44
|
-
tempfile.write(
|
37
|
+
until io.eof?
|
38
|
+
tempfile.write(io.readpartial)
|
39
|
+
|
40
|
+
progress_proc.call(tempfile.size) if progress_proc
|
45
41
|
|
46
42
|
if max_size && tempfile.size > max_size
|
47
43
|
raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
|
@@ -75,11 +71,13 @@ module Down
|
|
75
71
|
size: response.content_length,
|
76
72
|
encoding: response.content_type.charset,
|
77
73
|
rewindable: rewindable,
|
78
|
-
on_close: (-> { response.connection.close } unless client.persistent?),
|
74
|
+
on_close: (-> { response.connection.close } unless @client.persistent?),
|
79
75
|
data: { status: response.code, headers: response.headers.to_h, response: response },
|
80
76
|
)
|
81
77
|
end
|
82
78
|
|
79
|
+
private
|
80
|
+
|
83
81
|
def get(url, **options, &block)
|
84
82
|
uri = HTTP::URI.parse(url)
|
85
83
|
|
@@ -90,19 +88,11 @@ module Down
|
|
90
88
|
uri.user = uri.password = nil
|
91
89
|
end
|
92
90
|
|
93
|
-
client =
|
91
|
+
client = @client
|
94
92
|
client = block.call(client) if block
|
95
93
|
client.get(url, options)
|
96
94
|
end
|
97
95
|
|
98
|
-
def client
|
99
|
-
Thread.current[:down_client] ||= ::HTTP.headers("User-Agent" => "Down/#{VERSION}").follow(max_hops: 2)
|
100
|
-
end
|
101
|
-
|
102
|
-
def client=(value)
|
103
|
-
Thread.current[:down_client] = value
|
104
|
-
end
|
105
|
-
|
106
96
|
def response_error!(response)
|
107
97
|
args = [response.status.to_s, response: response]
|
108
98
|
|
data/lib/down/net_http.rb
CHANGED
@@ -1,33 +1,23 @@
|
|
1
|
+
# frozen-string-literal: true
|
2
|
+
|
1
3
|
require "open-uri"
|
2
4
|
require "net/http"
|
3
5
|
|
4
|
-
require "down/
|
5
|
-
require "down/chunked_io"
|
6
|
-
require "down/errors"
|
6
|
+
require "down/backend"
|
7
7
|
|
8
8
|
require "tempfile"
|
9
9
|
require "fileutils"
|
10
10
|
require "cgi"
|
11
11
|
|
12
12
|
module Down
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
def open(uri, options = {})
|
20
|
-
NetHttp.open(uri, options)
|
21
|
-
end
|
22
|
-
|
23
|
-
def copy_to_tempfile(basename, io)
|
24
|
-
NetHttp.copy_to_tempfile(basename, io)
|
25
|
-
end
|
26
|
-
|
27
|
-
module NetHttp
|
28
|
-
module_function
|
13
|
+
class NetHttp < Backend
|
14
|
+
def initialize(options = {})
|
15
|
+
@options = options
|
16
|
+
end
|
29
17
|
|
30
18
|
def download(uri, options = {})
|
19
|
+
options = @options.merge(options)
|
20
|
+
|
31
21
|
max_size = options.delete(:max_size)
|
32
22
|
max_redirects = options.delete(:max_redirects) || 2
|
33
23
|
progress_proc = options.delete(:progress_proc)
|
@@ -65,7 +55,7 @@ module Down
|
|
65
55
|
end
|
66
56
|
end
|
67
57
|
|
68
|
-
open_uri_options.
|
58
|
+
open_uri_options.merge!(options)
|
69
59
|
|
70
60
|
tries = max_redirects + 1
|
71
61
|
|
@@ -122,6 +112,8 @@ module Down
|
|
122
112
|
end
|
123
113
|
|
124
114
|
def open(uri, options = {})
|
115
|
+
options = @options.merge(options)
|
116
|
+
|
125
117
|
begin
|
126
118
|
uri = URI(uri)
|
127
119
|
if uri.class != URI::HTTP && uri.class != URI::HTTPS
|
@@ -197,8 +189,10 @@ module Down
|
|
197
189
|
)
|
198
190
|
end
|
199
191
|
|
192
|
+
private
|
193
|
+
|
200
194
|
def copy_to_tempfile(basename, io)
|
201
|
-
tempfile = Tempfile.new(["down", File.extname(basename)], binmode: true)
|
195
|
+
tempfile = Tempfile.new(["down-net_http", File.extname(basename)], binmode: true)
|
202
196
|
if io.is_a?(OpenURI::Meta) && io.is_a?(Tempfile)
|
203
197
|
io.close
|
204
198
|
tempfile.close
|
data/lib/down/version.rb
CHANGED
data/lib/down/wget.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
# frozen-string-literal: true
|
2
|
+
|
3
|
+
if RUBY_ENGINE == "jruby"
|
4
|
+
require "open3"
|
5
|
+
else
|
6
|
+
require "posix-spawn"
|
7
|
+
end
|
8
|
+
require "http_parser"
|
9
|
+
|
10
|
+
require "down/backend"
|
11
|
+
|
12
|
+
require "tempfile"
|
13
|
+
require "uri"
|
14
|
+
require "cgi"
|
15
|
+
|
16
|
+
module Down
|
17
|
+
class Wget < Backend
|
18
|
+
def initialize(*arguments)
|
19
|
+
@arguments = [max_redirect: 2, user_agent: "Down/#{VERSION}"] + arguments
|
20
|
+
end
|
21
|
+
|
22
|
+
def download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, **options)
|
23
|
+
io = open(url, **options, rewindable: false)
|
24
|
+
|
25
|
+
content_length_proc.call(io.size) if content_length_proc && io.size
|
26
|
+
|
27
|
+
if max_size && io.size && io.size > max_size
|
28
|
+
raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
|
29
|
+
end
|
30
|
+
|
31
|
+
extname = File.extname(URI(url).path)
|
32
|
+
tempfile = Tempfile.new(["down-wget", extname], binmode: true)
|
33
|
+
|
34
|
+
until io.eof?
|
35
|
+
tempfile.write(io.readpartial)
|
36
|
+
|
37
|
+
progress_proc.call(tempfile.size) if progress_proc
|
38
|
+
|
39
|
+
if max_size && tempfile.size > max_size
|
40
|
+
raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
tempfile.open # flush written content
|
45
|
+
|
46
|
+
tempfile.extend DownloadedFile
|
47
|
+
tempfile.url = url
|
48
|
+
tempfile.headers = io.data[:headers]
|
49
|
+
|
50
|
+
tempfile
|
51
|
+
rescue
|
52
|
+
tempfile.close! if tempfile
|
53
|
+
raise
|
54
|
+
ensure
|
55
|
+
io.close if io
|
56
|
+
end
|
57
|
+
|
58
|
+
def open(url, *args, rewindable: true, **options)
|
59
|
+
arguments = generate_command(url, *args, **options)
|
60
|
+
|
61
|
+
command = Command.execute(arguments)
|
62
|
+
output = Down::ChunkedIO.new(
|
63
|
+
chunks: command.enum_for(:output),
|
64
|
+
on_close: command.method(:terminate),
|
65
|
+
rewindable: false,
|
66
|
+
)
|
67
|
+
|
68
|
+
# https://github.com/tmm1/http_parser.rb/issues/29#issuecomment-309976363
|
69
|
+
header_string = output.readpartial
|
70
|
+
header_string << output.readpartial until header_string.include?("\r\n\r\n")
|
71
|
+
header_string, first_chunk = header_string.split("\r\n\r\n", 2)
|
72
|
+
|
73
|
+
parser = HTTP::Parser.new
|
74
|
+
parser << header_string
|
75
|
+
|
76
|
+
if parser.headers.nil?
|
77
|
+
output.close
|
78
|
+
raise Down::Error, "failed to parse response headers"
|
79
|
+
end
|
80
|
+
|
81
|
+
headers = parser.headers
|
82
|
+
status = parser.status_code
|
83
|
+
|
84
|
+
content_length = headers["Content-Length"].to_i if headers["Content-Length"]
|
85
|
+
charset = headers["Content-Type"][/;\s*charset=([^;]+)/i, 1] if headers["Content-Type"]
|
86
|
+
|
87
|
+
chunks = Enumerator.new do |yielder|
|
88
|
+
yielder << first_chunk if first_chunk
|
89
|
+
yielder << output.readpartial until output.eof?
|
90
|
+
end
|
91
|
+
|
92
|
+
Down::ChunkedIO.new(
|
93
|
+
chunks: chunks,
|
94
|
+
size: content_length,
|
95
|
+
encoding: charset,
|
96
|
+
rewindable: rewindable,
|
97
|
+
on_close: output.method(:close),
|
98
|
+
data: { status: status, headers: headers },
|
99
|
+
)
|
100
|
+
end
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
def generate_command(url, *args, **options)
|
105
|
+
command = %W[wget --no-verbose --save-headers -O -]
|
106
|
+
|
107
|
+
options = @arguments.grep(Hash).inject({}, :merge).merge(options)
|
108
|
+
args = @arguments.grep(Symbol) + args
|
109
|
+
|
110
|
+
(args + options.to_a).each do |option, value|
|
111
|
+
if option.length == 1
|
112
|
+
command << "-#{option}"
|
113
|
+
else
|
114
|
+
command << "--#{option.to_s.gsub("_", "-")}"
|
115
|
+
end
|
116
|
+
|
117
|
+
command << value.to_s unless value.nil?
|
118
|
+
end
|
119
|
+
|
120
|
+
command << url
|
121
|
+
command
|
122
|
+
end
|
123
|
+
|
124
|
+
class Command
|
125
|
+
PIPE_BUFFER_SIZE = 64*1024
|
126
|
+
|
127
|
+
def self.execute(arguments)
|
128
|
+
if RUBY_ENGINE == "jruby"
|
129
|
+
stdin_pipe, stdout_pipe, stderr_pipe, status_reaper = Open3.popen3(*arguments)
|
130
|
+
else
|
131
|
+
pid, stdin_pipe, stdout_pipe, stderr_pipe = POSIX::Spawn.popen4(*arguments)
|
132
|
+
status_reaper = Process.detach(pid)
|
133
|
+
end
|
134
|
+
|
135
|
+
stdin_pipe.close
|
136
|
+
[stdout_pipe, stderr_pipe].each(&:binmode)
|
137
|
+
|
138
|
+
new(stdout_pipe, stderr_pipe, status_reaper)
|
139
|
+
rescue Errno::ENOENT
|
140
|
+
raise Down::Error, "wget is not installed"
|
141
|
+
end
|
142
|
+
|
143
|
+
def initialize(stdout_pipe, stderr_pipe, status_reaper)
|
144
|
+
@status_reaper = status_reaper
|
145
|
+
@stdout_pipe = stdout_pipe
|
146
|
+
@stderr_pipe = stderr_pipe
|
147
|
+
end
|
148
|
+
|
149
|
+
def output
|
150
|
+
# Keep emptying the stderr buffer, to allow the subprocess to send more
|
151
|
+
# than 64KB if it wants to.
|
152
|
+
stderr_reader = Thread.new { @stderr_pipe.read }
|
153
|
+
|
154
|
+
yield @stdout_pipe.readpartial(PIPE_BUFFER_SIZE) until @stdout_pipe.eof?
|
155
|
+
|
156
|
+
status = @status_reaper.value
|
157
|
+
stderr = stderr_reader.value
|
158
|
+
close
|
159
|
+
|
160
|
+
case status.exitstatus
|
161
|
+
when 0 # No problems occurred
|
162
|
+
# success
|
163
|
+
when 1, # Generic error code
|
164
|
+
2, # Parse error---for instance, when parsing command-line options, the .wgetrc or .netrc...
|
165
|
+
3 # File I/O error
|
166
|
+
raise Down::Error, stderr
|
167
|
+
when 4 # Network failure
|
168
|
+
raise Down::TimeoutError, stderr if stderr.include?("timed out")
|
169
|
+
raise Down::ConnectionError, stderr
|
170
|
+
when 5 # SSL verification failure
|
171
|
+
raise Down::SSLError, stderr
|
172
|
+
when 6 # Username/password authentication failure
|
173
|
+
raise Down::ClientError, stderr
|
174
|
+
when 7 # Protocol errors
|
175
|
+
raise Down::Error, stderr
|
176
|
+
when 8 # Server issued an error response
|
177
|
+
raise Down::TooManyRedirects, stderr if stderr.include?("redirections exceeded")
|
178
|
+
raise Down::ResponseError, stderr
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def terminate
|
183
|
+
begin
|
184
|
+
Process.kill("TERM", @status_reaper[:pid])
|
185
|
+
rescue Errno::ESRCH
|
186
|
+
# process has already terminated
|
187
|
+
end
|
188
|
+
|
189
|
+
close
|
190
|
+
end
|
191
|
+
|
192
|
+
def close
|
193
|
+
@stdout_pipe.close unless @stdout_pipe.closed?
|
194
|
+
@stderr_pipe.close unless @stderr_pipe.closed?
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
module DownloadedFile
|
199
|
+
attr_accessor :url, :headers
|
200
|
+
|
201
|
+
def original_filename
|
202
|
+
filename_from_content_disposition || filename_from_url
|
203
|
+
end
|
204
|
+
|
205
|
+
def content_type
|
206
|
+
headers["Content-Type"].to_s.split(";").first
|
207
|
+
end
|
208
|
+
|
209
|
+
def charset
|
210
|
+
headers["Content-Type"].to_s[/;\s*charset=([^;]+)/i, 1]
|
211
|
+
end
|
212
|
+
|
213
|
+
private
|
214
|
+
|
215
|
+
def filename_from_content_disposition
|
216
|
+
content_disposition = headers["Content-Disposition"].to_s
|
217
|
+
filename = content_disposition[/filename="([^"]*)"/, 1] || content_disposition[/filename=(.+)/, 1]
|
218
|
+
filename = CGI.unescape(filename.to_s.strip)
|
219
|
+
filename unless filename.empty?
|
220
|
+
end
|
221
|
+
|
222
|
+
def filename_from_url
|
223
|
+
path = URI(url).path
|
224
|
+
filename = path.split("/").last
|
225
|
+
CGI.unescape(filename) if filename
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: down
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janko Marohnić
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
@@ -52,6 +52,34 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: posix-spawn
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: http_parser.rb
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: docker-api
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -77,11 +105,13 @@ files:
|
|
77
105
|
- README.md
|
78
106
|
- down.gemspec
|
79
107
|
- lib/down.rb
|
108
|
+
- lib/down/backend.rb
|
80
109
|
- lib/down/chunked_io.rb
|
81
110
|
- lib/down/errors.rb
|
82
111
|
- lib/down/http.rb
|
83
112
|
- lib/down/net_http.rb
|
84
113
|
- lib/down/version.rb
|
114
|
+
- lib/down/wget.rb
|
85
115
|
homepage: https://github.com/janko-m/down
|
86
116
|
licenses:
|
87
117
|
- MIT
|