down 4.8.1 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +76 -0
- data/README.md +113 -38
- data/down.gemspec +11 -5
- data/lib/down/backend.rb +11 -4
- data/lib/down/chunked_io.rb +75 -42
- data/lib/down/errors.rb +12 -9
- data/lib/down/http.rb +14 -14
- data/lib/down/httpx.rb +175 -0
- data/lib/down/net_http.rb +79 -38
- data/lib/down/version.rb +1 -1
- data/lib/down/wget.rb +5 -5
- data/lib/down.rb +4 -4
- metadata +34 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e0544a70de3afab2a00c68df6923572a5533f05a0964b8c5186728019d04e55b
|
|
4
|
+
data.tar.gz: 3474da6a6c7a182aa02deb72139002ebf97ac98bee9c4a833de2b0413e373924
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 64d22c0a25ddf60f2dea37cb03264ec5770a8c7877fb7a843052abca4b043ba9d2d0c8eed830fc2cec97bb852338b114e2907c357aaf4b68aad26a6860459f67
|
|
7
|
+
data.tar.gz: 992209a7e4201cab464958bac0960ba4a6dc0344cd410b7ceaec2a02b89ebc1bd876396349204bb0bfe175a279fbdb41900c9fee85eee7e98d74eec7afa2e7a0
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,79 @@
|
|
|
1
|
+
## 5.4.0 (2022-12-26)
|
|
2
|
+
|
|
3
|
+
* Add new HTTPX backend, which supports HTTP/2 protocol among other features (@HoneyryderChuck)
|
|
4
|
+
|
|
5
|
+
## 5.3.1 (2022-03-25)
|
|
6
|
+
|
|
7
|
+
* Correctly split cookie headers on `;` instead of `,` when forwarding them on redirects (@ermolaev)
|
|
8
|
+
|
|
9
|
+
## 5.3.0 (2022-02-20)
|
|
10
|
+
|
|
11
|
+
* Add `:extension` argument to `Down.download` for overriding tempfile extension (@razum2um)
|
|
12
|
+
|
|
13
|
+
* Normalize response header names for http.rb and wget backends (@zarqman)
|
|
14
|
+
|
|
15
|
+
## 5.2.4 (2021-09-12)
|
|
16
|
+
|
|
17
|
+
* Keep original cookies between redirections (@antprt)
|
|
18
|
+
|
|
19
|
+
## 5.2.3 (2021-08-03)
|
|
20
|
+
|
|
21
|
+
* Bump addressable version requirement to 2.8+ to remediate vulnerability (@aldodelgado)
|
|
22
|
+
|
|
23
|
+
## 5.2.2 (2021-05-27)
|
|
24
|
+
|
|
25
|
+
* Add info about received content length in `Down::TooLarge` error (@evheny0)
|
|
26
|
+
|
|
27
|
+
* Relax http.rb constraint to allow versions 5.x (@mgrunberg)
|
|
28
|
+
|
|
29
|
+
## 5.2.1 (2021-04-26)
|
|
30
|
+
|
|
31
|
+
* Raise `Down::NotModified` on 304 response status in `Down::NetHttp#open` (@ellafeldmann)
|
|
32
|
+
|
|
33
|
+
## 5.2.0 (2020-09-20)
|
|
34
|
+
|
|
35
|
+
* Add `:uri_normalizer` option to `Down::NetHttp` (@janko)
|
|
36
|
+
|
|
37
|
+
* Add `:http_basic_authentication` option to `Down::NetHttp#open` (@janko)
|
|
38
|
+
|
|
39
|
+
* Fix uninitialized instance variables warnings in `Down::ChunkedIO` (@janko)
|
|
40
|
+
|
|
41
|
+
* Handle unknown HTTP error codes in `Down::NetHttp` (@darndt)
|
|
42
|
+
|
|
43
|
+
## 5.1.1 (2020-02-04)
|
|
44
|
+
|
|
45
|
+
* Fix keyword arguments warnings on Ruby 2.7 in `Down.download` and `Down.open` (@janko)
|
|
46
|
+
|
|
47
|
+
## 5.1.0 (2020-01-09)
|
|
48
|
+
|
|
49
|
+
* Fix keyword arguments warnings on Ruby 2.7 (@janko)
|
|
50
|
+
|
|
51
|
+
* Fix `FrozenError` exception in `Down::ChunkedIO#readpartial` (@janko)
|
|
52
|
+
|
|
53
|
+
* Deprecate passing headers as top-level options in `Down::NetHttp` (@janko)
|
|
54
|
+
|
|
55
|
+
## 5.0.1 (2019-12-20)
|
|
56
|
+
|
|
57
|
+
* In `Down::NetHttp` only use Addressable normalization if `URI.parse` fails (@coding-chimp)
|
|
58
|
+
|
|
59
|
+
## 5.0.0 (2019-09-26)
|
|
60
|
+
|
|
61
|
+
* Change `ChunkedIO#each_chunk` to return chunks in original encoding (@janko)
|
|
62
|
+
|
|
63
|
+
* Always return binary strings in `ChunkedIO#readpartial` (@janko)
|
|
64
|
+
|
|
65
|
+
* Handle frozen chunks in `Down::ChunkedIO` (@janko)
|
|
66
|
+
|
|
67
|
+
* Change `ChunkedIO#gets` to return lines in specified encoding (@janko)
|
|
68
|
+
|
|
69
|
+
* Halve memory allocation for `ChunkedIO#gets` (@janko)
|
|
70
|
+
|
|
71
|
+
* Halve memory allocation for `ChunkedIO#read` without arguments (@janko)
|
|
72
|
+
|
|
73
|
+
* Drop support for `HTTP::Client` argument in `Down::HTTP.new` (@janko)
|
|
74
|
+
|
|
75
|
+
* Repurpose `Down::NotFound` to be raised on `404 Not Found` response (@janko)
|
|
76
|
+
|
|
1
77
|
## 4.8.1 (2019-05-01)
|
|
2
78
|
|
|
3
79
|
* Make `ChunkedIO#read`/`#readpartial` with length always return strings in binary encoding (@janko)
|
data/README.md
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# Down
|
|
2
2
|
|
|
3
3
|
Down is a utility tool for streaming, flexible and safe downloading of remote
|
|
4
|
-
files. It can use [open-uri] + `Net::HTTP`, [
|
|
5
|
-
HTTP library.
|
|
4
|
+
files. It can use [open-uri] + `Net::HTTP`, [http.rb], [HTTPX], or `wget` as
|
|
5
|
+
the backend HTTP library.
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
9
9
|
```rb
|
|
10
|
-
gem "down", "~>
|
|
10
|
+
gem "down", "~> 5.0"
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
## Downloading
|
|
@@ -57,6 +57,17 @@ specific location on disk, you can specify the `:destination` option:
|
|
|
57
57
|
|
|
58
58
|
```rb
|
|
59
59
|
Down.download("http://example.com/image.jpg", destination: "/path/to/destination")
|
|
60
|
+
#=> nil
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
In this case `Down.download` won't have any return value, so if you need a File
|
|
64
|
+
object you'll have to create it manually.
|
|
65
|
+
|
|
66
|
+
You can also keep the tempfile, but override the extension:
|
|
67
|
+
|
|
68
|
+
```rb
|
|
69
|
+
tempfile = Down.download("http://example.com/some/file", extension: "txt")
|
|
70
|
+
File.extname(tempfile.path) #=> ".txt"
|
|
60
71
|
```
|
|
61
72
|
|
|
62
73
|
### Basic authentication
|
|
@@ -103,6 +114,16 @@ remote_file.eof? #=> true
|
|
|
103
114
|
remote_file.close # closes the HTTP connection and deletes the internal Tempfile
|
|
104
115
|
```
|
|
105
116
|
|
|
117
|
+
The following IO methods are implemented:
|
|
118
|
+
|
|
119
|
+
* `#read` & `#readpartial`
|
|
120
|
+
* `#gets`
|
|
121
|
+
* `#seek`
|
|
122
|
+
* `#pos` & `#tell`
|
|
123
|
+
* `#eof?`
|
|
124
|
+
* `#rewind`
|
|
125
|
+
* `#close`
|
|
126
|
+
|
|
106
127
|
### Caching
|
|
107
128
|
|
|
108
129
|
By default the downloaded content is internally cached into a `Tempfile`, so
|
|
@@ -143,14 +164,14 @@ You can access the response status and headers of the HTTP request that was made
|
|
|
143
164
|
```rb
|
|
144
165
|
remote_file = Down.open("http://example.com/image.jpg")
|
|
145
166
|
remote_file.data[:status] #=> 200
|
|
146
|
-
remote_file.data[:headers] #=> { ... }
|
|
167
|
+
remote_file.data[:headers] #=> { "Content-Type" => "image/jpeg", ... } (header names are normalized)
|
|
147
168
|
remote_file.data[:response] # returns the response object
|
|
148
169
|
```
|
|
149
170
|
|
|
150
|
-
Note that `Down::
|
|
151
|
-
status was 4xx or 5xx.
|
|
171
|
+
Note that a `Down::ResponseError` exception will automatically be raised if
|
|
172
|
+
response status was 4xx or 5xx.
|
|
152
173
|
|
|
153
|
-
###
|
|
174
|
+
### Down::ChunkedIO
|
|
154
175
|
|
|
155
176
|
The `Down.open` performs HTTP logic and returns an instance of
|
|
156
177
|
`Down::ChunkedIO`. However, `Down::ChunkedIO` is a generic class that can wrap
|
|
@@ -196,21 +217,25 @@ the `Down::Error` subclasses. This is Down's exception hierarchy:
|
|
|
196
217
|
|
|
197
218
|
* `Down::Error`
|
|
198
219
|
* `Down::TooLarge`
|
|
199
|
-
* `Down::
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
* `Down::
|
|
205
|
-
* `Down::
|
|
206
|
-
|
|
207
|
-
|
|
220
|
+
* `Down::InvalidUrl`
|
|
221
|
+
* `Down::TooManyRedirects`
|
|
222
|
+
* `Down::NotModified`
|
|
223
|
+
* `Down::ResponseError`
|
|
224
|
+
* `Down::ClientError`
|
|
225
|
+
* `Down::NotFound`
|
|
226
|
+
* `Down::ServerError`
|
|
227
|
+
* `Down::ConnectionError`
|
|
228
|
+
* `Down::TimeoutError`
|
|
229
|
+
* `Down::SSLError`
|
|
208
230
|
|
|
209
231
|
## Backends
|
|
210
232
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
233
|
+
The following backends are available:
|
|
234
|
+
|
|
235
|
+
* [Down::NetHttp](#downnethttp) (default)
|
|
236
|
+
* [Down::Http](#downhttp)
|
|
237
|
+
* [Down::Httpx](#downhttpx)
|
|
238
|
+
* [Down::Wget](#downwget)
|
|
214
239
|
|
|
215
240
|
You can use the backend directly:
|
|
216
241
|
|
|
@@ -232,10 +257,13 @@ Down.download("...")
|
|
|
232
257
|
Down.open("...")
|
|
233
258
|
```
|
|
234
259
|
|
|
235
|
-
###
|
|
260
|
+
### Down::NetHttp
|
|
261
|
+
|
|
262
|
+
The `Down::NetHttp` backend implements downloads using [open-uri] and
|
|
263
|
+
[Net::HTTP] standard libraries.
|
|
236
264
|
|
|
237
265
|
```rb
|
|
238
|
-
gem "down", "~>
|
|
266
|
+
gem "down", "~> 5.0"
|
|
239
267
|
```
|
|
240
268
|
```rb
|
|
241
269
|
require "down/net_http"
|
|
@@ -314,6 +342,18 @@ Down::NetHttp.open("http://example.com/image.jpg",
|
|
|
314
342
|
ssl_verify_mode: OpenSSL::SSL::VERIFY_PEER)
|
|
315
343
|
```
|
|
316
344
|
|
|
345
|
+
#### URI normalization
|
|
346
|
+
|
|
347
|
+
If the URL isn't parseable by `URI.parse`, `Down::NetHttp` will
|
|
348
|
+
attempt to normalize the URL using [Addressable::URI], URI-escaping
|
|
349
|
+
any potentially unescaped characters. You can change the normalizer
|
|
350
|
+
via the `:uri_normalizer` option:
|
|
351
|
+
|
|
352
|
+
```rb
|
|
353
|
+
# this skips URL normalization
|
|
354
|
+
Down::NetHttp.download("http://example.com/image.jpg", uri_normalizer: -> (url) { url })
|
|
355
|
+
```
|
|
356
|
+
|
|
317
357
|
#### Additional options
|
|
318
358
|
|
|
319
359
|
Any additional options passed to `Down.download` will be forwarded to
|
|
@@ -334,11 +374,13 @@ net_http.download("http://example.com/image.jpg")
|
|
|
334
374
|
net_http.open("http://example.com/image.jpg")
|
|
335
375
|
```
|
|
336
376
|
|
|
337
|
-
###
|
|
377
|
+
### Down::Http
|
|
378
|
+
|
|
379
|
+
The `Down::Http` backend implements downloads using the [http.rb] gem.
|
|
338
380
|
|
|
339
381
|
```rb
|
|
340
|
-
gem "down", "~>
|
|
341
|
-
gem "http", "~>
|
|
382
|
+
gem "down", "~> 5.0"
|
|
383
|
+
gem "http", "~> 5.0"
|
|
342
384
|
```
|
|
343
385
|
```rb
|
|
344
386
|
require "down/http"
|
|
@@ -350,7 +392,7 @@ io = Down::Http.open("http://nature.com/forest.jpg")
|
|
|
350
392
|
io #=> #<Down::ChunkedIO ...>
|
|
351
393
|
```
|
|
352
394
|
|
|
353
|
-
Some features that give the
|
|
395
|
+
Some features that give the http.rb backend an advantage over `open-uri` and
|
|
354
396
|
`Net::HTTP` include:
|
|
355
397
|
|
|
356
398
|
* Low memory usage (**10x less** than `open-uri`/`Net::HTTP`)
|
|
@@ -401,10 +443,35 @@ down = Down::Http.new(method: :post)
|
|
|
401
443
|
down.download("http://example.org/image.jpg")
|
|
402
444
|
```
|
|
403
445
|
|
|
404
|
-
###
|
|
446
|
+
### Down::Httpx
|
|
447
|
+
|
|
448
|
+
The `Down::Httpx` backend implements downloads using the [HTTPX] gem, which
|
|
449
|
+
supports the HTTP/2 protocol, in addition to many other features.
|
|
450
|
+
|
|
451
|
+
```rb
|
|
452
|
+
gem "down", "~> 5.0"
|
|
453
|
+
gem "httpx", "~> 0.22"
|
|
454
|
+
```
|
|
455
|
+
```rb
|
|
456
|
+
require "down/httpx"
|
|
457
|
+
|
|
458
|
+
tempfile = Down::Httpx.download("http://nature.com/forest.jpg")
|
|
459
|
+
tempfile #=> #<Tempfile:/var/folders/k7/6zx6dx6x7ys3rv3srh0nyfj00000gn/T/20150925-55456-z7vxqz.jpg>
|
|
460
|
+
|
|
461
|
+
io = Down::Httpx.open("http://nature.com/forest.jpg")
|
|
462
|
+
io #=> #<Down::ChunkedIO ...>
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
It's implemented in much the same way as `Down::Http`, so be sure to check
|
|
466
|
+
its docs for ways to pass additional options.
|
|
467
|
+
|
|
468
|
+
### Down::Wget (experimental)
|
|
469
|
+
|
|
470
|
+
The `Down::Wget` backend implements downloads using the `wget` command line
|
|
471
|
+
utility.
|
|
405
472
|
|
|
406
473
|
```rb
|
|
407
|
-
gem "down", "~>
|
|
474
|
+
gem "down", "~> 5.0"
|
|
408
475
|
gem "posix-spawn" # omit if on JRuby
|
|
409
476
|
gem "http_parser.rb"
|
|
410
477
|
```
|
|
@@ -418,9 +485,8 @@ io = Down::Wget.open("http://nature.com/forest.jpg")
|
|
|
418
485
|
io #=> #<Down::ChunkedIO ...>
|
|
419
486
|
```
|
|
420
487
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
interrupted due to network failures, which is very useful when you're
|
|
488
|
+
One major advantage of `wget` is that it automatically resumes downloads that
|
|
489
|
+
were interrupted due to network failures, which is very useful when you're
|
|
424
490
|
downloading large files.
|
|
425
491
|
|
|
426
492
|
However, the Wget backend should still be considered experimental, as it wasn't
|
|
@@ -447,28 +513,37 @@ wget.open("http://nature.com/forest.jpg")
|
|
|
447
513
|
|
|
448
514
|
## Supported Ruby versions
|
|
449
515
|
|
|
450
|
-
* MRI 2.2
|
|
451
516
|
* MRI 2.3
|
|
452
517
|
* MRI 2.4
|
|
453
|
-
*
|
|
518
|
+
* MRI 2.5
|
|
519
|
+
* MRI 2.6
|
|
520
|
+
* MRI 2.7
|
|
521
|
+
* MRI 3.0
|
|
522
|
+
* MRI 3.1
|
|
523
|
+
* JRuby 9.3
|
|
454
524
|
|
|
455
525
|
## Development
|
|
456
526
|
|
|
457
|
-
|
|
527
|
+
Tests require that an [httpbin] server is running locally, which you can do via Docker:
|
|
528
|
+
|
|
529
|
+
```sh
|
|
530
|
+
$ docker pull kennethreitz/httpbin
|
|
531
|
+
$ docker run -p 80:80 kennethreitz/httpbin
|
|
532
|
+
```
|
|
533
|
+
|
|
534
|
+
Then you can run tests:
|
|
458
535
|
|
|
459
536
|
```
|
|
460
537
|
$ bundle exec rake test
|
|
461
538
|
```
|
|
462
539
|
|
|
463
|
-
The test suite pulls and runs [kennethreitz/httpbin] as a Docker container, so
|
|
464
|
-
you'll need to have Docker installed and running.
|
|
465
|
-
|
|
466
540
|
## License
|
|
467
541
|
|
|
468
542
|
[MIT](LICENSE.txt)
|
|
469
543
|
|
|
470
544
|
[open-uri]: http://ruby-doc.org/stdlib-2.3.0/libdoc/open-uri/rdoc/OpenURI.html
|
|
471
545
|
[Net::HTTP]: https://ruby-doc.org/stdlib-2.4.1/libdoc/net/http/rdoc/Net/HTTP.html
|
|
472
|
-
[
|
|
546
|
+
[http.rb]: https://github.com/httprb/http
|
|
547
|
+
[HTTPX]: https://github.com/HoneyryderChuck/httpx
|
|
473
548
|
[Addressable::URI]: https://github.com/sporkmonger/addressable
|
|
474
|
-
[
|
|
549
|
+
[httpbin]: https://github.com/postmanlabs/httpbin
|
data/down.gemspec
CHANGED
|
@@ -4,7 +4,7 @@ Gem::Specification.new do |spec|
|
|
|
4
4
|
spec.name = "down"
|
|
5
5
|
spec.version = Down::VERSION
|
|
6
6
|
|
|
7
|
-
spec.required_ruby_version = ">= 2.
|
|
7
|
+
spec.required_ruby_version = ">= 2.3"
|
|
8
8
|
|
|
9
9
|
spec.summary = "Robust streaming downloads using Net::HTTP, HTTP.rb or wget."
|
|
10
10
|
spec.homepage = "https://github.com/janko/down"
|
|
@@ -15,13 +15,19 @@ Gem::Specification.new do |spec|
|
|
|
15
15
|
spec.files = Dir["README.md", "LICENSE.txt", "CHANGELOG.md", "*.gemspec", "lib/**/*.rb"]
|
|
16
16
|
spec.require_path = "lib"
|
|
17
17
|
|
|
18
|
-
spec.add_dependency "addressable", "~> 2.
|
|
18
|
+
spec.add_dependency "addressable", "~> 2.8"
|
|
19
19
|
|
|
20
20
|
spec.add_development_dependency "minitest", "~> 5.8"
|
|
21
21
|
spec.add_development_dependency "mocha", "~> 1.5"
|
|
22
22
|
spec.add_development_dependency "rake"
|
|
23
|
-
spec.add_development_dependency "
|
|
23
|
+
spec.add_development_dependency "httpx", "~> 0.22", ">= 0.22.2"
|
|
24
|
+
# http 5.0 dropped support for Ruby 2.3 and 2.4. We still support those versions.
|
|
25
|
+
if RUBY_VERSION >= "2.5"
|
|
26
|
+
spec.add_development_dependency "http", "~> 5.0"
|
|
27
|
+
else
|
|
28
|
+
spec.add_development_dependency "http", "~> 4.3"
|
|
29
|
+
end
|
|
24
30
|
spec.add_development_dependency "posix-spawn" unless RUBY_ENGINE == "jruby"
|
|
25
|
-
spec.add_development_dependency "http_parser.rb"
|
|
26
|
-
spec.add_development_dependency "
|
|
31
|
+
spec.add_development_dependency "http_parser.rb" unless RUBY_ENGINE == "jruby"
|
|
32
|
+
spec.add_development_dependency "warning" if RUBY_VERSION >= "2.4"
|
|
27
33
|
end
|
data/lib/down/backend.rb
CHANGED
|
@@ -9,12 +9,12 @@ require "fileutils"
|
|
|
9
9
|
|
|
10
10
|
module Down
|
|
11
11
|
class Backend
|
|
12
|
-
def self.download(*args, &block)
|
|
13
|
-
new.download(*args, &block)
|
|
12
|
+
def self.download(*args, **options, &block)
|
|
13
|
+
new.download(*args, **options, &block)
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def self.open(*args, &block)
|
|
17
|
-
new.open(*args, &block)
|
|
16
|
+
def self.open(*args, **options, &block)
|
|
17
|
+
new.open(*args, **options, &block)
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
private
|
|
@@ -29,5 +29,12 @@ module Down
|
|
|
29
29
|
|
|
30
30
|
nil
|
|
31
31
|
end
|
|
32
|
+
|
|
33
|
+
def normalize_headers(response_headers)
|
|
34
|
+
response_headers.inject({}) do |headers, (downcased_name, value)|
|
|
35
|
+
name = downcased_name.split("-").map(&:capitalize).join("-")
|
|
36
|
+
headers.merge!(name => value)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
32
39
|
end
|
|
33
40
|
end
|
data/lib/down/chunked_io.rb
CHANGED
|
@@ -36,6 +36,8 @@ module Down
|
|
|
36
36
|
@rewindable = rewindable
|
|
37
37
|
@buffer = nil
|
|
38
38
|
@position = 0
|
|
39
|
+
@next_chunk = nil
|
|
40
|
+
@closed = false
|
|
39
41
|
|
|
40
42
|
retrieve_chunk # fetch first chunk so that we know whether the file is empty
|
|
41
43
|
end
|
|
@@ -63,21 +65,20 @@ module Down
|
|
|
63
65
|
def read(length = nil, outbuf = nil)
|
|
64
66
|
fail IOError, "closed stream" if closed?
|
|
65
67
|
|
|
66
|
-
|
|
68
|
+
data = outbuf.clear.force_encoding(Encoding::BINARY) if outbuf
|
|
69
|
+
data ||= "".b
|
|
67
70
|
|
|
68
|
-
|
|
69
|
-
data = readpartial(remaining_length, outbuf)
|
|
70
|
-
data = data.dup unless outbuf
|
|
71
|
-
remaining_length = length - data.bytesize if length
|
|
72
|
-
rescue EOFError
|
|
73
|
-
end
|
|
71
|
+
remaining_length = length
|
|
74
72
|
|
|
75
73
|
until remaining_length == 0 || eof?
|
|
76
|
-
data << readpartial(remaining_length)
|
|
74
|
+
data << readpartial(remaining_length, buffer ||= String.new)
|
|
77
75
|
remaining_length = length - data.bytesize if length
|
|
78
76
|
end
|
|
79
77
|
|
|
80
|
-
|
|
78
|
+
buffer.clear if buffer # deallocate string
|
|
79
|
+
|
|
80
|
+
data.force_encoding(@encoding) unless length
|
|
81
|
+
data unless data.empty? && length && length > 0
|
|
81
82
|
end
|
|
82
83
|
|
|
83
84
|
# Implements IO#gets semantics. Without arguments it retrieves lines of
|
|
@@ -108,27 +109,33 @@ module Down
|
|
|
108
109
|
|
|
109
110
|
separator = "\n\n" if separator.empty?
|
|
110
111
|
|
|
111
|
-
|
|
112
|
-
data = readpartial(limit)
|
|
112
|
+
data = String.new
|
|
113
113
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
114
|
+
until data.include?(separator) || data.bytesize == limit || eof?
|
|
115
|
+
remaining_length = limit - data.bytesize if limit
|
|
116
|
+
data << readpartial(remaining_length, buffer ||= String.new)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
buffer.clear if buffer # deallocate buffer
|
|
120
|
+
|
|
121
|
+
line, extra = data.split(separator, 2)
|
|
122
|
+
line << separator if data.include?(separator)
|
|
118
123
|
|
|
119
|
-
|
|
120
|
-
line << separator if data.include?(separator)
|
|
124
|
+
data.clear # deallocate data
|
|
121
125
|
|
|
126
|
+
if extra
|
|
122
127
|
if cache
|
|
123
|
-
cache.pos -= extra.
|
|
128
|
+
cache.pos -= extra.bytesize
|
|
124
129
|
else
|
|
125
|
-
|
|
130
|
+
if @buffer
|
|
131
|
+
@buffer.prepend(extra)
|
|
132
|
+
else
|
|
133
|
+
@buffer = extra
|
|
134
|
+
end
|
|
126
135
|
end
|
|
127
|
-
rescue EOFError
|
|
128
|
-
line = nil
|
|
129
136
|
end
|
|
130
137
|
|
|
131
|
-
line
|
|
138
|
+
line.force_encoding(@encoding) if line
|
|
132
139
|
end
|
|
133
140
|
|
|
134
141
|
# Implements IO#readpartial semantics. If there is any content readily
|
|
@@ -139,33 +146,33 @@ module Down
|
|
|
139
146
|
# or the next chunk. This is useful when you don't care about the size of
|
|
140
147
|
# chunks and you want to minimize string allocations.
|
|
141
148
|
#
|
|
142
|
-
# With `
|
|
149
|
+
# With `maxlen` argument returns maximum of that amount of bytes (default
|
|
150
|
+
# is 16KB).
|
|
143
151
|
#
|
|
144
152
|
# With `outbuf` argument each call will return that same string object,
|
|
145
153
|
# where the value is replaced with retrieved content.
|
|
146
154
|
#
|
|
147
155
|
# Raises EOFError if end of file is reached. Raises IOError if closed.
|
|
148
|
-
def readpartial(
|
|
156
|
+
def readpartial(maxlen = nil, outbuf = nil)
|
|
149
157
|
fail IOError, "closed stream" if closed?
|
|
150
158
|
|
|
151
|
-
|
|
159
|
+
maxlen ||= 16*1024
|
|
152
160
|
|
|
153
|
-
|
|
161
|
+
data = cache.read(maxlen, outbuf) if cache && !cache.eof?
|
|
162
|
+
data ||= outbuf.clear.force_encoding(Encoding::BINARY) if outbuf
|
|
163
|
+
data ||= "".b
|
|
154
164
|
|
|
155
|
-
if
|
|
156
|
-
data = cache.read(length, outbuf)
|
|
157
|
-
data.force_encoding(@encoding)
|
|
158
|
-
end
|
|
165
|
+
return data if maxlen == 0
|
|
159
166
|
|
|
160
|
-
if @buffer.nil? &&
|
|
167
|
+
if @buffer.nil? && data.empty?
|
|
161
168
|
fail EOFError, "end of file reached" if chunks_depleted?
|
|
162
169
|
@buffer = retrieve_chunk
|
|
163
170
|
end
|
|
164
171
|
|
|
165
|
-
remaining_length =
|
|
172
|
+
remaining_length = maxlen - data.bytesize
|
|
166
173
|
|
|
167
174
|
unless @buffer.nil? || remaining_length == 0
|
|
168
|
-
if remaining_length
|
|
175
|
+
if remaining_length < @buffer.bytesize
|
|
169
176
|
buffered_data = @buffer.byteslice(0, remaining_length)
|
|
170
177
|
@buffer = @buffer.byteslice(remaining_length..-1)
|
|
171
178
|
else
|
|
@@ -173,21 +180,46 @@ module Down
|
|
|
173
180
|
@buffer = nil
|
|
174
181
|
end
|
|
175
182
|
|
|
176
|
-
|
|
177
|
-
data << buffered_data
|
|
178
|
-
else
|
|
179
|
-
data = buffered_data
|
|
180
|
-
end
|
|
183
|
+
data << buffered_data
|
|
181
184
|
|
|
182
185
|
cache.write(buffered_data) if cache
|
|
183
186
|
|
|
184
|
-
buffered_data.clear unless buffered_data.
|
|
187
|
+
buffered_data.clear unless buffered_data.frozen?
|
|
185
188
|
end
|
|
186
189
|
|
|
187
190
|
@position += data.bytesize
|
|
188
191
|
|
|
189
|
-
data.force_encoding(Encoding::BINARY)
|
|
190
|
-
|
|
192
|
+
data.force_encoding(Encoding::BINARY)
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
# Implements IO#seek semantics.
|
|
196
|
+
def seek(amount, whence = IO::SEEK_SET)
|
|
197
|
+
fail Errno::ESPIPE, "Illegal seek" if cache.nil?
|
|
198
|
+
|
|
199
|
+
case whence
|
|
200
|
+
when IO::SEEK_SET, :SET
|
|
201
|
+
target_pos = amount
|
|
202
|
+
when IO::SEEK_CUR, :CUR
|
|
203
|
+
target_pos = @position + amount
|
|
204
|
+
when IO::SEEK_END, :END
|
|
205
|
+
unless chunks_depleted?
|
|
206
|
+
cache.seek(0, IO::SEEK_END)
|
|
207
|
+
IO.copy_stream(self, File::NULL)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
target_pos = cache.size + amount
|
|
211
|
+
else
|
|
212
|
+
fail ArgumentError, "invalid whence: #{whence.inspect}"
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
if target_pos <= cache.size
|
|
216
|
+
cache.seek(target_pos)
|
|
217
|
+
else
|
|
218
|
+
cache.seek(0, IO::SEEK_END)
|
|
219
|
+
IO.copy_stream(self, File::NULL, target_pos - cache.size)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
@position = cache.pos
|
|
191
223
|
end
|
|
192
224
|
|
|
193
225
|
# Implements IO#pos semantics. Returns the current position of the
|
|
@@ -195,6 +227,7 @@ module Down
|
|
|
195
227
|
def pos
|
|
196
228
|
@position
|
|
197
229
|
end
|
|
230
|
+
alias tell pos
|
|
198
231
|
|
|
199
232
|
# Implements IO#eof? semantics. Returns whether we've reached end of file.
|
|
200
233
|
# It returns true if cache is at the end and there is no more content to
|
|
@@ -272,7 +305,7 @@ module Down
|
|
|
272
305
|
def retrieve_chunk
|
|
273
306
|
chunk = @next_chunk
|
|
274
307
|
@next_chunk = chunks_fiber.resume
|
|
275
|
-
chunk
|
|
308
|
+
chunk
|
|
276
309
|
end
|
|
277
310
|
|
|
278
311
|
# Returns whether there is any content left to retrieve.
|
data/lib/down/errors.rb
CHANGED
|
@@ -7,20 +7,20 @@ module Down
|
|
|
7
7
|
# raised when the file is larger than the specified maximum size
|
|
8
8
|
class TooLarge < Error; end
|
|
9
9
|
|
|
10
|
-
# raised when the file failed to be retrieved for whatever reason
|
|
11
|
-
class NotFound < Error; end
|
|
12
|
-
|
|
13
10
|
# raised when the given URL couldn't be parsed
|
|
14
|
-
class InvalidUrl <
|
|
11
|
+
class InvalidUrl < Error; end
|
|
15
12
|
|
|
16
13
|
# raised when the number of redirects was larger than the specified maximum
|
|
17
|
-
class TooManyRedirects <
|
|
14
|
+
class TooManyRedirects < Error; end
|
|
15
|
+
|
|
16
|
+
# raised when the requested resource has not been modified
|
|
17
|
+
class NotModified < Error; end
|
|
18
18
|
|
|
19
19
|
# raised when response returned 4xx or 5xx response
|
|
20
|
-
class ResponseError <
|
|
20
|
+
class ResponseError < Error
|
|
21
21
|
attr_reader :response
|
|
22
22
|
|
|
23
|
-
def initialize(message, response
|
|
23
|
+
def initialize(message, response = nil)
|
|
24
24
|
super(message)
|
|
25
25
|
@response = response
|
|
26
26
|
end
|
|
@@ -29,15 +29,18 @@ module Down
|
|
|
29
29
|
# raised when response returned 4xx response
|
|
30
30
|
class ClientError < ResponseError; end
|
|
31
31
|
|
|
32
|
+
# raised when response returned 404 response
|
|
33
|
+
class NotFound < ClientError; end
|
|
34
|
+
|
|
32
35
|
# raised when response returned 5xx response
|
|
33
36
|
class ServerError < ResponseError; end
|
|
34
37
|
|
|
35
38
|
# raised when there was an error connecting to the server
|
|
36
|
-
class ConnectionError <
|
|
39
|
+
class ConnectionError < Error; end
|
|
37
40
|
|
|
38
41
|
# raised when connecting to the server took longer than the specified timeout
|
|
39
42
|
class TimeoutError < ConnectionError; end
|
|
40
43
|
|
|
41
44
|
# raised when an SSL error was raised
|
|
42
|
-
class SSLError <
|
|
45
|
+
class SSLError < Error; end
|
|
43
46
|
end
|