url_fetcher 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +8 -0
- data/VERSION +1 -1
- data/lib/url_fetcher/errors.rb +28 -0
- data/lib/url_fetcher.rb +12 -4
- data/spec/spec_helper.rb +0 -1
- data/spec/url_fetcher_spec.rb +46 -38
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a80bd44f053156a452d45a80f43ef5b468fa19e
|
4
|
+
data.tar.gz: ba7077dedcf80cf86ee76b8d6a37f2f431fd42f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fede6c50ad1bbdbf67e7ff4281828b604d2696efa6d77c18a01697d5175c105ceebd35cbb2f965caed2036ece8b851c3e031069425d9c89eafe7846ee5591473
|
7
|
+
data.tar.gz: c34c7070a64cb0d1715d347f211a6fd4b831d549949d70ff2d8720e54420826d35df86a7ade93a61523f4ee422e36a6b747b4dbc042088f4d79c0bd062de89bb
|
data/.travis.yml
ADDED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.0.2
|
data/lib/url_fetcher/errors.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
class UrlFetcher
|
2
|
+
|
3
|
+
class Error < StandardError ; end
|
4
|
+
|
5
|
+
class TooManyRedirects < Error
|
6
|
+
attr_reader :url, :max_attempts
|
7
|
+
def initialize(url, max_attempts)
|
8
|
+
@url, @max_attempts = url, max_attempts
|
9
|
+
end
|
10
|
+
|
11
|
+
def to_s
|
12
|
+
"#{url} has too many redirects (over #{max_attempts})."
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
class CircularRedirect < Error
|
18
|
+
attr_reader :url
|
19
|
+
|
20
|
+
def initialize(url)
|
21
|
+
@url = url
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_s
|
25
|
+
"#{url} has a redirect loop."
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/url_fetcher.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
require "url_fetcher/errors"
|
1
2
|
require "net/http"
|
3
|
+
require "openssl"
|
2
4
|
require "open-uri"
|
3
5
|
require "tempfile"
|
4
6
|
|
@@ -7,7 +9,8 @@ require "tempfile"
|
|
7
9
|
# into memory all at once.
|
8
10
|
class UrlFetcher
|
9
11
|
MEGABYTE = 1048576
|
10
|
-
|
12
|
+
MAX_ATTEMPTS = 5.freeze
|
13
|
+
|
11
14
|
attr_reader :url
|
12
15
|
|
13
16
|
# Create a fetcher for the specified URL.
|
@@ -53,10 +56,15 @@ class UrlFetcher
|
|
53
56
|
end
|
54
57
|
|
55
58
|
def fetch_response(url, options, previous_attempts = [])
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
if previous_attempts.size > MAX_ATTEMPTS
|
60
|
+
raise TooManyRedirects.new(previous_attempts.first, MAX_ATTEMPTS)
|
61
|
+
end
|
59
62
|
|
63
|
+
if previous_attempts.include?(url)
|
64
|
+
raise CircularRedirect.new(previous_attempts.first)
|
65
|
+
end
|
66
|
+
|
67
|
+
previous_attempts << url
|
60
68
|
uri = URI(url)
|
61
69
|
|
62
70
|
http = Net::HTTP.new(uri.host, uri.port)
|
data/spec/spec_helper.rb
CHANGED
data/spec/url_fetcher_spec.rb
CHANGED
@@ -5,56 +5,58 @@ describe UrlFetcher do
|
|
5
5
|
it "should fetch a URL to a temp file" do
|
6
6
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
7
7
|
url_fetcher = UrlFetcher.new("http://example.com/test")
|
8
|
-
url_fetcher.
|
9
|
-
url_fetcher.
|
10
|
-
url_fetcher.header("content-length").
|
8
|
+
expect(url_fetcher).to be_success
|
9
|
+
expect(url_fetcher).not_to be_redirect
|
10
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
11
11
|
url_fetcher.body.open
|
12
|
-
url_fetcher.body.read.
|
12
|
+
expect(url_fetcher.body.read).to eql("Hello")
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should perform a POST request" do
|
16
16
|
WebMock.stub_request(:post, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
17
17
|
url_fetcher = UrlFetcher.new("http://example.com/test", :method => :post)
|
18
|
-
url_fetcher.
|
19
|
-
url_fetcher.
|
20
|
-
url_fetcher.header("content-length").
|
18
|
+
expect(url_fetcher).to be_success
|
19
|
+
expect(url_fetcher).not_to be_redirect
|
20
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
21
21
|
url_fetcher.body.open
|
22
|
-
url_fetcher.body.read.
|
22
|
+
expect(url_fetcher.body.read).to eql("Hello")
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should perform a HEAD request" do
|
26
26
|
WebMock.stub_request(:head, "http://example.com/test").to_return(:status => 200, :body => nil, :headers => {"Content-Length" => 5})
|
27
27
|
url_fetcher = UrlFetcher.new("http://example.com/test", :method => :head)
|
28
|
-
url_fetcher.
|
29
|
-
url_fetcher.
|
30
|
-
url_fetcher.header("content-length").
|
31
|
-
url_fetcher.body.
|
28
|
+
expect(url_fetcher).to be_success
|
29
|
+
expect(url_fetcher).not_to be_redirect
|
30
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
31
|
+
expect(url_fetcher.body).to be_nil
|
32
32
|
end
|
33
33
|
|
34
34
|
it "should work with SSL" do
|
35
35
|
WebMock.stub_request(:get, "https://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
36
36
|
url_fetcher = UrlFetcher.new("https://example.com/test")
|
37
|
-
url_fetcher.
|
38
|
-
url_fetcher.
|
39
|
-
url_fetcher.header("content-length").
|
40
|
-
url_fetcher.body.open.read.
|
37
|
+
expect(url_fetcher).to be_success
|
38
|
+
expect(url_fetcher).not_to be_redirect
|
39
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
40
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should honor redirects" do
|
44
44
|
WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 301, :headers => {"Location" => "http://example.com/test2"})
|
45
45
|
WebMock.stub_request(:get, "http://example.com/test2").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
46
46
|
url_fetcher = UrlFetcher.new("http://example.com/test1")
|
47
|
-
|
48
|
-
url_fetcher.
|
49
|
-
url_fetcher
|
50
|
-
url_fetcher.
|
47
|
+
|
48
|
+
expect(url_fetcher).to be_success
|
49
|
+
expect(url_fetcher).not_to be_redirect
|
50
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
51
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
51
52
|
end
|
52
53
|
|
53
54
|
it "should not honor redirects if :follow_redirects == false" do
|
54
55
|
WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 301, :headers => {"Location" => "http://example.com/test2"})
|
55
56
|
url_fetcher = UrlFetcher.new("http://example.com/test1", :follow_redirects => false)
|
56
|
-
|
57
|
-
url_fetcher.
|
57
|
+
|
58
|
+
expect(url_fetcher).not_to be_success
|
59
|
+
expect(url_fetcher).to be_redirect
|
58
60
|
end
|
59
61
|
|
60
62
|
it "should call a block before each redirect with the new location" do
|
@@ -65,10 +67,12 @@ describe UrlFetcher do
|
|
65
67
|
url_fetcher = UrlFetcher.new("http://example.com/test1") do |location|
|
66
68
|
redirects << location
|
67
69
|
end
|
68
|
-
|
69
|
-
url_fetcher.
|
70
|
-
url_fetcher.
|
71
|
-
|
70
|
+
|
71
|
+
expect(url_fetcher).to be_success
|
72
|
+
expect(url_fetcher).not_to be_redirect
|
73
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
74
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
75
|
+
expect(redirects).to eql(["http://example.com/test2", "http://example.com/test3"])
|
72
76
|
end
|
73
77
|
|
74
78
|
it "should abort redirecting if a block is given that returns false" do
|
@@ -80,41 +84,45 @@ describe UrlFetcher do
|
|
80
84
|
redirects << location
|
81
85
|
false
|
82
86
|
end
|
83
|
-
|
84
|
-
url_fetcher.
|
85
|
-
url_fetcher.
|
86
|
-
|
87
|
+
|
88
|
+
expect(url_fetcher).not_to be_success
|
89
|
+
expect(url_fetcher).to be_redirect
|
90
|
+
expect(url_fetcher.body).to be_nil
|
91
|
+
expect(redirects).to eql(["http://example.com/test2"])
|
87
92
|
end
|
88
93
|
|
89
94
|
it "should raise an error if there is a circular redirect" do
|
90
95
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 302, :headers => {"Location" => "http://example.com/test"})
|
91
|
-
|
96
|
+
expect{ UrlFetcher.new("http://example.com/test") }.to raise_error(UrlFetcher::CircularRedirect)
|
92
97
|
end
|
93
98
|
|
94
99
|
it "should raise an error if there are too many redirects" do
|
95
100
|
6.times do |i|
|
96
101
|
WebMock.stub_request(:get, "http://example.com/test#{i}").to_return(:status => 302, :headers => {"Location" => "http://example.com/test#{i + 1}"})
|
97
102
|
end
|
98
|
-
|
103
|
+
expect{ UrlFetcher.new("http://example.com/test0") }.to raise_error(UrlFetcher::TooManyRedirects)
|
99
104
|
end
|
100
105
|
|
101
106
|
it "should raise an error if an HTTP error is returned" do
|
102
107
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 404, :body => "Not Found")
|
103
|
-
|
108
|
+
expect{ UrlFetcher.new("http://example.com/test") }.to raise_error(Net::HTTPServerException)
|
104
109
|
end
|
105
110
|
|
106
111
|
it "should not unlink the temp file if asked not to" do
|
107
112
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
108
113
|
url_fetcher = UrlFetcher.new("http://example.com/test", :unlink => false)
|
109
|
-
|
110
|
-
url_fetcher.
|
111
|
-
url_fetcher.
|
114
|
+
|
115
|
+
expect(url_fetcher).to be_success
|
116
|
+
expect(url_fetcher).not_to be_redirect
|
117
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
118
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
119
|
+
expect(url_fetcher.body.path).not_to be_nil
|
112
120
|
end
|
113
121
|
|
114
122
|
it "should limit the size of the file downloaded" do
|
115
123
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 1001})
|
116
|
-
|
124
|
+
expect do
|
117
125
|
UrlFetcher.new("http://example.com/test", :max_size => 1000)
|
118
|
-
end.
|
126
|
+
end.to raise_error
|
119
127
|
end
|
120
128
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- weheartit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -89,12 +89,14 @@ extra_rdoc_files: []
|
|
89
89
|
files:
|
90
90
|
- ".gitignore"
|
91
91
|
- ".rspec"
|
92
|
+
- ".travis.yml"
|
92
93
|
- Gemfile
|
93
94
|
- LICENSE.txt
|
94
95
|
- README.md
|
95
96
|
- Rakefile
|
96
97
|
- VERSION
|
97
98
|
- lib/url_fetcher.rb
|
99
|
+
- lib/url_fetcher/errors.rb
|
98
100
|
- spec/spec_helper.rb
|
99
101
|
- spec/url_fetcher_spec.rb
|
100
102
|
- url_fetcher.gemspec
|
@@ -118,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
120
|
version: '0'
|
119
121
|
requirements: []
|
120
122
|
rubyforge_project:
|
121
|
-
rubygems_version: 2.
|
123
|
+
rubygems_version: 2.3.0
|
122
124
|
signing_key:
|
123
125
|
specification_version: 4
|
124
126
|
summary: Fetch resources from the internetz with circular redirects support
|