url_fetcher 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +8 -0
- data/VERSION +1 -1
- data/lib/url_fetcher/errors.rb +28 -0
- data/lib/url_fetcher.rb +12 -4
- data/spec/spec_helper.rb +0 -1
- data/spec/url_fetcher_spec.rb +46 -38
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a80bd44f053156a452d45a80f43ef5b468fa19e
|
4
|
+
data.tar.gz: ba7077dedcf80cf86ee76b8d6a37f2f431fd42f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fede6c50ad1bbdbf67e7ff4281828b604d2696efa6d77c18a01697d5175c105ceebd35cbb2f965caed2036ece8b851c3e031069425d9c89eafe7846ee5591473
|
7
|
+
data.tar.gz: c34c7070a64cb0d1715d347f211a6fd4b831d549949d70ff2d8720e54420826d35df86a7ade93a61523f4ee422e36a6b747b4dbc042088f4d79c0bd062de89bb
|
data/.travis.yml
ADDED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.0.2
|
data/lib/url_fetcher/errors.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
class UrlFetcher
|
2
|
+
|
3
|
+
class Error < StandardError ; end
|
4
|
+
|
5
|
+
class TooManyRedirects < Error
|
6
|
+
attr_reader :url, :max_attempts
|
7
|
+
def initialize(url, max_attempts)
|
8
|
+
@url, @max_attempts = url, max_attempts
|
9
|
+
end
|
10
|
+
|
11
|
+
def to_s
|
12
|
+
"#{url} has too many redirects (over #{max_attempts})."
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
class CircularRedirect < Error
|
18
|
+
attr_reader :url
|
19
|
+
|
20
|
+
def initialize(url)
|
21
|
+
@url = url
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_s
|
25
|
+
"#{url} has a redirect loop."
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/url_fetcher.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
require "url_fetcher/errors"
|
1
2
|
require "net/http"
|
3
|
+
require "openssl"
|
2
4
|
require "open-uri"
|
3
5
|
require "tempfile"
|
4
6
|
|
@@ -7,7 +9,8 @@ require "tempfile"
|
|
7
9
|
# into memory all at once.
|
8
10
|
class UrlFetcher
|
9
11
|
MEGABYTE = 1048576
|
10
|
-
|
12
|
+
MAX_ATTEMPTS = 5.freeze
|
13
|
+
|
11
14
|
attr_reader :url
|
12
15
|
|
13
16
|
# Create a fetcher for the specified URL.
|
@@ -53,10 +56,15 @@ class UrlFetcher
|
|
53
56
|
end
|
54
57
|
|
55
58
|
def fetch_response(url, options, previous_attempts = [])
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
if previous_attempts.size > MAX_ATTEMPTS
|
60
|
+
raise TooManyRedirects.new(previous_attempts.first, MAX_ATTEMPTS)
|
61
|
+
end
|
59
62
|
|
63
|
+
if previous_attempts.include?(url)
|
64
|
+
raise CircularRedirect.new(previous_attempts.first)
|
65
|
+
end
|
66
|
+
|
67
|
+
previous_attempts << url
|
60
68
|
uri = URI(url)
|
61
69
|
|
62
70
|
http = Net::HTTP.new(uri.host, uri.port)
|
data/spec/spec_helper.rb
CHANGED
data/spec/url_fetcher_spec.rb
CHANGED
@@ -5,56 +5,58 @@ describe UrlFetcher do
|
|
5
5
|
it "should fetch a URL to a temp file" do
|
6
6
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
7
7
|
url_fetcher = UrlFetcher.new("http://example.com/test")
|
8
|
-
url_fetcher.
|
9
|
-
url_fetcher.
|
10
|
-
url_fetcher.header("content-length").
|
8
|
+
expect(url_fetcher).to be_success
|
9
|
+
expect(url_fetcher).not_to be_redirect
|
10
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
11
11
|
url_fetcher.body.open
|
12
|
-
url_fetcher.body.read.
|
12
|
+
expect(url_fetcher.body.read).to eql("Hello")
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should perform a POST request" do
|
16
16
|
WebMock.stub_request(:post, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
17
17
|
url_fetcher = UrlFetcher.new("http://example.com/test", :method => :post)
|
18
|
-
url_fetcher.
|
19
|
-
url_fetcher.
|
20
|
-
url_fetcher.header("content-length").
|
18
|
+
expect(url_fetcher).to be_success
|
19
|
+
expect(url_fetcher).not_to be_redirect
|
20
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
21
21
|
url_fetcher.body.open
|
22
|
-
url_fetcher.body.read.
|
22
|
+
expect(url_fetcher.body.read).to eql("Hello")
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should perform a HEAD request" do
|
26
26
|
WebMock.stub_request(:head, "http://example.com/test").to_return(:status => 200, :body => nil, :headers => {"Content-Length" => 5})
|
27
27
|
url_fetcher = UrlFetcher.new("http://example.com/test", :method => :head)
|
28
|
-
url_fetcher.
|
29
|
-
url_fetcher.
|
30
|
-
url_fetcher.header("content-length").
|
31
|
-
url_fetcher.body.
|
28
|
+
expect(url_fetcher).to be_success
|
29
|
+
expect(url_fetcher).not_to be_redirect
|
30
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
31
|
+
expect(url_fetcher.body).to be_nil
|
32
32
|
end
|
33
33
|
|
34
34
|
it "should work with SSL" do
|
35
35
|
WebMock.stub_request(:get, "https://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
36
36
|
url_fetcher = UrlFetcher.new("https://example.com/test")
|
37
|
-
url_fetcher.
|
38
|
-
url_fetcher.
|
39
|
-
url_fetcher.header("content-length").
|
40
|
-
url_fetcher.body.open.read.
|
37
|
+
expect(url_fetcher).to be_success
|
38
|
+
expect(url_fetcher).not_to be_redirect
|
39
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
40
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should honor redirects" do
|
44
44
|
WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 301, :headers => {"Location" => "http://example.com/test2"})
|
45
45
|
WebMock.stub_request(:get, "http://example.com/test2").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
46
46
|
url_fetcher = UrlFetcher.new("http://example.com/test1")
|
47
|
-
|
48
|
-
url_fetcher.
|
49
|
-
url_fetcher
|
50
|
-
url_fetcher.
|
47
|
+
|
48
|
+
expect(url_fetcher).to be_success
|
49
|
+
expect(url_fetcher).not_to be_redirect
|
50
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
51
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
51
52
|
end
|
52
53
|
|
53
54
|
it "should not honor redirects if :follow_redirects == false" do
|
54
55
|
WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 301, :headers => {"Location" => "http://example.com/test2"})
|
55
56
|
url_fetcher = UrlFetcher.new("http://example.com/test1", :follow_redirects => false)
|
56
|
-
|
57
|
-
url_fetcher.
|
57
|
+
|
58
|
+
expect(url_fetcher).not_to be_success
|
59
|
+
expect(url_fetcher).to be_redirect
|
58
60
|
end
|
59
61
|
|
60
62
|
it "should call a block before each redirect with the new location" do
|
@@ -65,10 +67,12 @@ describe UrlFetcher do
|
|
65
67
|
url_fetcher = UrlFetcher.new("http://example.com/test1") do |location|
|
66
68
|
redirects << location
|
67
69
|
end
|
68
|
-
|
69
|
-
url_fetcher.
|
70
|
-
url_fetcher.
|
71
|
-
|
70
|
+
|
71
|
+
expect(url_fetcher).to be_success
|
72
|
+
expect(url_fetcher).not_to be_redirect
|
73
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
74
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
75
|
+
expect(redirects).to eql(["http://example.com/test2", "http://example.com/test3"])
|
72
76
|
end
|
73
77
|
|
74
78
|
it "should abort redirecting if a block is given that returns false" do
|
@@ -80,41 +84,45 @@ describe UrlFetcher do
|
|
80
84
|
redirects << location
|
81
85
|
false
|
82
86
|
end
|
83
|
-
|
84
|
-
url_fetcher.
|
85
|
-
url_fetcher.
|
86
|
-
|
87
|
+
|
88
|
+
expect(url_fetcher).not_to be_success
|
89
|
+
expect(url_fetcher).to be_redirect
|
90
|
+
expect(url_fetcher.body).to be_nil
|
91
|
+
expect(redirects).to eql(["http://example.com/test2"])
|
87
92
|
end
|
88
93
|
|
89
94
|
it "should raise an error if there is a circular redirect" do
|
90
95
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 302, :headers => {"Location" => "http://example.com/test"})
|
91
|
-
|
96
|
+
expect{ UrlFetcher.new("http://example.com/test") }.to raise_error(UrlFetcher::CircularRedirect)
|
92
97
|
end
|
93
98
|
|
94
99
|
it "should raise an error if there are too many redirects" do
|
95
100
|
6.times do |i|
|
96
101
|
WebMock.stub_request(:get, "http://example.com/test#{i}").to_return(:status => 302, :headers => {"Location" => "http://example.com/test#{i + 1}"})
|
97
102
|
end
|
98
|
-
|
103
|
+
expect{ UrlFetcher.new("http://example.com/test0") }.to raise_error(UrlFetcher::TooManyRedirects)
|
99
104
|
end
|
100
105
|
|
101
106
|
it "should raise an error if an HTTP error is returned" do
|
102
107
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 404, :body => "Not Found")
|
103
|
-
|
108
|
+
expect{ UrlFetcher.new("http://example.com/test") }.to raise_error(Net::HTTPServerException)
|
104
109
|
end
|
105
110
|
|
106
111
|
it "should not unlink the temp file if asked not to" do
|
107
112
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
|
108
113
|
url_fetcher = UrlFetcher.new("http://example.com/test", :unlink => false)
|
109
|
-
|
110
|
-
url_fetcher.
|
111
|
-
url_fetcher.
|
114
|
+
|
115
|
+
expect(url_fetcher).to be_success
|
116
|
+
expect(url_fetcher).not_to be_redirect
|
117
|
+
expect(url_fetcher.header("content-length")).to eql("5")
|
118
|
+
expect(url_fetcher.body.open.read).to eql("Hello")
|
119
|
+
expect(url_fetcher.body.path).not_to be_nil
|
112
120
|
end
|
113
121
|
|
114
122
|
it "should limit the size of the file downloaded" do
|
115
123
|
WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 1001})
|
116
|
-
|
124
|
+
expect do
|
117
125
|
UrlFetcher.new("http://example.com/test", :max_size => 1000)
|
118
|
-
end.
|
126
|
+
end.to raise_error
|
119
127
|
end
|
120
128
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- weheartit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -89,12 +89,14 @@ extra_rdoc_files: []
|
|
89
89
|
files:
|
90
90
|
- ".gitignore"
|
91
91
|
- ".rspec"
|
92
|
+
- ".travis.yml"
|
92
93
|
- Gemfile
|
93
94
|
- LICENSE.txt
|
94
95
|
- README.md
|
95
96
|
- Rakefile
|
96
97
|
- VERSION
|
97
98
|
- lib/url_fetcher.rb
|
99
|
+
- lib/url_fetcher/errors.rb
|
98
100
|
- spec/spec_helper.rb
|
99
101
|
- spec/url_fetcher_spec.rb
|
100
102
|
- url_fetcher.gemspec
|
@@ -118,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
120
|
version: '0'
|
119
121
|
requirements: []
|
120
122
|
rubyforge_project:
|
121
|
-
rubygems_version: 2.
|
123
|
+
rubygems_version: 2.3.0
|
122
124
|
signing_key:
|
123
125
|
specification_version: 4
|
124
126
|
summary: Fetch resources from the internetz with circular redirects support
|