dubdubdub 0.2.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/Gemfile.lock +10 -3
- data/dubdubdub.gemspec +12 -12
- data/lib/dubdubdub.rb +1 -1
- data/lib/dubdubdub/client.rb +27 -103
- data/lib/dubdubdub/exceptions.rb +88 -2
- data/spec/dubdubdub_spec.rb +41 -54
- data/spec/vcr/{follow_url → follow}/alias_link.yml +81 -27
- data/spec/vcr/follow/all_the_way.yml +682 -0
- data/spec/vcr/follow/base.yml +1064 -0
- data/spec/vcr/follow/eoferror.yml +143 -0
- data/spec/vcr/{follow_url → follow}/https.yml +36 -23
- data/spec/vcr/follow/pass_block.yml +270 -0
- data/spec/vcr/follow/pass_block_iteration.yml +334 -0
- data/spec/vcr/{follow_url → follow}/proxy.yml +57 -14
- data/spec/vcr/{follow_url → follow}/proxy_forbidden.yml +32 -14
- data/spec/vcr/follow/relative_redirects.yml +431 -0
- data/spec/vcr/follow/uri_error.yml +1015 -0
- data/spec/vcr/get/proxy.yml +168 -149
- metadata +13 -13
- data/spec/vcr/follow_url/base.yml +0 -1388
- data/spec/vcr/follow_url/block_base_url.yml +0 -502
- data/spec/vcr/follow_url/eoferror.yml +0 -98
- data/spec/vcr/follow_url/pass_block.yml +0 -733
- data/spec/vcr/follow_url/pass_block_iteration.yml +0 -455
- data/spec/vcr/follow_url/proxied.yml +0 -116
- data/spec/vcr/follow_url/relative_redirects.yml +0 -122
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
GIT
|
2
|
+
remote: https://github.com/daveola/rest-client
|
3
|
+
revision: 9bbe538aa2003e172f818eeff5c2e2e6828a8453
|
4
|
+
specs:
|
5
|
+
rest-client (1.6.7)
|
6
|
+
mime-types (>= 1.16)
|
7
|
+
netrc
|
8
|
+
|
1
9
|
GEM
|
2
10
|
remote: http://rubygems.org/
|
3
11
|
specs:
|
@@ -25,6 +33,7 @@ GEM
|
|
25
33
|
mime-types (1.19)
|
26
34
|
net-http-digest_auth (1.2.1)
|
27
35
|
net-http-persistent (2.8)
|
36
|
+
netrc (0.7.7)
|
28
37
|
nokogiri (1.5.5)
|
29
38
|
ntlm-http (0.1.1)
|
30
39
|
pry (0.9.10)
|
@@ -34,8 +43,6 @@ GEM
|
|
34
43
|
rake (10.0.2)
|
35
44
|
rdoc (3.12)
|
36
45
|
json (~> 1.4)
|
37
|
-
rest-client (1.6.7)
|
38
|
-
mime-types (>= 1.16)
|
39
46
|
rspec (2.8.0)
|
40
47
|
rspec-core (~> 2.8.0)
|
41
48
|
rspec-expectations (~> 2.8.0)
|
@@ -61,6 +68,6 @@ DEPENDENCIES
|
|
61
68
|
mechanize
|
62
69
|
nokogiri
|
63
70
|
pry
|
64
|
-
rest-client
|
71
|
+
rest-client!
|
65
72
|
rspec (~> 2.8.0)
|
66
73
|
vcr (~> 2.3.0)
|
data/dubdubdub.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "dubdubdub"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["James Hu"]
|
@@ -33,17 +33,17 @@ Gem::Specification.new do |s|
|
|
33
33
|
"spec/support/vcr.rb",
|
34
34
|
"spec/vcr/browse/basic.yml",
|
35
35
|
"spec/vcr/crawl/basic.yml",
|
36
|
-
"spec/vcr/
|
37
|
-
"spec/vcr/
|
38
|
-
"spec/vcr/
|
39
|
-
"spec/vcr/
|
40
|
-
"spec/vcr/
|
41
|
-
"spec/vcr/
|
42
|
-
"spec/vcr/
|
43
|
-
"spec/vcr/
|
44
|
-
"spec/vcr/
|
45
|
-
"spec/vcr/
|
46
|
-
"spec/vcr/
|
36
|
+
"spec/vcr/follow/alias_link.yml",
|
37
|
+
"spec/vcr/follow/all_the_way.yml",
|
38
|
+
"spec/vcr/follow/base.yml",
|
39
|
+
"spec/vcr/follow/eoferror.yml",
|
40
|
+
"spec/vcr/follow/https.yml",
|
41
|
+
"spec/vcr/follow/pass_block.yml",
|
42
|
+
"spec/vcr/follow/pass_block_iteration.yml",
|
43
|
+
"spec/vcr/follow/proxy.yml",
|
44
|
+
"spec/vcr/follow/proxy_forbidden.yml",
|
45
|
+
"spec/vcr/follow/relative_redirects.yml",
|
46
|
+
"spec/vcr/follow/uri_error.yml",
|
47
47
|
"spec/vcr/get/basic.yml",
|
48
48
|
"spec/vcr/get/params.yml",
|
49
49
|
"spec/vcr/get/proxy.yml"
|
data/lib/dubdubdub.rb
CHANGED
data/lib/dubdubdub/client.rb
CHANGED
@@ -19,7 +19,7 @@ class DubDubDub::Client
|
|
19
19
|
unless DubDubDub.configuration.ignore_proxy?
|
20
20
|
proxy = DubDubDub.configuration.proxy
|
21
21
|
|
22
|
-
raise
|
22
|
+
raise ArgumentError, "No proxy has been configured or provided!" if proxy.nil?
|
23
23
|
|
24
24
|
self.proxy = proxy
|
25
25
|
end
|
@@ -43,36 +43,19 @@ class DubDubDub::Client
|
|
43
43
|
end
|
44
44
|
|
45
45
|
# Builds the proxy address in "host:port" form.
#
# @return [String, nil] the combined address, or nil when either
#   proxy_host or proxy_port is not set
def proxy
  return unless proxy_host && proxy_port

  "#{proxy_host}:#{proxy_port}"
end
|
48
48
|
|
49
49
|
# Tells whether a proxy should be used.
#
# @return [Boolean] false whenever the global configuration asks to ignore
#   proxies; otherwise true only if #proxy yields a value
def proxy?
  ignoring_proxies = DubDubDub.configuration.ignore_proxy?

  ignoring_proxies ? false : !!proxy
end
|
54
54
|
|
55
|
-
# Returns a Net::HTTP object
|
56
|
-
def net_http(uri)
|
57
|
-
raise ArgumentError, "A URI must be provided!" unless uri.kind_of? URI::Generic
|
58
|
-
|
59
|
-
net_http_class = if proxy?
|
60
|
-
Net::HTTP.Proxy(proxy_host, proxy_port, proxy_user, proxy_password)
|
61
|
-
else
|
62
|
-
Net::HTTP
|
63
|
-
end
|
64
|
-
|
65
|
-
http = net_http_class.new(uri.host, uri.port)
|
66
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE # ssl certificate doesn't need to be verified, otherwise a OpenSSL::SSL::SSLError might get thrown
|
67
|
-
http.use_ssl = true if uri.scheme == "https"
|
68
|
-
|
69
|
-
http
|
70
|
-
end
|
71
|
-
|
72
55
|
# Returns a RestClient::Resource
|
73
56
|
# Creates the RestClient::Resource for +url+. When #proxy? is true the
# resource is given a :proxy option of the form "http://host:port".
#
# @param url [String] address the resource points at
# @return [RestClient::Resource]
def rest_client_resource(url)
  resource_options = proxy? ? { proxy: "http://#{proxy}" } : {}

  RestClient::Resource.new(url, resource_options)
end
|
@@ -109,93 +92,34 @@ class DubDubDub::Client
|
|
109
92
|
|
110
93
|
# Helper method to browse by using a GET request via Mechanize
|
111
94
|
def browse(url, *args)
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
# Follow a url to the end until it can no longer go any further
|
116
|
-
# Even if it times out, it will return the url that it times out on!
|
117
|
-
def follow_url(url, options = {}, &block)
|
118
|
-
default_options = { limit: 20, attempts: 5, timeout: 5 }
|
119
|
-
options = default_options.merge(options)
|
120
|
-
|
121
|
-
at_base = false
|
122
|
-
previous_uri = nil # Keep track of previous uri for relative path redirects
|
123
|
-
response = nil
|
124
|
-
user_agents = [
|
125
|
-
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.79 Safari/535.11',
|
126
|
-
''
|
127
|
-
]
|
128
|
-
urls = [] # the url history
|
129
|
-
|
130
|
-
raise ArgumentError if options[:until] and !options[:until].is_a?(Proc)
|
131
|
-
|
132
|
-
# before we begin, let's yield the initial url if a block was given
|
133
|
-
yield(url) if block_given?
|
134
|
-
|
135
|
-
options[:limit].downto(1).each do |i|
|
136
|
-
begin
|
137
|
-
at_base = true if options[:until] and options[:until].call(url)
|
138
|
-
|
139
|
-
uri = URI.parse(url)
|
140
|
-
net_http = net_http(uri)
|
141
|
-
at_base = true unless uri.respond_to?(:request_uri) # make sure its a proper url
|
142
|
-
|
143
|
-
unless at_base
|
144
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
145
|
-
request_attempts = 0
|
146
|
-
|
147
|
-
# we make a certain amount of attempts in case we timeout
|
148
|
-
while request_attempts < options[:attempts]
|
149
|
-
begin
|
150
|
-
request_attempts += 1
|
151
|
-
|
152
|
-
# Don't let the request take too long
|
153
|
-
response = Timeout::timeout(options[:timeout]) do
|
154
|
-
net_http.request(request)
|
155
|
-
end
|
156
|
-
|
157
|
-
break # if it reaches this, that means the request was successful do break out!
|
158
|
-
# If any of these exceptions are thrown, it has timed out, so keep trying depending on how many attempts we have
|
159
|
-
rescue Timeout::Error, Errno::ETIMEDOUT, Errno::EHOSTUNREACH
|
160
|
-
# do another attempt if we are allowed one, or stop
|
161
|
-
at_base = true and break if request_attempts == options[:attempts]
|
162
|
-
rescue SocketError # doesn't exist
|
163
|
-
at_base = true and break
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
case response
|
168
|
-
when Net::HTTPSuccess then at_base = true
|
169
|
-
when Net::HTTPRedirection then url = response['location']
|
170
|
-
when Net::HTTPForbidden then raise DubDubDub::Forbidden
|
171
|
-
# Couldn't resolve, just return url
|
172
|
-
else at_base = true
|
173
|
-
end if response
|
174
|
-
end
|
175
|
-
|
176
|
-
# If any of these exceptions get thrown, return the current url
|
177
|
-
rescue SocketError, EOFError
|
178
|
-
at_base = true
|
179
|
-
rescue URI::InvalidURIError
|
180
|
-
return url # Just return it
|
95
|
+
handle_net_http_exceptions do
|
96
|
+
handle_mechanize_exceptions do
|
97
|
+
mechanize.get(url, *args)
|
181
98
|
end
|
182
|
-
|
183
|
-
urls << url
|
184
|
-
|
185
|
-
break if at_base
|
186
|
-
|
187
|
-
previous_uri = uri # Keep track of previous uri
|
188
|
-
yield(url) if block_given?
|
189
99
|
end
|
100
|
+
end
|
190
101
|
|
191
|
-
|
102
|
+
# Follow a URL to the end
|
103
|
+
# Loads +url+ through #browse and reports the URI of the page it returned.
#
# @param url [String] address to start from
# @return [String] string form of the +uri+ of the page returned by #browse
def follow(url)
  landing_page = browse(url)

  landing_page.uri.to_s
end
|
192
106
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
107
|
+
private
|
108
|
+
# Runs the given block and converts low-level network failures into
# DubDubDub::ResponseError, keeping a reference to the original exception.
#
# Timeouts and unreachable hosts are reported with code 408; socket errors
# and unexpected end-of-file are reported with code 404. Any other
# exception passes through untouched.
#
# @yield the network operation to guard
# @return the value of the block
# @raise [DubDubDub::ResponseError] wrapping the original exception
def handle_net_http_exceptions(&block)
  yield
rescue Timeout::Error, Errno::ETIMEDOUT, Errno::EHOSTUNREACH => e
  # The exception must be bound with `=> e`; without the binding, `e` is
  # undefined in this branch and the handler dies with a NameError.
  raise DubDubDub::ResponseError.new(e, 408) # Timeout
rescue SocketError, EOFError => e
  raise DubDubDub::ResponseError.new(e, 404) # Not found
end
|
198
117
|
|
199
|
-
|
118
|
+
# Runs the given block and converts Mechanize::ResponseCodeError into
# DubDubDub::ResponseError, passing along the response code that
# Mechanize reported.
#
# @yield the Mechanize operation to guard
# @return the value of the block
# @raise [DubDubDub::ResponseError] wrapping the Mechanize error
def handle_mechanize_exceptions(&block)
  yield
rescue Mechanize::ResponseCodeError => response_failure
  status = response_failure.response_code
  raise DubDubDub::ResponseError.new(response_failure, status)
end
|
201
125
|
end
|
data/lib/dubdubdub/exceptions.rb
CHANGED
@@ -1,2 +1,88 @@
|
|
1
|
-
class DubDubDub::
|
2
|
-
|
1
|
+
class DubDubDub::Error < RuntimeError; end
|
2
|
+
|
3
|
+
# URL not properly formatted
|
4
|
+
class DubDubDub::URLFormatError < DubDubDub::Error; end
|
5
|
+
|
6
|
+
# There was an unhandled response. Contains
|
7
|
+
# a reference to the originating error.
|
8
|
+
class DubDubDub::ResponseError < DubDubDub::Error
  # The exception that triggered this error.
  attr_reader :error
  # The HTTP-style status code, always stored as an Integer.
  attr_reader :code

  # Description used by #message when the code is not listed in STATUSES,
  # so #message always returns a String rather than nil.
  UNKNOWN_STATUS = 'Unknown Status'

  # Pulled from rest-client, thanks!
  # Frozen so the shared lookup table cannot be modified at runtime.
  STATUSES = {
    100 => 'Continue',
    101 => 'Switching Protocols',
    102 => 'Processing', #WebDAV

    200 => 'OK',
    201 => 'Created',
    202 => 'Accepted',
    203 => 'Non-Authoritative Information', # http/1.1
    204 => 'No Content',
    205 => 'Reset Content',
    206 => 'Partial Content',
    207 => 'Multi-Status', #WebDAV

    300 => 'Multiple Choices',
    301 => 'Moved Permanently',
    302 => 'Found',
    303 => 'See Other', # http/1.1
    304 => 'Not Modified',
    305 => 'Use Proxy', # http/1.1
    306 => 'Switch Proxy', # no longer used
    307 => 'Temporary Redirect', # http/1.1

    400 => 'Bad Request',
    401 => 'Unauthorized',
    402 => 'Payment Required',
    403 => 'Forbidden',
    404 => 'Resource Not Found',
    405 => 'Method Not Allowed',
    406 => 'Not Acceptable',
    407 => 'Proxy Authentication Required',
    408 => 'Request Timeout',
    409 => 'Conflict',
    410 => 'Gone',
    411 => 'Length Required',
    412 => 'Precondition Failed',
    413 => 'Request Entity Too Large',
    414 => 'Request-URI Too Long',
    415 => 'Unsupported Media Type',
    416 => 'Requested Range Not Satisfiable',
    417 => 'Expectation Failed',
    418 => 'I\'m A Teapot',
    421 => 'Too Many Connections From This IP',
    422 => 'Unprocessable Entity', #WebDAV
    423 => 'Locked', #WebDAV
    424 => 'Failed Dependency', #WebDAV
    425 => 'Unordered Collection', #WebDAV
    426 => 'Upgrade Required',
    449 => 'Retry With', #Microsoft
    450 => 'Blocked By Windows Parental Controls', #Microsoft

    500 => 'Internal Server Error',
    501 => 'Not Implemented',
    502 => 'Bad Gateway',
    503 => 'Service Unavailable',
    504 => 'Gateway Timeout',
    505 => 'HTTP Version Not Supported',
    506 => 'Variant Also Negotiates',
    507 => 'Insufficient Storage', #WebDAV
    509 => 'Bandwidth Limit Exceeded', #Apache
    510 => 'Not Extended'
  }.freeze

  # @param error [Exception] the originating exception
  # @param code [#to_i] status code; converted with to_i before it is stored
  def initialize(error, code)
    @error = error
    @code = code.to_i
  end

  # @return [String] the description for #code from STATUSES, or
  #   UNKNOWN_STATUS when the code has no entry there
  def message
    STATUSES.fetch(code, UNKNOWN_STATUS)
  end

  # @return [String] "<code>: <message> => <original class>: <original message>"
  def to_s
    "#{code}: #{message} => #{error.class.name}: #{error.message}"
  end
end
|
data/spec/dubdubdub_spec.rb
CHANGED
@@ -57,7 +57,7 @@ describe DubDubDub do
|
|
57
57
|
config.proxy = nil
|
58
58
|
end
|
59
59
|
|
60
|
-
lambda { DubDubDub.new(proxy: true) }.should raise_error(
|
60
|
+
lambda { DubDubDub.new(proxy: true) }.should raise_error(ArgumentError)
|
61
61
|
end
|
62
62
|
|
63
63
|
it "doesn't raise an error if configured to ignore proxies and we have specified to use a global proxy that hasn't been set" do
|
@@ -66,7 +66,7 @@ describe DubDubDub do
|
|
66
66
|
config.proxy = nil
|
67
67
|
end
|
68
68
|
|
69
|
-
lambda { DubDubDub.new(proxy: true) }.should_not raise_error(
|
69
|
+
lambda { DubDubDub.new(proxy: true) }.should_not raise_error(ArgumentError)
|
70
70
|
end
|
71
71
|
|
72
72
|
it "does not pass the method to client if that method doesn't exist within the client" do
|
@@ -168,8 +168,11 @@ describe DubDubDub do
|
|
168
168
|
end
|
169
169
|
|
170
170
|
it "works with a proxy", vcr: { cassette_name: "get/proxy", record: :once } do
|
171
|
-
www.proxy = "
|
172
|
-
response = www.get "http://www.
|
171
|
+
www.proxy = "173.234.181.64:8800"
|
172
|
+
response = www.get "http://www.whatismyipaddress.com"
|
173
|
+
html = Nokogiri::HTML(response)
|
174
|
+
|
175
|
+
html.css('.ip').text.strip.should == "173.234.181.64"
|
173
176
|
end
|
174
177
|
end
|
175
178
|
|
@@ -187,78 +190,62 @@ describe DubDubDub do
|
|
187
190
|
end
|
188
191
|
end
|
189
192
|
|
190
|
-
describe '#
|
191
|
-
it "follows url to the end", vcr: { cassette_name: "
|
192
|
-
www.
|
193
|
-
www.
|
194
|
-
www.
|
193
|
+
describe '#follow' do
|
194
|
+
it "follows url to the end", vcr: { cassette_name: "follow/base", record: :once } do
|
195
|
+
www.follow("http://say.ly/TCc1CEp").should == "http://www.whosay.com/TomHanks/photos/148406"
|
196
|
+
www.follow("http://t.co//qbJx26r").should == "http://twitter.com/twitter/status/76360760606986241/photo/1"
|
197
|
+
www.follow("http://mypict.me/mMgLU").should == "http://mypict.me/mobile.php?id=336583610"
|
195
198
|
end
|
196
199
|
|
197
|
-
it "
|
198
|
-
www.
|
199
|
-
www.follow_url("http://ow.ly/9Rp7p", until: lambda { |url| url =~ /bit\.ly/ }).should == "http://bit.ly/GMx5lu"
|
200
|
-
www.follow_url("http://ow.ly/9Rp7p", until: lambda { |url| url =~ /bit\.lyyy/ }).should == "http://instagram.com/p/IbhSB6EKRQ/"
|
200
|
+
it "handles invalid uris", vcr: { cassette_name: "follow/invalid_uris", record: :once } do
|
201
|
+
lambda { www.follow("http://rank.1new.biz/sharp-紙パック式クリーナー-床用吸い込み口タイプ-オ/") }.should_not raise_error(DubDubDub::URLFormatError)
|
201
202
|
end
|
202
203
|
|
203
|
-
it "
|
204
|
-
|
205
|
-
|
206
|
-
www.follow_url("http://ow.ly/9Rp7p") do |url|
|
207
|
-
urls << url
|
208
|
-
end
|
209
|
-
|
210
|
-
urls.first.should == "http://ow.ly/9Rp7p" # first url should be the initial one
|
211
|
-
urls.count.should == 4
|
204
|
+
it "handles https", vcr: { cassette_name: "follow/https", record: :once } do
|
205
|
+
lambda { www.follow("https://www.youtube.com/watch?v=DM58Zdk7el0&feature=youtube_gdata_player") }.should_not raise_error(EOFError)
|
212
206
|
end
|
213
207
|
|
214
|
-
it "
|
215
|
-
|
208
|
+
it "raises an exception if doesn't exist", vcr: { cassette_name: "follow/doesnt_exist", record: :once } do
|
209
|
+
lambda { www.follow("http://cnnsadasdasdasdasdasd.com/asd") }.should raise_error(DubDubDub::ResponseError)
|
216
210
|
|
217
|
-
|
218
|
-
|
211
|
+
begin
|
212
|
+
www.follow("http://cnnsadasdasdasdasdasd.com/asd")
|
213
|
+
rescue DubDubDub::ResponseError => e
|
214
|
+
e.code.should == 404
|
215
|
+
e.error.should_not be_nil
|
216
|
+
e.message.should_not be_nil
|
219
217
|
end
|
220
|
-
|
221
|
-
urls.count.should == 1
|
222
|
-
urls.last.should == "http://twitpic.com/92a2p5"
|
223
|
-
end
|
224
|
-
|
225
|
-
it "handles invalid uris", vcr: { cassette_name: "follow_url/invalid_uris", record: :once } do
|
226
|
-
lambda { www.follow_url("http://rank.1new.biz/sharp-紙パック式クリーナー-床用吸い込み口タイプ-オ/") }.should_not raise_error(URI::InvalidURIError)
|
227
|
-
www.follow_url("http://rank.1new.biz/sharp-紙パック式クリーナー-床用吸い込み口タイプ-オ/").should == "http://rank.1new.biz/sharp-紙パック式クリーナー-床用吸い込み口タイプ-オ/"
|
228
218
|
end
|
229
219
|
|
230
|
-
it "
|
231
|
-
|
220
|
+
it "returns actual asset link for an alias link", vcr: { cassette_name: "follow/alias_link", record: :once } do
|
221
|
+
www.follow("http://yfrog.us/evlb0z:medium").should == "http://img535.imageshack.us/img535/9845/lb0.mp4"
|
232
222
|
end
|
233
223
|
|
234
|
-
it "
|
235
|
-
www.
|
224
|
+
it "does not raise a EOFError", vcr: { cassette_name: "follow/eoferror", record: :once } do
|
225
|
+
lambda { www.follow("http://www.soulpancake.com/post/1607/whats-your-beautiful-mess.html") }.should_not raise_error
|
236
226
|
end
|
237
227
|
|
238
|
-
it
|
239
|
-
www.
|
228
|
+
it 'works with a proxy', vcr: { cassette_name: "follow/proxy", record: :once } do
|
229
|
+
www.proxy = "198.154.114.100:8080"
|
230
|
+
www.follow("http://yfrog.us/evlb0z:medium").should == "http://img535.imageshack.us/img535/9845/lb0.mp4"
|
240
231
|
end
|
241
232
|
|
242
|
-
it "
|
243
|
-
|
233
|
+
it "works with relative path redirects", vcr: { cassette_name: "follow/relative_redirects", record: :once } do
|
234
|
+
www.follow("http://www.retailmenot.com/out/4223117").should == "http://www.papajohns.com/index.html"
|
244
235
|
end
|
245
236
|
|
246
|
-
it
|
247
|
-
www.proxy = "
|
248
|
-
www.
|
249
|
-
end
|
250
|
-
|
251
|
-
it 'works for domains', vcr: { cassette_name: "follow_url/domains", record: :once } do
|
252
|
-
www.follow_url("google.com").should == "google.com"
|
237
|
+
it "raises response error on a bad proxy", vcr: { cassette_name: "follow/proxy_forbidden", record: :once } do
  www.proxy = "190.202.116.101:3128"
  # The expectation belongs on the lambda itself; a bare `.should` with no
  # matcher inside the lambda asserted nothing.
  lambda { www.follow("http://yfrog.us/evlb0z:medium") }.should raise_error(DubDubDub::ResponseError)
end
|
254
241
|
|
255
|
-
it "
|
256
|
-
www.
|
242
|
+
it "follows to the end for some types of urls", vcr: { cassette_name: "follow/all_the_way", record: :once } do
|
243
|
+
www.follow("http://www.apmebf.com/fo122tenm4/elq/32A39898/4432424/2/2/2").should == "http://www.bedbathandbeyond.com/default.asp?utm_source=WhaleShark+Media%3A+RetailMeNot%2Ecom&utm_medium=affiliate&utm_term=&utm_campaign=Bed+Bath+and+Beyond+Product+Catalog&aid=10817676&pid=2210202&sid=&"
|
257
244
|
end
|
258
245
|
|
259
|
-
it "
|
260
|
-
www.
|
261
|
-
|
246
|
+
it "handles doesn't error out due to URI", vcr: { cassette_name: "follow/uri_error", record: :once } do
|
247
|
+
url = www.follow "http://retailmenot.com/out/4231224"
|
248
|
+
url.should == "http://www.toysrus.com/category/index.jsp?categoryId=3999911"
|
262
249
|
end
|
263
250
|
end
|
264
251
|
end
|