unwind 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/unwind/version.rb +1 -1
- data/lib/unwind.rb +106 -57
- data/test/redirect_follower_test.rb +76 -67
- data/vcr_cassettes/with_cookie.yml +838 -0
- metadata +11 -10
data/lib/unwind/version.rb
CHANGED
data/lib/unwind.rb
CHANGED
@@ -3,63 +3,112 @@ require 'faraday'
|
|
3
3
|
|
4
4
|
module Unwind
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
# Raised while resolving a URL once the follower's redirect limit has been used up.
class TooManyRedirects < StandardError; end
|
7
|
+
# Raised when a redirect response has no Location header and no anchor link
# can be found in its body to use as the next URL.
class MissingRedirectLocation < StandardError; end
|
8
|
+
|
9
|
+
# Follows a chain of HTTP redirects, starting at +original_url+, and records
# every hop along the way.
#
# After #resolve returns:
# * +final_url+  - the URL of the first non-redirect response (String)
# * +response+   - that response object
# * +redirects+  - the URLs that answered with a redirect, in order
# * +redirect_limit+ - how many redirects could still have been followed
class RedirectFollower

  attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects

  # original_url - the URL to start resolving from.
  # limit        - maximum number of redirects to follow (default 5).
  def initialize(original_url, limit=5)
    @original_url, @redirect_limit = original_url, limit
    @redirects = []
  end

  # True when the resolved URL differs from the one we started with.
  # Also true before #resolve has run, because +final_url+ is still nil.
  def redirected?
    final_url != original_url
  end

  # Requests +current_url+ (defaulting to +original_url+) and recurses while
  # the server answers 301, 302 or 303.
  #
  # current_url - URL for this hop; nil means "start from original_url".
  # options     - request headers carried from one hop to the next.
  #
  # Returns self once a non-redirect response has been received.
  # Raises TooManyRedirects when the redirect limit is exhausted and
  # MissingRedirectLocation when a redirect gives no usable target.
  def resolve(current_url=nil, options={})
    ok_to_continue?

    current_url ||= original_url
    # We only care about resolving the URL, so ask for an unencoded body.
    headers = (options || {}).merge({"accept-encoding" => "none"})
    # NOTE(review): newer Faraday releases take (url, params, headers) here;
    # confirm the second argument is still treated as headers by the pinned version.
    response = Faraday.get(current_url, headers)

    if [301, 302, 303].include?(response.status)
      @redirects << current_url.to_s
      @redirect_limit -= 1
      resolve(redirect_url(response).normalize, apply_cookie(response, headers))
    else
      @final_url = current_url.to_s
      @response = response
      self
    end
  end

  # Convenience wrapper: builds a follower and resolves it in one call.
  def self.resolve(original_url, limit=5)
    new(original_url, limit).resolve
  end

  private

  # Guard called at the start of every hop.
  def ok_to_continue?
    raise TooManyRedirects if redirect_limit < 0
  end

  # Works out where a redirect response points.
  #
  # Prefers the Location header (joined onto the request URL when it is
  # relative). Without a Location header it falls back to the first
  # <a href="..."> found in the response body.
  def redirect_url(response)
    location = response['location']

    if location.nil?
      body_match = response.body.match(/<a href=\"([^>]+)\">/i)
      raise MissingRedirectLocation unless body_match
      # Group 1 is the href value; group 0 would be the entire <a ...> tag.
      Addressable::URI.parse(body_match[1])
    else
      redirect_uri = Addressable::URI.parse(location)
      redirect_uri.relative? ? response.env[:url].join(location) : redirect_uri
    end
  end

  # Returns the headers for the next hop, adding a Cookie header built from
  # the response's Set-Cookie value when a 302 sets cookies.
  def apply_cookie(response, headers)
    if response.status == 302 && response['set-cookie']
      headers.merge(:cookie => CookieHash.to_cookie_string(response['set-cookie']))
    else
      #todo: should we delete the cookie at this point if it exists?
      headers
    end
  end

end
|
76
|
+
|
77
|
+
# Borrowed from HTTParty, with minor updates to handle all cookies arriving
# in a single string.
#
# A Hash of cookie name => value that can absorb raw Set-Cookie strings and
# serialise itself back into a Cookie request-header value.
class CookieHash < Hash

  # Cookie attributes (as opposed to cookies); left out when serialising.
  CLIENT_COOKIES = %w[path expires domain secure httponly].freeze

  # Merges cookies into the hash.
  #
  # value - a Hash of name => value pairs, or a String in Set-Cookie form
  #         (pairs separated by "; " or ", ").
  #
  # Raises RuntimeError for any other argument type.
  def add_cookies(value)
    case value
    when Hash
      merge!(value)
    when String
      # Drop the expires attribute first: its date contains ", ", which would
      # otherwise be mistaken for a cookie separator. The attribute may be
      # terminated by ";" or simply be the last thing in the string.
      cleaned = value.gsub(/expires=[\w,\s\-:]+(?:;|\z)/i, '')
      cleaned = cleaned.gsub(/httponly[\,\;]*/i, '')
      cleaned.split(/[;,]\s/).each do |cookie|
        # Limit of 2 keeps any "=" inside the value (e.g. base64 padding).
        name, cookie_value = cookie.split('=', 2)
        next if name.nil? || name.strip.empty?
        self[name.strip.to_sym] = cookie_value
      end
    else
      raise "add_cookies only takes a Hash or a String"
    end
  end

  # Returns "name=value; name=value" for every entry that is not a cookie
  # attribute. Does not modify the hash.
  def to_cookie_string
    reject { |k, _| CLIENT_COOKIES.include?(k.to_s.downcase) }.map { |k, v| "#{k}=#{v}" }.join("; ")
  end

  # Builds a Cookie header value from any number of Set-Cookie strings
  # (or hashes); later values win when names collide.
  def self.to_cookie_string(*cookie_strings)
    jar = new
    cookie_strings.each { |cs| jar.add_cookies(cs) }
    jar.to_cookie_string
  end
end
|
8
112
|
|
9
|
-
class RedirectFollower
|
10
|
-
|
11
|
-
attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
|
12
|
-
|
13
|
-
def initialize(original_url, limit=5)
|
14
|
-
@original_url, @redirect_limit = original_url, limit
|
15
|
-
@redirects = []
|
16
|
-
end
|
17
|
-
|
18
|
-
def redirected?
|
19
|
-
!(self.final_url == self.original_url)
|
20
|
-
end
|
21
|
-
|
22
|
-
def resolve(current_url=nil)
|
23
|
-
|
24
|
-
ok_to_continue?
|
25
|
-
|
26
|
-
current_url ||= self.original_url
|
27
|
-
response = Faraday.get(current_url)
|
28
|
-
|
29
|
-
if [301, 302, 307].include?(response.status)
|
30
|
-
@redirects << current_url.to_s
|
31
|
-
@redirect_limit -= 1
|
32
|
-
resolve redirect_url(response).normalize
|
33
|
-
else
|
34
|
-
@final_url = current_url.to_s
|
35
|
-
@response = response
|
36
|
-
self
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
|
41
|
-
def self.resolve(original_url, limit=5)
|
42
|
-
new(original_url, limit).resolve
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
|
-
def ok_to_continue?
|
48
|
-
raise TooManyRedirects if redirect_limit < 0
|
49
|
-
end
|
50
|
-
|
51
|
-
def redirect_url(response)
|
52
|
-
if response['location'].nil?
|
53
|
-
body_match = response.body.match(/<a href=\"([^>]+)\">/i)
|
54
|
-
raise MissingRedirectLocation unless body_match
|
55
|
-
Addressable::URI.parse(body_match[0])
|
56
|
-
else
|
57
|
-
redirect_uri = Addressable::URI.parse(response['location'])
|
58
|
-
redirect_uri.relative? ? response.env[:url].join(response['location']) : redirect_uri
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
|
63
|
-
end
|
64
113
|
|
65
114
|
end
|
@@ -3,84 +3,93 @@ require 'vcr'
|
|
3
3
|
require './lib/unwind'
|
4
4
|
|
5
5
|
# VCR setup shared by every example in this file: HTTP stubbing goes through
# FakeWeb and cassettes are kept in the vcr_cassettes directory.
VCR.config do |vcr|
  vcr.stub_with :fakeweb
  vcr.cassette_library_dir = 'vcr_cassettes'
end
|
9
9
|
|
10
|
-
describe
|
10
|
+
# Specs for Unwind::RedirectFollower. Each example runs inside a VCR cassette
# so its HTTP traffic can be served from a recording.
describe Unwind::RedirectFollower do

  # NOTE: this cassette needs to be regenerated for the example to be a
  # proper test; that dependency should be removed.
  it 'should handle url with cookie requirement' do
    VCR.use_cassette('with cookie') do
      follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
      assert_equal 200, follower.response.status
      assert follower.redirected?
    end
  end

  it 'should resolve the url' do
    VCR.use_cassette('xZVND1') do
      follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
      assert_equal 'http://ow.ly/i/s1O0', follower.final_url
      assert_equal 'http://j.mp/xZVND1', follower.original_url
      assert_equal 2, follower.redirects.count
      assert follower.redirected?
    end
  end

  it 'should handle relative redirects' do
    VCR.use_cassette('relative stackoverflow') do
      follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
      assert follower.redirected?
      assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
    end
  end

  it 'should still handle relative redirects' do
    VCR.use_cassette('relative stackoverflow 2') do
      follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
      assert follower.redirected?
    end
  end

  it 'should handle redirects to pdfs' do
    VCR.use_cassette('pdf') do
      follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
      assert follower.redirected?
      assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
    end
  end

  it 'should handle the lame amazon spaces' do
    VCR.use_cassette('amazon') do
      follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
      assert follower.redirected?
    end
  end

  it 'should handle a https redirect' do
    VCR.use_cassette('ssl tpope') do
      follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
      assert follower.redirected?
      assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
    end
  end

  it 'should not be redirected' do
    VCR.use_cassette('no redirect') do
      follower = Unwind::RedirectFollower.resolve('http://www.scottw.com')
      refute follower.redirected?
    end
  end

  # A limit of 1 is lower than the two redirects this short link goes through.
  it 'should raise TooManyRedirects' do
    VCR.use_cassette('xZVND1') do
      follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
      assert_raises(Unwind::TooManyRedirects) { follower.resolve }
    end
  end

  it 'should raise MissingRedirectLocation' do
    VCR.use_cassette('missing redirect') do
      follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
      assert_raises(Unwind::MissingRedirectLocation) { follower.resolve }
    end
  end

end
|