unwind 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/unwind/version.rb +1 -1
- data/lib/unwind.rb +106 -57
- data/test/redirect_follower_test.rb +76 -67
- data/vcr_cassettes/with_cookie.yml +838 -0
- metadata +11 -10
data/lib/unwind/version.rb
CHANGED
data/lib/unwind.rb
CHANGED
@@ -3,63 +3,112 @@ require 'faraday'
|
|
3
3
|
|
4
4
|
module Unwind
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
# Raised by RedirectFollower when its remaining redirect allowance drops below zero.
class TooManyRedirects < StandardError; end
|
7
|
+
# Raised by RedirectFollower when a redirect response carries no 'location'
# header and no <a href="..."> link can be found in its body.
class MissingRedirectLocation < StandardError; end
|
8
|
+
|
9
|
+
# Follows a chain of HTTP redirects starting at +original_url+ and records
# every hop along the way.
#
# After #resolve returns, #final_url is the last URL requested, #response is
# the response it produced, and #redirects lists the URLs that answered with
# a redirect status. #redirect_limit counts down as redirects are followed.
class RedirectFollower

  # Status codes that are followed. 307 was followed by the previous release;
  # it is kept alongside 303 so links that used to resolve still do.
  REDIRECT_STATUSES = [301, 302, 303, 307].freeze

  attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects

  # original_url - the URL to start resolving from.
  # limit        - how many redirects may be followed before
  #                TooManyRedirects is raised (default 5).
  def initialize(original_url, limit=5)
    @original_url, @redirect_limit = original_url, limit
    @redirects = []
  end

  # True when the final URL differs from the original one. final_url is only
  # assigned by #resolve, so before resolving this compares against nil.
  def redirected?
    final_url != original_url
  end

  # Requests current_url and keeps following redirects until a response with
  # a non-redirect status arrives.
  #
  # current_url - URL for this hop; nil means "start at original_url".
  # options     - request headers carried from hop to hop (nil is allowed).
  #
  # Returns self.
  # Raises TooManyRedirects when the redirect allowance is exhausted and
  # MissingRedirectLocation when a redirect names no target.
  def resolve(current_url=nil, options={})
    ok_to_continue?

    current_url ||= original_url
    # adding this header because we really only care about resolving the url
    headers = (options || {}).merge({"accept-encoding" => "none"})
    # NOTE(review): relies on Faraday.get taking headers as its second
    # argument - confirm against the Faraday version this gem is locked to.
    response = Faraday.get(current_url, headers)

    if REDIRECT_STATUSES.include?(response.status)
      @redirects << current_url.to_s
      @redirect_limit -= 1
      resolve(redirect_url(response).normalize, apply_cookie(response, headers))
    else
      @final_url = current_url.to_s
      @response = response
      self
    end
  end

  # Convenience wrapper: builds a follower and resolves it in one call.
  def self.resolve(original_url, limit=5)
    new(original_url, limit).resolve
  end

  private

  # Guard run at the start of every hop; raises once the allowance is negative.
  def ok_to_continue?
    raise TooManyRedirects if redirect_limit < 0
  end

  # Works out where a redirect response points.
  #
  # Uses the 'location' header when present (relative values are joined onto
  # the URL that was just requested); otherwise falls back to the first
  # <a href="..."> found in the body.
  #
  # Raises MissingRedirectLocation when neither source yields a target.
  def redirect_url(response)
    location = response['location']

    if location.nil?
      # to_s keeps a body-less response on the MissingRedirectLocation path.
      body_match = response.body.to_s.match(/<a href=\"([^>]+)\">/i)
      raise MissingRedirectLocation unless body_match
      # Group 1 is the href value; group 0 would be the whole <a ...> tag.
      Addressable::URI.parse(body_match[1])
    else
      redirect_uri = Addressable::URI.parse(location)
      redirect_uri.relative? ? response.env[:url].join(location) : redirect_uri
    end
  end

  # Returns the headers to send on the next hop. Only a 302 that carries a
  # 'set-cookie' header adds a :cookie entry; anything else passes the
  # headers through unchanged.
  def apply_cookie(response, headers)
    if response.status == 302 && response['set-cookie']
      headers.merge(:cookie => CookieHash.to_cookie_string(response['set-cookie']))
    else
      #todo: should we delete the cookie at this point if it exists?
      headers
    end
  end

end
|
76
|
+
|
77
|
+
#borrowed (stolen) from HTTParty with minor updates
#to handle all cookies existing in a single string
#
# A Hash of cookie name => value pairs that can absorb raw 'set-cookie'
# header text and render the value for a 'cookie' request header.
class CookieHash < Hash

  # Attribute names that describe a cookie rather than carry one; they are
  # left out of the rendered cookie string.
  CLIENT_COOKIES = %w{path expires domain secure httponly}.freeze

  # Merges cookies into this hash.
  #
  # value - a Hash of name => value pairs, or a String holding one or more
  #         cookies separated by "; " or ", ".
  #
  # Raises RuntimeError for any other kind of argument.
  def add_cookies(value)
    case value
    when Hash
      merge!(value)
    when String
      # An expires date contains ", ", which the split below would treat as a
      # cookie separator, so the attribute is removed first. It may end with
      # ';' or simply be the last thing in the string.
      value = value.gsub(/expires=[\w,\s:-]+(?:;|\z)/i, '')
      value = value.gsub(/httponly[\,\;]*/i, '')
      value.split(/[;,]\s/).each do |cookie|
        # Limit of 2 keeps '=' characters that belong to the value itself.
        name, content = cookie.split('=', 2)
        name = name.to_s.strip
        next if name.empty? # stray separator, nothing to store
        self[name.to_sym] = content
      end
    else
      raise "add_cookies only takes a Hash or a String"
    end
  end

  # Renders the stored cookies as "name=value; name=value", skipping the
  # attribute names in CLIENT_COOKIES. The hash itself is left unchanged.
  def to_cookie_string
    reject { |k, _v| CLIENT_COOKIES.include?(k.to_s.downcase) }.map { |k, v| "#{k}=#{v}" }.join("; ")
  end

  # Builds a cookie string from any number of Hash or String cookie sources.
  def self.to_cookie_string(*cookie_strings)
    jar = new
    cookie_strings.each { |cs| jar.add_cookies(cs) }
    jar.to_cookie_string
  end
end
|
8
112
|
|
9
|
-
class RedirectFollower
|
10
|
-
|
11
|
-
attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
|
12
|
-
|
13
|
-
def initialize(original_url, limit=5)
|
14
|
-
@original_url, @redirect_limit = original_url, limit
|
15
|
-
@redirects = []
|
16
|
-
end
|
17
|
-
|
18
|
-
def redirected?
|
19
|
-
!(self.final_url == self.original_url)
|
20
|
-
end
|
21
|
-
|
22
|
-
def resolve(current_url=nil)
|
23
|
-
|
24
|
-
ok_to_continue?
|
25
|
-
|
26
|
-
current_url ||= self.original_url
|
27
|
-
response = Faraday.get(current_url)
|
28
|
-
|
29
|
-
if [301, 302, 307].include?(response.status)
|
30
|
-
@redirects << current_url.to_s
|
31
|
-
@redirect_limit -= 1
|
32
|
-
resolve redirect_url(response).normalize
|
33
|
-
else
|
34
|
-
@final_url = current_url.to_s
|
35
|
-
@response = response
|
36
|
-
self
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
|
41
|
-
def self.resolve(original_url, limit=5)
|
42
|
-
new(original_url, limit).resolve
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
|
-
def ok_to_continue?
|
48
|
-
raise TooManyRedirects if redirect_limit < 0
|
49
|
-
end
|
50
|
-
|
51
|
-
def redirect_url(response)
|
52
|
-
if response['location'].nil?
|
53
|
-
body_match = response.body.match(/<a href=\"([^>]+)\">/i)
|
54
|
-
raise MissingRedirectLocation unless body_match
|
55
|
-
Addressable::URI.parse(body_match[0])
|
56
|
-
else
|
57
|
-
redirect_uri = Addressable::URI.parse(response['location'])
|
58
|
-
redirect_uri.relative? ? response.env[:url].join(response['location']) : redirect_uri
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
|
63
|
-
end
|
64
113
|
|
65
114
|
end
|
@@ -3,84 +3,93 @@ require 'vcr'
|
|
3
3
|
require './lib/unwind'
|
4
4
|
|
5
5
|
VCR.config do |c|
|
6
|
-
|
7
|
-
|
6
|
+
c.stub_with :fakeweb
|
7
|
+
c.cassette_library_dir = 'vcr_cassettes'
|
8
8
|
end
|
9
9
|
|
10
|
-
describe
|
10
|
+
describe Unwind::RedirectFollower do
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
12
|
+
# needs to be regenerated to properly test...need to stop that :(
|
13
|
+
it 'should handle url with cookie requirement' do
|
14
|
+
VCR.use_cassette('with cookie') do
|
15
|
+
follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
|
16
|
+
assert_equal 200, follower.response.status
|
17
|
+
assert follower.redirected?
|
18
|
+
end
|
19
|
+
end
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
21
|
+
it 'should resolve the url' do
|
22
|
+
VCR.use_cassette('xZVND1') do
|
23
|
+
follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
|
24
|
+
assert_equal 'http://ow.ly/i/s1O0', follower.final_url
|
25
|
+
assert_equal 'http://j.mp/xZVND1', follower.original_url
|
26
|
+
assert_equal 2, follower.redirects.count
|
27
|
+
assert follower.redirected?
|
28
|
+
end
|
29
|
+
end
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
it 'should handle relative redirects' do
|
32
|
+
VCR.use_cassette('relative stackoverflow') do
|
33
|
+
follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
|
34
|
+
assert follower.redirected?
|
35
|
+
assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
|
36
|
+
end
|
37
|
+
end
|
37
38
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
39
|
+
# Resolves a bit.ly short link inside the 'relative stackoverflow 2' cassette
# and checks only that the follower reports having been redirected.
it 'should still handle relative redirects' do
  # http://bit.ly/A4H3a2
  VCR.use_cassette('relative stackoverflow 2') do
    follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
    assert follower.redirected?
  end
end
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
47
|
+
it 'should handle redirects to pdfs' do
|
48
|
+
VCR.use_cassette('pdf') do
|
49
|
+
follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
|
50
|
+
assert follower.redirected?
|
51
|
+
assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
|
52
|
+
end
|
53
|
+
end
|
52
54
|
|
53
|
-
|
55
|
+
it 'should handle the lame amazon spaces' do
|
56
|
+
VCR.use_cassette('amazon') do
|
57
|
+
follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
|
58
|
+
assert follower.redirected?
|
59
|
+
end
|
60
|
+
end
|
54
61
|
|
55
|
-
|
56
|
-
VCR.use_cassette('ssl tpope') do
|
57
|
-
follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
|
58
|
-
assert follower.redirected?
|
59
|
-
assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
|
60
|
-
end
|
61
|
-
end
|
62
|
+
#http://amzn.to/xrHQWS
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
64
|
+
it 'should handle a https redirect' do
|
65
|
+
VCR.use_cassette('ssl tpope') do
|
66
|
+
follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
|
67
|
+
assert follower.redirected?
|
68
|
+
assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
|
69
|
+
end
|
70
|
+
end
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|
72
|
+
it 'should not be redirected' do
|
73
|
+
VCR.use_cassette('no redirect') do
|
74
|
+
follower = Unwind::RedirectFollower.resolve('http://www.scottw.com')
|
75
|
+
assert !follower.redirected?
|
76
|
+
end
|
77
|
+
end
|
77
78
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
79
|
+
it 'should raise TooManyRedirects' do
|
80
|
+
VCR.use_cassette('xZVND1') do
|
81
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
|
82
|
+
too_many_redirects = lambda {follower.resolve}
|
83
|
+
too_many_redirects.must_raise Unwind::TooManyRedirects
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should raise MissingRedirectLocation' do
|
88
|
+
VCR.use_cassette('missing redirect') do
|
89
|
+
follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
|
90
|
+
missing_redirect_location = lambda{follower.resolve}
|
91
|
+
missing_redirect_location.must_raise Unwind::MissingRedirectLocation
|
92
|
+
end
|
93
|
+
end
|
85
94
|
|
86
95
|
end
|