unwind 0.2.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -1
- data/Readme.md +2 -2
- data/lib/unwind/version.rb +1 -1
- data/lib/unwind.rb +34 -169
- data/test/redirect_follower_test.rb +22 -121
- data/unwind.gemspec +5 -7
- data/vcr_cassettes/xZVND1.yml +283 -337
- metadata +30 -77
- checksums.yaml +0 -7
- data/vcr_cassettes/amazon.yml +0 -6862
- data/vcr_cassettes/canonical_url.yml +0 -506
- data/vcr_cassettes/meta_refresh.yml +0 -169
- data/vcr_cassettes/missing_redirect.yml +0 -46
- data/vcr_cassettes/no_redirect.yml +0 -859
- data/vcr_cassettes/pdf.yml +0 -140
- data/vcr_cassettes/relative_canonical_url.yml +0 -1210
- data/vcr_cassettes/relative_meta_refresh.yml +0 -1905
- data/vcr_cassettes/relative_stackoverflow.yml +0 -1630
- data/vcr_cassettes/relative_stackoverflow_2.yml +0 -2245
- data/vcr_cassettes/ssl_tpope.yml +0 -1363
- data/vcr_cassettes/with_cookie.yml +0 -1667
data/.gitignore
CHANGED
data/Readme.md
CHANGED
@@ -4,7 +4,7 @@ Enables following a series of redirects (shortened urls)
|
|
4
4
|
|
5
5
|
# Prerequisites
|
6
6
|
|
7
|
-
|
7
|
+
Only tested on Ruby 1.9.3
|
8
8
|
|
9
9
|
# Example Code
|
10
10
|
|
@@ -22,4 +22,4 @@ Most of the code is based on John Nunemaker's blog post [Following Redirects wit
|
|
22
22
|
|
23
23
|
# License
|
24
24
|
|
25
|
-
Provided under the Do Whatever You Want With This Code License.
|
25
|
+
Provided under the Do Whatever You Want With This Code License.
|
data/lib/unwind/version.rb
CHANGED
data/lib/unwind.rb
CHANGED
@@ -1,188 +1,53 @@
|
|
1
1
|
require "unwind/version"
|
2
2
|
require 'net/http'
|
3
|
-
require 'addressable/uri'
|
4
3
|
|
5
4
|
module Unwind
|
6
5
|
|
7
|
-
|
8
|
-
class MissingRedirectLocation < StandardError; end
|
6
|
+
class TooManyRedirects < StandardError; end
|
9
7
|
|
10
|
-
|
8
|
+
class RedirectFollower
|
11
9
|
|
12
|
-
|
10
|
+
attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
|
13
11
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
def initialize(original_url, limit=5)
|
13
|
+
@original_url, @redirect_limit = original_url, limit
|
14
|
+
@redirects = []
|
15
|
+
end
|
18
16
|
|
19
|
-
|
20
|
-
!(self.final_url == self.original_url)
|
21
|
-
end
|
17
|
+
def resolve(current_url=nil)
|
22
18
|
|
23
|
-
|
24
|
-
ok_to_continue?
|
19
|
+
ok_to_continue?
|
25
20
|
|
26
|
-
|
27
|
-
#adding this header because we really only care about resolving the url
|
28
|
-
headers = (options || {}).merge({"accept-encoding" => "none"})
|
21
|
+
current_url ||= self.original_url
|
29
22
|
|
30
|
-
|
23
|
+
response = Net::HTTP.get_response(URI.parse(current_url))
|
31
24
|
|
32
|
-
|
25
|
+
if response.kind_of?(Net::HTTPRedirection)
|
26
|
+
@redirects << current_url
|
27
|
+
@redirect_limit -= 1
|
28
|
+
resolve(redirect_url(response))
|
29
|
+
else
|
30
|
+
@final_url = current_url
|
31
|
+
@response = response
|
32
|
+
self
|
33
|
+
end
|
34
|
+
end
|
33
35
|
|
34
|
-
|
35
|
-
request.add_field(header, value)
|
36
|
-
end
|
36
|
+
private
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
:use_ssl => url.scheme == 'https'
|
42
|
-
) do |http|
|
43
|
-
http.request(request)
|
44
|
-
end
|
38
|
+
def ok_to_continue?
|
39
|
+
raise TooManyRedirects if redirect_limit < 0
|
40
|
+
end
|
45
41
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
self
|
55
|
-
end
|
56
|
-
|
57
|
-
def self.resolve(original_url, limit=5)
|
58
|
-
new(original_url, limit).resolve
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
def record_redirect(url)
|
64
|
-
@redirects << url.to_s
|
65
|
-
@redirect_limit -= 1
|
66
|
-
end
|
67
|
-
|
68
|
-
def is_response_redirect?(response)
|
69
|
-
Net::HTTPRedirection === response
|
70
|
-
end
|
71
|
-
|
72
|
-
def handle_redirect(uri_to_redirect, url, response, headers)
|
73
|
-
record_redirect url
|
74
|
-
resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
|
75
|
-
end
|
76
|
-
|
77
|
-
def handle_final_response(current_url, response)
|
78
|
-
current_url = current_url.dup.to_s
|
79
|
-
if Net::HTTPSuccess === response && canonical = canonical_link?(response)
|
80
|
-
@redirects << current_url
|
81
|
-
if Addressable::URI.parse(canonical).relative?
|
82
|
-
@final_url = make_url_absolute(current_url, Addressable::URI.parse(canonical)).to_s
|
83
|
-
else
|
84
|
-
@final_url = canonical
|
85
|
-
end
|
86
|
-
|
87
|
-
else
|
88
|
-
@final_url = current_url
|
89
|
-
end
|
90
|
-
@response = response
|
91
|
-
end
|
92
|
-
|
93
|
-
def ok_to_continue?
|
94
|
-
raise TooManyRedirects if redirect_limit < 0
|
95
|
-
end
|
96
|
-
|
97
|
-
def redirect_url(response)
|
98
|
-
if response['location'].nil?
|
99
|
-
body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
|
100
|
-
raise MissingRedirectLocation unless body_match
|
101
|
-
Addressable::URI.parse(body_match[0])
|
102
|
-
else
|
103
|
-
redirect_uri = Addressable::URI.parse(response['location'])
|
104
|
-
redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(response['location']) : redirect_uri
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def meta_refresh?(current_url, response)
|
109
|
-
if Net::HTTPSuccess === response
|
110
|
-
body_match = response.body.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
|
111
|
-
if body_match
|
112
|
-
uri = Addressable::URI.parse(body_match[1])
|
113
|
-
make_url_absolute(current_url, uri)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def canonical_link?(response)
|
119
|
-
body_match = response.body.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
|
120
|
-
body_match ? Addressable::URI.parse(body_match[1]).to_s : false
|
121
|
-
end
|
122
|
-
|
123
|
-
def apply_cookie(response, headers)
|
124
|
-
if response.code.to_i == 302 && response['set-cookie']
|
125
|
-
headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
|
126
|
-
else
|
127
|
-
#todo: should we delete the cookie at this point if it exists?
|
128
|
-
headers
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def make_url_absolute(current_url, relative_url)
|
133
|
-
current_uri = Addressable::URI.parse(current_url)
|
134
|
-
if (relative_url.relative?)
|
135
|
-
url = Addressable::URI.new(
|
136
|
-
:scheme => current_uri.scheme,
|
137
|
-
:user => current_uri.user,
|
138
|
-
:password => current_uri.password,
|
139
|
-
:host => current_uri.host,
|
140
|
-
:port => current_uri.port,
|
141
|
-
:path => relative_url.path,
|
142
|
-
:query => relative_url.query,
|
143
|
-
:fragment => relative_url.fragment)
|
144
|
-
else
|
145
|
-
relative_url
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
end
|
150
|
-
|
151
|
-
#borrowed (stolen) from HTTParty with minor updates
|
152
|
-
#to handle all cookies existing in a single string
|
153
|
-
class CookieHash < Hash
|
154
|
-
|
155
|
-
CLIENT_COOKIES = %w{path expires domain path secure httponly}
|
156
|
-
|
157
|
-
def add_cookies(value)
|
158
|
-
case value
|
159
|
-
when Hash
|
160
|
-
merge!(value)
|
161
|
-
when String
|
162
|
-
value = value.gsub(/expires=[\w,\s\-\:]+;/i, '')
|
163
|
-
value = value.gsub(/httponly[\,\;]*/i, '')
|
164
|
-
value.split(/[;,]\s/).each do |cookie|
|
165
|
-
array = cookie.split('=')
|
166
|
-
self[array[0].strip.to_sym] = array[1]
|
167
|
-
end
|
168
|
-
else
|
169
|
-
raise "add_cookies only takes a Hash or a String"
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_cookie_string
|
174
|
-
delete_if { |k, v| CLIENT_COOKIES.include?(k.to_s.downcase) }.collect { |k, v| "#{k}=#{v}" }.join("; ")
|
175
|
-
end
|
176
|
-
|
177
|
-
def self.to_cookie_string(*cookie_strings)
|
178
|
-
h = CookieHash.new
|
179
|
-
cookie_strings.each do |cs|
|
180
|
-
h.add_cookies(cs)
|
181
|
-
end
|
182
|
-
|
183
|
-
h.to_cookie_string
|
184
|
-
end
|
185
|
-
end
|
42
|
+
def redirect_url(response)
|
43
|
+
if response['location'].nil?
|
44
|
+
response.body.match(/<a href=\"([^>]+)\">/i)[1]
|
45
|
+
else
|
46
|
+
response['location']
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
186
50
|
|
51
|
+
end
|
187
52
|
|
188
53
|
end
|
@@ -1,129 +1,30 @@
|
|
1
|
-
require 'minitest'
|
2
1
|
require 'minitest/autorun'
|
3
2
|
require 'vcr'
|
4
3
|
require './lib/unwind'
|
5
4
|
|
6
|
-
VCR.
|
7
|
-
|
8
|
-
|
5
|
+
VCR.config do |c|
|
6
|
+
c.stub_with :fakeweb
|
7
|
+
c.cassette_library_dir = 'vcr_cassettes'
|
9
8
|
end
|
10
9
|
|
11
|
-
describe
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should handle relative redirects' do
|
33
|
-
VCR.use_cassette('relative stackoverflow') do
|
34
|
-
follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
|
35
|
-
assert follower.redirected?
|
36
|
-
assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'should still handine relative redirects' do
|
41
|
-
# http://bit.ly/A4H3a2
|
42
|
-
VCR.use_cassette('relative stackoverflow 2') do
|
43
|
-
follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
|
44
|
-
assert follower.redirected?
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
it 'should handle redirects to pdfs' do
|
49
|
-
VCR.use_cassette('pdf') do
|
50
|
-
follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
|
51
|
-
assert follower.redirected?
|
52
|
-
assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
it 'should handle the lame amazon spaces' do
|
57
|
-
VCR.use_cassette('amazon') do
|
58
|
-
follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
|
59
|
-
assert follower.redirected?
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
#http://amzn.to/xrHQWS
|
64
|
-
|
65
|
-
it 'should handle a https redirect' do
|
66
|
-
VCR.use_cassette('ssl tpope') do
|
67
|
-
follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
|
68
|
-
assert follower.redirected?
|
69
|
-
assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
it 'should not be redirected' do
|
74
|
-
VCR.use_cassette('no redirect') do
|
75
|
-
follower = Unwind::RedirectFollower.resolve('https://flippa.com')
|
76
|
-
assert !follower.redirected?
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
it 'should set the final url as being the canonical url and treat it as s redirect' do
|
81
|
-
VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
|
82
|
-
follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
|
83
|
-
assert follower.redirected?
|
84
|
-
assert 'http://www.scottw.com', follower.final_url
|
85
|
-
assert 'http://www.scottw?test=abc', follower.redirects[0]
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
it 'should raise TooManyRedirects' do
|
90
|
-
VCR.use_cassette('xZVND1') do
|
91
|
-
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
|
92
|
-
too_many_redirects = lambda {follower.resolve}
|
93
|
-
too_many_redirects.must_raise Unwind::TooManyRedirects
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
it 'should raise MissingRedirectLocation' do
|
98
|
-
VCR.use_cassette('missing redirect') do
|
99
|
-
follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
|
100
|
-
missing_redirect_location = lambda{follower.resolve}
|
101
|
-
missing_redirect_location.must_raise Unwind::MissingRedirectLocation
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
it 'should handle a meta-refresh' do
|
106
|
-
VCR.use_cassette('meta refresh') do
|
107
|
-
follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
|
108
|
-
assert follower.redirected?
|
109
|
-
assert_equal "www.google.com.au", URI(follower.final_url).host
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'should handle a relative meta-refresh' do
|
114
|
-
VCR.use_cassette('relative meta refresh') do
|
115
|
-
follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
|
116
|
-
assert follower.redirected?
|
117
|
-
assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
it 'should handle a relative canonical url' do
|
122
|
-
VCR.use_cassette('relative canonical url') do
|
123
|
-
follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
|
124
|
-
assert follower.redirected?
|
125
|
-
assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
|
126
|
-
end
|
127
|
-
end
|
10
|
+
describe 'Tests :)' do
|
11
|
+
|
12
|
+
it 'should resolve the url' do
|
13
|
+
VCR.use_cassette('xZVND1') do
|
14
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
|
15
|
+
follower.resolve
|
16
|
+
assert_equal 'http://ow.ly/i/s1O0', follower.final_url
|
17
|
+
assert_equal 'http://j.mp/xZVND1', follower.original_url
|
18
|
+
assert_equal 2, follower.redirects.count
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should raise TooManyRedirects' do
|
23
|
+
VCR.use_cassette('xZVND1') do
|
24
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
|
25
|
+
too_many_redirects = lambda {follower.resolve}
|
26
|
+
too_many_redirects.must_raise Unwind::TooManyRedirects
|
27
|
+
end
|
28
|
+
end
|
128
29
|
|
129
30
|
end
|
data/unwind.gemspec
CHANGED
@@ -7,8 +7,7 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = Unwind::VERSION
|
8
8
|
s.authors = ["Scott Watermasysk"]
|
9
9
|
s.email = ["scottwater@gmail.com"]
|
10
|
-
s.homepage = "
|
11
|
-
s.licenses = ['MIT']
|
10
|
+
s.homepage = ""
|
12
11
|
s.summary = %q{Follows a chain redirects.}
|
13
12
|
s.description = <<-description
|
14
13
|
Follows a chain of redirects and reports back on all the steps.
|
@@ -24,9 +23,8 @@ Gem::Specification.new do |s|
|
|
24
23
|
s.require_paths = ["lib"]
|
25
24
|
|
26
25
|
# specify any dependencies here; for example:
|
27
|
-
s.add_development_dependency
|
28
|
-
s.add_development_dependency
|
29
|
-
s.add_development_dependency
|
30
|
-
s.
|
31
|
-
s.add_runtime_dependency 'addressable', '~> 2.2'
|
26
|
+
s.add_development_dependency "rake"
|
27
|
+
s.add_development_dependency "vcr"
|
28
|
+
s.add_development_dependency "fakeweb"
|
29
|
+
# s.add_runtime_dependency "rest-client"
|
32
30
|
end
|