unwind 0.2.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +0 -1
- data/Readme.md +2 -2
- data/lib/unwind/version.rb +1 -1
- data/lib/unwind.rb +34 -169
- data/test/redirect_follower_test.rb +22 -121
- data/unwind.gemspec +5 -7
- data/vcr_cassettes/xZVND1.yml +283 -337
- metadata +30 -77
- checksums.yaml +0 -7
- data/vcr_cassettes/amazon.yml +0 -6862
- data/vcr_cassettes/canonical_url.yml +0 -506
- data/vcr_cassettes/meta_refresh.yml +0 -169
- data/vcr_cassettes/missing_redirect.yml +0 -46
- data/vcr_cassettes/no_redirect.yml +0 -859
- data/vcr_cassettes/pdf.yml +0 -140
- data/vcr_cassettes/relative_canonical_url.yml +0 -1210
- data/vcr_cassettes/relative_meta_refresh.yml +0 -1905
- data/vcr_cassettes/relative_stackoverflow.yml +0 -1630
- data/vcr_cassettes/relative_stackoverflow_2.yml +0 -2245
- data/vcr_cassettes/ssl_tpope.yml +0 -1363
- data/vcr_cassettes/with_cookie.yml +0 -1667
data/.gitignore
CHANGED
data/Readme.md
CHANGED
@@ -4,7 +4,7 @@ Enables following a series of redirects (shortened urls)
|
|
4
4
|
|
5
5
|
# Prerequisites
|
6
6
|
|
7
|
-
|
7
|
+
Only tested on Ruby 1.9.3
|
8
8
|
|
9
9
|
# Example Code
|
10
10
|
|
@@ -22,4 +22,4 @@ Most of the code is based on John Nunemaker's blog post [Following Redirects wit
|
|
22
22
|
|
23
23
|
# License
|
24
24
|
|
25
|
-
Provided under the Do Whatever You Want With This Code License.
|
25
|
+
Provided under the Do Whatever You Want With This Code License.
|
data/lib/unwind/version.rb
CHANGED
data/lib/unwind.rb
CHANGED
@@ -1,188 +1,53 @@
|
|
1
1
|
require "unwind/version"
|
2
2
|
require 'net/http'
|
3
|
-
require 'addressable/uri'
|
4
3
|
|
5
4
|
module Unwind
|
6
5
|
|
7
|
-
|
8
|
-
class MissingRedirectLocation < StandardError; end
|
6
|
+
class TooManyRedirects < StandardError; end
|
9
7
|
|
10
|
-
|
8
|
+
class RedirectFollower
|
11
9
|
|
12
|
-
|
10
|
+
attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
|
13
11
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
def initialize(original_url, limit=5)
|
13
|
+
@original_url, @redirect_limit = original_url, limit
|
14
|
+
@redirects = []
|
15
|
+
end
|
18
16
|
|
19
|
-
|
20
|
-
!(self.final_url == self.original_url)
|
21
|
-
end
|
17
|
+
def resolve(current_url=nil)
|
22
18
|
|
23
|
-
|
24
|
-
ok_to_continue?
|
19
|
+
ok_to_continue?
|
25
20
|
|
26
|
-
|
27
|
-
#adding this header because we really only care about resolving the url
|
28
|
-
headers = (options || {}).merge({"accept-encoding" => "none"})
|
21
|
+
current_url ||= self.original_url
|
29
22
|
|
30
|
-
|
23
|
+
response = Net::HTTP.get_response(URI.parse(current_url))
|
31
24
|
|
32
|
-
|
25
|
+
if response.kind_of?(Net::HTTPRedirection)
|
26
|
+
@redirects << current_url
|
27
|
+
@redirect_limit -= 1
|
28
|
+
resolve(redirect_url(response))
|
29
|
+
else
|
30
|
+
@final_url = current_url
|
31
|
+
@response = response
|
32
|
+
self
|
33
|
+
end
|
34
|
+
end
|
33
35
|
|
34
|
-
|
35
|
-
request.add_field(header, value)
|
36
|
-
end
|
36
|
+
private
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
:use_ssl => url.scheme == 'https'
|
42
|
-
) do |http|
|
43
|
-
http.request(request)
|
44
|
-
end
|
38
|
+
def ok_to_continue?
|
39
|
+
raise TooManyRedirects if redirect_limit < 0
|
40
|
+
end
|
45
41
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
self
|
55
|
-
end
|
56
|
-
|
57
|
-
def self.resolve(original_url, limit=5)
|
58
|
-
new(original_url, limit).resolve
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
def record_redirect(url)
|
64
|
-
@redirects << url.to_s
|
65
|
-
@redirect_limit -= 1
|
66
|
-
end
|
67
|
-
|
68
|
-
def is_response_redirect?(response)
|
69
|
-
Net::HTTPRedirection === response
|
70
|
-
end
|
71
|
-
|
72
|
-
def handle_redirect(uri_to_redirect, url, response, headers)
|
73
|
-
record_redirect url
|
74
|
-
resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
|
75
|
-
end
|
76
|
-
|
77
|
-
def handle_final_response(current_url, response)
|
78
|
-
current_url = current_url.dup.to_s
|
79
|
-
if Net::HTTPSuccess === response && canonical = canonical_link?(response)
|
80
|
-
@redirects << current_url
|
81
|
-
if Addressable::URI.parse(canonical).relative?
|
82
|
-
@final_url = make_url_absolute(current_url, Addressable::URI.parse(canonical)).to_s
|
83
|
-
else
|
84
|
-
@final_url = canonical
|
85
|
-
end
|
86
|
-
|
87
|
-
else
|
88
|
-
@final_url = current_url
|
89
|
-
end
|
90
|
-
@response = response
|
91
|
-
end
|
92
|
-
|
93
|
-
def ok_to_continue?
|
94
|
-
raise TooManyRedirects if redirect_limit < 0
|
95
|
-
end
|
96
|
-
|
97
|
-
def redirect_url(response)
|
98
|
-
if response['location'].nil?
|
99
|
-
body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
|
100
|
-
raise MissingRedirectLocation unless body_match
|
101
|
-
Addressable::URI.parse(body_match[0])
|
102
|
-
else
|
103
|
-
redirect_uri = Addressable::URI.parse(response['location'])
|
104
|
-
redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(response['location']) : redirect_uri
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def meta_refresh?(current_url, response)
|
109
|
-
if Net::HTTPSuccess === response
|
110
|
-
body_match = response.body.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
|
111
|
-
if body_match
|
112
|
-
uri = Addressable::URI.parse(body_match[1])
|
113
|
-
make_url_absolute(current_url, uri)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def canonical_link?(response)
|
119
|
-
body_match = response.body.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
|
120
|
-
body_match ? Addressable::URI.parse(body_match[1]).to_s : false
|
121
|
-
end
|
122
|
-
|
123
|
-
def apply_cookie(response, headers)
|
124
|
-
if response.code.to_i == 302 && response['set-cookie']
|
125
|
-
headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
|
126
|
-
else
|
127
|
-
#todo: should we delete the cookie at this point if it exists?
|
128
|
-
headers
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def make_url_absolute(current_url, relative_url)
|
133
|
-
current_uri = Addressable::URI.parse(current_url)
|
134
|
-
if (relative_url.relative?)
|
135
|
-
url = Addressable::URI.new(
|
136
|
-
:scheme => current_uri.scheme,
|
137
|
-
:user => current_uri.user,
|
138
|
-
:password => current_uri.password,
|
139
|
-
:host => current_uri.host,
|
140
|
-
:port => current_uri.port,
|
141
|
-
:path => relative_url.path,
|
142
|
-
:query => relative_url.query,
|
143
|
-
:fragment => relative_url.fragment)
|
144
|
-
else
|
145
|
-
relative_url
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
end
|
150
|
-
|
151
|
-
#borrowed (stolen) from HTTParty with minor updates
|
152
|
-
#to handle all cookies existing in a single string
|
153
|
-
class CookieHash < Hash
|
154
|
-
|
155
|
-
CLIENT_COOKIES = %w{path expires domain path secure httponly}
|
156
|
-
|
157
|
-
def add_cookies(value)
|
158
|
-
case value
|
159
|
-
when Hash
|
160
|
-
merge!(value)
|
161
|
-
when String
|
162
|
-
value = value.gsub(/expires=[\w,\s\-\:]+;/i, '')
|
163
|
-
value = value.gsub(/httponly[\,\;]*/i, '')
|
164
|
-
value.split(/[;,]\s/).each do |cookie|
|
165
|
-
array = cookie.split('=')
|
166
|
-
self[array[0].strip.to_sym] = array[1]
|
167
|
-
end
|
168
|
-
else
|
169
|
-
raise "add_cookies only takes a Hash or a String"
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_cookie_string
|
174
|
-
delete_if { |k, v| CLIENT_COOKIES.include?(k.to_s.downcase) }.collect { |k, v| "#{k}=#{v}" }.join("; ")
|
175
|
-
end
|
176
|
-
|
177
|
-
def self.to_cookie_string(*cookie_strings)
|
178
|
-
h = CookieHash.new
|
179
|
-
cookie_strings.each do |cs|
|
180
|
-
h.add_cookies(cs)
|
181
|
-
end
|
182
|
-
|
183
|
-
h.to_cookie_string
|
184
|
-
end
|
185
|
-
end
|
42
|
+
def redirect_url(response)
|
43
|
+
if response['location'].nil?
|
44
|
+
response.body.match(/<a href=\"([^>]+)\">/i)[1]
|
45
|
+
else
|
46
|
+
response['location']
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
186
50
|
|
51
|
+
end
|
187
52
|
|
188
53
|
end
|
@@ -1,129 +1,30 @@
|
|
1
|
-
require 'minitest'
|
2
1
|
require 'minitest/autorun'
|
3
2
|
require 'vcr'
|
4
3
|
require './lib/unwind'
|
5
4
|
|
6
|
-
VCR.
|
7
|
-
|
8
|
-
|
5
|
+
VCR.config do |c|
|
6
|
+
c.stub_with :fakeweb
|
7
|
+
c.cassette_library_dir = 'vcr_cassettes'
|
9
8
|
end
|
10
9
|
|
11
|
-
describe
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should handle relative redirects' do
|
33
|
-
VCR.use_cassette('relative stackoverflow') do
|
34
|
-
follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
|
35
|
-
assert follower.redirected?
|
36
|
-
assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'should still handine relative redirects' do
|
41
|
-
# http://bit.ly/A4H3a2
|
42
|
-
VCR.use_cassette('relative stackoverflow 2') do
|
43
|
-
follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
|
44
|
-
assert follower.redirected?
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
it 'should handle redirects to pdfs' do
|
49
|
-
VCR.use_cassette('pdf') do
|
50
|
-
follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
|
51
|
-
assert follower.redirected?
|
52
|
-
assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
it 'should handle the lame amazon spaces' do
|
57
|
-
VCR.use_cassette('amazon') do
|
58
|
-
follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
|
59
|
-
assert follower.redirected?
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
#http://amzn.to/xrHQWS
|
64
|
-
|
65
|
-
it 'should handle a https redirect' do
|
66
|
-
VCR.use_cassette('ssl tpope') do
|
67
|
-
follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
|
68
|
-
assert follower.redirected?
|
69
|
-
assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
it 'should not be redirected' do
|
74
|
-
VCR.use_cassette('no redirect') do
|
75
|
-
follower = Unwind::RedirectFollower.resolve('https://flippa.com')
|
76
|
-
assert !follower.redirected?
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
it 'should set the final url as being the canonical url and treat it as s redirect' do
|
81
|
-
VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
|
82
|
-
follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
|
83
|
-
assert follower.redirected?
|
84
|
-
assert 'http://www.scottw.com', follower.final_url
|
85
|
-
assert 'http://www.scottw?test=abc', follower.redirects[0]
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
it 'should raise TooManyRedirects' do
|
90
|
-
VCR.use_cassette('xZVND1') do
|
91
|
-
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
|
92
|
-
too_many_redirects = lambda {follower.resolve}
|
93
|
-
too_many_redirects.must_raise Unwind::TooManyRedirects
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
it 'should raise MissingRedirectLocation' do
|
98
|
-
VCR.use_cassette('missing redirect') do
|
99
|
-
follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
|
100
|
-
missing_redirect_location = lambda{follower.resolve}
|
101
|
-
missing_redirect_location.must_raise Unwind::MissingRedirectLocation
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
it 'should handle a meta-refresh' do
|
106
|
-
VCR.use_cassette('meta refresh') do
|
107
|
-
follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
|
108
|
-
assert follower.redirected?
|
109
|
-
assert_equal "www.google.com.au", URI(follower.final_url).host
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'should handle a relative meta-refresh' do
|
114
|
-
VCR.use_cassette('relative meta refresh') do
|
115
|
-
follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
|
116
|
-
assert follower.redirected?
|
117
|
-
assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
it 'should handle a relative canonical url' do
|
122
|
-
VCR.use_cassette('relative canonical url') do
|
123
|
-
follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
|
124
|
-
assert follower.redirected?
|
125
|
-
assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
|
126
|
-
end
|
127
|
-
end
|
10
|
+
describe 'Tests :)' do
|
11
|
+
|
12
|
+
it 'should resolve the url' do
|
13
|
+
VCR.use_cassette('xZVND1') do
|
14
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
|
15
|
+
follower.resolve
|
16
|
+
assert_equal 'http://ow.ly/i/s1O0', follower.final_url
|
17
|
+
assert_equal 'http://j.mp/xZVND1', follower.original_url
|
18
|
+
assert_equal 2, follower.redirects.count
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should raise TooManyRedirects' do
|
23
|
+
VCR.use_cassette('xZVND1') do
|
24
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
|
25
|
+
too_many_redirects = lambda {follower.resolve}
|
26
|
+
too_many_redirects.must_raise Unwind::TooManyRedirects
|
27
|
+
end
|
28
|
+
end
|
128
29
|
|
129
30
|
end
|
data/unwind.gemspec
CHANGED
@@ -7,8 +7,7 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = Unwind::VERSION
|
8
8
|
s.authors = ["Scott Watermasysk"]
|
9
9
|
s.email = ["scottwater@gmail.com"]
|
10
|
-
s.homepage = "
|
11
|
-
s.licenses = ['MIT']
|
10
|
+
s.homepage = ""
|
12
11
|
s.summary = %q{Follows a chain redirects.}
|
13
12
|
s.description = <<-description
|
14
13
|
Follows a chain of redirects and reports back on all the steps.
|
@@ -24,9 +23,8 @@ Gem::Specification.new do |s|
|
|
24
23
|
s.require_paths = ["lib"]
|
25
24
|
|
26
25
|
# specify any dependencies here; for example:
|
27
|
-
s.add_development_dependency
|
28
|
-
s.add_development_dependency
|
29
|
-
s.add_development_dependency
|
30
|
-
s.
|
31
|
-
s.add_runtime_dependency 'addressable', '~> 2.2'
|
26
|
+
s.add_development_dependency "rake"
|
27
|
+
s.add_development_dependency "vcr"
|
28
|
+
s.add_development_dependency "fakeweb"
|
29
|
+
# s.add_runtime_dependency "rest-client"
|
32
30
|
end
|