unwind 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/Readme.md +25 -0
- data/example/sample.rb +9 -0
- data/lib/unwind/version.rb +3 -0
- data/lib/unwind.rb +188 -0
- data/test/redirect_follower_test.rb +129 -0
- data/unwind.gemspec +32 -0
- data/vcr_cassettes/amazon.yml +6862 -0
- data/vcr_cassettes/canonical_url.yml +506 -0
- data/vcr_cassettes/meta_refresh.yml +169 -0
- data/vcr_cassettes/missing_redirect.yml +46 -0
- data/vcr_cassettes/no_redirect.yml +859 -0
- data/vcr_cassettes/pdf.yml +140 -0
- data/vcr_cassettes/relative_canonical_url.yml +1210 -0
- data/vcr_cassettes/relative_meta_refresh.yml +1905 -0
- data/vcr_cassettes/relative_stackoverflow.yml +1630 -0
- data/vcr_cassettes/relative_stackoverflow_2.yml +2245 -0
- data/vcr_cassettes/ssl_tpope.yml +1363 -0
- data/vcr_cassettes/with_cookie.yml +1667 -0
- data/vcr_cassettes/xZVND1.yml +361 -0
- metadata +138 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 22070510ea3066c9103a8efe3d75cc4106642a9b
|
4
|
+
data.tar.gz: a55418df179313c6ad2c60dfbf8f8dd4600b4c18
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2b40822f4063259cbfad43246342d55f13fcdf6291f85e3b67decd4186e565b1ed373fd0647b0e995cd83ddf7cec88c4c13aef0959d6600de4e9dccc8f16e2e6
|
7
|
+
data.tar.gz: b6f9a718d3890d9c0570ee4be065727c7479ce9b342161241fec08ec0274241fde6ba7d025aa779c908b66d27f53cd039b7b0451d27a4ff0f0667af1ed137f48
|
data/Gemfile
ADDED
data/Rakefile
ADDED
data/Readme.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Description
|
2
|
+
|
3
|
+
Enables following a series of redirects (shortened urls)
|
4
|
+
|
5
|
+
# Prerequisites
|
6
|
+
|
7
|
+
Tested on Ruby 1.9.3 & 2.0 (likely still works on 1.8.7, but you are on your own.)
|
8
|
+
|
9
|
+
# Example Code
|
10
|
+
|
11
|
+
require 'unwind'
|
12
|
+
|
13
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
|
14
|
+
follower.resolve
|
15
|
+
assert_equal 'http://ow.ly/i/s1O0', follower.final_url
|
16
|
+
assert_equal 'http://j.mp/xZVND1', follower.original_url
|
17
|
+
assert_equal 2, follower.redirects.count
|
18
|
+
|
19
|
+
# Hat tip
|
20
|
+
|
21
|
+
Most of the code is based on John Nunemaker's blog post [Following Redirects with Net/HTTP](http://railstips.org/blog/archives/2009/03/04/following-redirects-with-nethttp/).
|
22
|
+
|
23
|
+
# License
|
24
|
+
|
25
|
+
Provided under the Do Whatever You Want With This Code License.
|
data/example/sample.rb
ADDED
data/lib/unwind.rb
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
require "unwind/version"
|
2
|
+
require 'net/http'
|
3
|
+
require 'addressable/uri'
|
4
|
+
|
5
|
+
module Unwind
|
6
|
+
|
7
|
+
# Raised by RedirectFollower#resolve once the follower's remaining redirect
# allowance has dropped below zero.
class TooManyRedirects < StandardError
end

# Raised when a redirect response carries no Location header and no
# anchor tag can be found in its body to fall back on.
class MissingRedirectLocation < StandardError
end
|
9
|
+
|
10
|
+
# Follows a chain of redirects starting from a single URL and records every
# hop along the way.
#
# Three kinds of "redirect" are recognised:
# * HTTP 3xx responses (Location header, or an anchor tag in the body when
#   the header is missing),
# * an immediate <meta http-equiv="refresh"> tag in a successful response,
# * a <link rel="canonical"> tag in the final successful response.
class RedirectFollower

  attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects

  # original_url - the URL to start resolving from.
  # limit        - how many redirects may be followed; note that the value
  #                exposed through #redirect_limit is decremented on each hop.
  def initialize(original_url, limit=5)
    @original_url = original_url
    @redirect_limit = limit
    @redirects = []
  end

  # True when the resolved URL differs from the URL we started with.
  def redirected?
    final_url != original_url
  end

  # Requests current_url (defaults to the original URL) and keeps following
  # redirects until a final response is reached.
  #
  # current_url - URL to request for this hop.
  # options     - extra request headers to send (Hash, may be nil).
  #
  # Returns self so calls can be chained.
  # Raises TooManyRedirects when the redirect allowance is exhausted and
  # MissingRedirectLocation when a redirect gives no usable target.
  def resolve(current_url=nil, options={})
    ok_to_continue?

    current_url ||= original_url
    # adding this header because we really only care about resolving the url
    headers = (options || {}).merge("accept-encoding" => "none")

    url = URI.parse(current_url)
    request = Net::HTTP::Get.new(url)
    headers.each { |header, value| request.add_field(header, value) }

    response = Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == 'https') do |http|
      http.request(request)
    end

    if is_response_redirect?(response)
      handle_redirect(redirect_url(response), current_url, response, headers)
    elsif (meta_uri = meta_refresh?(current_url, response))
      handle_redirect(meta_uri, current_url, response, headers)
    else
      handle_final_response(current_url, response)
    end

    self
  end

  # Convenience wrapper: builds a follower and resolves it in one call.
  def self.resolve(original_url, limit=5)
    new(original_url, limit).resolve
  end

  private

  # Remembers a hop and uses up one unit of the redirect allowance.
  def record_redirect(url)
    @redirects << url.to_s
    @redirect_limit -= 1
  end

  def is_response_redirect?(response)
    Net::HTTPRedirection === response
  end

  # Records the hop we are leaving and recurses into the redirect target,
  # carrying along any cookie the redirecting response asked us to set.
  def handle_redirect(uri_to_redirect, url, response, headers)
    record_redirect url
    resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
  end

  # Stores the final response and URL. When a successful page advertises a
  # canonical link, the requested URL is recorded as a hop and the canonical
  # URL (made absolute if necessary) becomes the final URL.
  def handle_final_response(current_url, response)
    current_url = current_url.dup.to_s
    canonical = Net::HTTPSuccess === response && canonical_link?(response)

    if canonical
      @redirects << current_url
      canonical_uri = Addressable::URI.parse(canonical)
      if canonical_uri.relative?
        @final_url = make_url_absolute(current_url, canonical_uri).to_s
      else
        @final_url = canonical
      end
    else
      @final_url = current_url
    end

    @response = response
  end

  # Guard executed before every request.
  def ok_to_continue?
    raise TooManyRedirects if redirect_limit < 0
  end

  # Works out where a redirect response points to.
  #
  # Prefers the Location header (joined onto the request URI when relative).
  # Without a Location header, falls back to the href of the first matching
  # anchor tag in the body.
  #
  # Raises MissingRedirectLocation when neither source yields a target.
  def redirect_url(response)
    location = response['location']

    if location.nil?
      body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
      raise MissingRedirectLocation unless body_match
      # Capture group 1 is the href value; index 0 would be the whole tag.
      Addressable::URI.parse(body_match[1])
    else
      redirect_uri = Addressable::URI.parse(location)
      redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(location) : redirect_uri
    end
  end

  # Returns the (absolute) target of an immediate meta refresh found in a
  # successful response, or nil when there is none. Only the exact
  # content="0; URL=..." form is recognised.
  def meta_refresh?(current_url, response)
    return nil unless Net::HTTPSuccess === response

    # A successful response may legitimately have no body at all.
    body_match = (response.body || "").match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
    return nil unless body_match

    make_url_absolute(current_url, Addressable::URI.parse(body_match[1]))
  end

  # Returns the canonical link advertised in the response body as a String,
  # or false when the body is missing or contains no such link.
  def canonical_link?(response)
    body_match = (response.body || "").match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
    body_match ? Addressable::URI.parse(body_match[1]).to_s : false
  end

  # Returns the headers for the next hop. A 302 response that sets cookies
  # gets those cookies echoed back; otherwise the headers pass through as is.
  def apply_cookie(response, headers)
    if response.code.to_i == 302 && response['set-cookie']
      headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
    else
      # todo: should we delete the cookie at this point if it exists?
      headers
    end
  end

  # Returns relative_url unchanged when it is already absolute; otherwise
  # builds a new URI taking scheme, credentials, host and port from
  # current_url and path, query and fragment from relative_url.
  def make_url_absolute(current_url, relative_url)
    return relative_url unless relative_url.relative?

    current_uri = Addressable::URI.parse(current_url)
    Addressable::URI.new(
      :scheme   => current_uri.scheme,
      :user     => current_uri.user,
      :password => current_uri.password,
      :host     => current_uri.host,
      :port     => current_uri.port,
      :path     => relative_url.path,
      :query    => relative_url.query,
      :fragment => relative_url.fragment)
  end

end
|
150
|
+
|
151
|
+
# Borrowed (stolen) from HTTParty with minor updates to handle all cookies
# existing in a single string.
#
# A Hash of cookie name => value that can be filled from Set-Cookie style
# strings and serialised back into a Cookie request header value.
class CookieHash < Hash

  # Attribute names that describe a cookie rather than being cookies
  # themselves; they are left out of the generated Cookie header.
  CLIENT_COOKIES = %w{path expires domain secure httponly}.freeze

  # Adds cookies to the hash.
  #
  # value - either a Hash (merged in as is) or a String of "name=value"
  #         pairs separated by "; " or ", ".
  #
  # Raises RuntimeError for any other argument type.
  def add_cookies(value)
    case value
    when Hash
      merge!(value)
    when String
      # Expiry dates contain ", " and would otherwise be split into bogus
      # cookies; the attribute may also be the last one in the string.
      value = value.gsub(/expires=[\w,\s\-\:]+(?:;|\z)/i, '')
      value = value.gsub(/httponly[\,\;]*/i, '')
      value.split(/[;,]\s/).each do |cookie|
        # Limit of 2 keeps "=" characters that belong to the value itself.
        name, content = cookie.split('=', 2)
        next if name.nil? || name.strip.empty?
        self[name.strip.to_sym] = content
      end
    else
      raise "add_cookies only takes a Hash or a String"
    end
  end

  # Serialises the stored cookies as "name=value; name=value", skipping the
  # attribute names in CLIENT_COOKIES. The hash itself is left untouched.
  def to_cookie_string
    reject { |k, _v| CLIENT_COOKIES.include?(k.to_s.downcase) }.map { |k, v| "#{k}=#{v}" }.join("; ")
  end

  # Builds a Cookie header value from one or more cookie strings (or hashes).
  def self.to_cookie_string(*cookie_strings)
    h = CookieHash.new
    cookie_strings.each { |cs| h.add_cookies(cs) }
    h.to_cookie_string
  end
end
|
186
|
+
|
187
|
+
|
188
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'vcr'
|
4
|
+
require './lib/unwind'
|
5
|
+
|
6
|
+
VCR.configure do |c|
|
7
|
+
c.hook_into :webmock
|
8
|
+
c.cassette_library_dir = 'vcr_cassettes'
|
9
|
+
end
|
10
|
+
|
11
|
+
describe Unwind::RedirectFollower do
|
12
|
+
|
13
|
+
# needs to be regenerated to properly test...need to stop that :(
|
14
|
+
it 'should handle url with cookie requirement' do
|
15
|
+
VCR.use_cassette('with cookie') do
|
16
|
+
follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
|
17
|
+
assert_equal 200, follower.response.code.to_i
|
18
|
+
assert follower.redirected?
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should resolve the url' do
|
23
|
+
VCR.use_cassette('xZVND1') do
|
24
|
+
follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
|
25
|
+
assert_equal 'http://ow.ly/i/s1O0', follower.final_url
|
26
|
+
assert_equal 'http://j.mp/xZVND1', follower.original_url
|
27
|
+
assert_equal 2, follower.redirects.count
|
28
|
+
assert follower.redirected?
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should handle relative redirects' do
|
33
|
+
VCR.use_cassette('relative stackoverflow') do
|
34
|
+
follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
|
35
|
+
assert follower.redirected?
|
36
|
+
assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'should still handine relative redirects' do
|
41
|
+
# http://bit.ly/A4H3a2
|
42
|
+
VCR.use_cassette('relative stackoverflow 2') do
|
43
|
+
follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
|
44
|
+
assert follower.redirected?
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'should handle redirects to pdfs' do
|
49
|
+
VCR.use_cassette('pdf') do
|
50
|
+
follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
|
51
|
+
assert follower.redirected?
|
52
|
+
assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should handle the lame amazon spaces' do
|
57
|
+
VCR.use_cassette('amazon') do
|
58
|
+
follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
|
59
|
+
assert follower.redirected?
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
#http://amzn.to/xrHQWS
|
64
|
+
|
65
|
+
it 'should handle a https redirect' do
|
66
|
+
VCR.use_cassette('ssl tpope') do
|
67
|
+
follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
|
68
|
+
assert follower.redirected?
|
69
|
+
assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'should not be redirected' do
|
74
|
+
VCR.use_cassette('no redirect') do
|
75
|
+
follower = Unwind::RedirectFollower.resolve('https://flippa.com')
|
76
|
+
assert !follower.redirected?
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'should set the final url as being the canonical url and treat it as s redirect' do
|
81
|
+
VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
|
82
|
+
follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
|
83
|
+
assert follower.redirected?
|
84
|
+
assert 'http://www.scottw.com', follower.final_url
|
85
|
+
assert 'http://www.scottw?test=abc', follower.redirects[0]
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'should raise TooManyRedirects' do
|
90
|
+
VCR.use_cassette('xZVND1') do
|
91
|
+
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
|
92
|
+
too_many_redirects = lambda {follower.resolve}
|
93
|
+
too_many_redirects.must_raise Unwind::TooManyRedirects
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'should raise MissingRedirectLocation' do
|
98
|
+
VCR.use_cassette('missing redirect') do
|
99
|
+
follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
|
100
|
+
missing_redirect_location = lambda{follower.resolve}
|
101
|
+
missing_redirect_location.must_raise Unwind::MissingRedirectLocation
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'should handle a meta-refresh' do
|
106
|
+
VCR.use_cassette('meta refresh') do
|
107
|
+
follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
|
108
|
+
assert follower.redirected?
|
109
|
+
assert_equal "www.google.com.au", URI(follower.final_url).host
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'should handle a relative meta-refresh' do
|
114
|
+
VCR.use_cassette('relative meta refresh') do
|
115
|
+
follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
|
116
|
+
assert follower.redirected?
|
117
|
+
assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'should handle a relative canonical url' do
|
122
|
+
VCR.use_cassette('relative canonical url') do
|
123
|
+
follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
|
124
|
+
assert follower.redirected?
|
125
|
+
assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
data/unwind.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "unwind/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "unwind"
|
7
|
+
s.version = Unwind::VERSION
|
8
|
+
s.authors = ["Scott Watermasysk"]
|
9
|
+
s.email = ["scottwater@gmail.com"]
|
10
|
+
s.homepage = "http://www.scottw.com/unwind"
|
11
|
+
s.licenses = ['MIT']
|
12
|
+
s.summary = %q{Follows a chain redirects.}
|
13
|
+
s.description = <<-description
|
14
|
+
Follows a chain of redirects and reports back on all the steps.
|
15
|
+
Heavily inspired by John Nunemaker's blog post.
|
16
|
+
http://railstips.org/blog/archives/2009/03/04/following-redirects-with-nethttp/
|
17
|
+
description
|
18
|
+
|
19
|
+
s.rubyforge_project = "unwind"
|
20
|
+
|
21
|
+
s.files = `git ls-files`.split("\n")
|
22
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
23
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
24
|
+
s.require_paths = ["lib"]
|
25
|
+
|
26
|
+
# specify any dependencies here; for example:
|
27
|
+
s.add_development_dependency 'rake', '~> 10.1'
|
28
|
+
s.add_development_dependency 'minitest', '~> 5.2'
|
29
|
+
s.add_development_dependency 'vcr', '~> 2.8'
|
30
|
+
s.add_development_dependency 'webmock', '~> 1.17'
|
31
|
+
s.add_runtime_dependency 'addressable', '~> 2.2'
|
32
|
+
end
|