unwind 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 22070510ea3066c9103a8efe3d75cc4106642a9b
4
+ data.tar.gz: a55418df179313c6ad2c60dfbf8f8dd4600b4c18
5
+ SHA512:
6
+ metadata.gz: 2b40822f4063259cbfad43246342d55f13fcdf6291f85e3b67decd4186e565b1ed373fd0647b0e995cd83ddf7cec88c4c13aef0959d6600de4e9dccc8f16e2e6
7
+ data.tar.gz: b6f9a718d3890d9c0570ee4be065727c7479ce9b342161241fec08ec0274241fde6ba7d025aa779c908b66d27f53cd039b7b0451d27a4ff0f0667af1ed137f48
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# protected in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in unwind.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.pattern = "test/*_test.rb"
6
+ end
7
+
8
+ task :default => :test
data/Readme.md ADDED
@@ -0,0 +1,25 @@
1
+ # Description
2
+
3
+ Follows a chain of HTTP redirects (for example, from a shortened URL) and records every hop along the way.
4
+
5
+ # Prerequisites
6
+
7
+ Tested on Ruby 1.9.3 & 2.0 (likely still works on 1.8.7, but you are on your own.)
8
+
9
+ # Example Code
10
+
11
+ require 'unwind'
12
+
13
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
14
+ follower.resolve
15
+ assert_equal 'http://ow.ly/i/s1O0', follower.final_url
16
+ assert_equal 'http://j.mp/xZVND1', follower.original_url
17
+ assert_equal 2, follower.redirects.count
18
+
19
+ # Hat tip
20
+
21
+ Most of the code is based on John Nunemaker's blog post [Following Redirects with Net/HTTP](http://railstips.org/blog/archives/2009/03/04/following-redirects-with-nethttp/).
22
+
23
+ # License
24
+
25
+ Provided under the Do Whatever You Want With This Code License.
data/example/sample.rb ADDED
@@ -0,0 +1,9 @@
1
+ require './lib/unwind'
2
+
3
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
4
+ follower.resolve
5
+ follower.redirects.each {|url| puts "redirects to #{url}"}
6
+ puts follower.original_url
7
+ puts follower.final_url
8
+
9
+
@@ -0,0 +1,3 @@
1
+ module Unwind
2
+ VERSION = "0.2.1"
3
+ end
data/lib/unwind.rb ADDED
@@ -0,0 +1,188 @@
1
+ require "unwind/version"
2
+ require 'net/http'
3
+ require 'addressable/uri'
4
+
5
+ module Unwind
6
+
7
+ class TooManyRedirects < StandardError; end
8
+ class MissingRedirectLocation < StandardError; end
9
+
10
# Follows a chain of redirects starting from a single URL and records every
# hop on the way.
#
# Three kinds of "redirect" are understood:
# * HTTP redirection responses (Net::HTTPRedirection),
# * a zero-delay <meta http-equiv="refresh"> tag in a successful response,
# * a <link rel="canonical"> tag on the final page, which replaces the
#   final URL.
#
# After #resolve, #final_url holds the resolved URL, #redirects the URLs that
# were left behind, and #response the last Net::HTTP response.
class RedirectFollower

  attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects

  # original_url - the URL to start from.
  # limit        - how many redirects may be followed before
  #                TooManyRedirects is raised.
  def initialize(original_url, limit=5)
    @original_url, @redirect_limit = original_url, limit
    @redirects = []
  end

  # True when the final URL differs from the original one. Note that this is
  # also true before #resolve has run, because final_url is still nil then.
  def redirected?
    final_url != original_url
  end

  # Issues a GET for current_url (defaults to the original URL) and keeps
  # following redirects until a final response is reached.
  #
  # current_url - URL to request; nil means "start at original_url".
  # options     - extra request headers (Hash) to send with the request.
  #
  # Returns self. Raises TooManyRedirects once the redirect limit is used up
  # and MissingRedirectLocation when a redirect carries no usable target.
  def resolve(current_url=nil, options={})
    ok_to_continue?

    current_url ||= original_url
    #adding this header because we really only care about resolving the url
    headers = (options || {}).merge("accept-encoding" => "none")

    # current_url is an Addressable::URI on every hop after the first one;
    # hand URI.parse an explicit String in both cases.
    url = URI.parse(current_url.to_s)

    request = Net::HTTP::Get.new(url)
    headers.each { |header, value| request.add_field(header, value) }

    response = Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == 'https') do |http|
      http.request(request)
    end

    if is_response_redirect?(response)
      handle_redirect(redirect_url(response), current_url, response, headers)
    elsif meta_uri = meta_refresh?(current_url, response)
      handle_redirect(meta_uri, current_url, response, headers)
    else
      handle_final_response(current_url, response)
    end

    self
  end

  # Convenience wrapper: builds a follower and resolves it in one call.
  def self.resolve(original_url, limit=5)
    new(original_url, limit).resolve
  end

  private

  # Remembers a URL that was redirected away from and consumes one unit of
  # the redirect budget.
  def record_redirect(url)
    @redirects << url.to_s
    @redirect_limit -= 1
  end

  def is_response_redirect?(response)
    response.is_a?(Net::HTTPRedirection)
  end

  # Records the hop and continues resolving at the redirect target, carrying
  # over any cookie the redirecting response asked for.
  def handle_redirect(uri_to_redirect, url, response, headers)
    record_redirect url
    resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
  end

  # Stores the final response and URL. When a successful page declares a
  # canonical link, the fetched URL is recorded as a redirect and the
  # canonical URL (made absolute if necessary) becomes the final URL.
  def handle_final_response(current_url, response)
    current_url = current_url.dup.to_s
    canonical = response.is_a?(Net::HTTPSuccess) && canonical_link?(response)
    if canonical
      @redirects << current_url
      canonical_uri = Addressable::URI.parse(canonical)
      @final_url = if canonical_uri.relative?
        make_url_absolute(current_url, canonical_uri).to_s
      else
        canonical
      end
    else
      @final_url = current_url
    end
    @response = response
  end

  # Raises TooManyRedirects once the redirect budget has gone negative.
  def ok_to_continue?
    raise TooManyRedirects if redirect_limit < 0
  end

  # Works out where a redirect response points to.
  #
  # Uses the Location header when present (resolved against the response's
  # own URI if it is relative). Otherwise falls back to the first
  # <a href="..."> link in the body; raises MissingRedirectLocation when
  # there is none.
  def redirect_url(response)
    location = response['location']
    if location.nil?
      body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
      raise MissingRedirectLocation unless body_match
      # Group 1 is the href value; index 0 would be the entire <a ...> tag.
      Addressable::URI.parse(body_match[1])
    else
      redirect_uri = Addressable::URI.parse(location)
      redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(location) : redirect_uri
    end
  end

  # Returns the absolute target of a zero-delay meta refresh tag found in a
  # successful response, or nil when there is none.
  def meta_refresh?(current_url, response)
    return unless response.is_a?(Net::HTTPSuccess)

    # body may be nil (e.g. a response without content), hence to_s
    body_match = response.body.to_s.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
    return unless body_match

    make_url_absolute(current_url, Addressable::URI.parse(body_match[1]))
  end

  # Returns the href of a <link rel="canonical"> tag in the body as a
  # String, or false when the body has no such tag (or no body at all).
  def canonical_link?(response)
    body_match = response.body.to_s.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
    body_match ? Addressable::URI.parse(body_match[1]).to_s : false
  end

  # Adds the cookies set by a 302 response to the headers used for the next
  # request; any other response leaves the headers untouched.
  def apply_cookie(response, headers)
    if response.code.to_i == 302 && response['set-cookie']
      headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
    else
      #todo: should we delete the cookie at this point if it exists?
      headers
    end
  end

  # Resolves relative_url (an Addressable::URI) against current_url.
  # Absolute URLs are returned unchanged. Resolution is delegated to
  # Addressable::URI#join so that path-relative ("next/page") and query-only
  # ("?a=b") references are resolved against the current path instead of
  # being glued directly onto the host.
  def make_url_absolute(current_url, relative_url)
    return relative_url unless relative_url.relative?

    Addressable::URI.parse(current_url).join(relative_url)
  end

end
150
+
151
# Collects cookies from Set-Cookie style strings (or hashes) and renders them
# as the value of a Cookie request header.
#
# Borrowed (stolen) from HTTParty with minor updates to handle all cookies
# existing in a single string.
class CookieHash < Hash

  # Attribute names that describe a cookie rather than being cookies
  # themselves; they are left out of the rendered cookie string.
  CLIENT_COOKIES = %w[path expires domain secure httponly].freeze

  # Adds cookies to this hash.
  #
  # value - a Hash of name => value pairs, or a String holding one or more
  #         cookies separated by "; " or ", ".
  #
  # Raises RuntimeError for any other argument type.
  def add_cookies(value)
    case value
    when Hash
      merge!(value)
    when String
      # Strip "expires=...;" first: its date contains ", ", which the split
      # below would otherwise treat as a cookie separator.
      value = value.gsub(/expires=[\w,\s\-\:]+;/i, '')
      value = value.gsub(/httponly[\,\;]*/i, '')
      value.split(/[;,]\s/).each do |cookie|
        # Limit of 2 keeps '=' characters that are part of the cookie value.
        name, content = cookie.split('=', 2)
        # Skip empty fragments such as the one produced by a leading "; ".
        next if name.nil? || name.strip.empty?

        self[name.strip.to_sym] = content
      end
    else
      raise "add_cookies only takes a Hash or a String"
    end
  end

  # Renders the stored cookies as "name=value; name=value", leaving out the
  # attribute entries listed in CLIENT_COOKIES. The hash itself is not
  # modified.
  def to_cookie_string
    reject { |name, _| CLIENT_COOKIES.include?(name.to_s.downcase) }
      .map { |name, content| "#{name}=#{content}" }
      .join("; ")
  end

  # Builds a cookie string from any number of cookie strings or hashes.
  def self.to_cookie_string(*cookie_strings)
    jar = new
    cookie_strings.each { |cs| jar.add_cookies(cs) }
    jar.to_cookie_string
  end
end
186
+
187
+
188
+ end
@@ -0,0 +1,129 @@
1
+ require 'minitest'
2
+ require 'minitest/autorun'
3
+ require 'vcr'
4
+ require './lib/unwind'
5
+
6
+ VCR.configure do |c|
7
+ c.hook_into :webmock
8
+ c.cassette_library_dir = 'vcr_cassettes'
9
+ end
10
+
11
+ describe Unwind::RedirectFollower do
12
+
13
+ # needs to be regenerated to properly test...need to stop that :(
14
+ it 'should handle url with cookie requirement' do
15
+ VCR.use_cassette('with cookie') do
16
+ follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
17
+ assert_equal 200, follower.response.code.to_i
18
+ assert follower.redirected?
19
+ end
20
+ end
21
+
22
+ it 'should resolve the url' do
23
+ VCR.use_cassette('xZVND1') do
24
+ follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
25
+ assert_equal 'http://ow.ly/i/s1O0', follower.final_url
26
+ assert_equal 'http://j.mp/xZVND1', follower.original_url
27
+ assert_equal 2, follower.redirects.count
28
+ assert follower.redirected?
29
+ end
30
+ end
31
+
32
+ it 'should handle relative redirects' do
33
+ VCR.use_cassette('relative stackoverflow') do
34
+ follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
35
+ assert follower.redirected?
36
+ assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
37
+ end
38
+ end
39
+
40
+ it 'should still handine relative redirects' do
41
+ # http://bit.ly/A4H3a2
42
+ VCR.use_cassette('relative stackoverflow 2') do
43
+ follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
44
+ assert follower.redirected?
45
+ end
46
+ end
47
+
48
+ it 'should handle redirects to pdfs' do
49
+ VCR.use_cassette('pdf') do
50
+ follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
51
+ assert follower.redirected?
52
+ assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
53
+ end
54
+ end
55
+
56
+ it 'should handle the lame amazon spaces' do
57
+ VCR.use_cassette('amazon') do
58
+ follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
59
+ assert follower.redirected?
60
+ end
61
+ end
62
+
63
+ #http://amzn.to/xrHQWS
64
+
65
+ it 'should handle a https redirect' do
66
+ VCR.use_cassette('ssl tpope') do
67
+ follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
68
+ assert follower.redirected?
69
+ assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
70
+ end
71
+ end
72
+
73
+ it 'should not be redirected' do
74
+ VCR.use_cassette('no redirect') do
75
+ follower = Unwind::RedirectFollower.resolve('https://flippa.com')
76
+ assert !follower.redirected?
77
+ end
78
+ end
79
+
80
# A canonical <link> on the fetched page becomes the final URL, and the URL
# that was actually requested is recorded as a redirect hop.
it 'should set the final url as being the canonical url and treat it as s redirect' do
  VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
    follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
    assert follower.redirected?
    # These were `assert 'literal', value`, which always passes because the
    # literal is truthy and the value is only used as the failure message.
    # NOTE(review): the expected final URL was never actually checked before;
    # confirm it against the recorded cassette.
    assert_equal 'http://www.scottw.com', follower.final_url
    # The first recorded hop is the URL the request started from.
    assert_equal 'http://www.scottw.com?test=abc', follower.redirects[0]
  end
end
88
+
89
+ it 'should raise TooManyRedirects' do
90
+ VCR.use_cassette('xZVND1') do
91
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
92
+ too_many_redirects = lambda {follower.resolve}
93
+ too_many_redirects.must_raise Unwind::TooManyRedirects
94
+ end
95
+ end
96
+
97
+ it 'should raise MissingRedirectLocation' do
98
+ VCR.use_cassette('missing redirect') do
99
+ follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
100
+ missing_redirect_location = lambda{follower.resolve}
101
+ missing_redirect_location.must_raise Unwind::MissingRedirectLocation
102
+ end
103
+ end
104
+
105
+ it 'should handle a meta-refresh' do
106
+ VCR.use_cassette('meta refresh') do
107
+ follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
108
+ assert follower.redirected?
109
+ assert_equal "www.google.com.au", URI(follower.final_url).host
110
+ end
111
+ end
112
+
113
+ it 'should handle a relative meta-refresh' do
114
+ VCR.use_cassette('relative meta refresh') do
115
+ follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
116
+ assert follower.redirected?
117
+ assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
118
+ end
119
+ end
120
+
121
+ it 'should handle a relative canonical url' do
122
+ VCR.use_cassette('relative canonical url') do
123
+ follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
124
+ assert follower.redirected?
125
+ assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
126
+ end
127
+ end
128
+
129
+ end
data/unwind.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "unwind/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "unwind"
7
+ s.version = Unwind::VERSION
8
+ s.authors = ["Scott Watermasysk"]
9
+ s.email = ["scottwater@gmail.com"]
10
+ s.homepage = "http://www.scottw.com/unwind"
11
+ s.licenses = ['MIT']
12
+ s.summary = %q{Follows a chain redirects.}
13
+ s.description = <<-description
14
+ Follows a chain of redirects and reports back on all the steps.
15
+ Heavily inspired by John Nunemaker's blog post.
16
+ http://railstips.org/blog/archives/2009/03/04/following-redirects-with-nethttp/
17
+ description
18
+
19
+ s.rubyforge_project = "unwind"
20
+
21
+ s.files = `git ls-files`.split("\n")
22
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
23
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
24
+ s.require_paths = ["lib"]
25
+
26
+ # specify any dependencies here; for example:
27
+ s.add_development_dependency 'rake', '~> 10.1'
28
+ s.add_development_dependency 'minitest', '~> 5.2'
29
+ s.add_development_dependency 'vcr', '~> 2.8'
30
+ s.add_development_dependency 'webmock', '~> 1.17'
31
+ s.add_runtime_dependency 'addressable', '~> 2.2'
32
+ end