unwind 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 22070510ea3066c9103a8efe3d75cc4106642a9b
4
+ data.tar.gz: a55418df179313c6ad2c60dfbf8f8dd4600b4c18
5
+ SHA512:
6
+ metadata.gz: 2b40822f4063259cbfad43246342d55f13fcdf6291f85e3b67decd4186e565b1ed373fd0647b0e995cd83ddf7cec88c4c13aef0959d6600de4e9dccc8f16e2e6
7
+ data.tar.gz: b6f9a718d3890d9c0570ee4be065727c7479ce9b342161241fec08ec0274241fde6ba7d025aa779c908b66d27f53cd039b7b0451d27a4ff0f0667af1ed137f48
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in unwind.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# Registers the test task; it picks up every *_test.rb file directly under test/.
Rake::TestTask.new { |test_task| test_task.pattern = "test/*_test.rb" }

# Running plain `rake` runs the test task.
task default: :test
data/Readme.md ADDED
@@ -0,0 +1,25 @@
1
+ # Description
2
+
3
+ Follows a series of HTTP redirects (for example, shortened URLs) and records every URL visited along the way.
4
+
5
+ # Prerequisites
6
+
7
+ Tested on Ruby 1.9.3 & 2.0 (likely still works on 1.8.7, but you are on your own.)
8
+
9
+ # Example Code
10
+
11
+ require 'unwind'
12
+
13
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
14
+ follower.resolve
15
+ assert_equal 'http://ow.ly/i/s1O0', follower.final_url
16
+ assert_equal 'http://j.mp/xZVND1', follower.original_url
17
+ assert_equal 2, follower.redirects.count
18
+
19
+ # Hat tip
20
+
21
+ Most of the code is based on John Nunemaker's blog post [Following Redirects with Net/HTTP](http://railstips.org/blog/archives/2009/03/04/following-redirects-with-nethttp/).
22
+
23
+ # License
24
+
25
+ Provided under the Do Whatever You Want With This Code License.
data/example/sample.rb ADDED
@@ -0,0 +1,9 @@
1
require './lib/unwind'

# Resolve a shortened URL, then print each hop followed by the start and end URLs.
follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
follower.resolve

follower.redirects.each do |hop|
  puts "redirects to #{hop}"
end

puts follower.original_url
puts follower.final_url
8
+
9
+
@@ -0,0 +1,3 @@
1
module Unwind
  # Version of the gem; frozen so the shared string cannot be modified in place.
  VERSION = "0.2.1".freeze
end
data/lib/unwind.rb ADDED
@@ -0,0 +1,188 @@
1
+ require "unwind/version"
2
+ require 'net/http'
3
+ require 'addressable/uri'
4
+
5
+ module Unwind
6
+
7
# Raised by RedirectFollower when the redirect budget has dropped below zero.
class TooManyRedirects < StandardError
end

# Raised by RedirectFollower when a redirect response has no Location header
# and no link can be found in its body either.
class MissingRedirectLocation < StandardError
end
9
+
10
# Follows a chain of HTTP redirects starting from one URL and records every
# hop along the way.
#
# Besides 3xx responses, a successful page also counts as a hop when it
# contains a zero-delay <meta http-equiv="refresh"> tag or a
# <link rel="canonical"> tag.
class RedirectFollower

  attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects

  # original_url - the URL to start resolving from.
  # limit        - redirect budget. Each recorded redirect decrements it and
  #                TooManyRedirects is raised once it has dropped below zero.
  def initialize(original_url, limit=5)
    @original_url, @redirect_limit = original_url, limit
    @redirects = []
  end

  # True when the final URL differs from the original one. final_url is nil
  # until #resolve has completed, so this is also true before resolving.
  def redirected?
    final_url != original_url
  end

  # Requests current_url (the original URL by default) and keeps following
  # redirects until a final response is reached.
  #
  # current_url - URL to request for this hop.
  # options     - extra request headers carried over from the previous hop.
  #
  # Returns self. Raises TooManyRedirects when the budget is exhausted and
  # MissingRedirectLocation when a redirect gives no target.
  def resolve(current_url=nil, options={})
    ok_to_continue?

    current_url ||= original_url
    # adding this header because we really only care about resolving the url
    headers = (options || {}).merge("accept-encoding" => "none")

    # Later hops arrive as Addressable::URI objects; stringify for stdlib URI.
    url = URI.parse(current_url.to_s)

    request = Net::HTTP::Get.new(url)
    headers.each { |header, value| request.add_field(header, value) }

    response = Net::HTTP.start(url.host, url.port, :use_ssl => url.scheme == 'https') do |http|
      http.request(request)
    end

    if is_response_redirect?(response)
      handle_redirect(redirect_url(response), current_url, response, headers)
    elsif meta_uri = meta_refresh?(current_url, response)
      handle_redirect(meta_uri, current_url, response, headers)
    else
      handle_final_response(current_url, response)
    end

    self
  end

  # Convenience wrapper: builds a follower and resolves it in one call.
  def self.resolve(original_url, limit=5)
    new(original_url, limit).resolve
  end

  private

  # Remembers the URL we are leaving and spends one unit of the budget.
  def record_redirect(url)
    @redirects << url.to_s
    @redirect_limit -= 1
  end

  def is_response_redirect?(response)
    Net::HTTPRedirection === response
  end

  # Records the current hop and resolves the redirect target, forwarding any
  # cookie that the redirecting response asked us to send.
  def handle_redirect(uri_to_redirect, url, response, headers)
    record_redirect url
    resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
  end

  # Stores the final response. When a successful page declares a canonical
  # link, that link becomes the final URL and the requested URL is recorded
  # as one more hop.
  def handle_final_response(current_url, response)
    current_url = current_url.dup.to_s
    canonical = Net::HTTPSuccess === response && canonical_link?(response)

    if canonical
      @redirects << current_url
      canonical_uri = Addressable::URI.parse(canonical)
      @final_url =
        if canonical_uri.relative?
          make_url_absolute(current_url, canonical_uri).to_s
        else
          canonical
        end
    else
      @final_url = current_url
    end

    @response = response
  end

  def ok_to_continue?
    raise TooManyRedirects if redirect_limit < 0
  end

  # Works out where a redirect response points to. Falls back to the first
  # <a href="..."> in the body when there is no Location header.
  #
  # Raises MissingRedirectLocation when neither source yields a target.
  def redirect_url(response)
    location = response['location']

    if location.nil?
      body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
      raise MissingRedirectLocation unless body_match
      # Group 1 is the href value; group 0 would be the whole <a ...> tag.
      Addressable::URI.parse(body_match[1])
    else
      redirect_uri = Addressable::URI.parse(location)
      redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(location) : redirect_uri
    end
  end

  # Returns the absolute target of a zero-delay meta refresh found in a
  # successful response, or nil when there is none.
  def meta_refresh?(current_url, response)
    return unless Net::HTTPSuccess === response

    # A successful response may carry no body at all, hence to_s.
    body_match = response.body.to_s.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
    return unless body_match

    make_url_absolute(current_url, Addressable::URI.parse(body_match[1]))
  end

  # Returns the canonical link declared in the body as a String, or false
  # when the body declares none (or is empty).
  def canonical_link?(response)
    body_match = response.body.to_s.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
    body_match ? Addressable::URI.parse(body_match[1]).to_s : false
  end

  # Adds a cookie header for the next hop when a 302 response sets cookies;
  # otherwise the headers are passed through unchanged.
  def apply_cookie(response, headers)
    if response.code.to_i == 302 && response['set-cookie']
      headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
    else
      #todo: should we delete the cookie at this point if it exists?
      headers
    end
  end

  # Combines the scheme, credentials, host and port of current_url with the
  # path, query and fragment of relative_url. Absolute URLs are returned as is.
  def make_url_absolute(current_url, relative_url)
    return relative_url unless relative_url.relative?

    current_uri = Addressable::URI.parse(current_url)
    Addressable::URI.new(
      :scheme   => current_uri.scheme,
      :user     => current_uri.user,
      :password => current_uri.password,
      :host     => current_uri.host,
      :port     => current_uri.port,
      :path     => relative_url.path,
      :query    => relative_url.query,
      :fragment => relative_url.fragment)
  end

end
150
+
151
+ #borrowed (stolen) from HTTParty with minor updates
152
+ #to handle all cookies existing in a single string
153
# Hash of cookie names to values that can be built from Set-Cookie style
# strings and rendered back as the value of a Cookie request header.
class CookieHash < Hash

  # Attribute names that describe a cookie to the client; they are left out
  # when the cookies are rendered for a request.
  CLIENT_COOKIES = %w{path expires domain secure httponly}.freeze

  # Merges cookies into this hash.
  #
  # value - a Hash of name => value pairs, or a String holding one or more
  #         cookies separated by "; " or ", ".
  #
  # Raises RuntimeError for any other argument type.
  def add_cookies(value)
    case value
    when Hash
      merge!(value)
    when String
      # Drop the expires attribute up front: its date contains ", " which
      # would otherwise be split as if it separated two cookies. The
      # attribute may be the last one, so the trailing ";" is optional.
      value = value.gsub(/expires=[\w,\s\-\:]+(?:;|\z)/i, '')
      value = value.gsub(/httponly[\,\;]*/i, '')
      value.split(/[;,]\s/).each do |cookie|
        # Limit of 2 keeps "=" characters that are part of the value.
        name, content = cookie.split('=', 2)
        next if name.nil? || name.strip.empty?
        self[name.strip.to_sym] = content
      end
    else
      raise "add_cookies only takes a Hash or a String"
    end
  end

  # Renders the cookies as "name=value; name=value", leaving out client-side
  # attributes. The hash itself is not modified.
  def to_cookie_string
    reject { |name, _| CLIENT_COOKIES.include?(name.to_s.downcase) }
      .map { |name, content| "#{name}=#{content}" }
      .join("; ")
  end

  # Builds a Cookie header value from any number of cookie strings or hashes.
  def self.to_cookie_string(*cookie_strings)
    jar = CookieHash.new
    cookie_strings.each { |cookies| jar.add_cookies(cookies) }
    jar.to_cookie_string
  end
end
186
+
187
+
188
+ end
@@ -0,0 +1,129 @@
1
+ require 'minitest'
2
+ require 'minitest/autorun'
3
+ require 'vcr'
4
+ require './lib/unwind'
5
+
6
# Route HTTP through WebMock and keep cassettes under vcr_cassettes/.
VCR.configure do |config|
  config.cassette_library_dir = 'vcr_cassettes'
  config.hook_into :webmock
end
10
+
11
# Behaviour of Unwind::RedirectFollower, replayed from recorded VCR cassettes.
describe Unwind::RedirectFollower do

  # needs to be regenerated to properly test...need to stop that :(
  it 'should handle url with cookie requirement' do
    VCR.use_cassette('with cookie') do
      follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
      assert_equal 200, follower.response.code.to_i
      assert follower.redirected?
    end
  end

  it 'should resolve the url' do
    VCR.use_cassette('xZVND1') do
      follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
      assert_equal 'http://ow.ly/i/s1O0', follower.final_url
      assert_equal 'http://j.mp/xZVND1', follower.original_url
      assert_equal 2, follower.redirects.count
      assert follower.redirected?
    end
  end

  it 'should handle relative redirects' do
    VCR.use_cassette('relative stackoverflow') do
      follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
      assert follower.redirected?
      assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
    end
  end

  it 'should still handine relative redirects' do
    # http://bit.ly/A4H3a2
    VCR.use_cassette('relative stackoverflow 2') do
      follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
      assert follower.redirected?
    end
  end

  it 'should handle redirects to pdfs' do
    VCR.use_cassette('pdf') do
      follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
      assert follower.redirected?
      assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
    end
  end

  it 'should handle the lame amazon spaces' do
    VCR.use_cassette('amazon') do
      follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
      assert follower.redirected?
    end
  end

  #http://amzn.to/xrHQWS

  it 'should handle a https redirect' do
    VCR.use_cassette('ssl tpope') do
      follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
      assert follower.redirected?
      assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
    end
  end

  it 'should not be redirected' do
    VCR.use_cassette('no redirect') do
      follower = Unwind::RedirectFollower.resolve('https://flippa.com')
      refute follower.redirected?
    end
  end

  it 'should set the final url as being the canonical url and treat it as s redirect' do
    VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
      follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
      assert follower.redirected?
      # assert(value, message) only checks truthiness, so a string literal as
      # the first argument always passes; compare explicitly instead.
      # NOTE(review): confirm the expected canonical URL against the cassette.
      assert_equal 'http://www.scottw.com', follower.final_url
      # The first recorded hop is the URL the follower was started with.
      assert_equal 'http://www.scottw.com?test=abc', follower.redirects[0]
    end
  end

  it 'should raise TooManyRedirects' do
    VCR.use_cassette('xZVND1') do
      follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
      assert_raises(Unwind::TooManyRedirects) { follower.resolve }
    end
  end

  it 'should raise MissingRedirectLocation' do
    VCR.use_cassette('missing redirect') do
      follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
      assert_raises(Unwind::MissingRedirectLocation) { follower.resolve }
    end
  end

  it 'should handle a meta-refresh' do
    VCR.use_cassette('meta refresh') do
      follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
      assert follower.redirected?
      assert_equal "www.google.com.au", URI(follower.final_url).host
    end
  end

  it 'should handle a relative meta-refresh' do
    VCR.use_cassette('relative meta refresh') do
      follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
      assert follower.redirected?
      assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
    end
  end

  it 'should handle a relative canonical url' do
    VCR.use_cassette('relative canonical url') do
      follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
      assert follower.redirected?
      assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
    end
  end

end
data/unwind.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "unwind/version"
4
+
5
# Gem metadata for unwind. The file lists are taken from git, so the gem
# must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "unwind"
  s.version     = Unwind::VERSION
  s.authors     = ["Scott Watermasysk"]
  s.email       = ["scottwater@gmail.com"]
  s.homepage    = "http://www.scottw.com/unwind"
  s.licenses    = ['MIT']
  s.summary     = %q{Follows a chain of redirects.}
  s.description = <<-description
    Follows a chain of redirects and reports back on all the steps.
    Heavily inspired by John Nunemaker's blog post.
    http://railstips.org/blog/archives/2009/03/04/following-redirects-with-nethttp/
  description

  # rubyforge_project is deprecated in RubyGems and has no replacement, so it
  # is no longer set here.

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # specify any dependencies here; for example:
  s.add_development_dependency 'rake', '~> 10.1'
  s.add_development_dependency 'minitest', '~> 5.2'
  s.add_development_dependency 'vcr', '~> 2.8'
  s.add_development_dependency 'webmock', '~> 1.17'
  s.add_runtime_dependency 'addressable', '~> 2.2'
end