unwind 0.2.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -2,4 +2,3 @@
2
2
  .bundle
3
3
  Gemfile.lock
4
4
  pkg/*
5
- .rvmrc
data/Readme.md CHANGED
@@ -4,7 +4,7 @@ Enables following a series of redirects (shortened urls)
4
4
 
5
5
  # Prerequisites
6
6
 
7
- Tested on Ruby 1.9.3 & 2.0 (likely still works on 1.8.7, but you are on your own.)
7
+ Only tested on Ruby 1.9.3
8
8
 
9
9
  # Example Code
10
10
 
@@ -22,4 +22,4 @@ Most of the code is based on John Nunemaker's blog post [Following Redirects wit
22
22
 
23
23
  # License
24
24
 
25
- Provided under the Do Whatever You Want With This Code License.
25
+ Provided under the Do Whatever You Want With This Code License.
data/lib/unwind/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Unwind
2
- VERSION = "0.2.1"
2
+ VERSION = "0.9.0"
3
3
  end
data/lib/unwind.rb CHANGED
@@ -1,188 +1,53 @@
1
1
  require "unwind/version"
2
2
  require 'net/http'
3
- require 'addressable/uri'
4
3
 
5
4
  module Unwind
6
5
 
7
- class TooManyRedirects < StandardError; end
8
- class MissingRedirectLocation < StandardError; end
6
+ class TooManyRedirects < StandardError; end
9
7
 
10
- class RedirectFollower
8
+ class RedirectFollower
11
9
 
12
- attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
10
+ attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
13
11
 
14
- def initialize(original_url, limit=5)
15
- @original_url, @redirect_limit = original_url, limit
16
- @redirects = []
17
- end
12
+ def initialize(original_url, limit=5)
13
+ @original_url, @redirect_limit = original_url, limit
14
+ @redirects = []
15
+ end
18
16
 
19
- def redirected?
20
- !(self.final_url == self.original_url)
21
- end
17
+ def resolve(current_url=nil)
22
18
 
23
- def resolve(current_url=nil, options={})
24
- ok_to_continue?
19
+ ok_to_continue?
25
20
 
26
- current_url ||= self.original_url
27
- #adding this header because we really only care about resolving the url
28
- headers = (options || {}).merge({"accept-encoding" => "none"})
21
+ current_url ||= self.original_url
29
22
 
30
- url = URI.parse(current_url)
23
+ response = Net::HTTP.get_response(URI.parse(current_url))
31
24
 
32
- request = Net::HTTP::Get.new(url)
25
+ if response.kind_of?(Net::HTTPRedirection)
26
+ @redirects << current_url
27
+ @redirect_limit -= 1
28
+ resolve(redirect_url(response))
29
+ else
30
+ @final_url = current_url
31
+ @response = response
32
+ self
33
+ end
34
+ end
33
35
 
34
- headers.each do |header, value|
35
- request.add_field(header, value)
36
- end
36
+ private
37
37
 
38
- response = Net::HTTP.start(
39
- url.host,
40
- url.port,
41
- :use_ssl => url.scheme == 'https'
42
- ) do |http|
43
- http.request(request)
44
- end
38
+ def ok_to_continue?
39
+ raise TooManyRedirects if redirect_limit < 0
40
+ end
45
41
 
46
- if is_response_redirect?(response)
47
- handle_redirect(redirect_url(response), current_url, response, headers)
48
- elsif meta_uri = meta_refresh?(current_url, response)
49
- handle_redirect(meta_uri, current_url, response, headers)
50
- else
51
- handle_final_response(current_url, response)
52
- end
53
-
54
- self
55
- end
56
-
57
- def self.resolve(original_url, limit=5)
58
- new(original_url, limit).resolve
59
- end
60
-
61
- private
62
-
63
- def record_redirect(url)
64
- @redirects << url.to_s
65
- @redirect_limit -= 1
66
- end
67
-
68
- def is_response_redirect?(response)
69
- Net::HTTPRedirection === response
70
- end
71
-
72
- def handle_redirect(uri_to_redirect, url, response, headers)
73
- record_redirect url
74
- resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
75
- end
76
-
77
- def handle_final_response(current_url, response)
78
- current_url = current_url.dup.to_s
79
- if Net::HTTPSuccess === response && canonical = canonical_link?(response)
80
- @redirects << current_url
81
- if Addressable::URI.parse(canonical).relative?
82
- @final_url = make_url_absolute(current_url, Addressable::URI.parse(canonical)).to_s
83
- else
84
- @final_url = canonical
85
- end
86
-
87
- else
88
- @final_url = current_url
89
- end
90
- @response = response
91
- end
92
-
93
- def ok_to_continue?
94
- raise TooManyRedirects if redirect_limit < 0
95
- end
96
-
97
- def redirect_url(response)
98
- if response['location'].nil?
99
- body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
100
- raise MissingRedirectLocation unless body_match
101
- Addressable::URI.parse(body_match[0])
102
- else
103
- redirect_uri = Addressable::URI.parse(response['location'])
104
- redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(response['location']) : redirect_uri
105
- end
106
- end
107
-
108
- def meta_refresh?(current_url, response)
109
- if Net::HTTPSuccess === response
110
- body_match = response.body.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
111
- if body_match
112
- uri = Addressable::URI.parse(body_match[1])
113
- make_url_absolute(current_url, uri)
114
- end
115
- end
116
- end
117
-
118
- def canonical_link?(response)
119
- body_match = response.body.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
120
- body_match ? Addressable::URI.parse(body_match[1]).to_s : false
121
- end
122
-
123
- def apply_cookie(response, headers)
124
- if response.code.to_i == 302 && response['set-cookie']
125
- headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
126
- else
127
- #todo: should we delete the cookie at this point if it exists?
128
- headers
129
- end
130
- end
131
-
132
- def make_url_absolute(current_url, relative_url)
133
- current_uri = Addressable::URI.parse(current_url)
134
- if (relative_url.relative?)
135
- url = Addressable::URI.new(
136
- :scheme => current_uri.scheme,
137
- :user => current_uri.user,
138
- :password => current_uri.password,
139
- :host => current_uri.host,
140
- :port => current_uri.port,
141
- :path => relative_url.path,
142
- :query => relative_url.query,
143
- :fragment => relative_url.fragment)
144
- else
145
- relative_url
146
- end
147
- end
148
-
149
- end
150
-
151
- #borrowed (stolen) from HTTParty with minor updates
152
- #to handle all cookies existing in a single string
153
- class CookieHash < Hash
154
-
155
- CLIENT_COOKIES = %w{path expires domain path secure httponly}
156
-
157
- def add_cookies(value)
158
- case value
159
- when Hash
160
- merge!(value)
161
- when String
162
- value = value.gsub(/expires=[\w,\s\-\:]+;/i, '')
163
- value = value.gsub(/httponly[\,\;]*/i, '')
164
- value.split(/[;,]\s/).each do |cookie|
165
- array = cookie.split('=')
166
- self[array[0].strip.to_sym] = array[1]
167
- end
168
- else
169
- raise "add_cookies only takes a Hash or a String"
170
- end
171
- end
172
-
173
- def to_cookie_string
174
- delete_if { |k, v| CLIENT_COOKIES.include?(k.to_s.downcase) }.collect { |k, v| "#{k}=#{v}" }.join("; ")
175
- end
176
-
177
- def self.to_cookie_string(*cookie_strings)
178
- h = CookieHash.new
179
- cookie_strings.each do |cs|
180
- h.add_cookies(cs)
181
- end
182
-
183
- h.to_cookie_string
184
- end
185
- end
42
+ def redirect_url(response)
43
+ if response['location'].nil?
44
+ response.body.match(/<a href=\"([^>]+)\">/i)[1]
45
+ else
46
+ response['location']
47
+ end
48
+ end
49
+
186
50
 
51
+ end
187
52
 
188
53
  end
@@ -1,129 +1,30 @@
1
- require 'minitest'
2
1
  require 'minitest/autorun'
3
2
  require 'vcr'
4
3
  require './lib/unwind'
5
4
 
6
- VCR.configure do |c|
7
- c.hook_into :webmock
8
- c.cassette_library_dir = 'vcr_cassettes'
5
+ VCR.config do |c|
6
+ c.stub_with :fakeweb
7
+ c.cassette_library_dir = 'vcr_cassettes'
9
8
  end
10
9
 
11
- describe Unwind::RedirectFollower do
12
-
13
- # needs to be regenerated to properly test...need to stop that :(
14
- it 'should handle url with cookie requirement' do
15
- VCR.use_cassette('with cookie') do
16
- follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
17
- assert_equal 200, follower.response.code.to_i
18
- assert follower.redirected?
19
- end
20
- end
21
-
22
- it 'should resolve the url' do
23
- VCR.use_cassette('xZVND1') do
24
- follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
25
- assert_equal 'http://ow.ly/i/s1O0', follower.final_url
26
- assert_equal 'http://j.mp/xZVND1', follower.original_url
27
- assert_equal 2, follower.redirects.count
28
- assert follower.redirected?
29
- end
30
- end
31
-
32
- it 'should handle relative redirects' do
33
- VCR.use_cassette('relative stackoverflow') do
34
- follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
35
- assert follower.redirected?
36
- assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
37
- end
38
- end
39
-
40
- it 'should still handine relative redirects' do
41
- # http://bit.ly/A4H3a2
42
- VCR.use_cassette('relative stackoverflow 2') do
43
- follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
44
- assert follower.redirected?
45
- end
46
- end
47
-
48
- it 'should handle redirects to pdfs' do
49
- VCR.use_cassette('pdf') do
50
- follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
51
- assert follower.redirected?
52
- assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
53
- end
54
- end
55
-
56
- it 'should handle the lame amazon spaces' do
57
- VCR.use_cassette('amazon') do
58
- follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
59
- assert follower.redirected?
60
- end
61
- end
62
-
63
- #http://amzn.to/xrHQWS
64
-
65
- it 'should handle a https redirect' do
66
- VCR.use_cassette('ssl tpope') do
67
- follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
68
- assert follower.redirected?
69
- assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
70
- end
71
- end
72
-
73
- it 'should not be redirected' do
74
- VCR.use_cassette('no redirect') do
75
- follower = Unwind::RedirectFollower.resolve('https://flippa.com')
76
- assert !follower.redirected?
77
- end
78
- end
79
-
80
- it 'should set the final url as being the canonical url and treat it as s redirect' do
81
- VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
82
- follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
83
- assert follower.redirected?
84
- assert 'http://www.scottw.com', follower.final_url
85
- assert 'http://www.scottw?test=abc', follower.redirects[0]
86
- end
87
- end
88
-
89
- it 'should raise TooManyRedirects' do
90
- VCR.use_cassette('xZVND1') do
91
- follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
92
- too_many_redirects = lambda {follower.resolve}
93
- too_many_redirects.must_raise Unwind::TooManyRedirects
94
- end
95
- end
96
-
97
- it 'should raise MissingRedirectLocation' do
98
- VCR.use_cassette('missing redirect') do
99
- follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
100
- missing_redirect_location = lambda{follower.resolve}
101
- missing_redirect_location.must_raise Unwind::MissingRedirectLocation
102
- end
103
- end
104
-
105
- it 'should handle a meta-refresh' do
106
- VCR.use_cassette('meta refresh') do
107
- follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
108
- assert follower.redirected?
109
- assert_equal "www.google.com.au", URI(follower.final_url).host
110
- end
111
- end
112
-
113
- it 'should handle a relative meta-refresh' do
114
- VCR.use_cassette('relative meta refresh') do
115
- follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
116
- assert follower.redirected?
117
- assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
118
- end
119
- end
120
-
121
- it 'should handle a relative canonical url' do
122
- VCR.use_cassette('relative canonical url') do
123
- follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
124
- assert follower.redirected?
125
- assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
126
- end
127
- end
10
+ describe 'Tests :)' do
11
+
12
+ it 'should resolve the url' do
13
+ VCR.use_cassette('xZVND1') do
14
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
15
+ follower.resolve
16
+ assert_equal 'http://ow.ly/i/s1O0', follower.final_url
17
+ assert_equal 'http://j.mp/xZVND1', follower.original_url
18
+ assert_equal 2, follower.redirects.count
19
+ end
20
+ end
21
+
22
+ it 'should raise TooManyRedirects' do
23
+ VCR.use_cassette('xZVND1') do
24
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
25
+ too_many_redirects = lambda {follower.resolve}
26
+ too_many_redirects.must_raise Unwind::TooManyRedirects
27
+ end
28
+ end
128
29
 
129
30
  end
data/unwind.gemspec CHANGED
@@ -7,8 +7,7 @@ Gem::Specification.new do |s|
7
7
  s.version = Unwind::VERSION
8
8
  s.authors = ["Scott Watermasysk"]
9
9
  s.email = ["scottwater@gmail.com"]
10
- s.homepage = "http://www.scottw.com/unwind"
11
- s.licenses = ['MIT']
10
+ s.homepage = ""
12
11
  s.summary = %q{Follows a chain redirects.}
13
12
  s.description = <<-description
14
13
  Follows a chain of redirects and reports back on all the steps.
@@ -24,9 +23,8 @@ Gem::Specification.new do |s|
24
23
  s.require_paths = ["lib"]
25
24
 
26
25
  # specify any dependencies here; for example:
27
- s.add_development_dependency 'rake', '~> 10.1'
28
- s.add_development_dependency 'minitest', '~> 5.2'
29
- s.add_development_dependency 'vcr', '~> 2.8'
30
- s.add_development_dependency 'webmock', '~> 1.17'
31
- s.add_runtime_dependency 'addressable', '~> 2.2'
26
+ s.add_development_dependency "rake"
27
+ s.add_development_dependency "vcr"
28
+ s.add_development_dependency "fakeweb"
29
+ # s.add_runtime_dependency "rest-client"
32
30
  end