unwind 0.2.1 → 0.9.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -2,4 +2,3 @@
2
2
  .bundle
3
3
  Gemfile.lock
4
4
  pkg/*
5
- .rvmrc
data/Readme.md CHANGED
@@ -4,7 +4,7 @@ Enables following a series of redirects (shortened urls)
4
4
 
5
5
  # Prerequisites
6
6
 
7
- Tested on Ruby 1.9.3 & 2.0 (likely still works on 1.8.7, but you are on your own.)
7
+ Only tested on Ruby 1.9.3
8
8
 
9
9
  # Example Code
10
10
 
@@ -22,4 +22,4 @@ Most of the code is based on John Nunemaker's blog post [Following Redirects wit
22
22
 
23
23
  # License
24
24
 
25
- Provided under the Do Whatever You Want With This Code License.
25
+ Provided under the Do Whatever You Want With This Code License.
@@ -1,3 +1,3 @@
1
1
  module Unwind
2
- VERSION = "0.2.1"
2
+ VERSION = "0.9.0"
3
3
  end
data/lib/unwind.rb CHANGED
@@ -1,188 +1,53 @@
1
1
  require "unwind/version"
2
2
  require 'net/http'
3
- require 'addressable/uri'
4
3
 
5
4
  module Unwind
6
5
 
7
- class TooManyRedirects < StandardError; end
8
- class MissingRedirectLocation < StandardError; end
6
+ class TooManyRedirects < StandardError; end
9
7
 
10
- class RedirectFollower
8
+ class RedirectFollower
11
9
 
12
- attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
10
+ attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
13
11
 
14
- def initialize(original_url, limit=5)
15
- @original_url, @redirect_limit = original_url, limit
16
- @redirects = []
17
- end
12
+ def initialize(original_url, limit=5)
13
+ @original_url, @redirect_limit = original_url, limit
14
+ @redirects = []
15
+ end
18
16
 
19
- def redirected?
20
- !(self.final_url == self.original_url)
21
- end
17
+ def resolve(current_url=nil)
22
18
 
23
- def resolve(current_url=nil, options={})
24
- ok_to_continue?
19
+ ok_to_continue?
25
20
 
26
- current_url ||= self.original_url
27
- #adding this header because we really only care about resolving the url
28
- headers = (options || {}).merge({"accept-encoding" => "none"})
21
+ current_url ||= self.original_url
29
22
 
30
- url = URI.parse(current_url)
23
+ response = Net::HTTP.get_response(URI.parse(current_url))
31
24
 
32
- request = Net::HTTP::Get.new(url)
25
+ if response.kind_of?(Net::HTTPRedirection)
26
+ @redirects << current_url
27
+ @redirect_limit -= 1
28
+ resolve(redirect_url(response))
29
+ else
30
+ @final_url = current_url
31
+ @response = response
32
+ self
33
+ end
34
+ end
33
35
 
34
- headers.each do |header, value|
35
- request.add_field(header, value)
36
- end
36
+ private
37
37
 
38
- response = Net::HTTP.start(
39
- url.host,
40
- url.port,
41
- :use_ssl => url.scheme == 'https'
42
- ) do |http|
43
- http.request(request)
44
- end
38
+ def ok_to_continue?
39
+ raise TooManyRedirects if redirect_limit < 0
40
+ end
45
41
 
46
- if is_response_redirect?(response)
47
- handle_redirect(redirect_url(response), current_url, response, headers)
48
- elsif meta_uri = meta_refresh?(current_url, response)
49
- handle_redirect(meta_uri, current_url, response, headers)
50
- else
51
- handle_final_response(current_url, response)
52
- end
53
-
54
- self
55
- end
56
-
57
- def self.resolve(original_url, limit=5)
58
- new(original_url, limit).resolve
59
- end
60
-
61
- private
62
-
63
- def record_redirect(url)
64
- @redirects << url.to_s
65
- @redirect_limit -= 1
66
- end
67
-
68
- def is_response_redirect?(response)
69
- Net::HTTPRedirection === response
70
- end
71
-
72
- def handle_redirect(uri_to_redirect, url, response, headers)
73
- record_redirect url
74
- resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
75
- end
76
-
77
- def handle_final_response(current_url, response)
78
- current_url = current_url.dup.to_s
79
- if Net::HTTPSuccess === response && canonical = canonical_link?(response)
80
- @redirects << current_url
81
- if Addressable::URI.parse(canonical).relative?
82
- @final_url = make_url_absolute(current_url, Addressable::URI.parse(canonical)).to_s
83
- else
84
- @final_url = canonical
85
- end
86
-
87
- else
88
- @final_url = current_url
89
- end
90
- @response = response
91
- end
92
-
93
- def ok_to_continue?
94
- raise TooManyRedirects if redirect_limit < 0
95
- end
96
-
97
- def redirect_url(response)
98
- if response['location'].nil?
99
- body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
100
- raise MissingRedirectLocation unless body_match
101
- Addressable::URI.parse(body_match[0])
102
- else
103
- redirect_uri = Addressable::URI.parse(response['location'])
104
- redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(response['location']) : redirect_uri
105
- end
106
- end
107
-
108
- def meta_refresh?(current_url, response)
109
- if Net::HTTPSuccess === response
110
- body_match = response.body.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
111
- if body_match
112
- uri = Addressable::URI.parse(body_match[1])
113
- make_url_absolute(current_url, uri)
114
- end
115
- end
116
- end
117
-
118
- def canonical_link?(response)
119
- body_match = response.body.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
120
- body_match ? Addressable::URI.parse(body_match[1]).to_s : false
121
- end
122
-
123
- def apply_cookie(response, headers)
124
- if response.code.to_i == 302 && response['set-cookie']
125
- headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
126
- else
127
- #todo: should we delete the cookie at this point if it exists?
128
- headers
129
- end
130
- end
131
-
132
- def make_url_absolute(current_url, relative_url)
133
- current_uri = Addressable::URI.parse(current_url)
134
- if (relative_url.relative?)
135
- url = Addressable::URI.new(
136
- :scheme => current_uri.scheme,
137
- :user => current_uri.user,
138
- :password => current_uri.password,
139
- :host => current_uri.host,
140
- :port => current_uri.port,
141
- :path => relative_url.path,
142
- :query => relative_url.query,
143
- :fragment => relative_url.fragment)
144
- else
145
- relative_url
146
- end
147
- end
148
-
149
- end
150
-
151
- #borrowed (stolen) from HTTParty with minor updates
152
- #to handle all cookies existing in a single string
153
- class CookieHash < Hash
154
-
155
- CLIENT_COOKIES = %w{path expires domain path secure httponly}
156
-
157
- def add_cookies(value)
158
- case value
159
- when Hash
160
- merge!(value)
161
- when String
162
- value = value.gsub(/expires=[\w,\s\-\:]+;/i, '')
163
- value = value.gsub(/httponly[\,\;]*/i, '')
164
- value.split(/[;,]\s/).each do |cookie|
165
- array = cookie.split('=')
166
- self[array[0].strip.to_sym] = array[1]
167
- end
168
- else
169
- raise "add_cookies only takes a Hash or a String"
170
- end
171
- end
172
-
173
- def to_cookie_string
174
- delete_if { |k, v| CLIENT_COOKIES.include?(k.to_s.downcase) }.collect { |k, v| "#{k}=#{v}" }.join("; ")
175
- end
176
-
177
- def self.to_cookie_string(*cookie_strings)
178
- h = CookieHash.new
179
- cookie_strings.each do |cs|
180
- h.add_cookies(cs)
181
- end
182
-
183
- h.to_cookie_string
184
- end
185
- end
42
+ def redirect_url(response)
43
+ if response['location'].nil?
44
+ response.body.match(/<a href=\"([^>]+)\">/i)[1]
45
+ else
46
+ response['location']
47
+ end
48
+ end
49
+
186
50
 
51
+ end
187
52
 
188
53
  end
@@ -1,129 +1,30 @@
1
- require 'minitest'
2
1
  require 'minitest/autorun'
3
2
  require 'vcr'
4
3
  require './lib/unwind'
5
4
 
6
- VCR.configure do |c|
7
- c.hook_into :webmock
8
- c.cassette_library_dir = 'vcr_cassettes'
5
+ VCR.config do |c|
6
+ c.stub_with :fakeweb
7
+ c.cassette_library_dir = 'vcr_cassettes'
9
8
  end
10
9
 
11
- describe Unwind::RedirectFollower do
12
-
13
- # needs to be regenerated to properly test...need to stop that :(
14
- it 'should handle url with cookie requirement' do
15
- VCR.use_cassette('with cookie') do
16
- follower = Unwind::RedirectFollower.resolve('http://ow.ly/1hf37P')
17
- assert_equal 200, follower.response.code.to_i
18
- assert follower.redirected?
19
- end
20
- end
21
-
22
- it 'should resolve the url' do
23
- VCR.use_cassette('xZVND1') do
24
- follower = Unwind::RedirectFollower.resolve('http://j.mp/xZVND1')
25
- assert_equal 'http://ow.ly/i/s1O0', follower.final_url
26
- assert_equal 'http://j.mp/xZVND1', follower.original_url
27
- assert_equal 2, follower.redirects.count
28
- assert follower.redirected?
29
- end
30
- end
31
-
32
- it 'should handle relative redirects' do
33
- VCR.use_cassette('relative stackoverflow') do
34
- follower = Unwind::RedirectFollower.resolve('http://stackoverflow.com/q/9277007/871617?stw=1')
35
- assert follower.redirected?
36
- assert_equal 'http://stackoverflow.com/questions/9277007/gitlabhq-w-denied-for-rails', follower.final_url
37
- end
38
- end
39
-
40
- it 'should still handine relative redirects' do
41
- # http://bit.ly/A4H3a2
42
- VCR.use_cassette('relative stackoverflow 2') do
43
- follower = Unwind::RedirectFollower.resolve('http://bit.ly/A4H3a2')
44
- assert follower.redirected?
45
- end
46
- end
47
-
48
- it 'should handle redirects to pdfs' do
49
- VCR.use_cassette('pdf') do
50
- follower = Unwind::RedirectFollower.resolve('http://binged.it/wVSFs5')
51
- assert follower.redirected?
52
- assert_equal 'https://microsoft.promo.eprize.com/bingtwitter/public/fulfillment/rules.pdf', follower.final_url
53
- end
54
- end
55
-
56
- it 'should handle the lame amazon spaces' do
57
- VCR.use_cassette('amazon') do
58
- follower = Unwind::RedirectFollower.resolve('http://amzn.to/xrHQWS')
59
- assert follower.redirected?
60
- end
61
- end
62
-
63
- #http://amzn.to/xrHQWS
64
-
65
- it 'should handle a https redirect' do
66
- VCR.use_cassette('ssl tpope') do
67
- follower = Unwind::RedirectFollower.resolve('http://github.com/tpope/vim-rails')
68
- assert follower.redirected?
69
- assert_equal 'https://github.com/tpope/vim-rails', follower.final_url
70
- end
71
- end
72
-
73
- it 'should not be redirected' do
74
- VCR.use_cassette('no redirect') do
75
- follower = Unwind::RedirectFollower.resolve('https://flippa.com')
76
- assert !follower.redirected?
77
- end
78
- end
79
-
80
- it 'should set the final url as being the canonical url and treat it as s redirect' do
81
- VCR.use_cassette('canonical url', :preserve_exact_body_bytes => true) do
82
- follower = Unwind::RedirectFollower.resolve('http://www.scottw.com?test=abc')
83
- assert follower.redirected?
84
- assert 'http://www.scottw.com', follower.final_url
85
- assert 'http://www.scottw?test=abc', follower.redirects[0]
86
- end
87
- end
88
-
89
- it 'should raise TooManyRedirects' do
90
- VCR.use_cassette('xZVND1') do
91
- follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
92
- too_many_redirects = lambda {follower.resolve}
93
- too_many_redirects.must_raise Unwind::TooManyRedirects
94
- end
95
- end
96
-
97
- it 'should raise MissingRedirectLocation' do
98
- VCR.use_cassette('missing redirect') do
99
- follower = Unwind::RedirectFollower.new('http://tinyurl.com/6oqzkff')
100
- missing_redirect_location = lambda{follower.resolve}
101
- missing_redirect_location.must_raise Unwind::MissingRedirectLocation
102
- end
103
- end
104
-
105
- it 'should handle a meta-refresh' do
106
- VCR.use_cassette('meta refresh') do
107
- follower = Unwind::RedirectFollower.resolve('http://www.nullrefer.com/?www.google.com')
108
- assert follower.redirected?
109
- assert_equal "www.google.com.au", URI(follower.final_url).host
110
- end
111
- end
112
-
113
- it 'should handle a relative meta-refresh' do
114
- VCR.use_cassette('relative meta refresh') do
115
- follower = Unwind::RedirectFollower.resolve('http://fb.me/2JYu23acx')
116
- assert follower.redirected?
117
- assert_equal 'https://www.facebook.com/londonswf/posts/696389650411604?_fb_noscript=1', follower.final_url
118
- end
119
- end
120
-
121
- it 'should handle a relative canonical url' do
122
- VCR.use_cassette('relative canonical url') do
123
- follower = Unwind::RedirectFollower.resolve('http://youtu.be/hPJ0oLahGDg')
124
- assert follower.redirected?
125
- assert_equal 'http://www.youtube.com/watch?v=hPJ0oLahGDg', follower.final_url
126
- end
127
- end
10
+ describe 'Tests :)' do
11
+
12
+ it 'should resolve the url' do
13
+ VCR.use_cassette('xZVND1') do
14
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1')
15
+ follower.resolve
16
+ assert_equal 'http://ow.ly/i/s1O0', follower.final_url
17
+ assert_equal 'http://j.mp/xZVND1', follower.original_url
18
+ assert_equal 2, follower.redirects.count
19
+ end
20
+ end
21
+
22
+ it 'should raise TooManyRedirects' do
23
+ VCR.use_cassette('xZVND1') do
24
+ follower = Unwind::RedirectFollower.new('http://j.mp/xZVND1', 1)
25
+ too_many_redirects = lambda {follower.resolve}
26
+ too_many_redirects.must_raise Unwind::TooManyRedirects
27
+ end
28
+ end
128
29
 
129
30
  end
data/unwind.gemspec CHANGED
@@ -7,8 +7,7 @@ Gem::Specification.new do |s|
7
7
  s.version = Unwind::VERSION
8
8
  s.authors = ["Scott Watermasysk"]
9
9
  s.email = ["scottwater@gmail.com"]
10
- s.homepage = "http://www.scottw.com/unwind"
11
- s.licenses = ['MIT']
10
+ s.homepage = ""
12
11
  s.summary = %q{Follows a chain redirects.}
13
12
  s.description = <<-description
14
13
  Follows a chain of redirects and reports back on all the steps.
@@ -24,9 +23,8 @@ Gem::Specification.new do |s|
24
23
  s.require_paths = ["lib"]
25
24
 
26
25
  # specify any dependencies here; for example:
27
- s.add_development_dependency 'rake', '~> 10.1'
28
- s.add_development_dependency 'minitest', '~> 5.2'
29
- s.add_development_dependency 'vcr', '~> 2.8'
30
- s.add_development_dependency 'webmock', '~> 1.17'
31
- s.add_runtime_dependency 'addressable', '~> 2.2'
26
+ s.add_development_dependency "rake"
27
+ s.add_development_dependency "vcr"
28
+ s.add_development_dependency "fakeweb"
29
+ # s.add_runtime_dependency "rest-client"
32
30
  end