url_canonicalize 0.1.0 → 0.1.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3dc466b71c5b337e8b92565b92356360e03b4292
4
- data.tar.gz: c45bcc67c37dccf7d6d63eac37fa465b308990e3
3
+ metadata.gz: ad637f9ccbf0f81bdaf1d3d240c633d9b588c23d
4
+ data.tar.gz: 018fb65d2c324e8fd85e0a48016713de1585ea21
5
5
  SHA512:
6
- metadata.gz: 518f8de7767b41a1898988a54ef4f27e3a296131a1961375f47fff5d0d3d1e24129c2645353d8bd997de1420c66af5b8bd2171d17242f59f5aa1c2a20b8be50b
7
- data.tar.gz: da8f99769e69a3603a7665fe29b5d6b82d16e255cb7734b56f347aba3af159dc6a6a040ffa66775f7a3c8e297e2a5162182e773af488d5d1f5c59cbc7233d878
6
+ metadata.gz: 0f6174381673a8b8e6ff050956d6804fe4d6d5011a590ad5d1aeaf6b147ddb0f2064f98372e4a70d79418b3eeae483d154d522ae964beafce4dd90347a4bf589
7
+ data.tar.gz: 93cef8c77e155af01698830a9fbfb905d6e0db9182a8f603af0e2bbe760145d584545af16372e5c3a46097d35db3246eaa0c764fbae3296333db048b8e4f0dba
@@ -20,7 +20,7 @@ module URLCanonicalize
20
20
 
21
21
  private
22
22
 
23
- attr_reader :last_known_good
23
+ attr_accessor :last_known_good
24
24
 
25
25
  def initialize(raw_url)
26
26
  @raw_url = raw_url
@@ -55,6 +55,8 @@ module URLCanonicalize
55
55
  end
56
56
 
57
57
  def redirect_loop_detected?
58
+ puts 'Redirect' # debug
59
+
58
60
  if redirect_list.include?(response.url)
59
61
  return true if last_known_good
60
62
  raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
@@ -85,7 +87,9 @@ module URLCanonicalize
85
87
  end
86
88
 
87
89
  def handle_canonical_found
88
- @last_known_good = response.response
90
+ puts 'Canonical found' # debug
91
+
92
+ self.last_known_good = response.response
89
93
  return true if response.url == url || redirect_list.include?(response.url)
90
94
  set_url_from_response
91
95
  false
@@ -105,7 +109,9 @@ module URLCanonicalize
105
109
  end
106
110
 
107
111
  def handle_success
108
- @last_known_good = response
112
+ puts 'Success' # debug
113
+
114
+ self.last_known_good = response
109
115
  true
110
116
  end
111
117
 
@@ -23,6 +23,8 @@ module URLCanonicalize
23
23
  end
24
24
 
25
25
  def handle_response
26
+ log_response
27
+
26
28
  case response
27
29
  when Net::HTTPSuccess
28
30
  handle_success
@@ -52,8 +54,11 @@ module URLCanonicalize
52
54
  self.http_method = :get
53
55
  handle_success
54
56
  else
55
- location = relative_to_absolute(response['location'])
56
- URLCanonicalize::Response::Redirect.new(location)
57
+ if location
58
+ URLCanonicalize::Response::Redirect.new(location)
59
+ else
60
+ URLCanonicalize::Response::Failure.new(::URI::InvalidURIError, response['location'])
61
+ end
57
62
  end
58
63
  end
59
64
 
@@ -63,6 +68,7 @@ module URLCanonicalize
63
68
 
64
69
  def enhanced_response
65
70
  if canonical_url
71
+ puts " * canonical_url:\t#{canonical_url}" if ENV['DEBUG']
66
72
  response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
67
73
  URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
68
74
  else
@@ -98,6 +104,10 @@ module URLCanonicalize
98
104
  @host ||= uri.host
99
105
  end
100
106
 
107
+ def location
108
+ @location ||= relative_to_absolute(response['location'])
109
+ end
110
+
101
111
  def request_for_method
102
112
  r = base_request
103
113
  headers.each { |header_key, header_value| r[header_key] = header_value }
@@ -147,10 +157,16 @@ module URLCanonicalize
147
157
  if partial_uri.host
148
158
  partial_url # It's already absolute
149
159
  else
150
- base_uri = uri.dup || ::URI.parse(url)
151
- base_uri.path = partial_url
152
- base_uri.to_s
160
+ ::URI.join((uri || url), partial_url).to_s
153
161
  end
162
+ rescue ::URI::InvalidURIError
163
+ nil
164
+ end
165
+
166
+ def log_response
167
+ puts "#{http_method.upcase} #{url} #{response.code} #{response.message}" if ENV['DEBUG']
168
+ return unless ENV['DEBUG'].casecmp('headers')
169
+ response.each { |k, v| puts " #{k}:\t#{v}"}
154
170
  end
155
171
 
156
172
  NETWORK_EXCEPTIONS = [
@@ -165,7 +181,9 @@ module URLCanonicalize
165
181
  Net::ReadTimeout,
166
182
  OpenSSL::SSL::SSLError,
167
183
  SocketError,
168
- Timeout::Error
184
+ Timeout::Error,
185
+ Zlib::BufError,
186
+ Zlib::DataError
169
187
  ].freeze
170
188
  end
171
189
  end
@@ -1,3 +1,3 @@
1
1
  module URLCanonicalize
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.1.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_canonicalize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dominic Sayers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-20 00:00:00.000000000 Z
11
+ date: 2016-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable