url_canonicalize 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/url_canonicalize/http.rb +9 -3
- data/lib/url_canonicalize/request.rb +24 -6
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad637f9ccbf0f81bdaf1d3d240c633d9b588c23d
|
4
|
+
data.tar.gz: 018fb65d2c324e8fd85e0a48016713de1585ea21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f6174381673a8b8e6ff050956d6804fe4d6d5011a590ad5d1aeaf6b147ddb0f2064f98372e4a70d79418b3eeae483d154d522ae964beafce4dd90347a4bf589
|
7
|
+
data.tar.gz: 93cef8c77e155af01698830a9fbfb905d6e0db9182a8f603af0e2bbe760145d584545af16372e5c3a46097d35db3246eaa0c764fbae3296333db048b8e4f0dba
|
@@ -20,7 +20,7 @@ module URLCanonicalize
|
|
20
20
|
|
21
21
|
private
|
22
22
|
|
23
|
-
|
23
|
+
attr_accessor :last_known_good
|
24
24
|
|
25
25
|
def initialize(raw_url)
|
26
26
|
@raw_url = raw_url
|
@@ -55,6 +55,8 @@ module URLCanonicalize
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def redirect_loop_detected?
|
58
|
+
puts 'Redirect' # debug
|
59
|
+
|
58
60
|
if redirect_list.include?(response.url)
|
59
61
|
return true if last_known_good
|
60
62
|
raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
|
@@ -85,7 +87,9 @@ module URLCanonicalize
|
|
85
87
|
end
|
86
88
|
|
87
89
|
def handle_canonical_found
|
88
|
-
|
90
|
+
puts 'Canonical found' # debug
|
91
|
+
|
92
|
+
self.last_known_good = response.response
|
89
93
|
return true if response.url == url || redirect_list.include?(response.url)
|
90
94
|
set_url_from_response
|
91
95
|
false
|
@@ -105,7 +109,9 @@ module URLCanonicalize
|
|
105
109
|
end
|
106
110
|
|
107
111
|
def handle_success
|
108
|
-
|
112
|
+
puts 'Success' # debug
|
113
|
+
|
114
|
+
self.last_known_good = response
|
109
115
|
true
|
110
116
|
end
|
111
117
|
|
@@ -23,6 +23,8 @@ module URLCanonicalize
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def handle_response
|
26
|
+
log_response
|
27
|
+
|
26
28
|
case response
|
27
29
|
when Net::HTTPSuccess
|
28
30
|
handle_success
|
@@ -52,8 +54,11 @@ module URLCanonicalize
|
|
52
54
|
self.http_method = :get
|
53
55
|
handle_success
|
54
56
|
else
|
55
|
-
|
56
|
-
|
57
|
+
if location
|
58
|
+
URLCanonicalize::Response::Redirect.new(location)
|
59
|
+
else
|
60
|
+
URLCanonicalize::Response::Failure.new(::URI::InvalidURIError, response['location'])
|
61
|
+
end
|
57
62
|
end
|
58
63
|
end
|
59
64
|
|
@@ -63,6 +68,7 @@ module URLCanonicalize
|
|
63
68
|
|
64
69
|
def enhanced_response
|
65
70
|
if canonical_url
|
71
|
+
puts " * canonical_url:\t#{canonical_url}" if ENV['DEBUG']
|
66
72
|
response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
|
67
73
|
URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
|
68
74
|
else
|
@@ -98,6 +104,10 @@ module URLCanonicalize
|
|
98
104
|
@host ||= uri.host
|
99
105
|
end
|
100
106
|
|
107
|
+
def location
|
108
|
+
@location ||= relative_to_absolute(response['location'])
|
109
|
+
end
|
110
|
+
|
101
111
|
def request_for_method
|
102
112
|
r = base_request
|
103
113
|
headers.each { |header_key, header_value| r[header_key] = header_value }
|
@@ -147,10 +157,16 @@ module URLCanonicalize
|
|
147
157
|
if partial_uri.host
|
148
158
|
partial_url # It's already absolute
|
149
159
|
else
|
150
|
-
|
151
|
-
base_uri.path = partial_url
|
152
|
-
base_uri.to_s
|
160
|
+
::URI.join((uri || url), partial_url).to_s
|
153
161
|
end
|
162
|
+
rescue ::URI::InvalidURIError
|
163
|
+
nil
|
164
|
+
end
|
165
|
+
|
166
|
+
def log_response
|
167
|
+
puts "#{http_method.upcase} #{url} #{response.code} #{response.message}" if ENV['DEBUG']
|
168
|
+
return unless ENV['DEBUG'].casecmp('headers')
|
169
|
+
response.each { |k, v| puts " #{k}:\t#{v}"}
|
154
170
|
end
|
155
171
|
|
156
172
|
NETWORK_EXCEPTIONS = [
|
@@ -165,7 +181,9 @@ module URLCanonicalize
|
|
165
181
|
Net::ReadTimeout,
|
166
182
|
OpenSSL::SSL::SSLError,
|
167
183
|
SocketError,
|
168
|
-
Timeout::Error
|
184
|
+
Timeout::Error,
|
185
|
+
Zlib::BufError,
|
186
|
+
Zlib::DataError
|
169
187
|
].freeze
|
170
188
|
end
|
171
189
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|