url_canonicalize 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/url_canonicalize/http.rb +9 -3
- data/lib/url_canonicalize/request.rb +24 -6
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad637f9ccbf0f81bdaf1d3d240c633d9b588c23d
|
4
|
+
data.tar.gz: 018fb65d2c324e8fd85e0a48016713de1585ea21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f6174381673a8b8e6ff050956d6804fe4d6d5011a590ad5d1aeaf6b147ddb0f2064f98372e4a70d79418b3eeae483d154d522ae964beafce4dd90347a4bf589
|
7
|
+
data.tar.gz: 93cef8c77e155af01698830a9fbfb905d6e0db9182a8f603af0e2bbe760145d584545af16372e5c3a46097d35db3246eaa0c764fbae3296333db048b8e4f0dba
|
data/lib/url_canonicalize/http.rb
CHANGED
@@ -20,7 +20,7 @@ module URLCanonicalize
|
|
20
20
|
|
21
21
|
private
|
22
22
|
|
23
|
-
|
23
|
+
attr_accessor :last_known_good
|
24
24
|
|
25
25
|
def initialize(raw_url)
|
26
26
|
@raw_url = raw_url
|
@@ -55,6 +55,8 @@ module URLCanonicalize
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def redirect_loop_detected?
|
58
|
+
puts 'Redirect' # debug
|
59
|
+
|
58
60
|
if redirect_list.include?(response.url)
|
59
61
|
return true if last_known_good
|
60
62
|
raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
|
@@ -85,7 +87,9 @@ module URLCanonicalize
|
|
85
87
|
end
|
86
88
|
|
87
89
|
def handle_canonical_found
|
88
|
-
|
90
|
+
puts 'Canonical found' # debug
|
91
|
+
|
92
|
+
self.last_known_good = response.response
|
89
93
|
return true if response.url == url || redirect_list.include?(response.url)
|
90
94
|
set_url_from_response
|
91
95
|
false
|
@@ -105,7 +109,9 @@ module URLCanonicalize
|
|
105
109
|
end
|
106
110
|
|
107
111
|
def handle_success
|
108
|
-
|
112
|
+
puts 'Success' # debug
|
113
|
+
|
114
|
+
self.last_known_good = response
|
109
115
|
true
|
110
116
|
end
|
111
117
|
|
data/lib/url_canonicalize/request.rb
CHANGED
@@ -23,6 +23,8 @@ module URLCanonicalize
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def handle_response
|
26
|
+
log_response
|
27
|
+
|
26
28
|
case response
|
27
29
|
when Net::HTTPSuccess
|
28
30
|
handle_success
|
@@ -52,8 +54,11 @@ module URLCanonicalize
|
|
52
54
|
self.http_method = :get
|
53
55
|
handle_success
|
54
56
|
else
|
55
|
-
|
56
|
-
|
57
|
+
if location
|
58
|
+
URLCanonicalize::Response::Redirect.new(location)
|
59
|
+
else
|
60
|
+
URLCanonicalize::Response::Failure.new(::URI::InvalidURIError, response['location'])
|
61
|
+
end
|
57
62
|
end
|
58
63
|
end
|
59
64
|
|
@@ -63,6 +68,7 @@ module URLCanonicalize
|
|
63
68
|
|
64
69
|
def enhanced_response
|
65
70
|
if canonical_url
|
71
|
+
puts " * canonical_url:\t#{canonical_url}" if ENV['DEBUG']
|
66
72
|
response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
|
67
73
|
URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
|
68
74
|
else
|
@@ -98,6 +104,10 @@ module URLCanonicalize
|
|
98
104
|
@host ||= uri.host
|
99
105
|
end
|
100
106
|
|
107
|
+
def location
|
108
|
+
@location ||= relative_to_absolute(response['location'])
|
109
|
+
end
|
110
|
+
|
101
111
|
def request_for_method
|
102
112
|
r = base_request
|
103
113
|
headers.each { |header_key, header_value| r[header_key] = header_value }
|
@@ -147,10 +157,16 @@ module URLCanonicalize
|
|
147
157
|
if partial_uri.host
|
148
158
|
partial_url # It's already absolute
|
149
159
|
else
|
150
|
-
|
151
|
-
base_uri.path = partial_url
|
152
|
-
base_uri.to_s
|
160
|
+
::URI.join((uri || url), partial_url).to_s
|
153
161
|
end
|
162
|
+
rescue ::URI::InvalidURIError
|
163
|
+
nil
|
164
|
+
end
|
165
|
+
|
166
|
+
def log_response
|
167
|
+
puts "#{http_method.upcase} #{url} #{response.code} #{response.message}" if ENV['DEBUG']
|
168
|
+
return unless ENV['DEBUG'].casecmp('headers')
|
169
|
+
response.each { |k, v| puts " #{k}:\t#{v}"}
|
154
170
|
end
|
155
171
|
|
156
172
|
NETWORK_EXCEPTIONS = [
|
@@ -165,7 +181,9 @@ module URLCanonicalize
|
|
165
181
|
Net::ReadTimeout,
|
166
182
|
OpenSSL::SSL::SSLError,
|
167
183
|
SocketError,
|
168
|
-
Timeout::Error
|
184
|
+
Timeout::Error,
|
185
|
+
Zlib::BufError,
|
186
|
+
Zlib::DataError
|
169
187
|
].freeze
|
170
188
|
end
|
171
189
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|