url_canonicalize 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +25 -0
- data/.travis.yml +6 -0
- data/README.md +3 -1
- data/lib/url_canonicalize/exception.rb +3 -2
- data/lib/url_canonicalize/http.rb +19 -6
- data/lib/url_canonicalize/request.rb +26 -8
- data/lib/url_canonicalize/uri.rb +9 -2
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68f190eb361435b21097c512aaf3abcbca61e6f6
|
4
|
+
data.tar.gz: e3249038d7bbade9c294a940479a5586b23771ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ce00418601cdfcc99841836790d0ac81e4e1e542b811d4e07615674c57687a7f3a8b0c2b1dcfdea1f65596d43674105d2b5dbf40c609a0892e550e764b65d9f
|
7
|
+
data.tar.gz: d76b899fcf9989c6a05a39f4f6a950905f7fb003dc43037c8c0c66809fdf081d73ffdbe3247b76dd37ce0772e582748a3cee6573c506ab91bb6cd324acad720b
|
data/.codeclimate.yml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
---
|
2
|
+
engines:
|
3
|
+
duplication:
|
4
|
+
enabled: true
|
5
|
+
config:
|
6
|
+
languages:
|
7
|
+
- ruby
|
8
|
+
- javascript
|
9
|
+
- python
|
10
|
+
- php
|
11
|
+
fixme:
|
12
|
+
enabled: true
|
13
|
+
rubocop:
|
14
|
+
enabled: true
|
15
|
+
ratings:
|
16
|
+
paths:
|
17
|
+
- "**.inc"
|
18
|
+
- "**.js"
|
19
|
+
- "**.jsx"
|
20
|
+
- "**.module"
|
21
|
+
- "**.php"
|
22
|
+
- "**.py"
|
23
|
+
- "**.rb"
|
24
|
+
exclude_paths:
|
25
|
+
- spec/
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[](https://rubygems.org/gems/url_canonicalize)
|
4
4
|
[](https://circleci.com/gh/dominicsayers/url_canonicalize)
|
5
5
|
[](https://codeclimate.com/github/dominicsayers/url_canonicalize)
|
6
|
-
[](https://coveralls.io/github/dominicsayers/url_canonicalize?branch=master)
|
7
7
|
[](https://dependencyci.com/github/dominicsayers/url_canonicalize)
|
8
8
|
[](https://hakiri.io/github/dominicsayers/url_canonicalize/master)
|
9
9
|
|
@@ -23,7 +23,9 @@ gem 'url_canonicalize'
|
|
23
23
|
|
24
24
|
```ruby
|
25
25
|
'http://www.twitter.com'.canonicalize # => 'https://twitter.com/'
|
26
|
+
|
26
27
|
URI('http://www.twitter.com').canonicalize # => #<URI::HTTP:0x00000008767908 URL:https://twitter.com/>
|
28
|
+
|
27
29
|
Addressable::URI.canonicalize('http://www.twitter.com') # => #<Addressable::URI:0x43c9 URI:https://twitter.com/>
|
28
30
|
```
|
29
31
|
|
data/lib/url_canonicalize/exception.rb
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
module URLCanonicalize
|
4
4
|
# Local exception classes to make handling exceptions easier
|
5
5
|
class Exception < RuntimeError
|
6
|
-
URI = Class.new(self)
|
7
|
-
Redirect = Class.new(self)
|
8
6
|
Failure = Class.new(self)
|
7
|
+
Redirect = Class.new(self)
|
8
|
+
Request = Class.new(self)
|
9
|
+
URI = Class.new(self)
|
9
10
|
end
|
10
11
|
end
|
data/lib/url_canonicalize/http.rb
CHANGED
@@ -15,7 +15,7 @@ module URLCanonicalize
|
|
15
15
|
@uri = nil
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
18
|
+
def do_request(http_request)
|
19
19
|
http.request http_request
|
20
20
|
end
|
21
21
|
|
@@ -29,13 +29,26 @@ module URLCanonicalize
|
|
29
29
|
|
30
30
|
# Fetch the response
|
31
31
|
def response
|
32
|
-
@response ||=
|
32
|
+
@response ||= fetch_response
|
33
|
+
end
|
34
|
+
|
35
|
+
def response_url
|
36
|
+
@response_url ||= response.url
|
37
|
+
end
|
38
|
+
|
39
|
+
def request
|
40
|
+
@request ||= Request.new(self)
|
41
|
+
end
|
42
|
+
|
43
|
+
def fetch_response
|
44
|
+
request.with_uri(uri).fetch
|
33
45
|
end
|
34
46
|
|
35
47
|
# Parse the response, and clear the response ready to follow the next redirect
|
36
48
|
def handle_response
|
37
49
|
result = parse_response
|
38
50
|
@response = nil
|
51
|
+
@response_url = nil
|
39
52
|
result
|
40
53
|
end
|
41
54
|
|
@@ -56,12 +69,12 @@ module URLCanonicalize
|
|
56
69
|
end
|
57
70
|
|
58
71
|
def redirect_loop_detected?
|
59
|
-
if redirect_list.include?(
|
72
|
+
if redirect_list.include?(response_url)
|
60
73
|
return true if last_known_good
|
61
74
|
raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
|
62
75
|
end
|
63
76
|
|
64
|
-
redirect_list <<
|
77
|
+
redirect_list << response_url
|
65
78
|
increment_redirects
|
66
79
|
set_url_from_response
|
67
80
|
false
|
@@ -87,13 +100,13 @@ module URLCanonicalize
|
|
87
100
|
|
88
101
|
def handle_canonical_found
|
89
102
|
self.last_known_good = response.response
|
90
|
-
return true if
|
103
|
+
return true if response_url == url || redirect_list.include?(response_url)
|
91
104
|
set_url_from_response
|
92
105
|
false
|
93
106
|
end
|
94
107
|
|
95
108
|
def set_url_from_response
|
96
|
-
self.url =
|
109
|
+
self.url = response_url
|
97
110
|
end
|
98
111
|
|
99
112
|
def handle_failure
|
data/lib/url_canonicalize/request.rb
CHANGED
@@ -6,6 +6,22 @@ module URLCanonicalize
|
|
6
6
|
handle_response
|
7
7
|
end
|
8
8
|
|
9
|
+
def location
|
10
|
+
@location ||= relative_to_absolute(response['location'])
|
11
|
+
end
|
12
|
+
|
13
|
+
def with_uri(uri)
|
14
|
+
@uri = uri
|
15
|
+
|
16
|
+
@url = nil
|
17
|
+
@host = nil
|
18
|
+
@response = nil
|
19
|
+
@location = nil
|
20
|
+
@html = nil
|
21
|
+
|
22
|
+
self
|
23
|
+
end
|
24
|
+
|
9
25
|
private
|
10
26
|
|
11
27
|
attr_reader :http, :http_method
|
@@ -16,7 +32,12 @@ module URLCanonicalize
|
|
16
32
|
end
|
17
33
|
|
18
34
|
def response
|
19
|
-
@response ||=
|
35
|
+
@response ||= do_http_request
|
36
|
+
end
|
37
|
+
|
38
|
+
# We can stub this method in testing then call #response any number of times
|
39
|
+
def do_http_request #:nodoc: internal use only
|
40
|
+
http.do_request request # Some URLs can throw an exception here
|
20
41
|
end
|
21
42
|
|
22
43
|
def request
|
@@ -51,10 +72,10 @@ module URLCanonicalize
|
|
51
72
|
|
52
73
|
def handle_redirection
|
53
74
|
case response
|
54
|
-
when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect
|
75
|
+
when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect # Temporary redirection
|
55
76
|
self.http_method = :get
|
56
77
|
handle_success
|
57
|
-
else
|
78
|
+
else # Permanent redirection
|
58
79
|
if location
|
59
80
|
URLCanonicalize::Response::Redirect.new(location)
|
60
81
|
else
|
@@ -105,10 +126,6 @@ module URLCanonicalize
|
|
105
126
|
@host ||= uri.host
|
106
127
|
end
|
107
128
|
|
108
|
-
def location
|
109
|
-
@location ||= relative_to_absolute(response['location'])
|
110
|
-
end
|
111
|
-
|
112
129
|
def request_for_method
|
113
130
|
r = base_request
|
114
131
|
headers.each { |header_key, header_value| r[header_key] = header_value }
|
@@ -124,7 +141,7 @@ module URLCanonicalize
|
|
124
141
|
when :get
|
125
142
|
Net::HTTP::Get.new uri
|
126
143
|
else
|
127
|
-
raise URLCanonicalize::Exception::Request, "Unknown method: #{
|
144
|
+
raise URLCanonicalize::Exception::Request, "Unknown method: #{http_method}"
|
128
145
|
end
|
129
146
|
end
|
130
147
|
|
@@ -141,6 +158,7 @@ module URLCanonicalize
|
|
141
158
|
@http_method = value
|
142
159
|
@request = nil
|
143
160
|
@response = nil
|
161
|
+
@location = nil
|
144
162
|
@html = nil
|
145
163
|
end
|
146
164
|
|
data/lib/url_canonicalize/uri.rb
CHANGED
@@ -4,8 +4,9 @@ module URLCanonicalize
|
|
4
4
|
class URI
|
5
5
|
class << self
|
6
6
|
def parse(url)
|
7
|
-
uri = ::URI.parse(url)
|
8
|
-
uri
|
7
|
+
# uri = ::URI.parse decorate(url)
|
8
|
+
uri = ::URI.parse url
|
9
|
+
uri if valid? uri
|
9
10
|
rescue ::URI::InvalidURIError => e
|
10
11
|
new_exception = URLCanonicalize::Exception::URI.new("#{e.class}: #{e.message}")
|
11
12
|
new_exception.set_backtrace e.backtrace
|
@@ -20,7 +21,13 @@ module URLCanonicalize
|
|
20
21
|
true
|
21
22
|
end
|
22
23
|
|
24
|
+
def decorate(url)
|
25
|
+
return url if url.include? COLON
|
26
|
+
"http://#{url}" # Add protocol if we just receive a host name
|
27
|
+
end
|
28
|
+
|
23
29
|
VALID_CLASSES = [::URI::HTTP, ::URI::HTTPS].freeze
|
30
|
+
COLON = ':'
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -46,11 +46,13 @@ executables: []
|
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
|
+
- ".codeclimate.yml"
|
49
50
|
- ".gitignore"
|
50
51
|
- ".hound.yml"
|
51
52
|
- ".rspec"
|
52
53
|
- ".rubocop.yml"
|
53
54
|
- ".ruby-gemset"
|
55
|
+
- ".travis.yml"
|
54
56
|
- Gemfile
|
55
57
|
- Gemfile.local.example
|
56
58
|
- Guardfile
|