url_canonicalize 0.1.9 → 0.1.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +25 -0
- data/.travis.yml +6 -0
- data/README.md +3 -1
- data/lib/url_canonicalize/exception.rb +3 -2
- data/lib/url_canonicalize/http.rb +19 -6
- data/lib/url_canonicalize/request.rb +26 -8
- data/lib/url_canonicalize/uri.rb +9 -2
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68f190eb361435b21097c512aaf3abcbca61e6f6
|
4
|
+
data.tar.gz: e3249038d7bbade9c294a940479a5586b23771ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ce00418601cdfcc99841836790d0ac81e4e1e542b811d4e07615674c57687a7f3a8b0c2b1dcfdea1f65596d43674105d2b5dbf40c609a0892e550e764b65d9f
|
7
|
+
data.tar.gz: d76b899fcf9989c6a05a39f4f6a950905f7fb003dc43037c8c0c66809fdf081d73ffdbe3247b76dd37ce0772e582748a3cee6573c506ab91bb6cd324acad720b
|
data/.codeclimate.yml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
---
|
2
|
+
engines:
|
3
|
+
duplication:
|
4
|
+
enabled: true
|
5
|
+
config:
|
6
|
+
languages:
|
7
|
+
- ruby
|
8
|
+
- javascript
|
9
|
+
- python
|
10
|
+
- php
|
11
|
+
fixme:
|
12
|
+
enabled: true
|
13
|
+
rubocop:
|
14
|
+
enabled: true
|
15
|
+
ratings:
|
16
|
+
paths:
|
17
|
+
- "**.inc"
|
18
|
+
- "**.js"
|
19
|
+
- "**.jsx"
|
20
|
+
- "**.module"
|
21
|
+
- "**.php"
|
22
|
+
- "**.py"
|
23
|
+
- "**.rb"
|
24
|
+
exclude_paths:
|
25
|
+
- spec/
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[![Gem downloads](https://img.shields.io/gem/dt/url_canonicalize.svg)](https://rubygems.org/gems/url_canonicalize)
|
4
4
|
[![Build status](https://img.shields.io/circleci/project/dominicsayers/url_canonicalize/master.svg)](https://circleci.com/gh/dominicsayers/url_canonicalize)
|
5
5
|
[![Code quality](http://img.shields.io/codeclimate/github/dominicsayers/url_canonicalize.svg?style=flat)](https://codeclimate.com/github/dominicsayers/url_canonicalize)
|
6
|
-
[![Coverage Status](https://coveralls.io/repos/github/
|
6
|
+
[![Coverage Status](https://coveralls.io/repos/github/dominicsayers/url_canonicalize/badge.svg?branch=master)](https://coveralls.io/github/dominicsayers/url_canonicalize?branch=master)
|
7
7
|
[![Dependency Status](https://dependencyci.com/github/dominicsayers/url_canonicalize/badge)](https://dependencyci.com/github/dominicsayers/url_canonicalize)
|
8
8
|
[![Security](https://hakiri.io/github/dominicsayers/url_canonicalize/master.svg)](https://hakiri.io/github/dominicsayers/url_canonicalize/master)
|
9
9
|
|
@@ -23,7 +23,9 @@ gem 'url_canonicalize'
|
|
23
23
|
|
24
24
|
```ruby
|
25
25
|
'http://www.twitter.com'.canonicalize # => 'https://twitter.com/'
|
26
|
+
|
26
27
|
URI('http://www.twitter.com').canonicalize # => #<URI::HTTP:0x00000008767908 URL:https://twitter.com/>
|
28
|
+
|
27
29
|
Addressable::URI.canonicalize('http://www.twitter.com') # => #<Addressable::URI:0x43c9 URI:https://twitter.com/>
|
28
30
|
```
|
29
31
|
|
data/lib/url_canonicalize/exception.rb
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
module URLCanonicalize
|
4
4
|
# Local exception classes to make handling exceptions easier
|
5
5
|
class Exception < RuntimeError
|
6
|
-
URI = Class.new(self)
|
7
|
-
Redirect = Class.new(self)
|
8
6
|
Failure = Class.new(self)
|
7
|
+
Redirect = Class.new(self)
|
8
|
+
Request = Class.new(self)
|
9
|
+
URI = Class.new(self)
|
9
10
|
end
|
10
11
|
end
|
data/lib/url_canonicalize/http.rb
CHANGED
@@ -15,7 +15,7 @@ module URLCanonicalize
|
|
15
15
|
@uri = nil
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
18
|
+
def do_request(http_request)
|
19
19
|
http.request http_request
|
20
20
|
end
|
21
21
|
|
@@ -29,13 +29,26 @@ module URLCanonicalize
|
|
29
29
|
|
30
30
|
# Fetch the response
|
31
31
|
def response
|
32
|
-
@response ||=
|
32
|
+
@response ||= fetch_response
|
33
|
+
end
|
34
|
+
|
35
|
+
def response_url
|
36
|
+
@response_url ||= response.url
|
37
|
+
end
|
38
|
+
|
39
|
+
def request
|
40
|
+
@request ||= Request.new(self)
|
41
|
+
end
|
42
|
+
|
43
|
+
def fetch_response
|
44
|
+
request.with_uri(uri).fetch
|
33
45
|
end
|
34
46
|
|
35
47
|
# Parse the response, and clear the response ready to follow the next redirect
|
36
48
|
def handle_response
|
37
49
|
result = parse_response
|
38
50
|
@response = nil
|
51
|
+
@response_url = nil
|
39
52
|
result
|
40
53
|
end
|
41
54
|
|
@@ -56,12 +69,12 @@ module URLCanonicalize
|
|
56
69
|
end
|
57
70
|
|
58
71
|
def redirect_loop_detected?
|
59
|
-
if redirect_list.include?(
|
72
|
+
if redirect_list.include?(response_url)
|
60
73
|
return true if last_known_good
|
61
74
|
raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
|
62
75
|
end
|
63
76
|
|
64
|
-
redirect_list <<
|
77
|
+
redirect_list << response_url
|
65
78
|
increment_redirects
|
66
79
|
set_url_from_response
|
67
80
|
false
|
@@ -87,13 +100,13 @@ module URLCanonicalize
|
|
87
100
|
|
88
101
|
def handle_canonical_found
|
89
102
|
self.last_known_good = response.response
|
90
|
-
return true if
|
103
|
+
return true if response_url == url || redirect_list.include?(response_url)
|
91
104
|
set_url_from_response
|
92
105
|
false
|
93
106
|
end
|
94
107
|
|
95
108
|
def set_url_from_response
|
96
|
-
self.url =
|
109
|
+
self.url = response_url
|
97
110
|
end
|
98
111
|
|
99
112
|
def handle_failure
|
data/lib/url_canonicalize/request.rb
CHANGED
@@ -6,6 +6,22 @@ module URLCanonicalize
|
|
6
6
|
handle_response
|
7
7
|
end
|
8
8
|
|
9
|
+
def location
|
10
|
+
@location ||= relative_to_absolute(response['location'])
|
11
|
+
end
|
12
|
+
|
13
|
+
def with_uri(uri)
|
14
|
+
@uri = uri
|
15
|
+
|
16
|
+
@url = nil
|
17
|
+
@host = nil
|
18
|
+
@response = nil
|
19
|
+
@location = nil
|
20
|
+
@html = nil
|
21
|
+
|
22
|
+
self
|
23
|
+
end
|
24
|
+
|
9
25
|
private
|
10
26
|
|
11
27
|
attr_reader :http, :http_method
|
@@ -16,7 +32,12 @@ module URLCanonicalize
|
|
16
32
|
end
|
17
33
|
|
18
34
|
def response
|
19
|
-
@response ||=
|
35
|
+
@response ||= do_http_request
|
36
|
+
end
|
37
|
+
|
38
|
+
# We can stub this method in testing then call #response any number of times
|
39
|
+
def do_http_request #:nodoc: internal use only
|
40
|
+
http.do_request request # Some URLs can throw an exception here
|
20
41
|
end
|
21
42
|
|
22
43
|
def request
|
@@ -51,10 +72,10 @@ module URLCanonicalize
|
|
51
72
|
|
52
73
|
def handle_redirection
|
53
74
|
case response
|
54
|
-
when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect
|
75
|
+
when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect # Temporary redirection
|
55
76
|
self.http_method = :get
|
56
77
|
handle_success
|
57
|
-
else
|
78
|
+
else # Permanent redirection
|
58
79
|
if location
|
59
80
|
URLCanonicalize::Response::Redirect.new(location)
|
60
81
|
else
|
@@ -105,10 +126,6 @@ module URLCanonicalize
|
|
105
126
|
@host ||= uri.host
|
106
127
|
end
|
107
128
|
|
108
|
-
def location
|
109
|
-
@location ||= relative_to_absolute(response['location'])
|
110
|
-
end
|
111
|
-
|
112
129
|
def request_for_method
|
113
130
|
r = base_request
|
114
131
|
headers.each { |header_key, header_value| r[header_key] = header_value }
|
@@ -124,7 +141,7 @@ module URLCanonicalize
|
|
124
141
|
when :get
|
125
142
|
Net::HTTP::Get.new uri
|
126
143
|
else
|
127
|
-
raise URLCanonicalize::Exception::Request, "Unknown method: #{
|
144
|
+
raise URLCanonicalize::Exception::Request, "Unknown method: #{http_method}"
|
128
145
|
end
|
129
146
|
end
|
130
147
|
|
@@ -141,6 +158,7 @@ module URLCanonicalize
|
|
141
158
|
@http_method = value
|
142
159
|
@request = nil
|
143
160
|
@response = nil
|
161
|
+
@location = nil
|
144
162
|
@html = nil
|
145
163
|
end
|
146
164
|
|
data/lib/url_canonicalize/uri.rb
CHANGED
@@ -4,8 +4,9 @@ module URLCanonicalize
|
|
4
4
|
class URI
|
5
5
|
class << self
|
6
6
|
def parse(url)
|
7
|
-
uri = ::URI.parse(url)
|
8
|
-
uri
|
7
|
+
# uri = ::URI.parse decorate(url)
|
8
|
+
uri = ::URI.parse url
|
9
|
+
uri if valid? uri
|
9
10
|
rescue ::URI::InvalidURIError => e
|
10
11
|
new_exception = URLCanonicalize::Exception::URI.new("#{e.class}: #{e.message}")
|
11
12
|
new_exception.set_backtrace e.backtrace
|
@@ -20,7 +21,13 @@ module URLCanonicalize
|
|
20
21
|
true
|
21
22
|
end
|
22
23
|
|
24
|
+
def decorate(url)
|
25
|
+
return url if url.include? COLON
|
26
|
+
"http://#{url}" # Add protocol if we just receive a host name
|
27
|
+
end
|
28
|
+
|
23
29
|
VALID_CLASSES = [::URI::HTTP, ::URI::HTTPS].freeze
|
30
|
+
COLON = ':'
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -46,11 +46,13 @@ executables: []
|
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
|
+
- ".codeclimate.yml"
|
49
50
|
- ".gitignore"
|
50
51
|
- ".hound.yml"
|
51
52
|
- ".rspec"
|
52
53
|
- ".rubocop.yml"
|
53
54
|
- ".ruby-gemset"
|
55
|
+
- ".travis.yml"
|
54
56
|
- Gemfile
|
55
57
|
- Gemfile.local.example
|
56
58
|
- Guardfile
|