url_canonicalize 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/url_canonicalize.rb +1 -1
- data/lib/url_canonicalize/http.rb +3 -3
- data/lib/url_canonicalize/request.rb +16 -7
- data/lib/url_canonicalize/response.rb +15 -1
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +1 -2
- data/Gemfile.lock +0 -131
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 009ed3bccd9e756ada7d68450e7b78fa3e0a673d
|
4
|
+
data.tar.gz: 33ff979501b4e5583a6a782e926f46776ea1a3e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9d793abae4f70a91ff1f5fec5c79968078b0483e0045048a82fe5b1e2caac3962b7ea6c842d9daec5d59188ec2ded912160a518a2d0e3adc2d60070ca53e091
|
7
|
+
data.tar.gz: 928c6b774474831d33df54f4d214651e3fe7f65a93984d7dc1363ae274e888894f0a1d8dab6da51783e548a7bf3adf6bac9a6768ebeda184cb17306928d3f356
|
data/.gitignore
CHANGED
data/lib/url_canonicalize.rb
CHANGED
data/lib/url_canonicalize/http.rb
CHANGED
@@ -14,8 +14,8 @@ module URLCanonicalize
|
|
14
14
|
@uri = nil
|
15
15
|
end
|
16
16
|
|
17
|
-
def request(
|
18
|
-
http.request
|
17
|
+
def request(http_request)
|
18
|
+
http.request http_request
|
19
19
|
end
|
20
20
|
|
21
21
|
private
|
@@ -41,7 +41,7 @@ module URLCanonicalize
|
|
41
41
|
# Parse the response
|
42
42
|
def parse_response
|
43
43
|
case response
|
44
|
-
when
|
44
|
+
when URLCanonicalize::Response::Success
|
45
45
|
handle_success
|
46
46
|
when URLCanonicalize::Response::Redirect
|
47
47
|
redirect_loop_detected? || max_redirects_reached?
|
data/lib/url_canonicalize/request.rb
CHANGED
@@ -25,7 +25,7 @@ module URLCanonicalize
|
|
25
25
|
def handle_response
|
26
26
|
case response
|
27
27
|
when Net::HTTPSuccess
|
28
|
-
|
28
|
+
handle_success
|
29
29
|
when Net::HTTPRedirection
|
30
30
|
handle_redirection
|
31
31
|
else
|
@@ -35,15 +35,14 @@ module URLCanonicalize
|
|
35
35
|
handle_failure(e.class, e.message)
|
36
36
|
end
|
37
37
|
|
38
|
-
def
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
elsif http_method == :head
|
38
|
+
def handle_success
|
39
|
+
@canonical_url = $LAST_MATCH_INFO['url'] if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
|
40
|
+
|
41
|
+
if http_method == :head
|
43
42
|
self.http_method = :get
|
44
43
|
fetch
|
45
44
|
else
|
46
|
-
|
45
|
+
enhanced_response
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
@@ -61,6 +60,15 @@ module URLCanonicalize
|
|
61
60
|
URLCanonicalize::Response::Failure.new(klass, message)
|
62
61
|
end
|
63
62
|
|
63
|
+
def enhanced_response
|
64
|
+
if canonical_url
|
65
|
+
response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
|
66
|
+
URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
|
67
|
+
else
|
68
|
+
URLCanonicalize::Response::Success.new(url, response, html)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
64
72
|
def html
|
65
73
|
@html ||= Nokogiri::HTML response.body
|
66
74
|
end
|
@@ -117,6 +125,7 @@ module URLCanonicalize
|
|
117
125
|
@http_method = value
|
118
126
|
@request = nil
|
119
127
|
@response = nil
|
128
|
+
@html = nil
|
120
129
|
end
|
121
130
|
|
122
131
|
# Some sites treat HEAD requests as suspicious activity and block the
|
data/lib/url_canonicalize/response.rb
CHANGED
@@ -13,14 +13,28 @@ module URLCanonicalize
|
|
13
13
|
|
14
14
|
Redirect = Class.new(Generic)
|
15
15
|
|
16
|
+
# Add HTML to a successful response
|
17
|
+
class Success < Generic
|
18
|
+
attr_reader :response, :html
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def initialize(url, response, html)
|
23
|
+
@response = response
|
24
|
+
@html = html
|
25
|
+
super url
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# We found a canonical URL!
|
16
30
|
class CanonicalFound < Generic
|
17
31
|
attr_reader :response
|
18
32
|
|
19
33
|
private
|
20
34
|
|
21
35
|
def initialize(url, response)
|
22
|
-
@url = url
|
23
36
|
@response = response
|
37
|
+
super url
|
24
38
|
end
|
25
39
|
end
|
26
40
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
@@ -54,7 +54,6 @@ files:
|
|
54
54
|
- ".ruby-version"
|
55
55
|
- Gemfile
|
56
56
|
- Gemfile.local.example
|
57
|
-
- Gemfile.lock
|
58
57
|
- Guardfile
|
59
58
|
- LICENSE
|
60
59
|
- README.md
|
data/Gemfile.lock
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
url_canonicalize (0.0.2)
|
5
|
-
addressable (~> 2)
|
6
|
-
nokogiri (~> 1)
|
7
|
-
|
8
|
-
GEM
|
9
|
-
remote: https://rubygems.org/
|
10
|
-
specs:
|
11
|
-
addressable (2.4.0)
|
12
|
-
ast (2.3.0)
|
13
|
-
builder (3.2.2)
|
14
|
-
coderay (1.1.1)
|
15
|
-
coveralls (0.8.15)
|
16
|
-
json (>= 1.8, < 3)
|
17
|
-
simplecov (~> 0.12.0)
|
18
|
-
term-ansicolor (~> 1.3)
|
19
|
-
thor (~> 0.19.1)
|
20
|
-
tins (>= 1.6.0, < 2)
|
21
|
-
crack (0.4.3)
|
22
|
-
safe_yaml (~> 1.0.0)
|
23
|
-
diff-lcs (1.2.5)
|
24
|
-
docile (1.1.5)
|
25
|
-
ffi (1.9.14)
|
26
|
-
formatador (0.2.5)
|
27
|
-
gem-release (0.7.4)
|
28
|
-
guard (2.14.0)
|
29
|
-
formatador (>= 0.2.4)
|
30
|
-
listen (>= 2.7, < 4.0)
|
31
|
-
lumberjack (~> 1.0)
|
32
|
-
nenv (~> 0.1)
|
33
|
-
notiffany (~> 0.0)
|
34
|
-
pry (>= 0.9.12)
|
35
|
-
shellany (~> 0.0)
|
36
|
-
thor (>= 0.18.1)
|
37
|
-
guard-compat (1.2.1)
|
38
|
-
guard-rspec (4.7.3)
|
39
|
-
guard (~> 2.1)
|
40
|
-
guard-compat (~> 1.1)
|
41
|
-
rspec (>= 2.99.0, < 4.0)
|
42
|
-
guard-rubocop (1.2.0)
|
43
|
-
guard (~> 2.0)
|
44
|
-
rubocop (~> 0.20)
|
45
|
-
hashdiff (0.3.0)
|
46
|
-
json (2.0.2)
|
47
|
-
listen (3.0.8)
|
48
|
-
rb-fsevent (~> 0.9, >= 0.9.4)
|
49
|
-
rb-inotify (~> 0.9, >= 0.9.7)
|
50
|
-
lumberjack (1.0.10)
|
51
|
-
method_source (0.8.2)
|
52
|
-
mini_portile2 (2.1.0)
|
53
|
-
nenv (0.3.0)
|
54
|
-
nokogiri (1.6.8.1)
|
55
|
-
mini_portile2 (~> 2.1.0)
|
56
|
-
notiffany (0.1.1)
|
57
|
-
nenv (~> 0.1)
|
58
|
-
shellany (~> 0.0)
|
59
|
-
parser (2.3.1.4)
|
60
|
-
ast (~> 2.2)
|
61
|
-
powerpack (0.1.1)
|
62
|
-
pry (0.10.4)
|
63
|
-
coderay (~> 1.1.0)
|
64
|
-
method_source (~> 0.8.1)
|
65
|
-
slop (~> 3.4)
|
66
|
-
rainbow (2.1.0)
|
67
|
-
rake (11.3.0)
|
68
|
-
rb-fsevent (0.9.7)
|
69
|
-
rb-inotify (0.9.7)
|
70
|
-
ffi (>= 0.5.0)
|
71
|
-
rspec (3.5.0)
|
72
|
-
rspec-core (~> 3.5.0)
|
73
|
-
rspec-expectations (~> 3.5.0)
|
74
|
-
rspec-mocks (~> 3.5.0)
|
75
|
-
rspec-core (3.5.4)
|
76
|
-
rspec-support (~> 3.5.0)
|
77
|
-
rspec-expectations (3.5.0)
|
78
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
79
|
-
rspec-support (~> 3.5.0)
|
80
|
-
rspec-mocks (3.5.0)
|
81
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
82
|
-
rspec-support (~> 3.5.0)
|
83
|
-
rspec-support (3.5.0)
|
84
|
-
rspec_junit_formatter (0.2.3)
|
85
|
-
builder (< 4)
|
86
|
-
rspec-core (>= 2, < 4, != 2.12.0)
|
87
|
-
rubocop (0.44.1)
|
88
|
-
parser (>= 2.3.1.1, < 3.0)
|
89
|
-
powerpack (~> 0.1)
|
90
|
-
rainbow (>= 1.99.1, < 3.0)
|
91
|
-
ruby-progressbar (~> 1.7)
|
92
|
-
unicode-display_width (~> 1.0, >= 1.0.1)
|
93
|
-
ruby-progressbar (1.8.1)
|
94
|
-
safe_yaml (1.0.4)
|
95
|
-
shellany (0.0.1)
|
96
|
-
simplecov (0.12.0)
|
97
|
-
docile (~> 1.1.0)
|
98
|
-
json (>= 1.8, < 3)
|
99
|
-
simplecov-html (~> 0.10.0)
|
100
|
-
simplecov-html (0.10.0)
|
101
|
-
slop (3.6.0)
|
102
|
-
term-ansicolor (1.4.0)
|
103
|
-
tins (~> 1.0)
|
104
|
-
thor (0.19.1)
|
105
|
-
tins (1.12.0)
|
106
|
-
unicode-display_width (1.1.1)
|
107
|
-
webmock (2.1.0)
|
108
|
-
addressable (>= 2.3.6)
|
109
|
-
crack (>= 0.3.2)
|
110
|
-
hashdiff
|
111
|
-
|
112
|
-
PLATFORMS
|
113
|
-
ruby
|
114
|
-
|
115
|
-
DEPENDENCIES
|
116
|
-
coveralls
|
117
|
-
gem-release
|
118
|
-
guard
|
119
|
-
guard-rspec
|
120
|
-
guard-rubocop
|
121
|
-
listen (~> 3.0, < 3.1)
|
122
|
-
rake
|
123
|
-
rspec
|
124
|
-
rspec_junit_formatter
|
125
|
-
rubocop
|
126
|
-
simplecov
|
127
|
-
url_canonicalize!
|
128
|
-
webmock
|
129
|
-
|
130
|
-
BUNDLED WITH
|
131
|
-
1.13.5
|