url_canonicalize 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/url_canonicalize.rb +1 -1
- data/lib/url_canonicalize/http.rb +3 -3
- data/lib/url_canonicalize/request.rb +16 -7
- data/lib/url_canonicalize/response.rb +15 -1
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +1 -2
- data/Gemfile.lock +0 -131
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 009ed3bccd9e756ada7d68450e7b78fa3e0a673d
|
4
|
+
data.tar.gz: 33ff979501b4e5583a6a782e926f46776ea1a3e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9d793abae4f70a91ff1f5fec5c79968078b0483e0045048a82fe5b1e2caac3962b7ea6c842d9daec5d59188ec2ded912160a518a2d0e3adc2d60070ca53e091
|
7
|
+
data.tar.gz: 928c6b774474831d33df54f4d214651e3fe7f65a93984d7dc1363ae274e888894f0a1d8dab6da51783e548a7bf3adf6bac9a6768ebeda184cb17306928d3f356
|
data/.gitignore
CHANGED
data/lib/url_canonicalize.rb
CHANGED
data/lib/url_canonicalize/http.rb
CHANGED
@@ -14,8 +14,8 @@ module URLCanonicalize
|
|
14
14
|
@uri = nil
|
15
15
|
end
|
16
16
|
|
17
|
-
def request(
|
18
|
-
http.request
|
17
|
+
def request(http_request)
|
18
|
+
http.request http_request
|
19
19
|
end
|
20
20
|
|
21
21
|
private
|
@@ -41,7 +41,7 @@ module URLCanonicalize
|
|
41
41
|
# Parse the response
|
42
42
|
def parse_response
|
43
43
|
case response
|
44
|
-
when
|
44
|
+
when URLCanonicalize::Response::Success
|
45
45
|
handle_success
|
46
46
|
when URLCanonicalize::Response::Redirect
|
47
47
|
redirect_loop_detected? || max_redirects_reached?
|
data/lib/url_canonicalize/request.rb
CHANGED
@@ -25,7 +25,7 @@ module URLCanonicalize
|
|
25
25
|
def handle_response
|
26
26
|
case response
|
27
27
|
when Net::HTTPSuccess
|
28
|
-
|
28
|
+
handle_success
|
29
29
|
when Net::HTTPRedirection
|
30
30
|
handle_redirection
|
31
31
|
else
|
@@ -35,15 +35,14 @@ module URLCanonicalize
|
|
35
35
|
handle_failure(e.class, e.message)
|
36
36
|
end
|
37
37
|
|
38
|
-
def
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
elsif http_method == :head
|
38
|
+
def handle_success
|
39
|
+
@canonical_url = $LAST_MATCH_INFO['url'] if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
|
40
|
+
|
41
|
+
if http_method == :head
|
43
42
|
self.http_method = :get
|
44
43
|
fetch
|
45
44
|
else
|
46
|
-
|
45
|
+
enhanced_response
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
@@ -61,6 +60,15 @@ module URLCanonicalize
|
|
61
60
|
URLCanonicalize::Response::Failure.new(klass, message)
|
62
61
|
end
|
63
62
|
|
63
|
+
def enhanced_response
|
64
|
+
if canonical_url
|
65
|
+
response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
|
66
|
+
URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
|
67
|
+
else
|
68
|
+
URLCanonicalize::Response::Success.new(url, response, html)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
64
72
|
def html
|
65
73
|
@html ||= Nokogiri::HTML response.body
|
66
74
|
end
|
@@ -117,6 +125,7 @@ module URLCanonicalize
|
|
117
125
|
@http_method = value
|
118
126
|
@request = nil
|
119
127
|
@response = nil
|
128
|
+
@html = nil
|
120
129
|
end
|
121
130
|
|
122
131
|
# Some sites treat HEAD requests as suspicious activity and block the
|
data/lib/url_canonicalize/response.rb
CHANGED
@@ -13,14 +13,28 @@ module URLCanonicalize
|
|
13
13
|
|
14
14
|
Redirect = Class.new(Generic)
|
15
15
|
|
16
|
+
# Add HTML to a successful response
|
17
|
+
class Success < Generic
|
18
|
+
attr_reader :response, :html
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def initialize(url, response, html)
|
23
|
+
@response = response
|
24
|
+
@html = html
|
25
|
+
super url
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# We found a canonical URL!
|
16
30
|
class CanonicalFound < Generic
|
17
31
|
attr_reader :response
|
18
32
|
|
19
33
|
private
|
20
34
|
|
21
35
|
def initialize(url, response)
|
22
|
-
@url = url
|
23
36
|
@response = response
|
37
|
+
super url
|
24
38
|
end
|
25
39
|
end
|
26
40
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_canonicalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Sayers
|
@@ -54,7 +54,6 @@ files:
|
|
54
54
|
- ".ruby-version"
|
55
55
|
- Gemfile
|
56
56
|
- Gemfile.local.example
|
57
|
-
- Gemfile.lock
|
58
57
|
- Guardfile
|
59
58
|
- LICENSE
|
60
59
|
- README.md
|
data/Gemfile.lock
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
url_canonicalize (0.0.2)
|
5
|
-
addressable (~> 2)
|
6
|
-
nokogiri (~> 1)
|
7
|
-
|
8
|
-
GEM
|
9
|
-
remote: https://rubygems.org/
|
10
|
-
specs:
|
11
|
-
addressable (2.4.0)
|
12
|
-
ast (2.3.0)
|
13
|
-
builder (3.2.2)
|
14
|
-
coderay (1.1.1)
|
15
|
-
coveralls (0.8.15)
|
16
|
-
json (>= 1.8, < 3)
|
17
|
-
simplecov (~> 0.12.0)
|
18
|
-
term-ansicolor (~> 1.3)
|
19
|
-
thor (~> 0.19.1)
|
20
|
-
tins (>= 1.6.0, < 2)
|
21
|
-
crack (0.4.3)
|
22
|
-
safe_yaml (~> 1.0.0)
|
23
|
-
diff-lcs (1.2.5)
|
24
|
-
docile (1.1.5)
|
25
|
-
ffi (1.9.14)
|
26
|
-
formatador (0.2.5)
|
27
|
-
gem-release (0.7.4)
|
28
|
-
guard (2.14.0)
|
29
|
-
formatador (>= 0.2.4)
|
30
|
-
listen (>= 2.7, < 4.0)
|
31
|
-
lumberjack (~> 1.0)
|
32
|
-
nenv (~> 0.1)
|
33
|
-
notiffany (~> 0.0)
|
34
|
-
pry (>= 0.9.12)
|
35
|
-
shellany (~> 0.0)
|
36
|
-
thor (>= 0.18.1)
|
37
|
-
guard-compat (1.2.1)
|
38
|
-
guard-rspec (4.7.3)
|
39
|
-
guard (~> 2.1)
|
40
|
-
guard-compat (~> 1.1)
|
41
|
-
rspec (>= 2.99.0, < 4.0)
|
42
|
-
guard-rubocop (1.2.0)
|
43
|
-
guard (~> 2.0)
|
44
|
-
rubocop (~> 0.20)
|
45
|
-
hashdiff (0.3.0)
|
46
|
-
json (2.0.2)
|
47
|
-
listen (3.0.8)
|
48
|
-
rb-fsevent (~> 0.9, >= 0.9.4)
|
49
|
-
rb-inotify (~> 0.9, >= 0.9.7)
|
50
|
-
lumberjack (1.0.10)
|
51
|
-
method_source (0.8.2)
|
52
|
-
mini_portile2 (2.1.0)
|
53
|
-
nenv (0.3.0)
|
54
|
-
nokogiri (1.6.8.1)
|
55
|
-
mini_portile2 (~> 2.1.0)
|
56
|
-
notiffany (0.1.1)
|
57
|
-
nenv (~> 0.1)
|
58
|
-
shellany (~> 0.0)
|
59
|
-
parser (2.3.1.4)
|
60
|
-
ast (~> 2.2)
|
61
|
-
powerpack (0.1.1)
|
62
|
-
pry (0.10.4)
|
63
|
-
coderay (~> 1.1.0)
|
64
|
-
method_source (~> 0.8.1)
|
65
|
-
slop (~> 3.4)
|
66
|
-
rainbow (2.1.0)
|
67
|
-
rake (11.3.0)
|
68
|
-
rb-fsevent (0.9.7)
|
69
|
-
rb-inotify (0.9.7)
|
70
|
-
ffi (>= 0.5.0)
|
71
|
-
rspec (3.5.0)
|
72
|
-
rspec-core (~> 3.5.0)
|
73
|
-
rspec-expectations (~> 3.5.0)
|
74
|
-
rspec-mocks (~> 3.5.0)
|
75
|
-
rspec-core (3.5.4)
|
76
|
-
rspec-support (~> 3.5.0)
|
77
|
-
rspec-expectations (3.5.0)
|
78
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
79
|
-
rspec-support (~> 3.5.0)
|
80
|
-
rspec-mocks (3.5.0)
|
81
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
82
|
-
rspec-support (~> 3.5.0)
|
83
|
-
rspec-support (3.5.0)
|
84
|
-
rspec_junit_formatter (0.2.3)
|
85
|
-
builder (< 4)
|
86
|
-
rspec-core (>= 2, < 4, != 2.12.0)
|
87
|
-
rubocop (0.44.1)
|
88
|
-
parser (>= 2.3.1.1, < 3.0)
|
89
|
-
powerpack (~> 0.1)
|
90
|
-
rainbow (>= 1.99.1, < 3.0)
|
91
|
-
ruby-progressbar (~> 1.7)
|
92
|
-
unicode-display_width (~> 1.0, >= 1.0.1)
|
93
|
-
ruby-progressbar (1.8.1)
|
94
|
-
safe_yaml (1.0.4)
|
95
|
-
shellany (0.0.1)
|
96
|
-
simplecov (0.12.0)
|
97
|
-
docile (~> 1.1.0)
|
98
|
-
json (>= 1.8, < 3)
|
99
|
-
simplecov-html (~> 0.10.0)
|
100
|
-
simplecov-html (0.10.0)
|
101
|
-
slop (3.6.0)
|
102
|
-
term-ansicolor (1.4.0)
|
103
|
-
tins (~> 1.0)
|
104
|
-
thor (0.19.1)
|
105
|
-
tins (1.12.0)
|
106
|
-
unicode-display_width (1.1.1)
|
107
|
-
webmock (2.1.0)
|
108
|
-
addressable (>= 2.3.6)
|
109
|
-
crack (>= 0.3.2)
|
110
|
-
hashdiff
|
111
|
-
|
112
|
-
PLATFORMS
|
113
|
-
ruby
|
114
|
-
|
115
|
-
DEPENDENCIES
|
116
|
-
coveralls
|
117
|
-
gem-release
|
118
|
-
guard
|
119
|
-
guard-rspec
|
120
|
-
guard-rubocop
|
121
|
-
listen (~> 3.0, < 3.1)
|
122
|
-
rake
|
123
|
-
rspec
|
124
|
-
rspec_junit_formatter
|
125
|
-
rubocop
|
126
|
-
simplecov
|
127
|
-
url_canonicalize!
|
128
|
-
webmock
|
129
|
-
|
130
|
-
BUNDLED WITH
|
131
|
-
1.13.5
|