url_canonicalize 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -1
- data/lib/url_canonicalize/request.rb +27 -4
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e3ce5f5d168512959c94e6af9a6482681e46ca9
|
4
|
+
data.tar.gz: e09877ca0b7488e62b1aca3d29923ed0ebf56f23
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c39e085b024f763ce99d37be169e7d979bf160d347e46254eab572fdc033f01c5cf490cda18cbff5961c3e62de6793548884d80b0bd83cd6fa06134ca16cac76
|
7
|
+
data.tar.gz: c6828ce1595c250313ad52d746ce8b337a5a4066c8c1e94af18af4bfd2434dbd991111e3ce2ad3fc62c3b3f69bde6d36b32290d2aa11548fc7d22abfd8cc5937
|
data/.rubocop.yml
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
AllCops:
|
3
3
|
Exclude:
|
4
4
|
- '*.gemspec'
|
5
|
+
- 'spec/**/*'
|
5
6
|
|
6
7
|
Style/TrailingCommaInArguments:
|
7
8
|
EnforcedStyleForMultiline: no_comma
|
@@ -52,4 +53,4 @@ Lint/LiteralInInterpolation:
|
|
52
53
|
|
53
54
|
Metrics/ClassLength:
|
54
55
|
CountComments: false # count full line comments?
|
55
|
-
Max:
|
56
|
+
Max: 150
|
@@ -47,12 +47,15 @@ module URLCanonicalize
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def handle_redirection
|
50
|
+
puts response['location'] # debug
|
51
|
+
|
50
52
|
case response
|
51
53
|
when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect
|
52
54
|
self.http_method = :get
|
53
55
|
handle_success
|
54
56
|
else
|
55
|
-
|
57
|
+
location = relative_to_absolute(response['location'])
|
58
|
+
URLCanonicalize::Response::Redirect.new(location)
|
56
59
|
end
|
57
60
|
end
|
58
61
|
|
@@ -61,6 +64,8 @@ module URLCanonicalize
|
|
61
64
|
end
|
62
65
|
|
63
66
|
def enhanced_response
|
67
|
+
puts canonical_url # debug
|
68
|
+
|
64
69
|
if canonical_url
|
65
70
|
response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
|
66
71
|
URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
|
@@ -73,14 +78,18 @@ module URLCanonicalize
|
|
73
78
|
@html ||= Nokogiri::HTML response.body
|
74
79
|
end
|
75
80
|
|
76
|
-
def
|
77
|
-
@
|
81
|
+
def canonical_url
|
82
|
+
@canonical_url ||= relative_to_absolute(canonical_url_raw)
|
78
83
|
end
|
79
84
|
|
80
|
-
def
|
85
|
+
def canonical_url_raw
|
81
86
|
@canonical_url ||= canonical_url_element['href'] if canonical_url_element.is_a?(Nokogiri::XML::Element)
|
82
87
|
end
|
83
88
|
|
89
|
+
def canonical_url_element
|
90
|
+
@canonical_url_element ||= html.xpath('//head/link[@rel="canonical"]').first
|
91
|
+
end
|
92
|
+
|
84
93
|
def uri
|
85
94
|
@uri ||= http.uri
|
86
95
|
end
|
@@ -135,6 +144,20 @@ module URLCanonicalize
|
|
135
144
|
@http_method = :get if host =~ /(linkedin|crunchbase).com/
|
136
145
|
end
|
137
146
|
|
147
|
+
def relative_to_absolute(partial_url)
|
148
|
+
return unless partial_url
|
149
|
+
partial_uri = ::URI.parse(partial_url)
|
150
|
+
|
151
|
+
if partial_uri.host
|
152
|
+
partial_url # It's already absolute
|
153
|
+
else
|
154
|
+
base_uri = uri.dup || ::URI.parse(url)
|
155
|
+
base_uri.path = partial_url
|
156
|
+
puts base_uri.to_s # debug
|
157
|
+
base_uri.to_s
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
138
161
|
NETWORK_EXCEPTIONS = [
|
139
162
|
EOFError,
|
140
163
|
Errno::ECONNREFUSED,
|