url_canonicalize 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -1
- data/lib/url_canonicalize/request.rb +27 -4
- data/lib/url_canonicalize/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6e3ce5f5d168512959c94e6af9a6482681e46ca9
|
|
4
|
+
data.tar.gz: e09877ca0b7488e62b1aca3d29923ed0ebf56f23
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c39e085b024f763ce99d37be169e7d979bf160d347e46254eab572fdc033f01c5cf490cda18cbff5961c3e62de6793548884d80b0bd83cd6fa06134ca16cac76
|
|
7
|
+
data.tar.gz: c6828ce1595c250313ad52d746ce8b337a5a4066c8c1e94af18af4bfd2434dbd991111e3ce2ad3fc62c3b3f69bde6d36b32290d2aa11548fc7d22abfd8cc5937
|
data/.rubocop.yml
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
AllCops:
|
|
3
3
|
Exclude:
|
|
4
4
|
- '*.gemspec'
|
|
5
|
+
- 'spec/**/*'
|
|
5
6
|
|
|
6
7
|
Style/TrailingCommaInArguments:
|
|
7
8
|
EnforcedStyleForMultiline: no_comma
|
|
@@ -52,4 +53,4 @@ Lint/LiteralInInterpolation:
|
|
|
52
53
|
|
|
53
54
|
Metrics/ClassLength:
|
|
54
55
|
CountComments: false # count full line comments?
|
|
55
|
-
Max:
|
|
56
|
+
Max: 150
|
data/lib/url_canonicalize/request.rb
CHANGED
|
@@ -47,12 +47,15 @@ module URLCanonicalize
|
|
|
47
47
|
end
|
|
48
48
|
|
|
49
49
|
def handle_redirection
|
|
50
|
+
puts response['location'] # debug
|
|
51
|
+
|
|
50
52
|
case response
|
|
51
53
|
when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect
|
|
52
54
|
self.http_method = :get
|
|
53
55
|
handle_success
|
|
54
56
|
else
|
|
55
|
-
|
|
57
|
+
location = relative_to_absolute(response['location'])
|
|
58
|
+
URLCanonicalize::Response::Redirect.new(location)
|
|
56
59
|
end
|
|
57
60
|
end
|
|
58
61
|
|
|
@@ -61,6 +64,8 @@ module URLCanonicalize
|
|
|
61
64
|
end
|
|
62
65
|
|
|
63
66
|
def enhanced_response
|
|
67
|
+
puts canonical_url # debug
|
|
68
|
+
|
|
64
69
|
if canonical_url
|
|
65
70
|
response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
|
|
66
71
|
URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
|
|
@@ -73,14 +78,18 @@ module URLCanonicalize
|
|
|
73
78
|
@html ||= Nokogiri::HTML response.body
|
|
74
79
|
end
|
|
75
80
|
|
|
76
|
-
def
|
|
77
|
-
@
|
|
81
|
+
def canonical_url
|
|
82
|
+
@canonical_url ||= relative_to_absolute(canonical_url_raw)
|
|
78
83
|
end
|
|
79
84
|
|
|
80
|
-
def
|
|
85
|
+
def canonical_url_raw
|
|
81
86
|
@canonical_url ||= canonical_url_element['href'] if canonical_url_element.is_a?(Nokogiri::XML::Element)
|
|
82
87
|
end
|
|
83
88
|
|
|
89
|
+
def canonical_url_element
|
|
90
|
+
@canonical_url_element ||= html.xpath('//head/link[@rel="canonical"]').first
|
|
91
|
+
end
|
|
92
|
+
|
|
84
93
|
def uri
|
|
85
94
|
@uri ||= http.uri
|
|
86
95
|
end
|
|
@@ -135,6 +144,20 @@ module URLCanonicalize
|
|
|
135
144
|
@http_method = :get if host =~ /(linkedin|crunchbase).com/
|
|
136
145
|
end
|
|
137
146
|
|
|
147
|
+
def relative_to_absolute(partial_url)
|
|
148
|
+
return unless partial_url
|
|
149
|
+
partial_uri = ::URI.parse(partial_url)
|
|
150
|
+
|
|
151
|
+
if partial_uri.host
|
|
152
|
+
partial_url # It's already absolute
|
|
153
|
+
else
|
|
154
|
+
base_uri = uri.dup || ::URI.parse(url)
|
|
155
|
+
base_uri.path = partial_url
|
|
156
|
+
puts base_uri.to_s # debug
|
|
157
|
+
base_uri.to_s
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
138
161
|
NETWORK_EXCEPTIONS = [
|
|
139
162
|
EOFError,
|
|
140
163
|
Errno::ECONNREFUSED,
|