url_canonicalize 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 219d397a462ff3656b983b11f219de82331c3bdd
4
- data.tar.gz: 54a1b7916e2ed9bbc5bd0303a5f10a6d8bcd6cd8
3
+ metadata.gz: 6e3ce5f5d168512959c94e6af9a6482681e46ca9
4
+ data.tar.gz: e09877ca0b7488e62b1aca3d29923ed0ebf56f23
5
5
  SHA512:
6
- metadata.gz: 2c1bace965b619a010d4ebecfe27960f5e09dfd71d26de2f06083eae2a5967a7f582bc5a0ca2012827c630f44e980aa368ff04e074220501a52f3ab45cc4b6fa
7
- data.tar.gz: f73b70de8512cc161fdf89462b053e11dba67b4afecb048b7e06cc69067bb58cbb5d4353319958cf731306bae14ff7f8d39f193d1b27d8b3a25e9ca5bd7d027a
6
+ metadata.gz: c39e085b024f763ce99d37be169e7d979bf160d347e46254eab572fdc033f01c5cf490cda18cbff5961c3e62de6793548884d80b0bd83cd6fa06134ca16cac76
7
+ data.tar.gz: c6828ce1595c250313ad52d746ce8b337a5a4066c8c1e94af18af4bfd2434dbd991111e3ce2ad3fc62c3b3f69bde6d36b32290d2aa11548fc7d22abfd8cc5937
data/.rubocop.yml CHANGED
@@ -2,6 +2,7 @@
2
2
  AllCops:
3
3
  Exclude:
4
4
  - '*.gemspec'
5
+ - 'spec/**/*'
5
6
 
6
7
  Style/TrailingCommaInArguments:
7
8
  EnforcedStyleForMultiline: no_comma
@@ -52,4 +53,4 @@ Lint/LiteralInInterpolation:
52
53
 
53
54
  Metrics/ClassLength:
54
55
  CountComments: false # count full line comments?
55
- Max: 120
56
+ Max: 150
@@ -47,12 +47,15 @@ module URLCanonicalize
47
47
  end
48
48
 
49
49
  def handle_redirection
50
+ puts response['location'] # debug
51
+
50
52
  case response
51
53
  when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect
52
54
  self.http_method = :get
53
55
  handle_success
54
56
  else
55
- URLCanonicalize::Response::Redirect.new(response['location'])
57
+ location = relative_to_absolute(response['location'])
58
+ URLCanonicalize::Response::Redirect.new(location)
56
59
  end
57
60
  end
58
61
 
@@ -61,6 +64,8 @@ module URLCanonicalize
61
64
  end
62
65
 
63
66
  def enhanced_response
67
+ puts canonical_url # debug
68
+
64
69
  if canonical_url
65
70
  response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
66
71
  URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
@@ -73,14 +78,18 @@ module URLCanonicalize
73
78
  @html ||= Nokogiri::HTML response.body
74
79
  end
75
80
 
76
- def canonical_url_element
77
- @canonical_url_element ||= html.xpath('//head/link[@rel="canonical"]').first
81
+ def canonical_url
82
+ @canonical_url ||= relative_to_absolute(canonical_url_raw)
78
83
  end
79
84
 
80
- def canonical_url
85
+ def canonical_url_raw
81
86
  @canonical_url ||= canonical_url_element['href'] if canonical_url_element.is_a?(Nokogiri::XML::Element)
82
87
  end
83
88
 
89
+ def canonical_url_element
90
+ @canonical_url_element ||= html.xpath('//head/link[@rel="canonical"]').first
91
+ end
92
+
84
93
  def uri
85
94
  @uri ||= http.uri
86
95
  end
@@ -135,6 +144,20 @@ module URLCanonicalize
135
144
  @http_method = :get if host =~ /(linkedin|crunchbase).com/
136
145
  end
137
146
 
147
+ def relative_to_absolute(partial_url)
148
+ return unless partial_url
149
+ partial_uri = ::URI.parse(partial_url)
150
+
151
+ if partial_uri.host
152
+ partial_url # It's already absolute
153
+ else
154
+ base_uri = uri.dup || ::URI.parse(url)
155
+ base_uri.path = partial_url
156
+ puts base_uri.to_s # debug
157
+ base_uri.to_s
158
+ end
159
+ end
160
+
138
161
  NETWORK_EXCEPTIONS = [
139
162
  EOFError,
140
163
  Errno::ECONNREFUSED,
@@ -1,3 +1,3 @@
1
1
  module URLCanonicalize
2
- VERSION = '0.0.5'.freeze
2
+ VERSION = '0.0.6'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_canonicalize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dominic Sayers