url_canonicalize 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 219d397a462ff3656b983b11f219de82331c3bdd
4
- data.tar.gz: 54a1b7916e2ed9bbc5bd0303a5f10a6d8bcd6cd8
3
+ metadata.gz: 6e3ce5f5d168512959c94e6af9a6482681e46ca9
4
+ data.tar.gz: e09877ca0b7488e62b1aca3d29923ed0ebf56f23
5
5
  SHA512:
6
- metadata.gz: 2c1bace965b619a010d4ebecfe27960f5e09dfd71d26de2f06083eae2a5967a7f582bc5a0ca2012827c630f44e980aa368ff04e074220501a52f3ab45cc4b6fa
7
- data.tar.gz: f73b70de8512cc161fdf89462b053e11dba67b4afecb048b7e06cc69067bb58cbb5d4353319958cf731306bae14ff7f8d39f193d1b27d8b3a25e9ca5bd7d027a
6
+ metadata.gz: c39e085b024f763ce99d37be169e7d979bf160d347e46254eab572fdc033f01c5cf490cda18cbff5961c3e62de6793548884d80b0bd83cd6fa06134ca16cac76
7
+ data.tar.gz: c6828ce1595c250313ad52d746ce8b337a5a4066c8c1e94af18af4bfd2434dbd991111e3ce2ad3fc62c3b3f69bde6d36b32290d2aa11548fc7d22abfd8cc5937
data/.rubocop.yml CHANGED
@@ -2,6 +2,7 @@
2
2
  AllCops:
3
3
  Exclude:
4
4
  - '*.gemspec'
5
+ - 'spec/**/*'
5
6
 
6
7
  Style/TrailingCommaInArguments:
7
8
  EnforcedStyleForMultiline: no_comma
@@ -52,4 +53,4 @@ Lint/LiteralInInterpolation:
52
53
 
53
54
  Metrics/ClassLength:
54
55
  CountComments: false # count full line comments?
55
- Max: 120
56
+ Max: 150
@@ -47,12 +47,15 @@ module URLCanonicalize
47
47
  end
48
48
 
49
49
  def handle_redirection
50
+ puts response['location'] # debug
51
+
50
52
  case response
51
53
  when Net::HTTPFound, Net::HTTPMovedTemporarily, Net::HTTPTemporaryRedirect
52
54
  self.http_method = :get
53
55
  handle_success
54
56
  else
55
- URLCanonicalize::Response::Redirect.new(response['location'])
57
+ location = relative_to_absolute(response['location'])
58
+ URLCanonicalize::Response::Redirect.new(location)
56
59
  end
57
60
  end
58
61
 
@@ -61,6 +64,8 @@ module URLCanonicalize
61
64
  end
62
65
 
63
66
  def enhanced_response
67
+ puts canonical_url # debug
68
+
64
69
  if canonical_url
65
70
  response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
66
71
  URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
@@ -73,14 +78,18 @@ module URLCanonicalize
73
78
  @html ||= Nokogiri::HTML response.body
74
79
  end
75
80
 
76
- def canonical_url_element
77
- @canonical_url_element ||= html.xpath('//head/link[@rel="canonical"]').first
81
+ def canonical_url
82
+ @canonical_url ||= relative_to_absolute(canonical_url_raw)
78
83
  end
79
84
 
80
- def canonical_url
85
+ def canonical_url_raw
81
86
  @canonical_url ||= canonical_url_element['href'] if canonical_url_element.is_a?(Nokogiri::XML::Element)
82
87
  end
83
88
 
89
+ def canonical_url_element
90
+ @canonical_url_element ||= html.xpath('//head/link[@rel="canonical"]').first
91
+ end
92
+
84
93
  def uri
85
94
  @uri ||= http.uri
86
95
  end
@@ -135,6 +144,20 @@ module URLCanonicalize
135
144
  @http_method = :get if host =~ /(linkedin|crunchbase).com/
136
145
  end
137
146
 
147
+ def relative_to_absolute(partial_url)
148
+ return unless partial_url
149
+ partial_uri = ::URI.parse(partial_url)
150
+
151
+ if partial_uri.host
152
+ partial_url # It's already absolute
153
+ else
154
+ base_uri = uri.dup || ::URI.parse(url)
155
+ base_uri.path = partial_url
156
+ puts base_uri.to_s # debug
157
+ base_uri.to_s
158
+ end
159
+ end
160
+
138
161
  NETWORK_EXCEPTIONS = [
139
162
  EOFError,
140
163
  Errno::ECONNREFUSED,
@@ -1,3 +1,3 @@
1
1
  module URLCanonicalize
2
- VERSION = '0.0.5'.freeze
2
+ VERSION = '0.0.6'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_canonicalize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dominic Sayers