url_canonicalize 0.0.3 → 0.0.4

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5af7a447bc416a72583d21996065ce48fc29a0fd
4
- data.tar.gz: 856b26e146eec5c85784908af73ff28a28977a6e
3
+ metadata.gz: 009ed3bccd9e756ada7d68450e7b78fa3e0a673d
4
+ data.tar.gz: 33ff979501b4e5583a6a782e926f46776ea1a3e3
5
5
  SHA512:
6
- metadata.gz: 671fed9e62a503c7ba6797f8c501af6cb920c19145ec50690abbe1571a682467b914cb918210e960e91ce56b07e903bf15e17a77727b37f17030b6506b748b1d
7
- data.tar.gz: 5714152d98bc8d7b21002c146fe9acabb2c549b72e49a093655b27b0997bc707e82e2a27e56f28cf4e65adafdd761f9d37ad7c6c284eada2864757aa0a012139
6
+ metadata.gz: a9d793abae4f70a91ff1f5fec5c79968078b0483e0045048a82fe5b1e2caac3962b7ea6c842d9daec5d59188ec2ded912160a518a2d0e3adc2d60070ca53e091
7
+ data.tar.gz: 928c6b774474831d33df54f4d214651e3fe7f65a93984d7dc1363ae274e888894f0a1d8dab6da51783e548a7bf3adf6bac9a6768ebeda184cb17306928d3f356
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ Gemfile.lock
1
2
  *.local
2
3
  *.gem
3
4
  *.rbc
@@ -16,7 +16,7 @@ module URLCanonicalize
16
16
 
17
17
  class << self
18
18
  def canonicalize(url)
19
- fetch(url).uri.to_s
19
+ fetch(url).url
20
20
  end
21
21
 
22
22
  def fetch(url)
@@ -14,8 +14,8 @@ module URLCanonicalize
14
14
  @uri = nil
15
15
  end
16
16
 
17
- def request(request_object)
18
- http.request request_object
17
+ def request(http_request)
18
+ http.request http_request
19
19
  end
20
20
 
21
21
  private
@@ -41,7 +41,7 @@ module URLCanonicalize
41
41
  # Parse the response
42
42
  def parse_response
43
43
  case response
44
- when Net::HTTPSuccess
44
+ when URLCanonicalize::Response::Success
45
45
  handle_success
46
46
  when URLCanonicalize::Response::Redirect
47
47
  redirect_loop_detected? || max_redirects_reached?
@@ -25,7 +25,7 @@ module URLCanonicalize
25
25
  def handle_response
26
26
  case response
27
27
  when Net::HTTPSuccess
28
- look_for_canonical
28
+ handle_success
29
29
  when Net::HTTPRedirection
30
30
  handle_redirection
31
31
  else
@@ -35,15 +35,14 @@ module URLCanonicalize
35
35
  handle_failure(e.class, e.message)
36
36
  end
37
37
 
38
- def look_for_canonical
39
- # Look in response Link header
40
- if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
41
- URLCanonicalize::Response::CanonicalFound.new($LAST_MATCH_INFO['url'])
42
- elsif http_method == :head
38
+ def handle_success
39
+ @canonical_url = $LAST_MATCH_INFO['url'] if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
40
+
41
+ if http_method == :head
43
42
  self.http_method = :get
44
43
  fetch
45
44
  else
46
- canonical_url ? URLCanonicalize::Response::CanonicalFound.new(canonical_url, response) : response
45
+ enhanced_response
47
46
  end
48
47
  end
49
48
 
@@ -61,6 +60,15 @@ module URLCanonicalize
61
60
  URLCanonicalize::Response::Failure.new(klass, message)
62
61
  end
63
62
 
63
+ def enhanced_response
64
+ if canonical_url
65
+ response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
66
+ URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
67
+ else
68
+ URLCanonicalize::Response::Success.new(url, response, html)
69
+ end
70
+ end
71
+
64
72
  def html
65
73
  @html ||= Nokogiri::HTML response.body
66
74
  end
@@ -117,6 +125,7 @@ module URLCanonicalize
117
125
  @http_method = value
118
126
  @request = nil
119
127
  @response = nil
128
+ @html = nil
120
129
  end
121
130
 
122
131
  # Some sites treat HEAD requests as suspicious activity and block the
@@ -13,14 +13,28 @@ module URLCanonicalize
13
13
 
14
14
  Redirect = Class.new(Generic)
15
15
 
16
+ # Add HTML to a successful response
17
+ class Success < Generic
18
+ attr_reader :response, :html
19
+
20
+ private
21
+
22
+ def initialize(url, response, html)
23
+ @response = response
24
+ @html = html
25
+ super url
26
+ end
27
+ end
28
+
29
+ # We found a canonical URL!
16
30
  class CanonicalFound < Generic
17
31
  attr_reader :response
18
32
 
19
33
  private
20
34
 
21
35
  def initialize(url, response)
22
- @url = url
23
36
  @response = response
37
+ super url
24
38
  end
25
39
  end
26
40
 
@@ -1,3 +1,3 @@
1
1
  module URLCanonicalize
2
- VERSION = '0.0.3'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_canonicalize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dominic Sayers
@@ -54,7 +54,6 @@ files:
54
54
  - ".ruby-version"
55
55
  - Gemfile
56
56
  - Gemfile.local.example
57
- - Gemfile.lock
58
57
  - Guardfile
59
58
  - LICENSE
60
59
  - README.md
data/Gemfile.lock DELETED
@@ -1,131 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- url_canonicalize (0.0.2)
5
- addressable (~> 2)
6
- nokogiri (~> 1)
7
-
8
- GEM
9
- remote: https://rubygems.org/
10
- specs:
11
- addressable (2.4.0)
12
- ast (2.3.0)
13
- builder (3.2.2)
14
- coderay (1.1.1)
15
- coveralls (0.8.15)
16
- json (>= 1.8, < 3)
17
- simplecov (~> 0.12.0)
18
- term-ansicolor (~> 1.3)
19
- thor (~> 0.19.1)
20
- tins (>= 1.6.0, < 2)
21
- crack (0.4.3)
22
- safe_yaml (~> 1.0.0)
23
- diff-lcs (1.2.5)
24
- docile (1.1.5)
25
- ffi (1.9.14)
26
- formatador (0.2.5)
27
- gem-release (0.7.4)
28
- guard (2.14.0)
29
- formatador (>= 0.2.4)
30
- listen (>= 2.7, < 4.0)
31
- lumberjack (~> 1.0)
32
- nenv (~> 0.1)
33
- notiffany (~> 0.0)
34
- pry (>= 0.9.12)
35
- shellany (~> 0.0)
36
- thor (>= 0.18.1)
37
- guard-compat (1.2.1)
38
- guard-rspec (4.7.3)
39
- guard (~> 2.1)
40
- guard-compat (~> 1.1)
41
- rspec (>= 2.99.0, < 4.0)
42
- guard-rubocop (1.2.0)
43
- guard (~> 2.0)
44
- rubocop (~> 0.20)
45
- hashdiff (0.3.0)
46
- json (2.0.2)
47
- listen (3.0.8)
48
- rb-fsevent (~> 0.9, >= 0.9.4)
49
- rb-inotify (~> 0.9, >= 0.9.7)
50
- lumberjack (1.0.10)
51
- method_source (0.8.2)
52
- mini_portile2 (2.1.0)
53
- nenv (0.3.0)
54
- nokogiri (1.6.8.1)
55
- mini_portile2 (~> 2.1.0)
56
- notiffany (0.1.1)
57
- nenv (~> 0.1)
58
- shellany (~> 0.0)
59
- parser (2.3.1.4)
60
- ast (~> 2.2)
61
- powerpack (0.1.1)
62
- pry (0.10.4)
63
- coderay (~> 1.1.0)
64
- method_source (~> 0.8.1)
65
- slop (~> 3.4)
66
- rainbow (2.1.0)
67
- rake (11.3.0)
68
- rb-fsevent (0.9.7)
69
- rb-inotify (0.9.7)
70
- ffi (>= 0.5.0)
71
- rspec (3.5.0)
72
- rspec-core (~> 3.5.0)
73
- rspec-expectations (~> 3.5.0)
74
- rspec-mocks (~> 3.5.0)
75
- rspec-core (3.5.4)
76
- rspec-support (~> 3.5.0)
77
- rspec-expectations (3.5.0)
78
- diff-lcs (>= 1.2.0, < 2.0)
79
- rspec-support (~> 3.5.0)
80
- rspec-mocks (3.5.0)
81
- diff-lcs (>= 1.2.0, < 2.0)
82
- rspec-support (~> 3.5.0)
83
- rspec-support (3.5.0)
84
- rspec_junit_formatter (0.2.3)
85
- builder (< 4)
86
- rspec-core (>= 2, < 4, != 2.12.0)
87
- rubocop (0.44.1)
88
- parser (>= 2.3.1.1, < 3.0)
89
- powerpack (~> 0.1)
90
- rainbow (>= 1.99.1, < 3.0)
91
- ruby-progressbar (~> 1.7)
92
- unicode-display_width (~> 1.0, >= 1.0.1)
93
- ruby-progressbar (1.8.1)
94
- safe_yaml (1.0.4)
95
- shellany (0.0.1)
96
- simplecov (0.12.0)
97
- docile (~> 1.1.0)
98
- json (>= 1.8, < 3)
99
- simplecov-html (~> 0.10.0)
100
- simplecov-html (0.10.0)
101
- slop (3.6.0)
102
- term-ansicolor (1.4.0)
103
- tins (~> 1.0)
104
- thor (0.19.1)
105
- tins (1.12.0)
106
- unicode-display_width (1.1.1)
107
- webmock (2.1.0)
108
- addressable (>= 2.3.6)
109
- crack (>= 0.3.2)
110
- hashdiff
111
-
112
- PLATFORMS
113
- ruby
114
-
115
- DEPENDENCIES
116
- coveralls
117
- gem-release
118
- guard
119
- guard-rspec
120
- guard-rubocop
121
- listen (~> 3.0, < 3.1)
122
- rake
123
- rspec
124
- rspec_junit_formatter
125
- rubocop
126
- simplecov
127
- url_canonicalize!
128
- webmock
129
-
130
- BUNDLED WITH
131
- 1.13.5