url_canonicalize 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5af7a447bc416a72583d21996065ce48fc29a0fd
4
- data.tar.gz: 856b26e146eec5c85784908af73ff28a28977a6e
3
+ metadata.gz: 009ed3bccd9e756ada7d68450e7b78fa3e0a673d
4
+ data.tar.gz: 33ff979501b4e5583a6a782e926f46776ea1a3e3
5
5
  SHA512:
6
- metadata.gz: 671fed9e62a503c7ba6797f8c501af6cb920c19145ec50690abbe1571a682467b914cb918210e960e91ce56b07e903bf15e17a77727b37f17030b6506b748b1d
7
- data.tar.gz: 5714152d98bc8d7b21002c146fe9acabb2c549b72e49a093655b27b0997bc707e82e2a27e56f28cf4e65adafdd761f9d37ad7c6c284eada2864757aa0a012139
6
+ metadata.gz: a9d793abae4f70a91ff1f5fec5c79968078b0483e0045048a82fe5b1e2caac3962b7ea6c842d9daec5d59188ec2ded912160a518a2d0e3adc2d60070ca53e091
7
+ data.tar.gz: 928c6b774474831d33df54f4d214651e3fe7f65a93984d7dc1363ae274e888894f0a1d8dab6da51783e548a7bf3adf6bac9a6768ebeda184cb17306928d3f356
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ Gemfile.lock
1
2
  *.local
2
3
  *.gem
3
4
  *.rbc
@@ -16,7 +16,7 @@ module URLCanonicalize
16
16
 
17
17
  class << self
18
18
  def canonicalize(url)
19
- fetch(url).uri.to_s
19
+ fetch(url).url
20
20
  end
21
21
 
22
22
  def fetch(url)
@@ -14,8 +14,8 @@ module URLCanonicalize
14
14
  @uri = nil
15
15
  end
16
16
 
17
- def request(request_object)
18
- http.request request_object
17
+ def request(http_request)
18
+ http.request http_request
19
19
  end
20
20
 
21
21
  private
@@ -41,7 +41,7 @@ module URLCanonicalize
41
41
  # Parse the response
42
42
  def parse_response
43
43
  case response
44
- when Net::HTTPSuccess
44
+ when URLCanonicalize::Response::Success
45
45
  handle_success
46
46
  when URLCanonicalize::Response::Redirect
47
47
  redirect_loop_detected? || max_redirects_reached?
@@ -25,7 +25,7 @@ module URLCanonicalize
25
25
  def handle_response
26
26
  case response
27
27
  when Net::HTTPSuccess
28
- look_for_canonical
28
+ handle_success
29
29
  when Net::HTTPRedirection
30
30
  handle_redirection
31
31
  else
@@ -35,15 +35,14 @@ module URLCanonicalize
35
35
  handle_failure(e.class, e.message)
36
36
  end
37
37
 
38
- def look_for_canonical
39
- # Look in response Link header
40
- if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
41
- URLCanonicalize::Response::CanonicalFound.new($LAST_MATCH_INFO['url'])
42
- elsif http_method == :head
38
+ def handle_success
39
+ @canonical_url = $LAST_MATCH_INFO['url'] if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
40
+
41
+ if http_method == :head
43
42
  self.http_method = :get
44
43
  fetch
45
44
  else
46
- canonical_url ? URLCanonicalize::Response::CanonicalFound.new(canonical_url, response) : response
45
+ enhanced_response
47
46
  end
48
47
  end
49
48
 
@@ -61,6 +60,15 @@ module URLCanonicalize
61
60
  URLCanonicalize::Response::Failure.new(klass, message)
62
61
  end
63
62
 
63
+ def enhanced_response
64
+ if canonical_url
65
+ response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
66
+ URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
67
+ else
68
+ URLCanonicalize::Response::Success.new(url, response, html)
69
+ end
70
+ end
71
+
64
72
  def html
65
73
  @html ||= Nokogiri::HTML response.body
66
74
  end
@@ -117,6 +125,7 @@ module URLCanonicalize
117
125
  @http_method = value
118
126
  @request = nil
119
127
  @response = nil
128
+ @html = nil
120
129
  end
121
130
 
122
131
  # Some sites treat HEAD requests as suspicious activity and block the
@@ -13,14 +13,28 @@ module URLCanonicalize
13
13
 
14
14
  Redirect = Class.new(Generic)
15
15
 
16
+ # Add HTML to a successful response
17
+ class Success < Generic
18
+ attr_reader :response, :html
19
+
20
+ private
21
+
22
+ def initialize(url, response, html)
23
+ @response = response
24
+ @html = html
25
+ super url
26
+ end
27
+ end
28
+
29
+ # We found a canonical URL!
16
30
  class CanonicalFound < Generic
17
31
  attr_reader :response
18
32
 
19
33
  private
20
34
 
21
35
  def initialize(url, response)
22
- @url = url
23
36
  @response = response
37
+ super url
24
38
  end
25
39
  end
26
40
 
@@ -1,3 +1,3 @@
1
1
  module URLCanonicalize
2
- VERSION = '0.0.3'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_canonicalize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dominic Sayers
@@ -54,7 +54,6 @@ files:
54
54
  - ".ruby-version"
55
55
  - Gemfile
56
56
  - Gemfile.local.example
57
- - Gemfile.lock
58
57
  - Guardfile
59
58
  - LICENSE
60
59
  - README.md
data/Gemfile.lock DELETED
@@ -1,131 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- url_canonicalize (0.0.2)
5
- addressable (~> 2)
6
- nokogiri (~> 1)
7
-
8
- GEM
9
- remote: https://rubygems.org/
10
- specs:
11
- addressable (2.4.0)
12
- ast (2.3.0)
13
- builder (3.2.2)
14
- coderay (1.1.1)
15
- coveralls (0.8.15)
16
- json (>= 1.8, < 3)
17
- simplecov (~> 0.12.0)
18
- term-ansicolor (~> 1.3)
19
- thor (~> 0.19.1)
20
- tins (>= 1.6.0, < 2)
21
- crack (0.4.3)
22
- safe_yaml (~> 1.0.0)
23
- diff-lcs (1.2.5)
24
- docile (1.1.5)
25
- ffi (1.9.14)
26
- formatador (0.2.5)
27
- gem-release (0.7.4)
28
- guard (2.14.0)
29
- formatador (>= 0.2.4)
30
- listen (>= 2.7, < 4.0)
31
- lumberjack (~> 1.0)
32
- nenv (~> 0.1)
33
- notiffany (~> 0.0)
34
- pry (>= 0.9.12)
35
- shellany (~> 0.0)
36
- thor (>= 0.18.1)
37
- guard-compat (1.2.1)
38
- guard-rspec (4.7.3)
39
- guard (~> 2.1)
40
- guard-compat (~> 1.1)
41
- rspec (>= 2.99.0, < 4.0)
42
- guard-rubocop (1.2.0)
43
- guard (~> 2.0)
44
- rubocop (~> 0.20)
45
- hashdiff (0.3.0)
46
- json (2.0.2)
47
- listen (3.0.8)
48
- rb-fsevent (~> 0.9, >= 0.9.4)
49
- rb-inotify (~> 0.9, >= 0.9.7)
50
- lumberjack (1.0.10)
51
- method_source (0.8.2)
52
- mini_portile2 (2.1.0)
53
- nenv (0.3.0)
54
- nokogiri (1.6.8.1)
55
- mini_portile2 (~> 2.1.0)
56
- notiffany (0.1.1)
57
- nenv (~> 0.1)
58
- shellany (~> 0.0)
59
- parser (2.3.1.4)
60
- ast (~> 2.2)
61
- powerpack (0.1.1)
62
- pry (0.10.4)
63
- coderay (~> 1.1.0)
64
- method_source (~> 0.8.1)
65
- slop (~> 3.4)
66
- rainbow (2.1.0)
67
- rake (11.3.0)
68
- rb-fsevent (0.9.7)
69
- rb-inotify (0.9.7)
70
- ffi (>= 0.5.0)
71
- rspec (3.5.0)
72
- rspec-core (~> 3.5.0)
73
- rspec-expectations (~> 3.5.0)
74
- rspec-mocks (~> 3.5.0)
75
- rspec-core (3.5.4)
76
- rspec-support (~> 3.5.0)
77
- rspec-expectations (3.5.0)
78
- diff-lcs (>= 1.2.0, < 2.0)
79
- rspec-support (~> 3.5.0)
80
- rspec-mocks (3.5.0)
81
- diff-lcs (>= 1.2.0, < 2.0)
82
- rspec-support (~> 3.5.0)
83
- rspec-support (3.5.0)
84
- rspec_junit_formatter (0.2.3)
85
- builder (< 4)
86
- rspec-core (>= 2, < 4, != 2.12.0)
87
- rubocop (0.44.1)
88
- parser (>= 2.3.1.1, < 3.0)
89
- powerpack (~> 0.1)
90
- rainbow (>= 1.99.1, < 3.0)
91
- ruby-progressbar (~> 1.7)
92
- unicode-display_width (~> 1.0, >= 1.0.1)
93
- ruby-progressbar (1.8.1)
94
- safe_yaml (1.0.4)
95
- shellany (0.0.1)
96
- simplecov (0.12.0)
97
- docile (~> 1.1.0)
98
- json (>= 1.8, < 3)
99
- simplecov-html (~> 0.10.0)
100
- simplecov-html (0.10.0)
101
- slop (3.6.0)
102
- term-ansicolor (1.4.0)
103
- tins (~> 1.0)
104
- thor (0.19.1)
105
- tins (1.12.0)
106
- unicode-display_width (1.1.1)
107
- webmock (2.1.0)
108
- addressable (>= 2.3.6)
109
- crack (>= 0.3.2)
110
- hashdiff
111
-
112
- PLATFORMS
113
- ruby
114
-
115
- DEPENDENCIES
116
- coveralls
117
- gem-release
118
- guard
119
- guard-rspec
120
- guard-rubocop
121
- listen (~> 3.0, < 3.1)
122
- rake
123
- rspec
124
- rspec_junit_formatter
125
- rubocop
126
- simplecov
127
- url_canonicalize!
128
- webmock
129
-
130
- BUNDLED WITH
131
- 1.13.5