grell 1.6.8 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fcc56b727dfa90261c133037100f70617f15fed
4
- data.tar.gz: 3e7d2a46160b4d8406a76b9190b8fdb64e4a0cf4
3
+ metadata.gz: 2a7136c81652b0260ee867e65380e69e5d3a2264
4
+ data.tar.gz: f95af6c4e4a99aa1216f8842c8829af521f7b1d9
5
5
  SHA512:
6
- metadata.gz: 1cd4529624bd1d7d8482ab3d30c408c9a3452af73375ab2688e07d361b0c5cd1246dbcfcf0152b9e4ede8ab1b8e7d568733822471d2df8e815bc8e4e48cbad83
7
- data.tar.gz: 95c2005f0edf3fcd191fa8f16e23d049f68eb4074cd720042eadbb982ffe7d1947b1bba9fe3737800f5252d504d0eaaed8d6ea1be51c4991231b0f2e453d8c5b
6
+ metadata.gz: 5b6c238c9894254531f5448dd5435603009b01c0c1fef015b748f3a91a662fa81fd0c7bbe882b95d88e8cbe91beee342222877f889931d5ed186cf0e2d03ef4e
7
+ data.tar.gz: 76a390dc30e53275ded279b27a8bd6ebabe4d9cefb0cc7367563467cd86fd37e6bb20440810d237137f400d72af6dd289602603ff6f70dce8f3bd6906f6d24e2
@@ -1,3 +1,5 @@
1
+ # 1.6.9
2
+ * Avoid following links when disabled by CSS (1.6.8 worked only for Javascript)
1
3
  # 1.6.8
2
4
  * Avoid following disabled links
3
5
 
@@ -43,7 +43,7 @@ module Grell
43
43
 
44
44
  # Number of times we have retried the current page
45
45
  def retries
46
- [@times_visited -1, 0].max
46
+ [@times_visited - 1, 0].max
47
47
  end
48
48
 
49
49
  # The current URL, this may be different from the URL we asked for if there was some redirect
@@ -205,17 +205,9 @@ module Grell
205
205
 
206
206
  private
207
207
  def all_links
208
- # <link> can only be used in the <head> as of: https://developer.mozilla.org/en/docs/Web/HTML/Element/link
209
- anchors_in_body = @rawpage.all_anchors.reject { |anchor| anchor.tag_name == 'link' }
210
-
211
- # Do not follow disabled links
212
- enabled_links = anchors_in_body.reject { |anchor| anchor.disabled? }
213
-
214
- unique_links = enabled_links.map do |anchor|
215
- anchor['href'] || anchor['data-href']
216
- end.compact
217
-
218
- unique_links.map{|link| link_to_url(link)}.uniq.compact
208
+ links = @rawpage.all_anchors.map { |anchor| Link.new(anchor) }
209
+ body_enabled_links = links.reject { |link| link.inside_header? || link.disabled? }
210
+ body_enabled_links.map { |link| link.to_url(host) }.uniq.compact
219
211
 
220
212
  rescue Capybara::Poltergeist::ObsoleteNode
221
213
  Grell.logger.warn "We found an obsolete node in #{@url}. Ignoring all links"
@@ -224,37 +216,54 @@ module Grell
224
216
  []
225
217
  end
226
218
 
227
- # We only accept links in this same host that start with a path
228
- # nil from this
229
- def link_to_url(link)
230
- uri = URI.parse(link)
231
- if uri.absolute?
232
- if uri.host != URI.parse(host).host
233
- Grell.logger.debug "GRELL does not follow links to external hosts: #{link}"
234
- nil
235
- else
236
- link # Absolute link to our own host
237
- end
238
- else
239
- if uri.path.nil?
240
- Grell.logger.debug "GRELL does not follow links without a path: #{uri}"
241
- nil
242
- end
243
- if uri.path.start_with?('/')
244
- host + link #convert to full URL
245
- else #links like href="google.com" the browser would go to http://google.com like "http://#{link}"
246
- Grell.logger.debug "GRELL Bad formatted link: #{link}, assuming external"
247
- nil
248
- end
219
+ # Private class to group all the methods related to links.
220
+ class Link
221
+ def initialize(anchor)
222
+ @anchor = anchor
249
223
  end
250
224
 
251
- rescue URI::InvalidURIError #We will have invalid links propagating till we navigate to them
252
- link
253
- end
254
- end
225
+ # <link> can only be used in the <head> as of: https://developer.mozilla.org/en/docs/Web/HTML/Element/link
226
+ def inside_header?
227
+ @anchor.tag_name == 'link'
228
+ end
255
229
 
230
+ # Is the link disabled by either Javascript or CSS?
231
+ def disabled?
232
+ @anchor.disabled? || !!@anchor.native.attributes['disabled']
233
+ end
256
234
 
235
+ # Some links may use data-href + javascript to do interesting things
236
+ def href
237
+ @anchor['href'] || @anchor['data-href']
238
+ end
257
239
 
258
- end
240
+ # We only accept links in this same host that start with a path
241
+ def to_url(host)
242
+ uri = URI.parse(href)
243
+ if uri.absolute?
244
+ if uri.host != URI.parse(host).host
245
+ Grell.logger.debug "GRELL does not follow links to external hosts: #{href}"
246
+ nil
247
+ else
248
+ href # Absolute link to our own host
249
+ end
250
+ else
251
+ if uri.path.nil?
252
+ Grell.logger.debug "GRELL does not follow links without a path: #{uri}"
253
+ nil
254
+ end
255
+ if uri.path.start_with?('/')
256
+ host + href # convert to full URL
257
+ else # links like href="google.com" the browser would go to http://google.com like "http://#{link}"
258
+ Grell.logger.debug "GRELL Bad formatted link: #{href}, assuming external"
259
+ nil
260
+ end
261
+ end
262
+ rescue URI::InvalidURIError # Invalid links propagating till we navigate to them
263
+ href
264
+ end
265
+ end
259
266
 
267
+ end
268
+ end
260
269
  end
@@ -1,3 +1,3 @@
1
1
  module Grell
2
- VERSION = "1.6.8".freeze
2
+ VERSION = "1.6.9".freeze
3
3
  end
@@ -253,6 +253,7 @@ RSpec.describe Grell::Page do
253
253
  <a href=\"/trusmis.html\">trusmis</a>
254
254
  <a href=\"/help.html\">help</a>
255
255
  <a href=\"javascript: void(0)\">help</a>
256
+ <a href=\"/helpdisabled.html\" disabled=\"\">helpdisabled</a>
256
257
  </body></html>"
257
258
  end
258
259
  let(:links) { ['http://www.example.com/trusmis.html', 'http://www.example.com/help.html'] }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grell
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.8
4
+ version: 1.6.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordi Polo Carres
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-12 00:00:00.000000000 Z
11
+ date: 2016-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara