grell 1.6.8 → 1.6.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4fcc56b727dfa90261c133037100f70617f15fed
4
- data.tar.gz: 3e7d2a46160b4d8406a76b9190b8fdb64e4a0cf4
3
+ metadata.gz: 2a7136c81652b0260ee867e65380e69e5d3a2264
4
+ data.tar.gz: f95af6c4e4a99aa1216f8842c8829af521f7b1d9
5
5
  SHA512:
6
- metadata.gz: 1cd4529624bd1d7d8482ab3d30c408c9a3452af73375ab2688e07d361b0c5cd1246dbcfcf0152b9e4ede8ab1b8e7d568733822471d2df8e815bc8e4e48cbad83
7
- data.tar.gz: 95c2005f0edf3fcd191fa8f16e23d049f68eb4074cd720042eadbb982ffe7d1947b1bba9fe3737800f5252d504d0eaaed8d6ea1be51c4991231b0f2e453d8c5b
6
+ metadata.gz: 5b6c238c9894254531f5448dd5435603009b01c0c1fef015b748f3a91a662fa81fd0c7bbe882b95d88e8cbe91beee342222877f889931d5ed186cf0e2d03ef4e
7
+ data.tar.gz: 76a390dc30e53275ded279b27a8bd6ebabe4d9cefb0cc7367563467cd86fd37e6bb20440810d237137f400d72af6dd289602603ff6f70dce8f3bd6906f6d24e2
@@ -1,3 +1,5 @@
1
+ # 1.6.9
2
+ * Avoid following links when disabled by CSS (1.6.8 worked only for Javascript)
1
3
  # 1.6.8
2
4
  * Avoid following disabled links
3
5
 
@@ -43,7 +43,7 @@ module Grell
43
43
 
44
44
  # Number of times we have retried the current page
45
45
  def retries
46
- [@times_visited -1, 0].max
46
+ [@times_visited - 1, 0].max
47
47
  end
48
48
 
49
49
  # The current URL, this may be different from the URL we asked for if there was some redirect
@@ -205,17 +205,9 @@ module Grell
205
205
 
206
206
  private
207
207
  def all_links
208
- # <link> can only be used in the <head> as of: https://developer.mozilla.org/en/docs/Web/HTML/Element/link
209
- anchors_in_body = @rawpage.all_anchors.reject { |anchor| anchor.tag_name == 'link' }
210
-
211
- # Do not follow disabled links
212
- enabled_links = anchors_in_body.reject { |anchor| anchor.disabled? }
213
-
214
- unique_links = enabled_links.map do |anchor|
215
- anchor['href'] || anchor['data-href']
216
- end.compact
217
-
218
- unique_links.map{|link| link_to_url(link)}.uniq.compact
208
+ links = @rawpage.all_anchors.map { |anchor| Link.new(anchor) }
209
+ body_enabled_links = links.reject { |link| link.inside_header? || link.disabled? }
210
+ body_enabled_links.map { |link| link.to_url(host) }.uniq.compact
219
211
 
220
212
  rescue Capybara::Poltergeist::ObsoleteNode
221
213
  Grell.logger.warn "We found an obsolete node in #{@url}. Ignoring all links"
@@ -224,37 +216,54 @@ module Grell
224
216
  []
225
217
  end
226
218
 
227
- # We only accept links in this same host that start with a path
228
- # nil from this
229
- def link_to_url(link)
230
- uri = URI.parse(link)
231
- if uri.absolute?
232
- if uri.host != URI.parse(host).host
233
- Grell.logger.debug "GRELL does not follow links to external hosts: #{link}"
234
- nil
235
- else
236
- link # Absolute link to our own host
237
- end
238
- else
239
- if uri.path.nil?
240
- Grell.logger.debug "GRELL does not follow links without a path: #{uri}"
241
- nil
242
- end
243
- if uri.path.start_with?('/')
244
- host + link #convert to full URL
245
- else #links like href="google.com" the browser would go to http://google.com like "http://#{link}"
246
- Grell.logger.debug "GRELL Bad formatted link: #{link}, assuming external"
247
- nil
248
- end
219
+ # Private class to group all the methods related to links.
220
+ class Link
221
+ def initialize(anchor)
222
+ @anchor = anchor
249
223
  end
250
224
 
251
- rescue URI::InvalidURIError #We will have invalid links propagating till we navigate to them
252
- link
253
- end
254
- end
225
+ # <link> can only be used in the <head> as of: https://developer.mozilla.org/en/docs/Web/HTML/Element/link
226
+ def inside_header?
227
+ @anchor.tag_name == 'link'
228
+ end
255
229
 
230
+ # Is the link disabled by either Javascript or CSS?
231
+ def disabled?
232
+ @anchor.disabled? || !!@anchor.native.attributes['disabled']
233
+ end
256
234
 
235
+ # Some links may use data-href + javascript to do interesting things
236
+ def href
237
+ @anchor['href'] || @anchor['data-href']
238
+ end
257
239
 
258
- end
240
+ # We only accept links in this same host that start with a path
241
+ def to_url(host)
242
+ uri = URI.parse(href)
243
+ if uri.absolute?
244
+ if uri.host != URI.parse(host).host
245
+ Grell.logger.debug "GRELL does not follow links to external hosts: #{href}"
246
+ nil
247
+ else
248
+ href # Absolute link to our own host
249
+ end
250
+ else
251
+ if uri.path.nil?
252
+ Grell.logger.debug "GRELL does not follow links without a path: #{uri}"
253
+ nil
254
+ end
255
+ if uri.path.start_with?('/')
256
+ host + href # convert to full URL
257
+ else # links like href="google.com" the browser would go to http://google.com like "http://#{link}"
258
+ Grell.logger.debug "GRELL Bad formatted link: #{href}, assuming external"
259
+ nil
260
+ end
261
+ end
262
+ rescue URI::InvalidURIError # Invalid links propagating till we navigate to them
263
+ href
264
+ end
265
+ end
259
266
 
267
+ end
268
+ end
260
269
  end
@@ -1,3 +1,3 @@
1
1
  module Grell
2
- VERSION = "1.6.8".freeze
2
+ VERSION = "1.6.9".freeze
3
3
  end
@@ -253,6 +253,7 @@ RSpec.describe Grell::Page do
253
253
  <a href=\"/trusmis.html\">trusmis</a>
254
254
  <a href=\"/help.html\">help</a>
255
255
  <a href=\"javascript: void(0)\">help</a>
256
+ <a href=\"/helpdisabled.html\" disabled=\"\">helpdisabled</a>
256
257
  </body></html>"
257
258
  end
258
259
  let(:links) { ['http://www.example.com/trusmis.html', 'http://www.example.com/help.html'] }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grell
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.8
4
+ version: 1.6.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordi Polo Carres
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-12 00:00:00.000000000 Z
11
+ date: 2016-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara