arachnid2 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 917f030ae3bfe4eb2183020eaab33463abd80940f2d3ad228ba7e86fcb9d1c3d
4
- data.tar.gz: dd7f349e22f52cf54c45740c4393371f9547c7b5f649048de92863e6a41facc1
3
+ metadata.gz: 5e25353806a447177f129c56d4c57c38c70223849f2bbd858c932f3f4ec8a4ef
4
+ data.tar.gz: d2725c9981671ee010692d82b97801ccc00a1f2b28663fb72b23bc08f6be890e
5
5
  SHA512:
6
- metadata.gz: d868813ceba88a7a078e666626c5de3bad85dbf3b2e834626dab9672f6a5b4f32858c0262bc46831b4c7294e4b8b349cfeaab8ef7625eeada588e9604cfdc525
7
- data.tar.gz: 1c1b3221ba6db4a12673197162e2be0328879124bd5280f71c63a34d801d4ed74f0cf6526eaff197cca7d805c1dc370f511c66e37467b42856735fc421bfa586
6
+ metadata.gz: 52a0b49101ca136ddee4c4ae8e976bd81cc9f3c559df3a94463bee7f42a2e4ce591330e2a587f5285bac98be52723ab518870ac8a8197413df8cd06267892858
7
+ data.tar.gz: 2514be62a0ae76a2d594f14d5ad8b66a45696bafa455a6347bb04b07ae99e48f322936d0afb6bd9e025c67ac9ce52213519f398a8f5deec54e508d6c4f1b4d84
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- arachnid2 (0.3.4)
4
+ arachnid2 (0.3.5)
5
5
  addressable
6
6
  adomain
7
7
  bloomfilter-rb
@@ -106,9 +106,11 @@ class Arachnid2
106
106
  # @return nil
107
107
  #
108
108
  def crawl(opts = {}, with_watir = false)
109
- crawl_watir(opts, &Proc.new) and return if with_watir
110
-
111
- Arachnid2::Typhoeus.new(@url).crawl(opts, &Proc.new)
109
+ if with_watir
110
+ crawl_watir(opts, &Proc.new)
111
+ else
112
+ Arachnid2::Typhoeus.new(@url).crawl(opts, &Proc.new)
113
+ end
112
114
  end
113
115
 
114
116
  def crawl_watir(opts)
@@ -1,3 +1,3 @@
1
1
  class Arachnid2
2
- VERSION = "0.3.4"
2
+ VERSION = "0.3.5"
3
3
  end
@@ -25,7 +25,18 @@ class Arachnid2
25
25
  @global_visited.insert(q)
26
26
 
27
27
  begin
28
- browser.goto q
28
+ begin
29
+ browser.goto q
30
+ rescue Selenium::WebDriver::Error::UnknownError => e
31
+ # Firefox and Selenium, in their infinite wisdom
32
+ # raise an error when a page cannot be loaded.
33
+ # At the time of writing this, the page at
34
+ # thewirecutter.com/cars/accessories-auto
35
+ # causes such an issue (too many redirects).
36
+ # This error handling moves us on from those pages.
37
+ raise e unless e.message =~ /.*Reached error page.*/i
38
+ next
39
+ end
29
40
  links = process(browser.url, browser.body.html)
30
41
  next unless links
31
42
 
@@ -35,6 +46,8 @@ class Arachnid2
35
46
  rescue => e
36
47
  raise e if @already_retried
37
48
  raise e unless "#{e.class}".include?("Selenium") || "#{e.class}".include?("Watir")
49
+ @browser.close if @browser rescue nil
50
+ @headless.destroy if @headless rescue nil
38
51
  @browser = nil
39
52
  @already_retried = true
40
53
  retry
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arachnid2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Nissen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-15 00:00:00.000000000 Z
11
+ date: 2019-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler