arachnid2 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 917f030ae3bfe4eb2183020eaab33463abd80940f2d3ad228ba7e86fcb9d1c3d
4
- data.tar.gz: dd7f349e22f52cf54c45740c4393371f9547c7b5f649048de92863e6a41facc1
3
+ metadata.gz: 5e25353806a447177f129c56d4c57c38c70223849f2bbd858c932f3f4ec8a4ef
4
+ data.tar.gz: d2725c9981671ee010692d82b97801ccc00a1f2b28663fb72b23bc08f6be890e
5
5
  SHA512:
6
- metadata.gz: d868813ceba88a7a078e666626c5de3bad85dbf3b2e834626dab9672f6a5b4f32858c0262bc46831b4c7294e4b8b349cfeaab8ef7625eeada588e9604cfdc525
7
- data.tar.gz: 1c1b3221ba6db4a12673197162e2be0328879124bd5280f71c63a34d801d4ed74f0cf6526eaff197cca7d805c1dc370f511c66e37467b42856735fc421bfa586
6
+ metadata.gz: 52a0b49101ca136ddee4c4ae8e976bd81cc9f3c559df3a94463bee7f42a2e4ce591330e2a587f5285bac98be52723ab518870ac8a8197413df8cd06267892858
7
+ data.tar.gz: 2514be62a0ae76a2d594f14d5ad8b66a45696bafa455a6347bb04b07ae99e48f322936d0afb6bd9e025c67ac9ce52213519f398a8f5deec54e508d6c4f1b4d84
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- arachnid2 (0.3.4)
4
+ arachnid2 (0.3.5)
5
5
  addressable
6
6
  adomain
7
7
  bloomfilter-rb
@@ -106,9 +106,11 @@ class Arachnid2
106
106
  # @return nil
107
107
  #
108
108
  def crawl(opts = {}, with_watir = false)
109
- crawl_watir(opts, &Proc.new) and return if with_watir
110
-
111
- Arachnid2::Typhoeus.new(@url).crawl(opts, &Proc.new)
109
+ if with_watir
110
+ crawl_watir(opts, &Proc.new)
111
+ else
112
+ Arachnid2::Typhoeus.new(@url).crawl(opts, &Proc.new)
113
+ end
112
114
  end
113
115
 
114
116
  def crawl_watir(opts)
@@ -1,3 +1,3 @@
1
1
  class Arachnid2
2
- VERSION = "0.3.4"
2
+ VERSION = "0.3.5"
3
3
  end
@@ -25,7 +25,18 @@ class Arachnid2
25
25
  @global_visited.insert(q)
26
26
 
27
27
  begin
28
- browser.goto q
28
+ begin
29
+ browser.goto q
30
+ rescue Selenium::WebDriver::Error::UnknownError => e
31
+ # Firefox and Selenium, in their infinite wisdom
32
+ # raise an error when a page cannot be loaded.
33
+ # At the time of writing this, the page at
34
+ # thewirecutter.com/cars/accessories-auto
35
+ # causes such an issue (too many redirects).
36
+ # This error handling moves us on from those pages.
37
+ raise e unless e.message =~ /.*Reached error page.*/i
38
+ next
39
+ end
29
40
  links = process(browser.url, browser.body.html)
30
41
  next unless links
31
42
 
@@ -35,6 +46,8 @@ class Arachnid2
35
46
  rescue => e
36
47
  raise e if @already_retried
37
48
  raise e unless "#{e.class}".include?("Selenium") || "#{e.class}".include?("Watir")
49
+ @browser.close if @browser rescue nil
50
+ @headless.destroy if @headless rescue nil
38
51
  @browser = nil
39
52
  @already_retried = true
40
53
  retry
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arachnid2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Nissen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-15 00:00:00.000000000 Z
11
+ date: 2019-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler