arachnid2 0.3.4 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/arachnid2.rb +5 -3
- data/lib/arachnid2/version.rb +1 -1
- data/lib/arachnid2/watir.rb +14 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e25353806a447177f129c56d4c57c38c70223849f2bbd858c932f3f4ec8a4ef
|
4
|
+
data.tar.gz: d2725c9981671ee010692d82b97801ccc00a1f2b28663fb72b23bc08f6be890e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52a0b49101ca136ddee4c4ae8e976bd81cc9f3c559df3a94463bee7f42a2e4ce591330e2a587f5285bac98be52723ab518870ac8a8197413df8cd06267892858
|
7
|
+
data.tar.gz: 2514be62a0ae76a2d594f14d5ad8b66a45696bafa455a6347bb04b07ae99e48f322936d0afb6bd9e025c67ac9ce52213519f398a8f5deec54e508d6c4f1b4d84
|
data/Gemfile.lock
CHANGED
data/lib/arachnid2.rb
CHANGED
@@ -106,9 +106,11 @@ class Arachnid2
|
|
106
106
|
# @return nil
|
107
107
|
#
|
108
108
|
def crawl(opts = {}, with_watir = false)
|
109
|
-
|
110
|
-
|
111
|
-
|
109
|
+
if with_watir
|
110
|
+
crawl_watir(opts, &Proc.new)
|
111
|
+
else
|
112
|
+
Arachnid2::Typhoeus.new(@url).crawl(opts, &Proc.new)
|
113
|
+
end
|
112
114
|
end
|
113
115
|
|
114
116
|
def crawl_watir(opts)
|
data/lib/arachnid2/version.rb
CHANGED
data/lib/arachnid2/watir.rb
CHANGED
@@ -25,7 +25,18 @@ class Arachnid2
|
|
25
25
|
@global_visited.insert(q)
|
26
26
|
|
27
27
|
begin
|
28
|
-
|
28
|
+
begin
|
29
|
+
browser.goto q
|
30
|
+
rescue Selenium::WebDriver::Error::UnknownError => e
|
31
|
+
# Firefox and Selenium, in their infinite wisdom
|
32
|
+
# raise an error when a page cannot be loaded.
|
33
|
+
# At the time of writing this, the page at
|
34
|
+
# thewirecutter.com/cars/accessories-auto
|
35
|
+
# causes such an issue (too many redirects).
|
36
|
+
# This error handling moves us on from those pages.
|
37
|
+
raise e unless e.message =~ /.*Reached error page.*/i
|
38
|
+
next
|
39
|
+
end
|
29
40
|
links = process(browser.url, browser.body.html)
|
30
41
|
next unless links
|
31
42
|
|
@@ -35,6 +46,8 @@ class Arachnid2
|
|
35
46
|
rescue => e
|
36
47
|
raise e if @already_retried
|
37
48
|
raise e unless "#{e.class}".include?("Selenium") || "#{e.class}".include?("Watir")
|
49
|
+
@browser.close if @browser rescue nil
|
50
|
+
@headless.destroy if @headless rescue nil
|
38
51
|
@browser = nil
|
39
52
|
@already_retried = true
|
40
53
|
retry
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arachnid2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Nissen
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|