arachnid2 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/arachnid2.rb +5 -3
- data/lib/arachnid2/version.rb +1 -1
- data/lib/arachnid2/watir.rb +14 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e25353806a447177f129c56d4c57c38c70223849f2bbd858c932f3f4ec8a4ef
|
4
|
+
data.tar.gz: d2725c9981671ee010692d82b97801ccc00a1f2b28663fb72b23bc08f6be890e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52a0b49101ca136ddee4c4ae8e976bd81cc9f3c559df3a94463bee7f42a2e4ce591330e2a587f5285bac98be52723ab518870ac8a8197413df8cd06267892858
|
7
|
+
data.tar.gz: 2514be62a0ae76a2d594f14d5ad8b66a45696bafa455a6347bb04b07ae99e48f322936d0afb6bd9e025c67ac9ce52213519f398a8f5deec54e508d6c4f1b4d84
|
data/Gemfile.lock
CHANGED
data/lib/arachnid2.rb
CHANGED
@@ -106,9 +106,11 @@ class Arachnid2
|
|
106
106
|
# @return nil
|
107
107
|
#
|
108
108
|
def crawl(opts = {}, with_watir = false)
|
109
|
-
|
110
|
-
|
111
|
-
|
109
|
+
if with_watir
|
110
|
+
crawl_watir(opts, &Proc.new)
|
111
|
+
else
|
112
|
+
Arachnid2::Typhoeus.new(@url).crawl(opts, &Proc.new)
|
113
|
+
end
|
112
114
|
end
|
113
115
|
|
114
116
|
def crawl_watir(opts)
|
data/lib/arachnid2/version.rb
CHANGED
data/lib/arachnid2/watir.rb
CHANGED
@@ -25,7 +25,18 @@ class Arachnid2
|
|
25
25
|
@global_visited.insert(q)
|
26
26
|
|
27
27
|
begin
|
28
|
-
|
28
|
+
begin
|
29
|
+
browser.goto q
|
30
|
+
rescue Selenium::WebDriver::Error::UnknownError => e
|
31
|
+
# Firefox and Selenium, in their infinite wisdom
|
32
|
+
# raise an error when a page cannot be loaded.
|
33
|
+
# At the time of writing this, the page at
|
34
|
+
# thewirecutter.com/cars/accessories-auto
|
35
|
+
# causes such an issue (too many redirects).
|
36
|
+
# This error handling moves us on from those pages.
|
37
|
+
raise e unless e.message =~ /.*Reached error page.*/i
|
38
|
+
next
|
39
|
+
end
|
29
40
|
links = process(browser.url, browser.body.html)
|
30
41
|
next unless links
|
31
42
|
|
@@ -35,6 +46,8 @@ class Arachnid2
|
|
35
46
|
rescue => e
|
36
47
|
raise e if @already_retried
|
37
48
|
raise e unless "#{e.class}".include?("Selenium") || "#{e.class}".include?("Watir")
|
49
|
+
@browser.close if @browser rescue nil
|
50
|
+
@headless.destroy if @headless rescue nil
|
38
51
|
@browser = nil
|
39
52
|
@already_retried = true
|
40
53
|
retry
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arachnid2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Nissen
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|