spider 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AUTHORS +1 -1
- data/CHANGES +4 -1
- data/README.md +2 -0
- data/lib/spider.rb +1 -1
- data/lib/spider/spider_instance.rb +8 -4
- data/spider.gemspec +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae84ef471855de9c49c3499eb658d961342a4338
|
4
|
+
data.tar.gz: cf80d52423709af79478ff051f212dea0b8021a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72785824697410005b8738a32e74d71a78d4623522e7f3f4bb56318349c12d693cee8a8a003766c33afec0b70bb195ce079117413543d9a004fe7adfae03b9d8
|
7
|
+
data.tar.gz: 8bdd202aa793c3f39984e3394e915c915c2fd7d68b7f475c0d317103d31c4a61bdc3ceaa87cfea02a77ec2cdbf2e23cc245a4e080c211c03322ce729b5551bb8
|
data/AUTHORS
CHANGED
data/CHANGES
CHANGED
data/README.md
CHANGED
data/lib/spider.rb
CHANGED
@@ -4,7 +4,7 @@ require File.dirname(__FILE__)+'/spider/spider_instance'
|
|
4
4
|
# links, and doing it all over again.
|
5
5
|
class Spider
|
6
6
|
|
7
|
-
VERSION_INFO = [0, 5, 0] unless defined?(self::VERSION_INFO)
|
7
|
+
VERSION_INFO = [0, 5, 1] unless defined?(self::VERSION_INFO)
|
8
8
|
VERSION = VERSION_INFO.map(&:to_s).join('.') unless defined?(self::VERSION)
|
9
9
|
|
10
10
|
def self.version
|
@@ -35,6 +35,7 @@ class SpiderInstance
|
|
35
35
|
@headers = {}
|
36
36
|
@setup = nil
|
37
37
|
@teardown = nil
|
38
|
+
@interrupted = false
|
38
39
|
end
|
39
40
|
|
40
41
|
# Add a predicate that determines whether to continue down this URL's path.
|
@@ -161,8 +162,7 @@ class SpiderInstance
|
|
161
162
|
end
|
162
163
|
|
163
164
|
def start! #:nodoc:
|
164
|
-
interrupted = false
|
165
|
-
trap("SIGINT") { interrupted = true }
|
165
|
+
trap("SIGINT") { @interrupted = true }
|
166
166
|
begin
|
167
167
|
next_urls = @next_urls.pop
|
168
168
|
tmp_n_u = {}
|
@@ -184,10 +184,14 @@ class SpiderInstance
|
|
184
184
|
#exit if interrupted
|
185
185
|
end
|
186
186
|
@teardown.call(a_url) unless @teardown.nil?
|
187
|
-
|
187
|
+
break if @interrupted
|
188
188
|
end
|
189
189
|
end
|
190
|
-
end while !@next_urls.empty?
|
190
|
+
end while !@next_urls.empty? && !@interrupted
|
191
|
+
end
|
192
|
+
|
193
|
+
def stop! #:nodoc:
|
194
|
+
@interrupted = true
|
191
195
|
end
|
192
196
|
|
193
197
|
def success_or_failure(code) #:nodoc:
|
data/spider.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Nagro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
A Web spidering library: handles robots.txt, scraping, finding more
|
@@ -33,7 +33,8 @@ files:
|
|
33
33
|
- spec/spider_spec.rb
|
34
34
|
- spider.gemspec
|
35
35
|
homepage: https://github.com/johnnagro/spider
|
36
|
-
licenses:
|
36
|
+
licenses:
|
37
|
+
- MIT
|
37
38
|
metadata: {}
|
38
39
|
post_install_message:
|
39
40
|
rdoc_options: []
|
@@ -51,7 +52,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
51
52
|
version: '0'
|
52
53
|
requirements: []
|
53
54
|
rubyforge_project: spider
|
54
|
-
rubygems_version: 2.
|
55
|
+
rubygems_version: 2.6.6
|
55
56
|
signing_key:
|
56
57
|
specification_version: 4
|
57
58
|
summary: A Web spidering library
|