spider 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHORS +1 -1
- data/CHANGES +4 -1
- data/README.md +2 -0
- data/lib/spider.rb +1 -1
- data/lib/spider/spider_instance.rb +8 -4
- data/spider.gemspec +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae84ef471855de9c49c3499eb658d961342a4338
|
4
|
+
data.tar.gz: cf80d52423709af79478ff051f212dea0b8021a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72785824697410005b8738a32e74d71a78d4623522e7f3f4bb56318349c12d693cee8a8a003766c33afec0b70bb195ce079117413543d9a004fe7adfae03b9d8
|
7
|
+
data.tar.gz: 8bdd202aa793c3f39984e3394e915c915c2fd7d68b7f475c0d317103d31c4a61bdc3ceaa87cfea02a77ec2cdbf2e23cc245a4e080c211c03322ce729b5551bb8
|
data/AUTHORS
CHANGED
data/CHANGES
CHANGED
data/README.md
CHANGED
data/lib/spider.rb
CHANGED
@@ -4,7 +4,7 @@ require File.dirname(__FILE__)+'/spider/spider_instance'
|
|
4
4
|
# links, and doing it all over again.
|
5
5
|
class Spider
|
6
6
|
|
7
|
-
VERSION_INFO = [0, 5, 0] unless defined?(self::VERSION_INFO)
|
7
|
+
VERSION_INFO = [0, 5, 1] unless defined?(self::VERSION_INFO)
|
8
8
|
VERSION = VERSION_INFO.map(&:to_s).join('.') unless defined?(self::VERSION)
|
9
9
|
|
10
10
|
def self.version
|
@@ -35,6 +35,7 @@ class SpiderInstance
|
|
35
35
|
@headers = {}
|
36
36
|
@setup = nil
|
37
37
|
@teardown = nil
|
38
|
+
@interrupted = false
|
38
39
|
end
|
39
40
|
|
40
41
|
# Add a predicate that determines whether to continue down this URL's path.
|
@@ -161,8 +162,7 @@ class SpiderInstance
|
|
161
162
|
end
|
162
163
|
|
163
164
|
def start! #:nodoc:
|
164
|
-
interrupted = false
|
165
|
-
trap("SIGINT") { interrupted = true }
|
165
|
+
trap("SIGINT") { @interrupted = true }
|
166
166
|
begin
|
167
167
|
next_urls = @next_urls.pop
|
168
168
|
tmp_n_u = {}
|
@@ -184,10 +184,14 @@ class SpiderInstance
|
|
184
184
|
#exit if interrupted
|
185
185
|
end
|
186
186
|
@teardown.call(a_url) unless @teardown.nil?
|
187
|
-
|
187
|
+
break if @interrupted
|
188
188
|
end
|
189
189
|
end
|
190
|
-
end while !@next_urls.empty?
|
190
|
+
end while !@next_urls.empty? && !@interrupted
|
191
|
+
end
|
192
|
+
|
193
|
+
def stop! #:nodoc:
|
194
|
+
@interrupted = true
|
191
195
|
end
|
192
196
|
|
193
197
|
def success_or_failure(code) #:nodoc:
|
data/spider.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Nagro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
A Web spidering library: handles robots.txt, scraping, finding more
|
@@ -33,7 +33,8 @@ files:
|
|
33
33
|
- spec/spider_spec.rb
|
34
34
|
- spider.gemspec
|
35
35
|
homepage: https://github.com/johnnagro/spider
|
36
|
-
licenses:
|
36
|
+
licenses:
|
37
|
+
- MIT
|
37
38
|
metadata: {}
|
38
39
|
post_install_message:
|
39
40
|
rdoc_options: []
|
@@ -51,7 +52,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
51
52
|
version: '0'
|
52
53
|
requirements: []
|
53
54
|
rubyforge_project: spider
|
54
|
-
rubygems_version: 2.
|
55
|
+
rubygems_version: 2.6.6
|
55
56
|
signing_key:
|
56
57
|
specification_version: 4
|
57
58
|
summary: A Web spidering library
|