spider 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d6465ee9f80195a1002053f826f1b80187020a3
4
- data.tar.gz: 1218142b1d76482cf5baccd1f288934cd7a6b003
3
+ metadata.gz: ae84ef471855de9c49c3499eb658d961342a4338
4
+ data.tar.gz: cf80d52423709af79478ff051f212dea0b8021a2
5
5
  SHA512:
6
- metadata.gz: 2725ca0197ec2801836d94615e4ece0196c131a9ff500ed5837c22e320e06b33a8f609add7d41eabb8fa19114a60af71057b5bdebaf8f94e2be116148d6ad123
7
- data.tar.gz: 5497c85e9759542ecb0cbb612484de0b185f7428c5a2c5222e1fbc7e1e3f69bac727bfddd883967c5eeb6c5bfaca0b9dfbe130eaaed35cc9e8cb96fb87abddc5
6
+ metadata.gz: 72785824697410005b8738a32e74d71a78d4623522e7f3f4bb56318349c12d693cee8a8a003766c33afec0b70bb195ce079117413543d9a004fe7adfae03b9d8
7
+ data.tar.gz: 8bdd202aa793c3f39984e3394e915c915c2fd7d68b7f475c0d317103d31c4a61bdc3ceaa87cfea02a77ec2cdbf2e23cc245a4e080c211c03322ce729b5551bb8
data/AUTHORS CHANGED
@@ -7,6 +7,6 @@ James Edward Gray II
7
7
  Joao Eriberto Mota Filho
8
8
  John Buckley
9
9
  John Nagro
10
- Mike Burns
11
10
  Matt Horan
11
+ Mike Burns (original author)
12
12
  Sander van der Vliet
data/CHANGES CHANGED
@@ -1,4 +1,7 @@
1
- 2016-05-13
1
+ 2016-09-04 v0.5.1
2
+ * added the ability to stop a crawl
3
+
4
+ 2016-05-13 v0.5.0
2
5
  * fixed #1 thanks to @eribertomota
3
6
  * got it running on more recent versions of ruby
4
7
  * cleaned up the docs a bit
data/README.md CHANGED
@@ -151,3 +151,5 @@ scraping, collecting, and looping so that you can just handle the data._
151
151
  end
152
152
  end
153
153
  ```
154
+
155
+ _Copyright (c) 2007-2016 Spider Team Authors_
data/lib/spider.rb CHANGED
@@ -4,7 +4,7 @@ require File.dirname(__FILE__)+'/spider/spider_instance'
4
4
  # links, and doing it all over again.
5
5
  class Spider
6
6
 
7
- VERSION_INFO = [0, 5, 0] unless defined?(self::VERSION_INFO)
7
+ VERSION_INFO = [0, 5, 1] unless defined?(self::VERSION_INFO)
8
8
  VERSION = VERSION_INFO.map(&:to_s).join('.') unless defined?(self::VERSION)
9
9
 
10
10
  def self.version
data/lib/spider/spider_instance.rb CHANGED
@@ -35,6 +35,7 @@ class SpiderInstance
35
35
  @headers = {}
36
36
  @setup = nil
37
37
  @teardown = nil
38
+ @interrupted = false
38
39
  end
39
40
 
40
41
  # Add a predicate that determines whether to continue down this URL's path.
@@ -161,8 +162,7 @@ class SpiderInstance
161
162
  end
162
163
 
163
164
  def start! #:nodoc:
164
- interrupted = false
165
- trap("SIGINT") { interrupted = true }
165
+ trap("SIGINT") { @interrupted = true }
166
166
  begin
167
167
  next_urls = @next_urls.pop
168
168
  tmp_n_u = {}
@@ -184,10 +184,14 @@ class SpiderInstance
184
184
  #exit if interrupted
185
185
  end
186
186
  @teardown.call(a_url) unless @teardown.nil?
187
- exit if interrupted
187
+ break if @interrupted
188
188
  end
189
189
  end
190
- end while !@next_urls.empty?
190
+ end while !@next_urls.empty? && !@interrupted
191
+ end
192
+
193
+ def stop! #:nodoc:
194
+ @interrupted = true
191
195
  end
192
196
 
193
197
  def success_or_failure(code) #:nodoc:
data/spider.gemspec CHANGED
@@ -5,6 +5,7 @@ require File.expand_path('../lib/spider', __FILE__)
5
5
  spec = Gem::Specification.new do |s|
6
6
  s.author = 'John Nagro'
7
7
  s.email = 'john.nagro@gmail.com'
8
+ s.license = 'MIT'
8
9
  s.has_rdoc = true
9
10
  s.homepage = 'https://github.com/johnnagro/spider'
10
11
  s.name = 'spider'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Nagro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-13 00:00:00.000000000 Z
11
+ date: 2016-09-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  A Web spidering library: handles robots.txt, scraping, finding more
@@ -33,7 +33,8 @@ files:
33
33
  - spec/spider_spec.rb
34
34
  - spider.gemspec
35
35
  homepage: https://github.com/johnnagro/spider
36
- licenses: []
36
+ licenses:
37
+ - MIT
37
38
  metadata: {}
38
39
  post_install_message:
39
40
  rdoc_options: []
@@ -51,7 +52,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
51
52
  version: '0'
52
53
  requirements: []
53
54
  rubyforge_project: spider
54
- rubygems_version: 2.5.1
55
+ rubygems_version: 2.6.6
55
56
  signing_key:
56
57
  specification_version: 4
57
58
  summary: A Web spidering library