spider 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d6465ee9f80195a1002053f826f1b80187020a3
4
- data.tar.gz: 1218142b1d76482cf5baccd1f288934cd7a6b003
3
+ metadata.gz: ae84ef471855de9c49c3499eb658d961342a4338
4
+ data.tar.gz: cf80d52423709af79478ff051f212dea0b8021a2
5
5
  SHA512:
6
- metadata.gz: 2725ca0197ec2801836d94615e4ece0196c131a9ff500ed5837c22e320e06b33a8f609add7d41eabb8fa19114a60af71057b5bdebaf8f94e2be116148d6ad123
7
- data.tar.gz: 5497c85e9759542ecb0cbb612484de0b185f7428c5a2c5222e1fbc7e1e3f69bac727bfddd883967c5eeb6c5bfaca0b9dfbe130eaaed35cc9e8cb96fb87abddc5
6
+ metadata.gz: 72785824697410005b8738a32e74d71a78d4623522e7f3f4bb56318349c12d693cee8a8a003766c33afec0b70bb195ce079117413543d9a004fe7adfae03b9d8
7
+ data.tar.gz: 8bdd202aa793c3f39984e3394e915c915c2fd7d68b7f475c0d317103d31c4a61bdc3ceaa87cfea02a77ec2cdbf2e23cc245a4e080c211c03322ce729b5551bb8
data/AUTHORS CHANGED
@@ -7,6 +7,6 @@ James Edward Gray II
7
7
  Joao Eriberto Mota Filho
8
8
  John Buckley
9
9
  John Nagro
10
- Mike Burns
11
10
  Matt Horan
11
+ Mike Burns (original author)
12
12
  Sander van der Vliet
data/CHANGES CHANGED
@@ -1,4 +1,7 @@
1
- 2016-05-13
1
+ 2016-09-04 v0.5.1
2
+ * added the ability to stop a crawl
3
+
4
+ 2016-05-13 v0.5.0
2
5
  * fixed #1 thanks to @eribertomota
3
6
  * got it running on more recent versions of ruby
4
7
  * cleaned up the docs a bit
data/README.md CHANGED
@@ -151,3 +151,5 @@ scraping, collecting, and looping so that you can just handle the data._
151
151
  end
152
152
  end
153
153
  ```
154
+
155
+ _Copyright (c) 2007-2016 Spider Team Authors_
data/lib/spider.rb CHANGED
@@ -4,7 +4,7 @@ require File.dirname(__FILE__)+'/spider/spider_instance'
4
4
  # links, and doing it all over again.
5
5
  class Spider
6
6
 
7
- VERSION_INFO = [0, 5, 0] unless defined?(self::VERSION_INFO)
7
+ VERSION_INFO = [0, 5, 1] unless defined?(self::VERSION_INFO)
8
8
  VERSION = VERSION_INFO.map(&:to_s).join('.') unless defined?(self::VERSION)
9
9
 
10
10
  def self.version
data/lib/spider/spider_instance.rb CHANGED
@@ -35,6 +35,7 @@ class SpiderInstance
35
35
  @headers = {}
36
36
  @setup = nil
37
37
  @teardown = nil
38
+ @interrupted = false
38
39
  end
39
40
 
40
41
  # Add a predicate that determines whether to continue down this URL's path.
@@ -161,8 +162,7 @@ class SpiderInstance
161
162
  end
162
163
 
163
164
  def start! #:nodoc:
164
- interrupted = false
165
- trap("SIGINT") { interrupted = true }
165
+ trap("SIGINT") { @interrupted = true }
166
166
  begin
167
167
  next_urls = @next_urls.pop
168
168
  tmp_n_u = {}
@@ -184,10 +184,14 @@ class SpiderInstance
184
184
  #exit if interrupted
185
185
  end
186
186
  @teardown.call(a_url) unless @teardown.nil?
187
- exit if interrupted
187
+ break if @interrupted
188
188
  end
189
189
  end
190
- end while !@next_urls.empty?
190
+ end while !@next_urls.empty? && !@interrupted
191
+ end
192
+
193
+ def stop! #:nodoc:
194
+ @interrupted = true
191
195
  end
192
196
 
193
197
  def success_or_failure(code) #:nodoc:
data/spider.gemspec CHANGED
@@ -5,6 +5,7 @@ require File.expand_path('../lib/spider', __FILE__)
5
5
  spec = Gem::Specification.new do |s|
6
6
  s.author = 'John Nagro'
7
7
  s.email = 'john.nagro@gmail.com'
8
+ s.license = 'MIT'
8
9
  s.has_rdoc = true
9
10
  s.homepage = 'https://github.com/johnnagro/spider'
10
11
  s.name = 'spider'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Nagro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-13 00:00:00.000000000 Z
11
+ date: 2016-09-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  A Web spidering library: handles robots.txt, scraping, finding more
@@ -33,7 +33,8 @@ files:
33
33
  - spec/spider_spec.rb
34
34
  - spider.gemspec
35
35
  homepage: https://github.com/johnnagro/spider
36
- licenses: []
36
+ licenses:
37
+ - MIT
37
38
  metadata: {}
38
39
  post_install_message:
39
40
  rdoc_options: []
@@ -51,7 +52,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
51
52
  version: '0'
52
53
  requirements: []
53
54
  rubyforge_project: spider
54
- rubygems_version: 2.5.1
55
+ rubygems_version: 2.6.6
55
56
  signing_key:
56
57
  specification_version: 4
57
58
  summary: A Web spidering library