spider 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ae84ef471855de9c49c3499eb658d961342a4338
4
- data.tar.gz: cf80d52423709af79478ff051f212dea0b8021a2
3
+ metadata.gz: 1303a4df64456b14c15c76ef68959829f83458f4
4
+ data.tar.gz: a178f2d2d961b175eded73c24c96eea5ad763502
5
5
  SHA512:
6
- metadata.gz: 72785824697410005b8738a32e74d71a78d4623522e7f3f4bb56318349c12d693cee8a8a003766c33afec0b70bb195ce079117413543d9a004fe7adfae03b9d8
7
- data.tar.gz: 8bdd202aa793c3f39984e3394e915c915c2fd7d68b7f475c0d317103d31c4a61bdc3ceaa87cfea02a77ec2cdbf2e23cc245a4e080c211c03322ce729b5551bb8
6
+ metadata.gz: 6c556fbf8dc472250d9740b216983a6844263388b8fa18956858052606003685c2f84da8ac2815621295db086d78331181a353676600785df5bfdec7a39dae40
7
+ data.tar.gz: 9680ebca3b44c55c8f952e7f2903526f0267013967a2d38eab524970f1b702817515470cd48a04ba3ad3fbb46ccaf92ff47e911df4c62e1589ef5482e28a8b33
data/AUTHORS CHANGED
@@ -4,9 +4,11 @@ the following kind souls:
4
4
  Brian Campbell
5
5
  Henri Cook
6
6
  James Edward Gray II
7
+ Jeremy Evans
7
8
  Joao Eriberto Mota Filho
8
9
  John Buckley
9
10
  John Nagro
10
11
  Matt Horan
12
+ Marc (@brigriffin)
11
13
  Mike Burns (original author)
12
14
  Sander van der Vliet
data/README.md CHANGED
@@ -72,6 +72,16 @@ scraping, collecting, and looping so that you can just handle the data._
72
72
  end
73
73
  ```
74
74
 
75
+ ### Use Redis to track cycles
76
+
77
+ ```ruby
78
+ require 'spider'
79
+ require 'spider/included_in_redis'
80
+ Spider.start_at('http://cashcats.biz/') do |s|
81
+ s.check_already_seen_with IncludedInRedis.new(host: '127.0.0.1', port: 6379)
82
+ end
83
+ ```
84
+
75
85
  ### Track cycles with a custom object
76
86
 
77
87
  ```ruby
data/lib/spider.rb CHANGED
@@ -4,7 +4,7 @@ require File.dirname(__FILE__)+'/spider/spider_instance'
4
4
  # links, and doing it all over again.
5
5
  class Spider
6
6
 
7
- VERSION_INFO = [0, 5, 1] unless defined?(self::VERSION_INFO)
7
+ VERSION_INFO = [0, 5, 2] unless defined?(self::VERSION_INFO)
8
8
  VERSION = VERSION_INFO.map(&:to_s).join('.') unless defined?(self::VERSION)
9
9
 
10
10
  def self.version
@@ -0,0 +1,31 @@
1
+ # Use Redis to track cycles.
2
+
3
+ require 'redis'
4
+ require 'json'
5
+
6
+ # A specialized class using Redis to track items stored. It supports
7
+ # three operations: new, <<, and include?. Together these can be used to
8
+ # add items to Redis, then determine whether the item has been added.
9
+ #
10
+ # To use it with Spider use the check_already_seen_with method:
11
+ #
12
+ # Spider.start_at('http://example.com/') do |s|
13
+ # s.check_already_seen_with IncludedInRedis.new(host: '127.0.0.1', port: 6379)
14
+ # end
15
# Tracks items in a Redis server so that Spider can tell whether an item
# has been seen before.
#
# Each item is stored under the key +item.to_s+ with the value
# +item.to_json+; membership is decided by comparing the stored value with
# the item's JSON form.
class IncludedInRedis
  # Construct a new IncludedInRedis instance.
  #
  # All arguments are passed unchanged to Redis.new (part of the redis gem),
  # e.g. <tt>host: '127.0.0.1', port: 6379</tt>.
  def initialize(*a)
    @c = Redis.new(*a)
  end

  # Add an item to Redis.
  #
  # Returns the receiver, so appends can be chained (<tt>seen << a << b</tt>)
  # the same way they can on an Array or Set, instead of leaking whatever
  # the underlying client's +set+ call returned.
  def <<(v)
    @c.set(v.to_s, v.to_json)
    self
  end

  # True if the item is in Redis, i.e. the value stored under +v.to_s+
  # equals +v.to_json+; false otherwise.
  def include?(v)
    @c.get(v.to_s) == v.to_json
  end
end
@@ -123,7 +123,7 @@ class SpiderInstance
123
123
  def on(code, p = nil, &block)
124
124
  f = p ? p : block
125
125
  case code
126
- when Fixnum
126
+ when Integer
127
127
  @callbacks[code] = f
128
128
  else
129
129
  @callbacks[code.to_sym] = f
@@ -0,0 +1,43 @@
1
+ require File.dirname(__FILE__)+'/../spec_helper'
2
+
3
# Loads the Redis-backed cycle tracker and starts a local Redis server for
# the specs.
#
# redis-server treats a bare first argument as the path of a configuration
# file, so the listen address is given through explicit --bind/--port
# options. --daemonize yes makes the server fork into the background so that
# +system+ returns instead of blocking the spec run.
def before_specing_redis
  local_require 'spider/included_in_redis'
  system('redis-server --bind 127.0.0.1 --port 6379 --daemonize yes')
end
7
+
8
# Stops the Redis server used by the specs.
#
# Only the instance listening on 127.0.0.1:6379 is asked to shut down
# (without writing a snapshot), rather than sending SIGKILL to every
# redis-server process running on the machine.
def after_specing_redis
  system('redis-cli -h 127.0.0.1 -p 6379 shutdown nosave')
end
11
+
12
Spec::Runner.configure { |c| c.mock_with :mocha }

# Specs for IncludedInRedis: it must respond to << and include?, report
# false for an item that was never added and true for one that was.
# The Redis helpers run around every example.
describe 'Object to halt cycles' do
  before do
    before_specing_redis
    # Connect by IP address, matching the address used in the README example,
    # so the client does not depend on how 'localhost' resolves.
    @seen = IncludedInRedis.new(host: '127.0.0.1', port: 6379)
  end

  it 'should understand <<' do
    @seen.should respond_to(:<<)
  end

  it 'should understand include?' do
    @seen.should respond_to(:include?)
  end

  it 'should produce false if the object is not included' do
    @seen.include?('a').should be_false
  end

  it 'should produce true if the object is included' do
    @seen << 'a'
    @seen.include?('a').should be_true
  end

  after do
    after_specing_redis
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Nagro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-04 00:00:00.000000000 Z
11
+ date: 2018-04-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  A Web spidering library: handles robots.txt, scraping, finding more
@@ -24,11 +24,13 @@ files:
24
24
  - README.md
25
25
  - lib/spider.rb
26
26
  - lib/spider/included_in_memcached.rb
27
+ - lib/spider/included_in_redis.rb
27
28
  - lib/spider/next_urls_in_sqs.rb
28
29
  - lib/spider/robot_rules.rb
29
30
  - lib/spider/spider_instance.rb
30
31
  - spec/spec_helper.rb
31
32
  - spec/spider/included_in_memcached_spec.rb
33
+ - spec/spider/included_in_redis_spec.rb
32
34
  - spec/spider/spider_instance_spec.rb
33
35
  - spec/spider_spec.rb
34
36
  - spider.gemspec
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
54
  version: '0'
53
55
  requirements: []
54
56
  rubyforge_project: spider
55
- rubygems_version: 2.6.6
57
+ rubygems_version: 2.5.2.1
56
58
  signing_key:
57
59
  specification_version: 4
58
60
  summary: A Web spidering library