stalk_climber 0.0.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rdoc_options +7 -0
- data/.travis.yml +8 -10
- data/Gemfile +1 -0
- data/lib/stalk_climber.rb +5 -0
- data/lib/stalk_climber/climber.rb +42 -54
- data/lib/stalk_climber/climber_enumerable.rb +110 -0
- data/lib/stalk_climber/climber_enumerables.rb +7 -0
- data/lib/stalk_climber/connection.rb +49 -37
- data/lib/stalk_climber/connection_pool.rb +37 -7
- data/lib/stalk_climber/job.rb +270 -67
- data/lib/stalk_climber/lazy_enumerable.rb +1 -0
- data/lib/stalk_climber/tube.rb +182 -0
- data/lib/stalk_climber/tubes.rb +37 -0
- data/lib/stalk_climber/version.rb +2 -1
- data/test/test_helper.rb +4 -2
- data/test/unit/beaneater_job_test.rb +260 -0
- data/test/unit/climber_enumerable.rb +18 -0
- data/test/unit/climber_test.rb +31 -100
- data/test/unit/connection_pool_test.rb +48 -33
- data/test/unit/connection_test.rb +200 -149
- data/test/unit/job_test.rb +240 -147
- data/test/unit/jobs_test.rb +106 -0
- data/test/unit/tube_test.rb +89 -0
- data/test/unit/tubes_test.rb +52 -0
- metadata +17 -2
data/.rdoc_options
ADDED
data/.travis.yml
CHANGED
@@ -4,17 +4,15 @@ rvm:
|
|
4
4
|
- 1.9.3
|
5
5
|
- 2.0.0
|
6
6
|
- jruby-19mode
|
7
|
-
- rbx
|
7
|
+
- rbx
|
8
8
|
|
9
9
|
before_install:
|
10
|
-
-
|
11
|
-
-
|
12
|
-
-
|
13
|
-
-
|
14
|
-
-
|
15
|
-
-
|
16
|
-
- sudo chmod +x /etc/init.d/beanstalkd2
|
17
|
-
- sudo service beanstalkd2 start
|
10
|
+
- curl -L https://github.com/kr/beanstalkd/archive/v1.9.tar.gz | tar xz -C /tmp
|
11
|
+
- cd /tmp/beanstalkd-1.9/
|
12
|
+
- make
|
13
|
+
- ./beanstalkd &
|
14
|
+
- ./beanstalkd -p 11301 &
|
15
|
+
- cd $TRAVIS_BUILD_DIR
|
18
16
|
|
19
17
|
env:
|
20
18
|
- BEANSTALK_ADDRESSES='beanstalk://localhost:11300,beanstalk://localhost:11301'
|
@@ -22,4 +20,4 @@ env:
|
|
22
20
|
matrix:
|
23
21
|
allow_failures:
|
24
22
|
- rvm: jruby-19mode
|
25
|
-
- rvm: rbx
|
23
|
+
- rvm: rbx
|
data/Gemfile
CHANGED
data/lib/stalk_climber.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
module StalkClimber; end
|
2
2
|
|
3
|
+
require 'forwardable'
|
3
4
|
require 'beaneater'
|
4
5
|
require 'stalk_climber/version'
|
5
6
|
require 'stalk_climber/lazy_enumerable'
|
7
|
+
require 'stalk_climber/climber_enumerable'
|
8
|
+
require 'stalk_climber/climber_enumerables'
|
6
9
|
require 'stalk_climber/connection'
|
7
10
|
require 'stalk_climber/connection_pool'
|
8
11
|
require 'stalk_climber/climber'
|
9
12
|
require 'stalk_climber/job'
|
13
|
+
require 'stalk_climber/tubes'
|
14
|
+
require 'stalk_climber/tube'
|
@@ -1,11 +1,24 @@
|
|
1
1
|
module StalkClimber
|
2
2
|
class Climber
|
3
|
-
include RUBY_VERSION >= '2.0.0' ? LazyEnumerable : Enumerable
|
4
3
|
|
5
|
-
|
6
|
-
attr_reader :cache
|
4
|
+
extend Forwardable
|
7
5
|
|
8
|
-
|
6
|
+
def_delegator :connection_pool, :tubes
|
7
|
+
|
8
|
+
# Collection of beanstalk_addresses the pool is connected to
|
9
|
+
attr_accessor :beanstalk_addresses
|
10
|
+
|
11
|
+
# Accessor to the climber's Jobs instance
|
12
|
+
attr_reader :jobs
|
13
|
+
|
14
|
+
# Tube used when injecting jobs to probe state of Beanstalkd
|
15
|
+
attr_accessor :test_tube
|
16
|
+
|
17
|
+
# :call-seq:
|
18
|
+
# connection_pool() => StalkClimber::ConnectionPool
|
19
|
+
#
|
20
|
+
# Returns or creates a ConnectionPool from beanstalk_addresses. Raises a
|
21
|
+
# RuntimeError if #beanstalk_addresses has not been set.
|
9
22
|
def connection_pool
|
10
23
|
return @connection_pool unless @connection_pool.nil?
|
11
24
|
if self.beanstalk_addresses.nil?
|
@@ -15,50 +28,27 @@ module StalkClimber
|
|
15
28
|
end
|
16
29
|
|
17
30
|
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
begin
|
25
|
-
yield enum.next
|
26
|
-
rescue StopIteration => e
|
27
|
-
return (e.nil? || !e.respond_to?(:result) || e.result.nil?) ? nil : e.result
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
alias_method :each, :climb
|
32
|
-
|
33
|
-
|
34
|
-
# Perform a threaded climb across all connections in the connection pool.
|
35
|
-
# This method cannot be used for enumerable enumeration because a break
|
36
|
-
# called from one of the threads will cause a LocalJumpError. This could be
|
37
|
-
# fixed, but expected behavior on break varies as to whether or not to wait
|
38
|
-
# for all threads before returning a result. However, still useful for
|
39
|
-
# operations that always visit all jobs.
|
40
|
-
# An instance of Job is yielded to +block+
|
41
|
-
def climb_threaded(&block)
|
42
|
-
threads = []
|
43
|
-
self.connection_pool.connections.each do |connection|
|
44
|
-
threads << Thread.new { connection.each(&block) }
|
45
|
-
end
|
46
|
-
threads.each(&:join)
|
47
|
-
return
|
48
|
-
end
|
49
|
-
alias_method :each_threaded, :climb_threaded
|
50
|
-
|
51
|
-
|
52
|
-
# Creates a new Climber instance, optionally yielding the instance
|
53
|
-
# if a block is given
|
54
|
-
def initialize(beanstalk_addresses = nil, test_tube = nil)
|
31
|
+
# Creates a new Climber instance, optionally yielding the instance for
|
32
|
+
# configuration if a block is given
|
33
|
+
#
|
34
|
+
# Climber.new('beanstalk://localhost:11300', 'stalk_climber')
|
35
|
+
# #=> #<StalkClimber::Job beanstalk_addresses="beanstalk://localhost:11300" test_tube="stalk_climber">
|
36
|
+
def initialize(beanstalk_addresses = nil, test_tube = nil) # :yields: climber
|
55
37
|
self.beanstalk_addresses = beanstalk_addresses
|
56
38
|
self.test_tube = test_tube
|
39
|
+
@jobs = StalkClimber::Jobs.new(self)
|
57
40
|
yield(self) if block_given?
|
58
41
|
end
|
59
42
|
|
60
43
|
|
61
|
-
#
|
44
|
+
# :call-seq:
|
45
|
+
# max_job_ids() => Hash{Beaneater::Connection => Integer}
|
46
|
+
#
|
47
|
+
# Returns a Hash with connections as keys and max_job_ids as values
|
48
|
+
#
|
49
|
+
# climber = Climber.new('beanstalk://localhost:11300', 'stalk_climber')
|
50
|
+
# climber.max_job_ids
|
51
|
+
# #=> {#<Beaneater::Connection host="localhost" port=11300>=>1183}
|
62
52
|
def max_job_ids
|
63
53
|
connection_pairs = connection_pool.connections.map do |connection|
|
64
54
|
[connection, connection.max_job_id]
|
@@ -67,19 +57,17 @@ module StalkClimber
|
|
67
57
|
end
|
68
58
|
|
69
59
|
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
60
|
+
# :call-seq:
|
61
|
+
# to_s() => String
|
62
|
+
#
|
63
|
+
# Return string representation of climber
|
64
|
+
#
|
65
|
+
# Climber.new('beanstalk://localhost:11300', 'stalk_climber').to_s
|
66
|
+
# #=> #<StalkClimber::Job beanstalk_addresses="beanstalk://localhost:11300" test_tube="stalk_climber">
|
67
|
+
def to_s
|
68
|
+
return "#<StalkClimber::Job beanstalk_addresses=#{beanstalk_addresses.inspect} test_tube=#{test_tube.inspect}>"
|
82
69
|
end
|
70
|
+
alias :inspect :to_s
|
83
71
|
|
84
72
|
end
|
85
73
|
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module StalkClimber
|
2
|
+
|
3
|
+
module ClimberEnumerable
|
4
|
+
|
5
|
+
include RUBY_VERSION >= '2.0.0' ? LazyEnumerable : Enumerable
|
6
|
+
extend Forwardable
|
7
|
+
|
8
|
+
def_delegators :to_enum, :each
|
9
|
+
|
10
|
+
# A reference to the climber instance to which this enumerable belongs
|
11
|
+
attr_reader :climber
|
12
|
+
|
13
|
+
|
14
|
+
# :call-seq:
|
15
|
+
# new(enumerator_method) => New class including ClimberEnumerable
|
16
|
+
#
|
17
|
+
# Factory that simplifies the creation of ClimberEnumerable classes.
|
18
|
+
# Otherwise in simple cases a class would have to be defined only to
|
19
|
+
# include this module and set the desired :enumerator_method symbol.
|
20
|
+
# The :enumerator_method parameter is passed to each connection in
|
21
|
+
# the connection pool of the climber given at instantiation.
|
22
|
+
#
|
23
|
+
# jobs = ClimberEnumerable.new(:each_job)
|
24
|
+
# instance = jobs.new(climber)
|
25
|
+
# instance.each do |job|
|
26
|
+
# break job
|
27
|
+
# end
|
28
|
+
# #=> #<StalkClimber::Job id=1 body="Work to be done">
|
29
|
+
#
|
30
|
+
def self.new(enumerator_method)
|
31
|
+
return Class.new do
|
32
|
+
include StalkClimber::ClimberEnumerable
|
33
|
+
@enumerator_method = enumerator_method
|
34
|
+
|
35
|
+
# Create a new instance of a ClimberEnumerable when given +climber+ that
|
36
|
+
# references the StalkClimber that owns it
|
37
|
+
def initialize(climber)
|
38
|
+
@climber = climber
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
# Add :enumerator_method class level accessor to inheriting class
|
45
|
+
def self.included(base) # :nodoc:
|
46
|
+
class << base
|
47
|
+
attr_reader :enumerator_method
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
# Perform a threaded iteration across all connections in the climber's
|
53
|
+
# connection pool. This method cannot be used for enumerable enumeration
|
54
|
+
# because a break called within one of the threads will cause a LocalJumpError.
|
55
|
+
# This could be fixed, but expected behavior on break varies as to whether
|
56
|
+
# or not to wait for all threads before returning a result. However, still
|
57
|
+
# useful for operations that always visit all elements.
|
58
|
+
# An instance of the element is yielded with each iteration.
|
59
|
+
#
|
60
|
+
# jobs = ClimberEnumerable.new(:each_job)
|
61
|
+
# instance = jobs.new(climber)
|
62
|
+
# instance.each_threaded do |job|
|
63
|
+
# ...
|
64
|
+
# end
|
65
|
+
def each_threaded(&block) # :yields: Object
|
66
|
+
threads = []
|
67
|
+
climber.connection_pool.connections.each do |connection|
|
68
|
+
threads << Thread.new { connection.send(self.class.enumerator_method, &block) }
|
69
|
+
end
|
70
|
+
threads.each(&:join)
|
71
|
+
return
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
# :call-seq:
|
76
|
+
# to_enum() => Enumerator
|
77
|
+
#
|
78
|
+
# Returns an Enumerator for enumerating elements on all connections.
|
79
|
+
# Connections are enumerated in the order defined. See Connection#to_enum
|
80
|
+
# for more information
|
81
|
+
# An instance of the element is yielded with each iteration.
|
82
|
+
def to_enum
|
83
|
+
return Enumerator.new do |yielder|
|
84
|
+
climber.connection_pool.connections.each do |connection|
|
85
|
+
connection.send(self.class.enumerator_method) do |element|
|
86
|
+
yielder << element
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
# :call-seq:
|
94
|
+
# each {|obj| block} => Object
|
95
|
+
#
|
96
|
+
# Iterate over all elements on all connections in the climber's connection
|
97
|
+
# pool. If no block is given, returns the enumerator provided by #to_enum.
|
98
|
+
# An instance of the element is yielded to a given block. For more information
|
99
|
+
# see the method on Connection designated :enumerator_method by the implementing class
|
100
|
+
#
|
101
|
+
# jobs = ClimberEnumerable.new(:each_job)
|
102
|
+
# instance = jobs.new(climber)
|
103
|
+
# instance.each do |job|
|
104
|
+
# break job
|
105
|
+
# end
|
106
|
+
# #=> #<StalkClimber::Job id=1 body="Work to be done">
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -1,48 +1,43 @@
|
|
1
1
|
module StalkClimber
|
2
2
|
class Connection < Beaneater::Connection
|
3
|
-
include RUBY_VERSION >= '2.0.0' ? LazyEnumerable : Enumerable
|
4
3
|
|
4
|
+
extend Forwardable
|
5
|
+
|
6
|
+
def_delegator :job_enumerator, :each, :each_job
|
7
|
+
|
8
|
+
# Default tube used when no custom tube in use
|
5
9
|
DEFAULT_TUBE = 'stalk_climber'
|
10
|
+
|
11
|
+
# Transmission used to probe state of Beanstalkd. Created with lowest
|
12
|
+
# possible priority and delay to reduce possibility of interference.
|
6
13
|
PROBE_TRANSMISSION = "put 4294967295 0 300 2\r\n{}"
|
7
14
|
|
8
|
-
|
15
|
+
# Last known maximum job id on the Beanstalkd server
|
16
|
+
attr_reader :max_climbed_job_id
|
17
|
+
# Last known existing minimum job id on the Beanstalkd server
|
18
|
+
attr_reader :min_climbed_job_id
|
19
|
+
# Tube to use when probing the Beanstalkd server for information
|
20
|
+
attr_reader :test_tube
|
9
21
|
|
10
22
|
|
23
|
+
# :call-seq:
|
24
|
+
# cached_jobs() => Hash
|
25
|
+
#
|
11
26
|
# Returns or creates a Hash used for caching jobs by ID
|
12
|
-
def
|
13
|
-
return @
|
27
|
+
def cached_jobs
|
28
|
+
return @cached_jobs ||= {}
|
14
29
|
end
|
15
30
|
|
16
31
|
|
17
32
|
# Resets the job cache and reinitializes the min and max climbed job ids
|
18
|
-
def
|
19
|
-
@
|
33
|
+
def clear_job_cache
|
34
|
+
@cached_jobs = nil
|
20
35
|
@min_climbed_job_id = Float::INFINITY
|
21
36
|
@max_climbed_job_id = 0
|
37
|
+
return true
|
22
38
|
end
|
23
39
|
|
24
40
|
|
25
|
-
# Interface for job enumerator/enumeration in descending ID order. Returns an instance of
|
26
|
-
# Job for each existing job on the beanstalk server. Jobs are enumerated in three phases. Jobs
|
27
|
-
# between max_job_id and the max_climbed_job_id are pulled from beanstalk, cached, and yielded.
|
28
|
-
# Jobs that have already been cached are yielded if they still exist, otherwise they are deleted
|
29
|
-
# from the cache. Finally, jobs between min_climbed_job_id and 1 are pulled from beanstalk, cached,
|
30
|
-
# and yielded.
|
31
|
-
# Connection#each fulfills Enumberable contract, allowing connection to behave as an Enumerable.
|
32
|
-
def climb
|
33
|
-
enum = to_enum
|
34
|
-
return enum unless block_given?
|
35
|
-
loop do
|
36
|
-
begin
|
37
|
-
yield enum.next
|
38
|
-
rescue StopIteration => e
|
39
|
-
return (e.nil? || !e.respond_to?(:result) || e.result.nil?) ? nil : e.result
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
alias_method :each, :climb
|
44
|
-
|
45
|
-
|
46
41
|
# Safe form of fetch_job!, returns a Job instance for the specified +job_id+.
|
47
42
|
# If the job does not exist, the error is caught and nil is returned instead.
|
48
43
|
def fetch_job(job_id)
|
@@ -84,7 +79,7 @@ module StalkClimber
|
|
84
79
|
def initialize(address, test_tube = DEFAULT_TUBE)
|
85
80
|
super(address)
|
86
81
|
@test_tube = test_tube || DEFAULT_TUBE
|
87
|
-
|
82
|
+
clear_job_cache
|
88
83
|
yield(self) if block_given?
|
89
84
|
use_test_tube
|
90
85
|
end
|
@@ -92,7 +87,7 @@ module StalkClimber
|
|
92
87
|
|
93
88
|
# Determines the max job ID of the connection by inserting a job into the test tube
|
94
89
|
# and immediately deleting it. Before returning the max ID, the max ID is used to
|
95
|
-
# update the max_climbed_job_id (if sequentual) and possibly invalidate the cache.
|
90
|
+
# update the max_climbed_job_id (if sequential) and possibly invalidate the job cache.
|
96
91
|
# The cache will be invalidated if the max ID is less than any known IDs since
|
97
92
|
# new job IDs should always increment unless there's been a change in server state.
|
98
93
|
def max_job_id
|
@@ -111,12 +106,12 @@ module StalkClimber
|
|
111
106
|
|
112
107
|
|
113
108
|
# Returns an Enumerator for crawling all existing jobs for a connection.
|
114
|
-
# See Connection#
|
115
|
-
def
|
109
|
+
# See Connection#each_job for more information.
|
110
|
+
def job_enumerator
|
116
111
|
return Enumerator.new do |yielder|
|
117
112
|
max_id = max_job_id
|
118
113
|
|
119
|
-
initial_cached_jobs =
|
114
|
+
initial_cached_jobs = cached_jobs.values_at(*cached_jobs.keys.sort.reverse)
|
120
115
|
|
121
116
|
max_id.downto(self.max_climbed_job_id + 1) do |job_id|
|
122
117
|
job = fetch_and_cache_job(job_id)
|
@@ -127,7 +122,7 @@ module StalkClimber
|
|
127
122
|
if job.exists?
|
128
123
|
yielder << job
|
129
124
|
else
|
130
|
-
self.
|
125
|
+
self.cached_jobs.delete(job.id)
|
131
126
|
end
|
132
127
|
end
|
133
128
|
|
@@ -135,6 +130,7 @@ module StalkClimber
|
|
135
130
|
job = fetch_and_cache_job(job_id)
|
136
131
|
yielder << job unless job.nil?
|
137
132
|
end
|
133
|
+
nil
|
138
134
|
end
|
139
135
|
end
|
140
136
|
|
@@ -147,7 +143,7 @@ module StalkClimber
|
|
147
143
|
# and nil is returned
|
148
144
|
def fetch_and_cache_job(job_id)
|
149
145
|
job = fetch_job(job_id)
|
150
|
-
self.
|
146
|
+
self.cached_jobs[job_id] = job unless job.nil?
|
151
147
|
@min_climbed_job_id = job_id if job_id < @min_climbed_job_id
|
152
148
|
@max_climbed_job_id = job_id if job_id > @max_climbed_job_id
|
153
149
|
return job
|
@@ -155,13 +151,13 @@ module StalkClimber
|
|
155
151
|
|
156
152
|
|
157
153
|
# Uses +new_max_id+ to update the max_climbed_job_id (if sequential) and possibly invalidate
|
158
|
-
# the cache. The cache will be invalidated if +new_max_id+ is less than any known
|
159
|
-
# new job IDs should always increment unless there's been a change in server state.
|
154
|
+
# the job cache. The job cache will be invalidated if +new_max_id+ is less than any known
|
155
|
+
# IDs since new job IDs should always increment unless there's been a change in server state.
|
160
156
|
def update_climbed_job_ids_from_max_id(new_max_id)
|
161
157
|
if @max_climbed_job_id > 0 && @max_climbed_job_id == new_max_id - 1
|
162
158
|
@max_climbed_job_id = new_max_id
|
163
159
|
elsif new_max_id < @max_climbed_job_id
|
164
|
-
|
160
|
+
clear_job_cache
|
165
161
|
end
|
166
162
|
end
|
167
163
|
|
@@ -178,5 +174,21 @@ module StalkClimber
|
|
178
174
|
end
|
179
175
|
end
|
180
176
|
|
177
|
+
|
178
|
+
# :call-seq:
|
179
|
+
# each_job() => Enumerator
|
180
|
+
# each_job {|job| block }
|
181
|
+
# Interface for job enumerator/enumeration in descending ID order. Returns an instance of
|
182
|
+
# Job for each existing job on the beanstalk server. Jobs are enumerated in three phases. Jobs
|
183
|
+
# between max_job_id and the max_climbed_job_id are pulled from beanstalk, cached, and yielded.
|
184
|
+
# Jobs that have already been cached are yielded if they still exist, otherwise they are deleted
|
185
|
+
# from the job cache. Finally, jobs between min_climbed_job_id and 1 are pulled from beanstalk,
|
186
|
+
# cached, and yielded.
|
187
|
+
#
|
188
|
+
# connection = Connection.new('localhost:11300')
|
189
|
+
# connection.each_job do |job|
|
190
|
+
# job.delete
|
191
|
+
# end
|
192
|
+
|
181
193
|
end
|
182
194
|
end
|