stalk_climber 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rdoc_options +7 -0
- data/.travis.yml +8 -10
- data/Gemfile +1 -0
- data/lib/stalk_climber.rb +5 -0
- data/lib/stalk_climber/climber.rb +42 -54
- data/lib/stalk_climber/climber_enumerable.rb +110 -0
- data/lib/stalk_climber/climber_enumerables.rb +7 -0
- data/lib/stalk_climber/connection.rb +49 -37
- data/lib/stalk_climber/connection_pool.rb +37 -7
- data/lib/stalk_climber/job.rb +270 -67
- data/lib/stalk_climber/lazy_enumerable.rb +1 -0
- data/lib/stalk_climber/tube.rb +182 -0
- data/lib/stalk_climber/tubes.rb +37 -0
- data/lib/stalk_climber/version.rb +2 -1
- data/test/test_helper.rb +4 -2
- data/test/unit/beaneater_job_test.rb +260 -0
- data/test/unit/climber_enumerable.rb +18 -0
- data/test/unit/climber_test.rb +31 -100
- data/test/unit/connection_pool_test.rb +48 -33
- data/test/unit/connection_test.rb +200 -149
- data/test/unit/job_test.rb +240 -147
- data/test/unit/jobs_test.rb +106 -0
- data/test/unit/tube_test.rb +89 -0
- data/test/unit/tubes_test.rb +52 -0
- metadata +17 -2
data/.rdoc_options
ADDED
data/.travis.yml
CHANGED
@@ -4,17 +4,15 @@ rvm:
|
|
4
4
|
- 1.9.3
|
5
5
|
- 2.0.0
|
6
6
|
- jruby-19mode
|
7
|
-
- rbx
|
7
|
+
- rbx
|
8
8
|
|
9
9
|
before_install:
|
10
|
-
-
|
11
|
-
-
|
12
|
-
-
|
13
|
-
-
|
14
|
-
-
|
15
|
-
-
|
16
|
-
- sudo chmod +x /etc/init.d/beanstalkd2
|
17
|
-
- sudo service beanstalkd2 start
|
10
|
+
- curl -L https://github.com/kr/beanstalkd/archive/v1.9.tar.gz | tar xz -C /tmp
|
11
|
+
- cd /tmp/beanstalkd-1.9/
|
12
|
+
- make
|
13
|
+
- ./beanstalkd &
|
14
|
+
- ./beanstalkd -p 11301 &
|
15
|
+
- cd $TRAVIS_BUILD_DIR
|
18
16
|
|
19
17
|
env:
|
20
18
|
- BEANSTALK_ADDRESSES='beanstalk://localhost:11300,beanstalk://localhost:11301'
|
@@ -22,4 +20,4 @@ env:
|
|
22
20
|
matrix:
|
23
21
|
allow_failures:
|
24
22
|
- rvm: jruby-19mode
|
25
|
-
- rvm: rbx
|
23
|
+
- rvm: rbx
|
data/Gemfile
CHANGED
data/lib/stalk_climber.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
module StalkClimber; end
|
2
2
|
|
3
|
+
require 'forwardable'
|
3
4
|
require 'beaneater'
|
4
5
|
require 'stalk_climber/version'
|
5
6
|
require 'stalk_climber/lazy_enumerable'
|
7
|
+
require 'stalk_climber/climber_enumerable'
|
8
|
+
require 'stalk_climber/climber_enumerables'
|
6
9
|
require 'stalk_climber/connection'
|
7
10
|
require 'stalk_climber/connection_pool'
|
8
11
|
require 'stalk_climber/climber'
|
9
12
|
require 'stalk_climber/job'
|
13
|
+
require 'stalk_climber/tubes'
|
14
|
+
require 'stalk_climber/tube'
|
@@ -1,11 +1,24 @@
|
|
1
1
|
module StalkClimber
|
2
2
|
class Climber
|
3
|
-
include RUBY_VERSION >= '2.0.0' ? LazyEnumerable : Enumerable
|
4
3
|
|
5
|
-
|
6
|
-
attr_reader :cache
|
4
|
+
extend Forwardable
|
7
5
|
|
8
|
-
|
6
|
+
def_delegator :connection_pool, :tubes
|
7
|
+
|
8
|
+
# Collection of beanstalk_addresses the pool is connected to
|
9
|
+
attr_accessor :beanstalk_addresses
|
10
|
+
|
11
|
+
# Accessor to the climber's Jobs instance
|
12
|
+
attr_reader :jobs
|
13
|
+
|
14
|
+
# Tube used when injecting jobs to probe state of Beanstalkd
|
15
|
+
attr_accessor :test_tube
|
16
|
+
|
17
|
+
# :call-seq:
|
18
|
+
# connection_pool() => StalkClimber::ConnectionPool
|
19
|
+
#
|
20
|
+
# Returns or creates a ConnectionPool from beanstalk_addresses. Raises a
|
21
|
+
# RuntimeError if #beanstalk_addresses has not been set.
|
9
22
|
def connection_pool
|
10
23
|
return @connection_pool unless @connection_pool.nil?
|
11
24
|
if self.beanstalk_addresses.nil?
|
@@ -15,50 +28,27 @@ module StalkClimber
|
|
15
28
|
end
|
16
29
|
|
17
30
|
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
begin
|
25
|
-
yield enum.next
|
26
|
-
rescue StopIteration => e
|
27
|
-
return (e.nil? || !e.respond_to?(:result) || e.result.nil?) ? nil : e.result
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
alias_method :each, :climb
|
32
|
-
|
33
|
-
|
34
|
-
# Perform a threaded climb across all connections in the connection pool.
|
35
|
-
# This method cannot be used for enumerable enumeration because a break
|
36
|
-
# called from one of the threads will cause a LocalJumpError. This could be
|
37
|
-
# fixed, but expected behavior on break varies as to whether or not to wait
|
38
|
-
# for all threads before returning a result. However, still useful for
|
39
|
-
# operations that always visit all jobs.
|
40
|
-
# An instance of Job is yielded to +block+
|
41
|
-
def climb_threaded(&block)
|
42
|
-
threads = []
|
43
|
-
self.connection_pool.connections.each do |connection|
|
44
|
-
threads << Thread.new { connection.each(&block) }
|
45
|
-
end
|
46
|
-
threads.each(&:join)
|
47
|
-
return
|
48
|
-
end
|
49
|
-
alias_method :each_threaded, :climb_threaded
|
50
|
-
|
51
|
-
|
52
|
-
# Creates a new Climber instance, optionally yielding the instance
|
53
|
-
# if a block is given
|
54
|
-
def initialize(beanstalk_addresses = nil, test_tube = nil)
|
31
|
+
# Creates a new Climber instance, optionally yielding the instance for
|
32
|
+
# configuration if a block is given
|
33
|
+
#
|
34
|
+
# Climber.new('beanstalk://localhost:11300', 'stalk_climber')
|
35
|
+
# #=> #<StalkClimber::Job beanstalk_addresses="beanstalk://localhost:11300" test_tube="stalk_climber">
|
36
|
+
def initialize(beanstalk_addresses = nil, test_tube = nil) # :yields: climber
|
55
37
|
self.beanstalk_addresses = beanstalk_addresses
|
56
38
|
self.test_tube = test_tube
|
39
|
+
@jobs = StalkClimber::Jobs.new(self)
|
57
40
|
yield(self) if block_given?
|
58
41
|
end
|
59
42
|
|
60
43
|
|
61
|
-
#
|
44
|
+
# :call-seq:
|
45
|
+
# max_job_ids() => Hash{Beaneater::Connection => Integer}
|
46
|
+
#
|
47
|
+
# Returns a Hash with connections as keys and max_job_ids as values
|
48
|
+
#
|
49
|
+
# climber = Climber.new('beanstalk://localhost:11300', 'stalk_climber')
|
50
|
+
# climber.max_job_ids
|
51
|
+
# #=> {#<Beaneater::Connection host="localhost" port=11300>=>1183}
|
62
52
|
def max_job_ids
|
63
53
|
connection_pairs = connection_pool.connections.map do |connection|
|
64
54
|
[connection, connection.max_job_id]
|
@@ -67,19 +57,17 @@ module StalkClimber
|
|
67
57
|
end
|
68
58
|
|
69
59
|
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
60
|
+
# :call-seq:
|
61
|
+
# to_s() => String
|
62
|
+
#
|
63
|
+
# Return string representation of climber
|
64
|
+
#
|
65
|
+
# Climber.new('beanstalk://localhost:11300', 'stalk_climber').to_s
|
66
|
+
# #=> #<StalkClimber::Job beanstalk_addresses="beanstalk://localhost:11300" test_tube="stalk_climber">
|
67
|
+
def to_s
|
68
|
+
return "#<StalkClimber::Job beanstalk_addresses=#{beanstalk_addresses.inspect} test_tube=#{test_tube.inspect}>"
|
82
69
|
end
|
70
|
+
alias :inspect :to_s
|
83
71
|
|
84
72
|
end
|
85
73
|
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module StalkClimber
|
2
|
+
|
3
|
+
module ClimberEnumerable
|
4
|
+
|
5
|
+
include RUBY_VERSION >= '2.0.0' ? LazyEnumerable : Enumerable
|
6
|
+
extend Forwardable
|
7
|
+
|
8
|
+
def_delegators :to_enum, :each
|
9
|
+
|
10
|
+
# A reference to the climber instance to which this enumerable belongs
|
11
|
+
attr_reader :climber
|
12
|
+
|
13
|
+
|
14
|
+
# :call-seq:
|
15
|
+
# new(enumerator_method) => New class including ClimberEnumerable
|
16
|
+
#
|
17
|
+
# Factory that simplifies the creation of ClimberEnumerable classes.
|
18
|
+
# Otherwise in simple cases a class would have to be defined only to
|
19
|
+
# include this module and set the desired :enumerator_method symbol.
|
20
|
+
# The :enumerator_method parameter is passed to each connection in
|
21
|
+
# the connection pool of the climber given at instantiation.
|
22
|
+
#
|
23
|
+
# jobs = ClimberEnumerable.new(:each_job)
|
24
|
+
# instance = jobs.new(climber)
|
25
|
+
# instance.each do |job|
|
26
|
+
# break job
|
27
|
+
# end
|
28
|
+
# #=> #<StalkClimber::Job id=1 body="Work to be done">
|
29
|
+
#
|
30
|
+
def self.new(enumerator_method)
|
31
|
+
return Class.new do
|
32
|
+
include StalkClimber::ClimberEnumerable
|
33
|
+
@enumerator_method = enumerator_method
|
34
|
+
|
35
|
+
# Create a new instance of a ClimberEnumerable when given +climber+ that
|
36
|
+
# references the StalkClimber that owns it
|
37
|
+
def initialize(climber)
|
38
|
+
@climber = climber
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
# Add :enumerator_method class level accessor to inheriting class
|
45
|
+
def self.included(base) # :nodoc:
|
46
|
+
class << base
|
47
|
+
attr_reader :enumerator_method
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
# Perform a threaded iteration across all connections in the climber's
|
53
|
+
# connection pool. This method cannot be used for enumerable enumeration
|
54
|
+
# because a break called within one of the threads will cause a LocalJumpError.
|
55
|
+
# This could be fixed, but expected behavior on break varies as to whether
|
56
|
+
# or not to wait for all threads before returning a result. However, still
|
57
|
+
# useful for operations that always visit all elements.
|
58
|
+
# An instance of the element is yielded with each iteration.
|
59
|
+
#
|
60
|
+
# jobs = ClimberEnumerable.new(:each_job)
|
61
|
+
# instance = jobs.new(climber)
|
62
|
+
# instance.each_threaded do |job|
|
63
|
+
# ...
|
64
|
+
# end
|
65
|
+
def each_threaded(&block) # :yields: Object
|
66
|
+
threads = []
|
67
|
+
climber.connection_pool.connections.each do |connection|
|
68
|
+
threads << Thread.new { connection.send(self.class.enumerator_method, &block) }
|
69
|
+
end
|
70
|
+
threads.each(&:join)
|
71
|
+
return
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
# :call-seq:
|
76
|
+
# to_enum() => Enumerator
|
77
|
+
#
|
78
|
+
# Returns an Enumerator for enumerating elements on all connections.
|
79
|
+
# Connections are enumerated in the order defined. See Connection#job_enumerator
|
80
|
+
# for more information
|
81
|
+
# An instance of the element is yielded with each iteration.
|
82
|
+
def to_enum
|
83
|
+
return Enumerator.new do |yielder|
|
84
|
+
climber.connection_pool.connections.each do |connection|
|
85
|
+
connection.send(self.class.enumerator_method) do |element|
|
86
|
+
yielder << element
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
# :call-seq:
|
94
|
+
# each {|obj| block} => Object
|
95
|
+
#
|
96
|
+
# Iterate over all elements on all connections in the climber's connection
|
97
|
+
# pool. If no block is given, returns the enumerator provided by #to_enum.
|
98
|
+
# An instance of the element is yielded to a given block. For more information
|
99
|
+
# see the method on Connection designated :enumerator_method by the implementing class
|
100
|
+
#
|
101
|
+
# jobs = ClimberEnumerable.new(:each_job)
|
102
|
+
# instance = jobs.new(climber)
|
103
|
+
# instance.each do |job|
|
104
|
+
# break job
|
105
|
+
# end
|
106
|
+
# #=> #<StalkClimber::Job id=1 body="Work to be done">
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -1,48 +1,43 @@
|
|
1
1
|
module StalkClimber
|
2
2
|
class Connection < Beaneater::Connection
|
3
|
-
include RUBY_VERSION >= '2.0.0' ? LazyEnumerable : Enumerable
|
4
3
|
|
4
|
+
extend Forwardable
|
5
|
+
|
6
|
+
def_delegator :job_enumerator, :each, :each_job
|
7
|
+
|
8
|
+
# Default tube used when no custom tube in use
|
5
9
|
DEFAULT_TUBE = 'stalk_climber'
|
10
|
+
|
11
|
+
# Transmission used to probe state of Beanstalkd. Created with lowest
|
12
|
+
# possible priority and delay to reduce possibility of interference.
|
6
13
|
PROBE_TRANSMISSION = "put 4294967295 0 300 2\r\n{}"
|
7
14
|
|
8
|
-
|
15
|
+
# Last known maximum job id on the Beanstalkd server
|
16
|
+
attr_reader :max_climbed_job_id
|
17
|
+
# Last known existing minimum job id on the Beanstalkd server
|
18
|
+
attr_reader :min_climbed_job_id
|
19
|
+
# Tube to use when probing the Beanstalkd server for information
|
20
|
+
attr_reader :test_tube
|
9
21
|
|
10
22
|
|
23
|
+
# :call-seq:
|
24
|
+
# cached_jobs() => Hash
|
25
|
+
#
|
11
26
|
# Returns or creates a Hash used for caching jobs by ID
|
12
|
-
def
|
13
|
-
return @
|
27
|
+
def cached_jobs
|
28
|
+
return @cached_jobs ||= {}
|
14
29
|
end
|
15
30
|
|
16
31
|
|
17
32
|
# Resets the job cache and reinitializes the min and max climbed job ids
|
18
|
-
def
|
19
|
-
@
|
33
|
+
def clear_job_cache
|
34
|
+
@cached_jobs = nil
|
20
35
|
@min_climbed_job_id = Float::INFINITY
|
21
36
|
@max_climbed_job_id = 0
|
37
|
+
return true
|
22
38
|
end
|
23
39
|
|
24
40
|
|
25
|
-
# Interface for job enumerator/enumeration in descending ID order. Returns an instance of
|
26
|
-
# Job for each existing job on the beanstalk server. Jobs are enumerated in three phases. Jobs
|
27
|
-
# between max_job_id and the max_climbed_job_id are pulled from beanstalk, cached, and yielded.
|
28
|
-
# Jobs that have already been cached are yielded if they still exist, otherwise they are deleted
|
29
|
-
# from the cache. Finally, jobs between min_climbed_job_id and 1 are pulled from beanstalk, cached,
|
30
|
-
# and yielded.
|
31
|
-
# Connection#each fulfills Enumberable contract, allowing connection to behave as an Enumerable.
|
32
|
-
def climb
|
33
|
-
enum = to_enum
|
34
|
-
return enum unless block_given?
|
35
|
-
loop do
|
36
|
-
begin
|
37
|
-
yield enum.next
|
38
|
-
rescue StopIteration => e
|
39
|
-
return (e.nil? || !e.respond_to?(:result) || e.result.nil?) ? nil : e.result
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
alias_method :each, :climb
|
44
|
-
|
45
|
-
|
46
41
|
# Safe form of fetch_job!, returns a Job instance for the specified +job_id+.
|
47
42
|
# If the job does not exist, the error is caught and nil is returned instead.
|
48
43
|
def fetch_job(job_id)
|
@@ -84,7 +79,7 @@ module StalkClimber
|
|
84
79
|
def initialize(address, test_tube = DEFAULT_TUBE)
|
85
80
|
super(address)
|
86
81
|
@test_tube = test_tube || DEFAULT_TUBE
|
87
|
-
|
82
|
+
clear_job_cache
|
88
83
|
yield(self) if block_given?
|
89
84
|
use_test_tube
|
90
85
|
end
|
@@ -92,7 +87,7 @@ module StalkClimber
|
|
92
87
|
|
93
88
|
# Determines the max job ID of the connection by inserting a job into the test tube
|
94
89
|
# and immediately deleting it. Before returning the max ID, the max ID is used to
|
95
|
-
# update the max_climbed_job_id (if sequentual) and possibly invalidate the cache.
|
90
|
+
# update the max_climbed_job_id (if sequential) and possibly invalidate the job cache.
|
96
91
|
# The cache will be invalidated if the max ID is less than any known IDs since
|
97
92
|
# new job IDs should always increment unless there's been a change in server state.
|
98
93
|
def max_job_id
|
@@ -111,12 +106,12 @@ module StalkClimber
|
|
111
106
|
|
112
107
|
|
113
108
|
# Returns an Enumerator for crawling all existing jobs for a connection.
|
114
|
-
# See Connection#
|
115
|
-
def
|
109
|
+
# See Connection#each_job for more information.
|
110
|
+
def job_enumerator
|
116
111
|
return Enumerator.new do |yielder|
|
117
112
|
max_id = max_job_id
|
118
113
|
|
119
|
-
initial_cached_jobs =
|
114
|
+
initial_cached_jobs = cached_jobs.values_at(*cached_jobs.keys.sort.reverse)
|
120
115
|
|
121
116
|
max_id.downto(self.max_climbed_job_id + 1) do |job_id|
|
122
117
|
job = fetch_and_cache_job(job_id)
|
@@ -127,7 +122,7 @@ module StalkClimber
|
|
127
122
|
if job.exists?
|
128
123
|
yielder << job
|
129
124
|
else
|
130
|
-
self.
|
125
|
+
self.cached_jobs.delete(job.id)
|
131
126
|
end
|
132
127
|
end
|
133
128
|
|
@@ -135,6 +130,7 @@ module StalkClimber
|
|
135
130
|
job = fetch_and_cache_job(job_id)
|
136
131
|
yielder << job unless job.nil?
|
137
132
|
end
|
133
|
+
nil
|
138
134
|
end
|
139
135
|
end
|
140
136
|
|
@@ -147,7 +143,7 @@ module StalkClimber
|
|
147
143
|
# and nil is returned
|
148
144
|
def fetch_and_cache_job(job_id)
|
149
145
|
job = fetch_job(job_id)
|
150
|
-
self.
|
146
|
+
self.cached_jobs[job_id] = job unless job.nil?
|
151
147
|
@min_climbed_job_id = job_id if job_id < @min_climbed_job_id
|
152
148
|
@max_climbed_job_id = job_id if job_id > @max_climbed_job_id
|
153
149
|
return job
|
@@ -155,13 +151,13 @@ module StalkClimber
|
|
155
151
|
|
156
152
|
|
157
153
|
# Uses +new_max_id+ to update the max_climbed_job_id (if sequential) and possibly invalidate
|
158
|
-
# the cache. The cache will be invalidated if +new_max_id+ is less than any known
|
159
|
-
# new job IDs should always increment unless there's been a change in server state.
|
154
|
+
# the job cache. The job cache will be invalidated if +new_max_id+ is less than any known
|
155
|
+
# IDs since new job IDs should always increment unless there's been a change in server state.
|
160
156
|
def update_climbed_job_ids_from_max_id(new_max_id)
|
161
157
|
if @max_climbed_job_id > 0 && @max_climbed_job_id == new_max_id - 1
|
162
158
|
@max_climbed_job_id = new_max_id
|
163
159
|
elsif new_max_id < @max_climbed_job_id
|
164
|
-
|
160
|
+
clear_job_cache
|
165
161
|
end
|
166
162
|
end
|
167
163
|
|
@@ -178,5 +174,21 @@ module StalkClimber
|
|
178
174
|
end
|
179
175
|
end
|
180
176
|
|
177
|
+
|
178
|
+
# :call-seq:
|
179
|
+
# each_job() => Enumerator
|
180
|
+
# each_job {|job| block }
|
181
|
+
# Interface for job enumerator/enumeration in descending ID order. Returns an instance of
|
182
|
+
# Job for each existing job on the beanstalk server. Jobs are enumerated in three phases. Jobs
|
183
|
+
# between max_job_id and the max_climbed_job_id are pulled from beanstalk, cached, and yielded.
|
184
|
+
# Jobs that have already been cached are yielded if they still exist, otherwise they are deleted
|
185
|
+
# from the job cache. Finally, jobs between min_climbed_job_id and 1 are pulled from beanstalk,
|
186
|
+
# cached, and yielded.
|
187
|
+
#
|
188
|
+
# connection = Connection.new('localhost:11300')
|
189
|
+
# connection.each_job do |job|
|
190
|
+
# job.delete
|
191
|
+
# end
|
192
|
+
|
181
193
|
end
|
182
194
|
end
|