nogara-resque-scheduler 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +19 -11
- data/lib/resque/scheduler.rb +26 -75
- data/lib/resque/scheduler_locking.rb +111 -0
- data/lib/resque_scheduler.rb +4 -0
- data/lib/resque_scheduler/version.rb +1 -1
- data/test/scheduler_test.rb +31 -2
- metadata +3 -2
data/README.markdown
CHANGED
@@ -99,17 +99,6 @@ any nonempty value, they will take effect. `VERBOSE` simply dumps more output
|
|
99
99
|
to stdout. `MUTE` does the opposite and silences all output. `MUTE`
|
100
100
|
supersedes `VERBOSE`.
|
101
101
|
|
102
|
-
NOTE: You DO NOT want to run >1 instance of the scheduler. Doing so will
|
103
|
-
result in the same job being queued more than once. You only need one
|
104
|
-
instance of the scheduler running per resque instance (regardless of number
|
105
|
-
of machines).
|
106
|
-
|
107
|
-
If the scheduler process goes down for whatever reason, the delayed items
|
108
|
-
that should have fired during the outage will fire once the scheduler process
|
109
|
-
is started back up again (regardless of it being on a new machine). Missed
|
110
|
-
scheduled jobs, however, will not fire upon recovery of the scheduler process.
|
111
|
-
|
112
|
-
|
113
102
|
|
114
103
|
### Delayed jobs
|
115
104
|
|
@@ -280,6 +269,25 @@ custom job class to support the #scheduled method:
|
|
280
269
|
end
|
281
270
|
end
|
282
271
|
|
272
|
+
### Redundancy and Fail-Over
|
273
|
+
|
274
|
+
*>= 2.0.1 only. Prior to 2.0.1, it is not recommended to run multiple resque-scheduler processes and will result in duplicate jobs.*
|
275
|
+
|
276
|
+
You may want to have resque-scheduler running on multiple machines for
|
277
|
+
redundancy. Electing a master and failover is built in and enabled by default. Simply
|
278
|
+
run resque-scheduler on as many machines as you want pointing to the same
|
279
|
+
redis instance and schedule. The scheduler processes will use redis to
|
280
|
+
elect a master process and detect failover when the master dies. Precautions are
|
281
|
+
taken to prevent jobs from potentially being queued twice during failover even
|
282
|
+
when the clocks of the scheduler machines are slightly out of sync (or load affects
|
283
|
+
scheduled job firing time). If you want the gory details, look at Resque::SchedulerLocking.
|
284
|
+
|
285
|
+
If the scheduler process(es) goes down for whatever reason, the delayed items
|
286
|
+
that should have fired during the outage will fire once the scheduler process
|
287
|
+
is started back up again (regardless of it being on a new machine). Missed
|
288
|
+
scheduled jobs, however, will not fire upon recovery of the scheduler process.
|
289
|
+
Think of scheduled (recurring) jobs as cron jobs - if you stop cron, it doesn't fire
|
290
|
+
missed jobs once it starts back up.
|
283
291
|
|
284
292
|
|
285
293
|
### resque-web Additions
|
data/lib/resque/scheduler.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
require 'rufus/scheduler'
|
2
2
|
require 'thwait'
|
3
|
+
require 'resque/scheduler_locking'
|
3
4
|
|
4
5
|
module Resque
|
5
6
|
|
6
7
|
class Scheduler
|
7
8
|
|
8
9
|
extend Resque::Helpers
|
10
|
+
extend Resque::SchedulerLocking
|
9
11
|
|
10
12
|
class << self
|
11
13
|
|
12
|
-
LOCK_TIMEOUT = 60 * 5
|
13
|
-
|
14
14
|
# If true, logs more stuff...
|
15
15
|
attr_accessor :verbose
|
16
16
|
|
@@ -39,43 +39,30 @@ module Resque
|
|
39
39
|
# trap signals
|
40
40
|
register_signal_handlers
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
reload_schedule!
|
50
|
-
else
|
51
|
-
load_schedule!
|
52
|
-
end
|
53
|
-
|
54
|
-
first_time = false
|
55
|
-
|
56
|
-
# Now start the scheduling part of the loop.
|
42
|
+
# Load the schedule into rufus
|
43
|
+
# If dynamic is set, load that schedule otherwise use normal load
|
44
|
+
if dynamic
|
45
|
+
reload_schedule!
|
46
|
+
else
|
47
|
+
load_schedule!
|
48
|
+
end
|
57
49
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
poll_sleep
|
50
|
+
# Now start the scheduling part of the loop.
|
51
|
+
loop do
|
52
|
+
if is_master?
|
53
|
+
begin
|
54
|
+
handle_delayed_items
|
55
|
+
update_schedule if dynamic
|
56
|
+
rescue Errno::EAGAIN, Errno::ECONNRESET => e
|
57
|
+
warn e.message
|
67
58
|
end
|
68
|
-
|
69
|
-
unlock_scheduler
|
70
|
-
clear_schedule!
|
71
|
-
|
72
|
-
else
|
73
|
-
puts "Scheduler locked!!!"
|
74
|
-
sleep 5
|
75
59
|
end
|
60
|
+
poll_sleep
|
76
61
|
end
|
62
|
+
|
77
63
|
# never gets here.
|
78
64
|
end
|
65
|
+
|
79
66
|
|
80
67
|
# For all signals, set the shutdown flag and wait for current
|
81
68
|
# poll/enqueuing to finish (should be almost instant). In the
|
@@ -151,8 +138,10 @@ module Resque
|
|
151
138
|
if !config[interval_type].nil? && config[interval_type].length > 0
|
152
139
|
args = optionizate_interval_value(config[interval_type])
|
153
140
|
@@scheduled_jobs[name] = rufus_scheduler.send(interval_type, *args) do
|
154
|
-
|
155
|
-
|
141
|
+
if is_master?
|
142
|
+
log! "queueing #{config['class']} (#{name})"
|
143
|
+
handle_errors { enqueue_from_config(config) }
|
144
|
+
end
|
156
145
|
end
|
157
146
|
interval_defined = true
|
158
147
|
break
|
@@ -187,7 +176,8 @@ module Resque
|
|
187
176
|
item = nil
|
188
177
|
begin
|
189
178
|
handle_shutdown do
|
190
|
-
|
179
|
+
# Continually check that it is still the master
|
180
|
+
if is_master? && item = Resque.next_item_for_timestamp(timestamp)
|
191
181
|
log "queuing #{item['class']} [delayed]"
|
192
182
|
handle_errors { enqueue_from_config(item) }
|
193
183
|
end
|
@@ -197,16 +187,8 @@ module Resque
|
|
197
187
|
end
|
198
188
|
|
199
189
|
def handle_shutdown
|
200
|
-
begin
|
201
|
-
unlock_scheduler if @shutdown
|
202
|
-
rescue
|
203
|
-
end
|
204
190
|
exit if @shutdown
|
205
191
|
yield
|
206
|
-
begin
|
207
|
-
unlock_scheduler if @shutdown
|
208
|
-
rescue
|
209
|
-
end
|
210
192
|
exit if @shutdown
|
211
193
|
end
|
212
194
|
|
@@ -324,37 +306,6 @@ module Resque
|
|
324
306
|
$0 = "resque-scheduler-#{ResqueScheduler::VERSION}: #{string}"
|
325
307
|
end
|
326
308
|
|
327
|
-
def lock_timeout
|
328
|
-
Time.now.utc.to_i + LOCK_TIMEOUT + 1
|
329
|
-
end
|
330
|
-
|
331
|
-
def can_lock_scheduler?
|
332
|
-
#using logic from http://redis.io/commands/getset
|
333
|
-
got_lock = Resque.redis.setnx('scheduler:lock', lock_timeout)
|
334
|
-
puts "First get lock #{got_lock}"
|
335
|
-
unless got_lock
|
336
|
-
timestamp = Resque.redis.get('scheduler:lock').to_i
|
337
|
-
puts "Timestamp: #{timestamp}"
|
338
|
-
timestamp_now = Time.now.utc.to_i
|
339
|
-
puts "Timestamp Now: #{timestamp_now}"
|
340
|
-
if timestamp_now > timestamp
|
341
|
-
timestamp_old = Resque.redis.getset('scheduler:lock', lock_timeout).to_i
|
342
|
-
puts "Timestamp Old: #{timestamp_old}"
|
343
|
-
if timestamp_old < timestamp_now
|
344
|
-
puts "Got lock here"
|
345
|
-
got_lock = true
|
346
|
-
end
|
347
|
-
end
|
348
|
-
end
|
349
|
-
puts "Second get lock #{got_lock}"
|
350
|
-
got_lock
|
351
|
-
end
|
352
|
-
|
353
|
-
def unlock_scheduler
|
354
|
-
puts "Unlocking scheduler lock"
|
355
|
-
Resque.redis.del('scheduler:lock')
|
356
|
-
end
|
357
|
-
|
358
309
|
end
|
359
310
|
|
360
311
|
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
# ### Locking the scheduler process
|
3
|
+
#
|
4
|
+
# There are two places in resque-scheduler that need to be synchronized
|
5
|
+
# in order to be able to run redundant scheduler processes while ensuring jobs don't
|
6
|
+
# get queued multiple times when the master process changes.
|
7
|
+
#
|
8
|
+
# 1) Processing the delayed queues (jobs that are created from enqueue_at/enqueue_in, etc)
|
9
|
+
# 2) Processing the scheduled (cron-like) jobs from rufus-scheduler
|
10
|
+
#
|
11
|
+
# Protecting the delayed queues (#1) is relatively easy. A simple SETNX in
|
12
|
+
# redis would suffice. However, protecting the scheduled jobs is trickier
|
13
|
+
# because the clocks on machines could be slightly off or actual firing times
|
14
|
+
# could vary slightly due to load. If scheduler A's clock is slightly ahead
|
15
|
+
# of scheduler B's clock (since they are on different machines), when
|
16
|
+
# scheduler A dies, we need to ensure that scheduler B doesn't queue jobs
|
17
|
+
# that A already queued before its death. (This all assumes that it is
|
18
|
+
# better to miss a few scheduled jobs than it is to run them multiple times
|
19
|
+
# for the same iteration.)
|
20
|
+
#
|
21
|
+
# To avoid queuing multiple jobs in the case of master fail-over, the master
|
22
|
+
# should remain the master as long as it can rather than a simple SETNX which
|
23
|
+
# would result in the master role being passed around frequently.
|
24
|
+
#
|
25
|
+
# Locking Scheme:
|
26
|
+
# Each resque-scheduler process attempts to get the master lock via SETNX.
|
27
|
+
# Once obtained, it sets the expiration for 3 minutes (configurable). The
|
28
|
+
# master process continually updates the timeout on the lock key to be 3
|
29
|
+
# minutes in the future in its loop(s) (see `run`) and when jobs come out of
|
30
|
+
# rufus-scheduler (see `load_schedule_job`). That ensures that a minimum of
|
31
|
+
# 3 minutes must pass since the last queuing operation before a new master is
|
32
|
+
# chosen. If, for whatever reason, the master fails to update the expiration
|
33
|
+
# for 3 minutes, the key expires and the lock is up for grabs. If
|
34
|
+
# miraculously the original master comes back to life, it will realize it is
|
35
|
+
# no longer the master and stop processing jobs.
|
36
|
+
#
|
37
|
+
# The clocks on the scheduler machines can then be up to 3 minutes off from
|
38
|
+
# each other without the risk of queueing the same scheduled job twice during
|
39
|
+
# a master change. The catch is, in the event of a master change, no
|
40
|
+
# scheduled jobs will be queued during those 3 minutes. So, there is a trade
|
41
|
+
# off: the higher the timeout, the less likely scheduled jobs will be fired
|
42
|
+
# twice but greater chances of missing scheduled jobs. The lower the timeout,
|
43
|
+
# less likely jobs will be missed, greater the chances of jobs firing twice. If
|
44
|
+
# you don't care about jobs firing twice or are certain your machines' clocks
|
45
|
+
# are well in sync, a lower timeout is preferable. One thing to keep in mind:
|
46
|
+
# this only affects *scheduled* jobs - delayed jobs will never be lost or
|
47
|
+
# skipped since eventually a master will come online and it will process
|
48
|
+
# everything that is ready (no matter how old it is). Scheduled jobs work
|
49
|
+
# like cron - if you stop cron, no jobs fire while it's stopped and it doesn't
|
50
|
+
# fire jobs that were missed when it starts up again.
|
51
|
+
|
52
|
+
module Resque
|
53
|
+
|
54
|
+
module SchedulerLocking
|
55
|
+
|
56
|
+
# The TTL (in seconds) for the master lock
|
57
|
+
def lock_timeout=(v)
|
58
|
+
@lock_timeout = v
|
59
|
+
end
|
60
|
+
|
61
|
+
def lock_timeout
|
62
|
+
@lock_timeout ||= 60 * 3 # 3 minutes
|
63
|
+
end
|
64
|
+
|
65
|
+
def hostname
|
66
|
+
Socket.gethostbyname(Socket.gethostname).first
|
67
|
+
end
|
68
|
+
|
69
|
+
def process_id
|
70
|
+
Process.pid
|
71
|
+
end
|
72
|
+
|
73
|
+
def is_master?
|
74
|
+
acquire_master_lock! || has_master_lock?
|
75
|
+
end
|
76
|
+
|
77
|
+
def master_lock_value
|
78
|
+
[hostname, process_id].join(':')
|
79
|
+
end
|
80
|
+
|
81
|
+
def master_lock_key
|
82
|
+
:master_lock
|
83
|
+
end
|
84
|
+
|
85
|
+
def extend_lock!
|
86
|
+
# If the master fails to checkin for 3 minutes, the lock is released and is up for grabs
|
87
|
+
Resque.redis.expire(master_lock_key, lock_timeout)
|
88
|
+
end
|
89
|
+
|
90
|
+
def acquire_master_lock!
|
91
|
+
if Resque.redis.setnx(master_lock_key, master_lock_value)
|
92
|
+
extend_lock!
|
93
|
+
true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def has_master_lock?
|
98
|
+
if Resque.redis.get(master_lock_key) == master_lock_value
|
99
|
+
extend_lock!
|
100
|
+
# Since this process could lose the lock between checking
|
101
|
+
# if it has it and extending the lock, check again to make
|
102
|
+
# sure it still has it.
|
103
|
+
if Resque.redis.get(master_lock_key) == master_lock_value
|
104
|
+
true
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
data/lib/resque_scheduler.rb
CHANGED
@@ -258,6 +258,9 @@ module ResqueScheduler
|
|
258
258
|
|
259
259
|
def clean_up_timestamp(key, timestamp)
|
260
260
|
# If the list is empty, remove it.
|
261
|
+
|
262
|
+
# Use a watch here to ensure nobody adds jobs to this delayed
|
263
|
+
# queue while we're removing it.
|
261
264
|
redis.watch key
|
262
265
|
if 0 == redis.llen(key).to_i
|
263
266
|
redis.multi do
|
@@ -268,6 +271,7 @@ module ResqueScheduler
|
|
268
271
|
redis.unwatch
|
269
272
|
end
|
270
273
|
end
|
274
|
+
|
271
275
|
def validate_job!(klass)
|
272
276
|
if klass.to_s.empty?
|
273
277
|
raise Resque::NoClassError.new("Jobs must be given a class.")
|
data/test/scheduler_test.rb
CHANGED
@@ -4,8 +4,7 @@ context "Resque::Scheduler" do
|
|
4
4
|
|
5
5
|
setup do
|
6
6
|
Resque::Scheduler.dynamic = false
|
7
|
-
Resque.redis.
|
8
|
-
Resque.redis.del(:schedules_changed)
|
7
|
+
Resque.redis.flushall
|
9
8
|
Resque::Scheduler.mute = true
|
10
9
|
Resque::Scheduler.clear_schedule!
|
11
10
|
Resque::Scheduler.send(:class_variable_set, :@@scheduled_jobs, {})
|
@@ -237,6 +236,36 @@ context "Resque::Scheduler" do
|
|
237
236
|
assert Resque.redis.sismember(:schedules_changed, "some_ivar_job3")
|
238
237
|
end
|
239
238
|
|
239
|
+
test "has_master_lock? returns false if lock is set to something else" do
|
240
|
+
Resque.redis.set(Resque::Scheduler.master_lock_key, "someothermachine:1234")
|
241
|
+
assert !Resque::Scheduler.has_master_lock?
|
242
|
+
end
|
243
|
+
|
244
|
+
test "has_master_lock? returns true if process has lock" do
|
245
|
+
assert Resque::Scheduler.acquire_master_lock!, "Should have acquired the master lock"
|
246
|
+
assert Resque::Scheduler.has_master_lock?, "Should have the master lock"
|
247
|
+
end
|
248
|
+
|
249
|
+
test "has_master_lock? extends the TTL of the lock key" do
|
250
|
+
Resque.redis.setex(Resque::Scheduler.master_lock_key, 5, Resque::Scheduler.master_lock_value)
|
251
|
+
Resque::Scheduler.has_master_lock?
|
252
|
+
assert Resque.redis.ttl(Resque::Scheduler.master_lock_key) > 5, "TTL should have been updated to 180"
|
253
|
+
end
|
254
|
+
|
255
|
+
test "acquire_master_lock! sets the TTL" do
|
256
|
+
assert Resque::Scheduler.acquire_master_lock!
|
257
|
+
assert (175..185).include?(Resque.redis.ttl(Resque::Scheduler.master_lock_key)), "TTL should have been updated to 180"
|
258
|
+
end
|
259
|
+
|
260
|
+
test "is_master? should return true if process already has master lock" do
|
261
|
+
assert Resque::Scheduler.acquire_master_lock!, "Should have acquired the master lock"
|
262
|
+
assert Resque::Scheduler.is_master?, "Should have the lock"
|
263
|
+
end
|
264
|
+
|
265
|
+
test "is_master? should return true if it needs to acquire the lock" do
|
266
|
+
assert Resque::Scheduler.is_master?, "Should acquire the lock"
|
267
|
+
end
|
268
|
+
|
240
269
|
test "adheres to lint" do
|
241
270
|
assert_nothing_raised do
|
242
271
|
Resque::Plugin.lint(Resque::Scheduler)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nogara-resque-scheduler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -91,6 +91,7 @@ files:
|
|
91
91
|
- README.markdown
|
92
92
|
- Rakefile
|
93
93
|
- lib/resque/scheduler.rb
|
94
|
+
- lib/resque/scheduler_locking.rb
|
94
95
|
- lib/resque_scheduler.rb
|
95
96
|
- lib/resque_scheduler/plugin.rb
|
96
97
|
- lib/resque_scheduler/server.rb
|