nogara-resque-scheduler 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +19 -11
- data/lib/resque/scheduler.rb +26 -75
- data/lib/resque/scheduler_locking.rb +111 -0
- data/lib/resque_scheduler.rb +4 -0
- data/lib/resque_scheduler/version.rb +1 -1
- data/test/scheduler_test.rb +31 -2
- metadata +3 -2
data/README.markdown
CHANGED
@@ -99,17 +99,6 @@ any nonempty value, they will take effect. `VERBOSE` simply dumps more output
 to stdout. `MUTE` does the opposite and silences all output. `MUTE`
 supersedes `VERBOSE`.
 
-NOTE: You DO NOT want to run >1 instance of the scheduler. Doing so will
-result in the same job being queued more than once. You only need one
-instance of the scheduler running per resque instance (regardless of number
-of machines).
-
-If the scheduler process goes down for whatever reason, the delayed items
-that should have fired during the outage will fire once the scheduler process
-is started back up again (regardless of it being on a new machine). Missed
-scheduled jobs, however, will not fire upon recovery of the scheduler process.
-
-
 
 ### Delayed jobs
 
@@ -280,6 +269,25 @@ custom job class to support the #scheduled method:
 end
 end
 
+### Redundancy and Fail-Over
+
+*>= 2.0.1 only. Prior to 2.0.1, running multiple resque-scheduler processes is not recommended and will result in duplicate jobs.*
+
+You may want to have resque-scheduler running on multiple machines for
+redundancy. Electing a master and failover is built in and on by default. Simply
+run resque-scheduler on as many machines as you want pointing to the same
+redis instance and schedule. The scheduler processes will use redis to
+elect a master process and detect failover when the master dies. Precautions are
+taken to prevent jobs from potentially being queued twice during failover even
+when the clocks of the scheduler machines are slightly out of sync (or load affects
+scheduled job firing time). If you want the gory details, look at Resque::SchedulerLocking.
+
+If the scheduler process(es) go down for whatever reason, the delayed items
+that should have fired during the outage will fire once the scheduler process
+is started back up again (regardless of it being on a new machine). Missed
+scheduled jobs, however, will not fire upon recovery of the scheduler process.
+Think of scheduled (recurring) jobs as cron jobs - if you stop cron, it doesn't fire
+missed jobs once it starts back up.
 
 
 ### resque-web Additions
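The fail-over described above needs no extra configuration: every scheduler process pointed at the same redis takes part in the election. As an illustrative sketch only (the initializer path and the 90-second value are hypothetical, not defaults of the gem), the lock TTL can be tuned via the `lock_timeout=` writer that `Resque::Scheduler` gains from `Resque::SchedulerLocking`:

```ruby
# config/initializers/resque_scheduler.rb (hypothetical location)
require 'resque_scheduler'

Resque.redis = 'redis.example.com:6379' # same redis on every machine

# Lower TTL: faster fail-over after a master dies, but a higher chance of
# double-firing a scheduled job during the hand-off. Default is 180s (60 * 3).
Resque::Scheduler.lock_timeout = 90
```

Start a scheduler process on each machine as usual; whichever wins the redis lock becomes the master, and the others idle until the lock expires.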
data/lib/resque/scheduler.rb
CHANGED
@@ -1,16 +1,16 @@
 require 'rufus/scheduler'
 require 'thwait'
+require 'resque/scheduler_locking'
 
 module Resque
 
   class Scheduler
 
     extend Resque::Helpers
+    extend Resque::SchedulerLocking
 
     class << self
 
-      LOCK_TIMEOUT = 60 * 5
-
       # If true, logs more stuff...
       attr_accessor :verbose
 
@@ -39,43 +39,30 @@ module Resque
         # trap signals
         register_signal_handlers
 
-
-
-
-
-
-
-
-            reload_schedule!
-          else
-            load_schedule!
-          end
-
-          first_time = false
-
-          # Now start the scheduling part of the loop.
+        # Load the schedule into rufus
+        # If dynamic is set, load that schedule otherwise use normal load
+        if dynamic
+          reload_schedule!
+        else
+          load_schedule!
+        end
 
-
-
-
-
-
-
-
-
-          poll_sleep
+        # Now start the scheduling part of the loop.
+        loop do
+          if is_master?
+            begin
+              handle_delayed_items
+              update_schedule if dynamic
+            rescue Errno::EAGAIN, Errno::ECONNRESET => e
+              warn e.message
             end
-
-          unlock_scheduler
-          clear_schedule!
-
-        else
-          puts "Scheduler locked!!!"
-          sleep 5
           end
+          poll_sleep
         end
+
        # never gets here.
       end
+
 
       # For all signals, set the shutdown flag and wait for current
       # poll/enqueueing to finish (should be almost instant). In the
@@ -151,8 +138,10 @@ module Resque
           if !config[interval_type].nil? && config[interval_type].length > 0
             args = optionizate_interval_value(config[interval_type])
             @@scheduled_jobs[name] = rufus_scheduler.send(interval_type, *args) do
-
-
+              if is_master?
+                log! "queueing #{config['class']} (#{name})"
+                handle_errors { enqueue_from_config(config) }
+              end
             end
             interval_defined = true
             break
@@ -187,7 +176,8 @@ module Resque
         item = nil
         begin
           handle_shutdown do
-
+            # Continually check that it is still the master
+            if is_master? && item = Resque.next_item_for_timestamp(timestamp)
               log "queuing #{item['class']} [delayed]"
               handle_errors { enqueue_from_config(item) }
             end
@@ -197,16 +187,8 @@ module Resque
       end
 
       def handle_shutdown
-        begin
-          unlock_scheduler if @shutdown
-        rescue
-        end
         exit if @shutdown
         yield
-        begin
-          unlock_scheduler if @shutdown
-        rescue
-        end
         exit if @shutdown
       end
 
@@ -324,37 +306,6 @@ module Resque
         $0 = "resque-scheduler-#{ResqueScheduler::VERSION}: #{string}"
       end
 
-      def lock_timeout
-        Time.now.utc.to_i + LOCK_TIMEOUT + 1
-      end
-
-      def can_lock_scheduler?
-        #using logic from http://redis.io/commands/getset
-        got_lock = Resque.redis.setnx('scheduler:lock', lock_timeout)
-        puts "First get lock #{got_lock}"
-        unless got_lock
-          timestamp = Resque.redis.get('scheduler:lock').to_i
-          puts "Timestamp: #{timestamp}"
-          timestamp_now = Time.now.utc.to_i
-          puts "Timestamp Now: #{timestamp_now}"
-          if timestamp_now > timestamp
-            timestamp_old = Resque.redis.getset('scheduler:lock', lock_timeout).to_i
-            puts "Timestamp Old: #{timestamp_old}"
-            if timestamp_old < timestamp_now
-              puts "Got lock here"
-              got_lock = true
-            end
-          end
-        end
-        puts "Second get lock #{got_lock}"
-        got_lock
-      end
-
-      def unlock_scheduler
-        puts "Unlocking scheduler lock"
-        Resque.redis.del('scheduler:lock')
-      end
-
     end
 
   end
data/lib/resque/scheduler_locking.rb
ADDED
@@ -0,0 +1,111 @@
+
+# ### Locking the scheduler process
+#
+# There are two places in resque-scheduler that need to be synchronized
+# in order to be able to run redundant scheduler processes while ensuring jobs don't
+# get queued multiple times when the master process changes.
+#
+# 1) Processing the delayed queues (jobs that are created from enqueue_at/enqueue_in, etc)
+# 2) Processing the scheduled (cron-like) jobs from rufus-scheduler
+#
+# Protecting the delayed queues (#1) is relatively easy. A simple SETNX in
+# redis would suffice. However, protecting the scheduled jobs is trickier
+# because the clocks on machines could be slightly off or actual firing times
+# could vary slightly due to load. If scheduler A's clock is slightly ahead
+# of scheduler B's clock (since they are on different machines), when
+# scheduler A dies, we need to ensure that scheduler B doesn't queue jobs
+# that A already queued before its death. (This all assumes that it is
+# better to miss a few scheduled jobs than it is to run them multiple times
+# for the same iteration.)
+#
+# To avoid queuing multiple jobs in the case of master fail-over, the master
+# should remain the master as long as it can rather than a simple SETNX which
+# would result in the master role being passed around frequently.
+#
+# Locking Scheme:
+# Each resque-scheduler process attempts to get the master lock via SETNX.
+# Once obtained, it sets the expiration for 3 minutes (configurable). The
+# master process continually updates the timeout on the lock key to be 3
+# minutes in the future in its loop(s) (see `run`) and when jobs come out of
+# rufus-scheduler (see `load_schedule_job`). That ensures that a minimum of
+# 3 minutes must pass since the last queuing operation before a new master is
+# chosen. If, for whatever reason, the master fails to update the expiration
+# for 3 minutes, the key expires and the lock is up for grabs. If
+# miraculously the original master comes back to life, it will realize it is
+# no longer the master and stop processing jobs.
+#
+# The clocks on the scheduler machines can then be up to 3 minutes off from
+# each other without the risk of queueing the same scheduled job twice during
+# a master change. The catch is, in the event of a master change, no
+# scheduled jobs will be queued during those 3 minutes. So, there is a trade-
+# off: the higher the timeout, the less likely scheduled jobs will be fired
+# twice, but the greater the chance of missing scheduled jobs. The lower the
+# timeout, the less likely jobs will be missed, but the greater the chance of
+# jobs firing twice. If you don't care about jobs firing twice or are certain
+# your machines' clocks are well in sync, a lower timeout is preferable. One
+# thing to keep in mind: this only affects *scheduled* jobs - delayed jobs
+# will never be lost or skipped since eventually a master will come online
+# and it will process everything that is ready (no matter how old it is).
+# Scheduled jobs work like cron - if you stop cron, no jobs fire while it's
+# stopped and it doesn't fire jobs that were missed when it starts up again.
+
+module Resque
+
+  module SchedulerLocking
+
+    # The TTL (in seconds) for the master lock
+    def lock_timeout=(v)
+      @lock_timeout = v
+    end
+
+    def lock_timeout
+      @lock_timeout ||= 60 * 3 # 3 minutes
+    end
+
+    def hostname
+      Socket.gethostbyname(Socket.gethostname).first
+    end
+
+    def process_id
+      Process.pid
+    end
+
+    def is_master?
+      acquire_master_lock! || has_master_lock?
+    end
+
+    def master_lock_value
+      [hostname, process_id].join(':')
+    end
+
+    def master_lock_key
+      :master_lock
+    end
+
+    def extend_lock!
+      # If the master fails to check in for 3 minutes, the lock is released and is up for grabs
+      Resque.redis.expire(master_lock_key, lock_timeout)
+    end
+
+    def acquire_master_lock!
+      if Resque.redis.setnx(master_lock_key, master_lock_value)
+        extend_lock!
+        true
+      end
+    end
+
+    def has_master_lock?
+      if Resque.redis.get(master_lock_key) == master_lock_value
+        extend_lock!
+        # Since this process could lose the lock between checking
+        # if it has it and extending the lock, check again to make
+        # sure it still has it.
+        if Resque.redis.get(master_lock_key) == master_lock_value
+          true
+        end
+      end
+    end
+
+  end
+
+end
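A minimal sketch of exercising this module from a console (the `DemoScheduler` class and the 10-second TTL are hypothetical; `is_master?`, `lock_timeout=`, `master_lock_key`, and `master_lock_value` come from the module above):

```ruby
require 'socket' # Socket.gethostbyname, used by #hostname
require 'resque'
require 'resque/scheduler_locking'

class DemoScheduler
  extend Resque::SchedulerLocking
end

DemoScheduler.lock_timeout = 10 # shrink the 3-minute default for the demo

# The first process to call this wins the SETNX and becomes master; later
# calls from the same process extend the TTL and keep returning true.
if DemoScheduler.is_master?
  puts "master: #{DemoScheduler.master_lock_value}" # "hostname:pid"
else
  puts "standby; lock held by #{Resque.redis.get(DemoScheduler.master_lock_key)}"
end
```

Run the same snippet from two shells against one redis: the second stays on standby until the first stops extending the lock and the 10-second TTL lapses.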
data/lib/resque_scheduler.rb
CHANGED
@@ -258,6 +258,9 @@ module ResqueScheduler
 
   def clean_up_timestamp(key, timestamp)
     # If the list is empty, remove it.
+
+    # Use a watch here to ensure nobody adds jobs to this delayed
+    # queue while we're removing it.
     redis.watch key
     if 0 == redis.llen(key).to_i
       redis.multi do
@@ -268,6 +271,7 @@ module ResqueScheduler
       redis.unwatch
     end
   end
+
   def validate_job!(klass)
     if klass.to_s.empty?
       raise Resque::NoClassError.new("Jobs must be given a class.")
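The WATCH added above guards the emptiness check: if another client pushes onto the delayed list between LLEN and EXEC, redis discards the MULTI block instead of deleting a queue that just gained a job. A standalone sketch of the same check-then-delete pattern with the redis-rb client (the key name and timestamp are illustrative, not taken from the gem):

```ruby
require 'redis'

redis = Redis.new
timestamp = 1345075200          # illustrative delayed-queue timestamp
key = "delayed:#{timestamp}"

redis.watch(key)                # EXEC aborts if `key` changes after this point
if redis.llen(key).to_i == 0
  redis.multi do
    redis.del(key)              # remove the now-empty list...
    redis.zrem(:delayed_queue_schedule, timestamp) # ...and its schedule entry
  end                           # returns nil when the watch was violated
else
  redis.unwatch                 # list not empty; release the watch
end
```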
data/test/scheduler_test.rb
CHANGED
@@ -4,8 +4,7 @@ context "Resque::Scheduler" do
 
   setup do
     Resque::Scheduler.dynamic = false
-    Resque.redis.del(:schedules)
-    Resque.redis.del(:schedules_changed)
+    Resque.redis.flushall
     Resque::Scheduler.mute = true
     Resque::Scheduler.clear_schedule!
     Resque::Scheduler.send(:class_variable_set, :@@scheduled_jobs, {})
@@ -237,6 +236,36 @@ context "Resque::Scheduler" do
     assert Resque.redis.sismember(:schedules_changed, "some_ivar_job3")
   end
 
+  test "has_master_lock? returns false if lock is set to something else" do
+    Resque.redis.set(Resque::Scheduler.master_lock_key, "someothermachine:1234")
+    assert !Resque::Scheduler.has_master_lock?
+  end
+
+  test "has_master_lock? returns true if process has lock" do
+    assert Resque::Scheduler.acquire_master_lock!, "Should have acquired the master lock"
+    assert Resque::Scheduler.has_master_lock?, "Should have the master lock"
+  end
+
+  test "has_master_lock? extends the TTL of the lock key" do
+    Resque.redis.setex(Resque::Scheduler.master_lock_key, 5, Resque::Scheduler.master_lock_value)
+    Resque::Scheduler.has_master_lock?
+    assert Resque.redis.ttl(Resque::Scheduler.master_lock_key) > 5, "TTL should have been updated to 180"
+  end
+
+  test "acquire_master_lock! sets the TTL" do
+    assert Resque::Scheduler.acquire_master_lock!
+    assert (175..185).include?(Resque.redis.ttl(Resque::Scheduler.master_lock_key)), "TTL should have been updated to 180"
+  end
+
+  test "is_master? should return true if process already has master lock" do
+    assert Resque::Scheduler.acquire_master_lock!, "Should have acquired the master lock"
+    assert Resque::Scheduler.is_master?, "Should have the lock"
+  end
+
+  test "is_master? should return true if it needs to acquire the lock" do
+    assert Resque::Scheduler.is_master?, "Should acquire the lock"
+  end
+
   test "adheres to lint" do
     assert_nothing_raised do
       Resque::Plugin.lint(Resque::Scheduler)
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nogara-resque-scheduler
 version: !ruby/object:Gem::Version
-  version: 2.0.1
+  version: 2.0.2
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-
+date: 2012-08-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -91,6 +91,7 @@ files:
 - README.markdown
 - Rakefile
 - lib/resque/scheduler.rb
+- lib/resque/scheduler_locking.rb
 - lib/resque_scheduler.rb
 - lib/resque_scheduler/plugin.rb
 - lib/resque_scheduler/server.rb