tom_queue 0.0.1.dev
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/tom_queue.rb +56 -0
- data/lib/tom_queue/deferred_work_manager.rb +233 -0
- data/lib/tom_queue/deferred_work_set.rb +165 -0
- data/lib/tom_queue/delayed_job.rb +33 -0
- data/lib/tom_queue/delayed_job/external_messages.rb +56 -0
- data/lib/tom_queue/delayed_job/job.rb +365 -0
- data/lib/tom_queue/external_consumer.rb +136 -0
- data/lib/tom_queue/logging_helper.rb +19 -0
- data/lib/tom_queue/queue_manager.rb +264 -0
- data/lib/tom_queue/sorted_array.rb +69 -0
- data/lib/tom_queue/work.rb +62 -0
- data/spec/database.yml +14 -0
- data/spec/helper.rb +75 -0
- data/spec/tom_queue/deferred_work/deferred_work_manager_integration_spec.rb +186 -0
- data/spec/tom_queue/deferred_work/deferred_work_manager_spec.rb +134 -0
- data/spec/tom_queue/deferred_work/deferred_work_set_spec.rb +134 -0
- data/spec/tom_queue/delayed_job/delayed_job_integration_spec.rb +155 -0
- data/spec/tom_queue/delayed_job/delayed_job_spec.rb +818 -0
- data/spec/tom_queue/external_consumer_integration_spec.rb +225 -0
- data/spec/tom_queue/helper.rb +91 -0
- data/spec/tom_queue/logging_helper_spec.rb +152 -0
- data/spec/tom_queue/queue_manager_spec.rb +218 -0
- data/spec/tom_queue/sorted_array_spec.rb +160 -0
- data/spec/tom_queue/tom_queue_integration_spec.rb +296 -0
- data/spec/tom_queue/tom_queue_spec.rb +30 -0
- data/spec/tom_queue/work_spec.rb +35 -0
- data/tom_queue.gemspec +21 -0
- metadata +137 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
module TomQueue
  # Integration point between TomQueue and Delayed Job: holds the priority
  # mapping, the list of external message handlers and the hook that switches
  # the Delayed Job backend over to TomQueue::DelayedJob::Job.
  module DelayedJob

    require 'tom_queue/delayed_job/external_messages'
    require 'tom_queue/delayed_job/job'

    # Every method defined below is exposed as a module-level function,
    # e.g. TomQueue::DelayedJob.priority_map.
    module_function

    # Public: Map of external priority values to TomQueue priority levels.
    #
    # Created on first access as an empty Hash whose default value is
    # TomQueue::NORMAL_PRIORITY.
    #
    # Returns the shared Hash.
    def priority_map
      @@priority_map ||= Hash.new(TomQueue::NORMAL_PRIORITY)
    end

    # Public: Installs the dynamic patches into Delayed Job to move scheduling
    # over to AMQP. Generally called from a Rails initializer.
    #
    # Sets the worker sleep delay to zero and swaps the worker backend for
    # TomQueue::DelayedJob::Job.
    def apply_hook!
      Delayed::Worker.sleep_delay = 0
      Delayed::Worker.backend = TomQueue::DelayedJob::Job
    end

    # Public: Replace the list of external message handlers.
    #
    # new_handlers - the collection of handler classes to use from now on
    def handlers=(new_handlers)
      @@handlers = new_handlers
    end

    # Public: The external message handlers, defaulting to an empty Array on
    # first access.
    def handlers
      @@handlers ||= []
    end

  end
end
|
33
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module TomQueue
|
4
|
+
module DelayedJob
|
5
|
+
|
6
|
+
# Internal: This is mixed into the Job class, in order to support the handling of
|
7
|
+
# externally sourced AMQP messages
|
8
|
+
#
|
9
|
+
module ExternalMessages
|
10
|
+
extend ActiveSupport::Concern
|
11
|
+
|
12
|
+
module ClassMethods
|
13
|
+
|
14
|
+
# Internal: This resolves the correct handler for a given AMQP response
|
15
|
+
#
|
16
|
+
# work - the TomQueue::Work object
|
17
|
+
#
|
18
|
+
# Returns nil if no handler can be resolved
|
19
|
+
# Internal: Resolve and run the external handler for a given AMQP message.
#
# Each registered handler is asked, in order, whether it claims the work. The
# first truthy answer from claim_work? is used as the init block and called
# with the work object. claim_work? is consulted exactly once per handler, so
# the block that is called is the same value that made the handler win.
#
# work - the TomQueue::Work object
#
# Returns the Delayed::Job produced by the init block,
#         true if the init block returned anything other than a Delayed::Job,
#         false if no handler claimed the work.
def resolve_external_handler(work)
  handler    = nil
  init_block = nil

  # Look for a matching source exchange, keeping the claim result so the
  # handler does not have to be asked a second time.
  TomQueue::DelayedJob.handlers.each do |candidate|
    init_block = candidate.claim_work?(work)
    next unless init_block

    handler = candidate
    break
  end

  unless handler
    debug { "No external handler wants message, returning false" }
    return false
  end

  debug { "Resolved external handler #{handler} for message. Calling the init block." }

  job = init_block.call(work)
  if job.is_a?(Delayed::Job)
    debug { "Got a job #{job.id}"}
    job
  else
    debug { "Handler returned non-job, I presume that is it."}
    true
  end
end
|
42
|
+
|
43
|
+
# Internal: This is called to setup the external handlers with a given queue-manager
|
44
|
+
#
|
45
|
+
# queue_manager - TomQueue::QueueManager to configure against
|
46
|
+
#
|
47
|
+
# Internal: Let every registered external handler set up its binding against
# the given queue manager.
#
# queue_manager - TomQueue::QueueManager to configure against
#
# Returns the handler collection that was iterated.
def setup_external_handler(queue_manager)
  TomQueue::DelayedJob.handlers.each { |handler| handler.setup_binding(queue_manager) }
end
|
52
|
+
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,365 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module TomQueue
|
4
|
+
module DelayedJob
|
5
|
+
|
6
|
+
# This is our wrapper for Delayed::Job (ActiveRecord) which augments the
|
7
|
+
# save operations with AMQP notifications and replaces the reserve method
|
8
|
+
# with a blocking AMQP pop operation.
|
9
|
+
#
|
10
|
+
# Since we want to retain the behaviour of Delayed::Job we over publish
|
11
|
+
# messages and work out if a job is ready to run in the reserve method.
|
12
|
+
#
|
13
|
+
# In order to prevent the worker considering stale job states, we attach
|
14
|
+
# two pieces of information, the job ID, so the job can be located and a
|
15
|
+
# digest of the record attributes so stale notifications can be detected.
|
16
|
+
#
|
17
|
+
# This means that the worker can simply load a job and, if a record is
|
18
|
+
# found, quickly drop the notification if any of the attributes have been
|
19
|
+
# changed since the message was published. Another notification will
|
20
|
+
# likely be en-route.
|
21
|
+
#
|
22
|
+
# Cases to consider:
|
23
|
+
#
|
24
|
+
# - after the commit of a transaction creating a job, we publish
|
25
|
+
# a message. We do this after commit as we want to make sure the
|
26
|
+
# worker considers the job when it has hit stable storage and will be
|
27
|
+
# found.
|
28
|
+
#
|
29
|
+
# - after the commit of a tx updating a job, we also publish.
|
30
|
+
# consider the scenario, job is created to run tomorrow, then updated
|
31
|
+
# to run in an hour. The first message will only get to the worker
|
32
|
+
# tomorrow, so we publish a second message to arrive in an hour and
|
33
|
+
# know the worker will disregard the message that arrives tomorrow.
|
34
|
+
#
|
35
|
+
# - rather than leaving the job un-acked for the duration of the process,
|
36
|
+
# we load the job, lock it and then re-publish a message that will
|
37
|
+
# trigger a worker after the maximum run duration. This will likely
|
38
|
+
# just be dropped since the job will have run successfully and been
|
39
|
+
# deleted, but equally could catch a job that has crashed the worker.
|
40
|
+
# This ties into the behaviour of DJ more closely than leaving the job
|
41
|
+
# un-acked.
|
42
|
+
#
|
43
|
+
# During the worker reserve method, we do a number of things:
|
44
|
+
#
|
45
|
+
# - look up the job by ID. We do this with an explicit pessimistic write
|
46
|
+
# lock for update, so concurrent workers block.
|
47
|
+
#
|
48
|
+
# - if there is no record, we ack the AMQP message and do nothing.
|
49
|
+
#
|
50
|
+
# - if there is a record, we lock the job with our worker and save it.
|
51
|
+
# (releasing the lock) At this point, concurrent workers won't find
|
52
|
+
# the job as it has been DJ locked by this worker.
|
53
|
+
#
|
54
|
+
# - when the job completes, we ack the message from the broker, and we're
|
55
|
+
# done.
|
56
|
+
#
|
57
|
+
# - in the event we get a message and the job is locked, the most likely
|
58
|
+
# reason is the other worker has crashed and the broker has re-delivered.
|
59
|
+
# Since the job will have been updated (to lock it) the digest won't match
|
60
|
+
# so we schedule a message to pick up the job when the max_run_time is
|
61
|
+
# reached.
|
62
|
+
#
|
63
|
+
class Job < ::Delayed::Backend::ActiveRecord::Job
|
64
|
+
|
65
|
+
include TomQueue::LoggingHelper
|
66
|
+
include TomQueue::DelayedJob::ExternalMessages
|
67
|
+
|
68
|
+
# Public: This provides a shared queue manager object, instantiated on
|
69
|
+
# the first call
|
70
|
+
#
|
71
|
+
# Returns a TomQueue::QueueManager instance
|
72
|
+
# Public: The shared queue manager, built on the first call.
#
# The freshly created manager is handed to setup_external_handler before it
# is cached, so a failure there leaves nothing cached.
#
# Returns a TomQueue::QueueManager instance
def self.tomqueue_manager
  @@tomqueue_manager ||= begin
    manager = TomQueue::QueueManager.new
    setup_external_handler(manager)
    manager
  end
end
|
77
|
+
|
78
|
+
# Public: This calls #tomqueue_publish on all jobs currently
|
79
|
+
# in the delayed_job table. This will probably end up with
|
80
|
+
# duplicate messages, but the worker should do the right thing
|
81
|
+
#
|
82
|
+
# Jobs should automatically publish themselves, so you should only
|
83
|
+
# need to call this if you think TomQueue is misbehaving, or you're
|
84
|
+
# re-populating an empty queue server.
|
85
|
+
#
|
86
|
+
# Returns nil
|
87
|
+
# Public: Walk every record in the table (in batches, via find_each) and call
# #tomqueue_publish on each one with its default delivery time.
def self.tomqueue_republish
  find_each(&:tomqueue_publish)
end
|
90
|
+
|
91
|
+
# Private: Skip the implicit tomqueue_publish when a record is being saved
|
92
|
+
attr_accessor :skip_publish
|
93
|
+
|
94
|
+
# This triggers the publish whenever a record is saved (and committed to
|
95
|
+
# stable storage).
|
96
|
+
#
|
97
|
+
# It's also worth noting that after_commit masks exceptions, so a failed
|
98
|
+
# publish won't bring down the caller.
|
99
|
+
#
|
100
|
+
# After each save, queue this record for publishing - but only when it is
# persisted, has a run_at, has not failed and publishing is not being skipped.
after_save :tomqueue_trigger, :if => lambda {
  persisted? && !!run_at && !failed_at && !skip_publish
}

# Once the transaction is over, either publish (commit) or discard (rollback)
# whatever has been queued. Both callbacks are skipped when nothing is queued.
after_commit   :tomqueue_publish_triggers, :unless => lambda { self.class.tomqueue_triggers.empty? }
after_rollback :tomqueue_clear_triggers,   :unless => lambda { self.class.tomqueue_triggers.empty? }

# Class-level list of saved-but-not-yet-published jobs. It is a class
# variable, so a single array is shared by every instance.
@@tomqueue_triggers = []
cattr_reader :tomqueue_triggers
|
107
|
+
|
108
|
+
# Internal: Drain the class-level trigger list, publishing each queued job.
# Jobs are taken from the end of the list, so the most recently queued job is
# published first.
def tomqueue_publish_triggers
  pending = self.class.tomqueue_triggers
  loop do
    job = pending.pop
    break unless job
    job.tomqueue_publish
  end
end

# Internal: Throw away every queued trigger without publishing anything.
def tomqueue_clear_triggers
  pending = self.class.tomqueue_triggers
  pending.clear
end

# Internal: Queue this job on the class-level trigger list.
def tomqueue_trigger
  self.class.tomqueue_triggers.push(self)
end
|
119
|
+
|
120
|
+
# Public: Send a notification to a worker to consider this job,
|
121
|
+
# via AMQP. This is called automatically when a job is created
|
122
|
+
# or updated (so you shouldn't need to call it directly unless
|
123
|
+
# you believe TomQueue is misbehaving)
|
124
|
+
#
|
125
|
+
# custom_run_at - when this message should be delivered.
|
126
|
+
# (Optional, defaults to the job's run_at time)
|
127
|
+
#
|
128
|
+
# Returns nil
|
129
|
+
# Public: Publish an AMQP notification asking a worker to consider this job.
#
# custom_run_at - delivery time for the message; when nil the job's run_at
#                 is used instead.
#
# Returns nil when skip_publish is set, otherwise whatever the queue
# manager's publish call returns.
# Raises ArgumentError for an unsaved record. Any exception raised in here
# (including that one) is passed to TomQueue.exception_reporter, if one is
# configured, logged, and then re-raised.
def tomqueue_publish(custom_run_at=nil)
  return nil if self.skip_publish
  raise ArgumentError, "cannot publish an unsaved Delayed::Job object" if new_record?

  delay = ((custom_run_at || self.run_at) - Time.now).round(2)
  debug "[tomqueue_publish] Pushing notification for #{self.id} to run in #{delay}"

  # Resolved in the same order as a single inline call would evaluate them:
  # manager, payload, delivery time, priority.
  manager    = self.class.tomqueue_manager
  payload    = tomqueue_payload
  deliver_at = custom_run_at || self.run_at

  manager.publish(payload, :run_at => deliver_at, :priority => tomqueue_priority)

rescue Exception => e
  reporter = TomQueue.exception_reporter
  reporter.notify(e) if reporter

  error "[tomqueue_publish] Exception during publish: #{e.inspect}"
  e.backtrace.each { |line| info line }

  raise
end
|
150
|
+
|
151
|
+
# Private: Returns the mapped tom-queue priority for this job's priority value
|
152
|
+
# Private: Look up the TomQueue priority for this job's priority value.
#
# The map is read with fetch and an explicit nil fallback, which bypasses the
# hash's own default value, so an unmapped priority can be detected and
# logged rather than silently defaulted.
#
# Returns the mapped priority, or TomQueue::NORMAL_PRIORITY (after logging a
# warning) when the priority is unmapped or mapped to nil.
def tomqueue_priority
  mapped = TomQueue::DelayedJob.priority_map.fetch(self.priority, nil)
  return mapped unless mapped.nil?

  warn "[tomqueue_priority] Unknown priority level #{self.priority} specified, mapping to NORMAL priority"
  TomQueue::NORMAL_PRIORITY
end
|
160
|
+
|
161
|
+
# Private: Prepare an AMQP payload for this job
|
162
|
+
#
|
163
|
+
# This is used by both #tomqueue_publish as well as tests to avoid
|
164
|
+
# maintaining mock payloads all over the place.
|
165
|
+
#
|
166
|
+
# Returns a string
|
167
|
+
# Private: Build the AMQP payload for this job.
#
# The payload carries the job id, the attribute digest and the updated_at
# timestamp (ISO 8601, no fractional seconds).
#
# Returns a JSON string
def tomqueue_payload
  fields = {}
  fields["delayed_job_id"]         = self.id
  fields["delayed_job_digest"]     = tomqueue_digest
  fields["delayed_job_updated_at"] = self.updated_at.iso8601(0)
  JSON.dump(fields)
end
|
174
|
+
|
175
|
+
# Private: Calculate a hexdigest of the attributes
|
176
|
+
#
|
177
|
+
# This is used to detect if the received message is stale, as it's
|
178
|
+
# sent as part of the AMQP payload and then re-calculated when the
|
179
|
+
# worker is about to run the job.
|
180
|
+
#
|
181
|
+
# Returns a string
|
182
|
+
# Values of exactly these classes are digested via to_i rather than to_s.
BROKEN_DIGEST_CLASSES = [DateTime, Time, ActiveSupport::TimeWithZone]

# Private: MD5 hexdigest over the record's attributes.
#
# Each attribute becomes a [name, value] pair, with the value converted by
# to_i when its class is listed in BROKEN_DIGEST_CLASSES and by to_s
# otherwise. The digest is taken over the string form of that pair list, so
# it depends on the order in which attributes are returned.
#
# Returns a string
def tomqueue_digest
  pairs = self.attributes.map do |name, value|
    normalised = BROKEN_DIGEST_CLASSES.include?(value.class) ? value.to_i : value.to_s
    [name, normalised]
  end
  Digest::MD5.hexdigest(pairs.to_s)
end
|
187
|
+
|
188
|
+
|
189
|
+
# Public: is this job locked
|
190
|
+
#
|
191
|
+
# Returns boolean true if the job has been locked by a worker
|
192
|
+
# Public: Is this job currently locked by a worker?
#
# A job counts as locked only when both locked_by and locked_at are set and
# locked_at plus Delayed::Worker.max_run_time has not yet passed.
#
# Returns boolean
def locked?
  return false unless locked_by && locked_at

  (locked_at + Delayed::Worker.max_run_time) >= Delayed::Job.db_time_now
end
|
195
|
+
|
196
|
+
# Public: Retrieves a job with a specific ID, acquiring a lock
|
197
|
+
# preventing other concurrent workers from doing the same.
|
198
|
+
#
|
199
|
+
# job_id - the ID of the job to acquire
|
200
|
+
# worker - the Delayed::Worker attempting to acquire the lock
|
201
|
+
# block - if provided, it is yielded with the job object as the only argument
|
202
|
+
# whilst the job record is locked.
|
203
|
+
# If the block returns true, the lock is acquired.
|
204
|
+
# If the block returns false, the call will return nil
|
205
|
+
#
|
206
|
+
# NOTE: when a job has a stale lock, the block isn't yielded, as it is presumed
|
207
|
+
# the job has started somewhere and crashed out - so we just return immediately
|
208
|
+
# as it will have previously passed the validity check (and may have changed since).
|
209
|
+
#
|
210
|
+
# Returns * a Delayed::Job instance if the job was found and lock acquired
|
211
|
+
# * nil if the job wasn't found
|
212
|
+
# * false if the job was found, but the lock wasn't acquired.
|
213
|
+
# Public: Load a job by ID under a row lock and try to take the DJ lock.
#
# job_id - the ID of the job to acquire
# worker - the Delayed::Worker attempting to acquire the lock
# block  - optional validity check, yielded the job while the row is locked;
#          it is only consulted when the job has no lock fields set at all,
#          and must return exactly true for the lock to be taken.
#
# Returns the job when the lock was taken,
#         false when the job is currently locked,
#         nil when the job is missing, failed, rejected by the block, or
#         not due for more than five seconds (in which case it is
#         re-published for its run_at).
def self.acquire_locked_job(job_id, worker)

  # Everything happens inside one transaction holding a write lock on the
  # record, so concurrent workers in the same position block. The value of
  # the branch that runs is the value of the transaction block.
  Delayed::Job.transaction do

    candidate = Delayed::Job.find_by_id(job_id, :lock => true)

    if candidate.nil?
      nil

    elsif candidate.failed?
      warn "[tomqueue] Received notification for failed job #{candidate.id}"
      nil

    elsif candidate.locked?
      false

    # Leftover lock fields (a lock that no longer counts as held) skip the
    # block entirely; otherwise the block, if any, has the final say.
    elsif candidate.locked_at || candidate.locked_by || !block_given? || yield(candidate) == true

      if candidate.run_at > self.db_time_now + 5
        warn "[tomqueue] Received early notification for job #{candidate.id} - expected at #{candidate.run_at}"

        candidate.tomqueue_publish(candidate.run_at)

        nil
      else
        # Saving the lock fields must not publish another notification.
        candidate.skip_publish = true

        candidate.locked_by = worker.name
        candidate.locked_at = self.db_time_now
        candidate.save!

        candidate.skip_publish = nil

        candidate
      end

    else
      nil
    end

  end
end
|
259
|
+
|
260
|
+
# Public: Called by Delayed::Worker to retrieve the next job to process
|
261
|
+
#
|
262
|
+
# This is the glue between TomQueue and DelayedJob and implements most of
|
263
|
+
# the behaviour discussed above.
|
264
|
+
#
|
265
|
+
# This function will block until a job becomes available to process. It tweaks
|
266
|
+
# the `Delayed::Worker.raise_signal_exceptions` during the blocking stage so
|
267
|
+
# the process can be interrupted.
|
268
|
+
#
|
269
|
+
# Returns Delayed::Job instance for the next job to process.
|
270
|
+
# Public: Called by Delayed::Worker to retrieve the next job to process.
#
# Blocks on the queue manager's pop until a notification arrives, then either
# hands it to an external handler or resolves it to a job record and takes
# the DJ lock on that record via acquire_locked_job.
#
# worker       - the Delayed::Worker asking for work
# max_run_time - used to schedule the follow-up notification for a job that
#                is already locked (defaults to Delayed::Worker.max_run_time)
#
# Returns the locked job (with tomqueue_work set to the notification), or
#         the job returned by an external handler (notification already
#         acked), or nil when there is nothing to run.
# Raises any exception from the underlying calls, after logging and
# reporting it. A payload that is not valid JSON is acked, reported and
# dropped, returning nil.
def self.reserve(worker, max_run_time = Delayed::Worker.max_run_time)

  # Grab a job from the QueueManager - will block here, ensure we can be
  # interrupted! The previous setting is restored in an ensure so that an
  # exception raised while blocked does not leave the flag switched on.
  old_value = Delayed::Worker.raise_signal_exceptions
  Delayed::Worker.raise_signal_exceptions = true
  begin
    work = self.tomqueue_manager.pop
  ensure
    Delayed::Worker.raise_signal_exceptions = old_value
  end

  if work.nil?
    warn "[reserve] TomQueue#pop returned nil, stalling for a second."
    sleep 1.0

    nil
  else

    passthrough = resolve_external_handler(work)
    if passthrough == true
      # An external handler dealt with the message completely.
      work.ack!

      nil
    elsif passthrough == false

      decoded_payload = JSON.load(work.payload)
      job_id = decoded_payload['delayed_job_id']
      digest = decoded_payload['delayed_job_digest']

      debug "[reserve] Popped notification for #{job_id}"
      locked_job = self.acquire_locked_job(job_id, worker) do |job|
        # A notification without a digest is always accepted; otherwise the
        # record must still match the state the message was published for.
        digest.nil? || job.tomqueue_digest == digest
      end

      if locked_job
        info "[reserve] Acquired DB lock for job #{job_id}"

        # Kept un-acked until invoke_job has run.
        locked_job.tomqueue_work = work
      else
        work.ack!

        if locked_job == false
          # In this situation, we re-publish a message to run in max_run_time
          # since the likely scenario is a worker has crashed and the original
          # message was re-delivered.
          #
          # We schedule another AMQP message to arrive when the job's lock
          # will have expired. The record is re-read outside the locking
          # transaction, so it may have been deleted or unlocked by now: a
          # missing record is skipped, and a record without locked_at is
          # published with its default delivery time.
          job = Delayed::Job.find_by_id(job_id)
          if job
            follow_up_at = job.locked_at && (job.locked_at + max_run_time + 1)
            debug { "[reserve] Notified about locked job #{job.id}, will schedule follow up at #{follow_up_at || job.run_at}" }
            job.tomqueue_publish(follow_up_at)
          end

          locked_job = nil
        end
      end

      locked_job

    else
      # An external handler turned the message into a job to run.
      work.ack!

      passthrough
    end
  end

rescue JSON::ParserError => e
  work.ack!
  error "[reserve] Failed to parse JSON payload: #{e.message}. Dropping AMQP message."

  TomQueue.exception_reporter && TomQueue.exception_reporter.notify(e)

  nil

rescue Exception => e

  error "[reserve] Exception in reserve method: #{e.message}."
  TomQueue.exception_reporter && TomQueue.exception_reporter.notify(e)

  raise
end
|
346
|
+
|
347
|
+
# Internal: This is the AMQP notification object that triggered this job run
|
348
|
+
# and is used to ack! the work once the job has been invoked
|
349
|
+
#
|
350
|
+
# Returns nil or TomQueue::Work object
|
351
|
+
attr_accessor :tomqueue_work
|
352
|
+
|
353
|
+
# Internal: This wraps the job invocation with an acknowledgement of the original
|
354
|
+
# TomQueue work object, if one is around.
|
355
|
+
#
|
356
|
+
# Internal: Run the job via the parent implementation, then acknowledge the
# TomQueue work object that triggered it, if there is one. The ack sits in an
# ensure, so it happens whether the job returned normally or raised.
def invoke_job
  begin
    super
  ensure
    debug "[invoke job:#{self.id}] Invoke completed, acking message."
    self.tomqueue_work.ack! if self.tomqueue_work
  end
end
|
362
|
+
|
363
|
+
end
|
364
|
+
end
|
365
|
+
end
|