tom_queue 0.0.1.dev

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,33 @@
module TomQueue
  # Module-level configuration for the Delayed Job integration: the priority
  # mapping, the list of external message handlers, and the hook that points
  # Delayed Job at the TomQueue backend.
  module DelayedJob

    require 'tom_queue/delayed_job/external_messages'
    require 'tom_queue/delayed_job/job'

    # Everything defined from here on is a module function, so it is callable
    # as TomQueue::DelayedJob.<name>.
    module_function

    # Public: The map from external priority values to the TomQueue priority
    # levels.
    #
    # The Hash is built on first use, with TomQueue::NORMAL_PRIORITY as its
    # default value, and the same object is returned on every later call.
    #
    # Returns the shared Hash.
    def priority_map
      @@priority_map ||= Hash.new(TomQueue::NORMAL_PRIORITY)
    end

    # Public: This installs the dynamic patches into Delayed Job to move
    # scheduling over to AMQP. Generally, this should be called during a
    # Rails initializer at some point.
    #
    # It sets the worker sleep delay to zero and makes
    # TomQueue::DelayedJob::Job the Delayed::Worker backend.
    def apply_hook!
      Delayed::Worker.sleep_delay = 0
      Delayed::Worker.backend = TomQueue::DelayedJob::Job
    end

    # Public: The registered external message handlers.
    #
    # Returns the handler list, which starts out as an empty Array when
    # nothing has been assigned yet.
    def handlers
      @@handlers ||= []
    end

    # Public: Replace the list of external message handlers.
    #
    # new_handlers - the new handler collection
    def handlers=(new_handlers)
      @@handlers = new_handlers
    end

  end
end
33
+
@@ -0,0 +1,56 @@
1
+ require 'active_support/concern'
2
+
3
+ module TomQueue
4
+ module DelayedJob
5
+
6
+ # Internal: This is mixed into the Job class, in order to support the handling of
7
+ # externally sourced AMQP messages
8
+ #
9
+ module ExternalMessages
10
+ extend ActiveSupport::Concern
11
+
12
+ module ClassMethods
13
+
# Internal: This resolves the correct handler for a given AMQP response and
# runs its init block.
#
# work - the TomQueue::Work object
#
# Each entry of TomQueue::DelayedJob.handlers is asked, in order, whether it
# claims the work. The first truthy claim_work? result is kept and used as
# the init block, so claim_work? runs exactly once per handler consulted
# (a handler whose claim is stateful or expensive is never asked twice).
#
# Returns the Delayed::Job produced by the init block,
#         true if a handler consumed the message without producing a job,
#         false if no handler claimed the message.
def resolve_external_handler(work)
  handler = nil
  init_block = nil

  # Look for a matching source exchange!
  TomQueue::DelayedJob.handlers.each do |candidate|
    claim = candidate.claim_work?(work)
    next unless claim

    handler = candidate
    init_block = claim
    break
  end

  unless handler
    debug { "No external handler wants message, returning false" }
    return false
  end

  debug { "Resolved external handler #{handler} for message. Calling the init block." }

  job = init_block.call(work)
  if job.is_a?(Delayed::Job)
    debug { "Got a job #{job.id}"}
    job
  else
    debug { "Handler returned non-job, I presume that is it."}
    true
  end
end
42
+
# Internal: Give every registered external handler the chance to set up its
# binding against a queue manager.
#
# queue_manager - TomQueue::QueueManager to configure against
#
# Returns the handler collection that was iterated.
def setup_external_handler(queue_manager)
  registered = TomQueue::DelayedJob.handlers
  registered.each { |handler| handler.setup_binding(queue_manager) }
end
52
+
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,365 @@
1
+ require 'active_support/concern'
2
+
3
+ module TomQueue
4
+ module DelayedJob
5
+
6
+ # This is our wrapper for Delayed::Job (ActiveRecord) which augments the
7
+ # save operations with AMQP notifications and replaces the reserve method
8
+ # with a blocking AMQP pop operation.
9
+ #
10
+ # Since we want to retain the behaviour of Delayed::Job we over publish
11
+ # messages and work out if a job is ready to run in the reserve method.
12
+ #
13
+ # In order to prevent the worker considering stale job states, we attach
14
+ # two pieces of information, the job ID, so the job can be located and a
15
+ # digest of the record attributes so stale notifications can be detected.
16
+ #
17
+ # This means that the worker can simply load a job and, if a record is
18
+ # found, quickly drop the notification if any of the attributes have been
19
+ # changed since the message was published. Another notification will
20
+ # likely be en-route.
21
+ #
22
+ # Cases to consider:
23
+ #
24
+ # - after the commit of a transaction creating a job, we publish
25
+ # a message. We do this after commit as we want to make sure the
26
+ # worker considers the job when it has hit stable storage and will be
27
+ # found.
28
+ #
29
+ # - after the commit of a tx updating a job, we also publish.
30
+ # consider the scenario, job is created to run tomorrow, then updated
31
+ # to run in an hour. The first message will only get to the worker
32
+ # tomorrow, so we publish a second message to arrive in an hour and
33
+ # know the worker will disregard the message that arrives tomorrow.
34
+ #
35
+ # - rather than leaving the job un-acked for the duration of the process,
36
+ # we load the job, lock it and then re-publish a message that will
37
+ # trigger a worker after the maximum run duration. This will likely
38
+ # just be dropped since the job will have run successfully and been
39
+ # deleted, but equally could catch a job that has crashed the worker.
40
+ # This ties into the behaviour of DJ more closely than leaving the job
41
+ # un-acked.
42
+ #
43
+ # During the worker reserve method, we do a number of things:
44
+ #
45
+ # - look up the job by ID. We do this with an explicit pessimistic write
46
+ # lock for update, so concurrent workers block.
47
+ #
48
+ # - if there is no record, we ack the AMQP message and do nothing.
49
+ #
50
+ # - if there is a record, we lock the job with our worker and save it.
51
+ # (releasing the lock) At this point, concurrent workers won't find
52
+ # the job as it has been DJ locked by this worker.
53
+ #
54
+ # - when the job completes, we ack the message from the broker, and we're
55
+ # done.
56
+ #
57
+ # - in the event we get a message and the job is locked, the most likely
58
+ # reason is the other worker has crashed and the broker has re-delivered.
59
+ # Since the job will have been updated (to lock it) the digest won't match
60
+ # so we schedule a message to pick up the job when the max_run_time is
61
+ # reached.
62
+ #
63
+ class Job < ::Delayed::Backend::ActiveRecord::Job
64
+
65
+ include TomQueue::LoggingHelper
66
+ include TomQueue::DelayedJob::ExternalMessages
67
+
# Public: The queue manager shared by the whole class. It is built on the
# first call, handed to setup_external_handler, and then remembered for
# every later call.
#
# Returns a TomQueue::QueueManager instance
def self.tomqueue_manager
  return @@tomqueue_manager if defined?(@@tomqueue_manager) && @@tomqueue_manager

  manager = TomQueue::QueueManager.new
  setup_external_handler(manager)

  # Only remember the manager once the handlers have been set up against it.
  @@tomqueue_manager = manager
end
77
+
# Public: Calls #tomqueue_publish on every job currently in the delayed_job
# table. This will probably produce duplicate messages, but the worker
# should do the right thing.
#
# Jobs publish themselves automatically, so this is only needed if you
# think TomQueue is misbehaving or you are re-populating an empty queue
# server.
#
# Returns whatever find_each returns.
def self.tomqueue_republish
  find_each(&:tomqueue_publish)
end
90
+
91
+ # Private: When truthy, the implicit publish on save is skipped (checked by the after_save condition and by #tomqueue_publish itself)
92
+ attr_accessor :skip_publish
93
+
94
+ # Publishing is deferred until the record is committed to stable storage: after_save pushes the record onto the
95
+ # class-wide tomqueue_triggers list, after_commit publishes every queued job, and after_rollback empties the list.
96
+ # Only persisted jobs with a run_at, no failed_at and no skip_publish flag are queued.
97
+ # It's also worth noting that after_commit masks exceptions, so a failed
98
+ # publish won't bring down the caller.
99
+ # NOTE(review): @@tomqueue_triggers is one list shared by every Job instance - confirm this is safe with concurrent transactions.
100
+ after_save :tomqueue_trigger, :if => lambda { persisted? && !!run_at && !failed_at && !skip_publish}
101
+
102
+ after_commit :tomqueue_publish_triggers, :unless => lambda { self.class.tomqueue_triggers.empty? }
103
+ after_rollback :tomqueue_clear_triggers, :unless => lambda { self.class.tomqueue_triggers.empty? }
104
+
105
+ @@tomqueue_triggers = []
106
+ cattr_reader :tomqueue_triggers
107
+
# Internal: Drain the class-wide trigger list, publishing each queued job.
# Jobs are taken from the end of the list, so the most recently queued job
# is published first.
#
# Returns nil
def tomqueue_publish_triggers
  pending = self.class.tomqueue_triggers
  while (queued_job = pending.pop)
    queued_job.tomqueue_publish
  end
end
# Internal: Throw away every queued trigger without publishing anything.
#
# Returns the (now empty) trigger list
def tomqueue_clear_triggers
  pending = self.class.tomqueue_triggers
  pending.clear
end
# Internal: Queue this record on the class-wide trigger list so that it is
# published by #tomqueue_publish_triggers.
#
# Returns the trigger list
def tomqueue_trigger
  self.class.tomqueue_triggers.push(self)
end
119
+
# Public: Send a notification to a worker to consider this job, via AMQP.
# This is called automatically when a job is created or updated (so you
# shouldn't need to call it directly unless you believe TomQueue is
# misbehaving).
#
# custom_run_at - when this message should be delivered.
#                 (Optional, defaults to the job's run_at time)
#
# Any exception raised while publishing (including the ArgumentError below)
# is passed to TomQueue.exception_reporter when one is configured, logged
# with its backtrace, and then re-raised.
#
# Returns nil when skip_publish is set, otherwise the result of the queue
#   manager's publish call.
# Raises ArgumentError if the record has not been saved yet.
def tomqueue_publish(custom_run_at=nil)
  begin
    return nil if self.skip_publish

    if new_record?
      raise ArgumentError, "cannot publish an unsaved Delayed::Job object"
    end

    deliver_at = custom_run_at || self.run_at
    delay = (deliver_at - Time.now).round(2)
    debug "[tomqueue_publish] Pushing notification for #{self.id} to run in #{delay}"

    self.class.tomqueue_manager.publish(tomqueue_payload, {
      :run_at => deliver_at,
      :priority => tomqueue_priority
    })
  rescue Exception => failure
    reporter = TomQueue.exception_reporter
    reporter.notify(failure) if reporter

    error "[tomqueue_publish] Exception during publish: #{failure.inspect}"
    failure.backtrace.each { |line| info line }

    raise
  end
end
150
+
# Private: Returns the mapped tom-queue priority for this job's priority value
#
# The lookup uses Hash#fetch with a nil fallback, so an unmapped priority is
# noticed (and logged) rather than silently picking up the map's default.
#
# Returns the mapped priority, or TomQueue::NORMAL_PRIORITY when the job's
#   priority has no (or a nil) entry in the priority map.
def tomqueue_priority
  mapped = TomQueue::DelayedJob.priority_map.fetch(self.priority, nil)
  return mapped unless mapped.nil?

  warn "[tomqueue_priority] Unknown priority level #{self.priority} specified, mapping to NORMAL priority"
  TomQueue::NORMAL_PRIORITY
end
160
+
# Private: Prepare an AMQP payload for this job
#
# This is used by both #tomqueue_publish as well as tests to avoid
# maintaining mock payloads all over the place.
#
# The payload carries the job id, the attribute digest and the updated_at
# timestamp (ISO 8601, whole seconds).
#
# Returns a JSON string
def tomqueue_payload
  fields = {}
  fields["delayed_job_id"] = self.id
  fields["delayed_job_digest"] = tomqueue_digest
  fields["delayed_job_updated_at"] = self.updated_at.iso8601(0)

  JSON.dump(fields)
end
174
+
# Private: Calculate a hexdigest of the attributes
#
# This is used to detect if the received message is stale, as it's sent as
# part of the AMQP payload and then re-calculated when the worker is about
# to run the job.
#
# Values whose class is exactly one of BROKEN_DIGEST_CLASSES are reduced to
# an integer via to_i before hashing; every other value is hashed via to_s.
# NOTE(review): presumably the time classes' to_s output is not stable
# enough to compare across processes - confirm.
#
# Returns a string
BROKEN_DIGEST_CLASSES = [DateTime, Time, ActiveSupport::TimeWithZone]
def tomqueue_digest
  normalised = self.attributes.map do |name, value|
    if BROKEN_DIGEST_CLASSES.include?(value.class)
      [name, value.to_i]
    else
      [name, value.to_s]
    end
  end

  Digest::MD5.hexdigest(normalised.to_s)
end
187
+
188
+
# Public: is this job locked
#
# A job counts as locked only while it has both a locked_by and a locked_at
# value and the lock is no older than Delayed::Worker.max_run_time.
#
# Returns boolean true if the job has been locked by a worker
def locked?
  return false unless locked_by && locked_at

  lock_expires_at = locked_at + Delayed::Worker.max_run_time
  lock_expires_at >= Delayed::Job.db_time_now
end
195
+
# Public: Retrieves a job with a specific ID, acquiring a lock
# preventing other concurrent workers from doing the same.
#
# job_id - the ID of the job to acquire
# worker - the Delayed::Worker attempting to acquire the lock
# block  - if provided, it is yielded with the job object as the only
#          argument whilst the job record is locked.
#          If the block returns true, the lock is acquired.
#          If the block returns anything else, the call will return nil
#
# NOTE: when a job has a stale lock, the block isn't yielded, as it is
#       presumed the job has started somewhere and crashed out - so we just
#       carry on, as it will have previously passed the validity check (and
#       may have changed since).
#
# A job whose run_at is more than five seconds in the future is not locked;
# a notification is re-published for its run_at instead.
#
# Returns * a Delayed::Job instance if the job was found and lock acquired
#         * nil if the job wasn't found, has failed, was rejected by the
#           block, or was notified too early
#         * false if the job was found, but is currently locked.
def self.acquire_locked_job(job_id, worker)

  # We have to be careful here: the DJ lock is taken inside a transaction
  # that holds a write lock on the record, to avoid racing other workers
  # doing the same...
  Delayed::Job.transaction do

    # Load the job with a write lock so that other workers in the same
    # position block here.
    candidate = Delayed::Job.find_by_id(job_id, :lock => true)

    next nil if candidate.nil?

    if candidate.failed?
      warn "[tomqueue] Received notification for failed job #{candidate.id}"
      next nil
    end

    next false if candidate.locked?

    # Leftover lock fields mean a stale lock, which skips the validity block.
    has_stale_lock = candidate.locked_at || candidate.locked_by
    next nil unless has_stale_lock || !block_given? || yield(candidate) == true

    if candidate.run_at > self.db_time_now + 5
      warn "[tomqueue] Received early notification for job #{candidate.id} - expected at #{candidate.run_at}"

      candidate.tomqueue_publish(candidate.run_at)

      next nil
    end

    # Taking the DJ lock is a save, but it must not publish a notification.
    candidate.skip_publish = true

    candidate.locked_by = worker.name
    candidate.locked_at = self.db_time_now
    candidate.save!

    candidate.skip_publish = nil

    candidate
  end
end
259
+
# Public: Called by Delayed::Worker to retrieve the next job to process
#
# This is the glue between TomQueue and DelayedJob and implements most of
# the behaviour discussed above.
#
# This function will block until a message becomes available. It switches
# `Delayed::Worker.raise_signal_exceptions` on for the blocking stage so the
# process can be interrupted, and always puts the previous value back
# afterwards - including when the pop itself is interrupted or fails.
#
# worker       - the Delayed::Worker asking for work
# max_run_time - how long a lock is honoured for; used to schedule the
#                follow-up notification for a job that is currently locked
#                (defaults to Delayed::Worker.max_run_time)
#
# Returns * the locked job (with tomqueue_work set) when one was acquired
#         * the job returned by an external handler, if one produced a job
#         * nil when there is nothing to run (no message, message consumed
#           by an external handler, stale/locked/unknown job, bad payload).
# Raises whatever was raised by anything other than a JSON parse failure,
#   after reporting it.
def self.reserve(worker, max_run_time = Delayed::Worker.max_run_time)

  # Grab a job from the QueueManager - will block here, ensure we can be interrupted!
  previous_setting = Delayed::Worker.raise_signal_exceptions
  Delayed::Worker.raise_signal_exceptions = true
  begin
    work = self.tomqueue_manager.pop
  ensure
    # Restore even when pop raises, which is exactly what a signal does.
    Delayed::Worker.raise_signal_exceptions = previous_setting
  end

  if work.nil?
    warn "[reserve] TomQueue#pop returned nil, stalling for a second."
    sleep 1.0

    nil
  else

    passthrough = resolve_external_handler(work)
    if passthrough == true
      # An external handler dealt with the message and produced no job.
      work.ack!

      nil
    elsif passthrough == false

      # NOTE(review): JSON.load is used on a payload coming off the broker;
      # confirm the payload source is trusted.
      decoded_payload = JSON.load(work.payload)
      job_id = decoded_payload['delayed_job_id']
      digest = decoded_payload['delayed_job_digest']

      debug "[reserve] Popped notification for #{job_id}"
      locked_job = self.acquire_locked_job(job_id, worker) do |job|
        # A notification without a digest is always considered current.
        digest.nil? || job.tomqueue_digest == digest
      end

      if locked_job
        info "[reserve] Acquired DB lock for job #{job_id}"

        # Kept un-acked; #invoke_job acks once the job has been invoked.
        locked_job.tomqueue_work = work
      else
        work.ack!

        if locked_job == false
          # In this situation, we re-publish a message to run in max_run_time
          # since the likely scenario is a worker has crashed and the original
          # message was re-delivered.
          #
          # We schedule another AMQP message to arrive when the job's lock
          # will have expired. The job is re-loaded outside the locking
          # transaction, so it may have been deleted or unlocked by now.
          current = Delayed::Job.find_by_id(job_id)
          if current
            # With no lock left to wait for, fall back to the job's run_at.
            follow_up_at = current.locked_at ? current.locked_at + max_run_time + 1 : current.run_at
            debug { "[reserve] Notified about locked job #{current.id}, will schedule follow up at #{follow_up_at}" }
            current.tomqueue_publish(follow_up_at)
          end

          locked_job = nil
        end
      end

      locked_job

    else
      # An external handler turned the message into a job; hand it over.
      work.ack!

      passthrough
    end
  end

rescue JSON::ParserError => e
  work.ack!
  error "[reserve] Failed to parse JSON payload: #{e.message}. Dropping AMQP message."

  TomQueue.exception_reporter && TomQueue.exception_reporter.notify(e)

  nil

rescue Exception => e

  error "[reserve] Exception in reserve method: #{e.message}."
  TomQueue.exception_reporter && TomQueue.exception_reporter.notify(e)

  raise
end
346
+
347
+ # Internal: This is the AMQP notification object that triggered this job run. It is assigned by .reserve
348
+ # once the job has been locked, and is used by #invoke_job to ack! the work once the job has been invoked
349
+ #
350
+ # Returns nil or TomQueue::Work object
351
+ attr_accessor :tomqueue_work
352
+
# Internal: This wraps the job invocation with an acknowledgement of the
# original TomQueue work object, if one is around.
#
# The acknowledgement happens whether the invocation returns or raises.
#
# Returns whatever the parent invoke_job returns
def invoke_job
  begin
    super
  ensure
    debug "[invoke job:#{self.id}] Invoke completed, acking message."

    pending_work = self.tomqueue_work
    pending_work.ack! if pending_work
  end
end
362
+
363
+ end
364
+ end
365
+ end