tom_queue 0.0.1.dev

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ module TomQueue
2
+ module DelayedJob
3
+
4
+ require 'tom_queue/delayed_job/external_messages'
5
+ require 'tom_queue/delayed_job/job'
6
+
7
# Public: Lookup table translating external (Delayed Job) priority values into
# TomQueue priority levels.
#
# The table is built the first time it is requested and the same Hash is handed
# back on every later call. Reading an unregistered key with #[] yields
# TomQueue::NORMAL_PRIORITY, because that is the Hash's default value.
#
# Returns the shared Hash.
def priority_map
  unless defined?(@@priority_map) && @@priority_map
    @@priority_map = Hash.new(TomQueue::NORMAL_PRIORITY)
  end

  @@priority_map
end
11
+ module_function :priority_map
12
+
13
# Public: Point Delayed Job at TomQueue so that scheduling goes over AMQP.
# Generally this should be called from a Rails initializer.
#
# It sets the worker's sleep delay to zero and installs
# TomQueue::DelayedJob::Job as the Delayed::Worker backend.
#
# Returns the backend class that was assigned.
def apply_hook!
  worker_class = Delayed::Worker

  worker_class.sleep_delay = 0
  worker_class.backend = TomQueue::DelayedJob::Job
end
19
+ module_function :apply_hook!
20
+
21
# Public: Replace the list of external message handlers.
#
# new_handlers - the collection of handler objects to use from now on.
#
# Returns the value that was assigned.
def handlers=(new_handlers)
  @@handlers = new_handlers
end

# Public: The list of external message handlers.
#
# Returns the currently registered handlers; an empty Array is created and
# remembered the first time this is called with nothing registered.
def handlers
  @@handlers = [] unless defined?(@@handlers) && @@handlers
  @@handlers
end
29
+ module_function :handlers, :handlers=
30
+
31
+ end
32
+ end
33
+
@@ -0,0 +1,56 @@
1
+ require 'active_support/concern'
2
+
3
+ module TomQueue
4
+ module DelayedJob
5
+
6
+ # Internal: This is mixed into the Job class, in order to support the handling of
7
+ # externally sourced AMQP messages
8
+ #
9
+ module ExternalMessages
10
+ extend ActiveSupport::Concern
11
+
12
+ module ClassMethods
13
+
14
# Internal: This resolves the correct handler for a given AMQP response and
# runs it.
#
# Handlers from TomQueue::DelayedJob.handlers are asked in order, via
# claim_work?(work), whether they want the message. The first truthy answer
# wins, and that answer is used as the callable "init block" which is invoked
# with the work object. claim_work? is called exactly once per handler that is
# consulted, so a handler whose claim check is expensive or stateful is not
# asked twice.
#
# work - the TomQueue::Work object
#
# Returns * the Delayed::Job returned by the handler's init block, if it
#           returned one
#         * true if a handler claimed the message but its init block returned
#           something other than a Delayed::Job
#         * false if no handler claimed the message
def resolve_external_handler(work)
  handler = nil
  init_block = nil

  # Look for a matching source exchange, remembering the claim result so we
  # don't have to ask the winning handler a second time.
  TomQueue::DelayedJob.handlers.each do |candidate|
    claim = candidate.claim_work?(work)
    next unless claim

    handler = candidate
    init_block = claim
    break
  end

  unless handler
    debug { "No external handler wants message, returning false" }
    return false
  end

  debug { "Resolved external handler #{handler} for message. Calling the init block." }

  job = init_block.call(work)
  if job.is_a?(Delayed::Job)
    debug { "Got a job #{job.id}"}
    job
  else
    debug { "Handler returned non-job, I presume that is it."}
    true
  end
end
42
+
43
# Internal: Give every registered external handler the chance to configure
# itself against a queue manager, by calling setup_binding on each of them.
#
# queue_manager - TomQueue::QueueManager to configure against
#
# Returns the collection of handlers that was iterated.
def setup_external_handler(queue_manager)
  registered = TomQueue::DelayedJob.handlers
  registered.each { |handler| handler.setup_binding(queue_manager) }
end
52
+
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,365 @@
1
+ require 'active_support/concern'
2
+
3
+ module TomQueue
4
+ module DelayedJob
5
+
6
+ # This is our wrapper for Delayed::Job (ActiveRecord) which augments the
7
+ # save operations with AMQP notifications and replaces the reserve method
8
+ # with a blocking AMQP pop operation.
9
+ #
10
+ # Since we want to retain the behaviour of Delayed::Job we over publish
11
+ # messages and work out if a job is ready to run in the reserve method.
12
+ #
13
+ # In order to prevent the worker considering stale job states, we attach
14
+ # two pieces of information, the job ID, so the job can be located and a
15
+ # digest of the record attributes so stale notifications can be detected.
16
+ #
17
+ # This means that the worker can simply load a job and, if a record is
18
+ # found, quickly drop the notification if any of the attributes have been
19
+ # changed since the message was published. Another notification will
20
+ # likely be en-route.
21
+ #
22
+ # Cases to consider:
23
+ #
24
+ # - after the commit of a transaction creating a job, we publish
25
+ # a message. We do this after commit as we want to make sure the
26
+ # worker considers the job when it has hit stable storage and will be
27
+ # found.
28
+ #
29
+ # - after the commit of a tx updating a job, we also publish.
30
+ # consider the scenario, job is created to run tomorrow, then updated
31
+ # to run in an hour. The first message will only get to the worker
32
+ # tomorrow, so we publish a second message to arrive in an hour and
33
+ # know the worker will disregard the message that arrives tomorrow.
34
+ #
35
+ # - rather than leaving the job un-acked for the duration of the process,
36
+ # we load the job, lock it and then re-publish a message that will
37
+ # trigger a worker after the maximum run duration. This will likely
38
+ # just be dropped since the job will have run successfully and been
39
+ # deleted, but equally could catch a job that has crashed the worker.
40
+ # This ties into the behaviour of DJ more closely than leaving the job
41
+ # un-acked.
42
+ #
43
+ # During the worker reserve method, we do a number of things:
44
+ #
45
+ # - look up the job by ID. We do this with an explicit pessimistic write
46
+ # lock for update, so concurrent workers block.
47
+ #
48
+ # - if there is no record, we ack the AMQP message and do nothing.
49
+ #
50
+ # - if there is a record, we lock the job with our worker and save it.
51
+ # (releasing the lock) At this point, concurrent workers won't find
52
+ # the job as it has been DJ locked by this worker.
53
+ #
54
+ # - when the job completes, we ack the message from the broker, and we're
55
+ # done.
56
+ #
57
+ # - in the event we get a message and the job is locked, the most likely
58
+ # reason is the other worker has crashed and the broker has re-delivered.
59
+ # Since the job will have been updated (to lock it) the digest won't match
60
+ # so we schedule a message to pick up the job when the max_run_time is
61
+ # reached.
62
+ #
63
+ class Job < ::Delayed::Backend::ActiveRecord::Job
64
+
65
+ include TomQueue::LoggingHelper
66
+ include TomQueue::DelayedJob::ExternalMessages
67
+
68
# Public: The shared queue manager object.
#
# On the first call a TomQueue::QueueManager is created and passed to
# setup_external_handler before being remembered; later calls return that
# same object.
#
# Returns a TomQueue::QueueManager instance
def self.tomqueue_manager
  @@tomqueue_manager ||= begin
    manager = TomQueue::QueueManager.new
    setup_external_handler(manager)
    manager
  end
end
77
+
78
# Public: Call #tomqueue_publish on every job yielded by find_each, i.e. on
# all jobs currently in the delayed_job table. This will probably end up with
# duplicate messages, but the worker should do the right thing.
#
# Jobs should automatically publish themselves, so you should only need to
# call this if you think TomQueue is misbehaving, or you're re-populating an
# empty queue server.
#
# Returns whatever find_each returns (documented by the original author as nil).
def self.tomqueue_republish
  find_each(&:tomqueue_publish)
end
90
+
91
# Private: Skip the implicit tomqueue_publish when a record is being saved.
# While this is truthy the after_save trigger below is not registered for the
# record and #tomqueue_publish returns early.
attr_accessor :skip_publish

# Publishing is split into two steps so that the AMQP message is only sent
# once the record has been committed to stable storage:
#
#  1. after_save records the job in the class-level tomqueue_triggers list,
#     but only for persisted records that have a run_at, have not failed and
#     do not have skip_publish set.
#  2. after_commit publishes (and drains) everything in that list, while
#     after_rollback simply empties it. Both are skipped when the list is
#     empty.
#
# NOTE(review): the original author states that after_commit masks
# exceptions, so a failed publish won't bring down the caller - confirm this
# for the Rails version in use.
#
after_save :tomqueue_trigger, :if => lambda { persisted? && !!run_at && !failed_at && !skip_publish}

after_commit :tomqueue_publish_triggers, :unless => lambda { self.class.tomqueue_triggers.empty? }
after_rollback :tomqueue_clear_triggers, :unless => lambda { self.class.tomqueue_triggers.empty? }

# Jobs saved but not yet published. This is a single class-level Array shared
# by every instance.
# NOTE(review): it is not scoped per transaction or per thread - confirm that
# is acceptable for multi-threaded callers.
@@tomqueue_triggers = []
cattr_reader :tomqueue_triggers
107
+
108
# Internal: Drain the class-level trigger list, publishing each queued job.
# Jobs are taken from the end of the list, so the most recently queued job is
# published first. Draining stops at the first nil/false entry or when the
# list is empty.
#
# Returns nil
def tomqueue_publish_triggers
  pending = self.class.tomqueue_triggers

  job = pending.pop
  while job
    job.tomqueue_publish
    job = pending.pop
  end
end
113
# Internal: Forget every job queued for publishing (used after a rollback).
#
# Returns the (now empty) trigger list.
def tomqueue_clear_triggers
  pending = self.class.tomqueue_triggers
  pending.clear
end
116
# Internal: Queue this job in the class-level trigger list so that it is
# published once the surrounding transaction commits.
#
# Returns the trigger list.
def tomqueue_trigger
  self.class.tomqueue_triggers.push(self)
end
119
+
120
# Public: Send a notification to a worker to consider this job, via AMQP.
# This is called automatically when a job is created or updated (so you
# shouldn't need to call it directly unless you believe TomQueue is
# misbehaving).
#
# custom_run_at - when this message should be delivered.
#                 (Optional, defaults to the job's run_at time)
#
# Returns nil without publishing when skip_publish is set; otherwise returns
# whatever the queue manager's #publish call returns.
# Raises ArgumentError if the record has not been saved yet. Any exception
# raised here (that one included) is handed to TomQueue.exception_reporter
# when one is configured, logged together with its backtrace, and re-raised.
def tomqueue_publish(custom_run_at=nil)
  return nil if skip_publish
  raise ArgumentError, "cannot publish an unsaved Delayed::Job object" if new_record?

  deliver_at = custom_run_at || run_at
  debug "[tomqueue_publish] Pushing notification for #{id} to run in #{(deliver_at - Time.now).round(2)}"

  self.class.tomqueue_manager.publish(tomqueue_payload, {
    :run_at   => deliver_at,
    :priority => tomqueue_priority
  })

rescue Exception => failure
  reporter = TomQueue.exception_reporter
  reporter.notify(failure) if reporter

  error "[tomqueue_publish] Exception during publish: #{failure.inspect}"
  failure.backtrace.each { |line| info line }

  raise
end
150
+
151
# Private: Returns the mapped tom-queue priority for this job's priority value.
#
# The job's priority is looked up in TomQueue::DelayedJob.priority_map. When
# no mapping exists a warning is logged and TomQueue::NORMAL_PRIORITY is
# returned instead.
def tomqueue_priority
  mapped = TomQueue::DelayedJob.priority_map.fetch(priority, nil)
  return mapped unless mapped.nil?

  warn "[tomqueue_priority] Unknown priority level #{priority} specified, mapping to NORMAL priority"
  TomQueue::NORMAL_PRIORITY
end
160
+
161
# Private: Prepare an AMQP payload for this job
#
# This is used by both #tomqueue_publish as well as tests to avoid
# maintaining mock payloads all over the place.
#
# The payload is a JSON object carrying the job's id, its attribute digest
# (see #tomqueue_digest) and its updated_at time as an ISO 8601 string with
# no fractional seconds.
#
# Returns a string
def tomqueue_payload
  fields = {
    "delayed_job_id"         => id,
    "delayed_job_digest"     => tomqueue_digest,
    "delayed_job_updated_at" => updated_at.iso8601(0)
  }

  JSON.dump(fields)
end
174
+
175
# Private: Calculate a hexdigest of the attributes
#
# This is used to detect if the received message is stale, as it's
# sent as part of the AMQP payload and then re-calculated when the
# worker is about to run the job.
#
# Values whose class is exactly one of BROKEN_DIGEST_CLASSES are reduced to
# their integer form (to_i) before hashing; every other value is hashed via
# to_s.
#
# Returns a string
BROKEN_DIGEST_CLASSES = [DateTime, Time, ActiveSupport::TimeWithZone]
def tomqueue_digest
  normalised = attributes.map do |name, value|
    rendered = BROKEN_DIGEST_CLASSES.include?(value.class) ? value.to_i : value.to_s
    [name, rendered]
  end

  Digest::MD5.hexdigest(normalised.to_s)
end
187
+
188
+
189
# Public: is this job locked
#
# A job counts as locked when it has both a locked_by and a locked_at value
# and the lock has not yet outlived Delayed::Worker.max_run_time, measured
# against Delayed::Job.db_time_now.
#
# Returns boolean true if the job has been locked by a worker
def locked?
  return false unless locked_by && locked_at

  lock_expires_at = locked_at + Delayed::Worker.max_run_time
  lock_expires_at >= Delayed::Job.db_time_now
end
195
+
196
# Public: Retrieves a job with a specific ID, acquiring a lock
# preventing other concurrent workers from doing the same.
#
# job_id - the ID of the job to acquire
# worker - the Delayed::Worker attempting to acquire the lock
# block  - if provided, it is yielded with the job object as the only argument
#          whilst the job record is locked.
#          If the block returns true, the lock is acquired.
#          If the block returns anything else, the call will return nil
#
# NOTE: when a job has a stale lock, the block isn't yielded, as it is presumed
#       the job has started somewhere and crashed out - so we just return immediately
#       as it will have previously passed the validity check (and may have changed since).
#
# A job whose run_at is more than five seconds in the future is not locked;
# instead a fresh notification is published for its run_at and nil is returned.
#
# Returns * a Delayed::Job instance if the job was found and lock acquired
#         * nil if the job wasn't found (or has failed, was rejected by the
#           block, or was notified too early)
#         * false if the job was found, but the lock wasn't acquired.
def self.acquire_locked_job(job_id, worker)

  # We have to be careful here, we grab the DJ lock inside a transaction that holds
  # a write lock on the record to avoid potential race conditions with other workers
  # doing the same...
  Delayed::Job.transaction do

    # Load the job, ensuring we have a write lock so other workers in the same position
    # block, avoiding race conditions
    job = Delayed::Job.find_by_id(job_id, :lock => true)
    next nil if job.nil?

    if job.failed?
      warn "[tomqueue] Received notification for failed job #{job.id}"
      next nil
    end

    # Another worker holds a live lock on this job.
    next false if job.locked?

    # Leftover lock fields here mean the lock is stale, in which case the
    # validity block is deliberately not consulted.
    stale_lock = job.locked_at || job.locked_by
    next nil unless stale_lock || !block_given? || yield(job) == true

    if job.run_at > self.db_time_now + 5
      warn "[tomqueue] Received early notification for job #{job.id} - expected at #{job.run_at}"

      job.tomqueue_publish(job.run_at)
      next nil
    end

    # Taking the DJ lock must not trigger another publish of the job.
    job.skip_publish = true

    job.locked_by = worker.name
    job.locked_at = self.db_time_now
    job.save!

    job.skip_publish = nil

    job
  end
end
259
+
260
# Public: Called by Delayed::Worker to retrieve the next job to process
#
# This is the glue between TomQueue and DelayedJob and implements most of
# the behaviour discussed above.
#
# This function will block until a job becomes available to process. It sets
# `Delayed::Worker.raise_signal_exceptions` to true during the blocking stage
# so the process can be interrupted, and always puts the previous value back
# afterwards - even when the pop itself raises.
#
# worker       - the Delayed::Worker asking for work
# max_run_time - how long a job lock is considered valid
#                (defaults to Delayed::Worker.max_run_time)
#
# Returns * the job to process, with the AMQP work attached as tomqueue_work
#         * the job returned by an external handler (its message is already acked)
#         * nil when there is nothing to run for this notification
# Any exception other than a JSON parse failure is logged, handed to
# TomQueue.exception_reporter when one is configured, and re-raised.
def self.reserve(worker, max_run_time = Delayed::Worker.max_run_time)

  # Grab a job from the QueueManager - will block here, ensure we can be interrupted!
  old_value = Delayed::Worker.raise_signal_exceptions
  Delayed::Worker.raise_signal_exceptions = true
  begin
    work = self.tomqueue_manager.pop
  ensure
    # Restore the setting however the pop ends, otherwise an interrupted pop
    # would leave signal exceptions switched on for the rest of the process.
    Delayed::Worker.raise_signal_exceptions = old_value
  end

  if work.nil?
    warn "[reserve] TomQueue#pop returned nil, stalling for a second."
    sleep 1.0

    return nil
  end

  passthrough = resolve_external_handler(work)
  if passthrough == true
    # An external handler dealt with the message completely.
    work.ack!
    return nil
  elsif passthrough != false
    # An external handler produced a job for the worker to run.
    work.ack!
    return passthrough
  end

  # NOTE(review): JSON.load is being fed a payload that came off the wire;
  # depending on the json gem version it may honour create_additions.
  # Consider JSON.parse if payloads can come from untrusted publishers.
  decoded_payload = JSON.load(work.payload)
  job_id = decoded_payload['delayed_job_id']
  digest = decoded_payload['delayed_job_digest']

  debug "[reserve] Popped notification for #{job_id}"
  locked_job = self.acquire_locked_job(job_id, worker) do |job|
    digest.nil? || job.tomqueue_digest == digest
  end

  if locked_job
    info "[reserve] Acquired DB lock for job #{job_id}"

    # The message is acked by #invoke_job once the job has run.
    locked_job.tomqueue_work = work
    return locked_job
  end

  work.ack!

  if locked_job == false
    # In this situation, we re-publish a message to run in max_run_time
    # since the likely scenario is a worker has crashed and the original message
    # was re-delivered.
    #
    # We schedule another AMQP message to arrive when the job's lock will have expired.
    job = Delayed::Job.find_by_id(job_id)

    # The job may have been deleted, or its lock released, since the locked
    # lookup above - so check before touching it.
    if job
      follow_up_at = job.locked_at && (job.locked_at + max_run_time + 1)
      debug { "[reserve] Notified about locked job #{job.id}, will schedule follow up at #{follow_up_at.inspect}" }

      # With no lock time left to work from, passing nil publishes for the
      # job's own run_at.
      job.tomqueue_publish(follow_up_at)
    end
  end

  nil

rescue JSON::ParserError => e
  work.ack!
  error "[reserve] Failed to parse JSON payload: #{e.message}. Dropping AMQP message."

  TomQueue.exception_reporter && TomQueue.exception_reporter.notify(e)

  nil

rescue Exception => e

  error "[reserve] Exception in reserve method: #{e.message}."
  TomQueue.exception_reporter && TomQueue.exception_reporter.notify(e)

  raise
end
346
+
347
+ # Internal: This is the AMQP notification object that triggered this job run
348
+ # and is used to ack! the work once the job has been invoked
349
+ #
350
+ # Returns nil or TomQueue::Work object
351
+ attr_accessor :tomqueue_work
352
+
353
# Internal: This wraps the job invocation with an acknowledgement of the original
# TomQueue work object, if one is around.
#
# The acknowledgement happens in an ensure clause, so the message is acked
# whether the job succeeds or raises.
#
# Returns whatever the superclass implementation returns.
def invoke_job
  super
ensure
  debug "[invoke job:#{id}] Invoke completed, acking message."

  work = tomqueue_work
  work.ack! if work
end
362
+
363
+ end
364
+ end
365
+ end