qless 0.9.3 → 0.10.0

Files changed (65)
  1. data/Gemfile +9 -3
  2. data/README.md +70 -25
  3. data/Rakefile +125 -9
  4. data/exe/install_phantomjs +21 -0
  5. data/lib/qless.rb +115 -76
  6. data/lib/qless/config.rb +11 -9
  7. data/lib/qless/failure_formatter.rb +43 -0
  8. data/lib/qless/job.rb +201 -102
  9. data/lib/qless/job_reservers/ordered.rb +7 -1
  10. data/lib/qless/job_reservers/round_robin.rb +16 -6
  11. data/lib/qless/job_reservers/shuffled_round_robin.rb +9 -2
  12. data/lib/qless/lua/qless-lib.lua +2463 -0
  13. data/lib/qless/lua/qless.lua +2012 -0
  14. data/lib/qless/lua_script.rb +63 -12
  15. data/lib/qless/middleware/memory_usage_monitor.rb +62 -0
  16. data/lib/qless/middleware/metriks.rb +45 -0
  17. data/lib/qless/middleware/redis_reconnect.rb +6 -3
  18. data/lib/qless/middleware/requeue_exceptions.rb +94 -0
  19. data/lib/qless/middleware/retry_exceptions.rb +38 -9
  20. data/lib/qless/middleware/sentry.rb +3 -7
  21. data/lib/qless/middleware/timeout.rb +64 -0
  22. data/lib/qless/queue.rb +90 -55
  23. data/lib/qless/server.rb +177 -130
  24. data/lib/qless/server/views/_job.erb +33 -15
  25. data/lib/qless/server/views/completed.erb +11 -0
  26. data/lib/qless/server/views/layout.erb +70 -11
  27. data/lib/qless/server/views/overview.erb +93 -53
  28. data/lib/qless/server/views/queue.erb +9 -8
  29. data/lib/qless/server/views/queues.erb +18 -1
  30. data/lib/qless/subscriber.rb +37 -22
  31. data/lib/qless/tasks.rb +5 -10
  32. data/lib/qless/test_helpers/worker_helpers.rb +55 -0
  33. data/lib/qless/version.rb +3 -1
  34. data/lib/qless/worker.rb +4 -413
  35. data/lib/qless/worker/base.rb +247 -0
  36. data/lib/qless/worker/forking.rb +245 -0
  37. data/lib/qless/worker/serial.rb +41 -0
  38. metadata +135 -52
  39. data/lib/qless/qless-core/cancel.lua +0 -101
  40. data/lib/qless/qless-core/complete.lua +0 -233
  41. data/lib/qless/qless-core/config.lua +0 -56
  42. data/lib/qless/qless-core/depends.lua +0 -65
  43. data/lib/qless/qless-core/deregister_workers.lua +0 -12
  44. data/lib/qless/qless-core/fail.lua +0 -117
  45. data/lib/qless/qless-core/failed.lua +0 -83
  46. data/lib/qless/qless-core/get.lua +0 -37
  47. data/lib/qless/qless-core/heartbeat.lua +0 -51
  48. data/lib/qless/qless-core/jobs.lua +0 -41
  49. data/lib/qless/qless-core/pause.lua +0 -18
  50. data/lib/qless/qless-core/peek.lua +0 -165
  51. data/lib/qless/qless-core/pop.lua +0 -314
  52. data/lib/qless/qless-core/priority.lua +0 -32
  53. data/lib/qless/qless-core/put.lua +0 -169
  54. data/lib/qless/qless-core/qless-lib.lua +0 -2354
  55. data/lib/qless/qless-core/qless.lua +0 -1862
  56. data/lib/qless/qless-core/queues.lua +0 -58
  57. data/lib/qless/qless-core/recur.lua +0 -190
  58. data/lib/qless/qless-core/retry.lua +0 -73
  59. data/lib/qless/qless-core/stats.lua +0 -92
  60. data/lib/qless/qless-core/tag.lua +0 -100
  61. data/lib/qless/qless-core/track.lua +0 -79
  62. data/lib/qless/qless-core/unfail.lua +0 -54
  63. data/lib/qless/qless-core/unpause.lua +0 -12
  64. data/lib/qless/qless-core/workers.lua +0 -69
  65. data/lib/qless/wait_until.rb +0 -19
data/lib/qless/job_reservers/ordered.rb
@@ -1,3 +1,5 @@
+# Encoding: utf-8
+
 module Qless
   module JobReservers
     class Ordered
@@ -15,8 +17,12 @@ module Qless
         nil
       end
 
+      def prep_for_work!
+        # nothing here on purpose
+      end
+
       def description
-        @description ||= @queues.map(&:name).join(', ') + " (ordered)"
+        @description ||= @queues.map(&:name).join(', ') + ' (ordered)'
       end
     end
   end
data/lib/qless/job_reservers/round_robin.rb
@@ -1,5 +1,8 @@
+# Encoding: utf-8
+
 module Qless
   module JobReservers
+    # Round-robins through all the provided queues
     class RoundRobin
       attr_reader :queues
 
@@ -11,20 +14,28 @@ module Qless
 
       def reserve
         @num_queues.times do |i|
-          if job = next_queue.pop
-            return job
-          end
+          job = next_queue.pop
+          return job if job
         end
         nil
       end
 
+      def prep_for_work!
+        # nothing here on purpose
+      end
+
       def description
-        @description ||= @queues.map(&:name).join(', ') + " (#{self.class::TYPE_DESCRIPTION})"
+        @description ||=
+          @queues.map(&:name).join(', ') + " (#{self.class::TYPE_DESCRIPTION})"
+      end
+
+      def reset_description!
+        @description = nil
       end
 
       private
 
-      TYPE_DESCRIPTION = "round robin"
+      TYPE_DESCRIPTION = 'round robin'
 
       def next_queue
         @last_popped_queue_index = (@last_popped_queue_index + 1) % @num_queues
@@ -33,4 +44,3 @@ module Qless
     end
   end
 end
-
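Usage note: the new prep_for_work! hook gives forking workers a moment to reinitialize a reserver in the child process (RoundRobin does nothing; ShuffledRoundRobin reshuffles, see the next file). A minimal sketch of driving a reserver by hand, assuming the gem's Qless::Client API and a local Redis; the queue names are hypothetical:

    require 'qless'
    require 'qless/job_reservers/round_robin'

    client   = Qless::Client.new                  # defaults to localhost:6379
    queues   = %w[high low].map { |name| client.queues[name] }
    reserver = Qless::JobReservers::RoundRobin.new(queues)

    reserver.prep_for_work!                       # no-op here; reshuffles in the shuffled variant
    while (job = reserver.reserve)                # cycles the queues until one pops a job
      job.perform
    end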
data/lib/qless/job_reservers/shuffled_round_robin.rb
@@ -1,14 +1,21 @@
+# Encoding: utf-8
+
 require 'qless/job_reservers/round_robin'
 
 module Qless
   module JobReservers
+    # Like round-robin but shuffles the order of the queues
    class ShuffledRoundRobin < RoundRobin
      def initialize(queues)
        super(queues.shuffle)
      end
 
-      TYPE_DESCRIPTION = "shuffled round robin"
+      def prep_for_work!
+        @queues = @queues.shuffle
+        reset_description!
+      end
+
+      TYPE_DESCRIPTION = 'shuffled round robin'
    end
  end
 end
-
data/lib/qless/lua/qless-lib.lua
@@ -0,0 +1,2463 @@
+ -- Current SHA: 525c39000dc71df53a3502491cb4daf0e1128f1d
+ -- This is a generated file
+ -------------------------------------------------------------------------------
+ -- Forward declarations to make everything happy
+ -------------------------------------------------------------------------------
+ local Qless = {
+ ns = 'ql:'
+ }
+
+ -- Queue forward declaration
+ local QlessQueue = {
+ ns = Qless.ns .. 'q:'
+ }
+ QlessQueue.__index = QlessQueue
+
+ -- Worker forward declaration
+ local QlessWorker = {
+ ns = Qless.ns .. 'w:'
+ }
+ QlessWorker.__index = QlessWorker
+
+ -- Job forward declaration
+ local QlessJob = {
+ ns = Qless.ns .. 'j:'
+ }
+ QlessJob.__index = QlessJob
+
+ -- RecurringJob forward declaration
+ local QlessRecurringJob = {}
+ QlessRecurringJob.__index = QlessRecurringJob
+
+ -- Config forward declaration
+ Qless.config = {}
+
+ -- Extend a table. This comes up quite frequently
+ function table.extend(self, other)
+ for i, v in ipairs(other) do
+ table.insert(self, v)
+ end
+ end
+
+ -- This is essentially the same as redis' publish, but it prefixes the channel
+ -- with the Qless namespace
+ function Qless.publish(channel, message)
+ redis.call('publish', Qless.ns .. channel, message)
+ end
+
+ -- Return a job object given its job id
+ function Qless.job(jid)
+ assert(jid, 'Job(): no jid provided')
+ local job = {}
+ setmetatable(job, QlessJob)
+ job.jid = jid
+ return job
+ end
+
+ -- Return a recurring job object
+ function Qless.recurring(jid)
+ assert(jid, 'Recurring(): no jid provided')
+ local job = {}
+ setmetatable(job, QlessRecurringJob)
+ job.jid = jid
+ return job
+ end
+
+ -- Failed([group, [start, [limit]]])
+ -- ------------------------------------
+ -- If no group is provided, this returns a JSON blob of the counts of the
+ -- various groups of failures known. If a group is provided, it will report up
+ -- to `limit` from `start` of the jobs affected by that issue.
+ --
+ -- # If no group, then...
+ -- {
+ -- 'group1': 1,
+ -- 'group2': 5,
+ -- ...
+ -- }
+ --
+ -- # If a group is provided, then...
+ -- {
+ -- 'total': 20,
+ -- 'jobs': [
+ -- {
+ -- # All the normal keys for a job
+ -- 'jid': ...,
+ -- 'data': ...
+ -- # The message for this particular instance
+ -- 'message': ...,
+ -- 'group': ...,
+ -- }, ...
+ -- ]
+ -- }
+ --
+ function Qless.failed(group, start, limit)
+ start = assert(tonumber(start or 0),
+ 'Failed(): Arg "start" is not a number: ' .. (start or 'nil'))
+ limit = assert(tonumber(limit or 25),
+ 'Failed(): Arg "limit" is not a number: ' .. (limit or 'nil'))
+
+ if group then
+ -- If a group was provided, then we should do paginated lookup
+ return {
+ total = redis.call('llen', 'ql:f:' .. group),
+ jobs = redis.call('lrange', 'ql:f:' .. group, start, start + limit - 1)
+ }
+ else
+ -- Otherwise, we should just list all the known failure groups we have
+ local response = {}
+ local groups = redis.call('smembers', 'ql:failures')
+ for index, group in ipairs(groups) do
+ response[group] = redis.call('llen', 'ql:f:' .. group)
+ end
+ return response
+ end
+ end
+
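Usage note: this is the script behind failure inspection. A minimal sketch from the Ruby side, assuming the client.jobs.failed API ('ruby-error' is a hypothetical group name):

    client.jobs.failed                        # => { 'ruby-error' => 3 }  (group => count)
    page = client.jobs.failed('ruby-error')   # paginated, first 25 by default
    page['total']                             # => 3
    page['jobs']                              # the affected jobs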
+ -- Jobs(now, 'complete', [offset, [count]])
+ -- Jobs(now, (
+ -- 'stalled' | 'running' | 'scheduled' | 'depends' | 'recurring'
+ -- ), queue, [offset, [count]])
+ -------------------------------------------------------------------------------
+ -- Return all the job ids currently considered to be in the provided state
+ -- in a particular queue. The response is a list of job ids:
+ --
+ -- [
+ -- jid1,
+ -- jid2,
+ -- ...
+ -- ]
+ function Qless.jobs(now, state, ...)
+ assert(state, 'Jobs(): Arg "state" missing')
+ if state == 'complete' then
+ local offset = assert(tonumber(arg[1] or 0),
+ 'Jobs(): Arg "offset" not a number: ' .. tostring(arg[1]))
+ local count = assert(tonumber(arg[2] or 25),
+ 'Jobs(): Arg "count" not a number: ' .. tostring(arg[2]))
+ return redis.call('zrevrange', 'ql:completed', offset,
+ offset + count - 1)
+ else
+ local name = assert(arg[1], 'Jobs(): Arg "queue" missing')
+ local offset = assert(tonumber(arg[2] or 0),
+ 'Jobs(): Arg "offset" not a number: ' .. tostring(arg[2]))
+ local count = assert(tonumber(arg[3] or 25),
+ 'Jobs(): Arg "count" not a number: ' .. tostring(arg[3]))
+
+ local queue = Qless.queue(name)
+ if state == 'running' then
+ return queue.locks.peek(now, offset, count)
+ elseif state == 'stalled' then
+ return queue.locks.expired(now, offset, count)
+ elseif state == 'scheduled' then
+ queue:check_scheduled(now, queue.scheduled.length())
+ return queue.scheduled.peek(now, offset, count)
+ elseif state == 'depends' then
+ return queue.depends.peek(now, offset, count)
+ elseif state == 'recurring' then
+ return queue.recurring.peek(math.huge, offset, count)
+ else
+ error('Jobs(): Unknown type "' .. state .. '"')
+ end
+ end
+ end
+
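Usage note: these state queries surface per queue in the Ruby client. A sketch, assuming the queue.jobs accessors ('example' is a hypothetical queue name):

    queue = client.queues['example']
    queue.jobs.running     # jids with live locks
    queue.jobs.stalled     # jids whose locks have expired
    queue.jobs.scheduled   # jids waiting out a delay
    queue.jobs.depends     # jids blocked on dependencies
    queue.jobs.recurring   # recurring job templates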
+ -- Track()
+ -- Track(now, ('track' | 'untrack'), jid)
+ -- ------------------------------------------
+ -- If no arguments are provided, it returns details of all currently-tracked
+ -- jobs. If the first argument is 'track', then it will start tracking the job
+ -- associated with that id, and 'untrack' stops tracking it. In this context,
+ -- tracking is nothing more than saving the job to a list of jobs that are
+ -- considered special.
+ --
+ -- {
+ -- 'jobs': [
+ -- {
+ -- 'jid': ...,
+ -- # All the other details you'd get from 'get'
+ -- }, {
+ -- ...
+ -- }
+ -- ], 'expired': [
+ -- # These are all the jids that are completed and whose data expired
+ -- 'deadbeef',
+ -- ...,
+ -- ...,
+ -- ]
+ -- }
+ --
+ function Qless.track(now, command, jid)
+ if command ~= nil then
+ assert(jid, 'Track(): Arg "jid" missing')
+ -- Verify that job exists
+ assert(Qless.job(jid):exists(), 'Track(): Job does not exist')
+ if string.lower(command) == 'track' then
+ Qless.publish('track', jid)
+ return redis.call('zadd', 'ql:tracked', now, jid)
+ elseif string.lower(command) == 'untrack' then
+ Qless.publish('untrack', jid)
+ return redis.call('zrem', 'ql:tracked', jid)
+ else
+ error('Track(): Unknown action "' .. command .. '"')
+ end
+ else
+ local response = {
+ jobs = {},
+ expired = {}
+ }
+ local jids = redis.call('zrange', 'ql:tracked', 0, -1)
+ for index, jid in ipairs(jids) do
+ local data = Qless.job(jid):data()
+ if data then
+ table.insert(response.jobs, data)
+ else
+ table.insert(response.expired, jid)
+ end
+ end
+ return response
+ end
+ end
+
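Usage note: tracking just flags jobs for the web UI and pub/sub events. A sketch, assuming the Ruby client's job.track/untrack and client.jobs.tracked (jid is a hypothetical id):

    job = client.jobs[jid]
    job.track                  # zadd ql:tracked <now> <jid>, publishes 'track'
    client.jobs.tracked        # => { 'jobs' => [...], 'expired' => [...] }
    job.untrack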
+ -- tag(now, ('add' | 'remove'), jid, tag, [tag, ...])
+ -- tag(now, 'get', tag, [offset, [count]])
+ -- tag(now, 'top', [offset, [count]])
+ -- -----------------------------------------------------------------------------
+ -- Accepts a jid, 'add' or 'remove', and then a list of tags
+ -- to either add or remove from the job. Alternatively, 'get',
+ -- a tag to get jobs associated with that tag, and offset and
+ -- count
+ --
+ -- If 'add' or 'remove', the response is a list of the job's
+ -- current tags, or False if the job doesn't exist. If 'get',
+ -- the response is of the form:
+ --
+ -- {
+ -- total: ...,
+ -- jobs: [
+ -- jid,
+ -- ...
+ -- ]
+ -- }
+ --
+ -- If 'top' is supplied, it returns the most commonly-used tags
+ -- in a paginated fashion.
+ function Qless.tag(now, command, ...)
+ assert(command,
+ 'Tag(): Arg "command" must be "add", "remove", "get" or "top"')
+
+ if command == 'add' then
+ local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
+ local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
+ -- If the job has been canceled / deleted, then return false
+ if tags then
+ -- Decode the json blob, convert to dictionary
+ tags = cjson.decode(tags)
+ local _tags = {}
+ for i,v in ipairs(tags) do _tags[v] = true end
+
+ -- Otherwise, add the job to the sorted sets for those tags
+ for i=2,#arg do
+ local tag = arg[i]
+ if _tags[tag] == nil then
+ _tags[tag] = true
+ table.insert(tags, tag)
+ end
+ redis.call('zadd', 'ql:t:' .. tag, now, jid)
+ redis.call('zincrby', 'ql:tags', 1, tag)
+ end
+
+ redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(tags))
+ return tags
+ else
+ error('Tag(): Job ' .. jid .. ' does not exist')
+ end
+ elseif command == 'remove' then
+ local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
+ local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
+ -- If the job has been canceled / deleted, then return false
+ if tags then
+ -- Decode the json blob, convert to dictionary
+ tags = cjson.decode(tags)
+ local _tags = {}
+ for i,v in ipairs(tags) do _tags[v] = true end
+
+ -- Otherwise, remove the job from the sorted sets for those tags
+ for i=2,#arg do
+ local tag = arg[i]
+ _tags[tag] = nil
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+
+ local results = {}
+ for i,tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
+
+ redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(results))
+ return results
+ else
+ error('Tag(): Job ' .. jid .. ' does not exist')
+ end
+ elseif command == 'get' then
+ local tag = assert(arg[1], 'Tag(): Arg "tag" missing')
+ local offset = assert(tonumber(arg[2] or 0),
+ 'Tag(): Arg "offset" not a number: ' .. tostring(arg[2]))
+ local count = assert(tonumber(arg[3] or 25),
+ 'Tag(): Arg "count" not a number: ' .. tostring(arg[3]))
+ return {
+ total = redis.call('zcard', 'ql:t:' .. tag),
+ jobs = redis.call('zrange', 'ql:t:' .. tag, offset, offset + count - 1)
+ }
+ elseif command == 'top' then
+ local offset = assert(tonumber(arg[1] or 0) , 'Tag(): Arg "offset" not a number: ' .. tostring(arg[1]))
+ local count = assert(tonumber(arg[2] or 25), 'Tag(): Arg "count" not a number: ' .. tostring(arg[2]))
+ return redis.call('zrevrangebyscore', 'ql:tags', '+inf', 2, 'limit', offset, count)
+ else
+ error('Tag(): First argument must be "add", "remove" or "get"')
+ end
+ end
+
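Usage note: a sketch of the tagging calls from the Ruby side, assuming the client exposes job.tag/untag and client.jobs.tagged (tag names and jid are hypothetical):

    job = client.jobs[jid]
    job.tag('images', 'urgent')    # returns the job's updated tag list
    job.untag('urgent')
    client.jobs.tagged('images')   # => { 'total' => 1, 'jobs' => [jid] }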
+ -- Cancel(...)
+ -- --------------
+ -- Cancel a job from taking place. It will be deleted from the system, and any
+ -- attempts to renew a heartbeat will fail, and any attempts to complete it
+ -- will fail. If you try to get the data on the object, you will get nothing.
+ function Qless.cancel(...)
+ -- Dependents is a mapping of a job to its dependent jids
+ local dependents = {}
+ for _, jid in ipairs(arg) do
+ dependents[jid] = redis.call(
+ 'smembers', QlessJob.ns .. jid .. '-dependents') or {}
+ end
+
+ -- Now, we'll loop through every jid we intend to cancel, and we'll go
+ -- make sure that this operation will be ok
+ for i, jid in ipairs(arg) do
+ for j, dep in ipairs(dependents[jid]) do
+ if dependents[dep] == nil then
+ error('Cancel(): ' .. jid .. ' is a dependency of ' .. dep ..
+ ' but is not mentioned to be canceled')
+ end
+ end
+ end
+
+ -- If we've made it this far, then we are good to go. We can now just
+ -- remove any trace of all these jobs, as they form a dependent clique
+ for _, jid in ipairs(arg) do
+ -- Find any stage it's associated with and remove it from that stage
+ local state, queue, failure, worker = unpack(redis.call(
+ 'hmget', QlessJob.ns .. jid, 'state', 'queue', 'failure', 'worker'))
+
+ if state ~= 'complete' then
+ -- Send a message out on the appropriate channels
+ local encoded = cjson.encode({
+ jid = jid,
+ worker = worker,
+ event = 'canceled',
+ queue = queue
+ })
+ Qless.publish('log', encoded)
+
+ -- Remove this job from whatever worker has it, if any
+ if worker and (worker ~= '') then
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
+ -- If necessary, send a message to the appropriate worker, too
+ Qless.publish('w:' .. worker, encoded)
+ end
+
+ -- Remove it from that queue
+ if queue then
+ local queue = Qless.queue(queue)
+ queue.work.remove(jid)
+ queue.locks.remove(jid)
+ queue.scheduled.remove(jid)
+ queue.depends.remove(jid)
+ end
+
+ -- We should probably go through all our dependencies and remove
+ -- ourselves from the list of dependents
+ for i, j in ipairs(redis.call(
+ 'smembers', QlessJob.ns .. jid .. '-dependencies')) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependents', jid)
+ end
+
+ -- Delete any notion of dependencies it has
+ redis.call('del', QlessJob.ns .. jid .. '-dependencies')
+
+ -- If we're in the failed state, remove all of our data
+ if state == 'failed' then
+ failure = cjson.decode(failure)
+ -- We need to make this remove it from the failed queues
+ redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
+ if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
+ redis.call('srem', 'ql:failures', failure.group)
+ end
+ -- Remove one count from the failed count of the particular
+ -- queue
+ local bin = failure.when - (failure.when % 86400)
+ local failed = redis.call(
+ 'hget', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed')
+ redis.call('hset',
+ 'ql:s:stats:' .. bin .. ':' .. queue, 'failed', failed - 1)
+ end
+
+ -- Remove it as a job that's tagged with this particular tag
+ local tags = cjson.decode(
+ redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
+ for i, tag in ipairs(tags) do
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+
+ -- If the job was being tracked, we should notify
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
+ Qless.publish('canceled', jid)
+ end
+
+ -- Just go ahead and delete our data
+ redis.call('del', QlessJob.ns .. jid)
+ redis.call('del', QlessJob.ns .. jid .. '-history')
+ end
+ end
+
+ return arg
+ end
+
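Usage note: because cancellation refuses to orphan dependents, a job and any jobs that depend on it must be canceled in the same call. A sketch, assuming the Ruby client's Job#cancel maps onto this script (jid is hypothetical):

    job = client.jobs[jid]
    job.cancel    # raises if some other, un-canceled job still depends on this one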
+ -------------------------------------------------------------------------------
+ -- Configuration interactions
+ -------------------------------------------------------------------------------
+
+ -- This represents our default configuration settings
+ Qless.config.defaults = {
+ ['application'] = 'qless',
+ ['heartbeat'] = 60,
+ ['grace-period'] = 10,
+ ['stats-history'] = 30,
+ ['histogram-history'] = 7,
+ ['jobs-history-count'] = 50000,
+ ['jobs-history'] = 604800
+ }
+
+ -- Get one or more of the keys
+ Qless.config.get = function(key, default)
+ if key then
+ return redis.call('hget', 'ql:config', key) or
+ Qless.config.defaults[key] or default
+ else
+ -- Inspired by redis-lua https://github.com/nrk/redis-lua/blob/version-2.0/src/redis.lua
+ local reply = redis.call('hgetall', 'ql:config')
+ for i = 1, #reply, 2 do
+ Qless.config.defaults[reply[i]] = reply[i + 1]
+ end
+ return Qless.config.defaults
+ end
+ end
+
+ -- Set a configuration variable
+ Qless.config.set = function(option, value)
+ assert(option, 'config.set(): Arg "option" missing')
+ assert(value , 'config.set(): Arg "value" missing')
+ -- Send out a log message
+ Qless.publish('log', cjson.encode({
+ event = 'config_set',
+ option = option,
+ value = value
+ }))
+
+ redis.call('hset', 'ql:config', option, value)
+ end
+
+ -- Unset a configuration option
+ Qless.config.unset = function(option)
+ assert(option, 'config.unset(): Arg "option" missing')
+ -- Send out a log message
+ Qless.publish('log', cjson.encode({
+ event = 'config_unset',
+ option = option
+ }))
+
+ redis.call('hdel', 'ql:config', option)
+ end
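Usage note: these settings live as plain fields in the ql:config hash, with the defaults above as fallbacks. A sketch, assuming the Ruby client's config accessor:

    client.config['heartbeat']         # => 60 until overridden
    client.config['heartbeat'] = 120   # hset ql:config heartbeat 120, logged as 'config_set'
    client.config.all                  # defaults merged with any overrides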
+ -------------------------------------------------------------------------------
+ -- Job Class
+ --
+ -- It returns an object that represents the job with the provided JID
+ -------------------------------------------------------------------------------
+
+ -- This gets all the data associated with the job with the provided id. If the
+ -- job is not found, it returns nil. If found, it returns an object with the
+ -- appropriate properties
+ function QlessJob:data(...)
+ local job = redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'jid', 'klass', 'state', 'queue',
+ 'worker', 'priority', 'expires', 'retries', 'remaining', 'data',
+ 'tags', 'failure', 'spawned_from_jid')
+
+ -- Return nil if we haven't found it
+ if not job[1] then
+ return nil
+ end
+
+ local data = {
+ jid = job[1],
+ klass = job[2],
+ state = job[3],
+ queue = job[4],
+ worker = job[5] or '',
+ tracked = redis.call(
+ 'zscore', 'ql:tracked', self.jid) ~= false,
+ priority = tonumber(job[6]),
+ expires = tonumber(job[7]) or 0,
+ retries = tonumber(job[8]),
+ remaining = math.floor(tonumber(job[9])),
+ data = job[10],
+ tags = cjson.decode(job[11]),
+ history = self:history(),
+ failure = cjson.decode(job[12] or '{}'),
+ spawned_from_jid = job[13],
+ dependents = redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependents'),
+ dependencies = redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependencies')
+ }
+
+ if #arg > 0 then
+ -- This section could probably be optimized, but I wanted the interface
+ -- in place first
+ local response = {}
+ for index, key in ipairs(arg) do
+ table.insert(response, data[key])
+ end
+ return response
+ else
+ return data
+ end
+ end
+
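Usage note: this blob is what hydrates the job object on the Ruby side. A sketch, assuming client.jobs[] and the usual Qless::Job readers (jid is hypothetical):

    job = client.jobs[jid]    # nil if the jid is unknown or its data has expired
    job.state                 # 'waiting', 'running', 'depends', 'complete', ...
    job.data                  # the user-supplied payload
    job.tags                  # plus job.dependencies and job.dependents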
+ -- Complete a job and optionally put it in another queue, either scheduled or
+ -- to be considered waiting immediately. It can also optionally accept other
+ -- jids on which this job will be considered dependent before it's considered
+ -- valid.
+ --
+ -- The variable-length arguments may be pairs of the form:
+ --
+ -- ('next' , queue) : The queue to advance it to next
+ -- ('delay' , delay) : The delay for the next queue
+ -- ('depends', : Json of jobs it depends on in the new queue
+ -- '["jid1", "jid2", ...]')
+ ---
+ function QlessJob:complete(now, worker, queue, data, ...)
+ assert(worker, 'Complete(): Arg "worker" missing')
+ assert(queue , 'Complete(): Arg "queue" missing')
+ data = assert(cjson.decode(data),
+ 'Complete(): Arg "data" missing or not JSON: ' .. tostring(data))
+
+ -- Read in all the optional parameters
+ local options = {}
+ for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
+
+ -- Sanity check on optional args
+ local nextq = options['next']
+ local delay = assert(tonumber(options['delay'] or 0))
+ local depends = assert(cjson.decode(options['depends'] or '[]'),
+ 'Complete(): Arg "depends" not JSON: ' .. tostring(options['depends']))
+
+ -- Delay doesn't make sense without nextq
+ if options['delay'] and nextq == nil then
+ error('Complete(): "delay" cannot be used without a "next".')
+ end
+
+ -- Depends doesn't make sense without nextq
+ if options['depends'] and nextq == nil then
+ error('Complete(): "depends" cannot be used without a "next".')
+ end
+
+ -- The bin is midnight of the provided day
+ -- 24 * 60 * 60 = 86400
+ local bin = now - (now % 86400)
+
+ -- First things first, we should see if the worker still owns this job
+ local lastworker, state, priority, retries, current_queue = unpack(
+ redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state',
+ 'priority', 'retries', 'queue'))
+
+ if lastworker == false then
+ error('Complete(): Job does not exist')
+ elseif (state ~= 'running') then
+ error('Complete(): Job is not currently running: ' .. state)
+ elseif lastworker ~= worker then
+ error('Complete(): Job has been handed out to another worker: ' ..
+ tostring(lastworker))
+ elseif queue ~= current_queue then
+ error('Complete(): Job running in another queue: ' ..
+ tostring(current_queue))
+ end
+
+ -- Now we can assume that the worker does own the job. We need to
+ -- 1) Remove the job from the 'locks' from the old queue
+ -- 2) Enqueue it in the next stage if necessary
+ -- 3) Update the data
+ -- 4) Mark the job as completed, remove the worker, remove expires, and
+ -- update history
+ self:history(now, 'done')
+
+ if data then
+ redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
+ end
+
+ -- Remove the job from the previous queue
+ local queue_obj = Qless.queue(queue)
+ queue_obj.work.remove(self.jid)
+ queue_obj.locks.remove(self.jid)
+ queue_obj.scheduled.remove(self.jid)
+
+ ----------------------------------------------------------
+ -- This is the massive stats update that we have to do
+ ----------------------------------------------------------
+ -- This is how long we've been waiting to get popped
+ -- local waiting = math.floor(now) - history[#history]['popped']
+ local time = tonumber(
+ redis.call('hget', QlessJob.ns .. self.jid, 'time') or now)
+ local waiting = now - time
+ Qless.queue(queue):stat(now, 'run', waiting)
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'time', string.format("%.20f", now))
+
+ -- Remove this job from the jobs that the worker that was running it has
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
+
+ if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
+ Qless.publish('completed', self.jid)
+ end
+
+ if nextq then
+ queue_obj = Qless.queue(nextq)
+ -- Send a message out to log
+ Qless.publish('log', cjson.encode({
+ jid = self.jid,
+ event = 'advanced',
+ queue = queue,
+ to = nextq
+ }))
+
+ -- Enqueue the job
+ self:history(now, 'put', {q = nextq})
+
+ -- We're going to make sure that this queue is in the
+ -- set of known queues
+ if redis.call('zscore', 'ql:queues', nextq) == false then
+ redis.call('zadd', 'ql:queues', now, nextq)
+ end
+
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'waiting',
+ 'worker', '',
+ 'failure', '{}',
+ 'queue', nextq,
+ 'expires', 0,
+ 'remaining', tonumber(retries))
+
+ if (delay > 0) and (#depends == 0) then
+ queue_obj.scheduled.add(now + delay, self.jid)
+ return 'scheduled'
+ else
+ -- These are the jids we legitimately have to wait on
+ local count = 0
+ for i, j in ipairs(depends) do
+ -- Make sure it's something other than 'nil' or complete.
+ local state = redis.call('hget', QlessJob.ns .. j, 'state')
+ if (state and state ~= 'complete') then
+ count = count + 1
+ redis.call(
+ 'sadd', QlessJob.ns .. j .. '-dependents',self.jid)
+ redis.call(
+ 'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
+ end
+ end
+ if count > 0 then
+ queue_obj.depends.add(now, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'depends')
+ if delay > 0 then
+ -- We've already put it in 'depends'. Now, we must just save the data
+ -- for when it's scheduled
+ queue_obj.depends.add(now, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'scheduled', now + delay)
+ end
+ return 'depends'
+ else
+ queue_obj.work.add(now, priority, self.jid)
+ return 'waiting'
+ end
+ end
+ else
+ -- Send a message out to log
+ Qless.publish('log', cjson.encode({
+ jid = self.jid,
+ event = 'completed',
+ queue = queue
+ }))
+
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'complete',
+ 'worker', '',
+ 'failure', '{}',
+ 'queue', '',
+ 'expires', 0,
+ 'remaining', tonumber(retries))
+
+ -- Do the completion dance
+ local count = Qless.config.get('jobs-history-count')
+ local time = Qless.config.get('jobs-history')
+
+ -- These are the default values
+ count = tonumber(count or 50000)
+ time = tonumber(time or 7 * 24 * 60 * 60)
+
+ -- Schedule this job for destruction eventually
+ redis.call('zadd', 'ql:completed', now, self.jid)
+
+ -- Now look at the expired job data. First, based on the current time
+ local jids = redis.call('zrangebyscore', 'ql:completed', 0, now - time)
+ -- Any jobs that need to be expired... delete
+ for index, jid in ipairs(jids) do
+ local tags = cjson.decode(
+ redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
+ for i, tag in ipairs(tags) do
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+ redis.call('del', QlessJob.ns .. jid)
+ redis.call('del', QlessJob.ns .. jid .. '-history')
+ end
+ -- And now remove those from the queued-for-cleanup queue
+ redis.call('zremrangebyscore', 'ql:completed', 0, now - time)
+
+ -- Now remove all but the most recent 'count' ids
+ jids = redis.call('zrange', 'ql:completed', 0, (-1-count))
+ for index, jid in ipairs(jids) do
+ local tags = cjson.decode(
+ redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
+ for i, tag in ipairs(tags) do
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+ redis.call('del', QlessJob.ns .. jid)
+ redis.call('del', QlessJob.ns .. jid .. '-history')
+ end
+ redis.call('zremrangebyrank', 'ql:completed', 0, (-1-count))
+
+ -- Alright, if this has any dependents, then we should go ahead
+ -- and unstick those guys.
+ for i, j in ipairs(redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependents')) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependencies', self.jid)
+ if redis.call(
+ 'scard', QlessJob.ns .. j .. '-dependencies') == 0 then
+ local q, p, scheduled = unpack(
+ redis.call('hmget', QlessJob.ns .. j, 'queue', 'priority', 'scheduled'))
+ if q then
+ local queue = Qless.queue(q)
+ queue.depends.remove(j)
+ if scheduled then
+ queue.scheduled.add(scheduled, j)
+ redis.call('hset', QlessJob.ns .. j, 'state', 'scheduled')
+ redis.call('hdel', QlessJob.ns .. j, 'scheduled')
+ else
+ queue.work.add(now, p, j)
+ redis.call('hset', QlessJob.ns .. j, 'state', 'waiting')
+ end
+ end
+ end
+ end
+
+ -- Delete our dependents key
+ redis.call('del', QlessJob.ns .. self.jid .. '-dependents')
+
+ return 'complete'
+ end
+ end
+
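Usage note: the 'next'/'delay'/'depends' pairs are how the Ruby client expresses pipelines at completion time. A sketch, assuming a Job#complete(next_queue, delay:, depends:) signature (queue names and jids hypothetical):

    job = client.queues['resize'].pop
    job.complete                                        # terminal: state becomes 'complete'
    # or advance the job to the next stage of a pipeline:
    job.complete('upload', delay: 10, depends: [other_jid])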
+ -- Fail(now, worker, group, message, [data])
+ -- -------------------------------------------------
+ -- Mark the particular job as failed, with the provided group, and a more
+ -- specific message. By `group`, we mean some phrase that might be one of
+ -- several categorical modes of failure. The `message` is something more
+ -- job-specific, like perhaps a traceback.
+ --
+ -- This method should __not__ be used to note that a job has been dropped or
+ -- has failed in a transient way. This method __should__ be used to note that
+ -- a job has something really wrong with it that must be remedied.
+ --
+ -- The motivation behind the `group` is so that similar errors can be grouped
+ -- together. Optionally, updated data can be provided for the job. A job in
+ -- any state can be marked as failed. If it has been given to a worker as a
+ -- job, then its subsequent requests to heartbeat or complete that job will
+ -- fail. Failed jobs are kept until they are canceled or completed.
+ --
+ -- __Returns__ the id of the failed job if successful, or `False` on failure.
+ --
+ -- Args:
+ -- 1) jid
+ -- 2) worker
+ -- 3) group
+ -- 4) message
+ -- 5) the current time
+ -- 6) [data]
+ function QlessJob:fail(now, worker, group, message, data)
+ local worker = assert(worker , 'Fail(): Arg "worker" missing')
+ local group = assert(group , 'Fail(): Arg "group" missing')
+ local message = assert(message , 'Fail(): Arg "message" missing')
+
+ -- The bin is midnight of the provided day
+ -- 24 * 60 * 60 = 86400
+ local bin = now - (now % 86400)
+
+ if data then
+ data = cjson.decode(data)
+ end
+
+ -- First things first, we should get the history
+ local queue, state, oldworker = unpack(redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
+
+ -- If the job has been completed, we cannot fail it
+ if not state then
+ error('Fail(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Fail(): Job not currently running: ' .. state)
+ elseif worker ~= oldworker then
+ error('Fail(): Job running with another worker: ' .. oldworker)
+ end
+
+ -- Send out a log message
+ Qless.publish('log', cjson.encode({
+ jid = self.jid,
+ event = 'failed',
+ worker = worker,
+ group = group,
+ message = message
+ }))
+
+ if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
+ Qless.publish('failed', self.jid)
+ end
+
+ -- Remove this job from the jobs that the worker that was running it has
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
+
+ -- Now, take the element of the history for which our provided worker is
+ -- the worker, and update 'failed'
+ self:history(now, 'failed', {worker = worker, group = group})
+
+ -- Increment the number of failures for that queue for the
+ -- given day.
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
+
+ -- Now remove the instance from the schedule, and work queues for the
+ -- queue it's in
+ local queue_obj = Qless.queue(queue)
+ queue_obj.work.remove(self.jid)
+ queue_obj.locks.remove(self.jid)
+ queue_obj.scheduled.remove(self.jid)
+
+ -- The reason that this appears here is that the above will fail if the
+ -- job doesn't exist
+ if data then
+ redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
+ end
+
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'failed',
+ 'worker', '',
+ 'expires', '',
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] = message,
+ ['when'] = math.floor(now),
+ ['worker'] = worker
+ }))
+
+ -- Add this failure group to the list of failures
+ redis.call('sadd', 'ql:failures', group)
+ -- And add this particular instance to the failed groups
+ redis.call('lpush', 'ql:f:' .. group, self.jid)
+
+ -- Here is where we'd increment stats about the particular stage
+ -- and possibly the workers
+
+ return self.jid
+ end
+
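Usage note: a sketch of reporting a permanent failure from a worker, assuming the Ruby client's Job#fail(group, message); the group name is hypothetical:

    job = queue.pop
    begin
      job.perform
    rescue StandardError => e
      # the group buckets similar errors together in the failed view
      job.fail('ruby-error', "#{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
    end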
+ -- retry(now, queue, worker, [delay, [group, [message]]])
+ -- ------------------------------------------
+ -- This script accepts jid, queue, worker and delay for retrying a job. This
+ -- is similar in functionality to `put`, except that this counts against the
+ -- retries a job has for a stage.
+ --
+ -- Throws an exception if:
+ -- - the worker is not the worker with a lock on the job
+ -- - the job is not actually running
+ --
+ -- Otherwise, it returns the number of retries remaining. If the allowed
+ -- retries have been exhausted, then it is automatically failed, and a negative
+ -- number is returned.
+ --
+ -- If a group and message is provided, then if the retries are exhausted, then
+ -- the provided group and message will be used in place of the default
+ -- messaging about retries in the particular queue being exhausted
+ function QlessJob:retry(now, queue, worker, delay, group, message)
+ assert(queue , 'Retry(): Arg "queue" missing')
+ assert(worker, 'Retry(): Arg "worker" missing')
+ delay = assert(tonumber(delay or 0),
+ 'Retry(): Arg "delay" not a number: ' .. tostring(delay))
+
+ -- Let's see what the old priority and tags were
+ local oldqueue, state, retries, oldworker, priority, failure = unpack(
+ redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'state',
+ 'retries', 'worker', 'priority', 'failure'))
+
+ -- If this isn't the worker that owns the job, throw an error
+ if oldworker == false then
+ error('Retry(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Retry(): Job is not currently running: ' .. state)
+ elseif oldworker ~= worker then
+ error('Retry(): Job has been given to another worker: ' .. oldworker)
+ end
+
+ -- For each of these, decrement their retries. If any of them
+ -- have exhausted their retries, then we should mark them as
+ -- failed.
+ local remaining = tonumber(redis.call(
+ 'hincrby', QlessJob.ns .. self.jid, 'remaining', -1))
+ redis.call('hdel', QlessJob.ns .. self.jid, 'grace')
+
+ -- Remove it from the locks key of the old queue
+ Qless.queue(oldqueue).locks.remove(self.jid)
+
+ -- Remove this job from the worker that was previously working it
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
+
+ if remaining < 0 then
+ -- Now remove the instance from the schedule, and work queues for the
+ -- queue it's in
+ local group = group or 'failed-retries-' .. queue
+ self:history(now, 'failed', {['group'] = group})
+
+ redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'failed',
+ 'worker', '',
+ 'expires', '')
+ -- If the failure has not already been set, then set it
+ if group ~= nil and message ~= nil then
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] = message,
+ ['when'] = math.floor(now),
+ ['worker'] = worker
+ })
+ )
+ else
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] =
+ 'Job exhausted retries in queue "' .. oldqueue .. '"',
+ ['when'] = now,
+ ['worker'] = unpack(self:data('worker'))
+ }))
+ end
+
+ -- Add this type of failure to the list of failures
+ redis.call('sadd', 'ql:failures', group)
+ -- And add this particular instance to the failed types
+ redis.call('lpush', 'ql:f:' .. group, self.jid)
+ -- Increment the count of the failed jobs
+ local bin = now - (now % 86400)
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
+ else
+ -- Put it in the queue again with a delay. Like put()
+ local queue_obj = Qless.queue(queue)
+ if delay > 0 then
+ queue_obj.scheduled.add(now + delay, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'scheduled')
+ else
+ queue_obj.work.add(now, priority, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
+ end
+
+ -- If a group and a message was provided, then we should save it
+ if group ~= nil and message ~= nil then
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] = message,
+ ['when'] = math.floor(now),
+ ['worker'] = worker
+ })
+ )
+ end
+ end
+
+ return math.floor(remaining)
+ end
+
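Usage note: a sketch of the retry call, assuming the Ruby client's Job#retry(delay, group, message) mirrors the script's optional arguments:

    job.retry                              # back on the queue immediately
    job.retry(30)                          # scheduled 30 seconds out
    job.retry(30, 'timeout', 'gave up')    # custom failure if retries run out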
+ -- Depends(jid, 'on', [jid, [jid, [...]]]
+ -- Depends(jid, 'off', [jid, [jid, [...]]])
+ -- Depends(jid, 'off', 'all')
+ -------------------------------------------------------------------------------
+ -- Add or remove dependencies a job has. If 'on' is provided, the provided
+ -- jids are added as dependencies. If 'off' and 'all' are provided, then all
+ -- the current dependencies are removed. If 'off' is provided and the next
+ -- argument is not 'all', then those jids are removed as dependencies.
+ --
+ -- If a job is not already in the 'depends' state, then this call will return
+ -- false. Otherwise, it will return true
+ function QlessJob:depends(now, command, ...)
+ assert(command, 'Depends(): Arg "command" missing')
+ local state = redis.call('hget', QlessJob.ns .. self.jid, 'state')
+ if state ~= 'depends' then
+ error('Depends(): Job ' .. self.jid ..
+ ' not in the depends state: ' .. tostring(state))
+ end
+
+ if command == 'on' then
+ -- These are the jids we legitimately have to wait on
+ for i, j in ipairs(arg) do
+ -- Make sure it's something other than 'nil' or complete.
+ local state = redis.call('hget', QlessJob.ns .. j, 'state')
+ if (state and state ~= 'complete') then
+ redis.call(
+ 'sadd', QlessJob.ns .. j .. '-dependents' , self.jid)
+ redis.call(
+ 'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
+ end
+ end
+ return true
+ elseif command == 'off' then
+ if arg[1] == 'all' then
+ for i, j in ipairs(redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependencies')) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
+ end
+ redis.call('del', QlessJob.ns .. self.jid .. '-dependencies')
+ local q, p = unpack(redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
+ if q then
+ local queue_obj = Qless.queue(q)
+ queue_obj.depends.remove(self.jid)
+ queue_obj.work.add(now, p, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
+ end
+ else
+ for i, j in ipairs(arg) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
+ redis.call(
+ 'srem', QlessJob.ns .. self.jid .. '-dependencies', j)
+ if redis.call('scard',
+ QlessJob.ns .. self.jid .. '-dependencies') == 0 then
+ local q, p = unpack(redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
+ if q then
+ local queue_obj = Qless.queue(q)
+ queue_obj.depends.remove(self.jid)
+ queue_obj.work.add(now, p, self.jid)
+ redis.call('hset',
+ QlessJob.ns .. self.jid, 'state', 'waiting')
+ end
+ end
+ end
+ end
+ return true
+ else
+ error('Depends(): Argument "command" must be "on" or "off"')
+ end
+ end
+
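Usage note: dependencies are normally declared at put time; this script additionally lets them be edited while a job is still in the 'depends' state. A sketch, assuming queue.put(..., depends:) and Job#depend/undepend in the Ruby client (the job classes are hypothetical):

    a = queue.put(ExtractJob, {})
    b = queue.put(TransformJob, {}, depends: [a])    # held until a completes
    client.jobs[b].undepend(a)                       # releases b to 'waiting'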
+ -- Heartbeat
+ ------------
+ -- Renew this worker's lock on this job. Throws an exception if:
+ -- - the job's been given to another worker
+ -- - the job's been completed
+ -- - the job's been canceled
+ -- - the job's not running
+ function QlessJob:heartbeat(now, worker, data)
+ assert(worker, 'Heatbeat(): Arg "worker" missing')
+
+ -- We should find the heartbeat interval for this queue
+ -- heartbeat. First, though, we need to find the queue
+ -- this particular job is in
+ local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue') or ''
+ local expires = now + tonumber(
+ Qless.config.get(queue .. '-heartbeat') or
+ Qless.config.get('heartbeat', 60))
+
+ if data then
+ data = cjson.decode(data)
+ end
+
+ -- First, let's see if the worker still owns this job, and there is a
+ -- worker
+ local job_worker, state = unpack(
+ redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state'))
+ if job_worker == false then
+ -- This means the job doesn't exist
+ error('Heartbeat(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Heartbeat(): Job not currently running: ' .. state)
+ elseif job_worker ~= worker or #job_worker == 0 then
+ error('Heartbeat(): Job given out to another worker: ' .. job_worker)
+ else
+ -- Otherwise, optionally update the user data, and the heartbeat
+ if data then
+ -- I don't know if this is wise, but I'm decoding and encoding
+ -- the user data to hopefully ensure its sanity
+ redis.call('hmset', QlessJob.ns .. self.jid, 'expires',
+ expires, 'worker', worker, 'data', cjson.encode(data))
+ else
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'expires', expires, 'worker', worker)
+ end
+
+ -- Update when this job was last updated on that worker
+ -- Add this job to the list of jobs handled by this worker
+ redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, self.jid)
+
+ -- And now we should just update the locks
+ local queue = Qless.queue(
+ redis.call('hget', QlessJob.ns .. self.jid, 'queue'))
+ queue.locks.add(expires, self.jid)
+ return expires
+ end
+ end
+
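Usage note: workers call this periodically to keep their locks alive; the interval comes from a per-queue '<queue>-heartbeat' setting, falling back to the global 'heartbeat'. A sketch, assuming the Ruby client's Job#heartbeat:

    job = queue.pop
    job.heartbeat    # returns the new expiry timestamp, or raises if the lock was lost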
+ -- Priority
+ -- --------
+ -- Update the priority of this job. If the job doesn't exist, throws an
+ -- exception
+ function QlessJob:priority(priority)
+ priority = assert(tonumber(priority),
+ 'Priority(): Arg "priority" missing or not a number: ' ..
+ tostring(priority))
+
+ -- Get the queue the job is currently in, if any
+ local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue')
+
+ if queue == nil then
+ -- If the job doesn't exist, throw an error
+ error('Priority(): Job ' .. self.jid .. ' does not exist')
+ elseif queue == '' then
+ -- Just adjust the priority
+ redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
+ return priority
+ else
+ -- Adjust the priority and see if it's a candidate for updating
+ -- its priority in the queue it's currently in
+ local queue_obj = Qless.queue(queue)
+ if queue_obj.work.score(self.jid) then
+ queue_obj.work.add(0, priority, self.jid)
+ end
+ redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
+ return priority
+ end
+ end
+
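Usage note: priority can be changed while a job waits, and the queue's sorted set is rescored in place. A sketch, assuming the Ruby client's Job#priority= writer (jid hypothetical):

    job = client.jobs[jid]
    job.priority = 25    # higher numbers pop first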
+ -- Update the job's attributes with the provided dictionary
+ function QlessJob:update(data)
+ local tmp = {}
+ for k, v in pairs(data) do
+ table.insert(tmp, k)
+ table.insert(tmp, v)
+ end
+ redis.call('hmset', QlessJob.ns .. self.jid, unpack(tmp))
+ end
+
+ -- Times out the job now rather than when its lock is normally set to expire
+ function QlessJob:timeout(now)
+ local queue_name, state, worker = unpack(redis.call('hmget',
+ QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
+ if queue_name == nil then
+ error('Timeout(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Timeout(): Job ' .. self.jid .. ' not running')
+ else
+ -- Time out the job
+ self:history(now, 'timed-out')
+ local queue = Qless.queue(queue_name)
+ queue.locks.remove(self.jid)
+ queue.work.add(now, math.huge, self.jid)
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'stalled', 'expires', 0)
+ local encoded = cjson.encode({
+ jid = self.jid,
+ event = 'lock_lost',
+ worker = worker
+ })
+ Qless.publish('w:' .. worker, encoded)
+ Qless.publish('log', encoded)
+ return queue_name
+ end
+ end
+
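Usage note: timing out forcibly releases a running job's lock so another worker can pick it up immediately. A sketch, assuming the Ruby client exposes this as Job#timeout (jid hypothetical):

    client.jobs[jid].timeout    # state goes to 'stalled'; the old worker is sent 'lock_lost'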
+ -- Return whether or not this job exists
+ function QlessJob:exists()
+ return redis.call('exists', QlessJob.ns .. self.jid) == 1
+ end
+
+ -- Get or append to history
+ function QlessJob:history(now, what, item)
+ -- First, check if there's an old-style history, and update it if there is
+ local history = redis.call('hget', QlessJob.ns .. self.jid, 'history')
+ if history then
+ history = cjson.decode(history)
+ for i, value in ipairs(history) do
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode({math.floor(value.put), 'put', {q = value.q}}))
+
+ -- If there's any popped time
+ if value.popped then
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode({math.floor(value.popped), 'popped',
+ {worker = value.worker}}))
+ end
+
+ -- If there's any failure
+ if value.failed then
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode(
+ {math.floor(value.failed), 'failed', nil}))
+ end
+
+ -- If it was completed
+ if value.done then
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode(
+ {math.floor(value.done), 'done', nil}))
+ end
+ end
+ -- With all this ported forward, delete the old-style history
+ redis.call('hdel', QlessJob.ns .. self.jid, 'history')
+ end
+
+ -- Now to the meat of the function
+ if what == nil then
+ -- Get the history
+ local response = {}
+ for i, value in ipairs(redis.call('lrange',
+ QlessJob.ns .. self.jid .. '-history', 0, -1)) do
+ value = cjson.decode(value)
+ local dict = value[3] or {}
+ dict['when'] = value[1]
+ dict['what'] = value[2]
+ table.insert(response, dict)
+ end
+ return response
+ else
+ -- Append to the history. If the length of the history should be limited,
+ -- then we'll truncate it.
+ local count = tonumber(Qless.config.get('max-job-history', 100))
+ if count > 0 then
+ -- We'll always keep the first item around
+ local obj = redis.call('lpop', QlessJob.ns .. self.jid .. '-history')
+ redis.call('ltrim', QlessJob.ns .. self.jid .. '-history', -count + 2, -1)
+ if obj ~= nil then
+ redis.call('lpush', QlessJob.ns .. self.jid .. '-history', obj)
+ end
+ end
+ return redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode({math.floor(now), what, item}))
+ end
+ end
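Usage note: each history entry comes back as a dict with 'when' and 'what' plus event-specific keys. A sketch, assuming the Ruby client surfaces it as queue_history (plain history in earlier releases); the values shown are hypothetical:

    client.jobs[jid].queue_history
    # => [{ 'when' => 1380000000, 'what' => 'put',    'q' => 'example' },
    #     { 'when' => 1380000005, 'what' => 'popped', 'worker' => 'host-1234' }]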
+ -------------------------------------------------------------------------------
+ -- Queue class
+ -------------------------------------------------------------------------------
+ -- Return a queue object
+ function Qless.queue(name)
+ assert(name, 'Queue(): no queue name provided')
+ local queue = {}
+ setmetatable(queue, QlessQueue)
+ queue.name = name
+
+ -- Access to our work
+ queue.work = {
+ peek = function(count)
+ if count == 0 then
+ return {}
+ end
+ local jids = {}
+ for index, jid in ipairs(redis.call(
+ 'zrevrange', queue:prefix('work'), 0, count - 1)) do
+ table.insert(jids, jid)
+ end
+ return jids
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('work'), unpack(arg))
+ end
+ end, add = function(now, priority, jid)
+ return redis.call('zadd',
+ queue:prefix('work'), priority - (now / 10000000000), jid)
+ end, score = function(jid)
+ return redis.call('zscore', queue:prefix('work'), jid)
+ end, length = function()
+ return redis.call('zcard', queue:prefix('work'))
+ end
+ }
+
+ -- Access to our locks
+ queue.locks = {
+ expired = function(now, offset, count)
+ return redis.call('zrangebyscore',
+ queue:prefix('locks'), -math.huge, now, 'LIMIT', offset, count)
+ end, peek = function(now, offset, count)
+ return redis.call('zrangebyscore', queue:prefix('locks'),
+ now, math.huge, 'LIMIT', offset, count)
+ end, add = function(expires, jid)
+ redis.call('zadd', queue:prefix('locks'), expires, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('locks'), unpack(arg))
+ end
+ end, running = function(now)
+ return redis.call('zcount', queue:prefix('locks'), now, math.huge)
+ end, length = function(now)
+ -- If a 'now' is provided, we're interested in how many are before
+ -- that time
+ if now then
+ return redis.call('zcount', queue:prefix('locks'), 0, now)
+ else
+ return redis.call('zcard', queue:prefix('locks'))
+ end
+ end
+ }
+
+ -- Access to our dependent jobs
+ queue.depends = {
+ peek = function(now, offset, count)
+ return redis.call('zrange',
+ queue:prefix('depends'), offset, offset + count - 1)
+ end, add = function(now, jid)
+ redis.call('zadd', queue:prefix('depends'), now, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('depends'), unpack(arg))
+ end
+ end, length = function()
+ return redis.call('zcard', queue:prefix('depends'))
+ end
+ }
+
+ -- Access to our scheduled jobs
+ queue.scheduled = {
+ peek = function(now, offset, count)
+ return redis.call('zrange',
+ queue:prefix('scheduled'), offset, offset + count - 1)
+ end, ready = function(now, offset, count)
+ return redis.call('zrangebyscore',
+ queue:prefix('scheduled'), 0, now, 'LIMIT', offset, count)
+ end, add = function(when, jid)
+ redis.call('zadd', queue:prefix('scheduled'), when, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('scheduled'), unpack(arg))
+ end
+ end, length = function()
+ return redis.call('zcard', queue:prefix('scheduled'))
+ end
+ }
+
+ -- Access to our recurring jobs
+ queue.recurring = {
+ peek = function(now, offset, count)
+ return redis.call('zrangebyscore', queue:prefix('recur'),
+ 0, now, 'LIMIT', offset, count)
+ end, ready = function(now, offset, count)
+ end, add = function(when, jid)
+ redis.call('zadd', queue:prefix('recur'), when, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('recur'), unpack(arg))
+ end
+ end, update = function(increment, jid)
+ redis.call('zincrby', queue:prefix('recur'), increment, jid)
+ end, score = function(jid)
+ return redis.call('zscore', queue:prefix('recur'), jid)
+ end, length = function()
+ return redis.call('zcard', queue:prefix('recur'))
+ end
+ }
+ return queue
+ end
1392
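
An aside on the scoring above: `work.add` stores each jid with score `priority - (now / 10000000000)`. The divisor keeps the time term well under 1 for any realistic unix timestamp, so integer priorities always dominate, and among equal priorities the job enqueued earlier gets the slightly higher score. A plain-Lua sketch (no Redis, made-up timestamps; the work zset is read back highest-score-first):

    -- Sketch of the 'work' zset scoring; a higher score pops sooner.
    local function work_score(now, priority)
      return priority - (now / 10000000000)
    end

    local late  = work_score(1400000000, 5)  -- priority 5, enqueued later
    local early = work_score(1300000000, 5)  -- priority 5, enqueued earlier
    local low   = work_score(1300000000, 0)  -- priority 0
    assert(early > late and late > low)
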
+
1393
+ -- Return the prefix for this particular queue
1394
+ function QlessQueue:prefix(group)
1395
+ if group then
1396
+ return QlessQueue.ns..self.name..'-'..group
1397
+ else
1398
+ return QlessQueue.ns..self.name
1399
+ end
1400
+ end
1401
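
For concreteness, here is what `prefix` produces, in a tiny sketch that assumes `QlessQueue.ns` is `'ql:q:'` as set earlier in this file:

    -- Sketch only: the key names prefix() builds for a queue named 'testing'.
    local ns, name = 'ql:q:', 'testing'
    print(ns .. name)             --> ql:q:testing        (no group)
    print(ns .. name .. '-work')  --> ql:q:testing-work   (prefix('work'))
    print(ns .. name .. '-locks') --> ql:q:testing-locks  (prefix('locks'))
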
+
1402
+ -- Stats(now, date)
1403
+ -- ---------------------
1404
+ -- Return the current statistics for a given queue on a given date. The
1405
+ -- results are returned as a JSON blob:
1406
+ --
1407
+ --
1408
+ -- {
1409
+ -- # These are unimplemented as of yet
1410
+ -- 'failed': 3,
1411
+ -- 'retries': 5,
1412
+ -- 'wait' : {
1413
+ -- 'total' : ...,
1414
+ -- 'mean' : ...,
1415
+ -- 'variance' : ...,
1416
+ -- 'histogram': [
1417
+ -- ...
1418
+ -- ]
1419
+ -- }, 'run': {
1420
+ -- 'total' : ...,
1421
+ -- 'mean' : ...,
1422
+ -- 'variance' : ...,
1423
+ -- 'histogram': [
1424
+ -- ...
1425
+ -- ]
1426
+ -- }
1427
+ -- }
1428
+ --
1429
+ -- The histogram's data points are at the second resolution for the first
1430
+ -- minute, the minute resolution for the first hour, the hour resolution
1431
+ -- for the first day, and the day resolution from there on out (the keys
1432
+ -- below stop at day six). The `histogram` key is a list of
1433
+ -- those values.
1434
+ function QlessQueue:stats(now, date)
1435
+ date = assert(tonumber(date),
1436
+ 'Stats(): Arg "date" missing or not a number: '.. (date or 'nil'))
1437
+
1438
+ -- The bin is midnight of the provided day
1439
+ -- 24 * 60 * 60 = 86400
1440
+ local bin = date - (date % 86400)
1441
+
1442
+ -- This is a table of all the keys we use to produce the histogram
1443
+ local histokeys = {
1444
+ 's0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23','s24','s25','s26','s27','s28','s29','s30','s31','s32','s33','s34','s35','s36','s37','s38','s39','s40','s41','s42','s43','s44','s45','s46','s47','s48','s49','s50','s51','s52','s53','s54','s55','s56','s57','s58','s59',
1445
+ 'm1','m2','m3','m4','m5','m6','m7','m8','m9','m10','m11','m12','m13','m14','m15','m16','m17','m18','m19','m20','m21','m22','m23','m24','m25','m26','m27','m28','m29','m30','m31','m32','m33','m34','m35','m36','m37','m38','m39','m40','m41','m42','m43','m44','m45','m46','m47','m48','m49','m50','m51','m52','m53','m54','m55','m56','m57','m58','m59',
1446
+ 'h1','h2','h3','h4','h5','h6','h7','h8','h9','h10','h11','h12','h13','h14','h15','h16','h17','h18','h19','h20','h21','h22','h23',
1447
+ 'd1','d2','d3','d4','d5','d6'
1448
+ }
1449
+
1450
+ local mkstats = function(name, bin, queue)
1451
+ -- The results we'll be sending back
1452
+ local results = {}
1453
+
1454
+ local key = 'ql:s:' .. name .. ':' .. bin .. ':' .. queue
1455
+ local count, mean, vk = unpack(redis.call('hmget', key, 'total', 'mean', 'vk'))
1456
+
1457
+ count = tonumber(count) or 0
1458
+ mean = tonumber(mean) or 0
1459
+ vk = tonumber(vk)
1460
+
1461
+ results.count = count or 0
1462
+ results.mean = mean or 0
1463
+ results.histogram = {}
1464
+
1465
+ if not count then
1466
+ results.std = 0
1467
+ else
1468
+ if count > 1 then
1469
+ results.std = math.sqrt(vk / (count - 1))
1470
+ else
1471
+ results.std = 0
1472
+ end
1473
+ end
1474
+
1475
+ local histogram = redis.call('hmget', key, unpack(histokeys))
1476
+ for i=1,#histokeys do
1477
+ table.insert(results.histogram, tonumber(histogram[i]) or 0)
1478
+ end
1479
+ return results
1480
+ end
1481
+
1482
+ local retries, failed, failures = unpack(redis.call('hmget', 'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 'failed', 'failures'))
1483
+ return {
1484
+ retries = tonumber(retries or 0),
1485
+ failed = tonumber(failed or 0),
1486
+ failures = tonumber(failures or 0),
1487
+ wait = mkstats('wait', bin, self.name),
1488
+ run = mkstats('run' , bin, self.name)
1489
+ }
1490
+ end
1491
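
The `vk` field used above is a running sum of squared deviations (Welford's method, maintained by `QlessQueue:stat` below), which is why the sample standard deviation falls out as `sqrt(vk / (count - 1))`. A plain-Lua check with a made-up data set:

    -- Sketch: the (count, mean, vk) triple and the std derived from it.
    local count, mean, vk = 0, 0, 0
    for _, val in ipairs({3, 7, 7, 19}) do
      count = count + 1
      local oldmean = mean
      mean = mean + (val - mean) / count
      vk = vk + (val - mean) * (val - oldmean)
    end
    local std = count > 1 and math.sqrt(vk / (count - 1)) or 0
    print(count, mean, std)  --> 4   9   6.9282032302755
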
+
1492
+ -- Peek
1493
+ -------
1494
+ -- Examine the next jobs that would be popped from the queue without actually
1495
+ -- popping them.
1496
+ function QlessQueue:peek(now, count)
1497
+ count = assert(tonumber(count),
1498
+ 'Peek(): Arg "count" missing or not a number: ' .. tostring(count))
1499
+
1500
+ -- These are the ids that we're going to return. We'll begin with any jobs
1501
+ -- that have lost their locks
1502
+ local jids = self.locks.expired(now, 0, count)
1503
+
1504
+ -- If we still need jobs in order to meet demand, then we should
1505
+ -- look for all the recurring jobs that need jobs run
1506
+ self:check_recurring(now, count - #jids)
1507
+
1508
+ -- Now we've checked __all__ the locks for this queue that could
1509
+ -- have expired, finding no more than the number requested. If
1510
+ -- we still need values in order to meet the demand, then we
1511
+ -- should check for any scheduled items, and if so, we should
1512
+ -- insert them to ensure correctness when pulling off the next
1513
+ -- unit of work.
1514
+ self:check_scheduled(now, count - #jids)
1515
+
1516
+ -- With these in place, we can expand this list of jids based on the work
1517
+ -- queue itself and the priorities therein
1518
+ table.extend(jids, self.work.peek(count - #jids))
1519
+
1520
+ return jids
1521
+ end
1522
+
1523
+ -- Return true if this queue is paused
1524
+ function QlessQueue:paused()
1525
+ return redis.call('sismember', 'ql:paused_queues', self.name) == 1
1526
+ end
1527
+
1528
+ -- Pause this queue
1529
+ --
1530
+ -- Note: long term, we have discussed adding a rate-limiting
1531
+ -- feature to qless-core, which would be more flexible and
1532
+ -- could be used for pausing (i.e. pause = set the rate to 0).
1533
+ -- For now, this is far simpler, but we should rewrite this
1534
+ -- in terms of the rate limiting feature if/when that is added.
1535
+ function QlessQueue.pause(now, ...)
1536
+ redis.call('sadd', 'ql:paused_queues', unpack(arg))
1537
+ end
1538
+
1539
+ -- Unpause this queue
1540
+ function QlessQueue.unpause(...)
1541
+ redis.call('srem', 'ql:paused_queues', unpack(arg))
1542
+ end
1543
+
1544
+ -- Checks for expired locks, scheduled and recurring jobs, returning any
1545
+ -- jobs that are ready to be processed
1546
+ function QlessQueue:pop(now, worker, count)
1547
+ assert(worker, 'Pop(): Arg "worker" missing')
1548
+ count = assert(tonumber(count),
1549
+ 'Pop(): Arg "count" missing or not a number: ' .. tostring(count))
1550
+
1551
+ -- We should find the heartbeat interval for this queue
1552
+ local expires = now + tonumber(
1553
+ Qless.config.get(self.name .. '-heartbeat') or
1554
+ Qless.config.get('heartbeat', 60))
1555
+
1556
+ -- If this queue is paused, then return no jobs
1557
+ if self:paused() then
1558
+ return {}
1559
+ end
1560
+
1561
+ -- Make sure we add this worker to the list of seen workers
1562
+ redis.call('zadd', 'ql:workers', now, worker)
1563
+
1564
+ -- Check our max concurrency, and limit the count
1565
+ local max_concurrency = tonumber(
1566
+ Qless.config.get(self.name .. '-max-concurrency', 0))
1567
+
1568
+ if max_concurrency > 0 then
1569
+ -- Allow at most max_concurrency - #running
1570
+ local allowed = math.max(0, max_concurrency - self.locks.running(now))
1571
+ count = math.min(allowed, count)
1572
+ if count == 0 then
1573
+ return {}
1574
+ end
1575
+ end
1576
+
1577
+ local jids = self:invalidate_locks(now, count)
1578
+ -- Now we've checked __all__ the locks for this queue that could
1579
+ -- have expired, finding no more than the number requested.
1580
+
1581
+ -- If we still need jobs in order to meet demand, then we should
1582
+ -- look for all the recurring jobs that need jobs run
1583
+ self:check_recurring(now, count - #jids)
1584
+
1585
+ -- If we still need values in order to meet the demand, then we
1586
+ -- should check for any scheduled items, and if so, we should
1587
+ -- insert them to ensure correctness when pulling off the next
1588
+ -- unit of work.
1589
+ self:check_scheduled(now, count - #jids)
1590
+
1591
+ -- With these in place, we can expand this list of jids based on the work
1592
+ -- queue itself and the priorities therein
1593
+ table.extend(jids, self.work.peek(count - #jids))
1594
+
1595
+ local state
1596
+ for index, jid in ipairs(jids) do
1597
+ local job = Qless.job(jid)
1598
+ state = unpack(job:data('state'))
1599
+ job:history(now, 'popped', {worker = worker})
1600
+
1601
+ -- Update the wait time statistics
1602
+ local time = tonumber(
1603
+ redis.call('hget', QlessJob.ns .. jid, 'time') or now)
1604
+ local waiting = now - time
1605
+ self:stat(now, 'wait', waiting)
1606
+ redis.call('hset', QlessJob.ns .. jid,
1607
+ 'time', string.format("%.20f", now))
1608
+
1609
+ -- Add this job to the list of jobs handled by this worker
1610
+ redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid)
1611
+
1612
+ -- Update the jobs data, and add its locks, and return the job
1613
+ job:update({
1614
+ worker = worker,
1615
+ expires = expires,
1616
+ state = 'running'
1617
+ })
1618
+
1619
+ self.locks.add(expires, jid)
1620
+
1621
+ local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false
1622
+ if tracked then
1623
+ Qless.publish('popped', jid)
1624
+ end
1625
+ end
1626
+
1627
+ -- If we are returning any jobs, then we should remove them from the work
1628
+ -- queue
1629
+ self.work.remove(unpack(jids))
1630
+
1631
+ return jids
1632
+ end
1633
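
The `max-concurrency` clamp near the top of `pop` is easy to see in isolation: a pop may hand out at most `max_concurrency - running` jobs, never fewer than zero. A sketch with made-up numbers:

    -- Sketch: the count-limiting arithmetic from QlessQueue:pop.
    local function clamp(requested, max_concurrency, running)
      if max_concurrency > 0 then
        local allowed = math.max(0, max_concurrency - running)
        return math.min(allowed, requested)
      end
      return requested
    end

    print(clamp(10, 0, 99))  --> 10  (feature disabled)
    print(clamp(10, 8, 5))   --> 3
    print(clamp(10, 8, 12))  --> 0   (already at or over the limit)
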
+
1634
+ -- Update the stats for this queue
1635
+ function QlessQueue:stat(now, stat, val)
1636
+ -- The bin is midnight of the provided day
1637
+ local bin = now - (now % 86400)
1638
+ local key = 'ql:s:' .. stat .. ':' .. bin .. ':' .. self.name
1639
+
1640
+ -- Get the current data
1641
+ local count, mean, vk = unpack(
1642
+ redis.call('hmget', key, 'total', 'mean', 'vk'))
1643
+
1644
+ -- If there isn't any data there presently, then we must initialize it
1645
+ count = count or 0
1646
+ if count == 0 then
1647
+ mean = val
1648
+ vk = 0
1649
+ count = 1
1650
+ else
1651
+ count = count + 1
1652
+ local oldmean = mean
1653
+ mean = mean + (val - mean) / count
1654
+ vk = vk + (val - mean) * (val - oldmean)
1655
+ end
1656
+
1657
+ -- Now, update the histogram
1658
+ -- - `s1`, `s2`, ..., -- second-resolution histogram counts
1659
+ -- - `m1`, `m2`, ..., -- minute-resolution
1660
+ -- - `h1`, `h2`, ..., -- hour-resolution
1661
+ -- - `d1`, `d2`, ..., -- day-resolution
1662
+ val = math.floor(val)
1663
+ if val < 60 then -- seconds
1664
+ redis.call('hincrby', key, 's' .. val, 1)
1665
+ elseif val < 3600 then -- minutes
1666
+ redis.call('hincrby', key, 'm' .. math.floor(val / 60), 1)
1667
+ elseif val < 86400 then -- hours
1668
+ redis.call('hincrby', key, 'h' .. math.floor(val / 3600), 1)
1669
+ else -- days
1670
+ redis.call('hincrby', key, 'd' .. math.floor(val / 86400), 1)
1671
+ end
1672
+ redis.call('hmset', key, 'total', count, 'mean', mean, 'vk', vk)
1673
+ end
1674
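
The histogram branch above maps a duration in seconds onto one of the `s*`/`m*`/`h*`/`d*` hash fields that `stats` reads back. The same thresholds as a standalone sketch:

    -- Sketch: which histogram field a wait/run time of `val` seconds hits.
    local function histo_field(val)
      val = math.floor(val)
      if val < 60 then return 's' .. val                           -- seconds
      elseif val < 3600 then return 'm' .. math.floor(val / 60)    -- minutes
      elseif val < 86400 then return 'h' .. math.floor(val / 3600) -- hours
      else return 'd' .. math.floor(val / 86400) end               -- days
    end

    print(histo_field(42))      --> s42
    print(histo_field(90))      --> m1
    print(histo_field(7200))    --> h2
    print(histo_field(200000))  --> d2
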
+
1675
+ -- Put(now, jid, klass, data, delay,
1676
+ -- [priority, p],
1677
+ -- [tags, t],
1678
+ -- [retries, r],
1679
+ -- [depends, '[...]'])
1680
+ -- -----------------------
1681
+ -- Insert a job into the queue with the given priority, tags, delay, klass and
1682
+ -- data.
1683
+ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...)
1684
+ assert(jid , 'Put(): Arg "jid" missing')
1685
+ assert(klass, 'Put(): Arg "klass" missing')
1686
+ local data = assert(cjson.decode(raw_data),
1687
+ 'Put(): Arg "data" missing or not JSON: ' .. tostring(raw_data))
1688
+ delay = assert(tonumber(delay),
1689
+ 'Put(): Arg "delay" not a number: ' .. tostring(delay))
1690
+
1691
+ -- Read in all the optional parameters. All of these must come in pairs, so
1692
+ -- if we have an odd number of extra args, raise an error
1693
+ if #arg % 2 == 1 then
1694
+ error('Odd number of additional args: ' .. tostring(arg))
1695
+ end
1696
+ local options = {}
1697
+ for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
1698
+
1699
+ -- Let's see what the old priority and tags were
1700
+ local job = Qless.job(jid)
1701
+ local priority, tags, oldqueue, state, failure, retries, oldworker =
1702
+ unpack(redis.call('hmget', QlessJob.ns .. jid, 'priority', 'tags',
1703
+ 'queue', 'state', 'failure', 'retries', 'worker'))
1704
+
1705
+ -- If there are old tags, then we should remove the tags this job has
1706
+ if tags then
1707
+ Qless.tag(now, 'remove', jid, unpack(cjson.decode(tags)))
1708
+ end
1709
+
1710
+ -- Sanity check on optional args
1711
+ retries = assert(tonumber(options['retries'] or retries or 5) ,
1712
+ 'Put(): Arg "retries" not a number: ' .. tostring(options['retries']))
1713
+ tags = assert(cjson.decode(options['tags'] or tags or '[]' ),
1714
+ 'Put(): Arg "tags" not JSON' .. tostring(options['tags']))
1715
+ priority = assert(tonumber(options['priority'] or priority or 0),
1716
+ 'Put(): Arg "priority" not a number' .. tostring(options['priority']))
1717
+ local depends = assert(cjson.decode(options['depends'] or '[]') ,
1718
+ 'Put(): Arg "depends" not JSON: ' .. tostring(options['depends']))
1719
+
1720
+ -- If the job has old dependencies, determine which dependencies are
1721
+ -- in the new dependencies but not in the old ones, and which are in the
1722
+ -- old ones but not in the new
1723
+ if #depends > 0 then
1724
+ -- This makes it easier to check if it's in the new list
1725
+ local new = {}
1726
+ for _, d in ipairs(depends) do new[d] = 1 end
1727
+
1728
+ -- Now find what's in the original, but not the new
1729
+ local original = redis.call(
1730
+ 'smembers', QlessJob.ns .. jid .. '-dependencies')
1731
+ for _, dep in pairs(original) do
1732
+ if new[dep] == nil then
1733
+ -- Remove k as a dependency
1734
+ redis.call('srem', QlessJob.ns .. dep .. '-dependents' , jid)
1735
+ redis.call('srem', QlessJob.ns .. jid .. '-dependencies', dep)
1736
+ end
1737
+ end
1738
+ end
1739
+
1740
+ -- Send out a log message
1741
+ Qless.publish('log', cjson.encode({
1742
+ jid = jid,
1743
+ event = 'put',
1744
+ queue = self.name
1745
+ }))
1746
+
1747
+ -- Update the history to include this new change
1748
+ job:history(now, 'put', {q = self.name})
1749
+
1750
+ -- If this item was previously in another queue, then we should remove it from there
1751
+ if oldqueue then
1752
+ local queue_obj = Qless.queue(oldqueue)
1753
+ queue_obj.work.remove(jid)
1754
+ queue_obj.locks.remove(jid)
1755
+ queue_obj.depends.remove(jid)
1756
+ queue_obj.scheduled.remove(jid)
1757
+ end
1758
+
1759
+ -- If this had previously been given out to a worker, make sure to remove it
1760
+ -- from that worker's jobs
1761
+ if oldworker and oldworker ~= '' then
1762
+ redis.call('zrem', 'ql:w:' .. oldworker .. ':jobs', jid)
1763
+ -- If it's a different worker that's putting this job, send a notification
1764
+ -- to the last owner of the job
1765
+ if oldworker ~= worker then
1766
+ -- We need to inform whatever worker had that job
1767
+ local encoded = cjson.encode({
1768
+ jid = jid,
1769
+ event = 'lock_lost',
1770
+ worker = oldworker
1771
+ })
1772
+ Qless.publish('w:' .. oldworker, encoded)
1773
+ Qless.publish('log', encoded)
1774
+ end
1775
+ end
1776
+
1777
+ -- If the job was previously in the 'completed' state, then we should
1778
+ -- remove it from the set of jobs enqueued for destruction
1779
+ if state == 'complete' then
1780
+ redis.call('zrem', 'ql:completed', jid)
1781
+ end
1782
+
1783
+ -- Add this job to the list of jobs tagged with whatever tags were supplied
1784
+ for i, tag in ipairs(tags) do
1785
+ redis.call('zadd', 'ql:t:' .. tag, now, jid)
1786
+ redis.call('zincrby', 'ql:tags', 1, tag)
1787
+ end
1788
+
1789
+ -- If we're in the failed state, remove all of our data
1790
+ if state == 'failed' then
1791
+ failure = cjson.decode(failure)
1792
+ -- We need to remove this jid from the list of failures for its group
1793
+ redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
1794
+ if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
1795
+ redis.call('srem', 'ql:failures', failure.group)
1796
+ end
1797
+ -- The bin is midnight of the provided day
1798
+ -- 24 * 60 * 60 = 86400
1799
+ local bin = failure.when - (failure.when % 86400)
1800
+ -- We also need to decrement the stats about the queue on
1801
+ -- the day that this failure actually happened.
1802
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , -1)
1803
+ end
1804
+
1805
+ -- First, let's save its data
1806
+ redis.call('hmset', QlessJob.ns .. jid,
1807
+ 'jid' , jid,
1808
+ 'klass' , klass,
1809
+ 'data' , raw_data,
1810
+ 'priority' , priority,
1811
+ 'tags' , cjson.encode(tags),
1812
+ 'state' , ((delay > 0) and 'scheduled') or 'waiting',
1813
+ 'worker' , '',
1814
+ 'expires' , 0,
1815
+ 'queue' , self.name,
1816
+ 'retries' , retries,
1817
+ 'remaining', retries,
1818
+ 'time' , string.format("%.20f", now))
1819
+
1820
+ -- These are the jids we legitimately have to wait on
1821
+ for i, j in ipairs(depends) do
1822
+ -- Make sure it's something other than 'nil' or complete.
1823
+ local state = redis.call('hget', QlessJob.ns .. j, 'state')
1824
+ if (state and state ~= 'complete') then
1825
+ redis.call('sadd', QlessJob.ns .. j .. '-dependents' , jid)
1826
+ redis.call('sadd', QlessJob.ns .. jid .. '-dependencies', j)
1827
+ end
1828
+ end
1829
+
1830
+ -- Now, if a delay was provided, and if it's in the future,
1831
+ -- then we'll have to schedule it. Otherwise, we're just
1832
+ -- going to add it to the work queue.
1833
+ if delay > 0 then
1834
+ if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
1835
+ -- We've already put it in 'depends'. Now, we must just save the data
1836
+ -- for when it's scheduled
1837
+ self.depends.add(now, jid)
1838
+ redis.call('hmset', QlessJob.ns .. jid,
1839
+ 'state', 'depends',
1840
+ 'scheduled', now + delay)
1841
+ else
1842
+ self.scheduled.add(now + delay, jid)
1843
+ end
1844
+ else
1845
+ if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
1846
+ self.depends.add(now, jid)
1847
+ redis.call('hset', QlessJob.ns .. jid, 'state', 'depends')
1848
+ else
1849
+ self.work.add(now, priority, jid)
1850
+ end
1851
+ end
1852
+
1853
+ -- Lastly, we're going to make sure that this item is in the
1854
+ -- set of known queues. We should keep this sorted by the
1855
+ -- order in which we saw each of these queues
1856
+ if redis.call('zscore', 'ql:queues', self.name) == false then
1857
+ redis.call('zadd', 'ql:queues', now, self.name)
1858
+ end
1859
+
1860
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
1861
+ Qless.publish('put', jid)
1862
+ end
1863
+
1864
+ return jid
1865
+ end
1866
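
The variadic tail of `put` is a flat key/value list that the loop near the top folds into an `options` table, after rejecting odd-length input. The same idiom as a self-contained sketch:

    -- Sketch: how put's trailing args become an options table.
    local function parse_options(...)
      local args = {...}
      if #args % 2 == 1 then
        error('Odd number of additional args: ' .. #args)
      end
      local options = {}
      for i = 1, #args, 2 do options[args[i]] = args[i + 1] end
      return options
    end

    local opts = parse_options('priority', '10', 'tags', '["backup"]')
    print(opts.priority, opts.tags)  --> 10   ["backup"]
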
+
1867
+ -- Move `count` jobs out of the failed state and into this queue
1868
+ function QlessQueue:unfail(now, group, count)
1869
+ assert(group, 'Unfail(): Arg "group" missing')
1870
+ count = assert(tonumber(count or 25),
1871
+ 'Unfail(): Arg "count" not a number: ' .. tostring(count))
1872
+
1873
+ -- Get up to that many jobs, and we'll put them in the appropriate queue
1874
+ local jids = redis.call('lrange', 'ql:f:' .. group, -count, -1)
1875
+
1876
+ -- And now set each job's state, and put it into the appropriate queue
1877
+ local toinsert = {}
1878
+ for index, jid in ipairs(jids) do
1879
+ local job = Qless.job(jid)
1880
+ local data = job:data()
1881
+ job:history(now, 'put', {q = self.name})
1882
+ redis.call('hmset', QlessJob.ns .. data.jid,
1883
+ 'state' , 'waiting',
1884
+ 'worker' , '',
1885
+ 'expires' , 0,
1886
+ 'queue' , self.name,
1887
+ 'remaining', data.retries or 5)
1888
+ self.work.add(now, data.priority, data.jid)
1889
+ end
1890
+
1891
+ -- Remove these jobs from the failed state
1892
+ redis.call('ltrim', 'ql:f:' .. group, 0, -count - 1)
1893
+ if (redis.call('llen', 'ql:f:' .. group) == 0) then
1894
+ redis.call('srem', 'ql:failures', group)
1895
+ end
1896
+
1897
+ return #jids
1898
+ end
1899
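
Worth noting in `unfail`: failed jids are `lpush`ed onto `ql:f:<group>` (as in the retry-exhaustion path of `invalidate_locks` below), so the oldest failures sit at the tail; `lrange ... -count -1` takes that tail and `ltrim 0, -count - 1` drops exactly the same window. A sketch over a plain table:

    -- Sketch: the take-from-the-tail window used by unfail.
    local failed = {'jid4', 'jid3', 'jid2', 'jid1'}  -- newest first, as lpush leaves it
    local count = 2
    local taken = {}                                 -- lrange key -count -1
    for i = #failed - count + 1, #failed do table.insert(taken, failed[i]) end
    for _ = 1, count do table.remove(failed) end     -- ltrim key 0 (-count - 1)
    print(table.concat(taken, ','))   --> jid2,jid1  (the oldest failures)
    print(table.concat(failed, ','))  --> jid4,jid3
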
+
1900
+ -- Recur a job of type klass in this queue
1901
+ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...)
1902
+ assert(jid , 'RecurringJob On(): Arg "jid" missing')
1903
+ assert(klass, 'RecurringJob On(): Arg "klass" missing')
1904
+ assert(spec , 'RecurringJob On(): Arg "spec" missing')
1905
+ local data = assert(cjson.decode(raw_data),
1906
+ 'RecurringJob On(): Arg "data" not JSON: ' .. tostring(raw_data))
1907
+
1908
+ -- At some point in the future, we may have different types of recurring
1909
+ -- jobs, but for the time being, we only have 'interval'-type jobs
1910
+ if spec == 'interval' then
1911
+ local interval = assert(tonumber(arg[1]),
1912
+ 'Recur(): Arg "interval" not a number: ' .. tostring(arg[1]))
1913
+ local offset = assert(tonumber(arg[2]),
1914
+ 'Recur(): Arg "offset" not a number: ' .. tostring(arg[2]))
1915
+ if interval <= 0 then
1916
+ error('Recur(): Arg "interval" must be greater than 0')
1917
+ end
1918
+
1919
+ -- Read in all the optional parameters. All of these must come in
1920
+ -- pairs, so if we have an odd number of extra args, raise an error
1921
+ if #arg % 2 == 1 then
1922
+ error('Odd number of additional args: ' .. tostring(arg))
1923
+ end
1924
+
1925
+ -- Read in all the optional parameters
1926
+ local options = {}
1927
+ for i = 3, #arg, 2 do options[arg[i]] = arg[i + 1] end
1928
+ options.tags = assert(cjson.decode(options.tags or '{}'),
1929
+ 'Recur(): Arg "tags" must be JSON string array: ' .. tostring(
1930
+ options.tags))
1931
+ options.priority = assert(tonumber(options.priority or 0),
1932
+ 'Recur(): Arg "priority" not a number: ' .. tostring(
1933
+ options.priority))
1934
+ options.retries = assert(tonumber(options.retries or 0),
1935
+ 'Recur(): Arg "retries" not a number: ' .. tostring(
1936
+ options.retries))
1937
+ options.backlog = assert(tonumber(options.backlog or 0),
1938
+ 'Recur(): Arg "backlog" not a number: ' .. tostring(
1939
+ options.backlog))
1940
+
1941
+ local count, old_queue = unpack(redis.call('hmget', 'ql:r:' .. jid, 'count', 'queue'))
1942
+ count = count or 0
1943
+
1944
+ -- If it has previously been in another queue, then we should remove
1945
+ -- some information about it
1946
+ if old_queue then
1947
+ Qless.queue(old_queue).recurring.remove(jid)
1948
+ end
1949
+
1950
+ -- Do some insertions
1951
+ redis.call('hmset', 'ql:r:' .. jid,
1952
+ 'jid' , jid,
1953
+ 'klass' , klass,
1954
+ 'data' , raw_data,
1955
+ 'priority', options.priority,
1956
+ 'tags' , cjson.encode(options.tags or {}),
1957
+ 'state' , 'recur',
1958
+ 'queue' , self.name,
1959
+ 'type' , 'interval',
1960
+ -- How many jobs we've spawned from this
1961
+ 'count' , count,
1962
+ 'interval', interval,
1963
+ 'retries' , options.retries,
1964
+ 'backlog' , options.backlog)
1965
+ -- Now, we should schedule the next run of the job
1966
+ self.recurring.add(now + offset, jid)
1967
+
1968
+ -- Lastly, we're going to make sure that this item is in the
1969
+ -- set of known queues. We should keep this sorted by the
1970
+ -- order in which we saw each of these queues
1971
+ if redis.call('zscore', 'ql:queues', self.name) == false then
1972
+ redis.call('zadd', 'ql:queues', now, self.name)
1973
+ end
1974
+
1975
+ return jid
1976
+ else
1977
+ error('Recur(): schedule type "' .. tostring(spec) .. '" unknown')
1978
+ end
1979
+ end
1980
+
1981
+ -- Return the length of the queue
1982
+ function QlessQueue:length()
1983
+ return self.locks.length() + self.work.length() + self.scheduled.length()
1984
+ end
1985
+
1986
+ -------------------------------------------------------------------------------
1987
+ -- Housekeeping methods
1988
+ -------------------------------------------------------------------------------
1989
+ -- Instantiate any recurring jobs that are ready
1990
+ function QlessQueue:check_recurring(now, count)
1991
+ -- This is how many jobs we've moved so far
1992
+ local moved = 0
1993
+ -- These are the recurring jobs that need work
1994
+ local r = self.recurring.peek(now, 0, count)
1995
+ for index, jid in ipairs(r) do
1996
+ -- For each of the jids that need jobs scheduled, first
1997
+ -- get the last time each of them was run, and then increment
1998
+ -- it by its interval. While this time is less than now,
1999
+ -- we need to keep putting jobs on the queue
2000
+ local klass, data, priority, tags, retries, interval, backlog = unpack(
2001
+ redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority',
2002
+ 'tags', 'retries', 'interval', 'backlog'))
2003
+ local _tags = cjson.decode(tags)
2004
+ local score = math.floor(tonumber(self.recurring.score(jid)))
2005
+ interval = tonumber(interval)
2006
+
2007
+ -- If the backlog is set for this job, then see if it's been a long
2008
+ -- time since the last pop
2009
+ backlog = tonumber(backlog or 0)
2010
+ if backlog ~= 0 then
2011
+ -- Check how many jobs we could conceivably generate
2012
+ local num = ((now - score) / interval)
2013
+ if num > backlog then
2014
+ -- Update the score
2015
+ score = score + (
2016
+ math.ceil(num - backlog) * interval
2017
+ )
2018
+ end
2019
+ end
2020
+
2021
+ -- We're saving this value so that in the history, we can accurately
2022
+ -- reflect when the job would normally have been scheduled
2023
+ while (score <= now) and (moved < count) do
2024
+ local count = redis.call('hincrby', 'ql:r:' .. jid, 'count', 1)
2025
+ moved = moved + 1
2026
+
2027
+ local child_jid = jid .. '-' .. count
2028
+
2029
+ -- Add this job to the list of jobs tagged with whatever tags were
2030
+ -- supplied
2031
+ for i, tag in ipairs(_tags) do
2032
+ redis.call('zadd', 'ql:t:' .. tag, now, child_jid)
2033
+ redis.call('zincrby', 'ql:tags', 1, tag)
2034
+ end
2035
+
2036
+ -- First, let's save its data
2037
+ redis.call('hmset', QlessJob.ns .. child_jid,
2038
+ 'jid' , child_jid,
2039
+ 'klass' , klass,
2040
+ 'data' , data,
2041
+ 'priority' , priority,
2042
+ 'tags' , tags,
2043
+ 'state' , 'waiting',
2044
+ 'worker' , '',
2045
+ 'expires' , 0,
2046
+ 'queue' , self.name,
2047
+ 'retries' , retries,
2048
+ 'remaining' , retries,
2049
+ 'time' , string.format("%.20f", score),
2050
+ 'spawned_from_jid', jid)
2051
+ Qless.job(child_jid):history(score, 'put', {q = self.name})
2052
+
2053
+ -- Add it to the work queue, scored by the time it would normally
2054
+ -- have run, so that jobs spawned late still keep their
2055
+ -- original ordering in the work queue.
2056
+ self.work.add(score, priority, child_jid)
2057
+
2058
+ score = score + interval
2059
+ self.recurring.add(score, jid)
2060
+ end
2061
+ end
2062
+ end
2063
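
The backlog guard in the middle of `check_recurring` is the interesting bit: if a recurring job's score is far in the past, `(now - score) / interval` jobs would normally be spawned, so when a backlog cap is set the score is jumped forward until at most `backlog` spawns remain. With made-up numbers:

    -- Sketch: the catch-up throttle from check_recurring.
    local function throttled_score(now, score, interval, backlog)
      if backlog ~= 0 then
        local num = (now - score) / interval
        if num > backlog then
          score = score + math.ceil(num - backlog) * interval
        end
      end
      return score
    end

    -- 100 intervals behind, but only willing to backfill 3 jobs:
    local score = throttled_score(10000, 0, 100, 3)
    print(score, (10000 - score) / 100)  --> 9700   3
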
+
2064
+ -- Check for any jobs that have been scheduled, and shovel them onto
2065
+ -- the work queue. Returns nothing, but afterwards, up to `count`
2066
+ -- scheduled jobs will be moved into the work queue
2067
+ function QlessQueue:check_scheduled(now, count)
2068
+ -- These are the scheduled jobs that are now ready to be
2069
+ -- moved into the work queue
2070
+ local scheduled = self.scheduled.ready(now, 0, count)
2071
+ for index, jid in ipairs(scheduled) do
2072
+ -- With these in hand, we'll have to go out and find the
2073
+ -- priorities of these jobs, and then we'll insert them
2074
+ -- into the work queue and then when that's complete, we'll
2075
+ -- remove them from the scheduled queue
2076
+ local priority = tonumber(
2077
+ redis.call('hget', QlessJob.ns .. jid, 'priority') or 0)
2078
+ self.work.add(now, priority, jid)
2079
+ self.scheduled.remove(jid)
2080
+
2081
+ -- We should also update them to have the state 'waiting'
2082
+ -- instead of 'scheduled'
2083
+ redis.call('hset', QlessJob.ns .. jid, 'state', 'waiting')
2084
+ end
2085
+ end
2086
+
2087
+ -- Check for and invalidate any locks that have been lost. Returns the
2088
+ -- list of jids that have been invalidated
2089
+ function QlessQueue:invalidate_locks(now, count)
2090
+ local jids = {}
2091
+ -- Iterate through all the expired locks and add them to the list
2092
+ -- of keys that we'll return
2093
+ for index, jid in ipairs(self.locks.expired(now, 0, count)) do
2094
+ -- Remove this job from the jobs that the worker that was running it
2095
+ -- has
2096
+ local worker, failure = unpack(
2097
+ redis.call('hmget', QlessJob.ns .. jid, 'worker', 'failure'))
2098
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
2099
+
2100
+ -- We'll provide a grace period after jobs time out for them to give
2101
+ -- some indication of the failure mode. After that time, however, we'll
2102
+ -- consider the worker dust in the wind
2103
+ local grace_period = tonumber(Qless.config.get('grace-period'))
2104
+
2105
+ -- Whether or not we've already sent a courtesy message
2106
+ local courtesy_sent = tonumber(
2107
+ redis.call('hget', QlessJob.ns .. jid, 'grace') or 0)
2108
+
2109
+ -- If we haven't sent the courtesy message yet, then this pass
2110
+ -- just sends that message. Otherwise, it's time to
2111
+ -- actually hand out the work to another worker
2112
+ local send_message = (courtesy_sent ~= 1)
2113
+ local invalidate = not send_message
2114
+
2115
+ -- If the grace period has been disabled, then we'll do both.
2116
+ if grace_period <= 0 then
2117
+ send_message = true
2118
+ invalidate = true
2119
+ end
2120
+
2121
+ if send_message then
2122
+ -- This is where we supply a courtesy message and give the worker
2123
+ -- time to provide a failure message
2124
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
2125
+ Qless.publish('stalled', jid)
2126
+ end
2127
+ Qless.job(jid):history(now, 'timed-out')
2128
+ redis.call('hset', QlessJob.ns .. jid, 'grace', 1)
2129
+
2130
+ -- Send a message to let the worker know that it's lost its lock on
2131
+ -- the job
2132
+ local encoded = cjson.encode({
2133
+ jid = jid,
2134
+ event = 'lock_lost',
2135
+ worker = worker
2136
+ })
2137
+ Qless.publish('w:' .. worker, encoded)
2138
+ Qless.publish('log', encoded)
2139
+ self.locks.add(now + grace_period, jid)
2140
+
2141
+ -- If we got any expired locks, then we should increment the
2142
+ -- number of retries for this stage for this bin. The bin is
2143
+ -- midnight of the provided day
2144
+ local bin = now - (now % 86400)
2145
+ redis.call('hincrby',
2146
+ 'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 1)
2147
+ end
2148
+
2149
+ if invalidate then
2150
+ -- Unset the grace period attribute so that next time we'll send
2151
+ -- the courtesy message again
2152
+ redis.call('hdel', QlessJob.ns .. jid, 'grace', 0)
2153
+
2154
+ -- See how many remaining retries the job has
2155
+ local remaining = tonumber(redis.call(
2156
+ 'hincrby', QlessJob.ns .. jid, 'remaining', -1))
2157
+
2158
+ -- This is where we actually have to time out the work
2159
+ if remaining < 0 then
2160
+ -- Now remove the instance from the schedule, and work queues
2161
+ -- for the queue it's in
2162
+ self.work.remove(jid)
2163
+ self.locks.remove(jid)
2164
+ self.scheduled.remove(jid)
2165
+
2166
+ local group = 'failed-retries-' .. Qless.job(jid):data()['queue']
2167
+ local job = Qless.job(jid)
2168
+ job:history(now, 'failed', {group = group})
2169
+ redis.call('hmset', QlessJob.ns .. jid, 'state', 'failed',
2170
+ 'worker', '',
2171
+ 'expires', '')
2172
+ -- Set the failure data for this job
2173
+ redis.call('hset', QlessJob.ns .. jid,
2174
+ 'failure', cjson.encode({
2175
+ ['group'] = group,
2176
+ ['message'] =
2177
+ 'Job exhausted retries in queue "' .. self.name .. '"',
2178
+ ['when'] = now,
2179
+ ['worker'] = unpack(job:data('worker'))
2180
+ }))
2181
+
2182
+ -- Add this type of failure to the list of failures
2183
+ redis.call('sadd', 'ql:failures', group)
2184
+ -- And add this particular instance to the failed types
2185
+ redis.call('lpush', 'ql:f:' .. group, jid)
2186
+
2187
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
2188
+ Qless.publish('failed', jid)
2189
+ end
2190
+ Qless.publish('log', cjson.encode({
2191
+ jid = jid,
2192
+ event = 'failed',
2193
+ group = group,
2194
+ worker = worker,
2195
+ message =
2196
+ 'Job exhausted retries in queue "' .. self.name .. '"'
2197
+ }))
2198
+
2199
+ -- Increment the count of the failed jobs
2200
+ local bin = now - (now % 86400)
2201
+ redis.call('hincrby',
2202
+ 'ql:s:stats:' .. bin .. ':' .. self.name, 'failures', 1)
2203
+ redis.call('hincrby',
2204
+ 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , 1)
2205
+ else
2206
+ table.insert(jids, jid)
2207
+ end
2208
+ end
2209
+ end
2210
+
2211
+ return jids
2212
+ end
2213
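
Seen from a distance, the grace-period logic above is a two-phase state machine per expired lock: on the first pass it sends the courtesy `lock_lost` message and re-adds the lock for `grace-period` more seconds; on the second it actually invalidates. A zero grace period collapses both phases into one. A condensed sketch of just the decision:

    -- Sketch: the send/invalidate decision from invalidate_locks.
    local function phases(grace_period, courtesy_sent)
      local send_message = (courtesy_sent ~= 1)
      local invalidate = not send_message
      if grace_period <= 0 then
        send_message, invalidate = true, true
      end
      return send_message, invalidate
    end

    print(phases(10, 0))  --> true   false  (first expiry: warn the worker)
    print(phases(10, 1))  --> false  true   (grace elapsed: reassign the job)
    print(phases(0, 0))   --> true   true   (grace disabled: do both at once)
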
+
2214
+ -- Forget the provided queues. As in, remove them from the list of known queues
2215
+ function QlessQueue.deregister(...)
2216
+ redis.call('zrem', Qless.ns .. 'queues', unpack(arg))
2217
+ end
2218
+
2219
+ -- Return information about a particular queue, or all queues
2220
+ -- [
2221
+ -- {
2222
+ -- 'name': 'testing',
2223
+ -- 'stalled': 2,
2224
+ -- 'waiting': 5,
2225
+ -- 'running': 5,
2226
+ -- 'scheduled': 10,
2227
+ -- 'depends': 5,
2228
+ -- 'recurring': 0
2229
+ -- }, {
2230
+ -- ...
2231
+ -- }
2232
+ -- ]
2233
+ function QlessQueue.counts(now, name)
2234
+ if name then
2235
+ local queue = Qless.queue(name)
2236
+ local stalled = queue.locks.length(now)
2237
+ -- Check for any scheduled jobs that need to be moved
2238
+ queue:check_scheduled(now, queue.scheduled.length())
2239
+ return {
2240
+ name = name,
2241
+ waiting = queue.work.length(),
2242
+ stalled = stalled,
2243
+ running = queue.locks.length() - stalled,
2244
+ scheduled = queue.scheduled.length(),
2245
+ depends = queue.depends.length(),
2246
+ recurring = queue.recurring.length(),
2247
+ paused = queue:paused()
2248
+ }
2249
+ else
2250
+ local queues = redis.call('zrange', 'ql:queues', 0, -1)
2251
+ local response = {}
2252
+ for index, qname in ipairs(queues) do
2253
+ table.insert(response, QlessQueue.counts(now, qname))
2254
+ end
2255
+ return response
2256
+ end
2257
+ end
2258
+ -- Get all the attributes of this particular job
2259
+ function QlessRecurringJob:data()
2260
+ local job = redis.call(
2261
+ 'hmget', 'ql:r:' .. self.jid, 'jid', 'klass', 'state', 'queue',
2262
+ 'priority', 'interval', 'retries', 'count', 'data', 'tags', 'backlog')
2263
+
2264
+ if not job[1] then
2265
+ return nil
2266
+ end
2267
+
2268
+ return {
2269
+ jid = job[1],
2270
+ klass = job[2],
2271
+ state = job[3],
2272
+ queue = job[4],
2273
+ priority = tonumber(job[5]),
2274
+ interval = tonumber(job[6]),
2275
+ retries = tonumber(job[7]),
2276
+ count = tonumber(job[8]),
2277
+ data = job[9],
2278
+ tags = cjson.decode(job[10]),
2279
+ backlog = tonumber(job[11] or 0)
2280
+ }
2281
+ end
2282
+
2283
+ -- Update the recurring job data. Key can be:
2284
+ -- - priority
2285
+ -- - interval
2286
+ -- - retries
2287
+ -- - data
2288
+ -- - klass
2289
+ -- - queue
2290
+ -- - backlog
2291
+ function QlessRecurringJob:update(now, ...)
2292
+ local options = {}
2293
+ -- Make sure that the job exists
2294
+ if redis.call('exists', 'ql:r:' .. self.jid) ~= 0 then
2295
+ for i = 1, #arg, 2 do
2296
+ local key = arg[i]
2297
+ local value = arg[i+1]
2298
+ assert(value, 'No value provided for ' .. tostring(key))
2299
+ if key == 'priority' or key == 'interval' or key == 'retries' then
2300
+ value = assert(tonumber(value), 'Recur(): Arg "' .. key .. '" must be a number: ' .. tostring(value))
2301
+ -- If the command is 'interval', then we need to update the
2302
+ -- time when it should next be scheduled
2303
+ if key == 'interval' then
2304
+ local queue, interval = unpack(redis.call('hmget', 'ql:r:' .. self.jid, 'queue', 'interval'))
2305
+ Qless.queue(queue).recurring.update(
2306
+ value - tonumber(interval), self.jid)
2307
+ end
2308
+ redis.call('hset', 'ql:r:' .. self.jid, key, value)
2309
+ elseif key == 'data' then
2310
+ assert(cjson.decode(value), 'Recur(): Arg "data" is not JSON-encoded: ' .. tostring(value))
2311
+ redis.call('hset', 'ql:r:' .. self.jid, 'data', value)
2312
+ elseif key == 'klass' then
2313
+ redis.call('hset', 'ql:r:' .. self.jid, 'klass', value)
2314
+ elseif key == 'queue' then
2315
+ local queue_obj = Qless.queue(
2316
+ redis.call('hget', 'ql:r:' .. self.jid, 'queue'))
2317
+ local score = queue_obj.recurring.score(self.jid)
2318
+ queue_obj.recurring.remove(self.jid)
2319
+ Qless.queue(value).recurring.add(score, self.jid)
2320
+ redis.call('hset', 'ql:r:' .. self.jid, 'queue', value)
2321
+ -- If we don't already know about the queue, learn about it
2322
+ if redis.call('zscore', 'ql:queues', value) == false then
2323
+ redis.call('zadd', 'ql:queues', now, value)
2324
+ end
2325
+ elseif key == 'backlog' then
2326
+ value = assert(tonumber(value),
2327
+ 'Recur(): Arg "backlog" not a number: ' .. tostring(value))
2328
+ redis.call('hset', 'ql:r:' .. self.jid, 'backlog', value)
2329
+ else
2330
+ error('Recur(): Unrecognized option "' .. key .. '"')
2331
+ end
2332
+ end
2333
+ return true
2334
+ else
2335
+ error('Recur(): No recurring job ' .. self.jid)
2336
+ end
2337
+ end
2338
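
One subtlety in `update`: changing `interval` does not reschedule from `now`; it shifts the already-scheduled next run by the difference between the new and old interval (the `zincrby` delta above). In numbers:

    -- Sketch: how an interval change moves the next scheduled run.
    local next_run, old_interval, new_interval = 5000, 600, 900
    next_run = next_run + (new_interval - old_interval)  -- the zincrby delta
    print(next_run)  --> 5300
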
+
2339
+ -- Tags this recurring job with the provided tags
2340
+ function QlessRecurringJob:tag(...)
2341
+ local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
2342
+ -- If the job has been canceled / deleted, then raise an error
2343
+ if tags then
2344
+ -- Decode the json blob, convert to dictionary
2345
+ tags = cjson.decode(tags)
2346
+ local _tags = {}
2347
+ for i,v in ipairs(tags) do _tags[v] = true end
2348
+
2349
+ -- Add any of the provided tags that aren't already present
2350
+ for i=1,#arg do if _tags[arg[i]] == nil then table.insert(tags, arg[i]) end end
2351
+
2352
+ tags = cjson.encode(tags)
2353
+ redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
2354
+ return tags
2355
+ else
2356
+ error('Tag(): Job ' .. self.jid .. ' does not exist')
2357
+ end
2358
+ end
2359
+
2360
+ -- Removes a tag from the recurring job
2361
+ function QlessRecurringJob:untag(...)
2362
+ -- Get the existing tags
2363
+ local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
2364
+ -- If the job has been canceled / deleted, then raise an error
2365
+ if tags then
2366
+ -- Decode the json blob, convert to dictionary
2367
+ tags = cjson.decode(tags)
2368
+ local _tags = {}
2369
+ -- Make a hash
2370
+ for i,v in ipairs(tags) do _tags[v] = true end
2371
+ -- Delete these from the hash
2372
+ for i = 1,#arg do _tags[arg[i]] = nil end
2373
+ -- Back into a list
2374
+ local results = {}
2375
+ for i, tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
2376
+ -- json encode them, set, and return
2377
+ tags = cjson.encode(results)
2378
+ redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
2379
+ return tags
2380
+ else
2381
+ error('Untag(): Job ' .. self.jid .. ' does not exist')
2382
+ end
2383
+ end
2384
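
Both `tag` and `untag` round-trip the JSON tag list through a temporary hash so membership checks are O(1). The same add-if-missing / drop-if-present pattern on plain tables:

    -- Sketch: the set-backed tag juggling used by tag()/untag().
    local tags = {'backup', 'nightly'}
    local present = {}
    for _, t in ipairs(tags) do present[t] = true end

    -- tag(): append only the tags we don't already have
    for _, t in ipairs({'nightly', 'urgent'}) do
      if not present[t] then present[t] = true; table.insert(tags, t) end
    end

    -- untag(): drop 'backup', keep the rest in their original order
    present['backup'] = nil
    local kept = {}
    for _, t in ipairs(tags) do if present[t] then table.insert(kept, t) end end
    print(table.concat(kept, ','))  --> nightly,urgent
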
+
2385
+ -- Stop further occurrences of this job
2386
+ function QlessRecurringJob:unrecur()
2387
+ -- First, find out what queue it was attached to
2388
+ local queue = redis.call('hget', 'ql:r:' .. self.jid, 'queue')
2389
+ if queue then
2390
+ -- Now, delete it from the queue it was attached to, and delete the
2391
+ -- thing itself
2392
+ Qless.queue(queue).recurring.remove(self.jid)
2393
+ redis.call('del', 'ql:r:' .. self.jid)
2394
+ return true
2395
+ else
2396
+ return true
2397
+ end
2398
+ end
2399
+ -- Deregisters these workers from the list of known workers
2400
+ function QlessWorker.deregister(...)
2401
+ redis.call('zrem', 'ql:workers', unpack(arg))
2402
+ end
2403
+
2404
+ -- Provide data about all the workers, or if a specific worker is provided,
2405
+ -- then which jobs that worker is responsible for. If no worker is provided,
2406
+ -- expect a response of the form:
2407
+ --
2408
+ -- [
2409
+ -- # This is sorted by the recency of activity from that worker
2410
+ -- {
2411
+ -- 'name' : 'hostname1-pid1',
2412
+ -- 'jobs' : 20,
2413
+ -- 'stalled': 0
2414
+ -- }, {
2415
+ -- ...
2416
+ -- }
2417
+ -- ]
2418
+ --
2419
+ -- If a worker id is provided, then expect a response of the form:
2420
+ --
2421
+ -- {
2422
+ -- 'jobs': [
2423
+ -- jid1,
2424
+ -- jid2,
2425
+ -- ...
2426
+ -- ], 'stalled': [
2427
+ -- jid1,
2428
+ -- ...
2429
+ -- ]
2430
+ -- }
2431
+ --
2432
+ function QlessWorker.counts(now, worker)
2433
+ -- Clean up all the workers' job lists if they're too old. This is
2434
+ -- determined by the `max-worker-age` configuration, defaulting to the
2435
+ -- last day. Seems like a 'reasonable' default
2436
+ local interval = tonumber(Qless.config.get('max-worker-age', 86400))
2437
+
2438
+ local workers = redis.call('zrangebyscore', 'ql:workers', 0, now - interval)
2439
+ for index, worker in ipairs(workers) do
2440
+ redis.call('del', 'ql:w:' .. worker .. ':jobs')
2441
+ end
2442
+
2443
+ -- And now remove them from the list of known workers
2444
+ redis.call('zremrangebyscore', 'ql:workers', 0, now - interval)
2445
+
2446
+ if worker then
2447
+ return {
2448
+ jobs = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now + 8640000, now),
2449
+ stalled = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now, 0)
2450
+ }
2451
+ else
2452
+ local response = {}
2453
+ local workers = redis.call('zrevrange', 'ql:workers', 0, -1)
2454
+ for index, worker in ipairs(workers) do
2455
+ table.insert(response, {
2456
+ name = worker,
2457
+ jobs = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', now, now + 8640000),
2458
+ stalled = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', 0, now)
2459
+ })
2460
+ end
2461
+ return response
2462
+ end
2463
+ end
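
A worker's `ql:w:<worker>:jobs` zset is scored by each job's lock expiration (that's what `pop` stores), which is what lets `counts` split active from stalled with two range queries: expirations after `now` are live, the rest have stalled. A simplified sketch of that classification (the real code uses score ranges, with `now + 8640000` as a far-future bound):

    -- Sketch: splitting a worker's jobs by lock expiration.
    local now = 1000
    local jobs = { jid1 = 900, jid2 = 1100, jid3 = 400 }  -- jid -> expires
    local active, stalled = {}, {}
    for jid, expires in pairs(jobs) do
      if expires > now then table.insert(active, jid)
      else table.insert(stalled, jid) end
    end
    print(#active, #stalled)  --> 1   2
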