qless 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. data/Gemfile +9 -3
  2. data/README.md +70 -25
  3. data/Rakefile +125 -9
  4. data/exe/install_phantomjs +21 -0
  5. data/lib/qless.rb +115 -76
  6. data/lib/qless/config.rb +11 -9
  7. data/lib/qless/failure_formatter.rb +43 -0
  8. data/lib/qless/job.rb +201 -102
  9. data/lib/qless/job_reservers/ordered.rb +7 -1
  10. data/lib/qless/job_reservers/round_robin.rb +16 -6
  11. data/lib/qless/job_reservers/shuffled_round_robin.rb +9 -2
  12. data/lib/qless/lua/qless-lib.lua +2463 -0
  13. data/lib/qless/lua/qless.lua +2012 -0
  14. data/lib/qless/lua_script.rb +63 -12
  15. data/lib/qless/middleware/memory_usage_monitor.rb +62 -0
  16. data/lib/qless/middleware/metriks.rb +45 -0
  17. data/lib/qless/middleware/redis_reconnect.rb +6 -3
  18. data/lib/qless/middleware/requeue_exceptions.rb +94 -0
  19. data/lib/qless/middleware/retry_exceptions.rb +38 -9
  20. data/lib/qless/middleware/sentry.rb +3 -7
  21. data/lib/qless/middleware/timeout.rb +64 -0
  22. data/lib/qless/queue.rb +90 -55
  23. data/lib/qless/server.rb +177 -130
  24. data/lib/qless/server/views/_job.erb +33 -15
  25. data/lib/qless/server/views/completed.erb +11 -0
  26. data/lib/qless/server/views/layout.erb +70 -11
  27. data/lib/qless/server/views/overview.erb +93 -53
  28. data/lib/qless/server/views/queue.erb +9 -8
  29. data/lib/qless/server/views/queues.erb +18 -1
  30. data/lib/qless/subscriber.rb +37 -22
  31. data/lib/qless/tasks.rb +5 -10
  32. data/lib/qless/test_helpers/worker_helpers.rb +55 -0
  33. data/lib/qless/version.rb +3 -1
  34. data/lib/qless/worker.rb +4 -413
  35. data/lib/qless/worker/base.rb +247 -0
  36. data/lib/qless/worker/forking.rb +245 -0
  37. data/lib/qless/worker/serial.rb +41 -0
  38. metadata +135 -52
  39. data/lib/qless/qless-core/cancel.lua +0 -101
  40. data/lib/qless/qless-core/complete.lua +0 -233
  41. data/lib/qless/qless-core/config.lua +0 -56
  42. data/lib/qless/qless-core/depends.lua +0 -65
  43. data/lib/qless/qless-core/deregister_workers.lua +0 -12
  44. data/lib/qless/qless-core/fail.lua +0 -117
  45. data/lib/qless/qless-core/failed.lua +0 -83
  46. data/lib/qless/qless-core/get.lua +0 -37
  47. data/lib/qless/qless-core/heartbeat.lua +0 -51
  48. data/lib/qless/qless-core/jobs.lua +0 -41
  49. data/lib/qless/qless-core/pause.lua +0 -18
  50. data/lib/qless/qless-core/peek.lua +0 -165
  51. data/lib/qless/qless-core/pop.lua +0 -314
  52. data/lib/qless/qless-core/priority.lua +0 -32
  53. data/lib/qless/qless-core/put.lua +0 -169
  54. data/lib/qless/qless-core/qless-lib.lua +0 -2354
  55. data/lib/qless/qless-core/qless.lua +0 -1862
  56. data/lib/qless/qless-core/queues.lua +0 -58
  57. data/lib/qless/qless-core/recur.lua +0 -190
  58. data/lib/qless/qless-core/retry.lua +0 -73
  59. data/lib/qless/qless-core/stats.lua +0 -92
  60. data/lib/qless/qless-core/tag.lua +0 -100
  61. data/lib/qless/qless-core/track.lua +0 -79
  62. data/lib/qless/qless-core/unfail.lua +0 -54
  63. data/lib/qless/qless-core/unpause.lua +0 -12
  64. data/lib/qless/qless-core/workers.lua +0 -69
  65. data/lib/qless/wait_until.rb +0 -19
data/lib/qless/job_reservers/ordered.rb
@@ -1,3 +1,5 @@
+ # Encoding: utf-8
+
  module Qless
  module JobReservers
  class Ordered
@@ -15,8 +17,12 @@ module Qless
  nil
  end
 
+ def prep_for_work!
+ # nothing here on purpose
+ end
+
  def description
- @description ||= @queues.map(&:name).join(', ') + " (ordered)"
+ @description ||= @queues.map(&:name).join(', ') + ' (ordered)'
  end
  end
  end
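The prep_for_work! hook added above rounds out the reserver interface that workers drive. A minimal sketch of that contract in Ruby (the driver loop is illustrative; only reserve, prep_for_work! and description appear in this diff):

    # Hypothetical driver loop for a job reserver (not the gem's actual worker).
    reserver = Qless::JobReservers::Ordered.new(queues)
    reserver.prep_for_work!          # no-op here; a hook for subclasses
    while (job = reserver.reserve)   # first non-empty queue wins, in order
      job.perform
    end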
data/lib/qless/job_reservers/round_robin.rb
@@ -1,5 +1,8 @@
+ # Encoding: utf-8
+
  module Qless
  module JobReservers
+ # Round-robins through all the provided queues
  class RoundRobin
  attr_reader :queues
 
@@ -11,20 +14,28 @@ module Qless
 
  def reserve
  @num_queues.times do |i|
- if job = next_queue.pop
- return job
- end
+ job = next_queue.pop
+ return job if job
  end
  nil
  end
 
+ def prep_for_work!
+ # nothing here on purpose
+ end
+
  def description
- @description ||= @queues.map(&:name).join(', ') + " (#{self.class::TYPE_DESCRIPTION})"
+ @description ||=
+ @queues.map(&:name).join(', ') + " (#{self.class::TYPE_DESCRIPTION})"
+ end
+
+ def reset_description!
+ @description = nil
  end
 
  private
 
- TYPE_DESCRIPTION = "round robin"
+ TYPE_DESCRIPTION = 'round robin'
 
  def next_queue
  @last_popped_queue_index = (@last_popped_queue_index + 1) % @num_queues
@@ -33,4 +44,3 @@ module Qless
  end
  end
  end
-
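The reworked reserve pops from each queue at most once per call, advancing its cursor as it goes, and returns nil once every queue has come up empty. Observable behavior, sketched (queue objects assumed):

    reserver = Qless::JobReservers::RoundRobin.new([high, low])
    reserver.reserve # tries the queues in rotation; returns the first job found
    reserver.reserve # => nil if all queues are empty on this pass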
data/lib/qless/job_reservers/shuffled_round_robin.rb
@@ -1,14 +1,21 @@
+ # Encoding: utf-8
+
  require 'qless/job_reservers/round_robin'
 
  module Qless
  module JobReservers
+ # Like round-robin but shuffles the order of the queues
  class ShuffledRoundRobin < RoundRobin
  def initialize(queues)
  super(queues.shuffle)
  end
 
- TYPE_DESCRIPTION = "shuffled round robin"
+ def prep_for_work!
+ @queues = @queues.shuffle
+ reset_description!
+ end
+
+ TYPE_DESCRIPTION = 'shuffled round robin'
  end
  end
  end
-
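Here prep_for_work! does real work: re-shuffling inside the hook gives each worker process its own queue order instead of the one fixed at construction time. A sketch of the intended use with forking workers (the fork setup is assumed, not part of this diff):

    reserver = Qless::JobReservers::ShuffledRoundRobin.new(queues)
    2.times do
      fork do
        reserver.prep_for_work! # each child draws a fresh shuffle
        puts reserver.description
      end
    end
    Process.waitall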
data/lib/qless/lua/qless-lib.lua (new file)
@@ -0,0 +1,2463 @@
+ -- Current SHA: 525c39000dc71df53a3502491cb4daf0e1128f1d
+ -- This is a generated file
+ -------------------------------------------------------------------------------
+ -- Forward declarations to make everything happy
+ -------------------------------------------------------------------------------
+ local Qless = {
+ ns = 'ql:'
+ }
+
+ -- Queue forward declaration
+ local QlessQueue = {
+ ns = Qless.ns .. 'q:'
+ }
+ QlessQueue.__index = QlessQueue
+
+ -- Worker forward declaration
+ local QlessWorker = {
+ ns = Qless.ns .. 'w:'
+ }
+ QlessWorker.__index = QlessWorker
+
+ -- Job forward declaration
+ local QlessJob = {
+ ns = Qless.ns .. 'j:'
+ }
+ QlessJob.__index = QlessJob
+
+ -- RecurringJob forward declaration
+ local QlessRecurringJob = {}
+ QlessRecurringJob.__index = QlessRecurringJob
+
+ -- Config forward declaration
+ Qless.config = {}
+
+ -- Extend a table. This comes up quite frequently
+ function table.extend(self, other)
+ for i, v in ipairs(other) do
+ table.insert(self, v)
+ end
+ end
+
+ -- This is essentially the same as redis' publish, but it prefixes the channel
+ -- with the Qless namespace
+ function Qless.publish(channel, message)
+ redis.call('publish', Qless.ns .. channel, message)
+ end
+
+ -- Return a job object given its job id
+ function Qless.job(jid)
+ assert(jid, 'Job(): no jid provided')
+ local job = {}
+ setmetatable(job, QlessJob)
+ job.jid = jid
+ return job
+ end
+
+ -- Return a recurring job object
+ function Qless.recurring(jid)
+ assert(jid, 'Recurring(): no jid provided')
+ local job = {}
+ setmetatable(job, QlessRecurringJob)
+ job.jid = jid
+ return job
+ end
+
+ -- Failed([group, [start, [limit]]])
+ -- ------------------------------------
+ -- If no group is provided, this returns a JSON blob of the counts of the
+ -- various groups of failures known. If a group is provided, it will report up
+ -- to `limit` from `start` of the jobs affected by that issue.
+ --
+ -- # If no group, then...
+ -- {
+ -- 'group1': 1,
+ -- 'group2': 5,
+ -- ...
+ -- }
+ --
+ -- # If a group is provided, then...
+ -- {
+ -- 'total': 20,
+ -- 'jobs': [
+ -- {
+ -- # All the normal keys for a job
+ -- 'jid': ...,
+ -- 'data': ...
+ -- # The message for this particular instance
+ -- 'message': ...,
+ -- 'group': ...,
+ -- }, ...
+ -- ]
+ -- }
+ --
+ function Qless.failed(group, start, limit)
+ start = assert(tonumber(start or 0),
+ 'Failed(): Arg "start" is not a number: ' .. (start or 'nil'))
+ limit = assert(tonumber(limit or 25),
+ 'Failed(): Arg "limit" is not a number: ' .. (limit or 'nil'))
+
+ if group then
+ -- If a group was provided, then we should do paginated lookup
+ return {
+ total = redis.call('llen', 'ql:f:' .. group),
+ jobs = redis.call('lrange', 'ql:f:' .. group, start, start + limit - 1)
+ }
+ else
+ -- Otherwise, we should just list all the known failure groups we have
+ local response = {}
+ local groups = redis.call('smembers', 'ql:failures')
+ for index, group in ipairs(groups) do
+ response[group] = redis.call('llen', 'ql:f:' .. group)
+ end
+ return response
+ end
+ end
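On the Ruby side this backs the failed-jobs listing; a usage sketch (method names assumed from the qless client API of this era):

    client = Qless::Client.new
    client.jobs.failed                   # => { 'network-errors' => 5, ... }
    client.jobs.failed('network-errors') # => { 'total' => 5, 'jobs' => [...] }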
+
+ -- Jobs(now, 'complete', [offset, [count]])
+ -- Jobs(now, (
+ -- 'stalled' | 'running' | 'scheduled' | 'depends', 'recurring'
+ -- ), queue, [offset, [count]])
+ -------------------------------------------------------------------------------
+ -- Return all the job ids currently considered to be in the provided state
+ -- in a particular queue. The response is a list of job ids:
+ --
+ -- [
+ -- jid1,
+ -- jid2,
+ -- ...
+ -- ]
+ function Qless.jobs(now, state, ...)
+ assert(state, 'Jobs(): Arg "state" missing')
+ if state == 'complete' then
+ local offset = assert(tonumber(arg[1] or 0),
+ 'Jobs(): Arg "offset" not a number: ' .. tostring(arg[1]))
+ local count = assert(tonumber(arg[2] or 25),
+ 'Jobs(): Arg "count" not a number: ' .. tostring(arg[2]))
+ return redis.call('zrevrange', 'ql:completed', offset,
+ offset + count - 1)
+ else
+ local name = assert(arg[1], 'Jobs(): Arg "queue" missing')
+ local offset = assert(tonumber(arg[2] or 0),
+ 'Jobs(): Arg "offset" not a number: ' .. tostring(arg[2]))
+ local count = assert(tonumber(arg[3] or 25),
+ 'Jobs(): Arg "count" not a number: ' .. tostring(arg[3]))
+
+ local queue = Qless.queue(name)
+ if state == 'running' then
+ return queue.locks.peek(now, offset, count)
+ elseif state == 'stalled' then
+ return queue.locks.expired(now, offset, count)
+ elseif state == 'scheduled' then
+ queue:check_scheduled(now, queue.scheduled.length())
+ return queue.scheduled.peek(now, offset, count)
+ elseif state == 'depends' then
+ return queue.depends.peek(now, offset, count)
+ elseif state == 'recurring' then
+ return queue.recurring.peek(math.huge, offset, count)
+ else
+ error('Jobs(): Unknown type "' .. state .. '"')
+ end
+ end
+ end
+
+ -- Track()
+ -- Track(now, ('track' | 'untrack'), jid)
+ -- ------------------------------------------
+ -- If no arguments are provided, it returns details of all currently-tracked
+ -- jobs. If the first argument is 'track', then it will start tracking the job
+ -- associated with that id, and 'untrack' stops tracking it. In this context,
+ -- tracking is nothing more than saving the job to a list of jobs that are
+ -- considered special.
+ --
+ -- {
+ -- 'jobs': [
+ -- {
+ -- 'jid': ...,
+ -- # All the other details you'd get from 'get'
+ -- }, {
+ -- ...
+ -- }
+ -- ], 'expired': [
+ -- # These are all the jids that are completed and whose data expired
+ -- 'deadbeef',
+ -- ...,
+ -- ...,
+ -- ]
+ -- }
+ --
+ function Qless.track(now, command, jid)
+ if command ~= nil then
+ assert(jid, 'Track(): Arg "jid" missing')
+ -- Verify that job exists
+ assert(Qless.job(jid):exists(), 'Track(): Job does not exist')
+ if string.lower(command) == 'track' then
+ Qless.publish('track', jid)
+ return redis.call('zadd', 'ql:tracked', now, jid)
+ elseif string.lower(command) == 'untrack' then
+ Qless.publish('untrack', jid)
+ return redis.call('zrem', 'ql:tracked', jid)
+ else
+ error('Track(): Unknown action "' .. command .. '"')
+ end
+ else
+ local response = {
+ jobs = {},
+ expired = {}
+ }
+ local jids = redis.call('zrange', 'ql:tracked', 0, -1)
+ for index, jid in ipairs(jids) do
+ local data = Qless.job(jid):data()
+ if data then
+ table.insert(response.jobs, data)
+ else
+ table.insert(response.expired, jid)
+ end
+ end
+ return response
+ end
+ end
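Tracking is surfaced on individual jobs in the Ruby client; a sketch (method names assumed):

    job = client.jobs[jid]
    job.track            # adds the jid to ql:tracked and publishes 'track'
    client.jobs.tracked  # => { 'jobs' => [...], 'expired' => [...] }
    job.untrack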
+
+ -- tag(now, ('add' | 'remove'), jid, tag, [tag, ...])
+ -- tag(now, 'get', tag, [offset, [count]])
+ -- tag(now, 'top', [offset, [count]])
+ -- -----------------------------------------------------------------------------
+ -- Accepts a jid, 'add' or 'remove', and then a list of tags
+ -- to either add or remove from the job. Alternatively, 'get',
+ -- a tag to get jobs associated with that tag, and offset and
+ -- count
+ --
+ -- If 'add' or 'remove', the response is a list of the job's
+ -- current tags, or False if the job doesn't exist. If 'get',
+ -- the response is of the form:
+ --
+ -- {
+ -- total: ...,
+ -- jobs: [
+ -- jid,
+ -- ...
+ -- ]
+ -- }
+ --
+ -- If 'top' is supplied, it returns the most commonly-used tags
+ -- in a paginated fashion.
+ function Qless.tag(now, command, ...)
+ assert(command,
+ 'Tag(): Arg "command" must be "add", "remove", "get" or "top"')
+
+ if command == 'add' then
+ local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
+ local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
+ -- If the job has been canceled / deleted, then return false
+ if tags then
+ -- Decode the json blob, convert to dictionary
+ tags = cjson.decode(tags)
+ local _tags = {}
+ for i,v in ipairs(tags) do _tags[v] = true end
+
+ -- Otherwise, add the job to the sorted set with those tags
+ for i=2,#arg do
+ local tag = arg[i]
+ if _tags[tag] == nil then
+ _tags[tag] = true
+ table.insert(tags, tag)
+ end
+ redis.call('zadd', 'ql:t:' .. tag, now, jid)
+ redis.call('zincrby', 'ql:tags', 1, tag)
+ end
+
+ redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(tags))
+ return tags
+ else
+ error('Tag(): Job ' .. jid .. ' does not exist')
+ end
+ elseif command == 'remove' then
+ local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
+ local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
+ -- If the job has been canceled / deleted, then return false
+ if tags then
+ -- Decode the json blob, convert to dictionary
+ tags = cjson.decode(tags)
+ local _tags = {}
+ for i,v in ipairs(tags) do _tags[v] = true end
+
+ -- Otherwise, remove the job from the sorted set with those tags
+ for i=2,#arg do
+ local tag = arg[i]
+ _tags[tag] = nil
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+
+ local results = {}
+ for i,tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
+
+ redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(results))
+ return results
+ else
+ error('Tag(): Job ' .. jid .. ' does not exist')
+ end
+ elseif command == 'get' then
+ local tag = assert(arg[1], 'Tag(): Arg "tag" missing')
+ local offset = assert(tonumber(arg[2] or 0),
+ 'Tag(): Arg "offset" not a number: ' .. tostring(arg[2]))
+ local count = assert(tonumber(arg[3] or 25),
+ 'Tag(): Arg "count" not a number: ' .. tostring(arg[3]))
+ return {
+ total = redis.call('zcard', 'ql:t:' .. tag),
+ jobs = redis.call('zrange', 'ql:t:' .. tag, offset, offset + count - 1)
+ }
+ elseif command == 'top' then
+ local offset = assert(tonumber(arg[1] or 0) , 'Tag(): Arg "offset" not a number: ' .. tostring(arg[1]))
+ local count = assert(tonumber(arg[2] or 25), 'Tag(): Arg "count" not a number: ' .. tostring(arg[2]))
+ return redis.call('zrevrangebyscore', 'ql:tags', '+inf', 2, 'limit', offset, count)
+ else
+ error('Tag(): First argument must be "add", "remove" or "get"')
+ end
+ end
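From Ruby, tags are managed per job; a sketch (the tagged-lookup method name is assumed):

    job.tag('backfill', 'urgent')   # returns the job's updated tag list
    job.untag('urgent')
    client.jobs.tagged('backfill')  # => { 'total' => ..., 'jobs' => [...] }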
+
+ -- Cancel(...)
+ -- --------------
+ -- Cancel a job from taking place. It will be deleted from the system, and any
+ -- attempts to renew a heartbeat will fail, and any attempts to complete it
+ -- will fail. If you try to get the data on the object, you will get nothing.
+ function Qless.cancel(...)
+ -- Dependents is a mapping of a job to its dependent jids
+ local dependents = {}
+ for _, jid in ipairs(arg) do
+ dependents[jid] = redis.call(
+ 'smembers', QlessJob.ns .. jid .. '-dependents') or {}
+ end
+
+ -- Now, we'll loop through every jid we intend to cancel, and we'll go
+ -- make sure that this operation will be ok
+ for i, jid in ipairs(arg) do
+ for j, dep in ipairs(dependents[jid]) do
+ if dependents[dep] == nil then
+ error('Cancel(): ' .. jid .. ' is a dependency of ' .. dep ..
+ ' but is not mentioned to be canceled')
+ end
+ end
+ end
+
+ -- If we've made it this far, then we are good to go. We can now just
+ -- remove any trace of all these jobs, as they form a dependent clique
+ for _, jid in ipairs(arg) do
+ -- Find any stage it's associated with and remove it from that stage
+ local state, queue, failure, worker = unpack(redis.call(
+ 'hmget', QlessJob.ns .. jid, 'state', 'queue', 'failure', 'worker'))
+
+ if state ~= 'complete' then
+ -- Send a message out on the appropriate channels
+ local encoded = cjson.encode({
+ jid = jid,
+ worker = worker,
+ event = 'canceled',
+ queue = queue
+ })
+ Qless.publish('log', encoded)
+
+ -- Remove this job from whatever worker has it, if any
+ if worker and (worker ~= '') then
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
+ -- If necessary, send a message to the appropriate worker, too
+ Qless.publish('w:' .. worker, encoded)
+ end
+
+ -- Remove it from that queue
+ if queue then
+ local queue = Qless.queue(queue)
+ queue.work.remove(jid)
+ queue.locks.remove(jid)
+ queue.scheduled.remove(jid)
+ queue.depends.remove(jid)
+ end
+
+ -- We should probably go through all our dependencies and remove
+ -- ourselves from the list of dependents
+ for i, j in ipairs(redis.call(
+ 'smembers', QlessJob.ns .. jid .. '-dependencies')) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependents', jid)
+ end
+
+ -- Delete any notion of dependencies it has
+ redis.call('del', QlessJob.ns .. jid .. '-dependencies')
+
+ -- If we're in the failed state, remove all of our data
+ if state == 'failed' then
+ failure = cjson.decode(failure)
+ -- We need to make this remove it from the failed queues
+ redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
+ if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
+ redis.call('srem', 'ql:failures', failure.group)
+ end
+ -- Remove one count from the failed count of the particular
+ -- queue
+ local bin = failure.when - (failure.when % 86400)
+ local failed = redis.call(
+ 'hget', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed')
+ redis.call('hset',
+ 'ql:s:stats:' .. bin .. ':' .. queue, 'failed', failed - 1)
+ end
+
+ -- Remove it as a job that's tagged with this particular tag
+ local tags = cjson.decode(
+ redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
+ for i, tag in ipairs(tags) do
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+
+ -- If the job was being tracked, we should notify
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
+ Qless.publish('canceled', jid)
+ end
+
+ -- Just go ahead and delete our data
+ redis.call('del', QlessJob.ns .. jid)
+ redis.call('del', QlessJob.ns .. jid .. '-history')
+ end
+ end
+
+ return arg
+ end
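Note the safety rule enforced above: a job can only be canceled together with every job that depends on it, so a dependency clique must go in one call. From the Ruby client (sketch):

    job = client.jobs[jid]
    job.cancel  # raises if another, uncanceled job still depends on this one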
+
+ -------------------------------------------------------------------------------
+ -- Configuration interactions
+ -------------------------------------------------------------------------------
+
+ -- This represents our default configuration settings
+ Qless.config.defaults = {
+ ['application'] = 'qless',
+ ['heartbeat'] = 60,
+ ['grace-period'] = 10,
+ ['stats-history'] = 30,
+ ['histogram-history'] = 7,
+ ['jobs-history-count'] = 50000,
+ ['jobs-history'] = 604800
+ }
+
+ -- Get one or more of the keys
+ Qless.config.get = function(key, default)
+ if key then
+ return redis.call('hget', 'ql:config', key) or
+ Qless.config.defaults[key] or default
+ else
+ -- Inspired by redis-lua https://github.com/nrk/redis-lua/blob/version-2.0/src/redis.lua
+ local reply = redis.call('hgetall', 'ql:config')
+ for i = 1, #reply, 2 do
+ Qless.config.defaults[reply[i]] = reply[i + 1]
+ end
+ return Qless.config.defaults
+ end
+ end
+
+ -- Set a configuration variable
+ Qless.config.set = function(option, value)
+ assert(option, 'config.set(): Arg "option" missing')
+ assert(value , 'config.set(): Arg "value" missing')
+ -- Send out a log message
+ Qless.publish('log', cjson.encode({
+ event = 'config_set',
+ option = option,
+ value = value
+ }))
+
+ redis.call('hset', 'ql:config', option, value)
+ end
+
+ -- Unset a configuration option
+ Qless.config.unset = function(option)
+ assert(option, 'config.unset(): Arg "option" missing')
+ -- Send out a log message
+ Qless.publish('log', cjson.encode({
+ event = 'config_unset',
+ option = option
+ }))
+
+ redis.call('hdel', 'ql:config', option)
+ end
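Config keys fall back to the defaults table above when unset. A Ruby sketch of get/set (hash-style access assumed from the client's Config object):

    client.config['heartbeat']        # => 60 unless overridden
    client.config['heartbeat'] = 120  # hset on ql:config plus a published log event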
+ -------------------------------------------------------------------------------
+ -- Job Class
+ --
+ -- It returns an object that represents the job with the provided JID
+ -------------------------------------------------------------------------------
+
+ -- This gets all the data associated with the job with the provided id. If the
+ -- job is not found, it returns nil. If found, it returns an object with the
+ -- appropriate properties
+ function QlessJob:data(...)
+ local job = redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'jid', 'klass', 'state', 'queue',
+ 'worker', 'priority', 'expires', 'retries', 'remaining', 'data',
+ 'tags', 'failure', 'spawned_from_jid')
+
+ -- Return nil if we haven't found it
+ if not job[1] then
+ return nil
+ end
+
+ local data = {
+ jid = job[1],
+ klass = job[2],
+ state = job[3],
+ queue = job[4],
+ worker = job[5] or '',
+ tracked = redis.call(
+ 'zscore', 'ql:tracked', self.jid) ~= false,
+ priority = tonumber(job[6]),
+ expires = tonumber(job[7]) or 0,
+ retries = tonumber(job[8]),
+ remaining = math.floor(tonumber(job[9])),
+ data = job[10],
+ tags = cjson.decode(job[11]),
+ history = self:history(),
+ failure = cjson.decode(job[12] or '{}'),
+ spawned_from_jid = job[13],
+ dependents = redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependents'),
+ dependencies = redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependencies')
+ }
+
+ if #arg > 0 then
+ -- This section could probably be optimized, but I wanted the interface
+ -- in place first
+ local response = {}
+ for index, key in ipairs(arg) do
+ table.insert(response, data[key])
+ end
+ return response
+ else
+ return data
+ end
+ end
+
+ -- Complete a job and optionally put it in another queue, either scheduled or
+ -- to be considered waiting immediately. It can also optionally accept other
+ -- jids on which this job will be considered dependent before it's considered
+ -- valid.
+ --
+ -- The variable-length arguments may be pairs of the form:
+ --
+ -- ('next' , queue) : The queue to advance it to next
+ -- ('delay' , delay) : The delay for the next queue
+ -- ('depends', : Json of jobs it depends on in the new queue
+ -- '["jid1", "jid2", ...]')
+ ---
+ function QlessJob:complete(now, worker, queue, data, ...)
+ assert(worker, 'Complete(): Arg "worker" missing')
+ assert(queue , 'Complete(): Arg "queue" missing')
+ data = assert(cjson.decode(data),
+ 'Complete(): Arg "data" missing or not JSON: ' .. tostring(data))
+
+ -- Read in all the optional parameters
+ local options = {}
+ for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
+
+ -- Sanity check on optional args
+ local nextq = options['next']
+ local delay = assert(tonumber(options['delay'] or 0))
+ local depends = assert(cjson.decode(options['depends'] or '[]'),
+ 'Complete(): Arg "depends" not JSON: ' .. tostring(options['depends']))
+
+ -- Delay doesn't make sense without nextq
+ if options['delay'] and nextq == nil then
+ error('Complete(): "delay" cannot be used without a "next".')
+ end
+
+ -- Depends doesn't make sense without nextq
+ if options['depends'] and nextq == nil then
+ error('Complete(): "depends" cannot be used without a "next".')
+ end
+
+ -- The bin is midnight of the provided day
+ -- 24 * 60 * 60 = 86400
+ local bin = now - (now % 86400)
+
+ -- First things first, we should see if the worker still owns this job
+ local lastworker, state, priority, retries, current_queue = unpack(
+ redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state',
+ 'priority', 'retries', 'queue'))
+
+ if lastworker == false then
+ error('Complete(): Job does not exist')
+ elseif (state ~= 'running') then
+ error('Complete(): Job is not currently running: ' .. state)
+ elseif lastworker ~= worker then
+ error('Complete(): Job has been handed out to another worker: ' ..
+ tostring(lastworker))
+ elseif queue ~= current_queue then
+ error('Complete(): Job running in another queue: ' ..
+ tostring(current_queue))
+ end
+
+ -- Now we can assume that the worker does own the job. We need to
+ -- 1) Remove the job from the 'locks' from the old queue
+ -- 2) Enqueue it in the next stage if necessary
+ -- 3) Update the data
+ -- 4) Mark the job as completed, remove the worker, remove expires, and
+ -- update history
+ self:history(now, 'done')
+
+ if data then
+ redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
+ end
+
+ -- Remove the job from the previous queue
+ local queue_obj = Qless.queue(queue)
+ queue_obj.work.remove(self.jid)
+ queue_obj.locks.remove(self.jid)
+ queue_obj.scheduled.remove(self.jid)
+
+ ----------------------------------------------------------
+ -- This is the massive stats update that we have to do
+ ----------------------------------------------------------
+ -- This is how long we've been waiting to get popped
+ -- local waiting = math.floor(now) - history[#history]['popped']
+ local time = tonumber(
+ redis.call('hget', QlessJob.ns .. self.jid, 'time') or now)
+ local waiting = now - time
+ Qless.queue(queue):stat(now, 'run', waiting)
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'time', string.format("%.20f", now))
+
+ -- Remove this job from the jobs that the worker that was running it has
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
+
+ if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
+ Qless.publish('completed', self.jid)
+ end
+
+ if nextq then
+ queue_obj = Qless.queue(nextq)
+ -- Send a message out to log
+ Qless.publish('log', cjson.encode({
+ jid = self.jid,
+ event = 'advanced',
+ queue = queue,
+ to = nextq
+ }))
+
+ -- Enqueue the job
+ self:history(now, 'put', {q = nextq})
+
+ -- We're going to make sure that this queue is in the
+ -- set of known queues
+ if redis.call('zscore', 'ql:queues', nextq) == false then
+ redis.call('zadd', 'ql:queues', now, nextq)
+ end
+
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'waiting',
+ 'worker', '',
+ 'failure', '{}',
+ 'queue', nextq,
+ 'expires', 0,
+ 'remaining', tonumber(retries))
+
+ if (delay > 0) and (#depends == 0) then
+ queue_obj.scheduled.add(now + delay, self.jid)
+ return 'scheduled'
+ else
+ -- These are the jids we legitimately have to wait on
+ local count = 0
+ for i, j in ipairs(depends) do
+ -- Make sure it's something other than 'nil' or complete.
+ local state = redis.call('hget', QlessJob.ns .. j, 'state')
+ if (state and state ~= 'complete') then
+ count = count + 1
+ redis.call(
+ 'sadd', QlessJob.ns .. j .. '-dependents',self.jid)
+ redis.call(
+ 'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
+ end
+ end
+ if count > 0 then
+ queue_obj.depends.add(now, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'depends')
+ if delay > 0 then
+ -- We've already put it in 'depends'. Now, we must just save the data
+ -- for when it's scheduled
+ queue_obj.depends.add(now, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'scheduled', now + delay)
+ end
+ return 'depends'
+ else
+ queue_obj.work.add(now, priority, self.jid)
+ return 'waiting'
+ end
+ end
+ else
+ -- Send a message out to log
+ Qless.publish('log', cjson.encode({
+ jid = self.jid,
+ event = 'completed',
+ queue = queue
+ }))
+
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'complete',
+ 'worker', '',
+ 'failure', '{}',
+ 'queue', '',
+ 'expires', 0,
+ 'remaining', tonumber(retries))
+
+ -- Do the completion dance
+ local count = Qless.config.get('jobs-history-count')
+ local time = Qless.config.get('jobs-history')
+
+ -- These are the default values
+ count = tonumber(count or 50000)
+ time = tonumber(time or 7 * 24 * 60 * 60)
+
+ -- Schedule this job for destruction eventually
+ redis.call('zadd', 'ql:completed', now, self.jid)
+
+ -- Now look at the expired job data. First, based on the current time
+ local jids = redis.call('zrangebyscore', 'ql:completed', 0, now - time)
+ -- Any jobs that need to be expired... delete
+ for index, jid in ipairs(jids) do
+ local tags = cjson.decode(
+ redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
+ for i, tag in ipairs(tags) do
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+ redis.call('del', QlessJob.ns .. jid)
+ redis.call('del', QlessJob.ns .. jid .. '-history')
+ end
+ -- And now remove those from the queued-for-cleanup queue
+ redis.call('zremrangebyscore', 'ql:completed', 0, now - time)
+
+ -- Now trim all but the most recent 'count' ids
+ jids = redis.call('zrange', 'ql:completed', 0, (-1-count))
+ for index, jid in ipairs(jids) do
+ local tags = cjson.decode(
+ redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
+ for i, tag in ipairs(tags) do
+ redis.call('zrem', 'ql:t:' .. tag, jid)
+ redis.call('zincrby', 'ql:tags', -1, tag)
+ end
+ redis.call('del', QlessJob.ns .. jid)
+ redis.call('del', QlessJob.ns .. jid .. '-history')
+ end
+ redis.call('zremrangebyrank', 'ql:completed', 0, (-1-count))
+
+ -- Alright, if this has any dependents, then we should go ahead
+ -- and unstick those guys.
+ for i, j in ipairs(redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependents')) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependencies', self.jid)
+ if redis.call(
+ 'scard', QlessJob.ns .. j .. '-dependencies') == 0 then
+ local q, p, scheduled = unpack(
+ redis.call('hmget', QlessJob.ns .. j, 'queue', 'priority', 'scheduled'))
+ if q then
+ local queue = Qless.queue(q)
+ queue.depends.remove(j)
+ if scheduled then
+ queue.scheduled.add(scheduled, j)
+ redis.call('hset', QlessJob.ns .. j, 'state', 'scheduled')
+ redis.call('hdel', QlessJob.ns .. j, 'scheduled')
+ else
+ queue.work.add(now, p, j)
+ redis.call('hset', QlessJob.ns .. j, 'state', 'waiting')
+ end
+ end
+ end
+ end
+
+ -- Delete our dependents key
+ redis.call('del', QlessJob.ns .. self.jid .. '-dependents')
+
+ return 'complete'
+ end
+ end
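The return values above ('scheduled', 'depends', 'waiting' vs. 'complete') correspond to how a job is completed from Ruby; a sketch (option names assumed to mirror the Lua 'next'/'delay'/'depends' pairs):

    job.complete                          # terminal completion => 'complete'
    job.complete('encode', :delay => 10)  # advance to queue 'encode' => 'scheduled'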
+
+ -- Fail(now, worker, group, message, [data])
+ -- -------------------------------------------------
+ -- Mark the particular job as failed, with the provided group, and a more
+ -- specific message. By `group`, we mean some phrase that might be one of
+ -- several categorical modes of failure. The `message` is something more
+ -- job-specific, like perhaps a traceback.
+ --
+ -- This method should __not__ be used to note that a job has been dropped or
+ -- has failed in a transient way. This method __should__ be used to note that
+ -- a job has something really wrong with it that must be remedied.
+ --
+ -- The motivation behind the `group` is so that similar errors can be grouped
+ -- together. Optionally, updated data can be provided for the job. A job in
+ -- any state can be marked as failed. If it has been given to a worker as a
+ -- job, then its subsequent requests to heartbeat or complete that job will
+ -- fail. Failed jobs are kept until they are canceled or completed.
+ --
+ -- __Returns__ the id of the failed job if successful, or `False` on failure.
+ --
+ -- Args:
+ -- 1) jid
+ -- 2) worker
+ -- 3) group
+ -- 4) message
+ -- 5) the current time
+ -- 6) [data]
+ function QlessJob:fail(now, worker, group, message, data)
+ local worker = assert(worker , 'Fail(): Arg "worker" missing')
+ local group = assert(group , 'Fail(): Arg "group" missing')
+ local message = assert(message , 'Fail(): Arg "message" missing')
+
+ -- The bin is midnight of the provided day
+ -- 24 * 60 * 60 = 86400
+ local bin = now - (now % 86400)
+
+ if data then
+ data = cjson.decode(data)
+ end
+
+ -- First things first, we should get the history
+ local queue, state, oldworker = unpack(redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
+
+ -- If the job has been completed, we cannot fail it
+ if not state then
+ error('Fail(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Fail(): Job not currently running: ' .. state)
+ elseif worker ~= oldworker then
+ error('Fail(): Job running with another worker: ' .. oldworker)
+ end
+
+ -- Send out a log message
+ Qless.publish('log', cjson.encode({
+ jid = self.jid,
+ event = 'failed',
+ worker = worker,
+ group = group,
+ message = message
+ }))
+
+ if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
+ Qless.publish('failed', self.jid)
+ end
+
+ -- Remove this job from the jobs that the worker that was running it has
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
+
+ -- Now, take the element of the history for which our provided worker is
+ -- the worker, and update 'failed'
+ self:history(now, 'failed', {worker = worker, group = group})
+
+ -- Increment the number of failures for that queue for the
+ -- given day.
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
+
+ -- Now remove the instance from the schedule, and work queues for the
+ -- queue it's in
+ local queue_obj = Qless.queue(queue)
+ queue_obj.work.remove(self.jid)
+ queue_obj.locks.remove(self.jid)
+ queue_obj.scheduled.remove(self.jid)
+
+ -- The reason that this appears here is that the above will fail if the
+ -- job doesn't exist
+ if data then
+ redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
+ end
+
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'failed',
+ 'worker', '',
+ 'expires', '',
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] = message,
+ ['when'] = math.floor(now),
+ ['worker'] = worker
+ }))
+
+ -- Add this group of failure to the list of failures
+ redis.call('sadd', 'ql:failures', group)
+ -- And add this particular instance to the failed groups
+ redis.call('lpush', 'ql:f:' .. group, self.jid)
+
+ -- Here is where we'd increment stats about the particular stage
+ -- and possibly the workers
+
+ return self.jid
+ end
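The group/message split above is what powers the grouped failure listings. Ruby sketch:

    begin
      job.perform
    rescue => e
      job.fail('network-errors', e.message)  # group collects similar failures
    end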
+
+ -- retry(now, queue, worker, [delay, [group, [message]]])
+ -- ------------------------------------------
+ -- This script accepts jid, queue, worker and delay for retrying a job. This
+ -- is similar in functionality to `put`, except that this counts against the
+ -- retries a job has for a stage.
+ --
+ -- Throws an exception if:
+ -- - the worker is not the worker with a lock on the job
+ -- - the job is not actually running
+ --
+ -- Otherwise, it returns the number of retries remaining. If the allowed
+ -- retries have been exhausted, then it is automatically failed, and a negative
+ -- number is returned.
+ --
+ -- If a group and message is provided, then if the retries are exhausted, then
+ -- the provided group and message will be used in place of the default
+ -- messaging about retries in the particular queue being exhausted
+ function QlessJob:retry(now, queue, worker, delay, group, message)
+ assert(queue , 'Retry(): Arg "queue" missing')
+ assert(worker, 'Retry(): Arg "worker" missing')
+ delay = assert(tonumber(delay or 0),
+ 'Retry(): Arg "delay" not a number: ' .. tostring(delay))
+
+ -- Let's see what the old priority, and tags were
+ local oldqueue, state, retries, oldworker, priority, failure = unpack(
+ redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'state',
+ 'retries', 'worker', 'priority', 'failure'))
+
+ -- If this isn't the worker that owns the job
+ if oldworker == false then
+ error('Retry(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Retry(): Job is not currently running: ' .. state)
+ elseif oldworker ~= worker then
+ error('Retry(): Job has been given to another worker: ' .. oldworker)
+ end
+
+ -- For each of these, decrement their retries. If any of them
+ -- have exhausted their retries, then we should mark them as
+ -- failed.
+ local remaining = tonumber(redis.call(
+ 'hincrby', QlessJob.ns .. self.jid, 'remaining', -1))
+ redis.call('hdel', QlessJob.ns .. self.jid, 'grace')
+
+ -- Remove it from the locks key of the old queue
+ Qless.queue(oldqueue).locks.remove(self.jid)
+
+ -- Remove this job from the worker that was previously working it
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
+
+ if remaining < 0 then
+ -- Now remove the instance from the schedule, and work queues for the
+ -- queue it's in
+ local group = group or 'failed-retries-' .. queue
+ self:history(now, 'failed', {['group'] = group})
+
+ redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'failed',
+ 'worker', '',
+ 'expires', '')
+ -- If the failure has not already been set, then set it
+ if group ~= nil and message ~= nil then
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] = message,
+ ['when'] = math.floor(now),
+ ['worker'] = worker
+ })
+ )
+ else
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] =
+ 'Job exhausted retries in queue "' .. oldqueue .. '"',
+ ['when'] = now,
+ ['worker'] = unpack(self:data('worker'))
+ }))
+ end
+
+ -- Add this type of failure to the list of failures
+ redis.call('sadd', 'ql:failures', group)
+ -- And add this particular instance to the failed types
+ redis.call('lpush', 'ql:f:' .. group, self.jid)
+ -- Increment the count of the failed jobs
+ local bin = now - (now % 86400)
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
+ else
+ -- Put it in the queue again with a delay. Like put()
+ local queue_obj = Qless.queue(queue)
+ if delay > 0 then
+ queue_obj.scheduled.add(now + delay, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'scheduled')
+ else
+ queue_obj.work.add(now, priority, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
+ end
+
+ -- If a group and a message was provided, then we should save it
+ if group ~= nil and message ~= nil then
+ redis.call('hset', QlessJob.ns .. self.jid,
+ 'failure', cjson.encode({
+ ['group'] = group,
+ ['message'] = message,
+ ['when'] = math.floor(now),
+ ['worker'] = worker
+ })
+ )
+ end
+ end
+
+ return math.floor(remaining)
+ end
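Retrying counts against the job's remaining retries and fails the job once they are exhausted; from Ruby (argument order assumed to mirror the Lua signature):

    remaining = job.retry(30, 'transient', 'connection reset') # delay, group, message
    # remaining < 0 means the job was moved to the failed state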
+
+ -- Depends(jid, 'on', [jid, [jid, [...]]])
+ -- Depends(jid, 'off', [jid, [jid, [...]]])
+ -- Depends(jid, 'off', 'all')
+ -------------------------------------------------------------------------------
+ -- Add or remove dependencies a job has. If 'on' is provided, the provided
+ -- jids are added as dependencies. If 'off' and 'all' are provided, then all
+ -- the current dependencies are removed. If 'off' is provided and the next
+ -- argument is not 'all', then those jids are removed as dependencies.
+ --
+ -- If a job is not already in the 'depends' state, then this call will return
+ -- false. Otherwise, it will return true
+ function QlessJob:depends(now, command, ...)
+ assert(command, 'Depends(): Arg "command" missing')
+ local state = redis.call('hget', QlessJob.ns .. self.jid, 'state')
+ if state ~= 'depends' then
+ error('Depends(): Job ' .. self.jid ..
+ ' not in the depends state: ' .. tostring(state))
+ end
+
+ if command == 'on' then
+ -- These are the jids we legitimately have to wait on
+ for i, j in ipairs(arg) do
+ -- Make sure it's something other than 'nil' or complete.
+ local state = redis.call('hget', QlessJob.ns .. j, 'state')
+ if (state and state ~= 'complete') then
+ redis.call(
+ 'sadd', QlessJob.ns .. j .. '-dependents' , self.jid)
+ redis.call(
+ 'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
+ end
+ end
+ return true
+ elseif command == 'off' then
+ if arg[1] == 'all' then
+ for i, j in ipairs(redis.call(
+ 'smembers', QlessJob.ns .. self.jid .. '-dependencies')) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
+ end
+ redis.call('del', QlessJob.ns .. self.jid .. '-dependencies')
+ local q, p = unpack(redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
+ if q then
+ local queue_obj = Qless.queue(q)
+ queue_obj.depends.remove(self.jid)
+ queue_obj.work.add(now, p, self.jid)
+ redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
+ end
+ else
+ for i, j in ipairs(arg) do
+ redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
+ redis.call(
+ 'srem', QlessJob.ns .. self.jid .. '-dependencies', j)
+ if redis.call('scard',
+ QlessJob.ns .. self.jid .. '-dependencies') == 0 then
+ local q, p = unpack(redis.call(
+ 'hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
+ if q then
+ local queue_obj = Qless.queue(q)
+ queue_obj.depends.remove(self.jid)
+ queue_obj.work.add(now, p, self.jid)
+ redis.call('hset',
+ QlessJob.ns .. self.jid, 'state', 'waiting')
+ end
+ end
+ end
+ end
+ return true
+ else
+ error('Depends(): Argument "command" must be "on" or "off"')
+ end
+ end
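Dependency edits map onto a pair of job methods in Ruby; a sketch (method names assumed):

    job.depend(other_jid)    # Depends(jid, 'on', other_jid)
    job.undepend(other_jid)  # Depends(jid, 'off', other_jid)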
+
+ -- Heartbeat
+ ------------
+ -- Renew this worker's lock on this job. Throws an exception if:
+ -- - the job's been given to another worker
+ -- - the job's been completed
+ -- - the job's been canceled
+ -- - the job's not running
+ function QlessJob:heartbeat(now, worker, data)
+ assert(worker, 'Heatbeat(): Arg "worker" missing')
+
+ -- We should find the heartbeat interval for this queue
+ -- heartbeat. First, though, we need to find the queue
+ -- this particular job is in
+ local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue') or ''
+ local expires = now + tonumber(
+ Qless.config.get(queue .. '-heartbeat') or
+ Qless.config.get('heartbeat', 60))
+
+ if data then
+ data = cjson.decode(data)
+ end
+
+ -- First, let's see if the worker still owns this job, and there is a
+ -- worker
+ local job_worker, state = unpack(
+ redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state'))
+ if job_worker == false then
+ -- This means the job doesn't exist
+ error('Heartbeat(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Heartbeat(): Job not currently running: ' .. state)
+ elseif job_worker ~= worker or #job_worker == 0 then
+ error('Heartbeat(): Job given out to another worker: ' .. job_worker)
+ else
+ -- Otherwise, optionally update the user data, and the heartbeat
+ if data then
+ -- I don't know if this is wise, but I'm decoding and encoding
+ -- the user data to hopefully ensure its sanity
+ redis.call('hmset', QlessJob.ns .. self.jid, 'expires',
+ expires, 'worker', worker, 'data', cjson.encode(data))
+ else
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'expires', expires, 'worker', worker)
+ end
+
+ -- Update when this job was last updated on that worker
+ -- Add this job to the list of jobs handled by this worker
+ redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, self.jid)
+
+ -- And now we should just update the locks
+ local queue = Qless.queue(
+ redis.call('hget', QlessJob.ns .. self.jid, 'queue'))
+ queue.locks.add(expires, self.jid)
+ return expires
+ end
+ end
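A heartbeat pushes the lock's expiry forward and returns the new expiry time; from Ruby (sketch):

    new_expiry = job.heartbeat  # epoch seconds; raises if the lock was lost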
+
+ -- Priority
+ -- --------
+ -- Update the priority of this job. If the job doesn't exist, throws an
+ -- exception
+ function QlessJob:priority(priority)
+ priority = assert(tonumber(priority),
+ 'Priority(): Arg "priority" missing or not a number: ' ..
+ tostring(priority))
+
+ -- Get the queue the job is currently in, if any
+ local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue')
+
+ if queue == nil then
+ -- If the job doesn't exist, throw an error
+ error('Priority(): Job ' .. self.jid .. ' does not exist')
+ elseif queue == '' then
+ -- Just adjust the priority
+ redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
+ return priority
+ else
+ -- Adjust the priority and see if it's a candidate for updating
+ -- its priority in the queue it's currently in
+ local queue_obj = Qless.queue(queue)
+ if queue_obj.work.score(self.jid) then
+ queue_obj.work.add(0, priority, self.jid)
+ end
+ redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
+ return priority
+ end
+ end
+
+ -- Update the job's attributes with the provided dictionary
+ function QlessJob:update(data)
+ local tmp = {}
+ for k, v in pairs(data) do
+ table.insert(tmp, k)
+ table.insert(tmp, v)
+ end
+ redis.call('hmset', QlessJob.ns .. self.jid, unpack(tmp))
+ end
+
+ -- Times out the job now rather than when its lock is normally set to expire
+ function QlessJob:timeout(now)
+ local queue_name, state, worker = unpack(redis.call('hmget',
+ QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
+ if queue_name == nil then
+ error('Timeout(): Job does not exist')
+ elseif state ~= 'running' then
+ error('Timeout(): Job ' .. self.jid .. ' not running')
+ else
+ -- Time out the job
+ self:history(now, 'timed-out')
+ local queue = Qless.queue(queue_name)
+ queue.locks.remove(self.jid)
+ queue.work.add(now, math.huge, self.jid)
+ redis.call('hmset', QlessJob.ns .. self.jid,
+ 'state', 'stalled', 'expires', 0)
+ local encoded = cjson.encode({
+ jid = self.jid,
+ event = 'lock_lost',
+ worker = worker
+ })
+ Qless.publish('w:' .. worker, encoded)
+ Qless.publish('log', encoded)
+ return queue_name
+ end
+ end
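Timing out moves a running job back to the front of its work queue (the math.huge priority) and notifies its worker over the pub/sub channel. Ruby sketch (method name assumed):

    job.timeout  # the job becomes 'stalled' and is immediately re-poppable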
+
+ -- Return whether or not this job exists
+ function QlessJob:exists()
+ return redis.call('exists', QlessJob.ns .. self.jid) == 1
+ end
+
+ -- Get or append to history
+ function QlessJob:history(now, what, item)
+ -- First, check if there's an old-style history, and update it if there is
+ local history = redis.call('hget', QlessJob.ns .. self.jid, 'history')
+ if history then
+ history = cjson.decode(history)
+ for i, value in ipairs(history) do
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode({math.floor(value.put), 'put', {q = value.q}}))
+
+ -- If there's any popped time
+ if value.popped then
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode({math.floor(value.popped), 'popped',
+ {worker = value.worker}}))
+ end
+
+ -- If there's any failure
+ if value.failed then
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode(
+ {math.floor(value.failed), 'failed', nil}))
+ end
+
+ -- If it was completed
+ if value.done then
+ redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode(
+ {math.floor(value.done), 'done', nil}))
+ end
+ end
+ -- With all this ported forward, delete the old-style history
+ redis.call('hdel', QlessJob.ns .. self.jid, 'history')
+ end
+
+ -- Now to the meat of the function
+ if what == nil then
+ -- Get the history
+ local response = {}
+ for i, value in ipairs(redis.call('lrange',
+ QlessJob.ns .. self.jid .. '-history', 0, -1)) do
+ value = cjson.decode(value)
+ local dict = value[3] or {}
+ dict['when'] = value[1]
+ dict['what'] = value[2]
+ table.insert(response, dict)
+ end
+ return response
+ else
+ -- Append to the history. If the length of the history should be limited,
+ -- then we'll truncate it.
+ local count = tonumber(Qless.config.get('max-job-history', 100))
+ if count > 0 then
+ -- We'll always keep the first item around
+ local obj = redis.call('lpop', QlessJob.ns .. self.jid .. '-history')
+ redis.call('ltrim', QlessJob.ns .. self.jid .. '-history', -count + 2, -1)
+ if obj ~= nil then
+ redis.call('lpush', QlessJob.ns .. self.jid .. '-history', obj)
+ end
+ end
+ return redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
+ cjson.encode({math.floor(now), what, item}))
+ end
+ end
+ -------------------------------------------------------------------------------
+ -- Queue class
+ -------------------------------------------------------------------------------
+ -- Return a queue object
+ function Qless.queue(name)
+ assert(name, 'Queue(): no queue name provided')
+ local queue = {}
+ setmetatable(queue, QlessQueue)
+ queue.name = name
+
+ -- Access to our work
+ queue.work = {
+ peek = function(count)
+ if count == 0 then
+ return {}
+ end
+ local jids = {}
+ for index, jid in ipairs(redis.call(
+ 'zrevrange', queue:prefix('work'), 0, count - 1)) do
+ table.insert(jids, jid)
+ end
+ return jids
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('work'), unpack(arg))
+ end
+ end, add = function(now, priority, jid)
+ return redis.call('zadd',
+ queue:prefix('work'), priority - (now / 10000000000), jid)
+ end, score = function(jid)
+ return redis.call('zscore', queue:prefix('work'), jid)
+ end, length = function()
+ return redis.call('zcard', queue:prefix('work'))
+ end
+ }
+
+ -- Access to our locks
+ queue.locks = {
+ expired = function(now, offset, count)
+ return redis.call('zrangebyscore',
+ queue:prefix('locks'), -math.huge, now, 'LIMIT', offset, count)
+ end, peek = function(now, offset, count)
+ return redis.call('zrangebyscore', queue:prefix('locks'),
+ now, math.huge, 'LIMIT', offset, count)
+ end, add = function(expires, jid)
+ redis.call('zadd', queue:prefix('locks'), expires, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('locks'), unpack(arg))
+ end
+ end, running = function(now)
+ return redis.call('zcount', queue:prefix('locks'), now, math.huge)
+ end, length = function(now)
+ -- If a 'now' is provided, we're interested in how many are before
+ -- that time
+ if now then
+ return redis.call('zcount', queue:prefix('locks'), 0, now)
+ else
+ return redis.call('zcard', queue:prefix('locks'))
+ end
+ end
+ }
+
+ -- Access to our dependent jobs
+ queue.depends = {
+ peek = function(now, offset, count)
+ return redis.call('zrange',
+ queue:prefix('depends'), offset, offset + count - 1)
+ end, add = function(now, jid)
+ redis.call('zadd', queue:prefix('depends'), now, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('depends'), unpack(arg))
+ end
+ end, length = function()
+ return redis.call('zcard', queue:prefix('depends'))
+ end
+ }
+
+ -- Access to our scheduled jobs
+ queue.scheduled = {
+ peek = function(now, offset, count)
+ return redis.call('zrange',
+ queue:prefix('scheduled'), offset, offset + count - 1)
+ end, ready = function(now, offset, count)
+ return redis.call('zrangebyscore',
+ queue:prefix('scheduled'), 0, now, 'LIMIT', offset, count)
+ end, add = function(when, jid)
+ redis.call('zadd', queue:prefix('scheduled'), when, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('scheduled'), unpack(arg))
+ end
+ end, length = function()
+ return redis.call('zcard', queue:prefix('scheduled'))
+ end
+ }
+
+ -- Access to our recurring jobs
+ queue.recurring = {
+ peek = function(now, offset, count)
+ return redis.call('zrangebyscore', queue:prefix('recur'),
+ 0, now, 'LIMIT', offset, count)
+ end, ready = function(now, offset, count)
+ end, add = function(when, jid)
+ redis.call('zadd', queue:prefix('recur'), when, jid)
+ end, remove = function(...)
+ if #arg > 0 then
+ return redis.call('zrem', queue:prefix('recur'), unpack(arg))
+ end
+ end, update = function(increment, jid)
+ redis.call('zincrby', queue:prefix('recur'), increment, jid)
+ end, score = function(jid)
+ return redis.call('zscore', queue:prefix('recur'), jid)
+ end, length = function()
+ return redis.call('zcard', queue:prefix('recur'))
+ end
+ }
+ return queue
+ end
1392
+
1393
+ -- Return the prefix for this particular queue
1394
+ function QlessQueue:prefix(group)
1395
+ if group then
1396
+ return QlessQueue.ns..self.name..'-'..group
1397
+ else
1398
+ return QlessQueue.ns..self.name
1399
+ end
1400
+ end
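+
+ -- For illustration only, assuming QlessQueue.ns is 'ql:q:' (defined
+ -- elsewhere in this library), a queue named 'testing' keeps its
+ -- state under keys like:
+ --   queue:prefix()            --> 'ql:q:testing'
+ --   queue:prefix('work')      --> 'ql:q:testing-work'
+ --   queue:prefix('locks')     --> 'ql:q:testing-locks'
+ --   queue:prefix('scheduled') --> 'ql:q:testing-scheduled'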
1401
+
1402
+ -- Stats(now, date)
1403
+ -- ---------------------
1404
+ -- Return the current statistics for a given queue on a given date. The
1405
+ -- results are returned as a JSON blob:
1406
+ --
1407
+ --
1408
+ -- {
1409
+ -- # These are not yet implemented
1410
+ -- 'failed': 3,
1411
+ -- 'retries': 5,
1412
+ -- 'wait' : {
1413
+ -- 'total' : ...,
1414
+ -- 'mean' : ...,
1415
+ -- 'variance' : ...,
1416
+ -- 'histogram': [
1417
+ -- ...
1418
+ -- ]
1419
+ -- }, 'run': {
1420
+ -- 'total' : ...,
1421
+ -- 'mean' : ...,
1422
+ -- 'variance' : ...,
1423
+ -- 'histogram': [
1424
+ -- ...
1425
+ -- ]
1426
+ -- }
1427
+ -- }
1428
+ --
1429
+ -- The histogram's data points are at the second resolution for the first
1430
+ -- minute, the minute resolution for the first hour, the hour resolution
1431
+ -- for the first day, and the day resolution from there on out (matching
1432
+ -- the `s`, `m`, `h` and `d` keys below). The `histogram` key is a list of
1433
+ -- those values.
1434
+ function QlessQueue:stats(now, date)
1435
+ date = assert(tonumber(date),
1436
+ 'Stats(): Arg "date" missing or not a number: '.. (date or 'nil'))
1437
+
1438
+ -- The bin is midnight of the provided day
1439
+ -- 24 * 60 * 60 = 86400
1440
+ local bin = date - (date % 86400)
1441
+
1442
+ -- This is a table of all the keys we want to use in order to produce a histogram
1443
+ local histokeys = {
1444
+ 's0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23','s24','s25','s26','s27','s28','s29','s30','s31','s32','s33','s34','s35','s36','s37','s38','s39','s40','s41','s42','s43','s44','s45','s46','s47','s48','s49','s50','s51','s52','s53','s54','s55','s56','s57','s58','s59',
1445
+ 'm1','m2','m3','m4','m5','m6','m7','m8','m9','m10','m11','m12','m13','m14','m15','m16','m17','m18','m19','m20','m21','m22','m23','m24','m25','m26','m27','m28','m29','m30','m31','m32','m33','m34','m35','m36','m37','m38','m39','m40','m41','m42','m43','m44','m45','m46','m47','m48','m49','m50','m51','m52','m53','m54','m55','m56','m57','m58','m59',
1446
+ 'h1','h2','h3','h4','h5','h6','h7','h8','h9','h10','h11','h12','h13','h14','h15','h16','h17','h18','h19','h20','h21','h22','h23',
1447
+ 'd1','d2','d3','d4','d5','d6'
1448
+ }
1449
+
1450
+ local mkstats = function(name, bin, queue)
1451
+ -- The results we'll be sending back
1452
+ local results = {}
1453
+
1454
+ local key = 'ql:s:' .. name .. ':' .. bin .. ':' .. queue
1455
+ local count, mean, vk = unpack(redis.call('hmget', key, 'total', 'mean', 'vk'))
1456
+
1457
+ count = tonumber(count) or 0
1458
+ mean = tonumber(mean) or 0
1459
+ vk = tonumber(vk)
1460
+
1461
+ results.count = count or 0
1462
+ results.mean = mean or 0
1463
+ results.histogram = {}
1464
+
1465
+ if not count then
1466
+ results.std = 0
1467
+ else
1468
+ if count > 1 then
1469
+ results.std = math.sqrt(vk / (count - 1))
1470
+ else
1471
+ results.std = 0
1472
+ end
1473
+ end
1474
+
1475
+ local histogram = redis.call('hmget', key, unpack(histokeys))
1476
+ for i=1,#histokeys do
1477
+ table.insert(results.histogram, tonumber(histogram[i]) or 0)
1478
+ end
1479
+ return results
1480
+ end
1481
+
1482
+ local retries, failed, failures = unpack(redis.call('hmget', 'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 'failed', 'failures'))
1483
+ return {
1484
+ retries = tonumber(retries or 0),
1485
+ failed = tonumber(failed or 0),
1486
+ failures = tonumber(failures or 0),
1487
+ wait = mkstats('wait', bin, self.name),
1488
+ run = mkstats('run' , bin, self.name)
1489
+ }
1490
+ end
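+
+ -- A small worked example of the bin arithmetic above (added for
+ -- clarity): the bin is date - (date % 86400), i.e. midnight UTC of
+ -- the day in question. For date = 90000 (1:00 am on the epoch's
+ -- second day):
+ --   bin = 90000 - (90000 % 86400) = 90000 - 3600 = 86400
+ -- so every stat for that day lands in the same hash, e.g.
+ -- 'ql:s:wait:86400:testing' for the 'testing' queue.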
1491
+
1492
+ -- Peek
1493
+ -------
1494
+ -- Examine the next jobs that would be popped from the queue without actually
1495
+ -- popping them.
1496
+ function QlessQueue:peek(now, count)
1497
+ count = assert(tonumber(count),
1498
+ 'Peek(): Arg "count" missing or not a number: ' .. tostring(count))
1499
+
1500
+ -- These are the ids that we're going to return. We'll begin with any jobs
1501
+ -- that have lost their locks
1502
+ local jids = self.locks.expired(now, 0, count)
1503
+
1504
+ -- If we still need jobs in order to meet demand, then we should
1505
+ -- look for all the recurring jobs that need jobs run
1506
+ self:check_recurring(now, count - #jids)
1507
+
1508
+ -- Now we've checked __all__ the locks for this queue that could
1509
+ -- have expired, and are no more than the number requested. If
1510
+ -- we still need values in order to meet the demand, then we
1511
+ -- should check for any scheduled items, and if so, we should
1512
+ -- insert them to ensure correctness when pulling off the next
1513
+ -- unit of work.
1514
+ self:check_scheduled(now, count - #jids)
1515
+
1516
+ -- With these in place, we can expand this list of jids based on the work
1517
+ -- queue itself and the priorities therein
1518
+ table.extend(jids, self.work.peek(count - #jids))
1519
+
1520
+ return jids
1521
+ end
1522
+
1523
+ -- Return true if this queue is paused
1524
+ function QlessQueue:paused()
1525
+ return redis.call('sismember', 'ql:paused_queues', self.name) == 1
1526
+ end
1527
+
1528
+ -- Pause this queue
1529
+ --
1530
+ -- Note: long term, we have discussed adding a rate-limiting
1531
+ -- feature to qless-core, which would be more flexible and
1532
+ -- could be used for pausing (i.e. pause = set the rate to 0).
1533
+ -- For now, this is far simpler, but we should rewrite this
1534
+ -- in terms of the rate limiting feature if/when that is added.
1535
+ function QlessQueue.pause(now, ...)
1536
+ redis.call('sadd', 'ql:paused_queues', unpack(arg))
1537
+ end
1538
+
1539
+ -- Unpause this queue
1540
+ function QlessQueue.unpause(...)
1541
+ redis.call('srem', 'ql:paused_queues', unpack(arg))
1542
+ end
1543
+
1544
+ -- Checks for expired locks, scheduled and recurring jobs, returning any
1545
+ -- jobs that are ready to be processed
1546
+ function QlessQueue:pop(now, worker, count)
1547
+ assert(worker, 'Pop(): Arg "worker" missing')
1548
+ count = assert(tonumber(count),
1549
+ 'Pop(): Arg "count" missing or not a number: ' .. tostring(count))
1550
+
1551
+ -- We should find the heartbeat interval for this queue
1552
+ local expires = now + tonumber(
1553
+ Qless.config.get(self.name .. '-heartbeat') or
1554
+ Qless.config.get('heartbeat', 60))
1555
+
1556
+ -- If this queue is paused, then return no jobs
1557
+ if self:paused() then
1558
+ return {}
1559
+ end
1560
+
1561
+ -- Make sure we add this worker to the list of seen workers
1562
+ redis.call('zadd', 'ql:workers', now, worker)
1563
+
1564
+ -- Check our max concurrency, and limit the count
1565
+ local max_concurrency = tonumber(
1566
+ Qless.config.get(self.name .. '-max-concurrency', 0))
1567
+
1568
+ if max_concurrency > 0 then
1569
+ -- Allow at most max_concurrency - #running
1570
+ local allowed = math.max(0, max_concurrency - self.locks.running(now))
1571
+ count = math.min(allowed, count)
1572
+ if count == 0 then
1573
+ return {}
1574
+ end
1575
+ end
1576
+
1577
+ local jids = self:invalidate_locks(now, count)
1578
+ -- Now we've checked __all__ the locks for this queue that could
1579
+ -- have expired, and are no more than the number requested.
1580
+
1581
+ -- If we still need jobs in order to meet demand, then we should
1582
+ -- look for all the recurring jobs that need jobs run
1583
+ self:check_recurring(now, count - #jids)
1584
+
1585
+ -- If we still need values in order to meet the demand, then we
1586
+ -- should check for any scheduled items, and if so, we should
1587
+ -- insert them to ensure correctness when pulling off the next
1588
+ -- unit of work.
1589
+ self:check_scheduled(now, count - #jids)
1590
+
1591
+ -- With these in place, we can expand this list of jids based on the work
1592
+ -- queue itself and the priorities therein
1593
+ table.extend(jids, self.work.peek(count - #jids))
1594
+
1595
+ local state
1596
+ for index, jid in ipairs(jids) do
1597
+ local job = Qless.job(jid)
1598
+ state = unpack(job:data('state'))
1599
+ job:history(now, 'popped', {worker = worker})
1600
+
1601
+ -- Update the wait time statistics
1602
+ local time = tonumber(
1603
+ redis.call('hget', QlessJob.ns .. jid, 'time') or now)
1604
+ local waiting = now - time
1605
+ self:stat(now, 'wait', waiting)
1606
+ redis.call('hset', QlessJob.ns .. jid,
1607
+ 'time', string.format("%.20f", now))
1608
+
1609
+ -- Add this job to the list of jobs handled by this worker
1610
+ redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid)
1611
+
1612
+ -- Update the jobs data, and add its locks, and return the job
1613
+ job:update({
1614
+ worker = worker,
1615
+ expires = expires,
1616
+ state = 'running'
1617
+ })
1618
+
1619
+ self.locks.add(expires, jid)
1620
+
1621
+ local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false
1622
+ if tracked then
1623
+ Qless.publish('popped', jid)
1624
+ end
1625
+ end
1626
+
1627
+ -- If we are returning any jobs, then we should remove them from the work
1628
+ -- queue
1629
+ self.work.remove(unpack(jids))
1630
+
1631
+ return jids
1632
+ end
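+
+ -- Illustration of the concurrency clamp above (numbers invented):
+ -- with '<queue>-max-concurrency' set to 5 and 3 locks currently
+ -- live, a pop requesting 10 jobs is clamped to
+ --   count = math.min(math.max(0, 5 - 3), 10) = 2
+ -- and with 5 or more live locks the pop returns {} immediately.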
1633
+
1634
+ -- Update the stats for this queue
1635
+ function QlessQueue:stat(now, stat, val)
1636
+ -- The bin is midnight of the provided day
1637
+ local bin = now - (now % 86400)
1638
+ local key = 'ql:s:' .. stat .. ':' .. bin .. ':' .. self.name
1639
+
1640
+ -- Get the current data
1641
+ local count, mean, vk = unpack(
1642
+ redis.call('hmget', key, 'total', 'mean', 'vk'))
1643
+
1644
+ -- If there isn't any data there presently, then we must initialize it
1645
+ count = count or 0
1646
+ if count == 0 then
1647
+ mean = val
1648
+ vk = 0
1649
+ count = 1
1650
+ else
1651
+ count = count + 1
1652
+ local oldmean = mean
1653
+ mean = mean + (val - mean) / count
1654
+ vk = vk + (val - mean) * (val - oldmean)
1655
+ end
1656
+
1657
+ -- Now, update the histogram
1658
+ -- - `s1`, `s2`, ..., -- second-resolution histogram counts
1659
+ -- - `m1`, `m2`, ..., -- minute-resolution
1660
+ -- - `h1`, `h2`, ..., -- hour-resolution
1661
+ -- - `d1`, `d2`, ..., -- day-resolution
1662
+ val = math.floor(val)
1663
+ if val < 60 then -- seconds
1664
+ redis.call('hincrby', key, 's' .. val, 1)
1665
+ elseif val < 3600 then -- minutes
1666
+ redis.call('hincrby', key, 'm' .. math.floor(val / 60), 1)
1667
+ elseif val < 86400 then -- hours
1668
+ redis.call('hincrby', key, 'h' .. math.floor(val / 3600), 1)
1669
+ else -- days
1670
+ redis.call('hincrby', key, 'd' .. math.floor(val / 86400), 1)
1671
+ end
1672
+ redis.call('hmset', key, 'total', count, 'mean', mean, 'vk', vk)
1673
+ end
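+
+ -- Worked example (added for clarity): the running update above is
+ -- Welford's online algorithm. Observing val = 10 and then val = 20:
+ --   first:  count = 1, mean = 10, vk = 0
+ --   second: count = 2, mean = 10 + (20 - 10) / 2 = 15
+ --           vk = 0 + (20 - 15) * (20 - 10) = 50
+ -- Stats() then reports std = math.sqrt(50 / (2 - 1)) ~= 7.07, the
+ -- sample standard deviation of {10, 20}.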
1674
+
1675
+ -- Put(now, jid, klass, data, delay,
1676
+ -- [priority, p],
1677
+ -- [tags, t],
1678
+ -- [retries, r],
1679
+ -- [depends, '[...]'])
1680
+ -- -----------------------
1681
+ -- Insert a job into the queue with the given priority, tags, delay, klass and
1682
+ -- data.
1683
+ function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...)
1684
+ assert(jid , 'Put(): Arg "jid" missing')
1685
+ assert(klass, 'Put(): Arg "klass" missing')
1686
+ local data = assert(cjson.decode(raw_data),
1687
+ 'Put(): Arg "data" missing or not JSON: ' .. tostring(raw_data))
1688
+ delay = assert(tonumber(delay),
1689
+ 'Put(): Arg "delay" not a number: ' .. tostring(delay))
1690
+
1691
+ -- Read in all the optional parameters. All of these must come in pairs, so
1692
+ -- if we have an odd number of extra args, raise an error
1693
+ if #arg % 2 == 1 then
1694
+ error('Odd number of additional args: ' .. tostring(arg))
1695
+ end
1696
+ local options = {}
1697
+ for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
1698
+
1699
+ -- Let's see what the old priority and tags were
1700
+ local job = Qless.job(jid)
1701
+ local priority, tags, oldqueue, state, failure, retries, oldworker =
1702
+ unpack(redis.call('hmget', QlessJob.ns .. jid, 'priority', 'tags',
1703
+ 'queue', 'state', 'failure', 'retries', 'worker'))
1704
+
1705
+ -- If there are old tags, then we should remove the tags this job has
1706
+ if tags then
1707
+ Qless.tag(now, 'remove', jid, unpack(cjson.decode(tags)))
1708
+ end
1709
+
1710
+ -- Sanity check on optional args
1711
+ retries = assert(tonumber(options['retries'] or retries or 5) ,
1712
+ 'Put(): Arg "retries" not a number: ' .. tostring(options['retries']))
1713
+ tags = assert(cjson.decode(options['tags'] or tags or '[]' ),
1714
+ 'Put(): Arg "tags" not JSON' .. tostring(options['tags']))
1715
+ priority = assert(tonumber(options['priority'] or priority or 0),
1716
+ 'Put(): Arg "priority" not a number' .. tostring(options['priority']))
1717
+ local depends = assert(cjson.decode(options['depends'] or '[]') ,
1718
+ 'Put(): Arg "depends" not JSON: ' .. tostring(options['depends']))
1719
+
1720
+ -- If the job has old dependencies, determine which dependencies are
1721
+ -- in the new dependencies but not in the old ones, and which are in the
1722
+ -- old ones but not in the new
1723
+ if #depends > 0 then
1724
+ -- This makes it easier to check if it's in the new list
1725
+ local new = {}
1726
+ for _, d in ipairs(depends) do new[d] = 1 end
1727
+
1728
+ -- Now find what's in the original, but not the new
1729
+ local original = redis.call(
1730
+ 'smembers', QlessJob.ns .. jid .. '-dependencies')
1731
+ for _, dep in pairs(original) do
1732
+ if new[dep] == nil then
1733
+ -- Remove dep as a dependency
1734
+ redis.call('srem', QlessJob.ns .. dep .. '-dependents' , jid)
1735
+ redis.call('srem', QlessJob.ns .. jid .. '-dependencies', dep)
1736
+ end
1737
+ end
1738
+ end
1739
+
1740
+ -- Send out a log message
1741
+ Qless.publish('log', cjson.encode({
1742
+ jid = jid,
1743
+ event = 'put',
1744
+ queue = self.name
1745
+ }))
1746
+
1747
+ -- Update the history to include this new change
1748
+ job:history(now, 'put', {q = self.name})
1749
+
1750
+ -- If this item was previously in another queue, then we should remove it from there
1751
+ if oldqueue then
1752
+ local queue_obj = Qless.queue(oldqueue)
1753
+ queue_obj.work.remove(jid)
1754
+ queue_obj.locks.remove(jid)
1755
+ queue_obj.depends.remove(jid)
1756
+ queue_obj.scheduled.remove(jid)
1757
+ end
1758
+
1759
+ -- If this had previously been given out to a worker, make sure to remove it
1760
+ -- from that worker's jobs
1761
+ if oldworker and oldworker ~= '' then
1762
+ redis.call('zrem', 'ql:w:' .. oldworker .. ':jobs', jid)
1763
+ -- If it's a different worker that's putting this job, send a notification
1764
+ -- to the last owner of the job
1765
+ if oldworker ~= worker then
1766
+ -- We need to inform whatever worker had that job
1767
+ local encoded = cjson.encode({
1768
+ jid = jid,
1769
+ event = 'lock_lost',
1770
+ worker = oldworker
1771
+ })
1772
+ Qless.publish('w:' .. oldworker, encoded)
1773
+ Qless.publish('log', encoded)
1774
+ end
1775
+ end
1776
+
1777
+ -- If the job was previously in the 'completed' state, then we should
1778
+ -- remove it from being enqueued for destruction
1779
+ if state == 'complete' then
1780
+ redis.call('zrem', 'ql:completed', jid)
1781
+ end
1782
+
1783
+ -- Add this job to the list of jobs tagged with whatever tags were supplied
1784
+ for i, tag in ipairs(tags) do
1785
+ redis.call('zadd', 'ql:t:' .. tag, now, jid)
1786
+ redis.call('zincrby', 'ql:tags', 1, tag)
1787
+ end
1788
+
1789
+ -- If we're in the failed state, remove all of our data
1790
+ if state == 'failed' then
1791
+ failure = cjson.decode(failure)
1792
+ -- We need to remove it from the failed queues
1793
+ redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
1794
+ if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
1795
+ redis.call('srem', 'ql:failures', failure.group)
1796
+ end
1797
+ -- The bin is midnight of the provided day
1798
+ -- 24 * 60 * 60 = 86400
1799
+ local bin = failure.when - (failure.when % 86400)
1800
+ -- We also need to decrement the stats about the queue on
1801
+ -- the day that this failure actually happened.
1802
+ redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , -1)
1803
+ end
1804
+
1805
+ -- First, let's save its data
1806
+ redis.call('hmset', QlessJob.ns .. jid,
1807
+ 'jid' , jid,
1808
+ 'klass' , klass,
1809
+ 'data' , raw_data,
1810
+ 'priority' , priority,
1811
+ 'tags' , cjson.encode(tags),
1812
+ 'state' , ((delay > 0) and 'scheduled') or 'waiting',
1813
+ 'worker' , '',
1814
+ 'expires' , 0,
1815
+ 'queue' , self.name,
1816
+ 'retries' , retries,
1817
+ 'remaining', retries,
1818
+ 'time' , string.format("%.20f", now))
1819
+
1820
+ -- These are the jids we legitimately have to wait on
1821
+ for i, j in ipairs(depends) do
1822
+ -- Make sure it's something other than 'nil' or complete.
1823
+ local state = redis.call('hget', QlessJob.ns .. j, 'state')
1824
+ if (state and state ~= 'complete') then
1825
+ redis.call('sadd', QlessJob.ns .. j .. '-dependents' , jid)
1826
+ redis.call('sadd', QlessJob.ns .. jid .. '-dependencies', j)
1827
+ end
1828
+ end
1829
+
1830
+ -- Now, if a delay was provided, and if it's in the future,
1831
+ -- then we'll have to schedule it. Otherwise, we're just
1832
+ -- going to add it to the work queue.
1833
+ if delay > 0 then
1834
+ if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
1835
+ -- We've already put it in 'depends'. Now, we must just save the data
1836
+ -- for when it's scheduled
1837
+ self.depends.add(now, jid)
1838
+ redis.call('hmset', QlessJob.ns .. jid,
1839
+ 'state', 'depends',
1840
+ 'scheduled', now + delay)
1841
+ else
1842
+ self.scheduled.add(now + delay, jid)
1843
+ end
1844
+ else
1845
+ if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
1846
+ self.depends.add(now, jid)
1847
+ redis.call('hset', QlessJob.ns .. jid, 'state', 'depends')
1848
+ else
1849
+ self.work.add(now, priority, jid)
1850
+ end
1851
+ end
1852
+
1853
+ -- Lastly, we're going to make sure that this item is in the
1854
+ -- set of known queues. We should keep this sorted by the
1855
+ -- order in which we saw each of these queues
1856
+ if redis.call('zscore', 'ql:queues', self.name) == false then
1857
+ redis.call('zadd', 'ql:queues', now, self.name)
1858
+ end
1859
+
1860
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
1861
+ Qless.publish('put', jid)
1862
+ end
1863
+
1864
+ return jid
1865
+ end
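+
+ -- Hypothetical usage sketch (jids, klass and data invented): the
+ -- trailing args are flat key/value pairs parsed into `options`:
+ --   queue:put(now, 'worker-1', 'jid-1', 'MyJob', '{"a":1}', 0,
+ --             'priority', 10,
+ --             'tags', '["urgent"]',
+ --             'retries', 3,
+ --             'depends', '["jid-0"]')
+ -- With delay = 0 and no unmet dependencies, 'jid-1' goes straight
+ -- onto the work queue; a positive delay would schedule it instead.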
1866
+
1867
+ -- Move `count` jobs out of the failed state and into this queue
1868
+ function QlessQueue:unfail(now, group, count)
1869
+ assert(group, 'Unfail(): Arg "group" missing')
1870
+ count = assert(tonumber(count or 25),
1871
+ 'Unfail(): Arg "count" not a number: ' .. tostring(count))
1872
+
1873
+ -- Get up to that many jobs, and we'll put them in the appropriate queue
1874
+ local jids = redis.call('lrange', 'ql:f:' .. group, -count, -1)
1875
+
1876
+ -- And now set each job's state, and put it into the appropriate queue
1877
+ local toinsert = {}
1878
+ for index, jid in ipairs(jids) do
1879
+ local job = Qless.job(jid)
1880
+ local data = job:data()
1881
+ job:history(now, 'put', {q = self.name})
1882
+ redis.call('hmset', QlessJob.ns .. data.jid,
1883
+ 'state' , 'waiting',
1884
+ 'worker' , '',
1885
+ 'expires' , 0,
1886
+ 'queue' , self.name,
1887
+ 'remaining', data.retries or 5)
1888
+ self.work.add(now, data.priority, data.jid)
1889
+ end
1890
+
1891
+ -- Remove these jobs from the failed state
1892
+ redis.call('ltrim', 'ql:f:' .. group, 0, -count - 1)
1893
+ if (redis.call('llen', 'ql:f:' .. group) == 0) then
1894
+ redis.call('srem', 'ql:failures', group)
1895
+ end
1896
+
1897
+ return #jids
1898
+ end
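+
+ -- Note (added for clarity): failed jids are lpush'd onto
+ -- 'ql:f:<group>', so the oldest failures sit at the tail of the
+ -- list. The lrange with -count, -1 above therefore grabs up to
+ -- `count` of the oldest failures, and the ltrim drops exactly that
+ -- tail slice.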
1899
+
1900
+ -- Recur a job of type klass in this queue
1901
+ function QlessQueue:recur(now, jid, klass, raw_data, spec, ...)
1902
+ assert(jid , 'RecurringJob On(): Arg "jid" missing')
1903
+ assert(klass, 'RecurringJob On(): Arg "klass" missing')
1904
+ assert(spec , 'RecurringJob On(): Arg "spec" missing')
1905
+ local data = assert(cjson.decode(raw_data),
1906
+ 'RecurringJob On(): Arg "data" not JSON: ' .. tostring(raw_data))
1907
+
1908
+ -- At some point in the future, we may have different types of recurring
1909
+ -- jobs, but for the time being, we only have 'interval'-type jobs
1910
+ if spec == 'interval' then
1911
+ local interval = assert(tonumber(arg[1]),
1912
+ 'Recur(): Arg "interval" not a number: ' .. tostring(arg[1]))
1913
+ local offset = assert(tonumber(arg[2]),
1914
+ 'Recur(): Arg "offset" not a number: ' .. tostring(arg[2]))
1915
+ if interval <= 0 then
1916
+ error('Recur(): Arg "interval" must be greater than 0')
1917
+ end
1918
+
1919
+ -- Read in all the optional parameters. All of these must come in
1920
+ -- pairs, so if we have an odd number of extra args, raise an error
1921
+ if #arg % 2 == 1 then
1922
+ error('Odd number of additional args: ' .. tostring(arg))
1923
+ end
1924
+
1925
+ -- Read in all the optional parameters
1926
+ local options = {}
1927
+ for i = 3, #arg, 2 do options[arg[i]] = arg[i + 1] end
1928
+ options.tags = assert(cjson.decode(options.tags or '{}'),
1929
+ 'Recur(): Arg "tags" must be JSON string array: ' .. tostring(
1930
+ options.tags))
1931
+ options.priority = assert(tonumber(options.priority or 0),
1932
+ 'Recur(): Arg "priority" not a number: ' .. tostring(
1933
+ options.priority))
1934
+ options.retries = assert(tonumber(options.retries or 0),
1935
+ 'Recur(): Arg "retries" not a number: ' .. tostring(
1936
+ options.retries))
1937
+ options.backlog = assert(tonumber(options.backlog or 0),
1938
+ 'Recur(): Arg "backlog" not a number: ' .. tostring(
1939
+ options.backlog))
1940
+
1941
+ local count, old_queue = unpack(redis.call('hmget', 'ql:r:' .. jid, 'count', 'queue'))
1942
+ count = count or 0
1943
+
1944
+ -- If it has previously been in another queue, then we should remove
1945
+ -- some information about it
1946
+ if old_queue then
1947
+ Qless.queue(old_queue).recurring.remove(jid)
1948
+ end
1949
+
1950
+ -- Do some insertions
1951
+ redis.call('hmset', 'ql:r:' .. jid,
1952
+ 'jid' , jid,
1953
+ 'klass' , klass,
1954
+ 'data' , raw_data,
1955
+ 'priority', options.priority,
1956
+ 'tags' , cjson.encode(options.tags or {}),
1957
+ 'state' , 'recur',
1958
+ 'queue' , self.name,
1959
+ 'type' , 'interval',
1960
+ -- How many jobs we've spawned from this
1961
+ 'count' , count,
1962
+ 'interval', interval,
1963
+ 'retries' , options.retries,
1964
+ 'backlog' , options.backlog)
1965
+ -- Now, we should schedule the next run of the job
1966
+ self.recurring.add(now + offset, jid)
1967
+
1968
+ -- Lastly, we're going to make sure that this item is in the
1969
+ -- set of known queues. We should keep this sorted by the
1970
+ -- order in which we saw each of these queues
1971
+ if redis.call('zscore', 'ql:queues', self.name) == false then
1972
+ redis.call('zadd', 'ql:queues', now, self.name)
1973
+ end
1974
+
1975
+ return jid
1976
+ else
1977
+ error('Recur(): schedule type "' .. tostring(spec) .. '" unknown')
1978
+ end
1979
+ end
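+
+ -- Hypothetical usage sketch (identifiers invented): with the
+ -- 'interval' spec, arg[1] is the interval, arg[2] the offset, and
+ -- the rest flat option pairs:
+ --   queue:recur(now, 'recur-jid', 'MyJob', '{}', 'interval', 60, 10,
+ --               'priority', 5, 'backlog', 2)
+ -- schedules the first spawn at now + 10, and roughly every 60
+ -- seconds thereafter.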
1980
+
1981
+ -- Return the length of the queue
1982
+ function QlessQueue:length()
1983
+ return self.locks.length() + self.work.length() + self.scheduled.length()
1984
+ end
1985
+
1986
+ -------------------------------------------------------------------------------
1987
+ -- Housekeeping methods
1988
+ -------------------------------------------------------------------------------
1989
+ -- Instantiate any recurring jobs that are ready
1990
+ function QlessQueue:check_recurring(now, count)
1991
+ -- This is how many jobs we've moved so far
1992
+ local moved = 0
1993
+ -- These are the recurring jobs that need work
1994
+ local r = self.recurring.peek(now, 0, count)
1995
+ for index, jid in ipairs(r) do
1996
+ -- For each of the jids that need jobs scheduled, first
1997
+ -- get the last time each of them was run, and then increment
1998
+ -- it by its interval. While this time is less than now,
1999
+ -- we need to keep putting jobs on the queue
2000
+ local klass, data, priority, tags, retries, interval, backlog = unpack(
2001
+ redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority',
2002
+ 'tags', 'retries', 'interval', 'backlog'))
2003
+ local _tags = cjson.decode(tags)
2004
+ local score = math.floor(tonumber(self.recurring.score(jid)))
2005
+ interval = tonumber(interval)
2006
+
2007
+ -- If the backlog is set for this job, then see if it's been a long
2008
+ -- time since the last pop
2009
+ backlog = tonumber(backlog or 0)
2010
+ if backlog ~= 0 then
2011
+ -- Check how many jobs we could conceivably generate
2012
+ local num = ((now - score) / interval)
2013
+ if num > backlog then
2014
+ -- Update the score
2015
+ score = score + (
2016
+ math.ceil(num - backlog) * interval
2017
+ )
2018
+ end
2019
+ end
2020
+
2021
+ -- We're saving this value so that in the history, we can accurately
2022
+ -- reflect when the job would normally have been scheduled
2023
+ while (score <= now) and (moved < count) do
2024
+ local count = redis.call('hincrby', 'ql:r:' .. jid, 'count', 1)
2025
+ moved = moved + 1
2026
+
2027
+ local child_jid = jid .. '-' .. count
2028
+
2029
+ -- Add this job to the list of jobs tagged with whatever tags were
2030
+ -- supplied
2031
+ for i, tag in ipairs(_tags) do
2032
+ redis.call('zadd', 'ql:t:' .. tag, now, child_jid)
2033
+ redis.call('zincrby', 'ql:tags', 1, tag)
2034
+ end
2035
+
2036
+ -- First, let's save its data
2037
+ redis.call('hmset', QlessJob.ns .. child_jid,
2038
+ 'jid' , child_jid,
2039
+ 'klass' , klass,
2040
+ 'data' , data,
2041
+ 'priority' , priority,
2042
+ 'tags' , tags,
2043
+ 'state' , 'waiting',
2044
+ 'worker' , '',
2045
+ 'expires' , 0,
2046
+ 'queue' , self.name,
2047
+ 'retries' , retries,
2048
+ 'remaining' , retries,
2049
+ 'time' , string.format("%.20f", score),
2050
+ 'spawned_from_jid', jid)
2051
+ Qless.job(child_jid):history(score, 'put', {q = self.name})
2052
+
2053
+ -- Spawned jobs have no delay, so add this one straight to the
2054
+ -- work queue, scored by the time at which it should originally
2055
+ -- have been run
2056
+ self.work.add(score, priority, child_jid)
2057
+
2058
+ score = score + interval
2059
+ self.recurring.add(score, jid)
2060
+ end
2061
+ end
2062
+ end
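+
+ -- Illustration of the backlog cap above (numbers invented): if a
+ -- recurring job with interval = 60 hasn't been popped for 600
+ -- seconds, then num = 600 / 60 = 10 spawnable jobs. With
+ -- backlog = 3, the score is advanced by ceil(10 - 3) * 60 = 420
+ -- seconds, so only the most recent few intervals are backfilled
+ -- rather than all ten.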
2063
+
2064
+ -- Check for any jobs that have been scheduled, and shovel them onto
2065
+ -- the work queue. Returns nothing, but afterwards, up to `count`
2066
+ -- scheduled jobs will be moved into the work queue
2067
+ function QlessQueue:check_scheduled(now, count)
2068
+ -- Find the scheduled jobs that are now ready, so that we can
2069
+ -- insert them into the work queue
2070
+ local scheduled = self.scheduled.ready(now, 0, count)
2071
+ for index, jid in ipairs(scheduled) do
2072
+ -- With these in hand, we'll have to go out and find the
2073
+ -- priorities of these jobs, and then we'll insert them
2074
+ -- into the work queue and then when that's complete, we'll
2075
+ -- remove them from the scheduled queue
2076
+ local priority = tonumber(
2077
+ redis.call('hget', QlessJob.ns .. jid, 'priority') or 0)
2078
+ self.work.add(now, priority, jid)
2079
+ self.scheduled.remove(jid)
2080
+
2081
+ -- We should also update them to have the state 'waiting'
2082
+ -- instead of 'scheduled'
2083
+ redis.call('hset', QlessJob.ns .. jid, 'state', 'waiting')
2084
+ end
2085
+ end
2086
+
2087
+ -- Check for and invalidate any locks that have been lost. Returns the
2088
+ -- list of jids that have been invalidated
2089
+ function QlessQueue:invalidate_locks(now, count)
2090
+ local jids = {}
2091
+ -- Iterate through all the expired locks and add them to the list
2092
+ -- of keys that we'll return
2093
+ for index, jid in ipairs(self.locks.expired(now, 0, count)) do
2094
+ -- Remove this job from the list of jobs held by the worker
2095
+ -- that was running it
2096
+ local worker, failure = unpack(
2097
+ redis.call('hmget', QlessJob.ns .. jid, 'worker', 'failure'))
2098
+ redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
2099
+
2100
+ -- We'll provide a grace period after jobs time out for them to give
2101
+ -- some indication of the failure mode. After that time, however, we'll
2102
+ -- consider the worker dust in the wind
2103
+ local grace_period = tonumber(Qless.config.get('grace-period'))
2104
+
2105
+ -- Whether or not we've already sent a courtesy message
2106
+ local courtesy_sent = tonumber(
2107
+ redis.call('hget', QlessJob.ns .. jid, 'grace') or 0)
2108
+
2109
+ -- If we haven't yet sent the courtesy message, then we'll just
2110
+ -- send that message now. Otherwise, it's time to
2111
+ -- actually hand out the work to another worker
2112
+ local send_message = (courtesy_sent ~= 1)
2113
+ local invalidate = not send_message
2114
+
2115
+ -- If the grace period has been disabled, then we'll do both.
2116
+ if grace_period <= 0 then
2117
+ send_message = true
2118
+ invalidate = true
2119
+ end
2120
+
2121
+ if send_message then
2122
+ -- This is where we supply a courtesy message and give the worker
2123
+ -- time to provide a failure message
2124
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
2125
+ Qless.publish('stalled', jid)
2126
+ end
2127
+ Qless.job(jid):history(now, 'timed-out')
2128
+ redis.call('hset', QlessJob.ns .. jid, 'grace', 1)
2129
+
2130
+ -- Send a message to let the worker know that it has lost its lock on
2131
+ -- the job
2132
+ local encoded = cjson.encode({
2133
+ jid = jid,
2134
+ event = 'lock_lost',
2135
+ worker = worker
2136
+ })
2137
+ Qless.publish('w:' .. worker, encoded)
2138
+ Qless.publish('log', encoded)
2139
+ self.locks.add(now + grace_period, jid)
2140
+
2141
+ -- If we got any expired locks, then we should increment the
2142
+ -- number of retries for this stage for this bin. The bin is
2143
+ -- midnight of the provided day
2144
+ local bin = now - (now % 86400)
2145
+ redis.call('hincrby',
2146
+ 'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 1)
2147
+ end
2148
+
2149
+ if invalidate then
2150
+ -- Unset the grace period attribute so that next time we'll send
2151
+ -- the courtesy message again
2152
+ redis.call('hdel', QlessJob.ns .. jid, 'grace', 0)
2153
+
2154
+ -- See how many remaining retries the job has
2155
+ local remaining = tonumber(redis.call(
2156
+ 'hincrby', QlessJob.ns .. jid, 'remaining', -1))
2157
+
2158
+ -- This is where we actually have to time out the work
2159
+ if remaining < 0 then
2160
+ -- Now remove the instance from the schedule, and work queues
2161
+ -- for the queue it's in
2162
+ self.work.remove(jid)
2163
+ self.locks.remove(jid)
2164
+ self.scheduled.remove(jid)
2165
+
2166
+ local group = 'failed-retries-' .. Qless.job(jid):data()['queue']
2167
+ local job = Qless.job(jid)
2168
+ job:history(now, 'failed', {group = group})
2169
+ redis.call('hmset', QlessJob.ns .. jid, 'state', 'failed',
2170
+ 'worker', '',
2171
+ 'expires', '')
2172
+ -- If the failure has not already been set, then set it
2173
+ redis.call('hset', QlessJob.ns .. jid,
2174
+ 'failure', cjson.encode({
2175
+ ['group'] = group,
2176
+ ['message'] =
2177
+ 'Job exhausted retries in queue "' .. self.name .. '"',
2178
+ ['when'] = now,
2179
+ ['worker'] = unpack(job:data('worker'))
2180
+ }))
2181
+
2182
+ -- Add this type of failure to the list of failures
2183
+ redis.call('sadd', 'ql:failures', group)
2184
+ -- And add this particular instance to the failed types
2185
+ redis.call('lpush', 'ql:f:' .. group, jid)
2186
+
2187
+ if redis.call('zscore', 'ql:tracked', jid) ~= false then
2188
+ Qless.publish('failed', jid)
2189
+ end
2190
+ Qless.publish('log', cjson.encode({
2191
+ jid = jid,
2192
+ event = 'failed',
2193
+ group = group,
2194
+ worker = worker,
2195
+ message =
2196
+ 'Job exhausted retries in queue "' .. self.name .. '"'
2197
+ }))
2198
+
2199
+ -- Increment the count of the failed jobs
2200
+ local bin = now - (now % 86400)
2201
+ redis.call('hincrby',
2202
+ 'ql:s:stats:' .. bin .. ':' .. self.name, 'failures', 1)
2203
+ redis.call('hincrby',
2204
+ 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , 1)
2205
+ else
2206
+ table.insert(jids, jid)
2207
+ end
2208
+ end
2209
+ end
2210
+
2211
+ return jids
2212
+ end
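+
+ -- To summarize the two-phase timeout above (summary added for
+ -- clarity): the first time a job's lock expires, the worker is sent
+ -- a 'lock_lost' message, 'grace' is set to 1, and the lock is
+ -- extended by 'grace-period' seconds. If the lock expires again,
+ -- 'grace' is cleared, 'remaining' is decremented, and the jid is
+ -- either handed back out (remaining >= 0) or failed under the group
+ -- 'failed-retries-<queue>' (remaining < 0). A 'grace-period' of 0
+ -- or less collapses both phases into a single pass.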
2213
+
2214
+ -- Forget the provided queues. As in, remove them from the list of known queues
2215
+ function QlessQueue.deregister(...)
2216
+ redis.call('zrem', Qless.ns .. 'queues', unpack(arg))
2217
+ end
2218
+
2219
+ -- Return information about a particular queue, or all queues
2220
+ -- [
2221
+ -- {
2222
+ -- 'name': 'testing',
2223
+ -- 'stalled': 2,
2224
+ -- 'waiting': 5,
2225
+ -- 'running': 5,
2226
+ -- 'scheduled': 10,
2227
+ -- 'depends': 5,
2228
+ -- 'recurring': 0
2229
+ -- }, {
2230
+ -- ...
2231
+ -- }
2232
+ -- ]
2233
+ function QlessQueue.counts(now, name)
2234
+ if name then
2235
+ local queue = Qless.queue(name)
2236
+ local stalled = queue.locks.length(now)
2237
+ -- Check for any scheduled jobs that need to be moved
2238
+ queue:check_scheduled(now, queue.scheduled.length())
2239
+ return {
2240
+ name = name,
2241
+ waiting = queue.work.length(),
2242
+ stalled = stalled,
2243
+ running = queue.locks.length() - stalled,
2244
+ scheduled = queue.scheduled.length(),
2245
+ depends = queue.depends.length(),
2246
+ recurring = queue.recurring.length(),
2247
+ paused = queue:paused()
2248
+ }
2249
+ else
2250
+ local queues = redis.call('zrange', 'ql:queues', 0, -1)
2251
+ local response = {}
2252
+ for index, qname in ipairs(queues) do
2253
+ table.insert(response, QlessQueue.counts(now, qname))
2254
+ end
2255
+ return response
2256
+ end
2257
+ end
2258
+ -- Get all the attributes of this particular job
2259
+ function QlessRecurringJob:data()
2260
+ local job = redis.call(
2261
+ 'hmget', 'ql:r:' .. self.jid, 'jid', 'klass', 'state', 'queue',
2262
+ 'priority', 'interval', 'retries', 'count', 'data', 'tags', 'backlog')
2263
+
2264
+ if not job[1] then
2265
+ return nil
2266
+ end
2267
+
2268
+ return {
2269
+ jid = job[1],
2270
+ klass = job[2],
2271
+ state = job[3],
2272
+ queue = job[4],
2273
+ priority = tonumber(job[5]),
2274
+ interval = tonumber(job[6]),
2275
+ retries = tonumber(job[7]),
2276
+ count = tonumber(job[8]),
2277
+ data = job[9],
2278
+ tags = cjson.decode(job[10]),
2279
+ backlog = tonumber(job[11] or 0)
2280
+ }
2281
+ end
2282
+
2283
+ -- Update the recurring job data. Key can be:
2284
+ -- - priority
2285
+ -- - interval
2286
+ -- - retries
2287
+ -- - data
2288
+ -- - klass
2289
+ -- - queue
2290
+ -- - backlog
2291
+ function QlessRecurringJob:update(now, ...)
2292
+ local options = {}
2293
+ -- Make sure that the job exists
2294
+ if redis.call('exists', 'ql:r:' .. self.jid) ~= 0 then
2295
+ for i = 1, #arg, 2 do
2296
+ local key = arg[i]
2297
+ local value = arg[i+1]
2298
+ assert(value, 'No value provided for ' .. tostring(key))
2299
+ if key == 'priority' or key == 'interval' or key == 'retries' then
2300
+ value = assert(tonumber(value), 'Recur(): Arg "' .. key .. '" must be a number: ' .. tostring(value))
2301
+ -- If the command is 'interval', then we need to update the
2302
+ -- time when it should next be scheduled
2303
+ if key == 'interval' then
2304
+ local queue, interval = unpack(redis.call('hmget', 'ql:r:' .. self.jid, 'queue', 'interval'))
2305
+ Qless.queue(queue).recurring.update(
2306
+ value - tonumber(interval), self.jid)
2307
+ end
2308
+ redis.call('hset', 'ql:r:' .. self.jid, key, value)
2309
+ elseif key == 'data' then
2310
+ assert(cjson.decode(value), 'Recur(): Arg "data" is not JSON-encoded: ' .. tostring(value))
2311
+ redis.call('hset', 'ql:r:' .. self.jid, 'data', value)
2312
+ elseif key == 'klass' then
2313
+ redis.call('hset', 'ql:r:' .. self.jid, 'klass', value)
2314
+ elseif key == 'queue' then
2315
+ local queue_obj = Qless.queue(
2316
+ redis.call('hget', 'ql:r:' .. self.jid, 'queue'))
2317
+ local score = queue_obj.recurring.score(self.jid)
2318
+ queue_obj.recurring.remove(self.jid)
2319
+ Qless.queue(value).recurring.add(score, self.jid)
2320
+ redis.call('hset', 'ql:r:' .. self.jid, 'queue', value)
2321
+ -- If we don't already know about the queue, learn about it
2322
+ if redis.call('zscore', 'ql:queues', value) == false then
2323
+ redis.call('zadd', 'ql:queues', now, value)
2324
+ end
2325
+ elseif key == 'backlog' then
2326
+ value = assert(tonumber(value),
2327
+ 'Recur(): Arg "backlog" not a number: ' .. tostring(value))
2328
+ redis.call('hset', 'ql:r:' .. self.jid, 'backlog', value)
2329
+ else
2330
+ error('Recur(): Unrecognized option "' .. key .. '"')
2331
+ end
2332
+ end
2333
+ return true
2334
+ else
2335
+ error('Recur(): No recurring job ' .. self.jid)
2336
+ end
2337
+ end
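+
+ -- Hypothetical usage sketch (values invented): updates are flat
+ -- key/value pairs, e.g.
+ --   job:update(now, 'interval', 120, 'priority', 5)
+ -- Changing 'interval' also shifts the next scheduled run by the
+ -- difference between the new and old intervals (via the
+ -- recurring.update call above).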
2338
+
2339
+ -- Tags this recurring job with the provided tags
2340
+ function QlessRecurringJob:tag(...)
2341
+ local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
2342
+ -- If the job has been canceled / deleted, then raise an error
2343
+ if tags then
2344
+ -- Decode the json blob, convert to dictionary
2345
+ tags = cjson.decode(tags)
2346
+ local _tags = {}
2347
+ for i,v in ipairs(tags) do _tags[v] = true end
2348
+
2349
+ -- Otherwise, add any tags that aren't already present
2350
+ for i=1,#arg do if _tags[arg[i]] == nil then table.insert(tags, arg[i]) end end
2351
+
2352
+ tags = cjson.encode(tags)
2353
+ redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
2354
+ return tags
2355
+ else
2356
+ error('Tag(): Job ' .. self.jid .. ' does not exist')
2357
+ end
2358
+ end
2359
+
2360
+ -- Removes a tag from the recurring job
2361
+ function QlessRecurringJob:untag(...)
2362
+ -- Get the existing tags
2363
+ local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
2364
+ -- If the job has been canceled / deleted, then raise an error
2365
+ if tags then
2366
+ -- Decode the json blob, convert to dictionary
2367
+ tags = cjson.decode(tags)
2368
+ local _tags = {}
2369
+ -- Make a hash
2370
+ for i,v in ipairs(tags) do _tags[v] = true end
2371
+ -- Delete these from the hash
2372
+ for i = 1,#arg do _tags[arg[i]] = nil end
2373
+ -- Back into a list
2374
+ local results = {}
2375
+ for i, tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
2376
+ -- json encode them, set, and return
2377
+ tags = cjson.encode(results)
2378
+ redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
2379
+ return tags
2380
+ else
2381
+ error('Untag(): Job ' .. self.jid .. ' does not exist')
2382
+ end
2383
+ end
2384
+
2385
+ -- Stop further occurrences of this job
2386
+ function QlessRecurringJob:unrecur()
2387
+ -- First, find out what queue it was attached to
2388
+ local queue = redis.call('hget', 'ql:r:' .. self.jid, 'queue')
2389
+ if queue then
2390
+ -- Now, delete it from the queue it was attached to, and delete the
2391
+ -- thing itself
2392
+ Qless.queue(queue).recurring.remove(self.jid)
2393
+ redis.call('del', 'ql:r:' .. self.jid)
2394
+ return true
2395
+ else
2396
+ return true
2397
+ end
2398
+ end
2399
+ -- Deregisters these workers from the list of known workers
2400
+ function QlessWorker.deregister(...)
2401
+ redis.call('zrem', 'ql:workers', unpack(arg))
2402
+ end
2403
+
2404
+ -- Provide data about all the workers, or if a specific worker is provided,
2405
+ -- then which jobs that worker is responsible for. If no worker is provided,
2406
+ -- expect a response of the form:
2407
+ --
2408
+ -- [
2409
+ -- # This is sorted by the recency of activity from that worker
2410
+ -- {
2411
+ -- 'name' : 'hostname1-pid1',
2412
+ -- 'jobs' : 20,
2413
+ -- 'stalled': 0
2414
+ -- }, {
2415
+ -- ...
2416
+ -- }
2417
+ -- ]
2418
+ --
2419
+ -- If a worker id is provided, then expect a response of the form:
2420
+ --
2421
+ -- {
2422
+ -- 'jobs': [
2423
+ -- jid1,
2424
+ -- jid2,
2425
+ -- ...
2426
+ -- ], 'stalled': [
2427
+ -- jid1,
2428
+ -- ...
2429
+ -- ]
2430
+ -- }
2431
+ --
2432
+ function QlessWorker.counts(now, worker)
2433
+ -- Clean up all the workers' job lists if they're too old. This is
2434
+ -- determined by the `max-worker-age` configuration, defaulting to the
2435
+ -- last day. Seems like a 'reasonable' default
2436
+ local interval = tonumber(Qless.config.get('max-worker-age', 86400))
2437
+
2438
+ local workers = redis.call('zrangebyscore', 'ql:workers', 0, now - interval)
2439
+ for index, worker in ipairs(workers) do
2440
+ redis.call('del', 'ql:w:' .. worker .. ':jobs')
2441
+ end
2442
+
2443
+ -- And now remove them from the list of known workers
2444
+ redis.call('zremrangebyscore', 'ql:workers', 0, now - interval)
2445
+
2446
+ if worker then
2447
+ return {
2448
+ jobs = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now + 8640000, now),
2449
+ stalled = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now, 0)
2450
+ }
2451
+ else
2452
+ local response = {}
2453
+ local workers = redis.call('zrevrange', 'ql:workers', 0, -1)
2454
+ for index, worker in ipairs(workers) do
2455
+ table.insert(response, {
2456
+ name = worker,
2457
+ jobs = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', now, now + 8640000),
2458
+ stalled = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', 0, now)
2459
+ })
2460
+ end
2461
+ return response
2462
+ end
2463
+ end
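+
+ -- Note (added for clarity): in the per-worker job lists above, each
+ -- jid is scored by its lock's expiration time, so entries whose
+ -- locks expire after `now` are reported as active jobs and entries
+ -- that have already expired as stalled. The 8640000 above is 100
+ -- days, an effectively unbounded upper limit.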