qless 0.9.2 → 0.9.3
- data/Gemfile +2 -0
- data/README.md +42 -3
- data/Rakefile +26 -2
- data/{bin → exe}/qless-web +3 -2
- data/lib/qless.rb +55 -28
- data/lib/qless/config.rb +1 -3
- data/lib/qless/job.rb +127 -22
- data/lib/qless/job_reservers/round_robin.rb +3 -1
- data/lib/qless/job_reservers/shuffled_round_robin.rb +14 -0
- data/lib/qless/lua_script.rb +42 -0
- data/lib/qless/middleware/redis_reconnect.rb +24 -0
- data/lib/qless/middleware/retry_exceptions.rb +43 -0
- data/lib/qless/middleware/sentry.rb +70 -0
- data/lib/qless/qless-core/cancel.lua +89 -59
- data/lib/qless/qless-core/complete.lua +16 -1
- data/lib/qless/qless-core/config.lua +12 -0
- data/lib/qless/qless-core/deregister_workers.lua +12 -0
- data/lib/qless/qless-core/fail.lua +24 -14
- data/lib/qless/qless-core/heartbeat.lua +2 -1
- data/lib/qless/qless-core/pause.lua +18 -0
- data/lib/qless/qless-core/pop.lua +24 -3
- data/lib/qless/qless-core/put.lua +14 -1
- data/lib/qless/qless-core/qless-lib.lua +2354 -0
- data/lib/qless/qless-core/qless.lua +1862 -0
- data/lib/qless/qless-core/retry.lua +1 -1
- data/lib/qless/qless-core/unfail.lua +54 -0
- data/lib/qless/qless-core/unpause.lua +12 -0
- data/lib/qless/queue.rb +45 -21
- data/lib/qless/server.rb +38 -39
- data/lib/qless/server/static/css/docs.css +21 -1
- data/lib/qless/server/views/_job.erb +5 -5
- data/lib/qless/server/views/overview.erb +14 -9
- data/lib/qless/subscriber.rb +48 -0
- data/lib/qless/version.rb +1 -1
- data/lib/qless/wait_until.rb +19 -0
- data/lib/qless/worker.rb +243 -33
- metadata +49 -30
- data/bin/install_phantomjs +0 -7
- data/bin/qless-campfire +0 -106
- data/bin/qless-growl +0 -99
- data/lib/qless/lua.rb +0 -25
--- a/data/lib/qless/qless-core/heartbeat.lua
+++ b/data/lib/qless/qless-core/heartbeat.lua
@@ -27,7 +27,8 @@ if data then
 end
 
 -- First, let's see if the worker still owns this job, and there is a worker
-if redis.call('hget', 'ql:j:' .. jid, 'worker') ~= worker then
+local job_worker = redis.call('hget', 'ql:j:' .. jid, 'worker')
+if job_worker ~= worker or #job_worker == 0 then
     return false
 else
     -- Otherwise, optionally update the user data, and the heartbeat
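The new `#job_worker == 0` guard appears to close a small hole: a job with no assigned worker stores the empty string for `worker`, so a heartbeat that itself supplied an empty worker name would previously pass the inequality check and renew a lock nobody held. A minimal sketch of the two checks side by side (values illustrative, not part of the diff):

    local job_worker = ''  -- what an unowned job stores for 'worker'
    local worker = ''      -- an empty worker name supplied by a caller
    print(job_worker ~= worker)                      --> false: old check let this through
    print(job_worker ~= worker or #job_worker == 0)  --> true: new check refuses the heartbeat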
--- /dev/null
+++ b/data/lib/qless/qless-core/pause.lua
@@ -0,0 +1,18 @@
+-- This script takes the name of the queue(s) and adds it
+-- to the ql:paused_queues set.
+--
+-- Args: The list of queues to pause.
+--
+-- Note: long term, we have discussed adding a rate-limiting
+-- feature to qless-core, which would be more flexible and
+-- could be used for pausing (i.e. pause = set the rate to 0).
+-- For now, this is far simpler, but we should rewrite this
+-- in terms of the rate limiting feature if/when that is added.
+
+if #KEYS > 0 then error('Pause(): No Keys should be provided') end
+if #ARGV < 1 then error('Pause(): Must provide at least one queue to pause') end
+
+local key = 'ql:paused_queues'
+
+redis.call('sadd', key, unpack(ARGV))
+
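The file summary above also lists an unpause.lua counterpart (+12 lines) whose body is not expanded on this page. Presumably it mirrors this script with `srem`; a minimal sketch under that assumption:

    -- Hypothetical sketch only; the actual unpause.lua is not shown in this diff.
    if #KEYS > 0 then error('Unpause(): No Keys should be provided') end
    if #ARGV < 1 then error('Unpause(): Must provide at least one queue to unpause') end

    -- Remove the named queues from the paused set so pop will serve them again
    redis.call('srem', 'ql:paused_queues', unpack(ARGV))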
--- a/data/lib/qless/qless-core/pop.lua
+++ b/data/lib/qless/qless-core/pop.lua
@@ -26,8 +26,16 @@ local now = assert(tonumber(ARGV[3]) , 'Pop(): Arg "now" missing or not a nu
 
 -- We should find the heartbeat interval for this queue
 -- heartbeat
-local _hb, _qhb = unpack(redis.call('hmget', 'ql:config', 'heartbeat', queue .. '-heartbeat'))
+local _hb, _qhb, _mc = unpack(redis.call('hmget', 'ql:config', 'heartbeat', queue .. '-heartbeat', queue .. '-max-concurrency'))
 local expires = now + tonumber(_qhb or _hb or 60)
+local max_concurrency = tonumber(_mc or 0)
+
+if max_concurrency > 0 then
+    -- We need to find out how many locks are still valid.
+    local num_still_locked = redis.call('zcount', key .. '-locks', now, '+inf')
+    -- Only allow the minimum of the two through
+    count = math.min(max_concurrency - num_still_locked, count)
+end
 
 -- The bin is midnight of the provided day
 -- 24 * 60 * 60 = 86400
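The cap is read from the same `ql:config` hash as the heartbeat settings, under a per-queue `<queue>-max-concurrency` field; when the field is unset, `tonumber(_mc or 0)` yields 0 and the cap is disabled. Enabling it is a plain hash write (queue name illustrative):

    -- Allow at most 5 concurrently held locks in the 'imports' queue.
    -- With 3 locks still live, a pop asking for 10 jobs gets at most 2.
    redis.call('hset', 'ql:config', 'imports-max-concurrency', 5)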
@@ -39,12 +47,25 @@ local keys = {}
 -- Make sure we this worker to the list of seen workers
 redis.call('zadd', 'ql:workers', now, worker)
 
+if redis.call('sismember', 'ql:paused_queues', queue) == 1 then
+    return {}
+end
+
 -- Iterate through all the expired locks and add them to the list
 -- of keys that we'll return
 for index, jid in ipairs(redis.call('zrangebyscore', key .. '-locks', 0, now, 'LIMIT', 0, count)) do
     -- Remove this job from the jobs that the worker that was running it has
     local w = redis.call('hget', 'ql:j:' .. jid, 'worker')
     redis.call('zrem', 'ql:w:' .. w .. ':jobs', jid)
+
+    -- Send a message to let the worker know that its lost its lock on the job
+    local encoded = cjson.encode({
+        jid    = jid,
+        event  = 'lock_lost',
+        worker = w
+    })
+    redis.call('publish', 'ql:w:' .. w, encoded)
+    redis.call('publish', 'ql:log', encoded)
 
     -- For each of these, decrement their retries. If any of them
     -- have exhausted their retries, then we should mark them as
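The same payload goes to two channels: the per-worker channel `ql:w:<worker>`, so the worker that lost the lock can stop running the job, and the shared `ql:log` channel for observers. Decoded, a message looks like this (jid and worker values illustrative):

    -- e.g. via lua-cjson on the subscriber side
    local message = cjson.decode('{"jid":"deadbeef","event":"lock_lost","worker":"worker-1"}')
    -- message.event == 'lock_lost'; message.jid is the job the worker should abandon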
@@ -66,7 +87,7 @@ for index, jid in ipairs(redis.call('zrangebyscore', key .. '-locks', 0, now, 'L
         redis.call('hmset', 'ql:j:' .. jid, 'state', 'failed', 'worker', '',
             'expires', '', 'history', cjson.encode(history), 'failure', cjson.encode({
                 ['group']   = group,
-                ['message'] = 'Job
+                ['message'] = 'Job exhausted retries in queue "' .. queue .. '"',
                 ['when']    = now,
                 ['worker']  = history[#history]['worker']
             }))
@@ -290,4 +311,4 @@ if #keys > 0 then
     redis.call('zrem', key .. '-work', unpack(keys))
 end
 
-return response
\ No newline at end of file
+return response
--- a/data/lib/qless/qless-core/put.lua
+++ b/data/lib/qless/qless-core/put.lua
@@ -54,6 +54,13 @@ if delay > 0 and #depends > 0 then
     error('Put(): "delay" and "depends" are not allowed to be used together')
 end
 
+-- Send out a log message
+redis.call('publish', 'ql:log', cjson.encode({
+    jid   = jid,
+    event = 'put',
+    queue = queue
+}))
+
 -- Update the history to include this new change
 local history = cjson.decode(history or '{}')
 table.insert(history, {
@@ -73,6 +80,12 @@ end
 -- make sure to remove it from that worker's jobs
 if worker then
     redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
+    -- We need to inform whatever worker had that job
+    redis.call('publish', 'ql:w:' .. worker, cjson.encode({
+        jid   = jid,
+        event = 'put',
+        queue = queue
+    }))
 end
 
 -- If the job was previously in the 'completed' state, then we should remove
@@ -153,4 +166,4 @@ if redis.call('zscore', 'ql:tracked', jid) ~= false then
     redis.call('publish', 'put', jid)
 end
 
-return jid
\ No newline at end of file
+return jid
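Taken together with pop.lua's `lock_lost` and the `canceled` event published by qless-lib.lua below, a worker subscribed to its own `ql:w:<worker>` channel can now learn promptly that a job is no longer its to run. A sketch of the dispatch logic on the subscriber side (the `abandon` handler is hypothetical; in the gem this role is played by the new lib/qless/subscriber.rb listed in the summary):

    local cjson = require('cjson')  -- lua-cjson, assumed available client-side

    local function on_message(payload)
        local message = cjson.decode(payload)
        -- Events seen in this diff on a worker's channel:
        -- 'lock_lost' (pop.lua), 'put' (put.lua), 'canceled' (qless-lib.lua)
        if message.event == 'lock_lost' or message.event == 'put'
            or message.event == 'canceled' then
            abandon(message.jid)  -- hypothetical handler: stop working this jid
        end
    end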
@@ -0,0 +1,2354 @@
|
|
1
|
+
-------------------------------------------------------------------------------
|
2
|
+
-- Forward declarations to make everything happy
|
3
|
+
-------------------------------------------------------------------------------
|
4
|
+
local Qless = {
|
5
|
+
ns = 'ql:'
|
6
|
+
}
|
7
|
+
|
8
|
+
-- Queue forward delcaration
|
9
|
+
local QlessQueue = {
|
10
|
+
ns = Qless.ns .. 'q:'
|
11
|
+
}
|
12
|
+
QlessQueue.__index = QlessQueue
|
13
|
+
|
14
|
+
-- Worker forward declaration
|
15
|
+
local QlessWorker = {
|
16
|
+
ns = Qless.ns .. 'w:'
|
17
|
+
}
|
18
|
+
QlessWorker.__index = QlessWorker
|
19
|
+
|
20
|
+
-- Job forward declaration
|
21
|
+
local QlessJob = {
|
22
|
+
ns = Qless.ns .. 'j:'
|
23
|
+
}
|
24
|
+
QlessJob.__index = QlessJob
|
25
|
+
|
26
|
+
-- RecurringJob forward declaration
|
27
|
+
local QlessRecurringJob = {}
|
28
|
+
QlessRecurringJob.__index = QlessRecurringJob
|
29
|
+
|
30
|
+
-- Config forward declaration
|
31
|
+
Qless.config = {}
|
32
|
+
|
33
|
+
-- Extend a table. This comes up quite frequently
|
34
|
+
function table.extend(self, other)
|
35
|
+
for i, v in ipairs(other) do
|
36
|
+
table.insert(self, v)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
function Qless.debug(message)
|
41
|
+
redis.call('publish', 'debug', tostring(message))
|
42
|
+
end
|
43
|
+
|
44
|
+
function Qless.publish(channel, message)
|
45
|
+
redis.call('publish', Qless.ns .. channel, message)
|
46
|
+
end
|
47
|
+
|
48
|
+
-- Return a job object
|
49
|
+
function Qless.job(jid)
|
50
|
+
assert(jid, 'Job(): no jid provided')
|
51
|
+
local job = {}
|
52
|
+
setmetatable(job, QlessJob)
|
53
|
+
job.jid = jid
|
54
|
+
return job
|
55
|
+
end
|
56
|
+
|
57
|
+
-- Return a recurring job object
|
58
|
+
function Qless.recurring(jid)
|
59
|
+
assert(jid, 'Recurring(): no jid provided')
|
60
|
+
local job = {}
|
61
|
+
setmetatable(job, QlessRecurringJob)
|
62
|
+
job.jid = jid
|
63
|
+
return job
|
64
|
+
end
|
65
|
+
|
66
|
+
-- Failed([group, [start, [limit]]])
|
67
|
+
-- ------------------------------------
|
68
|
+
-- If no group is provided, this returns a JSON blob of the counts of the
|
69
|
+
-- various groups of failures known. If a group is provided, it will report up
|
70
|
+
-- to `limit` from `start` of the jobs affected by that issue.
|
71
|
+
--
|
72
|
+
-- # If no group, then...
|
73
|
+
-- {
|
74
|
+
-- 'group1': 1,
|
75
|
+
-- 'group2': 5,
|
76
|
+
-- ...
|
77
|
+
-- }
|
78
|
+
--
|
79
|
+
-- # If a group is provided, then...
|
80
|
+
-- {
|
81
|
+
-- 'total': 20,
|
82
|
+
-- 'jobs': [
|
83
|
+
-- {
|
84
|
+
-- # All the normal keys for a job
|
85
|
+
-- 'jid': ...,
|
86
|
+
-- 'data': ...
|
87
|
+
-- # The message for this particular instance
|
88
|
+
-- 'message': ...,
|
89
|
+
-- 'group': ...,
|
90
|
+
-- }, ...
|
91
|
+
-- ]
|
92
|
+
-- }
|
93
|
+
--
|
94
|
+
function Qless.failed(group, start, limit)
|
95
|
+
start = assert(tonumber(start or 0),
|
96
|
+
'Failed(): Arg "start" is not a number: ' .. (start or 'nil'))
|
97
|
+
limit = assert(tonumber(limit or 25),
|
98
|
+
'Failed(): Arg "limit" is not a number: ' .. (limit or 'nil'))
|
99
|
+
|
100
|
+
if group then
|
101
|
+
-- If a group was provided, then we should do paginated lookup
|
102
|
+
return {
|
103
|
+
total = redis.call('llen', 'ql:f:' .. group),
|
104
|
+
jobs = redis.call('lrange', 'ql:f:' .. group, start, limit - 1)
|
105
|
+
}
|
106
|
+
else
|
107
|
+
-- Otherwise, we should just list all the known failure groups we have
|
108
|
+
local response = {}
|
109
|
+
local groups = redis.call('smembers', 'ql:failures')
|
110
|
+
for index, group in ipairs(groups) do
|
111
|
+
response[group] = redis.call('llen', 'ql:f:' .. group)
|
112
|
+
end
|
113
|
+
return response
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
-- Jobs(0, now, 'complete' | (
|
118
|
+
-- (
|
119
|
+
-- 'stalled' | 'running' | 'scheduled' | 'depends', 'recurring'
|
120
|
+
-- ), queue)
|
121
|
+
-- [offset, [count]])
|
122
|
+
-------------------------------------------------------------------------------
|
123
|
+
--
|
124
|
+
-- Return all the job ids currently considered to be in the provided state
|
125
|
+
-- in a particular queue. The response is a list of job ids:
|
126
|
+
--
|
127
|
+
-- [
|
128
|
+
-- jid1,
|
129
|
+
-- jid2,
|
130
|
+
-- ...
|
131
|
+
-- ]
|
132
|
+
function Qless.jobs(now, state, ...)
|
133
|
+
assert(state, 'Jobs(): Arg "state" missing')
|
134
|
+
if state == 'complete' then
|
135
|
+
local offset = assert(tonumber(arg[1] or 0),
|
136
|
+
'Jobs(): Arg "offset" not a number: ' .. tostring(arg[1]))
|
137
|
+
local count = assert(tonumber(arg[2] or 25),
|
138
|
+
'Jobs(): Arg "count" not a number: ' .. tostring(arg[2]))
|
139
|
+
return redis.call('zrevrange', 'ql:completed', offset,
|
140
|
+
offset + count - 1)
|
141
|
+
else
|
142
|
+
local name = assert(arg[1], 'Jobs(): Arg "queue" missing')
|
143
|
+
local offset = assert(tonumber(arg[2] or 0),
|
144
|
+
'Jobs(): Arg "offset" not a number: ' .. tostring(arg[2]))
|
145
|
+
local count = assert(tonumber(arg[3] or 25),
|
146
|
+
'Jobs(): Arg "count" not a number: ' .. tostring(arg[3]))
|
147
|
+
|
148
|
+
local queue = Qless.queue(name)
|
149
|
+
if state == 'running' then
|
150
|
+
return queue.locks.peek(now, offset, count)
|
151
|
+
elseif state == 'stalled' then
|
152
|
+
return queue.locks.expired(now, offset, count)
|
153
|
+
elseif state == 'scheduled' then
|
154
|
+
return queue.scheduled.peek(now, offset, count)
|
155
|
+
elseif state == 'depends' then
|
156
|
+
return queue.depends.peek(now, offset, count)
|
157
|
+
elseif state == 'recurring' then
|
158
|
+
return queue.recurring.peek(now, offset, count)
|
159
|
+
else
|
160
|
+
error('Jobs(): Unknown type "' .. state .. '"')
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
-- Track(0)
|
166
|
+
-- Track(0, ('track' | 'untrack'), jid, now)
|
167
|
+
-- ------------------------------------------
|
168
|
+
-- If no arguments are provided, it returns details of all currently-tracked
|
169
|
+
-- jobs. If the first argument is 'track', then it will start tracking the job
|
170
|
+
-- associated with that id, and 'untrack' stops tracking it. In this context,
|
171
|
+
-- tracking is nothing more than saving the job to a list of jobs that are
|
172
|
+
-- considered special.
|
173
|
+
--
|
174
|
+
-- {
|
175
|
+
-- 'jobs': [
|
176
|
+
-- {
|
177
|
+
-- 'jid': ...,
|
178
|
+
-- # All the other details you'd get from 'get'
|
179
|
+
-- }, {
|
180
|
+
-- ...
|
181
|
+
-- }
|
182
|
+
-- ], 'expired': [
|
183
|
+
-- # These are all the jids that are completed and whose data expired
|
184
|
+
-- 'deadbeef',
|
185
|
+
-- ...,
|
186
|
+
-- ...,
|
187
|
+
-- ]
|
188
|
+
-- }
|
189
|
+
--
|
190
|
+
function Qless.track(now, command, jid)
|
191
|
+
if command ~= nil then
|
192
|
+
assert(jid, 'Track(): Arg "jid" missing')
|
193
|
+
if string.lower(ARGV[1]) == 'track' then
|
194
|
+
Qless.publish('track', jid)
|
195
|
+
return redis.call('zadd', 'ql:tracked', now, jid)
|
196
|
+
elseif string.lower(ARGV[1]) == 'untrack' then
|
197
|
+
Qless.publish('untrack', jid)
|
198
|
+
return redis.call('zrem', 'ql:tracked', jid)
|
199
|
+
else
|
200
|
+
error('Track(): Unknown action "' .. command .. '"')
|
201
|
+
end
|
202
|
+
else
|
203
|
+
local response = {
|
204
|
+
jobs = {},
|
205
|
+
expired = {}
|
206
|
+
}
|
207
|
+
local jids = redis.call('zrange', 'ql:tracked', 0, -1)
|
208
|
+
for index, jid in ipairs(jids) do
|
209
|
+
local data = Qless.job(jid):data()
|
210
|
+
if data then
|
211
|
+
table.insert(response.jobs, data)
|
212
|
+
else
|
213
|
+
table.insert(response.expired, jid)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
return response
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
-- tag(0, now, ('add' | 'remove'), jid, tag, [tag, ...])
|
221
|
+
-- tag(0, now, 'get', tag, [offset, [count]])
|
222
|
+
-- tag(0, now, 'top', [offset, [count]])
|
223
|
+
-- ------------------------------------------------------------------------------------------------------------------
|
224
|
+
-- Accepts a jid, 'add' or 'remove', and then a list of tags
|
225
|
+
-- to either add or remove from the job. Alternatively, 'get',
|
226
|
+
-- a tag to get jobs associated with that tag, and offset and
|
227
|
+
-- count
|
228
|
+
--
|
229
|
+
-- If 'add' or 'remove', the response is a list of the jobs
|
230
|
+
-- current tags, or False if the job doesn't exist. If 'get',
|
231
|
+
-- the response is of the form:
|
232
|
+
--
|
233
|
+
-- {
|
234
|
+
-- total: ...,
|
235
|
+
-- jobs: [
|
236
|
+
-- jid,
|
237
|
+
-- ...
|
238
|
+
-- ]
|
239
|
+
-- }
|
240
|
+
--
|
241
|
+
-- If 'top' is supplied, it returns the most commonly-used tags
|
242
|
+
-- in a paginated fashion.
|
243
|
+
function Qless.tag(now, command, ...)
|
244
|
+
assert(command, 'Tag(): Arg "command" must be "add", "remove", "get" or "top"')
|
245
|
+
|
246
|
+
if command == 'add' then
|
247
|
+
local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
|
248
|
+
local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
|
249
|
+
-- If the job has been canceled / deleted, then return false
|
250
|
+
if tags then
|
251
|
+
-- Decode the json blob, convert to dictionary
|
252
|
+
tags = cjson.decode(tags)
|
253
|
+
local _tags = {}
|
254
|
+
for i,v in ipairs(tags) do _tags[v] = true end
|
255
|
+
|
256
|
+
-- Otherwise, add the job to the sorted set with that tags
|
257
|
+
for i=2,#arg do
|
258
|
+
local tag = arg[i]
|
259
|
+
if _tags[tag] == nil then
|
260
|
+
table.insert(tags, tag)
|
261
|
+
end
|
262
|
+
redis.call('zadd', 'ql:t:' .. tag, now, jid)
|
263
|
+
redis.call('zincrby', 'ql:tags', 1, tag)
|
264
|
+
end
|
265
|
+
|
266
|
+
tags = cjson.encode(tags)
|
267
|
+
redis.call('hset', QlessJob.ns .. jid, 'tags', tags)
|
268
|
+
return tags
|
269
|
+
else
|
270
|
+
return false
|
271
|
+
end
|
272
|
+
elseif command == 'remove' then
|
273
|
+
local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
|
274
|
+
local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
|
275
|
+
-- If the job has been canceled / deleted, then return false
|
276
|
+
if tags then
|
277
|
+
-- Decode the json blob, convert to dictionary
|
278
|
+
tags = cjson.decode(tags)
|
279
|
+
local _tags = {}
|
280
|
+
for i,v in ipairs(tags) do _tags[v] = true end
|
281
|
+
|
282
|
+
-- Otherwise, add the job to the sorted set with that tags
|
283
|
+
for i=2,#arg do
|
284
|
+
local tag = arg[i]
|
285
|
+
_tags[tag] = nil
|
286
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
287
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
288
|
+
end
|
289
|
+
|
290
|
+
local results = {}
|
291
|
+
for i,tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
|
292
|
+
|
293
|
+
tags = cjson.encode(results)
|
294
|
+
redis.call('hset', QlessJob.ns .. jid, 'tags', tags)
|
295
|
+
return results
|
296
|
+
else
|
297
|
+
return false
|
298
|
+
end
|
299
|
+
elseif command == 'get' then
|
300
|
+
local tag = assert(arg[1], 'Tag(): Arg "tag" missing')
|
301
|
+
local offset = assert(tonumber(arg[2] or 0),
|
302
|
+
'Tag(): Arg "offset" not a number: ' .. tostring(arg[2]))
|
303
|
+
local count = assert(tonumber(arg[3] or 25),
|
304
|
+
'Tag(): Arg "count" not a number: ' .. tostring(arg[3]))
|
305
|
+
return {
|
306
|
+
total = redis.call('zcard', 'ql:t:' .. tag),
|
307
|
+
jobs = redis.call('zrange', 'ql:t:' .. tag, offset, count)
|
308
|
+
}
|
309
|
+
elseif command == 'top' then
|
310
|
+
local offset = assert(tonumber(arg[1] or 0) , 'Tag(): Arg "offset" not a number: ' .. tostring(arg[1]))
|
311
|
+
local count = assert(tonumber(arg[2] or 25), 'Tag(): Arg "count" not a number: ' .. tostring(arg[2]))
|
312
|
+
return redis.call('zrevrangebyscore', 'ql:tags', '+inf', 2, 'limit', offset, count)
|
313
|
+
else
|
314
|
+
error('Tag(): First argument must be "add", "remove" or "get"')
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
-- Cancel(0)
|
319
|
+
-- --------------
|
320
|
+
-- Cancel a job from taking place. It will be deleted from the system, and any
|
321
|
+
-- attempts to renew a heartbeat will fail, and any attempts to complete it
|
322
|
+
-- will fail. If you try to get the data on the object, you will get nothing.
|
323
|
+
function Qless.cancel(...)
|
324
|
+
-- Dependents is a mapping of a job to its dependent jids
|
325
|
+
local dependents = {}
|
326
|
+
for _, jid in ipairs(arg) do
|
327
|
+
dependents[jid] = redis.call(
|
328
|
+
'smembers', QlessJob.ns .. jid .. '-dependents') or {}
|
329
|
+
end
|
330
|
+
|
331
|
+
-- Now, we'll loop through every jid we intend to cancel, and we'll go
|
332
|
+
-- make sure that this operation will be ok
|
333
|
+
for i, jid in ipairs(arg) do
|
334
|
+
for j, dep in ipairs(dependents[jid]) do
|
335
|
+
if dependents[dep] == nil then
|
336
|
+
error('Cancel(): ' .. jid .. ' is a dependency of ' .. dep ..
|
337
|
+
' but is not mentioned to be canceled')
|
338
|
+
end
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
-- If we've made it this far, then we are good to go. We can now just
|
343
|
+
-- remove any trace of all these jobs, as they form a dependent clique
|
344
|
+
for _, jid in ipairs(arg) do
|
345
|
+
-- Find any stage it's associated with and remove its from that stage
|
346
|
+
local state, queue, failure, worker = unpack(redis.call(
|
347
|
+
'hmget', QlessJob.ns .. jid, 'state', 'queue', 'failure', 'worker'))
|
348
|
+
|
349
|
+
if state ~= 'complete' then
|
350
|
+
-- Send a message out on the appropriate channels
|
351
|
+
local encoded = cjson.encode({
|
352
|
+
jid = jid,
|
353
|
+
worker = worker,
|
354
|
+
event = 'canceled',
|
355
|
+
queue = queue
|
356
|
+
})
|
357
|
+
Qless.publish('log', encoded)
|
358
|
+
|
359
|
+
-- Remove this job from whatever worker has it, if any
|
360
|
+
if worker and (worker ~= '') then
|
361
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
|
362
|
+
-- If necessary, send a message to the appropriate worker, too
|
363
|
+
Qless.publish('w:', worker, encoded)
|
364
|
+
end
|
365
|
+
|
366
|
+
-- Remove it from that queue
|
367
|
+
if queue then
|
368
|
+
local queue = Qless.queue(queue)
|
369
|
+
queue.work.remove(jid)
|
370
|
+
queue.locks.remove(jid)
|
371
|
+
queue.scheduled.remove(jid)
|
372
|
+
queue.depends.remove(jid)
|
373
|
+
end
|
374
|
+
|
375
|
+
-- We should probably go through all our dependencies and remove
|
376
|
+
-- ourselves from the list of dependents
|
377
|
+
for i, j in ipairs(redis.call(
|
378
|
+
'smembers', QlessJob.ns .. jid .. '-dependencies')) do
|
379
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependents', jid)
|
380
|
+
end
|
381
|
+
|
382
|
+
-- Delete any notion of dependencies it has
|
383
|
+
redis.call('del', QlessJob.ns .. jid .. '-dependencies')
|
384
|
+
|
385
|
+
-- If we're in the failed state, remove all of our data
|
386
|
+
if state == 'failed' then
|
387
|
+
failure = cjson.decode(failure)
|
388
|
+
-- We need to make this remove it from the failed queues
|
389
|
+
redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
|
390
|
+
if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
|
391
|
+
redis.call('srem', 'ql:failures', failure.group)
|
392
|
+
end
|
393
|
+
end
|
394
|
+
|
395
|
+
-- Remove it as a job that's tagged with this particular tag
|
396
|
+
local tags = cjson.decode(
|
397
|
+
redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
|
398
|
+
for i, tag in ipairs(tags) do
|
399
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
400
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
401
|
+
end
|
402
|
+
|
403
|
+
-- If the job was being tracked, we should notify
|
404
|
+
if redis.call('zscore', 'ql:tracked', jid) ~= false then
|
405
|
+
Qless.publish('canceled', jid)
|
406
|
+
end
|
407
|
+
|
408
|
+
-- Just go ahead and delete our data
|
409
|
+
redis.call('del', QlessJob.ns .. jid)
|
410
|
+
redis.call('del', QlessJob.ns .. jid .. '-history')
|
411
|
+
end
|
412
|
+
end
|
413
|
+
|
414
|
+
return arg
|
415
|
+
end
|
416
|
+
|
417
|
+
-------------------------------------------------------------------------------
|
418
|
+
-- Configuration interactions
|
419
|
+
-------------------------------------------------------------------------------
|
420
|
+
|
421
|
+
-- This represents our default configuration settings
|
422
|
+
Qless.config.defaults = {
|
423
|
+
['application'] = 'qless',
|
424
|
+
['heartbeat'] = 60,
|
425
|
+
['grace-period'] = 10,
|
426
|
+
['stats-history'] = 30,
|
427
|
+
['histogram-history'] = 7,
|
428
|
+
['jobs-history-count'] = 50000,
|
429
|
+
['jobs-history'] = 604800
|
430
|
+
}
|
431
|
+
|
432
|
+
-- Get one or more of the keys
|
433
|
+
Qless.config.get = function(key, default)
|
434
|
+
if key then
|
435
|
+
return redis.call('hget', 'ql:config', key) or
|
436
|
+
Qless.config.defaults[key] or default
|
437
|
+
else
|
438
|
+
-- Inspired by redis-lua https://github.com/nrk/redis-lua/blob/version-2.0/src/redis.lua
|
439
|
+
local reply = redis.call('hgetall', 'ql:config')
|
440
|
+
for i = 1, #reply, 2 do
|
441
|
+
Qless.config.defaults[reply[i]] = reply[i + 1]
|
442
|
+
end
|
443
|
+
return Qless.config.defaults
|
444
|
+
end
|
445
|
+
end
|
446
|
+
|
447
|
+
-- Set a configuration variable
|
448
|
+
Qless.config.set = function(option, value)
|
449
|
+
assert(option, 'config.set(): Arg "option" missing')
|
450
|
+
assert(value , 'config.set(): Arg "value" missing')
|
451
|
+
-- Send out a log message
|
452
|
+
Qless.publish('log', cjson.encode({
|
453
|
+
event = 'config_set',
|
454
|
+
option = option,
|
455
|
+
value = value
|
456
|
+
}))
|
457
|
+
|
458
|
+
redis.call('hset', 'ql:config', option, value)
|
459
|
+
end
|
460
|
+
|
461
|
+
-- Unset a configuration option
|
462
|
+
Qless.config.unset = function(option)
|
463
|
+
assert(option, 'config.unset(): Arg "option" missing')
|
464
|
+
-- Send out a log message
|
465
|
+
Qless.publish('log', cjson.encode({
|
466
|
+
event = 'config_unset',
|
467
|
+
option = option
|
468
|
+
}))
|
469
|
+
|
470
|
+
redis.call('hdel', 'ql:config', option)
|
471
|
+
end
|
472
|
+
-------------------------------------------------------------------------------
|
473
|
+
-- Job Class
|
474
|
+
--
|
475
|
+
-- It returns an object that represents the job with the provided JID
|
476
|
+
-------------------------------------------------------------------------------
|
477
|
+
|
478
|
+
-- This gets all the data associated with the job with the provided id. If the
|
479
|
+
-- job is not found, it returns nil. If found, it returns an object with the
|
480
|
+
-- appropriate properties
|
481
|
+
function QlessJob:data(...)
|
482
|
+
local job = redis.call(
|
483
|
+
'hmget', QlessJob.ns .. self.jid, 'jid', 'klass', 'state', 'queue',
|
484
|
+
'worker', 'priority', 'expires', 'retries', 'remaining', 'data',
|
485
|
+
'tags', 'failure')
|
486
|
+
|
487
|
+
-- Return nil if we haven't found it
|
488
|
+
if not job[1] then
|
489
|
+
return nil
|
490
|
+
end
|
491
|
+
|
492
|
+
local data = {
|
493
|
+
jid = job[1],
|
494
|
+
klass = job[2],
|
495
|
+
state = job[3],
|
496
|
+
queue = job[4],
|
497
|
+
worker = job[5] or '',
|
498
|
+
tracked = redis.call(
|
499
|
+
'zscore', 'ql:tracked', self.jid) ~= false,
|
500
|
+
priority = tonumber(job[6]),
|
501
|
+
expires = tonumber(job[7]) or 0,
|
502
|
+
retries = tonumber(job[8]),
|
503
|
+
remaining = math.floor(tonumber(job[9])),
|
504
|
+
data = cjson.decode(job[10]),
|
505
|
+
tags = cjson.decode(job[11]),
|
506
|
+
history = self:history(),
|
507
|
+
failure = cjson.decode(job[12] or '{}'),
|
508
|
+
dependents = redis.call(
|
509
|
+
'smembers', QlessJob.ns .. self.jid .. '-dependents'),
|
510
|
+
dependencies = redis.call(
|
511
|
+
'smembers', QlessJob.ns .. self.jid .. '-dependencies')
|
512
|
+
}
|
513
|
+
|
514
|
+
if #arg > 0 then
|
515
|
+
-- This section could probably be optimized, but I wanted the interface
|
516
|
+
-- in place first
|
517
|
+
local response = {}
|
518
|
+
for index, key in ipairs(arg) do
|
519
|
+
table.insert(response, data[key])
|
520
|
+
end
|
521
|
+
return response
|
522
|
+
else
|
523
|
+
return data
|
524
|
+
end
|
525
|
+
end
|
526
|
+
|
527
|
+
-- Complete a job and optionally put it in another queue, either scheduled or
|
528
|
+
-- to be considered waiting immediately. It can also optionally accept other
|
529
|
+
-- jids on which this job will be considered dependent before it's considered
|
530
|
+
-- valid.
|
531
|
+
--
|
532
|
+
-- The variable-length arguments may be pairs of the form:
|
533
|
+
--
|
534
|
+
-- ('next' , queue) : The queue to advance it to next
|
535
|
+
-- ('delay' , delay) : The delay for the next queue
|
536
|
+
-- ('depends', : Json of jobs it depends on in the new queue
|
537
|
+
-- '["jid1", "jid2", ...]')
|
538
|
+
---
|
539
|
+
function QlessJob:complete(now, worker, queue, data, ...)
|
540
|
+
assert(worker, 'Complete(): Arg "worker" missing')
|
541
|
+
assert(queue , 'Complete(): Arg "queue" missing')
|
542
|
+
data = assert(cjson.decode(data),
|
543
|
+
'Complete(): Arg "data" missing or not JSON: ' .. tostring(data))
|
544
|
+
|
545
|
+
-- Read in all the optional parameters
|
546
|
+
local options = {}
|
547
|
+
for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
|
548
|
+
|
549
|
+
-- Sanity check on optional args
|
550
|
+
local nextq = options['next']
|
551
|
+
local delay = assert(tonumber(options['delay'] or 0))
|
552
|
+
local depends = assert(cjson.decode(options['depends'] or '[]'),
|
553
|
+
'Complete(): Arg "depends" not JSON: ' .. tostring(options['depends']))
|
554
|
+
|
555
|
+
-- Delay and depends are not allowed together
|
556
|
+
if delay > 0 and #depends > 0 then
|
557
|
+
error('Complete(): "delay" and "depends" are not allowed together')
|
558
|
+
end
|
559
|
+
|
560
|
+
-- Depends doesn't make sense without nextq
|
561
|
+
if options['delay'] and nextq == nil then
|
562
|
+
error('Complete(): "delay" cannot be used without a "next".')
|
563
|
+
end
|
564
|
+
|
565
|
+
-- Depends doesn't make sense without nextq
|
566
|
+
if options['depends'] and nextq == nil then
|
567
|
+
error('Complete(): "depends" cannot be used without a "next".')
|
568
|
+
end
|
569
|
+
|
570
|
+
-- The bin is midnight of the provided day
|
571
|
+
-- 24 * 60 * 60 = 86400
|
572
|
+
local bin = now - (now % 86400)
|
573
|
+
|
574
|
+
-- First things first, we should see if the worker still owns this job
|
575
|
+
local lastworker, state, priority, retries = unpack(
|
576
|
+
redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state',
|
577
|
+
'priority', 'retries', 'dependents'))
|
578
|
+
|
579
|
+
if lastworker == false then
|
580
|
+
error('Complete(): Job does not exist')
|
581
|
+
elseif (state ~= 'running') then
|
582
|
+
error('Complete(): Job is not currently running: ' .. state)
|
583
|
+
elseif lastworker ~= worker then
|
584
|
+
error('Complete(): Job has been handed out to another worker: ' ..
|
585
|
+
tostring(lastworker))
|
586
|
+
end
|
587
|
+
|
588
|
+
-- Now we can assume that the worker does own the job. We need to
|
589
|
+
-- 1) Remove the job from the 'locks' from the old queue
|
590
|
+
-- 2) Enqueue it in the next stage if necessary
|
591
|
+
-- 3) Update the data
|
592
|
+
-- 4) Mark the job as completed, remove the worker, remove expires, and
|
593
|
+
-- update history
|
594
|
+
self:history(now, 'done')
|
595
|
+
|
596
|
+
if data then
|
597
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
|
598
|
+
end
|
599
|
+
|
600
|
+
-- Remove the job from the previous queue
|
601
|
+
local queue_obj = Qless.queue(queue)
|
602
|
+
queue_obj.work.remove(self.jid)
|
603
|
+
queue_obj.locks.remove(self.jid)
|
604
|
+
queue_obj.scheduled.remove(self.jid)
|
605
|
+
|
606
|
+
----------------------------------------------------------
|
607
|
+
-- This is the massive stats update that we have to do
|
608
|
+
----------------------------------------------------------
|
609
|
+
-- This is how long we've been waiting to get popped
|
610
|
+
-- local waiting = math.floor(now) - history[#history]['popped']
|
611
|
+
local waiting = 0
|
612
|
+
Qless.queue(queue):stat(now, 'run', waiting)
|
613
|
+
|
614
|
+
-- Remove this job from the jobs that the worker that was running it has
|
615
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
|
616
|
+
|
617
|
+
if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
|
618
|
+
Qless.publish('completed', self.jid)
|
619
|
+
end
|
620
|
+
|
621
|
+
if nextq then
|
622
|
+
queue_obj = Qless.queue(nextq)
|
623
|
+
-- Send a message out to log
|
624
|
+
Qless.publish('log', cjson.encode({
|
625
|
+
jid = self.jid,
|
626
|
+
event = 'advanced',
|
627
|
+
queue = queue,
|
628
|
+
to = nextq
|
629
|
+
}))
|
630
|
+
|
631
|
+
-- Enqueue the job
|
632
|
+
self:history(now, 'put', {q = nextq})
|
633
|
+
|
634
|
+
-- We're going to make sure that this queue is in the
|
635
|
+
-- set of known queues
|
636
|
+
if redis.call('zscore', 'ql:queues', nextq) == false then
|
637
|
+
redis.call('zadd', 'ql:queues', now, nextq)
|
638
|
+
end
|
639
|
+
|
640
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
641
|
+
'state', 'waiting',
|
642
|
+
'worker', '',
|
643
|
+
'failure', '{}',
|
644
|
+
'queue', nextq,
|
645
|
+
'expires', 0,
|
646
|
+
'remaining', tonumber(retries))
|
647
|
+
|
648
|
+
if delay > 0 then
|
649
|
+
queue_obj.scheduled.add(now + delay, self.jid)
|
650
|
+
return 'scheduled'
|
651
|
+
else
|
652
|
+
-- These are the jids we legitimately have to wait on
|
653
|
+
local count = 0
|
654
|
+
for i, j in ipairs(depends) do
|
655
|
+
-- Make sure it's something other than 'nil' or complete.
|
656
|
+
local state = redis.call('hget', QlessJob.ns .. j, 'state')
|
657
|
+
if (state and state ~= 'complete') then
|
658
|
+
count = count + 1
|
659
|
+
redis.call(
|
660
|
+
'sadd', QlessJob.ns .. j .. '-dependents',self.jid)
|
661
|
+
redis.call(
|
662
|
+
'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
|
663
|
+
end
|
664
|
+
end
|
665
|
+
if count > 0 then
|
666
|
+
queue_obj.depends.add(now, self.jid)
|
667
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'depends')
|
668
|
+
return 'depends'
|
669
|
+
else
|
670
|
+
queue_obj.work.add(now, priority, self.jid)
|
671
|
+
return 'waiting'
|
672
|
+
end
|
673
|
+
end
|
674
|
+
else
|
675
|
+
-- Send a message out to log
|
676
|
+
Qless.publish('log', cjson.encode({
|
677
|
+
jid = self.jid,
|
678
|
+
event = 'completed',
|
679
|
+
queue = queue
|
680
|
+
}))
|
681
|
+
|
682
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
683
|
+
'state', 'complete',
|
684
|
+
'worker', '',
|
685
|
+
'failure', '{}',
|
686
|
+
'queue', '',
|
687
|
+
'expires', 0,
|
688
|
+
'remaining', tonumber(retries))
|
689
|
+
|
690
|
+
-- Do the completion dance
|
691
|
+
local count = Qless.config.get('jobs-history-count')
|
692
|
+
local time = Qless.config.get('jobs-history')
|
693
|
+
|
694
|
+
-- These are the default values
|
695
|
+
count = tonumber(count or 50000)
|
696
|
+
time = tonumber(time or 7 * 24 * 60 * 60)
|
697
|
+
|
698
|
+
-- Schedule this job for destructination eventually
|
699
|
+
redis.call('zadd', 'ql:completed', now, self.jid)
|
700
|
+
|
701
|
+
-- Now look at the expired job data. First, based on the current time
|
702
|
+
local jids = redis.call('zrangebyscore', 'ql:completed', 0, now - time)
|
703
|
+
-- Any jobs that need to be expired... delete
|
704
|
+
for index, jid in ipairs(jids) do
|
705
|
+
local tags = cjson.decode(redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
|
706
|
+
for i, tag in ipairs(tags) do
|
707
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
708
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
709
|
+
end
|
710
|
+
redis.call('del', QlessJob.ns .. jid)
|
711
|
+
redis.call('del', QlessJob.ns .. jid .. '-history')
|
712
|
+
end
|
713
|
+
-- And now remove those from the queued-for-cleanup queue
|
714
|
+
redis.call('zremrangebyscore', 'ql:completed', 0, now - time)
|
715
|
+
|
716
|
+
-- Now take the all by the most recent 'count' ids
|
717
|
+
jids = redis.call('zrange', 'ql:completed', 0, (-1-count))
|
718
|
+
for index, jid in ipairs(jids) do
|
719
|
+
local tags = cjson.decode(redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
|
720
|
+
for i, tag in ipairs(tags) do
|
721
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
722
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
723
|
+
end
|
724
|
+
redis.call('del', QlessJob.ns .. jid)
|
725
|
+
redis.call('del', QlessJob.ns .. jid .. '-history')
|
726
|
+
end
|
727
|
+
redis.call('zremrangebyrank', 'ql:completed', 0, (-1-count))
|
728
|
+
|
729
|
+
-- Alright, if this has any dependents, then we should go ahead
|
730
|
+
-- and unstick those guys.
|
731
|
+
for i, j in ipairs(redis.call('smembers', QlessJob.ns .. self.jid .. '-dependents')) do
|
732
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependencies', self.jid)
|
733
|
+
if redis.call('scard', QlessJob.ns .. j .. '-dependencies') == 0 then
|
734
|
+
local q, p = unpack(redis.call('hmget', QlessJob.ns .. j, 'queue', 'priority'))
|
735
|
+
if q then
|
736
|
+
local queue = Qless.queue(q)
|
737
|
+
queue.depends.remove(j)
|
738
|
+
queue.work.add(now, p, j)
|
739
|
+
redis.call('hset', QlessJob.ns .. j, 'state', 'waiting')
|
740
|
+
end
|
741
|
+
end
|
742
|
+
end
|
743
|
+
|
744
|
+
-- Delete our dependents key
|
745
|
+
redis.call('del', QlessJob.ns .. self.jid .. '-dependents')
|
746
|
+
|
747
|
+
return 'complete'
|
748
|
+
end
|
749
|
+
end
|
750
|
+
|
751
|
+
-- Fail(jid, worker, group, message, now, [data])
|
752
|
+
-- -------------------------------------------------
|
753
|
+
-- Mark the particular job as failed, with the provided group, and a more
|
754
|
+
-- specific message. By `group`, we mean some phrase that might be one of
|
755
|
+
-- several categorical modes of failure. The `message` is something more
|
756
|
+
-- job-specific, like perhaps a traceback.
|
757
|
+
--
|
758
|
+
-- This method should __not__ be used to note that a job has been dropped or
|
759
|
+
-- has failed in a transient way. This method __should__ be used to note that
|
760
|
+
-- a job has something really wrong with it that must be remedied.
|
761
|
+
--
|
762
|
+
-- The motivation behind the `group` is so that similar errors can be grouped
|
763
|
+
-- together. Optionally, updated data can be provided for the job. A job in
|
764
|
+
-- any state can be marked as failed. If it has been given to a worker as a
|
765
|
+
-- job, then its subsequent requests to heartbeat or complete that job will
|
766
|
+
-- fail. Failed jobs are kept until they are canceled or completed.
|
767
|
+
--
|
768
|
+
-- __Returns__ the id of the failed job if successful, or `False` on failure.
|
769
|
+
--
|
770
|
+
-- Args:
|
771
|
+
-- 1) jid
|
772
|
+
-- 2) worker
|
773
|
+
-- 3) group
|
774
|
+
-- 4) message
|
775
|
+
-- 5) the current time
|
776
|
+
-- 6) [data]
|
777
|
+
function QlessJob:fail(now, worker, group, message, data)
|
778
|
+
local worker = assert(worker , 'Fail(): Arg "worker" missing')
|
779
|
+
local group = assert(group , 'Fail(): Arg "group" missing')
|
780
|
+
local message = assert(message , 'Fail(): Arg "message" missing')
|
781
|
+
|
782
|
+
-- The bin is midnight of the provided day
|
783
|
+
-- 24 * 60 * 60 = 86400
|
784
|
+
local bin = now - (now % 86400)
|
785
|
+
|
786
|
+
if data then
|
787
|
+
data = cjson.decode(data)
|
788
|
+
end
|
789
|
+
|
790
|
+
-- First things first, we should get the history
|
791
|
+
local queue, state = unpack(redis.call('hmget', QlessJob.ns .. self.jid,
|
792
|
+
'queue', 'state'))
|
793
|
+
|
794
|
+
-- If the job has been completed, we cannot fail it
|
795
|
+
if state ~= 'running' then
|
796
|
+
error('Fail(): Job not currently running: ' .. state)
|
797
|
+
end
|
798
|
+
|
799
|
+
-- Send out a log message
|
800
|
+
Qless.publish('log', cjson.encode({
|
801
|
+
jid = self.jid,
|
802
|
+
event = 'failed',
|
803
|
+
worker = worker,
|
804
|
+
group = group,
|
805
|
+
message = message
|
806
|
+
}))
|
807
|
+
|
808
|
+
if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
|
809
|
+
Qless.publish('failed', self.jid)
|
810
|
+
end
|
811
|
+
|
812
|
+
-- Remove this job from the jobs that the worker that was running it has
|
813
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
|
814
|
+
|
815
|
+
-- Now, take the element of the history for which our provided worker is the worker, and update 'failed'
|
816
|
+
self:history(now, 'failed', {worker = worker, group = group})
|
817
|
+
|
818
|
+
-- Increment the number of failures for that queue for the
|
819
|
+
-- given day.
|
820
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
|
821
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
|
822
|
+
|
823
|
+
-- Now remove the instance from the schedule, and work queues for the queue it's in
|
824
|
+
local queue_obj = Qless.queue(queue)
|
825
|
+
queue_obj.work.remove(self.jid)
|
826
|
+
queue_obj.locks.remove(self.jid)
|
827
|
+
queue_obj.scheduled.remove(self.jid)
|
828
|
+
|
829
|
+
-- The reason that this appears here is that the above will fail if the job doesn't exist
|
830
|
+
if data then
|
831
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
|
832
|
+
end
|
833
|
+
|
834
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
835
|
+
'state', 'failed',
|
836
|
+
'worker', '',
|
837
|
+
'expires', '',
|
838
|
+
'failure', cjson.encode({
|
839
|
+
['group'] = group,
|
840
|
+
['message'] = message,
|
841
|
+
['when'] = math.floor(now),
|
842
|
+
['worker'] = worker
|
843
|
+
}))
|
844
|
+
|
845
|
+
-- Add this group of failure to the list of failures
|
846
|
+
redis.call('sadd', 'ql:failures', group)
|
847
|
+
-- And add this particular instance to the failed groups
|
848
|
+
redis.call('lpush', 'ql:f:' .. group, self.jid)
|
849
|
+
|
850
|
+
-- Here is where we'd intcrement stats about the particular stage
|
851
|
+
-- and possibly the workers
|
852
|
+
|
853
|
+
return self.jid
|
854
|
+
end
|
855
|
+
|
856
|
+
-- retry(0, now, queue, worker, [delay])
|
857
|
+
-- ------------------------------------------
|
858
|
+
-- This script accepts jid, queue, worker and delay for
|
859
|
+
-- retrying a job. This is similar in functionality to
|
860
|
+
-- `put`, except that this counts against the retries
|
861
|
+
-- a job has for a stage.
|
862
|
+
--
|
863
|
+
-- If the worker is not the worker with a lock on the job,
|
864
|
+
-- then it returns false. If the job is not actually running,
|
865
|
+
-- then it returns false. Otherwise, it returns the number
|
866
|
+
-- of retries remaining. If the allowed retries have been
|
867
|
+
-- exhausted, then it is automatically failed, and a negative
|
868
|
+
-- number is returned.
|
869
|
+
function QlessJob:retry(now, queue, worker, delay, group, message)
|
870
|
+
assert(queue , 'Retry(): Arg "queue" missing')
|
871
|
+
assert(worker, 'Retry(): Arg "worker" missing')
|
872
|
+
delay = assert(tonumber(delay or 0),
|
873
|
+
'Retry(): Arg "delay" not a number: ' .. tostring(delay))
|
874
|
+
|
875
|
+
-- Let's see what the old priority, and tags were
|
876
|
+
local oldqueue, state, retries, oldworker, priority, failure = unpack(redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'state', 'retries', 'worker', 'priority', 'failure'))
|
877
|
+
|
878
|
+
-- If this isn't the worker that owns
|
879
|
+
if oldworker == false then
|
880
|
+
error('Retry(): Job does not exist')
|
881
|
+
elseif state ~= 'running' then
|
882
|
+
error('Retry(): Job is not currently running: ' .. state)
|
883
|
+
elseif oldworker ~= worker then
|
884
|
+
error('Retry(): Job has been handed out to another worker: ' .. oldworker)
|
885
|
+
end
|
886
|
+
|
887
|
+
-- For each of these, decrement their retries. If any of them
|
888
|
+
-- have exhausted their retries, then we should mark them as
|
889
|
+
-- failed.
|
890
|
+
local remaining = tonumber(redis.call(
|
891
|
+
'hincrbyfloat', QlessJob.ns .. self.jid, 'remaining', -0.5))
|
892
|
+
if (remaining * 2) % 2 == 1 then
|
893
|
+
local remaining = tonumber(redis.call(
|
894
|
+
'hincrbyfloat', QlessJob.ns .. self.jid, 'remaining', -0.5))
|
895
|
+
end
|
896
|
+
|
897
|
+
-- Remove it from the locks key of the old queue
|
898
|
+
Qless.queue(oldqueue).locks.remove(self.jid)
|
899
|
+
|
900
|
+
-- Remove this job from the worker that was previously working it
|
901
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
|
902
|
+
|
903
|
+
if remaining < 0 then
|
904
|
+
-- Now remove the instance from the schedule, and work queues for the queue it's in
|
905
|
+
local group = 'failed-retries-' .. queue
|
906
|
+
self:history(now, 'failed', {group = group})
|
907
|
+
|
908
|
+
redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'failed',
|
909
|
+
'worker', '',
|
910
|
+
'expires', '')
|
911
|
+
-- If the failure has not already been set, then set it
|
912
|
+
if failure == {} then
|
913
|
+
redis.call('hset', QlessJob.ns .. self.jid,
|
914
|
+
'failure', cjson.encode({
|
915
|
+
['group'] = group,
|
916
|
+
['message'] =
|
917
|
+
'Job exhausted retries in queue "' .. self.name .. '"',
|
918
|
+
['when'] = now,
|
919
|
+
['worker'] = unpack(job:data('worker'))
|
920
|
+
}))
|
921
|
+
end
|
922
|
+
|
923
|
+
-- Add this type of failure to the list of failures
|
924
|
+
redis.call('sadd', 'ql:failures', group)
|
925
|
+
-- And add this particular instance to the failed types
|
926
|
+
redis.call('lpush', 'ql:f:' .. group, self.jid)
|
927
|
+
else
|
928
|
+
-- Put it in the queue again with a delay. Like put()
|
929
|
+
local queue_obj = Qless.queue(queue)
|
930
|
+
if delay > 0 then
|
931
|
+
queue_obj.scheduled.add(now + delay, self.jid)
|
932
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'scheduled')
|
933
|
+
else
|
934
|
+
queue_obj.work.add(now, priority, self.jid)
|
935
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
|
936
|
+
end
|
937
|
+
|
938
|
+
-- If a group and a message was provided, then we should save it
|
939
|
+
if group ~= nil and message ~= nil then
|
940
|
+
redis.call('hset', QlessJob.ns .. self.jid,
|
941
|
+
'failure', cjson.encode({
|
942
|
+
['group'] = group,
|
943
|
+
['message'] = message,
|
944
|
+
['when'] = math.floor(now),
|
945
|
+
['worker'] = worker
|
946
|
+
})
|
947
|
+
)
|
948
|
+
end
|
949
|
+
end
|
950
|
+
|
951
|
+
return math.floor(remaining)
|
952
|
+
end
|
953
|
+
|
954
|
+
-- Depends(0, jid,
|
955
|
+
-- ('on', [jid, [jid, [...]]]) |
|
956
|
+
-- ('off',
|
957
|
+
-- ('all' | [jid, [jid, [...]]]))
|
958
|
+
-------------------------------------------------------------------------------
|
959
|
+
-- Add or remove dependencies a job has. If 'on' is provided, the provided
|
960
|
+
-- jids are added as dependencies. If 'off' and 'all' are provided, then all
|
961
|
+
-- the current dependencies are removed. If 'off' is provided and the next
|
962
|
+
-- argument is not 'all', then those jids are removed as dependencies.
|
963
|
+
--
|
964
|
+
-- If a job is not already in the 'depends' state, then this call will return
|
965
|
+
-- false. Otherwise, it will return true
|
966
|
+
--
|
967
|
+
-- Args:
|
968
|
+
-- 1) jid
|
969
|
+
function QlessJob:depends(now, command, ...)
|
970
|
+
assert(command, 'Depends(): Arg "command" missing')
|
971
|
+
if redis.call('hget', QlessJob.ns .. self.jid, 'state') ~= 'depends' then
|
972
|
+
return false
|
973
|
+
end
|
974
|
+
|
975
|
+
if command == 'on' then
|
976
|
+
-- These are the jids we legitimately have to wait on
|
977
|
+
for i, j in ipairs(arg) do
|
978
|
+
-- Make sure it's something other than 'nil' or complete.
|
979
|
+
local state = redis.call('hget', QlessJob.ns .. j, 'state')
|
980
|
+
if (state and state ~= 'complete') then
|
981
|
+
redis.call('sadd', QlessJob.ns .. j .. '-dependents' , self.jid)
|
982
|
+
redis.call('sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
|
983
|
+
end
|
984
|
+
end
|
985
|
+
return true
|
986
|
+
elseif command == 'off' then
|
987
|
+
if arg[1] == 'all' then
|
988
|
+
for i, j in ipairs(redis.call('smembers', QlessJob.ns .. self.jid .. '-dependencies')) do
|
989
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
|
990
|
+
end
|
991
|
+
redis.call('del', QlessJob.ns .. self.jid .. '-dependencies')
|
992
|
+
local q, p = unpack(redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
|
993
|
+
if q then
|
994
|
+
local queue_obj = Qless.queue(q)
|
995
|
+
queue_obj.depends.remove(self.jid)
|
996
|
+
queue_obj.work.add(now, p, self.jid)
|
997
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
|
998
|
+
end
|
999
|
+
else
|
1000
|
+
for i, j in ipairs(arg) do
|
1001
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
|
1002
|
+
redis.call('srem', QlessJob.ns .. self.jid .. '-dependencies', j)
|
1003
|
+
if redis.call('scard', QlessJob.ns .. self.jid .. '-dependencies') == 0 then
|
1004
|
+
local q, p = unpack(redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
|
1005
|
+
if q then
|
1006
|
+
local queue_obj = Qless.queue(q)
|
1007
|
+
queue_obj.depends.remove(self.jid)
|
1008
|
+
queue_obj.work.add(now, p, self.jid)
|
1009
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
|
1010
|
+
end
|
1011
|
+
end
|
1012
|
+
end
|
1013
|
+
end
|
1014
|
+
return true
|
1015
|
+
else
|
1016
|
+
error('Depends(): Argument "command" must be "on" or "off"')
|
1017
|
+
end
|
1018
|
+
end
|
1019
|
+
|
1020
|
+
-- This scripts conducts a heartbeat for a job, and returns
|
1021
|
+
-- either the new expiration or False if the lock has been
|
1022
|
+
-- given to another node
|
1023
|
+
--
|
1024
|
+
-- Args:
|
1025
|
+
-- 1) now
|
1026
|
+
-- 2) worker
|
1027
|
+
-- 3) [data]
|
1028
|
+
function QlessJob:heartbeat(now, worker, data)
|
1029
|
+
assert(worker, 'Heatbeat(): Arg "worker" missing')
|
1030
|
+
|
1031
|
+
-- We should find the heartbeat interval for this queue
|
1032
|
+
-- heartbeat. First, though, we need to find the queue
|
1033
|
+
-- this particular job is in
|
1034
|
+
local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue') or ''
|
1035
|
+
local expires = now + tonumber(
|
1036
|
+
Qless.config.get(queue .. '-heartbeat') or
|
1037
|
+
Qless.config.get('heartbeat', 60))
|
1038
|
+
|
1039
|
+
if data then
|
1040
|
+
data = cjson.decode(data)
|
1041
|
+
end
|
1042
|
+
|
1043
|
+
-- First, let's see if the worker still owns this job, and there is a worker
|
1044
|
+
local job_worker, state = unpack(redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state'))
|
1045
|
+
if job_worker == false then
|
1046
|
+
-- This means the job doesn't exist
|
1047
|
+
error('Heartbeat(): Job does not exist')
|
1048
|
+
elseif state ~= 'running' then
|
1049
|
+
error('Heartbeat(): Job not currently running: ' .. state)
|
1050
|
+
elseif job_worker ~= worker or #job_worker == 0 then
|
1051
|
+
error('Heartbeat(): Job has been handed out to another worker: ' .. job_worker)
|
1052
|
+
else
|
1053
|
+
-- Otherwise, optionally update the user data, and the heartbeat
|
1054
|
+
if data then
|
1055
|
+
-- I don't know if this is wise, but I'm decoding and encoding
|
1056
|
+
-- the user data to hopefully ensure its sanity
|
1057
|
+
redis.call('hmset', QlessJob.ns .. self.jid, 'expires', expires, 'worker', worker, 'data', cjson.encode(data))
|
1058
|
+
else
|
1059
|
+
redis.call('hmset', QlessJob.ns .. self.jid, 'expires', expires, 'worker', worker)
|
1060
|
+
end
|
1061
|
+
|
1062
|
+
-- Update hwen this job was last updated on that worker
|
1063
|
+
-- Add this job to the list of jobs handled by this worker
|
1064
|
+
redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, self.jid)
|
1065
|
+
|
1066
|
+
-- And now we should just update the locks
|
1067
|
+
local queue = Qless.queue(redis.call('hget', QlessJob.ns .. self.jid, 'queue'))
|
1068
|
+
queue.locks.add(expires, self.jid)
|
1069
|
+
return expires
|
1070
|
+
end
|
1071
|
+
end
|
1072
|
+
|
1073
|
+
-- priority(0, jid, priority)
|
1074
|
+
-- --------------------------
|
1075
|
+
-- Accepts a jid, and a new priority for the job. If the job
|
1076
|
+
-- doesn't exist, then return false. Otherwise, return the
|
1077
|
+
-- updated priority. If the job is waiting, then the change
|
1078
|
+
-- will be reflected in the order in which it's popped
|
1079
|
+
function QlessJob:priority(priority)
|
1080
|
+
priority = assert(tonumber(priority),
|
1081
|
+
'Priority(): Arg "priority" missing or not a number: ' .. tostring(priority))
|
1082
|
+
|
1083
|
+
-- Get the queue the job is currently in, if any
|
1084
|
+
local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue')
|
1085
|
+
|
1086
|
+
if queue == nil then
|
1087
|
+
return false
|
1088
|
+
elseif queue == '' then
|
1089
|
+
-- Just adjust the priority
|
1090
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
|
1091
|
+
return priority
|
1092
|
+
else
|
1093
|
+
-- Adjust the priority and see if it's a candidate for updating
|
1094
|
+
-- its priority in the queue it's currently in
|
1095
|
+
local queue_obj = Qless.queue(queue)
|
1096
|
+
if queue_obj.work.score(self.jid) then
|
1097
|
+
queue_obj.work.add(0, priority, self.jid)
|
1098
|
+
end
|
1099
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
|
1100
|
+
return priority
|
1101
|
+
end
|
1102
|
+
end
|
1103
|
+
|
1104
|
+
-- Update the jobs' attributes with the provided dictionary
|
1105
|
+
function QlessJob:update(data)
|
1106
|
+
local tmp = {}
|
1107
|
+
for k, v in pairs(data) do
|
1108
|
+
table.insert(tmp, k)
|
1109
|
+
table.insert(tmp, v)
|
1110
|
+
end
|
1111
|
+
redis.call('hmset', QlessJob.ns .. self.jid, unpack(tmp))
|
1112
|
+
end
|
1113
|
+
|
1114
|
+
-- Times out the job
|
1115
|
+
function QlessJob:timeout(now)
|
1116
|
+
local queue_name, state, worker = unpack(redis.call('hmget',
|
1117
|
+
QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
|
1118
|
+
if queue_name == nil then
|
1119
|
+
error('Timeout(): Job does not exist')
|
1120
|
+
elseif state ~= 'running' then
|
1121
|
+
error('Timeout(): Job not running')
|
1122
|
+
else
|
1123
|
+
-- Time out the job
|
1124
|
+
self:history(now, 'timed-out')
|
1125
|
+
local queue = Qless.queue(queue_name)
|
1126
|
+
queue.locks.remove(self.jid)
|
1127
|
+
queue.work.add(now, math.huge, self.jid)
|
1128
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
1129
|
+
'state', 'stalled', 'expires', 0)
|
1130
|
+
local encoded = cjson.encode({
|
1131
|
+
jid = self.jid,
|
1132
|
+
event = 'lock_lost',
|
1133
|
+
worker = worker
|
1134
|
+
})
|
1135
|
+
Qless.publish('w:' .. worker, encoded)
|
1136
|
+
Qless.publish('log', encoded)
|
1137
|
+
return queue
|
1138
|
+
end
|
1139
|
+
end
|
1140
|
+
|
1141
|
+
-- Get or append to history
|
1142
|
+
function QlessJob:history(now, what, item)
|
1143
|
+
-- First, check if there's an old-style history, and update it if there is
|
1144
|
+
local history = redis.call('hget', QlessJob.ns .. self.jid, 'history')
|
1145
|
+
if history then
|
1146
|
+
history = cjson.decode(history)
|
1147
|
+
for i, value in ipairs(history) do
|
1148
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1149
|
+
cjson.encode({math.floor(value.put), 'put', {q = value.q}}))
|
1150
|
+
|
1151
|
+
-- If there's any popped time
|
1152
|
+
if value.popped then
|
1153
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1154
|
+
cjson.encode({math.floor(value.popped), 'popped',
|
1155
|
+
{worker = value.worker}}))
|
1156
|
+
end
|
1157
|
+
|
1158
|
+
-- If there's any failure
|
1159
|
+
if value.failed then
|
1160
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1161
|
+
cjson.encode(
|
1162
|
+
{math.floor(value.failed), 'failed', nil}))
|
1163
|
+
end
|
1164
|
+
|
1165
|
+
-- If it was completed
|
1166
|
+
if value.done then
|
1167
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1168
|
+
cjson.encode(
|
1169
|
+
{math.floor(value.done), 'done', nil}))
|
1170
|
+
end
|
1171
|
+
end
|
1172
|
+
-- With all this ported forward, delete the old-style history
|
1173
|
+
redis.call('hdel', QlessJob.ns .. self.jid, 'history')
|
1174
|
+
end
|
1175
|
+
|
1176
|
+
-- Now to the meat of the function
|
1177
|
+
if what == nil then
|
1178
|
+
-- Get the history
|
1179
|
+
local response = {}
|
1180
|
+
for i, value in ipairs(redis.call('lrange',
|
1181
|
+
QlessJob.ns .. self.jid .. '-history', 0, -1)) do
|
1182
|
+
value = cjson.decode(value)
|
1183
|
+
local dict = value[3] or {}
|
1184
|
+
dict['when'] = value[1]
|
1185
|
+
dict['what'] = value[2]
|
1186
|
+
table.insert(response, dict)
|
1187
|
+
end
|
1188
|
+
return response
|
1189
|
+
else
|
1190
|
+
-- Append to the history
|
1191
|
+
return redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1192
|
+
cjson.encode({math.floor(now), what, item}))
|
1193
|
+
end
|
1194
|
+
end
|
1195
|
+
-------------------------------------------------------------------------------
-- Queue class
-------------------------------------------------------------------------------
-- Return a queue object
function Qless.queue(name)
    assert(name, 'Queue(): no queue name provided')
    local queue = {}
    setmetatable(queue, QlessQueue)
    queue.name = name

    -- Access to our work
    queue.work = {
        peek = function(count)
            if count == 0 then
                return {}
            end
            local jids = {}
            for index, jid in ipairs(redis.call(
                'zrevrange', queue:prefix('work'), 0, count - 1)) do
                table.insert(jids, jid)
            end
            return jids
        end, remove = function(...)
            if #arg > 0 then
                return redis.call('zrem', queue:prefix('work'), unpack(arg))
            end
        end, add = function(now, priority, jid)
            return redis.call('zadd',
                queue:prefix('work'), priority - (now / 10000000000), jid)
        end, score = function(jid)
            return redis.call('zscore', queue:prefix('work'), jid)
        end, length = function()
            return redis.call('zcard', queue:prefix('work'))
        end
    }

    -- Access to our locks
    queue.locks = {
        expired = function(now, offset, count)
            return redis.call('zrangebyscore',
                queue:prefix('locks'), -math.huge, now, 'LIMIT', offset, count)
        end, peek = function(now, offset, count)
            return redis.call('zrangebyscore', queue:prefix('locks'),
                now, math.huge, 'LIMIT', offset, count)
        end, add = function(expires, jid)
            redis.call('zadd', queue:prefix('locks'), expires, jid)
        end, remove = function(...)
            if #arg > 0 then
                return redis.call('zrem', queue:prefix('locks'), unpack(arg))
            end
        end, running = function(now)
            return redis.call('zcount', queue:prefix('locks'), now, math.huge)
        end, length = function(now)
            -- If a 'now' is provided, we're interested in how many are before
            -- that time
            if now then
                return redis.call('zcount', queue:prefix('locks'), 0, now)
            else
                return redis.call('zcard', queue:prefix('locks'))
            end
        end
    }

    -- Access to our dependent jobs
    queue.depends = {
        peek = function(now, offset, count)
            return redis.call('zrange',
                queue:prefix('depends'), offset, offset + count - 1)
        end, add = function(now, jid)
            redis.call('zadd', queue:prefix('depends'), now, jid)
        end, remove = function(...)
            if #arg > 0 then
                return redis.call('zrem', queue:prefix('depends'), unpack(arg))
            end
        end, length = function()
            return redis.call('zcard', queue:prefix('depends'))
        end
    }

    -- Access to our scheduled jobs
    queue.scheduled = {
        peek = function(now, offset, count)
            return redis.call('zrange',
                queue:prefix('scheduled'), offset, offset + count - 1)
        end, ready = function(now, offset, count)
            return redis.call('zrangebyscore',
                queue:prefix('scheduled'), 0, now, 'LIMIT', offset, count)
        end, add = function(when, jid)
            redis.call('zadd', queue:prefix('scheduled'), when, jid)
        end, remove = function(...)
            if #arg > 0 then
                return redis.call('zrem', queue:prefix('scheduled'), unpack(arg))
            end
        end, length = function()
            return redis.call('zcard', queue:prefix('scheduled'))
        end
    }

    -- Access to our recurring jobs
    queue.recurring = {
        peek = function(now, offset, count)
            return redis.call('zrangebyscore', queue:prefix('recur'),
                0, now, 'LIMIT', offset, count)
        end, ready = function(now, offset, count)
        end, add = function(when, jid)
            redis.call('zadd', queue:prefix('recur'), when, jid)
        end, remove = function(...)
            if #arg > 0 then
                return redis.call('zrem', queue:prefix('recur'), unpack(arg))
            end
        end, update = function(increment, jid)
            redis.call('zincrby', queue:prefix('recur'), increment, jid)
        end, score = function(jid)
            return redis.call('zscore', queue:prefix('recur'), jid)
        end, length = function()
            return redis.call('zcard', queue:prefix('recur'))
        end
    }
    return queue
end

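-- Illustrative sketch (editorial, not part of the original file): the work
-- zset is read with zrevrange, so the score 'priority - (now / 1e10)' puts
-- higher priorities first and, within a priority, earlier puts first (FIFO).
local function _example_work_score(priority, now)
    return priority - (now / 10000000000)
end
assert(_example_work_score(1, 100) > _example_work_score(0, 100))
assert(_example_work_score(0, 100) > _example_work_score(0, 200))
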
--! @brief Return the prefix for this particular queue
function QlessQueue:prefix(group)
    if group then
        return QlessQueue.ns..self.name..'-'..group
    else
        return QlessQueue.ns..self.name
    end
end

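-- Illustrative sketch (editorial, not part of the original file): assuming
-- QlessQueue.ns is 'ql:q:' as defined earlier in this library, a queue named
-- 'testing' would use keys like:
--
--   queue:prefix()            --> 'ql:q:testing'
--   queue:prefix('work')      --> 'ql:q:testing-work'
--   queue:prefix('locks')     --> 'ql:q:testing-locks'
--   queue:prefix('scheduled') --> 'ql:q:testing-scheduled'
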
-- Stats(0, queue, date)
-- ---------------------
-- Return the current statistics for a given queue on a given date. The
-- results are returned as a JSON blob:
--
--  {
--      # These are unimplemented as of yet
--      'failed': 3,
--      'retries': 5,
--      'wait' : {
--          'total'    : ...,
--          'mean'     : ...,
--          'variance' : ...,
--          'histogram': [
--              ...
--          ]
--      }, 'run': {
--          'total'    : ...,
--          'mean'     : ...,
--          'variance' : ...,
--          'histogram': [
--              ...
--          ]
--      }
--  }
--
-- The histogram's data points are at the second resolution for the first
-- minute, the minute resolution for the first hour, the hour resolution
-- for the first day, and then at the day resolution from there on out.
-- The `histogram` key is a list of those values.
--
-- Args:
--    1) queue
--    2) time
function QlessQueue:stats(now, date)
    date = assert(tonumber(date),
        'Stats(): Arg "date" missing or not a number: '.. (date or 'nil'))

    -- The bin is midnight of the provided day
    -- 24 * 60 * 60 = 86400
    local bin = date - (date % 86400)

    -- This is a table of all the keys we want to use in order to produce a histogram
    local histokeys = {
        's0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23','s24','s25','s26','s27','s28','s29','s30','s31','s32','s33','s34','s35','s36','s37','s38','s39','s40','s41','s42','s43','s44','s45','s46','s47','s48','s49','s50','s51','s52','s53','s54','s55','s56','s57','s58','s59',
        'm1','m2','m3','m4','m5','m6','m7','m8','m9','m10','m11','m12','m13','m14','m15','m16','m17','m18','m19','m20','m21','m22','m23','m24','m25','m26','m27','m28','m29','m30','m31','m32','m33','m34','m35','m36','m37','m38','m39','m40','m41','m42','m43','m44','m45','m46','m47','m48','m49','m50','m51','m52','m53','m54','m55','m56','m57','m58','m59',
        'h1','h2','h3','h4','h5','h6','h7','h8','h9','h10','h11','h12','h13','h14','h15','h16','h17','h18','h19','h20','h21','h22','h23',
        'd1','d2','d3','d4','d5','d6'
    }

    local mkstats = function(name, bin, queue)
        -- The results we'll be sending back
        local results = {}

        local key = 'ql:s:' .. name .. ':' .. bin .. ':' .. queue
        local count, mean, vk = unpack(redis.call('hmget', key, 'total', 'mean', 'vk'))

        count = tonumber(count) or 0
        mean  = tonumber(mean) or 0
        vk    = tonumber(vk)

        results.count     = count or 0
        results.mean      = mean or 0
        results.histogram = {}

        if not count then
            results.std = 0
        else
            if count > 1 then
                results.std = math.sqrt(vk / (count - 1))
            else
                results.std = 0
            end
        end

        local histogram = redis.call('hmget', key, unpack(histokeys))
        for i=1,#histokeys do
            table.insert(results.histogram, tonumber(histogram[i]) or 0)
        end
        return results
    end

    local retries, failed, failures = unpack(redis.call('hmget', 'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 'failed', 'failures'))
    return {
        retries  = tonumber(retries or 0),
        failed   = tonumber(failed or 0),
        failures = tonumber(failures or 0),
        wait     = mkstats('wait', bin, self.name),
        run      = mkstats('run' , bin, self.name)
    }
end

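-- Illustrative sketch (editorial, not part of the original file): 'vk' is the
-- running sum of squared deviations maintained by QlessQueue:stat below, so
-- sqrt(vk / (count - 1)) above is the sample standard deviation. A plain-Lua
-- check on a small dataset:
local _vals, _n, _mean, _vk = {3, 7, 7, 19}, 0, 0, 0
for _, v in ipairs(_vals) do
    _n = _n + 1
    local oldmean = _mean
    _mean = _mean + (v - _mean) / _n
    _vk = _vk + (v - _mean) * (v - oldmean)
end
-- mean is 9 and vk is 144, so the sample std is sqrt(144 / 3)
assert(math.abs(math.sqrt(_vk / (_n - 1)) - math.sqrt(48)) < 1e-9)
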
-- This script takes the name of the queue and then checks
-- for any expired locks, then inserts any scheduled items
-- that are now valid, and lastly returns any work items
-- that can be handed over.
--
-- Keys:
--    1) queue name
-- Args:
--    1) the number of items to return
--    2) the current time
function QlessQueue:peek(now, count)
    count = assert(tonumber(count),
        'Peek(): Arg "count" missing or not a number: ' .. tostring(count))

    -- These are the ids that we're going to return. We'll begin with any jobs
    -- that have lost their locks
    local jids = self.locks.expired(now, 0, count)

    -- If we still need jobs in order to meet demand, then we should
    -- look for all the recurring jobs that need jobs run
    self:check_recurring(now, count - #jids)

    -- Now we've checked __all__ the locks for this queue that could
    -- have expired, and they are no more than the number requested. If
    -- we still need values in order to meet the demand, then we
    -- should check for any scheduled items, and if there are any, we
    -- should insert them to ensure correctness when pulling off the next
    -- unit of work.
    self:check_scheduled(now, count - #jids)

    -- With these in place, we can expand this list of jids based on the work
    -- queue itself and the priorities therein
    table.extend(jids, self.work.peek(count - #jids))

    return jids
end

--! @brief Return true if this queue is paused
function QlessQueue:paused()
    return redis.call('sismember', 'ql:paused_queues', self.name) == 1
end

-- This script takes the name of the queue(s) and adds it
-- to the ql:paused_queues set.
--
-- Args: The list of queues to pause.
--
-- Note: long term, we have discussed adding a rate-limiting
-- feature to qless-core, which would be more flexible and
-- could be used for pausing (i.e. pause = set the rate to 0).
-- For now, this is far simpler, but we should rewrite this
-- in terms of the rate limiting feature if/when that is added.
function QlessQueue.pause(...)
    redis.call('sadd', 'ql:paused_queues', unpack(arg))
end

-- This script takes the name of the queue(s) and removes it
-- from the ql:paused_queues set.
--
-- Args: The list of queues to unpause.
function QlessQueue.unpause(...)
    redis.call('srem', 'ql:paused_queues', unpack(arg))
end

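-- Illustrative sketch (editorial, not part of the original file; queue names
-- are hypothetical): pausing is plain set membership, so it takes effect on
-- the next pop and is trivially reversible:
--
--   QlessQueue.pause('testing', 'other-queue')
--   -- pop() on those queues now returns {} (see QlessQueue:pop below)
--   QlessQueue.unpause('testing', 'other-queue')
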
-- This script takes the name of the queue and then checks
-- for any expired locks, then inserts any scheduled items
-- that are now valid, and lastly returns any work items
-- that can be handed over.
--
-- Keys:
--    1) queue name
-- Args:
--    1) worker name
--    2) the number of items to return
--    3) the current time
function QlessQueue:pop(now, worker, count)
    assert(worker, 'Pop(): Arg "worker" missing')
    count = assert(tonumber(count),
        'Pop(): Arg "count" missing or not a number: ' .. tostring(count))

    -- We should find the heartbeat interval for this queue
    local expires = now + tonumber(
        Qless.config.get(self.name .. '-heartbeat') or
        Qless.config.get('heartbeat', 60))

    -- If this queue is paused, then return no jobs
    if self:paused() then
        return {}
    end

    -- Make sure we add this worker to the list of seen workers
    redis.call('zadd', 'ql:workers', now, worker)

    -- Check our max concurrency, and limit the count
    local max_concurrency = tonumber(
        Qless.config.get(self.name .. '-max-concurrency', 0))

    if max_concurrency > 0 then
        -- Allow at most max_concurrency - #running
        local allowed = math.max(0, max_concurrency - self.locks.running(now))
        count = math.min(allowed, count)
        if count == 0 then
            return {}
        end
    end

    local jids = self:invalidate_locks(now, count)
    -- Now we've checked __all__ the locks for this queue that could
    -- have expired, and they are no more than the number requested.

    -- If we still need jobs in order to meet demand, then we should
    -- look for all the recurring jobs that need jobs run
    self:check_recurring(now, count - #jids)

    -- If we still need values in order to meet the demand, then we
    -- should check for any scheduled items, and if there are any, we
    -- should insert them to ensure correctness when pulling off the next
    -- unit of work.
    self:check_scheduled(now, count - #jids)

    -- With these in place, we can expand this list of jids based on the work
    -- queue itself and the priorities therein
    table.extend(jids, self.work.peek(count - #jids))

    local state
    for index, jid in ipairs(jids) do
        local job = Qless.job(jid)
        state = unpack(job:data('state'))
        job:history(now, 'popped', {worker = worker})

        -- Update the wait time statistics
        -- local waiting = math.floor(now) - history[#history]['put']
        local waiting = 0
        self:stat(now, 'wait', waiting)

        -- Add this job to the list of jobs handled by this worker
        redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid)

        -- Update the job's data, add its lock, and return the job
        job:update({
            worker  = worker,
            expires = expires,
            state   = 'running'
        })

        self.locks.add(expires, jid)

        local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false
        if tracked then
            Qless.publish('popped', jid)
        end
    end

    -- If we are returning any jobs, then we should remove them from the work
    -- queue
    self.work.remove(unpack(jids))

    return jids
end

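-- Illustrative sketch (editorial, not part of the original file): the
-- per-queue knobs read by pop() above are plain entries in the ql:config
-- hash. For a queue named 'testing', something like the following would cap
-- concurrency at 10 and shorten the heartbeat to 30 seconds:
--
--   Qless.config.set('testing-max-concurrency', 10)
--   Qless.config.set('testing-heartbeat', 30)
--
-- (Qless.config.set is assumed here from the config API whose getter is used
-- above.) With a cap in place, pop() hands out at most
-- max_concurrency - running jobs, and returns {} once the cap is reached.
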
--! @brief Update the stats for this queue
--! @param stat - name of the statistic to be updated ('wait', 'run', etc.)
--! @param val - the value to update the statistics with
function QlessQueue:stat(now, stat, val)
    -- The bin is midnight of the provided day
    local bin = now - (now % 86400)
    local key = 'ql:s:' .. stat .. ':' .. bin .. ':' .. self.name

    -- Get the current data
    local count, mean, vk = unpack(
        redis.call('hmget', key, 'total', 'mean', 'vk'))

    -- If there isn't any data there presently, then we must initialize it
    count = count or 0
    if count == 0 then
        mean  = val
        vk    = 0
        count = 1
    else
        count = count + 1
        local oldmean = mean
        mean = mean + (val - mean) / count
        vk   = vk + (val - mean) * (val - oldmean)
    end

    -- Now, update the histogram
    -- - `s1`, `s2`, ..., -- second-resolution histogram counts
    -- - `m1`, `m2`, ..., -- minute-resolution
    -- - `h1`, `h2`, ..., -- hour-resolution
    -- - `d1`, `d2`, ..., -- day-resolution
    val = math.floor(val)
    if val < 60 then -- seconds
        redis.call('hincrby', key, 's' .. val, 1)
    elseif val < 3600 then -- minutes
        redis.call('hincrby', key, 'm' .. math.floor(val / 60), 1)
    elseif val < 86400 then -- hours
        redis.call('hincrby', key, 'h' .. math.floor(val / 3600), 1)
    else -- days
        redis.call('hincrby', key, 'd' .. math.floor(val / 86400), 1)
    end
    redis.call('hmset', key, 'total', count, 'mean', mean, 'vk', vk)
end

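-- Illustrative sketch (editorial, not part of the original file): the
-- mean/vk update above is Welford's online algorithm, and the histogram
-- buckets are keyed by resolution: a 42-second wait lands in 's42', 90
-- seconds in 'm1', two hours in 'h2', three days in 'd3'. A standalone
-- rendering of that bucket mapping:
local function _example_bucket(val)
    val = math.floor(val)
    if val < 60 then return 's' .. val
    elseif val < 3600 then return 'm' .. math.floor(val / 60)
    elseif val < 86400 then return 'h' .. math.floor(val / 3600)
    else return 'd' .. math.floor(val / 86400) end
end
assert(_example_bucket(42) == 's42')
assert(_example_bucket(90) == 'm1')
assert(_example_bucket(7200) == 'h2')
assert(_example_bucket(259200) == 'd3')
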
-- Put(1, jid, klass, data, now, delay, [priority, p], [tags, t], [retries, r], [depends, '[...]'])
-- ----------------------------------------------------------------------------
-- This script takes the name of the queue and then the
-- info about the work item, and makes sure that it's
-- enqueued.
--
-- At some point, I'd like to be able to provide functionality
-- that enables this to generate a unique ID for this piece
-- of work. As such, client libraries should not expose
-- setting the id from the user, as this is an implementation
-- detail that's likely to change and users should not grow
-- to depend on it.
--
-- Args:
--    1) jid
--    2) klass
--    3) data
--    4) now
--    5) delay
--    *) [priority, p], [tags, t], [retries, r], [depends, '[...]']
function QlessQueue:put(now, jid, klass, data, delay, ...)
    assert(jid  , 'Put(): Arg "jid" missing')
    assert(klass, 'Put(): Arg "klass" missing')
    data = assert(cjson.decode(data),
        'Put(): Arg "data" missing or not JSON: ' .. tostring(data))
    delay = assert(tonumber(delay),
        'Put(): Arg "delay" not a number: ' .. tostring(delay))

    -- Read in all the optional parameters
    local options = {}
    for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end

    -- Let's see what the old priority and tags were
    local job = Qless.job(jid)
    local priority, tags, oldqueue, state, failure, retries, worker = unpack(redis.call('hmget', QlessJob.ns .. jid, 'priority', 'tags', 'queue', 'state', 'failure', 'retries', 'worker'))

    -- Sanity check on optional args
    retries  = assert(tonumber(options['retries']  or retries or 5) , 'Put(): Arg "retries" not a number: ' .. tostring(options['retries']))
    tags     = assert(cjson.decode(options['tags'] or tags or '[]' ), 'Put(): Arg "tags" not JSON: ' .. tostring(options['tags']))
    priority = assert(tonumber(options['priority'] or priority or 0), 'Put(): Arg "priority" not a number: ' .. tostring(options['priority']))
    local depends = assert(cjson.decode(options['depends'] or '[]') , 'Put(): Arg "depends" not JSON: ' .. tostring(options['depends']))

    -- Delay and depends are not allowed together
    if delay > 0 and #depends > 0 then
        error('Put(): "delay" and "depends" are not allowed to be used together')
    end

    -- Send out a log message
    Qless.publish('log', cjson.encode({
        jid   = jid,
        event = 'put',
        queue = self.name
    }))

    -- Update the history to include this new change
    job:history(now, 'put', {q = self.name})

    -- If this item was previously in another queue, then we should remove it from there
    if oldqueue then
        local queue_obj = Qless.queue(oldqueue)
        queue_obj.work.remove(jid)
        queue_obj.locks.remove(jid)
        queue_obj.depends.remove(jid)
        queue_obj.scheduled.remove(jid)
    end

    -- If this had previously been given out to a worker,
    -- make sure to remove it from that worker's jobs
    if worker then
        redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
        -- We need to inform whatever worker had that job
        Qless.publish('w:' .. worker, cjson.encode({
            jid   = jid,
            event = 'put',
            queue = self.name
        }))
    end

    -- If the job was previously in the 'completed' state, then we should remove
    -- it from being enqueued for destruction
    if state == 'complete' then
        redis.call('zrem', 'ql:completed', jid)
    end

    -- Add this job to the list of jobs tagged with whatever tags were supplied
    for i, tag in ipairs(tags) do
        redis.call('zadd', 'ql:t:' .. tag, now, jid)
        redis.call('zincrby', 'ql:tags', 1, tag)
    end

    -- If we're in the failed state, remove all of our data
    if state == 'failed' then
        failure = cjson.decode(failure)
        -- We need to make this remove it from the failed queues
        redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
        if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
            redis.call('srem', 'ql:failures', failure.group)
        end
        -- The bin is midnight of the provided day
        -- 24 * 60 * 60 = 86400
        local bin = failure.when - (failure.when % 86400)
        -- We also need to decrement the stats about the queue on
        -- the day that this failure actually happened.
        redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed', -1)
    end

    -- First, let's save its data
    redis.call('hmset', QlessJob.ns .. jid,
        'jid'      , jid,
        'klass'    , klass,
        'data'     , cjson.encode(data),
        'priority' , priority,
        'tags'     , cjson.encode(tags),
        'state'    , ((delay > 0) and 'scheduled') or 'waiting',
        'worker'   , '',
        'expires'  , 0,
        'queue'    , self.name,
        'retries'  , retries,
        'remaining', retries)

    -- These are the jids we legitimately have to wait on
    for i, j in ipairs(depends) do
        -- Make sure it's something other than 'nil' or complete.
        local state = redis.call('hget', QlessJob.ns .. j, 'state')
        if (state and state ~= 'complete') then
            redis.call('sadd', QlessJob.ns .. j .. '-dependents' , jid)
            redis.call('sadd', QlessJob.ns .. jid .. '-dependencies', j)
        end
    end

    -- Now, if a delay was provided, and if it's in the future,
    -- then we'll have to schedule it. Otherwise, we're just
    -- going to add it to the work queue.
    if delay > 0 then
        self.scheduled.add(now + delay, jid)
    else
        if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
            self.depends.add(now, jid)
            redis.call('hset', QlessJob.ns .. jid, 'state', 'depends')
        else
            self.work.add(now, priority, jid)
        end
    end

    -- Lastly, we're going to make sure that this item is in the
    -- set of known queues. We should keep this sorted by the
    -- order in which we saw each of these queues
    if redis.call('zscore', 'ql:queues', self.name) == false then
        redis.call('zadd', 'ql:queues', now, self.name)
    end

    if redis.call('zscore', 'ql:tracked', jid) ~= false then
        Qless.publish('put', jid)
    end

    return jid
end

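-- Illustrative sketch (editorial, not part of the original file; jids and
-- klass are hypothetical): a put with a delay lands in the 'scheduled' zset,
-- one with dependencies lands in 'depends', and otherwise the job goes
-- straight onto the work queue:
--
--   Qless.queue('testing'):put(now, 'jid-1', 'MyJob', '{"n":1}', 0)
--   Qless.queue('testing'):put(now, 'jid-2', 'MyJob', '{}', 0,
--       'depends', '["jid-1"]')  -- waits until jid-1 completes
--   Qless.queue('testing'):put(now, 'jid-3', 'MyJob', '{}', 60)
--       -- becomes available 60 seconds from now
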
-- Unfail(0, now, group, queue, [count])
--
-- Move `count` jobs out of the failed state and into the provided queue
function QlessQueue:unfail(now, group, count)
    assert(group, 'Unfail(): Arg "group" missing')
    count = assert(tonumber(count or 25),
        'Unfail(): Arg "count" not a number: ' .. tostring(count))

    -- Get up to that many jobs, and we'll put them in the appropriate queue
    local jids = redis.call('lrange', 'ql:f:' .. group, -count, -1)

    -- And now set each job's state, and put it into the appropriate queue
    for index, jid in ipairs(jids) do
        local job = Qless.job(jid)
        local data = job:data()
        job:history(now, 'put', {q = self.name})
        redis.call('hmset', QlessJob.ns .. data.jid,
            'state'    , 'waiting',
            'worker'   , '',
            'expires'  , 0,
            'queue'    , self.name,
            'remaining', data.retries or 5)
        self.work.add(now, data.priority, data.jid)
    end

    -- Remove these jobs from the failed state
    redis.call('ltrim', 'ql:f:' .. group, 0, -count - 1)
    if (redis.call('llen', 'ql:f:' .. group) == 0) then
        redis.call('srem', 'ql:failures', group)
    end

    return #jids
end

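-- Illustrative sketch (editorial, not part of the original file): failures
-- are grouped by name, so re-queueing the most recent batch of a group
-- looks like:
--
--   Qless.queue('testing'):unfail(now, 'failed-retries-testing', 25)
--
-- which moves up to 25 jids off the tail of ql:f:failed-retries-testing
-- back into the 'waiting' state on the 'testing' queue.
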
function QlessQueue:recur(now, jid, klass, data, spec, ...)
    assert(jid  , 'RecurringJob On(): Arg "jid" missing')
    assert(klass, 'RecurringJob On(): Arg "klass" missing')
    assert(spec , 'RecurringJob On(): Arg "spec" missing')
    data = assert(cjson.decode(data),
        'RecurringJob On(): Arg "data" not JSON: ' .. tostring(data))

    -- At some point in the future, we may have different types of recurring
    -- jobs, but for the time being, we only have 'interval'-type jobs
    if spec == 'interval' then
        local interval = assert(tonumber(arg[1]),
            'Recur(): Arg "interval" not a number: ' .. tostring(arg[1]))
        local offset   = assert(tonumber(arg[2]),
            'Recur(): Arg "offset" not a number: '   .. tostring(arg[2]))
        if interval <= 0 then
            error('Recur(): Arg "interval" must be greater than 0')
        end

        -- Read in all the optional parameters
        local options = {}
        for i = 3, #arg, 2 do options[arg[i]] = arg[i + 1] end
        options.tags = assert(cjson.decode(options.tags or '[]'),
            'Recur(): Arg "tags" must be JSON string array: ' .. tostring(
                options.tags))
        options.priority = assert(tonumber(options.priority or 0),
            'Recur(): Arg "priority" not a number: ' .. tostring(
                options.priority))
        options.retries = assert(tonumber(options.retries or 0),
            'Recur(): Arg "retries" not a number: ' .. tostring(
                options.retries))
        options.backlog = assert(tonumber(options.backlog or 0),
            'Recur(): Arg "backlog" not a number: ' .. tostring(
                options.backlog))

        local count, old_queue = unpack(redis.call('hmget', 'ql:r:' .. jid, 'count', 'queue'))
        count = count or 0

        -- If it has previously been in another queue, then we should remove
        -- some information about it
        if old_queue then
            Qless.queue(old_queue).recurring.remove(jid)
        end

        -- Do some insertions
        redis.call('hmset', 'ql:r:' .. jid,
            'jid'     , jid,
            'klass'   , klass,
            'data'    , cjson.encode(data),
            'priority', options.priority,
            'tags'    , cjson.encode(options.tags or {}),
            'state'   , 'recur',
            'queue'   , self.name,
            'type'    , 'interval',
            -- How many jobs we've spawned from this
            'count'   , count,
            'interval', interval,
            'retries' , options.retries,
            'backlog' , options.backlog)
        -- Now, we should schedule the next run of the job
        self.recurring.add(now + offset, jid)

        -- Lastly, we're going to make sure that this item is in the
        -- set of known queues. We should keep this sorted by the
        -- order in which we saw each of these queues
        if redis.call('zscore', 'ql:queues', self.name) == false then
            redis.call('zadd', 'ql:queues', now, self.name)
        end

        return jid
    else
        error('Recur(): schedule type "' .. tostring(spec) .. '" unknown')
    end
end

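-- Illustrative sketch (editorial, not part of the original file; jid and
-- klass are hypothetical): registering a job to run every 60 seconds,
-- starting 10 seconds from now, might look like:
--
--   Qless.queue('testing'):recur(now, 'rjid-1', 'MyJob', '{}',
--       'interval', 60, 10, 'backlog', 5)
--
-- check_recurring() below then spawns child jobs named 'rjid-1-1',
-- 'rjid-1-2', ... as each interval comes due.
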
-- Return the length of the queue
function QlessQueue:length()
    return self.locks.length() + self.work.length() + self.scheduled.length()
end

-------------------------------------------------------------------------------
-- Housekeeping methods
-------------------------------------------------------------------------------
--! @brief Instantiate any recurring jobs that are ready
function QlessQueue:check_recurring(now, count)
    -- This is how many jobs we've moved so far
    local moved = 0
    -- These are the recurring jobs that need work
    local r = self.recurring.peek(now, 0, count)
    for index, jid in ipairs(r) do
        -- For each of the jids that need jobs scheduled, first
        -- get the last time each of them was run, and then increment
        -- it by its interval. While this time is less than now,
        -- we need to keep putting jobs on the queue
        local klass, data, priority, tags, retries, interval, backlog = unpack(
            redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority',
                'tags', 'retries', 'interval', 'backlog'))
        local _tags = cjson.decode(tags)
        local score = math.floor(tonumber(self.recurring.score(jid)))
        interval = tonumber(interval)

        -- If the backlog is set for this job, then see if it's been a long
        -- time since the last pop
        backlog = tonumber(backlog or 0)
        if backlog ~= 0 then
            -- Check how many jobs we could conceivably generate
            local num = ((now - score) / interval)
            if num > backlog then
                -- Update the score
                score = score + (
                    math.ceil(num - backlog) * interval
                )
            end
        end

        -- We're saving this value so that in the history, we can accurately
        -- reflect when the job would normally have been scheduled
        while (score <= now) and (moved < count) do
            -- Note: this local shadows the outer 'count' argument within the
            -- loop body; the loop condition above still sees the outer one
            local count = redis.call('hincrby', 'ql:r:' .. jid, 'count', 1)
            moved = moved + 1

            -- Add this job to the list of jobs tagged with whatever tags were
            -- supplied
            for i, tag in ipairs(_tags) do
                redis.call('zadd', 'ql:t:' .. tag, now, jid .. '-' .. count)
                redis.call('zincrby', 'ql:tags', 1, tag)
            end

            -- First, let's save its data
            local child_jid = jid .. '-' .. count
            redis.call('hmset', QlessJob.ns .. child_jid,
                'jid'      , child_jid,
                'klass'    , klass,
                'data'     , data,
                'priority' , priority,
                'tags'     , tags,
                'state'    , 'waiting',
                'worker'   , '',
                'expires'  , 0,
                'queue'    , self.name,
                'retries'  , retries,
                'remaining', retries)
            Qless.job(child_jid):history(score, 'put', {q = self.name})

            -- Insert the child job into the work queue, scored by the time
            -- it would normally have been scheduled
            self.work.add(score, priority, child_jid)

            score = score + interval
            self.recurring.add(score, jid)
        end
    end
end

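-- Illustrative sketch (editorial, not part of the original file): a worked
-- example of the backlog clamp above. Suppose interval = 60, backlog = 5,
-- and the queue went unpopped for an hour, so now - score = 3600:
--
--   num = 3600 / 60 = 60 potential runs, which exceeds the backlog of 5,
--   so the score is advanced by ceil(60 - 5) * 60 = 3300 seconds, and only
--   the last handful of intervals are instantiated instead of all 60.
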
--! @brief Check for any jobs that have been scheduled, and shovel them onto
--!        the work queue. Returns nothing, but afterwards, up to `count`
--!        scheduled jobs will be moved into the work queue
function QlessQueue:check_scheduled(now, count)
    local scheduled = self.scheduled.ready(now, 0, count)
    for index, jid in ipairs(scheduled) do
        -- With these in hand, we'll have to go out and find the
        -- priorities of these jobs, and then we'll insert them
        -- into the work queue and then when that's complete, we'll
        -- remove them from the scheduled queue
        local priority = tonumber(
            redis.call('hget', QlessJob.ns .. jid, 'priority') or 0)
        self.work.add(now, priority, jid)

        -- We should also update them to have the state 'waiting'
        -- instead of 'scheduled'
        redis.call('hset', QlessJob.ns .. jid, 'state', 'waiting')
    end

    if #scheduled > 0 then
        -- Now that these have been added to the work list, remove them
        -- from the scheduled list
        self.scheduled.remove(unpack(scheduled))
    end
end

--! @brief Check for and invalidate any locks that have been lost. Returns the
--!        list of jids that have been invalidated
function QlessQueue:invalidate_locks(now, count)
    local jids = {}
    -- Iterate through all the expired locks and add them to the list
    -- of keys that we'll return
    for index, jid in ipairs(self.locks.expired(now, 0, count)) do
        -- Remove this job from the jobs that the worker that was running it
        -- has
        local worker, failure = unpack(
            redis.call('hmget', QlessJob.ns .. jid, 'worker', 'failure'))
        redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)

        -- We'll provide a grace period after jobs time out for them to give
        -- some indication of the failure mode. After that time, however, we'll
        -- consider the worker dust in the wind
        local grace_period = tonumber(Qless.config.get('grace-period'))

        -- For each of these, decrement their retries. If any of them
        -- have exhausted their retries, then we should mark them as
        -- failed.
        local remaining = tonumber(redis.call(
            'hincrbyfloat', QlessJob.ns .. jid, 'remaining', -0.5))

        -- If the remaining value is an odd multiple of 0.5, then we'll assume
        -- that we're just sending the message. Otherwise, it's time to
        -- actually hand out the work to another worker
        local send_message = ((remaining * 2) % 2 == 1)
        local invalidate   = not send_message

        -- If the grace period has been disabled, then we'll do both.
        if grace_period <= 0 then
            remaining = tonumber(redis.call(
                'hincrbyfloat', QlessJob.ns .. jid, 'remaining', -0.5))
            send_message = true
            invalidate   = true
        end

        if send_message then
            -- This is where we supply a courtesy message and give the worker
            -- time to provide a failure message
            if redis.call('zscore', 'ql:tracked', jid) ~= false then
                Qless.publish('stalled', jid)
            end
            Qless.job(jid):history(now, 'timed-out')

            -- Send a message to let the worker know that it has lost its
            -- lock on the job
            local encoded = cjson.encode({
                jid    = jid,
                event  = 'lock_lost',
                worker = worker
            })
            Qless.publish('w:' .. worker, encoded)
            Qless.publish('log', encoded)
            self.locks.add(now + grace_period, jid)

            -- If we got any expired locks, then we should increment the
            -- number of retries for this stage for this bin. The bin is
            -- midnight of the provided day
            local bin = now - (now % 86400)
            redis.call('hincrby',
                'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 1)
        end

        if invalidate then
            -- This is where we actually have to time out the work
            if remaining < 0 then
                -- Now remove the instance from the schedule, and work queues
                -- for the queue it's in
                self.work.remove(jid)
                self.locks.remove(jid)
                self.scheduled.remove(jid)

                local job   = Qless.job(jid)
                local group = 'failed-retries-' .. job:data()['queue']
                job:history(now, 'failed', {group = group})
                redis.call('hmset', QlessJob.ns .. jid, 'state', 'failed',
                    'worker', '',
                    'expires', '')
                -- If the failure has not already been set, then set it
                if not failure then
                    redis.call('hset', QlessJob.ns .. jid,
                        'failure', cjson.encode({
                            ['group']   = group,
                            ['message'] =
                                'Job exhausted retries in queue "' .. self.name .. '"',
                            ['when']    = now,
                            ['worker']  = unpack(job:data('worker'))
                        }))
                end

                -- Add this type of failure to the list of failures
                redis.call('sadd', 'ql:failures', group)
                -- And add this particular instance to the failed types
                redis.call('lpush', 'ql:f:' .. group, jid)

                if redis.call('zscore', 'ql:tracked', jid) ~= false then
                    Qless.publish('failed', jid)
                end
            else
                table.insert(jids, jid)
            end
        end
    end

    return jids
end

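-- Illustrative sketch (editorial, not part of the original file): the
-- half-step decrement above encodes a two-phase timeout in one counter.
-- Suppose a job with remaining = 5 stalls repeatedly:
--
--   remaining: 5.0 -> 4.5   odd multiple of 0.5, so only warn the worker
--                           and extend the lock by grace-period
--   remaining: 4.5 -> 4.0   whole number, so actually re-queue the job
--
-- and so on, until remaining drops below 0 and the job is failed under the
-- group 'failed-retries-<queue>'.
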
-- Forget the provided queues. As in, remove them from the list of known queues
function QlessQueue.deregister(...)
    redis.call('zrem', Qless.ns .. 'queues', unpack(arg))
end

-- Return information about a particular queue, or all queues
--  [
--      {
--          'name': 'testing',
--          'stalled': 2,
--          'waiting': 5,
--          'running': 5,
--          'scheduled': 10,
--          'depends': 5,
--          'recurring': 0
--      }, {
--          ...
--      }
--  ]
function QlessQueue.counts(now, name)
    if name then
        local queue = Qless.queue(name)
        local stalled = queue.locks.length(now)
        return {
            name      = name,
            waiting   = queue.work.length(),
            stalled   = stalled,
            running   = queue.locks.length() - stalled,
            scheduled = queue.scheduled.length(),
            depends   = queue.depends.length(),
            recurring = queue.recurring.length(),
            paused    = queue:paused()
        }
    else
        local queues = redis.call('zrange', 'ql:queues', 0, -1)
        local response = {}
        for index, qname in ipairs(queues) do
            table.insert(response, QlessQueue.counts(now, qname))
        end
        return response
    end
end
-- Recur(0, 'on', queue, jid, klass, data, now, 'interval', second, offset, [priority p], [tags t], [retries r])
-- Recur(0, 'off', jid)
-- Recur(0, 'get', jid)
-- Recur(0, 'update', jid, ['priority', priority], ['interval', interval], ['retries', retries], ['data', data], ['klass', klass], ['queue', queue])
-- Recur(0, 'tag', jid, tag, [tag, [...]])
-- Recur(0, 'untag', jid, tag, [tag, [...]])
-- -------------------------------------------------------------------------------------------------------
-- This script takes the name of a queue, and then the info
-- about the work item, and makes sure that jobs matching
-- its criteria are regularly made available.
function QlessRecurringJob:data()
    local job = redis.call(
        'hmget', 'ql:r:' .. self.jid, 'jid', 'klass', 'state', 'queue',
        'priority', 'interval', 'retries', 'count', 'data', 'tags', 'backlog')

    if not job[1] then
        return nil
    end

    return {
        jid      = job[1],
        klass    = job[2],
        state    = job[3],
        queue    = job[4],
        priority = tonumber(job[5]),
        interval = tonumber(job[6]),
        retries  = tonumber(job[7]),
        count    = tonumber(job[8]),
        data     = cjson.decode(job[9]),
        tags     = cjson.decode(job[10]),
        backlog  = tonumber(job[11] or 0)
    }
end

-- Update the recurring job data
function QlessRecurringJob:update(...)
    -- Make sure that the job exists
    if redis.call('exists', 'ql:r:' .. self.jid) ~= 0 then
        for i = 1, #arg, 2 do
            local key = arg[i]
            local value = arg[i+1]
            if key == 'priority' or key == 'interval' or key == 'retries' then
                value = assert(tonumber(value), 'Recur(): Arg "' .. key .. '" must be a number: ' .. tostring(value))
                -- If the command is 'interval', then we need to update the
                -- time when it should next be scheduled
                if key == 'interval' then
                    local queue, interval = unpack(redis.call('hmget', 'ql:r:' .. self.jid, 'queue', 'interval'))
                    Qless.queue(queue).recurring.update(
                        value - tonumber(interval), self.jid)
                end
                redis.call('hset', 'ql:r:' .. self.jid, key, value)
            elseif key == 'data' then
                value = assert(cjson.decode(value), 'Recur(): Arg "data" is not JSON-encoded: ' .. tostring(value))
                redis.call('hset', 'ql:r:' .. self.jid, 'data', cjson.encode(value))
            elseif key == 'klass' then
                redis.call('hset', 'ql:r:' .. self.jid, 'klass', value)
            elseif key == 'queue' then
                local queue_obj = Qless.queue(
                    redis.call('hget', 'ql:r:' .. self.jid, 'queue'))
                local score = queue_obj.recurring.score(self.jid)
                queue_obj.recurring.remove(self.jid)
                Qless.queue(value).recurring.add(score, self.jid)
                redis.call('hset', 'ql:r:' .. self.jid, 'queue', value)
            elseif key == 'backlog' then
                value = assert(tonumber(value),
                    'Recur(): Arg "backlog" not a number: ' .. tostring(value))
                redis.call('hset', 'ql:r:' .. self.jid, 'backlog', value)
            else
                error('Recur(): Unrecognized option "' .. key .. '"')
            end
        end
        return true
    else
        error('Recur(): No recurring job ' .. self.jid)
    end
end

function QlessRecurringJob:tag(...)
    local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
    -- If the job has been canceled / deleted, then return false
    if tags then
        -- Decode the json blob, convert to dictionary
        tags = cjson.decode(tags)
        local _tags = {}
        for i,v in ipairs(tags) do _tags[v] = true end

        -- Add any of the supplied tags the job doesn't already have
        for i=1,#arg do if _tags[arg[i]] == nil then table.insert(tags, arg[i]) end end

        tags = cjson.encode(tags)
        redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
        return tags
    else
        return false
    end
end

function QlessRecurringJob:untag(...)
    -- Get the existing tags
    local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
    -- If the job has been canceled / deleted, then return false
    if tags then
        -- Decode the json blob, convert to dictionary
        tags = cjson.decode(tags)
        local _tags = {}
        -- Make a hash
        for i,v in ipairs(tags) do _tags[v] = true end
        -- Delete these from the hash
        for i = 1,#arg do _tags[arg[i]] = nil end
        -- Back into a list
        local results = {}
        for i, tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
        -- json encode them, set, and return
        tags = cjson.encode(results)
        redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
        return tags
    else
        return false
    end
end

function QlessRecurringJob:unrecur()
    -- First, find out what queue it was attached to
    local queue = redis.call('hget', 'ql:r:' .. self.jid, 'queue')
    if queue then
        -- Now, delete it from the queue it was attached to, and delete the
        -- thing itself
        Qless.queue(queue).recurring.remove(self.jid)
        redis.call('del', 'ql:r:' .. self.jid)
        return true
    else
        return true
    end
end
-- DeregisterWorkers(0, worker)
-- This script takes the name of one or more workers and removes them
-- from the ql:workers set.
--
-- Args: The list of workers to deregister.
function QlessWorker.deregister(...)
    redis.call('zrem', 'ql:workers', unpack(arg))
end

-- Workers(0, now, [worker])
----------------------------
-- Provide data about all the workers, or if a specific worker is provided,
-- then which jobs that worker is responsible for. If no worker is provided,
-- expect a response of the form:
--
--  [
--      # This is sorted by the recency of activity from that worker
--      {
--          'name'   : 'hostname1-pid1',
--          'jobs'   : 20,
--          'stalled': 0
--      }, {
--          ...
--      }
--  ]
--
-- If a worker id is provided, then expect a response of the form:
--
--  {
--      'jobs': [
--          jid1,
--          jid2,
--          ...
--      ], 'stalled': [
--          jid1,
--          ...
--      ]
--  }
--
function QlessWorker.counts(now, worker)
    -- Clean up all the workers' job lists if they're too old. This is
    -- determined by the `max-worker-age` configuration, defaulting to the
    -- last day. Seems like a 'reasonable' default
    local interval = tonumber(Qless.config.get('max-worker-age', 86400))

    local workers  = redis.call('zrangebyscore', 'ql:workers', 0, now - interval)
    for index, worker in ipairs(workers) do
        redis.call('del', 'ql:w:' .. worker .. ':jobs')
    end

    -- And now remove them from the list of known workers
    redis.call('zremrangebyscore', 'ql:workers', 0, now - interval)

    if worker then
        return {
            jobs    = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now + 8640000, now),
            stalled = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now, 0)
        }
    else
        local response = {}
        local workers = redis.call('zrevrange', 'ql:workers', 0, -1)
        for index, worker in ipairs(workers) do
            table.insert(response, {
                name    = worker,
                jobs    = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', now, now + 8640000),
                stalled = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', 0, now)
            })
        end
        return response
    end
end
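
-- Illustrative sketch (editorial, not part of the original file): a jid's
-- score in ql:w:<worker>:jobs is its lock expiry, so splitting the zset at
-- 'now' separates live jobs from stalled ones above. Raising the cleanup
-- window to a week would just be:
--
--   Qless.config.set('max-worker-age', 7 * 86400)
--
-- (Qless.config.set is assumed here from the config API whose getter is
-- used above.)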