qless 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +9 -3
- data/README.md +70 -25
- data/Rakefile +125 -9
- data/exe/install_phantomjs +21 -0
- data/lib/qless.rb +115 -76
- data/lib/qless/config.rb +11 -9
- data/lib/qless/failure_formatter.rb +43 -0
- data/lib/qless/job.rb +201 -102
- data/lib/qless/job_reservers/ordered.rb +7 -1
- data/lib/qless/job_reservers/round_robin.rb +16 -6
- data/lib/qless/job_reservers/shuffled_round_robin.rb +9 -2
- data/lib/qless/lua/qless-lib.lua +2463 -0
- data/lib/qless/lua/qless.lua +2012 -0
- data/lib/qless/lua_script.rb +63 -12
- data/lib/qless/middleware/memory_usage_monitor.rb +62 -0
- data/lib/qless/middleware/metriks.rb +45 -0
- data/lib/qless/middleware/redis_reconnect.rb +6 -3
- data/lib/qless/middleware/requeue_exceptions.rb +94 -0
- data/lib/qless/middleware/retry_exceptions.rb +38 -9
- data/lib/qless/middleware/sentry.rb +3 -7
- data/lib/qless/middleware/timeout.rb +64 -0
- data/lib/qless/queue.rb +90 -55
- data/lib/qless/server.rb +177 -130
- data/lib/qless/server/views/_job.erb +33 -15
- data/lib/qless/server/views/completed.erb +11 -0
- data/lib/qless/server/views/layout.erb +70 -11
- data/lib/qless/server/views/overview.erb +93 -53
- data/lib/qless/server/views/queue.erb +9 -8
- data/lib/qless/server/views/queues.erb +18 -1
- data/lib/qless/subscriber.rb +37 -22
- data/lib/qless/tasks.rb +5 -10
- data/lib/qless/test_helpers/worker_helpers.rb +55 -0
- data/lib/qless/version.rb +3 -1
- data/lib/qless/worker.rb +4 -413
- data/lib/qless/worker/base.rb +247 -0
- data/lib/qless/worker/forking.rb +245 -0
- data/lib/qless/worker/serial.rb +41 -0
- metadata +135 -52
- data/lib/qless/qless-core/cancel.lua +0 -101
- data/lib/qless/qless-core/complete.lua +0 -233
- data/lib/qless/qless-core/config.lua +0 -56
- data/lib/qless/qless-core/depends.lua +0 -65
- data/lib/qless/qless-core/deregister_workers.lua +0 -12
- data/lib/qless/qless-core/fail.lua +0 -117
- data/lib/qless/qless-core/failed.lua +0 -83
- data/lib/qless/qless-core/get.lua +0 -37
- data/lib/qless/qless-core/heartbeat.lua +0 -51
- data/lib/qless/qless-core/jobs.lua +0 -41
- data/lib/qless/qless-core/pause.lua +0 -18
- data/lib/qless/qless-core/peek.lua +0 -165
- data/lib/qless/qless-core/pop.lua +0 -314
- data/lib/qless/qless-core/priority.lua +0 -32
- data/lib/qless/qless-core/put.lua +0 -169
- data/lib/qless/qless-core/qless-lib.lua +0 -2354
- data/lib/qless/qless-core/qless.lua +0 -1862
- data/lib/qless/qless-core/queues.lua +0 -58
- data/lib/qless/qless-core/recur.lua +0 -190
- data/lib/qless/qless-core/retry.lua +0 -73
- data/lib/qless/qless-core/stats.lua +0 -92
- data/lib/qless/qless-core/tag.lua +0 -100
- data/lib/qless/qless-core/track.lua +0 -79
- data/lib/qless/qless-core/unfail.lua +0 -54
- data/lib/qless/qless-core/unpause.lua +0 -12
- data/lib/qless/qless-core/workers.lua +0 -69
- data/lib/qless/wait_until.rb +0 -19
@@ -1,3 +1,5 @@
|
|
1
|
+
# Encoding: utf-8
|
2
|
+
|
1
3
|
module Qless
|
2
4
|
module JobReservers
|
3
5
|
class Ordered
|
@@ -15,8 +17,12 @@ module Qless
|
|
15
17
|
nil
|
16
18
|
end
|
17
19
|
|
20
|
+
def prep_for_work!
|
21
|
+
# nothing here on purpose
|
22
|
+
end
|
23
|
+
|
18
24
|
def description
|
19
|
-
@description ||= @queues.map(&:name).join(', ') +
|
25
|
+
@description ||= @queues.map(&:name).join(', ') + ' (ordered)'
|
20
26
|
end
|
21
27
|
end
|
22
28
|
end
|
@@ -1,5 +1,8 @@
|
|
1
|
+
# Encoding: utf-8
|
2
|
+
|
1
3
|
module Qless
|
2
4
|
module JobReservers
|
5
|
+
# Round-robins through all the provided queues
|
3
6
|
class RoundRobin
|
4
7
|
attr_reader :queues
|
5
8
|
|
@@ -11,20 +14,28 @@ module Qless
|
|
11
14
|
|
12
15
|
def reserve
|
13
16
|
@num_queues.times do |i|
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
+
job = next_queue.pop
|
18
|
+
return job if job
|
17
19
|
end
|
18
20
|
nil
|
19
21
|
end
|
20
22
|
|
23
|
+
def prep_for_work!
|
24
|
+
# nothing here on purpose
|
25
|
+
end
|
26
|
+
|
21
27
|
def description
|
22
|
-
@description ||=
|
28
|
+
@description ||=
|
29
|
+
@queues.map(&:name).join(', ') + " (#{self.class::TYPE_DESCRIPTION})"
|
30
|
+
end
|
31
|
+
|
32
|
+
def reset_description!
|
33
|
+
@description = nil
|
23
34
|
end
|
24
35
|
|
25
36
|
private
|
26
37
|
|
27
|
-
TYPE_DESCRIPTION =
|
38
|
+
TYPE_DESCRIPTION = 'round robin'
|
28
39
|
|
29
40
|
def next_queue
|
30
41
|
@last_popped_queue_index = (@last_popped_queue_index + 1) % @num_queues
|
@@ -33,4 +44,3 @@ module Qless
|
|
33
44
|
end
|
34
45
|
end
|
35
46
|
end
|
36
|
-
|
@@ -1,14 +1,21 @@
|
|
1
|
+
# Encoding: utf-8
|
2
|
+
|
1
3
|
require 'qless/job_reservers/round_robin'
|
2
4
|
|
3
5
|
module Qless
|
4
6
|
module JobReservers
|
7
|
+
# Like round-robin but shuffles the order of the queues
|
5
8
|
class ShuffledRoundRobin < RoundRobin
|
6
9
|
def initialize(queues)
|
7
10
|
super(queues.shuffle)
|
8
11
|
end
|
9
12
|
|
10
|
-
|
13
|
+
def prep_for_work!
|
14
|
+
@queues = @queues.shuffle
|
15
|
+
reset_description!
|
16
|
+
end
|
17
|
+
|
18
|
+
TYPE_DESCRIPTION = 'shuffled round robin'
|
11
19
|
end
|
12
20
|
end
|
13
21
|
end
|
14
|
-
|
@@ -0,0 +1,2463 @@
|
|
1
|
+
-- Current SHA: 525c39000dc71df53a3502491cb4daf0e1128f1d
|
2
|
+
-- This is a generated file
|
3
|
+
-------------------------------------------------------------------------------
|
4
|
+
-- Forward declarations to make everything happy
|
5
|
+
-------------------------------------------------------------------------------
|
6
|
+
local Qless = {
|
7
|
+
ns = 'ql:'
|
8
|
+
}
|
9
|
+
|
10
|
+
-- Queue forward delcaration
|
11
|
+
local QlessQueue = {
|
12
|
+
ns = Qless.ns .. 'q:'
|
13
|
+
}
|
14
|
+
QlessQueue.__index = QlessQueue
|
15
|
+
|
16
|
+
-- Worker forward declaration
|
17
|
+
local QlessWorker = {
|
18
|
+
ns = Qless.ns .. 'w:'
|
19
|
+
}
|
20
|
+
QlessWorker.__index = QlessWorker
|
21
|
+
|
22
|
+
-- Job forward declaration
|
23
|
+
local QlessJob = {
|
24
|
+
ns = Qless.ns .. 'j:'
|
25
|
+
}
|
26
|
+
QlessJob.__index = QlessJob
|
27
|
+
|
28
|
+
-- RecurringJob forward declaration
|
29
|
+
local QlessRecurringJob = {}
|
30
|
+
QlessRecurringJob.__index = QlessRecurringJob
|
31
|
+
|
32
|
+
-- Config forward declaration
|
33
|
+
Qless.config = {}
|
34
|
+
|
35
|
+
-- Extend a table. This comes up quite frequently
|
36
|
+
function table.extend(self, other)
|
37
|
+
for i, v in ipairs(other) do
|
38
|
+
table.insert(self, v)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
-- This is essentially the same as redis' publish, but it prefixes the channel
|
43
|
+
-- with the Qless namespace
|
44
|
+
function Qless.publish(channel, message)
|
45
|
+
redis.call('publish', Qless.ns .. channel, message)
|
46
|
+
end
|
47
|
+
|
48
|
+
-- Return a job object given its job id
|
49
|
+
function Qless.job(jid)
|
50
|
+
assert(jid, 'Job(): no jid provided')
|
51
|
+
local job = {}
|
52
|
+
setmetatable(job, QlessJob)
|
53
|
+
job.jid = jid
|
54
|
+
return job
|
55
|
+
end
|
56
|
+
|
57
|
+
-- Return a recurring job object
|
58
|
+
function Qless.recurring(jid)
|
59
|
+
assert(jid, 'Recurring(): no jid provided')
|
60
|
+
local job = {}
|
61
|
+
setmetatable(job, QlessRecurringJob)
|
62
|
+
job.jid = jid
|
63
|
+
return job
|
64
|
+
end
|
65
|
+
|
66
|
+
-- Failed([group, [start, [limit]]])
|
67
|
+
-- ------------------------------------
|
68
|
+
-- If no group is provided, this returns a JSON blob of the counts of the
|
69
|
+
-- various groups of failures known. If a group is provided, it will report up
|
70
|
+
-- to `limit` from `start` of the jobs affected by that issue.
|
71
|
+
--
|
72
|
+
-- # If no group, then...
|
73
|
+
-- {
|
74
|
+
-- 'group1': 1,
|
75
|
+
-- 'group2': 5,
|
76
|
+
-- ...
|
77
|
+
-- }
|
78
|
+
--
|
79
|
+
-- # If a group is provided, then...
|
80
|
+
-- {
|
81
|
+
-- 'total': 20,
|
82
|
+
-- 'jobs': [
|
83
|
+
-- {
|
84
|
+
-- # All the normal keys for a job
|
85
|
+
-- 'jid': ...,
|
86
|
+
-- 'data': ...
|
87
|
+
-- # The message for this particular instance
|
88
|
+
-- 'message': ...,
|
89
|
+
-- 'group': ...,
|
90
|
+
-- }, ...
|
91
|
+
-- ]
|
92
|
+
-- }
|
93
|
+
--
|
94
|
+
function Qless.failed(group, start, limit)
|
95
|
+
start = assert(tonumber(start or 0),
|
96
|
+
'Failed(): Arg "start" is not a number: ' .. (start or 'nil'))
|
97
|
+
limit = assert(tonumber(limit or 25),
|
98
|
+
'Failed(): Arg "limit" is not a number: ' .. (limit or 'nil'))
|
99
|
+
|
100
|
+
if group then
|
101
|
+
-- If a group was provided, then we should do paginated lookup
|
102
|
+
return {
|
103
|
+
total = redis.call('llen', 'ql:f:' .. group),
|
104
|
+
jobs = redis.call('lrange', 'ql:f:' .. group, start, start + limit - 1)
|
105
|
+
}
|
106
|
+
else
|
107
|
+
-- Otherwise, we should just list all the known failure groups we have
|
108
|
+
local response = {}
|
109
|
+
local groups = redis.call('smembers', 'ql:failures')
|
110
|
+
for index, group in ipairs(groups) do
|
111
|
+
response[group] = redis.call('llen', 'ql:f:' .. group)
|
112
|
+
end
|
113
|
+
return response
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
-- Jobs(now, 'complete', [offset, [count]])
|
118
|
+
-- Jobs(now, (
|
119
|
+
-- 'stalled' | 'running' | 'scheduled' | 'depends', 'recurring'
|
120
|
+
-- ), queue, [offset, [count]])
|
121
|
+
-------------------------------------------------------------------------------
|
122
|
+
-- Return all the job ids currently considered to be in the provided state
|
123
|
+
-- in a particular queue. The response is a list of job ids:
|
124
|
+
--
|
125
|
+
-- [
|
126
|
+
-- jid1,
|
127
|
+
-- jid2,
|
128
|
+
-- ...
|
129
|
+
-- ]
|
130
|
+
function Qless.jobs(now, state, ...)
|
131
|
+
assert(state, 'Jobs(): Arg "state" missing')
|
132
|
+
if state == 'complete' then
|
133
|
+
local offset = assert(tonumber(arg[1] or 0),
|
134
|
+
'Jobs(): Arg "offset" not a number: ' .. tostring(arg[1]))
|
135
|
+
local count = assert(tonumber(arg[2] or 25),
|
136
|
+
'Jobs(): Arg "count" not a number: ' .. tostring(arg[2]))
|
137
|
+
return redis.call('zrevrange', 'ql:completed', offset,
|
138
|
+
offset + count - 1)
|
139
|
+
else
|
140
|
+
local name = assert(arg[1], 'Jobs(): Arg "queue" missing')
|
141
|
+
local offset = assert(tonumber(arg[2] or 0),
|
142
|
+
'Jobs(): Arg "offset" not a number: ' .. tostring(arg[2]))
|
143
|
+
local count = assert(tonumber(arg[3] or 25),
|
144
|
+
'Jobs(): Arg "count" not a number: ' .. tostring(arg[3]))
|
145
|
+
|
146
|
+
local queue = Qless.queue(name)
|
147
|
+
if state == 'running' then
|
148
|
+
return queue.locks.peek(now, offset, count)
|
149
|
+
elseif state == 'stalled' then
|
150
|
+
return queue.locks.expired(now, offset, count)
|
151
|
+
elseif state == 'scheduled' then
|
152
|
+
queue:check_scheduled(now, queue.scheduled.length())
|
153
|
+
return queue.scheduled.peek(now, offset, count)
|
154
|
+
elseif state == 'depends' then
|
155
|
+
return queue.depends.peek(now, offset, count)
|
156
|
+
elseif state == 'recurring' then
|
157
|
+
return queue.recurring.peek(math.huge, offset, count)
|
158
|
+
else
|
159
|
+
error('Jobs(): Unknown type "' .. state .. '"')
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
-- Track()
|
165
|
+
-- Track(now, ('track' | 'untrack'), jid)
|
166
|
+
-- ------------------------------------------
|
167
|
+
-- If no arguments are provided, it returns details of all currently-tracked
|
168
|
+
-- jobs. If the first argument is 'track', then it will start tracking the job
|
169
|
+
-- associated with that id, and 'untrack' stops tracking it. In this context,
|
170
|
+
-- tracking is nothing more than saving the job to a list of jobs that are
|
171
|
+
-- considered special.
|
172
|
+
--
|
173
|
+
-- {
|
174
|
+
-- 'jobs': [
|
175
|
+
-- {
|
176
|
+
-- 'jid': ...,
|
177
|
+
-- # All the other details you'd get from 'get'
|
178
|
+
-- }, {
|
179
|
+
-- ...
|
180
|
+
-- }
|
181
|
+
-- ], 'expired': [
|
182
|
+
-- # These are all the jids that are completed and whose data expired
|
183
|
+
-- 'deadbeef',
|
184
|
+
-- ...,
|
185
|
+
-- ...,
|
186
|
+
-- ]
|
187
|
+
-- }
|
188
|
+
--
|
189
|
+
function Qless.track(now, command, jid)
|
190
|
+
if command ~= nil then
|
191
|
+
assert(jid, 'Track(): Arg "jid" missing')
|
192
|
+
-- Verify that job exists
|
193
|
+
assert(Qless.job(jid):exists(), 'Track(): Job does not exist')
|
194
|
+
if string.lower(command) == 'track' then
|
195
|
+
Qless.publish('track', jid)
|
196
|
+
return redis.call('zadd', 'ql:tracked', now, jid)
|
197
|
+
elseif string.lower(command) == 'untrack' then
|
198
|
+
Qless.publish('untrack', jid)
|
199
|
+
return redis.call('zrem', 'ql:tracked', jid)
|
200
|
+
else
|
201
|
+
error('Track(): Unknown action "' .. command .. '"')
|
202
|
+
end
|
203
|
+
else
|
204
|
+
local response = {
|
205
|
+
jobs = {},
|
206
|
+
expired = {}
|
207
|
+
}
|
208
|
+
local jids = redis.call('zrange', 'ql:tracked', 0, -1)
|
209
|
+
for index, jid in ipairs(jids) do
|
210
|
+
local data = Qless.job(jid):data()
|
211
|
+
if data then
|
212
|
+
table.insert(response.jobs, data)
|
213
|
+
else
|
214
|
+
table.insert(response.expired, jid)
|
215
|
+
end
|
216
|
+
end
|
217
|
+
return response
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
-- tag(now, ('add' | 'remove'), jid, tag, [tag, ...])
|
222
|
+
-- tag(now, 'get', tag, [offset, [count]])
|
223
|
+
-- tag(now, 'top', [offset, [count]])
|
224
|
+
-- -----------------------------------------------------------------------------
|
225
|
+
-- Accepts a jid, 'add' or 'remove', and then a list of tags
|
226
|
+
-- to either add or remove from the job. Alternatively, 'get',
|
227
|
+
-- a tag to get jobs associated with that tag, and offset and
|
228
|
+
-- count
|
229
|
+
--
|
230
|
+
-- If 'add' or 'remove', the response is a list of the jobs
|
231
|
+
-- current tags, or False if the job doesn't exist. If 'get',
|
232
|
+
-- the response is of the form:
|
233
|
+
--
|
234
|
+
-- {
|
235
|
+
-- total: ...,
|
236
|
+
-- jobs: [
|
237
|
+
-- jid,
|
238
|
+
-- ...
|
239
|
+
-- ]
|
240
|
+
-- }
|
241
|
+
--
|
242
|
+
-- If 'top' is supplied, it returns the most commonly-used tags
|
243
|
+
-- in a paginated fashion.
|
244
|
+
function Qless.tag(now, command, ...)
|
245
|
+
assert(command,
|
246
|
+
'Tag(): Arg "command" must be "add", "remove", "get" or "top"')
|
247
|
+
|
248
|
+
if command == 'add' then
|
249
|
+
local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
|
250
|
+
local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
|
251
|
+
-- If the job has been canceled / deleted, then return false
|
252
|
+
if tags then
|
253
|
+
-- Decode the json blob, convert to dictionary
|
254
|
+
tags = cjson.decode(tags)
|
255
|
+
local _tags = {}
|
256
|
+
for i,v in ipairs(tags) do _tags[v] = true end
|
257
|
+
|
258
|
+
-- Otherwise, add the job to the sorted set with that tags
|
259
|
+
for i=2,#arg do
|
260
|
+
local tag = arg[i]
|
261
|
+
if _tags[tag] == nil then
|
262
|
+
_tags[tag] = true
|
263
|
+
table.insert(tags, tag)
|
264
|
+
end
|
265
|
+
redis.call('zadd', 'ql:t:' .. tag, now, jid)
|
266
|
+
redis.call('zincrby', 'ql:tags', 1, tag)
|
267
|
+
end
|
268
|
+
|
269
|
+
redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(tags))
|
270
|
+
return tags
|
271
|
+
else
|
272
|
+
error('Tag(): Job ' .. jid .. ' does not exist')
|
273
|
+
end
|
274
|
+
elseif command == 'remove' then
|
275
|
+
local jid = assert(arg[1], 'Tag(): Arg "jid" missing')
|
276
|
+
local tags = redis.call('hget', QlessJob.ns .. jid, 'tags')
|
277
|
+
-- If the job has been canceled / deleted, then return false
|
278
|
+
if tags then
|
279
|
+
-- Decode the json blob, convert to dictionary
|
280
|
+
tags = cjson.decode(tags)
|
281
|
+
local _tags = {}
|
282
|
+
for i,v in ipairs(tags) do _tags[v] = true end
|
283
|
+
|
284
|
+
-- Otherwise, add the job to the sorted set with that tags
|
285
|
+
for i=2,#arg do
|
286
|
+
local tag = arg[i]
|
287
|
+
_tags[tag] = nil
|
288
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
289
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
290
|
+
end
|
291
|
+
|
292
|
+
local results = {}
|
293
|
+
for i,tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
|
294
|
+
|
295
|
+
redis.call('hset', QlessJob.ns .. jid, 'tags', cjson.encode(results))
|
296
|
+
return results
|
297
|
+
else
|
298
|
+
error('Tag(): Job ' .. jid .. ' does not exist')
|
299
|
+
end
|
300
|
+
elseif command == 'get' then
|
301
|
+
local tag = assert(arg[1], 'Tag(): Arg "tag" missing')
|
302
|
+
local offset = assert(tonumber(arg[2] or 0),
|
303
|
+
'Tag(): Arg "offset" not a number: ' .. tostring(arg[2]))
|
304
|
+
local count = assert(tonumber(arg[3] or 25),
|
305
|
+
'Tag(): Arg "count" not a number: ' .. tostring(arg[3]))
|
306
|
+
return {
|
307
|
+
total = redis.call('zcard', 'ql:t:' .. tag),
|
308
|
+
jobs = redis.call('zrange', 'ql:t:' .. tag, offset, offset + count - 1)
|
309
|
+
}
|
310
|
+
elseif command == 'top' then
|
311
|
+
local offset = assert(tonumber(arg[1] or 0) , 'Tag(): Arg "offset" not a number: ' .. tostring(arg[1]))
|
312
|
+
local count = assert(tonumber(arg[2] or 25), 'Tag(): Arg "count" not a number: ' .. tostring(arg[2]))
|
313
|
+
return redis.call('zrevrangebyscore', 'ql:tags', '+inf', 2, 'limit', offset, count)
|
314
|
+
else
|
315
|
+
error('Tag(): First argument must be "add", "remove" or "get"')
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
-- Cancel(...)
|
320
|
+
-- --------------
|
321
|
+
-- Cancel a job from taking place. It will be deleted from the system, and any
|
322
|
+
-- attempts to renew a heartbeat will fail, and any attempts to complete it
|
323
|
+
-- will fail. If you try to get the data on the object, you will get nothing.
|
324
|
+
function Qless.cancel(...)
|
325
|
+
-- Dependents is a mapping of a job to its dependent jids
|
326
|
+
local dependents = {}
|
327
|
+
for _, jid in ipairs(arg) do
|
328
|
+
dependents[jid] = redis.call(
|
329
|
+
'smembers', QlessJob.ns .. jid .. '-dependents') or {}
|
330
|
+
end
|
331
|
+
|
332
|
+
-- Now, we'll loop through every jid we intend to cancel, and we'll go
|
333
|
+
-- make sure that this operation will be ok
|
334
|
+
for i, jid in ipairs(arg) do
|
335
|
+
for j, dep in ipairs(dependents[jid]) do
|
336
|
+
if dependents[dep] == nil then
|
337
|
+
error('Cancel(): ' .. jid .. ' is a dependency of ' .. dep ..
|
338
|
+
' but is not mentioned to be canceled')
|
339
|
+
end
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
-- If we've made it this far, then we are good to go. We can now just
|
344
|
+
-- remove any trace of all these jobs, as they form a dependent clique
|
345
|
+
for _, jid in ipairs(arg) do
|
346
|
+
-- Find any stage it's associated with and remove its from that stage
|
347
|
+
local state, queue, failure, worker = unpack(redis.call(
|
348
|
+
'hmget', QlessJob.ns .. jid, 'state', 'queue', 'failure', 'worker'))
|
349
|
+
|
350
|
+
if state ~= 'complete' then
|
351
|
+
-- Send a message out on the appropriate channels
|
352
|
+
local encoded = cjson.encode({
|
353
|
+
jid = jid,
|
354
|
+
worker = worker,
|
355
|
+
event = 'canceled',
|
356
|
+
queue = queue
|
357
|
+
})
|
358
|
+
Qless.publish('log', encoded)
|
359
|
+
|
360
|
+
-- Remove this job from whatever worker has it, if any
|
361
|
+
if worker and (worker ~= '') then
|
362
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
|
363
|
+
-- If necessary, send a message to the appropriate worker, too
|
364
|
+
Qless.publish('w:' .. worker, encoded)
|
365
|
+
end
|
366
|
+
|
367
|
+
-- Remove it from that queue
|
368
|
+
if queue then
|
369
|
+
local queue = Qless.queue(queue)
|
370
|
+
queue.work.remove(jid)
|
371
|
+
queue.locks.remove(jid)
|
372
|
+
queue.scheduled.remove(jid)
|
373
|
+
queue.depends.remove(jid)
|
374
|
+
end
|
375
|
+
|
376
|
+
-- We should probably go through all our dependencies and remove
|
377
|
+
-- ourselves from the list of dependents
|
378
|
+
for i, j in ipairs(redis.call(
|
379
|
+
'smembers', QlessJob.ns .. jid .. '-dependencies')) do
|
380
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependents', jid)
|
381
|
+
end
|
382
|
+
|
383
|
+
-- Delete any notion of dependencies it has
|
384
|
+
redis.call('del', QlessJob.ns .. jid .. '-dependencies')
|
385
|
+
|
386
|
+
-- If we're in the failed state, remove all of our data
|
387
|
+
if state == 'failed' then
|
388
|
+
failure = cjson.decode(failure)
|
389
|
+
-- We need to make this remove it from the failed queues
|
390
|
+
redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
|
391
|
+
if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
|
392
|
+
redis.call('srem', 'ql:failures', failure.group)
|
393
|
+
end
|
394
|
+
-- Remove one count from the failed count of the particular
|
395
|
+
-- queue
|
396
|
+
local bin = failure.when - (failure.when % 86400)
|
397
|
+
local failed = redis.call(
|
398
|
+
'hget', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed')
|
399
|
+
redis.call('hset',
|
400
|
+
'ql:s:stats:' .. bin .. ':' .. queue, 'failed', failed - 1)
|
401
|
+
end
|
402
|
+
|
403
|
+
-- Remove it as a job that's tagged with this particular tag
|
404
|
+
local tags = cjson.decode(
|
405
|
+
redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
|
406
|
+
for i, tag in ipairs(tags) do
|
407
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
408
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
409
|
+
end
|
410
|
+
|
411
|
+
-- If the job was being tracked, we should notify
|
412
|
+
if redis.call('zscore', 'ql:tracked', jid) ~= false then
|
413
|
+
Qless.publish('canceled', jid)
|
414
|
+
end
|
415
|
+
|
416
|
+
-- Just go ahead and delete our data
|
417
|
+
redis.call('del', QlessJob.ns .. jid)
|
418
|
+
redis.call('del', QlessJob.ns .. jid .. '-history')
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
return arg
|
423
|
+
end
|
424
|
+
|
425
|
+
-------------------------------------------------------------------------------
|
426
|
+
-- Configuration interactions
|
427
|
+
-------------------------------------------------------------------------------
|
428
|
+
|
429
|
+
-- This represents our default configuration settings
|
430
|
+
Qless.config.defaults = {
|
431
|
+
['application'] = 'qless',
|
432
|
+
['heartbeat'] = 60,
|
433
|
+
['grace-period'] = 10,
|
434
|
+
['stats-history'] = 30,
|
435
|
+
['histogram-history'] = 7,
|
436
|
+
['jobs-history-count'] = 50000,
|
437
|
+
['jobs-history'] = 604800
|
438
|
+
}
|
439
|
+
|
440
|
+
-- Get one or more of the keys
|
441
|
+
Qless.config.get = function(key, default)
|
442
|
+
if key then
|
443
|
+
return redis.call('hget', 'ql:config', key) or
|
444
|
+
Qless.config.defaults[key] or default
|
445
|
+
else
|
446
|
+
-- Inspired by redis-lua https://github.com/nrk/redis-lua/blob/version-2.0/src/redis.lua
|
447
|
+
local reply = redis.call('hgetall', 'ql:config')
|
448
|
+
for i = 1, #reply, 2 do
|
449
|
+
Qless.config.defaults[reply[i]] = reply[i + 1]
|
450
|
+
end
|
451
|
+
return Qless.config.defaults
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
-- Set a configuration variable
|
456
|
+
Qless.config.set = function(option, value)
|
457
|
+
assert(option, 'config.set(): Arg "option" missing')
|
458
|
+
assert(value , 'config.set(): Arg "value" missing')
|
459
|
+
-- Send out a log message
|
460
|
+
Qless.publish('log', cjson.encode({
|
461
|
+
event = 'config_set',
|
462
|
+
option = option,
|
463
|
+
value = value
|
464
|
+
}))
|
465
|
+
|
466
|
+
redis.call('hset', 'ql:config', option, value)
|
467
|
+
end
|
468
|
+
|
469
|
+
-- Unset a configuration option
|
470
|
+
Qless.config.unset = function(option)
|
471
|
+
assert(option, 'config.unset(): Arg "option" missing')
|
472
|
+
-- Send out a log message
|
473
|
+
Qless.publish('log', cjson.encode({
|
474
|
+
event = 'config_unset',
|
475
|
+
option = option
|
476
|
+
}))
|
477
|
+
|
478
|
+
redis.call('hdel', 'ql:config', option)
|
479
|
+
end
|
480
|
+
-------------------------------------------------------------------------------
|
481
|
+
-- Job Class
|
482
|
+
--
|
483
|
+
-- It returns an object that represents the job with the provided JID
|
484
|
+
-------------------------------------------------------------------------------
|
485
|
+
|
486
|
+
-- This gets all the data associated with the job with the provided id. If the
|
487
|
+
-- job is not found, it returns nil. If found, it returns an object with the
|
488
|
+
-- appropriate properties
|
489
|
+
function QlessJob:data(...)
|
490
|
+
local job = redis.call(
|
491
|
+
'hmget', QlessJob.ns .. self.jid, 'jid', 'klass', 'state', 'queue',
|
492
|
+
'worker', 'priority', 'expires', 'retries', 'remaining', 'data',
|
493
|
+
'tags', 'failure', 'spawned_from_jid')
|
494
|
+
|
495
|
+
-- Return nil if we haven't found it
|
496
|
+
if not job[1] then
|
497
|
+
return nil
|
498
|
+
end
|
499
|
+
|
500
|
+
local data = {
|
501
|
+
jid = job[1],
|
502
|
+
klass = job[2],
|
503
|
+
state = job[3],
|
504
|
+
queue = job[4],
|
505
|
+
worker = job[5] or '',
|
506
|
+
tracked = redis.call(
|
507
|
+
'zscore', 'ql:tracked', self.jid) ~= false,
|
508
|
+
priority = tonumber(job[6]),
|
509
|
+
expires = tonumber(job[7]) or 0,
|
510
|
+
retries = tonumber(job[8]),
|
511
|
+
remaining = math.floor(tonumber(job[9])),
|
512
|
+
data = job[10],
|
513
|
+
tags = cjson.decode(job[11]),
|
514
|
+
history = self:history(),
|
515
|
+
failure = cjson.decode(job[12] or '{}'),
|
516
|
+
spawned_from_jid = job[13],
|
517
|
+
dependents = redis.call(
|
518
|
+
'smembers', QlessJob.ns .. self.jid .. '-dependents'),
|
519
|
+
dependencies = redis.call(
|
520
|
+
'smembers', QlessJob.ns .. self.jid .. '-dependencies')
|
521
|
+
}
|
522
|
+
|
523
|
+
if #arg > 0 then
|
524
|
+
-- This section could probably be optimized, but I wanted the interface
|
525
|
+
-- in place first
|
526
|
+
local response = {}
|
527
|
+
for index, key in ipairs(arg) do
|
528
|
+
table.insert(response, data[key])
|
529
|
+
end
|
530
|
+
return response
|
531
|
+
else
|
532
|
+
return data
|
533
|
+
end
|
534
|
+
end
|
535
|
+
|
536
|
+
-- Complete a job and optionally put it in another queue, either scheduled or
|
537
|
+
-- to be considered waiting immediately. It can also optionally accept other
|
538
|
+
-- jids on which this job will be considered dependent before it's considered
|
539
|
+
-- valid.
|
540
|
+
--
|
541
|
+
-- The variable-length arguments may be pairs of the form:
|
542
|
+
--
|
543
|
+
-- ('next' , queue) : The queue to advance it to next
|
544
|
+
-- ('delay' , delay) : The delay for the next queue
|
545
|
+
-- ('depends', : Json of jobs it depends on in the new queue
|
546
|
+
-- '["jid1", "jid2", ...]')
|
547
|
+
---
|
548
|
+
function QlessJob:complete(now, worker, queue, data, ...)
|
549
|
+
assert(worker, 'Complete(): Arg "worker" missing')
|
550
|
+
assert(queue , 'Complete(): Arg "queue" missing')
|
551
|
+
data = assert(cjson.decode(data),
|
552
|
+
'Complete(): Arg "data" missing or not JSON: ' .. tostring(data))
|
553
|
+
|
554
|
+
-- Read in all the optional parameters
|
555
|
+
local options = {}
|
556
|
+
for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
|
557
|
+
|
558
|
+
-- Sanity check on optional args
|
559
|
+
local nextq = options['next']
|
560
|
+
local delay = assert(tonumber(options['delay'] or 0))
|
561
|
+
local depends = assert(cjson.decode(options['depends'] or '[]'),
|
562
|
+
'Complete(): Arg "depends" not JSON: ' .. tostring(options['depends']))
|
563
|
+
|
564
|
+
-- Depends doesn't make sense without nextq
|
565
|
+
if options['delay'] and nextq == nil then
|
566
|
+
error('Complete(): "delay" cannot be used without a "next".')
|
567
|
+
end
|
568
|
+
|
569
|
+
-- Depends doesn't make sense without nextq
|
570
|
+
if options['depends'] and nextq == nil then
|
571
|
+
error('Complete(): "depends" cannot be used without a "next".')
|
572
|
+
end
|
573
|
+
|
574
|
+
-- The bin is midnight of the provided day
|
575
|
+
-- 24 * 60 * 60 = 86400
|
576
|
+
local bin = now - (now % 86400)
|
577
|
+
|
578
|
+
-- First things first, we should see if the worker still owns this job
|
579
|
+
local lastworker, state, priority, retries, current_queue = unpack(
|
580
|
+
redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state',
|
581
|
+
'priority', 'retries', 'queue'))
|
582
|
+
|
583
|
+
if lastworker == false then
|
584
|
+
error('Complete(): Job does not exist')
|
585
|
+
elseif (state ~= 'running') then
|
586
|
+
error('Complete(): Job is not currently running: ' .. state)
|
587
|
+
elseif lastworker ~= worker then
|
588
|
+
error('Complete(): Job has been handed out to another worker: ' ..
|
589
|
+
tostring(lastworker))
|
590
|
+
elseif queue ~= current_queue then
|
591
|
+
error('Complete(): Job running in another queue: ' ..
|
592
|
+
tostring(current_queue))
|
593
|
+
end
|
594
|
+
|
595
|
+
-- Now we can assume that the worker does own the job. We need to
|
596
|
+
-- 1) Remove the job from the 'locks' from the old queue
|
597
|
+
-- 2) Enqueue it in the next stage if necessary
|
598
|
+
-- 3) Update the data
|
599
|
+
-- 4) Mark the job as completed, remove the worker, remove expires, and
|
600
|
+
-- update history
|
601
|
+
self:history(now, 'done')
|
602
|
+
|
603
|
+
if data then
|
604
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
|
605
|
+
end
|
606
|
+
|
607
|
+
-- Remove the job from the previous queue
|
608
|
+
local queue_obj = Qless.queue(queue)
|
609
|
+
queue_obj.work.remove(self.jid)
|
610
|
+
queue_obj.locks.remove(self.jid)
|
611
|
+
queue_obj.scheduled.remove(self.jid)
|
612
|
+
|
613
|
+
----------------------------------------------------------
|
614
|
+
-- This is the massive stats update that we have to do
|
615
|
+
----------------------------------------------------------
|
616
|
+
-- This is how long we've been waiting to get popped
|
617
|
+
-- local waiting = math.floor(now) - history[#history]['popped']
|
618
|
+
local time = tonumber(
|
619
|
+
redis.call('hget', QlessJob.ns .. self.jid, 'time') or now)
|
620
|
+
local waiting = now - time
|
621
|
+
Qless.queue(queue):stat(now, 'run', waiting)
|
622
|
+
redis.call('hset', QlessJob.ns .. self.jid,
|
623
|
+
'time', string.format("%.20f", now))
|
624
|
+
|
625
|
+
-- Remove this job from the jobs that the worker that was running it has
|
626
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
|
627
|
+
|
628
|
+
if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
|
629
|
+
Qless.publish('completed', self.jid)
|
630
|
+
end
|
631
|
+
|
632
|
+
if nextq then
|
633
|
+
queue_obj = Qless.queue(nextq)
|
634
|
+
-- Send a message out to log
|
635
|
+
Qless.publish('log', cjson.encode({
|
636
|
+
jid = self.jid,
|
637
|
+
event = 'advanced',
|
638
|
+
queue = queue,
|
639
|
+
to = nextq
|
640
|
+
}))
|
641
|
+
|
642
|
+
-- Enqueue the job
|
643
|
+
self:history(now, 'put', {q = nextq})
|
644
|
+
|
645
|
+
-- We're going to make sure that this queue is in the
|
646
|
+
-- set of known queues
|
647
|
+
if redis.call('zscore', 'ql:queues', nextq) == false then
|
648
|
+
redis.call('zadd', 'ql:queues', now, nextq)
|
649
|
+
end
|
650
|
+
|
651
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
652
|
+
'state', 'waiting',
|
653
|
+
'worker', '',
|
654
|
+
'failure', '{}',
|
655
|
+
'queue', nextq,
|
656
|
+
'expires', 0,
|
657
|
+
'remaining', tonumber(retries))
|
658
|
+
|
659
|
+
if (delay > 0) and (#depends == 0) then
|
660
|
+
queue_obj.scheduled.add(now + delay, self.jid)
|
661
|
+
return 'scheduled'
|
662
|
+
else
|
663
|
+
-- These are the jids we legitimately have to wait on
|
664
|
+
local count = 0
|
665
|
+
for i, j in ipairs(depends) do
|
666
|
+
-- Make sure it's something other than 'nil' or complete.
|
667
|
+
local state = redis.call('hget', QlessJob.ns .. j, 'state')
|
668
|
+
if (state and state ~= 'complete') then
|
669
|
+
count = count + 1
|
670
|
+
redis.call(
|
671
|
+
'sadd', QlessJob.ns .. j .. '-dependents',self.jid)
|
672
|
+
redis.call(
|
673
|
+
'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
|
674
|
+
end
|
675
|
+
end
|
676
|
+
if count > 0 then
|
677
|
+
queue_obj.depends.add(now, self.jid)
|
678
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'depends')
|
679
|
+
if delay > 0 then
|
680
|
+
-- We've already put it in 'depends'. Now, we must just save the data
|
681
|
+
-- for when it's scheduled
|
682
|
+
queue_obj.depends.add(now, self.jid)
|
683
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'scheduled', now + delay)
|
684
|
+
end
|
685
|
+
return 'depends'
|
686
|
+
else
|
687
|
+
queue_obj.work.add(now, priority, self.jid)
|
688
|
+
return 'waiting'
|
689
|
+
end
|
690
|
+
end
|
691
|
+
else
|
692
|
+
-- Send a message out to log
|
693
|
+
Qless.publish('log', cjson.encode({
|
694
|
+
jid = self.jid,
|
695
|
+
event = 'completed',
|
696
|
+
queue = queue
|
697
|
+
}))
|
698
|
+
|
699
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
700
|
+
'state', 'complete',
|
701
|
+
'worker', '',
|
702
|
+
'failure', '{}',
|
703
|
+
'queue', '',
|
704
|
+
'expires', 0,
|
705
|
+
'remaining', tonumber(retries))
|
706
|
+
|
707
|
+
-- Do the completion dance
|
708
|
+
local count = Qless.config.get('jobs-history-count')
|
709
|
+
local time = Qless.config.get('jobs-history')
|
710
|
+
|
711
|
+
-- These are the default values
|
712
|
+
count = tonumber(count or 50000)
|
713
|
+
time = tonumber(time or 7 * 24 * 60 * 60)
|
714
|
+
|
715
|
+
-- Schedule this job for destructination eventually
|
716
|
+
redis.call('zadd', 'ql:completed', now, self.jid)
|
717
|
+
|
718
|
+
-- Now look at the expired job data. First, based on the current time
|
719
|
+
local jids = redis.call('zrangebyscore', 'ql:completed', 0, now - time)
|
720
|
+
-- Any jobs that need to be expired... delete
|
721
|
+
for index, jid in ipairs(jids) do
|
722
|
+
local tags = cjson.decode(
|
723
|
+
redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
|
724
|
+
for i, tag in ipairs(tags) do
|
725
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
726
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
727
|
+
end
|
728
|
+
redis.call('del', QlessJob.ns .. jid)
|
729
|
+
redis.call('del', QlessJob.ns .. jid .. '-history')
|
730
|
+
end
|
731
|
+
-- And now remove those from the queued-for-cleanup queue
|
732
|
+
redis.call('zremrangebyscore', 'ql:completed', 0, now - time)
|
733
|
+
|
734
|
+
-- Now take the all by the most recent 'count' ids
|
735
|
+
jids = redis.call('zrange', 'ql:completed', 0, (-1-count))
|
736
|
+
for index, jid in ipairs(jids) do
|
737
|
+
local tags = cjson.decode(
|
738
|
+
redis.call('hget', QlessJob.ns .. jid, 'tags') or '{}')
|
739
|
+
for i, tag in ipairs(tags) do
|
740
|
+
redis.call('zrem', 'ql:t:' .. tag, jid)
|
741
|
+
redis.call('zincrby', 'ql:tags', -1, tag)
|
742
|
+
end
|
743
|
+
redis.call('del', QlessJob.ns .. jid)
|
744
|
+
redis.call('del', QlessJob.ns .. jid .. '-history')
|
745
|
+
end
|
746
|
+
redis.call('zremrangebyrank', 'ql:completed', 0, (-1-count))
|
747
|
+
|
748
|
+
-- Alright, if this has any dependents, then we should go ahead
|
749
|
+
-- and unstick those guys.
|
750
|
+
for i, j in ipairs(redis.call(
|
751
|
+
'smembers', QlessJob.ns .. self.jid .. '-dependents')) do
|
752
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependencies', self.jid)
|
753
|
+
if redis.call(
|
754
|
+
'scard', QlessJob.ns .. j .. '-dependencies') == 0 then
|
755
|
+
local q, p, scheduled = unpack(
|
756
|
+
redis.call('hmget', QlessJob.ns .. j, 'queue', 'priority', 'scheduled'))
|
757
|
+
if q then
|
758
|
+
local queue = Qless.queue(q)
|
759
|
+
queue.depends.remove(j)
|
760
|
+
if scheduled then
|
761
|
+
queue.scheduled.add(scheduled, j)
|
762
|
+
redis.call('hset', QlessJob.ns .. j, 'state', 'scheduled')
|
763
|
+
redis.call('hdel', QlessJob.ns .. j, 'scheduled')
|
764
|
+
else
|
765
|
+
queue.work.add(now, p, j)
|
766
|
+
redis.call('hset', QlessJob.ns .. j, 'state', 'waiting')
|
767
|
+
end
|
768
|
+
end
|
769
|
+
end
|
770
|
+
end
|
771
|
+
|
772
|
+
-- Delete our dependents key
|
773
|
+
redis.call('del', QlessJob.ns .. self.jid .. '-dependents')
|
774
|
+
|
775
|
+
return 'complete'
|
776
|
+
end
|
777
|
+
end
|
778
|
+
|
779
|
+
-- Fail(now, worker, group, message, [data])
|
780
|
+
-- -------------------------------------------------
|
781
|
+
-- Mark the particular job as failed, with the provided group, and a more
|
782
|
+
-- specific message. By `group`, we mean some phrase that might be one of
|
783
|
+
-- several categorical modes of failure. The `message` is something more
|
784
|
+
-- job-specific, like perhaps a traceback.
|
785
|
+
--
|
786
|
+
-- This method should __not__ be used to note that a job has been dropped or
|
787
|
+
-- has failed in a transient way. This method __should__ be used to note that
|
788
|
+
-- a job has something really wrong with it that must be remedied.
|
789
|
+
--
|
790
|
+
-- The motivation behind the `group` is so that similar errors can be grouped
|
791
|
+
-- together. Optionally, updated data can be provided for the job. A job in
|
792
|
+
-- any state can be marked as failed. If it has been given to a worker as a
|
793
|
+
-- job, then its subsequent requests to heartbeat or complete that job will
|
794
|
+
-- fail. Failed jobs are kept until they are canceled or completed.
|
795
|
+
--
|
796
|
+
-- __Returns__ the id of the failed job if successful, or `False` on failure.
|
797
|
+
--
|
798
|
+
-- Args:
|
799
|
+
-- 1) jid
|
800
|
+
-- 2) worker
|
801
|
+
-- 3) group
|
802
|
+
-- 4) message
|
803
|
+
-- 5) the current time
|
804
|
+
-- 6) [data]
|
805
|
+
function QlessJob:fail(now, worker, group, message, data)
|
806
|
+
local worker = assert(worker , 'Fail(): Arg "worker" missing')
|
807
|
+
local group = assert(group , 'Fail(): Arg "group" missing')
|
808
|
+
local message = assert(message , 'Fail(): Arg "message" missing')
|
809
|
+
|
810
|
+
-- The bin is midnight of the provided day
|
811
|
+
-- 24 * 60 * 60 = 86400
|
812
|
+
local bin = now - (now % 86400)
|
813
|
+
|
814
|
+
if data then
|
815
|
+
data = cjson.decode(data)
|
816
|
+
end
|
817
|
+
|
818
|
+
-- First things first, we should get the history
|
819
|
+
local queue, state, oldworker = unpack(redis.call(
|
820
|
+
'hmget', QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
|
821
|
+
|
822
|
+
-- If the job has been completed, we cannot fail it
|
823
|
+
if not state then
|
824
|
+
error('Fail(): Job does not exist')
|
825
|
+
elseif state ~= 'running' then
|
826
|
+
error('Fail(): Job not currently running: ' .. state)
|
827
|
+
elseif worker ~= oldworker then
|
828
|
+
error('Fail(): Job running with another worker: ' .. oldworker)
|
829
|
+
end
|
830
|
+
|
831
|
+
-- Send out a log message
|
832
|
+
Qless.publish('log', cjson.encode({
|
833
|
+
jid = self.jid,
|
834
|
+
event = 'failed',
|
835
|
+
worker = worker,
|
836
|
+
group = group,
|
837
|
+
message = message
|
838
|
+
}))
|
839
|
+
|
840
|
+
if redis.call('zscore', 'ql:tracked', self.jid) ~= false then
|
841
|
+
Qless.publish('failed', self.jid)
|
842
|
+
end
|
843
|
+
|
844
|
+
-- Remove this job from the jobs that the worker that was running it has
|
845
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
|
846
|
+
|
847
|
+
-- Now, take the element of the history for which our provided worker is
|
848
|
+
-- the worker, and update 'failed'
|
849
|
+
self:history(now, 'failed', {worker = worker, group = group})
|
850
|
+
|
851
|
+
-- Increment the number of failures for that queue for the
|
852
|
+
-- given day.
|
853
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
|
854
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
|
855
|
+
|
856
|
+
-- Now remove the instance from the schedule, and work queues for the
|
857
|
+
-- queue it's in
|
858
|
+
local queue_obj = Qless.queue(queue)
|
859
|
+
queue_obj.work.remove(self.jid)
|
860
|
+
queue_obj.locks.remove(self.jid)
|
861
|
+
queue_obj.scheduled.remove(self.jid)
|
862
|
+
|
863
|
+
-- The reason that this appears here is that the above will fail if the
|
864
|
+
-- job doesn't exist
|
865
|
+
if data then
|
866
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'data', cjson.encode(data))
|
867
|
+
end
|
868
|
+
|
869
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
870
|
+
'state', 'failed',
|
871
|
+
'worker', '',
|
872
|
+
'expires', '',
|
873
|
+
'failure', cjson.encode({
|
874
|
+
['group'] = group,
|
875
|
+
['message'] = message,
|
876
|
+
['when'] = math.floor(now),
|
877
|
+
['worker'] = worker
|
878
|
+
}))
|
879
|
+
|
880
|
+
-- Add this group of failure to the list of failures
|
881
|
+
redis.call('sadd', 'ql:failures', group)
|
882
|
+
-- And add this particular instance to the failed groups
|
883
|
+
redis.call('lpush', 'ql:f:' .. group, self.jid)
|
884
|
+
|
885
|
+
-- Here is where we'd intcrement stats about the particular stage
|
886
|
+
-- and possibly the workers
|
887
|
+
|
888
|
+
return self.jid
|
889
|
+
end
|
890
|
+
|
891
|
+
-- retry(now, queue, worker, [delay, [group, [message]]])
|
892
|
+
-- ------------------------------------------
|
893
|
+
-- This script accepts jid, queue, worker and delay for retrying a job. This
|
894
|
+
-- is similar in functionality to `put`, except that this counts against the
|
895
|
+
-- retries a job has for a stage.
|
896
|
+
--
|
897
|
+
-- Throws an exception if:
|
898
|
+
-- - the worker is not the worker with a lock on the job
|
899
|
+
-- - the job is not actually running
|
900
|
+
--
|
901
|
+
-- Otherwise, it returns the number of retries remaining. If the allowed
|
902
|
+
-- retries have been exhausted, then it is automatically failed, and a negative
|
903
|
+
-- number is returned.
|
904
|
+
--
|
905
|
+
-- If a group and message is provided, then if the retries are exhausted, then
|
906
|
+
-- the provided group and message will be used in place of the default
|
907
|
+
-- messaging about retries in the particular queue being exhausted
|
908
|
+
function QlessJob:retry(now, queue, worker, delay, group, message)
|
909
|
+
assert(queue , 'Retry(): Arg "queue" missing')
|
910
|
+
assert(worker, 'Retry(): Arg "worker" missing')
|
911
|
+
delay = assert(tonumber(delay or 0),
|
912
|
+
'Retry(): Arg "delay" not a number: ' .. tostring(delay))
|
913
|
+
|
914
|
+
-- Let's see what the old priority, and tags were
|
915
|
+
local oldqueue, state, retries, oldworker, priority, failure = unpack(
|
916
|
+
redis.call('hmget', QlessJob.ns .. self.jid, 'queue', 'state',
|
917
|
+
'retries', 'worker', 'priority', 'failure'))
|
918
|
+
|
919
|
+
-- If this isn't the worker that owns
|
920
|
+
if oldworker == false then
|
921
|
+
error('Retry(): Job does not exist')
|
922
|
+
elseif state ~= 'running' then
|
923
|
+
error('Retry(): Job is not currently running: ' .. state)
|
924
|
+
elseif oldworker ~= worker then
|
925
|
+
error('Retry(): Job has been given to another worker: ' .. oldworker)
|
926
|
+
end
|
927
|
+
|
928
|
+
-- For each of these, decrement their retries. If any of them
|
929
|
+
-- have exhausted their retries, then we should mark them as
|
930
|
+
-- failed.
|
931
|
+
local remaining = tonumber(redis.call(
|
932
|
+
'hincrby', QlessJob.ns .. self.jid, 'remaining', -1))
|
933
|
+
redis.call('hdel', QlessJob.ns .. self.jid, 'grace')
|
934
|
+
|
935
|
+
-- Remove it from the locks key of the old queue
|
936
|
+
Qless.queue(oldqueue).locks.remove(self.jid)
|
937
|
+
|
938
|
+
-- Remove this job from the worker that was previously working it
|
939
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', self.jid)
|
940
|
+
|
941
|
+
if remaining < 0 then
|
942
|
+
-- Now remove the instance from the schedule, and work queues for the
|
943
|
+
-- queue it's in
|
944
|
+
local group = group or 'failed-retries-' .. queue
|
945
|
+
self:history(now, 'failed', {['group'] = group})
|
946
|
+
|
947
|
+
redis.call('hmset', QlessJob.ns .. self.jid, 'state', 'failed',
|
948
|
+
'worker', '',
|
949
|
+
'expires', '')
|
950
|
+
-- If the failure has not already been set, then set it
|
951
|
+
if group ~= nil and message ~= nil then
|
952
|
+
redis.call('hset', QlessJob.ns .. self.jid,
|
953
|
+
'failure', cjson.encode({
|
954
|
+
['group'] = group,
|
955
|
+
['message'] = message,
|
956
|
+
['when'] = math.floor(now),
|
957
|
+
['worker'] = worker
|
958
|
+
})
|
959
|
+
)
|
960
|
+
else
|
961
|
+
redis.call('hset', QlessJob.ns .. self.jid,
|
962
|
+
'failure', cjson.encode({
|
963
|
+
['group'] = group,
|
964
|
+
['message'] =
|
965
|
+
'Job exhausted retries in queue "' .. oldqueue .. '"',
|
966
|
+
['when'] = now,
|
967
|
+
['worker'] = unpack(self:data('worker'))
|
968
|
+
}))
|
969
|
+
end
|
970
|
+
|
971
|
+
-- Add this type of failure to the list of failures
|
972
|
+
redis.call('sadd', 'ql:failures', group)
|
973
|
+
-- And add this particular instance to the failed types
|
974
|
+
redis.call('lpush', 'ql:f:' .. group, self.jid)
|
975
|
+
-- Increment the count of the failed jobs
|
976
|
+
local bin = now - (now % 86400)
|
977
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failures', 1)
|
978
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. queue, 'failed' , 1)
|
979
|
+
else
|
980
|
+
-- Put it in the queue again with a delay. Like put()
|
981
|
+
local queue_obj = Qless.queue(queue)
|
982
|
+
if delay > 0 then
|
983
|
+
queue_obj.scheduled.add(now + delay, self.jid)
|
984
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'scheduled')
|
985
|
+
else
|
986
|
+
queue_obj.work.add(now, priority, self.jid)
|
987
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
|
988
|
+
end
|
989
|
+
|
990
|
+
-- If a group and a message was provided, then we should save it
|
991
|
+
if group ~= nil and message ~= nil then
|
992
|
+
redis.call('hset', QlessJob.ns .. self.jid,
|
993
|
+
'failure', cjson.encode({
|
994
|
+
['group'] = group,
|
995
|
+
['message'] = message,
|
996
|
+
['when'] = math.floor(now),
|
997
|
+
['worker'] = worker
|
998
|
+
})
|
999
|
+
)
|
1000
|
+
end
|
1001
|
+
end
|
1002
|
+
|
1003
|
+
return math.floor(remaining)
|
1004
|
+
end
|
1005
|
+
|
1006
|
+
-- Depends(jid, 'on', [jid, [jid, [...]]]
|
1007
|
+
-- Depends(jid, 'off', [jid, [jid, [...]]])
|
1008
|
+
-- Depends(jid, 'off', 'all')
|
1009
|
+
-------------------------------------------------------------------------------
|
1010
|
+
-- Add or remove dependencies a job has. If 'on' is provided, the provided
|
1011
|
+
-- jids are added as dependencies. If 'off' and 'all' are provided, then all
|
1012
|
+
-- the current dependencies are removed. If 'off' is provided and the next
|
1013
|
+
-- argument is not 'all', then those jids are removed as dependencies.
|
1014
|
+
--
|
1015
|
+
-- If a job is not already in the 'depends' state, then this call will return
|
1016
|
+
-- false. Otherwise, it will return true
|
1017
|
+
function QlessJob:depends(now, command, ...)
|
1018
|
+
assert(command, 'Depends(): Arg "command" missing')
|
1019
|
+
local state = redis.call('hget', QlessJob.ns .. self.jid, 'state')
|
1020
|
+
if state ~= 'depends' then
|
1021
|
+
error('Depends(): Job ' .. self.jid ..
|
1022
|
+
' not in the depends state: ' .. tostring(state))
|
1023
|
+
end
|
1024
|
+
|
1025
|
+
if command == 'on' then
|
1026
|
+
-- These are the jids we legitimately have to wait on
|
1027
|
+
for i, j in ipairs(arg) do
|
1028
|
+
-- Make sure it's something other than 'nil' or complete.
|
1029
|
+
local state = redis.call('hget', QlessJob.ns .. j, 'state')
|
1030
|
+
if (state and state ~= 'complete') then
|
1031
|
+
redis.call(
|
1032
|
+
'sadd', QlessJob.ns .. j .. '-dependents' , self.jid)
|
1033
|
+
redis.call(
|
1034
|
+
'sadd', QlessJob.ns .. self.jid .. '-dependencies', j)
|
1035
|
+
end
|
1036
|
+
end
|
1037
|
+
return true
|
1038
|
+
elseif command == 'off' then
|
1039
|
+
if arg[1] == 'all' then
|
1040
|
+
for i, j in ipairs(redis.call(
|
1041
|
+
'smembers', QlessJob.ns .. self.jid .. '-dependencies')) do
|
1042
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
|
1043
|
+
end
|
1044
|
+
redis.call('del', QlessJob.ns .. self.jid .. '-dependencies')
|
1045
|
+
local q, p = unpack(redis.call(
|
1046
|
+
'hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
|
1047
|
+
if q then
|
1048
|
+
local queue_obj = Qless.queue(q)
|
1049
|
+
queue_obj.depends.remove(self.jid)
|
1050
|
+
queue_obj.work.add(now, p, self.jid)
|
1051
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'state', 'waiting')
|
1052
|
+
end
|
1053
|
+
else
|
1054
|
+
for i, j in ipairs(arg) do
|
1055
|
+
redis.call('srem', QlessJob.ns .. j .. '-dependents', self.jid)
|
1056
|
+
redis.call(
|
1057
|
+
'srem', QlessJob.ns .. self.jid .. '-dependencies', j)
|
1058
|
+
if redis.call('scard',
|
1059
|
+
QlessJob.ns .. self.jid .. '-dependencies') == 0 then
|
1060
|
+
local q, p = unpack(redis.call(
|
1061
|
+
'hmget', QlessJob.ns .. self.jid, 'queue', 'priority'))
|
1062
|
+
if q then
|
1063
|
+
local queue_obj = Qless.queue(q)
|
1064
|
+
queue_obj.depends.remove(self.jid)
|
1065
|
+
queue_obj.work.add(now, p, self.jid)
|
1066
|
+
redis.call('hset',
|
1067
|
+
QlessJob.ns .. self.jid, 'state', 'waiting')
|
1068
|
+
end
|
1069
|
+
end
|
1070
|
+
end
|
1071
|
+
end
|
1072
|
+
return true
|
1073
|
+
else
|
1074
|
+
error('Depends(): Argument "command" must be "on" or "off"')
|
1075
|
+
end
|
1076
|
+
end
|
1077
|
+
|
1078
|
+
-- Heartbeat
|
1079
|
+
------------
|
1080
|
+
-- Renew this worker's lock on this job. Throws an exception if:
|
1081
|
+
-- - the job's been given to another worker
|
1082
|
+
-- - the job's been completed
|
1083
|
+
-- - the job's been canceled
|
1084
|
+
-- - the job's not running
|
1085
|
+
function QlessJob:heartbeat(now, worker, data)
|
1086
|
+
assert(worker, 'Heatbeat(): Arg "worker" missing')
|
1087
|
+
|
1088
|
+
-- We should find the heartbeat interval for this queue
|
1089
|
+
-- heartbeat. First, though, we need to find the queue
|
1090
|
+
-- this particular job is in
|
1091
|
+
local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue') or ''
|
1092
|
+
local expires = now + tonumber(
|
1093
|
+
Qless.config.get(queue .. '-heartbeat') or
|
1094
|
+
Qless.config.get('heartbeat', 60))
|
1095
|
+
|
1096
|
+
if data then
|
1097
|
+
data = cjson.decode(data)
|
1098
|
+
end
|
1099
|
+
|
1100
|
+
-- First, let's see if the worker still owns this job, and there is a
|
1101
|
+
-- worker
|
1102
|
+
local job_worker, state = unpack(
|
1103
|
+
redis.call('hmget', QlessJob.ns .. self.jid, 'worker', 'state'))
|
1104
|
+
if job_worker == false then
|
1105
|
+
-- This means the job doesn't exist
|
1106
|
+
error('Heartbeat(): Job does not exist')
|
1107
|
+
elseif state ~= 'running' then
|
1108
|
+
error('Heartbeat(): Job not currently running: ' .. state)
|
1109
|
+
elseif job_worker ~= worker or #job_worker == 0 then
|
1110
|
+
error('Heartbeat(): Job given out to another worker: ' .. job_worker)
|
1111
|
+
else
|
1112
|
+
-- Otherwise, optionally update the user data, and the heartbeat
|
1113
|
+
if data then
|
1114
|
+
-- I don't know if this is wise, but I'm decoding and encoding
|
1115
|
+
-- the user data to hopefully ensure its sanity
|
1116
|
+
redis.call('hmset', QlessJob.ns .. self.jid, 'expires',
|
1117
|
+
expires, 'worker', worker, 'data', cjson.encode(data))
|
1118
|
+
else
|
1119
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
1120
|
+
'expires', expires, 'worker', worker)
|
1121
|
+
end
|
1122
|
+
|
1123
|
+
-- Update hwen this job was last updated on that worker
|
1124
|
+
-- Add this job to the list of jobs handled by this worker
|
1125
|
+
redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, self.jid)
|
1126
|
+
|
1127
|
+
-- And now we should just update the locks
|
1128
|
+
local queue = Qless.queue(
|
1129
|
+
redis.call('hget', QlessJob.ns .. self.jid, 'queue'))
|
1130
|
+
queue.locks.add(expires, self.jid)
|
1131
|
+
return expires
|
1132
|
+
end
|
1133
|
+
end
|
1134
|
+
|
1135
|
+
-- Priority
|
1136
|
+
-- --------
|
1137
|
+
-- Update the priority of this job. If the job doesn't exist, throws an
|
1138
|
+
-- exception
|
1139
|
+
function QlessJob:priority(priority)
|
1140
|
+
priority = assert(tonumber(priority),
|
1141
|
+
'Priority(): Arg "priority" missing or not a number: ' ..
|
1142
|
+
tostring(priority))
|
1143
|
+
|
1144
|
+
-- Get the queue the job is currently in, if any
|
1145
|
+
local queue = redis.call('hget', QlessJob.ns .. self.jid, 'queue')
|
1146
|
+
|
1147
|
+
if queue == nil then
|
1148
|
+
-- If the job doesn't exist, throw an error
|
1149
|
+
error('Priority(): Job ' .. self.jid .. ' does not exist')
|
1150
|
+
elseif queue == '' then
|
1151
|
+
-- Just adjust the priority
|
1152
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
|
1153
|
+
return priority
|
1154
|
+
else
|
1155
|
+
-- Adjust the priority and see if it's a candidate for updating
|
1156
|
+
-- its priority in the queue it's currently in
|
1157
|
+
local queue_obj = Qless.queue(queue)
|
1158
|
+
if queue_obj.work.score(self.jid) then
|
1159
|
+
queue_obj.work.add(0, priority, self.jid)
|
1160
|
+
end
|
1161
|
+
redis.call('hset', QlessJob.ns .. self.jid, 'priority', priority)
|
1162
|
+
return priority
|
1163
|
+
end
|
1164
|
+
end
|
1165
|
+
|
1166
|
+
-- Update the jobs' attributes with the provided dictionary
|
1167
|
+
function QlessJob:update(data)
|
1168
|
+
local tmp = {}
|
1169
|
+
for k, v in pairs(data) do
|
1170
|
+
table.insert(tmp, k)
|
1171
|
+
table.insert(tmp, v)
|
1172
|
+
end
|
1173
|
+
redis.call('hmset', QlessJob.ns .. self.jid, unpack(tmp))
|
1174
|
+
end
|
1175
|
+
|
1176
|
+
-- Times out the job now rather than when its lock is normally set to expire
|
1177
|
+
function QlessJob:timeout(now)
|
1178
|
+
local queue_name, state, worker = unpack(redis.call('hmget',
|
1179
|
+
QlessJob.ns .. self.jid, 'queue', 'state', 'worker'))
|
1180
|
+
if queue_name == nil then
|
1181
|
+
error('Timeout(): Job does not exist')
|
1182
|
+
elseif state ~= 'running' then
|
1183
|
+
error('Timeout(): Job ' .. self.jid .. ' not running')
|
1184
|
+
else
|
1185
|
+
-- Time out the job
|
1186
|
+
self:history(now, 'timed-out')
|
1187
|
+
local queue = Qless.queue(queue_name)
|
1188
|
+
queue.locks.remove(self.jid)
|
1189
|
+
queue.work.add(now, math.huge, self.jid)
|
1190
|
+
redis.call('hmset', QlessJob.ns .. self.jid,
|
1191
|
+
'state', 'stalled', 'expires', 0)
|
1192
|
+
local encoded = cjson.encode({
|
1193
|
+
jid = self.jid,
|
1194
|
+
event = 'lock_lost',
|
1195
|
+
worker = worker
|
1196
|
+
})
|
1197
|
+
Qless.publish('w:' .. worker, encoded)
|
1198
|
+
Qless.publish('log', encoded)
|
1199
|
+
return queue_name
|
1200
|
+
end
|
1201
|
+
end
|
1202
|
+
|
1203
|
+
-- Return whether or not this job exists
|
1204
|
+
function QlessJob:exists()
|
1205
|
+
return redis.call('exists', QlessJob.ns .. self.jid) == 1
|
1206
|
+
end
|
1207
|
+
|
1208
|
+
-- Get or append to history
|
1209
|
+
function QlessJob:history(now, what, item)
|
1210
|
+
-- First, check if there's an old-style history, and update it if there is
|
1211
|
+
local history = redis.call('hget', QlessJob.ns .. self.jid, 'history')
|
1212
|
+
if history then
|
1213
|
+
history = cjson.decode(history)
|
1214
|
+
for i, value in ipairs(history) do
|
1215
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1216
|
+
cjson.encode({math.floor(value.put), 'put', {q = value.q}}))
|
1217
|
+
|
1218
|
+
-- If there's any popped time
|
1219
|
+
if value.popped then
|
1220
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1221
|
+
cjson.encode({math.floor(value.popped), 'popped',
|
1222
|
+
{worker = value.worker}}))
|
1223
|
+
end
|
1224
|
+
|
1225
|
+
-- If there's any failure
|
1226
|
+
if value.failed then
|
1227
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1228
|
+
cjson.encode(
|
1229
|
+
{math.floor(value.failed), 'failed', nil}))
|
1230
|
+
end
|
1231
|
+
|
1232
|
+
-- If it was completed
|
1233
|
+
if value.done then
|
1234
|
+
redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1235
|
+
cjson.encode(
|
1236
|
+
{math.floor(value.done), 'done', nil}))
|
1237
|
+
end
|
1238
|
+
end
|
1239
|
+
-- With all this ported forward, delete the old-style history
|
1240
|
+
redis.call('hdel', QlessJob.ns .. self.jid, 'history')
|
1241
|
+
end
|
1242
|
+
|
1243
|
+
-- Now to the meat of the function
|
1244
|
+
if what == nil then
|
1245
|
+
-- Get the history
|
1246
|
+
local response = {}
|
1247
|
+
for i, value in ipairs(redis.call('lrange',
|
1248
|
+
QlessJob.ns .. self.jid .. '-history', 0, -1)) do
|
1249
|
+
value = cjson.decode(value)
|
1250
|
+
local dict = value[3] or {}
|
1251
|
+
dict['when'] = value[1]
|
1252
|
+
dict['what'] = value[2]
|
1253
|
+
table.insert(response, dict)
|
1254
|
+
end
|
1255
|
+
return response
|
1256
|
+
else
|
1257
|
+
-- Append to the history. If the length of the history should be limited,
|
1258
|
+
-- then we'll truncate it.
|
1259
|
+
local count = tonumber(Qless.config.get('max-job-history', 100))
|
1260
|
+
if count > 0 then
|
1261
|
+
-- We'll always keep the first item around
|
1262
|
+
local obj = redis.call('lpop', QlessJob.ns .. self.jid .. '-history')
|
1263
|
+
redis.call('ltrim', QlessJob.ns .. self.jid .. '-history', -count + 2, -1)
|
1264
|
+
if obj ~= nil then
|
1265
|
+
redis.call('lpush', QlessJob.ns .. self.jid .. '-history', obj)
|
1266
|
+
end
|
1267
|
+
end
|
1268
|
+
return redis.call('rpush', QlessJob.ns .. self.jid .. '-history',
|
1269
|
+
cjson.encode({math.floor(now), what, item}))
|
1270
|
+
end
|
1271
|
+
end
|
1272
|
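The `max-job-history` truncation above keeps the oldest history entry plus the most recent ones. For illustration, a minimal standalone Lua sketch of that trimming, using a plain table in place of the Redis list (the entries and the limit are hypothetical):

local history = {'put', 'popped', 'failed', 'put', 'popped', 'done'}
local count = 4                                   -- hypothetical max-job-history
if count > 0 then
  local first = table.remove(history, 1)          -- lpop: hold the first entry aside
  while #history > count - 2 do                   -- ltrim -count + 2, -1
    table.remove(history, 1)
  end
  if first ~= nil then
    table.insert(history, 1, first)               -- lpush the first entry back on
  end
end
print(table.concat(history, ', '))                -- put, popped, done
-- which leaves room for the entry that rpush appends next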
+
-------------------------------------------------------------------------------
|
1273
|
+
-- Queue class
|
1274
|
+
-------------------------------------------------------------------------------
|
1275
|
+
-- Return a queue object
|
1276
|
+
function Qless.queue(name)
|
1277
|
+
assert(name, 'Queue(): no queue name provided')
|
1278
|
+
local queue = {}
|
1279
|
+
setmetatable(queue, QlessQueue)
|
1280
|
+
queue.name = name
|
1281
|
+
|
1282
|
+
-- Access to our work
|
1283
|
+
queue.work = {
|
1284
|
+
peek = function(count)
|
1285
|
+
if count == 0 then
|
1286
|
+
return {}
|
1287
|
+
end
|
1288
|
+
local jids = {}
|
1289
|
+
for index, jid in ipairs(redis.call(
|
1290
|
+
'zrevrange', queue:prefix('work'), 0, count - 1)) do
|
1291
|
+
table.insert(jids, jid)
|
1292
|
+
end
|
1293
|
+
return jids
|
1294
|
+
end, remove = function(...)
|
1295
|
+
if #arg > 0 then
|
1296
|
+
return redis.call('zrem', queue:prefix('work'), unpack(arg))
|
1297
|
+
end
|
1298
|
+
end, add = function(now, priority, jid)
|
1299
|
+
return redis.call('zadd',
|
1300
|
+
queue:prefix('work'), priority - (now / 10000000000), jid)
|
1301
|
+
end, score = function(jid)
|
1302
|
+
return redis.call('zscore', queue:prefix('work'), jid)
|
1303
|
+
end, length = function()
|
1304
|
+
return redis.call('zcard', queue:prefix('work'))
|
1305
|
+
end
|
1306
|
+
}
|
1307
|
+
|
1308
|
+
-- Access to our locks
|
1309
|
+
queue.locks = {
|
1310
|
+
expired = function(now, offset, count)
|
1311
|
+
return redis.call('zrangebyscore',
|
1312
|
+
queue:prefix('locks'), -math.huge, now, 'LIMIT', offset, count)
|
1313
|
+
end, peek = function(now, offset, count)
|
1314
|
+
return redis.call('zrangebyscore', queue:prefix('locks'),
|
1315
|
+
now, math.huge, 'LIMIT', offset, count)
|
1316
|
+
end, add = function(expires, jid)
|
1317
|
+
redis.call('zadd', queue:prefix('locks'), expires, jid)
|
1318
|
+
end, remove = function(...)
|
1319
|
+
if #arg > 0 then
|
1320
|
+
return redis.call('zrem', queue:prefix('locks'), unpack(arg))
|
1321
|
+
end
|
1322
|
+
end, running = function(now)
|
1323
|
+
return redis.call('zcount', queue:prefix('locks'), now, math.huge)
|
1324
|
+
end, length = function(now)
|
1325
|
+
-- If a 'now' is provided, we're interested in how many are before
|
1326
|
+
-- that time
|
1327
|
+
if now then
|
1328
|
+
return redis.call('zcount', queue:prefix('locks'), 0, now)
|
1329
|
+
else
|
1330
|
+
return redis.call('zcard', queue:prefix('locks'))
|
1331
|
+
end
|
1332
|
+
end
|
1333
|
+
}
|
1334
|
+
|
1335
|
+
-- Access to our dependent jobs
|
1336
|
+
queue.depends = {
|
1337
|
+
peek = function(now, offset, count)
|
1338
|
+
return redis.call('zrange',
|
1339
|
+
queue:prefix('depends'), offset, offset + count - 1)
|
1340
|
+
end, add = function(now, jid)
|
1341
|
+
redis.call('zadd', queue:prefix('depends'), now, jid)
|
1342
|
+
end, remove = function(...)
|
1343
|
+
if #arg > 0 then
|
1344
|
+
return redis.call('zrem', queue:prefix('depends'), unpack(arg))
|
1345
|
+
end
|
1346
|
+
end, length = function()
|
1347
|
+
return redis.call('zcard', queue:prefix('depends'))
|
1348
|
+
end
|
1349
|
+
}
|
1350
|
+
|
1351
|
+
-- Access to our scheduled jobs
|
1352
|
+
queue.scheduled = {
|
1353
|
+
peek = function(now, offset, count)
|
1354
|
+
return redis.call('zrange',
|
1355
|
+
queue:prefix('scheduled'), offset, offset + count - 1)
|
1356
|
+
end, ready = function(now, offset, count)
|
1357
|
+
return redis.call('zrangebyscore',
|
1358
|
+
queue:prefix('scheduled'), 0, now, 'LIMIT', offset, count)
|
1359
|
+
end, add = function(when, jid)
|
1360
|
+
redis.call('zadd', queue:prefix('scheduled'), when, jid)
|
1361
|
+
end, remove = function(...)
|
1362
|
+
if #arg > 0 then
|
1363
|
+
return redis.call('zrem', queue:prefix('scheduled'), unpack(arg))
|
1364
|
+
end
|
1365
|
+
end, length = function()
|
1366
|
+
return redis.call('zcard', queue:prefix('scheduled'))
|
1367
|
+
end
|
1368
|
+
}
|
1369
|
+
|
1370
|
+
-- Access to our recurring jobs
|
1371
|
+
queue.recurring = {
|
1372
|
+
peek = function(now, offset, count)
|
1373
|
+
return redis.call('zrangebyscore', queue:prefix('recur'),
|
1374
|
+
0, now, 'LIMIT', offset, count)
|
1375
|
+
end, ready = function(now, offset, count)
|
1376
|
+
end, add = function(when, jid)
|
1377
|
+
redis.call('zadd', queue:prefix('recur'), when, jid)
|
1378
|
+
end, remove = function(...)
|
1379
|
+
if #arg > 0 then
|
1380
|
+
return redis.call('zrem', queue:prefix('recur'), unpack(arg))
|
1381
|
+
end
|
1382
|
+
end, update = function(increment, jid)
|
1383
|
+
redis.call('zincrby', queue:prefix('recur'), increment, jid)
|
1384
|
+
end, score = function(jid)
|
1385
|
+
return redis.call('zscore', queue:prefix('recur'), jid)
|
1386
|
+
end, length = function()
|
1387
|
+
return redis.call('zcard', queue:prefix('recur'))
|
1388
|
+
end
|
1389
|
+
}
|
1390
|
+
return queue
|
1391
|
+
end
|
1392
|
+
|
1393
|
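The work set above scores each job as priority - (now / 1e10) and peeks with zrevrange, so higher-priority jobs come first and, at equal priority, older jobs come first. A standalone sketch with made-up timestamps:

local function work_score(now, priority)
  return priority - (now / 10000000000)
end

local older  = work_score(1400000000, 0)   -- hypothetical submission times
local newer  = work_score(1400000100, 0)
local urgent = work_score(1400000100, 5)

assert(older > newer)                      -- same priority: the older job pops first
assert(urgent > older)                     -- higher priority beats age
print(older, newer, urgent)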
+
-- Return the prefix for this particular queue
|
1394
|
+
function QlessQueue:prefix(group)
|
1395
|
+
if group then
|
1396
|
+
return QlessQueue.ns..self.name..'-'..group
|
1397
|
+
else
|
1398
|
+
return QlessQueue.ns..self.name
|
1399
|
+
end
|
1400
|
+
end
|
1401
|
+
|
1402
|
+
-- Stats(now, date)
|
1403
|
+
-- ---------------------
|
1404
|
+
-- Return the current statistics for a given queue on a given date. The
|
1405
|
+
-- results are returned as a JSON blob:
|
1406
|
+
--
|
1407
|
+
--
|
1408
|
+
-- {
|
1409
|
+
-- # These are unimplemented as of yet
|
1410
|
+
-- 'failed': 3,
|
1411
|
+
-- 'retries': 5,
|
1412
|
+
-- 'wait' : {
|
1413
|
+
-- 'total' : ...,
|
1414
|
+
-- 'mean' : ...,
|
1415
|
+
-- 'variance' : ...,
|
1416
|
+
-- 'histogram': [
|
1417
|
+
-- ...
|
1418
|
+
-- ]
|
1419
|
+
-- }, 'run': {
|
1420
|
+
-- 'total' : ...,
|
1421
|
+
-- 'mean' : ...,
|
1422
|
+
-- 'variance' : ...,
|
1423
|
+
-- 'histogram': [
|
1424
|
+
-- ...
|
1425
|
+
-- ]
|
1426
|
+
-- }
|
1427
|
+
-- }
|
1428
|
+
--
|
1429
|
+
-- The histogram's data points are at the second resolution for the first
|
1430
|
+
-- minute, the minute resolution for the first hour, the 15-minute resolution
|
1431
|
+
-- for the first day, the hour resolution for the first 3 days, and then at
|
1432
|
+
-- the day resolution from there on out. The `histogram` key is a list of
|
1433
|
+
-- those values.
|
1434
|
+
function QlessQueue:stats(now, date)
|
1435
|
+
date = assert(tonumber(date),
|
1436
|
+
'Stats(): Arg "date" missing or not a number: '.. (date or 'nil'))
|
1437
|
+
|
1438
|
+
-- The bin is midnight of the provided day
|
1439
|
+
-- 24 * 60 * 60 = 86400
|
1440
|
+
local bin = date - (date % 86400)
|
1441
|
+
|
1442
|
+
-- This is a table of all the keys we want to use in order to produce a histogram
|
1443
|
+
local histokeys = {
|
1444
|
+
's0','s1','s2','s3','s4','s5','s6','s7','s8','s9','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20','s21','s22','s23','s24','s25','s26','s27','s28','s29','s30','s31','s32','s33','s34','s35','s36','s37','s38','s39','s40','s41','s42','s43','s44','s45','s46','s47','s48','s49','s50','s51','s52','s53','s54','s55','s56','s57','s58','s59',
|
1445
|
+
'm1','m2','m3','m4','m5','m6','m7','m8','m9','m10','m11','m12','m13','m14','m15','m16','m17','m18','m19','m20','m21','m22','m23','m24','m25','m26','m27','m28','m29','m30','m31','m32','m33','m34','m35','m36','m37','m38','m39','m40','m41','m42','m43','m44','m45','m46','m47','m48','m49','m50','m51','m52','m53','m54','m55','m56','m57','m58','m59',
|
1446
|
+
'h1','h2','h3','h4','h5','h6','h7','h8','h9','h10','h11','h12','h13','h14','h15','h16','h17','h18','h19','h20','h21','h22','h23',
|
1447
|
+
'd1','d2','d3','d4','d5','d6'
|
1448
|
+
}
|
1449
|
+
|
1450
|
+
local mkstats = function(name, bin, queue)
|
1451
|
+
-- The results we'll be sending back
|
1452
|
+
local results = {}
|
1453
|
+
|
1454
|
+
local key = 'ql:s:' .. name .. ':' .. bin .. ':' .. queue
|
1455
|
+
local count, mean, vk = unpack(redis.call('hmget', key, 'total', 'mean', 'vk'))
|
1456
|
+
|
1457
|
+
count = tonumber(count) or 0
|
1458
|
+
mean = tonumber(mean) or 0
|
1459
|
+
vk = tonumber(vk)
|
1460
|
+
|
1461
|
+
results.count = count or 0
|
1462
|
+
results.mean = mean or 0
|
1463
|
+
results.histogram = {}
|
1464
|
+
|
1465
|
+
if not count then
|
1466
|
+
results.std = 0
|
1467
|
+
else
|
1468
|
+
if count > 1 then
|
1469
|
+
results.std = math.sqrt(vk / (count - 1))
|
1470
|
+
else
|
1471
|
+
results.std = 0
|
1472
|
+
end
|
1473
|
+
end
|
1474
|
+
|
1475
|
+
local histogram = redis.call('hmget', key, unpack(histokeys))
|
1476
|
+
for i=1,#histokeys do
|
1477
|
+
table.insert(results.histogram, tonumber(histogram[i]) or 0)
|
1478
|
+
end
|
1479
|
+
return results
|
1480
|
+
end
|
1481
|
+
|
1482
|
+
local retries, failed, failures = unpack(redis.call('hmget', 'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 'failed', 'failures'))
|
1483
|
+
return {
|
1484
|
+
retries = tonumber(retries or 0),
|
1485
|
+
failed = tonumber(failed or 0),
|
1486
|
+
failures = tonumber(failures or 0),
|
1487
|
+
wait = mkstats('wait', bin, self.name),
|
1488
|
+
run = mkstats('run' , bin, self.name)
|
1489
|
+
}
|
1490
|
+
end
|
1491
|
+
|
1492
|
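Stats() reports a standard deviation derived from the stored running values: 'total' (the sample count) and 'vk' (the running sum of squared deviations maintained by QlessQueue:stat further down). A standalone sketch with made-up numbers:

local count, vk = 10, 90.0                 -- hypothetical stored values
local std = 0
if count > 1 then
  std = math.sqrt(vk / (count - 1))        -- same formula as mkstats above
end
print(string.format('std over %d samples: %.3f', count, std))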
+
-- Peek
|
1493
|
+
-------
|
1494
|
+
-- Examine the next jobs that would be popped from the queue without actually
|
1495
|
+
-- popping them.
|
1496
|
+
function QlessQueue:peek(now, count)
|
1497
|
+
count = assert(tonumber(count),
|
1498
|
+
'Peek(): Arg "count" missing or not a number: ' .. tostring(count))
|
1499
|
+
|
1500
|
+
-- These are the ids that we're going to return. We'll begin with any jobs
|
1501
|
+
-- that have lost their locks
|
1502
|
+
local jids = self.locks.expired(now, 0, count)
|
1503
|
+
|
1504
|
+
-- If we still need jobs in order to meet demand, then we should
|
1505
|
+
-- look for all the recurring jobs that need jobs run
|
1506
|
+
self:check_recurring(now, count - #jids)
|
1507
|
+
|
1508
|
+
-- Now we've checked __all__ the locks for this queue that could
|
1509
|
+
-- have expired, and are no more than the number requested. If
|
1510
|
+
-- we still need values in order to meet the demand, then we
|
1511
|
+
-- should check for any scheduled items, and if so, we should
|
1512
|
+
-- insert them to ensure correctness when pulling off the next
|
1513
|
+
-- unit of work.
|
1514
|
+
self:check_scheduled(now, count - #jids)
|
1515
|
+
|
1516
|
+
-- With these in place, we can expand this list of jids based on the work
|
1517
|
+
-- queue itself and the priorities therein
|
1518
|
+
table.extend(jids, self.work.peek(count - #jids))
|
1519
|
+
|
1520
|
+
return jids
|
1521
|
+
end
|
1522
|
+
|
1523
|
+
-- Return true if this queue is paused
|
1524
|
+
function QlessQueue:paused()
|
1525
|
+
return redis.call('sismember', 'ql:paused_queues', self.name) == 1
|
1526
|
+
end
|
1527
|
+
|
1528
|
+
-- Pause this queue
|
1529
|
+
--
|
1530
|
+
-- Note: long term, we have discussed adding a rate-limiting
|
1531
|
+
-- feature to qless-core, which would be more flexible and
|
1532
|
+
-- could be used for pausing (i.e. pause = set the rate to 0).
|
1533
|
+
-- For now, this is far simpler, but we should rewrite this
|
1534
|
+
-- in terms of the rate limiting feature if/when that is added.
|
1535
|
+
function QlessQueue.pause(now, ...)
|
1536
|
+
redis.call('sadd', 'ql:paused_queues', unpack(arg))
|
1537
|
+
end
|
1538
|
+
|
1539
|
+
-- Unpause this queue
|
1540
|
+
function QlessQueue.unpause(...)
|
1541
|
+
redis.call('srem', 'ql:paused_queues', unpack(arg))
|
1542
|
+
end
|
1543
|
+
|
1544
|
+
-- Checks for expired locks, scheduled and recurring jobs, returning any
|
1545
|
+
-- jobs that are ready to be processed
|
1546
|
+
function QlessQueue:pop(now, worker, count)
|
1547
|
+
assert(worker, 'Pop(): Arg "worker" missing')
|
1548
|
+
count = assert(tonumber(count),
|
1549
|
+
'Pop(): Arg "count" missing or not a number: ' .. tostring(count))
|
1550
|
+
|
1551
|
+
-- We should find the heartbeat interval for this queue
|
1552
|
+
local expires = now + tonumber(
|
1553
|
+
Qless.config.get(self.name .. '-heartbeat') or
|
1554
|
+
Qless.config.get('heartbeat', 60))
|
1555
|
+
|
1556
|
+
-- If this queue is paused, then return no jobs
|
1557
|
+
if self:paused() then
|
1558
|
+
return {}
|
1559
|
+
end
|
1560
|
+
|
1561
|
+
-- Make sure we add this worker to the list of seen workers
|
1562
|
+
redis.call('zadd', 'ql:workers', now, worker)
|
1563
|
+
|
1564
|
+
-- Check our max concurrency, and limit the count
|
1565
|
+
local max_concurrency = tonumber(
|
1566
|
+
Qless.config.get(self.name .. '-max-concurrency', 0))
|
1567
|
+
|
1568
|
+
if max_concurrency > 0 then
|
1569
|
+
-- Allow at most max_concurrency - #running
|
1570
|
+
local allowed = math.max(0, max_concurrency - self.locks.running(now))
|
1571
|
+
count = math.min(allowed, count)
|
1572
|
+
if count == 0 then
|
1573
|
+
return {}
|
1574
|
+
end
|
1575
|
+
end
|
1576
|
+
|
1577
|
+
local jids = self:invalidate_locks(now, count)
|
1578
|
+
-- Now we've checked __all__ the locks for this queue that could
|
1579
|
+
-- have expired, and are no more than the number requested.
|
1580
|
+
|
1581
|
+
-- If we still need jobs in order to meet demand, then we should
|
1582
|
+
-- look for all the recurring jobs that need jobs run
|
1583
|
+
self:check_recurring(now, count - #jids)
|
1584
|
+
|
1585
|
+
-- If we still need values in order to meet the demand, then we
|
1586
|
+
-- should check for any scheduled items, and if so, we should
|
1587
|
+
-- insert them to ensure correctness when pulling off the next
|
1588
|
+
-- unit of work.
|
1589
|
+
self:check_scheduled(now, count - #jids)
|
1590
|
+
|
1591
|
+
-- With these in place, we can expand this list of jids based on the work
|
1592
|
+
-- queue itself and the priorities therein
|
1593
|
+
table.extend(jids, self.work.peek(count - #jids))
|
1594
|
+
|
1595
|
+
local state
|
1596
|
+
for index, jid in ipairs(jids) do
|
1597
|
+
local job = Qless.job(jid)
|
1598
|
+
state = unpack(job:data('state'))
|
1599
|
+
job:history(now, 'popped', {worker = worker})
|
1600
|
+
|
1601
|
+
-- Update the wait time statistics
|
1602
|
+
local time = tonumber(
|
1603
|
+
redis.call('hget', QlessJob.ns .. jid, 'time') or now)
|
1604
|
+
local waiting = now - time
|
1605
|
+
self:stat(now, 'wait', waiting)
|
1606
|
+
redis.call('hset', QlessJob.ns .. jid,
|
1607
|
+
'time', string.format("%.20f", now))
|
1608
|
+
|
1609
|
+
-- Add this job to the list of jobs handled by this worker
|
1610
|
+
redis.call('zadd', 'ql:w:' .. worker .. ':jobs', expires, jid)
|
1611
|
+
|
1612
|
+
-- Update the job's data, and add its locks, and return the job
|
1613
|
+
job:update({
|
1614
|
+
worker = worker,
|
1615
|
+
expires = expires,
|
1616
|
+
state = 'running'
|
1617
|
+
})
|
1618
|
+
|
1619
|
+
self.locks.add(expires, jid)
|
1620
|
+
|
1621
|
+
local tracked = redis.call('zscore', 'ql:tracked', jid) ~= false
|
1622
|
+
if tracked then
|
1623
|
+
Qless.publish('popped', jid)
|
1624
|
+
end
|
1625
|
+
end
|
1626
|
+
|
1627
|
+
-- If we are returning any jobs, then we should remove them from the work
|
1628
|
+
-- queue
|
1629
|
+
self.work.remove(unpack(jids))
|
1630
|
+
|
1631
|
+
return jids
|
1632
|
+
end
|
1633
|
+
|
1634
|
+
-- Update the stats for this queue
|
1635
|
+
function QlessQueue:stat(now, stat, val)
|
1636
|
+
-- The bin is midnight of the provided day
|
1637
|
+
local bin = now - (now % 86400)
|
1638
|
+
local key = 'ql:s:' .. stat .. ':' .. bin .. ':' .. self.name
|
1639
|
+
|
1640
|
+
-- Get the current data
|
1641
|
+
local count, mean, vk = unpack(
|
1642
|
+
redis.call('hmget', key, 'total', 'mean', 'vk'))
|
1643
|
+
|
1644
|
+
-- If there isn't any data there presently, then we must initialize it
|
1645
|
+
count = count or 0
|
1646
|
+
if count == 0 then
|
1647
|
+
mean = val
|
1648
|
+
vk = 0
|
1649
|
+
count = 1
|
1650
|
+
else
|
1651
|
+
count = count + 1
|
1652
|
+
local oldmean = mean
|
1653
|
+
mean = mean + (val - mean) / count
|
1654
|
+
vk = vk + (val - mean) * (val - oldmean)
|
1655
|
+
end
|
1656
|
+
|
1657
|
+
-- Now, update the histogram
|
1658
|
+
-- - `s1`, `s2`, ..., -- second-resolution histogram counts
|
1659
|
+
-- - `m1`, `m2`, ..., -- minute-resolution
|
1660
|
+
-- - `h1`, `h2`, ..., -- hour-resolution
|
1661
|
+
-- - `d1`, `d2`, ..., -- day-resolution
|
1662
|
+
val = math.floor(val)
|
1663
|
+
if val < 60 then -- seconds
|
1664
|
+
redis.call('hincrby', key, 's' .. val, 1)
|
1665
|
+
elseif val < 3600 then -- minutes
|
1666
|
+
redis.call('hincrby', key, 'm' .. math.floor(val / 60), 1)
|
1667
|
+
elseif val < 86400 then -- hours
|
1668
|
+
redis.call('hincrby', key, 'h' .. math.floor(val / 3600), 1)
|
1669
|
+
else -- days
|
1670
|
+
redis.call('hincrby', key, 'd' .. math.floor(val / 86400), 1)
|
1671
|
+
end
|
1672
|
+
redis.call('hmset', key, 'total', count, 'mean', mean, 'vk', vk)
|
1673
|
+
end
|
1674
|
+
|
1675
|
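QlessQueue:stat above keeps a running mean and 'vk' (the running sum of squared deviations) and files each sample into a second/minute/hour/day histogram bucket. A standalone sketch of both steps, with made-up sample durations:

local count, mean, vk = 0, 0, 0

local function observe(val)
  if count == 0 then
    count, mean, vk = 1, val, 0
  else
    count = count + 1
    local oldmean = mean
    mean = mean + (val - mean) / count
    vk = vk + (val - mean) * (val - oldmean)
  end
end

local function bucket(val)
  val = math.floor(val)
  if val < 60 then return 's' .. val                            -- seconds
  elseif val < 3600 then return 'm' .. math.floor(val / 60)     -- minutes
  elseif val < 86400 then return 'h' .. math.floor(val / 3600)  -- hours
  else return 'd' .. math.floor(val / 86400) end                -- days
end

for _, v in ipairs({12, 45, 300, 7200}) do observe(v) end
print(count, mean, vk)
print(bucket(300), bucket(7200))           -- m5, h2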
+
-- Put(now, jid, klass, data, delay,
|
1676
|
+
-- [priority, p],
|
1677
|
+
-- [tags, t],
|
1678
|
+
-- [retries, r],
|
1679
|
+
-- [depends, '[...]'])
|
1680
|
+
-- -----------------------
|
1681
|
+
-- Insert a job into the queue with the given priority, tags, delay, klass and
|
1682
|
+
-- data.
|
1683
|
+
function QlessQueue:put(now, worker, jid, klass, raw_data, delay, ...)
|
1684
|
+
assert(jid , 'Put(): Arg "jid" missing')
|
1685
|
+
assert(klass, 'Put(): Arg "klass" missing')
|
1686
|
+
local data = assert(cjson.decode(raw_data),
|
1687
|
+
'Put(): Arg "data" missing or not JSON: ' .. tostring(raw_data))
|
1688
|
+
delay = assert(tonumber(delay),
|
1689
|
+
'Put(): Arg "delay" not a number: ' .. tostring(delay))
|
1690
|
+
|
1691
|
+
-- Read in all the optional parameters. All of these must come in pairs, so
|
1692
|
+
-- if we have an odd number of extra args, raise an error
|
1693
|
+
if #arg % 2 == 1 then
|
1694
|
+
error('Odd number of additional args: ' .. tostring(arg))
|
1695
|
+
end
|
1696
|
+
local options = {}
|
1697
|
+
for i = 1, #arg, 2 do options[arg[i]] = arg[i + 1] end
|
1698
|
+
|
1699
|
+
-- Let's see what the old priority and tags were
|
1700
|
+
local job = Qless.job(jid)
|
1701
|
+
local priority, tags, oldqueue, state, failure, retries, oldworker =
|
1702
|
+
unpack(redis.call('hmget', QlessJob.ns .. jid, 'priority', 'tags',
|
1703
|
+
'queue', 'state', 'failure', 'retries', 'worker'))
|
1704
|
+
|
1705
|
+
-- If there are old tags, then we should remove the tags this job has
|
1706
|
+
if tags then
|
1707
|
+
Qless.tag(now, 'remove', jid, unpack(cjson.decode(tags)))
|
1708
|
+
end
|
1709
|
+
|
1710
|
+
-- Sanity check on optional args
|
1711
|
+
retries = assert(tonumber(options['retries'] or retries or 5) ,
|
1712
|
+
'Put(): Arg "retries" not a number: ' .. tostring(options['retries']))
|
1713
|
+
tags = assert(cjson.decode(options['tags'] or tags or '[]' ),
|
1714
|
+
'Put(): Arg "tags" not JSON' .. tostring(options['tags']))
|
1715
|
+
priority = assert(tonumber(options['priority'] or priority or 0),
|
1716
|
+
'Put(): Arg "priority" not a number' .. tostring(options['priority']))
|
1717
|
+
local depends = assert(cjson.decode(options['depends'] or '[]') ,
|
1718
|
+
'Put(): Arg "depends" not JSON: ' .. tostring(options['depends']))
|
1719
|
+
|
1720
|
+
-- If the job has old dependencies, determine which dependencies are
|
1721
|
+
-- in the new dependencies but not in the old ones, and which are in the
|
1722
|
+
-- old ones but not in the new
|
1723
|
+
if #depends > 0 then
|
1724
|
+
-- This makes it easier to check if it's in the new list
|
1725
|
+
local new = {}
|
1726
|
+
for _, d in ipairs(depends) do new[d] = 1 end
|
1727
|
+
|
1728
|
+
-- Now find what's in the original, but not the new
|
1729
|
+
local original = redis.call(
|
1730
|
+
'smembers', QlessJob.ns .. jid .. '-dependencies')
|
1731
|
+
for _, dep in pairs(original) do
|
1732
|
+
if new[dep] == nil then
|
1733
|
+
-- Remove dep as a dependency
|
1734
|
+
redis.call('srem', QlessJob.ns .. dep .. '-dependents' , jid)
|
1735
|
+
redis.call('srem', QlessJob.ns .. jid .. '-dependencies', dep)
|
1736
|
+
end
|
1737
|
+
end
|
1738
|
+
end
|
1739
|
+
|
1740
|
+
-- Send out a log message
|
1741
|
+
Qless.publish('log', cjson.encode({
|
1742
|
+
jid = jid,
|
1743
|
+
event = 'put',
|
1744
|
+
queue = self.name
|
1745
|
+
}))
|
1746
|
+
|
1747
|
+
-- Update the history to include this new change
|
1748
|
+
job:history(now, 'put', {q = self.name})
|
1749
|
+
|
1750
|
+
-- If this item was previously in another queue, then we should remove it from there
|
1751
|
+
if oldqueue then
|
1752
|
+
local queue_obj = Qless.queue(oldqueue)
|
1753
|
+
queue_obj.work.remove(jid)
|
1754
|
+
queue_obj.locks.remove(jid)
|
1755
|
+
queue_obj.depends.remove(jid)
|
1756
|
+
queue_obj.scheduled.remove(jid)
|
1757
|
+
end
|
1758
|
+
|
1759
|
+
-- If this had previously been given out to a worker, make sure to remove it
|
1760
|
+
-- from that worker's jobs
|
1761
|
+
if oldworker and oldworker ~= '' then
|
1762
|
+
redis.call('zrem', 'ql:w:' .. oldworker .. ':jobs', jid)
|
1763
|
+
-- If it's a different worker that's putting this job, send a notification
|
1764
|
+
-- to the last owner of the job
|
1765
|
+
if oldworker ~= worker then
|
1766
|
+
-- We need to inform whatever worker had that job
|
1767
|
+
local encoded = cjson.encode({
|
1768
|
+
jid = jid,
|
1769
|
+
event = 'lock_lost',
|
1770
|
+
worker = oldworker
|
1771
|
+
})
|
1772
|
+
Qless.publish('w:' .. oldworker, encoded)
|
1773
|
+
Qless.publish('log', encoded)
|
1774
|
+
end
|
1775
|
+
end
|
1776
|
+
|
1777
|
+
-- If the job was previously in the 'completed' state, then we should
|
1778
|
+
-- remove it from being enqueued for destruction
|
1779
|
+
if state == 'complete' then
|
1780
|
+
redis.call('zrem', 'ql:completed', jid)
|
1781
|
+
end
|
1782
|
+
|
1783
|
+
-- Add this job to the list of jobs tagged with whatever tags were supplied
|
1784
|
+
for i, tag in ipairs(tags) do
|
1785
|
+
redis.call('zadd', 'ql:t:' .. tag, now, jid)
|
1786
|
+
redis.call('zincrby', 'ql:tags', 1, tag)
|
1787
|
+
end
|
1788
|
+
|
1789
|
+
-- If we're in the failed state, remove all of our data
|
1790
|
+
if state == 'failed' then
|
1791
|
+
failure = cjson.decode(failure)
|
1792
|
+
-- We need to remove it from the failed queues
|
1793
|
+
redis.call('lrem', 'ql:f:' .. failure.group, 0, jid)
|
1794
|
+
if redis.call('llen', 'ql:f:' .. failure.group) == 0 then
|
1795
|
+
redis.call('srem', 'ql:failures', failure.group)
|
1796
|
+
end
|
1797
|
+
-- The bin is midnight of the provided day
|
1798
|
+
-- 24 * 60 * 60 = 86400
|
1799
|
+
local bin = failure.when - (failure.when % 86400)
|
1800
|
+
-- We also need to decrement the stats about the queue on
|
1801
|
+
-- the day that this failure actually happened.
|
1802
|
+
redis.call('hincrby', 'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , -1)
|
1803
|
+
end
|
1804
|
+
|
1805
|
+
-- First, let's save its data
|
1806
|
+
redis.call('hmset', QlessJob.ns .. jid,
|
1807
|
+
'jid' , jid,
|
1808
|
+
'klass' , klass,
|
1809
|
+
'data' , raw_data,
|
1810
|
+
'priority' , priority,
|
1811
|
+
'tags' , cjson.encode(tags),
|
1812
|
+
'state' , ((delay > 0) and 'scheduled') or 'waiting',
|
1813
|
+
'worker' , '',
|
1814
|
+
'expires' , 0,
|
1815
|
+
'queue' , self.name,
|
1816
|
+
'retries' , retries,
|
1817
|
+
'remaining', retries,
|
1818
|
+
'time' , string.format("%.20f", now))
|
1819
|
+
|
1820
|
+
-- These are the jids we legitimately have to wait on
|
1821
|
+
for i, j in ipairs(depends) do
|
1822
|
+
-- Make sure it's something other than 'nil' or complete.
|
1823
|
+
local state = redis.call('hget', QlessJob.ns .. j, 'state')
|
1824
|
+
if (state and state ~= 'complete') then
|
1825
|
+
redis.call('sadd', QlessJob.ns .. j .. '-dependents' , jid)
|
1826
|
+
redis.call('sadd', QlessJob.ns .. jid .. '-dependencies', j)
|
1827
|
+
end
|
1828
|
+
end
|
1829
|
+
|
1830
|
+
-- Now, if a delay was provided, and if it's in the future,
|
1831
|
+
-- then we'll have to schedule it. Otherwise, we're just
|
1832
|
+
-- going to add it to the work queue.
|
1833
|
+
if delay > 0 then
|
1834
|
+
if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
|
1835
|
+
-- We've already put it in 'depends'. Now, we must just save the data
|
1836
|
+
-- for when it's scheduled
|
1837
|
+
self.depends.add(now, jid)
|
1838
|
+
redis.call('hmset', QlessJob.ns .. jid,
|
1839
|
+
'state', 'depends',
|
1840
|
+
'scheduled', now + delay)
|
1841
|
+
else
|
1842
|
+
self.scheduled.add(now + delay, jid)
|
1843
|
+
end
|
1844
|
+
else
|
1845
|
+
if redis.call('scard', QlessJob.ns .. jid .. '-dependencies') > 0 then
|
1846
|
+
self.depends.add(now, jid)
|
1847
|
+
redis.call('hset', QlessJob.ns .. jid, 'state', 'depends')
|
1848
|
+
else
|
1849
|
+
self.work.add(now, priority, jid)
|
1850
|
+
end
|
1851
|
+
end
|
1852
|
+
|
1853
|
+
-- Lastly, we're going to make sure that this item is in the
|
1854
|
+
-- set of known queues. We should keep this sorted by the
|
1855
|
+
-- order in which we saw each of these queues
|
1856
|
+
if redis.call('zscore', 'ql:queues', self.name) == false then
|
1857
|
+
redis.call('zadd', 'ql:queues', now, self.name)
|
1858
|
+
end
|
1859
|
+
|
1860
|
+
if redis.call('zscore', 'ql:tracked', jid) ~= false then
|
1861
|
+
Qless.publish('put', jid)
|
1862
|
+
end
|
1863
|
+
|
1864
|
+
return jid
|
1865
|
+
end
|
1866
|
+
|
1867
|
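Put() accepts its optional settings as trailing name/value pairs and folds them into an options table, rejecting an odd-length tail. A standalone sketch with hypothetical values:

local extra = {'priority', '3', 'retries', '5', 'tags', '["foo"]'}
if #extra % 2 == 1 then
  error('Odd number of additional args')
end
local options = {}
for i = 1, #extra, 2 do options[extra[i]] = extra[i + 1] end
print(options.priority, options.retries, options.tags)   -- 3  5  ["foo"]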
+
-- Move `count` jobs out of the failed state and into this queue
|
1868
|
+
function QlessQueue:unfail(now, group, count)
|
1869
|
+
assert(group, 'Unfail(): Arg "group" missing')
|
1870
|
+
count = assert(tonumber(count or 25),
|
1871
|
+
'Unfail(): Arg "count" not a number: ' .. tostring(count))
|
1872
|
+
|
1873
|
+
-- Get up to that many jobs, and we'll put them in the appropriate queue
|
1874
|
+
local jids = redis.call('lrange', 'ql:f:' .. group, -count, -1)
|
1875
|
+
|
1876
|
+
-- And now set each job's state, and put it into the appropriate queue
|
1877
|
+
local toinsert = {}
|
1878
|
+
for index, jid in ipairs(jids) do
|
1879
|
+
local job = Qless.job(jid)
|
1880
|
+
local data = job:data()
|
1881
|
+
job:history(now, 'put', {q = self.name})
|
1882
|
+
redis.call('hmset', QlessJob.ns .. data.jid,
|
1883
|
+
'state' , 'waiting',
|
1884
|
+
'worker' , '',
|
1885
|
+
'expires' , 0,
|
1886
|
+
'queue' , self.name,
|
1887
|
+
'remaining', data.retries or 5)
|
1888
|
+
self.work.add(now, data.priority, data.jid)
|
1889
|
+
end
|
1890
|
+
|
1891
|
+
-- Remove these jobs from the failed state
|
1892
|
+
redis.call('ltrim', 'ql:f:' .. group, 0, -count - 1)
|
1893
|
+
if (redis.call('llen', 'ql:f:' .. group) == 0) then
|
1894
|
+
redis.call('srem', 'ql:failures', group)
|
1895
|
+
end
|
1896
|
+
|
1897
|
+
return #jids
|
1898
|
+
end
|
1899
|
+
|
1900
|
+
-- Recur a job of type klass in this queue
|
1901
|
+
function QlessQueue:recur(now, jid, klass, raw_data, spec, ...)
|
1902
|
+
assert(jid , 'RecurringJob On(): Arg "jid" missing')
|
1903
|
+
assert(klass, 'RecurringJob On(): Arg "klass" missing')
|
1904
|
+
assert(spec , 'RecurringJob On(): Arg "spec" missing')
|
1905
|
+
local data = assert(cjson.decode(raw_data),
|
1906
|
+
'RecurringJob On(): Arg "data" not JSON: ' .. tostring(raw_data))
|
1907
|
+
|
1908
|
+
-- At some point in the future, we may have different types of recurring
|
1909
|
+
-- jobs, but for the time being, we only have 'interval'-type jobs
|
1910
|
+
if spec == 'interval' then
|
1911
|
+
local interval = assert(tonumber(arg[1]),
|
1912
|
+
'Recur(): Arg "interval" not a number: ' .. tostring(arg[1]))
|
1913
|
+
local offset = assert(tonumber(arg[2]),
|
1914
|
+
'Recur(): Arg "offset" not a number: ' .. tostring(arg[2]))
|
1915
|
+
if interval <= 0 then
|
1916
|
+
error('Recur(): Arg "interval" must be greater than 0')
|
1917
|
+
end
|
1918
|
+
|
1919
|
+
-- Read in all the optional parameters. All of these must come in
|
1920
|
+
-- pairs, so if we have an odd number of extra args, raise an error
|
1921
|
+
if #arg % 2 == 1 then
|
1922
|
+
error('Odd number of additional args: ' .. tostring(arg))
|
1923
|
+
end
|
1924
|
+
|
1925
|
+
-- Read in all the optional parameters
|
1926
|
+
local options = {}
|
1927
|
+
for i = 3, #arg, 2 do options[arg[i]] = arg[i + 1] end
|
1928
|
+
options.tags = assert(cjson.decode(options.tags or '{}'),
|
1929
|
+
'Recur(): Arg "tags" must be JSON string array: ' .. tostring(
|
1930
|
+
options.tags))
|
1931
|
+
options.priority = assert(tonumber(options.priority or 0),
|
1932
|
+
'Recur(): Arg "priority" not a number: ' .. tostring(
|
1933
|
+
options.priority))
|
1934
|
+
options.retries = assert(tonumber(options.retries or 0),
|
1935
|
+
'Recur(): Arg "retries" not a number: ' .. tostring(
|
1936
|
+
options.retries))
|
1937
|
+
options.backlog = assert(tonumber(options.backlog or 0),
|
1938
|
+
'Recur(): Arg "backlog" not a number: ' .. tostring(
|
1939
|
+
options.backlog))
|
1940
|
+
|
1941
|
+
local count, old_queue = unpack(redis.call('hmget', 'ql:r:' .. jid, 'count', 'queue'))
|
1942
|
+
count = count or 0
|
1943
|
+
|
1944
|
+
-- If it has previously been in another queue, then we should remove
|
1945
|
+
-- some information about it
|
1946
|
+
if old_queue then
|
1947
|
+
Qless.queue(old_queue).recurring.remove(jid)
|
1948
|
+
end
|
1949
|
+
|
1950
|
+
-- Do some insertions
|
1951
|
+
redis.call('hmset', 'ql:r:' .. jid,
|
1952
|
+
'jid' , jid,
|
1953
|
+
'klass' , klass,
|
1954
|
+
'data' , raw_data,
|
1955
|
+
'priority', options.priority,
|
1956
|
+
'tags' , cjson.encode(options.tags or {}),
|
1957
|
+
'state' , 'recur',
|
1958
|
+
'queue' , self.name,
|
1959
|
+
'type' , 'interval',
|
1960
|
+
-- How many jobs we've spawned from this
|
1961
|
+
'count' , count,
|
1962
|
+
'interval', interval,
|
1963
|
+
'retries' , options.retries,
|
1964
|
+
'backlog' , options.backlog)
|
1965
|
+
-- Now, we should schedule the next run of the job
|
1966
|
+
self.recurring.add(now + offset, jid)
|
1967
|
+
|
1968
|
+
-- Lastly, we're going to make sure that this item is in the
|
1969
|
+
-- set of known queues. We should keep this sorted by the
|
1970
|
+
-- order in which we saw each of these queues
|
1971
|
+
if redis.call('zscore', 'ql:queues', self.name) == false then
|
1972
|
+
redis.call('zadd', 'ql:queues', now, self.name)
|
1973
|
+
end
|
1974
|
+
|
1975
|
+
return jid
|
1976
|
+
else
|
1977
|
+
error('Recur(): schedule type "' .. tostring(spec) .. '" unknown')
|
1978
|
+
end
|
1979
|
+
end
|
1980
|
+
|
1981
|
+
-- Return the length of the queue
|
1982
|
+
function QlessQueue:length()
|
1983
|
+
return self.locks.length() + self.work.length() + self.scheduled.length()
|
1984
|
+
end
|
1985
|
+
|
1986
|
+
-------------------------------------------------------------------------------
|
1987
|
+
-- Housekeeping methods
|
1988
|
+
-------------------------------------------------------------------------------
|
1989
|
+
-- Instantiate any recurring jobs that are ready
|
1990
|
+
function QlessQueue:check_recurring(now, count)
|
1991
|
+
-- This is how many jobs we've moved so far
|
1992
|
+
local moved = 0
|
1993
|
+
-- These are the recurring jobs that need work
|
1994
|
+
local r = self.recurring.peek(now, 0, count)
|
1995
|
+
for index, jid in ipairs(r) do
|
1996
|
+
-- For each of the jids that need jobs scheduled, first
|
1997
|
+
-- get the last time each of them was run, and then increment
|
1998
|
+
-- it by its interval. While this time is less than now,
|
1999
|
+
-- we need to keep putting jobs on the queue
|
2000
|
+
local klass, data, priority, tags, retries, interval, backlog = unpack(
|
2001
|
+
redis.call('hmget', 'ql:r:' .. jid, 'klass', 'data', 'priority',
|
2002
|
+
'tags', 'retries', 'interval', 'backlog'))
|
2003
|
+
local _tags = cjson.decode(tags)
|
2004
|
+
local score = math.floor(tonumber(self.recurring.score(jid)))
|
2005
|
+
interval = tonumber(interval)
|
2006
|
+
|
2007
|
+
-- If the backlog is set for this job, then see if it's been a long
|
2008
|
+
-- time since the last pop
|
2009
|
+
backlog = tonumber(backlog or 0)
|
2010
|
+
if backlog ~= 0 then
|
2011
|
+
-- Check how many jobs we could conceivably generate
|
2012
|
+
local num = ((now - score) / interval)
|
2013
|
+
if num > backlog then
|
2014
|
+
-- Update the score
|
2015
|
+
score = score + (
|
2016
|
+
math.ceil(num - backlog) * interval
|
2017
|
+
)
|
2018
|
+
end
|
2019
|
+
end
|
2020
|
+
|
2021
|
+
-- We're saving this value so that in the history, we can accurately
|
2022
|
+
-- reflect when the job would normally have been scheduled
|
2023
|
+
while (score <= now) and (moved < count) do
|
2024
|
+
local count = redis.call('hincrby', 'ql:r:' .. jid, 'count', 1)
|
2025
|
+
moved = moved + 1
|
2026
|
+
|
2027
|
+
local child_jid = jid .. '-' .. count
|
2028
|
+
|
2029
|
+
-- Add this job to the list of jobs tagged with whatever tags were
|
2030
|
+
-- supplied
|
2031
|
+
for i, tag in ipairs(_tags) do
|
2032
|
+
redis.call('zadd', 'ql:t:' .. tag, now, child_jid)
|
2033
|
+
redis.call('zincrby', 'ql:tags', 1, tag)
|
2034
|
+
end
|
2035
|
+
|
2036
|
+
-- First, let's save its data
|
2037
|
+
redis.call('hmset', QlessJob.ns .. child_jid,
|
2038
|
+
'jid' , child_jid,
|
2039
|
+
'klass' , klass,
|
2040
|
+
'data' , data,
|
2041
|
+
'priority' , priority,
|
2042
|
+
'tags' , tags,
|
2043
|
+
'state' , 'waiting',
|
2044
|
+
'worker' , '',
|
2045
|
+
'expires' , 0,
|
2046
|
+
'queue' , self.name,
|
2047
|
+
'retries' , retries,
|
2048
|
+
'remaining' , retries,
|
2049
|
+
'time' , string.format("%.20f", score),
|
2050
|
+
'spawned_from_jid', jid)
|
2051
|
+
Qless.job(child_jid):history(score, 'put', {q = self.name})
|
2052
|
+
|
2053
|
+
-- Now, if a delay was provided, and if it's in the future,
|
2054
|
+
-- then we'll have to schedule it. Otherwise, we're just
|
2055
|
+
-- going to add it to the work queue.
|
2056
|
+
self.work.add(score, priority, child_jid)
|
2057
|
+
|
2058
|
+
score = score + interval
|
2059
|
+
self.recurring.add(score, jid)
|
2060
|
+
end
|
2061
|
+
end
|
2062
|
+
end
|
2063
|
+
|
2064
|
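The 'backlog' check in check_recurring above caps how many missed runs of a recurring job get spawned at once: if more intervals have elapsed than the backlog allows, the score is skipped forward past the excess. A standalone sketch with made-up numbers:

local now, score, interval, backlog = 1000, 0, 60, 3   -- hypothetical values
local num = (now - score) / interval                   -- runs that have accrued
if backlog ~= 0 and num > backlog then
  score = score + math.ceil(num - backlog) * interval  -- skip the excess runs
end
print(score, (now - score) / interval)                 -- 840, roughly backlog runs left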
+
-- Check for any jobs that have been scheduled, and shovel them onto
|
2065
|
+
-- the work queue. Returns nothing, but afterwards, up to `count`
|
2066
|
+
-- scheduled jobs will be moved into the work queue
|
2067
|
+
function QlessQueue:check_scheduled(now, count)
|
2068
|
+
-- Find any scheduled jobs that are now ready to be moved
|
2069
|
+
-- onto the work queue
|
2070
|
+
local scheduled = self.scheduled.ready(now, 0, count)
|
2071
|
+
for index, jid in ipairs(scheduled) do
|
2072
|
+
-- With these in hand, we'll have to go out and find the
|
2073
|
+
-- priorities of these jobs, and then we'll insert them
|
2074
|
+
-- into the work queue and then when that's complete, we'll
|
2075
|
+
-- remove them from the scheduled queue
|
2076
|
+
local priority = tonumber(
|
2077
|
+
redis.call('hget', QlessJob.ns .. jid, 'priority') or 0)
|
2078
|
+
self.work.add(now, priority, jid)
|
2079
|
+
self.scheduled.remove(jid)
|
2080
|
+
|
2081
|
+
-- We should also update them to have the state 'waiting'
|
2082
|
+
-- instead of 'scheduled'
|
2083
|
+
redis.call('hset', QlessJob.ns .. jid, 'state', 'waiting')
|
2084
|
+
end
|
2085
|
+
end
|
2086
|
+
|
2087
|
+
-- Check for and invalidate any locks that have been lost. Returns the
|
2088
|
+
-- list of jids that have been invalidated
|
2089
|
+
function QlessQueue:invalidate_locks(now, count)
|
2090
|
+
local jids = {}
|
2091
|
+
-- Iterate through all the expired locks and add them to the list
|
2092
|
+
-- of keys that we'll return
|
2093
|
+
for index, jid in ipairs(self.locks.expired(now, 0, count)) do
|
2094
|
+
-- Remove this job from the jobs that the worker that was running it
|
2095
|
+
-- has
|
2096
|
+
local worker, failure = unpack(
|
2097
|
+
redis.call('hmget', QlessJob.ns .. jid, 'worker', 'failure'))
|
2098
|
+
redis.call('zrem', 'ql:w:' .. worker .. ':jobs', jid)
|
2099
|
+
|
2100
|
+
-- We'll provide a grace period after jobs time out for them to give
|
2101
|
+
-- some indication of the failure mode. After that time, however, we'll
|
2102
|
+
-- consider the worker dust in the wind
|
2103
|
+
local grace_period = tonumber(Qless.config.get('grace-period'))
|
2104
|
+
|
2105
|
+
-- Whether or not we've already sent a courtesy message
|
2106
|
+
local courtesy_sent = tonumber(
|
2107
|
+
redis.call('hget', QlessJob.ns .. jid, 'grace') or 0)
|
2108
|
+
|
2109
|
+
-- If we haven't yet sent a courtesy message, then we'll just send
|
2110
|
+
-- the message this time around. Otherwise, it's time to
|
2111
|
+
-- actually hand out the work to another worker
|
2112
|
+
local send_message = (courtesy_sent ~= 1)
|
2113
|
+
local invalidate = not send_message
|
2114
|
+
|
2115
|
+
-- If the grace period has been disabled, then we'll do both.
|
2116
|
+
if grace_period <= 0 then
|
2117
|
+
send_message = true
|
2118
|
+
invalidate = true
|
2119
|
+
end
|
2120
|
+
|
2121
|
+
if send_message then
|
2122
|
+
-- This is where we supply a courtesy message and give the worker
|
2123
|
+
-- time to provide a failure message
|
2124
|
+
if redis.call('zscore', 'ql:tracked', jid) ~= false then
|
2125
|
+
Qless.publish('stalled', jid)
|
2126
|
+
end
|
2127
|
+
Qless.job(jid):history(now, 'timed-out')
|
2128
|
+
redis.call('hset', QlessJob.ns .. jid, 'grace', 1)
|
2129
|
+
|
2130
|
+
-- Send a message to let the worker know that it's lost its lock on
|
2131
|
+
-- the job
|
2132
|
+
local encoded = cjson.encode({
|
2133
|
+
jid = jid,
|
2134
|
+
event = 'lock_lost',
|
2135
|
+
worker = worker
|
2136
|
+
})
|
2137
|
+
Qless.publish('w:' .. worker, encoded)
|
2138
|
+
Qless.publish('log', encoded)
|
2139
|
+
self.locks.add(now + grace_period, jid)
|
2140
|
+
|
2141
|
+
-- If we got any expired locks, then we should increment the
|
2142
|
+
-- number of retries for this stage for this bin. The bin is
|
2143
|
+
-- midnight of the provided day
|
2144
|
+
local bin = now - (now % 86400)
|
2145
|
+
redis.call('hincrby',
|
2146
|
+
'ql:s:stats:' .. bin .. ':' .. self.name, 'retries', 1)
|
2147
|
+
end
|
2148
|
+
|
2149
|
+
if invalidate then
|
2150
|
+
-- Unset the grace period attribute so that next time we'll send
|
2151
|
+
-- the courtesy message again
|
2152
|
+
redis.call('hdel', QlessJob.ns .. jid, 'grace', 0)
|
2153
|
+
|
2154
|
+
-- See how many remaining retries the job has
|
2155
|
+
local remaining = tonumber(redis.call(
|
2156
|
+
'hincrby', QlessJob.ns .. jid, 'remaining', -1))
|
2157
|
+
|
2158
|
+
-- This is where we actually have to time out the work
|
2159
|
+
if remaining < 0 then
|
2160
|
+
-- Now remove the instance from the schedule, and work queues
|
2161
|
+
-- for the queue it's in
|
2162
|
+
self.work.remove(jid)
|
2163
|
+
self.locks.remove(jid)
|
2164
|
+
self.scheduled.remove(jid)
|
2165
|
+
|
2166
|
+
local group = 'failed-retries-' .. Qless.job(jid):data()['queue']
|
2167
|
+
local job = Qless.job(jid)
|
2168
|
+
job:history(now, 'failed', {group = group})
|
2169
|
+
redis.call('hmset', QlessJob.ns .. jid, 'state', 'failed',
|
2170
|
+
'worker', '',
|
2171
|
+
'expires', '')
|
2172
|
+
-- If the failure has not already been set, then set it
|
2173
|
+
redis.call('hset', QlessJob.ns .. jid,
|
2174
|
+
'failure', cjson.encode({
|
2175
|
+
['group'] = group,
|
2176
|
+
['message'] =
|
2177
|
+
'Job exhausted retries in queue "' .. self.name .. '"',
|
2178
|
+
['when'] = now,
|
2179
|
+
['worker'] = unpack(job:data('worker'))
|
2180
|
+
}))
|
2181
|
+
|
2182
|
+
-- Add this type of failure to the list of failures
|
2183
|
+
redis.call('sadd', 'ql:failures', group)
|
2184
|
+
-- And add this particular instance to the failed types
|
2185
|
+
redis.call('lpush', 'ql:f:' .. group, jid)
|
2186
|
+
|
2187
|
+
if redis.call('zscore', 'ql:tracked', jid) ~= false then
|
2188
|
+
Qless.publish('failed', jid)
|
2189
|
+
end
|
2190
|
+
Qless.publish('log', cjson.encode({
|
2191
|
+
jid = jid,
|
2192
|
+
event = 'failed',
|
2193
|
+
group = group,
|
2194
|
+
worker = worker,
|
2195
|
+
message =
|
2196
|
+
'Job exhausted retries in queue "' .. self.name .. '"'
|
2197
|
+
}))
|
2198
|
+
|
2199
|
+
-- Increment the count of the failed jobs
|
2200
|
+
local bin = now - (now % 86400)
|
2201
|
+
redis.call('hincrby',
|
2202
|
+
'ql:s:stats:' .. bin .. ':' .. self.name, 'failures', 1)
|
2203
|
+
redis.call('hincrby',
|
2204
|
+
'ql:s:stats:' .. bin .. ':' .. self.name, 'failed' , 1)
|
2205
|
+
else
|
2206
|
+
table.insert(jids, jid)
|
2207
|
+
end
|
2208
|
+
end
|
2209
|
+
end
|
2210
|
+
|
2211
|
+
return jids
|
2212
|
+
end
|
2213
|
+
|
2214
|
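invalidate_locks above handles an expired lock in two passes: the first pass only sends a courtesy 'lock_lost' message and extends the lock by the grace period; the next pass (or the first one, if the grace period is disabled) actually retries or fails the job. A standalone sketch of that decision, with made-up values:

local grace_period, courtesy_sent = 10, 0    -- hypothetical config and job state

local send_message = (courtesy_sent ~= 1)
local invalidate   = not send_message
if grace_period <= 0 then                    -- grace period disabled: do both at once
  send_message = true
  invalidate   = true
end
print(send_message, invalidate)              -- true  false: warn now, retry or fail later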
+
-- Forget the provided queues. As in, remove them from the list of known queues
|
2215
|
+
function QlessQueue.deregister(...)
|
2216
|
+
redis.call('zrem', Qless.ns .. 'queues', unpack(arg))
|
2217
|
+
end
|
2218
|
+
|
2219
|
+
-- Return information about a particular queue, or all queues
|
2220
|
+
-- [
|
2221
|
+
-- {
|
2222
|
+
-- 'name': 'testing',
|
2223
|
+
-- 'stalled': 2,
|
2224
|
+
-- 'waiting': 5,
|
2225
|
+
-- 'running': 5,
|
2226
|
+
-- 'scheduled': 10,
|
2227
|
+
-- 'depends': 5,
|
2228
|
+
-- 'recurring': 0
|
2229
|
+
-- }, {
|
2230
|
+
-- ...
|
2231
|
+
-- }
|
2232
|
+
-- ]
|
2233
|
+
function QlessQueue.counts(now, name)
|
2234
|
+
if name then
|
2235
|
+
local queue = Qless.queue(name)
|
2236
|
+
local stalled = queue.locks.length(now)
|
2237
|
+
-- Check for any scheduled jobs that need to be moved
|
2238
|
+
queue:check_scheduled(now, queue.scheduled.length())
|
2239
|
+
return {
|
2240
|
+
name = name,
|
2241
|
+
waiting = queue.work.length(),
|
2242
|
+
stalled = stalled,
|
2243
|
+
running = queue.locks.length() - stalled,
|
2244
|
+
scheduled = queue.scheduled.length(),
|
2245
|
+
depends = queue.depends.length(),
|
2246
|
+
recurring = queue.recurring.length(),
|
2247
|
+
paused = queue:paused()
|
2248
|
+
}
|
2249
|
+
else
|
2250
|
+
local queues = redis.call('zrange', 'ql:queues', 0, -1)
|
2251
|
+
local response = {}
|
2252
|
+
for index, qname in ipairs(queues) do
|
2253
|
+
table.insert(response, QlessQueue.counts(now, qname))
|
2254
|
+
end
|
2255
|
+
return response
|
2256
|
+
end
|
2257
|
+
end
|
2258
|
+
-- Get all the attributes of this particular job
|
2259
|
+
function QlessRecurringJob:data()
|
2260
|
+
local job = redis.call(
|
2261
|
+
'hmget', 'ql:r:' .. self.jid, 'jid', 'klass', 'state', 'queue',
|
2262
|
+
'priority', 'interval', 'retries', 'count', 'data', 'tags', 'backlog')
|
2263
|
+
|
2264
|
+
if not job[1] then
|
2265
|
+
return nil
|
2266
|
+
end
|
2267
|
+
|
2268
|
+
return {
|
2269
|
+
jid = job[1],
|
2270
|
+
klass = job[2],
|
2271
|
+
state = job[3],
|
2272
|
+
queue = job[4],
|
2273
|
+
priority = tonumber(job[5]),
|
2274
|
+
interval = tonumber(job[6]),
|
2275
|
+
retries = tonumber(job[7]),
|
2276
|
+
count = tonumber(job[8]),
|
2277
|
+
data = job[9],
|
2278
|
+
tags = cjson.decode(job[10]),
|
2279
|
+
backlog = tonumber(job[11] or 0)
|
2280
|
+
}
|
2281
|
+
end
|
2282
|
+
|
2283
|
+
-- Update the recurring job data. Key can be:
|
2284
|
+
-- - priority
|
2285
|
+
-- - interval
|
2286
|
+
-- - retries
|
2287
|
+
-- - data
|
2288
|
+
-- - klass
|
2289
|
+
-- - queue
|
2290
|
+
-- - backlog
|
2291
|
+
function QlessRecurringJob:update(now, ...)
|
2292
|
+
local options = {}
|
2293
|
+
-- Make sure that the job exists
|
2294
|
+
if redis.call('exists', 'ql:r:' .. self.jid) ~= 0 then
|
2295
|
+
for i = 1, #arg, 2 do
|
2296
|
+
local key = arg[i]
|
2297
|
+
local value = arg[i+1]
|
2298
|
+
assert(value, 'No value provided for ' .. tostring(key))
|
2299
|
+
if key == 'priority' or key == 'interval' or key == 'retries' then
|
2300
|
+
value = assert(tonumber(value), 'Recur(): Arg "' .. key .. '" must be a number: ' .. tostring(value))
|
2301
|
+
-- If the command is 'interval', then we need to update the
|
2302
|
+
-- time when it should next be scheduled
|
2303
|
+
if key == 'interval' then
|
2304
|
+
local queue, interval = unpack(redis.call('hmget', 'ql:r:' .. self.jid, 'queue', 'interval'))
|
2305
|
+
Qless.queue(queue).recurring.update(
|
2306
|
+
value - tonumber(interval), self.jid)
|
2307
|
+
end
|
2308
|
+
redis.call('hset', 'ql:r:' .. self.jid, key, value)
|
2309
|
+
elseif key == 'data' then
|
2310
|
+
assert(cjson.decode(value), 'Recur(): Arg "data" is not JSON-encoded: ' .. tostring(value))
|
2311
|
+
redis.call('hset', 'ql:r:' .. self.jid, 'data', value)
|
2312
|
+
elseif key == 'klass' then
|
2313
|
+
redis.call('hset', 'ql:r:' .. self.jid, 'klass', value)
|
2314
|
+
elseif key == 'queue' then
|
2315
|
+
local queue_obj = Qless.queue(
|
2316
|
+
redis.call('hget', 'ql:r:' .. self.jid, 'queue'))
|
2317
|
+
local score = queue_obj.recurring.score(self.jid)
|
2318
|
+
queue_obj.recurring.remove(self.jid)
|
2319
|
+
Qless.queue(value).recurring.add(score, self.jid)
|
2320
|
+
redis.call('hset', 'ql:r:' .. self.jid, 'queue', value)
|
2321
|
+
-- If we don't already know about the queue, learn about it
|
2322
|
+
if redis.call('zscore', 'ql:queues', value) == false then
|
2323
|
+
redis.call('zadd', 'ql:queues', now, value)
|
2324
|
+
end
|
2325
|
+
elseif key == 'backlog' then
|
2326
|
+
value = assert(tonumber(value),
|
2327
|
+
'Recur(): Arg "backlog" not a number: ' .. tostring(value))
|
2328
|
+
redis.call('hset', 'ql:r:' .. self.jid, 'backlog', value)
|
2329
|
+
else
|
2330
|
+
error('Recur(): Unrecognized option "' .. key .. '"')
|
2331
|
+
end
|
2332
|
+
end
|
2333
|
+
return true
|
2334
|
+
else
|
2335
|
+
error('Recur(): No recurring job ' .. self.jid)
|
2336
|
+
end
|
2337
|
+
end
|
2338
|
+
|
2339
|
+
-- Tags this recurring job with the provided tags
|
2340
|
+
function QlessRecurringJob:tag(...)
|
2341
|
+
local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
|
2342
|
+
-- If the job has been canceled / deleted, then return false
|
2343
|
+
if tags then
|
2344
|
+
-- Decode the json blob, convert to dictionary
|
2345
|
+
tags = cjson.decode(tags)
|
2346
|
+
local _tags = {}
|
2347
|
+
for i,v in ipairs(tags) do _tags[v] = true end
|
2348
|
+
|
2349
|
+
-- Otherwise, add the job to the sorted set with those tags
|
2350
|
+
for i=1,#arg do if _tags[arg[i]] == nil then table.insert(tags, arg[i]) end end
|
2351
|
+
|
2352
|
+
tags = cjson.encode(tags)
|
2353
|
+
redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
|
2354
|
+
return tags
|
2355
|
+
else
|
2356
|
+
error('Tag(): Job ' .. self.jid .. ' does not exist')
|
2357
|
+
end
|
2358
|
+
end
|
2359
|
+
|
2360
|
+
-- Removes a tag from the recurring job
|
2361
|
+
function QlessRecurringJob:untag(...)
|
2362
|
+
-- Get the existing tags
|
2363
|
+
local tags = redis.call('hget', 'ql:r:' .. self.jid, 'tags')
|
2364
|
+
-- If the job has been canceled / deleted, then return false
|
2365
|
+
if tags then
|
2366
|
+
-- Decode the json blob, convert to dictionary
|
2367
|
+
tags = cjson.decode(tags)
|
2368
|
+
local _tags = {}
|
2369
|
+
-- Make a hash
|
2370
|
+
for i,v in ipairs(tags) do _tags[v] = true end
|
2371
|
+
-- Delete these from the hash
|
2372
|
+
for i = 1,#arg do _tags[arg[i]] = nil end
|
2373
|
+
-- Back into a list
|
2374
|
+
local results = {}
|
2375
|
+
for i, tag in ipairs(tags) do if _tags[tag] then table.insert(results, tag) end end
|
2376
|
+
-- json encode them, set, and return
|
2377
|
+
tags = cjson.encode(results)
|
2378
|
+
redis.call('hset', 'ql:r:' .. self.jid, 'tags', tags)
|
2379
|
+
return tags
|
2380
|
+
else
|
2381
|
+
error('Untag(): Job ' .. self.jid .. ' does not exist')
|
2382
|
+
end
|
2383
|
+
end
|
2384
|
+
|
2385
|
+
-- Stop further occurrences of this job
|
2386
|
+
function QlessRecurringJob:unrecur()
|
2387
|
+
-- First, find out what queue it was attached to
|
2388
|
+
local queue = redis.call('hget', 'ql:r:' .. self.jid, 'queue')
|
2389
|
+
if queue then
|
2390
|
+
-- Now, delete it from the queue it was attached to, and delete the
|
2391
|
+
-- thing itself
|
2392
|
+
Qless.queue(queue).recurring.remove(self.jid)
|
2393
|
+
redis.call('del', 'ql:r:' .. self.jid)
|
2394
|
+
return true
|
2395
|
+
else
|
2396
|
+
return true
|
2397
|
+
end
|
2398
|
+
end
|
2399
|
+
-- Deregisters these workers from the list of known workers
|
2400
|
+
function QlessWorker.deregister(...)
|
2401
|
+
redis.call('zrem', 'ql:workers', unpack(arg))
|
2402
|
+
end
|
2403
|
+
|
2404
|
+
-- Provide data about all the workers, or if a specific worker is provided,
|
2405
|
+
-- then which jobs that worker is responsible for. If no worker is provided,
|
2406
|
+
-- expect a response of the form:
|
2407
|
+
--
|
2408
|
+
-- [
|
2409
|
+
-- # This is sorted by the recency of activity from that worker
|
2410
|
+
-- {
|
2411
|
+
-- 'name' : 'hostname1-pid1',
|
2412
|
+
-- 'jobs' : 20,
|
2413
|
+
-- 'stalled': 0
|
2414
|
+
-- }, {
|
2415
|
+
-- ...
|
2416
|
+
-- }
|
2417
|
+
-- ]
|
2418
|
+
--
|
2419
|
+
-- If a worker id is provided, then expect a response of the form:
|
2420
|
+
--
|
2421
|
+
-- {
|
2422
|
+
-- 'jobs': [
|
2423
|
+
-- jid1,
|
2424
|
+
-- jid2,
|
2425
|
+
-- ...
|
2426
|
+
-- ], 'stalled': [
|
2427
|
+
-- jid1,
|
2428
|
+
-- ...
|
2429
|
+
-- ]
|
2430
|
+
-- }
|
2431
|
+
--
|
2432
|
+
function QlessWorker.counts(now, worker)
|
2433
|
+
-- Clean up all the workers' job lists if they're too old. This is
|
2434
|
+
-- determined by the `max-worker-age` configuration, defaulting to the
|
2435
|
+
-- last day. Seems like a 'reasonable' default
|
2436
|
+
local interval = tonumber(Qless.config.get('max-worker-age', 86400))
|
2437
|
+
|
2438
|
+
local workers = redis.call('zrangebyscore', 'ql:workers', 0, now - interval)
|
2439
|
+
for index, worker in ipairs(workers) do
|
2440
|
+
redis.call('del', 'ql:w:' .. worker .. ':jobs')
|
2441
|
+
end
|
2442
|
+
|
2443
|
+
-- And now remove them from the list of known workers
|
2444
|
+
redis.call('zremrangebyscore', 'ql:workers', 0, now - interval)
|
2445
|
+
|
2446
|
+
if worker then
|
2447
|
+
return {
|
2448
|
+
jobs = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now + 8640000, now),
|
2449
|
+
stalled = redis.call('zrevrangebyscore', 'ql:w:' .. worker .. ':jobs', now, 0)
|
2450
|
+
}
|
2451
|
+
else
|
2452
|
+
local response = {}
|
2453
|
+
local workers = redis.call('zrevrange', 'ql:workers', 0, -1)
|
2454
|
+
for index, worker in ipairs(workers) do
|
2455
|
+
table.insert(response, {
|
2456
|
+
name = worker,
|
2457
|
+
jobs = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', now, now + 8640000),
|
2458
|
+
stalled = redis.call('zcount', 'ql:w:' .. worker .. ':jobs', 0, now)
|
2459
|
+
})
|
2460
|
+
end
|
2461
|
+
return response
|
2462
|
+
end
|
2463
|
+
end
|
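QlessWorker.counts splits a worker's job set by lock expiry: locks expiring after 'now' count as live jobs, locks already past 'now' count as stalled. A standalone sketch over a made-up jid-to-expiry table:

local now = 500
local locks = { ['jid-1'] = 600, ['jid-2'] = 450, ['jid-3'] = 700 }  -- jid -> lock expiry
local jobs, stalled = {}, {}
for jid, expires in pairs(locks) do
  if expires >= now then
    table.insert(jobs, jid)
  else
    table.insert(stalled, jid)
  end
end
print(#jobs, #stalled)    -- 2  1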