groupmq-plus 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/LICENSE +59 -0
  2. package/README.md +722 -0
  3. package/dist/index.cjs +2567 -0
  4. package/dist/index.cjs.map +1 -0
  5. package/dist/index.d.cts +1300 -0
  6. package/dist/index.d.ts +1300 -0
  7. package/dist/index.js +2557 -0
  8. package/dist/index.js.map +1 -0
  9. package/dist/lua/change-delay.lua +62 -0
  10. package/dist/lua/check-stalled.lua +86 -0
  11. package/dist/lua/clean-status.lua +64 -0
  12. package/dist/lua/cleanup-poisoned-group.lua +46 -0
  13. package/dist/lua/cleanup.lua +46 -0
  14. package/dist/lua/complete-and-reserve-next-with-metadata.lua +221 -0
  15. package/dist/lua/complete-with-metadata.lua +190 -0
  16. package/dist/lua/complete.lua +51 -0
  17. package/dist/lua/dead-letter.lua +86 -0
  18. package/dist/lua/enqueue-batch.lua +149 -0
  19. package/dist/lua/enqueue-flow.lua +107 -0
  20. package/dist/lua/enqueue.lua +154 -0
  21. package/dist/lua/get-active-count.lua +6 -0
  22. package/dist/lua/get-active-jobs.lua +6 -0
  23. package/dist/lua/get-delayed-count.lua +5 -0
  24. package/dist/lua/get-delayed-jobs.lua +5 -0
  25. package/dist/lua/get-unique-groups-count.lua +13 -0
  26. package/dist/lua/get-unique-groups.lua +15 -0
  27. package/dist/lua/get-waiting-count.lua +11 -0
  28. package/dist/lua/get-waiting-jobs.lua +15 -0
  29. package/dist/lua/heartbeat.lua +22 -0
  30. package/dist/lua/is-empty.lua +35 -0
  31. package/dist/lua/promote-delayed-jobs.lua +40 -0
  32. package/dist/lua/promote-delayed-one.lua +44 -0
  33. package/dist/lua/promote-staged.lua +70 -0
  34. package/dist/lua/record-job-result.lua +143 -0
  35. package/dist/lua/remove.lua +55 -0
  36. package/dist/lua/reserve-atomic.lua +114 -0
  37. package/dist/lua/reserve-batch.lua +141 -0
  38. package/dist/lua/reserve.lua +161 -0
  39. package/dist/lua/retry.lua +53 -0
  40. package/package.json +92 -0
package/dist/lua/is-empty.lua
@@ -0,0 +1,35 @@
+ -- keys: ns
+ local ns = KEYS[1]
+
+ -- Check processing jobs
+ local processingCount = redis.call("ZCARD", ns .. ":processing")
+ if processingCount > 0 then
+   return 0
+ end
+
+ -- Check delayed jobs
+ local delayedCount = redis.call("ZCARD", ns .. ":delayed")
+ if delayedCount > 0 then
+   return 0
+ end
+
+ -- Check ready groups (jobs waiting)
+ local readyCount = redis.call("ZCARD", ns .. ":ready")
+ if readyCount > 0 then
+   return 0
+ end
+
+
+ -- Check all groups for waiting jobs
+ local groups = redis.call("SMEMBERS", ns .. ":groups")
+ for _, gid in ipairs(groups) do
+   local gZ = ns .. ":g:" .. gid
+   local jobCount = redis.call("ZCARD", gZ)
+   if jobCount > 0 then
+     return 0
+   end
+ end
+
+ -- Queue is completely empty
+ return 1
+
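The emptiness check walks every group set, so it is O(number of groups). A minimal sketch of driving it from Node follows, assuming an ioredis client and the published script path; the namespace goes in KEYS[1] and there are no ARGV. groupmq-plus exposes its own API for this, so the snippet is illustrative only.

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative only: groupmq-plus wraps this script internally; path is an assumption.
const redis = new Redis();
const isEmptyLua = readFileSync("node_modules/groupmq-plus/dist/lua/is-empty.lua", "utf8");

// KEYS[1] = queue namespace; the script takes no ARGV and returns 1 only when
// processing, delayed, ready, and all per-group ZSETs are empty.
async function isQueueEmpty(ns: string): Promise<boolean> {
  const result = (await redis.eval(isEmptyLua, 1, ns)) as number;
  return result === 1;
}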
package/dist/lua/promote-delayed-jobs.lua
@@ -0,0 +1,40 @@
+ -- keys: ns; argv: now
+ local ns = KEYS[1]
+ local now = tonumber(ARGV[1])
+
+ local delayedKey = ns .. ":delayed"
+ local readyKey = ns .. ":ready"
+
+ local promotedCount = 0
+
+ -- Get jobs that are ready (score <= now)
+ local readyJobs = redis.call("ZRANGEBYSCORE", delayedKey, 0, now)
+
+ for i = 1, #readyJobs do
+   local jobId = readyJobs[i]
+   local jobKey = ns .. ":job:" .. jobId
+   local groupId = redis.call("HGET", jobKey, "groupId")
+
+   if groupId then
+     local gZ = ns .. ":g:" .. groupId
+
+     -- Remove from delayed set
+     redis.call("ZREM", delayedKey, jobId)
+
+     -- Check if this job is the head of its group (earliest in group)
+     local head = redis.call("ZRANGE", gZ, 0, 0)
+     if head and #head > 0 and head[1] == jobId then
+       -- This is the head job, so group should be ready
+       local headScore = redis.call("ZSCORE", gZ, jobId)
+       if headScore then
+         redis.call("ZADD", readyKey, headScore, groupId)
+       end
+     end
+
+     promotedCount = promotedCount + 1
+   end
+ end
+
+ return promotedCount
+
+
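This bulk promoter is the kind of script a periodic scheduler tick would run. A sketch follows, assuming ioredis and that the caller supplies the current epoch in milliseconds as ARGV[1]; the interval and script path are illustrative choices, not the library's own defaults.

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative scheduler tick, not the library's worker loop.
const redis = new Redis();
const promoteLua = readFileSync("node_modules/groupmq-plus/dist/lua/promote-delayed-jobs.lua", "utf8");

// KEYS[1] = namespace, ARGV[1] = now (epoch ms); returns how many delayed jobs were promoted.
async function promoteDelayed(ns: string): Promise<number> {
  return (await redis.eval(promoteLua, 1, ns, Date.now())) as number;
}

// e.g. run once per second from a timer:
// setInterval(() => promoteDelayed("groupmq:my-queue").catch(console.error), 1000);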
package/dist/lua/promote-delayed-one.lua
@@ -0,0 +1,44 @@
+ -- keys: ns; argv: now
+ local ns = KEYS[1]
+ local now = tonumber(ARGV[1])
+
+ local delayedKey = ns .. ":delayed"
+ local readyKey = ns .. ":ready"
+
+ -- Find one job that is due now
+ local ids = redis.call("ZRANGEBYSCORE", delayedKey, 0, now, "LIMIT", 0, 1)
+ if not ids or #ids == 0 then
+   return 0
+ end
+
+ local jobId = ids[1]
+
+ -- Try to remove it atomically; if another scheduler raced, ZREM will return 0
+ local removed = redis.call("ZREM", delayedKey, jobId)
+ if removed == 0 then
+   return 0
+ end
+
+ -- Determine its group and update ready queue if it was the head
+ local jobKey = ns .. ":job:" .. jobId
+ local groupId = redis.call("HGET", jobKey, "groupId")
+ if not groupId then
+   return 1 -- treat as moved even if metadata missing
+ end
+
+ -- Mark job as waiting (no longer delayed)
+ redis.call("HSET", jobKey, "status", "waiting")
+ redis.call("HDEL", jobKey, "runAt")
+
+ local gZ = ns .. ":g:" .. groupId
+ local head = redis.call("ZRANGE", gZ, 0, 0)
+ if head and #head > 0 and head[1] == jobId then
+   local headScore = redis.call("ZSCORE", gZ, jobId)
+   if headScore then
+     redis.call("ZADD", readyKey, headScore, groupId)
+   end
+ end
+
+ return 1
+
+
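Because the ZREM doubles as a race check, several schedulers can call this concurrently; a 0 return means nothing is due or another caller won the race. One possible drain loop, again assuming ioredis, is sketched below (the helper name is hypothetical).

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative drain loop over the one-at-a-time promoter.
const redis = new Redis();
const promoteOneLua = readFileSync("node_modules/groupmq-plus/dist/lua/promote-delayed-one.lua", "utf8");

async function drainDueJobs(ns: string): Promise<number> {
  let moved = 0;
  // Keep promoting until the script reports nothing left that is due.
  while (((await redis.eval(promoteOneLua, 1, ns, Date.now())) as number) === 1) {
    moved += 1;
  }
  return moved;
}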
package/dist/lua/promote-staged.lua
@@ -0,0 +1,70 @@
+ -- Promote staged jobs that are now ready to be processed
+ -- keys: ns; argv: now, limit
+ local ns = KEYS[1]
+ local now = tonumber(ARGV[1])
+ local limit = tonumber(ARGV[2]) or 100
+
+ local stageKey = ns .. ":stage"
+ local readyKey = ns .. ":ready"
+ local timerKey = ns .. ":stage:timer"
+
+ local promotedCount = 0
+
+ -- Get jobs that are ready (score <= now)
+ local readyJobs = redis.call("ZRANGEBYSCORE", stageKey, 0, now, "LIMIT", 0, limit)
+
+ for i = 1, #readyJobs do
+   local jobId = readyJobs[i]
+   local jobKey = ns .. ":job:" .. jobId
+
+   -- Get job metadata
+   local jobData = redis.call("HMGET", jobKey, "groupId", "score", "status")
+   local groupId = jobData[1]
+   local score = jobData[2]
+   local status = jobData[3]
+
+   if groupId and score and status == "staged" then
+     local gZ = ns .. ":g:" .. groupId
+
+     -- Remove from staging set
+     redis.call("ZREM", stageKey, jobId)
+
+     -- Add to group ZSET with original score
+     redis.call("ZADD", gZ, tonumber(score), jobId)
+
+     -- Update job status from "staged" to "waiting"
+     redis.call("HSET", jobKey, "status", "waiting")
+
+     -- Check if group should be added to ready queue
+     -- Add group to ready if the head job is now waiting (not delayed or staged)
+     local head = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+     if head and #head >= 2 then
+       local headJobId = head[1]
+       local headScore = tonumber(head[2])
+       local headJobKey = ns .. ":job:" .. headJobId
+       local headStatus = redis.call("HGET", headJobKey, "status")
+
+       -- Only add to ready if head is waiting (not delayed/staged)
+       if headStatus == "waiting" then
+         redis.call("ZADD", readyKey, headScore, groupId)
+       end
+     end
+
+     promotedCount = promotedCount + 1
+   end
+ end
+
+ -- Recompute timer: set to the next earliest staged job
+ local nextHead = redis.call("ZRANGE", stageKey, 0, 0, "WITHSCORES")
+ if nextHead and #nextHead >= 2 then
+   local nextReleaseAt = tonumber(nextHead[2])
+   -- Set timer to expire when the next earliest job is ready
+   local ttlMs = math.max(1, nextReleaseAt - now)
+   redis.call("SET", timerKey, "1", "PX", ttlMs)
+ else
+   -- No more staged jobs, delete the timer
+   redis.call("DEL", timerKey)
+ end
+
+ return promotedCount
+
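The PX timer the script re-arms encodes when the next staged job becomes due, so a caller can read its remaining TTL to pick the next poll delay. A sketch follows, assuming ioredis; the 1000 ms fallback is an arbitrary choice for the example, not a library default.

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative: KEYS[1] = namespace, ARGV[1] = now (ms), ARGV[2] = max jobs per call (script defaults to 100).
const redis = new Redis();
const promoteStagedLua = readFileSync("node_modules/groupmq-plus/dist/lua/promote-staged.lua", "utf8");

async function promoteStaged(ns: string, limit = 100): Promise<{ promoted: number; nextCheckMs: number }> {
  const promoted = (await redis.eval(promoteStagedLua, 1, ns, Date.now(), limit)) as number;
  const ttl = await redis.pttl(`${ns}:stage:timer`); // -2 when no staged jobs remain
  return { promoted, nextCheckMs: ttl > 0 ? ttl : 1000 };
}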
package/dist/lua/record-job-result.lua
@@ -0,0 +1,143 @@
+ -- Record job completion or failure with retention management
+ -- keys: ns; argv: jobId, status ('completed' | 'failed'), timestamp, result/error (JSON),
+ --   keepCompleted, keepFailed, processedOn, finishedOn, attempts, maxAttempts
+ local ns = KEYS[1]
+ local jobId = ARGV[1]
+ local status = ARGV[2]
+ local timestamp = tonumber(ARGV[3])
+ local resultOrError = ARGV[4]
+ local keepCompleted = tonumber(ARGV[5])
+ local keepFailed = tonumber(ARGV[6])
+ local processedOn = ARGV[7]
+ local finishedOn = ARGV[8]
+ local attempts = ARGV[9]
+ local maxAttempts = ARGV[10]
+
+ local jobKey = ns .. ":job:" .. jobId
+
+ -- [PHASE 3 MODIFICATION START: Get parentId before potentially deleting the job]
+ local parentId = redis.call("HGET", jobKey, "parentId")
+ -- [PHASE 3 MODIFICATION END]
+
+ -- Verify job exists and check current status to prevent race conditions
+ local currentStatus = redis.call("HGET", jobKey, "status")
+ if not currentStatus then
+   -- Job doesn't exist, likely already cleaned up
+   return 0
+ end
+
+ -- If job is in "waiting" state, this might be a late completion after stalled recovery
+ -- In this case, we should not overwrite the status or delete the job
+ if currentStatus == "waiting" then
+   -- Job was recovered by stalled check and possibly being processed by another worker
+   -- Ignore this late completion to prevent corruption
+   return 0
+ end
+
+ -- [PHASE 3 MODIFICATION START: Update Flow Parent]
+ -- Regardless of whether the job succeeded or failed, if it's finished, update parent
+ if parentId then
+   local parentKey = ns .. ":job:" .. parentId
+   -- 1. Store child result in a separate hash to define parent's "childrenValues"
+   -- Key: flow:results:{parentId}, Field: {childId}
+   local flowResultsKey = ns .. ":flow:results:" .. parentId
+   redis.call("HSET", flowResultsKey, jobId, resultOrError)
+
+   -- 2. Decrement remaining counter
+   local remaining = redis.call("HINCRBY", parentKey, "flowRemaining", -1)
+
+   -- 3. If all children done, move parent to waiting
+   if remaining <= 0 then
+     local parentStatus = redis.call("HGET", parentKey, "status")
+     if parentStatus == "waiting-children" then
+       redis.call("HSET", parentKey, "status", "waiting")
+
+       local parentGroupId = redis.call("HGET", parentKey, "groupId")
+       local parentScore = tonumber(redis.call("HGET", parentKey, "score")) or (tonumber(redis.call("TIME")[1]) * 1000)
+
+       local pGZ = ns .. ":g:" .. parentGroupId
+       redis.call("ZADD", pGZ, parentScore, parentId)
+       redis.call("SADD", ns .. ":groups", parentGroupId)
+
+       -- Add to ready if head
+       local pHead = redis.call("ZRANGE", pGZ, 0, 0, "WITHSCORES")
+       if pHead and #pHead >= 2 then
+         local pHeadScore = tonumber(pHead[2])
+         redis.call("ZADD", ns .. ":ready", pHeadScore, parentGroupId)
+       end
+     end
+   end
+ end
+ -- [PHASE 3 MODIFICATION END]
+
+ if status == "completed" then
+   local completedKey = ns .. ":completed"
+
+   if keepCompleted > 0 then
+     -- Store job metadata and add to completed set
+     redis.call("HSET", jobKey,
+       "status", "completed",
+       "processedOn", processedOn,
+       "finishedOn", finishedOn,
+       "attempts", attempts,
+       "maxAttempts", maxAttempts,
+       "returnvalue", resultOrError
+     )
+     redis.call("ZADD", completedKey, timestamp, jobId)
+     -- Ensure idempotence mapping exists
+     redis.call("SET", ns .. ":unique:" .. jobId, jobId)
+
+     -- Trim old entries atomically
+     local zcount = redis.call("ZCARD", completedKey)
+     local toRemove = zcount - keepCompleted
+     if toRemove > 0 then
+       local oldIds = redis.call("ZRANGE", completedKey, 0, toRemove - 1)
+       if #oldIds > 0 then
+         redis.call("ZREMRANGEBYRANK", completedKey, 0, toRemove - 1)
+         -- Batch delete old jobs and unique keys
+         local keysToDelete = {}
+         for i = 1, #oldIds do
+           local oldId = oldIds[i]
+           table.insert(keysToDelete, ns .. ":job:" .. oldId)
+           table.insert(keysToDelete, ns .. ":unique:" .. oldId)
+         end
+         if #keysToDelete > 0 then
+           redis.call("DEL", unpack(keysToDelete))
+         end
+       end
+     end
+   else
+     -- keepCompleted == 0: Delete immediately (batch operation)
+     redis.call("DEL", jobKey, ns .. ":unique:" .. jobId)
+   end
+
+ elseif status == "failed" then
+   local failedKey = ns .. ":failed"
+
+   -- Parse error info from resultOrError JSON
+   -- Expected format: {"message":"...", "name":"...", "stack":"..."}
+   local errorInfo = cjson.decode(resultOrError)
+
+   if keepFailed > 0 then
+     -- Store failure metadata
+     redis.call("HSET", jobKey,
+       "status", "failed",
+       "failedReason", errorInfo.message or "Error",
+       "failedName", errorInfo.name or "Error",
+       "stacktrace", errorInfo.stack or "",
+       "processedOn", processedOn,
+       "finishedOn", finishedOn,
+       "attempts", attempts,
+       "maxAttempts", maxAttempts
+     )
+     redis.call("ZADD", failedKey, timestamp, jobId)
+
+     -- Note: No retention trimming for failed jobs (let clean() handle it)
+   else
+     -- keepFailed == 0: Delete immediately (batch operation)
+     redis.call("DEL", jobKey, ns .. ":unique:" .. jobId)
+   end
+ end
+
+ return 1
+
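The argument list is long, so a sketch of the KEYS/ARGV mapping may help. It assumes ioredis and that results and errors are JSON-encoded by the caller, which is how the failure branch's cjson.decode expects them; the wrapper name and options shape are hypothetical, since the library invokes this script from its own worker internals.

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative ARGV mapping for record-job-result.lua.
const redis = new Redis();
const recordResultLua = readFileSync("node_modules/groupmq-plus/dist/lua/record-job-result.lua", "utf8");

async function recordResult(
  ns: string,
  jobId: string,
  status: "completed" | "failed",
  payload: unknown, // return value, or { message, name, stack } for failures
  opts: { keepCompleted: number; keepFailed: number; processedOn: number; finishedOn: number; attempts: number; maxAttempts: number },
): Promise<0 | 1> {
  return (await redis.eval(
    recordResultLua,
    1, ns,                               // KEYS[1]
    jobId, status, Date.now(),           // ARGV[1..3]
    JSON.stringify(payload),             // ARGV[4] result or error JSON
    opts.keepCompleted, opts.keepFailed, // ARGV[5..6] retention counts
    opts.processedOn, opts.finishedOn,   // ARGV[7..8]
    opts.attempts, opts.maxAttempts,     // ARGV[9..10]
  )) as 0 | 1;
}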
package/dist/lua/remove.lua
@@ -0,0 +1,55 @@
+ -- keys: ns; argv: jobId
+ local ns = KEYS[1]
+ local jobId = ARGV[1]
+
+ local jobKey = ns .. ":job:" .. jobId
+ local delayedKey = ns .. ":delayed"
+ local readyKey = ns .. ":ready"
+ local processingKey = ns .. ":processing"
+
+ -- If job does not exist, return 0
+ if redis.call("EXISTS", jobKey) == 0 then
+   return 0
+ end
+
+ local groupId = redis.call("HGET", jobKey, "groupId")
+
+ -- Remove from delayed and processing structures
+ redis.call("ZREM", delayedKey, jobId)
+ redis.call("DEL", ns .. ":processing:" .. jobId)
+ redis.call("ZREM", processingKey, jobId)
+
+ -- Remove from completed/failed retention sets if present
+ redis.call("ZREM", ns .. ":completed", jobId)
+ redis.call("ZREM", ns .. ":failed", jobId)
+
+ -- Delete idempotence mapping
+ redis.call("DEL", ns .. ":unique:" .. jobId)
+
+ -- If we have a group, update group zset and ready queue accordingly
+ if groupId then
+   local gZ = ns .. ":g:" .. groupId
+   redis.call("ZREM", gZ, jobId)
+
+   local jobCount = redis.call("ZCARD", gZ)
+   if jobCount == 0 then
+     redis.call("ZREM", readyKey, groupId)
+     -- Clean up empty group
+     redis.call("DEL", gZ)
+     redis.call("SREM", ns .. ":groups", groupId)
+   else
+     local head = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+     if head and #head >= 2 then
+       local headScore = tonumber(head[2])
+       redis.call("ZADD", readyKey, headScore, groupId)
+     end
+   end
+ end
+
+ -- Finally, delete the job hash and flow results
+ redis.call("DEL", jobKey)
+ redis.call("DEL", ns .. ":flow:results:" .. jobId)
+
+ return 1
+
+
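Invocation is a single key and a single argument; a small wrapper, assuming ioredis and an illustrative script path, is sketched below.

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative: KEYS[1] = namespace, ARGV[1] = job id; returns 1 if the job existed and was removed.
const redis = new Redis();
const removeLua = readFileSync("node_modules/groupmq-plus/dist/lua/remove.lua", "utf8");

async function removeJob(ns: string, jobId: string): Promise<boolean> {
  return ((await redis.eval(removeLua, 1, ns, jobId)) as number) === 1;
}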
package/dist/lua/reserve-atomic.lua
@@ -0,0 +1,114 @@
+ -- Atomic reserve operation that checks lock/limit and reserves in one operation
+ -- keys: ns; argv: nowEpochMs, vtMs, targetGroupId, allowedJobId (optional)
+ local ns = KEYS[1]
+ local now = tonumber(ARGV[1])
+ local vt = tonumber(ARGV[2])
+ local targetGroupId = ARGV[3]
+ local allowedJobId = ARGV[4] -- If provided, allow reserve if matches active job (chaining)
+
+ local readyKey = ns .. ":ready"
+ local gZ = ns .. ":g:" .. targetGroupId
+ local groupActiveKey = ns .. ":g:" .. targetGroupId .. ":active"
+ local configKey = ns .. ":config:" .. targetGroupId
+
+ -- Respect paused state
+ if redis.call("GET", ns .. ":paused") then
+   return nil
+ end
+
+ -- [PHASE 2 MODIFICATION START]
+ -- Fetch concurrency limit (default 1)
+ local limit = tonumber(redis.call("HGET", configKey, "concurrency")) or 1
+ local activeCount = redis.call("LLEN", groupActiveKey)
+
+ -- Logic: Can we reserve?
+ local canReserve = false
+
+ if activeCount < limit then
+   -- Case 1: Slots available
+   canReserve = true
+ elseif allowedJobId then
+   -- Case 2: Group is full, BUT we are explicitly allowed to chain from a specific job
+   -- Check if allowedJobId is actually in the active list (reclaiming its own slot)
+   -- Note: We scan the list. Since limits are usually small (e.g., <100), this O(N) is acceptable.
+   -- For strict O(1), we would need a Set, but List is used for queuing order.
+   local items = redis.call("LRANGE", groupActiveKey, 0, -1)
+   for _, id in ipairs(items) do
+     if id == allowedJobId then
+       canReserve = true
+       break
+     end
+   end
+ end
+
+ if not canReserve then
+   -- Group is full and no special access granted
+   -- If head matches our allowedJobId but we failed (shouldn't happen logic-wise but safe-guard),
+   -- ensure ready queue is correct.
+   local head = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+   if head and #head >= 2 then
+     local headScore = tonumber(head[2])
+     redis.call("ZADD", readyKey, headScore, targetGroupId)
+   end
+   return nil
+ end
+ -- [PHASE 2 MODIFICATION END]
+
+ -- Try to get a job from the group
+ local head = redis.call("ZRANGE", gZ, 0, 0)
+ if not head or #head == 0 then
+   return nil
+ end
+ local headJobId = head[1]
+ local jobKey = ns .. ":job:" .. headJobId
+
+ -- Skip if head job is delayed
+ local jobStatus = redis.call("HGET", jobKey, "status")
+ if jobStatus == "delayed" then
+   return nil
+ end
+
+ -- Pop the job
+ local zpop = redis.call("ZPOPMIN", gZ, 1)
+ if not zpop or #zpop == 0 then
+   return nil
+ end
+ headJobId = zpop[1]
+
+ local job = redis.call("HMGET", jobKey, "id","groupId","data","attempts","maxAttempts","seq","timestamp","orderMs","score")
+ local id, groupId, payload, attempts, maxAttempts, seq, enq, orderMs, score = job[1], job[2], job[3], job[4], job[5], job[6], job[7], job[8], job[9]
+
+ if not id or id == false then
+   -- Corruption handling
+   local nextHead = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+   if nextHead and #nextHead >= 2 then
+     local nextScore = tonumber(nextHead[2])
+     redis.call("ZADD", readyKey, nextScore, targetGroupId)
+   end
+   return nil
+ end
+
+ -- [PHASE 2 MODIFICATION START]
+ -- Push to group active list
+ -- If we are chaining (allowedJobId matched), we should technically verify we aren't adding a duplicate
+ -- if the previous one wasn't removed yet. But typically complete-with-metadata removes the old one.
+ -- Just strictly push.
+ redis.call("LPUSH", groupActiveKey, id)
+ -- [PHASE 2 MODIFICATION END]
+
+ local procKey = ns .. ":processing:" .. id
+ local deadline = now + vt
+ redis.call("HSET", procKey, "groupId", groupId, "deadlineAt", tostring(deadline))
+
+ local processingKey = ns .. ":processing"
+ redis.call("ZADD", processingKey, deadline, id)
+
+ redis.call("HSET", jobKey, "status", "processing")
+
+ local nextHead = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+ if nextHead and #nextHead >= 2 then
+   local nextScore = tonumber(nextHead[2])
+   redis.call("ZADD", readyKey, nextScore, groupId)
+ end
+
+ return id .. "|||" .. groupId .. "|||" .. payload .. "|||" .. attempts .. "|||" .. maxAttempts .. "|||" .. seq .. "|||" .. enq .. "|||" .. orderMs .. "|||" .. score .. "|||" .. deadline
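On success the script returns a single "|||"-delimited string whose field order matches the final return statement. A parsing sketch follows, assuming ioredis and that the data field was JSON-encoded at enqueue time (an assumption about the enqueue side, not something this script enforces).

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative parser for the "|||"-delimited reply of reserve-atomic.lua.
const redis = new Redis();
const reserveAtomicLua = readFileSync("node_modules/groupmq-plus/dist/lua/reserve-atomic.lua", "utf8");

interface ReservedJob {
  id: string; groupId: string; data: unknown;
  attempts: number; maxAttempts: number; seq: number;
  timestamp: number; orderMs: number; score: number; deadlineAt: number;
}

async function reserveFromGroup(ns: string, groupId: string, vtMs: number, allowedJobId?: string): Promise<ReservedJob | null> {
  const raw = (await redis.eval(
    reserveAtomicLua, 1, ns,
    Date.now(), vtMs, groupId, allowedJobId ?? "", // ARGV[1..4]
  )) as string | null;
  if (!raw) return null; // paused, group full, empty, or head delayed
  const [id, gid, data, attempts, maxAttempts, seq, timestamp, orderMs, score, deadlineAt] = raw.split("|||");
  return {
    id, groupId: gid, data: JSON.parse(data),
    attempts: +attempts, maxAttempts: +maxAttempts, seq: +seq,
    timestamp: +timestamp, orderMs: +orderMs, score: +score, deadlineAt: +deadlineAt,
  };
}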
package/dist/lua/reserve-batch.lua
@@ -0,0 +1,141 @@
+ -- keys: ns; argv: nowEpochMs, vtMs, maxBatch
+ local ns = KEYS[1]
+ local now = tonumber(ARGV[1])
+ local vt = tonumber(ARGV[2])
+ local maxBatch = tonumber(ARGV[3]) or 16
+
+ local readyKey = ns .. ":ready"
+ local processingKey = ns .. ":processing"
+
+ -- Early exit if paused
+ if redis.call("GET", ns .. ":paused") then
+   return {}
+ end
+
+ local out = {}
+
+ -- STALLED JOB RECOVERY WITH THROTTLING
+ -- Check for stalled jobs periodically to avoid overhead in hot path
+ -- This ensures stalled jobs are recovered even in high-load systems where ready queue is never empty
+ -- Check interval is adaptive: 1/4 of jobTimeout (to check 4x during visibility window), max 5s
+ local stalledCheckKey = ns .. ":stalled:lastcheck"
+ local lastCheck = tonumber(redis.call("GET", stalledCheckKey)) or 0
+ local stalledCheckInterval = math.min(math.floor(vt / 4), 5000)
+
+ if (now - lastCheck) >= stalledCheckInterval then
+   -- Update last check timestamp
+   redis.call("SET", stalledCheckKey, tostring(now))
+
+   -- Check for expired jobs and recover them
+   local expiredJobs = redis.call("ZRANGEBYSCORE", processingKey, 0, now)
+   if #expiredJobs > 0 then
+     for _, jobId in ipairs(expiredJobs) do
+       local procKey = ns .. ":processing:" .. jobId
+       local procData = redis.call("HMGET", procKey, "groupId", "deadlineAt")
+       local gid = procData[1]
+       local deadlineAt = tonumber(procData[2])
+       if gid and deadlineAt and now > deadlineAt then
+         local jobKey = ns .. ":job:" .. jobId
+         local jobScore = redis.call("HGET", jobKey, "score")
+         if jobScore then
+           local gZ = ns .. ":g:" .. gid
+           redis.call("ZADD", gZ, tonumber(jobScore), jobId)
+           local head = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+           if head and #head >= 2 then
+             local headScore = tonumber(head[2])
+             redis.call("ZADD", readyKey, headScore, gid)
+           end
+           redis.call("DEL", ns .. ":lock:" .. gid)
+           redis.call("DEL", procKey)
+           redis.call("ZREM", processingKey, jobId)
+         end
+       end
+     end
+   end
+ end
+
+ -- Pop up to maxBatch groups from ready set (lowest score first)
+ local groups = redis.call("ZRANGE", readyKey, 0, maxBatch - 1, "WITHSCORES")
+ if not groups or #groups == 0 then
+   return {}
+ end
+
+ local processedGroups = {}
+ -- BullMQ-style: use per-group active list instead of group locks
+ for i = 1, #groups, 2 do
+   local gid = groups[i]
+   local gZ = ns .. ":g:" .. gid
+   local groupActiveKey = ns .. ":g:" .. gid .. ":active"
+   local configKey = ns .. ":config:" .. gid
+
+   -- [PHASE 2 MODIFICATION START]
+   -- Check concurrency limit
+   local activeCount = redis.call("LLEN", groupActiveKey)
+   local limit = tonumber(redis.call("HGET", configKey, "concurrency")) or 1
+
+   if activeCount < limit then
+     local head = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+     if head and #head >= 2 then
+       local headJobId = head[1]
+       local headScore = tonumber(head[2])
+       local headJobKey = ns .. ":job:" .. headJobId
+
+       -- Skip if head job is delayed (will be promoted later)
+       local jobStatus = redis.call("HGET", headJobKey, "status")
+       if jobStatus ~= "delayed" then
+         -- Pop the job and push to active list atomically
+         local zpop = redis.call("ZPOPMIN", gZ, 1)
+         if zpop and #zpop > 0 then
+           local jobId = zpop[1]
+
+           local jobKey = ns .. ":job:" .. jobId
+           local job = redis.call("HMGET", jobKey, "id","groupId","data","attempts","maxAttempts","seq","timestamp","orderMs","score")
+           local id, groupId, payload, attempts, maxAttempts, seq, enq, orderMs, score = job[1], job[2], job[3], job[4], job[5], job[6], job[7], job[8], job[9]
+
+           -- Validate job data exists (handle corrupted/missing job hash)
+           if not id or id == false then
+             -- Job hash is missing/corrupted, skip this job and continue
+             -- Re-add next job to ready queue if exists
+             local nextHead = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+             if nextHead and #nextHead >= 2 then
+               local nextScore = tonumber(nextHead[2])
+               redis.call("ZADD", readyKey, nextScore, gid)
+             end
+           else
+             -- Push to group active list
+             redis.call("LPUSH", groupActiveKey, jobId)
+
+             -- Mark job as processing
+             redis.call("HSET", jobKey, "status", "processing")
+
+             local procKey = ns .. ":processing:" .. id
+             local deadline = now + vt
+             redis.call("HSET", procKey, "groupId", gid, "deadlineAt", tostring(deadline))
+             redis.call("ZADD", processingKey, deadline, id)
+
+             -- Re-add group if there is a new head job (next oldest)
+             local nextHead = redis.call("ZRANGE", gZ, 0, 0, "WITHSCORES")
+             if nextHead and #nextHead >= 2 then
+               local nextScore = tonumber(nextHead[2])
+               redis.call("ZADD", readyKey, nextScore, gid)
+             end
+
+             table.insert(out, id .. "|||" .. groupId .. "|||" .. payload .. "|||" .. attempts .. "|||" .. maxAttempts .. "|||" .. seq .. "|||" .. enq .. "|||" .. orderMs .. "|||" .. score .. "|||" .. deadline)
+             table.insert(processedGroups, gid)
+           end
+         end
+       end
+     end
+   end
+   -- [PHASE 2 MODIFICATION END]
+   -- Note: Groups with active jobs will be skipped
+ end
+
+ -- Remove only the groups that were actually processed from ready queue
+ for _, gid in ipairs(processedGroups) do
+   redis.call("ZREM", readyKey, gid)
+ end
+
+ return out
+
+
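The batch variant returns an array of the same "|||"-delimited rows used by reserve-atomic.lua. A polling sketch, assuming ioredis and an illustrative script path, is below; it only splits the rows and leaves type coercion to the caller.

import Redis from "ioredis";
import { readFileSync } from "node:fs";

// Illustrative poll: KEYS[1] = namespace, ARGV = [nowMs, visibilityTimeoutMs, maxBatch].
const redis = new Redis();
const reserveBatchLua = readFileSync("node_modules/groupmq-plus/dist/lua/reserve-batch.lua", "utf8");

async function reserveBatch(ns: string, vtMs: number, maxBatch = 16): Promise<string[][]> {
  const rows = (await redis.eval(reserveBatchLua, 1, ns, Date.now(), vtMs, maxBatch)) as string[];
  // Each row: [id, groupId, data, attempts, maxAttempts, seq, timestamp, orderMs, score, deadlineAt]
  return rows.map((row) => row.split("|||"));
}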