@renseiai/agentfactory-server 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +71 -0
- package/dist/src/a2a-server.d.ts +88 -0
- package/dist/src/a2a-server.d.ts.map +1 -0
- package/dist/src/a2a-server.integration.test.d.ts +9 -0
- package/dist/src/a2a-server.integration.test.d.ts.map +1 -0
- package/dist/src/a2a-server.integration.test.js +397 -0
- package/dist/src/a2a-server.js +235 -0
- package/dist/src/a2a-server.test.d.ts +2 -0
- package/dist/src/a2a-server.test.d.ts.map +1 -0
- package/dist/src/a2a-server.test.js +311 -0
- package/dist/src/a2a-types.d.ts +125 -0
- package/dist/src/a2a-types.d.ts.map +1 -0
- package/dist/src/a2a-types.js +8 -0
- package/dist/src/agent-tracking.d.ts +201 -0
- package/dist/src/agent-tracking.d.ts.map +1 -0
- package/dist/src/agent-tracking.js +349 -0
- package/dist/src/env-validation.d.ts +65 -0
- package/dist/src/env-validation.d.ts.map +1 -0
- package/dist/src/env-validation.js +134 -0
- package/dist/src/governor-dedup.d.ts +15 -0
- package/dist/src/governor-dedup.d.ts.map +1 -0
- package/dist/src/governor-dedup.js +31 -0
- package/dist/src/governor-event-bus.d.ts +54 -0
- package/dist/src/governor-event-bus.d.ts.map +1 -0
- package/dist/src/governor-event-bus.js +152 -0
- package/dist/src/governor-storage.d.ts +28 -0
- package/dist/src/governor-storage.d.ts.map +1 -0
- package/dist/src/governor-storage.js +52 -0
- package/dist/src/index.d.ts +26 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +50 -0
- package/dist/src/issue-lock.d.ts +129 -0
- package/dist/src/issue-lock.d.ts.map +1 -0
- package/dist/src/issue-lock.js +508 -0
- package/dist/src/logger.d.ts +76 -0
- package/dist/src/logger.d.ts.map +1 -0
- package/dist/src/logger.js +218 -0
- package/dist/src/orphan-cleanup.d.ts +64 -0
- package/dist/src/orphan-cleanup.d.ts.map +1 -0
- package/dist/src/orphan-cleanup.js +369 -0
- package/dist/src/pending-prompts.d.ts +67 -0
- package/dist/src/pending-prompts.d.ts.map +1 -0
- package/dist/src/pending-prompts.js +176 -0
- package/dist/src/processing-state-storage.d.ts +38 -0
- package/dist/src/processing-state-storage.d.ts.map +1 -0
- package/dist/src/processing-state-storage.js +61 -0
- package/dist/src/quota-tracker.d.ts +62 -0
- package/dist/src/quota-tracker.d.ts.map +1 -0
- package/dist/src/quota-tracker.js +155 -0
- package/dist/src/rate-limit.d.ts +111 -0
- package/dist/src/rate-limit.d.ts.map +1 -0
- package/dist/src/rate-limit.js +171 -0
- package/dist/src/redis-circuit-breaker.d.ts +67 -0
- package/dist/src/redis-circuit-breaker.d.ts.map +1 -0
- package/dist/src/redis-circuit-breaker.js +290 -0
- package/dist/src/redis-rate-limiter.d.ts +51 -0
- package/dist/src/redis-rate-limiter.d.ts.map +1 -0
- package/dist/src/redis-rate-limiter.js +168 -0
- package/dist/src/redis.d.ts +146 -0
- package/dist/src/redis.d.ts.map +1 -0
- package/dist/src/redis.js +343 -0
- package/dist/src/session-hash.d.ts +48 -0
- package/dist/src/session-hash.d.ts.map +1 -0
- package/dist/src/session-hash.js +80 -0
- package/dist/src/session-storage.d.ts +166 -0
- package/dist/src/session-storage.d.ts.map +1 -0
- package/dist/src/session-storage.js +397 -0
- package/dist/src/token-storage.d.ts +118 -0
- package/dist/src/token-storage.d.ts.map +1 -0
- package/dist/src/token-storage.js +263 -0
- package/dist/src/types.d.ts +11 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +7 -0
- package/dist/src/webhook-idempotency.d.ts +44 -0
- package/dist/src/webhook-idempotency.d.ts.map +1 -0
- package/dist/src/webhook-idempotency.js +148 -0
- package/dist/src/work-queue.d.ts +120 -0
- package/dist/src/work-queue.d.ts.map +1 -0
- package/dist/src/work-queue.js +384 -0
- package/dist/src/worker-auth.d.ts +29 -0
- package/dist/src/worker-auth.d.ts.map +1 -0
- package/dist/src/worker-auth.js +49 -0
- package/dist/src/worker-storage.d.ts +108 -0
- package/dist/src/worker-storage.d.ts.map +1 -0
- package/dist/src/worker-storage.js +295 -0
- package/dist/src/workflow-state-integration.test.d.ts +2 -0
- package/dist/src/workflow-state-integration.test.d.ts.map +1 -0
- package/dist/src/workflow-state-integration.test.js +342 -0
- package/dist/src/workflow-state.test.d.ts +2 -0
- package/dist/src/workflow-state.test.d.ts.map +1 -0
- package/dist/src/workflow-state.test.js +113 -0
- package/package.json +72 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
 * Issue Lock Module
 *
 * Prevents overlapping agents for the same issue by providing:
 * - Per-issue mutex (Redis SET NX) that gates work dispatch
 * - Per-issue pending queue for parking incoming work while locked
 * - Automatic promotion: releasing a lock dispatches the next pending item
 *
 * Redis Keys:
 * - issue:lock:{issueId} -- String (JSON IssueLock), 2hr TTL
 * - issue:pending:{issueId} -- Sorted Set (workType members, scored by priority)
 * - issue:pending:items:{issueId} -- Hash (workType -> JSON QueuedWork)
 */
import { type QueuedWork } from './work-queue.js';
import type { AgentWorkType } from './types.js';
/**
 * Lock payload stored in Redis
 */
export interface IssueLock {
    // Session that currently owns the issue lock
    sessionId: string;
    // Kind of work the owning session is performing
    workType: AgentWorkType;
    // Worker assigned to the session, or null when not yet claimed
    workerId: string | null;
    // Epoch milliseconds when the lock was taken
    lockedAt: number;
    // Human-readable issue identifier (used for logging)
    issueIdentifier: string;
}
/**
 * Result of a dispatchWork call
 */
export interface DispatchResult {
    // true when the work was queued in the global work queue
    dispatched: boolean;
    // true when the work was parked in the per-issue pending queue
    parked: boolean;
    // true when parking replaced an older item with the same workType
    replaced: boolean;
}
/**
 * Acquire an issue-level lock.
 * Uses SET NX for atomicity -- only one caller wins.
 *
 * @returns true if lock was acquired
 */
export declare function acquireIssueLock(issueId: string, lock: IssueLock): Promise<boolean>;
/**
 * Read the current lock for an issue.
 */
export declare function getIssueLock(issueId: string): Promise<IssueLock | null>;
/**
 * Release an issue lock. Idempotent.
 */
export declare function releaseIssueLock(issueId: string): Promise<void>;
/**
 * Refresh the TTL on an issue lock (extend while agent is alive).
 */
export declare function refreshIssueLockTTL(issueId: string, ttlSeconds?: number): Promise<boolean>;
/**
 * Park work for a locked issue.
 *
 * Deduplication: at most one parked item per workType per issue.
 * If a parked item with the same workType already exists, it's replaced
 * (the latest webhook wins). Different workTypes can coexist.
 */
export declare function parkWorkForIssue(issueId: string, work: QueuedWork): Promise<{
    parked: boolean;
    replaced: boolean;
}>;
/**
 * Promote the next pending work item for an issue.
 * Pops the highest-priority item, acquires the issue lock for it,
 * and queues it in the global work queue.
 *
 * @returns The promoted work item, or null if nothing to promote
 */
export declare function promoteNextPendingWork(issueId: string): Promise<QueuedWork | null>;
/**
 * Get the count of pending work items for an issue.
 */
export declare function getPendingWorkCount(issueId: string): Promise<number>;
/**
 * Main entry point for dispatching work.
 *
 * Try to acquire the issue lock:
 * - If acquired -> queue the work in the global queue
 * - If locked -> park the work in the per-issue pending queue
 *
 * @returns DispatchResult indicating what happened
 */
export declare function dispatchWork(work: QueuedWork): Promise<DispatchResult>;
/**
 * Remove a parked work item by sessionId.
 *
 * The issue-pending hash is keyed by workType, so we scan all entries
 * to find the one matching the given sessionId.
 *
 * @returns true if a matching parked item was found and removed
 */
export declare function removeParkedWorkBySessionId(issueId: string, sessionId: string): Promise<boolean>;
/**
 * Check if a session is parked in any issue-pending queue.
 *
 * Scans the issue:pending:items:{issueId} hash entries for a matching sessionId.
 *
 * @param issueId - The issue to check
 * @param sessionId - The session to look for
 * @returns true if the session is parked for this issue
 */
export declare function isSessionParkedForIssue(issueId: string, sessionId: string): Promise<boolean>;
/**
 * Scan for expired issue locks that have pending work.
 * If a lock expired naturally (TTL) but pending items remain, promote them.
 *
 * Called from orphan-cleanup to handle crashed workers that didn't release locks.
 */
export declare function cleanupExpiredLocksWithPendingWork(): Promise<number>;
/**
 * Release issue locks held by sessions that should no longer hold them.
 *
 * This handles cases where:
 * - A session completes but the lock release failed (network error during cleanup)
 * - Orphan cleanup resets a session to 'pending' but the lock wasn't released
 *
 * The lock's 2-hour TTL would eventually expire, but this proactively clears it
 * when workers have idle capacity.
 *
 * Only runs when workers are online -- if no workers are available, there's no
 * point promoting parked work since nothing can pick it up.
 *
 * @param hasIdleWorkers - true if at least one worker is online with spare capacity
 * @returns Number of stale locks released and parked work promoted
 */
export declare function cleanupStaleLocksWithIdleWorkers(hasIdleWorkers: boolean): Promise<number>;
//# sourceMappingURL=issue-lock.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"issue-lock.d.ts","sourceRoot":"","sources":["../../src/issue-lock.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAmBH,OAAO,EAAa,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAE5D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAoB/C;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,aAAa,CAAA;IACvB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;IACvB,QAAQ,EAAE,MAAM,CAAA;IAChB,eAAe,EAAE,MAAM,CAAA;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,OAAO,CAAA;IACnB,MAAM,EAAE,OAAO,CAAA;IACf,QAAQ,EAAE,OAAO,CAAA;CAClB;AAED;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,SAAS,GACd,OAAO,CAAC,OAAO,CAAC,CA4BlB;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAY7E;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAUrE;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,MAAM,EACf,UAAU,GAAE,MAAyB,GACpC,OAAO,CAAC,OAAO,CAAC,CAUlB;AAED;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,UAAU,GACf,OAAO,CAAC;IAAE,MAAM,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,OAAO,CAAA;CAAE,CAAC,CAoDjD;AAED;;;;;;GAMG;AACH,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAqE5B;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAU1E;AAED;;;;;;;;GAQG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,cAAc,CAAC,CAoD5E;AAED;;;;;;;GAOG;AACH,wBAAsB,2BAA2B,CAC/C,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,OAAO,CAAC,CAsClB;AAED;;;;;;;;GAQG;AACH,wBAAsB,uBAAuB,CAC3C,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,OAAO,CAAC,CAwBlB;AAED;;;;;GAKG;AACH,wBAAsB,kCAAkC,IAAI,OAAO,CAAC,MAAM,CAAC,CA6C1E;AAOD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,gCAAgC,CACpD,cAAc,EAAE,OAAO,GACtB,OAAO,CAAC,MAAM,CAAC,CAgEjB"}
|
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Issue Lock Module
|
|
3
|
+
*
|
|
4
|
+
* Prevents overlapping agents for the same issue by providing:
|
|
5
|
+
* - Per-issue mutex (Redis SET NX) that gates work dispatch
|
|
6
|
+
* - Per-issue pending queue for parking incoming work while locked
|
|
7
|
+
* - Automatic promotion: releasing a lock dispatches the next pending item
|
|
8
|
+
*
|
|
9
|
+
* Redis Keys:
|
|
10
|
+
* - issue:lock:{issueId} -- String (JSON IssueLock), 2hr TTL
|
|
11
|
+
 * - issue:pending:{issueId} -- Sorted Set (workType members, scored by priority)
 * - issue:pending:items:{issueId} -- Hash (workType -> JSON QueuedWork)
|
|
13
|
+
*/
|
|
14
|
+
import { redisSetNX, redisGet, redisDel, redisExpire, redisZAdd, redisZRem, redisZPopMin, redisZCard, redisHSet, redisHGet, redisHDel, redisHGetAll, isRedisConfigured, redisKeys, } from './redis.js';
|
|
15
|
+
import { queueWork } from './work-queue.js';
|
|
16
|
+
import { getSessionState } from './session-storage.js';
|
|
17
|
+
// Minimal module-scoped logger. Messages carry an "[issue-lock]" prefix and
// structured data is JSON-stringified into the second console argument.
// debug is a no-op in this compiled output.
const log = {
    info: (msg, data) => console.log(`[issue-lock] ${msg}`, data ? JSON.stringify(data) : ''),
    warn: (msg, data) => console.warn(`[issue-lock] ${msg}`, data ? JSON.stringify(data) : ''),
    error: (msg, data) => console.error(`[issue-lock] ${msg}`, data ? JSON.stringify(data) : ''),
    debug: (_msg, _data) => { },
};
// Redis key prefixes
const LOCK_PREFIX = 'issue:lock:'; // String: JSON-serialized IssueLock
const PENDING_PREFIX = 'issue:pending:'; // Sorted set: workType members scored by priority
const PENDING_ITEMS_PREFIX = 'issue:pending:items:'; // Hash: workType -> JSON QueuedWork
// Default lock TTL: 2 hours
const LOCK_TTL_SECONDS = 2 * 60 * 60;
// Pending queue TTL: 24 hours
const PENDING_TTL_SECONDS = 24 * 60 * 60;
|
|
31
|
+
/**
 * Acquire the per-issue mutex via Redis SET NX (single winner).
 *
 * Without Redis configured, locking is disabled and the call always
 * succeeds. On Redis errors the lock is treated as NOT acquired.
 *
 * @param issueId - Issue whose lock to take
 * @param lock - Lock payload persisted as JSON under issue:lock:{issueId}
 * @returns true if the lock was acquired
 */
export async function acquireIssueLock(issueId, lock) {
    if (!isRedisConfigured()) {
        return true; // No Redis = no locking, pass through
    }
    const lockKey = `${LOCK_PREFIX}${issueId}`;
    try {
        const won = await redisSetNX(lockKey, JSON.stringify(lock), LOCK_TTL_SECONDS);
        if (!won) {
            log.debug('Issue lock not acquired (already held)', {
                issueId,
                sessionId: lock.sessionId,
            });
            return won;
        }
        log.info('Issue lock acquired', {
            issueId,
            sessionId: lock.sessionId,
            workType: lock.workType,
            issueIdentifier: lock.issueIdentifier,
        });
        return won;
    }
    catch (error) {
        log.error('Failed to acquire issue lock', { error, issueId });
        return false;
    }
}
|
|
65
|
+
/**
 * Read the current lock for an issue.
 *
 * @param issueId - Issue whose lock to read
 * @returns The parsed IssueLock, or null when absent / Redis unavailable / on error
 */
export async function getIssueLock(issueId) {
    if (!isRedisConfigured())
        return null;
    try {
        // acquireIssueLock stores JSON.stringify(lock); redisGet parses the
        // stored JSON back, so the lock object is returned directly.
        return await redisGet(`${LOCK_PREFIX}${issueId}`);
    }
    catch (error) {
        log.error('Failed to get issue lock', { error, issueId });
        return null;
    }
}
|
|
82
|
+
/**
 * Release an issue lock by deleting its key. Idempotent: deleting an
 * absent key is harmless. Errors are logged and swallowed.
 *
 * @param issueId - Issue whose lock to release
 */
export async function releaseIssueLock(issueId) {
    if (!isRedisConfigured())
        return;
    const lockKey = `${LOCK_PREFIX}${issueId}`;
    try {
        await redisDel(lockKey);
        log.info('Issue lock released', { issueId });
    }
    catch (error) {
        log.error('Failed to release issue lock', { error, issueId });
    }
}
|
|
97
|
+
/**
 * Extend the TTL on an issue lock while the owning agent is still alive.
 *
 * @param issueId - Issue whose lock TTL to refresh
 * @param ttlSeconds - New TTL (defaults to the standard 2-hour lock TTL)
 * @returns true if the TTL was applied; false when Redis is absent, the key
 *          does not exist, or an error occurred
 */
export async function refreshIssueLockTTL(issueId, ttlSeconds = LOCK_TTL_SECONDS) {
    if (!isRedisConfigured())
        return false;
    try {
        return await redisExpire(`${LOCK_PREFIX}${issueId}`, ttlSeconds);
    }
    catch (error) {
        log.error('Failed to refresh issue lock TTL', { error, issueId });
        return false;
    }
}
|
|
112
|
+
/**
 * Park work for a locked issue in the per-issue pending queue.
 *
 * Deduplication: the sorted set and hash are both keyed by workType, so at
 * most one parked item exists per workType per issue. A newer item with the
 * same workType replaces the older one (latest webhook wins); different
 * workTypes coexist.
 *
 * @param issueId - Issue the work belongs to
 * @param work - QueuedWork to park (work.priority becomes the sort score;
 *               lower score = higher priority)
 * @returns parked/replaced flags; both false when Redis is absent or on error
 */
export async function parkWorkForIssue(issueId, work) {
    if (!isRedisConfigured())
        return { parked: false, replaced: false };
    const pendingKey = `${PENDING_PREFIX}${issueId}`;
    const itemsKey = `${PENDING_ITEMS_PREFIX}${issueId}`;
    try {
        // Member key doubles as the dedup key: one entry per workType.
        const workType = work.workType || 'development';
        const dedupMember = workType;
        const previous = await redisHGet(itemsKey, dedupMember);
        const replaced = Boolean(previous);
        if (replaced) {
            // Drop the stale sorted-set entry before re-adding with the new score.
            await redisZRem(pendingKey, dedupMember);
            log.info('Replacing existing parked work', {
                issueId,
                workType,
                sessionId: work.sessionId,
            });
        }
        // Register the member (scored by priority) and store the payload,
        // then refresh the 24h TTL on both structures.
        await redisZAdd(pendingKey, work.priority, dedupMember);
        await redisHSet(itemsKey, dedupMember, JSON.stringify(work));
        await redisExpire(pendingKey, PENDING_TTL_SECONDS);
        await redisExpire(itemsKey, PENDING_TTL_SECONDS);
        log.info('Work parked for issue', {
            issueId,
            workType,
            sessionId: work.sessionId,
            priority: work.priority,
            replaced,
        });
        return { parked: true, replaced };
    }
    catch (error) {
        log.error('Failed to park work for issue', { error, issueId });
        return { parked: false, replaced: false };
    }
}
|
|
164
|
+
/**
 * Promote the next pending work item for an issue.
 *
 * Pops the highest-priority (lowest-score) member from the pending sorted
 * set, looks up its payload in the items hash, acquires the issue lock on
 * behalf of that work, and queues it in the global work queue. Each failure
 * point rolls back as far as it can: lock contention re-parks the work;
 * queue failure releases the just-acquired lock.
 *
 * NOTE(review): the pop -> hget -> hdel sequence is multiple round trips,
 * not a single atomic operation; a concurrent caller could interleave.
 * NOTE(review): JSON.parse(workJson) assumes redisHGet returns the raw
 * string, while getIssueLock's comment says redisGet returns parsed JSON --
 * confirm the two wrappers really differ.
 *
 * @param issueId - Issue whose pending queue to drain one item from
 * @returns The promoted QueuedWork, or null if nothing to promote / on failure
 */
export async function promoteNextPendingWork(issueId) {
    if (!isRedisConfigured())
        return null;
    try {
        const pendingKey = `${PENDING_PREFIX}${issueId}`;
        const itemsKey = `${PENDING_ITEMS_PREFIX}${issueId}`;
        // Pop the highest-priority (lowest score) member
        const popped = await redisZPopMin(pendingKey);
        if (!popped) {
            log.debug('No pending work to promote', { issueId });
            return null;
        }
        // Member is the workType (the per-issue dedup key used by parkWorkForIssue)
        const dedupMember = popped.member;
        // Get the work item from the hash
        const workJson = await redisHGet(itemsKey, dedupMember);
        if (!workJson) {
            // Sorted set and hash drifted apart (e.g. partial park) -- nothing to do
            log.warn('Pending work item not found in hash', { issueId, dedupMember });
            return null;
        }
        // Remove from hash
        await redisHDel(itemsKey, dedupMember);
        const work = JSON.parse(workJson);
        // Acquire the issue lock for this promoted work
        const lock = {
            sessionId: work.sessionId,
            workType: work.workType || 'development',
            workerId: null,
            lockedAt: Date.now(),
            issueIdentifier: work.issueIdentifier,
        };
        const acquired = await acquireIssueLock(issueId, lock);
        if (!acquired) {
            log.warn('Failed to acquire lock for promoted work -- another lock appeared', {
                issueId,
                sessionId: work.sessionId,
            });
            // Re-park the work since we couldn't acquire the lock
            await parkWorkForIssue(issueId, work);
            return null;
        }
        // Queue in the global work queue
        const queued = await queueWork(work);
        if (!queued) {
            log.error('Failed to queue promoted work', { issueId, sessionId: work.sessionId });
            // Release the lock since we couldn't queue
            await releaseIssueLock(issueId);
            return null;
        }
        log.info('Pending work promoted', {
            issueId,
            sessionId: work.sessionId,
            workType: work.workType,
            issueIdentifier: work.issueIdentifier,
        });
        return work;
    }
    catch (error) {
        // NOTE(review): if queueWork throws (rather than returning false) the
        // lock acquired above is left held until its TTL expires -- verify.
        log.error('Failed to promote pending work', { error, issueId });
        return null;
    }
}
|
|
232
|
+
/**
 * Count the pending work items parked for an issue.
 *
 * @param issueId - Issue whose pending queue to measure
 * @returns Sorted-set cardinality; 0 when Redis is absent or on error
 */
export async function getPendingWorkCount(issueId) {
    if (!isRedisConfigured())
        return 0;
    try {
        return await redisZCard(`${PENDING_PREFIX}${issueId}`);
    }
    catch (error) {
        log.error('Failed to get pending work count', { error, issueId });
        return 0;
    }
}
|
|
247
|
+
/**
 * Main entry point for dispatching work.
 *
 * Attempts to take the issue lock for this work:
 * - lock acquired  -> push into the global work queue (releasing the lock
 *   again if queueing fails)
 * - lock held by another session -> park in the per-issue pending queue
 * - Redis not configured -> queue directly with no locking at all
 *
 * @param work - QueuedWork carrying issueId/sessionId/workType/issueIdentifier
 * @returns DispatchResult describing which path was taken
 */
export async function dispatchWork(work) {
    if (!isRedisConfigured()) {
        // No Redis -- fall back to direct queueing (no locking)
        const directlyQueued = await queueWork(work);
        return { dispatched: directlyQueued, parked: false, replaced: false };
    }
    const issueId = work.issueId;
    // Candidate lock payload for this dispatch attempt.
    const lock = {
        sessionId: work.sessionId,
        workType: work.workType || 'development',
        workerId: null,
        lockedAt: Date.now(),
        issueIdentifier: work.issueIdentifier,
    };
    const lockAcquired = await acquireIssueLock(issueId, lock);
    if (lockAcquired) {
        const queued = await queueWork(work);
        if (!queued) {
            // Queueing failed -- don't leave the lock dangling.
            await releaseIssueLock(issueId);
            return { dispatched: false, parked: false, replaced: false };
        }
        log.info('Work dispatched (lock acquired)', {
            issueId,
            sessionId: work.sessionId,
            workType: work.workType,
            issueIdentifier: work.issueIdentifier,
        });
        return { dispatched: true, parked: false, replaced: false };
    }
    // Lock held by another session -- park this work for later promotion.
    const { parked, replaced } = await parkWorkForIssue(issueId, work);
    if (parked) {
        log.info('Work parked (issue locked)', {
            issueId,
            sessionId: work.sessionId,
            workType: work.workType,
            replaced,
        });
    }
    return { dispatched: false, parked, replaced };
}
|
|
300
|
+
/**
 * Remove a parked work item by sessionId.
 *
 * The items hash is keyed by workType, not sessionId, so every entry is
 * scanned and parsed until one whose payload carries the target sessionId is
 * found; that entry is then removed from both the sorted set and the hash.
 * Malformed JSON entries (and per-entry Redis failures) are skipped.
 *
 * @param issueId - Issue whose pending queue to search
 * @param sessionId - Session whose parked work should be removed
 * @returns true if a matching parked item was found and removed
 */
export async function removeParkedWorkBySessionId(issueId, sessionId) {
    if (!isRedisConfigured())
        return false;
    try {
        const pendingKey = `${PENDING_PREFIX}${issueId}`;
        const itemsKey = `${PENDING_ITEMS_PREFIX}${issueId}`;
        const parkedItems = await redisHGetAll(itemsKey);
        if (!parkedItems)
            return false;
        for (const [workTypeMember, rawWork] of Object.entries(parkedItems)) {
            try {
                const candidate = JSON.parse(rawWork);
                if (candidate.sessionId !== sessionId)
                    continue;
                // Matching entry -- drop it from both structures.
                await redisZRem(pendingKey, workTypeMember);
                await redisHDel(itemsKey, workTypeMember);
                log.info('Removed parked work by sessionId', {
                    issueId,
                    sessionId,
                    workType: workTypeMember,
                });
                return true;
            }
            catch {
                // Skip malformed entries
                continue;
            }
        }
        log.debug('No parked work found for sessionId', { issueId, sessionId });
        return false;
    }
    catch (error) {
        log.error('Failed to remove parked work by sessionId', { error, issueId, sessionId });
        return false;
    }
}
|
|
346
|
+
/**
 * Check whether a session currently has work parked for an issue.
 *
 * Scans every payload in the issue:pending:items:{issueId} hash looking for
 * a matching sessionId; malformed JSON entries are ignored.
 *
 * @param issueId - The issue to check
 * @param sessionId - The session to look for
 * @returns true if the session is parked for this issue
 */
export async function isSessionParkedForIssue(issueId, sessionId) {
    if (!isRedisConfigured())
        return false;
    try {
        const parkedItems = await redisHGetAll(`${PENDING_ITEMS_PREFIX}${issueId}`);
        if (!parkedItems)
            return false;
        for (const rawWork of Object.values(parkedItems)) {
            try {
                if (JSON.parse(rawWork).sessionId === sessionId)
                    return true;
            }
            catch {
                // Ignore malformed entries
                continue;
            }
        }
        return false;
    }
    catch (error) {
        log.error('Failed to check if session is parked', { error, issueId, sessionId });
        return false;
    }
}
|
|
381
|
+
/**
 * Scan for expired issue locks that still have pending work.
 *
 * If a lock expired naturally (TTL) but parked items remain, promote the
 * next one so the issue does not stall. Called from orphan-cleanup to handle
 * crashed workers that never released their locks.
 *
 * NOTE(review): redisKeys presumably wraps Redis KEYS, which is a full
 * keyspace scan -- confirm this is acceptable at the deployment's key count.
 * NOTE(review): issueIds that themselves contain ':' would be skipped by the
 * items-hash filter below -- verify issueIds never contain colons.
 *
 * @returns Number of pending items promoted
 */
export async function cleanupExpiredLocksWithPendingWork() {
    if (!isRedisConfigured())
        return 0;
    let promoted = 0;
    try {
        // Find all pending queues
        const pendingKeys = await redisKeys(`${PENDING_PREFIX}*`);
        for (const pendingKey of pendingKeys) {
            // Extract issueId from key
            const issueId = pendingKey.replace(PENDING_PREFIX, '');
            // Skip keys that are actually issue:pending:items:* hashes (not sorted sets);
            // after stripping the prefix those residues still contain a ':'
            if (issueId.includes(':'))
                continue;
            // Check if lock still exists
            const lockKey = `${LOCK_PREFIX}${issueId}`;
            const lock = await redisGet(lockKey);
            if (!lock) {
                // Lock expired but pending work exists -- promote
                const count = await redisZCard(pendingKey);
                if (count > 0) {
                    log.info('Found expired lock with pending work, promoting', {
                        issueId,
                        pendingCount: count,
                    });
                    // Promote only one item; the rest dispatch as locks release
                    const work = await promoteNextPendingWork(issueId);
                    if (work) {
                        promoted++;
                    }
                }
            }
        }
        if (promoted > 0) {
            log.info('Promoted pending work from expired locks', { promoted });
        }
    }
    catch (error) {
        log.error('Failed to cleanup expired locks', { error });
    }
    return promoted;
}
|
|
427
|
+
// Statuses where a session should NOT be holding an issue lock.
// Terminal: session finished (completed/failed/stopped) but lock release failed.
// Pending: session was reset by orphan cleanup but lock wasn't released (see orphan-cleanup.ts).
// Consumed by cleanupStaleLocksWithIdleWorkers below.
const STALE_LOCK_STATUSES = new Set(['completed', 'failed', 'stopped', 'pending']);
|
|
431
|
+
/**
 * Release issue locks held by sessions that should no longer hold them.
 *
 * This handles cases where:
 * - A session completes but the lock release failed (network error during cleanup)
 * - Orphan cleanup resets a session to 'pending' but the lock wasn't released
 *
 * The lock's 2-hour TTL would eventually expire, but this proactively clears it
 * when workers have idle capacity.
 *
 * Only runs when workers are online -- if no workers are available, there's no
 * point promoting parked work since nothing can pick it up.
 *
 * NOTE(review): redisKeys presumably wraps Redis KEYS (full keyspace scan) --
 * confirm this is acceptable for the deployment's key count.
 *
 * @param hasIdleWorkers - true if at least one worker is online with spare capacity
 * @returns Number of stale locks released and parked work promoted
 */
export async function cleanupStaleLocksWithIdleWorkers(hasIdleWorkers) {
    if (!isRedisConfigured())
        return 0;
    if (!hasIdleWorkers)
        return 0;
    let promoted = 0;
    try {
        // Find all issue locks
        const lockKeys = await redisKeys(`${LOCK_PREFIX}*`);
        for (const lockKey of lockKeys) {
            const issueId = lockKey.replace(LOCK_PREFIX, '');
            // Skip keys that look like pending queue keys (contain extra colons)
            if (issueId.includes(':'))
                continue;
            // Lock may have expired between KEYS and GET -- skip if gone
            const lock = await redisGet(lockKey);
            if (!lock)
                continue;
            // Check if the lock holder's session is in a terminal state
            const session = await getSessionState(lock.sessionId);
            if (!session) {
                // Session expired from Redis (24h TTL) but lock remains (2h TTL)
                // Safe to release -- the session is long gone
                log.info('Releasing lock for expired session', {
                    issueId,
                    sessionId: lock.sessionId,
                    issueIdentifier: lock.issueIdentifier,
                });
                await releaseIssueLock(issueId);
                const work = await promoteNextPendingWork(issueId);
                if (work)
                    promoted++;
                continue;
            }
            if (STALE_LOCK_STATUSES.has(session.status)) {
                log.info('Releasing stale lock (session no longer needs lock)', {
                    issueId,
                    sessionId: lock.sessionId,
                    sessionStatus: session.status,
                    issueIdentifier: lock.issueIdentifier,
                    // lockAge in whole seconds, for operator readability
                    lockAge: Math.round((Date.now() - lock.lockedAt) / 1000),
                });
                await releaseIssueLock(issueId);
                const work = await promoteNextPendingWork(issueId);
                if (work) {
                    promoted++;
                    log.info('Promoted parked work after stale lock cleanup', {
                        issueId,
                        promotedSessionId: work.sessionId,
                        promotedWorkType: work.workType,
                    });
                }
            }
        }
        if (promoted > 0) {
            log.info('Promoted parked work from stale locks', { promoted });
        }
    }
    catch (error) {
        log.error('Failed to cleanup stale locks', { error });
    }
    return promoted;
}
|