@desplega.ai/agent-swarm 1.86.0 → 1.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +72 -1
- package/package.json +3 -1
- package/src/be/db-queries/tracker.ts +21 -0
- package/src/be/db.ts +235 -14
- package/src/be/migrations/079_task_followup_config.sql +1 -0
- package/src/be/modelsdev-cache.json +77663 -74073
- package/src/cli.tsx +26 -0
- package/src/commands/context-preamble.ts +272 -0
- package/src/commands/e2b.ts +728 -0
- package/src/commands/resume-session.ts +35 -78
- package/src/commands/runner.ts +125 -13
- package/src/e2b/dispatch.ts +429 -0
- package/src/e2b/env.ts +206 -0
- package/src/heartbeat/heartbeat.ts +145 -30
- package/src/heartbeat/templates.ts +11 -7
- package/src/http/session-data.ts +8 -1
- package/src/http/tasks.ts +152 -3
- package/src/jira/sync.ts +4 -4
- package/src/linear/sync.ts +6 -5
- package/src/providers/claude-adapter.ts +10 -76
- package/src/providers/claude-managed-adapter.ts +61 -75
- package/src/providers/codex-adapter.ts +15 -18
- package/src/providers/codex-oauth/auth-json.ts +18 -1
- package/src/providers/codex-oauth/flow.ts +24 -1
- package/src/providers/types.ts +6 -0
- package/src/tasks/worker-follow-up.ts +162 -2
- package/src/telemetry.ts +11 -1
- package/src/tests/claude-adapter.test.ts +5 -27
- package/src/tests/claude-managed-adapter.test.ts +38 -52
- package/src/tests/codex-adapter.test.ts +6 -31
- package/src/tests/codex-oauth.test.ts +149 -3
- package/src/tests/codex-pool.test.ts +14 -3
- package/src/tests/e2b-dispatch.test.ts +330 -0
- package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
- package/src/tests/heartbeat.test.ts +26 -16
- package/src/tests/prompt-template-remaining.test.ts +4 -0
- package/src/tests/resume-session.test.ts +42 -50
- package/src/tests/structured-output.test.ts +69 -0
- package/src/tests/task-completion-idempotency.test.ts +185 -2
- package/src/tests/task-supersede-resume.test.ts +722 -0
- package/src/tests/telemetry-init.test.ts +69 -0
- package/src/tests/vcs-tracking.test.ts +39 -0
- package/src/tools/send-task.ts +12 -1
- package/src/tools/store-progress.ts +2 -2
- package/src/tools/templates.ts +14 -2
- package/src/types.ts +46 -1
- package/src/workflows/executors/agent-task.ts +3 -0
|
@@ -17,18 +17,27 @@ import {
|
|
|
17
17
|
getTaskStats,
|
|
18
18
|
getTasksByStatus,
|
|
19
19
|
getUnassignedPoolTasks,
|
|
20
|
+
hasNonTerminalResumeChild,
|
|
20
21
|
releaseStaleMentionProcessing,
|
|
21
22
|
releaseStaleProcessingInbox,
|
|
22
23
|
releaseStaleReviewingTasks,
|
|
24
|
+
supersedeTask,
|
|
23
25
|
updateAgentStatus,
|
|
24
26
|
} from "../be/db";
|
|
25
27
|
import { resolveTemplate } from "../prompts/resolver";
|
|
28
|
+
import { createResumeFollowUp } from "../tasks/worker-follow-up";
|
|
26
29
|
import type { AgentTask } from "../types";
|
|
27
30
|
import { getExecutorRegistry } from "../workflows";
|
|
28
31
|
import { recoverIncompleteRuns } from "../workflows/recovery";
|
|
29
32
|
// Side-effect import: registers heartbeat event templates in the in-memory registry
|
|
30
33
|
import "./templates";
|
|
31
34
|
|
|
35
|
+
/**
 * Task types the heartbeat must never auto-resume or auto-retry.
 *
 * These are the system-generated heartbeat / triage tasks themselves; resuming
 * them after a crash would schedule the same system task again and risk an
 * infinite retry loop. `runRebootSweep` consults this same set for its
 * "don't retry" check, so both sweeps share one exclusion list.
 */
const SKIP_AUTO_RESUME_TYPES = new Set<string>([
  "heartbeat-checklist",
  "boot-triage",
  "heartbeat",
]);
|
|
40
|
+
|
|
32
41
|
// ============================================================================
|
|
33
42
|
// Configuration (env var overrides)
|
|
34
43
|
// ============================================================================
|
|
@@ -65,6 +74,12 @@ const HEARTBEAT_CHECKLIST_DISABLE = Boolean(process.env.HEARTBEAT_CHECKLIST_DISA
|
|
|
65
74
|
export interface HeartbeatFindings {
|
|
66
75
|
stalledTasks: AgentTask[];
|
|
67
76
|
autoFailedTasks: Array<{ taskId: string; agentId: string; reason: string }>;
|
|
77
|
+
autoResumedTasks: Array<{
|
|
78
|
+
taskId: string;
|
|
79
|
+
resumeTaskId: string;
|
|
80
|
+
agentId: string;
|
|
81
|
+
reason: string;
|
|
82
|
+
}>;
|
|
68
83
|
workerHealthFixes: Array<{ agentId: string; oldStatus: string; newStatus: string }>;
|
|
69
84
|
autoAssigned: Array<{ taskId: string; agentId: string }>;
|
|
70
85
|
staleCleanup: {
|
|
@@ -128,6 +143,7 @@ export async function codeLevelTriage(): Promise<HeartbeatFindings> {
|
|
|
128
143
|
const findings: HeartbeatFindings = {
|
|
129
144
|
stalledTasks: [],
|
|
130
145
|
autoFailedTasks: [],
|
|
146
|
+
autoResumedTasks: [],
|
|
131
147
|
workerHealthFixes: [],
|
|
132
148
|
autoAssigned: [],
|
|
133
149
|
staleCleanup: {
|
|
@@ -175,19 +191,13 @@ function detectAndRemediateStalledTasks(findings: HeartbeatFindings): void {
|
|
|
175
191
|
if (!session) {
|
|
176
192
|
// Case A: No active session — worker is dead
|
|
177
193
|
if (taskAgeMs >= STALL_THRESHOLD_NO_SESSION_MIN * 60 * 1000) {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
// Fix agent status if no other active tasks
|
|
186
|
-
const remaining = getActiveTaskCount(task.agentId);
|
|
187
|
-
if (remaining === 0) {
|
|
188
|
-
updateAgentStatus(task.agentId, "idle");
|
|
189
|
-
}
|
|
190
|
-
}
|
|
194
|
+
remediateCrashedWorkerTask(findings, task, {
|
|
195
|
+
supersedeReason:
|
|
196
|
+
"Auto-superseded by heartbeat: worker session not found (no active session for task)",
|
|
197
|
+
legacyFailReason:
|
|
198
|
+
"Auto-failed by heartbeat: worker session not found (no active session for task)",
|
|
199
|
+
shortLabel: "no active session",
|
|
200
|
+
});
|
|
191
201
|
}
|
|
192
202
|
} else {
|
|
193
203
|
const sessionHeartbeatAgeMs = Date.now() - new Date(session.lastHeartbeatAt).getTime();
|
|
@@ -197,21 +207,14 @@ function detectAndRemediateStalledTasks(findings: HeartbeatFindings): void {
|
|
|
197
207
|
if (isStaleHeartbeat) {
|
|
198
208
|
// Case B: Session exists but heartbeat is stale — worker likely crashed
|
|
199
209
|
if (taskAgeMs >= STALL_THRESHOLD_STALE_HEARTBEAT_MIN * 60 * 1000) {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
);
|
|
209
|
-
|
|
210
|
-
const remaining = getActiveTaskCount(task.agentId);
|
|
211
|
-
if (remaining === 0) {
|
|
212
|
-
updateAgentStatus(task.agentId, "idle");
|
|
213
|
-
}
|
|
214
|
-
}
|
|
210
|
+
remediateCrashedWorkerTask(findings, task, {
|
|
211
|
+
supersedeReason:
|
|
212
|
+
"Auto-superseded by heartbeat: worker session heartbeat is stale (likely crashed)",
|
|
213
|
+
legacyFailReason:
|
|
214
|
+
"Auto-failed by heartbeat: worker session heartbeat is stale (likely crashed)",
|
|
215
|
+
shortLabel: "stale session heartbeat",
|
|
216
|
+
cleanupActiveSession: true,
|
|
217
|
+
});
|
|
215
218
|
}
|
|
216
219
|
} else {
|
|
217
220
|
// Case C: Session exists and heartbeat is fresh — ambiguous
|
|
@@ -223,6 +226,115 @@ function detectAndRemediateStalledTasks(findings: HeartbeatFindings): void {
|
|
|
223
226
|
}
|
|
224
227
|
}
|
|
225
228
|
|
|
229
|
+
/**
 * Shared remediation for Cases A (no active session) and B (stale heartbeat) of the
 * stalled-task detector. Prefers the supersede → resume follow-up path (DES-523) so a
 * crashed worker's task gets a fresh `resume` child instead of being silently dropped.
 *
 * Decision order:
 *  1. Workflow-step task (`workflowRunStepId` is set): fail it with reason
 *     `superseded_workflow_task` and never supersede, so the workflow engine's
 *     retry policy owns recovery and the parent is not left in `superseded`.
 *  2. System task type listed in `SKIP_AUTO_RESUME_TYPES`, or a non-terminal
 *     `resume` child already exists for this task: fail it with
 *     `opts.legacyFailReason` (the legacy auto-fail behaviour).
 *  3. Otherwise: supersede the task and create a `crash_recovery` resume follow-up.
 *
 * When `failTask` / `supersedeTask` report that nothing was changed, the function
 * returns without recording a finding, deleting the session, or touching agent status.
 *
 * @param findings - Sweep accumulator; `autoFailedTasks` / `autoResumedTasks` are
 *   appended to in place.
 * @param task - The stalled task. Tasks without an `agentId` are ignored.
 * @param opts - Per-case wording and cleanup behaviour:
 *   - `supersedeReason`: reason stored on the supersede and reported in findings.
 *   - `legacyFailReason`: reason used when falling back to `failTask` (branch 2).
 *   - `shortLabel`: short human-readable cause used in log lines.
 *   - `cleanupActiveSession`: when truthy, the task's active-session row is deleted
 *     after a successful fail/supersede (used for the stale-heartbeat case).
 */
function remediateCrashedWorkerTask(
  findings: HeartbeatFindings,
  task: AgentTask,
  opts: {
    supersedeReason: string;
    legacyFailReason: string;
    shortLabel: string;
    cleanupActiveSession?: boolean;
  },
): void {
  const agentId = task.agentId;
  if (!agentId) return; // Type guard — caller already checked.

  const shortId = task.id.slice(0, 8);
  const cleanupActiveSession = Boolean(opts.cleanupActiveSession);

  // Branch 1 — workflow-step tasks go back to the engine as `failed`.
  // `createResumeFollowUp` would also bail with `workflow-skip`, but by then the
  // parent would already be `superseded`; checking first avoids that state.
  if (task.workflowRunStepId != null) {
    failStalledTaskAndRecord(findings, {
      taskId: task.id,
      agentId,
      reason: "superseded_workflow_task",
      logLine: `[Heartbeat] Workflow-step task ${shortId} failed — engine will handle retry (${opts.shortLabel})`,
      cleanupActiveSession,
    });
    return;
  }

  // Branch 2 — system tasks, or a resume that a prior sweep already created.
  // The idempotency check filters on `taskType = 'resume'` specifically (not any
  // child task) because `send-task` auto-defaults `parentTaskId` to the caller's
  // current task, so a crashed worker with delegated subtasks would otherwise be
  // incorrectly skipped (PR #594 review).
  const skipAutoResume = SKIP_AUTO_RESUME_TYPES.has(task.taskType ?? "");
  if (skipAutoResume || hasNonTerminalResumeChild(task.id)) {
    failStalledTaskAndRecord(findings, {
      taskId: task.id,
      agentId,
      reason: opts.legacyFailReason,
      logLine: `[Heartbeat] Auto-failed task ${shortId} — ${opts.shortLabel} (${
        skipAutoResume ? "skipRetry taskType" : "resume already exists"
      })`,
      cleanupActiveSession,
    });
    return;
  }

  // Branch 3 — supersede + resume. Supersede first so no resume child is created
  // if the supersede did not land (presumably the task already left `in_progress`).
  const superseded = supersedeTask(task.id, {
    reason: opts.supersedeReason,
    resumeTaskId: null,
  });
  if (!superseded) return;

  const resume = createResumeFollowUp({ parentId: task.id, reason: "crash_recovery" });

  if (resume.kind === "created") {
    findings.autoResumedTasks.push({
      taskId: task.id,
      resumeTaskId: resume.task.id,
      agentId,
      reason: opts.supersedeReason,
    });
    console.log(
      `[Heartbeat] Auto-superseded task ${shortId} — created resume ${resume.task.id.slice(0, 8)} (${opts.shortLabel})`,
    );
  } else {
    // `workflow-skip` is unreachable here (handled in branch 1). `skipped` covers
    // parent-not-found / lead-not-found edge cases. The parent is already
    // superseded with no resume child, i.e. the work is dropped, so surface it at
    // warn level — the same level the HTTP supersede handler uses for this case.
    // NOTE(review): this outcome is not recorded in `findings`; consider adding a
    // dedicated bucket so triage can see it.
    console.warn(
      `[Heartbeat] Task ${shortId} superseded but no resume created (${
        resume.kind === "skipped" ? resume.reason : resume.kind
      })`,
    );
  }

  if (cleanupActiveSession) deleteActiveSession(task.id);
  markAgentIdleIfNoActiveTasks(agentId);
}

/**
 * Fails a stalled task and, only if the fail actually landed, records it in
 * `findings.autoFailedTasks`, optionally deletes its active session, logs
 * `logLine`, and idles the agent when it has no other active tasks.
 */
function failStalledTaskAndRecord(
  findings: HeartbeatFindings,
  args: {
    taskId: string;
    agentId: string;
    reason: string;
    logLine: string;
    cleanupActiveSession: boolean;
  },
): void {
  const failed = failTask(args.taskId, args.reason);
  if (!failed) return;

  findings.autoFailedTasks.push({
    taskId: args.taskId,
    agentId: args.agentId,
    reason: args.reason,
  });
  if (args.cleanupActiveSession) deleteActiveSession(args.taskId);
  console.log(args.logLine);
  markAgentIdleIfNoActiveTasks(args.agentId);
}

/** Sets the agent's status to `idle` when it has no remaining active tasks. */
function markAgentIdleIfNoActiveTasks(agentId: string): void {
  if (getActiveTaskCount(agentId) === 0) {
    updateAgentStatus(agentId, "idle");
  }
}
|
|
337
|
+
|
|
226
338
|
/**
|
|
227
339
|
* Aggressive sweep that runs once after server restart.
|
|
228
340
|
* Ignores age thresholds — any in_progress task with no active session is auto-failed.
|
|
@@ -268,8 +380,7 @@ export async function runRebootSweep(): Promise<void> {
|
|
|
268
380
|
}
|
|
269
381
|
|
|
270
382
|
// Don't retry system-generated heartbeat tasks
|
|
271
|
-
|
|
272
|
-
if (skipRetryTypes.includes(task.taskType ?? "")) {
|
|
383
|
+
if (SKIP_AUTO_RESUME_TYPES.has(task.taskType ?? "")) {
|
|
273
384
|
rebootAffectedTasks.push({ original: failed, retryTaskId: null });
|
|
274
385
|
continue;
|
|
275
386
|
}
|
|
@@ -674,6 +785,7 @@ export async function runHeartbeatSweep(): Promise<void> {
|
|
|
674
785
|
const cleanupOnlyFindings: HeartbeatFindings = {
|
|
675
786
|
stalledTasks: [],
|
|
676
787
|
autoFailedTasks: [],
|
|
788
|
+
autoResumedTasks: [],
|
|
677
789
|
workerHealthFixes: [],
|
|
678
790
|
autoAssigned: [],
|
|
679
791
|
staleCleanup: {
|
|
@@ -708,6 +820,9 @@ function logFindings(findings: HeartbeatFindings): void {
|
|
|
708
820
|
if (findings.autoFailedTasks.length > 0) {
|
|
709
821
|
parts.push(`auto_failed=${findings.autoFailedTasks.length}`);
|
|
710
822
|
}
|
|
823
|
+
if (findings.autoResumedTasks.length > 0) {
|
|
824
|
+
parts.push(`auto_resumed=${findings.autoResumedTasks.length}`);
|
|
825
|
+
}
|
|
711
826
|
if (findings.stalledTasks.length > 0) {
|
|
712
827
|
parts.push(`stalled=${findings.stalledTasks.length}`);
|
|
713
828
|
}
|
|
@@ -78,13 +78,17 @@ The API server has just restarted (deployment, pod rotation, or crash). An aggre
|
|
|
78
78
|
- If a retry failed or is stuck, re-create the task manually
|
|
79
79
|
- If the work is no longer needed, cancel the retry task
|
|
80
80
|
- You MUST address every item — do NOT skip this section
|
|
81
|
-
2. **
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
5.
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
81
|
+
2. **Verify supersede + resume worked end-to-end.** Worker crashes / OOMs are recovered via supersede (parent → \`superseded\`) + a fresh \`taskType=resume\` child created by the heartbeat sweep (DES-523). Sanity check:
|
|
82
|
+
- List recent \`superseded\` tasks: \`list-tasks status=superseded\` (last ~hour).
|
|
83
|
+
- For each, confirm a child task with \`taskType=resume\` and a non-terminal status exists. If a superseded task is missing its resume child, the work is silently dropped — recreate the task manually.
|
|
84
|
+
- Look for \`in_progress\` tasks older than 5 min on agents that show as offline — the sweep should have caught them. If any remain, recreate as needed.
|
|
85
|
+
3. **Check orphaned tasks.** If the "Orphaned Tasks" section lists pending/offered tasks assigned to offline workers, re-assign or cancel them.
|
|
86
|
+
4. Review agent status — are all expected workers online? If not, note which are missing.
|
|
87
|
+
5. Review your standing orders for any post-reboot checks.
|
|
88
|
+
6. Take action using your available tools.
|
|
89
|
+
7. Complete this task with a summary of what you found and what actions you took. Include the status of each reboot-interrupted task.
|
|
90
|
+
8. Do NOT create another boot-triage task — this is a one-off event.
|
|
91
|
+
9. **Update your standing orders** — If the reboot revealed a pattern worth monitoring (e.g., frequent restarts, specific tasks that keep failing), add a standing order to HEARTBEAT.md via \`update-profile\` with \`heartbeatMd\`.`,
|
|
88
92
|
variables: [
|
|
89
93
|
{
|
|
90
94
|
name: "system_status",
|
package/src/http/session-data.ts
CHANGED
|
@@ -46,6 +46,13 @@ const getSessionLogsByTask = route({
|
|
|
46
46
|
summary: "Get session logs for a task",
|
|
47
47
|
tags: ["Session Data"],
|
|
48
48
|
params: z.object({ taskId: z.string() }),
|
|
49
|
+
query: z.object({
|
|
50
|
+
// When set, returns the last N log rows ordered ASC. Used by the
|
|
51
|
+
// resume context preamble to avoid pulling the full log set over HTTP
|
|
52
|
+
// just to slice the tail. Server-side limit prevents OOM / slow
|
|
53
|
+
// dispatch for tasks with very long run history (PR #594 review).
|
|
54
|
+
limit: z.coerce.number().int().min(1).max(1000).optional(),
|
|
55
|
+
}),
|
|
49
56
|
responses: {
|
|
50
57
|
200: { description: "Session logs" },
|
|
51
58
|
404: { description: "Task not found" },
|
|
@@ -181,7 +188,7 @@ export async function handleSessionData(
|
|
|
181
188
|
jsonError(res, "Task not found", 404);
|
|
182
189
|
return true;
|
|
183
190
|
}
|
|
184
|
-
const logs = getSessionLogsByTaskId(parsed.params.taskId);
|
|
191
|
+
const logs = getSessionLogsByTaskId(parsed.params.taskId, parsed.query?.limit);
|
|
185
192
|
json(res, { logs });
|
|
186
193
|
return true;
|
|
187
194
|
}
|
package/src/http/tasks.ts
CHANGED
|
@@ -16,20 +16,23 @@ import {
|
|
|
16
16
|
getUserById,
|
|
17
17
|
pauseTask,
|
|
18
18
|
resumeTask,
|
|
19
|
+
supersedeTask,
|
|
19
20
|
updateAgentStatusFromCapacity,
|
|
20
21
|
updateTaskClaudeSessionId,
|
|
21
22
|
updateTaskProgress,
|
|
22
23
|
updateTaskVcs,
|
|
23
24
|
} from "../be/db";
|
|
24
25
|
import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
|
|
25
|
-
import { createWorkerTaskFollowUp } from "../tasks/worker-follow-up";
|
|
26
|
+
import { createResumeFollowUp, createWorkerTaskFollowUp } from "../tasks/worker-follow-up";
|
|
26
27
|
import { telemetry } from "../telemetry";
|
|
27
28
|
import {
|
|
28
29
|
type AgentTaskSource,
|
|
29
30
|
AgentTaskSourceSchema,
|
|
30
31
|
type AgentTaskStatus,
|
|
31
32
|
AgentTaskStatusSchema,
|
|
33
|
+
isTerminalTaskStatus,
|
|
32
34
|
ProviderNameSchema,
|
|
35
|
+
ResumeReasonSchema,
|
|
33
36
|
} from "../types";
|
|
34
37
|
import { route } from "./route-def";
|
|
35
38
|
import { json, jsonError } from "./utils";
|
|
@@ -234,6 +237,25 @@ const resumeTaskRoute = route({
|
|
|
234
237
|
},
|
|
235
238
|
});
|
|
236
239
|
|
|
240
|
+
/**
 * Route definition for `POST /api/tasks/{id}/supersede`.
 *
 * Request: path param `id` (string) and a JSON body `{ reason }` validated by
 * `ResumeReasonSchema`. Declares both the `apiKey` and `agentId` auth flags.
 * Documented responses: 200 (superseded or workflow-failed), 400 (task not
 * in_progress), 403 (task belongs to another agent), 404 (task not found).
 */
const supersedeTaskRoute = route({
  method: "post",
  path: "/api/tasks/{id}/supersede",
  pattern: ["api", "tasks", null, "supersede"],
  summary: "Supersede an in-progress task (terminate + spawn resume follow-up)",
  // Two sentences joined by a single space; the result is the same single string.
  description: [
    'Marks the original task `superseded` (terminal) and creates a fresh `taskType="resume"` follow-up so a worker can pick up the work in a new provider session.',
    "Workflow-step tasks (those with `workflowRunStepId`) are carved out: the original is marked `failed` with reason `superseded_workflow_task` and no follow-up is created — the workflow engine's retry/failure policy applies.",
  ].join(" "),
  tags: ["Tasks"],
  params: z.object({
    id: z.string(),
  }),
  body: z.object({
    reason: ResumeReasonSchema,
  }),
  auth: {
    apiKey: true,
    agentId: true,
  },
  responses: {
    200: { description: "Task superseded (or workflow-failed)" },
    400: { description: "Task not in_progress" },
    403: { description: "Task belongs to another agent" },
    404: { description: "Task not found" },
  },
});
|
|
258
|
+
|
|
237
259
|
const updateTaskVcsRoute = route({
|
|
238
260
|
method: "patch",
|
|
239
261
|
path: "/api/tasks/{id}/vcs",
|
|
@@ -429,8 +451,7 @@ export async function handleTasks(
|
|
|
429
451
|
return true;
|
|
430
452
|
}
|
|
431
453
|
|
|
432
|
-
|
|
433
|
-
if (terminalStatuses.includes(task.status)) {
|
|
454
|
+
if (isTerminalTaskStatus(task.status)) {
|
|
434
455
|
jsonError(res, `Cannot cancel task with status '${task.status}'`, 400);
|
|
435
456
|
return true;
|
|
436
457
|
}
|
|
@@ -776,5 +797,133 @@ export async function handleTasks(
|
|
|
776
797
|
return true;
|
|
777
798
|
}
|
|
778
799
|
|
|
800
|
+
if (supersedeTaskRoute.match(req.method, pathSegments)) {
|
|
801
|
+
const parsed = await supersedeTaskRoute.parse(req, res, pathSegments, queryParams);
|
|
802
|
+
if (!parsed) return true;
|
|
803
|
+
const task = getTaskById(parsed.params.id);
|
|
804
|
+
|
|
805
|
+
if (!task) {
|
|
806
|
+
jsonError(res, "Task not found", 404);
|
|
807
|
+
return true;
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
if (myAgentId && task.agentId !== myAgentId) {
|
|
811
|
+
jsonError(res, "Task belongs to another agent", 403);
|
|
812
|
+
return true;
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
// Idempotency: if already terminal, return the alreadyFinished-shaped
|
|
816
|
+
// response (mirrors finishTask). Caller treats this as a successful
|
|
817
|
+
// supersede.
|
|
818
|
+
if (isTerminalTaskStatus(task.status)) {
|
|
819
|
+
json(res, {
|
|
820
|
+
success: true,
|
|
821
|
+
kind: "alreadyFinished",
|
|
822
|
+
task,
|
|
823
|
+
resumeTaskId: null,
|
|
824
|
+
});
|
|
825
|
+
return true;
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
if (task.status !== "in_progress") {
|
|
829
|
+
jsonError(res, `Task status is '${task.status}', not 'in_progress'`, 400);
|
|
830
|
+
return true;
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
// Workflow-step tasks: fail back to the engine instead of superseding.
|
|
834
|
+
// Check this BEFORE the supersede UPDATE so we don't leave a workflow
|
|
835
|
+
// step in `superseded` if the engine expects `failed`.
|
|
836
|
+
if (task.workflowRunStepId != null) {
|
|
837
|
+
const failed = failTask(parsed.params.id, "superseded_workflow_task");
|
|
838
|
+
ensure({
|
|
839
|
+
id: "task.workflow_step_failed_on_supersede",
|
|
840
|
+
flow: "task",
|
|
841
|
+
runId: parsed.params.id,
|
|
842
|
+
data: {
|
|
843
|
+
taskId: parsed.params.id,
|
|
844
|
+
agentId: task.agentId,
|
|
845
|
+
stepId: task.workflowRunStepId,
|
|
846
|
+
reason: parsed.body.reason,
|
|
847
|
+
},
|
|
848
|
+
});
|
|
849
|
+
json(res, {
|
|
850
|
+
success: true,
|
|
851
|
+
kind: "workflow-failed",
|
|
852
|
+
task: failed,
|
|
853
|
+
resumeTaskId: null,
|
|
854
|
+
});
|
|
855
|
+
return true;
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
// Supersede FIRST (atomic + idempotent in db.ts) so we don't orphan a
|
|
859
|
+
// resume child if a worker races to complete/fail/cancel between the
|
|
860
|
+
// pre-read status check and the supersede UPDATE.
|
|
861
|
+
const superseded = supersedeTask(parsed.params.id, {
|
|
862
|
+
reason: parsed.body.reason,
|
|
863
|
+
// resumeTaskId is attached AFTER the child is created. Lost race here
|
|
864
|
+
// means no child is created at all, so the log entry's null is accurate.
|
|
865
|
+
resumeTaskId: null,
|
|
866
|
+
});
|
|
867
|
+
if (!superseded) {
|
|
868
|
+
// Worker won the race (terminal transition between status check and
|
|
869
|
+
// this UPDATE). Treat as `alreadyFinished` — no resume child is created.
|
|
870
|
+
const fresh = getTaskById(parsed.params.id);
|
|
871
|
+
json(res, {
|
|
872
|
+
success: true,
|
|
873
|
+
kind: "alreadyFinished",
|
|
874
|
+
task: fresh,
|
|
875
|
+
resumeTaskId: null,
|
|
876
|
+
});
|
|
877
|
+
return true;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
// Parent is now superseded. Create the resume child.
|
|
881
|
+
const followUp = createResumeFollowUp({
|
|
882
|
+
parentId: parsed.params.id,
|
|
883
|
+
reason: parsed.body.reason,
|
|
884
|
+
});
|
|
885
|
+
|
|
886
|
+
// `workflow-skip` is unreachable here (workflow-step path branched above).
|
|
887
|
+
// `skipped` covers parent_not_found / lead_not_found edge cases — the
|
|
888
|
+
// supersede already landed, so log + roll forward without a resume task.
|
|
889
|
+
if (followUp.kind !== "created") {
|
|
890
|
+
console.warn(
|
|
891
|
+
`[Supersede] Task ${parsed.params.id.slice(0, 8)} superseded but resume creation skipped (${
|
|
892
|
+
followUp.kind === "skipped" ? followUp.reason : followUp.kind
|
|
893
|
+
})`,
|
|
894
|
+
);
|
|
895
|
+
json(res, {
|
|
896
|
+
success: true,
|
|
897
|
+
kind: "resumed",
|
|
898
|
+
task: superseded,
|
|
899
|
+
resumeTaskId: null,
|
|
900
|
+
});
|
|
901
|
+
return true;
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
const resumeTaskId = followUp.task.id;
|
|
905
|
+
|
|
906
|
+
ensure({
|
|
907
|
+
id: "task.superseded",
|
|
908
|
+
flow: "task",
|
|
909
|
+
runId: parsed.params.id,
|
|
910
|
+
data: {
|
|
911
|
+
taskId: parsed.params.id,
|
|
912
|
+
agentId: task.agentId,
|
|
913
|
+
reason: parsed.body.reason,
|
|
914
|
+
resumeTaskId,
|
|
915
|
+
},
|
|
916
|
+
});
|
|
917
|
+
|
|
918
|
+
json(res, {
|
|
919
|
+
success: true,
|
|
920
|
+
kind: "resumed",
|
|
921
|
+
task: superseded,
|
|
922
|
+
resumeTaskId,
|
|
923
|
+
resumeTaskStatus: followUp.task.status,
|
|
924
|
+
});
|
|
925
|
+
return true;
|
|
926
|
+
}
|
|
927
|
+
|
|
779
928
|
return false;
|
|
780
929
|
}
|
package/src/jira/sync.ts
CHANGED
|
@@ -25,7 +25,7 @@ import { ensureToken, ensureTokenOrThrow } from "../oauth/ensure-token";
|
|
|
25
25
|
import { resolveTemplate } from "../prompts/resolver";
|
|
26
26
|
import { buildJiraContextKey } from "../tasks/context-key";
|
|
27
27
|
import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
|
|
28
|
-
import type
|
|
28
|
+
import { type Agent, isTerminalTaskStatus } from "../types";
|
|
29
29
|
import { extractMentions, extractText } from "./adf";
|
|
30
30
|
import { getJiraMetadata } from "./metadata";
|
|
31
31
|
// Side-effect import: registers all Jira event templates in the prompt registry
|
|
@@ -252,7 +252,7 @@ export async function handleIssueEvent(event: Record<string, unknown>): Promise<
|
|
|
252
252
|
|
|
253
253
|
// Pre-existing — branch on prior task state.
|
|
254
254
|
const priorTask = claim.sync.swarmId ? getTaskById(claim.sync.swarmId) : null;
|
|
255
|
-
if (priorTask && !
|
|
255
|
+
if (priorTask && !isTerminalTaskStatus(priorTask.status)) {
|
|
256
256
|
// In-progress: do not duplicate. Match Linear's behavior of acknowledging
|
|
257
257
|
// and continuing with the existing task.
|
|
258
258
|
console.log(
|
|
@@ -408,7 +408,7 @@ async function routeCommentOnExistingSync(input: {
|
|
|
408
408
|
syncRow: { id: string; swarmId: string };
|
|
409
409
|
}): Promise<void> {
|
|
410
410
|
const priorTask = input.syncRow.swarmId ? getTaskById(input.syncRow.swarmId) : null;
|
|
411
|
-
if (priorTask && !
|
|
411
|
+
if (priorTask && !isTerminalTaskStatus(priorTask.status)) {
|
|
412
412
|
// In-progress: log and ignore (mirrors Linear's prompted-on-active path).
|
|
413
413
|
console.log(
|
|
414
414
|
`[Jira Sync] Bot mentioned on issue ${input.issueKey} but task ${priorTask.id} still ${priorTask.status} — ignoring`,
|
|
@@ -440,7 +440,7 @@ export async function handleIssueDeleteEvent(event: Record<string, unknown>): Pr
|
|
|
440
440
|
if (!sync) return;
|
|
441
441
|
|
|
442
442
|
const task = sync.swarmId ? getTaskById(sync.swarmId) : null;
|
|
443
|
-
if (task && !
|
|
443
|
+
if (task && !isTerminalTaskStatus(task.status)) {
|
|
444
444
|
cancelTask(sync.swarmId, "Jira issue deleted");
|
|
445
445
|
console.log(
|
|
446
446
|
`[Jira Sync] Cancelled task ${sync.swarmId} (Jira issue ${issue.key ?? issue.id} deleted)`,
|
package/src/linear/sync.ts
CHANGED
|
@@ -11,6 +11,7 @@ import { ensureToken } from "../oauth/ensure-token";
|
|
|
11
11
|
import { resolveTemplate } from "../prompts/resolver";
|
|
12
12
|
import { linearContextKey } from "../tasks/context-key";
|
|
13
13
|
import { createTaskWithSiblingAwareness } from "../tasks/sibling-awareness";
|
|
14
|
+
import { isTerminalTaskStatus } from "../types";
|
|
14
15
|
import {
|
|
15
16
|
buildSkipMessage,
|
|
16
17
|
getLinearGateConfig,
|
|
@@ -503,7 +504,7 @@ export async function handleAgentSessionEvent(event: Record<string, unknown>): P
|
|
|
503
504
|
// session can be closed. Do NOT create a duplicate swarm task. If the user
|
|
504
505
|
// wants to force a fresh run, they can re-assign the issue after the
|
|
505
506
|
// current task finishes.
|
|
506
|
-
if (existingTask && !
|
|
507
|
+
if (existingTask && !isTerminalTaskStatus(existingTask.status)) {
|
|
507
508
|
console.log(
|
|
508
509
|
`[Linear Sync] Issue ${issueIdentifier} already tracked as active task ${existing.swarmId} (status: ${existingTask.status}), skipping duplicate`,
|
|
509
510
|
);
|
|
@@ -671,7 +672,7 @@ export async function handleIssueUpdate(
|
|
|
671
672
|
// Map status to swarm actions
|
|
672
673
|
if (swarmStatus === "cancelled") {
|
|
673
674
|
const task = getTaskById(sync.swarmId);
|
|
674
|
-
if (task && !
|
|
675
|
+
if (task && !isTerminalTaskStatus(task.status)) {
|
|
675
676
|
cancelTask(sync.swarmId, `Linear issue cancelled`);
|
|
676
677
|
console.log(
|
|
677
678
|
`[Linear Sync] Cancelled task ${sync.swarmId} (Linear issue ${data.identifier ?? issueId} cancelled)`,
|
|
@@ -709,7 +710,7 @@ export async function handleIssueDelete(event: Record<string, unknown>): Promise
|
|
|
709
710
|
if (!sync) return;
|
|
710
711
|
|
|
711
712
|
const task = getTaskById(sync.swarmId);
|
|
712
|
-
if (task && !
|
|
713
|
+
if (task && !isTerminalTaskStatus(task.status)) {
|
|
713
714
|
cancelTask(sync.swarmId, "Linear issue deleted");
|
|
714
715
|
console.log(`[Linear Sync] Cancelled task ${sync.swarmId} (Linear issue ${issueId} deleted)`);
|
|
715
716
|
}
|
|
@@ -750,7 +751,7 @@ export async function handleAgentSessionPrompted(event: Record<string, unknown>)
|
|
|
750
751
|
const existing = getTrackerSyncByExternalId("linear", "task", issueId);
|
|
751
752
|
if (existing) {
|
|
752
753
|
const existingTask = getTaskById(existing.swarmId);
|
|
753
|
-
if (existingTask && !
|
|
754
|
+
if (existingTask && !isTerminalTaskStatus(existingTask.status)) {
|
|
754
755
|
cancelTask(existing.swarmId, "Stopped by user from Linear");
|
|
755
756
|
console.log(`[Linear Sync] Cancelled task ${existing.swarmId} (stop signal from Linear)`);
|
|
756
757
|
}
|
|
@@ -775,7 +776,7 @@ export async function handleAgentSessionPrompted(event: Record<string, unknown>)
|
|
|
775
776
|
const existingTask = getTaskById(existing.swarmId);
|
|
776
777
|
|
|
777
778
|
// If the task is still in progress, acknowledge but don't create a new one
|
|
778
|
-
if (existingTask && !
|
|
779
|
+
if (existingTask && !isTerminalTaskStatus(existingTask.status)) {
|
|
779
780
|
console.log(`[Linear Sync] Prompted on in-progress task ${existing.swarmId}, acknowledging`);
|
|
780
781
|
if (sessionId) {
|
|
781
782
|
postAgentSessionThought(
|