@desplega.ai/agent-swarm 1.86.0 → 1.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +72 -1
- package/package.json +3 -1
- package/src/be/db-queries/tracker.ts +21 -0
- package/src/be/db.ts +235 -14
- package/src/be/migrations/079_task_followup_config.sql +1 -0
- package/src/be/modelsdev-cache.json +77663 -74073
- package/src/cli.tsx +26 -0
- package/src/commands/context-preamble.ts +272 -0
- package/src/commands/e2b.ts +728 -0
- package/src/commands/resume-session.ts +35 -78
- package/src/commands/runner.ts +125 -13
- package/src/e2b/dispatch.ts +429 -0
- package/src/e2b/env.ts +206 -0
- package/src/heartbeat/heartbeat.ts +145 -30
- package/src/heartbeat/templates.ts +11 -7
- package/src/http/session-data.ts +8 -1
- package/src/http/tasks.ts +152 -3
- package/src/jira/sync.ts +4 -4
- package/src/linear/sync.ts +6 -5
- package/src/providers/claude-adapter.ts +10 -76
- package/src/providers/claude-managed-adapter.ts +61 -75
- package/src/providers/codex-adapter.ts +15 -18
- package/src/providers/codex-oauth/auth-json.ts +18 -1
- package/src/providers/codex-oauth/flow.ts +24 -1
- package/src/providers/types.ts +6 -0
- package/src/tasks/worker-follow-up.ts +162 -2
- package/src/telemetry.ts +11 -1
- package/src/tests/claude-adapter.test.ts +5 -27
- package/src/tests/claude-managed-adapter.test.ts +38 -52
- package/src/tests/codex-adapter.test.ts +6 -31
- package/src/tests/codex-oauth.test.ts +149 -3
- package/src/tests/codex-pool.test.ts +14 -3
- package/src/tests/e2b-dispatch.test.ts +330 -0
- package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
- package/src/tests/heartbeat.test.ts +26 -16
- package/src/tests/prompt-template-remaining.test.ts +4 -0
- package/src/tests/resume-session.test.ts +42 -50
- package/src/tests/structured-output.test.ts +69 -0
- package/src/tests/task-completion-idempotency.test.ts +185 -2
- package/src/tests/task-supersede-resume.test.ts +722 -0
- package/src/tests/telemetry-init.test.ts +69 -0
- package/src/tests/vcs-tracking.test.ts +39 -0
- package/src/tools/send-task.ts +12 -1
- package/src/tools/store-progress.ts +2 -2
- package/src/tools/templates.ts +14 -2
- package/src/types.ts +46 -1
- package/src/workflows/executors/agent-task.ts +3 -0
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
import type { ProviderName } from "../types";
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* # Native session resume is deprecated.
|
|
5
|
+
*
|
|
6
|
+
* Follow-up continuity is delivered via the context preamble built by
|
|
7
|
+
* `buildContextPreamble` in `src/commands/context-preamble.ts`. The preamble
|
|
8
|
+
* is bounded, deterministic, and survives worker-container restarts — the
|
|
9
|
+
* failure modes that native resume could not handle.
|
|
10
|
+
*
|
|
11
|
+
* `resolveResumeSession` is preserved as an observability shim: it accepts
|
|
12
|
+
* the same candidate shape the runner already builds and returns every
|
|
13
|
+
* non-empty candidate in `skipped` with a deprecation reason. The result's
|
|
14
|
+
* `resumeSessionId` is always `undefined` — adapters spawn fresh sessions.
|
|
15
|
+
*
|
|
16
|
+
* Refs: thoughts/taras/plans/2026-05-28-deprecate-native-resume.md
|
|
17
|
+
*/
|
|
18
|
+
|
|
3
19
|
export type ResumeSessionSource = "task" | "parent";
|
|
4
20
|
|
|
5
21
|
export interface ResumeSessionCandidate {
|
|
@@ -18,33 +34,28 @@ export interface ResumeSessionSkip {
|
|
|
18
34
|
}
|
|
19
35
|
|
|
20
36
|
export interface ResumeSessionResolution {
|
|
37
|
+
/**
|
|
38
|
+
* @deprecated Always `undefined`. Native session resume was removed in the
|
|
39
|
+
* 2026-05-28 deprecation. See module docstring + context-preamble.ts.
|
|
40
|
+
*/
|
|
21
41
|
resumeSessionId?: string;
|
|
22
42
|
source?: ResumeSessionSource;
|
|
23
43
|
provider?: ProviderName;
|
|
24
44
|
skipped: ResumeSessionSkip[];
|
|
25
45
|
}
|
|
26
46
|
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
const RESUMABLE_PROVIDERS = new Set<ProviderName>(["claude", "claude-managed", "codex"]);
|
|
30
|
-
|
|
31
|
-
export function isClaudeCliSessionId(sessionId: string): boolean {
|
|
32
|
-
return UUID_RE.test(sessionId);
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
function normalizeStoredProvider(candidate: ResumeSessionCandidate): ProviderName | undefined {
|
|
36
|
-
if (candidate.provider === "claude" && candidate.providerMeta?.managed === true) {
|
|
37
|
-
return "claude-managed";
|
|
38
|
-
}
|
|
39
|
-
return candidate.provider;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
function providerSupportsResume(provider: ProviderName): boolean {
|
|
43
|
-
return RESUMABLE_PROVIDERS.has(provider);
|
|
44
|
-
}
|
|
47
|
+
export const RESUME_DEPRECATED_REASON = "native resume deprecated — using context preamble";
|
|
45
48
|
|
|
49
|
+
/**
|
|
50
|
+
* Observability shim. Records the candidates that *would* have been resume
|
|
51
|
+
* targets in the old world; never asks the adapter to resume.
|
|
52
|
+
*
|
|
53
|
+
* `_currentProvider` is kept for call-site compatibility with the runner
|
|
54
|
+
* (both call sites already pass `state.harnessProvider`); the value is
|
|
55
|
+
* intentionally unused.
|
|
56
|
+
*/
|
|
46
57
|
export function resolveResumeSession(
|
|
47
|
-
|
|
58
|
+
_currentProvider: ProviderName,
|
|
48
59
|
candidates: ResumeSessionCandidate[],
|
|
49
60
|
): ResumeSessionResolution {
|
|
50
61
|
const skipped: ResumeSessionSkip[] = [];
|
|
@@ -52,66 +63,12 @@ export function resolveResumeSession(
|
|
|
52
63
|
for (const candidate of candidates) {
|
|
53
64
|
const sessionId = candidate.sessionId?.trim();
|
|
54
65
|
if (!sessionId) continue;
|
|
55
|
-
|
|
56
|
-
const storedProvider = normalizeStoredProvider(candidate);
|
|
57
|
-
|
|
58
|
-
if (!storedProvider) {
|
|
59
|
-
if (currentProvider === "claude" && isClaudeCliSessionId(sessionId)) {
|
|
60
|
-
return {
|
|
61
|
-
resumeSessionId: sessionId,
|
|
62
|
-
source: candidate.source,
|
|
63
|
-
provider: "claude",
|
|
64
|
-
skipped,
|
|
65
|
-
};
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
skipped.push({
|
|
69
|
-
source: candidate.source,
|
|
70
|
-
sessionId,
|
|
71
|
-
reason:
|
|
72
|
-
currentProvider === "claude"
|
|
73
|
-
? "legacy Claude resume requires a UUID session id"
|
|
74
|
-
: "stored session provider is unknown",
|
|
75
|
-
});
|
|
76
|
-
continue;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
if (storedProvider !== currentProvider) {
|
|
80
|
-
skipped.push({
|
|
81
|
-
source: candidate.source,
|
|
82
|
-
sessionId,
|
|
83
|
-
provider: storedProvider,
|
|
84
|
-
reason: `stored session provider ${storedProvider} does not match current provider ${currentProvider}`,
|
|
85
|
-
});
|
|
86
|
-
continue;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
if (!providerSupportsResume(currentProvider)) {
|
|
90
|
-
skipped.push({
|
|
91
|
-
source: candidate.source,
|
|
92
|
-
sessionId,
|
|
93
|
-
provider: storedProvider,
|
|
94
|
-
reason: `provider ${currentProvider} does not support runner resume`,
|
|
95
|
-
});
|
|
96
|
-
continue;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
if (currentProvider === "claude" && !isClaudeCliSessionId(sessionId)) {
|
|
100
|
-
skipped.push({
|
|
101
|
-
source: candidate.source,
|
|
102
|
-
sessionId,
|
|
103
|
-
provider: storedProvider,
|
|
104
|
-
reason: "Claude CLI --resume requires a UUID session id",
|
|
105
|
-
});
|
|
106
|
-
continue;
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
return {
|
|
110
|
-
resumeSessionId: sessionId,
|
|
66
|
+
skipped.push({
|
|
111
67
|
source: candidate.source,
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
68
|
+
sessionId,
|
|
69
|
+
provider: candidate.provider,
|
|
70
|
+
reason: RESUME_DEPRECATED_REASON,
|
|
71
|
+
});
|
|
115
72
|
}
|
|
116
73
|
|
|
117
74
|
return { skipped };
|
package/src/commands/runner.ts
CHANGED
|
@@ -50,7 +50,7 @@ import { refreshSkillsIfChanged } from "../utils/skills-refresh.ts";
|
|
|
50
50
|
import { detectVcsProvider } from "../vcs/index.ts";
|
|
51
51
|
import { validateJsonSchema } from "../workflows/json-schema-validator.ts";
|
|
52
52
|
import { interpolate } from "../workflows/template.ts";
|
|
53
|
-
import { buildContextPreamble } from "./context-preamble.ts";
|
|
53
|
+
import { buildContextPreamble, buildResumeContextPreamble } from "./context-preamble.ts";
|
|
54
54
|
import { awaitCredentials, BootMaxWaitExceededError, EX_CONFIG } from "./credential-wait.ts";
|
|
55
55
|
import {
|
|
56
56
|
buildCredStatusReport,
|
|
@@ -1011,6 +1011,84 @@ async function reportKeyRateLimit(
|
|
|
1011
1011
|
}
|
|
1012
1012
|
}
|
|
1013
1013
|
|
|
1014
|
+
/**
|
|
1015
|
+
* Supersede a task via the API (for graceful shutdown / context-limit /
|
|
1016
|
+
* operator-triggered). Returns `{ ok: true, resumeTaskId }` on success.
|
|
1017
|
+
* On 5xx / network failure returns `{ ok: false }` so the caller can fall
|
|
1018
|
+
* back to the legacy `pauseTaskViaAPI` (handles partial-deploy windows where
|
|
1019
|
+
* the API is older than the worker).
|
|
1020
|
+
*/
|
|
1021
|
+
async function supersedeTaskViaAPI(
|
|
1022
|
+
config: ApiConfig,
|
|
1023
|
+
role: string,
|
|
1024
|
+
taskId: string,
|
|
1025
|
+
reason: "graceful_shutdown" | "context_limits" | "manual_supersede",
|
|
1026
|
+
): Promise<{ ok: true; resumeTaskId: string | null; kind: string } | { ok: false }> {
|
|
1027
|
+
const headers: Record<string, string> = {
|
|
1028
|
+
"X-Agent-ID": config.agentId,
|
|
1029
|
+
"Content-Type": "application/json",
|
|
1030
|
+
};
|
|
1031
|
+
if (config.apiKey) {
|
|
1032
|
+
headers.Authorization = `Bearer ${config.apiKey}`;
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1035
|
+
try {
|
|
1036
|
+
const response = await fetch(`${config.apiUrl}/api/tasks/${taskId}/supersede`, {
|
|
1037
|
+
method: "POST",
|
|
1038
|
+
headers,
|
|
1039
|
+
body: JSON.stringify({ reason }),
|
|
1040
|
+
});
|
|
1041
|
+
|
|
1042
|
+
if (response.ok) {
|
|
1043
|
+
const body = (await response.json().catch(() => null)) as {
|
|
1044
|
+
resumeTaskId?: string | null;
|
|
1045
|
+
kind?: string;
|
|
1046
|
+
} | null;
|
|
1047
|
+
const resumeTaskId = body?.resumeTaskId ?? null;
|
|
1048
|
+
const kind = body?.kind ?? "resumed";
|
|
1049
|
+
console.log(
|
|
1050
|
+
`[${role}] Task ${taskId.slice(0, 8)} superseded (kind=${kind}, resume=${
|
|
1051
|
+
resumeTaskId ? resumeTaskId.slice(0, 8) : "none"
|
|
1052
|
+
})`,
|
|
1053
|
+
);
|
|
1054
|
+
return { ok: true, resumeTaskId, kind };
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
// 404 / 405 — the route doesn't exist on this API server. Happens during
|
|
1058
|
+
// partial deploys (new worker rolled out before new API). Fall back to
|
|
1059
|
+
// legacy pause so the task isn't left orphaned in_progress until heartbeat
|
|
1060
|
+
// recovery picks it up minutes later.
|
|
1061
|
+
if (response.status === 404 || response.status === 405) {
|
|
1062
|
+
console.warn(
|
|
1063
|
+
`[${role}] Supersede route missing for task ${taskId.slice(0, 8)} (${response.status}); falling back to pause`,
|
|
1064
|
+
);
|
|
1065
|
+
return { ok: false };
|
|
1066
|
+
}
|
|
1067
|
+
|
|
1068
|
+
// Other 4xx → deliberate rejection from a current API (bad request,
|
|
1069
|
+
// idempotent no-op, forbidden, conflict). Do NOT fall back to legacy
|
|
1070
|
+
// pause — the API actively rejected the supersede, retrying via pause
|
|
1071
|
+
// would be wrong.
|
|
1072
|
+
if (response.status >= 400 && response.status < 500) {
|
|
1073
|
+
const error = await response.text();
|
|
1074
|
+
console.warn(
|
|
1075
|
+
`[${role}] Supersede rejected for task ${taskId.slice(0, 8)}: ${response.status} ${error}`,
|
|
1076
|
+
);
|
|
1077
|
+
return { ok: true, resumeTaskId: null, kind: "rejected" };
|
|
1078
|
+
}
|
|
1079
|
+
|
|
1080
|
+
// 5xx → fall through to legacy pause.
|
|
1081
|
+
const error = await response.text();
|
|
1082
|
+
console.warn(
|
|
1083
|
+
`[${role}] Supersede failed for task ${taskId.slice(0, 8)}: ${response.status} ${error}`,
|
|
1084
|
+
);
|
|
1085
|
+
return { ok: false };
|
|
1086
|
+
} catch (err) {
|
|
1087
|
+
console.warn(`[${role}] Error superseding task ${taskId.slice(0, 8)}: ${err}`);
|
|
1088
|
+
return { ok: false };
|
|
1089
|
+
}
|
|
1090
|
+
}
|
|
1091
|
+
|
|
1014
1092
|
/**
|
|
1015
1093
|
* Pause a task via the API (for graceful shutdown).
|
|
1016
1094
|
* Unlike marking as failed, paused tasks can be resumed after container restart.
|
|
@@ -1181,21 +1259,35 @@ function setupShutdownHandlers(
|
|
|
1181
1259
|
}
|
|
1182
1260
|
}
|
|
1183
1261
|
|
|
1184
|
-
// Force kill remaining tasks and
|
|
1262
|
+
// Force kill remaining tasks and supersede them so a fresh "resume"
|
|
1263
|
+
// follow-up can pick up the work on any worker. Fallback chain:
|
|
1264
|
+
// 1. supersedeTaskViaAPI (primary)
|
|
1265
|
+
// 2. pauseTaskViaAPI (legacy — preserves graceful behavior during
|
|
1266
|
+
// partial-deploy windows where the API server is older than the
|
|
1267
|
+
// worker)
|
|
1268
|
+
// 3. ensureTaskFinished (mark as failed — last resort)
|
|
1185
1269
|
if (state.activeTasks.size > 0) {
|
|
1186
1270
|
console.log(
|
|
1187
|
-
`[${role}]
|
|
1271
|
+
`[${role}] Superseding ${state.activeTasks.size} remaining task(s) for resume after restart...`,
|
|
1188
1272
|
);
|
|
1189
1273
|
for (const [taskId, task] of state.activeTasks) {
|
|
1190
|
-
console.log(`[${role}]
|
|
1274
|
+
console.log(`[${role}] Superseding task ${taskId.slice(0, 8)}`);
|
|
1191
1275
|
task.session.abort().catch(() => {});
|
|
1192
|
-
// Mark as paused for graceful resume (instead of failed)
|
|
1193
1276
|
if (apiConfig) {
|
|
1277
|
+
const supersede = await supersedeTaskViaAPI(
|
|
1278
|
+
apiConfig,
|
|
1279
|
+
role,
|
|
1280
|
+
taskId,
|
|
1281
|
+
"graceful_shutdown",
|
|
1282
|
+
);
|
|
1283
|
+
if (supersede.ok) {
|
|
1284
|
+
continue;
|
|
1285
|
+
}
|
|
1286
|
+
// 5xx / network failure → try legacy pause for partial-deploy windows.
|
|
1194
1287
|
const paused = await pauseTaskViaAPI(apiConfig, role, taskId);
|
|
1195
1288
|
if (!paused) {
|
|
1196
|
-
// Fallback to marking as failed if pause fails
|
|
1197
1289
|
console.warn(
|
|
1198
|
-
`[${role}]
|
|
1290
|
+
`[${role}] Both supersede and pause failed for task ${taskId.slice(0, 8)}, marking as failed instead`,
|
|
1199
1291
|
);
|
|
1200
1292
|
await ensureTaskFinished(
|
|
1201
1293
|
apiConfig,
|
|
@@ -3754,7 +3846,15 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3754
3846
|
}
|
|
3755
3847
|
|
|
3756
3848
|
// ========== Resume paused tasks with PRIORITY ==========
|
|
3757
|
-
//
|
|
3849
|
+
// LEGACY SAFETY NET — kept for tasks that were paused by older worker
|
|
3850
|
+
// builds during partial-deploy windows (when the API server already
|
|
3851
|
+
// accepted /pause but the new worker has rolled out /supersede). New
|
|
3852
|
+
// graceful-shutdown writes go through the supersede path (see
|
|
3853
|
+
// supersedeTaskViaAPI + the SIGTERM handler) and create a fresh
|
|
3854
|
+
// "resume" follow-up task instead of mutating the original. Cleanup of
|
|
3855
|
+
// this entire block is tracked in the "Legacy paused-task cleanup"
|
|
3856
|
+
// follow-up plan — remove once no new `paused` tasks have been created
|
|
3857
|
+
// for one full quarter.
|
|
3758
3858
|
try {
|
|
3759
3859
|
console.log(`[${role}] Checking for paused tasks to resume...`);
|
|
3760
3860
|
const pausedTasks = await getPausedTasksFromAPI(apiConfig);
|
|
@@ -3909,7 +4009,10 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3909
4009
|
logFile,
|
|
3910
4010
|
systemPrompt: resolvedSystemPrompt,
|
|
3911
4011
|
additionalArgs: opts.additionalArgs,
|
|
3912
|
-
|
|
4012
|
+
// Native resume deprecated: always undefined. Follow-up continuity flows through
|
|
4013
|
+
// the context preamble injected above (see context-preamble.ts).
|
|
4014
|
+
// resumeResolution is still computed for observability via logResumeResolution.
|
|
4015
|
+
resumeSessionId: undefined,
|
|
3913
4016
|
role,
|
|
3914
4017
|
apiUrl,
|
|
3915
4018
|
apiKey,
|
|
@@ -4177,13 +4280,20 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
4177
4280
|
// Universal context preamble: inject for all providers when task is a follow-up.
|
|
4178
4281
|
// Gives non-resumable providers (opencode/pi/devin) prior-task context; also
|
|
4179
4282
|
// acts as a bounded safety net for resumable ones (claude/codex).
|
|
4180
|
-
|
|
4283
|
+
// For taskType="resume" (created by supersedeTaskViaAPI), use the
|
|
4284
|
+
// larger resume preamble that includes a session-log tool-call summary.
|
|
4285
|
+
const taskObj = trigger.task as { parentTaskId?: string; taskType?: string } | undefined;
|
|
4181
4286
|
if (taskObj?.parentTaskId && apiUrl) {
|
|
4182
|
-
const
|
|
4287
|
+
const isResumeTask = taskObj.taskType === "resume";
|
|
4288
|
+
const contextPreamble = isResumeTask
|
|
4289
|
+
? await buildResumeContextPreamble(apiUrl, apiKey, taskObj.parentTaskId)
|
|
4290
|
+
: await buildContextPreamble(apiUrl, apiKey, taskObj.parentTaskId);
|
|
4183
4291
|
if (contextPreamble) {
|
|
4184
4292
|
triggerPrompt = contextPreamble + triggerPrompt;
|
|
4185
4293
|
console.log(
|
|
4186
|
-
`[${role}] Injected context preamble for
|
|
4294
|
+
`[${role}] Injected ${isResumeTask ? "resume" : "context"} preamble for ${
|
|
4295
|
+
isResumeTask ? "resume" : "follow-up"
|
|
4296
|
+
} task (parent: ${taskObj.parentTaskId.slice(0, 8)})`,
|
|
4187
4297
|
);
|
|
4188
4298
|
}
|
|
4189
4299
|
}
|
|
@@ -4207,7 +4317,9 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
4207
4317
|
},
|
|
4208
4318
|
]);
|
|
4209
4319
|
logResumeResolution(role, resumeResolution);
|
|
4210
|
-
resumeSessionId
|
|
4320
|
+
// Native resume deprecated: keep `resumeSessionId` undefined so a fresh
|
|
4321
|
+
// session is spawned. Follow-up continuity flows via the context preamble
|
|
4322
|
+
// injected above (see context-preamble.ts).
|
|
4211
4323
|
} else {
|
|
4212
4324
|
console.log(`[${role}] Child task — parent session ID not found, starting fresh`);
|
|
4213
4325
|
}
|