@desplega.ai/agent-swarm 1.86.0 → 1.87.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/openapi.json +72 -1
  2. package/package.json +3 -1
  3. package/src/be/db-queries/tracker.ts +21 -0
  4. package/src/be/db.ts +235 -14
  5. package/src/be/migrations/079_task_followup_config.sql +1 -0
  6. package/src/be/modelsdev-cache.json +77663 -74073
  7. package/src/cli.tsx +26 -0
  8. package/src/commands/context-preamble.ts +272 -0
  9. package/src/commands/e2b.ts +728 -0
  10. package/src/commands/resume-session.ts +35 -78
  11. package/src/commands/runner.ts +125 -13
  12. package/src/e2b/dispatch.ts +429 -0
  13. package/src/e2b/env.ts +206 -0
  14. package/src/heartbeat/heartbeat.ts +145 -30
  15. package/src/heartbeat/templates.ts +11 -7
  16. package/src/http/session-data.ts +8 -1
  17. package/src/http/tasks.ts +152 -3
  18. package/src/jira/sync.ts +4 -4
  19. package/src/linear/sync.ts +6 -5
  20. package/src/providers/claude-adapter.ts +10 -76
  21. package/src/providers/claude-managed-adapter.ts +61 -75
  22. package/src/providers/codex-adapter.ts +15 -18
  23. package/src/providers/codex-oauth/auth-json.ts +18 -1
  24. package/src/providers/codex-oauth/flow.ts +24 -1
  25. package/src/providers/types.ts +6 -0
  26. package/src/tasks/worker-follow-up.ts +162 -2
  27. package/src/telemetry.ts +11 -1
  28. package/src/tests/claude-adapter.test.ts +5 -27
  29. package/src/tests/claude-managed-adapter.test.ts +38 -52
  30. package/src/tests/codex-adapter.test.ts +6 -31
  31. package/src/tests/codex-oauth.test.ts +149 -3
  32. package/src/tests/codex-pool.test.ts +14 -3
  33. package/src/tests/e2b-dispatch.test.ts +330 -0
  34. package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
  35. package/src/tests/heartbeat.test.ts +26 -16
  36. package/src/tests/prompt-template-remaining.test.ts +4 -0
  37. package/src/tests/resume-session.test.ts +42 -50
  38. package/src/tests/structured-output.test.ts +69 -0
  39. package/src/tests/task-completion-idempotency.test.ts +185 -2
  40. package/src/tests/task-supersede-resume.test.ts +722 -0
  41. package/src/tests/telemetry-init.test.ts +69 -0
  42. package/src/tests/vcs-tracking.test.ts +39 -0
  43. package/src/tools/send-task.ts +12 -1
  44. package/src/tools/store-progress.ts +2 -2
  45. package/src/tools/templates.ts +14 -2
  46. package/src/types.ts +46 -1
  47. package/src/workflows/executors/agent-task.ts +3 -0
package/src/cli.tsx CHANGED
@@ -292,6 +292,27 @@ const COMMAND_HELP: Record<
292
292
  ` ${binName} claude-managed-setup --api-url https://swarm.example.com`,
293
293
  ].join("\n"),
294
294
  },
295
+ e2b: {
296
+ usage: `${binName} e2b <subcommand> [options]`,
297
+ description:
298
+ "Build Agent Swarm E2B templates and start API/worker sandboxes on demand for CI or Dockerless environments.",
299
+ options: [
300
+ " build-template --role api|worker Build or rebuild an E2B template",
301
+ " delete-template <template...> Delete E2B templates",
302
+ " publish-template <template...> Publish E2B templates",
303
+ " unpublish-template <template...> Make E2B templates private",
304
+ " start-api --template <name> Start the API in an E2B sandbox",
305
+ " start-worker --api-url <url> Start a worker against a public API URL",
306
+ " start-stack Start API plus one or more workers",
307
+ " list | kill <sandbox-id...> Inspect or clean up sandboxes",
308
+ " -h, --help Show this help",
309
+ ].join("\n"),
310
+ examples: [
311
+ ` ${binName} e2b build-template --role worker`,
312
+ ` ${binName} e2b start-worker --api-url https://swarm.example.com --api-key "$SWARM_API_KEY"`,
313
+ ` ${binName} e2b start-stack --workers 2 --api-key "$SWARM_API_KEY"`,
314
+ ].join("\n"),
315
+ },
295
316
  };
296
317
 
297
318
  function printHelp(command?: string) {
@@ -323,6 +344,7 @@ function printHelp(command?: string) {
323
344
  ["docs", "Open documentation (--open to launch in browser)"],
324
345
  ["codex-login", "Authenticate Codex via ChatGPT OAuth"],
325
346
  ["claude-managed-setup", "Bootstrap Anthropic Managed Agents (agent + env + skills)"],
347
+ ["e2b", "Build templates and start E2B API/worker sandboxes"],
326
348
  ["version", "Show version number"],
327
349
  ["help", "Show this help message"],
328
350
  ];
@@ -584,6 +606,10 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
584
606
  const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
585
607
  const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);
586
608
  await runClaudeManagedSetup(setupArgs);
609
+ } else if (args.command === "e2b") {
610
+ const { runE2BCommand } = await import("./commands/e2b");
611
+ const e2bArgs = process.argv.slice(process.argv.indexOf("e2b") + 1);
612
+ await runE2BCommand(e2bArgs);
587
613
  } else {
588
614
  render(<App args={args} />);
589
615
  }
@@ -11,6 +11,8 @@
11
11
  * resumes (see swarm memory sigterm-143-resumed-session-context-saturation-2026-05-13).
12
12
  */
13
13
 
14
+ import { scrubSecrets } from "../utils/secret-scrubber";
15
+
14
16
  export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
15
17
  process.env.CONTEXT_PREAMBLE_MAX_TOKENS || "2000",
16
18
  );
@@ -18,12 +20,25 @@ export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
18
20
  export const CONTEXT_PREAMBLE_MAX_CHARS = CONTEXT_PREAMBLE_MAX_TOKENS * 4;
19
21
  export const CONTEXT_PREAMBLE_MAX_ANCESTORS = 5;
20
22
 
23
/**
 * Parse a numeric budget override taken from the environment.
 *
 * An unset or empty value yields `fallback`. A value that does not parse to a
 * finite, non-negative number (e.g. `"abc"`, `"-5"`, `"Infinity"`) also yields
 * `fallback`: a `NaN` budget would make every `length > budget` comparison
 * false and silently disable truncation, and a negative one would make
 * `slice(0, budget)` cut from the wrong end.
 *
 * @param raw - Raw environment value, if any.
 * @param fallback - Budget to use when `raw` is missing or unusable.
 * @returns The parsed budget, or `fallback`.
 */
function parseResumeTokenBudget(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw || fallback);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Token budget for the resume-task preamble. Default 4000 = 2× the regular
 * preamble, since the resume agent needs the original task brief verbatim
 * plus a tool-call summary to avoid redoing completed work.
 * Overridable via the `CONTEXT_PREAMBLE_RESUME_MAX_TOKENS` environment variable.
 */
export const CONTEXT_PREAMBLE_RESUME_MAX_TOKENS = parseResumeTokenBudget(
  process.env.CONTEXT_PREAMBLE_RESUME_MAX_TOKENS,
  4000,
);
/** Character budget derived from the token budget (4 characters per token). */
export const CONTEXT_PREAMBLE_RESUME_MAX_CHARS = CONTEXT_PREAMBLE_RESUME_MAX_TOKENS * 4;
/** How many of the most recent session_logs rows to inspect for tool-call summary. */
export const CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT = 50;
34
+
21
35
  export interface TaskContextForPreamble {
22
36
  id: string;
23
37
  task: string;
24
38
  output?: string;
25
39
  progress?: string;
26
40
  status?: string;
41
+ taskType?: string;
27
42
  parentTaskId?: string;
28
43
  attachments?: Array<{
29
44
  kind: string;
@@ -57,6 +72,7 @@ export async function fetchTaskContextForPreamble(
57
72
  output: data.output,
58
73
  progress: data.progress,
59
74
  status: data.status,
75
+ taskType: data.taskType,
60
76
  parentTaskId: data.parentTaskId,
61
77
  attachments: data.attachments,
62
78
  };
@@ -176,3 +192,259 @@ export async function buildContextPreamble(
176
192
 
177
193
  return preamble;
178
194
  }
195
+
196
+ // ─── Resume Preamble ───────────────────────────────────────────────────────────
197
+
198
/**
 * Shape this module expects for each entry of the `logs` array returned by
 * the task session-logs endpoint.
 */
interface SessionLogForPreamble {
  id: string;
  taskId?: string;
  sessionId: string;
  iteration: number;
  cli: string;
  /** Raw log line; the summarizer tries to parse it as JSON first. */
  content: string;
  lineNumber: number;
  /**
   * Creation timestamp. Consumers slice characters 11–19 as HH:MM:SS and sort
   * on the string, so this is presumably ISO-8601 — confirm against the API.
   */
  createdAt: string;
}

/**
 * Runtime check for the two fields the resume preamble actually reads from a
 * log row (`content`, `createdAt`). The remaining fields are not consumed
 * here and are therefore not validated.
 */
function isUsableSessionLog(row: unknown): row is SessionLogForPreamble {
  if (!row || typeof row !== "object") return false;
  const candidate = row as Record<string, unknown>;
  return typeof candidate.content === "string" && typeof candidate.createdAt === "string";
}

/**
 * Fetch the session logs of one task for use in the resume preamble.
 *
 * Best-effort by design: a network error, a non-2xx response, or an
 * unexpected payload yields an empty array rather than an exception.
 *
 * @param apiUrl - Base URL of the API (no trailing path).
 * @param apiKey - Bearer token; the Authorization header is omitted when empty.
 * @param taskId - Task whose logs to fetch; URL-encoded before interpolation.
 * @returns The log rows that carry string `content` and `createdAt`, or `[]`.
 */
async function fetchSessionLogsForResume(
  apiUrl: string,
  apiKey: string,
  taskId: string,
): Promise<SessionLogForPreamble[]> {
  const headers: Record<string, string> = {};
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
  // Bound server-side: long-running parents can accumulate large `session_logs`
  // and the preamble only consumes the tail (see CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT).
  // Passing `?limit=N` keeps dispatch fast and memory-flat regardless of run length.
  const url = `${apiUrl}/api/tasks/${encodeURIComponent(taskId)}/session-logs?limit=${CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT}`;
  try {
    const response = await fetch(url, { headers });
    if (!response.ok) return [];
    const data = (await response.json()) as { logs?: unknown } | null;
    const logs = data?.logs;
    if (!Array.isArray(logs)) return [];
    // Drop malformed rows here so downstream sorting/summarizing, which calls
    // string methods on `createdAt` and `content`, cannot throw on them.
    return logs.filter(isUsableSessionLog);
  } catch {
    return [];
  }
}
229
+
230
/**
 * Collapse every whitespace run (including newlines) to a single space and
 * keep at most 120 characters, so the text fits on one summary line.
 */
function toOneLineSnippet(text: string): string {
  return text.replace(/\s+/g, " ").slice(0, 120);
}

/**
 * If `candidate` is a `{ type: "tool_use", name, input? }` record, return
 * `"<name>"` or `"<name> <target>"`, where target is the first non-nullish of
 * `input.file_path`, `input.path`, `input.command` when that value is a string.
 * Returns null for anything else.
 */
function describeToolUse(candidate: unknown): string | null {
  if (!candidate || typeof candidate !== "object") return null;
  const event = candidate as Record<string, unknown>;
  if (event.type !== "tool_use" || typeof event.name !== "string") return null;
  const input = event.input as Record<string, unknown> | null | undefined;
  const target = input?.file_path ?? input?.path ?? input?.command;
  // Shell commands can be long and multi-line; cap them so a single call
  // cannot break the one-line format or consume the whole logs budget.
  const suffix = typeof target === "string" ? ` ${toOneLineSnippet(target)}` : "";
  return `${event.name}${suffix}`;
}

/**
 * Format a single session_log line as a one-line tool-call summary. Falls back
 * to a truncated content snippet when the line isn't recognizable as a
 * tool call. The caller is expected to pass the returned text through
 * `scrubSecrets` before inserting it into the preamble; this function does
 * not scrub.
 *
 * @param line - Log row; only `content` and `createdAt` are read.
 * @returns `"[HH:MM:SS] <summary>"`, or null when there is nothing to show
 *   (empty content, or JSON that is not an object).
 */
function summarizeSessionLogLine(line: { content: string; createdAt: string }): string | null {
  // Characters 11–19 of an ISO-8601 timestamp are HH:MM:SS.
  const ts = typeof line.createdAt === "string" ? line.createdAt.slice(11, 19) : "";
  const raw = typeof line.content === "string" ? line.content : "";

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON: show the raw text itself.
    const snippet = toOneLineSnippet(raw);
    return snippet ? `[${ts}] ${snippet}` : null;
  }
  if (!parsed || typeof parsed !== "object") return null;
  const obj = parsed as Record<string, unknown>;

  // Anthropic / claude message-style tool calls: the first tool_use block wins.
  const message = obj.message as Record<string, unknown> | null | undefined;
  const blocks = message?.content;
  if (Array.isArray(blocks)) {
    for (const block of blocks) {
      const described = describeToolUse(block);
      if (described !== null) return `[${ts}] ${described}`;
    }
  }

  // Codex / generic event-style: { type: 'tool_use', name: '...', input: {...} }
  const described = describeToolUse(obj);
  if (described !== null) return `[${ts}] ${described}`;

  // Fallback: short content snippet (still useful for diff/insight)
  const snippet = toOneLineSnippet(JSON.stringify(parsed));
  return snippet ? `[${ts}] ${snippet}` : null;
}
276
+
277
/** Upper bound on how many ancestors `walkResumeChain` will fetch. */
const MAX_RESUME_CHAIN_DEPTH = 10;

/**
 * Walk up the parentTaskId chain through `taskType === "resume"` ancestors
 * to find the original (non-resume) task. Returns the chain in order
 * [immediateParent, ..., original]. Stops at MAX_RESUME_CHAIN_DEPTH entries,
 * at the first ancestor that cannot be fetched, or as soon as a task id
 * repeats, so a cyclic parent chain cannot produce duplicate entries.
 *
 * PR #594 review: cascading resumes (original → resume1 → resume2) had
 * `buildResumeContextPreamble` fetching only the immediate parent — whose
 * `task` text is the synthetic "Resume interrupted task..." prompt rather
 * than the original work brief. Walking the chain restores the original
 * description and lets us merge session logs from all resume attempts.
 */
async function walkResumeChain(
  apiUrl: string,
  apiKey: string,
  immediateParentId: string,
): Promise<TaskContextForPreamble[]> {
  const chain: TaskContextForPreamble[] = [];
  const visited = new Set<string>();
  let currentId: string | undefined = immediateParentId;
  while (currentId && chain.length < MAX_RESUME_CHAIN_DEPTH && !visited.has(currentId)) {
    visited.add(currentId);
    const ctx: TaskContextForPreamble | null = await fetchTaskContextForPreamble(
      apiUrl,
      apiKey,
      currentId,
    );
    if (!ctx) break;
    chain.push(ctx);
    // Stop once we hit a non-resume ancestor — that's the original work.
    if (ctx.taskType !== "resume") break;
    currentId = ctx.parentTaskId;
  }
  return chain;
}

/**
 * Join `lines` with newlines, dropping entries from the chosen end until the
 * joined text fits within `budget` characters (or no lines remain).
 * The input array is not modified.
 */
function fitLinesToBudget(
  lines: readonly string[],
  budget: number,
  dropFrom: "oldest" | "newest",
): string {
  const kept = [...lines];
  let section = kept.join("\n");
  while (section.length > budget && kept.length > 0) {
    if (dropFrom === "oldest") kept.shift();
    else kept.pop();
    section = kept.join("\n");
  }
  return section;
}

/**
 * Build a resume-task preamble.
 *
 * Reads the parent task chain + recent session_logs over HTTP (never touches
 * `bun:sqlite` worker-side). Splits CONTEXT_PREAMBLE_RESUME_MAX_CHARS
 * (4000 tokens by default) as follows:
 *
 *   - 40% — original task description (not trimmed by section budgeting)
 *   - 35% — last-N session_logs summary (tool-call one-liners; scrubbed)
 *   - 15% — artifacts/attachments index (names + pointers only)
 *   - 10% — fixed framing (header + continuation instructions; not enforced)
 *
 * When the description exceeds its 40% share, the overflow is taken half each
 * from the logs and artifacts budgets. The log summary is then trimmed oldest
 * first and the artifacts list last-listed first, each against its own budget.
 * The description is only ever cut by the final hard cap, which slices the
 * whole preamble at CONTEXT_PREAMBLE_RESUME_MAX_CHARS and appends a notice.
 *
 * @param apiUrl - Base URL of the API.
 * @param apiKey - Bearer token forwarded to the API helpers.
 * @param parentTaskId - Immediate parent of the resume task being started.
 * @returns The preamble text, or null when no ancestor could be fetched.
 */
export async function buildResumeContextPreamble(
  apiUrl: string,
  apiKey: string,
  parentTaskId: string,
): Promise<string | null> {
  const chain = await walkResumeChain(apiUrl, apiKey, parentTaskId);
  if (chain.length === 0) return null;
  // Original = last entry (non-resume ancestor, or the deepest reachable
  // if the chain exceeds the depth cap or hits a fetch failure).
  const original = chain[chain.length - 1] ?? chain[0];
  if (!original) return null;
  // Immediate parent — its attachments are the most recent "in flight" set.
  const parent = chain[0] ?? original;

  // Fetch session logs from EVERY chain member so a re-superseded resume
  // still surfaces tool-call history from earlier attempts. Merge, sort by
  // createdAt ASC, then keep the most recent N.
  const logsBatches = await Promise.all(
    chain.map((c) => fetchSessionLogsForResume(apiUrl, apiKey, c.id)),
  );
  // Skip rows without string content/createdAt: sorting and summarizing call
  // string methods on both and would otherwise throw on a malformed row.
  const merged = logsBatches
    .flat()
    .filter((row) => typeof row?.createdAt === "string" && typeof row.content === "string");
  // Plain code-unit comparison: these are machine timestamps, so
  // locale-aware collation is neither needed nor wanted.
  merged.sort((a, b) => (a.createdAt < b.createdAt ? -1 : a.createdAt > b.createdAt ? 1 : 0));
  const recentLogs = merged.slice(-CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT);

  const descBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.4);
  let logsBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.35);
  let artBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.15);

  const attemptOrdinal =
    chain.length === 2 ? "2nd" : chain.length === 3 ? "3rd" : `${chain.length}th`;
  // Only the chain-depth line is optional. The blank entries are deliberate
  // paragraph separators and must survive; without them the line above `---`
  // is rendered as a Markdown setext heading.
  const header = [
    "\n---",
    "## Resuming Interrupted Task",
    "",
    "This task is a fresh-session continuation of an interrupted task (graceful",
    "shutdown / context-limit / operator action). The block below summarizes the",
    "original task, what was done so far, and the artifacts in flight.",
    "",
    "**Do not redo work already completed below — extend it.**",
    "",
    `Original task ID: \`${original.id}\``,
    ...(chain.length > 1
      ? [
          `Resume chain depth: ${chain.length} (this is at least the ${attemptOrdinal} resume attempt).`,
        ]
      : []),
    "",
    "---",
    "",
    "### Original Task Description",
    "",
  ].join("\n");

  // 40% — full description. Pulled from the ORIGINAL (non-resume) ancestor so
  // cascading resumes don't read each other's synthetic
  // "Resume interrupted task..." preamble bodies (PR #594 review).
  const descSection = original.task;

  // An oversized description borrows from the other two sections, half each,
  // so it doesn't starve either of them entirely on its own.
  if (descSection.length > descBudget) {
    const overflow = descSection.length - descBudget;
    logsBudget = Math.max(0, logsBudget - Math.ceil(overflow / 2));
    artBudget = Math.max(0, artBudget - Math.floor(overflow / 2));
  }

  // 35% — session-log summary (tool-call lines).
  // Scrub secrets BEFORE budget enforcement so secret strings don't get
  // sliced into half-redactions mid-truncate.
  const scrubbedSummary: string[] = [];
  for (const line of recentLogs) {
    const summary = summarizeSessionLogLine(line);
    if (summary) scrubbedSummary.push(scrubSecrets(summary));
  }
  // FIFO truncate (drop oldest first) until under budget.
  const logsSection = fitLinesToBudget(scrubbedSummary, logsBudget, "oldest");

  // 15% — artifacts (names + pointers only); trailing entries are dropped first.
  const atts = parent.attachments?.filter((a) => a.name && (a.url || a.path || a.pageId)) ?? [];
  const artLines = atts.map((att) => `  - **${att.name}**: \`${formatAttachmentPointer(att)}\``);
  const artSection = fitLinesToBudget(artLines, artBudget, "newest");

  const sections: string[] = [header, descSection, ""];

  if (logsSection) {
    sections.push("### Recent Tool Calls", "", logsSection, "");
  }

  if (artSection) {
    sections.push("### Artifacts In Flight", "", artSection, "");
  }

  sections.push(
    "---",
    "",
    `To review the full prior session call \`get-task-details\` with taskId \`${original.id}\`.`,
    "",
    "---",
    "",
  );

  let preamble = sections.join("\n");

  // Final hard cap — should rarely trip given the per-section budgets above,
  // but provides a safety net for very long descriptions. Note that it cuts
  // from the end, so it can drop the footer and the tail of the description.
  if (preamble.length > CONTEXT_PREAMBLE_RESUME_MAX_CHARS) {
    preamble = `${preamble.slice(0, CONTEXT_PREAMBLE_RESUME_MAX_CHARS)}\n\n[resume preamble truncated to ${CONTEXT_PREAMBLE_RESUME_MAX_TOKENS}-token budget]\n\n---\n`;
  }

  return preamble;
}