@desplega.ai/agent-swarm 1.85.0 → 1.87.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +1 -0
  2. package/openapi.json +72 -1
  3. package/package.json +10 -6
  4. package/src/be/db-queries/tracker.ts +21 -0
  5. package/src/be/db.ts +279 -14
  6. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  7. package/src/be/migrations/079_task_followup_config.sql +1 -0
  8. package/src/be/modelsdev-cache.json +155618 -0
  9. package/src/be/modelsdev-cache.ts +46 -0
  10. package/src/be/seed-pricing.ts +7 -44
  11. package/src/cli.tsx +38 -2
  12. package/src/commands/codex-session-runner.ts +132 -0
  13. package/src/commands/context-preamble.ts +272 -0
  14. package/src/commands/credential-wait.ts +2 -2
  15. package/src/commands/e2b.ts +728 -0
  16. package/src/commands/provider-credentials.ts +10 -5
  17. package/src/commands/resume-session.ts +35 -78
  18. package/src/commands/runner.ts +128 -16
  19. package/src/e2b/dispatch.ts +429 -0
  20. package/src/e2b/env.ts +206 -0
  21. package/src/heartbeat/heartbeat.ts +145 -30
  22. package/src/heartbeat/templates.ts +11 -7
  23. package/src/http/session-data.ts +8 -1
  24. package/src/http/tasks.ts +152 -3
  25. package/src/jira/sync.ts +4 -4
  26. package/src/linear/sync.ts +6 -5
  27. package/src/prompts/base-prompt.ts +49 -3
  28. package/src/providers/claude-adapter.ts +76 -61
  29. package/src/providers/claude-managed-adapter.ts +61 -75
  30. package/src/providers/claude-managed-models.ts +18 -2
  31. package/src/providers/codex-adapter.ts +429 -112
  32. package/src/providers/codex-models.ts +9 -2
  33. package/src/providers/codex-oauth/auth-json.ts +18 -1
  34. package/src/providers/codex-oauth/flow.ts +24 -1
  35. package/src/providers/index.ts +28 -19
  36. package/src/providers/pricing-sources.md +7 -4
  37. package/src/providers/swarm-events-shared.ts +14 -0
  38. package/src/providers/types.ts +6 -0
  39. package/src/slack/HEURISTICS.md +5 -1
  40. package/src/slack/handlers.test.ts +35 -0
  41. package/src/slack/handlers.ts +79 -2
  42. package/src/tasks/worker-follow-up.ts +162 -2
  43. package/src/telemetry.ts +11 -1
  44. package/src/tests/base-prompt.test.ts +46 -8
  45. package/src/tests/claude-adapter.test.ts +5 -27
  46. package/src/tests/claude-managed-adapter.test.ts +42 -56
  47. package/src/tests/codex-adapter-otel.test.ts +4 -4
  48. package/src/tests/codex-adapter.test.ts +25 -37
  49. package/src/tests/codex-oauth.test.ts +149 -3
  50. package/src/tests/codex-pool.test.ts +14 -3
  51. package/src/tests/codex-swarm-events.test.ts +35 -0
  52. package/src/tests/context-window.test.ts +1 -0
  53. package/src/tests/credential-check.test.ts +48 -29
  54. package/src/tests/e2b-dispatch.test.ts +330 -0
  55. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  56. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  57. package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
  58. package/src/tests/heartbeat.test.ts +26 -16
  59. package/src/tests/migration-046-budgets.test.ts +6 -5
  60. package/src/tests/pricing-routes.test.ts +6 -5
  61. package/src/tests/prompt-template-remaining.test.ts +4 -0
  62. package/src/tests/provider-adapter.test.ts +10 -10
  63. package/src/tests/provider-command-format.test.ts +4 -4
  64. package/src/tests/resume-session.test.ts +42 -50
  65. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  66. package/src/tests/structured-output.test.ts +69 -0
  67. package/src/tests/task-completion-idempotency.test.ts +185 -2
  68. package/src/tests/task-supersede-resume.test.ts +722 -0
  69. package/src/tests/telemetry-init.test.ts +69 -0
  70. package/src/tests/vcs-tracking.test.ts +39 -0
  71. package/src/tools/send-task.ts +42 -10
  72. package/src/tools/store-progress.ts +2 -2
  73. package/src/tools/templates.ts +14 -2
  74. package/src/types.ts +46 -1
  75. package/src/utils/context-window.ts +1 -0
  76. package/src/workflows/executors/agent-task.ts +3 -0
  77. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  78. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  79. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  80. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  81. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  82. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  83. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  84. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  85. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  86. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  87. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  88. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  89. package/templates/schema.ts +26 -0
  90. package/templates/skills/agentmail-sending/config.json +13 -0
  91. package/templates/skills/agentmail-sending/content.md +48 -0
  92. package/templates/skills/artifacts/config.json +13 -0
  93. package/templates/skills/artifacts/content.md +87 -0
  94. package/templates/skills/browser-use-cloud/config.json +13 -0
  95. package/templates/skills/browser-use-cloud/content.md +155 -0
  96. package/templates/skills/desloppify/config.json +13 -0
  97. package/templates/skills/desloppify/content.md +201 -0
  98. package/templates/skills/exa-search/config.json +13 -0
  99. package/templates/skills/exa-search/content.md +106 -0
  100. package/templates/skills/jira-interaction/config.json +13 -0
  101. package/templates/skills/jira-interaction/content.md +252 -0
  102. package/templates/skills/kapso-whatsapp/config.json +13 -0
  103. package/templates/skills/kapso-whatsapp/content.md +369 -0
  104. package/templates/skills/kv-storage/config.json +13 -0
  105. package/templates/skills/kv-storage/content.md +111 -0
  106. package/templates/skills/linear-interaction/config.json +20 -0
  107. package/templates/skills/linear-interaction/content.md +230 -0
  108. package/templates/skills/pages/config.json +18 -0
  109. package/templates/skills/pages/content.md +85 -0
  110. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  111. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  112. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  113. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  114. package/templates/skills/sprite-cli/config.json +13 -0
  115. package/templates/skills/sprite-cli/content.md +133 -0
  116. package/templates/skills/turso-interaction/config.json +13 -0
  117. package/templates/skills/turso-interaction/content.md +192 -0
  118. package/templates/skills/workflow-iterate/config.json +18 -0
  119. package/templates/skills/workflow-iterate/content.md +399 -0
  120. package/templates/skills/workflow-structured-output/config.json +13 -0
  121. package/templates/skills/workflow-structured-output/content.md +101 -0
  122. package/templates/skills/x-api-interactions/config.json +13 -0
  123. package/templates/skills/x-api-interactions/content.md +109 -0
  124. package/templates/workflows/autopilot/config.json +13 -0
  125. package/templates/workflows/autopilot/content.md +58 -0
  126. package/templates/workflows/linear-drain-loop/config.json +21 -0
  127. package/templates/workflows/linear-drain-loop/content.md +72 -0
  128. package/templates/workflows/ralph-loop/config.json +13 -0
  129. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -0,0 +1,46 @@
1
+ import { readFileSync } from "node:fs";
2
+ import path from "node:path";
3
+
4
/** Cost fields carried by a models.dev model entry; every field is optional. */
export interface ModelsDevCostBlock {
  input?: number;
  output?: number;
  cache_read?: number;
  cache_write?: number;
}

/** One model entry in the models.dev snapshot. */
export interface ModelsDevModel {
  id?: string;
  cost?: ModelsDevCostBlock;
}

/** One provider entry in the models.dev snapshot, holding its models by key. */
export interface ModelsDevProvider {
  models?: Record<string, ModelsDevModel>;
}

/** Top-level shape of the vendored snapshot: provider key -> provider entry. */
export type ModelsDevCache = Record<string, ModelsDevProvider>;

/** Location of the vendored snapshot, relative to a checkout (or image) root. */
export const MODELSDEV_CACHE_PATH = path.join("src", "be", "modelsdev-cache.json");

/**
 * Minimal structural check for a parsed snapshot: it must be a plain JSON
 * object. `JSON.parse` happily returns arrays, `null` and scalars, none of
 * which can be indexed by provider key.
 */
function isModelsDevCache(value: unknown): value is ModelsDevCache {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Resolve the vendored models.dev cache from source checkouts and compiled
 * Docker images. The API image copies the snapshot to `/app/src/be/...`.
 *
 * Candidates are tried in order: the `MODELSDEV_CACHE_PATH` env override (when
 * set), the current working directory, its parent, then `/app`. A candidate is
 * skipped when it cannot be read, is not valid JSON, or does not parse to a
 * JSON object.
 *
 * @returns the first usable snapshot, or `null` when no candidate qualifies.
 */
export function loadModelsDevCache(): ModelsDevCache | null {
  const explicitPath = process.env.MODELSDEV_CACHE_PATH;
  const candidates = [
    ...(explicitPath ? [explicitPath] : []),
    path.join(process.cwd(), MODELSDEV_CACHE_PATH),
    path.join(process.cwd(), "..", MODELSDEV_CACHE_PATH),
    path.join("/app", MODELSDEV_CACHE_PATH),
  ];

  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(candidate, "utf-8"));
      // A truncated or hand-edited file (`[]`, `null`, ...) must not shadow a
      // valid snapshot further down the candidate list.
      if (isModelsDevCache(parsed)) return parsed;
    } catch {
      // Missing, unreadable or malformed file: best-effort, try next candidate.
    }
  }

  return null;
}
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
3
3
  *
4
- * The vendored models.dev snapshot at `ui/src/lib/modelsdev-cache.json` is the
4
+ * The vendored models.dev snapshot at `src/be/modelsdev-cache.json` is the
5
5
  * single source of truth for per-token rates. We project it into rows keyed by
6
6
  * `(provider, model, token_class)` so the recompute path in
7
7
  * `src/http/session-data.ts` can rebuild USD from tokens regardless of which
@@ -17,30 +17,15 @@
17
17
  * admin route (`POST /api/pricing`) — we don't overwrite seed rows.
18
18
  */
19
19
 
20
- import { readFileSync } from "node:fs";
21
- import path from "node:path";
22
20
  import type { PricingProvider, PricingTokenClass } from "../types";
23
21
  import { getDb } from "./db";
22
+ import {
23
+ loadModelsDevCache,
24
+ type ModelsDevCache,
25
+ type ModelsDevCostBlock,
26
+ } from "./modelsdev-cache";
24
27
  import { normalizeModelKey } from "./pricing-normalize";
25
28
 
26
- interface ModelsDevCostBlock {
27
- input?: number;
28
- output?: number;
29
- cache_read?: number;
30
- cache_write?: number;
31
- }
32
-
33
- interface ModelsDevModel {
34
- id?: string;
35
- cost?: ModelsDevCostBlock;
36
- }
37
-
38
- interface ModelsDevProvider {
39
- models?: Record<string, ModelsDevModel>;
40
- }
41
-
42
- type ModelsDevCache = Record<string, ModelsDevProvider>;
43
-
44
29
  /**
45
30
  * Per-harness manual rates that models.dev doesn't carry. Keep the source URL
46
31
  * and a verification date next to each entry so {@link MANUAL_PRICING_OVERRIDES}
@@ -82,33 +67,11 @@ const MANUAL_PRICING_OVERRIDES: Array<{
82
67
  * fields the models.dev snapshot doesn't index directly; we map them here.
83
68
  */
84
69
  const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
85
- opus: "claude-opus-4-7",
70
+ opus: "claude-opus-4-8",
86
71
  sonnet: "claude-sonnet-4-6",
87
72
  haiku: "claude-haiku-4-5",
88
73
  };
89
74
 
90
- /**
91
- * Resolve the path to the vendored models.dev cache. The UI copy is canonical.
92
- * We treat this as best-effort: if the file is missing (developer ran the
93
- * server without `ui/` checked out), we log and continue with manual rates
94
- * only — better than crashing the boot.
95
- */
96
- function loadModelsDevCache(): ModelsDevCache | null {
97
- const candidates = [
98
- path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
99
- path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
100
- ];
101
- for (const cand of candidates) {
102
- try {
103
- const raw = readFileSync(cand, "utf-8");
104
- return JSON.parse(raw) as ModelsDevCache;
105
- } catch {
106
- // try next candidate
107
- }
108
- }
109
- return null;
110
- }
111
-
112
75
  interface PricingSeedRow {
113
76
  provider: PricingProvider;
114
77
  model: string;
package/src/cli.tsx CHANGED
@@ -16,8 +16,12 @@ import { getApiKey, setApiKey } from "./utils/api-key.ts";
16
16
  // Get CLI name from bin field (assumes single key)
17
17
  const binName = Object.keys(pkg.bin)[0];
18
18
 
19
- // Restore cursor on exit
20
- const restoreCursor = () => process.stdout.write("\x1B[?25h");
19
+ // Restore cursor on exit — only when stdout is a TTY. Non-TTY invocations
20
+ // (like the codex-session-runner subprocess whose stdout is a JSON pipe)
21
+ // must not inject terminal escape sequences into the byte stream.
22
+ const restoreCursor = () => {
23
+ if (process.stdout.isTTY) process.stdout.write("\x1B[?25h");
24
+ };
21
25
  process.on("exit", restoreCursor);
22
26
  process.on("SIGINT", () => {
23
27
  restoreCursor();
@@ -288,6 +292,27 @@ const COMMAND_HELP: Record<
288
292
  ` ${binName} claude-managed-setup --api-url https://swarm.example.com`,
289
293
  ].join("\n"),
290
294
  },
295
+ e2b: {
296
+ usage: `${binName} e2b <subcommand> [options]`,
297
+ description:
298
+ "Build Agent Swarm E2B templates and start API/worker sandboxes on demand for CI or Dockerless environments.",
299
+ options: [
300
+ " build-template --role api|worker Build or rebuild an E2B template",
301
+ " delete-template <template...> Delete E2B templates",
302
+ " publish-template <template...> Publish E2B templates",
303
+ " unpublish-template <template...> Make E2B templates private",
304
+ " start-api --template <name> Start the API in an E2B sandbox",
305
+ " start-worker --api-url <url> Start a worker against a public API URL",
306
+ " start-stack Start API plus one or more workers",
307
+ " list | kill <sandbox-id...> Inspect or clean up sandboxes",
308
+ " -h, --help Show this help",
309
+ ].join("\n"),
310
+ examples: [
311
+ ` ${binName} e2b build-template --role worker`,
312
+ ` ${binName} e2b start-worker --api-url https://swarm.example.com --api-key "$SWARM_API_KEY"`,
313
+ ` ${binName} e2b start-stack --workers 2 --api-key "$SWARM_API_KEY"`,
314
+ ].join("\n"),
315
+ },
291
316
  };
292
317
 
293
318
  function printHelp(command?: string) {
@@ -319,6 +344,7 @@ function printHelp(command?: string) {
319
344
  ["docs", "Open documentation (--open to launch in browser)"],
320
345
  ["codex-login", "Authenticate Codex via ChatGPT OAuth"],
321
346
  ["claude-managed-setup", "Bootstrap Anthropic Managed Agents (agent + env + skills)"],
347
+ ["e2b", "Build templates and start E2B API/worker sandboxes"],
322
348
  ["version", "Show version number"],
323
349
  ["help", "Show this help message"],
324
350
  ];
@@ -570,10 +596,20 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
570
596
  const { runCodexLogin } = await import("./commands/codex-login");
571
597
  const codexLoginArgs = process.argv.slice(process.argv.indexOf("codex-login") + 1);
572
598
  await runCodexLogin(codexLoginArgs);
599
+ } else if (args.command === "codex-session-runner") {
600
+ // Internal subcommand — invoked by CodexSubprocessSession to host a single
601
+ // codex session in a throwaway subprocess. See src/commands/codex-session-runner.ts
602
+ // for the rationale (Picateclas spawn-OOM permanent fix, 2026-05-28).
603
+ const { runCodexSessionRunner } = await import("./commands/codex-session-runner");
604
+ await runCodexSessionRunner();
573
605
  } else if (args.command === "claude-managed-setup") {
574
606
  const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
575
607
  const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);
576
608
  await runClaudeManagedSetup(setupArgs);
609
+ } else if (args.command === "e2b") {
610
+ const { runE2BCommand } = await import("./commands/e2b");
611
+ const e2bArgs = process.argv.slice(process.argv.indexOf("e2b") + 1);
612
+ await runE2BCommand(e2bArgs);
577
613
  } else {
578
614
  render(<App args={args} />);
579
615
  }
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Codex session subprocess runner.
3
+ *
4
+ * Entry point for the `codex-session-runner` CLI subcommand. Reads a
5
+ * `CodexSubprocessInput` payload from stdin, drives a fresh in-process
6
+ * `CodexSession`, and pipes the session's `ProviderEvent` stream + final
7
+ * `ProviderResult` back to its parent over stdout as line-delimited JSON.
8
+ *
9
+ * Why this exists: the previous architecture ran every codex session
10
+ * directly inside the long-lived worker runner. The `@openai/codex-sdk`
11
+ * leaks SDK state (parsers, transcript buffers, JSON-RPC plumbing) into
12
+ * the runner's heap, and after ~1,500 task completions on a hot worker
13
+ * (Picateclas, 2026-05-28) the runner's VSZ ballooned to 74 GB / RSS to
14
+ * 7.5 GB, causing every subsequent `fork()` to fail ENOMEM regardless of
15
+ * current RSS (the kernel reserves CoW for the full VSZ at fork time).
16
+ *
17
+ * Moving each session into its own subprocess means the SDK state dies
18
+ * with the subprocess. The runner stays at the ~234 MB baseline observed
19
+ * on Reviewer (the cohort partner that did 481 task completions without
20
+ * the OOM symptom). See task `fa0c0681` for the byte-by-byte breakdown.
21
+ *
22
+ * Wire protocol over stdout (one JSON object per line):
23
+ * {"kind":"event", "event": <ProviderEvent>}
24
+ * {"kind":"result", "result": <ProviderResult>}
25
+ * {"kind":"error", "message": "..."}
26
+ */
27
+
28
+ import { createInProcessCodexSession } from "../providers/codex-adapter";
29
+ import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
30
+
31
+ interface CodexSubprocessInput {
32
+ config: ProviderSessionConfig;
33
+ skillsDir?: string;
34
+ parentOtelEnv?: Record<string, string>;
35
+ }
36
+
37
/**
 * Drain stdin to a single UTF-8 string.
 *
 * Uses `Bun.stdin.stream()` when that member exists and decodes the chunks
 * incrementally; otherwise falls back to reading `/dev/stdin` through
 * `Bun.file`.
 */
async function readAllStdin(): Promise<string> {
  // Bun.stdin is a BunFile in some versions, Web stream in others, so probe
  // for `stream` instead of trusting the declared type.
  const stdin = Bun.stdin as unknown as { stream?: () => ReadableStream<Uint8Array> };
  const utf8 = new TextDecoder();

  if (!stdin.stream) {
    // Fallback: read via Bun.file (file-like access works for piped stdin too)
    return await Bun.file("/dev/stdin").text();
  }

  const reader = stdin.stream().getReader();
  const pieces: string[] = [];
  for (;;) {
    const chunk = await reader.read();
    if (chunk.done) break;
    // `stream: true` keeps multi-byte sequences split across chunks intact.
    if (chunk.value) pieces.push(utf8.decode(chunk.value, { stream: true }));
  }
  // Flush whatever the decoder is still buffering.
  pieces.push(utf8.decode());
  return pieces.join("");
}
58
+
59
/** Emit one wire-protocol record: `obj` serialized as JSON plus a trailing newline on stdout. */
function writeLine(obj: unknown): void {
  const serialized = JSON.stringify(obj);
  process.stdout.write(`${serialized}\n`);
}
62
+
63
/**
 * CLI entry point for the `codex-session-runner` subcommand.
 *
 * Delegates to `runCodexSessionRunnerInner`; anything that escapes it is logged
 * to stderr, reported to the parent as an `error` record on stdout, and turned
 * into exit code 1.
 */
export async function runCodexSessionRunner(): Promise<void> {
  await runCodexSessionRunnerInner().catch((err: unknown) => {
    let message: string;
    let stack: string | undefined;
    if (err instanceof Error) {
      message = err.message;
      stack = err.stack;
    } else {
      message = String(err);
    }

    console.error(`[codex-session-runner] top-level crash: ${message}`);
    if (stack) console.error(stack);
    writeLine({ kind: "error", message: `codex-session-runner: unexpected crash: ${message}` });
    process.exit(1);
  });
}
75
+
76
/**
 * Body of the runner: read the `CodexSubprocessInput` payload from stdin,
 * create an in-process codex session, stream its events to stdout and exit
 * with the session's exit code.
 *
 * Every failure path writes an `error` record to stdout (for the parent) and a
 * diagnostic to stderr, then exits with code 1.
 */
async function runCodexSessionRunnerInner(): Promise<void> {
  let input: CodexSubprocessInput;
  try {
    const raw = await readAllStdin();
    input = JSON.parse(raw) as CodexSubprocessInput;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] stdin parse failed: ${message}`);
    writeLine({
      kind: "error",
      message: `codex-session-runner: failed to parse stdin: ${message}`,
    });
    process.exit(1);
  }

  // JSON.parse accepts any JSON value (`null`, `42`, `[]`, ...). Reject payloads
  // without a `config` object here so the parent gets a precise error instead
  // of a TypeError surfacing as an "unexpected crash".
  if (!input || typeof input !== "object" || !input.config || typeof input.config !== "object") {
    const message = "payload must be a JSON object with a `config` object";
    console.error(`[codex-session-runner] stdin parse failed: ${message}`);
    writeLine({
      kind: "error",
      message: `codex-session-runner: failed to parse stdin: ${message}`,
    });
    process.exit(1);
  }

  // Forward the parent's captured OTel TRACEPARENT (and friends) into the
  // session config's env so the spawned Codex CLI nests its spans under our
  // worker.session trace. We deliberately do NOT call
  // `buildOtelTraceparentEnv` from inside this subprocess — its tracer has
  // no active span, so it would emit nothing.
  if (input.parentOtelEnv && Object.keys(input.parentOtelEnv).length > 0) {
    input.config.env = { ...(input.config.env ?? {}), ...input.parentOtelEnv };
  }

  let session: Awaited<ReturnType<typeof createInProcessCodexSession>>;
  try {
    session = await createInProcessCodexSession(input.config, {
      skillsDir: input.skillsDir,
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] createSession failed: ${message}`);
    writeLine({ kind: "error", message: `codex-session-runner: createSession failed: ${message}` });
    process.exit(1);
  }

  // Forward SIGTERM / SIGINT to the in-process session so the runner can
  // gracefully cancel us. The parent `CodexSubprocessSession.abort()` sends
  // SIGTERM here; the session's AbortController catches it and the codex
  // CLI subprocess (a grandchild) gets cleaned up.
  const onSignal = (signal: NodeJS.Signals) => {
    const exitCode = signal === "SIGINT" ? 130 : 143;
    // `Promise.resolve().then(...)` also captures a synchronous throw from
    // abort(); the `.catch` keeps a failed abort from becoming an unhandled
    // rejection, which could terminate the process before the exit code below
    // is applied.
    void Promise.resolve()
      .then(() => session.abort())
      .catch((err: unknown) => {
        const message = err instanceof Error ? err.message : String(err);
        console.error(`[codex-session-runner] abort failed on ${signal}: ${message}`);
      })
      .finally(() => {
        // give the session a beat to emit its cancellation result, then exit
        setTimeout(() => process.exit(exitCode), 250);
      });
  };
  process.on("SIGTERM", () => onSignal("SIGTERM"));
  process.on("SIGINT", () => onSignal("SIGINT"));

  session.onEvent((event: ProviderEvent) => {
    writeLine({ kind: "event", event });
  });

  const result: ProviderResult = await session.waitForCompletion();
  writeLine({ kind: "result", result });
  process.exit(result.exitCode ?? 0);
}
@@ -11,6 +11,8 @@
11
11
  * resumes (see swarm memory sigterm-143-resumed-session-context-saturation-2026-05-13).
12
12
  */
13
13
 
14
+ import { scrubSecrets } from "../utils/secret-scrubber";
15
+
14
16
  export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
15
17
  process.env.CONTEXT_PREAMBLE_MAX_TOKENS || "2000",
16
18
  );
@@ -18,12 +20,25 @@ export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
18
20
  export const CONTEXT_PREAMBLE_MAX_CHARS = CONTEXT_PREAMBLE_MAX_TOKENS * 4;
19
21
  export const CONTEXT_PREAMBLE_MAX_ANCESTORS = 5;
20
22
 
23
/**
 * Parse a numeric env override, falling back to `fallback` when the variable
 * is unset, blank, non-numeric, non-finite, or not strictly positive.
 *
 * A bare `Number(raw)` turns a typo into `NaN`; every `length > budget`
 * comparison against `NaN` is false, which would silently disable truncation.
 */
function readPositiveNumberEnv(raw: string | undefined, fallback: number): number {
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Token budget for the resume-task preamble. Default 4000 = 2× the regular
 * preamble, since the resume agent needs the original task brief verbatim
 * plus a tool-call summary to avoid redoing completed work.
 * Overridable via the `CONTEXT_PREAMBLE_RESUME_MAX_TOKENS` env variable.
 */
export const CONTEXT_PREAMBLE_RESUME_MAX_TOKENS = readPositiveNumberEnv(
  process.env.CONTEXT_PREAMBLE_RESUME_MAX_TOKENS,
  4000,
);
/** Character budget derived from the token budget at 4 characters per token. */
export const CONTEXT_PREAMBLE_RESUME_MAX_CHARS = CONTEXT_PREAMBLE_RESUME_MAX_TOKENS * 4;
/** How many of the most recent session_logs rows to inspect for tool-call summary. */
export const CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT = 50;
34
+
21
35
  export interface TaskContextForPreamble {
22
36
  id: string;
23
37
  task: string;
24
38
  output?: string;
25
39
  progress?: string;
26
40
  status?: string;
41
+ taskType?: string;
27
42
  parentTaskId?: string;
28
43
  attachments?: Array<{
29
44
  kind: string;
@@ -57,6 +72,7 @@ export async function fetchTaskContextForPreamble(
57
72
  output: data.output,
58
73
  progress: data.progress,
59
74
  status: data.status,
75
+ taskType: data.taskType,
60
76
  parentTaskId: data.parentTaskId,
61
77
  attachments: data.attachments,
62
78
  };
@@ -176,3 +192,259 @@ export async function buildContextPreamble(
176
192
 
177
193
  return preamble;
178
194
  }
195
+
196
+ // ─── Resume Preamble ───────────────────────────────────────────────────────────
197
+
198
/** Shape of one `session_logs` row as consumed by the resume preamble. */
interface SessionLogForPreamble {
  id: string;
  taskId?: string;
  sessionId: string;
  iteration: number;
  cli: string;
  content: string;
  lineNumber: number;
  createdAt: string;
}

/**
 * Fetch the session-log rows of `taskId` over HTTP.
 *
 * Best-effort by design: a non-2xx response, a network or JSON error, or a
 * payload without a `logs` array all yield `[]`, so a missing log history
 * never blocks building the preamble.
 *
 * @param apiUrl base URL of the API (no trailing slash expected — the path is appended as-is)
 * @param apiKey bearer token; no `Authorization` header is sent when empty
 * @param taskId task whose session logs are requested
 */
async function fetchSessionLogsForResume(
  apiUrl: string,
  apiKey: string,
  taskId: string,
): Promise<SessionLogForPreamble[]> {
  const headers: Record<string, string> = {};
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
  try {
    // Bound server-side: long-running parents can accumulate large `session_logs`
    // and the preamble only consumes the tail (see CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT).
    // Passing `?limit=N` keeps dispatch fast and memory-flat regardless of run length.
    // The id is escaped so a value containing `/`, `?` or `#` cannot alter the
    // request path; ids made only of unreserved characters are unchanged.
    const url = `${apiUrl}/api/tasks/${encodeURIComponent(taskId)}/session-logs?limit=${CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT}`;
    const response = await fetch(url, { headers });
    if (!response.ok) return [];
    const data = (await response.json()) as { logs?: SessionLogForPreamble[] };
    return Array.isArray(data.logs) ? data.logs : [];
  } catch {
    return [];
  }
}
229
+
230
/** Maximum length of the free-text part (snippet or tool target) of one summary line. */
const SESSION_LOG_SNIPPET_MAX_CHARS = 120;

/** Collapse all whitespace runs (including newlines) to single spaces, trim, and cap the length. */
function toOneLineSnippet(text: string): string {
  return text.replace(/\s+/g, " ").trim().slice(0, SESSION_LOG_SNIPPET_MAX_CHARS);
}

/**
 * Render `node` as `[ts] <tool> <target>` when it is a `tool_use` record with a
 * string `name`; returns `null` for anything else. The target is the first of
 * `input.file_path`, `input.path`, `input.command` that is set, and is only
 * shown when it is a string.
 */
function formatToolUseSummary(ts: string, node: Record<string, unknown>): string | null {
  if (node.type !== "tool_use" || typeof node.name !== "string") return null;
  const rawInput = node.input;
  const input =
    rawInput && typeof rawInput === "object" ? (rawInput as Record<string, unknown>) : undefined;
  const target = input?.file_path ?? input?.path ?? input?.command;
  // Shell commands are routinely multi-line and arbitrarily long; flatten and
  // cap them so each summary really is one bounded line.
  const targetText = typeof target === "string" ? toOneLineSnippet(target) : "";
  return targetText ? `[${ts}] ${node.name} ${targetText}` : `[${ts}] ${node.name}`;
}

/**
 * Format a single session_log line as a one-line tool-call summary. Falls back
 * to a truncated content snippet when the line isn't recognizable as a
 * tool call.
 *
 * The result is NOT scrubbed here — callers are expected to pass it through
 * `scrubSecrets` before inserting it into a preamble.
 *
 * @param line one session-log row; `createdAt` is assumed to be an ISO-8601-style
 *   timestamp whose characters 11..18 are `HH:MM:SS`
 * @returns the summary line, or `null` when the row carries nothing usable
 *   (non-string or blank content, or JSON that is not an object/array)
 */
function summarizeSessionLogLine(line: SessionLogForPreamble): string | null {
  // Rows arrive from an unvalidated HTTP response; tolerate missing fields
  // instead of throwing out of the whole preamble build.
  if (typeof line.content !== "string") return null;
  const ts = typeof line.createdAt === "string" ? line.createdAt.slice(11, 19) : "--:--:--";

  let parsed: unknown;
  try {
    parsed = JSON.parse(line.content);
  } catch {
    // Not JSON: summarize the raw text.
    const snippet = toOneLineSnippet(line.content);
    return snippet ? `[${ts}] ${snippet}` : null;
  }
  if (!parsed || typeof parsed !== "object") return null;
  const obj = parsed as Record<string, unknown>;

  // Anthropic / claude message-style tool calls: the first tool_use block wins.
  const message = obj.message as Record<string, unknown> | undefined;
  const content = message?.content;
  if (Array.isArray(content)) {
    for (const block of content) {
      if (!block || typeof block !== "object") continue;
      const summary = formatToolUseSummary(ts, block as Record<string, unknown>);
      if (summary) return summary;
    }
  }

  // Codex / generic event-style: { type: 'tool_use', name: '...', input: {...} }
  const direct = formatToolUseSummary(ts, obj);
  if (direct) return direct;

  // Fallback: short content snippet (still useful for diff/insight)
  const snippet = toOneLineSnippet(JSON.stringify(parsed));
  return snippet ? `[${ts}] ${snippet}` : null;
}
276
+
277
/** Upper bound on how many ancestors `walkResumeChain` will fetch. */
const MAX_RESUME_CHAIN_DEPTH = 10;

/**
 * Walk up the parentTaskId chain through `taskType === "resume"` ancestors
 * to find the original (non-resume) task. Returns the chain in order
 * [immediateParent, ..., original]. Stops at MAX_RESUME_CHAIN_DEPTH entries,
 * on the first fetch failure, or as soon as an id repeats, so cycles and
 * runaway chains cannot produce duplicate entries.
 *
 * PR #594 review: cascading resumes (original → resume1 → resume2) had
 * `buildResumeContextPreamble` fetching only the immediate parent — whose
 * `task` text is the synthetic "Resume interrupted task..." prompt rather
 * than the original work brief. Walking the chain restores the original
 * description and lets us merge session logs from all resume attempts.
 */
async function walkResumeChain(
  apiUrl: string,
  apiKey: string,
  immediateParentId: string,
): Promise<TaskContextForPreamble[]> {
  const chain: TaskContextForPreamble[] = [];
  const visited = new Set<string>();
  let currentId: string | undefined = immediateParentId;
  while (currentId && chain.length < MAX_RESUME_CHAIN_DEPTH && !visited.has(currentId)) {
    visited.add(currentId);
    const ctx: TaskContextForPreamble | null = await fetchTaskContextForPreamble(
      apiUrl,
      apiKey,
      currentId,
    );
    if (!ctx) break;
    chain.push(ctx);
    // Stop once we hit a non-resume ancestor — that's the original work.
    if (ctx.taskType !== "resume") break;
    currentId = ctx.parentTaskId;
  }
  return chain;
}

/**
 * Build a resume-task preamble.
 *
 * Reads the parent task chain + recent session_logs over HTTP (never touches
 * `bun:sqlite` worker-side). Allocates the resume character budget
 * (`CONTEXT_PREAMBLE_RESUME_MAX_CHARS`, 4000 tokens by default):
 *
 *   - 40% — full original task description
 *   - 35% — last-N session_logs summary (tool-call one-liners; scrubbed)
 *   - 15% — artifacts/attachments index (names + pointers only)
 *   - 10% — fixed framing (header + continuation instructions)
 *
 * Truncation order: session-log summary (oldest first), then artifacts
 * (last listed first). Section budgeting never shortens the description; when
 * it exceeds its 40% share, the overflow is taken half from the log budget and
 * half from the artifact budget. Only the final hard cap on the assembled
 * preamble can cut into the description.
 *
 * @param apiUrl base URL of the API
 * @param apiKey bearer token for the API (may be empty)
 * @param parentTaskId id of the task being resumed (the immediate parent)
 * @returns the preamble text, or `null` when the parent task cannot be fetched
 */
export async function buildResumeContextPreamble(
  apiUrl: string,
  apiKey: string,
  parentTaskId: string,
): Promise<string | null> {
  const chain = await walkResumeChain(apiUrl, apiKey, parentTaskId);
  // Immediate parent — its attachments are the most recent "in flight" set.
  const parent = chain[0];
  // Original = last entry (non-resume ancestor, or the deepest reachable
  // if the chain exceeds the depth cap or hits a fetch failure).
  const original = chain[chain.length - 1];
  if (!parent || !original) return null;

  // Fetch session logs from EVERY chain member so a re-superseded resume
  // still surfaces tool-call history from earlier attempts. Merge, sort by
  // createdAt ASC, then keep the most recent N.
  const logsBatches = await Promise.all(
    chain.map((c) => fetchSessionLogsForResume(apiUrl, apiKey, c.id)),
  );
  const merged = logsBatches.flat();
  // Plain code-unit comparison: deterministic and locale-independent, unlike
  // `localeCompare`. Assumes the timestamps share one sortable format — TODO confirm.
  merged.sort((a, b) => (a.createdAt < b.createdAt ? -1 : a.createdAt > b.createdAt ? 1 : 0));
  const recentLogs = merged.slice(-CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT);

  const descBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.4);
  let logsBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.35);
  let artBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.15);

  // Blank entries are intentional Markdown paragraph breaks. The optional
  // chain-depth line is appended conditionally rather than filtered out, so
  // the breaks survive — without them the intro paragraph sits directly above
  // `---` and renders as a setext heading.
  const headerLines: string[] = [
    "\n---",
    "## Resuming Interrupted Task",
    "",
    "This task is a fresh-session continuation of an interrupted task (graceful",
    "shutdown / context-limit / operator action). The block below summarizes the",
    "original task, what was done so far, and the artifacts in flight.",
    "",
    "**Do not redo work already completed below — extend it.**",
    "",
    `Original task ID: \`${original.id}\``,
  ];
  if (chain.length > 1) {
    const ordinal =
      chain.length === 2 ? "2nd" : chain.length === 3 ? "3rd" : `${chain.length}th`;
    headerLines.push(
      `Resume chain depth: ${chain.length} (this is at least the ${ordinal} resume attempt).`,
    );
  }
  headerLines.push("", "---", "", "### Original Task Description", "");
  const header = headerLines.join("\n");

  // 40% — full description. Pulled from the ORIGINAL (non-resume) ancestor so
  // cascading resumes don't read each other's synthetic "Resume interrupted
  // task..." preamble bodies (PR #594 review).
  const descSection = original.task;

  // An oversized description borrows from the other sections: half of the
  // overflow comes out of the logs budget, half out of the artifacts budget
  // (each floored at zero).
  if (descSection.length > descBudget) {
    const overflow = descSection.length - descBudget;
    logsBudget = Math.max(0, logsBudget - Math.ceil(overflow / 2));
    artBudget = Math.max(0, artBudget - Math.floor(overflow / 2));
  }

  // 35% — session-log summary (tool-call lines). Scrub secrets BEFORE budget
  // enforcement so secret strings don't get sliced into half-redactions
  // mid-truncate.
  const scrubbedSummary = recentLogs
    .map((line) => summarizeSessionLogLine(line))
    .filter((summary): summary is string => Boolean(summary))
    .map((summary) => scrubSecrets(summary));
  let logsSection = scrubbedSummary.join("\n");
  // FIFO truncate (drop oldest first) until under budget.
  while (logsSection.length > logsBudget && scrubbedSummary.length > 0) {
    scrubbedSummary.shift();
    logsSection = scrubbedSummary.join("\n");
  }

  // 15% — artifacts (names + pointers only); entries are dropped from the end
  // of the list until the section fits.
  const atts = parent.attachments?.filter((a) => a.name && (a.url || a.path || a.pageId)) ?? [];
  const artLines = atts.map((att) => ` - **${att.name}**: \`${formatAttachmentPointer(att)}\``);
  let artSection = artLines.join("\n");
  while (artSection.length > artBudget && artLines.length > 0) {
    artLines.pop();
    artSection = artLines.join("\n");
  }

  const sections: string[] = [header, descSection, ""];

  if (logsSection) {
    sections.push("### Recent Tool Calls", "", logsSection, "");
  }

  if (artSection) {
    sections.push("### Artifacts In Flight", "", artSection, "");
  }

  sections.push(
    "---",
    "",
    `To review the full prior session call \`get-task-details\` with taskId \`${original.id}\`.`,
    "",
    "---",
    "",
  );

  let preamble = sections.join("\n");

  // Final hard cap — should rarely trip given the per-section budgets above,
  // but provides a safety net for very long descriptions. The truncation
  // notice is appended after the cut, so the result is slightly longer than
  // the cap itself.
  if (preamble.length > CONTEXT_PREAMBLE_RESUME_MAX_CHARS) {
    preamble = `${preamble.slice(0, CONTEXT_PREAMBLE_RESUME_MAX_CHARS)}\n\n[resume preamble truncated to ${CONTEXT_PREAMBLE_RESUME_MAX_TOKENS}-token budget]\n\n---\n`;
  }

  return preamble;
}
@@ -133,7 +133,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
133
133
 
134
134
  // Fast path: already satisfied at boot.
135
135
  let currentProvider = readProvider();
136
- let status = checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
136
+ let status = await checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
137
137
  if (status.ready) {
138
138
  log(
139
139
  `[boot] credentials ready (provider=${currentProvider}, satisfiedBy=${status.satisfiedBy})`,
@@ -183,7 +183,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
183
183
  currentProvider = nextProvider;
184
184
  }
185
185
 
186
- status = checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
186
+ status = await checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
187
187
 
188
188
  if (!status.ready) {
189
189
  // Exponential backoff with cap.