@desplega.ai/agent-swarm 1.85.0 → 1.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +72 -1
- package/package.json +10 -6
- package/src/be/db-queries/tracker.ts +21 -0
- package/src/be/db.ts +279 -14
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/migrations/079_task_followup_config.sql +1 -0
- package/src/be/modelsdev-cache.json +155618 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +38 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/context-preamble.ts +272 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/e2b.ts +728 -0
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/resume-session.ts +35 -78
- package/src/commands/runner.ts +128 -16
- package/src/e2b/dispatch.ts +429 -0
- package/src/e2b/env.ts +206 -0
- package/src/heartbeat/heartbeat.ts +145 -30
- package/src/heartbeat/templates.ts +11 -7
- package/src/http/session-data.ts +8 -1
- package/src/http/tasks.ts +152 -3
- package/src/jira/sync.ts +4 -4
- package/src/linear/sync.ts +6 -5
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +76 -61
- package/src/providers/claude-managed-adapter.ts +61 -75
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +429 -112
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/codex-oauth/auth-json.ts +18 -1
- package/src/providers/codex-oauth/flow.ts +24 -1
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/providers/types.ts +6 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tasks/worker-follow-up.ts +162 -2
- package/src/telemetry.ts +11 -1
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-adapter.test.ts +5 -27
- package/src/tests/claude-managed-adapter.test.ts +42 -56
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +25 -37
- package/src/tests/codex-oauth.test.ts +149 -3
- package/src/tests/codex-pool.test.ts +14 -3
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/e2b-dispatch.test.ts +330 -0
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
- package/src/tests/heartbeat.test.ts +26 -16
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/prompt-template-remaining.test.ts +4 -0
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/resume-session.test.ts +42 -50
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tests/structured-output.test.ts +69 -0
- package/src/tests/task-completion-idempotency.test.ts +185 -2
- package/src/tests/task-supersede-resume.test.ts +722 -0
- package/src/tests/telemetry-init.test.ts +69 -0
- package/src/tests/vcs-tracking.test.ts +39 -0
- package/src/tools/send-task.ts +42 -10
- package/src/tools/store-progress.ts +2 -2
- package/src/tools/templates.ts +14 -2
- package/src/types.ts +46 -1
- package/src/utils/context-window.ts +1 -0
- package/src/workflows/executors/agent-task.ts +3 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Cost figures attached to a single model entry in the models.dev snapshot.
 * Every field is optional because not every model publishes every rate.
 * NOTE(review): the unit of these numbers is not stated in this file —
 * confirm against the snapshot before doing arithmetic on them.
 */
export interface ModelsDevCostBlock {
  /** Rate for input tokens. */
  input?: number;
  /** Rate for output tokens. */
  output?: number;
  /** Rate for tokens read from cache. */
  cache_read?: number;
  /** Rate for tokens written to cache. */
  cache_write?: number;
}

/** One model entry of a provider in the snapshot. */
export interface ModelsDevModel {
  /** Model identifier as recorded in the snapshot. */
  id?: string;
  /** Cost block for this model, when the snapshot carries one. */
  cost?: ModelsDevCostBlock;
}

/** One provider entry in the snapshot. */
export interface ModelsDevProvider {
  /** Models of this provider, keyed by string. */
  models?: Record<string, ModelsDevModel>;
}

/** Top-level shape of the snapshot: providers keyed by string. */
export type ModelsDevCache = Record<string, ModelsDevProvider>;

/** Location of the vendored snapshot, relative to a base directory chosen by the loader. */
export const MODELSDEV_CACHE_PATH = path.join(...["src", "be", "modelsdev-cache.json"]);
|
|
23
|
+
|
|
24
|
+
/**
 * Load the vendored models.dev snapshot from the first usable location.
 *
 * Candidates are tried in this order:
 *   1. `process.env.MODELSDEV_CACHE_PATH`, when set to a non-empty value;
 *   2. `<cwd>/src/be/modelsdev-cache.json` (source checkout);
 *   3. `<cwd>/../src/be/modelsdev-cache.json`;
 *   4. `/app/src/be/modelsdev-cache.json` — the API Docker image copies the
 *      snapshot to `/app/src/be/...`.
 *
 * A candidate is skipped when it cannot be read, is not valid JSON, or when
 * its top-level JSON value is not a plain object (array, string, number,
 * `null`, ...). The last check keeps a wrong-shaped file from being handed to
 * callers as if it were a provider map.
 *
 * Loading is deliberately best-effort: failures are not logged or thrown here.
 *
 * @returns the parsed snapshot, or `null` when no candidate is usable.
 */
export function loadModelsDevCache(): ModelsDevCache | null {
  const explicitPath = process.env.MODELSDEV_CACHE_PATH;
  const candidates = [
    ...(explicitPath ? [explicitPath] : []),
    path.join(process.cwd(), MODELSDEV_CACHE_PATH),
    path.join(process.cwd(), "..", MODELSDEV_CACHE_PATH),
    path.join("/app", MODELSDEV_CACHE_PATH),
  ];

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(candidate, "utf-8"));
    } catch {
      // Missing, unreadable, or malformed file — try the next candidate.
      continue;
    }
    // Only a JSON object can be a provider map; anything else is unusable.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      return parsed as ModelsDevCache;
    }
  }

  return null;
}
|
package/src/be/seed-pricing.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
|
|
3
3
|
*
|
|
4
|
-
* The vendored models.dev snapshot at `
|
|
4
|
+
* The vendored models.dev snapshot at `src/be/modelsdev-cache.json` is the
|
|
5
5
|
* single source of truth for per-token rates. We project it into rows keyed by
|
|
6
6
|
* `(provider, model, token_class)` so the recompute path in
|
|
7
7
|
* `src/http/session-data.ts` can rebuild USD from tokens regardless of which
|
|
@@ -17,30 +17,15 @@
|
|
|
17
17
|
* admin route (`POST /api/pricing`) — we don't overwrite seed rows.
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
|
-
import { readFileSync } from "node:fs";
|
|
21
|
-
import path from "node:path";
|
|
22
20
|
import type { PricingProvider, PricingTokenClass } from "../types";
|
|
23
21
|
import { getDb } from "./db";
|
|
22
|
+
import {
|
|
23
|
+
loadModelsDevCache,
|
|
24
|
+
type ModelsDevCache,
|
|
25
|
+
type ModelsDevCostBlock,
|
|
26
|
+
} from "./modelsdev-cache";
|
|
24
27
|
import { normalizeModelKey } from "./pricing-normalize";
|
|
25
28
|
|
|
26
|
-
interface ModelsDevCostBlock {
|
|
27
|
-
input?: number;
|
|
28
|
-
output?: number;
|
|
29
|
-
cache_read?: number;
|
|
30
|
-
cache_write?: number;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
interface ModelsDevModel {
|
|
34
|
-
id?: string;
|
|
35
|
-
cost?: ModelsDevCostBlock;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
interface ModelsDevProvider {
|
|
39
|
-
models?: Record<string, ModelsDevModel>;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
type ModelsDevCache = Record<string, ModelsDevProvider>;
|
|
43
|
-
|
|
44
29
|
/**
|
|
45
30
|
* Per-harness manual rates that models.dev doesn't carry. Keep the source URL
|
|
46
31
|
* and a verification date next to each entry so {@link MANUAL_PRICING_OVERRIDES}
|
|
@@ -82,33 +67,11 @@ const MANUAL_PRICING_OVERRIDES: Array<{
|
|
|
82
67
|
* fields the models.dev snapshot doesn't index directly; we map them here.
|
|
83
68
|
*/
|
|
84
69
|
const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
|
|
85
|
-
opus: "claude-opus-4-
|
|
70
|
+
opus: "claude-opus-4-8",
|
|
86
71
|
sonnet: "claude-sonnet-4-6",
|
|
87
72
|
haiku: "claude-haiku-4-5",
|
|
88
73
|
};
|
|
89
74
|
|
|
90
|
-
/**
|
|
91
|
-
* Resolve the path to the vendored models.dev cache. The UI copy is canonical.
|
|
92
|
-
* We treat this as best-effort: if the file is missing (developer ran the
|
|
93
|
-
* server without `ui/` checked out), we log and continue with manual rates
|
|
94
|
-
* only — better than crashing the boot.
|
|
95
|
-
*/
|
|
96
|
-
function loadModelsDevCache(): ModelsDevCache | null {
|
|
97
|
-
const candidates = [
|
|
98
|
-
path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
|
|
99
|
-
path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
|
|
100
|
-
];
|
|
101
|
-
for (const cand of candidates) {
|
|
102
|
-
try {
|
|
103
|
-
const raw = readFileSync(cand, "utf-8");
|
|
104
|
-
return JSON.parse(raw) as ModelsDevCache;
|
|
105
|
-
} catch {
|
|
106
|
-
// try next candidate
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
return null;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
75
|
interface PricingSeedRow {
|
|
113
76
|
provider: PricingProvider;
|
|
114
77
|
model: string;
|
package/src/cli.tsx
CHANGED
|
@@ -16,8 +16,12 @@ import { getApiKey, setApiKey } from "./utils/api-key.ts";
|
|
|
16
16
|
// Get CLI name from bin field (assumes single key)
|
|
17
17
|
const binName = Object.keys(pkg.bin)[0];
|
|
18
18
|
|
|
19
|
-
// Restore cursor on exit
|
|
20
|
-
|
|
19
|
+
// Restore cursor on exit — only when stdout is a TTY. Non-TTY invocations
|
|
20
|
+
// (like the codex-session-runner subprocess whose stdout is a JSON pipe)
|
|
21
|
+
// must not inject terminal escape sequences into the byte stream.
|
|
22
|
+
const restoreCursor = () => {
  // Piped (non-TTY) stdout carries machine-readable bytes; leave it untouched.
  if (!process.stdout.isTTY) return;
  // Interactive terminal: emit the "show cursor" escape sequence.
  process.stdout.write("\x1B[?25h");
};
|
|
21
25
|
process.on("exit", restoreCursor);
|
|
22
26
|
process.on("SIGINT", () => {
|
|
23
27
|
restoreCursor();
|
|
@@ -288,6 +292,27 @@ const COMMAND_HELP: Record<
|
|
|
288
292
|
` ${binName} claude-managed-setup --api-url https://swarm.example.com`,
|
|
289
293
|
].join("\n"),
|
|
290
294
|
},
|
|
295
|
+
e2b: {
|
|
296
|
+
usage: `${binName} e2b <subcommand> [options]`,
|
|
297
|
+
description:
|
|
298
|
+
"Build Agent Swarm E2B templates and start API/worker sandboxes on demand for CI or Dockerless environments.",
|
|
299
|
+
options: [
|
|
300
|
+
" build-template --role api|worker Build or rebuild an E2B template",
|
|
301
|
+
" delete-template <template...> Delete E2B templates",
|
|
302
|
+
" publish-template <template...> Publish E2B templates",
|
|
303
|
+
" unpublish-template <template...> Make E2B templates private",
|
|
304
|
+
" start-api --template <name> Start the API in an E2B sandbox",
|
|
305
|
+
" start-worker --api-url <url> Start a worker against a public API URL",
|
|
306
|
+
" start-stack Start API plus one or more workers",
|
|
307
|
+
" list | kill <sandbox-id...> Inspect or clean up sandboxes",
|
|
308
|
+
" -h, --help Show this help",
|
|
309
|
+
].join("\n"),
|
|
310
|
+
examples: [
|
|
311
|
+
` ${binName} e2b build-template --role worker`,
|
|
312
|
+
` ${binName} e2b start-worker --api-url https://swarm.example.com --api-key "$SWARM_API_KEY"`,
|
|
313
|
+
` ${binName} e2b start-stack --workers 2 --api-key "$SWARM_API_KEY"`,
|
|
314
|
+
].join("\n"),
|
|
315
|
+
},
|
|
291
316
|
};
|
|
292
317
|
|
|
293
318
|
function printHelp(command?: string) {
|
|
@@ -319,6 +344,7 @@ function printHelp(command?: string) {
|
|
|
319
344
|
["docs", "Open documentation (--open to launch in browser)"],
|
|
320
345
|
["codex-login", "Authenticate Codex via ChatGPT OAuth"],
|
|
321
346
|
["claude-managed-setup", "Bootstrap Anthropic Managed Agents (agent + env + skills)"],
|
|
347
|
+
["e2b", "Build templates and start E2B API/worker sandboxes"],
|
|
322
348
|
["version", "Show version number"],
|
|
323
349
|
["help", "Show this help message"],
|
|
324
350
|
];
|
|
@@ -570,10 +596,20 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
|
|
|
570
596
|
const { runCodexLogin } = await import("./commands/codex-login");
|
|
571
597
|
const codexLoginArgs = process.argv.slice(process.argv.indexOf("codex-login") + 1);
|
|
572
598
|
await runCodexLogin(codexLoginArgs);
|
|
599
|
+
} else if (args.command === "codex-session-runner") {
|
|
600
|
+
// Internal subcommand — invoked by CodexSubprocessSession to host a single
|
|
601
|
+
// codex session in a throwaway subprocess. See src/commands/codex-session-runner.ts
|
|
602
|
+
// for the rationale (Picateclas spawn-OOM permanent fix, 2026-05-28).
|
|
603
|
+
const { runCodexSessionRunner } = await import("./commands/codex-session-runner");
|
|
604
|
+
await runCodexSessionRunner();
|
|
573
605
|
} else if (args.command === "claude-managed-setup") {
|
|
574
606
|
const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
|
|
575
607
|
const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);
|
|
576
608
|
await runClaudeManagedSetup(setupArgs);
|
|
609
|
+
} else if (args.command === "e2b") {
|
|
610
|
+
const { runE2BCommand } = await import("./commands/e2b");
|
|
611
|
+
const e2bArgs = process.argv.slice(process.argv.indexOf("e2b") + 1);
|
|
612
|
+
await runE2BCommand(e2bArgs);
|
|
577
613
|
} else {
|
|
578
614
|
render(<App args={args} />);
|
|
579
615
|
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex session subprocess runner.
|
|
3
|
+
*
|
|
4
|
+
* Entry point for the `codex-session-runner` CLI subcommand. Reads a
|
|
5
|
+
* `CodexSubprocessInput` payload from stdin, drives a fresh in-process
|
|
6
|
+
* `CodexSession`, and pipes the session's `ProviderEvent` stream + final
|
|
7
|
+
* `ProviderResult` back to its parent over stdout as line-delimited JSON.
|
|
8
|
+
*
|
|
9
|
+
* Why this exists: the previous architecture ran every codex session
|
|
10
|
+
* directly inside the long-lived worker runner. The `@openai/codex-sdk`
|
|
11
|
+
* leaks SDK state (parsers, transcript buffers, JSON-RPC plumbing) into
|
|
12
|
+
* the runner's heap, and after ~1,500 task completions on a hot worker
|
|
13
|
+
* (Picateclas, 2026-05-28) the runner's VSZ ballooned to 74 GB / RSS to
|
|
14
|
+
* 7.5 GB, causing every subsequent `fork()` to fail ENOMEM regardless of
|
|
15
|
+
* current RSS (the kernel reserves CoW for the full VSZ at fork time).
|
|
16
|
+
*
|
|
17
|
+
* Moving each session into its own subprocess means the SDK state dies
|
|
18
|
+
* with the subprocess. The runner stays at the ~234 MB baseline observed
|
|
19
|
+
* on Reviewer (the cohort partner that did 481 task completions without
|
|
20
|
+
* the OOM symptom). See task `fa0c0681` for the byte-by-byte breakdown.
|
|
21
|
+
*
|
|
22
|
+
* Wire protocol over stdout (one JSON object per line):
|
|
23
|
+
* {"kind":"event", "event": <ProviderEvent>}
|
|
24
|
+
* {"kind":"result", "result": <ProviderResult>}
|
|
25
|
+
* {"kind":"error", "message": "..."}
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { createInProcessCodexSession } from "../providers/codex-adapter";
|
|
29
|
+
import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
|
|
30
|
+
|
|
31
|
+
/** JSON payload the parent process writes to this subprocess's stdin. */
interface CodexSubprocessInput {
  /** Session configuration handed to `createInProcessCodexSession`. */
  config: ProviderSessionConfig;
  /** Forwarded to `createInProcessCodexSession` as its `skillsDir` option. */
  skillsDir?: string;
  /** Env entries captured by the parent; merged over `config.env` before the session starts. */
  parentOtelEnv?: Record<string, string>;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Read everything available on stdin and return it as one string.
 *
 * `Bun.stdin` is a BunFile in some Bun versions and a Web stream in others,
 * so the stream accessor is probed at runtime. When it is absent, the
 * function falls back to reading `/dev/stdin` through `Bun.file`.
 */
async function readAllStdin(): Promise<string> {
  const stdin = Bun.stdin as unknown as { stream?: () => ReadableStream<Uint8Array> };

  if (!stdin.stream) {
    // Fallback: file-like access works for piped stdin too.
    return await Bun.file("/dev/stdin").text();
  }

  const reader = stdin.stream().getReader();
  const decoder = new TextDecoder();
  const pieces: string[] = [];
  for (;;) {
    const chunk = await reader.read();
    if (chunk.done) break;
    // `stream: true` lets the decoder carry bytes across chunk boundaries.
    if (chunk.value) pieces.push(decoder.decode(chunk.value, { stream: true }));
  }
  // Final call without input flushes whatever the decoder still holds.
  pieces.push(decoder.decode());
  return pieces.join("");
}
|
|
58
|
+
|
|
59
|
+
/** Write `obj` to stdout as one JSON document followed by a newline. */
function writeLine(obj: unknown): void {
  const serialized = JSON.stringify(obj);
  process.stdout.write(serialized + "\n");
}
|
|
62
|
+
|
|
63
|
+
/**
 * Entry point of the `codex-session-runner` subcommand.
 *
 * Delegates to `runCodexSessionRunnerInner`. Anything that escapes it is
 * logged to stderr (message, then stack when available), reported on stdout
 * as a `{"kind":"error"}` line, and the process exits with code 1.
 */
export async function runCodexSessionRunner(): Promise<void> {
  try {
    await runCodexSessionRunnerInner();
    return;
  } catch (err) {
    let message: string;
    let stack: string | undefined;
    if (err instanceof Error) {
      message = err.message;
      stack = err.stack;
    } else {
      message = String(err);
      stack = undefined;
    }
    console.error(`[codex-session-runner] top-level crash: ${message}`);
    if (stack) console.error(stack);
    writeLine({ kind: "error", message: `codex-session-runner: unexpected crash: ${message}` });
    process.exit(1);
  }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Drive one Codex session end-to-end inside this subprocess.
 *
 * Steps:
 *   1. Read and validate the JSON payload from stdin. It must be an object
 *      with a `config` object; otherwise an error line is written and the
 *      process exits with code 1.
 *   2. Merge `parentOtelEnv` (when it is a non-empty object) over `config.env`.
 *   3. Create the in-process session; a failure is reported as an error line
 *      and the process exits with code 1.
 *   4. Install SIGTERM / SIGINT handlers that abort the session and then exit
 *      with 143 / 130 after a 250 ms grace period.
 *   5. Stream every session event to stdout, then the final result, and exit
 *      with `result.exitCode` (0 when absent).
 */
async function runCodexSessionRunnerInner(): Promise<void> {
  let input: CodexSubprocessInput;
  try {
    const raw = await readAllStdin();
    const parsed: unknown = JSON.parse(raw);
    // Validate the shape here so a malformed payload is reported as a stdin
    // problem rather than as an opaque TypeError further down.
    const config = (parsed as { config?: unknown } | null)?.config;
    if (typeof config !== "object" || config === null || Array.isArray(config)) {
      throw new Error("payload must be a JSON object with a `config` object");
    }
    input = parsed as CodexSubprocessInput;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] stdin parse failed: ${message}`);
    writeLine({
      kind: "error",
      message: `codex-session-runner: failed to parse stdin: ${message}`,
    });
    process.exit(1);
  }

  // Forward the parent's captured OTel TRACEPARENT (and friends) into the
  // session config's env so the spawned Codex CLI nests its spans under our
  // worker.session trace. We deliberately do NOT call
  // `buildOtelTraceparentEnv` from inside this subprocess — its tracer has
  // no active span, so it would emit nothing.
  const otelEnv: unknown = input.parentOtelEnv;
  if (otelEnv && typeof otelEnv === "object" && Object.keys(otelEnv).length > 0) {
    input.config.env = { ...(input.config.env ?? {}), ...(otelEnv as Record<string, string>) };
  }

  let session: Awaited<ReturnType<typeof createInProcessCodexSession>>;
  try {
    session = await createInProcessCodexSession(input.config, {
      skillsDir: input.skillsDir,
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] createSession failed: ${message}`);
    writeLine({ kind: "error", message: `codex-session-runner: createSession failed: ${message}` });
    process.exit(1);
  }

  // Forward SIGTERM / SIGINT to the in-process session so the runner can
  // gracefully cancel us. The parent `CodexSubprocessSession.abort()` sends
  // SIGTERM here; the session's AbortController catches it and the codex
  // CLI subprocess (a grandchild) gets cleaned up.
  const onSignal = (signal: NodeJS.Signals) => {
    void session.abort().finally(() => {
      // give the session a beat to emit its cancellation result, then exit
      setTimeout(() => process.exit(signal === "SIGINT" ? 130 : 143), 250);
    });
  };
  process.on("SIGTERM", () => onSignal("SIGTERM"));
  process.on("SIGINT", () => onSignal("SIGINT"));

  session.onEvent((event: ProviderEvent) => {
    writeLine({ kind: "event", event });
  });

  const result: ProviderResult = await session.waitForCompletion();
  writeLine({ kind: "result", result });
  process.exit(result.exitCode ?? 0);
}
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
* resumes (see swarm memory sigterm-143-resumed-session-context-saturation-2026-05-13).
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
15
|
+
|
|
14
16
|
export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
|
|
15
17
|
process.env.CONTEXT_PREAMBLE_MAX_TOKENS || "2000",
|
|
16
18
|
);
|
|
@@ -18,12 +20,25 @@ export const CONTEXT_PREAMBLE_MAX_TOKENS = Number(
|
|
|
18
20
|
export const CONTEXT_PREAMBLE_MAX_CHARS = CONTEXT_PREAMBLE_MAX_TOKENS * 4;
|
|
19
21
|
export const CONTEXT_PREAMBLE_MAX_ANCESTORS = 5;
|
|
20
22
|
|
|
23
|
+
/**
 * Parse a token-budget override from an environment value.
 *
 * An unset or empty value yields `fallback`. So does a value that does not
 * parse to a finite, non-negative number (e.g. `"abc"` or `"-5"`), so a typo
 * in the environment cannot turn every derived budget into `NaN`.
 */
function resumePreambleTokenBudget(raw: string | undefined, fallback = 4000): number {
  const parsed = Number(raw || String(fallback));
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Token budget for the resume-task preamble. Default 4000 = 2× the regular
 * preamble, since the resume agent needs the original task brief verbatim
 * plus a tool-call summary to avoid redoing completed work. Override with
 * the `CONTEXT_PREAMBLE_RESUME_MAX_TOKENS` environment variable.
 */
export const CONTEXT_PREAMBLE_RESUME_MAX_TOKENS = resumePreambleTokenBudget(
  process.env.CONTEXT_PREAMBLE_RESUME_MAX_TOKENS,
);
/** Character budget derived from the token budget (4 characters per token). */
export const CONTEXT_PREAMBLE_RESUME_MAX_CHARS = CONTEXT_PREAMBLE_RESUME_MAX_TOKENS * 4;
/** How many of the most recent session_logs rows to inspect for tool-call summary. */
export const CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT = 50;
|
|
34
|
+
|
|
21
35
|
export interface TaskContextForPreamble {
|
|
22
36
|
id: string;
|
|
23
37
|
task: string;
|
|
24
38
|
output?: string;
|
|
25
39
|
progress?: string;
|
|
26
40
|
status?: string;
|
|
41
|
+
taskType?: string;
|
|
27
42
|
parentTaskId?: string;
|
|
28
43
|
attachments?: Array<{
|
|
29
44
|
kind: string;
|
|
@@ -57,6 +72,7 @@ export async function fetchTaskContextForPreamble(
|
|
|
57
72
|
output: data.output,
|
|
58
73
|
progress: data.progress,
|
|
59
74
|
status: data.status,
|
|
75
|
+
taskType: data.taskType,
|
|
60
76
|
parentTaskId: data.parentTaskId,
|
|
61
77
|
attachments: data.attachments,
|
|
62
78
|
};
|
|
@@ -176,3 +192,259 @@ export async function buildContextPreamble(
|
|
|
176
192
|
|
|
177
193
|
return preamble;
|
|
178
194
|
}
|
|
195
|
+
|
|
196
|
+
// ─── Resume Preamble ───────────────────────────────────────────────────────────
|
|
197
|
+
|
|
198
|
+
/**
 * One row of the `logs` array returned by the task session-logs endpoint,
 * as consumed by the resume preamble. Only `content` and `createdAt` are
 * read by the summarizer in this file; the remaining fields mirror the row.
 */
interface SessionLogForPreamble {
  id: string;
  taskId?: string;
  sessionId: string;
  iteration: number;
  cli: string;
  /** Raw log line; parsed as JSON when possible. */
  content: string;
  lineNumber: number;
  /** Timestamp string; characters 11–19 are rendered as HH:MM:SS. */
  createdAt: string;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Fetch the session-log rows of `taskId` over HTTP, bounded server-side.
 *
 * Best-effort: a non-OK response, a payload without a `logs` array, or any
 * thrown error yields an empty array.
 *
 * @param apiUrl base URL of the API (no trailing slash is added or removed)
 * @param apiKey bearer token; when empty, no Authorization header is sent
 * @param taskId task whose session logs are requested
 */
async function fetchSessionLogsForResume(
  apiUrl: string,
  apiKey: string,
  taskId: string,
): Promise<SessionLogForPreamble[]> {
  const headers: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
  try {
    // Long-running parents can accumulate large `session_logs`, and the
    // preamble only consumes the tail, so the request carries `?limit=N`
    // (CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT) to keep dispatch fast and
    // memory-flat regardless of run length.
    const endpoint = `${apiUrl}/api/tasks/${taskId}/session-logs?limit=${CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT}`;
    const response = await fetch(endpoint, { headers });
    if (response.ok) {
      const payload = (await response.json()) as { logs?: SessionLogForPreamble[] };
      if (Array.isArray(payload.logs)) return payload.logs;
    }
  } catch {
    // fall through to the empty result
  }
  return [];
}
|
|
229
|
+
|
|
230
|
+
/**
 * Render a tool-call one-liner (`[HH:MM:SS] Name target`) when `candidate`
 * is an object shaped like `{ type: "tool_use", name, input? }`; otherwise
 * return `null`. The target is the first of `input.file_path`, `input.path`,
 * `input.command`, and is included only when it is a string.
 */
function sessionLogToolCall(ts: string, candidate: unknown): string | null {
  if (!candidate || typeof candidate !== "object") return null;
  const call = candidate as Record<string, unknown>;
  if (call.type !== "tool_use" || typeof call.name !== "string") return null;
  const input = call.input as Record<string, unknown> | undefined;
  const target = input?.file_path ?? input?.path ?? input?.command;
  return typeof target === "string" ? `[${ts}] ${call.name} ${target}` : `[${ts}] ${call.name}`;
}

/** Collapse whitespace, cap at 120 characters, and prefix the timestamp; `null` when nothing is left. */
function sessionLogSnippet(ts: string, text: string): string | null {
  const snippet = text.replace(/\s+/g, " ").slice(0, 120);
  return snippet ? `[${ts}] ${snippet}` : null;
}

/**
 * Format a single session_log line as a one-line tool-call summary. Falls back
 * to a truncated content snippet when the line isn't recognizable as a
 * tool call.
 *
 * Recognized shapes, in order:
 *   1. Anthropic / claude message-style — the first `tool_use` block inside
 *      `message.content`;
 *   2. Codex / generic event-style — the parsed object itself is a `tool_use`.
 *
 * Rows arrive from an HTTP response that is cast without validation, so a
 * non-string `content` yields `null` and a non-string `createdAt` yields an
 * empty timestamp instead of throwing.
 *
 * This function does not scrub secrets; the caller is expected to pass the
 * result through `scrubSecrets` before inserting it into the preamble.
 *
 * @returns the one-line summary, or `null` when the line has nothing to show
 *          (empty content, or JSON that is not an object/array).
 */
function summarizeSessionLogLine(line: SessionLogForPreamble): string | null {
  const ts = typeof line.createdAt === "string" ? line.createdAt.slice(11, 19) : ""; // HH:MM:SS
  if (typeof line.content !== "string") return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(line.content);
  } catch {
    // Not JSON: show the raw text, whitespace-collapsed.
    return sessionLogSnippet(ts, line.content);
  }
  if (!parsed || typeof parsed !== "object") return null;
  const obj = parsed as Record<string, unknown>;

  // Anthropic / claude message-style tool calls.
  const message = obj.message as Record<string, unknown> | undefined;
  const content = message?.content;
  if (Array.isArray(content)) {
    for (const block of content) {
      const summary = sessionLogToolCall(ts, block);
      if (summary !== null) return summary;
    }
  }

  // Codex / generic event-style: { type: 'tool_use', name: '...', input: {...} }
  const direct = sessionLogToolCall(ts, obj);
  if (direct !== null) return direct;

  // Fallback: short content snippet (still useful for diff/insight)
  return sessionLogSnippet(ts, JSON.stringify(parsed));
}
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
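* Build a resume-task preamble (this comment documents `buildResumeContextPreamble`, which is defined after `walkResumeChain` below).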
|
|
279
|
+
*
|
|
280
|
+
* Reads the parent task + its recent session_logs over HTTP (never touches
|
|
281
|
+
* `bun:sqlite` worker-side). Allocates the 4000-token budget:
|
|
282
|
+
*
|
|
283
|
+
* - 40% — full parent task description (never truncated)
|
|
284
|
+
* - 35% — last-N session_logs summary (tool-call one-liners; scrubbed)
|
|
285
|
+
* - 15% — artifacts/attachments index (names + pointers only)
|
|
286
|
+
* - 10% — fixed framing (header + continuation instructions)
|
|
287
|
+
*
|
|
288
|
+
* Truncation order: session-log summary (oldest first), then artifacts.
|
|
289
|
+
* The task description is never truncated.
|
|
290
|
+
*/
|
|
291
|
+
/**
 * Walk up the parentTaskId chain through `taskType === "resume"` ancestors
 * to find the original (non-resume) task. Returns the chain in order
 * [immediateParent, ..., original].
 *
 * The walk stops when:
 *   - a non-resume ancestor is reached (it is included as the last entry);
 *   - an ancestor cannot be fetched, or has no `parentTaskId`;
 *   - an ancestor ID repeats (cycle) — each task appears at most once, so
 *     callers never process the same ancestor twice;
 *   - MAX_RESUME_CHAIN_DEPTH entries have been collected (runaway chains).
 *
 * PR #594 review: cascading resumes (original → resume1 → resume2) had
 * `buildResumeContextPreamble` fetching only the immediate parent — whose
 * `task` text is the synthetic "Resume interrupted task..." prompt rather
 * than the original work brief. Walking the chain restores the original
 * description and lets us merge session logs from all resume attempts.
 *
 * @param apiUrl base URL passed through to `fetchTaskContextForPreamble`
 * @param apiKey API key passed through to `fetchTaskContextForPreamble`
 * @param immediateParentId ID of the task the walk starts from
 * @returns the collected chain; empty when the first fetch fails
 */
const MAX_RESUME_CHAIN_DEPTH = 10;

async function walkResumeChain(
  apiUrl: string,
  apiKey: string,
  immediateParentId: string,
): Promise<TaskContextForPreamble[]> {
  const chain: TaskContextForPreamble[] = [];
  const visited = new Set<string>();
  let currentId: string | undefined = immediateParentId;
  while (currentId && chain.length < MAX_RESUME_CHAIN_DEPTH && !visited.has(currentId)) {
    visited.add(currentId);
    const ctx: TaskContextForPreamble | null = await fetchTaskContextForPreamble(
      apiUrl,
      apiKey,
      currentId,
    );
    if (!ctx) break;
    chain.push(ctx);
    // Stop once we hit a non-resume ancestor — that's the original work.
    if (ctx.taskType !== "resume") break;
    currentId = ctx.parentTaskId;
  }
  return chain;
}
|
|
326
|
+
|
|
327
|
+
/**
 * Assemble the "Resuming Interrupted Task" markdown block for a task that
 * continues the work of `parentTaskId`.
 *
 * The block is built from the resume chain returned by `walkResumeChain`:
 *  - the task description comes from the LAST chain entry (the non-resume
 *    ancestor, or the deepest one that could be reached),
 *  - attachments come from the FIRST chain entry (the immediate parent),
 *  - session logs are fetched for every chain entry, merged, ordered by
 *    `createdAt`, and reduced to the most recent
 *    `CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT` lines before summarizing.
 *
 * Size budgeting is expressed as shares of `CONTEXT_PREAMBLE_RESUME_MAX_CHARS`:
 * 40% description, 35% log summaries, 15% artifacts. The description is always
 * inserted whole; when it exceeds its share, the excess is deducted from the
 * other two budgets (half each, floored at zero).
 *
 * @param apiUrl - forwarded to the fetch helpers.
 * @param apiKey - forwarded to the fetch helpers.
 * @param parentTaskId - id of the task this one resumes.
 * @returns the preamble text, or `null` when no ancestor context was found.
 */
export async function buildResumeContextPreamble(
  apiUrl: string,
  apiKey: string,
  parentTaskId: string,
): Promise<string | null> {
  const chain = await walkResumeChain(apiUrl, apiKey, parentTaskId);
  if (chain.length === 0) return null;

  // Deepest ancestor reached — source of the real work brief.
  const original = chain[chain.length - 1] ?? chain[0];
  if (!original) return null;
  // Closest ancestor — source of the attachments listed as "in flight".
  const parent = chain[0] ?? original;

  // Drops lines from one end of `lines` (mutating it) until the newline-joined
  // text fits `budget` or no lines remain; returns the joined text.
  const fitToBudget = (lines: string[], budget: number, dropOldestFirst: boolean): string => {
    let joined = lines.join("\n");
    while (lines.length > 0 && joined.length > budget) {
      if (dropOldestFirst) {
        lines.shift();
      } else {
        lines.pop();
      }
      joined = lines.join("\n");
    }
    return joined;
  };

  // Gather logs for the whole chain so earlier resume attempts stay visible.
  const perTaskLogs = await Promise.all(
    chain.map((member) => fetchSessionLogsForResume(apiUrl, apiKey, member.id)),
  );
  const allLogs = perTaskLogs.flat();
  allLogs.sort((left, right) => left.createdAt.localeCompare(right.createdAt));
  const recentLogs = allLogs.slice(-CONTEXT_PREAMBLE_RESUME_SESSION_LOG_LIMIT);

  // Header. No blank separator lines are emitted between these entries.
  const headerLines: string[] = [
    "\n---",
    "## Resuming Interrupted Task",
    "This task is a fresh-session continuation of an interrupted task (graceful",
    "shutdown / context-limit / operator action). The block below summarizes the",
    "original task, what was done so far, and the artifacts in flight.",
    "**Do not redo work already completed below — extend it.**",
    `Original task ID: \`${original.id}\``,
  ];
  if (chain.length > 1) {
    let ordinal: string;
    switch (chain.length) {
      case 2:
        ordinal = "2nd";
        break;
      case 3:
        ordinal = "3rd";
        break;
      default:
        ordinal = `${chain.length}th`;
    }
    headerLines.push(
      `Resume chain depth: ${chain.length} (this is at least the ${ordinal} resume attempt).`,
    );
  }
  headerLines.push("---", "### Original Task Description");
  const header = headerLines.join("\n");

  // Description: taken verbatim from the deepest ancestor, never trimmed here.
  const descSection = original.task;

  // Log summaries: summarize every recent line first, then scrub secrets, so
  // that budget trimming only ever removes whole, already-redacted lines.
  const rawSummaries = recentLogs.flatMap((entry) => {
    const summary = summarizeSessionLogLine(entry);
    return summary ? [summary] : [];
  });
  const scrubbedSummaries = rawSummaries.map((text) => scrubSecrets(text));

  // Budgets. An oversized description eats into the logs/artifacts budgets.
  const descBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.4);
  const baseLogsBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.35);
  const baseArtBudget = Math.floor(CONTEXT_PREAMBLE_RESUME_MAX_CHARS * 0.15);
  const descOverflow = descSection.length - descBudget;
  const logsBudget =
    descOverflow > 0 ? Math.max(0, baseLogsBudget - Math.ceil(descOverflow / 2)) : baseLogsBudget;
  const artBudget =
    descOverflow > 0 ? Math.max(0, baseArtBudget - Math.floor(descOverflow / 2)) : baseArtBudget;

  // Oldest summaries are dropped first.
  const logsSection = fitToBudget(scrubbedSummaries, logsBudget, true);

  // Artifacts: name plus pointer only; entries at the end are dropped first.
  const usableAttachments =
    parent.attachments?.filter((item) => item.name && (item.url || item.path || item.pageId)) ??
    [];
  const artifactLines = usableAttachments.map(
    (item) => ` - **${item.name}**: \`${formatAttachmentPointer(item)}\``,
  );
  const artSection = fitToBudget(artifactLines, artBudget, false);

  const assembled = [
    header,
    descSection,
    "",
    ...(logsSection ? ["### Recent Tool Calls", "", logsSection, ""] : []),
    ...(artSection ? ["### Artifacts In Flight", "", artSection, ""] : []),
    "---",
    "",
    `To review the full prior session call \`get-task-details\` with taskId \`${original.id}\`.`,
    "",
    "---",
    "",
  ].join("\n");

  // Safety net for very long descriptions: cut at the character cap and append
  // a notice. The notice is added after the cut, so the returned text is longer
  // than the cap by the length of that notice.
  if (assembled.length <= CONTEXT_PREAMBLE_RESUME_MAX_CHARS) {
    return assembled;
  }
  return `${assembled.slice(0, CONTEXT_PREAMBLE_RESUME_MAX_CHARS)}\n\n[resume preamble truncated to ${CONTEXT_PREAMBLE_RESUME_MAX_TOKENS}-token budget]\n\n---\n`;
}
|
|
@@ -133,7 +133,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
|
|
|
133
133
|
|
|
134
134
|
// Fast path: already satisfied at boot.
|
|
135
135
|
let currentProvider = readProvider();
|
|
136
|
-
let status = checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
|
|
136
|
+
let status = await checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
|
|
137
137
|
if (status.ready) {
|
|
138
138
|
log(
|
|
139
139
|
`[boot] credentials ready (provider=${currentProvider}, satisfiedBy=${status.satisfiedBy})`,
|
|
@@ -183,7 +183,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
|
|
|
183
183
|
currentProvider = nextProvider;
|
|
184
184
|
}
|
|
185
185
|
|
|
186
|
-
status = checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
|
|
186
|
+
status = await checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
|
|
187
187
|
|
|
188
188
|
if (!status.ready) {
|
|
189
189
|
// Exponential backoff with cap.
|