@desplega.ai/agent-swarm 1.85.0 → 1.86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +1 -1
- package/package.json +8 -6
- package/src/be/db.ts +44 -0
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/modelsdev-cache.json +152028 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +12 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/runner.ts +3 -3
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +83 -2
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +417 -97
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-managed-adapter.test.ts +4 -4
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +20 -7
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tools/send-task.ts +30 -9
- package/src/utils/context-window.ts +1 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
/** Per-token rate fields of a models.dev model entry. Every field is optional in the snapshot. */
export interface ModelsDevCostBlock {
  input?: number;
  output?: number;
  cache_read?: number;
  cache_write?: number;
}

/** A single model entry of the models.dev snapshot. */
export interface ModelsDevModel {
  id?: string;
  cost?: ModelsDevCostBlock;
}

/** A single provider entry of the models.dev snapshot; its models are keyed by string. */
export interface ModelsDevProvider {
  models?: Record<string, ModelsDevModel>;
}

/** The whole snapshot: provider entries keyed by string. */
export type ModelsDevCache = Record<string, ModelsDevProvider>;

/** Relative location of the vendored snapshot; joined onto each candidate base directory. */
export const MODELSDEV_CACHE_PATH = path.join("src", "be", "modelsdev-cache.json");

/**
 * Resolve the vendored models.dev cache from source checkouts and compiled
 * Docker images. The API image copies the snapshot to `/app/src/be/...`.
 *
 * Candidates are tried in order: the `MODELSDEV_CACHE_PATH` environment
 * variable (when set), the current working directory, its parent, then `/app`.
 * A candidate is skipped when it cannot be read, is not valid JSON, or does
 * not parse to a JSON object (arrays and primitives are rejected so callers
 * never receive something that is not a provider map).
 *
 * @returns The first usable snapshot, or `null` when no candidate qualifies.
 */
export function loadModelsDevCache(): ModelsDevCache | null {
  const explicitPath = process.env.MODELSDEV_CACHE_PATH;
  const candidates = [
    ...(explicitPath ? [explicitPath] : []),
    path.join(process.cwd(), MODELSDEV_CACHE_PATH),
    path.join(process.cwd(), "..", MODELSDEV_CACHE_PATH),
    path.join("/app", MODELSDEV_CACHE_PATH),
  ];

  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(candidate, "utf-8"));
      // Only the top-level container is checked here; entry fields are all optional.
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        return parsed as ModelsDevCache;
      }
    } catch {
      // Best-effort lookup: missing/unreadable file or invalid JSON — try next candidate.
    }
  }

  return null;
}
|
package/src/be/seed-pricing.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
|
|
3
3
|
*
|
|
4
|
-
* The vendored models.dev snapshot at `
|
|
4
|
+
* The vendored models.dev snapshot at `src/be/modelsdev-cache.json` is the
|
|
5
5
|
* single source of truth for per-token rates. We project it into rows keyed by
|
|
6
6
|
* `(provider, model, token_class)` so the recompute path in
|
|
7
7
|
* `src/http/session-data.ts` can rebuild USD from tokens regardless of which
|
|
@@ -17,30 +17,15 @@
|
|
|
17
17
|
* admin route (`POST /api/pricing`) — we don't overwrite seed rows.
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
|
-
import { readFileSync } from "node:fs";
|
|
21
|
-
import path from "node:path";
|
|
22
20
|
import type { PricingProvider, PricingTokenClass } from "../types";
|
|
23
21
|
import { getDb } from "./db";
|
|
22
|
+
import {
|
|
23
|
+
loadModelsDevCache,
|
|
24
|
+
type ModelsDevCache,
|
|
25
|
+
type ModelsDevCostBlock,
|
|
26
|
+
} from "./modelsdev-cache";
|
|
24
27
|
import { normalizeModelKey } from "./pricing-normalize";
|
|
25
28
|
|
|
26
|
-
interface ModelsDevCostBlock {
|
|
27
|
-
input?: number;
|
|
28
|
-
output?: number;
|
|
29
|
-
cache_read?: number;
|
|
30
|
-
cache_write?: number;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
interface ModelsDevModel {
|
|
34
|
-
id?: string;
|
|
35
|
-
cost?: ModelsDevCostBlock;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
interface ModelsDevProvider {
|
|
39
|
-
models?: Record<string, ModelsDevModel>;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
type ModelsDevCache = Record<string, ModelsDevProvider>;
|
|
43
|
-
|
|
44
29
|
/**
|
|
45
30
|
* Per-harness manual rates that models.dev doesn't carry. Keep the source URL
|
|
46
31
|
* and a verification date next to each entry so {@link MANUAL_PRICING_OVERRIDES}
|
|
@@ -82,33 +67,11 @@ const MANUAL_PRICING_OVERRIDES: Array<{
|
|
|
82
67
|
* fields the models.dev snapshot doesn't index directly; we map them here.
|
|
83
68
|
*/
|
|
84
69
|
const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
|
|
85
|
-
opus: "claude-opus-4-
|
|
70
|
+
opus: "claude-opus-4-8",
|
|
86
71
|
sonnet: "claude-sonnet-4-6",
|
|
87
72
|
haiku: "claude-haiku-4-5",
|
|
88
73
|
};
|
|
89
74
|
|
|
90
|
-
/**
|
|
91
|
-
* Resolve the path to the vendored models.dev cache. The UI copy is canonical.
|
|
92
|
-
* We treat this as best-effort: if the file is missing (developer ran the
|
|
93
|
-
* server without `ui/` checked out), we log and continue with manual rates
|
|
94
|
-
* only — better than crashing the boot.
|
|
95
|
-
*/
|
|
96
|
-
function loadModelsDevCache(): ModelsDevCache | null {
|
|
97
|
-
const candidates = [
|
|
98
|
-
path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
|
|
99
|
-
path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
|
|
100
|
-
];
|
|
101
|
-
for (const cand of candidates) {
|
|
102
|
-
try {
|
|
103
|
-
const raw = readFileSync(cand, "utf-8");
|
|
104
|
-
return JSON.parse(raw) as ModelsDevCache;
|
|
105
|
-
} catch {
|
|
106
|
-
// try next candidate
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
return null;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
75
|
interface PricingSeedRow {
|
|
113
76
|
provider: PricingProvider;
|
|
114
77
|
model: string;
|
package/src/cli.tsx
CHANGED
|
@@ -16,8 +16,12 @@ import { getApiKey, setApiKey } from "./utils/api-key.ts";
|
|
|
16
16
|
// Get CLI name from bin field (assumes single key)
|
|
17
17
|
const binName = Object.keys(pkg.bin)[0];
|
|
18
18
|
|
|
19
|
-
// Restore cursor on exit
|
|
20
|
-
|
|
19
|
+
// Restore cursor on exit — only when stdout is a TTY. Non-TTY invocations
|
|
20
|
+
// (like the codex-session-runner subprocess whose stdout is a JSON pipe)
|
|
21
|
+
// must not inject terminal escape sequences into the byte stream.
|
|
22
|
+
const restoreCursor = () => {
  // Piped / redirected stdout must stay free of terminal escape sequences.
  if (!process.stdout.isTTY) return;
  // ANSI "show cursor".
  process.stdout.write("\x1B[?25h");
};
|
|
21
25
|
process.on("exit", restoreCursor);
|
|
22
26
|
process.on("SIGINT", () => {
|
|
23
27
|
restoreCursor();
|
|
@@ -570,6 +574,12 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
|
|
|
570
574
|
const { runCodexLogin } = await import("./commands/codex-login");
|
|
571
575
|
const codexLoginArgs = process.argv.slice(process.argv.indexOf("codex-login") + 1);
|
|
572
576
|
await runCodexLogin(codexLoginArgs);
|
|
577
|
+
} else if (args.command === "codex-session-runner") {
|
|
578
|
+
// Internal subcommand — invoked by CodexSubprocessSession to host a single
|
|
579
|
+
// codex session in a throwaway subprocess. See src/commands/codex-session-runner.ts
|
|
580
|
+
// for the rationale (Picateclas spawn-OOM permanent fix, 2026-05-28).
|
|
581
|
+
const { runCodexSessionRunner } = await import("./commands/codex-session-runner");
|
|
582
|
+
await runCodexSessionRunner();
|
|
573
583
|
} else if (args.command === "claude-managed-setup") {
|
|
574
584
|
const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
|
|
575
585
|
const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex session subprocess runner.
|
|
3
|
+
*
|
|
4
|
+
* Entry point for the `codex-session-runner` CLI subcommand. Reads a
|
|
5
|
+
* `CodexSubprocessInput` payload from stdin, drives a fresh in-process
|
|
6
|
+
* `CodexSession`, and pipes the session's `ProviderEvent` stream + final
|
|
7
|
+
* `ProviderResult` back to its parent over stdout as line-delimited JSON.
|
|
8
|
+
*
|
|
9
|
+
* Why this exists: the previous architecture ran every codex session
|
|
10
|
+
* directly inside the long-lived worker runner. The `@openai/codex-sdk`
|
|
11
|
+
* leaks SDK state (parsers, transcript buffers, JSON-RPC plumbing) into
|
|
12
|
+
* the runner's heap, and after ~1,500 task completions on a hot worker
|
|
13
|
+
* (Picateclas, 2026-05-28) the runner's VSZ ballooned to 74 GB / RSS to
|
|
14
|
+
* 7.5 GB, causing every subsequent `fork()` to fail ENOMEM regardless of
|
|
15
|
+
* current RSS (the kernel reserves CoW for the full VSZ at fork time).
|
|
16
|
+
*
|
|
17
|
+
* Moving each session into its own subprocess means the SDK state dies
|
|
18
|
+
* with the subprocess. The runner stays at the ~234 MB baseline observed
|
|
19
|
+
* on Reviewer (the cohort partner that did 481 task completions without
|
|
20
|
+
* the OOM symptom). See task `fa0c0681` for the byte-by-byte breakdown.
|
|
21
|
+
*
|
|
22
|
+
* Wire protocol over stdout (one JSON object per line):
|
|
23
|
+
* {"kind":"event", "event": <ProviderEvent>}
|
|
24
|
+
* {"kind":"result", "result": <ProviderResult>}
|
|
25
|
+
* {"kind":"error", "message": "..."}
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { createInProcessCodexSession } from "../providers/codex-adapter";
|
|
29
|
+
import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
|
|
30
|
+
|
|
31
|
+
/** Payload the runner parses from stdin before starting the session. */
interface CodexSubprocessInput {
  /** Session configuration handed to `createInProcessCodexSession`. */
  config: ProviderSessionConfig;
  /** Forwarded as the `skillsDir` option of `createInProcessCodexSession`. */
  skillsDir?: string;
  /** Extra environment entries merged over `config.env` when non-empty. */
  parentOtelEnv?: Record<string, string>;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Read stdin to the end and return it as one UTF-8 decoded string.
 *
 * Uses `Bun.stdin.stream()` when that method exists and yields a stream;
 * otherwise falls back to reading `/dev/stdin` through `Bun.file`.
 */
async function readAllStdin(): Promise<string> {
  // Bun.stdin is a BunFile in some versions, Web stream in others, so the
  // `stream` method is probed through a structural type.
  type StreamableStdin = { stream?: () => ReadableStream<Uint8Array> };
  const stdin = Bun.stdin as unknown as StreamableStdin;
  const utf8 = new TextDecoder();

  const body = stdin.stream ? stdin.stream() : null;
  if (!body) {
    // Fallback: read via Bun.file (file-like access works for piped stdin too)
    return await Bun.file("/dev/stdin").text();
  }

  const reader = body.getReader();
  let text = "";
  for (let chunk = await reader.read(); !chunk.done; chunk = await reader.read()) {
    // Streaming decode keeps multi-byte sequences split across chunks intact.
    if (chunk.value) text += utf8.decode(chunk.value, { stream: true });
  }
  // Flush whatever the decoder still buffers.
  text += utf8.decode();
  return text;
}
|
|
58
|
+
|
|
59
|
+
/** Emit `obj` on stdout as a single newline-terminated JSON line. */
function writeLine(obj: unknown): void {
  const line = JSON.stringify(obj) + "\n";
  process.stdout.write(line);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Entry point of the `codex-session-runner` subcommand.
 *
 * Delegates to `runCodexSessionRunnerInner`; anything that escapes it is
 * logged to stderr, reported on stdout as a `{"kind":"error"}` line, and the
 * process exits with status 1.
 */
export async function runCodexSessionRunner(): Promise<void> {
  try {
    await runCodexSessionRunnerInner();
    return;
  } catch (failure) {
    const asError = failure instanceof Error ? failure : null;
    const reason = asError ? asError.message : String(failure);

    console.error(`[codex-session-runner] top-level crash: ${reason}`);
    if (asError?.stack) console.error(asError.stack);

    writeLine({ kind: "error", message: `codex-session-runner: unexpected crash: ${reason}` });
    process.exit(1);
  }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Host one codex session: parse the stdin payload, create the in-process
 * session, stream its events and final result to stdout as JSON lines, then
 * exit with the result's exit code (0 when it has none).
 *
 * Failure paths (unreadable/invalid payload, session creation failure) log to
 * stderr, emit a `{"kind":"error"}` line and exit with status 1.
 */
async function runCodexSessionRunnerInner(): Promise<void> {
  let input: CodexSubprocessInput;
  try {
    const raw = await readAllStdin();
    const parsed: unknown = JSON.parse(raw);
    // Reject payloads without a `config` object up front so the parent gets a
    // precise error instead of a TypeError from a later property access.
    const config = (parsed as { config?: unknown } | null)?.config;
    if (typeof config !== "object" || config === null) {
      throw new Error("payload must be a JSON object with a `config` object");
    }
    input = parsed as CodexSubprocessInput;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] stdin parse failed: ${message}`);
    writeLine({
      kind: "error",
      message: `codex-session-runner: failed to parse stdin: ${message}`,
    });
    process.exit(1);
  }

  // Forward the parent's captured OTel TRACEPARENT (and friends) into the
  // session config's env so the spawned Codex CLI nests its spans under our
  // worker.session trace. We deliberately do NOT call
  // `buildOtelTraceparentEnv` from inside this subprocess — its tracer has
  // no active span, so it would emit nothing.
  if (input.parentOtelEnv && Object.keys(input.parentOtelEnv).length > 0) {
    input.config.env = { ...(input.config.env ?? {}), ...input.parentOtelEnv };
  }

  let session: Awaited<ReturnType<typeof createInProcessCodexSession>>;
  try {
    session = await createInProcessCodexSession(input.config, {
      skillsDir: input.skillsDir,
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] createSession failed: ${message}`);
    writeLine({ kind: "error", message: `codex-session-runner: createSession failed: ${message}` });
    process.exit(1);
  }

  // Forward SIGTERM / SIGINT to the in-process session so the runner can
  // gracefully cancel us. The parent `CodexSubprocessSession.abort()` sends
  // SIGTERM here; the session's AbortController catches it and the codex
  // CLI subprocess (a grandchild) gets cleaned up.
  const onSignal = (signal: NodeJS.Signals) => {
    void session
      .abort()
      .catch((err: unknown) => {
        // A failed abort must not surface as an unhandled rejection; we still
        // want to exit with the conventional signal status below.
        const message = err instanceof Error ? err.message : String(err);
        console.error(`[codex-session-runner] abort failed: ${message}`);
      })
      .finally(() => {
        // give the session a beat to emit its cancellation result, then exit
        setTimeout(() => process.exit(signal === "SIGINT" ? 130 : 143), 250);
      });
  };
  process.on("SIGTERM", () => onSignal("SIGTERM"));
  process.on("SIGINT", () => onSignal("SIGINT"));

  session.onEvent((event: ProviderEvent) => {
    writeLine({ kind: "event", event });
  });

  const result: ProviderResult = await session.waitForCompletion();
  writeLine({ kind: "result", result });
  process.exit(result.exitCode ?? 0);
}
|
|
@@ -133,7 +133,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
|
|
|
133
133
|
|
|
134
134
|
// Fast path: already satisfied at boot.
|
|
135
135
|
let currentProvider = readProvider();
|
|
136
|
-
let status = checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
|
|
136
|
+
let status = await checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
|
|
137
137
|
if (status.ready) {
|
|
138
138
|
log(
|
|
139
139
|
`[boot] credentials ready (provider=${currentProvider}, satisfiedBy=${status.satisfiedBy})`,
|
|
@@ -183,7 +183,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
|
|
|
183
183
|
currentProvider = nextProvider;
|
|
184
184
|
}
|
|
185
185
|
|
|
186
|
-
status = checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
|
|
186
|
+
status = await checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
|
|
187
187
|
|
|
188
188
|
if (!status.ready) {
|
|
189
189
|
// Exponential backoff with cap.
|
|
@@ -23,7 +23,6 @@ import { checkClaudeManagedCredentials } from "../providers/claude-managed-adapt
|
|
|
23
23
|
import { checkCodexCredentials } from "../providers/codex-adapter";
|
|
24
24
|
import { checkDevinCredentials } from "../providers/devin-adapter";
|
|
25
25
|
import { checkOpencodeCredentials } from "../providers/opencode-adapter";
|
|
26
|
-
import { checkPiMonoCredentials } from "../providers/pi-mono-adapter";
|
|
27
26
|
import type { CredCheckOptions, CredStatus } from "../providers/types";
|
|
28
27
|
import type { AgentCredStatus, AgentLatestModel, ProviderName } from "../types";
|
|
29
28
|
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
@@ -55,12 +54,16 @@ export const REQUIRED_CRED_VARS_BY_PROVIDER: Record<SupportedProvider, readonly
|
|
|
55
54
|
/**
|
|
56
55
|
* Run the predicate for `provider`. Unknown providers throw — call sites
|
|
57
56
|
* should treat that as a configuration bug, not a user-correctable state.
|
|
57
|
+
*
|
|
58
|
+
* The `pi` case uses a dynamic import so `@earendil-works/pi-coding-agent`
|
|
59
|
+
* (which has module-level side effects that crash in the Bun compiled
|
|
60
|
+
* binary) is only loaded when the pi provider is actually selected.
|
|
58
61
|
*/
|
|
59
|
-
export function checkProviderCredentials(
|
|
62
|
+
export async function checkProviderCredentials(
|
|
60
63
|
provider: string,
|
|
61
64
|
env: Record<string, string | undefined>,
|
|
62
65
|
opts?: CredCheckOptions,
|
|
63
|
-
): CredStatus {
|
|
66
|
+
): Promise<CredStatus> {
|
|
64
67
|
switch (provider) {
|
|
65
68
|
case "claude":
|
|
66
69
|
return checkClaudeCredentials(env);
|
|
@@ -72,8 +75,10 @@ export function checkProviderCredentials(
|
|
|
72
75
|
return checkDevinCredentials(env);
|
|
73
76
|
case "opencode":
|
|
74
77
|
return checkOpencodeCredentials(env, opts);
|
|
75
|
-
case "pi":
|
|
78
|
+
case "pi": {
|
|
79
|
+
const { checkPiMonoCredentials } = await import("../providers/pi-mono-adapter");
|
|
76
80
|
return checkPiMonoCredentials(env, opts);
|
|
81
|
+
}
|
|
77
82
|
default:
|
|
78
83
|
throw new Error(
|
|
79
84
|
`checkProviderCredentials: unknown provider "${provider}". Supported: claude, claude-managed, codex, devin, opencode, pi.`,
|
|
@@ -386,7 +391,7 @@ export async function buildCredStatusReport(
|
|
|
386
391
|
opts: CredCheckOptions = {},
|
|
387
392
|
kind: AgentCredStatus["reportKind"],
|
|
388
393
|
): Promise<AgentCredStatus> {
|
|
389
|
-
const presence = checkProviderCredentials(provider, env, opts);
|
|
394
|
+
const presence = await checkProviderCredentials(provider, env, opts);
|
|
390
395
|
let liveTest: AgentCredStatus["liveTest"] = null;
|
|
391
396
|
if (presence.ready) {
|
|
392
397
|
const live = await validateProviderCredentials(provider);
|
package/src/commands/runner.ts
CHANGED
|
@@ -2182,7 +2182,7 @@ export function implicitCloseActiveToolSpans(
|
|
|
2182
2182
|
}
|
|
2183
2183
|
|
|
2184
2184
|
async function spawnProviderProcess(
|
|
2185
|
-
adapter: ReturnType<typeof createProviderAdapter
|
|
2185
|
+
adapter: Awaited<ReturnType<typeof createProviderAdapter>>,
|
|
2186
2186
|
opts: {
|
|
2187
2187
|
prompt: string;
|
|
2188
2188
|
logFile: string;
|
|
@@ -3121,7 +3121,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3121
3121
|
// Create provider adapter using the resolved value. `let` so the poll-loop
|
|
3122
3122
|
// reconciliation block (Section 4) can swap it live when an operator changes
|
|
3123
3123
|
// HARNESS_PROVIDER in swarm_config — call sites read the current binding.
|
|
3124
|
-
let adapter = createProviderAdapter(bootProvider);
|
|
3124
|
+
let adapter = await createProviderAdapter(bootProvider);
|
|
3125
3125
|
|
|
3126
3126
|
// Configure HTTP-based template resolution (workers resolve via API, not local DB)
|
|
3127
3127
|
if (apiKey) {
|
|
@@ -3368,7 +3368,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3368
3368
|
const previous = state.harnessProvider;
|
|
3369
3369
|
console.log(`[${role}] [harness] Reconciling adapter: ${previous} → ${resolvedProvider}`);
|
|
3370
3370
|
try {
|
|
3371
|
-
adapter = createProviderAdapter(resolvedProvider);
|
|
3371
|
+
adapter = await createProviderAdapter(resolvedProvider);
|
|
3372
3372
|
state.harnessProvider = resolvedProvider;
|
|
3373
3373
|
basePrompt = await buildSystemPrompt();
|
|
3374
3374
|
resolvedSystemPrompt = additionalSystemPrompt
|
|
@@ -16,8 +16,27 @@ import "./session-templates";
|
|
|
16
16
|
/** Max characters per individual injected section before truncation */
|
|
17
17
|
const BOOTSTRAP_MAX_CHARS = 20_000;
|
|
18
18
|
|
|
19
|
-
/**
|
|
20
|
-
|
|
19
|
+
/**
|
|
20
|
+
* Max total characters across all injected sections combined.
|
|
21
|
+
*
|
|
22
|
+
* Sized to stay safely below Linux's `MAX_ARG_STRLEN = 131,072` bytes — the
|
|
23
|
+
* per-argv-element kernel limit that bit Picateclas attempts 4-6
|
|
24
|
+
* (2026-05-28). The base-prompt becomes one argv element when the claude
|
|
25
|
+
* adapter passes `--append-system-prompt <prompt>`, so the prompt MUST stay
|
|
26
|
+
* under MAX_ARG_STRLEN even with a few KB of growth. The claude-adapter
|
|
27
|
+
* also stages the prompt to a file (`--append-system-prompt-file`) as a
|
|
28
|
+
* belt-and-braces fix, but the budget cap is the cheap insurance for any
|
|
29
|
+
* code path that ever passes the prompt inline.
|
|
30
|
+
*/
|
|
31
|
+
const BOOTSTRAP_TOTAL_MAX_CHARS = 120_000;
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Per-section cap applied to the *repo* CLAUDE.md (the agent-swarm OSS
|
|
35
|
+
* one is ~18 KB and the biggest volatile component of the system prompt).
|
|
36
|
+
* 12 KB leaves room for the static prompt scaffold + identity + tools +
|
|
37
|
+
* agent CLAUDE.md without ever crossing MAX_ARG_STRLEN.
|
|
38
|
+
*/
|
|
39
|
+
const REPO_CLAUDE_MD_MAX_CHARS = 12_000;
|
|
21
40
|
|
|
22
41
|
/** Truncation notice appended when a section is cut */
|
|
23
42
|
const truncationNotice = (file: string) =>
|
|
@@ -150,7 +169,16 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
|
|
|
150
169
|
prompt += `The following CLAUDE.md is from the repository cloned at \`${args.repoContext.clonePath}\`. `;
|
|
151
170
|
prompt += `**IMPORTANT: These instructions apply ONLY when working within the \`${args.repoContext.clonePath}\` directory.** `;
|
|
152
171
|
prompt += `Do NOT apply these rules to files outside that directory.\n\n`;
|
|
153
|
-
|
|
172
|
+
// Cap the repo CLAUDE.md so it can't blow the bootstrap budget on its
|
|
173
|
+
// own. Pre-cap, the agent-swarm OSS CLAUDE.md was 17,856 B — the
|
|
174
|
+
// single biggest volatile component of the system prompt and the
|
|
175
|
+
// direct driver of the Picateclas argv-E2BIG saga (2026-05-28).
|
|
176
|
+
// Truncation footer points readers at the on-disk copy in the cwd.
|
|
177
|
+
prompt += `${truncateRepoClaudeMd(
|
|
178
|
+
args.repoContext.claudeMd,
|
|
179
|
+
args.repoContext.clonePath,
|
|
180
|
+
REPO_CLAUDE_MD_MAX_CHARS,
|
|
181
|
+
)}\n`;
|
|
154
182
|
} else if (!args.repoContext.warning) {
|
|
155
183
|
prompt += `Repository is cloned at \`${args.repoContext.clonePath}\` but has no CLAUDE.md file.\n`;
|
|
156
184
|
}
|
|
@@ -267,6 +295,24 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
|
|
|
267
295
|
return prompt;
|
|
268
296
|
};
|
|
269
297
|
|
|
298
|
+
/**
 * Truncate the repo CLAUDE.md to a character budget so it can't blow the
 * bootstrap argv ceiling on its own (Picateclas spawn-OOM, 2026-05-28).
 *
 * The footer is structured as a `[...truncated — see <path>/CLAUDE.md for full
 * content]` notice so anyone reading the system prompt knows exactly where
 * the dropped content lives on disk.
 *
 * The budget is measured with `String.length` (UTF-16 code units), not bytes,
 * so non-ASCII content can occupy more bytes than `budget` once encoded.
 * The cut never lands inside a surrogate pair: if the last kept code unit
 * would be a high surrogate, it is dropped too.
 *
 * Exported only for testing.
 *
 * @param content   Full CLAUDE.md text.
 * @param clonePath Repository path used in the truncation notice.
 * @param budget    Maximum length of the returned string, notice included.
 * @returns `content` unchanged when it fits; otherwise the truncated content
 *          followed by the notice. When the budget cannot even hold the
 *          notice, only the notice (without its leading newlines) is returned.
 */
export function truncateRepoClaudeMd(content: string, clonePath: string, budget: number): string {
  if (content.length <= budget) return content;

  const notice = `\n\n[...truncated — see ${clonePath}/CLAUDE.md for full content]\n`;
  const contentBudget = budget - notice.length;
  if (contentBudget <= 0) return notice.trimStart();

  // Avoid leaving a lone high surrogate at the cut point (it would encode as U+FFFD).
  let cut = contentBudget;
  const lastKept = content.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;

  return content.slice(0, cut) + notice;
}
|
|
315
|
+
|
|
270
316
|
/** Truncate a section to fit within a character budget, appending a notice if cut */
|
|
271
317
|
function truncateSection(
|
|
272
318
|
content: string | undefined,
|
|
@@ -318,6 +318,30 @@ export function buildClaudeCodeOtelEnv(
|
|
|
318
318
|
return otelEnv;
|
|
319
319
|
}
|
|
320
320
|
|
|
321
|
+
/**
 * Build the on-disk location where a task's system prompt is staged.
 *
 * Passing the prompt inline as `--append-system-prompt <value>` turns the
 * whole prompt into a single argv element. Linux caps one argv element at
 * `MAX_ARG_STRLEN = 131072` bytes, while the system prompt (CLAUDE.md +
 * TOOLS.md + identity files + repo CLAUDE.md) routinely runs 50–80 KB; a
 * little growth crosses that limit and `posix_spawn` fails with E2BIG,
 * killing the worker (Picateclas attempts 4-6, 2026-05-28).
 *
 * With `claude --append-system-prompt-file <path>` the prompt is read from
 * disk instead, so argv only carries the filename and the prompt size no
 * longer depends on the kernel's argv ceiling.
 *
 * Exported for unit testing.
 *
 * @param taskId Task identifier embedded in the filename.
 * @returns Path of the form `/tmp/agent-swarm-system-prompt-<taskId>.txt`.
 */
export function getSystemPromptFilePath(taskId: string): string {
  // The taskId is a UUID, so it is safe inside a /tmp filename. The name
  // follows the existing /tmp/agent-swarm-task-${pid}.json and
  // /tmp/mcp-${taskId}.json convention, letting a /tmp janitor locate all
  // session-scoped state under the same prefix.
  const fileName = `agent-swarm-system-prompt-${taskId}.txt`;
  return `/tmp/${fileName}`;
}
|
|
344
|
+
|
|
321
345
|
class ClaudeSession implements ProviderSession {
|
|
322
346
|
private proc: ReturnType<typeof Bun.spawn>;
|
|
323
347
|
private listeners: Array<(event: ProviderEvent) => void> = [];
|
|
@@ -327,6 +351,8 @@ class ClaudeSession implements ProviderSession {
|
|
|
327
351
|
private errorTracker = new SessionErrorTracker();
|
|
328
352
|
private taskFilePid: number;
|
|
329
353
|
private contextWindowSize: number;
|
|
354
|
+
/** Path to the system-prompt temp file when one was staged for this session. */
|
|
355
|
+
private systemPromptFile: string | null;
|
|
330
356
|
|
|
331
357
|
constructor(
|
|
332
358
|
private config: ProviderSessionConfig,
|
|
@@ -335,9 +361,11 @@ class ClaudeSession implements ProviderSession {
|
|
|
335
361
|
taskFilePid: number,
|
|
336
362
|
private sessionMcpConfig: string | null = null,
|
|
337
363
|
private claudeBinaryArgv: readonly string[] = ["claude"],
|
|
364
|
+
systemPromptFile: string | null = null,
|
|
338
365
|
) {
|
|
339
366
|
this.taskFilePid = taskFilePid;
|
|
340
367
|
this.contextWindowSize = getContextWindowSize(model);
|
|
368
|
+
this.systemPromptFile = systemPromptFile;
|
|
341
369
|
const cmd = this.buildCommand();
|
|
342
370
|
|
|
343
371
|
console.log(
|
|
@@ -403,7 +431,13 @@ class ClaudeSession implements ProviderSession {
|
|
|
403
431
|
cmd.push(...this.config.additionalArgs);
|
|
404
432
|
}
|
|
405
433
|
|
|
406
|
-
|
|
434
|
+
// System prompt is staged on disk and read via the file-flag — see
|
|
435
|
+
// `getSystemPromptFilePath` for the rationale (argv E2BIG hardening,
|
|
436
|
+
// Picateclas spawn-OOM, 2026-05-28). The legacy inline form is kept as
|
|
437
|
+
// a fallback for the (unlikely) case where the file couldn't be staged.
|
|
438
|
+
if (this.systemPromptFile) {
|
|
439
|
+
cmd.push("--append-system-prompt-file", this.systemPromptFile);
|
|
440
|
+
} else if (this.config.systemPrompt) {
|
|
407
441
|
cmd.push("--append-system-prompt", this.config.systemPrompt);
|
|
408
442
|
}
|
|
409
443
|
|
|
@@ -490,7 +524,7 @@ class ClaudeSession implements ProviderSession {
|
|
|
490
524
|
await logFileHandle.end();
|
|
491
525
|
const exitCode = await this.proc.exited;
|
|
492
526
|
|
|
493
|
-
// Cleanup task file
|
|
527
|
+
// Cleanup task file, per-session MCP config, and per-task system prompt
|
|
494
528
|
await cleanupTaskFile(this.taskFilePid);
|
|
495
529
|
if (this.sessionMcpConfig) {
|
|
496
530
|
try {
|
|
@@ -499,6 +533,13 @@ class ClaudeSession implements ProviderSession {
|
|
|
499
533
|
// ignore — temp file may already be gone
|
|
500
534
|
}
|
|
501
535
|
}
|
|
536
|
+
if (this.systemPromptFile) {
|
|
537
|
+
try {
|
|
538
|
+
await unlink(this.systemPromptFile);
|
|
539
|
+
} catch {
|
|
540
|
+
// ignore — temp file may already be gone
|
|
541
|
+
}
|
|
542
|
+
}
|
|
502
543
|
|
|
503
544
|
if (exitCode !== 0 && stderrOutput) {
|
|
504
545
|
console.error(
|
|
@@ -723,6 +764,22 @@ class ClaudeSession implements ProviderSession {
|
|
|
723
764
|
startedAt: new Date().toISOString(),
|
|
724
765
|
});
|
|
725
766
|
|
|
767
|
+
// Re-stage the system prompt for the retry — the original was unlinked
|
|
768
|
+
// when the first session finished. Same soft-fail semantics: null
|
|
769
|
+
// falls back to the inline --append-system-prompt argv.
|
|
770
|
+
let retrySystemPromptFile: string | null = null;
|
|
771
|
+
if (retryConfig.systemPrompt) {
|
|
772
|
+
const candidate = getSystemPromptFilePath(retryConfig.taskId);
|
|
773
|
+
try {
|
|
774
|
+
await writeFile(candidate, retryConfig.systemPrompt);
|
|
775
|
+
retrySystemPromptFile = candidate;
|
|
776
|
+
} catch (err) {
|
|
777
|
+
console.warn(
|
|
778
|
+
`\x1b[33m[claude]\x1b[0m Failed to stage retry system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv.`,
|
|
779
|
+
);
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
|
|
726
783
|
const retrySession = new ClaudeSession(
|
|
727
784
|
retryConfig,
|
|
728
785
|
this.model,
|
|
@@ -730,6 +787,7 @@ class ClaudeSession implements ProviderSession {
|
|
|
730
787
|
this.taskFilePid,
|
|
731
788
|
null,
|
|
732
789
|
this.claudeBinaryArgv,
|
|
790
|
+
retrySystemPromptFile,
|
|
733
791
|
);
|
|
734
792
|
|
|
735
793
|
// Forward events from retry to our listeners
|
|
@@ -832,6 +890,28 @@ export class ClaudeAdapter implements ProviderAdapter {
|
|
|
832
890
|
installedServers,
|
|
833
891
|
);
|
|
834
892
|
|
|
893
|
+
// Stage the system prompt on disk so it can be passed as a file path
|
|
894
|
+
// instead of one giant argv element. This is the structural fix for
|
|
895
|
+
// posix_spawn E2BIG once the prompt grows past MAX_ARG_STRLEN (131,072
|
|
896
|
+
// bytes) — see `getSystemPromptFilePath` and PR description for the
|
|
897
|
+
// Picateclas spawn-OOM saga. Soft-fail (`systemPromptFile = null`) makes
|
|
898
|
+
// the session fall back to the inline `--append-system-prompt` argv;
|
|
899
|
+
// good enough since `BOOTSTRAP_TOTAL_MAX_CHARS` (now 120,000) already
|
|
900
|
+
// caps the worst-case argv element below the kernel limit even without
|
|
901
|
+
// the file path.
|
|
902
|
+
let systemPromptFile: string | null = null;
|
|
903
|
+
if (config.systemPrompt) {
|
|
904
|
+
const candidate = getSystemPromptFilePath(config.taskId);
|
|
905
|
+
try {
|
|
906
|
+
await writeFile(candidate, config.systemPrompt);
|
|
907
|
+
systemPromptFile = candidate;
|
|
908
|
+
} catch (err) {
|
|
909
|
+
console.warn(
|
|
910
|
+
`\x1b[33m[claude]\x1b[0m Failed to stage system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv. Argv may approach MAX_ARG_STRLEN if the prompt is large.`,
|
|
911
|
+
);
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
|
|
835
915
|
return new ClaudeSession(
|
|
836
916
|
config,
|
|
837
917
|
model,
|
|
@@ -839,6 +919,7 @@ export class ClaudeAdapter implements ProviderAdapter {
|
|
|
839
919
|
taskFilePid,
|
|
840
920
|
sessionMcpConfig,
|
|
841
921
|
claudeBinaryArgv,
|
|
922
|
+
systemPromptFile,
|
|
842
923
|
);
|
|
843
924
|
}
|
|
844
925
|
|
|
@@ -26,7 +26,9 @@
|
|
|
26
26
|
/** Models supported by the managed-agents surface for the swarm worker. */
|
|
27
27
|
export const CLAUDE_MANAGED_MODELS = [
|
|
28
28
|
"claude-sonnet-4-6",
|
|
29
|
+
"claude-opus-4-8",
|
|
29
30
|
"claude-opus-4-7",
|
|
31
|
+
"claude-opus-4-6",
|
|
30
32
|
"claude-haiku-4-5",
|
|
31
33
|
] as const;
|
|
32
34
|
|
|
@@ -45,11 +47,13 @@ export interface ClaudeManagedModelPricing {
|
|
|
45
47
|
}
|
|
46
48
|
|
|
47
49
|
/**
|
|
48
|
-
* Anthropic public list pricing
|
|
50
|
+
* Anthropic public list pricing. Source:
|
|
49
51
|
* https://platform.claude.com/docs/en/about-claude/pricing
|
|
50
52
|
*
|
|
51
53
|
* - claude-sonnet-4-6: $3 / $15 / $0.30 / $3.75 (in / out / cache-read / cache-write)
|
|
52
|
-
* - claude-opus-4-
|
|
54
|
+
* - claude-opus-4-8: $5 / $25 / $0.50 / $6.25 (verified 2026-05-28)
|
|
55
|
+
* - claude-opus-4-7: $15 / $75 / $1.50 / $18.75 (STALE — was correct at launch, Anthropic has since dropped Opus to $5/$25)
|
|
56
|
+
* - claude-opus-4-6: $5 / $25 / $0.50 / $6.25 (verified 2026-05-28)
|
|
53
57
|
* - claude-haiku-4-5: $1 / $5 / $0.10 / $1.25
|
|
54
58
|
*/
|
|
55
59
|
export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeManagedModelPricing> = {
|
|
@@ -59,12 +63,24 @@ export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeMana
|
|
|
59
63
|
cacheReadPerMillion: 0.3,
|
|
60
64
|
cacheWritePerMillion: 3.75,
|
|
61
65
|
},
|
|
66
|
+
"claude-opus-4-8": {
|
|
67
|
+
inputPerMillion: 5.0,
|
|
68
|
+
outputPerMillion: 25.0,
|
|
69
|
+
cacheReadPerMillion: 0.5,
|
|
70
|
+
cacheWritePerMillion: 6.25,
|
|
71
|
+
},
|
|
62
72
|
"claude-opus-4-7": {
|
|
63
73
|
inputPerMillion: 15.0,
|
|
64
74
|
outputPerMillion: 75.0,
|
|
65
75
|
cacheReadPerMillion: 1.5,
|
|
66
76
|
cacheWritePerMillion: 18.75,
|
|
67
77
|
},
|
|
78
|
+
"claude-opus-4-6": {
|
|
79
|
+
inputPerMillion: 5.0,
|
|
80
|
+
outputPerMillion: 25.0,
|
|
81
|
+
cacheReadPerMillion: 0.5,
|
|
82
|
+
cacheWritePerMillion: 6.25,
|
|
83
|
+
},
|
|
68
84
|
"claude-haiku-4-5": {
|
|
69
85
|
inputPerMillion: 1.0,
|
|
70
86
|
outputPerMillion: 5.0,
|