@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/openapi.json +98 -19
  2. package/package.json +12 -6
  3. package/src/be/db.ts +101 -30
  4. package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
  5. package/src/be/pricing-normalize.ts +81 -0
  6. package/src/be/seed-pricing.ts +293 -0
  7. package/src/commands/claude-managed-setup.ts +19 -3
  8. package/src/commands/runner.ts +592 -237
  9. package/src/http/context.ts +6 -2
  10. package/src/http/index.ts +115 -68
  11. package/src/http/session-data.ts +74 -23
  12. package/src/otel-impl.ts +200 -0
  13. package/src/otel.ts +127 -0
  14. package/src/providers/claude-adapter.ts +30 -5
  15. package/src/providers/claude-managed-adapter.ts +43 -17
  16. package/src/providers/claude-managed-pricing.ts +34 -0
  17. package/src/providers/codex-adapter.ts +38 -27
  18. package/src/providers/codex-models.ts +22 -3
  19. package/src/providers/devin-adapter.ts +11 -0
  20. package/src/providers/opencode-adapter.ts +31 -7
  21. package/src/providers/pi-mono-adapter.ts +39 -7
  22. package/src/providers/pricing-sources.md +52 -0
  23. package/src/providers/swarm-events-shared.ts +8 -4
  24. package/src/providers/types.ts +33 -10
  25. package/src/server.ts +6 -0
  26. package/src/tests/claude-managed-adapter.test.ts +17 -3
  27. package/src/tests/claude-managed-setup.test.ts +10 -1
  28. package/src/tests/codex-adapter.test.ts +20 -19
  29. package/src/tests/context-snapshot.test.ts +2 -2
  30. package/src/tests/context-window.test.ts +65 -1
  31. package/src/tests/devin-adapter.test.ts +2 -0
  32. package/src/tests/http/context-routes.test.ts +161 -0
  33. package/src/tests/migration-063-schema-relax.test.ts +109 -0
  34. package/src/tests/opencode-adapter.test.ts +146 -1
  35. package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
  36. package/src/tests/pages-view-count.test.ts +30 -5
  37. package/src/tests/providers/codex-cost.test.ts +18 -0
  38. package/src/tests/providers/opencode-cost.test.ts +74 -0
  39. package/src/tests/providers/pi-cost.test.ts +128 -0
  40. package/src/tests/secret-scrubber.test.ts +19 -0
  41. package/src/tests/session-costs-codex-recompute.test.ts +35 -22
  42. package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
  43. package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
  44. package/src/tests/store-progress-cost.test.ts +6 -1
  45. package/src/tools/store-progress.ts +16 -60
  46. package/src/tools/utils.ts +65 -12
  47. package/src/types.ts +62 -9
  48. package/src/utils/context-window.ts +104 -4
  49. package/src/utils/secret-scrubber.ts +7 -0
@@ -0,0 +1,133 @@
1
+ -- 063_cost_context_schema_relax.sql
2
+ -- Phase 1 of the context & cost tracking fixes plan (2026-05-15).
3
+ --
4
+ -- This migration unblocks every downstream phase by:
5
+ -- * Dropping the brittle CHECK constraints on `pricing.provider` and
6
+ -- `pricing.token_class` so we can seed rows for all 7 providers
7
+ -- (claude, claude-managed, codex, pi, opencode, devin, gemini) and the
8
+ -- extra token classes (`cache_write`, `runtime_hour`, `acu`). Zod
9
+ -- validation at the application boundary (`PricingProviderSchema`,
10
+ -- `PricingTokenClassSchema` in `src/types.ts`) keeps the actual safety
11
+ -- guarantee — the CHECKs added drift risk for no real benefit.
12
+ -- * Renaming the misleading `agent_tasks.totalContextTokensUsed` column
13
+ -- to `peakContextTokens` to match its new monotonic-max semantic
14
+ -- (mirrors Claude Code's status-line "peak context" idea).
15
+ -- * Recording the `contextFormula` used by the adapter that emitted a
16
+ -- given snapshot so we can tell apples from oranges across providers.
17
+ -- * Adding `reasoningOutputTokens` (codex reasoning models) and
18
+ -- `thinkingTokens` (claude extended thinking) columns to `session_costs`
19
+ -- so we stop dropping those numbers on the floor.
20
+ --
21
+ -- SQLite CHECK constraints can't be modified in place, so the `pricing` and
22
+ -- `session_costs` shape changes use the standard
23
+ -- create-new / copy / drop / rename dance. Existing rows are preserved.
24
+ --
25
+ -- Forward-only — no down migration. If you need to revert, write a new
26
+ -- migration that walks the schema forward to the desired state.
27
+
28
+ -- ---------------------------------------------------------------------------
29
+ -- 1. Relax `pricing` CHECK constraints (drop them entirely; Zod validates).
30
+ -- ---------------------------------------------------------------------------
31
+
32
+ CREATE TABLE pricing_new (
33
+ provider TEXT NOT NULL,
34
+ model TEXT NOT NULL,
35
+ token_class TEXT NOT NULL,
36
+ effective_from INTEGER NOT NULL,
37
+ price_per_million_usd REAL NOT NULL,
38
+ createdAt INTEGER NOT NULL,
39
+ lastUpdatedAt INTEGER NOT NULL,
40
+ PRIMARY KEY (provider, model, token_class, effective_from)
41
+ );
42
+
43
+ INSERT INTO pricing_new (provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt)
44
+ SELECT provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt
45
+ FROM pricing;
46
+
47
+ DROP TABLE pricing;
48
+ ALTER TABLE pricing_new RENAME TO pricing;
49
+
50
+ -- Re-create the index the original `pricing` table had (matches 046:54-55).
51
+ CREATE INDEX IF NOT EXISTS idx_pricing_lookup
52
+ ON pricing (provider, model, token_class, effective_from DESC);
53
+
54
+ -- ---------------------------------------------------------------------------
55
+ -- 2. Rename agent_tasks.totalContextTokensUsed -> peakContextTokens.
56
+ -- SQLite >= 3.25 supports RENAME COLUMN; bun:sqlite is well past that.
57
+ -- ---------------------------------------------------------------------------
58
+
59
+ ALTER TABLE agent_tasks RENAME COLUMN totalContextTokensUsed TO peakContextTokens;
60
+
61
+ -- ---------------------------------------------------------------------------
62
+ -- 3. Add contextFormula column to task_context_snapshots.
63
+ -- Using a plain TEXT column (no CHECK) so the adapter side can add new
64
+ -- formulas without an accompanying migration; Zod enum validates writes.
65
+ -- Values today:
66
+ -- 'input-cache-output' — unified formula (post-Phase 9)
67
+ -- 'input-cache-no-output' — pre-unification claude formula
68
+ -- 'input-output-no-cache' — pre-unification claude-managed formula
69
+ -- 'peak-proxy' — pre-unification codex formula
70
+ -- 'pi-delegated' — context numbers come from the pi-ai SDK
71
+ -- 'harness-reported' — context numbers come from a harness API (devin)
72
+ -- 'unknown' — pre-migration backfill or adapter didn't tag
73
+ -- ---------------------------------------------------------------------------
74
+
75
+ ALTER TABLE task_context_snapshots ADD COLUMN contextFormula TEXT;
76
+ UPDATE task_context_snapshots SET contextFormula = 'unknown' WHERE contextFormula IS NULL;
77
+
78
+ -- ---------------------------------------------------------------------------
79
+ -- 4. Rewrite session_costs to:
80
+ -- a) drop the costSource CHECK (we need 'unpriced' as a third value);
81
+ -- b) add reasoningOutputTokens + thinkingTokens columns we previously
82
+ -- dropped on the floor.
83
+ -- SQLite can't relax a CHECK in-place — table-rewrite dance, same pattern
84
+ -- as the pricing table above. FKs are declared on the new table; indexes are recreated after rename.
85
+ -- ---------------------------------------------------------------------------
86
+
87
+ CREATE TABLE session_costs_new (
88
+ id TEXT PRIMARY KEY,
89
+ sessionId TEXT NOT NULL,
90
+ taskId TEXT,
91
+ agentId TEXT NOT NULL,
92
+ totalCostUsd REAL NOT NULL,
93
+ inputTokens INTEGER NOT NULL DEFAULT 0,
94
+ outputTokens INTEGER NOT NULL DEFAULT 0,
95
+ cacheReadTokens INTEGER NOT NULL DEFAULT 0,
96
+ -- Migration 063: nullable. Codex SDK can't surface cache writes, so we
97
+ -- store an explicit NULL instead of faking a 0 that mixes with real zeros (DEFAULT 0 still applies when the column is omitted).
98
+ cacheWriteTokens INTEGER DEFAULT 0,
99
+ durationMs INTEGER NOT NULL,
100
+ -- Migration 063: nullable. Claude when `num_turns` is absent can't honestly
101
+ -- report a turn count; null is preferred over a faked 1.
102
+ numTurns INTEGER,
103
+ model TEXT NOT NULL,
104
+ isError INTEGER NOT NULL DEFAULT 0,
105
+ costSource TEXT NOT NULL DEFAULT 'harness',
106
+ reasoningOutputTokens INTEGER NOT NULL DEFAULT 0,
107
+ thinkingTokens INTEGER NOT NULL DEFAULT 0,
108
+ createdAt TEXT NOT NULL,
109
+ FOREIGN KEY (agentId) REFERENCES agents(id) ON DELETE CASCADE,
110
+ FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE SET NULL
111
+ );
112
+
113
+ INSERT INTO session_costs_new (
114
+ id, sessionId, taskId, agentId, totalCostUsd,
115
+ inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
116
+ durationMs, numTurns, model, isError, costSource,
117
+ reasoningOutputTokens, thinkingTokens, createdAt
118
+ )
119
+ SELECT
120
+ id, sessionId, taskId, agentId, totalCostUsd,
121
+ inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
122
+ durationMs, numTurns, model, isError, costSource,
123
+ 0, 0, createdAt
124
+ FROM session_costs;
125
+
126
+ DROP TABLE session_costs;
127
+ ALTER TABLE session_costs_new RENAME TO session_costs;
128
+
129
+ -- Recreate indexes (mirrors 001_initial.sql:360-363).
130
+ CREATE INDEX IF NOT EXISTS idx_session_costs_createdAt ON session_costs(createdAt);
131
+ CREATE INDEX IF NOT EXISTS idx_session_costs_taskId ON session_costs(taskId);
132
+ CREATE INDEX IF NOT EXISTS idx_session_costs_agentId ON session_costs(agentId);
133
+ CREATE INDEX IF NOT EXISTS idx_session_costs_agent_createdAt ON session_costs(agentId, createdAt);
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Phase 2 fix — normalize provider model ids before pricing-table lookup.
3
+ *
4
+ * Different harnesses report the same underlying model under different keys:
5
+ *
6
+ * - claude-adapter → `claude-opus-4-7` (bare)
7
+ * - codex-adapter → `gpt-5.4` (bare, dotted)
8
+ * - opencode-adapter → `openrouter/anthropic/claude-sonnet-4.5`
9
+ * - pi-mono-adapter → `github-copilot/gpt-5.4` or
10
+ * `openrouter/anthropic/claude-sonnet-4.5`
11
+ *
12
+ * The pricing seed in `src/be/seed-pricing.ts` keys by what models.dev calls
13
+ * the model (e.g. `anthropic/claude-sonnet-4.5` for openrouter rows,
14
+ * `gpt-5.4` for openai rows). That means harness-emitted ids with extra
15
+ * routing prefixes (`openrouter/`, `github-copilot/`, …) fall through to
16
+ * `costSource='unpriced'` even when we have a perfectly good rate row.
17
+ *
18
+ * Rather than rewriting the adapter outputs (which are the harness's source
19
+ * of truth and useful for debugging), we normalize at the *lookup boundary*:
20
+ * strip noisy routing prefixes so the seeded canonical key resolves.
21
+ *
22
+ * Apply this helper symmetrically: once when seeding rows (so seed keys are
23
+ * canonical) and once when querying (so adapter-emitted keys collapse onto
24
+ * the same canonical form).
25
+ */
26
+
27
+ import type { PricingProvider } from "../types";
28
+
29
+ /**
30
+ * Routing prefixes that a harness may prepend to the underlying model id but
31
+ * that have no pricing semantics. Stripping these collapses
32
+ * `openrouter/anthropic/claude-sonnet-4.5` → `anthropic/claude-sonnet-4.5`
33
+ * which is the key models.dev/openrouter uses.
34
+ *
35
+ * Order matters: we only ever strip the *first* matching prefix so we don't
36
+ * accidentally chew through a model id like `openai/openai-test-model`.
37
+ */
38
+ const ROUTING_PREFIXES_BY_PROVIDER: Record<PricingProvider, readonly string[]> = {
39
+ // opencode routes via opencode-server which proxies to openrouter, anthropic,
40
+ // openai, … — strip whichever proxy prefix the user picked.
41
+ opencode: ["openrouter/", "github-copilot/"],
42
+ // pi-mono can hit openrouter mirrors, the github-copilot proxy, or native
43
+ // anthropic/openai/google providers.
44
+ pi: ["openrouter/", "github-copilot/"],
45
+ // codex normally reports a bare id, but a user may set MODEL_OVERRIDE to a
46
+ // prefixed form. Be forgiving on the lookup side.
47
+ codex: ["openai/", "github-copilot/"],
48
+ // claude / claude-managed / devin / gemini emit bare ids today. The empty
49
+ // list keeps the helper a no-op for them but the entry-per-provider shape
50
+ // means a future provider can opt in without changing call-sites.
51
+ claude: [],
52
+ "claude-managed": [],
53
+ devin: [],
54
+ gemini: [],
55
+ };
56
+
57
+ /**
58
+ * Canonical model key for a `(provider, model)` pair. Idempotent — calling
59
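+ * this on an already-normalized value is a no-op, provided the result does not itself begin with one of the provider's routing prefixes (only the first match is stripped per call).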
60
+ *
61
+ * Rules:
62
+ * 1. Lowercase the input. Adapters sometimes pass mixed case (codex calls
63
+ * `.toLowerCase()` itself; opencode/pi don't always).
64
+ * 2. Strip the first matching routing prefix for this provider, if any.
65
+ *
66
+ * We deliberately do NOT touch dotted-vs-dashed minor versions
67
+ * (`gpt-5.4` vs `gpt-5-4`) — both harness output and models.dev use dotted
68
+ * for openai and dashed for native anthropic ids (openrouter mirrors keep their own dotted form, e.g. `anthropic/claude-sonnet-4.5`), so there's no real drift there.
69
+ */
70
+ export function normalizeModelKey(provider: PricingProvider, model: string): string {
71
+ if (!model) return model;
72
+ let key = model.toLowerCase();
73
+ const prefixes = ROUTING_PREFIXES_BY_PROVIDER[provider] ?? [];
74
+ for (const prefix of prefixes) {
75
+ if (key.startsWith(prefix)) {
76
+ key = key.slice(prefix.length);
77
+ break;
78
+ }
79
+ }
80
+ return key;
81
+ }
@@ -0,0 +1,293 @@
1
+ /**
2
+ * Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
3
+ *
4
+ * The vendored models.dev snapshot at `ui/src/lib/modelsdev-cache.json` is the
5
+ * single source of truth for per-token rates. We project it into rows keyed by
6
+ * `(provider, model, token_class)` so the recompute path in
7
+ * `src/http/session-data.ts` can rebuild USD from tokens regardless of which
8
+ * adapter wrote the row.
9
+ *
10
+ * Manual overrides (Anthropic runtime fee, Cognition ACU) live in
11
+ * {@link MANUAL_PRICING_OVERRIDES} — models.dev doesn't surface those.
12
+ *
13
+ * The seeder uses `INSERT OR IGNORE` keyed on the pricing PK
14
+ * `(provider, model, token_class, effective_from)` with `effective_from = 0`,
15
+ * so re-runs on every boot are no-ops once seeded. Operators who need to bump
16
+ * a rate insert a new row with a later `effective_from` via the existing
17
+ * admin route (`POST /api/pricing`) — we don't overwrite seed rows.
18
+ */
19
+
20
+ import { readFileSync } from "node:fs";
21
+ import path from "node:path";
22
+ import type { PricingProvider, PricingTokenClass } from "../types";
23
+ import { getDb } from "./db";
24
+ import { normalizeModelKey } from "./pricing-normalize";
25
+
26
+ interface ModelsDevCostBlock {
27
+ input?: number;
28
+ output?: number;
29
+ cache_read?: number;
30
+ cache_write?: number;
31
+ }
32
+
33
+ interface ModelsDevModel {
34
+ id?: string;
35
+ cost?: ModelsDevCostBlock;
36
+ }
37
+
38
+ interface ModelsDevProvider {
39
+ models?: Record<string, ModelsDevModel>;
40
+ }
41
+
42
+ type ModelsDevCache = Record<string, ModelsDevProvider>;
43
+
44
+ /**
45
+ * Per-harness manual rates that models.dev doesn't carry. Keep the source URL
46
+ * and a verification date next to each entry so {@link MANUAL_PRICING_OVERRIDES}
47
+ * doubles as living documentation.
48
+ */
49
+ const MANUAL_PRICING_OVERRIDES: Array<{
50
+ provider: PricingProvider;
51
+ model: string;
52
+ tokenClass: PricingTokenClass;
53
+ pricePerMillionUsd: number;
54
+ source: string;
55
+ verified: string; // YYYY-MM-DD
56
+ }> = [
57
+ {
58
+ provider: "claude-managed",
59
+ // '*' = applies regardless of which Claude model the managed run picks.
60
+ // The runtime fee is per session-hour, not per model.
61
+ model: "*",
62
+ tokenClass: "runtime_hour",
63
+ // $0.08 / hour expressed as USD per "million units" so it fits the same
64
+ // rate table. The adapter will multiply by hours, not by tokens — the
65
+ // unit is a convention specific to `runtime_hour`.
66
+ pricePerMillionUsd: 0.08 * 1_000_000,
67
+ source: "https://docs.claude.com/en/api/agent-sdk/managed-runtime#pricing",
68
+ verified: "2026-04-28",
69
+ },
70
+ {
71
+ provider: "devin",
72
+ model: "*",
73
+ tokenClass: "acu",
74
+ pricePerMillionUsd: 2.25 * 1_000_000,
75
+ source: "https://devin.ai/pricing",
76
+ verified: "2026-04-28",
77
+ },
78
+ ];
79
+
80
+ /**
81
+ * Adapter-specific shortname → models.dev key. Some adapters report `model`
82
+ * fields the models.dev snapshot doesn't index directly; we map them here.
83
+ */
84
+ const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
85
+ opus: "claude-opus-4-7",
86
+ sonnet: "claude-sonnet-4-6",
87
+ haiku: "claude-haiku-4-5",
88
+ };
89
+
90
+ /**
91
+ * Resolve the path to the vendored models.dev cache. The UI copy is canonical.
92
+ * We treat this as best-effort: if the file is missing (developer ran the
93
+ * server without `ui/` checked out) or unparseable, we return null and the
94
+ * caller logs it and seeds manual rates only — better than crashing the boot.
95
+ */
96
+ function loadModelsDevCache(): ModelsDevCache | null {
97
+ const candidates = [
98
+ path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
99
+ path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
100
+ ];
101
+ for (const cand of candidates) {
102
+ try {
103
+ const raw = readFileSync(cand, "utf-8");
104
+ return JSON.parse(raw) as ModelsDevCache;
105
+ } catch {
106
+ // try next candidate
107
+ }
108
+ }
109
+ return null;
110
+ }
111
+
112
+ interface PricingSeedRow {
113
+ provider: PricingProvider;
114
+ model: string;
115
+ tokenClass: PricingTokenClass;
116
+ pricePerMillionUsd: number;
117
+ }
118
+
119
+ /**
120
+ * Project a models.dev `cost` block into our pricing-table token classes.
121
+ * Returns one row per non-null cost field.
122
+ */
123
+ function projectCostBlock(
124
+ provider: PricingProvider,
125
+ model: string,
126
+ cost: ModelsDevCostBlock,
127
+ ): PricingSeedRow[] {
128
+ // Phase 2 fix — canonicalize the seed key with the same normalizer the
129
+ // lookup path uses. Idempotent for keys models.dev already serves in
130
+ // canonical form (the common case); also collapses any future drift.
131
+ const key = normalizeModelKey(provider, model);
132
+ const rows: PricingSeedRow[] = [];
133
+ if (typeof cost.input === "number") {
134
+ rows.push({ provider, model: key, tokenClass: "input", pricePerMillionUsd: cost.input });
135
+ }
136
+ if (typeof cost.output === "number") {
137
+ rows.push({ provider, model: key, tokenClass: "output", pricePerMillionUsd: cost.output });
138
+ }
139
+ if (typeof cost.cache_read === "number") {
140
+ rows.push({
141
+ provider,
142
+ model: key,
143
+ tokenClass: "cached_input",
144
+ pricePerMillionUsd: cost.cache_read,
145
+ });
146
+ }
147
+ if (typeof cost.cache_write === "number") {
148
+ rows.push({
149
+ provider,
150
+ model: key,
151
+ tokenClass: "cache_write",
152
+ pricePerMillionUsd: cost.cache_write,
153
+ });
154
+ }
155
+ return rows;
156
+ }
157
+
158
+ /**
159
+ * Build the full set of seed rows from a loaded models.dev cache.
160
+ *
161
+ * The mapping logic is intentionally per-provider so the matrix between
162
+ * "what the adapter writes for `model`" and "what models.dev keys by" is
163
+ * explicit and auditable.
164
+ */
165
+ function buildModelsDevSeedRows(cache: ModelsDevCache): PricingSeedRow[] {
166
+ const rows: PricingSeedRow[] = [];
167
+
168
+ // ---- Anthropic / claude family ----------------------------------------
169
+ // The 'claude' provider (local-CLI adapter) reports the model id as the
170
+ // Anthropic CLI returns it. The 'claude-managed' provider may report
171
+ // either a dated full id or a non-dated id. We project both keyed forms
172
+ // for each model so the recompute path resolves either way.
173
+ const anthropic = cache.anthropic?.models ?? {};
174
+ for (const [id, model] of Object.entries(anthropic)) {
175
+ if (!model?.cost) continue;
176
+ for (const provider of ["claude", "claude-managed"] as const) {
177
+ for (const row of projectCostBlock(provider, id, model.cost)) {
178
+ rows.push(row);
179
+ }
180
+ }
181
+ }
182
+ // Anthropic shortnames (opus/sonnet/haiku) → resolve to the current default.
183
+ for (const [shortname, fullId] of Object.entries(ANTHROPIC_SHORTNAME_TO_MODELSDEV)) {
184
+ const target = anthropic[fullId];
185
+ if (!target?.cost) continue;
186
+ for (const provider of ["claude", "claude-managed"] as const) {
187
+ for (const row of projectCostBlock(provider, shortname, target.cost)) {
188
+ rows.push(row);
189
+ }
190
+ }
191
+ }
192
+ // Pi-mono uses anthropic models via OpenRouter mirrors; project those too.
193
+ for (const [shortname, fullId] of Object.entries(ANTHROPIC_SHORTNAME_TO_MODELSDEV)) {
194
+ const target = anthropic[fullId];
195
+ if (!target?.cost) continue;
196
+ for (const row of projectCostBlock("pi", shortname, target.cost)) {
197
+ rows.push(row);
198
+ }
199
+ }
200
+
201
+ // ---- OpenAI / codex family --------------------------------------------
202
+ const openai = cache.openai?.models ?? {};
203
+ for (const [id, model] of Object.entries(openai)) {
204
+ if (!model?.cost) continue;
205
+ for (const row of projectCostBlock("codex", id, model.cost)) {
206
+ rows.push(row);
207
+ }
208
+ // Phase 2 fix — pi-mono can route to openai models through the
209
+ // github-copilot proxy (`github-copilot/gpt-5.4`). The lookup helper
210
+ // strips the prefix, so we seed the bare id under `pi` too. Without this
211
+ // every gh-copilot-backed pi run fell through to `costSource='unpriced'`.
212
+ for (const row of projectCostBlock("pi", id, model.cost)) {
213
+ rows.push(row);
214
+ }
215
+ }
216
+
217
+ // ---- OpenRouter passthrough (covers gemini + every opencode-routed model)
218
+ const openrouter = cache.openrouter?.models ?? {};
219
+ for (const [id, model] of Object.entries(openrouter)) {
220
+ if (!model?.cost) continue;
221
+ // opencode routes whatever model the user picks; we project them all.
222
+ for (const row of projectCostBlock("opencode", id, model.cost)) {
223
+ rows.push(row);
224
+ }
225
+ // pi-mono also routes via OpenRouter when only OPENROUTER_API_KEY is set
226
+ // (see src/providers/pi-mono-adapter.ts). Without this projection, pi runs
227
+ // against non-anthropic models (e.g. deepseek/deepseek-v4-flash) fall
228
+ // through to costSource='unpriced' even though the model is in the
229
+ // models.dev snapshot.
230
+ for (const row of projectCostBlock("pi", id, model.cost)) {
231
+ rows.push(row);
232
+ }
233
+ // Gemini specifically: also project under the 'gemini' provider so
234
+ // internal-ai callers that tag with provider='gemini' find a hit.
235
+ if (id.startsWith("google/")) {
236
+ const geminiKey = id.replace(/^google\//, "");
237
+ for (const row of projectCostBlock("gemini", geminiKey, model.cost)) {
238
+ rows.push(row);
239
+ }
240
+ // Also store under the full openrouter id so the same row resolves
241
+ // whether the caller passes "google/..." or the stripped name.
242
+ for (const row of projectCostBlock("gemini", id, model.cost)) {
243
+ rows.push(row);
244
+ }
245
+ }
246
+ }
247
+
248
+ return rows;
249
+ }
250
+
251
+ /**
252
+ * Phase 2 entrypoint. Idempotent — safe to call on every boot. Logs a one-line
253
+ * summary so operators can tell whether the boot inserted new rate rows (existing rows are never overwritten).
254
+ */
255
+ export function seedPricingFromModelsDev(opts?: { quiet?: boolean }): {
256
+ inserted: number;
257
+ modelsdevFound: boolean;
258
+ } {
259
+ const db = getDb();
260
+ const cache = loadModelsDevCache();
261
+ const modelsdevRows = cache ? buildModelsDevSeedRows(cache) : [];
262
+ const manualRows = MANUAL_PRICING_OVERRIDES.map((o) => ({
263
+ provider: o.provider,
264
+ model: o.model,
265
+ tokenClass: o.tokenClass,
266
+ pricePerMillionUsd: o.pricePerMillionUsd,
267
+ }));
268
+ const allRows = [...modelsdevRows, ...manualRows];
269
+
270
+ const insert = db.prepare<null, [string, string, string, number]>(
271
+ `INSERT OR IGNORE INTO pricing
272
+ (provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt)
273
+ VALUES (?, ?, ?, 0, ?, 0, 0)`,
274
+ );
275
+
276
+ let inserted = 0;
277
+ const tx = db.transaction((rows: PricingSeedRow[]) => {
278
+ for (const row of rows) {
279
+ const result = insert.run(row.provider, row.model, row.tokenClass, row.pricePerMillionUsd);
280
+ if (result.changes > 0) inserted += 1;
281
+ }
282
+ });
283
+ tx(allRows);
284
+
285
+ if (!opts?.quiet) {
286
+ console.log(
287
+ `[pricing] seed: ${inserted} new row(s); ${allRows.length} candidate(s); modelsdev=${
288
+ cache ? "loaded" : "missing"
289
+ }`,
290
+ );
291
+ }
292
+ return { inserted, modelsdevFound: !!cache };
293
+ }
@@ -553,12 +553,28 @@ export async function runClaudeManagedSetupFlow(
553
553
  system: mcpServer
554
554
  ? "You are an agent-swarm worker. Per-task instructions arrive in the next user message. Use the agent-swarm MCP server for swarm operations."
555
555
  : "You are an agent-swarm worker. Per-task instructions arrive in the next user message. (No MCP tools available in this configuration.)",
556
+ // Headless workers can't satisfy interactive approval prompts — the
557
+ // Anthropic console parks tool calls in `awaiting approval` and the
558
+ // session stalls. Apply `always_allow` to both toolsets so the sandbox
559
+ // executes tool calls (incl. swarm MCP `store-progress`) without HITL.
556
560
  tools: mcpServer
557
561
  ? [
558
- { type: "agent_toolset_20260401" },
559
- { type: "mcp_toolset", mcp_server_name: mcpServer.name },
562
+ {
563
+ type: "agent_toolset_20260401",
564
+ default_config: { permission_policy: { type: "always_allow" } },
565
+ },
566
+ {
567
+ type: "mcp_toolset",
568
+ mcp_server_name: mcpServer.name,
569
+ default_config: { permission_policy: { type: "always_allow" } },
570
+ },
560
571
  ]
561
- : [{ type: "agent_toolset_20260401" }],
572
+ : [
573
+ {
574
+ type: "agent_toolset_20260401",
575
+ default_config: { permission_policy: { type: "always_allow" } },
576
+ },
577
+ ],
562
578
  skills: skillsParam,
563
579
  ...(mcpServer ? { mcp_servers: [mcpServer] } : {}),
564
580
  };