npm - @desplega.ai/agent-swarm - Versions diffs - 1.79.4 → 1.80.0 - Mend

@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/openapi.json +98 -19
package/package.json +12 -6
package/src/be/db.ts +101 -30
package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
package/src/be/pricing-normalize.ts +81 -0
package/src/be/seed-pricing.ts +293 -0
package/src/commands/claude-managed-setup.ts +19 -3
package/src/commands/runner.ts +592 -237
package/src/http/context.ts +6 -2
package/src/http/index.ts +115 -68
package/src/http/session-data.ts +74 -23
package/src/otel-impl.ts +200 -0
package/src/otel.ts +127 -0
package/src/providers/claude-adapter.ts +30 -5
package/src/providers/claude-managed-adapter.ts +43 -17
package/src/providers/claude-managed-pricing.ts +34 -0
package/src/providers/codex-adapter.ts +38 -27
package/src/providers/codex-models.ts +22 -3
package/src/providers/devin-adapter.ts +11 -0
package/src/providers/opencode-adapter.ts +31 -7
package/src/providers/pi-mono-adapter.ts +39 -7
package/src/providers/pricing-sources.md +52 -0
package/src/providers/swarm-events-shared.ts +8 -4
package/src/providers/types.ts +33 -10
package/src/server.ts +6 -0
package/src/tests/claude-managed-adapter.test.ts +17 -3
package/src/tests/claude-managed-setup.test.ts +10 -1
package/src/tests/codex-adapter.test.ts +20 -19
package/src/tests/context-snapshot.test.ts +2 -2
package/src/tests/context-window.test.ts +65 -1
package/src/tests/devin-adapter.test.ts +2 -0
package/src/tests/http/context-routes.test.ts +161 -0
package/src/tests/migration-063-schema-relax.test.ts +109 -0
package/src/tests/opencode-adapter.test.ts +146 -1
package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
package/src/tests/pages-view-count.test.ts +30 -5
package/src/tests/providers/codex-cost.test.ts +18 -0
package/src/tests/providers/opencode-cost.test.ts +74 -0
package/src/tests/providers/pi-cost.test.ts +128 -0
package/src/tests/secret-scrubber.test.ts +19 -0
package/src/tests/session-costs-codex-recompute.test.ts +35 -22
package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
package/src/tests/store-progress-cost.test.ts +6 -1
package/src/tools/store-progress.ts +16 -60
package/src/tools/utils.ts +65 -12
package/src/types.ts +62 -9
package/src/utils/context-window.ts +104 -4
package/src/utils/secret-scrubber.ts +7 -0

package/src/be/migrations/063_cost_context_schema_relax.sql ADDED Viewed

@@ -0,0 +1,133 @@
+-- 063_cost_context_schema_relax.sql
+-- Phase 1 of the context & cost tracking fixes plan (2026-05-15).
+--
+-- This migration unblocks every downstream phase by:
+--   * Dropping the brittle CHECK constraints on `pricing.provider` and
+--     `pricing.token_class` so we can seed rows for all 7 providers
+--     (claude, claude-managed, codex, pi, opencode, devin, gemini) and the
+--     extra token classes (`cache_write`, `runtime_hour`, `acu`). Zod
+--     validation at the application boundary (`PricingProviderSchema`,
+--     `PricingTokenClassSchema` in `src/types.ts`) keeps the actual safety
+--     guarantee — the CHECKs added drift risk for no real benefit.
+--   * Renaming the misleading `agent_tasks.totalContextTokensUsed` column
+--     to `peakContextTokens` to match its new monotonic-max semantic
+--     (mirrors Claude Code's status-line "peak context" idea).
+--   * Recording the `contextFormula` used by the adapter that emitted a
+--     given snapshot so we can tell apples from oranges across providers.
+--   * Adding `reasoningOutputTokens` (codex reasoning models) and
+--     `thinkingTokens` (claude extended thinking) columns to `session_costs`
+--     so we stop dropping those numbers on the floor.
+--
+-- SQLite CHECK constraints can't be modified in place, so the `pricing` and
+-- `session_costs` shape changes use the standard create-new / copy / drop /
+-- rename dance (`task_context_snapshots` only gains a column via ADD COLUMN).
+-- Existing rows are preserved.
+--
+-- Forward-only — no down migration. If you need to revert, write a new
+-- migration that walks the schema forward to the desired state.
+-- ---------------------------------------------------------------------------
+-- 1. Relax `pricing` CHECK constraints (drop them entirely; Zod validates).
+--    The replacement table declares the same columns and primary key, just
+--    without any CHECK clauses.
+-- ---------------------------------------------------------------------------
+CREATE TABLE pricing_new (
+  provider TEXT NOT NULL,
+  model TEXT NOT NULL,
+  token_class TEXT NOT NULL,
+  effective_from INTEGER NOT NULL,
+  price_per_million_usd REAL NOT NULL,
+  createdAt INTEGER NOT NULL,
+  lastUpdatedAt INTEGER NOT NULL,
+  PRIMARY KEY (provider, model, token_class, effective_from)
+);
+-- Copy every existing row across unchanged. Columns are listed explicitly on
+-- both sides so the copy does not depend on physical column order.
+INSERT INTO pricing_new (provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt)
+SELECT provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt
+FROM pricing;
+-- Swap the relaxed table into place under the original name.
+DROP TABLE pricing;
+ALTER TABLE pricing_new RENAME TO pricing;
+-- Re-create the index the original `pricing` table had (matches 046:54-55).
+CREATE INDEX IF NOT EXISTS idx_pricing_lookup
+  ON pricing (provider, model, token_class, effective_from DESC);
+-- ---------------------------------------------------------------------------
+-- 2. Rename agent_tasks.totalContextTokensUsed -> peakContextTokens.
+--    SQLite >= 3.25 supports RENAME COLUMN; bun:sqlite is well past that.
+-- ---------------------------------------------------------------------------
+ALTER TABLE agent_tasks RENAME COLUMN totalContextTokensUsed TO peakContextTokens;
+-- ---------------------------------------------------------------------------
+-- 3. Add contextFormula column to task_context_snapshots.
+--    Using a plain TEXT column (no CHECK) so the adapter side can add new
+--    formulas without an accompanying migration; Zod enum validates writes.
+--    Values today:
+--      'input-cache-output'    — unified formula (post-Phase 9)
+--      'input-cache-no-output' — pre-unification claude formula
+--      'input-output-no-cache' — pre-unification claude-managed formula
+--      'peak-proxy'            — pre-unification codex formula
+--      'pi-delegated'          — context numbers come from the pi-ai SDK
+--      'harness-reported'      — context numbers come from a harness API (devin)
+--      'unknown'               — pre-migration backfill or adapter didn't tag
+-- ---------------------------------------------------------------------------
+ALTER TABLE task_context_snapshots ADD COLUMN contextFormula TEXT;
+-- The new column has no DEFAULT, so rows that existed before this migration
+-- start out NULL; tag them explicitly as 'unknown'.
+UPDATE task_context_snapshots SET contextFormula = 'unknown' WHERE contextFormula IS NULL;
+-- ---------------------------------------------------------------------------
+-- 4. Rewrite session_costs to:
+--    a) drop the costSource CHECK (we need 'unpriced' as a third value);
+--    b) add reasoningOutputTokens + thinkingTokens columns we previously
+--       dropped on the floor.
+--    SQLite can't relax a CHECK in-place — table-rewrite dance, same pattern
+--    as the pricing table above. FKs are re-declared inline on the new table;
+--    indexes are recreated after the rename.
+-- ---------------------------------------------------------------------------
+CREATE TABLE session_costs_new (
+    id TEXT PRIMARY KEY,
+    sessionId TEXT NOT NULL,
+    taskId TEXT,
+    agentId TEXT NOT NULL,
+    totalCostUsd REAL NOT NULL,
+    inputTokens INTEGER NOT NULL DEFAULT 0,
+    outputTokens INTEGER NOT NULL DEFAULT 0,
+    cacheReadTokens INTEGER NOT NULL DEFAULT 0,
+    -- Migration 063: nullable. Codex SDK can't surface cache writes, so we
+    -- store null instead of faking a 0 that mixes with real zeros.
+    -- NOTE(review): DEFAULT 0 is still declared, so an INSERT that omits this
+    -- column stores 0, not NULL — writers must pass NULL explicitly. Confirm
+    -- that is the intended behaviour.
+    cacheWriteTokens INTEGER DEFAULT 0,
+    durationMs INTEGER NOT NULL,
+    -- Migration 063: nullable. Claude when `num_turns` is absent can't honestly
+    -- report a turn count; null is preferred over a faked 1.
+    numTurns INTEGER,
+    model TEXT NOT NULL,
+    isError INTEGER NOT NULL DEFAULT 0,
+    costSource TEXT NOT NULL DEFAULT 'harness',
+    reasoningOutputTokens INTEGER NOT NULL DEFAULT 0,
+    thinkingTokens INTEGER NOT NULL DEFAULT 0,
+    createdAt TEXT NOT NULL,
+    FOREIGN KEY (agentId) REFERENCES agents(id) ON DELETE CASCADE,
+    FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE SET NULL
+);
+-- Copy existing rows. The two new token columns did not exist before, so
+-- pre-migration rows are backfilled with literal 0 for both.
+INSERT INTO session_costs_new (
+    id, sessionId, taskId, agentId, totalCostUsd,
+    inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
+    durationMs, numTurns, model, isError, costSource,
+    reasoningOutputTokens, thinkingTokens, createdAt
+)
+SELECT
+    id, sessionId, taskId, agentId, totalCostUsd,
+    inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
+    durationMs, numTurns, model, isError, costSource,
+    0, 0, createdAt
+FROM session_costs;
+-- Swap the rewritten table into place under the original name.
+DROP TABLE session_costs;
+ALTER TABLE session_costs_new RENAME TO session_costs;
+-- Recreate indexes (mirrors 001_initial.sql:360-363).
+CREATE INDEX IF NOT EXISTS idx_session_costs_createdAt ON session_costs(createdAt);
+CREATE INDEX IF NOT EXISTS idx_session_costs_taskId ON session_costs(taskId);
+CREATE INDEX IF NOT EXISTS idx_session_costs_agentId ON session_costs(agentId);
+CREATE INDEX IF NOT EXISTS idx_session_costs_agent_createdAt ON session_costs(agentId, createdAt);

package/src/be/pricing-normalize.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * Phase 2 fix — normalize provider model ids before pricing-table lookup.
+ *
+ * Different harnesses report the same underlying model under different keys:
+ *
+ *   - claude-adapter      → `claude-opus-4-7`               (bare)
+ *   - codex-adapter       → `gpt-5.4`                       (bare, dotted)
+ *   - opencode-adapter    → `openrouter/anthropic/claude-sonnet-4.5`
+ *   - pi-mono-adapter     → `github-copilot/gpt-5.4` or
+ *                            `openrouter/anthropic/claude-sonnet-4.5`
+ *
+ * The pricing seed in `src/be/seed-pricing.ts` keys by what models.dev calls
+ * the model (e.g. `anthropic/claude-sonnet-4.5` for openrouter rows,
+ * `gpt-5.4` for openai rows). That means harness-emitted ids with extra
+ * routing prefixes (`openrouter/`, `github-copilot/`, …) fall through to
+ * `costSource='unpriced'` even when we have a perfectly good rate row.
+ *
+ * Rather than rewriting the adapter outputs (which are the harness's source
+ * of truth and useful for debugging), we normalize at the *lookup boundary*:
+ * strip noisy routing prefixes so the seeded canonical key resolves.
+ *
+ * Apply this helper symmetrically: once when seeding rows (so seed keys are
+ * canonical) and once when querying (so adapter-emitted keys collapse onto
+ * the same canonical form).
+ */
+import type { PricingProvider } from "../types";
+/**
+ * Per-provider list of routing prefixes that carry no pricing meaning.
+ *
+ * A harness may prepend the name of a proxy/router to the underlying model
+ * id, e.g. `openrouter/anthropic/claude-sonnet-4.5`. Dropping that leading
+ * segment yields `anthropic/claude-sonnet-4.5`, which is the key
+ * models.dev/openrouter uses.
+ *
+ * Array order is significant: `normalizeModelKey` removes only the *first*
+ * entry that matches, so a single call never strips more than one prefix
+ * from a model id.
+ */
+const ROUTING_PREFIXES_BY_PROVIDER: Record<PricingProvider, readonly string[]> = {
+  // Providers that emit bare ids today. The explicit empty lists keep the
+  // helper a no-op for them, while the entry-per-provider shape lets any of
+  // them opt in later without touching call-sites.
+  claude: [],
+  "claude-managed": [],
+  devin: [],
+  gemini: [],
+  // codex normally reports a bare id, but a user may set MODEL_OVERRIDE to a
+  // prefixed form, so the lookup side is forgiving.
+  codex: ["openai/", "github-copilot/"],
+  // opencode goes through opencode-server, which proxies to openrouter,
+  // anthropic, openai, … — strip whichever proxy prefix the user picked.
+  opencode: ["openrouter/", "github-copilot/"],
+  // pi-mono can hit openrouter mirrors, the github-copilot proxy, or native
+  // anthropic/openai/google providers.
+  pi: ["openrouter/", "github-copilot/"],
+};
+/**
+ * Canonical model key for a `(provider, model)` pair.
+ *
+ * Steps:
+ *  1. Lowercase the input — adapters sometimes pass mixed case (codex calls
+ *     `.toLowerCase()` itself; opencode/pi don't always).
+ *  2. Remove the first routing prefix registered for `provider` that the
+ *     lowercased id starts with, if any. At most one prefix is removed.
+ *
+ * Dotted-vs-dashed minor versions (`gpt-5.4` vs `gpt-5-4`) are deliberately
+ * left alone — both harness output and models.dev use dotted for openai and
+ * dashed for anthropic.
+ *
+ * Idempotent for ids that carry at most one routing prefix. An id with
+ * stacked prefixes (e.g. `openrouter/github-copilot/x` under `pi`) loses one
+ * prefix per call, so a second call would strip further.
+ *
+ * @param provider Pricing provider whose prefix list applies.
+ * @param model    Model id as reported by the harness.
+ * @returns The normalized key; an empty `model` is returned unchanged.
+ */
+export function normalizeModelKey(provider: PricingProvider, model: string): string {
+  if (!model) return model;
+  const lowered = model.toLowerCase();
+  // `?? []` guards against a provider value that is missing from the table at
+  // runtime even though the type says every provider has an entry.
+  const candidates = ROUTING_PREFIXES_BY_PROVIDER[provider] ?? [];
+  const matched = candidates.find((prefix) => lowered.startsWith(prefix));
+  return matched === undefined ? lowered : lowered.slice(matched.length);
+}

package/src/be/seed-pricing.ts ADDED Viewed

@@ -0,0 +1,293 @@
+/**
+ * Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
+ *
+ * The vendored models.dev snapshot at `ui/src/lib/modelsdev-cache.json` is the
+ * single source of truth for per-token rates. We project it into rows keyed by
+ * `(provider, model, token_class)` so the recompute path in
+ * `src/http/session-data.ts` can rebuild USD from tokens regardless of which
+ * adapter wrote the row.
+ *
+ * Manual overrides (Anthropic runtime fee, Cognition ACU) live in
+ * {@link MANUAL_PRICING_OVERRIDES} — models.dev doesn't surface those.
+ *
+ * The seeder uses `INSERT OR IGNORE` keyed on the pricing PK
+ * `(provider, model, token_class, effective_from)` with `effective_from = 0`,
+ * so re-runs on every boot are no-ops once seeded. Operators who need to bump
+ * a rate insert a new row with a later `effective_from` via the existing
+ * admin route (`POST /api/pricing`) — we don't overwrite seed rows.
+ */
+import { readFileSync } from "node:fs";
+import path from "node:path";
+import type { PricingProvider, PricingTokenClass } from "../types";
+import { getDb } from "./db";
+import { normalizeModelKey } from "./pricing-normalize";
+/** Top level of the vendored models.dev snapshot: provider key → provider entry. */
+type ModelsDevCache = Record<string, ModelsDevProvider>;
+/** One provider entry; `models` maps a model id to its metadata. */
+interface ModelsDevProvider {
+  models?: Record<string, ModelsDevModel>;
+}
+/** One model entry. Only the fields the seeder reads are declared. */
+interface ModelsDevModel {
+  id?: string;
+  cost?: ModelsDevCostBlock;
+}
+/**
+ * Rate block of a model entry. Every field is optional; the seeder copies
+ * each numeric value verbatim into `price_per_million_usd`.
+ */
+interface ModelsDevCostBlock {
+  input?: number;
+  output?: number;
+  cache_read?: number;
+  cache_write?: number;
+}
+/** Scale factor that turns a per-unit USD price into the table's "per million units" column. */
+const UNITS_PER_MILLION = 1_000_000;
+/** One hand-maintained rate, with provenance kept alongside the number. */
+interface ManualPricingOverride {
+  provider: PricingProvider;
+  model: string;
+  tokenClass: PricingTokenClass;
+  pricePerMillionUsd: number;
+  /** URL the rate was taken from. */
+  source: string;
+  /** Date the rate was last checked, YYYY-MM-DD. */
+  verified: string;
+}
+/**
+ * Rates that models.dev does not carry, maintained by hand per harness. Each
+ * entry records where the number came from and when it was last verified, so
+ * this list doubles as living documentation. Only the four pricing fields
+ * are written to the database; `source` and `verified` stay in code.
+ */
+const MANUAL_PRICING_OVERRIDES: ManualPricingOverride[] = [
+  {
+    provider: "claude-managed",
+    // '*' = applies regardless of which Claude model the managed run picks;
+    // the runtime fee is charged per session-hour, not per model.
+    model: "*",
+    tokenClass: "runtime_hour",
+    // $0.08 / hour, expressed as USD per "million units" so it fits the same
+    // rate table. The adapter multiplies by hours, not tokens — that unit is
+    // a convention specific to `runtime_hour`.
+    pricePerMillionUsd: 0.08 * UNITS_PER_MILLION,
+    source: "https://docs.claude.com/en/api/agent-sdk/managed-runtime#pricing",
+    verified: "2026-04-28",
+  },
+  {
+    provider: "devin",
+    model: "*",
+    tokenClass: "acu",
+    // NOTE(review): presumably USD 2.25 per ACU, scaled with the same
+    // convention as `runtime_hour` above — verify against the source URL.
+    pricePerMillionUsd: 2.25 * UNITS_PER_MILLION,
+    source: "https://devin.ai/pricing",
+    verified: "2026-04-28",
+  },
+];
+/**
+ * Adapter-specific shortname → models.dev key. Some adapters report `model`
+ * fields the models.dev snapshot doesn't index directly; we map them here.
+ *
+ * Each value is looked up under `cache.anthropic.models`; a shortname whose
+ * target is absent from the snapshot (or has no `cost` block) is skipped by
+ * the seeder. Entry order is the order in which `buildModelsDevSeedRows`
+ * emits the alias rows (it iterates with `Object.entries`).
+ *
+ * NOTE(review): the targets are pinned model versions — they presumably need
+ * bumping whenever the "current default" for a shortname moves; confirm.
+ */
+const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
+  opus: "claude-opus-4-7",
+  sonnet: "claude-sonnet-4-6",
+  haiku: "claude-haiku-4-5",
+};
+/**
+ * Load the vendored models.dev cache. The UI copy is canonical.
+ *
+ * Two locations are tried in order, both relative to `process.cwd()`:
+ * `ui/src/lib/modelsdev-cache.json` and `../ui/src/lib/modelsdev-cache.json`.
+ * Loading is best-effort: when no usable file is found the function returns
+ * `null` and the caller continues with manual rates only — better than
+ * crashing the boot.
+ *
+ * A file that simply does not exist is skipped silently (the developer may
+ * be running the server without `ui/` checked out). A file that exists but
+ * cannot be read, is not valid JSON, or is not a JSON object at the top level
+ * is reported with `console.warn` before moving on, so a corrupt snapshot is
+ * distinguishable from a missing one.
+ *
+ * @returns The parsed cache, or `null` when no candidate could be loaded.
+ */
+function loadModelsDevCache(): ModelsDevCache | null {
+  const candidates = [
+    path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
+    path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
+  ];
+  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));
+  for (const cand of candidates) {
+    let raw: string;
+    try {
+      raw = readFileSync(cand, "utf-8");
+    } catch (err: unknown) {
+      const code =
+        typeof err === "object" && err !== null && "code" in err
+          ? (err as { code?: unknown }).code
+          : undefined;
+      // ENOENT/ENOTDIR just mean "not at this location" — the expected case.
+      // Anything else (EACCES, EISDIR, …) hides a real problem, so surface it.
+      if (code !== "ENOENT" && code !== "ENOTDIR") {
+        console.warn(`[pricing] could not read ${cand}: ${describe(err)}`);
+      }
+      continue;
+    }
+    try {
+      const parsed: unknown = JSON.parse(raw);
+      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
+        return parsed as ModelsDevCache;
+      }
+      console.warn(`[pricing] ignoring ${cand}: expected a JSON object at the top level`);
+    } catch (err: unknown) {
+      console.warn(`[pricing] ignoring ${cand}: invalid JSON (${describe(err)})`);
+    }
+  }
+  return null;
+}
+/** One candidate row for the `pricing` table, before the seeder binds it to its INSERT. */
+interface PricingSeedRow {
+  provider: PricingProvider;
+  model: string;
+  tokenClass: PricingTokenClass;
+  pricePerMillionUsd: number;
+}
+/**
+ * models.dev cost field → pricing-table token class, in emission order.
+ * Note that `cache_read` is stored under the `cached_input` class.
+ */
+const COST_FIELD_TO_TOKEN_CLASS: ReadonlyArray<
+  readonly [keyof ModelsDevCostBlock, PricingTokenClass]
+> = [
+  ["input", "input"],
+  ["output", "output"],
+  ["cache_read", "cached_input"],
+  ["cache_write", "cache_write"],
+];
+/**
+ * Turn a models.dev `cost` block into pricing-table rows.
+ *
+ * The model id is first canonicalized with the same normalizer the lookup
+ * path uses, so seeded keys and queried keys collapse onto the same form.
+ *
+ * @param provider Pricing provider the rows are filed under.
+ * @param model    Model id as keyed by models.dev (or an alias for it).
+ * @param cost     The model's rate block.
+ * @returns One row per cost field whose value is a `number`, ordered
+ *          input, output, cached_input, cache_write.
+ */
+function projectCostBlock(
+  provider: PricingProvider,
+  model: string,
+  cost: ModelsDevCostBlock,
+): PricingSeedRow[] {
+  const key = normalizeModelKey(provider, model);
+  const rows: PricingSeedRow[] = [];
+  for (const [field, tokenClass] of COST_FIELD_TO_TOKEN_CLASS) {
+    const price = cost[field];
+    // Absent fields are skipped rather than seeded as a zero rate.
+    if (typeof price !== "number") continue;
+    rows.push({ provider, model: key, tokenClass, pricePerMillionUsd: price });
+  }
+  return rows;
+}
+/**
+ * Produce every seed row derivable from a loaded models.dev cache.
+ *
+ * The mapping is spelled out provider by provider on purpose, so the matrix
+ * between "what the adapter writes for `model`" and "what models.dev keys
+ * by" stays explicit and auditable. Rows are emitted in a fixed order:
+ * anthropic ids, anthropic shortnames (claude family, then pi), openai ids,
+ * openrouter ids. The result may contain several rows for the same
+ * `(provider, model, token_class)`; the seeder's `INSERT OR IGNORE` keeps
+ * the first one it sees.
+ *
+ * @param cache Parsed models.dev snapshot.
+ * @returns Candidate rows for the `pricing` table.
+ */
+function buildModelsDevSeedRows(cache: ModelsDevCache): PricingSeedRow[] {
+  const out: PricingSeedRow[] = [];
+  const emit = (provider: PricingProvider, modelId: string, cost: ModelsDevCostBlock): void => {
+    out.push(...projectCostBlock(provider, modelId, cost));
+  };
+  const claudeFamily = ["claude", "claude-managed"] as const;
+  const shortnames = Object.entries(ANTHROPIC_SHORTNAME_TO_MODELSDEV);
+
+  // ---- Anthropic / claude family ----------------------------------------
+  // Every priced anthropic model in the snapshot is filed under both the
+  // 'claude' (local-CLI adapter) and 'claude-managed' providers, keyed by
+  // the id models.dev uses for it.
+  const anthropicModels = cache.anthropic?.models ?? {};
+  for (const [id, entry] of Object.entries(anthropicModels)) {
+    const cost = entry?.cost;
+    if (!cost) continue;
+    for (const provider of claudeFamily) emit(provider, id, cost);
+  }
+  // Shortnames (opus/sonnet/haiku) inherit the rates of their mapped full id.
+  for (const [shortname, fullId] of shortnames) {
+    const cost = anthropicModels[fullId]?.cost;
+    if (!cost) continue;
+    for (const provider of claudeFamily) emit(provider, shortname, cost);
+  }
+  // The same shortnames are also filed under 'pi', which can run anthropic
+  // models too.
+  for (const [shortname, fullId] of shortnames) {
+    const cost = anthropicModels[fullId]?.cost;
+    if (!cost) continue;
+    emit("pi", shortname, cost);
+  }
+
+  // ---- OpenAI / codex family --------------------------------------------
+  const openaiModels = cache.openai?.models ?? {};
+  for (const [id, entry] of Object.entries(openaiModels)) {
+    const cost = entry?.cost;
+    if (!cost) continue;
+    emit("codex", id, cost);
+    // pi-mono can reach openai models through the github-copilot proxy
+    // (`github-copilot/gpt-5.4`). The lookup helper strips that prefix, so
+    // the bare id is seeded under `pi` as well; otherwise those runs end up
+    // with `costSource='unpriced'`.
+    emit("pi", id, cost);
+  }
+
+  // ---- OpenRouter passthrough (covers gemini + every opencode-routed model)
+  const openrouterModels = cache.openrouter?.models ?? {};
+  const googlePrefix = "google/";
+  for (const [id, entry] of Object.entries(openrouterModels)) {
+    const cost = entry?.cost;
+    if (!cost) continue;
+    // opencode routes whatever model the user picks, so project them all.
+    emit("opencode", id, cost);
+    // pi-mono also routes via OpenRouter when only OPENROUTER_API_KEY is set
+    // (see src/providers/pi-mono-adapter.ts); without these rows, pi runs on
+    // non-anthropic models (e.g. deepseek/deepseek-v4-flash) would be
+    // `costSource='unpriced'` despite being in the snapshot.
+    emit("pi", id, cost);
+    if (!id.startsWith(googlePrefix)) continue;
+    // Gemini: file the model under provider 'gemini' twice — first by the
+    // stripped name, then by the full "google/..." id — so callers that tag
+    // provider='gemini' resolve with either spelling.
+    emit("gemini", id.slice(googlePrefix.length), cost);
+    emit("gemini", id, cost);
+  }
+  return out;
+}
+/**
+ * Phase 2 entrypoint: seed the `pricing` table from the models.dev snapshot
+ * plus the manual overrides.
+ *
+ * Every candidate is written with `INSERT OR IGNORE` at `effective_from = 0`
+ * (and `createdAt` / `lastUpdatedAt` of 0), all inside one transaction, so a
+ * row whose primary key already exists is left untouched and repeat calls
+ * insert nothing new. When the snapshot cannot be loaded, only the manual
+ * overrides are offered.
+ *
+ * @param opts.quiet Suppress the one-line summary log when true.
+ * @returns `inserted` — number of rows actually added by this call;
+ *          `modelsdevFound` — whether the models.dev snapshot was loaded.
+ */
+export function seedPricingFromModelsDev(opts?: { quiet?: boolean }): {
+  inserted: number;
+  modelsdevFound: boolean;
+} {
+  const db = getDb();
+  const cache = loadModelsDevCache();
+  const fromModelsDev = cache ? buildModelsDevSeedRows(cache) : [];
+  // Keep only the pricing fields; `source` / `verified` are documentation.
+  const fromOverrides = MANUAL_PRICING_OVERRIDES.map(
+    ({ provider, model, tokenClass, pricePerMillionUsd }) => ({
+      provider,
+      model,
+      tokenClass,
+      pricePerMillionUsd,
+    }),
+  );
+  const candidates = [...fromModelsDev, ...fromOverrides];
+  const stmt = db.prepare<null, [string, string, string, number]>(
+    `INSERT OR IGNORE INTO pricing
+       (provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt)
+     VALUES (?, ?, ?, 0, ?, 0, 0)`,
+  );
+  let inserted = 0;
+  const insertAll = db.transaction((rows: PricingSeedRow[]) => {
+    for (const { provider, model, tokenClass, pricePerMillionUsd } of rows) {
+      // `changes` is 0 when the row already existed and the insert was ignored.
+      const { changes } = stmt.run(provider, model, tokenClass, pricePerMillionUsd);
+      if (changes > 0) inserted += 1;
+    }
+  });
+  insertAll(candidates);
+  const modelsdevFound = !!cache;
+  if (opts?.quiet) return { inserted, modelsdevFound };
+  const snapshotState = modelsdevFound ? "loaded" : "missing";
+  console.log(
+    `[pricing] seed: ${inserted} new row(s); ${candidates.length} candidate(s); modelsdev=${snapshotState}`,
+  );
+  return { inserted, modelsdevFound };
+}

package/src/commands/claude-managed-setup.ts CHANGED Viewed

@@ -553,12 +553,28 @@ export async function runClaudeManagedSetupFlow(
     system: mcpServer
       ? "You are an agent-swarm worker. Per-task instructions arrive in the next user message. Use the agent-swarm MCP server for swarm operations."
       : "You are an agent-swarm worker. Per-task instructions arrive in the next user message. (No MCP tools available in this configuration.)",
+    // Headless workers can't satisfy interactive approval prompts — the
+    // Anthropic console parks tool calls in `awaiting approval` and the
+    // session stalls. Apply `always_allow` to both toolsets so the sandbox
+    // executes tool calls (incl. swarm MCP `store-progress`) without HITL.
     tools: mcpServer
       ? [
-          { type: "agent_toolset_20260401" },
-          { type: "mcp_toolset", mcp_server_name: mcpServer.name },
+          {
+            type: "agent_toolset_20260401",
+            default_config: { permission_policy: { type: "always_allow" } },
+          },
+          {
+            type: "mcp_toolset",
+            mcp_server_name: mcpServer.name,
+            default_config: { permission_policy: { type: "always_allow" } },
+          },
         ]
-      : [{ type: "agent_toolset_20260401" }],
+      : [
+          {
+            type: "agent_toolset_20260401",
+            default_config: { permission_policy: { type: "always_allow" } },
+          },
+        ],
     skills: skillsParam,
     ...(mcpServer ? { mcp_servers: [mcpServer] } : {}),
   };