npm - @desplega.ai/agent-swarm - Versions diffs - 1.79.4 → 1.80.0 - Mend

@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/openapi.json +98 -19
package/package.json +12 -6
package/src/be/db.ts +101 -30
package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
package/src/be/pricing-normalize.ts +81 -0
package/src/be/seed-pricing.ts +293 -0
package/src/commands/claude-managed-setup.ts +19 -3
package/src/commands/runner.ts +592 -237
package/src/http/context.ts +6 -2
package/src/http/index.ts +115 -68
package/src/http/session-data.ts +74 -23
package/src/otel-impl.ts +200 -0
package/src/otel.ts +127 -0
package/src/providers/claude-adapter.ts +30 -5
package/src/providers/claude-managed-adapter.ts +43 -17
package/src/providers/claude-managed-pricing.ts +34 -0
package/src/providers/codex-adapter.ts +38 -27
package/src/providers/codex-models.ts +22 -3
package/src/providers/devin-adapter.ts +11 -0
package/src/providers/opencode-adapter.ts +31 -7
package/src/providers/pi-mono-adapter.ts +39 -7
package/src/providers/pricing-sources.md +52 -0
package/src/providers/swarm-events-shared.ts +8 -4
package/src/providers/types.ts +33 -10
package/src/server.ts +6 -0
package/src/tests/claude-managed-adapter.test.ts +17 -3
package/src/tests/claude-managed-setup.test.ts +10 -1
package/src/tests/codex-adapter.test.ts +20 -19
package/src/tests/context-snapshot.test.ts +2 -2
package/src/tests/context-window.test.ts +65 -1
package/src/tests/devin-adapter.test.ts +2 -0
package/src/tests/http/context-routes.test.ts +161 -0
package/src/tests/migration-063-schema-relax.test.ts +109 -0
package/src/tests/opencode-adapter.test.ts +146 -1
package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
package/src/tests/pages-view-count.test.ts +30 -5
package/src/tests/providers/codex-cost.test.ts +18 -0
package/src/tests/providers/opencode-cost.test.ts +74 -0
package/src/tests/providers/pi-cost.test.ts +128 -0
package/src/tests/secret-scrubber.test.ts +19 -0
package/src/tests/session-costs-codex-recompute.test.ts +35 -22
package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
package/src/tests/store-progress-cost.test.ts +6 -1
package/src/tools/store-progress.ts +16 -60
package/src/tools/utils.ts +65 -12
package/src/types.ts +62 -9
package/src/utils/context-window.ts +104 -4
package/src/utils/secret-scrubber.ts +7 -0

package/src/tests/providers/pi-cost.test.ts ADDED Viewed

@@ -0,0 +1,128 @@
+// Phase 3 fix — regression guard that PiMonoSession stamps `provider: "pi"`
+// on every CostData it emits. Without this tag the API server recompute
+// branch in src/http/session-data.ts falls through to costSource='harness'
+// instead of engaging the pricing-table lookup, so a perfectly-priced model
+// (e.g. `openrouter/deepseek/deepseek-v4-flash`) silently renders as un-priced.
+//
+// Mirrors the narrow, single-purpose shape of src/tests/providers/codex-cost.test.ts.
+import { describe, expect, test } from "bun:test";
+import { mkdirSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { PiMonoSession } from "../../providers/pi-mono-adapter";
+import type { ProviderEvent, ProviderSessionConfig } from "../../providers/types";
+/**
+ * Build a hand-rolled fake `AgentSession` that exercises the pi-mono-adapter
+ * cost-emission path without booting the real pi-coding-agent runtime.
+ *
+ * The adapter calls (in order, inside `runSession()`):
+ *   1. `prompt(text, opts)`   — resolves immediately for the fake
+ *   2. `waitForIdle()`        — reads `isStreaming` (we pin to `false`)
+ *   3. `getSessionStats()`    — returns the canned token/cost shape
+ *
+ * `subscribe(cb)` is called twice (once in the constructor for the normal
+ * event handler, once optionally in `waitForIdle`). Returning a noop
+ * unsubscriber is enough.
+ *
+ * @param opts.sessionId     Value exposed as the fake's `sessionId`.
+ * @param opts.modelProvider Value exposed as `model.provider`.
+ * @param opts.modelId       Value exposed as `model.id`.
+ * @returns `fake`, the stub cast to `AgentSession`, and `callPromptResolve`,
+ *   the resolver of the internal prompt gate. The gate is already resolved
+ *   by the time this function returns, so calling `callPromptResolve` again
+ *   has no further effect.
+ */
+function makeFakeAgentSession(opts: {
+  sessionId: string;
+  modelProvider: string;
+  modelId: string;
+}): {
+  fake: import("@earendil-works/pi-coding-agent").AgentSession;
+  callPromptResolve: () => void;
+} {
+  let promptResolve: () => void = () => {}; // placeholder until the Promise executor below installs the real resolver
+  const promptDone = new Promise<void>((r) => {
+    promptResolve = r;
+  });
+  const fake = {
+    sessionId: opts.sessionId,
+    model: { provider: opts.modelProvider, id: opts.modelId },
+    isStreaming: false,
+    subscribe: (_cb: unknown) => () => {},
+    prompt: async () => {
+      // Await the prompt gate. The factory resolves the gate before it
+      // returns, so this never really blocks: it settles right away and the
+      // adapter can move on to reading session stats deterministically.
+      await promptDone;
+    },
+    getSessionStats: () => ({
+      tokens: { input: 64463, output: 313, cacheRead: 31616, cacheWrite: 0, total: 96392 },
+      // Pi-mono uses `stats.cost` directly. We pin a non-zero value so we can
+      // still assert it round-trips, but the load-bearing field for this
+      // suite is `provider` regardless of dollars.
+      cost: 0.008,
+      userMessages: 1,
+      assistantMessages: 1,
+    }),
+    getContextUsage: () => undefined,
+    dispose: () => {},
+  };
+  // Resolve the prompt gate immediately — the adapter awaits prompt() before
+  // waitForIdle() reads `isStreaming`, but our fake's `isStreaming` is `false`
+  // so waitForIdle resolves right away.
+  promptResolve();
+  return {
+    // The pi-coding-agent AgentSession surface area is wide; we cast through
+    // `unknown` because the test only needs the few members stubbed above.
+    fake: fake as unknown as import("@earendil-works/pi-coding-agent").AgentSession,
+    callPromptResolve: promptResolve,
+  };
+}
+function makeConfig(logFile: string): ProviderSessionConfig { // fixed worker-role session config; only `logFile` varies per call
+  return {
+    prompt: "do a thing",
+    systemPrompt: "be helpful",
+    // The exact harness-emitted model id from today's E2E run. This is the
+    // case `normalizeModelKey('pi', ...)` must collapse onto a seeded
+    // `deepseek/deepseek-v4-flash` row.
+    model: "openrouter/deepseek/deepseek-v4-flash",
+    role: "worker",
+    agentId: "agent-1",
+    taskId: "task-1",
+    apiUrl: "http://localhost:0", // placeholder URL — presumably never contacted in this test; confirm
+    apiKey: "test-key",
+    cwd: "/tmp",
+    logFile,
+  };
+}
+describe("PiMonoSession — provider tag on CostData", () => {
+  test("waitForCompletion → result.cost.provider === 'pi'", async () => {
+    const dir = join(tmpdir(), `pi-cost-test-${Date.now()}`); // NOTE(review): millisecond suffix can collide across concurrent runs; mkdtempSync would avoid that
+    mkdirSync(dir, { recursive: true });
+    const logFile = join(dir, "session.log");
+    try {
+      const { fake } = makeFakeAgentSession({
+        sessionId: "sess-pi-test",
+        modelProvider: "openrouter",
+        modelId: "deepseek/deepseek-v4-flash",
+      });
+      const events: ProviderEvent[] = [];
+      const session = new PiMonoSession(fake, makeConfig(logFile), false);
+      session.onEvent((e) => events.push(e)); // collect every emitted event so the "result" event can be inspected below
+      const result = await session.waitForCompletion();
+      // The load-bearing assertion. Phase 2's API recompute path keys off
+      // exactly this field; emitting CostData without it silently disables
+      // pricing-table tagging for the entire pi provider.
+      expect(result.cost?.provider).toBe("pi");
+      const resultEvent = events.find((e) => e.type === "result");
+      expect(resultEvent).toBeDefined();
+      if (resultEvent?.type === "result") {
+        expect(resultEvent.cost.provider).toBe("pi");
+        // Sanity — the reportedModel() helper composes `provider/id` so the
+        // server-side normalizer's prefix-strip has something to bite on.
+        expect(resultEvent.cost.model).toBe("openrouter/deepseek/deepseek-v4-flash");
+      }
+    } finally {
+      rmSync(dir, { recursive: true, force: true }); // always remove the temp dir, even when an assertion throws
+    }
+  });
+});

package/src/tests/secret-scrubber.test.ts CHANGED Viewed

@@ -113,6 +113,17 @@ describe("scrubSecrets — env-based replacement", () => {
     expect(out).not.toContain("sk-proj-abcd1234567890");
   });
+  test("redacts OTLP exporter headers from env", () => {
+    process.env.OTEL_EXPORTER_OTLP_HEADERS = "signoz-ingestion-key=localSignozKey_1234567890abcdef";
+    refreshSecretScrubberCache();
+    const out = scrubSecrets(
+      "OTEL_EXPORTER_OTLP_HEADERS=signoz-ingestion-key=localSignozKey_1234567890abcdef",
+    );
+    expect(out).toBe("OTEL_EXPORTER_OTLP_HEADERS=[REDACTED:OTEL_EXPORTER_OTLP_HEADERS]");
+  });
   test("cache rebuilds after refresh when new secret is added", () => {
     const out1 = scrubSecrets("no secret yet here_abcdefghij");
     expect(out1).toBe("no secret yet here_abcdefghij");
@@ -202,6 +213,14 @@ describe("scrubSecrets — regex patterns", () => {
     const out = scrubSecrets("token=ghp_1234567890abcdefABCDEF1234567890ABCD");
     expect(out).toContain("[REDACTED:github_token]");
   });
+  test("redacts SigNoz ingestion-key headers even when env is empty", () => {
+    const out = scrubSecrets(
+      "OTEL_EXPORTER_OTLP_HEADERS=signoz-ingestion-key=localSignozKey_1234567890abcdef",
+    );
+    expect(out).toBe("OTEL_EXPORTER_OTLP_HEADERS=[REDACTED:signoz_ingestion_key]");
+  });
 });
 describe("scrubSecrets — does not over-scrub", () => {

package/src/tests/session-costs-codex-recompute.test.ts CHANGED Viewed

@@ -98,7 +98,7 @@ interface CreatedCostResponse {
   cost: {
     id: string;
     totalCostUsd: number;
-    costSource: "harness" | "pricing-table";
+    costSource: "harness" | "pricing-table" | "unpriced";
     model: string;
   };
 }
@@ -153,15 +153,9 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
     expect(body.cost.totalCostUsd).toBeCloseTo(6.64, 5);
   });
-  test("provider=codex but a token class is missing → falls back to worker value, costSource='harness'", async () => {
-    // Only seed input + cached_input. Missing output forces fallback.
-    insertPricingRow({
-      provider: "codex",
-      model: "codex-test-synth",
-      tokenClass: "input",
-      effectiveFrom: 1,
-      pricePerMillionUsd: 2.0,
-    });
+  test("provider=codex but input/output rows missing → 'unpriced', worker value preserved", async () => {
+    // Only seed cached_input. Missing input + output blocks recompute and
+    // Phase 2 tags the row 'unpriced' (no rates means we can't trust harness USD either).
     insertPricingRow({
       provider: "codex",
       model: "codex-test-synth",
@@ -186,13 +180,16 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
     });
     expect(res.status).toBe(201);
     const body = (await res.json()) as CreatedCostResponse;
-    expect(body.cost.costSource).toBe("harness");
-    // Worker value preserved verbatim.
+    // Phase 2: provider tagged but no input/output rows ⇒ 'unpriced'.
+    expect(body.cost.costSource).toBe("unpriced");
+    // Worker value preserved verbatim — we don't fabricate one.
     expect(body.cost.totalCostUsd).toBe(1.23);
   });
-  test("provider=claude records harness USD as-is regardless of DB pricing rows", async () => {
-    // Even if there are codex pricing rows, claude must NOT be touched.
+  test("provider=claude with no pricing rows for the model → 'unpriced' (Phase 2)", async () => {
+    // Phase 2 extended the recompute path from codex-only to every provider.
+    // With no pricing rows seeded for ('claude', 'sonnet-4'), the row is
+    // tagged 'unpriced' rather than 'harness' — the UI surfaces it as a yellow badge.
     const res = await authedFetch(`/api/session-costs`, {
       method: "POST",
       body: JSON.stringify({
@@ -209,20 +206,35 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
     });
     expect(res.status).toBe(201);
     const body = (await res.json()) as CreatedCostResponse;
-    expect(body.cost.costSource).toBe("harness");
+    expect(body.cost.costSource).toBe("unpriced");
     expect(body.cost.totalCostUsd).toBe(7.77);
   });
-  test("provider=pi records harness USD as-is regardless of DB pricing rows", async () => {
+  test("provider=pi with seeded pricing rows → recomputes (Phase 2)", async () => {
+    // Phase 2 widens recompute beyond codex. Seed pi rows so we get a hit.
+    insertPricingRow({
+      provider: "pi",
+      model: "pi-test",
+      tokenClass: "input",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 0.5,
+    });
+    insertPricingRow({
+      provider: "pi",
+      model: "pi-test",
+      tokenClass: "output",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 3.0,
+    });
     const res = await authedFetch(`/api/session-costs`, {
       method: "POST",
       body: JSON.stringify({
         sessionId: "pi-passthrough-1",
         agentId: testAgent.id,
-        totalCostUsd: 0.42,
-        inputTokens: 10,
-        outputTokens: 5,
-        model: "openrouter/google/gemini-3-flash-preview",
+        totalCostUsd: 0.42, // expected to be overwritten
+        inputTokens: 1_000_000, // 1M input
+        outputTokens: 1_000_000, // 1M output
+        model: "pi-test",
         provider: "pi",
         durationMs: 1_000,
         numTurns: 1,
@@ -230,8 +242,9 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
     });
     expect(res.status).toBe(201);
     const body = (await res.json()) as CreatedCostResponse;
-    expect(body.cost.costSource).toBe("harness");
-    expect(body.cost.totalCostUsd).toBe(0.42);
+    expect(body.cost.costSource).toBe("pricing-table");
+    // 1M @ 0.5 + 1M @ 3.0 = $3.50
+    expect(body.cost.totalCostUsd).toBeCloseTo(3.5, 5);
   });
   test("provider field omitted → no recompute, costSource='harness' (back-compat)", async () => {

package/src/tests/session-costs-model-key-normalize.test.ts ADDED Viewed

@@ -0,0 +1,271 @@
+// Phase 2 fix — adapter-emitted model ids carry harness-specific routing
+// prefixes (`openrouter/`, `github-copilot/`, …) that the pricing seed does
+// not. Before the fix every opencode + pi-via-copilot run fell through to
+// `costSource='unpriced'` even when a seeded rate row existed. This suite
+// is a regression guard for the drift cases observed in real-harness E2E.
+import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test";
+import { unlink } from "node:fs/promises";
+import {
+  createServer as createHttpServer,
+  type IncomingMessage,
+  type Server,
+  type ServerResponse,
+} from "node:http";
+import { closeDb, createAgent, getDb, initDb, insertPricingRow } from "../be/db";
+import { normalizeModelKey } from "../be/pricing-normalize";
+import { handleCore } from "../http/core";
+import { handleSessionData } from "../http/session-data";
+import { getPathSegments, parseQueryParams } from "../http/utils";
+const TEST_DB_PATH = "./test-model-key-normalize.sqlite"; // SQLite file created in beforeAll and removed in afterAll
+const API_KEY = "test-model-key-normalize"; // bearer token shared by the test server and authedFetch
+async function removeDbFiles(path: string): Promise<void> { // delete the DB file plus its `-wal` / `-shm` companions; missing files are ignored
+  for (const suffix of ["", "-wal", "-shm"]) {
+    try {
+      await unlink(path + suffix);
+    } catch (error) {
+      if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error; // only "file does not exist" is tolerated; anything else propagates
+    }
+  }
+}
+async function listen(server: Server): Promise<number> { // start listening on port 0 and return the port that was actually bound
+  await new Promise<void>((resolve) => server.listen(0, resolve)); // NOTE(review): no "error" handler, so a failed bind would leave this promise pending
+  const addr = server.address();
+  if (!addr || typeof addr === "string") throw new Error("no port"); // null or a string address carries no numeric port
+  return addr.port;
+}
+function createTestServer(apiKey: string): Server { // HTTP server that tries handleCore first, then handleSessionData, else answers 404
+  return createHttpServer(async (req: IncomingMessage, res: ServerResponse) => {
+    const myAgentId = req.headers["x-agent-id"] as string | undefined; // caller identity is taken from the X-Agent-ID request header
+    const handled = await handleCore(req, res, myAgentId, apiKey);
+    if (handled) return; // handleCore reported the request as handled
+    const pathSegments = getPathSegments(req.url || "");
+    const queryParams = parseQueryParams(req.url || "");
+    const ok = await handleSessionData(req, res, pathSegments, queryParams, myAgentId);
+    if (!ok) { // neither handler claimed the request
+      res.writeHead(404);
+      res.end("Not Found");
+    }
+  });
+}
+let server: Server; // assigned in beforeAll, closed in afterAll
+let port: number; // port returned by listen(); used by authedFetch to build URLs
+let testAgent: { id: string }; // agent whose id is sent as `agentId` in the POST bodies
+beforeAll(async () => {
+  await removeDbFiles(TEST_DB_PATH); // clear any files left behind by an earlier run
+  initDb(TEST_DB_PATH);
+  testAgent = createAgent({ name: "model-key-normalize-test", isLead: false, status: "idle" });
+  server = createTestServer(API_KEY);
+  port = await listen(server);
+});
+afterAll(async () => {
+  await new Promise<void>((resolve) => server.close(() => resolve())); // wait for the server to finish closing before touching the DB
+  closeDb();
+  await removeDbFiles(TEST_DB_PATH);
+});
+afterEach(() => {
+  const db = getDb();
+  db.prepare("DELETE FROM session_costs").run(); // drop every cost row so tests stay independent
+  db.prepare("DELETE FROM pricing WHERE effective_from > 0").run(); // removes rows the tests insert (effectiveFrom: 1); presumably keeps seeded rows at effective_from = 0 — confirm
+});
+function authedFetch(path: string, init: RequestInit = {}): Promise<Response> { // fetch `path` on the local test server with bearer auth and a JSON content type
+  return fetch(`http://localhost:${port}${path}`, {
+    ...init,
+    headers: {
+      Authorization: `Bearer ${API_KEY}`,
+      "Content-Type": "application/json",
+      ...(init.headers ?? {}), // spread last, so caller-supplied headers override the two defaults
+    },
+  });
+}
+interface CostResponse { // subset of the POST /api/session-costs response body that these tests read
+  success: boolean;
+  cost: {
+    totalCostUsd: number;
+    model: string; // asserted below to equal the model string exactly as it was posted
+    costSource: "harness" | "pricing-table" | "unpriced";
+  };
+}
+describe("normalizeModelKey()", () => {
+  test("strips opencode routing prefix `openrouter/`", () => {
+    expect(normalizeModelKey("opencode", "openrouter/anthropic/claude-sonnet-4.5")).toBe(
+      "anthropic/claude-sonnet-4.5",
+    );
+  });
+  test("strips pi routing prefix `github-copilot/`", () => {
+    expect(normalizeModelKey("pi", "github-copilot/gpt-5.4")).toBe("gpt-5.4");
+  });
+  test("strips pi routing prefix `openrouter/`", () => {
+    expect(normalizeModelKey("pi", "openrouter/anthropic/claude-sonnet-4.5")).toBe(
+      "anthropic/claude-sonnet-4.5",
+    );
+  });
+  test("strips pi routing prefix `openrouter/` for deepseek (Phase 3 fix regression)", () => {
+    // The exact case from the 2026-05-18 E2E run: pi-mono emits
+    // `openrouter/deepseek/deepseek-v4-flash`, while the pricing seed keys the
+    // row under bare `deepseek/deepseek-v4-flash`. The normalizer collapsed this
+    // drift before the assertion existed; keep it as an explicit regression guard.
+    expect(normalizeModelKey("pi", "openrouter/deepseek/deepseek-v4-flash")).toBe(
+      "deepseek/deepseek-v4-flash",
+    );
+  });
+  test("strips opencode routing prefix `openrouter/` for deepseek (Phase 3 fix regression)", () => {
+    // Same model, different harness — opencode-adapter wraps the underlying
+    // model id under the same `openrouter/` proxy prefix.
+    expect(normalizeModelKey("opencode", "openrouter/deepseek/deepseek-v4-flash")).toBe(
+      "deepseek/deepseek-v4-flash",
+    );
+  });
+  test("is a no-op for canonical claude ids", () => {
+    expect(normalizeModelKey("claude", "claude-opus-4-7")).toBe("claude-opus-4-7");
+  });
+  test("is idempotent", () => {
+    const once = normalizeModelKey("opencode", "openrouter/anthropic/claude-sonnet-4.5");
+    const twice = normalizeModelKey("opencode", once); // feeding the normalized key back in must not change it
+    expect(twice).toBe(once);
+  });
+  test("lowercases mixed-case input", () => {
+    expect(normalizeModelKey("opencode", "OpenRouter/Anthropic/Claude-Sonnet-4.5")).toBe(
+      "anthropic/claude-sonnet-4.5",
+    );
+  });
+});
+describe("Phase 2 fix — POST /api/session-costs normalizes routing prefixes", () => {
+  test("opencode `openrouter/anthropic/claude-sonnet-4.5` resolves the seeded `anthropic/claude-sonnet-4.5` row", async () => {
+    // Seed mirrors what models.dev → seed-pricing.ts produces for the
+    // openrouter section: bare `anthropic/<id>` under the `opencode` provider.
+    insertPricingRow({
+      provider: "opencode",
+      model: "anthropic/claude-sonnet-4.5",
+      tokenClass: "input",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 3,
+    });
+    insertPricingRow({
+      provider: "opencode",
+      model: "anthropic/claude-sonnet-4.5",
+      tokenClass: "output",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 15,
+    });
+    const res = await authedFetch(`/api/session-costs`, {
+      method: "POST",
+      body: JSON.stringify({
+        sessionId: "opencode-normalize-1",
+        agentId: testAgent.id,
+        totalCostUsd: 0.42, // harness-reported, expected to be overwritten
+        inputTokens: 1_000_000,
+        outputTokens: 100_000,
+        // The exact string the opencode adapter emits today.
+        model: "openrouter/anthropic/claude-sonnet-4.5",
+        provider: "opencode",
+        durationMs: 1_000,
+        numTurns: 1,
+      }),
+    });
+    expect(res.status).toBe(201);
+    const body = (await res.json()) as CostResponse;
+    // 1M @ $3 + 100k @ $15 = $3 + $1.50 = $4.50
+    expect(body.cost.costSource).toBe("pricing-table");
+    expect(body.cost.totalCostUsd).toBeCloseTo(4.5, 5);
+    // Original adapter-emitted string is preserved on the row for debugging.
+    expect(body.cost.model).toBe("openrouter/anthropic/claude-sonnet-4.5");
+  });
+  test("pi `github-copilot/gpt-5.4` resolves the seeded bare `gpt-5.4` row", async () => {
+    insertPricingRow({
+      provider: "pi",
+      model: "gpt-5.4",
+      tokenClass: "input",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 2,
+    });
+    insertPricingRow({
+      provider: "pi",
+      model: "gpt-5.4",
+      tokenClass: "output",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 8,
+    });
+    const res = await authedFetch(`/api/session-costs`, {
+      method: "POST",
+      body: JSON.stringify({
+        sessionId: "pi-copilot-normalize-1",
+        agentId: testAgent.id,
+        totalCostUsd: 9.99, // harness-reported, expected to be overwritten
+        inputTokens: 500_000,
+        outputTokens: 250_000,
+        model: "github-copilot/gpt-5.4",
+        provider: "pi",
+        durationMs: 1_000,
+        numTurns: 1,
+      }),
+    });
+    expect(res.status).toBe(201);
+    const body = (await res.json()) as CostResponse;
+    // 500k @ $2 + 250k @ $8 = $1 + $2 = $3
+    expect(body.cost.costSource).toBe("pricing-table");
+    expect(body.cost.totalCostUsd).toBeCloseTo(3.0, 5);
+    expect(body.cost.model).toBe("github-copilot/gpt-5.4"); // prefixed id is returned unchanged, as in the opencode case
+  });
+  test("claude `claude-opus-4-7` (no prefix) still resolves — regression guard", async () => {
+    // The bug report flagged claude-adapter as already-working. Make sure
+    // we did not regress its bare-id lookup.
+    insertPricingRow({
+      provider: "claude",
+      model: "claude-opus-4-7",
+      tokenClass: "input",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 15,
+    });
+    insertPricingRow({
+      provider: "claude",
+      model: "claude-opus-4-7",
+      tokenClass: "output",
+      effectiveFrom: 1,
+      pricePerMillionUsd: 75,
+    });
+    const res = await authedFetch(`/api/session-costs`, {
+      method: "POST",
+      body: JSON.stringify({
+        sessionId: "claude-bare-1",
+        agentId: testAgent.id,
+        totalCostUsd: 1.23, // harness-reported, expected to be overwritten
+        inputTokens: 1_000_000,
+        outputTokens: 100_000,
+        model: "claude-opus-4-7",
+        provider: "claude",
+        durationMs: 1_000,
+        numTurns: 1,
+      }),
+    });
+    expect(res.status).toBe(201);
+    const body = (await res.json()) as CostResponse;
+    // 1M @ $15 + 100k @ $75 = $15 + $7.50 = $22.50
+    expect(body.cost.costSource).toBe("pricing-table");
+    expect(body.cost.totalCostUsd).toBeCloseTo(22.5, 5);
+  });
+});