@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/openapi.json +98 -19
  2. package/package.json +12 -6
  3. package/src/be/db.ts +101 -30
  4. package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
  5. package/src/be/pricing-normalize.ts +81 -0
  6. package/src/be/seed-pricing.ts +293 -0
  7. package/src/commands/claude-managed-setup.ts +19 -3
  8. package/src/commands/runner.ts +592 -237
  9. package/src/http/context.ts +6 -2
  10. package/src/http/index.ts +115 -68
  11. package/src/http/session-data.ts +74 -23
  12. package/src/otel-impl.ts +200 -0
  13. package/src/otel.ts +127 -0
  14. package/src/providers/claude-adapter.ts +30 -5
  15. package/src/providers/claude-managed-adapter.ts +43 -17
  16. package/src/providers/claude-managed-pricing.ts +34 -0
  17. package/src/providers/codex-adapter.ts +38 -27
  18. package/src/providers/codex-models.ts +22 -3
  19. package/src/providers/devin-adapter.ts +11 -0
  20. package/src/providers/opencode-adapter.ts +31 -7
  21. package/src/providers/pi-mono-adapter.ts +39 -7
  22. package/src/providers/pricing-sources.md +52 -0
  23. package/src/providers/swarm-events-shared.ts +8 -4
  24. package/src/providers/types.ts +33 -10
  25. package/src/server.ts +6 -0
  26. package/src/tests/claude-managed-adapter.test.ts +17 -3
  27. package/src/tests/claude-managed-setup.test.ts +10 -1
  28. package/src/tests/codex-adapter.test.ts +20 -19
  29. package/src/tests/context-snapshot.test.ts +2 -2
  30. package/src/tests/context-window.test.ts +65 -1
  31. package/src/tests/devin-adapter.test.ts +2 -0
  32. package/src/tests/http/context-routes.test.ts +161 -0
  33. package/src/tests/migration-063-schema-relax.test.ts +109 -0
  34. package/src/tests/opencode-adapter.test.ts +146 -1
  35. package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
  36. package/src/tests/pages-view-count.test.ts +30 -5
  37. package/src/tests/providers/codex-cost.test.ts +18 -0
  38. package/src/tests/providers/opencode-cost.test.ts +74 -0
  39. package/src/tests/providers/pi-cost.test.ts +128 -0
  40. package/src/tests/secret-scrubber.test.ts +19 -0
  41. package/src/tests/session-costs-codex-recompute.test.ts +35 -22
  42. package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
  43. package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
  44. package/src/tests/store-progress-cost.test.ts +6 -1
  45. package/src/tools/store-progress.ts +16 -60
  46. package/src/tools/utils.ts +65 -12
  47. package/src/types.ts +62 -9
  48. package/src/utils/context-window.ts +104 -4
  49. package/src/utils/secret-scrubber.ts +7 -0
@@ -0,0 +1,128 @@
1
+ // Phase 3 fix — regression guard that PiMonoSession stamps `provider: "pi"`
2
+ // on every CostData it emits. Without this tag the API server recompute
3
+ // branch in src/http/session-data.ts falls through to costSource='harness'
4
+ // instead of engaging the pricing-table lookup, so a perfectly-priced model
5
+ // (e.g. `openrouter/deepseek/deepseek-v4-flash`) silently renders as un-priced.
6
+ //
7
+ // Mirrors the narrow, single-purpose shape of src/tests/providers/codex-cost.test.ts.
8
+
9
+ import { describe, expect, test } from "bun:test";
10
+ import { mkdirSync, rmSync } from "node:fs";
11
+ import { tmpdir } from "node:os";
12
+ import { join } from "node:path";
13
+ import { PiMonoSession } from "../../providers/pi-mono-adapter";
14
+ import type { ProviderEvent, ProviderSessionConfig } from "../../providers/types";
15
+
16
+ /**
17
+ * Build a hand-rolled fake `AgentSession` that exercises the pi-mono-adapter
18
+ * cost-emission path without booting the real pi-coding-agent runtime.
19
+ *
20
+ * The adapter calls (in order, inside `runSession()`):
21
+ * 1. `prompt(text, opts)` — resolves immediately for the fake
22
+ * 2. `waitForIdle()` — reads `isStreaming` (we pin it to `false`)
23
+ * 3. `getSessionStats()` — returns the canned token/cost shape
24
+ *
25
+ * `subscribe(cb)` is called up to twice (once in the constructor for the
26
+ * normal event handler, and optionally once more in `waitForIdle`). Returning
27
+ * a noop unsubscriber is enough.
28
+ */
29
+ function makeFakeAgentSession(opts: {
30
+ sessionId: string;
31
+ modelProvider: string;
32
+ modelId: string;
33
+ }): {
34
+ fake: import("@earendil-works/pi-coding-agent").AgentSession;
35
+ callPromptResolve: () => void;
36
+ } {
37
+ let promptResolve: () => void = () => {};
38
+ const promptDone = new Promise<void>((r) => {
39
+ promptResolve = r;
40
+ });
41
+ const fake = {
42
+ sessionId: opts.sessionId,
43
+ model: { provider: opts.modelProvider, id: opts.modelId },
44
+ isStreaming: false,
45
+ subscribe: (_cb: unknown) => () => {},
46
+ prompt: async () => {
47
+ // Gate on `promptDone`. This factory resolves that gate before it returns
48
+ // (see the `promptResolve()` call below), so this await completes without
49
+ // waiting on the test and the run stays deterministic.
50
+ await promptDone;
51
+ },
52
+ getSessionStats: () => ({
53
+ tokens: { input: 64463, output: 313, cacheRead: 31616, cacheWrite: 0, total: 96392 },
54
+ // Pi-mono uses `stats.cost` directly. We pin a non-zero value so we can
55
+ // still assert it round-trips, but the load-bearing field for this
56
+ // suite is `provider` regardless of dollars.
57
+ cost: 0.008,
58
+ userMessages: 1,
59
+ assistantMessages: 1,
60
+ }),
61
+ getContextUsage: () => undefined,
62
+ dispose: () => {},
63
+ };
64
+ // Resolve the prompt gate immediately — the adapter awaits prompt() before
65
+ // waitForIdle() reads `isStreaming`, but our fake's `isStreaming` is `false`
66
+ // so waitForIdle resolves right away.
67
+ promptResolve();
68
+ return {
69
+ // The pi-coding-agent AgentSession surface area is wide; we cast through
70
+ // `unknown` because the fake only stubs the members defined above.
71
+ fake: fake as unknown as import("@earendil-works/pi-coding-agent").AgentSession,
72
+ callPromptResolve: promptResolve,
73
+ };
74
+ }
75
+
76
+ function makeConfig(logFile: string): ProviderSessionConfig {
77
+ return {
78
+ prompt: "do a thing",
79
+ systemPrompt: "be helpful",
80
+ // The exact model id the harness emitted in a real E2E run. This is the
81
+ // case `normalizeModelKey('pi', ...)` must collapse onto a seeded
82
+ // `deepseek/deepseek-v4-flash` row.
83
+ model: "openrouter/deepseek/deepseek-v4-flash",
84
+ role: "worker",
85
+ agentId: "agent-1",
86
+ taskId: "task-1",
87
+ apiUrl: "http://localhost:0",
88
+ apiKey: "test-key",
89
+ cwd: "/tmp",
90
+ logFile,
91
+ };
92
+ }
93
+
94
+ describe("PiMonoSession — provider tag on CostData", () => {
95
+ test("waitForCompletion → result.cost.provider === 'pi'", async () => {
96
+ const dir = join(tmpdir(), `pi-cost-test-${Date.now()}`);
97
+ mkdirSync(dir, { recursive: true });
98
+ const logFile = join(dir, "session.log");
99
+ try {
100
+ const { fake } = makeFakeAgentSession({
101
+ sessionId: "sess-pi-test",
102
+ modelProvider: "openrouter",
103
+ modelId: "deepseek/deepseek-v4-flash",
104
+ });
105
+
106
+ const events: ProviderEvent[] = [];
107
+ const session = new PiMonoSession(fake, makeConfig(logFile), false);
108
+ session.onEvent((e) => events.push(e));
109
+
110
+ const result = await session.waitForCompletion();
111
+
112
+ // The load-bearing assertion. Phase 2's API recompute path keys off
113
+ // exactly this field; emitting CostData without it silently disables
114
+ // pricing-table tagging for the entire pi provider.
115
+ expect(result.cost?.provider).toBe("pi");
116
+ const resultEvent = events.find((e) => e.type === "result");
117
+ expect(resultEvent).toBeDefined();
118
+ if (resultEvent?.type === "result") {
119
+ expect(resultEvent.cost.provider).toBe("pi");
120
+ // Sanity — the reportedModel() helper composes `provider/id` so the
121
+ // server-side normalizer's prefix-strip has something to bite on.
122
+ expect(resultEvent.cost.model).toBe("openrouter/deepseek/deepseek-v4-flash");
123
+ }
124
+ } finally {
125
+ rmSync(dir, { recursive: true, force: true });
126
+ }
127
+ });
128
+ });
@@ -113,6 +113,17 @@ describe("scrubSecrets — env-based replacement", () => {
113
113
  expect(out).not.toContain("sk-proj-abcd1234567890");
114
114
  });
115
115
 
116
+ test("redacts OTLP exporter headers from env", () => {
117
+ process.env.OTEL_EXPORTER_OTLP_HEADERS = "signoz-ingestion-key=localSignozKey_1234567890abcdef";
118
+ refreshSecretScrubberCache();
119
+
120
+ const out = scrubSecrets(
121
+ "OTEL_EXPORTER_OTLP_HEADERS=signoz-ingestion-key=localSignozKey_1234567890abcdef",
122
+ );
123
+
124
+ expect(out).toBe("OTEL_EXPORTER_OTLP_HEADERS=[REDACTED:OTEL_EXPORTER_OTLP_HEADERS]");
125
+ });
126
+
116
127
  test("cache rebuilds after refresh when new secret is added", () => {
117
128
  const out1 = scrubSecrets("no secret yet here_abcdefghij");
118
129
  expect(out1).toBe("no secret yet here_abcdefghij");
@@ -202,6 +213,14 @@ describe("scrubSecrets — regex patterns", () => {
202
213
  const out = scrubSecrets("token=ghp_1234567890abcdefABCDEF1234567890ABCD");
203
214
  expect(out).toContain("[REDACTED:github_token]");
204
215
  });
216
+
217
+ test("redacts SigNoz ingestion-key headers even when env is empty", () => {
218
+ const out = scrubSecrets(
219
+ "OTEL_EXPORTER_OTLP_HEADERS=signoz-ingestion-key=localSignozKey_1234567890abcdef",
220
+ );
221
+
222
+ expect(out).toBe("OTEL_EXPORTER_OTLP_HEADERS=[REDACTED:signoz_ingestion_key]");
223
+ });
205
224
  });
206
225
 
207
226
  describe("scrubSecrets — does not over-scrub", () => {
@@ -98,7 +98,7 @@ interface CreatedCostResponse {
98
98
  cost: {
99
99
  id: string;
100
100
  totalCostUsd: number;
101
- costSource: "harness" | "pricing-table";
101
+ costSource: "harness" | "pricing-table" | "unpriced";
102
102
  model: string;
103
103
  };
104
104
  }
@@ -153,15 +153,9 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
153
153
  expect(body.cost.totalCostUsd).toBeCloseTo(6.64, 5);
154
154
  });
155
155
 
156
- test("provider=codex but a token class is missing → falls back to worker value, costSource='harness'", async () => {
157
- // Only seed input + cached_input. Missing output forces fallback.
158
- insertPricingRow({
159
- provider: "codex",
160
- model: "codex-test-synth",
161
- tokenClass: "input",
162
- effectiveFrom: 1,
163
- pricePerMillionUsd: 2.0,
164
- });
156
+ test("provider=codex but input/output rows missing → 'unpriced', worker value preserved", async () => {
157
+ // Only seed cached_input. Missing input + output blocks recompute and
158
+ // Phase 2 tags the row 'unpriced' (no rates means we can't trust harness USD either).
165
159
  insertPricingRow({
166
160
  provider: "codex",
167
161
  model: "codex-test-synth",
@@ -186,13 +180,16 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
186
180
  });
187
181
  expect(res.status).toBe(201);
188
182
  const body = (await res.json()) as CreatedCostResponse;
189
- expect(body.cost.costSource).toBe("harness");
190
- // Worker value preserved verbatim.
183
+ // Phase 2: provider tagged but no input/output rows ⇒ 'unpriced'.
184
+ expect(body.cost.costSource).toBe("unpriced");
185
+ // Worker value preserved verbatim — we don't fabricate one.
191
186
  expect(body.cost.totalCostUsd).toBe(1.23);
192
187
  });
193
188
 
194
- test("provider=claude records harness USD as-is regardless of DB pricing rows", async () => {
195
- // Even if there are codex pricing rows, claude must NOT be touched.
189
+ test("provider=claude with no pricing rows for the model 'unpriced' (Phase 2)", async () => {
190
+ // Phase 2 extended the recompute path from codex-only to every provider.
191
+ // With no pricing rows seeded for ('claude', 'sonnet-4'), the row is
192
+ // tagged 'unpriced' rather than 'harness' — the UI surfaces it as a yellow badge.
196
193
  const res = await authedFetch(`/api/session-costs`, {
197
194
  method: "POST",
198
195
  body: JSON.stringify({
@@ -209,20 +206,35 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
209
206
  });
210
207
  expect(res.status).toBe(201);
211
208
  const body = (await res.json()) as CreatedCostResponse;
212
- expect(body.cost.costSource).toBe("harness");
209
+ expect(body.cost.costSource).toBe("unpriced");
213
210
  expect(body.cost.totalCostUsd).toBe(7.77);
214
211
  });
215
212
 
216
- test("provider=pi records harness USD as-is regardless of DB pricing rows", async () => {
213
+ test("provider=pi with seeded pricing rows recomputes (Phase 2)", async () => {
214
+ // Phase 2 widens recompute beyond codex. Seed pi rows so we get a hit.
215
+ insertPricingRow({
216
+ provider: "pi",
217
+ model: "pi-test",
218
+ tokenClass: "input",
219
+ effectiveFrom: 1,
220
+ pricePerMillionUsd: 0.5,
221
+ });
222
+ insertPricingRow({
223
+ provider: "pi",
224
+ model: "pi-test",
225
+ tokenClass: "output",
226
+ effectiveFrom: 1,
227
+ pricePerMillionUsd: 3.0,
228
+ });
217
229
  const res = await authedFetch(`/api/session-costs`, {
218
230
  method: "POST",
219
231
  body: JSON.stringify({
220
232
  sessionId: "pi-passthrough-1",
221
233
  agentId: testAgent.id,
222
- totalCostUsd: 0.42,
223
- inputTokens: 10,
224
- outputTokens: 5,
225
- model: "openrouter/google/gemini-3-flash-preview",
234
+ totalCostUsd: 0.42, // expected to be overwritten
235
+ inputTokens: 1_000_000, // 1M input
236
+ outputTokens: 1_000_000, // 1M output
237
+ model: "pi-test",
226
238
  provider: "pi",
227
239
  durationMs: 1_000,
228
240
  numTurns: 1,
@@ -230,8 +242,9 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
230
242
  });
231
243
  expect(res.status).toBe(201);
232
244
  const body = (await res.json()) as CreatedCostResponse;
233
- expect(body.cost.costSource).toBe("harness");
234
- expect(body.cost.totalCostUsd).toBe(0.42);
245
+ expect(body.cost.costSource).toBe("pricing-table");
246
+ // 1M @ 0.5 + 1M @ 3.0 = $3.50
247
+ expect(body.cost.totalCostUsd).toBeCloseTo(3.5, 5);
235
248
  });
236
249
 
237
250
  test("provider field omitted → no recompute, costSource='harness' (back-compat)", async () => {
@@ -0,0 +1,271 @@
1
+ // Phase 2 fix — adapter-emitted model ids carry harness-specific routing
2
+ // prefixes (`openrouter/`, `github-copilot/`, …) that the pricing seed does
3
+ // not. Before the fix every opencode + pi-via-copilot run fell through to
4
+ // `costSource='unpriced'` even when a seeded rate row existed. This suite
5
+ // regression-tests the drift cases observed in real-harness E2E.
6
+
7
+ import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test";
8
+ import { unlink } from "node:fs/promises";
9
+ import {
10
+ createServer as createHttpServer,
11
+ type IncomingMessage,
12
+ type Server,
13
+ type ServerResponse,
14
+ } from "node:http";
15
+ import { closeDb, createAgent, getDb, initDb, insertPricingRow } from "../be/db";
16
+ import { normalizeModelKey } from "../be/pricing-normalize";
17
+ import { handleCore } from "../http/core";
18
+ import { handleSessionData } from "../http/session-data";
19
+ import { getPathSegments, parseQueryParams } from "../http/utils";
20
+
21
+ const TEST_DB_PATH = "./test-model-key-normalize.sqlite";
22
+ const API_KEY = "test-model-key-normalize";
23
+
24
+ async function removeDbFiles(path: string): Promise<void> {
25
+ for (const suffix of ["", "-wal", "-shm"]) {
26
+ try {
27
+ await unlink(path + suffix);
28
+ } catch (error) {
29
+ if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
30
+ }
31
+ }
32
+ }
33
+
34
+ async function listen(server: Server): Promise<number> {
35
+ await new Promise<void>((resolve) => server.listen(0, resolve));
36
+ const addr = server.address();
37
+ if (!addr || typeof addr === "string") throw new Error("no port");
38
+ return addr.port;
39
+ }
40
+
41
+ function createTestServer(apiKey: string): Server {
42
+ return createHttpServer(async (req: IncomingMessage, res: ServerResponse) => {
43
+ const myAgentId = req.headers["x-agent-id"] as string | undefined;
44
+ const handled = await handleCore(req, res, myAgentId, apiKey);
45
+ if (handled) return;
46
+ const pathSegments = getPathSegments(req.url || "");
47
+ const queryParams = parseQueryParams(req.url || "");
48
+ const ok = await handleSessionData(req, res, pathSegments, queryParams, myAgentId);
49
+ if (!ok) {
50
+ res.writeHead(404);
51
+ res.end("Not Found");
52
+ }
53
+ });
54
+ }
55
+
56
+ let server: Server;
57
+ let port: number;
58
+ let testAgent: { id: string };
59
+
60
+ beforeAll(async () => {
61
+ await removeDbFiles(TEST_DB_PATH);
62
+ initDb(TEST_DB_PATH);
63
+ testAgent = createAgent({ name: "model-key-normalize-test", isLead: false, status: "idle" });
64
+ server = createTestServer(API_KEY);
65
+ port = await listen(server);
66
+ });
67
+
68
+ afterAll(async () => {
69
+ await new Promise<void>((resolve) => server.close(() => resolve()));
70
+ closeDb();
71
+ await removeDbFiles(TEST_DB_PATH);
72
+ });
73
+
74
+ afterEach(() => {
75
+ const db = getDb();
76
+ db.prepare("DELETE FROM session_costs").run();
77
+ db.prepare("DELETE FROM pricing WHERE effective_from > 0").run();
78
+ });
79
+
80
+ function authedFetch(path: string, init: RequestInit = {}): Promise<Response> {
81
+ return fetch(`http://localhost:${port}${path}`, {
82
+ ...init,
83
+ headers: {
84
+ Authorization: `Bearer ${API_KEY}`,
85
+ "Content-Type": "application/json",
86
+ ...(init.headers ?? {}),
87
+ },
88
+ });
89
+ }
90
+
91
+ interface CostResponse {
92
+ success: boolean;
93
+ cost: {
94
+ totalCostUsd: number;
95
+ model: string;
96
+ costSource: "harness" | "pricing-table" | "unpriced";
97
+ };
98
+ }
99
+
100
+ describe("normalizeModelKey()", () => {
101
+ test("strips opencode routing prefix `openrouter/`", () => {
102
+ expect(normalizeModelKey("opencode", "openrouter/anthropic/claude-sonnet-4.5")).toBe(
103
+ "anthropic/claude-sonnet-4.5",
104
+ );
105
+ });
106
+
107
+ test("strips pi routing prefix `github-copilot/`", () => {
108
+ expect(normalizeModelKey("pi", "github-copilot/gpt-5.4")).toBe("gpt-5.4");
109
+ });
110
+
111
+ test("strips pi routing prefix `openrouter/`", () => {
112
+ expect(normalizeModelKey("pi", "openrouter/anthropic/claude-sonnet-4.5")).toBe(
113
+ "anthropic/claude-sonnet-4.5",
114
+ );
115
+ });
116
+
117
+ test("strips pi routing prefix `openrouter/` for deepseek (Phase 3 fix regression)", () => {
118
+ // The exact case from the 2026-05-18 E2E run: pi-mono emits
119
+ // `openrouter/deepseek/deepseek-v4-flash`, while the pricing seed keys the row
120
+ // under bare `deepseek/deepseek-v4-flash`. The drift was collapsed before this
121
+ // assertion existed; keep it as an explicit regression guard.
122
+ expect(normalizeModelKey("pi", "openrouter/deepseek/deepseek-v4-flash")).toBe(
123
+ "deepseek/deepseek-v4-flash",
124
+ );
125
+ });
126
+
127
+ test("strips opencode routing prefix `openrouter/` for deepseek (Phase 3 fix regression)", () => {
128
+ // Same model, different harness — opencode-adapter wraps the underlying
129
+ // model id under the same `openrouter/` proxy prefix.
130
+ expect(normalizeModelKey("opencode", "openrouter/deepseek/deepseek-v4-flash")).toBe(
131
+ "deepseek/deepseek-v4-flash",
132
+ );
133
+ });
134
+
135
+ test("is a no-op for canonical claude ids", () => {
136
+ expect(normalizeModelKey("claude", "claude-opus-4-7")).toBe("claude-opus-4-7");
137
+ });
138
+
139
+ test("is idempotent", () => {
140
+ const once = normalizeModelKey("opencode", "openrouter/anthropic/claude-sonnet-4.5");
141
+ const twice = normalizeModelKey("opencode", once);
142
+ expect(twice).toBe(once);
143
+ });
144
+
145
+ test("lowercases mixed-case input", () => {
146
+ expect(normalizeModelKey("opencode", "OpenRouter/Anthropic/Claude-Sonnet-4.5")).toBe(
147
+ "anthropic/claude-sonnet-4.5",
148
+ );
149
+ });
150
+ });
151
+
152
+ describe("Phase 2 fix — POST /api/session-costs normalizes routing prefixes", () => {
153
+ test("opencode `openrouter/anthropic/claude-sonnet-4.5` resolves the seeded `anthropic/claude-sonnet-4.5` row", async () => {
154
+ // Seed mirrors what models.dev → seed-pricing.ts produces for the
155
+ // openrouter section: bare `anthropic/<id>` under the `opencode` provider.
156
+ insertPricingRow({
157
+ provider: "opencode",
158
+ model: "anthropic/claude-sonnet-4.5",
159
+ tokenClass: "input",
160
+ effectiveFrom: 1,
161
+ pricePerMillionUsd: 3,
162
+ });
163
+ insertPricingRow({
164
+ provider: "opencode",
165
+ model: "anthropic/claude-sonnet-4.5",
166
+ tokenClass: "output",
167
+ effectiveFrom: 1,
168
+ pricePerMillionUsd: 15,
169
+ });
170
+
171
+ const res = await authedFetch(`/api/session-costs`, {
172
+ method: "POST",
173
+ body: JSON.stringify({
174
+ sessionId: "opencode-normalize-1",
175
+ agentId: testAgent.id,
176
+ totalCostUsd: 0.42, // harness-reported, expected to be overwritten
177
+ inputTokens: 1_000_000,
178
+ outputTokens: 100_000,
179
+ // The exact string the opencode adapter emits today.
180
+ model: "openrouter/anthropic/claude-sonnet-4.5",
181
+ provider: "opencode",
182
+ durationMs: 1_000,
183
+ numTurns: 1,
184
+ }),
185
+ });
186
+ expect(res.status).toBe(201);
187
+ const body = (await res.json()) as CostResponse;
188
+ // 1M @ $3 + 100k @ $15 = $3 + $1.50 = $4.50
189
+ expect(body.cost.costSource).toBe("pricing-table");
190
+ expect(body.cost.totalCostUsd).toBeCloseTo(4.5, 5);
191
+ // Original adapter-emitted string is preserved on the row for debugging.
192
+ expect(body.cost.model).toBe("openrouter/anthropic/claude-sonnet-4.5");
193
+ });
194
+
195
+ test("pi `github-copilot/gpt-5.4` resolves the seeded bare `gpt-5.4` row", async () => {
196
+ insertPricingRow({
197
+ provider: "pi",
198
+ model: "gpt-5.4",
199
+ tokenClass: "input",
200
+ effectiveFrom: 1,
201
+ pricePerMillionUsd: 2,
202
+ });
203
+ insertPricingRow({
204
+ provider: "pi",
205
+ model: "gpt-5.4",
206
+ tokenClass: "output",
207
+ effectiveFrom: 1,
208
+ pricePerMillionUsd: 8,
209
+ });
210
+
211
+ const res = await authedFetch(`/api/session-costs`, {
212
+ method: "POST",
213
+ body: JSON.stringify({
214
+ sessionId: "pi-copilot-normalize-1",
215
+ agentId: testAgent.id,
216
+ totalCostUsd: 9.99,
217
+ inputTokens: 500_000,
218
+ outputTokens: 250_000,
219
+ model: "github-copilot/gpt-5.4",
220
+ provider: "pi",
221
+ durationMs: 1_000,
222
+ numTurns: 1,
223
+ }),
224
+ });
225
+ expect(res.status).toBe(201);
226
+ const body = (await res.json()) as CostResponse;
227
+ // 500k @ $2 + 250k @ $8 = $1 + $2 = $3
228
+ expect(body.cost.costSource).toBe("pricing-table");
229
+ expect(body.cost.totalCostUsd).toBeCloseTo(3.0, 5);
230
+ expect(body.cost.model).toBe("github-copilot/gpt-5.4");
231
+ });
232
+
233
+ test("claude `claude-opus-4-7` (no prefix) still resolves — regression guard", async () => {
234
+ // The bug report flagged claude-adapter as already-working. Make sure
235
+ // we did not regress its bare-id lookup.
236
+ insertPricingRow({
237
+ provider: "claude",
238
+ model: "claude-opus-4-7",
239
+ tokenClass: "input",
240
+ effectiveFrom: 1,
241
+ pricePerMillionUsd: 15,
242
+ });
243
+ insertPricingRow({
244
+ provider: "claude",
245
+ model: "claude-opus-4-7",
246
+ tokenClass: "output",
247
+ effectiveFrom: 1,
248
+ pricePerMillionUsd: 75,
249
+ });
250
+
251
+ const res = await authedFetch(`/api/session-costs`, {
252
+ method: "POST",
253
+ body: JSON.stringify({
254
+ sessionId: "claude-bare-1",
255
+ agentId: testAgent.id,
256
+ totalCostUsd: 1.23,
257
+ inputTokens: 1_000_000,
258
+ outputTokens: 100_000,
259
+ model: "claude-opus-4-7",
260
+ provider: "claude",
261
+ durationMs: 1_000,
262
+ numTurns: 1,
263
+ }),
264
+ });
265
+ expect(res.status).toBe(201);
266
+ const body = (await res.json()) as CostResponse;
267
+ // 1M @ $15 + 100k @ $75 = $15 + $7.50 = $22.50
268
+ expect(body.cost.costSource).toBe("pricing-table");
269
+ expect(body.cost.totalCostUsd).toBeCloseTo(22.5, 5);
270
+ });
271
+ });