@desplega.ai/agent-swarm 1.79.4 → 1.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +98 -19
- package/package.json +12 -6
- package/src/be/db.ts +101 -30
- package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
- package/src/be/pricing-normalize.ts +81 -0
- package/src/be/seed-pricing.ts +293 -0
- package/src/commands/claude-managed-setup.ts +19 -3
- package/src/commands/runner.ts +592 -237
- package/src/http/context.ts +6 -2
- package/src/http/index.ts +115 -68
- package/src/http/session-data.ts +74 -23
- package/src/otel-impl.ts +200 -0
- package/src/otel.ts +127 -0
- package/src/providers/claude-adapter.ts +30 -5
- package/src/providers/claude-managed-adapter.ts +43 -17
- package/src/providers/claude-managed-pricing.ts +34 -0
- package/src/providers/codex-adapter.ts +38 -27
- package/src/providers/codex-models.ts +22 -3
- package/src/providers/devin-adapter.ts +11 -0
- package/src/providers/opencode-adapter.ts +31 -7
- package/src/providers/pi-mono-adapter.ts +39 -7
- package/src/providers/pricing-sources.md +52 -0
- package/src/providers/swarm-events-shared.ts +8 -4
- package/src/providers/types.ts +33 -10
- package/src/server.ts +6 -0
- package/src/tests/claude-managed-adapter.test.ts +17 -3
- package/src/tests/claude-managed-setup.test.ts +10 -1
- package/src/tests/codex-adapter.test.ts +20 -19
- package/src/tests/context-snapshot.test.ts +2 -2
- package/src/tests/context-window.test.ts +65 -1
- package/src/tests/devin-adapter.test.ts +2 -0
- package/src/tests/http/context-routes.test.ts +161 -0
- package/src/tests/migration-063-schema-relax.test.ts +109 -0
- package/src/tests/opencode-adapter.test.ts +146 -1
- package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
- package/src/tests/pages-view-count.test.ts +30 -5
- package/src/tests/providers/codex-cost.test.ts +18 -0
- package/src/tests/providers/opencode-cost.test.ts +74 -0
- package/src/tests/providers/pi-cost.test.ts +128 -0
- package/src/tests/secret-scrubber.test.ts +19 -0
- package/src/tests/session-costs-codex-recompute.test.ts +35 -22
- package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
- package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
- package/src/tests/store-progress-cost.test.ts +6 -1
- package/src/tools/store-progress.ts +16 -60
- package/src/tools/utils.ts +65 -12
- package/src/types.ts +62 -9
- package/src/utils/context-window.ts +104 -4
- package/src/utils/secret-scrubber.ts +7 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// Phase 3 fix — regression guard that PiMonoSession stamps `provider: "pi"`
|
|
2
|
+
// on every CostData it emits. Without this tag the API server recompute
|
|
3
|
+
// branch in src/http/session-data.ts falls through to costSource='harness'
|
|
4
|
+
// instead of engaging the pricing-table lookup, so a perfectly-priced model
|
|
5
|
+
// (e.g. `openrouter/deepseek/deepseek-v4-flash`) silently renders as un-priced.
|
|
6
|
+
//
|
|
7
|
+
// Mirrors the narrow, single-purpose shape of src/tests/providers/codex-cost.test.ts.
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { mkdirSync, rmSync } from "node:fs";
|
|
11
|
+
import { tmpdir } from "node:os";
|
|
12
|
+
import { join } from "node:path";
|
|
13
|
+
import { PiMonoSession } from "../../providers/pi-mono-adapter";
|
|
14
|
+
import type { ProviderEvent, ProviderSessionConfig } from "../../providers/types";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Build a hand-rolled fake `AgentSession` that exercises the pi-mono-adapter
|
|
18
|
+
* cost-emission path without booting the real pi-coding-agent runtime.
|
|
19
|
+
*
|
|
20
|
+
* The adapter calls (in order, inside `runSession()`):
|
|
21
|
+
* 1. `prompt(text, opts)` — resolves immediately for the fake
|
|
22
|
+
* 2. `waitForIdle()` — reads `isStreaming` (we pin to `false`)
|
|
23
|
+
* 3. `getSessionStats()` — returns the canned token/cost shape
|
|
24
|
+
*
|
|
25
|
+
* `subscribe(cb)` is called twice (once in the constructor for the normal
|
|
26
|
+
* event handler, once optionally in `waitForIdle`). Returning a noop
|
|
27
|
+
* unsubscriber is enough.
|
|
28
|
+
*/
|
|
29
|
+
function makeFakeAgentSession(opts: {
|
|
30
|
+
sessionId: string;
|
|
31
|
+
modelProvider: string;
|
|
32
|
+
modelId: string;
|
|
33
|
+
}): {
|
|
34
|
+
fake: import("@earendil-works/pi-coding-agent").AgentSession;
|
|
35
|
+
callPromptResolve: () => void;
|
|
36
|
+
} {
|
|
37
|
+
let promptResolve: () => void = () => {};
|
|
38
|
+
const promptDone = new Promise<void>((r) => {
|
|
39
|
+
promptResolve = r;
|
|
40
|
+
});
|
|
41
|
+
const fake = {
|
|
42
|
+
sessionId: opts.sessionId,
|
|
43
|
+
model: { provider: opts.modelProvider, id: opts.modelId },
|
|
44
|
+
isStreaming: false,
|
|
45
|
+
subscribe: (_cb: unknown) => () => {},
|
|
46
|
+
prompt: async () => {
|
|
47
|
+
// Block until the test wants the adapter to proceed past `prompt()`.
|
|
48
|
+
// Pi adapter awaits this before reading session stats, so we resolve
|
|
49
|
+
// synchronously to keep the test deterministic.
|
|
50
|
+
await promptDone;
|
|
51
|
+
},
|
|
52
|
+
getSessionStats: () => ({
|
|
53
|
+
tokens: { input: 64463, output: 313, cacheRead: 31616, cacheWrite: 0, total: 96392 },
|
|
54
|
+
// Pi-mono uses `stats.cost` directly. We pin a non-zero value so we can
|
|
55
|
+
// still assert it round-trips, but the load-bearing field for this
|
|
56
|
+
// suite is `provider` regardless of dollars.
|
|
57
|
+
cost: 0.008,
|
|
58
|
+
userMessages: 1,
|
|
59
|
+
assistantMessages: 1,
|
|
60
|
+
}),
|
|
61
|
+
getContextUsage: () => undefined,
|
|
62
|
+
dispose: () => {},
|
|
63
|
+
};
|
|
64
|
+
// Resolve the prompt gate immediately — the adapter awaits prompt() before
|
|
65
|
+
// waitForIdle() reads `isStreaming`, but our fake's `isStreaming` is `false`
|
|
66
|
+
// so waitForIdle resolves right away.
|
|
67
|
+
promptResolve();
|
|
68
|
+
return {
|
|
69
|
+
// The pi-coding-agent AgentSession surface area is wide; we cast through
|
|
70
|
+
// `unknown` because the test only needs the handful of members stubbed above.
|
|
71
|
+
fake: fake as unknown as import("@earendil-works/pi-coding-agent").AgentSession,
|
|
72
|
+
callPromptResolve: promptResolve,
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
function makeConfig(logFile: string): ProviderSessionConfig {
|
|
77
|
+
return {
|
|
78
|
+
prompt: "do a thing",
|
|
79
|
+
systemPrompt: "be helpful",
|
|
80
|
+
// The exact harness-emitted model id from today's E2E run. This is the
|
|
81
|
+
// case `normalizeModelKey('pi', ...)` must collapse onto a seeded
|
|
82
|
+
// `deepseek/deepseek-v4-flash` row.
|
|
83
|
+
model: "openrouter/deepseek/deepseek-v4-flash",
|
|
84
|
+
role: "worker",
|
|
85
|
+
agentId: "agent-1",
|
|
86
|
+
taskId: "task-1",
|
|
87
|
+
apiUrl: "http://localhost:0",
|
|
88
|
+
apiKey: "test-key",
|
|
89
|
+
cwd: "/tmp",
|
|
90
|
+
logFile,
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
describe("PiMonoSession — provider tag on CostData", () => {
|
|
95
|
+
test("waitForCompletion → result.cost.provider === 'pi'", async () => {
|
|
96
|
+
const dir = join(tmpdir(), `pi-cost-test-${Date.now()}`);
|
|
97
|
+
mkdirSync(dir, { recursive: true });
|
|
98
|
+
const logFile = join(dir, "session.log");
|
|
99
|
+
try {
|
|
100
|
+
const { fake } = makeFakeAgentSession({
|
|
101
|
+
sessionId: "sess-pi-test",
|
|
102
|
+
modelProvider: "openrouter",
|
|
103
|
+
modelId: "deepseek/deepseek-v4-flash",
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
const events: ProviderEvent[] = [];
|
|
107
|
+
const session = new PiMonoSession(fake, makeConfig(logFile), false);
|
|
108
|
+
session.onEvent((e) => events.push(e));
|
|
109
|
+
|
|
110
|
+
const result = await session.waitForCompletion();
|
|
111
|
+
|
|
112
|
+
// The load-bearing assertion. Phase 2's API recompute path keys off
|
|
113
|
+
// exactly this field; emitting CostData without it silently disables
|
|
114
|
+
// pricing-table tagging for the entire pi provider.
|
|
115
|
+
expect(result.cost?.provider).toBe("pi");
|
|
116
|
+
const resultEvent = events.find((e) => e.type === "result");
|
|
117
|
+
expect(resultEvent).toBeDefined();
|
|
118
|
+
if (resultEvent?.type === "result") {
|
|
119
|
+
expect(resultEvent.cost.provider).toBe("pi");
|
|
120
|
+
// Sanity — the reportedModel() helper composes `provider/id` so the
|
|
121
|
+
// server-side normalizer's prefix-strip has something to bite on.
|
|
122
|
+
expect(resultEvent.cost.model).toBe("openrouter/deepseek/deepseek-v4-flash");
|
|
123
|
+
}
|
|
124
|
+
} finally {
|
|
125
|
+
rmSync(dir, { recursive: true, force: true });
|
|
126
|
+
}
|
|
127
|
+
});
|
|
128
|
+
});
|
|
@@ -113,6 +113,17 @@ describe("scrubSecrets — env-based replacement", () => {
|
|
|
113
113
|
expect(out).not.toContain("sk-proj-abcd1234567890");
|
|
114
114
|
});
|
|
115
115
|
|
|
116
|
+
test("redacts OTLP exporter headers from env", () => {
|
|
117
|
+
process.env.OTEL_EXPORTER_OTLP_HEADERS = "signoz-ingestion-key=localSignozKey_1234567890abcdef";
|
|
118
|
+
refreshSecretScrubberCache();
|
|
119
|
+
|
|
120
|
+
const out = scrubSecrets(
|
|
121
|
+
"OTEL_EXPORTER_OTLP_HEADERS=signoz-ingestion-key=localSignozKey_1234567890abcdef",
|
|
122
|
+
);
|
|
123
|
+
|
|
124
|
+
expect(out).toBe("OTEL_EXPORTER_OTLP_HEADERS=[REDACTED:OTEL_EXPORTER_OTLP_HEADERS]");
|
|
125
|
+
});
|
|
126
|
+
|
|
116
127
|
test("cache rebuilds after refresh when new secret is added", () => {
|
|
117
128
|
const out1 = scrubSecrets("no secret yet here_abcdefghij");
|
|
118
129
|
expect(out1).toBe("no secret yet here_abcdefghij");
|
|
@@ -202,6 +213,14 @@ describe("scrubSecrets — regex patterns", () => {
|
|
|
202
213
|
const out = scrubSecrets("token=ghp_1234567890abcdefABCDEF1234567890ABCD");
|
|
203
214
|
expect(out).toContain("[REDACTED:github_token]");
|
|
204
215
|
});
|
|
216
|
+
|
|
217
|
+
test("redacts SigNoz ingestion-key headers even when env is empty", () => {
|
|
218
|
+
const out = scrubSecrets(
|
|
219
|
+
"OTEL_EXPORTER_OTLP_HEADERS=signoz-ingestion-key=localSignozKey_1234567890abcdef",
|
|
220
|
+
);
|
|
221
|
+
|
|
222
|
+
expect(out).toBe("OTEL_EXPORTER_OTLP_HEADERS=[REDACTED:signoz_ingestion_key]");
|
|
223
|
+
});
|
|
205
224
|
});
|
|
206
225
|
|
|
207
226
|
describe("scrubSecrets — does not over-scrub", () => {
|
|
@@ -98,7 +98,7 @@ interface CreatedCostResponse {
|
|
|
98
98
|
cost: {
|
|
99
99
|
id: string;
|
|
100
100
|
totalCostUsd: number;
|
|
101
|
-
costSource: "harness" | "pricing-table";
|
|
101
|
+
costSource: "harness" | "pricing-table" | "unpriced";
|
|
102
102
|
model: string;
|
|
103
103
|
};
|
|
104
104
|
}
|
|
@@ -153,15 +153,9 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
|
|
|
153
153
|
expect(body.cost.totalCostUsd).toBeCloseTo(6.64, 5);
|
|
154
154
|
});
|
|
155
155
|
|
|
156
|
-
test("provider=codex but
|
|
157
|
-
// Only seed
|
|
158
|
-
|
|
159
|
-
provider: "codex",
|
|
160
|
-
model: "codex-test-synth",
|
|
161
|
-
tokenClass: "input",
|
|
162
|
-
effectiveFrom: 1,
|
|
163
|
-
pricePerMillionUsd: 2.0,
|
|
164
|
-
});
|
|
156
|
+
test("provider=codex but input/output rows missing → 'unpriced', worker value preserved", async () => {
|
|
157
|
+
// Only seed cached_input. Missing input + output blocks recompute and
|
|
158
|
+
// Phase 2 tags the row 'unpriced' (no rates means we can't trust harness USD either).
|
|
165
159
|
insertPricingRow({
|
|
166
160
|
provider: "codex",
|
|
167
161
|
model: "codex-test-synth",
|
|
@@ -186,13 +180,16 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
|
|
|
186
180
|
});
|
|
187
181
|
expect(res.status).toBe(201);
|
|
188
182
|
const body = (await res.json()) as CreatedCostResponse;
|
|
189
|
-
|
|
190
|
-
|
|
183
|
+
// Phase 2: provider tagged but no input/output rows ⇒ 'unpriced'.
|
|
184
|
+
expect(body.cost.costSource).toBe("unpriced");
|
|
185
|
+
// Worker value preserved verbatim — we don't fabricate one.
|
|
191
186
|
expect(body.cost.totalCostUsd).toBe(1.23);
|
|
192
187
|
});
|
|
193
188
|
|
|
194
|
-
test("provider=claude
|
|
195
|
-
//
|
|
189
|
+
test("provider=claude with no pricing rows for the model → 'unpriced' (Phase 2)", async () => {
|
|
190
|
+
// Phase 2 extended the recompute path from codex-only to every provider.
|
|
191
|
+
// With no pricing rows seeded for ('claude', 'sonnet-4'), the row is
|
|
192
|
+
// tagged 'unpriced' rather than 'harness' — the UI surfaces it as a yellow badge.
|
|
196
193
|
const res = await authedFetch(`/api/session-costs`, {
|
|
197
194
|
method: "POST",
|
|
198
195
|
body: JSON.stringify({
|
|
@@ -209,20 +206,35 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
|
|
|
209
206
|
});
|
|
210
207
|
expect(res.status).toBe(201);
|
|
211
208
|
const body = (await res.json()) as CreatedCostResponse;
|
|
212
|
-
expect(body.cost.costSource).toBe("
|
|
209
|
+
expect(body.cost.costSource).toBe("unpriced");
|
|
213
210
|
expect(body.cost.totalCostUsd).toBe(7.77);
|
|
214
211
|
});
|
|
215
212
|
|
|
216
|
-
test("provider=pi
|
|
213
|
+
test("provider=pi with seeded pricing rows → recomputes (Phase 2)", async () => {
|
|
214
|
+
// Phase 2 widens recompute beyond codex. Seed pi rows so we get a hit.
|
|
215
|
+
insertPricingRow({
|
|
216
|
+
provider: "pi",
|
|
217
|
+
model: "pi-test",
|
|
218
|
+
tokenClass: "input",
|
|
219
|
+
effectiveFrom: 1,
|
|
220
|
+
pricePerMillionUsd: 0.5,
|
|
221
|
+
});
|
|
222
|
+
insertPricingRow({
|
|
223
|
+
provider: "pi",
|
|
224
|
+
model: "pi-test",
|
|
225
|
+
tokenClass: "output",
|
|
226
|
+
effectiveFrom: 1,
|
|
227
|
+
pricePerMillionUsd: 3.0,
|
|
228
|
+
});
|
|
217
229
|
const res = await authedFetch(`/api/session-costs`, {
|
|
218
230
|
method: "POST",
|
|
219
231
|
body: JSON.stringify({
|
|
220
232
|
sessionId: "pi-passthrough-1",
|
|
221
233
|
agentId: testAgent.id,
|
|
222
|
-
totalCostUsd: 0.42,
|
|
223
|
-
inputTokens:
|
|
224
|
-
outputTokens:
|
|
225
|
-
model: "
|
|
234
|
+
totalCostUsd: 0.42, // expected to be overwritten
|
|
235
|
+
inputTokens: 1_000_000, // 1M input
|
|
236
|
+
outputTokens: 1_000_000, // 1M output
|
|
237
|
+
model: "pi-test",
|
|
226
238
|
provider: "pi",
|
|
227
239
|
durationMs: 1_000,
|
|
228
240
|
numTurns: 1,
|
|
@@ -230,8 +242,9 @@ describe("Phase 6 — POST /api/session-costs: Codex USD recompute", () => {
|
|
|
230
242
|
});
|
|
231
243
|
expect(res.status).toBe(201);
|
|
232
244
|
const body = (await res.json()) as CreatedCostResponse;
|
|
233
|
-
expect(body.cost.costSource).toBe("
|
|
234
|
-
|
|
245
|
+
expect(body.cost.costSource).toBe("pricing-table");
|
|
246
|
+
// 1M @ 0.5 + 1M @ 3.0 = $3.50
|
|
247
|
+
expect(body.cost.totalCostUsd).toBeCloseTo(3.5, 5);
|
|
235
248
|
});
|
|
236
249
|
|
|
237
250
|
test("provider field omitted → no recompute, costSource='harness' (back-compat)", async () => {
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
// Phase 2 fix — adapter-emitted model ids carry harness-specific routing
|
|
2
|
+
// prefixes (`openrouter/`, `github-copilot/`, …) that the pricing seed does
|
|
3
|
+
// not. Before the fix every opencode + pi-via-copilot run fell through to
|
|
4
|
+
// `costSource='unpriced'` even when a seeded rate row existed. This suite
|
|
5
|
+
// guards against regressions of the drift cases observed in real-harness E2E.
|
|
6
|
+
|
|
7
|
+
import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test";
|
|
8
|
+
import { unlink } from "node:fs/promises";
|
|
9
|
+
import {
|
|
10
|
+
createServer as createHttpServer,
|
|
11
|
+
type IncomingMessage,
|
|
12
|
+
type Server,
|
|
13
|
+
type ServerResponse,
|
|
14
|
+
} from "node:http";
|
|
15
|
+
import { closeDb, createAgent, getDb, initDb, insertPricingRow } from "../be/db";
|
|
16
|
+
import { normalizeModelKey } from "../be/pricing-normalize";
|
|
17
|
+
import { handleCore } from "../http/core";
|
|
18
|
+
import { handleSessionData } from "../http/session-data";
|
|
19
|
+
import { getPathSegments, parseQueryParams } from "../http/utils";
|
|
20
|
+
|
|
21
|
+
const TEST_DB_PATH = "./test-model-key-normalize.sqlite";
|
|
22
|
+
const API_KEY = "test-model-key-normalize";
|
|
23
|
+
|
|
24
|
+
async function removeDbFiles(path: string): Promise<void> {
|
|
25
|
+
for (const suffix of ["", "-wal", "-shm"]) {
|
|
26
|
+
try {
|
|
27
|
+
await unlink(path + suffix);
|
|
28
|
+
} catch (error) {
|
|
29
|
+
if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
async function listen(server: Server): Promise<number> {
|
|
35
|
+
await new Promise<void>((resolve) => server.listen(0, resolve));
|
|
36
|
+
const addr = server.address();
|
|
37
|
+
if (!addr || typeof addr === "string") throw new Error("no port");
|
|
38
|
+
return addr.port;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function createTestServer(apiKey: string): Server {
|
|
42
|
+
return createHttpServer(async (req: IncomingMessage, res: ServerResponse) => {
|
|
43
|
+
const myAgentId = req.headers["x-agent-id"] as string | undefined;
|
|
44
|
+
const handled = await handleCore(req, res, myAgentId, apiKey);
|
|
45
|
+
if (handled) return;
|
|
46
|
+
const pathSegments = getPathSegments(req.url || "");
|
|
47
|
+
const queryParams = parseQueryParams(req.url || "");
|
|
48
|
+
const ok = await handleSessionData(req, res, pathSegments, queryParams, myAgentId);
|
|
49
|
+
if (!ok) {
|
|
50
|
+
res.writeHead(404);
|
|
51
|
+
res.end("Not Found");
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
let server: Server;
|
|
57
|
+
let port: number;
|
|
58
|
+
let testAgent: { id: string };
|
|
59
|
+
|
|
60
|
+
beforeAll(async () => {
|
|
61
|
+
await removeDbFiles(TEST_DB_PATH);
|
|
62
|
+
initDb(TEST_DB_PATH);
|
|
63
|
+
testAgent = createAgent({ name: "model-key-normalize-test", isLead: false, status: "idle" });
|
|
64
|
+
server = createTestServer(API_KEY);
|
|
65
|
+
port = await listen(server);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
afterAll(async () => {
|
|
69
|
+
await new Promise<void>((resolve) => server.close(() => resolve()));
|
|
70
|
+
closeDb();
|
|
71
|
+
await removeDbFiles(TEST_DB_PATH);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
afterEach(() => {
|
|
75
|
+
const db = getDb();
|
|
76
|
+
db.prepare("DELETE FROM session_costs").run();
|
|
77
|
+
db.prepare("DELETE FROM pricing WHERE effective_from > 0").run();
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
function authedFetch(path: string, init: RequestInit = {}): Promise<Response> {
|
|
81
|
+
return fetch(`http://localhost:${port}${path}`, {
|
|
82
|
+
...init,
|
|
83
|
+
headers: {
|
|
84
|
+
Authorization: `Bearer ${API_KEY}`,
|
|
85
|
+
"Content-Type": "application/json",
|
|
86
|
+
...(init.headers ?? {}),
|
|
87
|
+
},
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
interface CostResponse {
|
|
92
|
+
success: boolean;
|
|
93
|
+
cost: {
|
|
94
|
+
totalCostUsd: number;
|
|
95
|
+
model: string;
|
|
96
|
+
costSource: "harness" | "pricing-table" | "unpriced";
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
describe("normalizeModelKey()", () => {
|
|
101
|
+
test("strips opencode routing prefix `openrouter/`", () => {
|
|
102
|
+
expect(normalizeModelKey("opencode", "openrouter/anthropic/claude-sonnet-4.5")).toBe(
|
|
103
|
+
"anthropic/claude-sonnet-4.5",
|
|
104
|
+
);
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
test("strips pi routing prefix `github-copilot/`", () => {
|
|
108
|
+
expect(normalizeModelKey("pi", "github-copilot/gpt-5.4")).toBe("gpt-5.4");
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
test("strips pi routing prefix `openrouter/`", () => {
|
|
112
|
+
expect(normalizeModelKey("pi", "openrouter/anthropic/claude-sonnet-4.5")).toBe(
|
|
113
|
+
"anthropic/claude-sonnet-4.5",
|
|
114
|
+
);
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
test("strips pi routing prefix `openrouter/` for deepseek (Phase 3 fix regression)", () => {
|
|
118
|
+
// The exact case from today's E2E (2026-05-18): pi-mono emits
|
|
119
|
+
// `openrouter/deepseek/deepseek-v4-flash`, the pricing seed keys the row
|
|
120
|
+
// under bare `deepseek/deepseek-v4-flash`. That drift was already collapsed
|
|
121
|
+
// before this assertion was added; keep it as an explicit regression guard.
|
|
122
|
+
expect(normalizeModelKey("pi", "openrouter/deepseek/deepseek-v4-flash")).toBe(
|
|
123
|
+
"deepseek/deepseek-v4-flash",
|
|
124
|
+
);
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("strips opencode routing prefix `openrouter/` for deepseek (Phase 3 fix regression)", () => {
|
|
128
|
+
// Same model, different harness — opencode-adapter wraps the underlying
|
|
129
|
+
// model id under the same `openrouter/` proxy prefix.
|
|
130
|
+
expect(normalizeModelKey("opencode", "openrouter/deepseek/deepseek-v4-flash")).toBe(
|
|
131
|
+
"deepseek/deepseek-v4-flash",
|
|
132
|
+
);
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
test("is a no-op for canonical claude ids", () => {
|
|
136
|
+
expect(normalizeModelKey("claude", "claude-opus-4-7")).toBe("claude-opus-4-7");
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
test("is idempotent", () => {
|
|
140
|
+
const once = normalizeModelKey("opencode", "openrouter/anthropic/claude-sonnet-4.5");
|
|
141
|
+
const twice = normalizeModelKey("opencode", once);
|
|
142
|
+
expect(twice).toBe(once);
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
test("lowercases mixed-case input", () => {
|
|
146
|
+
expect(normalizeModelKey("opencode", "OpenRouter/Anthropic/Claude-Sonnet-4.5")).toBe(
|
|
147
|
+
"anthropic/claude-sonnet-4.5",
|
|
148
|
+
);
|
|
149
|
+
});
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
describe("Phase 2 fix — POST /api/session-costs normalizes routing prefixes", () => {
|
|
153
|
+
test("opencode `openrouter/anthropic/claude-sonnet-4.5` resolves the seeded `anthropic/claude-sonnet-4.5` row", async () => {
|
|
154
|
+
// Seed mirrors what models.dev → seed-pricing.ts produces for the
|
|
155
|
+
// openrouter section: bare `anthropic/<id>` under the `opencode` provider.
|
|
156
|
+
insertPricingRow({
|
|
157
|
+
provider: "opencode",
|
|
158
|
+
model: "anthropic/claude-sonnet-4.5",
|
|
159
|
+
tokenClass: "input",
|
|
160
|
+
effectiveFrom: 1,
|
|
161
|
+
pricePerMillionUsd: 3,
|
|
162
|
+
});
|
|
163
|
+
insertPricingRow({
|
|
164
|
+
provider: "opencode",
|
|
165
|
+
model: "anthropic/claude-sonnet-4.5",
|
|
166
|
+
tokenClass: "output",
|
|
167
|
+
effectiveFrom: 1,
|
|
168
|
+
pricePerMillionUsd: 15,
|
|
169
|
+
});
|
|
170
|
+
|
|
171
|
+
const res = await authedFetch(`/api/session-costs`, {
|
|
172
|
+
method: "POST",
|
|
173
|
+
body: JSON.stringify({
|
|
174
|
+
sessionId: "opencode-normalize-1",
|
|
175
|
+
agentId: testAgent.id,
|
|
176
|
+
totalCostUsd: 0.42, // harness-reported, expected to be overwritten
|
|
177
|
+
inputTokens: 1_000_000,
|
|
178
|
+
outputTokens: 100_000,
|
|
179
|
+
// The exact string the opencode adapter emits today.
|
|
180
|
+
model: "openrouter/anthropic/claude-sonnet-4.5",
|
|
181
|
+
provider: "opencode",
|
|
182
|
+
durationMs: 1_000,
|
|
183
|
+
numTurns: 1,
|
|
184
|
+
}),
|
|
185
|
+
});
|
|
186
|
+
expect(res.status).toBe(201);
|
|
187
|
+
const body = (await res.json()) as CostResponse;
|
|
188
|
+
// 1M @ $3 + 100k @ $15 = $3 + $1.50 = $4.50
|
|
189
|
+
expect(body.cost.costSource).toBe("pricing-table");
|
|
190
|
+
expect(body.cost.totalCostUsd).toBeCloseTo(4.5, 5);
|
|
191
|
+
// Original adapter-emitted string is preserved on the row for debugging.
|
|
192
|
+
expect(body.cost.model).toBe("openrouter/anthropic/claude-sonnet-4.5");
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
test("pi `github-copilot/gpt-5.4` resolves the seeded bare `gpt-5.4` row", async () => {
|
|
196
|
+
insertPricingRow({
|
|
197
|
+
provider: "pi",
|
|
198
|
+
model: "gpt-5.4",
|
|
199
|
+
tokenClass: "input",
|
|
200
|
+
effectiveFrom: 1,
|
|
201
|
+
pricePerMillionUsd: 2,
|
|
202
|
+
});
|
|
203
|
+
insertPricingRow({
|
|
204
|
+
provider: "pi",
|
|
205
|
+
model: "gpt-5.4",
|
|
206
|
+
tokenClass: "output",
|
|
207
|
+
effectiveFrom: 1,
|
|
208
|
+
pricePerMillionUsd: 8,
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
const res = await authedFetch(`/api/session-costs`, {
|
|
212
|
+
method: "POST",
|
|
213
|
+
body: JSON.stringify({
|
|
214
|
+
sessionId: "pi-copilot-normalize-1",
|
|
215
|
+
agentId: testAgent.id,
|
|
216
|
+
totalCostUsd: 9.99,
|
|
217
|
+
inputTokens: 500_000,
|
|
218
|
+
outputTokens: 250_000,
|
|
219
|
+
model: "github-copilot/gpt-5.4",
|
|
220
|
+
provider: "pi",
|
|
221
|
+
durationMs: 1_000,
|
|
222
|
+
numTurns: 1,
|
|
223
|
+
}),
|
|
224
|
+
});
|
|
225
|
+
expect(res.status).toBe(201);
|
|
226
|
+
const body = (await res.json()) as CostResponse;
|
|
227
|
+
// 500k @ $2 + 250k @ $8 = $1 + $2 = $3
|
|
228
|
+
expect(body.cost.costSource).toBe("pricing-table");
|
|
229
|
+
expect(body.cost.totalCostUsd).toBeCloseTo(3.0, 5);
|
|
230
|
+
expect(body.cost.model).toBe("github-copilot/gpt-5.4");
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
test("claude `claude-opus-4-7` (no prefix) still resolves — regression guard", async () => {
|
|
234
|
+
// The bug report flagged claude-adapter as already-working. Make sure
|
|
235
|
+
// we did not regress its bare-id lookup.
|
|
236
|
+
insertPricingRow({
|
|
237
|
+
provider: "claude",
|
|
238
|
+
model: "claude-opus-4-7",
|
|
239
|
+
tokenClass: "input",
|
|
240
|
+
effectiveFrom: 1,
|
|
241
|
+
pricePerMillionUsd: 15,
|
|
242
|
+
});
|
|
243
|
+
insertPricingRow({
|
|
244
|
+
provider: "claude",
|
|
245
|
+
model: "claude-opus-4-7",
|
|
246
|
+
tokenClass: "output",
|
|
247
|
+
effectiveFrom: 1,
|
|
248
|
+
pricePerMillionUsd: 75,
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
const res = await authedFetch(`/api/session-costs`, {
|
|
252
|
+
method: "POST",
|
|
253
|
+
body: JSON.stringify({
|
|
254
|
+
sessionId: "claude-bare-1",
|
|
255
|
+
agentId: testAgent.id,
|
|
256
|
+
totalCostUsd: 1.23,
|
|
257
|
+
inputTokens: 1_000_000,
|
|
258
|
+
outputTokens: 100_000,
|
|
259
|
+
model: "claude-opus-4-7",
|
|
260
|
+
provider: "claude",
|
|
261
|
+
durationMs: 1_000,
|
|
262
|
+
numTurns: 1,
|
|
263
|
+
}),
|
|
264
|
+
});
|
|
265
|
+
expect(res.status).toBe(201);
|
|
266
|
+
const body = (await res.json()) as CostResponse;
|
|
267
|
+
// 1M @ $15 + 100k @ $75 = $15 + $7.50 = $22.50
|
|
268
|
+
expect(body.cost.costSource).toBe("pricing-table");
|
|
269
|
+
expect(body.cost.totalCostUsd).toBeCloseTo(22.5, 5);
|
|
270
|
+
});
|
|
271
|
+
});
|