@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/openapi.json +98 -19
  2. package/package.json +12 -6
  3. package/src/be/db.ts +101 -30
  4. package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
  5. package/src/be/pricing-normalize.ts +81 -0
  6. package/src/be/seed-pricing.ts +293 -0
  7. package/src/commands/claude-managed-setup.ts +19 -3
  8. package/src/commands/runner.ts +592 -237
  9. package/src/http/context.ts +6 -2
  10. package/src/http/index.ts +115 -68
  11. package/src/http/session-data.ts +74 -23
  12. package/src/otel-impl.ts +200 -0
  13. package/src/otel.ts +127 -0
  14. package/src/providers/claude-adapter.ts +30 -5
  15. package/src/providers/claude-managed-adapter.ts +43 -17
  16. package/src/providers/claude-managed-pricing.ts +34 -0
  17. package/src/providers/codex-adapter.ts +38 -27
  18. package/src/providers/codex-models.ts +22 -3
  19. package/src/providers/devin-adapter.ts +11 -0
  20. package/src/providers/opencode-adapter.ts +31 -7
  21. package/src/providers/pi-mono-adapter.ts +39 -7
  22. package/src/providers/pricing-sources.md +52 -0
  23. package/src/providers/swarm-events-shared.ts +8 -4
  24. package/src/providers/types.ts +33 -10
  25. package/src/server.ts +6 -0
  26. package/src/tests/claude-managed-adapter.test.ts +17 -3
  27. package/src/tests/claude-managed-setup.test.ts +10 -1
  28. package/src/tests/codex-adapter.test.ts +20 -19
  29. package/src/tests/context-snapshot.test.ts +2 -2
  30. package/src/tests/context-window.test.ts +65 -1
  31. package/src/tests/devin-adapter.test.ts +2 -0
  32. package/src/tests/http/context-routes.test.ts +161 -0
  33. package/src/tests/migration-063-schema-relax.test.ts +109 -0
  34. package/src/tests/opencode-adapter.test.ts +146 -1
  35. package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
  36. package/src/tests/pages-view-count.test.ts +30 -5
  37. package/src/tests/providers/codex-cost.test.ts +18 -0
  38. package/src/tests/providers/opencode-cost.test.ts +74 -0
  39. package/src/tests/providers/pi-cost.test.ts +128 -0
  40. package/src/tests/secret-scrubber.test.ts +19 -0
  41. package/src/tests/session-costs-codex-recompute.test.ts +35 -22
  42. package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
  43. package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
  44. package/src/tests/store-progress-cost.test.ts +6 -1
  45. package/src/tools/store-progress.ts +16 -60
  46. package/src/tools/utils.ts +65 -12
  47. package/src/types.ts +62 -9
  48. package/src/utils/context-window.ts +104 -4
  49. package/src/utils/secret-scrubber.ts +7 -0
@@ -0,0 +1,170 @@
1
+ // Phase 2: POST /api/session-costs recompute fires for every provider with
2
+ // seeded pricing rows — not just codex. Unknown (provider, model) pairs are
3
+ // tagged `costSource='unpriced'`.
4
+
5
+ import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test";
6
+ import { unlink } from "node:fs/promises";
7
+ import {
8
+ createServer as createHttpServer,
9
+ type IncomingMessage,
10
+ type Server,
11
+ type ServerResponse,
12
+ } from "node:http";
13
+ import { closeDb, createAgent, getDb, initDb, insertPricingRow } from "../be/db";
14
+ import { handleCore } from "../http/core";
15
+ import { handleSessionData } from "../http/session-data";
16
+ import { getPathSegments, parseQueryParams } from "../http/utils";
17
+
18
+ const TEST_DB_PATH = "./test-recompute-all-providers.sqlite";
19
+ const API_KEY = "test-recompute-all";
20
+
21
+ async function removeDbFiles(path: string): Promise<void> {
22
+ for (const suffix of ["", "-wal", "-shm"]) {
23
+ try {
24
+ await unlink(path + suffix);
25
+ } catch (error) {
26
+ if ((error as NodeJS.ErrnoException).code !== "ENOENT") throw error;
27
+ }
28
+ }
29
+ }
30
+
31
+ async function listen(server: Server): Promise<number> {
32
+ await new Promise<void>((resolve) => server.listen(0, resolve));
33
+ const addr = server.address();
34
+ if (!addr || typeof addr === "string") throw new Error("no port");
35
+ return addr.port;
36
+ }
37
+
38
+ function createTestServer(apiKey: string): Server {
39
+ return createHttpServer(async (req: IncomingMessage, res: ServerResponse) => {
40
+ const myAgentId = req.headers["x-agent-id"] as string | undefined;
41
+ const handled = await handleCore(req, res, myAgentId, apiKey);
42
+ if (handled) return;
43
+ const pathSegments = getPathSegments(req.url || "");
44
+ const queryParams = parseQueryParams(req.url || "");
45
+ const ok = await handleSessionData(req, res, pathSegments, queryParams, myAgentId);
46
+ if (!ok) {
47
+ res.writeHead(404);
48
+ res.end("Not Found");
49
+ }
50
+ });
51
+ }
52
+
53
+ let server: Server;
54
+ let port: number;
55
+ let testAgent: { id: string };
56
+
57
+ beforeAll(async () => {
58
+ await removeDbFiles(TEST_DB_PATH);
59
+ initDb(TEST_DB_PATH);
60
+ testAgent = createAgent({ name: "recompute-all-test", isLead: false, status: "idle" });
61
+ server = createTestServer(API_KEY);
62
+ port = await listen(server);
63
+ });
64
+
65
+ afterAll(async () => {
66
+ await new Promise<void>((resolve) => server.close(() => resolve()));
67
+ closeDb();
68
+ await removeDbFiles(TEST_DB_PATH);
69
+ });
70
+
71
+ afterEach(() => {
72
+ const db = getDb();
73
+ db.prepare("DELETE FROM session_costs").run();
74
+ // Wipe everything we explicitly inserted (effective_from > 0); leave the
75
+ // migration-046 codex seeds alone.
76
+ db.prepare("DELETE FROM pricing WHERE effective_from > 0").run();
77
+ });
78
+
79
+ function authedFetch(path: string, init: RequestInit = {}): Promise<Response> {
80
+ return fetch(`http://localhost:${port}${path}`, {
81
+ ...init,
82
+ headers: {
83
+ Authorization: `Bearer ${API_KEY}`,
84
+ "Content-Type": "application/json",
85
+ ...(init.headers ?? {}),
86
+ },
87
+ });
88
+ }
89
+
90
+ interface CostResponse {
91
+ success: boolean;
92
+ cost: {
93
+ totalCostUsd: number;
94
+ costSource: "harness" | "pricing-table" | "unpriced";
95
+ };
96
+ }
97
+
98
+ function seedTwoClassRates(provider: string, model: string, inputRate = 1, outputRate = 10) {
99
+ insertPricingRow({
100
+ provider: provider as Parameters<typeof insertPricingRow>[0]["provider"],
101
+ model,
102
+ tokenClass: "input",
103
+ effectiveFrom: 1,
104
+ pricePerMillionUsd: inputRate,
105
+ });
106
+ insertPricingRow({
107
+ provider: provider as Parameters<typeof insertPricingRow>[0]["provider"],
108
+ model,
109
+ tokenClass: "output",
110
+ effectiveFrom: 1,
111
+ pricePerMillionUsd: outputRate,
112
+ });
113
+ }
114
+
115
+ describe("Phase 2 — POST /api/session-costs recompute fires for every provider", () => {
116
+ for (const provider of [
117
+ "claude",
118
+ "claude-managed",
119
+ "codex",
120
+ "pi",
121
+ "opencode",
122
+ "devin",
123
+ "gemini",
124
+ ] as const) {
125
+ test(`provider=${provider} with seeded rows → costSource='pricing-table'`, async () => {
126
+ seedTwoClassRates(provider, `${provider}-test-model`, 2, 10);
127
+
128
+ const res = await authedFetch(`/api/session-costs`, {
129
+ method: "POST",
130
+ body: JSON.stringify({
131
+ sessionId: `${provider}-recompute-1`,
132
+ agentId: testAgent.id,
133
+ totalCostUsd: 999.99, // worker-reported; expected to be overwritten
134
+ inputTokens: 1_000_000, // 1M input
135
+ outputTokens: 500_000, // 500k output
136
+ model: `${provider}-test-model`,
137
+ provider,
138
+ durationMs: 1_000,
139
+ numTurns: 1,
140
+ }),
141
+ });
142
+ expect(res.status).toBe(201);
143
+ const body = (await res.json()) as CostResponse;
144
+ expect(body.cost.costSource).toBe("pricing-table");
145
+ // 1M @ 2 + 0.5M @ 10 = $2 + $5 = $7
146
+ expect(body.cost.totalCostUsd).toBeCloseTo(7.0, 5);
147
+ });
148
+ }
149
+
150
+ test("unknown (provider, model) pair → costSource='unpriced', worker value preserved", async () => {
151
+ const res = await authedFetch(`/api/session-costs`, {
152
+ method: "POST",
153
+ body: JSON.stringify({
154
+ sessionId: "unpriced-1",
155
+ agentId: testAgent.id,
156
+ totalCostUsd: 1.23,
157
+ inputTokens: 100,
158
+ outputTokens: 50,
159
+ model: "gpt-future-2027",
160
+ provider: "codex",
161
+ durationMs: 1_000,
162
+ numTurns: 1,
163
+ }),
164
+ });
165
+ expect(res.status).toBe(201);
166
+ const body = (await res.json()) as CostResponse;
167
+ expect(body.cost.costSource).toBe("unpriced");
168
+ expect(body.cost.totalCostUsd).toBe(1.23);
169
+ });
170
+ });
@@ -24,7 +24,12 @@ type TestCostData = {
24
24
  model?: string;
25
25
  };
26
26
 
27
- describe("store-progress with cost data", () => {
27
+ // Phase 11 NOTE: the `store-progress` MCP tool no longer accepts a `costData`
28
+ // field — adapters are the sole writers of `session_costs`. These tests now
29
+ // exercise the lower-level `createSessionCost` API directly, which the runner
30
+ // still calls via `POST /api/session-costs`. They verify the DB write path
31
+ // hasn't regressed, NOT the tool's input schema.
32
+ describe("createSessionCost direct API (was: store-progress with cost data)", () => {
28
33
  let agentId: string;
29
34
  let taskId: string;
30
35
 
@@ -3,7 +3,6 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import * as z from "zod";
4
4
  import {
5
5
  completeTask,
6
- createSessionCost,
7
6
  createTaskExtended,
8
7
  failTask,
9
8
  getAgentById,
@@ -24,32 +23,12 @@ import { AgentTaskSchema } from "@/types";
24
23
  import "./templates";
25
24
  import { validateJsonSchema } from "@/workflows/json-schema-validator";
26
25
 
27
- // Schema for optional cost data that agents can self-report.
28
- // In practice the harness adapter (claude/codex/opencode/etc.) is the
29
- // authoritative source of cost data — it gets written via
30
- // POST /api/session-costs from the runner. Agents calling store-progress
31
- // rarely know the real numbers and have been observed echoing the example
32
- // values from this schema (e.g. model="opus" on a gpt-5-nano run). The
33
- // handler below silently drops payloads where every numeric field is zero.
34
- const CostDataSchema = z
35
- .object({
36
- totalCostUsd: z.number().min(0).describe("Total cost in USD"),
37
- inputTokens: z.number().int().min(0).optional().describe("Input tokens used"),
38
- outputTokens: z.number().int().min(0).optional().describe("Output tokens used"),
39
- cacheReadTokens: z.number().int().min(0).optional().describe("Cache read tokens"),
40
- cacheWriteTokens: z.number().int().min(0).optional().describe("Cache write tokens"),
41
- durationMs: z.number().int().min(0).optional().describe("Duration in milliseconds"),
42
- numTurns: z.number().int().min(1).optional().describe("Number of turns/iterations"),
43
- model: z
44
- .string()
45
- .optional()
46
- .describe(
47
- "Model identifier reported by the agent (only set if the agent has the real ID; do NOT echo the schema example).",
48
- ),
49
- })
50
- .describe(
51
- "Optional self-reported cost data. The harness adapter writes the authoritative cost record automatically — only pass this if you have real, non-zero numbers from a model that doesn't surface usage to the harness.",
52
- );
26
+ // Phase 11: the `cost` / `costData` field was removed from this tool's input
27
+ // schema. Adapters (claude/codex/pi/opencode/devin/claude-managed) are the
28
+ // sole writers of `session_costs` rows via `POST /api/session-costs`. Agents
29
+ // calling `store-progress` rarely knew the real numbers and historically
30
+ // echoed the schema example, producing noise rows keyed `mcp-<taskId>-<ts>`
31
+ // that double-counted alongside the harness's authoritative entry.
53
32
 
54
33
  export const registerStoreProgressTool = (server: McpServer) => {
55
34
  createToolRegistrar(server)(
@@ -72,9 +51,10 @@ export const registerStoreProgressTool = (server: McpServer) => {
72
51
  .string()
73
52
  .optional()
74
53
  .describe("The reason for failure (used when failing)."),
75
- costData: CostDataSchema.optional().describe(
76
- "Optional cost data for tracking session costs. When provided, a session cost record will be created linked to this task.",
77
- ),
54
+ // Phase 11: `costData` removed. The harness adapter is the sole
55
+ // writer of `session_costs` (see POST /api/session-costs in the
56
+ // runner). If a payload still includes the field, Zod's
57
+ // `unknownKeys` default drops it silently.
78
58
  }),
79
59
  outputSchema: z.object({
80
60
  success: z.boolean(),
@@ -89,7 +69,7 @@ export const registerStoreProgressTool = (server: McpServer) => {
89
69
  ),
90
70
  }),
91
71
  },
92
- async ({ taskId, progress, status, output, failureReason, costData }, requestInfo, _meta) => {
72
+ async ({ taskId, progress, status, output, failureReason }, requestInfo, _meta) => {
93
73
  if (!requestInfo.agentId) {
94
74
  return {
95
75
  content: [
@@ -254,35 +234,11 @@ export const registerStoreProgressTool = (server: McpServer) => {
254
234
  }
255
235
  }
256
236
 
257
- // Store cost data only if the agent provided non-trivial numbers.
258
- // Agents observed copying the schema example (e.g. model="opus"
259
- // on a gpt-5-nano run) with all-zero token/cost fields, producing
260
- // duplicate noise rows in session_costs alongside the harness's
261
- // authoritative entry. Drop those silently.
262
- const hasRealCost =
263
- costData &&
264
- (costData.totalCostUsd > 0 ||
265
- (costData.inputTokens ?? 0) > 0 ||
266
- (costData.outputTokens ?? 0) > 0 ||
267
- (costData.cacheReadTokens ?? 0) > 0 ||
268
- (costData.cacheWriteTokens ?? 0) > 0);
269
-
270
- if (hasRealCost && requestInfo.agentId) {
271
- createSessionCost({
272
- sessionId: `mcp-${taskId}-${Date.now()}`, // Generate unique session ID for MCP-based tasks
273
- taskId,
274
- agentId: requestInfo.agentId,
275
- totalCostUsd: costData.totalCostUsd,
276
- inputTokens: costData.inputTokens ?? 0,
277
- outputTokens: costData.outputTokens ?? 0,
278
- cacheReadTokens: costData.cacheReadTokens ?? 0,
279
- cacheWriteTokens: costData.cacheWriteTokens ?? 0,
280
- durationMs: costData.durationMs ?? 0,
281
- numTurns: costData.numTurns ?? 1,
282
- model: costData.model ?? "unknown",
283
- isError: status === "failed",
284
- });
285
- }
237
+ // Phase 11: removed the per-call `session_costs` insert. The harness
238
+ // adapter is the sole writer of cost rows now (via the runner's
239
+ // `POST /api/session-costs`); store-progress historically wrote a
240
+ // duplicate row keyed `mcp-<taskId>-<ts>` whenever an agent
241
+ // hallucinated a `costData` payload.
286
242
 
287
243
  return {
288
244
  success: true,
@@ -12,6 +12,8 @@ import type {
12
12
  ServerRequest,
13
13
  ToolAnnotations,
14
14
  } from "@modelcontextprotocol/sdk/types.js";
15
+ import { withSpan } from "../otel";
16
+ import { scrubSecrets } from "../utils/secret-scrubber";
15
17
 
16
18
  type Meta = RequestHandlerExtra<ServerRequest, ServerNotification>;
17
19
 
@@ -46,6 +48,38 @@ export const getRequestInfo = (req: Meta): RequestInfo => {
46
48
  };
47
49
  };
48
50
 
51
+ const PREVIEW_LIMIT = 500;
52
+
53
+ function previewValue(value: unknown): string | undefined {
54
+ if (value === undefined) return undefined;
55
+ try {
56
+ const serialized = typeof value === "string" ? value : JSON.stringify(value);
57
+ if (!serialized) return undefined;
58
+ const scrubbed = scrubSecrets(serialized);
59
+ return scrubbed.length > PREVIEW_LIMIT ? `${scrubbed.slice(0, PREVIEW_LIMIT)}...` : scrubbed;
60
+ } catch {
61
+ return "[unserializable]";
62
+ }
63
+ }
64
+
65
+ function toolRequestAttributes(name: string, requestInfo: RequestInfo, args?: unknown) {
66
+ return {
67
+ "mcp.tool.name": name,
68
+ "mcp.session.id": requestInfo.sessionId,
69
+ "agent.id": requestInfo.agentId,
70
+ "agentswarm.task.id": requestInfo.sourceTaskId,
71
+ "agentswarm.tool.args_preview": previewValue(args),
72
+ };
73
+ }
74
+
75
+ function toolResultAttributes(result: CallToolResult) {
76
+ return {
77
+ "mcp.tool.result_content_count": Array.isArray(result.content) ? result.content.length : 0,
78
+ "mcp.tool.is_error": result.isError ?? false,
79
+ "agentswarm.tool.result_preview": previewValue(result.content),
80
+ };
81
+ }
82
+
49
83
  // Infer the input type from the schema
50
84
  type InferInput<Args extends undefined | ZodRawShapeCompat | AnySchema> =
51
85
  Args extends ZodRawShapeCompat
@@ -104,23 +138,42 @@ export const createToolRegistrar = (server: McpServer) => {
104
138
  // When inputSchema is undefined, the MCP SDK calls handler(extra) with a single arg.
105
139
  // When inputSchema is defined, it calls handler(args, extra) with two args.
106
140
  if (config.inputSchema === undefined) {
107
- return server.registerTool(name, config, ((meta: Meta) => {
141
+ return server.registerTool(name, config, (async (meta: Meta) => {
108
142
  const requestInfo = getRequestInfo(meta);
109
- return (
110
- cb as (requestInfo: RequestInfo, meta: Meta) => CallToolResult | Promise<CallToolResult>
111
- )(requestInfo, meta);
143
+ return withSpan(
144
+ "mcp.tool",
145
+ async (span) => {
146
+ const result = await (
147
+ cb as (
148
+ requestInfo: RequestInfo,
149
+ meta: Meta,
150
+ ) => CallToolResult | Promise<CallToolResult>
151
+ )(requestInfo, meta);
152
+ span.setAttributes(toolResultAttributes(result));
153
+ return result;
154
+ },
155
+ toolRequestAttributes(name, requestInfo),
156
+ );
112
157
  }) as Parameters<typeof server.registerTool>[2]);
113
158
  }
114
159
 
115
- return server.registerTool(name, config, ((args: InferInput<InputArgs>, meta: Meta) => {
160
+ return server.registerTool(name, config, (async (args: InferInput<InputArgs>, meta: Meta) => {
116
161
  const requestInfo = getRequestInfo(meta);
117
- return (
118
- cb as (
119
- args: InferInput<InputArgs>,
120
- requestInfo: RequestInfo,
121
- meta: Meta,
122
- ) => CallToolResult | Promise<CallToolResult>
123
- )(args, requestInfo, meta);
162
+ return withSpan(
163
+ "mcp.tool",
164
+ async (span) => {
165
+ const result = await (
166
+ cb as (
167
+ args: InferInput<InputArgs>,
168
+ requestInfo: RequestInfo,
169
+ meta: Meta,
170
+ ) => CallToolResult | Promise<CallToolResult>
171
+ )(args, requestInfo, meta);
172
+ span.setAttributes(toolResultAttributes(result));
173
+ return result;
174
+ },
175
+ toolRequestAttributes(name, requestInfo, args),
176
+ );
124
177
  }) as Parameters<typeof server.registerTool>[2]);
125
178
  };
126
179
  };
package/src/types.ts CHANGED
@@ -192,7 +192,10 @@ export const AgentTaskSchema = z.object({
192
192
  // Context usage aggregates
193
193
  compactionCount: z.number().int().min(0).optional(),
194
194
  peakContextPercent: z.number().min(0).max(100).optional(),
195
- totalContextTokensUsed: z.number().int().min(0).optional(),
195
+ // Migration 063: renamed from totalContextTokensUsed. Semantic is now a
196
+ // monotonic max across the task's snapshots — "high water mark" rather than
197
+ // "latest known".
198
+ peakContextTokens: z.number().int().min(0).optional(),
196
199
  contextWindowSize: z.number().int().min(0).optional(),
197
200
 
198
201
  // Credential tracking
@@ -574,7 +577,9 @@ export const SessionLogSchema = z.object({
574
577
  export type SessionLog = z.infer<typeof SessionLogSchema>;
575
578
 
576
579
  // Session Cost Types (aggregated cost data per session)
577
- export const SessionCostSourceSchema = z.enum(["harness", "pricing-table"]);
580
+ // Migration 063 widened the set to include 'unpriced' for cases where the API
581
+ // recompute path couldn't find pricing rows for the (provider, model, token_class).
582
+ export const SessionCostSourceSchema = z.enum(["harness", "pricing-table", "unpriced"]);
578
583
  export type SessionCostSource = z.infer<typeof SessionCostSourceSchema>;
579
584
 
580
585
  export const SessionCostSchema = z.object({
@@ -587,13 +592,22 @@ export const SessionCostSchema = z.object({
587
592
  outputTokens: z.number().int().min(0).default(0),
588
593
  cacheReadTokens: z.number().int().min(0).default(0),
589
594
  cacheWriteTokens: z.number().int().min(0).default(0),
595
+ // Migration 063: reasoning_output_tokens from codex turn.completed events.
596
+ reasoningOutputTokens: z.number().int().min(0).default(0),
597
+ // Migration 063: thinking_input_tokens from claude extended-thinking flows.
598
+ thinkingTokens: z.number().int().min(0).default(0),
590
599
  durationMs: z.number().int().min(0),
591
- numTurns: z.number().int().min(1),
600
+ // numTurns is nullable — some adapters (e.g. Claude when num_turns is absent)
601
+ // can't honestly report a turn count. We prefer null over a faked 1.
602
+ numTurns: z.number().int().min(1).nullable(),
592
603
  model: z.string(),
593
604
  isError: z.boolean().default(false),
594
- // Phase 6: where the recorded totalCostUsd came from. New rows write the
595
- // actual source ('pricing-table' when the API recomputed Codex USD from DB
596
- // pricing rows, 'harness' otherwise). Defaults to 'harness' for back-compat.
605
+ // Phase 6 (extended by migration 063): where the recorded totalCostUsd came from.
606
+ // 'harness' — value reported by the harness as-is.
607
+ // 'pricing-table' — value recomputed by the API from `pricing` rows.
608
+ // 'unpriced' — the API tried to recompute but the (provider, model)
609
+ // had no matching pricing rows; totalCostUsd is whatever
610
+ // the worker submitted (often 0).
597
611
  costSource: SessionCostSourceSchema.default("harness"),
598
612
  createdAt: z.iso.datetime(),
599
613
  });
@@ -1381,6 +1395,21 @@ export type McpServerWithInstallInfo = z.infer<typeof McpServerWithInstallInfoSc
1381
1395
  export const ContextSnapshotEventTypeSchema = z.enum(["progress", "compaction", "completion"]);
1382
1396
  export type ContextSnapshotEventType = z.infer<typeof ContextSnapshotEventTypeSchema>;
1383
1397
 
1398
+ // Migration 063: the formula the emitting adapter used to compute
1399
+ // contextUsedTokens. Lets downstream consumers (UI badges, cross-provider
1400
+ // comparisons) reason about whether two numbers are commensurable. Values
1401
+ // match the inline doc in `src/be/migrations/063_cost_context_schema_relax.sql`.
1402
+ export const ContextFormulaSchema = z.enum([
1403
+ "input-cache-output", // unified formula (post-Phase 9)
1404
+ "input-cache-no-output", // pre-unification claude formula
1405
+ "input-output-no-cache", // pre-unification claude-managed formula
1406
+ "peak-proxy", // pre-unification codex formula
1407
+ "pi-delegated", // numbers come from the pi-ai SDK
1408
+ "harness-reported", // numbers come from a harness API (devin)
1409
+ "unknown", // pre-migration backfill or adapter didn't tag
1410
+ ]);
1411
+ export type ContextFormula = z.infer<typeof ContextFormulaSchema>;
1412
+
1384
1413
  export const ContextSnapshotSchema = z.object({
1385
1414
  id: z.uuid(),
1386
1415
  taskId: z.uuid(),
@@ -1396,13 +1425,18 @@ export const ContextSnapshotSchema = z.object({
1396
1425
  eventType: ContextSnapshotEventTypeSchema,
1397
1426
 
1398
1427
  // Compaction-specific (null for non-compaction)
1399
- compactTrigger: z.enum(["auto", "manual"]).optional(),
1428
+ compactTrigger: z.enum(["auto", "manual", "auto-inferred"]).optional(),
1400
1429
  preCompactTokens: z.number().int().min(0).optional(),
1401
1430
 
1402
1431
  // Cumulative counters at this point
1403
1432
  cumulativeInputTokens: z.number().int().min(0).default(0),
1404
1433
  cumulativeOutputTokens: z.number().int().min(0).default(0),
1405
1434
 
1435
+ // Migration 063 — adapter stamps the formula it used to compute
1436
+ // contextUsedTokens. Optional so old rows / new providers without a tag
1437
+ // don't break, but every adapter should populate this going forward.
1438
+ contextFormula: ContextFormulaSchema.optional(),
1439
+
1406
1440
  createdAt: z.iso.datetime(),
1407
1441
  });
1408
1442
 
@@ -1430,10 +1464,29 @@ export const BudgetSchema = z.object({
1430
1464
  });
1431
1465
  export type Budget = z.infer<typeof BudgetSchema>;
1432
1466
 
1433
- export const PricingProviderSchema = z.enum(["claude", "codex", "pi"]);
1467
+ // Migration 063 widened both enums and dropped the SQL CHECKs to match.
1468
+ // New providers can land without an accompanying schema migration; Zod is now
1469
+ // the single source of truth for what's a valid (provider, token_class) row.
1470
+ export const PricingProviderSchema = z.enum([
1471
+ "claude",
1472
+ "claude-managed",
1473
+ "codex",
1474
+ "pi",
1475
+ "opencode",
1476
+ "devin",
1477
+ "gemini",
1478
+ ]);
1434
1479
  export type PricingProvider = z.infer<typeof PricingProviderSchema>;
1435
1480
 
1436
- export const PricingTokenClassSchema = z.enum(["input", "cached_input", "output"]);
1481
+ export const PricingTokenClassSchema = z.enum([
1482
+ "input",
1483
+ "cached_input",
1484
+ "output",
1485
+ // Migration 063 additions:
1486
+ "cache_write", // claude / claude-managed cache creation
1487
+ "runtime_hour", // claude-managed runtime fee per hour
1488
+ "acu", // devin Agent Compute Unit
1489
+ ]);
1437
1490
  export type PricingTokenClass = z.infer<typeof PricingTokenClassSchema>;
1438
1491
 
1439
1492
  export const PricingRowSchema = z.object({
@@ -2,31 +2,95 @@
2
2
  * Context window size lookup and usage computation utilities.
3
3
  *
4
4
  * This module is safe for both API and worker code — it has NO database imports.
5
+ *
6
+ * Phase 4 + Phase 9 of the cost-tracking plan:
7
+ * - `getContextWindowSize` now resolves shortnames, family-versioned ids
8
+ * (`claude-sonnet-4-6`), AND dated full ids (`claude-sonnet-4-6-20251004`)
9
+ * by stripping the trailing date suffix. Previously the dated form fell
10
+ * to the 200k default — wildly wrong for sonnet/opus 4.x.
11
+ * - `computeContextUsedUnified` is the canonical formula every adapter
12
+ * should use when emitting a `context_usage` event:
13
+ * contextUsedTokens = input + cache_read + cache_create + output
14
+ * The matching `CONTEXT_FORMULA` constant is what adapters stamp onto
15
+ * the snapshot's `contextFormula` field.
16
+ * - The legacy `computeContextUsed` stays for back-compat reads but is
17
+ * deprecated; new code should use `computeContextUsedUnified`.
18
+ */
19
+
20
+ /**
21
+ * Phase 9: stamp this onto every `context_usage` event the adapter emits.
22
+ * Callers that compute their own number for legacy reasons (e.g. pi-mono
23
+ * delegates to the pi-ai SDK) use a different value — see `ContextFormula`
24
+ * in `src/types.ts`.
5
25
  */
26
+ export const CONTEXT_FORMULA = "input-cache-output" as const;
6
27
 
7
28
  const CONTEXT_WINDOW_DEFAULTS: Record<string, number> = {
29
+ // Anthropic 4.x family
30
+ "claude-opus-4-7": 1_000_000,
8
31
  "claude-opus-4-6": 1_000_000,
32
+ "claude-opus-4-5": 1_000_000,
33
+ "claude-opus-4-1": 200_000,
34
+ "claude-opus-4-0": 200_000,
9
35
  "claude-sonnet-4-6": 1_000_000,
36
+ "claude-sonnet-4-5": 1_000_000,
37
+ "claude-sonnet-4-0": 200_000,
10
38
  "claude-haiku-4-5": 200_000,
39
+ // Anthropic 3.x family (legacy)
40
+ "claude-3-7-sonnet": 200_000,
41
+ "claude-3-5-sonnet": 200_000,
42
+ "claude-3-5-haiku": 200_000,
43
+ "claude-3-opus": 200_000,
44
+ "claude-3-sonnet": 200_000,
45
+ "claude-3-haiku": 200_000,
46
+ // Shortnames used by the local-CLI adapter and pi-mono OpenRouter mirror.
11
47
  opus: 1_000_000,
12
48
  sonnet: 1_000_000,
13
49
  haiku: 200_000,
14
50
  default: 200_000,
15
51
  };
16
52
 
53
+ const DEFAULT_CONTEXT_WINDOW = 200_000;
54
+
17
55
  /**
18
- * Look up the context window size (in tokens) for a given model identifier.
19
- * Falls back to the "default" entry when the model is not explicitly mapped.
56
+ * Strip a trailing date suffix from a Claude model id so dated full ids
57
+ * resolve to the same window as the family-versioned id.
58
+ *
59
+ * `claude-sonnet-4-6-20251004` → `claude-sonnet-4-6`
60
+ * `claude-haiku-4-5-20251001` → `claude-haiku-4-5`
61
+ *
62
+ * Anthropic's dated full ids end in a `-YYYYMMDD` snapshot date (e.g.
63
+ * `claude-sonnet-4-6-20251004`), so an 8-digit trailing suffix is a reliable signal.
20
64
  */
21
- const DEFAULT_CONTEXT_WINDOW = 200_000;
65
+ function stripAnthropicDateSuffix(model: string): string {
66
+ return model.replace(/-(\d{8})$/, "");
67
+ }
22
68
 
23
69
  export function getContextWindowSize(model: string): number {
24
- return CONTEXT_WINDOW_DEFAULTS[model] ?? DEFAULT_CONTEXT_WINDOW;
70
+ // Fast path: exact match (shortname or family-versioned id).
71
+ if (CONTEXT_WINDOW_DEFAULTS[model] !== undefined) {
72
+ return CONTEXT_WINDOW_DEFAULTS[model];
73
+ }
74
+ // Dated full id → strip suffix and retry.
75
+ const stripped = stripAnthropicDateSuffix(model);
76
+ if (stripped !== model && CONTEXT_WINDOW_DEFAULTS[stripped] !== undefined) {
77
+ return CONTEXT_WINDOW_DEFAULTS[stripped];
78
+ }
79
+ // OpenAI / GPT family — most reasoning models have 200k+; we keep this
80
+ // conservative and let callers override via models.dev rates if they want.
81
+ // Specific gpt-5.x context windows are >1M but the local-CLI adapter
82
+ // generally doesn't surface those; the API recompute path uses the rate
83
+ // table, not the window. The 200k default keeps the math safe.
84
+ return DEFAULT_CONTEXT_WINDOW;
25
85
  }
26
86
 
27
87
  /**
28
88
  * Compute the total context tokens used from a Claude API usage object.
29
89
  * Sums input_tokens + cache_creation_input_tokens + cache_read_input_tokens.
90
+ *
91
+ * @deprecated Phase 9 — use {@link computeContextUsedUnified} instead. This
92
+ * variant excludes output tokens, which is the wrong number when the goal is
93
+ * "how full is the model's context window right now."
30
94
  */
31
95
  export function computeContextUsed(usage: {
32
96
  input_tokens?: number | null;
@@ -39,3 +103,39 @@ export function computeContextUsed(usage: {
39
103
  (usage.cache_read_input_tokens ?? 0)
40
104
  );
41
105
  }
106
+
107
+ /**
108
+ * Phase 9: the unified context-used formula adapters should use when emitting
109
+ * `context_usage` events. Sums input + cache_read + cache_create + output,
110
+ * which is the number the Claude Code status line shows. Cross-provider
111
+ * comparisons (claude vs codex vs pi) are only meaningful when every adapter
112
+ * agrees on this formula.
113
+ *
114
+ * Returns 0 if every field is missing; callers should check the `contextTotal`
115
+ * separately and emit `null` for `contextPercent` when the window is unknown.
116
+ */
117
+ export function computeContextUsedUnified(parts: {
118
+ inputTokens?: number | null;
119
+ cacheReadTokens?: number | null;
120
+ cacheCreateTokens?: number | null;
121
+ outputTokens?: number | null;
122
+ }): number {
123
+ return (
124
+ (parts.inputTokens ?? 0) +
125
+ (parts.cacheReadTokens ?? 0) +
126
+ (parts.cacheCreateTokens ?? 0) +
127
+ (parts.outputTokens ?? 0)
128
+ );
129
+ }
130
+
131
+ /**
132
+ * Phase 9: clamp a raw context-percent value to [0, 100]. Returns null when
133
+ * `total` is missing or 0 so callers can show "unknown" instead of a
134
+ * divide-by-zero NaN/∞.
135
+ */
136
+ export function clampContextPercent(used: number, total: number | null | undefined): number | null {
137
+ if (!total || total <= 0) return null;
138
+ const raw = (used / total) * 100;
139
+ if (!Number.isFinite(raw)) return null;
140
+ return Math.min(100, Math.max(0, raw));
141
+ }