@desplega.ai/agent-swarm 1.79.4 → 1.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +98 -19
- package/package.json +12 -6
- package/src/be/db.ts +101 -30
- package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
- package/src/be/pricing-normalize.ts +81 -0
- package/src/be/seed-pricing.ts +293 -0
- package/src/commands/claude-managed-setup.ts +19 -3
- package/src/commands/runner.ts +592 -237
- package/src/http/context.ts +6 -2
- package/src/http/index.ts +115 -68
- package/src/http/session-data.ts +74 -23
- package/src/otel-impl.ts +200 -0
- package/src/otel.ts +127 -0
- package/src/providers/claude-adapter.ts +30 -5
- package/src/providers/claude-managed-adapter.ts +43 -17
- package/src/providers/claude-managed-pricing.ts +34 -0
- package/src/providers/codex-adapter.ts +38 -27
- package/src/providers/codex-models.ts +22 -3
- package/src/providers/devin-adapter.ts +11 -0
- package/src/providers/opencode-adapter.ts +31 -7
- package/src/providers/pi-mono-adapter.ts +39 -7
- package/src/providers/pricing-sources.md +52 -0
- package/src/providers/swarm-events-shared.ts +8 -4
- package/src/providers/types.ts +33 -10
- package/src/server.ts +6 -0
- package/src/tests/claude-managed-adapter.test.ts +17 -3
- package/src/tests/claude-managed-setup.test.ts +10 -1
- package/src/tests/codex-adapter.test.ts +20 -19
- package/src/tests/context-snapshot.test.ts +2 -2
- package/src/tests/context-window.test.ts +65 -1
- package/src/tests/devin-adapter.test.ts +2 -0
- package/src/tests/http/context-routes.test.ts +161 -0
- package/src/tests/migration-063-schema-relax.test.ts +109 -0
- package/src/tests/opencode-adapter.test.ts +146 -1
- package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
- package/src/tests/pages-view-count.test.ts +30 -5
- package/src/tests/providers/codex-cost.test.ts +18 -0
- package/src/tests/providers/opencode-cost.test.ts +74 -0
- package/src/tests/providers/pi-cost.test.ts +128 -0
- package/src/tests/secret-scrubber.test.ts +19 -0
- package/src/tests/session-costs-codex-recompute.test.ts +35 -22
- package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
- package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
- package/src/tests/store-progress-cost.test.ts +6 -1
- package/src/tools/store-progress.ts +16 -60
- package/src/tools/utils.ts +65 -12
- package/src/types.ts +62 -9
- package/src/utils/context-window.ts +104 -4
- package/src/utils/secret-scrubber.ts +7 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
// Phase 2: POST /api/session-costs recompute fires for every provider with
|
|
2
|
+
// seeded pricing rows — not just codex. Unknown (provider, model) pairs are
|
|
3
|
+
// tagged `costSource='unpriced'`.
|
|
4
|
+
|
|
5
|
+
import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test";
|
|
6
|
+
import { unlink } from "node:fs/promises";
|
|
7
|
+
import {
|
|
8
|
+
createServer as createHttpServer,
|
|
9
|
+
type IncomingMessage,
|
|
10
|
+
type Server,
|
|
11
|
+
type ServerResponse,
|
|
12
|
+
} from "node:http";
|
|
13
|
+
import { closeDb, createAgent, getDb, initDb, insertPricingRow } from "../be/db";
|
|
14
|
+
import { handleCore } from "../http/core";
|
|
15
|
+
import { handleSessionData } from "../http/session-data";
|
|
16
|
+
import { getPathSegments, parseQueryParams } from "../http/utils";
|
|
17
|
+
|
|
18
|
+
const TEST_DB_PATH = "./test-recompute-all-providers.sqlite";
|
|
19
|
+
const API_KEY = "test-recompute-all";
|
|
20
|
+
|
|
21
|
+
/**
 * Delete a SQLite database file together with its `-wal` and `-shm` sidecars.
 *
 * The three files are removed one after another. A file that does not exist
 * (`ENOENT`) is skipped; any other unlink failure is rethrown to the caller.
 *
 * @param path Path of the main database file.
 */
async function removeDbFiles(path: string): Promise<void> {
  const isMissingFile = (error: unknown): boolean =>
    (error as NodeJS.ErrnoException).code === "ENOENT";

  for (const target of [path, `${path}-wal`, `${path}-shm`]) {
    try {
      await unlink(target);
    } catch (error) {
      // Nothing to clean up is fine; everything else is a real failure.
      if (!isMissingFile(error)) throw error;
    }
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Bind `server` to port 0 (an OS-assigned port) and resolve with the port
 * that was actually bound.
 *
 * If the server emits `error` before the listening callback fires, the
 * returned promise rejects with that error instead of staying pending
 * forever (which would stall `beforeAll` with no useful diagnostic).
 *
 * @param server HTTP server that is not yet listening.
 * @returns The bound TCP port.
 * @throws Error("no port") when `server.address()` is null or a string
 *   (i.e. not a TCP address object).
 */
async function listen(server: Server): Promise<number> {
  await new Promise<void>((resolve, reject) => {
    const onError = (error: Error): void => reject(error);
    server.once("error", onError);
    server.listen(0, () => {
      // Bound successfully: stop routing later server errors into this promise.
      server.off("error", onError);
      resolve();
    });
  });

  const addr = server.address();
  if (addr === null || typeof addr === "string") throw new Error("no port");
  return addr.port;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build an HTTP server that routes each request through `handleCore` and then
 * `handleSessionData`, in that order.
 *
 * The agent id is taken from the `x-agent-id` request header. When neither
 * handler reports the request as handled, the server answers with a bare 404
 * and the body "Not Found".
 *
 * @param apiKey API key forwarded to `handleCore`.
 */
function createTestServer(apiKey: string): Server {
  const dispatch = async (req: IncomingMessage, res: ServerResponse): Promise<void> => {
    const myAgentId = req.headers["x-agent-id"] as string | undefined;

    // First chance: the core handler.
    if (await handleCore(req, res, myAgentId, apiKey)) return;

    // Second chance: the session-data routes under test.
    const segments = getPathSegments(req.url || "");
    const query = parseQueryParams(req.url || "");
    const claimed = await handleSessionData(req, res, segments, query, myAgentId);
    if (claimed) return;

    res.writeHead(404);
    res.end("Not Found");
  };

  return createHttpServer(dispatch);
}
|
|
52
|
+
|
|
53
|
+
// Shared fixtures, assigned once in `beforeAll` and reused by every test.
let server: Server;
let port: number;
let testAgent: { id: string };

// Suite setup: fresh database file, one agent, one HTTP server on an OS-assigned port.
beforeAll(async () => {
  // Remove leftovers first so a previously aborted run cannot leak state in.
  await removeDbFiles(TEST_DB_PATH);
  initDb(TEST_DB_PATH);

  testAgent = createAgent({ name: "recompute-all-test", isLead: false, status: "idle" });

  server = createTestServer(API_KEY);
  port = await listen(server);
});

// Suite teardown: stop the server, close the database, delete its files.
afterAll(async () => {
  await new Promise<void>((done) => {
    server.close(() => done());
  });
  closeDb();
  await removeDbFiles(TEST_DB_PATH);
});

// Per-test cleanup so cost rows and seeded rates never carry over between tests.
afterEach(() => {
  const db = getDb();
  const cleanupStatements = [
    "DELETE FROM session_costs",
    // Wipe everything we explicitly inserted (effective_from > 0); leave the
    // migration-046 codex seeds alone.
    "DELETE FROM pricing WHERE effective_from > 0",
  ];
  for (const sql of cleanupStatements) {
    db.prepare(sql).run();
  }
});
|
|
78
|
+
|
|
79
|
+
/**
 * `fetch` against the local test server with the suite's bearer token and a
 * JSON content type.
 *
 * Headers supplied through `init.headers` are spread after the defaults, so
 * they override `Authorization` / `Content-Type` when the keys collide.
 *
 * @param path Request path (including any query string), appended to the
 *   `http://localhost:<port>` origin.
 * @param init Extra `fetch` options; defaults to none.
 */
function authedFetch(path: string, init: RequestInit = {}): Promise<Response> {
  const url = `http://localhost:${port}${path}`;
  const baseHeaders = {
    Authorization: `Bearer ${API_KEY}`,
    "Content-Type": "application/json",
  };
  return fetch(url, {
    ...init,
    headers: { ...baseHeaders, ...(init.headers ?? {}) },
  });
}
|
|
89
|
+
|
|
90
|
+
/**
 * The part of the `POST /api/session-costs` JSON response these tests read.
 */
interface CostResponse {
  success: boolean;
  cost: {
    /** Cost in USD as returned in the response. */
    totalCostUsd: number;
    /** Tag describing where `totalCostUsd` came from. */
    costSource: "harness" | "pricing-table" | "unpriced";
  };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Insert two pricing rows for `(provider, model)`: one for the `input` token
 * class and one for `output`, both with `effectiveFrom: 1`.
 *
 * @param provider Provider name; cast to the provider type `insertPricingRow` expects.
 * @param model Model id the rows are keyed on.
 * @param inputRate Price per million input tokens in USD (default 1).
 * @param outputRate Price per million output tokens in USD (default 10).
 */
function seedTwoClassRates(provider: string, model: string, inputRate = 1, outputRate = 10) {
  type PricingInsert = Parameters<typeof insertPricingRow>[0];

  // Input row first, then output — same insertion order as two explicit calls.
  const rates: Array<[PricingInsert["tokenClass"], number]> = [
    ["input", inputRate],
    ["output", outputRate],
  ];

  for (const [tokenClass, pricePerMillionUsd] of rates) {
    insertPricingRow({
      provider: provider as PricingInsert["provider"],
      model,
      tokenClass,
      effectiveFrom: 1,
      pricePerMillionUsd,
    });
  }
}
|
|
114
|
+
|
|
115
|
+
describe("Phase 2 — POST /api/session-costs recompute fires for every provider", () => {
  /** POST `payload` as JSON to `/api/session-costs` using the authed helper. */
  const postSessionCost = (payload: Record<string, unknown>): Promise<Response> =>
    authedFetch(`/api/session-costs`, {
      method: "POST",
      body: JSON.stringify(payload),
    });

  const providers = [
    "claude",
    "claude-managed",
    "codex",
    "pi",
    "opencode",
    "devin",
    "gemini",
  ] as const;

  // One test per provider: seed rates, submit a deliberately wrong worker
  // cost, and expect the response to carry the recomputed value.
  for (const provider of providers) {
    const model = `${provider}-test-model`;

    test(`provider=${provider} with seeded rows → costSource='pricing-table'`, async () => {
      seedTwoClassRates(provider, model, 2, 10);

      const res = await postSessionCost({
        sessionId: `${provider}-recompute-1`,
        agentId: testAgent.id,
        totalCostUsd: 999.99, // worker-reported; expected to be overwritten
        inputTokens: 1_000_000, // 1M input
        outputTokens: 500_000, // 500k output
        model,
        provider,
        durationMs: 1_000,
        numTurns: 1,
      });

      expect(res.status).toBe(201);
      const { cost } = (await res.json()) as CostResponse;
      expect(cost.costSource).toBe("pricing-table");
      // 1M @ 2 + 0.5M @ 10 = $2 + $5 = $7
      expect(cost.totalCostUsd).toBeCloseTo(7.0, 5);
    });
  }

  // No pricing rows are seeded for this model, so the submitted cost must
  // come back untouched and tagged as unpriced.
  test("unknown (provider, model) pair → costSource='unpriced', worker value preserved", async () => {
    const res = await postSessionCost({
      sessionId: "unpriced-1",
      agentId: testAgent.id,
      totalCostUsd: 1.23,
      inputTokens: 100,
      outputTokens: 50,
      model: "gpt-future-2027",
      provider: "codex",
      durationMs: 1_000,
      numTurns: 1,
    });

    expect(res.status).toBe(201);
    const { cost } = (await res.json()) as CostResponse;
    expect(cost.costSource).toBe("unpriced");
    expect(cost.totalCostUsd).toBe(1.23);
  });
});
|
|
@@ -24,7 +24,12 @@ type TestCostData = {
|
|
|
24
24
|
model?: string;
|
|
25
25
|
};
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
// Phase 11 NOTE: the `store-progress` MCP tool no longer accepts a `costData`
|
|
28
|
+
// field — adapters are the sole writers of `session_costs`. These tests now
|
|
29
|
+
// exercise the lower-level `createSessionCost` API directly, which the runner
|
|
30
|
+
// still calls via `POST /api/session-costs`. They verify the DB write path
|
|
31
|
+
// hasn't regressed, NOT the tool's input schema.
|
|
32
|
+
describe("createSessionCost direct API (was: store-progress with cost data)", () => {
|
|
28
33
|
let agentId: string;
|
|
29
34
|
let taskId: string;
|
|
30
35
|
|
|
@@ -3,7 +3,6 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
|
3
3
|
import * as z from "zod";
|
|
4
4
|
import {
|
|
5
5
|
completeTask,
|
|
6
|
-
createSessionCost,
|
|
7
6
|
createTaskExtended,
|
|
8
7
|
failTask,
|
|
9
8
|
getAgentById,
|
|
@@ -24,32 +23,12 @@ import { AgentTaskSchema } from "@/types";
|
|
|
24
23
|
import "./templates";
|
|
25
24
|
import { validateJsonSchema } from "@/workflows/json-schema-validator";
|
|
26
25
|
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
//
|
|
32
|
-
//
|
|
33
|
-
// handler below silently drops payloads where every numeric field is zero.
|
|
34
|
-
const CostDataSchema = z
|
|
35
|
-
.object({
|
|
36
|
-
totalCostUsd: z.number().min(0).describe("Total cost in USD"),
|
|
37
|
-
inputTokens: z.number().int().min(0).optional().describe("Input tokens used"),
|
|
38
|
-
outputTokens: z.number().int().min(0).optional().describe("Output tokens used"),
|
|
39
|
-
cacheReadTokens: z.number().int().min(0).optional().describe("Cache read tokens"),
|
|
40
|
-
cacheWriteTokens: z.number().int().min(0).optional().describe("Cache write tokens"),
|
|
41
|
-
durationMs: z.number().int().min(0).optional().describe("Duration in milliseconds"),
|
|
42
|
-
numTurns: z.number().int().min(1).optional().describe("Number of turns/iterations"),
|
|
43
|
-
model: z
|
|
44
|
-
.string()
|
|
45
|
-
.optional()
|
|
46
|
-
.describe(
|
|
47
|
-
"Model identifier reported by the agent (only set if the agent has the real ID; do NOT echo the schema example).",
|
|
48
|
-
),
|
|
49
|
-
})
|
|
50
|
-
.describe(
|
|
51
|
-
"Optional self-reported cost data. The harness adapter writes the authoritative cost record automatically — only pass this if you have real, non-zero numbers from a model that doesn't surface usage to the harness.",
|
|
52
|
-
);
|
|
26
|
+
// Phase 11: the `cost` / `costData` field was removed from this tool's input
|
|
27
|
+
// schema. Adapters (claude/codex/pi/opencode/devin/claude-managed) are the
|
|
28
|
+
// sole writers of `session_costs` rows via `POST /api/session-costs`. Agents
|
|
29
|
+
// calling `store-progress` rarely knew the real numbers and historically
|
|
30
|
+
// echoed the schema example, producing noise rows keyed `mcp-<taskId>-<ts>`
|
|
31
|
+
// that double-counted alongside the harness's authoritative entry.
|
|
53
32
|
|
|
54
33
|
export const registerStoreProgressTool = (server: McpServer) => {
|
|
55
34
|
createToolRegistrar(server)(
|
|
@@ -72,9 +51,10 @@ export const registerStoreProgressTool = (server: McpServer) => {
|
|
|
72
51
|
.string()
|
|
73
52
|
.optional()
|
|
74
53
|
.describe("The reason for failure (used when failing)."),
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
),
|
|
54
|
+
// Phase 11: `costData` removed. The harness adapter is the sole
|
|
55
|
+
// writer of `session_costs` (see POST /api/session-costs in the
|
|
56
|
+
// runner). If a payload still includes the field, Zod's
|
|
57
|
+
// default unknown-key handling (strip) drops it silently.
|
|
78
58
|
}),
|
|
79
59
|
outputSchema: z.object({
|
|
80
60
|
success: z.boolean(),
|
|
@@ -89,7 +69,7 @@ export const registerStoreProgressTool = (server: McpServer) => {
|
|
|
89
69
|
),
|
|
90
70
|
}),
|
|
91
71
|
},
|
|
92
|
-
async ({ taskId, progress, status, output, failureReason
|
|
72
|
+
async ({ taskId, progress, status, output, failureReason }, requestInfo, _meta) => {
|
|
93
73
|
if (!requestInfo.agentId) {
|
|
94
74
|
return {
|
|
95
75
|
content: [
|
|
@@ -254,35 +234,11 @@ export const registerStoreProgressTool = (server: McpServer) => {
|
|
|
254
234
|
}
|
|
255
235
|
}
|
|
256
236
|
|
|
257
|
-
//
|
|
258
|
-
//
|
|
259
|
-
//
|
|
260
|
-
// duplicate
|
|
261
|
-
//
|
|
262
|
-
const hasRealCost =
|
|
263
|
-
costData &&
|
|
264
|
-
(costData.totalCostUsd > 0 ||
|
|
265
|
-
(costData.inputTokens ?? 0) > 0 ||
|
|
266
|
-
(costData.outputTokens ?? 0) > 0 ||
|
|
267
|
-
(costData.cacheReadTokens ?? 0) > 0 ||
|
|
268
|
-
(costData.cacheWriteTokens ?? 0) > 0);
|
|
269
|
-
|
|
270
|
-
if (hasRealCost && requestInfo.agentId) {
|
|
271
|
-
createSessionCost({
|
|
272
|
-
sessionId: `mcp-${taskId}-${Date.now()}`, // Generate unique session ID for MCP-based tasks
|
|
273
|
-
taskId,
|
|
274
|
-
agentId: requestInfo.agentId,
|
|
275
|
-
totalCostUsd: costData.totalCostUsd,
|
|
276
|
-
inputTokens: costData.inputTokens ?? 0,
|
|
277
|
-
outputTokens: costData.outputTokens ?? 0,
|
|
278
|
-
cacheReadTokens: costData.cacheReadTokens ?? 0,
|
|
279
|
-
cacheWriteTokens: costData.cacheWriteTokens ?? 0,
|
|
280
|
-
durationMs: costData.durationMs ?? 0,
|
|
281
|
-
numTurns: costData.numTurns ?? 1,
|
|
282
|
-
model: costData.model ?? "unknown",
|
|
283
|
-
isError: status === "failed",
|
|
284
|
-
});
|
|
285
|
-
}
|
|
237
|
+
// Phase 11: removed the per-call `session_costs` insert. The harness
|
|
238
|
+
// adapter is the sole writer of cost rows now (via the runner's
|
|
239
|
+
// `POST /api/session-costs`); store-progress historically wrote a
|
|
240
|
+
// duplicate row keyed `mcp-<taskId>-<ts>` whenever an agent
|
|
241
|
+
// hallucinated a `costData` payload.
|
|
286
242
|
|
|
287
243
|
return {
|
|
288
244
|
success: true,
|
package/src/tools/utils.ts
CHANGED
|
@@ -12,6 +12,8 @@ import type {
|
|
|
12
12
|
ServerRequest,
|
|
13
13
|
ToolAnnotations,
|
|
14
14
|
} from "@modelcontextprotocol/sdk/types.js";
|
|
15
|
+
import { withSpan } from "../otel";
|
|
16
|
+
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
15
17
|
|
|
16
18
|
type Meta = RequestHandlerExtra<ServerRequest, ServerNotification>;
|
|
17
19
|
|
|
@@ -46,6 +48,38 @@ export const getRequestInfo = (req: Meta): RequestInfo => {
|
|
|
46
48
|
};
|
|
47
49
|
};
|
|
48
50
|
|
|
51
|
+
/** Maximum preview length, in UTF-16 code units, before `...` is appended. */
const PREVIEW_LIMIT = 500;

/**
 * Render `value` as a short, secret-scrubbed string.
 *
 * Strings are used as-is; everything else goes through `JSON.stringify`. The
 * text is passed through `scrubSecrets` BEFORE truncation, so the cut is made
 * on already-scrubbed text.
 *
 * @returns `undefined` when `value` is undefined or serializes to an empty /
 *   missing string; `"[unserializable]"` when serialization or scrubbing
 *   throws; otherwise the scrubbed text, cut to `PREVIEW_LIMIT` characters
 *   plus `...` when longer.
 */
function previewValue(value: unknown): string | undefined {
  if (value === undefined) return undefined;

  try {
    const raw = typeof value === "string" ? value : JSON.stringify(value);
    if (!raw) return undefined;

    const safe = scrubSecrets(raw);
    if (safe.length <= PREVIEW_LIMIT) return safe;
    return `${safe.slice(0, PREVIEW_LIMIT)}...`;
  } catch {
    // e.g. circular structures or BigInt values rejected by JSON.stringify.
    return "[unserializable]";
  }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Build the attribute bag describing an incoming tool call: tool name,
 * session / agent / task identifiers from `requestInfo`, and a scrubbed,
 * truncated preview of the arguments.
 *
 * @param name Registered tool name.
 * @param requestInfo Caller identity extracted from the request.
 * @param args Tool arguments, if the tool takes any; omitted arguments yield
 *   an undefined preview.
 */
function toolRequestAttributes(name: string, requestInfo: RequestInfo, args?: unknown) {
  const { sessionId, agentId, sourceTaskId } = requestInfo;
  const argsPreview = previewValue(args);

  return {
    "mcp.tool.name": name,
    "mcp.session.id": sessionId,
    "agent.id": agentId,
    "agentswarm.task.id": sourceTaskId,
    "agentswarm.tool.args_preview": argsPreview,
  };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Build the attribute bag describing a tool result: how many content items it
 * carries (0 when `content` is not an array), whether it is flagged as an
 * error (missing flag counts as `false`), and a scrubbed, truncated preview of
 * the content.
 */
function toolResultAttributes(result: CallToolResult) {
  const { content, isError } = result;
  const contentCount = Array.isArray(content) ? content.length : 0;

  return {
    "mcp.tool.result_content_count": contentCount,
    "mcp.tool.is_error": isError ?? false,
    "agentswarm.tool.result_preview": previewValue(content),
  };
}
|
|
82
|
+
|
|
49
83
|
// Infer the input type from the schema
|
|
50
84
|
type InferInput<Args extends undefined | ZodRawShapeCompat | AnySchema> =
|
|
51
85
|
Args extends ZodRawShapeCompat
|
|
@@ -104,23 +138,42 @@ export const createToolRegistrar = (server: McpServer) => {
|
|
|
104
138
|
// When inputSchema is undefined, the MCP SDK calls handler(extra) with a single arg.
|
|
105
139
|
// When inputSchema is defined, it calls handler(args, extra) with two args.
|
|
106
140
|
if (config.inputSchema === undefined) {
|
|
107
|
-
return server.registerTool(name, config, ((meta: Meta) => {
|
|
141
|
+
return server.registerTool(name, config, (async (meta: Meta) => {
|
|
108
142
|
const requestInfo = getRequestInfo(meta);
|
|
109
|
-
return (
|
|
110
|
-
|
|
111
|
-
|
|
143
|
+
return withSpan(
|
|
144
|
+
"mcp.tool",
|
|
145
|
+
async (span) => {
|
|
146
|
+
const result = await (
|
|
147
|
+
cb as (
|
|
148
|
+
requestInfo: RequestInfo,
|
|
149
|
+
meta: Meta,
|
|
150
|
+
) => CallToolResult | Promise<CallToolResult>
|
|
151
|
+
)(requestInfo, meta);
|
|
152
|
+
span.setAttributes(toolResultAttributes(result));
|
|
153
|
+
return result;
|
|
154
|
+
},
|
|
155
|
+
toolRequestAttributes(name, requestInfo),
|
|
156
|
+
);
|
|
112
157
|
}) as Parameters<typeof server.registerTool>[2]);
|
|
113
158
|
}
|
|
114
159
|
|
|
115
|
-
return server.registerTool(name, config, ((args: InferInput<InputArgs>, meta: Meta) => {
|
|
160
|
+
return server.registerTool(name, config, (async (args: InferInput<InputArgs>, meta: Meta) => {
|
|
116
161
|
const requestInfo = getRequestInfo(meta);
|
|
117
|
-
return (
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
162
|
+
return withSpan(
|
|
163
|
+
"mcp.tool",
|
|
164
|
+
async (span) => {
|
|
165
|
+
const result = await (
|
|
166
|
+
cb as (
|
|
167
|
+
args: InferInput<InputArgs>,
|
|
168
|
+
requestInfo: RequestInfo,
|
|
169
|
+
meta: Meta,
|
|
170
|
+
) => CallToolResult | Promise<CallToolResult>
|
|
171
|
+
)(args, requestInfo, meta);
|
|
172
|
+
span.setAttributes(toolResultAttributes(result));
|
|
173
|
+
return result;
|
|
174
|
+
},
|
|
175
|
+
toolRequestAttributes(name, requestInfo, args),
|
|
176
|
+
);
|
|
124
177
|
}) as Parameters<typeof server.registerTool>[2]);
|
|
125
178
|
};
|
|
126
179
|
};
|
package/src/types.ts
CHANGED
|
@@ -192,7 +192,10 @@ export const AgentTaskSchema = z.object({
|
|
|
192
192
|
// Context usage aggregates
|
|
193
193
|
compactionCount: z.number().int().min(0).optional(),
|
|
194
194
|
peakContextPercent: z.number().min(0).max(100).optional(),
|
|
195
|
-
|
|
195
|
+
// Migration 063: renamed from totalContextTokensUsed. Semantic is now a
|
|
196
|
+
// monotonic max across the task's snapshots — "high water mark" rather than
|
|
197
|
+
// "latest known".
|
|
198
|
+
peakContextTokens: z.number().int().min(0).optional(),
|
|
196
199
|
contextWindowSize: z.number().int().min(0).optional(),
|
|
197
200
|
|
|
198
201
|
// Credential tracking
|
|
@@ -574,7 +577,9 @@ export const SessionLogSchema = z.object({
|
|
|
574
577
|
export type SessionLog = z.infer<typeof SessionLogSchema>;
|
|
575
578
|
|
|
576
579
|
// Session Cost Types (aggregated cost data per session)
|
|
577
|
-
|
|
580
|
+
// Migration 063 widened the set to include 'unpriced' for cases where the API
|
|
581
|
+
// recompute path couldn't find pricing rows for the (provider, model, token_class).
|
|
582
|
+
export const SessionCostSourceSchema = z.enum(["harness", "pricing-table", "unpriced"]);
|
|
578
583
|
export type SessionCostSource = z.infer<typeof SessionCostSourceSchema>;
|
|
579
584
|
|
|
580
585
|
export const SessionCostSchema = z.object({
|
|
@@ -587,13 +592,22 @@ export const SessionCostSchema = z.object({
|
|
|
587
592
|
outputTokens: z.number().int().min(0).default(0),
|
|
588
593
|
cacheReadTokens: z.number().int().min(0).default(0),
|
|
589
594
|
cacheWriteTokens: z.number().int().min(0).default(0),
|
|
595
|
+
// Migration 063: reasoning_output_tokens from codex turn.completed events.
|
|
596
|
+
reasoningOutputTokens: z.number().int().min(0).default(0),
|
|
597
|
+
// Migration 063: thinking_input_tokens from claude extended-thinking flows.
|
|
598
|
+
thinkingTokens: z.number().int().min(0).default(0),
|
|
590
599
|
durationMs: z.number().int().min(0),
|
|
591
|
-
numTurns
|
|
600
|
+
// numTurns is nullable — some adapters (e.g. Claude when num_turns is absent)
|
|
601
|
+
// can't honestly report a turn count. We prefer null over a faked 1.
|
|
602
|
+
numTurns: z.number().int().min(1).nullable(),
|
|
592
603
|
model: z.string(),
|
|
593
604
|
isError: z.boolean().default(false),
|
|
594
|
-
// Phase 6: where the recorded totalCostUsd came from.
|
|
595
|
-
//
|
|
596
|
-
//
|
|
605
|
+
// Phase 6 (extended by migration 063): where the recorded totalCostUsd came from.
|
|
606
|
+
// 'harness' — value reported by the harness as-is.
|
|
607
|
+
// 'pricing-table' — value recomputed by the API from `pricing` rows.
|
|
608
|
+
// 'unpriced' — the API tried to recompute but the (provider, model)
|
|
609
|
+
// had no matching pricing rows; totalCostUsd is whatever
|
|
610
|
+
// the worker submitted (often 0).
|
|
597
611
|
costSource: SessionCostSourceSchema.default("harness"),
|
|
598
612
|
createdAt: z.iso.datetime(),
|
|
599
613
|
});
|
|
@@ -1381,6 +1395,21 @@ export type McpServerWithInstallInfo = z.infer<typeof McpServerWithInstallInfoSc
|
|
|
1381
1395
|
export const ContextSnapshotEventTypeSchema = z.enum(["progress", "compaction", "completion"]);
|
|
1382
1396
|
export type ContextSnapshotEventType = z.infer<typeof ContextSnapshotEventTypeSchema>;
|
|
1383
1397
|
|
|
1398
|
+
// Migration 063: the formula the emitting adapter used to compute
|
|
1399
|
+
// contextUsedTokens. Lets downstream consumers (UI badges, cross-provider
|
|
1400
|
+
// comparisons) reason about whether two numbers are commensurable. Values
|
|
1401
|
+
// match the inline doc in `src/be/migrations/063_cost_context_schema_relax.sql`.
|
|
1402
|
+
export const ContextFormulaSchema = z.enum([
  // Unified formula (post-Phase 9).
  "input-cache-output",
  // Pre-unification claude formula.
  "input-cache-no-output",
  // Pre-unification claude-managed formula.
  "input-output-no-cache",
  // Pre-unification codex formula.
  "peak-proxy",
  // Numbers come from the pi-ai SDK.
  "pi-delegated",
  // Numbers come from a harness API (devin).
  "harness-reported",
  // Pre-migration backfill, or the adapter didn't tag the snapshot.
  "unknown",
]);
/** Union of the formula tags accepted by `ContextFormulaSchema`. */
export type ContextFormula = z.infer<typeof ContextFormulaSchema>;
|
|
1412
|
+
|
|
1384
1413
|
export const ContextSnapshotSchema = z.object({
|
|
1385
1414
|
id: z.uuid(),
|
|
1386
1415
|
taskId: z.uuid(),
|
|
@@ -1396,13 +1425,18 @@ export const ContextSnapshotSchema = z.object({
|
|
|
1396
1425
|
eventType: ContextSnapshotEventTypeSchema,
|
|
1397
1426
|
|
|
1398
1427
|
// Compaction-specific (null for non-compaction)
|
|
1399
|
-
compactTrigger: z.enum(["auto", "manual"]).optional(),
|
|
1428
|
+
compactTrigger: z.enum(["auto", "manual", "auto-inferred"]).optional(),
|
|
1400
1429
|
preCompactTokens: z.number().int().min(0).optional(),
|
|
1401
1430
|
|
|
1402
1431
|
// Cumulative counters at this point
|
|
1403
1432
|
cumulativeInputTokens: z.number().int().min(0).default(0),
|
|
1404
1433
|
cumulativeOutputTokens: z.number().int().min(0).default(0),
|
|
1405
1434
|
|
|
1435
|
+
// Migration 063 — adapter stamps the formula it used to compute
|
|
1436
|
+
// contextUsedTokens. Optional so old rows / new providers without a tag
|
|
1437
|
+
// don't break, but every adapter should populate this going forward.
|
|
1438
|
+
contextFormula: ContextFormulaSchema.optional(),
|
|
1439
|
+
|
|
1406
1440
|
createdAt: z.iso.datetime(),
|
|
1407
1441
|
});
|
|
1408
1442
|
|
|
@@ -1430,10 +1464,29 @@ export const BudgetSchema = z.object({
|
|
|
1430
1464
|
});
|
|
1431
1465
|
export type Budget = z.infer<typeof BudgetSchema>;
|
|
1432
1466
|
|
|
1433
|
-
|
|
1467
|
+
// Migration 063 widened both enums and dropped the SQL CHECKs to match.
|
|
1468
|
+
// New providers can land without an accompanying schema migration; Zod is now
|
|
1469
|
+
// the single source of truth for what's a valid (provider, token_class) row.
|
|
1470
|
+
/** Provider identifiers accepted on a pricing row. */
export const PricingProviderSchema = z.enum([
  "claude",
  "claude-managed",
  "codex",
  "pi",
  "opencode",
  "devin",
  "gemini",
]);
/** Union of the provider names accepted by `PricingProviderSchema`. */
export type PricingProvider = z.infer<typeof PricingProviderSchema>;
|
|
1435
1480
|
|
|
1436
|
-
export const PricingTokenClassSchema = z.enum([
|
|
1481
|
+
/** Billing classes a pricing row can be keyed on. */
export const PricingTokenClassSchema = z.enum([
  "input",
  "cached_input",
  "output",

  // Migration 063 additions:
  // claude / claude-managed cache creation
  "cache_write",
  // claude-managed runtime fee per hour
  "runtime_hour",
  // devin Agent Compute Unit
  "acu",
]);
/** Union of the token classes accepted by `PricingTokenClassSchema`. */
export type PricingTokenClass = z.infer<typeof PricingTokenClassSchema>;
|
|
1438
1491
|
|
|
1439
1492
|
export const PricingRowSchema = z.object({
|
|
@@ -2,31 +2,95 @@
|
|
|
2
2
|
* Context window size lookup and usage computation utilities.
|
|
3
3
|
*
|
|
4
4
|
* This module is safe for both API and worker code — it has NO database imports.
|
|
5
|
+
*
|
|
6
|
+
* Phase 4 + Phase 9 of the cost-tracking plan:
|
|
7
|
+
* - `getContextWindowSize` now resolves shortnames, family-versioned ids
|
|
8
|
+
* (`claude-sonnet-4-6`), AND dated full ids (`claude-sonnet-4-6-20251004`)
|
|
9
|
+
* by stripping the trailing date suffix. Previously the dated form fell
|
|
10
|
+
* to the 200k default — wildly wrong for sonnet/opus 4.x.
|
|
11
|
+
* - `computeContextUsedUnified` is the canonical formula every adapter
|
|
12
|
+
* should use when emitting a `context_usage` event:
|
|
13
|
+
* contextUsedTokens = input + cache_read + cache_create + output
|
|
14
|
+
* The matching `CONTEXT_FORMULA` constant is what adapters stamp onto
|
|
15
|
+
* the snapshot's `contextFormula` field.
|
|
16
|
+
* - The legacy `computeContextUsed` stays for back-compat reads but is
|
|
17
|
+
* deprecated; new code should use `computeContextUsedUnified`.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Phase 9: stamp this onto every `context_usage` event the adapter emits.
|
|
22
|
+
* Callers that compute their own number for legacy reasons (e.g. pi-mono
|
|
23
|
+
* delegates to the pi-ai SDK) use a different value — see `ContextFormula`
|
|
24
|
+
* in `src/types.ts`.
|
|
5
25
|
*/
|
|
26
|
+
export const CONTEXT_FORMULA = "input-cache-output" as const;
|
|
6
27
|
|
|
7
28
|
/**
 * Known context-window sizes in tokens, keyed by exact model id or shortname.
 * Dated ids are not listed; `getContextWindowSize` strips the date suffix
 * before consulting this table.
 */
const CONTEXT_WINDOW_DEFAULTS: Record<string, number> = {
  // Anthropic 4.x family
  "claude-opus-4-7": 1_000_000,
  "claude-opus-4-6": 1_000_000,
  "claude-opus-4-5": 1_000_000,
  "claude-opus-4-1": 200_000,
  "claude-opus-4-0": 200_000,
  "claude-sonnet-4-6": 1_000_000,
  "claude-sonnet-4-5": 1_000_000,
  "claude-sonnet-4-0": 200_000,
  "claude-haiku-4-5": 200_000,
  // Anthropic 3.x family (legacy)
  "claude-3-7-sonnet": 200_000,
  "claude-3-5-sonnet": 200_000,
  "claude-3-5-haiku": 200_000,
  "claude-3-opus": 200_000,
  "claude-3-sonnet": 200_000,
  "claude-3-haiku": 200_000,
  // Shortnames used by the local-CLI adapter and pi-mono OpenRouter mirror.
  opus: 1_000_000,
  sonnet: 1_000_000,
  haiku: 200_000,
  default: 200_000,
};

/** Window size returned for any model id that has no entry in the table. */
const DEFAULT_CONTEXT_WINDOW = 200_000;

/**
 * Strip a trailing date suffix from a Claude model id so dated full ids
 * resolve to the same window as the family-versioned id.
 *
 *   `claude-sonnet-4-6-20251004` → `claude-sonnet-4-6`
 *   `claude-haiku-4-5-20251001`  → `claude-haiku-4-5`
 *
 * Only a final `-` followed by exactly eight digits is removed; ids without
 * such a suffix are returned unchanged.
 */
function stripAnthropicDateSuffix(model: string): string {
  return model.replace(/-(\d{8})$/, "");
}

/**
 * Own-property lookup into `CONTEXT_WINDOW_DEFAULTS`.
 *
 * A bare `table[model]` also finds members inherited from `Object.prototype`
 * (`toString`, `constructor`, `__proto__`, …), which would leak a function or
 * object out of a function typed as returning `number`. Checking ownership
 * first keeps arbitrary model strings safe.
 */
function lookupContextWindow(model: string): number | undefined {
  return Object.prototype.hasOwnProperty.call(CONTEXT_WINDOW_DEFAULTS, model)
    ? CONTEXT_WINDOW_DEFAULTS[model]
    : undefined;
}

/**
 * Resolve the context-window size (in tokens) for a model id.
 *
 * Resolution order:
 *   1. exact match (shortname or family-versioned id);
 *   2. the id with a trailing `-YYYYMMDD` date suffix stripped;
 *   3. `DEFAULT_CONTEXT_WINDOW`.
 *
 * @param model Model id or shortname as reported by the adapter.
 * @returns Always a number; unknown ids fall back to the default.
 */
export function getContextWindowSize(model: string): number {
  // OpenAI / GPT family — most reasoning models have 200k+; we keep this
  // conservative and let callers override via models.dev rates if they want.
  // Specific gpt-5.x windows are not tabulated here because the local-CLI
  // adapter generally doesn't surface them; the API recompute path uses the
  // rate table, not the window. The 200k default keeps the math safe.
  return (
    lookupContextWindow(model) ??
    lookupContextWindow(stripAnthropicDateSuffix(model)) ??
    DEFAULT_CONTEXT_WINDOW
  );
}
|
|
26
86
|
|
|
27
87
|
/**
|
|
28
88
|
* Compute the total context tokens used from a Claude API usage object.
|
|
29
89
|
* Sums input_tokens + cache_creation_input_tokens + cache_read_input_tokens.
|
|
90
|
+
*
|
|
91
|
+
* @deprecated Phase 9 — use {@link computeContextUsedUnified} instead. This
|
|
92
|
+
* variant excludes output tokens, which is the wrong number when the goal is
|
|
93
|
+
* "how full is the model's context window right now."
|
|
30
94
|
*/
|
|
31
95
|
export function computeContextUsed(usage: {
|
|
32
96
|
input_tokens?: number | null;
|
|
@@ -39,3 +103,39 @@ export function computeContextUsed(usage: {
|
|
|
39
103
|
(usage.cache_read_input_tokens ?? 0)
|
|
40
104
|
);
|
|
41
105
|
}
|
|
106
|
+
|
|
107
|
+
/**
 * Phase 9: the unified context-used formula adapters should use when emitting
 * `context_usage` events:
 *
 *   used = input + cache_read + cache_create + output
 *
 * This is the number the Claude Code status line shows. Cross-provider
 * comparisons (claude vs codex vs pi) are only meaningful when every adapter
 * agrees on this formula.
 *
 * Each missing (`undefined`) or `null` field counts as 0, so the result is 0
 * when every field is absent. Callers should check the `contextTotal`
 * separately and emit `null` for `contextPercent` when the window is unknown.
 *
 * @param parts Per-class token counts; every field is optional.
 * @returns Sum of the four counts.
 */
export function computeContextUsedUnified(parts: {
  inputTokens?: number | null;
  cacheReadTokens?: number | null;
  cacheCreateTokens?: number | null;
  outputTokens?: number | null;
}): number {
  const input = parts.inputTokens ?? 0;
  const cacheRead = parts.cacheReadTokens ?? 0;
  const cacheCreate = parts.cacheCreateTokens ?? 0;
  const output = parts.outputTokens ?? 0;

  return input + cacheRead + cacheCreate + output;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Phase 9: turn a used/total pair into a context-percent value clamped to
 * [0, 100].
 *
 * @param used Tokens currently counted against the window.
 * @param total Window size; may be missing.
 * @returns `null` when `total` is null, undefined, NaN, zero or negative, or
 *   when the computed ratio is not finite — so callers can show "unknown"
 *   instead of a divide-by-zero NaN/∞. Otherwise `used / total * 100`,
 *   limited to the range 0–100.
 */
export function clampContextPercent(used: number, total: number | null | undefined): number | null {
  // `!(total > 0)` also rejects NaN, which a plain `total <= 0` would let through.
  if (total == null || !(total > 0)) return null;

  const percent = (used / total) * 100;
  if (!Number.isFinite(percent)) return null;

  return Math.max(0, Math.min(100, percent));
}
|