@sanity/ailf 3.8.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/config/canary-tasks.ts +64 -0
  2. package/config/test-budgets.ts +24 -0
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +19 -0
  4. package/dist/_vendor/ailf-core/config-helpers.js +27 -0
  5. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  6. package/dist/_vendor/ailf-core/index.js +1 -1
  7. package/dist/_vendor/ailf-core/schemas/canary-tasks.d.ts +52 -0
  8. package/dist/_vendor/ailf-core/schemas/canary-tasks.js +46 -0
  9. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  10. package/dist/_vendor/ailf-core/schemas/index.js +2 -0
  11. package/dist/_vendor/ailf-core/schemas/test-budgets.d.ts +19 -0
  12. package/dist/_vendor/ailf-core/schemas/test-budgets.js +34 -0
  13. package/dist/_vendor/ailf-shared/canary-drift.d.ts +84 -0
  14. package/dist/_vendor/ailf-shared/canary-drift.js +86 -0
  15. package/dist/_vendor/ailf-shared/index.d.ts +1 -0
  16. package/dist/_vendor/ailf-shared/index.js +1 -0
  17. package/dist/adapters/config-sources/file-config-adapter.js +4 -5
  18. package/dist/adapters/task-sources/repo-schemas.d.ts +3 -3
  19. package/dist/cli-program.d.ts +39 -0
  20. package/dist/cli-program.js +137 -0
  21. package/dist/cli.d.ts +6 -0
  22. package/dist/cli.js +12 -122
  23. package/dist/config/canary-tasks.ts +64 -0
  24. package/dist/config/test-budgets.ts +24 -0
  25. package/dist/pipeline/calculate-scores.d.ts +17 -2
  26. package/dist/pipeline/calculate-scores.js +99 -0
  27. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +5 -0
  28. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +25 -2
  29. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +5 -1
  30. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +4 -0
  31. package/dist/pipeline/compiler/promptfoo-compiler.js +23 -0
  32. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  33. package/dist/tasks/knowledge-probe/groq-projections.task.ts +29 -11
  34. package/package.json +6 -3
  35. package/tasks/knowledge-probe/groq-projections.task.ts +29 -11
  36. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  37. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  38. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  39. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  40. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  41. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  42. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  43. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  44. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  45. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  46. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  47. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  48. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  49. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  50. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  51. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  52. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  53. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  54. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  55. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  56. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  57. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  58. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  59. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
@@ -1,503 +0,0 @@
1
- /**
2
- * telemetry.test.ts — Tests for the observability & telemetry subsystem.
3
- *
4
- * Covers tool call classification, trace collection, cost tracking,
5
- * redaction pipeline, trace storage, and per-turn trace merging.
6
- *
7
- * Run: npx tsx --test src/pipeline/compiler/__tests__/telemetry.test.ts
8
- */
9
- import assert from "node:assert/strict";
10
- import { existsSync, rmSync } from "node:fs";
11
- import { afterEach, describe, it } from "node:test";
12
- import { tmpdir } from "os";
13
- import { resolve } from "path";
14
- import { classifyToolCall, classifyToolCalls, } from "../telemetry/tool-classifier.js";
15
- import { collectTrace, mergeTraces } from "../telemetry/trace-collector.js";
16
- import { checkBudget, computeCost, estimateRunCost, lookupPricing, } from "../telemetry/cost-tracker.js";
17
- import { redactTrace } from "../telemetry/redactor.js";
18
- import { extractTraceSummary, LocalTraceStore, } from "../telemetry/trace-store.js";
19
- // ---------------------------------------------------------------------------
20
- // Tool call classification
21
- // ---------------------------------------------------------------------------
22
- describe("classifyToolCall", () => {
23
- it("classifies known tools by exact name", () => {
24
- assert.equal(classifyToolCall("WebSearch"), "search");
25
- assert.equal(classifyToolCall("Read"), "read");
26
- assert.equal(classifyToolCall("Write"), "write");
27
- assert.equal(classifyToolCall("Bash"), "execute");
28
- assert.equal(classifyToolCall("Browser.navigate"), "navigate");
29
- assert.equal(classifyToolCall("AskUser"), "communicate");
30
- });
31
- it("uses heuristic for unknown tools", () => {
32
- assert.equal(classifyToolCall("custom_search_tool"), "search");
33
- assert.equal(classifyToolCall("ReadFromDB"), "read");
34
- assert.equal(classifyToolCall("writeConfig"), "write");
35
- assert.equal(classifyToolCall("executeScript"), "execute");
36
- });
37
- it("uses custom mappings over defaults", () => {
38
- assert.equal(classifyToolCall("MyTool", { MyTool: "communicate" }), "communicate");
39
- });
40
- it("falls back to execute for truly unknown tools", () => {
41
- assert.equal(classifyToolCall("zzz_unknown_zzz"), "execute");
42
- });
43
- });
44
- describe("classifyToolCalls", () => {
45
- it("classifies a batch and reports unrecognized names", () => {
46
- const { categories, unrecognized } = classifyToolCalls([
47
- "WebSearch",
48
- "Read",
49
- "zzz_mystery_tool",
50
- ]);
51
- assert.equal(categories.length, 3);
52
- assert.equal(categories[0], "search");
53
- assert.equal(categories[1], "read");
54
- assert.ok(unrecognized.includes("zzz_mystery_tool"));
55
- });
56
- });
57
- // ---------------------------------------------------------------------------
58
- // Trace collection
59
- // ---------------------------------------------------------------------------
60
- describe("collectTrace", () => {
61
- const baseOptions = {
62
- runId: "run-1",
63
- taskId: "task-1",
64
- testCaseIndex: 0,
65
- modelId: "openai:chat:gpt-4o",
66
- };
67
- it("creates a trace from an empty response", () => {
68
- const trace = collectTrace({}, baseOptions);
69
- assert.equal(trace.runId, "run-1");
70
- assert.equal(trace.taskId, "task-1");
71
- assert.equal(trace.modelId, "openai:chat:gpt-4o");
72
- assert.equal(trace.toolCalls.length, 0);
73
- assert.equal(trace.tokensUsed.totalTokens, 0);
74
- });
75
- it("extracts tool calls from metadata", () => {
76
- const trace = collectTrace({
77
- metadata: {
78
- toolCalls: [
79
- { name: "WebSearch", input: { query: "GROQ" }, durationMs: 100 },
80
- { name: "Read", input: { path: "/docs/groq.md" }, durationMs: 50 },
81
- ],
82
- },
83
- }, baseOptions);
84
- assert.equal(trace.toolCalls.length, 2);
85
- assert.equal(trace.toolCalls[0].name, "WebSearch");
86
- assert.equal(trace.toolCalls[0].category, "search");
87
- assert.equal(trace.toolCalls[1].name, "Read");
88
- assert.equal(trace.toolCalls[1].category, "read");
89
- });
90
- it("extracts token usage", () => {
91
- const trace = collectTrace({ tokenUsage: { prompt: 1000, completion: 500, total: 1500 } }, baseOptions);
92
- assert.equal(trace.tokensUsed.promptTokens, 1000);
93
- assert.equal(trace.tokensUsed.completionTokens, 500);
94
- assert.equal(trace.tokensUsed.totalTokens, 1500);
95
- });
96
- it("extracts URLs from tool calls", () => {
97
- const trace = collectTrace({
98
- metadata: {
99
- toolCalls: [
100
- { name: "WebFetch", input: { url: "https://sanity.io/docs" } },
101
- ],
102
- },
103
- }, baseOptions);
104
- assert.ok(trace.urlsVisited.includes("https://sanity.io/docs"));
105
- });
106
- it("extracts search terms", () => {
107
- const trace = collectTrace({
108
- metadata: {
109
- toolCalls: [
110
- { name: "WebSearch", input: { query: "GROQ projection" } },
111
- ],
112
- },
113
- }, baseOptions);
114
- assert.ok(trace.searchTerms.includes("GROQ projection"));
115
- });
116
- it("extracts files read and written", () => {
117
- const trace = collectTrace({
118
- metadata: {
119
- toolCalls: [
120
- { name: "Read", input: { path: "/src/schema.ts" } },
121
- { name: "Write", input: { path: "/src/config.ts" } },
122
- ],
123
- },
124
- }, baseOptions);
125
- assert.ok(trace.filesRead.includes("/src/schema.ts"));
126
- assert.ok(trace.filesWritten.includes("/src/config.ts"));
127
- });
128
- it("creates event log from tool calls", () => {
129
- const trace = collectTrace({
130
- metadata: {
131
- toolCalls: [
132
- { name: "WebSearch", input: { query: "test" }, durationMs: 100 },
133
- ],
134
- },
135
- latencyMs: 500,
136
- }, baseOptions);
137
- // Should have: llm_request, tool_call_start, tool_call_end, llm_response
138
- assert.equal(trace.events.length, 4);
139
- assert.equal(trace.events[0].type, "llm_request");
140
- assert.equal(trace.events[1].type, "tool_call_start");
141
- assert.equal(trace.events[2].type, "tool_call_end");
142
- assert.equal(trace.events[3].type, "llm_response");
143
- });
144
- it("builds a root span", () => {
145
- const trace = collectTrace({ latencyMs: 1000 }, baseOptions);
146
- assert.equal(trace.spans.length, 1);
147
- assert.equal(trace.spans[0].operation, "test-case");
148
- assert.equal(trace.spans[0].parentSpanId, null);
149
- });
150
- });
151
- // ---------------------------------------------------------------------------
152
- // mergeTraces (per-turn tracing — task 6f)
153
- // ---------------------------------------------------------------------------
154
- describe("mergeTraces", () => {
155
- const parentOptions = {
156
- runId: "run-1",
157
- taskId: "task-1",
158
- testCaseIndex: 0,
159
- modelId: "openai:chat:gpt-4o",
160
- };
161
- function makeTurn(index) {
162
- return collectTrace({
163
- metadata: {
164
- toolCalls: [
165
- {
166
- name: "WebSearch",
167
- input: { query: `turn ${index}` },
168
- durationMs: 50,
169
- },
170
- ],
171
- },
172
- tokenUsage: { prompt: 100, completion: 50, total: 150 },
173
- latencyMs: 200,
174
- }, { ...parentOptions, testCaseIndex: index });
175
- }
176
- it("merges multiple turns into one trace", () => {
177
- const turns = [makeTurn(0), makeTurn(1), makeTurn(2)];
178
- const merged = mergeTraces(turns, parentOptions);
179
- assert.equal(merged.toolCalls.length, 3);
180
- assert.equal(merged.tokensUsed.promptTokens, 300);
181
- assert.equal(merged.tokensUsed.completionTokens, 150);
182
- assert.equal(merged.durationMs, 600);
183
- });
184
- it("creates per-turn spans under root", () => {
185
- const turns = [makeTurn(0), makeTurn(1)];
186
- const merged = mergeTraces(turns, parentOptions);
187
- // root + 2 turns
188
- assert.equal(merged.spans.length, 3);
189
- assert.equal(merged.spans[0].operation, "test-case");
190
- assert.equal(merged.spans[0].parentSpanId, null);
191
- assert.equal(merged.spans[1].operation, "turn-0");
192
- assert.equal(merged.spans[1].parentSpanId, merged.spans[0].spanId);
193
- assert.equal(merged.spans[2].operation, "turn-1");
194
- });
195
- it("deduplicates URLs and search terms", () => {
196
- const t1 = collectTrace({
197
- metadata: {
198
- toolCalls: [
199
- { name: "WebSearch", input: { query: "GROQ" } },
200
- { name: "WebFetch", input: { url: "https://sanity.io" } },
201
- ],
202
- },
203
- }, { ...parentOptions, testCaseIndex: 0 });
204
- const t2 = collectTrace({
205
- metadata: {
206
- toolCalls: [
207
- { name: "WebSearch", input: { query: "GROQ" } },
208
- { name: "WebFetch", input: { url: "https://sanity.io" } },
209
- ],
210
- },
211
- }, { ...parentOptions, testCaseIndex: 1 });
212
- const merged = mergeTraces([t1, t2], parentOptions);
213
- assert.equal(merged.searchTerms.length, 1); // deduplicated
214
- assert.equal(merged.urlsVisited.length, 1); // deduplicated
215
- });
216
- it("handles empty turns", () => {
217
- const merged = mergeTraces([], parentOptions);
218
- assert.equal(merged.toolCalls.length, 0);
219
- assert.equal(merged.spans.length, 1); // root only
220
- });
221
- });
222
- // ---------------------------------------------------------------------------
223
- // Cost tracking
224
- // ---------------------------------------------------------------------------
225
- describe("computeCost", () => {
226
- it("computes cost from token usage and pricing", () => {
227
- const cost = computeCost({ promptTokens: 1000, completionTokens: 500, totalTokens: 1500 }, { input: 3.0, output: 15.0 });
228
- // 1000 * 3.0/1M + 500 * 15.0/1M = 0.003 + 0.0075 = 0.0105
229
- assert.ok(Math.abs(cost - 0.0105) < 0.0001);
230
- });
231
- it("accounts for cached input tokens", () => {
232
- const cost = computeCost({
233
- promptTokens: 1000,
234
- completionTokens: 500,
235
- totalTokens: 1500,
236
- toolTokens: 300,
237
- }, { input: 3.0, output: 15.0, cachedInput: 0.3 });
238
- // 700 * 3.0/1M + 300 * 0.3/1M + 500 * 15.0/1M = 0.0021 + 0.00009 + 0.0075
239
- assert.ok(cost > 0);
240
- assert.ok(cost < 0.02);
241
- });
242
- });
243
- describe("lookupPricing", () => {
244
- it("finds exact match", () => {
245
- const pricing = lookupPricing("openai:chat:gpt-4o");
246
- assert.ok(pricing);
247
- assert.ok(pricing.input > 0);
248
- });
249
- it("falls back to prefix match", () => {
250
- const pricing = lookupPricing("openai:chat:gpt-4o-2024-11-20");
251
- assert.ok(pricing);
252
- });
253
- it("returns undefined for unknown model", () => {
254
- const pricing = lookupPricing("unknown:model:xyz");
255
- assert.equal(pricing, undefined);
256
- });
257
- it("uses custom pricing over defaults", () => {
258
- const pricing = lookupPricing("custom:model", {
259
- "custom:model": { input: 1.0, output: 2.0 },
260
- });
261
- assert.ok(pricing);
262
- assert.equal(pricing.input, 1.0);
263
- });
264
- });
265
- describe("estimateRunCost", () => {
266
- it("estimates cost for a run", () => {
267
- const estimate = estimateRunCost(5, ["openai:chat:gpt-4o"]);
268
- assert.ok(estimate.totalUSD > 0);
269
- assert.equal(estimate.perModel.length, 1);
270
- });
271
- it("flags budget warning", () => {
272
- const estimate = estimateRunCost(100, ["openai:chat:gpt-4o", "anthropic:messages:claude-sonnet-4-6"], { perRun: { warn: 0.01, stop: 1.0 } });
273
- assert.equal(estimate.exceedsWarning, true);
274
- });
275
- });
276
- describe("checkBudget", () => {
277
- it("allows spend below thresholds", () => {
278
- const result = checkBudget(1.0, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun");
279
- assert.equal(result.proceed, true);
280
- assert.equal(result.warning, undefined);
281
- });
282
- it("warns at warn threshold", () => {
283
- const result = checkBudget(5.5, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun");
284
- assert.equal(result.proceed, true);
285
- assert.ok(result.warning?.includes("warning"));
286
- });
287
- it("stops at stop threshold", () => {
288
- const result = checkBudget(25.0, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun");
289
- assert.equal(result.proceed, false);
290
- assert.ok(result.warning?.includes("exceeded"));
291
- });
292
- });
293
- // ---------------------------------------------------------------------------
294
- // Redaction
295
- // ---------------------------------------------------------------------------
296
- describe("redactTrace", () => {
297
- function makeTrace(toolCalls) {
298
- return {
299
- traceId: "trace-1",
300
- runId: "run-1",
301
- taskId: "task-1",
302
- testCaseIndex: 0,
303
- modelId: "openai:chat:gpt-4o",
304
- spans: [],
305
- toolCalls,
306
- urlsVisited: [],
307
- searchTerms: [],
308
- filesRead: [],
309
- filesWritten: [],
310
- tokensUsed: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
311
- costEstimate: 0,
312
- durationMs: 0,
313
- events: [],
314
- startedAt: new Date().toISOString(),
315
- completedAt: new Date().toISOString(),
316
- };
317
- }
318
- it("redacts Bearer tokens in tool call inputs", () => {
319
- const trace = makeTrace([
320
- {
321
- name: "WebFetch",
322
- input: {
323
- url: "https://api.sanity.io",
324
- auth: "Bearer sk_live_abc123def456ghi789",
325
- },
326
- output: "OK",
327
- durationMs: 100,
328
- category: "read",
329
- },
330
- ]);
331
- const { trace: redacted, redactionCount } = redactTrace(trace);
332
- const inputStr = JSON.stringify(redacted.toolCalls[0].input);
333
- assert.ok(!inputStr.includes("sk_live_abc123def456ghi789"));
334
- assert.ok(inputStr.includes("[REDACTED]"));
335
- assert.ok(redactionCount > 0);
336
- });
337
- it("redacts Sanity tokens", () => {
338
- const trace = makeTrace([
339
- {
340
- name: "Write",
341
- input: { token: "skAbcDefGhiJklMnoPqrStUvWxYz0123456789" },
342
- output: null,
343
- durationMs: 10,
344
- category: "write",
345
- },
346
- ]);
347
- const { trace: redacted } = redactTrace(trace);
348
- const inputStr = JSON.stringify(redacted.toolCalls[0].input);
349
- assert.ok(inputStr.includes("[REDACTED_SANITY_TOKEN]"));
350
- });
351
- it("redacts OpenAI keys", () => {
352
- const trace = makeTrace([
353
- {
354
- name: "Bash",
355
- input: {
356
- command: "export OPENAI_API_KEY=sk-proj-abcdefghij1234567890abcdefghij",
357
- },
358
- output: null,
359
- durationMs: 10,
360
- category: "execute",
361
- },
362
- ]);
363
- const { trace: redacted } = redactTrace(trace);
364
- const inputStr = JSON.stringify(redacted.toolCalls[0].input);
365
- assert.ok(!inputStr.includes("sk-proj-abcdefghij1234567890abcdefghij"), "OpenAI key should be redacted");
366
- });
367
- it("does not mutate the original trace", () => {
368
- const original = makeTrace([
369
- {
370
- name: "WebFetch",
371
- input: { auth: "Bearer secrettoken1234567890" },
372
- output: null,
373
- durationMs: 10,
374
- category: "read",
375
- },
376
- ]);
377
- const originalStr = JSON.stringify(original);
378
- redactTrace(original);
379
- assert.equal(JSON.stringify(original), originalStr);
380
- });
381
- it("reports which rules fired", () => {
382
- const trace = makeTrace([
383
- {
384
- name: "Bash",
385
- input: {
386
- cmd: "curl -H 'Authorization: Bearer abc123def456789' https://api.example.com",
387
- },
388
- output: null,
389
- durationMs: 10,
390
- category: "execute",
391
- },
392
- ]);
393
- const { rulesApplied } = redactTrace(trace);
394
- assert.ok(rulesApplied.includes("bearer_tokens"));
395
- });
396
- });
397
- // ---------------------------------------------------------------------------
398
- // Trace storage
399
- // ---------------------------------------------------------------------------
400
- describe("LocalTraceStore", () => {
401
- const storeDir = resolve(tmpdir(), `ailf-trace-test-${process.pid}`);
402
- afterEach(() => {
403
- if (existsSync(storeDir)) {
404
- rmSync(storeDir, { recursive: true, force: true });
405
- }
406
- });
407
- it("stores and retrieves a trace", async () => {
408
- const store = new LocalTraceStore(storeDir);
409
- const trace = {
410
- traceId: "trace-store-test",
411
- runId: "run-1",
412
- taskId: "task-1",
413
- testCaseIndex: 0,
414
- modelId: "openai:chat:gpt-4o",
415
- spans: [],
416
- toolCalls: [],
417
- urlsVisited: [],
418
- searchTerms: [],
419
- filesRead: [],
420
- filesWritten: [],
421
- tokensUsed: { promptTokens: 100, completionTokens: 50, totalTokens: 150 },
422
- costEstimate: 0.001,
423
- durationMs: 500,
424
- events: [],
425
- startedAt: new Date().toISOString(),
426
- completedAt: new Date().toISOString(),
427
- };
428
- const result = await store.store(trace);
429
- assert.ok(result.uri.startsWith("file://"));
430
- assert.ok(result.sizeBytes > 0);
431
- const retrieved = await store.retrieve(result.uri);
432
- assert.ok(retrieved);
433
- assert.equal(retrieved.traceId, "trace-store-test");
434
- });
435
- it("returns null for non-existent trace", async () => {
436
- const store = new LocalTraceStore(storeDir);
437
- const result = await store.retrieve("file:///nonexistent/path.json");
438
- assert.equal(result, null);
439
- });
440
- });
441
- // ---------------------------------------------------------------------------
442
- // Trace summary extraction
443
- // ---------------------------------------------------------------------------
444
- describe("extractTraceSummary", () => {
445
- it("extracts sanitized summary from full trace", () => {
446
- const trace = {
447
- traceId: "trace-summary-test",
448
- runId: "run-1",
449
- taskId: "task-1",
450
- testCaseIndex: 0,
451
- modelId: "openai:chat:gpt-4o",
452
- spans: [],
453
- toolCalls: [
454
- {
455
- name: "WebSearch",
456
- input: {},
457
- output: null,
458
- durationMs: 100,
459
- category: "search",
460
- },
461
- {
462
- name: "Read",
463
- input: {},
464
- output: null,
465
- durationMs: 50,
466
- category: "read",
467
- },
468
- {
469
- name: "Read",
470
- input: {},
471
- output: null,
472
- durationMs: 30,
473
- category: "read",
474
- },
475
- ],
476
- urlsVisited: ["https://sanity.io/docs"],
477
- searchTerms: ["GROQ"],
478
- filesRead: ["/src/schema.ts"],
479
- filesWritten: [],
480
- tokensUsed: {
481
- promptTokens: 1000,
482
- completionTokens: 500,
483
- totalTokens: 1500,
484
- },
485
- costEstimate: 0.01,
486
- durationMs: 2000,
487
- events: [],
488
- startedAt: new Date().toISOString(),
489
- completedAt: new Date().toISOString(),
490
- };
491
- const summary = extractTraceSummary(trace, "file:///traces/trace-1.json");
492
- assert.equal(summary.traceId, "trace-summary-test");
493
- assert.equal(summary.traceDataUri, "file:///traces/trace-1.json");
494
- assert.equal(summary.toolCallCount, 3);
495
- assert.equal(summary.toolCallCategories.search, 1);
496
- assert.equal(summary.toolCallCategories.read, 2);
497
- assert.equal(summary.totalTokens, 1500);
498
- assert.equal(summary.costEstimate, 0.01);
499
- assert.equal(summary.urlsVisitedCount, 1);
500
- assert.equal(summary.filesReadCount, 1);
501
- assert.equal(summary.filesWrittenCount, 0);
502
- });
503
- });
@@ -1,10 +0,0 @@
1
- /**
2
- * tool-loop-openai.test.ts — Tests for the OpenAI MCP tool loop.
3
- *
4
- * Tests both API variants (Chat Completions and Responses) with mocked
5
- * fetch to verify tool calling, error handling, token tracking, and
6
- * round exhaustion.
7
- *
8
- * Run: npx tsx --test src/pipeline/compiler/__tests__/tool-loop-openai.test.ts
9
- */
10
- export {};