@sanity/ailf 3.8.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/config/canary-tasks.ts +64 -0
  2. package/config/test-budgets.ts +24 -0
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +19 -0
  4. package/dist/_vendor/ailf-core/config-helpers.js +27 -0
  5. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  6. package/dist/_vendor/ailf-core/index.js +1 -1
  7. package/dist/_vendor/ailf-core/schemas/canary-tasks.d.ts +52 -0
  8. package/dist/_vendor/ailf-core/schemas/canary-tasks.js +46 -0
  9. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  10. package/dist/_vendor/ailf-core/schemas/index.js +2 -0
  11. package/dist/_vendor/ailf-core/schemas/test-budgets.d.ts +19 -0
  12. package/dist/_vendor/ailf-core/schemas/test-budgets.js +34 -0
  13. package/dist/_vendor/ailf-shared/canary-drift.d.ts +84 -0
  14. package/dist/_vendor/ailf-shared/canary-drift.js +86 -0
  15. package/dist/_vendor/ailf-shared/index.d.ts +1 -0
  16. package/dist/_vendor/ailf-shared/index.js +1 -0
  17. package/dist/adapters/config-sources/file-config-adapter.js +4 -5
  18. package/dist/adapters/task-sources/repo-schemas.d.ts +3 -3
  19. package/dist/cli-program.d.ts +39 -0
  20. package/dist/cli-program.js +137 -0
  21. package/dist/cli.d.ts +6 -0
  22. package/dist/cli.js +12 -122
  23. package/dist/config/canary-tasks.ts +64 -0
  24. package/dist/config/test-budgets.ts +24 -0
  25. package/dist/pipeline/calculate-scores.d.ts +17 -2
  26. package/dist/pipeline/calculate-scores.js +99 -0
  27. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +5 -0
  28. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +25 -2
  29. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +5 -1
  30. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +4 -0
  31. package/dist/pipeline/compiler/promptfoo-compiler.js +23 -0
  32. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  33. package/dist/tasks/knowledge-probe/groq-projections.task.ts +29 -11
  34. package/package.json +6 -3
  35. package/tasks/knowledge-probe/groq-projections.task.ts +29 -11
  36. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  37. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  38. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  39. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  40. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  41. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  42. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  43. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  44. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  45. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  46. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  47. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  48. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  49. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  50. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  51. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  52. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  53. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  54. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  55. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  56. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  57. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  58. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  59. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
@@ -1,509 +0,0 @@
1
- /**
2
- * tool-loop-openai.test.ts — Tests for the OpenAI MCP tool loop.
3
- *
4
- * Tests both API variants (Chat Completions and Responses) with mocked
5
- * fetch to verify tool calling, error handling, token tracking, and
6
- * round exhaustion.
7
- *
8
- * Run: npx tsx --test src/pipeline/compiler/__tests__/tool-loop-openai.test.ts
9
- */
10
- import assert from "node:assert/strict";
11
- import { afterEach, beforeEach, describe, it } from "node:test";
12
- import { runOpenAIToolLoop } from "../mode-handlers/mcp-tool-provider/tool-loop-openai.js";
13
- // ---------------------------------------------------------------------------
14
- // Test fixtures
15
- // ---------------------------------------------------------------------------
16
- const TEST_TOOLS = [
17
- {
18
- name: "query_documents",
19
- description: "Query Sanity documents using GROQ",
20
- inputSchema: {
21
- type: "object",
22
- properties: {
23
- query: { type: "string" },
24
- projectId: { type: "string" },
25
- dataset: { type: "string" },
26
- },
27
- required: ["query"],
28
- },
29
- },
30
- {
31
- name: "get_schema",
32
- description: "Get the Sanity schema for a project",
33
- inputSchema: {
34
- type: "object",
35
- properties: { projectId: { type: "string" } },
36
- },
37
- },
38
- ];
39
- function makeCallTool(results = {}) {
40
- return async (name, _args) => {
41
- if (results[name]) {
42
- return { content: results[name] };
43
- }
44
- return { content: `Result from ${name}` };
45
- };
46
- }
47
- function makeThrowingCallTool(errorMessage) {
48
- return async () => {
49
- throw new Error(errorMessage);
50
- };
51
- }
52
- function baseConfig(overrides) {
53
- return {
54
- prompt: "Query all documents from project abc123",
55
- tools: TEST_TOOLS,
56
- callTool: makeCallTool(),
57
- maxToolRounds: 5,
58
- model: "gpt-5.2",
59
- temperature: 0.2,
60
- maxTokens: 4096,
61
- apiKey: "test-api-key",
62
- ...overrides,
63
- };
64
- }
65
- // ---------------------------------------------------------------------------
66
- // Chat Completions API response builders
67
- // ---------------------------------------------------------------------------
68
- function chatResponse(opts) {
69
- const message = {
70
- role: "assistant",
71
- content: opts.content ?? null,
72
- };
73
- if (opts.toolCalls) {
74
- message.tool_calls = opts.toolCalls.map((tc) => ({
75
- id: tc.id,
76
- type: "function",
77
- function: { name: tc.name, arguments: tc.arguments },
78
- }));
79
- }
80
- return {
81
- choices: [
82
- {
83
- message,
84
- finish_reason: opts.finishReason ?? (opts.toolCalls ? "tool_calls" : "stop"),
85
- },
86
- ],
87
- usage: {
88
- prompt_tokens: opts.promptTokens ?? 100,
89
- completion_tokens: opts.completionTokens ?? 50,
90
- total_tokens: (opts.promptTokens ?? 100) + (opts.completionTokens ?? 50),
91
- },
92
- };
93
- }
94
- function chatErrorResponse(message) {
95
- return { error: { message } };
96
- }
97
- // ---------------------------------------------------------------------------
98
- // Responses API response builders
99
- // ---------------------------------------------------------------------------
100
- function responsesResponse(opts) {
101
- const output = [];
102
- if (opts.functionCalls) {
103
- for (const fc of opts.functionCalls) {
104
- output.push({
105
- type: "function_call",
106
- id: `fc_${fc.callId}`,
107
- call_id: fc.callId,
108
- name: fc.name,
109
- arguments: fc.arguments,
110
- });
111
- }
112
- }
113
- if (opts.text !== undefined) {
114
- output.push({
115
- type: "message",
116
- id: "msg_001",
117
- role: "assistant",
118
- content: [{ type: "output_text", text: opts.text }],
119
- });
120
- }
121
- return {
122
- id: opts.id ?? "resp_001",
123
- status: "completed",
124
- output,
125
- usage: {
126
- input_tokens: opts.inputTokens ?? 100,
127
- output_tokens: opts.outputTokens ?? 50,
128
- total_tokens: (opts.inputTokens ?? 100) + (opts.outputTokens ?? 50),
129
- },
130
- };
131
- }
132
- function responsesErrorResponse(message) {
133
- return { error: { message } };
134
- }
135
- // ---------------------------------------------------------------------------
136
- // Mock fetch helper
137
- // ---------------------------------------------------------------------------
138
- let originalFetch;
139
- let fetchCalls;
140
- /** Mock fetch that returns JSON responses. All responses have ok: true (API-level errors are in the body). */
141
- function mockFetch(responses) {
142
- let callIndex = 0;
143
- fetchCalls = [];
144
- globalThis.fetch = (async (url, init) => {
145
- const body = init?.body ? JSON.parse(String(init.body)) : undefined;
146
- fetchCalls.push({ url: url.toString(), body });
147
- const responseData = responses[callIndex] ?? responses[responses.length - 1];
148
- callIndex++;
149
- return {
150
- json: async () => responseData,
151
- ok: true,
152
- status: 200,
153
- };
154
- });
155
- }
156
- /** Mock fetch that returns an HTTP error (non-JSON body) */
157
- function mockHttpError(status, body) {
158
- fetchCalls = [];
159
- globalThis.fetch = (async (url, init) => {
160
- const reqBody = init?.body ? JSON.parse(String(init.body)) : undefined;
161
- fetchCalls.push({ url: url.toString(), body: reqBody });
162
- return new Response(body, { status, statusText: "Error" });
163
- });
164
- }
165
- // ---------------------------------------------------------------------------
166
- // Tests: Chat Completions API
167
- // ---------------------------------------------------------------------------
168
- describe("runOpenAIToolLoop — Chat Completions API", () => {
169
- beforeEach(() => {
170
- originalFetch = globalThis.fetch;
171
- });
172
- afterEach(() => {
173
- globalThis.fetch = originalFetch;
174
- });
175
- it("returns text when model answers without tool calls", async () => {
176
- mockFetch([
177
- chatResponse({
178
- content: "There are 42 documents.",
179
- finishReason: "stop",
180
- }),
181
- ]);
182
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "chat" }));
183
- assert.equal(result.output, "There are 42 documents.");
184
- assert.equal(result.toolCallLog.length, 0);
185
- assert.equal(result.toolRounds, 0);
186
- assert.equal(result.exhaustedRounds, undefined);
187
- });
188
- it("executes a single tool call and returns final answer", async () => {
189
- mockFetch([
190
- // Round 0: model calls query_documents
191
- chatResponse({
192
- toolCalls: [
193
- {
194
- id: "call_1",
195
- name: "query_documents",
196
- arguments: '{"query":"*[_type==\\"post\\"]"}',
197
- },
198
- ],
199
- }),
200
- // Round 1: model synthesizes answer
201
- chatResponse({ content: "Found 10 posts.", finishReason: "stop" }),
202
- ]);
203
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "chat" }));
204
- assert.equal(result.output, "Found 10 posts.");
205
- assert.equal(result.toolCallLog.length, 1);
206
- assert.equal(result.toolCallLog[0].name, "query_documents");
207
- assert.equal(result.toolRounds, 1);
208
- });
209
- it("executes multi-turn tool calls", async () => {
210
- mockFetch([
211
- // Round 0: model calls get_schema
212
- chatResponse({
213
- toolCalls: [
214
- {
215
- id: "call_1",
216
- name: "get_schema",
217
- arguments: '{"projectId":"abc123"}',
218
- },
219
- ],
220
- }),
221
- // Round 1: model calls query_documents
222
- chatResponse({
223
- toolCalls: [
224
- { id: "call_2", name: "query_documents", arguments: '{"query":"*"}' },
225
- ],
226
- }),
227
- // Round 2: model synthesizes
228
- chatResponse({
229
- content: "Schema has 5 types, 100 documents.",
230
- finishReason: "stop",
231
- }),
232
- ]);
233
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "chat" }));
234
- assert.equal(result.toolCallLog.length, 2);
235
- assert.equal(result.toolCallLog[0].name, "get_schema");
236
- assert.equal(result.toolCallLog[1].name, "query_documents");
237
- assert.equal(result.toolRounds, 2);
238
- });
239
- it("captures tool execution errors in toolCallLog", async () => {
240
- mockFetch([
241
- chatResponse({
242
- toolCalls: [
243
- { id: "call_1", name: "query_documents", arguments: '{"query":"*"}' },
244
- ],
245
- }),
246
- chatResponse({
247
- content: "Tool failed, but I'll answer.",
248
- finishReason: "stop",
249
- }),
250
- ]);
251
- const result = await runOpenAIToolLoop(baseConfig({
252
- apiVariant: "chat",
253
- callTool: makeThrowingCallTool("Connection refused"),
254
- }));
255
- assert.equal(result.toolCallLog.length, 1);
256
- assert.equal(result.toolCallLog[0].output, "Error: Connection refused");
257
- assert.equal(result.output, "Tool failed, but I'll answer.");
258
- });
259
- it("handles exhausted rounds", async () => {
260
- // Model keeps calling tools for all 3 rounds (maxToolRounds=2 means rounds 0,1,2)
261
- mockFetch([
262
- chatResponse({
263
- toolCalls: [{ id: "call_1", name: "get_schema", arguments: "{}" }],
264
- }),
265
- chatResponse({
266
- toolCalls: [{ id: "call_2", name: "get_schema", arguments: "{}" }],
267
- }),
268
- // Last round: tool_choice "none" forces text, but model returns nothing useful
269
- chatResponse({ content: null, finishReason: "stop" }),
270
- ]);
271
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "chat", maxToolRounds: 2 }));
272
- // Round 2 (the last) gets tool_choice: "none", model stops
273
- assert.equal(result.toolCallLog.length, 2);
274
- assert.equal(result.toolRounds, 2);
275
- // The model returned content: null with finishReason: stop on the last round
276
- assert.equal(result.output, "");
277
- });
278
- it("throws on API-level error in JSON body", async () => {
279
- mockFetch([chatErrorResponse("Rate limit exceeded")]);
280
- await assert.rejects(() => runOpenAIToolLoop(baseConfig({ apiVariant: "chat" })), { message: "Rate limit exceeded" });
281
- });
282
- it("throws on HTTP error with non-JSON body", async () => {
283
- mockHttpError(502, "<html>Bad Gateway</html>");
284
- await assert.rejects(() => runOpenAIToolLoop(baseConfig({ apiVariant: "chat" })), (err) => err.message.includes("HTTP 502") && err.message.includes("Bad Gateway"));
285
- });
286
- it("accumulates token usage across rounds", async () => {
287
- mockFetch([
288
- chatResponse({
289
- toolCalls: [{ id: "call_1", name: "get_schema", arguments: "{}" }],
290
- promptTokens: 200,
291
- completionTokens: 50,
292
- }),
293
- chatResponse({
294
- content: "Done.",
295
- finishReason: "stop",
296
- promptTokens: 300,
297
- completionTokens: 80,
298
- }),
299
- ]);
300
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "chat" }));
301
- assert.equal(result.tokenUsage.prompt, 500); // 200 + 300
302
- assert.equal(result.tokenUsage.completion, 130); // 50 + 80
303
- });
304
- it("sends max_completion_tokens for GPT-5.x models", async () => {
305
- mockFetch([chatResponse({ content: "Answer.", finishReason: "stop" })]);
306
- await runOpenAIToolLoop(baseConfig({ apiVariant: "chat", model: "gpt-5.2" }));
307
- assert.equal(fetchCalls.length, 1);
308
- const body = fetchCalls[0].body;
309
- assert.equal(body.max_completion_tokens, 4096);
310
- assert.equal(body.max_tokens, undefined);
311
- });
312
- it("sends max_tokens for older models", async () => {
313
- mockFetch([chatResponse({ content: "Answer.", finishReason: "stop" })]);
314
- await runOpenAIToolLoop(baseConfig({ apiVariant: "chat", model: "gpt-4o" }));
315
- assert.equal(fetchCalls.length, 1);
316
- const body = fetchCalls[0].body;
317
- assert.equal(body.max_tokens, 4096);
318
- assert.equal(body.max_completion_tokens, undefined);
319
- });
320
- it("sends tool_choice 'none' on last round", async () => {
321
- mockFetch([
322
- chatResponse({
323
- toolCalls: [{ id: "call_1", name: "get_schema", arguments: "{}" }],
324
- }),
325
- chatResponse({ content: "Final.", finishReason: "stop" }),
326
- ]);
327
- await runOpenAIToolLoop(baseConfig({ apiVariant: "chat", maxToolRounds: 1 }));
328
- // Round 0: auto, Round 1 (last): none
329
- assert.equal(fetchCalls.length, 2);
330
- assert.equal(fetchCalls[0].body.tool_choice, "auto");
331
- assert.equal(fetchCalls[1].body.tool_choice, "none");
332
- });
333
- });
334
- // ---------------------------------------------------------------------------
335
- // Tests: Responses API
336
- // ---------------------------------------------------------------------------
337
- describe("runOpenAIToolLoop — Responses API", () => {
338
- beforeEach(() => {
339
- originalFetch = globalThis.fetch;
340
- });
341
- afterEach(() => {
342
- globalThis.fetch = originalFetch;
343
- });
344
- it("returns text when model answers without tool calls", async () => {
345
- mockFetch([responsesResponse({ text: "42 documents found." })]);
346
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
347
- assert.equal(result.output, "42 documents found.");
348
- assert.equal(result.toolCallLog.length, 0);
349
- assert.equal(result.toolRounds, 0);
350
- });
351
- it("executes a single tool call and returns final answer", async () => {
352
- mockFetch([
353
- // Round 0: model calls query_documents
354
- responsesResponse({
355
- id: "resp_001",
356
- functionCalls: [
357
- {
358
- callId: "call_1",
359
- name: "query_documents",
360
- arguments: '{"query":"*"}',
361
- },
362
- ],
363
- }),
364
- // Round 1: model synthesizes
365
- responsesResponse({ id: "resp_002", text: "Found 10 posts." }),
366
- ]);
367
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
368
- assert.equal(result.output, "Found 10 posts.");
369
- assert.equal(result.toolCallLog.length, 1);
370
- assert.equal(result.toolCallLog[0].name, "query_documents");
371
- assert.equal(result.toolRounds, 1);
372
- });
373
- it("chains via previous_response_id", async () => {
374
- mockFetch([
375
- responsesResponse({
376
- id: "resp_001",
377
- functionCalls: [
378
- { callId: "call_1", name: "get_schema", arguments: "{}" },
379
- ],
380
- }),
381
- responsesResponse({ id: "resp_002", text: "Schema loaded." }),
382
- ]);
383
- await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
384
- // Second request should chain via previous_response_id
385
- assert.equal(fetchCalls.length, 2);
386
- const secondBody = fetchCalls[1].body;
387
- assert.equal(secondBody.previous_response_id, "resp_001");
388
- });
389
- it("captures tool execution errors in toolCallLog", async () => {
390
- mockFetch([
391
- responsesResponse({
392
- functionCalls: [
393
- {
394
- callId: "call_1",
395
- name: "query_documents",
396
- arguments: '{"query":"*"}',
397
- },
398
- ],
399
- }),
400
- responsesResponse({ text: "Handled the error." }),
401
- ]);
402
- const result = await runOpenAIToolLoop(baseConfig({
403
- apiVariant: "responses",
404
- callTool: makeThrowingCallTool("Server unavailable"),
405
- }));
406
- assert.equal(result.toolCallLog.length, 1);
407
- assert.equal(result.toolCallLog[0].output, "Error: Server unavailable");
408
- assert.equal(result.output, "Handled the error.");
409
- });
410
- it("handles exhausted rounds", async () => {
411
- mockFetch([
412
- responsesResponse({
413
- id: "resp_001",
414
- functionCalls: [
415
- { callId: "call_1", name: "get_schema", arguments: "{}" },
416
- ],
417
- }),
418
- responsesResponse({
419
- id: "resp_002",
420
- functionCalls: [
421
- { callId: "call_2", name: "get_schema", arguments: "{}" },
422
- ],
423
- }),
424
- // Last round with tool_choice: "none" — model must return text
425
- // But if it doesn't produce function calls, we get empty output
426
- responsesResponse({ id: "resp_003", text: "" }),
427
- ]);
428
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "responses", maxToolRounds: 2 }));
429
- assert.equal(result.toolCallLog.length, 2);
430
- assert.equal(result.toolRounds, 2);
431
- });
432
- it("throws on API-level error in JSON body", async () => {
433
- mockFetch([responsesErrorResponse("Invalid model")]);
434
- await assert.rejects(() => runOpenAIToolLoop(baseConfig({ apiVariant: "responses" })), { message: "Invalid model" });
435
- });
436
- it("throws on HTTP error with non-JSON body", async () => {
437
- mockHttpError(503, "Service Unavailable");
438
- await assert.rejects(() => runOpenAIToolLoop(baseConfig({ apiVariant: "responses" })), (err) => err.message.includes("HTTP 503") &&
439
- err.message.includes("Service Unavailable"));
440
- });
441
- it("accumulates token usage across rounds", async () => {
442
- mockFetch([
443
- responsesResponse({
444
- functionCalls: [
445
- { callId: "call_1", name: "get_schema", arguments: "{}" },
446
- ],
447
- inputTokens: 150,
448
- outputTokens: 40,
449
- }),
450
- responsesResponse({
451
- text: "Done.",
452
- inputTokens: 250,
453
- outputTokens: 60,
454
- }),
455
- ]);
456
- const result = await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
457
- assert.equal(result.tokenUsage.prompt, 400); // 150 + 250
458
- assert.equal(result.tokenUsage.completion, 100); // 40 + 60
459
- });
460
- it("sends max_output_tokens (not max_tokens)", async () => {
461
- mockFetch([responsesResponse({ text: "Answer." })]);
462
- await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
463
- const body = fetchCalls[0].body;
464
- assert.equal(body.max_output_tokens, 4096);
465
- assert.equal(body.max_tokens, undefined);
466
- assert.equal(body.max_completion_tokens, undefined);
467
- });
468
- it("uses correct endpoint URL", async () => {
469
- mockFetch([responsesResponse({ text: "Hi." })]);
470
- await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
471
- assert.ok(fetchCalls[0].url.includes("/v1/responses"));
472
- });
473
- it("passes reasoning_effort and omits temperature", async () => {
474
- mockFetch([responsesResponse({ text: "Thought carefully." })]);
475
- await runOpenAIToolLoop(baseConfig({
476
- apiVariant: "responses",
477
- providerConfig: { reasoning_effort: "medium" },
478
- }));
479
- const body = fetchCalls[0].body;
480
- assert.deepEqual(body.reasoning, { effort: "medium" });
481
- assert.equal(body.temperature, undefined);
482
- });
483
- it("includes temperature when reasoning_effort is not set", async () => {
484
- mockFetch([responsesResponse({ text: "Answer." })]);
485
- await runOpenAIToolLoop(baseConfig({ apiVariant: "responses" }));
486
- const body = fetchCalls[0].body;
487
- assert.equal(body.temperature, 0.2);
488
- assert.equal(body.reasoning, undefined);
489
- });
490
- });
491
- // ---------------------------------------------------------------------------
492
- // Tests: Default routing (no apiVariant)
493
- // ---------------------------------------------------------------------------
494
- describe("runOpenAIToolLoop — default routing", () => {
495
- beforeEach(() => {
496
- originalFetch = globalThis.fetch;
497
- });
498
- afterEach(() => {
499
- globalThis.fetch = originalFetch;
500
- });
501
- it("defaults to Chat Completions when apiVariant is undefined", async () => {
502
- mockFetch([
503
- chatResponse({ content: "Default path.", finishReason: "stop" }),
504
- ]);
505
- const result = await runOpenAIToolLoop(baseConfig());
506
- assert.equal(result.output, "Default path.");
507
- assert.ok(fetchCalls[0].url.includes("/v1/chat/completions"));
508
- });
509
- });