ai-functions 2.0.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -5
- package/CHANGELOG.md +38 -0
- package/LICENSE +21 -0
- package/README.md +361 -159
- package/dist/ai-promise.d.ts +47 -0
- package/dist/ai-promise.d.ts.map +1 -1
- package/dist/ai-promise.js +291 -3
- package/dist/ai-promise.js.map +1 -1
- package/dist/ai.d.ts +17 -18
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +93 -39
- package/dist/ai.js.map +1 -1
- package/dist/batch-map.d.ts +46 -4
- package/dist/batch-map.d.ts.map +1 -1
- package/dist/batch-map.js +35 -2
- package/dist/batch-map.js.map +1 -1
- package/dist/batch-queue.d.ts +116 -12
- package/dist/batch-queue.d.ts.map +1 -1
- package/dist/batch-queue.js +47 -2
- package/dist/batch-queue.js.map +1 -1
- package/dist/budget.d.ts +272 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +500 -0
- package/dist/budget.js.map +1 -0
- package/dist/cache.d.ts +272 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +412 -0
- package/dist/cache.js.map +1 -0
- package/dist/context.d.ts +32 -1
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +16 -1
- package/dist/context.js.map +1 -1
- package/dist/eval/runner.d.ts +2 -1
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/runner.js.map +1 -1
- package/dist/generate.d.ts.map +1 -1
- package/dist/generate.js +6 -10
- package/dist/generate.js.map +1 -1
- package/dist/index.d.ts +27 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +72 -42
- package/dist/index.js.map +1 -1
- package/dist/primitives.d.ts +17 -0
- package/dist/primitives.d.ts.map +1 -1
- package/dist/primitives.js +19 -1
- package/dist/primitives.js.map +1 -1
- package/dist/retry.d.ts +303 -0
- package/dist/retry.d.ts.map +1 -0
- package/dist/retry.js +539 -0
- package/dist/retry.js.map +1 -0
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +1 -9
- package/dist/schema.js.map +1 -1
- package/dist/tool-orchestration.d.ts +391 -0
- package/dist/tool-orchestration.d.ts.map +1 -0
- package/dist/tool-orchestration.js +663 -0
- package/dist/tool-orchestration.js.map +1 -0
- package/dist/types.d.ts +50 -33
- package/dist/types.d.ts.map +1 -1
- package/evalite.config.js +14 -0
- package/evals/classification.eval.js +97 -0
- package/evals/marketing.eval.js +289 -0
- package/evals/math.eval.js +83 -0
- package/evals/run-evals.js +151 -0
- package/evals/structured-output.eval.js +131 -0
- package/evals/writing.eval.js +105 -0
- package/examples/batch-blog-posts.js +128 -0
- package/package.json +26 -26
- package/src/ai-promise.ts +359 -3
- package/src/ai.ts +155 -110
- package/src/batch/anthropic.js +256 -0
- package/src/batch/bedrock.js +584 -0
- package/src/batch/cloudflare.js +287 -0
- package/src/batch/google.js +359 -0
- package/src/batch/index.js +30 -0
- package/src/batch/memory.js +187 -0
- package/src/batch/openai.js +402 -0
- package/src/batch-map.ts +46 -4
- package/src/batch-queue.ts +116 -12
- package/src/budget.ts +727 -0
- package/src/cache.ts +653 -0
- package/src/context.ts +33 -1
- package/src/eval/index.js +7 -0
- package/src/eval/models.js +119 -0
- package/src/eval/runner.js +147 -0
- package/src/eval/runner.ts +3 -2
- package/src/generate.ts +7 -12
- package/src/index.ts +231 -53
- package/src/primitives.ts +19 -1
- package/src/retry.ts +776 -0
- package/src/schema.ts +1 -10
- package/src/tool-orchestration.ts +1008 -0
- package/src/types.ts +59 -41
- package/test/ai-proxy.test.js +157 -0
- package/test/async-iterators.test.js +261 -0
- package/test/backward-compat.test.ts +147 -0
- package/test/batch-autosubmit-errors.test.ts +598 -0
- package/test/batch-background.test.js +352 -0
- package/test/batch-blog-posts.test.js +293 -0
- package/test/blog-generation.test.js +390 -0
- package/test/browse-read.test.js +480 -0
- package/test/budget-tracking.test.ts +800 -0
- package/test/cache.test.ts +712 -0
- package/test/context-isolation.test.ts +687 -0
- package/test/core-functions.test.js +490 -0
- package/test/decide.test.js +260 -0
- package/test/define.test.js +232 -0
- package/test/e2e-bedrock-manual.js +136 -0
- package/test/e2e-bedrock.test.js +164 -0
- package/test/e2e-flex-gateway.js +131 -0
- package/test/e2e-flex-manual.js +156 -0
- package/test/e2e-flex.test.js +174 -0
- package/test/e2e-google-manual.js +150 -0
- package/test/e2e-google.test.js +181 -0
- package/test/embeddings.test.js +220 -0
- package/test/evals/define-function.eval.test.js +309 -0
- package/test/evals/deterministic.eval.test.ts +376 -0
- package/test/evals/primitives.eval.test.js +360 -0
- package/test/function-types.test.js +407 -0
- package/test/generate-core.test.js +213 -0
- package/test/generate.test.js +143 -0
- package/test/generic-order.test.ts +342 -0
- package/test/implicit-batch.test.js +326 -0
- package/test/json-parse-error-handling.test.ts +463 -0
- package/test/retry.test.ts +1016 -0
- package/test/schema.test.js +96 -0
- package/test/streaming.test.ts +316 -0
- package/test/tagged-templates.test.js +240 -0
- package/test/tool-orchestration.test.ts +770 -0
- package/vitest.config.js +39 -0
|
@@ -0,0 +1,770 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for agentic tool orchestration
|
|
3
|
+
*
|
|
4
|
+
* These tests cover multi-turn model→tools→model loops for complex AI workflows.
|
|
5
|
+
* Tests were written first (TDD); the implementation lives in ../src/tool-orchestration.ts.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
|
|
9
|
+
import { z } from 'zod'
|
|
10
|
+
|
|
11
|
+
// Types and classes under test
|
|
12
|
+
import {
|
|
13
|
+
AgenticLoop,
|
|
14
|
+
ToolRouter,
|
|
15
|
+
ToolValidator,
|
|
16
|
+
type Tool,
|
|
17
|
+
type ToolResult,
|
|
18
|
+
type LoopOptions,
|
|
19
|
+
type LoopResult,
|
|
20
|
+
type ValidationResult,
|
|
21
|
+
} from '../src/tool-orchestration.js'
|
|
22
|
+
|
|
23
|
+
// Mock model for testing
|
|
24
|
+
/**
 * Builds a minimal model stand-in: an object whose only member is a
 * `generate` spy created with `vi.fn()`.
 */
const createMockModel = () => {
  const generate = vi.fn()
  return { generate }
}
|
|
27
|
+
|
|
28
|
+
// Sample tools for testing
|
|
29
|
+
/**
 * Sample tool: applies one of four arithmetic operations to `a` and `b`.
 *
 * - `divide` resolves to the string 'Division by zero' when `b` is 0.
 * - Any operation outside the schema's enum now throws instead of silently
 *   resolving to `undefined`, so every code path either returns or throws.
 */
const calculatorTool: Tool = {
  name: 'calculator',
  description: 'Performs basic math operations',
  parameters: z.object({
    operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
    a: z.number(),
    b: z.number(),
  }),
  execute: async ({ operation, a, b }) => {
    switch (operation) {
      case 'add': return a + b
      case 'subtract': return a - b
      case 'multiply': return a * b
      case 'divide': return b !== 0 ? a / b : 'Division by zero'
      default:
        // Only reachable when the arguments bypassed schema validation.
        throw new Error(`Unsupported operation: ${String(operation)}`)
    }
  },
}
|
|
46
|
+
|
|
47
|
+
/**
 * Sample tool: takes a URL and resolves to a canned `{ data, status }` object.
 * No network request is made; the payload is derived from the URL string.
 */
const fetchTool: Tool = {
  name: 'fetch',
  description: 'Fetches data from a URL',
  parameters: z.object({ url: z.string().url() }),
  async execute({ url }) {
    const data = `Content from ${url}`
    return { data, status: 200 }
  },
}
|
|
57
|
+
|
|
58
|
+
/**
 * Sample tool: waits `delay` milliseconds (via `setTimeout`) and then
 * resolves to the string 'completed'.
 */
const slowTool: Tool = {
  name: 'slow',
  description: 'A tool that takes time to execute',
  parameters: z.object({ delay: z.number() }),
  async execute({ delay }) {
    await new Promise((done) => {
      setTimeout(done, delay)
    })
    return 'completed'
  },
}
|
|
69
|
+
|
|
70
|
+
/**
 * Sample tool: always rejects with an Error whose message embeds the
 * supplied `message` argument.
 */
const failingTool: Tool = {
  name: 'failing',
  description: 'A tool that always fails',
  parameters: z.object({ message: z.string() }),
  async execute({ message }) {
    const reason = `Tool failed: ${message}`
    throw new Error(reason)
  },
}
|
|
80
|
+
|
|
81
|
+
// ============================================================================
|
|
82
|
+
// AgenticLoop Tests - Multi-turn model→tools→model loops
|
|
83
|
+
// ============================================================================
|
|
84
|
+
|
|
85
|
+
describe('AgenticLoop', () => {
|
|
86
|
+
describe('basic loop execution', () => {
|
|
87
|
+
it('should execute a single tool call and return result', async () => {
  // Scripted model turns: first request the calculator, then answer in text.
  const requestAddition = {
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 5, b: 3 } }],
    finishReason: 'tool_call',
  }
  const finalAnswer = {
    text: 'The result of 5 + 3 is 8',
    finishReason: 'stop',
  }

  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 5,
  })

  const generate = vi.fn()
  generate.mockResolvedValueOnce(requestAddition)
  generate.mockResolvedValueOnce(finalAnswer)

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'What is 5 + 3?',
  })

  // Two scripted model turns, one recorded calculator call.
  expect(outcome.text).toContain('8')
  expect(outcome.steps).toBe(2)
  expect(outcome.toolCalls).toHaveLength(1)
  const recordedCall = outcome.toolCalls[0]
  expect(recordedCall.name).toBe('calculator')
  expect(recordedCall.result).toBe(8)
})
|
|
115
|
+
|
|
116
|
+
it('should handle multiple sequential tool calls', async () => {
  // Local helper: a scripted model turn requesting one calculator call.
  const calculatorTurn = (operation: string, a: number, b: number) => ({
    toolCalls: [{ name: 'calculator', arguments: { operation, a, b } }],
    finishReason: 'tool_call',
  })

  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 10,
  })

  // Two tool-calling turns followed by a closing text turn.
  const generate = vi.fn()
  generate.mockResolvedValueOnce(calculatorTurn('add', 5, 3))
  generate.mockResolvedValueOnce(calculatorTurn('multiply', 8, 2))
  generate.mockResolvedValueOnce({
    text: '5 + 3 = 8, then 8 * 2 = 16',
    finishReason: 'stop',
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Add 5 and 3, then multiply by 2',
  })

  expect(outcome.steps).toBe(3)
  expect(outcome.toolCalls).toHaveLength(2)
})
|
|
144
|
+
|
|
145
|
+
it('should preserve conversation state across turns', async () => {
  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 5,
  })

  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 10, b: 5 } }],
    finishReason: 'tool_call',
  })
  generate.mockResolvedValueOnce({
    text: 'Done',
    finishReason: 'stop',
  })

  await agent.run({
    model: { generate } as any,
    prompt: 'Calculate 10 + 5',
  })

  // The input handed to the second model turn must include a tool message
  // carrying the calculator output (10 + 5 = 15).
  const secondTurnInput = generate.mock.calls[1][0]
  expect(secondTurnInput.messages).toBeDefined()
  const carriesToolOutput = secondTurnInput.messages.some(
    (message: any) => message.role === 'tool' && message.content.includes('15'),
  )
  expect(carriesToolOutput).toBe(true)
})
|
|
173
|
+
})
|
|
174
|
+
|
|
175
|
+
describe('maxSteps limit enforcement', () => {
|
|
176
|
+
it('should stop at maxSteps limit', async () => {
  const stepLimit = 3
  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: stepLimit,
  })

  // Every scripted model turn requests another tool call, so the model never
  // produces a 'stop' turn on its own.
  const endlessToolRequest = {
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 1 } }],
    finishReason: 'tool_call',
  }
  const generate = vi.fn().mockResolvedValue(endlessToolRequest)

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Keep adding',
  })

  expect(outcome.steps).toBe(stepLimit)
  expect(outcome.stopReason).toBe('max_steps')
})
|
|
196
|
+
|
|
197
|
+
it('should throw error when maxSteps is exceeded in strict mode', async () => {
  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 2,
    strictMaxSteps: true,
  })

  // The scripted model requests a tool call on every turn.
  const endlessToolRequest = {
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 1 } }],
    finishReason: 'tool_call',
  }
  const generate = vi.fn().mockResolvedValue(endlessToolRequest)

  const pendingRun = agent.run({
    model: { generate } as any,
    prompt: 'Keep adding',
  })

  await expect(pendingRun).rejects.toThrow('Max steps exceeded')
})
|
|
214
|
+
})
|
|
215
|
+
|
|
216
|
+
describe('parallel tool execution', () => {
|
|
217
|
+
it('should execute multiple tool calls in parallel', async () => {
  // Parallelism is verified by counting overlapping executions instead of
  // measuring wall-clock time. The previous bound (elapsed < 40ms) was above
  // the sequential cost of 3 * 10ms, so it could not detect sequential
  // execution, and timing bounds are sensitive to machine load.
  let inFlight = 0
  let peakInFlight = 0

  const timingTool: Tool = {
    name: 'timing',
    description: 'Returns execution order',
    parameters: z.object({ id: z.string() }),
    execute: async ({ id }) => {
      inFlight++
      peakInFlight = Math.max(peakInFlight, inFlight)
      try {
        await new Promise(r => setTimeout(r, 10))
        return { id, time: Date.now() }
      } finally {
        inFlight--
      }
    },
  }

  const loop = new AgenticLoop({
    tools: [timingTool],
    maxSteps: 5,
    parallelExecution: true,
  })

  const mockGenerate = vi.fn()
    .mockResolvedValueOnce({
      toolCalls: [
        { name: 'timing', arguments: { id: 'a' } },
        { name: 'timing', arguments: { id: 'b' } },
        { name: 'timing', arguments: { id: 'c' } },
      ],
      finishReason: 'tool_call',
    })
    .mockResolvedValueOnce({
      text: 'All done',
      finishReason: 'stop',
    })

  const result = await loop.run({
    model: { generate: mockGenerate } as any,
    prompt: 'Run all',
  })

  expect(result.toolCalls).toHaveLength(3)
  // Sequential execution never has more than one call in flight.
  expect(peakInFlight).toBeGreaterThan(1)
})
|
|
259
|
+
|
|
260
|
+
it('should respect parallel execution limit', async () => {
  // Chronological log of `start:<id>` / `end:<id>` events written by the tool.
  const executionOrder: string[] = []
  const trackingTool: Tool = {
    name: 'track',
    description: 'Tracks execution',
    parameters: z.object({ id: z.string() }),
    execute: async ({ id }) => {
      executionOrder.push(`start:${id}`)
      await new Promise(r => setTimeout(r, 20))
      executionOrder.push(`end:${id}`)
      return id
    },
  }

  const loop = new AgenticLoop({
    tools: [trackingTool],
    maxSteps: 5,
    parallelExecution: true,
    maxParallelCalls: 2,
  })

  const mockGenerate = vi.fn()
    .mockResolvedValueOnce({
      toolCalls: [
        { name: 'track', arguments: { id: '1' } },
        { name: 'track', arguments: { id: '2' } },
        { name: 'track', arguments: { id: '3' } },
        { name: 'track', arguments: { id: '4' } },
      ],
      finishReason: 'tool_call',
    })
    .mockResolvedValueOnce({
      text: 'Done',
      finishReason: 'stop',
    })

  await loop.run({
    model: { generate: mockGenerate } as any,
    prompt: 'Track all',
  })

  // Guard against a vacuous pass: if no tool ever ran, the concurrency check
  // below would trivially succeed with a maximum of 0.
  const completed = executionOrder.filter(event => event.startsWith('end:'))
  expect(completed).toHaveLength(4)

  // Replay the log: each start raises the running count, each end lowers it.
  // With maxParallelCalls: 2 the count must never exceed 2.
  let running = 0
  let maxConcurrent = 0
  for (const event of executionOrder) {
    running += event.startsWith('start:') ? 1 : -1
    maxConcurrent = Math.max(maxConcurrent, running)
  }
  expect(maxConcurrent).toBeLessThanOrEqual(2)
})
|
|
311
|
+
})
|
|
312
|
+
|
|
313
|
+
describe('abort signal support', () => {
|
|
314
|
+
it('should abort execution when signal is triggered', async () => {
  const loop = new AgenticLoop({
    tools: [slowTool],
    maxSteps: 10,
  })

  const controller = new AbortController()
  // Every model turn requests a 1000ms tool call, far longer than the 50ms
  // abort delay below.
  const mockGenerate = vi.fn().mockResolvedValue({
    toolCalls: [{ name: 'slow', arguments: { delay: 1000 } }],
    finishReason: 'tool_call',
  })

  // Abort after 50ms
  const abortTimer = setTimeout(() => controller.abort(), 50)

  try {
    await expect(loop.run({
      model: { generate: mockGenerate } as any,
      prompt: 'Run slow tool',
      abortSignal: controller.signal,
    })).rejects.toThrow('Aborted')
  } finally {
    // Do not leave the abort timer pending if the run settled before it fired.
    clearTimeout(abortTimer)
  }
})
|
|
335
|
+
})
|
|
336
|
+
|
|
337
|
+
describe('onStep callback', () => {
|
|
338
|
+
it('should call onStep for each loop iteration', async () => {
  // Collects whatever the loop passes to the onStep callback.
  const observedSteps: any[] = []

  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 5,
    onStep: (info) => {
      observedSteps.push(info)
    },
  })

  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }],
    finishReason: 'tool_call',
  })
  generate.mockResolvedValueOnce({
    text: 'Result is 3',
    finishReason: 'stop',
  })

  await agent.run({
    model: { generate } as any,
    prompt: 'Add',
  })

  // One callback per scripted model turn, numbered from 1.
  expect(observedSteps).toHaveLength(2)
  const firstStep = observedSteps[0]
  expect(firstStep.stepNumber).toBe(1)
  expect(firstStep.toolCalls).toHaveLength(1)
  expect(observedSteps[1].stepNumber).toBe(2)
})
|
|
366
|
+
})
|
|
367
|
+
})
|
|
368
|
+
|
|
369
|
+
// ============================================================================
|
|
370
|
+
// ToolRouter Tests - Routing tool calls to handlers
|
|
371
|
+
// ============================================================================
|
|
372
|
+
|
|
373
|
+
describe('ToolRouter', () => {
|
|
374
|
+
describe('tool registration and routing', () => {
|
|
375
|
+
it('should register and route to correct tool', async () => {
  const router = new ToolRouter()
  for (const tool of [calculatorTool, fetchTool]) {
    router.register(tool)
  }

  // With two tools registered, the call must reach the calculator (6 * 7).
  const multiplication = {
    name: 'calculator',
    arguments: { operation: 'multiply', a: 6, b: 7 },
  }
  const routed = await router.route(multiplication)

  expect(routed.success).toBe(true)
  expect(routed.result).toBe(42)
})
|
|
388
|
+
|
|
389
|
+
it('should return error for unknown tool', async () => {
  const router = new ToolRouter()
  router.register(calculatorTool)

  // No tool named 'unknown_tool' was registered.
  const unregisteredCall = { name: 'unknown_tool', arguments: {} }
  const routed = await router.route(unregisteredCall)

  expect(routed.success).toBe(false)
  expect(routed.error).toContain('not found')
})
|
|
401
|
+
|
|
402
|
+
it('should route multiple calls in order', async () => {
  const router = new ToolRouter()
  router.register(calculatorTool)

  const addition = { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }
  const multiplication = { name: 'calculator', arguments: { operation: 'multiply', a: 3, b: 4 } }

  const routed = await router.routeAll([addition, multiplication])

  // Results are expected in the same order as the submitted calls.
  expect(routed).toHaveLength(2)
  expect(routed[0].result).toBe(3)
  expect(routed[1].result).toBe(12)
})
|
|
417
|
+
})
|
|
418
|
+
|
|
419
|
+
describe('tool result formatting', () => {
|
|
420
|
+
it('should format tool results for model consumption', async () => {
  const router = new ToolRouter()
  router.register(calculatorTool)

  const addition = {
    name: 'calculator',
    arguments: { operation: 'add', a: 10, b: 20 },
  }
  const routed = await router.route(addition)

  // The formatted message is a tool-role message whose content mentions 30.
  const message = router.formatResult(routed)
  expect(message.role).toBe('tool')
  expect(message.content).toContain('30')
})
|
|
433
|
+
|
|
434
|
+
it('should format error results appropriately', async () => {
  const router = new ToolRouter()
  router.register(failingTool)

  const doomedCall = {
    name: 'failing',
    arguments: { message: 'test error' },
  }
  const routed = await router.route(doomedCall)

  // A failed call still formats as a tool-role message, flagged as an error.
  const message = router.formatResult(routed)
  expect(message.role).toBe('tool')
  expect(message.content).toContain('error')
  expect(message.isError).toBe(true)
})
|
|
448
|
+
})
|
|
449
|
+
|
|
450
|
+
describe('parallel routing', () => {
|
|
451
|
+
it('should route multiple calls in parallel', async () => {
  const router = new ToolRouter()

  // Parallelism is verified by counting overlapping executions instead of
  // comparing wall-clock time against a threshold, which is sensitive to
  // machine load. This replaces a timestamp array that was filled but never
  // asserted on.
  let inFlight = 0
  let peakInFlight = 0

  const timingTool: Tool = {
    name: 'time',
    description: 'Records time',
    parameters: z.object({ id: z.number() }),
    execute: async ({ id }) => {
      inFlight++
      peakInFlight = Math.max(peakInFlight, inFlight)
      try {
        await new Promise(r => setTimeout(r, 20))
        return id
      } finally {
        inFlight--
      }
    },
  }

  router.register(timingTool)

  await router.routeAllParallel([
    { name: 'time', arguments: { id: 1 } },
    { name: 'time', arguments: { id: 2 } },
    { name: 'time', arguments: { id: 3 } },
  ])

  // Sequential routing never has more than one call in flight.
  expect(peakInFlight).toBeGreaterThan(1)
})
|
|
479
|
+
})
|
|
480
|
+
})
|
|
481
|
+
|
|
482
|
+
// ============================================================================
|
|
483
|
+
// ToolValidator Tests - Pre-execution validation
|
|
484
|
+
// ============================================================================
|
|
485
|
+
|
|
486
|
+
describe('ToolValidator', () => {
|
|
487
|
+
describe('argument validation', () => {
|
|
488
|
+
it('should validate arguments against tool schema', () => {
  const validator = new ToolValidator()
  validator.register(calculatorTool)

  // Arguments that match the calculator schema exactly.
  const wellFormedArgs = { operation: 'add', a: 5, b: 10 }
  const verdict = validator.validate('calculator', wellFormedArgs)

  expect(verdict.valid).toBe(true)
})
|
|
500
|
+
|
|
501
|
+
it('should reject invalid arguments', () => {
  const validator = new ToolValidator()
  validator.register(calculatorTool)

  // `operation` is outside the enum and `a` is a string instead of a number.
  const malformedArgs = {
    operation: 'invalid_op',
    a: 'not a number',
    b: 10,
  }
  const verdict = validator.validate('calculator', malformedArgs)

  expect(verdict.valid).toBe(false)
  expect(verdict.errors).toBeDefined()
  expect(verdict.errors!.length).toBeGreaterThan(0)
})
|
|
515
|
+
|
|
516
|
+
it('should reject missing required arguments', () => {
  const validator = new ToolValidator()
  validator.register(calculatorTool)

  // 'b' is deliberately left out.
  const incompleteArgs = { operation: 'add', a: 5 }
  const verdict = validator.validate('calculator', incompleteArgs)

  expect(verdict.valid).toBe(false)
})
|
|
528
|
+
|
|
529
|
+
it('should return error for unknown tool', () => {
  // Nothing is registered on this validator.
  const validator = new ToolValidator()

  const verdict = validator.validate('unknown', { foo: 'bar' })

  expect(verdict.valid).toBe(false)
  const firstError = verdict.errors![0]
  expect(firstError).toContain('not registered')
})
|
|
537
|
+
})
|
|
538
|
+
|
|
539
|
+
describe('batch validation', () => {
|
|
540
|
+
it('should validate multiple tool calls at once', () => {
  const validator = new ToolValidator()
  for (const tool of [calculatorTool, fetchTool]) {
    validator.register(tool)
  }

  // Two valid calls followed by one whose operation is outside the enum.
  const validAddition = { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }
  const validFetch = { name: 'fetch', arguments: { url: 'https://example.com' } }
  const badOperation = { name: 'calculator', arguments: { operation: 'bad', a: 1, b: 2 } }

  const verdicts = validator.validateAll([validAddition, validFetch, badOperation])

  expect(verdicts).toHaveLength(3)
  expect(verdicts[0].valid).toBe(true)
  expect(verdicts[1].valid).toBe(true)
  expect(verdicts[2].valid).toBe(false)
})
|
|
556
|
+
})
|
|
557
|
+
})
|
|
558
|
+
|
|
559
|
+
// ============================================================================
|
|
560
|
+
// Tool Error Recovery Tests
|
|
561
|
+
// ============================================================================
|
|
562
|
+
|
|
563
|
+
describe('Tool Error Recovery', () => {
|
|
564
|
+
describe('error handling', () => {
|
|
565
|
+
it('should catch and report tool execution errors', async () => {
  const agent = new AgenticLoop({
    tools: [failingTool, calculatorTool],
    maxSteps: 5,
  })

  // Turn 1 invokes the always-failing tool; turn 2 ends the run.
  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'failing', arguments: { message: 'test' } }],
    finishReason: 'tool_call',
  })
  generate.mockResolvedValueOnce({
    text: 'Tool failed, moving on',
    finishReason: 'stop',
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Try the failing tool',
  })

  // The thrown error is reported on the recorded call.
  const reportedError = outcome.toolCalls[0].error
  expect(reportedError).toBeDefined()
  expect(reportedError).toContain('Tool failed')
})
|
|
589
|
+
|
|
590
|
+
it('should retry failed tool calls when retry is enabled', async () => {
  // Counts how often the tool body runs; only the first run throws.
  let invocationCount = 0
  const flakeyTool: Tool = {
    name: 'flakey',
    description: 'Fails first attempt',
    parameters: z.object({}),
    async execute() {
      invocationCount += 1
      const isFirstAttempt = invocationCount < 2
      if (isFirstAttempt) {
        throw new Error('First attempt fails')
      }
      return 'success'
    },
  }

  const agent = new AgenticLoop({
    tools: [flakeyTool],
    maxSteps: 5,
    retryFailedTools: true,
    maxToolRetries: 3,
  })

  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'flakey', arguments: {} }],
    finishReason: 'tool_call',
  })
  generate.mockResolvedValueOnce({
    text: 'Got success',
    finishReason: 'stop',
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Use flakey tool',
  })

  // Expect the second attempt's value and exactly one recorded retry.
  const recordedCall = outcome.toolCalls[0]
  expect(recordedCall.result).toBe('success')
  expect(recordedCall.retryCount).toBe(1)
})
|
|
628
|
+
})
|
|
629
|
+
|
|
630
|
+
describe('graceful degradation', () => {
|
|
631
|
+
it('should continue with partial results when tools fail', async () => {
  const agent = new AgenticLoop({
    tools: [calculatorTool, failingTool],
    maxSteps: 5,
    continueOnError: true,
  })

  // A single model turn requests one call that succeeds and one that throws.
  const mixedToolRequest = {
    toolCalls: [
      { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
      { name: 'failing', arguments: { message: 'error' } },
    ],
    finishReason: 'tool_call',
  }
  const generate = vi.fn()
  generate.mockResolvedValueOnce(mixedToolRequest)
  generate.mockResolvedValueOnce({
    text: 'Calculator worked, other failed',
    finishReason: 'stop',
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Use both tools',
  })

  // Both calls are recorded: the first with its value, the second with an error.
  expect(outcome.toolCalls).toHaveLength(2)
  expect(outcome.toolCalls[0].result).toBe(3)
  expect(outcome.toolCalls[1].error).toBeDefined()
})
|
|
660
|
+
})
|
|
661
|
+
|
|
662
|
+
describe('timeout handling', () => {
|
|
663
|
+
it('should timeout long-running tools', async () => {
  const timeoutMs = 50 // 50ms timeout
  const agent = new AgenticLoop({
    tools: [slowTool],
    maxSteps: 5,
    toolTimeout: timeoutMs,
  })

  // The requested delay (1000ms) is far beyond the configured tool timeout.
  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'slow', arguments: { delay: 1000 } }],
    finishReason: 'tool_call',
  })
  generate.mockResolvedValueOnce({
    text: 'Tool timed out',
    finishReason: 'stop',
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Run slow tool',
  })

  const reportedError = outcome.toolCalls[0].error
  expect(reportedError).toContain('timeout')
})
|
|
687
|
+
})
|
|
688
|
+
})
|
|
689
|
+
|
|
690
|
+
// ============================================================================
|
|
691
|
+
// Integration with generateText Tests
|
|
692
|
+
// ============================================================================
|
|
693
|
+
|
|
694
|
+
describe('Integration with generateText', () => {
|
|
695
|
+
it('should work with AI SDK tool format', async () => {
  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 5,
  })

  // The converted tool set is keyed by tool name and keeps the tool's
  // description and parameters.
  const convertedTools = agent.getToolsForSDK()
  expect(convertedTools.calculator).toBeDefined()
  const convertedCalculator = convertedTools.calculator
  expect(convertedCalculator.description).toBe('Performs basic math operations')
  expect(convertedCalculator.parameters).toBeDefined()
})
|
|
707
|
+
|
|
708
|
+
it('should expose tool results through experimental_toolResultContent', async () => {
  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 5,
  })

  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 2, b: 3 } }],
    finishReason: 'tool_call',
  })
  generate.mockResolvedValueOnce({
    text: '5',
    finishReason: 'stop',
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Add 2 + 3',
  })

  // Tool results are checked on `toolResults`: each entry carries the tool
  // name and the value it produced.
  expect(outcome.toolResults).toBeDefined()
  const firstToolResult = outcome.toolResults[0]
  expect(firstToolResult.toolName).toBe('calculator')
  expect(firstToolResult.result).toBe(5)
})
|
|
734
|
+
})
|
|
735
|
+
|
|
736
|
+
// ============================================================================
|
|
737
|
+
// Token Usage Tracking Tests
|
|
738
|
+
// ============================================================================
|
|
739
|
+
|
|
740
|
+
describe('Token Usage Tracking', () => {
|
|
741
|
+
it('should track token usage across multi-turn conversations', async () => {
  const agent = new AgenticLoop({
    tools: [calculatorTool],
    maxSteps: 5,
    trackUsage: true,
  })

  // Per-turn usage reported by the scripted model.
  const firstTurnUsage = { promptTokens: 50, completionTokens: 20, totalTokens: 70 }
  const secondTurnUsage = { promptTokens: 80, completionTokens: 10, totalTokens: 90 }

  const generate = vi.fn()
  generate.mockResolvedValueOnce({
    toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }],
    finishReason: 'tool_call',
    usage: firstTurnUsage,
  })
  generate.mockResolvedValueOnce({
    text: 'Result is 3',
    finishReason: 'stop',
    usage: secondTurnUsage,
  })

  const outcome = await agent.run({
    model: { generate } as any,
    prompt: 'Add 1 + 2',
  })

  // Totals are the sums over both turns: 50+80, 20+10, 70+90.
  expect(outcome.usage).toBeDefined()
  expect(outcome.usage!.promptTokens).toBe(130)
  expect(outcome.usage!.completionTokens).toBe(30)
  expect(outcome.usage!.totalTokens).toBe(160)
})
|
|
770
|
+
})
|