ai-functions 2.0.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. package/.turbo/turbo-build.log +4 -5
  2. package/CHANGELOG.md +38 -0
  3. package/LICENSE +21 -0
  4. package/README.md +361 -159
  5. package/dist/ai-promise.d.ts +47 -0
  6. package/dist/ai-promise.d.ts.map +1 -1
  7. package/dist/ai-promise.js +291 -3
  8. package/dist/ai-promise.js.map +1 -1
  9. package/dist/ai.d.ts +17 -18
  10. package/dist/ai.d.ts.map +1 -1
  11. package/dist/ai.js +93 -39
  12. package/dist/ai.js.map +1 -1
  13. package/dist/batch-map.d.ts +46 -4
  14. package/dist/batch-map.d.ts.map +1 -1
  15. package/dist/batch-map.js +35 -2
  16. package/dist/batch-map.js.map +1 -1
  17. package/dist/batch-queue.d.ts +116 -12
  18. package/dist/batch-queue.d.ts.map +1 -1
  19. package/dist/batch-queue.js +47 -2
  20. package/dist/batch-queue.js.map +1 -1
  21. package/dist/budget.d.ts +272 -0
  22. package/dist/budget.d.ts.map +1 -0
  23. package/dist/budget.js +500 -0
  24. package/dist/budget.js.map +1 -0
  25. package/dist/cache.d.ts +272 -0
  26. package/dist/cache.d.ts.map +1 -0
  27. package/dist/cache.js +412 -0
  28. package/dist/cache.js.map +1 -0
  29. package/dist/context.d.ts +32 -1
  30. package/dist/context.d.ts.map +1 -1
  31. package/dist/context.js +16 -1
  32. package/dist/context.js.map +1 -1
  33. package/dist/eval/runner.d.ts +2 -1
  34. package/dist/eval/runner.d.ts.map +1 -1
  35. package/dist/eval/runner.js.map +1 -1
  36. package/dist/generate.d.ts.map +1 -1
  37. package/dist/generate.js +6 -10
  38. package/dist/generate.js.map +1 -1
  39. package/dist/index.d.ts +27 -20
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +72 -42
  42. package/dist/index.js.map +1 -1
  43. package/dist/primitives.d.ts +17 -0
  44. package/dist/primitives.d.ts.map +1 -1
  45. package/dist/primitives.js +19 -1
  46. package/dist/primitives.js.map +1 -1
  47. package/dist/retry.d.ts +303 -0
  48. package/dist/retry.d.ts.map +1 -0
  49. package/dist/retry.js +539 -0
  50. package/dist/retry.js.map +1 -0
  51. package/dist/schema.d.ts.map +1 -1
  52. package/dist/schema.js +1 -9
  53. package/dist/schema.js.map +1 -1
  54. package/dist/tool-orchestration.d.ts +391 -0
  55. package/dist/tool-orchestration.d.ts.map +1 -0
  56. package/dist/tool-orchestration.js +663 -0
  57. package/dist/tool-orchestration.js.map +1 -0
  58. package/dist/types.d.ts +50 -33
  59. package/dist/types.d.ts.map +1 -1
  60. package/evalite.config.js +14 -0
  61. package/evals/classification.eval.js +97 -0
  62. package/evals/marketing.eval.js +289 -0
  63. package/evals/math.eval.js +83 -0
  64. package/evals/run-evals.js +151 -0
  65. package/evals/structured-output.eval.js +131 -0
  66. package/evals/writing.eval.js +105 -0
  67. package/examples/batch-blog-posts.js +128 -0
  68. package/package.json +26 -26
  69. package/src/ai-promise.ts +359 -3
  70. package/src/ai.ts +155 -110
  71. package/src/batch/anthropic.js +256 -0
  72. package/src/batch/bedrock.js +584 -0
  73. package/src/batch/cloudflare.js +287 -0
  74. package/src/batch/google.js +359 -0
  75. package/src/batch/index.js +30 -0
  76. package/src/batch/memory.js +187 -0
  77. package/src/batch/openai.js +402 -0
  78. package/src/batch-map.ts +46 -4
  79. package/src/batch-queue.ts +116 -12
  80. package/src/budget.ts +727 -0
  81. package/src/cache.ts +653 -0
  82. package/src/context.ts +33 -1
  83. package/src/eval/index.js +7 -0
  84. package/src/eval/models.js +119 -0
  85. package/src/eval/runner.js +147 -0
  86. package/src/eval/runner.ts +3 -2
  87. package/src/generate.ts +7 -12
  88. package/src/index.ts +231 -53
  89. package/src/primitives.ts +19 -1
  90. package/src/retry.ts +776 -0
  91. package/src/schema.ts +1 -10
  92. package/src/tool-orchestration.ts +1008 -0
  93. package/src/types.ts +59 -41
  94. package/test/ai-proxy.test.js +157 -0
  95. package/test/async-iterators.test.js +261 -0
  96. package/test/backward-compat.test.ts +147 -0
  97. package/test/batch-autosubmit-errors.test.ts +598 -0
  98. package/test/batch-background.test.js +352 -0
  99. package/test/batch-blog-posts.test.js +293 -0
  100. package/test/blog-generation.test.js +390 -0
  101. package/test/browse-read.test.js +480 -0
  102. package/test/budget-tracking.test.ts +800 -0
  103. package/test/cache.test.ts +712 -0
  104. package/test/context-isolation.test.ts +687 -0
  105. package/test/core-functions.test.js +490 -0
  106. package/test/decide.test.js +260 -0
  107. package/test/define.test.js +232 -0
  108. package/test/e2e-bedrock-manual.js +136 -0
  109. package/test/e2e-bedrock.test.js +164 -0
  110. package/test/e2e-flex-gateway.js +131 -0
  111. package/test/e2e-flex-manual.js +156 -0
  112. package/test/e2e-flex.test.js +174 -0
  113. package/test/e2e-google-manual.js +150 -0
  114. package/test/e2e-google.test.js +181 -0
  115. package/test/embeddings.test.js +220 -0
  116. package/test/evals/define-function.eval.test.js +309 -0
  117. package/test/evals/deterministic.eval.test.ts +376 -0
  118. package/test/evals/primitives.eval.test.js +360 -0
  119. package/test/function-types.test.js +407 -0
  120. package/test/generate-core.test.js +213 -0
  121. package/test/generate.test.js +143 -0
  122. package/test/generic-order.test.ts +342 -0
  123. package/test/implicit-batch.test.js +326 -0
  124. package/test/json-parse-error-handling.test.ts +463 -0
  125. package/test/retry.test.ts +1016 -0
  126. package/test/schema.test.js +96 -0
  127. package/test/streaming.test.ts +316 -0
  128. package/test/tagged-templates.test.js +240 -0
  129. package/test/tool-orchestration.test.ts +770 -0
  130. package/vitest.config.js +39 -0
@@ -0,0 +1,770 @@
1
+ /**
2
+ * Tests for agentic tool orchestration
3
+ *
4
+ * These tests cover multi-turn model→tools→model loops for complex AI workflows.
5
+ * Tests are written first (TDD RED phase) - implementation follows.
6
+ */
7
+
8
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
9
+ import { z } from 'zod'
10
+
11
+ // Import types and classes we'll implement
12
+ import {
13
+ AgenticLoop,
14
+ ToolRouter,
15
+ ToolValidator,
16
+ type Tool,
17
+ type ToolResult,
18
+ type LoopOptions,
19
+ type LoopResult,
20
+ type ValidationResult,
21
+ } from '../src/tool-orchestration.js'
22
+
23
+ // Mock model for testing: factory returning a fresh object whose only member, generate, is a vi.fn() stub. NOTE(review): not referenced by any test in the portion of the file shown here; the tests build their own vi.fn() stubs inline.
24
+ const createMockModel = () => ({
25
+ generate: vi.fn(),
26
+ })
27
+
28
+ // Sample tools for testing. calculatorTool applies one of four arithmetic operations to the numbers a and b.
29
+ const calculatorTool: Tool = {
30
+ name: 'calculator',
31
+ description: 'Performs basic math operations',
32
+ parameters: z.object({
33
+ operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
34
+ a: z.number(),
35
+ b: z.number(),
36
+ }),
37
+ execute: async ({ operation, a, b }) => {
38
+ switch (operation) { // no default branch: an operation outside the four cases resolves to undefined
39
+ case 'add': return a + b
40
+ case 'subtract': return a - b
41
+ case 'multiply': return a * b
42
+ case 'divide': return b !== 0 ? a / b : 'Division by zero' // returns a message string instead of throwing when b is 0
43
+ }
44
+ },
45
+ }
46
+
47
+ const fetchTool: Tool = { // stub "fetch": performs no network I/O, only echoes the URL back in a canned payload
48
+ name: 'fetch',
49
+ description: 'Fetches data from a URL',
50
+ parameters: z.object({
51
+ url: z.string().url(), // schema requires a well-formed URL string
52
+ }),
53
+ execute: async ({ url }) => {
54
+ return { data: `Content from ${url}`, status: 200 } // fixed status; data embeds the requested URL
55
+ },
56
+ }
57
+
58
+ const slowTool: Tool = { // waits `delay` milliseconds, then resolves; used by the abort and timeout tests in this file
59
+ name: 'slow',
60
+ description: 'A tool that takes time to execute',
61
+ parameters: z.object({
62
+ delay: z.number(), // passed straight to setTimeout below, i.e. milliseconds
63
+ }),
64
+ execute: async ({ delay }) => {
65
+ await new Promise(resolve => setTimeout(resolve, delay))
66
+ return 'completed'
67
+ },
68
+ }
69
+
70
+ const failingTool: Tool = { // never resolves successfully: execute always throws, embedding the caller's message
71
+ name: 'failing',
72
+ description: 'A tool that always fails',
73
+ parameters: z.object({
74
+ message: z.string(),
75
+ }),
76
+ execute: async ({ message }) => {
77
+ throw new Error(`Tool failed: ${message}`) // the 'Tool failed' prefix is what the error-reporting test matches on
78
+ },
79
+ }
80
+
81
+ // ============================================================================
82
+ // AgenticLoop Tests - Multi-turn model→tools→model loops
83
+ // ============================================================================
84
+
85
+ describe('AgenticLoop', () => {
86
+ describe('basic loop execution', () => { // happy path: the stubbed generate() first requests tool calls, then returns final text
87
+ it('should execute a single tool call and return result', async () => {
88
+ const loop = new AgenticLoop({
89
+ tools: [calculatorTool],
90
+ maxSteps: 5,
91
+ })
92
+
93
+ // Mock model response that calls calculator then finishes
94
+ const mockGenerate = vi.fn()
95
+ .mockResolvedValueOnce({
96
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 5, b: 3 } }],
97
+ finishReason: 'tool_call',
98
+ })
99
+ .mockResolvedValueOnce({
100
+ text: 'The result of 5 + 3 is 8',
101
+ finishReason: 'stop',
102
+ })
103
+
104
+ const result = await loop.run({
105
+ model: { generate: mockGenerate } as any,
106
+ prompt: 'What is 5 + 3?',
107
+ })
108
+
109
+ expect(result.text).toContain('8')
110
+ expect(result.steps).toBe(2) // two generate() turns are queued above: one tool-call turn, one final text turn
111
+ expect(result.toolCalls).toHaveLength(1)
112
+ expect(result.toolCalls[0].name).toBe('calculator')
113
+ expect(result.toolCalls[0].result).toBe(8) // 5 + 3 via calculatorTool's 'add' case
114
+ })
115
+
116
+ it('should handle multiple sequential tool calls', async () => {
117
+ const loop = new AgenticLoop({
118
+ tools: [calculatorTool],
119
+ maxSteps: 10,
120
+ })
121
+
122
+ const mockGenerate = vi.fn() // two consecutive tool-call turns followed by a final text turn
123
+ .mockResolvedValueOnce({
124
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 5, b: 3 } }],
125
+ finishReason: 'tool_call',
126
+ })
127
+ .mockResolvedValueOnce({
128
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'multiply', a: 8, b: 2 } }],
129
+ finishReason: 'tool_call',
130
+ })
131
+ .mockResolvedValueOnce({
132
+ text: '5 + 3 = 8, then 8 * 2 = 16',
133
+ finishReason: 'stop',
134
+ })
135
+
136
+ const result = await loop.run({
137
+ model: { generate: mockGenerate } as any,
138
+ prompt: 'Add 5 and 3, then multiply by 2',
139
+ })
140
+
141
+ expect(result.steps).toBe(3) // one step per queued generate() response
142
+ expect(result.toolCalls).toHaveLength(2)
143
+ })
144
+
145
+ it('should preserve conversation state across turns', async () => {
146
+ const loop = new AgenticLoop({
147
+ tools: [calculatorTool],
148
+ maxSteps: 5,
149
+ })
150
+
151
+ const mockGenerate = vi.fn()
152
+ .mockResolvedValueOnce({
153
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 10, b: 5 } }],
154
+ finishReason: 'tool_call',
155
+ })
156
+ .mockResolvedValueOnce({
157
+ text: 'Done',
158
+ finishReason: 'stop',
159
+ })
160
+
161
+ const result = await loop.run({ // return value is not inspected in this test; only the arguments passed to generate() are
162
+ model: { generate: mockGenerate } as any,
163
+ prompt: 'Calculate 10 + 5',
164
+ })
165
+
166
+ // Verify the second call received the tool result in messages
167
+ const secondCall = mockGenerate.mock.calls[1][0] // first argument of the second generate() invocation
168
+ expect(secondCall.messages).toBeDefined()
169
+ expect(secondCall.messages.some((m: any) =>
170
+ m.role === 'tool' && m.content.includes('15') // 10 + 5 must appear in the content of a tool-role message
171
+ )).toBe(true)
172
+ })
173
+ })
174
+
175
+ describe('maxSteps limit enforcement', () => { // both tests drive the loop with a model that never stops requesting tools
176
+ it('should stop at maxSteps limit', async () => {
177
+ const loop = new AgenticLoop({
178
+ tools: [calculatorTool],
179
+ maxSteps: 3,
180
+ })
181
+
182
+ // Model keeps calling tools indefinitely (mockResolvedValue, not ...Once, so every generate() call yields the same tool request)
183
+ const mockGenerate = vi.fn().mockResolvedValue({
184
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 1 } }],
185
+ finishReason: 'tool_call',
186
+ })
187
+
188
+ const result = await loop.run({
189
+ model: { generate: mockGenerate } as any,
190
+ prompt: 'Keep adding',
191
+ })
192
+
193
+ expect(result.steps).toBe(3) // equals the configured maxSteps
194
+ expect(result.stopReason).toBe('max_steps') // without strictMaxSteps the limit is expected to be reported in the result, not thrown
195
+ })
196
+
197
+ it('should throw error when maxSteps is exceeded in strict mode', async () => {
198
+ const loop = new AgenticLoop({
199
+ tools: [calculatorTool],
200
+ maxSteps: 2,
201
+ strictMaxSteps: true, // the only option that differs in kind from the previous test; run() is expected to reject here
202
+ })
203
+
204
+ const mockGenerate = vi.fn().mockResolvedValue({
205
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 1 } }],
206
+ finishReason: 'tool_call',
207
+ })
208
+
209
+ await expect(loop.run({
210
+ model: { generate: mockGenerate } as any,
211
+ prompt: 'Keep adding',
212
+ })).rejects.toThrow('Max steps exceeded') // matches when the rejection's message contains this text
213
+ })
214
+ })
215
+
216
+ describe('parallel tool execution', () => { // covers unbounded parallelism (wall-clock check) and a capped variant (event-log check)
217
+ it('should execute multiple tool calls in parallel', async () => {
218
+ const timingTool: Tool = {
219
+ name: 'timing',
220
+ description: 'Returns execution order',
221
+ parameters: z.object({ id: z.string() }),
222
+ execute: async ({ id }) => {
223
+ await new Promise(r => setTimeout(r, 10)) // each call takes roughly 10ms
224
+ return { id, time: Date.now() }
225
+ },
226
+ }
227
+
228
+ const loop = new AgenticLoop({
229
+ tools: [timingTool],
230
+ maxSteps: 5,
231
+ parallelExecution: true,
232
+ })
233
+
234
+ const mockGenerate = vi.fn()
235
+ .mockResolvedValueOnce({
236
+ toolCalls: [
237
+ { name: 'timing', arguments: { id: 'a' } },
238
+ { name: 'timing', arguments: { id: 'b' } },
239
+ { name: 'timing', arguments: { id: 'c' } },
240
+ ],
241
+ finishReason: 'tool_call',
242
+ })
243
+ .mockResolvedValueOnce({
244
+ text: 'All done',
245
+ finishReason: 'stop',
246
+ })
247
+
248
+ const startTime = Date.now()
249
+ const result = await loop.run({
250
+ model: { generate: mockGenerate } as any,
251
+ prompt: 'Run all',
252
+ })
253
+ const elapsed = Date.now() - startTime // wall-clock time for the whole run, including both generate() turns
254
+
255
+ expect(result.toolCalls).toHaveLength(3)
256
+ // Parallel execution should be faster than sequential (3 * 10ms). NOTE(review): a sequential run would take about 30ms, which is already below the 40ms bound asserted next, so this check cannot distinguish parallel from sequential execution; it is also sensitive to timer jitter on slow runners.
257
+ expect(elapsed).toBeLessThan(40)
258
+ })
259
+
260
+ it('should respect parallel execution limit', async () => {
261
+ const executionOrder: string[] = [] // event log of 'start:<id>' / 'end:<id>' entries, in the order they happen
262
+ const trackingTool: Tool = {
263
+ name: 'track',
264
+ description: 'Tracks execution',
265
+ parameters: z.object({ id: z.string() }),
266
+ execute: async ({ id }) => {
267
+ executionOrder.push(`start:${id}`)
268
+ await new Promise(r => setTimeout(r, 20))
269
+ executionOrder.push(`end:${id}`)
270
+ return id
271
+ },
272
+ }
273
+
274
+ const loop = new AgenticLoop({
275
+ tools: [trackingTool],
276
+ maxSteps: 5,
277
+ parallelExecution: true,
278
+ maxParallelCalls: 2, // concurrency cap under test; four calls are requested below
279
+ })
280
+
281
+ const mockGenerate = vi.fn()
282
+ .mockResolvedValueOnce({
283
+ toolCalls: [
284
+ { name: 'track', arguments: { id: '1' } },
285
+ { name: 'track', arguments: { id: '2' } },
286
+ { name: 'track', arguments: { id: '3' } },
287
+ { name: 'track', arguments: { id: '4' } },
288
+ ],
289
+ finishReason: 'tool_call',
290
+ })
291
+ .mockResolvedValueOnce({
292
+ text: 'Done',
293
+ finishReason: 'stop',
294
+ })
295
+
296
+ await loop.run({
297
+ model: { generate: mockGenerate } as any,
298
+ prompt: 'Track all',
299
+ })
300
+
301
+ // With maxParallelCalls: 2, at most 2 should start before any ends
302
+ let concurrentStarts = 0 // number of calls in flight while replaying the event log
303
+ let maxConcurrent = 0 // high-water mark of concurrentStarts
304
+ for (const event of executionOrder) {
305
+ if (event.startsWith('start:')) concurrentStarts++
306
+ else concurrentStarts-- // the only other entries the tool logs are 'end:<id>'
307
+ maxConcurrent = Math.max(maxConcurrent, concurrentStarts)
308
+ }
309
+ expect(maxConcurrent).toBeLessThanOrEqual(2) // NOTE(review): a fully sequential implementation (peak of 1) would also satisfy this bound
310
+ })
311
+ })
312
+
313
+ describe('abort signal support', () => { // run() is expected to reject once the supplied AbortSignal fires
314
+ it('should abort execution when signal is triggered', async () => {
315
+ const loop = new AgenticLoop({
316
+ tools: [slowTool],
317
+ maxSteps: 10,
318
+ })
319
+
320
+ const controller = new AbortController()
321
+ const mockGenerate = vi.fn().mockResolvedValue({ // every turn requests the slow tool again
322
+ toolCalls: [{ name: 'slow', arguments: { delay: 1000 } }], // 1000ms tool delay versus the 50ms abort below
323
+ finishReason: 'tool_call',
324
+ })
325
+
326
+ // Abort after 50ms. The timer handle is not kept, so the test never clears it. NOTE(review): whether the in-flight 1000ms tool timer is cancelled on abort depends on the AgenticLoop implementation, which is not shown here.
327
+ setTimeout(() => controller.abort(), 50)
328
+
329
+ await expect(loop.run({
330
+ model: { generate: mockGenerate } as any,
331
+ prompt: 'Run slow tool',
332
+ abortSignal: controller.signal,
333
+ })).rejects.toThrow('Aborted') // matches when the rejection's message contains 'Aborted'
334
+ })
335
+ })
336
+
337
+ describe('onStep callback', () => { // the loop is expected to invoke onStep once per iteration
338
+ it('should call onStep for each loop iteration', async () => {
339
+ const steps: any[] = [] // every step object handed to onStep, in call order
340
+ const loop = new AgenticLoop({
341
+ tools: [calculatorTool],
342
+ maxSteps: 5,
343
+ onStep: (step) => { steps.push(step) },
344
+ })
345
+
346
+ const mockGenerate = vi.fn()
347
+ .mockResolvedValueOnce({
348
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }],
349
+ finishReason: 'tool_call',
350
+ })
351
+ .mockResolvedValueOnce({
352
+ text: 'Result is 3',
353
+ finishReason: 'stop',
354
+ })
355
+
356
+ await loop.run({
357
+ model: { generate: mockGenerate } as any,
358
+ prompt: 'Add',
359
+ })
360
+
361
+ expect(steps).toHaveLength(2) // one callback per queued generate() turn
362
+ expect(steps[0].stepNumber).toBe(1) // step numbering is expected to start at 1
363
+ expect(steps[0].toolCalls).toHaveLength(1) // the first step carried the single calculator call
364
+ expect(steps[1].stepNumber).toBe(2)
365
+ })
366
+ })
367
+ })
368
+
369
+ // ============================================================================
370
+ // ToolRouter Tests - Routing tool calls to handlers
371
+ // ============================================================================
372
+
373
+ describe('ToolRouter', () => {
374
+ describe('tool registration and routing', () => { // register() tools, then dispatch { name, arguments } calls through route()/routeAll()
375
+ it('should register and route to correct tool', async () => {
376
+ const router = new ToolRouter()
377
+ router.register(calculatorTool)
378
+ router.register(fetchTool) // a second tool is registered so that routing has to select by name
379
+
380
+ const result = await router.route({
381
+ name: 'calculator',
382
+ arguments: { operation: 'multiply', a: 6, b: 7 },
383
+ })
384
+
385
+ expect(result.success).toBe(true)
386
+ expect(result.result).toBe(42) // 6 * 7
387
+ })
388
+
389
+ it('should return error for unknown tool', async () => {
390
+ const router = new ToolRouter()
391
+ router.register(calculatorTool)
392
+
393
+ const result = await router.route({ // expected to resolve with a failure result rather than reject
394
+ name: 'unknown_tool',
395
+ arguments: {},
396
+ })
397
+
398
+ expect(result.success).toBe(false)
399
+ expect(result.error).toContain('not found')
400
+ })
401
+
402
+ it('should route multiple calls in order', async () => {
403
+ const router = new ToolRouter()
404
+ router.register(calculatorTool)
405
+
406
+ const calls = [
407
+ { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
408
+ { name: 'calculator', arguments: { operation: 'multiply', a: 3, b: 4 } },
409
+ ]
410
+
411
+ const results = await router.routeAll(calls)
412
+
413
+ expect(results).toHaveLength(2)
414
+ expect(results[0].result).toBe(3) // results are expected in the same order as `calls`: 1 + 2 first
415
+ expect(results[1].result).toBe(12) // then 3 * 4
416
+ })
417
+ })
418
+
419
+ describe('tool result formatting', () => { // formatResult() turns a route() result into a tool-role message
420
+ it('should format tool results for model consumption', async () => {
421
+ const router = new ToolRouter()
422
+ router.register(calculatorTool)
423
+
424
+ const result = await router.route({
425
+ name: 'calculator',
426
+ arguments: { operation: 'add', a: 10, b: 20 },
427
+ })
428
+
429
+ const formatted = router.formatResult(result)
430
+ expect(formatted.role).toBe('tool')
431
+ expect(formatted.content).toContain('30') // 10 + 20 must appear in the message content
432
+ })
433
+
434
+ it('should format error results appropriately', async () => {
435
+ const router = new ToolRouter()
436
+ router.register(failingTool)
437
+
438
+ const result = await router.route({ // failingTool always throws; route() is expected to capture that instead of rejecting
439
+ name: 'failing',
440
+ arguments: { message: 'test error' },
441
+ })
442
+
443
+ const formatted = router.formatResult(result)
444
+ expect(formatted.role).toBe('tool')
445
+ expect(formatted.content).toContain('error') // satisfied by the 'test error' message passed above if the content embeds the thrown text
446
+ expect(formatted.isError).toBe(true)
447
+ })
448
+ })
449
+
450
+ describe('parallel routing', () => { // wall-clock check that routeAllParallel() overlaps its calls
451
+ it('should route multiple calls in parallel', async () => {
452
+ const router = new ToolRouter()
453
+ const executionTimes: number[] = [] // NOTE(review): filled by the tool below but never asserted on in this test
454
+
455
+ const timingTool: Tool = {
456
+ name: 'time',
457
+ description: 'Records time',
458
+ parameters: z.object({ id: z.number() }),
459
+ execute: async ({ id }) => {
460
+ await new Promise(r => setTimeout(r, 20)) // each call takes roughly 20ms
461
+ executionTimes.push(Date.now())
462
+ return id
463
+ },
464
+ }
465
+
466
+ router.register(timingTool)
467
+
468
+ const startTime = Date.now()
469
+ await router.routeAllParallel([
470
+ { name: 'time', arguments: { id: 1 } },
471
+ { name: 'time', arguments: { id: 2 } },
472
+ { name: 'time', arguments: { id: 3 } },
473
+ ])
474
+ const elapsed = Date.now() - startTime
475
+
476
+ // Should complete in ~20ms, not 60ms (three 20ms calls; the 50ms bound below sits between the parallel and sequential totals)
477
+ expect(elapsed).toBeLessThan(50)
478
+ })
479
+ })
480
+ })
481
+
482
+ // ============================================================================
483
+ // ToolValidator Tests - Pre-execution validation
484
+ // ============================================================================
485
+
486
+ describe('ToolValidator', () => {
487
+ describe('argument validation', () => { // validate(name, args) is synchronous here and reports { valid, errors }
488
+ it('should validate arguments against tool schema', () => {
489
+ const validator = new ToolValidator()
490
+ validator.register(calculatorTool)
491
+
492
+ const result = validator.validate('calculator', { // all three fields match calculatorTool's zod schema
493
+ operation: 'add',
494
+ a: 5,
495
+ b: 10,
496
+ })
497
+
498
+ expect(result.valid).toBe(true)
499
+ })
500
+
501
+ it('should reject invalid arguments', () => {
502
+ const validator = new ToolValidator()
503
+ validator.register(calculatorTool)
504
+
505
+ const result = validator.validate('calculator', {
506
+ operation: 'invalid_op', // not one of the schema's enum values
507
+ a: 'not a number', // string where the schema requires a number
508
+ b: 10,
509
+ })
510
+
511
+ expect(result.valid).toBe(false)
512
+ expect(result.errors).toBeDefined()
513
+ expect(result.errors!.length).toBeGreaterThan(0) // at least one error; the exact count is not pinned
514
+ })
515
+
516
+ it('should reject missing required arguments', () => {
517
+ const validator = new ToolValidator()
518
+ validator.register(calculatorTool)
519
+
520
+ const result = validator.validate('calculator', {
521
+ operation: 'add',
522
+ a: 5,
523
+ // missing 'b'
524
+ })
525
+
526
+ expect(result.valid).toBe(false)
527
+ })
528
+
529
+ it('should return error for unknown tool', () => {
530
+ const validator = new ToolValidator() // nothing is registered in this test
531
+
532
+ const result = validator.validate('unknown', { foo: 'bar' })
533
+
534
+ expect(result.valid).toBe(false)
535
+ expect(result.errors![0]).toContain('not registered') // the first error entry is expected to be a string containing this text
536
+ })
537
+ })
538
+
539
+ describe('batch validation', () => { // validateAll() is expected to return one result per call, in input order
540
+ it('should validate multiple tool calls at once', () => {
541
+ const validator = new ToolValidator()
542
+ validator.register(calculatorTool)
543
+ validator.register(fetchTool)
544
+
545
+ const results = validator.validateAll([
546
+ { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
547
+ { name: 'fetch', arguments: { url: 'https://example.com' } },
548
+ { name: 'calculator', arguments: { operation: 'bad', a: 1, b: 2 } }, // 'bad' is not one of the schema's enum values
549
+ ])
550
+
551
+ expect(results).toHaveLength(3)
552
+ expect(results[0].valid).toBe(true)
553
+ expect(results[1].valid).toBe(true)
554
+ expect(results[2].valid).toBe(false) // an invalid entry does not affect the verdicts of the valid ones before it
555
+ })
556
+ })
557
+ })
558
+
559
+ // ============================================================================
560
+ // Tool Error Recovery Tests
561
+ // ============================================================================
562
+
563
+ describe('Tool Error Recovery', () => {
564
+ describe('error handling', () => { // tool exceptions are expected to be recorded on the tool call, optionally after retries
565
+ it('should catch and report tool execution errors', async () => {
566
+ const loop = new AgenticLoop({
567
+ tools: [failingTool, calculatorTool],
568
+ maxSteps: 5,
569
+ })
570
+
571
+ const mockGenerate = vi.fn()
572
+ .mockResolvedValueOnce({
573
+ toolCalls: [{ name: 'failing', arguments: { message: 'test' } }],
574
+ finishReason: 'tool_call',
575
+ })
576
+ .mockResolvedValueOnce({
577
+ text: 'Tool failed, moving on',
578
+ finishReason: 'stop',
579
+ })
580
+
581
+ const result = await loop.run({ // expected to resolve even though the tool throws
582
+ model: { generate: mockGenerate } as any,
583
+ prompt: 'Try the failing tool',
584
+ })
585
+
586
+ expect(result.toolCalls[0].error).toBeDefined()
587
+ expect(result.toolCalls[0].error).toContain('Tool failed') // prefix of the message failingTool throws
588
+ })
589
+
590
+ it('should retry failed tool calls when retry is enabled', async () => {
591
+ let attempts = 0 // counts invocations of flakeyTool.execute across retries
592
+ const flakeyTool: Tool = {
593
+ name: 'flakey',
594
+ description: 'Fails first attempt',
595
+ parameters: z.object({}),
596
+ execute: async () => {
597
+ attempts++
598
+ if (attempts < 2) throw new Error('First attempt fails') // only the very first invocation throws
599
+ return 'success'
600
+ },
601
+ }
602
+
603
+ const loop = new AgenticLoop({
604
+ tools: [flakeyTool],
605
+ maxSteps: 5,
606
+ retryFailedTools: true,
607
+ maxToolRetries: 3,
608
+ })
609
+
610
+ const mockGenerate = vi.fn()
611
+ .mockResolvedValueOnce({
612
+ toolCalls: [{ name: 'flakey', arguments: {} }],
613
+ finishReason: 'tool_call',
614
+ })
615
+ .mockResolvedValueOnce({
616
+ text: 'Got success',
617
+ finishReason: 'stop',
618
+ })
619
+
620
+ const result = await loop.run({
621
+ model: { generate: mockGenerate } as any,
622
+ prompt: 'Use flakey tool',
623
+ })
624
+
625
+ expect(result.toolCalls[0].result).toBe('success')
626
+ expect(result.toolCalls[0].retryCount).toBe(1) // one retry after the initial failed attempt
627
+ })
628
+ })
629
+
630
+ describe('graceful degradation', () => { // one tool in a batch failing should not discard the other tool's result
631
+ it('should continue with partial results when tools fail', async () => {
632
+ const loop = new AgenticLoop({
633
+ tools: [calculatorTool, failingTool],
634
+ maxSteps: 5,
635
+ continueOnError: true,
636
+ })
637
+
638
+ const mockGenerate = vi.fn()
639
+ .mockResolvedValueOnce({
640
+ toolCalls: [ // a single turn requesting one succeeding and one failing call
641
+ { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
642
+ { name: 'failing', arguments: { message: 'error' } },
643
+ ],
644
+ finishReason: 'tool_call',
645
+ })
646
+ .mockResolvedValueOnce({
647
+ text: 'Calculator worked, other failed',
648
+ finishReason: 'stop',
649
+ })
650
+
651
+ const result = await loop.run({
652
+ model: { generate: mockGenerate } as any,
653
+ prompt: 'Use both tools',
654
+ })
655
+
656
+ expect(result.toolCalls).toHaveLength(2) // both calls are recorded, in request order
657
+ expect(result.toolCalls[0].result).toBe(3) // 1 + 2 from the calculator call
658
+ expect(result.toolCalls[1].error).toBeDefined() // the failing call is kept, carrying its error
659
+ })
660
+ })
661
+
662
+ describe('timeout handling', () => { // a tool that outlives toolTimeout should surface as an error on its tool call
663
+ it('should timeout long-running tools', async () => {
664
+ const loop = new AgenticLoop({
665
+ tools: [slowTool],
666
+ maxSteps: 5,
667
+ toolTimeout: 50, // 50ms timeout
668
+ })
669
+
670
+ const mockGenerate = vi.fn()
671
+ .mockResolvedValueOnce({
672
+ toolCalls: [{ name: 'slow', arguments: { delay: 1000 } }], // 1000ms delay, far above the 50ms toolTimeout
673
+ finishReason: 'tool_call',
674
+ })
675
+ .mockResolvedValueOnce({
676
+ text: 'Tool timed out',
677
+ finishReason: 'stop',
678
+ })
679
+
680
+ const result = await loop.run({ // expected to resolve; the timeout is reported on the tool call, not thrown
681
+ model: { generate: mockGenerate } as any,
682
+ prompt: 'Run slow tool',
683
+ })
684
+
685
+ expect(result.toolCalls[0].error).toContain('timeout') // case-sensitive match on the lowercase word 'timeout'
686
+ })
687
+ })
688
+ })
689
+
690
+ // ============================================================================
691
+ // Integration with generateText Tests
692
+ // ============================================================================
693
+
694
+ describe('Integration with generateText', () => {
695
+ it('should work with AI SDK tool format', async () => { // NOTE(review): declared async, but the body contains no await
696
+ const loop = new AgenticLoop({
697
+ tools: [calculatorTool],
698
+ maxSteps: 5,
699
+ })
700
+
701
+ // Verify tool conversion to AI SDK format
702
+ const sdkTools = loop.getToolsForSDK() // asserted below to be an object keyed by tool name
703
+ expect(sdkTools.calculator).toBeDefined()
704
+ expect(sdkTools.calculator.description).toBe('Performs basic math operations') // must equal calculatorTool.description verbatim
705
+ expect(sdkTools.calculator.parameters).toBeDefined()
706
+ })
707
+
708
+ it('should expose tool results through experimental_toolResultContent', async () => { // NOTE(review): the title names experimental_toolResultContent, but the assertions below read result.toolResults
709
+ const loop = new AgenticLoop({
710
+ tools: [calculatorTool],
711
+ maxSteps: 5,
712
+ })
713
+
714
+ const mockGenerate = vi.fn()
715
+ .mockResolvedValueOnce({
716
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 2, b: 3 } }],
717
+ finishReason: 'tool_call',
718
+ })
719
+ .mockResolvedValueOnce({
720
+ text: '5',
721
+ finishReason: 'stop',
722
+ })
723
+
724
+ const result = await loop.run({
725
+ model: { generate: mockGenerate } as any,
726
+ prompt: 'Add 2 + 3',
727
+ })
728
+
729
+ // Verify tool results are exposed in a format compatible with AI SDK
730
+ expect(result.toolResults).toBeDefined()
731
+ expect(result.toolResults[0].toolName).toBe('calculator') // entries use `toolName`, unlike result.toolCalls entries, which other tests read via `name`
732
+ expect(result.toolResults[0].result).toBe(5) // 2 + 3
733
+ })
734
+ })
735
+
736
+ // ============================================================================
737
+ // Token Usage Tracking Tests
738
+ // ============================================================================
739
+
740
+ describe('Token Usage Tracking', () => { // per-turn usage figures are expected to be summed into result.usage
741
+ it('should track token usage across multi-turn conversations', async () => {
742
+ const loop = new AgenticLoop({
743
+ tools: [calculatorTool],
744
+ maxSteps: 5,
745
+ trackUsage: true,
746
+ })
747
+
748
+ const mockGenerate = vi.fn() // each queued response carries its own usage record
749
+ .mockResolvedValueOnce({
750
+ toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }],
751
+ finishReason: 'tool_call',
752
+ usage: { promptTokens: 50, completionTokens: 20, totalTokens: 70 },
753
+ })
754
+ .mockResolvedValueOnce({
755
+ text: 'Result is 3',
756
+ finishReason: 'stop',
757
+ usage: { promptTokens: 80, completionTokens: 10, totalTokens: 90 },
758
+ })
759
+
760
+ const result = await loop.run({
761
+ model: { generate: mockGenerate } as any,
762
+ prompt: 'Add 1 + 2',
763
+ })
764
+
765
+ expect(result.usage).toBeDefined()
766
+ expect(result.usage!.promptTokens).toBe(130) // 50 + 80
767
+ expect(result.usage!.completionTokens).toBe(30) // 20 + 10
768
+ expect(result.usage!.totalTokens).toBe(160) // 70 + 90
769
+ })
770
+ })