npm - ai-functions - Versions diffs - 2.1.1 → 2.3.0 - Mend

ai-functions 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (286) hide show

package/.turbo/turbo-build.log +1 -4
package/CHANGELOG.md +68 -1
package/README.md +397 -157
package/dist/ai-promise.d.ts +50 -3
package/dist/ai-promise.d.ts.map +1 -1
package/dist/ai-promise.js +410 -51
package/dist/ai-promise.js.map +1 -1
package/dist/ai-schemas.d.ts +56 -0
package/dist/ai-schemas.d.ts.map +1 -0
package/dist/ai-schemas.js +53 -0
package/dist/ai-schemas.js.map +1 -0
package/dist/ai.d.ts +16 -242
package/dist/ai.d.ts.map +1 -1
package/dist/ai.js +54 -837
package/dist/ai.js.map +1 -1
package/dist/batch/anthropic.d.ts +6 -4
package/dist/batch/anthropic.d.ts.map +1 -1
package/dist/batch/anthropic.js +83 -145
package/dist/batch/anthropic.js.map +1 -1
package/dist/batch/bedrock.d.ts +8 -30
package/dist/batch/bedrock.d.ts.map +1 -1
package/dist/batch/bedrock.js +155 -338
package/dist/batch/bedrock.js.map +1 -1
package/dist/batch/cloudflare.d.ts +8 -20
package/dist/batch/cloudflare.d.ts.map +1 -1
package/dist/batch/cloudflare.js +68 -189
package/dist/batch/cloudflare.js.map +1 -1
package/dist/batch/google.d.ts +6 -20
package/dist/batch/google.d.ts.map +1 -1
package/dist/batch/google.js +70 -238
package/dist/batch/google.js.map +1 -1
package/dist/batch/index.d.ts +4 -1
package/dist/batch/index.d.ts.map +1 -1
package/dist/batch/index.js +4 -1
package/dist/batch/index.js.map +1 -1
package/dist/batch/memory.d.ts +1 -1
package/dist/batch/memory.d.ts.map +1 -1
package/dist/batch/memory.js +14 -10
package/dist/batch/memory.js.map +1 -1
package/dist/batch/openai.d.ts +11 -14
package/dist/batch/openai.d.ts.map +1 -1
package/dist/batch/openai.js +52 -156
package/dist/batch/openai.js.map +1 -1
package/dist/batch/provider.d.ts +111 -0
package/dist/batch/provider.d.ts.map +1 -0
package/dist/batch/provider.js +233 -0
package/dist/batch/provider.js.map +1 -0
package/dist/batch-map.d.ts.map +1 -1
package/dist/batch-map.js +23 -17
package/dist/batch-map.js.map +1 -1
package/dist/batch-queue.d.ts +65 -0
package/dist/batch-queue.d.ts.map +1 -1
package/dist/batch-queue.js +169 -14
package/dist/batch-queue.js.map +1 -1
package/dist/budget.d.ts +272 -0
package/dist/budget.d.ts.map +1 -0
package/dist/budget.js +513 -0
package/dist/budget.js.map +1 -0
package/dist/cache.d.ts +295 -0
package/dist/cache.d.ts.map +1 -0
package/dist/cache.js +433 -0
package/dist/cache.js.map +1 -0
package/dist/context.d.ts +42 -8
package/dist/context.d.ts.map +1 -1
package/dist/context.js +64 -62
package/dist/context.js.map +1 -1
package/dist/digital-objects-registry.d.ts +229 -0
package/dist/digital-objects-registry.d.ts.map +1 -0
package/dist/digital-objects-registry.js +617 -0
package/dist/digital-objects-registry.js.map +1 -0
package/dist/embeddings.d.ts +2 -2
package/dist/embeddings.d.ts.map +1 -1
package/dist/errors.d.ts +22 -0
package/dist/errors.d.ts.map +1 -0
package/dist/errors.js +35 -0
package/dist/errors.js.map +1 -0
package/dist/eval/runner.d.ts +10 -1
package/dist/eval/runner.d.ts.map +1 -1
package/dist/eval/runner.js +41 -35
package/dist/eval/runner.js.map +1 -1
package/dist/eval-log/in-memory.d.ts +34 -0
package/dist/eval-log/in-memory.d.ts.map +1 -0
package/dist/eval-log/in-memory.js +84 -0
package/dist/eval-log/in-memory.js.map +1 -0
package/dist/eval-log/index.d.ts +29 -0
package/dist/eval-log/index.d.ts.map +1 -0
package/dist/eval-log/index.js +39 -0
package/dist/eval-log/index.js.map +1 -0
package/dist/eval-log/types.d.ts +101 -0
package/dist/eval-log/types.d.ts.map +1 -0
package/dist/eval-log/types.js +16 -0
package/dist/eval-log/types.js.map +1 -0
package/dist/function-registry.d.ts +116 -0
package/dist/function-registry.d.ts.map +1 -0
package/dist/function-registry.js +546 -0
package/dist/function-registry.js.map +1 -0
package/dist/generate.d.ts +9 -3
package/dist/generate.d.ts.map +1 -1
package/dist/generate.js +18 -22
package/dist/generate.js.map +1 -1
package/dist/index.d.ts +35 -20
package/dist/index.d.ts.map +1 -1
package/dist/index.js +89 -42
package/dist/index.js.map +1 -1
package/dist/logger.d.ts +118 -0
package/dist/logger.d.ts.map +1 -0
package/dist/logger.js +187 -0
package/dist/logger.js.map +1 -0
package/dist/middleware/budget.d.ts +84 -0
package/dist/middleware/budget.d.ts.map +1 -0
package/dist/middleware/budget.js +110 -0
package/dist/middleware/budget.js.map +1 -0
package/dist/middleware/cache.d.ts +103 -0
package/dist/middleware/cache.d.ts.map +1 -0
package/dist/middleware/cache.js +228 -0
package/dist/middleware/cache.js.map +1 -0
package/dist/middleware/embed-cache.d.ts +99 -0
package/dist/middleware/embed-cache.d.ts.map +1 -0
package/dist/middleware/embed-cache.js +128 -0
package/dist/middleware/embed-cache.js.map +1 -0
package/dist/middleware/index.d.ts +11 -0
package/dist/middleware/index.d.ts.map +1 -0
package/dist/middleware/index.js +11 -0
package/dist/middleware/index.js.map +1 -0
package/dist/middleware/trace.d.ts +103 -0
package/dist/middleware/trace.d.ts.map +1 -0
package/dist/middleware/trace.js +176 -0
package/dist/middleware/trace.js.map +1 -0
package/dist/primitives.d.ts +120 -1
package/dist/primitives.d.ts.map +1 -1
package/dist/primitives.js +398 -26
package/dist/primitives.js.map +1 -1
package/dist/retry.d.ts +368 -0
package/dist/retry.d.ts.map +1 -0
package/dist/retry.js +646 -0
package/dist/retry.js.map +1 -0
package/dist/schema.d.ts.map +1 -1
package/dist/schema.js +2 -10
package/dist/schema.js.map +1 -1
package/dist/telemetry.d.ts +128 -0
package/dist/telemetry.d.ts.map +1 -0
package/dist/telemetry.js +285 -0
package/dist/telemetry.js.map +1 -0
package/dist/template.d.ts.map +1 -1
package/dist/template.js +6 -1
package/dist/template.js.map +1 -1
package/dist/tool-orchestration.d.ts +453 -0
package/dist/tool-orchestration.d.ts.map +1 -0
package/dist/tool-orchestration.js +763 -0
package/dist/tool-orchestration.js.map +1 -0
package/dist/type-guards.d.ts +28 -0
package/dist/type-guards.d.ts.map +1 -0
package/dist/type-guards.js +29 -0
package/dist/type-guards.js.map +1 -0
package/dist/types.d.ts +135 -17
package/dist/types.d.ts.map +1 -1
package/dist/types.js +36 -1
package/dist/types.js.map +1 -1
package/dist/wrap-for-v3.d.ts +80 -0
package/dist/wrap-for-v3.d.ts.map +1 -0
package/dist/wrap-for-v3.js +89 -0
package/dist/wrap-for-v3.js.map +1 -0
package/examples/00-quickstart.ts +232 -0
package/examples/01-rag-chatbot.ts +212 -0
package/examples/02-multi-agent-research.ts +290 -0
package/examples/03-email-classification.ts +379 -0
package/examples/04-content-moderation.ts +400 -0
package/examples/05-document-extraction.ts +455 -0
package/examples/06-streaming-chat-nextjs.ts +437 -0
package/examples/07-cloudflare-worker.ts +483 -0
package/examples/08-batch-processing.ts +491 -0
package/examples/09-budget-constrained.ts +527 -0
package/examples/10-tool-orchestration.ts +565 -0
package/examples/11-retry-resilience.ts +403 -0
package/examples/12-caching-strategies.ts +422 -0
package/examples/README.md +145 -0
package/package.json +10 -6
package/src/ai-promise.ts +528 -99
package/src/ai-schemas.ts +122 -0
package/src/ai.ts +69 -1153
package/src/batch/anthropic.ts +96 -161
package/src/batch/bedrock.ts +203 -454
package/src/batch/cloudflare.ts +99 -282
package/src/batch/google.ts +91 -297
package/src/batch/index.ts +4 -1
package/src/batch/memory.ts +15 -10
package/src/batch/openai.ts +65 -193
package/src/batch/provider.ts +336 -0
package/src/batch-map.ts +29 -24
package/src/batch-queue.ts +200 -11
package/src/budget.ts +740 -0
package/src/cache.ts +681 -0
package/src/context.ts +122 -76
package/src/digital-objects-registry.ts +750 -0
package/src/errors.ts +37 -0
package/src/eval/runner.ts +63 -38
package/src/eval-log/in-memory.ts +90 -0
package/src/eval-log/index.ts +46 -0
package/src/eval-log/types.ts +110 -0
package/src/function-registry.ts +671 -0
package/src/generate.ts +33 -33
package/src/index.ts +325 -49
package/src/logger.ts +232 -0
package/src/middleware/budget.ts +171 -0
package/src/middleware/cache.ts +299 -0
package/src/middleware/embed-cache.ts +195 -0
package/src/middleware/index.ts +23 -0
package/src/middleware/trace.ts +248 -0
package/src/primitives.ts +589 -62
package/src/retry.ts +902 -0
package/src/schema.ts +8 -17
package/src/telemetry.ts +403 -0
package/src/template.ts +8 -4
package/src/tool-orchestration.ts +1173 -0
package/src/type-guards.ts +31 -0
package/src/types.ts +164 -25
package/src/wrap-for-v3.ts +105 -0
package/test/ai-promise.test.ts +1080 -0
package/test/ai-proxy.test.ts +1 -1
package/test/backward-compat.test.ts +147 -0
package/test/batch-autosubmit-errors.test.ts +610 -0
package/test/batch-blog-posts.test.ts +87 -129
package/test/budget-tracking.test.ts +800 -0
package/test/cache.test.ts +712 -0
package/test/context-isolation.test.ts +687 -0
package/test/core-functions.test.ts +183 -579
package/test/decide.test.ts +154 -322
package/test/define.test.ts +211 -8
package/test/digital-objects-registry.test.ts +760 -0
package/test/embedding-cache-middleware.test.ts +140 -0
package/test/evals/deterministic.eval.test.ts +376 -0
package/test/generate-core.test.ts +140 -229
package/test/implicit-batch.test.ts +22 -65
package/test/json-parse-error-handling.test.ts +463 -0
package/test/retry-policy-integration.test.ts +117 -0
package/test/retry.test.ts +1016 -0
package/test/schema.test.ts +55 -19
package/test/streaming.test.ts +316 -0
package/test/template.test.ts +1164 -0
package/test/tool-orchestration.test.ts +1040 -0
package/test/wrap-for-v3.test.ts +612 -0
package/vitest.config.js +6 -0
package/vitest.config.ts +20 -0
package/dist/rpc/auth.d.ts +0 -69
package/dist/rpc/auth.d.ts.map +0 -1
package/dist/rpc/auth.js +0 -136
package/dist/rpc/auth.js.map +0 -1
package/dist/rpc/client.d.ts +0 -62
package/dist/rpc/client.d.ts.map +0 -1
package/dist/rpc/client.js +0 -103
package/dist/rpc/client.js.map +0 -1
package/dist/rpc/deferred.d.ts +0 -60
package/dist/rpc/deferred.d.ts.map +0 -1
package/dist/rpc/deferred.js +0 -96
package/dist/rpc/deferred.js.map +0 -1
package/dist/rpc/index.d.ts +0 -22
package/dist/rpc/index.d.ts.map +0 -1
package/dist/rpc/index.js +0 -38
package/dist/rpc/index.js.map +0 -1
package/dist/rpc/local.d.ts +0 -42
package/dist/rpc/local.d.ts.map +0 -1
package/dist/rpc/local.js +0 -50
package/dist/rpc/local.js.map +0 -1
package/dist/rpc/server.d.ts +0 -165
package/dist/rpc/server.d.ts.map +0 -1
package/dist/rpc/server.js +0 -405
package/dist/rpc/server.js.map +0 -1
package/dist/rpc/session.d.ts +0 -32
package/dist/rpc/session.d.ts.map +0 -1
package/dist/rpc/session.js +0 -43
package/dist/rpc/session.js.map +0 -1
package/dist/rpc/transport.d.ts +0 -306
package/dist/rpc/transport.d.ts.map +0 -1
package/dist/rpc/transport.js +0 -731
package/dist/rpc/transport.js.map +0 -1
package/src/batch/anthropic.js +0 -256
package/src/batch/bedrock.js +0 -584
package/src/batch/cloudflare.js +0 -287
package/src/batch/google.js +0 -359
package/src/batch/index.js +0 -30
package/src/batch/memory.js +0 -187
package/src/batch/openai.js +0 -402
package/src/eval/index.js +0 -7
package/src/eval/models.js +0 -119
package/src/eval/runner.js +0 -147
package/test/schema.test.js +0 -96

package/test/tool-orchestration.test.ts ADDED Viewed

@@ -0,0 +1,1040 @@
+/**
+ * Tests for agentic tool orchestration
+ *
+ * These tests cover multi-turn model→tools→model loops for complex AI workflows.
+ * Tests are written first (TDD RED phase) - implementation follows.
+ */
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
+import { z } from 'zod'
+// Import types and classes we'll implement
+import {
+  AgenticLoop,
+  ToolRouter,
+  ToolValidator,
+  type Tool,
+  type ToolResult,
+  type LoopOptions,
+  type LoopResult,
+  type ValidationResult,
+} from '../src/tool-orchestration.js'
+/** Builds a stand-in model object whose only member is a fresh `generate` spy. */
+const createMockModel = () => {
+  const generate = vi.fn()
+  return { generate }
+}
+// Sample tools for testing
+/**
+ * Arithmetic fixture: applies `operation` to the operands `a` and `b`.
+ *
+ * Dividing by zero resolves to the string 'Division by zero' instead of
+ * rejecting. An operation outside the schema's enum rejects with an Error
+ * rather than silently resolving to `undefined`.
+ */
+const calculatorTool: Tool = {
+  name: 'calculator',
+  description: 'Performs basic math operations',
+  parameters: z.object({
+    operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
+    a: z.number(),
+    b: z.number(),
+  }),
+  execute: async ({ operation, a, b }) => {
+    switch (operation) {
+      case 'add': return a + b
+      case 'subtract': return a - b
+      case 'multiply': return a * b
+      case 'divide': return b !== 0 ? a / b : 'Division by zero'
+      // Only reachable when the arguments were not checked against `parameters`.
+      default: throw new Error(`Unsupported operation: ${operation}`)
+    }
+  },
+}
+/** Network-free fixture: echoes the requested URL back inside a canned 200 response. */
+const fetchTool: Tool = {
+  name: 'fetch',
+  description: 'Fetches data from a URL',
+  parameters: z.object({ url: z.string().url() }),
+  execute: async (args) => {
+    const data = `Content from ${args.url}`
+    return { data, status: 200 }
+  },
+}
+/** Fixture that resolves to 'completed' only after `delay` milliseconds have elapsed. */
+const slowTool: Tool = {
+  name: 'slow',
+  description: 'A tool that takes time to execute',
+  parameters: z.object({ delay: z.number() }),
+  execute: async (args) => {
+    await new Promise((done) => {
+      setTimeout(done, args.delay)
+    })
+    return 'completed'
+  },
+}
+/** Fixture whose execution always rejects, embedding the supplied message in the error. */
+const failingTool: Tool = {
+  name: 'failing',
+  description: 'A tool that always fails',
+  parameters: z.object({ message: z.string() }),
+  execute: async (args) => {
+    const reason = `Tool failed: ${args.message}`
+    throw new Error(reason)
+  },
+}
+// ============================================================================
+// AgenticLoop Tests - Multi-turn model→tools→model loops
+// ============================================================================
+describe('AgenticLoop', () => {
+  describe('basic loop execution', () => {
+    it('should execute a single tool call and return result', async () => {
+      // Scripted model: turn one requests the calculator, turn two answers in text.
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 5, b: 3 } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'The result of 5 + 3 is 8',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({ tools: [calculatorTool], maxSteps: 5 })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'What is 5 + 3?',
+      })
+      expect(outcome.text).toContain('8')
+      expect(outcome.steps).toBe(2)
+      expect(outcome.toolCalls).toHaveLength(1)
+      expect(outcome.toolCalls[0].name).toBe('calculator')
+      expect(outcome.toolCalls[0].result).toBe(8)
+    })
+    it('should handle multiple sequential tool calls', async () => {
+      // Two tool-calling turns in a row, then a plain-text turn that ends the loop.
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 5, b: 3 } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'multiply', a: 8, b: 2 } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: '5 + 3 = 8, then 8 * 2 = 16',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({ tools: [calculatorTool], maxSteps: 10 })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'Add 5 and 3, then multiply by 2',
+      })
+      expect(outcome.steps).toBe(3)
+      expect(outcome.toolCalls).toHaveLength(2)
+    })
+    it('should preserve conversation state across turns', async () => {
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 10, b: 5 } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'Done',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({ tools: [calculatorTool], maxSteps: 5 })
+      // Only the arguments recorded by the spy matter here, not the run's return value.
+      await agent.run({
+        model: { generate } as any,
+        prompt: 'Calculate 10 + 5',
+      })
+      // Verify the second call received the tool result in messages
+      const secondTurnInput = generate.mock.calls[1][0]
+      expect(secondTurnInput.messages).toBeDefined()
+      const carriesSum = (m: any) => m.role === 'tool' && m.content.includes('15')
+      const sawToolResult = secondTurnInput.messages.some(carriesSum)
+      expect(sawToolResult).toBe(true)
+    })
+  })
+  describe('maxSteps limit enforcement', () => {
+    it('should stop at maxSteps limit', async () => {
+      // Model keeps calling tools indefinitely
+      const endlessToolCall = {
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 1 } }],
+        finishReason: 'tool_call',
+      }
+      const generate = vi.fn().mockResolvedValue(endlessToolCall)
+      const agent = new AgenticLoop({ tools: [calculatorTool], maxSteps: 3 })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'Keep adding',
+      })
+      expect(outcome.steps).toBe(3)
+      expect(outcome.stopReason).toBe('max_steps')
+    })
+    it('should throw error when maxSteps is exceeded in strict mode', async () => {
+      const endlessToolCall = {
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 1 } }],
+        finishReason: 'tool_call',
+      }
+      const generate = vi.fn().mockResolvedValue(endlessToolCall)
+      const agent = new AgenticLoop({
+        tools: [calculatorTool],
+        maxSteps: 2,
+        strictMaxSteps: true,
+      })
+      // With strictMaxSteps set, hitting the cap is expected to reject.
+      const pendingRun = agent.run({
+        model: { generate } as any,
+        prompt: 'Keep adding',
+      })
+      await expect(pendingRun).rejects.toThrow('Max steps exceeded')
+    })
+  })
+  describe('parallel tool execution', () => {
+    it('should execute multiple tool calls in parallel', async () => {
+      // Count overlapping executions directly. A wall-clock bound on 10ms timers
+      // cannot tell parallel from sequential (3 * 10ms already fits under 40ms)
+      // and is sensitive to scheduler jitter.
+      let inFlight = 0
+      let peakInFlight = 0
+      const timingTool: Tool = {
+        name: 'timing',
+        description: 'Returns execution order',
+        parameters: z.object({ id: z.string() }),
+        execute: async ({ id }) => {
+          inFlight++
+          peakInFlight = Math.max(peakInFlight, inFlight)
+          await new Promise(r => setTimeout(r, 10))
+          inFlight--
+          return { id, time: Date.now() }
+        },
+      }
+      const loop = new AgenticLoop({
+        tools: [timingTool],
+        maxSteps: 5,
+        parallelExecution: true,
+      })
+      const mockGenerate = vi.fn()
+        .mockResolvedValueOnce({
+          toolCalls: [
+            { name: 'timing', arguments: { id: 'a' } },
+            { name: 'timing', arguments: { id: 'b' } },
+            { name: 'timing', arguments: { id: 'c' } },
+          ],
+          finishReason: 'tool_call',
+        })
+        .mockResolvedValueOnce({
+          text: 'All done',
+          finishReason: 'stop',
+        })
+      const result = await loop.run({
+        model: { generate: mockGenerate } as any,
+        prompt: 'Run all',
+      })
+      expect(result.toolCalls).toHaveLength(3)
+      // More than one call in flight at once proves they were not run one by one.
+      expect(peakInFlight).toBeGreaterThan(1)
+    })
+    it('should respect parallel execution limit', async () => {
+      // Ordered log of 'start:<id>' / 'end:<id>' events written by the tool itself.
+      const executionOrder: string[] = []
+      const trackingTool: Tool = {
+        name: 'track',
+        description: 'Tracks execution',
+        parameters: z.object({ id: z.string() }),
+        execute: async ({ id }) => {
+          executionOrder.push(`start:${id}`)
+          await new Promise(r => setTimeout(r, 20))
+          executionOrder.push(`end:${id}`)
+          return id
+        },
+      }
+      const loop = new AgenticLoop({
+        tools: [trackingTool],
+        maxSteps: 5,
+        parallelExecution: true,
+        maxParallelCalls: 2,
+      })
+      const mockGenerate = vi.fn()
+        .mockResolvedValueOnce({
+          toolCalls: [
+            { name: 'track', arguments: { id: '1' } },
+            { name: 'track', arguments: { id: '2' } },
+            { name: 'track', arguments: { id: '3' } },
+            { name: 'track', arguments: { id: '4' } },
+          ],
+          finishReason: 'tool_call',
+        })
+        .mockResolvedValueOnce({
+          text: 'Done',
+          finishReason: 'stop',
+        })
+      await loop.run({
+        model: { generate: mockGenerate } as any,
+        prompt: 'Track all',
+      })
+      // Guard against a vacuous pass: all four calls must have started and finished.
+      expect(executionOrder).toHaveLength(8)
+      // With maxParallelCalls: 2, at most 2 should start before any ends
+      let running = 0
+      let maxConcurrent = 0
+      for (const event of executionOrder) {
+        if (event.startsWith('start:')) running++
+        else running--
+        maxConcurrent = Math.max(maxConcurrent, running)
+      }
+      expect(maxConcurrent).toBeLessThanOrEqual(2)
+    })
+  })
+  describe('abort signal support', () => {
+    it('should abort execution when signal is triggered', async () => {
+      const agent = new AgenticLoop({ tools: [slowTool], maxSteps: 10 })
+      const abortController = new AbortController()
+      // Every turn requests a one-second tool call, far longer than the abort delay.
+      const slowToolCall = {
+        toolCalls: [{ name: 'slow', arguments: { delay: 1000 } }],
+        finishReason: 'tool_call',
+      }
+      const generate = vi.fn().mockResolvedValue(slowToolCall)
+      // Abort after 50ms
+      setTimeout(() => {
+        abortController.abort()
+      }, 50)
+      const pendingRun = agent.run({
+        model: { generate } as any,
+        prompt: 'Run slow tool',
+        abortSignal: abortController.signal,
+      })
+      await expect(pendingRun).rejects.toThrow('Aborted')
+    })
+  })
+  describe('onStep callback', () => {
+    it('should call onStep for each loop iteration', async () => {
+      // Collects whatever the loop passes to onStep, in call order.
+      const recorded: any[] = []
+      const agent = new AgenticLoop({
+        tools: [calculatorTool],
+        maxSteps: 5,
+        onStep: (step) => {
+          recorded.push(step)
+        },
+      })
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'Result is 3',
+        finishReason: 'stop',
+      })
+      await agent.run({
+        model: { generate } as any,
+        prompt: 'Add',
+      })
+      expect(recorded).toHaveLength(2)
+      expect(recorded[0].stepNumber).toBe(1)
+      expect(recorded[0].toolCalls).toHaveLength(1)
+      expect(recorded[1].stepNumber).toBe(2)
+    })
+  })
+})
+// ============================================================================
+// ToolRouter Tests - Routing tool calls to handlers
+// ============================================================================
+describe('ToolRouter', () => {
+  describe('tool registration and routing', () => {
+    it('should register and route to correct tool', async () => {
+      const toolRouter = new ToolRouter()
+      for (const tool of [calculatorTool, fetchTool]) {
+        toolRouter.register(tool)
+      }
+      const outcome = await toolRouter.route({
+        name: 'calculator',
+        arguments: { operation: 'multiply', a: 6, b: 7 },
+      })
+      expect(outcome.success).toBe(true)
+      expect(outcome.result).toBe(42)
+    })
+    it('should return error for unknown tool', async () => {
+      const toolRouter = new ToolRouter()
+      toolRouter.register(calculatorTool)
+      // No tool is registered under this name.
+      const outcome = await toolRouter.route({
+        name: 'unknown_tool',
+        arguments: {},
+      })
+      expect(outcome.success).toBe(false)
+      expect(outcome.error).toContain('not found')
+    })
+    it('should route multiple calls in order', async () => {
+      const toolRouter = new ToolRouter()
+      toolRouter.register(calculatorTool)
+      const calls = [
+        { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
+        { name: 'calculator', arguments: { operation: 'multiply', a: 3, b: 4 } },
+      ]
+      const outcomes = await toolRouter.routeAll(calls)
+      // Results are expected at the same positions as their calls.
+      expect(outcomes).toHaveLength(2)
+      expect(outcomes[0].result).toBe(3)
+      expect(outcomes[1].result).toBe(12)
+    })
+  })
+  describe('tool result formatting', () => {
+    it('should format tool results for model consumption', async () => {
+      const toolRouter = new ToolRouter()
+      toolRouter.register(calculatorTool)
+      const outcome = await toolRouter.route({
+        name: 'calculator',
+        arguments: { operation: 'add', a: 10, b: 20 },
+      })
+      // The formatted message should carry the tool role and the computed sum.
+      const message = toolRouter.formatResult(outcome)
+      expect(message.role).toBe('tool')
+      expect(message.content).toContain('30')
+    })
+    it('should format error results appropriately', async () => {
+      const toolRouter = new ToolRouter()
+      toolRouter.register(failingTool)
+      const outcome = await toolRouter.route({
+        name: 'failing',
+        arguments: { message: 'test error' },
+      })
+      // A failed call is still formatted as a tool message, flagged as an error.
+      const message = toolRouter.formatResult(outcome)
+      expect(message.role).toBe('tool')
+      expect(message.content).toContain('error')
+      expect(message.isError).toBe(true)
+    })
+  })
+  describe('parallel routing', () => {
+    it('should route multiple calls in parallel', async () => {
+      const router = new ToolRouter()
+      // One timestamp is appended each time the tool finishes its 20ms wait.
+      const executionTimes: number[] = []
+      const timingTool: Tool = {
+        name: 'time',
+        description: 'Records time',
+        parameters: z.object({ id: z.number() }),
+        execute: async ({ id }) => {
+          await new Promise(r => setTimeout(r, 20))
+          executionTimes.push(Date.now())
+          return id
+        },
+      }
+      router.register(timingTool)
+      const startTime = Date.now()
+      await router.routeAllParallel([
+        { name: 'time', arguments: { id: 1 } },
+        { name: 'time', arguments: { id: 2 } },
+        { name: 'time', arguments: { id: 3 } },
+      ])
+      const elapsed = Date.now() - startTime
+      // Guard against a vacuous pass: the elapsed bound alone would also hold
+      // if no tool had been executed at all.
+      expect(executionTimes).toHaveLength(3)
+      // Should complete in ~20ms, not 60ms
+      expect(elapsed).toBeLessThan(50)
+    })
+  })
+})
+// ============================================================================
+// ToolValidator Tests - Pre-execution validation
+// ============================================================================
+describe('ToolValidator', () => {
+  describe('argument validation', () => {
+    it('should validate arguments against tool schema', () => {
+      const checker = new ToolValidator()
+      checker.register(calculatorTool)
+      const verdict = checker.validate('calculator', { operation: 'add', a: 5, b: 10 })
+      expect(verdict.valid).toBe(true)
+    })
+    it('should reject invalid arguments', () => {
+      const checker = new ToolValidator()
+      checker.register(calculatorTool)
+      // Both the operation and the first operand violate the calculator schema.
+      const verdict = checker.validate('calculator', {
+        operation: 'invalid_op',
+        a: 'not a number',
+        b: 10,
+      })
+      expect(verdict.valid).toBe(false)
+      expect(verdict.errors).toBeDefined()
+      expect(verdict.errors!.length).toBeGreaterThan(0)
+    })
+    it('should reject missing required arguments', () => {
+      const checker = new ToolValidator()
+      checker.register(calculatorTool)
+      const verdict = checker.validate('calculator', {
+        operation: 'add',
+        a: 5,
+        // missing 'b'
+      })
+      expect(verdict.valid).toBe(false)
+    })
+    it('should return error for unknown tool', () => {
+      // Nothing is registered, so any tool name is unknown to the validator.
+      const checker = new ToolValidator()
+      const verdict = checker.validate('unknown', { foo: 'bar' })
+      expect(verdict.valid).toBe(false)
+      expect(verdict.errors![0]).toContain('not registered')
+    })
+  })
+  describe('batch validation', () => {
+    it('should validate multiple tool calls at once', () => {
+      const checker = new ToolValidator()
+      for (const tool of [calculatorTool, fetchTool]) {
+        checker.register(tool)
+      }
+      // Two well-formed calls followed by one whose operation is outside the enum.
+      const verdicts = checker.validateAll([
+        { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
+        { name: 'fetch', arguments: { url: 'https://example.com' } },
+        { name: 'calculator', arguments: { operation: 'bad', a: 1, b: 2 } },
+      ])
+      expect(verdicts).toHaveLength(3)
+      expect(verdicts[0].valid).toBe(true)
+      expect(verdicts[1].valid).toBe(true)
+      expect(verdicts[2].valid).toBe(false)
+    })
+  })
+})
+// ============================================================================
+// Tool Error Recovery Tests
+// ============================================================================
+describe('Tool Error Recovery', () => {
+  describe('error handling', () => {
+    it('should catch and report tool execution errors', async () => {
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'failing', arguments: { message: 'test' } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'Tool failed, moving on',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({
+        tools: [failingTool, calculatorTool],
+        maxSteps: 5,
+      })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'Try the failing tool',
+      })
+      // The thrown error should surface on the recorded call instead of rejecting run().
+      expect(outcome.toolCalls[0].error).toBeDefined()
+      expect(outcome.toolCalls[0].error).toContain('Tool failed')
+    })
+    it('should retry failed tool calls when retry is enabled', async () => {
+      // Counts executions so that only the very first one throws.
+      let callCount = 0
+      const flakeyTool: Tool = {
+        name: 'flakey',
+        description: 'Fails first attempt',
+        parameters: z.object({}),
+        execute: async () => {
+          callCount += 1
+          if (callCount === 1) throw new Error('First attempt fails')
+          return 'success'
+        },
+      }
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'flakey', arguments: {} }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'Got success',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({
+        tools: [flakeyTool],
+        maxSteps: 5,
+        retryFailedTools: true,
+        maxToolRetries: 3,
+      })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'Use flakey tool',
+      })
+      expect(outcome.toolCalls[0].result).toBe('success')
+      expect(outcome.toolCalls[0].retryCount).toBe(1)
+    })
+  })
+  describe('graceful degradation', () => {
+    it('should continue with partial results when tools fail', async () => {
+      // One turn requests a succeeding call and a failing call together.
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [
+          { name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } },
+          { name: 'failing', arguments: { message: 'error' } },
+        ],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'Calculator worked, other failed',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({
+        tools: [calculatorTool, failingTool],
+        maxSteps: 5,
+        continueOnError: true,
+      })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'Use both tools',
+      })
+      expect(outcome.toolCalls).toHaveLength(2)
+      expect(outcome.toolCalls[0].result).toBe(3)
+      expect(outcome.toolCalls[1].error).toBeDefined()
+    })
+  })
+  describe('timeout handling', () => {
+    it('should timeout long-running tools', async () => {
+      // The tool is asked to wait 1000ms while the loop allows only 50ms per tool.
+      const generate = vi.fn()
+      generate.mockResolvedValueOnce({
+        toolCalls: [{ name: 'slow', arguments: { delay: 1000 } }],
+        finishReason: 'tool_call',
+      })
+      generate.mockResolvedValueOnce({
+        text: 'Tool timed out',
+        finishReason: 'stop',
+      })
+      const agent = new AgenticLoop({
+        tools: [slowTool],
+        maxSteps: 5,
+        toolTimeout: 50, // 50ms timeout
+      })
+      const outcome = await agent.run({
+        model: { generate } as any,
+        prompt: 'Run slow tool',
+      })
+      expect(outcome.toolCalls[0].error).toContain('timeout')
+    })
+  })
+})
+// ============================================================================
+// Integration with generateText Tests
+// ============================================================================
+describe('Integration with generateText', () => {
+  // getToolsForSDK() must expose the calculator tool under the key `calculator`
+  // with its description and a parameters definition.
+  it('should work with AI SDK tool format', async () => {
+    const agent = new AgenticLoop({
+      tools: [calculatorTool],
+      maxSteps: 5,
+    })
+    // Verify tool conversion to AI SDK format
+    const { calculator } = agent.getToolsForSDK()
+    expect(calculator).toBeDefined()
+    expect(calculator.description).toBe('Performs basic math operations')
+    expect(calculator.parameters).toBeDefined()
+  })
+  // A completed run must list executed tools in `result.toolResults`.
+  it('should expose tool results through experimental_toolResultContent', async () => {
+    const generate = vi.fn()
+    // Turn 1: the mocked model requests calculator(add, 2, 3).
+    generate.mockResolvedValueOnce({
+      toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 2, b: 3 } }],
+      finishReason: 'tool_call',
+    })
+    // Turn 2: the mocked model answers with the final text.
+    generate.mockResolvedValueOnce({
+      text: '5',
+      finishReason: 'stop',
+    })
+    const agent = new AgenticLoop({
+      tools: [calculatorTool],
+      maxSteps: 5,
+    })
+    const outcome = await agent.run({
+      model: { generate } as any,
+      prompt: 'Add 2 + 3',
+    })
+    // Verify tool results are exposed in a format compatible with AI SDK
+    expect(outcome.toolResults).toBeDefined()
+    const firstToolResult = outcome.toolResults[0]
+    expect(firstToolResult.toolName).toBe('calculator')
+    expect(firstToolResult.result).toBe(5)
+  })
+})
+// ============================================================================
+// Token Usage Tracking Tests
+// ============================================================================
+describe('Token Usage Tracking', () => {
+  // Each mocked model turn reports its own usage; with `trackUsage: true` the
+  // run result is expected to contain the sum over both turns.
+  it('should track token usage across multi-turn conversations', async () => {
+    const firstTurnUsage = { promptTokens: 50, completionTokens: 20, totalTokens: 70 }
+    const secondTurnUsage = { promptTokens: 80, completionTokens: 10, totalTokens: 90 }
+    const generate = vi.fn()
+    generate.mockResolvedValueOnce({
+      toolCalls: [{ name: 'calculator', arguments: { operation: 'add', a: 1, b: 2 } }],
+      finishReason: 'tool_call',
+      usage: firstTurnUsage,
+    })
+    generate.mockResolvedValueOnce({
+      text: 'Result is 3',
+      finishReason: 'stop',
+      usage: secondTurnUsage,
+    })
+    const agent = new AgenticLoop({
+      tools: [calculatorTool],
+      maxSteps: 5,
+      trackUsage: true,
+    })
+    const outcome = await agent.run({
+      model: { generate } as any,
+      prompt: 'Add 1 + 2',
+    })
+    expect(outcome.usage).toBeDefined()
+    expect(outcome.usage!.promptTokens).toBe(130) // 50 + 80
+    expect(outcome.usage!.completionTokens).toBe(30) // 20 + 10
+    expect(outcome.usage!.totalTokens).toBe(160) // 70 + 90
+  })
+})
+// ============================================================================
+// cachedTool Tests - Tool result caching with cleanup
+// ============================================================================
+import { cachedTool, type CachedTool } from '../src/tool-orchestration.js'
+describe('cachedTool', () => {
+  describe('basic caching behavior', () => {
+    // Identical arguments must be answered from the cache (the wrapped tool
+    // runs once); different arguments must reach the wrapped tool again.
+    it('should cache tool results', async () => {
+      let executionCount = 0
+      const countingTool: Tool = {
+        name: 'counting',
+        description: 'Counts executions',
+        parameters: z.object({ key: z.string() }),
+        execute: async ({ key }) => {
+          executionCount++
+          return { key, count: executionCount }
+        },
+      }
+      const cached = cachedTool(countingTool, { ttl: 1000 })
+      // First call executes the tool
+      const result1 = await cached.execute({ key: 'test' })
+      expect(result1).toEqual({ key: 'test', count: 1 })
+      // Second call with same key returns cached result
+      const result2 = await cached.execute({ key: 'test' })
+      expect(result2).toEqual({ key: 'test', count: 1 })
+      expect(executionCount).toBe(1) // Only executed once
+      // Different key executes again
+      const result3 = await cached.execute({ key: 'other' })
+      expect(result3).toEqual({ key: 'other', count: 2 })
+    })
+    // Fake timers let the 100ms TTL elapse without real waiting.
+    it('should expire cached entries after TTL', async () => {
+      vi.useFakeTimers()
+      // try/finally guarantees real timers are restored even when an assertion
+      // throws, so a failure here cannot leave later tests on fake timers.
+      try {
+        let executionCount = 0
+        const countingTool: Tool = {
+          name: 'counting',
+          description: 'Counts executions',
+          parameters: z.object({ key: z.string() }),
+          execute: async ({ key }) => {
+            executionCount++
+            return { key, count: executionCount }
+          },
+        }
+        const cached = cachedTool(countingTool, { ttl: 100 })
+        // First call
+        await cached.execute({ key: 'test' })
+        expect(executionCount).toBe(1)
+        // After TTL expires
+        vi.advanceTimersByTime(150)
+        // Should execute again since cache expired
+        await cached.execute({ key: 'test' })
+        expect(executionCount).toBe(2)
+      } finally {
+        vi.useRealTimers()
+      }
+    })
+  })
+  describe('cache cleanup', () => {
+    // With `cleanupIntervalMs` set, expired entries are expected to disappear
+    // from the cache without any further execute() call.
+    it('should periodically clean up expired entries', async () => {
+      vi.useFakeTimers()
+      // try/finally guarantees real timers are restored even when an assertion
+      // throws, so a failure here cannot leave later tests on fake timers.
+      try {
+        let executionCount = 0
+        const countingTool: Tool = {
+          name: 'counting',
+          description: 'Counts executions',
+          parameters: z.object({ key: z.string() }),
+          execute: async ({ key }) => {
+            executionCount++
+            return { key, count: executionCount }
+          },
+        }
+        const cached = cachedTool(countingTool, {
+          ttl: 100,
+          cleanupIntervalMs: 50,
+        }) as CachedTool
+        // Execute to populate cache
+        await cached.execute({ key: 'entry1' })
+        await cached.execute({ key: 'entry2' })
+        expect(cached.cacheSize()).toBe(2)
+        // Wait for TTL to expire and cleanup to run
+        vi.advanceTimersByTime(150)
+        // Entries should be cleaned up automatically
+        expect(cached.cacheSize()).toBe(0)
+        // Cleanup timer
+        cached.destroy()
+      } finally {
+        vi.useRealTimers()
+      }
+    })
+    // destroy() must empty the cache immediately and leave it empty afterwards.
+    it('should stop cleanup timer and clear cache when destroyed', async () => {
+      vi.useFakeTimers()
+      // Restore real timers on every exit path, including failed assertions.
+      try {
+        const countingTool: Tool = {
+          name: 'counting',
+          description: 'Counts executions',
+          parameters: z.object({ key: z.string() }),
+          execute: async ({ key }) => ({ key }),
+        }
+        const cached = cachedTool(countingTool, {
+          ttl: 100,
+          cleanupIntervalMs: 50,
+        }) as CachedTool
+        await cached.execute({ key: 'entry1' })
+        expect(cached.cacheSize()).toBe(1)
+        // Destroy stops cleanup timer and clears cache to prevent memory leaks
+        cached.destroy()
+        // Cache should be cleared immediately on destroy
+        expect(cached.cacheSize()).toBe(0)
+        // Advancing time should have no effect (timer is stopped)
+        vi.advanceTimersByTime(150)
+        expect(cached.cacheSize()).toBe(0)
+      } finally {
+        vi.useRealTimers()
+      }
+    })
+    // clearCache() must drop every entry regardless of the (long) TTL.
+    it('should clear all cache entries on clearCache()', async () => {
+      const countingTool: Tool = {
+        name: 'counting',
+        description: 'Counts executions',
+        parameters: z.object({ key: z.string() }),
+        execute: async ({ key }) => ({ key }),
+      }
+      const cached = cachedTool(countingTool, { ttl: 60000 }) as CachedTool
+      await cached.execute({ key: 'entry1' })
+      await cached.execute({ key: 'entry2' })
+      await cached.execute({ key: 'entry3' })
+      expect(cached.cacheSize()).toBe(3)
+      cached.clearCache()
+      expect(cached.cacheSize()).toBe(0)
+      cached.destroy()
+    })
+  })
+  describe('max cache size (LRU eviction)', () => {
+    // With `maxSize: 3`, inserting a fourth key must keep the size at 3 and
+    // force the first-inserted key to be executed again on its next use.
+    it('should evict oldest entries when maxSize is reached', async () => {
+      vi.useFakeTimers()
+      // try/finally guarantees real timers are restored even when an assertion
+      // throws, so a failure here cannot leave later tests on fake timers.
+      try {
+        let executionCount = 0
+        const countingTool: Tool = {
+          name: 'counting',
+          description: 'Counts executions',
+          parameters: z.object({ key: z.string() }),
+          execute: async ({ key }) => {
+            executionCount++
+            return { key, count: executionCount }
+          },
+        }
+        const cached = cachedTool(countingTool, {
+          ttl: 60000,
+          maxSize: 3,
+        }) as CachedTool
+        // Fill cache to max; the clock is advanced between inserts so each
+        // entry is created at a distinct fake-clock time
+        await cached.execute({ key: 'a' })
+        vi.advanceTimersByTime(10)
+        await cached.execute({ key: 'b' })
+        vi.advanceTimersByTime(10)
+        await cached.execute({ key: 'c' })
+        vi.advanceTimersByTime(10)
+        expect(cached.cacheSize()).toBe(3)
+        expect(executionCount).toBe(3)
+        // Adding 4th entry should evict oldest ('a')
+        await cached.execute({ key: 'd' })
+        expect(cached.cacheSize()).toBe(3)
+        // Accessing 'a' should re-execute since it was evicted
+        await cached.execute({ key: 'a' })
+        expect(executionCount).toBe(5) // a, b, c, d, then 'a' again
+        cached.destroy()
+      } finally {
+        vi.useRealTimers()
+      }
+    })
+    // A cache hit must refresh the entry's recency so it is not the next one evicted.
+    it('should update LRU order on cache hit', async () => {
+      vi.useFakeTimers()
+      // Restore real timers on every exit path, including failed assertions.
+      try {
+        let executionCount = 0
+        const countingTool: Tool = {
+          name: 'counting',
+          description: 'Counts executions',
+          parameters: z.object({ key: z.string() }),
+          execute: async ({ key }) => {
+            executionCount++
+            return { key, count: executionCount }
+          },
+        }
+        const cached = cachedTool(countingTool, {
+          ttl: 60000,
+          maxSize: 3,
+        }) as CachedTool
+        // Fill cache: a, b, c (oldest to newest)
+        await cached.execute({ key: 'a' })
+        vi.advanceTimersByTime(10)
+        await cached.execute({ key: 'b' })
+        vi.advanceTimersByTime(10)
+        await cached.execute({ key: 'c' })
+        vi.advanceTimersByTime(10)
+        // Access 'a' to make it recently used
+        await cached.execute({ key: 'a' }) // Cache hit
+        vi.advanceTimersByTime(10)
+        expect(executionCount).toBe(3) // No new execution
+        // Add 'd' - should evict 'b' (now oldest) not 'a'
+        await cached.execute({ key: 'd' }) // 4th execution
+        // 'b' was evicted, 'a' and 'c' remain
+        await cached.execute({ key: 'b' }) // Should re-execute (5th execution)
+        expect(executionCount).toBe(5)
+        await cached.execute({ key: 'a' }) // Still cached
+        expect(executionCount).toBe(5)
+        cached.destroy()
+      } finally {
+        vi.useRealTimers()
+      }
+    })
+  })
+  describe('resource cleanup on tool destruction', () => {
+    // destroy() must empty the cache, and later execute() calls must still
+    // succeed without adding anything back to it.
+    it('should clean up timers and memory when destroy is called', async () => {
+      const echoTool: Tool = {
+        name: 'test',
+        description: 'Test tool',
+        parameters: z.object({ key: z.string() }),
+        execute: async ({ key }) => ({ key }),
+      }
+      const wrapped = cachedTool(echoTool, {
+        ttl: 1000,
+        cleanupIntervalMs: 100,
+      }) as CachedTool
+      // One call populates the cache with a single entry.
+      await wrapped.execute({ key: 'test' })
+      expect(wrapped.cacheSize()).toBe(1)
+      wrapped.destroy()
+      expect(wrapped.cacheSize()).toBe(0)
+      // Should still work after destroy but without caching
+      await wrapped.execute({ key: 'test2' })
+      expect(wrapped.cacheSize()).toBe(0)
+    })
+  })
+})