npm - @strav/brain - Versions diffs - 1.0.0-alpha.8 → 1.0.0-alpha.9 - Mend

@strav/brain 1.0.0-alpha.8 → 1.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/package.json +2 -2
package/src/agent.ts +59 -0
package/src/agent_result.ts +32 -0
package/src/agent_runner.ts +61 -0
package/src/brain_manager.ts +67 -1
package/src/brain_provider.ts +10 -1
package/src/define_tool.ts +42 -0
package/src/index.ts +20 -6
package/src/provider.ts +26 -0
package/src/providers/anthropic_provider.ts +175 -5
package/src/tool.ts +35 -0
package/src/tool_execution_error.ts +26 -0
package/src/types.ts +34 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@strav/brain",
-  "version": "1.0.0-alpha.8",
+  "version": "1.0.0-alpha.9",
   "description": "Strav AI module — unified Provider interface, BrainManager, threads, prompt caching. Anthropic provider in V1; OpenAI / Gemini / DeepSeek follow.",
   "type": "module",
   "main": "./src/index.ts",
@@ -19,7 +19,7 @@
     "access": "public"
   },
   "dependencies": {
-    "@strav/kernel": "1.0.0-alpha.8",
+    "@strav/kernel": "1.0.0-alpha.9",
     "@anthropic-ai/sdk": "^0.100.0"
   },
   "peerDependencies": {

package/src/agent.ts ADDED Viewed

@@ -0,0 +1,59 @@
+/**
+ * `Agent` — declarative base class for AI agents.
+ *
+ * Apps subclass and set the static-ish properties: which model to
+ * use, what the agent's persona is, which tools it has access to,
+ * and an optional iteration ceiling. The `BrainManager.agent(Class)`
+ * call resolves an instance via the container, builds an
+ * `AgentRunner`, and lets the app stream input + context into it.
+ *
+ * ```ts
+ * @inject()
+ * class ResearchAgent extends Agent {
+ *   override readonly instructions = 'You are a meticulous research assistant.'
+ *   override readonly tools = [searchTool, summarizeTool]
+ *   override readonly tier: ModelTier = 'powerful'
+ * }
+ *
+ * const result = await brain.agent(ResearchAgent)
+ *   .input('What is the current state of bun.sql?')
+ *   .context({ userId: '01ABC...' })
+ *   .run()
+ * ```
+ *
+ * V1 makes the configuration declarative-only — apps that need
+ * runtime knobs (per-request model overrides, dynamic tool sets)
+ * use `BrainManager.runTools(...)` directly. Adding per-instance
+ * overrides on the Agent class is a future ergonomic slice.
+ */
+import type { ModelTier } from './types.ts'
+import type { Tool } from './tool.ts'
+/**
+ * Declarative agent definition. Subclasses supply `instructions` and
+ * may override the other readonly fields; `AgentRunner.run()` reads
+ * them to build the options it passes to `BrainManager.runTools`.
+ */
+export abstract class Agent {
+  /** System prompt — the persona / instructions the model sees on every turn. */
+  abstract readonly instructions: string
+  /** Tools the agent can call. Empty array → the model answers without tools. */
+  readonly tools: readonly Tool[] = []
+  /**
+   * Override the configured default provider. Only forwarded to the
+   * run options when set; otherwise the brain's default provider is used.
+   */
+  readonly provider?: string
+  /** Explicit model ID. Wins over `tier`. Only forwarded to the run options when set. */
+  readonly model?: string
+  /** Tier sugar. Default `'powerful'` for agentic work. */
+  readonly tier: ModelTier = 'powerful'
+  /**
+   * Safety ceiling on the agentic loop. Default `10`. Hitting it
+   * returns a result with `stopReason: 'max_iterations'`; the loop
+   * doesn't throw because partial progress (assistant messages, tool
+   * results) is usually still useful to surface.
+   */
+  readonly maxIterations: number = 10
+  /** Hard cap on per-call response tokens. Default `4096`. */
+  readonly maxTokens: number = 4096
+}

package/src/agent_result.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * `AgentResult` — what an agentic loop returns when it ends. Combines
+ * the final assistant `text`, the full message history (including
+ * tool calls + results so apps can render the trace), the total
+ * iteration count (how many tool-use round-trips the loop made),
+ * and aggregated token usage across every model call inside the
+ * loop.
+ *
+ * `stopReason` is the provider's terminal stop reason (typically
+ * `'end_turn'`). When the loop exits because it hit `maxIterations`,
+ * `stopReason` is `'max_iterations'` — distinct from the provider
+ * value so apps can detect "the model would have kept going."
+ */
+import type { ChatUsage, Message } from './types.ts'
+/** Outcome of one agentic loop run, as returned by `Provider.runWithTools`. */
+export interface AgentResult {
+  /**
+   * Concatenated text from the final assistant turn. When the loop
+   * stops at `'max_iterations'` the final turn is a tool-calling turn,
+   * so this may be empty.
+   */
+  text: string
+  /** Full message history of the loop, including tool_use / tool_result blocks. */
+  messages: Message[]
+  /** Number of tool-use rounds. `0` when the model answered without tools. */
+  iterations: number
+  /**
+   * Terminal stop reason. Either the provider's stop_reason (typically
+   * `'end_turn'`) or the framework-specific `'max_iterations'` when
+   * the loop hit its iteration ceiling.
+   */
+  stopReason: string
+  /** Token usage summed across every model call in the loop. */
+  usage: ChatUsage
+}

package/src/agent_runner.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * `AgentRunner` — fluent builder returned by `BrainManager.agent(Class)`.
+ *
+ * Carries the agent instance + an input message + an optional
+ * per-run context bag. `run()` translates the agent's declarative
+ * configuration into a `BrainManager.runTools` call and returns the
+ * resulting `AgentResult`.
+ *
+ * Designed to chain: `brain.agent(R).input(text).context({...}).run()`.
+ * Apps that need the full Message-array surface bypass the runner
+ * and call `BrainManager.runTools(messages, tools, options)` directly.
+ */
+import type { Agent } from './agent.ts'
+import type { AgentResult } from './agent_result.ts'
+import type { BrainManager } from './brain_manager.ts'
+import type { ChatOptions, Message } from './types.ts'
+/**
+ * Chainable builder that pairs one `Agent` instance with a user
+ * prompt and a per-run context bag, then delegates to
+ * `BrainManager.runTools` when `run()` is invoked.
+ */
+export class AgentRunner {
+  /** Text set through `input()`; stays `undefined` until then. */
+  private userInput: string | undefined
+  /** Accumulated result of every `context()` call so far. */
+  private runContext: Record<string, unknown> = {}
+  constructor(
+    private readonly brain: BrainManager,
+    private readonly agent: Agent,
+  ) {}
+  /** Record the user input for this run. Must be called before `run()`. */
+  input(text: string): this {
+    this.userInput = text
+    return this
+  }
+  /**
+   * Merge `data` into the per-run context bag that is forwarded as
+   * `options.context` (and from there to each tool's `ctx.context`).
+   * Later calls win on key collisions; the previous bag object is
+   * replaced, never mutated. Intended for per-request data the tools
+   * need but the model shouldn't see directly (auth identity, tenant
+   * id, request-id for tracing).
+   */
+  context(data: Record<string, unknown>): this {
+    this.runContext = Object.assign({}, this.runContext, data)
+    return this
+  }
+  /**
+   * Execute the agent: wraps the input in a single `user` message,
+   * maps the agent's declarative fields onto run options, and calls
+   * `BrainManager.runTools`.
+   *
+   * @throws Error when `input()` was never called.
+   */
+  async run(): Promise<AgentResult> {
+    const { agent, userInput } = this
+    if (userInput === undefined) {
+      throw new Error('AgentRunner.run: input() must be called before run().')
+    }
+    const options: ChatOptions & { maxIterations?: number; context?: Record<string, unknown> } = {
+      tier: agent.tier,
+      maxTokens: agent.maxTokens,
+      system: agent.instructions,
+      maxIterations: agent.maxIterations,
+      context: this.runContext,
+      // `model` / `provider` are only included when the agent declares them.
+      ...(agent.model === undefined ? {} : { model: agent.model }),
+      ...(agent.provider === undefined ? {} : { provider: agent.provider }),
+    }
+    const messages: Message[] = [{ role: 'user', content: userInput }]
+    return this.brain.runTools(messages, agent.tools, options)
+  }
+}

package/src/brain_manager.ts CHANGED Viewed

@@ -19,6 +19,9 @@
  * ```
  */
+import type { Agent } from './agent.ts'
+import type { AgentResult } from './agent_result.ts'
+import { AgentRunner } from './agent_runner.ts'
 import { BrainError } from './brain_error.ts'
 import type { ModelTier } from './types.ts'
 import type {
@@ -27,9 +30,13 @@ import type {
   Message,
   StreamEvent,
 } from './types.ts'
-import type { Provider } from './provider.ts'
+import type { Provider, RunWithToolsOptions } from './provider.ts'
+import type { Tool } from './tool.ts'
 import { DEFAULT_TIERS } from './brain_config.ts'
+/**
+ * Container-aware Agent constructor resolver — `BrainProvider` installs one wired to `app.resolve(...)`.
+ * Receives the Agent subclass constructor and returns an instance of that subclass.
+ */
+export type AgentResolver = <A extends Agent>(cls: new (...args: never[]) => A) => A
 export interface BrainManagerOptions {
   /** Name of the default provider — must exist in `providers`. */
   default: string
@@ -117,8 +124,67 @@ export class BrainManager {
     return provider.countTokens(messages, resolved)
   }
+  /**
+   * Entry point for the agentic loop. Resolves the provider named in
+   * `options.provider`, normalizes `input` into a message list,
+   * applies the manager's option defaults, and delegates to the
+   * provider's `runWithTools`, which executes requested tools and
+   * loops until a terminal stop reason or `maxIterations`.
+   *
+   * Throws `BrainError` when the resolved provider doesn't implement
+   * `runWithTools` (V1: only `AnthropicProvider` does; OpenAI /
+   * Gemini / DeepSeek providers don't yet).
+   */
+  async runTools(
+    input: string | readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptions = {},
+  ): Promise<AgentResult> {
+    const target = this.provider(options.provider)
+    if (target.runWithTools) {
+      return target.runWithTools(
+        normalizeInput(input),
+        tools,
+        this.applyDefaults(options) as RunWithToolsOptions,
+      )
+    }
+    throw new BrainError(
+      `BrainManager.runTools: provider "${target.name}" does not implement runWithTools. Use a provider that supports tool use (V1: Anthropic).`,
+      { context: { provider: target.name } },
+    )
+  }
+  /**
+   * Build an `AgentRunner` for the given `Agent` subclass. When
+   * `instance` is supplied it is used as-is; otherwise the agent is
+   * obtained through `resolveAgent`, which goes through the installed
+   * resolver (the container, when `BrainProvider` wired one) so that
+   * `@inject()`-decorated subclasses get their constructor
+   * dependencies (Repositories, services, etc.).
+   */
+  agent<A extends Agent>(AgentClass: new (...args: never[]) => A, instance?: A): AgentRunner {
+    return new AgentRunner(this, instance ?? this.resolveAgent(AgentClass))
+  }
   // ─── Internal ────────────────────────────────────────────────────────────
+  /** Obtain an agent instance: via the installed resolver when present, else by direct construction. */
+  private resolveAgent<A extends Agent>(AgentClass: new (...args: never[]) => A): A {
+    if (!this.agentResolver) {
+      // No resolver installed: assume the Agent class can be built with
+      // zero constructor arguments. Apps that need DI on the agent
+      // register a resolver through `setAgentResolver` (BrainProvider
+      // wires this to the container).
+      return new (AgentClass as unknown as new () => A)()
+    }
+    return this.agentResolver(AgentClass)
+  }
+  /**
+   * Internal — `BrainProvider` calls this at boot to plug in the
+   * container's resolution function so `brain.agent(MyAgent)` runs
+   * `app.resolve(MyAgent)` under the hood. Apps that build a
+   * `BrainManager` by hand for tests can leave this unset and pass
+   * a pre-constructed agent to `brain.agent(_, instance)`.
+   *
+   * Calling it again replaces the previously installed resolver.
+   */
+  setAgentResolver(resolver: AgentResolver): void {
+    this.agentResolver = resolver
+  }
+  /** Resolver installed via `setAgentResolver`; `undefined` until one is set. */
+  private agentResolver: AgentResolver | undefined
   private applyDefaults(options: ChatOptions): ChatOptions {
     const resolved: ChatOptions = { ...options }
     if (resolved.model === undefined && resolved.tier !== undefined) {

package/src/brain_provider.ts CHANGED Viewed

@@ -64,7 +64,16 @@ export class BrainProvider extends ServiceProvider {
       }
       if (config.tiers !== undefined) options.tiers = config.tiers
       if (config.cache?.auto !== undefined) options.defaultCache = config.cache.auto
-      return new BrainManager(options)
+      const manager = new BrainManager(options)
+      // Plug in the container so `brain.agent(MyAgent)` resolves
+      // its constructor deps through `@inject()` like every other
+      // injected class. The variance widening at the boundary
+      // (`never[]` ↔ `any[]`) is purely a TS typing artifact — the
+      // container call is identical to a direct `c.resolve(MyAgent)`.
+      manager.setAgentResolver(<A>(cls: new (...args: never[]) => A) =>
+        c.resolve(cls as unknown as new (...args: unknown[]) => A),
+      )
+      return manager
     })
   }

package/src/define_tool.ts ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * `defineTool({ name, description, inputSchema, execute })` — typed
+ * factory mirroring `defineWorkflow` / `defineMachine` / `defineDurable`.
+ *
+ * ```ts
+ * const getWeather = defineTool({
+ *   name: 'get_weather',
+ *   description: 'Get current weather for a location.',
+ *   inputSchema: {
+ *     type: 'object',
+ *     properties: { city: { type: 'string' } },
+ *     required: ['city'],
+ *   },
+ *   execute: async ({ city }: { city: string }, ctx) => {
+ *     return weatherService.lookup(city, ctx.context.userId as string)
+ *   },
+ * })
+ * ```
+ *
+ * The generic parameters are usually inferred from `execute`'s first
+ * arg + return type; apps that want explicit typing pass them.
+ */
+import type { Tool, ToolContext } from './tool.ts'
+/** Input accepted by `defineTool`; the same four members as `Tool`. */
+export interface DefineToolSpec<TInput, TOutput> {
+  /** Tool name copied onto the resulting `Tool`. */
+  name: string
+  /** Description copied onto the resulting `Tool`. */
+  description: string
+  /** JSON Schema object describing the tool's input. */
+  inputSchema: Record<string, unknown>
+  /** Executor run when the tool is called; receives the input and the per-call `ToolContext`. */
+  execute(input: TInput, ctx: ToolContext): Promise<TOutput>
+}
+/**
+ * Build a `Tool` from a spec object.
+ *
+ * `name`, `description` and `inputSchema` are copied onto a fresh
+ * object. `execute` is wrapped so the call is always made on the
+ * original `spec`; a spec whose `execute` reads `this` (for example
+ * a class instance carrying its own dependencies) keeps working
+ * after being turned into a `Tool`.
+ *
+ * @param spec Tool definition: name, description, JSON Schema, executor.
+ * @returns A `Tool` whose `execute` delegates to `spec.execute`.
+ */
+export function defineTool<TInput = unknown, TOutput = unknown>(
+  spec: DefineToolSpec<TInput, TOutput>,
+): Tool<TInput, TOutput> {
+  return {
+    name: spec.name,
+    description: spec.description,
+    inputSchema: spec.inputSchema,
+    // Delegate through `spec` instead of copying the function reference,
+    // so `this` inside `spec.execute` is still the spec object.
+    execute: (input, ctx) => spec.execute(input, ctx),
+  }
+}

package/src/index.ts CHANGED Viewed

@@ -1,10 +1,15 @@
 // Public API of @strav/brain.
 //
-// Foundation slice: Provider interface + AnthropicProvider, BrainManager,
-// Thread, BrainProvider service-wiring, prompt caching. Tools / agents /
-// MCP / embeddings / other providers (OpenAI/Google/DeepSeek) land in
-// follow-up slices.
+// V1: Provider interface + AnthropicProvider, BrainManager, Thread,
+// BrainProvider service-wiring, prompt caching.
+// V2 (this slice): tools + agents — defineTool, Agent base + AgentRunner,
+// BrainManager.runTools / .agent(Class), Provider.runWithTools.
+// Still deferred: MCP, embeddings, streaming agent loops, server-side
+// tools, structured outputs, other providers.
+export { Agent } from './agent.ts'
+export type { AgentResult } from './agent_result.ts'
+export { AgentRunner } from './agent_runner.ts'
 export {
   type AnthropicProviderConfig,
   type BrainCacheConfig,
@@ -14,11 +19,18 @@ export {
   type ProviderConfig,
 } from './brain_config.ts'
 export { BrainError } from './brain_error.ts'
-export { BrainManager, type BrainManagerOptions } from './brain_manager.ts'
+export {
+  type AgentResolver,
+  BrainManager,
+  type BrainManagerOptions,
+} from './brain_manager.ts'
 export { BrainProvider } from './brain_provider.ts'
+export { defineTool, type DefineToolSpec } from './define_tool.ts'
 export { AnthropicProvider } from './providers/anthropic_provider.ts'
-export type { Provider } from './provider.ts'
+export type { Provider, RunWithToolsOptions } from './provider.ts'
 export { Thread, type ThreadOptions, type ThreadState } from './thread.ts'
+export type { Tool, ToolContext } from './tool.ts'
+export { ToolExecutionError } from './tool_execution_error.ts'
 export type {
   ChatOptions,
   ChatResult,
@@ -29,4 +41,6 @@ export type {
   StreamEvent,
   SystemPrompt,
   TextBlock,
+  ToolResultBlock,
+  ToolUseBlock,
 } from './types.ts'

package/src/provider.ts CHANGED Viewed

@@ -12,6 +12,8 @@
  * subclassing.
  */
+import type { AgentResult } from './agent_result.ts'
+import type { Tool } from './tool.ts'
 import type {
   ChatOptions,
   ChatResult,
@@ -19,6 +21,13 @@ import type {
   StreamEvent,
 } from './types.ts'
+/** `ChatOptions` plus the two knobs specific to the agentic tool loop. */
+export interface RunWithToolsOptions extends ChatOptions {
+  /** Safety ceiling on tool-use round-trips. Default `10`. */
+  maxIterations?: number
+  /**
+   * Free-form context bag passed to every tool's `execute(input, ctx)`
+   * as `ctx.context`. `AnthropicProvider` substitutes an empty object
+   * when omitted.
+   */
+  context?: Record<string, unknown>
+}
 export interface Provider {
   /** Identifier — matches the `config.brain.providers` key. */
   readonly name: string
@@ -45,4 +54,21 @@ export interface Provider {
    * implementation may approximate.
    */
   countTokens?(messages: readonly Message[], options?: ChatOptions): Promise<number>
+  /**
+   * Agentic loop. Sends the `messages` + `tools` to the model;
+   * detects tool-use blocks in the response; runs the matching
+   * tool's `execute`; appends the result and re-asks. Loops until
+   * the model returns `stop_reason: 'end_turn'` (or its
+   * provider-specific equivalent) or `maxIterations` is hit.
+   *
+   * Optional on the interface so providers that don't (yet) support
+   * tool use can omit it; `BrainManager.runTools` throws a
+   * `BrainError` when the configured provider lacks the method.
+   */
+  runWithTools?(
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options?: RunWithToolsOptions,
+  ): Promise<AgentResult>
 }

package/src/providers/anthropic_provider.ts CHANGED Viewed

@@ -24,16 +24,23 @@
  */
 import Anthropic from '@anthropic-ai/sdk'
+import type { AgentResult } from '../agent_result.ts'
 import type { AnthropicProviderConfig } from '../brain_config.ts'
 import { DEFAULT_MODEL } from '../brain_config.ts'
-import type { Provider } from '../provider.ts'
+import type { Provider, RunWithToolsOptions } from '../provider.ts'
+import type { Tool } from '../tool.ts'
+import { ToolExecutionError } from '../tool_execution_error.ts'
 import type {
   ChatOptions,
   ChatResult,
   ChatUsage,
+  ContentBlock,
   Message,
   StreamEvent,
   SystemPrompt,
+  TextBlock,
+  ToolResultBlock,
+  ToolUseBlock,
 } from '../types.ts'
 const EPHEMERAL_CACHE = { type: 'ephemeral' } as const
@@ -109,6 +116,110 @@ export class AnthropicProvider implements Provider {
     return result.input_tokens
   }
+  /**
+   * Agentic loop. Send → detect tool_use blocks → execute → append
+   * tool_result → re-send, until the model stops for any reason
+   * other than `tool_use` or the iteration ceiling is hit.
+   *
+   * The same tool definitions are re-sent on every request —
+   * Anthropic doesn't carry tool state across requests; the model
+   * rediscovers them from the `tools` array each turn. Apps that
+   * care about cache hits keep the tool list stable across runs.
+   *
+   * @param messages Conversation so far. Copied; the caller's array is not mutated.
+   * @param tools Tools the model may call, looked up by `name`.
+   * @param options Chat options plus `maxIterations` (default `10`)
+   *   and `context` (default `{}`), which is handed to each tool.
+   *   The ceiling is checked after a tool round completes, so at
+   *   least one model call is always made.
+   * @returns The final text, full message trace, number of tool
+   *   rounds, stop reason (`'max_iterations'` when the ceiling was
+   *   hit) and token usage summed over every model call.
+   * @throws ToolExecutionError when the model calls an unregistered
+   *   tool or a tool's `execute` throws; the loop aborts immediately.
+   */
+  async runWithTools(
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptions = {},
+  ): Promise<AgentResult> {
+    const maxIterations = options.maxIterations ?? 10
+    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
+    // The wire-format tool definitions never change between turns,
+    // so translate them once instead of on every iteration.
+    const toolDefinitions: Anthropic.Tool[] = tools.map((t) => ({
+      name: t.name,
+      description: t.description,
+      input_schema: t.inputSchema as Anthropic.Tool.InputSchema,
+    }))
+    const workingMessages: Message[] = [...messages]
+    const aggregated: ChatUsage = {
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+    }
+    let iterations = 0
+    while (true) {
+      const params = this.buildParams(workingMessages, options)
+      params.tools = toolDefinitions
+      const response = await this.client.messages.create(params)
+      addUsage(aggregated, response.usage)
+      // Append the assistant turn verbatim from the SDK shape so
+      // tool_use blocks survive to the next request unchanged.
+      workingMessages.push({
+        role: 'assistant',
+        content: fromAnthropicContent(response.content),
+      })
+      if (response.stop_reason !== 'tool_use') {
+        return {
+          text: collectText(response.content),
+          messages: workingMessages,
+          iterations,
+          // A missing stop_reason is reported as a normal end of turn.
+          stopReason: response.stop_reason ?? 'end_turn',
+          usage: aggregated,
+        }
+      }
+      // Execute every tool_use block in the response and append the
+      // results in a single user-role turn. The SDK's API expects all
+      // tool_result blocks for a given assistant turn to land in the
+      // same user message.
+      const toolUseBlocks = response.content.filter(
+        (b): b is Anthropic.ToolUseBlock => b.type === 'tool_use',
+      )
+      const resultBlocks: ContentBlock[] = []
+      for (const block of toolUseBlocks) {
+        const tool = toolMap.get(block.name)
+        if (!tool) {
+          throw new ToolExecutionError(
+            block.name,
+            block.id,
+            new Error(`Tool "${block.name}" is not registered.`),
+          )
+        }
+        let output: unknown
+        try {
+          output = await tool.execute(block.input, {
+            callId: block.id,
+            context: options.context ?? {},
+          })
+        } catch (cause) {
+          throw new ToolExecutionError(block.name, block.id, cause)
+        }
+        const resultBlock: ToolResultBlock = {
+          type: 'tool_result',
+          toolUseId: block.id,
+          // `JSON.stringify` returns `undefined` (not a string) for
+          // `undefined`, functions and symbols — e.g. a tool that
+          // returns nothing. Fall back to the JSON literal `null` so
+          // `content` is always a string.
+          content: typeof output === 'string' ? output : (JSON.stringify(output) ?? 'null'),
+        }
+        resultBlocks.push(resultBlock)
+      }
+      workingMessages.push({ role: 'user', content: resultBlocks })
+      iterations++
+      if (iterations >= maxIterations) {
+        return {
+          // Text (if any) that accompanied the last tool-calling turn.
+          text: collectText(response.content),
+          messages: workingMessages,
+          iterations,
+          stopReason: 'max_iterations',
+          usage: aggregated,
+        }
+      }
+    }
+  }
   // ─── Param translation ──────────────────────────────────────────────────
   private buildParams(
@@ -181,10 +292,30 @@ function toMessageParam(message: Message): Anthropic.MessageParam {
   }
   return {
     role: message.role,
-    content: message.content.map((block) => {
-      const param: Anthropic.TextBlockParam = { type: 'text', text: block.text }
-      if (block.cache) param.cache_control = EPHEMERAL_CACHE
-      return param
+    content: message.content.map((block): Anthropic.ContentBlockParam => {
+      if (block.type === 'tool_use') {
+        return {
+          type: 'tool_use',
+          id: block.id,
+          name: block.name,
+          input: block.input as Record<string, unknown>,
+        }
+      }
+      if (block.type === 'tool_result') {
+        const param: Anthropic.ToolResultBlockParam = {
+          type: 'tool_result',
+          tool_use_id: block.toolUseId,
+          content:
+            typeof block.content === 'string'
+              ? block.content
+              : block.content.map((b) => ({ type: 'text', text: b.text }) as Anthropic.TextBlockParam),
+        }
+        if (block.isError) param.is_error = true
+        return param
+      }
+      const text: Anthropic.TextBlockParam = { type: 'text', text: block.text }
+      if (block.cache) text.cache_control = EPHEMERAL_CACHE
+      return text
     }),
   }
 }
@@ -225,3 +356,42 @@ function mergeBetas(
   }
   return out
 }
+/**
+ * Fold one response's token usage into the running total `acc`
+ * (mutated in place). Missing cache counters count as zero.
+ */
+function addUsage(acc: ChatUsage, u: Anthropic.Usage): void {
+  const {
+    input_tokens: input,
+    output_tokens: output,
+    cache_read_input_tokens: cacheRead,
+    cache_creation_input_tokens: cacheCreation,
+  } = u
+  acc.inputTokens += input
+  acc.outputTokens += output
+  acc.cacheReadTokens += cacheRead ?? 0
+  acc.cacheCreationTokens += cacheCreation ?? 0
+}
+/** Concatenate, in order and with no separator, the text of every `text` block; other block types are ignored. */
+function collectText(content: Anthropic.ContentBlock[]): string {
+  const pieces: string[] = []
+  for (const block of content) {
+    if (block.type === 'text') pieces.push(block.text)
+  }
+  return pieces.join('')
+}
+/**
+ * Convert SDK response blocks into framework `ContentBlock`s for
+ * storage in the working message list. Only `text` and `tool_use`
+ * blocks are carried over (field-for-field); every other block type
+ * (thinking, server tool blocks) is dropped — V1 doesn't surface
+ * them, and re-sending them as part of the assistant turn could
+ * confuse the model.
+ */
+function fromAnthropicContent(content: Anthropic.ContentBlock[]): ContentBlock[] {
+  const converted: ContentBlock[] = []
+  for (const block of content) {
+    switch (block.type) {
+      case 'text':
+        converted.push({ type: 'text', text: block.text } satisfies TextBlock)
+        break
+      case 'tool_use':
+        converted.push({
+          type: 'tool_use',
+          id: block.id,
+          name: block.name,
+          input: block.input,
+        } satisfies ToolUseBlock)
+        break
+      default:
+        // Unsupported block type: intentionally skipped.
+        break
+    }
+  }
+  return converted
+}

package/src/tool.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/**
+ * `Tool` — the framework-native shape every tool implementation
+ * conforms to. Providers translate the `name`, `description`, and
+ * `inputSchema` into their vendor's tool-definition wire format;
+ * `execute` runs in-process on the framework side when the model
+ * calls the tool.
+ *
+ * `inputSchema` is plain JSON Schema (draft 2020-12 compatible).
+ * Apps that prefer Zod use the SDK's helpers and feed the resulting
+ * JSON Schema into `defineTool`; the framework deliberately doesn't
+ * couple to Zod so apps stay free to bring whatever schema library
+ * they want.
+ *
+ * Generics: `TInput` is what `execute` receives (after the model's
+ * raw input has been narrowed by validation at the call site, when
+ * apps choose to validate). `TOutput` is what the agentic loop
+ * appends as the `tool_result.content`. Both default to `unknown`
+ * for apps that don't want the cognitive overhead of typing tools.
+ */
+/** Second argument handed to a tool's `execute` on every call. */
+export interface ToolContext {
+  /** Provider-assigned call id — matches `ToolUseBlock.id`. */
+  readonly callId: string
+  /**
+   * Per-run free-form context bag passed by the caller. Always
+   * present: `AnthropicProvider` passes an empty object when the
+   * caller supplied none.
+   */
+  readonly context: Readonly<Record<string, unknown>>
+}
+/** Framework-native tool definition: metadata for the model plus an in-process executor. */
+export interface Tool<TInput = unknown, TOutput = unknown> {
+  /** Name the model uses to call the tool; the agentic loop looks tools up by this value. */
+  name: string
+  /** Description sent to the model alongside the name and schema. */
+  description: string
+  /** JSON Schema for the tool's input. Providers translate this into their wire format. */
+  inputSchema: Record<string, unknown>
+  /** In-process executor. Throws propagate as `ToolExecutionError` through the runner. */
+  execute(input: TInput, ctx: ToolContext): Promise<TOutput>
+}

package/src/tool_execution_error.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * `ToolExecutionError` — wrapper thrown by the agentic loop when a
+ * tool's `execute` function throws. Carries the tool name + the
+ * provider's call id on `context` so apps building error reporters /
+ * traces can correlate failures with model output without parsing
+ * stack frames.
+ *
+ * V1 propagates these out of `runWithTools` — the loop aborts on the
+ * first tool failure. A later slice may add a graceful path
+ * (`{ type: 'tool_result', isError: true }` is appended and the
+ * loop continues) but apps that need that today can catch the
+ * error, append the result themselves, and re-call the runner.
+ */
+import { StravError } from '@strav/kernel'
+/**
+ * Error raised when a tool call fails. The message embeds the tool
+ * name and the failure reason; `tool` and `callId` are attached as
+ * context and the original failure is kept as `cause`.
+ */
+export class ToolExecutionError extends StravError {
+  constructor(toolName: string, callId: string, cause: unknown) {
+    // Use the Error's own message when available, otherwise stringify the thrown value.
+    let reason: string
+    if (cause instanceof Error) {
+      reason = cause.message
+    } else {
+      reason = String(cause)
+    }
+    const summary = `Tool "${toolName}" execution failed: ${reason}`
+    const descriptor = { code: 'brain.tool-execution-failed', status: 500 }
+    super(summary, descriptor, { context: { tool: toolName, callId }, cause })
+  }
+}

package/src/types.ts CHANGED Viewed

@@ -38,7 +38,40 @@ export interface TextBlock {
   cache?: boolean
 }
-export type ContentBlock = TextBlock
+/**
+ * Provider-emitted tool-use block. Appears in `assistant`-role
+ * messages when the model decides to call a tool. `input` is the
+ * parsed JSON the model produced for the tool's `inputSchema`; it is
+ * typed `unknown` because the framework does not validate it — apps
+ * that need validation (Zod, ajv, etc.) do so at the call site.
+ *
+ * The agentic loop creates a matching `ToolResultBlock` and appends
+ * it to the next `user`-role message before re-asking the model.
+ */
+export interface ToolUseBlock {
+  /** Discriminant for the `ContentBlock` union. */
+  type: 'tool_use'
+  /** Provider-assigned call id. The matching tool_result references this verbatim. */
+  id: string
+  /** Tool name — matches a registered `Tool.name`. */
+  name: string
+  /** Parsed input the model produced. Apps validate against the tool's schema. */
+  input: unknown
+}
+/**
+ * Result of executing a tool. Appended to a `user`-role message and
+ * fed back to the model. `content` is either a plain string (the
+ * common case) or a list of text blocks for richer payloads. Mark
+ * `isError: true` so the model knows the tool call failed and can
+ * adjust its approach.
+ */
+export interface ToolResultBlock {
+  /** Discriminant for the `ContentBlock` union. */
+  type: 'tool_result'
+  /** Id of the `ToolUseBlock` this result answers. */
+  toolUseId: string
+  /** Tool output: a plain string, or text blocks for richer payloads. */
+  content: string | TextBlock[]
+  /** Set to `true` to tell the model the tool call failed. */
+  isError?: boolean
+}
+/** Any block that can appear in a message's block-list content; discriminated by `type`. */
+export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
 /** A single conversation turn. `content` can be a bare string or a typed block list. */
 export interface Message {