npm - @strav/brain - Versions diffs - 1.0.0-alpha.15 → 1.0.0-alpha.17 - Mend

@strav/brain 1.0.0-alpha.15 → 1.0.0-alpha.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/package.json +2 -2
package/src/agent.ts +34 -5
package/src/agent_generate_result.ts +30 -0
package/src/agent_runner.ts +140 -14
package/src/agent_stream_event.ts +100 -0
package/src/brain_config.ts +91 -1
package/src/brain_manager.ts +168 -4
package/src/brain_provider.ts +25 -1
package/src/index.ts +19 -1
package/src/mcp/client.ts +82 -13
package/src/mcp/index.ts +6 -0
package/src/mcp/oauth.ts +227 -0
package/src/mcp/resolve_mcp_tools.ts +6 -2
package/src/mcp_server.ts +16 -0
package/src/provider.ts +109 -0
package/src/providers/anthropic_provider.ts +596 -28
package/src/providers/deepseek_provider.ts +117 -0
package/src/providers/gemini_provider.ts +590 -21
package/src/providers/ollama_provider.ts +86 -0
package/src/providers/openai_compat_provider.ts +187 -0
package/src/providers/openai_provider.ts +735 -32
package/src/providers/openai_responses_provider.ts +700 -0
package/src/tool.ts +7 -0
package/src/tool_runner.ts +81 -0
package/src/types.ts +233 -0

package/src/providers/openai_provider.ts CHANGED Viewed

@@ -52,26 +52,37 @@ import type { AgentResult } from '../agent_result.ts'
 import { BrainError } from '../brain_error.ts'
 import type { OpenAIProviderConfig } from '../brain_config.ts'
 import type { MCPServer } from '../mcp_server.ts'
+import type { AgentGenerateResult } from '../agent_generate_result.ts'
+import type { AgentStreamEvent } from '../agent_stream_event.ts'
 import { resolveMcpTools, type ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
 import { parseGenerated, type OutputSchema } from '../output_schema.ts'
+import { recoverOrThrow, runToolWithRecovery } from '../tool_runner.ts'
 import type { Provider, RunWithToolsOptions } from '../provider.ts'
 import type { Tool } from '../tool.ts'
 import { ToolExecutionError } from '../tool_execution_error.ts'
 import type {
+  AudioSource,
   ChatOptions,
   ChatResult,
   ChatUsage,
   ContentBlock,
+  EmbedOptions,
+  EmbedResult,
   GenerateResult,
+  ImageBlock,
   Message,
   StreamEvent,
   SystemPrompt,
   TextBlock,
   ToolResultBlock,
   ToolUseBlock,
+  TranscribeOptions,
+  TranscribeResult,
 } from '../types.ts'
 const DEFAULT_OPENAI_MODEL = 'gpt-5'
+const DEFAULT_OPENAI_EMBED_MODEL = 'text-embedding-3-small'
+const DEFAULT_OPENAI_TRANSCRIBE_MODEL = 'whisper-1'
 export interface OpenAIProviderOptions {
   client?: OpenAI
@@ -85,10 +96,18 @@ export interface OpenAIProviderOptions {
 export class OpenAIProvider implements Provider {
   readonly name: string
-  private readonly client: OpenAI
-  private readonly defaultModel: string
-  private readonly defaultMaxTokens: number
-  private readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
+  // Protected (rather than private) so OpenAI-compatible drivers
+  // can subclass — see `DeepSeekProvider`. Apps that want to plug
+  // in Groq / Together / Fireworks follow the same pattern: extend,
+  // override the constructor's base URL + default model, optionally
+  // override `buildParams` to suppress fields the upstream API
+  // doesn't accept.
+  protected readonly client: OpenAI
+  protected readonly defaultModel: string
+  protected readonly defaultMaxTokens: number
+  protected readonly defaultEmbedModel: string
+  protected readonly defaultTranscribeModel: string
+  protected readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
   constructor(
     name: string,
@@ -98,6 +117,8 @@ export class OpenAIProvider implements Provider {
     this.name = name
     this.defaultModel = config.defaultModel ?? DEFAULT_OPENAI_MODEL
     this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
+    this.defaultEmbedModel = config.defaultEmbedModel ?? DEFAULT_OPENAI_EMBED_MODEL
+    this.defaultTranscribeModel = config.defaultTranscribeModel ?? DEFAULT_OPENAI_TRANSCRIBE_MODEL
     this.mcpClientFactory = options.mcpClientFactory
     this.client =
       options.client ??
@@ -110,7 +131,7 @@ export class OpenAIProvider implements Provider {
   async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
     const params = this.buildParams(messages, options, [])
-    const response = await this.client.chat.completions.create(params)
+    const response = await this.client.chat.completions.create(params, reqOpts(options)) // Passes `{ signal }` request options to the SDK when options.signal is set.
     return this.toChatResult(response)
   }
@@ -123,7 +144,7 @@ export class OpenAIProvider implements Provider {
       stream: true,
       stream_options: { include_usage: true },
     }
-    const stream = await this.client.chat.completions.create(params)
+    const stream = await this.client.chat.completions.create(params, reqOpts(options))
     let aggregatedUsage: OpenAI.CompletionUsage | undefined
     let finishReason: string | null = null
     for await (const chunk of stream) {
@@ -179,8 +200,9 @@ export class OpenAIProvider implements Provider {
     let iterations = 0
     while (true) {
+      checkAborted(options.signal)
       const params = this.buildParams(workingMessages, options, tools)
-      const response = await this.client.chat.completions.create(params)
+      const response = await this.client.chat.completions.create(params, reqOpts(options))
       addUsage(aggregated, response.usage)
       const choice = response.choices[0]
@@ -210,52 +232,639 @@ export class OpenAIProvider implements Provider {
       const resultBlocks: ContentBlock[] = []
       for (const call of toolCalls) {
         if (call.type !== 'function') continue
-        const tool = toolMap.get(call.function.name)
-        if (!tool) {
-          throw new ToolExecutionError(
+        let parsedInput: unknown
+        let parseFailed: { content: string; isError: boolean } | undefined
+        try {
+          parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
+        } catch (err) {
+          parseFailed = recoverOrThrow(
+            new ToolExecutionError(
+              call.function.name,
+              call.id,
+              new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
+            ),
+            options,
+          )
+        }
+        const { content, isError } = parseFailed
+          ?? (await runToolWithRecovery(
+            toolMap.get(call.function.name),
             call.function.name,
             call.id,
-            new Error(`Tool "${call.function.name}" is not registered.`),
-          )
+            parsedInput,
+            options,
+          ))
+        resultBlocks.push({
+          type: 'tool_result',
+          toolUseId: call.id,
+          content,
+          ...(isError ? { isError: true } : {}),
+        } satisfies ToolResultBlock)
+      }
+      workingMessages.push({ role: 'user', content: resultBlocks })
+      iterations++
+      if (iterations >= maxIterations) {
+        return {
+          text: assistantMessage.content ?? '',
+          messages: workingMessages,
+          iterations,
+          stopReason: 'max_iterations',
+          usage: aggregated,
+        }
+      }
+    }
+  }
+  async runWithToolsAndSchema<T>( // Resolves MCP tools (if any), runs the schema-constrained tool loop, and always closes the MCP handle afterwards.
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    schema: OutputSchema<T>,
+    options: RunWithToolsOptions = {},
+  ): Promise<AgentGenerateResult<T>> {
+    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
+    const resolved =
+      mcpServers.length > 0
+        ? await resolveMcpTools(mcpServers, {
+            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
+          })
+        : { tools: [] as Tool[], close: async () => {} } // No MCP servers: empty tool list plus a no-op close, so the finally block needs no branching.
+    try {
+      return await this._runLoopWithSchema([...tools, ...resolved.tools], messages, schema, options) // Caller-supplied tools first, MCP-resolved tools after.
+    } finally {
+      await resolved.close() // Runs whether the loop returned or threw.
+    }
+  }
+  private async _runLoopWithSchema<T>( // Non-streaming tool loop that sets a JSON-schema response_format on every request and parses the final text with parseGenerated.
+    tools: readonly Tool[],
+    messages: readonly Message[],
+    schema: OutputSchema<T>,
+    options: RunWithToolsOptions,
+  ): Promise<AgentGenerateResult<T>> {
+    const maxIterations = options.maxIterations ?? 10 // Defaults to 10 tool round-trips.
+    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
+    const workingMessages: Message[] = [...messages] // Copy so the caller's array is never mutated.
+    const aggregated: ChatUsage = {
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+    }
+    let iterations = 0
+    while (true) { // NOTE(review): the other tool loops in this file call checkAborted(options.signal) at this point; this one does not — confirm whether that is intentional.
+      const params = this.buildParams(workingMessages, options, tools)
+      params.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: schema.name,
+          ...(schema.description !== undefined ? { description: schema.description } : {}),
+          schema: schema.jsonSchema,
+          strict: true,
+        },
+      }
+      const response = await this.client.chat.completions.create(params, reqOpts(options))
+      addUsage(aggregated, response.usage)
+      const choice = response.choices[0] // Only the first choice is consumed.
+      if (!choice) {
+        throw new BrainError('OpenAIProvider: response had no choices.')
+      }
+      const assistantMessage = choice.message
+      workingMessages.push({
+        role: 'assistant',
+        content: fromOpenAIAssistantMessage(assistantMessage),
+      })
+      const toolCalls = assistantMessage.tool_calls ?? []
+      if (toolCalls.length === 0 || choice.finish_reason !== 'tool_calls') { // Anything other than a genuine tool_calls turn is treated as the final answer.
+        const text = assistantMessage.content ?? ''
+        return {
+          value: parseGenerated(text, schema),
+          text,
+          messages: workingMessages,
+          iterations,
+          stopReason: choice.finish_reason ?? 'stop',
+          usage: aggregated,
         }
+      }
+      const resultBlocks: ContentBlock[] = []
+      for (const call of toolCalls) {
+        if (call.type !== 'function') continue // Non-function tool calls are skipped.
         let parsedInput: unknown
+        let parseFailed: { content: string; isError: boolean } | undefined
         try {
           parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
         } catch (err) {
-          throw new ToolExecutionError(
+          parseFailed = recoverOrThrow( // presumably either rethrows or returns an error result depending on `options` — defined in tool_runner.ts, verify there.
+            new ToolExecutionError(
+              call.function.name,
+              call.id,
+              new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
+            ),
+            options,
+          )
+        }
+        const { content, isError } = parseFailed // A recorded parse failure short-circuits: the tool is not executed.
+          ?? (await runToolWithRecovery(
+            toolMap.get(call.function.name),
             call.function.name,
             call.id,
-            new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
-          )
+            parsedInput,
+            options,
+          ))
+        resultBlocks.push({
+          type: 'tool_result',
+          toolUseId: call.id,
+          content,
+          ...(isError ? { isError: true } : {}),
+        } satisfies ToolResultBlock)
+      }
+      workingMessages.push({ role: 'user', content: resultBlocks }) // Tool results are appended as a user turn.
+      iterations++
+      if (iterations >= maxIterations) {
+        const text = assistantMessage.content ?? '' // NOTE(review): this turn ended in tool calls, so `text` may be empty when it reaches parseGenerated — confirm that case is handled.
+        return {
+          value: parseGenerated(text, schema),
+          text,
+          messages: workingMessages,
+          iterations,
+          stopReason: 'max_iterations',
+          usage: aggregated,
         }
-        let output: unknown
-        try {
-          output = await tool.execute(parsedInput, {
-            callId: call.id,
-            context: options.context ?? {},
+      }
+    }
+  }
+  async *streamWithTools( // Resolves MCP tools (if any), delegates to the streaming tool loop, and always closes the MCP handle afterwards.
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptions = {},
+  ): AsyncIterable<AgentStreamEvent> {
+    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
+    const resolved =
+      mcpServers.length > 0
+        ? await resolveMcpTools(mcpServers, {
+            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
           })
-        } catch (cause) {
-          throw new ToolExecutionError(call.function.name, call.id, cause)
+        : { tools: [] as Tool[], close: async () => {} } // No MCP servers: empty tool list plus a no-op close, so the finally block needs no branching.
+    try {
+      yield* this._streamLoop(messages, [...tools, ...resolved.tools], options) // Caller-supplied tools first, MCP-resolved tools after.
+    } finally {
+      await resolved.close() // Runs whether the loop finished or threw.
+    }
+  }
+  private async *_streamLoop( // Streaming tool loop: yields per-chunk text/tool events, runs the requested tools, and repeats until a non-tool turn or maxIterations.
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptions,
+  ): AsyncIterable<AgentStreamEvent> {
+    const maxIterations = options.maxIterations ?? 10 // Defaults to 10 tool round-trips.
+    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
+    const workingMessages: Message[] = [...messages] // Copy so the caller's array is never mutated.
+    const aggregated: ChatUsage = {
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+    }
+    let iterations = 0
+    while (true) {
+      checkAborted(options.signal)
+      yield { type: 'iteration_start', iteration: iterations }
+      const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
+        ...this.buildParams(workingMessages, options, tools),
+        stream: true,
+        stream_options: { include_usage: true }, // The usage payload this requests is read from chunk.usage below.
+      }
+      const stream = await this.client.chat.completions.create(params, reqOpts(options))
+      let textBuf = ''
+      // Tracks: per index, the running entry; and whether
+      // `tool_use_start` has already been emitted (we emit once the
+      // first chunk brings the id + name).
+      const toolCallsByIndex: Map<
+        number,
+        { id?: string; name?: string; args: string; started: boolean }
+      > = new Map()
+      let finishReason: string | null = null
+      let lastUsage: OpenAI.CompletionUsage | undefined
+      for await (const chunk of stream) {
+        const choice = chunk.choices[0] // Only the first choice is consumed.
+        const delta = choice?.delta
+        if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
+          textBuf += delta.content
+          yield { type: 'text', delta: delta.content }
+        }
+        if (delta?.tool_calls) {
+          for (const tc of delta.tool_calls) {
+            const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false } // Fragments of one tool call are grouped by tc.index.
+            if (tc.id) entry.id = tc.id
+            if (tc.function?.name) entry.name = tc.function.name
+            toolCallsByIndex.set(tc.index, entry)
+            // Emit `tool_use_start` once id+name are both known.
+            // OpenAI typically delivers them in the same first
+            // chunk for a given tool call.
+            if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
+              entry.started = true
+              yield { type: 'tool_use_start', id: entry.id, name: entry.name }
+            }
+            if (tc.function?.arguments) {
+              entry.args += tc.function.arguments
+              // Emit a delta only after start has fired — apps relying
+              // on an id wouldn't have one until then.
+              if (entry.started && entry.id !== undefined) {
+                yield {
+                  type: 'tool_use_delta',
+                  id: entry.id,
+                  argsDelta: tc.function.arguments,
+                }
+              }
+            }
+          }
         }
-        const resultBlock: ToolResultBlock = {
+        if (choice?.finish_reason) finishReason = choice.finish_reason
+        if (chunk.usage) lastUsage = chunk.usage // Keeps the most recent usage payload seen in the stream.
+      }
+      addUsage(aggregated, lastUsage)
+      yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
+      // Materialize the assistant turn the same way runWithTools does.
+      const assistantBlocks: ContentBlock[] = []
+      if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
+      const orderedCalls = [...toolCallsByIndex.entries()]
+        .sort(([a], [b]) => a - b)
+        .map(([, v]) => v)
+      for (const call of orderedCalls) {
+        if (!call.id || !call.name) continue // Entries that never received both id and name are dropped.
+        let parsedInput: unknown = {}
+        try {
+          parsedInput = call.args ? JSON.parse(call.args) : {}
+        } catch {
+          parsedInput = call.args // Unparseable arguments are stored in the transcript as the raw string.
+        }
+        assistantBlocks.push({
+          type: 'tool_use',
+          id: call.id,
+          name: call.name,
+          input: parsedInput,
+        } satisfies ToolUseBlock)
+      }
+      const assistantContent: string | ContentBlock[] =
+        assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
+          ? assistantBlocks[0].text
+          : assistantBlocks
+      workingMessages.push({ role: 'assistant', content: assistantContent })
+      if (finishReason !== 'tool_calls' || orderedCalls.length === 0) { // Any turn that is not a genuine tool_calls turn ends the stream.
+        yield {
+          type: 'stop',
+          stopReason: finishReason ?? 'stop',
+          iterations,
+          usage: aggregated,
+          messages: workingMessages,
+        }
+        return
+      }
+      const resultBlocks: ContentBlock[] = []
+      for (const call of orderedCalls) {
+        if (!call.id || !call.name) continue
+        let parsedInput: unknown
+        let parseFailed: { content: string; isError: boolean } | undefined
+        try {
+          parsedInput = call.args ? JSON.parse(call.args) : {}
+        } catch (err) {
+          parseFailed = recoverOrThrow( // presumably either rethrows or returns an error result depending on `options` — defined in tool_runner.ts, verify there.
+            new ToolExecutionError(
+              call.name,
+              call.id,
+              new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
+            ),
+            options,
+          )
+          parsedInput = call.args // On a recovered parse failure the tool_use event reports the raw argument string.
+        }
+        yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput } // Emitted before the tool runs.
+        const { content, isError } = parseFailed // A recorded parse failure short-circuits: the tool is not executed.
+          ?? (await runToolWithRecovery(
+            toolMap.get(call.name),
+            call.name,
+            call.id,
+            parsedInput,
+            options,
+          ))
+        resultBlocks.push({
           type: 'tool_result',
           toolUseId: call.id,
-          content: typeof output === 'string' ? output : JSON.stringify(output),
+          content,
+          ...(isError ? { isError: true } : {}),
+        } satisfies ToolResultBlock)
+        yield {
+          type: 'tool_result',
+          id: call.id,
+          name: call.name,
+          content,
+          isError,
         }
-        resultBlocks.push(resultBlock)
       }
       workingMessages.push({ role: 'user', content: resultBlocks })
       iterations++
       if (iterations >= maxIterations) {
-        return {
-          text: assistantMessage.content ?? '',
+        yield {
+          type: 'stop',
+          stopReason: 'max_iterations',
+          iterations,
+          usage: aggregated,
           messages: workingMessages,
+        }
+        return
+      }
+    }
+  }
+  async *streamWithToolsAndSchema<T>( // Resolves MCP tools (if any), delegates to the schema-constrained streaming loop, and always closes the MCP handle afterwards.
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    schema: OutputSchema<T>,
+    options: RunWithToolsOptions = {},
+  ): AsyncIterable<AgentStreamEvent<T>> {
+    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
+    const resolved =
+      mcpServers.length > 0
+        ? await resolveMcpTools(mcpServers, {
+            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
+          })
+        : { tools: [] as Tool[], close: async () => {} } // No MCP servers: empty tool list plus a no-op close, so the finally block needs no branching.
+    try {
+      yield* this._streamLoopWithSchema(
+        [...tools, ...resolved.tools], // Caller-supplied tools first, MCP-resolved tools after.
+        messages,
+        schema,
+        options,
+      )
+    } finally {
+      await resolved.close() // Runs whether the loop finished or threw.
+    }
+  }
+  private async *_streamLoopWithSchema<T>( // Streaming tool loop with a JSON-schema response_format; the terminal `stop` event also carries the parsed `value` and raw `text`.
+    tools: readonly Tool[],
+    messages: readonly Message[],
+    schema: OutputSchema<T>,
+    options: RunWithToolsOptions,
+  ): AsyncIterable<AgentStreamEvent<T>> {
+    const maxIterations = options.maxIterations ?? 10 // Defaults to 10 tool round-trips.
+    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
+    const workingMessages: Message[] = [...messages] // Copy so the caller's array is never mutated.
+    const aggregated: ChatUsage = {
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+    }
+    let iterations = 0
+    while (true) {
+      checkAborted(options.signal)
+      yield { type: 'iteration_start', iteration: iterations }
+      const baseParams = this.buildParams(workingMessages, options, tools)
+      baseParams.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: schema.name,
+          ...(schema.description !== undefined ? { description: schema.description } : {}),
+          schema: schema.jsonSchema,
+          strict: true,
+        },
+      }
+      const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
+        ...baseParams,
+        stream: true,
+        stream_options: { include_usage: true }, // The usage payload this requests is read from chunk.usage below.
+      }
+      const stream = await this.client.chat.completions.create(params, reqOpts(options))
+      let textBuf = ''
+      // Tracks: per index, the running entry; and whether
+      // `tool_use_start` has already been emitted (we emit once the
+      // first chunk brings the id + name).
+      const toolCallsByIndex: Map<
+        number,
+        { id?: string; name?: string; args: string; started: boolean }
+      > = new Map()
+      let finishReason: string | null = null
+      let lastUsage: OpenAI.CompletionUsage | undefined
+      for await (const chunk of stream) {
+        const choice = chunk.choices[0] // Only the first choice is consumed.
+        const delta = choice?.delta
+        if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
+          textBuf += delta.content
+          yield { type: 'text', delta: delta.content }
+        }
+        if (delta?.tool_calls) {
+          for (const tc of delta.tool_calls) {
+            const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false } // Fragments of one tool call are grouped by tc.index.
+            if (tc.id) entry.id = tc.id
+            if (tc.function?.name) entry.name = tc.function.name
+            toolCallsByIndex.set(tc.index, entry)
+            // Emit `tool_use_start` once id+name are both known.
+            // OpenAI typically delivers them in the same first
+            // chunk for a given tool call.
+            if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
+              entry.started = true
+              yield { type: 'tool_use_start', id: entry.id, name: entry.name }
+            }
+            if (tc.function?.arguments) {
+              entry.args += tc.function.arguments
+              // Emit a delta only after start has fired — apps relying
+              // on an id wouldn't have one until then.
+              if (entry.started && entry.id !== undefined) {
+                yield {
+                  type: 'tool_use_delta',
+                  id: entry.id,
+                  argsDelta: tc.function.arguments,
+                }
+              }
+            }
+          }
+        }
+        if (choice?.finish_reason) finishReason = choice.finish_reason
+        if (chunk.usage) lastUsage = chunk.usage // Keeps the most recent usage payload seen in the stream.
+      }
+      addUsage(aggregated, lastUsage)
+      yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
+      const assistantBlocks: ContentBlock[] = []
+      if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
+      const orderedCalls = [...toolCallsByIndex.entries()]
+        .sort(([a], [b]) => a - b)
+        .map(([, v]) => v)
+      for (const call of orderedCalls) {
+        if (!call.id || !call.name) continue // Entries that never received both id and name are dropped.
+        let parsedInput: unknown = {}
+        try {
+          parsedInput = call.args ? JSON.parse(call.args) : {}
+        } catch {
+          parsedInput = call.args // Unparseable arguments are stored in the transcript as the raw string.
+        }
+        assistantBlocks.push({
+          type: 'tool_use',
+          id: call.id,
+          name: call.name,
+          input: parsedInput,
+        } satisfies ToolUseBlock)
+      }
+      const assistantContent: string | ContentBlock[] =
+        assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
+          ? assistantBlocks[0].text
+          : assistantBlocks
+      workingMessages.push({ role: 'assistant', content: assistantContent })
+      if (finishReason !== 'tool_calls' || orderedCalls.length === 0) { // Any turn that is not a genuine tool_calls turn ends the stream.
+        const text = textBuf
+        const value = parseGenerated(text, schema)
+        yield {
+          type: 'stop',
+          stopReason: finishReason ?? 'stop',
           iterations,
-          stopReason: 'max_iterations',
           usage: aggregated,
+          messages: workingMessages,
+          value,
+          text,
+        } as AgentStreamEvent<T>
+        return
+      }
+      const resultBlocks: ContentBlock[] = []
+      for (const call of orderedCalls) {
+        if (!call.id || !call.name) continue
+        let parsedInput: unknown
+        let parseFailed: { content: string; isError: boolean } | undefined
+        try {
+          parsedInput = call.args ? JSON.parse(call.args) : {}
+        } catch (err) {
+          parseFailed = recoverOrThrow( // presumably either rethrows or returns an error result depending on `options` — defined in tool_runner.ts, verify there.
+            new ToolExecutionError(
+              call.name,
+              call.id,
+              new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
+            ),
+            options,
+          )
+          parsedInput = call.args // On a recovered parse failure the tool_use event reports the raw argument string.
+        }
+        yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput } // Emitted before the tool runs.
+        const { content, isError } = parseFailed // A recorded parse failure short-circuits: the tool is not executed.
+          ?? (await runToolWithRecovery(
+            toolMap.get(call.name),
+            call.name,
+            call.id,
+            parsedInput,
+            options,
+          ))
+        resultBlocks.push({
+          type: 'tool_result',
+          toolUseId: call.id,
+          content,
+          ...(isError ? { isError: true } : {}),
+        } satisfies ToolResultBlock)
+        yield {
+          type: 'tool_result',
+          id: call.id,
+          name: call.name,
+          content,
+          isError,
         }
       }
+      workingMessages.push({ role: 'user', content: resultBlocks }) // Tool results are appended as a user turn.
+      iterations++
+      if (iterations >= maxIterations) {
+        const text = textBuf // NOTE(review): textBuf comes from a turn that ended in tool calls and may be empty when it reaches parseGenerated — confirm that case is handled.
+        const value = parseGenerated(text, schema)
+        yield {
+          type: 'stop',
+          stopReason: 'max_iterations',
+          iterations,
+          usage: aggregated,
+          messages: workingMessages,
+          value,
+          text,
+        } as AgentStreamEvent<T>
+        return
+      }
+    }
+  }
+  async transcribe( // Sends `audio` to the SDK's audio.transcriptions endpoint and returns the text, the model used, and the raw response.
+    audio: AudioSource,
+    options: TranscribeOptions = {},
+  ): Promise<TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse>> {
+    const model = options.model ?? this.defaultTranscribeModel // A per-call model overrides the configured default.
+    const file = await audioSourceToFile(audio) // URL sources are downloaded here, before the SDK call.
+    const params: OpenAI.Audio.TranscriptionCreateParams = {
+      file,
+      model,
+      ...(options.language !== undefined ? { language: options.language } : {}),
+      ...(options.prompt !== undefined ? { prompt: options.prompt } : {}),
+    }
+    const response = await this.client.audio.transcriptions.create(
+      params,
+      options.signal !== undefined ? { signal: options.signal } : undefined, // Request options are only passed when a signal exists.
+    )
+    // Whisper-1 returns { text, language?, duration? } when
+    // response_format is 'verbose_json'; we default to the SDK
+    // default (`json`) which only surfaces `text`. Apps that
+    // want language / duration from Whisper set
+    // `response_format: 'verbose_json'` via a raw SDK call;
+    // we can extend the option set when an app asks.
+    const text = 'text' in response && typeof response.text === 'string' ? response.text : '' // A missing or non-string `text` degrades to the empty string.
+    const result: TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse> = {
+      text,
+      model, // The requested model name, not a value read from the response.
+      raw: response,
+    }
+    if ('language' in response && typeof response.language === 'string') {
+      result.language = response.language
+    }
+    if ('duration' in response && typeof response.duration === 'number') {
+      result.duration = response.duration
+    }
+    return result
+  }
+  async embed( // Embeds `texts` in a single embeddings.create call and returns one vector per entry of response.data.
+    texts: readonly string[],
+    options: EmbedOptions = {},
+  ): Promise<EmbedResult<OpenAI.CreateEmbeddingResponse>> {
+    const model = options.model ?? this.defaultEmbedModel // A per-call model overrides the configured default.
+    const params: OpenAI.EmbeddingCreateParams = {
+      model,
+      input: texts as string[], // The cast only drops `readonly`; the array itself is passed through uncopied.
+      ...(options.dimensions !== undefined ? { dimensions: options.dimensions } : {}),
+    }
+    const response = await this.client.embeddings.create(
+      params,
+      options.signal !== undefined ? { signal: options.signal } : undefined, // Request options are only passed when a signal exists.
+    )
+    return {
+      embeddings: response.data.map((d) => d.embedding),
+      model: response.model, // The model name reported by the API, not the requested one.
+      usage: { inputTokens: response.usage?.prompt_tokens ?? 0 }, // Missing usage is reported as 0 input tokens.
+      raw: response,
     }
   }
@@ -274,7 +883,7 @@ export class OpenAIProvider implements Provider {
         strict: true,
       },
     }
-    const response = await this.client.chat.completions.create(params)
+    const response = await this.client.chat.completions.create(params, reqOpts(options))
     const choice = response.choices[0]
     const text = choice?.message?.content ?? ''
     const value = parseGenerated(text, schema)
@@ -290,11 +899,17 @@ export class OpenAIProvider implements Provider {
   // ─── Param translation ──────────────────────────────────────────────────
-  private buildParams(
+  protected buildParams(
     messages: readonly Message[],
     options: ChatOptions,
     tools: readonly Tool[],
   ): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
+    if (options.serverTools && options.serverTools.length > 0) {
+      throw new BrainError(
+        "OpenAIProvider: server tools (web_search / code_execution / web_fetch / url_context) are not supported on OpenAI's chat completions API. OpenAI's server tools live on the Responses API (separate provider slice). Run them as framework-local tools, route to Anthropic / Gemini, or wait for the OpenAIResponsesProvider slice.",
+        { context: { provider: 'openai' } },
+      )
+    }
     const model = options.model ?? this.defaultModel
     const params: OpenAI.Chat.ChatCompletionCreateParamsNonStreaming = {
       model,
@@ -389,6 +1004,55 @@ export class OpenAIProvider implements Provider {
 // ─── Shape converters ─────────────────────────────────────────────────────
+/**
+ * Build the request-options bag forwarded to the SDK. Only `signal` for now.
+ *
+ * @param options - Any options object; only its `signal` property is read.
+ * @returns `{ signal }` when a signal was supplied, otherwise `undefined`.
+ */
+function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
+  return options.signal !== undefined ? { signal: options.signal } : undefined
+}
+/**
+ * Materialize an `AudioSource` as a `File` the OpenAI SDK's
+ * `Uploadable` shape accepts. Base64 → in-memory File; URL →
+ * fetch + wrap. The SDK wants a filename; we synthesize one
+ * since `AudioSource` doesn't carry one. The extension lets the
+ * SDK pick the right content-type for the multipart upload.
+ *
+ * @param audio - Either inline base64 data with a media type, or a URL.
+ * @returns A `File` named `audio.<ext>`, with `<ext>` chosen by `extFromMime`.
+ * @throws BrainError when the URL fetch responds with a non-OK status.
+ */
+async function audioSourceToFile(audio: AudioSource): Promise<File> {
+  if (audio.type === 'base64') {
+    const bytes = Buffer.from(audio.data, 'base64')
+    const ext = extFromMime(audio.mediaType)
+    return new File([bytes], `audio.${ext}`, { type: audio.mediaType })
+  }
+  const response = await fetch(audio.url) // NOTE(review): no AbortSignal is passed to fetch, so a caller's signal would not cancel this download — confirm.
+  if (!response.ok) {
+    throw new BrainError(
+      `OpenAIProvider.transcribe: failed to fetch audio at ${audio.url}: ${response.status} ${response.statusText}.`,
+      { context: { url: audio.url, status: response.status } },
+    )
+  }
+  const buf = await response.arrayBuffer()
+  const mime = response.headers.get('content-type') ?? 'audio/mpeg' // A missing Content-Type header is treated as `audio/mpeg`.
+  return new File([buf], `audio.${extFromMime(mime)}`, { type: mime })
+}
+function extFromMime(mime: string): string { // Maps an audio MIME type to the extension used in the synthesized upload filename.
+  // Strip parameters (`audio/mpeg; codecs=...` → `audio/mpeg`).
+  const m = mime.split(';')[0]?.trim().toLowerCase() ?? ''
+  if (m === 'audio/mp3' || m === 'audio/mpeg' || m === 'audio/mpga') return 'mp3'
+  if (m === 'audio/wav' || m === 'audio/x-wav') return 'wav'
+  if (m === 'audio/ogg') return 'ogg'
+  if (m === 'audio/flac') return 'flac'
+  if (m === 'audio/webm') return 'webm'
+  if (m === 'audio/aac' || m === 'audio/x-aac' || m === 'audio/mp4' || m === 'audio/m4a') return 'm4a'
+  return 'mp3' // Fallback: any type not matched above (e.g. `audio/x-m4a`, `audio/opus`, or an empty string) is labelled mp3.
+}
+/**
+ * Throw if the signal has already fired; otherwise do nothing.
+ *
+ * Rethrows `signal.reason` when it is set, and falls back to a new
+ * `AbortError` DOMException when it is not. An `undefined` signal is a no-op.
+ */
+function checkAborted(signal: AbortSignal | undefined): void {
+  if (signal?.aborted) {
+    throw signal.reason ?? new DOMException('Aborted', 'AbortError')
+  }
+}
 function systemPromptText(system: SystemPrompt | undefined): string {
   if (system === undefined) return ''
   if (typeof system === 'string') return system
@@ -425,8 +1089,47 @@ function toOpenAIMessage(message: Message): OpenAI.Chat.ChatCompletionMessagePar
     return param
   }
-  // User-role multi-block content — flatten text. MCP blocks (which
-  // are read-only and Anthropic-specific) are silently dropped.
+  // Document / audio aren't supported by OpenAI's chat completions
+  // API. Throw with vendor-specific guidance so apps don't waste a
+  // 400 trying to send a PDF.
+  for (const block of message.content) {
+    if (block.type === 'document') {
+      throw new BrainError(
+        "OpenAIProvider: document blocks are not supported on OpenAI's chat completions API. For PDFs, split the document to images (one per page) and send them as ImageBlocks on a vision-capable model (gpt-5 / gpt-4o family); or route document workloads to Anthropic / Gemini, which accept PDF blocks natively.",
+        { context: { provider: 'openai' } },
+      )
+    }
+    if (block.type === 'audio') {
+      throw new BrainError(
+        "OpenAIProvider: audio blocks are not supported on OpenAI's chat completions API. Transcribe audio upstream via OpenAI's Whisper / gpt-4o-transcribe and send the resulting text; or route audio workloads to Gemini, which accepts audio blocks natively.",
+        { context: { provider: 'openai' } },
+      )
+    }
+  }
+  // User-role multi-block content. If any image blocks are present,
+  // emit OpenAI's multi-part content array (text + image_url
+  // entries). Otherwise flatten text — keeps simple text messages
+  // cleanly typed as strings. MCP blocks (read-only,
+  // Anthropic-specific) are silently dropped.
+  const images = message.content.filter((b): b is ImageBlock => b.type === 'image')
+  if (images.length > 0) {
+    const parts: OpenAI.Chat.ChatCompletionContentPart[] = []
+    for (const block of message.content) {
+      if (block.type === 'text') {
+        parts.push({ type: 'text', text: block.text })
+      } else if (block.type === 'image') {
+        const url =
+          block.source.type === 'base64'
+            ? `data:${block.source.mediaType};base64,${block.source.data}`
+            : block.source.url
+        parts.push({ type: 'image_url', image_url: { url } })
+      }
+      // tool_result / tool_use / mcp blocks dropped from user content
+      // (they're handled elsewhere or aren't valid on user turns).
+    }
+    return { role: 'user', content: parts }
+  }
   const text = message.content
     .filter((b): b is TextBlock => b.type === 'text')
     .map((b) => b.text)