npm - @strav/brain - Versions diffs - 1.0.0-alpha.17 → 1.0.0-alpha.18 - Mend

@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/package.json +4 -2
package/src/agent_generate_result.ts +2 -0
package/src/agent_result.ts +7 -0
package/src/agent_runner.ts +80 -4
package/src/brain_manager.ts +119 -2
package/src/index.ts +20 -2
package/src/mcp/client.ts +17 -0
package/src/mcp/index.ts +1 -0
package/src/mcp/pool.ts +106 -0
package/src/mcp/resolve_mcp_tools.ts +25 -7
package/src/persistence/brain_message.ts +34 -0
package/src/persistence/brain_message_repository.ts +106 -0
package/src/persistence/brain_store.ts +166 -0
package/src/persistence/brain_suspended_run.ts +30 -0
package/src/persistence/brain_suspended_run_repository.ts +68 -0
package/src/persistence/brain_thread.ts +30 -0
package/src/persistence/brain_thread_repository.ts +65 -0
package/src/persistence/database_brain_store.ts +190 -0
package/src/persistence/index.ts +48 -0
package/src/persistence/schema/brain_message_schema.ts +61 -0
package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
package/src/persistence/schema/brain_thread_schema.ts +50 -0
package/src/persistence/schema/index.ts +3 -0
package/src/provider.ts +36 -1
package/src/providers/anthropic_provider.ts +140 -23
package/src/providers/gemini_provider.ts +55 -32
package/src/providers/openai_compat_provider.ts +452 -23
package/src/providers/openai_provider.ts +87 -32
package/src/providers/openai_responses_provider.ts +365 -50
package/src/suspended_run.ts +153 -0
package/src/thread.ts +40 -1
package/src/types.ts +110 -0

package/src/providers/gemini_provider.ts CHANGED Viewed

@@ -73,7 +73,12 @@ import type {
 import { resolveMcpTools, type ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
 import { parseGenerated, type OutputSchema } from '../output_schema.ts'
 import { runToolWithRecovery } from '../tool_runner.ts'
-import type { Provider, RunWithToolsOptions } from '../provider.ts'
+import type {
+  Provider,
+  RunWithToolsOptions,
+  RunWithToolsOptionsWithSuspend,
+} from '../provider.ts'
+import type { SuspendedRun } from '../suspended_run.ts'
 import type { Tool } from '../tool.ts'
 import type {
   ChatOptions,
@@ -119,6 +124,8 @@ export interface GeminiProviderOptions {
   client?: { models: GeminiModelsClient }
   /** Internal seam — tests inject a stub MCP client factory. */
   mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
+  /** See `OpenAIProviderOptions.mcpPool` — same semantics. */
+  mcpPool?: ResolveMcpToolsOptions['pool']
 }
 export class GeminiProvider implements Provider {
@@ -128,6 +135,7 @@ export class GeminiProvider implements Provider {
   private readonly defaultMaxTokens: number
   private readonly defaultEmbedModel: string
   private readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
+  private readonly mcpPool?: ResolveMcpToolsOptions['pool']
   constructor(name: string, config: GeminiProviderConfig, options: GeminiProviderOptions = {}) {
     this.name = name
@@ -135,6 +143,7 @@ export class GeminiProvider implements Provider {
     this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
     this.defaultEmbedModel = config.defaultEmbedModel ?? DEFAULT_GEMINI_EMBED_MODEL
     this.mcpClientFactory = options.mcpClientFactory
+    this.mcpPool = options.mcpPool
     if (options.client) {
       this.models = options.client.models
     } else {
@@ -273,18 +282,42 @@ export class GeminiProvider implements Provider {
     }
   }
+  /**
+   * Resolve MCP tool descriptors for `servers`, threading the
+   * provider's optional `clientFactory` (test seam) and `mcpPool`
+   * (long-lived connections) through. Caller invokes
+   * `resolved.close()` in `finally` — a no-op when the pool owns
+   * lifetimes.
+   *
+   * @param servers MCP servers to resolve. An empty list
+   *   short-circuits: `resolveMcpTools` is not called at all.
+   * @returns The resolved tools plus a `close` callback. For an
+   *   empty `servers` list this is an empty `tools` array and a
+   *   `close` that does nothing.
+   */
+  private resolveMcp(servers: readonly MCPServer[]): Promise<{
+    tools: Tool[]
+    close: () => Promise<void>
+  }> {
+    if (servers.length === 0) { // nothing to resolve; skip the MCP layer entirely
+      return Promise.resolve({ tools: [], close: async () => {} })
+    }
+    return resolveMcpTools(servers, {
+      ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}), // key omitted when no factory was configured
+      ...(this.mcpPool ? { pool: this.mcpPool } : {}), // key omitted when no pool was configured
+    })
+  }
+  runWithTools(
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptionsWithSuspend,
+  ): Promise<AgentResult | SuspendedRun>
+  runWithTools(
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options?: RunWithToolsOptions,
+  ): Promise<AgentResult>
   async runWithTools(
     messages: readonly Message[],
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
-  ): Promise<AgentResult> {
-    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
-    const resolved =
-      mcpServers.length > 0
-        ? await resolveMcpTools(mcpServers, {
-            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
-          })
-        : { tools: [] as Tool[], close: async () => {} }
+  ): Promise<AgentResult | SuspendedRun> {
+    const resolved = await this.resolveMcp(options.mcpServers ?? [])
     try {
       return await this._runLoop(messages, [...tools, ...resolved.tools], options)
     } finally {
@@ -296,7 +329,7 @@ export class GeminiProvider implements Provider {
     messages: readonly Message[],
     tools: readonly Tool[],
     options: RunWithToolsOptions,
-  ): Promise<AgentResult> {
+  ): Promise<AgentResult | SuspendedRun> {
     const maxIterations = options.maxIterations ?? 10
     const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
     const workingMessages: Message[] = [...messages]
@@ -339,7 +372,15 @@ export class GeminiProvider implements Provider {
       }
       const resultBlocks: ContentBlock[] = []
-      for (const call of toolUses) {
+      for (let i = 0; i < toolUses.length; i++) {
+        const call = toolUses[i]!
+        if (options.shouldSuspend && await options.shouldSuspend(call, options.context)) {
+          return {
+            status: 'suspended',
+            pendingToolCalls: toolUses.slice(i),
+            state: { messages: workingMessages, iterations, usage: aggregated },
+          }
+        }
         const { content, isError } = await runToolWithRecovery(
           toolMap.get(call.name),
           call.name,
@@ -375,13 +416,7 @@ export class GeminiProvider implements Provider {
     schema: OutputSchema<T>,
     options: RunWithToolsOptions = {},
   ): Promise<AgentGenerateResult<T>> {
-    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
-    const resolved =
-      mcpServers.length > 0
-        ? await resolveMcpTools(mcpServers, {
-            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
-          })
-        : { tools: [] as Tool[], close: async () => {} }
+    const resolved = await this.resolveMcp(options.mcpServers ?? [])
     try {
       return await this._runLoopWithSchema([...tools, ...resolved.tools], messages, schema, options)
     } finally {
@@ -480,13 +515,7 @@ export class GeminiProvider implements Provider {
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
   ): AsyncIterable<AgentStreamEvent> {
-    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
-    const resolved =
-      mcpServers.length > 0
-        ? await resolveMcpTools(mcpServers, {
-            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
-          })
-        : { tools: [] as Tool[], close: async () => {} }
+    const resolved = await this.resolveMcp(options.mcpServers ?? [])
     try {
       yield* this._streamLoop(messages, [...tools, ...resolved.tools], options)
     } finally {
@@ -605,13 +634,7 @@ export class GeminiProvider implements Provider {
     schema: OutputSchema<T>,
     options: RunWithToolsOptions = {},
   ): AsyncIterable<AgentStreamEvent<T>> {
-    const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
-    const resolved =
-      mcpServers.length > 0
-        ? await resolveMcpTools(mcpServers, {
-            ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
-          })
-        : { tools: [] as Tool[], close: async () => {} }
+    const resolved = await this.resolveMcp(options.mcpServers ?? [])
     try {
       yield* this._streamLoopWithSchema(
         [...tools, ...resolved.tools],

package/src/providers/openai_compat_provider.ts CHANGED Viewed

@@ -45,12 +45,17 @@ import { BrainError } from '../brain_error.ts'
 import { parseGenerated, type OutputSchema } from '../output_schema.ts'
 import type { RunWithToolsOptions } from '../provider.ts'
 import type { Tool } from '../tool.ts'
+import { recoverOrThrow, runToolWithRecovery } from '../tool_runner.ts'
+import { ToolExecutionError } from '../tool_execution_error.ts'
 import type {
   ChatOptions,
   ChatUsage,
+  ContentBlock,
   GenerateResult,
   Message,
   SystemPrompt,
+  ToolResultBlock,
+  ToolUseBlock,
 } from '../types.ts'
 import { OpenAIProvider } from './openai_provider.ts'
@@ -116,36 +121,351 @@ export abstract class OpenAICompatProvider extends OpenAIProvider {
   }
   /**
-   * Combined tool-loop + structured output isn't supported on
-   * OpenAI-compat providers in V1. The API's `json_object` mode
-   * doesn't carry schema enforcement, and weaving the
-   * schema-instruction into every turn's system prompt during a
-   * tool loop would surprise apps. Apps run `runTools(...)` +
-   * `generate(...)` as two separate calls, or switch to OpenAI /
-   * Anthropic / Gemini for the combined call.
+   * Combined tool-loop + structured output via the **tool-forcing**
+   * pattern. OpenAI-compat endpoints don't support per-turn
+   * `json_schema` enforcement, but they do support OpenAI-style
+   * function calling — so the framework injects a synthetic
+   * `respond_with_<schemaName>` tool whose JSON-Schema
+   * `parameters` IS the desired output schema. The model uses it
+   * (and only it) for its final answer; the args become the
+   * parsed structured value. Regular tools work normally
+   * alongside.
+   *
+   * The model is prompted to call regular tools first, then
+   * `respond_with` exactly once when ready to answer. If it
+   * doesn't (returns plain text instead, or hits `maxIterations`),
+   * the framework throws `BrainError` — apps should reinforce the
+   * pattern via a clearer system prompt, or simplify the task.
+   *
+   * Caveats vs OpenAI's `strict: true`:
+   *   - Smaller models may emit invalid JSON in the tool args.
+   *     `parseGenerated` + the optional `schema.parse` hook catch
+   *     it at the boundary.
+   *   - Schema features beyond OpenAI function-calling's subset
+   *     (recursive refs, advanced keywords) may not be honored.
+   *     Stick to flat object schemas for best results.
    */
   override async runWithToolsAndSchema<T>(
-    _messages: readonly Message[],
-    _tools: readonly Tool[],
-    _schema: OutputSchema<T>,
-    _options?: RunWithToolsOptions,
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    schema: OutputSchema<T>,
+    options: RunWithToolsOptions = {},
   ): Promise<AgentGenerateResult<T>> {
-    throw new BrainError(
-      `${this.name}.runWithToolsAndSchema: combined tool use + structured output is not supported on OpenAI-compat providers in V1. Run \`brain.runTools(...)\` and \`brain.generate(...)\` as two separate calls, or switch to OpenAI / Anthropic / Gemini for this combination.`,
-      { context: { provider: this.name } },
-    )
+    const resolved = await this.resolveMcp(options.mcpServers ?? [])
+    try {
+      return await this._toolForcingLoop(
+        messages,
+        [...tools, ...resolved.tools],
+        schema,
+        options,
+      )
+    } finally {
+      await resolved.close()
+    }
   }
   override async *streamWithToolsAndSchema<T>(
-    _messages: readonly Message[],
-    _tools: readonly Tool[],
-    _schema: OutputSchema<T>,
-    _options?: RunWithToolsOptions,
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    schema: OutputSchema<T>,
+    options: RunWithToolsOptions = {},
   ): AsyncIterable<AgentStreamEvent<T>> {
-    throw new BrainError(
-      `${this.name}.streamWithToolsAndSchema: combined streaming + tool use + structured output is not supported on OpenAI-compat providers in V1. Use \`brain.streamTools(...)\` and \`brain.generate(...)\` separately, or switch to OpenAI / Anthropic / Gemini for this combination.`,
-      { context: { provider: this.name } },
-    )
+    const resolved = await this.resolveMcp(options.mcpServers ?? [])
+    try {
+      yield* this._toolForcingStream(
+        messages,
+        [...tools, ...resolved.tools],
+        schema,
+        options,
+      )
+    } finally {
+      await resolved.close()
+    }
+  }
+  private async _toolForcingLoop<T>( // Non-streaming tool-forcing loop: run regular tools until the model calls the synthetic respond tool.
+    messages: readonly Message[], // initial conversation; copied into `workingMessages`, the input array is not mutated
+    tools: readonly Tool[], // executable tools, looked up by name when the model calls them
+    schema: OutputSchema<T>, // output schema used to build the respond tool and to parse its arguments
+    options: RunWithToolsOptions, // reads `maxIterations` (default 10) and `signal`; also forwarded to the tool runner
+  ): Promise<AgentGenerateResult<T>> { // throws BrainError on: no choices, a turn without usable tool calls, or reaching maxIterations
+    const { respondTool, respondName, augmented } = prepareToolForcing(schema, options, tools) // throws if a user tool already has the respond tool's name
+    const maxIterations = options.maxIterations ?? 10
+    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t])) // name → tool lookup for dispatch
+    const workingMessages: Message[] = [...messages]
+    const aggregated: ChatUsage = { // usage summed across every model call in this run
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+    }
+    let iterations = 0 // number of completed tool-execution rounds
+    while (true) {
+      checkAborted(options.signal)
+      const params = this.buildParams(workingMessages, augmented, tools)
+      params.tools = [...(params.tools ?? []), respondTool] // append the synthetic respond tool after the regular tools
+      const response = await this.client.chat.completions.create(
+        params,
+        reqOpts(options),
+      )
+      addUsageHere(aggregated, response.usage, this)
+      const choice = response.choices[0] // only the first choice is considered
+      if (!choice) {
+        throw new BrainError(
+          `${this.name}.runWithToolsAndSchema: response had no choices.`,
+        )
+      }
+      const assistantMessage = choice.message
+      workingMessages.push({ // record the assistant turn before inspecting its tool calls
+        role: 'assistant',
+        content: fromOpenAIAssistant(assistantMessage),
+      })
+      const toolCalls = assistantMessage.tool_calls ?? []
+      const respond = toolCalls.find( // first call to the respond tool, if any
+        (c) => c.type === 'function' && c.function.name === respondName,
+      )
+      if (respond && respond.type === 'function') { // terminal turn: other tool calls in the same turn are not executed
+        const text = respond.function.arguments ?? '' // the raw JSON arguments are the structured answer
+        const value = parseGenerated(text, schema)
+        return {
+          value,
+          text,
+          messages: workingMessages,
+          iterations,
+          stopReason: choice.finish_reason ?? 'stop',
+          usage: aggregated,
+        }
+      }
+      if (toolCalls.length === 0 || choice.finish_reason !== 'tool_calls') { // no respond call and nothing to execute: fail instead of looping
+        throw new BrainError(
+          `${this.name}.runWithToolsAndSchema: model returned without calling \`${respondName}\`. Add a stronger instruction in the system prompt — apps must steer the model to use the synthetic respond tool for its final answer.`,
+          { context: { provider: this.name, text: assistantMessage.content ?? '' } },
+        )
+      }
+      const resultBlocks: ContentBlock[] = []
+      for (const call of toolCalls) {
+        if (call.type !== 'function') continue // only function-type calls are executed
+        let parsedInput: unknown
+        let parseFailed: { content: string; isError: boolean } | undefined // set only when the arguments are not valid JSON
+        try {
+          parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {} // empty arguments are treated as `{}`
+        } catch (err) {
+          parseFailed = recoverOrThrow( // NOTE(review): presumably returns an error result or throws depending on `options` — confirm in tool_runner
+            new ToolExecutionError(
+              call.function.name,
+              call.id,
+              new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
+            ),
+            options,
+          )
+        }
+        const { content, isError } = parseFailed // when parsing failed, the tool itself is not run
+          ?? (await runToolWithRecovery(
+            toolMap.get(call.function.name), // undefined for unknown tool names; handling is left to runToolWithRecovery
+            call.function.name,
+            call.id,
+            parsedInput,
+            options,
+          ))
+        resultBlocks.push({
+          type: 'tool_result',
+          toolUseId: call.id,
+          content,
+          ...(isError ? { isError: true } : {}), // `isError` key is only present on failures
+        } satisfies ToolResultBlock)
+      }
+      workingMessages.push({ role: 'user', content: resultBlocks }) // all tool results go back as a single user message
+      iterations++
+      if (iterations >= maxIterations) { // budget is checked after tools ran, before the next model call
+        throw new BrainError(
+          `${this.name}.runWithToolsAndSchema: hit maxIterations (${maxIterations}) without the model calling \`${respondName}\`. Bump maxIterations, simplify the task, or strengthen the system-prompt nudge.`,
+          { context: { provider: this.name } },
+        )
+      }
+    }
+  }
+  private async *_toolForcingStream<T>( // Streaming tool-forcing loop: same flow as the non-streaming variant, surfaced as stream events.
+    messages: readonly Message[], // initial conversation; copied into `workingMessages`, the input array is not mutated
+    tools: readonly Tool[], // executable tools, looked up by name when the model calls them
+    schema: OutputSchema<T>, // output schema used to build the respond tool and to parse its arguments
+    options: RunWithToolsOptions, // reads `maxIterations` (default 10) and `signal`; also forwarded to the tool runner
+  ): AsyncIterable<AgentStreamEvent<T>> { // ends with a `stop` event carrying `value`; throws BrainError if the respond tool is never called
+    const { respondTool, respondName, augmented } = prepareToolForcing(schema, options, tools) // throws if a user tool already has the respond tool's name
+    const maxIterations = options.maxIterations ?? 10
+    const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t])) // name → tool lookup for dispatch
+    const workingMessages: Message[] = [...messages]
+    const aggregated: ChatUsage = { // usage summed across every model call in this run
+      inputTokens: 0,
+      outputTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+    }
+    let iterations = 0 // number of completed tool-execution rounds
+    while (true) {
+      checkAborted(options.signal)
+      yield { type: 'iteration_start', iteration: iterations }
+      const baseParams = this.buildParams(workingMessages, augmented, tools)
+      baseParams.tools = [...(baseParams.tools ?? []), respondTool] // append the synthetic respond tool after the regular tools
+      const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
+        ...baseParams,
+        stream: true,
+        stream_options: { include_usage: true }, // request usage in the stream; it is read from `chunk.usage` below
+      }
+      const stream = await this.client.chat.completions.create(params, reqOpts(options))
+      let textBuf = '' // all text deltas of this turn, concatenated
+      const toolCallsByIndex = new Map< // partial tool calls keyed by their `index` in the streamed deltas
+        number,
+        { id?: string; name?: string; args: string; started: boolean }
+      >()
+      let finishReason: string | null = null
+      let lastUsage: OpenAI.CompletionUsage | undefined // last usage payload seen in this turn's chunks
+      for await (const chunk of stream) {
+        const choice = chunk.choices[0] // only the first choice is considered
+        const delta = choice?.delta
+        if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
+          textBuf += delta.content
+          yield { type: 'text', delta: delta.content }
+        }
+        if (delta?.tool_calls) {
+          for (const tc of delta.tool_calls) {
+            const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false }
+            if (tc.id) entry.id = tc.id
+            if (tc.function?.name) entry.name = tc.function.name
+            toolCallsByIndex.set(tc.index, entry)
+            if (!entry.started && entry.id !== undefined && entry.name !== undefined) { // a call "starts" once both id and name are known
+              entry.started = true
+              if (entry.name !== respondName) { // the synthetic respond tool is not surfaced as a tool_use_start event
+                yield { type: 'tool_use_start', id: entry.id, name: entry.name }
+              }
+            }
+            if (tc.function?.arguments) {
+              entry.args += tc.function.arguments // always accumulated, even when no delta event is emitted
+              if (
+                entry.started && // fragments arriving before the call has started produce no delta event
+                entry.id !== undefined &&
+                entry.name !== respondName // respond-tool argument deltas are not surfaced either
+              ) {
+                yield {
+                  type: 'tool_use_delta',
+                  id: entry.id,
+                  argsDelta: tc.function.arguments,
+                }
+              }
+            }
+          }
+        }
+        if (choice?.finish_reason) finishReason = choice.finish_reason
+        if (chunk.usage) lastUsage = chunk.usage
+      }
+      addUsageHere(aggregated, lastUsage, this)
+      yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
+      const assistantBlocks: ContentBlock[] = [] // rebuild the assistant turn from the streamed pieces
+      if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
+      const orderedCalls = [...toolCallsByIndex.entries()]
+        .sort(([a], [b]) => a - b) // restore the order given by the stream's tool-call indices
+        .map(([, v]) => v)
+      for (const call of orderedCalls) {
+        if (!call.id || !call.name) continue // drop calls that never received both id and name
+        let parsedInput: unknown = {}
+        try {
+          parsedInput = call.args ? JSON.parse(call.args) : {}
+        } catch {
+          parsedInput = call.args // invalid JSON: keep the raw argument string in the recorded message
+        }
+        assistantBlocks.push({
+          type: 'tool_use',
+          id: call.id,
+          name: call.name,
+          input: parsedInput,
+        } satisfies ToolUseBlock)
+      }
+      const assistantContent: string | ContentBlock[] = // a lone text block collapses to a plain string
+        assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
+          ? assistantBlocks[0].text
+          : assistantBlocks
+      workingMessages.push({ role: 'assistant', content: assistantContent })
+      const respond = orderedCalls.find((c) => c.name === respondName) // first call to the respond tool, if any
+      if (respond && respond.id) { // terminal turn: other tool calls in the same turn are not executed
+        const text = respond.args // the raw JSON arguments are the structured answer
+        const value = parseGenerated(text, schema)
+        yield {
+          type: 'stop',
+          stopReason: finishReason ?? 'stop',
+          iterations,
+          usage: aggregated,
+          messages: workingMessages,
+          value,
+          text,
+        } as AgentStreamEvent<T>
+        return
+      }
+      if (finishReason !== 'tool_calls' || orderedCalls.length === 0) { // no respond call and nothing to execute: fail instead of looping
+        throw new BrainError(
+          `${this.name}.streamWithToolsAndSchema: model returned without calling \`${respondName}\`. Strengthen the system-prompt nudge.`,
+          { context: { provider: this.name, text: textBuf } },
+        )
+      }
+      const resultBlocks: ContentBlock[] = []
+      for (const call of orderedCalls) {
+        if (!call.id || !call.name) continue // same filter as when the assistant message was rebuilt
+        let parsedInput: unknown
+        let parseFailed: { content: string; isError: boolean } | undefined // set only when the arguments are not valid JSON
+        try {
+          parsedInput = call.args ? JSON.parse(call.args) : {} // empty arguments are treated as `{}`
+        } catch (err) {
+          parseFailed = recoverOrThrow( // NOTE(review): presumably returns an error result or throws depending on `options` — confirm in tool_runner
+            new ToolExecutionError(
+              call.name,
+              call.id,
+              new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
+            ),
+            options,
+          )
+          parsedInput = call.args // the tool_use event below then carries the raw argument string
+        }
+        yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput } // emitted before the tool runs
+        const { content, isError } = parseFailed // when parsing failed, the tool itself is not run
+          ?? (await runToolWithRecovery(
+            toolMap.get(call.name), // undefined for unknown tool names; handling is left to runToolWithRecovery
+            call.name,
+            call.id,
+            parsedInput,
+            options,
+          ))
+        resultBlocks.push({
+          type: 'tool_result',
+          toolUseId: call.id,
+          content,
+          ...(isError ? { isError: true } : {}), // `isError` key is only present on failures
+        } satisfies ToolResultBlock)
+        yield { type: 'tool_result', id: call.id, name: call.name, content, isError }
+      }
+      workingMessages.push({ role: 'user', content: resultBlocks }) // all tool results go back as a single user message
+      iterations++
+      if (iterations >= maxIterations) { // budget is checked after tools ran, before the next model call
+        throw new BrainError(
+          `${this.name}.streamWithToolsAndSchema: hit maxIterations (${maxIterations}) without the model calling \`${respondName}\`.`,
+          { context: { provider: this.name } },
+        )
+      }
+    }
   }
   /**
@@ -185,3 +505,112 @@ function schemaInstruction(schema: OutputSchema<unknown>): string {
   ].filter((s): s is string => s !== undefined)
   return lines.join('\n')
 }
+// ─── Tool-forcing helpers ────────────────────────────────────────────────
+const RESPOND_TOOL_PREFIX = 'respond_with_' // name prefix of the synthetic respond tool; the OutputSchema name is appended to it
+/**
+ * Build the synthetic respond-tool entry plus a copy of `options`
+ * whose `system` prompt has the tool-forcing nudge combined in
+ * via `combineSystem`. Validates that the chosen tool name
+ * doesn't collide with any user tool — that would make the
+ * loop's terminal detection ambiguous.
+ *
+ * @param schema Output schema. `name` is appended to the
+ *   `respond_with_` prefix to form the tool name, `jsonSchema`
+ *   becomes the tool's `parameters`, and `description` (when set)
+ *   is appended in parentheses to the tool description.
+ * @param options Chat options; spread into `augmented` with only
+ *   `system` replaced.
+ * @param userTools Tools whose names are checked for a collision.
+ * @returns The tool definition to append to the request
+ *   (`respondTool`), its name (`respondName`), and the options
+ *   carrying the extended system prompt (`augmented`).
+ * @throws BrainError when a user tool is already named
+ *   `respondName`.
+ */
+function prepareToolForcing(
+  schema: OutputSchema<unknown>,
+  options: ChatOptions,
+  userTools: readonly Tool[],
+): {
+  respondTool: OpenAI.Chat.ChatCompletionTool
+  respondName: string
+  augmented: ChatOptions
+} {
+  const respondName = `${RESPOND_TOOL_PREFIX}${schema.name}`
+  if (userTools.some((t) => t.name === respondName)) { // NOTE(review): the message below names runWithToolsAndSchema, but the streaming path calls this helper too
+    throw new BrainError(
+      `OpenAICompatProvider.runWithToolsAndSchema: synthetic tool name "${respondName}" collides with a user-supplied tool. Rename your tool or the OutputSchema.name to avoid the clash.`,
+      { context: { conflictingName: respondName } },
+    )
+  }
+  const respondTool: OpenAI.Chat.ChatCompletionTool = {
+    type: 'function',
+    function: {
+      name: respondName,
+      description:
+        `Submit your final answer. Call this exactly once, after using any other tools you need. ` +
+        `The arguments MUST conform to the schema below. Do not return prose alongside or in place of this call.` +
+        (schema.description ? ` (${schema.description})` : ''),
+      parameters: schema.jsonSchema as Record<string, unknown>, // the output schema itself is the tool's argument schema
+    },
+  }
+  const augmented: ChatOptions = {
+    ...options,
+    system: combineSystem(options.system, toolForcingInstruction(respondName)), // NOTE(review): combineSystem is defined outside this view — merge order not confirmed here
+  }
+  return { respondTool, respondName, augmented }
+}
+function toolForcingInstruction(respondName: string): string { // Builds the system-prompt nudge telling the model to finish by calling `respondName`.
+  return [
+    `When you are ready to give the final answer, call the \`${respondName}\` function with the structured arguments.`,
+    `Use any other available tools first to gather what you need. Once you have enough information, call \`${respondName}\` exactly once and do NOT also return prose text.`,
+  ].join(' ') // the two sentences are joined with a single space
+}
+function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined { // Builds the per-request options object that carries the caller's abort signal.
+  return options.signal !== undefined ? { signal: options.signal } : undefined // no signal → `undefined`, so no request options are passed at all
+}
+function checkAborted(signal: AbortSignal | undefined): void { // Throws if `signal` is already aborted; does nothing for a missing or still-active signal.
+  if (signal?.aborted) {
+    throw signal.reason ?? new DOMException('Aborted', 'AbortError') // prefer the signal's own reason; fall back to an AbortError DOMException
+  }
+}
+function fromOpenAIAssistant( // Converts an OpenAI assistant message into this library's message content (plain string or content blocks).
+  msg: OpenAI.Chat.ChatCompletionMessage,
+): string | ContentBlock[] { // a plain string when the message is text only; otherwise an array of blocks (possibly empty)
+  const blocks: ContentBlock[] = []
+  if (msg.content) blocks.push({ type: 'text', text: msg.content }) // empty or missing text produces no text block
+  if (msg.tool_calls) {
+    for (const call of msg.tool_calls) {
+      if (call.type !== 'function') continue // only function-type calls are converted
+      let parsedInput: unknown = {}
+      try {
+        parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {} // empty arguments are treated as `{}`
+      } catch {
+        parsedInput = call.function.arguments ?? {} // invalid JSON: keep the raw argument string instead of failing
+      }
+      blocks.push({
+        type: 'tool_use',
+        id: call.id,
+        name: call.function.name,
+        input: parsedInput,
+      } satisfies ToolUseBlock)
+    }
+  }
+  if (blocks.length === 1 && blocks[0]?.type === 'text') return blocks[0].text // a lone text block collapses to a plain string
+  return blocks
+}
+/**
+ * Add provider-mapped usage onto an accumulator. Calls `mapUsage`
+ * on the provider instance so subclasses (e.g., DeepSeek) honor
+ * their vendor-specific cache fields.
+ *
+ * @param acc Running totals; mutated in place (all four token
+ *   counters are incremented).
+ * @param u Usage payload from one model response. When it is
+ *   `undefined` the function returns without touching `acc`.
+ * @param provider Provider whose `mapUsage` converts `u` into a
+ *   `ChatUsage` before it is added.
+ */
+function addUsageHere(
+  acc: ChatUsage,
+  u: OpenAI.CompletionUsage | undefined,
+  provider: OpenAICompatProvider,
+): void {
+  if (!u) return
+  // Cast: `mapUsage` is protected on the abstract class; we're
+  // inside the module so the access is valid at runtime.
+  const mapped = (provider as unknown as {
+    mapUsage(u: OpenAI.CompletionUsage | undefined): ChatUsage
+  }).mapUsage(u)
+  acc.inputTokens += mapped.inputTokens
+  acc.outputTokens += mapped.outputTokens
+  acc.cacheReadTokens += mapped.cacheReadTokens
+  acc.cacheCreationTokens += mapped.cacheCreationTokens
+}