npm - @strav/brain - Versions diffs - 1.0.0-alpha.17 → 1.0.0-alpha.18 - Mend

@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/package.json +4 -2
package/src/agent_generate_result.ts +2 -0
package/src/agent_result.ts +7 -0
package/src/agent_runner.ts +80 -4
package/src/brain_manager.ts +119 -2
package/src/index.ts +20 -2
package/src/mcp/client.ts +17 -0
package/src/mcp/index.ts +1 -0
package/src/mcp/pool.ts +106 -0
package/src/mcp/resolve_mcp_tools.ts +25 -7
package/src/persistence/brain_message.ts +34 -0
package/src/persistence/brain_message_repository.ts +106 -0
package/src/persistence/brain_store.ts +166 -0
package/src/persistence/brain_suspended_run.ts +30 -0
package/src/persistence/brain_suspended_run_repository.ts +68 -0
package/src/persistence/brain_thread.ts +30 -0
package/src/persistence/brain_thread_repository.ts +65 -0
package/src/persistence/database_brain_store.ts +190 -0
package/src/persistence/index.ts +48 -0
package/src/persistence/schema/brain_message_schema.ts +61 -0
package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
package/src/persistence/schema/brain_thread_schema.ts +50 -0
package/src/persistence/schema/index.ts +3 -0
package/src/provider.ts +36 -1
package/src/providers/anthropic_provider.ts +140 -23
package/src/providers/gemini_provider.ts +55 -32
package/src/providers/openai_compat_provider.ts +452 -23
package/src/providers/openai_provider.ts +87 -32
package/src/providers/openai_responses_provider.ts +365 -50
package/src/suspended_run.ts +153 -0
package/src/thread.ts +40 -1
package/src/types.ts +110 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@strav/brain",
-  "version": "1.0.0-alpha.17",
+  "version": "1.0.0-alpha.18",
   "description": "Strav AI module — unified Provider interface, BrainManager, threads, prompt caching, tools / agents / MCP. Anthropic + OpenAI providers; Gemini / DeepSeek follow.",
   "type": "module",
   "main": "./src/index.ts",
@@ -8,6 +8,7 @@
   "exports": {
     ".": "./src/index.ts",
     "./mcp": "./src/mcp/index.ts",
+    "./persistence": "./src/persistence/index.ts",
     "./zod": "./src/zod/index.ts"
   },
   "files": [
@@ -24,7 +25,8 @@
     "@anthropic-ai/sdk": "^0.100.0",
     "@google/genai": "^2.7.0",
     "@modelcontextprotocol/sdk": "^1.29.0",
-    "@strav/kernel": "1.0.0-alpha.17",
+    "@strav/database": "1.0.0-alpha.18",
+    "@strav/kernel": "1.0.0-alpha.18",
     "openai": "^6.0.0"
   },
   "peerDependencies": {

package/src/agent_generate_result.ts CHANGED Viewed

@@ -27,4 +27,6 @@ export interface AgentGenerateResult<T = unknown> {
   stopReason: string
   /** Token usage from the single underlying `generate` call. */
   usage: ChatUsage
+  /** See `ChatResult.responseId`. */
+  responseId?: string
 }

package/src/agent_result.ts CHANGED Viewed

@@ -29,4 +29,11 @@ export interface AgentResult {
   stopReason: string
   /** Token usage summed across every model call in the loop. */
   usage: ChatUsage
+  /**
+   * Final provider response id when the provider exposes stateful
+   * conversations (OpenAI Responses API). Captured from the last
+   * model turn so apps that persist the conversation can resume
+   * via `ChatOptions.previousResponseId`. Undefined elsewhere.
+   */
+  responseId?: string
 }

package/src/agent_runner.ts CHANGED Viewed

@@ -26,8 +26,17 @@ import type { AgentStreamEvent } from './agent_stream_event.ts'
 import type { BrainManager } from './brain_manager.ts'
 import { BrainError } from './brain_error.ts'
 import type { OutputSchema } from './output_schema.ts'
-import type { ChatOptions, Message } from './types.ts'
+import type {
+  ChatOptions,
+  Message,
+  ToolUseBlock,
+} from './types.ts'
 import type { RunWithToolsOptions } from './provider.ts'
+import type {
+  SuspendedRun,
+  SuspendedState,
+  ToolResultInput,
+} from './suspended_run.ts'
 /**
  * Conditional return shape for `AgentRunner.run()`. With the default
@@ -42,16 +51,47 @@ import type { RunWithToolsOptions } from './provider.ts'
  */
 export type AgentRunResult<T> = [T] extends [never] ? AgentResult : AgentGenerateResult<T>
-export class AgentRunner<T = never> {
+/**
+ * Conditional return shape that flips when the runner has opted in
+ * to suspension via `.suspend(gate)`. The phantom `S` generic on
+ * `AgentRunner<T, S>` carries the bit; `S extends true` widens the
+ * union so callers must narrow with `isSuspended(...)` before
+ * touching `result.value` / `result.text`.
+ */
+export type AgentRunMaybeSuspended<T, S extends boolean> = [S] extends [true]
+  ? AgentRunResult<T> | SuspendedRun
+  : AgentRunResult<T>
+export class AgentRunner<T = never, S extends boolean = false> {
   private prompt: string | undefined
   private contextBag: Record<string, unknown> = {}
   private schema: OutputSchema<T> | undefined
+  private suspendGate:
+    | ((call: ToolUseBlock, context?: Record<string, unknown>) => boolean | Promise<boolean>)
+    | undefined
   constructor(
     private readonly brain: BrainManager,
     private readonly agent: Agent<unknown>,
   ) {}
+  /**
+   * Install a human-in-the-loop gate. Called before each tool
+   * execution inside the agent loop; when it returns `true`, the
+   * run pauses and `.run()` resolves with a `SuspendedRun` instead
+   * of `AgentResult`. Apps obtain results out-of-band and call
+   * `.resume(state, results)` to continue.
+   *
+   * `suspend()` itself performs no validation. Combining it with
+   * structured-output mode (`.output(schema)`) is rejected when the
+   * run starts: `run()` throws `BrainError` — schema + suspend is a
+   * deferred slice.
+   */
+  suspend(
+    gate: (call: ToolUseBlock, context?: Record<string, unknown>) => boolean | Promise<boolean>,
+  ): AgentRunner<T, true> {
+    this.suspendGate = gate
+    return this as unknown as AgentRunner<T, true>
+  }
   /** Set the user input. Required before `run()`. */
   input(text: string): this {
     this.prompt = text
@@ -127,10 +167,15 @@ export class AgentRunner<T = never> {
     >
   }
-  async run(): Promise<AgentRunResult<T>> {
+  async run(): Promise<AgentRunMaybeSuspended<T, S>> {
     if (this.prompt === undefined) {
       throw new BrainError('AgentRunner.run: input() must be called before run().')
     }
+    if (this.suspendGate !== undefined && this.schema !== undefined) {
+      throw new BrainError(
+        'AgentRunner.run: `.suspend(...)` and `.output(schema)` cannot be combined in V1 — the schema variants don\'t yet model pause/resume. Run tools first with suspension, then call brain.generate(...) on the result for the structured summary.',
+      )
+    }
     const messages: Message[] = [{ role: 'user', content: this.prompt }]
     if (this.schema !== undefined) {
@@ -172,8 +217,39 @@ export class AgentRunner<T = never> {
       context: this.contextBag,
     }
     if (this.agent.mcpServers.length > 0) options.mcpServers = this.agent.mcpServers
+    if (this.suspendGate !== undefined) options.shouldSuspend = this.suspendGate
     const result = await this.brain.runTools(messages, this.agent.tools, options)
-    return result as AgentRunResult<T>
+    return result as AgentRunMaybeSuspended<T, S>
+  }
+  /**
+   * Resume a previously-suspended run. Takes the `SuspendedRun.state`
+   * snapshot and the results gathered for each `pendingToolCalls`
+   * entry; the loop continues from where it paused.
+   *
+   * The runner's `suspend()` gate carries over so the same
+   * approval logic applies to any further tool calls — pass a
+   * fresh gate via `suspend()` before `resume()` to change the
+   * policy.
+   */
+  async resume(
+    state: SuspendedState,
+    results: readonly ToolResultInput[],
+  ): Promise<AgentRunMaybeSuspended<T, true>> {
+    if (this.schema !== undefined) {
+      throw new BrainError(
+        'AgentRunner.resume: structured-output runners cannot be resumed in V1 — `.output(schema)` is incompatible with pause/resume.',
+      )
+    }
+    const options: RunWithToolsOptions = {
+      ...this.buildChatOptions(),
+      maxIterations: this.agent.maxIterations,
+      context: this.contextBag,
+    }
+    if (this.agent.mcpServers.length > 0) options.mcpServers = this.agent.mcpServers
+    if (this.suspendGate !== undefined) options.shouldSuspend = this.suspendGate
+    const result = await this.brain.resumeTools(state, results, this.agent.tools, options)
+    return result as AgentRunMaybeSuspended<T, true>
   }
   private buildChatOptions(): ChatOptions {

package/src/brain_manager.ts CHANGED Viewed

@@ -40,7 +40,12 @@ import type {
   TranscribeOptions,
   TranscribeResult,
 } from './types.ts'
-import type { Provider, RunWithToolsOptions } from './provider.ts'
+import type {
+  Provider,
+  RunWithToolsOptions,
+  RunWithToolsOptionsWithSuspend,
+} from './provider.ts'
+import { appendResumeResults, type SuspendedRun, type SuspendedState, type ToolResultInput } from './suspended_run.ts'
 import type { Tool } from './tool.ts'
 import { DEFAULT_TIERS } from './brain_config.ts'
@@ -152,11 +157,21 @@ export class BrainManager {
    * implement `runWithTools` (V1: OpenAI / Gemini / DeepSeek providers
    * don't yet — only `AnthropicProvider`).
    */
+  runTools(
+    input: string | readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptionsWithSuspend,
+  ): Promise<AgentResult | SuspendedRun>
+  runTools(
+    input: string | readonly Message[],
+    tools: readonly Tool[],
+    options?: RunWithToolsOptions,
+  ): Promise<AgentResult>
   async runTools(
     input: string | readonly Message[],
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
-  ): Promise<AgentResult> {
+  ): Promise<AgentResult | SuspendedRun> {
     const provider = this.provider(options.provider)
     if (!provider.runWithTools) {
       throw new BrainError(
@@ -175,6 +190,42 @@ export class BrainManager {
     return provider.runWithTools(messages, tools, resolved)
   }
+  /**
+   * Resume a previously-suspended tool-use loop. Takes the
+   * `SuspendedRun.state` snapshot plus the results the integrator
+   * gathered for each `pendingToolCalls` entry; appends a `tool_result`
+   * block per entry; re-enters `runTools` so the model can continue
+   * (potentially suspending again on the next tool).
+   *
+   * Mid-batch invariant: every pending call MUST get a result —
+   * otherwise the provider rejects the next request because the
+   * assistant turn's `tool_use` blocks are no longer balanced.
+   * `resumeTools` throws `BrainError` when results are missing.
+   *
+   * The `previousResponseId` carried on the snapshot (when the
+   * provider supports stateful conversations) is threaded back via
+   * `options.previousResponseId` automatically — per-call
+   * `options.previousResponseId` wins if supplied explicitly.
+   */
+  async resumeTools(
+    state: SuspendedState,
+    results: readonly ToolResultInput[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptions = {},
+  ): Promise<AgentResult | SuspendedRun> {
+    const resumed = appendResumeResults(state, results)
+    const merged: RunWithToolsOptions = { ...options }
+    if (merged.previousResponseId === undefined && state.responseId !== undefined) {
+      merged.previousResponseId = state.responseId
+    }
+    const out = await this.runTools(
+      resumed,
+      tools,
+      merged as RunWithToolsOptionsWithSuspend,
+    )
+    return mergeResumeCounters(out, state)
+  }
   /**
    * Streaming variant of `generateWithTools`. Yields
    * `AgentStreamEvent<T>`s as the loop progresses; the terminal
@@ -189,6 +240,7 @@ export class BrainManager {
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
   ): AsyncIterable<AgentStreamEvent<T>> {
+    rejectShouldSuspend(options, 'streamGenerateWithTools')
     const provider = this.provider(options.provider)
     if (!provider.streamWithToolsAndSchema) {
       throw new BrainError(
@@ -220,6 +272,7 @@ export class BrainManager {
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
   ): Promise<AgentGenerateResult<T>> {
+    rejectShouldSuspend(options, 'generateWithTools')
     const provider = this.provider(options.provider)
     if (!provider.runWithToolsAndSchema) {
       throw new BrainError(
@@ -250,6 +303,7 @@ export class BrainManager {
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
   ): AsyncIterable<AgentStreamEvent> {
+    rejectShouldSuspend(options, 'streamTools')
     const provider = this.provider(options.provider)
     if (!provider.streamWithTools) {
       throw new BrainError(
@@ -411,3 +465,66 @@ function normalizeInput(input: string | readonly Message[]): readonly Message[]
   }
   return input
 }
+/**
+ * NOTE: this comment documents `rejectShouldSuspend` (defined
+ * further down in this file), not the function that immediately
+ * follows it.
+ *
+ * V1 scope guard. `shouldSuspend` is wired only into the non-
+ * streaming `runWithTools` loop; the streaming and schema variants
+ * don't yet model pause / resume, so silently ignoring would be
+ * worse than throwing. Apps that need both should run tools first
+ * (suspending as needed), then call `generate` for the structured
+ * summary in a separate step.
+ */
+/**
+ * Carry forward the pre-suspension iteration count + token usage so
+ * `result.iterations` / `result.usage` reflect the full run, not
+ * just the post-resume portion. When the resumed call suspends
+ * again, the new state's iterations + usage also get the carry-
+ * forward so apps see a running total across an arbitrary number
+ * of suspension cycles.
+ */
+function mergeResumeCounters(
+  out: AgentResult | SuspendedRun,
+  state: SuspendedState,
+): AgentResult | SuspendedRun {
+  // +1 accounts for the suspended round itself — at suspension time
+  // the loop hadn't yet incremented `iterations` (we paused mid-
+  // batch, before tool execution). Supplying results to resume
+  // effectively completes that round.
+  const carryIter = state.iterations + 1
+  if ('status' in out) {
+    return {
+      ...out,
+      state: {
+        ...out.state,
+        iterations: out.state.iterations + carryIter,
+        usage: addUsage(out.state.usage, state.usage),
+      },
+    }
+  }
+  return {
+    ...out,
+    iterations: out.iterations + carryIter,
+    usage: addUsage(out.usage, state.usage),
+  }
+}
+function addUsage(
+  a: SuspendedState['usage'],
+  b: SuspendedState['usage'],
+): SuspendedState['usage'] {
+  return {
+    inputTokens: a.inputTokens + b.inputTokens,
+    outputTokens: a.outputTokens + b.outputTokens,
+    cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens,
+    cacheCreationTokens: a.cacheCreationTokens + b.cacheCreationTokens,
+  }
+}
+function rejectShouldSuspend(options: RunWithToolsOptions, entry: string): void {
+  if (options.shouldSuspend !== undefined) {
+    throw new BrainError(
+      `BrainManager.${entry}: \`shouldSuspend\` is only supported on \`runTools\` (the non-streaming + no-schema entrypoint) in V1. Run tools first with suspension, then call \`generate\` for the structured summary as a separate step.`,
+      { context: { entry } },
+    )
+  }
+}

package/src/index.ts CHANGED Viewed

@@ -10,7 +10,11 @@
 export { Agent } from './agent.ts'
 export type { AgentGenerateResult } from './agent_generate_result.ts'
 export type { AgentResult } from './agent_result.ts'
-export { AgentRunner, type AgentRunResult } from './agent_runner.ts'
+export {
+  AgentRunner,
+  type AgentRunMaybeSuspended,
+  type AgentRunResult,
+} from './agent_runner.ts'
 export type { AgentStreamEvent } from './agent_stream_event.ts'
 export {
   type AnthropicProviderConfig,
@@ -33,6 +37,7 @@ export {
 } from './brain_manager.ts'
 export { BrainProvider } from './brain_provider.ts'
 export { defineTool, type DefineToolSpec } from './define_tool.ts'
+export { MCPClientPool, type MCPClientFactory } from './mcp/pool.ts'
 export type { MCPServer, MCPServerToolConfig } from './mcp_server.ts'
 export type { OutputSchema } from './output_schema.ts'
 export { AnthropicProvider } from './providers/anthropic_provider.ts'
@@ -42,7 +47,18 @@ export { OllamaProvider } from './providers/ollama_provider.ts'
 export { OpenAICompatProvider } from './providers/openai_compat_provider.ts'
 export { OpenAIProvider } from './providers/openai_provider.ts'
 export { OpenAIResponsesProvider } from './providers/openai_responses_provider.ts'
-export type { Provider, RunWithToolsOptions } from './provider.ts'
+export type {
+  Provider,
+  RunWithToolsOptions,
+  RunWithToolsOptionsWithSuspend,
+} from './provider.ts'
+export {
+  appendResumeResults,
+  isSuspended,
+  type SuspendedRun,
+  type SuspendedState,
+  type ToolResultInput,
+} from './suspended_run.ts'
 export { Thread, type ThreadOptions, type ThreadState } from './thread.ts'
 export type { Tool, ToolContext } from './tool.ts'
 export { ToolExecutionError } from './tool_execution_error.ts'
@@ -50,6 +66,8 @@ export type {
   ChatOptions,
   ChatResult,
   ChatUsage,
+  CompactConfig,
+  CompactionBlock,
   ContentBlock,
   AudioBlock,
   AudioSource,

package/src/mcp/client.ts CHANGED Viewed

@@ -66,6 +66,15 @@ export class MCPClient {
   readonly server: MCPServer
   private readonly _client: Client
   private _connected = false
+  /**
+   * In-flight connect promise — set on the first concurrent
+   * `connect()` and cleared on settle. Subsequent callers that
+   * race against the first one await the same promise instead of
+   * each kicking off their own transport handshake. Necessary for
+   * pooled clients: a fresh `borrow()` followed by parallel
+   * `listTools()` + `callTool()` calls both hit the same connect.
+   */
+  private _connecting: Promise<void> | undefined
   private _transport: StreamableHTTPClientTransport | undefined
   private _authProvider: StoreBackedOAuthProvider | undefined
@@ -87,6 +96,14 @@ export class MCPClient {
   async connect(): Promise<void> {
     if (this._connected) return
+    if (this._connecting) return this._connecting
+    this._connecting = this._doConnect().finally(() => {
+      this._connecting = undefined
+    })
+    return this._connecting
+  }
+  private async _doConnect(): Promise<void> {
     const transport = this._buildTransport()
     this._transport = transport
     try {

package/src/mcp/index.ts CHANGED Viewed

@@ -15,6 +15,7 @@ export {
   type MCPOAuthStore,
   MemoryOAuthStore,
 } from './oauth.ts'
+export { MCPClientPool, type MCPClientFactory } from './pool.ts'
 export {
   resolveMcpTools,
   type ResolveMcpToolsOptions,

package/src/mcp/pool.ts ADDED Viewed

@@ -0,0 +1,106 @@
+/**
+ * `MCPClientPool` — long-lived, per-server `MCPClient` cache.
+ *
+ * Default `resolveMcpTools` flow constructs a fresh `MCPClient` per
+ * call to `runTools` / `runWithTools` / etc., handshakes the
+ * Streamable HTTP transport, lists tools, executes them, then
+ * closes the transport in a `finally`. For one-shot calls that's
+ * fine. For long-running agent workers — chat servers, background
+ * job processors — the per-call handshake adds noticeable
+ * latency and burns connection slots upstream.
+ *
+ * The pool keeps one `MCPClient` per `(server.name, server.url)`
+ * pair for the lifetime of the pool (or until `evict`).
+ * `borrow(server)` returns the pooled client, creating it lazily
+ * on first use; `borrow` does not connect — the first `listTools` /
+ * `callTool` on the client does. When the pool is in play,
+ * `resolveMcpTools` skips the per-call `close()` — the pool owns
+ * the lifetime — so subsequent calls reuse the existing transport.
+ *
+ * Apps own the pool's lifetime. Construct one at app boot, hand it
+ * to every provider (or to `BrainProvider` if using the DI
+ * helper), and call `pool.close()` on shutdown.
+ *
+ * ```ts
+ * const pool = new MCPClientPool()
+ *
+ * const openai = new OpenAIProvider(
+ *   'openai',
+ *   { driver: 'openai', apiKey: ... },
+ *   { mcpPool: pool },
+ * )
+ *
+ * // ... many runTools calls later, on graceful shutdown:
+ * await pool.close()
+ * ```
+ *
+ * Concurrency: `borrow()` is synchronous; `MCPClient.connect()`
+ * itself dedupes concurrent calls. Two parallel `runTools` calls
+ * sharing the same pooled client both await one handshake.
+ *
+ * Re-auth: when a borrowed client throws `MCPAuthRequiredError`,
+ * the pool keeps the (still un-authorized) client. Apps call
+ * `pool.evict(server)` after running `completeAuthorization` on
+ * a fresh client so subsequent borrows see the renewed state —
+ * or just reuse the same client the app authorized via the
+ * standard `MCPClient.completeAuthorization` flow.
+ */
+import type { MCPServer } from '../mcp_server.ts'
+import { MCPClient } from './client.ts'
+/** Internal — factory injection for tests. Defaults to `new MCPClient(server)`. */
+export type MCPClientFactory = (server: MCPServer) => MCPClient
+export class MCPClientPool {
+  private readonly clients: Map<string, MCPClient> = new Map()
+  private readonly factory: MCPClientFactory
+  constructor(factory: MCPClientFactory = (s) => new MCPClient(s)) {
+    this.factory = factory
+  }
+  /**
+   * Return the pooled client for `server`, constructing + caching it on
+   * first call. The client is NOT eagerly connected — the first
+   * `listTools` / `callTool` invocation triggers `connect()` once.
+   */
+  borrow(server: MCPServer): MCPClient {
+    const key = poolKey(server)
+    const existing = this.clients.get(key)
+    if (existing) return existing
+    const client = this.factory(server)
+    this.clients.set(key, client)
+    return client
+  }
+  /**
+   * Drop the cached client for `server` and close its transport.
+   * Useful after the app re-authorizes an OAuth server, or after a
+   * transient failure where the connection state is suspect and a
+   * fresh handshake on next borrow is preferable.
+   */
+  async evict(server: MCPServer): Promise<void> {
+    const key = poolKey(server)
+    const client = this.clients.get(key)
+    if (!client) return
+    this.clients.delete(key)
+    await client.close()
+  }
+  /** Close every pooled client. Call on app shutdown. */
+  async close(): Promise<void> {
+    const all = [...this.clients.values()]
+    this.clients.clear()
+    await Promise.all(all.map((c) => c.close()))
+  }
+  /** Whether the pool currently holds a client for `server`. Used by tests. */
+  has(server: MCPServer): boolean {
+    return this.clients.has(poolKey(server))
+  }
+}
+/** Pool key: name + url, so two `MCPServer`s with the same name but different URLs don't collide. */
+function poolKey(server: MCPServer): string {
+  return `${server.name}|${server.url}`
+}

package/src/mcp/resolve_mcp_tools.ts CHANGED Viewed

@@ -21,6 +21,7 @@
 import type { MCPServer } from '../mcp_server.ts'
 import type { Tool, ToolContext } from '../tool.ts'
 import { MCPClient } from './client.ts'
+import type { MCPClientPool } from './pool.ts'
 export interface ResolvedMcpTools {
   tools: Tool[]
@@ -30,6 +31,14 @@ export interface ResolvedMcpTools {
 export interface ResolveMcpToolsOptions {
   /** Override the client factory — tests inject mock clients per server here. */
   clientFactory?(server: MCPServer): MCPClient
+  /**
+   * When set, clients are borrowed from the pool instead of being
+   * constructed fresh per call, and the returned `close` becomes a
+   * no-op — the pool owns the lifetime, and apps call
+   * `pool.close()` on shutdown. Takes precedence over
+   * `clientFactory` when both are set — `clientFactory` is then
+   * ignored (tests pass a factory to the pool itself).
+   */
+  pool?: MCPClientPool
 }
 const NAME_SEPARATOR = '__'
@@ -40,13 +49,16 @@ export async function resolveMcpTools(
 ): Promise<ResolvedMcpTools> {
   const clients: MCPClient[] = []
   const tools: Tool[] = []
+  const pooled = options.pool !== undefined
   for (const server of servers) {
     if (server.tools?.enabled === false) continue
-    const client = options.clientFactory
-      ? options.clientFactory(server)
-      : new MCPClient(server)
-    clients.push(client)
+    const client = options.pool
+      ? options.pool.borrow(server)
+      : options.clientFactory
+        ? options.clientFactory(server)
+        : new MCPClient(server)
+    if (!pooled) clients.push(client)
     const allowed = server.tools?.allowedTools
     const allowedSet = allowed ? new Set(allowed) : null
@@ -60,9 +72,15 @@ export async function resolveMcpTools(
   return {
     tools,
-    close: async () => {
-      await Promise.all(clients.map((c) => c.close()))
-    },
+    // Pooled clients live across calls — `close` becomes a no-op
+    // and the pool owns the lifetime. Non-pooled clients close
+    // here so each `runWithTools` invocation cleans up its own
+    // transports.
+    close: pooled
+      ? async () => {}
+      : async () => {
+          await Promise.all(clients.map((c) => c.close()))
+        },
   }
 }

package/src/persistence/brain_message.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * `BrainMessage` — the typed row of `brain_message`. One per turn.
+ *
+ * `content` mirrors `Message.content` — string for plain text or
+ * `ContentBlock[]` when the turn carries structured blocks
+ * (tool_use, tool_result, image, compaction, ...). JSONB hydration
+ * is automatic.
+ *
+ * Assistant turns carry `model` / `usage` / `stop_reason` /
+ * `response_id`; user turns leave them NULL. The repository's
+ * `appendTurn` helper writes the right shape per role.
+ */
+import { Model } from '@strav/database'
+import type { ChatUsage, ContentBlock } from '../types.ts'
+import { brainMessageSchema } from './schema/brain_message_schema.ts'
+export type BrainMessageRole = 'user' | 'assistant'
+export class BrainMessage extends Model {
+  static override readonly schema = brainMessageSchema
+  id!: string
+  tenant_id!: string
+  thread_id!: string
+  turn_index!: number
+  role!: BrainMessageRole
+  content!: string | ContentBlock[]
+  model!: string | null
+  usage!: ChatUsage | null
+  stop_reason!: string | null
+  response_id!: string | null
+  created_at!: Date
+}