npm - @strav/brain - Versions diffs - 1.0.0-alpha.9 → 1.0.1 - Mend

@strav/brain 1.0.0-alpha.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

package/package.json +23 -7
package/src/agent.ts +43 -5
package/src/agent_generate_result.ts +32 -0
package/src/agent_result.ts +7 -0
package/src/agent_runner.ts +218 -14
package/src/agent_stream_event.ts +100 -0
package/src/brain_config.ts +218 -1
package/src/brain_driver.ts +247 -0
package/src/brain_error.ts +86 -10
package/src/brain_manager.ts +359 -11
package/src/brain_provider.ts +79 -9
package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
package/src/drivers/anthropic/index.ts +1 -0
package/src/drivers/deepseek/deepseek_brain_driver.ts +117 -0
package/src/drivers/deepseek/index.ts +1 -0
package/src/drivers/gemini/gemini_brain_driver.ts +1064 -0
package/src/drivers/gemini/index.ts +1 -0
package/src/drivers/minimax/index.ts +1 -0
package/src/drivers/minimax/minimax_brain_driver.ts +84 -0
package/src/drivers/ollama/index.ts +1 -0
package/src/drivers/ollama/ollama_brain_driver.ts +86 -0
package/src/drivers/openai/index.ts +1 -0
package/src/drivers/openai/openai_brain_driver.ts +796 -0
package/src/drivers/openai/openai_helpers.ts +58 -0
package/src/drivers/openai/openai_message_builder.ts +187 -0
package/src/drivers/openai/openai_response_mapper.ts +70 -0
package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
package/src/drivers/openai/openai_tool_loop.ts +191 -0
package/src/drivers/openai_compat/index.ts +1 -0
package/src/drivers/openai_compat/openai_compat_brain_driver.ts +616 -0
package/src/drivers/openai_responses/index.ts +1 -0
package/src/drivers/openai_responses/openai_responses_brain_driver.ts +1015 -0
package/src/drivers/openrouter/index.ts +1 -0
package/src/drivers/openrouter/openrouter_brain_driver.ts +137 -0
package/src/drivers/qwen/index.ts +1 -0
package/src/drivers/qwen/qwen_brain_driver.ts +103 -0
package/src/index.ts +75 -11
package/src/mcp/client.ts +243 -0
package/src/mcp/index.ts +23 -0
package/src/mcp/oauth.ts +227 -0
package/src/mcp/pool.ts +106 -0
package/src/mcp/resolve_mcp_tools.ts +108 -0
package/src/mcp_server.ts +63 -0
package/src/output_schema.ts +72 -0
package/src/persistence/brain_message.ts +34 -0
package/src/persistence/brain_message_repository.ts +98 -0
package/src/persistence/brain_store.ts +166 -0
package/src/persistence/brain_suspended_run.ts +30 -0
package/src/persistence/brain_suspended_run_repository.ts +59 -0
package/src/persistence/brain_thread.ts +30 -0
package/src/persistence/brain_thread_repository.ts +56 -0
package/src/persistence/database_brain_store.ts +190 -0
package/src/persistence/index.ts +48 -0
package/src/persistence/schemas/brain_message_schema.ts +61 -0
package/src/persistence/schemas/brain_suspended_run_schema.ts +58 -0
package/src/persistence/schemas/brain_thread_schema.ts +50 -0
package/src/persistence/schemas/index.ts +3 -0
package/src/suspended_run.ts +153 -0
package/src/thread.ts +40 -1
package/src/tool.ts +7 -0
package/src/tool_runner.ts +81 -0
package/src/translate/index.ts +19 -0
package/src/translate/translate_cache.ts +78 -0
package/src/translate/translate_provider.ts +46 -0
package/src/translate/translator.ts +271 -0
package/src/types.ts +398 -1
package/src/zod/index.ts +121 -0
package/src/provider.ts +0 -74
package/src/providers/anthropic_provider.ts +0 -397

package/src/translate/translator.ts ADDED Viewed

@@ -0,0 +1,271 @@
+/**
+ * `Translator` — LLM-backed translation primitive on top of
+ * `BrainManager`. Sonnet-uniform by default (`tier: 'balanced'`),
+ * which routes to `claude-sonnet-4-6` on the Anthropic driver — apps
+ * override with `options.model` or `options.provider` per call.
+ *
+ * Two entry points:
+ *
+ *   - `translate(text, { to: [...] })` — fan-out one string into
+ *     every target language in parallel. Returns
+ *     `{ [langCode]: translated }`.
+ *
+ *   - `translateBatch(fields, { to: [...] })` — translate a
+ *     fixed-shape object (`{ title, body }`) into every target
+ *     language. Each target language runs in parallel; within a
+ *     language, all fields land in one model call so the model
+ *     keeps shared context (a `title` and `body` translated
+ *     together stay tonally consistent).
+ *
+ * Cross-cutting:
+ *
+ *   - **Structured output.** Uses `brain.generate(input, schema)`
+ *     with a JSON Schema that locks the response to the expected
+ *     keys, so models never sneak in commentary or transliterations.
+ *
+ *   - **Prompt caching.** The system prompt is identical across
+ *     every call (per-language hints ride in the user message), so
+ *     Anthropic prompt caching kicks in once the cache window warms.
+ *     Set `cache: false` on the constructor to opt out.
+ *
+ *   - **In-memory cache.** Identical `(model, from, to, text)`
+ *     tuples are served from a process-local LRU (default 1000
+ *     entries) — see `TranslateCache`. Pass `cacheSize: 0` to
+ *     disable.
+ *
+ *   - **Source language auto-detect.** Omit `from` and the user
+ *     message tells the model to detect the source. Apps that know
+ *     the source pass it explicitly for marginal quality + token
+ *     savings.
+ */
+import type { BrainManager } from '../brain_manager.ts'
+import type { OutputSchema } from '../output_schema.ts'
+import type { ChatOptions, ModelTier } from '../types.ts'
+import { cacheKey, TranslateCache } from './translate_cache.ts'
+/**
+ * Constructor options for `Translator`. Only `brain` is required;
+ * every other field falls back to the default noted on it.
+ */
+export interface TranslatorOptions {
+  /** `BrainManager` the translator issues its `generate` calls through. */
+  brain: BrainManager
+  /** Brain provider name. Defaults to the configured `brain.default`. */
+  provider?: string
+  /** Brain tier sugar — overridden by `model`. Default `'balanced'` (Sonnet on Anthropic per ADR-0004-style routing). */
+  tier?: ModelTier
+  /** Explicit model id. Wins over `tier`. */
+  model?: string
+  /** Override the system prompt. Apps localising the prompt itself reach for this. */
+  systemPrompt?: string
+  /** LRU capacity for the translation cache. `0` disables. Default `1000`. */
+  cacheSize?: number
+  /** Enable Anthropic prompt caching on the system prompt. Default `true`. Non-Anthropic providers ignore. */
+  cache?: boolean
+}
+/**
+ * Per-call options for `Translator.translate`. `to` is the only
+ * required field; `model` and `provider` override the constructor-
+ * level choices for this call only.
+ */
+export interface TranslateOptions {
+  /** Target BCP-47 language codes (`'th'`, `'zh-Hant'`, `'ja'`). */
+  to: readonly string[]
+  /** Source BCP-47 code. Omit to ask the model to detect. */
+  from?: string
+  /** Per-call model override (wins over the constructor's tier/model). */
+  model?: string
+  /** Per-call provider override. */
+  provider?: string
+  /** Cancellation signal — forwarded to every parallel `brain.generate` call. */
+  signal?: AbortSignal
+}
+/** Per-call options for `Translator.translateBatch` — currently the same shape as `TranslateOptions`. */
+export type BatchTranslateOptions = TranslateOptions
+/**
+ * Default system prompt — kept stable across every call so prompt
+ * caching can warm. Per-call specifics (source/target language,
+ * text, field shape) ride in the user message.
+ *
+ * `Translator` falls back to this when `TranslatorOptions.systemPrompt`
+ * is omitted. The text is sent to the model as the system prompt, so
+ * any edit here changes translation behaviour.
+ */
+export const DEFAULT_SYSTEM_PROMPT = `You are a translation engine.
+The user supplies (a) a source-language code (or "auto"), (b) a target BCP-47 language code, and (c) the source text or a JSON object of named source fields. Translate the source into the target language and output ONLY the translation in the required JSON shape.
+Rules:
+- Output ONLY the translated text in the requested JSON shape. Do not add explanations, notes, alternatives, or transliterations.
+- Preserve Markdown, HTML tags, links, mentions, hashtags, code spans, and emoji exactly as in the source.
+- Keep numbers, dates, currency symbols, and proper nouns recognisable in the target locale; do not invent translations for brand names.
+- If the source is already in the target language, output it unchanged.
+- For batch translations, every requested field must appear in the output — never drop a field.`
+/**
+ * LLM-backed translator. Each target language costs one structured-
+ * output `brain.generate` call, memoised in a per-instance
+ * `TranslateCache`.
+ *
+ * Cache identity is `(provider, model-or-tier, from, to, text)`. The
+ * provider is part of it because the model slot falls back to the
+ * tier name (e.g. `'balanced'`), which is not a concrete model id —
+ * without the provider, a per-call `provider` override could be
+ * answered with a translation another provider produced.
+ */
+export class Translator {
+  private readonly brain: BrainManager
+  private readonly provider: string | undefined
+  private readonly tier: ModelTier
+  private readonly explicitModel: string | undefined
+  private readonly systemPrompt: string
+  private readonly cache: TranslateCache
+  private readonly promptCache: boolean
+  /**
+   * @param options See `TranslatorOptions`. Defaults applied here:
+   *   `tier: 'balanced'`, `systemPrompt: DEFAULT_SYSTEM_PROMPT`,
+   *   `cacheSize: 1000`, `cache: true`.
+   */
+  constructor(options: TranslatorOptions) {
+    this.brain = options.brain
+    this.provider = options.provider
+    this.tier = options.tier ?? 'balanced'
+    this.explicitModel = options.model
+    this.systemPrompt = options.systemPrompt ?? DEFAULT_SYSTEM_PROMPT
+    this.cache = new TranslateCache(options.cacheSize ?? 1000)
+    this.promptCache = options.cache ?? true
+  }
+  /**
+   * Translate one string into every target language in parallel.
+   *
+   * @param text Source text.
+   * @param options Target codes plus optional source / model /
+   *   provider / signal.
+   * @returns `{ [lang]: translated }` with one entry per distinct
+   *   code in `options.to`; `{}` when `options.to` is empty.
+   * @throws Whatever a `brain.generate` call rejects with — a single
+   *   failed language rejects the whole `Promise.all`.
+   */
+  async translate(
+    text: string,
+    options: TranslateOptions,
+  ): Promise<Record<string, string>> {
+    // A repeated code would miss the cache alongside its twin and pay
+    // for a second model call; the result map holds one entry per
+    // code either way, so collapse duplicates up front.
+    const targets = Array.from(new Set(options.to))
+    if (targets.length === 0) return {}
+    const results = await Promise.all(
+      targets.map(async (lang) => {
+        const translated = await this.translateOne(text, lang, options)
+        return [lang, translated] as const
+      }),
+    )
+    return Object.fromEntries(results)
+  }
+  /**
+   * Translate a fixed-shape object of fields into every target
+   * language. Each target language runs in parallel; within a
+   * language, all fields are translated in one model call so context
+   * is shared.
+   *
+   * @param fields Source strings keyed by field name.
+   * @param options Same shape as `translate`'s options.
+   * @returns `{ [lang]: { ...fields } }` with one entry per distinct
+   *   code in `options.to`. Empty `options.to` yields `{}`; empty
+   *   `fields` yields an empty object per language without calling
+   *   the model.
+   * @throws Whatever `brain.generate` rejects with. The JSON Schema
+   *   marks every field as required, and the model is instructed
+   *   never to drop one; a missing key is expected to surface as a
+   *   `BrainError` from `generate`'s schema parser.
+   */
+  async translateBatch<T extends Record<string, string>>(
+    fields: T,
+    options: BatchTranslateOptions,
+  ): Promise<Record<string, T>> {
+    // Same duplicate-collapsing as `translate`.
+    const targets = Array.from(new Set(options.to))
+    if (targets.length === 0) return {}
+    const fieldNames = Object.keys(fields) as Array<keyof T & string>
+    if (fieldNames.length === 0) return Object.fromEntries(targets.map((l) => [l, {} as T]))
+    const results = await Promise.all(
+      targets.map(async (lang) => {
+        const translated = await this.translateBatchOne(fields, fieldNames, lang, options)
+        return [lang, translated] as const
+      }),
+    )
+    return Object.fromEntries(results)
+  }
+  /** Drop the in-memory LRU. Useful in tests to keep cases isolated. */
+  clearCache(): void {
+    this.cache.clear()
+  }
+  // ─── internals ──────────────────────────────────────────────────────
+  /**
+   * Model id (or tier name when no model is set) that a call will run
+   * under. Uses the same truthiness rule as `buildChatOptions`, so an
+   * empty-string `model` is treated as "not set" in both places.
+   */
+  private resolvedModel(per: TranslateOptions): string {
+    return per.model || this.explicitModel || this.tier
+  }
+  /**
+   * Value placed in the cache key's `model` slot: the resolved
+   * model/tier, prefixed with the effective provider when one is set.
+   * With no provider configured or passed, the slot is just the
+   * resolved model/tier.
+   */
+  private cacheScope(per: TranslateOptions): string {
+    const model = this.resolvedModel(per)
+    const provider = per.provider ?? this.provider
+    return provider ? `${provider}:${model}` : model
+  }
+  /**
+   * Build the `ChatOptions` for one `brain.generate` call: the system
+   * prompt (wrapped with `cache: true` when prompt caching is on),
+   * then model-or-tier, provider and abort signal.
+   */
+  private buildChatOptions(per: TranslateOptions): ChatOptions {
+    const opts: ChatOptions = {
+      system: this.promptCache
+        ? { text: this.systemPrompt, cache: true }
+        : this.systemPrompt,
+    }
+    // Precedence: per-call model, then constructor model, then tier.
+    if (per.model) opts.model = per.model
+    else if (this.explicitModel) opts.model = this.explicitModel
+    else opts.tier = this.tier
+    const provider = per.provider ?? this.provider
+    if (provider) opts.provider = provider
+    if (per.signal) opts.signal = per.signal
+    return opts
+  }
+  /**
+   * Translate `text` into a single language, serving from the cache
+   * when the same request was answered before.
+   */
+  private async translateOne(
+    text: string,
+    lang: string,
+    per: TranslateOptions,
+  ): Promise<string> {
+    const key = cacheKey({ model: this.cacheScope(per), from: per.from, to: lang, text })
+    const hit = this.cache.get(key)
+    if (hit !== undefined) return hit
+    // Single required string property, nothing else allowed.
+    const schema: OutputSchema<{ translation: string }> = {
+      name: 'translation',
+      description: `Translation of the source text into ${lang}.`,
+      jsonSchema: {
+        type: 'object',
+        properties: { translation: { type: 'string' } },
+        required: ['translation'],
+        additionalProperties: false,
+      },
+    }
+    const userMessage = `SOURCE_LANGUAGE: ${per.from ?? 'auto'}\nTARGET_LANGUAGE: ${lang}\nTEXT:\n${text}`
+    const result = await this.brain.generate(userMessage, schema, this.buildChatOptions(per))
+    const translated = result.value.translation
+    this.cache.set(key, translated)
+    return translated
+  }
+  /**
+   * Translate every field of `fields` into a single language with one
+   * model call, then cache each field's translation individually.
+   */
+  private async translateBatchOne<T extends Record<string, string>>(
+    fields: T,
+    fieldNames: readonly (keyof T & string)[],
+    lang: string,
+    per: BatchTranslateOptions,
+  ): Promise<T> {
+    const scope = this.cacheScope(per)
+    const keyFor = (name: keyof T & string) =>
+      cacheKey({ model: scope, from: per.from, to: lang, text: fields[name]! })
+    // Per-field cache: check every field; only call the model when at
+    // least one field is missing. The single model call still covers
+    // all fields (we don't sub-call per missing field — the context
+    // gain from a single call outweighs the extra translation work).
+    const fromCache: Partial<Record<string, string>> = {}
+    let allHit = true
+    for (const name of fieldNames) {
+      const hit = this.cache.get(keyFor(name))
+      if (hit === undefined) {
+        allHit = false
+      } else {
+        fromCache[name] = hit
+      }
+    }
+    if (allHit) return fromCache as T
+    // One required string property per field, nothing else allowed.
+    const properties: Record<string, unknown> = {}
+    for (const name of fieldNames) properties[name] = { type: 'string' }
+    const schema: OutputSchema<T> = {
+      name: 'batch_translation',
+      description: `Translation of every named field into ${lang}.`,
+      jsonSchema: {
+        type: 'object',
+        properties,
+        required: [...fieldNames],
+        additionalProperties: false,
+      },
+    }
+    // JSON.stringify keeps newlines and quotes inside a field value
+    // from breaking the one-field-per-line layout.
+    const fieldsBlock = fieldNames
+      .map((n) => `- ${n}: ${JSON.stringify(fields[n]!)}`)
+      .join('\n')
+    const userMessage = `SOURCE_LANGUAGE: ${per.from ?? 'auto'}\nTARGET_LANGUAGE: ${lang}\nFIELDS:\n${fieldsBlock}\n\nOutput a JSON object with these exact keys: ${fieldNames.join(', ')}.`
+    const result = await this.brain.generate(userMessage, schema, this.buildChatOptions(per))
+    const translated = result.value
+    // Refresh every field's entry, including ones that were cache hits.
+    for (const name of fieldNames) {
+      this.cache.set(keyFor(name), translated[name]!)
+    }
+    return translated
+  }
+}

package/src/types.ts CHANGED Viewed

@@ -71,7 +71,147 @@ export interface ToolResultBlock {
   isError?: boolean
 }
-export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
+/**
+ * Provider-emitted MCP tool-use block. Read-only — apps don't construct
+ * these; they appear in `assistant`-role messages when the model calls
+ * a tool exposed by a configured MCP server. Anthropic's backend
+ * invokes the MCP server itself and inlines the result as an
+ * `MCPToolResultBlock` in the same response, so the framework's
+ * agentic loop doesn't need to handle the call.
+ *
+ * Apps render these for observability (showing users that the model
+ * consulted Linear / Notion / GitHub via MCP) and for audit trails.
+ */
+export interface MCPToolUseBlock {
+  /** Discriminant within `ContentBlock`. */
+  type: 'mcp_tool_use'
+  /** Call identifier — presumably the value the paired `MCPToolResultBlock.toolUseId` carries (confirm). */
+  id: string
+  /** MCP server identifier — matches `MCPServer.name`. */
+  serverName: string
+  /** Tool name as exposed by the MCP server. */
+  name: string
+  /** Parsed input the model passed to the MCP tool. */
+  input: unknown
+}
+/**
+ * Provider-emitted MCP tool result. Pairs with `MCPToolUseBlock` by
+ * `toolUseId`. `content` is either a string or text blocks; `isError`
+ * is `true` when the MCP server returned an error.
+ */
+export interface MCPToolResultBlock {
+  /** Discriminant within `ContentBlock`. */
+  type: 'mcp_tool_result'
+  /** Links this result to the `MCPToolUseBlock` it answers. */
+  toolUseId: string
+  /** Tool output, either as one string or as a list of text blocks. */
+  content: string | TextBlock[]
+  /** `true` when the MCP server returned an error. */
+  isError?: boolean
+}
+/**
+ * Image input — attaches a picture to a user message so vision-
+ * capable models can see it alongside the text. This block carries
+ * images only: PDFs and audio use `DocumentBlock` / `AudioBlock`,
+ * and `ContentBlock` has no video variant.
+ *
+ * `source` is a discriminated union:
+ *   - `{ type: 'base64', mediaType, data }` — inline bytes for
+ *     uploads, screenshots, attachments your app already holds in
+ *     memory. `mediaType` is the IANA MIME (`image/png`,
+ *     `image/jpeg`, `image/webp`, `image/gif`); `data` is the
+ *     base64-encoded image (no `data:` prefix — the provider
+ *     translation adds it where needed).
+ *   - `{ type: 'url', url }` — remote image URL. Anthropic, OpenAI,
+ *     and Gemini all accept HTTPS URLs; check the provider's
+ *     domain allowlist if calls 404 (Anthropic was historically
+ *     stricter). For Gemini, GCS URIs (`gs://...`) also work.
+ *
+ * Vision support is provider- AND model-dependent. Cloud picks:
+ * Anthropic Claude 4 family, OpenAI gpt-4o / gpt-5 family, Gemini
+ * 2.x. Local: `llama3.2-vision`, `llava`, `qwen2.5-vl` on Ollama.
+ * Models without vision either reject the call or ignore the image.
+ */
+export interface ImageBlock {
+  type: 'image'
+  source:
+    | { type: 'base64'; mediaType: string; data: string }
+    | { type: 'url'; url: string }
+}
+/**
+ * Document input — attaches a PDF (V1 only — the providers that
+ * support documents currently all gate on `application/pdf`) to a
+ * user message. Anthropic surfaces it as a native `document` block;
+ * Gemini accepts it via `inlineData` / `fileData` with
+ * `application/pdf` mime; OpenAI / Ollama / DeepSeek don't support
+ * PDF blocks at all (apps split the PDF to images and use
+ * `ImageBlock`s for those vendors).
+ *
+ * The optional `title` is shown to the model on Anthropic (helpful
+ * for multi-document calls — "the contract", "the invoice"); other
+ * providers ignore it.
+ *
+ * `source` has the same two shapes as `ImageBlock.source`: inline
+ * base64 bytes with a `mediaType`, or a remote `url`.
+ */
+export interface DocumentBlock {
+  type: 'document'
+  source:
+    | { type: 'base64'; mediaType: string; data: string }
+    | { type: 'url'; url: string }
+  /** Optional title shown to the model (Anthropic uses it; others ignore). */
+  title?: string
+}
+/**
+ * Audio input — attaches a sound clip to a user message. V1
+ * coverage: Gemini supports audio natively via `inlineData` with
+ * audio MIMEs (`audio/mp3`, `audio/wav`, `audio/ogg`, `audio/flac`,
+ * `audio/webm`, `audio/aac`). Anthropic + OpenAI + Ollama don't
+ * accept audio in their chat APIs — OpenAI apps preprocess via
+ * Whisper; Anthropic apps wait for the audio block to land in the
+ * SDK; Ollama apps that need audio look at server-side
+ * transcription models.
+ *
+ * NOTE(review): `source` repeats the union that this file also
+ * exports as `AudioSource`; the two must be kept in sync by hand.
+ */
+export interface AudioBlock {
+  type: 'audio'
+  source:
+    | { type: 'base64'; mediaType: string; data: string }
+    | { type: 'url'; url: string }
+}
+/**
+ * Server-side compaction block. Anthropic's `compact-2026-01-12`
+ * beta returns a `compaction` block when an auto-compaction trigger
+ * fires during a request. The framework surfaces it on
+ * `result.content` and Thread persists it on the assistant turn so
+ * subsequent requests echo it back verbatim — the model only sees
+ * the summary + opaque blob from then on, and the older raw turns
+ * stay out of context.
+ *
+ * V1 produces these on Anthropic only. Other providers ignore the
+ * `compact` option silently, and never emit a `CompactionBlock`.
+ *
+ * Round-trip invariant: pass the block back unchanged. The
+ * `encryptedContent` blob is opaque metadata the server uses to
+ * stitch the compaction history together; the framework never
+ * mutates it.
+ *
+ * `content === null` means a compaction attempt failed (e.g.,
+ * malformed model output). The server treats these as no-ops on
+ * the next request, so apps don't need to special-case them.
+ *
+ * NOTE(review): `encryptedContent` is also nullable, but when the
+ * server omits it is not stated here — confirm whether it is null
+ * only alongside a null `content`.
+ */
+export interface CompactionBlock {
+  type: 'compaction'
+  /** Summary of compacted content. Null when compaction failed. */
+  content: string | null
+  /** Opaque metadata round-tripped verbatim on subsequent requests. */
+  encryptedContent: string | null
+}
+/**
+ * Union of every content block shape. `ChatResult.content` is typed
+ * as an array of these.
+ */
+export type ContentBlock =
+  | TextBlock
+  | ImageBlock
+  | DocumentBlock
+  | AudioBlock
+  | ToolUseBlock
+  | ToolResultBlock
+  | MCPToolUseBlock
+  | MCPToolResultBlock
+  | CompactionBlock
 /** A single conversation turn. `content` can be a bare string or a typed block list. */
 export interface Message {
@@ -95,6 +235,85 @@ export type SystemPrompt =
  * escape hatch in `ChatResult` is what they reach for when they need
  * provider-specific fields.
  */
+/**
+ * Server-side tool — work the provider's backend runs on behalf
+ * of the model. Unlike framework-local tools (`Tool` /
+ * `defineTool`), the model's call doesn't round-trip through
+ * the app's process; the provider executes the tool and inlines
+ * the result in the response.
+ *
+ * V1 coverage:
+ *   - **Anthropic**: `web_search`, `code_execution`, `web_fetch`.
+ *   - **Gemini**: `web_search` (Google Search), `code_execution`,
+ *     `url_context`.
+ *   - **OpenAI / DeepSeek / Ollama**: throw — OpenAI's server tools
+ *     live on the Responses API (separate slice); the compat
+ *     providers don't expose them.
+ *
+ * Cross-provider portability:
+ *   - `web_search` + `code_execution` work on both Anthropic and
+ *     Gemini.
+ *   - `web_fetch` is Anthropic-only.
+ *   - `url_context` is Gemini-only.
+ *
+ * Server tools combine freely with framework-local `Tool[]` and
+ * MCP servers — the model sees all three sets in one tool list.
+ *
+ * Passed per call as `ChatOptions.serverTools`.
+ */
+export type ServerTool =
+  | {
+      type: 'web_search'
+      /** Max times the model can call this tool per turn (Anthropic; Gemini ignores). */
+      maxUses?: number
+      /** Domain allowlist (Anthropic; Gemini ignores). Mutually exclusive with `blockedDomains`. */
+      allowedDomains?: readonly string[]
+      /** Domain blocklist (Anthropic; Gemini ignores). */
+      blockedDomains?: readonly string[]
+    }
+  | { type: 'code_execution' }
+  | {
+      type: 'web_fetch'
+      /** Max URL fetches per turn (Anthropic). */
+      maxUses?: number
+      /** Domain allowlist. */
+      allowedDomains?: readonly string[]
+      /** Domain blocklist. */
+      blockedDomains?: readonly string[]
+    }
+  | {
+      /** Gemini fetches the URL and surfaces grounded answers from it. */
+      type: 'url_context'
+    }
+/**
+ * Per-call compaction configuration, passed as
+ * `ChatOptions.compact`. Maps to Anthropic's
+ * `compact-2026-01-12` beta `edits[]` entry. All fields optional —
+ * omitting one falls back to the server's default (trigger:
+ * 150,000 input tokens; no extra instructions; no pause).
+ */
+export interface CompactConfig {
+  /**
+   * Trigger threshold in input tokens. Compaction fires once the
+   * conversation crosses this token count. Default 150,000 — same
+   * as the server-side default.
+   */
+  trigger?: number
+  /**
+   * Extra hint to the summarization model. Useful for biasing the
+   * compaction toward what your app actually cares to preserve
+   * ("keep all customer ids referenced", "preserve every diff
+   * hunk", ...).
+   */
+  instructions?: string
+  /**
+   * When `true`, the server returns the compaction block in-line
+   * but does NOT continue generation — the next assistant turn
+   * waits for an explicit re-prompt. Apps that want to inspect or
+   * gate compaction set this; default `false` (compaction is
+   * transparent).
+   */
+  pauseAfterCompaction?: boolean
+}
 export interface ChatOptions {
   /** Override the configured default model. Wins over `tier`. */
   model?: string
@@ -129,6 +348,56 @@ export interface ChatOptions {
    * provider by config; this is the override for that.
    */
   provider?: string
+  /**
+   * Cancel the in-flight operation. Aborting between iterations of
+   * a tool loop bails before the next model call; aborting mid-call
+   * propagates the SDK's native abort error (typically a `DOMException`
+   * with `name: 'AbortError'`). Streaming iterators reject on the
+   * next `for await` step.
+   */
+  signal?: AbortSignal
+  /**
+   * Server-side tools — work the provider's backend runs (web
+   * search, code execution, URL fetching). The model's calls
+   * don't round-trip through the framework's tool loop; results
+   * land inline in the response. Combines freely with
+   * framework-local `Tool[]` and MCP servers.
+   *
+   * V1 supports Anthropic + Gemini; OpenAI / DeepSeek / Ollama
+   * throw `BrainError` (use the Responses API for OpenAI, or
+   * route to Anthropic / Gemini).
+   */
+  serverTools?: readonly ServerTool[]
+  /**
+   * Server-side conversation compaction. When set, the provider
+   * auto-summarizes the older part of the message history once the
+   * `trigger` token threshold is reached; the summary lives on the
+   * response as a `CompactionBlock` that apps round-trip on
+   * subsequent requests (Thread does this automatically). Saves
+   * tokens on long threads without lossy client-side pruning.
+   *
+   * Only honored by `AnthropicBrainDriver` (driver `'anthropic'`),
+   * via the `compact-2026-01-12` beta. Silently ignored by every
+   * other provider so apps targeting multiple providers with the
+   * same options object don't have to special-case.
+   */
+  compact?: CompactConfig
+  /**
+   * Stateful conversation pointer — OpenAI Responses API. When set,
+   * the provider sends only the new turn(s); the server picks up
+   * from the prior `Response` identified by this id and replays
+   * the conversation server-side. Saves tokens on long threads.
+   *
+   * Only honored by `OpenAIResponsesBrainDriver` (driver
+   * `'openai-responses'`); silently ignored by every other provider
+   * — apps that target multiple providers with the same options
+   * object don't have to special-case.
+   *
+   * Pair with `ChatResult.responseId` (returned by every call) to
+   * thread the conversation forward. `Thread` does this
+   * automatically when its underlying provider supports it.
+   */
+  previousResponseId?: string
 }
 /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
@@ -151,6 +420,24 @@ export interface ChatResult<Raw = unknown> {
   stopReason: string | null
   usage: ChatUsage
   raw: Raw
+  /**
+   * Structured assistant content blocks — populated when the model
+   * emitted more than plain text on this turn (compaction blocks
+   * today; reasoning blocks once those surface). Apps that
+   * persist the conversation (`Thread`, custom stores) push this
+   * onto the message history when present so round-trippable
+   * blocks survive subsequent requests. Undefined when the turn
+   * was plain text only.
+   */
+  content?: ContentBlock[]
+  /**
+   * Provider response id when the provider exposes stateful
+   * conversations (currently OpenAI Responses API). Apps thread
+   * this forward via `ChatOptions.previousResponseId` so the
+   * server replays prior turns without re-sending them.
+   * Undefined for providers that don't support the pattern.
+   */
+  responseId?: string
 }
 /**
@@ -161,3 +448,113 @@ export interface ChatResult<Raw = unknown> {
 export type StreamEvent =
   | { type: 'text'; delta: string }
   | { type: 'stop'; stopReason: string | null; usage: ChatUsage }
+/**
+ * Per-call options for `brain.embed(...)`. Keeps the `ChatOptions`
+ * knobs that still apply to embedding (`model`, `provider`,
+ * `signal`) and adds `dimensions`; chat-specific knobs (system
+ * prompt, thinking, cache, tools) don't apply.
+ */
+export interface EmbedOptions {
+  /** Override the configured default embedding model. */
+  model?: string
+  /**
+   * Override the default provider. Must name a provider that
+   * implements `embed` (V1: OpenAI, Gemini, Ollama; Anthropic +
+   * DeepSeek throw with a clear "route to a different provider"
+   * message).
+   */
+  provider?: string
+  /**
+   * Optional dimensionality hint. OpenAI passes through as
+   * `dimensions`; Gemini as `outputDimensionality`. Providers
+   * that ignore it silently drop the field.
+   */
+  dimensions?: number
+  /** Cancellation signal — same shape as `ChatOptions.signal`. */
+  signal?: AbortSignal
+}
+/**
+ * Per-call options for `brain.transcribe(...)`. Every field is
+ * optional. The audio itself is not part of this object — it is
+ * passed to `transcribe` directly as an `AudioSource`.
+ */
+export interface TranscribeOptions {
+  /** Override the configured default transcription model. */
+  model?: string
+  /**
+   * Override the default provider. Must name a provider that
+   * implements `transcribe` (V1: OpenAI / Gemini / Ollama;
+   * Anthropic + DeepSeek throw).
+   */
+  provider?: string
+  /**
+   * Optional BCP-47 language hint (`en`, `fr`, `ja`). Improves
+   * accuracy when known; models without hint support ignore.
+   */
+  language?: string
+  /**
+   * Optional bias prompt to steer vocabulary / style / formatting.
+   * OpenAI calls this `prompt`; Gemini-via-chat threads it into
+   * the system message; others ignore.
+   */
+  prompt?: string
+  /** Cancellation signal — same shape as `ChatOptions.signal`. */
+  signal?: AbortSignal
+}
+/**
+ * Audio source — same discriminated union as
+ * `AudioBlock.source`, named separately for `transcribe(...)`
+ * which takes it directly (no wrapping `AudioBlock` shell).
+ *
+ * NOTE(review): `AudioBlock.source` spells this union out inline
+ * instead of referencing `AudioSource`, so the two declarations
+ * must be kept in sync by hand.
+ */
+export type AudioSource =
+  | { type: 'base64'; mediaType: string; data: string }
+  | { type: 'url'; url: string }
+/**
+ * Result of one `transcribe` call. `text` is the transcribed
+ * audio; `language` / `duration` are surfaced when the provider
+ * returns them (OpenAI does on the `verbose_json` response
+ * format; Gemini's chat-wrap path doesn't). `raw` is the
+ * provider's full native response for fields the framework
+ * doesn't surface.
+ */
+export interface TranscribeResult<Raw = unknown> {
+  /** The transcribed audio. */
+  text: string
+  /** Presumably the model that produced the transcript, as on `EmbedResult.model` — confirm. */
+  model: string
+  /** BCP-47 detected (or echoed) language. Optional. */
+  language?: string
+  /** Audio duration in seconds. Optional. */
+  duration?: number
+  /** Provider's full native response. */
+  raw: Raw
+}
+/**
+ * Result of one `embed` call. `embeddings[i]` is the vector for
+ * the i-th input text. `model` is the model the provider used
+ * (echoed back for logging). `usage.inputTokens` is the total
+ * tokens consumed across all inputs.
+ */
+export interface EmbedResult<Raw = unknown> {
+  /** One vector per input text, in input order. */
+  embeddings: number[][]
+  /** Model the provider used. */
+  model: string
+  /** Only input tokens are reported for embedding calls. */
+  usage: { inputTokens: number }
+  /** Provider's full native response — escape hatch for fields the framework doesn't surface. */
+  raw: Raw
+}
+/**
+ * Result of a structured-output call. `value` is the parsed JSON
+ * shaped to the `OutputSchema<T>` passed in. `text` is the raw JSON
+ * string the model produced (useful for logging / debugging when
+ * `parse` rejects). `raw` is the provider's full native response.
+ *
+ * `stopReason`, `usage` and `raw` have the same types as the
+ * same-named fields on `ChatResult`.
+ */
+export interface GenerateResult<T = unknown, Raw = unknown> {
+  /** Parsed JSON, shaped to the schema passed in. */
+  value: T
+  /** Raw JSON string the model produced. */
+  text: string
+  model: string
+  stopReason: string | null
+  usage: ChatUsage
+  /** Provider's full native response. */
+  raw: Raw
+  /** See `ChatResult.responseId`. */
+  responseId?: string
+}