npm - @strav/brain - Versions diffs - 0.4.31 → 1.0.0-alpha.11 - Mend

@strav/brain 0.4.31 → 1.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/package.json +18 -20
package/src/agent.ts +50 -75
package/src/agent_result.ts +32 -0
package/src/agent_runner.ts +63 -0
package/src/brain_config.ts +95 -0
package/src/brain_error.ts +29 -0
package/src/brain_manager.ts +186 -123
package/src/brain_provider.ts +104 -6
package/src/define_tool.ts +42 -0
package/src/index.ts +44 -41
package/src/mcp_server.ts +47 -0
package/src/provider.ts +83 -0
package/src/providers/anthropic_provider.ts +435 -232
package/src/providers/openai_provider.ts +350 -503
package/src/thread.ts +99 -0
package/src/tool.ts +28 -44
package/src/tool_execution_error.ts +26 -0
package/src/types.ts +164 -237
package/CHANGELOG.md +0 -44
package/README.md +0 -121
package/src/helpers.ts +0 -1082
package/src/mcp_toolbox.ts +0 -62
package/src/memory/context_budget.ts +0 -120
package/src/memory/index.ts +0 -17
package/src/memory/memory_manager.ts +0 -168
package/src/memory/semantic_memory.ts +0 -89
package/src/memory/strategies/sliding_window.ts +0 -20
package/src/memory/strategies/summarize.ts +0 -157
package/src/memory/thread_store.ts +0 -56
package/src/memory/token_counter.ts +0 -101
package/src/memory/types.ts +0 -68
package/src/providers/google_provider.ts +0 -496
package/src/providers/openai_responses_provider.ts +0 -321
package/src/utils/error_scrub.ts +0 -5
package/src/utils/prompt.ts +0 -65
package/src/utils/retry.ts +0 -104
package/src/utils/schema.ts +0 -27
package/src/utils/sse_parser.ts +0 -62
package/src/workflow.ts +0 -199
package/tsconfig.json +0 -5

package/src/thread.ts ADDED Viewed

@@ -0,0 +1,99 @@
+/**
+ * `Thread` — multi-turn conversation that retains its message history
+ * across calls. Built on top of `BrainManager.chat` (no provider
+ * coupling); apps that want a stateless one-shot use
+ * `BrainManager.chat` directly.
+ *
+ * State model: the thread owns an append-only `messages` array. Each
+ * `send(text)` appends a user turn, calls `brain.chat`, appends the
+ * assistant reply, and returns the assistant's text. The full message
+ * history is serializable via `toJSON()` so apps can persist a thread
+ * across requests (e.g. one row per conversation in Postgres).
+ *
+ * What's NOT here in V1:
+ *   - Auto-compaction. Long threads accumulate without bound; apps
+ *     that need bounded context handle this themselves (prune
+ *     `thread.messages` in place, or use the underlying provider's
+ *     server-side compaction feature once that ships in V2).
+ *   - Streaming `send`. The thread's `send()` is awaited-fully; for
+ *     token-by-token streaming in a conversation, call
+ *     `brain.stream(thread.messages.concat(newUser))` directly.
+ */
+import type { BrainManager } from './brain_manager.ts'
+import type { ChatOptions, Message, SystemPrompt } from './types.ts'
+export interface ThreadOptions {
+  /** System prompt — applied to every `send()` call; when set, it replaces any `system` passed per call. Supports cache flags. */
+  system?: SystemPrompt
+  /** Per-thread `ChatOptions` defaults — merged with per-call overrides on `send()`; the per-call value wins on conflict. */
+  options?: ChatOptions
+}
+/** Serializable snapshot of a `Thread`. What `toJSON()` produces / `fromJSON()` accepts. */
+export interface ThreadState {
+  messages: Message[] // conversation history, in order
+  system?: SystemPrompt // written by `toJSON()` only when the thread has a system prompt
+  options?: ChatOptions // written by `toJSON()` only when the thread has default options
+}
+/**
+ * Stateful conversation wrapper around `BrainManager.chat`: keeps the
+ * message history in memory and replays the whole history on every
+ * `send()`.
+ */
+export class Thread {
+  /**
+   * Conversation history, in order. The property binding is `readonly`
+   * (the array is never replaced), but the array itself is mutable:
+   * `send()` appends to it and `fromJSON()` fills it.
+   */
+  readonly messages: Message[] = []
+  /** System prompt sent with every `send()` call, when one was configured. */
+  readonly system?: SystemPrompt
+  /** Thread-level `ChatOptions` defaults, when configured. */
+  readonly options?: ChatOptions
+  private readonly brain: BrainManager
+  /**
+   * @param brain Manager used to perform each chat call.
+   * @param opts Optional system prompt and default chat options.
+   */
+  constructor(brain: BrainManager, opts: ThreadOptions = {}) {
+    this.brain = brain
+    // Assign only what was provided so absent fields stay absent
+    // instead of becoming explicit `undefined` properties.
+    if (opts.system !== undefined) this.system = opts.system
+    if (opts.options !== undefined) this.options = opts.options
+  }
+  /**
+   * Append a user turn, call the model, append the assistant reply,
+   * and return the reply text. Per-call options override the
+   * thread's defaults; `system` always comes from the thread.
+   *
+   * The exchange is all-or-nothing: if the chat call fails, the user
+   * turn is removed again so the history never ends on an unanswered
+   * user message and a retry does not send the same text twice.
+   *
+   * @param text User message for this turn.
+   * @param options Per-call overrides; a `system` entry here is ignored.
+   * @returns The assistant's reply text.
+   * @throws Whatever `brain.chat` throws, after the history is rolled back.
+   */
+  async send(text: string, options: ChatOptions = {}): Promise<string> {
+    // System is owned by the thread; a per-call `system` is dropped
+    // unconditionally (even when the thread has no prompt of its own)
+    // so a caller can't drift the conversation mid-thread by changing
+    // the system prompt every turn.
+    const { system: _ignoredSystem, ...perCall } = options
+    const merged: ChatOptions = {
+      ...(this.options ?? {}),
+      ...perCall,
+      ...(this.system !== undefined ? { system: this.system } : {}),
+    }
+    const userTurn: Message = { role: 'user', content: text }
+    this.messages.push(userTurn)
+    try {
+      const result = await this.brain.chat(this.messages, merged)
+      this.messages.push({ role: 'assistant', content: result.text })
+      return result.text
+    } catch (err) {
+      // Roll back by identity rather than `pop()` so an unrelated
+      // message appended while the call was in flight is not removed.
+      const at = this.messages.lastIndexOf(userTurn)
+      if (at !== -1) this.messages.splice(at, 1)
+      throw err
+    }
+  }
+  /** Number of messages in the history (not exchanges): each successful `send()` adds 2 (user + assistant). */
+  get length(): number {
+    return this.messages.length
+  }
+  /**
+   * Serialize to a plain object — pass to `Thread.fromJSON` to restore.
+   * The `messages` array is a shallow copy: the array is new, the
+   * message objects inside it are shared with the thread.
+   */
+  toJSON(): ThreadState {
+    const state: ThreadState = { messages: [...this.messages] }
+    if (this.system !== undefined) state.system = this.system
+    if (this.options !== undefined) state.options = this.options
+    return state
+  }
+  /**
+   * Restore a thread from a serialized snapshot. The `BrainManager`
+   * is passed in fresh — only the conversation state lives on disk;
+   * the manager is rebuilt at app boot.
+   *
+   * @param brain Manager the restored thread will call.
+   * @param state Snapshot previously produced by `toJSON()`.
+   * @returns A new thread whose history holds the snapshot's messages.
+   */
+  static fromJSON(brain: BrainManager, state: ThreadState): Thread {
+    const options: ThreadOptions = {}
+    if (state.system !== undefined) options.system = state.system
+    if (state.options !== undefined) options.options = state.options
+    const thread = new Thread(brain, options)
+    // Element-wise push (not spread) so very long histories cannot
+    // exceed the engine's argument-count limit.
+    for (const m of state.messages) thread.messages.push(m)
+    return thread
+  }
+}

package/src/tool.ts CHANGED Viewed

@@ -1,51 +1,35 @@
-import { zodToJsonSchema } from './utils/schema.ts'
-import type { ToolDefinition, JsonSchema } from './types.ts'
 /**
- * Define a tool that an agent can invoke.
+ * `Tool` — the framework-native shape every tool implementation
+ * conforms to. Providers translate the `name`, `description`, and
+ * `inputSchema` into their vendor's tool-definition wire format;
+ * `execute` runs in-process on the framework side when the model
+ * calls the tool.
  *
- * Accepts either a Zod schema or a raw JSON Schema object
- * for `parameters`. Zod schemas are automatically converted.
+ * `inputSchema` is plain JSON Schema (draft 2020-12 compatible).
+ * Apps that prefer Zod use the SDK's helpers and feed the resulting
+ * JSON Schema into `defineTool`; the framework deliberately doesn't
+ * couple to Zod so apps stay free to bring whatever schema library
+ * they want.
  *
- * @example
- * const searchTool = defineTool({
- *   name: 'search',
- *   description: 'Search the database',
- *   parameters: z.object({ query: z.string() }),
- *   execute: async ({ query }, context) => {
- *     const userId = context?.userId
- *     return await db.search(query, { userId })
- *   },
- * })
+ * Generics: `TInput` is what `execute` receives (after the model's
+ * raw input has been narrowed by validation at the call site, when
+ * apps choose to validate). `TOutput` is what the agentic loop
+ * appends as the `tool_result.content`. Both default to `unknown`
+ * for apps that don't want the cognitive overhead of typing tools.
  */
-export function defineTool<TArgs = any, TContext = Record<string, unknown>>(config: {
-  name: string
-  description: string
-  parameters: any
-  execute: (args: TArgs, context?: TContext) => unknown | Promise<unknown>
-}): ToolDefinition {
-  return {
-    name: config.name,
-    description: config.description,
-    parameters: zodToJsonSchema(config.parameters) as JsonSchema,
-    execute: config.execute as (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>,
-  }
+export interface ToolContext {
+  /** Provider-assigned call id — matches `ToolUseBlock.id`. */
+  readonly callId: string
+  /** Per-run free-form context bag passed by the caller. NOTE(review): described as optional, but the property is not declared `?` — presumably an empty bag is supplied when the caller passes none; confirm against the runner. */
+  readonly context: Readonly<Record<string, unknown>>
 }
-/**
- * Group related tools into a named collection.
- *
- * A toolbox is simply a labeled array — useful for organizing
- * tools by domain (e.g., database tools, API tools) and
- * spreading them into an agent's `tools` array.
- *
- * @example
- * const dbTools = defineToolbox('database', [searchTool, insertTool])
- *
- * class MyAgent extends Agent {
- *   tools = [...dbTools, weatherTool]
- * }
- */
-export function defineToolbox(_name: string, tools: ToolDefinition[]): ToolDefinition[] {
-  return tools
+export interface Tool<TInput = unknown, TOutput = unknown> {
+  name: string // identifier the model uses to call the tool; providers send it in the tool definition
+  description: string // human-readable description; providers send it in the tool definition
+  /** JSON Schema for the tool's input. Providers translate this into their wire format. */
+  inputSchema: Record<string, unknown>
+  /** In-process executor; receives the tool input plus the per-call `ToolContext`. Throws propagate as `ToolExecutionError` through the runner. */
+  execute(input: TInput, ctx: ToolContext): Promise<TOutput>
 }

package/src/tool_execution_error.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * `ToolExecutionError` — wrapper thrown by the agentic loop when a
+ * tool's `execute` function throws. Carries the tool name + the
+ * provider's call id on `context` so apps building error reporters /
+ * traces can correlate failures with model output without parsing
+ * stack frames.
+ *
+ * V1 propagates these out of `runWithTools` — the loop aborts on the
+ * first tool failure. A later slice may add a graceful path
+ * (`{ type: 'tool_result', isError: true }` is appended and the
+ * loop continues) but apps that need that today can catch the
+ * error, append the result themselves, and re-call the runner.
+ */
+import { StravError } from '@strav/kernel'
+/**
+ * Turn an arbitrary thrown value into message text without ever
+ * throwing itself. `Error`s contribute their `message`; other values
+ * use their string form, except that the uninformative
+ * `[object Object]` is replaced by a JSON rendering when one can be
+ * produced.
+ */
+function describeCause(cause: unknown): string {
+  if (cause instanceof Error) return cause.message
+  let text: string
+  try {
+    text = String(cause)
+  } catch {
+    // Objects without a usable `toString` (e.g. `Object.create(null)`)
+    // make `String()` throw; that must not mask the tool failure.
+    text = '[object Object]'
+  }
+  if (text !== '[object Object]') return text
+  try {
+    // `JSON.stringify` yields `undefined` for values it cannot represent.
+    return JSON.stringify(cause) ?? text
+  } catch {
+    // Cyclic structures or BigInt members: keep the generic text.
+    return text
+  }
+}
+/**
+ * Error raised when a tool's `execute` fails. The tool name and the
+ * provider's call id are attached as `context`, and the original
+ * thrown value is preserved as `cause`.
+ */
+export class ToolExecutionError extends StravError {
+  /**
+   * @param toolName Name of the tool whose `execute` threw.
+   * @param callId Provider-assigned id of the failing tool call.
+   * @param cause The value that was thrown; need not be an `Error`.
+   */
+  constructor(toolName: string, callId: string, cause: unknown) {
+    const message = describeCause(cause)
+    super(
+      `Tool "${toolName}" execution failed: ${message}`,
+      { code: 'brain.tool-execution-failed', status: 500 },
+      { context: { tool: toolName, callId }, cause },
+    )
+  }
+}

package/src/types.ts CHANGED Viewed

@@ -1,275 +1,202 @@
-// ── JSON Schema ──────────────────────────────────────────────────────────────
-/** Minimal recursive JSON Schema type. */
-export type JsonSchema = Record<string, unknown>
-// ── SSE ──────────────────────────────────────────────────────────────────────
-export interface SSEEvent {
-  event?: string
-  data: string
-}
+/**
+ * Public types for the brain runtime.
+ *
+ * Apps work with three high-level shapes:
+ *
+ *   - `Message` — a user/assistant turn in a conversation. `content` is
+ *     either a plain string or a list of `ContentBlock`s for richer
+ *     payloads (cached blocks, images in a later slice).
+ *
+ *   - `ChatOptions` — per-call knobs: model selection (explicit `model`
+ *     or `tier` sugar), `system` prompt with optional cache flag,
+ *     `maxTokens`, `thinking`, `effort`, etc.
+ *
+ *   - `ChatResult` — what comes back from `chat()`: assistant `text`,
+ *     `usage` (including cache hit/miss counters), `stopReason`, the
+ *     `model` that actually answered, and a `raw` escape hatch with the
+ *     provider's native response.
+ *
+ * The streaming side adds `StreamEvent` — a discriminated union of the
+ * events a provider emits while a response is being generated. V1
+ * covers text deltas, the final-message event, and `stopReason`;
+ * thinking blocks / tool-use streams land when those features ship.
+ */
-// ── Usage ────────────────────────────────────────────────────────────────────
+/** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
+export type ModelTier = 'fast' | 'balanced' | 'powerful'
-export interface Usage {
-  inputTokens: number
-  outputTokens: number
-  totalTokens: number
+/**
+ * A text content block. The `cache` flag lets apps mark long, stable
+ * prefixes for prompt caching; providers translate this to whatever
+ * cache mechanism their SDK exposes (Anthropic: `cache_control:
+ * {type: 'ephemeral'}`).
+ */
+export interface TextBlock {
+  type: 'text'
+  text: string
+  /** Mark this block as a cache breakpoint. Default `false`. */
+  cache?: boolean
 }
-// ── Messages ─────────────────────────────────────────────────────────────────
-export interface ToolCall {
+/**
+ * Provider-emitted tool-use block. Appears in `assistant`-role
+ * messages when the model decides to call a tool. `input` is the
+ * parsed JSON the model produced for the tool's `inputSchema`; apps
+ * that need to validate it (Zod, ajv, etc.) do so at the call site.
+ *
+ * The agentic loop creates a matching `ToolResultBlock` and appends
+ * it to the next `user`-role message before re-asking the model.
+ */
+export interface ToolUseBlock {
+  type: 'tool_use'
+  /** Provider-assigned call id. The matching tool_result references this verbatim. */
   id: string
+  /** Tool name — matches a registered `Tool.name`. */
   name: string
-  arguments: Record<string, unknown>
-}
-export interface ContentBlock {
-  type: 'text' | 'tool_use' | 'tool_result'
-  text?: string
-  id?: string
-  name?: string
-  input?: Record<string, unknown>
-  toolUseId?: string
-  content?: string
+  /** Parsed input the model produced. Apps validate against the tool's schema. */
+  input: unknown
 }
-export interface Message {
-  role: 'user' | 'assistant' | 'tool'
-  content: string | ContentBlock[]
-  toolCalls?: ToolCall[]
-  toolCallId?: string
-}
-// ── Tool Definition ──────────────────────────────────────────────────────────
-export interface ToolDefinition {
-  name: string
-  description: string
-  parameters: JsonSchema
-  execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
-}
-// ── Completion Request / Response ────────────────────────────────────────────
-export interface CompletionRequest {
-  model: string
-  messages: Message[]
-  system?: string
-  tools?: ToolDefinition[]
-  toolChoice?: 'auto' | 'required' | { name: string }
-  maxTokens?: number
-  temperature?: number
-  schema?: JsonSchema
-  stopSequences?: string[]
-}
-export interface CompletionResponse {
-  id: string
-  content: string
-  toolCalls: ToolCall[]
-  stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
-  usage: Usage
-  raw: unknown
-}
-// ── Streaming ────────────────────────────────────────────────────────────────
-export interface StreamChunk {
-  type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
-  text?: string
-  toolCall?: Partial<ToolCall>
-  toolIndex?: number
-  usage?: Usage
-}
-// ── Output Schema ────────────────────────────────────────────────────────────
-/** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
-export interface OutputSchema {
-  parse?: (data: unknown) => unknown
-  [key: string]: unknown
-}
-// ── Agent ────────────────────────────────────────────────────────────────────
-export interface ToolCallRecord {
-  name: string
-  arguments: Record<string, unknown>
-  result: unknown
-  duration: number
-}
-export interface AgentResult<T = any> {
-  data: T
-  text: string
-  toolCalls: ToolCallRecord[]
-  messages: Message[]
-  usage: Usage
-  iterations: number
-}
-export interface AgentEvent {
-  type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
-  text?: string
-  toolCall?: ToolCallRecord
-  iteration?: number
-  result?: AgentResult
-  suspended?: SuspendedRun
-}
-// ── Suspend / Resume ─────────────────────────────────────────────────────────
 /**
- * A JSON-serializable snapshot of an agent loop at the moment it suspended.
- *
- * All fields are plain data — no functions, class instances, or cycles — so
- * the snapshot can be stringified, stored across a process boundary, and
- * later passed to `AgentRunner.resume()` to continue the run.
+ * Result of executing a tool. Appended to a `user`-role message and
+ * fed back to the model. `content` is either a plain string (the
+ * common case) or a list of text blocks for richer payloads. Mark
+ * `isError: true` so the model knows the tool call failed and can
+ * adjust its approach.
  */
-export interface SerializedAgentState {
-  messages: Message[]
-  allToolCalls: ToolCallRecord[]
-  totalUsage: Usage
-  iterations: number
+export interface ToolResultBlock {
+  type: 'tool_result'
+  toolUseId: string
+  content: string | TextBlock[]
+  isError?: boolean
 }
 /**
- * Result of an agent run that was suspended before executing one or more
- * tool calls. The integrator is expected to obtain tool results out-of-band
- * (human approval, external system, queued job, etc.) and call
- * `AgentRunner.resume(state, toolResults)` to continue.
+ * Provider-emitted MCP tool-use block. Read-only — apps don't construct
+ * these; they appear in `assistant`-role messages when the model calls
+ * a tool exposed by a configured MCP server. Anthropic's backend
+ * invokes the MCP server itself and inlines the result as an
+ * `MCPToolResultBlock` in the same response, so the framework's
+ * agentic loop doesn't need to handle the call.
  *
- * `pendingToolCalls` contains the pending call that triggered suspension
- * plus any subsequent tool calls from the same batch that have not been
- * executed. Results must be supplied for each of them on resume so the
- * conversation remains well-formed for the provider.
+ * Apps render these for observability (showing users that the model
+ * consulted Linear / Notion / GitHub via MCP) and for audit trails.
  */
-export interface SuspendedRun {
-  status: 'suspended'
-  pendingToolCalls: ToolCall[]
-  state: SerializedAgentState
-}
-/** Result of a pending tool call, supplied to `AgentRunner.resume()`. */
-export interface ToolCallResult {
-  toolCallId: string
-  result: unknown
+export interface MCPToolUseBlock {
+  type: 'mcp_tool_use'
+  id: string
+  /** MCP server identifier — matches `MCPServer.name`. */
+  serverName: string
+  /** Tool name as exposed by the MCP server. */
+  name: string
+  /** Parsed input the model passed to the MCP tool. */
+  input: unknown
 }
-// ── Workflow ──────────────────────────────────────────────────────────────────
-export interface WorkflowResult {
-  results: Record<string, AgentResult>
-  usage: Usage
-  duration: number
+/**
+ * Provider-emitted MCP tool result. Pairs with `MCPToolUseBlock` by
+ * `toolUseId`. `content` is either a string or text blocks; `isError`
+ * is `true` when the MCP server returned an error.
+ */
+export interface MCPToolResultBlock {
+  type: 'mcp_tool_result'
+  toolUseId: string
+  content: string | TextBlock[]
+  isError?: boolean
 }
-// ── Embedding ────────────────────────────────────────────────────────────────
+export type ContentBlock =
+  | TextBlock
+  | ToolUseBlock
+  | ToolResultBlock
+  | MCPToolUseBlock
+  | MCPToolResultBlock
-export interface EmbeddingResponse {
-  embeddings: number[][]
-  model: string
-  usage: { totalTokens: number }
+/** A single conversation turn. `content` can be a bare string or a typed block list. */
+export interface Message {
+  role: 'user' | 'assistant' // no separate tool role: tool results travel as `ToolResultBlock`s inside a `user` message
+  content: string | ContentBlock[] // bare text, or typed blocks for richer payloads
 }
-// ── Transcription (Speech-to-Text) ───────────────────────────────────────────
+/**
+ * The `system` prompt. Either a plain string (no cache) or a structured
+ * form that lets apps mark the prompt as cached. Apps that want
+ * fine-grained control over multi-block system prompts pass an array.
+ */
+export type SystemPrompt =
+  | string
+  | { text: string; cache?: boolean }
+  | Array<{ text: string; cache?: boolean }>
-export interface TranscribeRequest {
-  /** Audio bytes. Most STT endpoints cap at ~25MB; chunk longer recordings. */
-  audio: Uint8Array | Blob
-  /**
-   * MIME type of the audio. Required for providers that infer format from
-   * the multipart filename or rely on it for inline base64 (Gemini).
-   * Examples: 'audio/m4a', 'audio/mpeg', 'audio/wav', 'audio/ogg',
-   * 'audio/webm', 'audio/flac'.
-   */
-  contentType?: string
-  /** Override the provider's default STT model. */
+/**
+ * Per-call options. Generics are deliberately conservative — apps
+ * don't usually need to type-narrow the provider response; the `raw`
+ * escape hatch in `ChatResult` is what they reach for when they need
+ * provider-specific fields.
+ */
+export interface ChatOptions {
+  /** Override the configured default model. Wins over `tier`. */
   model?: string
+  /** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
+  tier?: ModelTier
+  /** System prompt — typed shape supports prompt caching. */
+  system?: SystemPrompt
+  /** Hard ceiling on response tokens. Default `4096`. */
+  maxTokens?: number
   /**
-   * BCP-47 language hint (e.g. 'th', 'en', 'zh'). Whisper accepts ISO-639-1
-   * ('th'); Gemini uses BCP-47. Both improve accuracy when set; omit for
-   * auto-detection.
+   * Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
+   * (or omission) turns it off. On Opus 4.7 + 4.6 / Sonnet 4.6 this
+   * is the only supported thinking mode — `budget_tokens` is removed
+   * upstream and not exposed here.
    */
-  language?: string
+  thinking?: 'adaptive' | 'disabled'
+  /** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
+  effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
   /**
-   * Optional priming prompt — gives the model vocabulary or context to
-   * bias toward (proper nouns, brand names, menu items, dialect markers).
-   * Whisper uses this directly; Gemini incorporates it into the system
-   * instruction.
+   * Top-level cache_control toggle. When `true` the provider asks the
+   * SDK to auto-cache the last cacheable block on every request.
+   * Defaults to `config.brain.cache.auto ?? false`.
    */
-  prompt?: string
+  cache?: boolean
   /**
-   * Filename to send in the multipart form (Whisper). Used to derive the
-   * audio format on the server when `contentType` is missing. Defaults to
-   * 'audio.bin' if not provided.
+   * Beta features to enable on this request. Pass through to the
+   * provider's beta-header machinery. Provider-specific.
    */
-  filename?: string
-}
-export interface TranscriptionResponse {
-  /** Transcribed text. */
-  text: string
-  /** Detected language, when the provider reports one. */
-  language?: string
-  /** Audio duration in seconds, when the provider reports one. */
-  duration?: number
-  /** Original provider response for callers that need provider-specific fields. */
-  raw: unknown
-}
-// ── Provider ─────────────────────────────────────────────────────────────────
-export interface AIProvider {
-  readonly name: string
-  complete(request: CompletionRequest): Promise<CompletionResponse>
-  stream(request: CompletionRequest): AsyncIterable<StreamChunk>
-  embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
+  betas?: readonly string[]
   /**
-   * Transcribe audio to text. Implemented by providers that expose a
-   * speech-to-text endpoint (OpenAI Whisper, Google Gemini's multimodal
-   * generateContent). Throws or remains undefined for providers without
-   * STT (Anthropic at time of writing).
+   * Name of the provider to use for this call. `BrainManager.chat`
+   * normally selects the provider from config; set this to override it.
    */
-  transcribe?(request: TranscribeRequest): Promise<TranscriptionResponse>
+  provider?: string
 }
-// ── Hooks ────────────────────────────────────────────────────────────────────
-export type BeforeHook = (request: CompletionRequest) => void | Promise<void>
-export type AfterHook = (
-  request: CompletionRequest,
-  response: CompletionResponse
-) => void | Promise<void>
-// ── Config ───────────────────────────────────────────────────────────────────
-export interface ProviderConfig {
-  driver: string
-  apiKey: string
-  model: string
-  baseUrl?: string
-  maxTokens?: number
-  temperature?: number
-  maxRetries?: number
-  retryBaseDelay?: number
+/** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
+export interface ChatUsage {
+  inputTokens: number // presumably tokens consumed by the request — confirm against the provider mapping
+  outputTokens: number // presumably tokens generated in the reply — confirm against the provider mapping
+  cacheReadTokens: number // cache-hit counter; populated when caching is in play
+  cacheCreationTokens: number // cache-write counter; populated when caching is in play
 }
-export interface BrainConfig {
-  default: string
-  providers: Record<string, ProviderConfig>
-  maxTokens: number
-  temperature: number
-  maxIterations: number
-  memory?: import('./memory/types.ts').MemoryConfig
+/**
+ * The provider's reply. `text` is the concatenated assistant text;
+ * `raw` is the provider's full native response shape for apps that
+ * need anything we don't surface (e.g. citation blocks, server-tool
+ * results once those ship).
+ */
+export interface ChatResult<Raw = unknown> {
+  text: string // concatenated assistant text
+  model: string // the model that actually answered
+  stopReason: string | null // provider's stop reason; `null` is allowed by the type
+  usage: ChatUsage // token usage, including cache counters
+  raw: Raw // provider's full native response (escape hatch)
 }
-// ── Serialized Thread ────────────────────────────────────────────────────────
-export interface SerializedThread {
-  messages: Message[]
-  system?: string
-}
+/**
+ * Streaming event union. V1 covers the text-delta + completion path
+ * apps want for chat-style UIs; thinking blocks and tool-use streams
+ * are reserved for later slices.
+ */
+export type StreamEvent =
+  | { type: 'text'; delta: string } // incremental chunk of assistant text
+  | { type: 'stop'; stopReason: string | null; usage: ChatUsage } // completion event carrying the stop reason and usage