npm - @strav/brain - Versions diffs - 1.0.0-alpha.17 → 1.0.0-alpha.18 - Mend

@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/package.json +4 -2
package/src/agent_generate_result.ts +2 -0
package/src/agent_result.ts +7 -0
package/src/agent_runner.ts +80 -4
package/src/brain_manager.ts +119 -2
package/src/index.ts +20 -2
package/src/mcp/client.ts +17 -0
package/src/mcp/index.ts +1 -0
package/src/mcp/pool.ts +106 -0
package/src/mcp/resolve_mcp_tools.ts +25 -7
package/src/persistence/brain_message.ts +34 -0
package/src/persistence/brain_message_repository.ts +106 -0
package/src/persistence/brain_store.ts +166 -0
package/src/persistence/brain_suspended_run.ts +30 -0
package/src/persistence/brain_suspended_run_repository.ts +68 -0
package/src/persistence/brain_thread.ts +30 -0
package/src/persistence/brain_thread_repository.ts +65 -0
package/src/persistence/database_brain_store.ts +190 -0
package/src/persistence/index.ts +48 -0
package/src/persistence/schema/brain_message_schema.ts +61 -0
package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
package/src/persistence/schema/brain_thread_schema.ts +50 -0
package/src/persistence/schema/index.ts +3 -0
package/src/provider.ts +36 -1
package/src/providers/anthropic_provider.ts +140 -23
package/src/providers/gemini_provider.ts +55 -32
package/src/providers/openai_compat_provider.ts +452 -23
package/src/providers/openai_provider.ts +87 -32
package/src/providers/openai_responses_provider.ts +365 -50
package/src/suspended_run.ts +153 -0
package/src/thread.ts +40 -1
package/src/types.ts +110 -0

package/src/persistence/index.ts ADDED Viewed

@@ -0,0 +1,48 @@
+// Public API of `@strav/brain/persistence` — recommended schema +
+// repositories for persisting conversations (threads + turns) and
+// human-in-the-loop suspended runs to Postgres via `@strav/database`.
+//
+// Apps that need a different backend implement `BrainStore`
+// directly — the schemas + repositories are conveniences, not
+// obligations.
+export {
+  BrainMessage,
+  type BrainMessageRole,
+} from './brain_message.ts'
+export {
+  type AppendTurnInput,
+  BrainMessageRepository,
+  type LoadMessagesOptions,
+} from './brain_message_repository.ts'
+export type {
+  BrainStore,
+  CreateThreadInput,
+  LoadedSuspendedRun,
+  LoadedThread,
+  SaveSuspendedRunInput,
+  SuspendedFilter,
+  SuspendedSummary,
+  ThreadFilter,
+  ThreadSummary,
+  TurnInput,
+} from './brain_store.ts'
+export {
+  BrainSuspendedRun,
+  type BrainSuspendedRunStatus,
+} from './brain_suspended_run.ts'
+export {
+  type ListPendingOptions,
+  BrainSuspendedRunRepository,
+} from './brain_suspended_run_repository.ts'
+export { BrainThread } from './brain_thread.ts'
+export {
+  BrainThreadRepository,
+  type ListThreadsOptions,
+} from './brain_thread_repository.ts'
+export { DatabaseBrainStore } from './database_brain_store.ts'
+export {
+  brainMessageSchema,
+  brainSuspendedRunSchema,
+  brainThreadSchema,
+} from './schema/index.ts'

package/src/persistence/schema/brain_message_schema.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * `brainMessageSchema` — one row per assistant or user turn within
+ * a thread. Append-only; rows are inserted in `turn_index` order
+ * and never updated (compaction blocks live as a regular assistant
+ * row whose `content` includes a `CompactionBlock`).
+ *
+ * Why per-turn rather than a JSONB blob on `brain_thread`:
+ *
+ *   - **Pagination.** UIs render the latest N turns; queries select
+ *     by `(thread_id, turn_index)` instead of parsing a JSON array.
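+ *   - **Per-turn metadata.** `model` / `usage` / `stop_reason` /
+ *     `response_id` are first-class columns, queryable for cost
+ *     analytics, audit, and routing (e.g., "which threads used
+ *     gpt-5?"). This schema declares no indexes itself; they are
+ *     added by the app / recommended migration (see `turn_index`
+ *     and `response_id` below).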
+ *   - **Append cost.** Each `send()` is a single INSERT, not a
+ *     rewrite of the entire array.
+ *
+ * Columns:
+ *
+ *   - `id`           ULID primary key.
+ *   - `thread_id`    FK → `brain_thread`. `onDelete: cascade` —
+ *                    deleting a thread drops its history.
+ *   - `turn_index`   0-based ordinal. Unique with `thread_id` (app
+ *                    migration adds the index).
+ *   - `role`         `user` or `assistant`. The framework's
+ *                    `Message.role` union; tool_result blocks land
+ *                    on user turns per the assistant ↔ user
+ *                    handshake, so `role` reflects that.
+ *   - `content`      JSONB — `string | ContentBlock[]`. Carries
+ *                    every typed block: text, image, document,
+ *                    audio, tool_use, tool_result, mcp_*, compaction.
+ *   - `model`        Model identifier used for assistant turns
+ *                    (NULL for user turns).
+ *   - `usage`        JSONB — `ChatUsage` for assistant turns.
+ *   - `stop_reason`  Provider terminal reason (`end_turn`, etc.).
+ *   - `response_id`  OpenAI Responses API id when surfaced. Indexed
+ *                    via partial index in the recommended migration.
+ *   - `created_at`   Timestamp.
+ *
+ * Archetype.Event — append-only semantics; no `updated_at`.
+ */
+import { Archetype, defineSchema } from '@strav/database'
+import { brainThreadSchema } from './brain_thread_schema.ts'
+export const brainMessageSchema = defineSchema(
+  'brain_message',
+  Archetype.Event,
+  (t) => {
+    t.id()
+    // Required FK to the owning thread; `cascade` removes a thread's
+    // turns together with the thread row.
+    t.reference('thread_id').to(brainThreadSchema).onDelete('cascade').notNull()
+    t.integer('turn_index').notNull()
+    t.enum('role', ['user', 'assistant']).notNull()
+    t.json('content').notNull()
+    // Per-turn metadata is nullable — the header comment documents
+    // `model` as NULL on user turns.
+    t.string('model').max(128).nullable()
+    t.json('usage').nullable()
+    t.string('stop_reason').max(64).nullable()
+    t.string('response_id').max(128).nullable()
+    // Only `created_at` is declared (no `updated_at`), matching the
+    // append-only note in the header comment.
+    t.timestamp('created_at').notNull()
+  },
+  // Tenant-scoped, same option as the other `brain_*` schemas.
+  { tenanted: true },
+)

package/src/persistence/schema/brain_suspended_run_schema.ts ADDED Viewed

@@ -0,0 +1,58 @@
+/**
+ * `brainSuspendedRunSchema` — a paused agentic loop awaiting
+ * human-in-the-loop tool approval.
+ *
+ * Two real use cases drive the shape:
+ *
+ *   1. **Linked to a thread** — the suspending run was part of a
+ *      conversational thread; the app wants the suspended state to
+ *      reference its thread so the UI can show "thread X is paused
+ *      waiting on Y." `thread_id` is the FK, nullable so detached
+ *      runs are fine.
+ *   2. **Standalone** — the run came from a one-shot `runTools(...)`
+ *      call (cron job, queued worker, ...). No thread context;
+ *      `thread_id` stays NULL.
+ *
+ * Columns:
+ *
+ *   - `id`                  ULID primary key. The id apps reference
+ *                           when resuming.
+ *   - `thread_id`           FK → `brain_thread`, NULLABLE,
+ *                           `onDelete: set null` — if the thread
+ *                           gets deleted, the suspended run keeps
+ *                           its data so the human approver can
+ *                           still inspect it.
+ *   - `user_id`             App-defined approver / owner.
+ *   - `pending_tool_calls`  JSONB — `ToolUseBlock[]` the model
+ *                           wants executed. Multi-call batches are
+ *                           captured together (mid-batch invariant).
+ *   - `state`               JSONB — `SuspendedState` snapshot. The
+ *                           framework's `brain.resumeTools(state,
+ *                           ...)` takes this as its first arg.
+ *   - `status`              `pending | resumed | cancelled`. Apps
+ *                           bulk-list pending runs and walk through
+ *                           an approval queue.
+ *   - `timestamps`          `created_at` for "how long pending?"
+ *                           sorts, `updated_at` for transition
+ *                           tracking.
+ *
+ * Tenanted: standard `tenant_id` + RLS.
+ */
+import { Archetype, defineSchema } from '@strav/database'
+import { brainThreadSchema } from './brain_thread_schema.ts'
+export const brainSuspendedRunSchema = defineSchema(
+  'brain_suspended_run',
+  Archetype.Entity,
+  (t) => {
+    t.id()
+    // Nullable FK: standalone runs have no thread, and deleting a
+    // thread nulls this link rather than deleting the suspended run.
+    t.reference('thread_id').to(brainThreadSchema).onDelete('set null').nullable()
+    t.string('user_id').max(64).nullable()
+    // Both JSON payloads are required — a row without pending calls
+    // or a state snapshot could not be resumed.
+    t.json('pending_tool_calls').notNull()
+    t.json('state').notNull()
+    // Rows start as `pending` unless a status is supplied.
+    t.enum('status', ['pending', 'resumed', 'cancelled']).notNull().default('pending')
+    t.timestamps()
+  },
+  // Tenant-scoped, same option as the other `brain_*` schemas.
+  { tenanted: true },
+)

package/src/persistence/schema/brain_thread_schema.ts ADDED Viewed

@@ -0,0 +1,50 @@
+/**
+ * `brainThreadSchema` — one row per conversation.
+ *
+ * Carries the per-thread defaults that `Thread` already serializes
+ * (`system`, `options`, `lastResponseId`) plus a few framework-side
+ * fields apps want to filter / sort on:
+ *
+ *   - `id`            ULID primary key. Hand the same value back to
+ *                     `BrainStore.loadThread(id)` to rehydrate.
+ *   - `user_id`       App-defined owner. Stored as `text` (no FK) —
+ *                     user table shape varies per app. Indexed in
+ *                     the recommended migration so "list threads
+ *                     for user X" stays fast.
+ *   - `title`         Human label. Apps set it from the first user
+ *                     turn or via an explicit "rename" UI.
+ *   - `system`        Thread-owned system prompt. Mirrors
+ *                     `ThreadState.system`. JSONB so the structured
+ *                     form (text + cache flag) round-trips.
+ *   - `options`       Thread defaults applied to every `send()`.
+ *                     Mirrors `ThreadState.options`.
+ *   - `last_response_id`  OpenAI Responses API stateful pointer.
+ *                     Mirrors `ThreadState.lastResponseId`. NULL for
+ *                     non-Responses providers.
+ *   - `timestamps`    `created_at` + `updated_at` for sort / audit.
+ *
+ * Tenanted: `tenant_id` FK + RLS policies auto-injected by
+ * `@strav/database`. Apps wrap calls in `tenants.withTenant(...)`
+ * and the database enforces isolation — no app-level filter needed.
+ *
+ * The per-turn message history lives in `brain_message`, joined by
+ * `thread_id`. This keeps every send to an O(1) INSERT and makes
+ * pagination / per-turn analytics cheap.
+ */
+import { Archetype, defineSchema } from '@strav/database'
+export const brainThreadSchema = defineSchema(
+  'brain_thread',
+  Archetype.Entity,
+  (t) => {
+    t.id()
+    // Plain string column, deliberately not a reference — the owner
+    // is app-defined (see the header comment).
+    t.string('user_id').max(64).nullable()
+    t.string('title').max(255).nullable()
+    // `system` / `options` / `last_response_id` mirror the fields of
+    // the serialized thread state named in the header; all optional.
+    t.json('system').nullable()
+    t.json('options').nullable()
+    t.string('last_response_id').max(128).nullable()
+    t.timestamps()
+  },
+  // Tenant-scoped; the header comment describes what this implies.
+  { tenanted: true },
+)

package/src/persistence/schema/index.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export { brainMessageSchema } from './brain_message_schema.ts'
+export { brainSuspendedRunSchema } from './brain_suspended_run_schema.ts'
+export { brainThreadSchema } from './brain_thread_schema.ts'

package/src/provider.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import type { AgentResult } from './agent_result.ts'
 import type { AgentStreamEvent } from './agent_stream_event.ts'
 import type { MCPServer } from './mcp_server.ts'
 import type { OutputSchema } from './output_schema.ts'
+import type { SuspendedRun } from './suspended_run.ts'
 import type { Tool } from './tool.ts'
 import type { ToolExecutionError } from './tool_execution_error.ts'
 import type {
@@ -28,6 +29,7 @@ import type {
   GenerateResult,
   Message,
   StreamEvent,
+  ToolUseBlock,
   TranscribeOptions,
   TranscribeResult,
 } from './types.ts'
@@ -69,6 +71,39 @@ export interface RunWithToolsOptions extends ChatOptions {
    * ```
    */
   onToolError?(error: ToolExecutionError): string | undefined
+  /**
+   * Human-in-the-loop gate. Called before each tool execution; when
+   * it returns `true`, the loop suspends and `runWithTools` returns
+   * a `SuspendedRun` carrying the pending tool calls + a JSON-
+   * serializable snapshot of the loop state. Apps obtain results
+   * out-of-band (human approval, queued worker, external system,
+   * ...) and call `brain.resumeTools(state, results, tools, options)`
+   * to continue.
+   *
+   * Mid-batch invariant: if a tool call inside a multi-call batch
+   * triggers suspension, the framework also captures all unexecuted
+   * siblings from the same assistant turn — the provider's
+   * `tool_use` / `tool_result` pairing must stay balanced on resume.
+   *
+   * V1 scope: only honored on non-streaming `runWithTools`. Pass it
+   * to `streamWithTools`, `runWithToolsAndSchema`, or
+   * `streamWithToolsAndSchema` and the framework throws `BrainError`
+   * — those entrypoints don't yet model the pause/resume protocol.
+   */
+  shouldSuspend?(
+    call: ToolUseBlock,
+    context?: Record<string, unknown>,
+  ): boolean | Promise<boolean>
+}
+/**
+ * Same as `RunWithToolsOptions` but with `shouldSuspend` required.
+ * Used to narrow the return type of `runWithTools` overloads — when
+ * apps opt in to the human-in-the-loop gate, the result widens to
+ * `AgentResult | SuspendedRun`; otherwise it's just `AgentResult`.
+ */
+export type RunWithToolsOptionsWithSuspend = RunWithToolsOptions & {
+  shouldSuspend: NonNullable<RunWithToolsOptions['shouldSuspend']>
 }
 export interface Provider {
@@ -113,7 +148,7 @@ export interface Provider {
     messages: readonly Message[],
     tools: readonly Tool[],
     options?: RunWithToolsOptions,
-  ): Promise<AgentResult>
+  ): Promise<AgentResult | SuspendedRun>
   /**
    * Structured output. Sends `messages` to the model with a

package/src/providers/anthropic_provider.ts CHANGED Viewed

@@ -28,12 +28,18 @@ import type { AgentResult } from '../agent_result.ts'
 import type { AnthropicProviderConfig } from '../brain_config.ts'
 import { DEFAULT_MODEL } from '../brain_config.ts'
 import { BrainError } from '../brain_error.ts'
-import type { Provider, RunWithToolsOptions } from '../provider.ts'
+import type {
+  Provider,
+  RunWithToolsOptions,
+  RunWithToolsOptionsWithSuspend,
+} from '../provider.ts'
+import type { SuspendedRun } from '../suspended_run.ts'
 import type { Tool } from '../tool.ts'
 import type {
   ChatOptions,
   ChatResult,
   ChatUsage,
+  CompactionBlock,
   ContentBlock,
   GenerateResult,
   MCPToolResultBlock,
@@ -82,7 +88,13 @@ export class AnthropicProvider implements Provider {
   async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
     const params = this.buildParams(messages, options)
-    const response = await this.client.messages.create(params, reqOpts(options))
+    const useBeta = needsBetaRouting(params)
+    const response = useBeta
+      ? ((await this.client.beta.messages.create(
+          params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
+          reqOpts(options),
+        )) as unknown as Anthropic.Message)
+      : await this.client.messages.create(params, reqOpts(options))
     return this.toChatResult(response)
   }
@@ -91,7 +103,12 @@ export class AnthropicProvider implements Provider {
     options: ChatOptions = {},
   ): AsyncIterable<StreamEvent> {
     const params = this.buildParams(messages, options)
-    const stream = this.client.messages.stream(params, reqOpts(options))
+    const stream = needsBetaRouting(params)
+      ? this.client.beta.messages.stream(
+          params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
+          reqOpts(options),
+        )
+      : this.client.messages.stream(params, reqOpts(options))
     for await (const event of stream) {
       if (
         event.type === 'content_block_delta' &&
@@ -137,11 +154,21 @@ export class AnthropicProvider implements Provider {
    * `tools` array each turn. Apps that care about cache hits keep
    * the tool list stable across runs.
    */
+  runWithTools(
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options: RunWithToolsOptionsWithSuspend,
+  ): Promise<AgentResult | SuspendedRun>
+  runWithTools(
+    messages: readonly Message[],
+    tools: readonly Tool[],
+    options?: RunWithToolsOptions,
+  ): Promise<AgentResult>
   async runWithTools(
     messages: readonly Message[],
     tools: readonly Tool[],
     options: RunWithToolsOptions = {},
-  ): Promise<AgentResult> {
+  ): Promise<AgentResult | SuspendedRun> {
     const maxIterations = options.maxIterations ?? 10
     const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
     const workingMessages: Message[] = [...messages]
@@ -186,7 +213,6 @@ export class AnthropicProvider implements Provider {
       // Declare MCP servers + flip to the beta surface when in use.
       // Anthropic's MCP connector requires `mcp-client-2025-11-20`.
-      let response: Anthropic.Message
       if (useMcpBeta) {
         params.mcp_servers = mcpServers.map((s) => {
           const def: Anthropic.Beta.Messages.BetaRequestMCPServerURLDefinition = {
@@ -201,13 +227,15 @@ export class AnthropicProvider implements Provider {
         ;(params as { betas?: string[] }).betas = baseBetas.includes('mcp-client-2025-11-20')
           ? [...baseBetas]
           : [...baseBetas, 'mcp-client-2025-11-20']
-        response = (await this.client.beta.messages.create(
-          params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
-          reqOpts(options),
-        )) as unknown as Anthropic.Message
-      } else {
-        response = await this.client.messages.create(params, reqOpts(options))
       }
+      // Route via beta when either MCP servers OR compaction are in
+      // play — both live on the beta surface.
+      const response: Anthropic.Message = needsBetaRouting(params)
+        ? ((await this.client.beta.messages.create(
+            params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
+            reqOpts(options),
+          )) as unknown as Anthropic.Message)
+        : await this.client.messages.create(params, reqOpts(options))
       addUsage(aggregated, response.usage)
       lastStopReason = response.stop_reason ?? null
@@ -236,7 +264,28 @@ export class AnthropicProvider implements Provider {
         (b): b is Anthropic.ToolUseBlock => b.type === 'tool_use',
       )
       const resultBlocks: ContentBlock[] = []
-      for (const block of toolUseBlocks) {
+      for (let i = 0; i < toolUseBlocks.length; i++) {
+        const block = toolUseBlocks[i]!
+        if (options.shouldSuspend) {
+          const frameworkCall: ToolUseBlock = {
+            type: 'tool_use',
+            id: block.id,
+            name: block.name,
+            input: block.input as Record<string, unknown>,
+          }
+          if (await options.shouldSuspend(frameworkCall, options.context)) {
+            return {
+              status: 'suspended',
+              pendingToolCalls: toolUseBlocks.slice(i).map((b) => ({
+                type: 'tool_use',
+                id: b.id,
+                name: b.name,
+                input: b.input as Record<string, unknown>,
+              })),
+              state: { messages: workingMessages, iterations, usage: aggregated },
+            }
+          }
+        }
         const { content, isError } = await runToolWithRecovery(
           toolMap.get(block.name),
           block.name,
@@ -314,7 +363,6 @@ export class AnthropicProvider implements Provider {
         format: { type: 'json_schema', schema: schema.jsonSchema },
       }
-      let response: Anthropic.Message
       if (useMcpBeta) {
         params.mcp_servers = mcpServers.map((s) => {
           const def: Anthropic.Beta.Messages.BetaRequestMCPServerURLDefinition = {
@@ -329,13 +377,13 @@ export class AnthropicProvider implements Provider {
         ;(params as { betas?: string[] }).betas = baseBetas.includes('mcp-client-2025-11-20')
           ? [...baseBetas]
           : [...baseBetas, 'mcp-client-2025-11-20']
-        response = (await this.client.beta.messages.create(
-          params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
-          reqOpts(options),
-        )) as unknown as Anthropic.Message
-      } else {
-        response = await this.client.messages.create(params, reqOpts(options))
       }
+      const response: Anthropic.Message = needsBetaRouting(params)
+        ? ((await this.client.beta.messages.create(
+            params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
+            reqOpts(options),
+          )) as unknown as Anthropic.Message)
+        : await this.client.messages.create(params, reqOpts(options))
       addUsage(aggregated, response.usage)
       lastStopReason = response.stop_reason ?? null
@@ -454,7 +502,7 @@ export class AnthropicProvider implements Provider {
           : [...baseBetas, 'mcp-client-2025-11-20']
       }
-      const stream = useMcpBeta
+      const stream = needsBetaRouting(params)
         ? this.client.beta.messages.stream(
             params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
             reqOpts(options),
@@ -619,7 +667,7 @@ export class AnthropicProvider implements Provider {
           : [...baseBetas, 'mcp-client-2025-11-20']
       }
-      const stream = useMcpBeta
+      const stream = needsBetaRouting(params)
         ? this.client.beta.messages.stream(
             params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
             reqOpts(options),
@@ -782,7 +830,26 @@ export class AnthropicProvider implements Provider {
       ;(params as { cache_control?: { type: 'ephemeral' } }).cache_control = EPHEMERAL_CACHE
     }
-    const betas = mergeBetas(this.betas, options.betas)
+    // Compaction — emits the beta `edits` entry + flips the
+    // `compact-2026-01-12` beta header so the request goes through
+    // the SDK's beta surface (same routing as MCP).
+    const baseBetas = mergeBetas(this.betas, options.betas)
+    const betas = options.compact !== undefined
+      ? mergeBetas(baseBetas, [COMPACT_BETA])
+      : baseBetas
+    if (options.compact !== undefined) {
+      const edit: Record<string, unknown> = { type: COMPACT_EDIT_TYPE }
+      if (options.compact.trigger !== undefined) {
+        edit.trigger = { type: 'input_tokens', value: options.compact.trigger }
+      }
+      if (options.compact.instructions !== undefined) {
+        edit.instructions = options.compact.instructions
+      }
+      if (options.compact.pauseAfterCompaction !== undefined) {
+        edit.pause_after_compaction = options.compact.pauseAfterCompaction
+      }
+      ;(params as { edits?: unknown[] }).edits = [edit]
+    }
     if (betas.length > 0) {
       ;(params as { betas?: readonly string[] }).betas = betas
     }
@@ -799,18 +866,48 @@ export class AnthropicProvider implements Provider {
       .filter((b): b is Anthropic.TextBlock => b.type === 'text')
       .map((b) => b.text)
       .join('')
-    return {
+    const result: ChatResult<Anthropic.Message> = {
       text,
       model: message.model,
       stopReason: message.stop_reason,
       usage: toUsage(message.usage),
       raw: message,
     }
+    // Surface structured content when the turn carries blocks
+    // beyond plain text (compaction today; reasoning blocks in a
+    // future slice). Apps that persist conversations push this
+    // onto the message history so round-trippable blocks survive
+    // subsequent requests.
+    const blocks = fromAnthropicContent(message.content)
+    if (blocks.some((b) => b.type !== 'text')) {
+      result.content = blocks
+    }
+    return result
   }
 }
 // ─── Shape converters ─────────────────────────────────────────────────────
+/** Compaction beta — required header + `edits[].type` for `compact-2026-01-12`. */
+const COMPACT_BETA = 'compact-2026-01-12'
+const COMPACT_EDIT_TYPE = 'compact_20260112'
+/**
+ * Whether the request must go through the SDK's beta surface
+ * (`client.beta.messages.create` / `client.beta.messages.stream`)
+ * instead of the stable one. Returns `true` when either of these
+ * fields is present and non-empty:
+ *
+ *   - `edits[]` (compaction).
+ *   - `mcp_servers[]` (server-side MCP).
+ *
+ * An empty array counts as "not set", so it does not trigger beta
+ * routing on its own.
+ *
+ * Tests typically stub `client.messages.create`; the beta path uses the
+ * stub that lives at `client.beta.messages.create`.
+ */
+function needsBetaRouting(params: Anthropic.MessageCreateParamsNonStreaming): boolean {
+  // The cast only widens the view so the two optional fields can be
+  // read; `params` itself is not modified.
+  const p = params as { edits?: unknown[]; mcp_servers?: unknown[] }
+  return (p.edits !== undefined && p.edits.length > 0)
+    || (p.mcp_servers !== undefined && p.mcp_servers.length > 0)
+}
 /** Build the request-options bag forwarded to the SDK. Only `signal` for now. */
 function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
   return options.signal !== undefined ? { signal: options.signal } : undefined
@@ -905,6 +1002,19 @@ function toMessageParam(message: Message): Anthropic.MessageParam {
             { context: { provider: 'anthropic' } },
           )
         }
+        if (block.type === 'compaction') {
+          // Round-trip the compaction block as received (null fields
+          // are simply omitted) — the server uses the opaque
+          // `encrypted_content` to stitch prior compactions together;
+          // mutating either field would invalidate the history. The
+          // block is untyped on the stable SDK surface, so the param
+          // is built as a plain record and cast via `unknown` to
+          // `Anthropic.ContentBlockParam`.
+          const param: Record<string, unknown> = { type: 'compaction' }
+          if (block.content !== null) param.content = block.content
+          if (block.encryptedContent !== null) {
+            param.encrypted_content = block.encryptedContent
+          }
+          return param as unknown as Anthropic.ContentBlockParam
+        }
         const text: Anthropic.TextBlockParam = { type: 'text', text: block.text }
         if (block.cache) text.cache_control = EPHEMERAL_CACHE
         return text
@@ -1071,6 +1181,13 @@ function fromAnthropicContent(
       }
       if (r.is_error) result.isError = true
       out.push(result)
+    } else if (block.type === 'compaction') {
+      const c = block as { content?: string | null; encrypted_content?: string | null }
+      out.push({
+        type: 'compaction',
+        content: c.content ?? null,
+        encryptedContent: c.encrypted_content ?? null,
+      } satisfies CompactionBlock)
     }
   }
   return out