@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ // Public API of `@strav/brain/persistence` — recommended schema +
2
+ // repositories for persisting conversations (threads + turns) and
3
+ // human-in-the-loop suspended runs to Postgres via `@strav/database`.
4
+ //
5
+ // Apps that need a different backend implement `BrainStore`
6
+ // directly — the schemas + repositories are conveniences, not
7
+ // obligations.
8
+
9
+ export {
10
+ BrainMessage,
11
+ type BrainMessageRole,
12
+ } from './brain_message.ts'
13
+ export {
14
+ type AppendTurnInput,
15
+ BrainMessageRepository,
16
+ type LoadMessagesOptions,
17
+ } from './brain_message_repository.ts'
18
+ export type {
19
+ BrainStore,
20
+ CreateThreadInput,
21
+ LoadedSuspendedRun,
22
+ LoadedThread,
23
+ SaveSuspendedRunInput,
24
+ SuspendedFilter,
25
+ SuspendedSummary,
26
+ ThreadFilter,
27
+ ThreadSummary,
28
+ TurnInput,
29
+ } from './brain_store.ts'
30
+ export {
31
+ BrainSuspendedRun,
32
+ type BrainSuspendedRunStatus,
33
+ } from './brain_suspended_run.ts'
34
+ export {
35
+ type ListPendingOptions,
36
+ BrainSuspendedRunRepository,
37
+ } from './brain_suspended_run_repository.ts'
38
+ export { BrainThread } from './brain_thread.ts'
39
+ export {
40
+ BrainThreadRepository,
41
+ type ListThreadsOptions,
42
+ } from './brain_thread_repository.ts'
43
+ export { DatabaseBrainStore } from './database_brain_store.ts'
44
+ export {
45
+ brainMessageSchema,
46
+ brainSuspendedRunSchema,
47
+ brainThreadSchema,
48
+ } from './schema/index.ts'
@@ -0,0 +1,61 @@
1
+ /**
2
+ * `brainMessageSchema` — one row per assistant or user turn within
3
+ * a thread. Append-only; rows are inserted in `turn_index` order
4
+ * and never updated (compaction blocks live as a regular assistant
5
+ * row whose `content` includes a `CompactionBlock`).
6
+ *
7
+ * Why per-turn rather than a JSONB blob on `brain_thread`:
8
+ *
9
+ * - **Pagination.** UIs render the latest N turns; queries select
10
+ * by `(thread_id, turn_index)` instead of parsing a JSON array.
11
+ * - **Per-turn metadata.** `model` / `usage` / `stop_reason` /
12
+ * `response_id` are first-class columns, queryable for cost analytics,
13
+ * audit, and routing (e.g., "which threads used gpt-5?").
14
+ * - **Append cost.** Each `send()` is a single INSERT, not a
15
+ * rewrite of the entire array.
16
+ *
17
+ * Columns:
18
+ *
19
+ * - `id` ULID primary key.
20
+ * - `thread_id` FK → `brain_thread`. `onDelete: cascade` —
21
+ * deleting a thread drops its history.
22
+ * - `turn_index` 0-based ordinal. Unique with `thread_id` (app
23
+ * migration adds the index).
24
+ * - `role` `user` or `assistant`. The framework's
25
+ * `Message.role` union; tool_result blocks land
26
+ * on user turns per the assistant ↔ user
27
+ * handshake, so `role` reflects that.
28
+ * - `content` JSONB — `string | ContentBlock[]`. Carries
29
+ * every typed block: text, image, document,
30
+ * audio, tool_use, tool_result, mcp_*, compaction.
31
+ * - `model` Model identifier used for assistant turns
32
+ * (NULL for user turns).
33
+ * - `usage` JSONB — `ChatUsage` for assistant turns.
34
+ * - `stop_reason` Provider terminal reason (`end_turn`, etc.).
35
+ * - `response_id` OpenAI Responses API id when surfaced. Indexed
36
+ * via partial index in the recommended migration.
37
+ * - `created_at` Timestamp.
38
+ *
39
+ * Archetype.Event — append-only semantics; no `updated_at`.
40
+ */
41
+
42
+ import { Archetype, defineSchema } from '@strav/database'
43
+ import { brainThreadSchema } from './brain_thread_schema.ts'
44
+
45
+ export const brainMessageSchema = defineSchema(
46
+ 'brain_message',
47
+ Archetype.Event,
48
+ (t) => {
49
+ t.id()
50
+ t.reference('thread_id').to(brainThreadSchema).onDelete('cascade').notNull()
51
+ t.integer('turn_index').notNull()
52
+ t.enum('role', ['user', 'assistant']).notNull()
53
+ t.json('content').notNull()
54
+ t.string('model').max(128).nullable()
55
+ t.json('usage').nullable()
56
+ t.string('stop_reason').max(64).nullable()
57
+ t.string('response_id').max(128).nullable()
58
+ t.timestamp('created_at').notNull()
59
+ },
60
+ { tenanted: true },
61
+ )
@@ -0,0 +1,58 @@
1
+ /**
2
+ * `brainSuspendedRunSchema` — a paused agentic loop awaiting
3
+ * human-in-the-loop tool approval.
4
+ *
5
+ * Two real use cases drive the shape:
6
+ *
7
+ * 1. **Linked to a thread** — the suspending run was part of a
8
+ * conversational thread; the app wants the suspended state to
9
+ * reference its thread so the UI can show "thread X is paused
10
+ * waiting on Y." `thread_id` is the FK, nullable so detached
11
+ * runs are fine.
12
+ * 2. **Standalone** — the run came from a one-shot `runTools(...)`
13
+ * call (cron job, queued worker, ...). No thread context;
14
+ * `thread_id` stays NULL.
15
+ *
16
+ * Columns:
17
+ *
18
+ * - `id` ULID primary key. The id apps reference
19
+ * when resuming.
20
+ * - `thread_id` FK → `brain_thread`, NULLABLE,
21
+ * `onDelete: set null` — if the thread
22
+ * gets deleted, the suspended run keeps
23
+ * its data so the human approver can
24
+ * still inspect it.
25
+ * - `user_id` App-defined approver / owner.
26
+ * - `pending_tool_calls` JSONB — `ToolUseBlock[]` the model
27
+ * wants executed. Multi-call batches are
28
+ * captured together (mid-batch invariant).
29
+ * - `state` JSONB — `SuspendedState` snapshot. The
30
+ * framework's `brain.resumeTools(state,
31
+ * ...)` takes this as its first arg.
32
+ * - `status` `pending | resumed | cancelled`. Apps
33
+ * bulk-list pending runs and walk through
34
+ * an approval queue.
35
+ * - `timestamps` `created_at` for "how long pending?"
36
+ * sorts, `updated_at` for transition
37
+ * tracking.
38
+ *
39
+ * Tenanted: standard `tenant_id` + RLS.
40
+ */
41
+
42
+ import { Archetype, defineSchema } from '@strav/database'
43
+ import { brainThreadSchema } from './brain_thread_schema.ts'
44
+
45
+ export const brainSuspendedRunSchema = defineSchema(
46
+ 'brain_suspended_run',
47
+ Archetype.Entity,
48
+ (t) => {
49
+ t.id()
50
+ t.reference('thread_id').to(brainThreadSchema).onDelete('set null').nullable()
51
+ t.string('user_id').max(64).nullable()
52
+ t.json('pending_tool_calls').notNull()
53
+ t.json('state').notNull()
54
+ t.enum('status', ['pending', 'resumed', 'cancelled']).notNull().default('pending')
55
+ t.timestamps()
56
+ },
57
+ { tenanted: true },
58
+ )
@@ -0,0 +1,50 @@
1
+ /**
2
+ * `brainThreadSchema` — one row per conversation.
3
+ *
4
+ * Carries the per-thread defaults that `Thread` already serializes
5
+ * (`system`, `options`, `lastResponseId`) plus a few framework-side
6
+ * fields apps want to filter / sort on:
7
+ *
8
+ * - `id` ULID primary key. Hand the same value back to
9
+ * `BrainStore.loadThread(id)` to rehydrate.
10
+ * - `user_id` App-defined owner. Stored as `text` (no FK) —
11
+ * user table shape varies per app. Indexed in
12
+ * the recommended migration so "list threads
13
+ * for user X" stays fast.
14
+ * - `title` Human label. Apps set it from the first user
15
+ * turn or via an explicit "rename" UI.
16
+ * - `system` Thread-owned system prompt. Mirrors
17
+ * `ThreadState.system`. JSONB so the structured
18
+ * form (text + cache flag) round-trips.
19
+ * - `options` Thread defaults applied to every `send()`.
20
+ * Mirrors `ThreadState.options`.
21
+ * - `last_response_id` OpenAI Responses API stateful pointer.
22
+ * Mirrors `ThreadState.lastResponseId`. NULL for
23
+ * non-Responses providers.
24
+ * - `timestamps` `created_at` + `updated_at` for sort / audit.
25
+ *
26
+ * Tenanted: `tenant_id` FK + RLS policies auto-injected by
27
+ * `@strav/database`. Apps wrap calls in `tenants.withTenant(...)`
28
+ * and the database enforces isolation — no app-level filter needed.
29
+ *
30
+ * The per-turn message history lives in `brain_message`, joined by
31
+ * `thread_id`. This keeps every send to an O(1) INSERT and makes
32
+ * pagination / per-turn analytics cheap.
33
+ */
34
+
35
+ import { Archetype, defineSchema } from '@strav/database'
36
+
37
+ export const brainThreadSchema = defineSchema(
38
+ 'brain_thread',
39
+ Archetype.Entity,
40
+ (t) => {
41
+ t.id()
42
+ t.string('user_id').max(64).nullable()
43
+ t.string('title').max(255).nullable()
44
+ t.json('system').nullable()
45
+ t.json('options').nullable()
46
+ t.string('last_response_id').max(128).nullable()
47
+ t.timestamps()
48
+ },
49
+ { tenanted: true },
50
+ )
@@ -0,0 +1,3 @@
1
+ export { brainMessageSchema } from './brain_message_schema.ts'
2
+ export { brainSuspendedRunSchema } from './brain_suspended_run_schema.ts'
3
+ export { brainThreadSchema } from './brain_thread_schema.ts'
package/src/provider.ts CHANGED
@@ -17,6 +17,7 @@ import type { AgentResult } from './agent_result.ts'
17
17
  import type { AgentStreamEvent } from './agent_stream_event.ts'
18
18
  import type { MCPServer } from './mcp_server.ts'
19
19
  import type { OutputSchema } from './output_schema.ts'
20
+ import type { SuspendedRun } from './suspended_run.ts'
20
21
  import type { Tool } from './tool.ts'
21
22
  import type { ToolExecutionError } from './tool_execution_error.ts'
22
23
  import type {
@@ -28,6 +29,7 @@ import type {
28
29
  GenerateResult,
29
30
  Message,
30
31
  StreamEvent,
32
+ ToolUseBlock,
31
33
  TranscribeOptions,
32
34
  TranscribeResult,
33
35
  } from './types.ts'
@@ -69,6 +71,39 @@ export interface RunWithToolsOptions extends ChatOptions {
69
71
  * ```
70
72
  */
71
73
  onToolError?(error: ToolExecutionError): string | undefined
74
+ /**
75
+ * Human-in-the-loop gate. Called before each tool execution; when
76
+ * it returns `true`, the loop suspends and `runWithTools` returns
77
+ * a `SuspendedRun` carrying the pending tool calls + a JSON-
78
+ * serializable snapshot of the loop state. Apps obtain results
79
+ * out-of-band (human approval, queued worker, external system,
80
+ * ...) and call `brain.resumeTools(state, results, tools, options)`
81
+ * to continue.
82
+ *
83
+ * Mid-batch invariant: if a tool call inside a multi-call batch
84
+ * triggers suspension, the framework also captures all unexecuted
85
+ * siblings from the same assistant turn — the provider's
86
+ * `tool_use` / `tool_result` pairing must stay balanced on resume.
87
+ *
88
+ * V1 scope: only honored on non-streaming `runWithTools`. Pass it
89
+ * to `streamWithTools`, `runWithToolsAndSchema`, or
90
+ * `streamWithToolsAndSchema` and the framework throws `BrainError`
91
+ * — those entrypoints don't yet model the pause/resume protocol.
92
+ */
93
+ shouldSuspend?(
94
+ call: ToolUseBlock,
95
+ context?: Record<string, unknown>,
96
+ ): boolean | Promise<boolean>
97
+ }
98
+
99
+ /**
100
+ * Same as `RunWithToolsOptions` but with `shouldSuspend` required.
101
+ * Used to narrow the return type of `runWithTools` overloads — when
102
+ * apps opt in to the human-in-the-loop gate, the result widens to
103
+ * `AgentResult | SuspendedRun`; otherwise it's just `AgentResult`.
104
+ */
105
+ export type RunWithToolsOptionsWithSuspend = RunWithToolsOptions & {
106
+ shouldSuspend: NonNullable<RunWithToolsOptions['shouldSuspend']>
72
107
  }
73
108
 
74
109
  export interface Provider {
@@ -113,7 +148,7 @@ export interface Provider {
113
148
  messages: readonly Message[],
114
149
  tools: readonly Tool[],
115
150
  options?: RunWithToolsOptions,
116
- ): Promise<AgentResult>
151
+ ): Promise<AgentResult | SuspendedRun>
117
152
 
118
153
  /**
119
154
  * Structured output. Sends `messages` to the model with a
@@ -28,12 +28,18 @@ import type { AgentResult } from '../agent_result.ts'
28
28
  import type { AnthropicProviderConfig } from '../brain_config.ts'
29
29
  import { DEFAULT_MODEL } from '../brain_config.ts'
30
30
  import { BrainError } from '../brain_error.ts'
31
- import type { Provider, RunWithToolsOptions } from '../provider.ts'
31
+ import type {
32
+ Provider,
33
+ RunWithToolsOptions,
34
+ RunWithToolsOptionsWithSuspend,
35
+ } from '../provider.ts'
36
+ import type { SuspendedRun } from '../suspended_run.ts'
32
37
  import type { Tool } from '../tool.ts'
33
38
  import type {
34
39
  ChatOptions,
35
40
  ChatResult,
36
41
  ChatUsage,
42
+ CompactionBlock,
37
43
  ContentBlock,
38
44
  GenerateResult,
39
45
  MCPToolResultBlock,
@@ -82,7 +88,13 @@ export class AnthropicProvider implements Provider {
82
88
 
83
89
  async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
84
90
  const params = this.buildParams(messages, options)
85
- const response = await this.client.messages.create(params, reqOpts(options))
91
+ const useBeta = needsBetaRouting(params)
92
+ const response = useBeta
93
+ ? ((await this.client.beta.messages.create(
94
+ params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
95
+ reqOpts(options),
96
+ )) as unknown as Anthropic.Message)
97
+ : await this.client.messages.create(params, reqOpts(options))
86
98
  return this.toChatResult(response)
87
99
  }
88
100
 
@@ -91,7 +103,12 @@ export class AnthropicProvider implements Provider {
91
103
  options: ChatOptions = {},
92
104
  ): AsyncIterable<StreamEvent> {
93
105
  const params = this.buildParams(messages, options)
94
- const stream = this.client.messages.stream(params, reqOpts(options))
106
+ const stream = needsBetaRouting(params)
107
+ ? this.client.beta.messages.stream(
108
+ params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
109
+ reqOpts(options),
110
+ )
111
+ : this.client.messages.stream(params, reqOpts(options))
95
112
  for await (const event of stream) {
96
113
  if (
97
114
  event.type === 'content_block_delta' &&
@@ -137,11 +154,21 @@ export class AnthropicProvider implements Provider {
137
154
  * `tools` array each turn. Apps that care about cache hits keep
138
155
  * the tool list stable across runs.
139
156
  */
157
+ runWithTools(
158
+ messages: readonly Message[],
159
+ tools: readonly Tool[],
160
+ options: RunWithToolsOptionsWithSuspend,
161
+ ): Promise<AgentResult | SuspendedRun>
162
+ runWithTools(
163
+ messages: readonly Message[],
164
+ tools: readonly Tool[],
165
+ options?: RunWithToolsOptions,
166
+ ): Promise<AgentResult>
140
167
  async runWithTools(
141
168
  messages: readonly Message[],
142
169
  tools: readonly Tool[],
143
170
  options: RunWithToolsOptions = {},
144
- ): Promise<AgentResult> {
171
+ ): Promise<AgentResult | SuspendedRun> {
145
172
  const maxIterations = options.maxIterations ?? 10
146
173
  const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
147
174
  const workingMessages: Message[] = [...messages]
@@ -186,7 +213,6 @@ export class AnthropicProvider implements Provider {
186
213
 
187
214
  // Declare MCP servers + flip to the beta surface when in use.
188
215
  // Anthropic's MCP connector requires `mcp-client-2025-11-20`.
189
- let response: Anthropic.Message
190
216
  if (useMcpBeta) {
191
217
  params.mcp_servers = mcpServers.map((s) => {
192
218
  const def: Anthropic.Beta.Messages.BetaRequestMCPServerURLDefinition = {
@@ -201,13 +227,15 @@ export class AnthropicProvider implements Provider {
201
227
  ;(params as { betas?: string[] }).betas = baseBetas.includes('mcp-client-2025-11-20')
202
228
  ? [...baseBetas]
203
229
  : [...baseBetas, 'mcp-client-2025-11-20']
204
- response = (await this.client.beta.messages.create(
205
- params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
206
- reqOpts(options),
207
- )) as unknown as Anthropic.Message
208
- } else {
209
- response = await this.client.messages.create(params, reqOpts(options))
210
230
  }
231
+ // Route via beta when either MCP servers OR compaction are in
232
+ // play — both live on the beta surface.
233
+ const response: Anthropic.Message = needsBetaRouting(params)
234
+ ? ((await this.client.beta.messages.create(
235
+ params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
236
+ reqOpts(options),
237
+ )) as unknown as Anthropic.Message)
238
+ : await this.client.messages.create(params, reqOpts(options))
211
239
  addUsage(aggregated, response.usage)
212
240
  lastStopReason = response.stop_reason ?? null
213
241
 
@@ -236,7 +264,28 @@ export class AnthropicProvider implements Provider {
236
264
  (b): b is Anthropic.ToolUseBlock => b.type === 'tool_use',
237
265
  )
238
266
  const resultBlocks: ContentBlock[] = []
239
- for (const block of toolUseBlocks) {
267
+ for (let i = 0; i < toolUseBlocks.length; i++) {
268
+ const block = toolUseBlocks[i]!
269
+ if (options.shouldSuspend) {
270
+ const frameworkCall: ToolUseBlock = {
271
+ type: 'tool_use',
272
+ id: block.id,
273
+ name: block.name,
274
+ input: block.input as Record<string, unknown>,
275
+ }
276
+ if (await options.shouldSuspend(frameworkCall, options.context)) {
277
+ return {
278
+ status: 'suspended',
279
+ pendingToolCalls: toolUseBlocks.slice(i).map((b) => ({
280
+ type: 'tool_use',
281
+ id: b.id,
282
+ name: b.name,
283
+ input: b.input as Record<string, unknown>,
284
+ })),
285
+ state: { messages: workingMessages, iterations, usage: aggregated },
286
+ }
287
+ }
288
+ }
240
289
  const { content, isError } = await runToolWithRecovery(
241
290
  toolMap.get(block.name),
242
291
  block.name,
@@ -314,7 +363,6 @@ export class AnthropicProvider implements Provider {
314
363
  format: { type: 'json_schema', schema: schema.jsonSchema },
315
364
  }
316
365
 
317
- let response: Anthropic.Message
318
366
  if (useMcpBeta) {
319
367
  params.mcp_servers = mcpServers.map((s) => {
320
368
  const def: Anthropic.Beta.Messages.BetaRequestMCPServerURLDefinition = {
@@ -329,13 +377,13 @@ export class AnthropicProvider implements Provider {
329
377
  ;(params as { betas?: string[] }).betas = baseBetas.includes('mcp-client-2025-11-20')
330
378
  ? [...baseBetas]
331
379
  : [...baseBetas, 'mcp-client-2025-11-20']
332
- response = (await this.client.beta.messages.create(
333
- params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
334
- reqOpts(options),
335
- )) as unknown as Anthropic.Message
336
- } else {
337
- response = await this.client.messages.create(params, reqOpts(options))
338
380
  }
381
+ const response: Anthropic.Message = needsBetaRouting(params)
382
+ ? ((await this.client.beta.messages.create(
383
+ params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
384
+ reqOpts(options),
385
+ )) as unknown as Anthropic.Message)
386
+ : await this.client.messages.create(params, reqOpts(options))
339
387
  addUsage(aggregated, response.usage)
340
388
  lastStopReason = response.stop_reason ?? null
341
389
 
@@ -454,7 +502,7 @@ export class AnthropicProvider implements Provider {
454
502
  : [...baseBetas, 'mcp-client-2025-11-20']
455
503
  }
456
504
 
457
- const stream = useMcpBeta
505
+ const stream = needsBetaRouting(params)
458
506
  ? this.client.beta.messages.stream(
459
507
  params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
460
508
  reqOpts(options),
@@ -619,7 +667,7 @@ export class AnthropicProvider implements Provider {
619
667
  : [...baseBetas, 'mcp-client-2025-11-20']
620
668
  }
621
669
 
622
- const stream = useMcpBeta
670
+ const stream = needsBetaRouting(params)
623
671
  ? this.client.beta.messages.stream(
624
672
  params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
625
673
  reqOpts(options),
@@ -782,7 +830,26 @@ export class AnthropicProvider implements Provider {
782
830
  ;(params as { cache_control?: { type: 'ephemeral' } }).cache_control = EPHEMERAL_CACHE
783
831
  }
784
832
 
785
- const betas = mergeBetas(this.betas, options.betas)
833
+ // Compaction emits the beta `edits` entry + flips the
834
+ // `compact-2026-01-12` beta header so the request goes through
835
+ // the SDK's beta surface (same routing as MCP).
836
+ const baseBetas = mergeBetas(this.betas, options.betas)
837
+ const betas = options.compact !== undefined
838
+ ? mergeBetas(baseBetas, [COMPACT_BETA])
839
+ : baseBetas
840
+ if (options.compact !== undefined) {
841
+ const edit: Record<string, unknown> = { type: COMPACT_EDIT_TYPE }
842
+ if (options.compact.trigger !== undefined) {
843
+ edit.trigger = { type: 'input_tokens', value: options.compact.trigger }
844
+ }
845
+ if (options.compact.instructions !== undefined) {
846
+ edit.instructions = options.compact.instructions
847
+ }
848
+ if (options.compact.pauseAfterCompaction !== undefined) {
849
+ edit.pause_after_compaction = options.compact.pauseAfterCompaction
850
+ }
851
+ ;(params as { edits?: unknown[] }).edits = [edit]
852
+ }
786
853
  if (betas.length > 0) {
787
854
  ;(params as { betas?: readonly string[] }).betas = betas
788
855
  }
@@ -799,18 +866,48 @@ export class AnthropicProvider implements Provider {
799
866
  .filter((b): b is Anthropic.TextBlock => b.type === 'text')
800
867
  .map((b) => b.text)
801
868
  .join('')
802
- return {
869
+ const result: ChatResult<Anthropic.Message> = {
803
870
  text,
804
871
  model: message.model,
805
872
  stopReason: message.stop_reason,
806
873
  usage: toUsage(message.usage),
807
874
  raw: message,
808
875
  }
876
+ // Surface structured content when the turn carries blocks
877
+ // beyond plain text (compaction today; reasoning blocks in a
878
+ // future slice). Apps that persist conversations push this
879
+ // onto the message history so round-trippable blocks survive
880
+ // subsequent requests.
881
+ const blocks = fromAnthropicContent(message.content)
882
+ if (blocks.some((b) => b.type !== 'text')) {
883
+ result.content = blocks
884
+ }
885
+ return result
809
886
  }
810
887
  }
811
888
 
812
889
  // ─── Shape converters ─────────────────────────────────────────────────────
813
890
 
891
+ /** Compaction beta — required header + `edits[].type` for `compact-2026-01-12`. */
892
+ const COMPACT_BETA = 'compact-2026-01-12'
893
+ const COMPACT_EDIT_TYPE = 'compact_20260112'
894
+
895
+ /**
896
+ * Whether the request needs to flow through `client.beta.messages` (`create` / `stream`)
897
+ * instead of the stable surface. Triggered by:
898
+ *
899
+ * - `edits[]` (compaction).
900
+ * - `mcp_servers[]` (server-side MCP).
901
+ *
902
+ * Tests typically stub `client.messages.create`; the beta path uses the
903
+ * stub that lives at `client.beta.messages.create`.
904
+ */
905
+ function needsBetaRouting(params: Anthropic.MessageCreateParamsNonStreaming): boolean {
906
+ const p = params as { edits?: unknown[]; mcp_servers?: unknown[] }
907
+ return (p.edits !== undefined && p.edits.length > 0)
908
+ || (p.mcp_servers !== undefined && p.mcp_servers.length > 0)
909
+ }
910
+
814
911
  /** Build the request-options bag forwarded to the SDK. Only `signal` for now. */
815
912
  function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
816
913
  return options.signal !== undefined ? { signal: options.signal } : undefined
@@ -905,6 +1002,19 @@ function toMessageParam(message: Message): Anthropic.MessageParam {
905
1002
  { context: { provider: 'anthropic' } },
906
1003
  )
907
1004
  }
1005
+ if (block.type === 'compaction') {
1006
+ // Round-trip the compaction block verbatim — the server uses
1007
+ // the opaque `encrypted_content` to stitch prior compactions
1008
+ // together; mutating either field would invalidate the
1009
+ // history. Untyped on the stable SDK surface; cast through
1010
+ // `unknown` to the stable `ContentBlockParam` type.
1011
+ const param: Record<string, unknown> = { type: 'compaction' }
1012
+ if (block.content !== null) param.content = block.content
1013
+ if (block.encryptedContent !== null) {
1014
+ param.encrypted_content = block.encryptedContent
1015
+ }
1016
+ return param as unknown as Anthropic.ContentBlockParam
1017
+ }
908
1018
  const text: Anthropic.TextBlockParam = { type: 'text', text: block.text }
909
1019
  if (block.cache) text.cache_control = EPHEMERAL_CACHE
910
1020
  return text
@@ -1071,6 +1181,13 @@ function fromAnthropicContent(
1071
1181
  }
1072
1182
  if (r.is_error) result.isError = true
1073
1183
  out.push(result)
1184
+ } else if (block.type === 'compaction') {
1185
+ const c = block as { content?: string | null; encrypted_content?: string | null }
1186
+ out.push({
1187
+ type: 'compaction',
1188
+ content: c.content ?? null,
1189
+ encryptedContent: c.encrypted_content ?? null,
1190
+ } satisfies CompactionBlock)
1074
1191
  }
1075
1192
  }
1076
1193
  return out