@strav/brain 1.0.0-alpha.17 → 1.0.0-alpha.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/agent_generate_result.ts +2 -0
- package/src/agent_result.ts +7 -0
- package/src/agent_runner.ts +80 -4
- package/src/brain_manager.ts +119 -2
- package/src/index.ts +20 -2
- package/src/mcp/client.ts +17 -0
- package/src/mcp/index.ts +1 -0
- package/src/mcp/pool.ts +106 -0
- package/src/mcp/resolve_mcp_tools.ts +25 -7
- package/src/persistence/brain_message.ts +34 -0
- package/src/persistence/brain_message_repository.ts +106 -0
- package/src/persistence/brain_store.ts +166 -0
- package/src/persistence/brain_suspended_run.ts +30 -0
- package/src/persistence/brain_suspended_run_repository.ts +68 -0
- package/src/persistence/brain_thread.ts +30 -0
- package/src/persistence/brain_thread_repository.ts +65 -0
- package/src/persistence/database_brain_store.ts +190 -0
- package/src/persistence/index.ts +48 -0
- package/src/persistence/schema/brain_message_schema.ts +61 -0
- package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
- package/src/persistence/schema/brain_thread_schema.ts +50 -0
- package/src/persistence/schema/index.ts +3 -0
- package/src/provider.ts +36 -1
- package/src/providers/anthropic_provider.ts +140 -23
- package/src/providers/gemini_provider.ts +55 -32
- package/src/providers/openai_compat_provider.ts +452 -23
- package/src/providers/openai_provider.ts +87 -32
- package/src/providers/openai_responses_provider.ts +365 -50
- package/src/suspended_run.ts +153 -0
- package/src/thread.ts +40 -1
- package/src/types.ts +110 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
// Public API of `@strav/brain/persistence` — recommended schema +
|
|
2
|
+
// repositories for persisting conversations (threads + turns) and
|
|
3
|
+
// human-in-the-loop suspended runs to Postgres via `@strav/database`.
|
|
4
|
+
//
|
|
5
|
+
// Apps that need a different backend implement `BrainStore`
|
|
6
|
+
// directly — the schemas + repositories are conveniences, not
|
|
7
|
+
// obligations.
|
|
8
|
+
|
|
9
|
+
export {
|
|
10
|
+
BrainMessage,
|
|
11
|
+
type BrainMessageRole,
|
|
12
|
+
} from './brain_message.ts'
|
|
13
|
+
export {
|
|
14
|
+
type AppendTurnInput,
|
|
15
|
+
BrainMessageRepository,
|
|
16
|
+
type LoadMessagesOptions,
|
|
17
|
+
} from './brain_message_repository.ts'
|
|
18
|
+
export type {
|
|
19
|
+
BrainStore,
|
|
20
|
+
CreateThreadInput,
|
|
21
|
+
LoadedSuspendedRun,
|
|
22
|
+
LoadedThread,
|
|
23
|
+
SaveSuspendedRunInput,
|
|
24
|
+
SuspendedFilter,
|
|
25
|
+
SuspendedSummary,
|
|
26
|
+
ThreadFilter,
|
|
27
|
+
ThreadSummary,
|
|
28
|
+
TurnInput,
|
|
29
|
+
} from './brain_store.ts'
|
|
30
|
+
export {
|
|
31
|
+
BrainSuspendedRun,
|
|
32
|
+
type BrainSuspendedRunStatus,
|
|
33
|
+
} from './brain_suspended_run.ts'
|
|
34
|
+
export {
|
|
35
|
+
type ListPendingOptions,
|
|
36
|
+
BrainSuspendedRunRepository,
|
|
37
|
+
} from './brain_suspended_run_repository.ts'
|
|
38
|
+
export { BrainThread } from './brain_thread.ts'
|
|
39
|
+
export {
|
|
40
|
+
BrainThreadRepository,
|
|
41
|
+
type ListThreadsOptions,
|
|
42
|
+
} from './brain_thread_repository.ts'
|
|
43
|
+
export { DatabaseBrainStore } from './database_brain_store.ts'
|
|
44
|
+
export {
|
|
45
|
+
brainMessageSchema,
|
|
46
|
+
brainSuspendedRunSchema,
|
|
47
|
+
brainThreadSchema,
|
|
48
|
+
} from './schema/index.ts'
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `brainMessageSchema` — one row per assistant or user turn within
|
|
3
|
+
* a thread. Append-only; rows are inserted in `turn_index` order
|
|
4
|
+
* and never updated (compaction blocks live as a regular assistant
|
|
5
|
+
* row whose `content` includes a `CompactionBlock`).
|
|
6
|
+
*
|
|
7
|
+
* Why per-turn rather than a JSONB blob on `brain_thread`:
|
|
8
|
+
*
|
|
9
|
+
* - **Pagination.** UIs render the latest N turns; queries select
|
|
10
|
+
* by `(thread_id, turn_index)` instead of parsing a JSON array.
|
|
11
|
+
* - **Per-turn metadata.** `model` / `usage` / `stop_reason` /
|
|
12
|
+
* `response_id` are dedicated columns, queryable for cost analytics,
|
|
13
|
+
* audit, and routing (e.g., "which threads used gpt-5?").
|
|
14
|
+
* - **Append cost.** Each `send()` is a single INSERT, not a
|
|
15
|
+
* rewrite of the entire array.
|
|
16
|
+
*
|
|
17
|
+
* Columns:
|
|
18
|
+
*
|
|
19
|
+
* - `id` ULID primary key.
|
|
20
|
+
* - `thread_id` FK → `brain_thread`. `onDelete: cascade` —
|
|
21
|
+
* deleting a thread drops its history.
|
|
22
|
+
* - `turn_index` 0-based ordinal. Unique with `thread_id` (app
|
|
23
|
+
* migration adds the index).
|
|
24
|
+
* - `role` `user` or `assistant`. Mirrors the framework's
|
|
25
|
+
* `Message.role` union; tool_result blocks land
|
|
26
|
+
* on user turns per the assistant ↔ user
|
|
27
|
+
* handshake, so `role` reflects that.
|
|
28
|
+
* - `content` JSONB — `string | ContentBlock[]`. Carries
|
|
29
|
+
* every typed block: text, image, document,
|
|
30
|
+
* audio, tool_use, tool_result, mcp_*, compaction.
|
|
31
|
+
* - `model` Model identifier used for assistant turns
|
|
32
|
+
* (NULL for user turns).
|
|
33
|
+
* - `usage` JSONB — `ChatUsage` for assistant turns.
|
|
34
|
+
* - `stop_reason` Provider terminal reason (`end_turn`, etc.).
|
|
35
|
+
* - `response_id` OpenAI Responses API id when surfaced. Indexed
|
|
36
|
+
* via partial index in the recommended migration.
|
|
37
|
+
* - `created_at` Timestamp.
|
|
38
|
+
*
|
|
39
|
+
* Archetype.Event — append-only semantics; no `updated_at`.
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
import { Archetype, defineSchema } from '@strav/database'
|
|
43
|
+
import { brainThreadSchema } from './brain_thread_schema.ts'
|
|
44
|
+
|
|
45
|
+
export const brainMessageSchema = defineSchema(
|
|
46
|
+
'brain_message',
|
|
47
|
+
Archetype.Event,
|
|
48
|
+
(t) => {
|
|
49
|
+
t.id()
|
|
50
|
+
t.reference('thread_id').to(brainThreadSchema).onDelete('cascade').notNull()
|
|
51
|
+
t.integer('turn_index').notNull()
|
|
52
|
+
t.enum('role', ['user', 'assistant']).notNull()
|
|
53
|
+
t.json('content').notNull()
|
|
54
|
+
t.string('model').max(128).nullable()
|
|
55
|
+
t.json('usage').nullable()
|
|
56
|
+
t.string('stop_reason').max(64).nullable()
|
|
57
|
+
t.string('response_id').max(128).nullable()
|
|
58
|
+
t.timestamp('created_at').notNull()
|
|
59
|
+
},
|
|
60
|
+
{ tenanted: true },
|
|
61
|
+
)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `brainSuspendedRunSchema` — a paused agentic loop awaiting
|
|
3
|
+
* human-in-the-loop tool approval.
|
|
4
|
+
*
|
|
5
|
+
* Two real use cases drive the shape:
|
|
6
|
+
*
|
|
7
|
+
* 1. **Linked to a thread** — the suspending run was part of a
|
|
8
|
+
* conversational thread; the app wants the suspended state to
|
|
9
|
+
* reference its thread so the UI can show "thread X is paused
|
|
10
|
+
* waiting on Y." `thread_id` is the FK, nullable so detached
|
|
11
|
+
* runs are fine.
|
|
12
|
+
* 2. **Standalone** — the run came from a one-shot `runTools(...)`
|
|
13
|
+
* call (cron job, queued worker, ...). No thread context;
|
|
14
|
+
* `thread_id` stays NULL.
|
|
15
|
+
*
|
|
16
|
+
* Columns:
|
|
17
|
+
*
|
|
18
|
+
* - `id` ULID primary key. The id apps reference
|
|
19
|
+
* when resuming.
|
|
20
|
+
* - `thread_id` FK → `brain_thread`, NULLABLE,
|
|
21
|
+
* `onDelete: set null` — if the thread
|
|
22
|
+
* gets deleted, the suspended run keeps
|
|
23
|
+
* its data so the human approver can
|
|
24
|
+
* still inspect it.
|
|
25
|
+
* - `user_id` App-defined approver / owner.
|
|
26
|
+
* - `pending_tool_calls` JSONB — `ToolUseBlock[]` the model
|
|
27
|
+
* wants executed. Multi-call batches are
|
|
28
|
+
* captured together (mid-batch invariant).
|
|
29
|
+
* - `state` JSONB — `SuspendedState` snapshot. The
|
|
30
|
+
* framework's `brain.resumeTools(state,
|
|
31
|
+
* ...)` takes this as its first arg.
|
|
32
|
+
* - `status` `pending | resumed | cancelled`. Apps
|
|
33
|
+
* bulk-list pending runs and walk through
|
|
34
|
+
* an approval queue.
|
|
35
|
+
* - `timestamps` `created_at` for "how long pending?"
|
|
36
|
+
* sorts, `updated_at` for transition
|
|
37
|
+
* tracking.
|
|
38
|
+
*
|
|
39
|
+
* Tenanted: standard `tenant_id` + RLS.
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
import { Archetype, defineSchema } from '@strav/database'
|
|
43
|
+
import { brainThreadSchema } from './brain_thread_schema.ts'
|
|
44
|
+
|
|
45
|
+
export const brainSuspendedRunSchema = defineSchema(
|
|
46
|
+
'brain_suspended_run',
|
|
47
|
+
Archetype.Entity,
|
|
48
|
+
(t) => {
|
|
49
|
+
t.id()
|
|
50
|
+
t.reference('thread_id').to(brainThreadSchema).onDelete('set null').nullable()
|
|
51
|
+
t.string('user_id').max(64).nullable()
|
|
52
|
+
t.json('pending_tool_calls').notNull()
|
|
53
|
+
t.json('state').notNull()
|
|
54
|
+
t.enum('status', ['pending', 'resumed', 'cancelled']).notNull().default('pending')
|
|
55
|
+
t.timestamps()
|
|
56
|
+
},
|
|
57
|
+
{ tenanted: true },
|
|
58
|
+
)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `brainThreadSchema` — one row per conversation.
|
|
3
|
+
*
|
|
4
|
+
* Carries the per-thread defaults that `Thread` already serializes
|
|
5
|
+
* (`system`, `options`, `lastResponseId`) plus a few framework-side
|
|
6
|
+
* fields apps want to filter / sort on:
|
|
7
|
+
*
|
|
8
|
+
* - `id` ULID primary key. Hand the same value back to
|
|
9
|
+
* `BrainStore.loadThread(id)` to rehydrate.
|
|
10
|
+
* - `user_id` App-defined owner. Stored as `text` (no FK) —
|
|
11
|
+
* user table shape varies per app. Indexed in
|
|
12
|
+
* the recommended migration so "list threads
|
|
13
|
+
* for user X" stays fast.
|
|
14
|
+
* - `title` Human label. Apps set it from the first user
|
|
15
|
+
* turn or via an explicit "rename" UI.
|
|
16
|
+
* - `system` Thread-owned system prompt. Mirrors
|
|
17
|
+
* `ThreadState.system`. JSONB so the structured
|
|
18
|
+
* form (text + cache flag) round-trips.
|
|
19
|
+
* - `options` Thread defaults applied to every `send()`.
|
|
20
|
+
* Mirrors `ThreadState.options`.
|
|
21
|
+
* - `last_response_id` OpenAI Responses API stateful pointer.
|
|
22
|
+
* Mirrors `ThreadState.lastResponseId`. NULL for
|
|
23
|
+
* non-Responses providers.
|
|
24
|
+
* - `timestamps` `created_at` + `updated_at` for sort / audit.
|
|
25
|
+
*
|
|
26
|
+
* Tenanted: `tenant_id` FK + RLS policies auto-injected by
|
|
27
|
+
* `@strav/database`. Apps wrap calls in `tenants.withTenant(...)`
|
|
28
|
+
* and the database enforces isolation — no app-level filter needed.
|
|
29
|
+
*
|
|
30
|
+
* The per-turn message history lives in `brain_message`, joined by
|
|
31
|
+
* `thread_id`. This keeps every send to an O(1) INSERT and makes
|
|
32
|
+
* pagination / per-turn analytics cheap.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import { Archetype, defineSchema } from '@strav/database'
|
|
36
|
+
|
|
37
|
+
export const brainThreadSchema = defineSchema(
|
|
38
|
+
'brain_thread',
|
|
39
|
+
Archetype.Entity,
|
|
40
|
+
(t) => {
|
|
41
|
+
t.id()
|
|
42
|
+
t.string('user_id').max(64).nullable()
|
|
43
|
+
t.string('title').max(255).nullable()
|
|
44
|
+
t.json('system').nullable()
|
|
45
|
+
t.json('options').nullable()
|
|
46
|
+
t.string('last_response_id').max(128).nullable()
|
|
47
|
+
t.timestamps()
|
|
48
|
+
},
|
|
49
|
+
{ tenanted: true },
|
|
50
|
+
)
|
package/src/provider.ts
CHANGED
|
@@ -17,6 +17,7 @@ import type { AgentResult } from './agent_result.ts'
|
|
|
17
17
|
import type { AgentStreamEvent } from './agent_stream_event.ts'
|
|
18
18
|
import type { MCPServer } from './mcp_server.ts'
|
|
19
19
|
import type { OutputSchema } from './output_schema.ts'
|
|
20
|
+
import type { SuspendedRun } from './suspended_run.ts'
|
|
20
21
|
import type { Tool } from './tool.ts'
|
|
21
22
|
import type { ToolExecutionError } from './tool_execution_error.ts'
|
|
22
23
|
import type {
|
|
@@ -28,6 +29,7 @@ import type {
|
|
|
28
29
|
GenerateResult,
|
|
29
30
|
Message,
|
|
30
31
|
StreamEvent,
|
|
32
|
+
ToolUseBlock,
|
|
31
33
|
TranscribeOptions,
|
|
32
34
|
TranscribeResult,
|
|
33
35
|
} from './types.ts'
|
|
@@ -69,6 +71,39 @@ export interface RunWithToolsOptions extends ChatOptions {
|
|
|
69
71
|
* ```
|
|
70
72
|
*/
|
|
71
73
|
onToolError?(error: ToolExecutionError): string | undefined
|
|
74
|
+
/**
|
|
75
|
+
* Human-in-the-loop gate. Called before each tool execution; when
|
|
76
|
+
* it returns `true`, the loop suspends and `runWithTools` returns
|
|
77
|
+
* a `SuspendedRun` carrying the pending tool calls + a JSON-
|
|
78
|
+
* serializable snapshot of the loop state. Apps obtain results
|
|
79
|
+
* out-of-band (human approval, queued worker, external system,
|
|
80
|
+
* ...) and call `brain.resumeTools(state, results, tools, options)`
|
|
81
|
+
* to continue.
|
|
82
|
+
*
|
|
83
|
+
* Mid-batch invariant: if a tool call inside a multi-call batch
|
|
84
|
+
* triggers suspension, the framework also captures all unexecuted
|
|
85
|
+
* siblings from the same assistant turn — the provider's
|
|
86
|
+
* `tool_use` / `tool_result` pairing must stay balanced on resume.
|
|
87
|
+
*
|
|
88
|
+
* V1 scope: only honored on non-streaming `runWithTools`. Pass it
|
|
89
|
+
* to `streamWithTools`, `runWithToolsAndSchema`, or
|
|
90
|
+
* `streamWithToolsAndSchema` and the framework throws `BrainError`
|
|
91
|
+
* — those entrypoints don't yet model the pause/resume protocol.
|
|
92
|
+
*/
|
|
93
|
+
shouldSuspend?(
|
|
94
|
+
call: ToolUseBlock,
|
|
95
|
+
context?: Record<string, unknown>,
|
|
96
|
+
): boolean | Promise<boolean>
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Same as `RunWithToolsOptions` but with `shouldSuspend` required.
|
|
101
|
+
* Used to narrow the return type of `runWithTools` overloads — when
|
|
102
|
+
* apps opt in to the human-in-the-loop gate, the result widens to
|
|
103
|
+
* `AgentResult | SuspendedRun`; otherwise it's just `AgentResult`.
|
|
104
|
+
*/
|
|
105
|
+
export type RunWithToolsOptionsWithSuspend = RunWithToolsOptions & {
|
|
106
|
+
shouldSuspend: NonNullable<RunWithToolsOptions['shouldSuspend']>
|
|
72
107
|
}
|
|
73
108
|
|
|
74
109
|
export interface Provider {
|
|
@@ -113,7 +148,7 @@ export interface Provider {
|
|
|
113
148
|
messages: readonly Message[],
|
|
114
149
|
tools: readonly Tool[],
|
|
115
150
|
options?: RunWithToolsOptions,
|
|
116
|
-
): Promise<AgentResult>
|
|
151
|
+
): Promise<AgentResult | SuspendedRun>
|
|
117
152
|
|
|
118
153
|
/**
|
|
119
154
|
* Structured output. Sends `messages` to the model with a
|
|
@@ -28,12 +28,18 @@ import type { AgentResult } from '../agent_result.ts'
|
|
|
28
28
|
import type { AnthropicProviderConfig } from '../brain_config.ts'
|
|
29
29
|
import { DEFAULT_MODEL } from '../brain_config.ts'
|
|
30
30
|
import { BrainError } from '../brain_error.ts'
|
|
31
|
-
import type {
|
|
31
|
+
import type {
|
|
32
|
+
Provider,
|
|
33
|
+
RunWithToolsOptions,
|
|
34
|
+
RunWithToolsOptionsWithSuspend,
|
|
35
|
+
} from '../provider.ts'
|
|
36
|
+
import type { SuspendedRun } from '../suspended_run.ts'
|
|
32
37
|
import type { Tool } from '../tool.ts'
|
|
33
38
|
import type {
|
|
34
39
|
ChatOptions,
|
|
35
40
|
ChatResult,
|
|
36
41
|
ChatUsage,
|
|
42
|
+
CompactionBlock,
|
|
37
43
|
ContentBlock,
|
|
38
44
|
GenerateResult,
|
|
39
45
|
MCPToolResultBlock,
|
|
@@ -82,7 +88,13 @@ export class AnthropicProvider implements Provider {
|
|
|
82
88
|
|
|
83
89
|
async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
|
|
84
90
|
const params = this.buildParams(messages, options)
|
|
85
|
-
const
|
|
91
|
+
const useBeta = needsBetaRouting(params)
|
|
92
|
+
const response = useBeta
|
|
93
|
+
? ((await this.client.beta.messages.create(
|
|
94
|
+
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
|
|
95
|
+
reqOpts(options),
|
|
96
|
+
)) as unknown as Anthropic.Message)
|
|
97
|
+
: await this.client.messages.create(params, reqOpts(options))
|
|
86
98
|
return this.toChatResult(response)
|
|
87
99
|
}
|
|
88
100
|
|
|
@@ -91,7 +103,12 @@ export class AnthropicProvider implements Provider {
|
|
|
91
103
|
options: ChatOptions = {},
|
|
92
104
|
): AsyncIterable<StreamEvent> {
|
|
93
105
|
const params = this.buildParams(messages, options)
|
|
94
|
-
const stream =
|
|
106
|
+
const stream = needsBetaRouting(params)
|
|
107
|
+
? this.client.beta.messages.stream(
|
|
108
|
+
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
|
|
109
|
+
reqOpts(options),
|
|
110
|
+
)
|
|
111
|
+
: this.client.messages.stream(params, reqOpts(options))
|
|
95
112
|
for await (const event of stream) {
|
|
96
113
|
if (
|
|
97
114
|
event.type === 'content_block_delta' &&
|
|
@@ -137,11 +154,21 @@ export class AnthropicProvider implements Provider {
|
|
|
137
154
|
* `tools` array each turn. Apps that care about cache hits keep
|
|
138
155
|
* the tool list stable across runs.
|
|
139
156
|
*/
|
|
157
|
+
runWithTools(
|
|
158
|
+
messages: readonly Message[],
|
|
159
|
+
tools: readonly Tool[],
|
|
160
|
+
options: RunWithToolsOptionsWithSuspend,
|
|
161
|
+
): Promise<AgentResult | SuspendedRun>
|
|
162
|
+
runWithTools(
|
|
163
|
+
messages: readonly Message[],
|
|
164
|
+
tools: readonly Tool[],
|
|
165
|
+
options?: RunWithToolsOptions,
|
|
166
|
+
): Promise<AgentResult>
|
|
140
167
|
async runWithTools(
|
|
141
168
|
messages: readonly Message[],
|
|
142
169
|
tools: readonly Tool[],
|
|
143
170
|
options: RunWithToolsOptions = {},
|
|
144
|
-
): Promise<AgentResult> {
|
|
171
|
+
): Promise<AgentResult | SuspendedRun> {
|
|
145
172
|
const maxIterations = options.maxIterations ?? 10
|
|
146
173
|
const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
|
|
147
174
|
const workingMessages: Message[] = [...messages]
|
|
@@ -186,7 +213,6 @@ export class AnthropicProvider implements Provider {
|
|
|
186
213
|
|
|
187
214
|
// Declare MCP servers + flip to the beta surface when in use.
|
|
188
215
|
// Anthropic's MCP connector requires `mcp-client-2025-11-20`.
|
|
189
|
-
let response: Anthropic.Message
|
|
190
216
|
if (useMcpBeta) {
|
|
191
217
|
params.mcp_servers = mcpServers.map((s) => {
|
|
192
218
|
const def: Anthropic.Beta.Messages.BetaRequestMCPServerURLDefinition = {
|
|
@@ -201,13 +227,15 @@ export class AnthropicProvider implements Provider {
|
|
|
201
227
|
;(params as { betas?: string[] }).betas = baseBetas.includes('mcp-client-2025-11-20')
|
|
202
228
|
? [...baseBetas]
|
|
203
229
|
: [...baseBetas, 'mcp-client-2025-11-20']
|
|
204
|
-
response = (await this.client.beta.messages.create(
|
|
205
|
-
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
|
|
206
|
-
reqOpts(options),
|
|
207
|
-
)) as unknown as Anthropic.Message
|
|
208
|
-
} else {
|
|
209
|
-
response = await this.client.messages.create(params, reqOpts(options))
|
|
210
230
|
}
|
|
231
|
+
// Route via beta when either MCP servers OR compaction are in
|
|
232
|
+
// play — both live on the beta surface.
|
|
233
|
+
const response: Anthropic.Message = needsBetaRouting(params)
|
|
234
|
+
? ((await this.client.beta.messages.create(
|
|
235
|
+
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
|
|
236
|
+
reqOpts(options),
|
|
237
|
+
)) as unknown as Anthropic.Message)
|
|
238
|
+
: await this.client.messages.create(params, reqOpts(options))
|
|
211
239
|
addUsage(aggregated, response.usage)
|
|
212
240
|
lastStopReason = response.stop_reason ?? null
|
|
213
241
|
|
|
@@ -236,7 +264,28 @@ export class AnthropicProvider implements Provider {
|
|
|
236
264
|
(b): b is Anthropic.ToolUseBlock => b.type === 'tool_use',
|
|
237
265
|
)
|
|
238
266
|
const resultBlocks: ContentBlock[] = []
|
|
239
|
-
for (
|
|
267
|
+
for (let i = 0; i < toolUseBlocks.length; i++) {
|
|
268
|
+
const block = toolUseBlocks[i]!
|
|
269
|
+
if (options.shouldSuspend) {
|
|
270
|
+
const frameworkCall: ToolUseBlock = {
|
|
271
|
+
type: 'tool_use',
|
|
272
|
+
id: block.id,
|
|
273
|
+
name: block.name,
|
|
274
|
+
input: block.input as Record<string, unknown>,
|
|
275
|
+
}
|
|
276
|
+
if (await options.shouldSuspend(frameworkCall, options.context)) {
|
|
277
|
+
return {
|
|
278
|
+
status: 'suspended',
|
|
279
|
+
pendingToolCalls: toolUseBlocks.slice(i).map((b) => ({
|
|
280
|
+
type: 'tool_use',
|
|
281
|
+
id: b.id,
|
|
282
|
+
name: b.name,
|
|
283
|
+
input: b.input as Record<string, unknown>,
|
|
284
|
+
})),
|
|
285
|
+
state: { messages: workingMessages, iterations, usage: aggregated },
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
}
|
|
240
289
|
const { content, isError } = await runToolWithRecovery(
|
|
241
290
|
toolMap.get(block.name),
|
|
242
291
|
block.name,
|
|
@@ -314,7 +363,6 @@ export class AnthropicProvider implements Provider {
|
|
|
314
363
|
format: { type: 'json_schema', schema: schema.jsonSchema },
|
|
315
364
|
}
|
|
316
365
|
|
|
317
|
-
let response: Anthropic.Message
|
|
318
366
|
if (useMcpBeta) {
|
|
319
367
|
params.mcp_servers = mcpServers.map((s) => {
|
|
320
368
|
const def: Anthropic.Beta.Messages.BetaRequestMCPServerURLDefinition = {
|
|
@@ -329,13 +377,13 @@ export class AnthropicProvider implements Provider {
|
|
|
329
377
|
;(params as { betas?: string[] }).betas = baseBetas.includes('mcp-client-2025-11-20')
|
|
330
378
|
? [...baseBetas]
|
|
331
379
|
: [...baseBetas, 'mcp-client-2025-11-20']
|
|
332
|
-
response = (await this.client.beta.messages.create(
|
|
333
|
-
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
|
|
334
|
-
reqOpts(options),
|
|
335
|
-
)) as unknown as Anthropic.Message
|
|
336
|
-
} else {
|
|
337
|
-
response = await this.client.messages.create(params, reqOpts(options))
|
|
338
380
|
}
|
|
381
|
+
const response: Anthropic.Message = needsBetaRouting(params)
|
|
382
|
+
? ((await this.client.beta.messages.create(
|
|
383
|
+
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsNonStreaming,
|
|
384
|
+
reqOpts(options),
|
|
385
|
+
)) as unknown as Anthropic.Message)
|
|
386
|
+
: await this.client.messages.create(params, reqOpts(options))
|
|
339
387
|
addUsage(aggregated, response.usage)
|
|
340
388
|
lastStopReason = response.stop_reason ?? null
|
|
341
389
|
|
|
@@ -454,7 +502,7 @@ export class AnthropicProvider implements Provider {
|
|
|
454
502
|
: [...baseBetas, 'mcp-client-2025-11-20']
|
|
455
503
|
}
|
|
456
504
|
|
|
457
|
-
const stream =
|
|
505
|
+
const stream = needsBetaRouting(params)
|
|
458
506
|
? this.client.beta.messages.stream(
|
|
459
507
|
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
|
|
460
508
|
reqOpts(options),
|
|
@@ -619,7 +667,7 @@ export class AnthropicProvider implements Provider {
|
|
|
619
667
|
: [...baseBetas, 'mcp-client-2025-11-20']
|
|
620
668
|
}
|
|
621
669
|
|
|
622
|
-
const stream =
|
|
670
|
+
const stream = needsBetaRouting(params)
|
|
623
671
|
? this.client.beta.messages.stream(
|
|
624
672
|
params as unknown as Anthropic.Beta.Messages.MessageCreateParamsStreaming,
|
|
625
673
|
reqOpts(options),
|
|
@@ -782,7 +830,26 @@ export class AnthropicProvider implements Provider {
|
|
|
782
830
|
;(params as { cache_control?: { type: 'ephemeral' } }).cache_control = EPHEMERAL_CACHE
|
|
783
831
|
}
|
|
784
832
|
|
|
785
|
-
|
|
833
|
+
// Compaction — emits the beta `edits` entry + flips the
|
|
834
|
+
// `compact-2026-01-12` beta header so the request goes through
|
|
835
|
+
// the SDK's beta surface (same routing as MCP).
|
|
836
|
+
const baseBetas = mergeBetas(this.betas, options.betas)
|
|
837
|
+
const betas = options.compact !== undefined
|
|
838
|
+
? mergeBetas(baseBetas, [COMPACT_BETA])
|
|
839
|
+
: baseBetas
|
|
840
|
+
if (options.compact !== undefined) {
|
|
841
|
+
const edit: Record<string, unknown> = { type: COMPACT_EDIT_TYPE }
|
|
842
|
+
if (options.compact.trigger !== undefined) {
|
|
843
|
+
edit.trigger = { type: 'input_tokens', value: options.compact.trigger }
|
|
844
|
+
}
|
|
845
|
+
if (options.compact.instructions !== undefined) {
|
|
846
|
+
edit.instructions = options.compact.instructions
|
|
847
|
+
}
|
|
848
|
+
if (options.compact.pauseAfterCompaction !== undefined) {
|
|
849
|
+
edit.pause_after_compaction = options.compact.pauseAfterCompaction
|
|
850
|
+
}
|
|
851
|
+
;(params as { edits?: unknown[] }).edits = [edit]
|
|
852
|
+
}
|
|
786
853
|
if (betas.length > 0) {
|
|
787
854
|
;(params as { betas?: readonly string[] }).betas = betas
|
|
788
855
|
}
|
|
@@ -799,18 +866,48 @@ export class AnthropicProvider implements Provider {
|
|
|
799
866
|
.filter((b): b is Anthropic.TextBlock => b.type === 'text')
|
|
800
867
|
.map((b) => b.text)
|
|
801
868
|
.join('')
|
|
802
|
-
|
|
869
|
+
const result: ChatResult<Anthropic.Message> = {
|
|
803
870
|
text,
|
|
804
871
|
model: message.model,
|
|
805
872
|
stopReason: message.stop_reason,
|
|
806
873
|
usage: toUsage(message.usage),
|
|
807
874
|
raw: message,
|
|
808
875
|
}
|
|
876
|
+
// Surface structured content when the turn carries blocks
|
|
877
|
+
// beyond plain text (compaction today; reasoning blocks in a
|
|
878
|
+
// future slice). Apps that persist conversations push this
|
|
879
|
+
// onto the message history so round-trippable blocks survive
|
|
880
|
+
// subsequent requests.
|
|
881
|
+
const blocks = fromAnthropicContent(message.content)
|
|
882
|
+
if (blocks.some((b) => b.type !== 'text')) {
|
|
883
|
+
result.content = blocks
|
|
884
|
+
}
|
|
885
|
+
return result
|
|
809
886
|
}
|
|
810
887
|
}
|
|
811
888
|
|
|
812
889
|
// ─── Shape converters ─────────────────────────────────────────────────────
|
|
813
890
|
|
|
891
|
+
/** Compaction beta — required header + `edits[].type` for `compact-2026-01-12`. */
|
|
892
|
+
const COMPACT_BETA = 'compact-2026-01-12'
|
|
893
|
+
const COMPACT_EDIT_TYPE = 'compact_20260112'
|
|
894
|
+
|
|
895
|
+
/**
|
|
896
|
+
* Whether the request needs to flow through `client.beta.messages` (`create` / `stream`)
|
|
897
|
+
* instead of the stable surface. Triggered by:
|
|
898
|
+
*
|
|
899
|
+
* - `edits[]` (compaction).
|
|
900
|
+
* - `mcp_servers[]` (server-side MCP).
|
|
901
|
+
*
|
|
902
|
+
* Tests typically stub `client.messages.create`; the beta path uses the
|
|
903
|
+
* stub that lives at `client.beta.messages.create`.
|
|
904
|
+
*/
|
|
905
|
+
function needsBetaRouting(params: Anthropic.MessageCreateParamsNonStreaming): boolean {
|
|
906
|
+
const p = params as { edits?: unknown[]; mcp_servers?: unknown[] }
|
|
907
|
+
return (p.edits !== undefined && p.edits.length > 0)
|
|
908
|
+
|| (p.mcp_servers !== undefined && p.mcp_servers.length > 0)
|
|
909
|
+
}
|
|
910
|
+
|
|
814
911
|
/** Build the request-options bag forwarded to the SDK. Only `signal` for now. */
|
|
815
912
|
function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
|
|
816
913
|
return options.signal !== undefined ? { signal: options.signal } : undefined
|
|
@@ -905,6 +1002,19 @@ function toMessageParam(message: Message): Anthropic.MessageParam {
|
|
|
905
1002
|
{ context: { provider: 'anthropic' } },
|
|
906
1003
|
)
|
|
907
1004
|
}
|
|
1005
|
+
if (block.type === 'compaction') {
|
|
1006
|
+
// Round-trip the compaction block verbatim — the server uses
|
|
1007
|
+
// the opaque `encrypted_content` to stitch prior compactions
|
|
1008
|
+
// together; mutating either field would invalidate the
|
|
1009
|
+
// history. Untyped on the stable SDK surface; cast through
|
|
1010
|
+
// the beta type shape.
|
|
1011
|
+
const param: Record<string, unknown> = { type: 'compaction' }
|
|
1012
|
+
if (block.content !== null) param.content = block.content
|
|
1013
|
+
if (block.encryptedContent !== null) {
|
|
1014
|
+
param.encrypted_content = block.encryptedContent
|
|
1015
|
+
}
|
|
1016
|
+
return param as unknown as Anthropic.ContentBlockParam
|
|
1017
|
+
}
|
|
908
1018
|
const text: Anthropic.TextBlockParam = { type: 'text', text: block.text }
|
|
909
1019
|
if (block.cache) text.cache_control = EPHEMERAL_CACHE
|
|
910
1020
|
return text
|
|
@@ -1071,6 +1181,13 @@ function fromAnthropicContent(
|
|
|
1071
1181
|
}
|
|
1072
1182
|
if (r.is_error) result.isError = true
|
|
1073
1183
|
out.push(result)
|
|
1184
|
+
} else if (block.type === 'compaction') {
|
|
1185
|
+
const c = block as { content?: string | null; encrypted_content?: string | null }
|
|
1186
|
+
out.push({
|
|
1187
|
+
type: 'compaction',
|
|
1188
|
+
content: c.content ?? null,
|
|
1189
|
+
encryptedContent: c.encrypted_content ?? null,
|
|
1190
|
+
} satisfies CompactionBlock)
|
|
1074
1191
|
}
|
|
1075
1192
|
}
|
|
1076
1193
|
return out
|