@strav/brain 1.0.0-alpha.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/package.json +23 -7
  2. package/src/agent.ts +43 -5
  3. package/src/agent_generate_result.ts +32 -0
  4. package/src/agent_result.ts +7 -0
  5. package/src/agent_runner.ts +218 -14
  6. package/src/agent_stream_event.ts +100 -0
  7. package/src/brain_config.ts +218 -1
  8. package/src/brain_driver.ts +247 -0
  9. package/src/brain_error.ts +86 -10
  10. package/src/brain_manager.ts +359 -11
  11. package/src/brain_provider.ts +79 -9
  12. package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
  13. package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
  14. package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
  15. package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
  16. package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
  17. package/src/drivers/anthropic/index.ts +1 -0
  18. package/src/drivers/deepseek/deepseek_brain_driver.ts +117 -0
  19. package/src/drivers/deepseek/index.ts +1 -0
  20. package/src/drivers/gemini/gemini_brain_driver.ts +1064 -0
  21. package/src/drivers/gemini/index.ts +1 -0
  22. package/src/drivers/minimax/index.ts +1 -0
  23. package/src/drivers/minimax/minimax_brain_driver.ts +84 -0
  24. package/src/drivers/ollama/index.ts +1 -0
  25. package/src/drivers/ollama/ollama_brain_driver.ts +86 -0
  26. package/src/drivers/openai/index.ts +1 -0
  27. package/src/drivers/openai/openai_brain_driver.ts +796 -0
  28. package/src/drivers/openai/openai_helpers.ts +58 -0
  29. package/src/drivers/openai/openai_message_builder.ts +187 -0
  30. package/src/drivers/openai/openai_response_mapper.ts +70 -0
  31. package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
  32. package/src/drivers/openai/openai_tool_loop.ts +191 -0
  33. package/src/drivers/openai_compat/index.ts +1 -0
  34. package/src/drivers/openai_compat/openai_compat_brain_driver.ts +616 -0
  35. package/src/drivers/openai_responses/index.ts +1 -0
  36. package/src/drivers/openai_responses/openai_responses_brain_driver.ts +1015 -0
  37. package/src/drivers/openrouter/index.ts +1 -0
  38. package/src/drivers/openrouter/openrouter_brain_driver.ts +137 -0
  39. package/src/drivers/qwen/index.ts +1 -0
  40. package/src/drivers/qwen/qwen_brain_driver.ts +103 -0
  41. package/src/index.ts +75 -11
  42. package/src/mcp/client.ts +243 -0
  43. package/src/mcp/index.ts +23 -0
  44. package/src/mcp/oauth.ts +227 -0
  45. package/src/mcp/pool.ts +106 -0
  46. package/src/mcp/resolve_mcp_tools.ts +108 -0
  47. package/src/mcp_server.ts +63 -0
  48. package/src/output_schema.ts +72 -0
  49. package/src/persistence/brain_message.ts +34 -0
  50. package/src/persistence/brain_message_repository.ts +98 -0
  51. package/src/persistence/brain_store.ts +166 -0
  52. package/src/persistence/brain_suspended_run.ts +30 -0
  53. package/src/persistence/brain_suspended_run_repository.ts +59 -0
  54. package/src/persistence/brain_thread.ts +30 -0
  55. package/src/persistence/brain_thread_repository.ts +56 -0
  56. package/src/persistence/database_brain_store.ts +190 -0
  57. package/src/persistence/index.ts +48 -0
  58. package/src/persistence/schemas/brain_message_schema.ts +61 -0
  59. package/src/persistence/schemas/brain_suspended_run_schema.ts +58 -0
  60. package/src/persistence/schemas/brain_thread_schema.ts +50 -0
  61. package/src/persistence/schemas/index.ts +3 -0
  62. package/src/suspended_run.ts +153 -0
  63. package/src/thread.ts +40 -1
  64. package/src/tool.ts +7 -0
  65. package/src/tool_runner.ts +81 -0
  66. package/src/translate/index.ts +19 -0
  67. package/src/translate/translate_cache.ts +78 -0
  68. package/src/translate/translate_provider.ts +46 -0
  69. package/src/translate/translator.ts +271 -0
  70. package/src/types.ts +398 -1
  71. package/src/zod/index.ts +121 -0
  72. package/src/provider.ts +0 -74
  73. package/src/providers/anthropic_provider.ts +0 -397
@@ -0,0 +1,271 @@
1
+ /**
2
+ * `Translator` — LLM-backed translation primitive on top of
3
+ * `BrainManager`. Sonnet-uniform by default (`tier: 'balanced'`),
4
+ * which routes to `claude-sonnet-4-6` on the Anthropic driver — apps
5
+ * override with `options.model` or `options.provider` per call.
6
+ *
7
+ * Two entry points:
8
+ *
9
+ * - `translate(text, { to: [...] })` — fan-out one string into
10
+ * every target language in parallel. Returns
11
+ * `{ [langCode]: translated }`.
12
+ *
13
+ * - `translateBatch(fields, { to: [...] })` — translate a
14
+ * fixed-shape object (`{ title, body }`) into every target
15
+ * language. Each target language runs in parallel; within a
16
+ * language, all fields land in one model call so the model
17
+ * keeps shared context (a `title` and `body` translated
18
+ * together stay tonally consistent).
19
+ *
20
+ * Cross-cutting:
21
+ *
22
+ * - **Structured output.** Uses `brain.generate(input, schema)`
23
+ * with a JSON Schema that locks the response to the expected
24
+ * keys, so models never sneak in commentary or transliterations.
25
+ *
26
+ * - **Prompt caching.** The system prompt is identical across
27
+ * every call (per-language hints ride in the user message), so
28
+ * Anthropic prompt caching kicks in once the cache window warms.
29
+ * Set `cache: false` on the constructor to opt out.
30
+ *
31
+ * - **In-memory cache.** Identical `(model, from, to, text)`
32
+ * tuples are served from a process-local LRU (default 1000
33
+ * entries) — see `TranslateCache`. Pass `cacheSize: 0` to
34
+ * disable.
35
+ *
36
+ * - **Source language auto-detect.** Omit `from` and the user
37
+ * message tells the model to detect the source. Apps that know
38
+ * the source pass it explicitly for marginal quality + token
39
+ * savings.
40
+ */
41
+
42
+ import type { BrainManager } from '../brain_manager.ts'
43
+ import type { OutputSchema } from '../output_schema.ts'
44
+ import type { ChatOptions, ModelTier } from '../types.ts'
45
+ import { cacheKey, TranslateCache } from './translate_cache.ts'
46
+
47
/** Construction-time settings for a `Translator`. */
export interface TranslatorOptions {
  /** The `BrainManager` every translation request is issued through. */
  brain: BrainManager
  /** Name of the brain provider to route to. When omitted, the configured `brain.default` applies. */
  provider?: string
  /** Tier shorthand used only when no `model` is given. Defaults to `'balanced'`. */
  tier?: ModelTier
  /** Explicit model id; takes precedence over `tier`. */
  model?: string
  /** Replacement for the built-in system prompt, e.g. for apps that localise the prompt itself. */
  systemPrompt?: string
  /** Capacity of the in-memory translation LRU. Defaults to `1000`; `0` turns it off. */
  cacheSize?: number
  /** Whether to request Anthropic prompt caching for the system prompt. Defaults to `true`; other providers ignore it. */
  cache?: boolean
}
62
+
63
/** Per-call settings shared by `translate` and `translateBatch`. */
export interface TranslateOptions {
  /** BCP-47 codes of the languages to translate into (e.g. `'th'`, `'zh-Hant'`, `'ja'`). */
  to: readonly string[]
  /** BCP-47 code of the source language. Leave unset to have the model detect it. */
  from?: string
  /** Model to use for this call only; beats the tier/model given to the constructor. */
  model?: string
  /** Provider to use for this call only. */
  provider?: string
  /** Abort signal handed to every parallel `brain.generate` call made for this request. */
  signal?: AbortSignal
}
75
+
76
/** Options accepted by `translateBatch`; currently the exact same shape as `TranslateOptions`. */
export type BatchTranslateOptions = TranslateOptions
77
+
78
/**
 * System prompt used when the caller does not supply one. The text is
 * the same for every call so that provider-side prompt caching can
 * warm up; anything call-specific (source/target language, the text,
 * the field shape) is sent in the user message instead.
 */
export const DEFAULT_SYSTEM_PROMPT = [
  'You are a translation engine.',
  '',
  'The user supplies (a) a source-language code (or "auto"), (b) a target BCP-47 language code, and (c) the source text or a JSON object of named source fields. Translate the source into the target language and output ONLY the translation in the required JSON shape.',
  '',
  'Rules:',
  '- Output ONLY the translated text in the requested JSON shape. Do not add explanations, notes, alternatives, or transliterations.',
  '- Preserve Markdown, HTML tags, links, mentions, hashtags, code spans, and emoji exactly as in the source.',
  '- Keep numbers, dates, currency symbols, and proper nouns recognisable in the target locale; do not invent translations for brand names.',
  '- If the source is already in the target language, output it unchanged.',
  '- For batch translations, every requested field must appear in the output — never drop a field.',
].join('\n')
93
+
94
/**
 * LLM-backed translator layered on a `BrainManager`.
 *
 * Every request goes through `brain.generate(...)` with a JSON Schema
 * that pins the response to the expected keys. Finished translations
 * are memoised in a `TranslateCache`, keyed by the resolved
 * provider/model, the source language, the target language and the
 * source text.
 */
export class Translator {
  private readonly brain: BrainManager
  private readonly provider: string | undefined
  private readonly tier: ModelTier
  private readonly explicitModel: string | undefined
  private readonly systemPrompt: string
  private readonly cache: TranslateCache
  private readonly promptCache: boolean

  /**
   * @param options - See `TranslatorOptions`. Defaults applied here:
   *   `tier` → `'balanced'`, `systemPrompt` → `DEFAULT_SYSTEM_PROMPT`,
   *   `cacheSize` → `1000`, `cache` → `true`.
   */
  constructor(options: TranslatorOptions) {
    this.brain = options.brain
    this.provider = options.provider
    this.tier = options.tier ?? 'balanced'
    this.explicitModel = options.model
    this.systemPrompt = options.systemPrompt ?? DEFAULT_SYSTEM_PROMPT
    this.cache = new TranslateCache(options.cacheSize ?? 1000)
    this.promptCache = options.cache ?? true
  }

  /**
   * Translate one string into every target language in parallel.
   *
   * @param text - Source text.
   * @param options - Targets plus optional source language / overrides.
   * @returns A `{ [lang]: translated }` map with one entry per distinct
   *   code in `options.to`; `{}` when `options.to` is empty.
   * @throws Whatever the first failing `brain.generate` call throws —
   *   one rejected language rejects the whole `Promise.all`.
   */
  async translate(
    text: string,
    options: TranslateOptions,
  ): Promise<Record<string, string>> {
    // A repeated code would collapse into a single key of the result
    // anyway, so dedupe up front rather than paying for identical
    // concurrent model calls.
    const targets = Array.from(new Set(options.to))
    if (targets.length === 0) return {}

    const entries = await Promise.all(
      targets.map(
        async (lang) => [lang, await this.translateOne(text, lang, options)] as const,
      ),
    )
    return Object.fromEntries(entries)
  }

  /**
   * Translate a fixed-shape object of string fields into every target
   * language. Languages run in parallel; within one language all
   * fields travel in a single model call so they share context.
   *
   * @param fields - Named source strings, e.g. `{ title, body }`.
   * @param options - Targets plus optional source language / overrides.
   * @returns `{ [lang]: { ...fields } }`, one entry per distinct code
   *   in `options.to`. With no fields, every language maps to `{}`.
   * @throws Whatever `brain.generate` throws for any language. The
   *   schema marks every field as required, so a response missing a
   *   field is expected to fail inside `generate` — that validation
   *   lives outside this class.
   */
  async translateBatch<T extends Record<string, string>>(
    fields: T,
    options: BatchTranslateOptions,
  ): Promise<Record<string, T>> {
    const targets = Array.from(new Set(options.to))
    if (targets.length === 0) return {}

    const fieldNames = Object.keys(fields) as Array<keyof T & string>
    if (fieldNames.length === 0) {
      return Object.fromEntries(targets.map((lang) => [lang, {} as T] as const))
    }

    const entries = await Promise.all(
      targets.map(
        async (lang) =>
          [lang, await this.translateBatchOne(fields, fieldNames, lang, options)] as const,
      ),
    )
    return Object.fromEntries(entries)
  }

  /** Drop the in-memory LRU. Useful in tests to keep cases isolated. */
  clearCache(): void {
    this.cache.clear()
  }

  // ─── internals ──────────────────────────────────────────────────────

  /**
   * Model id (or tier name, when no model is set anywhere) the call
   * will use. Empty strings count as "not set", exactly as in
   * `buildChatOptions`, so the cache key can never name a different
   * model than the one actually requested.
   */
  private resolvedModel(per: TranslateOptions): string {
    return per.model || this.explicitModel || this.tier
  }

  /** Provider override for the call, or `undefined` to use the brain's default. */
  private resolvedProvider(per: TranslateOptions): string | undefined {
    return per.provider || this.provider || undefined
  }

  /**
   * Value placed in the `model` slot of the cache key. The provider is
   * folded in because the same tier (or model id) may be served by a
   * different backend per provider, and those outputs must not be
   * shared.
   */
  private cacheScope(per: TranslateOptions): string {
    const model = this.resolvedModel(per)
    const provider = this.resolvedProvider(per)
    return provider ? `${provider}:${model}` : model
  }

  /** Assemble the `ChatOptions` for one `brain.generate` call. */
  private buildChatOptions(per: TranslateOptions): ChatOptions {
    const opts: ChatOptions = {
      system: this.promptCache
        ? { text: this.systemPrompt, cache: true }
        : this.systemPrompt,
    }

    // An explicit model wins over the tier; the tier is only sent when
    // no model was given per call or at construction.
    const model = per.model || this.explicitModel
    if (model) opts.model = model
    else opts.tier = this.tier

    const provider = this.resolvedProvider(per)
    if (provider) opts.provider = provider
    if (per.signal) opts.signal = per.signal
    return opts
  }

  /** Translate `text` into a single language, consulting the cache first. */
  private async translateOne(
    text: string,
    lang: string,
    per: TranslateOptions,
  ): Promise<string> {
    const key = cacheKey({ model: this.cacheScope(per), from: per.from, to: lang, text })
    const cached = this.cache.get(key)
    if (cached !== undefined) return cached

    const schema: OutputSchema<{ translation: string }> = {
      name: 'translation',
      description: `Translation of the source text into ${lang}.`,
      jsonSchema: {
        type: 'object',
        properties: { translation: { type: 'string' } },
        required: ['translation'],
        additionalProperties: false,
      },
    }

    const userMessage = `SOURCE_LANGUAGE: ${per.from ?? 'auto'}\nTARGET_LANGUAGE: ${lang}\nTEXT:\n${text}`

    const result = await this.brain.generate(userMessage, schema, this.buildChatOptions(per))
    const translated = result.value.translation
    this.cache.set(key, translated)
    return translated
  }

  /**
   * Translate every field into a single language.
   *
   * The cache is checked per field, and the model is called only when
   * at least one field is missing. That one call still covers all
   * fields, because the shared context is worth more than the
   * redundant translation work.
   */
  private async translateBatchOne<T extends Record<string, string>>(
    fields: T,
    fieldNames: readonly (keyof T & string)[],
    lang: string,
    per: BatchTranslateOptions,
  ): Promise<T> {
    const scope = this.cacheScope(per)

    // Compute each field's cache key once; it is needed for both the
    // lookup and the write-back.
    const keyed = fieldNames.map((name) => ({
      name,
      key: cacheKey({ model: scope, from: per.from, to: lang, text: fields[name]! }),
    }))

    const fromCache: Partial<Record<string, string>> = {}
    let allHit = true
    for (const { name, key } of keyed) {
      const hit = this.cache.get(key)
      if (hit === undefined) allHit = false
      else fromCache[name] = hit
    }
    if (allHit) return fromCache as T

    const properties: Record<string, unknown> = {}
    for (const name of fieldNames) properties[name] = { type: 'string' }
    const schema: OutputSchema<T> = {
      name: 'batch_translation',
      description: `Translation of every named field into ${lang}.`,
      jsonSchema: {
        type: 'object',
        properties,
        required: [...fieldNames],
        additionalProperties: false,
      },
    }

    const fieldsBlock = fieldNames
      .map((n) => `- ${n}: ${JSON.stringify(fields[n]!)}`)
      .join('\n')
    const userMessage = `SOURCE_LANGUAGE: ${per.from ?? 'auto'}\nTARGET_LANGUAGE: ${lang}\nFIELDS:\n${fieldsBlock}\n\nOutput a JSON object with these exact keys: ${fieldNames.join(', ')}.`

    const result = await this.brain.generate(userMessage, schema, this.buildChatOptions(per))
    const translated = result.value
    for (const { name, key } of keyed) {
      // Only memoise real strings so a lenient schema parser can never
      // poison the cache with a non-string value.
      const value = translated[name]
      if (typeof value === 'string') this.cache.set(key, value)
    }
    return translated
  }
}
package/src/types.ts CHANGED
@@ -71,7 +71,147 @@ export interface ToolResultBlock {
71
71
  isError?: boolean
72
72
  }
73
73
 
74
- export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
74
/**
 * An MCP tool call emitted by the provider. Apps only read these
 * blocks, never build them: they show up inside `assistant`-role
 * messages whenever the model invokes a tool published by a
 * configured MCP server. Anthropic's backend performs the MCP call
 * itself and places the matching `MCPToolResultBlock` in the same
 * response, so the framework's agentic loop has nothing to execute.
 *
 * Typical uses are observability (showing users that the model
 * consulted Linear / Notion / GitHub via MCP) and audit trails.
 */
export interface MCPToolUseBlock {
  type: 'mcp_tool_use'
  id: string
  /** Identifier of the MCP server; equals `MCPServer.name`. */
  serverName: string
  /** Name of the tool as the MCP server exposes it. */
  name: string
  /** Parsed input the model supplied to the MCP tool. */
  input: unknown
}
95
+
96
/**
 * Result of an MCP tool call, emitted by the provider. `toolUseId`
 * links it to its `MCPToolUseBlock`. `content` is either a plain
 * string or a list of text blocks, and `isError` is `true` when the
 * MCP server reported a failure.
 */
export interface MCPToolResultBlock {
  type: 'mcp_tool_result'
  toolUseId: string
  content: string | TextBlock[]
  isError?: boolean
}
107
+
108
/**
 * Image attachment for a user message, letting vision-capable models
 * see a picture next to the text. V1 handles images only; audio and
 * video come later.
 *
 * `source` is a discriminated union:
 *   - `{ type: 'base64', mediaType, data }` carries inline bytes —
 *     uploads, screenshots, or attachments the app already holds in
 *     memory. `mediaType` is the IANA MIME type (`image/png`,
 *     `image/jpeg`, `image/webp`, `image/gif`) and `data` is the
 *     base64 payload without a `data:` prefix (the provider
 *     translation adds one where required).
 *   - `{ type: 'url', url }` points at a remote image. Anthropic,
 *     OpenAI and Gemini all take HTTPS URLs; if calls 404, check the
 *     provider's domain allowlist (Anthropic was historically
 *     stricter). Gemini additionally accepts GCS URIs (`gs://...`).
 *
 * Vision support depends on both provider and model. Cloud options:
 * Anthropic Claude 4 family, OpenAI gpt-4o / gpt-5 family, Gemini
 * 2.x. Local options on Ollama: `llama3.2-vision`, `llava`,
 * `qwen2.5-vl`. A model without vision either rejects the call or
 * ignores the image.
 */
export interface ImageBlock {
  type: 'image'
  source:
    | { type: 'base64'; mediaType: string; data: string }
    | { type: 'url'; url: string }
}
136
+
137
/**
 * Document attachment for a user message. V1 means PDF only, since
 * every provider that supports documents currently gates on
 * `application/pdf`. Anthropic exposes it as a native `document`
 * block; Gemini takes it through `inlineData` / `fileData` with the
 * `application/pdf` MIME type. OpenAI, Ollama and DeepSeek have no
 * PDF block at all — for those vendors apps split the PDF into
 * images and send `ImageBlock`s.
 *
 * On Anthropic the optional `title` is shown to the model, which
 * helps in multi-document calls ("the contract", "the invoice").
 * Other providers ignore it.
 */
export interface DocumentBlock {
  type: 'document'
  source:
    | { type: 'base64'; mediaType: string; data: string }
    | { type: 'url'; url: string }
  /** Optional title presented to the model (used by Anthropic, ignored elsewhere). */
  title?: string
}
158
+
159
/**
 * Audio attachment for a user message. In V1 only Gemini accepts
 * audio natively, via `inlineData` with an audio MIME type
 * (`audio/mp3`, `audio/wav`, `audio/ogg`, `audio/flac`,
 * `audio/webm`, `audio/aac`). The Anthropic, OpenAI and Ollama chat
 * APIs do not take audio: OpenAI apps preprocess with Whisper,
 * Anthropic apps wait for an audio block to land in the SDK, and
 * Ollama apps that need audio look at server-side transcription
 * models.
 */
export interface AudioBlock {
  type: 'audio'
  source:
    | { type: 'base64'; mediaType: string; data: string }
    | { type: 'url'; url: string }
}
175
+
176
/**
 * Block produced by server-side compaction. Anthropic's
 * `compact-2026-01-12` beta returns a `compaction` block whenever an
 * auto-compaction trigger fires during a request. The framework
 * exposes it on `result.content`, and Thread stores it on the
 * assistant turn so later requests send it back verbatim. From then
 * on the model sees only the summary plus the opaque blob, and the
 * older raw turns stay out of context.
 *
 * In V1 only Anthropic produces these. Every other provider silently
 * ignores the `compact` option and never emits a `CompactionBlock`.
 *
 * Round-trip invariant: send the block back unchanged. The server
 * uses the opaque `encryptedContent` blob to stitch the compaction
 * history together, and the framework never modifies it.
 *
 * A `content` of `null` marks a failed compaction attempt (for
 * example malformed model output). The server treats such blocks as
 * no-ops on the next request, so apps need no special handling.
 */
export interface CompactionBlock {
  type: 'compaction'
  /** Summary of the compacted content; `null` when compaction failed. */
  content: string | null
  /** Opaque metadata that is sent back verbatim on later requests. */
  encryptedContent: string | null
}
204
+
205
/** Every block kind that may appear in a message's typed content list. */
export type ContentBlock =
  // Inputs and plain text
  | TextBlock
  | ImageBlock
  | DocumentBlock
  | AudioBlock
  // Framework-local tool traffic
  | ToolUseBlock
  | ToolResultBlock
  // Provider-executed MCP tool traffic
  | MCPToolUseBlock
  | MCPToolResultBlock
  // Server-side compaction
  | CompactionBlock
75
215
 
76
216
  /** A single conversation turn. `content` can be a bare string or a typed block list. */
77
217
  export interface Message {
@@ -95,6 +235,85 @@ export type SystemPrompt =
95
235
  * escape hatch in `ChatResult` is what they reach for when they need
96
236
  * provider-specific fields.
97
237
  */
238
/**
 * A tool that the provider's backend runs on the model's behalf.
 * Unlike framework-local tools (`Tool` / `defineTool`), a call to a
 * server tool never passes through the app's process: the provider
 * executes it and inlines the result in the response.
 *
 * V1 coverage:
 *   - **Anthropic**: `web_search`, `code_execution`, `web_fetch`.
 *   - **Gemini**: `web_search` (Google Search), `code_execution`,
 *     `url_context`.
 *   - **OpenAI / DeepSeek / Ollama**: throw. OpenAI's server tools
 *     live on the Responses API (a separate slice), and the compat
 *     providers do not expose any.
 *
 * Portability across providers:
 *   - `web_search` and `code_execution` work on Anthropic and Gemini.
 *   - `web_fetch` exists on Anthropic only.
 *   - `url_context` exists on Gemini only.
 *
 * Server tools can be mixed freely with framework-local `Tool[]` and
 * MCP servers; the model sees all three sets as one tool list.
 */
export type ServerTool =
  | {
      type: 'web_search'
      /** Cap on how often the model may call this tool per turn (Anthropic; Gemini ignores). */
      maxUses?: number
      /** Domain allowlist (Anthropic; Gemini ignores). Mutually exclusive with `blockedDomains`. */
      allowedDomains?: readonly string[]
      /** Domain blocklist (Anthropic; Gemini ignores). */
      blockedDomains?: readonly string[]
    }
  | { type: 'code_execution' }
  | {
      type: 'web_fetch'
      /** Cap on URL fetches per turn (Anthropic). */
      maxUses?: number
      /** Domain allowlist. */
      allowedDomains?: readonly string[]
      /** Domain blocklist. */
      blockedDomains?: readonly string[]
    }
  | {
      /** Gemini fetches the URL and surfaces grounded answers from it. */
      type: 'url_context'
    }
+
287
/**
 * Compaction settings for a single call, mapped onto an `edits[]`
 * entry of Anthropic's `compact-2026-01-12` beta. Every field is
 * optional; an omitted field falls back to the server default
 * (trigger at 150,000 input tokens, no extra instructions, no pause).
 */
export interface CompactConfig {
  /**
   * Input-token threshold. Compaction fires once the conversation
   * grows past it. Default 150,000, matching the server-side default.
   */
  trigger?: number
  /**
   * Additional hint for the summarization model, useful for steering
   * the compaction toward what the app needs to keep ("keep all
   * customer ids referenced", "preserve every diff hunk", ...).
   */
  instructions?: string
  /**
   * With `true`, the server returns the compaction block in-line but
   * does NOT continue generating; the next assistant turn waits for
   * an explicit re-prompt. Meant for apps that want to inspect or
   * gate compaction. Default `false` (compaction is transparent).
   */
  pauseAfterCompaction?: boolean
}
316
+
98
317
  export interface ChatOptions {
99
318
  /** Override the configured default model. Wins over `tier`. */
100
319
  model?: string
@@ -129,6 +348,56 @@ export interface ChatOptions {
129
348
  * provider by config; this is the override for that.
130
349
  */
131
350
  provider?: string
351
+ /**
352
+ * Cancel the in-flight operation. Aborting between iterations of
353
+ * a tool loop bails before the next model call; aborting mid-call
354
+ * propagates the SDK's native abort error (typically a `DOMException`
355
+ * with `name: 'AbortError'`). Streaming iterators reject on the
356
+ * next `for await` step.
357
+ */
358
+ signal?: AbortSignal
359
+ /**
360
+ * Server-side tools — work the provider's backend runs (web
361
+ * search, code execution, URL fetching). The model's calls
362
+ * don't round-trip through the framework's tool loop; results
363
+ * land inline in the response. Combines freely with
364
+ * framework-local `Tool[]` and MCP servers.
365
+ *
366
+ * V1 supports Anthropic + Gemini; OpenAI / DeepSeek / Ollama
367
+ * throw `BrainError` (use the Responses API for OpenAI, or
368
+ * route to Anthropic / Gemini).
369
+ */
370
+ serverTools?: readonly ServerTool[]
371
+ /**
372
+ * Server-side conversation compaction. When set, the provider
373
+ * auto-summarizes the older part of the message history once the
374
+ * `trigger` token threshold is reached; the summary lives on the
375
+ * response as a `CompactionBlock` that apps round-trip on
376
+ * subsequent requests (Thread does this automatically). Saves
377
+ * tokens on long threads without lossy client-side pruning.
378
+ *
379
+ * Only honored by `AnthropicBrainDriver` (driver `'anthropic'`),
380
+ * via the `compact-2026-01-12` beta. Silently ignored by every
381
+ * other provider so apps targeting multiple providers with the
382
+ * same options object don't have to special-case.
383
+ */
384
+ compact?: CompactConfig
385
+ /**
386
+ * Stateful conversation pointer — OpenAI Responses API. When set,
387
+ * the provider sends only the new turn(s); the server picks up
388
+ * from the prior `Response` identified by this id and replays
389
+ * the conversation server-side. Saves tokens on long threads.
390
+ *
391
+ * Only honored by `OpenAIResponsesBrainDriver` (driver
392
+ * `'openai-responses'`); silently ignored by every other provider
393
+ * — apps that target multiple providers with the same options
394
+ * object don't have to special-case.
395
+ *
396
+ * Pair with `ChatResult.responseId` (returned by every call) to
397
+ * thread the conversation forward. `Thread` does this
398
+ * automatically when its underlying provider supports it.
399
+ */
400
+ previousResponseId?: string
132
401
  }
133
402
 
134
403
  /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
@@ -151,6 +420,24 @@ export interface ChatResult<Raw = unknown> {
151
420
  stopReason: string | null
152
421
  usage: ChatUsage
153
422
  raw: Raw
423
+ /**
424
+ * Structured assistant content blocks — populated when the model
425
+ * emitted more than plain text on this turn (compaction blocks
426
+ * today; reasoning blocks once those surface). Apps that
427
+ * persist the conversation (`Thread`, custom stores) push this
428
+ * onto the message history when present so round-trippable
429
+ * blocks survive subsequent requests. Undefined when the turn
430
+ * was plain text only.
431
+ */
432
+ content?: ContentBlock[]
433
+ /**
434
+ * Provider response id when the provider exposes stateful
435
+ * conversations (currently OpenAI Responses API). Apps thread
436
+ * this forward via `ChatOptions.previousResponseId` so the
437
+ * server replays prior turns without re-sending them.
438
+ * Undefined for providers that don't support the pattern.
439
+ */
440
+ responseId?: string
154
441
  }
155
442
 
156
443
  /**
@@ -161,3 +448,113 @@ export interface ChatResult<Raw = unknown> {
161
448
  export type StreamEvent =
162
449
  | { type: 'text'; delta: string }
163
450
  | { type: 'stop'; stopReason: string | null; usage: ChatUsage }
451
+
452
/**
 * Options for one `brain.embed(...)` call. This is just the part of
 * `ChatOptions` that makes sense for embeddings; chat-only knobs
 * (system prompt, thinking, cache, tools) have no meaning here.
 */
export interface EmbedOptions {
  /** Embedding model to use instead of the configured default. */
  model?: string
  /**
   * Provider to use instead of the default. It has to be one that
   * implements `embed` (V1: OpenAI, Gemini, Ollama). Anthropic and
   * DeepSeek throw with a clear "route to a different provider"
   * message.
   */
  provider?: string
  /**
   * Optional dimensionality hint. OpenAI receives it as `dimensions`
   * and Gemini as `outputDimensionality`; providers that do not
   * support it silently drop the field.
   */
  dimensions?: number
  /** Cancellation signal; same shape as `ChatOptions.signal`. */
  signal?: AbortSignal
}
476
+
477
/** Options for one `brain.transcribe(...)` call. */
export interface TranscribeOptions {
  /** Transcription model to use instead of the configured default. */
  model?: string
  /**
   * Provider to use instead of the default. It has to be one that
   * implements `transcribe` (V1: OpenAI / Gemini / Ollama). Anthropic
   * and DeepSeek throw.
   */
  provider?: string
  /**
   * Optional BCP-47 language hint (`en`, `fr`, `ja`). Helps accuracy
   * when the language is known; models without hint support ignore
   * it.
   */
  language?: string
  /**
   * Optional bias prompt steering vocabulary, style or formatting.
   * OpenAI calls this `prompt`; the Gemini-via-chat path threads it
   * into the system message; other providers ignore it.
   */
  prompt?: string
  /** Cancellation signal; same shape as `ChatOptions.signal`. */
  signal?: AbortSignal
}
503
+
504
/**
 * Where audio bytes come from. It is the same discriminated union as
 * `AudioBlock.source`, given its own name because `transcribe(...)`
 * accepts it directly rather than wrapped in an `AudioBlock`.
 */
export type AudioSource =
  | { type: 'base64'; mediaType: string; data: string }
  | { type: 'url'; url: string }
512
+
513
/**
 * Outcome of a single `transcribe` call. `text` holds the
 * transcription. `language` and `duration` are filled in only when
 * the provider reports them — OpenAI does on the `verbose_json`
 * response format, Gemini's chat-wrap path does not. `raw` is the
 * provider's complete native response, for fields the framework does
 * not surface.
 */
export interface TranscribeResult<Raw = unknown> {
  text: string
  model: string
  /** Detected (or echoed) BCP-47 language, when available. */
  language?: string
  /** Length of the audio in seconds, when available. */
  duration?: number
  raw: Raw
}
530
+
531
/**
 * Outcome of a single `embed` call. `embeddings[i]` is the vector for
 * the i-th input text, `model` echoes the model the provider used
 * (handy for logging), and `usage.inputTokens` is the token total
 * across all inputs.
 */
export interface EmbedResult<Raw = unknown> {
  embeddings: number[][]
  model: string
  usage: { inputTokens: number }
  /** Complete native provider response, for fields the framework does not surface. */
  raw: Raw
}
544
+
545
/**
 * Outcome of a structured-output call. `value` is the parsed JSON,
 * typed according to the `OutputSchema<T>` that was passed in. `text`
 * is the raw JSON string the model produced, which is useful for
 * logging and debugging when `parse` rejects. `raw` is the provider's
 * complete native response.
 */
export interface GenerateResult<T = unknown, Raw = unknown> {
  value: T
  text: string
  model: string
  stopReason: string | null
  usage: ChatUsage
  raw: Raw
  /** Same meaning as `ChatResult.responseId`. */
  responseId?: string
}