@strav/brain 0.4.30 → 1.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/package.json +17 -20
  2. package/src/agent.ts +50 -75
  3. package/src/agent_result.ts +32 -0
  4. package/src/agent_runner.ts +63 -0
  5. package/src/brain_config.ts +80 -0
  6. package/src/brain_error.ts +29 -0
  7. package/src/brain_manager.ts +186 -123
  8. package/src/brain_provider.ts +91 -6
  9. package/src/define_tool.ts +42 -0
  10. package/src/index.ts +43 -42
  11. package/src/mcp_server.ts +47 -0
  12. package/src/provider.ts +83 -0
  13. package/src/providers/anthropic_provider.ts +435 -232
  14. package/src/thread.ts +99 -0
  15. package/src/tool.ts +28 -44
  16. package/src/tool_execution_error.ts +26 -0
  17. package/src/types.ts +164 -237
  18. package/CHANGELOG.md +0 -44
  19. package/README.md +0 -121
  20. package/src/helpers.ts +0 -1082
  21. package/src/mcp_toolbox.ts +0 -62
  22. package/src/memory/context_budget.ts +0 -120
  23. package/src/memory/index.ts +0 -17
  24. package/src/memory/memory_manager.ts +0 -168
  25. package/src/memory/semantic_memory.ts +0 -89
  26. package/src/memory/strategies/sliding_window.ts +0 -20
  27. package/src/memory/strategies/summarize.ts +0 -157
  28. package/src/memory/thread_store.ts +0 -56
  29. package/src/memory/token_counter.ts +0 -101
  30. package/src/memory/types.ts +0 -68
  31. package/src/providers/google_provider.ts +0 -496
  32. package/src/providers/openai_provider.ts +0 -569
  33. package/src/providers/openai_responses_provider.ts +0 -321
  34. package/src/utils/error_scrub.ts +0 -5
  35. package/src/utils/prompt.ts +0 -65
  36. package/src/utils/retry.ts +0 -104
  37. package/src/utils/schema.ts +0 -27
  38. package/src/utils/sse_parser.ts +0 -62
  39. package/src/workflow.ts +0 -199
  40. package/tsconfig.json +0 -5
package/src/thread.ts ADDED
@@ -0,0 +1,99 @@
1
+ /**
2
+ * `Thread` — multi-turn conversation that retains its message history
3
+ * across calls. Built on top of `BrainManager.chat` (no provider
4
+ * coupling); apps that want a stateless one-shot use
5
+ * `BrainManager.chat` directly.
6
+ *
7
+ * State model: the thread owns an append-only `messages` array. Each
8
+ * `send(text)` appends a user turn, calls `brain.chat`, appends the
9
+ * assistant reply, and returns the assistant's text. The full message
10
+ * history is serializable via `toJSON()` so apps can persist a thread
11
+ * across requests (e.g. one row per conversation in Postgres).
12
+ *
13
+ * What's NOT here in V1:
14
+ * - Auto-compaction. Long threads accumulate without bound; apps
15
+ * that need bounded context handle this themselves (prune
16
+ * `thread.messages` in place, or use the underlying provider's
17
+ * server-side compaction feature once that ships in V2).
18
+ * - Streaming `send`. The thread's `send()` is awaited-fully; for
19
+ * token-by-token streaming in a conversation, call
20
+ * `brain.stream(thread.messages.concat(newUser))` directly.
21
+ */
22
+
23
+ import type { BrainManager } from './brain_manager.ts'
24
+ import type { ChatOptions, Message, SystemPrompt } from './types.ts'
25
+
26
+ export interface ThreadOptions {
27
+ /** System prompt — applied to every `send()` call. Supports cache flags. */
28
+ system?: SystemPrompt
29
+ /** Per-thread `ChatOptions` defaults — merged with per-call overrides on `send()`. */
30
+ options?: ChatOptions
31
+ }
32
+
33
+ /** Serializable snapshot. What `toJSON()` produces / `fromJSON()` accepts. */
34
+ export interface ThreadState {
35
+ messages: Message[]
36
+ system?: SystemPrompt
37
+ options?: ChatOptions
38
+ }
39
+
40
+ export class Thread {
41
+ /** Append-only conversation history. Read-only — mutate via `send()` (or pass through `toJSON`). */
42
+ readonly messages: Message[] = []
43
+ readonly system?: SystemPrompt
44
+ readonly options?: ChatOptions
45
+ private readonly brain: BrainManager
46
+
47
+ constructor(brain: BrainManager, opts: ThreadOptions = {}) {
48
+ this.brain = brain
49
+ if (opts.system !== undefined) this.system = opts.system
50
+ if (opts.options !== undefined) this.options = opts.options
51
+ }
52
+
53
+ /**
54
+ * Append a user turn, call the model, append the assistant reply,
55
+ * and return the reply text. Per-call options override the
56
+ * thread's defaults; `system` always comes from the thread.
57
+ */
58
+ async send(text: string, options: ChatOptions = {}): Promise<string> {
59
+ this.messages.push({ role: 'user', content: text })
60
+ const merged: ChatOptions = {
61
+ ...(this.options ?? {}),
62
+ ...options,
63
+ // System is owned by the thread; per-call `system` is ignored
64
+ // intentionally so a caller can't drift the conversation
65
+ // mid-thread by changing the system prompt every turn.
66
+ ...(this.system !== undefined ? { system: this.system } : {}),
67
+ }
68
+ const result = await this.brain.chat(this.messages, merged)
69
+ this.messages.push({ role: 'assistant', content: result.text })
70
+ return result.text
71
+ }
72
+
73
+ /** Number of turns. Each `send()` adds 2 (user + assistant). */
74
+ get length(): number {
75
+ return this.messages.length
76
+ }
77
+
78
+ /** Serialize to a plain object — pass to `Thread.fromJSON` to restore. */
79
+ toJSON(): ThreadState {
80
+ const state: ThreadState = { messages: [...this.messages] }
81
+ if (this.system !== undefined) state.system = this.system
82
+ if (this.options !== undefined) state.options = this.options
83
+ return state
84
+ }
85
+
86
+ /**
87
+ * Restore a thread from a serialized snapshot. The `BrainManager`
88
+ * is passed in fresh — only the conversation state lives on disk;
89
+ * the manager is rebuilt at app boot.
90
+ */
91
+ static fromJSON(brain: BrainManager, state: ThreadState): Thread {
92
+ const options: ThreadOptions = {}
93
+ if (state.system !== undefined) options.system = state.system
94
+ if (state.options !== undefined) options.options = state.options
95
+ const thread = new Thread(brain, options)
96
+ for (const m of state.messages) thread.messages.push(m)
97
+ return thread
98
+ }
99
+ }
package/src/tool.ts CHANGED
@@ -1,51 +1,35 @@
1
- import { zodToJsonSchema } from './utils/schema.ts'
2
- import type { ToolDefinition, JsonSchema } from './types.ts'
3
-
4
1
  /**
5
- * Define a tool that an agent can invoke.
2
+ * `Tool` — the framework-native shape every tool implementation
3
+ * conforms to. Providers translate the `name`, `description`, and
4
+ * `inputSchema` into their vendor's tool-definition wire format;
5
+ * `execute` runs in-process on the framework side when the model
6
+ * calls the tool.
6
7
  *
7
- * Accepts either a Zod schema or a raw JSON Schema object
8
- * for `parameters`. Zod schemas are automatically converted.
8
+ * `inputSchema` is plain JSON Schema (draft 2020-12 compatible).
9
+ * Apps that prefer Zod use the SDK's helpers and feed the resulting
10
+ * JSON Schema into `defineTool`; the framework deliberately doesn't
11
+ * couple to Zod so apps stay free to bring whatever schema library
12
+ * they want.
9
13
  *
10
- * @example
11
- * const searchTool = defineTool({
12
- * name: 'search',
13
- * description: 'Search the database',
14
- * parameters: z.object({ query: z.string() }),
15
- * execute: async ({ query }, context) => {
16
- * const userId = context?.userId
17
- * return await db.search(query, { userId })
18
- * },
19
- * })
14
+ * Generics: `TInput` is what `execute` receives (after the model's
15
+ * raw input has been narrowed by validation at the call site, when
16
+ * apps choose to validate). `TOutput` is what the agentic loop
17
+ * appends as the `tool_result.content`. Both default to `unknown`
18
+ * for apps that don't want the cognitive overhead of typing tools.
20
19
  */
21
- export function defineTool<TArgs = any, TContext = Record<string, unknown>>(config: {
22
- name: string
23
- description: string
24
- parameters: any
25
- execute: (args: TArgs, context?: TContext) => unknown | Promise<unknown>
26
- }): ToolDefinition {
27
- return {
28
- name: config.name,
29
- description: config.description,
30
- parameters: zodToJsonSchema(config.parameters) as JsonSchema,
31
- execute: config.execute as (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>,
32
- }
20
+
21
+ export interface ToolContext {
22
+ /** Provider-assigned call id — matches `ToolUseBlock.id`. */
23
+ readonly callId: string
24
+ /** Per-run free-form context bag passed by the caller. Optional. */
25
+ readonly context: Readonly<Record<string, unknown>>
33
26
  }
34
27
 
35
- /**
36
- * Group related tools into a named collection.
37
- *
38
- * A toolbox is simply a labeled array useful for organizing
39
- * tools by domain (e.g., database tools, API tools) and
40
- * spreading them into an agent's `tools` array.
41
- *
42
- * @example
43
- * const dbTools = defineToolbox('database', [searchTool, insertTool])
44
- *
45
- * class MyAgent extends Agent {
46
- * tools = [...dbTools, weatherTool]
47
- * }
48
- */
49
- export function defineToolbox(_name: string, tools: ToolDefinition[]): ToolDefinition[] {
50
- return tools
28
+ export interface Tool<TInput = unknown, TOutput = unknown> {
29
+ name: string
30
+ description: string
31
+ /** JSON Schema for the tool's input. Providers translate this into their wire format. */
32
+ inputSchema: Record<string, unknown>
33
+ /** In-process executor. Throws propagate as `ToolExecutionError` through the runner. */
34
+ execute(input: TInput, ctx: ToolContext): Promise<TOutput>
51
35
  }
package/src/tool_execution_error.ts ADDED
@@ -0,0 +1,26 @@
1
+ /**
2
+ * `ToolExecutionError` — wrapper thrown by the agentic loop when a
3
+ * tool's `execute` function throws. Carries the tool name + the
4
+ * provider's call id on `context` so apps building error reporters /
5
+ * traces can correlate failures with model output without parsing
6
+ * stack frames.
7
+ *
8
+ * V1 propagates these out of `runWithTools` — the loop aborts on the
9
+ * first tool failure. A later slice may add a graceful path
10
+ * (`{ type: 'tool_result', isError: true }` is appended and the
11
+ * loop continues) but apps that need that today can catch the
12
+ * error, append the result themselves, and re-call the runner.
13
+ */
14
+
15
+ import { StravError } from '@strav/kernel'
16
+
17
+ export class ToolExecutionError extends StravError {
18
+ constructor(toolName: string, callId: string, cause: unknown) {
19
+ const message = cause instanceof Error ? cause.message : String(cause)
20
+ super(
21
+ `Tool "${toolName}" execution failed: ${message}`,
22
+ { code: 'brain.tool-execution-failed', status: 500 },
23
+ { context: { tool: toolName, callId }, cause },
24
+ )
25
+ }
26
+ }
package/src/types.ts CHANGED
@@ -1,275 +1,202 @@
1
- // ── JSON Schema ──────────────────────────────────────────────────────────────
2
-
3
- /** Minimal recursive JSON Schema type. */
4
- export type JsonSchema = Record<string, unknown>
5
-
6
- // ── SSE ──────────────────────────────────────────────────────────────────────
7
-
8
- export interface SSEEvent {
9
- event?: string
10
- data: string
11
- }
1
+ /**
2
+ * Public types for the brain runtime.
3
+ *
4
+ * Apps work with three high-level shapes:
5
+ *
6
+ * - `Message` — a user/assistant turn in a conversation. `content` is
7
+ * either a plain string or a list of `ContentBlock`s for richer
8
+ * payloads (cached blocks, images in a later slice).
9
+ *
10
+ * - `ChatOptions` — per-call knobs: model selection (explicit `model`
11
+ * or `tier` sugar), `system` prompt with optional cache flag,
12
+ * `maxTokens`, `thinking`, `effort`, etc.
13
+ *
14
+ * - `ChatResult` — what comes back from `chat()`: assistant `text`,
15
+ * `usage` (including cache hit/miss counters), `stopReason`, the
16
+ * `model` that actually answered, and a `raw` escape hatch with the
17
+ * provider's native response.
18
+ *
19
+ * The streaming side adds `StreamEvent` — a discriminated union of the
20
+ * events a provider emits while a response is being generated. V1
21
+ * covers text deltas, the final-message event, and `stopReason`;
22
+ * thinking blocks / tool-use streams land when those features ship.
23
+ */
12
24
 
13
- // ── Usage ────────────────────────────────────────────────────────────────────
25
+ /** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
26
+ export type ModelTier = 'fast' | 'balanced' | 'powerful'
14
27
 
15
- export interface Usage {
16
- inputTokens: number
17
- outputTokens: number
18
- totalTokens: number
28
+ /**
29
+ * A text content block. The `cache` flag lets apps mark long, stable
30
+ * prefixes for prompt caching; providers translate this to whatever
31
+ * cache mechanism their SDK exposes (Anthropic: `cache_control:
32
+ * {type: 'ephemeral'}`).
33
+ */
34
+ export interface TextBlock {
35
+ type: 'text'
36
+ text: string
37
+ /** Mark this block as a cache breakpoint. Default `false`. */
38
+ cache?: boolean
19
39
  }
20
40
 
21
- // ── Messages ─────────────────────────────────────────────────────────────────
22
-
23
- export interface ToolCall {
41
+ /**
42
+ * Provider-emitted tool-use block. Appears in `assistant`-role
43
+ * messages when the model decides to call a tool. `input` is the
44
+ * parsed JSON the model produced for the tool's `inputSchema`; apps
45
+ * that need to validate it (Zod, ajv, etc.) do so at the call site.
46
+ *
47
+ * The agentic loop creates a matching `ToolResultBlock` and appends
48
+ * it to the next `user`-role message before re-asking the model.
49
+ */
50
+ export interface ToolUseBlock {
51
+ type: 'tool_use'
52
+ /** Provider-assigned call id. The matching tool_result references this verbatim. */
24
53
  id: string
54
+ /** Tool name — matches a registered `Tool.name`. */
25
55
  name: string
26
- arguments: Record<string, unknown>
27
- }
28
-
29
- export interface ContentBlock {
30
- type: 'text' | 'tool_use' | 'tool_result'
31
- text?: string
32
- id?: string
33
- name?: string
34
- input?: Record<string, unknown>
35
- toolUseId?: string
36
- content?: string
56
+ /** Parsed input the model produced. Apps validate against the tool's schema. */
57
+ input: unknown
37
58
  }
38
59
 
39
- export interface Message {
40
- role: 'user' | 'assistant' | 'tool'
41
- content: string | ContentBlock[]
42
- toolCalls?: ToolCall[]
43
- toolCallId?: string
44
- }
45
-
46
- // ── Tool Definition ──────────────────────────────────────────────────────────
47
-
48
- export interface ToolDefinition {
49
- name: string
50
- description: string
51
- parameters: JsonSchema
52
- execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
53
- }
54
-
55
- // ── Completion Request / Response ────────────────────────────────────────────
56
-
57
- export interface CompletionRequest {
58
- model: string
59
- messages: Message[]
60
- system?: string
61
- tools?: ToolDefinition[]
62
- toolChoice?: 'auto' | 'required' | { name: string }
63
- maxTokens?: number
64
- temperature?: number
65
- schema?: JsonSchema
66
- stopSequences?: string[]
67
- }
68
-
69
- export interface CompletionResponse {
70
- id: string
71
- content: string
72
- toolCalls: ToolCall[]
73
- stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
74
- usage: Usage
75
- raw: unknown
76
- }
77
-
78
- // ── Streaming ────────────────────────────────────────────────────────────────
79
-
80
- export interface StreamChunk {
81
- type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
82
- text?: string
83
- toolCall?: Partial<ToolCall>
84
- toolIndex?: number
85
- usage?: Usage
86
- }
87
-
88
- // ── Output Schema ────────────────────────────────────────────────────────────
89
-
90
- /** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
91
- export interface OutputSchema {
92
- parse?: (data: unknown) => unknown
93
- [key: string]: unknown
94
- }
95
-
96
- // ── Agent ────────────────────────────────────────────────────────────────────
97
-
98
- export interface ToolCallRecord {
99
- name: string
100
- arguments: Record<string, unknown>
101
- result: unknown
102
- duration: number
103
- }
104
-
105
- export interface AgentResult<T = any> {
106
- data: T
107
- text: string
108
- toolCalls: ToolCallRecord[]
109
- messages: Message[]
110
- usage: Usage
111
- iterations: number
112
- }
113
-
114
- export interface AgentEvent {
115
- type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
116
- text?: string
117
- toolCall?: ToolCallRecord
118
- iteration?: number
119
- result?: AgentResult
120
- suspended?: SuspendedRun
121
- }
122
-
123
- // ── Suspend / Resume ─────────────────────────────────────────────────────────
124
-
125
60
  /**
126
- * A JSON-serializable snapshot of an agent loop at the moment it suspended.
127
- *
128
- * All fields are plain data — no functions, class instances, or cycles — so
129
- * the snapshot can be stringified, stored across a process boundary, and
130
- * later passed to `AgentRunner.resume()` to continue the run.
61
+ * Result of executing a tool. Appended to a `user`-role message and
62
+ * fed back to the model. `content` is either a plain string (the
63
+ * common case) or a list of text blocks for richer payloads. Mark
64
+ * `isError: true` so the model knows the tool call failed and can
65
+ * adjust its approach.
131
66
  */
132
- export interface SerializedAgentState {
133
- messages: Message[]
134
- allToolCalls: ToolCallRecord[]
135
- totalUsage: Usage
136
- iterations: number
67
+ export interface ToolResultBlock {
68
+ type: 'tool_result'
69
+ toolUseId: string
70
+ content: string | TextBlock[]
71
+ isError?: boolean
137
72
  }
138
73
 
139
74
  /**
140
- * Result of an agent run that was suspended before executing one or more
141
- * tool calls. The integrator is expected to obtain tool results out-of-band
142
- * (human approval, external system, queued job, etc.) and call
143
- * `AgentRunner.resume(state, toolResults)` to continue.
75
+ * Provider-emitted MCP tool-use block. Read-only — apps don't construct
76
+ * these; they appear in `assistant`-role messages when the model calls
77
+ * a tool exposed by a configured MCP server. Anthropic's backend
78
+ * invokes the MCP server itself and inlines the result as an
79
+ * `MCPToolResultBlock` in the same response, so the framework's
80
+ * agentic loop doesn't need to handle the call.
144
81
  *
145
- * `pendingToolCalls` contains the pending call that triggered suspension
146
- * plus any subsequent tool calls from the same batch that have not been
147
- * executed. Results must be supplied for each of them on resume so the
148
- * conversation remains well-formed for the provider.
82
+ * Apps render these for observability (showing users that the model
83
+ * consulted Linear / Notion / GitHub via MCP) and for audit trails.
149
84
  */
150
- export interface SuspendedRun {
151
- status: 'suspended'
152
- pendingToolCalls: ToolCall[]
153
- state: SerializedAgentState
154
- }
155
-
156
- /** Result of a pending tool call, supplied to `AgentRunner.resume()`. */
157
- export interface ToolCallResult {
158
- toolCallId: string
159
- result: unknown
85
+ export interface MCPToolUseBlock {
86
+ type: 'mcp_tool_use'
87
+ id: string
88
+ /** MCP server identifier — matches `MCPServer.name`. */
89
+ serverName: string
90
+ /** Tool name as exposed by the MCP server. */
91
+ name: string
92
+ /** Parsed input the model passed to the MCP tool. */
93
+ input: unknown
160
94
  }
161
95
 
162
- // ── Workflow ──────────────────────────────────────────────────────────────────
163
-
164
- export interface WorkflowResult {
165
- results: Record<string, AgentResult>
166
- usage: Usage
167
- duration: number
96
+ /**
97
+ * Provider-emitted MCP tool result. Pairs with `MCPToolUseBlock` by
98
+ * `toolUseId`. `content` is either a string or text blocks; `isError`
99
+ * is `true` when the MCP server returned an error.
100
+ */
101
+ export interface MCPToolResultBlock {
102
+ type: 'mcp_tool_result'
103
+ toolUseId: string
104
+ content: string | TextBlock[]
105
+ isError?: boolean
168
106
  }
169
107
 
170
- // ── Embedding ────────────────────────────────────────────────────────────────
108
+ export type ContentBlock =
109
+ | TextBlock
110
+ | ToolUseBlock
111
+ | ToolResultBlock
112
+ | MCPToolUseBlock
113
+ | MCPToolResultBlock
171
114
 
172
- export interface EmbeddingResponse {
173
- embeddings: number[][]
174
- model: string
175
- usage: { totalTokens: number }
115
+ /** A single conversation turn. `content` can be a bare string or a typed block list. */
116
+ export interface Message {
117
+ role: 'user' | 'assistant'
118
+ content: string | ContentBlock[]
176
119
  }
177
120
 
178
- // ── Transcription (Speech-to-Text) ───────────────────────────────────────────
121
+ /**
122
+ * The `system` prompt. Either a plain string (no cache) or a structured
123
+ * form that lets apps mark the prompt as cached. Apps that want
124
+ * fine-grained control over multi-block system prompts pass an array.
125
+ */
126
+ export type SystemPrompt =
127
+ | string
128
+ | { text: string; cache?: boolean }
129
+ | Array<{ text: string; cache?: boolean }>
179
130
 
180
- export interface TranscribeRequest {
181
- /** Audio bytes. Most STT endpoints cap at ~25MB; chunk longer recordings. */
182
- audio: Uint8Array | Blob
183
- /**
184
- * MIME type of the audio. Required for providers that infer format from
185
- * the multipart filename or rely on it for inline base64 (Gemini).
186
- * Examples: 'audio/m4a', 'audio/mpeg', 'audio/wav', 'audio/ogg',
187
- * 'audio/webm', 'audio/flac'.
188
- */
189
- contentType?: string
190
- /** Override the provider's default STT model. */
131
+ /**
132
+ * Per-call options. Generics are deliberately conservative — apps
133
+ * don't usually need to type-narrow the provider response; the `raw`
134
+ * escape hatch in `ChatResult` is what they reach for when they need
135
+ * provider-specific fields.
136
+ */
137
+ export interface ChatOptions {
138
+ /** Override the configured default model. Wins over `tier`. */
191
139
  model?: string
140
+ /** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
141
+ tier?: ModelTier
142
+ /** System prompt — typed shape supports prompt caching. */
143
+ system?: SystemPrompt
144
+ /** Hard ceiling on response tokens. Default `4096`. */
145
+ maxTokens?: number
192
146
  /**
193
- * BCP-47 language hint (e.g. 'th', 'en', 'zh'). Whisper accepts ISO-639-1
194
- * ('th'); Gemini uses BCP-47. Both improve accuracy when set; omit for
195
- * auto-detection.
147
+ * Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
148
+ * (or omission) turns it off. On Opus 4.7 + 4.6 / Sonnet 4.6 this
149
+ * is the only supported thinking mode — `budget_tokens` is removed
150
+ * upstream and not exposed here.
196
151
  */
197
- language?: string
152
+ thinking?: 'adaptive' | 'disabled'
153
+ /** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
154
+ effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
198
155
  /**
199
- * Optional priming prompt — gives the model vocabulary or context to
200
- * bias toward (proper nouns, brand names, menu items, dialect markers).
201
- * Whisper uses this directly; Gemini incorporates it into the system
202
- * instruction.
156
+ * Top-level cache_control toggle. When `true` the provider asks the
157
+ * SDK to auto-cache the last cacheable block on every request.
158
+ * Defaults to `config.brain.cache.auto ?? false`.
203
159
  */
204
- prompt?: string
160
+ cache?: boolean
205
161
  /**
206
- * Filename to send in the multipart form (Whisper). Used to derive the
207
- * audio format on the server when `contentType` is missing. Defaults to
208
- * 'audio.bin' if not provided.
162
+ * Beta features to enable on this request. Pass through to the
163
+ * provider's beta-header machinery. Provider-specific.
209
164
  */
210
- filename?: string
211
- }
212
-
213
- export interface TranscriptionResponse {
214
- /** Transcribed text. */
215
- text: string
216
- /** Detected language, when the provider reports one. */
217
- language?: string
218
- /** Audio duration in seconds, when the provider reports one. */
219
- duration?: number
220
- /** Original provider response for callers that need provider-specific fields. */
221
- raw: unknown
222
- }
223
-
224
- // ── Provider ─────────────────────────────────────────────────────────────────
225
-
226
- export interface AIProvider {
227
- readonly name: string
228
- complete(request: CompletionRequest): Promise<CompletionResponse>
229
- stream(request: CompletionRequest): AsyncIterable<StreamChunk>
230
- embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
165
+ betas?: readonly string[]
231
166
  /**
232
- * Transcribe audio to text. Implemented by providers that expose a
233
- * speech-to-text endpoint (OpenAI Whisper, Google Gemini's multimodal
234
- * generateContent). Throws or remains undefined for providers without
235
- * STT (Anthropic at time of writing).
167
+ * Provider-specific overrides. `BrainManager.chat` selects the
168
+ * provider by config; this is the override for that.
236
169
  */
237
- transcribe?(request: TranscribeRequest): Promise<TranscriptionResponse>
170
+ provider?: string
238
171
  }
239
172
 
240
- // ── Hooks ────────────────────────────────────────────────────────────────────
241
-
242
- export type BeforeHook = (request: CompletionRequest) => void | Promise<void>
243
- export type AfterHook = (
244
- request: CompletionRequest,
245
- response: CompletionResponse
246
- ) => void | Promise<void>
247
-
248
- // ── Config ───────────────────────────────────────────────────────────────────
249
-
250
- export interface ProviderConfig {
251
- driver: string
252
- apiKey: string
253
- model: string
254
- baseUrl?: string
255
- maxTokens?: number
256
- temperature?: number
257
- maxRetries?: number
258
- retryBaseDelay?: number
173
+ /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
174
+ export interface ChatUsage {
175
+ inputTokens: number
176
+ outputTokens: number
177
+ cacheReadTokens: number
178
+ cacheCreationTokens: number
259
179
  }
260
180
 
261
- export interface BrainConfig {
262
- default: string
263
- providers: Record<string, ProviderConfig>
264
- maxTokens: number
265
- temperature: number
266
- maxIterations: number
267
- memory?: import('./memory/types.ts').MemoryConfig
181
+ /**
182
+ * The provider's reply. `text` is the concatenated assistant text;
183
+ * `raw` is the provider's full native response shape for apps that
184
+ * need anything we don't surface (e.g. citation blocks, server-tool
185
+ * results once those ship).
186
+ */
187
+ export interface ChatResult<Raw = unknown> {
188
+ text: string
189
+ model: string
190
+ stopReason: string | null
191
+ usage: ChatUsage
192
+ raw: Raw
268
193
  }
269
194
 
270
- // ── Serialized Thread ────────────────────────────────────────────────────────
271
-
272
- export interface SerializedThread {
273
- messages: Message[]
274
- system?: string
275
- }
195
+ /**
196
+ * Streaming event union. V1 covers the text-delta + completion path
197
+ * apps want for chat-style UIs; thinking blocks and tool-use streams
198
+ * are reserved for later slices.
199
+ */
200
+ export type StreamEvent =
201
+ | { type: 'text'; delta: string }
202
+ | { type: 'stop'; stopReason: string | null; usage: ChatUsage }