@strav/brain 0.4.31 → 1.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/thread.ts ADDED
@@ -0,0 +1,99 @@
1
+ /**
2
+ * `Thread` — multi-turn conversation that retains its message history
3
+ * across calls. Built on top of `BrainManager.chat` (no provider
4
+ * coupling); apps that want a stateless one-shot use
5
+ * `BrainManager.chat` directly.
6
+ *
7
+ * State model: the thread owns an append-only `messages` array. Each
8
+ * `send(text)` appends a user turn, calls `brain.chat`, appends the
9
+ * assistant reply, and returns the assistant's text. The full message
10
+ * history is serializable via `toJSON()` so apps can persist a thread
11
+ * across requests (e.g. one row per conversation in Postgres).
12
+ *
13
+ * What's NOT here in V1:
14
+ * - Auto-compaction. Long threads accumulate without bound; apps
15
+ * that need bounded context handle this themselves (prune
16
+ * `thread.messages` in place, or use the underlying provider's
17
+ * server-side compaction feature once that ships in V2).
18
+ * - Streaming `send`. The thread's `send()` is awaited-fully; for
19
+ * token-by-token streaming in a conversation, call
20
+ * `brain.stream(thread.messages.concat(newUser))` directly.
21
+ */
22
+
23
+ import type { BrainManager } from './brain_manager.ts'
24
+ import type { ChatOptions, Message, SystemPrompt } from './types.ts'
25
+
26
+ export interface ThreadOptions {
27
+ /** System prompt — applied to every `send()` call. Supports cache flags. */
28
+ system?: SystemPrompt
29
+ /** Per-thread `ChatOptions` defaults — merged with per-call overrides on `send()`. */
30
+ options?: ChatOptions
31
+ }
32
+
33
+ /** Serializable snapshot. What `toJSON()` produces / `fromJSON()` accepts. */
34
+ export interface ThreadState {
35
+ messages: Message[]
36
+ system?: SystemPrompt
37
+ options?: ChatOptions
38
+ }
39
+
40
+ export class Thread {
41
+ /** Conversation history; `send()` only ever appends to it. `readonly` fixes the array reference, not its contents — snapshot via `toJSON()`, restore via `Thread.fromJSON()`. */
42
+ readonly messages: Message[] = []
43
+ readonly system?: SystemPrompt
44
+ readonly options?: ChatOptions
45
+ private readonly brain: BrainManager
46
+
47
+ constructor(brain: BrainManager, opts: ThreadOptions = {}) {
48
+ this.brain = brain
49
+ if (opts.system !== undefined) this.system = opts.system
50
+ if (opts.options !== undefined) this.options = opts.options
51
+ }
52
+
53
+ /**
54
+ * Append a user turn, call the model, append the assistant reply,
55
+ * and return the reply text. Per-call options override the
56
+ * thread's defaults; `system` always comes from the thread.
57
+ */
58
+ async send(text: string, options: ChatOptions = {}): Promise<string> {
59
+ this.messages.push({ role: 'user', content: text })
60
+ const merged: ChatOptions = {
61
+ ...(this.options ?? {}),
62
+ ...options,
63
+ // System is owned by the thread; per-call `system` is ignored
64
+ // intentionally so a caller can't drift the conversation
65
+ // mid-thread by changing the system prompt every turn.
66
+ ...(this.system !== undefined ? { system: this.system } : {}),
67
+ }
68
+ const result = await this.brain.chat(this.messages, merged)
69
+ this.messages.push({ role: 'assistant', content: result.text })
70
+ return result.text
71
+ }
72
+
73
+ /** Number of messages in the history. Each completed `send()` adds 2 (user + assistant). */
74
+ get length(): number {
75
+ return this.messages.length
76
+ }
77
+
78
+ /** Serialize to a plain object — pass to `Thread.fromJSON` to restore. */
79
+ toJSON(): ThreadState {
80
+ const state: ThreadState = { messages: [...this.messages] }
81
+ if (this.system !== undefined) state.system = this.system
82
+ if (this.options !== undefined) state.options = this.options
83
+ return state
84
+ }
85
+
86
+ /**
87
+ * Restore a thread from a serialized snapshot. The `BrainManager`
88
+ * is passed in fresh — only the conversation state lives on disk;
89
+ * the manager is rebuilt at app boot.
90
+ */
91
+ static fromJSON(brain: BrainManager, state: ThreadState): Thread {
92
+ const options: ThreadOptions = {}
93
+ if (state.system !== undefined) options.system = state.system
94
+ if (state.options !== undefined) options.options = state.options
95
+ const thread = new Thread(brain, options)
96
+ for (const m of state.messages) thread.messages.push(m)
97
+ return thread
98
+ }
99
+ }
package/src/tool.ts CHANGED
@@ -1,51 +1,35 @@
1
- import { zodToJsonSchema } from './utils/schema.ts'
2
- import type { ToolDefinition, JsonSchema } from './types.ts'
3
-
4
1
  /**
5
- * Define a tool that an agent can invoke.
2
+ * `Tool` — the framework-native shape every tool implementation
3
+ * conforms to. Providers translate the `name`, `description`, and
4
+ * `inputSchema` into their vendor's tool-definition wire format;
5
+ * `execute` runs in-process on the framework side when the model
6
+ * calls the tool.
6
7
  *
7
- * Accepts either a Zod schema or a raw JSON Schema object
8
- * for `parameters`. Zod schemas are automatically converted.
8
+ * `inputSchema` is plain JSON Schema (draft 2020-12 compatible).
9
+ * Apps that prefer Zod use the SDK's helpers and feed the resulting
10
+ * JSON Schema into `defineTool`; the framework deliberately doesn't
11
+ * couple to Zod so apps stay free to bring whatever schema library
12
+ * they want.
9
13
  *
10
- * @example
11
- * const searchTool = defineTool({
12
- * name: 'search',
13
- * description: 'Search the database',
14
- * parameters: z.object({ query: z.string() }),
15
- * execute: async ({ query }, context) => {
16
- * const userId = context?.userId
17
- * return await db.search(query, { userId })
18
- * },
19
- * })
14
+ * Generics: `TInput` is what `execute` receives (after the model's
15
+ * raw input has been narrowed by validation at the call site, when
16
+ * apps choose to validate). `TOutput` is what the agentic loop
17
+ * appends as the `tool_result.content`. Both default to `unknown`
18
+ * for apps that don't want the cognitive overhead of typing tools.
20
19
  */
21
- export function defineTool<TArgs = any, TContext = Record<string, unknown>>(config: {
22
- name: string
23
- description: string
24
- parameters: any
25
- execute: (args: TArgs, context?: TContext) => unknown | Promise<unknown>
26
- }): ToolDefinition {
27
- return {
28
- name: config.name,
29
- description: config.description,
30
- parameters: zodToJsonSchema(config.parameters) as JsonSchema,
31
- execute: config.execute as (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>,
32
- }
20
+
21
+ export interface ToolContext {
22
+ /** Provider-assigned call id — matches `ToolUseBlock.id`. */
23
+ readonly callId: string
24
+ /** Per-run free-form context bag passed by the caller. The field itself is always present on `ToolContext`. */
25
+ readonly context: Readonly<Record<string, unknown>>
33
26
  }
34
27
 
35
- /**
36
- * Group related tools into a named collection.
37
- *
38
- * A toolbox is simply a labeled array useful for organizing
39
- * tools by domain (e.g., database tools, API tools) and
40
- * spreading them into an agent's `tools` array.
41
- *
42
- * @example
43
- * const dbTools = defineToolbox('database', [searchTool, insertTool])
44
- *
45
- * class MyAgent extends Agent {
46
- * tools = [...dbTools, weatherTool]
47
- * }
48
- */
49
- export function defineToolbox(_name: string, tools: ToolDefinition[]): ToolDefinition[] {
50
- return tools
28
+ export interface Tool<TInput = unknown, TOutput = unknown> {
29
+ name: string
30
+ description: string
31
+ /** JSON Schema for the tool's input. Providers translate this into their wire format. */
32
+ inputSchema: Record<string, unknown>
33
+ /** In-process executor. Throws propagate as `ToolExecutionError` through the runner. */
34
+ execute(input: TInput, ctx: ToolContext): Promise<TOutput>
51
35
  }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * `ToolExecutionError` — wrapper thrown by the agentic loop when a
3
+ * tool's `execute` function throws. Carries the tool name + the
4
+ * provider's call id on `context` so apps building error reporters /
5
+ * traces can correlate failures with model output without parsing
6
+ * stack frames.
7
+ *
8
+ * V1 propagates these out of `runWithTools` — the loop aborts on the
9
+ * first tool failure. A later slice may add a graceful path
10
+ * (`{ type: 'tool_result', isError: true }` is appended and the
11
+ * loop continues) but apps that need that today can catch the
12
+ * error, append the result themselves, and re-call the runner.
13
+ */
14
+
15
+ import { StravError } from '@strav/kernel'
16
+
17
+ export class ToolExecutionError extends StravError {
18
+ constructor(toolName: string, callId: string, cause: unknown) {
19
+ const message = cause instanceof Error ? cause.message : String(cause)
20
+ super(
21
+ `Tool "${toolName}" execution failed: ${message}`,
22
+ { code: 'brain.tool-execution-failed', status: 500 },
23
+ { context: { tool: toolName, callId }, cause },
24
+ )
25
+ }
26
+ }
package/src/types.ts CHANGED
@@ -1,275 +1,163 @@
1
- // ── JSON Schema ──────────────────────────────────────────────────────────────
2
-
3
- /** Minimal recursive JSON Schema type. */
4
- export type JsonSchema = Record<string, unknown>
5
-
6
- // ── SSE ──────────────────────────────────────────────────────────────────────
7
-
8
- export interface SSEEvent {
9
- event?: string
10
- data: string
11
- }
1
+ /**
2
+ * Public types for the brain runtime.
3
+ *
4
+ * Apps work with three high-level shapes:
5
+ *
6
+ * - `Message` — a user/assistant turn in a conversation. `content` is
7
+ * either a plain string or a list of `ContentBlock`s for richer
8
+ * payloads (cached blocks, images in a later slice).
9
+ *
10
+ * - `ChatOptions` — per-call knobs: model selection (explicit `model`
11
+ * or `tier` sugar), `system` prompt with optional cache flag,
12
+ * `maxTokens`, `thinking`, `effort`, etc.
13
+ *
14
+ * - `ChatResult` — what comes back from `chat()`: assistant `text`,
15
+ * `usage` (including cache hit/miss counters), `stopReason`, the
16
+ * `model` that actually answered, and a `raw` escape hatch with the
17
+ * provider's native response.
18
+ *
19
+ * The streaming side adds `StreamEvent` — a discriminated union of the
20
+ * events a provider emits while a response is being generated. V1
21
+ * covers text deltas, the final-message event, and `stopReason`;
22
+ * thinking blocks / tool-use streams land when those features ship.
23
+ */
12
24
 
13
- // ── Usage ────────────────────────────────────────────────────────────────────
25
+ /** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
26
+ export type ModelTier = 'fast' | 'balanced' | 'powerful'
14
27
 
15
- export interface Usage {
16
- inputTokens: number
17
- outputTokens: number
18
- totalTokens: number
28
+ /**
29
+ * A text content block. The `cache` flag lets apps mark long, stable
30
+ * prefixes for prompt caching; providers translate this to whatever
31
+ * cache mechanism their SDK exposes (Anthropic: `cache_control:
32
+ * {type: 'ephemeral'}`).
33
+ */
34
+ export interface TextBlock {
35
+ type: 'text'
36
+ text: string
37
+ /** Mark this block as a cache breakpoint. Default `false`. */
38
+ cache?: boolean
19
39
  }
20
40
 
21
- // ── Messages ─────────────────────────────────────────────────────────────────
22
-
23
- export interface ToolCall {
41
+ /**
42
+ * Provider-emitted tool-use block. Appears in `assistant`-role
43
+ * messages when the model decides to call a tool. `input` is the
44
+ * parsed JSON the model produced for the tool's `inputSchema`; apps
45
+ * that need to validate it (Zod, ajv, etc.) do so at the call site.
46
+ *
47
+ * The agentic loop creates a matching `ToolResultBlock` and appends
48
+ * it to the next `user`-role message before re-asking the model.
49
+ */
50
+ export interface ToolUseBlock {
51
+ type: 'tool_use'
52
+ /** Provider-assigned call id. The matching tool_result references this verbatim. */
24
53
  id: string
54
+ /** Tool name — matches a registered `Tool.name`. */
25
55
  name: string
26
- arguments: Record<string, unknown>
56
+ /** Parsed input the model produced. Apps validate against the tool's schema. */
57
+ input: unknown
27
58
  }
28
59
 
29
- export interface ContentBlock {
30
- type: 'text' | 'tool_use' | 'tool_result'
31
- text?: string
32
- id?: string
33
- name?: string
34
- input?: Record<string, unknown>
35
- toolUseId?: string
36
- content?: string
60
+ /**
61
+ * Result of executing a tool. Appended to a `user`-role message and
62
+ * fed back to the model. `content` is either a plain string (the
63
+ * common case) or a list of text blocks for richer payloads. Mark
64
+ * `isError: true` so the model knows the tool call failed and can
65
+ * adjust its approach.
66
+ */
67
+ export interface ToolResultBlock {
68
+ type: 'tool_result'
69
+ toolUseId: string
70
+ content: string | TextBlock[]
71
+ isError?: boolean
37
72
  }
38
73
 
74
+ export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
75
+
76
+ /** A single conversation turn. `content` can be a bare string or a typed block list. */
39
77
  export interface Message {
40
- role: 'user' | 'assistant' | 'tool'
78
+ role: 'user' | 'assistant'
41
79
  content: string | ContentBlock[]
42
- toolCalls?: ToolCall[]
43
- toolCallId?: string
44
- }
45
-
46
- // ── Tool Definition ──────────────────────────────────────────────────────────
47
-
48
- export interface ToolDefinition {
49
- name: string
50
- description: string
51
- parameters: JsonSchema
52
- execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
53
- }
54
-
55
- // ── Completion Request / Response ────────────────────────────────────────────
56
-
57
- export interface CompletionRequest {
58
- model: string
59
- messages: Message[]
60
- system?: string
61
- tools?: ToolDefinition[]
62
- toolChoice?: 'auto' | 'required' | { name: string }
63
- maxTokens?: number
64
- temperature?: number
65
- schema?: JsonSchema
66
- stopSequences?: string[]
67
- }
68
-
69
- export interface CompletionResponse {
70
- id: string
71
- content: string
72
- toolCalls: ToolCall[]
73
- stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
74
- usage: Usage
75
- raw: unknown
76
- }
77
-
78
- // ── Streaming ────────────────────────────────────────────────────────────────
79
-
80
- export interface StreamChunk {
81
- type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
82
- text?: string
83
- toolCall?: Partial<ToolCall>
84
- toolIndex?: number
85
- usage?: Usage
86
- }
87
-
88
- // ── Output Schema ────────────────────────────────────────────────────────────
89
-
90
- /** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
91
- export interface OutputSchema {
92
- parse?: (data: unknown) => unknown
93
- [key: string]: unknown
94
- }
95
-
96
- // ── Agent ────────────────────────────────────────────────────────────────────
97
-
98
- export interface ToolCallRecord {
99
- name: string
100
- arguments: Record<string, unknown>
101
- result: unknown
102
- duration: number
103
80
  }
104
81
 
105
- export interface AgentResult<T = any> {
106
- data: T
107
- text: string
108
- toolCalls: ToolCallRecord[]
109
- messages: Message[]
110
- usage: Usage
111
- iterations: number
112
- }
113
-
114
- export interface AgentEvent {
115
- type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
116
- text?: string
117
- toolCall?: ToolCallRecord
118
- iteration?: number
119
- result?: AgentResult
120
- suspended?: SuspendedRun
121
- }
122
-
123
- // ── Suspend / Resume ─────────────────────────────────────────────────────────
124
-
125
82
  /**
126
- * A JSON-serializable snapshot of an agent loop at the moment it suspended.
127
- *
128
- * All fields are plain data — no functions, class instances, or cycles — so
129
- * the snapshot can be stringified, stored across a process boundary, and
130
- * later passed to `AgentRunner.resume()` to continue the run.
83
+ * The `system` prompt. Either a plain string (no cache) or a structured
84
+ * form that lets apps mark the prompt as cached. Apps that want
85
+ * fine-grained control over multi-block system prompts pass an array.
131
86
  */
132
- export interface SerializedAgentState {
133
- messages: Message[]
134
- allToolCalls: ToolCallRecord[]
135
- totalUsage: Usage
136
- iterations: number
137
- }
87
+ export type SystemPrompt =
88
+ | string
89
+ | { text: string; cache?: boolean }
90
+ | Array<{ text: string; cache?: boolean }>
138
91
 
139
92
  /**
140
- * Result of an agent run that was suspended before executing one or more
141
- * tool calls. The integrator is expected to obtain tool results out-of-band
142
- * (human approval, external system, queued job, etc.) and call
143
- * `AgentRunner.resume(state, toolResults)` to continue.
144
- *
145
- * `pendingToolCalls` contains the pending call that triggered suspension
146
- * plus any subsequent tool calls from the same batch that have not been
147
- * executed. Results must be supplied for each of them on resume so the
148
- * conversation remains well-formed for the provider.
93
+ * Per-call options. Generics are deliberately conservative — apps
94
+ * don't usually need to type-narrow the provider response; the `raw`
95
+ * escape hatch in `ChatResult` is what they reach for when they need
96
+ * provider-specific fields.
149
97
  */
150
- export interface SuspendedRun {
151
- status: 'suspended'
152
- pendingToolCalls: ToolCall[]
153
- state: SerializedAgentState
154
- }
155
-
156
- /** Result of a pending tool call, supplied to `AgentRunner.resume()`. */
157
- export interface ToolCallResult {
158
- toolCallId: string
159
- result: unknown
160
- }
161
-
162
- // ── Workflow ──────────────────────────────────────────────────────────────────
163
-
164
- export interface WorkflowResult {
165
- results: Record<string, AgentResult>
166
- usage: Usage
167
- duration: number
168
- }
169
-
170
- // ── Embedding ────────────────────────────────────────────────────────────────
171
-
172
- export interface EmbeddingResponse {
173
- embeddings: number[][]
174
- model: string
175
- usage: { totalTokens: number }
176
- }
177
-
178
- // ── Transcription (Speech-to-Text) ───────────────────────────────────────────
179
-
180
- export interface TranscribeRequest {
181
- /** Audio bytes. Most STT endpoints cap at ~25MB; chunk longer recordings. */
182
- audio: Uint8Array | Blob
183
- /**
184
- * MIME type of the audio. Required for providers that infer format from
185
- * the multipart filename or rely on it for inline base64 (Gemini).
186
- * Examples: 'audio/m4a', 'audio/mpeg', 'audio/wav', 'audio/ogg',
187
- * 'audio/webm', 'audio/flac'.
188
- */
189
- contentType?: string
190
- /** Override the provider's default STT model. */
98
+ export interface ChatOptions {
99
+ /** Override the configured default model. Wins over `tier`. */
191
100
  model?: string
101
+ /** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
102
+ tier?: ModelTier
103
+ /** System prompt — typed shape supports prompt caching. */
104
+ system?: SystemPrompt
105
+ /** Hard ceiling on response tokens. Default `4096`. */
106
+ maxTokens?: number
192
107
  /**
193
- * BCP-47 language hint (e.g. 'th', 'en', 'zh'). Whisper accepts ISO-639-1
194
- * ('th'); Gemini uses BCP-47. Both improve accuracy when set; omit for
195
- * auto-detection.
108
+ * Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
109
+ * (or omission) turns it off. On Opus 4.7 + 4.6 / Sonnet 4.6 this
110
+ * is the only supported thinking mode — `budget_tokens` is removed
111
+ * upstream and not exposed here.
196
112
  */
197
- language?: string
113
+ thinking?: 'adaptive' | 'disabled'
114
+ /** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
115
+ effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
198
116
  /**
199
- * Optional priming prompt gives the model vocabulary or context to
200
- * bias toward (proper nouns, brand names, menu items, dialect markers).
201
- * Whisper uses this directly; Gemini incorporates it into the system
202
- * instruction.
117
+ * Top-level cache_control toggle. When `true` the provider asks the
118
+ * SDK to auto-cache the last cacheable block on every request.
119
+ * Defaults to `config.brain.cache.auto ?? false`.
203
120
  */
204
- prompt?: string
121
+ cache?: boolean
205
122
  /**
206
- * Filename to send in the multipart form (Whisper). Used to derive the
207
- * audio format on the server when `contentType` is missing. Defaults to
208
- * 'audio.bin' if not provided.
123
+ * Beta features to enable on this request. Pass through to the
124
+ * provider's beta-header machinery. Provider-specific.
209
125
  */
210
- filename?: string
211
- }
212
-
213
- export interface TranscriptionResponse {
214
- /** Transcribed text. */
215
- text: string
216
- /** Detected language, when the provider reports one. */
217
- language?: string
218
- /** Audio duration in seconds, when the provider reports one. */
219
- duration?: number
220
- /** Original provider response for callers that need provider-specific fields. */
221
- raw: unknown
222
- }
223
-
224
- // ── Provider ─────────────────────────────────────────────────────────────────
225
-
226
- export interface AIProvider {
227
- readonly name: string
228
- complete(request: CompletionRequest): Promise<CompletionResponse>
229
- stream(request: CompletionRequest): AsyncIterable<StreamChunk>
230
- embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
126
+ betas?: readonly string[]
231
127
  /**
232
- * Transcribe audio to text. Implemented by providers that expose a
233
- * speech-to-text endpoint (OpenAI Whisper, Google Gemini's multimodal
234
- * generateContent). Throws or remains undefined for providers without
235
- * STT (Anthropic at time of writing).
128
+ * Provider override for this call. `BrainManager.chat` otherwise
129
+ * selects the provider from config.
236
130
  */
237
- transcribe?(request: TranscribeRequest): Promise<TranscriptionResponse>
131
+ provider?: string
238
132
  }
239
133
 
240
- // ── Hooks ────────────────────────────────────────────────────────────────────
241
-
242
- export type BeforeHook = (request: CompletionRequest) => void | Promise<void>
243
- export type AfterHook = (
244
- request: CompletionRequest,
245
- response: CompletionResponse
246
- ) => void | Promise<void>
247
-
248
- // ── Config ───────────────────────────────────────────────────────────────────
249
-
250
- export interface ProviderConfig {
251
- driver: string
252
- apiKey: string
253
- model: string
254
- baseUrl?: string
255
- maxTokens?: number
256
- temperature?: number
257
- maxRetries?: number
258
- retryBaseDelay?: number
134
+ /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
135
+ export interface ChatUsage {
136
+ inputTokens: number
137
+ outputTokens: number
138
+ cacheReadTokens: number
139
+ cacheCreationTokens: number
259
140
  }
260
141
 
261
- export interface BrainConfig {
262
- default: string
263
- providers: Record<string, ProviderConfig>
264
- maxTokens: number
265
- temperature: number
266
- maxIterations: number
267
- memory?: import('./memory/types.ts').MemoryConfig
142
+ /**
143
+ * The provider's reply. `text` is the concatenated assistant text;
144
+ * `raw` is the provider's full native response shape for apps that
145
+ * need anything we don't surface (e.g. citation blocks, server-tool
146
+ * results once those ship).
147
+ */
148
+ export interface ChatResult<Raw = unknown> {
149
+ text: string
150
+ model: string
151
+ stopReason: string | null
152
+ usage: ChatUsage
153
+ raw: Raw
268
154
  }
269
155
 
270
- // ── Serialized Thread ────────────────────────────────────────────────────────
271
-
272
- export interface SerializedThread {
273
- messages: Message[]
274
- system?: string
275
- }
156
+ /**
157
+ * Streaming event union. V1 covers the text-delta + completion path
158
+ * apps want for chat-style UIs; thinking blocks and tool-use streams
159
+ * are reserved for later slices.
160
+ */
161
+ export type StreamEvent =
162
+ | { type: 'text'; delta: string }
163
+ | { type: 'stop'; stopReason: string | null; usage: ChatUsage }
package/CHANGELOG.md DELETED
@@ -1,44 +0,0 @@
1
- # Changelog
2
-
3
- ## 0.2.12
4
-
5
- ### Added
6
-
7
- - **GoogleProvider** — Support for Google's Gemini models
8
- - Native Gemini API integration using `generativelanguage.googleapis.com`
9
- - Support for completion, streaming, function calling, and embeddings
10
- - Models: `gemini-2.0-flash`, `gemini-2.5-flash`, `gemini-3-pro-preview`
11
- - Authentication via `x-goog-api-key` header
12
- - Zero new dependencies — uses raw `fetch()` following existing patterns
13
- - Comprehensive test suite with 29 tests covering all functionality
14
-
15
- ## 0.6.0
16
-
17
- ### Added
18
-
19
- - **Memory management** — three-tier conversation memory system for long-running threads
20
- - `thread.memory()` enables opt-in context window management
21
- - **Working memory** — recent messages within token budget
22
- - **Episodic memory** — LLM-generated summaries of compacted older messages
23
- - **Semantic memory** — structured facts extracted from conversation, injected into system prompt
24
- - `TokenCounter` — approximate token estimation per provider (~4 chars/token)
25
- - `ContextBudget` — budget allocation across system prompt, summaries, facts, and working messages
26
- - `MemoryManager` — orchestrates compaction and fact extraction
27
- - `SemanticMemory` — in-memory fact store with `<known_facts>` prompt injection
28
- - `SummarizeStrategy` — LLM-powered compaction with optional fact extraction
29
- - `SlidingWindowStrategy` — drop oldest messages without summarization
30
- - `InMemoryThreadStore` — default `ThreadStore` implementation for dev/testing
31
- - `ThreadStore` interface — pluggable persistence (implement for database-backed storage)
32
- - `BrainManager.useThreadStore()` — register a thread store for persistence
33
- - `BrainManager.memoryConfig` / `BrainManager.threadStore` — accessors for memory configuration
34
- - `thread.id()` — set thread identifier for persistence
35
- - `thread.persist()` — enable auto-save to ThreadStore after each `send()`
36
- - `thread.facts` / `thread.episodicSummary` — access memory state
37
- - `thread.serializeMemory()` / `thread.restoreMemory()` — extended serialization with memory state
38
- - `BrainConfig.memory` — optional `MemoryConfig` field for global memory settings
39
-
40
- ## 0.1.1
41
-
42
- ### Changed
43
-
44
- - Applied consistent code formatting across all source files