@strav/brain 1.0.0-alpha.8 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +23 -7
- package/src/agent.ts +97 -0
- package/src/agent_generate_result.ts +32 -0
- package/src/agent_result.ts +39 -0
- package/src/agent_runner.ts +265 -0
- package/src/agent_stream_event.ts +100 -0
- package/src/brain_config.ts +218 -1
- package/src/brain_driver.ts +247 -0
- package/src/brain_error.ts +86 -10
- package/src/brain_manager.ts +419 -5
- package/src/brain_provider.ts +89 -10
- package/src/define_tool.ts +42 -0
- package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
- package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
- package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
- package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
- package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
- package/src/drivers/anthropic/index.ts +1 -0
- package/src/drivers/deepseek/deepseek_brain_driver.ts +117 -0
- package/src/drivers/deepseek/index.ts +1 -0
- package/src/drivers/gemini/gemini_brain_driver.ts +1064 -0
- package/src/drivers/gemini/index.ts +1 -0
- package/src/drivers/minimax/index.ts +1 -0
- package/src/drivers/minimax/minimax_brain_driver.ts +84 -0
- package/src/drivers/ollama/index.ts +1 -0
- package/src/drivers/ollama/ollama_brain_driver.ts +86 -0
- package/src/drivers/openai/index.ts +1 -0
- package/src/drivers/openai/openai_brain_driver.ts +796 -0
- package/src/drivers/openai/openai_helpers.ts +58 -0
- package/src/drivers/openai/openai_message_builder.ts +187 -0
- package/src/drivers/openai/openai_response_mapper.ts +70 -0
- package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
- package/src/drivers/openai/openai_tool_loop.ts +191 -0
- package/src/drivers/openai_compat/index.ts +1 -0
- package/src/drivers/openai_compat/openai_compat_brain_driver.ts +616 -0
- package/src/drivers/openai_responses/index.ts +1 -0
- package/src/drivers/openai_responses/openai_responses_brain_driver.ts +1015 -0
- package/src/drivers/openrouter/index.ts +1 -0
- package/src/drivers/openrouter/openrouter_brain_driver.ts +137 -0
- package/src/drivers/qwen/index.ts +1 -0
- package/src/drivers/qwen/qwen_brain_driver.ts +103 -0
- package/src/index.ts +86 -8
- package/src/mcp/client.ts +243 -0
- package/src/mcp/index.ts +23 -0
- package/src/mcp/oauth.ts +227 -0
- package/src/mcp/pool.ts +106 -0
- package/src/mcp/resolve_mcp_tools.ts +108 -0
- package/src/mcp_server.ts +63 -0
- package/src/output_schema.ts +72 -0
- package/src/persistence/brain_message.ts +34 -0
- package/src/persistence/brain_message_repository.ts +98 -0
- package/src/persistence/brain_store.ts +166 -0
- package/src/persistence/brain_suspended_run.ts +30 -0
- package/src/persistence/brain_suspended_run_repository.ts +59 -0
- package/src/persistence/brain_thread.ts +30 -0
- package/src/persistence/brain_thread_repository.ts +56 -0
- package/src/persistence/database_brain_store.ts +190 -0
- package/src/persistence/index.ts +48 -0
- package/src/persistence/schemas/brain_message_schema.ts +61 -0
- package/src/persistence/schemas/brain_suspended_run_schema.ts +58 -0
- package/src/persistence/schemas/brain_thread_schema.ts +50 -0
- package/src/persistence/schemas/index.ts +3 -0
- package/src/suspended_run.ts +153 -0
- package/src/thread.ts +40 -1
- package/src/tool.ts +42 -0
- package/src/tool_execution_error.ts +26 -0
- package/src/tool_runner.ts +81 -0
- package/src/translate/index.ts +19 -0
- package/src/translate/translate_cache.ts +78 -0
- package/src/translate/translate_provider.ts +46 -0
- package/src/translate/translator.ts +271 -0
- package/src/types.ts +431 -1
- package/src/zod/index.ts +121 -0
- package/src/provider.ts +0 -48
- package/src/providers/anthropic_provider.ts +0 -227
package/src/types.ts
CHANGED
|
@@ -38,7 +38,180 @@ export interface TextBlock {
|
|
|
38
38
|
cache?: boolean
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
/**
|
|
42
|
+
* Provider-emitted tool-use block. Appears in `assistant`-role
|
|
43
|
+
* messages when the model decides to call a tool. `input` is the
|
|
44
|
+
* parsed JSON the model produced for the tool's `inputSchema`; apps
|
|
45
|
+
* that need to validate it (Zod, ajv, etc.) do so at the call site.
|
|
46
|
+
*
|
|
47
|
+
* The agentic loop creates a matching `ToolResultBlock` and appends
|
|
48
|
+
* it to the next `user`-role message before re-asking the model.
|
|
49
|
+
*/
|
|
50
|
+
export interface ToolUseBlock {
|
|
51
|
+
type: 'tool_use'
|
|
52
|
+
/** Provider-assigned call id. The matching tool_result references this verbatim. */
|
|
53
|
+
id: string
|
|
54
|
+
/** Tool name — matches a registered `Tool.name`. */
|
|
55
|
+
name: string
|
|
56
|
+
/** Parsed input the model produced. Apps validate against the tool's schema. */
|
|
57
|
+
input: unknown
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Result of executing a tool. Appended to a `user`-role message and
|
|
62
|
+
* fed back to the model. `content` is either a plain string (the
|
|
63
|
+
* common case) or a list of text blocks for richer payloads. Mark
|
|
64
|
+
* `isError: true` so the model knows the tool call failed and can
|
|
65
|
+
* adjust its approach.
|
|
66
|
+
*/
|
|
67
|
+
export interface ToolResultBlock {
|
|
68
|
+
type: 'tool_result'
|
|
69
|
+
toolUseId: string
|
|
70
|
+
content: string | TextBlock[]
|
|
71
|
+
isError?: boolean
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Provider-emitted MCP tool-use block. Read-only — apps don't construct
|
|
76
|
+
* these; they appear in `assistant`-role messages when the model calls
|
|
77
|
+
* a tool exposed by a configured MCP server. Anthropic's backend
|
|
78
|
+
* invokes the MCP server itself and inlines the result as an
|
|
79
|
+
* `MCPToolResultBlock` in the same response, so the framework's
|
|
80
|
+
* agentic loop doesn't need to handle the call.
|
|
81
|
+
*
|
|
82
|
+
* Apps render these for observability (showing users that the model
|
|
83
|
+
* consulted Linear / Notion / GitHub via MCP) and for audit trails.
|
|
84
|
+
*/
|
|
85
|
+
export interface MCPToolUseBlock {
|
|
86
|
+
type: 'mcp_tool_use'
|
|
87
|
+
id: string
|
|
88
|
+
/** MCP server identifier — matches `MCPServer.name`. */
|
|
89
|
+
serverName: string
|
|
90
|
+
/** Tool name as exposed by the MCP server. */
|
|
91
|
+
name: string
|
|
92
|
+
/** Parsed input the model passed to the MCP tool. */
|
|
93
|
+
input: unknown
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Provider-emitted MCP tool result. Pairs with `MCPToolUseBlock` by
|
|
98
|
+
* `toolUseId`. `content` is either a string or text blocks; `isError`
|
|
99
|
+
* is `true` when the MCP server returned an error.
|
|
100
|
+
*/
|
|
101
|
+
export interface MCPToolResultBlock {
|
|
102
|
+
type: 'mcp_tool_result'
|
|
103
|
+
toolUseId: string
|
|
104
|
+
content: string | TextBlock[]
|
|
105
|
+
isError?: boolean
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Image input — attaches a picture to a user message so vision-
|
|
110
|
+
* capable models can see it alongside the text. V1 covers images
|
|
111
|
+
* only here; audio uses `AudioBlock`, and video is deferred.
|
|
112
|
+
*
|
|
113
|
+
* `source` is a discriminated union:
|
|
114
|
+
* - `{ type: 'base64', mediaType, data }` — inline bytes for
|
|
115
|
+
* uploads, screenshots, attachments your app already holds in
|
|
116
|
+
* memory. `mediaType` is the IANA MIME (`image/png`,
|
|
117
|
+
* `image/jpeg`, `image/webp`, `image/gif`); `data` is the
|
|
118
|
+
* base64-encoded image (no `data:` prefix — the provider
|
|
119
|
+
* translation adds it where needed).
|
|
120
|
+
* - `{ type: 'url', url }` — remote image URL. Anthropic, OpenAI,
|
|
121
|
+
* and Gemini all accept HTTPS URLs; check the provider's
|
|
122
|
+
* domain allowlist if calls 404 (Anthropic was historically
|
|
123
|
+
* stricter). For Gemini, GCS URIs (`gs://...`) also work.
|
|
124
|
+
*
|
|
125
|
+
* Vision support is provider- AND model-dependent. Cloud picks:
|
|
126
|
+
* Anthropic Claude 4 family, OpenAI gpt-4o / gpt-5 family, Gemini
|
|
127
|
+
* 2.x. Local: `llama3.2-vision`, `llava`, `qwen2.5-vl` on Ollama.
|
|
128
|
+
* Models without vision either reject the call or ignore the image.
|
|
129
|
+
*/
|
|
130
|
+
export interface ImageBlock {
|
|
131
|
+
type: 'image'
|
|
132
|
+
source:
|
|
133
|
+
| { type: 'base64'; mediaType: string; data: string }
|
|
134
|
+
| { type: 'url'; url: string }
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Document input — attaches a PDF (V1 only — the providers that
|
|
139
|
+
* support documents currently all gate on `application/pdf`) to a
|
|
140
|
+
* user message. Anthropic surfaces it as a native `document` block;
|
|
141
|
+
* Gemini accepts it via `inlineData` / `fileData` with
|
|
142
|
+
* `application/pdf` mime; OpenAI / Ollama / DeepSeek don't support
|
|
143
|
+
* PDF blocks at all (apps split the PDF to images and use
|
|
144
|
+
* `ImageBlock`s for those vendors).
|
|
145
|
+
*
|
|
146
|
+
* The optional `title` is shown to the model on Anthropic (helpful
|
|
147
|
+
* for multi-document calls — "the contract", "the invoice"); other
|
|
148
|
+
* providers ignore it.
|
|
149
|
+
*/
|
|
150
|
+
export interface DocumentBlock {
|
|
151
|
+
type: 'document'
|
|
152
|
+
source:
|
|
153
|
+
| { type: 'base64'; mediaType: string; data: string }
|
|
154
|
+
| { type: 'url'; url: string }
|
|
155
|
+
/** Optional title shown to the model (Anthropic uses it; others ignore). */
|
|
156
|
+
title?: string
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Audio input — attaches a sound clip to a user message. V1
|
|
161
|
+
* coverage: Gemini supports audio natively via `inlineData` with
|
|
162
|
+
* audio MIMEs (`audio/mp3`, `audio/wav`, `audio/ogg`, `audio/flac`,
|
|
163
|
+
* `audio/webm`, `audio/aac`). Anthropic + OpenAI + Ollama don't
|
|
164
|
+
* accept audio in their chat APIs — OpenAI apps preprocess via
|
|
165
|
+
* Whisper; Anthropic apps wait for the audio block to land in the
|
|
166
|
+
* SDK; Ollama apps that need audio look at server-side
|
|
167
|
+
* transcription models.
|
|
168
|
+
*/
|
|
169
|
+
export interface AudioBlock {
|
|
170
|
+
type: 'audio'
|
|
171
|
+
source:
|
|
172
|
+
| { type: 'base64'; mediaType: string; data: string }
|
|
173
|
+
| { type: 'url'; url: string }
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Server-side compaction block. Anthropic's `compact-2026-01-12`
|
|
178
|
+
* beta returns a `compaction` block when an auto-compaction trigger
|
|
179
|
+
* fires during a request. The framework surfaces it on
|
|
180
|
+
* `result.content` and Thread persists it on the assistant turn so
|
|
181
|
+
* subsequent requests echo it back verbatim — the model only sees
|
|
182
|
+
* the summary + opaque blob from then on, and the older raw turns
|
|
183
|
+
* stay out of context.
|
|
184
|
+
*
|
|
185
|
+
* V1 produces these on Anthropic only. Other providers ignore the
|
|
186
|
+
* `compact` option silently, and never emit a `CompactionBlock`.
|
|
187
|
+
*
|
|
188
|
+
* Round-trip invariant: pass the block back unchanged. The
|
|
189
|
+
* `encryptedContent` blob is opaque metadata the server uses to
|
|
190
|
+
* stitch the compaction history together; the framework never
|
|
191
|
+
* mutates it.
|
|
192
|
+
*
|
|
193
|
+
* `content === null` means a compaction attempt failed (e.g.,
|
|
194
|
+
* malformed model output). The server treats these as no-ops on
|
|
195
|
+
* the next request, so apps don't need to special-case them.
|
|
196
|
+
*/
|
|
197
|
+
export interface CompactionBlock {
|
|
198
|
+
type: 'compaction'
|
|
199
|
+
/** Summary of compacted content. Null when compaction failed. */
|
|
200
|
+
content: string | null
|
|
201
|
+
/** Opaque metadata round-tripped verbatim on subsequent requests. */
|
|
202
|
+
encryptedContent: string | null
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
export type ContentBlock =
|
|
206
|
+
| TextBlock
|
|
207
|
+
| ImageBlock
|
|
208
|
+
| DocumentBlock
|
|
209
|
+
| AudioBlock
|
|
210
|
+
| ToolUseBlock
|
|
211
|
+
| ToolResultBlock
|
|
212
|
+
| MCPToolUseBlock
|
|
213
|
+
| MCPToolResultBlock
|
|
214
|
+
| CompactionBlock
|
|
42
215
|
|
|
43
216
|
/** A single conversation turn. `content` can be a bare string or a typed block list. */
|
|
44
217
|
export interface Message {
|
|
@@ -62,6 +235,85 @@ export type SystemPrompt =
|
|
|
62
235
|
* escape hatch in `ChatResult` is what they reach for when they need
|
|
63
236
|
* provider-specific fields.
|
|
64
237
|
*/
|
|
238
|
+
/**
|
|
239
|
+
* Server-side tool — work the provider's backend runs on behalf
|
|
240
|
+
* of the model. Unlike framework-local tools (`Tool` /
|
|
241
|
+
* `defineTool`), the model's call doesn't round-trip through
|
|
242
|
+
* the app's process; the provider executes the tool and inlines
|
|
243
|
+
* the result in the response.
|
|
244
|
+
*
|
|
245
|
+
* V1 coverage:
|
|
246
|
+
* - **Anthropic**: `web_search`, `code_execution`, `web_fetch`.
|
|
247
|
+
* - **Gemini**: `web_search` (Google Search), `code_execution`,
|
|
248
|
+
* `url_context`.
|
|
249
|
+
* - **OpenAI / DeepSeek / Ollama**: throw — OpenAI's server tools
|
|
250
|
+
* live on the Responses API (separate slice); the compat
|
|
251
|
+
* providers don't expose them.
|
|
252
|
+
*
|
|
253
|
+
* Cross-provider portability:
|
|
254
|
+
* - `web_search` + `code_execution` work on both Anthropic and
|
|
255
|
+
* Gemini.
|
|
256
|
+
* - `web_fetch` is Anthropic-only.
|
|
257
|
+
* - `url_context` is Gemini-only.
|
|
258
|
+
*
|
|
259
|
+
* Server tools combine freely with framework-local `Tool[]` and
|
|
260
|
+
* MCP servers — the model sees all three sets in one tool list.
|
|
261
|
+
*/
|
|
262
|
+
export type ServerTool =
|
|
263
|
+
| {
|
|
264
|
+
type: 'web_search'
|
|
265
|
+
/** Max times the model can call this tool per turn (Anthropic; Gemini ignores). */
|
|
266
|
+
maxUses?: number
|
|
267
|
+
/** Domain allowlist (Anthropic; Gemini ignores). Mutually exclusive with `blockedDomains`. */
|
|
268
|
+
allowedDomains?: readonly string[]
|
|
269
|
+
/** Domain blocklist (Anthropic; Gemini ignores). */
|
|
270
|
+
blockedDomains?: readonly string[]
|
|
271
|
+
}
|
|
272
|
+
| { type: 'code_execution' }
|
|
273
|
+
| {
|
|
274
|
+
type: 'web_fetch'
|
|
275
|
+
/** Max URL fetches per turn (Anthropic). */
|
|
276
|
+
maxUses?: number
|
|
277
|
+
/** Domain allowlist. */
|
|
278
|
+
allowedDomains?: readonly string[]
|
|
279
|
+
/** Domain blocklist. */
|
|
280
|
+
blockedDomains?: readonly string[]
|
|
281
|
+
}
|
|
282
|
+
| {
|
|
283
|
+
type: 'url_context'
|
|
284
|
+
/** Gemini fetches the URL and surfaces grounded answers from it. */
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* Per-call compaction configuration. Maps to Anthropic's
|
|
289
|
+
* `compact-2026-01-12` beta `edits[]` entry. All fields optional —
|
|
290
|
+
* omitting one falls back to the server's default (trigger:
|
|
291
|
+
* 150,000 input tokens; no extra instructions; no pause).
|
|
292
|
+
*/
|
|
293
|
+
export interface CompactConfig {
|
|
294
|
+
/**
|
|
295
|
+
* Trigger threshold in input tokens. Compaction fires once the
|
|
296
|
+
* conversation crosses this token count. Default 150,000 — same
|
|
297
|
+
* as the server-side default.
|
|
298
|
+
*/
|
|
299
|
+
trigger?: number
|
|
300
|
+
/**
|
|
301
|
+
* Extra hint to the summarization model. Useful for biasing the
|
|
302
|
+
* compaction toward what your app actually cares to preserve
|
|
303
|
+
* ("keep all customer ids referenced", "preserve every diff
|
|
304
|
+
* hunk", ...).
|
|
305
|
+
*/
|
|
306
|
+
instructions?: string
|
|
307
|
+
/**
|
|
308
|
+
* When `true`, the server returns the compaction block in-line
|
|
309
|
+
* but does NOT continue generation — the next assistant turn
|
|
310
|
+
* waits for an explicit re-prompt. Apps that want to inspect or
|
|
311
|
+
* gate compaction set this; default `false` (compaction is
|
|
312
|
+
* transparent).
|
|
313
|
+
*/
|
|
314
|
+
pauseAfterCompaction?: boolean
|
|
315
|
+
}
|
|
316
|
+
|
|
65
317
|
export interface ChatOptions {
|
|
66
318
|
/** Override the configured default model. Wins over `tier`. */
|
|
67
319
|
model?: string
|
|
@@ -96,6 +348,56 @@ export interface ChatOptions {
|
|
|
96
348
|
* provider by config; this is the override for that.
|
|
97
349
|
*/
|
|
98
350
|
provider?: string
|
|
351
|
+
/**
|
|
352
|
+
* Cancel the in-flight operation. Aborting between iterations of
|
|
353
|
+
* a tool loop bails before the next model call; aborting mid-call
|
|
354
|
+
* propagates the SDK's native abort error (typically a `DOMException`
|
|
355
|
+
* with `name: 'AbortError'`). Streaming iterators reject on the
|
|
356
|
+
* next `for await` step.
|
|
357
|
+
*/
|
|
358
|
+
signal?: AbortSignal
|
|
359
|
+
/**
|
|
360
|
+
* Server-side tools — work the provider's backend runs (web
|
|
361
|
+
* search, code execution, URL fetching). The model's calls
|
|
362
|
+
* don't round-trip through the framework's tool loop; results
|
|
363
|
+
* land inline in the response. Combines freely with
|
|
364
|
+
* framework-local `Tool[]` and MCP servers.
|
|
365
|
+
*
|
|
366
|
+
* V1 supports Anthropic + Gemini; OpenAI / DeepSeek / Ollama
|
|
367
|
+
* throw `BrainError` (use the Responses API for OpenAI, or
|
|
368
|
+
* route to Anthropic / Gemini).
|
|
369
|
+
*/
|
|
370
|
+
serverTools?: readonly ServerTool[]
|
|
371
|
+
/**
|
|
372
|
+
* Server-side conversation compaction. When set, the provider
|
|
373
|
+
* auto-summarizes the older part of the message history once the
|
|
374
|
+
* `trigger` token threshold is reached; the summary lives on the
|
|
375
|
+
* response as a `CompactionBlock` that apps round-trip on
|
|
376
|
+
* subsequent requests (Thread does this automatically). Saves
|
|
377
|
+
* tokens on long threads without lossy client-side pruning.
|
|
378
|
+
*
|
|
379
|
+
* Only honored by `AnthropicBrainDriver` (driver `'anthropic'`),
|
|
380
|
+
* via the `compact-2026-01-12` beta. Silently ignored by every
|
|
381
|
+
* other provider so apps targeting multiple providers with the
|
|
382
|
+
* same options object don't have to special-case.
|
|
383
|
+
*/
|
|
384
|
+
compact?: CompactConfig
|
|
385
|
+
/**
|
|
386
|
+
* Stateful conversation pointer — OpenAI Responses API. When set,
|
|
387
|
+
* the provider sends only the new turn(s); the server picks up
|
|
388
|
+
* from the prior `Response` identified by this id and replays
|
|
389
|
+
* the conversation server-side. Saves tokens on long threads.
|
|
390
|
+
*
|
|
391
|
+
* Only honored by `OpenAIResponsesBrainDriver` (driver
|
|
392
|
+
* `'openai-responses'`); silently ignored by every other provider
|
|
393
|
+
* — apps that target multiple providers with the same options
|
|
394
|
+
* object don't have to special-case.
|
|
395
|
+
*
|
|
396
|
+
* Pair with `ChatResult.responseId` (set when the provider supports it) to
|
|
397
|
+
* thread the conversation forward. `Thread` does this
|
|
398
|
+
* automatically when its underlying provider supports it.
|
|
399
|
+
*/
|
|
400
|
+
previousResponseId?: string
|
|
99
401
|
}
|
|
100
402
|
|
|
101
403
|
/** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
|
|
@@ -118,6 +420,24 @@ export interface ChatResult<Raw = unknown> {
|
|
|
118
420
|
stopReason: string | null
|
|
119
421
|
usage: ChatUsage
|
|
120
422
|
raw: Raw
|
|
423
|
+
/**
|
|
424
|
+
* Structured assistant content blocks — populated when the model
|
|
425
|
+
* emitted more than plain text on this turn (compaction blocks
|
|
426
|
+
* today; reasoning blocks once those surface). Apps that
|
|
427
|
+
* persist the conversation (`Thread`, custom stores) push this
|
|
428
|
+
* onto the message history when present so round-trippable
|
|
429
|
+
* blocks survive subsequent requests. Undefined when the turn
|
|
430
|
+
* was plain text only.
|
|
431
|
+
*/
|
|
432
|
+
content?: ContentBlock[]
|
|
433
|
+
/**
|
|
434
|
+
* Provider response id when the provider exposes stateful
|
|
435
|
+
* conversations (currently OpenAI Responses API). Apps thread
|
|
436
|
+
* this forward via `ChatOptions.previousResponseId` so the
|
|
437
|
+
* server replays prior turns without re-sending them.
|
|
438
|
+
* Undefined for providers that don't support the pattern.
|
|
439
|
+
*/
|
|
440
|
+
responseId?: string
|
|
121
441
|
}
|
|
122
442
|
|
|
123
443
|
/**
|
|
@@ -128,3 +448,113 @@ export interface ChatResult<Raw = unknown> {
|
|
|
128
448
|
export type StreamEvent =
|
|
129
449
|
| { type: 'text'; delta: string }
|
|
130
450
|
| { type: 'stop'; stopReason: string | null; usage: ChatUsage }
|
|
451
|
+
|
|
452
|
+
/**
|
|
453
|
+
* Per-call options for `brain.embed(...)`. Only the embed-relevant
|
|
454
|
+
* subset of `ChatOptions` — chat-specific knobs (system prompt,
|
|
455
|
+
* thinking, cache, tools) don't apply.
|
|
456
|
+
*/
|
|
457
|
+
export interface EmbedOptions {
|
|
458
|
+
/** Override the configured default embedding model. */
|
|
459
|
+
model?: string
|
|
460
|
+
/**
|
|
461
|
+
* Override the default provider. Must name a provider that
|
|
462
|
+
* implements `embed` (V1: OpenAI, Gemini, Ollama; Anthropic +
|
|
463
|
+
* DeepSeek throw with a clear "route to a different provider"
|
|
464
|
+
* message).
|
|
465
|
+
*/
|
|
466
|
+
provider?: string
|
|
467
|
+
/**
|
|
468
|
+
* Optional dimensionality hint. OpenAI passes through as
|
|
469
|
+
* `dimensions`; Gemini as `outputDimensionality`. Providers
|
|
470
|
+
* that ignore it silently drop the field.
|
|
471
|
+
*/
|
|
472
|
+
dimensions?: number
|
|
473
|
+
/** Cancellation signal — same shape as `ChatOptions.signal`. */
|
|
474
|
+
signal?: AbortSignal
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
/**
|
|
478
|
+
* Per-call options for `brain.transcribe(...)`.
|
|
479
|
+
*/
|
|
480
|
+
export interface TranscribeOptions {
|
|
481
|
+
/** Override the configured default transcription model. */
|
|
482
|
+
model?: string
|
|
483
|
+
/**
|
|
484
|
+
* Override the default provider. Must name a provider that
|
|
485
|
+
* implements `transcribe` (V1: OpenAI / Gemini / Ollama;
|
|
486
|
+
* Anthropic + DeepSeek throw).
|
|
487
|
+
*/
|
|
488
|
+
provider?: string
|
|
489
|
+
/**
|
|
490
|
+
* Optional BCP-47 language hint (`en`, `fr`, `ja`). Improves
|
|
491
|
+
* accuracy when known; models without hint support ignore.
|
|
492
|
+
*/
|
|
493
|
+
language?: string
|
|
494
|
+
/**
|
|
495
|
+
* Optional bias prompt to steer vocabulary / style / formatting.
|
|
496
|
+
* OpenAI calls this `prompt`; Gemini-via-chat threads it into
|
|
497
|
+
* the system message; others ignore.
|
|
498
|
+
*/
|
|
499
|
+
prompt?: string
|
|
500
|
+
/** Cancellation signal — same shape as `ChatOptions.signal`. */
|
|
501
|
+
signal?: AbortSignal
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
/**
|
|
505
|
+
* Audio source — same discriminated union as
|
|
506
|
+
* `AudioBlock.source`, named separately for `transcribe(...)`
|
|
507
|
+
* which takes it directly (no wrapping `AudioBlock` shell).
|
|
508
|
+
*/
|
|
509
|
+
export type AudioSource =
|
|
510
|
+
| { type: 'base64'; mediaType: string; data: string }
|
|
511
|
+
| { type: 'url'; url: string }
|
|
512
|
+
|
|
513
|
+
/**
|
|
514
|
+
* Result of one `transcribe` call. `text` is the transcribed
|
|
515
|
+
* audio; `language` / `duration` are surfaced when the provider
|
|
516
|
+
* returns them (OpenAI does on the `verbose_json` response
|
|
517
|
+
* format; Gemini's chat-wrap path doesn't). `raw` is the
|
|
518
|
+
* provider's full native response for fields the framework
|
|
519
|
+
* doesn't surface.
|
|
520
|
+
*/
|
|
521
|
+
export interface TranscribeResult<Raw = unknown> {
|
|
522
|
+
text: string
|
|
523
|
+
model: string
|
|
524
|
+
/** BCP-47 detected (or echoed) language. Optional. */
|
|
525
|
+
language?: string
|
|
526
|
+
/** Audio duration in seconds. Optional. */
|
|
527
|
+
duration?: number
|
|
528
|
+
raw: Raw
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
/**
|
|
532
|
+
* Result of one `embed` call. `embeddings[i]` is the vector for
|
|
533
|
+
* the i-th input text. `model` is the model the provider used
|
|
534
|
+
* (echoed back for logging). `usage.inputTokens` is the total
|
|
535
|
+
* tokens consumed across all inputs.
|
|
536
|
+
*/
|
|
537
|
+
export interface EmbedResult<Raw = unknown> {
|
|
538
|
+
embeddings: number[][]
|
|
539
|
+
model: string
|
|
540
|
+
usage: { inputTokens: number }
|
|
541
|
+
/** Provider's full native response — escape hatch for fields the framework doesn't surface. */
|
|
542
|
+
raw: Raw
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
/**
|
|
546
|
+
* Result of a structured-output call. `value` is the parsed JSON
|
|
547
|
+
* shaped to the `OutputSchema<T>` passed in. `text` is the raw JSON
|
|
548
|
+
* string the model produced (useful for logging / debugging when
|
|
549
|
+
* `parse` rejects). `raw` is the provider's full native response.
|
|
550
|
+
*/
|
|
551
|
+
export interface GenerateResult<T = unknown, Raw = unknown> {
|
|
552
|
+
value: T
|
|
553
|
+
text: string
|
|
554
|
+
model: string
|
|
555
|
+
stopReason: string | null
|
|
556
|
+
usage: ChatUsage
|
|
557
|
+
raw: Raw
|
|
558
|
+
/** See `ChatResult.responseId`. */
|
|
559
|
+
responseId?: string
|
|
560
|
+
}
|
package/src/zod/index.ts
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@strav/brain/zod` — Zod-flavored helpers on top of the
|
|
3
|
+
* schema-library-agnostic core.
|
|
4
|
+
*
|
|
5
|
+
* The default `@strav/brain` import deliberately doesn't depend on
|
|
6
|
+
* Zod — `Tool.inputSchema` and `OutputSchema.jsonSchema` are plain
|
|
7
|
+
* JSON Schema so apps stay free to pick Ajv, Valibot, ArkType, or
|
|
8
|
+
* nothing at all. This sub-path opt-in adds two thin wrappers for
|
|
9
|
+
* apps that already use Zod:
|
|
10
|
+
*
|
|
11
|
+
* - `outputSchema(schema, opts?)` turns a Zod schema into an
|
|
12
|
+
* `OutputSchema<z.infer<typeof schema>>` — `jsonSchema` is derived
|
|
13
|
+
* via Zod's built-in `z.toJSONSchema`, and `parse` is wired to
|
|
14
|
+
* `schema.parse`. Apps then pass the result straight to
|
|
15
|
+
* `BrainManager.generate(input, schema)`.
|
|
16
|
+
*
|
|
17
|
+
* - `tool({ name, description, input, execute })` turns a Zod
|
|
18
|
+
* schema for the tool's input into a framework `Tool` — the
|
|
19
|
+
* wrapper validates the model's raw input through the Zod
|
|
20
|
+
* schema before calling the app's `execute`. Apps get inferred
|
|
21
|
+
* types on `execute(input)` for free.
|
|
22
|
+
*
|
|
23
|
+
* `zod` is an optional peer dependency. Apps that don't use Zod
|
|
24
|
+
* don't install it, don't bundle it, and never import this
|
|
25
|
+
* sub-path — they keep using `defineTool` / hand-written
|
|
26
|
+
* `OutputSchema` literals with raw JSON Schema.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { z } from 'zod'
|
|
30
|
+
import type { OutputSchema } from '../output_schema.ts'
|
|
31
|
+
import type { Tool, ToolContext } from '../tool.ts'
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Options for `outputSchema`. `name` defaults to `'output'` —
|
|
35
|
+
* apps that surface multiple schemas in logs or to OpenAI's wire
|
|
36
|
+
* format should pass a stable, descriptive identifier.
|
|
37
|
+
*/
|
|
38
|
+
export interface OutputSchemaOptions {
|
|
39
|
+
/** Identifier — defaults to `'output'`. */
|
|
40
|
+
name?: string
|
|
41
|
+
/** Optional model-facing hint. Defaults to the Zod schema's `.describe(…)` if set. */
|
|
42
|
+
description?: string
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Build an `OutputSchema<T>` from a Zod schema. The returned shape
|
|
47
|
+
* is ready to pass to `BrainManager.generate(...)`.
|
|
48
|
+
*
|
|
49
|
+
* ```ts
|
|
50
|
+
* const CityZ = z.object({ city: z.string(), population: z.number().int() })
|
|
51
|
+
* const { value } = await brain.generate('Capital of France?', outputSchema(CityZ, { name: 'city_answer' }))
|
|
52
|
+
* // ^? { city: string; population: number }
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export function outputSchema<T>(
|
|
56
|
+
schema: z.ZodType<T>,
|
|
57
|
+
options: OutputSchemaOptions = {},
|
|
58
|
+
): OutputSchema<T> {
|
|
59
|
+
const description = options.description ?? zodDescription(schema)
|
|
60
|
+
const result: OutputSchema<T> = {
|
|
61
|
+
name: options.name ?? 'output',
|
|
62
|
+
jsonSchema: z.toJSONSchema(schema) as Record<string, unknown>,
|
|
63
|
+
parse: (value) => schema.parse(value),
|
|
64
|
+
}
|
|
65
|
+
if (description !== undefined) result.description = description
|
|
66
|
+
return result
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Spec passed to `tool(...)`. `execute` receives the model's input
|
|
71
|
+
* already validated + typed against `input` — no need to call
|
|
72
|
+
* `input.parse` manually.
|
|
73
|
+
*/
|
|
74
|
+
export interface ZodToolSpec<TInput, TOutput> {
|
|
75
|
+
name: string
|
|
76
|
+
description: string
|
|
77
|
+
input: z.ZodType<TInput>
|
|
78
|
+
execute(input: TInput, ctx: ToolContext): Promise<TOutput>
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Build a framework `Tool` from a Zod-typed spec. The wrapper
|
|
83
|
+
* derives `inputSchema` via `z.toJSONSchema` and validates the
|
|
84
|
+
* model's raw input through `input.parse` before delegating to
|
|
85
|
+
* `execute`. Validation failures propagate as `ZodError`; the
|
|
86
|
+
* agentic loop wraps that into a `ToolExecutionError`.
|
|
87
|
+
*
|
|
88
|
+
* ```ts
|
|
89
|
+
* const search = tool({
|
|
90
|
+
* name: 'search_orders',
|
|
91
|
+
* description: 'Look up an order by id.',
|
|
92
|
+
* input: z.object({ orderId: z.string() }),
|
|
93
|
+
* async execute({ orderId }, ctx) {
|
|
94
|
+
* // ^? { orderId: string }
|
|
95
|
+
* return await orders.find(orderId, ctx.context)
|
|
96
|
+
* },
|
|
97
|
+
* })
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
export function tool<TInput, TOutput>(
|
|
101
|
+
spec: ZodToolSpec<TInput, TOutput>,
|
|
102
|
+
): Tool<TInput, TOutput> {
|
|
103
|
+
const jsonSchema = z.toJSONSchema(spec.input) as Record<string, unknown>
|
|
104
|
+
return {
|
|
105
|
+
name: spec.name,
|
|
106
|
+
description: spec.description,
|
|
107
|
+
inputSchema: jsonSchema,
|
|
108
|
+
async execute(raw: TInput, ctx: ToolContext): Promise<TOutput> {
|
|
109
|
+
const parsed = spec.input.parse(raw)
|
|
110
|
+
return spec.execute(parsed, ctx)
|
|
111
|
+
},
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
function zodDescription(schema: z.ZodType<unknown>): string | undefined {
|
|
116
|
+
// Zod exposes `.describe(…)` text as the schema's `description`; surface it
|
|
117
|
+
// as the model-facing hint when callers don't pass one
|
|
118
|
+
// explicitly.
|
|
119
|
+
const def = (schema as unknown as { description?: string }).description
|
|
120
|
+
return typeof def === 'string' && def.length > 0 ? def : undefined
|
|
121
|
+
}
|
package/src/provider.ts
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* `Provider` — the contract every brain backend implements.
|
|
3
|
-
*
|
|
4
|
-
* Each concrete provider (Anthropic, OpenAI later, Gemini later,
|
|
5
|
-
* DeepSeek later) wraps the vendor's SDK and translates the framework
|
|
6
|
-
* shapes (`Message`, `ChatOptions`) into the vendor's native request,
|
|
7
|
-
* then translates the response back into `ChatResult` / `StreamEvent`.
|
|
8
|
-
*
|
|
9
|
-
* Providers are values, not classes — apps use them via the
|
|
10
|
-
* `BrainManager` facade. The interface is exported so apps that need
|
|
11
|
-
* to plug in a custom provider (e.g. a local Ollama) can do so without
|
|
12
|
-
* subclassing.
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
import type {
|
|
16
|
-
ChatOptions,
|
|
17
|
-
ChatResult,
|
|
18
|
-
Message,
|
|
19
|
-
StreamEvent,
|
|
20
|
-
} from './types.ts'
|
|
21
|
-
|
|
22
|
-
export interface Provider {
|
|
23
|
-
/** Identifier — matches the `config.brain.providers` key. */
|
|
24
|
-
readonly name: string
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Generate a single reply. Awaits the full response; for
|
|
28
|
-
* token-by-token rendering use `stream()`.
|
|
29
|
-
*/
|
|
30
|
-
chat(messages: readonly Message[], options?: ChatOptions): Promise<ChatResult>
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* Stream the reply as it's generated. The async iterable yields
|
|
34
|
-
* `text` events for each delta and a final `stop` event with usage
|
|
35
|
-
* + stop-reason. Apps that want the full collected message at the
|
|
36
|
-
* end pass the same `messages` to `chat()` instead; this surface is
|
|
37
|
-
* for UI streaming, not for "make one call and get the message".
|
|
38
|
-
*/
|
|
39
|
-
stream(messages: readonly Message[], options?: ChatOptions): AsyncIterable<StreamEvent>
|
|
40
|
-
|
|
41
|
-
/**
|
|
42
|
-
* Count input tokens for a given message set + options. Used by
|
|
43
|
-
* apps that need to budget context before sending. Optional — not
|
|
44
|
-
* every provider exposes a cheap token-count endpoint, so the
|
|
45
|
-
* implementation may approximate.
|
|
46
|
-
*/
|
|
47
|
-
countTokens?(messages: readonly Message[], options?: ChatOptions): Promise<number>
|
|
48
|
-
}
|