@strav/brain 1.0.0-alpha.16 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/package.json +4 -2
  2. package/src/agent.ts +34 -5
  3. package/src/agent_generate_result.ts +2 -0
  4. package/src/agent_result.ts +7 -0
  5. package/src/agent_runner.ts +134 -15
  6. package/src/agent_stream_event.ts +100 -0
  7. package/src/brain_config.ts +91 -1
  8. package/src/brain_manager.ts +287 -6
  9. package/src/brain_provider.ts +25 -1
  10. package/src/index.ts +37 -2
  11. package/src/mcp/client.ts +99 -13
  12. package/src/mcp/index.ts +7 -0
  13. package/src/mcp/oauth.ts +227 -0
  14. package/src/mcp/pool.ts +106 -0
  15. package/src/mcp/resolve_mcp_tools.ts +31 -9
  16. package/src/mcp_server.ts +16 -0
  17. package/src/persistence/brain_message.ts +34 -0
  18. package/src/persistence/brain_message_repository.ts +106 -0
  19. package/src/persistence/brain_store.ts +166 -0
  20. package/src/persistence/brain_suspended_run.ts +30 -0
  21. package/src/persistence/brain_suspended_run_repository.ts +68 -0
  22. package/src/persistence/brain_thread.ts +30 -0
  23. package/src/persistence/brain_thread_repository.ts +65 -0
  24. package/src/persistence/database_brain_store.ts +190 -0
  25. package/src/persistence/index.ts +48 -0
  26. package/src/persistence/schema/brain_message_schema.ts +61 -0
  27. package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
  28. package/src/persistence/schema/brain_thread_schema.ts +50 -0
  29. package/src/persistence/schema/index.ts +3 -0
  30. package/src/provider.ts +145 -1
  31. package/src/providers/anthropic_provider.ts +723 -38
  32. package/src/providers/deepseek_provider.ts +117 -0
  33. package/src/providers/gemini_provider.ts +625 -33
  34. package/src/providers/ollama_provider.ts +86 -0
  35. package/src/providers/openai_compat_provider.ts +616 -0
  36. package/src/providers/openai_provider.ts +801 -43
  37. package/src/providers/openai_responses_provider.ts +1015 -0
  38. package/src/suspended_run.ts +153 -0
  39. package/src/thread.ts +40 -1
  40. package/src/tool.ts +7 -0
  41. package/src/tool_runner.ts +81 -0
  42. package/src/types.ts +343 -0
package/src/types.ts CHANGED
@@ -105,12 +105,113 @@ export interface MCPToolResultBlock {
105
105
  isError?: boolean
106
106
  }
107
107
 
108
+ /**
109
+ * Image input — attaches a picture to a user message so vision-
110
+ * capable models can see it alongside the text. V1 covers images
111
+ * only; audio and PDFs use `AudioBlock` / `DocumentBlock`, video is deferred.
112
+ *
113
+ * `source` is a discriminated union:
114
+ * - `{ type: 'base64', mediaType, data }` — inline bytes for
115
+ * uploads, screenshots, attachments your app already holds in
116
+ * memory. `mediaType` is the IANA MIME (`image/png`,
117
+ * `image/jpeg`, `image/webp`, `image/gif`); `data` is the
118
+ * base64-encoded image (no `data:` prefix — the provider
119
+ * translation adds it where needed).
120
+ * - `{ type: 'url', url }` — remote image URL. Anthropic, OpenAI,
121
+ * and Gemini all accept HTTPS URLs; check the provider's
122
+ * domain allowlist if calls 404 (Anthropic was historically
123
+ * stricter). For Gemini, GCS URIs (`gs://...`) also work.
124
+ *
125
+ * Vision support is provider- AND model-dependent. Cloud picks:
126
+ * Anthropic Claude 4 family, OpenAI gpt-4o / gpt-5 family, Gemini
127
+ * 2.x. Local: `llama3.2-vision`, `llava`, `qwen2.5-vl` on Ollama.
128
+ * Models without vision either reject the call or ignore the image.
129
+ */
130
+ export interface ImageBlock {
131
+ type: 'image'
132
+ source:
133
+ | { type: 'base64'; mediaType: string; data: string }
134
+ | { type: 'url'; url: string }
135
+ }
136
+
137
+ /**
138
+ * Document input — attaches a PDF (the only document type in V1 — the
139
+ * providers that support documents all gate on `application/pdf`) to a
140
+ * user message. Anthropic surfaces it as a native `document` block;
141
+ * Gemini accepts it via `inlineData` / `fileData` with
142
+ * `application/pdf` mime; OpenAI / Ollama / DeepSeek don't support
143
+ * PDF blocks at all (apps split the PDF to images and use
144
+ * `ImageBlock`s for those vendors).
145
+ *
146
+ * The optional `title` is shown to the model on Anthropic (helpful
147
+ * for multi-document calls — "the contract", "the invoice"); other
148
+ * providers ignore it.
149
+ */
150
+ export interface DocumentBlock {
151
+ type: 'document'
152
+ source:
153
+ | { type: 'base64'; mediaType: string; data: string }
154
+ | { type: 'url'; url: string }
155
+ /** Optional title shown to the model (Anthropic uses it; others ignore). */
156
+ title?: string
157
+ }
158
+
159
+ /**
160
+ * Audio input — attaches a sound clip to a user message. V1
161
+ * coverage: Gemini supports audio natively via `inlineData` with
162
+ * audio MIMEs (`audio/mp3`, `audio/wav`, `audio/ogg`, `audio/flac`,
163
+ * `audio/webm`, `audio/aac`). Anthropic + OpenAI + Ollama don't
164
+ * accept audio in their chat APIs — OpenAI apps preprocess via
165
+ * Whisper; Anthropic apps wait for the audio block to land in the
166
+ * SDK; Ollama apps that need audio look at server-side
167
+ * transcription models.
168
+ */
169
+ export interface AudioBlock {
170
+ type: 'audio'
171
+ source:
172
+ | { type: 'base64'; mediaType: string; data: string }
173
+ | { type: 'url'; url: string }
174
+ }
175
+
176
+ /**
177
+ * Server-side compaction block. Anthropic's `compact-2026-01-12`
178
+ * beta returns a `compaction` block when an auto-compaction trigger
179
+ * fires during a request. The framework surfaces it on
180
+ * `result.content` and Thread persists it on the assistant turn so
181
+ * subsequent requests echo it back verbatim — the model only sees
182
+ * the summary + opaque blob from then on, and the older raw turns
183
+ * stay out of context.
184
+ *
185
+ * V1 produces these on Anthropic only. Other providers ignore the
186
+ * `compact` option silently, and never emit a `CompactionBlock`.
187
+ *
188
+ * Round-trip invariant: pass the block back unchanged. The
189
+ * `encryptedContent` blob is opaque metadata the server uses to
190
+ * stitch the compaction history together; the framework never
191
+ * mutates it.
192
+ *
193
+ * `content === null` means a compaction attempt failed (e.g.,
194
+ * malformed model output). The server treats these as no-ops on
195
+ * the next request, so apps don't need to special-case them.
196
+ */
197
+ export interface CompactionBlock {
198
+ type: 'compaction'
199
+ /** Summary of compacted content. Null when compaction failed. */
200
+ content: string | null
201
+ /** Opaque metadata round-tripped verbatim on subsequent requests. */
202
+ encryptedContent: string | null
203
+ }
204
+
108
205
  export type ContentBlock =
109
206
  | TextBlock
207
+ | ImageBlock
208
+ | DocumentBlock
209
+ | AudioBlock
110
210
  | ToolUseBlock
111
211
  | ToolResultBlock
112
212
  | MCPToolUseBlock
113
213
  | MCPToolResultBlock
214
+ | CompactionBlock
114
215
 
115
216
  /** A single conversation turn. `content` can be a bare string or a typed block list. */
116
217
  export interface Message {
@@ -134,6 +235,85 @@ export type SystemPrompt =
134
235
  * escape hatch in `ChatResult` is what they reach for when they need
135
236
  * provider-specific fields.
136
237
  */
238
+ /**
239
+ * Server-side tool — work the provider's backend runs on behalf
240
+ * of the model. Unlike framework-local tools (`Tool` /
241
+ * `defineTool`), the model's call doesn't round-trip through
242
+ * the app's process; the provider executes the tool and inlines
243
+ * the result in the response.
244
+ *
245
+ * V1 coverage:
246
+ * - **Anthropic**: `web_search`, `code_execution`, `web_fetch`.
247
+ * - **Gemini**: `web_search` (Google Search), `code_execution`,
248
+ * `url_context`.
249
+ * - **OpenAI / DeepSeek / Ollama**: throw — OpenAI's server tools
250
+ * live on the Responses API (separate slice); the compat
251
+ * providers don't expose them.
252
+ *
253
+ * Cross-provider portability:
254
+ * - `web_search` + `code_execution` work on both Anthropic and
255
+ * Gemini.
256
+ * - `web_fetch` is Anthropic-only.
257
+ * - `url_context` is Gemini-only.
258
+ *
259
+ * Server tools combine freely with framework-local `Tool[]` and
260
+ * MCP servers — the model sees all three sets in one tool list.
261
+ */
262
+ export type ServerTool =
263
+ | {
264
+ type: 'web_search'
265
+ /** Max times the model can call this tool per turn (Anthropic; Gemini ignores). */
266
+ maxUses?: number
267
+ /** Domain allowlist (Anthropic; Gemini ignores). Mutually exclusive with `blockedDomains`. */
268
+ allowedDomains?: readonly string[]
269
+ /** Domain blocklist (Anthropic; Gemini ignores). */
270
+ blockedDomains?: readonly string[]
271
+ }
272
+ | { type: 'code_execution' }
273
+ | {
274
+ type: 'web_fetch'
275
+ /** Max URL fetches per turn (Anthropic). */
276
+ maxUses?: number
277
+ /** Domain allowlist. */
278
+ allowedDomains?: readonly string[]
279
+ /** Domain blocklist. */
280
+ blockedDomains?: readonly string[]
281
+ }
282
+ | {
283
+ type: 'url_context'
284
+ /** Gemini fetches the URL and surfaces grounded answers from it. */
285
+ }
286
+
287
+ /**
288
+ * Per-call compaction configuration. Maps to Anthropic's
289
+ * `compact-2026-01-12` beta `edits[]` entry. All fields optional —
290
+ * omitting one falls back to the server's default (trigger:
291
+ * 150,000 input tokens; no extra instructions; no pause).
292
+ */
293
+ export interface CompactConfig {
294
+ /**
295
+ * Trigger threshold in input tokens. Compaction fires once the
296
+ * conversation crosses this token count. Default 150,000 — same
297
+ * as the server-side default.
298
+ */
299
+ trigger?: number
300
+ /**
301
+ * Extra hint to the summarization model. Useful for biasing the
302
+ * compaction toward what your app actually cares to preserve
303
+ * ("keep all customer ids referenced", "preserve every diff
304
+ * hunk", ...).
305
+ */
306
+ instructions?: string
307
+ /**
308
+ * When `true`, the server returns the compaction block in-line
309
+ * but does NOT continue generation — the next assistant turn
310
+ * waits for an explicit re-prompt. Apps that want to inspect or
311
+ * gate compaction set this; default `false` (compaction is
312
+ * transparent).
313
+ */
314
+ pauseAfterCompaction?: boolean
315
+ }
316
+
137
317
  export interface ChatOptions {
138
318
  /** Override the configured default model. Wins over `tier`. */
139
319
  model?: string
@@ -168,6 +348,56 @@ export interface ChatOptions {
168
348
  * provider by config; this is the override for that.
169
349
  */
170
350
  provider?: string
351
+ /**
352
+ * Cancel the in-flight operation. Aborting between iterations of
353
+ * a tool loop bails before the next model call; aborting mid-call
354
+ * propagates the SDK's native abort error (typically a `DOMException`
355
+ * with `name: 'AbortError'`). Streaming iterators reject on the
356
+ * next `for await` step.
357
+ */
358
+ signal?: AbortSignal
359
+ /**
360
+ * Server-side tools — work the provider's backend runs (web
361
+ * search, code execution, URL fetching). The model's calls
362
+ * don't round-trip through the framework's tool loop; results
363
+ * land inline in the response. Combines freely with
364
+ * framework-local `Tool[]` and MCP servers.
365
+ *
366
+ * V1 supports Anthropic + Gemini; OpenAI / DeepSeek / Ollama
367
+ * throw `BrainError` (use the Responses API for OpenAI, or
368
+ * route to Anthropic / Gemini).
369
+ */
370
+ serverTools?: readonly ServerTool[]
371
+ /**
372
+ * Server-side conversation compaction. When set, the provider
373
+ * auto-summarizes the older part of the message history once the
374
+ * `trigger` token threshold is reached; the summary lives on the
375
+ * response as a `CompactionBlock` that apps round-trip on
376
+ * subsequent requests (Thread does this automatically). Saves
377
+ * tokens on long threads without lossy client-side pruning.
378
+ *
379
+ * Only honored by `AnthropicProvider` (driver `'anthropic'`),
380
+ * via the `compact-2026-01-12` beta. Silently ignored by every
381
+ * other provider so apps targeting multiple providers with the
382
+ * same options object don't have to special-case.
383
+ */
384
+ compact?: CompactConfig
385
+ /**
386
+ * Stateful conversation pointer — OpenAI Responses API. When set,
387
+ * the provider sends only the new turn(s); the server picks up
388
+ * from the prior `Response` identified by this id and replays
389
+ * the conversation server-side. Saves tokens on long threads.
390
+ *
391
+ * Only honored by `OpenAIResponsesProvider` (driver
392
+ * `'openai-responses'`); silently ignored by every other provider
393
+ * — apps that target multiple providers with the same options
394
+ * object don't have to special-case.
395
+ *
396
+ * Pair with `ChatResult.responseId` (set when the provider supports it) to
397
+ * thread the conversation forward. `Thread` does this
398
+ * automatically when its underlying provider supports it.
399
+ */
400
+ previousResponseId?: string
171
401
  }
172
402
 
173
403
  /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
@@ -190,6 +420,24 @@ export interface ChatResult<Raw = unknown> {
190
420
  stopReason: string | null
191
421
  usage: ChatUsage
192
422
  raw: Raw
423
+ /**
424
+ * Structured assistant content blocks — populated when the model
425
+ * emitted more than plain text on this turn (compaction blocks
426
+ * today; reasoning blocks once those surface). Apps that
427
+ * persist the conversation (`Thread`, custom stores) push this
428
+ * onto the message history when present so round-trippable
429
+ * blocks survive subsequent requests. Undefined when the turn
430
+ * was plain text only.
431
+ */
432
+ content?: ContentBlock[]
433
+ /**
434
+ * Provider response id when the provider exposes stateful
435
+ * conversations (currently OpenAI Responses API). Apps thread
436
+ * this forward via `ChatOptions.previousResponseId` so the
437
+ * server replays prior turns without re-sending them.
438
+ * Undefined for providers that don't support the pattern.
439
+ */
440
+ responseId?: string
193
441
  }
194
442
 
195
443
  /**
@@ -201,6 +449,99 @@ export type StreamEvent =
201
449
  | { type: 'text'; delta: string }
202
450
  | { type: 'stop'; stopReason: string | null; usage: ChatUsage }
203
451
 
452
+ /**
453
+ * Per-call options for `brain.embed(...)`. Only the embed-relevant
454
+ * subset of `ChatOptions` — chat-specific knobs (system prompt,
455
+ * thinking, cache, tools) don't apply.
456
+ */
457
+ export interface EmbedOptions {
458
+ /** Override the configured default embedding model. */
459
+ model?: string
460
+ /**
461
+ * Override the default provider. Must name a provider that
462
+ * implements `embed` (V1: OpenAI, Gemini, Ollama; Anthropic +
463
+ * DeepSeek throw with a clear "route to a different provider"
464
+ * message).
465
+ */
466
+ provider?: string
467
+ /**
468
+ * Optional dimensionality hint. OpenAI passes through as
469
+ * `dimensions`; Gemini as `outputDimensionality`. Providers
470
+ * that ignore it silently drop the field.
471
+ */
472
+ dimensions?: number
473
+ /** Cancellation signal — same shape as `ChatOptions.signal`. */
474
+ signal?: AbortSignal
475
+ }
476
+
477
+ /**
478
+ * Per-call options for `brain.transcribe(...)`.
479
+ */
480
+ export interface TranscribeOptions {
481
+ /** Override the configured default transcription model. */
482
+ model?: string
483
+ /**
484
+ * Override the default provider. Must name a provider that
485
+ * implements `transcribe` (V1: OpenAI / Gemini / Ollama;
486
+ * Anthropic + DeepSeek throw).
487
+ */
488
+ provider?: string
489
+ /**
490
+ * Optional BCP-47 language hint (`en`, `fr`, `ja`). Improves
491
+ * accuracy when known; models without hint support ignore it.
492
+ */
493
+ language?: string
494
+ /**
495
+ * Optional bias prompt to steer vocabulary / style / formatting.
496
+ * OpenAI calls this `prompt`; Gemini-via-chat threads it into
497
+ * the system message; other providers ignore it.
498
+ */
499
+ prompt?: string
500
+ /** Cancellation signal — same shape as `ChatOptions.signal`. */
501
+ signal?: AbortSignal
502
+ }
503
+
504
+ /**
505
+ * Audio source — same discriminated union as
506
+ * `AudioBlock.source`, named separately for `transcribe(...)`
507
+ * which takes it directly (no wrapping `AudioBlock` shell).
508
+ */
509
+ export type AudioSource =
510
+ | { type: 'base64'; mediaType: string; data: string }
511
+ | { type: 'url'; url: string }
512
+
513
+ /**
514
+ * Result of one `transcribe` call. `text` is the transcribed
515
+ * audio; `language` / `duration` are surfaced when the provider
516
+ * returns them (OpenAI does on the `verbose_json` response
517
+ * format; Gemini's chat-wrap path doesn't). `raw` is the
518
+ * provider's full native response for fields the framework
519
+ * doesn't surface.
520
+ */
521
+ export interface TranscribeResult<Raw = unknown> {
522
+ text: string
523
+ model: string
524
+ /** BCP-47 detected (or echoed) language. Optional. */
525
+ language?: string
526
+ /** Audio duration in seconds. Optional. */
527
+ duration?: number
528
+ raw: Raw
529
+ }
530
+
531
+ /**
532
+ * Result of one `embed` call. `embeddings[i]` is the vector for
533
+ * the i-th input text. `model` is the model the provider used
534
+ * (echoed back for logging). `usage.inputTokens` is the total
535
+ * tokens consumed across all inputs.
536
+ */
537
+ export interface EmbedResult<Raw = unknown> {
538
+ embeddings: number[][]
539
+ model: string
540
+ usage: { inputTokens: number }
541
+ /** Provider's full native response — escape hatch for fields the framework doesn't surface. */
542
+ raw: Raw
543
+ }
544
+
204
545
  /**
205
546
  * Result of a structured-output call. `value` is the parsed JSON
206
547
  * shaped to the `OutputSchema<T>` passed in. `text` is the raw JSON
@@ -214,4 +555,6 @@ export interface GenerateResult<T = unknown, Raw = unknown> {
214
555
  stopReason: string | null
215
556
  usage: ChatUsage
216
557
  raw: Raw
558
+ /** See `ChatResult.responseId`. */
559
+ responseId?: string
217
560
  }