@strav/brain 0.4.31 → 1.0.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -20
- package/src/agent.ts +50 -75
- package/src/agent_result.ts +32 -0
- package/src/agent_runner.ts +63 -0
- package/src/brain_config.ts +95 -0
- package/src/brain_error.ts +29 -0
- package/src/brain_manager.ts +186 -123
- package/src/brain_provider.ts +104 -6
- package/src/define_tool.ts +42 -0
- package/src/index.ts +44 -41
- package/src/mcp_server.ts +47 -0
- package/src/provider.ts +83 -0
- package/src/providers/anthropic_provider.ts +435 -232
- package/src/providers/openai_provider.ts +350 -503
- package/src/thread.ts +99 -0
- package/src/tool.ts +28 -44
- package/src/tool_execution_error.ts +26 -0
- package/src/types.ts +164 -237
- package/CHANGELOG.md +0 -44
- package/README.md +0 -121
- package/src/helpers.ts +0 -1082
- package/src/mcp_toolbox.ts +0 -62
- package/src/memory/context_budget.ts +0 -120
- package/src/memory/index.ts +0 -17
- package/src/memory/memory_manager.ts +0 -168
- package/src/memory/semantic_memory.ts +0 -89
- package/src/memory/strategies/sliding_window.ts +0 -20
- package/src/memory/strategies/summarize.ts +0 -157
- package/src/memory/thread_store.ts +0 -56
- package/src/memory/token_counter.ts +0 -101
- package/src/memory/types.ts +0 -68
- package/src/providers/google_provider.ts +0 -496
- package/src/providers/openai_responses_provider.ts +0 -321
- package/src/utils/error_scrub.ts +0 -5
- package/src/utils/prompt.ts +0 -65
- package/src/utils/retry.ts +0 -104
- package/src/utils/schema.ts +0 -27
- package/src/utils/sse_parser.ts +0 -62
- package/src/workflow.ts +0 -199
- package/tsconfig.json +0 -5
package/src/thread.ts
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `Thread` — multi-turn conversation that retains its message history
|
|
3
|
+
* across calls. Built on top of `BrainManager.chat` (no provider
|
|
4
|
+
* coupling); apps that want a stateless one-shot use
|
|
5
|
+
* `BrainManager.chat` directly.
|
|
6
|
+
*
|
|
7
|
+
* State model: the thread owns an append-only `messages` array. Each
|
|
8
|
+
* `send(text)` appends a user turn, calls `brain.chat`, appends the
|
|
9
|
+
* assistant reply, and returns the assistant's text. The full message
|
|
10
|
+
* history is serializable via `toJSON()` so apps can persist a thread
|
|
11
|
+
* across requests (e.g. one row per conversation in Postgres).
|
|
12
|
+
*
|
|
13
|
+
* What's NOT here in V1:
|
|
14
|
+
* - Auto-compaction. Long threads accumulate without bound; apps
|
|
15
|
+
* that need bounded context handle this themselves (prune
|
|
16
|
+
* `thread.messages` in place, or use the underlying provider's
|
|
17
|
+
* server-side compaction feature once that ships in V2).
|
|
18
|
+
* - Streaming `send`. The thread's `send()` resolves only with the complete reply; for
|
|
19
|
+
* token-by-token streaming in a conversation, call
|
|
20
|
+
* `brain.stream(thread.messages.concat(newUser))` directly.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type { BrainManager } from './brain_manager.ts'
|
|
24
|
+
import type { ChatOptions, Message, SystemPrompt } from './types.ts'
|
|
25
|
+
|
|
26
|
+
export interface ThreadOptions {
|
|
27
|
+
/** System prompt — applied to every `send()` call. Supports cache flags. */
|
|
28
|
+
system?: SystemPrompt
|
|
29
|
+
/** Per-thread `ChatOptions` defaults — merged with per-call overrides on `send()`. */
|
|
30
|
+
options?: ChatOptions
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** Serializable snapshot. What `toJSON()` produces / `fromJSON()` accepts. */
|
|
34
|
+
export interface ThreadState {
|
|
35
|
+
messages: Message[]
|
|
36
|
+
system?: SystemPrompt
|
|
37
|
+
options?: ChatOptions
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
 * Multi-turn conversation that keeps its message history between calls.
 *
 * Each `send()` forwards the accumulated history plus the new user turn to
 * `brain.chat`, and records the user turn and the assistant reply together
 * once that call has succeeded.
 */
export class Thread {
  /**
   * Conversation history, oldest message first. The property binding is
   * read-only; the array itself is a plain mutable array that `send()` and
   * `fromJSON()` push onto.
   */
  readonly messages: Message[] = []
  /** System prompt forwarded with every `send()`; absent when the thread has none. */
  readonly system?: SystemPrompt
  /** Thread-level `ChatOptions` defaults; per-call options are layered on top. */
  readonly options?: ChatOptions
  private readonly brain: BrainManager

  /**
   * @param brain Manager whose `chat` method answers each turn.
   * @param opts  Optional system prompt and default chat options.
   */
  constructor(brain: BrainManager, opts: ThreadOptions = {}) {
    this.brain = brain
    // Assign only when provided so the properties stay absent instead of
    // holding an explicit `undefined`.
    if (opts.system !== undefined) this.system = opts.system
    if (opts.options !== undefined) this.options = opts.options
  }

  /**
   * Send a user turn and return the assistant's reply text.
   *
   * Per-call options override the thread's defaults, except `system`,
   * which is owned by the thread: a per-call `system` is always dropped so
   * a caller cannot drift the conversation by changing the prompt mid-thread.
   *
   * The history is only updated after `brain.chat` resolves. If the call
   * rejects, `messages` is left untouched, so a retry does not stack a
   * duplicate, unanswered user turn.
   *
   * @param text    User message content.
   * @param options Per-call overrides for the thread's default options.
   * @returns The assistant reply text (`result.text`).
   * @throws Whatever `brain.chat` rejects with.
   */
  async send(text: string, options: ChatOptions = {}): Promise<string> {
    // Strip the per-call system prompt up front; without this it would leak
    // through whenever the thread itself has no `system`.
    const { system: _ignoredSystem, ...perCall } = options
    const merged: ChatOptions = {
      ...(this.options ?? {}),
      ...perCall,
      ...(this.system !== undefined ? { system: this.system } : {}),
    }

    const userTurn: Message = { role: 'user', content: text }
    // Hand the provider a snapshot that includes the pending user turn, and
    // commit to the real history only once a reply exists.
    const result = await this.brain.chat([...this.messages, userTurn], merged)
    this.messages.push(userTurn, { role: 'assistant', content: result.text })
    return result.text
  }

  /** Number of messages in the history. Each successful `send()` adds 2 (user + assistant). */
  get length(): number {
    return this.messages.length
  }

  /**
   * Serialize to a plain object — pass it to `Thread.fromJSON` to restore.
   * The `messages` array is a shallow copy; `system` and `options` are
   * included only when the thread has them.
   */
  toJSON(): ThreadState {
    const state: ThreadState = { messages: [...this.messages] }
    if (this.system !== undefined) state.system = this.system
    if (this.options !== undefined) state.options = this.options
    return state
  }

  /**
   * Restore a thread from a serialized snapshot.
   *
   * @param brain Manager to attach to the restored thread; it is not part
   *              of the snapshot.
   * @param state Snapshot as produced by `toJSON()`.
   * @returns A new thread holding the snapshot's messages, system prompt and options.
   */
  static fromJSON(brain: BrainManager, state: ThreadState): Thread {
    const options: ThreadOptions = {}
    if (state.system !== undefined) options.system = state.system
    if (state.options !== undefined) options.options = state.options
    const thread = new Thread(brain, options)
    for (const message of state.messages) thread.messages.push(message)
    return thread
  }
}
|
package/src/tool.ts
CHANGED
|
@@ -1,51 +1,35 @@
|
|
|
1
|
-
import { zodToJsonSchema } from './utils/schema.ts'
|
|
2
|
-
import type { ToolDefinition, JsonSchema } from './types.ts'
|
|
3
|
-
|
|
4
1
|
/**
|
|
5
|
-
*
|
|
2
|
+
* `Tool` — the framework-native shape every tool implementation
|
|
3
|
+
* conforms to. Providers translate the `name`, `description`, and
|
|
4
|
+
* `inputSchema` into their vendor's tool-definition wire format;
|
|
5
|
+
* `execute` runs in-process on the framework side when the model
|
|
6
|
+
* calls the tool.
|
|
6
7
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
8
|
+
* `inputSchema` is plain JSON Schema (draft 2020-12 compatible).
|
|
9
|
+
* Apps that prefer Zod use the SDK's helpers and feed the resulting
|
|
10
|
+
* JSON Schema into `defineTool`; the framework deliberately doesn't
|
|
11
|
+
* couple to Zod so apps stay free to bring whatever schema library
|
|
12
|
+
* they want.
|
|
9
13
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
* execute: async ({ query }, context) => {
|
|
16
|
-
* const userId = context?.userId
|
|
17
|
-
* return await db.search(query, { userId })
|
|
18
|
-
* },
|
|
19
|
-
* })
|
|
14
|
+
* Generics: `TInput` is what `execute` receives (after the model's
|
|
15
|
+
* raw input has been narrowed by validation at the call site, when
|
|
16
|
+
* apps choose to validate). `TOutput` is what the agentic loop
|
|
17
|
+
* appends as the `tool_result.content`. Both default to `unknown`
|
|
18
|
+
* for apps that don't want the cognitive overhead of typing tools.
|
|
20
19
|
*/
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
return {
|
|
28
|
-
name: config.name,
|
|
29
|
-
description: config.description,
|
|
30
|
-
parameters: zodToJsonSchema(config.parameters) as JsonSchema,
|
|
31
|
-
execute: config.execute as (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>,
|
|
32
|
-
}
|
|
20
|
+
|
|
21
|
+
export interface ToolContext {
|
|
22
|
+
/** Provider-assigned call id — matches `ToolUseBlock.id`. */
|
|
23
|
+
readonly callId: string
|
|
24
|
+
/** Per-run free-form context bag passed by the caller. Passing one is optional; the property itself is always present. */
|
|
25
|
+
readonly context: Readonly<Record<string, unknown>>
|
|
33
26
|
}
|
|
34
27
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
* @example
|
|
43
|
-
* const dbTools = defineToolbox('database', [searchTool, insertTool])
|
|
44
|
-
*
|
|
45
|
-
* class MyAgent extends Agent {
|
|
46
|
-
* tools = [...dbTools, weatherTool]
|
|
47
|
-
* }
|
|
48
|
-
*/
|
|
49
|
-
export function defineToolbox(_name: string, tools: ToolDefinition[]): ToolDefinition[] {
|
|
50
|
-
return tools
|
|
28
|
+
export interface Tool<TInput = unknown, TOutput = unknown> {
|
|
29
|
+
name: string
|
|
30
|
+
description: string
|
|
31
|
+
/** JSON Schema for the tool's input. Providers translate this into their wire format. */
|
|
32
|
+
inputSchema: Record<string, unknown>
|
|
33
|
+
/** In-process executor. Throws propagate as `ToolExecutionError` through the runner. */
|
|
34
|
+
execute(input: TInput, ctx: ToolContext): Promise<TOutput>
|
|
51
35
|
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `ToolExecutionError` — wrapper thrown by the agentic loop when a
|
|
3
|
+
* tool's `execute` function throws. Carries the tool name + the
|
|
4
|
+
* provider's call id on `context` so apps building error reporters /
|
|
5
|
+
* traces can correlate failures with model output without parsing
|
|
6
|
+
* stack frames.
|
|
7
|
+
*
|
|
8
|
+
* V1 propagates these out of `runWithTools` — the loop aborts on the
|
|
9
|
+
* first tool failure. A later slice may add a graceful path
|
|
10
|
+
* (`{ type: 'tool_result', isError: true }` is appended and the
|
|
11
|
+
* loop continues) but apps that need that today can catch the
|
|
12
|
+
* error, append the result themselves, and re-call the runner.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { StravError } from '@strav/kernel'
|
|
16
|
+
|
|
17
|
+
export class ToolExecutionError extends StravError {
|
|
18
|
+
constructor(toolName: string, callId: string, cause: unknown) {
|
|
19
|
+
const message = cause instanceof Error ? cause.message : String(cause)
|
|
20
|
+
super(
|
|
21
|
+
`Tool "${toolName}" execution failed: ${message}`,
|
|
22
|
+
{ code: 'brain.tool-execution-failed', status: 500 },
|
|
23
|
+
{ context: { tool: toolName, callId }, cause },
|
|
24
|
+
)
|
|
25
|
+
}
|
|
26
|
+
}
|
package/src/types.ts
CHANGED
|
@@ -1,275 +1,202 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Public types for the brain runtime.
|
|
3
|
+
*
|
|
4
|
+
* Apps work with three high-level shapes:
|
|
5
|
+
*
|
|
6
|
+
* - `Message` — a user/assistant turn in a conversation. `content` is
|
|
7
|
+
* either a plain string or a list of `ContentBlock`s for richer
|
|
8
|
+
* payloads (cached blocks, images in a later slice).
|
|
9
|
+
*
|
|
10
|
+
* - `ChatOptions` — per-call knobs: model selection (explicit `model`
|
|
11
|
+
* or `tier` sugar), `system` prompt with optional cache flag,
|
|
12
|
+
* `maxTokens`, `thinking`, `effort`, etc.
|
|
13
|
+
*
|
|
14
|
+
* - `ChatResult` — what comes back from `chat()`: assistant `text`,
|
|
15
|
+
* `usage` (including cache hit/miss counters), `stopReason`, the
|
|
16
|
+
* `model` that actually answered, and a `raw` escape hatch with the
|
|
17
|
+
* provider's native response.
|
|
18
|
+
*
|
|
19
|
+
* The streaming side adds `StreamEvent` — a discriminated union of the
|
|
20
|
+
* events a provider emits while a response is being generated. V1
|
|
21
|
+
* covers text deltas and a final `stop` event carrying `stopReason` and `usage`;
|
|
22
|
+
* thinking blocks / tool-use streams land when those features ship.
|
|
23
|
+
*/
|
|
12
24
|
|
|
13
|
-
|
|
25
|
+
/** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
|
|
26
|
+
export type ModelTier = 'fast' | 'balanced' | 'powerful'
|
|
14
27
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
28
|
+
/**
|
|
29
|
+
* A text content block. The `cache` flag lets apps mark long, stable
|
|
30
|
+
* prefixes for prompt caching; providers translate this to whatever
|
|
31
|
+
* cache mechanism their SDK exposes (Anthropic: `cache_control:
|
|
32
|
+
* {type: 'ephemeral'}`).
|
|
33
|
+
*/
|
|
34
|
+
export interface TextBlock {
|
|
35
|
+
type: 'text'
|
|
36
|
+
text: string
|
|
37
|
+
/** Mark this block as a cache breakpoint. Default `false`. */
|
|
38
|
+
cache?: boolean
|
|
19
39
|
}
|
|
20
40
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
41
|
+
/**
|
|
42
|
+
* Provider-emitted tool-use block. Appears in `assistant`-role
|
|
43
|
+
* messages when the model decides to call a tool. `input` is the
|
|
44
|
+
* parsed JSON the model produced for the tool's `inputSchema`; apps
|
|
45
|
+
* that need to validate it (Zod, ajv, etc.) do so at the call site.
|
|
46
|
+
*
|
|
47
|
+
* The agentic loop creates a matching `ToolResultBlock` and appends
|
|
48
|
+
* it to the next `user`-role message before re-asking the model.
|
|
49
|
+
*/
|
|
50
|
+
export interface ToolUseBlock {
|
|
51
|
+
type: 'tool_use'
|
|
52
|
+
/** Provider-assigned call id. The matching tool_result references this verbatim. */
|
|
24
53
|
id: string
|
|
54
|
+
/** Tool name — matches a registered `Tool.name`. */
|
|
25
55
|
name: string
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
export interface ContentBlock {
|
|
30
|
-
type: 'text' | 'tool_use' | 'tool_result'
|
|
31
|
-
text?: string
|
|
32
|
-
id?: string
|
|
33
|
-
name?: string
|
|
34
|
-
input?: Record<string, unknown>
|
|
35
|
-
toolUseId?: string
|
|
36
|
-
content?: string
|
|
56
|
+
/** Parsed input the model produced. Apps validate against the tool's schema. */
|
|
57
|
+
input: unknown
|
|
37
58
|
}
|
|
38
59
|
|
|
39
|
-
export interface Message {
|
|
40
|
-
role: 'user' | 'assistant' | 'tool'
|
|
41
|
-
content: string | ContentBlock[]
|
|
42
|
-
toolCalls?: ToolCall[]
|
|
43
|
-
toolCallId?: string
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// ── Tool Definition ──────────────────────────────────────────────────────────
|
|
47
|
-
|
|
48
|
-
export interface ToolDefinition {
|
|
49
|
-
name: string
|
|
50
|
-
description: string
|
|
51
|
-
parameters: JsonSchema
|
|
52
|
-
execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
// ── Completion Request / Response ────────────────────────────────────────────
|
|
56
|
-
|
|
57
|
-
export interface CompletionRequest {
|
|
58
|
-
model: string
|
|
59
|
-
messages: Message[]
|
|
60
|
-
system?: string
|
|
61
|
-
tools?: ToolDefinition[]
|
|
62
|
-
toolChoice?: 'auto' | 'required' | { name: string }
|
|
63
|
-
maxTokens?: number
|
|
64
|
-
temperature?: number
|
|
65
|
-
schema?: JsonSchema
|
|
66
|
-
stopSequences?: string[]
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
export interface CompletionResponse {
|
|
70
|
-
id: string
|
|
71
|
-
content: string
|
|
72
|
-
toolCalls: ToolCall[]
|
|
73
|
-
stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
|
|
74
|
-
usage: Usage
|
|
75
|
-
raw: unknown
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
// ── Streaming ────────────────────────────────────────────────────────────────
|
|
79
|
-
|
|
80
|
-
export interface StreamChunk {
|
|
81
|
-
type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
|
|
82
|
-
text?: string
|
|
83
|
-
toolCall?: Partial<ToolCall>
|
|
84
|
-
toolIndex?: number
|
|
85
|
-
usage?: Usage
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// ── Output Schema ────────────────────────────────────────────────────────────
|
|
89
|
-
|
|
90
|
-
/** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
|
|
91
|
-
export interface OutputSchema {
|
|
92
|
-
parse?: (data: unknown) => unknown
|
|
93
|
-
[key: string]: unknown
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
// ── Agent ────────────────────────────────────────────────────────────────────
|
|
97
|
-
|
|
98
|
-
export interface ToolCallRecord {
|
|
99
|
-
name: string
|
|
100
|
-
arguments: Record<string, unknown>
|
|
101
|
-
result: unknown
|
|
102
|
-
duration: number
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
export interface AgentResult<T = any> {
|
|
106
|
-
data: T
|
|
107
|
-
text: string
|
|
108
|
-
toolCalls: ToolCallRecord[]
|
|
109
|
-
messages: Message[]
|
|
110
|
-
usage: Usage
|
|
111
|
-
iterations: number
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
export interface AgentEvent {
|
|
115
|
-
type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
|
|
116
|
-
text?: string
|
|
117
|
-
toolCall?: ToolCallRecord
|
|
118
|
-
iteration?: number
|
|
119
|
-
result?: AgentResult
|
|
120
|
-
suspended?: SuspendedRun
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
// ── Suspend / Resume ─────────────────────────────────────────────────────────
|
|
124
|
-
|
|
125
60
|
/**
|
|
126
|
-
*
|
|
127
|
-
*
|
|
128
|
-
*
|
|
129
|
-
*
|
|
130
|
-
*
|
|
61
|
+
* Result of executing a tool. Appended to a `user`-role message and
|
|
62
|
+
* fed back to the model. `content` is either a plain string (the
|
|
63
|
+
* common case) or a list of text blocks for richer payloads. Mark
|
|
64
|
+
* `isError: true` so the model knows the tool call failed and can
|
|
65
|
+
* adjust its approach.
|
|
131
66
|
*/
|
|
132
|
-
export interface
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
67
|
+
export interface ToolResultBlock {
|
|
68
|
+
type: 'tool_result'
|
|
69
|
+
toolUseId: string
|
|
70
|
+
content: string | TextBlock[]
|
|
71
|
+
isError?: boolean
|
|
137
72
|
}
|
|
138
73
|
|
|
139
74
|
/**
|
|
140
|
-
*
|
|
141
|
-
*
|
|
142
|
-
*
|
|
143
|
-
*
|
|
75
|
+
* Provider-emitted MCP tool-use block. Read-only — apps don't construct
|
|
76
|
+
* these; they appear in `assistant`-role messages when the model calls
|
|
77
|
+
* a tool exposed by a configured MCP server. Anthropic's backend
|
|
78
|
+
* invokes the MCP server itself and inlines the result as an
|
|
79
|
+
* `MCPToolResultBlock` in the same response, so the framework's
|
|
80
|
+
* agentic loop doesn't need to handle the call.
|
|
144
81
|
*
|
|
145
|
-
*
|
|
146
|
-
*
|
|
147
|
-
* executed. Results must be supplied for each of them on resume so the
|
|
148
|
-
* conversation remains well-formed for the provider.
|
|
82
|
+
* Apps render these for observability (showing users that the model
|
|
83
|
+
* consulted Linear / Notion / GitHub via MCP) and for audit trails.
|
|
149
84
|
*/
|
|
150
|
-
export interface
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
result: unknown
|
|
85
|
+
export interface MCPToolUseBlock {
|
|
86
|
+
type: 'mcp_tool_use'
|
|
87
|
+
id: string
|
|
88
|
+
/** MCP server identifier — matches `MCPServer.name`. */
|
|
89
|
+
serverName: string
|
|
90
|
+
/** Tool name as exposed by the MCP server. */
|
|
91
|
+
name: string
|
|
92
|
+
/** Parsed input the model passed to the MCP tool. */
|
|
93
|
+
input: unknown
|
|
160
94
|
}
|
|
161
95
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
96
|
+
/**
|
|
97
|
+
* Provider-emitted MCP tool result. Pairs with `MCPToolUseBlock` by
|
|
98
|
+
* `toolUseId`. `content` is either a string or text blocks; `isError`
|
|
99
|
+
* is `true` when the MCP server returned an error.
|
|
100
|
+
*/
|
|
101
|
+
export interface MCPToolResultBlock {
|
|
102
|
+
type: 'mcp_tool_result'
|
|
103
|
+
toolUseId: string
|
|
104
|
+
content: string | TextBlock[]
|
|
105
|
+
isError?: boolean
|
|
168
106
|
}
|
|
169
107
|
|
|
170
|
-
|
|
108
|
+
export type ContentBlock =
|
|
109
|
+
| TextBlock
|
|
110
|
+
| ToolUseBlock
|
|
111
|
+
| ToolResultBlock
|
|
112
|
+
| MCPToolUseBlock
|
|
113
|
+
| MCPToolResultBlock
|
|
171
114
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
115
|
+
/** A single conversation turn. `content` can be a bare string or a typed block list. */
|
|
116
|
+
export interface Message {
|
|
117
|
+
role: 'user' | 'assistant'
|
|
118
|
+
content: string | ContentBlock[]
|
|
176
119
|
}
|
|
177
120
|
|
|
178
|
-
|
|
121
|
+
/**
|
|
122
|
+
* The `system` prompt. Either a plain string (no cache) or a structured
|
|
123
|
+
* form that lets apps mark the prompt as cached. Apps that want
|
|
124
|
+
* fine-grained control over multi-block system prompts pass an array.
|
|
125
|
+
*/
|
|
126
|
+
export type SystemPrompt =
|
|
127
|
+
| string
|
|
128
|
+
| { text: string; cache?: boolean }
|
|
129
|
+
| Array<{ text: string; cache?: boolean }>
|
|
179
130
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
*/
|
|
189
|
-
contentType?: string
|
|
190
|
-
/** Override the provider's default STT model. */
|
|
131
|
+
/**
|
|
132
|
+
* Per-call options. Generics are deliberately conservative — apps
|
|
133
|
+
* don't usually need to type-narrow the provider response; the `raw`
|
|
134
|
+
* escape hatch in `ChatResult` is what they reach for when they need
|
|
135
|
+
* provider-specific fields.
|
|
136
|
+
*/
|
|
137
|
+
export interface ChatOptions {
|
|
138
|
+
/** Override the configured default model. Wins over `tier`. */
|
|
191
139
|
model?: string
|
|
140
|
+
/** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
|
|
141
|
+
tier?: ModelTier
|
|
142
|
+
/** System prompt — typed shape supports prompt caching. */
|
|
143
|
+
system?: SystemPrompt
|
|
144
|
+
/** Hard ceiling on response tokens. Default `4096`. */
|
|
145
|
+
maxTokens?: number
|
|
192
146
|
/**
|
|
193
|
-
*
|
|
194
|
-
* (
|
|
195
|
-
*
|
|
147
|
+
* Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
|
|
148
|
+
* (or omission) turns it off. On Opus 4.7 + 4.6 / Sonnet 4.6 this
|
|
149
|
+
* is the only supported thinking mode — `budget_tokens` is removed
|
|
150
|
+
* upstream and not exposed here.
|
|
196
151
|
*/
|
|
197
|
-
|
|
152
|
+
thinking?: 'adaptive' | 'disabled'
|
|
153
|
+
/** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
|
|
154
|
+
effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
|
|
198
155
|
/**
|
|
199
|
-
*
|
|
200
|
-
*
|
|
201
|
-
*
|
|
202
|
-
* instruction.
|
|
156
|
+
* Top-level cache_control toggle. When `true` the provider asks the
|
|
157
|
+
* SDK to auto-cache the last cacheable block on every request.
|
|
158
|
+
* Defaults to `config.brain.cache.auto ?? false`.
|
|
203
159
|
*/
|
|
204
|
-
|
|
160
|
+
cache?: boolean
|
|
205
161
|
/**
|
|
206
|
-
*
|
|
207
|
-
*
|
|
208
|
-
* 'audio.bin' if not provided.
|
|
162
|
+
* Beta features to enable on this request. Pass through to the
|
|
163
|
+
* provider's beta-header machinery. Provider-specific.
|
|
209
164
|
*/
|
|
210
|
-
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
export interface TranscriptionResponse {
|
|
214
|
-
/** Transcribed text. */
|
|
215
|
-
text: string
|
|
216
|
-
/** Detected language, when the provider reports one. */
|
|
217
|
-
language?: string
|
|
218
|
-
/** Audio duration in seconds, when the provider reports one. */
|
|
219
|
-
duration?: number
|
|
220
|
-
/** Original provider response for callers that need provider-specific fields. */
|
|
221
|
-
raw: unknown
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// ── Provider ─────────────────────────────────────────────────────────────────
|
|
225
|
-
|
|
226
|
-
export interface AIProvider {
|
|
227
|
-
readonly name: string
|
|
228
|
-
complete(request: CompletionRequest): Promise<CompletionResponse>
|
|
229
|
-
stream(request: CompletionRequest): AsyncIterable<StreamChunk>
|
|
230
|
-
embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
|
|
165
|
+
betas?: readonly string[]
|
|
231
166
|
/**
|
|
232
|
-
*
|
|
233
|
-
*
|
|
234
|
-
* generateContent). Throws or remains undefined for providers without
|
|
235
|
-
* STT (Anthropic at time of writing).
|
|
167
|
+
* Provider override. `BrainManager.chat` normally selects the
|
|
168
|
+
* provider from config; set this to override that choice.
|
|
236
169
|
*/
|
|
237
|
-
|
|
170
|
+
provider?: string
|
|
238
171
|
}
|
|
239
172
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
) => void | Promise<void>
|
|
247
|
-
|
|
248
|
-
// ── Config ───────────────────────────────────────────────────────────────────
|
|
249
|
-
|
|
250
|
-
export interface ProviderConfig {
|
|
251
|
-
driver: string
|
|
252
|
-
apiKey: string
|
|
253
|
-
model: string
|
|
254
|
-
baseUrl?: string
|
|
255
|
-
maxTokens?: number
|
|
256
|
-
temperature?: number
|
|
257
|
-
maxRetries?: number
|
|
258
|
-
retryBaseDelay?: number
|
|
173
|
+
/** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
|
|
174
|
+
export interface ChatUsage {
|
|
175
|
+
inputTokens: number
|
|
176
|
+
outputTokens: number
|
|
177
|
+
cacheReadTokens: number
|
|
178
|
+
cacheCreationTokens: number
|
|
259
179
|
}
|
|
260
180
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
181
|
+
/**
|
|
182
|
+
* The provider's reply. `text` is the concatenated assistant text;
|
|
183
|
+
* `raw` is the provider's full native response shape for apps that
|
|
184
|
+
* need anything we don't surface (e.g. citation blocks, server-tool
|
|
185
|
+
* results once those ship).
|
|
186
|
+
*/
|
|
187
|
+
export interface ChatResult<Raw = unknown> {
|
|
188
|
+
text: string
|
|
189
|
+
model: string
|
|
190
|
+
stopReason: string | null
|
|
191
|
+
usage: ChatUsage
|
|
192
|
+
raw: Raw
|
|
268
193
|
}
|
|
269
194
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
195
|
+
/**
|
|
196
|
+
* Streaming event union. V1 covers the text-delta + completion path
|
|
197
|
+
* apps want for chat-style UIs; thinking blocks and tool-use streams
|
|
198
|
+
* are reserved for later slices.
|
|
199
|
+
*/
|
|
200
|
+
export type StreamEvent =
|
|
201
|
+
| { type: 'text'; delta: string }
|
|
202
|
+
| { type: 'stop'; stopReason: string | null; usage: ChatUsage }
|