@strav/brain 0.4.31 → 1.0.0-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +17 -20
- package/src/agent.ts +42 -76
- package/src/agent_result.ts +32 -0
- package/src/agent_runner.ts +61 -0
- package/src/brain_config.ts +72 -0
- package/src/brain_error.ts +29 -0
- package/src/brain_manager.ts +170 -123
- package/src/brain_provider.ts +90 -6
- package/src/define_tool.ts +42 -0
- package/src/index.ts +40 -42
- package/src/provider.ts +74 -0
- package/src/providers/anthropic_provider.ts +347 -231
- package/src/thread.ts +99 -0
- package/src/tool.ts +28 -44
- package/src/tool_execution_error.ts +26 -0
- package/src/types.ts +129 -241
- package/CHANGELOG.md +0 -44
- package/README.md +0 -121
- package/src/helpers.ts +0 -1082
- package/src/mcp_toolbox.ts +0 -62
- package/src/memory/context_budget.ts +0 -120
- package/src/memory/index.ts +0 -17
- package/src/memory/memory_manager.ts +0 -168
- package/src/memory/semantic_memory.ts +0 -89
- package/src/memory/strategies/sliding_window.ts +0 -20
- package/src/memory/strategies/summarize.ts +0 -157
- package/src/memory/thread_store.ts +0 -56
- package/src/memory/token_counter.ts +0 -101
- package/src/memory/types.ts +0 -68
- package/src/providers/google_provider.ts +0 -496
- package/src/providers/openai_provider.ts +0 -569
- package/src/providers/openai_responses_provider.ts +0 -321
- package/src/utils/error_scrub.ts +0 -5
- package/src/utils/prompt.ts +0 -65
- package/src/utils/retry.ts +0 -104
- package/src/utils/schema.ts +0 -27
- package/src/utils/sse_parser.ts +0 -62
- package/src/workflow.ts +0 -199
- package/tsconfig.json +0 -5
package/src/thread.ts
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `Thread` — multi-turn conversation that retains its message history
|
|
3
|
+
* across calls. Built on top of `BrainManager.chat` (no provider
|
|
4
|
+
* coupling); apps that want a stateless one-shot use
|
|
5
|
+
* `BrainManager.chat` directly.
|
|
6
|
+
*
|
|
7
|
+
* State model: the thread owns an append-only `messages` array. Each
|
|
8
|
+
* `send(text)` appends a user turn, calls `brain.chat`, appends the
|
|
9
|
+
* assistant reply, and returns the assistant's text. The full message
|
|
10
|
+
* history is serializable via `toJSON()` so apps can persist a thread
|
|
11
|
+
* across requests (e.g. one row per conversation in Postgres).
|
|
12
|
+
*
|
|
13
|
+
* What's NOT here in V1:
|
|
14
|
+
* - Auto-compaction. Long threads accumulate without bound; apps
|
|
15
|
+
* that need bounded context handle this themselves (prune
|
|
16
|
+
* `thread.messages` in place, or use the underlying provider's
|
|
17
|
+
* server-side compaction feature once that ships in V2).
|
|
18
|
+
* - Streaming `send`. The thread's `send()` is awaited-fully; for
|
|
19
|
+
* token-by-token streaming in a conversation, call
|
|
20
|
+
* `brain.stream(thread.messages.concat(newUser))` directly.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type { BrainManager } from './brain_manager.ts'
|
|
24
|
+
import type { ChatOptions, Message, SystemPrompt } from './types.ts'
|
|
25
|
+
|
|
26
|
+
export interface ThreadOptions {
|
|
27
|
+
/** System prompt — applied to every `send()` call. Supports cache flags. */
|
|
28
|
+
system?: SystemPrompt
|
|
29
|
+
/** Per-thread `ChatOptions` defaults — merged with per-call overrides on `send()`. A `system` placed here is superseded by the thread-level `system` when both are set. */
|
|
30
|
+
options?: ChatOptions
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** Serializable snapshot. What `toJSON()` produces / `fromJSON()` accepts. */
|
|
34
|
+
export interface ThreadState {
|
|
35
|
+
messages: Message[]
|
|
36
|
+
system?: SystemPrompt
|
|
37
|
+
options?: ChatOptions
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export class Thread {
  /**
   * Conversation history, oldest message first. The property binding is
   * `readonly` (the array is never swapped out), but the array itself is
   * mutable: `send()` appends to it and `fromJSON()` seeds it.
   */
  readonly messages: Message[] = []
  /** Thread-level system prompt. When set, it wins over every other `system` on each `send()`. */
  readonly system?: SystemPrompt
  /** Thread-level `ChatOptions` defaults; per-call options override them key by key. */
  readonly options?: ChatOptions
  private readonly brain: BrainManager

  /**
   * @param brain Manager whose `chat()` is called on every `send()`.
   * @param opts  Optional thread-level system prompt and default options.
   */
  constructor(brain: BrainManager, opts: ThreadOptions = {}) {
    this.brain = brain
    // Assign only when provided so the optional fields stay absent
    // instead of holding an explicit `undefined`.
    if (opts.system !== undefined) this.system = opts.system
    if (opts.options !== undefined) this.options = opts.options
  }

  /**
   * Append a user turn, call the model, append the assistant reply,
   * and return the reply text.
   *
   * Option precedence: thread defaults < per-call `options`, except for
   * `system`, which is owned by the thread — a per-call `system` is
   * always dropped so a caller can't drift the conversation mid-thread.
   * The prompt used is `this.system`, falling back to the thread-level
   * `options.system`, otherwise none.
   *
   * If `brain.chat` rejects, the user turn added by this call is removed
   * again before the error is rethrown, so a retry does not leave a
   * duplicated, unanswered user message in the history.
   *
   * @param text    The user's message.
   * @param options Per-call overrides for the thread's default options.
   * @returns The assistant's reply text.
   * @throws Whatever `brain.chat` rejects with (rethrown unchanged).
   */
  async send(text: string, options: ChatOptions = {}): Promise<string> {
    const merged: ChatOptions = { ...this.options, ...options }
    const threadSystem = this.system ?? this.options?.system
    if (threadSystem !== undefined) merged.system = threadSystem
    else delete merged.system

    const userTurn: Message = { role: 'user', content: text }
    this.messages.push(userTurn)

    let reply: string
    try {
      reply = (await this.brain.chat(this.messages, merged)).text
    } catch (err) {
      // Remove by identity rather than truncating, so anything appended
      // to the array in the meantime is left untouched.
      const at = this.messages.lastIndexOf(userTurn)
      if (at !== -1) this.messages.splice(at, 1)
      throw err
    }

    this.messages.push({ role: 'assistant', content: reply })
    return reply
  }

  /** Number of messages in the history. Each successful `send()` adds 2 (user + assistant). */
  get length(): number {
    return this.messages.length
  }

  /**
   * Serialize to a plain object — pass to `Thread.fromJSON` to restore.
   * The `messages` array is a shallow copy; `system` and `options` are
   * included only when set on the thread.
   */
  toJSON(): ThreadState {
    const state: ThreadState = { messages: [...this.messages] }
    if (this.system !== undefined) state.system = this.system
    if (this.options !== undefined) state.options = this.options
    return state
  }

  /**
   * Restore a thread from a serialized snapshot. The `BrainManager`
   * is passed in fresh — only the conversation state is part of the
   * snapshot.
   *
   * @param brain Manager the restored thread will call.
   * @param state Snapshot previously produced by `toJSON()`.
   * @returns A new `Thread` whose history holds the snapshot's messages
   *          (copied into a new array; the message objects are shared).
   */
  static fromJSON(brain: BrainManager, state: ThreadState): Thread {
    const options: ThreadOptions = {}
    if (state.system !== undefined) options.system = state.system
    if (state.options !== undefined) options.options = state.options
    const thread = new Thread(brain, options)
    for (const m of state.messages) thread.messages.push(m)
    return thread
  }
}
|
package/src/tool.ts
CHANGED
|
@@ -1,51 +1,35 @@
|
|
|
1
|
-
import { zodToJsonSchema } from './utils/schema.ts'
|
|
2
|
-
import type { ToolDefinition, JsonSchema } from './types.ts'
|
|
3
|
-
|
|
4
1
|
/**
|
|
5
|
-
*
|
|
2
|
+
* `Tool` — the framework-native shape every tool implementation
|
|
3
|
+
* conforms to. Providers translate the `name`, `description`, and
|
|
4
|
+
* `inputSchema` into their vendor's tool-definition wire format;
|
|
5
|
+
* `execute` runs in-process on the framework side when the model
|
|
6
|
+
* calls the tool.
|
|
6
7
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
8
|
+
* `inputSchema` is plain JSON Schema (draft 2020-12 compatible).
|
|
9
|
+
* Apps that prefer Zod use the SDK's helpers and feed the resulting
|
|
10
|
+
* JSON Schema into `defineTool`; the framework deliberately doesn't
|
|
11
|
+
* couple to Zod so apps stay free to bring whatever schema library
|
|
12
|
+
* they want.
|
|
9
13
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
* execute: async ({ query }, context) => {
|
|
16
|
-
* const userId = context?.userId
|
|
17
|
-
* return await db.search(query, { userId })
|
|
18
|
-
* },
|
|
19
|
-
* })
|
|
14
|
+
* Generics: `TInput` is what `execute` receives (after the model's
|
|
15
|
+
* raw input has been narrowed by validation at the call site, when
|
|
16
|
+
* apps choose to validate). `TOutput` is what the agentic loop
|
|
17
|
+
* appends as the `tool_result.content`. Both default to `unknown`
|
|
18
|
+
* for apps that don't want the cognitive overhead of typing tools.
|
|
20
19
|
*/
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
return {
|
|
28
|
-
name: config.name,
|
|
29
|
-
description: config.description,
|
|
30
|
-
parameters: zodToJsonSchema(config.parameters) as JsonSchema,
|
|
31
|
-
execute: config.execute as (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>,
|
|
32
|
-
}
|
|
20
|
+
|
|
21
|
+
export interface ToolContext {
|
|
22
|
+
/** Provider-assigned call id — matches `ToolUseBlock.id`. */
|
|
23
|
+
readonly callId: string
|
|
24
|
+
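/** Per-run free-form context bag passed by the caller. The property is always present on the type. NOTE(review): presumably an empty object when the caller supplies none — confirm in the runner. */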
|
|
25
|
+
readonly context: Readonly<Record<string, unknown>>
|
|
33
26
|
}
|
|
34
27
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
* @example
|
|
43
|
-
* const dbTools = defineToolbox('database', [searchTool, insertTool])
|
|
44
|
-
*
|
|
45
|
-
* class MyAgent extends Agent {
|
|
46
|
-
* tools = [...dbTools, weatherTool]
|
|
47
|
-
* }
|
|
48
|
-
*/
|
|
49
|
-
export function defineToolbox(_name: string, tools: ToolDefinition[]): ToolDefinition[] {
|
|
50
|
-
return tools
|
|
28
|
+
export interface Tool<TInput = unknown, TOutput = unknown> {
|
|
29
|
+
name: string
|
|
30
|
+
description: string
|
|
31
|
+
/** JSON Schema for the tool's input. Providers translate this into their wire format. */
|
|
32
|
+
inputSchema: Record<string, unknown>
|
|
33
|
+
/** In-process executor. Throws propagate as `ToolExecutionError` through the runner. */
|
|
34
|
+
execute(input: TInput, ctx: ToolContext): Promise<TOutput>
|
|
51
35
|
}
|
package/src/tool_execution_error.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `ToolExecutionError` — wrapper thrown by the agentic loop when a
|
|
3
|
+
* tool's `execute` function throws. Carries the tool name + the
|
|
4
|
+
* provider's call id on `context` so apps building error reporters /
|
|
5
|
+
* traces can correlate failures with model output without parsing
|
|
6
|
+
* stack frames.
|
|
7
|
+
*
|
|
8
|
+
* V1 propagates these out of `runWithTools` — the loop aborts on the
|
|
9
|
+
* first tool failure. A later slice may add a graceful path
|
|
10
|
+
* (`{ type: 'tool_result', isError: true }` is appended and the
|
|
11
|
+
* loop continues) but apps that need that today can catch the
|
|
12
|
+
* error, append the result themselves, and re-call the runner.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { StravError } from '@strav/kernel'
|
|
16
|
+
|
|
17
|
+
/**
 * Error raised when a tool's `execute` throws. The message embeds the
 * tool name and a readable rendering of the thrown value; the tool name
 * and call id are passed to `StravError` under `context`, and the
 * original thrown value is passed as `cause`.
 */
export class ToolExecutionError extends StravError {
  /**
   * @param toolName Name of the tool whose `execute` failed.
   * @param callId   Provider-assigned id of the failing tool call.
   * @param cause    The value that was thrown (not necessarily an `Error`).
   */
  constructor(toolName: string, callId: string, cause: unknown) {
    super(
      `Tool "${toolName}" execution failed: ${describeCause(cause)}`,
      { code: 'brain.tool-execution-failed', status: 500 },
      { context: { tool: toolName, callId }, cause },
    )
  }
}

/**
 * Render a thrown value as message text. `Error`s contribute their
 * `message`; anything else is stringified, with a JSON fallback for
 * plain objects so the message doesn't degrade to "[object Object]".
 */
function describeCause(cause: unknown): string {
  if (cause instanceof Error) return cause.message

  let text: string
  try {
    text = String(cause)
  } catch {
    // e.g. null-prototype objects cannot be converted to a primitive.
    text = '[object Object]'
  }
  if (text !== '[object Object]') return text

  try {
    // `JSON.stringify` may return `undefined` (custom `toJSON`) or throw (cycles).
    return JSON.stringify(cause) ?? text
  } catch {
    return text
  }
}
|
package/src/types.ts
CHANGED
|
@@ -1,275 +1,163 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Public types for the brain runtime.
|
|
3
|
+
*
|
|
4
|
+
* Apps work with three high-level shapes:
|
|
5
|
+
*
|
|
6
|
+
* - `Message` — a user/assistant turn in a conversation. `content` is
|
|
7
|
+
* either a plain string or a list of `ContentBlock`s for richer
|
|
8
|
+
* payloads (cached blocks, images in a later slice).
|
|
9
|
+
*
|
|
10
|
+
* - `ChatOptions` — per-call knobs: model selection (explicit `model`
|
|
11
|
+
* or `tier` sugar), `system` prompt with optional cache flag,
|
|
12
|
+
* `maxTokens`, `thinking`, `effort`, etc.
|
|
13
|
+
*
|
|
14
|
+
* - `ChatResult` — what comes back from `chat()`: assistant `text`,
|
|
15
|
+
* `usage` (including cache hit/miss counters), `stopReason`, the
|
|
16
|
+
* `model` that actually answered, and a `raw` escape hatch with the
|
|
17
|
+
* provider's native response.
|
|
18
|
+
*
|
|
19
|
+
* The streaming side adds `StreamEvent` — a discriminated union of the
|
|
20
|
+
* events a provider emits while a response is being generated. V1
|
|
21
|
+
* covers text deltas and the closing `stop` event (`stopReason` + `usage`);
|
|
22
|
+
* thinking blocks / tool-use streams land when those features ship.
|
|
23
|
+
*/
|
|
12
24
|
|
|
13
|
-
|
|
25
|
+
/** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
|
|
26
|
+
export type ModelTier = 'fast' | 'balanced' | 'powerful'
|
|
14
27
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
28
|
+
/**
|
|
29
|
+
* A text content block. The `cache` flag lets apps mark long, stable
|
|
30
|
+
* prefixes for prompt caching; providers translate this to whatever
|
|
31
|
+
* cache mechanism their SDK exposes (Anthropic: `cache_control:
|
|
32
|
+
* {type: 'ephemeral'}`).
|
|
33
|
+
*/
|
|
34
|
+
export interface TextBlock {
|
|
35
|
+
type: 'text'
|
|
36
|
+
text: string
|
|
37
|
+
/** Mark this block as a cache breakpoint. Default `false`. */
|
|
38
|
+
cache?: boolean
|
|
19
39
|
}
|
|
20
40
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
41
|
+
/**
|
|
42
|
+
* Provider-emitted tool-use block. Appears in `assistant`-role
|
|
43
|
+
* messages when the model decides to call a tool. `input` is the
|
|
44
|
+
* parsed JSON the model produced for the tool's `inputSchema`; apps
|
|
45
|
+
* that need to validate it (Zod, ajv, etc.) do so at the call site.
|
|
46
|
+
*
|
|
47
|
+
* The agentic loop creates a matching `ToolResultBlock` and appends
|
|
48
|
+
* it to the next `user`-role message before re-asking the model.
|
|
49
|
+
*/
|
|
50
|
+
export interface ToolUseBlock {
|
|
51
|
+
type: 'tool_use'
|
|
52
|
+
/** Provider-assigned call id. The matching tool_result references this verbatim. */
|
|
24
53
|
id: string
|
|
54
|
+
/** Tool name — matches a registered `Tool.name`. */
|
|
25
55
|
name: string
|
|
26
|
-
|
|
56
|
+
/** Parsed input the model produced. Apps validate against the tool's schema. */
|
|
57
|
+
input: unknown
|
|
27
58
|
}
|
|
28
59
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
60
|
+
/**
|
|
61
|
+
* Result of executing a tool. Appended to a `user`-role message and
|
|
62
|
+
* fed back to the model. `content` is either a plain string (the
|
|
63
|
+
* common case) or a list of text blocks for richer payloads. Mark
|
|
64
|
+
* `isError: true` so the model knows the tool call failed and can
|
|
65
|
+
* adjust its approach.
|
|
66
|
+
*/
|
|
67
|
+
export interface ToolResultBlock {
|
|
68
|
+
type: 'tool_result'
|
|
69
|
+
toolUseId: string
|
|
70
|
+
content: string | TextBlock[]
|
|
71
|
+
isError?: boolean
|
|
37
72
|
}
|
|
38
73
|
|
|
74
|
+
export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock
|
|
75
|
+
|
|
76
|
+
/** A single conversation turn. `content` can be a bare string or a typed block list. */
|
|
39
77
|
export interface Message {
|
|
40
|
-
role: 'user' | 'assistant'
|
|
78
|
+
role: 'user' | 'assistant'
|
|
41
79
|
content: string | ContentBlock[]
|
|
42
|
-
toolCalls?: ToolCall[]
|
|
43
|
-
toolCallId?: string
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// ── Tool Definition ──────────────────────────────────────────────────────────
|
|
47
|
-
|
|
48
|
-
export interface ToolDefinition {
|
|
49
|
-
name: string
|
|
50
|
-
description: string
|
|
51
|
-
parameters: JsonSchema
|
|
52
|
-
execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
// ── Completion Request / Response ────────────────────────────────────────────
|
|
56
|
-
|
|
57
|
-
export interface CompletionRequest {
|
|
58
|
-
model: string
|
|
59
|
-
messages: Message[]
|
|
60
|
-
system?: string
|
|
61
|
-
tools?: ToolDefinition[]
|
|
62
|
-
toolChoice?: 'auto' | 'required' | { name: string }
|
|
63
|
-
maxTokens?: number
|
|
64
|
-
temperature?: number
|
|
65
|
-
schema?: JsonSchema
|
|
66
|
-
stopSequences?: string[]
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
export interface CompletionResponse {
|
|
70
|
-
id: string
|
|
71
|
-
content: string
|
|
72
|
-
toolCalls: ToolCall[]
|
|
73
|
-
stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
|
|
74
|
-
usage: Usage
|
|
75
|
-
raw: unknown
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
// ── Streaming ────────────────────────────────────────────────────────────────
|
|
79
|
-
|
|
80
|
-
export interface StreamChunk {
|
|
81
|
-
type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
|
|
82
|
-
text?: string
|
|
83
|
-
toolCall?: Partial<ToolCall>
|
|
84
|
-
toolIndex?: number
|
|
85
|
-
usage?: Usage
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// ── Output Schema ────────────────────────────────────────────────────────────
|
|
89
|
-
|
|
90
|
-
/** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
|
|
91
|
-
export interface OutputSchema {
|
|
92
|
-
parse?: (data: unknown) => unknown
|
|
93
|
-
[key: string]: unknown
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
// ── Agent ────────────────────────────────────────────────────────────────────
|
|
97
|
-
|
|
98
|
-
export interface ToolCallRecord {
|
|
99
|
-
name: string
|
|
100
|
-
arguments: Record<string, unknown>
|
|
101
|
-
result: unknown
|
|
102
|
-
duration: number
|
|
103
80
|
}
|
|
104
81
|
|
|
105
|
-
export interface AgentResult<T = any> {
|
|
106
|
-
data: T
|
|
107
|
-
text: string
|
|
108
|
-
toolCalls: ToolCallRecord[]
|
|
109
|
-
messages: Message[]
|
|
110
|
-
usage: Usage
|
|
111
|
-
iterations: number
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
export interface AgentEvent {
|
|
115
|
-
type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
|
|
116
|
-
text?: string
|
|
117
|
-
toolCall?: ToolCallRecord
|
|
118
|
-
iteration?: number
|
|
119
|
-
result?: AgentResult
|
|
120
|
-
suspended?: SuspendedRun
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
// ── Suspend / Resume ─────────────────────────────────────────────────────────
|
|
124
|
-
|
|
125
82
|
/**
|
|
126
|
-
*
|
|
127
|
-
*
|
|
128
|
-
*
|
|
129
|
-
* the snapshot can be stringified, stored across a process boundary, and
|
|
130
|
-
* later passed to `AgentRunner.resume()` to continue the run.
|
|
83
|
+
* The `system` prompt. Either a plain string (no cache) or a structured
|
|
84
|
+
* form that lets apps mark the prompt as cached. Apps that want
|
|
85
|
+
* fine-grained control over multi-block system prompts pass an array.
|
|
131
86
|
*/
|
|
132
|
-
export
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
iterations: number
|
|
137
|
-
}
|
|
87
|
+
export type SystemPrompt =
|
|
88
|
+
| string
|
|
89
|
+
| { text: string; cache?: boolean }
|
|
90
|
+
| Array<{ text: string; cache?: boolean }>
|
|
138
91
|
|
|
139
92
|
/**
|
|
140
|
-
*
|
|
141
|
-
*
|
|
142
|
-
*
|
|
143
|
-
*
|
|
144
|
-
*
|
|
145
|
-
* `pendingToolCalls` contains the pending call that triggered suspension
|
|
146
|
-
* plus any subsequent tool calls from the same batch that have not been
|
|
147
|
-
* executed. Results must be supplied for each of them on resume so the
|
|
148
|
-
* conversation remains well-formed for the provider.
|
|
93
|
+
* Per-call options. Generics are deliberately conservative — apps
|
|
94
|
+
* don't usually need to type-narrow the provider response; the `raw`
|
|
95
|
+
* escape hatch in `ChatResult` is what they reach for when they need
|
|
96
|
+
* provider-specific fields.
|
|
149
97
|
*/
|
|
150
|
-
export interface
|
|
151
|
-
|
|
152
|
-
pendingToolCalls: ToolCall[]
|
|
153
|
-
state: SerializedAgentState
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
/** Result of a pending tool call, supplied to `AgentRunner.resume()`. */
|
|
157
|
-
export interface ToolCallResult {
|
|
158
|
-
toolCallId: string
|
|
159
|
-
result: unknown
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
// ── Workflow ──────────────────────────────────────────────────────────────────
|
|
163
|
-
|
|
164
|
-
export interface WorkflowResult {
|
|
165
|
-
results: Record<string, AgentResult>
|
|
166
|
-
usage: Usage
|
|
167
|
-
duration: number
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
// ── Embedding ────────────────────────────────────────────────────────────────
|
|
171
|
-
|
|
172
|
-
export interface EmbeddingResponse {
|
|
173
|
-
embeddings: number[][]
|
|
174
|
-
model: string
|
|
175
|
-
usage: { totalTokens: number }
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
// ── Transcription (Speech-to-Text) ───────────────────────────────────────────
|
|
179
|
-
|
|
180
|
-
export interface TranscribeRequest {
|
|
181
|
-
/** Audio bytes. Most STT endpoints cap at ~25MB; chunk longer recordings. */
|
|
182
|
-
audio: Uint8Array | Blob
|
|
183
|
-
/**
|
|
184
|
-
* MIME type of the audio. Required for providers that infer format from
|
|
185
|
-
* the multipart filename or rely on it for inline base64 (Gemini).
|
|
186
|
-
* Examples: 'audio/m4a', 'audio/mpeg', 'audio/wav', 'audio/ogg',
|
|
187
|
-
* 'audio/webm', 'audio/flac'.
|
|
188
|
-
*/
|
|
189
|
-
contentType?: string
|
|
190
|
-
/** Override the provider's default STT model. */
|
|
98
|
+
export interface ChatOptions {
|
|
99
|
+
/** Override the configured default model. Wins over `tier`. */
|
|
191
100
|
model?: string
|
|
101
|
+
/** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
|
|
102
|
+
tier?: ModelTier
|
|
103
|
+
/** System prompt — typed shape supports prompt caching. */
|
|
104
|
+
system?: SystemPrompt
|
|
105
|
+
/** Hard ceiling on response tokens. Default `4096`. */
|
|
106
|
+
maxTokens?: number
|
|
192
107
|
/**
|
|
193
|
-
*
|
|
194
|
-
* (
|
|
195
|
-
*
|
|
108
|
+
* Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
|
|
109
|
+
* (or omission) turns it off. On Opus 4.7 + 4.6 / Sonnet 4.6 this
|
|
110
|
+
* is the only supported thinking mode — `budget_tokens` is removed
|
|
111
|
+
* upstream and not exposed here.
|
|
196
112
|
*/
|
|
197
|
-
|
|
113
|
+
thinking?: 'adaptive' | 'disabled'
|
|
114
|
+
/** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
|
|
115
|
+
effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
|
|
198
116
|
/**
|
|
199
|
-
*
|
|
200
|
-
*
|
|
201
|
-
*
|
|
202
|
-
* instruction.
|
|
117
|
+
* Top-level cache_control toggle. When `true` the provider asks the
|
|
118
|
+
* SDK to auto-cache the last cacheable block on every request.
|
|
119
|
+
* Defaults to `config.brain.cache.auto ?? false`.
|
|
203
120
|
*/
|
|
204
|
-
|
|
121
|
+
cache?: boolean
|
|
205
122
|
/**
|
|
206
|
-
*
|
|
207
|
-
*
|
|
208
|
-
* 'audio.bin' if not provided.
|
|
123
|
+
* Beta features to enable on this request. Pass through to the
|
|
124
|
+
* provider's beta-header machinery. Provider-specific.
|
|
209
125
|
*/
|
|
210
|
-
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
export interface TranscriptionResponse {
|
|
214
|
-
/** Transcribed text. */
|
|
215
|
-
text: string
|
|
216
|
-
/** Detected language, when the provider reports one. */
|
|
217
|
-
language?: string
|
|
218
|
-
/** Audio duration in seconds, when the provider reports one. */
|
|
219
|
-
duration?: number
|
|
220
|
-
/** Original provider response for callers that need provider-specific fields. */
|
|
221
|
-
raw: unknown
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// ── Provider ─────────────────────────────────────────────────────────────────
|
|
225
|
-
|
|
226
|
-
export interface AIProvider {
|
|
227
|
-
readonly name: string
|
|
228
|
-
complete(request: CompletionRequest): Promise<CompletionResponse>
|
|
229
|
-
stream(request: CompletionRequest): AsyncIterable<StreamChunk>
|
|
230
|
-
embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
|
|
126
|
+
betas?: readonly string[]
|
|
231
127
|
/**
|
|
232
|
-
*
|
|
233
|
-
*
|
|
234
|
-
* generateContent). Throws or remains undefined for providers without
|
|
235
|
-
* STT (Anthropic at time of writing).
|
|
128
|
+
* Per-call provider override. `BrainManager.chat` normally selects the
|
|
129
|
+
* provider from config; set this to choose a different one for this call.
|
|
236
130
|
*/
|
|
237
|
-
|
|
131
|
+
provider?: string
|
|
238
132
|
}
|
|
239
133
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
) => void | Promise<void>
|
|
247
|
-
|
|
248
|
-
// ── Config ───────────────────────────────────────────────────────────────────
|
|
249
|
-
|
|
250
|
-
export interface ProviderConfig {
|
|
251
|
-
driver: string
|
|
252
|
-
apiKey: string
|
|
253
|
-
model: string
|
|
254
|
-
baseUrl?: string
|
|
255
|
-
maxTokens?: number
|
|
256
|
-
temperature?: number
|
|
257
|
-
maxRetries?: number
|
|
258
|
-
retryBaseDelay?: number
|
|
134
|
+
/** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
|
|
135
|
+
export interface ChatUsage {
|
|
136
|
+
inputTokens: number
|
|
137
|
+
outputTokens: number
|
|
138
|
+
cacheReadTokens: number
|
|
139
|
+
cacheCreationTokens: number
|
|
259
140
|
}
|
|
260
141
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
142
|
+
/**
|
|
143
|
+
* The provider's reply. `text` is the concatenated assistant text;
|
|
144
|
+
* `raw` is the provider's full native response shape for apps that
|
|
145
|
+
* need anything we don't surface (e.g. citation blocks, server-tool
|
|
146
|
+
* results once those ship).
|
|
147
|
+
*/
|
|
148
|
+
export interface ChatResult<Raw = unknown> {
|
|
149
|
+
text: string
|
|
150
|
+
model: string
|
|
151
|
+
stopReason: string | null
|
|
152
|
+
usage: ChatUsage
|
|
153
|
+
raw: Raw
|
|
268
154
|
}
|
|
269
155
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
156
|
+
/**
|
|
157
|
+
* Streaming event union. V1 covers the text-delta + completion path
|
|
158
|
+
* apps want for chat-style UIs; thinking blocks and tool-use streams
|
|
159
|
+
* are reserved for later slices.
|
|
160
|
+
*/
|
|
161
|
+
export type StreamEvent =
|
|
162
|
+
| { type: 'text'; delta: string }
|
|
163
|
+
| { type: 'stop'; stopReason: string | null; usage: ChatUsage }
|
package/CHANGELOG.md
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
# Changelog
|
|
2
|
-
|
|
3
|
-
## 0.2.12
|
|
4
|
-
|
|
5
|
-
### Added
|
|
6
|
-
|
|
7
|
-
- **GoogleProvider** — Support for Google's Gemini models
|
|
8
|
-
- Native Gemini API integration using `generativelanguage.googleapis.com`
|
|
9
|
-
- Support for completion, streaming, function calling, and embeddings
|
|
10
|
-
- Models: `gemini-2.0-flash`, `gemini-2.5-flash`, `gemini-3-pro-preview`
|
|
11
|
-
- Authentication via `x-goog-api-key` header
|
|
12
|
-
- Zero new dependencies — uses raw `fetch()` following existing patterns
|
|
13
|
-
- Comprehensive test suite with 29 tests covering all functionality
|
|
14
|
-
|
|
15
|
-
## 0.6.0
|
|
16
|
-
|
|
17
|
-
### Added
|
|
18
|
-
|
|
19
|
-
- **Memory management** — three-tier conversation memory system for long-running threads
|
|
20
|
-
- `thread.memory()` enables opt-in context window management
|
|
21
|
-
- **Working memory** — recent messages within token budget
|
|
22
|
-
- **Episodic memory** — LLM-generated summaries of compacted older messages
|
|
23
|
-
- **Semantic memory** — structured facts extracted from conversation, injected into system prompt
|
|
24
|
-
- `TokenCounter` — approximate token estimation per provider (~4 chars/token)
|
|
25
|
-
- `ContextBudget` — budget allocation across system prompt, summaries, facts, and working messages
|
|
26
|
-
- `MemoryManager` — orchestrates compaction and fact extraction
|
|
27
|
-
- `SemanticMemory` — in-memory fact store with `<known_facts>` prompt injection
|
|
28
|
-
- `SummarizeStrategy` — LLM-powered compaction with optional fact extraction
|
|
29
|
-
- `SlidingWindowStrategy` — drop oldest messages without summarization
|
|
30
|
-
- `InMemoryThreadStore` — default `ThreadStore` implementation for dev/testing
|
|
31
|
-
- `ThreadStore` interface — pluggable persistence (implement for database-backed storage)
|
|
32
|
-
- `BrainManager.useThreadStore()` — register a thread store for persistence
|
|
33
|
-
- `BrainManager.memoryConfig` / `BrainManager.threadStore` — accessors for memory configuration
|
|
34
|
-
- `thread.id()` — set thread identifier for persistence
|
|
35
|
-
- `thread.persist()` — enable auto-save to ThreadStore after each `send()`
|
|
36
|
-
- `thread.facts` / `thread.episodicSummary` — access memory state
|
|
37
|
-
- `thread.serializeMemory()` / `thread.restoreMemory()` — extended serialization with memory state
|
|
38
|
-
- `BrainConfig.memory` — optional `MemoryConfig` field for global memory settings
|
|
39
|
-
|
|
40
|
-
## 0.1.1
|
|
41
|
-
|
|
42
|
-
### Changed
|
|
43
|
-
|
|
44
|
-
- Applied consistent code formatting across all source files
|