@strav/brain 0.4.30 → 1.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/types.ts CHANGED
@@ -1,275 +1,130 @@
1
- // ── JSON Schema ──────────────────────────────────────────────────────────────
2
-
3
- /** Minimal recursive JSON Schema type. */
4
- export type JsonSchema = Record<string, unknown>
5
-
6
- // ── SSE ──────────────────────────────────────────────────────────────────────
7
-
8
- export interface SSEEvent {
9
- event?: string
10
- data: string
11
- }
12
-
13
- // ── Usage ────────────────────────────────────────────────────────────────────
14
-
15
- export interface Usage {
16
- inputTokens: number
17
- outputTokens: number
18
- totalTokens: number
19
- }
1
+ /**
2
+ * Public types for the brain runtime.
3
+ *
4
+ * Apps work with three high-level shapes:
5
+ *
6
+ * - `Message` — a user/assistant turn in a conversation. `content` is
7
+ * either a plain string or a list of `ContentBlock`s for richer
8
+ * payloads (cached blocks, images in a later slice).
9
+ *
10
+ * - `ChatOptions` — per-call knobs: model selection (explicit `model`
11
+ * or `tier` sugar), `system` prompt with optional cache flag,
12
+ * `maxTokens`, `thinking`, `effort`, etc.
13
+ *
14
+ * - `ChatResult` — what comes back from `chat()`: assistant `text`,
15
+ * `usage` (including cache hit/miss counters), `stopReason`, the
16
+ * `model` that actually answered, and a `raw` escape hatch with the
17
+ * provider's native response.
18
+ *
19
+ * The streaming side adds `StreamEvent` — a discriminated union of the
20
+ * events a provider emits while a response is being generated. V1
21
+ * covers text deltas, the final-message event, and `stopReason`;
22
+ * thinking blocks / tool-use streams land when those features ship.
23
+ */
20
24
 
21
- // ── Messages ─────────────────────────────────────────────────────────────────
25
+ /** Coarse-grained model tier. Sugar for "fast / balanced / powerful" without naming an SDK. */
26
+ export type ModelTier = 'fast' | 'balanced' | 'powerful'
22
27
 
23
- export interface ToolCall {
24
- id: string
25
- name: string
26
- arguments: Record<string, unknown>
28
+ /**
29
+ * A text content block. The `cache` flag lets apps mark long, stable
30
+ * prefixes for prompt caching; providers translate this to whatever
31
+ * cache mechanism their SDK exposes (Anthropic: `cache_control:
32
+ * {type: 'ephemeral'}`).
33
+ */
34
+ export interface TextBlock {
35
+ type: 'text'
36
+ text: string
37
+ /** Mark this block as a cache breakpoint. Default `false`. */
38
+ cache?: boolean
27
39
  }
28
40
 
29
- export interface ContentBlock {
30
- type: 'text' | 'tool_use' | 'tool_result'
31
- text?: string
32
- id?: string
33
- name?: string
34
- input?: Record<string, unknown>
35
- toolUseId?: string
36
- content?: string
37
- }
41
+ export type ContentBlock = TextBlock
38
42
 
43
+ /** A single conversation turn. `content` can be a bare string or a typed block list. */
39
44
  export interface Message {
40
- role: 'user' | 'assistant' | 'tool'
45
+ role: 'user' | 'assistant'
41
46
  content: string | ContentBlock[]
42
- toolCalls?: ToolCall[]
43
- toolCallId?: string
44
- }
45
-
46
- // ── Tool Definition ──────────────────────────────────────────────────────────
47
-
48
- export interface ToolDefinition {
49
- name: string
50
- description: string
51
- parameters: JsonSchema
52
- execute: (args: Record<string, unknown>, context?: Record<string, unknown>) => unknown | Promise<unknown>
53
- }
54
-
55
- // ── Completion Request / Response ────────────────────────────────────────────
56
-
57
- export interface CompletionRequest {
58
- model: string
59
- messages: Message[]
60
- system?: string
61
- tools?: ToolDefinition[]
62
- toolChoice?: 'auto' | 'required' | { name: string }
63
- maxTokens?: number
64
- temperature?: number
65
- schema?: JsonSchema
66
- stopSequences?: string[]
67
- }
68
-
69
- export interface CompletionResponse {
70
- id: string
71
- content: string
72
- toolCalls: ToolCall[]
73
- stopReason: 'end' | 'tool_use' | 'max_tokens' | 'stop_sequence'
74
- usage: Usage
75
- raw: unknown
76
- }
77
-
78
- // ── Streaming ────────────────────────────────────────────────────────────────
79
-
80
- export interface StreamChunk {
81
- type: 'text' | 'tool_start' | 'tool_delta' | 'tool_end' | 'usage' | 'done'
82
- text?: string
83
- toolCall?: Partial<ToolCall>
84
- toolIndex?: number
85
- usage?: Usage
86
- }
87
-
88
- // ── Output Schema ────────────────────────────────────────────────────────────
89
-
90
- /** A schema that optionally validates data via `.parse()` (e.g., Zod schema). */
91
- export interface OutputSchema {
92
- parse?: (data: unknown) => unknown
93
- [key: string]: unknown
94
- }
95
-
96
- // ── Agent ────────────────────────────────────────────────────────────────────
97
-
98
- export interface ToolCallRecord {
99
- name: string
100
- arguments: Record<string, unknown>
101
- result: unknown
102
- duration: number
103
- }
104
-
105
- export interface AgentResult<T = any> {
106
- data: T
107
- text: string
108
- toolCalls: ToolCallRecord[]
109
- messages: Message[]
110
- usage: Usage
111
- iterations: number
112
47
  }
113
48
 
114
- export interface AgentEvent {
115
- type: 'text' | 'tool_start' | 'tool_result' | 'iteration' | 'done' | 'suspended'
116
- text?: string
117
- toolCall?: ToolCallRecord
118
- iteration?: number
119
- result?: AgentResult
120
- suspended?: SuspendedRun
121
- }
122
-
123
- // ── Suspend / Resume ─────────────────────────────────────────────────────────
124
-
125
49
  /**
126
- * A JSON-serializable snapshot of an agent loop at the moment it suspended.
127
- *
128
- * All fields are plain data — no functions, class instances, or cycles — so
129
- * the snapshot can be stringified, stored across a process boundary, and
130
- * later passed to `AgentRunner.resume()` to continue the run.
50
+ * The `system` prompt. Either a plain string (no cache) or a structured
51
+ * form that lets apps mark the prompt as cached. Apps that want
52
+ * fine-grained control over multi-block system prompts pass an array.
131
53
  */
132
- export interface SerializedAgentState {
133
- messages: Message[]
134
- allToolCalls: ToolCallRecord[]
135
- totalUsage: Usage
136
- iterations: number
137
- }
54
+ export type SystemPrompt =
55
+ | string
56
+ | { text: string; cache?: boolean }
57
+ | Array<{ text: string; cache?: boolean }>
138
58
 
139
59
  /**
140
- * Result of an agent run that was suspended before executing one or more
141
- * tool calls. The integrator is expected to obtain tool results out-of-band
142
- * (human approval, external system, queued job, etc.) and call
143
- * `AgentRunner.resume(state, toolResults)` to continue.
144
- *
145
- * `pendingToolCalls` contains the pending call that triggered suspension
146
- * plus any subsequent tool calls from the same batch that have not been
147
- * executed. Results must be supplied for each of them on resume so the
148
- * conversation remains well-formed for the provider.
60
+ * Per-call options. Generics are deliberately conservative — apps
61
+ * don't usually need to type-narrow the provider response; the `raw`
62
+ * escape hatch in `ChatResult` is what they reach for when they need
63
+ * provider-specific fields.
149
64
  */
150
- export interface SuspendedRun {
151
- status: 'suspended'
152
- pendingToolCalls: ToolCall[]
153
- state: SerializedAgentState
154
- }
155
-
156
- /** Result of a pending tool call, supplied to `AgentRunner.resume()`. */
157
- export interface ToolCallResult {
158
- toolCallId: string
159
- result: unknown
160
- }
161
-
162
- // ── Workflow ──────────────────────────────────────────────────────────────────
163
-
164
- export interface WorkflowResult {
165
- results: Record<string, AgentResult>
166
- usage: Usage
167
- duration: number
168
- }
169
-
170
- // ── Embedding ────────────────────────────────────────────────────────────────
171
-
172
- export interface EmbeddingResponse {
173
- embeddings: number[][]
174
- model: string
175
- usage: { totalTokens: number }
176
- }
177
-
178
- // ── Transcription (Speech-to-Text) ───────────────────────────────────────────
179
-
180
- export interface TranscribeRequest {
181
- /** Audio bytes. Most STT endpoints cap at ~25MB; chunk longer recordings. */
182
- audio: Uint8Array | Blob
183
- /**
184
- * MIME type of the audio. Required for providers that infer format from
185
- * the multipart filename or rely on it for inline base64 (Gemini).
186
- * Examples: 'audio/m4a', 'audio/mpeg', 'audio/wav', 'audio/ogg',
187
- * 'audio/webm', 'audio/flac'.
188
- */
189
- contentType?: string
190
- /** Override the provider's default STT model. */
65
+ export interface ChatOptions {
66
+ /** Override the configured default model. Wins over `tier`. */
191
67
  model?: string
68
+ /** Sugar for selecting a model by tier. Resolved against `config.brain.tiers`. */
69
+ tier?: ModelTier
70
+ /** System prompt — typed shape supports prompt caching. */
71
+ system?: SystemPrompt
72
+ /** Hard ceiling on response tokens. Default `4096`. */
73
+ maxTokens?: number
192
74
  /**
193
- * BCP-47 language hint (e.g. 'th', 'en', 'zh'). Whisper accepts ISO-639-1
194
- * ('th'); Gemini uses BCP-47. Both improve accuracy when set; omit for
195
- * auto-detection.
75
+ * Adaptive thinking control. `'adaptive'` enables it; `'disabled'`
76
+ * (or omission) turns it off. On Opus 4.7 + 4.6 / Sonnet 4.6 this
77
+ * is the only supported thinking mode — `budget_tokens` is removed
78
+ * upstream and not exposed here.
196
79
  */
197
- language?: string
80
+ thinking?: 'adaptive' | 'disabled'
81
+ /** Effort hint. `low` / `medium` / `high` / `xhigh` / `max`. Defaults to provider's pick. */
82
+ effort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max'
198
83
  /**
199
- * Optional priming prompt — gives the model vocabulary or context to
200
- * bias toward (proper nouns, brand names, menu items, dialect markers).
201
- * Whisper uses this directly; Gemini incorporates it into the system
202
- * instruction.
84
+ * Top-level cache_control toggle. When `true` the provider asks the
85
+ * SDK to auto-cache the last cacheable block on every request.
86
+ * Defaults to `config.brain.cache.auto ?? false`.
203
87
  */
204
- prompt?: string
88
+ cache?: boolean
205
89
  /**
206
- * Filename to send in the multipart form (Whisper). Used to derive the
207
- * audio format on the server when `contentType` is missing. Defaults to
208
- * 'audio.bin' if not provided.
90
+ * Beta features to enable on this request. Pass through to the
91
+ * provider's beta-header machinery. Provider-specific.
209
92
  */
210
- filename?: string
211
- }
212
-
213
- export interface TranscriptionResponse {
214
- /** Transcribed text. */
215
- text: string
216
- /** Detected language, when the provider reports one. */
217
- language?: string
218
- /** Audio duration in seconds, when the provider reports one. */
219
- duration?: number
220
- /** Original provider response for callers that need provider-specific fields. */
221
- raw: unknown
222
- }
223
-
224
- // ── Provider ─────────────────────────────────────────────────────────────────
225
-
226
- export interface AIProvider {
227
- readonly name: string
228
- complete(request: CompletionRequest): Promise<CompletionResponse>
229
- stream(request: CompletionRequest): AsyncIterable<StreamChunk>
230
- embed?(input: string | string[], model?: string): Promise<EmbeddingResponse>
93
+ betas?: readonly string[]
231
94
  /**
232
- * Transcribe audio to text. Implemented by providers that expose a
233
- * speech-to-text endpoint (OpenAI Whisper, Google Gemini's multimodal
234
- * generateContent). Throws or remains undefined for providers without
235
- * STT (Anthropic at time of writing).
95
+ * Provider override. `BrainManager.chat` normally selects the
96
+ * provider by config; set this to override that choice for this call.
236
97
  */
237
- transcribe?(request: TranscribeRequest): Promise<TranscriptionResponse>
98
+ provider?: string
238
99
  }
239
100
 
240
- // ── Hooks ────────────────────────────────────────────────────────────────────
241
-
242
- export type BeforeHook = (request: CompletionRequest) => void | Promise<void>
243
- export type AfterHook = (
244
- request: CompletionRequest,
245
- response: CompletionResponse
246
- ) => void | Promise<void>
247
-
248
- // ── Config ───────────────────────────────────────────────────────────────────
249
-
250
- export interface ProviderConfig {
251
- driver: string
252
- apiKey: string
253
- model: string
254
- baseUrl?: string
255
- maxTokens?: number
256
- temperature?: number
257
- maxRetries?: number
258
- retryBaseDelay?: number
101
+ /** Token usage for a single call. Cache-hit fields are populated when caching is in play. */
102
+ export interface ChatUsage {
103
+ inputTokens: number
104
+ outputTokens: number
105
+ cacheReadTokens: number
106
+ cacheCreationTokens: number
259
107
  }
260
108
 
261
- export interface BrainConfig {
262
- default: string
263
- providers: Record<string, ProviderConfig>
264
- maxTokens: number
265
- temperature: number
266
- maxIterations: number
267
- memory?: import('./memory/types.ts').MemoryConfig
109
+ /**
110
+ * The provider's reply. `text` is the concatenated assistant text;
111
+ * `raw` is the provider's full native response shape for apps that
112
+ * need anything we don't surface (e.g. citation blocks, server-tool
113
+ * results once those ship).
114
+ */
115
+ export interface ChatResult<Raw = unknown> {
116
+ text: string
117
+ model: string
118
+ stopReason: string | null
119
+ usage: ChatUsage
120
+ raw: Raw
268
121
  }
269
122
 
270
- // ── Serialized Thread ────────────────────────────────────────────────────────
271
-
272
- export interface SerializedThread {
273
- messages: Message[]
274
- system?: string
275
- }
123
+ /**
124
+ * Streaming event union. V1 covers the text-delta + completion path
125
+ * apps want for chat-style UIs; thinking blocks and tool-use streams
126
+ * are reserved for later slices.
127
+ */
128
+ export type StreamEvent =
129
+ | { type: 'text'; delta: string }
130
+ | { type: 'stop'; stopReason: string | null; usage: ChatUsage }
package/CHANGELOG.md DELETED
@@ -1,44 +0,0 @@
1
- # Changelog
2
-
3
- ## 0.2.12
4
-
5
- ### Added
6
-
7
- - **GoogleProvider** — Support for Google's Gemini models
8
- - Native Gemini API integration using `generativelanguage.googleapis.com`
9
- - Support for completion, streaming, function calling, and embeddings
10
- - Models: `gemini-2.0-flash`, `gemini-2.5-flash`, `gemini-3-pro-preview`
11
- - Authentication via `x-goog-api-key` header
12
- - Zero new dependencies — uses raw `fetch()` following existing patterns
13
- - Comprehensive test suite with 29 tests covering all functionality
14
-
15
- ## 0.6.0
16
-
17
- ### Added
18
-
19
- - **Memory management** — three-tier conversation memory system for long-running threads
20
- - `thread.memory()` enables opt-in context window management
21
- - **Working memory** — recent messages within token budget
22
- - **Episodic memory** — LLM-generated summaries of compacted older messages
23
- - **Semantic memory** — structured facts extracted from conversation, injected into system prompt
24
- - `TokenCounter` — approximate token estimation per provider (~4 chars/token)
25
- - `ContextBudget` — budget allocation across system prompt, summaries, facts, and working messages
26
- - `MemoryManager` — orchestrates compaction and fact extraction
27
- - `SemanticMemory` — in-memory fact store with `<known_facts>` prompt injection
28
- - `SummarizeStrategy` — LLM-powered compaction with optional fact extraction
29
- - `SlidingWindowStrategy` — drop oldest messages without summarization
30
- - `InMemoryThreadStore` — default `ThreadStore` implementation for dev/testing
31
- - `ThreadStore` interface — pluggable persistence (implement for database-backed storage)
32
- - `BrainManager.useThreadStore()` — register a thread store for persistence
33
- - `BrainManager.memoryConfig` / `BrainManager.threadStore` — accessors for memory configuration
34
- - `thread.id()` — set thread identifier for persistence
35
- - `thread.persist()` — enable auto-save to ThreadStore after each `send()`
36
- - `thread.facts` / `thread.episodicSummary` — access memory state
37
- - `thread.serializeMemory()` / `thread.restoreMemory()` — extended serialization with memory state
38
- - `BrainConfig.memory` — optional `MemoryConfig` field for global memory settings
39
-
40
- ## 0.1.1
41
-
42
- ### Changed
43
-
44
- - Applied consistent code formatting across all source files
package/README.md DELETED
@@ -1,121 +0,0 @@
1
- # @strav/brain
2
-
3
- AI module for the [Strav](https://www.npmjs.com/package/@strav/core) framework. Provides a unified interface for AI providers with support for agents, threads, tool use, and multi-step workflows.
4
-
5
- ## Install
6
-
7
- ```bash
8
- bun add @strav/brain
9
- ```
10
-
11
- Requires `@strav/core` as a peer dependency.
12
-
13
- ## Providers
14
-
15
- - **Anthropic** (Claude)
16
- - **OpenAI** (GPT, also works with DeepSeek via custom `baseUrl`)
17
- - **Google** (Gemini)
18
-
19
- ## Usage
20
-
21
- ```ts
22
- import { brain } from '@strav/brain'
23
-
24
- // One-shot chat
25
- const response = await brain.chat('Explain quantum computing')
26
-
27
- // Streaming
28
- for await (const chunk of brain.stream('Write a poem')) {
29
- process.stdout.write(chunk.text)
30
- }
31
-
32
- // Structured output with Zod
33
- import { z } from 'zod'
34
- const result = await brain.generate('List 3 colors', {
35
- schema: z.object({ colors: z.array(z.string()) }),
36
- })
37
-
38
- // Embeddings
39
- const vectors = await brain.embed('Hello world')
40
- ```
41
-
42
- ## Tools
43
-
44
- Define tools that AI agents can use:
45
-
46
- ```ts
47
- import { defineTool } from '@strav/brain'
48
- import { z } from 'zod'
49
-
50
- const searchTool = defineTool({
51
- name: 'search',
52
- description: 'Search the database',
53
- parameters: z.object({ query: z.string() }),
54
- execute: async ({ query }, context) => {
55
- const userId = context?.userId
56
- return await db.search(query, { userId })
57
- },
58
- })
59
- ```
60
-
61
- The `execute` function receives two parameters:
62
- - `args` - The parsed and validated tool arguments
63
- - `context` - Optional context object passed from the agent runner
64
-
65
- ## Agents
66
-
67
- ```ts
68
- import { Agent, defineTool } from '@strav/brain'
69
-
70
- class ResearchAgent extends Agent {
71
- provider = 'anthropic'
72
- model = 'claude-sonnet-4-20250514'
73
- instructions = 'You are a research assistant.'
74
- tools = [searchTool, summarizeTool]
75
- }
76
-
77
- // Google Gemini agent
78
- class GeminiResearchAgent extends Agent {
79
- provider = 'google'
80
- model = 'gemini-2.0-flash'
81
- instructions = 'You are a research assistant powered by Gemini.'
82
- tools = [searchTool, summarizeTool]
83
- }
84
-
85
- // Run agent with context
86
- const runner = brain.agent(ResearchAgent)
87
- runner.context({ userId: '123' }) // Pass context to tools
88
- const result = await runner.input('Find info on Bun').run()
89
- ```
90
-
91
- ## Threads
92
-
93
- Multi-turn conversations with serialization support:
94
-
95
- ```ts
96
- const thread = brain.thread({ provider: 'anthropic', model: 'claude-sonnet-4-20250514' })
97
- await thread.send('Hello')
98
- await thread.send('Tell me more')
99
- const saved = thread.serialize() // persist and restore later
100
-
101
- // Google Gemini example
102
- const geminiThread = brain.thread({ provider: 'google', model: 'gemini-2.0-flash' })
103
- await geminiThread.send('Explain quantum computing')
104
- ```
105
-
106
- ## Workflows
107
-
108
- Orchestrate multi-agent pipelines:
109
-
110
- ```ts
111
- const workflow = brain.workflow()
112
- .step('research', ResearchAgent)
113
- .step('summarize', SummaryAgent)
114
- .parallel('review', [FactCheckAgent, StyleAgent])
115
-
116
- const result = await workflow.run('Analyze this topic')
117
- ```
118
-
119
- ## License
120
-
121
- MIT
package/src/agent.ts DELETED
@@ -1,93 +0,0 @@
1
- import type {
2
- ToolDefinition,
3
- ToolCall,
4
- ToolCallRecord,
5
- AgentResult,
6
- OutputSchema,
7
- } from './types.ts'
8
-
9
- /**
10
- * Base class for AI agents.
11
- *
12
- * Extend this class to define an agent with custom instructions,
13
- * tools, structured output, and lifecycle hooks.
14
- *
15
- * @example
16
- * class SupportAgent extends Agent {
17
- * provider = 'anthropic'
18
- * model = 'claude-sonnet-4-5-20250929'
19
- * instructions = 'You are a customer support agent.'
20
- * tools = [searchTool, lookupOrderTool]
21
- *
22
- * output = z.object({
23
- * reply: z.string(),
24
- * category: z.enum(['billing', 'shipping', 'product', 'other']),
25
- * })
26
- *
27
- * onToolCall(call: ToolCall) {
28
- * console.log(`Calling tool: ${call.name}`)
29
- * }
30
- * }
31
- */
32
- export abstract class Agent {
33
- /** Provider name (e.g., 'anthropic', 'openai'). Falls back to config default. */
34
- provider?: string
35
-
36
- /** Model identifier. Falls back to the provider's configured default model. */
37
- model?: string
38
-
39
- /** System prompt / instructions for this agent. Supports `{{key}}` context interpolation. */
40
- instructions: string = ''
41
-
42
- /** Tools available to this agent during execution. */
43
- tools?: ToolDefinition[]
44
-
45
- /** Structured output schema (Zod or JSON Schema). When set, the final response is parsed and validated. */
46
- output?: OutputSchema
47
-
48
- /** Maximum tool-use loop iterations before forcing a stop. Falls back to config default (10). */
49
- maxIterations?: number
50
-
51
- /** Maximum tokens per completion request. Falls back to config default (4096). */
52
- maxTokens?: number
53
-
54
- /** Temperature for completion requests. Falls back to config default (0.7). */
55
- temperature?: number
56
-
57
- // ── Lifecycle hooks (optional) ───────────────────────────────────────────
58
-
59
- /** Called before the first completion request. */
60
- onStart?(input: string, context: Record<string, unknown>): void | Promise<void>
61
-
62
- /** Called when the model requests a tool call, before execution. */
63
- onToolCall?(call: ToolCall): void | Promise<void>
64
-
65
- /**
66
- * Called before a tool is executed. Return `true` to suspend the agent loop
67
- * before running this tool call; the runner will return a `SuspendedRun`
68
- * with a JSON-serializable snapshot of the loop state. Resume later via
69
- * `AgentRunner.resume(state, toolResults)` once the tool result is known.
70
- *
71
- * This is a policy-free primitive: the framework does not attach meaning
72
- * to suspension. Integrators can use it to gate mutating tools on human
73
- * approval, dispatch a tool to an external worker, rate-limit, etc.
74
- *
75
- * When suspension occurs mid-batch, the triggering call and any remaining
76
- * unprocessed calls in the same batch are captured together in
77
- * `pendingToolCalls` so the provider's tool_use/tool_result contract stays
78
- * balanced on resume.
79
- */
80
- shouldSuspend?(
81
- call: ToolCall,
82
- context: Record<string, unknown>
83
- ): boolean | Promise<boolean>
84
-
85
- /** Called after a tool finishes execution. */
86
- onToolResult?(call: ToolCallRecord): void | Promise<void>
87
-
88
- /** Called when the agent run completes successfully. */
89
- onComplete?(result: AgentResult): void | Promise<void>
90
-
91
- /** Called when the agent run encounters an error. */
92
- onError?(error: Error): void | Promise<void>
93
- }