@strav/brain 0.4.30 → 1.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,281 +1,227 @@
1
- import { parseSSE } from '../utils/sse_parser.ts'
2
- import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
3
- import { ExternalServiceError } from '@strav/kernel'
4
- import type {
5
- AIProvider,
6
- CompletionRequest,
7
- CompletionResponse,
8
- StreamChunk,
9
- ProviderConfig,
10
- Message,
11
- ToolCall,
12
- Usage,
13
- } from '../types.ts'
14
-
15
1
  /**
16
- * Anthropic Messages API provider.
2
+ * `AnthropicProvider` implementation of `Provider` backed by the
3
+ * official `@anthropic-ai/sdk`.
17
4
  *
18
- * Translates the framework's normalized CompletionRequest/Response
19
- * to/from the Anthropic wire format. Uses raw `fetch()`.
5
+ * Responsibilities:
6
+ * 1. Hold a singleton `Anthropic` client instance for the
7
+ * configured API key + base URL.
8
+ * 2. Translate the framework's `ChatOptions` / `Message` shapes
9
+ * into Anthropic's `MessageCreateParams` (system as `TextBlock[]`
10
+ * with `cache_control` when requested; messages with per-block
11
+ * cache flags translated likewise; `thinking` mapped to
12
+ * `ThinkingConfigParam`; `effort` placed under `output_config`).
13
+ * 3. Translate the response back to `ChatResult` — flatten the
14
+ * content blocks into a single `text` string, surface usage with
15
+ * cache-hit counters, and pass the raw `Message` through on `.raw`.
16
+ * 4. Stream via `client.messages.stream()` and yield the framework
17
+ * `StreamEvent` union — `text` deltas plus a terminal `stop`
18
+ * event with usage + stop reason.
19
+ *
20
+ * Errors from the SDK propagate; apps that want provider-specific
21
+ * recovery can `instanceof Anthropic.RateLimitError` etc. The brain
22
+ * facade wraps the call site in `BrainError` only for invariants the
23
+ * facade owns (e.g. "no provider configured").
20
24
  */
21
- export class AnthropicProvider implements AIProvider {
22
- readonly name: string
23
- private apiKey: string
24
- private baseUrl: string
25
- private defaultModel: string
26
- private defaultMaxTokens: number
27
- private retryOptions: RetryOptions
28
25
 
29
- constructor(config: ProviderConfig) {
30
- this.name = 'anthropic'
31
- this.apiKey = config.apiKey
32
- this.baseUrl = (config.baseUrl ?? 'https://api.anthropic.com').replace(/\/$/, '')
33
- this.defaultModel = config.model
34
- this.defaultMaxTokens = config.maxTokens ?? 4096
35
- this.retryOptions = {
36
- maxRetries: config.maxRetries ?? 3,
37
- baseDelay: config.retryBaseDelay ?? 1000,
38
- }
39
- }
40
-
41
- async complete(request: CompletionRequest): Promise<CompletionResponse> {
42
- const body = this.buildRequestBody(request, false)
26
+ import Anthropic from '@anthropic-ai/sdk'
27
+ import type { AnthropicProviderConfig } from '../brain_config.ts'
28
+ import { DEFAULT_MODEL } from '../brain_config.ts'
29
+ import type { Provider } from '../provider.ts'
30
+ import type {
31
+ ChatOptions,
32
+ ChatResult,
33
+ ChatUsage,
34
+ Message,
35
+ StreamEvent,
36
+ SystemPrompt,
37
+ } from '../types.ts'
43
38
 
44
- const response = await retryableFetch(
45
- 'Anthropic',
46
- `${this.baseUrl}/v1/messages`,
47
- { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
48
- this.retryOptions
49
- )
39
+ const EPHEMERAL_CACHE = { type: 'ephemeral' } as const
50
40
 
51
- const data: any = await response.json()
52
- return this.parseResponse(data)
41
+ export class AnthropicProvider implements Provider {
42
+ readonly name: string
43
+ private readonly client: Anthropic
44
+ private readonly defaultModel: string
45
+ private readonly defaultMaxTokens: number
46
+ private readonly betas: readonly string[]
47
+
48
+ constructor(
49
+ name: string,
50
+ config: AnthropicProviderConfig,
51
+ options: { client?: Anthropic } = {},
52
+ ) {
53
+ this.name = name
54
+ this.defaultModel = config.defaultModel ?? DEFAULT_MODEL
55
+ this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
56
+ this.betas = config.betas ?? []
57
+ // `client` injection point — tests pass a stub; apps that want a
58
+ // pre-configured SDK instance (custom retry, fetch transport, etc.)
59
+ // build their own and hand it over here.
60
+ this.client =
61
+ options.client ??
62
+ new Anthropic({
63
+ apiKey: config.apiKey,
64
+ ...(config.baseUrl !== undefined ? { baseURL: config.baseUrl } : {}),
65
+ })
53
66
  }
54
67
 
55
- async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
56
- const body = this.buildRequestBody(request, true)
57
-
58
- const response = await retryableFetch(
59
- 'Anthropic',
60
- `${this.baseUrl}/v1/messages`,
61
- { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
62
- this.retryOptions
63
- )
64
-
65
- if (!response.body) {
66
- throw new ExternalServiceError('Anthropic', undefined, 'No stream body returned')
67
- }
68
-
69
- let currentBlockIndex = -1
70
-
71
- for await (const sse of parseSSE(response.body)) {
72
- if (sse.data === '[DONE]') break
73
-
74
- let parsed: any
75
- try {
76
- parsed = JSON.parse(sse.data)
77
- } catch {
78
- continue
79
- }
80
-
81
- const type = parsed.type ?? sse.event
68
+ async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
69
+ const params = this.buildParams(messages, options)
70
+ const response = await this.client.messages.create(params)
71
+ return this.toChatResult(response)
72
+ }
82
73
 
83
- if (type === 'content_block_start') {
84
- currentBlockIndex = parsed.index ?? currentBlockIndex + 1
85
- const block = parsed.content_block
86
- if (block?.type === 'tool_use') {
87
- yield {
88
- type: 'tool_start',
89
- toolCall: { id: block.id, name: block.name },
90
- toolIndex: currentBlockIndex,
91
- }
92
- }
93
- } else if (type === 'content_block_delta') {
94
- const delta = parsed.delta
95
- if (delta?.type === 'text_delta') {
96
- yield { type: 'text', text: delta.text }
97
- } else if (delta?.type === 'input_json_delta') {
98
- yield {
99
- type: 'tool_delta',
100
- text: delta.partial_json,
101
- toolIndex: parsed.index ?? currentBlockIndex,
102
- }
103
- }
104
- } else if (type === 'content_block_stop') {
105
- // If we were accumulating a tool call, signal end
106
- if (currentBlockIndex >= 0) {
107
- yield { type: 'tool_end', toolIndex: parsed.index ?? currentBlockIndex }
108
- }
109
- } else if (type === 'message_delta') {
110
- const usage = parsed.usage
111
- if (usage) {
112
- yield {
113
- type: 'usage',
114
- usage: {
115
- inputTokens: usage.input_tokens ?? 0,
116
- outputTokens: usage.output_tokens ?? 0,
117
- totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
118
- },
119
- }
120
- }
121
- } else if (type === 'message_stop') {
122
- yield { type: 'done' }
74
+ async *stream(
75
+ messages: readonly Message[],
76
+ options: ChatOptions = {},
77
+ ): AsyncIterable<StreamEvent> {
78
+ const params = this.buildParams(messages, options)
79
+ const stream = this.client.messages.stream(params)
80
+ for await (const event of stream) {
81
+ if (
82
+ event.type === 'content_block_delta' &&
83
+ event.delta.type === 'text_delta'
84
+ ) {
85
+ yield { type: 'text', delta: event.delta.text }
123
86
  }
124
87
  }
88
+ const final = await stream.finalMessage()
89
+ yield {
90
+ type: 'stop',
91
+ stopReason: final.stop_reason,
92
+ usage: toUsage(final.usage),
93
+ }
125
94
  }
126
95
 
127
- // ── Private helpers ──────────────────────────────────────────────────────
128
-
129
- private buildHeaders(): Record<string, string> {
130
- return {
131
- 'content-type': 'application/json',
132
- 'x-api-key': this.apiKey,
133
- 'anthropic-version': '2023-06-01',
134
- }
96
+ async countTokens(
97
+ messages: readonly Message[],
98
+ options: ChatOptions = {},
99
+ ): Promise<number> {
100
+ const base = this.buildParams(messages, options)
101
+ // count_tokens only accepts a subset of MessageCreateParams; build
102
+ // a focused payload that matches what apps actually need to budget.
103
+ const result = await this.client.messages.countTokens({
104
+ model: base.model,
105
+ messages: base.messages,
106
+ ...(base.system !== undefined ? { system: base.system } : {}),
107
+ ...(base.thinking !== undefined ? { thinking: base.thinking } : {}),
108
+ })
109
+ return result.input_tokens
135
110
  }
136
111
 
137
- private buildRequestBody(request: CompletionRequest, stream: boolean): Record<string, unknown> {
138
- const body: Record<string, unknown> = {
139
- model: request.model ?? this.defaultModel,
140
- max_tokens: request.maxTokens ?? this.defaultMaxTokens,
141
- messages: this.mapMessages(request.messages),
112
+ // ─── Param translation ──────────────────────────────────────────────────
113
+
114
+ private buildParams(
115
+ messages: readonly Message[],
116
+ options: ChatOptions,
117
+ ): Anthropic.MessageCreateParamsNonStreaming {
118
+ const model = options.model ?? this.defaultModel
119
+ const params: Anthropic.MessageCreateParamsNonStreaming = {
120
+ model,
121
+ max_tokens: options.maxTokens ?? this.defaultMaxTokens,
122
+ messages: messages.map(toMessageParam),
142
123
  }
143
124
 
144
- if (stream) body.stream = true
145
- if (request.system) body.system = request.system
146
- if (request.temperature !== undefined) body.temperature = request.temperature
147
- if (request.stopSequences?.length) body.stop_sequences = request.stopSequences
125
+ const system = toSystemParam(options.system)
126
+ if (system !== undefined) params.system = system
148
127
 
149
- // Tools
150
- if (request.tools?.length) {
151
- body.tools = request.tools.map(t => ({
152
- name: t.name,
153
- description: t.description,
154
- input_schema: t.parameters,
155
- }))
128
+ if (options.thinking === 'adaptive') {
129
+ params.thinking = { type: 'adaptive' }
130
+ } else if (options.thinking === 'disabled') {
131
+ params.thinking = { type: 'disabled' }
156
132
  }
157
133
 
158
- // Tool choice
159
- if (request.toolChoice) {
160
- if (request.toolChoice === 'auto') {
161
- body.tool_choice = { type: 'auto' }
162
- } else if (request.toolChoice === 'required') {
163
- body.tool_choice = { type: 'any' }
164
- } else {
165
- body.tool_choice = { type: 'tool', name: request.toolChoice.name }
166
- }
134
+ if (options.effort !== undefined) {
135
+ params.output_config = { effort: options.effort }
167
136
  }
168
137
 
169
- // Structured output (using GA API with output_config)
170
- if (request.schema) {
171
- body.output_config = {
172
- format: {
173
- type: 'json_schema',
174
- schema: request.schema
175
- }
176
- }
138
+ if (options.cache === true) {
139
+ // Top-level auto-cache the last cacheable block. Maps to the
140
+ // SDK's `cache_control` shorthand on the request body.
141
+ ;(params as { cache_control?: { type: 'ephemeral' } }).cache_control = EPHEMERAL_CACHE
177
142
  }
178
143
 
179
- return body
180
- }
181
-
182
- private mapMessages(messages: Message[]): any[] {
183
- const result: any[] = []
184
-
185
- for (const msg of messages) {
186
- if (msg.role === 'tool') {
187
- // Tool results go as user messages with tool_result content blocks
188
- result.push({
189
- role: 'user',
190
- content: [
191
- {
192
- type: 'tool_result',
193
- tool_use_id: msg.toolCallId,
194
- content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
195
- },
196
- ],
197
- })
198
- } else if (msg.role === 'assistant') {
199
- const content: any[] = []
200
-
201
- // Add text content if present
202
- const text = typeof msg.content === 'string' ? msg.content : ''
203
- if (text) {
204
- content.push({ type: 'text', text })
205
- }
144
+ const betas = mergeBetas(this.betas, options.betas)
145
+ if (betas.length > 0) {
146
+ ;(params as { betas?: readonly string[] }).betas = betas
147
+ }
206
148
 
207
- // Add tool use blocks
208
- if (msg.toolCalls?.length) {
209
- for (const tc of msg.toolCalls) {
210
- content.push({
211
- type: 'tool_use',
212
- id: tc.id,
213
- name: tc.name,
214
- input: tc.arguments,
215
- })
216
- }
217
- }
149
+ return params
150
+ }
218
151
 
219
- result.push({
220
- role: 'assistant',
221
- content: content.length === 1 && content[0].type === 'text' ? content[0].text : content,
222
- })
223
- } else {
224
- // User messages
225
- result.push({
226
- role: 'user',
227
- content: typeof msg.content === 'string' ? msg.content : msg.content,
228
- })
229
- }
152
+ private toChatResult(message: Anthropic.Message): ChatResult<Anthropic.Message> {
153
+ const text = message.content
154
+ .filter((b): b is Anthropic.TextBlock => b.type === 'text')
155
+ .map((b) => b.text)
156
+ .join('')
157
+ return {
158
+ text,
159
+ model: message.model,
160
+ stopReason: message.stop_reason,
161
+ usage: toUsage(message.usage),
162
+ raw: message,
230
163
  }
231
-
232
- return result
233
164
  }
165
+ }
234
166
 
235
- private parseResponse(data: any): CompletionResponse {
236
- let content = ''
237
- const toolCalls: ToolCall[] = []
167
+ // ─── Shape converters ─────────────────────────────────────────────────────
238
168
 
239
- if (Array.isArray(data.content)) {
240
- for (const block of data.content) {
241
- if (block.type === 'text') {
242
- content += block.text
243
- } else if (block.type === 'tool_use') {
244
- toolCalls.push({
245
- id: block.id,
246
- name: block.name,
247
- arguments: block.input ?? {},
248
- })
249
- }
250
- }
251
- }
169
/**
 * Map the SDK's snake_case usage counters onto the framework's
 * camelCase `ChatUsage`. The two cache counters fall back to 0 when the
 * SDK reports them as `null` / `undefined`.
 */
function toUsage(u: Anthropic.Usage): ChatUsage {
  const {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_read_input_tokens: cacheRead,
    cache_creation_input_tokens: cacheCreation,
  } = u
  return {
    inputTokens,
    outputTokens,
    cacheReadTokens: cacheRead ?? 0,
    cacheCreationTokens: cacheCreation ?? 0,
  }
}
252
177
 
253
- const usage: Usage = {
254
- inputTokens: data.usage?.input_tokens ?? 0,
255
- outputTokens: data.usage?.output_tokens ?? 0,
256
- totalTokens: (data.usage?.input_tokens ?? 0) + (data.usage?.output_tokens ?? 0),
257
- }
178
/**
 * Convert one framework `Message` into the SDK's `MessageParam`.
 *
 * String content is forwarded untouched. Block content becomes a list
 * of text blocks; a block whose `cache` flag is truthy additionally
 * carries the ephemeral `cache_control` marker.
 */
function toMessageParam(message: Message): Anthropic.MessageParam {
  const { role, content } = message
  if (typeof content === 'string') return { role, content }

  const blocks = content.map(
    (block): Anthropic.TextBlockParam =>
      block.cache
        ? { type: 'text', text: block.text, cache_control: EPHEMERAL_CACHE }
        : { type: 'text', text: block.text },
  )
  return { role, content: blocks }
}
258
191
 
259
- let stopReason: CompletionResponse['stopReason'] = 'end'
260
- switch (data.stop_reason) {
261
- case 'tool_use':
262
- stopReason = 'tool_use'
263
- break
264
- case 'max_tokens':
265
- stopReason = 'max_tokens'
266
- break
267
- case 'stop_sequence':
268
- stopReason = 'stop_sequence'
269
- break
270
- }
192
/**
 * Convert the framework's `SystemPrompt` into the SDK's `system` value.
 *
 * `undefined` and plain strings pass through unchanged. A single block
 * or an array of blocks becomes an array of text blocks, each tagged
 * with the ephemeral `cache_control` marker when its `cache` flag is
 * truthy.
 */
function toSystemParam(
  system: SystemPrompt | undefined,
): string | Anthropic.TextBlockParam[] | undefined {
  if (system === undefined || typeof system === 'string') return system

  // Treat the single-block form as a one-element list so both shapes
  // share the same conversion.
  const blocks = Array.isArray(system) ? system : [system]
  return blocks.map((block) => {
    const converted: Anthropic.TextBlockParam = { type: 'text', text: block.text }
    if (block.cache) converted.cache_control = EPHEMERAL_CACHE
    return converted
  })
}
271
208
 
272
- return {
273
- id: data.id ?? '',
274
- content,
275
- toolCalls,
276
- stopReason,
277
- usage,
278
- raw: data,
279
- }
209
/**
 * Combine provider-level and per-call beta flags into one list.
 *
 * Provider flags come first, then call flags; only the first occurrence
 * of each flag is kept. The result is always a fresh array, so the
 * no-call-betas path is de-duplicated too and never hands back the
 * provider's own array by reference.
 *
 * @param providerBetas Flags configured on the provider.
 * @param callBetas Flags supplied for this call, if any.
 * @returns De-duplicated flags in first-seen order (possibly empty).
 */
function mergeBetas(
  providerBetas: readonly string[],
  callBetas: readonly string[] | undefined,
): readonly string[] {
  // A Set iterates in insertion order, which gives first-seen ordering.
  return [...new Set([...providerBetas, ...(callBetas ?? [])])]
}
package/src/thread.ts ADDED
@@ -0,0 +1,99 @@
1
+ /**
2
+ * `Thread` — multi-turn conversation that retains its message history
3
+ * across calls. Built on top of `BrainManager.chat` (no provider
4
+ * coupling); apps that want a stateless one-shot use
5
+ * `BrainManager.chat` directly.
6
+ *
7
+ * State model: the thread owns an append-only `messages` array. Each
8
+ * `send(text)` appends a user turn, calls `brain.chat`, appends the
9
+ * assistant reply, and returns the assistant's text. The full message
10
+ * history is serializable via `toJSON()` so apps can persist a thread
11
+ * across requests (e.g. one row per conversation in Postgres).
12
+ *
13
+ * What's NOT here in V1:
14
+ * - Auto-compaction. Long threads accumulate without bound; apps
15
+ * that need bounded context handle this themselves (prune
16
+ * `thread.messages` in place, or use the underlying provider's
17
+ * server-side compaction feature once that ships in V2).
18
+ * - Streaming `send`. The thread's `send()` is awaited-fully; for
19
+ * token-by-token streaming in a conversation, call
20
+ * `brain.stream(thread.messages.concat(newUser))` directly.
21
+ */
22
+
23
+ import type { BrainManager } from './brain_manager.ts'
24
+ import type { ChatOptions, Message, SystemPrompt } from './types.ts'
25
+
26
/** Construction-time settings for a `Thread`. Every field is optional. */
export interface ThreadOptions {
  /**
   * System prompt the thread forwards on each `send()` call. May carry
   * cache flags.
   */
  system?: SystemPrompt

  /**
   * Thread-wide `ChatOptions` defaults; options passed to an individual
   * `send()` are merged over these.
   */
  options?: ChatOptions
}
32
+
33
/**
 * Plain-object snapshot of a thread: the shape `toJSON()` returns and
 * `fromJSON()` takes back.
 */
export interface ThreadState {
  /** Conversation history at the time of the snapshot. */
  messages: Message[]
  /** The thread's system prompt, when one was set. */
  system?: SystemPrompt
  /** The thread's default `ChatOptions`, when any were set. */
  options?: ChatOptions
}
39
+
40
/**
 * Multi-turn conversation that keeps its own message history and
 * delegates each turn to `BrainManager.chat`.
 */
export class Thread {
  /**
   * Conversation history in the order turns were exchanged. `send()`
   * appends a user + assistant pair once the model call succeeds;
   * `fromJSON()` seeds it from a snapshot.
   */
  readonly messages: Message[] = []
  /** System prompt forwarded on every `send()`; unset when none was given. */
  readonly system?: SystemPrompt
  /** Thread-level `ChatOptions` defaults; unset when none were given. */
  readonly options?: ChatOptions
  private readonly brain: BrainManager

  /**
   * @param brain Manager used to run each turn.
   * @param opts Optional system prompt and default chat options.
   */
  constructor(brain: BrainManager, opts: ThreadOptions = {}) {
    this.brain = brain
    if (opts.system !== undefined) this.system = opts.system
    if (opts.options !== undefined) this.options = opts.options
  }

  /**
   * Send a user turn and return the assistant's reply text.
   *
   * Per-call options override the thread's defaults. When the thread
   * has a system prompt it always wins over a per-call `system`, so a
   * caller can't drift the conversation by changing it mid-thread.
   *
   * The history is only updated after `brain.chat` resolves: if the
   * call rejects, the rejection propagates and `messages` is left
   * exactly as it was, so the same turn can be retried without leaving
   * an orphaned user message behind.
   *
   * @param text User message for this turn.
   * @param options Per-call `ChatOptions` overrides.
   * @returns The assistant's reply text.
   */
  async send(text: string, options: ChatOptions = {}): Promise<string> {
    const userTurn: Message = { role: 'user', content: text }
    const merged: ChatOptions = {
      ...(this.options ?? {}),
      ...options,
      // Spread last so the thread-owned system prompt beats any
      // per-call `system`.
      ...(this.system !== undefined ? { system: this.system } : {}),
    }
    // Call the model with a snapshot (history + pending turn) and commit
    // both turns together only once a reply exists.
    const result = await this.brain.chat([...this.messages, userTurn], merged)
    this.messages.push(userTurn, { role: 'assistant', content: result.text })
    return result.text
  }

  /** Number of messages in the history. Each successful `send()` adds 2 (user + assistant). */
  get length(): number {
    return this.messages.length
  }

  /** Serialize to a plain object — pass to `Thread.fromJSON` to restore. */
  toJSON(): ThreadState {
    // Copy the array so later sends don't grow an already-taken snapshot.
    const state: ThreadState = { messages: [...this.messages] }
    if (this.system !== undefined) state.system = this.system
    if (this.options !== undefined) state.options = this.options
    return state
  }

  /**
   * Restore a thread from a serialized snapshot. The `BrainManager` is
   * supplied by the caller; only the conversation state comes from the
   * snapshot.
   *
   * @param brain Manager the restored thread will use.
   * @param state Snapshot previously produced by `toJSON()`.
   */
  static fromJSON(brain: BrainManager, state: ThreadState): Thread {
    const options: ThreadOptions = {}
    if (state.system !== undefined) options.system = state.system
    if (state.options !== undefined) options.options = state.options
    const thread = new Thread(brain, options)
    for (const m of state.messages) thread.messages.push(m)
    return thread
  }
}