@strav/brain 0.4.31 → 1.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,281 +1,397 @@
1
- import { parseSSE } from '../utils/sse_parser.ts'
2
- import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
3
- import { ExternalServiceError } from '@strav/kernel'
4
- import type {
5
- AIProvider,
6
- CompletionRequest,
7
- CompletionResponse,
8
- StreamChunk,
9
- ProviderConfig,
10
- Message,
11
- ToolCall,
12
- Usage,
13
- } from '../types.ts'
14
-
15
1
  /**
16
- * Anthropic Messages API provider.
2
+ * `AnthropicProvider` — implementation of `Provider` backed by the
3
+ * official `@anthropic-ai/sdk`.
4
+ *
5
+ * Responsibilities:
6
+ * 1. Hold one `Anthropic` client instance per provider for the
7
+ * configured API key + base URL.
8
+ * 2. Translate the framework's `ChatOptions` / `Message` shapes
9
+ * into Anthropic's `MessageCreateParams` (system as `TextBlock[]`
10
+ * with `cache_control` when requested; messages with per-block
11
+ * cache flags translated likewise; `thinking` mapped to
12
+ * `ThinkingConfigParam`; `effort` placed under `output_config`).
13
+ * 3. Translate the response back to `ChatResult` — flatten the
14
+ * content blocks into a single `text` string, surface usage with
15
+ * cache-hit counters, and pass the raw `Message` through on `.raw`.
16
+ * 4. Stream via `client.messages.stream()` and yield the framework
17
+ * `StreamEvent` union — `text` deltas plus a terminal `stop`
18
+ * event with usage + stop reason.
17
19
  *
18
- * Translates the framework's normalized CompletionRequest/Response
19
- * to/from the Anthropic wire format. Uses raw `fetch()`.
20
+ * Errors from the SDK propagate; apps that want provider-specific
21
+ * recovery can check `instanceof Anthropic.RateLimitError` etc. The brain
22
+ * facade wraps the call site in `BrainError` only for invariants the
23
+ * facade owns (e.g. "no provider configured").
20
24
  */
21
- export class AnthropicProvider implements AIProvider {
22
- readonly name: string
23
- private apiKey: string
24
- private baseUrl: string
25
- private defaultModel: string
26
- private defaultMaxTokens: number
27
- private retryOptions: RetryOptions
28
25
 
29
- constructor(config: ProviderConfig) {
30
- this.name = 'anthropic'
31
- this.apiKey = config.apiKey
32
- this.baseUrl = (config.baseUrl ?? 'https://api.anthropic.com').replace(/\/$/, '')
33
- this.defaultModel = config.model
34
- this.defaultMaxTokens = config.maxTokens ?? 4096
35
- this.retryOptions = {
36
- maxRetries: config.maxRetries ?? 3,
37
- baseDelay: config.retryBaseDelay ?? 1000,
38
- }
39
- }
26
+ import Anthropic from '@anthropic-ai/sdk'
27
+ import type { AgentResult } from '../agent_result.ts'
28
+ import type { AnthropicProviderConfig } from '../brain_config.ts'
29
+ import { DEFAULT_MODEL } from '../brain_config.ts'
30
+ import type { Provider, RunWithToolsOptions } from '../provider.ts'
31
+ import type { Tool } from '../tool.ts'
32
+ import { ToolExecutionError } from '../tool_execution_error.ts'
33
+ import type {
34
+ ChatOptions,
35
+ ChatResult,
36
+ ChatUsage,
37
+ ContentBlock,
38
+ Message,
39
+ StreamEvent,
40
+ SystemPrompt,
41
+ TextBlock,
42
+ ToolResultBlock,
43
+ ToolUseBlock,
44
+ } from '../types.ts'
40
45
 
41
- async complete(request: CompletionRequest): Promise<CompletionResponse> {
42
- const body = this.buildRequestBody(request, false)
46
+ const EPHEMERAL_CACHE = { type: 'ephemeral' } as const
43
47
 
44
- const response = await retryableFetch(
45
- 'Anthropic',
46
- `${this.baseUrl}/v1/messages`,
47
- { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
48
- this.retryOptions
49
- )
48
+ export class AnthropicProvider implements Provider {
49
+ readonly name: string
50
+ private readonly client: Anthropic
51
+ private readonly defaultModel: string
52
+ private readonly defaultMaxTokens: number
53
+ private readonly betas: readonly string[]
54
+
55
+ constructor(
56
+ name: string,
57
+ config: AnthropicProviderConfig,
58
+ options: { client?: Anthropic } = {},
59
+ ) {
60
+ this.name = name
61
+ this.defaultModel = config.defaultModel ?? DEFAULT_MODEL
62
+ this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
63
+ this.betas = config.betas ?? []
64
+ // `client` injection point — tests pass a stub; apps that want a
65
+ // pre-configured SDK instance (custom retry, fetch transport, etc.)
66
+ // build their own and hand it over here.
67
+ this.client =
68
+ options.client ??
69
+ new Anthropic({
70
+ apiKey: config.apiKey,
71
+ ...(config.baseUrl !== undefined ? { baseURL: config.baseUrl } : {}),
72
+ })
73
+ }
50
74
 
51
- const data: any = await response.json()
52
- return this.parseResponse(data)
75
+ async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
76
+ const params = this.buildParams(messages, options)
77
+ const response = await this.client.messages.create(params)
78
+ return this.toChatResult(response)
53
79
  }
54
80
 
55
- async *stream(request: CompletionRequest): AsyncIterable<StreamChunk> {
56
- const body = this.buildRequestBody(request, true)
81
+ async *stream(
82
+ messages: readonly Message[],
83
+ options: ChatOptions = {},
84
+ ): AsyncIterable<StreamEvent> {
85
+ const params = this.buildParams(messages, options)
86
+ const stream = this.client.messages.stream(params)
87
+ for await (const event of stream) {
88
+ if (
89
+ event.type === 'content_block_delta' &&
90
+ event.delta.type === 'text_delta'
91
+ ) {
92
+ yield { type: 'text', delta: event.delta.text }
93
+ }
94
+ }
95
+ const final = await stream.finalMessage()
96
+ yield {
97
+ type: 'stop',
98
+ stopReason: final.stop_reason,
99
+ usage: toUsage(final.usage),
100
+ }
101
+ }
57
102
 
58
- const response = await retryableFetch(
59
- 'Anthropic',
60
- `${this.baseUrl}/v1/messages`,
61
- { method: 'POST', headers: this.buildHeaders(), body: JSON.stringify(body) },
62
- this.retryOptions
63
- )
103
+ async countTokens(
104
+ messages: readonly Message[],
105
+ options: ChatOptions = {},
106
+ ): Promise<number> {
107
+ const base = this.buildParams(messages, options)
108
+ // count_tokens only accepts a subset of MessageCreateParams; build
109
+ // a focused payload that matches what apps actually need to budget.
110
+ const result = await this.client.messages.countTokens({
111
+ model: base.model,
112
+ messages: base.messages,
113
+ ...(base.system !== undefined ? { system: base.system } : {}),
114
+ ...(base.thinking !== undefined ? { thinking: base.thinking } : {}),
115
+ })
116
+ return result.input_tokens
117
+ }
64
118
 
65
- if (!response.body) {
66
- throw new ExternalServiceError('Anthropic', undefined, 'No stream body returned')
119
+ /**
120
+ * Agentic loop. Send → detect tool_use blocks → execute → append
121
+ * tool_result → re-send, until the model returns a stop reason other
122
+ * than `tool_use` (e.g. `end_turn`) or the iteration ceiling is hit.
123
+ *
124
+ * Tools are re-sent on every call — Anthropic doesn't carry
125
+ * tool state across requests; the model rediscovers them from the
126
+ * `tools` array each turn. Apps that care about cache hits keep
127
+ * the tool list stable across runs.
128
+ */
129
+ async runWithTools(
130
+ messages: readonly Message[],
131
+ tools: readonly Tool[],
132
+ options: RunWithToolsOptions = {},
133
+ ): Promise<AgentResult> {
134
+ const maxIterations = options.maxIterations ?? 10
135
+ const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
136
+ const workingMessages: Message[] = [...messages]
137
+ const aggregated: ChatUsage = {
138
+ inputTokens: 0,
139
+ outputTokens: 0,
140
+ cacheReadTokens: 0,
141
+ cacheCreationTokens: 0,
67
142
  }
143
+ let iterations = 0
144
+ let lastStopReason: string | null = null
68
145
 
69
- let currentBlockIndex = -1
70
-
71
- for await (const sse of parseSSE(response.body)) {
72
- if (sse.data === '[DONE]') break
146
+ while (true) {
147
+ const params = this.buildParams(workingMessages, options)
148
+ params.tools = tools.map((t) => ({
149
+ name: t.name,
150
+ description: t.description,
151
+ input_schema: t.inputSchema as Anthropic.Tool.InputSchema,
152
+ }))
73
153
 
74
- let parsed: any
75
- try {
76
- parsed = JSON.parse(sse.data)
77
- } catch {
78
- continue
154
+ const response = await this.client.messages.create(params)
155
+ addUsage(aggregated, response.usage)
156
+ lastStopReason = response.stop_reason ?? null
157
+
158
+ // Append the assistant turn (its text and tool_use blocks) so the
159
+ // tool_use blocks survive to the next request unchanged.
160
+ workingMessages.push({
161
+ role: 'assistant',
162
+ content: fromAnthropicContent(response.content),
163
+ })
164
+
165
+ if (response.stop_reason !== 'tool_use') {
166
+ return {
167
+ text: collectText(response.content),
168
+ messages: workingMessages,
169
+ iterations,
170
+ stopReason: lastStopReason ?? 'end_turn',
171
+ usage: aggregated,
172
+ }
79
173
  }
80
174
 
81
- const type = parsed.type ?? sse.event
82
-
83
- if (type === 'content_block_start') {
84
- currentBlockIndex = parsed.index ?? currentBlockIndex + 1
85
- const block = parsed.content_block
86
- if (block?.type === 'tool_use') {
87
- yield {
88
- type: 'tool_start',
89
- toolCall: { id: block.id, name: block.name },
90
- toolIndex: currentBlockIndex,
91
- }
175
+ // Execute every tool_use block in the response and append the
176
+ // results in a single user-role turn. The SDK's API expects all
177
+ // tool_result blocks for a given assistant turn to land in the
178
+ // same user message.
179
+ const toolUseBlocks = response.content.filter(
180
+ (b): b is Anthropic.ToolUseBlock => b.type === 'tool_use',
181
+ )
182
+ const resultBlocks: ContentBlock[] = []
183
+ for (const block of toolUseBlocks) {
184
+ const tool = toolMap.get(block.name)
185
+ if (!tool) {
186
+ throw new ToolExecutionError(
187
+ block.name,
188
+ block.id,
189
+ new Error(`Tool "${block.name}" is not registered.`),
190
+ )
92
191
  }
93
- } else if (type === 'content_block_delta') {
94
- const delta = parsed.delta
95
- if (delta?.type === 'text_delta') {
96
- yield { type: 'text', text: delta.text }
97
- } else if (delta?.type === 'input_json_delta') {
98
- yield {
99
- type: 'tool_delta',
100
- text: delta.partial_json,
101
- toolIndex: parsed.index ?? currentBlockIndex,
102
- }
192
+ let output: unknown
193
+ try {
194
+ output = await tool.execute(block.input, {
195
+ callId: block.id,
196
+ context: options.context ?? {},
197
+ })
198
+ } catch (cause) {
199
+ throw new ToolExecutionError(block.name, block.id, cause)
103
200
  }
104
- } else if (type === 'content_block_stop') {
105
- // If we were accumulating a tool call, signal end
106
- if (currentBlockIndex >= 0) {
107
- yield { type: 'tool_end', toolIndex: parsed.index ?? currentBlockIndex }
201
+ const resultBlock: ToolResultBlock = {
202
+ type: 'tool_result',
203
+ toolUseId: block.id,
204
+ content: typeof output === 'string' ? output : JSON.stringify(output),
108
205
  }
109
- } else if (type === 'message_delta') {
110
- const usage = parsed.usage
111
- if (usage) {
112
- yield {
113
- type: 'usage',
114
- usage: {
115
- inputTokens: usage.input_tokens ?? 0,
116
- outputTokens: usage.output_tokens ?? 0,
117
- totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
118
- },
119
- }
206
+ resultBlocks.push(resultBlock)
207
+ }
208
+ workingMessages.push({ role: 'user', content: resultBlocks })
209
+
210
+ iterations++
211
+ if (iterations >= maxIterations) {
212
+ return {
213
+ text: collectText(response.content),
214
+ messages: workingMessages,
215
+ iterations,
216
+ stopReason: 'max_iterations',
217
+ usage: aggregated,
120
218
  }
121
- } else if (type === 'message_stop') {
122
- yield { type: 'done' }
123
219
  }
124
220
  }
125
221
  }
126
222
 
127
- // ── Private helpers ──────────────────────────────────────────────────────
128
-
129
- private buildHeaders(): Record<string, string> {
130
- return {
131
- 'content-type': 'application/json',
132
- 'x-api-key': this.apiKey,
133
- 'anthropic-version': '2023-06-01',
223
+ // ─── Param translation ──────────────────────────────────────────────────
224
+
225
+ private buildParams(
226
+ messages: readonly Message[],
227
+ options: ChatOptions,
228
+ ): Anthropic.MessageCreateParamsNonStreaming {
229
+ const model = options.model ?? this.defaultModel
230
+ const params: Anthropic.MessageCreateParamsNonStreaming = {
231
+ model,
232
+ max_tokens: options.maxTokens ?? this.defaultMaxTokens,
233
+ messages: messages.map(toMessageParam),
134
234
  }
135
- }
136
235
 
137
- private buildRequestBody(request: CompletionRequest, stream: boolean): Record<string, unknown> {
138
- const body: Record<string, unknown> = {
139
- model: request.model ?? this.defaultModel,
140
- max_tokens: request.maxTokens ?? this.defaultMaxTokens,
141
- messages: this.mapMessages(request.messages),
142
- }
236
+ const system = toSystemParam(options.system)
237
+ if (system !== undefined) params.system = system
143
238
 
144
- if (stream) body.stream = true
145
- if (request.system) body.system = request.system
146
- if (request.temperature !== undefined) body.temperature = request.temperature
147
- if (request.stopSequences?.length) body.stop_sequences = request.stopSequences
239
+ if (options.thinking === 'adaptive') {
240
+ params.thinking = { type: 'adaptive' }
241
+ } else if (options.thinking === 'disabled') {
242
+ params.thinking = { type: 'disabled' }
243
+ }
148
244
 
149
- // Tools
150
- if (request.tools?.length) {
151
- body.tools = request.tools.map(t => ({
152
- name: t.name,
153
- description: t.description,
154
- input_schema: t.parameters,
155
- }))
245
+ if (options.effort !== undefined) {
246
+ params.output_config = { effort: options.effort }
156
247
  }
157
248
 
158
- // Tool choice
159
- if (request.toolChoice) {
160
- if (request.toolChoice === 'auto') {
161
- body.tool_choice = { type: 'auto' }
162
- } else if (request.toolChoice === 'required') {
163
- body.tool_choice = { type: 'any' }
164
- } else {
165
- body.tool_choice = { type: 'tool', name: request.toolChoice.name }
166
- }
249
+ if (options.cache === true) {
250
+ // Top-level auto-cache of the last cacheable block. Maps to the
251
+ // SDK's `cache_control` shorthand on the request body.
252
+ ;(params as { cache_control?: { type: 'ephemeral' } }).cache_control = EPHEMERAL_CACHE
167
253
  }
168
254
 
169
- // Structured output (using GA API with output_config)
170
- if (request.schema) {
171
- body.output_config = {
172
- format: {
173
- type: 'json_schema',
174
- schema: request.schema
175
- }
176
- }
255
+ const betas = mergeBetas(this.betas, options.betas)
256
+ if (betas.length > 0) {
257
+ ;(params as { betas?: readonly string[] }).betas = betas
177
258
  }
178
259
 
179
- return body
260
+ return params
180
261
  }
181
262
 
182
- private mapMessages(messages: Message[]): any[] {
183
- const result: any[] = []
263
+ private toChatResult(message: Anthropic.Message): ChatResult<Anthropic.Message> {
264
+ const text = message.content
265
+ .filter((b): b is Anthropic.TextBlock => b.type === 'text')
266
+ .map((b) => b.text)
267
+ .join('')
268
+ return {
269
+ text,
270
+ model: message.model,
271
+ stopReason: message.stop_reason,
272
+ usage: toUsage(message.usage),
273
+ raw: message,
274
+ }
275
+ }
276
+ }
184
277
 
185
- for (const msg of messages) {
186
- if (msg.role === 'tool') {
187
- // Tool results go as user messages with tool_result content blocks
188
- result.push({
189
- role: 'user',
190
- content: [
191
- {
192
- type: 'tool_result',
193
- tool_use_id: msg.toolCallId,
194
- content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
195
- },
196
- ],
197
- })
198
- } else if (msg.role === 'assistant') {
199
- const content: any[] = []
278
+ // ─── Shape converters ─────────────────────────────────────────────────────
200
279
 
201
- // Add text content if present
202
- const text = typeof msg.content === 'string' ? msg.content : ''
203
- if (text) {
204
- content.push({ type: 'text', text })
205
- }
280
+ function toUsage(u: Anthropic.Usage): ChatUsage {
281
+ return {
282
+ inputTokens: u.input_tokens,
283
+ outputTokens: u.output_tokens,
284
+ cacheReadTokens: u.cache_read_input_tokens ?? 0,
285
+ cacheCreationTokens: u.cache_creation_input_tokens ?? 0,
286
+ }
287
+ }
206
288
 
207
- // Add tool use blocks
208
- if (msg.toolCalls?.length) {
209
- for (const tc of msg.toolCalls) {
210
- content.push({
211
- type: 'tool_use',
212
- id: tc.id,
213
- name: tc.name,
214
- input: tc.arguments,
215
- })
216
- }
289
+ function toMessageParam(message: Message): Anthropic.MessageParam {
290
+ if (typeof message.content === 'string') {
291
+ return { role: message.role, content: message.content }
292
+ }
293
+ return {
294
+ role: message.role,
295
+ content: message.content.map((block): Anthropic.ContentBlockParam => {
296
+ if (block.type === 'tool_use') {
297
+ return {
298
+ type: 'tool_use',
299
+ id: block.id,
300
+ name: block.name,
301
+ input: block.input as Record<string, unknown>,
217
302
  }
218
-
219
- result.push({
220
- role: 'assistant',
221
- content: content.length === 1 && content[0].type === 'text' ? content[0].text : content,
222
- })
223
- } else {
224
- // User messages
225
- result.push({
226
- role: 'user',
227
- content: typeof msg.content === 'string' ? msg.content : msg.content,
228
- })
229
303
  }
230
- }
231
-
232
- return result
304
+ if (block.type === 'tool_result') {
305
+ const param: Anthropic.ToolResultBlockParam = {
306
+ type: 'tool_result',
307
+ tool_use_id: block.toolUseId,
308
+ content:
309
+ typeof block.content === 'string'
310
+ ? block.content
311
+ : block.content.map((b) => ({ type: 'text', text: b.text }) as Anthropic.TextBlockParam),
312
+ }
313
+ if (block.isError) param.is_error = true
314
+ return param
315
+ }
316
+ const text: Anthropic.TextBlockParam = { type: 'text', text: block.text }
317
+ if (block.cache) text.cache_control = EPHEMERAL_CACHE
318
+ return text
319
+ }),
233
320
  }
321
+ }
234
322
 
235
- private parseResponse(data: any): CompletionResponse {
236
- let content = ''
237
- const toolCalls: ToolCall[] = []
323
+ function toSystemParam(
324
+ system: SystemPrompt | undefined,
325
+ ): string | Anthropic.TextBlockParam[] | undefined {
326
+ if (system === undefined) return undefined
327
+ if (typeof system === 'string') return system
328
+ if (Array.isArray(system)) {
329
+ return system.map((block) => {
330
+ const param: Anthropic.TextBlockParam = { type: 'text', text: block.text }
331
+ if (block.cache) param.cache_control = EPHEMERAL_CACHE
332
+ return param
333
+ })
334
+ }
335
+ const param: Anthropic.TextBlockParam = { type: 'text', text: system.text }
336
+ if (system.cache) param.cache_control = EPHEMERAL_CACHE
337
+ return [param]
338
+ }
238
339
 
239
- if (Array.isArray(data.content)) {
240
- for (const block of data.content) {
241
- if (block.type === 'text') {
242
- content += block.text
243
- } else if (block.type === 'tool_use') {
244
- toolCalls.push({
245
- id: block.id,
246
- name: block.name,
247
- arguments: block.input ?? {},
248
- })
249
- }
250
- }
251
- }
340
+ function mergeBetas(
341
+ providerBetas: readonly string[],
342
+ callBetas: readonly string[] | undefined,
343
+ ): readonly string[] {
344
+ if (!callBetas || callBetas.length === 0) return providerBetas
345
+ const seen = new Set<string>()
346
+ const out: string[] = []
347
+ for (const b of providerBetas) {
348
+ if (seen.has(b)) continue
349
+ seen.add(b)
350
+ out.push(b)
351
+ }
352
+ for (const b of callBetas) {
353
+ if (seen.has(b)) continue
354
+ seen.add(b)
355
+ out.push(b)
356
+ }
357
+ return out
358
+ }
252
359
 
253
- const usage: Usage = {
254
- inputTokens: data.usage?.input_tokens ?? 0,
255
- outputTokens: data.usage?.output_tokens ?? 0,
256
- totalTokens: (data.usage?.input_tokens ?? 0) + (data.usage?.output_tokens ?? 0),
257
- }
360
+ function addUsage(acc: ChatUsage, u: Anthropic.Usage): void {
361
+ acc.inputTokens += u.input_tokens
362
+ acc.outputTokens += u.output_tokens
363
+ acc.cacheReadTokens += u.cache_read_input_tokens ?? 0
364
+ acc.cacheCreationTokens += u.cache_creation_input_tokens ?? 0
365
+ }
258
366
 
259
- let stopReason: CompletionResponse['stopReason'] = 'end'
260
- switch (data.stop_reason) {
261
- case 'tool_use':
262
- stopReason = 'tool_use'
263
- break
264
- case 'max_tokens':
265
- stopReason = 'max_tokens'
266
- break
267
- case 'stop_sequence':
268
- stopReason = 'stop_sequence'
269
- break
270
- }
367
+ function collectText(content: Anthropic.ContentBlock[]): string {
368
+ return content
369
+ .filter((b): b is Anthropic.TextBlock => b.type === 'text')
370
+ .map((b) => b.text)
371
+ .join('')
372
+ }
271
373
 
272
- return {
273
- id: data.id ?? '',
274
- content,
275
- toolCalls,
276
- stopReason,
277
- usage,
278
- raw: data,
374
+ /**
375
+ * Translate the SDK's response content blocks back into framework
376
+ * `ContentBlock`s for storage in `workingMessages`. We preserve
377
+ * `text` and `tool_use` blocks verbatim; other server-side block
378
+ * types (thinking, server tool blocks) are dropped — V1 doesn't
379
+ * surface them, and re-sending them as part of the assistant turn
380
+ * could confuse the model.
381
+ */
382
+ function fromAnthropicContent(content: Anthropic.ContentBlock[]): ContentBlock[] {
383
+ const out: ContentBlock[] = []
384
+ for (const block of content) {
385
+ if (block.type === 'text') {
386
+ out.push({ type: 'text', text: block.text } satisfies TextBlock)
387
+ } else if (block.type === 'tool_use') {
388
+ out.push({
389
+ type: 'tool_use',
390
+ id: block.id,
391
+ name: block.name,
392
+ input: block.input,
393
+ } satisfies ToolUseBlock)
279
394
  }
280
395
  }
396
+ return out
281
397
  }