@strav/brain 1.0.0-alpha.16 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. package/package.json +4 -2
  2. package/src/agent.ts +34 -5
  3. package/src/agent_generate_result.ts +2 -0
  4. package/src/agent_result.ts +7 -0
  5. package/src/agent_runner.ts +134 -15
  6. package/src/agent_stream_event.ts +100 -0
  7. package/src/brain_config.ts +91 -1
  8. package/src/brain_manager.ts +287 -6
  9. package/src/brain_provider.ts +25 -1
  10. package/src/index.ts +37 -2
  11. package/src/mcp/client.ts +99 -13
  12. package/src/mcp/index.ts +7 -0
  13. package/src/mcp/oauth.ts +227 -0
  14. package/src/mcp/pool.ts +106 -0
  15. package/src/mcp/resolve_mcp_tools.ts +31 -9
  16. package/src/mcp_server.ts +16 -0
  17. package/src/persistence/brain_message.ts +34 -0
  18. package/src/persistence/brain_message_repository.ts +106 -0
  19. package/src/persistence/brain_store.ts +166 -0
  20. package/src/persistence/brain_suspended_run.ts +30 -0
  21. package/src/persistence/brain_suspended_run_repository.ts +68 -0
  22. package/src/persistence/brain_thread.ts +30 -0
  23. package/src/persistence/brain_thread_repository.ts +65 -0
  24. package/src/persistence/database_brain_store.ts +190 -0
  25. package/src/persistence/index.ts +48 -0
  26. package/src/persistence/schema/brain_message_schema.ts +61 -0
  27. package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
  28. package/src/persistence/schema/brain_thread_schema.ts +50 -0
  29. package/src/persistence/schema/index.ts +3 -0
  30. package/src/provider.ts +145 -1
  31. package/src/providers/anthropic_provider.ts +723 -38
  32. package/src/providers/deepseek_provider.ts +117 -0
  33. package/src/providers/gemini_provider.ts +625 -33
  34. package/src/providers/ollama_provider.ts +86 -0
  35. package/src/providers/openai_compat_provider.ts +616 -0
  36. package/src/providers/openai_provider.ts +801 -43
  37. package/src/providers/openai_responses_provider.ts +1015 -0
  38. package/src/suspended_run.ts +153 -0
  39. package/src/thread.ts +40 -1
  40. package/src/tool.ts +7 -0
  41. package/src/tool_runner.ts +81 -0
  42. package/src/types.ts +343 -0
@@ -52,26 +52,42 @@ import type { AgentResult } from '../agent_result.ts'
52
52
  import { BrainError } from '../brain_error.ts'
53
53
  import type { OpenAIProviderConfig } from '../brain_config.ts'
54
54
  import type { MCPServer } from '../mcp_server.ts'
55
+ import type { AgentGenerateResult } from '../agent_generate_result.ts'
56
+ import type { AgentStreamEvent } from '../agent_stream_event.ts'
55
57
  import { resolveMcpTools, type ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
56
58
  import { parseGenerated, type OutputSchema } from '../output_schema.ts'
57
- import type { Provider, RunWithToolsOptions } from '../provider.ts'
59
+ import { recoverOrThrow, runToolWithRecovery } from '../tool_runner.ts'
60
+ import type {
61
+ Provider,
62
+ RunWithToolsOptions,
63
+ RunWithToolsOptionsWithSuspend,
64
+ } from '../provider.ts'
65
+ import type { SuspendedRun } from '../suspended_run.ts'
58
66
  import type { Tool } from '../tool.ts'
59
67
  import { ToolExecutionError } from '../tool_execution_error.ts'
60
68
  import type {
69
+ AudioSource,
61
70
  ChatOptions,
62
71
  ChatResult,
63
72
  ChatUsage,
64
73
  ContentBlock,
74
+ EmbedOptions,
75
+ EmbedResult,
65
76
  GenerateResult,
77
+ ImageBlock,
66
78
  Message,
67
79
  StreamEvent,
68
80
  SystemPrompt,
69
81
  TextBlock,
70
82
  ToolResultBlock,
71
83
  ToolUseBlock,
84
+ TranscribeOptions,
85
+ TranscribeResult,
72
86
  } from '../types.ts'
73
87
 
74
88
  const DEFAULT_OPENAI_MODEL = 'gpt-5'
89
+ const DEFAULT_OPENAI_EMBED_MODEL = 'text-embedding-3-small'
90
+ const DEFAULT_OPENAI_TRANSCRIBE_MODEL = 'whisper-1'
75
91
 
76
92
  export interface OpenAIProviderOptions {
77
93
  client?: OpenAI
@@ -81,14 +97,33 @@ export interface OpenAIProviderOptions {
81
97
  * unset; the provider uses the default `MCPClient`.
82
98
  */
83
99
  mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
100
+ /**
101
+ * Optional MCP connection pool. When set, every `runWithTools`
102
+ * call (and its schema / streaming variants) borrows MCP clients
103
+ * from the pool instead of constructing fresh ones — and the
104
+ * per-call cleanup becomes a no-op so transports survive across
105
+ * calls. Apps construct one pool at boot and pass it to every
106
+ * provider that needs local MCP; pool ownership stays on the app
107
+ * via `pool.close()` at shutdown.
108
+ */
109
+ mcpPool?: ResolveMcpToolsOptions['pool']
84
110
  }
85
111
 
86
112
  export class OpenAIProvider implements Provider {
87
113
  readonly name: string
88
- private readonly client: OpenAI
89
- private readonly defaultModel: string
90
- private readonly defaultMaxTokens: number
91
- private readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
114
+ // Protected (rather than private) so OpenAI-compatible drivers
115
+ // can subclass — see `DeepSeekProvider`. Apps that want to plug
116
+ // in Groq / Together / Fireworks follow the same pattern: extend,
117
+ // override the constructor's base URL + default model, optionally
118
+ // override `buildParams` to suppress fields the upstream API
119
+ // doesn't accept.
120
+ protected readonly client: OpenAI
121
+ protected readonly defaultModel: string
122
+ protected readonly defaultMaxTokens: number
123
+ protected readonly defaultEmbedModel: string
124
+ protected readonly defaultTranscribeModel: string
125
+ protected readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
126
+ protected readonly mcpPool?: ResolveMcpToolsOptions['pool']
92
127
 
93
128
  constructor(
94
129
  name: string,
@@ -98,7 +133,10 @@ export class OpenAIProvider implements Provider {
98
133
  this.name = name
99
134
  this.defaultModel = config.defaultModel ?? DEFAULT_OPENAI_MODEL
100
135
  this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
136
+ this.defaultEmbedModel = config.defaultEmbedModel ?? DEFAULT_OPENAI_EMBED_MODEL
137
+ this.defaultTranscribeModel = config.defaultTranscribeModel ?? DEFAULT_OPENAI_TRANSCRIBE_MODEL
101
138
  this.mcpClientFactory = options.mcpClientFactory
139
+ this.mcpPool = options.mcpPool
102
140
  this.client =
103
141
  options.client ??
104
142
  new OpenAI({
@@ -110,7 +148,7 @@ export class OpenAIProvider implements Provider {
110
148
 
111
149
  async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
112
150
  const params = this.buildParams(messages, options, [])
113
- const response = await this.client.chat.completions.create(params)
151
+ const response = await this.client.chat.completions.create(params, reqOpts(options))
114
152
  return this.toChatResult(response)
115
153
  }
116
154
 
@@ -123,7 +161,7 @@ export class OpenAIProvider implements Provider {
123
161
  stream: true,
124
162
  stream_options: { include_usage: true },
125
163
  }
126
- const stream = await this.client.chat.completions.create(params)
164
+ const stream = await this.client.chat.completions.create(params, reqOpts(options))
127
165
  let aggregatedUsage: OpenAI.CompletionUsage | undefined
128
166
  let finishReason: string | null = null
129
167
  for await (const chunk of stream) {
@@ -143,18 +181,22 @@ export class OpenAIProvider implements Provider {
143
181
  }
144
182
  }
145
183
 
184
+ runWithTools(
185
+ messages: readonly Message[],
186
+ tools: readonly Tool[],
187
+ options: RunWithToolsOptionsWithSuspend,
188
+ ): Promise<AgentResult | SuspendedRun>
189
+ runWithTools(
190
+ messages: readonly Message[],
191
+ tools: readonly Tool[],
192
+ options?: RunWithToolsOptions,
193
+ ): Promise<AgentResult>
146
194
  async runWithTools(
147
195
  messages: readonly Message[],
148
196
  tools: readonly Tool[],
149
197
  options: RunWithToolsOptions = {},
150
- ): Promise<AgentResult> {
151
- const mcpServers: readonly MCPServer[] = options.mcpServers ?? []
152
- const resolved =
153
- mcpServers.length > 0
154
- ? await resolveMcpTools(mcpServers, {
155
- ...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
156
- })
157
- : { tools: [] as Tool[], close: async () => {} }
198
+ ): Promise<AgentResult | SuspendedRun> {
199
+ const resolved = await this.resolveMcp(options.mcpServers ?? [])
158
200
  try {
159
201
  return await this._runLoop(messages, [...tools, ...resolved.tools], options)
160
202
  } finally {
@@ -166,7 +208,7 @@ export class OpenAIProvider implements Provider {
166
208
  messages: readonly Message[],
167
209
  tools: readonly Tool[],
168
210
  options: RunWithToolsOptions,
169
- ): Promise<AgentResult> {
211
+ ): Promise<AgentResult | SuspendedRun> {
170
212
  const maxIterations = options.maxIterations ?? 10
171
213
  const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
172
214
  const workingMessages: Message[] = [...messages]
@@ -179,8 +221,9 @@ export class OpenAIProvider implements Provider {
179
221
  let iterations = 0
180
222
 
181
223
  while (true) {
224
+ checkAborted(options.signal)
182
225
  const params = this.buildParams(workingMessages, options, tools)
183
- const response = await this.client.chat.completions.create(params)
226
+ const response = await this.client.chat.completions.create(params, reqOpts(options))
184
227
  addUsage(aggregated, response.usage)
185
228
 
186
229
  const choice = response.choices[0]
@@ -208,54 +251,656 @@ export class OpenAIProvider implements Provider {
208
251
  }
209
252
 
210
253
  const resultBlocks: ContentBlock[] = []
211
- for (const call of toolCalls) {
254
+ for (let i = 0; i < toolCalls.length; i++) {
255
+ const call = toolCalls[i]!
212
256
  if (call.type !== 'function') continue
213
- const tool = toolMap.get(call.function.name)
214
- if (!tool) {
215
- throw new ToolExecutionError(
257
+ let parsedInput: unknown
258
+ let parseFailed: { content: string; isError: boolean } | undefined
259
+ try {
260
+ parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
261
+ } catch (err) {
262
+ parseFailed = recoverOrThrow(
263
+ new ToolExecutionError(
264
+ call.function.name,
265
+ call.id,
266
+ new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
267
+ ),
268
+ options,
269
+ )
270
+ }
271
+ if (options.shouldSuspend && !parseFailed) {
272
+ const frameworkCall: ToolUseBlock = {
273
+ type: 'tool_use',
274
+ id: call.id,
275
+ name: call.function.name,
276
+ input: (parsedInput ?? {}) as Record<string, unknown>,
277
+ }
278
+ if (await options.shouldSuspend(frameworkCall, options.context)) {
279
+ const pending: ToolUseBlock[] = []
280
+ for (let j = i; j < toolCalls.length; j++) {
281
+ const c = toolCalls[j]!
282
+ if (c.type !== 'function') continue
283
+ let pInput: unknown = {}
284
+ try {
285
+ pInput = c.function.arguments ? JSON.parse(c.function.arguments) : {}
286
+ } catch {
287
+ pInput = c.function.arguments ?? {}
288
+ }
289
+ pending.push({
290
+ type: 'tool_use',
291
+ id: c.id,
292
+ name: c.function.name,
293
+ input: pInput as Record<string, unknown>,
294
+ })
295
+ }
296
+ return {
297
+ status: 'suspended',
298
+ pendingToolCalls: pending,
299
+ state: { messages: workingMessages, iterations, usage: aggregated },
300
+ }
301
+ }
302
+ }
303
+ const { content, isError } = parseFailed
304
+ ?? (await runToolWithRecovery(
305
+ toolMap.get(call.function.name),
216
306
  call.function.name,
217
307
  call.id,
218
- new Error(`Tool "${call.function.name}" is not registered.`),
219
- )
308
+ parsedInput,
309
+ options,
310
+ ))
311
+ resultBlocks.push({
312
+ type: 'tool_result',
313
+ toolUseId: call.id,
314
+ content,
315
+ ...(isError ? { isError: true } : {}),
316
+ } satisfies ToolResultBlock)
317
+ }
318
+ workingMessages.push({ role: 'user', content: resultBlocks })
319
+
320
+ iterations++
321
+ if (iterations >= maxIterations) {
322
+ return {
323
+ text: assistantMessage.content ?? '',
324
+ messages: workingMessages,
325
+ iterations,
326
+ stopReason: 'max_iterations',
327
+ usage: aggregated,
220
328
  }
329
+ }
330
+ }
331
+ }
332
+
333
+ async runWithToolsAndSchema<T>(
334
+ messages: readonly Message[],
335
+ tools: readonly Tool[],
336
+ schema: OutputSchema<T>,
337
+ options: RunWithToolsOptions = {},
338
+ ): Promise<AgentGenerateResult<T>> {
339
+ const resolved = await this.resolveMcp(options.mcpServers ?? [])
340
+ try {
341
+ return await this._runLoopWithSchema([...tools, ...resolved.tools], messages, schema, options)
342
+ } finally {
343
+ await resolved.close()
344
+ }
345
+ }
346
+
347
+ private async _runLoopWithSchema<T>(
348
+ tools: readonly Tool[],
349
+ messages: readonly Message[],
350
+ schema: OutputSchema<T>,
351
+ options: RunWithToolsOptions,
352
+ ): Promise<AgentGenerateResult<T>> {
353
+ const maxIterations = options.maxIterations ?? 10
354
+ const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
355
+ const workingMessages: Message[] = [...messages]
356
+ const aggregated: ChatUsage = {
357
+ inputTokens: 0,
358
+ outputTokens: 0,
359
+ cacheReadTokens: 0,
360
+ cacheCreationTokens: 0,
361
+ }
362
+ let iterations = 0
363
+
364
+ while (true) {
365
+ const params = this.buildParams(workingMessages, options, tools)
366
+ params.response_format = {
367
+ type: 'json_schema',
368
+ json_schema: {
369
+ name: schema.name,
370
+ ...(schema.description !== undefined ? { description: schema.description } : {}),
371
+ schema: schema.jsonSchema,
372
+ strict: true,
373
+ },
374
+ }
375
+ const response = await this.client.chat.completions.create(params, reqOpts(options))
376
+ addUsage(aggregated, response.usage)
377
+
378
+ const choice = response.choices[0]
379
+ if (!choice) {
380
+ throw new BrainError('OpenAIProvider: response had no choices.')
381
+ }
382
+ const assistantMessage = choice.message
383
+ workingMessages.push({
384
+ role: 'assistant',
385
+ content: fromOpenAIAssistantMessage(assistantMessage),
386
+ })
387
+
388
+ const toolCalls = assistantMessage.tool_calls ?? []
389
+ if (toolCalls.length === 0 || choice.finish_reason !== 'tool_calls') {
390
+ const text = assistantMessage.content ?? ''
391
+ return {
392
+ value: parseGenerated(text, schema),
393
+ text,
394
+ messages: workingMessages,
395
+ iterations,
396
+ stopReason: choice.finish_reason ?? 'stop',
397
+ usage: aggregated,
398
+ }
399
+ }
400
+
401
+ const resultBlocks: ContentBlock[] = []
402
+ for (const call of toolCalls) {
403
+ if (call.type !== 'function') continue
221
404
  let parsedInput: unknown
405
+ let parseFailed: { content: string; isError: boolean } | undefined
222
406
  try {
223
407
  parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
224
408
  } catch (err) {
225
- throw new ToolExecutionError(
409
+ parseFailed = recoverOrThrow(
410
+ new ToolExecutionError(
411
+ call.function.name,
412
+ call.id,
413
+ new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
414
+ ),
415
+ options,
416
+ )
417
+ }
418
+ const { content, isError } = parseFailed
419
+ ?? (await runToolWithRecovery(
420
+ toolMap.get(call.function.name),
226
421
  call.function.name,
227
422
  call.id,
228
- new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
229
- )
423
+ parsedInput,
424
+ options,
425
+ ))
426
+ resultBlocks.push({
427
+ type: 'tool_result',
428
+ toolUseId: call.id,
429
+ content,
430
+ ...(isError ? { isError: true } : {}),
431
+ } satisfies ToolResultBlock)
432
+ }
433
+ workingMessages.push({ role: 'user', content: resultBlocks })
434
+
435
+ iterations++
436
+ if (iterations >= maxIterations) {
437
+ const text = assistantMessage.content ?? ''
438
+ return {
439
+ value: parseGenerated(text, schema),
440
+ text,
441
+ messages: workingMessages,
442
+ iterations,
443
+ stopReason: 'max_iterations',
444
+ usage: aggregated,
445
+ }
446
+ }
447
+ }
448
+ }
449
+
450
+ async *streamWithTools(
451
+ messages: readonly Message[],
452
+ tools: readonly Tool[],
453
+ options: RunWithToolsOptions = {},
454
+ ): AsyncIterable<AgentStreamEvent> {
455
+ const resolved = await this.resolveMcp(options.mcpServers ?? [])
456
+ try {
457
+ yield* this._streamLoop(messages, [...tools, ...resolved.tools], options)
458
+ } finally {
459
+ await resolved.close()
460
+ }
461
+ }
462
+
463
+ private async *_streamLoop(
464
+ messages: readonly Message[],
465
+ tools: readonly Tool[],
466
+ options: RunWithToolsOptions,
467
+ ): AsyncIterable<AgentStreamEvent> {
468
+ const maxIterations = options.maxIterations ?? 10
469
+ const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
470
+ const workingMessages: Message[] = [...messages]
471
+ const aggregated: ChatUsage = {
472
+ inputTokens: 0,
473
+ outputTokens: 0,
474
+ cacheReadTokens: 0,
475
+ cacheCreationTokens: 0,
476
+ }
477
+ let iterations = 0
478
+
479
+ while (true) {
480
+ checkAborted(options.signal)
481
+ yield { type: 'iteration_start', iteration: iterations }
482
+
483
+ const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
484
+ ...this.buildParams(workingMessages, options, tools),
485
+ stream: true,
486
+ stream_options: { include_usage: true },
487
+ }
488
+ const stream = await this.client.chat.completions.create(params, reqOpts(options))
489
+
490
+ let textBuf = ''
491
+ // Tracks: per index, the running entry; and whether
492
+ // `tool_use_start` has already been emitted (we emit once the
493
+ // first chunk brings the id + name).
494
+ const toolCallsByIndex: Map<
495
+ number,
496
+ { id?: string; name?: string; args: string; started: boolean }
497
+ > = new Map()
498
+ let finishReason: string | null = null
499
+ let lastUsage: OpenAI.CompletionUsage | undefined
500
+
501
+ for await (const chunk of stream) {
502
+ const choice = chunk.choices[0]
503
+ const delta = choice?.delta
504
+ if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
505
+ textBuf += delta.content
506
+ yield { type: 'text', delta: delta.content }
507
+ }
508
+ if (delta?.tool_calls) {
509
+ for (const tc of delta.tool_calls) {
510
+ const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false }
511
+ if (tc.id) entry.id = tc.id
512
+ if (tc.function?.name) entry.name = tc.function.name
513
+ toolCallsByIndex.set(tc.index, entry)
514
+ // Emit `tool_use_start` once id+name are both known.
515
+ // OpenAI typically delivers them in the same first
516
+ // chunk for a given tool call.
517
+ if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
518
+ entry.started = true
519
+ yield { type: 'tool_use_start', id: entry.id, name: entry.name }
520
+ }
521
+ if (tc.function?.arguments) {
522
+ entry.args += tc.function.arguments
523
+ // Emit a delta only after start has fired — apps relying
524
+ // on an id wouldn't have one until then.
525
+ if (entry.started && entry.id !== undefined) {
526
+ yield {
527
+ type: 'tool_use_delta',
528
+ id: entry.id,
529
+ argsDelta: tc.function.arguments,
530
+ }
531
+ }
532
+ }
533
+ }
230
534
  }
231
- let output: unknown
535
+ if (choice?.finish_reason) finishReason = choice.finish_reason
536
+ if (chunk.usage) lastUsage = chunk.usage
537
+ }
538
+
539
+ addUsage(aggregated, lastUsage)
540
+ yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
541
+
542
+ // Materialize the assistant turn the same way runWithTools does.
543
+ const assistantBlocks: ContentBlock[] = []
544
+ if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
545
+ const orderedCalls = [...toolCallsByIndex.entries()]
546
+ .sort(([a], [b]) => a - b)
547
+ .map(([, v]) => v)
548
+ for (const call of orderedCalls) {
549
+ if (!call.id || !call.name) continue
550
+ let parsedInput: unknown = {}
551
+ try {
552
+ parsedInput = call.args ? JSON.parse(call.args) : {}
553
+ } catch {
554
+ parsedInput = call.args
555
+ }
556
+ assistantBlocks.push({
557
+ type: 'tool_use',
558
+ id: call.id,
559
+ name: call.name,
560
+ input: parsedInput,
561
+ } satisfies ToolUseBlock)
562
+ }
563
+ const assistantContent: string | ContentBlock[] =
564
+ assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
565
+ ? assistantBlocks[0].text
566
+ : assistantBlocks
567
+ workingMessages.push({ role: 'assistant', content: assistantContent })
568
+
569
+ if (finishReason !== 'tool_calls' || orderedCalls.length === 0) {
570
+ yield {
571
+ type: 'stop',
572
+ stopReason: finishReason ?? 'stop',
573
+ iterations,
574
+ usage: aggregated,
575
+ messages: workingMessages,
576
+ }
577
+ return
578
+ }
579
+
580
+ const resultBlocks: ContentBlock[] = []
581
+ for (const call of orderedCalls) {
582
+ if (!call.id || !call.name) continue
583
+ let parsedInput: unknown
584
+ let parseFailed: { content: string; isError: boolean } | undefined
232
585
  try {
233
- output = await tool.execute(parsedInput, {
234
- callId: call.id,
235
- context: options.context ?? {},
236
- })
237
- } catch (cause) {
238
- throw new ToolExecutionError(call.function.name, call.id, cause)
586
+ parsedInput = call.args ? JSON.parse(call.args) : {}
587
+ } catch (err) {
588
+ parseFailed = recoverOrThrow(
589
+ new ToolExecutionError(
590
+ call.name,
591
+ call.id,
592
+ new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
593
+ ),
594
+ options,
595
+ )
596
+ parsedInput = call.args
239
597
  }
240
- const resultBlock: ToolResultBlock = {
598
+ yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput }
599
+ const { content, isError } = parseFailed
600
+ ?? (await runToolWithRecovery(
601
+ toolMap.get(call.name),
602
+ call.name,
603
+ call.id,
604
+ parsedInput,
605
+ options,
606
+ ))
607
+ resultBlocks.push({
241
608
  type: 'tool_result',
242
609
  toolUseId: call.id,
243
- content: typeof output === 'string' ? output : JSON.stringify(output),
610
+ content,
611
+ ...(isError ? { isError: true } : {}),
612
+ } satisfies ToolResultBlock)
613
+ yield {
614
+ type: 'tool_result',
615
+ id: call.id,
616
+ name: call.name,
617
+ content,
618
+ isError,
244
619
  }
245
- resultBlocks.push(resultBlock)
246
620
  }
247
621
  workingMessages.push({ role: 'user', content: resultBlocks })
248
622
 
249
623
  iterations++
250
624
  if (iterations >= maxIterations) {
251
- return {
252
- text: assistantMessage.content ?? '',
625
+ yield {
626
+ type: 'stop',
627
+ stopReason: 'max_iterations',
628
+ iterations,
629
+ usage: aggregated,
253
630
  messages: workingMessages,
631
+ }
632
+ return
633
+ }
634
+ }
635
+ }
636
+
637
+ async *streamWithToolsAndSchema<T>(
638
+ messages: readonly Message[],
639
+ tools: readonly Tool[],
640
+ schema: OutputSchema<T>,
641
+ options: RunWithToolsOptions = {},
642
+ ): AsyncIterable<AgentStreamEvent<T>> {
643
+ const resolved = await this.resolveMcp(options.mcpServers ?? [])
644
+ try {
645
+ yield* this._streamLoopWithSchema(
646
+ [...tools, ...resolved.tools],
647
+ messages,
648
+ schema,
649
+ options,
650
+ )
651
+ } finally {
652
+ await resolved.close()
653
+ }
654
+ }
655
+
656
+ private async *_streamLoopWithSchema<T>(
657
+ tools: readonly Tool[],
658
+ messages: readonly Message[],
659
+ schema: OutputSchema<T>,
660
+ options: RunWithToolsOptions,
661
+ ): AsyncIterable<AgentStreamEvent<T>> {
662
+ const maxIterations = options.maxIterations ?? 10
663
+ const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
664
+ const workingMessages: Message[] = [...messages]
665
+ const aggregated: ChatUsage = {
666
+ inputTokens: 0,
667
+ outputTokens: 0,
668
+ cacheReadTokens: 0,
669
+ cacheCreationTokens: 0,
670
+ }
671
+ let iterations = 0
672
+
673
+ while (true) {
674
+ checkAborted(options.signal)
675
+ yield { type: 'iteration_start', iteration: iterations }
676
+
677
+ const baseParams = this.buildParams(workingMessages, options, tools)
678
+ baseParams.response_format = {
679
+ type: 'json_schema',
680
+ json_schema: {
681
+ name: schema.name,
682
+ ...(schema.description !== undefined ? { description: schema.description } : {}),
683
+ schema: schema.jsonSchema,
684
+ strict: true,
685
+ },
686
+ }
687
+ const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
688
+ ...baseParams,
689
+ stream: true,
690
+ stream_options: { include_usage: true },
691
+ }
692
+ const stream = await this.client.chat.completions.create(params, reqOpts(options))
693
+
694
+ let textBuf = ''
695
+ // Tracks: per index, the running entry; and whether
696
+ // `tool_use_start` has already been emitted (we emit once the
697
+ // first chunk brings the id + name).
698
+ const toolCallsByIndex: Map<
699
+ number,
700
+ { id?: string; name?: string; args: string; started: boolean }
701
+ > = new Map()
702
+ let finishReason: string | null = null
703
+ let lastUsage: OpenAI.CompletionUsage | undefined
704
+
705
+ for await (const chunk of stream) {
706
+ const choice = chunk.choices[0]
707
+ const delta = choice?.delta
708
+ if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
709
+ textBuf += delta.content
710
+ yield { type: 'text', delta: delta.content }
711
+ }
712
+ if (delta?.tool_calls) {
713
+ for (const tc of delta.tool_calls) {
714
+ const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false }
715
+ if (tc.id) entry.id = tc.id
716
+ if (tc.function?.name) entry.name = tc.function.name
717
+ toolCallsByIndex.set(tc.index, entry)
718
+ // Emit `tool_use_start` once id+name are both known.
719
+ // OpenAI typically delivers them in the same first
720
+ // chunk for a given tool call.
721
+ if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
722
+ entry.started = true
723
+ yield { type: 'tool_use_start', id: entry.id, name: entry.name }
724
+ }
725
+ if (tc.function?.arguments) {
726
+ entry.args += tc.function.arguments
727
+ // Emit a delta only after start has fired — apps relying
728
+ // on an id wouldn't have one until then.
729
+ if (entry.started && entry.id !== undefined) {
730
+ yield {
731
+ type: 'tool_use_delta',
732
+ id: entry.id,
733
+ argsDelta: tc.function.arguments,
734
+ }
735
+ }
736
+ }
737
+ }
738
+ }
739
+ if (choice?.finish_reason) finishReason = choice.finish_reason
740
+ if (chunk.usage) lastUsage = chunk.usage
741
+ }
742
+
743
+ addUsage(aggregated, lastUsage)
744
+ yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
745
+
746
+ const assistantBlocks: ContentBlock[] = []
747
+ if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
748
+ const orderedCalls = [...toolCallsByIndex.entries()]
749
+ .sort(([a], [b]) => a - b)
750
+ .map(([, v]) => v)
751
+ for (const call of orderedCalls) {
752
+ if (!call.id || !call.name) continue
753
+ let parsedInput: unknown = {}
754
+ try {
755
+ parsedInput = call.args ? JSON.parse(call.args) : {}
756
+ } catch {
757
+ parsedInput = call.args
758
+ }
759
+ assistantBlocks.push({
760
+ type: 'tool_use',
761
+ id: call.id,
762
+ name: call.name,
763
+ input: parsedInput,
764
+ } satisfies ToolUseBlock)
765
+ }
766
+ const assistantContent: string | ContentBlock[] =
767
+ assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
768
+ ? assistantBlocks[0].text
769
+ : assistantBlocks
770
+ workingMessages.push({ role: 'assistant', content: assistantContent })
771
+
772
+ if (finishReason !== 'tool_calls' || orderedCalls.length === 0) {
773
+ const text = textBuf
774
+ const value = parseGenerated(text, schema)
775
+ yield {
776
+ type: 'stop',
777
+ stopReason: finishReason ?? 'stop',
254
778
  iterations,
255
- stopReason: 'max_iterations',
256
779
  usage: aggregated,
780
+ messages: workingMessages,
781
+ value,
782
+ text,
783
+ } as AgentStreamEvent<T>
784
+ return
785
+ }
786
+
787
+ const resultBlocks: ContentBlock[] = []
788
+ for (const call of orderedCalls) {
789
+ if (!call.id || !call.name) continue
790
+ let parsedInput: unknown
791
+ let parseFailed: { content: string; isError: boolean } | undefined
792
+ try {
793
+ parsedInput = call.args ? JSON.parse(call.args) : {}
794
+ } catch (err) {
795
+ parseFailed = recoverOrThrow(
796
+ new ToolExecutionError(
797
+ call.name,
798
+ call.id,
799
+ new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
800
+ ),
801
+ options,
802
+ )
803
+ parsedInput = call.args
804
+ }
805
+ yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput }
806
+ const { content, isError } = parseFailed
807
+ ?? (await runToolWithRecovery(
808
+ toolMap.get(call.name),
809
+ call.name,
810
+ call.id,
811
+ parsedInput,
812
+ options,
813
+ ))
814
+ resultBlocks.push({
815
+ type: 'tool_result',
816
+ toolUseId: call.id,
817
+ content,
818
+ ...(isError ? { isError: true } : {}),
819
+ } satisfies ToolResultBlock)
820
+ yield {
821
+ type: 'tool_result',
822
+ id: call.id,
823
+ name: call.name,
824
+ content,
825
+ isError,
257
826
  }
258
827
  }
828
+ workingMessages.push({ role: 'user', content: resultBlocks })
829
+
830
+ iterations++
831
+ if (iterations >= maxIterations) {
832
+ const text = textBuf
833
+ const value = parseGenerated(text, schema)
834
+ yield {
835
+ type: 'stop',
836
+ stopReason: 'max_iterations',
837
+ iterations,
838
+ usage: aggregated,
839
+ messages: workingMessages,
840
+ value,
841
+ text,
842
+ } as AgentStreamEvent<T>
843
+ return
844
+ }
845
+ }
846
+ }
847
+
848
+ async transcribe(
849
+ audio: AudioSource,
850
+ options: TranscribeOptions = {},
851
+ ): Promise<TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse>> {
852
+ const model = options.model ?? this.defaultTranscribeModel
853
+ const file = await audioSourceToFile(audio)
854
+ const params: OpenAI.Audio.TranscriptionCreateParams = {
855
+ file,
856
+ model,
857
+ ...(options.language !== undefined ? { language: options.language } : {}),
858
+ ...(options.prompt !== undefined ? { prompt: options.prompt } : {}),
859
+ }
860
+ const response = await this.client.audio.transcriptions.create(
861
+ params,
862
+ options.signal !== undefined ? { signal: options.signal } : undefined,
863
+ )
864
+ // Whisper-1 returns { text, language?, duration? } when
865
+ // response_format is 'verbose_json'; we default to the SDK
866
+ // default (`json`) which only surfaces `text`. Apps that
867
+ // want language / duration from Whisper set
868
+ // `response_format: 'verbose_json'` via a raw SDK call;
869
+ // we can extend the option set when an app asks.
870
+ const text = 'text' in response && typeof response.text === 'string' ? response.text : ''
871
+ const result: TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse> = {
872
+ text,
873
+ model,
874
+ raw: response,
875
+ }
876
+ if ('language' in response && typeof response.language === 'string') {
877
+ result.language = response.language
878
+ }
879
+ if ('duration' in response && typeof response.duration === 'number') {
880
+ result.duration = response.duration
881
+ }
882
+ return result
883
+ }
884
+
885
+ async embed(
886
+ texts: readonly string[],
887
+ options: EmbedOptions = {},
888
+ ): Promise<EmbedResult<OpenAI.CreateEmbeddingResponse>> {
889
+ const model = options.model ?? this.defaultEmbedModel
890
+ const params: OpenAI.EmbeddingCreateParams = {
891
+ model,
892
+ input: texts as string[],
893
+ ...(options.dimensions !== undefined ? { dimensions: options.dimensions } : {}),
894
+ }
895
+ const response = await this.client.embeddings.create(
896
+ params,
897
+ options.signal !== undefined ? { signal: options.signal } : undefined,
898
+ )
899
+ return {
900
+ embeddings: response.data.map((d) => d.embedding),
901
+ model: response.model,
902
+ usage: { inputTokens: response.usage?.prompt_tokens ?? 0 },
903
+ raw: response,
259
904
  }
260
905
  }
261
906
 
@@ -274,7 +919,7 @@ export class OpenAIProvider implements Provider {
274
919
  strict: true,
275
920
  },
276
921
  }
277
- const response = await this.client.chat.completions.create(params)
922
+ const response = await this.client.chat.completions.create(params, reqOpts(options))
278
923
  const choice = response.choices[0]
279
924
  const text = choice?.message?.content ?? ''
280
925
  const value = parseGenerated(text, schema)
@@ -288,13 +933,38 @@ export class OpenAIProvider implements Provider {
288
933
  }
289
934
  }
290
935
 
936
/**
 * Shared MCP-resolution entry point for the tool-loop variants.
 *
 * With no servers it resolves immediately to an empty tool list and a
 * no-op `close`. Otherwise it delegates to `resolveMcpTools`, forwarding
 * `this.mcpClientFactory` as `clientFactory` and `this.mcpPool` as
 * `pool` — each only when it is set. Callers are expected to invoke
 * `close()` in a `finally` block.
 */
protected resolveMcp(servers: readonly MCPServer[]): Promise<{
  tools: Tool[]
  close: () => Promise<void>
}> {
  if (servers.length > 0) {
    // Optional collaborators are spread in as separate fragments so an
    // unset one never shows up as an explicit `undefined` key.
    const factoryPart = this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}
    const poolPart = this.mcpPool ? { pool: this.mcpPool } : {}
    return resolveMcpTools(servers, { ...factoryPart, ...poolPart })
  }
  // Nothing to connect to: hand back an inert result.
  return Promise.resolve({ tools: [], close: async () => {} })
}
954
+
291
955
  // ─── Param translation ──────────────────────────────────────────────────
292
956
 
293
- private buildParams(
957
+ protected buildParams(
294
958
  messages: readonly Message[],
295
959
  options: ChatOptions,
296
960
  tools: readonly Tool[],
297
961
  ): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
962
+ if (options.serverTools && options.serverTools.length > 0) {
963
+ throw new BrainError(
964
+ "OpenAIProvider: server tools (web_search / code_execution / web_fetch / url_context) are not supported on OpenAI's chat completions API. OpenAI's server tools live on the Responses API (separate provider slice). Run them as framework-local tools, route to Anthropic / Gemini, or wait for the OpenAIResponsesProvider slice.",
965
+ { context: { provider: 'openai' } },
966
+ )
967
+ }
298
968
  const model = options.model ?? this.defaultModel
299
969
  const params: OpenAI.Chat.ChatCompletionCreateParamsNonStreaming = {
300
970
  model,
@@ -389,6 +1059,55 @@ export class OpenAIProvider implements Provider {
389
1059
 
390
1060
  // ─── Shape converters ─────────────────────────────────────────────────────
391
1061
 
1062
/**
 * Derive the per-request options object passed as the SDK's second
 * argument. Currently only the abort `signal` is carried over; when no
 * signal is present the result is `undefined` rather than an empty bag.
 */
function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
  const { signal } = options
  if (signal === undefined) return undefined
  return { signal }
}
1066
+
1067
/**
 * Turn an `AudioSource` into a `File` suitable for upload through the
 * OpenAI SDK.
 *
 * - `base64` sources are decoded in memory and keep their declared
 *   media type.
 * - Any other source is fetched from `audio.url`; the media type comes
 *   from the response's `content-type` header, defaulting to
 *   `audio/mpeg` when the header is absent.
 *
 * `AudioSource` carries no filename, so one is synthesized as
 * `audio.<ext>` with the extension derived from the media type.
 *
 * @throws BrainError when the URL fetch returns a non-OK status.
 */
async function audioSourceToFile(audio: AudioSource): Promise<File> {
  if (audio.type === 'base64') {
    const decoded = Buffer.from(audio.data, 'base64')
    const fileName = `audio.${extFromMime(audio.mediaType)}`
    return new File([decoded], fileName, { type: audio.mediaType })
  }

  const { url } = audio
  const fetched = await fetch(url)
  if (!fetched.ok) {
    const { status, statusText } = fetched
    throw new BrainError(
      `OpenAIProvider.transcribe: failed to fetch audio at ${url}: ${status} ${statusText}.`,
      { context: { url, status } },
    )
  }

  const payload = await fetched.arrayBuffer()
  const contentType = fetched.headers.get('content-type') ?? 'audio/mpeg'
  const fileName = `audio.${extFromMime(contentType)}`
  return new File([payload], fileName, { type: contentType })
}
1091
+
1092
/**
 * Media type → filename extension used when synthesizing an upload name.
 * Keys are lower-case media types without parameters. Besides the
 * canonical types this includes aliases commonly seen in the wild
 * (`audio/x-m4a`, `audio/wave`, `audio/vnd.wave`, `audio/x-flac`,
 * `application/ogg`) and the `video/*` containers servers often report
 * for audio-only mp4 / webm recordings, so those no longer fall through
 * to the `mp3` default.
 */
const AUDIO_EXT_BY_MIME: ReadonlyMap<string, string> = new Map([
  ['audio/mp3', 'mp3'],
  ['audio/mpeg', 'mp3'],
  ['audio/mpga', 'mp3'],
  ['audio/wav', 'wav'],
  ['audio/x-wav', 'wav'],
  ['audio/wave', 'wav'],
  ['audio/vnd.wave', 'wav'],
  ['audio/ogg', 'ogg'],
  ['application/ogg', 'ogg'],
  ['audio/flac', 'flac'],
  ['audio/x-flac', 'flac'],
  ['audio/webm', 'webm'],
  ['video/webm', 'webm'],
  ['audio/aac', 'm4a'],
  ['audio/x-aac', 'm4a'],
  ['audio/mp4', 'm4a'],
  ['audio/m4a', 'm4a'],
  ['audio/x-m4a', 'm4a'],
  ['video/mp4', 'mp4'],
])

/**
 * Pick a filename extension for an audio media type.
 *
 * Parameters are stripped (`audio/mpeg; codecs=...` → `audio/mpeg`) and
 * the comparison is case-insensitive and whitespace-tolerant.
 *
 * @param mime Media type, optionally with parameters.
 * @returns The mapped extension, or `'mp3'` for anything unrecognized
 *          (including the empty string).
 */
function extFromMime(mime: string): string {
  const essence = mime.split(';')[0]?.trim().toLowerCase() ?? ''
  return AUDIO_EXT_BY_MIME.get(essence) ?? 'mp3'
}
1103
+
1104
/**
 * Abort guard: does nothing when there is no signal or the signal has
 * not fired. Once the signal is aborted it throws the signal's own
 * `reason`; only when that reason is null or undefined does it fall
 * back to a `DOMException` named `AbortError`.
 */
function checkAborted(signal: AbortSignal | undefined): void {
  if (!signal?.aborted) return
  const reason: unknown = signal.reason
  if (reason != null) throw reason
  throw new DOMException('Aborted', 'AbortError')
}
1109
+ }
1110
+
392
1111
  function systemPromptText(system: SystemPrompt | undefined): string {
393
1112
  if (system === undefined) return ''
394
1113
  if (typeof system === 'string') return system
@@ -425,8 +1144,47 @@ function toOpenAIMessage(message: Message): OpenAI.Chat.ChatCompletionMessagePar
425
1144
  return param
426
1145
  }
427
1146
 
428
- // User-role multi-block content flatten text. MCP blocks (which
429
- // are read-only and Anthropic-specific) are silently dropped.
1147
+ // Document / audio aren't supported by OpenAI's chat completions
1148
+ // API. Throw with vendor-specific guidance so apps don't waste a
1149
+ // 400 trying to send a PDF.
1150
+ for (const block of message.content) {
1151
+ if (block.type === 'document') {
1152
+ throw new BrainError(
1153
+ "OpenAIProvider: document blocks are not supported on OpenAI's chat completions API. For PDFs, split the document to images (one per page) and send them as ImageBlocks on a vision-capable model (gpt-5 / gpt-4o family); or route document workloads to Anthropic / Gemini, which accept PDF blocks natively.",
1154
+ { context: { provider: 'openai' } },
1155
+ )
1156
+ }
1157
+ if (block.type === 'audio') {
1158
+ throw new BrainError(
1159
+ "OpenAIProvider: audio blocks are not supported on OpenAI's chat completions API. Transcribe audio upstream via OpenAI's Whisper / gpt-4o-transcribe and send the resulting text; or route audio workloads to Gemini, which accepts audio blocks natively.",
1160
+ { context: { provider: 'openai' } },
1161
+ )
1162
+ }
1163
+ }
1164
+
1165
+ // User-role multi-block content. If any image blocks are present,
1166
+ // emit OpenAI's multi-part content array (text + image_url
1167
+ // entries). Otherwise flatten text — keeps simple text messages
1168
+ // cleanly typed as strings. MCP blocks (read-only,
1169
+ // Anthropic-specific) are silently dropped.
1170
+ const images = message.content.filter((b): b is ImageBlock => b.type === 'image')
1171
+ if (images.length > 0) {
1172
+ const parts: OpenAI.Chat.ChatCompletionContentPart[] = []
1173
+ for (const block of message.content) {
1174
+ if (block.type === 'text') {
1175
+ parts.push({ type: 'text', text: block.text })
1176
+ } else if (block.type === 'image') {
1177
+ const url =
1178
+ block.source.type === 'base64'
1179
+ ? `data:${block.source.mediaType};base64,${block.source.data}`
1180
+ : block.source.url
1181
+ parts.push({ type: 'image_url', image_url: { url } })
1182
+ }
1183
+ // tool_result / tool_use / mcp blocks dropped from user content
1184
+ // (they're handled elsewhere or aren't valid on user turns).
1185
+ }
1186
+ return { role: 'user', content: parts }
1187
+ }
430
1188
  const text = message.content
431
1189
  .filter((b): b is TextBlock => b.type === 'text')
432
1190
  .map((b) => b.text)