@strav/brain 1.0.0-alpha.22 → 1.0.0-alpha.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/package.json +3 -3
  2. package/src/agent_runner.ts +1 -1
  3. package/src/{provider.ts → brain_driver.ts} +11 -10
  4. package/src/brain_error.ts +86 -10
  5. package/src/brain_manager.ts +30 -7
  6. package/src/brain_provider.ts +16 -16
  7. package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
  8. package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
  9. package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
  10. package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
  11. package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
  12. package/src/drivers/anthropic/index.ts +1 -0
  13. package/src/{providers/deepseek_provider.ts → drivers/deepseek/deepseek_brain_driver.ts} +10 -10
  14. package/src/drivers/deepseek/index.ts +1 -0
  15. package/src/{providers/gemini_provider.ts → drivers/gemini/gemini_brain_driver.ts} +21 -21
  16. package/src/drivers/gemini/index.ts +1 -0
  17. package/src/drivers/ollama/index.ts +1 -0
  18. package/src/{providers/ollama_provider.ts → drivers/ollama/ollama_brain_driver.ts} +5 -5
  19. package/src/drivers/openai/index.ts +1 -0
  20. package/src/{providers/openai_provider.ts → drivers/openai/openai_brain_driver.ts} +152 -591
  21. package/src/drivers/openai/openai_helpers.ts +58 -0
  22. package/src/drivers/openai/openai_message_builder.ts +187 -0
  23. package/src/drivers/openai/openai_response_mapper.ts +70 -0
  24. package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
  25. package/src/drivers/openai/openai_tool_loop.ts +191 -0
  26. package/src/drivers/openai_compat/index.ts +1 -0
  27. package/src/{providers/openai_compat_provider.ts → drivers/openai_compat/openai_compat_brain_driver.ts} +16 -16
  28. package/src/drivers/openai_responses/index.ts +1 -0
  29. package/src/{providers/openai_responses_provider.ts → drivers/openai_responses/openai_responses_brain_driver.ts} +24 -24
  30. package/src/index.ts +18 -12
  31. package/src/mcp/pool.ts +1 -1
  32. package/src/persistence/brain_message.ts +1 -1
  33. package/src/persistence/brain_message_repository.ts +3 -11
  34. package/src/persistence/brain_suspended_run.ts +1 -1
  35. package/src/persistence/brain_suspended_run_repository.ts +2 -11
  36. package/src/persistence/brain_thread.ts +1 -1
  37. package/src/persistence/brain_thread_repository.ts +2 -11
  38. package/src/persistence/index.ts +1 -1
  39. package/src/tool_runner.ts +1 -1
  40. package/src/types.ts +2 -2
  41. package/src/providers/anthropic_provider.ts +0 -1194
  42. /package/src/persistence/{schema → schemas}/brain_message_schema.ts +0 -0
  43. /package/src/persistence/{schema → schemas}/brain_suspended_run_schema.ts +0 -0
  44. /package/src/persistence/{schema → schemas}/brain_thread_schema.ts +0 -0
  45. /package/src/persistence/{schema → schemas}/index.ts +0 -0
@@ -1,5 +1,5 @@
1
1
  /**
2
- * `OpenAIProvider` — implementation of `Provider` backed by the
2
+ * `OpenAIBrainDriver` — implementation of `BrainDriver` backed by the
3
3
  * official `openai` SDK (chat completions API).
4
4
  *
5
5
  * Maps framework shapes to OpenAI's wire format:
@@ -48,23 +48,21 @@
48
48
  */
49
49
 
50
50
  import OpenAI from 'openai'
51
- import type { AgentResult } from '../agent_result.ts'
52
- import { BrainError } from '../brain_error.ts'
53
- import type { OpenAIProviderConfig } from '../brain_config.ts'
54
- import type { MCPServer } from '../mcp_server.ts'
55
- import type { AgentGenerateResult } from '../agent_generate_result.ts'
56
- import type { AgentStreamEvent } from '../agent_stream_event.ts'
57
- import { resolveMcpTools, type ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
58
- import { parseGenerated, type OutputSchema } from '../output_schema.ts'
59
- import { recoverOrThrow, runToolWithRecovery } from '../tool_runner.ts'
51
+ import type { AgentResult } from '../../agent_result.ts'
52
+ import { BrainError } from '../../brain_error.ts'
53
+ import type { OpenAIProviderConfig } from '../../brain_config.ts'
54
+ import type { MCPServer } from '../../mcp_server.ts'
55
+ import type { AgentGenerateResult } from '../../agent_generate_result.ts'
56
+ import type { AgentStreamEvent } from '../../agent_stream_event.ts'
57
+ import { resolveMcpTools, type ResolveMcpToolsOptions } from '../../mcp/resolve_mcp_tools.ts'
58
+ import { parseGenerated, type OutputSchema } from '../../output_schema.ts'
60
59
  import type {
61
- Provider,
60
+ BrainDriver,
62
61
  RunWithToolsOptions,
63
62
  RunWithToolsOptionsWithSuspend,
64
- } from '../provider.ts'
65
- import type { SuspendedRun } from '../suspended_run.ts'
66
- import type { Tool } from '../tool.ts'
67
- import { ToolExecutionError } from '../tool_execution_error.ts'
63
+ } from '../../brain_driver.ts'
64
+ import type { SuspendedRun } from '../../suspended_run.ts'
65
+ import type { Tool } from '../../tool.ts'
68
66
  import type {
69
67
  AudioSource,
70
68
  ChatOptions,
@@ -74,16 +72,38 @@ import type {
74
72
  EmbedOptions,
75
73
  EmbedResult,
76
74
  GenerateResult,
77
- ImageBlock,
78
75
  Message,
79
76
  StreamEvent,
80
- SystemPrompt,
81
- TextBlock,
82
77
  ToolResultBlock,
83
78
  ToolUseBlock,
84
79
  TranscribeOptions,
85
80
  TranscribeResult,
86
- } from '../types.ts'
81
+ } from '../../types.ts'
82
+ import {
83
+ audioSourceToFile,
84
+ checkAborted,
85
+ reqOpts,
86
+ } from './openai_helpers.ts'
87
+ import {
88
+ buildOpenAIChatParams,
89
+ toOpenAIMessages,
90
+ } from './openai_message_builder.ts'
91
+ import {
92
+ addOpenAIUsage,
93
+ toOpenAIChatResult,
94
+ toOpenAIUsage,
95
+ } from './openai_response_mapper.ts'
96
+ import {
97
+ assistantTurnFromStream,
98
+ executeToolCall,
99
+ orderStreamedCalls,
100
+ parseToolCallArgs,
101
+ type StreamedCallEntry,
102
+ } from './openai_tool_dispatch.ts'
103
+ import {
104
+ createNonStreamLoopState,
105
+ runOpenAINonStreamIteration,
106
+ } from './openai_tool_loop.ts'
87
107
 
88
108
  const DEFAULT_OPENAI_MODEL = 'gpt-5'
89
109
  const DEFAULT_OPENAI_EMBED_MODEL = 'text-embedding-3-small'
@@ -109,10 +129,10 @@ export interface OpenAIProviderOptions {
109
129
  mcpPool?: ResolveMcpToolsOptions['pool']
110
130
  }
111
131
 
112
- export class OpenAIProvider implements Provider {
132
+ export class OpenAIBrainDriver implements BrainDriver {
113
133
  readonly name: string
114
134
  // Protected (rather than private) so OpenAI-compatible drivers
115
- // can subclass — see `DeepSeekProvider`. Apps that want to plug
135
+ // can subclass — see `DeepSeekBrainDriver`. Apps that want to plug
116
136
  // in Groq / Together / Fireworks follow the same pattern: extend,
117
137
  // override the constructor's base URL + default model, optionally
118
138
  // override `buildParams` to suppress fields the upstream API
@@ -149,7 +169,7 @@ export class OpenAIProvider implements Provider {
149
169
  async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
150
170
  const params = this.buildParams(messages, options, [])
151
171
  const response = await this.client.chat.completions.create(params, reqOpts(options))
152
- return this.toChatResult(response)
172
+ return toOpenAIChatResult(response)
153
173
  }
154
174
 
155
175
  async *stream(
@@ -177,7 +197,7 @@ export class OpenAIProvider implements Provider {
177
197
  yield {
178
198
  type: 'stop',
179
199
  stopReason: finishReason,
180
- usage: toUsage(aggregatedUsage),
200
+ usage: toOpenAIUsage(aggregatedUsage),
181
201
  }
182
202
  }
183
203
 
@@ -211,121 +231,37 @@ export class OpenAIProvider implements Provider {
211
231
  ): Promise<AgentResult | SuspendedRun> {
212
232
  const maxIterations = options.maxIterations ?? 10
213
233
  const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
214
- const workingMessages: Message[] = [...messages]
215
- const aggregated: ChatUsage = {
216
- inputTokens: 0,
217
- outputTokens: 0,
218
- cacheReadTokens: 0,
219
- cacheCreationTokens: 0,
220
- }
221
- let iterations = 0
234
+ const state = createNonStreamLoopState(messages)
235
+ const buildParams = (msgs: readonly Message[]) => this.buildParams(msgs, options, tools)
222
236
 
223
237
  while (true) {
224
- checkAborted(options.signal)
225
- const params = this.buildParams(workingMessages, options, tools)
226
- const response = await this.client.chat.completions.create(params, reqOpts(options))
227
- addUsage(aggregated, response.usage)
228
-
229
- const choice = response.choices[0]
230
- if (!choice) {
231
- throw new BrainError('OpenAIProvider: response had no choices.')
232
- }
233
- const assistantMessage = choice.message
234
-
235
- // Append assistant turn to working messages so we send it back
236
- // verbatim on the next round-trip.
237
- workingMessages.push({
238
- role: 'assistant',
239
- content: fromOpenAIAssistantMessage(assistantMessage),
238
+ const outcome = await runOpenAINonStreamIteration({
239
+ state,
240
+ toolMap,
241
+ maxIterations,
242
+ client: this.client,
243
+ buildParams,
244
+ options,
245
+ suspendCheck: options.shouldSuspend,
240
246
  })
241
-
242
- const toolCalls = assistantMessage.tool_calls ?? []
243
- if (toolCalls.length === 0 || choice.finish_reason !== 'tool_calls') {
247
+ if (outcome.kind === 'continue') continue
248
+ if (outcome.kind === 'suspended') {
244
249
  return {
245
- text: assistantMessage.content ?? '',
246
- messages: workingMessages,
247
- iterations,
248
- stopReason: choice.finish_reason ?? 'stop',
249
- usage: aggregated,
250
- }
251
- }
252
-
253
- const resultBlocks: ContentBlock[] = []
254
- for (let i = 0; i < toolCalls.length; i++) {
255
- const call = toolCalls[i]!
256
- if (call.type !== 'function') continue
257
- let parsedInput: unknown
258
- let parseFailed: { content: string; isError: boolean } | undefined
259
- try {
260
- parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
261
- } catch (err) {
262
- parseFailed = recoverOrThrow(
263
- new ToolExecutionError(
264
- call.function.name,
265
- call.id,
266
- new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
267
- ),
268
- options,
269
- )
250
+ status: 'suspended',
251
+ pendingToolCalls: outcome.pendingToolCalls,
252
+ state: {
253
+ messages: state.workingMessages,
254
+ iterations: state.iterations,
255
+ usage: state.aggregated,
256
+ },
270
257
  }
271
- if (options.shouldSuspend && !parseFailed) {
272
- const frameworkCall: ToolUseBlock = {
273
- type: 'tool_use',
274
- id: call.id,
275
- name: call.function.name,
276
- input: (parsedInput ?? {}) as Record<string, unknown>,
277
- }
278
- if (await options.shouldSuspend(frameworkCall, options.context)) {
279
- const pending: ToolUseBlock[] = []
280
- for (let j = i; j < toolCalls.length; j++) {
281
- const c = toolCalls[j]!
282
- if (c.type !== 'function') continue
283
- let pInput: unknown = {}
284
- try {
285
- pInput = c.function.arguments ? JSON.parse(c.function.arguments) : {}
286
- } catch {
287
- pInput = c.function.arguments ?? {}
288
- }
289
- pending.push({
290
- type: 'tool_use',
291
- id: c.id,
292
- name: c.function.name,
293
- input: pInput as Record<string, unknown>,
294
- })
295
- }
296
- return {
297
- status: 'suspended',
298
- pendingToolCalls: pending,
299
- state: { messages: workingMessages, iterations, usage: aggregated },
300
- }
301
- }
302
- }
303
- const { content, isError } = parseFailed
304
- ?? (await runToolWithRecovery(
305
- toolMap.get(call.function.name),
306
- call.function.name,
307
- call.id,
308
- parsedInput,
309
- options,
310
- ))
311
- resultBlocks.push({
312
- type: 'tool_result',
313
- toolUseId: call.id,
314
- content,
315
- ...(isError ? { isError: true } : {}),
316
- } satisfies ToolResultBlock)
317
258
  }
318
- workingMessages.push({ role: 'user', content: resultBlocks })
319
-
320
- iterations++
321
- if (iterations >= maxIterations) {
322
- return {
323
- text: assistantMessage.content ?? '',
324
- messages: workingMessages,
325
- iterations,
326
- stopReason: 'max_iterations',
327
- usage: aggregated,
328
- }
259
+ return {
260
+ text: outcome.assistantText,
261
+ messages: state.workingMessages,
262
+ iterations: state.iterations,
263
+ stopReason: outcome.kind === 'max_iterations' ? 'max_iterations' : outcome.stopReason,
264
+ usage: state.aggregated,
329
265
  }
330
266
  }
331
267
  }
@@ -352,17 +288,9 @@ export class OpenAIProvider implements Provider {
352
288
  ): Promise<AgentGenerateResult<T>> {
353
289
  const maxIterations = options.maxIterations ?? 10
354
290
  const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
355
- const workingMessages: Message[] = [...messages]
356
- const aggregated: ChatUsage = {
357
- inputTokens: 0,
358
- outputTokens: 0,
359
- cacheReadTokens: 0,
360
- cacheCreationTokens: 0,
361
- }
362
- let iterations = 0
363
-
364
- while (true) {
365
- const params = this.buildParams(workingMessages, options, tools)
291
+ const state = createNonStreamLoopState(messages)
292
+ const buildParams = (msgs: readonly Message[]) => {
293
+ const params = this.buildParams(msgs, options, tools)
366
294
  params.response_format = {
367
295
  type: 'json_schema',
368
296
  json_schema: {
@@ -372,77 +300,37 @@ export class OpenAIProvider implements Provider {
372
300
  strict: true,
373
301
  },
374
302
  }
375
- const response = await this.client.chat.completions.create(params, reqOpts(options))
376
- addUsage(aggregated, response.usage)
303
+ return params
304
+ }
377
305
 
378
- const choice = response.choices[0]
379
- if (!choice) {
380
- throw new BrainError('OpenAIProvider: response had no choices.')
381
- }
382
- const assistantMessage = choice.message
383
- workingMessages.push({
384
- role: 'assistant',
385
- content: fromOpenAIAssistantMessage(assistantMessage),
306
+ while (true) {
307
+ const outcome = await runOpenAINonStreamIteration({
308
+ state,
309
+ toolMap,
310
+ maxIterations,
311
+ client: this.client,
312
+ buildParams,
313
+ options,
314
+ // Schema variant doesn't support suspension — the manager
315
+ // throws BrainError before reaching the loop when shouldSuspend
316
+ // is set on `runWithToolsAndSchema`. See `brain_driver.ts`.
317
+ suspendCheck: undefined,
386
318
  })
387
-
388
- const toolCalls = assistantMessage.tool_calls ?? []
389
- if (toolCalls.length === 0 || choice.finish_reason !== 'tool_calls') {
390
- const text = assistantMessage.content ?? ''
391
- return {
392
- value: parseGenerated(text, schema),
393
- text,
394
- messages: workingMessages,
395
- iterations,
396
- stopReason: choice.finish_reason ?? 'stop',
397
- usage: aggregated,
398
- }
399
- }
400
-
401
- const resultBlocks: ContentBlock[] = []
402
- for (const call of toolCalls) {
403
- if (call.type !== 'function') continue
404
- let parsedInput: unknown
405
- let parseFailed: { content: string; isError: boolean } | undefined
406
- try {
407
- parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
408
- } catch (err) {
409
- parseFailed = recoverOrThrow(
410
- new ToolExecutionError(
411
- call.function.name,
412
- call.id,
413
- new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
414
- ),
415
- options,
416
- )
417
- }
418
- const { content, isError } = parseFailed
419
- ?? (await runToolWithRecovery(
420
- toolMap.get(call.function.name),
421
- call.function.name,
422
- call.id,
423
- parsedInput,
424
- options,
425
- ))
426
- resultBlocks.push({
427
- type: 'tool_result',
428
- toolUseId: call.id,
429
- content,
430
- ...(isError ? { isError: true } : {}),
431
- } satisfies ToolResultBlock)
319
+ if (outcome.kind === 'continue') continue
320
+ if (outcome.kind === 'suspended') {
321
+ // Unreachable: suspendCheck is undefined so 'suspended' can't
322
+ // be produced. Defensive throw makes the assumption explicit.
323
+ throw new BrainError(
324
+ 'OpenAIBrainDriver: runWithToolsAndSchema received a suspension outcome but does not support it.',
325
+ )
432
326
  }
433
- workingMessages.push({ role: 'user', content: resultBlocks })
434
-
435
- iterations++
436
- if (iterations >= maxIterations) {
437
- const text = assistantMessage.content ?? ''
438
- return {
439
- value: parseGenerated(text, schema),
440
- text,
441
- messages: workingMessages,
442
- iterations,
443
- stopReason: 'max_iterations',
444
- usage: aggregated,
445
- }
327
+ return {
328
+ value: parseGenerated(outcome.assistantText, schema),
329
+ text: outcome.assistantText,
330
+ messages: state.workingMessages,
331
+ iterations: state.iterations,
332
+ stopReason: outcome.kind === 'max_iterations' ? 'max_iterations' : outcome.stopReason,
333
+ usage: state.aggregated,
446
334
  }
447
335
  }
448
336
  }
@@ -491,10 +379,7 @@ export class OpenAIProvider implements Provider {
491
379
  // Tracks: per index, the running entry; and whether
492
380
  // `tool_use_start` has already been emitted (we emit once the
493
381
  // first chunk brings the id + name).
494
- const toolCallsByIndex: Map<
495
- number,
496
- { id?: string; name?: string; args: string; started: boolean }
497
- > = new Map()
382
+ const toolCallsByIndex: Map<number, StreamedCallEntry> = new Map()
498
383
  let finishReason: string | null = null
499
384
  let lastUsage: OpenAI.CompletionUsage | undefined
500
385
 
@@ -536,35 +421,14 @@ export class OpenAIProvider implements Provider {
536
421
  if (chunk.usage) lastUsage = chunk.usage
537
422
  }
538
423
 
539
- addUsage(aggregated, lastUsage)
424
+ addOpenAIUsage(aggregated, lastUsage)
540
425
  yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
541
426
 
542
- // Materialize the assistant turn the same way runWithTools does.
543
- const assistantBlocks: ContentBlock[] = []
544
- if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
545
- const orderedCalls = [...toolCallsByIndex.entries()]
546
- .sort(([a], [b]) => a - b)
547
- .map(([, v]) => v)
548
- for (const call of orderedCalls) {
549
- if (!call.id || !call.name) continue
550
- let parsedInput: unknown = {}
551
- try {
552
- parsedInput = call.args ? JSON.parse(call.args) : {}
553
- } catch {
554
- parsedInput = call.args
555
- }
556
- assistantBlocks.push({
557
- type: 'tool_use',
558
- id: call.id,
559
- name: call.name,
560
- input: parsedInput,
561
- } satisfies ToolUseBlock)
562
- }
563
- const assistantContent: string | ContentBlock[] =
564
- assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
565
- ? assistantBlocks[0].text
566
- : assistantBlocks
567
- workingMessages.push({ role: 'assistant', content: assistantContent })
427
+ const orderedCalls = orderStreamedCalls(toolCallsByIndex)
428
+ workingMessages.push({
429
+ role: 'assistant',
430
+ content: assistantTurnFromStream(textBuf, orderedCalls),
431
+ })
568
432
 
569
433
  if (finishReason !== 'tool_calls' || orderedCalls.length === 0) {
570
434
  yield {
@@ -580,30 +444,21 @@ export class OpenAIProvider implements Provider {
580
444
  const resultBlocks: ContentBlock[] = []
581
445
  for (const call of orderedCalls) {
582
446
  if (!call.id || !call.name) continue
583
- let parsedInput: unknown
584
- let parseFailed: { content: string; isError: boolean } | undefined
585
- try {
586
- parsedInput = call.args ? JSON.parse(call.args) : {}
587
- } catch (err) {
588
- parseFailed = recoverOrThrow(
589
- new ToolExecutionError(
590
- call.name,
591
- call.id,
592
- new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
593
- ),
594
- options,
595
- )
596
- parsedInput = call.args
597
- }
447
+ const { parsedInput, parseFailed } = parseToolCallArgs(
448
+ call.name,
449
+ call.id,
450
+ call.args,
451
+ options,
452
+ )
598
453
  yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput }
599
- const { content, isError } = parseFailed
600
- ?? (await runToolWithRecovery(
601
- toolMap.get(call.name),
602
- call.name,
603
- call.id,
604
- parsedInput,
605
- options,
606
- ))
454
+ const { content, isError } = await executeToolCall(
455
+ call.name,
456
+ call.id,
457
+ parsedInput,
458
+ parseFailed,
459
+ toolMap,
460
+ options,
461
+ )
607
462
  resultBlocks.push({
608
463
  type: 'tool_result',
609
464
  toolUseId: call.id,
@@ -695,10 +550,7 @@ export class OpenAIProvider implements Provider {
695
550
  // Tracks: per index, the running entry; and whether
696
551
  // `tool_use_start` has already been emitted (we emit once the
697
552
  // first chunk brings the id + name).
698
- const toolCallsByIndex: Map<
699
- number,
700
- { id?: string; name?: string; args: string; started: boolean }
701
- > = new Map()
553
+ const toolCallsByIndex: Map<number, StreamedCallEntry> = new Map()
702
554
  let finishReason: string | null = null
703
555
  let lastUsage: OpenAI.CompletionUsage | undefined
704
556
 
@@ -740,34 +592,14 @@ export class OpenAIProvider implements Provider {
740
592
  if (chunk.usage) lastUsage = chunk.usage
741
593
  }
742
594
 
743
- addUsage(aggregated, lastUsage)
595
+ addOpenAIUsage(aggregated, lastUsage)
744
596
  yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
745
597
 
746
- const assistantBlocks: ContentBlock[] = []
747
- if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
748
- const orderedCalls = [...toolCallsByIndex.entries()]
749
- .sort(([a], [b]) => a - b)
750
- .map(([, v]) => v)
751
- for (const call of orderedCalls) {
752
- if (!call.id || !call.name) continue
753
- let parsedInput: unknown = {}
754
- try {
755
- parsedInput = call.args ? JSON.parse(call.args) : {}
756
- } catch {
757
- parsedInput = call.args
758
- }
759
- assistantBlocks.push({
760
- type: 'tool_use',
761
- id: call.id,
762
- name: call.name,
763
- input: parsedInput,
764
- } satisfies ToolUseBlock)
765
- }
766
- const assistantContent: string | ContentBlock[] =
767
- assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
768
- ? assistantBlocks[0].text
769
- : assistantBlocks
770
- workingMessages.push({ role: 'assistant', content: assistantContent })
598
+ const orderedCalls = orderStreamedCalls(toolCallsByIndex)
599
+ workingMessages.push({
600
+ role: 'assistant',
601
+ content: assistantTurnFromStream(textBuf, orderedCalls),
602
+ })
771
603
 
772
604
  if (finishReason !== 'tool_calls' || orderedCalls.length === 0) {
773
605
  const text = textBuf
@@ -787,30 +619,21 @@ export class OpenAIProvider implements Provider {
787
619
  const resultBlocks: ContentBlock[] = []
788
620
  for (const call of orderedCalls) {
789
621
  if (!call.id || !call.name) continue
790
- let parsedInput: unknown
791
- let parseFailed: { content: string; isError: boolean } | undefined
792
- try {
793
- parsedInput = call.args ? JSON.parse(call.args) : {}
794
- } catch (err) {
795
- parseFailed = recoverOrThrow(
796
- new ToolExecutionError(
797
- call.name,
798
- call.id,
799
- new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
800
- ),
801
- options,
802
- )
803
- parsedInput = call.args
804
- }
622
+ const { parsedInput, parseFailed } = parseToolCallArgs(
623
+ call.name,
624
+ call.id,
625
+ call.args,
626
+ options,
627
+ )
805
628
  yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput }
806
- const { content, isError } = parseFailed
807
- ?? (await runToolWithRecovery(
808
- toolMap.get(call.name),
809
- call.name,
810
- call.id,
811
- parsedInput,
812
- options,
813
- ))
629
+ const { content, isError } = await executeToolCall(
630
+ call.name,
631
+ call.id,
632
+ parsedInput,
633
+ parseFailed,
634
+ toolMap,
635
+ options,
636
+ )
814
637
  resultBlocks.push({
815
638
  type: 'tool_result',
816
639
  toolUseId: call.id,
@@ -928,7 +751,7 @@ export class OpenAIProvider implements Provider {
928
751
  text,
929
752
  model: response.model,
930
753
  stopReason: choice?.finish_reason ?? null,
931
- usage: toUsage(response.usage),
754
+ usage: toOpenAIUsage(response.usage),
932
755
  raw: response,
933
756
  }
934
757
  }
@@ -954,282 +777,20 @@ export class OpenAIProvider implements Provider {
954
777
 
955
778
  // ─── Param translation ──────────────────────────────────────────────────
956
779
 
780
+ /**
781
+ * Thin wrapper around `buildOpenAIChatParams` so `OpenAICompatBrainDriver`
782
+ * subclasses can still override the request shape via `super.buildParams(...)`
783
+ * (e.g. strip `reasoning_effort` for endpoints that reject it). Pure
784
+ * translation lives in `openai_message_builder.ts`.
785
+ */
957
786
  protected buildParams(
958
787
  messages: readonly Message[],
959
788
  options: ChatOptions,
960
789
  tools: readonly Tool[],
961
790
  ): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
962
- if (options.serverTools && options.serverTools.length > 0) {
963
- throw new BrainError(
964
- "OpenAIProvider: server tools (web_search / code_execution / web_fetch / url_context) are not supported on OpenAI's chat completions API. OpenAI's server tools live on the Responses API (separate provider slice). Run them as framework-local tools, route to Anthropic / Gemini, or wait for the OpenAIResponsesProvider slice.",
965
- { context: { provider: 'openai' } },
966
- )
967
- }
968
- const model = options.model ?? this.defaultModel
969
- const params: OpenAI.Chat.ChatCompletionCreateParamsNonStreaming = {
970
- model,
971
- max_completion_tokens: options.maxTokens ?? this.defaultMaxTokens,
972
- messages: this.toMessages(options.system, messages),
973
- }
974
-
975
- if (tools.length > 0) {
976
- params.tools = tools.map((t) => ({
977
- type: 'function',
978
- function: {
979
- name: t.name,
980
- description: t.description,
981
- parameters: t.inputSchema as Record<string, unknown>,
982
- },
983
- }))
984
- }
985
-
986
- // Reasoning controls — only emitted when explicitly set so
987
- // non-reasoning models don't get rejected.
988
- if (options.effort !== undefined) {
989
- params.reasoning_effort = options.effort as OpenAI.ReasoningEffort
990
- } else if (options.thinking === 'adaptive') {
991
- params.reasoning_effort = 'medium' as OpenAI.ReasoningEffort
992
- } else if (options.thinking === 'disabled') {
993
- params.reasoning_effort = 'minimal' as OpenAI.ReasoningEffort
994
- }
995
-
996
- // `cache` is a no-op on OpenAI — prompt caching is automatic.
997
- // We accept the flag silently so apps that target both providers
998
- // with the same options object don't have to special-case.
999
-
1000
- return params
1001
- }
1002
-
1003
- private toMessages(
1004
- system: SystemPrompt | undefined,
1005
- messages: readonly Message[],
1006
- ): OpenAI.Chat.ChatCompletionMessageParam[] {
1007
- const out: OpenAI.Chat.ChatCompletionMessageParam[] = []
1008
- const systemText = systemPromptText(system)
1009
- if (systemText.length > 0) {
1010
- out.push({ role: 'system', content: systemText })
1011
- }
1012
- for (const message of messages) {
1013
- // User-role messages with tool results in their content fan
1014
- // out into one `tool`-role message per result — OpenAI's
1015
- // contract is "one tool_call_id per tool message," not a
1016
- // single user message carrying multiple results.
1017
- if (
1018
- message.role === 'user' &&
1019
- Array.isArray(message.content) &&
1020
- message.content.some((b) => b.type === 'tool_result')
1021
- ) {
1022
- const remainingText: string[] = []
1023
- for (const block of message.content) {
1024
- if (block.type === 'tool_result') {
1025
- out.push({
1026
- role: 'tool',
1027
- tool_call_id: block.toolUseId,
1028
- content: typeof block.content === 'string'
1029
- ? block.content
1030
- : block.content.map((t) => t.text).join(''),
1031
- })
1032
- } else if (block.type === 'text') {
1033
- remainingText.push(block.text)
1034
- }
1035
- }
1036
- if (remainingText.length > 0) {
1037
- out.push({ role: 'user', content: remainingText.join('') })
1038
- }
1039
- continue
1040
- }
1041
- out.push(toOpenAIMessage(message))
1042
- }
1043
- return out
1044
- }
1045
-
1046
- private toChatResult(
1047
- response: OpenAI.Chat.ChatCompletion,
1048
- ): ChatResult<OpenAI.Chat.ChatCompletion> {
1049
- const choice = response.choices[0]
1050
- return {
1051
- text: choice?.message?.content ?? '',
1052
- model: response.model,
1053
- stopReason: choice?.finish_reason ?? null,
1054
- usage: toUsage(response.usage),
1055
- raw: response,
1056
- }
1057
- }
1058
- }
1059
-
1060
- // ─── Shape converters ─────────────────────────────────────────────────────
1061
-
1062
- /** Build the request-options bag forwarded to the SDK. Only `signal` for now. */
1063
- function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
1064
- return options.signal !== undefined ? { signal: options.signal } : undefined
1065
- }
1066
-
1067
- /**
1068
- * Materialize an `AudioSource` as a `File` the OpenAI SDK's
1069
- * `Uploadable` shape accepts. Base64 → in-memory File; URL →
1070
- * fetch + wrap. The SDK wants a filename; we synthesize one
1071
- * since `AudioSource` doesn't carry one. The extension lets the
1072
- * SDK pick the right content-type for the multipart upload.
1073
- */
1074
- async function audioSourceToFile(audio: AudioSource): Promise<File> {
1075
- if (audio.type === 'base64') {
1076
- const bytes = Buffer.from(audio.data, 'base64')
1077
- const ext = extFromMime(audio.mediaType)
1078
- return new File([bytes], `audio.${ext}`, { type: audio.mediaType })
1079
- }
1080
- const response = await fetch(audio.url)
1081
- if (!response.ok) {
1082
- throw new BrainError(
1083
- `OpenAIProvider.transcribe: failed to fetch audio at ${audio.url}: ${response.status} ${response.statusText}.`,
1084
- { context: { url: audio.url, status: response.status } },
1085
- )
1086
- }
1087
- const buf = await response.arrayBuffer()
1088
- const mime = response.headers.get('content-type') ?? 'audio/mpeg'
1089
- return new File([buf], `audio.${extFromMime(mime)}`, { type: mime })
1090
- }
1091
-
1092
- function extFromMime(mime: string): string {
1093
- // Strip parameters (`audio/mpeg; codecs=...` → `audio/mpeg`).
1094
- const m = mime.split(';')[0]?.trim().toLowerCase() ?? ''
1095
- if (m === 'audio/mp3' || m === 'audio/mpeg' || m === 'audio/mpga') return 'mp3'
1096
- if (m === 'audio/wav' || m === 'audio/x-wav') return 'wav'
1097
- if (m === 'audio/ogg') return 'ogg'
1098
- if (m === 'audio/flac') return 'flac'
1099
- if (m === 'audio/webm') return 'webm'
1100
- if (m === 'audio/aac' || m === 'audio/x-aac' || m === 'audio/mp4' || m === 'audio/m4a') return 'm4a'
1101
- return 'mp3'
1102
- }
1103
-
1104
- /** Throw a DOMException-shaped abort error if the signal has fired. */
1105
- function checkAborted(signal: AbortSignal | undefined): void {
1106
- if (signal?.aborted) {
1107
- throw signal.reason ?? new DOMException('Aborted', 'AbortError')
1108
- }
1109
- }
1110
-
1111
- function systemPromptText(system: SystemPrompt | undefined): string {
1112
- if (system === undefined) return ''
1113
- if (typeof system === 'string') return system
1114
- if (Array.isArray(system)) return system.map((b) => b.text).join('\n')
1115
- return system.text
1116
- }
1117
-
1118
/**
 * Convert one framework message into the OpenAI chat-completions
 * message shape.
 *
 * - String content is passed through under the message's own role.
 * - Assistant block content: text blocks are concatenated into
 *   `content`; `tool_use` blocks become `tool_calls` whose arguments
 *   are the JSON-stringified block input (`{}` when the input is
 *   nullish). Either field is omitted when it would be empty.
 * - Any other block content is emitted as a `user` message: a
 *   multi-part array (text + image_url, in original order) when at
 *   least one image block is present, otherwise the concatenated text.
 *
 * @throws BrainError when non-assistant block content contains a
 *   `document` or `audio` block; the first such block in order decides
 *   which error is raised.
 */
function toOpenAIMessage(message: Message): OpenAI.Chat.ChatCompletionMessageParam {
  const { role, content } = message

  if (typeof content === 'string') {
    return { role, content } as OpenAI.Chat.ChatCompletionMessageParam
  }

  // Assistant turns: one pass that separates prose from tool calls.
  if (role === 'assistant') {
    const textPieces: string[] = []
    const toolCalls: NonNullable<OpenAI.Chat.ChatCompletionAssistantMessageParam['tool_calls']> = []
    for (const block of content) {
      if (block.type === 'text') {
        textPieces.push(block.text)
      } else if (block.type === 'tool_use') {
        toolCalls.push({
          id: block.id,
          type: 'function',
          function: {
            name: block.name,
            arguments: JSON.stringify(block.input ?? {}),
          },
        })
      }
    }
    const joined = textPieces.join('')
    const assistant: OpenAI.Chat.ChatCompletionAssistantMessageParam = { role: 'assistant' }
    if (joined.length > 0) assistant.content = joined
    if (toolCalls.length > 0) assistant.tool_calls = toolCalls
    return assistant
  }

  // Chat completions cannot take document / audio blocks. Fail early
  // with vendor-specific guidance rather than spending a request on a
  // guaranteed 400.
  for (const block of content) {
    switch (block.type) {
      case 'document':
        throw new BrainError(
          "OpenAIProvider: document blocks are not supported on OpenAI's chat completions API. For PDFs, split the document to images (one per page) and send them as ImageBlocks on a vision-capable model (gpt-5 / gpt-4o family); or route document workloads to Anthropic / Gemini, which accept PDF blocks natively.",
          { context: { provider: 'openai' } },
        )
      case 'audio':
        throw new BrainError(
          "OpenAIProvider: audio blocks are not supported on OpenAI's chat completions API. Transcribe audio upstream via OpenAI's Whisper / gpt-4o-transcribe and send the resulting text; or route audio workloads to Gemini, which accepts audio blocks natively.",
          { context: { provider: 'openai' } },
        )
      default:
        break
    }
  }

  // Text-only user content stays a plain string so simple messages
  // keep the simple shape.
  const hasImage = content.some((block) => block.type === 'image')
  if (!hasImage) {
    const textOnly: string[] = []
    for (const block of content) {
      if (block.type === 'text') textOnly.push(block.text)
    }
    return { role: 'user', content: textOnly.join('') }
  }

  // At least one image: emit the multi-part array. Anything that is
  // neither text nor image (tool_result / tool_use / mcp) is dropped
  // from user content.
  const parts: OpenAI.Chat.ChatCompletionContentPart[] = []
  for (const block of content) {
    switch (block.type) {
      case 'text':
        parts.push({ type: 'text', text: block.text })
        break
      case 'image': {
        const { source } = block
        const url =
          source.type === 'base64'
            ? `data:${source.mediaType};base64,${source.data}`
            : source.url
        parts.push({ type: 'image_url', image_url: { url } })
        break
      }
      default:
        break
    }
  }
  return { role: 'user', content: parts }
}
1194
-
1195
/**
 * Convert an OpenAI assistant message into framework content.
 *
 * Returns the bare text when the message carries text and no function
 * tool calls. Otherwise returns a block list: an optional leading text
 * block followed by one `tool_use` block per function tool call.
 * Non-function tool calls are skipped. The list is empty when the
 * message has neither text nor function calls.
 *
 * Tool-call arguments are JSON-decoded. Empty arguments become `{}`,
 * and arguments that fail to parse are passed through as the raw
 * string.
 */
function fromOpenAIAssistantMessage(
  msg: OpenAI.Chat.ChatCompletionMessage,
): string | ContentBlock[] {
  // Best-effort decode: never throws, falls back to the raw payload.
  const decodeArguments = (raw: string | undefined): unknown => {
    try {
      return raw ? JSON.parse(raw) : {}
    } catch {
      return raw ?? {}
    }
  }

  const toolUses: ToolUseBlock[] = []
  if (msg.tool_calls) {
    for (const call of msg.tool_calls) {
      if (call.type !== 'function') continue
      toolUses.push({
        type: 'tool_use',
        id: call.id,
        name: call.function.name,
        input: decodeArguments(call.function.arguments),
      } satisfies ToolUseBlock)
    }
  }

  if (!msg.content) return toolUses
  // Text with no tool calls collapses to a plain string.
  if (toolUses.length === 0) return msg.content
  return [{ type: 'text', text: msg.content }, ...toolUses]
}
1220
-
1221
- function toUsage(u: OpenAI.CompletionUsage | undefined): ChatUsage {
1222
- return {
1223
- inputTokens: u?.prompt_tokens ?? 0,
1224
- outputTokens: u?.completion_tokens ?? 0,
1225
- cacheReadTokens: u?.prompt_tokens_details?.cached_tokens ?? 0,
1226
- cacheCreationTokens: 0,
791
+ return buildOpenAIChatParams(messages, options, tools, {
792
+ defaultModel: this.defaultModel,
793
+ defaultMaxTokens: this.defaultMaxTokens,
794
+ })
1227
795
  }
1228
796
  }
1229
-
1230
/**
 * Fold one response's token usage into a running total, in place.
 *
 * A missing usage record leaves the accumulator untouched. Cached
 * prompt tokens count as zero when the details are absent.
 * `cacheCreationTokens` is not modified here.
 */
function addUsage(acc: ChatUsage, u: OpenAI.CompletionUsage | undefined): void {
  if (u) {
    const { prompt_tokens, completion_tokens, prompt_tokens_details } = u
    acc.inputTokens += prompt_tokens
    acc.outputTokens += completion_tokens
    acc.cacheReadTokens += prompt_tokens_details?.cached_tokens ?? 0
  }
}