@strav/brain 1.0.0-alpha.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/package.json +23 -7
  2. package/src/agent.ts +43 -5
  3. package/src/agent_generate_result.ts +32 -0
  4. package/src/agent_result.ts +7 -0
  5. package/src/agent_runner.ts +218 -14
  6. package/src/agent_stream_event.ts +100 -0
  7. package/src/brain_config.ts +218 -1
  8. package/src/brain_driver.ts +247 -0
  9. package/src/brain_error.ts +86 -10
  10. package/src/brain_manager.ts +359 -11
  11. package/src/brain_provider.ts +79 -9
  12. package/src/drivers/anthropic/anthropic_brain_driver.ts +641 -0
  13. package/src/drivers/anthropic/anthropic_helpers.ts +65 -0
  14. package/src/drivers/anthropic/anthropic_message_builder.ts +258 -0
  15. package/src/drivers/anthropic/anthropic_response_mapper.ts +123 -0
  16. package/src/drivers/anthropic/anthropic_tool_loop.ts +246 -0
  17. package/src/drivers/anthropic/index.ts +1 -0
  18. package/src/drivers/deepseek/deepseek_brain_driver.ts +117 -0
  19. package/src/drivers/deepseek/index.ts +1 -0
  20. package/src/drivers/gemini/gemini_brain_driver.ts +1064 -0
  21. package/src/drivers/gemini/index.ts +1 -0
  22. package/src/drivers/minimax/index.ts +1 -0
  23. package/src/drivers/minimax/minimax_brain_driver.ts +84 -0
  24. package/src/drivers/ollama/index.ts +1 -0
  25. package/src/drivers/ollama/ollama_brain_driver.ts +86 -0
  26. package/src/drivers/openai/index.ts +1 -0
  27. package/src/drivers/openai/openai_brain_driver.ts +796 -0
  28. package/src/drivers/openai/openai_helpers.ts +58 -0
  29. package/src/drivers/openai/openai_message_builder.ts +187 -0
  30. package/src/drivers/openai/openai_response_mapper.ts +70 -0
  31. package/src/drivers/openai/openai_tool_dispatch.ts +127 -0
  32. package/src/drivers/openai/openai_tool_loop.ts +191 -0
  33. package/src/drivers/openai_compat/index.ts +1 -0
  34. package/src/drivers/openai_compat/openai_compat_brain_driver.ts +616 -0
  35. package/src/drivers/openai_responses/index.ts +1 -0
  36. package/src/drivers/openai_responses/openai_responses_brain_driver.ts +1015 -0
  37. package/src/drivers/openrouter/index.ts +1 -0
  38. package/src/drivers/openrouter/openrouter_brain_driver.ts +137 -0
  39. package/src/drivers/qwen/index.ts +1 -0
  40. package/src/drivers/qwen/qwen_brain_driver.ts +103 -0
  41. package/src/index.ts +75 -11
  42. package/src/mcp/client.ts +243 -0
  43. package/src/mcp/index.ts +23 -0
  44. package/src/mcp/oauth.ts +227 -0
  45. package/src/mcp/pool.ts +106 -0
  46. package/src/mcp/resolve_mcp_tools.ts +108 -0
  47. package/src/mcp_server.ts +63 -0
  48. package/src/output_schema.ts +72 -0
  49. package/src/persistence/brain_message.ts +34 -0
  50. package/src/persistence/brain_message_repository.ts +98 -0
  51. package/src/persistence/brain_store.ts +166 -0
  52. package/src/persistence/brain_suspended_run.ts +30 -0
  53. package/src/persistence/brain_suspended_run_repository.ts +59 -0
  54. package/src/persistence/brain_thread.ts +30 -0
  55. package/src/persistence/brain_thread_repository.ts +56 -0
  56. package/src/persistence/database_brain_store.ts +190 -0
  57. package/src/persistence/index.ts +48 -0
  58. package/src/persistence/schemas/brain_message_schema.ts +61 -0
  59. package/src/persistence/schemas/brain_suspended_run_schema.ts +58 -0
  60. package/src/persistence/schemas/brain_thread_schema.ts +50 -0
  61. package/src/persistence/schemas/index.ts +3 -0
  62. package/src/suspended_run.ts +153 -0
  63. package/src/thread.ts +40 -1
  64. package/src/tool.ts +7 -0
  65. package/src/tool_runner.ts +81 -0
  66. package/src/translate/index.ts +19 -0
  67. package/src/translate/translate_cache.ts +78 -0
  68. package/src/translate/translate_provider.ts +46 -0
  69. package/src/translate/translator.ts +271 -0
  70. package/src/types.ts +398 -1
  71. package/src/zod/index.ts +121 -0
  72. package/src/provider.ts +0 -74
  73. package/src/providers/anthropic_provider.ts +0 -397
@@ -0,0 +1,796 @@
1
+ /**
2
+ * `OpenAIBrainDriver` — implementation of `BrainDriver` backed by the
3
+ * official `openai` SDK (chat completions API).
4
+ *
5
+ * Maps framework shapes to OpenAI's wire format:
6
+ *
7
+ * - `system` becomes the first message with `role: 'system'`.
8
+ * (OpenAI doesn't have a separate system field on chat
9
+ * completions; o1/o3 reasoning models accept `developer` as
10
+ * a synonym but `system` still works.)
11
+ *
12
+ * - `Message` with string content → `{role, content: string}`.
13
+ * `Message` with `ContentBlock[]`: text blocks concatenate into
14
+ * a single content string; `ToolUseBlock`s on assistant turns
15
+ * translate to `tool_calls`; `ToolResultBlock`s in user turns
16
+ * each become their own `{role: 'tool', tool_call_id, content}`
17
+ * message (OpenAI requires this layout, not a single user turn
18
+ * with mixed content like Anthropic's).
19
+ *
20
+ * - `Tool[]` → `[{type: 'function', function: {name, description,
21
+ * parameters: tool.inputSchema}}]`. OpenAI wraps every tool in
22
+ * a `function` namespace where Anthropic uses flat tool
23
+ * definitions.
24
+ *
25
+ * - `MCPServer[]` → resolved via the local MCP client
26
+ * (`@strav/brain/mcp`). Each server is dialed, its tools are
27
+ * discovered, and they're merged with locally-defined tools.
28
+ * The agentic loop then treats them uniformly. Tool names are
29
+ * namespaced `<server>__<tool>` to avoid collisions. Transports
30
+ * are closed in a `finally` once the loop exits.
31
+ *
32
+ * - `cache: true` is a no-op. OpenAI auto-caches; there's no
33
+ * per-block cache_control to set. The framework flag is
34
+ * accepted (so config that targets both providers still
35
+ * works) but doesn't emit anything to the wire.
36
+ *
37
+ * - `thinking: 'adaptive'` maps to `reasoning_effort: 'medium'`
38
+ * on reasoning models (o1, o3, o5, etc.); `'disabled'` maps
39
+ * to `reasoning_effort: 'minimal'`. Non-reasoning models
40
+ * silently ignore the field.
41
+ *
42
+ * - `effort` (when set) maps directly to `reasoning_effort`
43
+ * when supported by the model.
44
+ *
45
+ * - `countTokens` is NOT implemented — OpenAI has no dedicated
46
+ * count endpoint. `BrainManager.countTokens` returns `null`
47
+ * when the configured provider doesn't expose the method.
48
+ */
49
+
50
+ import OpenAI from 'openai'
51
+ import type { AgentResult } from '../../agent_result.ts'
52
+ import { BrainError } from '../../brain_error.ts'
53
+ import type { OpenAIProviderConfig } from '../../brain_config.ts'
54
+ import type { MCPServer } from '../../mcp_server.ts'
55
+ import type { AgentGenerateResult } from '../../agent_generate_result.ts'
56
+ import type { AgentStreamEvent } from '../../agent_stream_event.ts'
57
+ import { resolveMcpTools, type ResolveMcpToolsOptions } from '../../mcp/resolve_mcp_tools.ts'
58
+ import { parseGenerated, type OutputSchema } from '../../output_schema.ts'
59
+ import type {
60
+ BrainDriver,
61
+ RunWithToolsOptions,
62
+ RunWithToolsOptionsWithSuspend,
63
+ } from '../../brain_driver.ts'
64
+ import type { SuspendedRun } from '../../suspended_run.ts'
65
+ import type { Tool } from '../../tool.ts'
66
+ import type {
67
+ AudioSource,
68
+ ChatOptions,
69
+ ChatResult,
70
+ ChatUsage,
71
+ ContentBlock,
72
+ EmbedOptions,
73
+ EmbedResult,
74
+ GenerateResult,
75
+ Message,
76
+ StreamEvent,
77
+ ToolResultBlock,
78
+ ToolUseBlock,
79
+ TranscribeOptions,
80
+ TranscribeResult,
81
+ } from '../../types.ts'
82
+ import {
83
+ audioSourceToFile,
84
+ checkAborted,
85
+ reqOpts,
86
+ } from './openai_helpers.ts'
87
+ import {
88
+ buildOpenAIChatParams,
89
+ toOpenAIMessages,
90
+ } from './openai_message_builder.ts'
91
+ import {
92
+ addOpenAIUsage,
93
+ toOpenAIChatResult,
94
+ toOpenAIUsage,
95
+ } from './openai_response_mapper.ts'
96
+ import {
97
+ assistantTurnFromStream,
98
+ executeToolCall,
99
+ orderStreamedCalls,
100
+ parseToolCallArgs,
101
+ type StreamedCallEntry,
102
+ } from './openai_tool_dispatch.ts'
103
+ import {
104
+ createNonStreamLoopState,
105
+ runOpenAINonStreamIteration,
106
+ } from './openai_tool_loop.ts'
107
+
108
/** Chat model used when the provider config does not set `defaultModel`. */
const DEFAULT_OPENAI_MODEL = 'gpt-5'

/** Embedding model used when the provider config does not set `defaultEmbedModel`. */
const DEFAULT_OPENAI_EMBED_MODEL = 'text-embedding-3-small'

/** Transcription model used when the provider config does not set `defaultTranscribeModel`. */
const DEFAULT_OPENAI_TRANSCRIBE_MODEL = 'whisper-1'
111
+
112
/**
 * Optional construction-time overrides accepted by `OpenAIBrainDriver`
 * in addition to its provider config.
 */
export interface OpenAIProviderOptions {
  /**
   * Pre-built SDK client. When supplied, the driver uses it as-is
   * instead of constructing one from the provider config.
   */
  client?: OpenAI
  /**
   * Internal test seam: a stub MCP client factory, so resolving MCP
   * tools never dials the network under test. Production code leaves
   * this unset and the default `MCPClient` is used.
   */
  mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
  /**
   * Optional shared MCP connection pool. With a pool configured,
   * `runWithTools` and its schema / streaming variants borrow MCP
   * clients from it rather than building new ones, and the per-call
   * cleanup turns into a no-op so transports outlive a single call.
   * The app creates one pool at boot, hands it to each provider that
   * needs local MCP, and remains its owner: it calls `pool.close()`
   * at shutdown.
   */
  mcpPool?: ResolveMcpToolsOptions['pool']
}
131
+
132
+ export class OpenAIBrainDriver implements BrainDriver {
133
+ readonly name: string
134
+ // Protected (rather than private) so OpenAI-compatible drivers
135
+ // can subclass — see `DeepSeekBrainDriver`. Apps that want to plug
136
+ // in Groq / Together / Fireworks follow the same pattern: extend,
137
+ // override the constructor's base URL + default model, optionally
138
+ // override `buildParams` to suppress fields the upstream API
139
+ // doesn't accept.
140
+ protected readonly client: OpenAI
141
+ protected readonly defaultModel: string
142
+ protected readonly defaultMaxTokens: number
143
+ protected readonly defaultEmbedModel: string
144
+ protected readonly defaultTranscribeModel: string
145
+ protected readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
146
+ protected readonly mcpPool?: ResolveMcpToolsOptions['pool']
147
+
148
/**
 * Creates a driver instance.
 *
 * @param name - Identifier stored on `this.name`.
 * @param config - Provider config. Supplies the API key, the optional
 *   base URL / organization, and optional default chat, embedding and
 *   transcription models plus a default max-token budget (4096 when unset).
 * @param options - Optional overrides: a pre-built SDK client and the
 *   MCP client factory / pool forwarded to MCP tool resolution.
 */
constructor(
  name: string,
  config: OpenAIProviderConfig,
  options: OpenAIProviderOptions = {},
) {
  const { client, mcpClientFactory, mcpPool } = options

  // Only invoked when no client was injected, so an injected client
  // never triggers SDK construction.
  const createSdkClient = (): OpenAI => {
    const { apiKey, baseUrl, organization } = config
    return new OpenAI({
      apiKey,
      // Optional fields are omitted entirely (not passed as `undefined`).
      ...(baseUrl === undefined ? {} : { baseURL: baseUrl }),
      ...(organization === undefined ? {} : { organization }),
    })
  }

  this.name = name
  this.defaultModel = config.defaultModel ?? DEFAULT_OPENAI_MODEL
  this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
  this.defaultEmbedModel = config.defaultEmbedModel ?? DEFAULT_OPENAI_EMBED_MODEL
  this.defaultTranscribeModel = config.defaultTranscribeModel ?? DEFAULT_OPENAI_TRANSCRIBE_MODEL
  this.mcpClientFactory = mcpClientFactory
  this.mcpPool = mcpPool
  this.client = client ?? createSdkClient()
}
168
+
169
/**
 * Runs one non-streaming chat completion with no tools attached.
 *
 * @param messages - Conversation to send.
 * @param options - Chat options; also used to derive the per-request
 *   SDK options via `reqOpts`.
 * @returns The SDK response mapped through `toOpenAIChatResult`.
 */
async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
  const request = this.buildParams(messages, options, [])
  const requestOptions = reqOpts(options)
  const completion = await this.client.chat.completions.create(request, requestOptions)
  return toOpenAIChatResult(completion)
}
174
+
175
/**
 * Streams a single chat completion with no tools attached.
 *
 * Yields one `text` event per non-empty content delta, then exactly
 * one `stop` event carrying the last finish reason seen (or `null`)
 * and the usage mapped by `toOpenAIUsage`.
 *
 * @param messages - Conversation to send.
 * @param options - Chat options; also used to derive the per-request
 *   SDK options via `reqOpts`.
 */
async *stream(
  messages: readonly Message[],
  options: ChatOptions = {},
): AsyncIterable<StreamEvent> {
  const request: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
    ...this.buildParams(messages, options, []),
    stream: true,
    // Ask the API to include token usage in the stream.
    stream_options: { include_usage: true },
  }
  const chunks = await this.client.chat.completions.create(request, reqOpts(options))

  let stopReason: string | null = null
  let usage: OpenAI.CompletionUsage | undefined
  for await (const chunk of chunks) {
    const firstChoice = chunk.choices[0]
    const piece = firstChoice?.delta?.content
    if (typeof piece === 'string' && piece.length > 0) {
      yield { type: 'text', delta: piece }
    }
    if (firstChoice?.finish_reason) stopReason = firstChoice.finish_reason
    // Keep the most recent usage payload; earlier chunks may carry none.
    if (chunk.usage) usage = chunk.usage
  }

  yield { type: 'stop', stopReason, usage: toOpenAIUsage(usage) }
}
203
+
204
/**
 * Runs the non-streaming agentic tool loop.
 *
 * MCP servers listed in `options.mcpServers` are resolved first and
 * their tools appended after the locally supplied `tools`. The
 * resolved handle is closed in `finally`, whether the loop returns
 * or throws.
 *
 * The first overload (suspend-capable options) may resolve to a
 * `SuspendedRun`; the second resolves to an `AgentResult`.
 */
runWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptionsWithSuspend,
): Promise<AgentResult | SuspendedRun>
runWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options?: RunWithToolsOptions,
): Promise<AgentResult>
async runWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions = {},
): Promise<AgentResult | SuspendedRun> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const combinedTools = [...tools, ...mcp.tools]
    return await this._runLoop(messages, combinedTools, options)
  } finally {
    await mcp.close()
  }
}
226
+
227
/**
 * Drives `runOpenAINonStreamIteration` until it reports a terminal
 * outcome.
 *
 * - `continue`: run another iteration.
 * - `suspended`: return a `SuspendedRun` holding the pending tool
 *   calls plus the loop state (messages, iteration count, usage).
 * - anything else: return an `AgentResult`; the stop reason is
 *   `'max_iterations'` for that outcome kind and otherwise the
 *   outcome's own `stopReason`.
 *
 * `options.maxIterations` defaults to 10.
 */
private async _runLoop(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions,
): Promise<AgentResult | SuspendedRun> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const state = createNonStreamLoopState(messages)
  const paramsFor = (msgs: readonly Message[]) => this.buildParams(msgs, options, tools)

  for (;;) {
    const outcome = await runOpenAINonStreamIteration({
      state,
      toolMap: toolsByName,
      maxIterations: iterationCap,
      client: this.client,
      buildParams: paramsFor,
      options,
      suspendCheck: options.shouldSuspend,
    })

    switch (outcome.kind) {
      case 'continue':
        continue
      case 'suspended':
        return {
          status: 'suspended',
          pendingToolCalls: outcome.pendingToolCalls,
          state: {
            messages: state.workingMessages,
            iterations: state.iterations,
            usage: state.aggregated,
          },
        }
      default:
        return {
          text: outcome.assistantText,
          messages: state.workingMessages,
          iterations: state.iterations,
          stopReason: outcome.kind === 'max_iterations' ? 'max_iterations' : outcome.stopReason,
          usage: state.aggregated,
        }
    }
  }
}
268
+
269
/**
 * Non-streaming agentic tool loop whose final answer is parsed
 * against `schema`.
 *
 * MCP servers from `options.mcpServers` are resolved up front, their
 * tools appended after `tools`, and the resolved handle is closed in
 * `finally` regardless of how the loop ends.
 */
async runWithToolsAndSchema<T>(
  messages: readonly Message[],
  tools: readonly Tool[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions = {},
): Promise<AgentGenerateResult<T>> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const combinedTools = [...tools, ...mcp.tools]
    return await this._runLoopWithSchema(combinedTools, messages, schema, options)
  } finally {
    await mcp.close()
  }
}
282
+
283
/**
 * Schema-constrained variant of the non-streaming loop.
 *
 * Every request carries a strict `json_schema` response format built
 * from `schema`. When the loop ends, the assistant text is parsed
 * with `parseGenerated` and returned as `value` alongside the raw
 * text, messages, iteration count, stop reason and usage.
 *
 * Note the parameter order: `tools` comes before `messages` here.
 *
 * @throws BrainError if the iteration helper reports a suspension,
 *   which this variant does not support.
 */
private async _runLoopWithSchema<T>(
  tools: readonly Tool[],
  messages: readonly Message[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions,
): Promise<AgentGenerateResult<T>> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const state = createNonStreamLoopState(messages)

  // Base params plus the structured-output constraint, rebuilt for
  // every request the loop issues.
  const paramsFor = (msgs: readonly Message[]) => {
    const request = this.buildParams(msgs, options, tools)
    request.response_format = {
      type: 'json_schema',
      json_schema: {
        name: schema.name,
        ...(schema.description !== undefined ? { description: schema.description } : {}),
        schema: schema.jsonSchema,
        strict: true,
      },
    }
    return request
  }

  for (;;) {
    const outcome = await runOpenAINonStreamIteration({
      state,
      toolMap: toolsByName,
      maxIterations: iterationCap,
      client: this.client,
      buildParams: paramsFor,
      options,
      // Suspension is not supported by the schema variant — the
      // manager throws BrainError before reaching the loop when
      // shouldSuspend is set on `runWithToolsAndSchema`.
      // See `brain_driver.ts`.
      suspendCheck: undefined,
    })

    switch (outcome.kind) {
      case 'continue':
        continue
      case 'suspended':
        // Should never happen: with no suspendCheck the helper cannot
        // produce 'suspended'. The throw documents that assumption.
        throw new BrainError(
          'OpenAIBrainDriver: runWithToolsAndSchema received a suspension outcome but does not support it.',
        )
      default:
        // NOTE(review): the 'max_iterations' outcome is parsed too. If
        // its assistantText is not schema-conforming JSON, the result
        // depends on how `parseGenerated` treats invalid input — confirm.
        return {
          value: parseGenerated(outcome.assistantText, schema),
          text: outcome.assistantText,
          messages: state.workingMessages,
          iterations: state.iterations,
          stopReason: outcome.kind === 'max_iterations' ? 'max_iterations' : outcome.stopReason,
          usage: state.aggregated,
        }
    }
  }
}
337
+
338
/**
 * Streaming agentic tool loop.
 *
 * Resolves `options.mcpServers`, appends the discovered tools after
 * `tools`, and re-yields every event from the internal stream loop.
 * The resolved MCP handle is closed in `finally`.
 */
async *streamWithTools(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions = {},
): AsyncIterable<AgentStreamEvent> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const combinedTools = [...tools, ...mcp.tools]
    yield* this._streamLoop(messages, combinedTools, options)
  } finally {
    await mcp.close()
  }
}
350
+
351
/**
 * Streaming agentic loop.
 *
 * Per iteration, in order:
 *  1. check the abort signal and yield `iteration_start`;
 *  2. open a streaming completion and relay it: `text` per content
 *     delta, `tool_use_start` once a tool call's id and name are both
 *     known, `tool_use_delta` per argument fragment after that;
 *  3. fold the iteration's usage into the running total and yield
 *     `iteration_end`;
 *  4. append the assistant turn to the working messages;
 *  5. unless the finish reason is `'tool_calls'` with at least one
 *     call, yield `stop` and finish;
 *  6. otherwise execute each call in order (yielding `tool_use` then
 *     `tool_result`), append the results as a user turn, and stop with
 *     `'max_iterations'` once the cap (default 10) is reached.
 */
private async *_streamLoop(
  messages: readonly Message[],
  tools: readonly Tool[],
  options: RunWithToolsOptions,
): AsyncIterable<AgentStreamEvent> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const transcript: Message[] = [...messages]
  const usageTotal: ChatUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  }
  let round = 0

  for (;;) {
    checkAborted(options.signal)
    yield { type: 'iteration_start', iteration: round }

    const request: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
      ...this.buildParams(transcript, options, tools),
      stream: true,
      stream_options: { include_usage: true },
    }
    const chunks = await this.client.chat.completions.create(request, reqOpts(options))

    let assistantText = ''
    // One accumulating entry per tool-call index. `started` records
    // whether `tool_use_start` has been emitted for that call.
    const callsByIndex = new Map<number, StreamedCallEntry>()
    let stopReason: string | null = null
    let turnUsage: OpenAI.CompletionUsage | undefined

    for await (const chunk of chunks) {
      const choice = chunk.choices[0]
      const piece = choice?.delta?.content
      if (typeof piece === 'string' && piece.length > 0) {
        assistantText += piece
        yield { type: 'text', delta: piece }
      }

      for (const fragment of choice?.delta?.tool_calls ?? []) {
        const known = callsByIndex.get(fragment.index)
        const entry: StreamedCallEntry = known ?? { args: '', started: false }
        if (known === undefined) callsByIndex.set(fragment.index, entry)

        if (fragment.id) entry.id = fragment.id
        const fn = fragment.function
        if (fn?.name) entry.name = fn.name

        // `tool_use_start` fires once, as soon as id and name are both
        // known. OpenAI typically sends them together in the first
        // chunk of a given tool call.
        if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
          entry.started = true
          yield { type: 'tool_use_start', id: entry.id, name: entry.name }
        }

        if (fn?.arguments) {
          entry.args += fn.arguments
          // Deltas are only surfaced after start, so consumers always
          // have an id to attach them to.
          if (entry.started && entry.id !== undefined) {
            yield { type: 'tool_use_delta', id: entry.id, argsDelta: fn.arguments }
          }
        }
      }

      if (choice?.finish_reason) stopReason = choice.finish_reason
      if (chunk.usage) turnUsage = chunk.usage
    }

    addOpenAIUsage(usageTotal, turnUsage)
    yield { type: 'iteration_end', iteration: round, stopReason }

    const calls = orderStreamedCalls(callsByIndex)
    transcript.push({
      role: 'assistant',
      content: assistantTurnFromStream(assistantText, calls),
    })

    const wantsTools = stopReason === 'tool_calls' && calls.length > 0
    if (!wantsTools) {
      yield {
        type: 'stop',
        stopReason: stopReason ?? 'stop',
        iterations: round,
        usage: usageTotal,
        messages: transcript,
      }
      return
    }

    const toolResults: ContentBlock[] = []
    for (const call of calls) {
      // Calls that never received both an id and a name are skipped.
      if (!call.id || !call.name) continue
      const parsed = parseToolCallArgs(call.name, call.id, call.args, options)
      yield { type: 'tool_use', id: call.id, name: call.name, input: parsed.parsedInput }
      const executed = await executeToolCall(
        call.name,
        call.id,
        parsed.parsedInput,
        parsed.parseFailed,
        toolsByName,
        options,
      )
      toolResults.push({
        type: 'tool_result',
        toolUseId: call.id,
        content: executed.content,
        ...(executed.isError ? { isError: true } : {}),
      } satisfies ToolResultBlock)
      yield {
        type: 'tool_result',
        id: call.id,
        name: call.name,
        content: executed.content,
        isError: executed.isError,
      }
    }
    transcript.push({ role: 'user', content: toolResults })

    round += 1
    if (round >= iterationCap) {
      yield {
        type: 'stop',
        stopReason: 'max_iterations',
        iterations: round,
        usage: usageTotal,
        messages: transcript,
      }
      return
    }
  }
}
491
+
492
/**
 * Streaming agentic tool loop whose final answer is parsed against
 * `schema`.
 *
 * Resolves `options.mcpServers`, appends the discovered tools after
 * `tools`, and re-yields every event from the schema-aware stream
 * loop. The resolved MCP handle is closed in `finally`.
 */
async *streamWithToolsAndSchema<T>(
  messages: readonly Message[],
  tools: readonly Tool[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions = {},
): AsyncIterable<AgentStreamEvent<T>> {
  const mcp = await this.resolveMcp(options.mcpServers ?? [])
  try {
    const combinedTools = [...tools, ...mcp.tools]
    yield* this._streamLoopWithSchema(combinedTools, messages, schema, options)
  } finally {
    await mcp.close()
  }
}
510
+
511
/**
 * Schema-constrained streaming agentic loop.
 *
 * Emits the same event sequence as the plain streaming loop
 * (`iteration_start`, `text`, `tool_use_start`, `tool_use_delta`,
 * `iteration_end`, `tool_use`, `tool_result`, `stop`), with two
 * differences: every request carries a strict `json_schema` response
 * format built from `schema`, and each `stop` event also carries
 * `text` (the last iteration's assistant text) and `value` (that text
 * parsed by `parseGenerated`).
 *
 * Note the parameter order: `tools` comes before `messages` here.
 */
private async *_streamLoopWithSchema<T>(
  tools: readonly Tool[],
  messages: readonly Message[],
  schema: OutputSchema<T>,
  options: RunWithToolsOptions,
): AsyncIterable<AgentStreamEvent<T>> {
  const iterationCap = options.maxIterations ?? 10
  const toolsByName = new Map<string, Tool>()
  for (const tool of tools) toolsByName.set(tool.name, tool)
  const transcript: Message[] = [...messages]
  const usageTotal: ChatUsage = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheCreationTokens: 0,
  }
  let round = 0

  for (;;) {
    checkAborted(options.signal)
    yield { type: 'iteration_start', iteration: round }

    const constrained = this.buildParams(transcript, options, tools)
    constrained.response_format = {
      type: 'json_schema',
      json_schema: {
        name: schema.name,
        ...(schema.description !== undefined ? { description: schema.description } : {}),
        schema: schema.jsonSchema,
        strict: true,
      },
    }
    const request: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
      ...constrained,
      stream: true,
      stream_options: { include_usage: true },
    }
    const chunks = await this.client.chat.completions.create(request, reqOpts(options))

    let assistantText = ''
    // One accumulating entry per tool-call index. `started` records
    // whether `tool_use_start` has been emitted for that call.
    const callsByIndex = new Map<number, StreamedCallEntry>()
    let stopReason: string | null = null
    let turnUsage: OpenAI.CompletionUsage | undefined

    for await (const chunk of chunks) {
      const choice = chunk.choices[0]
      const piece = choice?.delta?.content
      if (typeof piece === 'string' && piece.length > 0) {
        assistantText += piece
        yield { type: 'text', delta: piece }
      }

      for (const fragment of choice?.delta?.tool_calls ?? []) {
        const known = callsByIndex.get(fragment.index)
        const entry: StreamedCallEntry = known ?? { args: '', started: false }
        if (known === undefined) callsByIndex.set(fragment.index, entry)

        if (fragment.id) entry.id = fragment.id
        const fn = fragment.function
        if (fn?.name) entry.name = fn.name

        // `tool_use_start` fires once, as soon as id and name are both
        // known. OpenAI typically sends them together in the first
        // chunk of a given tool call.
        if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
          entry.started = true
          yield { type: 'tool_use_start', id: entry.id, name: entry.name }
        }

        if (fn?.arguments) {
          entry.args += fn.arguments
          // Deltas are only surfaced after start, so consumers always
          // have an id to attach them to.
          if (entry.started && entry.id !== undefined) {
            yield { type: 'tool_use_delta', id: entry.id, argsDelta: fn.arguments }
          }
        }
      }

      if (choice?.finish_reason) stopReason = choice.finish_reason
      if (chunk.usage) turnUsage = chunk.usage
    }

    addOpenAIUsage(usageTotal, turnUsage)
    yield { type: 'iteration_end', iteration: round, stopReason }

    const calls = orderStreamedCalls(callsByIndex)
    transcript.push({
      role: 'assistant',
      content: assistantTurnFromStream(assistantText, calls),
    })

    const wantsTools = stopReason === 'tool_calls' && calls.length > 0
    if (!wantsTools) {
      const text = assistantText
      const value = parseGenerated(text, schema)
      yield {
        type: 'stop',
        stopReason: stopReason ?? 'stop',
        iterations: round,
        usage: usageTotal,
        messages: transcript,
        value,
        text,
      } as AgentStreamEvent<T>
      return
    }

    const toolResults: ContentBlock[] = []
    for (const call of calls) {
      // Calls that never received both an id and a name are skipped.
      if (!call.id || !call.name) continue
      const parsed = parseToolCallArgs(call.name, call.id, call.args, options)
      yield { type: 'tool_use', id: call.id, name: call.name, input: parsed.parsedInput }
      const executed = await executeToolCall(
        call.name,
        call.id,
        parsed.parsedInput,
        parsed.parseFailed,
        toolsByName,
        options,
      )
      toolResults.push({
        type: 'tool_result',
        toolUseId: call.id,
        content: executed.content,
        ...(executed.isError ? { isError: true } : {}),
      } satisfies ToolResultBlock)
      yield {
        type: 'tool_result',
        id: call.id,
        name: call.name,
        content: executed.content,
        isError: executed.isError,
      }
    }
    transcript.push({ role: 'user', content: toolResults })

    round += 1
    if (round >= iterationCap) {
      // NOTE(review): this parses the text of a turn that ended in
      // tool calls, which may be empty or not schema-conforming. The
      // result depends on how `parseGenerated` treats invalid input —
      // confirm that is the intended behaviour at the iteration cap.
      const text = assistantText
      const value = parseGenerated(text, schema)
      yield {
        type: 'stop',
        stopReason: 'max_iterations',
        iterations: round,
        usage: usageTotal,
        messages: transcript,
        value,
        text,
      } as AgentStreamEvent<T>
      return
    }
  }
}
670
+
671
/**
 * Transcribes audio through the SDK's transcription endpoint.
 *
 * @param audio - Audio input, converted to an upload via `audioSourceToFile`.
 * @param options - Optional `model` (falls back to the driver's default
 *   transcription model), `language`, `prompt`, and abort `signal`.
 * @returns The transcript text (empty string when the response has no
 *   string `text`), the model name that was requested, the raw response,
 *   and `language` / `duration` when the response includes them.
 */
async transcribe(
  audio: AudioSource,
  options: TranscribeOptions = {},
): Promise<TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse>> {
  const model = options.model ?? this.defaultTranscribeModel
  const file = await audioSourceToFile(audio)
  const { language, prompt, signal } = options

  const params: OpenAI.Audio.TranscriptionCreateParams = {
    file,
    model,
    ...(language !== undefined ? { language } : {}),
    ...(prompt !== undefined ? { prompt } : {}),
  }
  const response = await this.client.audio.transcriptions.create(
    params,
    signal !== undefined ? { signal } : undefined,
  )

  // No response_format is sent, so the SDK default (`json`) applies and
  // only `text` is surfaced. Whisper-1 adds `language` / `duration` under
  // `response_format: 'verbose_json'`; apps that need those currently
  // make a raw SDK call. The option set can grow when an app asks.
  const transcript: TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse> = {
    text: 'text' in response && typeof response.text === 'string' ? response.text : '',
    model,
    raw: response,
  }
  // Copy the optional fields only when present with the expected type.
  if ('language' in response && typeof response.language === 'string') {
    transcript.language = response.language
  }
  if ('duration' in response && typeof response.duration === 'number') {
    transcript.duration = response.duration
  }
  return transcript
}
707
+
708
/**
 * Embeds a batch of texts through the SDK's embeddings endpoint.
 *
 * @param texts - Inputs to embed. The array is copied before it is
 *   handed to the SDK, so the caller's `readonly` array is never
 *   exposed as a mutable one.
 * @param options - Optional `model` (falls back to the driver's default
 *   embedding model), `dimensions`, and abort `signal`.
 * @returns One embedding per entry of `response.data`, in response
 *   order; the model name reported by the response; `inputTokens` from
 *   `usage.prompt_tokens` (0 when usage is absent); and the raw response.
 */
async embed(
  texts: readonly string[],
  options: EmbedOptions = {},
): Promise<EmbedResult<OpenAI.CreateEmbeddingResponse>> {
  const model = options.model ?? this.defaultEmbedModel
  const params: OpenAI.EmbeddingCreateParams = {
    model,
    // A copy satisfies the SDK's mutable `string[]` type without an
    // `as` assertion that would silently drop `readonly`.
    input: [...texts],
    ...(options.dimensions !== undefined ? { dimensions: options.dimensions } : {}),
  }
  const response = await this.client.embeddings.create(
    params,
    options.signal !== undefined ? { signal: options.signal } : undefined,
  )
  return {
    embeddings: response.data.map((d) => d.embedding),
    model: response.model,
    usage: { inputTokens: response.usage?.prompt_tokens ?? 0 },
    raw: response,
  }
}
729
+
730
/**
 * Single non-streaming completion constrained to `schema`.
 *
 * Sends the request with a strict `json_schema` response format and
 * no tools, then parses the first choice's message content (empty
 * string when absent) with `parseGenerated`.
 *
 * @returns The parsed `value`, the raw `text`, the response's model
 *   name, the first choice's finish reason (or `null`), mapped usage,
 *   and the raw response.
 */
async generate<T>(
  messages: readonly Message[],
  schema: OutputSchema<T>,
  options: ChatOptions = {},
): Promise<GenerateResult<T>> {
  const request = this.buildParams(messages, options, [])
  request.response_format = {
    type: 'json_schema',
    json_schema: {
      name: schema.name,
      ...(schema.description !== undefined ? { description: schema.description } : {}),
      schema: schema.jsonSchema,
      strict: true,
    },
  }

  const completion = await this.client.chat.completions.create(request, reqOpts(options))
  const firstChoice = completion.choices[0]
  const text = firstChoice?.message?.content ?? ''

  return {
    value: parseGenerated(text, schema),
    text,
    model: completion.model,
    stopReason: firstChoice?.finish_reason ?? null,
    usage: toOpenAIUsage(completion.usage),
    raw: completion,
  }
}
758
+
759
/**
 * Shared MCP resolution step for every tool-loop variant.
 *
 * With no servers it resolves immediately to an empty tool list and
 * a no-op `close`. Otherwise it delegates to `resolveMcpTools`,
 * forwarding the test-only client factory and the optional pool when
 * they are configured. Callers invoke `close()` in a `finally`; that
 * call does nothing when the pool owns the connection lifetime.
 */
protected resolveMcp(servers: readonly MCPServer[]): Promise<{
  tools: Tool[]
  close: () => Promise<void>
}> {
  if (servers.length > 0) {
    const { mcpClientFactory: clientFactory, mcpPool: pool } = this
    return resolveMcpTools(servers, {
      ...(clientFactory ? { clientFactory } : {}),
      ...(pool ? { pool } : {}),
    })
  }
  return Promise.resolve({ tools: [], close: async () => {} })
}
777
+
778
+ // ─── Param translation ──────────────────────────────────────────────────
779
+
780
/**
 * Overridable hook that produces the chat-completions request.
 *
 * It only forwards to `buildOpenAIChatParams`, supplying this
 * driver's default model and default max-token budget. It exists so
 * `OpenAICompatBrainDriver` subclasses can adjust the request via
 * `super.buildParams(...)`, e.g. dropping `reasoning_effort` for
 * endpoints that reject it. The translation itself lives in
 * `openai_message_builder.ts`.
 */
protected buildParams(
  messages: readonly Message[],
  options: ChatOptions,
  tools: readonly Tool[],
): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
  const { defaultModel, defaultMaxTokens } = this
  return buildOpenAIChatParams(messages, options, tools, { defaultModel, defaultMaxTokens })
}
796
+ }