@strav/brain 1.0.0-alpha.16 → 1.0.0-alpha.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/agent.ts +34 -5
- package/src/agent_generate_result.ts +2 -0
- package/src/agent_result.ts +7 -0
- package/src/agent_runner.ts +134 -15
- package/src/agent_stream_event.ts +100 -0
- package/src/brain_config.ts +91 -1
- package/src/brain_manager.ts +287 -6
- package/src/brain_provider.ts +25 -1
- package/src/index.ts +37 -2
- package/src/mcp/client.ts +99 -13
- package/src/mcp/index.ts +7 -0
- package/src/mcp/oauth.ts +227 -0
- package/src/mcp/pool.ts +106 -0
- package/src/mcp/resolve_mcp_tools.ts +31 -9
- package/src/mcp_server.ts +16 -0
- package/src/persistence/brain_message.ts +34 -0
- package/src/persistence/brain_message_repository.ts +106 -0
- package/src/persistence/brain_store.ts +166 -0
- package/src/persistence/brain_suspended_run.ts +30 -0
- package/src/persistence/brain_suspended_run_repository.ts +68 -0
- package/src/persistence/brain_thread.ts +30 -0
- package/src/persistence/brain_thread_repository.ts +65 -0
- package/src/persistence/database_brain_store.ts +190 -0
- package/src/persistence/index.ts +48 -0
- package/src/persistence/schema/brain_message_schema.ts +61 -0
- package/src/persistence/schema/brain_suspended_run_schema.ts +58 -0
- package/src/persistence/schema/brain_thread_schema.ts +50 -0
- package/src/persistence/schema/index.ts +3 -0
- package/src/provider.ts +145 -1
- package/src/providers/anthropic_provider.ts +723 -38
- package/src/providers/deepseek_provider.ts +117 -0
- package/src/providers/gemini_provider.ts +625 -33
- package/src/providers/ollama_provider.ts +86 -0
- package/src/providers/openai_compat_provider.ts +616 -0
- package/src/providers/openai_provider.ts +801 -43
- package/src/providers/openai_responses_provider.ts +1015 -0
- package/src/suspended_run.ts +153 -0
- package/src/thread.ts +40 -1
- package/src/tool.ts +7 -0
- package/src/tool_runner.ts +81 -0
- package/src/types.ts +343 -0
|
@@ -52,26 +52,42 @@ import type { AgentResult } from '../agent_result.ts'
|
|
|
52
52
|
import { BrainError } from '../brain_error.ts'
|
|
53
53
|
import type { OpenAIProviderConfig } from '../brain_config.ts'
|
|
54
54
|
import type { MCPServer } from '../mcp_server.ts'
|
|
55
|
+
import type { AgentGenerateResult } from '../agent_generate_result.ts'
|
|
56
|
+
import type { AgentStreamEvent } from '../agent_stream_event.ts'
|
|
55
57
|
import { resolveMcpTools, type ResolveMcpToolsOptions } from '../mcp/resolve_mcp_tools.ts'
|
|
56
58
|
import { parseGenerated, type OutputSchema } from '../output_schema.ts'
|
|
57
|
-
import
|
|
59
|
+
import { recoverOrThrow, runToolWithRecovery } from '../tool_runner.ts'
|
|
60
|
+
import type {
|
|
61
|
+
Provider,
|
|
62
|
+
RunWithToolsOptions,
|
|
63
|
+
RunWithToolsOptionsWithSuspend,
|
|
64
|
+
} from '../provider.ts'
|
|
65
|
+
import type { SuspendedRun } from '../suspended_run.ts'
|
|
58
66
|
import type { Tool } from '../tool.ts'
|
|
59
67
|
import { ToolExecutionError } from '../tool_execution_error.ts'
|
|
60
68
|
import type {
|
|
69
|
+
AudioSource,
|
|
61
70
|
ChatOptions,
|
|
62
71
|
ChatResult,
|
|
63
72
|
ChatUsage,
|
|
64
73
|
ContentBlock,
|
|
74
|
+
EmbedOptions,
|
|
75
|
+
EmbedResult,
|
|
65
76
|
GenerateResult,
|
|
77
|
+
ImageBlock,
|
|
66
78
|
Message,
|
|
67
79
|
StreamEvent,
|
|
68
80
|
SystemPrompt,
|
|
69
81
|
TextBlock,
|
|
70
82
|
ToolResultBlock,
|
|
71
83
|
ToolUseBlock,
|
|
84
|
+
TranscribeOptions,
|
|
85
|
+
TranscribeResult,
|
|
72
86
|
} from '../types.ts'
|
|
73
87
|
|
|
74
88
|
const DEFAULT_OPENAI_MODEL = 'gpt-5'
|
|
89
|
+
const DEFAULT_OPENAI_EMBED_MODEL = 'text-embedding-3-small'
|
|
90
|
+
const DEFAULT_OPENAI_TRANSCRIBE_MODEL = 'whisper-1'
|
|
75
91
|
|
|
76
92
|
export interface OpenAIProviderOptions {
|
|
77
93
|
client?: OpenAI
|
|
@@ -81,14 +97,33 @@ export interface OpenAIProviderOptions {
|
|
|
81
97
|
* unset; the provider uses the default `MCPClient`.
|
|
82
98
|
*/
|
|
83
99
|
mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
|
|
100
|
+
/**
|
|
101
|
+
* Optional MCP connection pool. When set, every `runWithTools`
|
|
102
|
+
* call (and its schema / streaming variants) borrows MCP clients
|
|
103
|
+
* from the pool instead of constructing fresh ones — and the
|
|
104
|
+
* per-call cleanup becomes a no-op so transports survive across
|
|
105
|
+
* calls. Apps construct one pool at boot and pass it to every
|
|
106
|
+
* provider that needs local MCP; pool ownership stays on the app
|
|
107
|
+
* via `pool.close()` at shutdown.
|
|
108
|
+
*/
|
|
109
|
+
mcpPool?: ResolveMcpToolsOptions['pool']
|
|
84
110
|
}
|
|
85
111
|
|
|
86
112
|
export class OpenAIProvider implements Provider {
|
|
87
113
|
readonly name: string
|
|
88
|
-
private
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
114
|
+
// Protected (rather than private) so OpenAI-compatible drivers
|
|
115
|
+
// can subclass — see `DeepSeekProvider`. Apps that want to plug
|
|
116
|
+
// in Groq / Together / Fireworks follow the same pattern: extend,
|
|
117
|
+
// override the constructor's base URL + default model, optionally
|
|
118
|
+
// override `buildParams` to suppress fields the upstream API
|
|
119
|
+
// doesn't accept.
|
|
120
|
+
protected readonly client: OpenAI
|
|
121
|
+
protected readonly defaultModel: string
|
|
122
|
+
protected readonly defaultMaxTokens: number
|
|
123
|
+
protected readonly defaultEmbedModel: string
|
|
124
|
+
protected readonly defaultTranscribeModel: string
|
|
125
|
+
protected readonly mcpClientFactory?: ResolveMcpToolsOptions['clientFactory']
|
|
126
|
+
protected readonly mcpPool?: ResolveMcpToolsOptions['pool']
|
|
92
127
|
|
|
93
128
|
constructor(
|
|
94
129
|
name: string,
|
|
@@ -98,7 +133,10 @@ export class OpenAIProvider implements Provider {
|
|
|
98
133
|
this.name = name
|
|
99
134
|
this.defaultModel = config.defaultModel ?? DEFAULT_OPENAI_MODEL
|
|
100
135
|
this.defaultMaxTokens = config.defaultMaxTokens ?? 4096
|
|
136
|
+
this.defaultEmbedModel = config.defaultEmbedModel ?? DEFAULT_OPENAI_EMBED_MODEL
|
|
137
|
+
this.defaultTranscribeModel = config.defaultTranscribeModel ?? DEFAULT_OPENAI_TRANSCRIBE_MODEL
|
|
101
138
|
this.mcpClientFactory = options.mcpClientFactory
|
|
139
|
+
this.mcpPool = options.mcpPool
|
|
102
140
|
this.client =
|
|
103
141
|
options.client ??
|
|
104
142
|
new OpenAI({
|
|
@@ -110,7 +148,7 @@ export class OpenAIProvider implements Provider {
|
|
|
110
148
|
|
|
111
149
|
async chat(messages: readonly Message[], options: ChatOptions = {}): Promise<ChatResult> {
|
|
112
150
|
const params = this.buildParams(messages, options, [])
|
|
113
|
-
const response = await this.client.chat.completions.create(params)
|
|
151
|
+
const response = await this.client.chat.completions.create(params, reqOpts(options))
|
|
114
152
|
return this.toChatResult(response)
|
|
115
153
|
}
|
|
116
154
|
|
|
@@ -123,7 +161,7 @@ export class OpenAIProvider implements Provider {
|
|
|
123
161
|
stream: true,
|
|
124
162
|
stream_options: { include_usage: true },
|
|
125
163
|
}
|
|
126
|
-
const stream = await this.client.chat.completions.create(params)
|
|
164
|
+
const stream = await this.client.chat.completions.create(params, reqOpts(options))
|
|
127
165
|
let aggregatedUsage: OpenAI.CompletionUsage | undefined
|
|
128
166
|
let finishReason: string | null = null
|
|
129
167
|
for await (const chunk of stream) {
|
|
@@ -143,18 +181,22 @@ export class OpenAIProvider implements Provider {
|
|
|
143
181
|
}
|
|
144
182
|
}
|
|
145
183
|
|
|
184
|
+
runWithTools(
|
|
185
|
+
messages: readonly Message[],
|
|
186
|
+
tools: readonly Tool[],
|
|
187
|
+
options: RunWithToolsOptionsWithSuspend,
|
|
188
|
+
): Promise<AgentResult | SuspendedRun>
|
|
189
|
+
runWithTools(
|
|
190
|
+
messages: readonly Message[],
|
|
191
|
+
tools: readonly Tool[],
|
|
192
|
+
options?: RunWithToolsOptions,
|
|
193
|
+
): Promise<AgentResult>
|
|
146
194
|
async runWithTools(
|
|
147
195
|
messages: readonly Message[],
|
|
148
196
|
tools: readonly Tool[],
|
|
149
197
|
options: RunWithToolsOptions = {},
|
|
150
|
-
): Promise<AgentResult> {
|
|
151
|
-
const
|
|
152
|
-
const resolved =
|
|
153
|
-
mcpServers.length > 0
|
|
154
|
-
? await resolveMcpTools(mcpServers, {
|
|
155
|
-
...(this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}),
|
|
156
|
-
})
|
|
157
|
-
: { tools: [] as Tool[], close: async () => {} }
|
|
198
|
+
): Promise<AgentResult | SuspendedRun> {
|
|
199
|
+
const resolved = await this.resolveMcp(options.mcpServers ?? [])
|
|
158
200
|
try {
|
|
159
201
|
return await this._runLoop(messages, [...tools, ...resolved.tools], options)
|
|
160
202
|
} finally {
|
|
@@ -166,7 +208,7 @@ export class OpenAIProvider implements Provider {
|
|
|
166
208
|
messages: readonly Message[],
|
|
167
209
|
tools: readonly Tool[],
|
|
168
210
|
options: RunWithToolsOptions,
|
|
169
|
-
): Promise<AgentResult> {
|
|
211
|
+
): Promise<AgentResult | SuspendedRun> {
|
|
170
212
|
const maxIterations = options.maxIterations ?? 10
|
|
171
213
|
const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
|
|
172
214
|
const workingMessages: Message[] = [...messages]
|
|
@@ -179,8 +221,9 @@ export class OpenAIProvider implements Provider {
|
|
|
179
221
|
let iterations = 0
|
|
180
222
|
|
|
181
223
|
while (true) {
|
|
224
|
+
checkAborted(options.signal)
|
|
182
225
|
const params = this.buildParams(workingMessages, options, tools)
|
|
183
|
-
const response = await this.client.chat.completions.create(params)
|
|
226
|
+
const response = await this.client.chat.completions.create(params, reqOpts(options))
|
|
184
227
|
addUsage(aggregated, response.usage)
|
|
185
228
|
|
|
186
229
|
const choice = response.choices[0]
|
|
@@ -208,54 +251,656 @@ export class OpenAIProvider implements Provider {
|
|
|
208
251
|
}
|
|
209
252
|
|
|
210
253
|
const resultBlocks: ContentBlock[] = []
|
|
211
|
-
for (
|
|
254
|
+
for (let i = 0; i < toolCalls.length; i++) {
|
|
255
|
+
const call = toolCalls[i]!
|
|
212
256
|
if (call.type !== 'function') continue
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
257
|
+
let parsedInput: unknown
|
|
258
|
+
let parseFailed: { content: string; isError: boolean } | undefined
|
|
259
|
+
try {
|
|
260
|
+
parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
|
|
261
|
+
} catch (err) {
|
|
262
|
+
parseFailed = recoverOrThrow(
|
|
263
|
+
new ToolExecutionError(
|
|
264
|
+
call.function.name,
|
|
265
|
+
call.id,
|
|
266
|
+
new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
|
|
267
|
+
),
|
|
268
|
+
options,
|
|
269
|
+
)
|
|
270
|
+
}
|
|
271
|
+
if (options.shouldSuspend && !parseFailed) {
|
|
272
|
+
const frameworkCall: ToolUseBlock = {
|
|
273
|
+
type: 'tool_use',
|
|
274
|
+
id: call.id,
|
|
275
|
+
name: call.function.name,
|
|
276
|
+
input: (parsedInput ?? {}) as Record<string, unknown>,
|
|
277
|
+
}
|
|
278
|
+
if (await options.shouldSuspend(frameworkCall, options.context)) {
|
|
279
|
+
const pending: ToolUseBlock[] = []
|
|
280
|
+
for (let j = i; j < toolCalls.length; j++) {
|
|
281
|
+
const c = toolCalls[j]!
|
|
282
|
+
if (c.type !== 'function') continue
|
|
283
|
+
let pInput: unknown = {}
|
|
284
|
+
try {
|
|
285
|
+
pInput = c.function.arguments ? JSON.parse(c.function.arguments) : {}
|
|
286
|
+
} catch {
|
|
287
|
+
pInput = c.function.arguments ?? {}
|
|
288
|
+
}
|
|
289
|
+
pending.push({
|
|
290
|
+
type: 'tool_use',
|
|
291
|
+
id: c.id,
|
|
292
|
+
name: c.function.name,
|
|
293
|
+
input: pInput as Record<string, unknown>,
|
|
294
|
+
})
|
|
295
|
+
}
|
|
296
|
+
return {
|
|
297
|
+
status: 'suspended',
|
|
298
|
+
pendingToolCalls: pending,
|
|
299
|
+
state: { messages: workingMessages, iterations, usage: aggregated },
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
const { content, isError } = parseFailed
|
|
304
|
+
?? (await runToolWithRecovery(
|
|
305
|
+
toolMap.get(call.function.name),
|
|
216
306
|
call.function.name,
|
|
217
307
|
call.id,
|
|
218
|
-
|
|
219
|
-
|
|
308
|
+
parsedInput,
|
|
309
|
+
options,
|
|
310
|
+
))
|
|
311
|
+
resultBlocks.push({
|
|
312
|
+
type: 'tool_result',
|
|
313
|
+
toolUseId: call.id,
|
|
314
|
+
content,
|
|
315
|
+
...(isError ? { isError: true } : {}),
|
|
316
|
+
} satisfies ToolResultBlock)
|
|
317
|
+
}
|
|
318
|
+
workingMessages.push({ role: 'user', content: resultBlocks })
|
|
319
|
+
|
|
320
|
+
iterations++
|
|
321
|
+
if (iterations >= maxIterations) {
|
|
322
|
+
return {
|
|
323
|
+
text: assistantMessage.content ?? '',
|
|
324
|
+
messages: workingMessages,
|
|
325
|
+
iterations,
|
|
326
|
+
stopReason: 'max_iterations',
|
|
327
|
+
usage: aggregated,
|
|
220
328
|
}
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
async runWithToolsAndSchema<T>(
|
|
334
|
+
messages: readonly Message[],
|
|
335
|
+
tools: readonly Tool[],
|
|
336
|
+
schema: OutputSchema<T>,
|
|
337
|
+
options: RunWithToolsOptions = {},
|
|
338
|
+
): Promise<AgentGenerateResult<T>> {
|
|
339
|
+
const resolved = await this.resolveMcp(options.mcpServers ?? [])
|
|
340
|
+
try {
|
|
341
|
+
return await this._runLoopWithSchema([...tools, ...resolved.tools], messages, schema, options)
|
|
342
|
+
} finally {
|
|
343
|
+
await resolved.close()
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
private async _runLoopWithSchema<T>(
|
|
348
|
+
tools: readonly Tool[],
|
|
349
|
+
messages: readonly Message[],
|
|
350
|
+
schema: OutputSchema<T>,
|
|
351
|
+
options: RunWithToolsOptions,
|
|
352
|
+
): Promise<AgentGenerateResult<T>> {
|
|
353
|
+
const maxIterations = options.maxIterations ?? 10
|
|
354
|
+
const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
|
|
355
|
+
const workingMessages: Message[] = [...messages]
|
|
356
|
+
const aggregated: ChatUsage = {
|
|
357
|
+
inputTokens: 0,
|
|
358
|
+
outputTokens: 0,
|
|
359
|
+
cacheReadTokens: 0,
|
|
360
|
+
cacheCreationTokens: 0,
|
|
361
|
+
}
|
|
362
|
+
let iterations = 0
|
|
363
|
+
|
|
364
|
+
while (true) {
|
|
365
|
+
const params = this.buildParams(workingMessages, options, tools)
|
|
366
|
+
params.response_format = {
|
|
367
|
+
type: 'json_schema',
|
|
368
|
+
json_schema: {
|
|
369
|
+
name: schema.name,
|
|
370
|
+
...(schema.description !== undefined ? { description: schema.description } : {}),
|
|
371
|
+
schema: schema.jsonSchema,
|
|
372
|
+
strict: true,
|
|
373
|
+
},
|
|
374
|
+
}
|
|
375
|
+
const response = await this.client.chat.completions.create(params, reqOpts(options))
|
|
376
|
+
addUsage(aggregated, response.usage)
|
|
377
|
+
|
|
378
|
+
const choice = response.choices[0]
|
|
379
|
+
if (!choice) {
|
|
380
|
+
throw new BrainError('OpenAIProvider: response had no choices.')
|
|
381
|
+
}
|
|
382
|
+
const assistantMessage = choice.message
|
|
383
|
+
workingMessages.push({
|
|
384
|
+
role: 'assistant',
|
|
385
|
+
content: fromOpenAIAssistantMessage(assistantMessage),
|
|
386
|
+
})
|
|
387
|
+
|
|
388
|
+
const toolCalls = assistantMessage.tool_calls ?? []
|
|
389
|
+
if (toolCalls.length === 0 || choice.finish_reason !== 'tool_calls') {
|
|
390
|
+
const text = assistantMessage.content ?? ''
|
|
391
|
+
return {
|
|
392
|
+
value: parseGenerated(text, schema),
|
|
393
|
+
text,
|
|
394
|
+
messages: workingMessages,
|
|
395
|
+
iterations,
|
|
396
|
+
stopReason: choice.finish_reason ?? 'stop',
|
|
397
|
+
usage: aggregated,
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
const resultBlocks: ContentBlock[] = []
|
|
402
|
+
for (const call of toolCalls) {
|
|
403
|
+
if (call.type !== 'function') continue
|
|
221
404
|
let parsedInput: unknown
|
|
405
|
+
let parseFailed: { content: string; isError: boolean } | undefined
|
|
222
406
|
try {
|
|
223
407
|
parsedInput = call.function.arguments ? JSON.parse(call.function.arguments) : {}
|
|
224
408
|
} catch (err) {
|
|
225
|
-
|
|
409
|
+
parseFailed = recoverOrThrow(
|
|
410
|
+
new ToolExecutionError(
|
|
411
|
+
call.function.name,
|
|
412
|
+
call.id,
|
|
413
|
+
new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
|
|
414
|
+
),
|
|
415
|
+
options,
|
|
416
|
+
)
|
|
417
|
+
}
|
|
418
|
+
const { content, isError } = parseFailed
|
|
419
|
+
?? (await runToolWithRecovery(
|
|
420
|
+
toolMap.get(call.function.name),
|
|
226
421
|
call.function.name,
|
|
227
422
|
call.id,
|
|
228
|
-
|
|
229
|
-
|
|
423
|
+
parsedInput,
|
|
424
|
+
options,
|
|
425
|
+
))
|
|
426
|
+
resultBlocks.push({
|
|
427
|
+
type: 'tool_result',
|
|
428
|
+
toolUseId: call.id,
|
|
429
|
+
content,
|
|
430
|
+
...(isError ? { isError: true } : {}),
|
|
431
|
+
} satisfies ToolResultBlock)
|
|
432
|
+
}
|
|
433
|
+
workingMessages.push({ role: 'user', content: resultBlocks })
|
|
434
|
+
|
|
435
|
+
iterations++
|
|
436
|
+
if (iterations >= maxIterations) {
|
|
437
|
+
const text = assistantMessage.content ?? ''
|
|
438
|
+
return {
|
|
439
|
+
value: parseGenerated(text, schema),
|
|
440
|
+
text,
|
|
441
|
+
messages: workingMessages,
|
|
442
|
+
iterations,
|
|
443
|
+
stopReason: 'max_iterations',
|
|
444
|
+
usage: aggregated,
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
async *streamWithTools(
|
|
451
|
+
messages: readonly Message[],
|
|
452
|
+
tools: readonly Tool[],
|
|
453
|
+
options: RunWithToolsOptions = {},
|
|
454
|
+
): AsyncIterable<AgentStreamEvent> {
|
|
455
|
+
const resolved = await this.resolveMcp(options.mcpServers ?? [])
|
|
456
|
+
try {
|
|
457
|
+
yield* this._streamLoop(messages, [...tools, ...resolved.tools], options)
|
|
458
|
+
} finally {
|
|
459
|
+
await resolved.close()
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
private async *_streamLoop(
|
|
464
|
+
messages: readonly Message[],
|
|
465
|
+
tools: readonly Tool[],
|
|
466
|
+
options: RunWithToolsOptions,
|
|
467
|
+
): AsyncIterable<AgentStreamEvent> {
|
|
468
|
+
const maxIterations = options.maxIterations ?? 10
|
|
469
|
+
const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
|
|
470
|
+
const workingMessages: Message[] = [...messages]
|
|
471
|
+
const aggregated: ChatUsage = {
|
|
472
|
+
inputTokens: 0,
|
|
473
|
+
outputTokens: 0,
|
|
474
|
+
cacheReadTokens: 0,
|
|
475
|
+
cacheCreationTokens: 0,
|
|
476
|
+
}
|
|
477
|
+
let iterations = 0
|
|
478
|
+
|
|
479
|
+
while (true) {
|
|
480
|
+
checkAborted(options.signal)
|
|
481
|
+
yield { type: 'iteration_start', iteration: iterations }
|
|
482
|
+
|
|
483
|
+
const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
|
|
484
|
+
...this.buildParams(workingMessages, options, tools),
|
|
485
|
+
stream: true,
|
|
486
|
+
stream_options: { include_usage: true },
|
|
487
|
+
}
|
|
488
|
+
const stream = await this.client.chat.completions.create(params, reqOpts(options))
|
|
489
|
+
|
|
490
|
+
let textBuf = ''
|
|
491
|
+
// Tracks: per index, the running entry; and whether
|
|
492
|
+
// `tool_use_start` has already been emitted (we emit once the
|
|
493
|
+
// first chunk brings the id + name).
|
|
494
|
+
const toolCallsByIndex: Map<
|
|
495
|
+
number,
|
|
496
|
+
{ id?: string; name?: string; args: string; started: boolean }
|
|
497
|
+
> = new Map()
|
|
498
|
+
let finishReason: string | null = null
|
|
499
|
+
let lastUsage: OpenAI.CompletionUsage | undefined
|
|
500
|
+
|
|
501
|
+
for await (const chunk of stream) {
|
|
502
|
+
const choice = chunk.choices[0]
|
|
503
|
+
const delta = choice?.delta
|
|
504
|
+
if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
|
|
505
|
+
textBuf += delta.content
|
|
506
|
+
yield { type: 'text', delta: delta.content }
|
|
507
|
+
}
|
|
508
|
+
if (delta?.tool_calls) {
|
|
509
|
+
for (const tc of delta.tool_calls) {
|
|
510
|
+
const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false }
|
|
511
|
+
if (tc.id) entry.id = tc.id
|
|
512
|
+
if (tc.function?.name) entry.name = tc.function.name
|
|
513
|
+
toolCallsByIndex.set(tc.index, entry)
|
|
514
|
+
// Emit `tool_use_start` once id+name are both known.
|
|
515
|
+
// OpenAI typically delivers them in the same first
|
|
516
|
+
// chunk for a given tool call.
|
|
517
|
+
if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
|
|
518
|
+
entry.started = true
|
|
519
|
+
yield { type: 'tool_use_start', id: entry.id, name: entry.name }
|
|
520
|
+
}
|
|
521
|
+
if (tc.function?.arguments) {
|
|
522
|
+
entry.args += tc.function.arguments
|
|
523
|
+
// Emit a delta only after start has fired — apps relying
|
|
524
|
+
// on an id wouldn't have one until then.
|
|
525
|
+
if (entry.started && entry.id !== undefined) {
|
|
526
|
+
yield {
|
|
527
|
+
type: 'tool_use_delta',
|
|
528
|
+
id: entry.id,
|
|
529
|
+
argsDelta: tc.function.arguments,
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
}
|
|
230
534
|
}
|
|
231
|
-
|
|
535
|
+
if (choice?.finish_reason) finishReason = choice.finish_reason
|
|
536
|
+
if (chunk.usage) lastUsage = chunk.usage
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
addUsage(aggregated, lastUsage)
|
|
540
|
+
yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
|
|
541
|
+
|
|
542
|
+
// Materialize the assistant turn the same way runWithTools does.
|
|
543
|
+
const assistantBlocks: ContentBlock[] = []
|
|
544
|
+
if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
|
|
545
|
+
const orderedCalls = [...toolCallsByIndex.entries()]
|
|
546
|
+
.sort(([a], [b]) => a - b)
|
|
547
|
+
.map(([, v]) => v)
|
|
548
|
+
for (const call of orderedCalls) {
|
|
549
|
+
if (!call.id || !call.name) continue
|
|
550
|
+
let parsedInput: unknown = {}
|
|
551
|
+
try {
|
|
552
|
+
parsedInput = call.args ? JSON.parse(call.args) : {}
|
|
553
|
+
} catch {
|
|
554
|
+
parsedInput = call.args
|
|
555
|
+
}
|
|
556
|
+
assistantBlocks.push({
|
|
557
|
+
type: 'tool_use',
|
|
558
|
+
id: call.id,
|
|
559
|
+
name: call.name,
|
|
560
|
+
input: parsedInput,
|
|
561
|
+
} satisfies ToolUseBlock)
|
|
562
|
+
}
|
|
563
|
+
const assistantContent: string | ContentBlock[] =
|
|
564
|
+
assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
|
|
565
|
+
? assistantBlocks[0].text
|
|
566
|
+
: assistantBlocks
|
|
567
|
+
workingMessages.push({ role: 'assistant', content: assistantContent })
|
|
568
|
+
|
|
569
|
+
if (finishReason !== 'tool_calls' || orderedCalls.length === 0) {
|
|
570
|
+
yield {
|
|
571
|
+
type: 'stop',
|
|
572
|
+
stopReason: finishReason ?? 'stop',
|
|
573
|
+
iterations,
|
|
574
|
+
usage: aggregated,
|
|
575
|
+
messages: workingMessages,
|
|
576
|
+
}
|
|
577
|
+
return
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
const resultBlocks: ContentBlock[] = []
|
|
581
|
+
for (const call of orderedCalls) {
|
|
582
|
+
if (!call.id || !call.name) continue
|
|
583
|
+
let parsedInput: unknown
|
|
584
|
+
let parseFailed: { content: string; isError: boolean } | undefined
|
|
232
585
|
try {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
586
|
+
parsedInput = call.args ? JSON.parse(call.args) : {}
|
|
587
|
+
} catch (err) {
|
|
588
|
+
parseFailed = recoverOrThrow(
|
|
589
|
+
new ToolExecutionError(
|
|
590
|
+
call.name,
|
|
591
|
+
call.id,
|
|
592
|
+
new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
|
|
593
|
+
),
|
|
594
|
+
options,
|
|
595
|
+
)
|
|
596
|
+
parsedInput = call.args
|
|
239
597
|
}
|
|
240
|
-
|
|
598
|
+
yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput }
|
|
599
|
+
const { content, isError } = parseFailed
|
|
600
|
+
?? (await runToolWithRecovery(
|
|
601
|
+
toolMap.get(call.name),
|
|
602
|
+
call.name,
|
|
603
|
+
call.id,
|
|
604
|
+
parsedInput,
|
|
605
|
+
options,
|
|
606
|
+
))
|
|
607
|
+
resultBlocks.push({
|
|
241
608
|
type: 'tool_result',
|
|
242
609
|
toolUseId: call.id,
|
|
243
|
-
content
|
|
610
|
+
content,
|
|
611
|
+
...(isError ? { isError: true } : {}),
|
|
612
|
+
} satisfies ToolResultBlock)
|
|
613
|
+
yield {
|
|
614
|
+
type: 'tool_result',
|
|
615
|
+
id: call.id,
|
|
616
|
+
name: call.name,
|
|
617
|
+
content,
|
|
618
|
+
isError,
|
|
244
619
|
}
|
|
245
|
-
resultBlocks.push(resultBlock)
|
|
246
620
|
}
|
|
247
621
|
workingMessages.push({ role: 'user', content: resultBlocks })
|
|
248
622
|
|
|
249
623
|
iterations++
|
|
250
624
|
if (iterations >= maxIterations) {
|
|
251
|
-
|
|
252
|
-
|
|
625
|
+
yield {
|
|
626
|
+
type: 'stop',
|
|
627
|
+
stopReason: 'max_iterations',
|
|
628
|
+
iterations,
|
|
629
|
+
usage: aggregated,
|
|
253
630
|
messages: workingMessages,
|
|
631
|
+
}
|
|
632
|
+
return
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
async *streamWithToolsAndSchema<T>(
|
|
638
|
+
messages: readonly Message[],
|
|
639
|
+
tools: readonly Tool[],
|
|
640
|
+
schema: OutputSchema<T>,
|
|
641
|
+
options: RunWithToolsOptions = {},
|
|
642
|
+
): AsyncIterable<AgentStreamEvent<T>> {
|
|
643
|
+
const resolved = await this.resolveMcp(options.mcpServers ?? [])
|
|
644
|
+
try {
|
|
645
|
+
yield* this._streamLoopWithSchema(
|
|
646
|
+
[...tools, ...resolved.tools],
|
|
647
|
+
messages,
|
|
648
|
+
schema,
|
|
649
|
+
options,
|
|
650
|
+
)
|
|
651
|
+
} finally {
|
|
652
|
+
await resolved.close()
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
private async *_streamLoopWithSchema<T>(
|
|
657
|
+
tools: readonly Tool[],
|
|
658
|
+
messages: readonly Message[],
|
|
659
|
+
schema: OutputSchema<T>,
|
|
660
|
+
options: RunWithToolsOptions,
|
|
661
|
+
): AsyncIterable<AgentStreamEvent<T>> {
|
|
662
|
+
const maxIterations = options.maxIterations ?? 10
|
|
663
|
+
const toolMap = new Map<string, Tool>(tools.map((t) => [t.name, t]))
|
|
664
|
+
const workingMessages: Message[] = [...messages]
|
|
665
|
+
const aggregated: ChatUsage = {
|
|
666
|
+
inputTokens: 0,
|
|
667
|
+
outputTokens: 0,
|
|
668
|
+
cacheReadTokens: 0,
|
|
669
|
+
cacheCreationTokens: 0,
|
|
670
|
+
}
|
|
671
|
+
let iterations = 0
|
|
672
|
+
|
|
673
|
+
while (true) {
|
|
674
|
+
checkAborted(options.signal)
|
|
675
|
+
yield { type: 'iteration_start', iteration: iterations }
|
|
676
|
+
|
|
677
|
+
const baseParams = this.buildParams(workingMessages, options, tools)
|
|
678
|
+
baseParams.response_format = {
|
|
679
|
+
type: 'json_schema',
|
|
680
|
+
json_schema: {
|
|
681
|
+
name: schema.name,
|
|
682
|
+
...(schema.description !== undefined ? { description: schema.description } : {}),
|
|
683
|
+
schema: schema.jsonSchema,
|
|
684
|
+
strict: true,
|
|
685
|
+
},
|
|
686
|
+
}
|
|
687
|
+
const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
|
|
688
|
+
...baseParams,
|
|
689
|
+
stream: true,
|
|
690
|
+
stream_options: { include_usage: true },
|
|
691
|
+
}
|
|
692
|
+
const stream = await this.client.chat.completions.create(params, reqOpts(options))
|
|
693
|
+
|
|
694
|
+
let textBuf = ''
|
|
695
|
+
// Tracks: per index, the running entry; and whether
|
|
696
|
+
// `tool_use_start` has already been emitted (we emit once the
|
|
697
|
+
// first chunk brings the id + name).
|
|
698
|
+
const toolCallsByIndex: Map<
|
|
699
|
+
number,
|
|
700
|
+
{ id?: string; name?: string; args: string; started: boolean }
|
|
701
|
+
> = new Map()
|
|
702
|
+
let finishReason: string | null = null
|
|
703
|
+
let lastUsage: OpenAI.CompletionUsage | undefined
|
|
704
|
+
|
|
705
|
+
for await (const chunk of stream) {
|
|
706
|
+
const choice = chunk.choices[0]
|
|
707
|
+
const delta = choice?.delta
|
|
708
|
+
if (delta?.content && typeof delta.content === 'string' && delta.content.length > 0) {
|
|
709
|
+
textBuf += delta.content
|
|
710
|
+
yield { type: 'text', delta: delta.content }
|
|
711
|
+
}
|
|
712
|
+
if (delta?.tool_calls) {
|
|
713
|
+
for (const tc of delta.tool_calls) {
|
|
714
|
+
const entry = toolCallsByIndex.get(tc.index) ?? { args: '', started: false }
|
|
715
|
+
if (tc.id) entry.id = tc.id
|
|
716
|
+
if (tc.function?.name) entry.name = tc.function.name
|
|
717
|
+
toolCallsByIndex.set(tc.index, entry)
|
|
718
|
+
// Emit `tool_use_start` once id+name are both known.
|
|
719
|
+
// OpenAI typically delivers them in the same first
|
|
720
|
+
// chunk for a given tool call.
|
|
721
|
+
if (!entry.started && entry.id !== undefined && entry.name !== undefined) {
|
|
722
|
+
entry.started = true
|
|
723
|
+
yield { type: 'tool_use_start', id: entry.id, name: entry.name }
|
|
724
|
+
}
|
|
725
|
+
if (tc.function?.arguments) {
|
|
726
|
+
entry.args += tc.function.arguments
|
|
727
|
+
// Emit a delta only after start has fired — apps relying
|
|
728
|
+
// on an id wouldn't have one until then.
|
|
729
|
+
if (entry.started && entry.id !== undefined) {
|
|
730
|
+
yield {
|
|
731
|
+
type: 'tool_use_delta',
|
|
732
|
+
id: entry.id,
|
|
733
|
+
argsDelta: tc.function.arguments,
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
}
|
|
739
|
+
if (choice?.finish_reason) finishReason = choice.finish_reason
|
|
740
|
+
if (chunk.usage) lastUsage = chunk.usage
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
addUsage(aggregated, lastUsage)
|
|
744
|
+
yield { type: 'iteration_end', iteration: iterations, stopReason: finishReason }
|
|
745
|
+
|
|
746
|
+
const assistantBlocks: ContentBlock[] = []
|
|
747
|
+
if (textBuf.length > 0) assistantBlocks.push({ type: 'text', text: textBuf })
|
|
748
|
+
const orderedCalls = [...toolCallsByIndex.entries()]
|
|
749
|
+
.sort(([a], [b]) => a - b)
|
|
750
|
+
.map(([, v]) => v)
|
|
751
|
+
for (const call of orderedCalls) {
|
|
752
|
+
if (!call.id || !call.name) continue
|
|
753
|
+
let parsedInput: unknown = {}
|
|
754
|
+
try {
|
|
755
|
+
parsedInput = call.args ? JSON.parse(call.args) : {}
|
|
756
|
+
} catch {
|
|
757
|
+
parsedInput = call.args
|
|
758
|
+
}
|
|
759
|
+
assistantBlocks.push({
|
|
760
|
+
type: 'tool_use',
|
|
761
|
+
id: call.id,
|
|
762
|
+
name: call.name,
|
|
763
|
+
input: parsedInput,
|
|
764
|
+
} satisfies ToolUseBlock)
|
|
765
|
+
}
|
|
766
|
+
const assistantContent: string | ContentBlock[] =
|
|
767
|
+
assistantBlocks.length === 1 && assistantBlocks[0]?.type === 'text'
|
|
768
|
+
? assistantBlocks[0].text
|
|
769
|
+
: assistantBlocks
|
|
770
|
+
workingMessages.push({ role: 'assistant', content: assistantContent })
|
|
771
|
+
|
|
772
|
+
if (finishReason !== 'tool_calls' || orderedCalls.length === 0) {
|
|
773
|
+
const text = textBuf
|
|
774
|
+
const value = parseGenerated(text, schema)
|
|
775
|
+
yield {
|
|
776
|
+
type: 'stop',
|
|
777
|
+
stopReason: finishReason ?? 'stop',
|
|
254
778
|
iterations,
|
|
255
|
-
stopReason: 'max_iterations',
|
|
256
779
|
usage: aggregated,
|
|
780
|
+
messages: workingMessages,
|
|
781
|
+
value,
|
|
782
|
+
text,
|
|
783
|
+
} as AgentStreamEvent<T>
|
|
784
|
+
return
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
const resultBlocks: ContentBlock[] = []
|
|
788
|
+
for (const call of orderedCalls) {
|
|
789
|
+
if (!call.id || !call.name) continue
|
|
790
|
+
let parsedInput: unknown
|
|
791
|
+
let parseFailed: { content: string; isError: boolean } | undefined
|
|
792
|
+
try {
|
|
793
|
+
parsedInput = call.args ? JSON.parse(call.args) : {}
|
|
794
|
+
} catch (err) {
|
|
795
|
+
parseFailed = recoverOrThrow(
|
|
796
|
+
new ToolExecutionError(
|
|
797
|
+
call.name,
|
|
798
|
+
call.id,
|
|
799
|
+
new Error(`Failed to parse tool input JSON: ${(err as Error).message}`),
|
|
800
|
+
),
|
|
801
|
+
options,
|
|
802
|
+
)
|
|
803
|
+
parsedInput = call.args
|
|
804
|
+
}
|
|
805
|
+
yield { type: 'tool_use', id: call.id, name: call.name, input: parsedInput }
|
|
806
|
+
const { content, isError } = parseFailed
|
|
807
|
+
?? (await runToolWithRecovery(
|
|
808
|
+
toolMap.get(call.name),
|
|
809
|
+
call.name,
|
|
810
|
+
call.id,
|
|
811
|
+
parsedInput,
|
|
812
|
+
options,
|
|
813
|
+
))
|
|
814
|
+
resultBlocks.push({
|
|
815
|
+
type: 'tool_result',
|
|
816
|
+
toolUseId: call.id,
|
|
817
|
+
content,
|
|
818
|
+
...(isError ? { isError: true } : {}),
|
|
819
|
+
} satisfies ToolResultBlock)
|
|
820
|
+
yield {
|
|
821
|
+
type: 'tool_result',
|
|
822
|
+
id: call.id,
|
|
823
|
+
name: call.name,
|
|
824
|
+
content,
|
|
825
|
+
isError,
|
|
257
826
|
}
|
|
258
827
|
}
|
|
828
|
+
workingMessages.push({ role: 'user', content: resultBlocks })
|
|
829
|
+
|
|
830
|
+
iterations++
|
|
831
|
+
if (iterations >= maxIterations) {
|
|
832
|
+
const text = textBuf
|
|
833
|
+
const value = parseGenerated(text, schema)
|
|
834
|
+
yield {
|
|
835
|
+
type: 'stop',
|
|
836
|
+
stopReason: 'max_iterations',
|
|
837
|
+
iterations,
|
|
838
|
+
usage: aggregated,
|
|
839
|
+
messages: workingMessages,
|
|
840
|
+
value,
|
|
841
|
+
text,
|
|
842
|
+
} as AgentStreamEvent<T>
|
|
843
|
+
return
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
/**
 * Transcribe an audio source via the SDK's `audio.transcriptions.create`.
 *
 * @param audio - Audio to transcribe; converted to an uploadable `File`
 *   by `audioSourceToFile` before the request is sent.
 * @param options - Optional `model`, `language`, `prompt` and abort
 *   `signal`. `model` falls back to `this.defaultTranscribeModel`.
 * @returns The transcript text (empty string when the response carries no
 *   string `text`), the model name used for the request, the raw SDK
 *   response, and `language` / `duration` when the response exposes them.
 */
async transcribe(
  audio: AudioSource,
  options: TranscribeOptions = {},
): Promise<TranscribeResult<OpenAI.Audio.TranscriptionCreateResponse>> {
  const model = options.model ?? this.defaultTranscribeModel
  const file = await audioSourceToFile(audio)

  // Optional fields are only added when the caller supplied them, so the
  // request never carries explicit `undefined` values.
  const languagePart = options.language !== undefined ? { language: options.language } : {}
  const promptPart = options.prompt !== undefined ? { prompt: options.prompt } : {}
  const params: OpenAI.Audio.TranscriptionCreateParams = {
    file,
    model,
    ...languagePart,
    ...promptPart,
  }
  const requestOptions = options.signal !== undefined ? { signal: options.signal } : undefined
  const response = await this.client.audio.transcriptions.create(params, requestOptions)

  // No `response_format` is set here, so the SDK default (`json`) applies
  // and normally only `text` comes back. `language` / `duration` appear
  // with `verbose_json`, which apps currently have to request through a
  // raw SDK call; the checks below pick those fields up whenever present.
  const text = 'text' in response && typeof response.text === 'string' ? response.text : ''
  const language =
    'language' in response && typeof response.language === 'string'
      ? response.language
      : undefined
  const duration =
    'duration' in response && typeof response.duration === 'number'
      ? response.duration
      : undefined

  return {
    text,
    model,
    raw: response,
    ...(language !== undefined ? { language } : {}),
    ...(duration !== undefined ? { duration } : {}),
  }
}
|
|
884
|
+
|
|
885
|
+
/**
 * Embed a batch of texts via the SDK's `embeddings.create`.
 *
 * @param texts - Inputs to embed; forwarded as the request's `input`.
 * @param options - Optional `model` (defaults to `this.defaultEmbedModel`),
 *   `dimensions`, and abort `signal`.
 * @returns One embedding per entry of `response.data` (in response order),
 *   the model reported by the response, the prompt-token count (0 when
 *   the response has no usage), and the raw SDK response.
 */
async embed(
  texts: readonly string[],
  options: EmbedOptions = {},
): Promise<EmbedResult<OpenAI.CreateEmbeddingResponse>> {
  const model = options.model ?? this.defaultEmbedModel

  // `dimensions` is only sent when the caller asked for it.
  const dimensionsPart =
    options.dimensions !== undefined ? { dimensions: options.dimensions } : {}
  const params: OpenAI.EmbeddingCreateParams = {
    model,
    input: texts as string[],
    ...dimensionsPart,
  }
  const requestOptions = options.signal !== undefined ? { signal: options.signal } : undefined
  const response = await this.client.embeddings.create(params, requestOptions)

  const embeddings = response.data.map((item) => item.embedding)
  const inputTokens = response.usage?.prompt_tokens ?? 0
  return {
    embeddings,
    model: response.model,
    usage: { inputTokens },
    raw: response,
  }
}
|
|
261
906
|
|
|
@@ -274,7 +919,7 @@ export class OpenAIProvider implements Provider {
|
|
|
274
919
|
strict: true,
|
|
275
920
|
},
|
|
276
921
|
}
|
|
277
|
-
const response = await this.client.chat.completions.create(params)
|
|
922
|
+
const response = await this.client.chat.completions.create(params, reqOpts(options))
|
|
278
923
|
const choice = response.choices[0]
|
|
279
924
|
const text = choice?.message?.content ?? ''
|
|
280
925
|
const value = parseGenerated(text, schema)
|
|
@@ -288,13 +933,38 @@ export class OpenAIProvider implements Provider {
|
|
|
288
933
|
}
|
|
289
934
|
}
|
|
290
935
|
|
|
936
|
+
/**
 * Shared MCP resolution step for the tool-loop variants.
 *
 * With no servers this short-circuits to an empty tool list and a
 * `close` that does nothing. Otherwise it delegates to
 * `resolveMcpTools`, forwarding the test-only `mcpClientFactory` and
 * the optional `mcpPool` only when they are set on this instance.
 *
 * Callers are expected to invoke `resolved.close()` in a `finally`;
 * that call is described as a no-op when the pool owns the connection
 * lifetime.
 */
protected resolveMcp(servers: readonly MCPServer[]): Promise<{
  tools: Tool[]
  close: () => Promise<void>
}> {
  if (servers.length > 0) {
    const factoryPart = this.mcpClientFactory ? { clientFactory: this.mcpClientFactory } : {}
    const poolPart = this.mcpPool ? { pool: this.mcpPool } : {}
    return resolveMcpTools(servers, { ...factoryPart, ...poolPart })
  }
  // Nothing to connect to: hand back an inert result.
  return Promise.resolve({ tools: [], close: async () => {} })
}
|
|
954
|
+
|
|
291
955
|
// ─── Param translation ──────────────────────────────────────────────────
|
|
292
956
|
|
|
293
|
-
|
|
957
|
+
protected buildParams(
|
|
294
958
|
messages: readonly Message[],
|
|
295
959
|
options: ChatOptions,
|
|
296
960
|
tools: readonly Tool[],
|
|
297
961
|
): OpenAI.Chat.ChatCompletionCreateParamsNonStreaming {
|
|
962
|
+
if (options.serverTools && options.serverTools.length > 0) {
|
|
963
|
+
throw new BrainError(
|
|
964
|
+
"OpenAIProvider: server tools (web_search / code_execution / web_fetch / url_context) are not supported on OpenAI's chat completions API. OpenAI's server tools live on the Responses API (separate provider slice). Run them as framework-local tools, route to Anthropic / Gemini, or wait for the OpenAIResponsesProvider slice.",
|
|
965
|
+
{ context: { provider: 'openai' } },
|
|
966
|
+
)
|
|
967
|
+
}
|
|
298
968
|
const model = options.model ?? this.defaultModel
|
|
299
969
|
const params: OpenAI.Chat.ChatCompletionCreateParamsNonStreaming = {
|
|
300
970
|
model,
|
|
@@ -389,6 +1059,55 @@ export class OpenAIProvider implements Provider {
|
|
|
389
1059
|
|
|
390
1060
|
// ─── Shape converters ─────────────────────────────────────────────────────
|
|
391
1061
|
|
|
1062
|
+
/**
 * Build the per-request options bag passed as the SDK call's second
 * argument. Currently carries only the abort `signal`; returns
 * `undefined` when no signal was supplied so nothing is forwarded.
 */
function reqOpts(options: { signal?: AbortSignal }): { signal?: AbortSignal } | undefined {
  const { signal } = options
  if (signal === undefined) return undefined
  return { signal }
}
|
|
1066
|
+
|
|
1067
|
+
/**
 * Turn an `AudioSource` into a `File` suitable for the SDK upload.
 *
 * - base64 sources are decoded in memory and tagged with their own
 *   `mediaType`.
 * - any other source is treated as a URL: the audio is fetched, and the
 *   response's `content-type` header (default `audio/mpeg`) becomes the
 *   file type.
 *
 * `AudioSource` carries no filename, so one is synthesized as
 * `audio.<ext>` with the extension derived from the MIME type via
 * `extFromMime`; per the original note, the extension is what lets the
 * SDK choose the multipart content-type.
 *
 * @throws BrainError when the URL fetch responds with a non-OK status.
 */
async function audioSourceToFile(audio: AudioSource): Promise<File> {
  if (audio.type !== 'base64') {
    const fetched = await fetch(audio.url)
    if (fetched.ok) {
      const payload = await fetched.arrayBuffer()
      const contentType = fetched.headers.get('content-type') ?? 'audio/mpeg'
      const fetchedName = `audio.${extFromMime(contentType)}`
      return new File([payload], fetchedName, { type: contentType })
    }
    throw new BrainError(
      `OpenAIProvider.transcribe: failed to fetch audio at ${audio.url}: ${fetched.status} ${fetched.statusText}.`,
      { context: { url: audio.url, status: fetched.status } },
    )
  }

  const decoded = Buffer.from(audio.data, 'base64')
  const inlineName = `audio.${extFromMime(audio.mediaType)}`
  return new File([decoded], inlineName, { type: audio.mediaType })
}
|
|
1091
|
+
|
|
1092
|
+
/**
 * MIME type → filename extension for synthesized audio upload names.
 * Keys are lowercase and parameter-free. Includes the common aliases
 * (`x-` prefixed and legacy forms) that browsers and object stores
 * actually report, so those no longer fall through to the default.
 */
const AUDIO_EXT_BY_MIME: ReadonlyMap<string, string> = new Map([
  ['audio/mp3', 'mp3'],
  ['audio/mpeg', 'mp3'],
  ['audio/mpga', 'mp3'],
  ['audio/x-mp3', 'mp3'],
  ['audio/x-mpeg', 'mp3'],
  ['audio/wav', 'wav'],
  ['audio/x-wav', 'wav'],
  ['audio/wave', 'wav'],
  ['audio/vnd.wave', 'wav'],
  ['audio/x-pn-wav', 'wav'],
  ['audio/ogg', 'ogg'],
  ['application/ogg', 'ogg'],
  ['audio/flac', 'flac'],
  ['audio/x-flac', 'flac'],
  ['audio/webm', 'webm'],
  ['video/webm', 'webm'],
  ['audio/aac', 'm4a'],
  ['audio/x-aac', 'm4a'],
  ['audio/mp4', 'm4a'],
  ['audio/m4a', 'm4a'],
  ['audio/x-m4a', 'm4a'],
  ['video/mp4', 'mp4'],
])

/**
 * Pick a filename extension for an audio MIME type.
 *
 * Matching ignores case, surrounding whitespace, and any parameters
 * after `;` (e.g. `audio/webm; codecs=opus` matches `audio/webm`).
 *
 * @param mime - MIME type string, optionally with parameters.
 * @returns The mapped extension without a leading dot, or `'mp3'` when
 *   the type is empty or not recognized.
 */
function extFromMime(mime: string): string {
  // Strip parameters (`audio/mpeg; codecs=...` → `audio/mpeg`).
  const essence = mime.split(';')[0]?.trim().toLowerCase() ?? ''
  return AUDIO_EXT_BY_MIME.get(essence) ?? 'mp3'
}
|
|
1103
|
+
|
|
1104
|
+
/**
 * Abort guard: does nothing unless `signal` exists and has already
 * fired. When it has, rethrows the signal's own `reason`, falling back
 * to a `DOMException` named `AbortError` if the reason is null or
 * undefined.
 */
function checkAborted(signal: AbortSignal | undefined): void {
  if (signal === undefined || !signal.aborted) return
  const cause: unknown = signal.reason
  throw cause ?? new DOMException('Aborted', 'AbortError')
}
|
|
1110
|
+
|
|
392
1111
|
function systemPromptText(system: SystemPrompt | undefined): string {
|
|
393
1112
|
if (system === undefined) return ''
|
|
394
1113
|
if (typeof system === 'string') return system
|
|
@@ -425,8 +1144,47 @@ function toOpenAIMessage(message: Message): OpenAI.Chat.ChatCompletionMessagePar
|
|
|
425
1144
|
return param
|
|
426
1145
|
}
|
|
427
1146
|
|
|
428
|
-
//
|
|
429
|
-
//
|
|
1147
|
+
// Document / audio aren't supported by OpenAI's chat completions
|
|
1148
|
+
// API. Throw with vendor-specific guidance so apps don't waste a
|
|
1149
|
+
// 400 trying to send a PDF.
|
|
1150
|
+
for (const block of message.content) {
|
|
1151
|
+
if (block.type === 'document') {
|
|
1152
|
+
throw new BrainError(
|
|
1153
|
+
"OpenAIProvider: document blocks are not supported on OpenAI's chat completions API. For PDFs, split the document to images (one per page) and send them as ImageBlocks on a vision-capable model (gpt-5 / gpt-4o family); or route document workloads to Anthropic / Gemini, which accept PDF blocks natively.",
|
|
1154
|
+
{ context: { provider: 'openai' } },
|
|
1155
|
+
)
|
|
1156
|
+
}
|
|
1157
|
+
if (block.type === 'audio') {
|
|
1158
|
+
throw new BrainError(
|
|
1159
|
+
"OpenAIProvider: audio blocks are not supported on OpenAI's chat completions API. Transcribe audio upstream via OpenAI's Whisper / gpt-4o-transcribe and send the resulting text; or route audio workloads to Gemini, which accepts audio blocks natively.",
|
|
1160
|
+
{ context: { provider: 'openai' } },
|
|
1161
|
+
)
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
// User-role multi-block content. If any image blocks are present,
|
|
1166
|
+
// emit OpenAI's multi-part content array (text + image_url
|
|
1167
|
+
// entries). Otherwise flatten text — keeps simple text messages
|
|
1168
|
+
// cleanly typed as strings. MCP blocks (read-only,
|
|
1169
|
+
// Anthropic-specific) are silently dropped.
|
|
1170
|
+
const images = message.content.filter((b): b is ImageBlock => b.type === 'image')
|
|
1171
|
+
if (images.length > 0) {
|
|
1172
|
+
const parts: OpenAI.Chat.ChatCompletionContentPart[] = []
|
|
1173
|
+
for (const block of message.content) {
|
|
1174
|
+
if (block.type === 'text') {
|
|
1175
|
+
parts.push({ type: 'text', text: block.text })
|
|
1176
|
+
} else if (block.type === 'image') {
|
|
1177
|
+
const url =
|
|
1178
|
+
block.source.type === 'base64'
|
|
1179
|
+
? `data:${block.source.mediaType};base64,${block.source.data}`
|
|
1180
|
+
: block.source.url
|
|
1181
|
+
parts.push({ type: 'image_url', image_url: { url } })
|
|
1182
|
+
}
|
|
1183
|
+
// tool_result / tool_use / mcp blocks dropped from user content
|
|
1184
|
+
// (they're handled elsewhere or aren't valid on user turns).
|
|
1185
|
+
}
|
|
1186
|
+
return { role: 'user', content: parts }
|
|
1187
|
+
}
|
|
430
1188
|
const text = message.content
|
|
431
1189
|
.filter((b): b is TextBlock => b.type === 'text')
|
|
432
1190
|
.map((b) => b.text)
|