veryfront 0.1.321 → 0.1.322

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/esm/deno.js +1 -1
  2. package/esm/extensions/ext-jwt/src/index.d.ts +39 -0
  3. package/esm/extensions/ext-jwt/src/index.d.ts.map +1 -0
  4. package/esm/extensions/ext-jwt/src/index.js +103 -0
  5. package/esm/extensions/ext-openai/src/openai-provider.d.ts +29 -0
  6. package/esm/extensions/ext-openai/src/openai-provider.d.ts.map +1 -0
  7. package/esm/extensions/ext-openai/src/openai-provider.js +1095 -0
  8. package/esm/src/embedding/veryfront-cloud/provider.d.ts.map +1 -1
  9. package/esm/src/embedding/veryfront-cloud/provider.js +6 -1
  10. package/esm/src/provider/shared/index.d.ts +16 -0
  11. package/esm/src/provider/shared/index.d.ts.map +1 -0
  12. package/esm/src/provider/shared/index.js +18 -0
  13. package/esm/src/provider/veryfront-cloud/openai.d.ts +10 -0
  14. package/esm/src/provider/veryfront-cloud/openai.d.ts.map +1 -0
  15. package/esm/src/provider/veryfront-cloud/openai.js +18 -0
  16. package/esm/src/provider/veryfront-cloud/provider.d.ts.map +1 -1
  17. package/esm/src/provider/veryfront-cloud/provider.js +6 -1
  18. package/esm/src/proxy/main.js +3 -0
  19. package/esm/src/utils/version-constant.d.ts +1 -1
  20. package/esm/src/utils/version-constant.js +1 -1
  21. package/package.json +2 -1
  22. package/src/deno.js +1 -1
  23. package/src/extensions/ext-jwt/src/index.ts +173 -0
  24. package/src/extensions/ext-openai/src/openai-provider.ts +1481 -0
  25. package/src/src/embedding/veryfront-cloud/provider.ts +6 -3
  26. package/src/src/provider/shared/index.ts +62 -0
  27. package/src/src/provider/veryfront-cloud/openai.ts +34 -0
  28. package/src/src/provider/veryfront-cloud/provider.ts +6 -3
  29. package/src/src/proxy/main.ts +4 -0
  30. package/src/src/utils/version-constant.ts +1 -1
@@ -0,0 +1,1481 @@
1
+ /**
2
+ * OpenAI provider — implements the {@link AIProvider} contract for OpenAI,
3
+ * OpenAI-compatible endpoints (Azure OpenAI, Moonshot AI), and OpenAI's
4
+ * Responses API.
5
+ *
6
+ * Ported from `src/provider/runtime-loader.ts` as part of PR 11.
7
+ *
8
+ * @module extensions/ext-openai/openai-provider
9
+ */
10
+
11
+ import type { AIProvider, AIProviderConfig } from "../../../src/extensions/interfaces/index.js";
12
+ import type { EmbeddingRuntime, ModelRuntime } from "../../../src/provider/types.js";
13
+ import {
14
+ buildProviderError,
15
+ createOpenAIRequestInit,
16
+ createWarningCollector,
17
+ getOpenAIChatCompletionsUrl,
18
+ getOpenAIEmbeddingUrl,
19
+ getOpenAIResponsesUrl,
20
+ isNumberArray,
21
+ mergeUsage,
22
+ parseRetryAfterMs,
23
+ ProviderError,
24
+ ProviderOverloadedError,
25
+ ProviderQuotaError,
26
+ ProviderRateLimitError,
27
+ ProviderRequestError,
28
+ readProviderOptions,
29
+ readRecord,
30
+ readTextParts,
31
+ requestJson,
32
+ requestStream,
33
+ stringifyJsonValue,
34
+ TOOL_INPUT_PENDING_THRESHOLD_MS,
35
+ toOpenAICompatibleMessages,
36
+ toOpenAICompatibleTools,
37
+ withToolInputStatusTransitions,
38
+ } from "../../../src/provider/shared/index.js";
39
+ import type { OpenAICompatibleChatRequest, RuntimePromptMessage } from "../../../src/provider/shared/index.js";
40
+
41
+ // Re-export error classes so extension tests can import from this module.
42
+ export {
43
+ buildProviderError,
44
+ isNumberArray,
45
+ mergeUsage,
46
+ parseRetryAfterMs,
47
+ ProviderError,
48
+ ProviderOverloadedError,
49
+ ProviderQuotaError,
50
+ ProviderRateLimitError,
51
+ ProviderRequestError,
52
+ TOOL_INPUT_PENDING_THRESHOLD_MS,
53
+ withToolInputStatusTransitions,
54
+ };
55
+
56
+ export interface OpenAIRuntimeConfig {
57
+ apiKey: string;
58
+ baseURL?: string;
59
+ name?: string;
60
+ fetch?: typeof globalThis.fetch;
61
+ }
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Internal types
65
+ // ---------------------------------------------------------------------------
66
+
67
+ type ProviderReasoningEffort = "low" | "medium" | "high" | "max";
68
+
69
+ type ProviderReasoningOption = {
70
+ enabled?: boolean;
71
+ effort?: ProviderReasoningEffort;
72
+ budgetTokens?: number;
73
+ };
74
+
75
+ type OpenAICompatibleChoice = {
76
+ message?: unknown;
77
+ delta?: unknown;
78
+ finish_reason?: unknown;
79
+ };
80
+
81
+ type OpenAIStreamToolCallState = {
82
+ id: string;
83
+ name: string;
84
+ arguments: string;
85
+ started: boolean;
86
+ };
87
+
88
+ type RuntimeToolDefinition =
89
+ | {
90
+ type: "function";
91
+ name: string;
92
+ description?: string;
93
+ inputSchema: unknown;
94
+ }
95
+ | {
96
+ type: "provider";
97
+ name: string;
98
+ id: `${string}.${string}`;
99
+ args: Record<string, unknown>;
100
+ };
101
+
102
+ type OpenAICompatibleLanguageOptions = {
103
+ prompt: RuntimePromptMessage[];
104
+ maxOutputTokens?: number;
105
+ temperature?: number;
106
+ topP?: number;
107
+ topK?: number;
108
+ stopSequences?: string[];
109
+ tools?: RuntimeToolDefinition[];
110
+ toolChoice?: unknown;
111
+ seed?: number;
112
+ presencePenalty?: number;
113
+ frequencyPenalty?: number;
114
+ headers?: HeadersInit;
115
+ providerOptions?: Record<string, unknown>;
116
+ includeRawChunks?: boolean;
117
+ abortSignal?: AbortSignal;
118
+ reasoning?: ProviderReasoningOption;
119
+ userId?: string;
120
+ serviceTier?: "auto" | "default" | "flex" | "scale";
121
+ parallelToolCalls?: boolean;
122
+ responseFormat?:
123
+ | { type: "text" }
124
+ | { type: "json" }
125
+ | {
126
+ type: "json_schema";
127
+ name: string;
128
+ schema: unknown;
129
+ description?: string;
130
+ strict?: boolean;
131
+ };
132
+ };
133
+
134
+ type RuntimeUsage = {
135
+ inputTokens?: number;
136
+ outputTokens?: number;
137
+ totalTokens?: number;
138
+ cacheCreationInputTokens?: number;
139
+ cacheReadInputTokens?: number;
140
+ };
141
+
142
+ // ---------------------------------------------------------------------------
143
+ // Embedding helpers
144
+ // ---------------------------------------------------------------------------
145
+
146
+ function extractOpenAIEmbeddings(payload: unknown): number[][] {
147
+ const record = readRecord(payload);
148
+ const data = record?.data;
149
+ if (!Array.isArray(data)) {
150
+ throw new Error("Invalid OpenAI embedding response: data array missing");
151
+ }
152
+
153
+ const embeddings: number[][] = [];
154
+
155
+ for (const item of data) {
156
+ const itemRecord = readRecord(item);
157
+ const embedding = itemRecord?.embedding;
158
+ if (!isNumberArray(embedding)) {
159
+ throw new Error("Invalid OpenAI embedding response: embedding vector missing");
160
+ }
161
+ embeddings.push(embedding);
162
+ }
163
+
164
+ return embeddings;
165
+ }
166
+
167
+ function extractOpenAIUsageTokens(payload: unknown): number | undefined {
168
+ const record = readRecord(payload);
169
+ const usage = readRecord(record?.usage);
170
+ const totalTokens = usage?.total_tokens;
171
+ return typeof totalTokens === "number" ? totalTokens : undefined;
172
+ }
173
+
174
+ // ---------------------------------------------------------------------------
175
+ // Chat helpers
176
+ // ---------------------------------------------------------------------------
177
+
178
+ function normalizeOpenAIFinishReason(
179
+ raw: unknown,
180
+ ): string | { unified: string; raw: string } | null {
181
+ if (typeof raw !== "string") {
182
+ return null;
183
+ }
184
+
185
+ if (raw === "tool_calls") {
186
+ return { unified: "tool-calls", raw };
187
+ }
188
+
189
+ if (raw === "content_filter") {
190
+ return { unified: "content-filter", raw };
191
+ }
192
+
193
+ return raw;
194
+ }
195
+
196
+ function extractOpenAIUsage(payload: unknown): RuntimeUsage | undefined {
197
+ const record = readRecord(payload);
198
+ const usage = readRecord(record?.usage);
199
+ if (!usage) {
200
+ return undefined;
201
+ }
202
+
203
+ const inputTokens = usage.prompt_tokens;
204
+ const outputTokens = usage.completion_tokens;
205
+ const totalTokens = usage.total_tokens;
206
+ const promptTokensDetails = readRecord(usage.prompt_tokens_details);
207
+ const cachedTokens = promptTokensDetails?.cached_tokens;
208
+
209
+ return {
210
+ inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
211
+ outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
212
+ totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
213
+ ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
214
+ };
215
+ }
216
+
217
+ function extractOpenAIContentText(content: unknown): string {
218
+ if (typeof content === "string") {
219
+ return content;
220
+ }
221
+
222
+ if (!Array.isArray(content)) {
223
+ return "";
224
+ }
225
+
226
+ let text = "";
227
+ for (const part of content) {
228
+ const record = readRecord(part);
229
+ const type = record?.type;
230
+ if (type === "text" && typeof record?.text === "string") {
231
+ text += record.text;
232
+ }
233
+ }
234
+
235
+ return text;
236
+ }
237
+
238
+ function extractOpenAIToolCalls(message: Record<string, unknown>): Array<{
239
+ toolCallId: string;
240
+ toolName: string;
241
+ input: string;
242
+ }> {
243
+ const toolCalls = message.tool_calls;
244
+ if (!Array.isArray(toolCalls)) {
245
+ return [];
246
+ }
247
+
248
+ const normalized: Array<{ toolCallId: string; toolName: string; input: string }> = [];
249
+ for (const entry of toolCalls) {
250
+ const record = readRecord(entry);
251
+ const id = typeof record?.id === "string" ? record.id : undefined;
252
+ const fn = readRecord(record?.function);
253
+ const name = typeof fn?.name === "string" ? fn.name : undefined;
254
+ const argumentsText = typeof fn?.arguments === "string" ? fn.arguments : undefined;
255
+ if (!id || !name || argumentsText === undefined) {
256
+ continue;
257
+ }
258
+ normalized.push({
259
+ toolCallId: id,
260
+ toolName: name,
261
+ input: argumentsText,
262
+ });
263
+ }
264
+
265
+ return normalized;
266
+ }
267
+
268
+ /**
269
+ * OpenAI reasoning models (o1 / o3 / o4 family) use the completion path but
270
+ * have different constraints than chat models: sampling params are rejected,
271
+ * and they accept a `reasoning_effort` field. We detect them by model id
272
+ * prefix so callers don't have to configure it per runtime.
273
+ */
274
+ function isOpenAIReasoningModel(modelId: string): boolean {
275
+ return /^o[134](-|$)/.test(modelId);
276
+ }
277
+
278
+ /**
279
+ * Detect native OpenAI models (gpt-*, o-series, chatgpt-*) vs third-party
280
+ * OpenAI-compatible providers (Kimi, etc.). Native OpenAI models require
281
+ * `max_completion_tokens` (the old `max_tokens` is rejected by newer models
282
+ * like gpt-5.2), while third-party providers still expect `max_tokens`.
283
+ */
284
+ function isNativeOpenAIModel(modelId: string): boolean {
285
+ return /^(gpt-|o[134](-|$)|chatgpt-)/.test(modelId);
286
+ }
287
+
288
+ /**
289
+ * Kimi K2.5 fixes sampling parameters (temperature, top_p, presence_penalty,
290
+ * frequency_penalty) to predetermined values and rejects any other values.
291
+ * See https://platform.moonshot.cn/docs/guide/kimi-k2-5-quickstart
292
+ */
293
+ function isFixedSamplingModel(modelId: string): boolean {
294
+ return /^kimi-k2\.5/.test(modelId);
295
+ }
296
+
297
+ /**
298
+ * Map the unified reasoning effort to OpenAI's `reasoning_effort` enum.
299
+ * OpenAI doesn't accept "max" — we collapse it to "high".
300
+ */
301
+ function resolveOpenAIReasoningEffort(
302
+ option: ProviderReasoningOption | undefined,
303
+ ): "low" | "medium" | "high" | undefined {
304
+ if (!option || option.enabled !== true) {
305
+ return undefined;
306
+ }
307
+ switch (option.effort) {
308
+ case "low":
309
+ return "low";
310
+ case "high":
311
+ case "max":
312
+ return "high";
313
+ case "medium":
314
+ default:
315
+ return "medium";
316
+ }
317
+ }
318
+
319
+ function unwrapToolInputSchema(inputSchema: unknown): unknown {
320
+ if (typeof inputSchema !== "object" || inputSchema === null || Array.isArray(inputSchema)) {
321
+ return inputSchema;
322
+ }
323
+
324
+ const candidate = Reflect.get(inputSchema, "jsonSchema");
325
+ return candidate ?? inputSchema;
326
+ }
327
+
328
+ function toSnakeCaseRecord(record: Record<string, unknown>): Record<string, unknown> {
329
+ return Object.fromEntries(
330
+ Object.entries(record).map(([key, value]) => [
331
+ key.replace(/[A-Z]/g, (match) => `_${match.toLowerCase()}`),
332
+ value,
333
+ ]),
334
+ );
335
+ }
336
+
337
+ type WarningCollector = {
338
+ push(warning: {
339
+ type: "unsupported-setting" | "other";
340
+ setting?: string;
341
+ details?: string;
342
+ provider: string;
343
+ }): void;
344
+ drain(): Array<{
345
+ type: "unsupported-setting" | "other";
346
+ setting?: string;
347
+ details?: string;
348
+ provider: string;
349
+ }>;
350
+ };
351
+
352
+ function buildOpenAIChatRequest(
353
+ modelId: string,
354
+ providerName: string,
355
+ options: OpenAICompatibleLanguageOptions,
356
+ stream: boolean,
357
+ warnings: WarningCollector,
358
+ ): OpenAICompatibleChatRequest {
359
+ const isReasoningModel = isOpenAIReasoningModel(modelId);
360
+ const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
361
+ const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
362
+ const fixedSampling = isFixedSamplingModel(modelId);
363
+ const dropSamplingParams = reasoningEnabled || fixedSampling;
364
+
365
+ // OpenAI Chat Completions has no top_k surface.
366
+ if (options.topK !== undefined) {
367
+ warnings.push({
368
+ type: "unsupported-setting",
369
+ provider: "openai",
370
+ setting: "topK",
371
+ details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
372
+ });
373
+ }
374
+
375
+ // Reasoning models (o1 / o3 / o4) and models with fixed sampling params
376
+ // reject sampling params outright. Emit warnings.
377
+ if (dropSamplingParams) {
378
+ const dropped: Array<[keyof typeof options, string]> = [
379
+ ["temperature", "temperature"],
380
+ ["topP", "top_p"],
381
+ ["presencePenalty", "presence_penalty"],
382
+ ["frequencyPenalty", "frequency_penalty"],
383
+ ];
384
+ for (const [key, openaiName] of dropped) {
385
+ if (options[key] !== undefined) {
386
+ warnings.push({
387
+ type: "unsupported-setting",
388
+ provider: "openai",
389
+ setting: key,
390
+ details: fixedSampling
391
+ ? `Dropped because this model uses fixed sampling parameters.`
392
+ : `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
393
+ });
394
+ }
395
+ }
396
+ }
397
+
398
+ const body: OpenAICompatibleChatRequest = {
399
+ model: modelId,
400
+ messages: toOpenAICompatibleMessages(options.prompt),
401
+ ...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
402
+ ...(options.maxOutputTokens !== undefined
403
+ ? isNativeOpenAIModel(modelId)
404
+ ? { max_completion_tokens: options.maxOutputTokens }
405
+ : { max_tokens: options.maxOutputTokens }
406
+ : {}),
407
+ ...(!dropSamplingParams && options.temperature !== undefined
408
+ ? { temperature: options.temperature }
409
+ : {}),
410
+ ...(!dropSamplingParams && options.topP !== undefined ? { top_p: options.topP } : {}),
411
+ ...(options.stopSequences && options.stopSequences.length > 0
412
+ ? { stop: options.stopSequences }
413
+ : {}),
414
+ ...(toOpenAICompatibleTools(options.tools)
415
+ ? { tools: toOpenAICompatibleTools(options.tools) }
416
+ : {}),
417
+ ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
418
+ ...(options.seed !== undefined ? { seed: options.seed } : {}),
419
+ ...(!dropSamplingParams && options.presencePenalty !== undefined
420
+ ? { presence_penalty: options.presencePenalty }
421
+ : {}),
422
+ ...(!dropSamplingParams && options.frequencyPenalty !== undefined
423
+ ? { frequency_penalty: options.frequencyPenalty }
424
+ : {}),
425
+ ...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
426
+ ...(typeof options.userId === "string" && options.userId.length > 0
427
+ ? { user: options.userId }
428
+ : {}),
429
+ ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
430
+ ...(options.parallelToolCalls !== undefined
431
+ ? { parallel_tool_calls: options.parallelToolCalls }
432
+ : {}),
433
+ ...(options.responseFormat && options.responseFormat.type !== "text"
434
+ ? {
435
+ response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
436
+ type: "json_schema",
437
+ json_schema: {
438
+ name: options.responseFormat.name,
439
+ ...(typeof options.responseFormat.description === "string"
440
+ ? { description: options.responseFormat.description }
441
+ : {}),
442
+ schema: unwrapToolInputSchema(options.responseFormat.schema),
443
+ ...(options.responseFormat.strict !== undefined
444
+ ? { strict: options.responseFormat.strict }
445
+ : {}),
446
+ },
447
+ },
448
+ }
449
+ : {}),
450
+ };
451
+
452
+ const providerOpts = readProviderOptions(options.providerOptions, "openai", providerName);
453
+
454
+ // Normalize max_tokens → max_completion_tokens for native OpenAI models.
455
+ if (isNativeOpenAIModel(modelId) && "max_tokens" in providerOpts) {
456
+ if (!("max_completion_tokens" in providerOpts)) {
457
+ providerOpts.max_completion_tokens = providerOpts.max_tokens;
458
+ }
459
+ delete providerOpts.max_tokens;
460
+ }
461
+
462
+ Object.assign(body, providerOpts);
463
+ return body;
464
+ }
465
+
466
+ // ---------------------------------------------------------------------------
467
+ // Chat streaming
468
+ // ---------------------------------------------------------------------------
469
+
470
+ function parseSseChunk(chunk: string): {
471
+ events: Array<unknown | "[DONE]">;
472
+ remainder: string;
473
+ } {
474
+ const blocks = chunk.split(/\r?\n\r?\n/);
475
+ const remainder = blocks.pop() ?? "";
476
+ const events = blocks.flatMap((block) => {
477
+ const dataLines = block.split(/\r?\n/)
478
+ .filter((line) => line.startsWith("data:"))
479
+ .map((line) => line.slice(5).trimStart());
480
+
481
+ if (!dataLines.length) {
482
+ return [];
483
+ }
484
+
485
+ const payload = dataLines.join("\n").trim();
486
+ if (payload === "[DONE]") {
487
+ return ["[DONE]" as const];
488
+ }
489
+
490
+ try {
491
+ return [JSON.parse(payload) as unknown];
492
+ } catch {
493
+ return [];
494
+ }
495
+ });
496
+
497
+ return { events, remainder };
498
+ }
499
+
500
+ function extractFirstChoice(payload: unknown): OpenAICompatibleChoice | undefined {
501
+ const record = readRecord(payload);
502
+ const choices = record?.choices;
503
+ if (!Array.isArray(choices) || choices.length === 0) {
504
+ return undefined;
505
+ }
506
+
507
+ const first = readRecord(choices[0]);
508
+ if (!first) {
509
+ return undefined;
510
+ }
511
+
512
+ return first;
513
+ }
514
+
515
+ function buildOpenAIGenerateResult(payload: unknown): {
516
+ content: Array<
517
+ { type: "text"; text: string } | {
518
+ type: "tool-call";
519
+ toolCallId: string;
520
+ toolName: string;
521
+ input: string;
522
+ }
523
+ >;
524
+ finishReason?: string | { unified: string; raw: string } | null;
525
+ usage?: RuntimeUsage;
526
+ } {
527
+ const choice = extractFirstChoice(payload);
528
+ const message = readRecord(choice?.message);
529
+ const text = extractOpenAIContentText(message?.content);
530
+ const toolCalls = message ? extractOpenAIToolCalls(message) : [];
531
+
532
+ return {
533
+ content: [
534
+ ...(text.length > 0 ? [{ type: "text" as const, text }] : []),
535
+ ...toolCalls.map((toolCall) => ({
536
+ type: "tool-call" as const,
537
+ toolCallId: toolCall.toolCallId,
538
+ toolName: toolCall.toolName,
539
+ input: toolCall.input,
540
+ })),
541
+ ],
542
+ finishReason: normalizeOpenAIFinishReason(choice?.finish_reason),
543
+ usage: extractOpenAIUsage(payload),
544
+ };
545
+ }
546
+
547
+ async function* streamOpenAICompatibleParts(
548
+ stream: ReadableStream<Uint8Array>,
549
+ ): AsyncIterable<unknown> {
550
+ const decoder = new TextDecoder();
551
+ let buffer = "";
552
+ const toolCalls = new Map<number, OpenAIStreamToolCallState>();
553
+ let reasoningId: string | null = null;
554
+ let reasoningIndex = 0;
555
+ let finishReason: string | { unified: string; raw: string } | null = null;
556
+ let usage: RuntimeUsage | undefined;
557
+
558
+ for await (const chunk of stream) {
559
+ buffer += decoder.decode(chunk, { stream: true });
560
+ const parsed = parseSseChunk(buffer);
561
+ buffer = parsed.remainder;
562
+
563
+ for (const event of parsed.events) {
564
+ if (event === "[DONE]") {
565
+ continue;
566
+ }
567
+
568
+ const record = readRecord(event);
569
+ usage = extractOpenAIUsage(record) ?? usage;
570
+ const choice = extractFirstChoice(record);
571
+ if (!choice) {
572
+ continue;
573
+ }
574
+
575
+ const delta = readRecord(choice.delta);
576
+ if (typeof delta?.reasoning_content === "string" && delta.reasoning_content.length > 0) {
577
+ if (!reasoningId) {
578
+ reasoningId = `reasoning-${reasoningIndex++}`;
579
+ yield {
580
+ type: "reasoning-start",
581
+ id: reasoningId,
582
+ };
583
+ }
584
+
585
+ yield {
586
+ type: "reasoning-delta",
587
+ id: reasoningId,
588
+ delta: delta.reasoning_content,
589
+ };
590
+ }
591
+
592
+ const textDelta = extractOpenAIContentText(delta?.content);
593
+ if (textDelta.length > 0) {
594
+ if (reasoningId) {
595
+ yield {
596
+ type: "reasoning-end",
597
+ id: reasoningId,
598
+ };
599
+ reasoningId = null;
600
+ }
601
+ yield { type: "text-delta", delta: textDelta };
602
+ }
603
+
604
+ const rawToolCalls = Array.isArray(delta?.tool_calls) ? delta.tool_calls : [];
605
+ for (const rawToolCall of rawToolCalls) {
606
+ if (reasoningId) {
607
+ yield {
608
+ type: "reasoning-end",
609
+ id: reasoningId,
610
+ };
611
+ reasoningId = null;
612
+ }
613
+
614
+ const toolCallRecord = readRecord(rawToolCall);
615
+ const index = typeof toolCallRecord?.index === "number" ? toolCallRecord.index : 0;
616
+ const current = toolCalls.get(index) ?? {
617
+ id: typeof toolCallRecord?.id === "string" ? toolCallRecord.id : `tool-${index}`,
618
+ name: "",
619
+ arguments: "",
620
+ started: false,
621
+ };
622
+
623
+ if (typeof toolCallRecord?.id === "string") {
624
+ current.id = toolCallRecord.id;
625
+ }
626
+
627
+ const fn = readRecord(toolCallRecord?.function);
628
+ if (typeof fn?.name === "string") {
629
+ current.name = fn.name;
630
+ }
631
+
632
+ if (!current.started && current.name.length > 0) {
633
+ current.started = true;
634
+ yield {
635
+ type: "tool-input-start",
636
+ id: current.id,
637
+ toolName: current.name,
638
+ };
639
+ }
640
+
641
+ if (typeof fn?.arguments === "string" && fn.arguments.length > 0) {
642
+ current.arguments += fn.arguments;
643
+ yield {
644
+ type: "tool-input-delta",
645
+ id: current.id,
646
+ delta: fn.arguments,
647
+ };
648
+ }
649
+
650
+ toolCalls.set(index, current);
651
+ }
652
+
653
+ const normalizedFinishReason = normalizeOpenAIFinishReason(choice.finish_reason);
654
+ if (normalizedFinishReason) {
655
+ finishReason = normalizedFinishReason;
656
+ }
657
+ }
658
+ }
659
+
660
+ if (buffer.trim().length > 0) {
661
+ const parsed = parseSseChunk(`${buffer}\n\n`);
662
+ for (const event of parsed.events) {
663
+ if (event === "[DONE]") {
664
+ continue;
665
+ }
666
+
667
+ const record = readRecord(event);
668
+ usage = extractOpenAIUsage(record) ?? usage;
669
+ }
670
+ }
671
+
672
+ if (reasoningId) {
673
+ yield {
674
+ type: "reasoning-end",
675
+ id: reasoningId,
676
+ };
677
+ }
678
+
679
+ if (
680
+ finishReason &&
681
+ typeof finishReason === "object" &&
682
+ finishReason.unified === "tool-calls"
683
+ ) {
684
+ for (const toolCall of toolCalls.values()) {
685
+ yield {
686
+ type: "tool-call",
687
+ toolCallId: toolCall.id,
688
+ toolName: toolCall.name,
689
+ input: toolCall.arguments,
690
+ };
691
+ }
692
+ }
693
+
694
+ yield {
695
+ type: "finish",
696
+ finishReason,
697
+ ...(usage ? { usage } : {}),
698
+ };
699
+ }
700
+
701
+ // ---------------------------------------------------------------------------
702
+ // Responses API types and helpers
703
+ // ---------------------------------------------------------------------------
704
+
705
+ type OpenAIResponsesInputItem = Record<string, unknown>;
706
+
707
+ type OpenAIResponsesRequest = {
708
+ model: string;
709
+ input: OpenAIResponsesInputItem[];
710
+ instructions?: string;
711
+ stream?: boolean;
712
+ max_output_tokens?: number;
713
+ temperature?: number;
714
+ top_p?: number;
715
+ tools?: Array<Record<string, unknown>>;
716
+ tool_choice?: unknown;
717
+ reasoning?: { effort?: string; summary?: string };
718
+ metadata?: Record<string, string>;
719
+ user?: string;
720
+ service_tier?: string;
721
+ parallel_tool_calls?: boolean;
722
+ text?: { format: Record<string, unknown> };
723
+ [key: string]: unknown;
724
+ };
725
+
726
+ /**
727
+ * Convert the unified RuntimePromptMessage[] to the Responses API `input`
728
+ * array shape. Differences from Chat Completions:
729
+ * - System prompts go on the top-level `instructions` field, not inline.
730
+ * - Content parts use `input_text` / `output_text` discriminants instead
731
+ * of the Chat Completions plain-text shorthand.
732
+ * - Assistant tool calls become standalone `function_call` items in the
733
+ * input array, not nested `tool_calls` on a message.
734
+ * - Tool results become standalone `function_call_output` items.
735
+ * - Reasoning content parts roundtrip as `reasoning` items so callers can
736
+ * replay multi-turn conversations with chain-of-thought intact.
737
+ */
738
+ function toOpenAIResponsesInput(
739
+ prompt: RuntimePromptMessage[],
740
+ ): { instructions?: string; input: OpenAIResponsesInputItem[] } {
741
+ const instructionsParts: string[] = [];
742
+ const input: OpenAIResponsesInputItem[] = [];
743
+
744
+ for (const message of prompt) {
745
+ switch (message.role) {
746
+ case "system":
747
+ if (message.content.length > 0) {
748
+ instructionsParts.push(message.content);
749
+ }
750
+ break;
751
+ case "user":
752
+ input.push({
753
+ role: "user",
754
+ content: [{ type: "input_text", text: readTextParts(message.content) }],
755
+ });
756
+ break;
757
+ case "assistant": {
758
+ const messageContent: Array<Record<string, unknown>> = [];
759
+ for (const part of message.content) {
760
+ if (part.type === "text") {
761
+ messageContent.push({ type: "output_text", text: part.text });
762
+ continue;
763
+ }
764
+ if (part.type === "reasoning") {
765
+ // Reasoning items are top-level entries in the input array,
766
+ // not nested inside the assistant message — flush whatever
767
+ // text we've accumulated first, then push the reasoning item.
768
+ if (messageContent.length > 0) {
769
+ input.push({ role: "assistant", content: [...messageContent] });
770
+ messageContent.length = 0;
771
+ }
772
+ const summary: Array<Record<string, unknown>> = [];
773
+ if (typeof part.text === "string" && part.text.length > 0) {
774
+ summary.push({ type: "summary_text", text: part.text });
775
+ }
776
+ input.push({
777
+ type: "reasoning",
778
+ ...(typeof part.signature === "string" ? { encrypted_content: part.signature } : {}),
779
+ summary,
780
+ });
781
+ continue;
782
+ }
783
+ // tool-call: flush message content, then push as standalone
784
+ // function_call item per Responses API shape.
785
+ if (messageContent.length > 0) {
786
+ input.push({ role: "assistant", content: [...messageContent] });
787
+ messageContent.length = 0;
788
+ }
789
+ input.push({
790
+ type: "function_call",
791
+ call_id: part.toolCallId,
792
+ name: part.toolName,
793
+ arguments: stringifyJsonValue(part.input),
794
+ });
795
+ }
796
+ if (messageContent.length > 0) {
797
+ input.push({ role: "assistant", content: messageContent });
798
+ }
799
+ break;
800
+ }
801
+ case "tool":
802
+ for (const part of message.content) {
803
+ input.push({
804
+ type: "function_call_output",
805
+ call_id: part.toolCallId,
806
+ output: stringifyJsonValue(part.output.value),
807
+ });
808
+ }
809
+ break;
810
+ }
811
+ }
812
+
813
+ return {
814
+ ...(instructionsParts.length > 0 ? { instructions: instructionsParts.join("\n\n") } : {}),
815
+ input,
816
+ };
817
+ }
818
+
819
+ /**
820
+ * Tools on the Responses API differ from Chat Completions: instead of
821
+ * `{ type: "function", function: { name, parameters } }` the function
822
+ * shape lifts the name/parameters/strict to the top of the entry. Native
823
+ * tools (web_search, file_search, computer_use, code_interpreter) live
824
+ * alongside function tools in the same array.
825
+ */
826
+ function toOpenAIResponsesTools(
827
+ tools: RuntimeToolDefinition[] | undefined,
828
+ ): Array<Record<string, unknown>> | undefined {
829
+ if (!tools) return undefined;
830
+ const normalized: Array<Record<string, unknown>> = [];
831
+ for (const tool of tools) {
832
+ if (tool.type === "function") {
833
+ normalized.push({
834
+ type: "function",
835
+ name: tool.name,
836
+ ...(typeof tool.description === "string" ? { description: tool.description } : {}),
837
+ parameters: unwrapToolInputSchema(tool.inputSchema),
838
+ });
839
+ continue;
840
+ }
841
+ if (!tool.id.startsWith("openai.")) continue;
842
+ const providerType = tool.id.slice("openai.".length);
843
+ if (providerType.length === 0) continue;
844
+ normalized.push({
845
+ type: providerType,
846
+ ...toSnakeCaseRecord(tool.args),
847
+ });
848
+ }
849
+ return normalized.length > 0 ? normalized : undefined;
850
+ }
851
+
852
+ function buildOpenAIResponsesRequest(
853
+ modelId: string,
854
+ providerName: string,
855
+ options: OpenAICompatibleLanguageOptions,
856
+ stream: boolean,
857
+ warnings: WarningCollector,
858
+ ): OpenAIResponsesRequest {
859
+ const isReasoningModel = isOpenAIReasoningModel(modelId);
860
+ const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
861
+ const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
862
+
863
+ // Same param-sanitization rules as Chat Completions: reasoning models
864
+ // reject sampling params. Drop with a warning.
865
+ if (options.topK !== undefined) {
866
+ warnings.push({
867
+ type: "unsupported-setting",
868
+ provider: "openai",
869
+ setting: "topK",
870
+ details: "OpenAI Responses API does not expose top_k; the value was dropped.",
871
+ });
872
+ }
873
+ if (reasoningEnabled) {
874
+ const dropped: Array<[keyof typeof options, string]> = [
875
+ ["temperature", "temperature"],
876
+ ["topP", "top_p"],
877
+ ["presencePenalty", "presence_penalty"],
878
+ ["frequencyPenalty", "frequency_penalty"],
879
+ ];
880
+ for (const [key, openaiName] of dropped) {
881
+ if (options[key] !== undefined) {
882
+ warnings.push({
883
+ type: "unsupported-setting",
884
+ provider: "openai",
885
+ setting: key,
886
+ details:
887
+ `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
888
+ });
889
+ }
890
+ }
891
+ }
892
+
893
+ const { instructions, input } = toOpenAIResponsesInput(options.prompt);
894
+ const responsesTools = toOpenAIResponsesTools(options.tools);
895
+
896
+ const body: OpenAIResponsesRequest = {
897
+ model: modelId,
898
+ input,
899
+ ...(instructions !== undefined ? { instructions } : {}),
900
+ ...(stream ? { stream: true } : {}),
901
+ ...(options.maxOutputTokens !== undefined
902
+ ? { max_output_tokens: options.maxOutputTokens }
903
+ : {}),
904
+ ...(!reasoningEnabled && options.temperature !== undefined
905
+ ? { temperature: options.temperature }
906
+ : {}),
907
+ ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
908
+ ...(responsesTools ? { tools: responsesTools } : {}),
909
+ ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
910
+ // The Responses API surfaces reasoning effort + summary verbosity
911
+ // in a structured `reasoning` object instead of a flat field.
912
+ ...(reasoningEffort !== undefined
913
+ ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
914
+ : {}),
915
+ ...(typeof options.userId === "string" && options.userId.length > 0
916
+ ? { user: options.userId }
917
+ : {}),
918
+ ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
919
+ ...(options.parallelToolCalls !== undefined
920
+ ? { parallel_tool_calls: options.parallelToolCalls }
921
+ : {}),
922
+ // Responses API uses `text.format` instead of Chat Completions'
923
+ // `response_format`. The shape is similar but nested under `text`.
924
+ ...(options.responseFormat && options.responseFormat.type !== "text"
925
+ ? {
926
+ text: {
927
+ format: options.responseFormat.type === "json" ? { type: "json_object" } : {
928
+ type: "json_schema",
929
+ name: options.responseFormat.name,
930
+ ...(typeof options.responseFormat.description === "string"
931
+ ? { description: options.responseFormat.description }
932
+ : {}),
933
+ schema: unwrapToolInputSchema(options.responseFormat.schema),
934
+ ...(options.responseFormat.strict !== undefined
935
+ ? { strict: options.responseFormat.strict }
936
+ : {}),
937
+ },
938
+ },
939
+ }
940
+ : {}),
941
+ };
942
+
943
+ Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
944
+ return body;
945
+ }
946
+
947
+ /**
948
+ * The Responses API uses `input_tokens` / `output_tokens` field names
949
+ * instead of Chat Completions' `prompt_tokens` / `completion_tokens`.
950
+ */
951
+ function extractOpenAIResponsesUsage(payload: unknown): RuntimeUsage | undefined {
952
+ const record = readRecord(payload);
953
+ // Streaming usage lives on response.completed inside `response.usage`;
954
+ // non-streaming has it at the top level.
955
+ const responseRecord = readRecord(record?.response);
956
+ const usage = readRecord(responseRecord?.usage) ?? readRecord(record?.usage);
957
+ if (!usage) return undefined;
958
+
959
+ const inputTokens = typeof usage.input_tokens === "number" ? usage.input_tokens : undefined;
960
+ const outputTokens = typeof usage.output_tokens === "number" ? usage.output_tokens : undefined;
961
+ const totalTokens = typeof usage.total_tokens === "number"
962
+ ? usage.total_tokens
963
+ : (inputTokens !== undefined || outputTokens !== undefined
964
+ ? (inputTokens ?? 0) + (outputTokens ?? 0)
965
+ : undefined);
966
+ const inputDetails = readRecord(usage.input_tokens_details);
967
+ const cachedTokens = inputDetails?.cached_tokens;
968
+
969
+ return {
970
+ inputTokens,
971
+ outputTokens,
972
+ totalTokens,
973
+ ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
974
+ };
975
+ }
976
+
977
+ function normalizeOpenAIResponsesFinishReason(
978
+ raw: unknown,
979
+ ): string | { unified: string; raw: string } | null {
980
+ if (typeof raw !== "string") return null;
981
+ switch (raw) {
982
+ case "completed":
983
+ return { unified: "stop", raw };
984
+ case "incomplete":
985
+ return { unified: "length", raw };
986
+ case "failed":
987
+ return { unified: "error", raw };
988
+ case "in_progress":
989
+ return null;
990
+ default:
991
+ return raw;
992
+ }
993
+ }
994
+
995
/**
 * Normalized content parts extracted from a Responses API `output` array:
 * - `text`: concatenated output_text of a message item;
 * - `reasoning`: summary texts plus an optional opaque signature taken
 *   from `encrypted_content`;
 * - `tool-call`: a function call with its arguments as a raw JSON string.
 */
type OpenAIResponsesContentPart =
  | { type: "text"; text: string }
  | {
    type: "reasoning";
    summaries?: Array<{ id?: string; text: string }>;
    signature?: string;
  }
  | { type: "tool-call"; toolCallId: string; toolName: string; input: string };
1003
+
1004
+ function buildOpenAIResponsesGenerateResult(payload: unknown): {
1005
+ content: OpenAIResponsesContentPart[];
1006
+ finishReason?: string | { unified: string; raw: string } | null;
1007
+ usage?: RuntimeUsage;
1008
+ } {
1009
+ const record = readRecord(payload);
1010
+ const output = Array.isArray(record?.output) ? record.output : [];
1011
+ const content: OpenAIResponsesContentPart[] = [];
1012
+
1013
+ for (const item of output) {
1014
+ const itemRecord = readRecord(item);
1015
+ const itemType = typeof itemRecord?.type === "string" ? itemRecord.type : undefined;
1016
+
1017
+ if (itemType === "message" && Array.isArray(itemRecord?.content)) {
1018
+ // A message item bundles one or more output_text parts.
1019
+ let text = "";
1020
+ for (const part of itemRecord.content) {
1021
+ const p = readRecord(part);
1022
+ if (typeof p?.type === "string" && p.type === "output_text" && typeof p.text === "string") {
1023
+ text += p.text;
1024
+ }
1025
+ }
1026
+ if (text.length > 0) {
1027
+ content.push({ type: "text", text });
1028
+ }
1029
+ continue;
1030
+ }
1031
+
1032
+ if (itemType === "function_call") {
1033
+ content.push({
1034
+ type: "tool-call",
1035
+ toolCallId: typeof itemRecord?.call_id === "string"
1036
+ ? itemRecord.call_id
1037
+ : (typeof itemRecord?.id === "string" ? itemRecord.id : ""),
1038
+ toolName: typeof itemRecord?.name === "string" ? itemRecord.name : "",
1039
+ input: typeof itemRecord?.arguments === "string"
1040
+ ? itemRecord.arguments
1041
+ : stringifyJsonValue(itemRecord?.arguments ?? {}),
1042
+ });
1043
+ continue;
1044
+ }
1045
+
1046
+ if (itemType === "reasoning") {
1047
+ const summary = Array.isArray(itemRecord?.summary) ? itemRecord.summary : [];
1048
+ const summaries: Array<{ id?: string; text: string }> = [];
1049
+ for (const s of summary) {
1050
+ const sr = readRecord(s);
1051
+ if (typeof sr?.text === "string" && sr.text.length > 0) {
1052
+ summaries.push({
1053
+ ...(typeof sr?.id === "string" ? { id: sr.id } : {}),
1054
+ text: sr.text,
1055
+ });
1056
+ }
1057
+ }
1058
+ content.push({
1059
+ type: "reasoning",
1060
+ ...(summaries.length > 0 ? { summaries } : {}),
1061
+ ...(typeof itemRecord?.encrypted_content === "string"
1062
+ ? { signature: itemRecord.encrypted_content }
1063
+ : {}),
1064
+ });
1065
+ continue;
1066
+ }
1067
+ }
1068
+
1069
+ return {
1070
+ content,
1071
+ finishReason: normalizeOpenAIResponsesFinishReason(record?.status),
1072
+ usage: extractOpenAIResponsesUsage(payload),
1073
+ };
1074
+ }
1075
+
1076
// Per-item state for an in-flight reasoning block while streaming.
// `id` is a locally minted part id (`reasoning-<n>`); `emittedStart`
// tracks whether a reasoning-start part has already been yielded so
// deltas can lazily open the block exactly once.
type OpenAIResponsesStreamReasoningState = {
  id: string;
  emittedStart: boolean;
};

// Per-item state for an in-flight function call while streaming.
// `id` is the server item id used to key the map; `toolCallId` is the
// call_id exposed to consumers; `arguments` accumulates the raw JSON
// string across argument delta events.
type OpenAIResponsesStreamFunctionCallState = {
  id: string;
  toolCallId: string;
  name: string;
  arguments: string;
};
1087
+
1088
/**
 * Parse the Responses API streaming event grammar into the same UI part
 * shapes the existing OpenAI / Anthropic / Google streams emit.
 *
 * State overview:
 * - `reasoningBlocks` / `functionCalls` track in-flight output items,
 *   keyed by the server `item_id`, so deltas are routed to the right part.
 * - `startedToolCalls` guarantees a single `tool-input-start` per call,
 *   emitted on the first non-empty argument delta.
 * - `finishReason` / `usage` are captured from the terminal
 *   `response.completed` / `response.failed` / `response.incomplete`
 *   events and yielded in one trailing `finish` part.
 *
 * @param stream raw SSE byte stream from the HTTP response body.
 */
async function* streamOpenAIResponsesParts(
  stream: ReadableStream<Uint8Array>,
): AsyncIterable<unknown> {
  const decoder = new TextDecoder();
  let buffer = "";
  const reasoningBlocks = new Map<string, OpenAIResponsesStreamReasoningState>();
  const functionCalls = new Map<string, OpenAIResponsesStreamFunctionCallState>();
  const startedToolCalls = new Set<string>();
  let finishReason: string | { unified: string; raw: string } | null = null;
  let usage: RuntimeUsage | undefined;
  // Monotonic counter used to mint stable local ids for reasoning parts.
  let reasoningCounter = 0;

  for await (const chunk of stream) {
    // Streaming decode: bytes split across chunks are held until complete.
    buffer += decoder.decode(chunk, { stream: true });
    const parsed = parseSseChunk(buffer);
    buffer = parsed.remainder;

    for (const event of parsed.events) {
      if (event === "[DONE]") continue;
      const record = readRecord(event);
      const type = typeof record?.type === "string" ? record.type : undefined;
      if (!type) continue;

      // response.output_item.added: a new output item begins.
      if (type === "response.output_item.added") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "function_call" && itemId) {
          // call_id is preferred as the consumer-visible id; fall back
          // to the item id when absent.
          const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
          const name = typeof item?.name === "string" ? item.name : "";
          functionCalls.set(itemId, {
            id: itemId,
            toolCallId: callId,
            name,
            arguments: "",
          });
        }
        if (itemType === "reasoning" && itemId) {
          reasoningBlocks.set(itemId, {
            id: `reasoning-${reasoningCounter++}`,
            emittedStart: false,
          });
        }
        continue;
      }

      // response.output_text.delta: text chunk for a message item.
      if (type === "response.output_text.delta" && typeof record?.delta === "string") {
        if (record.delta.length > 0) {
          yield { type: "text-delta", delta: record.delta };
        }
        continue;
      }

      // response.reasoning_summary_text.delta: reasoning summary text chunk.
      if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? reasoningBlocks.get(itemId) : undefined;
        // Deltas for unknown item ids are ignored (no matching item.added).
        if (state && record.delta.length > 0) {
          if (!state.emittedStart) {
            // Lazily open the reasoning part on the first real delta.
            yield { type: "reasoning-start", id: state.id };
            state.emittedStart = true;
          }
          yield { type: "reasoning-delta", id: state.id, delta: record.delta };
        }
        continue;
      }

      // response.function_call_arguments.delta: tool call argument chunk.
      if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? functionCalls.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!startedToolCalls.has(state.id)) {
            yield {
              type: "tool-input-start",
              id: state.toolCallId,
              toolName: state.name,
            };
            startedToolCalls.add(state.id);
          }
          // Accumulate so the final tool-call part carries full arguments.
          state.arguments += record.delta;
          yield {
            type: "tool-input-delta",
            id: state.toolCallId,
            delta: record.delta,
          };
        }
        continue;
      }

      // response.output_item.done: an item has finished emitting deltas.
      if (type === "response.output_item.done") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "reasoning" && itemId) {
          const state = reasoningBlocks.get(itemId);
          // Only close blocks that actually opened (had at least one delta).
          if (state?.emittedStart) {
            yield { type: "reasoning-end", id: state.id };
          }
          reasoningBlocks.delete(itemId);
        }
        if (itemType === "function_call" && itemId) {
          const state = functionCalls.get(itemId);
          if (state) {
            // NOTE(review): the done item's own `arguments` field is
            // ignored here; only accumulated deltas are used — confirm
            // arguments always arrive via delta events.
            yield {
              type: "tool-call",
              toolCallId: state.toolCallId,
              toolName: state.name,
              input: state.arguments,
            };
          }
          functionCalls.delete(itemId);
        }
        continue;
      }

      // response.completed: terminal event with the final response object.
      if (type === "response.completed") {
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
        continue;
      }

      // Terminal failure / truncation events: derive a finish reason from
      // the embedded response status, falling back to the event type.
      if (type === "response.failed" || type === "response.incomplete") {
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
          (type === "response.failed"
            ? { unified: "error", raw: "failed" }
            : { unified: "length", raw: "incomplete" });
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        continue;
      }
    }
  }

  // NOTE(review): any bytes remaining in `buffer` when the stream ends are
  // dropped (no final decoder flush / parse pass) — presumably the server
  // always terminates events with a newline; verify parseSseChunk's contract.

  // Close any reasoning streams still open at end-of-stream (defensive).
  for (const state of reasoningBlocks.values()) {
    if (state.emittedStart) {
      yield { type: "reasoning-end", id: state.id };
    }
  }

  yield {
    type: "finish",
    finishReason,
    ...(usage ? { usage } : {}),
  };
}
1244
+
1245
+ // ---------------------------------------------------------------------------
1246
+ // Public factory functions
1247
+ // ---------------------------------------------------------------------------
1248
+
1249
+ export function createOpenAIModelRuntime(
1250
+ config: OpenAIRuntimeConfig,
1251
+ modelId: string,
1252
+ ): ModelRuntime {
1253
+ const fetchImpl = config.fetch ?? globalThis.fetch;
1254
+ return {
1255
+ provider: config.name ?? "openai",
1256
+ modelId,
1257
+ specificationVersion: "v3",
1258
+ supportedUrls: {},
1259
+ doGenerate(optionsForRuntime: unknown) {
1260
+ const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1261
+ const url = getOpenAIChatCompletionsUrl(config.baseURL);
1262
+ const warnings = createWarningCollector();
1263
+ const body = buildOpenAIChatRequest(
1264
+ modelId,
1265
+ config.name ?? "openai",
1266
+ options,
1267
+ false,
1268
+ warnings,
1269
+ );
1270
+ return requestJson({
1271
+ url,
1272
+ fetchImpl,
1273
+ providerLabel: config.name ?? "openai",
1274
+ providerKind: "openai",
1275
+ init: createOpenAIRequestInit({
1276
+ apiKey: config.apiKey,
1277
+ extraHeaders: options.headers,
1278
+ body: JSON.stringify(body),
1279
+ signal: options.abortSignal,
1280
+ }),
1281
+ }).then((payload) => {
1282
+ const drained = warnings.drain();
1283
+ return {
1284
+ ...buildOpenAIGenerateResult(payload),
1285
+ ...(drained.length > 0 ? { warnings: drained } : {}),
1286
+ };
1287
+ });
1288
+ },
1289
+ doStream(optionsForRuntime: unknown) {
1290
+ const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1291
+ const url = getOpenAIChatCompletionsUrl(config.baseURL);
1292
+ const warnings = createWarningCollector();
1293
+ const body = buildOpenAIChatRequest(
1294
+ modelId,
1295
+ config.name ?? "openai",
1296
+ options,
1297
+ true,
1298
+ warnings,
1299
+ );
1300
+ return requestStream({
1301
+ url,
1302
+ fetchImpl,
1303
+ providerLabel: config.name ?? "openai",
1304
+ providerKind: "openai",
1305
+ init: createOpenAIRequestInit({
1306
+ apiKey: config.apiKey,
1307
+ extraHeaders: options.headers,
1308
+ body: JSON.stringify(body),
1309
+ signal: options.abortSignal,
1310
+ }),
1311
+ }).then((responseStream) => {
1312
+ const drained = warnings.drain();
1313
+ return {
1314
+ stream: ReadableStream.from(
1315
+ withToolInputStatusTransitions(streamOpenAICompatibleParts(responseStream)),
1316
+ ),
1317
+ ...(drained.length > 0 ? { warnings: drained } : {}),
1318
+ };
1319
+ });
1320
+ },
1321
+ };
1322
+ }
1323
+
1324
+ export function createOpenAIResponsesRuntime(
1325
+ config: OpenAIRuntimeConfig,
1326
+ modelId: string,
1327
+ ): ModelRuntime {
1328
+ const fetchImpl = config.fetch ?? globalThis.fetch;
1329
+ return {
1330
+ provider: config.name ?? "openai",
1331
+ modelId,
1332
+ specificationVersion: "v3",
1333
+ supportedUrls: {},
1334
+ doGenerate(optionsForRuntime: unknown) {
1335
+ const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1336
+ const url = getOpenAIResponsesUrl(config.baseURL);
1337
+ const warnings = createWarningCollector();
1338
+ const body = buildOpenAIResponsesRequest(
1339
+ modelId,
1340
+ config.name ?? "openai",
1341
+ options,
1342
+ false,
1343
+ warnings,
1344
+ );
1345
+ return requestJson({
1346
+ url,
1347
+ fetchImpl,
1348
+ providerLabel: config.name ?? "openai",
1349
+ providerKind: "openai",
1350
+ init: createOpenAIRequestInit({
1351
+ apiKey: config.apiKey,
1352
+ extraHeaders: options.headers,
1353
+ body: JSON.stringify(body),
1354
+ signal: options.abortSignal,
1355
+ }),
1356
+ }).then((payload) => {
1357
+ const drained = warnings.drain();
1358
+ return {
1359
+ ...buildOpenAIResponsesGenerateResult(payload),
1360
+ ...(drained.length > 0 ? { warnings: drained } : {}),
1361
+ };
1362
+ });
1363
+ },
1364
+ doStream(optionsForRuntime: unknown) {
1365
+ const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1366
+ const url = getOpenAIResponsesUrl(config.baseURL);
1367
+ const warnings = createWarningCollector();
1368
+ const body = buildOpenAIResponsesRequest(
1369
+ modelId,
1370
+ config.name ?? "openai",
1371
+ options,
1372
+ true,
1373
+ warnings,
1374
+ );
1375
+ return requestStream({
1376
+ url,
1377
+ fetchImpl,
1378
+ providerLabel: config.name ?? "openai",
1379
+ providerKind: "openai",
1380
+ init: createOpenAIRequestInit({
1381
+ apiKey: config.apiKey,
1382
+ extraHeaders: options.headers,
1383
+ body: JSON.stringify(body),
1384
+ signal: options.abortSignal,
1385
+ }),
1386
+ }).then((responseStream) => {
1387
+ const drained = warnings.drain();
1388
+ return {
1389
+ stream: ReadableStream.from(
1390
+ withToolInputStatusTransitions(streamOpenAIResponsesParts(responseStream)),
1391
+ ),
1392
+ ...(drained.length > 0 ? { warnings: drained } : {}),
1393
+ };
1394
+ });
1395
+ },
1396
+ };
1397
+ }
1398
+
1399
+ export function createOpenAIEmbeddingRuntime(
1400
+ config: OpenAIRuntimeConfig,
1401
+ modelId: string,
1402
+ ): EmbeddingRuntime {
1403
+ const fetchImpl = config.fetch ?? globalThis.fetch;
1404
+ return {
1405
+ provider: config.name ?? "openai",
1406
+ modelId,
1407
+ supportsParallelCalls: true,
1408
+ doEmbed({ values, abortSignal }) {
1409
+ if (values.length === 0) {
1410
+ return Promise.resolve({
1411
+ embeddings: [],
1412
+ warnings: [],
1413
+ rawResponse: { data: [] },
1414
+ });
1415
+ }
1416
+
1417
+ const url = getOpenAIEmbeddingUrl(config.baseURL);
1418
+ return requestJson({
1419
+ url,
1420
+ fetchImpl,
1421
+ providerLabel: config.name ?? "openai",
1422
+ providerKind: "openai",
1423
+ init: createOpenAIRequestInit({
1424
+ apiKey: config.apiKey,
1425
+ body: JSON.stringify({
1426
+ model: modelId,
1427
+ input: values,
1428
+ }),
1429
+ signal: abortSignal,
1430
+ }),
1431
+ }).then((payload) => ({
1432
+ embeddings: extractOpenAIEmbeddings(payload),
1433
+ usage: {
1434
+ tokens: extractOpenAIUsageTokens(payload),
1435
+ },
1436
+ rawResponse: payload,
1437
+ warnings: [],
1438
+ }));
1439
+ },
1440
+ };
1441
+ }
1442
+
1443
+ export class OpenAIProvider implements AIProvider {
1444
+ readonly id = "openai";
1445
+
1446
+ createModel(modelId: string, config: AIProviderConfig): ModelRuntime {
1447
+ return createOpenAIModelRuntime(
1448
+ {
1449
+ apiKey: config.credential,
1450
+ baseURL: config.baseURL,
1451
+ name: config.name ?? "openai",
1452
+ fetch: config.fetch,
1453
+ },
1454
+ modelId,
1455
+ );
1456
+ }
1457
+
1458
+ createEmbedding(modelId: string, config: AIProviderConfig): EmbeddingRuntime {
1459
+ return createOpenAIEmbeddingRuntime(
1460
+ {
1461
+ apiKey: config.credential,
1462
+ baseURL: config.baseURL,
1463
+ name: config.name ?? "openai",
1464
+ fetch: config.fetch,
1465
+ },
1466
+ modelId,
1467
+ );
1468
+ }
1469
+
1470
+ createResponses(modelId: string, config: AIProviderConfig): ModelRuntime {
1471
+ return createOpenAIResponsesRuntime(
1472
+ {
1473
+ apiKey: config.credential,
1474
+ baseURL: config.baseURL,
1475
+ name: config.name ?? "openai",
1476
+ fetch: config.fetch,
1477
+ },
1478
+ modelId,
1479
+ );
1480
+ }
1481
+ }