veryfront 0.1.208 → 0.1.210

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -396,6 +396,10 @@ function getOpenAIChatCompletionsUrl(baseURL?: string): string {
396
396
  return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "chat/completions");
397
397
  }
398
398
 
399
/**
 * Resolve the endpoint URL for the OpenAI Responses API.
 *
 * @param baseURL Optional API root override. When it is nullish,
 *   `DEFAULT_OPENAI_BASE_URL` is used instead.
 * @returns Whatever `joinUrl` produces for the chosen root and the
 *   `responses` path segment.
 */
function getOpenAIResponsesUrl(baseURL?: string): string {
  const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
  return joinUrl(root, "responses");
}
402
+
399
403
  function getGoogleGenerateContentUrl(baseURL: string | undefined, modelId: string): string {
400
404
  return joinUrl(
401
405
  baseURL ?? DEFAULT_GOOGLE_BASE_URL,
@@ -2905,6 +2909,657 @@ export function createOpenAIModelRuntime(
2905
2909
  };
2906
2910
  }
2907
2911
 
2912
+ // =============================================================================
2913
+ // OpenAI Responses API runtime (#1077, deferred from #1052 C4)
2914
+ // =============================================================================
2915
+ //
2916
+ // The Responses API (/v1/responses) is a different surface than Chat
2917
+ // Completions. Same provider, different request shape, different streaming
2918
+ // event grammar, different response shape, and different reasoning-summary
2919
+ // surface. This runtime is parallel to createOpenAIModelRuntime so each
2920
+ // path stays focused on one wire format.
2921
+ //
2922
+ // Why parallel runtimes instead of a flag? See the rationale in #1077.
2923
+ //
2924
+ // docs: https://platform.openai.com/docs/api-reference/responses
2925
+
2926
/**
 * One entry of the Responses API `input` array. Kept deliberately loose
 * (string-keyed record) because message, reasoning, function_call and
 * function_call_output items all have different shapes.
 */
type OpenAIResponsesInputItem = Record<string, unknown>;

/**
 * JSON body posted to the Responses endpoint. Only `model` and `input`
 * are required; the index signature lets provider options add arbitrary
 * extra keys on top of the typed ones.
 */
type OpenAIResponsesRequest = {
  // --- required ---
  model: string;
  input: OpenAIResponsesInputItem[];

  // --- prompt / transport ---
  instructions?: string;
  stream?: boolean;

  // --- generation settings ---
  max_output_tokens?: number;
  temperature?: number;
  top_p?: number;
  reasoning?: { effort?: string; summary?: string };
  text?: { format: Record<string, unknown> };

  // --- tools ---
  tools?: Array<Record<string, unknown>>;
  tool_choice?: unknown;
  parallel_tool_calls?: boolean;

  // --- request bookkeeping ---
  metadata?: Record<string, string>;
  user?: string;
  service_tier?: string;

  // Escape hatch for keys merged in from provider options.
  [key: string]: unknown;
};
2946
+
2947
/**
 * Convert the unified RuntimePromptMessage[] into the two request pieces
 * the Responses API expects: a top-level `instructions` string and the
 * `input` item array. Differences from Chat Completions:
 *   - System prompts go on the top-level `instructions` field, not inline.
 *     Multiple system messages are joined with a blank line.
 *   - Content parts use `input_text` / `output_text` discriminants instead
 *     of the Chat Completions plain-text shorthand.
 *   - Assistant tool calls become standalone `function_call` items in the
 *     input array, not nested `tool_calls` on a message.
 *   - Tool results become standalone `function_call_output` items.
 *   - Reasoning content parts become standalone `reasoning` items so
 *     multi-turn conversations can be replayed with them.
 *
 * Reasoning parts that carry neither summary text nor a signature are
 * skipped: the resulting item would be empty and has nothing to replay.
 *
 * @param prompt Conversation in the unified runtime message shape.
 * @returns `input` (always present, possibly empty) and `instructions`
 *   (only when at least one non-empty system message was seen).
 */
function toOpenAIResponsesInput(
  prompt: RuntimePromptMessage[],
): { instructions?: string; input: OpenAIResponsesInputItem[] } {
  const instructionsParts: string[] = [];
  const input: OpenAIResponsesInputItem[] = [];

  for (const message of prompt) {
    switch (message.role) {
      case "system":
        if (message.content.length > 0) {
          instructionsParts.push(message.content);
        }
        break;
      case "user":
        input.push({
          role: "user",
          content: [{ type: "input_text", text: readTextParts(message.content) }],
        });
        break;
      case "assistant": {
        // Text parts accumulate here until a top-level item (reasoning or
        // function_call) or the end of the message forces a flush, so the
        // relative order of text and non-text parts is preserved.
        let pendingContent: Array<Record<string, unknown>> = [];
        const flushPendingContent = (): void => {
          if (pendingContent.length === 0) return;
          input.push({ role: "assistant", content: pendingContent });
          pendingContent = [];
        };

        for (const part of message.content) {
          if (part.type === "text") {
            pendingContent.push({ type: "output_text", text: part.text });
            continue;
          }
          if (part.type === "reasoning") {
            const hasSummaryText = typeof part.text === "string" && part.text.length > 0;
            const hasSignature = typeof part.signature === "string";
            // Nothing to replay: do not emit an empty reasoning item.
            if (!hasSummaryText && !hasSignature) continue;

            // Reasoning items are top-level entries in the input array,
            // not nested inside the assistant message.
            flushPendingContent();
            // NOTE(review): the emitted item carries no `id`; confirm the
            // Responses API accepts id-less reasoning items on input.
            input.push({
              type: "reasoning",
              ...(hasSignature ? { encrypted_content: part.signature } : {}),
              summary: hasSummaryText ? [{ type: "summary_text", text: part.text }] : [],
            });
            continue;
          }
          // tool-call: standalone function_call item per Responses API shape.
          flushPendingContent();
          input.push({
            type: "function_call",
            call_id: part.toolCallId,
            name: part.toolName,
            arguments: stringifyJsonValue(part.input),
          });
        }
        flushPendingContent();
        break;
      }
      case "tool":
        for (const part of message.content) {
          input.push({
            type: "function_call_output",
            call_id: part.toolCallId,
            output: stringifyJsonValue(part.output.value),
          });
        }
        break;
    }
  }

  return {
    ...(instructionsParts.length > 0 ? { instructions: instructionsParts.join("\n\n") } : {}),
    input,
  };
}
3039
+
3040
/**
 * Translate unified tool definitions into Responses API `tools` entries.
 *
 * Function tools are flattened: instead of the Chat Completions
 * `{ type: "function", function: { name, parameters } }` nesting, the
 * name, optional description and parameters sit at the top of the entry.
 * Any other tool is treated as a provider-native tool and is kept only
 * when its id starts with `openai.`; the remainder of the id becomes the
 * entry `type` and the tool args are spread in after snake-casing.
 *
 * @param tools Unified tool definitions, or `undefined`.
 * @returns The converted entries, or `undefined` when no tools were given
 *   or none survived conversion.
 */
function toOpenAIResponsesTools(
  tools: RuntimeToolDefinition[] | undefined,
): Array<Record<string, unknown>> | undefined {
  if (!tools) return undefined;

  const providerPrefix = "openai.";
  const entries: Array<Record<string, unknown>> = [];

  for (const tool of tools) {
    if (tool.type === "function") {
      const functionEntry: Record<string, unknown> = {
        type: "function",
        name: tool.name,
        ...(typeof tool.description === "string" ? { description: tool.description } : {}),
        parameters: unwrapToolInputSchema(tool.inputSchema),
      };
      entries.push(functionEntry);
    } else if (tool.id.startsWith(providerPrefix)) {
      const nativeType = tool.id.slice(providerPrefix.length);
      // A bare "openai." id names no tool type; drop it.
      if (nativeType.length > 0) {
        entries.push({
          type: nativeType,
          ...toSnakeCaseRecord(tool.args),
        });
      }
    }
  }

  if (entries.length === 0) return undefined;
  return entries;
}
3072
+
3073
/**
 * Assemble the JSON body for a Responses API call.
 *
 * Settings the request cannot carry are dropped and reported through
 * `warnings` rather than silently ignored:
 *   - `topK` is always dropped.
 *   - `presencePenalty` / `frequencyPenalty` are never sent; a warning is
 *     recorded whenever either is set (with the reasoning-specific wording
 *     when reasoning is active).
 *   - `temperature` / `topP` are dropped when reasoning is active (the
 *     model is a reasoning model or a reasoning effort was resolved).
 *
 * Provider options are merged last via `Object.assign`, so they can add
 * to or override any key built here.
 *
 * @param modelId Model identifier placed in the `model` field.
 * @param providerName Name passed to `readProviderOptions` alongside "openai".
 * @param options Unified call options (prompt, tools, sampling, ...).
 * @param stream When true the body carries `stream: true`.
 * @param warnings Collector receiving one entry per dropped setting.
 * @returns The request body, ready to be JSON-serialized.
 */
function buildOpenAIResponsesRequest(
  modelId: string,
  providerName: string,
  options: OpenAICompatibleLanguageOptions,
  stream: boolean,
  warnings: WarningCollector,
): OpenAIResponsesRequest {
  const isReasoningModel = isOpenAIReasoningModel(modelId);
  const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
  const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;

  if (options.topK !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "openai",
      setting: "topK",
      details: "OpenAI Responses API does not expose top_k; the value was dropped.",
    });
  }

  // Sampling params are only dropped while reasoning is active; the
  // penalty params are never written to the body below, so they need a
  // warning in both modes.
  const samplingSettings: Array<[keyof typeof options, string]> = [
    ["temperature", "temperature"],
    ["topP", "top_p"],
  ];
  const penaltySettings: Array<[keyof typeof options, string]> = [
    ["presencePenalty", "presence_penalty"],
    ["frequencyPenalty", "frequency_penalty"],
  ];

  if (reasoningEnabled) {
    // Same param-sanitization rules as Chat Completions: reasoning models
    // reject sampling params. Drop with a warning.
    for (const [key, openaiName] of [...samplingSettings, ...penaltySettings]) {
      if (options[key] !== undefined) {
        warnings.push({
          type: "unsupported-setting",
          provider: "openai",
          setting: key,
          details:
            `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
        });
      }
    }
  } else {
    for (const [key, openaiName] of penaltySettings) {
      if (options[key] !== undefined) {
        warnings.push({
          type: "unsupported-setting",
          provider: "openai",
          setting: key,
          details: `OpenAI Responses API does not expose ${openaiName}; the value was dropped.`,
        });
      }
    }
  }

  const { instructions, input } = toOpenAIResponsesInput(options.prompt);
  const responsesTools = toOpenAIResponsesTools(options.tools);

  const body: OpenAIResponsesRequest = {
    model: modelId,
    input,
    ...(instructions !== undefined ? { instructions } : {}),
    ...(stream ? { stream: true } : {}),
    ...(options.maxOutputTokens !== undefined
      ? { max_output_tokens: options.maxOutputTokens }
      : {}),
    ...(!reasoningEnabled && options.temperature !== undefined
      ? { temperature: options.temperature }
      : {}),
    ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
    ...(responsesTools ? { tools: responsesTools } : {}),
    // NOTE(review): toolChoice is forwarded verbatim; confirm the unified
    // shape matches what the Responses API accepts for `tool_choice`.
    ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
    // The Responses API surfaces reasoning effort + summary verbosity
    // in a structured `reasoning` object instead of a flat field. We
    // request "auto" summary so callers see structured summary parts
    // without having to opt into them per request.
    ...(reasoningEffort !== undefined
      ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
      : {}),
    ...(typeof options.userId === "string" && options.userId.length > 0
      ? { user: options.userId }
      : {}),
    ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
    ...(options.parallelToolCalls !== undefined
      ? { parallel_tool_calls: options.parallelToolCalls }
      : {}),
    // Responses API uses `text.format` instead of Chat Completions'
    // `response_format`. The shape is similar but nested under `text`.
    ...(options.responseFormat && options.responseFormat.type !== "text"
      ? {
        text: {
          format: options.responseFormat.type === "json" ? { type: "json_object" } : {
            type: "json_schema",
            name: options.responseFormat.name,
            ...(typeof options.responseFormat.description === "string"
              ? { description: options.responseFormat.description }
              : {}),
            schema: unwrapToolInputSchema(options.responseFormat.schema),
            ...(options.responseFormat.strict !== undefined
              ? { strict: options.responseFormat.strict }
              : {}),
          },
        },
      }
      : {}),
  };

  // Provider options win over everything assembled above.
  Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
  return body;
}
3169
+
3170
/**
 * Read token usage from a Responses API payload.
 *
 * The Responses API names the counters `input_tokens` / `output_tokens`
 * (Chat Completions uses `prompt_tokens` / `completion_tokens`) and nests
 * cached input tokens under `input_tokens_details.cached_tokens`. Usage is
 * looked up at `payload.response.usage` first (the streaming terminal
 * event shape) and then at `payload.usage` (the non-streaming shape).
 * Reasoning-token details are not read here.
 *
 * @param payload Raw response object or streaming event.
 * @returns The usage counters, or `undefined` when no usage record exists.
 *   `totalTokens` falls back to input + output when the payload omits it
 *   and at least one of the two is known.
 */
function extractOpenAIResponsesUsage(payload: unknown): RuntimeUsage | undefined {
  const root = readRecord(payload);
  const nested = readRecord(root?.response);
  const usage = readRecord(nested?.usage) ?? readRecord(root?.usage);
  if (!usage) return undefined;

  const numberOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const inputTokens = numberOrUndefined(usage.input_tokens);
  const outputTokens = numberOrUndefined(usage.output_tokens);

  let totalTokens = numberOrUndefined(usage.total_tokens);
  const anyCounterKnown = inputTokens !== undefined || outputTokens !== undefined;
  if (totalTokens === undefined && anyCounterKnown) {
    totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
  }

  const cachedTokens = readRecord(usage.input_tokens_details)?.cached_tokens;

  return {
    inputTokens,
    outputTokens,
    totalTokens,
    ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
  };
}
3201
+
3202
/**
 * Map a Responses API `status` value onto the runtime finish reason.
 *
 * @param raw The status as found on the response object; anything that
 *   is not a string yields `null`.
 * @returns
 *   - `{ unified, raw }` for the known terminal statuses
 *     ("completed" -> "stop", "incomplete" -> "length", "failed" -> "error"),
 *   - `null` for "in_progress" (no finish reason yet),
 *   - the raw string unchanged for any other status.
 */
function normalizeOpenAIResponsesFinishReason(
  raw: unknown,
): string | { unified: string; raw: string } | null {
  if (typeof raw !== "string") return null;
  if (raw === "in_progress") return null;

  const unified = raw === "completed"
    ? "stop"
    : raw === "incomplete"
    ? "length"
    : raw === "failed"
    ? "error"
    : undefined;

  return unified === undefined ? raw : { unified, raw };
}
3219
+
3220
/**
 * Content entries produced when a non-streaming Responses API payload is
 * converted into the runtime result. Discriminated on `type`.
 */
type OpenAIResponsesContentPart =
  // Plain assistant text.
  | { type: "text"; text: string }
  // Reasoning output: optional summary entries plus an optional opaque
  // signature string.
  | {
    type: "reasoning";
    summaries?: Array<{ id?: string; text: string }>;
    signature?: string;
  }
  // A function call requested by the model; `input` is the arguments as a
  // string.
  | { type: "tool-call"; toolCallId: string; toolName: string; input: string };
3228
+
3229
/**
 * Convert a non-streaming Responses API payload into runtime content,
 * finish reason and usage.
 *
 * Output items are mapped as follows; any other item type is ignored:
 *   - `message`: the texts of its `output_text` parts are concatenated
 *     into one text entry (omitted when the result is empty).
 *   - `function_call`: becomes a tool-call entry. The id prefers
 *     `call_id`, then `id`, then "". Non-string arguments are serialized.
 *   - `reasoning`: becomes a reasoning entry with the non-empty summary
 *     texts and, when present, `encrypted_content` as the signature.
 *
 * The finish reason starts from the response `status` and is refined:
 * a completed response that contains at least one function call reports
 * "tool-calls" instead of "stop", and an incomplete response whose
 * `incomplete_details.reason` is "content_filter" reports
 * "content-filter" instead of "length".
 *
 * @param payload Parsed JSON body of the Responses API reply.
 * @returns Converted content plus finish reason and usage.
 */
function buildOpenAIResponsesGenerateResult(payload: unknown): {
  content: OpenAIResponsesContentPart[];
  finishReason?: string | { unified: string; raw: string } | null;
  usage?: RuntimeUsage;
} {
  const record = readRecord(payload);
  const output = Array.isArray(record?.output) ? record.output : [];
  const content: OpenAIResponsesContentPart[] = [];
  let sawFunctionCall = false;

  for (const item of output) {
    const itemRecord = readRecord(item);
    const itemType = typeof itemRecord?.type === "string" ? itemRecord.type : undefined;

    if (itemType === "message" && Array.isArray(itemRecord?.content)) {
      // A message item bundles one or more output_text parts. Concat
      // their texts into a single text content entry.
      let text = "";
      for (const part of itemRecord.content) {
        const p = readRecord(part);
        if (typeof p?.type === "string" && p.type === "output_text" && typeof p.text === "string") {
          text += p.text;
        }
      }
      if (text.length > 0) {
        content.push({ type: "text", text });
      }
      continue;
    }

    if (itemType === "function_call") {
      sawFunctionCall = true;
      content.push({
        type: "tool-call",
        toolCallId: typeof itemRecord?.call_id === "string"
          ? itemRecord.call_id
          : (typeof itemRecord?.id === "string" ? itemRecord.id : ""),
        toolName: typeof itemRecord?.name === "string" ? itemRecord.name : "",
        input: typeof itemRecord?.arguments === "string"
          ? itemRecord.arguments
          : stringifyJsonValue(itemRecord?.arguments ?? {}),
      });
      continue;
    }

    if (itemType === "reasoning") {
      const summary = Array.isArray(itemRecord?.summary) ? itemRecord.summary : [];
      const summaries: Array<{ id?: string; text: string }> = [];
      for (const s of summary) {
        const sr = readRecord(s);
        if (typeof sr?.text === "string" && sr.text.length > 0) {
          summaries.push({
            ...(typeof sr?.id === "string" ? { id: sr.id } : {}),
            text: sr.text,
          });
        }
      }
      content.push({
        type: "reasoning",
        ...(summaries.length > 0 ? { summaries } : {}),
        ...(typeof itemRecord?.encrypted_content === "string"
          ? { signature: itemRecord.encrypted_content }
          : {}),
      });
      continue;
    }
  }

  // `status` alone cannot tell "model finished talking" apart from "model
  // wants tools run", nor a token-limit cutoff from a content-filter stop.
  let finishReason = normalizeOpenAIResponsesFinishReason(record?.status);
  if (finishReason !== null && typeof finishReason === "object") {
    if (finishReason.raw === "completed" && sawFunctionCall) {
      finishReason = { unified: "tool-calls", raw: finishReason.raw };
    } else if (
      finishReason.raw === "incomplete" &&
      readRecord(record?.incomplete_details)?.reason === "content_filter"
    ) {
      finishReason = { unified: "content-filter", raw: finishReason.raw };
    }
  }

  return {
    content,
    finishReason,
    usage: extractOpenAIResponsesUsage(payload),
  };
}
3301
+
3302
/**
 * Per-item bookkeeping for a reasoning output item while streaming.
 */
type OpenAIResponsesStreamReasoningState = {
  /** Id used on the emitted reasoning-start / -delta / -end parts. */
  id: string;
  /** True once the reasoning-start part has been emitted for this item. */
  emittedStart: boolean;
};

/**
 * Per-item bookkeeping for a function_call output item while streaming.
 */
type OpenAIResponsesStreamFunctionCallState = {
  /** Id of the output item; argument delta events reference it. */
  id: string;
  /** Id reported on the emitted tool parts. */
  toolCallId: string;
  /** Function name announced when the item was added. */
  name: string;
  /** Argument text accumulated from delta events so far. */
  arguments: string;
};
3313
+
3314
/**
 * Parse the Responses API streaming event grammar into runtime stream
 * parts. Every event carries a `type` naming its lifecycle phase; events
 * without a string `type`, and event types not listed here, are ignored.
 *
 * Emitted parts:
 *   - `text-delta` for each non-empty `response.output_text.delta`.
 *   - `reasoning-start` / `reasoning-delta` / `reasoning-end` around the
 *     summary deltas of a reasoning item (start is emitted lazily on the
 *     first non-empty delta).
 *   - `tool-input-start` / `tool-input-delta` while function-call
 *     arguments stream, then one `tool-call` when the item is done. When
 *     no argument deltas were seen, the arguments (and name) fall back to
 *     the values on the finished item so the call is not emitted empty.
 *   - a final `finish` part with the finish reason and, if seen, usage.
 *
 * The finish reason is "tool-calls" when the response completed after at
 * least one tool call was emitted, and "content-filter" when an
 * incomplete response names `content_filter` as its reason.
 *
 * @param stream Raw SSE byte stream of the HTTP response.
 * @returns Async iterable of stream parts, always ending with `finish`.
 */
async function* streamOpenAIResponsesParts(
  stream: ReadableStream<Uint8Array>,
): AsyncIterable<unknown> {
  const decoder = new TextDecoder();
  let buffer = "";
  // Both maps are keyed by the output item id.
  const reasoningBlocks = new Map<string, OpenAIResponsesStreamReasoningState>();
  const functionCalls = new Map<string, OpenAIResponsesStreamFunctionCallState>();
  const startedToolCalls = new Set<string>();
  let finishReason: string | { unified: string; raw: string } | null = null;
  let usage: RuntimeUsage | undefined;
  let reasoningCounter = 0;
  let sawToolCall = false;

  // NOTE(review): whatever `parseSseChunk` leaves in `remainder` when the
  // stream ends is not parsed; confirm the final event is always
  // terminated by the server.
  for await (const chunk of stream) {
    buffer += decoder.decode(chunk, { stream: true });
    const parsed = parseSseChunk(buffer);
    buffer = parsed.remainder;

    for (const event of parsed.events) {
      if (event === "[DONE]") continue;
      const record = readRecord(event);
      const type = typeof record?.type === "string" ? record.type : undefined;
      if (!type) continue;

      // response.output_item.added: a new output item begins. Track
      // function_call items so their argument deltas can be attributed,
      // and reasoning items so summary deltas can group correctly.
      if (type === "response.output_item.added") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "function_call" && itemId) {
          const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
          const name = typeof item?.name === "string" ? item.name : "";
          functionCalls.set(itemId, {
            id: itemId,
            toolCallId: callId,
            name,
            arguments: "",
          });
        }
        if (itemType === "reasoning" && itemId) {
          reasoningBlocks.set(itemId, {
            id: `reasoning-${reasoningCounter++}`,
            emittedStart: false,
          });
        }
        continue;
      }

      // response.output_text.delta: text chunk for a message item.
      if (type === "response.output_text.delta" && typeof record?.delta === "string") {
        if (record.delta.length > 0) {
          yield { type: "text-delta", delta: record.delta };
        }
        continue;
      }

      // response.reasoning_summary_text.delta: reasoning summary text
      // chunk. The first delta on an item lazily emits the
      // reasoning-start event so callers can group deltas into a part.
      if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? reasoningBlocks.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!state.emittedStart) {
            yield { type: "reasoning-start", id: state.id };
            state.emittedStart = true;
          }
          yield { type: "reasoning-delta", id: state.id, delta: record.delta };
        }
        continue;
      }

      // response.function_call_arguments.delta: tool call argument
      // chunk. The first delta lazily emits tool-input-start.
      if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? functionCalls.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!startedToolCalls.has(state.id)) {
            yield {
              type: "tool-input-start",
              id: state.toolCallId,
              toolName: state.name,
            };
            startedToolCalls.add(state.id);
          }
          state.arguments += record.delta;
          yield {
            type: "tool-input-delta",
            id: state.toolCallId,
            delta: record.delta,
          };
        }
        continue;
      }

      // response.output_item.done: an item has finished emitting deltas.
      // Close any reasoning or function-call streams that were open.
      if (type === "response.output_item.done") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "reasoning" && itemId) {
          const state = reasoningBlocks.get(itemId);
          if (state?.emittedStart) {
            yield { type: "reasoning-end", id: state.id };
          }
          reasoningBlocks.delete(itemId);
        }
        if (itemType === "function_call" && itemId) {
          const state = functionCalls.get(itemId);
          if (state) {
            // Prefer what was streamed; fall back to the finished item
            // when no deltas arrived so the call is not emitted with
            // empty arguments or an empty name.
            const finalArguments = state.arguments.length > 0
              ? state.arguments
              : (typeof item?.arguments === "string" ? item.arguments : "");
            const finalName = state.name.length > 0
              ? state.name
              : (typeof item?.name === "string" ? item.name : "");
            yield {
              type: "tool-call",
              toolCallId: state.toolCallId,
              toolName: finalName,
              input: finalArguments,
            };
            sawToolCall = true;
          }
          functionCalls.delete(itemId);
        }
        continue;
      }

      // response.completed: terminal event with the final response object
      // (status + usage). Capture both for the final finish part.
      if (type === "response.completed") {
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
        continue;
      }

      if (type === "response.failed" || type === "response.incomplete") {
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
          (type === "response.failed"
            ? { unified: "error", raw: "failed" }
            : { unified: "length", raw: "incomplete" });
        // An incomplete response is not always a token-limit cutoff.
        if (
          type === "response.incomplete" &&
          readRecord(responseRecord?.incomplete_details)?.reason === "content_filter"
        ) {
          finishReason = { unified: "content-filter", raw: "incomplete" };
        }
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        continue;
      }
    }
  }

  // Close any reasoning streams still open at end-of-stream (defensive
  // — a clean Responses API stream always closes them via output_item.done).
  for (const state of reasoningBlocks.values()) {
    if (state.emittedStart) {
      yield { type: "reasoning-end", id: state.id };
    }
  }

  // A response that completed after requesting tools did not "stop"; it
  // is waiting for the tool results.
  if (
    sawToolCall &&
    finishReason !== null &&
    typeof finishReason === "object" &&
    finishReason.raw === "completed"
  ) {
    finishReason = { unified: "tool-calls", raw: finishReason.raw };
  }

  yield {
    type: "finish",
    finishReason,
    ...(usage ? { usage } : {}),
  };
}
3481
+
3482
/**
 * Create a model runtime that talks to the OpenAI Responses API.
 *
 * `doGenerate` posts a non-streaming request and converts the JSON reply;
 * `doStream` posts a streaming request and wraps the parsed parts in a
 * `ReadableStream`. Both attach the warnings collected while building the
 * request, but only when there are any.
 *
 * @param config Provider configuration (name, base URL, API key, fetch).
 * @param modelId Model identifier sent with every request.
 * @returns The runtime object exposing `doGenerate` and `doStream`.
 */
export function createOpenAIResponsesRuntime(
  config: OpenAIRuntimeConfig,
  modelId: string,
): ModelRuntime {
  const fetchImpl = config.fetch ?? globalThis.fetch;

  // Shared first half of both calls: cast the options, resolve the URL
  // and build the request body while collecting warnings.
  const prepareCall = (optionsForRuntime: unknown, stream: boolean) => {
    const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
    const url = getOpenAIResponsesUrl(config.baseURL);
    const warnings = createWarningCollector();
    const body = buildOpenAIResponsesRequest(
      modelId,
      config.name ?? "openai",
      options,
      stream,
      warnings,
    );
    return { options, url, warnings, body };
  };

  return {
    provider: config.name ?? "openai",
    modelId,
    specificationVersion: "v3",
    supportedUrls: {},

    doGenerate(optionsForRuntime: unknown) {
      const { options, url, warnings, body } = prepareCall(optionsForRuntime, false);
      const pendingPayload = requestJson({
        url,
        fetchImpl,
        providerLabel: config.name ?? "openai",
        providerKind: "openai",
        init: {
          method: "POST",
          headers: createRequestHeaders({
            apiKeyHeaderName: "authorization",
            apiKey: `Bearer ${config.apiKey}`,
            extraHeaders: options.headers,
          }),
          body: JSON.stringify(body),
          signal: options.abortSignal,
        },
      });
      return pendingPayload.then((payload) => {
        const collected = warnings.drain();
        return {
          ...buildOpenAIResponsesGenerateResult(payload),
          ...(collected.length > 0 ? { warnings: collected } : {}),
        };
      });
    },

    doStream(optionsForRuntime: unknown) {
      const { options, url, warnings, body } = prepareCall(optionsForRuntime, true);
      const pendingStream = requestStream({
        url,
        fetchImpl,
        providerLabel: config.name ?? "openai",
        providerKind: "openai",
        init: {
          method: "POST",
          headers: createRequestHeaders({
            apiKeyHeaderName: "authorization",
            apiKey: `Bearer ${config.apiKey}`,
            extraHeaders: options.headers,
          }),
          body: JSON.stringify(body),
          signal: options.abortSignal,
        },
      });
      return pendingStream.then((responseStream) => {
        const collected = warnings.drain();
        return {
          stream: ReadableStream.from(streamOpenAIResponsesParts(responseStream)),
          ...(collected.length > 0 ? { warnings: collected } : {}),
        };
      });
    },
  };
}
3562
+
2908
3563
  export function createAnthropicModelRuntime(
2909
3564
  config: AnthropicRuntimeConfig,
2910
3565
  modelId: string,
@@ -1,3 +1,3 @@
1
1
  // Keep in sync with deno.json version.
2
2
  // scripts/release.ts updates this constant during releases.
3
- export const VERSION = "0.1.208";
3
+ export const VERSION = "0.1.210";