veryfront 0.1.208 → 0.1.209

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/deno.js CHANGED
@@ -1,6 +1,6 @@
1
1
  export default {
2
2
  "name": "veryfront",
3
- "version": "0.1.208",
3
+ "version": "0.1.209",
4
4
  "license": "Apache-2.0",
5
5
  "nodeModulesDir": "auto",
6
6
  "exclude": [
@@ -64,6 +64,7 @@ export declare class ProviderQuotaError extends ProviderError {
64
64
  export declare class ProviderRequestError extends ProviderError {
65
65
  }
66
66
  export declare function createOpenAIModelRuntime(config: OpenAIRuntimeConfig, modelId: string): ModelRuntime;
67
+ export declare function createOpenAIResponsesRuntime(config: OpenAIRuntimeConfig, modelId: string): ModelRuntime;
67
68
  export declare function createAnthropicModelRuntime(config: AnthropicRuntimeConfig, modelId: string): ModelRuntime;
68
69
  export declare function createGoogleModelRuntime(config: GoogleRuntimeConfig, modelId: string): ModelRuntime;
69
70
  export declare function createOpenAIEmbeddingRuntime(config: OpenAIRuntimeConfig, modelId: string): EmbeddingRuntime;
@@ -1 +1 @@
1
- {"version":3,"file":"runtime-loader.d.ts","sourceRoot":"","sources":["../../../src/src/provider/runtime-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAEjE,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAidD,KAAK,YAAY,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAEtD;;;;;;GAMG;AACH,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,qBAAqB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,YAAY,CAAC;CACxB,CAAC;AAwBF;;;;;GAKG;AACH,qBAAa,aAAc,SAAQ,KAAK;IACtC,QAAQ,CAAC,QAAQ,EAAE,YAAY,CAAC;IAChC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;gBAEnB,OAAO,EAAE;QACnB,QAAQ,EAAE,YAAY,CAAC;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,OAAO,CAAC;QACnB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB;CAUF;AAED,4EAA4E;AAC5E,qBAAa,uBAAwB,SAAQ,aAAa;CAAG;AAE7D,mFAAmF;AACnF,qBAAa,sBAAuB,SAAQ,aAAa;CAAG;AAE5D,2DAA2D;AAC3D,qBAAa,kBAAmB,SAAQ,aAAa;CAAG;AAExD,6DAA6D;AAC7D,qBAAa,oBAAqB,SAAQ,aAAa;CAAG;AAqtE1D,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,YAAY,CA4Ed;AAED,wBAAgB,2BAA2B,CACzC,MAAM,EAAE,sBAAsB,EAC9B,OAAO,EAAE,MAAM,GACd,YAAY,CA4Ed;AAED,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,YAAY,CAwEd;AAED,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,gBAAgB,CA2ClB;AAED,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,gBAAgB,CAiDlB"}
1
+ {"version":3,"file":"runtime-loader.d.ts","sourceRoot":"","sources":["../../../src/src/provider/runtime-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAEjE,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAqdD,KAAK,YAAY,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAEtD;;;;;;GAMG;AACH,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,qBAAqB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,YAAY,CAAC;CACxB,CAAC;AAwBF;;;;;GAKG;AACH,qBAAa,aAAc,SAAQ,KAAK;IACtC,QAAQ,CAAC,QAAQ,EAAE,YAAY,CAAC;IAChC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;IAC5B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;gBAEnB,OAAO,EAAE;QACnB,QAAQ,EAAE,YAAY,CAAC;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,OAAO,CAAC;QACnB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB;CAUF;AAED,4EAA4E;AAC5E,qBAAa,uBAAwB,SAAQ,aAAa;CAAG;AAE7D,mFAAmF;AACnF,qBAAa,sBAAuB,SAAQ,aAAa;CAAG;AAE5D,2DAA2D;AAC3D,qBAAa,kBAAmB,SAAQ,aAAa;CAAG;AAExD,6DAA6D;AAC7D,qBAAa,oBAAqB,SAAQ,aAAa;CAAG;AAqtE1D,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,YAAY,CA4Ed;AA4jBD,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,YAAY,CA4Ed;AAED,wBAAgB,2BAA2B,CACzC,MAAM,EAAE,sBAAsB,EAC9B,OAAO,EAAE,MAAM,GACd,YAAY,CA4Ed;AAED,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,YAAY,CAwEd;AAED,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,gBAAgB,CA2ClB;AAED,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,mBAAmB,EAC3B,OAAO,EAAE,MAAM,GACd,gBAAgB,CAiDlB"}
@@ -13,6 +13,9 @@ function getAnthropicMessagesUrl(baseURL) {
13
13
  function getOpenAIChatCompletionsUrl(baseURL) {
14
14
  return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "chat/completions");
15
15
  }
16
/**
 * Build the request URL for the OpenAI Responses endpoint.
 *
 * Joins the "responses" path segment onto `baseURL`, falling back to
 * DEFAULT_OPENAI_BASE_URL when `baseURL` is null or undefined.
 */
function getOpenAIResponsesUrl(baseURL) {
    const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
    return joinUrl(root, "responses");
}
16
19
  function getGoogleGenerateContentUrl(baseURL, modelId) {
17
20
  return joinUrl(baseURL ?? DEFAULT_GOOGLE_BASE_URL, `models/${encodeURIComponent(modelId)}:generateContent`);
18
21
  }
@@ -2026,6 +2029,542 @@ export function createOpenAIModelRuntime(config, modelId) {
2026
2029
  },
2027
2030
  };
2028
2031
  }
2032
/**
 * Convert the unified RuntimePromptMessage[] to the Responses API `input`
 * array shape. Differences from Chat Completions:
 * - System prompts go on the top-level `instructions` field, not inline.
 *   Multiple non-empty system messages are joined with a blank line.
 * - Content parts use `input_text` / `output_text` discriminants instead
 *   of the Chat Completions plain-text shorthand.
 * - Assistant tool calls become standalone `function_call` items in the
 *   input array, not nested `tool_calls` on a message.
 * - Tool results become standalone `function_call_output` items.
 * - Reasoning content parts roundtrip as `reasoning` items. The summary is
 *   taken from `part.text` when present, otherwise from `part.summaries`
 *   (the shape buildOpenAIResponsesGenerateResult produces).
 *
 * Assistant parts whose type is not `text`, `reasoning` or `tool-call` are
 * skipped instead of being serialized as a malformed `function_call`.
 *
 * @param prompt Iterable of prompt messages with `role` and `content`.
 * @returns `{ input }`, plus `instructions` when at least one non-empty
 *   system message was present.
 */
function toOpenAIResponsesInput(prompt) {
    const instructionsParts = [];
    const input = [];
    // Builds the `summary` array for one reasoning part. `part.text` wins
    // when present so existing callers keep their exact output; structured
    // `summaries` are only consulted as a fallback.
    const toReasoningSummary = (part) => {
        if (typeof part.text === "string" && part.text.length > 0) {
            return [{ type: "summary_text", text: part.text }];
        }
        const summary = [];
        if (Array.isArray(part.summaries)) {
            for (const entry of part.summaries) {
                if (typeof entry?.text === "string" && entry.text.length > 0) {
                    summary.push({ type: "summary_text", text: entry.text });
                }
            }
        }
        return summary;
    };
    for (const message of prompt) {
        switch (message.role) {
            case "system":
                if (message.content.length > 0) {
                    instructionsParts.push(message.content);
                }
                break;
            case "user":
                input.push({
                    role: "user",
                    content: [{ type: "input_text", text: readTextParts(message.content) }],
                });
                break;
            case "assistant": {
                let pendingText = [];
                // Reasoning and function_call items are top-level entries, so
                // any text gathered so far must be emitted first to keep the
                // original ordering of the assistant turn.
                const flushPendingText = () => {
                    if (pendingText.length > 0) {
                        input.push({ role: "assistant", content: pendingText });
                        pendingText = [];
                    }
                };
                for (const part of message.content) {
                    if (part.type === "text") {
                        pendingText.push({ type: "output_text", text: part.text });
                        continue;
                    }
                    if (part.type === "reasoning") {
                        flushPendingText();
                        input.push({
                            type: "reasoning",
                            ...(typeof part.signature === "string"
                                ? { encrypted_content: part.signature }
                                : {}),
                            summary: toReasoningSummary(part),
                        });
                        continue;
                    }
                    if (part.type !== "tool-call") {
                        // Unknown part kinds have no Responses API mapping here.
                        continue;
                    }
                    flushPendingText();
                    input.push({
                        type: "function_call",
                        call_id: part.toolCallId,
                        name: part.toolName,
                        arguments: stringifyJsonValue(part.input),
                    });
                }
                flushPendingText();
                break;
            }
            case "tool":
                for (const part of message.content) {
                    input.push({
                        type: "function_call_output",
                        call_id: part.toolCallId,
                        output: stringifyJsonValue(part.output.value),
                    });
                }
                break;
        }
    }
    return {
        ...(instructionsParts.length > 0 ? { instructions: instructionsParts.join("\n\n") } : {}),
        input,
    };
}
2120
/**
 * Translate unified tool definitions into Responses API `tools` entries.
 *
 * Function tools are flattened: `name`, optional `description` and
 * `parameters` sit directly on the entry rather than under a nested
 * `function` object. Any other tool is forwarded only when its `id` starts
 * with "openai." and has a non-empty suffix; that suffix becomes the entry
 * `type` and the snake-cased `args` are spread after it.
 *
 * @param tools Iterable of tool definitions, or a falsy value.
 * @returns The converted entries, or `undefined` when `tools` is falsy or
 *   nothing was convertible.
 */
function toOpenAIResponsesTools(tools) {
    if (!tools) {
        return undefined;
    }
    const nativePrefix = "openai.";
    const convert = (tool) => {
        if (tool.type === "function") {
            const described = typeof tool.description === "string"
                ? { description: tool.description }
                : {};
            return {
                type: "function",
                name: tool.name,
                ...described,
                parameters: unwrapToolInputSchema(tool.inputSchema),
            };
        }
        if (!tool.id.startsWith(nativePrefix)) {
            return undefined;
        }
        const nativeType = tool.id.slice(nativePrefix.length);
        if (nativeType.length === 0) {
            return undefined;
        }
        return { type: nativeType, ...toSnakeCaseRecord(tool.args) };
    };
    const entries = [];
    for (const tool of tools) {
        const entry = convert(tool);
        if (entry !== undefined) {
            entries.push(entry);
        }
    }
    return entries.length > 0 ? entries : undefined;
}
2153
/**
 * Build the JSON body for a Responses API request.
 *
 * Settings that are not sent are reported through `warnings` rather than
 * dropped silently:
 * - `topK` is never sent.
 * - `presencePenalty` / `frequencyPenalty` are never sent by this builder.
 * - `temperature` / `topP` are omitted while reasoning is active (the model
 *   id is a reasoning model, or a reasoning effort was resolved).
 *
 * Provider options read for "openai" / `providerName` are merged last via
 * Object.assign and can therefore override any field built here.
 *
 * @param modelId Model identifier placed in `model`.
 * @param providerName Provider label passed to readProviderOptions.
 * @param options Unified call options (prompt, tools, sampling settings, ...).
 * @param stream When truthy, `stream: true` is added to the body.
 * @param warnings Collector that receives `unsupported-setting` entries.
 * @returns The request body object.
 */
function buildOpenAIResponsesRequest(modelId, providerName, options, stream, warnings) {
    const isReasoningModel = isOpenAIReasoningModel(modelId);
    const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
    const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
    if (options.topK !== undefined) {
        warnings.push({
            type: "unsupported-setting",
            provider: "openai",
            setting: "topK",
            details: "OpenAI Responses API does not expose top_k; the value was dropped.",
        });
    }
    const penaltySettings = [
        ["presencePenalty", "presence_penalty"],
        ["frequencyPenalty", "frequency_penalty"],
    ];
    if (reasoningEnabled) {
        // Same param-sanitization rules as Chat Completions: reasoning models
        // reject sampling params. Drop with a warning.
        const dropped = [
            ["temperature", "temperature"],
            ["topP", "top_p"],
            ...penaltySettings,
        ];
        for (const [key, openaiName] of dropped) {
            if (options[key] !== undefined) {
                warnings.push({
                    type: "unsupported-setting",
                    provider: "openai",
                    setting: key,
                    details: `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
                });
            }
        }
    }
    else {
        // The penalties are not part of the body below even without
        // reasoning, so surface that to the caller as well.
        for (const [key, openaiName] of penaltySettings) {
            if (options[key] !== undefined) {
                warnings.push({
                    type: "unsupported-setting",
                    provider: "openai",
                    setting: key,
                    details: `OpenAI Responses API does not expose ${openaiName}; the value was dropped.`,
                });
            }
        }
    }
    const { instructions, input } = toOpenAIResponsesInput(options.prompt);
    const responsesTools = toOpenAIResponsesTools(options.tools);
    const body = {
        model: modelId,
        input,
        ...(instructions !== undefined ? { instructions } : {}),
        ...(stream ? { stream: true } : {}),
        ...(options.maxOutputTokens !== undefined
            ? { max_output_tokens: options.maxOutputTokens }
            : {}),
        ...(!reasoningEnabled && options.temperature !== undefined
            ? { temperature: options.temperature }
            : {}),
        ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
        ...(responsesTools ? { tools: responsesTools } : {}),
        ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
        // The Responses API surfaces reasoning effort + summary verbosity
        // in a structured `reasoning` object instead of a flat field. We
        // request "auto" summary so callers see structured summary parts
        // without having to opt into them per request.
        ...(reasoningEffort !== undefined
            ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
            : {}),
        ...(typeof options.userId === "string" && options.userId.length > 0
            ? { user: options.userId }
            : {}),
        ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
        ...(options.parallelToolCalls !== undefined
            ? { parallel_tool_calls: options.parallelToolCalls }
            : {}),
        // Responses API uses `text.format` instead of Chat Completions'
        // `response_format`. The shape is similar but nested under `text`.
        ...(options.responseFormat && options.responseFormat.type !== "text"
            ? {
                text: {
                    format: options.responseFormat.type === "json" ? { type: "json_object" } : {
                        type: "json_schema",
                        name: options.responseFormat.name,
                        ...(typeof options.responseFormat.description === "string"
                            ? { description: options.responseFormat.description }
                            : {}),
                        schema: unwrapToolInputSchema(options.responseFormat.schema),
                        ...(options.responseFormat.strict !== undefined
                            ? { strict: options.responseFormat.strict }
                            : {}),
                    },
                },
            }
            : {}),
    };
    Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
    return body;
}
2238
/**
 * Read token usage from a Responses API payload.
 *
 * The usage record is looked up at `payload.response.usage` first (the
 * shape carried by streaming terminal events) and then at `payload.usage`.
 * Field names are `input_tokens` / `output_tokens` / `total_tokens`; when
 * `total_tokens` is not a number it is derived from whichever of the other
 * two is present. Cached input tokens come from
 * `input_tokens_details.cached_tokens`. Reasoning token counts are not
 * surfaced by this function.
 *
 * @param payload Parsed response body or stream event.
 * @returns A usage object, or `undefined` when no usage record was found.
 */
function extractOpenAIResponsesUsage(payload) {
    const root = readRecord(payload);
    const nested = readRecord(root?.response);
    const usage = readRecord(nested?.usage) ?? readRecord(root?.usage);
    if (!usage) {
        return undefined;
    }
    const numberOrUndefined = (value) => (typeof value === "number" ? value : undefined);
    const inputTokens = numberOrUndefined(usage.input_tokens);
    const outputTokens = numberOrUndefined(usage.output_tokens);
    let totalTokens = numberOrUndefined(usage.total_tokens);
    if (totalTokens === undefined && (inputTokens !== undefined || outputTokens !== undefined)) {
        totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
    }
    const cachedTokens = readRecord(usage.input_tokens_details)?.cached_tokens;
    const cacheFields = typeof cachedTokens === "number"
        ? { cacheReadInputTokens: cachedTokens }
        : {};
    return { inputTokens, outputTokens, totalTokens, ...cacheFields };
}
2268
/**
 * Map a Responses API `status` value onto the unified finish-reason shape.
 *
 * - "completed"  -> { unified: "stop", raw }
 * - "incomplete" -> { unified: "length", raw }
 * - "failed"     -> { unified: "error", raw }
 * - "in_progress" / "queued" (not finished yet) -> null
 * - any other string -> { unified: "other", raw }
 *
 * Unknown statuses are wrapped in the same `{ unified, raw }` object as the
 * known ones so callers always receive either an object or null, never a
 * bare string.
 *
 * @param raw The `status` field as read from the payload (any type).
 * @returns `{ unified, raw }`, or null when `raw` is not a string or names
 *   a non-terminal status.
 */
function normalizeOpenAIResponsesFinishReason(raw) {
    if (typeof raw !== "string") {
        return null;
    }
    switch (raw) {
        case "completed":
            return { unified: "stop", raw };
        case "incomplete":
            return { unified: "length", raw };
        case "failed":
            return { unified: "error", raw };
        case "in_progress":
        case "queued":
            return null;
        default:
            return { unified: "other", raw };
    }
}
2284
/**
 * Convert a non-streaming Responses API payload into the unified generate
 * result (`content`, `finishReason`, `usage`).
 *
 * Each entry of `payload.output` is mapped by its `type`:
 * - "message": the texts of its `output_text` parts are concatenated into
 *   one `text` content entry (skipped when the result is empty).
 * - "function_call": becomes a `tool-call` entry; the id prefers `call_id`
 *   over `id`, and non-string arguments are stringified.
 * - "reasoning": becomes a `reasoning` entry carrying non-empty summaries
 *   and, when present, `encrypted_content` as `signature`.
 * Other item types are ignored.
 *
 * @param payload Parsed JSON body of the response.
 * @returns The unified result object.
 */
function buildOpenAIResponsesGenerateResult(payload) {
    const root = readRecord(payload);
    const items = Array.isArray(root?.output) ? root.output : [];
    const content = [];
    for (const rawItem of items) {
        const item = readRecord(rawItem);
        const kind = typeof item?.type === "string" ? item.type : undefined;
        switch (kind) {
            case "message": {
                if (!Array.isArray(item?.content)) {
                    break;
                }
                const text = item.content
                    .map((part) => readRecord(part))
                    .filter((part) => typeof part?.type === "string" &&
                    part.type === "output_text" &&
                    typeof part.text === "string")
                    .map((part) => part.text)
                    .join("");
                if (text.length > 0) {
                    content.push({ type: "text", text });
                }
                break;
            }
            case "function_call": {
                let toolCallId = "";
                if (typeof item?.call_id === "string") {
                    toolCallId = item.call_id;
                }
                else if (typeof item?.id === "string") {
                    toolCallId = item.id;
                }
                const toolName = typeof item?.name === "string" ? item.name : "";
                const input = typeof item?.arguments === "string"
                    ? item.arguments
                    : stringifyJsonValue(item?.arguments ?? {});
                content.push({ type: "tool-call", toolCallId, toolName, input });
                break;
            }
            case "reasoning": {
                const rawSummaries = Array.isArray(item?.summary) ? item.summary : [];
                const summaries = rawSummaries.flatMap((entry) => {
                    const summaryRecord = readRecord(entry);
                    if (typeof summaryRecord?.text !== "string" || summaryRecord.text.length === 0) {
                        return [];
                    }
                    const idField = typeof summaryRecord?.id === "string" ? { id: summaryRecord.id } : {};
                    return [{ ...idField, text: summaryRecord.text }];
                });
                const summariesField = summaries.length > 0 ? { summaries } : {};
                const signatureField = typeof item?.encrypted_content === "string"
                    ? { signature: item.encrypted_content }
                    : {};
                content.push({ type: "reasoning", ...summariesField, ...signatureField });
                break;
            }
            default:
                break;
        }
    }
    return {
        content,
        finishReason: normalizeOpenAIResponsesFinishReason(root?.status),
        usage: extractOpenAIResponsesUsage(payload),
    };
}
2347
/**
 * Parse the Responses API streaming event grammar into unified stream
 * parts. Every event carries a `type` field naming the lifecycle phase;
 * events without a string `type` and the "[DONE]" sentinel are ignored.
 *
 * Parts yielded:
 * - `text-delta` for each non-empty `response.output_text.delta`.
 * - `reasoning-start` / `reasoning-delta` / `reasoning-end` for reasoning
 *   summary text; the start is emitted lazily on the first non-empty delta.
 * - `tool-input-start` / `tool-input-delta` while function-call arguments
 *   stream, then one `tool-call` when the item is done.
 * - a final `finish` part with the captured finish reason and usage.
 *
 * The final `tool-call` uses the accumulated argument deltas. When no delta
 * was received it falls back to the `arguments` string on the completed
 * item, and likewise for a missing `name`, so the call is not emitted with
 * empty input.
 *
 * @param stream Async iterable of byte chunks from the HTTP response.
 */
async function* streamOpenAIResponsesParts(stream) {
    const decoder = new TextDecoder();
    let buffer = "";
    // Keyed by the provider's item id.
    const reasoningBlocks = new Map();
    const functionCalls = new Map();
    const startedToolCalls = new Set();
    let finishReason = null;
    let usage;
    let reasoningCounter = 0;
    for await (const chunk of stream) {
        buffer += decoder.decode(chunk, { stream: true });
        const parsed = parseSseChunk(buffer);
        // Keep whatever the parser could not consume for the next chunk.
        buffer = parsed.remainder;
        for (const event of parsed.events) {
            if (event === "[DONE]")
                continue;
            const record = readRecord(event);
            const type = typeof record?.type === "string" ? record.type : undefined;
            if (!type)
                continue;
            // response.output_item.added: a new output item begins. Track
            // function_call items so their argument deltas can be attributed,
            // and reasoning items so summary deltas can group correctly.
            if (type === "response.output_item.added") {
                const item = readRecord(record?.item);
                const itemType = typeof item?.type === "string" ? item.type : undefined;
                const itemId = typeof item?.id === "string" ? item.id : undefined;
                if (itemType === "function_call" && itemId) {
                    const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
                    const name = typeof item?.name === "string" ? item.name : "";
                    functionCalls.set(itemId, {
                        id: itemId,
                        toolCallId: callId,
                        name,
                        arguments: "",
                    });
                }
                if (itemType === "reasoning" && itemId) {
                    reasoningBlocks.set(itemId, {
                        id: `reasoning-${reasoningCounter++}`,
                        emittedStart: false,
                    });
                }
                continue;
            }
            // response.output_text.delta: text chunk for a message item.
            if (type === "response.output_text.delta" && typeof record?.delta === "string") {
                if (record.delta.length > 0) {
                    yield { type: "text-delta", delta: record.delta };
                }
                continue;
            }
            // response.reasoning_summary_text.delta: reasoning summary text
            // chunk. The first delta on an item lazily emits the
            // reasoning-start event so callers can group deltas into a part.
            if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
                const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
                const state = itemId ? reasoningBlocks.get(itemId) : undefined;
                if (state && record.delta.length > 0) {
                    if (!state.emittedStart) {
                        yield { type: "reasoning-start", id: state.id };
                        state.emittedStart = true;
                    }
                    yield { type: "reasoning-delta", id: state.id, delta: record.delta };
                }
                continue;
            }
            // response.function_call_arguments.delta: tool call argument
            // chunk. The first delta lazily emits tool-input-start.
            if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
                const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
                const state = itemId ? functionCalls.get(itemId) : undefined;
                if (state && record.delta.length > 0) {
                    if (!startedToolCalls.has(state.id)) {
                        yield {
                            type: "tool-input-start",
                            id: state.toolCallId,
                            toolName: state.name,
                        };
                        startedToolCalls.add(state.id);
                    }
                    state.arguments += record.delta;
                    yield {
                        type: "tool-input-delta",
                        id: state.toolCallId,
                        delta: record.delta,
                    };
                }
                continue;
            }
            // response.output_item.done: an item has finished emitting deltas.
            // Close any reasoning or function-call streams that were open.
            if (type === "response.output_item.done") {
                const item = readRecord(record?.item);
                const itemType = typeof item?.type === "string" ? item.type : undefined;
                const itemId = typeof item?.id === "string" ? item.id : undefined;
                if (itemType === "reasoning" && itemId) {
                    const state = reasoningBlocks.get(itemId);
                    if (state?.emittedStart) {
                        yield { type: "reasoning-end", id: state.id };
                    }
                    reasoningBlocks.delete(itemId);
                }
                if (itemType === "function_call" && itemId) {
                    const state = functionCalls.get(itemId);
                    if (state) {
                        // Streamed deltas stay authoritative; the completed
                        // item only fills in what never arrived as a delta.
                        const finalArguments = state.arguments.length > 0
                            ? state.arguments
                            : (typeof item?.arguments === "string" ? item.arguments : "");
                        const finalName = state.name.length > 0
                            ? state.name
                            : (typeof item?.name === "string" ? item.name : "");
                        yield {
                            type: "tool-call",
                            toolCallId: state.toolCallId,
                            toolName: finalName,
                            input: finalArguments,
                        };
                    }
                    functionCalls.delete(itemId);
                }
                continue;
            }
            // response.completed: terminal event with the final response object
            // (status + usage). Capture both for the final finish part.
            if (type === "response.completed") {
                usage = extractOpenAIResponsesUsage(record) ?? usage;
                const responseRecord = readRecord(record?.response);
                finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
                continue;
            }
            // response.failed / response.incomplete: terminal events. If the
            // embedded status does not normalize, derive the reason from the
            // event type itself.
            if (type === "response.failed" || type === "response.incomplete") {
                const responseRecord = readRecord(record?.response);
                finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
                    (type === "response.failed"
                        ? { unified: "error", raw: "failed" }
                        : { unified: "length", raw: "incomplete" });
                usage = extractOpenAIResponsesUsage(record) ?? usage;
                continue;
            }
        }
    }
    // Close any reasoning streams still open at end-of-stream (defensive:
    // blocks are normally closed when their output_item.done arrives).
    for (const state of reasoningBlocks.values()) {
        if (state.emittedStart) {
            yield { type: "reasoning-end", id: state.id };
        }
    }
    yield {
        type: "finish",
        finishReason,
        ...(usage ? { usage } : {}),
    };
}
2503
/**
 * Create a model runtime that talks to the OpenAI Responses endpoint.
 *
 * `doGenerate` posts a non-streaming request and converts the JSON payload
 * with buildOpenAIResponsesGenerateResult. `doStream` posts a streaming
 * request and wraps streamOpenAIResponsesParts in a ReadableStream. Both
 * attach `warnings` to the result only when the collector drained at least
 * one entry.
 *
 * @param config Runtime config; reads `fetch`, `name`, `baseURL`, `apiKey`.
 * @param modelId Model identifier sent with every request.
 * @returns The runtime object (`specificationVersion: "v3"`).
 */
export function createOpenAIResponsesRuntime(config, modelId) {
    const fetchImpl = config.fetch ?? globalThis.fetch;
    // Shared setup for both call styles: resolve the URL, build the body
    // (collecting warnings) and assemble the request descriptor.
    const prepareRequest = (options, stream) => {
        const url = getOpenAIResponsesUrl(config.baseURL);
        const warnings = createWarningCollector();
        const body = buildOpenAIResponsesRequest(modelId, config.name ?? "openai", options, stream, warnings);
        const request = {
            url,
            fetchImpl,
            providerLabel: config.name ?? "openai",
            providerKind: "openai",
            init: {
                method: "POST",
                headers: createRequestHeaders({
                    apiKeyHeaderName: "authorization",
                    apiKey: `Bearer ${config.apiKey}`,
                    extraHeaders: options.headers,
                }),
                body: JSON.stringify(body),
                signal: options.abortSignal,
            },
        };
        return { request, warnings };
    };
    return {
        provider: config.name ?? "openai",
        modelId,
        specificationVersion: "v3",
        supportedUrls: {},
        doGenerate(optionsForRuntime) {
            const { request, warnings } = prepareRequest(optionsForRuntime, false);
            return requestJson(request).then((payload) => {
                const drained = warnings.drain();
                const result = buildOpenAIResponsesGenerateResult(payload);
                return drained.length > 0 ? { ...result, warnings: drained } : { ...result };
            });
        },
        doStream(optionsForRuntime) {
            const { request, warnings } = prepareRequest(optionsForRuntime, true);
            return requestStream(request).then((responseStream) => {
                const drained = warnings.drain();
                const stream = ReadableStream.from(streamOpenAIResponsesParts(responseStream));
                return drained.length > 0 ? { stream, warnings: drained } : { stream };
            });
        },
    };
}
2029
2568
  export function createAnthropicModelRuntime(config, modelId) {
2030
2569
  const fetchImpl = config.fetch ?? globalThis.fetch;
2031
2570
  return {
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "0.1.208";
1
+ export declare const VERSION = "0.1.209";
2
2
  //# sourceMappingURL=version-constant.d.ts.map
@@ -1,3 +1,3 @@
1
1
  // Keep in sync with deno.json version.
2
2
  // scripts/release.ts updates this constant during releases.
3
- export const VERSION = "0.1.208";
3
+ export const VERSION = "0.1.209";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "veryfront",
3
- "version": "0.1.208",
3
+ "version": "0.1.209",
4
4
  "description": "The simplest way to build AI-powered apps",
5
5
  "keywords": [
6
6
  "react",
package/src/deno.js CHANGED
@@ -1,6 +1,6 @@
1
1
  export default {
2
2
  "name": "veryfront",
3
- "version": "0.1.208",
3
+ "version": "0.1.209",
4
4
  "license": "Apache-2.0",
5
5
  "nodeModulesDir": "auto",
6
6
  "exclude": [
@@ -396,6 +396,10 @@ function getOpenAIChatCompletionsUrl(baseURL?: string): string {
396
396
  return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "chat/completions");
397
397
  }
398
398
 
399
/**
 * Build the request URL for the OpenAI Responses endpoint.
 *
 * Joins the "responses" path segment onto `baseURL`, falling back to
 * DEFAULT_OPENAI_BASE_URL when `baseURL` is omitted.
 */
function getOpenAIResponsesUrl(baseURL?: string): string {
  const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
  return joinUrl(root, "responses");
}
402
+
399
403
  function getGoogleGenerateContentUrl(baseURL: string | undefined, modelId: string): string {
400
404
  return joinUrl(
401
405
  baseURL ?? DEFAULT_GOOGLE_BASE_URL,
@@ -2905,6 +2909,657 @@ export function createOpenAIModelRuntime(
2905
2909
  };
2906
2910
  }
2907
2911
 
2912
+ // =============================================================================
2913
+ // OpenAI Responses API runtime (#1077, deferred from #1052 C4)
2914
+ // =============================================================================
2915
+ //
2916
+ // The Responses API (/v1/responses) is a different surface than Chat
2917
+ // Completions. Same provider, different request shape, different streaming
2918
+ // event grammar, different response shape, and different reasoning-summary
2919
+ // surface. This runtime is parallel to createOpenAIModelRuntime so each
2920
+ // path stays focused on one wire format.
2921
+ //
2922
+ // Why parallel runtimes instead of a flag? See the rationale in #1077.
2923
+ //
2924
+ // docs: https://platform.openai.com/docs/api-reference/responses
2925
+
/**
 * One entry of the Responses API `input` array. Kept as a loose record
 * because the entries built in this file vary by item kind (role-based
 * messages, `function_call`, `function_call_output`, `reasoning`).
 */
type OpenAIResponsesInputItem = Record<string, unknown>;

/**
 * JSON body sent to the Responses endpoint. Only `model` and `input` are
 * required; every other known field is optional, and the index signature
 * lets provider-specific options be merged in as extra keys.
 */
type OpenAIResponsesRequest = {
  // Required core of every request.
  model: string;
  input: OpenAIResponsesInputItem[];

  // Prompting and transport.
  instructions?: string;
  stream?: boolean;

  // Generation limits and sampling.
  max_output_tokens?: number;
  temperature?: number;
  top_p?: number;

  // Tooling.
  tools?: Record<string, unknown>[];
  tool_choice?: unknown;
  parallel_tool_calls?: boolean;

  // Reasoning controls.
  reasoning?: { effort?: string; summary?: string };

  // Request bookkeeping.
  metadata?: Record<string, string>;
  user?: string;
  service_tier?: string;

  // Structured-output configuration.
  text?: { format: Record<string, unknown> };

  // Any additional provider option passed through verbatim.
  [key: string]: unknown;
};
2946
+
/**
 * Translate the unified prompt into the Responses API request pieces.
 *
 * Mapping performed here:
 *  - `system` messages are not placed in `input`; their non-empty texts are
 *    collected and joined with a blank line into `instructions`.
 *  - `user` messages become one `input_text` part holding the message text.
 *  - `assistant` text parts are grouped into `output_text` content on an
 *    assistant message; reasoning and tool-call parts are emitted as their
 *    own top-level items (`reasoning`, `function_call`), so any text
 *    gathered so far is flushed first to keep the original ordering.
 *  - `tool` messages become one `function_call_output` item per part.
 *
 * @param prompt Unified prompt messages in conversation order.
 * @returns The `input` array plus `instructions` when at least one
 *   non-empty system message was present.
 */
function toOpenAIResponsesInput(
  prompt: RuntimePromptMessage[],
): { instructions?: string; input: OpenAIResponsesInputItem[] } {
  const systemTexts: string[] = [];
  const items: OpenAIResponsesInputItem[] = [];

  for (const message of prompt) {
    if (message.role === "system") {
      if (message.content.length > 0) {
        systemTexts.push(message.content);
      }
    } else if (message.role === "user") {
      const text = readTextParts(message.content);
      items.push({ role: "user", content: [{ type: "input_text", text }] });
    } else if (message.role === "assistant") {
      // Text parts waiting to be emitted as a single assistant message.
      let pendingText: Array<Record<string, unknown>> = [];
      const flushPendingText = (): void => {
        if (pendingText.length === 0) return;
        items.push({ role: "assistant", content: pendingText });
        pendingText = [];
      };

      for (const part of message.content) {
        if (part.type === "text") {
          pendingText.push({ type: "output_text", text: part.text });
          continue;
        }

        // Everything else is a standalone item, so emit buffered text first.
        flushPendingText();

        if (part.type === "reasoning") {
          const summary: Array<Record<string, unknown>> =
            typeof part.text === "string" && part.text.length > 0
              ? [{ type: "summary_text", text: part.text }]
              : [];
          items.push({
            type: "reasoning",
            ...(typeof part.signature === "string" ? { encrypted_content: part.signature } : {}),
            summary,
          });
          continue;
        }

        // Remaining part kind: a tool call.
        items.push({
          type: "function_call",
          call_id: part.toolCallId,
          name: part.toolName,
          arguments: stringifyJsonValue(part.input),
        });
      }

      flushPendingText();
    } else if (message.role === "tool") {
      for (const part of message.content) {
        items.push({
          type: "function_call_output",
          call_id: part.toolCallId,
          output: stringifyJsonValue(part.output.value),
        });
      }
    }
  }

  if (systemTexts.length === 0) {
    return { input: items };
  }
  return { instructions: systemTexts.join("\n\n"), input: items };
}
3039
+
/**
 * Convert unified tool definitions into Responses API `tools` entries.
 *
 * Function tools are flattened: `name`, optional `description` and
 * `parameters` sit directly on the entry. Any other tool is treated as a
 * provider-native tool and is only forwarded when its id carries the
 * `openai.` prefix; the remainder of the id becomes the entry `type` and
 * the tool args are spread in after snake-casing.
 *
 * @param tools Unified tool definitions, or `undefined` when none were given.
 * @returns The converted entries, or `undefined` when there is nothing to send.
 */
function toOpenAIResponsesTools(
  tools: RuntimeToolDefinition[] | undefined,
): Array<Record<string, unknown>> | undefined {
  if (!tools) return undefined;

  const nativePrefix = "openai.";
  const entries = tools.flatMap((tool): Array<Record<string, unknown>> => {
    if (tool.type === "function") {
      return [{
        type: "function",
        name: tool.name,
        ...(typeof tool.description === "string" ? { description: tool.description } : {}),
        parameters: unwrapToolInputSchema(tool.inputSchema),
      }];
    }

    // Tools that belong to another provider are dropped.
    if (!tool.id.startsWith(nativePrefix)) return [];
    const nativeType = tool.id.slice(nativePrefix.length);
    if (nativeType.length === 0) return [];

    return [{ type: nativeType, ...toSnakeCaseRecord(tool.args) }];
  });

  return entries.length > 0 ? entries : undefined;
}
3072
+
/**
 * Build the JSON body for a Responses API call from the unified options.
 *
 * Settings that cannot be forwarded are reported through `warnings`
 * rather than failing the request:
 *  - `topK` is never forwarded.
 *  - `presencePenalty` / `frequencyPenalty` are never forwarded (this
 *    function has no body field for them), so a warning is raised whenever
 *    either is set, not only when reasoning is active.
 *  - `temperature` / `topP` are omitted when reasoning is active (the model
 *    id is a reasoning model or a reasoning effort was resolved).
 *
 * Provider options are merged last and may override any computed field.
 *
 * NOTE(review): `options.toolChoice` is forwarded verbatim as `tool_choice`;
 * confirm its shape matches what the Responses API accepts.
 *
 * @param modelId Model identifier placed in `model`.
 * @param providerName Provider label used when reading provider options.
 * @param options Unified call options.
 * @param stream When true, `stream: true` is set on the body.
 * @param warnings Collector receiving one entry per dropped setting.
 * @returns The request body ready to be serialized.
 */
function buildOpenAIResponsesRequest(
  modelId: string,
  providerName: string,
  options: OpenAICompatibleLanguageOptions,
  stream: boolean,
  warnings: WarningCollector,
): OpenAIResponsesRequest {
  const isReasoningModel = isOpenAIReasoningModel(modelId);
  const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
  const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;

  if (options.topK !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "openai",
      setting: "topK",
      details: "OpenAI Responses API does not expose top_k; the value was dropped.",
    });
  }

  // Sampling params are only omitted while reasoning is active.
  const samplingSettings: Array<[keyof typeof options, string]> = [
    ["temperature", "temperature"],
    ["topP", "top_p"],
  ];
  // Penalty params are never written to the body below.
  const penaltySettings: Array<[keyof typeof options, string]> = [
    ["presencePenalty", "presence_penalty"],
    ["frequencyPenalty", "frequency_penalty"],
  ];

  if (reasoningEnabled) {
    for (const [key, openaiName] of [...samplingSettings, ...penaltySettings]) {
      if (options[key] !== undefined) {
        warnings.push({
          type: "unsupported-setting",
          provider: "openai",
          setting: key,
          details:
            `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
        });
      }
    }
  } else {
    // Without this branch the penalties vanished without any signal to
    // the caller when a non-reasoning model was used.
    for (const [key, openaiName] of penaltySettings) {
      if (options[key] !== undefined) {
        warnings.push({
          type: "unsupported-setting",
          provider: "openai",
          setting: key,
          details: `OpenAI Responses API does not expose ${openaiName}; the value was dropped.`,
        });
      }
    }
  }

  const { instructions, input } = toOpenAIResponsesInput(options.prompt);
  const responsesTools = toOpenAIResponsesTools(options.tools);

  const body: OpenAIResponsesRequest = {
    model: modelId,
    input,
    ...(instructions !== undefined ? { instructions } : {}),
    ...(stream ? { stream: true } : {}),
    ...(options.maxOutputTokens !== undefined
      ? { max_output_tokens: options.maxOutputTokens }
      : {}),
    ...(!reasoningEnabled && options.temperature !== undefined
      ? { temperature: options.temperature }
      : {}),
    ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
    ...(responsesTools ? { tools: responsesTools } : {}),
    ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
    // Effort and summary verbosity travel in a structured `reasoning`
    // object. "auto" summary is requested so callers receive summary
    // parts without opting in per request.
    ...(reasoningEffort !== undefined
      ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
      : {}),
    ...(typeof options.userId === "string" && options.userId.length > 0
      ? { user: options.userId }
      : {}),
    ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
    ...(options.parallelToolCalls !== undefined
      ? { parallel_tool_calls: options.parallelToolCalls }
      : {}),
    // Structured output is configured under `text.format` on this API.
    ...(options.responseFormat && options.responseFormat.type !== "text"
      ? {
        text: {
          format: options.responseFormat.type === "json" ? { type: "json_object" } : {
            type: "json_schema",
            name: options.responseFormat.name,
            ...(typeof options.responseFormat.description === "string"
              ? { description: options.responseFormat.description }
              : {}),
            schema: unwrapToolInputSchema(options.responseFormat.schema),
            ...(options.responseFormat.strict !== undefined
              ? { strict: options.responseFormat.strict }
              : {}),
          },
        },
      }
      : {}),
  };

  // Provider options win over everything computed above.
  Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
  return body;
}
3169
+
/**
 * Read token usage from a Responses API payload.
 *
 * The usage record is looked up at `payload.response.usage` first (the
 * shape carried by streaming lifecycle events) and then at `payload.usage`
 * (the shape of a non-streaming response body).
 *
 * @param payload Parsed JSON payload or stream event.
 * @returns Unified usage, or `undefined` when no usage record is present.
 *   `totalTokens` uses the reported total when numeric; otherwise it is the
 *   sum of whichever of input/output counts are known, or `undefined` when
 *   neither is. `cacheReadInputTokens` is included only when
 *   `input_tokens_details.cached_tokens` is numeric.
 */
function extractOpenAIResponsesUsage(payload: unknown): RuntimeUsage | undefined {
  const root = readRecord(payload);
  const nestedUsage = readRecord(readRecord(root?.response)?.usage);
  const usage = nestedUsage ?? readRecord(root?.usage);
  if (!usage) return undefined;

  const asNumber = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const inputTokens = asNumber(usage.input_tokens);
  const outputTokens = asNumber(usage.output_tokens);

  let totalTokens = asNumber(usage.total_tokens);
  if (totalTokens === undefined && (inputTokens !== undefined || outputTokens !== undefined)) {
    totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
  }

  const cachedTokens = readRecord(usage.input_tokens_details)?.cached_tokens;

  return {
    inputTokens,
    outputTokens,
    totalTokens,
    ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
  };
}
3201
+
/**
 * Map a Responses API `status` value onto the unified finish reason.
 *
 * @param raw The `status` field as received (any JSON value).
 * @returns `null` for non-string input and for `"in_progress"`; a
 *   `{ unified, raw }` pair for `"completed"` (stop), `"incomplete"`
 *   (length) and `"failed"` (error); any other string unchanged.
 */
function normalizeOpenAIResponsesFinishReason(
  raw: unknown,
): string | { unified: string; raw: string } | null {
  if (typeof raw !== "string" || raw === "in_progress") {
    return null;
  }

  const unified = raw === "completed"
    ? "stop"
    : raw === "incomplete"
    ? "length"
    : raw === "failed"
    ? "error"
    : undefined;

  // Unrecognized statuses are passed through verbatim.
  return unified === undefined ? raw : { unified, raw };
}
3219
+
/**
 * Content entries produced when a non-streaming Responses payload is
 * converted to the unified result:
 *  - `text`: concatenated output text of one message item.
 *  - `reasoning`: optional summary texts plus an optional opaque signature.
 *  - `tool-call`: a function call with its arguments as a JSON string.
 */
type OpenAIResponsesContentPart =
  | { type: "text"; text: string }
  | { type: "tool-call"; toolCallId: string; toolName: string; input: string }
  | {
    type: "reasoning";
    signature?: string;
    summaries?: { id?: string; text: string }[];
  };
3228
+
/**
 * Convert a non-streaming Responses API payload into the unified result.
 *
 * Walks `payload.output` in order and handles three item types:
 *  - `message`: the texts of its `output_text` parts are concatenated into
 *    one `text` entry (skipped when the result is empty).
 *  - `function_call`: becomes a `tool-call`; the call id prefers `call_id`,
 *    then `id`, then an empty string, and non-string arguments are
 *    stringified.
 *  - `reasoning`: non-empty summary texts are collected and
 *    `encrypted_content` is exposed as `signature`.
 * Other item types are ignored.
 *
 * @param payload Parsed JSON response body.
 * @returns Content parts, the finish reason derived from `status`, and usage.
 */
function buildOpenAIResponsesGenerateResult(payload: unknown): {
  content: OpenAIResponsesContentPart[];
  finishReason?: string | { unified: string; raw: string } | null;
  usage?: RuntimeUsage;
} {
  const root = readRecord(payload);
  const outputItems: unknown[] = Array.isArray(root?.output) ? root.output : [];
  const parts: OpenAIResponsesContentPart[] = [];

  for (const rawItem of outputItems) {
    const item = readRecord(rawItem);

    switch (item?.type) {
      case "message": {
        const messageParts = item?.content;
        if (!Array.isArray(messageParts)) break;

        const chunks: string[] = [];
        for (const rawPart of messageParts) {
          const piece = readRecord(rawPart);
          if (piece?.type === "output_text" && typeof piece?.text === "string") {
            chunks.push(piece.text);
          }
        }

        const text = chunks.join("");
        if (text.length > 0) {
          parts.push({ type: "text", text });
        }
        break;
      }

      case "function_call": {
        const toolCallId = typeof item?.call_id === "string"
          ? item.call_id
          : (typeof item?.id === "string" ? item.id : "");
        const toolName = typeof item?.name === "string" ? item.name : "";
        const input = typeof item?.arguments === "string"
          ? item.arguments
          : stringifyJsonValue(item?.arguments ?? {});
        parts.push({ type: "tool-call", toolCallId, toolName, input });
        break;
      }

      case "reasoning": {
        const rawSummaries: unknown[] = Array.isArray(item?.summary) ? item.summary : [];
        const summaries: Array<{ id?: string; text: string }> = [];
        for (const rawSummary of rawSummaries) {
          const entry = readRecord(rawSummary);
          if (typeof entry?.text !== "string" || entry.text.length === 0) continue;
          summaries.push({
            ...(typeof entry?.id === "string" ? { id: entry.id } : {}),
            text: entry.text,
          });
        }

        parts.push({
          type: "reasoning",
          ...(summaries.length > 0 ? { summaries } : {}),
          ...(typeof item?.encrypted_content === "string"
            ? { signature: item.encrypted_content }
            : {}),
        });
        break;
      }

      default:
        // Unknown or malformed item: nothing to surface.
        break;
    }
  }

  return {
    content: parts,
    finishReason: normalizeOpenAIResponsesFinishReason(root?.status),
    usage: extractOpenAIResponsesUsage(payload),
  };
}
3301
+
/**
 * Per-item bookkeeping for a reasoning output item while streaming.
 * `id` is the locally generated part id used on emitted reasoning events;
 * `emittedStart` records whether `reasoning-start` has been sent yet.
 */
type OpenAIResponsesStreamReasoningState = {
  emittedStart: boolean;
  id: string;
};

/**
 * Per-item bookkeeping for a function-call output item while streaming.
 * `id` is the provider item id, `toolCallId` the id exposed on emitted
 * tool events, and `arguments` the argument text accumulated from deltas.
 */
type OpenAIResponsesStreamFunctionCallState = {
  id: string;
  name: string;
  toolCallId: string;
  arguments: string;
};
3313
+
/**
 * Parse a Responses API server-sent-event byte stream into unified stream
 * parts.
 *
 * Events are dispatched on their `type` field:
 *  - `response.output_item.added` registers function-call and reasoning
 *    items so later deltas can be attributed by `item_id`.
 *  - `response.output_text.delta` yields `text-delta`.
 *  - `response.reasoning_summary_text.delta` yields `reasoning-start`
 *    (once per item, lazily) followed by `reasoning-delta`.
 *  - `response.function_call_arguments.delta` yields `tool-input-start`
 *    (once per item, lazily) followed by `tool-input-delta`.
 *  - `response.output_item.done` closes reasoning parts and yields the
 *    final `tool-call`. If no argument deltas were received for the call,
 *    the arguments (and name) carried on the completed item are used
 *    instead, so the tool call is not emitted with an empty input.
 *  - `response.completed` / `response.failed` / `response.incomplete`
 *    capture the finish reason and usage.
 * A single `finish` part is always yielded last.
 *
 * NOTE(review): a completed response that contains function calls is
 * still reported with the `stop` finish reason; confirm whether
 * downstream consumers expect a distinct tool-call finish reason.
 *
 * @param stream Raw response body bytes.
 * @returns Async iterable of stream parts.
 */
async function* streamOpenAIResponsesParts(
  stream: ReadableStream<Uint8Array>,
): AsyncIterable<unknown> {
  const decoder = new TextDecoder();
  let buffer = "";
  const reasoningBlocks = new Map<string, OpenAIResponsesStreamReasoningState>();
  const functionCalls = new Map<string, OpenAIResponsesStreamFunctionCallState>();
  const startedToolCalls = new Set<string>();
  let finishReason: string | { unified: string; raw: string } | null = null;
  let usage: RuntimeUsage | undefined;
  let reasoningCounter = 0;

  for await (const chunk of stream) {
    buffer += decoder.decode(chunk, { stream: true });
    const parsed = parseSseChunk(buffer);
    // Keep whatever trailing partial event has not been terminated yet.
    buffer = parsed.remainder;

    for (const event of parsed.events) {
      if (event === "[DONE]") continue;
      const record = readRecord(event);
      const type = typeof record?.type === "string" ? record.type : undefined;
      if (!type) continue;

      // A new output item begins: remember the ones that stream deltas.
      if (type === "response.output_item.added") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "function_call" && itemId) {
          const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
          const name = typeof item?.name === "string" ? item.name : "";
          functionCalls.set(itemId, {
            id: itemId,
            toolCallId: callId,
            name,
            arguments: "",
          });
        }
        if (itemType === "reasoning" && itemId) {
          reasoningBlocks.set(itemId, {
            id: `reasoning-${reasoningCounter++}`,
            emittedStart: false,
          });
        }
        continue;
      }

      // Text chunk for a message item; empty deltas are not forwarded.
      if (type === "response.output_text.delta" && typeof record?.delta === "string") {
        if (record.delta.length > 0) {
          yield { type: "text-delta", delta: record.delta };
        }
        continue;
      }

      // Reasoning summary chunk; the start event is emitted lazily so
      // items that never produce summary text emit nothing at all.
      if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? reasoningBlocks.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!state.emittedStart) {
            yield { type: "reasoning-start", id: state.id };
            state.emittedStart = true;
          }
          yield { type: "reasoning-delta", id: state.id, delta: record.delta };
        }
        continue;
      }

      // Tool-call argument chunk; the start event is emitted lazily.
      if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? functionCalls.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!startedToolCalls.has(state.id)) {
            yield {
              type: "tool-input-start",
              id: state.toolCallId,
              toolName: state.name,
            };
            startedToolCalls.add(state.id);
          }
          state.arguments += record.delta;
          yield {
            type: "tool-input-delta",
            id: state.toolCallId,
            delta: record.delta,
          };
        }
        continue;
      }

      // An item finished: close its reasoning part or emit its tool call.
      if (type === "response.output_item.done") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "reasoning" && itemId) {
          const state = reasoningBlocks.get(itemId);
          if (state?.emittedStart) {
            yield { type: "reasoning-end", id: state.id };
          }
          reasoningBlocks.delete(itemId);
        }
        if (itemType === "function_call" && itemId) {
          const state = functionCalls.get(itemId);
          if (state) {
            // Streamed values win; the completed item only fills gaps left
            // when no deltas (or no name) were received for this call.
            const finalArguments = state.arguments.length > 0
              ? state.arguments
              : (typeof item?.arguments === "string" ? item.arguments : state.arguments);
            const finalName = state.name.length > 0
              ? state.name
              : (typeof item?.name === "string" ? item.name : state.name);
            yield {
              type: "tool-call",
              toolCallId: state.toolCallId,
              toolName: finalName,
              input: finalArguments,
            };
          }
          functionCalls.delete(itemId);
        }
        continue;
      }

      // Terminal success event carrying the final status and usage.
      if (type === "response.completed") {
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
        continue;
      }

      // Terminal failure/truncation events; fall back to a reason derived
      // from the event type when the embedded status yields none.
      if (type === "response.failed" || type === "response.incomplete") {
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
          (type === "response.failed"
            ? { unified: "error", raw: "failed" }
            : { unified: "length", raw: "incomplete" });
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        continue;
      }
    }
  }

  // Defensive: close reasoning parts whose item never reported `done`.
  for (const state of reasoningBlocks.values()) {
    if (state.emittedStart) {
      yield { type: "reasoning-end", id: state.id };
    }
  }

  yield {
    type: "finish",
    finishReason,
    ...(usage ? { usage } : {}),
  };
}
3481
+
/**
 * Create a model runtime that talks to the OpenAI Responses API.
 *
 * Both entry points build the request the same way (URL, warning
 * collector, JSON body, bearer-token headers, abort signal) and differ
 * only in the `stream` flag and in how the response is consumed:
 *  - `doGenerate` parses the JSON payload into the unified result.
 *  - `doStream` wraps the parsed event stream in a `ReadableStream`.
 * Warnings gathered while building the request are attached to the result
 * only when there are any.
 *
 * @param config Runtime configuration (API key, optional base URL, name, fetch).
 * @param modelId Model identifier sent with every request.
 * @returns The model runtime.
 */
export function createOpenAIResponsesRuntime(
  config: OpenAIRuntimeConfig,
  modelId: string,
): ModelRuntime {
  const fetchImpl = config.fetch ?? globalThis.fetch;

  // Shared request preparation for both entry points.
  const prepare = (optionsForRuntime: unknown, stream: boolean) => {
    const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
    const providerLabel = config.name ?? "openai";
    const url = getOpenAIResponsesUrl(config.baseURL);
    const warnings = createWarningCollector();
    const body = buildOpenAIResponsesRequest(modelId, providerLabel, options, stream, warnings);
    const init = {
      method: "POST",
      headers: createRequestHeaders({
        apiKeyHeaderName: "authorization",
        apiKey: `Bearer ${config.apiKey}`,
        extraHeaders: options.headers,
      }),
      body: JSON.stringify(body),
      signal: options.abortSignal,
    };
    return { url, providerLabel, warnings, init };
  };

  return {
    provider: config.name ?? "openai",
    modelId,
    specificationVersion: "v3",
    supportedUrls: {},
    doGenerate(optionsForRuntime: unknown) {
      const { url, providerLabel, warnings, init } = prepare(optionsForRuntime, false);
      return requestJson({
        url,
        fetchImpl,
        providerLabel,
        providerKind: "openai",
        init,
      }).then((payload) => {
        const drained = warnings.drain();
        const result = buildOpenAIResponsesGenerateResult(payload);
        return {
          ...result,
          ...(drained.length > 0 ? { warnings: drained } : {}),
        };
      });
    },
    doStream(optionsForRuntime: unknown) {
      const { url, providerLabel, warnings, init } = prepare(optionsForRuntime, true);
      return requestStream({
        url,
        fetchImpl,
        providerLabel,
        providerKind: "openai",
        init,
      }).then((responseStream) => {
        const drained = warnings.drain();
        const parts = streamOpenAIResponsesParts(responseStream);
        return {
          stream: ReadableStream.from(parts),
          ...(drained.length > 0 ? { warnings: drained } : {}),
        };
      });
    },
  };
}
3562
+
2908
3563
  export function createAnthropicModelRuntime(
2909
3564
  config: AnthropicRuntimeConfig,
2910
3565
  modelId: string,
@@ -1,3 +1,3 @@
1
1
  // Keep in sync with deno.json version.
2
2
  // scripts/release.ts updates this constant during releases.
3
- export const VERSION = "0.1.208";
3
+ export const VERSION = "0.1.209";