@oh-my-pi/pi-ai 8.2.2 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "8.2.2",
3
+ "version": "8.4.0",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -56,7 +56,7 @@
56
56
  "test": "bun test"
57
57
  },
58
58
  "dependencies": {
59
- "@oh-my-pi/pi-utils": "8.2.2",
59
+ "@oh-my-pi/pi-utils": "8.4.0",
60
60
  "@anthropic-ai/sdk": "^0.71.2",
61
61
  "@aws-sdk/client-bedrock-runtime": "^3.975.0",
62
62
  "@bufbuild/protobuf": "^2.10.2",
package/src/index.ts CHANGED
@@ -2,6 +2,7 @@ import "./utils/migrate-env";
2
2
 
3
3
  export * from "./models";
4
4
  export * from "./providers/anthropic";
5
+ export * from "./providers/azure-openai-responses";
5
6
  export * from "./providers/cursor";
6
7
  export * from "./providers/google";
7
8
  export * from "./providers/google-gemini-cli";
@@ -4092,40 +4092,6 @@ export const MODELS = {
4092
4092
  } satisfies Model<"openai-codex-responses">,
4093
4093
  },
4094
4094
  "opencode": {
4095
- "alpha-gd4": {
4096
- id: "alpha-gd4",
4097
- name: "Alpha GD4",
4098
- api: "anthropic-messages",
4099
- provider: "opencode",
4100
- baseUrl: "https://opencode.ai/zen",
4101
- reasoning: true,
4102
- input: ["text"],
4103
- cost: {
4104
- input: 0.5,
4105
- output: 2,
4106
- cacheRead: 0.15,
4107
- cacheWrite: 0,
4108
- },
4109
- contextWindow: 262144,
4110
- maxTokens: 32768,
4111
- } satisfies Model<"anthropic-messages">,
4112
- "alpha-glm-4.7": {
4113
- id: "alpha-glm-4.7",
4114
- name: "Alpha GLM-4.7",
4115
- api: "openai-completions",
4116
- provider: "opencode",
4117
- baseUrl: "https://opencode.ai/zen/v1",
4118
- reasoning: true,
4119
- input: ["text"],
4120
- cost: {
4121
- input: 0.6,
4122
- output: 2.2,
4123
- cacheRead: 0.6,
4124
- cacheWrite: 0,
4125
- },
4126
- contextWindow: 204800,
4127
- maxTokens: 131072,
4128
- } satisfies Model<"openai-completions">,
4129
4095
  "big-pickle": {
4130
4096
  id: "big-pickle",
4131
4097
  name: "Big Pickle",
@@ -5310,10 +5276,10 @@ export const MODELS = {
5310
5276
  reasoning: false,
5311
5277
  input: ["text", "image"],
5312
5278
  cost: {
5313
- input: 0.09999999999999999,
5314
- output: 0.39999999999999997,
5315
- cacheRead: 0.024999999999999998,
5316
- cacheWrite: 0.08333333333333334,
5279
+ input: 0,
5280
+ output: 0,
5281
+ cacheRead: 0,
5282
+ cacheWrite: 0,
5317
5283
  },
5318
5284
  contextWindow: 1048576,
5319
5285
  maxTokens: 8192,
@@ -0,0 +1,688 @@
1
+ import type OpenAI from "openai";
2
+ import { AzureOpenAI } from "openai";
3
+ import type {
4
+ Tool as OpenAITool,
5
+ ResponseCreateParamsStreaming,
6
+ ResponseFunctionToolCall,
7
+ ResponseInput,
8
+ ResponseInputContent,
9
+ ResponseInputImage,
10
+ ResponseInputText,
11
+ ResponseOutputMessage,
12
+ ResponseReasoningItem,
13
+ } from "openai/resources/responses/responses";
14
+ import { calculateCost } from "../models";
15
+ import { getEnvApiKey } from "../stream";
16
+ import type {
17
+ Api,
18
+ AssistantMessage,
19
+ Context,
20
+ ImageContent,
21
+ Model,
22
+ StopReason,
23
+ StreamFunction,
24
+ StreamOptions,
25
+ TextContent,
26
+ ThinkingContent,
27
+ Tool,
28
+ ToolCall,
29
+ } from "../types";
30
+ import { AssistantMessageEventStream } from "../utils/event-stream";
31
+ import { parseStreamingJson } from "../utils/json-parse";
32
+ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
33
+ import { sanitizeSurrogates } from "../utils/sanitize-unicode";
34
+ import { transformMessages } from "./transform-messages";
35
+
36
+ const DEFAULT_AZURE_API_VERSION = "v1";
37
+
38
/**
 * Parse a comma-separated list of `modelId=deploymentName` pairs into a lookup map.
 *
 * Each entry is trimmed. Blank entries and entries that lack a non-empty model id
 * or a non-empty deployment name are ignored. Only the FIRST `=` separates the two
 * sides, so any later `=` stays part of the deployment name.
 *
 * @param value - Raw mapping string, e.g. `"gpt-4o=prod-4o, o3=prod-o3"`; may be undefined.
 * @returns Map from model id to deployment name; empty when `value` is empty or undefined.
 */
function parseDeploymentNameMap(value: string | undefined): Map<string, string> {
	const map = new Map<string, string>();
	if (!value) return map;
	for (const entry of value.split(",")) {
		const trimmed = entry.trim();
		if (!trimmed) continue;
		// `split("=", 2)` truncates the result instead of keeping the remainder, which
		// silently drops everything after a second "=". Cut at the first "=" instead.
		const separator = trimmed.indexOf("=");
		if (separator <= 0) continue; // no "=" at all, or nothing before it
		const modelId = trimmed.slice(0, separator).trim();
		const deploymentName = trimmed.slice(separator + 1).trim();
		if (!modelId || !deploymentName) continue;
		map.set(modelId, deploymentName);
	}
	return map;
}
50
+
51
/**
 * Choose the Azure deployment name for a request.
 *
 * Precedence: `options.azureDeploymentName`, then the entry for `model.id` in the
 * `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` environment variable, then `model.id` itself.
 *
 * @param model - Model whose id is used for the env-map lookup and as the final fallback.
 * @param options - Optional per-request options carrying an explicit deployment name.
 * @returns The deployment name to use.
 */
function resolveDeploymentName(model: Model<"azure-openai-responses">, options?: AzureOpenAIResponsesOptions): string {
	const explicitName = options?.azureDeploymentName;
	if (explicitName) return explicitName;

	const envMapping = parseDeploymentNameMap(process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP);
	const fromEnv = envMapping.get(model.id);
	return fromEnv ? fromEnv : model.id;
}
58
+
59
/**
 * Per-request options for the Azure OpenAI Responses provider, on top of the
 * shared `StreamOptions`.
 */
export interface AzureOpenAIResponsesOptions extends StreamOptions {
	/** Reasoning effort forwarded as `reasoning.effort`; only applied when the model has `reasoning` enabled. */
	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
	/** Reasoning summary mode forwarded as `reasoning.summary`; only applied when the model has `reasoning` enabled. */
	reasoningSummary?: "auto" | "detailed" | "concise" | null;
	/** API version; falls back to `AZURE_OPENAI_API_VERSION`, then to the module default. */
	azureApiVersion?: string;
	/** Azure resource name used to derive the base URL when no explicit base URL is given; falls back to `AZURE_OPENAI_RESOURCE_NAME`. */
	azureResourceName?: string;
	/** Explicit base URL; takes precedence over the resource name and `model.baseUrl`. Falls back to `AZURE_OPENAI_BASE_URL`. */
	azureBaseUrl?: string;
	/** Explicit deployment name; takes precedence over `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` and `model.id`. */
	azureDeploymentName?: string;
}
68
+
69
/**
 * Stream a completion from the Azure OpenAI Responses API.
 *
 * The returned event stream is handed back immediately; the request itself runs in a
 * detached async task that translates Responses API events into `thinking_*`,
 * `text_*` and `toolcall_*` events, followed by exactly one `done` or `error` event.
 *
 * Failures never reject: any thrown error is converted into an `error` event whose
 * message carries `stopReason` `"aborted"` (when `options.signal` is aborted) or `"error"`.
 *
 * @param model - Model descriptor; its id, provider, headers and base URL feed the request.
 * @param context - Conversation (system prompt, messages, tools) to send.
 * @param options - Optional request options (API key, abort signal, Azure overrides, reasoning settings).
 * @returns The event stream that the background task pushes into.
 */
export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"> = (
	model: Model<"azure-openai-responses">,
	context: Context,
	options?: AzureOpenAIResponsesOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();

	// Start async processing
	(async () => {
		const startTime = Date.now();
		let firstTokenTime: number | undefined;
		const deploymentName = resolveDeploymentName(model, options);

		// Accumulated assistant message; every event carries this same object as `partial`.
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: "azure-openai-responses" as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
			timestamp: Date.now(),
		};

		try {
			// Create Azure OpenAI client
			const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
			const client = createClient(model, apiKey, options);
			const params = buildParams(model, context, options, deploymentName);
			options?.onPayload?.(params);
			const openaiStream = await client.responses.create(
				params,
				options?.signal ? { signal: options.signal } : undefined,
			);
			stream.push({ type: "start", partial: output });

			// `currentItem` mirrors the API item being streamed; `currentBlock` is its counterpart in `output.content`.
			let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
			let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
			const blocks = output.content;
			const blockIndex = () => blocks.length - 1;

			for await (const event of openaiStream) {
				// Handle output item start
				if (event.type === "response.output_item.added") {
					if (!firstTokenTime) firstTokenTime = Date.now();
					const item = event.item;
					if (item.type === "reasoning") {
						currentItem = item;
						currentBlock = { type: "thinking", thinking: "" };
						output.content.push(currentBlock);
						stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
					} else if (item.type === "message") {
						currentItem = item;
						currentBlock = { type: "text", text: "" };
						output.content.push(currentBlock);
						stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
					} else if (item.type === "function_call") {
						currentItem = item;
						currentBlock = {
							type: "toolCall",
							// Both ids are kept, joined by "|", so they can be split apart again on replay.
							id: `${item.call_id}|${item.id}`,
							name: item.name,
							arguments: {},
							partialJson: item.arguments || "",
						};
						output.content.push(currentBlock);
						stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
					}
				}
				// Handle reasoning summary deltas
				else if (event.type === "response.reasoning_summary_part.added") {
					if (currentItem && currentItem.type === "reasoning") {
						currentItem.summary = currentItem.summary || [];
						currentItem.summary.push(event.part);
					}
				} else if (event.type === "response.reasoning_summary_text.delta") {
					if (
						currentItem &&
						currentItem.type === "reasoning" &&
						currentBlock &&
						currentBlock.type === "thinking"
					) {
						currentItem.summary = currentItem.summary || [];
						const lastPart = currentItem.summary[currentItem.summary.length - 1];
						if (lastPart) {
							currentBlock.thinking += event.delta;
							lastPart.text += event.delta;
							stream.push({
								type: "thinking_delta",
								contentIndex: blockIndex(),
								delta: event.delta,
								partial: output,
							});
						}
					}
				}
				// Add a blank line between summary parts (workaround so consecutive parts do not run together)
				else if (event.type === "response.reasoning_summary_part.done") {
					if (
						currentItem &&
						currentItem.type === "reasoning" &&
						currentBlock &&
						currentBlock.type === "thinking"
					) {
						currentItem.summary = currentItem.summary || [];
						const lastPart = currentItem.summary[currentItem.summary.length - 1];
						if (lastPart) {
							currentBlock.thinking += "\n\n";
							lastPart.text += "\n\n";
							stream.push({
								type: "thinking_delta",
								contentIndex: blockIndex(),
								delta: "\n\n",
								partial: output,
							});
						}
					}
				}
				// Handle text output deltas
				else if (event.type === "response.content_part.added") {
					if (currentItem && currentItem.type === "message") {
						currentItem.content = currentItem.content || [];
						// Filter out ReasoningText, only accept output_text and refusal
						if (event.part.type === "output_text" || event.part.type === "refusal") {
							currentItem.content.push(event.part);
						}
					}
				} else if (event.type === "response.output_text.delta") {
					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
						// A delta that arrives before any content part was registered has nowhere to go.
						if (!currentItem.content || currentItem.content.length === 0) {
							continue;
						}
						const lastPart = currentItem.content[currentItem.content.length - 1];
						if (lastPart && lastPart.type === "output_text") {
							currentBlock.text += event.delta;
							lastPart.text += event.delta;
							stream.push({
								type: "text_delta",
								contentIndex: blockIndex(),
								delta: event.delta,
								partial: output,
							});
						}
					}
				} else if (event.type === "response.refusal.delta") {
					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
						if (!currentItem.content || currentItem.content.length === 0) {
							continue;
						}
						const lastPart = currentItem.content[currentItem.content.length - 1];
						if (lastPart && lastPart.type === "refusal") {
							// Refusal text is surfaced to consumers as ordinary text deltas.
							currentBlock.text += event.delta;
							lastPart.refusal += event.delta;
							stream.push({
								type: "text_delta",
								contentIndex: blockIndex(),
								delta: event.delta,
								partial: output,
							});
						}
					}
				}
				// Handle function call argument deltas
				else if (event.type === "response.function_call_arguments.delta") {
					if (
						currentItem &&
						currentItem.type === "function_call" &&
						currentBlock &&
						currentBlock.type === "toolCall"
					) {
						currentBlock.partialJson += event.delta;
						currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
						stream.push({
							type: "toolcall_delta",
							contentIndex: blockIndex(),
							delta: event.delta,
							partial: output,
						});
					}
				}
				// Handle function call arguments done (some providers send this instead of deltas)
				else if (event.type === "response.function_call_arguments.done") {
					if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
						currentBlock.partialJson = event.arguments;
						currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
					}
				}
				// Handle output item completion
				else if (event.type === "response.output_item.done") {
					const item = event.item;

					if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
						// Replace the streamed text with the final summary and keep the whole item
						// as the signature so it can be replayed on a later request.
						currentBlock.thinking = item.summary?.map(s => s.text).join("\n\n") || "";
						currentBlock.thinkingSignature = JSON.stringify(item);
						stream.push({
							type: "thinking_end",
							contentIndex: blockIndex(),
							content: currentBlock.thinking,
							partial: output,
						});
						currentBlock = null;
					} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
						currentBlock.text = item.content.map(c => (c.type === "output_text" ? c.text : c.refusal)).join("");
						currentBlock.textSignature = item.id;
						stream.push({
							type: "text_end",
							contentIndex: blockIndex(),
							content: currentBlock.text,
							partial: output,
						});
						currentBlock = null;
					} else if (item.type === "function_call") {
						// Prefer the locally accumulated JSON; fall back to the arguments on the finished item.
						const args =
							currentBlock?.type === "toolCall" && currentBlock.partialJson
								? JSON.parse(currentBlock.partialJson)
								: JSON.parse(item.arguments);
						const toolCall: ToolCall = {
							type: "toolCall",
							id: `${item.call_id}|${item.id}`,
							name: item.name,
							arguments: args,
						};
						currentBlock = null;
						stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
					}
				}
				// Handle completion
				else if (event.type === "response.completed") {
					const response = event.response;
					if (response?.usage) {
						const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
						output.usage = {
							// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
							input: (response.usage.input_tokens || 0) - cachedTokens,
							output: response.usage.output_tokens || 0,
							cacheRead: cachedTokens,
							cacheWrite: 0,
							totalTokens: response.usage.total_tokens || 0,
							cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
						};
					}
					calculateCost(model, output.usage);
					// Map status to stop reason
					output.stopReason = mapStopReason(response?.status);
					// A "completed" response that contains tool calls is reported as a tool-use stop.
					if (output.content.some(b => b.type === "toolCall") && output.stopReason === "stop") {
						output.stopReason = "toolUse";
					}
				}
				// Handle errors
				else if (event.type === "error") {
					// A template literal is always truthy, so the fallback must be chosen before formatting.
					throw new Error(event.message ? `Error Code ${event.code}: ${event.message}` : "Unknown error");
				} else if (event.type === "response.failed") {
					// Surface the failure reason reported on the response when there is one.
					throw new Error(event.response?.error?.message || "Unknown error");
				}
			}

			if (options?.signal?.aborted) {
				throw new Error("Request was aborted");
			}

			if (output.stopReason === "aborted" || output.stopReason === "error") {
				throw new Error("An unknown error occurred");
			}

			output.duration = Date.now() - startTime;
			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Remove any `index` property from the content blocks before reporting the failure.
			for (const block of output.content) delete (block as { index?: number }).index;
			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
			output.errorMessage = formatErrorMessageWithRetryAfter(error);
			output.duration = Date.now() - startTime;
			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();

	return stream;
};
360
+
361
/**
 * Strip every trailing `/` from a base URL.
 *
 * Implemented as a backwards scan rather than `/\/+$/`: the regex can backtrack
 * quadratically on long runs of slashes that are not at the end of the string,
 * while the scan is linear and yields the same result.
 *
 * @param baseUrl - URL to normalize.
 * @returns `baseUrl` without trailing slashes (an empty string if it was only slashes).
 */
function normalizeAzureBaseUrl(baseUrl: string): string {
	let end = baseUrl.length;
	while (end > 0 && baseUrl.charAt(end - 1) === "/") {
		end--;
	}
	return baseUrl.slice(0, end);
}
364
+
365
/**
 * Build the default endpoint URL for an Azure OpenAI resource.
 *
 * @param resourceName - Azure resource name, used as the host's first label.
 * @returns `https://<resourceName>.openai.azure.com/openai/v1`.
 */
function buildDefaultBaseUrl(resourceName: string): string {
	const host = `${resourceName}.openai.azure.com`;
	return `https://${host}/openai/v1`;
}
368
+
369
/**
 * Resolve the base URL and API version for an Azure OpenAI request.
 *
 * API version: `options.azureApiVersion`, then `AZURE_OPENAI_API_VERSION`, then the module default.
 * Base URL, first match wins:
 *   1. `options.azureBaseUrl` or `AZURE_OPENAI_BASE_URL` (trimmed),
 *   2. a URL derived from `options.azureResourceName` or `AZURE_OPENAI_RESOURCE_NAME`,
 *   3. `model.baseUrl`.
 *
 * @param model - Model descriptor supplying the last-resort base URL.
 * @param options - Optional per-request overrides.
 * @returns The base URL without trailing slashes, and the API version.
 * @throws Error when no base URL can be determined from any source.
 */
function resolveAzureConfig(
	model: Model<"azure-openai-responses">,
	options?: AzureOpenAIResponsesOptions,
): { baseUrl: string; apiVersion: string } {
	const apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;

	const explicitUrl = options?.azureBaseUrl?.trim() || process.env.AZURE_OPENAI_BASE_URL?.trim();
	const resourceName = options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;
	const derivedUrl = resourceName ? buildDefaultBaseUrl(resourceName) : undefined;

	const chosenUrl = explicitUrl || derivedUrl || model.baseUrl;
	if (!chosenUrl) {
		throw new Error(
			"Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.",
		);
	}

	return { baseUrl: normalizeAzureBaseUrl(chosenUrl), apiVersion };
}
399
+
400
/**
 * Construct the `AzureOpenAI` client for a request.
 *
 * @param model - Model descriptor; its `headers` become default headers and it feeds base-URL resolution.
 * @param apiKey - API key; when empty, `AZURE_OPENAI_API_KEY` is used instead.
 * @param options - Optional per-request options; `options.headers` override the model's headers.
 * @returns A configured `AzureOpenAI` client.
 * @throws Error when no API key is available, or when no base URL can be resolved.
 */
function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
	const resolvedKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
	if (!resolvedKey) {
		throw new Error(
			"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
		);
	}

	// Later spreads win, so per-request headers take precedence over the model's.
	const defaultHeaders = { ...(model.headers ?? {}), ...(options?.headers ?? {}) };

	const azure = resolveAzureConfig(model, options);

	return new AzureOpenAI({
		apiKey: resolvedKey,
		apiVersion: azure.apiVersion,
		dangerouslyAllowBrowser: true,
		defaultHeaders,
		baseURL: azure.baseUrl,
	});
}
426
+
427
/**
 * Assemble the streaming request payload for the Responses API.
 *
 * @param model - Model descriptor; `reasoning` and `name` decide how reasoning is configured.
 * @param context - Conversation to convert into request input, plus optional tools.
 * @param options - Optional request options (session id, token limit, temperature, reasoning settings).
 * @param deploymentName - Azure deployment name sent as the request's `model`.
 * @returns The request parameters with `stream: true`.
 */
function buildParams(
	model: Model<"azure-openai-responses">,
	context: Context,
	options: AzureOpenAIResponsesOptions | undefined,
	deploymentName: string,
) {
	// The same array is referenced by `params.input`, so later pushes are part of the payload.
	const input = convertMessages(model, context, true);

	const params: ResponseCreateParamsStreaming = {
		model: deploymentName,
		input,
		stream: true,
		prompt_cache_key: options?.sessionId,
	};

	const maxTokens = options?.maxTokens;
	if (maxTokens) {
		params.max_output_tokens = maxTokens;
	}

	const temperature = options?.temperature;
	if (temperature !== undefined) {
		params.temperature = temperature;
	}

	if (context.tools) {
		params.tools = convertTools(context.tools);
	}

	if (!model.reasoning) {
		return params;
	}

	const effort = options?.reasoningEffort;
	const summary = options?.reasoningSummary;
	if (effort || summary) {
		// Either setting enables reasoning; the missing one falls back to its default.
		params.reasoning = {
			effort: effort || "medium",
			summary: summary || "auto",
		};
		params.include = ["reasoning.encrypted_content"];
		return params;
	}

	if (model.name.toLowerCase().startsWith("gpt-5")) {
		// No reasoning settings were requested. Workaround from
		// https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
		// (presumably meant to minimize reasoning, since there is no explicit switch for it).
		input.push({
			role: "developer",
			content: [
				{
					type: "input_text",
					text: "# Juice: 0 !important",
				},
			],
		});
	}

	return params;
}
479
+
480
/**
 * Split a stored tool-call id of the form `<callId>|<itemId>` into its two parts.
 *
 * When either part is missing or empty, both ids are derived from an xxHash64
 * digest of the whole input instead, so the same input always yields the same pair.
 *
 * @param id - Tool-call id as stored on a tool call or tool result.
 * @returns The call id and item id to use in the request.
 */
function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
	const segments = id.split("|");
	const callId = segments[0];
	const itemId = segments[1];
	if (!callId || !itemId) {
		const digest = Bun.hash.xxHash64(id).toString(36);
		return { callId: `call_${digest}`, itemId: `item_${digest}` };
	}
	return { callId, itemId };
}
488
+
489
/**
 * Convert the conversation into Responses API input items.
 *
 * - The system prompt is sent with role `developer` for reasoning models, `system` otherwise.
 * - User messages become `input_text` / `input_image` parts; images are dropped when the model
 *   does not list `"image"` as an input, and empty messages are skipped.
 * - Assistant messages are replayed as reasoning items (from the stored signature), output
 *   messages and function calls. Reasoning and function calls are omitted when the original
 *   completion ended with `stopReason` `"error"`.
 * - Tool results become `function_call_output` items, followed by a user message carrying
 *   any images when the model accepts them.
 *
 * @param model - Target model; decides the system role, image support and id handling.
 * @param context - Conversation holding the system prompt and messages.
 * @param strictResponsesPairing - When true, tool results whose call id was not emitted by an
 *   earlier assistant function call in this conversion are skipped.
 * @returns The input items in conversation order.
 */
function convertMessages(
	model: Model<"azure-openai-responses">,
	context: Context,
	strictResponsesPairing: boolean,
): ResponseInput {
	const items: ResponseInput = [];
	const emittedCallIds = new Set<string>();
	const acceptsImages = model.input.includes("image");

	const history = transformMessages(context.messages, model);

	if (context.systemPrompt) {
		items.push({
			role: model.reasoning ? "developer" : "system",
			content: sanitizeSurrogates(context.systemPrompt),
		});
	}

	// Counts only messages that were not skipped; used to synthesize ids for unsigned text blocks.
	let msgIndex = 0;
	for (const msg of history) {
		if (msg.role === "user") {
			if (typeof msg.content === "string") {
				// Skip empty user messages
				if (msg.content.trim() === "") continue;
				items.push({
					role: "user",
					content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
				});
			} else {
				const parts: ResponseInputContent[] = [];
				for (const item of msg.content) {
					if (item.type === "text") {
						const text = sanitizeSurrogates(item.text);
						// Drop text blocks that are empty after trimming
						if (text.trim().length > 0) {
							parts.push({ type: "input_text", text } satisfies ResponseInputText);
						}
					} else if (acceptsImages) {
						parts.push({
							type: "input_image",
							detail: "auto",
							image_url: `data:${item.mimeType};base64,${item.data}`,
						} satisfies ResponseInputImage);
					}
				}
				if (parts.length === 0) continue;
				items.push({
					role: "user",
					content: parts,
				});
			}
		} else if (msg.role === "assistant") {
			const replayed: ResponseInput = [];
			const assistantMsg = msg as AssistantMessage;

			// True when the message came from the same provider and API but another model id.
			// For such messages, item ids with the fc_ prefix are dropped to avoid
			// OpenAI's reasoning/function_call pairing validation errors.
			const isDifferentModel =
				assistantMsg.api === model.api &&
				assistantMsg.provider === model.provider &&
				assistantMsg.model !== model.id;

			// Thinking and tool-call blocks are not submitted if the completion had an error (i.e. abort)
			const completedCleanly = msg.stopReason !== "error";

			for (const block of msg.content) {
				if (block.type === "text") {
					const textBlock = block as TextContent;
					// OpenAI requires id to be max 64 characters
					let msgId = textBlock.textSignature;
					if (!msgId) {
						msgId = `msg_${msgIndex}`;
					} else if (msgId.length > 64) {
						msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
					}
					replayed.push({
						type: "message",
						role: "assistant",
						content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
						status: "completed",
						id: msgId,
					} satisfies ResponseOutputMessage);
				} else if (!completedCleanly) {
					continue;
				} else if (block.type === "thinking") {
					// The signature holds the serialized reasoning item captured while streaming.
					if (block.thinkingSignature) {
						replayed.push(JSON.parse(block.thinkingSignature));
					}
				} else if (block.type === "toolCall") {
					const toolCall = block as ToolCall;
					const normalized = normalizeResponsesToolCallId(toolCall.id);
					// For different-model messages, omit the item id to avoid pairing validation.
					// OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items.
					const itemId =
						isDifferentModel && normalized.itemId.startsWith("fc_") ? undefined : normalized.itemId;
					emittedCallIds.add(normalized.callId);
					replayed.push({
						type: "function_call",
						id: itemId,
						call_id: normalized.callId,
						name: toolCall.name,
						arguments: JSON.stringify(toolCall.arguments),
					});
				}
			}
			if (replayed.length === 0) continue;
			items.push(...replayed);
		} else if (msg.role === "toolResult") {
			const normalized = normalizeResponsesToolCallId(msg.toolCallId);
			if (strictResponsesPairing && !emittedCallIds.has(normalized.callId)) {
				continue;
			}

			// Collect the text blocks, one per line
			const textLines: string[] = [];
			for (const c of msg.content) {
				if (c.type === "text") textLines.push((c as { text: string }).text);
			}
			const textResult = textLines.join("\n");

			// Always send function_call_output with text (or placeholder if only images)
			items.push({
				type: "function_call_output",
				call_id: normalized.callId,
				output: sanitizeSurrogates(textResult.length > 0 ? textResult : "(see attached image)"),
			});

			// If there are images and model supports them, send a follow-up user message with images
			const imageBlocks = msg.content.filter(c => c.type === "image");
			if (imageBlocks.length > 0 && acceptsImages) {
				const contentParts: ResponseInputContent[] = [
					{
						type: "input_text",
						text: "Attached image(s) from tool result:",
					} satisfies ResponseInputText,
				];
				for (const block of imageBlocks) {
					const image = block as ImageContent;
					contentParts.push({
						type: "input_image",
						detail: "auto",
						image_url: `data:${image.mimeType};base64,${image.data}`,
					} satisfies ResponseInputImage);
				}
				items.push({
					role: "user",
					content: contentParts,
				});
			}
		}
		msgIndex++;
	}

	return items;
}
658
+
659
/**
 * Translate tool definitions into Responses API function tools.
 *
 * Each tool keeps its name, description and parameter schema, and is emitted
 * with `strict: false`.
 *
 * @param tools - Tool definitions from the conversation context.
 * @returns One function tool per input tool, in the same order.
 */
function convertTools(tools: Tool[]): OpenAITool[] {
	const converted: OpenAITool[] = [];
	for (const tool of tools) {
		converted.push({
			type: "function",
			name: tool.name,
			description: tool.description,
			parameters: tool.parameters as Record<string, unknown>,
			strict: false,
		});
	}
	return converted;
}
668
+
669
/**
 * Map a Responses API status onto the library's stop reason.
 *
 * - missing status, `completed`, `in_progress`, `queued` → `"stop"`
 * - `incomplete` → `"length"`
 * - `failed`, `cancelled` → `"error"`
 *
 * @param status - Status reported on the response, if any.
 * @returns The corresponding stop reason.
 * @throws Error when the status is a value not covered above.
 */
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
	if (!status) return "stop";
	if (status === "completed") return "stop";
	if (status === "incomplete") return "length";
	if (status === "failed" || status === "cancelled") return "error";
	// These two are not terminal states, but are treated as a normal stop.
	if (status === "in_progress" || status === "queued") return "stop";

	// Compile-time exhaustiveness check: fails to type-check if a new status is added.
	const _exhaustive: never = status;
	throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
@@ -179,7 +179,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
179
179
  type: "toolCall",
180
180
  id: toolCallId,
181
181
  name: part.functionCall.name || "",
182
- arguments: part.functionCall.args as Record<string, any>,
182
+ arguments: (part.functionCall.args ?? {}) as Record<string, any>,
183
183
  ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
184
184
  };
185
185
 
@@ -74,26 +74,12 @@ const CODEX_MAX_RETRIES = 2;
74
74
  const CODEX_RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504]);
75
75
  const CODEX_RETRY_DELAY_MS = 500;
76
76
 
77
- /** Fast deterministic hash to shorten long strings */
78
- function shortHash(str: string): string {
79
- let h1 = 0xdeadbeef;
80
- let h2 = 0x41c6ce57;
81
- for (let i = 0; i < str.length; i++) {
82
- const ch = str.charCodeAt(i);
83
- h1 = Math.imul(h1 ^ ch, 2654435761);
84
- h2 = Math.imul(h2 ^ ch, 1597334677);
85
- }
86
- h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
87
- h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
88
- return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
89
- }
90
-
91
77
  function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
92
78
  const [callId, itemId] = id.split("|");
93
79
  if (callId && itemId) {
94
80
  return { callId, itemId };
95
81
  }
96
- const hash = shortHash(id);
82
+ const hash = Bun.hash.xxHash64(id).toString(36);
97
83
  return { callId: `call_${hash}`, itemId: `item_${hash}` };
98
84
  }
99
85
 
@@ -298,6 +284,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
298
284
  }
299
285
  } else if (eventType === "response.output_text.delta") {
300
286
  if (currentItem && currentItem.type === "message" && currentBlock?.type === "text") {
287
+ if (!currentItem.content || currentItem.content.length === 0) {
288
+ continue;
289
+ }
301
290
  const lastPart = currentItem.content[currentItem.content.length - 1];
302
291
  if (lastPart && lastPart.type === "output_text") {
303
292
  const delta = (rawEvent as { delta?: string }).delta || "";
@@ -313,6 +302,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
313
302
  }
314
303
  } else if (eventType === "response.refusal.delta") {
315
304
  if (currentItem && currentItem.type === "message" && currentBlock?.type === "text") {
305
+ if (!currentItem.content || currentItem.content.length === 0) {
306
+ continue;
307
+ }
316
308
  const lastPart = currentItem.content[currentItem.content.length - 1];
317
309
  if (lastPart && lastPart.type === "refusal") {
318
310
  const delta = (rawEvent as { delta?: string }).delta || "";
@@ -338,6 +330,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
338
330
  partial: output,
339
331
  });
340
332
  }
333
+ } else if (eventType === "response.function_call_arguments.done") {
334
+ if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
335
+ const args = (rawEvent as { arguments?: string }).arguments;
336
+ if (typeof args === "string") {
337
+ currentBlock.partialJson = args;
338
+ currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
339
+ }
340
+ }
341
341
  } else if (eventType === "response.output_item.done") {
342
342
  const item = rawEvent.item as ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall;
343
343
  if (item.type === "reasoning" && currentBlock?.type === "thinking") {
@@ -622,7 +622,7 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Contex
622
622
  if (!msgId) {
623
623
  msgId = `msg_${msgIndex}`;
624
624
  } else if (msgId.length > 64) {
625
- msgId = `msg_${shortHash(msgId)}`;
625
+ msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
626
626
  }
627
627
  output.push({
628
628
  type: "message",
@@ -50,6 +50,10 @@ function normalizeMistralToolId(id: string, isMistral: boolean): string {
50
50
  return normalized;
51
51
  }
52
52
 
53
+ type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting">> & {
54
+ openRouterRouting?: OpenAICompat["openRouterRouting"];
55
+ };
56
+
53
57
  /**
54
58
  * Check if conversation messages contain tool calls or tool results.
55
59
  * This is needed because Anthropic (via proxy) requires the tools param
@@ -429,6 +433,11 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
429
433
  params.reasoning_effort = options.reasoningEffort;
430
434
  }
431
435
 
436
+ // OpenRouter provider routing preferences
437
+ if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
438
+ (params as { provider?: unknown }).provider = compat.openRouterRouting;
439
+ }
440
+
432
441
  return params;
433
442
  }
434
443
 
@@ -468,7 +477,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
468
477
  export function convertMessages(
469
478
  model: Model<"openai-completions">,
470
479
  context: Context,
471
- compat: Required<OpenAICompat>,
480
+ compat: ResolvedOpenAICompat,
472
481
  ): ChatCompletionMessageParam[] {
473
482
  const params: ChatCompletionMessageParam[] = [];
474
483
 
@@ -718,7 +727,7 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
718
727
  * Provider takes precedence over URL-based detection since it's explicitly configured.
719
728
  * Returns a fully resolved OpenAICompat object with all fields set.
720
729
  */
721
- function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
730
+ function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
722
731
  const provider = model.provider;
723
732
  const baseUrl = model.baseUrl;
724
733
 
@@ -753,6 +762,7 @@ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat
753
762
  requiresThinkingAsText: isMistral,
754
763
  requiresMistralToolIds: isMistral,
755
764
  thinkingFormat: isZai ? "zai" : "openai",
765
+ openRouterRouting: undefined,
756
766
  };
757
767
  }
758
768
 
@@ -760,7 +770,7 @@ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat
760
770
  * Get resolved compatibility settings for a model.
761
771
  * Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
762
772
  */
763
- function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
773
+ function getCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
764
774
  const detected = detectCompat(model);
765
775
  if (!model.compat) return detected;
766
776
 
@@ -776,5 +786,6 @@ function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
776
786
  requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
777
787
  requiresMistralToolIds: model.compat.requiresMistralToolIds ?? detected.requiresMistralToolIds,
778
788
  thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
789
+ openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
779
790
  };
780
791
  }
@@ -31,20 +31,6 @@ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
31
31
  import { sanitizeSurrogates } from "../utils/sanitize-unicode";
32
32
  import { transformMessages } from "./transform-messages";
33
33
 
34
- /** Fast deterministic hash to shorten long strings */
35
- function shortHash(str: string): string {
36
- let h1 = 0xdeadbeef;
37
- let h2 = 0x41c6ce57;
38
- for (let i = 0; i < str.length; i++) {
39
- const ch = str.charCodeAt(i);
40
- h1 = Math.imul(h1 ^ ch, 2654435761);
41
- h2 = Math.imul(h2 ^ ch, 1597334677);
42
- }
43
- h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
44
- h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
45
- return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
46
- }
47
-
48
34
  // OpenAI Responses-specific options
49
35
  export interface OpenAIResponsesOptions extends StreamOptions {
50
36
  reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -195,6 +181,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
195
181
  }
196
182
  } else if (event.type === "response.output_text.delta") {
197
183
  if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
184
+ if (!currentItem.content || currentItem.content.length === 0) {
185
+ continue;
186
+ }
198
187
  const lastPart = currentItem.content[currentItem.content.length - 1];
199
188
  if (lastPart && lastPart.type === "output_text") {
200
189
  currentBlock.text += event.delta;
@@ -209,6 +198,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
209
198
  }
210
199
  } else if (event.type === "response.refusal.delta") {
211
200
  if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
201
+ if (!currentItem.content || currentItem.content.length === 0) {
202
+ continue;
203
+ }
212
204
  const lastPart = currentItem.content[currentItem.content.length - 1];
213
205
  if (lastPart && lastPart.type === "refusal") {
214
206
  currentBlock.text += event.delta;
@@ -449,7 +441,7 @@ function normalizeResponsesToolCallId(id: string): { callId: string; itemId: str
449
441
  if (callId && itemId) {
450
442
  return { callId, itemId };
451
443
  }
452
- const hash = shortHash(id);
444
+ const hash = Bun.hash.xxHash64(id).toString(36);
453
445
  return { callId: `call_${hash}`, itemId: `item_${hash}` };
454
446
  }
455
447
 
@@ -542,7 +534,7 @@ function convertMessages(
542
534
  if (!msgId) {
543
535
  msgId = `msg_${msgIndex}`;
544
536
  } else if (msgId.length > 64) {
545
- msgId = `msg_${shortHash(msgId)}`;
537
+ msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
546
538
  }
547
539
  output.push({
548
540
  type: "message",
@@ -9,26 +9,12 @@ function normalizeToolCallId(id: string): string {
9
9
  return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
10
10
  }
11
11
 
12
- /** Fast deterministic hash to shorten long strings */
13
- function shortHash(str: string): string {
14
- let h1 = 0xdeadbeef;
15
- let h2 = 0x41c6ce57;
16
- for (let i = 0; i < str.length; i++) {
17
- const ch = str.charCodeAt(i);
18
- h1 = Math.imul(h1 ^ ch, 2654435761);
19
- h2 = Math.imul(h2 ^ ch, 1597334677);
20
- }
21
- h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
22
- h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
23
- return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
24
- }
25
-
26
12
  function normalizeResponsesToolCallId(id: string): string {
27
13
  const [callId, itemId] = id.split("|");
28
14
  if (callId && itemId) {
29
15
  return id;
30
16
  }
31
- const hash = shortHash(id);
17
+ const hash = Bun.hash.xxHash64(id).toString(36);
32
18
  return `call_${hash}|item_${hash}`;
33
19
  }
34
20
 
@@ -36,7 +22,10 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
36
22
  // Build a map of original tool call IDs to normalized IDs for github-copilot cross-API switches
37
23
  const toolCallIdMap = new Map<string, string>();
38
24
  const skippedToolCallIds = new Set<string>();
39
- const needsResponsesToolCallIds = model.api === "openai-responses" || model.api === "openai-codex-responses";
25
+ const needsResponsesToolCallIds =
26
+ model.api === "openai-responses" ||
27
+ model.api === "openai-codex-responses" ||
28
+ model.api === "azure-openai-responses";
40
29
 
41
30
  // First pass: transform messages (thinking blocks, tool call ID normalization)
42
31
  const transformed = messages.flatMap<Message>((msg): Message[] => {
package/src/stream.ts CHANGED
@@ -4,6 +4,7 @@ import * as path from "node:path";
4
4
  import { supportsXhigh } from "./models";
5
5
  import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock";
6
6
  import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic";
7
+ import { type AzureOpenAIResponsesOptions, streamAzureOpenAIResponses } from "./providers/azure-openai-responses";
7
8
  import { type CursorOptions, streamCursor } from "./providers/cursor";
8
9
  import { type GoogleOptions, streamGoogle } from "./providers/google";
9
10
  import {
@@ -108,6 +109,7 @@ export function getEnvApiKey(provider: any): string | undefined {
108
109
  minimax: "MINIMAX_API_KEY",
109
110
  opencode: "OPENCODE_API_KEY",
110
111
  cursor: "CURSOR_ACCESS_TOKEN",
112
+ "azure-openai-responses": "AZURE_OPENAI_API_KEY",
111
113
  };
112
114
 
113
115
  const envVar = envMap[provider];
@@ -144,6 +146,9 @@ export function stream<TApi extends Api>(
144
146
  case "openai-responses":
145
147
  return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
146
148
 
149
+ case "azure-openai-responses":
150
+ return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);
151
+
147
152
  case "openai-codex-responses":
148
153
  return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);
149
154
 
@@ -345,6 +350,12 @@ function mapOptionsForApi<TApi extends Api>(
345
350
  reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
346
351
  } satisfies OpenAIResponsesOptions;
347
352
 
353
+ case "azure-openai-responses":
354
+ return {
355
+ ...base,
356
+ reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
357
+ } satisfies AzureOpenAIResponsesOptions;
358
+
348
359
  case "openai-codex-responses":
349
360
  return {
350
361
  ...base,
package/src/types.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import type { TSchema } from "@sinclair/typebox";
2
2
  import type { BedrockOptions } from "./providers/amazon-bedrock";
3
3
  import type { AnthropicOptions } from "./providers/anthropic";
4
+ import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses";
4
5
  import type { CursorOptions } from "./providers/cursor";
5
6
  import type {
6
7
  DeleteArgs,
@@ -33,6 +34,7 @@ export type Api =
33
34
  | "openai-completions"
34
35
  | "openai-responses"
35
36
  | "openai-codex-responses"
37
+ | "azure-openai-responses"
36
38
  | "anthropic-messages"
37
39
  | "bedrock-converse-stream"
38
40
  | "google-generative-ai"
@@ -46,6 +48,7 @@ export interface ApiOptionsMap {
46
48
  "openai-completions": OpenAICompletionsOptions;
47
49
  "openai-responses": OpenAIResponsesOptions;
48
50
  "openai-codex-responses": OpenAICodexResponsesOptions;
51
+ "azure-openai-responses": AzureOpenAIResponsesOptions;
49
52
  "google-generative-ai": GoogleOptions;
50
53
  "google-gemini-cli": GoogleGeminiCliOptions;
51
54
  "google-vertex": GoogleVertexOptions;
@@ -289,6 +292,20 @@ export interface OpenAICompat {
289
292
  requiresMistralToolIds?: boolean;
290
293
  /** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "zai" uses thinking: { type: "enabled" }. Default: "openai". */
291
294
  thinkingFormat?: "openai" | "zai";
295
+ /** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
296
+ openRouterRouting?: OpenRouterRouting;
297
+ }
298
+
299
+ /**
300
+ * OpenRouter provider routing preferences.
301
+ * Controls which upstream providers OpenRouter routes requests to.
302
+ * @see https://openrouter.ai/docs/provider-routing
303
+ */
304
+ export interface OpenRouterRouting {
305
+ /** List of provider slugs to exclusively use for this request (e.g., ["amazon-bedrock", "anthropic"]). */
306
+ only?: string[];
307
+ /** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
308
+ order?: string[];
292
309
  }
293
310
 
294
311
  // Model interface for the unified model system