npm - @oh-my-pi/pi-ai - Versions diffs - 8.2.2 → 8.4.0 - Mend

@oh-my-pi/pi-ai 8.2.2 → 8.4.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (11)

package/package.json +2 -2
package/src/index.ts +1 -0
package/src/models.generated.ts +4 -38
package/src/providers/azure-openai-responses.ts +688 -0
package/src/providers/google.ts +1 -1
package/src/providers/openai-codex-responses.ts +16 -16
package/src/providers/openai-completions.ts +14 -3
package/src/providers/openai-responses.ts +8 -16
package/src/providers/transform-messages.ts +5 -16
package/src/stream.ts +11 -0
package/src/types.ts +17 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@oh-my-pi/pi-ai",
-	"version": "8.2.2",
+	"version": "8.4.0",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"type": "module",
 	"main": "./src/index.ts",
@@ -56,7 +56,7 @@
 		"test": "bun test"
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-utils": "8.2.2",
+		"@oh-my-pi/pi-utils": "8.4.0",
 		"@anthropic-ai/sdk": "^0.71.2",
 		"@aws-sdk/client-bedrock-runtime": "^3.975.0",
 		"@bufbuild/protobuf": "^2.10.2",

package/src/index.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import "./utils/migrate-env";
 export * from "./models";
 export * from "./providers/anthropic";
+export * from "./providers/azure-openai-responses";
 export * from "./providers/cursor";
 export * from "./providers/google";
 export * from "./providers/google-gemini-cli";

package/src/models.generated.ts CHANGED Viewed

@@ -4092,40 +4092,6 @@ export const MODELS = {
 		} satisfies Model<"openai-codex-responses">,
 	},
 	"opencode": {
-		"alpha-gd4": {
-			id: "alpha-gd4",
-			name: "Alpha GD4",
-			api: "anthropic-messages",
-			provider: "opencode",
-			baseUrl: "https://opencode.ai/zen",
-			reasoning: true,
-			input: ["text"],
-			cost: {
-				input: 0.5,
-				output: 2,
-				cacheRead: 0.15,
-				cacheWrite: 0,
-			},
-			contextWindow: 262144,
-			maxTokens: 32768,
-		} satisfies Model<"anthropic-messages">,
-		"alpha-glm-4.7": {
-			id: "alpha-glm-4.7",
-			name: "Alpha GLM-4.7",
-			api: "openai-completions",
-			provider: "opencode",
-			baseUrl: "https://opencode.ai/zen/v1",
-			reasoning: true,
-			input: ["text"],
-			cost: {
-				input: 0.6,
-				output: 2.2,
-				cacheRead: 0.6,
-				cacheWrite: 0,
-			},
-			contextWindow: 204800,
-			maxTokens: 131072,
-		} satisfies Model<"openai-completions">,
 		"big-pickle": {
 			id: "big-pickle",
 			name: "Big Pickle",
@@ -5310,10 +5276,10 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text", "image"],
 			cost: {
-				input: 0.09999999999999999,
-				output: 0.39999999999999997,
-				cacheRead: 0.024999999999999998,
-				cacheWrite: 0.08333333333333334,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
 			},
 			contextWindow: 1048576,
 			maxTokens: 8192,

package/src/providers/azure-openai-responses.ts ADDED Viewed

@@ -0,0 +1,688 @@
+import type OpenAI from "openai";
+import { AzureOpenAI } from "openai";
+import type {
+	Tool as OpenAITool,
+	ResponseCreateParamsStreaming,
+	ResponseFunctionToolCall,
+	ResponseInput,
+	ResponseInputContent,
+	ResponseInputImage,
+	ResponseInputText,
+	ResponseOutputMessage,
+	ResponseReasoningItem,
+} from "openai/resources/responses/responses";
+import { calculateCost } from "../models";
+import { getEnvApiKey } from "../stream";
+import type {
+	Api,
+	AssistantMessage,
+	Context,
+	ImageContent,
+	Model,
+	StopReason,
+	StreamFunction,
+	StreamOptions,
+	TextContent,
+	ThinkingContent,
+	Tool,
+	ToolCall,
+} from "../types";
+import { AssistantMessageEventStream } from "../utils/event-stream";
+import { parseStreamingJson } from "../utils/json-parse";
+import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
+import { sanitizeSurrogates } from "../utils/sanitize-unicode";
+import { transformMessages } from "./transform-messages";
+const DEFAULT_AZURE_API_VERSION = "v1";
+function parseDeploymentNameMap(value: string | undefined): Map<string, string> {
+	const map = new Map<string, string>();
+	if (!value) return map;
+	for (const entry of value.split(",")) {
+		const trimmed = entry.trim();
+		if (!trimmed) continue;
+		const [modelId, deploymentName] = trimmed.split("=", 2);
+		if (!modelId || !deploymentName) continue;
+		map.set(modelId.trim(), deploymentName.trim());
+	}
+	return map;
+}
+function resolveDeploymentName(model: Model<"azure-openai-responses">, options?: AzureOpenAIResponsesOptions): string {
+	if (options?.azureDeploymentName) {
+		return options.azureDeploymentName;
+	}
+	const mappedDeployment = parseDeploymentNameMap(process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP).get(model.id);
+	return mappedDeployment || model.id;
+}
+// Azure OpenAI Responses-specific options
+export interface AzureOpenAIResponsesOptions extends StreamOptions {
+	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
+	reasoningSummary?: "auto" | "detailed" | "concise" | null;
+	azureApiVersion?: string;
+	azureResourceName?: string;
+	azureBaseUrl?: string;
+	azureDeploymentName?: string;
+}
+/**
+ * Generate function for Azure OpenAI Responses API
+ */
+export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"> = (
+	model: Model<"azure-openai-responses">,
+	context: Context,
+	options?: AzureOpenAIResponsesOptions,
+): AssistantMessageEventStream => {
+	const stream = new AssistantMessageEventStream();
+	// Start async processing
+	(async () => {
+		const startTime = Date.now();
+		let firstTokenTime: number | undefined;
+		const deploymentName = resolveDeploymentName(model, options);
+		const output: AssistantMessage = {
+			role: "assistant",
+			content: [],
+			api: "azure-openai-responses" as Api,
+			provider: model.provider,
+			model: model.id,
+			usage: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+				totalTokens: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+			},
+			stopReason: "stop",
+			timestamp: Date.now(),
+		};
+		try {
+			// Create Azure OpenAI client
+			const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
+			const client = createClient(model, apiKey, options);
+			const params = buildParams(model, context, options, deploymentName);
+			options?.onPayload?.(params);
+			const openaiStream = await client.responses.create(
+				params,
+				options?.signal ? { signal: options.signal } : undefined,
+			);
+			stream.push({ type: "start", partial: output });
+			let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
+			let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
+			const blocks = output.content;
+			const blockIndex = () => blocks.length - 1;
+			for await (const event of openaiStream) {
+				// Handle output item start
+				if (event.type === "response.output_item.added") {
+					if (!firstTokenTime) firstTokenTime = Date.now();
+					const item = event.item;
+					if (item.type === "reasoning") {
+						currentItem = item;
+						currentBlock = { type: "thinking", thinking: "" };
+						output.content.push(currentBlock);
+						stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+					} else if (item.type === "message") {
+						currentItem = item;
+						currentBlock = { type: "text", text: "" };
+						output.content.push(currentBlock);
+						stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+					} else if (item.type === "function_call") {
+						currentItem = item;
+						currentBlock = {
+							type: "toolCall",
+							id: `${item.call_id}|${item.id}`,
+							name: item.name,
+							arguments: {},
+							partialJson: item.arguments || "",
+						};
+						output.content.push(currentBlock);
+						stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
+					}
+				}
+				// Handle reasoning summary deltas
+				else if (event.type === "response.reasoning_summary_part.added") {
+					if (currentItem && currentItem.type === "reasoning") {
+						currentItem.summary = currentItem.summary || [];
+						currentItem.summary.push(event.part);
+					}
+				} else if (event.type === "response.reasoning_summary_text.delta") {
+					if (
+						currentItem &&
+						currentItem.type === "reasoning" &&
+						currentBlock &&
+						currentBlock.type === "thinking"
+					) {
+						currentItem.summary = currentItem.summary || [];
+						const lastPart = currentItem.summary[currentItem.summary.length - 1];
+						if (lastPart) {
+							currentBlock.thinking += event.delta;
+							lastPart.text += event.delta;
+							stream.push({
+								type: "thinking_delta",
+								contentIndex: blockIndex(),
+								delta: event.delta,
+								partial: output,
+							});
+						}
+					}
+				}
+				// Add a new line between summary parts (hack...)
+				else if (event.type === "response.reasoning_summary_part.done") {
+					if (
+						currentItem &&
+						currentItem.type === "reasoning" &&
+						currentBlock &&
+						currentBlock.type === "thinking"
+					) {
+						currentItem.summary = currentItem.summary || [];
+						const lastPart = currentItem.summary[currentItem.summary.length - 1];
+						if (lastPart) {
+							currentBlock.thinking += "\n\n";
+							lastPart.text += "\n\n";
+							stream.push({
+								type: "thinking_delta",
+								contentIndex: blockIndex(),
+								delta: "\n\n",
+								partial: output,
+							});
+						}
+					}
+				}
+				// Handle text output deltas
+				else if (event.type === "response.content_part.added") {
+					if (currentItem && currentItem.type === "message") {
+						currentItem.content = currentItem.content || [];
+						// Filter out ReasoningText, only accept output_text and refusal
+						if (event.part.type === "output_text" || event.part.type === "refusal") {
+							currentItem.content.push(event.part);
+						}
+					}
+				} else if (event.type === "response.output_text.delta") {
+					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
+						if (!currentItem.content || currentItem.content.length === 0) {
+							continue;
+						}
+						const lastPart = currentItem.content[currentItem.content.length - 1];
+						if (lastPart && lastPart.type === "output_text") {
+							currentBlock.text += event.delta;
+							lastPart.text += event.delta;
+							stream.push({
+								type: "text_delta",
+								contentIndex: blockIndex(),
+								delta: event.delta,
+								partial: output,
+							});
+						}
+					}
+				} else if (event.type === "response.refusal.delta") {
+					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
+						if (!currentItem.content || currentItem.content.length === 0) {
+							continue;
+						}
+						const lastPart = currentItem.content[currentItem.content.length - 1];
+						if (lastPart && lastPart.type === "refusal") {
+							currentBlock.text += event.delta;
+							lastPart.refusal += event.delta;
+							stream.push({
+								type: "text_delta",
+								contentIndex: blockIndex(),
+								delta: event.delta,
+								partial: output,
+							});
+						}
+					}
+				}
+				// Handle function call argument deltas
+				else if (event.type === "response.function_call_arguments.delta") {
+					if (
+						currentItem &&
+						currentItem.type === "function_call" &&
+						currentBlock &&
+						currentBlock.type === "toolCall"
+					) {
+						currentBlock.partialJson += event.delta;
+						currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
+						stream.push({
+							type: "toolcall_delta",
+							contentIndex: blockIndex(),
+							delta: event.delta,
+							partial: output,
+						});
+					}
+				}
+				// Handle function call arguments done (some providers send this instead of deltas)
+				else if (event.type === "response.function_call_arguments.done") {
+					if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
+						currentBlock.partialJson = event.arguments;
+						currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
+					}
+				}
+				// Handle output item completion
+				else if (event.type === "response.output_item.done") {
+					const item = event.item;
+					if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
+						currentBlock.thinking = item.summary?.map(s => s.text).join("\n\n") || "";
+						currentBlock.thinkingSignature = JSON.stringify(item);
+						stream.push({
+							type: "thinking_end",
+							contentIndex: blockIndex(),
+							content: currentBlock.thinking,
+							partial: output,
+						});
+						currentBlock = null;
+					} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
+						currentBlock.text = item.content.map(c => (c.type === "output_text" ? c.text : c.refusal)).join("");
+						currentBlock.textSignature = item.id;
+						stream.push({
+							type: "text_end",
+							contentIndex: blockIndex(),
+							content: currentBlock.text,
+							partial: output,
+						});
+						currentBlock = null;
+					} else if (item.type === "function_call") {
+						const args =
+							currentBlock?.type === "toolCall" && currentBlock.partialJson
+								? JSON.parse(currentBlock.partialJson)
+								: JSON.parse(item.arguments);
+						const toolCall: ToolCall = {
+							type: "toolCall",
+							id: `${item.call_id}|${item.id}`,
+							name: item.name,
+							arguments: args,
+						};
+						currentBlock = null;
+						stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
+					}
+				}
+				// Handle completion
+				else if (event.type === "response.completed") {
+					const response = event.response;
+					if (response?.usage) {
+						const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
+						output.usage = {
+							// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
+							input: (response.usage.input_tokens || 0) - cachedTokens,
+							output: response.usage.output_tokens || 0,
+							cacheRead: cachedTokens,
+							cacheWrite: 0,
+							totalTokens: response.usage.total_tokens || 0,
+							cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+						};
+					}
+					calculateCost(model, output.usage);
+					// Map status to stop reason
+					output.stopReason = mapStopReason(response?.status);
+					if (output.content.some(b => b.type === "toolCall") && output.stopReason === "stop") {
+						output.stopReason = "toolUse";
+					}
+				}
+				// Handle errors
+				else if (event.type === "error") {
+					throw new Error(`Error Code ${event.code}: ${event.message}` || "Unknown error");
+				} else if (event.type === "response.failed") {
+					throw new Error("Unknown error");
+				}
+			}
+			if (options?.signal?.aborted) {
+				throw new Error("Request was aborted");
+			}
+			if (output.stopReason === "aborted" || output.stopReason === "error") {
+				throw new Error("An unkown error ocurred");
+			}
+			output.duration = Date.now() - startTime;
+			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
+			stream.push({ type: "done", reason: output.stopReason, message: output });
+			stream.end();
+		} catch (error) {
+			for (const block of output.content) delete (block as { index?: number }).index;
+			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
+			output.errorMessage = formatErrorMessageWithRetryAfter(error);
+			output.duration = Date.now() - startTime;
+			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
+			stream.push({ type: "error", reason: output.stopReason, error: output });
+			stream.end();
+		}
+	})();
+	return stream;
+};
+function normalizeAzureBaseUrl(baseUrl: string): string {
+	return baseUrl.replace(/\/+$/, "");
+}
+function buildDefaultBaseUrl(resourceName: string): string {
+	return `https://${resourceName}.openai.azure.com/openai/v1`;
+}
+function resolveAzureConfig(
+	model: Model<"azure-openai-responses">,
+	options?: AzureOpenAIResponsesOptions,
+): { baseUrl: string; apiVersion: string } {
+	const apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;
+	const baseUrl = options?.azureBaseUrl?.trim() || process.env.AZURE_OPENAI_BASE_URL?.trim() || undefined;
+	const resourceName = options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;
+	let resolvedBaseUrl = baseUrl;
+	if (!resolvedBaseUrl && resourceName) {
+		resolvedBaseUrl = buildDefaultBaseUrl(resourceName);
+	}
+	if (!resolvedBaseUrl && model.baseUrl) {
+		resolvedBaseUrl = model.baseUrl;
+	}
+	if (!resolvedBaseUrl) {
+		throw new Error(
+			"Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.",
+		);
+	}
+	return {
+		baseUrl: normalizeAzureBaseUrl(resolvedBaseUrl),
+		apiVersion,
+	};
+}
+function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
+	if (!apiKey) {
+		if (!process.env.AZURE_OPENAI_API_KEY) {
+			throw new Error(
+				"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
+			);
+		}
+		apiKey = process.env.AZURE_OPENAI_API_KEY;
+	}
+	const headers = { ...(model.headers ?? {}) };
+	if (options?.headers) {
+		Object.assign(headers, options.headers);
+	}
+	const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
+	return new AzureOpenAI({
+		apiKey,
+		apiVersion,
+		dangerouslyAllowBrowser: true,
+		defaultHeaders: headers,
+		baseURL: baseUrl,
+	});
+}
+function buildParams(
+	model: Model<"azure-openai-responses">,
+	context: Context,
+	options: AzureOpenAIResponsesOptions | undefined,
+	deploymentName: string,
+) {
+	const messages = convertMessages(model, context, true);
+	const params: ResponseCreateParamsStreaming = {
+		model: deploymentName,
+		input: messages,
+		stream: true,
+		prompt_cache_key: options?.sessionId,
+	};
+	if (options?.maxTokens) {
+		params.max_output_tokens = options?.maxTokens;
+	}
+	if (options?.temperature !== undefined) {
+		params.temperature = options?.temperature;
+	}
+	if (context.tools) {
+		params.tools = convertTools(context.tools);
+	}
+	if (model.reasoning) {
+		if (options?.reasoningEffort || options?.reasoningSummary) {
+			params.reasoning = {
+				effort: options?.reasoningEffort || "medium",
+				summary: options?.reasoningSummary || "auto",
+			};
+			params.include = ["reasoning.encrypted_content"];
+		} else {
+			if (model.name.toLowerCase().startsWith("gpt-5")) {
+				// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
+				messages.push({
+					role: "developer",
+					content: [
+						{
+							type: "input_text",
+							text: "# Juice: 0 !important",
+						},
+					],
+				});
+			}
+		}
+	}
+	return params;
+}
+function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
+	const [callId, itemId] = id.split("|");
+	if (callId && itemId) {
+		return { callId, itemId };
+	}
+	const hash = Bun.hash.xxHash64(id).toString(36);
+	return { callId: `call_${hash}`, itemId: `item_${hash}` };
+}
+function convertMessages(
+	model: Model<"azure-openai-responses">,
+	context: Context,
+	strictResponsesPairing: boolean,
+): ResponseInput {
+	const messages: ResponseInput = [];
+	const knownCallIds = new Set<string>();
+	const transformedMessages = transformMessages(context.messages, model);
+	if (context.systemPrompt) {
+		const role = model.reasoning ? "developer" : "system";
+		messages.push({
+			role,
+			content: sanitizeSurrogates(context.systemPrompt),
+		});
+	}
+	let msgIndex = 0;
+	for (const msg of transformedMessages) {
+		if (msg.role === "user") {
+			if (typeof msg.content === "string") {
+				// Skip empty user messages
+				if (!msg.content || msg.content.trim() === "") continue;
+				messages.push({
+					role: "user",
+					content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
+				});
+			} else {
+				const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
+					if (item.type === "text") {
+						return {
+							type: "input_text",
+							text: sanitizeSurrogates(item.text),
+						} satisfies ResponseInputText;
+					}
+					return {
+						type: "input_image",
+						detail: "auto",
+						image_url: `data:${item.mimeType};base64,${item.data}`,
+					} satisfies ResponseInputImage;
+				});
+				// Filter out images if model doesn't support them, and empty text blocks
+				let filteredContent = !model.input.includes("image")
+					? content.filter(c => c.type !== "input_image")
+					: content;
+				filteredContent = filteredContent.filter(c => {
+					if (c.type === "input_text") {
+						return c.text.trim().length > 0;
+					}
+					return true; // Keep non-text content (images)
+				});
+				if (filteredContent.length === 0) continue;
+				messages.push({
+					role: "user",
+					content: filteredContent,
+				});
+			}
+		} else if (msg.role === "assistant") {
+			const output: ResponseInput = [];
+			const assistantMsg = msg as AssistantMessage;
+			// Check if this message is from a different model (same provider, different model ID).
+			// For such messages, tool call IDs with fc_ prefix need to be stripped to avoid
+			// OpenAI's reasoning/function_call pairing validation errors.
+			const isDifferentModel =
+				assistantMsg.model !== model.id &&
+				assistantMsg.provider === model.provider &&
+				assistantMsg.api === model.api;
+			for (const block of msg.content) {
+				// Do not submit thinking blocks if the completion had an error (i.e. abort)
+				if (block.type === "thinking" && msg.stopReason !== "error") {
+					if (block.thinkingSignature) {
+						const reasoningItem = JSON.parse(block.thinkingSignature);
+						output.push(reasoningItem);
+					}
+				} else if (block.type === "text") {
+					const textBlock = block as TextContent;
+					// OpenAI requires id to be max 64 characters
+					let msgId = textBlock.textSignature;
+					if (!msgId) {
+						msgId = `msg_${msgIndex}`;
+					} else if (msgId.length > 64) {
+						msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
+					}
+					output.push({
+						type: "message",
+						role: "assistant",
+						content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
+						status: "completed",
+						id: msgId,
+					} satisfies ResponseOutputMessage);
+					// Do not submit toolcall blocks if the completion had an error (i.e. abort)
+				} else if (block.type === "toolCall" && msg.stopReason !== "error") {
+					const toolCall = block as ToolCall;
+					const normalized = normalizeResponsesToolCallId(toolCall.id);
+					const callId = normalized.callId;
+					// For different-model messages, set id to undefined to avoid pairing validation.
+					// OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items.
+					// By omitting the id, we avoid triggering that validation (like cross-provider does).
+					let itemId: string | undefined = normalized.itemId;
+					if (isDifferentModel && itemId?.startsWith("fc_")) {
+						itemId = undefined;
+					}
+					knownCallIds.add(normalized.callId);
+					output.push({
+						type: "function_call",
+						id: itemId,
+						call_id: callId,
+						name: toolCall.name,
+						arguments: JSON.stringify(toolCall.arguments),
+					});
+				}
+			}
+			if (output.length === 0) continue;
+			messages.push(...output);
+		} else if (msg.role === "toolResult") {
+			// Extract text and image content
+			const textResult = msg.content
+				.filter(c => c.type === "text")
+				.map(c => (c as { text: string }).text)
+				.join("\n");
+			const hasImages = msg.content.some(c => c.type === "image");
+			const normalized = normalizeResponsesToolCallId(msg.toolCallId);
+			if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
+				continue;
+			}
+			// Always send function_call_output with text (or placeholder if only images)
+			const hasText = textResult.length > 0;
+			messages.push({
+				type: "function_call_output",
+				call_id: normalized.callId,
+				output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
+			});
+			// If there are images and model supports them, send a follow-up user message with images
+			if (hasImages && model.input.includes("image")) {
+				const contentParts: ResponseInputContent[] = [];
+				// Add text prefix
+				contentParts.push({
+					type: "input_text",
+					text: "Attached image(s) from tool result:",
+				} satisfies ResponseInputText);
+				// Add images
+				for (const block of msg.content) {
+					if (block.type === "image") {
+						contentParts.push({
+							type: "input_image",
+							detail: "auto",
+							image_url: `data:${(block as ImageContent).mimeType};base64,${(block as ImageContent).data}`,
+						} satisfies ResponseInputImage);
+					}
+				}
+				messages.push({
+					role: "user",
+					content: contentParts,
+				});
+			}
+		}
+		msgIndex++;
+	}
+	return messages;
+}
+function convertTools(tools: Tool[]): OpenAITool[] {
+	return tools.map(tool => ({
+		type: "function",
+		name: tool.name,
+		description: tool.description,
+		parameters: tool.parameters as Record<string, unknown>,
+		strict: false,
+	}));
+}
+function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
+	if (!status) return "stop";
+	switch (status) {
+		case "completed":
+			return "stop";
+		case "incomplete":
+			return "length";
+		case "failed":
+		case "cancelled":
+			return "error";
+		// These two are wonky ...
+		case "in_progress":
+		case "queued":
+			return "stop";
+		default: {
+			const _exhaustive: never = status;
+			throw new Error(`Unhandled stop reason: ${_exhaustive}`);
+		}
+	}
+}

package/src/providers/google.ts CHANGED Viewed

@@ -179,7 +179,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
 								type: "toolCall",
 								id: toolCallId,
 								name: part.functionCall.name || "",
-								arguments: part.functionCall.args as Record<string, any>,
+								arguments: (part.functionCall.args ?? {}) as Record<string, any>,
 								...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
 							};

package/src/providers/openai-codex-responses.ts CHANGED Viewed

@@ -74,26 +74,12 @@ const CODEX_MAX_RETRIES = 2;
 const CODEX_RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504]);
 const CODEX_RETRY_DELAY_MS = 500;
-/** Fast deterministic hash to shorten long strings */
-function shortHash(str: string): string {
-	let h1 = 0xdeadbeef;
-	let h2 = 0x41c6ce57;
-	for (let i = 0; i < str.length; i++) {
-		const ch = str.charCodeAt(i);
-		h1 = Math.imul(h1 ^ ch, 2654435761);
-		h2 = Math.imul(h2 ^ ch, 1597334677);
-	}
-	h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
-	h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
-	return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
-}
 function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
 	const [callId, itemId] = id.split("|");
 	if (callId && itemId) {
 		return { callId, itemId };
 	}
-	const hash = shortHash(id);
+	const hash = Bun.hash.xxHash64(id).toString(36);
 	return { callId: `call_${hash}`, itemId: `item_${hash}` };
 }
@@ -298,6 +284,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 					}
 				} else if (eventType === "response.output_text.delta") {
 					if (currentItem && currentItem.type === "message" && currentBlock?.type === "text") {
+						if (!currentItem.content || currentItem.content.length === 0) {
+							continue;
+						}
 						const lastPart = currentItem.content[currentItem.content.length - 1];
 						if (lastPart && lastPart.type === "output_text") {
 							const delta = (rawEvent as { delta?: string }).delta || "";
@@ -313,6 +302,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 					}
 				} else if (eventType === "response.refusal.delta") {
 					if (currentItem && currentItem.type === "message" && currentBlock?.type === "text") {
+						if (!currentItem.content || currentItem.content.length === 0) {
+							continue;
+						}
 						const lastPart = currentItem.content[currentItem.content.length - 1];
 						if (lastPart && lastPart.type === "refusal") {
 							const delta = (rawEvent as { delta?: string }).delta || "";
@@ -338,6 +330,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 							partial: output,
 						});
 					}
+				} else if (eventType === "response.function_call_arguments.done") {
+					if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
+						const args = (rawEvent as { arguments?: string }).arguments;
+						if (typeof args === "string") {
+							currentBlock.partialJson = args;
+							currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
+						}
+					}
 				} else if (eventType === "response.output_item.done") {
 					const item = rawEvent.item as ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall;
 					if (item.type === "reasoning" && currentBlock?.type === "thinking") {
@@ -622,7 +622,7 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Contex
 					if (!msgId) {
 						msgId = `msg_${msgIndex}`;
 					} else if (msgId.length > 64) {
-						msgId = `msg_${shortHash(msgId)}`;
+						msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
 					}
 					output.push({
 						type: "message",

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -50,6 +50,10 @@ function normalizeMistralToolId(id: string, isMistral: boolean): string {
 	return normalized;
 }
+type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting">> & {
+	openRouterRouting?: OpenAICompat["openRouterRouting"];
+};
 /**
  * Check if conversation messages contain tool calls or tool results.
  * This is needed because Anthropic (via proxy) requires the tools param
@@ -429,6 +433,11 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 		params.reasoning_effort = options.reasoningEffort;
 	}
+	// OpenRouter provider routing preferences
+	if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
+		(params as { provider?: unknown }).provider = compat.openRouterRouting;
+	}
 	return params;
 }
@@ -468,7 +477,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
 export function convertMessages(
 	model: Model<"openai-completions">,
 	context: Context,
-	compat: Required<OpenAICompat>,
+	compat: ResolvedOpenAICompat,
 ): ChatCompletionMessageParam[] {
 	const params: ChatCompletionMessageParam[] = [];
@@ -718,7 +727,7 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
  * Provider takes precedence over URL-based detection since it's explicitly configured.
  * Returns a fully resolved OpenAICompat object with all fields set.
  */
-function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
+function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
 	const provider = model.provider;
 	const baseUrl = model.baseUrl;
@@ -753,6 +762,7 @@ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat
 		requiresThinkingAsText: isMistral,
 		requiresMistralToolIds: isMistral,
 		thinkingFormat: isZai ? "zai" : "openai",
+		openRouterRouting: undefined,
 	};
 }
@@ -760,7 +770,7 @@ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat
  * Get resolved compatibility settings for a model.
  * Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
  */
-function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
+function getCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
 	const detected = detectCompat(model);
 	if (!model.compat) return detected;
@@ -776,5 +786,6 @@ function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
 		requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
 		requiresMistralToolIds: model.compat.requiresMistralToolIds ?? detected.requiresMistralToolIds,
 		thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
+		openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
 	};
 }

package/src/providers/openai-responses.ts CHANGED Viewed

@@ -31,20 +31,6 @@ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
 import { sanitizeSurrogates } from "../utils/sanitize-unicode";
 import { transformMessages } from "./transform-messages";
-/** Fast deterministic hash to shorten long strings */
-function shortHash(str: string): string {
-	let h1 = 0xdeadbeef;
-	let h2 = 0x41c6ce57;
-	for (let i = 0; i < str.length; i++) {
-		const ch = str.charCodeAt(i);
-		h1 = Math.imul(h1 ^ ch, 2654435761);
-		h2 = Math.imul(h2 ^ ch, 1597334677);
-	}
-	h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
-	h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
-	return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
-}
 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
 	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -195,6 +181,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 					}
 				} else if (event.type === "response.output_text.delta") {
 					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
+						if (!currentItem.content || currentItem.content.length === 0) {
+							continue;
+						}
 						const lastPart = currentItem.content[currentItem.content.length - 1];
 						if (lastPart && lastPart.type === "output_text") {
 							currentBlock.text += event.delta;
@@ -209,6 +198,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 					}
 				} else if (event.type === "response.refusal.delta") {
 					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
+						if (!currentItem.content || currentItem.content.length === 0) {
+							continue;
+						}
 						const lastPart = currentItem.content[currentItem.content.length - 1];
 						if (lastPart && lastPart.type === "refusal") {
 							currentBlock.text += event.delta;
@@ -449,7 +441,7 @@ function normalizeResponsesToolCallId(id: string): { callId: string; itemId: str
 	if (callId && itemId) {
 		return { callId, itemId };
 	}
-	const hash = shortHash(id);
+	const hash = Bun.hash.xxHash64(id).toString(36);
 	return { callId: `call_${hash}`, itemId: `item_${hash}` };
 }
@@ -542,7 +534,7 @@ function convertMessages(
 					if (!msgId) {
 						msgId = `msg_${msgIndex}`;
 					} else if (msgId.length > 64) {
-						msgId = `msg_${shortHash(msgId)}`;
+						msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
 					}
 					output.push({
 						type: "message",

package/src/providers/transform-messages.ts CHANGED Viewed

@@ -9,26 +9,12 @@ function normalizeToolCallId(id: string): string {
 	return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
 }
-/** Fast deterministic hash to shorten long strings */
-function shortHash(str: string): string {
-	let h1 = 0xdeadbeef;
-	let h2 = 0x41c6ce57;
-	for (let i = 0; i < str.length; i++) {
-		const ch = str.charCodeAt(i);
-		h1 = Math.imul(h1 ^ ch, 2654435761);
-		h2 = Math.imul(h2 ^ ch, 1597334677);
-	}
-	h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
-	h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
-	return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
-}
 function normalizeResponsesToolCallId(id: string): string {
 	const [callId, itemId] = id.split("|");
 	if (callId && itemId) {
 		return id;
 	}
-	const hash = shortHash(id);
+	const hash = Bun.hash.xxHash64(id).toString(36);
 	return `call_${hash}|item_${hash}`;
 }
@@ -36,7 +22,10 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
 	// Build a map of original tool call IDs to normalized IDs for github-copilot cross-API switches
 	const toolCallIdMap = new Map<string, string>();
 	const skippedToolCallIds = new Set<string>();
-	const needsResponsesToolCallIds = model.api === "openai-responses" || model.api === "openai-codex-responses";
+	const needsResponsesToolCallIds =
+		model.api === "openai-responses" ||
+		model.api === "openai-codex-responses" ||
+		model.api === "azure-openai-responses";
 	// First pass: transform messages (thinking blocks, tool call ID normalization)
 	const transformed = messages.flatMap<Message>((msg): Message[] => {

package/src/stream.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import * as path from "node:path";
 import { supportsXhigh } from "./models";
 import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock";
 import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic";
+import { type AzureOpenAIResponsesOptions, streamAzureOpenAIResponses } from "./providers/azure-openai-responses";
 import { type CursorOptions, streamCursor } from "./providers/cursor";
 import { type GoogleOptions, streamGoogle } from "./providers/google";
 import {
@@ -108,6 +109,7 @@ export function getEnvApiKey(provider: any): string | undefined {
 		minimax: "MINIMAX_API_KEY",
 		opencode: "OPENCODE_API_KEY",
 		cursor: "CURSOR_ACCESS_TOKEN",
+		"azure-openai-responses": "AZURE_OPENAI_API_KEY",
 	};
 	const envVar = envMap[provider];
@@ -144,6 +146,9 @@ export function stream<TApi extends Api>(
 		case "openai-responses":
 			return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
+		case "azure-openai-responses":
+			return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);
 		case "openai-codex-responses":
 			return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);
@@ -345,6 +350,12 @@ function mapOptionsForApi<TApi extends Api>(
 				reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
 			} satisfies OpenAIResponsesOptions;
+		case "azure-openai-responses":
+			return {
+				...base,
+				reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
+			} satisfies AzureOpenAIResponsesOptions;
 		case "openai-codex-responses":
 			return {
 				...base,

package/src/types.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { TSchema } from "@sinclair/typebox";
 import type { BedrockOptions } from "./providers/amazon-bedrock";
 import type { AnthropicOptions } from "./providers/anthropic";
+import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses";
 import type { CursorOptions } from "./providers/cursor";
 import type {
 	DeleteArgs,
@@ -33,6 +34,7 @@ export type Api =
 	| "openai-completions"
 	| "openai-responses"
 	| "openai-codex-responses"
+	| "azure-openai-responses"
 	| "anthropic-messages"
 	| "bedrock-converse-stream"
 	| "google-generative-ai"
@@ -46,6 +48,7 @@ export interface ApiOptionsMap {
 	"openai-completions": OpenAICompletionsOptions;
 	"openai-responses": OpenAIResponsesOptions;
 	"openai-codex-responses": OpenAICodexResponsesOptions;
+	"azure-openai-responses": AzureOpenAIResponsesOptions;
 	"google-generative-ai": GoogleOptions;
 	"google-gemini-cli": GoogleGeminiCliOptions;
 	"google-vertex": GoogleVertexOptions;
@@ -289,6 +292,20 @@ export interface OpenAICompat {
 	requiresMistralToolIds?: boolean;
 	/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "zai" uses thinking: { type: "enabled" }. Default: "openai". */
 	thinkingFormat?: "openai" | "zai";
+	/** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
+	openRouterRouting?: OpenRouterRouting;
+}
+/**
+ * OpenRouter provider routing preferences.
+ * Controls which upstream providers OpenRouter routes requests to.
+ * @see https://openrouter.ai/docs/provider-routing
+ */
+export interface OpenRouterRouting {
+	/** List of provider slugs to exclusively use for this request (e.g., ["amazon-bedrock", "anthropic"]). */
+	only?: string[];
+	/** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
+	order?: string[];
 }
 // Model interface for the unified model system