@oh-my-pi/pi-ai 6.8.2 → 6.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/package.json +2 -2
- package/src/providers/amazon-bedrock.ts +4 -2
- package/src/providers/anthropic.ts +168 -102
- package/src/providers/cursor.ts +2 -0
- package/src/providers/google-gemini-cli.ts +2 -0
- package/src/providers/google-vertex.ts +1 -0
- package/src/providers/google.ts +1 -0
- package/src/providers/openai-codex-responses.ts +35 -17
- package/src/providers/openai-completions.ts +9 -3
- package/src/providers/openai-responses.ts +44 -8
- package/src/providers/transform-messages.ts +59 -7
- package/src/stream.ts +15 -6
- package/src/types.ts +10 -0
- package/src/utils/oauth/callback-server.ts +2 -2
- package/src/utils/oauth/index.ts +1 -0
- package/src/utils/oauth/openai-codex.ts +10 -3
package/README.md
CHANGED
@@ -607,6 +607,28 @@ context.messages.push({ role: "user", content: "Please continue" });
 const continuation = await complete(model, context);
 ```
 
+### Common Stream Options
+
+All providers accept the base `StreamOptions` (in addition to provider-specific options):
+
+- `apiKey`: Override the provider API key
+- `headers`: Extra request headers merged on top of model-defined headers
+- `sessionId`: Provider-specific session identifier (prompt caching/routing)
+- `signal`: Abort in-flight requests
+- `onPayload`: Callback invoked with the provider request payload just before sending
+
+Example:
+
+```typescript
+const response = await complete(model, context, {
+  apiKey: "sk-live",
+  headers: { "X-Debug-Trace": "true" },
+  onPayload: (payload) => {
+    console.log("request payload", payload);
+  },
+});
+```
+
 ## APIs, Models, and Providers
 
 The library implements 4 API interfaces, each with its own streaming function and options:
@@ -987,6 +1009,15 @@ import {
 } from "@oh-my-pi/pi-ai";
 ```
 
+`loginOpenAICodex` accepts an optional `originator` value used in the OAuth flow:
+
+```typescript
+await loginOpenAICodex({
+  onAuth: ({ url }) => console.log(url),
+  originator: "my-cli",
+});
+```
+
 ### Login Flow Example
 
 ```typescript
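The new options also compose with request cancellation. A minimal sketch, assuming the same `complete`/`model`/`context` setup as the README examples above (the ten-second timeout and the session name are illustrative values, not defaults):

```typescript
const controller = new AbortController();
// Abort the in-flight request after ten seconds (illustrative timeout)
const timer = setTimeout(() => controller.abort(), 10_000);

try {
  const response = await complete(model, context, {
    signal: controller.signal,
    sessionId: "session-1234", // illustrative provider-specific session id
  });
  console.log(response);
} finally {
  clearTimeout(timer);
}
```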
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@oh-my-pi/pi-ai",
-  "version": "6.8.2",
+  "version": "6.8.4",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "type": "module",
   "main": "./src/index.ts",
@@ -17,7 +17,7 @@
     "test": "bun test"
   },
   "dependencies": {
-    "@oh-my-pi/pi-utils": "6.8.2",
+    "@oh-my-pi/pi-utils": "6.8.4",
     "@anthropic-ai/sdk": "0.71.2",
     "@aws-sdk/client-bedrock-runtime": "^3.968.0",
     "@bufbuild/protobuf": "^2.10.2",
package/src/providers/amazon-bedrock.ts
CHANGED
@@ -93,14 +93,16 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
     profile: options.profile,
   });
 
-  const command = new ConverseStreamCommand({
+  const commandInput = {
     modelId: model.id,
     messages: convertMessages(context, model),
     system: buildSystemPrompt(context.systemPrompt, model),
     inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
    toolConfig: convertToolConfig(context.tools, options.toolChoice),
    additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
-  });
+  };
+  options?.onPayload?.(commandInput);
+  const command = new ConverseStreamCommand(commandInput);
 
   const response = await client.send(command, { abortSignal: options.signal });
 
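The `onPayload` hook added here (and mirrored across the other providers below) receives the provider's native request shape just before it is sent; for Bedrock that is the `ConverseStreamCommand` input. A hedged sketch of a reusable observer; `logPayload` is a made-up helper for illustration, not a package API:

```typescript
// Sketch: a payload observer usable as `onPayload` with any provider.
// The payload arrives as `unknown` because its shape is provider-specific.
function logPayload(payload: unknown): void {
  try {
    const json = JSON.stringify(payload, null, 2);
    // Truncate very large request bodies (threshold is arbitrary)
    console.log(json.length > 4000 ? `${json.slice(0, 4000)}…` : json);
  } catch {
    console.log("payload not JSON-serializable:", payload);
  }
}

// Usage: await complete(model, context, { onPayload: logPayload });
```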
package/src/providers/anthropic.ts
CHANGED
@@ -161,8 +161,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
   try {
     const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
     const extraBetas = normalizeExtraBetas(options?.betas);
-    const { client, isOAuthToken } = createClient(model, apiKey, extraBetas, true);
+    const { client, isOAuthToken } = createClient(model, apiKey, extraBetas, true, options?.headers);
     const params = buildParams(model, context, isOAuthToken, options);
+    options?.onPayload?.(params);
     const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
     stream.push({ type: "start", partial: output });
 
@@ -291,11 +292,21 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
         if (event.delta.stop_reason) {
           output.stopReason = mapStopReason(event.delta.stop_reason);
         }
-
-
-
-
-
+        // message_delta.usage only contains output_tokens (cumulative), not input_tokens
+        // Preserve input token counts from message_start, only update output
+        if (event.usage.output_tokens !== undefined && event.usage.output_tokens !== null) {
+          output.usage.output = event.usage.output_tokens;
+        }
+        // These fields may or may not be present in message_delta
+        if (event.usage.cache_read_input_tokens !== undefined && event.usage.cache_read_input_tokens !== null) {
+          output.usage.cacheRead = event.usage.cache_read_input_tokens;
+        }
+        if (
+          event.usage.cache_creation_input_tokens !== undefined &&
+          event.usage.cache_creation_input_tokens !== null
+        ) {
+          output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
+        }
         output.usage.totalTokens =
           output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
         calculateCost(model, output.usage);
@@ -420,6 +431,7 @@ function createClient(
   apiKey: string,
   extraBetas: string[],
   stream: boolean,
+  extraHeaders?: Record<string, string>,
 ): { client: Anthropic; isOAuthToken: boolean } {
   const oauthToken = isOAuthToken(apiKey);
 
@@ -438,7 +450,7 @@ function createClient(
     isOAuth: oauthToken,
     extraBetas: mergedBetas,
     stream,
-    modelHeaders: model.headers,
+    modelHeaders: { ...(model.headers ?? {}), ...(extraHeaders ?? {}) },
   });
 
   const clientOptions: ConstructorParameters<typeof Anthropic>[0] = {
@@ -466,16 +478,13 @@ export type AnthropicSystemBlock = {
 };
 
 type CacheControlBlock = {
-  cache_control?: { type: "ephemeral" };
+  cache_control?: { type: "ephemeral" } | null;
 };
 
-type CacheControlMode = "none" | "toolBlocks" | "userText";
-
 const cacheControlEphemeral = { type: "ephemeral" as const };
 
 type SystemBlockOptions = {
   includeClaudeCodeInstruction?: boolean;
-  includeCacheControl?: boolean;
   extraInstructions?: string[];
 };
 
@@ -483,17 +492,15 @@ export function buildAnthropicSystemBlocks(
   systemPrompt: string | undefined,
   options: SystemBlockOptions = {},
 ): AnthropicSystemBlock[] | undefined {
-  const { includeClaudeCodeInstruction = false, includeCacheControl = false, extraInstructions = [] } = options;
+  const { includeClaudeCodeInstruction = false, extraInstructions = [] } = options;
   const blocks: AnthropicSystemBlock[] = [];
   const sanitizedPrompt = systemPrompt ? sanitizeSurrogates(systemPrompt) : "";
   const hasClaudeCodeInstruction = sanitizedPrompt.includes(claudeCodeSystemInstruction);
-  const cacheControl = includeCacheControl ? { type: "ephemeral" as const } : undefined;
 
   if (includeClaudeCodeInstruction && !hasClaudeCodeInstruction) {
     blocks.push({
       type: "text",
       text: claudeCodeSystemInstruction,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
     });
   }
 
@@ -503,7 +510,6 @@ export function buildAnthropicSystemBlocks(
     blocks.push({
       type: "text",
       text: trimmed,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
     });
   }
 
@@ -511,7 +517,6 @@ export function buildAnthropicSystemBlocks(
     blocks.push({
       type: "text",
       text: sanitizedPrompt,
-      ...(cacheControl ? { cache_control: cacheControl } : {}),
     });
   }
 
@@ -546,11 +551,9 @@ function buildParams(
   isOAuthToken: boolean,
   options?: AnthropicOptions,
 ): MessageCreateParamsStreaming {
-  const hasTools = Boolean(context.tools?.length);
-  const cacheControlMode = resolveCacheControlMode(context.messages, hasTools && isOAuthToken);
   const params: MessageCreateParamsStreaming = {
     model: model.id,
-    messages: convertMessages(context.messages, model, isOAuthToken, cacheControlMode),
+    messages: convertMessages(context.messages, model, isOAuthToken),
     max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
     stream: true,
   };
@@ -558,7 +561,6 @@ function buildParams(
   const includeClaudeCodeSystem = !model.id.startsWith("claude-3-5-haiku");
   const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
     includeClaudeCodeInstruction: includeClaudeCodeSystem,
-    includeCacheControl: cacheControlMode !== "none",
   });
   if (systemBlocks) {
     params.system = systemBlocks;
@@ -596,6 +598,8 @@ function buildParams(
     ensureMaxTokensForThinking(params, model);
   }
 
+  applyPromptCaching(params);
+
   return params;
 }
 
@@ -605,75 +609,141 @@ function sanitizeToolCallId(id: string): string {
   return id.replace(/[^a-zA-Z0-9_-]/g, "_");
 }
 
-function
-
+function stripCacheControl<T extends CacheControlBlock>(blocks: T[]): void {
+  for (const block of blocks) {
+    if ("cache_control" in block) {
+      delete block.cache_control;
+    }
+  }
+}
+
+function applyCacheControlToLastBlock<T extends CacheControlBlock>(blocks: T[]): void {
+  if (blocks.length === 0) return;
+  const lastIndex = blocks.length - 1;
+  blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControlEphemeral };
+}
 
-
-
-
-
-
+function applyCacheControlToLastTextBlock(blocks: Array<ContentBlockParam & CacheControlBlock>): void {
+  if (blocks.length === 0) return;
+  for (let i = blocks.length - 1; i >= 0; i--) {
+    if (blocks[i].type === "text") {
+      blocks[i] = { ...blocks[i], cache_control: cacheControlEphemeral };
+      return;
     }
   }
+  applyCacheControlToLastBlock(blocks);
+}
+
+function applyPromptCaching(params: MessageCreateParamsStreaming): void {
+  // Anthropic allows max 4 cache breakpoints
+  const MAX_CACHE_BREAKPOINTS = 4;
 
-
+  // First, strip ALL existing cache_control to ensure clean slate
+  if (params.tools) {
+    for (const tool of params.tools) {
+      delete (tool as CacheControlBlock).cache_control;
+    }
+  }
+
+  if (params.system && Array.isArray(params.system)) {
+    stripCacheControl(params.system);
+  }
+
+  for (const message of params.messages) {
+    if (Array.isArray(message.content)) {
+      stripCacheControl(message.content as Array<ContentBlockParam & CacheControlBlock>);
+    }
+  }
+
+  let cacheBreakpointsUsed = 0;
+
+  // Cache hierarchy order: tools -> system -> messages
+  // See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+
+  // 1. Cache tools - place breakpoint on last tool definition
+  if (params.tools && params.tools.length > 0) {
+    applyCacheControlToLastBlock(params.tools as Array<CacheControlBlock>);
+    cacheBreakpointsUsed++;
+  }
+
+  if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
+
+  // 2. Cache system prompt
+  if (params.system && Array.isArray(params.system) && params.system.length > 0) {
+    applyCacheControlToLastBlock(params.system);
+    cacheBreakpointsUsed++;
+  }
+
+  if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
+
+  // 3. Cache penultimate user message for conversation history caching
+  const userIndexes = params.messages
+    .map((message, index) => (message.role === "user" ? index : -1))
+    .filter((index) => index >= 0);
+
+  if (userIndexes.length >= 2) {
+    const penultimateUserIndex = userIndexes[userIndexes.length - 2];
+    const penultimateUser = params.messages[penultimateUserIndex];
+    if (penultimateUser) {
+      if (typeof penultimateUser.content === "string") {
+        penultimateUser.content = [
+          { type: "text", text: penultimateUser.content, cache_control: cacheControlEphemeral },
+        ];
+        cacheBreakpointsUsed++;
+      } else if (Array.isArray(penultimateUser.content) && penultimateUser.content.length > 0) {
+        applyCacheControlToLastTextBlock(penultimateUser.content as Array<ContentBlockParam & CacheControlBlock>);
+        cacheBreakpointsUsed++;
+      }
+    }
+  }
+
+  if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
+
+  // 4. Cache final user message for current turn (enables cache hit on next request)
+  if (userIndexes.length >= 1) {
+    const lastUserIndex = userIndexes[userIndexes.length - 1];
+    const lastUser = params.messages[lastUserIndex];
+    if (lastUser) {
+      if (typeof lastUser.content === "string") {
+        lastUser.content = [{ type: "text", text: lastUser.content, cache_control: cacheControlEphemeral }];
+      } else if (Array.isArray(lastUser.content) && lastUser.content.length > 0) {
+        applyCacheControlToLastTextBlock(lastUser.content as Array<ContentBlockParam & CacheControlBlock>);
+      }
+    }
+  }
 }
 
 function convertMessages(
   messages: Message[],
   model: Model<"anthropic-messages">,
   isOAuthToken: boolean,
-  cacheControlMode: CacheControlMode,
 ): MessageParam[] {
   const params: MessageParam[] = [];
-  const applyToolCacheControl = cacheControlMode === "toolBlocks";
-  const applyUserTextCacheControl = cacheControlMode === "userText";
-  const withCacheControl = <T extends object>(block: T, enabled: boolean): T | (T & CacheControlBlock) => {
-    if (!enabled) return block;
-    return { ...block, cache_control: cacheControlEphemeral };
-  };
 
   // Transform messages for cross-provider compatibility
   const transformedMessages = transformMessages(messages, model);
-
   for (let i = 0; i < transformedMessages.length; i++) {
     const msg = transformedMessages[i];
 
     if (msg.role === "user") {
+      // Skip messages with undefined/null content
+      if (!msg.content) continue;
+
       if (typeof msg.content === "string") {
         if (msg.content.trim().length > 0) {
           const text = sanitizeSurrogates(msg.content);
-
-
-
-
-            type: "text",
-            text,
-          },
-          true,
-        ),
-      ];
-      params.push({
-        role: "user",
-        content: blocks,
-      });
-    } else {
-      params.push({
-        role: "user",
-        content: text,
-      });
-    }
+          params.push({
+            role: "user",
+            content: text,
+          });
         }
-      } else {
+      } else if (Array.isArray(msg.content)) {
         const blocks: Array<ContentBlockParam & CacheControlBlock> = msg.content.map((item) => {
           if (item.type === "text") {
-            return
-
-
-
-            },
-            applyUserTextCacheControl,
-          );
+            return {
+              type: "text",
+              text: sanitizeSurrogates(item.text),
+            };
           }
           return {
             type: "image",
@@ -698,6 +768,9 @@ function convertMessages(
       });
     }
   } else if (msg.role === "assistant") {
+      // Skip messages with undefined/null content
+      if (!msg.content || !Array.isArray(msg.content)) continue;
+
       const blocks: Array<ContentBlockParam & CacheControlBlock> = [];
 
       for (const block of msg.content) {
@@ -725,17 +798,12 @@ function convertMessages(
           });
         }
       } else if (block.type === "toolCall") {
-        blocks.push(
-
-
-
-
-
-          input: block.arguments,
-          },
-          applyToolCacheControl,
-        ),
-      );
+        blocks.push({
+          type: "tool_use",
+          id: sanitizeToolCallId(block.id),
+          name: isOAuthToken ? toClaudeCodeName(block.name) : block.name,
+          input: block.arguments,
+        });
       }
     }
     if (blocks.length === 0) continue;
@@ -748,33 +816,23 @@ function convertMessages(
     const toolResults: Array<ContentBlockParam & CacheControlBlock> = [];
 
     // Add the current tool result
-    toolResults.push(
-
-
-
-
-
-      is_error: msg.isError,
-      },
-      applyToolCacheControl,
-    ),
-  );
+    toolResults.push({
+      type: "tool_result",
+      tool_use_id: sanitizeToolCallId(msg.toolCallId),
+      content: convertContentBlocks(msg.content),
+      is_error: msg.isError,
+    });
 
     // Look ahead for consecutive toolResult messages
     let j = i + 1;
     while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
       const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
-      toolResults.push(
-
-
-
-
-
-        is_error: nextMsg.isError,
-        },
-        applyToolCacheControl,
-      ),
-    );
+      toolResults.push({
+        type: "tool_result",
+        tool_use_id: sanitizeToolCallId(nextMsg.toolCallId),
+        content: convertContentBlocks(nextMsg.content),
+        is_error: nextMsg.isError,
+      });
       j++;
     }
 
@@ -782,14 +840,22 @@ function convertMessages(
     i = j - 1;
 
     // Add a single user message with all tool results
-
-
-
-
+      if (toolResults.length > 0) {
+        params.push({
+          role: "user",
+          content: toolResults,
+        });
+      }
     }
   }
 
-
+  // Final validation: filter out any messages with invalid content
+  return params.filter((msg) => {
+    if (!msg.content) return false;
+    if (typeof msg.content === "string") return msg.content.length > 0;
+    if (Array.isArray(msg.content)) return msg.content.length > 0;
+    return false;
+  });
 }
 
 function convertTools(tools: Tool[], isOAuthToken: boolean): Anthropic.Messages.Tool[] {
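Net effect of the caching rewrite: instead of sprinkling `cache_control` while converting messages, `applyPromptCaching` runs once over the finished params, strips stale markers, and spends at most four breakpoints in tools, then system, then messages order. An illustrative sketch of the resulting placement for a request with tools, a system prompt, and two user turns (field values invented, not actual package output):

```typescript
// Illustrative shape after applyPromptCaching (assumption based on the hunk above):
const cachedParams = {
  tools: [
    { name: "read_file" /* ... */ },
    { name: "write_file", cache_control: { type: "ephemeral" } }, // breakpoint 1: last tool
  ],
  system: [
    { type: "text", text: "System prompt…", cache_control: { type: "ephemeral" } }, // breakpoint 2: last system block
  ],
  messages: [
    // breakpoint 3: penultimate user message (caches the conversation history)
    { role: "user", content: [{ type: "text", text: "First turn", cache_control: { type: "ephemeral" } }] },
    { role: "assistant", content: [{ type: "text", text: "…" }] },
    // breakpoint 4: final user message (sets up a cache hit on the next request)
    { role: "user", content: [{ type: "text", text: "Second turn", cache_control: { type: "ephemeral" } }] },
  ],
};
```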
package/src/providers/cursor.ts
CHANGED

package/src/providers/google-gemini-cli.ts
CHANGED
@@ -410,6 +410,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
   const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
 
   const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
+  options?.onPayload?.(requestBody);
   const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
 
   const requestHeaders = {
@@ -418,6 +419,7 @@
     Accept: "text/event-stream",
     ...headers,
     ...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
+    ...(options?.headers ?? {}),
   };
   const requestBodyJson = JSON.stringify(requestBody);
 

package/src/providers/google-vertex.ts
CHANGED
@@ -85,6 +85,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
   const location = resolveLocation(options);
   const client = createClient(model, project, location);
   const params = buildParams(model, context, options);
+  options?.onPayload?.(params);
   const googleStream = await client.models.generateContentStream(params);
 
   stream.push({ type: "start", partial: output });
package/src/providers/google.ts
CHANGED
@@ -75,6 +75,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
   const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
   const client = createClient(model, apiKey);
   const params = buildParams(model, context, options);
+  options?.onPayload?.(params);
   const googleStream = await client.models.generateContentStream(params);
 
   stream.push({ type: "start", partial: output });
package/src/providers/openai-codex-responses.ts
CHANGED
@@ -55,6 +55,29 @@ const CODEX_MAX_RETRIES = 2;
 const CODEX_RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504]);
 const CODEX_RETRY_DELAY_MS = 500;
 
+/** Fast deterministic hash to shorten long strings */
+function shortHash(str: string): string {
+  let h1 = 0xdeadbeef;
+  let h2 = 0x41c6ce57;
+  for (let i = 0; i < str.length; i++) {
+    const ch = str.charCodeAt(i);
+    h1 = Math.imul(h1 ^ ch, 2654435761);
+    h2 = Math.imul(h2 ^ ch, 1597334677);
+  }
+  h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
+  h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
+  return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
+}
+
+function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
+  const [callId, itemId] = id.split("|");
+  if (callId && itemId) {
+    return { callId, itemId };
+  }
+  const hash = shortHash(id);
+  return { callId: `call_${hash}`, itemId: `item_${hash}` };
+}
+
 export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"> = (
   model: Model<"openai-codex-responses">,
   context: Context,
@@ -128,9 +151,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
   };
 
   const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
+  options?.onPayload?.(transformedBody);
 
   const reasoningEffort = transformedBody.reasoning?.effort ?? null;
-  const headers = createCodexHeaders(
+  const headers = createCodexHeaders(
+    { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
+    accountId,
+    apiKey,
+    options?.sessionId,
+  );
   logCodexDebug("codex request", {
     url,
     model: params.model,
@@ -508,19 +537,6 @@ function getAccountId(accessToken: string): string {
   return accountId;
 }
 
-function shortHash(str: string): string {
-  let h1 = 0xdeadbeef;
-  let h2 = 0x41c6ce57;
-  for (let i = 0; i < str.length; i++) {
-    const ch = str.charCodeAt(i);
-    h1 = Math.imul(h1 ^ ch, 2654435761);
-    h2 = Math.imul(h2 ^ ch, 1597334677);
-  }
-  h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
-  h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
-  return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
-}
-
 function convertMessages(model: Model<"openai-codex-responses">, context: Context): ResponseInput {
   const messages: ResponseInput = [];
 
@@ -583,10 +599,11 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Context): ResponseInput {
         } satisfies ResponseOutputMessage);
       } else if (block.type === "toolCall" && msg.stopReason !== "error") {
         const toolCall = block as ToolCall;
+        const normalized = normalizeResponsesToolCallId(toolCall.id);
         output.push({
           type: "function_call",
-          id:
-          call_id:
+          id: normalized.itemId,
+          call_id: normalized.callId,
           name: toolCall.name,
           arguments: JSON.stringify(toolCall.arguments),
         });
@@ -600,11 +617,12 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Context): ResponseInput {
         .map((c) => (c as { text: string }).text)
         .join("\n");
       const hasImages = msg.content.some((c) => c.type === "image");
+      const normalized = normalizeResponsesToolCallId(msg.toolCallId);
 
       const hasText = textResult.length > 0;
       messages.push({
        type: "function_call_output",
-        call_id:
+        call_id: normalized.callId,
         output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
       });
 
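Both Responses providers now share a composite tool-call ID convention: a stored ID is `"<callId>|<itemId>"`, and anything else is reduced to a deterministic pair via `shortHash`, so histories replayed from other providers still yield stable, well-formed IDs. A sketch of the round trip; the helper is module-internal and the example IDs are invented:

```typescript
// A composite ID splits cleanly back into its parts:
normalizeResponsesToolCallId("call_abc|item_xyz");
// → { callId: "call_abc", itemId: "item_xyz" }

// A foreign ID (e.g. an Anthropic-style toolu_… id) is hashed deterministically,
// so the same input always maps to the same call/item pair:
normalizeResponsesToolCallId("toolu_01XYZ");
// → { callId: "call_<hash>", itemId: "item_<hash>" }
```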
package/src/providers/openai-completions.ts
CHANGED
@@ -101,8 +101,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 
   try {
     const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-    const client = createClient(model, context, apiKey);
+    const client = createClient(model, context, apiKey, options?.headers);
     const params = buildParams(model, context, options);
+    options?.onPayload?.(params);
     const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
     stream.push({ type: "start", partial: output });
 
@@ -319,7 +320,12 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
   return stream;
 };
 
-function createClient(model: Model<"openai-completions">, context: Context, apiKey?: string) {
+function createClient(
+  model: Model<"openai-completions">,
+  context: Context,
+  apiKey?: string,
+  extraHeaders?: Record<string, string>,
+) {
   if (!apiKey) {
     if (!process.env.OPENAI_API_KEY) {
       throw new Error(
@@ -329,7 +335,7 @@ function createClient(model: Model<"openai-completions">, context: Context, apiKey?: string) {
     apiKey = process.env.OPENAI_API_KEY;
   }
 
-  const headers = { ...model.headers };
+  const headers = { ...(model.headers ?? {}), ...(extraHeaders ?? {}) };
   if (model.provider === "github-copilot") {
     // Copilot expects X-Initiator to indicate whether the request is user-initiated
     // or agent-initiated (e.g. follow-up after assistant/tool messages). If there is
package/src/providers/openai-responses.ts
CHANGED
@@ -50,6 +50,11 @@ export interface OpenAIResponsesOptions extends StreamOptions {
   reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
   reasoningSummary?: "auto" | "detailed" | "concise" | null;
   serviceTier?: ResponseCreateParamsStreaming["service_tier"];
+  /**
+   * Enforce strict tool call/result pairing when building Responses API inputs.
+   * Azure OpenAI Responses API requires tool results to have a matching tool call.
+   */
+  strictResponsesPairing?: boolean;
 }
 
 /**
@@ -85,8 +90,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
   try {
     // Create OpenAI client
     const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
-    const client = createClient(model, context, apiKey);
+    const client = createClient(model, context, apiKey, options?.headers);
     const params = buildParams(model, context, options);
+    options?.onPayload?.(params);
     const openaiStream = await client.responses.create(
       params,
       options?.signal ? { signal: options.signal } : undefined,
@@ -317,7 +323,12 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
   return stream;
 };
 
-function createClient(model: Model<"openai-responses">, context: Context, apiKey?: string) {
+function createClient(
+  model: Model<"openai-responses">,
+  context: Context,
+  apiKey?: string,
+  extraHeaders?: Record<string, string>,
+) {
   if (!apiKey) {
     if (!process.env.OPENAI_API_KEY) {
       throw new Error(
@@ -327,7 +338,7 @@ function createClient(model: Model<"openai-responses">, context: Context, apiKey?: string) {
     apiKey = process.env.OPENAI_API_KEY;
   }
 
-  const headers = { ...model.headers };
+  const headers = { ...(model.headers ?? {}), ...(extraHeaders ?? {}) };
   if (model.provider === "github-copilot") {
     // Copilot expects X-Initiator to indicate whether the request is user-initiated
     // or agent-initiated (e.g. follow-up after assistant/tool messages). If there is
@@ -362,7 +373,8 @@ function createClient(model: Model<"openai-responses">, context: Context, apiKey?: string) {
 }
 
 function buildParams(model: Model<"openai-responses">, context: Context, options?: OpenAIResponsesOptions) {
-  const messages = convertMessages(model, context);
+  const strictResponsesPairing = options?.strictResponsesPairing ?? isAzureOpenAIBaseUrl(model.baseUrl ?? "");
+  const messages = convertMessages(model, context, strictResponsesPairing);
 
   const params: ResponseCreateParamsStreaming = {
     model: model.id,
@@ -413,8 +425,26 @@ function buildParams(model: Model<"openai-responses">, context: Context, options?: OpenAIResponsesOptions) {
   return params;
 }
 
-function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput {
+function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
+  const [callId, itemId] = id.split("|");
+  if (callId && itemId) {
+    return { callId, itemId };
+  }
+  const hash = shortHash(id);
+  return { callId: `call_${hash}`, itemId: `item_${hash}` };
+}
+
+function isAzureOpenAIBaseUrl(baseUrl: string): boolean {
+  return baseUrl.includes(".openai.azure.com") || baseUrl.includes("azure.com/openai");
+}
+
+function convertMessages(
+  model: Model<"openai-responses">,
+  context: Context,
+  strictResponsesPairing: boolean,
+): ResponseInput {
   const messages: ResponseInput = [];
+  const knownCallIds = new Set<string>();
 
   const transformedMessages = transformMessages(context.messages, model);
 
@@ -487,10 +517,12 @@ function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput {
       // Do not submit toolcall blocks if the completion had an error (i.e. abort)
       } else if (block.type === "toolCall" && msg.stopReason !== "error") {
         const toolCall = block as ToolCall;
+        const normalized = normalizeResponsesToolCallId(toolCall.id);
+        knownCallIds.add(normalized.callId);
         output.push({
           type: "function_call",
-          id:
-          call_id:
+          id: normalized.itemId,
+          call_id: normalized.callId,
           name: toolCall.name,
           arguments: JSON.stringify(toolCall.arguments),
         });
@@ -505,12 +537,16 @@ function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput {
         .map((c) => (c as any).text)
         .join("\n");
       const hasImages = msg.content.some((c) => c.type === "image");
+      const normalized = normalizeResponsesToolCallId(msg.toolCallId);
+      if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
+        continue;
+      }
 
       // Always send function_call_output with text (or placeholder if only images)
       const hasText = textResult.length > 0;
       messages.push({
         type: "function_call_output",
-        call_id:
+        call_id: normalized.callId,
         output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
       });
 
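Strict pairing therefore defaults on for Azure-shaped base URLs and stays opt-in elsewhere. A hedged usage sketch, assuming the `(model, context, options)` call shape the stream functions in this diff use:

```typescript
// Force strict call/result pairing even off Azure, e.g. when replaying a
// transcript that may contain orphaned tool results (sketch, not package docs):
const stream = streamOpenAIResponses(model, context, {
  strictResponsesPairing: true,
});
```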
package/src/providers/transform-messages.ts
CHANGED
@@ -9,9 +9,34 @@ function normalizeToolCallId(id: string): string {
   return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
 }
 
+/** Fast deterministic hash to shorten long strings */
+function shortHash(str: string): string {
+  let h1 = 0xdeadbeef;
+  let h2 = 0x41c6ce57;
+  for (let i = 0; i < str.length; i++) {
+    const ch = str.charCodeAt(i);
+    h1 = Math.imul(h1 ^ ch, 2654435761);
+    h2 = Math.imul(h2 ^ ch, 1597334677);
+  }
+  h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
+  h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
+  return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
+}
+
+function normalizeResponsesToolCallId(id: string): string {
+  const [callId, itemId] = id.split("|");
+  if (callId && itemId) {
+    return id;
+  }
+  const hash = shortHash(id);
+  return `call_${hash}|item_${hash}`;
+}
+
 export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
   // Build a map of original tool call IDs to normalized IDs for github-copilot cross-API switches
   const toolCallIdMap = new Map<string, string>();
+  const skippedToolCallIds = new Set<string>();
+  const needsResponsesToolCallIds = model.api === "openai-responses" || model.api === "openai-codex-responses";
 
   // First pass: transform messages (thinking blocks, tool call ID normalization)
   const transformed = messages.flatMap<Message>((msg): Message[] => {
@@ -22,20 +47,39 @@ export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
 
     // Handle toolResult messages - normalize toolCallId if we have a mapping
     if (msg.role === "toolResult") {
+      if (skippedToolCallIds.has(msg.toolCallId)) {
+        return [];
+      }
       const normalizedId = toolCallIdMap.get(msg.toolCallId);
       if (normalizedId && normalizedId !== msg.toolCallId) {
         return [{ ...msg, toolCallId: normalizedId }];
       }
+      if (needsResponsesToolCallIds) {
+        return [{ ...msg, toolCallId: normalizeResponsesToolCallId(msg.toolCallId) }];
+      }
       return [msg];
     }
 
     // Assistant messages need transformation check
     if (msg.role === "assistant") {
       const assistantMsg = msg as AssistantMessage;
+      const isSameProviderApi = assistantMsg.provider === model.provider && assistantMsg.api === model.api;
+      const isErroredAssistant = assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted";
+      if (!isSameProviderApi && isErroredAssistant) {
+        for (const block of assistantMsg.content) {
+          if (block.type === "toolCall") {
+            skippedToolCallIds.add(block.id);
+          }
+        }
+        return [];
+      }
 
       // If message is from the same provider and API, keep as is
-      if (
-        if (
+      if (isSameProviderApi) {
+        if (
+          (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") &&
+          assistantMsg.content.length === 0
+        ) {
           return [];
         }
         return [msg];
@@ -64,12 +108,20 @@ export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
         };
       }
       // Normalize tool call IDs when target API requires strict format
-      if (block.type === "toolCall"
+      if (block.type === "toolCall") {
         const toolCall = block as ToolCall;
-
-
-
-
+        if (needsResponsesToolCallIds) {
+          const normalizedId = normalizeResponsesToolCallId(toolCall.id);
+          if (normalizedId !== toolCall.id) {
+            toolCallIdMap.set(toolCall.id, normalizedId);
+            return { ...toolCall, id: normalizedId };
+          }
+        } else if (needsToolCallIdNormalization) {
+          const normalizedId = normalizeToolCallId(toolCall.id);
+          if (normalizedId !== toolCall.id) {
+            toolCallIdMap.set(toolCall.id, normalizedId);
+            return { ...toolCall, id: normalizedId };
+          }
         }
       }
      // All other blocks pass through unchanged
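The first transform pass now drops assistant turns that errored on a different provider/API and remembers their tool-call IDs, so the matching `toolResult` messages are dropped as orphans instead of reaching a provider that would reject them. A sketch of the effect; message shapes are abbreviated and the model value is hypothetical:

```typescript
// Sketch: an aborted assistant turn recorded under a different provider/api.
const history = [
  { role: "user", content: "Run the tool" },
  {
    role: "assistant",
    provider: "anthropic",     // differs from the target model's provider
    api: "anthropic-messages", // differs from the target model's api
    stopReason: "aborted",
    content: [{ type: "toolCall", id: "toolu_01", name: "read_file", arguments: {} }],
  },
  { role: "toolResult", toolCallId: "toolu_01", content: [] },
];
// transformMessages(history, someOpenAIResponsesModel) keeps only the user
// message: the errored assistant turn is removed, and skippedToolCallIds
// ensures its now-orphaned toolResult is removed with it.
```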
package/src/stream.ts
CHANGED
@@ -79,10 +79,17 @@ export function getEnvApiKey(provider: any): string | undefined {
   // 1. AWS_PROFILE - named profile from ~/.aws/credentials
   // 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY - standard IAM keys
   // 3. AWS_BEARER_TOKEN_BEDROCK - Bedrock API keys (bearer token)
+  // 4. AWS_CONTAINER_CREDENTIALS_* - ECS/Task IAM role credentials
+  // 5. AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN - IRSA (EKS) web identity
+  const hasEcsCredentials =
+    !!process.env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI || !!process.env.AWS_CONTAINER_CREDENTIALS_FULL_URI;
+  const hasWebIdentity = !!process.env.AWS_WEB_IDENTITY_TOKEN_FILE && !!process.env.AWS_ROLE_ARN;
   if (
     process.env.AWS_PROFILE ||
     (process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY) ||
-    process.env.AWS_BEARER_TOKEN_BEDROCK
+    process.env.AWS_BEARER_TOKEN_BEDROCK ||
+    hasEcsCredentials ||
+    hasWebIdentity
   ) {
     return "<authenticated>";
   }
@@ -208,11 +215,11 @@ export const OUTPUT_FALLBACK_BUFFER = 4000;
 const ANTHROPIC_USE_INTERLEAVED_THINKING = true;
 
 const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
-  minimal:
-  low:
-  medium:
-  high:
-  xhigh:
+  minimal: 1024,
+  low: 4096,
+  medium: 8192,
+  high: 16384,
+  xhigh: 32768,
 };
 
 const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
@@ -252,7 +259,9 @@ function mapOptionsForApi<TApi extends Api>(
     maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
     signal: options?.signal,
     apiKey: apiKey || options?.apiKey,
+    headers: options?.headers,
     sessionId: options?.sessionId,
+    onPayload: options?.onPayload,
     execHandlers: options?.execHandlers,
   };
 
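For Bedrock, `getEnvApiKey` now treats container and web-identity credentials as authentication; the actual keys are still resolved by the AWS SDK's default chain at request time. A sketch of the two new environment shapes, assuming `getEnvApiKey` is importable from the package root and that Bedrock's provider key is `"amazon-bedrock"` (both assumptions; paths and ARN are illustrative):

```typescript
import { getEnvApiKey } from "@oh-my-pi/pi-ai"; // assumed re-export

// ECS/Fargate task role: either container-credentials variable is sufficient.
process.env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI = "/v2/credentials/example";

// EKS IRSA web identity: both variables are required together.
process.env.AWS_WEB_IDENTITY_TOKEN_FILE = "/var/run/secrets/eks.amazonaws.com/serviceaccount/token";
process.env.AWS_ROLE_ARN = "arn:aws:iam::123456789012:role/bedrock-caller";

// With either shape present, the provider is reported as authenticated:
console.log(getEnvApiKey("amazon-bedrock")); // "<authenticated>"
```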
package/src/types.ts
CHANGED
@@ -96,12 +96,22 @@ export interface StreamOptions {
   maxTokens?: number;
   signal?: AbortSignal;
   apiKey?: string;
+  /**
+   * Additional headers to include in provider requests.
+   * These are merged on top of model-defined headers.
+   */
+  headers?: Record<string, string>;
   /**
    * Optional session identifier for providers that support session-based caching.
    * Providers can use this to enable prompt caching, request routing, or other
    * session-aware features. Ignored by providers that don't support it.
    */
   sessionId?: string;
+  /**
+   * Optional hook to observe the provider request payload before it is sent.
+   * The payload format is provider-specific.
+   */
+  onPayload?: (payload: unknown) => void;
   /** Cursor exec/MCP tool handlers (cursor-agent only). */
   execHandlers?: CursorExecHandlers;
 }
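Since `onPayload` deliberately types the payload as `unknown`, callers narrow it per provider. A hedged sketch for an `anthropic-messages` model, where (per the anthropic.ts hunk above) the payload is the Messages API request body:

```typescript
import type Anthropic from "@anthropic-ai/sdk";

const options = {
  onPayload: (payload: unknown) => {
    // Safe only for anthropic-messages models; other providers pass other shapes.
    const params = payload as Anthropic.Messages.MessageCreateParamsStreaming;
    console.log(params.model, params.max_tokens);
  },
};
```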
package/src/utils/oauth/callback-server.ts
CHANGED
@@ -14,7 +14,7 @@
 import templateHtml from "./oauth.html" with { type: "text" };
 import type { OAuthController, OAuthCredentials } from "./types";
 
-const DEFAULT_TIMEOUT =
+const DEFAULT_TIMEOUT = 120_000;
 const DEFAULT_HOSTNAME = "localhost";
 const CALLBACK_PATH = "/callback";
 
@@ -182,7 +182,7 @@ export abstract class OAuthCallbackFlow {
    * Wait for OAuth callback or manual input (whichever comes first).
    */
   private waitForCallback(expectedState: string): Promise<CallbackResult> {
-    const timeoutSignal = AbortSignal.timeout(DEFAULT_TIMEOUT
+    const timeoutSignal = AbortSignal.timeout(DEFAULT_TIMEOUT);
     const signal = this.ctrl.signal ? AbortSignal.any([this.ctrl.signal, timeoutSignal]) : timeoutSignal;
 
     const callbackPromise = new Promise<CallbackResult>((resolve, reject) => {
package/src/utils/oauth/index.ts
CHANGED
@@ -30,6 +30,7 @@ export {
 export { loginAntigravity, refreshAntigravityToken } from "./google-antigravity";
 // Google Gemini CLI
 export { loginGeminiCli, refreshGoogleCloudToken } from "./google-gemini-cli";
+export type { OpenAICodexLoginOptions } from "./openai-codex";
 // OpenAI Codex (ChatGPT OAuth)
 export { loginOpenAICodex, refreshOpenAICodexToken } from "./openai-codex";
 
package/src/utils/oauth/openai-codex.ts
CHANGED
@@ -49,6 +49,7 @@ class OpenAICodexOAuthFlow extends OAuthCallbackFlow {
   constructor(
     ctrl: OAuthController,
     private readonly pkce: PKCE,
+    private readonly originator: string,
   ) {
     super(ctrl, CALLBACK_PORT, CALLBACK_PATH);
   }
@@ -67,7 +68,7 @@
       state,
       id_token_add_organizations: "true",
       codex_cli_simplified_flow: "true",
-      originator: "opencode",
+      originator: this.originator,
     });
 
     const url = `${AUTHORIZE_URL}?${searchParams.toString()}`;
@@ -122,9 +123,15 @@ async function exchangeCodeForToken(code: string, verifier: string, redirectUri:
 /**
  * Login with OpenAI Codex OAuth
  */
-export async function loginOpenAICodex(ctrl: OAuthController): Promise<OAuthCredentials> {
+export type OpenAICodexLoginOptions = OAuthController & {
+  /** Optional originator value for OpenAI Codex OAuth. Default: "opencode". */
+  originator?: string;
+};
+
+export async function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials> {
   const pkce = await generatePKCE();
-  const flow = new OpenAICodexOAuthFlow(ctrl, pkce);
+  const originator = options.originator?.trim() || "opencode";
+  const flow = new OpenAICodexOAuthFlow(options, pkce, originator);
 
   return flow.login();
 }