npm - @oh-my-pi/pi-ai - Versions diffs - 15.12.4 → 15.13.0 - Mend

@oh-my-pi/pi-ai 15.12.4 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/CHANGELOG.md +26 -1
package/dist/types/providers/anthropic-client.d.ts +2 -0
package/dist/types/providers/google-gemini-cli.d.ts +1 -1
package/package.json +3 -3
package/src/providers/amazon-bedrock.ts +19 -1
package/src/providers/anthropic-client.ts +2 -0
package/src/providers/anthropic.ts +14 -7
package/src/providers/azure-openai-responses.ts +9 -1
package/src/providers/google-gemini-cli.ts +35 -3
package/src/providers/google-shared.ts +14 -2
package/src/providers/ollama.ts +19 -1
package/src/providers/openai-codex-responses.ts +27 -4
package/src/providers/openai-completions.ts +40 -7
package/src/providers/openai-responses-shared.ts +4 -1
package/src/providers/openai-responses.ts +9 -4
package/src/registry/oauth/gitlab-duo.ts +8 -3
package/src/registry/zai.ts +1 -1
package/src/utils/openai-http.ts +4 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 ## [Unreleased]
+## [15.13.0] - 2026-06-14
+### Fixed
+- Fixed OpenAI Responses/Realtime SSE stream handler crashing with "Error Code undefined: undefined" when parsing error events with nested error details by falling back to the nested error object fields.
+- Fixed OpenAI-compatible providers that reject forced `tool_choice` on thinking-required models by downgrading unsupported forced choices to `auto` while keeping tools available ([#2546](https://github.com/can1357/oh-my-pi/issues/2546)).
+- Fixed GitHub Copilot Anthropic transport (`api.githubcopilot.com/v1/messages`) returning `400 tools.0.custom.eager_input_streaming: Extra inputs are not permitted` on every tool-bearing turn by stopping the emission of the per-tool `eager_input_streaming` flag and the `fine-grained-tool-streaming-2025-05-14` beta header on the Copilot transport — the proxy whitelists neither ([#2558](https://github.com/can1357/oh-my-pi/issues/2558)).
+- Disabled Bun's native ~300s pre-response `fetch` timeout in every streaming provider (OpenAI completions/responses, Azure responses, Anthropic, Codex SSE, Bedrock, Gemini CLI, Ollama). The configurable first-event/idle/SDK watchdogs (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS`, `compat.streamIdleTimeoutMs`) were silently capped by Bun's hidden ceiling, so cold large-context streams (e.g. self-hosted vLLM at multi-hundred-K prompts) died at exactly 300s with `TimeoutError: The operation timed out.` Direct callers of `./providers/{amazon-bedrock,google-gemini-cli,ollama,openai-codex-responses}` (which bypass `register-builtins`' iterator-level watchdog) now install a pre-response `AbortSignal.timeout(firstEventTimeoutMs)` alongside the disable, so a stalled upstream still fails within the configured budget instead of hanging forever ([#2422](https://github.com/can1357/oh-my-pi/issues/2422))
+- Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) creating a trailing empty text block and emitting redundant `text_start`/`text_delta`/`text_end` events at the end of the turn when the final SSE chunk contains an empty text part (`text: ""`). The parser now ignores empty text parts, preserving the active transcript block state and ensuring proper nesting and rendering of subsequent background jobs or new turns.
+- Preserved terminal Google `thoughtSignature`s by still extracting and applying the signature on the active block even when the text part is empty or undefined.
+- Stopped Gemini Antigravity sessions (`gemini-3*` / Claude under Cloud Code Assist) from leaking system rule reminders and personality preambles into the final response, by appending an explicit 'do not output rule checks' instruction to the injected system parts.
+- Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) letting a `functionCall` part's own `thoughtSignature` clobber the preceding text or thinking block's signature on `think → tool` and `text → tool` turns. A signed function-call part has `text: undefined`, so it fell into the terminal-signature branch while the prior block was still active; that branch now skips function-call parts, leaving the tool call's signature on the tool call where it belongs and preventing corrupted signatures on same-model replay.
+- Fixed MiniMax-M3 OpenAI-compatible streams rendering reasoning twice when the same chunk carried both `<think>…</think>` content and structured `reasoning_content`; structured reasoning now wins and cumulative MiniMax reasoning snapshots are collapsed to deltas using a per-signature snapshot tracker that survives the `</think>`-to-text block transition (so post-answer cumulative snapshots don't reinstate a duplicate thinking block). ([#2433](https://github.com/can1357/oh-my-pi/issues/2433))
+## [15.12.6] - 2026-06-14
+### Changed
+- Bumped Z.AI (GLM Coding Plan) API key validation probe to glm-5.2.
+### Fixed
+- Fixed tool schema conversion for non-Cloud Code Assist Google Gemini models by normalizing parameters with `normalizeSchemaForGoogle` to prevent un-normalized schema properties (such as `additionalProperties: false` or type arrays) from causing Gemini API errors.
+- Fixed OpenAI-family request builders dropping forced named `tool_choice` directives when the named tool is absent from the serialized `tools` array, preventing spec-strict providers from rejecting self-inconsistent requests. ([#1701](https://github.com/can1357/oh-my-pi/issues/1701))
 ## [15.12.4] - 2026-06-13
 ### Added
@@ -3392,4 +3417,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
 ## [0.9.4] - 2025-11-26
-Initial release with multi-provider LLM support.
+Initial release with multi-provider LLM support.

package/dist/types/providers/anthropic-client.d.ts CHANGED Viewed

@@ -25,6 +25,8 @@ export type AnthropicFetchOptions = RequestInit & {
         cert?: string;
         key?: string;
     };
+    /** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
+    timeout?: number | false;
 };
 export interface AnthropicClientOptions {
     /** Sent as `X-Api-Key` unless the header is already present in `defaultHeaders`. */

package/dist/types/providers/google-gemini-cli.d.ts CHANGED Viewed

@@ -53,7 +53,7 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
     requestModelId?: string;
     projectId?: string;
 }
-export { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
+export { ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION, ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
 interface ParsedGeminiCliCredentials {
     accessToken: string;
     projectId: string;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.12.4",
+	"version": "15.13.0",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -38,8 +38,8 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-catalog": "15.12.4",
-		"@oh-my-pi/pi-utils": "15.12.4",
+		"@oh-my-pi/pi-catalog": "15.13.0",
+		"@oh-my-pi/pi-utils": "15.13.0",
 		"partial-json": "^0.1.7",
 		"zod": "^4"
 	},

package/src/providers/amazon-bedrock.ts CHANGED Viewed

@@ -31,6 +31,7 @@ import type {
 import { normalizeToolCallId, resolveCacheRetention } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
+import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
 import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
 import { toolWireSchema } from "../utils/schema/wire";
 import { invalidateAwsCredentialCache, resolveAwsCredentials } from "./aws-credentials";
@@ -282,12 +283,29 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
 				requestHeaders = { ...baseHeaders, ...signed };
 			}
+			// Bun's native fetch ceiling is disabled below (`timeout: false`) so
+			// configurable watchdogs govern slow-prefill streams (issue #2422).
+			// Direct callers that bypass `register-builtins` (which installs the
+			// iterator-level first-event watchdog) still need a pre-response
+			// timer, otherwise a Bedrock/proxy that accepts the POST and never
+			// sends headers would hang forever.
+			const firstEventTimeoutMs = options.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs();
+			const preResponseWatchdog =
+				firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
+					? AbortSignal.timeout(firstEventTimeoutMs)
+					: undefined;
+			const fetchSignal = preResponseWatchdog
+				? options.signal
+					? AbortSignal.any([options.signal, preResponseWatchdog])
+					: preResponseWatchdog
+				: options.signal;
 			const response = await fetchWithRetry(url, {
 				method: "POST",
 				headers: requestHeaders,
 				body,
-				signal: options.signal,
+				signal: fetchSignal,
 				fetch: options.fetch,
+				timeout: false,
 			});
 			if (!response.ok) {

package/src/providers/anthropic-client.ts CHANGED Viewed

@@ -57,6 +57,8 @@ export type AnthropicFetchOptions = RequestInit & {
 		cert?: string;
 		key?: string;
 	};
+	/** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
+	timeout?: number | false;
 };
 export interface AnthropicClientOptions {

package/src/providers/anthropic.ts CHANGED Viewed

@@ -2305,16 +2305,22 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 	const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
 	const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
 	const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
+	// Disable Bun's native ~300s pre-response fetch timeout (issue #2422).
+	// `AnthropicMessagesClient` already arms its own DEFAULT_TIMEOUT_MS timer
+	// per request, so the native ceiling can only short-circuit slow-prefill
+	// streams before the configured watchdog gets to govern them.
+	const fetchOptions: AnthropicFetchOptions = { ...(tlsFetchOptions ?? {}), timeout: false };
 	const baseFetch = args.fetch ?? fetch;
 	// Only OAuth requests inject the CC billing header; no API-key request can ever
 	// contain it, so there is no need to install the rewriter for those.
 	const cchFetch = oauthToken ? wrapFetchForCch(baseFetch) : baseFetch;
 	if (model.provider === "github-copilot") {
 		const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
+		// The GitHub Copilot Anthropic proxy doesn't accept Anthropic beta
+		// features (and the catalog already forces `supportsEagerToolInputStreaming
+		// = false` for this host, so `needsFineGrainedToolStreamingBeta` is true
+		// whenever tools are present). Forward only caller-supplied betas.
 		const betaFeatures = [...extraBetas];
-		if (needsFineGrainedToolStreamingBeta) {
-			betaFeatures.push(fineGrainedToolStreamingBeta);
-		}
 		const defaultHeaders = mergeHeaders(
 			{
 				Accept: stream ? "text/event-stream" : "application/json",
@@ -2337,7 +2343,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 			maxRetries: 5,
 			defaultHeaders,
 			fetch: cchFetch,
-			...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
+			fetchOptions,
 		};
 	}
@@ -2372,6 +2378,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 			maxRetries: 5,
 			defaultHeaders,
 			fetch: cchFetch,
+			fetchOptions,
 		};
 	}
@@ -2388,7 +2395,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 			maxRetries: 5,
 			defaultHeaders,
 			fetch: cchFetch,
-			...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
+			fetchOptions,
 		};
 	}
 	// OpenCode Zen's Anthropic-compatible gateway accepts bearer auth only;
@@ -2402,7 +2409,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 			maxRetries: 5,
 			defaultHeaders,
 			fetch: cchFetch,
-			...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
+			fetchOptions,
 		};
 	}
@@ -2421,7 +2428,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 		maxRetries: 5,
 		defaultHeaders,
 		fetch: cchFetch,
-		...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
+		fetchOptions,
 	};
 }

package/src/providers/azure-openai-responses.ts CHANGED Viewed

@@ -336,7 +336,15 @@ function buildParams(
 	if (context.tools) {
 		params.tools = convertTools(context.tools);
 		if (options?.toolChoice) {
-			params.tool_choice = mapToOpenAIResponsesToolChoice(options.toolChoice);
+			const toolChoice = mapToOpenAIResponsesToolChoice(options.toolChoice);
+			if (
+				toolChoice &&
+				(typeof toolChoice === "string" ||
+					toolChoice.type !== "function" ||
+					context.tools.some(tool => tool.name === toolChoice.name))
+			) {
+				params.tool_choice = toolChoice;
+			}
 		}
 	}

package/src/providers/google-gemini-cli.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { createHash, randomBytes, randomUUID } from "node:crypto";
 import { scheduler } from "node:timers/promises";
 import { calculateCost } from "@oh-my-pi/pi-catalog/models";
 import {
+	ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
 	ANTIGRAVITY_SYSTEM_INSTRUCTION,
 	getAntigravityUserAgent,
 	getGeminiCliHeaders,
@@ -27,6 +28,7 @@ import type {
 import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
+import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
 // Refresh is the sole responsibility of AuthStorage (broker-aware, single-flighted);
 // the stream provider trusts the access token threaded through `options.apiKey`.
 import { normalizeSchemaForCCA } from "../utils/schema";
@@ -101,6 +103,7 @@ const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googlea
 const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
 export {
+	ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
 	ANTIGRAVITY_SYSTEM_INSTRUCTION,
 	getAntigravityUserAgent,
 	getGeminiCliHeaders,
@@ -365,17 +368,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 				headers: requestHeaders,
 			};
+			// Direct callers that skip `register-builtins` (which installs the
+			// iterator-level watchdog) need a pre-response timer alongside
+			// `timeout: false`; otherwise a stalled Cloud Code Assist proxy
+			// would hang forever. Floor matches the lazy wrapper's 5min default.
+			const firstEventTimeoutMs =
+				options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(undefined, 300_000);
+			const preResponseWatchdog =
+				firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
+					? AbortSignal.timeout(firstEventTimeoutMs)
+					: undefined;
+			const callerSignal = options?.signal;
+			const fetchSignal = preResponseWatchdog
+				? callerSignal
+					? AbortSignal.any([callerSignal, preResponseWatchdog])
+					: preResponseWatchdog
+				: callerSignal;
 			const response = await fetchWithRetry(
 				attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
 				{
 					method: "POST",
 					headers: requestHeaders,
 					body: requestBodyJson,
-					signal: options?.signal,
+					signal: fetchSignal,
 					maxAttempts: MAX_RETRIES + 1,
 					defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
 					maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
 					fetch: options?.fetch,
+					timeout: false,
 				},
 			);
 			if (!response.ok) {
@@ -447,7 +467,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 					const candidate = responseData.candidates?.[0];
 					if (candidate?.content?.parts) {
 						for (const part of candidate.content.parts) {
-							if (part.text !== undefined) {
+							if (part.text !== undefined && part.text !== "") {
 								const isThinking = isThinkingPart(part);
 								if (
 									!currentBlock ||
@@ -484,6 +504,18 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 										partial: output,
 									});
 								}
+							} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
+								if (currentBlock.type === "thinking") {
+									currentBlock.thinkingSignature = retainThoughtSignature(
+										currentBlock.thinkingSignature,
+										part.thoughtSignature,
+									);
+								} else {
+									currentBlock.textSignature = retainThoughtSignature(
+										currentBlock.textSignature,
+										part.thoughtSignature,
+									);
+								}
 							}
 							if (part.functionCall) {
@@ -849,10 +881,10 @@ export function buildRequest(
 	if (isAntigravity && shouldInjectAntigravitySystemInstruction(model.id)) {
 		const existingParts = request.systemInstruction?.parts ?? [];
 		request.systemInstruction = {
-			role: "user",
 			parts: [
 				{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION },
 				{ text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },
+				{ text: ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION },
 				...existingParts,
 			],
 		};

package/src/providers/google-shared.ts CHANGED Viewed

@@ -372,7 +372,7 @@ export function convertTools(
 				description: tool.description || "",
 				...(useParameters
 					? { parameters: normalizeSchemaForCCA(toolWireSchema(tool)) }
-					: { parametersJsonSchema: toolWireSchema(tool) }),
+					: { parametersJsonSchema: normalizeSchemaForGoogle(toolWireSchema(tool)) }),
 			})),
 		},
 	];
@@ -609,7 +609,7 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 		const candidate = chunk.candidates?.[0];
 		if (candidate?.content?.parts) {
 			for (const part of candidate.content.parts) {
-				if (part.text !== undefined) {
+				if (part.text !== undefined && part.text !== "") {
 					if (!firstTokenSeen) {
 						firstTokenSeen = true;
 						onFirstToken?.();
@@ -650,6 +650,18 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
 							partial: output,
 						});
 					}
+				} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
+					if (currentBlock.type === "thinking") {
+						currentBlock.thinkingSignature = retainThoughtSignature(
+							currentBlock.thinkingSignature,
+							part.thoughtSignature,
+						);
+					} else if (retainTextSignature) {
+						currentBlock.textSignature = retainThoughtSignature(
+							currentBlock.textSignature,
+							part.thoughtSignature,
+						);
+					}
 				}
 				if (part.functionCall) {

package/src/providers/ollama.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import type {
 import { normalizeSystemPrompts } from "../utils";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { type CapturedHttpErrorResponse, finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
+import { getOpenAIStreamFirstEventTimeoutMs, getOpenAIStreamIdleTimeoutMs } from "../utils/idle-iterator";
 import { parseStreamingJson } from "../utils/json-parse";
 import { toolWireSchema } from "../utils/schema/wire";
 import {
@@ -525,6 +526,22 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 				url: `${baseUrl}/api/chat`,
 				body,
 			};
+			// Direct callers that bypass `register-builtins` (which installs
+			// the iterator-level watchdog) need a pre-response timer alongside
+			// `timeout: false`; otherwise an Ollama server that accepts the
+			// POST and never streams headers would hang forever (issue #2422).
+			const idleTimeoutMs = options.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
+			const firstEventTimeoutMs =
+				options.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
+			const preResponseWatchdog =
+				firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
+					? AbortSignal.timeout(firstEventTimeoutMs)
+					: undefined;
+			const fetchSignal = preResponseWatchdog
+				? options.signal
+					? AbortSignal.any([options.signal, preResponseWatchdog])
+					: preResponseWatchdog
+				: options.signal;
 			const response = await fetchWithRetry(`${baseUrl}/api/chat`, {
 				method: "POST",
 				headers: {
@@ -534,9 +551,10 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 					"Content-Type": "application/json",
 				},
 				body: JSON.stringify(body),
-				signal: options.signal,
+				signal: fetchSignal,
 				defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
 				fetch: options.fetch,
+				timeout: false,
 			});
 			if (!response.ok) {
 				capturedErrorResponse = await captureHttpErrorResponse(response);

package/src/providers/openai-codex-responses.ts CHANGED Viewed

@@ -272,6 +272,7 @@ interface CodexRequestSetup {
 	requestSignal: AbortSignal;
 	wrapCodexSseStream: (source: AsyncGenerator<Record<string, unknown>>) => AsyncGenerator<Record<string, unknown>>;
 	requestAbortController: AbortController;
+	firstEventTimeoutMs: number | undefined;
 	websocketIdleTimeoutMs: number | undefined;
 	websocketFirstEventTimeoutMs: number | undefined;
 }
@@ -554,13 +555,16 @@ export function normalizeCodexToolChoice(
 	if (!choice) return undefined;
 	if (typeof choice === "string") return choice;
 	const allowFreeform = model ? supportsFreeformApplyPatchCodex(model) : false;
-	const mapName = (name: string): Record<string, string> => {
+	const mapName = (name: string): Record<string, string> | undefined => {
+		const directTool = tools.find(tool => tool.name === name);
 		const customTool = allowFreeform
 			? tools.find(tool => tool.customFormat && (tool.name === name || tool.customWireName === name))
 			: undefined;
+		const offeredTool = customTool ?? directTool;
+		if (!offeredTool) return undefined;
 		return customTool
 			? { type: "custom", name: customTool.customWireName ?? customTool.name }
-			: { type: "function", name };
+			: { type: "function", name: offeredTool.name };
 	};
 	if (choice.type === "function") {
 		if ("function" in choice && choice.function?.name) {
@@ -687,6 +691,7 @@ function createRequestSetup(options: OpenAICodexResponsesOptions | undefined): C
 		requestAbortController,
 		requestSignal,
 		wrapCodexSseStream,
+		firstEventTimeoutMs,
 		websocketIdleTimeoutMs,
 		websocketFirstEventTimeoutMs,
 	};
@@ -983,6 +988,7 @@ async function openCodexSseTransport(
 				state,
 				requestContext.responsesLite,
 				requestSetup.requestSignal,
+				requestSetup.firstEventTimeoutMs,
 				event => options?.onSseEvent?.(event, model),
 				options?.fetch,
 			),
@@ -3016,7 +3022,8 @@ async function openCodexSseEventStream(
 	body: RequestBody,
 	state: CodexWebSocketSessionState | undefined,
 	responsesLite: boolean,
-	signal?: AbortSignal,
+	signal: AbortSignal | undefined,
+	firstEventTimeoutMs: number | undefined,
 	onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
 	fetchOverride?: FetchImpl,
 ): Promise<AsyncGenerator<Record<string, unknown>>> {
@@ -3028,15 +3035,31 @@ async function openCodexSseEventStream(
 		sentTurnStateHeader: headers.has(X_CODEX_TURN_STATE_HEADER),
 		sentModelsEtagHeader: headers.has(X_MODELS_ETAG_HEADER),
 	});
+	// `wrapCodexSseStream` arms a first-event watchdog only after this fetch
+	// resolves (it wraps the SSE generator). With `timeout: false` disabling
+	// Bun's native 300s ceiling, a stalled pre-response request needs its own
+	// watchdog — combine the caller signal with a fresh
+	// `AbortSignal.timeout(firstEventTimeoutMs)` so headers must arrive
+	// within the configured budget (issue #2422).
+	const preResponseWatchdog =
+		firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
+			? AbortSignal.timeout(firstEventTimeoutMs)
+			: undefined;
+	const fetchSignal = preResponseWatchdog
+		? signal
+			? AbortSignal.any([signal, preResponseWatchdog])
+			: preResponseWatchdog
+		: signal;
 	const response = await fetchWithRetry(url, {
 		method: "POST",
 		headers,
 		body: JSON.stringify(body),
-		signal,
+		signal: fetchSignal,
 		maxAttempts: CODEX_MAX_RETRIES + 1,
 		defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
 		maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
 		fetch: fetchOverride,
+		timeout: false,
 	});
 	logCodexDebug("codex response", {
 		url: response.url,

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -699,6 +699,14 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 				if (!firstTokenTime) firstTokenTime = Date.now();
 				appendText(output, stream, text);
 			};
+			// Tracks the last full cumulative reasoning snapshot per signature (the
+			// reasoning field name) so dedup survives block transitions. Required
+			// for MiniMax-M3: once `</think>` and visible text arrive, currentBlock
+			// flips to "text", but later chunks keep carrying the same cumulative
+			// `reasoning_content` snapshot. Without an external tracker the guard
+			// below misses and the snapshot gets re-emitted as a fresh thinking
+			// block after the answer has started.
+			const lastCumulativeReasoningBySignature = new Map<string, string>();
 			const appendThinkingDelta = (
 				thinking: string,
 				signature?: string,
@@ -706,13 +714,13 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 			): void => {
 				if (!thinking) return;
 				let emittedThinking = thinking;
-				if (
-					source === "cumulative" &&
-					currentBlock?.type === "thinking" &&
-					(signature === undefined || currentBlock.thinkingSignature === signature) &&
-					thinking.startsWith(currentBlock.thinking)
-				) {
-					emittedThinking = thinking.slice(currentBlock.thinking.length);
+				if (source === "cumulative") {
+					const key = signature ?? "";
+					const lastSnapshot = lastCumulativeReasoningBySignature.get(key) ?? "";
+					if (thinking.startsWith(lastSnapshot)) {
+						emittedThinking = thinking.slice(lastSnapshot.length);
+					}
+					lastCumulativeReasoningBySignature.set(key, thinking);
 					if (!emittedThinking) return;
 				}
 				if (!firstTokenTime) firstTokenTime = Date.now();
@@ -1217,6 +1225,11 @@ async function createRequestSetup(
 	};
 }
+function getForcedCompletionsToolName(toolChoice: OpenAICompletionsParams["tool_choice"]): string | undefined {
+	if (typeof toolChoice !== "object" || toolChoice === null || !("function" in toolChoice)) return undefined;
+	return toolChoice.function.name;
+}
 function buildParams(
 	model: Model<"openai-completions">,
 	context: Context,
@@ -1228,6 +1241,7 @@ function buildParams(
 		Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
 	const forcedToolChoiceSuppressesThinking =
 		compat.disableReasoningOnForcedToolChoice &&
+		compat.supportsForcedToolChoice &&
 		isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
 	if (compat.whenThinking && thinkingEnabledForRequest && !forcedToolChoiceSuppressesThinking) {
 		compat = compat.whenThinking; // precomputed at model build — pointer swap, no allocation
@@ -1329,6 +1343,12 @@ function buildParams(
 	if (options?.toolChoice && compat.supportsToolChoice) {
 		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
 	}
+	if (isForcedToolChoice(params.tool_choice) && !compat.supportsForcedToolChoice) {
+		// Some thinking-required OpenAI-compatible models reject forced
+		// `tool_choice` while still accepting tools with the default auto
+		// selector. Keep the tool available and let the model choose it.
+		params.tool_choice = "auto";
+	}
 	if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
 		// `tool_choice: "none"` with no tools to gate is redundant and also
@@ -1342,6 +1362,19 @@ function buildParams(
 		delete params.tool_choice;
 	}
+	const forcedToolName = getForcedCompletionsToolName(params.tool_choice);
+	if (
+		forcedToolName !== undefined &&
+		(!Array.isArray(params.tools) ||
+			!params.tools.some(tool => tool.type === "function" && tool.function.name === forcedToolName))
+	) {
+		// A forced named tool_choice is only valid when the same request offers
+		// that function in `tools`. Active-tool filtering normally enforces this
+		// before provider dispatch; this guard keeps raw provider callers from
+		// emitting a self-inconsistent OpenAI-compatible payload.
+		delete params.tool_choice;
+	}
 	if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
 		// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
 		// Must explicitly disable since z.ai defaults to thinking enabled.

package/src/providers/openai-responses-shared.ts CHANGED Viewed

@@ -934,7 +934,10 @@ export async function processResponsesStream<TApi extends Api>(
 			// reaches the SDK stream), actively releasing the connection.
 			break;
 		} else if (event.type === "error") {
-			throw new Error(`Error Code ${event.code}: ${event.message}`);
+			const err = (event as any).error ?? event;
+			const code = err.code ?? "unknown";
+			const message = err.message ?? "no message";
+			throw new Error(`Error Code ${code}: ${message}`);
 		} else if (event.type === "response.failed") {
 			populateResponsesUsageFromResponse(output, event.response?.usage);
 			const error = event.response?.error ?? (event.response as any)?.status_details?.error;

package/src/providers/openai-responses.ts CHANGED Viewed

@@ -836,13 +836,18 @@ export function mapOpenAIResponsesToolChoiceForTools(
 	model: Model<"openai-responses">,
 ): OpenAIResponsesToolChoice {
 	const mapped = mapToOpenAIResponsesToolChoice(choice);
-	if (!mapped || typeof mapped === "string" || mapped.type !== "function" || !supportsFreeformApplyPatch(model)) {
+	if (!mapped || typeof mapped === "string" || mapped.type !== "function") {
 		return mapped;
 	}
-	const customTool = tools.find(
-		tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name),
-	);
+	const directTool = tools.find(tool => tool.name === mapped.name);
+	const customTool = supportsFreeformApplyPatch(model)
+		? tools.find(tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name))
+		: undefined;
+	const offeredTool = customTool ?? directTool;
+	if (!offeredTool) {
+		return undefined;
+	}
 	return customTool ? { type: "custom", name: customTool.customWireName ?? customTool.name } : mapped;
 }

package/src/registry/oauth/gitlab-duo.ts CHANGED Viewed

@@ -40,9 +40,10 @@ function resolveClientId(): string {
 /**
  * Resolve callback-server options from `GITLAB_REDIRECT_URI`. When set, the
  * exact string is advertised to GitLab (strict matching), random-port fallback
- * is disabled, and the local listener is bound to the URI's loopback host/port
- * so the browser callback lands on us. Non-loopback URIs bind a random local
- * port — only the paste-code path can complete in that case.
+ * is disabled, and HTTP loopback URIs bind the listener to the URI's host/port
+ * so the browser callback lands on us. HTTPS loopback URIs are rejected because
+ * the local callback server is plaintext HTTP. Non-loopback URIs bind a random
+ * local port — only the paste-code path can complete in that case.
  */
 function resolveCallbackOptions(): OAuthCallbackFlowOptions {
 	const raw = process.env.GITLAB_REDIRECT_URI?.trim();
@@ -65,6 +66,10 @@ function resolveCallbackOptions(): OAuthCallbackFlowOptions {
 	}
 	const isLoopback = parsed.hostname === "localhost" || parsed.hostname === "127.0.0.1" || parsed.hostname === "[::1]";
+	if (isLoopback && parsed.protocol !== "http:") {
+		throw new Error(`GITLAB_REDIRECT_URI loopback callbacks must use http://, got: ${raw}`);
+	}
 	const port = parsed.port ? Number.parseInt(parsed.port, 10) : parsed.protocol === "https:" ? 443 : 80;
 	return {

package/src/registry/zai.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import type { ProviderDefinition } from "./types";
 const AUTH_URL = "https://z.ai/manage-apikey/apikey-list";
 const API_BASE_URL = "https://api.z.ai/api/coding/paas/v4";
-const VALIDATION_MODEL = "glm-4.7";
+const VALIDATION_MODEL = "glm-5.2";
 export async function loginZai(options: OAuthController): Promise<string> {
 	if (!options.onPrompt) {

package/src/utils/openai-http.ts CHANGED Viewed

@@ -79,6 +79,10 @@ export async function postOpenAIStream<TEvent>(init: OpenAIStreamRequestInit): P
 		signal: init.signal,
 		fetch: init.fetch,
 		maxAttempts: init.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
+		// Bun's native fetch enforces a hard ~300s pre-response timeout (issue #2422).
+		// Cold large-context streams legitimately exceed it; the caller's
+		// `firstEventTimeoutMs`/`AbortSignal` already govern stuck requests.
+		timeout: false,
 	});
 	if (!response.ok) {
 		throw await captureOpenAIHttpError(response);