npm - @oh-my-pi/pi-ai - Versions diffs - 15.0.1 → 15.0.2 - Mend

@oh-my-pi/pi-ai 15.0.1 → 15.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/CHANGELOG.md +11 -0
package/package.json +4 -4
package/src/providers/anthropic.ts +9 -1
package/src/providers/azure-openai-responses.ts +4 -1
package/src/providers/gitlab-duo.ts +10 -4
package/src/providers/google-gemini-cli.ts +2 -1
package/src/providers/google-vertex.ts +28 -10
package/src/providers/google.ts +12 -4
package/src/providers/ollama.ts +1 -0
package/src/providers/openai-anthropic-shim.ts +2 -0
package/src/providers/openai-codex-responses.ts +4 -0
package/src/providers/openai-completions-compat.ts +19 -9
package/src/providers/openai-completions.ts +15 -7
package/src/providers/openai-responses.ts +5 -1
package/src/providers/register-builtins.ts +35 -8
package/src/types.ts +18 -1
package/src/utils/h2-fetch.ts +15 -2
package/src/utils/idle-iterator.ts +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 ## [Unreleased]
+## [15.0.2] - 2026-05-15
+### Fixed
+- Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes
+- Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/can1357/oh-my-pi/issues/1077)).
 ## [15.0.1] - 2026-05-14
 ### Breaking Changes
@@ -22,6 +28,11 @@
 - Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
 - Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
+### Changed
+- Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
+### Fixed
+- Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
 ## [14.9.3] - 2026-05-10

package/package.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.0.1",
+	"version": "15.0.2",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
-	"homepage": "https://github.com/can1357/oh-my-pi",
+	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
 	"contributors": [
 		"Mario Zechner"
@@ -46,8 +46,8 @@
 		"@aws-sdk/credential-provider-node": "^3.972.39",
 		"@bufbuild/protobuf": "^2.12.0",
 		"@google/genai": "^1.52.0",
-		"@oh-my-pi/pi-natives": "15.0.1",
-		"@oh-my-pi/pi-utils": "15.0.1",
+		"@oh-my-pi/pi-natives": "15.0.2",
+		"@oh-my-pi/pi-utils": "15.0.2",
 		"@sinclair/typebox": "^0.34.49",
 		"@smithy/node-http-handler": "^4.6.1",
 		"ajv": "^8.20.0",

package/src/providers/anthropic.ts CHANGED Viewed

@@ -25,6 +25,7 @@ import type {
 	AssistantMessage,
 	CacheRetention,
 	Context,
+	FetchImpl,
 	ImageContent,
 	Message,
 	Model,
@@ -541,6 +542,7 @@ export type AnthropicClientOptionsArgs = {
 	isOAuth?: boolean;
 	hasTools?: boolean;
 	onSseEvent?: AnthropicOptions["onSseEvent"];
+	fetch?: FetchImpl;
 };
 export type AnthropicClientOptionsResult = {
@@ -965,6 +967,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 					isOAuth: options?.isOAuth,
 					hasTools: !!context.tools?.length,
 					onSseEvent: options?.onSseEvent,
+					fetch: options?.fetch,
 				});
 				client = created.client;
 				isOAuthToken = created.isOAuthToken;
@@ -1405,7 +1408,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 	const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
 	const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
 	const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
-	const debugFetch = onSseEvent ? wrapFetchForSseDebug(fetch, event => onSseEvent(event, model)) : undefined;
+	const baseFetch = args.fetch ?? fetch;
+	const debugFetch = onSseEvent
+		? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model))
+		: args.fetch
+			? baseFetch
+			: undefined;
 	if (model.provider === "github-copilot") {
 		const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
 		const betaFeatures = [...extraBetas];

package/src/providers/azure-openai-responses.ts CHANGED Viewed

@@ -241,6 +241,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
 	const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
+	const baseFetch = options?.fetch ?? fetch;
 	return new AzureOpenAI({
 		apiKey,
 		apiVersion,
@@ -248,7 +249,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
 		maxRetries: 5,
 		defaultHeaders: headers,
 		baseURL: baseUrl,
-		fetch: options?.onSseEvent ? wrapFetchForSseDebug(fetch, event => options.onSseEvent?.(event, model)) : fetch,
+		fetch: options?.onSseEvent
+			? wrapFetchForSseDebug(baseFetch, event => options.onSseEvent?.(event, model))
+			: baseFetch,
 	});
 }

package/src/providers/gitlab-duo.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { ANTHROPIC_THINKING, mapAnthropicToolChoice } from "../stream";
-import type { Api, Context, Model, SimpleStreamOptions } from "../types";
+import type { Api, Context, FetchImpl, Model, SimpleStreamOptions } from "../types";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import type { OpenAICompletionsOptions } from "./openai-completions";
 import type { OpenAIResponsesOptions } from "./openai-responses";
@@ -172,13 +172,16 @@ interface DirectAccessToken {
 const directAccessCache = new Map<string, DirectAccessToken>();
-async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAccessToken> {
+async function getDirectAccessToken(
+	gitlabAccessToken: string,
+	fetchImpl: FetchImpl = fetch,
+): Promise<DirectAccessToken> {
 	const cached = directAccessCache.get(gitlabAccessToken);
 	if (cached && cached.expiresAt > Date.now()) {
 		return cached;
 	}
-	const response = await fetch(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
+	const response = await fetchImpl(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
 		method: "POST",
 		headers: {
 			Authorization: `Bearer ${gitlabAccessToken}`,
@@ -240,7 +243,7 @@ export function streamGitLabDuo(
 				throw new Error(`Unsupported GitLab Duo model: ${model.id}`);
 			}
-			const directAccess = await getDirectAccessToken(options.apiKey);
+			const directAccess = await getDirectAccessToken(options.apiKey, options.fetch);
 			const headers = {
 				...directAccess.headers,
 				...options.headers,
@@ -278,6 +281,7 @@ export function streamGitLabDuo(
 								onPayload: options.onPayload,
 								onResponse: options.onResponse,
 								onSseEvent: options.onSseEvent,
+								fetch: options.fetch,
 								thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
 								thinkingBudgetTokens: reasoningEffort
 									? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
@@ -314,6 +318,7 @@ export function streamGitLabDuo(
 									onPayload: options.onPayload,
 									onResponse: options.onResponse,
 									onSseEvent: options.onSseEvent,
+									fetch: options.fetch,
 									reasoning: reasoningEffort,
 									toolChoice: options.toolChoice,
 								} satisfies OpenAIResponsesOptions,
@@ -345,6 +350,7 @@ export function streamGitLabDuo(
 									onPayload: options.onPayload,
 									onResponse: options.onResponse,
 									onSseEvent: options.onSseEvent,
+									fetch: options.fetch,
 									reasoning: reasoningEffort,
 									toolChoice: options.toolChoice,
 								} satisfies OpenAICompletionsOptions,

package/src/providers/google-gemini-cli.ts CHANGED Viewed

@@ -362,6 +362,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 					maxAttempts: MAX_RETRIES + 1,
 					defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
 					maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
+					fetch: options?.fetch,
 				},
 			);
 			if (!response.ok) {
@@ -545,7 +546,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 						throw new Error("Missing request URL");
 					}
-					currentResponse = await fetch(requestUrl, {
+					currentResponse = await (options?.fetch ?? fetch)(requestUrl, {
 						method: "POST",
 						headers: requestHeaders,
 						body: requestBodyJson,

package/src/providers/google-vertex.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { GoogleGenAI } from "@google/genai";
 import { $env } from "@oh-my-pi/pi-utils";
-import type { Context, Model, StreamFunction } from "../types";
+import type { Context, FetchImpl, Model, StreamFunction } from "../types";
 import type { AssistantMessageEventStream } from "../utils/event-stream";
 import { buildGoogleGenerateContentParams, type GoogleSharedStreamOptions, streamGoogleGenAI } from "./google-shared";
@@ -25,7 +25,9 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
 			const apiKey = resolveApiKey(options);
 			const project = apiKey ? undefined : resolveProject(options);
 			const location = apiKey ? undefined : resolveLocation(options);
-			const client = apiKey ? createClientWithApiKey(model, apiKey) : createClient(model, project!, location!);
+			const client = apiKey
+				? createClientWithApiKey(model, apiKey, options?.fetch)
+				: createClient(model, project!, location!, options?.fetch);
 			const params = buildGoogleGenerateContentParams(model, context, options ?? {});
 			const url = apiKey
 				? `https://aiplatform.googleapis.com/${API_VERSION}/publishers/google/models/${model.id}:streamGenerateContent`
@@ -34,29 +36,45 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
 		},
 	});
-function buildHttpOptions(model: Model<"google-vertex">): { headers?: Record<string, string> } | undefined {
-	if (!model.headers) {
-		return undefined;
+function buildHttpOptions(
+	model: Model<"google-vertex">,
+	fetchOverride: FetchImpl | undefined,
+): { headers?: Record<string, string>; fetch?: FetchImpl } | undefined {
+	const options: { headers?: Record<string, string>; fetch?: FetchImpl } = {};
+	if (model.headers) {
+		options.headers = { ...model.headers };
+	}
+	if (fetchOverride) {
+		options.fetch = fetchOverride;
 	}
-	return { headers: { ...model.headers } };
+	return Object.keys(options).length > 0 ? options : undefined;
 }
-function createClient(model: Model<"google-vertex">, project: string, location: string): GoogleGenAI {
+function createClient(
+	model: Model<"google-vertex">,
+	project: string,
+	location: string,
+	fetchOverride: FetchImpl | undefined,
+): GoogleGenAI {
 	return new GoogleGenAI({
 		vertexai: true,
 		project,
 		location,
 		apiVersion: API_VERSION,
-		httpOptions: buildHttpOptions(model),
+		httpOptions: buildHttpOptions(model, fetchOverride),
 	});
 }
-function createClientWithApiKey(model: Model<"google-vertex">, apiKey: string): GoogleGenAI {
+function createClientWithApiKey(
+	model: Model<"google-vertex">,
+	apiKey: string,
+	fetchOverride: FetchImpl | undefined,
+): GoogleGenAI {
 	return new GoogleGenAI({
 		vertexai: true,
 		apiKey,
 		apiVersion: API_VERSION,
-		httpOptions: buildHttpOptions(model),
+		httpOptions: buildHttpOptions(model, fetchOverride),
 	});
 }

package/src/providers/google.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { GoogleGenAI } from "@google/genai";
 import { getEnvApiKey } from "../stream";
-import type { Context, Model, StreamFunction } from "../types";
+import type { Context, FetchImpl, Model, StreamFunction } from "../types";
 import type { AssistantMessageEventStream } from "../utils/event-stream";
 import { buildGoogleGenerateContentParams, type GoogleSharedStreamOptions, streamGoogleGenAI } from "./google-shared";
@@ -17,15 +17,20 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
 		api: "google-generative-ai",
 		prepare: () => {
 			const apiKey = options?.apiKey || getEnvApiKey(model.provider);
-			const client = createClient(model, apiKey);
+			const client = createClient(model, apiKey, options?.fetch);
 			const params = buildGoogleGenerateContentParams(model, context, options ?? {});
 			const url = model.baseUrl ? `${model.baseUrl}/models/${model.id}:streamGenerateContent` : undefined;
 			return { client, params, url };
 		},
 	});
-function createClient(model: Model<"google-generative-ai">, apiKey?: string): GoogleGenAI {
-	const httpOptions: { baseUrl?: string; apiVersion?: string; headers?: Record<string, string> } = {};
+function createClient(model: Model<"google-generative-ai">, apiKey?: string, fetchOverride?: FetchImpl): GoogleGenAI {
+	const httpOptions: {
+		baseUrl?: string;
+		apiVersion?: string;
+		headers?: Record<string, string>;
+		fetch?: FetchImpl;
+	} = {};
 	if (model.baseUrl) {
 		httpOptions.baseUrl = model.baseUrl;
 		httpOptions.apiVersion = ""; // baseUrl already includes version path, don't append
@@ -33,6 +38,9 @@ function createClient(model: Model<"google-generative-ai">, apiKey?: string): Go
 	if (model.headers) {
 		httpOptions.headers = model.headers;
 	}
+	if (fetchOverride) {
+		httpOptions.fetch = fetchOverride;
+	}
 	return new GoogleGenAI({
 		apiKey,

package/src/providers/ollama.ts CHANGED Viewed

@@ -378,6 +378,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
 				body: JSON.stringify(body),
 				signal: options.signal,
 				defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
+				fetch: options.fetch,
 			});
 			if (!response.ok) {
 				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/chat`);

package/src/providers/openai-anthropic-shim.ts CHANGED Viewed

@@ -88,6 +88,7 @@ export function streamOpenAIAnthropicShim(
 					onPayload: options?.onPayload,
 					onResponse: options?.onResponse,
 					onSseEvent: options?.onSseEvent,
+					fetch: options?.fetch,
 					thinkingEnabled,
 					thinkingBudgetTokens: thinkingBudget,
 				});
@@ -116,6 +117,7 @@ export function streamOpenAIAnthropicShim(
 					onPayload: options?.onPayload,
 					onResponse: options?.onResponse,
 					onSseEvent: options?.onSseEvent,
+					fetch: options?.fetch,
 					reasoning: reasoningEffort,
 				});

package/src/providers/openai-codex-responses.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import {
 	type Api,
 	type AssistantMessage,
 	type Context,
+	type FetchImpl,
 	type Model,
 	type ProviderSessionState,
 	type ServiceTier,
@@ -735,6 +736,7 @@ async function openCodexSseTransport(
 			state,
 			requestSetup.requestSignal,
 			event => options?.onSseEvent?.(event, model),
+			options?.fetch,
 		),
 	);
 	return { eventStream, requestBodyForState: structuredCloneJSON(body), transport: "sse" };
@@ -2173,6 +2175,7 @@ async function openCodexSseEventStream(
 	state: CodexWebSocketSessionState | undefined,
 	signal?: AbortSignal,
 	onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
+	fetchOverride?: FetchImpl,
 ): Promise<AsyncGenerator<Record<string, unknown>>> {
 	const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state);
 	logCodexDebug("codex request", {
@@ -2190,6 +2193,7 @@ async function openCodexSseEventStream(
 		maxAttempts: CODEX_MAX_RETRIES + 1,
 		defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
 		maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
+		fetch: fetchOverride,
 	});
 	logCodexDebug("codex response", {
 		url: response.url,

package/src/providers/openai-completions-compat.ts CHANGED Viewed

@@ -53,6 +53,12 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 	const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
 	const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
 	const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
+	const isMoonshotKimi =
+		isKimiModel &&
+		(provider === "moonshot" ||
+			provider === "kimi-code" ||
+			baseUrl.includes("api.moonshot.ai") ||
+			baseUrl.includes("api.kimi.com"));
 	const isAnthropicModel =
 		provider === "anthropic" ||
 		baseUrl.includes("api.anthropic.com") ||
@@ -90,6 +96,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		provider === "opencode-zen" ||
 		provider === "opencode-go" ||
 		baseUrl.includes("opencode.ai");
+	const isOpenCodeProvider = provider === "opencode-go" || provider === "opencode-zen";
 	const useMaxTokens =
 		provider === "mistral" ||
@@ -173,22 +180,25 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		requiresAssistantAfterToolResult: false,
 		requiresThinkingAsText: isMistral,
 		requiresMistralToolIds: isMistral,
-		thinkingFormat: isZai
-			? "zai"
-			: provider === "openrouter" || baseUrl.includes("openrouter.ai")
-				? "openrouter"
-				: isAlibaba || isQwen
-					? "qwen"
-					: "openai",
+		thinkingFormat:
+			isZai || isMoonshotKimi
+				? "zai"
+				: provider === "openrouter" || baseUrl.includes("openrouter.ai")
+					? "openrouter"
+					: isAlibaba || isQwen
+						? "qwen"
+						: "openai",
 		reasoningContentField: "reasoning_content",
 		// Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`:
-		//   - Kimi: documented invariant on its native API and via OpenCode-Go.
+		//   - Kimi: documented invariant on its native API.
 		//   - Any reasoning-capable model reached through OpenRouter: DeepSeek V4 Pro and similar enforce
 		//     this server-side whenever the request is in thinking mode. We can't translate Anthropic's
 		//     redacted/encrypted reasoning into DeepSeek's plaintext form, so cross-provider continuations
 		//     rely on a placeholder — see `convertMessages` for the placeholder injection.
+		//   - OpenCode-Go and OpenCode-Zen handle reasoning content internally and reject
+		//     `reasoning_content` in client-sent messages — exclude them even for Kimi models.
 		requiresReasoningContentForToolCalls:
-			isKimiModel ||
+			(isKimiModel && !isOpenCodeProvider) ||
 			(isDeepseekFamily && Boolean(model.reasoning)) ||
 			((provider === "openrouter" || baseUrl.includes("openrouter.ai")) && Boolean(model.reasoning)),
 		// DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -16,6 +16,7 @@ import { getEnvApiKey } from "../stream";
 import {
 	type AssistantMessage,
 	type Context,
+	type FetchImpl,
 	getPriorityPremiumRequests,
 	type Message,
 	type MessageAttribution,
@@ -362,6 +363,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 				options?.headers,
 				options?.initiatorOverride,
 				options?.onSseEvent,
+				options?.fetch,
 			);
 			const priorityPremiumRequests = getPriorityPremiumRequests(options?.serviceTier, model.provider);
 			const premiumRequestsTotal =
@@ -778,6 +780,7 @@ async function createClient(
 	extraHeaders?: Record<string, string>,
 	initiatorOverride?: MessageAttribution,
 	onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
+	fetchOverride?: FetchImpl,
 ): Promise<{
 	client: OpenAI;
 	copilotPremiumRequests: number | undefined;
@@ -847,9 +850,10 @@ async function createClient(
 		azureDefaultQuery = { "api-version": apiVersion };
 	}
 	let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
+	const baseFetch = fetchOverride ?? fetch;
 	const wrappedFetch = Object.assign(
 		async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
-			const response = await fetch(input, init);
+			const response = await baseFetch(input, init);
 			if (response.ok) {
 				capturedErrorResponse = undefined;
 				return response;
@@ -872,7 +876,7 @@ async function createClient(
 			};
 			return response;
 		},
-		{ preconnect: fetch.preconnect },
+		baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
 	);
 	const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;
 	return {
@@ -1019,12 +1023,14 @@ function buildParams(
 	}
 	if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
-		// Mirrors anthropic.ts:disableThinkingIfToolChoiceForced — backends like
-		// Kimi 400 with `tool_choice 'specified' is incompatible with thinking
-		// enabled`. Drop reasoning for this turn instead of dropping tool_choice;
-		// the agent still gets the forced tool call, just without thinking.
+		// Backends like Kimi 400 with `tool_choice 'specified' is incompatible
+		// with thinking enabled`. Suppress thinking for this single forced-tool
+		// turn while keeping the tool-selection contract intact.
 		delete params.reasoning_effort;
 		delete params.reasoning;
+		if (compat.thinkingFormat === "zai") {
+			params.thinking = { type: "disabled" };
+		}
 	}
 	// OpenRouter provider routing preferences
@@ -1362,7 +1368,9 @@ export function convertMessages(
 			const canUseSyntheticReasoningContent =
 				compat.requiresReasoningContentForToolCalls &&
 				compat.allowsSyntheticReasoningContentForToolCalls &&
-				(compat.thinkingFormat === "openai" || compat.thinkingFormat === "openrouter");
+				(compat.thinkingFormat === "openai" ||
+					compat.thinkingFormat === "openrouter" ||
+					compat.thinkingFormat === "zai");
 			// DeepSeek reasoning models require reasoning_content on ALL assistant turns,
 			// not just tool-call turns. Other providers (Kimi, OpenRouter) only require it
 			// on tool-call turns.

package/src/providers/openai-responses.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import {
 	type AssistantMessage,
 	type CacheRetention,
 	type Context,
+	type FetchImpl,
 	getPriorityPremiumRequests,
 	type MessageAttribution,
 	type Model,
@@ -210,6 +211,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 				options?.initiatorOverride,
 				cacheSessionId,
 				options?.onSseEvent,
+				options?.fetch,
 			);
 			const priorityPremiumRequests = getPriorityPremiumRequests(options?.serviceTier, model.provider);
 			const premiumRequestsTotal =
@@ -312,6 +314,7 @@ function createClient(
 	initiatorOverride?: MessageAttribution,
 	sessionId?: string,
 	onSseEvent?: OpenAIResponsesOptions["onSseEvent"],
+	fetchOverride?: FetchImpl,
 ): {
 	client: OpenAI;
 	copilotPremiumRequests: number | undefined;
@@ -349,6 +352,7 @@ function createClient(
 		headers.session_id ??= sessionId;
 		headers["x-client-request-id"] ??= sessionId;
 	}
+	const baseFetch = fetchOverride ?? fetch;
 	return {
 		client: new OpenAI({
 			apiKey,
@@ -356,7 +360,7 @@ function createClient(
 			dangerouslyAllowBrowser: true,
 			maxRetries: 5,
 			defaultHeaders: headers,
-			fetch: onSseEvent ? wrapFetchForSseDebug(fetch, event => onSseEvent(event, model)) : fetch,
+			fetch: onSseEvent ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model)) : baseFetch,
 		}),
 		copilotPremiumRequests,
 		baseUrl,

package/src/providers/register-builtins.ts CHANGED Viewed

@@ -19,7 +19,9 @@ import type {
 	Model,
 	OptionsForApi,
 } from "../types";
+import { type AbortSourceTracker, createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream as EventStreamImpl } from "../utils/event-stream";
+import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTimeout } from "../utils/idle-iterator";
 import type { BedrockOptions } from "./amazon-bedrock";
 import type { AnthropicOptions } from "./anthropic";
 import type { AzureOpenAIResponsesOptions } from "./azure-openai-responses";
@@ -155,6 +157,9 @@ export function setBedrockProviderModule(module: BedrockProviderModule): void {
 // Stream forwarding / error helpers
 // ---------------------------------------------------------------------------
+const LAZY_STREAM_IDLE_TIMEOUT_ERROR = "Provider stream stalled while waiting for the next event";
+const LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR = "Provider stream timed out while waiting for the first event";
 function hasFinalResult(
 	source: AsyncIterable<AssistantMessageEvent>,
 ): source is AsyncIterable<AssistantMessageEvent> & { result(): Promise<AssistantMessage> } {
@@ -165,10 +170,23 @@ function forwardStream<TApi extends Api>(
 	target: EventStreamImpl,
 	source: AsyncIterable<AssistantMessageEvent>,
 	model: Model<TApi>,
+	options: OptionsForApi<TApi>,
+	abortTracker: AbortSourceTracker,
 ): void {
 	(async () => {
 		try {
-			for await (const event of source) {
+			const idleTimeoutMs = options.streamIdleTimeoutMs ?? getStreamIdleTimeoutMs();
+			const watchedSource = iterateWithIdleTimeout(source, {
+				idleTimeoutMs,
+				firstItemTimeoutMs: options.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
+				errorMessage: LAZY_STREAM_IDLE_TIMEOUT_ERROR,
+				firstItemErrorMessage: LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR,
+				onIdle: () => abortTracker.abortLocally(new Error(LAZY_STREAM_IDLE_TIMEOUT_ERROR)),
+				onFirstItemTimeout: () => abortTracker.abortLocally(new Error(LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR)),
+				abortSignal: options.signal,
+			});
+			for await (const event of watchedSource) {
 				target.push(event);
 			}
 			if (hasFinalResult(source)) {
@@ -177,14 +195,19 @@ function forwardStream<TApi extends Api>(
 				target.end();
 			}
 		} catch (error) {
-			const message = createLazyLoadErrorMessage(model, error);
-			target.push({ type: "error", reason: "error", error: message });
+			const stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
+			const message = createLazyLoadErrorMessage(model, error, stopReason);
+			target.push({ type: "error", reason: stopReason, error: message });
 			target.end(message);
 		}
 	})();
 }
-function createLazyLoadErrorMessage<TApi extends Api>(model: Model<TApi>, error: unknown): AssistantMessage {
+function createLazyLoadErrorMessage<TApi extends Api>(
+	model: Model<TApi>,
+	error: unknown,
+	stopReason: Extract<AssistantMessage["stopReason"], "aborted" | "error"> = "error",
+): AssistantMessage {
 	return {
 		role: "assistant",
 		content: [],
@@ -199,8 +222,9 @@ function createLazyLoadErrorMessage<TApi extends Api>(model: Model<TApi>, error:
 			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
-		stopReason: "error",
-		errorMessage: error instanceof Error ? error.message : String(error),
+		stopReason,
+		errorMessage:
+			stopReason === "aborted" ? "Request was aborted" : error instanceof Error ? error.message : String(error),
 		timestamp: Date.now(),
 	};
 }
@@ -214,11 +238,14 @@ function createLazyStream<TApi extends Api>(
 ): (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => EventStreamImpl {
 	return (model, context, options) => {
 		const outer = new EventStreamImpl();
+		const streamOptions = (options ?? {}) as OptionsForApi<TApi>;
 		loadModule()
 			.then(module => {
-				const inner = module.stream(model, context, options);
-				forwardStream(outer, inner, model);
+				const abortTracker = createAbortSourceTracker(streamOptions.signal);
+				const providerOptions = { ...streamOptions, signal: abortTracker.requestSignal } as OptionsForApi<TApi>;
+				const inner = module.stream(model, context, providerOptions);
+				forwardStream(outer, inner, model, streamOptions, abortTracker);
 			})
 			.catch(error => {
 				const message = createLazyLoadErrorMessage(model, error);

package/src/types.ts CHANGED Viewed

@@ -204,6 +204,15 @@ export interface RawSseEvent {
 	raw: string[];
 }
+/**
+ * `fetch`-compatible function. Accepts any callable matching the standard
+ * fetch signature; `preconnect` is optional because non-Bun runtimes (browsers,
+ * test mocks) won't expose it.
+ */
+export type FetchImpl = ((input: string | URL | Request, init?: RequestInit) => Promise<Response>) & {
+	preconnect?: typeof globalThis.fetch.preconnect;
+};
 export interface StreamOptions {
 	temperature?: number;
 	topP?: number;
@@ -275,6 +284,14 @@ export interface StreamOptions {
 	 * Set to 0 to disable the inter-event idle watchdog for this request.
 	 */
 	streamIdleTimeoutMs?: number;
+	/**
+	 * Optional `fetch` implementation override. Providers route every HTTP
+	 * request — direct calls, SDK clients, and retry helpers — through this
+	 * implementation when set. Defaults to `globalThis.fetch`. Providers that
+	 * do not use `fetch` (Bedrock's AWS SDK transport, Cursor's HTTP/2
+	 * channel) silently ignore the override.
+	 */
+	fetch?: FetchImpl;
 	/** Cursor exec/MCP tool handlers (cursor-agent only). */
 	execHandlers?: CursorExecHandlers;
 }
@@ -613,7 +630,7 @@ export interface OpenAICompat {
 	requiresThinkingAsText?: boolean;
 	/** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
 	requiresMistralToolIds?: boolean;
-	/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" }, "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
+	/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" | "disabled" } (also used by Moonshot Kimi), "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
 	thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
 	/** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
 	reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";

package/src/utils/h2-fetch.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
  * Patch `globalThis.fetch` to advertise HTTP/2 in TLS ALPN, with transparent
- * HTTP/1.1 fallback when the server doesn't select `h2`.
+ * HTTP/1.1 fallback when the server doesn't negotiate `h2`.
  *
  * Bun's HTTP/2 client is gated on `BUN_FEATURE_FLAG_EXPERIMENTAL_HTTP2_CLIENT`,
  * read by the native runtime before any JS executes; assigning to
@@ -8,6 +8,12 @@
  * activates h2 over TLS ALPN and rejects with `error.code === "HTTP2Unsupported"`
  * if the server picks anything else, so we catch and retry without the hint.
  *
+ * Some HTTPS endpoints (e.g. corporate API gateways behind reverse proxies)
+ * advertise h2 via ALPN but then refuse or reset the connection at the HTTP/2
+ * framing layer. Bun surfaces these as `ConnectionRefused`, `ConnectionReset`,
+ * or `ConnectionClosed` rather than `HTTP2Unsupported`, so we treat those
+ * codes as h2-fallback triggers as well.
+ *
  * Bun negotiates h2 via ALPN over TLS only (no h2c), so plain `http://` URLs
  * skip the attempt entirely — avoids the throw/retry round-trip for localhost.
  *
@@ -24,12 +30,19 @@ export function installH2Fetch(): void {
 	const original = globalThis.fetch as typeof fetch & PatchedFetch;
 	if (original[installed]) return;
+	/** Error codes that indicate h2 negotiation/transport failure (not an application error). */
+	const h2FallbackCodes: ReadonlySet<string> = new Set([
+		"HTTP2Unsupported", // Server selected h1 in ALPN
+		"ConnectionRefused", // Server refused the h2 connection
+		"ConnectionReset", // Server reset during h2 handshake
+		"ConnectionClosed", // Server closed before h2 response
+	]);
 	const wrapper = async function h2fetch(input: string | URL | Request, init?: RequestInit): Promise<Response> {
 		if (!isHttps(input)) return original(input, init);
 		try {
 			return await original(input, { ...init, protocol: "http2" });
 		} catch (err) {
-			if ((err as { code?: unknown }).code !== "HTTP2Unsupported") throw err;
+			if (!h2FallbackCodes.has((err as { code?: string }).code ?? "")) throw err;
 			return original(input, init);
 		}
 	} as typeof fetch & PatchedFetch;

package/src/utils/idle-iterator.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { $env } from "@oh-my-pi/pi-utils";
-const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 120_000;
+const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 30_000;
 const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 100_000;
 function normalizeIdleTimeoutMs(value: string | undefined, fallback: number): number | undefined {