npm - @oh-my-pi/pi-ai - Versions diffs - 15.0.0 → 15.0.2 - Mend

@oh-my-pi/pi-ai 15.0.0 → 15.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/CHANGELOG.md +22 -1
package/package.json +6 -6
package/src/index.ts +1 -0
package/src/provider-models/ollama.ts +3 -11
package/src/providers/anthropic.ts +21 -10
package/src/providers/azure-openai-responses.ts +23 -84
package/src/providers/gitlab-duo.ts +10 -4
package/src/providers/google-gemini-cli.ts +39 -205
package/src/providers/google-gemini-headers.ts +0 -100
package/src/providers/google-shared.ts +446 -4
package/src/providers/google-vertex.ts +46 -380
package/src/providers/google.ts +27 -362
package/src/providers/kimi.ts +15 -96
package/src/providers/ollama.ts +4 -12
package/src/providers/openai-anthropic-shim.ts +138 -0
package/src/providers/openai-codex-responses.ts +42 -199
package/src/providers/openai-completions-compat.ts +19 -9
package/src/providers/openai-completions.ts +32 -31
package/src/providers/openai-responses-shared.ts +143 -24
package/src/providers/openai-responses.ts +25 -77
package/src/providers/register-builtins.ts +35 -8
package/src/providers/synthetic.ts +15 -102
package/src/types.ts +31 -2
package/src/utils/h2-fetch.ts +60 -0
package/src/utils/http-inspector.ts +2 -2
package/src/utils/idle-iterator.ts +1 -1
package/src/utils/oauth/github-copilot.ts +6 -10
package/src/utils/oauth/kimi.ts +4 -3
package/src/utils/oauth/lm-studio.ts +0 -2
package/src/utils/retry.ts +8 -130

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,22 @@
 ## [Unreleased]
+## [15.0.2] - 2026-05-15
+### Fixed
+- Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes
+- Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/can1357/oh-my-pi/issues/1077)).
+## [15.0.1] - 2026-05-14
+### Breaking Changes
+- Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
+### Added
+- Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
+- Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
 ## [15.0.0] - 2026-05-13
 ### Added
@@ -12,6 +28,11 @@
 - Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
 - Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
+### Changed
+- Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
+### Fixed
+- Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
 ## [14.9.3] - 2026-05-10
@@ -2344,4 +2365,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
 ## [0.9.4] - 2025-11-26
-Initial release with multi-provider LLM support.
+Initial release with multi-provider LLM support.

package/package.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.0.0",
+	"version": "15.0.2",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
-	"homepage": "https://github.com/can1357/oh-my-pi",
+	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
 	"contributors": [
 		"Mario Zechner"
@@ -46,8 +46,8 @@
 		"@aws-sdk/credential-provider-node": "^3.972.39",
 		"@bufbuild/protobuf": "^2.12.0",
 		"@google/genai": "^1.52.0",
-		"@oh-my-pi/pi-natives": "15.0.0",
-		"@oh-my-pi/pi-utils": "15.0.0",
+		"@oh-my-pi/pi-natives": "15.0.2",
+		"@oh-my-pi/pi-utils": "15.0.2",
 		"@sinclair/typebox": "^0.34.49",
 		"@smithy/node-http-handler": "^4.6.1",
 		"ajv": "^8.20.0",
@@ -58,10 +58,10 @@
 		"zod": "4.4.3"
 	},
 	"devDependencies": {
-		"@types/bun": "^1.3.13"
+		"@types/bun": "^1.3.14"
 	},
 	"engines": {
-		"bun": ">=1.3.7"
+		"bun": ">=1.3.14"
 	},
 	"files": [
 		"src",

package/src/index.ts CHANGED Viewed

@@ -37,6 +37,7 @@ export * from "./usage/zai";
 export * from "./utils/anthropic-auth";
 export * from "./utils/discovery";
 export * from "./utils/event-stream";
+export * from "./utils/h2-fetch";
 export * from "./utils/overflow";
 export * from "./utils/retry";
 export * from "./utils/schema";

package/src/provider-models/ollama.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { abortableSleep } from "@oh-my-pi/pi-utils";
+import { fetchWithRetry } from "@oh-my-pi/pi-utils";
 import type { ModelManagerOptions } from "../model-manager";
 import { Effort } from "../model-thinking";
 import type { ThinkingConfig } from "../types";
@@ -19,16 +19,7 @@ type OllamaShowResponse = {
 	model_info?: Record<string, unknown>;
 };
-const MODEL_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
-async function fetchWithRetry(url: string, init: RequestInit): Promise<Response> {
-	for (let attempt = 0; attempt < MODEL_RETRY_DELAYS_MS.length; attempt++) {
-		const response = await fetch(url, init);
-		if (response.ok || response.status < 500) return response;
-		await abortableSleep(MODEL_RETRY_DELAYS_MS[attempt]!);
-	}
-	return fetch(url, init);
-}
+const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
 function trimTrailingSlash(value: string): string {
 	return value.endsWith("/") ? value.slice(0, -1) : value;
@@ -109,6 +100,7 @@ export function ollamaCloudModelManagerOptions(
 			const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
 				method: "GET",
 				headers: createCloudHeaders(apiKey),
+				defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
 			});
 			if (!response.ok) {
 				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);

package/src/providers/anthropic.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import * as nodeCrypto from "node:crypto";
 import * as fs from "node:fs";
+import { scheduler } from "node:timers/promises";
 import * as tls from "node:tls";
 import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
 import type {
@@ -8,7 +9,14 @@ import type {
 	MessageParam,
 	RawMessageStreamEvent,
 } from "@anthropic-ai/sdk/resources/messages";
-import { $env, abortableSleep, isEnoent, readSseEvents } from "@oh-my-pi/pi-utils";
+import {
+	$env,
+	extractHttpStatusFromError,
+	isEnoent,
+	isRetryableError,
+	isUnexpectedSocketCloseMessage,
+	readSseEvents,
+} from "@oh-my-pi/pi-utils";
 import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
 import { calculateCost } from "../models";
 import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
@@ -17,6 +25,7 @@ import type {
 	AssistantMessage,
 	CacheRetention,
 	Context,
+	FetchImpl,
 	ImageContent,
 	Message,
 	Model,
@@ -48,12 +57,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
 import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
 import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
 import { notifyProviderResponse } from "../utils/provider-response";
-import {
-	extractHttpStatusFromError,
-	isCopilotRetryableError,
-	isRetryableError,
-	isUnexpectedSocketCloseMessage,
-} from "../utils/retry";
+import { isCopilotTransientModelError } from "../utils/retry";
 import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
 import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
 import {
@@ -538,6 +542,7 @@ export type AnthropicClientOptionsArgs = {
 	isOAuth?: boolean;
 	hasTools?: boolean;
 	onSseEvent?: AnthropicOptions["onSseEvent"];
+	fetch?: FetchImpl;
 };
 export type AnthropicClientOptionsResult = {
@@ -844,7 +849,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
 export function isProviderRetryableError(error: unknown, provider?: string): boolean {
 	if (!(error instanceof Error)) return false;
-	if (provider === "github-copilot" && isCopilotRetryableError(error)) return true;
+	if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
 	const msg = error.message.toLowerCase();
 	if (
 		isUnexpectedSocketCloseMessage(msg) ||
@@ -962,6 +967,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 					isOAuth: options?.isOAuth,
 					hasTools: !!context.tools?.length,
 					onSseEvent: options?.onSseEvent,
+					fetch: options?.fetch,
 				});
 				client = created.client;
 				isOAuthToken = created.isOAuthToken;
@@ -1287,7 +1293,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 					}
 					providerRetryAttempt++;
 					const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
-					await abortableSleep(delayMs, options?.signal);
+					await scheduler.wait(delayMs, { signal: options?.signal });
 					output.content.length = 0;
 					output.responseId = undefined;
 					output.errorMessage = strictFallbackErrorMessage;
@@ -1402,7 +1408,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 	const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
 	const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
 	const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
-	const debugFetch = onSseEvent ? wrapFetchForSseDebug(fetch, event => onSseEvent(event, model)) : undefined;
+	const baseFetch = args.fetch ?? fetch;
+	const debugFetch = onSseEvent
+		? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model))
+		: args.fetch
+			? baseFetch
+			: undefined;
 	if (model.provider === "github-copilot") {
 		const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
 		const betaFeatures = [...extraBetas];

package/src/providers/azure-openai-responses.ts CHANGED Viewed

@@ -6,17 +6,15 @@ import type {
 	ResponseInput,
 } from "openai/resources/responses/responses";
 import { getEnvApiKey } from "../stream";
-import {
-	type Api,
-	type AssistantMessage,
-	type Context,
-	type Model,
-	type ServiceTier,
-	type StreamFunction,
-	type StreamOptions,
-	shouldSendServiceTier,
-	type Tool,
-	type ToolChoice,
+import type {
+	AssistantMessage,
+	Context,
+	Model,
+	ServiceTier,
+	StreamFunction,
+	StreamOptions,
+	Tool,
+	ToolChoice,
 } from "../types";
 import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
 import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
 import {
 	appendResponsesToolResultMessages,
+	applyCommonResponsesSamplingParams,
+	applyResponsesReasoningParams,
 	convertResponsesAssistantMessage,
 	convertResponsesInputContent,
+	createInitialResponsesAssistantMessage,
 	normalizeResponsesToolCallIdForTransform,
 	processResponsesStream,
 } from "./openai-responses-shared";
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
 		let firstTokenTime: number | undefined;
 		const deploymentName = resolveDeploymentName(model, options);
-		const output: AssistantMessage = {
-			role: "assistant",
-			content: [],
-			api: "azure-openai-responses" as Api,
-			provider: model.provider,
-			model: model.id,
-			usage: {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-				totalTokens: 0,
-				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-			},
-			stopReason: "stop",
-			timestamp: Date.now(),
-		};
+		const output: AssistantMessage = createInitialResponsesAssistantMessage(
+			"azure-openai-responses",
+			model.provider,
+			model.id,
+		);
 		let rawRequestDump: RawHttpRequestDump | undefined;
 		const abortTracker = createAbortSourceTracker(options?.signal);
 		const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
@@ -252,6 +241,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
 	const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
+	const baseFetch = options?.fetch ?? fetch;
 	return new AzureOpenAI({
 		apiKey,
 		apiVersion,
@@ -259,7 +249,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
 		maxRetries: 5,
 		defaultHeaders: headers,
 		baseURL: baseUrl,
-		fetch: options?.onSseEvent ? wrapFetchForSseDebug(fetch, event => options.onSseEvent?.(event, model)) : fetch,
+		fetch: options?.onSseEvent
+			? wrapFetchForSseDebug(baseFetch, event => options.onSseEvent?.(event, model))
+			: baseFetch,
 	});
 }
@@ -279,31 +271,7 @@ function buildParams(
 		prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
 	};
-	if (options?.maxTokens) {
-		params.max_output_tokens = options?.maxTokens;
-	}
-	if (options?.temperature !== undefined) {
-		params.temperature = options?.temperature;
-	}
-	if (options?.topP !== undefined) {
-		params.top_p = options.topP;
-	}
-	if (options?.topK !== undefined) {
-		params.top_k = options.topK;
-	}
-	if (options?.minP !== undefined) {
-		params.min_p = options.minP;
-	}
-	if (options?.presencePenalty !== undefined) {
-		params.presence_penalty = options.presencePenalty;
-	}
-	if (options?.repetitionPenalty !== undefined) {
-		params.repetition_penalty = options.repetitionPenalty;
-	}
-	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
-		params.service_tier = options.serviceTier;
-	}
+	applyCommonResponsesSamplingParams(params, options, model.provider);
 	if (context.tools) {
 		params.tools = convertTools(context.tools);
@@ -312,36 +280,7 @@ function buildParams(
 		}
 	}
-	if (model.reasoning) {
-		// Always request encrypted reasoning content so reasoning items can be
-		// replayed in multi-turn conversations when store is false (items aren't
-		// persisted server-side, so we must include the full content).
-		// See: https://github.com/can1357/oh-my-pi/issues/41
-		params.include = ["reasoning.encrypted_content"];
-		if (options?.reasoning || options?.reasoningSummary !== undefined) {
-			const reasoningParams: NonNullable<typeof params.reasoning> = {
-				effort: options?.reasoning || "medium",
-			};
-			if (options?.reasoningSummary !== null) {
-				reasoningParams.summary = options?.reasoningSummary || "auto";
-			}
-			params.reasoning = reasoningParams;
-		} else {
-			if (model.name.toLowerCase().startsWith("gpt-5")) {
-				// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
-				messages.push({
-					role: "developer",
-					content: [
-						{
-							type: "input_text",
-							text: "# Juice: 0 !important",
-						},
-					],
-				});
-			}
-		}
-	}
+	applyResponsesReasoningParams(params, model, options, messages);
 	return params;
 }

package/src/providers/gitlab-duo.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { ANTHROPIC_THINKING, mapAnthropicToolChoice } from "../stream";
-import type { Api, Context, Model, SimpleStreamOptions } from "../types";
+import type { Api, Context, FetchImpl, Model, SimpleStreamOptions } from "../types";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import type { OpenAICompletionsOptions } from "./openai-completions";
 import type { OpenAIResponsesOptions } from "./openai-responses";
@@ -172,13 +172,16 @@ interface DirectAccessToken {
 const directAccessCache = new Map<string, DirectAccessToken>();
-async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAccessToken> {
+async function getDirectAccessToken(
+	gitlabAccessToken: string,
+	fetchImpl: FetchImpl = fetch,
+): Promise<DirectAccessToken> {
 	const cached = directAccessCache.get(gitlabAccessToken);
 	if (cached && cached.expiresAt > Date.now()) {
 		return cached;
 	}
-	const response = await fetch(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
+	const response = await fetchImpl(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
 		method: "POST",
 		headers: {
 			Authorization: `Bearer ${gitlabAccessToken}`,
@@ -240,7 +243,7 @@ export function streamGitLabDuo(
 				throw new Error(`Unsupported GitLab Duo model: ${model.id}`);
 			}
-			const directAccess = await getDirectAccessToken(options.apiKey);
+			const directAccess = await getDirectAccessToken(options.apiKey, options.fetch);
 			const headers = {
 				...directAccess.headers,
 				...options.headers,
@@ -278,6 +281,7 @@ export function streamGitLabDuo(
 								onPayload: options.onPayload,
 								onResponse: options.onResponse,
 								onSseEvent: options.onSseEvent,
+								fetch: options.fetch,
 								thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
 								thinkingBudgetTokens: reasoningEffort
 									? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
@@ -314,6 +318,7 @@ export function streamGitLabDuo(
 									onPayload: options.onPayload,
 									onResponse: options.onResponse,
 									onSseEvent: options.onSseEvent,
+									fetch: options.fetch,
 									reasoning: reasoningEffort,
 									toolChoice: options.toolChoice,
 								} satisfies OpenAIResponsesOptions,
@@ -345,6 +350,7 @@ export function streamGitLabDuo(
 									onPayload: options.onPayload,
 									onResponse: options.onResponse,
 									onSseEvent: options.onSseEvent,
+									fetch: options.fetch,
 									reasoning: reasoningEffort,
 									toolChoice: options.toolChoice,
 								} satisfies OpenAICompletionsOptions,