npm - @oh-my-pi/pi-ai - Versions diffs - 14.1.2 → 14.2.1 - Mend

@oh-my-pi/pi-ai 14.1.2 → 14.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/CHANGELOG.md +57 -2
package/package.json +5 -3
package/src/auth-storage.ts +18 -6
package/src/model-thinking.ts +48 -2
package/src/models.json +2608 -388
package/src/provider-models/openai-compat.ts +97 -224
package/src/providers/amazon-bedrock.ts +103 -34
package/src/providers/anthropic.ts +44 -22
package/src/providers/azure-openai-responses.ts +4 -4
package/src/providers/cursor.ts +18 -12
package/src/providers/gitlab-duo.ts +2 -21
package/src/providers/kimi.ts +2 -22
package/src/providers/openai-codex-responses.ts +194 -23
package/src/providers/openai-completions.ts +22 -13
package/src/providers/openai-responses-shared.ts +143 -18
package/src/providers/openai-responses.ts +91 -15
package/src/providers/shared/error-message.ts +21 -0
package/src/providers/synthetic.ts +2 -22
package/src/stream.ts +1 -7
package/src/types.ts +34 -0
package/src/usage/kimi.ts +1 -11
package/src/usage/openai-codex.ts +1 -11
package/src/usage/shared.ts +10 -0
package/src/utils/anthropic-auth.ts +1 -7
package/src/utils/foundry.ts +8 -0
package/src/utils/http-inspector.ts +9 -2
package/src/utils/idle-iterator.ts +29 -54
package/src/utils/oauth/api-key-login.ts +87 -0
package/src/utils/oauth/cerebras.ts +15 -58
package/src/utils/oauth/google-antigravity.ts +11 -86
package/src/utils/oauth/google-gemini-cli.ts +11 -89
package/src/utils/oauth/google-oauth-shared.ts +110 -0
package/src/utils/oauth/moonshot.ts +15 -58
package/src/utils/oauth/nanogpt.ts +14 -50
package/src/utils/oauth/openai-codex.ts +3 -3
package/src/utils/oauth/synthetic.ts +15 -59
package/src/utils/oauth/together.ts +15 -58
package/src/utils/oauth/zenmux.ts +14 -50
package/src/utils/retry.ts +77 -0
package/src/utils/schema/CONSTRAINTS.md +1 -0
package/src/utils/schema/strict-mode.ts +10 -0
package/src/utils/tool-choice.ts +7 -1

package/CHANGELOG.md CHANGED

@@ -2,6 +2,42 @@
 ## [Unreleased]
+## [14.2.1] - 2026-04-24
+### Fixed
+- Fixed OpenAI Codex Spark OAuth selection to require a verified ChatGPT Pro account instead of falling back to Plus or unknown-plan accounts.
+## [14.2.0] - 2026-04-23
+### Added
+- Added `gpt-5.5` to the built-in model catalog for both OpenAI Responses (`openai`) and local `litellm` (`openai-completions`) providers
+- Added `gpt-image-2` to the `litellm` built-in model catalog
+- Added `isCopilotTransientModelError()` and `callWithCopilotModelRetry()` helpers in `utils/retry` that detect GitHub Copilot's intermittent `HTTP 400 model_not_supported` responses for preview models (`gpt-5.3-codex`, `gpt-5.4`, `gpt-5.4-mini`, ...) and retry the request up to three times with backoff. OpenAI Responses, OpenAI Completions, and Anthropic provider paths now participate in this retry when the model is served through Copilot.
+- Added OpenAI Responses custom-tool grammar support for Codex-style `apply_patch` calls, including freeform streaming, history replay, and forced tool-choice mapping to the custom wire name.
+### Changed
+- Updated built-in model metadata with revised `contextWindow`, `maxTokens`, and pricing values for existing entries
+- Changed generated model policies to assign `applyPatchToolType: "freeform"` for first-party GPT-5 OpenAI Responses and Codex models, so regenerated `models.json` preserves the `apply_patch` custom-tool metadata.
+- Renamed `rewriteCopilotAuthError` to `rewriteCopilotError` and extended it to rewrite `HTTP 400 model_not_supported` after retries are exhausted with guidance about Copilot's OAuth-client-specific rollout gap (see opencode#13313).
+### Fixed
+- Fixed Amazon Bedrock proxy handling to honor lowercase `http_proxy`, `https_proxy`, and `all_proxy` environment variables when using HTTP/1 fallback
+- Fixed Amazon Bedrock streaming behind corporate HTTP proxies by using a proxy-aware HTTP/1 transport when `HTTPS_PROXY`, `HTTP_PROXY`, or `ALL_PROXY` is configured, including AWS SSO credential calls.
+- Fixed Amazon Bedrock requests to retry once with HTTP/1 when the AWS SDK's default HTTP/2 transport fails before streaming begins.
+- Fixed OpenAI Responses streaming to display thinking tokens from local providers (llama.cpp, etc.) that send raw `reasoning_text.delta` events and empty `summary` arrays in `output_item.done`. Previously, thinking content was silently dropped during streaming while non-streaming mode worked correctly.
+- Synced the bundled OpenCode Go catalog with the current docs so `kimi-k2.6`, `mimo-v2.5`, and `mimo-v2.5-pro` appear in offline/default model lists.
+## [14.1.3] - 2026-04-17
+### Fixed
+- Preserved user-provided `session_id` and `x-client-request-id` headers in OpenAI Responses requests instead of overriding them with automatic session-derived values
+- Stopped sending `session_id` and `x-client-request-id` headers for OpenAI Responses requests when `cacheRetention` is set to `none`
+- Fixed direct OpenAI Responses requests to send `session_id` and `x-client-request-id` from the same session-derived value as `prompt_cache_key`, improving prompt cache affinity for append-only sessions
 ## [14.1.1] - 2026-04-14
 ### Added
@@ -21,6 +57,7 @@
 - Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
 ## [14.1.0] - 2026-04-11
 ### Added
 - Added `accountId` to usage report metadata
@@ -37,6 +74,7 @@
 ## [14.0.5] - 2026-04-11
 ### Changed
 - Replaced GitHub Copilot authentication from VSCode extension impersonation to the opencode OAuth flow, eliminating TOS concerns. Existing users will need to re-authenticate once with `/login github-copilot`.
 - Simplified Copilot token handling: GitHub OAuth token is used directly for all API requests (no JWT exchange or refresh cycle).
 - Changed GitHub Copilot API base URL from `api.individual.githubcopilot.com` to `api.githubcopilot.com`.
@@ -48,6 +86,7 @@
 - Fixed GitHub Copilot `/models` discovery to unwrap structured OAuth credentials before sending the bearer token, preserving dynamic catalog refresh for OAuth-backed callers.
 ### Removed
 - Removed Copilot JWT proxy-ep base URL resolution (no longer needed with opencode auth).
 ## [14.0.3] - 2026-04-09
@@ -57,6 +96,7 @@
 - Fixed Ollama discovery cache normalization so cached models upgrade to the OpenAI Responses transport after the provider change
 ## [14.0.0] - 2026-04-08
 ### Breaking Changes
 - Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing
@@ -79,6 +119,7 @@
 - Fixed Anthropic streaming to suppress transient SDK console errors for malformed SSE keep-alive frames so the TUI only shows surfaced provider errors
 - Added environment-based credential fallback for the OpenAI Codex provider.
 ## [13.17.6] - 2026-04-01
 ### Fixed
@@ -86,6 +127,7 @@
 - Fixed Anthropic first-event timeouts to exclude stream connection setup from the watchdog, preserve timeout-specific retry classification after local aborts, and reset retry state cleanly between attempts
 ## [13.17.5] - 2026-04-01
 ### Changed
 - Increased default first-event timeout from 15s to 45s to better accommodate longer request setup times
@@ -124,6 +166,7 @@
 - Added Vercel AI Gateway to `/login` providers for interactive API key setup
 ### Fixed
 - Fixed `omp commit` failing with HTTP 400 errors when using reasoning-enabled models on OpenAI-compatible endpoints that don't support the `developer` role (e.g., GitHub Copilot, custom proxies). Now falls back to `system` role when `developer` is unsupported.
 ## [13.17.0] - 2026-03-30
@@ -148,6 +191,7 @@
 - Fixed normalizeAnthropicBaseUrl returning empty string instead of undefined when baseUrl is empty
 ## [13.16.4] - 2026-03-28
 ### Added
 - Added support for Groq Compound and Compound Mini models with extended context window (131K tokens) and configurable thinking levels
@@ -168,6 +212,7 @@
 - Updated OpenRouter Claude 3.5 Sonnet pricing: input from 0.45 to 0.42, cache read from 0.225 to 0.21
 ## [13.16.3] - 2026-03-28
 ### Changed
 - Modified OAuth credential saving to preserve unrelated identities instead of replacing all credentials for a provider
@@ -193,6 +238,7 @@
 - Fixed `parseRateLimitReason` not recognizing "usage limit" in Codex error messages, causing incorrect fallback to `UNKNOWN` classification instead of `QUOTA_EXHAUSTED`
 ## [13.14.2] - 2026-03-21
 ### Changed
 - Updated thinking configuration format from `levels` array to `minLevel` and `maxLevel` properties for improved clarity
@@ -215,13 +261,14 @@
 - Added bundled GPT-5.4 mini model metadata for OpenAI, OpenAI Codex, and GitHub Copilot, including low-to-xhigh thinking support and GitHub Copilot premium multiplier metadata
 - Added bundled GPT-5.4 nano model metadata for OpenAI and OpenAI Codex, including low-to-xhigh thinking support
 ## [13.13.2] - 2026-03-18
 ### Changed
 - Modified tool result handling for aborted assistant messages to preserve existing tool results when already recorded, instead of always replacing them with synthetic 'aborted' results
 ## [13.13.0] - 2026-03-18
 ### Changed
 - Changed tool argument validation to always normalize optional null values before type coercion, ensuring consistent handling of LLM-generated 'null' strings
@@ -232,6 +279,7 @@
 - Improved type safety of `validateToolCall` and `validateToolArguments` functions by returning properly typed `ToolCall["arguments"]` instead of `any`
 ## [13.12.9] - 2026-03-17
 ### Changed
 - Extracted OpenAI compatibility detection and resolution logic into dedicated `openai-completions-compat` module for improved maintainability and reusability
@@ -281,6 +329,7 @@
 - Fixed auth schema V0-to-V1 migration crash when the V0 table lacks a `disabled` column
 ## [13.11.0] - 2026-03-12
 ### Added
 - Added support for Parallel AI provider with API key authentication
@@ -296,6 +345,7 @@
 - Improved retry logic to handle HTTP/2 stream errors and internal_error responses from Anthropic API
 ## [13.9.16] - 2026-03-10
 ### Added
 - Support for `onPayload` callback to replace provider request payloads before sending, enabling request interception and modification
@@ -318,11 +368,13 @@
 - Fixed handling of malformed JSON messages in websocket streams to trigger immediate fallback to SSE without retry attempts
 ## [13.9.13] - 2026-03-10
 ### Added
 - Added `isSpecialServiceTier` utility function to validate OpenAI service tier values
 ## [13.9.12] - 2026-03-09
 ### Added
 - Added Tavily web search provider support with API key authentication
@@ -355,11 +407,13 @@
 - Fixed auth storage to preserve newer recorded schema versions when opened by older binaries
 ## [13.9.8] - 2026-03-08
 ### Fixed
 - Fixed WebSocket stream fallback logic to safely replay buffered output over SSE when WebSocket fails after partial content has been streamed
 ## [13.9.4] - 2026-03-07
 ### Changed
 - Simplified API key credential storage to always replace existing credentials on re-login instead of accumulating multiple keys
@@ -372,6 +426,7 @@
 - Fixed Cerebras model compatibility by preventing `stream_options` usage requests in chat completions
 ## [13.9.3] - 2026-03-07
 ### Breaking Changes
 - Changed `reasoning` parameter from `ThinkingLevel | undefined` to `Effort | undefined` in `SimpleStreamOptions`; 'off' is no longer valid (omit the field instead)
@@ -2042,4 +2097,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
 ## [0.9.4] - 2025-11-26
-Initial release with multi-provider LLM support.
+Initial release with multi-provider LLM support.

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "14.1.2",
+	"version": "14.2.1",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://github.com/can1357/oh-my-pi",
 	"author": "Can Boluk",
@@ -43,16 +43,18 @@
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.78",
 		"@aws-sdk/client-bedrock-runtime": "^3",
+		"@aws-sdk/credential-provider-node": "^3",
 		"@bufbuild/protobuf": "^2.11",
 		"@google/genai": "^1.43",
-		"@oh-my-pi/pi-natives": "14.1.2",
-		"@oh-my-pi/pi-utils": "14.1.2",
+		"@oh-my-pi/pi-natives": "14.2.1",
+		"@oh-my-pi/pi-utils": "14.2.1",
 		"@sinclair/typebox": "^0.34",
 		"@smithy/node-http-handler": "^4.4",
 		"ajv": "^8.18",
 		"ajv-formats": "^3.0",
 		"openai": "^6.25",
 		"partial-json": "^0.1",
+		"proxy-agent": "^6.5",
 		"zod": "4.3.6"
 	},
 	"devDependencies": {

package/src/auth-storage.ts CHANGED

@@ -208,6 +208,10 @@ function getOpenAICodexPlanPriority(report: UsageReport | null): number {
 	return planType.includes("pro") ? 0 : 2;
 }
+function hasOpenAICodexProPlan(report: UsageReport | null): boolean {
+	return getUsagePlanType(report)?.includes("pro") === true;
+}
 function resolveDefaultUsageProvider(provider: Provider): UsageProvider | undefined {
 	return DEFAULT_USAGE_PROVIDER_MAP.get(provider);
 }
@@ -1656,14 +1660,14 @@ export class AuthStorage {
 		const providerKey = this.#getProviderTypeKey(provider, "oauth");
 		const order = this.#getCredentialOrder(providerKey, sessionId, credentials.length);
 		const strategy = this.#rankingStrategyResolver?.(provider);
-		const checkUsage = strategy !== undefined && credentials.length > 1;
+		const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
+		const checkUsage = strategy !== undefined && (credentials.length > 1 || requiresProModel);
 		const sessionCredential = this.#getSessionCredential(provider, sessionId);
 		const sessionPreferredIndex = sessionCredential?.type === "oauth" ? sessionCredential.index : undefined;
 		// Skip ranking only when the session already has a working preferred credential — re-ranking
 		// mid-session causes account switches that cold-start the server-side prompt cache. New sessions
 		// (no preference) and sessions whose preferred is blocked still rank, so we pick the account
 		// with the most headroom proactively and fall back intelligently when rate-limited.
-		const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
 		const sessionPreferredIsAvailable =
 			sessionPreferredIndex !== undefined && !this.#isCredentialBlocked(providerKey, sessionPreferredIndex);
 		const shouldRank = checkUsage && (!sessionPreferredIsAvailable || requiresProModel);
@@ -1777,10 +1781,11 @@ export class AuthStorage {
 			return undefined;
 		}
+		const requiresProModel = requiresOpenAICodexProModel(provider, options?.modelId);
 		let usage: UsageReport | null = null;
 		let usageChecked = false;
-		if (checkUsage && !allowBlocked) {
+		if ((checkUsage && !allowBlocked) || requiresProModel) {
 			if (usagePrechecked) {
 				usage = prefetchedUsage;
 				usageChecked = true;
@@ -1791,7 +1796,10 @@ export class AuthStorage {
 				});
 				usageChecked = true;
 			}
-			if (usage && this.#isUsageLimitReached(usage)) {
+			if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
+				return undefined;
+			}
+			if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
 				const resetAtMs = this.#getUsageResetAtMs(usage, Date.now());
 				this.#markCredentialBlocked(
 					providerKey,
@@ -1829,15 +1837,19 @@ export class AuthStorage {
 				enterpriseUrl: result.newCredentials.enterpriseUrl ?? selection.credential.enterpriseUrl,
 			};
 			this.#replaceCredentialAt(provider, selection.index, updated);
-			if (checkUsage && !allowBlocked) {
+			if ((checkUsage && !allowBlocked) || requiresProModel) {
 				const sameAccount = selection.credential.accountId === updated.accountId;
 				if (!usageChecked || !sameAccount) {
 					usage = await this.#getUsageReport(provider, updated, {
 						...options,
 						timeoutMs: this.#usageRequestTimeoutMs,
 					});
+					usageChecked = true;
+				}
+				if (requiresProModel && !hasOpenAICodexProPlan(usage)) {
+					return undefined;
 				}
-				if (usage && this.#isUsageLimitReached(usage)) {
+				if (checkUsage && !allowBlocked && usage && this.#isUsageLimitReached(usage)) {
 					const resetAtMs = this.#getUsageResetAtMs(usage, Date.now());
 					this.#markCredentialBlocked(
 						providerKey,

package/src/model-thinking.ts CHANGED

@@ -48,6 +48,14 @@ const CODEX_GPT_5_4_PRIORITY_BY_VARIANT: Partial<Record<OpenAIVariant, number>>
 	nano: 2,
 };
+const COPILOT_GENERATED_LIMITS: Record<string, { contextWindow: number; maxTokens: number }> = {
+	"claude-opus-4.6": { contextWindow: 168000, maxTokens: 32000 },
+	"gpt-5.2": { contextWindow: 272000, maxTokens: 128000 },
+	"gpt-5.4": { contextWindow: 272000, maxTokens: 128000 },
+	"gpt-5.4-mini": { contextWindow: 272000, maxTokens: 128000 },
+	"grok-code-fast-1": { contextWindow: 192000, maxTokens: 64000 },
+};
 interface GeminiModel {
 	family: "gemini";
 	kind: GeminiKind;
@@ -258,7 +266,7 @@ export function mapEffortToGoogleThinkingLevel<TApi extends Api>(
 export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
 	model: ApiModel<TApi>,
 	effort: Effort,
-): "low" | "medium" | "high" | "max" {
+): "low" | "medium" | "high" | "xhigh" | "max" {
 	switch (requireSupportedEffort(model, effort)) {
 		case Effort.Minimal:
 		case Effort.Low:
@@ -268,12 +276,34 @@ export function mapEffortToAnthropicAdaptiveEffort<TApi extends Api>(
 		case Effort.High:
 			return "high";
 		case Effort.XHigh:
-			return "max";
+			// Opus 4.7+ introduced a distinct "xhigh" effort level (between "high" and "max").
+			// The Anthropic docs scope this to the Messages API only, so Bedrock Converse and
+			// older adaptive-thinking Opus 4.6 models keep the legacy "max" alias.
+			return anthropicModelHasRealXHighEffort(model) ? "xhigh" : "max";
 	}
 }
+function anthropicModelHasRealXHighEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
+	if (model.api !== "anthropic-messages") return false;
+	const parsedModel = parseKnownModel(model.id);
+	if (parsedModel.family !== "anthropic" || parsedModel.kind !== "opus") return false;
+	return semverGte(parsedModel.version, "4.7");
+}
 function applyGeneratedModelPolicy(model: ApiModel<Api>): void {
+	const copilotLimits = model.provider === "github-copilot" ? COPILOT_GENERATED_LIMITS[model.id] : undefined;
+	if (copilotLimits) {
+		model.contextWindow = copilotLimits.contextWindow;
+		model.maxTokens = copilotLimits.maxTokens;
+	}
 	const parsedModel = parseKnownModel(model.id);
+	const applyPatchToolType = inferGeneratedApplyPatchToolType(model, parsedModel);
+	if (applyPatchToolType) {
+		model.applyPatchToolType = applyPatchToolType;
+	} else {
+		delete model.applyPatchToolType;
+	}
 	if (parsedModel.family === "anthropic") {
 		applyAnthropicCatalogPolicy(model, parsedModel);
 	}
@@ -298,6 +328,22 @@ function applyAnthropicCatalogPolicy(model: ApiModel<Api>, parsedModel: Anthropi
 	}
 }
+function inferGeneratedApplyPatchToolType(
+	model: ApiModel<Api>,
+	parsedModel: ParsedModel,
+): ApiModel<Api>["applyPatchToolType"] {
+	if (parsedModel.family !== "openai" || parsedModel.version.major !== 5) {
+		return undefined;
+	}
+	if (model.provider === "openai" && model.api === "openai-responses") {
+		return "freeform";
+	}
+	if (model.provider === "openai-codex" && model.api === "openai-codex-responses") {
+		return "freeform";
+	}
+	return undefined;
+}
 function applyOpenAICatalogPolicy(model: ApiModel<Api>, parsedModel: OpenAIModel): void {
 	// Codex models: 400K figure includes output budget; input window is 272K.
 	if (parsedModel.variant.startsWith("codex") && parsedModel.variant !== "codex-spark") {