npm - @oh-my-pi/pi-ai - Versions diffs - 13.14.0 → 13.15.2 - Mend

@oh-my-pi/pi-ai 13.14.0 → 13.15.2

This diff compares the contents of two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (19)

package/CHANGELOG.md +27 -0
package/package.json +2 -2
package/src/index.ts +1 -0
package/src/model-thinking.ts +19 -5
package/src/models.json +2427 -998
package/src/providers/amazon-bedrock.ts +10 -0
package/src/providers/anthropic.ts +32 -13
package/src/providers/google-gemini-cli.ts +1 -1
package/src/providers/google-shared.ts +21 -8
package/src/providers/openai-codex-responses.ts +12 -1
package/src/providers/openai-completions-compat.ts +7 -1
package/src/providers/openai-completions.ts +38 -11
package/src/providers/openai-responses-shared.ts +25 -2
package/src/providers/register-builtins.ts +329 -0
package/src/rate-limit-utils.ts +9 -1
package/src/types.ts +3 -2
package/src/usage/claude.ts +1 -1
package/src/usage/zai.ts +1 -1
package/src/utils/retry.ts +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,33 @@
 ## [Unreleased]
+## [13.15.0] - 2026-03-23
+### Added
+- Added `isUsageLimitError()` to `rate-limit-utils` as a single source of truth for detecting usage/quota limit errors across all providers
+### Fixed
+- Fixed lazy stream forwarding to properly handle final results from source streams with `result()` methods
+- Fixed lazy stream error handling to convert iterator failures into terminal error results instead of silently failing
+- Fixed `parseRateLimitReason` to recognize "usage limit" in error messages and correctly classify them as `QUOTA_EXHAUSTED`
+- Fixed Codex `fetchWithRetry` retrying 429 responses for `usage_limit_reached` errors for up to 5 minutes instead of returning immediately for credential switching
+- Removed `usage.?limit` from `TRANSIENT_MESSAGE_PATTERN` in retry utils since usage limits are not transient and require credential rotation
+- Fixed `parseRateLimitReason` not recognizing "usage limit" in Codex error messages, causing incorrect fallback to `UNKNOWN` classification instead of `QUOTA_EXHAUSTED`
+## [13.14.2] - 2026-03-21
+### Changed
+- Updated thinking configuration format from `levels` array to `minLevel` and `maxLevel` properties for improved clarity
+- Corrected context window from 400000 to 272000 tokens for GPT-5.4 mini and nano variants on Codex transport
+- Normalized GPT-5.4 variant priority handling to use parsed variant instead of special-casing raw model IDs
+- Added support for `mini` variant in OpenAI model parsing regex
+### Fixed
+- Fixed inconsistent thinking level configuration across multiple model definitions
 ## [13.14.0] - 2026-03-20
 ### Fixed

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "13.14.0",
+	"version": "13.15.2",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://github.com/can1357/oh-my-pi",
 	"author": "Can Boluk",
@@ -41,7 +41,7 @@
 		"@aws-sdk/client-bedrock-runtime": "^3",
 		"@bufbuild/protobuf": "^2.11",
 		"@google/genai": "^1.43",
-		"@oh-my-pi/pi-utils": "13.14.0",
+		"@oh-my-pi/pi-utils": "13.15.2",
 		"@sinclair/typebox": "^0.34",
 		"@smithy/node-http-handler": "^4.4",
 		"ajv": "^8.18",

package/src/index.ts CHANGED Viewed

@@ -16,6 +16,7 @@ export * from "./providers/google";
 export * from "./providers/google-gemini-cli";
 export * from "./providers/google-vertex";
 export * from "./providers/kimi";
+export type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
 export * from "./providers/openai-completions";
 export * from "./providers/openai-responses";
 export * from "./providers/synthetic";

package/src/model-thinking.ts CHANGED Viewed

@@ -40,7 +40,13 @@ type SemVer = {
 type GeminiKind = "pro" | "flash";
 type AnthropicKind = "opus" | "sonnet";
-type OpenAIVariant = "base" | "codex" | "codex-max" | "codex-mini" | "codex-spark" | "max" | "nano";
+type OpenAIVariant = "base" | "codex" | "codex-max" | "codex-mini" | "codex-spark" | "mini" | "max" | "nano";
+const CODEX_GPT_5_4_PRIORITY_BY_VARIANT: Partial<Record<OpenAIVariant, number>> = {
+	base: 0,
+	mini: 1,
+	nano: 2,
+};
 interface GeminiModel {
 	family: "gemini";
@@ -299,9 +305,17 @@ function applyOpenAICatalogPolicy(model: ApiModel<Api>, parsedModel: OpenAIModel
 		return;
 	}
 	// GPT-5.4 mini/nano use plain OpenAI IDs on the Codex transport, but Codex still
-	// enforces the lower prompt budget for these variants.
-	if (model.api === "openai-codex-responses" && (model.id === "gpt-5.4-mini" || model.id === "gpt-5.4-nano")) {
-		model.contextWindow = 272000;
+	// enforces the lower prompt budget for these variants. Codex discovery can also
+	// report inconsistent priorities for the GPT-5.4 family, so normalize by parsed
+	// variant instead of special-casing raw model ids.
+	if (model.api === "openai-codex-responses" && semverEqual(parsedModel.version, "5.4")) {
+		const normalizedPriority = CODEX_GPT_5_4_PRIORITY_BY_VARIANT[parsedModel.variant];
+		if (normalizedPriority !== undefined) {
+			model.priority = normalizedPriority;
+		}
+		if (parsedModel.variant === "mini" || parsedModel.variant === "nano") {
+			model.contextWindow = 272000;
+		}
 	}
 }
@@ -489,7 +503,7 @@ function parseAnthropicModel(modelId: string): AnthropicModel | null {
 }
 function parseOpenAIModel(modelId: string): OpenAIModel | null {
-	const match = /gpt-(\d+(?:\.\d+){0,2})(?:-(codex-spark|codex-mini|codex-max|codex|max|nano))?\b/.exec(modelId);
+	const match = /gpt-(\d+(?:\.\d+){0,2})(?:-(codex-spark|codex-mini|codex-max|codex|mini|max|nano))?\b/.exec(modelId);
 	if (!match) {
 		return null;
 	}