npm - @oh-my-pi/pi-ai - Versions diffs - 11.6.1 → 11.7.1 - Mend

@oh-my-pi/pi-ai 11.6.1 → 11.7.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (2)

package/package.json +2 -2
package/src/models.generated.ts +193 -124

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
 	"name": "@oh-my-pi/pi-ai",
-	"version": "11.6.1",
+	"version": "11.7.1",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"type": "module",
 	"main": "./src/index.ts",
@@ -63,7 +63,7 @@
 		"@connectrpc/connect-node": "^2.1.1",
 		"@google/genai": "^1.39.0",
 		"@mistralai/mistralai": "^1.13.0",
-		"@oh-my-pi/pi-utils": "11.6.1",
+		"@oh-my-pi/pi-utils": "11.7.1",
 		"@sinclair/typebox": "^0.34.48",
 		"@smithy/node-http-handler": "^4.4.9",
 		"ajv": "^8.17.1",

package/src/models.generated.ts CHANGED

@@ -107,8 +107,8 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 4096,
 		} satisfies Model<"bedrock-converse-stream">,
-		"anthropic.claude-opus-4-6-v1:0": {
-			id: "anthropic.claude-opus-4-6-v1:0",
+		"anthropic.claude-opus-4-6-v1": {
+			id: "anthropic.claude-opus-4-6-v1",
 			name: "Claude Opus 4.6",
 			api: "bedrock-converse-stream",
 			provider: "amazon-bedrock",
@@ -121,7 +121,7 @@ export const MODELS = {
 				cacheRead: 0.5,
 				cacheWrite: 6.25,
 			},
-			contextWindow: 200000,
+			contextWindow: 1000000,
 			maxTokens: 128000,
 		} satisfies Model<"bedrock-converse-stream">,
 		"cohere.command-r-plus-v1:0": {
@@ -209,8 +209,8 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 64000,
 		} satisfies Model<"bedrock-converse-stream">,
-		"eu.anthropic.claude-opus-4-6-v1:0": {
-			id: "eu.anthropic.claude-opus-4-6-v1:0",
+		"eu.anthropic.claude-opus-4-6-v1": {
+			id: "eu.anthropic.claude-opus-4-6-v1",
 			name: "Claude Opus 4.6 (EU)",
 			api: "bedrock-converse-stream",
 			provider: "amazon-bedrock",
@@ -220,10 +220,10 @@ export const MODELS = {
 			cost: {
 				input: 5,
 				output: 25,
-				cacheRead: 1.5,
-				cacheWrite: 18.75,
+				cacheRead: 0.5,
+				cacheWrite: 6.25,
 			},
-			contextWindow: 200000,
+			contextWindow: 1000000,
 			maxTokens: 128000,
 		} satisfies Model<"bedrock-converse-stream">,
 		"eu.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -311,8 +311,8 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 64000,
 		} satisfies Model<"bedrock-converse-stream">,
-		"global.anthropic.claude-opus-4-6-v1:0": {
-			id: "global.anthropic.claude-opus-4-6-v1:0",
+		"global.anthropic.claude-opus-4-6-v1": {
+			id: "global.anthropic.claude-opus-4-6-v1",
 			name: "Claude Opus 4.6 (Global)",
 			api: "bedrock-converse-stream",
 			provider: "amazon-bedrock",
@@ -325,7 +325,7 @@ export const MODELS = {
 				cacheRead: 0.5,
 				cacheWrite: 6.25,
 			},
-			contextWindow: 200000,
+			contextWindow: 1000000,
 			maxTokens: 128000,
 		} satisfies Model<"bedrock-converse-stream">,
 		"global.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -900,14 +900,14 @@ export const MODELS = {
 			cost: {
 				input: 5,
 				output: 25,
-				cacheRead: 1.5,
-				cacheWrite: 18.75,
+				cacheRead: 0.5,
+				cacheWrite: 6.25,
 			},
 			contextWindow: 200000,
 			maxTokens: 64000,
 		} satisfies Model<"bedrock-converse-stream">,
-		"us.anthropic.claude-opus-4-6-v1:0": {
-			id: "us.anthropic.claude-opus-4-6-v1:0",
+		"us.anthropic.claude-opus-4-6-v1": {
+			id: "us.anthropic.claude-opus-4-6-v1",
 			name: "Claude Opus 4.6 (US)",
 			api: "bedrock-converse-stream",
 			provider: "amazon-bedrock",
@@ -917,10 +917,10 @@ export const MODELS = {
 			cost: {
 				input: 5,
 				output: 25,
-				cacheRead: 1.5,
-				cacheWrite: 18.75,
+				cacheRead: 0.5,
+				cacheWrite: 6.25,
 			},
-			contextWindow: 200000,
+			contextWindow: 1000000,
 			maxTokens: 128000,
 		} satisfies Model<"bedrock-converse-stream">,
 		"us.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -1398,23 +1398,6 @@ export const MODELS = {
 				cacheRead: 0.5,
 				cacheWrite: 6.25,
 			},
-			contextWindow: 1000000,
-			maxTokens: 128000,
-		} satisfies Model<"anthropic-messages">,
-		"claude-opus-4-6-20260205": {
-			id: "claude-opus-4-6-20260205",
-			name: "Claude Opus 4.6",
-			api: "anthropic-messages",
-			provider: "anthropic",
-			baseUrl: "https://api.anthropic.com",
-			reasoning: true,
-			input: ["text", "image"],
-			cost: {
-				input: 5,
-				output: 25,
-				cacheRead: 0.5,
-				cacheWrite: 6.25,
-			},
 			contextWindow: 200000,
 			maxTokens: 128000,
 		} satisfies Model<"anthropic-messages">,
@@ -1819,7 +1802,7 @@ export const MODELS = {
 				cacheWrite: 0,
 			},
 			contextWindow: 128000,
-			maxTokens: 16000,
+			maxTokens: 64000,
 		} satisfies Model<"openai-completions">,
 		"claude-sonnet-4": {
 			id: "claude-sonnet-4",
@@ -2480,17 +2463,34 @@ export const MODELS = {
 	"google-antigravity": {
 		"claude-opus-4-5-thinking": {
 			id: "claude-opus-4-5-thinking",
-			name: "Claude Opus 4.5 Thinking (Antigravity)",
+			name: "Claude Opus 4.5 (Thinking) (Antigravity)",
 			api: "google-gemini-cli",
 			provider: "google-antigravity",
 			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
-				input: 5,
-				output: 25,
-				cacheRead: 0.5,
-				cacheWrite: 6.25,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 200000,
+			maxTokens: 64000,
+		} satisfies Model<"google-gemini-cli">,
+		"claude-opus-4-6-thinking": {
+			id: "claude-opus-4-6-thinking",
+			name: "Claude Opus 4.6 (Thinking) (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
 			},
 			contextWindow: 200000,
 			maxTokens: 64000,
@@ -2504,31 +2504,82 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text", "image"],
 			cost: {
-				input: 3,
-				output: 15,
-				cacheRead: 0.3,
-				cacheWrite: 3.75,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
 			},
 			contextWindow: 200000,
 			maxTokens: 64000,
 		} satisfies Model<"google-gemini-cli">,
 		"claude-sonnet-4-5-thinking": {
 			id: "claude-sonnet-4-5-thinking",
-			name: "Claude Sonnet 4.5 Thinking (Antigravity)",
+			name: "Claude Sonnet 4.5 (Thinking) (Antigravity)",
 			api: "google-gemini-cli",
 			provider: "google-antigravity",
 			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
-				input: 3,
-				output: 15,
-				cacheRead: 0.3,
-				cacheWrite: 3.75,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
 			},
 			contextWindow: 200000,
 			maxTokens: 64000,
 		} satisfies Model<"google-gemini-cli">,
+		"gemini-2.5-flash": {
+			id: "gemini-2.5-flash",
+			name: "Gemini 2.5 Flash (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		} satisfies Model<"google-gemini-cli">,
+		"gemini-2.5-flash-thinking": {
+			id: "gemini-2.5-flash-thinking",
+			name: "Gemini 2.5 Flash (Thinking) (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		} satisfies Model<"google-gemini-cli">,
+		"gemini-2.5-pro": {
+			id: "gemini-2.5-pro",
+			name: "Gemini 2.5 Pro (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		} satisfies Model<"google-gemini-cli">,
 		"gemini-3-flash": {
 			id: "gemini-3-flash",
 			name: "Gemini 3 Flash (Antigravity)",
@@ -2538,59 +2589,59 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
-				input: 0.5,
-				output: 3,
-				cacheRead: 0.5,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 1048576,
-			maxTokens: 65535,
+			maxTokens: 65536,
 		} satisfies Model<"google-gemini-cli">,
 		"gemini-3-pro-high": {
 			id: "gemini-3-pro-high",
-			name: "Gemini 3 Pro High (Antigravity)",
+			name: "Gemini 3 Pro (High) (Antigravity)",
 			api: "google-gemini-cli",
 			provider: "google-antigravity",
 			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
-				input: 2,
-				output: 12,
-				cacheRead: 0.2,
-				cacheWrite: 2.375,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
 			},
 			contextWindow: 1048576,
 			maxTokens: 65535,
 		} satisfies Model<"google-gemini-cli">,
 		"gemini-3-pro-low": {
 			id: "gemini-3-pro-low",
-			name: "Gemini 3 Pro Low (Antigravity)",
+			name: "Gemini 3 Pro (Low) (Antigravity)",
 			api: "google-gemini-cli",
 			provider: "google-antigravity",
 			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
-				input: 2,
-				output: 12,
-				cacheRead: 0.2,
-				cacheWrite: 2.375,
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
 			},
 			contextWindow: 1048576,
 			maxTokens: 65535,
 		} satisfies Model<"google-gemini-cli">,
 		"gpt-oss-120b-medium": {
 			id: "gpt-oss-120b-medium",
-			name: "GPT-OSS 120B Medium (Antigravity)",
+			name: "GPT-OSS 120B (Medium) (Antigravity)",
 			api: "google-gemini-cli",
 			provider: "google-antigravity",
 			baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com",
-			reasoning: false,
+			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.09,
-				output: 0.36,
+				input: 0,
+				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -4909,8 +4960,8 @@ export const MODELS = {
 			input: ["text"],
 			cost: {
 				input: 0.09,
-				output: 0.39999999999999997,
-				cacheRead: 0,
+				output: 0.44999999999999996,
+				cacheRead: 0.09,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
@@ -5408,7 +5459,7 @@ export const MODELS = {
 			cost: {
 				input: 0.3,
 				output: 1.2,
-				cacheRead: 0,
+				cacheRead: 0.15,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@@ -5425,7 +5476,7 @@ export const MODELS = {
 			cost: {
 				input: 0.19,
 				output: 0.87,
-				cacheRead: 0,
+				cacheRead: 0.095,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@@ -5476,7 +5527,7 @@ export const MODELS = {
 			cost: {
 				input: 0.39999999999999997,
 				output: 1.75,
-				cacheRead: 0,
+				cacheRead: 0.19999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@@ -5527,7 +5578,7 @@ export const MODELS = {
 			cost: {
 				input: 0.25,
 				output: 0.38,
-				cacheRead: 0,
+				cacheRead: 0.125,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@@ -5748,7 +5799,7 @@ export const MODELS = {
 			cost: {
 				input: 0.04,
 				output: 0.15,
-				cacheRead: 0,
+				cacheRead: 0.02,
 				cacheWrite: 0,
 			},
 			contextWindow: 128000,
@@ -5848,12 +5899,12 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 3.5,
-				output: 3.5,
+				input: 4,
+				output: 4,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
-			contextWindow: 10000,
+			contextWindow: 131000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-70b-instruct": {
@@ -6037,7 +6088,7 @@ export const MODELS = {
 			cost: {
 				input: 0.049999999999999996,
 				output: 0.22,
-				cacheRead: 0,
+				cacheRead: 0.024999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -6307,13 +6358,13 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.03,
-				output: 0.11,
+				input: 0.049999999999999996,
+				output: 0.08,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 32768,
-			maxTokens: 32768,
+			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
 		"mistralai/mistral-small-3.1-24b-instruct": {
 			id: "mistralai/mistral-small-3.1-24b-instruct",
@@ -6326,7 +6377,7 @@ export const MODELS = {
 			cost: {
 				input: 0.03,
 				output: 0.11,
-				cacheRead: 0,
+				cacheRead: 0.015,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
@@ -6360,7 +6411,7 @@ export const MODELS = {
 			cost: {
 				input: 0.06,
 				output: 0.18,
-				cacheRead: 0,
+				cacheRead: 0.03,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
@@ -6513,7 +6564,7 @@ export const MODELS = {
 			cost: {
 				input: 0.39,
 				output: 1.9,
-				cacheRead: 0,
+				cacheRead: 0.195,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -6547,7 +6598,7 @@ export const MODELS = {
 			cost: {
 				input: 0.39999999999999997,
 				output: 1.75,
-				cacheRead: 0,
+				cacheRead: 0.19999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -6563,12 +6614,12 @@ export const MODELS = {
 			input: ["text", "image"],
 			cost: {
 				input: 0.44999999999999996,
-				output: 2.5,
-				cacheRead: 0,
+				output: 2.25,
+				cacheRead: 0.075,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
-			maxTokens: 65535,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"nex-agi/deepseek-v3.1-nex-n1": {
 			id: "nex-agi/deepseek-v3.1-nex-n1",
@@ -6598,7 +6649,7 @@ export const MODELS = {
 			cost: {
 				input: 0.02,
 				output: 0.09999999999999999,
-				cacheRead: 0,
+				cacheRead: 0.01,
 				cacheWrite: 0,
 			},
 			contextWindow: 32768,
@@ -6615,7 +6666,7 @@ export const MODELS = {
 			cost: {
 				input: 0.11,
 				output: 0.38,
-				cacheRead: 0,
+				cacheRead: 0.055,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
@@ -7395,13 +7446,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.02,
-				output: 0.09999999999999999,
+				input: 0.03,
+				output: 0.14,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 131072,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"openai/gpt-oss-20b:free": {
 			id: "openai/gpt-oss-20b:free",
@@ -7624,6 +7675,24 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"openrouter/pony-alpha": {
+			id: "openrouter/pony-alpha",
+			name: "Pony Alpha",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			compat: {"supportsToolChoice":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 200000,
+			maxTokens: 131000,
+		} satisfies Model<"openai-completions">,
 		"prime-intellect/intellect-3": {
 			id: "prime-intellect/intellect-3",
 			name: "Prime Intellect: INTELLECT-3",
@@ -7788,7 +7857,7 @@ export const MODELS = {
 			cost: {
 				input: 0.049999999999999996,
 				output: 0.22,
-				cacheRead: 0,
+				cacheRead: 0.024999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 40960,
@@ -7839,7 +7908,7 @@ export const MODELS = {
 			cost: {
 				input: 0.11,
 				output: 0.6,
-				cacheRead: 0,
+				cacheRead: 0.055,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -7856,7 +7925,7 @@ export const MODELS = {
 			cost: {
 				input: 0.06,
 				output: 0.22,
-				cacheRead: 0,
+				cacheRead: 0.03,
 				cacheWrite: 0,
 			},
 			contextWindow: 40960,
@@ -7873,7 +7942,7 @@ export const MODELS = {
 			cost: {
 				input: 0.08,
 				output: 0.33,
-				cacheRead: 0,
+				cacheRead: 0.04,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -7907,7 +7976,7 @@ export const MODELS = {
 			cost: {
 				input: 0.08,
 				output: 0.24,
-				cacheRead: 0,
+				cacheRead: 0.04,
 				cacheWrite: 0,
 			},
 			contextWindow: 40960,
@@ -7957,12 +8026,12 @@ export const MODELS = {
 			input: ["text"],
 			cost: {
 				input: 0.22,
-				output: 0.95,
-				cacheRead: 0,
+				output: 1,
+				cacheRead: 0.022,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
-			maxTokens: 262144,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"qwen/qwen3-coder-30b-a3b-instruct": {
 			id: "qwen/qwen3-coder-30b-a3b-instruct",
@@ -8009,7 +8078,7 @@ export const MODELS = {
 			cost: {
 				input: 0.07,
 				output: 0.3,
-				cacheRead: 0,
+				cacheRead: 0.035,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -8038,7 +8107,7 @@ export const MODELS = {
 			api: "openai-completions",
 			provider: "openrouter",
 			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: true,
+			reasoning: false,
 			input: ["text"],
 			cost: {
 				input: 0.22,
@@ -8384,7 +8453,7 @@ export const MODELS = {
 			cost: {
 				input: 0.25,
 				output: 0.85,
-				cacheRead: 0,
+				cacheRead: 0.125,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@@ -8401,7 +8470,7 @@ export const MODELS = {
 			cost: {
 				input: 0.25,
 				output: 0.85,
-				cacheRead: 0,
+				cacheRead: 0.125,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@@ -8588,7 +8657,7 @@ export const MODELS = {
 			cost: {
 				input: 0.09,
 				output: 0.29,
-				cacheRead: 0,
+				cacheRead: 0.045,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
@@ -8622,7 +8691,7 @@ export const MODELS = {
 			cost: {
 				input: 0.35,
 				output: 1.55,
-				cacheRead: 0,
+				cacheRead: 0.175,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
@@ -8637,13 +8706,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.049999999999999996,
-				output: 0.22,
-				cacheRead: 0,
+				input: 0.13,
+				output: 0.85,
+				cacheRead: 0.024999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 131072,
+			maxTokens: 98304,
 		} satisfies Model<"openai-completions">,
 		"z-ai/glm-4.5-air:free": {
 			id: "z-ai/glm-4.5-air:free",
@@ -8690,7 +8759,7 @@ export const MODELS = {
 			cost: {
 				input: 0.35,
 				output: 1.5,
-				cacheRead: 0,
+				cacheRead: 0.175,
 				cacheWrite: 0,
 			},
 			contextWindow: 202752,
@@ -8741,7 +8810,7 @@ export const MODELS = {
 			cost: {
 				input: 0.39999999999999997,
 				output: 1.5,
-				cacheRead: 0,
+				cacheRead: 0.19999999999999998,
 				cacheWrite: 0,
 			},
 			contextWindow: 202752,
@@ -8756,13 +8825,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.07,
+				input: 0.06,
 				output: 0.39999999999999997,
-				cacheRead: 0.01,
+				cacheRead: 0.0100000002,
 				cacheWrite: 0,
 			},
-			contextWindow: 200000,
-			maxTokens: 131072,
+			contextWindow: 202752,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 	},
 	"vercel-ai-gateway": {
@@ -8860,8 +8929,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.38,
-				output: 1.53,
+				input: 0.39999999999999997,
+				output: 1.5999999999999999,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@@ -9625,13 +9694,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.28,
+				input: 0.3,
 				output: 1.2,
-				cacheRead: 0.14,
+				cacheRead: 0.15,
 				cacheWrite: 0,
 			},
-			contextWindow: 196608,
-			maxTokens: 196608,
+			contextWindow: 204800,
+			maxTokens: 131072,
 		} satisfies Model<"anthropic-messages">,
 		"minimax/minimax-m2.1-lightning": {
 			id: "minimax/minimax-m2.1-lightning",