npm - gsd-pi - Versions diffs - 2.42.0-dev.97e9e30 → 2.42.0-dev.eedc83f - Mend

gsd-pi 2.42.0-dev.97e9e30 → 2.42.0-dev.eedc83f

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

package/packages/pi-ai/scripts/generate-models.ts ADDED Viewed

@@ -0,0 +1,1543 @@
+#!/usr/bin/env tsx
+import { writeFileSync } from "fs";
+import { join, dirname } from "path";
+import { fileURLToPath } from "url";
+import { Api, KnownProvider, Model } from "../src/types.js";
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const packageRoot = join(__dirname, "..");
+interface ModelsDevModel {
+	id: string;
+	name: string;
+	tool_call?: boolean;
+	reasoning?: boolean;
+	limit?: {
+		context?: number;
+		output?: number;
+	};
+	cost?: {
+		input?: number;
+		output?: number;
+		cache_read?: number;
+		cache_write?: number;
+	};
+	modalities?: {
+		input?: string[];
+	};
+	provider?: {
+		npm?: string;
+	};
+}
+interface AiGatewayModel {
+	id: string;
+	name?: string;
+	context_window?: number;
+	max_tokens?: number;
+	tags?: string[];
+	pricing?: {
+		input?: string | number;
+		output?: string | number;
+		input_cache_read?: string | number;
+		input_cache_write?: string | number;
+	};
+}
+const COPILOT_STATIC_HEADERS = {
+	"User-Agent": "GitHubCopilotChat/0.35.0",
+	"Editor-Version": "vscode/1.107.0",
+	"Editor-Plugin-Version": "copilot-chat/0.35.0",
+	"Copilot-Integration-Id": "vscode-chat",
+} as const;
+const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1";
+const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh";
+async function fetchOpenRouterModels(): Promise<Model<any>[]> {
+	try {
+		console.log("Fetching models from OpenRouter API...");
+		const response = await fetch("https://openrouter.ai/api/v1/models");
+		const data = await response.json();
+		const models: Model<any>[] = [];
+		for (const model of data.data) {
+			// Only include models that support tools
+			if (!model.supported_parameters?.includes("tools")) continue;
+			// Parse provider from model ID
+			let provider: KnownProvider = "openrouter";
+			let modelKey = model.id;
+			modelKey = model.id; // Keep full ID for OpenRouter
+			// Parse input modalities
+			const input: ("text" | "image")[] = ["text"];
+			if (model.architecture?.modality?.includes("image")) {
+				input.push("image");
+			}
+			// Convert pricing from $/token to $/million tokens
+			const inputCost = parseFloat(model.pricing?.prompt || "0") * 1_000_000;
+			const outputCost = parseFloat(model.pricing?.completion || "0") * 1_000_000;
+			const cacheReadCost = parseFloat(model.pricing?.input_cache_read || "0") * 1_000_000;
+			const cacheWriteCost = parseFloat(model.pricing?.input_cache_write || "0") * 1_000_000;
+			const normalizedModel: Model<any> = {
+				id: modelKey,
+				name: model.name,
+				api: "openai-completions",
+				baseUrl: "https://openrouter.ai/api/v1",
+				provider,
+				reasoning: model.supported_parameters?.includes("reasoning") || false,
+				input,
+				cost: {
+					input: inputCost,
+					output: outputCost,
+					cacheRead: cacheReadCost,
+					cacheWrite: cacheWriteCost,
+				},
+				contextWindow: model.context_length || 4096,
+				maxTokens: model.top_provider?.max_completion_tokens || 4096,
+			};
+			models.push(normalizedModel);
+		}
+		console.log(`Fetched ${models.length} tool-capable models from OpenRouter`);
+		return models;
+	} catch (error) {
+		console.error("Failed to fetch OpenRouter models:", error);
+		return [];
+	}
+}
+async function fetchAiGatewayModels(): Promise<Model<any>[]> {
+	try {
+		console.log("Fetching models from Vercel AI Gateway API...");
+		const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`);
+		const data = await response.json();
+		const models: Model<any>[] = [];
+		const toNumber = (value: string | number | undefined): number => {
+			if (typeof value === "number") {
+				return Number.isFinite(value) ? value : 0;
+			}
+			const parsed = parseFloat(value ?? "0");
+			return Number.isFinite(parsed) ? parsed : 0;
+		};
+		const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : [];
+		for (const model of items) {
+			const tags = Array.isArray(model.tags) ? model.tags : [];
+			// Only include models that support tools
+			if (!tags.includes("tool-use")) continue;
+			const input: ("text" | "image")[] = ["text"];
+			if (tags.includes("vision")) {
+				input.push("image");
+			}
+			const inputCost = toNumber(model.pricing?.input) * 1_000_000;
+			const outputCost = toNumber(model.pricing?.output) * 1_000_000;
+			const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000;
+			const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000;
+			models.push({
+				id: model.id,
+				name: model.name || model.id,
+				api: "anthropic-messages",
+				baseUrl: AI_GATEWAY_BASE_URL,
+				provider: "vercel-ai-gateway",
+				reasoning: tags.includes("reasoning"),
+				input,
+				cost: {
+					input: inputCost,
+					output: outputCost,
+					cacheRead: cacheReadCost,
+					cacheWrite: cacheWriteCost,
+				},
+				contextWindow: model.context_window || 4096,
+				maxTokens: model.max_tokens || 4096,
+			});
+		}
+		console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`);
+		return models;
+	} catch (error) {
+		console.error("Failed to fetch Vercel AI Gateway models:", error);
+		return [];
+	}
+}
+async function loadModelsDevData(): Promise<Model<any>[]> {
+	try {
+		console.log("Fetching models from models.dev API...");
+		const response = await fetch("https://models.dev/api.json");
+		const data = await response.json();
+		const models: Model<any>[] = [];
+		// Process Amazon Bedrock models
+		if (data["amazon-bedrock"]?.models) {
+			for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				let id = modelId;
+				if (id.startsWith("ai21.jamba")) {
+					// These models doesn't support tool use in streaming mode
+					continue;
+				}
+				if (id.startsWith("mistral.mistral-7b-instruct-v0")) {
+					// These models doesn't support system messages
+					continue;
+				}
+				models.push({
+					id,
+					name: m.name || id,
+					api: "bedrock-converse-stream" as const,
+					provider: "amazon-bedrock" as const,
+					baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+					reasoning: m.reasoning === true,
+					input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process Anthropic models
+		if (data.anthropic?.models) {
+			for (const [modelId, model] of Object.entries(data.anthropic.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "anthropic-messages",
+					provider: "anthropic",
+					baseUrl: "https://api.anthropic.com",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process Google models
+		if (data.google?.models) {
+			for (const [modelId, model] of Object.entries(data.google.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "google-generative-ai",
+					provider: "google",
+					baseUrl: "https://generativelanguage.googleapis.com/v1beta",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process OpenAI models
+		if (data.openai?.models) {
+			for (const [modelId, model] of Object.entries(data.openai.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "openai-responses",
+					provider: "openai",
+					baseUrl: "https://api.openai.com/v1",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process Groq models
+		if (data.groq?.models) {
+			for (const [modelId, model] of Object.entries(data.groq.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "openai-completions",
+					provider: "groq",
+					baseUrl: "https://api.groq.com/openai/v1",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process Cerebras models
+		if (data.cerebras?.models) {
+			for (const [modelId, model] of Object.entries(data.cerebras.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "openai-completions",
+					provider: "cerebras",
+					baseUrl: "https://api.cerebras.ai/v1",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process xAi models
+		if (data.xai?.models) {
+			for (const [modelId, model] of Object.entries(data.xai.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "openai-completions",
+					provider: "xai",
+					baseUrl: "https://api.x.ai/v1",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process zAi models
+		if (data.zai?.models) {
+			for (const [modelId, model] of Object.entries(data.zai.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				const supportsImage = m.modalities?.input?.includes("image")
+				models.push({
+				id: modelId,
+				name: m.name || modelId,
+				api: "openai-completions",
+				provider: "zai",
+				baseUrl: "https://api.z.ai/api/coding/paas/v4",
+				reasoning: m.reasoning === true,
+				input: supportsImage ? ["text", "image"] : ["text"],
+				cost: {
+					input: m.cost?.input || 0,
+					output: m.cost?.output || 0,
+					cacheRead: m.cost?.cache_read || 0,
+					cacheWrite: m.cost?.cache_write || 0,
+				},
+				compat: {
+					supportsDeveloperRole: false,
+					thinkingFormat: "zai",
+				},
+				contextWindow: m.limit?.context || 4096,
+				maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process Mistral models
+		if (data.mistral?.models) {
+			for (const [modelId, model] of Object.entries(data.mistral.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "mistral-conversations",
+					provider: "mistral",
+					baseUrl: "https://api.mistral.ai",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process Hugging Face models
+		if (data.huggingface?.models) {
+			for (const [modelId, model] of Object.entries(data.huggingface.models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "openai-completions",
+					provider: "huggingface",
+					baseUrl: "https://router.huggingface.co/v1",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					compat: {
+						supportsDeveloperRole: false,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process OpenCode models (Zen and Go)
+		// API mapping based on provider.npm field:
+		// - @ai-sdk/openai → openai-responses
+		// - @ai-sdk/anthropic → anthropic-messages
+		// - @ai-sdk/google → google-generative-ai
+		// - null/undefined/@ai-sdk/openai-compatible → openai-completions
+		const opencodeVariants = [
+			{ key: "opencode", provider: "opencode", basePath: "https://opencode.ai/zen" },
+			{ key: "opencode-go", provider: "opencode-go", basePath: "https://opencode.ai/zen/go" },
+		] as const;
+		for (const variant of opencodeVariants) {
+			if (!data[variant.key]?.models) continue;
+			for (const [modelId, model] of Object.entries(data[variant.key].models)) {
+				const m = model as ModelsDevModel & { status?: string };
+				if (m.tool_call !== true) continue;
+				if (m.status === "deprecated") continue;
+				const npm = m.provider?.npm;
+				let api: Api;
+				let baseUrl: string;
+				if (npm === "@ai-sdk/openai") {
+					api = "openai-responses";
+					baseUrl = `${variant.basePath}/v1`;
+				} else if (npm === "@ai-sdk/anthropic") {
+					api = "anthropic-messages";
+					// Anthropic SDK appends /v1/messages to baseURL
+					baseUrl = variant.basePath;
+				} else if (npm === "@ai-sdk/google") {
+					api = "google-generative-ai";
+					baseUrl = `${variant.basePath}/v1`;
+				} else {
+					// null, undefined, or @ai-sdk/openai-compatible
+					api = "openai-completions";
+					baseUrl = `${variant.basePath}/v1`;
+				}
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api,
+					provider: variant.provider,
+					baseUrl,
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		// Process GitHub Copilot models
+		if (data["github-copilot"]?.models) {
+			for (const [modelId, model] of Object.entries(data["github-copilot"].models)) {
+				const m = model as ModelsDevModel & { status?: string };
+				if (m.tool_call !== true) continue;
+				if (m.status === "deprecated") continue;
+				// Claude 4.x models route to Anthropic Messages API
+				const isCopilotClaude4 = /^claude-(haiku|sonnet|opus)-4([.\-]|$)/.test(modelId);
+				// gpt-5 models require responses API, others use completions
+				const needsResponsesApi = modelId.startsWith("gpt-5") || modelId.startsWith("oswe");
+				const api: Api = isCopilotClaude4
+					? "anthropic-messages"
+					: needsResponsesApi
+						? "openai-responses"
+						: "openai-completions";
+				const copilotModel: Model<any> = {
+					id: modelId,
+					name: m.name || modelId,
+					api,
+					provider: "github-copilot",
+					baseUrl: "https://api.individual.githubcopilot.com",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 128000,
+					maxTokens: m.limit?.output || 8192,
+					headers: { ...COPILOT_STATIC_HEADERS },
+					// compat only applies to openai-completions
+					...(api === "openai-completions" ? {
+						compat: {
+							supportsStore: false,
+							supportsDeveloperRole: false,
+							supportsReasoningEffort: false,
+						},
+					} : {}),
+				};
+				models.push(copilotModel);
+			}
+		}
+		// Process MiniMax models
+		const minimaxVariants = [
+			{ key: "minimax", provider: "minimax", baseUrl: "https://api.minimax.io/anthropic" },
+			{ key: "minimax-cn", provider: "minimax-cn", baseUrl: "https://api.minimaxi.com/anthropic" },
+		] as const;
+		for (const { key, provider, baseUrl } of minimaxVariants) {
+			if (data[key]?.models) {
+				for (const [modelId, model] of Object.entries(data[key].models)) {
+					const m = model as ModelsDevModel;
+					if (m.tool_call !== true) continue;
+					models.push({
+						id: modelId,
+						name: m.name || modelId,
+						api: "anthropic-messages",
+						provider,
+						// MiniMax's Anthropic-compatible API - SDK appends /v1/messages
+						baseUrl,
+						reasoning: m.reasoning === true,
+						input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+						cost: {
+							input: m.cost?.input || 0,
+							output: m.cost?.output || 0,
+							cacheRead: m.cost?.cache_read || 0,
+							cacheWrite: m.cost?.cache_write || 0,
+						},
+						contextWindow: m.limit?.context || 4096,
+						maxTokens: m.limit?.output || 4096,
+					});
+				}
+			}
+		}
+		// Process Kimi For Coding models
+		if (data["kimi-for-coding"]?.models) {
+			for (const [modelId, model] of Object.entries(data["kimi-for-coding"].models)) {
+				const m = model as ModelsDevModel;
+				if (m.tool_call !== true) continue;
+				models.push({
+					id: modelId,
+					name: m.name || modelId,
+					api: "anthropic-messages",
+					provider: "kimi-coding",
+					// Kimi For Coding's Anthropic-compatible API - SDK appends /v1/messages
+					baseUrl: "https://api.kimi.com/coding",
+					reasoning: m.reasoning === true,
+					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+					cost: {
+						input: m.cost?.input || 0,
+						output: m.cost?.output || 0,
+						cacheRead: m.cost?.cache_read || 0,
+						cacheWrite: m.cost?.cache_write || 0,
+					},
+					contextWindow: m.limit?.context || 4096,
+					maxTokens: m.limit?.output || 4096,
+				});
+			}
+		}
+		console.log(`Loaded ${models.length} tool-capable models from models.dev`);
+		return models;
+	} catch (error) {
+		console.error("Failed to load models.dev data:", error);
+		return [];
+	}
+}
+async function generateModels() {
+	// Fetch models from both sources
+	// models.dev: Anthropic, Google, OpenAI, Groq, Cerebras
+	// OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI)
+	// AI Gateway: OpenAI-compatible catalog with tool-capable models
+	const modelsDevModels = await loadModelsDevData();
+	const openRouterModels = await fetchOpenRouterModels();
+	const aiGatewayModels = await fetchAiGatewayModels();
+	// Combine models (models.dev has priority)
+	const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels].filter(
+		(model) =>
+			!((model.provider === "opencode" || model.provider === "opencode-go") && model.id === "gpt-5.3-codex-spark"),
+	);
+	// Fix incorrect cache pricing for Claude Opus 4.5 from models.dev
+	// models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25)
+	const opus45 = allModels.find(m => m.provider === "anthropic" && m.id === "claude-opus-4-5");
+	if (opus45) {
+		opus45.cost.cacheRead = 0.5;
+		opus45.cost.cacheWrite = 6.25;
+	}
+	// Temporary overrides until upstream model metadata is corrected.
+	for (const candidate of allModels) {
+		if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-opus-4-6-v1")) {
+			candidate.cost.cacheRead = 0.5;
+			candidate.cost.cacheWrite = 6.25;
+			candidate.contextWindow = 1000000;
+		}
+		if (candidate.provider === "amazon-bedrock" && candidate.id.includes("anthropic.claude-sonnet-4-6")) {
+			candidate.contextWindow = 1000000;
+		}
+		if (
+			(candidate.provider === "anthropic" ||
+				candidate.provider === "opencode" ||
+				candidate.provider === "opencode-go") &&
+			(candidate.id === "claude-opus-4-6" ||
+				candidate.id === "claude-sonnet-4-6" ||
+				candidate.id === "claude-opus-4.6" ||
+				candidate.id === "claude-sonnet-4.6")
+		) {
+			candidate.contextWindow = 1000000;
+		}
+		if (
+			candidate.provider === "google-antigravity" &&
+			(candidate.id === "claude-opus-4-6-thinking" || candidate.id === "claude-sonnet-4-6")
+		) {
+			candidate.contextWindow = 1000000;
+		}
+		// OpenCode variants list Claude Sonnet 4/4.5 with 1M context, actual limit is 200K
+		if (
+			(candidate.provider === "opencode" || candidate.provider === "opencode-go") &&
+			(candidate.id === "claude-sonnet-4-5" || candidate.id === "claude-sonnet-4")
+		) {
+			candidate.contextWindow = 200000;
+		}
+		if ((candidate.provider === "opencode" || candidate.provider === "opencode-go") && candidate.id === "gpt-5.4") {
+			candidate.contextWindow = 272000;
+			candidate.maxTokens = 128000;
+		}
+		if (candidate.provider === "openai" && candidate.id === "gpt-5.4") {
+			candidate.contextWindow = 272000;
+			candidate.maxTokens = 128000;
+		}
+		// Keep selected OpenRouter model metadata stable until upstream settles.
+		if (candidate.provider === "openrouter" && candidate.id === "moonshotai/kimi-k2.5") {
+			candidate.cost.input = 0.41;
+			candidate.cost.output = 2.06;
+			candidate.cost.cacheRead = 0.07;
+			candidate.maxTokens = 4096;
+		}
+		if (candidate.provider === "openrouter" && candidate.id === "z-ai/glm-5") {
+			candidate.cost.input = 0.6;
+			candidate.cost.output = 1.9;
+			candidate.cost.cacheRead = 0.119;
+		}
+	}
+	// Add missing EU Opus 4.6 profile
+	if (!allModels.some((m) => m.provider === "amazon-bedrock" && m.id === "eu.anthropic.claude-opus-4-6-v1")) {
+		allModels.push({
+			id: "eu.anthropic.claude-opus-4-6-v1",
+			name: "Claude Opus 4.6 (EU)",
+			api: "bedrock-converse-stream",
+			provider: "amazon-bedrock",
+			baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 5,
+				output: 25,
+				cacheRead: 0.5,
+				cacheWrite: 6.25,
+			},
+			contextWindow: 1000000,
+			maxTokens: 128000,
+		});
+	}
+	// Add missing Claude Opus 4.6
+	if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-opus-4-6")) {
+		allModels.push({
+			id: "claude-opus-4-6",
+			name: "Claude Opus 4.6",
+			api: "anthropic-messages",
+			baseUrl: "https://api.anthropic.com",
+			provider: "anthropic",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 5,
+				output: 25,
+				cacheRead: 0.5,
+				cacheWrite: 6.25,
+			},
+			contextWindow: 1000000,
+			maxTokens: 128000,
+		});
+	}
+	// Add missing Claude Sonnet 4.6
+	if (!allModels.some(m => m.provider === "anthropic" && m.id === "claude-sonnet-4-6")) {
+		allModels.push({
+			id: "claude-sonnet-4-6",
+			name: "Claude Sonnet 4.6",
+			api: "anthropic-messages",
+			baseUrl: "https://api.anthropic.com",
+			provider: "anthropic",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 3,
+				output: 15,
+				cacheRead: 0.3,
+				cacheWrite: 3.75,
+			},
+			contextWindow: 1000000,
+			maxTokens: 64000,
+		});
+	}
+	// Add missing Gemini 3.1 Flash Lite Preview until models.dev includes it.
+	if (!allModels.some((m) => m.provider === "google" && m.id === "gemini-3.1-flash-lite-preview")) {
+		allModels.push({
+			id: "gemini-3.1-flash-lite-preview",
+			name: "Gemini 3.1 Flash Lite Preview",
+			api: "google-generative-ai",
+			baseUrl: "https://generativelanguage.googleapis.com/v1beta",
+			provider: "google",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		});
+	}
+	// Add missing gpt models
+	if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) {
+		allModels.push({
+			id: "gpt-5-chat-latest",
+			name: "GPT-5 Chat Latest",
+			api: "openai-responses",
+			baseUrl: "https://api.openai.com/v1",
+			provider: "openai",
+			reasoning: false,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 10,
+				cacheRead: 0.125,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 16384,
+		});
+	}
+	if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) {
+		allModels.push({
+			id: "gpt-5.1-codex",
+			name: "GPT-5.1 Codex",
+			api: "openai-responses",
+			baseUrl: "https://api.openai.com/v1",
+			provider: "openai",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 5,
+				cacheRead: 0.125,
+				cacheWrite: 1.25,
+			},
+			contextWindow: 400000,
+			maxTokens: 128000,
+		});
+	}
+	if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) {
+		allModels.push({
+			id: "gpt-5.1-codex-max",
+			name: "GPT-5.1 Codex Max",
+			api: "openai-responses",
+			baseUrl: "https://api.openai.com/v1",
+			provider: "openai",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 10,
+				cacheRead: 0.125,
+				cacheWrite: 0,
+			},
+			contextWindow: 400000,
+			maxTokens: 128000,
+		});
+	}
+	if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.3-codex-spark")) {
+		allModels.push({
+			id: "gpt-5.3-codex-spark",
+			name: "GPT-5.3 Codex Spark",
+			api: "openai-responses",
+			baseUrl: "https://api.openai.com/v1",
+			provider: "openai",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 16384,
+		});
+	}
+	// Add missing GitHub Copilot GPT-5.3 models until models.dev includes them.
+	const copilotBaseModel = allModels.find(
+		(m) => m.provider === "github-copilot" && m.id === "gpt-5.2-codex",
+	);
+	if (copilotBaseModel) {
+		if (!allModels.some((m) => m.provider === "github-copilot" && m.id === "gpt-5.3-codex")) {
+			allModels.push({
+				...copilotBaseModel,
+				id: "gpt-5.3-codex",
+				name: "GPT-5.3 Codex",
+			});
+		}
+	}
+	if (!allModels.some((m) => m.provider === "openai" && m.id === "gpt-5.4")) {
+		allModels.push({
+			id: "gpt-5.4",
+			name: "GPT-5.4",
+			api: "openai-responses",
+			baseUrl: "https://api.openai.com/v1",
+			provider: "openai",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 2.5,
+				output: 15,
+				cacheRead: 0.25,
+				cacheWrite: 0,
+			},
+			contextWindow: 272000,
+			maxTokens: 128000,
+		});
+	}
+	// OpenAI Codex (ChatGPT OAuth) models
+	// NOTE: These are not fetched from models.dev; we keep a small, explicit list to avoid aliases.
+	// Context window is based on observed server limits (400s above ~272k), not marketing numbers.
+	const CODEX_BASE_URL = "https://chatgpt.com/backend-api";
+	const CODEX_CONTEXT = 272000;
+	const CODEX_MAX_TOKENS = 128000;
+	const codexModels: Model<"openai-codex-responses">[] = [
+		{
+			id: "gpt-5.1",
+			name: "GPT-5.1",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.1-codex-max",
+			name: "GPT-5.1 Codex Max",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.1-codex-mini",
+			name: "GPT-5.1 Codex Mini",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.2",
+			name: "GPT-5.2",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.2-codex",
+			name: "GPT-5.2 Codex",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.3-codex",
+			name: "GPT-5.3 Codex",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.4",
+			name: "GPT-5.4",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
+			contextWindow: CODEX_CONTEXT,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+		{
+			id: "gpt-5.3-codex-spark",
+			name: "GPT-5.3 Codex Spark",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: CODEX_BASE_URL,
+			reasoning: true,
+			input: ["text"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 128000,
+			maxTokens: CODEX_MAX_TOKENS,
+		},
+	];
+	allModels.push(...codexModels);
+	// Add missing Grok models
+	if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) {
+		allModels.push({
+			id: "grok-code-fast-1",
+			name: "Grok Code Fast 1",
+			api: "openai-completions",
+			baseUrl: "https://api.x.ai/v1",
+			provider: "xai",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.2,
+				output: 1.5,
+				cacheRead: 0.02,
+				cacheWrite: 0,
+			},
+			contextWindow: 32768,
+			maxTokens: 8192,
+		});
+	}
+	// Add "auto" alias for openrouter/auto
+	if (!allModels.some(m => m.provider === "openrouter" && m.id === "auto")) {
+		allModels.push({
+			id: "auto",
+			name: "Auto",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				// we dont know about the costs because OpenRouter auto routes to different models
+				// and then charges you for the underlying used model
+				input:0,
+				output:0,
+				cacheRead:0,
+				cacheWrite:0,
+			},
+			contextWindow: 2000000,
+			maxTokens: 30000,
+		});
+	}
+	// Google Cloud Code Assist models (Gemini CLI)
+	// Uses production endpoint, standard Gemini models only
+	const CLOUD_CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com";
+	const cloudCodeAssistModels: Model<"google-gemini-cli">[] = [
+		{
+			id: "gemini-2.5-pro",
+			name: "Gemini 2.5 Pro (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-2.5-flash",
+			name: "Gemini 2.5 Flash (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-2.0-flash",
+			name: "Gemini 2.0 Flash (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 8192,
+		},
+		{
+			id: "gemini-3-pro-preview",
+			name: "Gemini 3 Pro Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3-flash-preview",
+			name: "Gemini 3 Flash Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3.1-pro-preview",
+			name: "Gemini 3.1 Pro Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+	];
+	allModels.push(...cloudCodeAssistModels);
+	// Antigravity models (Gemini 3, Claude, GPT-OSS via Google Cloud)
+	// Uses sandbox endpoint and different OAuth credentials for access to additional models
+	const ANTIGRAVITY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
+	const antigravityModels: Model<"google-gemini-cli">[] = [
+		{
+			id: "gemini-3.1-pro-high",
+			name: "Gemini 3.1 Pro High (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			// the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input
+			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3.1-pro-low",
+			name: "Gemini 3.1 Pro Low (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			// the Model type doesn't seem to support having extended-context costs, so I'm just using the pricing for <200k input
+			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 2.375 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3-flash",
+			name: "Gemini 3 Flash (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.5, output: 3, cacheRead: 0.5, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "claude-sonnet-4-5",
+			name: "Claude Sonnet 4.5 (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
+			contextWindow: 200000,
+			maxTokens: 64000,
+		},
+		{
+			id: "claude-sonnet-4-5-thinking",
+			name: "Claude Sonnet 4.5 Thinking (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
+			contextWindow: 200000,
+			maxTokens: 64000,
+		},
+		{
+			id: "claude-opus-4-5-thinking",
+			name: "Claude Opus 4.5 Thinking (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
+			contextWindow: 200000,
+			maxTokens: 64000,
+		},
+		{
+			id: "claude-opus-4-6-thinking",
+			name: "Claude Opus 4.6 Thinking (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
+			contextWindow: 200000,
+			maxTokens: 128000,
+		},
+		{
+			id: "claude-sonnet-4-6",
+			name: "Claude Sonnet 4.6 (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
+			contextWindow: 200000,
+			maxTokens: 64000,
+		},
+		{
+			id: "gpt-oss-120b-medium",
+			name: "GPT-OSS 120B Medium (Antigravity)",
+			api: "google-gemini-cli",
+			provider: "google-antigravity",
+			baseUrl: ANTIGRAVITY_ENDPOINT,
+			reasoning: false,
+			input: ["text"],
+			cost: { input: 0.09, output: 0.36, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 131072,
+			maxTokens: 32768,
+		},
+	];
+	allModels.push(...antigravityModels);
+	const VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com";
+	const vertexModels: Model<"google-vertex">[] = [
+		{
+			id: "gemini-3-pro-preview",
+			name: "Gemini 3 Pro Preview (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
+			contextWindow: 1000000,
+			maxTokens: 64000,
+		},
+		{
+			id: "gemini-3.1-pro-preview",
+			name: "Gemini 3.1 Pro Preview (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-3-flash-preview",
+			name: "Gemini 3 Flash Preview (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-2.0-flash",
+			name: "Gemini 2.0 Flash (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 8192,
+		},
+		{
+			id: "gemini-2.0-flash-lite",
+			name: "Gemini 2.0 Flash Lite (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-2.5-pro",
+			name: "Gemini 2.5 Pro (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-2.5-flash",
+			name: "Gemini 2.5 Flash (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-2.5-flash-lite-preview-09-2025",
+			name: "Gemini 2.5 Flash Lite Preview 09-25 (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-2.5-flash-lite",
+			name: "Gemini 2.5 Flash Lite (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0.1, output: 0.4, cacheRead: 0.01, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		},
+		{
+			id: "gemini-1.5-pro",
+			name: "Gemini 1.5 Pro (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 },
+			contextWindow: 1000000,
+			maxTokens: 8192,
+		},
+		{
+			id: "gemini-1.5-flash",
+			name: "Gemini 1.5 Flash (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 },
+			contextWindow: 1000000,
+			maxTokens: 8192,
+		},
+		{
+			id: "gemini-1.5-flash-8b",
+			name: "Gemini 1.5 Flash-8B (Vertex)",
+			api: "google-vertex",
+			provider: "google-vertex",
+			baseUrl: VERTEX_BASE_URL,
+			reasoning: false,
+			input: ["text", "image"],
+			cost: { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 },
+			contextWindow: 1000000,
+			maxTokens: 8192,
+		},
+	];
+	allModels.push(...vertexModels);
+	// Kimi For Coding models (Moonshot AI's Anthropic-compatible coding API)
+	// Static fallback in case models.dev doesn't have them yet
+	const KIMI_CODING_BASE_URL = "https://api.kimi.com/coding";
+	const kimiCodingModels: Model<"anthropic-messages">[] = [
+		{
+			id: "kimi-k2-thinking",
+			name: "Kimi K2 Thinking",
+			api: "anthropic-messages",
+			provider: "kimi-coding",
+			baseUrl: KIMI_CODING_BASE_URL,
+			reasoning: true,
+			input: ["text"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 262144,
+			maxTokens: 32768,
+		},
+		{
+			id: "k2p5",
+			name: "Kimi K2.5",
+			api: "anthropic-messages",
+			provider: "kimi-coding",
+			baseUrl: KIMI_CODING_BASE_URL,
+			reasoning: true,
+			input: ["text"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 262144,
+			maxTokens: 32768,
+		},
+	];
+	// Only add if not already present from models.dev
+	for (const model of kimiCodingModels) {
+		if (!allModels.some(m => m.provider === "kimi-coding" && m.id === model.id)) {
+			allModels.push(model);
+		}
+	}
+	const azureOpenAiModels: Model<Api>[] = allModels
+		.filter((model) => model.provider === "openai" && model.api === "openai-responses")
+		.map((model) => ({
+			...model,
+			api: "azure-openai-responses",
+			provider: "azure-openai-responses",
+			baseUrl: "",
+		}));
+	allModels.push(...azureOpenAiModels);
+	// Group by provider and deduplicate by model ID
+	const providers: Record<string, Record<string, Model<any>>> = {};
+	for (const model of allModels) {
+		if (!providers[model.provider]) {
+			providers[model.provider] = {};
+		}
+		// Use model ID as key to automatically deduplicate
+		// Only add if not already present (models.dev takes priority over OpenRouter)
+		if (!providers[model.provider][model.id]) {
+			providers[model.provider][model.id] = model;
+		}
+	}
+	// Generate TypeScript file
+	let output = `// This file is auto-generated by scripts/generate-models.ts
+// Do not edit manually - run 'npm run generate-models' to update
+import type { Model } from "./types.js";
+export const MODELS = {
+`;
+	// Generate provider sections (sorted for deterministic output)
+	const sortedProviderIds = Object.keys(providers).sort();
+	for (const providerId of sortedProviderIds) {
+		const models = providers[providerId];
+		output += `\t${JSON.stringify(providerId)}: {\n`;
+		const sortedModelIds = Object.keys(models).sort();
+		for (const modelId of sortedModelIds) {
+			const model = models[modelId];
+			output += `\t\t"${model.id}": {\n`;
+			output += `\t\t\tid: "${model.id}",\n`;
+			output += `\t\t\tname: "${model.name}",\n`;
+			output += `\t\t\tapi: "${model.api}",\n`;
+			output += `\t\t\tprovider: "${model.provider}",\n`;
+			if (model.baseUrl !== undefined) {
+				output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`;
+			}
+			if (model.headers) {
+				output += `\t\t\theaders: ${JSON.stringify(model.headers)},\n`;
+			}
+			if (model.compat) {
+				output += `			compat: ${JSON.stringify(model.compat)},
+`;
+			}
+			output += `\t\t\treasoning: ${model.reasoning},\n`;
+			output += `\t\t\tinput: [${model.input.map(i => `"${i}"`).join(", ")}],\n`;
+			output += `\t\t\tcost: {\n`;
+			output += `\t\t\t\tinput: ${model.cost.input},\n`;
+			output += `\t\t\t\toutput: ${model.cost.output},\n`;
+			output += `\t\t\t\tcacheRead: ${model.cost.cacheRead},\n`;
+			output += `\t\t\t\tcacheWrite: ${model.cost.cacheWrite},\n`;
+			output += `\t\t\t},\n`;
+			output += `\t\t\tcontextWindow: ${model.contextWindow},\n`;
+			output += `\t\t\tmaxTokens: ${model.maxTokens},\n`;
+			output += `\t\t} satisfies Model<"${model.api}">,\n`;
+		}
+		output += `\t},\n`;
+	}
+	output += `} as const;
+`;
+	// Write file
+	writeFileSync(join(packageRoot, "src/models.generated.ts"), output);
+	console.log("Generated src/models.generated.ts");
+	// Print statistics
+	const totalModels = allModels.length;
+	const reasoningModels = allModels.filter(m => m.reasoning).length;
+	console.log(`\nModel Statistics:`);
+	console.log(`  Total tool-capable models: ${totalModels}`);
+	console.log(`  Reasoning-capable models: ${reasoningModels}`);
+	for (const [provider, models] of Object.entries(providers)) {
+		console.log(`  ${provider}: ${Object.keys(models).length} models`);
+	}
+}
+// Run the generator
+generateModels().catch(console.error);