npm - pi-free - Versions diffs - 2.0.15 → 2.1.1 - Mend

pi-free 2.0.15 → 2.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (46)

package/CHANGELOG.md +100 -3
package/README.md +64 -79
package/banner.svg +21 -36
package/config.ts +123 -9
package/constants.ts +3 -9
package/index.ts +14 -15
package/lib/built-in-toggle.ts +29 -56
package/lib/json-persistence.ts +90 -22
package/lib/logger.ts +21 -12
package/lib/model-detection.ts +2 -12
package/lib/model-enhancer.ts +11 -2
package/lib/model-metadata.ts +387 -0
package/lib/open-browser.ts +74 -24
package/lib/paths.ts +90 -0
package/lib/probe-cache.ts +19 -19
package/lib/provider-cache.ts +74 -28
package/lib/provider-compat.ts +53 -37
package/lib/provider-probe.ts +188 -0
package/lib/registry.ts +1 -5
package/lib/session-start-metrics.ts +46 -0
package/lib/telemetry.ts +115 -86
package/lib/types.ts +22 -2
package/lib/util.ts +80 -21
package/package.json +7 -2
package/provider-failover/benchmark-lookup.ts +17 -5
package/provider-helper.ts +12 -27
package/providers/cline/cline-models.ts +7 -1
package/providers/cline/cline-xml-bridge.ts +1471 -0
package/providers/cline/cline.ts +67 -199
package/providers/codestral/codestral.ts +0 -11
package/providers/crofai/crofai.ts +6 -1
package/providers/deepinfra/deepinfra.ts +69 -2
package/providers/dynamic-built-in/index.ts +237 -22
package/providers/kilo/kilo-models.ts +3 -1
package/providers/kilo/kilo.ts +270 -60
package/providers/model-fetcher.ts +18 -55
package/providers/novita/novita.ts +69 -2
package/providers/ollama/ollama.ts +47 -36
package/providers/opencode-session.ts +67 -2
package/providers/routeway/routeway.ts +25 -17
package/providers/sambanova/sambanova.ts +67 -1
package/providers/together/together.ts +69 -2
package/providers/tokenrouter/tokenrouter.ts +634 -0
package/providers/zenmux/zenmux.ts +6 -1
package/scripts/check-extensions.mjs +32 -16
package/providers/nvidia/nvidia.ts +0 -510

package/lib/telemetry.ts CHANGED Viewed

@@ -7,10 +7,9 @@
  * Provides a real-world performance signal alongside static CI benchmarks.
  */
-import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
-import { homedir } from "node:os";
-import { join } from "node:path";
 import { createLogger } from "./logger.ts";
+import { resolveSafeDataFile } from "./paths.ts";
+import { createJSONStore } from "./json-persistence.ts";
 const _logger = createLogger("telemetry");
@@ -71,55 +70,44 @@ export interface TelemetryStore {
 // Constants
 // =============================================================================
-const TELEMETRY_DIR = join(homedir(), ".pi");
-const TELEMETRY_FILE = join(TELEMETRY_DIR, "free-telemetry.json");
+const TELEMETRY_FILE = resolveSafeDataFile(
+	process.env.PI_FREE_TELEMETRY_FILE,
+	"free-telemetry.json",
+);
 const MAX_RECENT_CALLS = 50;
-// In-flight tracking: keyed by "provider/model", value is start timestamp
+// In-flight tracking: keyed by "provider/model", value is start timestamp.
+// TTL: 1 hour — anything older is stale (the matching recordModelCall
+// never fired, e.g. the agent was killed mid-call) and gets reaped
+// on the next startModelCall/recordModelCall.
 const _inFlight = new Map<string, number>();
+const _IN_FLIGHT_TTL_MS = 60 * 60 * 1000;
-// =============================================================================
-// Storage
-// =============================================================================
-function ensureDir(): void {
-	if (!existsSync(TELEMETRY_DIR)) {
-		mkdirSync(TELEMETRY_DIR, { recursive: true });
-	}
-}
-function loadStore(): TelemetryStore {
-	try {
-		if (!existsSync(TELEMETRY_FILE)) {
-			return { models: {}, lastUpdated: Date.now() };
+function reapStaleInFlight(now: number): void {
+	for (const [key, start] of _inFlight) {
+		if (now - start > _IN_FLIGHT_TTL_MS) {
+			_inFlight.delete(key);
 		}
-		const raw = readFileSync(TELEMETRY_FILE, "utf-8");
-		return JSON.parse(raw) as TelemetryStore;
-	} catch (err) {
-		_logger.warn("Failed to load telemetry store, resetting", {
-			error: String(err),
-		});
-		return { models: {}, lastUpdated: Date.now() };
 	}
 }
-function saveStore(store: TelemetryStore): void {
-	try {
-		ensureDir();
-		store.lastUpdated = Date.now();
-		writeFileSync(TELEMETRY_FILE, JSON.stringify(store, null, 2), "utf-8");
-	} catch (err) {
-		_logger.warn("Failed to save telemetry store", {
-			error: String(err),
-		});
-	}
-}
+// =============================================================================
+// Storage
+// =============================================================================
+const _store = createJSONStore<TelemetryStore>(TELEMETRY_FILE, {
+	models: {},
+	lastUpdated: Date.now(),
+});
 // =============================================================================
 // Entry management
 // =============================================================================
-function deriveModelTelemetry(modelKey: string, entries: TelemetryEntry[]): ModelTelemetry {
+function deriveModelTelemetry(
+	_modelKey: string,
+	entries: TelemetryEntry[],
+): ModelTelemetry {
 	const recent = entries.slice(-MAX_RECENT_CALLS);
 	const totalCalls = entries.length;
 	const successCalls = entries.filter((e) => e.success).length;
@@ -134,12 +122,24 @@ function deriveModelTelemetry(modelKey: string, entries: TelemetryEntry[]): Mode
 			acc.totalCost += e.cost;
 			return acc;
 		},
-		{ totalTokens: 0, totalPromptTokens: 0, totalCompletionTokens: 0, totalLatencyMs: 0, totalCost: 0 },
+		{
+			totalTokens: 0,
+			totalPromptTokens: 0,
+			totalCompletionTokens: 0,
+			totalLatencyMs: 0,
+			totalCost: 0,
+		},
 	);
 	const totalSuccessEntries = entries.filter((e) => e.success);
-	const totalTokensFromSuccessful = totalSuccessEntries.reduce((s, e) => s + e.totalTokens, 0);
-	const totalLatencyFromSuccessful = totalSuccessEntries.reduce((s, e) => s + e.latencyMs, 0);
+	const totalTokensFromSuccessful = totalSuccessEntries.reduce(
+		(s, e) => s + e.totalTokens,
+		0,
+	);
+	const totalLatencyFromSuccessful = totalSuccessEntries.reduce(
+		(s, e) => s + e.latencyMs,
+		0,
+	);
 	return {
 		totalCalls,
@@ -150,31 +150,47 @@ function deriveModelTelemetry(modelKey: string, entries: TelemetryEntry[]): Mode
 		totalCompletionTokens: stats.totalCompletionTokens,
 		totalLatencyMs: stats.totalLatencyMs,
 		totalCost: stats.totalCost,
-		avgLatencyMs: totalSuccessEntries.length > 0
-			? Math.round(totalLatencyFromSuccessful / totalSuccessEntries.length)
-			: 0,
-		avgTokensPerSecond: totalLatencyFromSuccessful > 0
-			? parseFloat((totalTokensFromSuccessful / (totalLatencyFromSuccessful / 1000)).toFixed(1))
-			: 0,
-		successRate: totalCalls > 0
-			? parseFloat((successCalls / totalCalls * 100).toFixed(1))
-			: 0,
+		avgLatencyMs:
+			totalSuccessEntries.length > 0
+				? Math.round(totalLatencyFromSuccessful / totalSuccessEntries.length)
+				: 0,
+		avgTokensPerSecond:
+			totalLatencyFromSuccessful > 0
+				? parseFloat(
+						(
+							totalTokensFromSuccessful /
+							(totalLatencyFromSuccessful / 1000)
+						).toFixed(1),
+					)
+				: 0,
+		successRate:
+			totalCalls > 0
+				? parseFloat(((successCalls / totalCalls) * 100).toFixed(1))
+				: 0,
 		recentCalls: recent,
 	};
 }
-function addEntry(entry: TelemetryEntry): void {
-	const store = loadStore();
-	const modelKey = `${entry.provider}/${entry.model}`;
-	const existing: TelemetryEntry[] = store.models[modelKey]?.recentCalls ?? [];
-	existing.push(entry);
-	// Keep only last MAX_RECENT_CALLS * 2 in raw storage (we derive stats from these)
-	const pruned = existing.slice(-MAX_RECENT_CALLS * 2);
-	store.models[modelKey] = deriveModelTelemetry(modelKey, pruned);
-	saveStore(store);
+async function addEntry(entry: TelemetryEntry): Promise<void> {
+	await _store.update((store) => {
+		const modelKey = `${entry.provider}/${entry.model}`;
+		const existing: TelemetryEntry[] =
+			store.models[modelKey]?.recentCalls ?? [];
+		existing.push(entry);
+		// Keep only last MAX_RECENT_CALLS * 2 in raw storage (we derive stats from these)
+		const pruned = existing.slice(-MAX_RECENT_CALLS * 2);
+		return {
+			...store,
+			models: {
+				...store.models,
+				[modelKey]: deriveModelTelemetry(modelKey, pruned),
+			},
+			lastUpdated: Date.now(),
+		};
+	});
 }
 // =============================================================================
@@ -185,23 +201,27 @@ function addEntry(entry: TelemetryEntry): void {
  * Get telemetry for all tracked models.
  */
 export function getAllTelemetry(): Record<string, ModelTelemetry> {
-	const store = loadStore();
-	return store.models;
+	return _store.load().models;
 }
 /**
  * Get telemetry for a specific provider/model combination.
  */
-export function getModelTelemetry(provider: string, model: string): ModelTelemetry | null {
-	const store = loadStore();
-	return store.models[`${provider}/${model}`] ?? null;
+export function getModelTelemetry(
+	provider: string,
+	model: string,
+): ModelTelemetry | null {
+	return _store.load().models[`${provider}/${model}`] ?? null;
 }
 /**
  * Format a model's telemetry as a human-readable string (for status bar / /model list).
  * Returns undefined if no telemetry data is available.
  */
-export function formatModelTelemetry(provider: string, model: string): string | undefined {
+export function formatModelTelemetry(
+	provider: string,
+	model: string,
+): string | undefined {
 	const telemetry = getModelTelemetry(provider, model);
 	if (!telemetry || telemetry.totalCalls === 0) return undefined;
@@ -230,7 +250,7 @@ export function getProviderTelemetry(provider: string): {
 	totalCost: number;
 	models: number;
 } {
-	const store = loadStore();
+	const store = _store.load();
 	let totalCalls = 0;
 	let totalCost = 0;
 	let models = 0;
@@ -252,7 +272,16 @@ export function getProviderTelemetry(provider: string): {
  */
 export function startModelCall(provider: string, model: string): void {
 	const key = `${provider}/${model}`;
-	_inFlight.set(key, Date.now());
+	const now = Date.now();
+	reapStaleInFlight(now);
+	_inFlight.set(key, now);
+}
+/** Options for {@link recordModelCall} */
+export interface RecordModelCallOptions {
+	success: boolean;
+	stopReason?: string;
+	errorMessage?: string;
 }
 /**
@@ -263,28 +292,26 @@ export function startModelCall(provider: string, model: string): void {
  * @param model - The model ID
  * @param usage - Token usage { input, output, totalTokens }
  * @param cost - Cost in USD
- * @param success - Whether the call succeeded
- * @param stopReason - The stop reason (e.g. "stop", "error")
- * @param errorMessage - Error message if failed
+ * @param options - Options object ({@link RecordModelCallOptions})
  */
-export function recordModelCall(
+export async function recordModelCall(
 	provider: string,
 	model: string,
 	usage: { input: number; output: number; totalTokens: number },
 	cost: number,
-	success: boolean,
-	stopReason?: string,
-	errorMessage?: string,
-): void {
+	options: RecordModelCallOptions,
+): Promise<void> {
+	const { success, stopReason, errorMessage } = options;
 	const key = `${provider}/${model}`;
 	const startTime = _inFlight.get(key) ?? Date.now();
 	const latencyMs = Date.now() - startTime;
 	_inFlight.delete(key);
 	const totalTokens = usage.totalTokens || usage.input + usage.output;
-	const tokensPerSecond = latencyMs > 0
-		? parseFloat((totalTokens / (latencyMs / 1000)).toFixed(1))
-		: 0;
+	const tokensPerSecond =
+		latencyMs > 0
+			? parseFloat((totalTokens / (latencyMs / 1000)).toFixed(1))
+			: 0;
 	const entry: TelemetryEntry = {
 		timestamp: Date.now(),
@@ -301,7 +328,7 @@ export function recordModelCall(
 		...(errorMessage ? { error: errorMessage } : {}),
 	};
-	addEntry(entry);
+	await addEntry(entry);
 	_logger.info(`Telemetry: ${provider}/${model}`, {
 		latencyMs,
@@ -315,9 +342,11 @@ export function recordModelCall(
 /**
  * Clear all telemetry data.
  */
-export function clearTelemetry(): void {
-	const store: TelemetryStore = { models: {}, lastUpdated: Date.now() };
-	saveStore(store);
+export async function clearTelemetry(): Promise<void> {
+	await _store.update(() => ({
+		models: {},
+		lastUpdated: Date.now(),
+	}));
 }
 /**

package/lib/types.ts CHANGED Viewed

@@ -14,6 +14,19 @@ export interface CostConfig {
 	cacheWrite: number;
 }
+export interface ModelIdentity {
+	id: string;
+	name?: string;
+	family?: string;
+	provider?: string;
+}
+export type ModelMatchHints = Partial<ModelIdentity>;
+export interface ModelsDevEnrichedMetadata {
+	modelsDev?: ModelMatchHints;
+}
 export interface ProviderModelConfig {
 	id: string;
 	name: string;
@@ -35,6 +48,13 @@ export interface ModelsDevCost {
 	cache_write?: number;
 }
+export interface ModelsDevReasoningOption {
+	type: "effort" | "toggle" | "budget_tokens";
+	values?: string[];
+	min?: number;
+	max?: number;
+}
 export interface ModelsDevLimit {
 	context: number;
 	output: number;
@@ -45,10 +65,10 @@ export interface ModelsDevModalities {
 	output?: string[];
 }
-export interface ModelsDevModel {
-	id: string;
+export interface ModelsDevModel extends ModelIdentity {
 	name: string;
 	reasoning: boolean;
+	reasoning_options?: ModelsDevReasoningOption[];
 	cost?: ModelsDevCost;
 	limit: ModelsDevLimit;
 	modalities?: ModelsDevModalities;

package/lib/util.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { createLogger } from "./logger.ts";
+import { safeEnrichModelsWithModelsDev } from "./model-metadata.ts";
 import {
 	getProxyModelCompat,
 	isLikelyReasoningModel,
@@ -6,12 +7,40 @@ import {
 import type { ProviderModelConfig as PiProviderModelConfig } from "@earendil-works/pi-coding-agent";
 import type { ProviderModelConfig } from "./types.ts";
+/**
+ * Optional callbacks that providers can pass to
+ * `fetchOpenAICompatibleModels` to override default reasoning/compat
+ * detection logic. Keeping these as injected dependencies (rather
+ * than hard-coding `isLikelyReasoningModel` / `getProxyModelCompat`)
+ * lets `lib/util.ts` stay decoupled from `lib/provider-compat.ts`.
+ */
+export interface OpenAIModelCallbacks {
+	/**
+	 * Determine whether a model is a reasoning model.
+	 * If omitted, defaults to `isLikelyReasoningModel` from provider-compat.
+	 */
+	detectReasoning?: (model: { id: string; name?: string }) => boolean;
+	/**
+	 * Determine proxy-compat overrides for a model.
+	 * If omitted, defaults to `getProxyModelCompat` from provider-compat.
+	 */
+	getProxyCompat?: (model: {
+		id: string;
+		name?: string;
+	}) => PiProviderModelConfig["compat"] | undefined;
+}
 const _logger = createLogger("util");
 // =============================================================================
 // Shared Utilities
 // =============================================================================
+/** Async sleep helper — avoids creating anonymous functions in loops */
+export function sleep(ms: number): Promise<void> {
+	return new Promise((resolve) => setTimeout(resolve, ms));
+}
 /**
  * Log a warning message for provider operations
  */
@@ -74,7 +103,7 @@ export async function fetchWithRetry(
 			if (response.status >= 500) {
 				lastError = new Error(`Server error ${response.status}`);
 				if (i < retries - 1) {
-					await new Promise((r) => setTimeout(r, delayMs * (i + 1)));
+					await sleep(delayMs * (i + 1));
 					continue;
 				}
 				// Last retry exhausted - throw the error
@@ -85,7 +114,7 @@ export async function fetchWithRetry(
 		} catch (error) {
 			lastError = error;
 			if (i < retries - 1) {
-				await new Promise((r) => setTimeout(r, delayMs * (i + 1)));
+				await sleep(delayMs * (i + 1));
 			}
 		}
 	}
@@ -310,16 +339,22 @@ export function cleanModelName(name: string): string {
 	// Handle patterns like "Provider : Model Name" or "Provider / Model Name"
 	const colonIdx = name.indexOf(":");
 	const slashIdx = name.indexOf("/");
-	const idx =
-		colonIdx === -1
-			? slashIdx
-			: slashIdx === -1
-				? colonIdx
-				: Math.min(colonIdx, slashIdx);
-	if (idx > 0) {
-		return name.slice(idx + 1).trim();
+	let idx = -1;
+	if (colonIdx === -1 && slashIdx === -1) {
+		// Neither found — return trimmed name as-is
+		return name.trim();
 	}
-	return name.trim();
+	if (colonIdx === -1) {
+		// Only slash found
+		idx = slashIdx;
+	} else if (slashIdx === -1) {
+		// Only colon found
+		idx = colonIdx;
+	} else {
+		// Both found — use the earliest
+		idx = Math.min(colonIdx, slashIdx);
+	}
+	return name.slice(idx + 1).trim();
 }
 // =============================================================================
@@ -335,31 +370,51 @@ export function mapOpenRouterModel(m: {
 	name: string;
 	context_length?: number;
 	max_completion_tokens?: number | null;
-	top_provider?: { max_completion_tokens?: number | null };
-	pricing?: { prompt?: string | null; completion?: string | null };
+	top_provider?: {
+		context_length?: number | null;
+		max_completion_tokens?: number | null;
+	};
+	pricing?: {
+		prompt?: string | null;
+		completion?: string | null;
+		input_cache_read?: string | null;
+		input_cache_write?: string | null;
+	};
 	architecture?: {
 		input_modalities?: string[] | null;
 		output_modalities?: string[] | null;
 	};
+	supported_parameters?: string[] | null;
 	isFree?: boolean;
 }): ProviderModelConfig {
 	const promptPrice = Number.parseFloat(m.pricing?.prompt ?? "0");
 	const completionPrice = Number.parseFloat(m.pricing?.completion ?? "0");
+	const cacheReadPrice = Number.parseFloat(
+		m.pricing?.input_cache_read ?? "0",
+	);
+	const cacheWritePrice = Number.parseFloat(
+		m.pricing?.input_cache_write ?? "0",
+	);
+	const supportedParameters = m.supported_parameters ?? [];
+	const reasoning =
+		supportedParameters.includes("reasoning") ||
+		supportedParameters.includes("reasoning_effort");
 	return {
 		id: m.id,
 		name: cleanModelName(m.name),
-		reasoning: false, // OpenRouter doesn't expose reasoning flag directly
+		reasoning,
+		...(reasoning && { thinkingLevelMap: { off: "none" } }),
 		input: m.architecture?.input_modalities?.includes("image")
 			? (["text", "image"] as const)
 			: (["text"] as const),
 		cost: {
 			input: promptPrice,
 			output: completionPrice,
-			cacheRead: 0,
-			cacheWrite: 0,
+			cacheRead: cacheReadPrice,
+			cacheWrite: cacheWritePrice,
 		},
-		contextWindow: m.context_length ?? 4096,
+		contextWindow: m.context_length ?? m.top_provider?.context_length ?? 4096,
 		maxTokens:
 			m.max_completion_tokens ?? m.top_provider?.max_completion_tokens ?? 4096,
 		_pricingKnown: true,
@@ -433,8 +488,11 @@ export async function fetchOpenAICompatibleModels(
 	baseUrl: string,
 	apiKey: string,
 	defaults: OpenAIModelDefaults = {},
+	callbacks: OpenAIModelCallbacks = {},
 ): Promise<PiProviderModelConfig[]> {
 	const logger = createLogger(providerId);
+	const detectReasoning = callbacks.detectReasoning ?? isLikelyReasoningModel;
+	const getCompat = callbacks.getProxyCompat ?? getProxyModelCompat;
 	logger.info(`[${providerId}] Fetching models...`);
@@ -463,7 +521,7 @@ export async function fetchOpenAICompatibleModels(
 		logger.info(`[${providerId}] Fetched ${models.length} models`);
-		return models
+		const mapped = models
 			.filter((m) => m.id)
 			.map((m): PiProviderModelConfig => {
 				const name = m.id.split("/").pop() || m.id;
@@ -484,8 +542,7 @@ export async function fetchOpenAICompatibleModels(
 					4_096;
 				// Use per-model reasoning flag if the API provides it
-				const reasoning =
-					m.reasoning ?? isLikelyReasoningModel({ id: m.id, name });
+				const reasoning = m.reasoning ?? detectReasoning({ id: m.id, name });
 				// Use per-model input_modalities if the API provides it
 				const hasVision = m.input_modalities?.includes("image") ?? false;
@@ -521,10 +578,12 @@ export async function fetchOpenAICompatibleModels(
 					},
 					contextWindow,
 					maxTokens,
-					compat: getProxyModelCompat({ id: m.id, name }),
+					compat: getCompat({ id: m.id, name }),
 					_pricingKnown: hasApiPricing,
 				} as PiProviderModelConfig & { _pricingKnown?: boolean };
 			});
+		return await safeEnrichModelsWithModelsDev(mapped, { providerId });
 	} catch (error) {
 		logger.error(`[${providerId}] Failed to fetch models:`, {
 			error: error instanceof Error ? error.message : String(error),

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "pi-free",
-	"version": "2.0.15",
+	"version": "2.1.1",
 	"type": "module",
 	"description": "AI model providers for Pi with free model filtering and dynamic model fetching",
 	"keywords": [
@@ -44,10 +44,15 @@
 		"scripts/check-extensions.mjs"
 	],
 	"scripts": {
+		"audit:prod": "npm audit --omit=dev --audit-level=high",
 		"check": "node scripts/check-extensions.mjs",
+		"check:lockfile": "node scripts/check-lockfile-sync.mjs",
+		"check:tarball": "node scripts/check-tarball.mjs",
+		"lint": "tsc --noEmit",
 		"test": "vitest",
 		"test:ui": "vitest --ui",
-		"test:run": "vitest run"
+		"test:run": "vitest run",
+		"smoke:cline": "tsx scripts/smoke-cline-xml-bridge.ts"
 	},
 	"peerDependencies": {
 		"@earendil-works/pi-ai": "*",

package/provider-failover/benchmark-lookup.ts CHANGED Viewed

@@ -12,6 +12,7 @@
 import { appendFileSync, existsSync, mkdirSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
+import type { ModelMatchHints } from "../lib/types.ts";
 import {
 	HARDCODED_BENCHMARKS,
 	type HardcodedBenchmark,
@@ -677,6 +678,7 @@ export function findHardcodedBenchmark(
 	modelName: string,
 	modelId: string,
 	provider?: string,
+	hints?: ModelMatchHints,
 ): HardcodedBenchmark | null {
 	// Normalize: convert colons to dashes (Ollama model:tag format)
 	const search = `${modelName} ${modelId}`.toLowerCase().replaceAll(":", "-");
@@ -699,9 +701,17 @@ export function findHardcodedBenchmark(
 	);
 	if (normalizedResult) return normalizedResult;
-	// 4. Prefix fallback with base model extraction
-	const prefix = tryPrefixFallback(normalized, provider, modelId, modelName);
-	if (prefix) return prefix;
+	// 4. Prefix fallback with base model extraction. Also try models.dev
+	// canonical IDs/names when available for opaque gateway model IDs.
+	const prefixCandidates = [normalized, hints?.id, hints?.name]
+		.map((candidate) =>
+			(candidate?.trim() ?? "").toLowerCase().replaceAll(/[\s_:]+/g, "-"),
+		)
+		.filter(Boolean);
+	for (const candidate of prefixCandidates) {
+		const prefix = tryPrefixFallback(candidate, provider, modelId, modelName);
+		if (prefix) return prefix;
+	}
 	// No match found
 	logDebug({
@@ -724,8 +734,9 @@ export function getHardcodedScore(
 	modelName: string,
 	modelId: string,
 	provider?: string,
+	hints?: ModelMatchHints,
 ): number | null {
-	const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
+	const benchmark = findHardcodedBenchmark(modelName, modelId, provider, hints);
 	return benchmark?.codingIndex ?? null;
 }
@@ -737,8 +748,9 @@ export function enhanceModelNameWithCodingIndex(
 	modelName: string,
 	modelId: string,
 	provider?: string,
+	hints?: ModelMatchHints,
 ): string {
-	const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
+	const benchmark = findHardcodedBenchmark(modelName, modelId, provider, hints);
 	if (benchmark?.codingIndex !== undefined) {
 		return `${modelName} [CI: ${benchmark.codingIndex.toFixed(1)}]`;
 	}