npm - pi-free - Versions diffs - 2.0.12 → 2.0.14 - Mend

pi-free 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/CHANGELOG.md +640 -608
package/README.md +7 -23
package/config.ts +15 -0
package/constants.ts +3 -0
package/index.ts +135 -0
package/lib/built-in-toggle.ts +34 -7
package/lib/probe-cache.ts +86 -0
package/lib/registry.ts +25 -3
package/lib/telemetry.ts +328 -0
package/lib/util.ts +10 -1
package/package.json +1 -1
package/provider-failover/benchmark-lookup.ts +94 -8
package/provider-failover/benchmarks-chunk-0.ts +599 -890
package/provider-failover/benchmarks-chunk-1.ts +655 -924
package/provider-failover/benchmarks-chunk-2.ts +675 -966
package/provider-failover/benchmarks-chunk-3.ts +676 -967
package/provider-failover/benchmarks-chunk-4.ts +704 -954
package/provider-failover/benchmarks-chunk-5.ts +1301 -0
package/provider-failover/hardcoded-benchmarks.ts +9 -3
package/providers/cline/cline-models.ts +196 -68
package/providers/dynamic-built-in/index.ts +27 -4
package/providers/kilo/kilo.ts +2 -2
package/providers/model-fetcher.ts +3 -1
package/providers/nvidia/nvidia.ts +47 -15
package/providers/ollama/ollama.ts +103 -46
package/providers/opencode-session.ts +376 -11
package/providers/qwen/qwen.ts +2 -2
package/providers/routeway/routeway.ts +213 -0

package/provider-failover/hardcoded-benchmarks.ts CHANGED

@@ -11,6 +11,7 @@
  * under the 3000-line limit. This file re-exports the merged result.
  *
  * To update: Run scripts/update-benchmarks.ts with ARTIFICIAL_ANALYSIS_API_KEY
+ * The script auto-updates this file's imports and spread when chunk count changes.
  */
 import { BENCHMARKS_CHUNK_0 } from "./benchmarks-chunk-0.ts";
@@ -18,10 +19,8 @@ import { BENCHMARKS_CHUNK_1 } from "./benchmarks-chunk-1.ts";
 import { BENCHMARKS_CHUNK_2 } from "./benchmarks-chunk-2.ts";
 import { BENCHMARKS_CHUNK_3 } from "./benchmarks-chunk-3.ts";
 import { BENCHMARKS_CHUNK_4 } from "./benchmarks-chunk-4.ts";
+import { BENCHMARKS_CHUNK_5 } from "./benchmarks-chunk-5.ts";
 export interface HardcodedBenchmark {
-	intelligenceIndex: number; // AA score 0-70
-	normalizedScore: number; // Our score 0-100
 	codingIndex?: number;
 	mathIndex?: number;
 	agenticIndex?: number;
@@ -33,6 +32,12 @@ export interface HardcodedBenchmark {
 	supportsReasoning: boolean;
 	supportsVision: boolean;
 	lastUpdated: string;
+	/**
+	 * Original model name from the source API (for debugging name collisions).
+	 * Only present when regenerated; absent in shipped data.
+	 */
+	originalModel?: string;
 }
 /**
@@ -45,4 +50,5 @@ export const HARDCODED_BENCHMARKS: Record<string, HardcodedBenchmark> = {
 	...BENCHMARKS_CHUNK_2,
 	...BENCHMARKS_CHUNK_3,
 	...BENCHMARKS_CHUNK_4,
+	...BENCHMARKS_CHUNK_5,
 };

package/providers/cline/cline-models.ts CHANGED

@@ -1,27 +1,70 @@
 /**
  * Cline model fetching.
  *
- * Fetches ALL models from OpenRouter (Cline's gateway).
- * Free/paid filtering is handled by the global free-only filter.
+ * Fetches Cline's own model catalog from api.cline.bot instead of OpenRouter.
+ * Cline also exposes a recommended/free-to-try list; those models may have
+ * non-zero list pricing in the catalog, so we mark exact recommended-free IDs
+ * as zero-cost for pi-free's free-model filter.
  */
 import { applyHidden } from "../../config.ts";
 import {
-	BASE_URL_OPENROUTER,
+	BASE_URL_CLINE,
 	DEFAULT_FETCH_TIMEOUT_MS,
 	PROVIDER_CLINE,
 } from "../../constants.ts";
 import type { ProviderModelConfig } from "../../lib/types.ts";
 import { cleanModelName, fetchWithRetry } from "../../lib/util.ts";
-interface OpenRouterRaw {
+interface ClineRaw {
 	id: string;
-	name: string;
-	context_length?: number;
-	supported_parameters?: string[];
-	architecture?: { input_modalities?: string[]; output_modalities?: string[] };
-	top_provider?: { max_completion_tokens?: number | null };
-	pricing?: { prompt?: string; completion?: string };
+	name?: string;
+	description?: string | null;
+	context_length?: number | null;
+	supported_parameters?: string[] | null;
+	architecture?: {
+		modality?: string | string[] | null;
+		input_modalities?: string[] | null;
+		output_modalities?: string[] | null;
+	} | null;
+	top_provider?: {
+		max_completion_tokens?: number | null;
+		context_length?: number | null;
+	} | null;
+	pricing?: {
+		prompt?: string | null;
+		completion?: string | null;
+		input_cache_read?: string | null;
+		input_cache_write?: string | null;
+	} | null;
+}
+interface ClineRecommendedModel {
+	id: string;
+	name?: string;
+	description?: string;
+	tags?: string[];
+}
+interface ClineRecommendedModelsResponse {
+	recommended?: ClineRecommendedModel[];
+	free?: ClineRecommendedModel[];
+}
+const VS_CODE_VERSION = "1.109.3";
+const CLINE_EXTENSION_VERSION = "3.76.0";
+function buildClineFetchHeaders(): Record<string, string> {
+	return {
+		Accept: "application/json",
+		"Content-Type": "application/json",
+		"User-Agent": `Cline/${CLINE_EXTENSION_VERSION}`,
+		"X-PLATFORM": "Visual Studio Code",
+		"X-PLATFORM-VERSION": VS_CODE_VERSION,
+		"X-CLIENT-TYPE": "VSCode Extension",
+		"X-CLIENT-VERSION": CLINE_EXTENSION_VERSION,
+		"X-CORE-VERSION": CLINE_EXTENSION_VERSION,
+	};
 }
 function extractNameFromId(id: string): string {
@@ -34,84 +77,169 @@ function extractNameFromId(id: string): string {
 /**
  * Parse pricing string to cost per million tokens.
- * OpenRouter returns pricing as string (e.g., "0.0001" or "0").
+ * Cline returns pricing as string per token (e.g. "0.0001" or "0").
  */
-function parsePricing(pricingStr: string | undefined): number {
+function parsePricing(pricingStr: string | null | undefined): number {
 	if (!pricingStr || pricingStr === "0") return 0;
 	const parsed = Number.parseFloat(pricingStr);
-	return Number.isNaN(parsed) ? 0 : parsed * 1_000_000; // Convert to per-million
+	return Number.isNaN(parsed) ? 0 : parsed * 1_000_000;
 }
-/**
- * Check if a model is free (both prompt and completion pricing is 0).
- */
-function isFreeModel(info: OpenRouterRaw): boolean {
-	return info.pricing?.prompt === "0" && info.pricing?.completion === "0";
+function modalityIncludes(
+	modality: string | string[] | null | undefined,
+	needle: string,
+): boolean {
+	if (Array.isArray(modality)) return modality.includes(needle);
+	return typeof modality === "string" && modality.includes(needle);
 }
-/**
- * Fetch ALL models from OpenRouter.
- * @param freeOnly - If true, return only free models
- */
-export async function fetchClineModels(
-	freeOnly = false,
-): Promise<ProviderModelConfig[]> {
+function hasTextOutput(info: ClineRaw): boolean {
+	const outputMods = info.architecture?.output_modalities;
+	if (Array.isArray(outputMods) && outputMods.length > 0) {
+		return outputMods.includes("text");
+	}
+	return modalityIncludes(info.architecture?.modality, "text");
+}
+function supportsImages(info: ClineRaw): boolean {
+	const inputMods = info.architecture?.input_modalities;
+	if (Array.isArray(inputMods) && inputMods.includes("image")) return true;
+	return modalityIncludes(info.architecture?.modality, "image");
+}
+function modelFromRecommended(
+	model: ClineRecommendedModel,
+): ProviderModelConfig & { _pricingKnown?: boolean } {
+	const name = model.name?.trim() || extractNameFromId(model.id);
+	return {
+		id: model.id,
+		name: `${cleanModelName(name)} (Cline)`,
+		reasoning: false,
+		input: ["text"],
+		cost: {
+			input: 0,
+			output: 0,
+			cacheRead: 0,
+			cacheWrite: 0,
+		},
+		contextWindow: 1_000_000,
+		maxTokens: 65_536,
+		_pricingKnown: true,
+	};
+}
+function modelFromCatalog(
+	info: ClineRaw,
+	freeToTryIds: ReadonlySet<string>,
+): ProviderModelConfig & { _pricingKnown?: boolean } {
+	const isReasoning = !!(
+		info.supported_parameters?.includes("include_reasoning") ||
+		info.supported_parameters?.includes("reasoning")
+	);
+	const isFreeToTry = freeToTryIds.has(info.id);
+	const inputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.prompt);
+	const outputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.completion);
+	const cacheRead = isFreeToTry
+		? 0
+		: parsePricing(info.pricing?.input_cache_read);
+	const cacheWrite = isFreeToTry
+		? 0
+		: parsePricing(info.pricing?.input_cache_write);
+	const isFree = inputCost === 0 && outputCost === 0;
+	const cleanName = info.name
+		? cleanModelName(info.name)
+		: extractNameFromId(info.id);
+	return {
+		id: info.id,
+		name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
+		reasoning: isReasoning,
+		input: supportsImages(info) ? ["text", "image"] : ["text"],
+		cost: {
+			input: inputCost,
+			output: outputCost,
+			cacheRead,
+			cacheWrite,
+		},
+		contextWindow:
+			info.context_length ?? info.top_provider?.context_length ?? 128_000,
+		maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
+		_pricingKnown: info.pricing !== null && info.pricing !== undefined,
+	};
+}
+async function fetchClineRecommendedFreeModels(): Promise<
+	ClineRecommendedModel[]
+> {
 	const response = await fetchWithRetry(
-		`${BASE_URL_OPENROUTER}/models`,
-		{},
+		`${BASE_URL_CLINE}/ai/cline/recommended-models`,
+		{ headers: buildClineFetchHeaders() },
 		3,
 		1000,
 		DEFAULT_FETCH_TIMEOUT_MS,
 	);
-	if (!response.ok)
-		throw new Error(`Failed to fetch OpenRouter models: ${response.status}`);
+	if (!response.ok) return [];
-	const json = (await response.json()) as { data?: OpenRouterRaw[] };
+	const json = (await response.json()) as ClineRecommendedModelsResponse;
+	return Array.isArray(json.free) ? json.free.filter((m) => m?.id) : [];
+}
-	// Filter to usable models (chat-capable)
-	let usableModels = json.data ?? [];
+async function fetchClineCatalogModels(): Promise<ClineRaw[]> {
+	const response = await fetchWithRetry(
+		`${BASE_URL_CLINE}/ai/cline/models`,
+		{ headers: buildClineFetchHeaders() },
+		3,
+		1000,
+		DEFAULT_FETCH_TIMEOUT_MS,
+	);
-	// If freeOnly, filter to free models
-	if (freeOnly) {
-		usableModels = usableModels.filter(isFreeModel);
+	if (!response.ok)
+		throw new Error(`Failed to fetch Cline models: ${response.status}`);
+	const json = (await response.json()) as { data?: ClineRaw[] };
+	if (!Array.isArray(json.data)) {
+		throw new Error("Invalid Cline models response: missing data array");
+	}
+	return json.data;
+}
+/**
+ * Fetch models from Cline.
+ * @param freeOnly - If true, return only zero-cost/free-to-try models
+ */
+export async function fetchClineModels(
+	freeOnly = false,
+): Promise<ProviderModelConfig[]> {
+	const [catalogModels, recommendedFreeModels] = await Promise.all([
+		fetchClineCatalogModels(),
+		fetchClineRecommendedFreeModels().catch(() => []),
+	]);
+	const recommendedFreeIds = new Set(recommendedFreeModels.map((m) => m.id));
+	const models: Array<ProviderModelConfig & { _pricingKnown?: boolean }> = [];
+	const seen = new Set<string>();
+	for (const info of catalogModels) {
+		if (!hasTextOutput(info)) continue;
+		const model = modelFromCatalog(info, recommendedFreeIds);
+		models.push(model);
+		seen.add(model.id);
 	}
-	const models: ProviderModelConfig[] = [];
-	for (const info of usableModels) {
-		const isReasoning = !!(
-			info.supported_parameters?.includes("include_reasoning") ||
-			info.supported_parameters?.includes("reasoning")
-		);
-		const hasImage =
-			info.architecture?.input_modalities?.includes("image") ?? false;
-		// Calculate cost per million tokens
-		const inputCost = parsePricing(info.pricing?.prompt);
-		const outputCost = parsePricing(info.pricing?.completion);
-		const isFree = inputCost === 0 && outputCost === 0;
-		const cleanName = info.name
-			? cleanModelName(info.name)
-			: extractNameFromId(info.id);
-		models.push({
-			id: info.id,
-			name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
-			reasoning: isReasoning,
-			input: hasImage ? ["text", "image"] : ["text"],
-			cost: {
-				input: inputCost,
-				output: outputCost,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: info.context_length ?? 128_000,
-			maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
-		});
+	// The recommended/free-to-try endpoint can lead the full catalog. Include
+	// those exact IDs so newly promoted models (e.g. alibaba/qwen3.7-plus) show up.
+	for (const model of recommendedFreeModels) {
+		if (seen.has(model.id)) continue;
+		models.push(modelFromRecommended(model));
+		seen.add(model.id);
 	}
-	return applyHidden(models, PROVIDER_CLINE);
+	const filtered = freeOnly
+		? models.filter((m) => m.cost.input === 0 && m.cost.output === 0)
+		: models;
+	return applyHidden(filtered, PROVIDER_CLINE);
 }
 /**

package/providers/dynamic-built-in/index.ts CHANGED

@@ -22,6 +22,7 @@
  * OpenAI is intentionally skipped per user request.
  */
+import type { Api } from "@earendil-works/pi-ai";
 import type {
 	ExtensionAPI,
 	ProviderModelConfig,
@@ -46,9 +47,18 @@ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
 import { fetchOpenRouterCompatibleModels } from "../model-fetcher.ts";
 import { createToggleState } from "../../lib/toggle-state.ts";
 import { enhanceWithCI } from "../../provider-helper.ts";
+import {
+	OPENCODE_DYNAMIC_API,
+	createOpenCodeSessionTracker,
+	createOpenCodeStreamSimple,
+	isOpenCodeProvider,
+} from "../opencode-session.ts";
 const _logger = createLogger("dynamic-built-in");
+// OpenCode headers must be regenerated for every LLM request.
+const _opencodeSession = createOpenCodeSessionTracker();
 // =============================================================================
 // Generic Model Fetcher
 // =============================================================================
@@ -170,7 +180,7 @@ interface DynamicProviderDef {
 	providerId: string;
 	getApiKey: () => string | undefined;
 	baseUrl: string;
-	api: "openai-completions" | "mistral-conversations" | "anthropic-messages";
+	api: Api;
 	defaultShowPaid: boolean | (() => boolean);
 	/** Optional per-provider compat overrides (e.g., DeepSeek proxy). */
 	compat?: ProviderModelConfig["compat"];
@@ -217,10 +227,18 @@ const DYNAMIC_PROVIDERS: DynamicProviderDef[] = [
 		providerId: "opencode",
 		getApiKey: getOpencodeApiKey,
 		baseUrl: "https://opencode.ai/zen/v1",
-		api: "openai-completions",
+		api: OPENCODE_DYNAMIC_API,
 		defaultShowPaid: getOpencodeShowPaid,
 		// OpenCode API returns no pricing — _pricingKnown=false, name-based detection
 	},
+	{
+		providerId: "opencode-go",
+		getApiKey: getOpencodeApiKey,
+		baseUrl: "https://opencode.ai/zen/go/v1",
+		api: OPENCODE_DYNAMIC_API,
+		defaultShowPaid: getOpencodeShowPaid,
+		// OpenCode Go uses the same OPENCODE_API_KEY and per-request headers
+	},
 	{
 		providerId: "openrouter",
 		getApiKey: getOpenrouterApiKey,
@@ -261,9 +279,11 @@ async function discoverAndRegister(
 			});
 		}
-		// Apply DeepSeek proxy compat to matching models
+		// Apply DeepSeek proxy compat to matching models. OpenCode headers are
+		// injected per request by createOpenCodeStreamSimple(), not stored here.
 		allModels = allModels.map((m) => ({
 			...m,
+			api: isOpenCodeProvider(config.providerId) ? OPENCODE_DYNAMIC_API : m.api,
 			compat: getProxyModelCompat(m) ?? m.compat,
 		}));
 	} catch (error) {
@@ -327,6 +347,9 @@ async function registerProvider(
 			baseUrl: config.baseUrl,
 			apiKey,
 			api: config.api,
+			...(isOpenCodeProvider(config.providerId)
+				? { streamSimple: createOpenCodeStreamSimple(_opencodeSession) }
+				: {}),
 			models: enhanceWithCI(models, config.providerId),
 		});
 	};
@@ -439,7 +462,7 @@ export async function setupDynamicBuiltInProviders(
 						freeOnly: false,
 					}),
 			},
-			fastrouterApiKey ?? "FASTROUTER_API_KEY",
+			fastrouterApiKey ?? "$FASTROUTER_API_KEY",
 		),
 	);

package/providers/kilo/kilo.ts CHANGED

@@ -38,7 +38,7 @@ import { fetchKiloModels, KILO_GATEWAY_BASE } from "./kilo-models.ts";
 const KILO_PROVIDER_CONFIG = {
 	providerId: PROVIDER_KILO,
 	baseUrl: KILO_GATEWAY_BASE,
-	apiKey: "KILO_API_KEY",
+	apiKey: "$KILO_API_KEY",
 	headers: {
 		"X-KILOCODE-EDITORNAME": "Pi",
 	},
@@ -149,7 +149,7 @@ export default async function kiloProvider(pi: ExtensionAPI) {
 	// Register initial provider (default to free models)
 	pi.registerProvider(PROVIDER_KILO, {
 		baseUrl: KILO_GATEWAY_BASE,
-		apiKey: "KILO_API_KEY",
+		apiKey: "$KILO_API_KEY",
 		api: "openai-completions" as const,
 		headers: {
 			"X-KILOCODE-EDITORNAME": "Pi",

package/providers/model-fetcher.ts CHANGED

@@ -24,6 +24,7 @@ interface OpenRouterCompatibleModel {
 	};
 	top_provider?: { max_completion_tokens?: number | null };
 	supported_parameters?: string[];
+	isFree?: boolean;
 }
 interface FetchModelsOptions {
@@ -98,8 +99,9 @@ export async function fetchOpenRouterCompatibleModels(
 			const outputMods = m.architecture?.output_modalities ?? [];
 			if (outputMods.includes("image")) return false;
-			// Filter by pricing if freeOnly
+			// Filter by provider flag when available, otherwise pricing.
 			if (freeOnly) {
+				if (typeof m.isFree === "boolean") return m.isFree;
 				const prompt = Number.parseFloat(m.pricing?.prompt ?? "1");
 				const completion = Number.parseFloat(m.pricing?.completion ?? "1");
 				if (prompt !== 0 || completion !== 0) return false;

package/providers/nvidia/nvidia.ts CHANGED

@@ -31,6 +31,10 @@ import {
 	URL_MODELS_DEV,
 } from "../../constants.ts";
 import { createLogger } from "../../lib/logger.ts";
+import {
+	getModelsDueForProbe,
+	recordModelProbeResults,
+} from "../../lib/probe-cache.ts";
 import { registerWithGlobalToggle } from "../../lib/registry.ts";
 import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
 import {
@@ -287,12 +291,12 @@ async function fetchNvidiaModels(
 /**
  * Probe a single NVIDIA model with a minimal chat request.
- * Returns true if the model is routable (not 404), false if it 404s.
+ * Returns "broken" only for deterministic 404s; network errors are unknown.
  */
 async function probeNvidiaModel(
 	apiKey: string,
 	modelId: string,
-): Promise<boolean> {
+): Promise<"ok" | "broken" | "unknown"> {
 	try {
 		const response = await fetchWithTimeout(
 			`${BASE_URL_NVIDIA}/chat/completions`,
@@ -313,9 +317,9 @@ async function probeNvidiaModel(
 		);
 		// 404 = function not found (model not provisioned)
 		// 200/400/401/etc = at least routable
-		return response.status !== 404;
+		return response.status === 404 ? "broken" : "ok";
 	} catch {
-		return true; // Network errors / timeouts are not "model not found"
+		return "unknown"; // Network errors / timeouts are not "model not found"
 	}
 }
@@ -330,26 +334,51 @@ async function runNvidiaProbe(
 	modelsToTest: ProviderModelConfig[],
 	stored: { free: ProviderModelConfig[]; all: ProviderModelConfig[] },
 	reRegister: (models: ProviderModelConfig[]) => void,
-): Promise<void> {
+	options: { useCache?: boolean } = {},
+): Promise<string[]> {
+	const modelIdsToProbe = options.useCache
+		? new Set(
+				getModelsDueForProbe(
+					PROVIDER_NVIDIA,
+					modelsToTest.map((m) => m.id),
+				),
+			)
+		: undefined;
+	const probeCandidates = modelIdsToProbe
+		? modelsToTest.filter((m) => modelIdsToProbe.has(m.id))
+		: modelsToTest;
+	if (probeCandidates.length === 0) {
+		_nvidiaLogger.info("Auto-probe: NVIDIA probe cache is fresh");
+		return [];
+	}
 	const notFound: string[] = [];
+	const cacheableResults: Array<{ modelId: string; status: "ok" | "broken" }> =
+		[];
 	const batchSize = 5;
-	for (let i = 0; i < modelsToTest.length; i += batchSize) {
-		const batch = modelsToTest.slice(i, i + batchSize);
+	for (let i = 0; i < probeCandidates.length; i += batchSize) {
+		const batch = probeCandidates.slice(i, i + batchSize);
 		const results = await Promise.all(
 			batch.map(async (m) => {
-				const ok = await probeNvidiaModel(apiKey, m.id);
-				return { id: m.id, ok };
+				const status = await probeNvidiaModel(apiKey, m.id);
+				return { id: m.id, status };
 			}),
 		);
 		for (const r of results) {
-			if (!r.ok) notFound.push(r.id);
+			if (r.status === "broken") notFound.push(r.id);
+			if (r.status !== "unknown") {
+				cacheableResults.push({ modelId: r.id, status: r.status });
+			}
 		}
 	}
+	recordModelProbeResults(PROVIDER_NVIDIA, cacheableResults);
 	if (notFound.length === 0) {
-		_nvidiaLogger.info("Auto-probe: all NVIDIA models are routable");
-		return;
+		_nvidiaLogger.info("Auto-probe: all checked NVIDIA models are routable");
+		return [];
 	}
 	// Auto-hide 404 models in config (provider-scoped)
@@ -367,6 +396,7 @@ async function runNvidiaProbe(
 	_nvidiaLogger.info(
 		`Auto-probe: found ${notFound.length} broken models (auto-hidden)`,
 	);
+	return notFound;
 }
 export default async function nvidiaProvider(pi: ExtensionAPI) {
@@ -391,7 +421,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
 	const reRegister = createReRegister(pi, {
 		providerId: PROVIDER_NVIDIA,
 		baseUrl: BASE_URL_NVIDIA,
-		apiKey: apiKey || "NVIDIA_API_KEY",
+		apiKey: apiKey || "$NVIDIA_API_KEY",
 	});
 	// Register with global toggle system
@@ -401,7 +431,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
 	const initialModels = allModels;
 	pi.registerProvider(PROVIDER_NVIDIA, {
 		baseUrl: BASE_URL_NVIDIA,
-		apiKey: apiKey || "NVIDIA_API_KEY",
+		apiKey: apiKey || "$NVIDIA_API_KEY",
 		api: "openai-completions" as const,
 		authHeader: true,
 		headers: {
@@ -416,7 +446,9 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
 		if (_autoProbeDone || !apiKey) return;
 		_autoProbeDone = true;
 		_nvidiaLogger.info("Starting lazy auto-probe of NVIDIA models...");
-		runNvidiaProbe(apiKey, allModels, stored, reRegister).catch((err) => {
+		runNvidiaProbe(apiKey, allModels, stored, reRegister, {
+			useCache: true,
+		}).catch((err) => {
 			_nvidiaLogger.warn("Auto-probe failed", {
 				error: err instanceof Error ? err.message : String(err),
 			});