pi-free 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@
11
11
  * under the 3000-line limit. This file re-exports the merged result.
12
12
  *
13
13
  * To update: Run scripts/update-benchmarks.ts with ARTIFICIAL_ANALYSIS_API_KEY
14
+ * The script auto-updates this file's imports and spread when chunk count changes.
14
15
  */
15
16
 
16
17
  import { BENCHMARKS_CHUNK_0 } from "./benchmarks-chunk-0.ts";
@@ -18,10 +19,8 @@ import { BENCHMARKS_CHUNK_1 } from "./benchmarks-chunk-1.ts";
18
19
  import { BENCHMARKS_CHUNK_2 } from "./benchmarks-chunk-2.ts";
19
20
  import { BENCHMARKS_CHUNK_3 } from "./benchmarks-chunk-3.ts";
20
21
  import { BENCHMARKS_CHUNK_4 } from "./benchmarks-chunk-4.ts";
21
-
22
+ import { BENCHMARKS_CHUNK_5 } from "./benchmarks-chunk-5.ts";
22
23
  export interface HardcodedBenchmark {
23
- intelligenceIndex: number; // AA score 0-70
24
- normalizedScore: number; // Our score 0-100
25
24
  codingIndex?: number;
26
25
  mathIndex?: number;
27
26
  agenticIndex?: number;
@@ -33,6 +32,12 @@ export interface HardcodedBenchmark {
33
32
  supportsReasoning: boolean;
34
33
  supportsVision: boolean;
35
34
  lastUpdated: string;
35
+
36
+ /**
37
+ * Original model name from the source API (for debugging name collisions).
38
+ * Only present when regenerated; absent in shipped data.
39
+ */
40
+ originalModel?: string;
36
41
  }
37
42
 
38
43
  /**
@@ -45,4 +50,5 @@ export const HARDCODED_BENCHMARKS: Record<string, HardcodedBenchmark> = {
45
50
  ...BENCHMARKS_CHUNK_2,
46
51
  ...BENCHMARKS_CHUNK_3,
47
52
  ...BENCHMARKS_CHUNK_4,
53
+ ...BENCHMARKS_CHUNK_5,
48
54
  };
@@ -1,27 +1,71 @@
1
1
  /**
2
2
  * Cline model fetching.
3
3
  *
4
- * Fetches ALL models from OpenRouter (Cline's gateway).
5
- * Free/paid filtering is handled by the global free-only filter.
4
+ * Fetches Cline's own model catalog from api.cline.bot instead of OpenRouter.
5
+ * Cline also exposes a recommended/free-to-try list; those models may have
6
+ * non-zero list pricing in the catalog, so we mark exact recommended-free IDs
7
+ * as zero-cost for pi-free's free-model filter.
6
8
  */
7
9
 
8
10
  import { applyHidden } from "../../config.ts";
9
11
  import {
10
- BASE_URL_OPENROUTER,
12
+ BASE_URL_CLINE,
11
13
  DEFAULT_FETCH_TIMEOUT_MS,
12
14
  PROVIDER_CLINE,
13
15
  } from "../../constants.ts";
14
16
  import type { ProviderModelConfig } from "../../lib/types.ts";
17
+ import { getProxyModelCompat } from "../../lib/provider-compat.ts";
15
18
  import { cleanModelName, fetchWithRetry } from "../../lib/util.ts";
16
19
 
17
- interface OpenRouterRaw {
20
+ interface ClineRaw {
18
21
  id: string;
19
- name: string;
20
- context_length?: number;
21
- supported_parameters?: string[];
22
- architecture?: { input_modalities?: string[]; output_modalities?: string[] };
23
- top_provider?: { max_completion_tokens?: number | null };
24
- pricing?: { prompt?: string; completion?: string };
22
+ name?: string;
23
+ description?: string | null;
24
+ context_length?: number | null;
25
+ supported_parameters?: string[] | null;
26
+ architecture?: {
27
+ modality?: string | string[] | null;
28
+ input_modalities?: string[] | null;
29
+ output_modalities?: string[] | null;
30
+ } | null;
31
+ top_provider?: {
32
+ max_completion_tokens?: number | null;
33
+ context_length?: number | null;
34
+ } | null;
35
+ pricing?: {
36
+ prompt?: string | null;
37
+ completion?: string | null;
38
+ input_cache_read?: string | null;
39
+ input_cache_write?: string | null;
40
+ } | null;
41
+ }
42
+
43
+ interface ClineRecommendedModel {
44
+ id: string;
45
+ name?: string;
46
+ description?: string;
47
+ tags?: string[];
48
+ }
49
+
50
+ interface ClineRecommendedModelsResponse {
51
+ recommended?: ClineRecommendedModel[];
52
+ free?: ClineRecommendedModel[];
53
+ }
54
+
55
+ const VS_CODE_VERSION = "1.109.3";
56
+ const CLINE_EXTENSION_VERSION = "3.76.0";
57
+
58
+ function buildClineFetchHeaders(): Record<string, string> {
59
+ return {
60
+ Accept: "application/json",
61
+ "Content-Type": "application/json",
62
+ "User-Agent": `Cline/${CLINE_EXTENSION_VERSION}`,
63
+ "X-PLATFORM": "Visual Studio Code",
64
+ "X-PLATFORM-VERSION": VS_CODE_VERSION,
65
+ "X-CLIENT-TYPE": "VSCode Extension",
66
+ "X-CLIENT-VERSION": CLINE_EXTENSION_VERSION,
67
+ "X-CORE-VERSION": CLINE_EXTENSION_VERSION,
68
+ };
25
69
  }
26
70
 
27
71
  function extractNameFromId(id: string): string {
@@ -34,84 +78,172 @@ function extractNameFromId(id: string): string {
34
78
 
35
79
  /**
36
80
  * Parse pricing string to cost per million tokens.
37
- * OpenRouter returns pricing as string (e.g., "0.0001" or "0").
81
+ * Cline returns pricing as string per token (e.g. "0.0001" or "0").
38
82
  */
39
- function parsePricing(pricingStr: string | undefined): number {
83
+ function parsePricing(pricingStr: string | null | undefined): number {
40
84
  if (!pricingStr || pricingStr === "0") return 0;
41
85
  const parsed = Number.parseFloat(pricingStr);
42
- return Number.isNaN(parsed) ? 0 : parsed * 1_000_000; // Convert to per-million
86
+ return Number.isNaN(parsed) ? 0 : parsed * 1_000_000;
43
87
  }
44
88
 
45
- /**
46
- * Check if a model is free (both prompt and completion pricing is 0).
47
- */
48
- function isFreeModel(info: OpenRouterRaw): boolean {
49
- return info.pricing?.prompt === "0" && info.pricing?.completion === "0";
89
+ function modalityIncludes(
90
+ modality: string | string[] | null | undefined,
91
+ needle: string,
92
+ ): boolean {
93
+ if (Array.isArray(modality)) return modality.includes(needle);
94
+ return typeof modality === "string" && modality.includes(needle);
50
95
  }
51
96
 
52
- /**
53
- * Fetch ALL models from OpenRouter.
54
- * @param freeOnly - If true, return only free models
55
- */
56
- export async function fetchClineModels(
57
- freeOnly = false,
58
- ): Promise<ProviderModelConfig[]> {
97
+ function hasTextOutput(info: ClineRaw): boolean {
98
+ const outputMods = info.architecture?.output_modalities;
99
+ if (Array.isArray(outputMods) && outputMods.length > 0) {
100
+ return outputMods.includes("text");
101
+ }
102
+ return modalityIncludes(info.architecture?.modality, "text");
103
+ }
104
+
105
+ function supportsImages(info: ClineRaw): boolean {
106
+ const inputMods = info.architecture?.input_modalities;
107
+ if (Array.isArray(inputMods) && inputMods.includes("image")) return true;
108
+ return modalityIncludes(info.architecture?.modality, "image");
109
+ }
110
+
111
+ function modelFromRecommended(
112
+ model: ClineRecommendedModel,
113
+ ): ProviderModelConfig & { _pricingKnown?: boolean } {
114
+ const name = model.name?.trim() || extractNameFromId(model.id);
115
+ return {
116
+ id: model.id,
117
+ name: `${cleanModelName(name)} (Cline)`,
118
+ reasoning: false,
119
+ input: ["text"],
120
+ cost: {
121
+ input: 0,
122
+ output: 0,
123
+ cacheRead: 0,
124
+ cacheWrite: 0,
125
+ },
126
+ contextWindow: 1_000_000,
127
+ maxTokens: 65_536,
128
+ _pricingKnown: true,
129
+ };
130
+ }
131
+
132
+ function modelFromCatalog(
133
+ info: ClineRaw,
134
+ freeToTryIds: ReadonlySet<string>,
135
+ ): ProviderModelConfig & { _pricingKnown?: boolean } {
136
+ const isReasoning = !!(
137
+ info.supported_parameters?.includes("include_reasoning") ||
138
+ info.supported_parameters?.includes("reasoning")
139
+ );
140
+ const isFreeToTry = freeToTryIds.has(info.id);
141
+ const inputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.prompt);
142
+ const outputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.completion);
143
+ const cacheRead = isFreeToTry
144
+ ? 0
145
+ : parsePricing(info.pricing?.input_cache_read);
146
+ const cacheWrite = isFreeToTry
147
+ ? 0
148
+ : parsePricing(info.pricing?.input_cache_write);
149
+ const isFree = inputCost === 0 && outputCost === 0;
150
+ const cleanName = info.name
151
+ ? cleanModelName(info.name)
152
+ : extractNameFromId(info.id);
153
+
154
+ return {
155
+ id: info.id,
156
+ name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
157
+ reasoning: isReasoning,
158
+ input: supportsImages(info) ? ["text", "image"] : ["text"],
159
+ cost: {
160
+ input: inputCost,
161
+ output: outputCost,
162
+ cacheRead,
163
+ cacheWrite,
164
+ },
165
+ contextWindow:
166
+ info.context_length ?? info.top_provider?.context_length ?? 128_000,
167
+ maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
168
+ ...(getProxyModelCompat({ id: info.id, name: info.name })
169
+ ? { compat: getProxyModelCompat({ id: info.id, name: info.name }) }
170
+ : {}),
171
+ _pricingKnown: info.pricing !== null && info.pricing !== undefined,
172
+ } as ProviderModelConfig & { _pricingKnown?: boolean; compat?: any };
173
+ }
174
+
175
+ async function fetchClineRecommendedFreeModels(): Promise<
176
+ ClineRecommendedModel[]
177
+ > {
59
178
  const response = await fetchWithRetry(
60
- `${BASE_URL_OPENROUTER}/models`,
61
- {},
179
+ `${BASE_URL_CLINE}/ai/cline/recommended-models`,
180
+ { headers: buildClineFetchHeaders() },
62
181
  3,
63
182
  1000,
64
183
  DEFAULT_FETCH_TIMEOUT_MS,
65
184
  );
66
185
 
67
- if (!response.ok)
68
- throw new Error(`Failed to fetch OpenRouter models: ${response.status}`);
186
+ if (!response.ok) return [];
69
187
 
70
- const json = (await response.json()) as { data?: OpenRouterRaw[] };
188
+ const json = (await response.json()) as ClineRecommendedModelsResponse;
189
+ return Array.isArray(json.free) ? json.free.filter((m) => m?.id) : [];
190
+ }
71
191
 
72
- // Filter to usable models (chat-capable)
73
- let usableModels = json.data ?? [];
192
+ async function fetchClineCatalogModels(): Promise<ClineRaw[]> {
193
+ const response = await fetchWithRetry(
194
+ `${BASE_URL_CLINE}/ai/cline/models`,
195
+ { headers: buildClineFetchHeaders() },
196
+ 3,
197
+ 1000,
198
+ DEFAULT_FETCH_TIMEOUT_MS,
199
+ );
74
200
 
75
- // If freeOnly, filter to free models
76
- if (freeOnly) {
77
- usableModels = usableModels.filter(isFreeModel);
201
+ if (!response.ok)
202
+ throw new Error(`Failed to fetch Cline models: ${response.status}`);
203
+
204
+ const json = (await response.json()) as { data?: ClineRaw[] };
205
+ if (!Array.isArray(json.data)) {
206
+ throw new Error("Invalid Cline models response: missing data array");
207
+ }
208
+ return json.data;
209
+ }
210
+
211
+ /**
212
+ * Fetch models from Cline.
213
+ * @param freeOnly - If true, return only zero-cost/free-to-try models
214
+ */
215
+ export async function fetchClineModels(
216
+ freeOnly = false,
217
+ ): Promise<ProviderModelConfig[]> {
218
+ const [catalogModels, recommendedFreeModels] = await Promise.all([
219
+ fetchClineCatalogModels(),
220
+ fetchClineRecommendedFreeModels().catch(() => []),
221
+ ]);
222
+ const recommendedFreeIds = new Set(recommendedFreeModels.map((m) => m.id));
223
+
224
+ const models: Array<ProviderModelConfig & { _pricingKnown?: boolean }> = [];
225
+ const seen = new Set<string>();
226
+
227
+ for (const info of catalogModels) {
228
+ if (!hasTextOutput(info)) continue;
229
+ const model = modelFromCatalog(info, recommendedFreeIds);
230
+ models.push(model);
231
+ seen.add(model.id);
78
232
  }
79
233
 
80
- const models: ProviderModelConfig[] = [];
81
- for (const info of usableModels) {
82
- const isReasoning = !!(
83
- info.supported_parameters?.includes("include_reasoning") ||
84
- info.supported_parameters?.includes("reasoning")
85
- );
86
- const hasImage =
87
- info.architecture?.input_modalities?.includes("image") ?? false;
88
-
89
- // Calculate cost per million tokens
90
- const inputCost = parsePricing(info.pricing?.prompt);
91
- const outputCost = parsePricing(info.pricing?.completion);
92
- const isFree = inputCost === 0 && outputCost === 0;
93
-
94
- const cleanName = info.name
95
- ? cleanModelName(info.name)
96
- : extractNameFromId(info.id);
97
-
98
- models.push({
99
- id: info.id,
100
- name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
101
- reasoning: isReasoning,
102
- input: hasImage ? ["text", "image"] : ["text"],
103
- cost: {
104
- input: inputCost,
105
- output: outputCost,
106
- cacheRead: 0,
107
- cacheWrite: 0,
108
- },
109
- contextWindow: info.context_length ?? 128_000,
110
- maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
111
- });
234
+ // The recommended/free-to-try endpoint can lead the full catalog. Include
235
+ // those exact IDs so newly promoted models (e.g. alibaba/qwen3.7-plus) show up.
236
+ for (const model of recommendedFreeModels) {
237
+ if (seen.has(model.id)) continue;
238
+ models.push(modelFromRecommended(model));
239
+ seen.add(model.id);
112
240
  }
113
241
 
114
- return applyHidden(models, PROVIDER_CLINE);
242
+ const filtered = freeOnly
243
+ ? models.filter((m) => m.cost.input === 0 && m.cost.output === 0)
244
+ : models;
245
+
246
+ return applyHidden(filtered, PROVIDER_CLINE);
115
247
  }
116
248
 
117
249
  /**
@@ -73,9 +73,9 @@ function toApiKey(credentials: OAuthCredentials): string {
73
73
  // =============================================================================
74
74
 
75
75
  const TASK_PROGRESS_BLOCK = `
76
- # task_progress List (Optional - Plan Mode)
76
+ # task_progress List (Optional)
77
77
 
78
- While in PLAN MODE, if you've outlined concrete steps or requirements for the user, you may include a preliminary todo list using the task_progress parameter.
78
+ You may include a todo list using the task_progress parameter to track progress on multi-step tasks.
79
79
 
80
80
  1. To create or update a todo list, include the task_progress parameter in the next tool call
81
81
  2. Review each item and update its status:
@@ -100,7 +100,7 @@ function buildEnvironmentDetails(): string {
100
100
  0 / 204.8K tokens used (0%)
101
101
 
102
102
  # Current Mode
103
- PLAN MODE
103
+ ACT MODE
104
104
  </environmentDetails>`;
105
105
  }
106
106
 
@@ -462,7 +462,7 @@ export async function setupDynamicBuiltInProviders(
462
462
  freeOnly: false,
463
463
  }),
464
464
  },
465
- fastrouterApiKey ?? "FASTROUTER_API_KEY",
465
+ fastrouterApiKey ?? "$FASTROUTER_API_KEY",
466
466
  ),
467
467
  );
468
468
 
@@ -38,7 +38,7 @@ import { fetchKiloModels, KILO_GATEWAY_BASE } from "./kilo-models.ts";
38
38
  const KILO_PROVIDER_CONFIG = {
39
39
  providerId: PROVIDER_KILO,
40
40
  baseUrl: KILO_GATEWAY_BASE,
41
- apiKey: "KILO_API_KEY",
41
+ apiKey: "$KILO_API_KEY",
42
42
  headers: {
43
43
  "X-KILOCODE-EDITORNAME": "Pi",
44
44
  },
@@ -149,7 +149,7 @@ export default async function kiloProvider(pi: ExtensionAPI) {
149
149
  // Register initial provider (default to free models)
150
150
  pi.registerProvider(PROVIDER_KILO, {
151
151
  baseUrl: KILO_GATEWAY_BASE,
152
- apiKey: "KILO_API_KEY",
152
+ apiKey: "$KILO_API_KEY",
153
153
  api: "openai-completions" as const,
154
154
  headers: {
155
155
  "X-KILOCODE-EDITORNAME": "Pi",
@@ -24,6 +24,7 @@ interface OpenRouterCompatibleModel {
24
24
  };
25
25
  top_provider?: { max_completion_tokens?: number | null };
26
26
  supported_parameters?: string[];
27
+ isFree?: boolean;
27
28
  }
28
29
 
29
30
  interface FetchModelsOptions {
@@ -98,8 +99,9 @@ export async function fetchOpenRouterCompatibleModels(
98
99
  const outputMods = m.architecture?.output_modalities ?? [];
99
100
  if (outputMods.includes("image")) return false;
100
101
 
101
- // Filter by pricing if freeOnly
102
+ // Filter by provider flag when available, otherwise pricing.
102
103
  if (freeOnly) {
104
+ if (typeof m.isFree === "boolean") return m.isFree;
103
105
  const prompt = Number.parseFloat(m.pricing?.prompt ?? "1");
104
106
  const completion = Number.parseFloat(m.pricing?.completion ?? "1");
105
107
  if (prompt !== 0 || completion !== 0) return false;
@@ -31,6 +31,14 @@ import {
31
31
  URL_MODELS_DEV,
32
32
  } from "../../constants.ts";
33
33
  import { createLogger } from "../../lib/logger.ts";
34
+ import {
35
+ getModelsDueForProbe,
36
+ recordModelProbeResults,
37
+ } from "../../lib/probe-cache.ts";
38
+ import {
39
+ getProxyModelCompat,
40
+ isLikelyReasoningModel,
41
+ } from "../../lib/provider-compat.ts";
34
42
  import { registerWithGlobalToggle } from "../../lib/registry.ts";
35
43
  import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
36
44
  import {
@@ -151,7 +159,8 @@ function inferModelFromId(id: string): ModelsDevModel | null {
151
159
  .replaceAll(/\b(\d+(?:\.\d+)?)b\b/gi, "$1B");
152
160
 
153
161
  const hasVision = /vision|multimodal|vl/i.test(id);
154
- const hasReasoning = /reason|r1|thinking/i.test(id);
162
+ const hasReasoning =
163
+ /reason|r1|thinking/i.test(id) || isLikelyReasoningModel({ id, name });
155
164
 
156
165
  return {
157
166
  id,
@@ -273,6 +282,7 @@ async function fetchNvidiaModels(
273
282
  },
274
283
  contextWindow: m.limit.context,
275
284
  maxTokens: m.limit.output,
285
+ compat: getProxyModelCompat({ id: m.id, name: m.name }),
276
286
  }),
277
287
  ),
278
288
  PROVIDER_NVIDIA,
@@ -287,12 +297,12 @@ async function fetchNvidiaModels(
287
297
 
288
298
  /**
289
299
  * Probe a single NVIDIA model with a minimal chat request.
290
- * Returns true if the model is routable (not 404), false if it 404s.
300
+ * Returns "broken" only for deterministic 404s; network errors are unknown.
291
301
  */
292
302
  async function probeNvidiaModel(
293
303
  apiKey: string,
294
304
  modelId: string,
295
- ): Promise<boolean> {
305
+ ): Promise<"ok" | "broken" | "unknown"> {
296
306
  try {
297
307
  const response = await fetchWithTimeout(
298
308
  `${BASE_URL_NVIDIA}/chat/completions`,
@@ -313,9 +323,9 @@ async function probeNvidiaModel(
313
323
  );
314
324
  // 404 = function not found (model not provisioned)
315
325
  // 200/400/401/etc = at least routable
316
- return response.status !== 404;
326
+ return response.status === 404 ? "broken" : "ok";
317
327
  } catch {
318
- return true; // Network errors / timeouts are not "model not found"
328
+ return "unknown"; // Network errors / timeouts are not "model not found"
319
329
  }
320
330
  }
321
331
 
@@ -330,26 +340,51 @@ async function runNvidiaProbe(
330
340
  modelsToTest: ProviderModelConfig[],
331
341
  stored: { free: ProviderModelConfig[]; all: ProviderModelConfig[] },
332
342
  reRegister: (models: ProviderModelConfig[]) => void,
333
- ): Promise<void> {
343
+ options: { useCache?: boolean } = {},
344
+ ): Promise<string[]> {
345
+ const modelIdsToProbe = options.useCache
346
+ ? new Set(
347
+ getModelsDueForProbe(
348
+ PROVIDER_NVIDIA,
349
+ modelsToTest.map((m) => m.id),
350
+ ),
351
+ )
352
+ : undefined;
353
+ const probeCandidates = modelIdsToProbe
354
+ ? modelsToTest.filter((m) => modelIdsToProbe.has(m.id))
355
+ : modelsToTest;
356
+
357
+ if (probeCandidates.length === 0) {
358
+ _nvidiaLogger.info("Auto-probe: NVIDIA probe cache is fresh");
359
+ return [];
360
+ }
361
+
334
362
  const notFound: string[] = [];
363
+ const cacheableResults: Array<{ modelId: string; status: "ok" | "broken" }> =
364
+ [];
335
365
  const batchSize = 5;
336
366
 
337
- for (let i = 0; i < modelsToTest.length; i += batchSize) {
338
- const batch = modelsToTest.slice(i, i + batchSize);
367
+ for (let i = 0; i < probeCandidates.length; i += batchSize) {
368
+ const batch = probeCandidates.slice(i, i + batchSize);
339
369
  const results = await Promise.all(
340
370
  batch.map(async (m) => {
341
- const ok = await probeNvidiaModel(apiKey, m.id);
342
- return { id: m.id, ok };
371
+ const status = await probeNvidiaModel(apiKey, m.id);
372
+ return { id: m.id, status };
343
373
  }),
344
374
  );
345
375
  for (const r of results) {
346
- if (!r.ok) notFound.push(r.id);
376
+ if (r.status === "broken") notFound.push(r.id);
377
+ if (r.status !== "unknown") {
378
+ cacheableResults.push({ modelId: r.id, status: r.status });
379
+ }
347
380
  }
348
381
  }
349
382
 
383
+ recordModelProbeResults(PROVIDER_NVIDIA, cacheableResults);
384
+
350
385
  if (notFound.length === 0) {
351
- _nvidiaLogger.info("Auto-probe: all NVIDIA models are routable");
352
- return;
386
+ _nvidiaLogger.info("Auto-probe: all checked NVIDIA models are routable");
387
+ return [];
353
388
  }
354
389
 
355
390
  // Auto-hide 404 models in config (provider-scoped)
@@ -367,6 +402,7 @@ async function runNvidiaProbe(
367
402
  _nvidiaLogger.info(
368
403
  `Auto-probe: found ${notFound.length} broken models (auto-hidden)`,
369
404
  );
405
+ return notFound;
370
406
  }
371
407
 
372
408
  export default async function nvidiaProvider(pi: ExtensionAPI) {
@@ -391,7 +427,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
391
427
  const reRegister = createReRegister(pi, {
392
428
  providerId: PROVIDER_NVIDIA,
393
429
  baseUrl: BASE_URL_NVIDIA,
394
- apiKey: apiKey || "NVIDIA_API_KEY",
430
+ apiKey: apiKey || "$NVIDIA_API_KEY",
395
431
  });
396
432
 
397
433
  // Register with global toggle system
@@ -401,7 +437,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
401
437
  const initialModels = allModels;
402
438
  pi.registerProvider(PROVIDER_NVIDIA, {
403
439
  baseUrl: BASE_URL_NVIDIA,
404
- apiKey: apiKey || "NVIDIA_API_KEY",
440
+ apiKey: apiKey || "$NVIDIA_API_KEY",
405
441
  api: "openai-completions" as const,
406
442
  authHeader: true,
407
443
  headers: {
@@ -416,7 +452,9 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
416
452
  if (_autoProbeDone || !apiKey) return;
417
453
  _autoProbeDone = true;
418
454
  _nvidiaLogger.info("Starting lazy auto-probe of NVIDIA models...");
419
- runNvidiaProbe(apiKey, allModels, stored, reRegister).catch((err) => {
455
+ runNvidiaProbe(apiKey, allModels, stored, reRegister, {
456
+ useCache: true,
457
+ }).catch((err) => {
420
458
  _nvidiaLogger.warn("Auto-probe failed", {
421
459
  error: err instanceof Error ? err.message : String(err),
422
460
  });