pi-free 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@
11
11
  * under the 3000-line limit. This file re-exports the merged result.
12
12
  *
13
13
  * To update: Run scripts/update-benchmarks.ts with ARTIFICIAL_ANALYSIS_API_KEY
14
+ * The script auto-updates this file's imports and spread when chunk count changes.
14
15
  */
15
16
 
16
17
  import { BENCHMARKS_CHUNK_0 } from "./benchmarks-chunk-0.ts";
@@ -18,10 +19,8 @@ import { BENCHMARKS_CHUNK_1 } from "./benchmarks-chunk-1.ts";
18
19
  import { BENCHMARKS_CHUNK_2 } from "./benchmarks-chunk-2.ts";
19
20
  import { BENCHMARKS_CHUNK_3 } from "./benchmarks-chunk-3.ts";
20
21
  import { BENCHMARKS_CHUNK_4 } from "./benchmarks-chunk-4.ts";
21
-
22
+ import { BENCHMARKS_CHUNK_5 } from "./benchmarks-chunk-5.ts";
22
23
  export interface HardcodedBenchmark {
23
- intelligenceIndex: number; // AA score 0-70
24
- normalizedScore: number; // Our score 0-100
25
24
  codingIndex?: number;
26
25
  mathIndex?: number;
27
26
  agenticIndex?: number;
@@ -33,6 +32,12 @@ export interface HardcodedBenchmark {
33
32
  supportsReasoning: boolean;
34
33
  supportsVision: boolean;
35
34
  lastUpdated: string;
35
+
36
+ /**
37
+ * Original model name from the source API (for debugging name collisions).
38
+ * Only present when regenerated; absent in shipped data.
39
+ */
40
+ originalModel?: string;
36
41
  }
37
42
 
38
43
  /**
@@ -45,4 +50,5 @@ export const HARDCODED_BENCHMARKS: Record<string, HardcodedBenchmark> = {
45
50
  ...BENCHMARKS_CHUNK_2,
46
51
  ...BENCHMARKS_CHUNK_3,
47
52
  ...BENCHMARKS_CHUNK_4,
53
+ ...BENCHMARKS_CHUNK_5,
48
54
  };
@@ -1,27 +1,70 @@
1
1
  /**
2
2
  * Cline model fetching.
3
3
  *
4
- * Fetches ALL models from OpenRouter (Cline's gateway).
5
- * Free/paid filtering is handled by the global free-only filter.
4
+ * Fetches Cline's own model catalog from api.cline.bot instead of OpenRouter.
5
+ * Cline also exposes a recommended/free-to-try list; those models may have
6
+ * non-zero list pricing in the catalog, so we mark exact recommended-free IDs
7
+ * as zero-cost for pi-free's free-model filter.
6
8
  */
7
9
 
8
10
  import { applyHidden } from "../../config.ts";
9
11
  import {
10
- BASE_URL_OPENROUTER,
12
+ BASE_URL_CLINE,
11
13
  DEFAULT_FETCH_TIMEOUT_MS,
12
14
  PROVIDER_CLINE,
13
15
  } from "../../constants.ts";
14
16
  import type { ProviderModelConfig } from "../../lib/types.ts";
15
17
  import { cleanModelName, fetchWithRetry } from "../../lib/util.ts";
16
18
 
17
- interface OpenRouterRaw {
19
+ interface ClineRaw {
18
20
  id: string;
19
- name: string;
20
- context_length?: number;
21
- supported_parameters?: string[];
22
- architecture?: { input_modalities?: string[]; output_modalities?: string[] };
23
- top_provider?: { max_completion_tokens?: number | null };
24
- pricing?: { prompt?: string; completion?: string };
21
+ name?: string;
22
+ description?: string | null;
23
+ context_length?: number | null;
24
+ supported_parameters?: string[] | null;
25
+ architecture?: {
26
+ modality?: string | string[] | null;
27
+ input_modalities?: string[] | null;
28
+ output_modalities?: string[] | null;
29
+ } | null;
30
+ top_provider?: {
31
+ max_completion_tokens?: number | null;
32
+ context_length?: number | null;
33
+ } | null;
34
+ pricing?: {
35
+ prompt?: string | null;
36
+ completion?: string | null;
37
+ input_cache_read?: string | null;
38
+ input_cache_write?: string | null;
39
+ } | null;
40
+ }
41
+
42
+ interface ClineRecommendedModel {
43
+ id: string;
44
+ name?: string;
45
+ description?: string;
46
+ tags?: string[];
47
+ }
48
+
49
+ interface ClineRecommendedModelsResponse {
50
+ recommended?: ClineRecommendedModel[];
51
+ free?: ClineRecommendedModel[];
52
+ }
53
+
54
+ const VS_CODE_VERSION = "1.109.3";
55
+ const CLINE_EXTENSION_VERSION = "3.76.0";
56
+
57
+ function buildClineFetchHeaders(): Record<string, string> {
58
+ return {
59
+ Accept: "application/json",
60
+ "Content-Type": "application/json",
61
+ "User-Agent": `Cline/${CLINE_EXTENSION_VERSION}`,
62
+ "X-PLATFORM": "Visual Studio Code",
63
+ "X-PLATFORM-VERSION": VS_CODE_VERSION,
64
+ "X-CLIENT-TYPE": "VSCode Extension",
65
+ "X-CLIENT-VERSION": CLINE_EXTENSION_VERSION,
66
+ "X-CORE-VERSION": CLINE_EXTENSION_VERSION,
67
+ };
25
68
  }
26
69
 
27
70
  function extractNameFromId(id: string): string {
@@ -34,84 +77,169 @@ function extractNameFromId(id: string): string {
34
77
 
35
78
  /**
36
79
  * Parse pricing string to cost per million tokens.
37
- * OpenRouter returns pricing as string (e.g., "0.0001" or "0").
80
+ * Cline returns pricing as string per token (e.g. "0.0001" or "0").
38
81
  */
39
- function parsePricing(pricingStr: string | undefined): number {
82
+ function parsePricing(pricingStr: string | null | undefined): number {
40
83
  if (!pricingStr || pricingStr === "0") return 0;
41
84
  const parsed = Number.parseFloat(pricingStr);
42
- return Number.isNaN(parsed) ? 0 : parsed * 1_000_000; // Convert to per-million
85
+ return Number.isNaN(parsed) ? 0 : parsed * 1_000_000;
43
86
  }
44
87
 
45
- /**
46
- * Check if a model is free (both prompt and completion pricing is 0).
47
- */
48
- function isFreeModel(info: OpenRouterRaw): boolean {
49
- return info.pricing?.prompt === "0" && info.pricing?.completion === "0";
88
+ function modalityIncludes(
89
+ modality: string | string[] | null | undefined,
90
+ needle: string,
91
+ ): boolean {
92
+ if (Array.isArray(modality)) return modality.includes(needle);
93
+ return typeof modality === "string" && modality.includes(needle);
50
94
  }
51
95
 
52
- /**
53
- * Fetch ALL models from OpenRouter.
54
- * @param freeOnly - If true, return only free models
55
- */
56
- export async function fetchClineModels(
57
- freeOnly = false,
58
- ): Promise<ProviderModelConfig[]> {
96
+ function hasTextOutput(info: ClineRaw): boolean {
97
+ const outputMods = info.architecture?.output_modalities;
98
+ if (Array.isArray(outputMods) && outputMods.length > 0) {
99
+ return outputMods.includes("text");
100
+ }
101
+ return modalityIncludes(info.architecture?.modality, "text");
102
+ }
103
+
104
+ function supportsImages(info: ClineRaw): boolean {
105
+ const inputMods = info.architecture?.input_modalities;
106
+ if (Array.isArray(inputMods) && inputMods.includes("image")) return true;
107
+ return modalityIncludes(info.architecture?.modality, "image");
108
+ }
109
+
110
+ function modelFromRecommended(
111
+ model: ClineRecommendedModel,
112
+ ): ProviderModelConfig & { _pricingKnown?: boolean } {
113
+ const name = model.name?.trim() || extractNameFromId(model.id);
114
+ return {
115
+ id: model.id,
116
+ name: `${cleanModelName(name)} (Cline)`,
117
+ reasoning: false,
118
+ input: ["text"],
119
+ cost: {
120
+ input: 0,
121
+ output: 0,
122
+ cacheRead: 0,
123
+ cacheWrite: 0,
124
+ },
125
+ contextWindow: 1_000_000,
126
+ maxTokens: 65_536,
127
+ _pricingKnown: true,
128
+ };
129
+ }
130
+
131
+ function modelFromCatalog(
132
+ info: ClineRaw,
133
+ freeToTryIds: ReadonlySet<string>,
134
+ ): ProviderModelConfig & { _pricingKnown?: boolean } {
135
+ const isReasoning = !!(
136
+ info.supported_parameters?.includes("include_reasoning") ||
137
+ info.supported_parameters?.includes("reasoning")
138
+ );
139
+ const isFreeToTry = freeToTryIds.has(info.id);
140
+ const inputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.prompt);
141
+ const outputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.completion);
142
+ const cacheRead = isFreeToTry
143
+ ? 0
144
+ : parsePricing(info.pricing?.input_cache_read);
145
+ const cacheWrite = isFreeToTry
146
+ ? 0
147
+ : parsePricing(info.pricing?.input_cache_write);
148
+ const isFree = inputCost === 0 && outputCost === 0;
149
+ const cleanName = info.name
150
+ ? cleanModelName(info.name)
151
+ : extractNameFromId(info.id);
152
+
153
+ return {
154
+ id: info.id,
155
+ name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
156
+ reasoning: isReasoning,
157
+ input: supportsImages(info) ? ["text", "image"] : ["text"],
158
+ cost: {
159
+ input: inputCost,
160
+ output: outputCost,
161
+ cacheRead,
162
+ cacheWrite,
163
+ },
164
+ contextWindow:
165
+ info.context_length ?? info.top_provider?.context_length ?? 128_000,
166
+ maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
167
+ _pricingKnown: info.pricing !== null && info.pricing !== undefined,
168
+ };
169
+ }
170
+
171
+ async function fetchClineRecommendedFreeModels(): Promise<
172
+ ClineRecommendedModel[]
173
+ > {
59
174
  const response = await fetchWithRetry(
60
- `${BASE_URL_OPENROUTER}/models`,
61
- {},
175
+ `${BASE_URL_CLINE}/ai/cline/recommended-models`,
176
+ { headers: buildClineFetchHeaders() },
62
177
  3,
63
178
  1000,
64
179
  DEFAULT_FETCH_TIMEOUT_MS,
65
180
  );
66
181
 
67
- if (!response.ok)
68
- throw new Error(`Failed to fetch OpenRouter models: ${response.status}`);
182
+ if (!response.ok) return [];
69
183
 
70
- const json = (await response.json()) as { data?: OpenRouterRaw[] };
184
+ const json = (await response.json()) as ClineRecommendedModelsResponse;
185
+ return Array.isArray(json.free) ? json.free.filter((m) => m?.id) : [];
186
+ }
71
187
 
72
- // Filter to usable models (chat-capable)
73
- let usableModels = json.data ?? [];
188
+ async function fetchClineCatalogModels(): Promise<ClineRaw[]> {
189
+ const response = await fetchWithRetry(
190
+ `${BASE_URL_CLINE}/ai/cline/models`,
191
+ { headers: buildClineFetchHeaders() },
192
+ 3,
193
+ 1000,
194
+ DEFAULT_FETCH_TIMEOUT_MS,
195
+ );
74
196
 
75
- // If freeOnly, filter to free models
76
- if (freeOnly) {
77
- usableModels = usableModels.filter(isFreeModel);
197
+ if (!response.ok)
198
+ throw new Error(`Failed to fetch Cline models: ${response.status}`);
199
+
200
+ const json = (await response.json()) as { data?: ClineRaw[] };
201
+ if (!Array.isArray(json.data)) {
202
+ throw new Error("Invalid Cline models response: missing data array");
203
+ }
204
+ return json.data;
205
+ }
206
+
207
+ /**
208
+ * Fetch models from Cline.
209
+ * @param freeOnly - If true, return only zero-cost/free-to-try models
210
+ */
211
+ export async function fetchClineModels(
212
+ freeOnly = false,
213
+ ): Promise<ProviderModelConfig[]> {
214
+ const [catalogModels, recommendedFreeModels] = await Promise.all([
215
+ fetchClineCatalogModels(),
216
+ fetchClineRecommendedFreeModels().catch(() => []),
217
+ ]);
218
+ const recommendedFreeIds = new Set(recommendedFreeModels.map((m) => m.id));
219
+
220
+ const models: Array<ProviderModelConfig & { _pricingKnown?: boolean }> = [];
221
+ const seen = new Set<string>();
222
+
223
+ for (const info of catalogModels) {
224
+ if (!hasTextOutput(info)) continue;
225
+ const model = modelFromCatalog(info, recommendedFreeIds);
226
+ models.push(model);
227
+ seen.add(model.id);
78
228
  }
79
229
 
80
- const models: ProviderModelConfig[] = [];
81
- for (const info of usableModels) {
82
- const isReasoning = !!(
83
- info.supported_parameters?.includes("include_reasoning") ||
84
- info.supported_parameters?.includes("reasoning")
85
- );
86
- const hasImage =
87
- info.architecture?.input_modalities?.includes("image") ?? false;
88
-
89
- // Calculate cost per million tokens
90
- const inputCost = parsePricing(info.pricing?.prompt);
91
- const outputCost = parsePricing(info.pricing?.completion);
92
- const isFree = inputCost === 0 && outputCost === 0;
93
-
94
- const cleanName = info.name
95
- ? cleanModelName(info.name)
96
- : extractNameFromId(info.id);
97
-
98
- models.push({
99
- id: info.id,
100
- name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
101
- reasoning: isReasoning,
102
- input: hasImage ? ["text", "image"] : ["text"],
103
- cost: {
104
- input: inputCost,
105
- output: outputCost,
106
- cacheRead: 0,
107
- cacheWrite: 0,
108
- },
109
- contextWindow: info.context_length ?? 128_000,
110
- maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
111
- });
230
+ // The recommended/free-to-try endpoint can lead the full catalog. Include
231
+ // those exact IDs so newly promoted models (e.g. alibaba/qwen3.7-plus) show up.
232
+ for (const model of recommendedFreeModels) {
233
+ if (seen.has(model.id)) continue;
234
+ models.push(modelFromRecommended(model));
235
+ seen.add(model.id);
112
236
  }
113
237
 
114
- return applyHidden(models, PROVIDER_CLINE);
238
+ const filtered = freeOnly
239
+ ? models.filter((m) => m.cost.input === 0 && m.cost.output === 0)
240
+ : models;
241
+
242
+ return applyHidden(filtered, PROVIDER_CLINE);
115
243
  }
116
244
 
117
245
  /**
@@ -22,6 +22,7 @@
22
22
  * OpenAI is intentionally skipped per user request.
23
23
  */
24
24
 
25
+ import type { Api } from "@earendil-works/pi-ai";
25
26
  import type {
26
27
  ExtensionAPI,
27
28
  ProviderModelConfig,
@@ -46,9 +47,18 @@ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
46
47
  import { fetchOpenRouterCompatibleModels } from "../model-fetcher.ts";
47
48
  import { createToggleState } from "../../lib/toggle-state.ts";
48
49
  import { enhanceWithCI } from "../../provider-helper.ts";
50
+ import {
51
+ OPENCODE_DYNAMIC_API,
52
+ createOpenCodeSessionTracker,
53
+ createOpenCodeStreamSimple,
54
+ isOpenCodeProvider,
55
+ } from "../opencode-session.ts";
49
56
 
50
57
  const _logger = createLogger("dynamic-built-in");
51
58
 
59
+ // OpenCode headers must be regenerated for every LLM request.
60
+ const _opencodeSession = createOpenCodeSessionTracker();
61
+
52
62
  // =============================================================================
53
63
  // Generic Model Fetcher
54
64
  // =============================================================================
@@ -170,7 +180,7 @@ interface DynamicProviderDef {
170
180
  providerId: string;
171
181
  getApiKey: () => string | undefined;
172
182
  baseUrl: string;
173
- api: "openai-completions" | "mistral-conversations" | "anthropic-messages";
183
+ api: Api;
174
184
  defaultShowPaid: boolean | (() => boolean);
175
185
  /** Optional per-provider compat overrides (e.g., DeepSeek proxy). */
176
186
  compat?: ProviderModelConfig["compat"];
@@ -217,10 +227,18 @@ const DYNAMIC_PROVIDERS: DynamicProviderDef[] = [
217
227
  providerId: "opencode",
218
228
  getApiKey: getOpencodeApiKey,
219
229
  baseUrl: "https://opencode.ai/zen/v1",
220
- api: "openai-completions",
230
+ api: OPENCODE_DYNAMIC_API,
221
231
  defaultShowPaid: getOpencodeShowPaid,
222
232
  // OpenCode API returns no pricing — _pricingKnown=false, name-based detection
223
233
  },
234
+ {
235
+ providerId: "opencode-go",
236
+ getApiKey: getOpencodeApiKey,
237
+ baseUrl: "https://opencode.ai/zen/go/v1",
238
+ api: OPENCODE_DYNAMIC_API,
239
+ defaultShowPaid: getOpencodeShowPaid,
240
+ // OpenCode Go uses the same OPENCODE_API_KEY and per-request headers
241
+ },
224
242
  {
225
243
  providerId: "openrouter",
226
244
  getApiKey: getOpenrouterApiKey,
@@ -261,9 +279,11 @@ async function discoverAndRegister(
261
279
  });
262
280
  }
263
281
 
264
- // Apply DeepSeek proxy compat to matching models
282
+ // Apply DeepSeek proxy compat to matching models. OpenCode headers are
283
+ // injected per request by createOpenCodeStreamSimple(), not stored here.
265
284
  allModels = allModels.map((m) => ({
266
285
  ...m,
286
+ api: isOpenCodeProvider(config.providerId) ? OPENCODE_DYNAMIC_API : m.api,
267
287
  compat: getProxyModelCompat(m) ?? m.compat,
268
288
  }));
269
289
  } catch (error) {
@@ -327,6 +347,9 @@ async function registerProvider(
327
347
  baseUrl: config.baseUrl,
328
348
  apiKey,
329
349
  api: config.api,
350
+ ...(isOpenCodeProvider(config.providerId)
351
+ ? { streamSimple: createOpenCodeStreamSimple(_opencodeSession) }
352
+ : {}),
330
353
  models: enhanceWithCI(models, config.providerId),
331
354
  });
332
355
  };
@@ -439,7 +462,7 @@ export async function setupDynamicBuiltInProviders(
439
462
  freeOnly: false,
440
463
  }),
441
464
  },
442
- fastrouterApiKey ?? "FASTROUTER_API_KEY",
465
+ fastrouterApiKey ?? "$FASTROUTER_API_KEY",
443
466
  ),
444
467
  );
445
468
 
@@ -38,7 +38,7 @@ import { fetchKiloModels, KILO_GATEWAY_BASE } from "./kilo-models.ts";
38
38
  const KILO_PROVIDER_CONFIG = {
39
39
  providerId: PROVIDER_KILO,
40
40
  baseUrl: KILO_GATEWAY_BASE,
41
- apiKey: "KILO_API_KEY",
41
+ apiKey: "$KILO_API_KEY",
42
42
  headers: {
43
43
  "X-KILOCODE-EDITORNAME": "Pi",
44
44
  },
@@ -149,7 +149,7 @@ export default async function kiloProvider(pi: ExtensionAPI) {
149
149
  // Register initial provider (default to free models)
150
150
  pi.registerProvider(PROVIDER_KILO, {
151
151
  baseUrl: KILO_GATEWAY_BASE,
152
- apiKey: "KILO_API_KEY",
152
+ apiKey: "$KILO_API_KEY",
153
153
  api: "openai-completions" as const,
154
154
  headers: {
155
155
  "X-KILOCODE-EDITORNAME": "Pi",
@@ -24,6 +24,7 @@ interface OpenRouterCompatibleModel {
24
24
  };
25
25
  top_provider?: { max_completion_tokens?: number | null };
26
26
  supported_parameters?: string[];
27
+ isFree?: boolean;
27
28
  }
28
29
 
29
30
  interface FetchModelsOptions {
@@ -98,8 +99,9 @@ export async function fetchOpenRouterCompatibleModels(
98
99
  const outputMods = m.architecture?.output_modalities ?? [];
99
100
  if (outputMods.includes("image")) return false;
100
101
 
101
- // Filter by pricing if freeOnly
102
+ // Filter by provider flag when available, otherwise pricing.
102
103
  if (freeOnly) {
104
+ if (typeof m.isFree === "boolean") return m.isFree;
103
105
  const prompt = Number.parseFloat(m.pricing?.prompt ?? "1");
104
106
  const completion = Number.parseFloat(m.pricing?.completion ?? "1");
105
107
  if (prompt !== 0 || completion !== 0) return false;
@@ -31,6 +31,10 @@ import {
31
31
  URL_MODELS_DEV,
32
32
  } from "../../constants.ts";
33
33
  import { createLogger } from "../../lib/logger.ts";
34
+ import {
35
+ getModelsDueForProbe,
36
+ recordModelProbeResults,
37
+ } from "../../lib/probe-cache.ts";
34
38
  import { registerWithGlobalToggle } from "../../lib/registry.ts";
35
39
  import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
36
40
  import {
@@ -287,12 +291,12 @@ async function fetchNvidiaModels(
287
291
 
288
292
  /**
289
293
  * Probe a single NVIDIA model with a minimal chat request.
290
- * Returns true if the model is routable (not 404), false if it 404s.
294
+ * Returns "broken" only for deterministic 404s; network errors are unknown.
291
295
  */
292
296
  async function probeNvidiaModel(
293
297
  apiKey: string,
294
298
  modelId: string,
295
- ): Promise<boolean> {
299
+ ): Promise<"ok" | "broken" | "unknown"> {
296
300
  try {
297
301
  const response = await fetchWithTimeout(
298
302
  `${BASE_URL_NVIDIA}/chat/completions`,
@@ -313,9 +317,9 @@ async function probeNvidiaModel(
313
317
  );
314
318
  // 404 = function not found (model not provisioned)
315
319
  // 200/400/401/etc = at least routable
316
- return response.status !== 404;
320
+ return response.status === 404 ? "broken" : "ok";
317
321
  } catch {
318
- return true; // Network errors / timeouts are not "model not found"
322
+ return "unknown"; // Network errors / timeouts are not "model not found"
319
323
  }
320
324
  }
321
325
 
@@ -330,26 +334,51 @@ async function runNvidiaProbe(
330
334
  modelsToTest: ProviderModelConfig[],
331
335
  stored: { free: ProviderModelConfig[]; all: ProviderModelConfig[] },
332
336
  reRegister: (models: ProviderModelConfig[]) => void,
333
- ): Promise<void> {
337
+ options: { useCache?: boolean } = {},
338
+ ): Promise<string[]> {
339
+ const modelIdsToProbe = options.useCache
340
+ ? new Set(
341
+ getModelsDueForProbe(
342
+ PROVIDER_NVIDIA,
343
+ modelsToTest.map((m) => m.id),
344
+ ),
345
+ )
346
+ : undefined;
347
+ const probeCandidates = modelIdsToProbe
348
+ ? modelsToTest.filter((m) => modelIdsToProbe.has(m.id))
349
+ : modelsToTest;
350
+
351
+ if (probeCandidates.length === 0) {
352
+ _nvidiaLogger.info("Auto-probe: NVIDIA probe cache is fresh");
353
+ return [];
354
+ }
355
+
334
356
  const notFound: string[] = [];
357
+ const cacheableResults: Array<{ modelId: string; status: "ok" | "broken" }> =
358
+ [];
335
359
  const batchSize = 5;
336
360
 
337
- for (let i = 0; i < modelsToTest.length; i += batchSize) {
338
- const batch = modelsToTest.slice(i, i + batchSize);
361
+ for (let i = 0; i < probeCandidates.length; i += batchSize) {
362
+ const batch = probeCandidates.slice(i, i + batchSize);
339
363
  const results = await Promise.all(
340
364
  batch.map(async (m) => {
341
- const ok = await probeNvidiaModel(apiKey, m.id);
342
- return { id: m.id, ok };
365
+ const status = await probeNvidiaModel(apiKey, m.id);
366
+ return { id: m.id, status };
343
367
  }),
344
368
  );
345
369
  for (const r of results) {
346
- if (!r.ok) notFound.push(r.id);
370
+ if (r.status === "broken") notFound.push(r.id);
371
+ if (r.status !== "unknown") {
372
+ cacheableResults.push({ modelId: r.id, status: r.status });
373
+ }
347
374
  }
348
375
  }
349
376
 
377
+ recordModelProbeResults(PROVIDER_NVIDIA, cacheableResults);
378
+
350
379
  if (notFound.length === 0) {
351
- _nvidiaLogger.info("Auto-probe: all NVIDIA models are routable");
352
- return;
380
+ _nvidiaLogger.info("Auto-probe: all checked NVIDIA models are routable");
381
+ return [];
353
382
  }
354
383
 
355
384
  // Auto-hide 404 models in config (provider-scoped)
@@ -367,6 +396,7 @@ async function runNvidiaProbe(
367
396
  _nvidiaLogger.info(
368
397
  `Auto-probe: found ${notFound.length} broken models (auto-hidden)`,
369
398
  );
399
+ return notFound;
370
400
  }
371
401
 
372
402
  export default async function nvidiaProvider(pi: ExtensionAPI) {
@@ -391,7 +421,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
391
421
  const reRegister = createReRegister(pi, {
392
422
  providerId: PROVIDER_NVIDIA,
393
423
  baseUrl: BASE_URL_NVIDIA,
394
- apiKey: apiKey || "NVIDIA_API_KEY",
424
+ apiKey: apiKey || "$NVIDIA_API_KEY",
395
425
  });
396
426
 
397
427
  // Register with global toggle system
@@ -401,7 +431,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
401
431
  const initialModels = allModels;
402
432
  pi.registerProvider(PROVIDER_NVIDIA, {
403
433
  baseUrl: BASE_URL_NVIDIA,
404
- apiKey: apiKey || "NVIDIA_API_KEY",
434
+ apiKey: apiKey || "$NVIDIA_API_KEY",
405
435
  api: "openai-completions" as const,
406
436
  authHeader: true,
407
437
  headers: {
@@ -416,7 +446,9 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
416
446
  if (_autoProbeDone || !apiKey) return;
417
447
  _autoProbeDone = true;
418
448
  _nvidiaLogger.info("Starting lazy auto-probe of NVIDIA models...");
419
- runNvidiaProbe(apiKey, allModels, stored, reRegister).catch((err) => {
449
+ runNvidiaProbe(apiKey, allModels, stored, reRegister, {
450
+ useCache: true,
451
+ }).catch((err) => {
420
452
  _nvidiaLogger.warn("Auto-probe failed", {
421
453
  error: err instanceof Error ? err.message : String(err),
422
454
  });