pi-free 2.0.2 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,415 +1,474 @@
1
- /**
2
- * NVIDIA NIM Provider Extension
3
- *
4
- * Provides access to NVIDIA-hosted large models via integrate.api.nvidia.com.
5
- * All models use NVIDIA's free credit system — requires NVIDIA_API_KEY.
6
- * Get a free key at: https://build.nvidia.com
7
- *
8
- * Small models (< 70B) are filtered out to keep the list focused on useful
9
- * chat/coding models. Non-chat models (embedding, speech-to-text, OCR,
10
- * image-gen) are filtered by their modalities (output must be ["text"],
11
- * input must include "text").
12
- *
13
- * Responds to global free-only filter for free/paid model filtering.
14
- */
15
-
16
- import type {
17
- ExtensionAPI,
18
- ProviderModelConfig,
19
- } from "@mariozechner/pi-coding-agent";
20
- import {
21
- applyHidden,
22
- getNvidiaApiKey,
23
- loadConfigFile,
24
- PROVIDER_NVIDIA,
25
- saveConfig,
26
- } from "../../config.ts";
27
- import {
28
- BASE_URL_NVIDIA,
29
- DEFAULT_FETCH_TIMEOUT_MS,
30
- NVIDIA_MIN_SIZE_B,
31
- URL_MODELS_DEV,
32
- } from "../../constants.ts";
33
- import { registerWithGlobalToggle } from "../../lib/registry.ts";
34
- import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
35
- import {
36
- fetchWithRetry,
37
- fetchWithTimeout,
38
- isUsableModel,
39
- } from "../../lib/util.ts";
40
- import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
41
-
42
- // =============================================================================
43
- // Non-chat model heuristics for models not in models.dev
44
- // =============================================================================
45
-
46
- const NVIDIA_NON_CHAT_PATTERNS: RegExp[] = [
47
- /embed(?!.*instruct)/i,
48
- /whisper/i,
49
- /reward/i,
50
- /ocr(?!.*instruct)/i,
51
- /safety-guard|content-safety|nemoguard/i,
52
- /retriever-parse|nemotron-parse(?!.*instruct)/i,
53
- /detector/i,
54
- /deplot/i,
55
- /nvclip/i,
56
- /vila$/i,
57
- /neva(?!.*instruct)/i,
58
- /translate/i,
59
- /cosmos-reason/i,
60
- /kosmos/i,
61
- /bge-/i,
62
- /arctic-embed/i,
63
- /gliner/i,
64
- /nv-embed/i,
65
- /embedqa/i,
66
- /embedcode/i,
67
- ];
68
-
69
- /**
70
- * Models that appear in NVIDIA's /v1/models but return 404 "Function not found"
71
- * on /v1/chat/completions. These are listed but not actually provisioned for
72
- * hosted chat inference. Community-reported; add new IDs as they surface.
73
- *
74
- * Users can also hide individual models via hidden_models in ~/.pi/free.json.
75
- */
76
- const NVIDIA_KNOWN_404_MODELS: ReadonlySet<string> = new Set([
77
- "01-ai/yi-large",
78
- "adept/fuyu-8b",
79
- "ai21labs/jamba-1.5-large-instruct",
80
- "aisingapore/sea-lion-7b-instruct",
81
- "baai/bge-m3",
82
- "bigcode/starcoder2-15b",
83
- "databricks/dbrx-instruct",
84
- "deepseek-ai/deepseek-coder-6.7b-instruct",
85
- "google/codegemma-1.1-7b",
86
- "google/codegemma-7b",
87
- "google/deplot",
88
- "google/gemma-2b",
89
- "google/recurrentgemma-2b",
90
- "ibm/granite-3.0-3b-a800m-instruct",
91
- "ibm/granite-3.0-8b-instruct",
92
- "ibm/granite-34b-code-instruct",
93
- "ibm/granite-8b-code-instruct",
94
- "meta/codellama-70b",
95
- "meta/llama2-70b",
96
- "microsoft/kosmos-2",
97
- "microsoft/phi-3-vision-128k-instruct",
98
- "microsoft/phi-3.5-moe-instruct",
99
- "mistralai/codestral-22b-instruct-v0.1",
100
- "mistralai/mistral-7b-instruct-v0.3",
101
- "mistralai/mistral-large",
102
- "mistralai/mistral-large-2-instruct",
103
- "mistralai/mixtral-8x22b-v0.1",
104
- "nv-mistralai/mistral-nemo-12b-instruct",
105
- "nvidia/cosmos-reason2-8b",
106
- "nvidia/embed-qa-4",
107
- "nvidia/llama-3.1-nemotron-51b-instruct",
108
- "nvidia/llama-3.1-nemotron-70b-instruct",
109
- "nvidia/llama-3.1-nemotron-ultra-253b-v1",
110
- "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
111
- "nvidia/llama-3.2-nemoretriever-300m-embed-v1",
112
- "nvidia/llama-3.2-nv-embedqa-1b-v1",
113
- "nvidia/llama-3.2-nv-embedqa-1b-v2",
114
- "nvidia/llama-nemotron-embed-1b-v2",
115
- "nvidia/llama-nemotron-embed-vl-1b-v2",
116
- "nvidia/llama3-chatqa-1.5-70b",
117
- "nvidia/mistral-nemo-minitron-8b-8k-instruct",
118
- "nvidia/nemotron-4-340b-instruct",
119
- "nvidia/nemotron-4-340b-reward",
120
- "nvidia/nemotron-nano-3-30b-a3b",
121
- "nvidia/neva-22b",
122
- "nvidia/nv-embed-v1",
123
- "nvidia/nv-embedcode-7b-v1",
124
- "nvidia/nv-embedqa-e5-v5",
125
- "nvidia/nv-embedqa-mistral-7b-v2",
126
- "nvidia/nvclip",
127
- "nvidia/riva-translate-4b-instruct",
128
- "snowflake/arctic-embed-l",
129
- "writer/palmyra-creative-122b",
130
- "writer/palmyra-fin-70b-32k",
131
- "writer/palmyra-med-70b",
132
- "writer/palmyra-med-70b-32k",
133
- "zyphra/zamba2-7b-instruct",
134
- ]);
135
-
136
- /**
137
- * Infer model metadata from a NVIDIA model ID for models not present in
138
- * models.dev. Returns null if the ID matches known non-chat patterns.
139
- */
140
- function inferModelFromId(id: string): ModelsDevModel | null {
141
- for (const pattern of NVIDIA_NON_CHAT_PATTERNS) {
142
- if (pattern.test(id)) return null;
143
- }
144
-
145
- const name = id
146
- .split("/")
147
- .pop()!
148
- .replace(/-/g, " ")
149
- .replace(/\b\w/g, (c) => c.toUpperCase())
150
- .replace(/\b(\d+(?:\.\d+)?)b\b/gi, "$1B");
151
-
152
- const hasVision = /vision|multimodal|vl/i.test(id);
153
- const hasReasoning = /reason|r1|thinking/i.test(id);
154
-
155
- return {
156
- id,
157
- name,
158
- reasoning: hasReasoning,
159
- limit: { context: 128_000, output: 4096 },
160
- modalities: {
161
- input: hasVision ? ["text", "image"] : ["text"],
162
- output: ["text"],
163
- },
164
- cost: { input: 0, output: 0 },
165
- };
166
- }
167
-
168
- // =============================================================================
169
- // Fetch + map
170
- // =============================================================================
171
-
172
- async function fetchNvidiaModels(
173
- apiKey?: string,
174
- ): Promise<ProviderModelConfig[]> {
175
- // ── 1. Query NVIDIA's actual API (source of truth) ─────────────────
176
- let apiModelIds = new Set<string>();
177
- if (apiKey) {
178
- try {
179
- const response = await fetchWithRetry(
180
- `${BASE_URL_NVIDIA}/models`,
181
- {
182
- headers: {
183
- Authorization: `Bearer ${apiKey}`,
184
- "User-Agent": "pi-free-providers",
185
- },
186
- },
187
- 3,
188
- 1000,
189
- DEFAULT_FETCH_TIMEOUT_MS,
190
- );
191
- if (response.ok) {
192
- const json = (await response.json()) as {
193
- data?: Array<{ id: string }>;
194
- };
195
- if (json.data) {
196
- apiModelIds = new Set(json.data.map((m) => m.id));
197
- }
198
- }
199
- } catch (error) {
200
- console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
201
- }
202
- }
203
-
204
- // ── 2. Fetch models.dev for rich metadata (cost, limits, etc.) ─────
205
- const devModels = new Map<string, ModelsDevModel>();
206
- try {
207
- const response = await fetchWithRetry(
208
- URL_MODELS_DEV,
209
- {
210
- headers: { "User-Agent": "pi-free-providers" },
211
- },
212
- 3,
213
- 1000,
214
- DEFAULT_FETCH_TIMEOUT_MS,
215
- );
216
- if (response.ok) {
217
- const json = (await response.json()) as Record<string, ModelsDevProvider>;
218
- const provider = Object.values(json).find((p) => p?.id === "nvidia");
219
- if (provider?.models) {
220
- for (const m of Object.values(provider.models)) {
221
- devModels.set(m.id, m);
222
- }
223
- }
224
- }
225
- } catch (error) {
226
- console.error("[nvidia] Failed to fetch models.dev", error);
227
- }
228
-
229
- // ── 3. Build unified list (NVIDIA API wins; fallback to models.dev) ─
230
- const modelIds =
231
- apiModelIds.size > 0 ? [...apiModelIds] : [...devModels.keys()];
232
-
233
- const result = applyHidden(
234
- modelIds
235
- .map((id) => {
236
- const dev = devModels.get(id);
237
- if (dev) return dev;
238
- return inferModelFromId(id);
239
- })
240
- .filter((m): m is ModelsDevModel => m !== null)
241
- .filter((m) => isUsableModel(m.id, NVIDIA_MIN_SIZE_B))
242
- .filter((m) => {
243
- const modalities = m.modalities;
244
- if (modalities) {
245
- const output = modalities.output ?? [];
246
- const input = modalities.input ?? [];
247
- if (!output.includes("text")) return false;
248
- if (!input.includes("text")) return false;
249
- }
250
- return true;
251
- })
252
- // Filter out known 404 models (listed but not provisioned for chat)
253
- .filter((m) => {
254
- if (NVIDIA_KNOWN_404_MODELS.has(m.id)) {
255
- return false;
256
- }
257
- return true;
258
- })
259
- // NVIDIA is freemium — all models are usable with free credits.
260
- // No cost filtering applied.
261
- .map(
262
- (m): ProviderModelConfig => ({
263
- id: m.id,
264
- name: m.name,
265
- reasoning: m.reasoning,
266
- input: m.modalities?.input?.includes("image")
267
- ? ["text", "image"]
268
- : ["text"],
269
- cost: {
270
- input: m.cost?.input ?? 0,
271
- output: m.cost?.output ?? 0,
272
- cacheRead: m.cost?.cache_read ?? 0,
273
- cacheWrite: m.cost?.cache_write ?? 0,
274
- },
275
- contextWindow: m.limit.context,
276
- maxTokens: m.limit.output,
277
- }),
278
- ),
279
- PROVIDER_NVIDIA,
280
- );
281
-
282
- return result;
283
- }
284
-
285
- // =============================================================================
286
- // Extension Entry Point
287
- // =============================================================================
288
-
289
- /**
290
- * Probe a single NVIDIA model with a minimal chat request.
291
- * Returns true if the model is routable (not 404), false if it 404s.
292
- */
293
- async function probeNvidiaModel(
294
- apiKey: string,
295
- modelId: string,
296
- ): Promise<boolean> {
297
- try {
298
- const response = await fetchWithTimeout(
299
- `${BASE_URL_NVIDIA}/chat/completions`,
300
- {
301
- method: "POST",
302
- headers: {
303
- Authorization: `Bearer ${apiKey}`,
304
- "Content-Type": "application/json",
305
- "User-Agent": "pi-free-providers",
306
- },
307
- body: JSON.stringify({
308
- model: modelId,
309
- messages: [{ role: "user", content: "hi" }],
310
- max_tokens: 1,
311
- }),
312
- },
313
- 10000, // 10 second timeout
314
- );
315
- // 404 = function not found (model not provisioned)
316
- // 200/400/401/etc = at least routable
317
- return response.status !== 404;
318
- } catch {
319
- return true; // Network errors / timeouts are not "model not found"
320
- }
321
- }
322
-
323
- export default async function (pi: ExtensionAPI) {
324
- const apiKey = getNvidiaApiKey();
325
- const hasKey = !!apiKey;
326
-
327
- let allModels: ProviderModelConfig[] = [];
328
-
329
- try {
330
- allModels = await fetchNvidiaModels(apiKey);
331
- } catch (error) {
332
- console.error("[nvidia] Failed to fetch models at startup", error);
333
- return;
334
- }
335
-
336
- // Store both sets for global toggle (same list — NVIDIA is freemium)
337
- const stored = { free: allModels, all: allModels };
338
-
339
- // Create re-register function
340
- const reRegister = createReRegister(pi, {
341
- providerId: PROVIDER_NVIDIA,
342
- baseUrl: BASE_URL_NVIDIA,
343
- apiKey: apiKey || "NVIDIA_API_KEY",
344
- });
345
-
346
- // Register with global toggle system
347
- registerWithGlobalToggle(PROVIDER_NVIDIA, stored, reRegister, hasKey);
348
-
349
- // Register initial models (global toggle will apply filter if needed)
350
- const initialModels = allModels;
351
- pi.registerProvider(PROVIDER_NVIDIA, {
352
- baseUrl: BASE_URL_NVIDIA,
353
- apiKey: apiKey || "NVIDIA_API_KEY",
354
- api: "openai-completions" as const,
355
- authHeader: true,
356
- headers: {
357
- "User-Agent": "pi-free-providers",
358
- },
359
- models: enhanceWithCI(initialModels),
360
- });
361
-
362
- // ── Probe command: test all registered models for 404s ─────────────
363
- pi.registerCommand("probe-nvidia", {
364
- description: "Test all NVIDIA models for 404 'Function not found' errors",
365
- handler: async (_args, ctx) => {
366
- if (!apiKey) {
367
- ctx.ui.notify("NVIDIA_API_KEY not set", "error");
368
- return;
369
- }
370
-
371
- const modelsToTest = allModels;
372
- ctx.ui.notify(`Probing ${modelsToTest.length} NVIDIA models…`, "info");
373
-
374
- const notFound: string[] = [];
375
- const batchSize = 5;
376
-
377
- for (let i = 0; i < modelsToTest.length; i += batchSize) {
378
- const batch = modelsToTest.slice(i, i + batchSize);
379
- const results = await Promise.all(
380
- batch.map(async (m) => {
381
- const ok = await probeNvidiaModel(apiKey, m.id);
382
- return { id: m.id, ok };
383
- }),
384
- );
385
- for (const r of results) {
386
- if (!r.ok) notFound.push(r.id);
387
- }
388
- }
389
-
390
- if (notFound.length === 0) {
391
- ctx.ui.notify("All NVIDIA models are routable ✅", "info");
392
- return;
393
- }
394
-
395
- // Auto-hide 404 models in config (provider-scoped)
396
- const config = loadConfigFile();
397
- const existingHidden = new Set(config.hidden_models ?? []);
398
- for (const id of notFound) existingHidden.add(`${PROVIDER_NVIDIA}/${id}`);
399
- saveConfig({ hidden_models: Array.from(existingHidden) });
400
-
401
- // Re-register so hidden models disappear immediately
402
- const filtered = await fetchNvidiaModels(apiKey);
403
- stored.free = filtered;
404
- stored.all = filtered;
405
- reRegister(filtered);
406
-
407
- ctx.ui.notify(
408
- `Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
409
- "warning",
410
- );
411
- },
412
- });
413
-
414
- // Registration complete - models registered silently (use LOG_LEVEL=info to see details)
415
- }
1
+ /**
2
+ * NVIDIA NIM Provider Extension
3
+ *
4
+ * Provides access to NVIDIA-hosted large models via integrate.api.nvidia.com.
5
+ * All models use NVIDIA's free credit system — requires NVIDIA_API_KEY.
6
+ * Get a free key at: https://build.nvidia.com
7
+ *
8
+ * Small models (< 70B) are filtered out to keep the list focused on useful
9
+ * chat/coding models. Non-chat models (embedding, speech-to-text, OCR,
10
+ * image-gen) are filtered by their modalities (output must be ["text"],
11
+ * input must include "text").
12
+ *
13
+ * Responds to global free-only filter for free/paid model filtering.
14
+ */
15
+
16
+ import type {
17
+ ExtensionAPI,
18
+ ProviderModelConfig,
19
+ } from "@mariozechner/pi-coding-agent";
20
+ import {
21
+ applyHidden,
22
+ getNvidiaApiKey,
23
+ loadConfigFile,
24
+ PROVIDER_NVIDIA,
25
+ saveConfig,
26
+ } from "../../config.ts";
27
+ import {
28
+ BASE_URL_NVIDIA,
29
+ DEFAULT_FETCH_TIMEOUT_MS,
30
+ NVIDIA_MIN_SIZE_B,
31
+ URL_MODELS_DEV,
32
+ } from "../../constants.ts";
33
+ import { createLogger } from "../../lib/logger.ts";
34
+ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
35
+ import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
36
+ import {
37
+ fetchWithRetry,
38
+ fetchWithTimeout,
39
+ isUsableModel,
40
+ } from "../../lib/util.ts";
41
+ import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
42
+
43
+ // =============================================================================
44
+ // Non-chat model heuristics for models not in models.dev
45
+ // =============================================================================
46
+
47
/**
 * Case-insensitive ID patterns that disqualify a model from chat use.
 * `inferModelFromId` returns null for any ID matched by at least one entry.
 * Sources carrying a `(?!.*instruct)` lookahead do not match when "instruct"
 * appears later in the ID.
 */
const NVIDIA_NON_CHAT_PATTERNS: RegExp[] = [
  "embed(?!.*instruct)",
  "whisper",
  "reward",
  "ocr(?!.*instruct)",
  "safety-guard|content-safety|nemoguard",
  "retriever-parse|nemotron-parse(?!.*instruct)",
  "detector",
  "deplot",
  "nvclip",
  "vila$",
  "neva(?!.*instruct)",
  "translate",
  "cosmos-reason",
  "kosmos",
  "bge-",
  "arctic-embed",
  "gliner",
  "nv-embed",
  "embedqa",
  "embedcode",
].map((source) => new RegExp(source, "i"));
69
+
70
/**
 * Model IDs that NVIDIA's /v1/models lists but that answer 404
 * "Function not found" on /v1/chat/completions, i.e. they are advertised
 * without being provisioned for hosted chat inference. The list is
 * community-reported; append new IDs as they are discovered.
 *
 * Individual models can additionally be hidden by users through
 * hidden_models in ~/.pi/free.json.
 */
const NVIDIA_KNOWN_404_MODELS: ReadonlySet<string> = new Set<string>([
  // 01-ai / adept / ai21labs / aisingapore / baai / bigcode / databricks / deepseek-ai
  "01-ai/yi-large",
  "adept/fuyu-8b",
  "ai21labs/jamba-1.5-large-instruct",
  "aisingapore/sea-lion-7b-instruct",
  "baai/bge-m3",
  "bigcode/starcoder2-15b",
  "databricks/dbrx-instruct",
  "deepseek-ai/deepseek-coder-6.7b-instruct",

  // google
  "google/codegemma-1.1-7b",
  "google/codegemma-7b",
  "google/deplot",
  "google/gemma-2b",
  "google/recurrentgemma-2b",

  // ibm
  "ibm/granite-3.0-3b-a800m-instruct",
  "ibm/granite-3.0-8b-instruct",
  "ibm/granite-34b-code-instruct",
  "ibm/granite-8b-code-instruct",

  // meta
  "meta/codellama-70b",
  "meta/llama2-70b",

  // microsoft
  "microsoft/kosmos-2",
  "microsoft/phi-3-vision-128k-instruct",
  "microsoft/phi-3.5-moe-instruct",

  // mistralai / nv-mistralai
  "mistralai/codestral-22b-instruct-v0.1",
  "mistralai/mistral-7b-instruct-v0.3",
  "mistralai/mistral-large",
  "mistralai/mistral-large-2-instruct",
  "mistralai/mixtral-8x22b-v0.1",
  "nv-mistralai/mistral-nemo-12b-instruct",

  // nvidia
  "nvidia/cosmos-reason2-8b",
  "nvidia/embed-qa-4",
  "nvidia/llama-3.1-nemotron-51b-instruct",
  "nvidia/llama-3.1-nemotron-70b-instruct",
  "nvidia/llama-3.1-nemotron-ultra-253b-v1",
  "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
  "nvidia/llama-3.2-nemoretriever-300m-embed-v1",
  "nvidia/llama-3.2-nv-embedqa-1b-v1",
  "nvidia/llama-3.2-nv-embedqa-1b-v2",
  "nvidia/llama-nemotron-embed-1b-v2",
  "nvidia/llama-nemotron-embed-vl-1b-v2",
  "nvidia/llama3-chatqa-1.5-70b",
  "nvidia/mistral-nemo-minitron-8b-8k-instruct",
  "nvidia/nemotron-4-340b-instruct",
  "nvidia/nemotron-4-340b-reward",
  "nvidia/nemotron-nano-3-30b-a3b",
  "nvidia/neva-22b",
  "nvidia/nv-embed-v1",
  "nvidia/nv-embedcode-7b-v1",
  "nvidia/nv-embedqa-e5-v5",
  "nvidia/nv-embedqa-mistral-7b-v2",
  "nvidia/nvclip",
  "nvidia/riva-translate-4b-instruct",

  // snowflake / writer / zyphra
  "snowflake/arctic-embed-l",
  "writer/palmyra-creative-122b",
  "writer/palmyra-fin-70b-32k",
  "writer/palmyra-med-70b",
  "writer/palmyra-med-70b-32k",
  "zyphra/zamba2-7b-instruct",
]);
136
+
137
/**
 * Build placeholder metadata for a NVIDIA model ID that has no models.dev
 * entry.
 *
 * @param id - Full NVIDIA model ID; only the segment after the last "/" is
 *   used for the display name.
 * @returns A zero-cost record with a 128k context / 4096 output limit, or
 *   `null` when the ID matches any entry of NVIDIA_NON_CHAT_PATTERNS.
 */
function inferModelFromId(id: string): ModelsDevModel | null {
  if (NVIDIA_NON_CHAT_PATTERNS.some((re) => re.test(id))) {
    return null;
  }

  // Display name: last path segment, dashes → spaces, first letter of each
  // word upper-cased, and size suffixes such as "70b" rendered as "70B".
  const slug = id.split("/").pop()!;
  const spaced = slug.replace(/-/g, " ");
  const titled = spaced.replace(/\b\w/g, (ch) => ch.toUpperCase());
  const name = titled.replace(/\b(\d+(?:\.\d+)?)b\b/gi, "$1B");

  // Capabilities are guessed from keywords anywhere in the full ID.
  const looksMultimodal = /vision|multimodal|vl/i.test(id);
  const looksReasoning = /reason|r1|thinking/i.test(id);

  return {
    id,
    name,
    reasoning: looksReasoning,
    limit: { context: 128_000, output: 4096 },
    modalities: {
      input: looksMultimodal ? ["text", "image"] : ["text"],
      output: ["text"],
    },
    cost: { input: 0, output: 0 },
  };
}
168
+
169
+ // =============================================================================
170
+ // Fetch + map
171
+ // =============================================================================
172
+
173
/**
 * Fetch the set of model IDs listed by NVIDIA's `/models` endpoint.
 *
 * Best-effort: a network error, a non-2xx status or a malformed payload is
 * logged (where applicable) and yields an empty set instead of throwing.
 * Entries without a non-empty string `id` are skipped rather than aborting
 * the whole listing.
 *
 * @param apiKey - NVIDIA API key sent as a Bearer token.
 * @returns The listed model IDs, or an empty set on any failure.
 */
async function fetchNvidiaApiModelIds(apiKey: string): Promise<Set<string>> {
  try {
    const response = await fetchWithRetry(
      `${BASE_URL_NVIDIA}/models`,
      {
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "User-Agent": "pi-free-providers",
        },
      },
      3,
      1000,
      DEFAULT_FETCH_TIMEOUT_MS,
    );

    if (!response.ok) {
      // Surface e.g. a rejected key instead of silently returning nothing.
      console.error(
        `[nvidia] NVIDIA API /models responded with HTTP ${response.status}`,
      );
      return new Set();
    }

    const json = (await response.json()) as { data?: unknown } | null;
    const entries = json?.data;
    if (!Array.isArray(entries)) {
      return new Set();
    }

    const ids = new Set<string>();
    for (const entry of entries) {
      const id = (entry as { id?: unknown } | null)?.id;
      if (typeof id === "string" && id.length > 0) {
        ids.add(id);
      }
    }
    return ids;
  } catch (error) {
    console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
  }
  return new Set();
}
200
+
201
/**
 * Load the models.dev catalog and index the entries of the provider whose
 * `id` is "nvidia" by model ID.
 *
 * Best-effort: a non-OK response yields an empty map, and any thrown error
 * is logged and whatever was collected so far is returned.
 */
async function fetchModelsDevMetadata(): Promise<Map<string, ModelsDevModel>> {
  const byId = new Map<string, ModelsDevModel>();

  try {
    const res = await fetchWithRetry(
      URL_MODELS_DEV,
      { headers: { "User-Agent": "pi-free-providers" } },
      3,
      1000,
      DEFAULT_FETCH_TIMEOUT_MS,
    );
    if (!res.ok) {
      return byId;
    }

    const catalog = (await res.json()) as Record<string, ModelsDevProvider>;
    const nvidia = Object.values(catalog).find(
      (entry) => entry?.id === "nvidia",
    );
    if (nvidia?.models) {
      Object.values(nvidia.models).forEach((model) => {
        byId.set(model.id, model);
      });
    }
  } catch (error) {
    console.error("[nvidia] Failed to fetch models.dev", error);
  }

  return byId;
}
227
+
228
/**
 * Decide whether a model can be used for text chat.
 *
 * A model without any `modalities` record is accepted. Otherwise both the
 * output and the input modality lists must contain "text"; a missing list
 * counts as empty.
 */
function isChatModel(m: ModelsDevModel): boolean {
  const { modalities } = m;
  if (!modalities) {
    return true;
  }

  const producesText = (modalities.output ?? []).includes("text");
  if (!producesText) {
    return false;
  }
  return (modalities.input ?? []).includes("text");
}
235
+
236
/**
 * Build the list of NVIDIA chat models to register.
 *
 * The NVIDIA API listing (only queried when `apiKey` is given) decides which
 * IDs exist; when it yields nothing, the IDs known to models.dev are used
 * instead. Metadata comes from models.dev, falling back to values inferred
 * from the ID. Models are then dropped when they are unusable per
 * `isUsableModel`, not text chat models, or listed in
 * NVIDIA_KNOWN_404_MODELS, and the result is passed through `applyHidden`.
 *
 * @param apiKey - Optional NVIDIA API key.
 * @returns Provider model configs for the remaining models.
 */
async function fetchNvidiaModels(
  apiKey?: string,
): Promise<ProviderModelConfig[]> {
  // The two lookups are independent and both helpers handle their own
  // errors (they resolve to empty collections), so run them concurrently
  // instead of paying for two sequential round-trips.
  const [apiModelIds, devModels] = await Promise.all([
    // NVIDIA's own listing is the source of truth for which IDs exist.
    apiKey
      ? fetchNvidiaApiModelIds(apiKey)
      : Promise.resolve(new Set<string>()),
    // models.dev supplies rich metadata (cost, limits, modalities).
    fetchModelsDevMetadata(),
  ]);

  // NVIDIA API wins; fall back to the models.dev IDs when it returned none.
  const modelIds =
    apiModelIds.size > 0 ? [...apiModelIds] : [...devModels.keys()];

  return applyHidden(
    modelIds
      .map((id) => devModels.get(id) ?? inferModelFromId(id))
      .filter((m): m is ModelsDevModel => m !== null)
      .filter((m) => isUsableModel(m.id, NVIDIA_MIN_SIZE_B))
      .filter(isChatModel)
      .filter((m) => !NVIDIA_KNOWN_404_MODELS.has(m.id))
      // NVIDIA is freemium — all models are usable with free credits,
      // so no cost filtering is applied here.
      .map(
        (m): ProviderModelConfig => ({
          id: m.id,
          name: m.name,
          reasoning: m.reasoning,
          input: m.modalities?.input?.includes("image")
            ? ["text", "image"]
            : ["text"],
          cost: {
            input: m.cost?.input ?? 0,
            output: m.cost?.output ?? 0,
            cacheRead: m.cost?.cache_read ?? 0,
            cacheWrite: m.cost?.cache_write ?? 0,
          },
          contextWindow: m.limit.context,
          maxTokens: m.limit.output,
        }),
      ),
    PROVIDER_NVIDIA,
  );
}
283
+
284
+ // =============================================================================
285
+ // Extension Entry Point
286
+ // =============================================================================
287
+
288
/**
 * Probe a single NVIDIA model with a minimal one-token chat request.
 *
 * @param apiKey - NVIDIA API key sent as a Bearer token.
 * @param modelId - Model ID to put in the request body.
 * @returns `false` only when the endpoint answers HTTP 404 (model listed but
 *   not provisioned); `true` for every other status and for network errors
 *   or timeouts, which say nothing about whether the model exists.
 */
async function probeNvidiaModel(
  apiKey: string,
  modelId: string,
): Promise<boolean> {
  try {
    const response = await fetchWithTimeout(
      `${BASE_URL_NVIDIA}/chat/completions`,
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
          "User-Agent": "pi-free-providers",
        },
        body: JSON.stringify({
          model: modelId,
          messages: [{ role: "user", content: "hi" }],
          max_tokens: 1,
        }),
      },
      10000, // 10 second timeout
    );

    // 404 = function not found (model not provisioned)
    // 200/400/401/etc = at least routable
    const routable = response.status !== 404;

    // Only the status is needed. Discard the unread body so the connection
    // is released right away instead of lingering until garbage collection;
    // a failure to cancel must not change the probe result.
    await response.body?.cancel().catch(() => undefined);

    return routable;
  } catch {
    return true; // Network errors / timeouts are not "model not found"
  }
}
321
+
322
const _nvidiaLogger = createLogger("nvidia");

/**
 * Probe every model in `modelsToTest` and auto-hide the ones that answer 404.
 * Shared between the /probe-nvidia command and the auto-probe on
 * session_start.
 *
 * Models are probed five at a time. When at least one probe fails, the
 * provider-scoped IDs are merged into `hidden_models` in the config file,
 * the model list is fetched again, `stored` is updated in place and the
 * refreshed list is handed to `reRegister`. When nothing fails, neither the
 * config nor `stored` is touched.
 *
 * @param apiKey - NVIDIA API key used for probing and re-fetching.
 * @param modelsToTest - Models whose IDs are probed.
 * @param stored - Free/all model sets shared with the global toggle; mutated.
 * @param reRegister - Callback that re-registers the provider's models.
 */
async function runNvidiaProbe(
  apiKey: string,
  modelsToTest: ProviderModelConfig[],
  stored: { free: ProviderModelConfig[]; all: ProviderModelConfig[] },
  reRegister: (models: ProviderModelConfig[]) => void,
): Promise<void> {
  const notFound: string[] = [];
  const batchSize = 5;

  // Probe in small parallel batches, one batch after another.
  for (let i = 0; i < modelsToTest.length; i += batchSize) {
    const batch = modelsToTest.slice(i, i + batchSize);
    const results = await Promise.all(
      batch.map(async (m) => ({
        id: m.id,
        ok: await probeNvidiaModel(apiKey, m.id),
      })),
    );
    for (const r of results) {
      if (!r.ok) notFound.push(r.id);
    }
  }

  if (notFound.length === 0) {
    _nvidiaLogger.info("Auto-probe: all NVIDIA models are routable");
    return;
  }

  // Auto-hide 404 models in config (provider-scoped)
  const cfg = loadConfigFile();
  const existingHidden = new Set(cfg.hidden_models ?? []);
  for (const id of notFound) existingHidden.add(`${PROVIDER_NVIDIA}/${id}`);
  saveConfig({ hidden_models: Array.from(existingHidden) });

  // Re-register so hidden models disappear immediately
  const filtered = await fetchNvidiaModels(apiKey);
  stored.all = filtered;
  // Keep the free subset consistent with how the entry point builds it;
  // assigning the full list here would make the free-only view show every
  // model after a probe.
  stored.free = filtered.filter((m) =>
    isFreeModel({ ...m, provider: PROVIDER_NVIDIA }),
  );
  reRegister(filtered);

  _nvidiaLogger.info(
    `Auto-probe: found ${notFound.length} broken models (auto-hidden)`,
  );
}
371
+
372
/**
 * Extension entry point for the NVIDIA provider.
 *
 * Fetches the model list, registers the provider and its free/all model sets
 * with the global toggle, schedules a one-time background probe on the first
 * `session_start`, and registers the `probe-nvidia` command plus a status
 * line shown while a NVIDIA model is selected. If the initial fetch throws,
 * the error is logged and nothing is registered.
 */
export default async function (pi: ExtensionAPI) {
  const apiKey = getNvidiaApiKey();
  const hasKey = !!apiKey;

  let allModels: ProviderModelConfig[] = [];

  try {
    allModels = await fetchNvidiaModels(apiKey);
  } catch (error) {
    console.error("[nvidia] Failed to fetch models at startup", error);
    return;
  }

  // Store both sets for the global toggle using the shared isFreeModel helper.
  // NOTE(review): the previous comment described this as name-based ("free"
  // in the model name) — confirm against lib/registry.ts.
  const freeModels = allModels.filter((m) =>
    isFreeModel({ ...m, provider: PROVIDER_NVIDIA }),
  );
  // `stored` is the live view of the model sets: runNvidiaProbe replaces its
  // contents after hiding broken models, so later reads go through it rather
  // than through the startup `allModels` capture.
  const stored = { free: freeModels, all: allModels };

  // Create re-register function
  const reRegister = createReRegister(pi, {
    providerId: PROVIDER_NVIDIA,
    baseUrl: BASE_URL_NVIDIA,
    apiKey: apiKey || "NVIDIA_API_KEY",
  });

  // Register with global toggle system
  registerWithGlobalToggle(PROVIDER_NVIDIA, stored, reRegister, hasKey);

  // Register initial models (global toggle will apply filter if needed)
  pi.registerProvider(PROVIDER_NVIDIA, {
    baseUrl: BASE_URL_NVIDIA,
    apiKey: apiKey || "NVIDIA_API_KEY",
    api: "openai-completions" as const,
    authHeader: true,
    headers: {
      "User-Agent": "pi-free-providers",
    },
    models: enhanceWithCI(allModels),
  });

  // ── Lazy auto-probe on first session_start ──────────────────────
  let autoProbeStarted = false;
  pi.on("session_start", async () => {
    if (autoProbeStarted || !apiKey) return;
    autoProbeStarted = true;
    _nvidiaLogger.info("Starting lazy auto-probe of NVIDIA models...");
    // Fire-and-forget: the session must not wait for the probe to finish.
    void runNvidiaProbe(apiKey, stored.all, stored, reRegister).catch(
      (err) => {
        _nvidiaLogger.warn("Auto-probe failed", {
          error: err instanceof Error ? err.message : String(err),
        });
      },
    );
  });

  // ── Probe command: test all registered models for 404s ─────────────
  pi.registerCommand("probe-nvidia", {
    description: "Test all NVIDIA models for 404 'Function not found' errors",
    handler: async (_args, ctx) => {
      if (!apiKey) {
        ctx.ui.notify("NVIDIA_API_KEY not set", "error");
        return;
      }

      // Probe the current list, not the startup snapshot, so models hidden
      // by an earlier probe are not tested again.
      const modelsToTest = stored.all;
      ctx.ui.notify(`Probing ${modelsToTest.length} NVIDIA models…`, "info");

      // Snapshot the hidden list first so only entries added by THIS run are
      // reported; entries hidden earlier or by hand are not "newly broken".
      const hiddenBefore = new Set(loadConfigFile().hidden_models ?? []);

      await runNvidiaProbe(apiKey, modelsToTest, stored, reRegister);

      const scopePrefix = `${PROVIDER_NVIDIA}/`;
      const newHidden = (loadConfigFile().hidden_models ?? []).filter(
        (h) => h.startsWith(scopePrefix) && !hiddenBefore.has(h),
      );
      if (newHidden.length > 0) {
        ctx.ui.notify(
          `Found ${newHidden.length} broken models (auto-hidden):\n${newHidden.join("\n")}`,
          "warning",
        );
      } else {
        ctx.ui.notify("All NVIDIA models are routable ✅", "info");
      }
    },
  });

  // ── Status bar for provider selection ─────────────────────────

  pi.on("model_select", (event, ctx) => {
    if (event.model?.provider !== PROVIDER_NVIDIA) {
      ctx.ui.setStatus(`${PROVIDER_NVIDIA}-status`, undefined);
      return;
    }

    // Read the live count so it reflects models hidden by a probe.
    const count = stored.all.length;
    ctx.ui.setStatus(
      `${PROVIDER_NVIDIA}-status`,
      `nvidia: ${count} models (freemium)`,
    );
  });

  // Registration complete - models registered silently (use LOG_LEVEL=info to see details)
}