pi-free 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,8 +20,9 @@ import type {
20
20
  import {
21
21
  applyHidden,
22
22
  getNvidiaApiKey,
23
- getNvidiaShowPaid,
23
+ loadConfigFile,
24
24
  PROVIDER_NVIDIA,
25
+ saveConfig,
25
26
  } from "../../config.ts";
26
27
  import {
27
28
  BASE_URL_NVIDIA,
@@ -30,43 +31,215 @@ import {
30
31
  URL_MODELS_DEV,
31
32
  } from "../../constants.ts";
32
33
  import { registerWithGlobalToggle } from "../../lib/registry.ts";
33
- import type { ModelsDevProvider } from "../../lib/types.ts";
34
- import { fetchWithRetry, isUsableModel } from "../../lib/util.ts";
34
+ import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
35
+ import {
36
+ fetchWithRetry,
37
+ fetchWithTimeout,
38
+ isUsableModel,
39
+ } from "../../lib/util.ts";
35
40
  import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
36
41
 
42
+ // =============================================================================
43
+ // Non-chat model heuristics for models not in models.dev
44
+ // =============================================================================
45
+
46
/**
 * Heuristic ID patterns used to recognise NVIDIA catalog entries that should
 * not be offered as chat models. An ID matching ANY pattern is treated as
 * non-chat by `inferModelFromId`.
 *
 * Notes for maintainers:
 * - All patterns are case-insensitive and unanchored unless they say otherwise
 *   (`/vila$/i` only matches at the end of the ID).
 * - `(?!.*instruct)` exempts IDs where "instruct" appears somewhere after the
 *   keyword, so instruct-tuned variants are not excluded by that pattern.
 * - None of the patterns carries the `g` or `y` flag, so `RegExp.prototype.test`
 *   keeps no `lastIndex` state between calls. Keep it that way when adding
 *   entries, otherwise repeated tests would give alternating results.
 *
 * Declared `readonly` so the shared table cannot be mutated by accident.
 */
const NVIDIA_NON_CHAT_PATTERNS: readonly RegExp[] = [
  /embed(?!.*instruct)/i,
  /whisper/i,
  /reward/i,
  /ocr(?!.*instruct)/i,
  /safety-guard|content-safety|nemoguard/i,
  /retriever-parse|nemotron-parse(?!.*instruct)/i,
  /detector/i,
  /deplot/i,
  /nvclip/i,
  /vila$/i,
  /neva(?!.*instruct)/i,
  /translate/i,
  /cosmos-reason/i,
  /kosmos/i,
  /bge-/i,
  // The entries below match regardless of a trailing "instruct", unlike the
  // generic /embed(?!.*instruct)/ rule at the top of the table.
  /arctic-embed/i,
  /gliner/i,
  /nv-embed/i,
  /embedqa/i,
  /embedcode/i,
];
68
+
69
/**
 * Deny-list of model IDs that NVIDIA's /v1/models endpoint advertises but that
 * answer 404 "Function not found" on /v1/chat/completions, i.e. they are
 * listed without being provisioned for hosted chat inference.
 *
 * The list is community-reported; append new IDs as they surface. Lookups are
 * exact, case-sensitive matches on the full "vendor/model" ID.
 *
 * Individual models can additionally be hidden by users through
 * hidden_models in ~/.pi/free.json.
 */
const NVIDIA_KNOWN_404_MODELS: ReadonlySet<string> = new Set<string>([
  // 01-ai / adept / ai21labs / aisingapore / baai / bigcode / databricks
  "01-ai/yi-large",
  "adept/fuyu-8b",
  "ai21labs/jamba-1.5-large-instruct",
  "aisingapore/sea-lion-7b-instruct",
  "baai/bge-m3",
  "bigcode/starcoder2-15b",
  "databricks/dbrx-instruct",

  // deepseek-ai
  "deepseek-ai/deepseek-coder-6.7b-instruct",

  // google
  "google/codegemma-1.1-7b",
  "google/codegemma-7b",
  "google/deplot",
  "google/gemma-2b",
  "google/recurrentgemma-2b",

  // ibm
  "ibm/granite-3.0-3b-a800m-instruct",
  "ibm/granite-3.0-8b-instruct",
  "ibm/granite-34b-code-instruct",
  "ibm/granite-8b-code-instruct",

  // meta
  "meta/codellama-70b",
  "meta/llama2-70b",

  // microsoft
  "microsoft/kosmos-2",
  "microsoft/phi-3-vision-128k-instruct",
  "microsoft/phi-3.5-moe-instruct",

  // mistralai / nv-mistralai
  "mistralai/codestral-22b-instruct-v0.1",
  "mistralai/mistral-7b-instruct-v0.3",
  "mistralai/mistral-large",
  "mistralai/mistral-large-2-instruct",
  "mistralai/mixtral-8x22b-v0.1",
  "nv-mistralai/mistral-nemo-12b-instruct",

  // nvidia
  "nvidia/cosmos-reason2-8b",
  "nvidia/embed-qa-4",
  "nvidia/llama-3.1-nemotron-51b-instruct",
  "nvidia/llama-3.1-nemotron-70b-instruct",
  "nvidia/llama-3.1-nemotron-ultra-253b-v1",
  "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
  "nvidia/llama-3.2-nemoretriever-300m-embed-v1",
  "nvidia/llama-3.2-nv-embedqa-1b-v1",
  "nvidia/llama-3.2-nv-embedqa-1b-v2",
  "nvidia/llama-nemotron-embed-1b-v2",
  "nvidia/llama-nemotron-embed-vl-1b-v2",
  "nvidia/llama3-chatqa-1.5-70b",
  "nvidia/mistral-nemo-minitron-8b-8k-instruct",
  "nvidia/nemotron-4-340b-instruct",
  "nvidia/nemotron-4-340b-reward",
  "nvidia/nemotron-nano-3-30b-a3b",
  "nvidia/neva-22b",
  "nvidia/nv-embed-v1",
  "nvidia/nv-embedcode-7b-v1",
  "nvidia/nv-embedqa-e5-v5",
  "nvidia/nv-embedqa-mistral-7b-v2",
  "nvidia/nvclip",
  "nvidia/riva-translate-4b-instruct",

  // snowflake / writer / zyphra
  "snowflake/arctic-embed-l",
  "writer/palmyra-creative-122b",
  "writer/palmyra-fin-70b-32k",
  "writer/palmyra-med-70b",
  "writer/palmyra-med-70b-32k",
  "zyphra/zamba2-7b-instruct",
]);
135
+
136
/**
 * Infer model metadata from a NVIDIA model ID for models not present in
 * models.dev.
 *
 * Everything returned here is a guess derived from the ID string alone:
 * - `name` is the last "/"-separated segment with hyphens turned into spaces,
 *   each word capitalised and size suffixes such as "70b" rendered as "70B".
 *   If that segment is empty (e.g. the ID ends with "/"), the full ID is used
 *   instead so the display name is never blank for a non-empty ID.
 * - `reasoning` and image input are switched on by keyword matches on the ID.
 * - Context/output limits and cost are fixed placeholder values.
 *
 * @param id Model ID as listed by the NVIDIA API (typically "vendor/model").
 * @returns Inferred metadata, or `null` if the ID matches any of the
 *   known non-chat patterns in `NVIDIA_NON_CHAT_PATTERNS`.
 */
function inferModelFromId(id: string): ModelsDevModel | null {
  if (NVIDIA_NON_CHAT_PATTERNS.some((pattern) => pattern.test(id))) {
    return null;
  }

  // Last path segment without a non-null assertion; `||` (not `??`) is
  // deliberate so an empty segment falls back to the full ID.
  const slug = id.slice(id.lastIndexOf("/") + 1) || id;

  const name = slug
    .replace(/-/g, " ")
    .replace(/\b\w/g, (c) => c.toUpperCase())
    // Parameter-count suffixes read better upper-cased: "70b" -> "70B".
    .replace(/\b(\d+(?:\.\d+)?)b\b/gi, "$1B");

  // Keyword heuristics run against the full ID (vendor prefix included).
  const hasVision = /vision|multimodal|vl/i.test(id);
  const hasReasoning = /reason|r1|thinking/i.test(id);

  return {
    id,
    name,
    reasoning: hasReasoning,
    limit: { context: 128_000, output: 4096 },
    modalities: {
      input: hasVision ? ["text", "image"] : ["text"],
      output: ["text"],
    },
    cost: { input: 0, output: 0 },
  };
}
167
+
37
168
  // =============================================================================
38
169
  // Fetch + map
39
170
  // =============================================================================
40
171
 
41
172
  async function fetchNvidiaModels(
42
- showPaid = false,
173
+ apiKey?: string,
43
174
  ): Promise<ProviderModelConfig[]> {
44
- const response = await fetchWithRetry(
45
- URL_MODELS_DEV,
46
- {
47
- headers: { "User-Agent": "pi-free-providers" },
48
- },
49
- 3,
50
- 1000,
51
- DEFAULT_FETCH_TIMEOUT_MS,
52
- );
175
+ // ── 1. Query NVIDIA's actual API (source of truth) ─────────────────
176
+ let apiModelIds = new Set<string>();
177
+ if (apiKey) {
178
+ try {
179
+ const response = await fetchWithRetry(
180
+ `${BASE_URL_NVIDIA}/models`,
181
+ {
182
+ headers: {
183
+ Authorization: `Bearer ${apiKey}`,
184
+ "User-Agent": "pi-free-providers",
185
+ },
186
+ },
187
+ 3,
188
+ 1000,
189
+ DEFAULT_FETCH_TIMEOUT_MS,
190
+ );
191
+ if (response.ok) {
192
+ const json = (await response.json()) as {
193
+ data?: Array<{ id: string }>;
194
+ };
195
+ if (json.data) {
196
+ apiModelIds = new Set(json.data.map((m) => m.id));
197
+ }
198
+ }
199
+ } catch (error) {
200
+ console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
201
+ }
202
+ }
53
203
 
54
- if (!response.ok) {
55
- throw new Error(
56
- `Failed to fetch models.dev: ${response.status} ${response.statusText}`,
204
+ // ── 2. Fetch models.dev for rich metadata (cost, limits, etc.) ─────
205
+ const devModels = new Map<string, ModelsDevModel>();
206
+ try {
207
+ const response = await fetchWithRetry(
208
+ URL_MODELS_DEV,
209
+ {
210
+ headers: { "User-Agent": "pi-free-providers" },
211
+ },
212
+ 3,
213
+ 1000,
214
+ DEFAULT_FETCH_TIMEOUT_MS,
57
215
  );
216
+ if (response.ok) {
217
+ const json = (await response.json()) as Record<string, ModelsDevProvider>;
218
+ const provider = Object.values(json).find((p) => p?.id === "nvidia");
219
+ if (provider?.models) {
220
+ for (const m of Object.values(provider.models)) {
221
+ devModels.set(m.id, m);
222
+ }
223
+ }
224
+ }
225
+ } catch (error) {
226
+ console.error("[nvidia] Failed to fetch models.dev", error);
58
227
  }
59
228
 
60
- const json = (await response.json()) as Record<string, ModelsDevProvider>;
61
- const provider = Object.values(json).find((p) => p?.id === "nvidia");
62
- if (!provider?.models)
63
- throw new Error("nvidia provider not found in models.dev");
229
+ // ── 3. Build unified list (NVIDIA API wins; fallback to models.dev)
230
+ const modelIds =
231
+ apiModelIds.size > 0 ? [...apiModelIds] : [...devModels.keys()];
64
232
 
65
233
  const result = applyHidden(
66
- Object.values(provider.models)
234
+ modelIds
235
+ .map((id) => {
236
+ const dev = devModels.get(id);
237
+ if (dev) return dev;
238
+ return inferModelFromId(id);
239
+ })
240
+ .filter((m): m is ModelsDevModel => m !== null)
67
241
  .filter((m) => isUsableModel(m.id, NVIDIA_MIN_SIZE_B))
68
242
  .filter((m) => {
69
- // Filter non-chat models by modalities
70
243
  const modalities = m.modalities;
71
244
  if (modalities) {
72
245
  const output = modalities.output ?? [];
@@ -76,11 +249,15 @@ async function fetchNvidiaModels(
76
249
  }
77
250
  return true;
78
251
  })
252
+ // Filter out known 404 models (listed but not provisioned for chat)
79
253
  .filter((m) => {
80
- // Filter by cost - free models have input cost of 0
81
- if (!showPaid && (m.cost?.input ?? 0) > 0) return false;
254
+ if (NVIDIA_KNOWN_404_MODELS.has(m.id)) {
255
+ return false;
256
+ }
82
257
  return true;
83
258
  })
259
+ // NVIDIA is freemium — all models are usable with free credits.
260
+ // No cost filtering applied.
84
261
  .map(
85
262
  (m): ProviderModelConfig => ({
86
263
  id: m.id,
@@ -99,6 +276,7 @@ async function fetchNvidiaModels(
99
276
  maxTokens: m.limit.output,
100
277
  }),
101
278
  ),
279
+ PROVIDER_NVIDIA,
102
280
  );
103
281
 
104
282
  return result;
@@ -108,23 +286,55 @@ async function fetchNvidiaModels(
108
286
  // Extension Entry Point
109
287
  // =============================================================================
110
288
 
289
/**
 * Probe a single NVIDIA model with a minimal chat request.
 *
 * Sends a one-message, `max_tokens: 1` POST to the chat completions endpoint
 * and inspects only the HTTP status:
 * - 404 means the model is listed but not routable ("function not found").
 * - Any other status (200/400/401/...) means the request at least reached a
 *   routable model, so the model is not reported as missing.
 *
 * The response body is never needed, so it is cancelled after the status has
 * been read. This lets the connection be released promptly instead of waiting
 * for garbage collection while many models are probed back to back.
 *
 * @param apiKey NVIDIA API key sent as a Bearer token.
 * @param modelId Model ID to probe.
 * @returns `false` only when the endpoint answers 404; `true` otherwise,
 *   including when the request fails or times out (a network problem is not
 *   evidence that the model is missing).
 */
async function probeNvidiaModel(
  apiKey: string,
  modelId: string,
): Promise<boolean> {
  const probeTimeoutMs = 10_000; // 10 second timeout

  try {
    const response = await fetchWithTimeout(
      `${BASE_URL_NVIDIA}/chat/completions`,
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
          "User-Agent": "pi-free-providers",
        },
        body: JSON.stringify({
          model: modelId,
          messages: [{ role: "user", content: "hi" }],
          max_tokens: 1,
        }),
      },
      probeTimeoutMs,
    );

    // Decide first, then clean up: a failing cancel must not be able to turn
    // a definite 404 into the "routable" result of the outer catch.
    const routable = response.status !== 404;
    await response.body?.cancel().catch(() => undefined);
    return routable;
  } catch {
    return true; // Network errors / timeouts are not "model not found"
  }
}
322
+
111
323
  export default async function (pi: ExtensionAPI) {
112
- // Fetch both free and all models
113
- let freeModels: ProviderModelConfig[] = [];
324
+ const apiKey = getNvidiaApiKey();
325
+ const hasKey = !!apiKey;
326
+
114
327
  let allModels: ProviderModelConfig[] = [];
115
328
 
116
329
  try {
117
- freeModels = await fetchNvidiaModels(false);
118
- allModels = await fetchNvidiaModels(true);
330
+ allModels = await fetchNvidiaModels(apiKey);
119
331
  } catch (error) {
120
332
  console.error("[nvidia] Failed to fetch models at startup", error);
121
333
  return;
122
334
  }
123
335
 
124
- // Store both sets for global toggle
125
- const stored = { free: freeModels, all: allModels };
126
- const apiKey = getNvidiaApiKey();
127
- const hasKey = !!(apiKey || process.env.NVIDIA_API_KEY);
336
+ // Store both sets for global toggle (same list — NVIDIA is freemium)
337
+ const stored = { free: allModels, all: allModels };
128
338
 
129
339
  // Create re-register function
130
340
  const reRegister = createReRegister(pi, {
@@ -137,16 +347,69 @@ export default async function (pi: ExtensionAPI) {
137
347
  registerWithGlobalToggle(PROVIDER_NVIDIA, stored, reRegister, hasKey);
138
348
 
139
349
  // Register initial models (global toggle will apply filter if needed)
140
- const initialModels = getNvidiaShowPaid() ? allModels : freeModels;
350
+ const initialModels = allModels;
141
351
  pi.registerProvider(PROVIDER_NVIDIA, {
142
352
  baseUrl: BASE_URL_NVIDIA,
143
353
  apiKey: apiKey || "NVIDIA_API_KEY",
144
354
  api: "openai-completions" as const,
355
+ authHeader: true,
145
356
  headers: {
146
357
  "User-Agent": "pi-free-providers",
147
358
  },
148
359
  models: enhanceWithCI(initialModels),
149
360
  });
150
361
 
362
+ // ── Probe command: test all registered models for 404s ─────────────
363
+ pi.registerCommand("probe-nvidia", {
364
+ description: "Test all NVIDIA models for 404 'Function not found' errors",
365
+ handler: async (_args, ctx) => {
366
+ if (!apiKey) {
367
+ ctx.ui.notify("NVIDIA_API_KEY not set", "error");
368
+ return;
369
+ }
370
+
371
+ const modelsToTest = allModels;
372
+ ctx.ui.notify(`Probing ${modelsToTest.length} NVIDIA models…`, "info");
373
+
374
+ const notFound: string[] = [];
375
+ const batchSize = 5;
376
+
377
+ for (let i = 0; i < modelsToTest.length; i += batchSize) {
378
+ const batch = modelsToTest.slice(i, i + batchSize);
379
+ const results = await Promise.all(
380
+ batch.map(async (m) => {
381
+ const ok = await probeNvidiaModel(apiKey, m.id);
382
+ return { id: m.id, ok };
383
+ }),
384
+ );
385
+ for (const r of results) {
386
+ if (!r.ok) notFound.push(r.id);
387
+ }
388
+ }
389
+
390
+ if (notFound.length === 0) {
391
+ ctx.ui.notify("All NVIDIA models are routable ✅", "info");
392
+ return;
393
+ }
394
+
395
+ // Auto-hide 404 models in config (provider-scoped)
396
+ const config = loadConfigFile();
397
+ const existingHidden = new Set(config.hidden_models ?? []);
398
+ for (const id of notFound) existingHidden.add(`${PROVIDER_NVIDIA}/${id}`);
399
+ saveConfig({ hidden_models: Array.from(existingHidden) });
400
+
401
+ // Re-register so hidden models disappear immediately
402
+ const filtered = await fetchNvidiaModels(apiKey);
403
+ stored.free = filtered;
404
+ stored.all = filtered;
405
+ reRegister(filtered);
406
+
407
+ ctx.ui.notify(
408
+ `Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
409
+ "warning",
410
+ );
411
+ },
412
+ });
413
+
151
414
  // Registration complete - models registered silently (use LOG_LEVEL=info to see details)
152
415
  }