pi-free 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +106 -2
- package/README.md +548 -393
- package/config.ts +22 -2
- package/index.ts +148 -148
- package/lib/built-in-toggle.ts +23 -42
- package/lib/toggle-state.ts +86 -0
- package/lib/util.ts +256 -256
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +637 -247
- package/provider-helper.ts +261 -260
- package/providers/cline/cline-models.ts +129 -128
- package/providers/cline/cline.ts +284 -298
- package/providers/cloudflare/cloudflare.ts +292 -134
- package/providers/dynamic-built-in/index.ts +2 -1
- package/providers/kilo/kilo-models.ts +2 -1
- package/providers/modal/modal.ts +47 -44
- package/providers/nvidia/nvidia.ts +296 -33
- package/providers/ollama/ollama.ts +280 -172
|
@@ -20,8 +20,9 @@ import type {
|
|
|
20
20
|
import {
|
|
21
21
|
applyHidden,
|
|
22
22
|
getNvidiaApiKey,
|
|
23
|
-
|
|
23
|
+
loadConfigFile,
|
|
24
24
|
PROVIDER_NVIDIA,
|
|
25
|
+
saveConfig,
|
|
25
26
|
} from "../../config.ts";
|
|
26
27
|
import {
|
|
27
28
|
BASE_URL_NVIDIA,
|
|
@@ -30,43 +31,215 @@ import {
|
|
|
30
31
|
URL_MODELS_DEV,
|
|
31
32
|
} from "../../constants.ts";
|
|
32
33
|
import { registerWithGlobalToggle } from "../../lib/registry.ts";
|
|
33
|
-
import type { ModelsDevProvider } from "../../lib/types.ts";
|
|
34
|
-
import {
|
|
34
|
+
import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
|
|
35
|
+
import {
|
|
36
|
+
fetchWithRetry,
|
|
37
|
+
fetchWithTimeout,
|
|
38
|
+
isUsableModel,
|
|
39
|
+
} from "../../lib/util.ts";
|
|
35
40
|
import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
|
|
36
41
|
|
|
42
|
+
// =============================================================================
|
|
43
|
+
// Non-chat model heuristics for models not in models.dev
|
|
44
|
+
// =============================================================================
|
|
45
|
+
|
|
46
|
+
const NVIDIA_NON_CHAT_PATTERNS: RegExp[] = [
|
|
47
|
+
/embed(?!.*instruct)/i,
|
|
48
|
+
/whisper/i,
|
|
49
|
+
/reward/i,
|
|
50
|
+
/ocr(?!.*instruct)/i,
|
|
51
|
+
/safety-guard|content-safety|nemoguard/i,
|
|
52
|
+
/retriever-parse|nemotron-parse(?!.*instruct)/i,
|
|
53
|
+
/detector/i,
|
|
54
|
+
/deplot/i,
|
|
55
|
+
/nvclip/i,
|
|
56
|
+
/vila$/i,
|
|
57
|
+
/neva(?!.*instruct)/i,
|
|
58
|
+
/translate/i,
|
|
59
|
+
/cosmos-reason/i,
|
|
60
|
+
/kosmos/i,
|
|
61
|
+
/bge-/i,
|
|
62
|
+
/arctic-embed/i,
|
|
63
|
+
/gliner/i,
|
|
64
|
+
/nv-embed/i,
|
|
65
|
+
/embedqa/i,
|
|
66
|
+
/embedcode/i,
|
|
67
|
+
];
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Models that appear in NVIDIA's /v1/models but return 404 "Function not found"
|
|
71
|
+
* on /v1/chat/completions. These are listed but not actually provisioned for
|
|
72
|
+
* hosted chat inference. Community-reported; add new IDs as they surface.
|
|
73
|
+
*
|
|
74
|
+
* Users can also hide individual models via hidden_models in ~/.pi/free.json.
|
|
75
|
+
*/
|
|
76
|
+
const NVIDIA_KNOWN_404_MODELS: ReadonlySet<string> = new Set([
|
|
77
|
+
"01-ai/yi-large",
|
|
78
|
+
"adept/fuyu-8b",
|
|
79
|
+
"ai21labs/jamba-1.5-large-instruct",
|
|
80
|
+
"aisingapore/sea-lion-7b-instruct",
|
|
81
|
+
"baai/bge-m3",
|
|
82
|
+
"bigcode/starcoder2-15b",
|
|
83
|
+
"databricks/dbrx-instruct",
|
|
84
|
+
"deepseek-ai/deepseek-coder-6.7b-instruct",
|
|
85
|
+
"google/codegemma-1.1-7b",
|
|
86
|
+
"google/codegemma-7b",
|
|
87
|
+
"google/deplot",
|
|
88
|
+
"google/gemma-2b",
|
|
89
|
+
"google/recurrentgemma-2b",
|
|
90
|
+
"ibm/granite-3.0-3b-a800m-instruct",
|
|
91
|
+
"ibm/granite-3.0-8b-instruct",
|
|
92
|
+
"ibm/granite-34b-code-instruct",
|
|
93
|
+
"ibm/granite-8b-code-instruct",
|
|
94
|
+
"meta/codellama-70b",
|
|
95
|
+
"meta/llama2-70b",
|
|
96
|
+
"microsoft/kosmos-2",
|
|
97
|
+
"microsoft/phi-3-vision-128k-instruct",
|
|
98
|
+
"microsoft/phi-3.5-moe-instruct",
|
|
99
|
+
"mistralai/codestral-22b-instruct-v0.1",
|
|
100
|
+
"mistralai/mistral-7b-instruct-v0.3",
|
|
101
|
+
"mistralai/mistral-large",
|
|
102
|
+
"mistralai/mistral-large-2-instruct",
|
|
103
|
+
"mistralai/mixtral-8x22b-v0.1",
|
|
104
|
+
"nv-mistralai/mistral-nemo-12b-instruct",
|
|
105
|
+
"nvidia/cosmos-reason2-8b",
|
|
106
|
+
"nvidia/embed-qa-4",
|
|
107
|
+
"nvidia/llama-3.1-nemotron-51b-instruct",
|
|
108
|
+
"nvidia/llama-3.1-nemotron-70b-instruct",
|
|
109
|
+
"nvidia/llama-3.1-nemotron-ultra-253b-v1",
|
|
110
|
+
"nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1",
|
|
111
|
+
"nvidia/llama-3.2-nemoretriever-300m-embed-v1",
|
|
112
|
+
"nvidia/llama-3.2-nv-embedqa-1b-v1",
|
|
113
|
+
"nvidia/llama-3.2-nv-embedqa-1b-v2",
|
|
114
|
+
"nvidia/llama-nemotron-embed-1b-v2",
|
|
115
|
+
"nvidia/llama-nemotron-embed-vl-1b-v2",
|
|
116
|
+
"nvidia/llama3-chatqa-1.5-70b",
|
|
117
|
+
"nvidia/mistral-nemo-minitron-8b-8k-instruct",
|
|
118
|
+
"nvidia/nemotron-4-340b-instruct",
|
|
119
|
+
"nvidia/nemotron-4-340b-reward",
|
|
120
|
+
"nvidia/nemotron-nano-3-30b-a3b",
|
|
121
|
+
"nvidia/neva-22b",
|
|
122
|
+
"nvidia/nv-embed-v1",
|
|
123
|
+
"nvidia/nv-embedcode-7b-v1",
|
|
124
|
+
"nvidia/nv-embedqa-e5-v5",
|
|
125
|
+
"nvidia/nv-embedqa-mistral-7b-v2",
|
|
126
|
+
"nvidia/nvclip",
|
|
127
|
+
"nvidia/riva-translate-4b-instruct",
|
|
128
|
+
"snowflake/arctic-embed-l",
|
|
129
|
+
"writer/palmyra-creative-122b",
|
|
130
|
+
"writer/palmyra-fin-70b-32k",
|
|
131
|
+
"writer/palmyra-med-70b",
|
|
132
|
+
"writer/palmyra-med-70b-32k",
|
|
133
|
+
"zyphra/zamba2-7b-instruct",
|
|
134
|
+
]);
|
|
135
|
+
|
|
136
|
+
/**
 * Build a best-effort metadata record for a NVIDIA model ID that has no
 * entry in models.dev.
 *
 * @param id - Model ID as reported by NVIDIA, e.g. "org/some-model-70b".
 * @returns `null` when the ID matches any entry of NVIDIA_NON_CHAT_PATTERNS;
 *   otherwise a record whose display name is derived from the ID, whose
 *   capability flags are guessed from substrings of the ID, and whose limits
 *   and cost are fixed placeholder values.
 */
function inferModelFromId(id: string): ModelsDevModel | null {
  // Anything that looks like a non-chat model is rejected outright.
  if (NVIDIA_NON_CHAT_PATTERNS.some((re) => re.test(id))) {
    return null;
  }

  // Display name: take the segment after the last "/", turn hyphens into
  // spaces, capitalise the first character of every word, then upper-case
  // size suffixes such as "70b" -> "70B".
  const slug = id.slice(id.lastIndexOf("/") + 1);
  const spaced = slug.replace(/-/g, " ");
  const titled = spaced.replace(/\b\w/g, (ch) => ch.toUpperCase());
  const name = titled.replace(/\b(\d+(?:\.\d+)?)b\b/gi, "$1B");

  // Capability hints are plain substring matches against the full ID.
  const acceptsImages = /vision|multimodal|vl/i.test(id);
  const reasoning = /reason|r1|thinking/i.test(id);

  return {
    id,
    name,
    reasoning,
    // Placeholder limits used when no real metadata is available.
    limit: { context: 128_000, output: 4096 },
    modalities: {
      input: acceptsImages ? ["text", "image"] : ["text"],
      output: ["text"],
    },
    cost: { input: 0, output: 0 },
  };
}
|
|
167
|
+
|
|
37
168
|
// =============================================================================
|
|
38
169
|
// Fetch + map
|
|
39
170
|
// =============================================================================
|
|
40
171
|
|
|
41
172
|
async function fetchNvidiaModels(
|
|
42
|
-
|
|
173
|
+
apiKey?: string,
|
|
43
174
|
): Promise<ProviderModelConfig[]> {
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
175
|
+
// ── 1. Query NVIDIA's actual API (source of truth) ─────────────────
|
|
176
|
+
let apiModelIds = new Set<string>();
|
|
177
|
+
if (apiKey) {
|
|
178
|
+
try {
|
|
179
|
+
const response = await fetchWithRetry(
|
|
180
|
+
`${BASE_URL_NVIDIA}/models`,
|
|
181
|
+
{
|
|
182
|
+
headers: {
|
|
183
|
+
Authorization: `Bearer ${apiKey}`,
|
|
184
|
+
"User-Agent": "pi-free-providers",
|
|
185
|
+
},
|
|
186
|
+
},
|
|
187
|
+
3,
|
|
188
|
+
1000,
|
|
189
|
+
DEFAULT_FETCH_TIMEOUT_MS,
|
|
190
|
+
);
|
|
191
|
+
if (response.ok) {
|
|
192
|
+
const json = (await response.json()) as {
|
|
193
|
+
data?: Array<{ id: string }>;
|
|
194
|
+
};
|
|
195
|
+
if (json.data) {
|
|
196
|
+
apiModelIds = new Set(json.data.map((m) => m.id));
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
} catch (error) {
|
|
200
|
+
console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
53
203
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
204
|
+
// ── 2. Fetch models.dev for rich metadata (cost, limits, etc.) ─────
|
|
205
|
+
const devModels = new Map<string, ModelsDevModel>();
|
|
206
|
+
try {
|
|
207
|
+
const response = await fetchWithRetry(
|
|
208
|
+
URL_MODELS_DEV,
|
|
209
|
+
{
|
|
210
|
+
headers: { "User-Agent": "pi-free-providers" },
|
|
211
|
+
},
|
|
212
|
+
3,
|
|
213
|
+
1000,
|
|
214
|
+
DEFAULT_FETCH_TIMEOUT_MS,
|
|
57
215
|
);
|
|
216
|
+
if (response.ok) {
|
|
217
|
+
const json = (await response.json()) as Record<string, ModelsDevProvider>;
|
|
218
|
+
const provider = Object.values(json).find((p) => p?.id === "nvidia");
|
|
219
|
+
if (provider?.models) {
|
|
220
|
+
for (const m of Object.values(provider.models)) {
|
|
221
|
+
devModels.set(m.id, m);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
} catch (error) {
|
|
226
|
+
console.error("[nvidia] Failed to fetch models.dev", error);
|
|
58
227
|
}
|
|
59
228
|
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
throw new Error("nvidia provider not found in models.dev");
|
|
229
|
+
// ── 3. Build unified list (NVIDIA API wins; fallback to models.dev) ─
|
|
230
|
+
const modelIds =
|
|
231
|
+
apiModelIds.size > 0 ? [...apiModelIds] : [...devModels.keys()];
|
|
64
232
|
|
|
65
233
|
const result = applyHidden(
|
|
66
|
-
|
|
234
|
+
modelIds
|
|
235
|
+
.map((id) => {
|
|
236
|
+
const dev = devModels.get(id);
|
|
237
|
+
if (dev) return dev;
|
|
238
|
+
return inferModelFromId(id);
|
|
239
|
+
})
|
|
240
|
+
.filter((m): m is ModelsDevModel => m !== null)
|
|
67
241
|
.filter((m) => isUsableModel(m.id, NVIDIA_MIN_SIZE_B))
|
|
68
242
|
.filter((m) => {
|
|
69
|
-
// Filter non-chat models by modalities
|
|
70
243
|
const modalities = m.modalities;
|
|
71
244
|
if (modalities) {
|
|
72
245
|
const output = modalities.output ?? [];
|
|
@@ -76,11 +249,15 @@ async function fetchNvidiaModels(
|
|
|
76
249
|
}
|
|
77
250
|
return true;
|
|
78
251
|
})
|
|
252
|
+
// Filter out known 404 models (listed but not provisioned for chat)
|
|
79
253
|
.filter((m) => {
|
|
80
|
-
|
|
81
|
-
|
|
254
|
+
if (NVIDIA_KNOWN_404_MODELS.has(m.id)) {
|
|
255
|
+
return false;
|
|
256
|
+
}
|
|
82
257
|
return true;
|
|
83
258
|
})
|
|
259
|
+
// NVIDIA is freemium — all models are usable with free credits.
|
|
260
|
+
// No cost filtering applied.
|
|
84
261
|
.map(
|
|
85
262
|
(m): ProviderModelConfig => ({
|
|
86
263
|
id: m.id,
|
|
@@ -99,6 +276,7 @@ async function fetchNvidiaModels(
|
|
|
99
276
|
maxTokens: m.limit.output,
|
|
100
277
|
}),
|
|
101
278
|
),
|
|
279
|
+
PROVIDER_NVIDIA,
|
|
102
280
|
);
|
|
103
281
|
|
|
104
282
|
return result;
|
|
@@ -108,23 +286,55 @@ async function fetchNvidiaModels(
|
|
|
108
286
|
// Extension Entry Point
|
|
109
287
|
// =============================================================================
|
|
110
288
|
|
|
289
|
+
/**
 * Probe a single NVIDIA model with a minimal chat request.
 *
 * Sends a one-message, `max_tokens: 1` POST to the chat completions endpoint
 * and inspects only the HTTP status code.
 *
 * @param apiKey - NVIDIA API key, sent as a Bearer token.
 * @param modelId - ID of the model to probe.
 * @returns `false` only when the endpoint answers 404 (model listed but not
 *   provisioned); `true` for every other status, and also when the request
 *   throws (network error or timeout), since that says nothing about whether
 *   the model exists.
 */
async function probeNvidiaModel(
  apiKey: string,
  modelId: string,
): Promise<boolean> {
  try {
    const response = await fetchWithTimeout(
      `${BASE_URL_NVIDIA}/chat/completions`,
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
          "User-Agent": "pi-free-providers",
        },
        body: JSON.stringify({
          model: modelId,
          messages: [{ role: "user", content: "hi" }],
          max_tokens: 1,
        }),
      },
      10000, // 10 second timeout
    );

    // 404 = function not found (model not provisioned)
    // 200/400/401/etc = at least routable
    const routable = response.status !== 404;

    // Only the status matters. Discard the unread body so the underlying
    // connection is released right away instead of lingering until GC.
    // A failure here must not change the verdict, hence the local guard.
    try {
      await response.body?.cancel();
    } catch {
      // Body already consumed/locked or stream errored — nothing to release.
    }

    return routable;
  } catch {
    return true; // Network errors / timeouts are not "model not found"
  }
}
|
|
322
|
+
|
|
111
323
|
export default async function (pi: ExtensionAPI) {
|
|
112
|
-
|
|
113
|
-
|
|
324
|
+
const apiKey = getNvidiaApiKey();
|
|
325
|
+
const hasKey = !!apiKey;
|
|
326
|
+
|
|
114
327
|
let allModels: ProviderModelConfig[] = [];
|
|
115
328
|
|
|
116
329
|
try {
|
|
117
|
-
|
|
118
|
-
allModels = await fetchNvidiaModels(true);
|
|
330
|
+
allModels = await fetchNvidiaModels(apiKey);
|
|
119
331
|
} catch (error) {
|
|
120
332
|
console.error("[nvidia] Failed to fetch models at startup", error);
|
|
121
333
|
return;
|
|
122
334
|
}
|
|
123
335
|
|
|
124
|
-
// Store both sets for global toggle
|
|
125
|
-
const stored = { free:
|
|
126
|
-
const apiKey = getNvidiaApiKey();
|
|
127
|
-
const hasKey = !!(apiKey || process.env.NVIDIA_API_KEY);
|
|
336
|
+
// Store both sets for global toggle (same list — NVIDIA is freemium)
|
|
337
|
+
const stored = { free: allModels, all: allModels };
|
|
128
338
|
|
|
129
339
|
// Create re-register function
|
|
130
340
|
const reRegister = createReRegister(pi, {
|
|
@@ -137,16 +347,69 @@ export default async function (pi: ExtensionAPI) {
|
|
|
137
347
|
registerWithGlobalToggle(PROVIDER_NVIDIA, stored, reRegister, hasKey);
|
|
138
348
|
|
|
139
349
|
// Register initial models (global toggle will apply filter if needed)
|
|
140
|
-
const initialModels =
|
|
350
|
+
const initialModels = allModels;
|
|
141
351
|
pi.registerProvider(PROVIDER_NVIDIA, {
|
|
142
352
|
baseUrl: BASE_URL_NVIDIA,
|
|
143
353
|
apiKey: apiKey || "NVIDIA_API_KEY",
|
|
144
354
|
api: "openai-completions" as const,
|
|
355
|
+
authHeader: true,
|
|
145
356
|
headers: {
|
|
146
357
|
"User-Agent": "pi-free-providers",
|
|
147
358
|
},
|
|
148
359
|
models: enhanceWithCI(initialModels),
|
|
149
360
|
});
|
|
150
361
|
|
|
362
|
+
// ── Probe command: test all registered models for 404s ─────────────
|
|
363
|
+
pi.registerCommand("probe-nvidia", {
|
|
364
|
+
description: "Test all NVIDIA models for 404 'Function not found' errors",
|
|
365
|
+
handler: async (_args, ctx) => {
|
|
366
|
+
if (!apiKey) {
|
|
367
|
+
ctx.ui.notify("NVIDIA_API_KEY not set", "error");
|
|
368
|
+
return;
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
const modelsToTest = allModels;
|
|
372
|
+
ctx.ui.notify(`Probing ${modelsToTest.length} NVIDIA models…`, "info");
|
|
373
|
+
|
|
374
|
+
const notFound: string[] = [];
|
|
375
|
+
const batchSize = 5;
|
|
376
|
+
|
|
377
|
+
for (let i = 0; i < modelsToTest.length; i += batchSize) {
|
|
378
|
+
const batch = modelsToTest.slice(i, i + batchSize);
|
|
379
|
+
const results = await Promise.all(
|
|
380
|
+
batch.map(async (m) => {
|
|
381
|
+
const ok = await probeNvidiaModel(apiKey, m.id);
|
|
382
|
+
return { id: m.id, ok };
|
|
383
|
+
}),
|
|
384
|
+
);
|
|
385
|
+
for (const r of results) {
|
|
386
|
+
if (!r.ok) notFound.push(r.id);
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
if (notFound.length === 0) {
|
|
391
|
+
ctx.ui.notify("All NVIDIA models are routable ✅", "info");
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// Auto-hide 404 models in config (provider-scoped)
|
|
396
|
+
const config = loadConfigFile();
|
|
397
|
+
const existingHidden = new Set(config.hidden_models ?? []);
|
|
398
|
+
for (const id of notFound) existingHidden.add(`${PROVIDER_NVIDIA}/${id}`);
|
|
399
|
+
saveConfig({ hidden_models: Array.from(existingHidden) });
|
|
400
|
+
|
|
401
|
+
// Re-register so hidden models disappear immediately
|
|
402
|
+
const filtered = await fetchNvidiaModels(apiKey);
|
|
403
|
+
stored.free = filtered;
|
|
404
|
+
stored.all = filtered;
|
|
405
|
+
reRegister(filtered);
|
|
406
|
+
|
|
407
|
+
ctx.ui.notify(
|
|
408
|
+
`Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
|
|
409
|
+
"warning",
|
|
410
|
+
);
|
|
411
|
+
},
|
|
412
|
+
});
|
|
413
|
+
|
|
151
414
|
// Registration complete - models registered silently (use LOG_LEVEL=info to see details)
|
|
152
415
|
}
|