pi-free 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +9 -5
- package/config.ts +15 -0
- package/constants.ts +3 -0
- package/index.ts +135 -0
- package/lib/built-in-toggle.ts +4 -4
- package/lib/probe-cache.ts +86 -0
- package/lib/provider-compat.ts +33 -0
- package/lib/registry.ts +25 -3
- package/lib/telemetry.ts +328 -0
- package/lib/util.ts +10 -1
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +94 -8
- package/provider-failover/benchmarks-chunk-0.ts +599 -890
- package/provider-failover/benchmarks-chunk-1.ts +655 -924
- package/provider-failover/benchmarks-chunk-2.ts +675 -966
- package/provider-failover/benchmarks-chunk-3.ts +676 -967
- package/provider-failover/benchmarks-chunk-4.ts +704 -954
- package/provider-failover/benchmarks-chunk-5.ts +1301 -0
- package/provider-failover/hardcoded-benchmarks.ts +9 -3
- package/providers/cline/cline-models.ts +200 -68
- package/providers/cline/cline.ts +3 -3
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/model-fetcher.ts +3 -1
- package/providers/nvidia/nvidia.ts +54 -16
- package/providers/ollama/ollama.ts +103 -46
- package/providers/opencode-session.ts +398 -371
- package/providers/qwen/qwen.ts +2 -2
- package/providers/routeway/routeway.ts +391 -0
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* under the 3000-line limit. This file re-exports the merged result.
|
|
12
12
|
*
|
|
13
13
|
* To update: Run scripts/update-benchmarks.ts with ARTIFICIAL_ANALYSIS_API_KEY
|
|
14
|
+
* The script auto-updates this file's imports and spread when chunk count changes.
|
|
14
15
|
*/
|
|
15
16
|
|
|
16
17
|
import { BENCHMARKS_CHUNK_0 } from "./benchmarks-chunk-0.ts";
|
|
@@ -18,10 +19,8 @@ import { BENCHMARKS_CHUNK_1 } from "./benchmarks-chunk-1.ts";
|
|
|
18
19
|
import { BENCHMARKS_CHUNK_2 } from "./benchmarks-chunk-2.ts";
|
|
19
20
|
import { BENCHMARKS_CHUNK_3 } from "./benchmarks-chunk-3.ts";
|
|
20
21
|
import { BENCHMARKS_CHUNK_4 } from "./benchmarks-chunk-4.ts";
|
|
21
|
-
|
|
22
|
+
import { BENCHMARKS_CHUNK_5 } from "./benchmarks-chunk-5.ts";
|
|
22
23
|
export interface HardcodedBenchmark {
|
|
23
|
-
intelligenceIndex: number; // AA score 0-70
|
|
24
|
-
normalizedScore: number; // Our score 0-100
|
|
25
24
|
codingIndex?: number;
|
|
26
25
|
mathIndex?: number;
|
|
27
26
|
agenticIndex?: number;
|
|
@@ -33,6 +32,12 @@ export interface HardcodedBenchmark {
|
|
|
33
32
|
supportsReasoning: boolean;
|
|
34
33
|
supportsVision: boolean;
|
|
35
34
|
lastUpdated: string;
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Original model name from the source API (for debugging name collisions).
|
|
38
|
+
* Only present when regenerated; absent in shipped data.
|
|
39
|
+
*/
|
|
40
|
+
originalModel?: string;
|
|
36
41
|
}
|
|
37
42
|
|
|
38
43
|
/**
|
|
@@ -45,4 +50,5 @@ export const HARDCODED_BENCHMARKS: Record<string, HardcodedBenchmark> = {
|
|
|
45
50
|
...BENCHMARKS_CHUNK_2,
|
|
46
51
|
...BENCHMARKS_CHUNK_3,
|
|
47
52
|
...BENCHMARKS_CHUNK_4,
|
|
53
|
+
...BENCHMARKS_CHUNK_5,
|
|
48
54
|
};
|
|
@@ -1,27 +1,71 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Cline model fetching.
|
|
3
3
|
*
|
|
4
|
-
* Fetches
|
|
5
|
-
*
|
|
4
|
+
* Fetches Cline's own model catalog from api.cline.bot instead of OpenRouter.
|
|
5
|
+
* Cline also exposes a recommended/free-to-try list; those models may have
|
|
6
|
+
* non-zero list pricing in the catalog, so we mark exact recommended-free IDs
|
|
7
|
+
* as zero-cost for pi-free's free-model filter.
|
|
6
8
|
*/
|
|
7
9
|
|
|
8
10
|
import { applyHidden } from "../../config.ts";
|
|
9
11
|
import {
|
|
10
|
-
|
|
12
|
+
BASE_URL_CLINE,
|
|
11
13
|
DEFAULT_FETCH_TIMEOUT_MS,
|
|
12
14
|
PROVIDER_CLINE,
|
|
13
15
|
} from "../../constants.ts";
|
|
14
16
|
import type { ProviderModelConfig } from "../../lib/types.ts";
|
|
17
|
+
import { getProxyModelCompat } from "../../lib/provider-compat.ts";
|
|
15
18
|
import { cleanModelName, fetchWithRetry } from "../../lib/util.ts";
|
|
16
19
|
|
|
17
|
-
interface
|
|
20
|
+
interface ClineRaw {
|
|
18
21
|
id: string;
|
|
19
|
-
name
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
name?: string;
|
|
23
|
+
description?: string | null;
|
|
24
|
+
context_length?: number | null;
|
|
25
|
+
supported_parameters?: string[] | null;
|
|
26
|
+
architecture?: {
|
|
27
|
+
modality?: string | string[] | null;
|
|
28
|
+
input_modalities?: string[] | null;
|
|
29
|
+
output_modalities?: string[] | null;
|
|
30
|
+
} | null;
|
|
31
|
+
top_provider?: {
|
|
32
|
+
max_completion_tokens?: number | null;
|
|
33
|
+
context_length?: number | null;
|
|
34
|
+
} | null;
|
|
35
|
+
pricing?: {
|
|
36
|
+
prompt?: string | null;
|
|
37
|
+
completion?: string | null;
|
|
38
|
+
input_cache_read?: string | null;
|
|
39
|
+
input_cache_write?: string | null;
|
|
40
|
+
} | null;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
interface ClineRecommendedModel {
|
|
44
|
+
id: string;
|
|
45
|
+
name?: string;
|
|
46
|
+
description?: string;
|
|
47
|
+
tags?: string[];
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
interface ClineRecommendedModelsResponse {
|
|
51
|
+
recommended?: ClineRecommendedModel[];
|
|
52
|
+
free?: ClineRecommendedModel[];
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const VS_CODE_VERSION = "1.109.3";
|
|
56
|
+
const CLINE_EXTENSION_VERSION = "3.76.0";
|
|
57
|
+
|
|
58
|
+
function buildClineFetchHeaders(): Record<string, string> {
|
|
59
|
+
return {
|
|
60
|
+
Accept: "application/json",
|
|
61
|
+
"Content-Type": "application/json",
|
|
62
|
+
"User-Agent": `Cline/${CLINE_EXTENSION_VERSION}`,
|
|
63
|
+
"X-PLATFORM": "Visual Studio Code",
|
|
64
|
+
"X-PLATFORM-VERSION": VS_CODE_VERSION,
|
|
65
|
+
"X-CLIENT-TYPE": "VSCode Extension",
|
|
66
|
+
"X-CLIENT-VERSION": CLINE_EXTENSION_VERSION,
|
|
67
|
+
"X-CORE-VERSION": CLINE_EXTENSION_VERSION,
|
|
68
|
+
};
|
|
25
69
|
}
|
|
26
70
|
|
|
27
71
|
function extractNameFromId(id: string): string {
|
|
@@ -34,84 +78,172 @@ function extractNameFromId(id: string): string {
|
|
|
34
78
|
|
|
35
79
|
/**
|
|
36
80
|
* Parse pricing string to cost per million tokens.
|
|
37
|
-
*
|
|
81
|
+
* Cline returns pricing as string per token (e.g. "0.0001" or "0").
|
|
38
82
|
*/
|
|
39
|
-
function parsePricing(pricingStr: string | undefined): number {
|
|
83
|
+
function parsePricing(pricingStr: string | null | undefined): number {
|
|
40
84
|
if (!pricingStr || pricingStr === "0") return 0;
|
|
41
85
|
const parsed = Number.parseFloat(pricingStr);
|
|
42
|
-
return Number.isNaN(parsed) ? 0 : parsed * 1_000_000;
|
|
86
|
+
return Number.isNaN(parsed) ? 0 : parsed * 1_000_000;
|
|
43
87
|
}
|
|
44
88
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
89
|
+
function modalityIncludes(
|
|
90
|
+
modality: string | string[] | null | undefined,
|
|
91
|
+
needle: string,
|
|
92
|
+
): boolean {
|
|
93
|
+
if (Array.isArray(modality)) return modality.includes(needle);
|
|
94
|
+
return typeof modality === "string" && modality.includes(needle);
|
|
50
95
|
}
|
|
51
96
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
97
|
+
function hasTextOutput(info: ClineRaw): boolean {
|
|
98
|
+
const outputMods = info.architecture?.output_modalities;
|
|
99
|
+
if (Array.isArray(outputMods) && outputMods.length > 0) {
|
|
100
|
+
return outputMods.includes("text");
|
|
101
|
+
}
|
|
102
|
+
return modalityIncludes(info.architecture?.modality, "text");
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
function supportsImages(info: ClineRaw): boolean {
|
|
106
|
+
const inputMods = info.architecture?.input_modalities;
|
|
107
|
+
if (Array.isArray(inputMods) && inputMods.includes("image")) return true;
|
|
108
|
+
return modalityIncludes(info.architecture?.modality, "image");
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
function modelFromRecommended(
|
|
112
|
+
model: ClineRecommendedModel,
|
|
113
|
+
): ProviderModelConfig & { _pricingKnown?: boolean } {
|
|
114
|
+
const name = model.name?.trim() || extractNameFromId(model.id);
|
|
115
|
+
return {
|
|
116
|
+
id: model.id,
|
|
117
|
+
name: `${cleanModelName(name)} (Cline)`,
|
|
118
|
+
reasoning: false,
|
|
119
|
+
input: ["text"],
|
|
120
|
+
cost: {
|
|
121
|
+
input: 0,
|
|
122
|
+
output: 0,
|
|
123
|
+
cacheRead: 0,
|
|
124
|
+
cacheWrite: 0,
|
|
125
|
+
},
|
|
126
|
+
contextWindow: 1_000_000,
|
|
127
|
+
maxTokens: 65_536,
|
|
128
|
+
_pricingKnown: true,
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
function modelFromCatalog(
|
|
133
|
+
info: ClineRaw,
|
|
134
|
+
freeToTryIds: ReadonlySet<string>,
|
|
135
|
+
): ProviderModelConfig & { _pricingKnown?: boolean } {
|
|
136
|
+
const isReasoning = !!(
|
|
137
|
+
info.supported_parameters?.includes("include_reasoning") ||
|
|
138
|
+
info.supported_parameters?.includes("reasoning")
|
|
139
|
+
);
|
|
140
|
+
const isFreeToTry = freeToTryIds.has(info.id);
|
|
141
|
+
const inputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.prompt);
|
|
142
|
+
const outputCost = isFreeToTry ? 0 : parsePricing(info.pricing?.completion);
|
|
143
|
+
const cacheRead = isFreeToTry
|
|
144
|
+
? 0
|
|
145
|
+
: parsePricing(info.pricing?.input_cache_read);
|
|
146
|
+
const cacheWrite = isFreeToTry
|
|
147
|
+
? 0
|
|
148
|
+
: parsePricing(info.pricing?.input_cache_write);
|
|
149
|
+
const isFree = inputCost === 0 && outputCost === 0;
|
|
150
|
+
const cleanName = info.name
|
|
151
|
+
? cleanModelName(info.name)
|
|
152
|
+
: extractNameFromId(info.id);
|
|
153
|
+
|
|
154
|
+
return {
|
|
155
|
+
id: info.id,
|
|
156
|
+
name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
|
|
157
|
+
reasoning: isReasoning,
|
|
158
|
+
input: supportsImages(info) ? ["text", "image"] : ["text"],
|
|
159
|
+
cost: {
|
|
160
|
+
input: inputCost,
|
|
161
|
+
output: outputCost,
|
|
162
|
+
cacheRead,
|
|
163
|
+
cacheWrite,
|
|
164
|
+
},
|
|
165
|
+
contextWindow:
|
|
166
|
+
info.context_length ?? info.top_provider?.context_length ?? 128_000,
|
|
167
|
+
maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
|
|
168
|
+
...(getProxyModelCompat({ id: info.id, name: info.name })
|
|
169
|
+
? { compat: getProxyModelCompat({ id: info.id, name: info.name }) }
|
|
170
|
+
: {}),
|
|
171
|
+
_pricingKnown: info.pricing !== null && info.pricing !== undefined,
|
|
172
|
+
} as ProviderModelConfig & { _pricingKnown?: boolean; compat?: any };
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
async function fetchClineRecommendedFreeModels(): Promise<
|
|
176
|
+
ClineRecommendedModel[]
|
|
177
|
+
> {
|
|
59
178
|
const response = await fetchWithRetry(
|
|
60
|
-
`${
|
|
61
|
-
{},
|
|
179
|
+
`${BASE_URL_CLINE}/ai/cline/recommended-models`,
|
|
180
|
+
{ headers: buildClineFetchHeaders() },
|
|
62
181
|
3,
|
|
63
182
|
1000,
|
|
64
183
|
DEFAULT_FETCH_TIMEOUT_MS,
|
|
65
184
|
);
|
|
66
185
|
|
|
67
|
-
if (!response.ok)
|
|
68
|
-
throw new Error(`Failed to fetch OpenRouter models: ${response.status}`);
|
|
186
|
+
if (!response.ok) return [];
|
|
69
187
|
|
|
70
|
-
const json = (await response.json()) as
|
|
188
|
+
const json = (await response.json()) as ClineRecommendedModelsResponse;
|
|
189
|
+
return Array.isArray(json.free) ? json.free.filter((m) => m?.id) : [];
|
|
190
|
+
}
|
|
71
191
|
|
|
72
|
-
|
|
73
|
-
|
|
192
|
+
async function fetchClineCatalogModels(): Promise<ClineRaw[]> {
|
|
193
|
+
const response = await fetchWithRetry(
|
|
194
|
+
`${BASE_URL_CLINE}/ai/cline/models`,
|
|
195
|
+
{ headers: buildClineFetchHeaders() },
|
|
196
|
+
3,
|
|
197
|
+
1000,
|
|
198
|
+
DEFAULT_FETCH_TIMEOUT_MS,
|
|
199
|
+
);
|
|
74
200
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
201
|
+
if (!response.ok)
|
|
202
|
+
throw new Error(`Failed to fetch Cline models: ${response.status}`);
|
|
203
|
+
|
|
204
|
+
const json = (await response.json()) as { data?: ClineRaw[] };
|
|
205
|
+
if (!Array.isArray(json.data)) {
|
|
206
|
+
throw new Error("Invalid Cline models response: missing data array");
|
|
207
|
+
}
|
|
208
|
+
return json.data;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Fetch models from Cline.
|
|
213
|
+
* @param freeOnly - If true, return only zero-cost/free-to-try models
|
|
214
|
+
*/
|
|
215
|
+
export async function fetchClineModels(
|
|
216
|
+
freeOnly = false,
|
|
217
|
+
): Promise<ProviderModelConfig[]> {
|
|
218
|
+
const [catalogModels, recommendedFreeModels] = await Promise.all([
|
|
219
|
+
fetchClineCatalogModels(),
|
|
220
|
+
fetchClineRecommendedFreeModels().catch(() => []),
|
|
221
|
+
]);
|
|
222
|
+
const recommendedFreeIds = new Set(recommendedFreeModels.map((m) => m.id));
|
|
223
|
+
|
|
224
|
+
const models: Array<ProviderModelConfig & { _pricingKnown?: boolean }> = [];
|
|
225
|
+
const seen = new Set<string>();
|
|
226
|
+
|
|
227
|
+
for (const info of catalogModels) {
|
|
228
|
+
if (!hasTextOutput(info)) continue;
|
|
229
|
+
const model = modelFromCatalog(info, recommendedFreeIds);
|
|
230
|
+
models.push(model);
|
|
231
|
+
seen.add(model.id);
|
|
78
232
|
}
|
|
79
233
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
);
|
|
86
|
-
const hasImage =
|
|
87
|
-
info.architecture?.input_modalities?.includes("image") ?? false;
|
|
88
|
-
|
|
89
|
-
// Calculate cost per million tokens
|
|
90
|
-
const inputCost = parsePricing(info.pricing?.prompt);
|
|
91
|
-
const outputCost = parsePricing(info.pricing?.completion);
|
|
92
|
-
const isFree = inputCost === 0 && outputCost === 0;
|
|
93
|
-
|
|
94
|
-
const cleanName = info.name
|
|
95
|
-
? cleanModelName(info.name)
|
|
96
|
-
: extractNameFromId(info.id);
|
|
97
|
-
|
|
98
|
-
models.push({
|
|
99
|
-
id: info.id,
|
|
100
|
-
name: `${cleanName} (Cline)${isFree ? "" : " 💰"}`,
|
|
101
|
-
reasoning: isReasoning,
|
|
102
|
-
input: hasImage ? ["text", "image"] : ["text"],
|
|
103
|
-
cost: {
|
|
104
|
-
input: inputCost,
|
|
105
|
-
output: outputCost,
|
|
106
|
-
cacheRead: 0,
|
|
107
|
-
cacheWrite: 0,
|
|
108
|
-
},
|
|
109
|
-
contextWindow: info.context_length ?? 128_000,
|
|
110
|
-
maxTokens: info.top_provider?.max_completion_tokens ?? 8_192,
|
|
111
|
-
});
|
|
234
|
+
// The recommended/free-to-try endpoint can lead the full catalog. Include
|
|
235
|
+
// those exact IDs so newly promoted models (e.g. alibaba/qwen3.7-plus) show up.
|
|
236
|
+
for (const model of recommendedFreeModels) {
|
|
237
|
+
if (seen.has(model.id)) continue;
|
|
238
|
+
models.push(modelFromRecommended(model));
|
|
239
|
+
seen.add(model.id);
|
|
112
240
|
}
|
|
113
241
|
|
|
114
|
-
|
|
242
|
+
const filtered = freeOnly
|
|
243
|
+
? models.filter((m) => m.cost.input === 0 && m.cost.output === 0)
|
|
244
|
+
: models;
|
|
245
|
+
|
|
246
|
+
return applyHidden(filtered, PROVIDER_CLINE);
|
|
115
247
|
}
|
|
116
248
|
|
|
117
249
|
/**
|
package/providers/cline/cline.ts
CHANGED
|
@@ -73,9 +73,9 @@ function toApiKey(credentials: OAuthCredentials): string {
|
|
|
73
73
|
// =============================================================================
|
|
74
74
|
|
|
75
75
|
const TASK_PROGRESS_BLOCK = `
|
|
76
|
-
# task_progress List (Optional
|
|
76
|
+
# task_progress List (Optional)
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
You may include a todo list using the task_progress parameter to track progress on multi-step tasks.
|
|
79
79
|
|
|
80
80
|
1. To create or update a todo list, include the task_progress parameter in the next tool call
|
|
81
81
|
2. Review each item and update its status:
|
|
@@ -100,7 +100,7 @@ function buildEnvironmentDetails(): string {
|
|
|
100
100
|
0 / 204.8K tokens used (0%)
|
|
101
101
|
|
|
102
102
|
# Current Mode
|
|
103
|
-
|
|
103
|
+
ACT MODE
|
|
104
104
|
</environmentDetails>`;
|
|
105
105
|
}
|
|
106
106
|
|
package/providers/kilo/kilo.ts
CHANGED
|
@@ -38,7 +38,7 @@ import { fetchKiloModels, KILO_GATEWAY_BASE } from "./kilo-models.ts";
|
|
|
38
38
|
const KILO_PROVIDER_CONFIG = {
|
|
39
39
|
providerId: PROVIDER_KILO,
|
|
40
40
|
baseUrl: KILO_GATEWAY_BASE,
|
|
41
|
-
apiKey: "KILO_API_KEY",
|
|
41
|
+
apiKey: "$KILO_API_KEY",
|
|
42
42
|
headers: {
|
|
43
43
|
"X-KILOCODE-EDITORNAME": "Pi",
|
|
44
44
|
},
|
|
@@ -149,7 +149,7 @@ export default async function kiloProvider(pi: ExtensionAPI) {
|
|
|
149
149
|
// Register initial provider (default to free models)
|
|
150
150
|
pi.registerProvider(PROVIDER_KILO, {
|
|
151
151
|
baseUrl: KILO_GATEWAY_BASE,
|
|
152
|
-
apiKey: "KILO_API_KEY",
|
|
152
|
+
apiKey: "$KILO_API_KEY",
|
|
153
153
|
api: "openai-completions" as const,
|
|
154
154
|
headers: {
|
|
155
155
|
"X-KILOCODE-EDITORNAME": "Pi",
|
|
@@ -24,6 +24,7 @@ interface OpenRouterCompatibleModel {
|
|
|
24
24
|
};
|
|
25
25
|
top_provider?: { max_completion_tokens?: number | null };
|
|
26
26
|
supported_parameters?: string[];
|
|
27
|
+
isFree?: boolean;
|
|
27
28
|
}
|
|
28
29
|
|
|
29
30
|
interface FetchModelsOptions {
|
|
@@ -98,8 +99,9 @@ export async function fetchOpenRouterCompatibleModels(
|
|
|
98
99
|
const outputMods = m.architecture?.output_modalities ?? [];
|
|
99
100
|
if (outputMods.includes("image")) return false;
|
|
100
101
|
|
|
101
|
-
// Filter by
|
|
102
|
+
// Filter by provider flag when available, otherwise pricing.
|
|
102
103
|
if (freeOnly) {
|
|
104
|
+
if (typeof m.isFree === "boolean") return m.isFree;
|
|
103
105
|
const prompt = Number.parseFloat(m.pricing?.prompt ?? "1");
|
|
104
106
|
const completion = Number.parseFloat(m.pricing?.completion ?? "1");
|
|
105
107
|
if (prompt !== 0 || completion !== 0) return false;
|
|
@@ -31,6 +31,14 @@ import {
|
|
|
31
31
|
URL_MODELS_DEV,
|
|
32
32
|
} from "../../constants.ts";
|
|
33
33
|
import { createLogger } from "../../lib/logger.ts";
|
|
34
|
+
import {
|
|
35
|
+
getModelsDueForProbe,
|
|
36
|
+
recordModelProbeResults,
|
|
37
|
+
} from "../../lib/probe-cache.ts";
|
|
38
|
+
import {
|
|
39
|
+
getProxyModelCompat,
|
|
40
|
+
isLikelyReasoningModel,
|
|
41
|
+
} from "../../lib/provider-compat.ts";
|
|
34
42
|
import { registerWithGlobalToggle } from "../../lib/registry.ts";
|
|
35
43
|
import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
|
|
36
44
|
import {
|
|
@@ -151,7 +159,8 @@ function inferModelFromId(id: string): ModelsDevModel | null {
|
|
|
151
159
|
.replaceAll(/\b(\d+(?:\.\d+)?)b\b/gi, "$1B");
|
|
152
160
|
|
|
153
161
|
const hasVision = /vision|multimodal|vl/i.test(id);
|
|
154
|
-
const hasReasoning =
|
|
162
|
+
const hasReasoning =
|
|
163
|
+
/reason|r1|thinking/i.test(id) || isLikelyReasoningModel({ id, name });
|
|
155
164
|
|
|
156
165
|
return {
|
|
157
166
|
id,
|
|
@@ -273,6 +282,7 @@ async function fetchNvidiaModels(
|
|
|
273
282
|
},
|
|
274
283
|
contextWindow: m.limit.context,
|
|
275
284
|
maxTokens: m.limit.output,
|
|
285
|
+
compat: getProxyModelCompat({ id: m.id, name: m.name }),
|
|
276
286
|
}),
|
|
277
287
|
),
|
|
278
288
|
PROVIDER_NVIDIA,
|
|
@@ -287,12 +297,12 @@ async function fetchNvidiaModels(
|
|
|
287
297
|
|
|
288
298
|
/**
|
|
289
299
|
* Probe a single NVIDIA model with a minimal chat request.
|
|
290
|
-
* Returns
|
|
300
|
+
* Returns "broken" only for deterministic 404s; network errors are unknown.
|
|
291
301
|
*/
|
|
292
302
|
async function probeNvidiaModel(
|
|
293
303
|
apiKey: string,
|
|
294
304
|
modelId: string,
|
|
295
|
-
): Promise<
|
|
305
|
+
): Promise<"ok" | "broken" | "unknown"> {
|
|
296
306
|
try {
|
|
297
307
|
const response = await fetchWithTimeout(
|
|
298
308
|
`${BASE_URL_NVIDIA}/chat/completions`,
|
|
@@ -313,9 +323,9 @@ async function probeNvidiaModel(
|
|
|
313
323
|
);
|
|
314
324
|
// 404 = function not found (model not provisioned)
|
|
315
325
|
// 200/400/401/etc = at least routable
|
|
316
|
-
return response.status
|
|
326
|
+
return response.status === 404 ? "broken" : "ok";
|
|
317
327
|
} catch {
|
|
318
|
-
return
|
|
328
|
+
return "unknown"; // Network errors / timeouts are not "model not found"
|
|
319
329
|
}
|
|
320
330
|
}
|
|
321
331
|
|
|
@@ -330,26 +340,51 @@ async function runNvidiaProbe(
|
|
|
330
340
|
modelsToTest: ProviderModelConfig[],
|
|
331
341
|
stored: { free: ProviderModelConfig[]; all: ProviderModelConfig[] },
|
|
332
342
|
reRegister: (models: ProviderModelConfig[]) => void,
|
|
333
|
-
|
|
343
|
+
options: { useCache?: boolean } = {},
|
|
344
|
+
): Promise<string[]> {
|
|
345
|
+
const modelIdsToProbe = options.useCache
|
|
346
|
+
? new Set(
|
|
347
|
+
getModelsDueForProbe(
|
|
348
|
+
PROVIDER_NVIDIA,
|
|
349
|
+
modelsToTest.map((m) => m.id),
|
|
350
|
+
),
|
|
351
|
+
)
|
|
352
|
+
: undefined;
|
|
353
|
+
const probeCandidates = modelIdsToProbe
|
|
354
|
+
? modelsToTest.filter((m) => modelIdsToProbe.has(m.id))
|
|
355
|
+
: modelsToTest;
|
|
356
|
+
|
|
357
|
+
if (probeCandidates.length === 0) {
|
|
358
|
+
_nvidiaLogger.info("Auto-probe: NVIDIA probe cache is fresh");
|
|
359
|
+
return [];
|
|
360
|
+
}
|
|
361
|
+
|
|
334
362
|
const notFound: string[] = [];
|
|
363
|
+
const cacheableResults: Array<{ modelId: string; status: "ok" | "broken" }> =
|
|
364
|
+
[];
|
|
335
365
|
const batchSize = 5;
|
|
336
366
|
|
|
337
|
-
for (let i = 0; i <
|
|
338
|
-
const batch =
|
|
367
|
+
for (let i = 0; i < probeCandidates.length; i += batchSize) {
|
|
368
|
+
const batch = probeCandidates.slice(i, i + batchSize);
|
|
339
369
|
const results = await Promise.all(
|
|
340
370
|
batch.map(async (m) => {
|
|
341
|
-
const
|
|
342
|
-
return { id: m.id,
|
|
371
|
+
const status = await probeNvidiaModel(apiKey, m.id);
|
|
372
|
+
return { id: m.id, status };
|
|
343
373
|
}),
|
|
344
374
|
);
|
|
345
375
|
for (const r of results) {
|
|
346
|
-
if (
|
|
376
|
+
if (r.status === "broken") notFound.push(r.id);
|
|
377
|
+
if (r.status !== "unknown") {
|
|
378
|
+
cacheableResults.push({ modelId: r.id, status: r.status });
|
|
379
|
+
}
|
|
347
380
|
}
|
|
348
381
|
}
|
|
349
382
|
|
|
383
|
+
recordModelProbeResults(PROVIDER_NVIDIA, cacheableResults);
|
|
384
|
+
|
|
350
385
|
if (notFound.length === 0) {
|
|
351
|
-
_nvidiaLogger.info("Auto-probe: all NVIDIA models are routable");
|
|
352
|
-
return;
|
|
386
|
+
_nvidiaLogger.info("Auto-probe: all checked NVIDIA models are routable");
|
|
387
|
+
return [];
|
|
353
388
|
}
|
|
354
389
|
|
|
355
390
|
// Auto-hide 404 models in config (provider-scoped)
|
|
@@ -367,6 +402,7 @@ async function runNvidiaProbe(
|
|
|
367
402
|
_nvidiaLogger.info(
|
|
368
403
|
`Auto-probe: found ${notFound.length} broken models (auto-hidden)`,
|
|
369
404
|
);
|
|
405
|
+
return notFound;
|
|
370
406
|
}
|
|
371
407
|
|
|
372
408
|
export default async function nvidiaProvider(pi: ExtensionAPI) {
|
|
@@ -391,7 +427,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
|
|
|
391
427
|
const reRegister = createReRegister(pi, {
|
|
392
428
|
providerId: PROVIDER_NVIDIA,
|
|
393
429
|
baseUrl: BASE_URL_NVIDIA,
|
|
394
|
-
apiKey: apiKey || "NVIDIA_API_KEY",
|
|
430
|
+
apiKey: apiKey || "$NVIDIA_API_KEY",
|
|
395
431
|
});
|
|
396
432
|
|
|
397
433
|
// Register with global toggle system
|
|
@@ -401,7 +437,7 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
|
|
|
401
437
|
const initialModels = allModels;
|
|
402
438
|
pi.registerProvider(PROVIDER_NVIDIA, {
|
|
403
439
|
baseUrl: BASE_URL_NVIDIA,
|
|
404
|
-
apiKey: apiKey || "NVIDIA_API_KEY",
|
|
440
|
+
apiKey: apiKey || "$NVIDIA_API_KEY",
|
|
405
441
|
api: "openai-completions" as const,
|
|
406
442
|
authHeader: true,
|
|
407
443
|
headers: {
|
|
@@ -416,7 +452,9 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
|
|
|
416
452
|
if (_autoProbeDone || !apiKey) return;
|
|
417
453
|
_autoProbeDone = true;
|
|
418
454
|
_nvidiaLogger.info("Starting lazy auto-probe of NVIDIA models...");
|
|
419
|
-
runNvidiaProbe(apiKey, allModels, stored, reRegister
|
|
455
|
+
runNvidiaProbe(apiKey, allModels, stored, reRegister, {
|
|
456
|
+
useCache: true,
|
|
457
|
+
}).catch((err) => {
|
|
420
458
|
_nvidiaLogger.warn("Auto-probe failed", {
|
|
421
459
|
error: err instanceof Error ? err.message : String(err),
|
|
422
460
|
});
|