pi-free 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +9 -5
- package/config.ts +15 -0
- package/constants.ts +3 -0
- package/index.ts +135 -0
- package/lib/built-in-toggle.ts +4 -4
- package/lib/probe-cache.ts +86 -0
- package/lib/provider-compat.ts +33 -0
- package/lib/registry.ts +25 -3
- package/lib/telemetry.ts +328 -0
- package/lib/util.ts +10 -1
- package/package.json +1 -1
- package/provider-failover/benchmark-lookup.ts +94 -8
- package/provider-failover/benchmarks-chunk-0.ts +599 -890
- package/provider-failover/benchmarks-chunk-1.ts +655 -924
- package/provider-failover/benchmarks-chunk-2.ts +675 -966
- package/provider-failover/benchmarks-chunk-3.ts +676 -967
- package/provider-failover/benchmarks-chunk-4.ts +704 -954
- package/provider-failover/benchmarks-chunk-5.ts +1301 -0
- package/provider-failover/hardcoded-benchmarks.ts +9 -3
- package/providers/cline/cline-models.ts +200 -68
- package/providers/cline/cline.ts +3 -3
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/model-fetcher.ts +3 -1
- package/providers/nvidia/nvidia.ts +54 -16
- package/providers/ollama/ollama.ts +103 -46
- package/providers/opencode-session.ts +398 -371
- package/providers/qwen/qwen.ts +2 -2
- package/providers/routeway/routeway.ts +391 -0
package/lib/telemetry.ts
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Telemetry — tracks real-world performance of free models.
|
|
3
|
+
*
|
|
4
|
+
* Hooks into Pi's turn_end event to capture token usage, latency, and
|
|
5
|
+
* success/failure per model. Persists to ~/.pi/free-telemetry.json.
|
|
6
|
+
*
|
|
7
|
+
* Provides a real-world performance signal alongside static CI benchmarks.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
|
|
11
|
+
import { homedir } from "node:os";
|
|
12
|
+
import { join } from "node:path";
|
|
13
|
+
import { createLogger } from "./logger.ts";
|
|
14
|
+
|
|
15
|
+
const _logger = createLogger("telemetry");
|
|
16
|
+
|
|
17
|
+
// =============================================================================
|
|
18
|
+
// Types
|
|
19
|
+
// =============================================================================
|
|
20
|
+
|
|
21
|
+
/** One recorded model call, as stored in the telemetry file. */
export interface TelemetryEntry {
  /** Epoch milliseconds at which the entry was recorded. */
  timestamp: number;
  /** Provider ID the call was made through. */
  provider: string;
  /** Model ID that served the call. */
  model: string;
  /** Whether the call succeeded. */
  success: boolean;
  /** Elapsed time between the recorded start of the call and its completion, in ms. */
  latencyMs: number;
  /** Input (prompt) token count. */
  promptTokens: number;
  /** Output (completion) token count. */
  completionTokens: number;
  /** Total token count for the call. */
  totalTokens: number;
  /** Total tokens divided by latency in seconds; 0 when latency is 0. */
  tokensPerSecond: number;
  /** Cost of the call in USD. */
  cost: number;
  /** Stop reason reported for the call (e.g. "stop", "error"), if any. */
  stopReason?: string;
  /** Error message, present only when one was supplied. */
  error?: string;
}
|
|
35
|
+
|
|
36
|
+
/** Aggregated telemetry for a single "provider/model" key. */
export interface ModelTelemetry {
  // --- Counters ---

  /** Number of calls tracked for this model. */
  totalCalls: number;
  /** Number of tracked calls that succeeded. */
  successCalls: number;
  /** Number of tracked calls that failed. */
  errorCalls: number;

  // --- Sums ---

  /** Sum of total tokens (input + output) across tracked calls. */
  totalTokens: number;
  /** Sum of prompt (input) tokens. */
  totalPromptTokens: number;
  /** Sum of completion (output) tokens. */
  totalCompletionTokens: number;
  /** Sum of call latencies in milliseconds. */
  totalLatencyMs: number;
  /** Sum of call costs. */
  totalCost: number;

  // --- Derived values ---

  /** Average latency of successful calls in milliseconds, rounded to an integer. */
  avgLatencyMs: number;
  /** Tokens per second over successful calls, to one decimal place. */
  avgTokensPerSecond: number;
  /** Percentage of calls that succeeded (0–100), to one decimal place. */
  successRate: number;

  /** Most recent call entries (last 50). */
  recentCalls: TelemetryEntry[];
}
|
|
62
|
+
|
|
63
|
+
/** Root object persisted to the telemetry JSON file. */
export interface TelemetryStore {
  /** Per-model aggregates, keyed by "provider/model". */
  models: Record<string, ModelTelemetry>;
  /** Epoch milliseconds of the most recent write to the store. */
  lastUpdated: number;
}
|
|
69
|
+
|
|
70
|
+
// =============================================================================
|
|
71
|
+
// Constants
|
|
72
|
+
// =============================================================================
|
|
73
|
+
|
|
74
|
+
/** Directory holding the telemetry file: `.pi` inside the user's home directory. */
const TELEMETRY_DIR = join(homedir(), ".pi");

/** Absolute path of the JSON file the telemetry store is persisted to. */
const TELEMETRY_FILE = join(TELEMETRY_DIR, "free-telemetry.json");

/** How many of the most recent entries are kept per model. */
const MAX_RECENT_CALLS = 50;

/** Start timestamps (epoch ms) of calls in progress, keyed by "provider/model". */
const _inFlight: Map<string, number> = new Map();
|
|
80
|
+
|
|
81
|
+
// =============================================================================
|
|
82
|
+
// Storage
|
|
83
|
+
// =============================================================================
|
|
84
|
+
|
|
85
|
+
/**
 * Make sure the telemetry directory exists.
 *
 * `mkdirSync` with `recursive: true` does nothing when the directory is
 * already present, so no separate existence check is performed. Skipping the
 * check also removes the window in which another process could create the
 * directory between the check and the call.
 *
 * @throws Whatever `mkdirSync` throws (e.g. the path exists but is not a
 *   directory, or permission is denied).
 */
function ensureDir(): void {
  mkdirSync(TELEMETRY_DIR, { recursive: true });
}
|
|
90
|
+
|
|
91
|
+
/**
 * Read the telemetry store from disk.
 *
 * Never throws. An empty store is returned when the file does not exist,
 * cannot be read, is not valid JSON, or does not have the expected top-level
 * shape (a JSON object with an object-valued `models` property). Every case
 * except a missing file is logged as a warning.
 *
 * @returns The persisted store, or a fresh empty store.
 */
function loadStore(): TelemetryStore {
  const emptyStore = (): TelemetryStore => ({ models: {}, lastUpdated: Date.now() });
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  try {
    if (!existsSync(TELEMETRY_FILE)) {
      return emptyStore();
    }

    const parsed: unknown = JSON.parse(readFileSync(TELEMETRY_FILE, "utf-8"));

    // The file is user-editable and may be truncated by an interrupted write,
    // so check the shape here instead of letting callers crash on
    // `store.models[...]`.
    if (!isPlainObject(parsed) || !isPlainObject(parsed.models)) {
      throw new Error("telemetry store has an unexpected shape");
    }

    return {
      models: parsed.models as Record<string, ModelTelemetry>,
      lastUpdated:
        typeof parsed.lastUpdated === "number" ? parsed.lastUpdated : Date.now(),
    };
  } catch (err) {
    _logger.warn("Failed to load telemetry store, resetting", {
      error: String(err),
    });
    return emptyStore();
  }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Persist the store to the telemetry file as pretty-printed JSON.
 *
 * Stamps `store.lastUpdated` with the current time before writing (the
 * caller's object is mutated). Failures are logged as warnings and not
 * rethrown.
 *
 * @param store - The store to write.
 */
function saveStore(store: TelemetryStore): void {
  try {
    ensureDir();
    store.lastUpdated = Date.now();
    const serialized = JSON.stringify(store, null, 2);
    writeFileSync(TELEMETRY_FILE, serialized, "utf-8");
  } catch (err) {
    const details = { error: String(err) };
    _logger.warn("Failed to save telemetry store", details);
  }
}
|
|
117
|
+
|
|
118
|
+
// =============================================================================
|
|
119
|
+
// Entry management
|
|
120
|
+
// =============================================================================
|
|
121
|
+
|
|
122
|
+
/**
 * Compute aggregate statistics over a list of telemetry entries.
 *
 * Counters and sums cover every entry passed in. The two averages are taken
 * over successful entries with a measured latency (`latencyMs > 0`) only. An
 * entry recorded without a start timestamp has latency 0, and including it
 * would pull the average latency down and inflate tokens per second.
 *
 * @param modelKey - "provider/model" key the entries belong to (not used in
 *   the computation).
 * @param entries - Entries to aggregate, oldest first.
 * @returns Aggregates for `entries`; `recentCalls` holds the last
 *   `MAX_RECENT_CALLS` of them.
 */
function deriveModelTelemetry(modelKey: string, entries: TelemetryEntry[]): ModelTelemetry {
  const oneDecimal = (value: number): number => parseFloat(value.toFixed(1));

  let successCalls = 0;
  let totalTokens = 0;
  let totalPromptTokens = 0;
  let totalCompletionTokens = 0;
  let totalLatencyMs = 0;
  let totalCost = 0;

  // Successful calls with a measured latency feed the averages.
  let timedCalls = 0;
  let timedLatencyMs = 0;
  let timedTokens = 0;

  for (const e of entries) {
    totalTokens += e.totalTokens;
    totalPromptTokens += e.promptTokens;
    totalCompletionTokens += e.completionTokens;
    totalLatencyMs += e.latencyMs;
    totalCost += e.cost;

    if (!e.success) continue;
    successCalls++;

    if (e.latencyMs > 0) {
      timedCalls++;
      timedLatencyMs += e.latencyMs;
      timedTokens += e.totalTokens;
    }
  }

  const totalCalls = entries.length;

  return {
    totalCalls,
    successCalls,
    errorCalls: totalCalls - successCalls,
    totalTokens,
    totalPromptTokens,
    totalCompletionTokens,
    totalLatencyMs,
    totalCost,
    avgLatencyMs: timedCalls > 0 ? Math.round(timedLatencyMs / timedCalls) : 0,
    avgTokensPerSecond: timedLatencyMs > 0 ? oneDecimal(timedTokens / (timedLatencyMs / 1000)) : 0,
    successRate: totalCalls > 0 ? oneDecimal((successCalls / totalCalls) * 100) : 0,
    recentCalls: entries.slice(-MAX_RECENT_CALLS),
  };
}

/**
 * Append one entry to its model's telemetry and persist the store.
 *
 * Lifetime counters and sums (`totalCalls`, `successCalls`, `errorCalls`,
 * `totalTokens`, `totalPromptTokens`, `totalCompletionTokens`,
 * `totalLatencyMs`, `totalCost`) and `successRate` are carried forward from
 * the stored aggregate and incremented. Only the last `MAX_RECENT_CALLS`
 * entries are persisted, so re-deriving the totals from `recentCalls` alone
 * would stop them growing once that window is full.
 *
 * `avgLatencyMs` and `avgTokensPerSecond` are computed over the recent window.
 *
 * @param entry - The call to record.
 */
function addEntry(entry: TelemetryEntry): void {
  const store = loadStore();
  const modelKey = `${entry.provider}/${entry.model}`;
  const previous: ModelTelemetry | undefined = store.models[modelKey];

  // Values read back from disk may be missing or non-numeric; treat those as 0.
  const carried = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  const priorCalls =
    previous !== undefined && Array.isArray(previous.recentCalls) ? previous.recentCalls : [];
  const recentWindow = [...priorCalls, entry].slice(-MAX_RECENT_CALLS);
  const windowStats = deriveModelTelemetry(modelKey, recentWindow);

  const totalCalls = carried(previous?.totalCalls) + 1;
  const successCalls = carried(previous?.successCalls) + (entry.success ? 1 : 0);

  store.models[modelKey] = {
    ...windowStats,
    totalCalls,
    successCalls,
    errorCalls: totalCalls - successCalls,
    totalTokens: carried(previous?.totalTokens) + entry.totalTokens,
    totalPromptTokens: carried(previous?.totalPromptTokens) + entry.promptTokens,
    totalCompletionTokens: carried(previous?.totalCompletionTokens) + entry.completionTokens,
    totalLatencyMs: carried(previous?.totalLatencyMs) + entry.latencyMs,
    totalCost: carried(previous?.totalCost) + entry.cost,
    successRate: parseFloat(((successCalls / totalCalls) * 100).toFixed(1)),
  };
  saveStore(store);
}
|
|
179
|
+
|
|
180
|
+
// =============================================================================
|
|
181
|
+
// Public API
|
|
182
|
+
// =============================================================================
|
|
183
|
+
|
|
184
|
+
/**
 * Load the telemetry store and return its per-model aggregates.
 *
 * @returns Telemetry for every tracked model, keyed by "provider/model".
 */
export function getAllTelemetry(): Record<string, ModelTelemetry> {
  const { models } = loadStore();
  return models;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Look up the telemetry recorded for one provider/model pair.
 *
 * @param provider - The provider ID.
 * @param model - The model ID.
 * @returns The stored aggregate, or `null` when the pair has no entry.
 */
export function getModelTelemetry(provider: string, model: string): ModelTelemetry | null {
  const { models } = loadStore();
  const modelKey = `${provider}/${model}`;
  const found = models[modelKey];
  return found ?? null;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Format a model's telemetry as a short human-readable string
 * (for status bar / /model list), e.g. "12 calls · 91.7% ok · 840ms · 55.2 tok/s".
 *
 * The call count and success rate are always shown once at least one call has
 * been tracked, including a success rate of 0%, so that a model whose calls
 * all failed is visibly marked as such. Average latency and tokens per second
 * are shown only when they are greater than 0.
 *
 * @param provider - The provider ID.
 * @param model - The model ID.
 * @returns The formatted summary, or `undefined` if no telemetry data is
 *   available.
 */
export function formatModelTelemetry(provider: string, model: string): string | undefined {
  const telemetry = getModelTelemetry(provider, model);
  // `!(x > 0)` also rejects a missing or non-numeric count read from disk.
  if (!telemetry || !(telemetry.totalCalls > 0)) return undefined;

  const parts: string[] = [`${telemetry.totalCalls} calls`];
  if (Number.isFinite(telemetry.successRate)) {
    parts.push(`${telemetry.successRate}% ok`);
  }
  if (telemetry.avgLatencyMs > 0) {
    parts.push(`${telemetry.avgLatencyMs}ms`);
  }
  if (telemetry.avgTokensPerSecond > 0) {
    parts.push(`${telemetry.avgTokensPerSecond} tok/s`);
  }

  return parts.join(" · ");
}
|
|
224
|
+
|
|
225
|
+
/**
 * Summarise telemetry across every model of one provider.
 *
 * A model belongs to the provider when its store key starts with
 * "<provider>/".
 *
 * @param provider - The provider ID.
 * @returns Summed call count and cost, plus the number of matching models.
 */
export function getProviderTelemetry(provider: string): {
  totalCalls: number;
  totalCost: number;
  models: number;
} {
  const keyPrefix = `${provider}/`;
  const summary = { totalCalls: 0, totalCost: 0, models: 0 };

  Object.entries(loadStore().models).forEach(([modelKey, aggregate]) => {
    if (!modelKey.startsWith(keyPrefix)) return;
    summary.totalCalls += aggregate.totalCalls;
    summary.totalCost += aggregate.totalCost;
    summary.models += 1;
  });

  return summary;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Note the current time as the start of a call to the given model.
 * Intended to be called from before_agent_start or model_select.
 *
 * A later start for the same provider/model pair replaces the earlier one.
 *
 * @param provider - The provider ID.
 * @param model - The model ID.
 */
export function startModelCall(provider: string, model: string): void {
  const startedAt = Date.now();
  _inFlight.set(`${provider}/${model}`, startedAt);
}
|
|
257
|
+
|
|
258
|
+
/**
 * Record a finished model call together with its usage figures.
 * Intended to be called from turn_end when the message is an AssistantMessage.
 *
 * Latency is measured from the start timestamp stored for this provider/model
 * pair, which is then cleared. If no start was recorded, the current time is
 * used as the start. The resulting entry is added to the store and logged.
 *
 * @param provider - The provider ID
 * @param model - The model ID
 * @param usage - Token usage { input, output, totalTokens }; when
 *   `totalTokens` is falsy, `input + output` is used instead
 * @param cost - Cost in USD
 * @param success - Whether the call succeeded
 * @param stopReason - The stop reason (e.g. "stop", "error")
 * @param errorMessage - Error message if failed
 */
export function recordModelCall(
  provider: string,
  model: string,
  usage: { input: number; output: number; totalTokens: number },
  cost: number,
  success: boolean,
  stopReason?: string,
  errorMessage?: string,
): void {
  const inFlightKey = `${provider}/${model}`;
  const startedAt = _inFlight.get(inFlightKey) ?? Date.now();
  const latencyMs = Date.now() - startedAt;
  _inFlight.delete(inFlightKey);

  const totalTokens = usage.totalTokens || usage.input + usage.output;

  let tokensPerSecond = 0;
  if (latencyMs > 0) {
    const seconds = latencyMs / 1000;
    tokensPerSecond = parseFloat((totalTokens / seconds).toFixed(1));
  }

  const entry: TelemetryEntry = {
    timestamp: Date.now(),
    provider,
    model,
    success,
    latencyMs,
    promptTokens: usage.input,
    completionTokens: usage.output,
    totalTokens,
    tokensPerSecond,
    cost,
    stopReason,
  };
  // The error field is attached only when a non-empty message was supplied.
  if (errorMessage) {
    entry.error = errorMessage;
  }

  addEntry(entry);

  _logger.info(`Telemetry: ${provider}/${model}`, {
    latencyMs,
    totalTokens,
    tokensPerSecond,
    success,
    cost,
  });
}
|
|
314
|
+
|
|
315
|
+
/**
 * Reset the persisted telemetry by overwriting the file with an empty store.
 */
export function clearTelemetry(): void {
  saveStore({ models: {}, lastUpdated: Date.now() });
}
|
|
322
|
+
|
|
323
|
+
/**
 * Report where telemetry is persisted.
 *
 * @returns The path of the telemetry JSON file.
 */
export function getTelemetryPath(): string {
  const telemetryPath = TELEMETRY_FILE;
  return telemetryPath;
}
|
package/lib/util.ts
CHANGED
|
@@ -341,6 +341,7 @@ export function mapOpenRouterModel(m: {
|
|
|
341
341
|
input_modalities?: string[] | null;
|
|
342
342
|
output_modalities?: string[] | null;
|
|
343
343
|
};
|
|
344
|
+
isFree?: boolean;
|
|
344
345
|
}): ProviderModelConfig {
|
|
345
346
|
const promptPrice = Number.parseFloat(m.pricing?.prompt ?? "0");
|
|
346
347
|
const completionPrice = Number.parseFloat(m.pricing?.completion ?? "0");
|
|
@@ -362,7 +363,15 @@ export function mapOpenRouterModel(m: {
|
|
|
362
363
|
maxTokens:
|
|
363
364
|
m.max_completion_tokens ?? m.top_provider?.max_completion_tokens ?? 4096,
|
|
364
365
|
_pricingKnown: true,
|
|
365
|
-
|
|
366
|
+
...(typeof m.isFree === "boolean" && {
|
|
367
|
+
_freeKnown: true,
|
|
368
|
+
_isFree: m.isFree,
|
|
369
|
+
}),
|
|
370
|
+
} as ProviderModelConfig & {
|
|
371
|
+
_pricingKnown?: boolean;
|
|
372
|
+
_freeKnown?: boolean;
|
|
373
|
+
_isFree?: boolean;
|
|
374
|
+
};
|
|
366
375
|
}
|
|
367
376
|
|
|
368
377
|
// =============================================================================
|
package/provider-failover/benchmark-lookup.ts
CHANGED
|
@@ -225,6 +225,8 @@ function stripCommonSuffixes(ctx: {
|
|
|
225
225
|
/-bf\d+$/g, // -bf16
|
|
226
226
|
/-preview$/g, // -preview
|
|
227
227
|
/-exp$/g, // -exp (experimental)
|
|
228
|
+
/-turbo$/g, // -turbo (Together AI suffix)
|
|
229
|
+
/-instant$/g, // -instant (Groq suffix for fast-response models)
|
|
228
230
|
/-instruct-0\.\d+$/g, // HuggingFace revision tags
|
|
229
231
|
];
|
|
230
232
|
for (const pattern of suffixesToStrip) {
|
|
@@ -248,8 +250,28 @@ function applyProviderNormalization(
|
|
|
248
250
|
|
|
249
251
|
if (provider === "nvidia") normalizeNvidia(ctx);
|
|
250
252
|
if (provider === "cloudflare") normalizeCloudflare(ctx);
|
|
253
|
+
// Strip generic org/ prefix (e.g., "google/", "mistralai/") before everything
|
|
254
|
+
const stripped = ctx.normalized.replace(/^[^/]+\//, "");
|
|
255
|
+
if (stripped !== ctx.normalized) {
|
|
256
|
+
ctx.normalized = stripped;
|
|
257
|
+
ctx.strategies.push("strip-org-prefix");
|
|
258
|
+
}
|
|
259
|
+
|
|
251
260
|
normalizeFreeSuffix(ctx);
|
|
252
|
-
|
|
261
|
+
// Also strip -free suffix (used by ZenMux, etc.)
|
|
262
|
+
if (ctx.normalized.endsWith("-free")) {
|
|
263
|
+
ctx.normalized = ctx.normalized.replaceAll(/-free$/g, "");
|
|
264
|
+
ctx.strategies.push("strip-free-suffix");
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
// General normalization: convert llamaN → llama-N (e.g., llama3-70b → llama-3-70b)
|
|
268
|
+
if (/^llama\d/.test(ctx.normalized)) {
|
|
269
|
+
ctx.normalized = ctx.normalized.replaceAll(/^llama(\d)/g, "llama-$1");
|
|
270
|
+
ctx.strategies.push("llama-dash-general");
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
if (provider === "ollama" || provider === "ollama-cloud")
|
|
274
|
+
normalizeOllama(ctx);
|
|
253
275
|
if (provider === "groq") normalizeGroq(ctx);
|
|
254
276
|
if (provider === "cerebras") normalizeCerebras(ctx);
|
|
255
277
|
if (provider === "mistral") normalizeMistral(ctx);
|
|
@@ -281,6 +303,8 @@ const VARIANT_QUALIFIER_SEGMENTS = new Set([
|
|
|
281
303
|
"preview",
|
|
282
304
|
"adaptive",
|
|
283
305
|
"fast",
|
|
306
|
+
"instruct",
|
|
307
|
+
"chat",
|
|
284
308
|
]);
|
|
285
309
|
|
|
286
310
|
/**
|
|
@@ -301,7 +325,13 @@ function isVariantQualifier(segment: string): boolean {
|
|
|
301
325
|
// Two-digit year like "25", "24"
|
|
302
326
|
if (/^\d{2}$/.test(segment)) return true;
|
|
303
327
|
// Special variant suffixes
|
|
304
|
-
if (
|
|
328
|
+
if (
|
|
329
|
+
segment === "speciale" ||
|
|
330
|
+
segment === "chatgpt" ||
|
|
331
|
+
segment === "latest" ||
|
|
332
|
+
segment === "instruct" ||
|
|
333
|
+
segment === "chat"
|
|
334
|
+
)
|
|
305
335
|
return true;
|
|
306
336
|
return false;
|
|
307
337
|
}
|
|
@@ -397,12 +427,10 @@ function findBestVariantByPrefix(
|
|
|
397
427
|
if (candidates.length === 0) return null;
|
|
398
428
|
|
|
399
429
|
// Pick the candidate with the highest codingIndex
|
|
400
|
-
// If tied or no CI, use normalizedScore as tiebreaker
|
|
401
430
|
candidates.sort((a, b) => {
|
|
402
431
|
const ciA = a.data.codingIndex ?? -1;
|
|
403
432
|
const ciB = b.data.codingIndex ?? -1;
|
|
404
|
-
|
|
405
|
-
return (b.data.normalizedScore ?? 0) - (a.data.normalizedScore ?? 0);
|
|
433
|
+
return ciB - ciA;
|
|
406
434
|
});
|
|
407
435
|
|
|
408
436
|
// Only return if the best candidate has a codingIndex
|
|
@@ -438,7 +466,12 @@ const MODEL_VARIANTS: Record<string, string[]> = {
|
|
|
438
466
|
],
|
|
439
467
|
"claude-3-opus": ["claude-3-opus", "opus-3"],
|
|
440
468
|
"llama-3.1-instruct-405b": ["llama-3.1-405b", "llama3.1-405b", "llama-405b"],
|
|
441
|
-
"llama-3.1-instruct-70b": [
|
|
469
|
+
"llama-3.1-instruct-70b": [
|
|
470
|
+
"llama-3.1-70b",
|
|
471
|
+
"llama3.1-70b",
|
|
472
|
+
"llama-70b",
|
|
473
|
+
"llama-3.1-70b-versatile",
|
|
474
|
+
],
|
|
442
475
|
"gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
|
|
443
476
|
"qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
|
|
444
477
|
"deepseek-v3.2-non-reasoning": ["deepseek-v3", "deepseekv3", "deepseek-chat"],
|
|
@@ -453,6 +486,58 @@ const MODEL_VARIANTS: Record<string, string[]> = {
|
|
|
453
486
|
"nemotron-super",
|
|
454
487
|
"nemotron-3",
|
|
455
488
|
],
|
|
489
|
+
"glm-4.6v-non-reasoning": [
|
|
490
|
+
"glm-4.6v",
|
|
491
|
+
"glm-4.6v-flash",
|
|
492
|
+
"glm-4.6v-flash-free",
|
|
493
|
+
],
|
|
494
|
+
"glm-4.7-flash-non-reasoning": ["glm-4.7-flash", "glm-4.7-flash-free"],
|
|
495
|
+
"mistral-small-4-non-reasoning": [
|
|
496
|
+
"mistral-small-24b",
|
|
497
|
+
"mistral-small-24b-instruct",
|
|
498
|
+
"mistral-small-24b-2501",
|
|
499
|
+
],
|
|
500
|
+
"qwen2.5-coder-instruct-7b": ["qwen2.5-7b", "qwen2.5-7b-instruct"],
|
|
501
|
+
"llama-3.2-instruct-3b": ["llama-3.2-3b", "llama-3.2-3b-instruct"],
|
|
502
|
+
"llama-3.2-instruct-1b": [
|
|
503
|
+
"llama-3.2-1b",
|
|
504
|
+
"llama-3.2-1b-instruct",
|
|
505
|
+
"llama3.2-1b",
|
|
506
|
+
],
|
|
507
|
+
|
|
508
|
+
// --- Claude 4 series (providers use date-stamped IDs like claude-sonnet-4-20250514) ---
|
|
509
|
+
// Order matters: more specific aliases first to avoid false prefix matches
|
|
510
|
+
"claude-4.5-sonnet-reasoning": [
|
|
511
|
+
"claude-sonnet-4.5",
|
|
512
|
+
"claude-sonnet-4.5-20250601",
|
|
513
|
+
],
|
|
514
|
+
"claude-4-sonnet-reasoning": ["claude-sonnet-4", "claude-sonnet-4-20250514"],
|
|
515
|
+
"claude-4-opus-reasoning": ["claude-opus-4", "claude-opus-4-20250514"],
|
|
516
|
+
|
|
517
|
+
// --- Qwen Max → Qwen3 Max ---
|
|
518
|
+
"qwen3-max": ["qwen-max", "qwen/qwen-max"],
|
|
519
|
+
|
|
520
|
+
// --- Mistral Large 2411 → Mistral Large 2 (Nov '24) ---
|
|
521
|
+
"mistral-large-2-nov-24": [
|
|
522
|
+
"mistral-large-2411",
|
|
523
|
+
"mistralai/mistral-large-2411",
|
|
524
|
+
],
|
|
525
|
+
|
|
526
|
+
// --- Groq-specific variants (versatile suffix, numeric context suffixes) ---
|
|
527
|
+
"llama-3.3-instruct-70b": [
|
|
528
|
+
"llama-3.3-70b-versatile",
|
|
529
|
+
"llama3.3-70b",
|
|
530
|
+
"llama-3.3-70b",
|
|
531
|
+
],
|
|
532
|
+
"llama-3-instruct-70b": ["llama3-70b-8192", "llama3-70b"],
|
|
533
|
+
"llama-3-instruct-8b": ["llama3-8b-8192", "llama3-8b"],
|
|
534
|
+
"llama-3.1-instruct-8b": [
|
|
535
|
+
"llama3.1-8b",
|
|
536
|
+
"llama-3.1-8b",
|
|
537
|
+
"llama-3.1-8b-instant",
|
|
538
|
+
"llama3.1-8b-instruct",
|
|
539
|
+
],
|
|
540
|
+
"mistral-small-3.1": ["mistral-small-2501", "mistral-small-24b-2501"],
|
|
456
541
|
};
|
|
457
542
|
|
|
458
543
|
// =============================================================================
|
|
@@ -593,7 +678,8 @@ export function findHardcodedBenchmark(
|
|
|
593
678
|
modelId: string,
|
|
594
679
|
provider?: string,
|
|
595
680
|
): HardcodedBenchmark | null {
|
|
596
|
-
|
|
681
|
+
// Normalize: convert colons to dashes (Ollama model:tag format)
|
|
682
|
+
const search = `${modelName} ${modelId}`.toLowerCase().replaceAll(":", "-");
|
|
597
683
|
|
|
598
684
|
logDebug({ provider, modelId, modelName, action: "attempt" });
|
|
599
685
|
|
|
@@ -640,7 +726,7 @@ export function getHardcodedScore(
|
|
|
640
726
|
provider?: string,
|
|
641
727
|
): number | null {
|
|
642
728
|
const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
|
|
643
|
-
return benchmark?.
|
|
729
|
+
return benchmark?.codingIndex ?? null;
|
|
644
730
|
}
|
|
645
731
|
|
|
646
732
|
/**
|