pi-free 2.0.6 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +540 -421
  2. package/README.md +572 -495
  3. package/config.ts +58 -11
  4. package/constants.ts +12 -0
  5. package/index.ts +66 -2
  6. package/lib/model-detection.ts +1 -0
  7. package/lib/model-enhancer.ts +20 -20
  8. package/lib/open-browser.ts +1 -1
  9. package/lib/quota-monitor.ts +123 -0
  10. package/lib/types.ts +101 -101
  11. package/lib/util.ts +460 -351
  12. package/package.json +68 -68
  13. package/provider-failover/benchmark-lookup.ts +743 -702
  14. package/provider-failover/benchmarks-chunk-0.ts +48 -48
  15. package/provider-failover/benchmarks-chunk-1.ts +44 -44
  16. package/provider-failover/benchmarks-chunk-2.ts +39 -39
  17. package/provider-failover/benchmarks-chunk-3.ts +41 -41
  18. package/provider-failover/benchmarks-chunk-4.ts +33 -33
  19. package/providers/cline/cline-auth.ts +473 -473
  20. package/providers/cline/cline-models.ts +2 -2
  21. package/providers/cline/cline.ts +1 -1
  22. package/providers/codestral/codestral.ts +139 -0
  23. package/providers/crofai/crofai.ts +14 -85
  24. package/providers/deepinfra/deepinfra.ts +109 -0
  25. package/providers/kilo/kilo-auth.ts +155 -155
  26. package/providers/kilo/kilo.ts +1 -1
  27. package/providers/llm7/llm7.ts +156 -0
  28. package/providers/model-fetcher.ts +2 -2
  29. package/providers/nvidia/nvidia.ts +4 -4
  30. package/providers/ollama/ollama.ts +1 -1
  31. package/providers/opencode-session.ts +1 -1
  32. package/providers/qwen/qwen-models.ts +101 -101
  33. package/providers/qwen/qwen.ts +1 -1
  34. package/providers/sambanova/sambanova.ts +109 -0
  35. package/providers/zenmux/zenmux.ts +5 -2
  36. package/scripts/check-extensions.mjs +6 -4
@@ -1,702 +1,743 @@
1
- /**
2
- * Benchmark lookup logic — extracted from hardcoded-benchmarks.ts
3
- * for maintainability (the data file is ~10k lines of JSON-like entries).
4
- *
5
- * This module re-exports everything consumers currently import from
6
- * hardcoded-benchmarks, so you can switch imports to this file without
7
- * breaking anything.
8
- *
9
- * ENHANCED: Added debug logging and provider-specific normalizers
10
- */
11
-
12
- import { appendFileSync, existsSync, mkdirSync, writeFileSync } from "node:fs";
13
- import { homedir } from "node:os";
14
- import { join } from "node:path";
15
- import {
16
- HARDCODED_BENCHMARKS,
17
- type HardcodedBenchmark,
18
- } from "./hardcoded-benchmarks.ts";
19
-
20
- // Re-export the type and data so callers can migrate imports here
21
- export { HARDCODED_BENCHMARKS, type HardcodedBenchmark };
22
-
23
- // =============================================================================
24
- // Debug Logging
25
- // =============================================================================
26
-
27
- const LOG_DIR = join(homedir(), ".pi");
28
- const LOG_FILE = join(LOG_DIR, "modelmatch.log");
29
- let debugEnabled = true;
30
-
31
- /**
32
- * Enable/disable debug logging
33
- */
34
- export function setDebugLogging(enabled: boolean): void {
35
- debugEnabled = enabled;
36
- }
37
-
38
- /**
39
- * Log a message to the modelmatch.log file
40
- */
41
- function logDebug(entry: {
42
- provider?: string;
43
- modelId: string;
44
- modelName: string;
45
- action: "attempt" | "match" | "miss" | "normalized";
46
- strategy?: string;
47
- normalizedId?: string;
48
- matchKey?: string;
49
- codingIndex?: number;
50
- details?: string;
51
- }): void {
52
- if (!debugEnabled) return;
53
-
54
- try {
55
- // Ensure log directory exists
56
- if (!existsSync(LOG_DIR)) {
57
- mkdirSync(LOG_DIR, { recursive: true });
58
- }
59
-
60
- // Initialize log file with header if it doesn't exist
61
- if (!existsSync(LOG_FILE)) {
62
- writeFileSync(
63
- LOG_FILE,
64
- "timestamp|provider|modelId|modelName|action|strategy|normalizedId|matchKey|codingIndex|details\n",
65
- );
66
- }
67
-
68
- const timestamp = new Date().toISOString();
69
- const line = [
70
- timestamp,
71
- entry.provider || "unknown",
72
- entry.modelId,
73
- entry.modelName,
74
- entry.action,
75
- entry.strategy || "",
76
- entry.normalizedId || "",
77
- entry.matchKey || "",
78
- entry.codingIndex !== undefined ? entry.codingIndex.toFixed(1) : "",
79
- entry.details || "",
80
- ]
81
- .map((f) => f.replace(/[\\|]/g, "\\$&")) // Escape backslashes and pipes
82
- .join("|");
83
-
84
- appendFileSync(LOG_FILE, `${line}\n`);
85
- } catch {
86
- // Silently fail - don't break functionality for logging issues
87
- }
88
- }
89
-
90
- /**
91
- * Get the path to the log file for user reference
92
- */
93
- export function getMatchLogPath(): string {
94
- return LOG_FILE;
95
- }
96
-
97
- /**
98
- * Clear the match log
99
- */
100
- export function clearMatchLog(): void {
101
- try {
102
- if (existsSync(LOG_FILE)) {
103
- writeFileSync(
104
- LOG_FILE,
105
- "timestamp|provider|modelId|modelName|action|strategy|normalizedId|matchKey|codingIndex|details\n",
106
- );
107
- }
108
- } catch {
109
- // Ignore errors
110
- }
111
- }
112
-
113
- // =============================================================================
114
- // Provider-Specific Normalizers
115
- // =============================================================================
116
-
117
- /**
118
- * Apply provider-specific ID normalization to handle naming conventions
119
- */
120
- function applyProviderNormalization(
121
- modelId: string,
122
- provider?: string,
123
- ): { normalized: string; strategy: string } {
124
- let normalized = modelId.toLowerCase();
125
- const strategies: string[] = [];
126
-
127
- // Provider-specific prefix stripping
128
- if (provider === "nvidia") {
129
- // NVIDIA uses prefixes like meta/, mistralai/, microsoft/, qwen/
130
- const prefixMatch = normalized.match(
131
- /^(meta|mistralai|microsoft|qwen|nvidia|ibm|google|ai21labs|bigcode|databricks|deepseek-ai|01-ai|adept|aisingapore|baai|bytedance|luma|stabilityai|fireworks|upstage|voyage|snowflake|recursal|kdan|unity|cloudflare|fblgit|nttdata|dito|nousresearch|espressomodels|ftmsh|huggingface|isolationai|pinglab|functionnetwork|huggingfaceh4|mcw|shutterstock)[^/]*\//,
132
- );
133
- if (prefixMatch) {
134
- normalized = normalized.replace(/^[^/]+\//, "");
135
- strategies.push("strip-nvidia-prefix");
136
- }
137
- }
138
-
139
- if (provider === "cloudflare") {
140
- // Cloudflare uses @cf/namespace/model format
141
- if (normalized.startsWith("@cf/")) {
142
- normalized = normalized.replace(/^@cf\/[^/]+\//, "");
143
- strategies.push("strip-cf-namespace");
144
- }
145
- }
146
-
147
- // Provider-agnostic normalization
148
- // Strip :free suffix (common in OpenRouter)
149
- if (normalized.includes(":free")) {
150
- normalized = normalized.replace(/:free$/, "");
151
- strategies.push("strip-free-suffix");
152
- }
153
-
154
- // Handle Ollama format (model:tag)
155
- if (provider === "ollama" && normalized.includes(":")) {
156
- normalized = normalized.replace(/:/g, "-");
157
- strategies.push("ollama-colon-to-dash");
158
- }
159
-
160
- // Handle Groq suffixes
161
- if (provider === "groq") {
162
- if (/-\d+$/.test(normalized)) {
163
- // Strip numeric suffixes like -32768, -131072
164
- normalized = normalized.replace(/-\d+$/, "");
165
- strategies.push("strip-groq-numeric-suffix");
166
- }
167
- if (normalized.includes("-versatile")) {
168
- normalized = normalized.replace(/-versatile$/, "");
169
- strategies.push("strip-groq-versatile");
170
- }
171
- }
172
-
173
- // Handle Cerebras format (llama3.1-8b -> llama-3.1-8b)
174
- if (provider === "cerebras") {
175
- if (/^llama\d/.test(normalized)) {
176
- normalized = normalized.replace(/^llama(\d)/, "llama-$1");
177
- strategies.push("cerebras-llama-dash");
178
- }
179
- // Add instruct if missing for llama models
180
- if (
181
- /^llama-[\d.]+-\d+b$/.test(normalized) &&
182
- !normalized.includes("instruct")
183
- ) {
184
- normalized = normalized.replace(/^(llama-[\d.]+-\d+b)/, "$1-instruct");
185
- strategies.push("add-instruct-suffix");
186
- }
187
- }
188
-
189
- // Handle Mistral -latest suffix
190
- if (provider === "mistral" && normalized.includes("-latest")) {
191
- normalized = normalized.replace(/-latest$/, "");
192
- strategies.push("strip-mistral-latest");
193
- }
194
-
195
- // Strip common suffixes that aren't in benchmark keys
196
- const suffixesToStrip = [
197
- /-\d{8}$/, // Date suffixes like -20250514
198
- /-v\d+(\.\d+)?$/, // Version suffixes like -v1.1
199
- /-\d{3,}$/, // Numeric suffixes like -001, -2603
200
- /-it$/, // -it (Gemma convention)
201
- /-fp\d+$/, // -fp8, -fp16
202
- /-bf\d+$/, // -bf16
203
- /-preview$/, // -preview
204
- /-exp$/, // -exp (experimental)
205
- /-instruct-0\.\d+$/, // HuggingFace revision tags
206
- ];
207
-
208
- for (const pattern of suffixesToStrip) {
209
- if (pattern.test(normalized)) {
210
- normalized = normalized.replace(pattern, "");
211
- strategies.push(
212
- `strip-${pattern.source.replace(/[\\^$.*+?()[\]{}|]/g, "").slice(0, 10)}`,
213
- );
214
- }
215
- }
216
-
217
- return {
218
- normalized,
219
- strategy: strategies.join(","),
220
- };
221
- }
222
-
223
- // =============================================================================
224
- // Prefix fallback helpers
225
- // =============================================================================
226
-
227
- /**
228
- * Segments that indicate a variant of the same base model
229
- * (effort level, reasoning mode, date, preview) — NOT a fundamentally different model.
230
- * Used to filter prefix matches so we don't cross model boundaries
231
- * (e.g. gpt-4o → gpt-4o-mini is wrong, but gpt-4o → gpt-4o-aug-24 is fine).
232
- */
233
- const VARIANT_QUALIFIER_SEGMENTS = new Set([
234
- "reasoning",
235
- "non-reasoning",
236
- "high",
237
- "low",
238
- "medium",
239
- "xhigh",
240
- "preview",
241
- "adaptive",
242
- "fast",
243
- ]);
244
-
245
- /**
246
- * Check if a segment is a variant qualifier rather than a different model identifier.
247
- * Accepts effort levels, reasoning modes, date codes, size specifiers, and version numbers.
248
- */
249
- function isVariantQualifier(segment: string): boolean {
250
- if (VARIANT_QUALIFIER_SEGMENTS.has(segment)) return true;
251
- // Date codes like "0528", "20250514"
252
- if (/^\d{4,8}$/.test(segment)) return true;
253
- // Month names (from date suffixes like "may-25", "mar-24")
254
- if (/^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)$/.test(segment))
255
- return true;
256
- // Size specifiers like "70b", "8b", "a35b", "a3b" (MoE notation)
257
- if (/^a?\d+(\.\d+)?b$/i.test(segment)) return true;
258
- // Version numbers like "v3.2", "v2.5", "v1"
259
- if (/^v\d+(\.\d+)?$/.test(segment)) return true;
260
- // Two-digit year like "25", "24"
261
- if (/^\d{2}$/.test(segment)) return true;
262
- // Special variant suffixes
263
- if (segment === "speciale" || segment === "chatgpt" || segment === "latest")
264
- return true;
265
- return false;
266
- }
267
-
268
- /**
269
- * Normalize model ID by reordering size tokens to match AA convention.
270
- * Converts "70b-instruct" → "instruct-70b", "405b-chat" → "chat-405b".
271
- * AA uses instruct-70b order while providers often use 70b-instruct.
272
- */
273
- function normalizeSizeTokenOrder(id: string): string {
274
- // Convert "70b-instruct" → "instruct-70b", "405b-chat" → "chat-405b"
275
- const suffixes = new Set(["instruct", "chat"]);
276
- const parts = id.split("-");
277
- for (let i = 0; i < parts.length - 1; i++) {
278
- const lower = parts[i].toLowerCase();
279
- if (lower.endsWith("b") && suffixes.has(parts[i + 1].toLowerCase())) {
280
- // Validate the part before 'b' is a number
281
- const num = lower.slice(0, -1);
282
- if (num.length > 0 && !Number.isNaN(Number.parseFloat(num))) {
283
- [parts[i], parts[i + 1]] = [parts[i + 1], parts[i]];
284
- break;
285
- }
286
- }
287
- }
288
- return parts.join("-");
289
- }
290
-
291
- /**
292
- * Extract the base model ID from a provider model ID.
293
- * Strips ALL provider prefixes ("openai/", "@cf/meta/", "@cf/qwen/"), :free suffix, date suffixes, and version suffixes.
294
- */
295
- function extractBaseModelId(modelId: string): string {
296
- return modelId
297
- .toLowerCase()
298
- .replace(/^.*\//, "") // Strip ALL path prefixes - keep only last segment
299
- .replace(/:free$/, "") // Strip :free suffix
300
- .replace(/-\d{8}$/, "") // Strip date suffixes like -20250514
301
- .replace(/-v\d+(\.\d+)?$/, "") // Strip version suffixes like -v1.1
302
- .replace(/-\d{3,}$/, "") // Strip numeric suffixes like -001, -2603
303
- .replace(/-it$/, "") // Strip -it suffix (Gemma convention for "instruct")
304
- .replace(/-fp\d+$/, "") // Strip -fp8, -fp16 suffixes
305
- .replace(/-bf\d+$/, "") // Strip -bf16 suffixes
306
- .trim();
307
- }
308
-
309
- /**
310
- * Find the best benchmark variant by prefix matching.
311
- * Given a base model ID, finds all benchmark keys that are variants of it
312
- * (same base model with effort/reasoning/date qualifiers) and returns the
313
- * variant with the highest codingIndex.
314
- */
315
- function findBestVariantByPrefix(
316
- baseId: string,
317
- provider?: string,
318
- originalId?: string,
319
- ): HardcodedBenchmark | null {
320
- const prefixKey = baseId + "-";
321
- const candidates: { key: string; data: HardcodedBenchmark }[] = [];
322
-
323
- for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
324
- string,
325
- HardcodedBenchmark,
326
- ][]) {
327
- // Exact match
328
- if (key === baseId) {
329
- if (data.codingIndex !== undefined) {
330
- logDebug({
331
- provider,
332
- modelId: originalId || baseId,
333
- modelName: "",
334
- action: "match",
335
- strategy: "exact-prefix-match",
336
- matchKey: key,
337
- codingIndex: data.codingIndex,
338
- });
339
- return data;
340
- }
341
- continue;
342
- }
343
-
344
- // Prefix match: key starts with baseId + "-"
345
- if (key.startsWith(prefixKey)) {
346
- // Check that the first segment after the prefix is a qualifier
347
- // (prevents gpt-4o → gpt-4o-mini cross-model matches)
348
- const remainder = key.slice(prefixKey.length);
349
- const firstSegment = remainder.split("-")[0]!;
350
- if (isVariantQualifier(firstSegment)) {
351
- candidates.push({ key, data });
352
- }
353
- }
354
- }
355
-
356
- if (candidates.length === 0) return null;
357
-
358
- // Pick the candidate with the highest codingIndex
359
- // If tied or no CI, use normalizedScore as tiebreaker
360
- candidates.sort((a, b) => {
361
- const ciA = a.data.codingIndex ?? -1;
362
- const ciB = b.data.codingIndex ?? -1;
363
- if (ciB !== ciA) return ciB - ciA;
364
- return (b.data.normalizedScore ?? 0) - (a.data.normalizedScore ?? 0);
365
- });
366
-
367
- // Only return if the best candidate has a codingIndex
368
- if (candidates[0]!.data.codingIndex !== undefined) {
369
- logDebug({
370
- provider,
371
- modelId: originalId || baseId,
372
- modelName: "",
373
- action: "match",
374
- strategy: "variant-prefix-match",
375
- normalizedId: baseId,
376
- matchKey: candidates[0]!.key,
377
- codingIndex: candidates[0]!.data.codingIndex,
378
- details: `${candidates.length} candidates`,
379
- });
380
- return candidates[0]!.data;
381
- }
382
-
383
- return null;
384
- }
385
-
386
- // =============================================================================
387
- // Variant alias mappings
388
- // =============================================================================
389
-
390
- const MODEL_VARIANTS: Record<string, string[]> = {
391
- "gpt-4o-aug-24": ["gpt-4o", "gpt-4-o"],
392
- "gpt-4": ["gpt-4", "gpt4"],
393
- "claude-3.5-sonnet-oct-24": [
394
- "claude-3.5-sonnet",
395
- "claude-3-5-sonnet",
396
- "sonnet-3.5",
397
- ],
398
- "claude-3-opus": ["claude-3-opus", "opus-3"],
399
- "llama-3.1-instruct-405b": ["llama-3.1-405b", "llama3.1-405b", "llama-405b"],
400
- "llama-3.1-instruct-70b": ["llama-3.1-70b", "llama3.1-70b", "llama-70b"],
401
- "gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
402
- "qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
403
- "deepseek-v3.2-non-reasoning": ["deepseek-v3", "deepseekv3", "deepseek-chat"],
404
- "mimo-v2-pro": ["mimo-v2-pro", "mimo-v2-pro-free", "mimo-pro"],
405
- "mimo-v2-omni": ["mimo-v2-omni", "mimo-v2-omni-free", "mimo-omni"],
406
- "mimo-v2-flash": ["mimo-v2-flash", "mimo-v2-flash-free", "mimo-flash"],
407
- "big-pickle": ["big-pickle", "bigpickle"],
408
- "minimax-m2.5": ["minimax-m2.5", "minimax-m2.5-free", "minimax-m25"],
409
- "nvidia-nemotron-3-super-120b-a12b-reasoning": [
410
- "nemotron-3-super",
411
- "nemotron-3-super-free",
412
- "nemotron-super",
413
- "nemotron-3",
414
- ],
415
- };
416
-
417
- // =============================================================================
418
- // Strategy steps
419
- // =============================================================================
420
-
421
- function tryDirectSubstringMatch(
422
- search: string,
423
- provider: string | undefined,
424
- modelId: string,
425
- modelName: string,
426
- ): HardcodedBenchmark | null {
427
- for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
428
- string,
429
- HardcodedBenchmark,
430
- ][]) {
431
- if (search.includes(key.toLowerCase())) {
432
- logDebug({
433
- provider,
434
- modelId,
435
- modelName,
436
- action: "match",
437
- strategy: "direct-substring",
438
- matchKey: key,
439
- codingIndex: data.codingIndex,
440
- });
441
- return data;
442
- }
443
- }
444
- return null;
445
- }
446
-
447
- function tryVariantAliasMatch(
448
- search: string,
449
- provider: string | undefined,
450
- modelId: string,
451
- modelName: string,
452
- ): HardcodedBenchmark | null {
453
- for (const [canonical, names] of Object.entries(MODEL_VARIANTS)) {
454
- if (names.some((n) => search.includes(n.toLowerCase()))) {
455
- const data = HARDCODED_BENCHMARKS[canonical];
456
- if (data) {
457
- logDebug({
458
- provider,
459
- modelId,
460
- modelName,
461
- action: "match",
462
- strategy: "variant-alias",
463
- matchKey: canonical,
464
- codingIndex: data.codingIndex,
465
- });
466
- return data;
467
- }
468
- }
469
- }
470
- return null;
471
- }
472
-
473
- function tryProviderNormalizedMatch(
474
- modelId: string,
475
- provider: string | undefined,
476
- modelName: string,
477
- ): { result: HardcodedBenchmark | null; normalized: string } {
478
- const { normalized, strategy } = applyProviderNormalization(
479
- modelId,
480
- provider,
481
- );
482
-
483
- if (normalized === modelId.toLowerCase()) {
484
- return { result: null, normalized };
485
- }
486
-
487
- logDebug({
488
- provider,
489
- modelId,
490
- modelName,
491
- action: "normalized",
492
- strategy,
493
- normalizedId: normalized,
494
- });
495
-
496
- for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
497
- string,
498
- HardcodedBenchmark,
499
- ][]) {
500
- if (normalized.includes(key.toLowerCase())) {
501
- logDebug({
502
- provider,
503
- modelId,
504
- modelName,
505
- action: "match",
506
- strategy: `provider-normalized:${strategy}`,
507
- matchKey: key,
508
- codingIndex: data.codingIndex,
509
- });
510
- return { result: data, normalized };
511
- }
512
- }
513
-
514
- return { result: null, normalized };
515
- }
516
-
517
- function tryPrefixFallback(
518
- normalizedId: string,
519
- provider: string | undefined,
520
- modelId: string,
521
- modelName: string,
522
- ): HardcodedBenchmark | null {
523
- const baseId = extractBaseModelId(normalizedId);
524
- if (!baseId) return null;
525
-
526
- const best = findBestVariantByPrefix(baseId, provider, modelId);
527
- if (best) return best;
528
-
529
- // Try with word-order normalization
530
- // (e.g., llama-3.3-70b-instruct → llama-3.3-instruct-70b)
531
- const reordered = normalizeSizeTokenOrder(baseId);
532
- if (reordered === baseId) return null;
533
-
534
- logDebug({
535
- provider,
536
- modelId,
537
- modelName,
538
- action: "normalized",
539
- strategy: "size-token-reorder",
540
- normalizedId: reordered,
541
- });
542
-
543
- return findBestVariantByPrefix(reordered, provider, modelId);
544
- }
545
-
546
- // =============================================================================
547
- // Main lookup
548
- // =============================================================================
549
-
550
- export function findHardcodedBenchmark(
551
- modelName: string,
552
- modelId: string,
553
- provider?: string,
554
- ): HardcodedBenchmark | null {
555
- const search = `${modelName} ${modelId}`.toLowerCase();
556
-
557
- logDebug({ provider, modelId, modelName, action: "attempt" });
558
-
559
- // 1. Direct substring match
560
- const direct = tryDirectSubstringMatch(search, provider, modelId, modelName);
561
- if (direct) return direct;
562
-
563
- // 2. Variant alias matching
564
- const variant = tryVariantAliasMatch(search, provider, modelId, modelName);
565
- if (variant) return variant;
566
-
567
- // 3. Provider-specific normalization
568
- const { result: normalizedResult, normalized } = tryProviderNormalizedMatch(
569
- modelId,
570
- provider,
571
- modelName,
572
- );
573
- if (normalizedResult) return normalizedResult;
574
-
575
- // 4. Prefix fallback with base model extraction
576
- const prefix = tryPrefixFallback(normalized, provider, modelId, modelName);
577
- if (prefix) return prefix;
578
-
579
- // No match found
580
- logDebug({
581
- provider,
582
- modelId,
583
- modelName,
584
- action: "miss",
585
- strategy: "all-strategies-failed",
586
- normalizedId: normalized,
587
- details: `Final normalized: ${normalized}`,
588
- });
589
-
590
- return null;
591
- }
592
-
593
- /**
594
- * Get score from hardcoded data
595
- */
596
- export function getHardcodedScore(
597
- modelName: string,
598
- modelId: string,
599
- provider?: string,
600
- ): number | null {
601
- const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
602
- return benchmark?.normalizedScore ?? null;
603
- }
604
-
605
- /**
606
- * Enhance model name with Coding Index score
607
- * Returns model name with CI score appended if available
608
- */
609
- export function enhanceModelNameWithCodingIndex(
610
- modelName: string,
611
- modelId: string,
612
- provider?: string,
613
- ): string {
614
- const benchmark = findHardcodedBenchmark(modelName, modelId, provider);
615
- if (benchmark?.codingIndex !== undefined) {
616
- return `${modelName} [CI: ${benchmark.codingIndex.toFixed(1)}]`;
617
- }
618
- return modelName;
619
- }
620
-
621
- // =============================================================================
622
- // Stats and Reporting
623
- // =============================================================================
624
-
625
- /**
626
- * Get statistics about model matching from the current session
627
- * Note: This reads the log file and computes stats
628
- */
629
- interface LogStats {
630
- totalAttempts: number;
631
- matches: number;
632
- misses: number;
633
- byProvider: Record<
634
- string,
635
- { attempts: number; matches: number; misses: number }
636
- >;
637
- }
638
-
639
- function parseLogLine(stats: LogStats, line: string): void {
640
- if (!line.trim()) return;
641
- const parts = line.split("|");
642
- if (parts.length < 5) return;
643
-
644
- const provider = parts[1] || "unknown";
645
- const action = parts[4];
646
-
647
- if (!stats.byProvider[provider]) {
648
- stats.byProvider[provider] = { attempts: 0, matches: 0, misses: 0 };
649
- }
650
-
651
- if (action === "attempt") {
652
- stats.totalAttempts++;
653
- stats.byProvider[provider].attempts++;
654
- } else if (action === "match") {
655
- stats.matches++;
656
- stats.byProvider[provider].matches++;
657
- } else if (action === "miss") {
658
- stats.misses++;
659
- stats.byProvider[provider].misses++;
660
- }
661
- }
662
-
663
- function computeMatchRate(stats: LogStats): number {
664
- const total = stats.matches + stats.misses;
665
- return total > 0 ? Math.round((stats.matches / total) * 100) : 0;
666
- }
667
-
668
- export function getMatchingStats(): {
669
- totalAttempts: number;
670
- matches: number;
671
- misses: number;
672
- matchRate: number;
673
- byProvider: Record<
674
- string,
675
- { attempts: number; matches: number; misses: number }
676
- >;
677
- } {
678
- const stats: LogStats = {
679
- totalAttempts: 0,
680
- matches: 0,
681
- misses: 0,
682
- byProvider: {},
683
- };
684
-
685
- try {
686
- if (!existsSync(LOG_FILE)) {
687
- return { ...stats, matchRate: 0 };
688
- }
689
-
690
- const content = readFileSync(LOG_FILE, "utf-8");
691
- for (const line of content.split("\n").slice(1)) {
692
- parseLogLine(stats, line);
693
- }
694
- } catch {
695
- // Return empty stats on error
696
- }
697
-
698
- return { ...stats, matchRate: computeMatchRate(stats) };
699
- }
700
-
701
- // Need to import readFileSync for stats
702
- import { readFileSync } from "node:fs";
1
+ /**
2
+ * Benchmark lookup logic — extracted from hardcoded-benchmarks.ts
3
+ * for maintainability (the data file is ~10k lines of JSON-like entries).
4
+ *
5
+ * This module re-exports everything consumers currently import from
6
+ * hardcoded-benchmarks, so you can switch imports to this file without
7
+ * breaking anything.
8
+ *
9
+ * ENHANCED: Added debug logging and provider-specific normalizers
10
+ */
11
+
12
+ import { appendFileSync, existsSync, mkdirSync, writeFileSync } from "node:fs";
13
+ import { homedir } from "node:os";
14
+ import { join } from "node:path";
15
+ import {
16
+ HARDCODED_BENCHMARKS,
17
+ type HardcodedBenchmark,
18
+ } from "./hardcoded-benchmarks.ts";
19
+
20
+ // Re-export the type and data so callers can migrate imports here
21
+ export { HARDCODED_BENCHMARKS, type HardcodedBenchmark };
22
+
23
+ // =============================================================================
24
+ // Debug Logging
25
+ // =============================================================================
26
+
27
+ const LOG_DIR = join(homedir(), ".pi");
28
+ const LOG_FILE = join(LOG_DIR, "modelmatch.log");
29
+ let debugEnabled = true;
30
+
31
+ /**
32
+ * Enable/disable debug logging
33
+ */
34
+ export function setDebugLogging(enabled: boolean): void {
35
+ debugEnabled = enabled;
36
+ }
37
+
38
+ /**
39
+ * Log a message to the modelmatch.log file
40
+ */
41
+ function logDebug(entry: {
42
+ provider?: string;
43
+ modelId: string;
44
+ modelName: string;
45
+ action: "attempt" | "match" | "miss" | "normalized";
46
+ strategy?: string;
47
+ normalizedId?: string;
48
+ matchKey?: string;
49
+ codingIndex?: number;
50
+ details?: string;
51
+ }): void {
52
+ if (!debugEnabled) return;
53
+
54
+ try {
55
+ // Ensure log directory exists
56
+ if (!existsSync(LOG_DIR)) {
57
+ mkdirSync(LOG_DIR, { recursive: true });
58
+ }
59
+
60
+ // Initialize log file with header if it doesn't exist
61
+ if (!existsSync(LOG_FILE)) {
62
+ writeFileSync(
63
+ LOG_FILE,
64
+ "timestamp|provider|modelId|modelName|action|strategy|normalizedId|matchKey|codingIndex|details\n",
65
+ );
66
+ }
67
+
68
+ const timestamp = new Date().toISOString();
69
+ const line = [
70
+ timestamp,
71
+ entry.provider || "unknown",
72
+ entry.modelId,
73
+ entry.modelName,
74
+ entry.action,
75
+ entry.strategy || "",
76
+ entry.normalizedId || "",
77
+ entry.matchKey || "",
78
+ entry.codingIndex !== undefined ? entry.codingIndex.toFixed(1) : "",
79
+ entry.details || "",
80
+ ]
81
+ .map((f) => f.replaceAll(/[\\|]/g, "\\$&")) // Escape backslashes and pipes
82
+ .join("|");
83
+
84
+ appendFileSync(LOG_FILE, `${line}\n`);
85
+ } catch {
86
+ // Silently fail - don't break functionality for logging issues
87
+ }
88
+ }
89
+
90
+ /**
91
+ * Get the path to the log file for user reference
92
+ */
93
+ export function getMatchLogPath(): string {
94
+ return LOG_FILE;
95
+ }
96
+
97
+ /**
98
+ * Clear the match log
99
+ */
100
+ export function clearMatchLog(): void {
101
+ try {
102
+ if (existsSync(LOG_FILE)) {
103
+ writeFileSync(
104
+ LOG_FILE,
105
+ "timestamp|provider|modelId|modelName|action|strategy|normalizedId|matchKey|codingIndex|details\n",
106
+ );
107
+ }
108
+ } catch {
109
+ // Ignore errors
110
+ }
111
+ }
112
+
113
+ // =============================================================================
114
+ // Provider-Specific Normalizers
115
+ // =============================================================================
116
+
117
+ /**
118
+ * Helpers for provider-specific ID normalization; applyProviderNormalization (defined after them) applies them to handle naming conventions
119
+ */
120
+ /** Normalize NVIDIA model IDs by stripping org prefixes like meta/, mistralai/ */
121
+ function normalizeNvidia(ctx: {
122
+ normalized: string;
123
+ strategies: string[];
124
+ }): void {
125
+ const prefixMatch = ctx.normalized.match(
126
+ /^(meta|mistralai|microsoft|qwen|nvidia|ibm|google|ai21labs|bigcode|databricks|deepseek-ai|01-ai|adept|aisingapore|baai|bytedance|luma|stabilityai|fireworks|upstage|voyage|snowflake|recursal|kdan|unity|cloudflare|fblgit|nttdata|dito|nousresearch|espressomodels|ftmsh|huggingface|isolationai|pinglab|functionnetwork|huggingfaceh4|mcw|shutterstock)[^/]*\//,
127
+ );
128
+ if (prefixMatch) {
129
+ ctx.normalized = ctx.normalized.replaceAll(/^[^/]+\//g, "");
130
+ ctx.strategies.push("strip-nvidia-prefix");
131
+ }
132
+ }
133
+
134
+ /** Normalize Cloudflare model IDs by stripping @cf/namespace/ prefix */
135
+ function normalizeCloudflare(ctx: {
136
+ normalized: string;
137
+ strategies: string[];
138
+ }): void {
139
+ if (ctx.normalized.startsWith("@cf/")) {
140
+ ctx.normalized = ctx.normalized.replaceAll(/^@cf\/[^/]+\//g, "");
141
+ ctx.strategies.push("strip-cf-namespace");
142
+ }
143
+ }
144
+
145
+ /** Strip OpenRouter's :free suffix from model IDs */
146
+ function normalizeFreeSuffix(ctx: {
147
+ normalized: string;
148
+ strategies: string[];
149
+ }): void {
150
+ if (ctx.normalized.includes(":free")) {
151
+ ctx.normalized = ctx.normalized.replaceAll(/:free$/g, "");
152
+ ctx.strategies.push("strip-free-suffix");
153
+ }
154
+ }
155
+
156
+ /** Handle Ollama model:tag format by replacing colons with dashes */
157
+ function normalizeOllama(ctx: {
158
+ normalized: string;
159
+ strategies: string[];
160
+ }): void {
161
+ if (ctx.normalized.includes(":")) {
162
+ ctx.normalized = ctx.normalized.replaceAll(/:/g, "-");
163
+ ctx.strategies.push("ollama-colon-to-dash");
164
+ }
165
+ }
166
+
167
+ /** Strip Groq-specific numeric suffixes (-32768) and -versatile */
168
+ function normalizeGroq(ctx: {
169
+ normalized: string;
170
+ strategies: string[];
171
+ }): void {
172
+ if (/-\d+$/.test(ctx.normalized)) {
173
+ ctx.normalized = ctx.normalized.replaceAll(/-\d+$/g, "");
174
+ ctx.strategies.push("strip-groq-numeric-suffix");
175
+ }
176
+ if (ctx.normalized.includes("-versatile")) {
177
+ ctx.normalized = ctx.normalized.replaceAll(/-versatile$/g, "");
178
+ ctx.strategies.push("strip-groq-versatile");
179
+ }
180
+ }
181
+
182
+ /** Normalize Cerebras llama format (llama3.1-8b -> llama-3.1-8b) and add -instruct */
183
+ function normalizeCerebras(ctx: {
184
+ normalized: string;
185
+ strategies: string[];
186
+ }): void {
187
+ if (/^llama\d/.test(ctx.normalized)) {
188
+ ctx.normalized = ctx.normalized.replaceAll(/^llama(\d)/g, "llama-$1");
189
+ ctx.strategies.push("cerebras-llama-dash");
190
+ }
191
+ if (
192
+ /^llama-[\d.]+-\d+b$/.test(ctx.normalized) &&
193
+ !ctx.normalized.includes("instruct")
194
+ ) {
195
+ ctx.normalized = ctx.normalized.replaceAll(
196
+ /^(llama-[\d.]+-\d+b)/g,
197
+ "$1-instruct",
198
+ );
199
+ ctx.strategies.push("add-instruct-suffix");
200
+ }
201
+ }
202
+
203
/**
 * Strip Mistral's trailing `-latest` suffix.
 *
 * Mutates `ctx` in place. "strip-mistral-latest" is appended to
 * `ctx.strategies` only when the suffix was actually removed.
 */
function normalizeMistral(ctx: {
  normalized: string;
  strategies: string[];
}): void {
  // Only a *trailing* "-latest" is removed; guarding with endsWith keeps the
  // recorded strategy in step with the rewrite (a substring check would log
  // the strategy for IDs such as "x-latest-y" that are left unchanged).
  const latestSuffix = "-latest";
  if (!ctx.normalized.endsWith(latestSuffix)) return;

  ctx.normalized = ctx.normalized.slice(0, -latestSuffix.length);
  ctx.strategies.push("strip-mistral-latest");
}
213
+
214
/**
 * Remove generic trailing suffixes (dates, versions, numeric tags, precision
 * markers, preview/exp flags, revision tags) from `ctx.normalized`.
 *
 * The patterns are tried once each, in the order listed; a suffix exposed by
 * a later pattern is not re-checked against earlier ones. For every pattern
 * that removes something, a `strip-…` label derived from the pattern source
 * (regex metacharacters removed, truncated to 10 characters) is appended to
 * `ctx.strategies`. Mutates `ctx` in place.
 */
function stripCommonSuffixes(ctx: {
  normalized: string;
  strategies: string[];
}): void {
  const trailingPatterns: RegExp[] = [
    /-\d{8}$/g, // Date suffixes like -20250514
    /-v\d+(\.\d+)?$/g, // Version suffixes like -v1.1
    /-\d{3,}$/g, // Numeric suffixes like -001, -2603
    /-it$/g, // -it (Gemma convention)
    /-fp\d+$/g, // -fp8, -fp16
    /-bf\d+$/g, // -bf16
    /-preview$/g, // -preview
    /-exp$/g, // -exp (experimental)
    /-instruct-0\.\d+$/g, // HuggingFace revision tags
  ];

  trailingPatterns.forEach((suffix) => {
    // Every pattern matches at least one character, so "the string changed"
    // is the same condition as "the pattern matched".
    const trimmed = ctx.normalized.replace(suffix, "");
    if (trimmed === ctx.normalized) return;

    const label = suffix.source.replace(/[\\^$.*+?()[\]{}|]/g, "").slice(0, 10);
    ctx.normalized = trimmed;
    ctx.strategies.push(`strip-${label}`);
  });
}
239
+
240
/**
 * Run the normalization pipeline for a model ID.
 *
 * Order of steps (each mutates a shared context):
 *   1. lower-case the ID,
 *   2. nvidia / cloudflare specific rewrite (if that is the provider),
 *   3. `normalizeFreeSuffix` for every provider,
 *   4. ollama / groq / cerebras / mistral specific rewrite (if that is the provider),
 *   5. `stripCommonSuffixes` for every provider.
 *
 * @param modelId  Provider model ID as received.
 * @param provider Optional provider name selecting the provider-specific step.
 * @returns The normalized ID and the comma-joined names of the strategies applied.
 */
function applyProviderNormalization(
  modelId: string,
  provider?: string,
): { normalized: string; strategy: string } {
  const ctx: { normalized: string; strategies: string[] } = {
    normalized: modelId.toLowerCase(),
    strategies: [],
  };

  // Provider rewrites that must run before the generic ":free" handling.
  switch (provider) {
    case "nvidia":
      normalizeNvidia(ctx);
      break;
    case "cloudflare":
      normalizeCloudflare(ctx);
      break;
    default:
      break;
  }

  normalizeFreeSuffix(ctx);

  // Provider rewrites that run after the generic ":free" handling.
  switch (provider) {
    case "ollama":
      normalizeOllama(ctx);
      break;
    case "groq":
      normalizeGroq(ctx);
      break;
    case "cerebras":
      normalizeCerebras(ctx);
      break;
    case "mistral":
      normalizeMistral(ctx);
      break;
    default:
      break;
  }

  stripCommonSuffixes(ctx);

  const { normalized, strategies } = ctx;
  return { normalized, strategy: strategies.join(",") };
}
263
+
264
+ // =============================================================================
265
+ // Prefix fallback helpers
266
+ // =============================================================================
267
+
268
/**
 * Segments that indicate a variant of the same base model
 * (effort level, reasoning mode, preview) — NOT a fundamentally different model.
 * Used to filter prefix matches so we don't cross model boundaries
 * (e.g. gpt-4o → gpt-4o-mini is wrong, but gpt-4o → gpt-4o-aug-24 is fine).
 *
 * "non" is listed because the prefix matcher splits the remainder of a key on
 * "-" and passes only the first piece, so a "…-non-reasoning" key arrives
 * here as "non". "non-reasoning" is kept for callers that pass the whole
 * qualifier.
 */
const VARIANT_QUALIFIER_SEGMENTS = new Set([
  "reasoning",
  "non-reasoning",
  "non",
  "high",
  "low",
  "medium",
  "xhigh",
  "preview",
  "adaptive",
  "fast",
]);

/**
 * Check if a segment is a variant qualifier rather than a different model identifier.
 * Accepts effort levels, reasoning modes, date codes, month names, size
 * specifiers, version numbers, two-digit years and a few special suffixes.
 *
 * @param segment One dash-delimited piece of a benchmark key (compared as-is,
 *                except size specifiers, which are matched case-insensitively).
 * @returns `true` when the segment only qualifies the same base model.
 */
function isVariantQualifier(segment: string): boolean {
  if (VARIANT_QUALIFIER_SEGMENTS.has(segment)) return true;

  const qualifierShapes: RegExp[] = [
    /^\d{4,8}$/, // Date codes like "0528", "20250514"
    /^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)$/, // Month names ("may-25", "mar-24")
    /^a?\d+(\.\d+)?b$/i, // Size specifiers like "70b", "8b", "a35b", "a3b" (MoE notation)
    /^v\d+(\.\d+)?$/, // Version numbers like "v3.2", "v2.5", "v1"
    /^\d{2}$/, // Two-digit year like "25", "24"
  ];
  if (qualifierShapes.some((shape) => shape.test(segment))) return true;

  // Special variant suffixes
  return segment === "speciale" || segment === "chatgpt" || segment === "latest";
}
308
+
309
/**
 * Normalize model ID by reordering size tokens to match AA convention.
 * Converts "70b-instruct" → "instruct-70b", "405b-chat" → "chat-405b".
 * AA uses instruct-70b order while providers often use 70b-instruct.
 *
 * Only the first `<size>-<instruct|chat>` pair is swapped. A size token must
 * be a plain number followed by "b" (e.g. "70b", "0.5b"); tokens such as
 * "8x7b" or "a3b" are left where they are.
 *
 * @param id Dash-delimited model ID.
 * @returns The ID with the first qualifying pair swapped, or `id` unchanged.
 */
function normalizeSizeTokenOrder(id: string): string {
  const roleSuffixes = new Set(["instruct", "chat"]);
  // Strict check: the whole token must be digits (optionally with a decimal
  // part) plus "b". A lenient numeric parse would accept "8x7b" as "8".
  const sizeToken = /^\d+(\.\d+)?b$/;

  const parts = id.split("-");
  for (let i = 0; i + 1 < parts.length; i++) {
    const current = parts[i] ?? "";
    const following = parts[i + 1] ?? "";
    if (
      sizeToken.test(current.toLowerCase()) &&
      roleSuffixes.has(following.toLowerCase())
    ) {
      parts[i] = following;
      parts[i + 1] = current;
      break;
    }
  }
  return parts.join("-");
}
331
+
332
/**
 * Reduce a provider model ID to its base model ID.
 *
 * The ID is lower-cased, then the following are removed once each, in order:
 * everything up to and including the last "/", a trailing ":free", and the
 * trailing date / version / numeric / -it / -fp* / -bf* suffixes. The result
 * is trimmed last, so surrounding whitespace can stop a trailing suffix from
 * matching.
 *
 * @param modelId Provider model ID, e.g. "openai/gpt-4o:free".
 * @returns The base ID, e.g. "gpt-4o".
 */
function extractBaseModelId(modelId: string): string {
  const removals: RegExp[] = [
    /^.*\//g, // Strip ALL path prefixes - keep only last segment
    /:free$/g, // Strip :free suffix
    /-\d{8}$/g, // Strip date suffixes like -20250514
    /-v\d+(\.\d+)?$/g, // Strip version suffixes like -v1.1
    /-\d{3,}$/g, // Strip numeric suffixes like -001, -2603
    /-it$/g, // Strip -it suffix (Gemma convention for "instruct")
    /-fp\d+$/g, // Strip -fp8, -fp16 suffixes
    /-bf\d+$/g, // Strip -bf16 suffixes
  ];

  let base = modelId.toLowerCase();
  for (const removal of removals) {
    base = base.replace(removal, "");
  }
  return base.trim();
}
349
+
350
/**
 * Resolve a base model ID against the hardcoded benchmark table.
 *
 * While scanning the table:
 * - a key equal to `baseId` is returned immediately if it has a codingIndex
 *   (an exact key without one is skipped);
 * - a key starting with `baseId + "-"` is collected as a candidate when the
 *   first dash-delimited segment after the prefix is a variant qualifier
 *   (prevents gpt-4o → gpt-4o-mini cross-model matches).
 *
 * The candidate with the highest codingIndex wins, with normalizedScore as
 * the tiebreaker; it is returned only if it has a codingIndex.
 *
 * @param baseId     Base model ID to look up.
 * @param provider   Provider name, used for debug logging only.
 * @param originalId Original model ID for debug logging; falls back to `baseId`.
 * @returns The matched benchmark, or `null` when nothing suitable is found.
 */
function findBestVariantByPrefix(
  baseId: string,
  provider?: string,
  originalId?: string,
): HardcodedBenchmark | null {
  const variantPrefix = `${baseId}-`;
  const variants: { key: string; data: HardcodedBenchmark }[] = [];
  const table = Object.entries(HARDCODED_BENCHMARKS) as [
    string,
    HardcodedBenchmark,
  ][];

  for (const [key, data] of table) {
    if (key === baseId) {
      // Exact key: usable only when it carries a codingIndex.
      if (data.codingIndex === undefined) continue;
      logDebug({
        provider,
        modelId: originalId || baseId,
        modelName: "",
        action: "match",
        strategy: "exact-prefix-match",
        matchKey: key,
        codingIndex: data.codingIndex,
      });
      return data;
    }

    if (!key.startsWith(variantPrefix)) continue;

    const [leadingSegment] = key.slice(variantPrefix.length).split("-");
    if (isVariantQualifier(leadingSegment!)) {
      variants.push({ key, data });
    }
  }

  if (variants.length === 0) return null;

  // Highest codingIndex first (missing counts as -1); ties broken by
  // normalizedScore (missing counts as 0).
  variants.sort((left, right) => {
    const leftCi = left.data.codingIndex ?? -1;
    const rightCi = right.data.codingIndex ?? -1;
    if (rightCi !== leftCi) return rightCi - leftCi;
    return (right.data.normalizedScore ?? 0) - (left.data.normalizedScore ?? 0);
  });

  const winner = variants[0]!;
  if (winner.data.codingIndex === undefined) return null;

  logDebug({
    provider,
    modelId: originalId || baseId,
    modelName: "",
    action: "match",
    strategy: "variant-prefix-match",
    normalizedId: baseId,
    matchKey: winner.key,
    codingIndex: winner.data.codingIndex,
    details: `${variants.length} candidates`,
  });
  return winner.data;
}
426
+
427
+ // =============================================================================
428
+ // Variant alias mappings
429
+ // =============================================================================
430
+
431
/**
 * Alias table: canonical benchmark key → substrings that identify the model.
 *
 * The alias matcher walks these entries in declaration order and stops at the
 * first entry with an alias contained in the search text, so entry order is
 * significant and is kept as-is.
 */
const MODEL_VARIANTS: Record<string, string[]> = {
  // OpenAI
  "gpt-4o-aug-24": ["gpt-4o", "gpt-4-o"],
  "gpt-4": ["gpt-4", "gpt4"],

  // Anthropic
  "claude-3.5-sonnet-oct-24": [
    "claude-3.5-sonnet",
    "claude-3-5-sonnet",
    "sonnet-3.5",
  ],
  "claude-3-opus": ["claude-3-opus", "opus-3"],

  // Llama
  "llama-3.1-instruct-405b": [
    "llama-3.1-405b",
    "llama3.1-405b",
    "llama-405b",
  ],
  "llama-3.1-instruct-70b": [
    "llama-3.1-70b",
    "llama3.1-70b",
    "llama-70b",
  ],

  // Gemini / Qwen / DeepSeek
  "gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
  "qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
  "deepseek-v3.2-non-reasoning": [
    "deepseek-v3",
    "deepseekv3",
    "deepseek-chat",
  ],

  // MiMo
  "mimo-v2-pro": ["mimo-v2-pro", "mimo-v2-pro-free", "mimo-pro"],
  "mimo-v2-omni": ["mimo-v2-omni", "mimo-v2-omni-free", "mimo-omni"],
  "mimo-v2-flash": ["mimo-v2-flash", "mimo-v2-flash-free", "mimo-flash"],

  // Others
  "big-pickle": ["big-pickle", "bigpickle"],
  "minimax-m2.5": ["minimax-m2.5", "minimax-m2.5-free", "minimax-m25"],
  "nvidia-nemotron-3-super-120b-a12b-reasoning": [
    "nemotron-3-super",
    "nemotron-3-super-free",
    "nemotron-super",
    "nemotron-3",
  ],
};
457
+
458
+ // =============================================================================
459
+ // Strategy steps
460
+ // =============================================================================
461
+
462
/**
 * Strategy 1: return the first benchmark whose (lower-cased) key occurs as a
 * substring of `search`.
 *
 * Keys are visited in `Object.entries` order and the first hit wins.
 * NOTE(review): "first hit" is not necessarily the longest or most specific
 * key — confirm the table's ordering makes that safe.
 *
 * @param search    Lower-cased text to scan.
 * @param provider  Provider name, for debug logging only.
 * @param modelId   Model ID, for debug logging only.
 * @param modelName Model display name, for debug logging only.
 * @returns The matching benchmark, or `null` when no key is contained in `search`.
 */
function tryDirectSubstringMatch(
  search: string,
  provider: string | undefined,
  modelId: string,
  modelName: string,
): HardcodedBenchmark | null {
  const table = Object.entries(HARDCODED_BENCHMARKS) as [
    string,
    HardcodedBenchmark,
  ][];

  const hit = table.find(([key]) => search.includes(key.toLowerCase()));
  if (hit === undefined) return null;

  const [matchKey, data] = hit;
  logDebug({
    provider,
    modelId,
    modelName,
    action: "match",
    strategy: "direct-substring",
    matchKey,
    codingIndex: data.codingIndex,
  });
  return data;
}
487
+
488
/**
 * Strategy 2: resolve `search` through the alias table.
 *
 * Entries of `MODEL_VARIANTS` are visited in order. For the first entry that
 * has an alias contained in `search` AND whose canonical key exists in the
 * benchmark table, that benchmark is returned. Entries whose canonical key is
 * missing from the table are skipped.
 *
 * @param search    Lower-cased text to scan.
 * @param provider  Provider name, for debug logging only.
 * @param modelId   Model ID, for debug logging only.
 * @param modelName Model display name, for debug logging only.
 * @returns The matching benchmark, or `null`.
 */
function tryVariantAliasMatch(
  search: string,
  provider: string | undefined,
  modelId: string,
  modelName: string,
): HardcodedBenchmark | null {
  for (const [canonical, aliases] of Object.entries(MODEL_VARIANTS)) {
    const aliasFound = aliases.some((alias) =>
      search.includes(alias.toLowerCase()),
    );
    if (!aliasFound) continue;

    const data = HARDCODED_BENCHMARKS[canonical];
    if (!data) continue;

    logDebug({
      provider,
      modelId,
      modelName,
      action: "match",
      strategy: "variant-alias",
      matchKey: canonical,
      codingIndex: data.codingIndex,
    });
    return data;
  }
  return null;
}
513
+
514
/**
 * Strategy 3: normalize the model ID for its provider, then look for a
 * benchmark key contained in the normalized ID.
 *
 * When normalization leaves the lower-cased ID unchanged, no lookup is done
 * (the earlier substring strategy already covered that text). Otherwise the
 * first key, in `Object.entries` order, contained in the normalized ID wins.
 *
 * @param modelId   Provider model ID.
 * @param provider  Provider name selecting provider-specific normalization.
 * @param modelName Model display name, for debug logging only.
 * @returns The match (or `null`) together with the normalized ID, which is
 *          always returned so later strategies can reuse it.
 */
function tryProviderNormalizedMatch(
  modelId: string,
  provider: string | undefined,
  modelName: string,
): { result: HardcodedBenchmark | null; normalized: string } {
  const outcome = applyProviderNormalization(modelId, provider);
  const normalized = outcome.normalized;
  const strategy = outcome.strategy;

  const unchanged = normalized === modelId.toLowerCase();
  if (unchanged) {
    return { result: null, normalized };
  }

  logDebug({
    provider,
    modelId,
    modelName,
    action: "normalized",
    strategy,
    normalizedId: normalized,
  });

  const table = Object.entries(HARDCODED_BENCHMARKS) as [
    string,
    HardcodedBenchmark,
  ][];
  const hit = table.find(([key]) => normalized.includes(key.toLowerCase()));
  if (hit === undefined) {
    return { result: null, normalized };
  }

  const [matchKey, data] = hit;
  logDebug({
    provider,
    modelId,
    modelName,
    action: "match",
    strategy: `provider-normalized:${strategy}`,
    matchKey,
    codingIndex: data.codingIndex,
  });
  return { result: data, normalized };
}
557
+
558
/**
 * Strategy 4: extract the base model ID and resolve it by exact/prefix match,
 * retrying once with the size token reordered
 * (e.g., llama-3.3-70b-instruct → llama-3.3-instruct-70b).
 *
 * @param normalizedId Model ID after provider normalization.
 * @param provider     Provider name, for debug logging only.
 * @param modelId      Original model ID, for debug logging only.
 * @param modelName    Model display name, for debug logging only.
 * @returns The matching benchmark, or `null` when the base ID is empty or
 *          neither attempt matches.
 */
function tryPrefixFallback(
  normalizedId: string,
  provider: string | undefined,
  modelId: string,
  modelName: string,
): HardcodedBenchmark | null {
  const baseId = extractBaseModelId(normalizedId);
  if (baseId === "") return null;

  const firstAttempt = findBestVariantByPrefix(baseId, provider, modelId);
  if (firstAttempt) return firstAttempt;

  // Second attempt only makes sense if reordering actually changed the ID.
  const reordered = normalizeSizeTokenOrder(baseId);
  if (reordered !== baseId) {
    logDebug({
      provider,
      modelId,
      modelName,
      action: "normalized",
      strategy: "size-token-reorder",
      normalizedId: reordered,
    });
    return findBestVariantByPrefix(reordered, provider, modelId);
  }

  return null;
}
586
+
587
+ // =============================================================================
588
+ // Main lookup
589
+ // =============================================================================
590
+
591
/**
 * Look up the hardcoded benchmark for a model.
 *
 * Strategies are tried in order and the first hit is returned:
 *   1. direct substring match on "<modelName> <modelId>" (lower-cased),
 *   2. variant alias match on the same text,
 *   3. provider-specific normalization of `modelId`,
 *   4. prefix fallback on the normalized ID.
 * An "attempt" entry is logged up front and a "miss" entry when every
 * strategy fails.
 *
 * @param modelName Model display name.
 * @param modelId   Provider model ID.
 * @param provider  Optional provider name.
 * @returns The matching benchmark, or `null`.
 */
export function findHardcodedBenchmark(
  modelName: string,
  modelId: string,
  provider?: string,
): HardcodedBenchmark | null {
  const search = `${modelName} ${modelId}`.toLowerCase();

  logDebug({ provider, modelId, modelName, action: "attempt" });

  // 1 + 2: text-based strategies; the alias lookup only runs if the direct
  // substring lookup found nothing.
  const textual =
    tryDirectSubstringMatch(search, provider, modelId, modelName) ??
    tryVariantAliasMatch(search, provider, modelId, modelName);
  if (textual) return textual;

  // 3: provider-specific normalization.
  const normalizedOutcome = tryProviderNormalizedMatch(
    modelId,
    provider,
    modelName,
  );
  if (normalizedOutcome.result) return normalizedOutcome.result;
  const normalized = normalizedOutcome.normalized;

  // 4: prefix fallback with base model extraction.
  const viaPrefix = tryPrefixFallback(normalized, provider, modelId, modelName);
  if (viaPrefix) return viaPrefix;

  logDebug({
    provider,
    modelId,
    modelName,
    action: "miss",
    strategy: "all-strategies-failed",
    normalizedId: normalized,
    details: `Final normalized: ${normalized}`,
  });
  return null;
}
633
+
634
/**
 * Return the `normalizedScore` of the hardcoded benchmark matching a model.
 *
 * @param modelName Model display name.
 * @param modelId   Provider model ID.
 * @param provider  Optional provider name.
 * @returns The score, or `null` when no benchmark matches or the matched
 *          benchmark has no score.
 */
export function getHardcodedScore(
  modelName: string,
  modelId: string,
  provider?: string,
): number | null {
  const match = findHardcodedBenchmark(modelName, modelId, provider);
  if (match === null) return null;

  const score = match.normalizedScore;
  return score == null ? null : score;
}
645
+
646
/**
 * Append the Coding Index to a model name when one is known.
 *
 * @param modelName Model display name.
 * @param modelId   Provider model ID.
 * @param provider  Optional provider name.
 * @returns `"<modelName> [CI: <index with one decimal>]"` when the matched
 *          benchmark has a codingIndex; otherwise `modelName` unchanged.
 */
export function enhanceModelNameWithCodingIndex(
  modelName: string,
  modelId: string,
  provider?: string,
): string {
  const codingIndex = findHardcodedBenchmark(
    modelName,
    modelId,
    provider,
  )?.codingIndex;

  if (codingIndex === undefined) return modelName;

  const formatted = codingIndex.toFixed(1);
  return `${modelName} [CI: ${formatted}]`;
}
661
+
662
+ // =============================================================================
663
+ // Stats and Reporting
664
+ // =============================================================================
665
+
666
/**
 * Running totals accumulated while reading the model-matching log.
 * `getMatchingStats` fills one of these from the log file and adds a
 * match rate on top.
 */
interface LogStats {
  /** Number of "attempt" entries seen. */
  totalAttempts: number;
  /** Number of "match" entries seen. */
  matches: number;
  /** Number of "miss" entries seen. */
  misses: number;
  /** The same three counters, broken down by provider name. */
  byProvider: {
    [provider: string]: {
      attempts: number;
      matches: number;
      misses: number;
    };
  };
}
679
+
680
/**
 * Fold one log line into `stats`.
 *
 * A line is split on "|"; blank lines and lines with fewer than five fields
 * are ignored. Field 1 is taken as the provider ("unknown" when empty) and
 * field 4 as the action. A per-provider bucket is created on first sight of
 * a provider, whatever the action; only "attempt", "match" and "miss" change
 * the counters.
 *
 * @param stats Accumulator, mutated in place.
 * @param line  One raw line from the log file.
 */
function parseLogLine(stats: LogStats, line: string): void {
  if (line.trim() === "") return;

  const fields = line.split("|");
  if (fields.length < 5) return;

  const provider = fields[1] || "unknown";
  const action = fields[4];

  let bucket = stats.byProvider[provider];
  if (!bucket) {
    bucket = { attempts: 0, matches: 0, misses: 0 };
    stats.byProvider[provider] = bucket;
  }

  switch (action) {
    case "attempt":
      stats.totalAttempts++;
      bucket.attempts++;
      break;
    case "match":
      stats.matches++;
      bucket.matches++;
      break;
    case "miss":
      stats.misses++;
      bucket.misses++;
      break;
    default:
      break;
  }
}
703
+
704
/**
 * Percentage of resolved lookups that were matches, rounded to an integer.
 *
 * @param stats Accumulated counters; only `matches` and `misses` are read.
 * @returns `round(matches / (matches + misses) * 100)`, or 0 when that sum
 *          is not positive.
 */
function computeMatchRate(stats: LogStats): number {
  const { matches, misses } = stats;
  const resolved = matches + misses;
  if (!(resolved > 0)) return 0;

  return Math.round((matches / resolved) * 100);
}
708
+
709
/**
 * Compute model-matching statistics from the log file.
 *
 * Reads `LOG_FILE` synchronously, skips its first line, and folds every
 * remaining line into the counters. A missing file yields all-zero stats.
 * Any error while checking or reading the file is swallowed on purpose
 * (best effort); whatever was counted before the error is still returned.
 *
 * @returns Totals, per-provider counters, and `matchRate` as an integer
 *          percentage of matches among matches + misses.
 */
export function getMatchingStats(): {
  totalAttempts: number;
  matches: number;
  misses: number;
  matchRate: number;
  byProvider: Record<
    string,
    { attempts: number; matches: number; misses: number }
  >;
} {
  const stats: LogStats = {
    totalAttempts: 0,
    matches: 0,
    misses: 0,
    byProvider: {},
  };

  try {
    if (existsSync(LOG_FILE)) {
      // Drop the first line of the file, then count the rest.
      const [, ...records] = readFileSync(LOG_FILE, "utf-8").split("\n");
      records.forEach((record) => parseLogLine(stats, record));
    }
  } catch {
    // Return empty (or partial) stats on error
  }

  // With nothing counted the rate is 0, which also covers the missing-file case.
  return { ...stats, matchRate: computeMatchRate(stats) };
}
741
+
742
+ // Need to import readFileSync for stats
743
+ import { readFileSync } from "node:fs";