pi-free 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/banner.svg +132 -0
- package/index.ts +1 -1
- package/lib/model-detection.ts +176 -139
- package/lib/registry.ts +28 -21
- package/lib/util.ts +10 -4
- package/package.json +2 -1
- package/provider-failover/benchmark-lookup.ts +189 -138
- package/providers/cline/cline.ts +27 -10
- package/providers/dynamic-built-in/index.ts +3 -1
- package/providers/nvidia/nvidia.ts +48 -50
- package/providers/qwen/qwen.ts +47 -49
- package/scripts/check-extensions.mjs +8 -1
|
@@ -370,24 +370,46 @@ function findBestVariantByPrefix(
|
|
|
370
370
|
}
|
|
371
371
|
|
|
372
372
|
// =============================================================================
|
|
373
|
-
//
|
|
373
|
+
// Variant alias mappings
|
|
374
374
|
// =============================================================================
|
|
375
375
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
376
|
+
const MODEL_VARIANTS: Record<string, string[]> = {
|
|
377
|
+
"gpt-4o-aug-24": ["gpt-4o", "gpt-4-o"],
|
|
378
|
+
"gpt-4": ["gpt-4", "gpt4"],
|
|
379
|
+
"claude-3.5-sonnet-oct-24": [
|
|
380
|
+
"claude-3.5-sonnet",
|
|
381
|
+
"claude-3-5-sonnet",
|
|
382
|
+
"sonnet-3.5",
|
|
383
|
+
],
|
|
384
|
+
"claude-3-opus": ["claude-3-opus", "opus-3"],
|
|
385
|
+
"llama-3.1-instruct-405b": ["llama-3.1-405b", "llama3.1-405b", "llama-405b"],
|
|
386
|
+
"llama-3.1-instruct-70b": ["llama-3.1-70b", "llama3.1-70b", "llama-70b"],
|
|
387
|
+
"gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
|
|
388
|
+
"qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
|
|
389
|
+
"deepseek-v3.2-non-reasoning": ["deepseek-v3", "deepseekv3", "deepseek-chat"],
|
|
390
|
+
"mimo-v2-pro": ["mimo-v2-pro", "mimo-v2-pro-free", "mimo-pro"],
|
|
391
|
+
"mimo-v2-omni": ["mimo-v2-omni", "mimo-v2-omni-free", "mimo-omni"],
|
|
392
|
+
"mimo-v2-flash": ["mimo-v2-flash", "mimo-v2-flash-free", "mimo-flash"],
|
|
393
|
+
"big-pickle": ["big-pickle", "bigpickle"],
|
|
394
|
+
"minimax-m2.5": ["minimax-m2.5", "minimax-m2.5-free", "minimax-m25"],
|
|
395
|
+
"nvidia-nemotron-3-super-120b-a12b-reasoning": [
|
|
396
|
+
"nemotron-3-super",
|
|
397
|
+
"nemotron-3-super-free",
|
|
398
|
+
"nemotron-super",
|
|
399
|
+
"nemotron-3",
|
|
400
|
+
],
|
|
401
|
+
};
|
|
382
402
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
modelName,
|
|
387
|
-
action: "attempt",
|
|
388
|
-
});
|
|
403
|
+
// =============================================================================
|
|
404
|
+
// Strategy steps
|
|
405
|
+
// =============================================================================
|
|
389
406
|
|
|
390
|
-
|
|
407
|
+
function tryDirectSubstringMatch(
|
|
408
|
+
search: string,
|
|
409
|
+
provider: string | undefined,
|
|
410
|
+
modelId: string,
|
|
411
|
+
modelName: string,
|
|
412
|
+
): HardcodedBenchmark | null {
|
|
391
413
|
for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
|
|
392
414
|
string,
|
|
393
415
|
HardcodedBenchmark,
|
|
@@ -405,44 +427,16 @@ export function findHardcodedBenchmark(
|
|
|
405
427
|
return data;
|
|
406
428
|
}
|
|
407
429
|
}
|
|
430
|
+
return null;
|
|
431
|
+
}
|
|
408
432
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
"sonnet-3.5",
|
|
417
|
-
],
|
|
418
|
-
"claude-3-opus": ["claude-3-opus", "opus-3"],
|
|
419
|
-
"llama-3.1-instruct-405b": [
|
|
420
|
-
"llama-3.1-405b",
|
|
421
|
-
"llama3.1-405b",
|
|
422
|
-
"llama-405b",
|
|
423
|
-
],
|
|
424
|
-
"llama-3.1-instruct-70b": ["llama-3.1-70b", "llama3.1-70b", "llama-70b"],
|
|
425
|
-
"gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
|
|
426
|
-
"qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
|
|
427
|
-
"deepseek-v3.2-non-reasoning": [
|
|
428
|
-
"deepseek-v3",
|
|
429
|
-
"deepseekv3",
|
|
430
|
-
"deepseek-chat",
|
|
431
|
-
],
|
|
432
|
-
"mimo-v2-pro": ["mimo-v2-pro", "mimo-v2-pro-free", "mimo-pro"],
|
|
433
|
-
"mimo-v2-omni": ["mimo-v2-omni", "mimo-v2-omni-free", "mimo-omni"],
|
|
434
|
-
"mimo-v2-flash": ["mimo-v2-flash", "mimo-v2-flash-free", "mimo-flash"],
|
|
435
|
-
"big-pickle": ["big-pickle", "bigpickle"],
|
|
436
|
-
"minimax-m2.5": ["minimax-m2.5", "minimax-m2.5-free", "minimax-m25"],
|
|
437
|
-
"nvidia-nemotron-3-super-120b-a12b-reasoning": [
|
|
438
|
-
"nemotron-3-super",
|
|
439
|
-
"nemotron-3-super-free",
|
|
440
|
-
"nemotron-super",
|
|
441
|
-
"nemotron-3",
|
|
442
|
-
],
|
|
443
|
-
};
|
|
444
|
-
|
|
445
|
-
for (const [canonical, names] of Object.entries(variants)) {
|
|
433
|
+
function tryVariantAliasMatch(
|
|
434
|
+
search: string,
|
|
435
|
+
provider: string | undefined,
|
|
436
|
+
modelId: string,
|
|
437
|
+
modelName: string,
|
|
438
|
+
): HardcodedBenchmark | null {
|
|
439
|
+
for (const [canonical, names] of Object.entries(MODEL_VARIANTS)) {
|
|
446
440
|
if (names.some((n) => search.includes(n.toLowerCase()))) {
|
|
447
441
|
const data = HARDCODED_BENCHMARKS[canonical];
|
|
448
442
|
if (data) {
|
|
@@ -459,66 +453,115 @@ export function findHardcodedBenchmark(
|
|
|
459
453
|
}
|
|
460
454
|
}
|
|
461
455
|
}
|
|
456
|
+
return null;
|
|
457
|
+
}
|
|
462
458
|
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
action: "normalized",
|
|
473
|
-
strategy: providerStrategy,
|
|
474
|
-
normalizedId: providerNormalized,
|
|
475
|
-
});
|
|
459
|
+
function tryProviderNormalizedMatch(
|
|
460
|
+
modelId: string,
|
|
461
|
+
provider: string | undefined,
|
|
462
|
+
modelName: string,
|
|
463
|
+
): { result: HardcodedBenchmark | null; normalized: string } {
|
|
464
|
+
const { normalized, strategy } = applyProviderNormalization(
|
|
465
|
+
modelId,
|
|
466
|
+
provider,
|
|
467
|
+
);
|
|
476
468
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
string,
|
|
480
|
-
HardcodedBenchmark,
|
|
481
|
-
][]) {
|
|
482
|
-
if (providerNormalized.includes(key.toLowerCase())) {
|
|
483
|
-
logDebug({
|
|
484
|
-
provider,
|
|
485
|
-
modelId,
|
|
486
|
-
modelName,
|
|
487
|
-
action: "match",
|
|
488
|
-
strategy: `provider-normalized:${providerStrategy}`,
|
|
489
|
-
matchKey: key,
|
|
490
|
-
codingIndex: data.codingIndex,
|
|
491
|
-
});
|
|
492
|
-
return data;
|
|
493
|
-
}
|
|
494
|
-
}
|
|
469
|
+
if (normalized === modelId.toLowerCase()) {
|
|
470
|
+
return { result: null, normalized };
|
|
495
471
|
}
|
|
496
472
|
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
473
|
+
logDebug({
|
|
474
|
+
provider,
|
|
475
|
+
modelId,
|
|
476
|
+
modelName,
|
|
477
|
+
action: "normalized",
|
|
478
|
+
strategy,
|
|
479
|
+
normalizedId: normalized,
|
|
480
|
+
});
|
|
481
|
+
|
|
482
|
+
for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
|
|
483
|
+
string,
|
|
484
|
+
HardcodedBenchmark,
|
|
485
|
+
][]) {
|
|
486
|
+
if (normalized.includes(key.toLowerCase())) {
|
|
509
487
|
logDebug({
|
|
510
488
|
provider,
|
|
511
489
|
modelId,
|
|
512
490
|
modelName,
|
|
513
|
-
action: "
|
|
514
|
-
strategy:
|
|
515
|
-
|
|
491
|
+
action: "match",
|
|
492
|
+
strategy: `provider-normalized:${strategy}`,
|
|
493
|
+
matchKey: key,
|
|
494
|
+
codingIndex: data.codingIndex,
|
|
516
495
|
});
|
|
517
|
-
|
|
518
|
-
if (best) return best;
|
|
496
|
+
return { result: data, normalized };
|
|
519
497
|
}
|
|
520
498
|
}
|
|
521
499
|
|
|
500
|
+
return { result: null, normalized };
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
function tryPrefixFallback(
|
|
504
|
+
normalizedId: string,
|
|
505
|
+
provider: string | undefined,
|
|
506
|
+
modelId: string,
|
|
507
|
+
modelName: string,
|
|
508
|
+
): HardcodedBenchmark | null {
|
|
509
|
+
const baseId = extractBaseModelId(normalizedId);
|
|
510
|
+
if (!baseId) return null;
|
|
511
|
+
|
|
512
|
+
const best = findBestVariantByPrefix(baseId, provider, modelId);
|
|
513
|
+
if (best) return best;
|
|
514
|
+
|
|
515
|
+
// Try with word-order normalization
|
|
516
|
+
// (e.g., llama-3.3-70b-instruct → llama-3.3-instruct-70b)
|
|
517
|
+
const reordered = normalizeSizeTokenOrder(baseId);
|
|
518
|
+
if (reordered === baseId) return null;
|
|
519
|
+
|
|
520
|
+
logDebug({
|
|
521
|
+
provider,
|
|
522
|
+
modelId,
|
|
523
|
+
modelName,
|
|
524
|
+
action: "normalized",
|
|
525
|
+
strategy: "size-token-reorder",
|
|
526
|
+
normalizedId: reordered,
|
|
527
|
+
});
|
|
528
|
+
|
|
529
|
+
return findBestVariantByPrefix(reordered, provider, modelId);
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// =============================================================================
|
|
533
|
+
// Main lookup
|
|
534
|
+
// =============================================================================
|
|
535
|
+
|
|
536
|
+
export function findHardcodedBenchmark(
|
|
537
|
+
modelName: string,
|
|
538
|
+
modelId: string,
|
|
539
|
+
provider?: string,
|
|
540
|
+
): HardcodedBenchmark | null {
|
|
541
|
+
const search = `${modelName} ${modelId}`.toLowerCase();
|
|
542
|
+
|
|
543
|
+
logDebug({ provider, modelId, modelName, action: "attempt" });
|
|
544
|
+
|
|
545
|
+
// 1. Direct substring match
|
|
546
|
+
const direct = tryDirectSubstringMatch(search, provider, modelId, modelName);
|
|
547
|
+
if (direct) return direct;
|
|
548
|
+
|
|
549
|
+
// 2. Variant alias matching
|
|
550
|
+
const variant = tryVariantAliasMatch(search, provider, modelId, modelName);
|
|
551
|
+
if (variant) return variant;
|
|
552
|
+
|
|
553
|
+
// 3. Provider-specific normalization
|
|
554
|
+
const { result: normalizedResult, normalized } = tryProviderNormalizedMatch(
|
|
555
|
+
modelId,
|
|
556
|
+
provider,
|
|
557
|
+
modelName,
|
|
558
|
+
);
|
|
559
|
+
if (normalizedResult) return normalizedResult;
|
|
560
|
+
|
|
561
|
+
// 4. Prefix fallback with base model extraction
|
|
562
|
+
const prefix = tryPrefixFallback(normalized, provider, modelId, modelName);
|
|
563
|
+
if (prefix) return prefix;
|
|
564
|
+
|
|
522
565
|
// No match found
|
|
523
566
|
logDebug({
|
|
524
567
|
provider,
|
|
@@ -526,8 +569,8 @@ export function findHardcodedBenchmark(
|
|
|
526
569
|
modelName,
|
|
527
570
|
action: "miss",
|
|
528
571
|
strategy: "all-strategies-failed",
|
|
529
|
-
normalizedId:
|
|
530
|
-
details: `Final normalized: ${
|
|
572
|
+
normalizedId: normalized,
|
|
573
|
+
details: `Final normalized: ${normalized}`,
|
|
531
574
|
});
|
|
532
575
|
|
|
533
576
|
return null;
|
|
@@ -569,6 +612,45 @@ export function enhanceModelNameWithCodingIndex(
|
|
|
569
612
|
* Get statistics about model matching from the current session
|
|
570
613
|
* Note: This reads the log file and computes stats
|
|
571
614
|
*/
|
|
615
|
+
interface LogStats {
|
|
616
|
+
totalAttempts: number;
|
|
617
|
+
matches: number;
|
|
618
|
+
misses: number;
|
|
619
|
+
byProvider: Record<
|
|
620
|
+
string,
|
|
621
|
+
{ attempts: number; matches: number; misses: number }
|
|
622
|
+
>;
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
function parseLogLine(stats: LogStats, line: string): void {
|
|
626
|
+
if (!line.trim()) return;
|
|
627
|
+
const parts = line.split("|");
|
|
628
|
+
if (parts.length < 5) return;
|
|
629
|
+
|
|
630
|
+
const provider = parts[1] || "unknown";
|
|
631
|
+
const action = parts[4];
|
|
632
|
+
|
|
633
|
+
if (!stats.byProvider[provider]) {
|
|
634
|
+
stats.byProvider[provider] = { attempts: 0, matches: 0, misses: 0 };
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
if (action === "attempt") {
|
|
638
|
+
stats.totalAttempts++;
|
|
639
|
+
stats.byProvider[provider].attempts++;
|
|
640
|
+
} else if (action === "match") {
|
|
641
|
+
stats.matches++;
|
|
642
|
+
stats.byProvider[provider].matches++;
|
|
643
|
+
} else if (action === "miss") {
|
|
644
|
+
stats.misses++;
|
|
645
|
+
stats.byProvider[provider].misses++;
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
function computeMatchRate(stats: LogStats): number {
|
|
650
|
+
const total = stats.matches + stats.misses;
|
|
651
|
+
return total > 0 ? Math.round((stats.matches / total) * 100) : 0;
|
|
652
|
+
}
|
|
653
|
+
|
|
572
654
|
export function getMatchingStats(): {
|
|
573
655
|
totalAttempts: number;
|
|
574
656
|
matches: number;
|
|
@@ -579,58 +661,27 @@ export function getMatchingStats(): {
|
|
|
579
661
|
{ attempts: number; matches: number; misses: number }
|
|
580
662
|
>;
|
|
581
663
|
} {
|
|
582
|
-
const stats = {
|
|
664
|
+
const stats: LogStats = {
|
|
583
665
|
totalAttempts: 0,
|
|
584
666
|
matches: 0,
|
|
585
667
|
misses: 0,
|
|
586
|
-
|
|
587
|
-
byProvider: {} as Record<
|
|
588
|
-
string,
|
|
589
|
-
{ attempts: number; matches: number; misses: number }
|
|
590
|
-
>,
|
|
668
|
+
byProvider: {},
|
|
591
669
|
};
|
|
592
670
|
|
|
593
671
|
try {
|
|
594
672
|
if (!existsSync(LOG_FILE)) {
|
|
595
|
-
return stats;
|
|
673
|
+
return { ...stats, matchRate: 0 };
|
|
596
674
|
}
|
|
597
675
|
|
|
598
676
|
const content = readFileSync(LOG_FILE, "utf-8");
|
|
599
|
-
const
|
|
600
|
-
|
|
601
|
-
for (const line of lines) {
|
|
602
|
-
if (!line.trim()) continue;
|
|
603
|
-
const parts = line.split("|");
|
|
604
|
-
if (parts.length < 5) continue;
|
|
605
|
-
|
|
606
|
-
const provider = parts[1] || "unknown";
|
|
607
|
-
const action = parts[4];
|
|
608
|
-
|
|
609
|
-
if (!stats.byProvider[provider]) {
|
|
610
|
-
stats.byProvider[provider] = { attempts: 0, matches: 0, misses: 0 };
|
|
611
|
-
}
|
|
612
|
-
|
|
613
|
-
if (action === "attempt") {
|
|
614
|
-
stats.totalAttempts++;
|
|
615
|
-
stats.byProvider[provider].attempts++;
|
|
616
|
-
} else if (action === "match") {
|
|
617
|
-
stats.matches++;
|
|
618
|
-
stats.byProvider[provider].matches++;
|
|
619
|
-
} else if (action === "miss") {
|
|
620
|
-
stats.misses++;
|
|
621
|
-
stats.byProvider[provider].misses++;
|
|
622
|
-
}
|
|
677
|
+
for (const line of content.split("\n").slice(1)) {
|
|
678
|
+
parseLogLine(stats, line);
|
|
623
679
|
}
|
|
624
|
-
|
|
625
|
-
stats.matchRate =
|
|
626
|
-
stats.totalAttempts > 0
|
|
627
|
-
? Math.round((stats.matches / (stats.matches + stats.misses)) * 100)
|
|
628
|
-
: 0;
|
|
629
680
|
} catch {
|
|
630
681
|
// Return empty stats on error
|
|
631
682
|
}
|
|
632
683
|
|
|
633
|
-
return stats;
|
|
684
|
+
return { ...stats, matchRate: computeMatchRate(stats) };
|
|
634
685
|
}
|
|
635
686
|
|
|
636
687
|
// Need to import readFileSync for stats
|
package/providers/cline/cline.ts
CHANGED
|
@@ -138,19 +138,24 @@ function extractTaskBody(content: unknown): string {
|
|
|
138
138
|
return "";
|
|
139
139
|
}
|
|
140
140
|
|
|
141
|
-
function
|
|
142
|
-
|
|
143
|
-
|
|
141
|
+
function findLastClineWrappedMessage(messages: any[]): {
|
|
142
|
+
index: number;
|
|
143
|
+
transcript: string;
|
|
144
|
+
} {
|
|
144
145
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
145
146
|
if (messages[i]?.role !== "user") continue;
|
|
146
147
|
if (!isClineWrapped(messages[i]?.content)) continue;
|
|
147
|
-
|
|
148
|
-
baseTranscript = extractTaskBody(messages[i].content);
|
|
149
|
-
break;
|
|
148
|
+
return { index: i, transcript: extractTaskBody(messages[i].content) };
|
|
150
149
|
}
|
|
150
|
+
return { index: -1, transcript: "" };
|
|
151
|
+
}
|
|
151
152
|
|
|
153
|
+
function buildTranscriptParts(
|
|
154
|
+
messages: any[],
|
|
155
|
+
startIdx: number,
|
|
156
|
+
baseTranscript: string,
|
|
157
|
+
): string[] {
|
|
152
158
|
const parts: string[] = baseTranscript ? [baseTranscript] : [];
|
|
153
|
-
const startIdx = lastWrappedIdx >= 0 ? lastWrappedIdx + 1 : 0;
|
|
154
159
|
|
|
155
160
|
for (let i = startIdx; i < messages.length; i++) {
|
|
156
161
|
const msg = messages[i];
|
|
@@ -167,9 +172,10 @@ function shapeMessagesForCline(messages: any[]): any[] {
|
|
|
167
172
|
}
|
|
168
173
|
}
|
|
169
174
|
|
|
170
|
-
|
|
171
|
-
|
|
175
|
+
return parts;
|
|
176
|
+
}
|
|
172
177
|
|
|
178
|
+
function buildCollapsedMessage(messages: any[], transcript: string): any[] {
|
|
173
179
|
const collapsed: any[] = [];
|
|
174
180
|
const systemMsg = messages.find((m: any) => m?.role === "system");
|
|
175
181
|
if (systemMsg) {
|
|
@@ -182,13 +188,24 @@ function shapeMessagesForCline(messages: any[]): any[] {
|
|
|
182
188
|
content: [
|
|
183
189
|
{ type: "text", text: `<task>\n${transcript}\n</task>` },
|
|
184
190
|
{ type: "text", text: TASK_PROGRESS_BLOCK },
|
|
185
|
-
{ type: "text", text:
|
|
191
|
+
{ type: "text", text: buildEnvironmentDetails() },
|
|
186
192
|
],
|
|
187
193
|
});
|
|
188
194
|
|
|
189
195
|
return collapsed;
|
|
190
196
|
}
|
|
191
197
|
|
|
198
|
+
function shapeMessagesForCline(messages: any[]): any[] {
|
|
199
|
+
const { index: lastWrappedIdx, transcript: baseTranscript } =
|
|
200
|
+
findLastClineWrappedMessage(messages);
|
|
201
|
+
|
|
202
|
+
const startIdx = lastWrappedIdx >= 0 ? lastWrappedIdx + 1 : 0;
|
|
203
|
+
const parts = buildTranscriptParts(messages, startIdx, baseTranscript);
|
|
204
|
+
const transcript = parts.join("\n\n").trim() || "(no conversation yet)";
|
|
205
|
+
|
|
206
|
+
return buildCollapsedMessage(messages, transcript);
|
|
207
|
+
}
|
|
208
|
+
|
|
192
209
|
// =============================================================================
|
|
193
210
|
// Extension entry point
|
|
194
211
|
// =============================================================================
|
|
@@ -58,7 +58,9 @@ interface FetchModelsOptions {
|
|
|
58
58
|
async function fetchModelsFromEndpoint(
|
|
59
59
|
opts: FetchModelsOptions,
|
|
60
60
|
): Promise<ProviderModelConfig[]> {
|
|
61
|
-
|
|
61
|
+
let cleanBase = opts.baseUrl;
|
|
62
|
+
while (cleanBase.endsWith("/")) cleanBase = cleanBase.slice(0, -1);
|
|
63
|
+
const url = `${cleanBase}/models`;
|
|
62
64
|
const headers: Record<string, string> = {
|
|
63
65
|
Accept: "application/json",
|
|
64
66
|
Authorization: `Bearer ${opts.apiKey}`,
|
|
@@ -170,39 +170,35 @@ function inferModelFromId(id: string): ModelsDevModel | null {
|
|
|
170
170
|
// Fetch + map
|
|
171
171
|
// =============================================================================
|
|
172
172
|
|
|
173
|
-
async function
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
`${BASE_URL_NVIDIA}/models`,
|
|
182
|
-
{
|
|
183
|
-
headers: {
|
|
184
|
-
Authorization: `Bearer ${apiKey}`,
|
|
185
|
-
"User-Agent": "pi-free-providers",
|
|
186
|
-
},
|
|
173
|
+
async function fetchNvidiaApiModelIds(apiKey: string): Promise<Set<string>> {
|
|
174
|
+
try {
|
|
175
|
+
const response = await fetchWithRetry(
|
|
176
|
+
`${BASE_URL_NVIDIA}/models`,
|
|
177
|
+
{
|
|
178
|
+
headers: {
|
|
179
|
+
Authorization: `Bearer ${apiKey}`,
|
|
180
|
+
"User-Agent": "pi-free-providers",
|
|
187
181
|
},
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
182
|
+
},
|
|
183
|
+
3,
|
|
184
|
+
1000,
|
|
185
|
+
DEFAULT_FETCH_TIMEOUT_MS,
|
|
186
|
+
);
|
|
187
|
+
if (response.ok) {
|
|
188
|
+
const json = (await response.json()) as {
|
|
189
|
+
data?: Array<{ id: string }>;
|
|
190
|
+
};
|
|
191
|
+
if (json.data) {
|
|
192
|
+
return new Set(json.data.map((m) => m.id));
|
|
199
193
|
}
|
|
200
|
-
} catch (error) {
|
|
201
|
-
console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
|
|
202
194
|
}
|
|
195
|
+
} catch (error) {
|
|
196
|
+
console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
|
|
203
197
|
}
|
|
198
|
+
return new Set();
|
|
199
|
+
}
|
|
204
200
|
|
|
205
|
-
|
|
201
|
+
async function fetchModelsDevMetadata(): Promise<Map<string, ModelsDevModel>> {
|
|
206
202
|
const devModels = new Map<string, ModelsDevModel>();
|
|
207
203
|
try {
|
|
208
204
|
const response = await fetchWithRetry(
|
|
@@ -226,6 +222,27 @@ async function fetchNvidiaModels(
|
|
|
226
222
|
} catch (error) {
|
|
227
223
|
console.error("[nvidia] Failed to fetch models.dev", error);
|
|
228
224
|
}
|
|
225
|
+
return devModels;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
function isChatModel(m: ModelsDevModel): boolean {
|
|
229
|
+
const modalities = m.modalities;
|
|
230
|
+
if (!modalities) return true;
|
|
231
|
+
const output = modalities.output ?? [];
|
|
232
|
+
const input = modalities.input ?? [];
|
|
233
|
+
return output.includes("text") && input.includes("text");
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
async function fetchNvidiaModels(
|
|
237
|
+
apiKey?: string,
|
|
238
|
+
): Promise<ProviderModelConfig[]> {
|
|
239
|
+
// ── 1. Query NVIDIA's actual API (source of truth) ─────────────────
|
|
240
|
+
const apiModelIds = apiKey
|
|
241
|
+
? await fetchNvidiaApiModelIds(apiKey)
|
|
242
|
+
: new Set<string>();
|
|
243
|
+
|
|
244
|
+
// ── 2. Fetch models.dev for rich metadata (cost, limits, etc.) ─────
|
|
245
|
+
const devModels = await fetchModelsDevMetadata();
|
|
229
246
|
|
|
230
247
|
// ── 3. Build unified list (NVIDIA API wins; fallback to models.dev) ─
|
|
231
248
|
const modelIds =
|
|
@@ -233,30 +250,11 @@ async function fetchNvidiaModels(
|
|
|
233
250
|
|
|
234
251
|
const result = applyHidden(
|
|
235
252
|
modelIds
|
|
236
|
-
.map((id) =>
|
|
237
|
-
const dev = devModels.get(id);
|
|
238
|
-
if (dev) return dev;
|
|
239
|
-
return inferModelFromId(id);
|
|
240
|
-
})
|
|
253
|
+
.map((id) => devModels.get(id) ?? inferModelFromId(id))
|
|
241
254
|
.filter((m): m is ModelsDevModel => m !== null)
|
|
242
255
|
.filter((m) => isUsableModel(m.id, NVIDIA_MIN_SIZE_B))
|
|
243
|
-
.filter(
|
|
244
|
-
|
|
245
|
-
if (modalities) {
|
|
246
|
-
const output = modalities.output ?? [];
|
|
247
|
-
const input = modalities.input ?? [];
|
|
248
|
-
if (!output.includes("text")) return false;
|
|
249
|
-
if (!input.includes("text")) return false;
|
|
250
|
-
}
|
|
251
|
-
return true;
|
|
252
|
-
})
|
|
253
|
-
// Filter out known 404 models (listed but not provisioned for chat)
|
|
254
|
-
.filter((m) => {
|
|
255
|
-
if (NVIDIA_KNOWN_404_MODELS.has(m.id)) {
|
|
256
|
-
return false;
|
|
257
|
-
}
|
|
258
|
-
return true;
|
|
259
|
-
})
|
|
256
|
+
.filter(isChatModel)
|
|
257
|
+
.filter((m) => !NVIDIA_KNOWN_404_MODELS.has(m.id))
|
|
260
258
|
// NVIDIA is freemium — all models are usable with free credits.
|
|
261
259
|
// No cost filtering applied.
|
|
262
260
|
.map(
|