llm-checker 3.5.15 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -8
- package/analyzer/compatibility.js +5 -0
- package/analyzer/performance.js +5 -4
- package/bin/cli.js +5 -39
- package/bin/enhanced_cli.js +449 -24
- package/bin/mcp-server.mjs +266 -101
- package/package.json +13 -8
- package/src/ai/multi-objective-selector.js +118 -11
- package/src/calibration/calibration-manager.js +4 -1
- package/src/data/model-database.js +489 -5
- package/src/data/registry-ingestors.js +751 -0
- package/src/data/registry-recommender.js +514 -0
- package/src/data/seed/README.md +11 -3
- package/src/data/seed/models.db +0 -0
- package/src/data/sync-manager.js +32 -18
- package/src/hardware/backends/apple-silicon.js +5 -1
- package/src/hardware/backends/cuda-detector.js +47 -19
- package/src/hardware/backends/intel-detector.js +6 -2
- package/src/hardware/backends/rocm-detector.js +6 -2
- package/src/hardware/detector.js +57 -30
- package/src/hardware/unified-detector.js +129 -25
- package/src/index.js +68 -4
- package/src/models/ai-check-selector.js +36 -5
- package/src/models/deterministic-selector.js +179 -18
- package/src/models/expanded_database.js +9 -5
- package/src/models/intelligent-selector.js +87 -1
- package/src/models/moe-assumptions.js +11 -0
- package/src/models/requirements.js +16 -11
- package/src/models/scoring-core.js +341 -0
- package/src/models/scoring-engine.js +9 -2
- package/src/ollama/capacity-planner.js +15 -2
- package/src/ollama/client.js +70 -30
- package/src/ollama/enhanced-client.js +20 -2
- package/src/ollama/manager.js +14 -2
- package/src/policy/cli-policy.js +8 -2
- package/src/policy/policy-engine.js +2 -1
- package/src/provenance/model-provenance.js +4 -1
- package/src/ui/cli-theme.js +47 -7
- package/src/ui/interactive-panel.js +162 -24
|
@@ -249,40 +249,65 @@ class CUDADetector {
|
|
|
249
249
|
|
|
250
250
|
const lines = gpuData.split('\n');
|
|
251
251
|
|
|
252
|
+
// Older drivers emit fewer columns (e.g. no power/clocks), and the CSV
|
|
253
|
+
// separator can be either ", " or "," depending on driver/locale. Split
|
|
254
|
+
// tolerantly and only require the leading identity + memory columns so a
|
|
255
|
+
// GPU is never dropped just because optional trailing fields are absent.
|
|
256
|
+
const toMB = (value) => {
|
|
257
|
+
const n = parseInt(value, 10);
|
|
258
|
+
return Number.isFinite(n) ? n : 0;
|
|
259
|
+
};
|
|
260
|
+
const toGB = (value) => {
|
|
261
|
+
const mb = toMB(value);
|
|
262
|
+
return mb > 0 ? Math.round(mb / 1024) : 0;
|
|
263
|
+
};
|
|
264
|
+
const toInt = (value) => {
|
|
265
|
+
const n = parseInt(value, 10);
|
|
266
|
+
return Number.isFinite(n) ? n : 0;
|
|
267
|
+
};
|
|
268
|
+
const toFloat = (value) => {
|
|
269
|
+
const n = parseFloat(value);
|
|
270
|
+
return Number.isFinite(n) ? n : 0;
|
|
271
|
+
};
|
|
272
|
+
|
|
252
273
|
for (const line of lines) {
|
|
253
|
-
|
|
274
|
+
if (!line || !line.trim()) continue;
|
|
275
|
+
const parts = line.split(/\s*,\s*/).map(p => p.trim());
|
|
276
|
+
|
|
277
|
+
// Need at least index, name, uuid, memory.total to describe a GPU.
|
|
278
|
+
if (parts.length < 4) continue;
|
|
254
279
|
|
|
255
|
-
|
|
280
|
+
const memTotalMB = toMB(parts[3]);
|
|
256
281
|
|
|
257
282
|
const gpu = {
|
|
258
|
-
index:
|
|
283
|
+
index: toInt(parts[0]),
|
|
259
284
|
name: parts[1] || 'Unknown NVIDIA GPU',
|
|
260
285
|
uuid: parts[2] || null,
|
|
261
286
|
memory: {
|
|
262
|
-
total:
|
|
263
|
-
free:
|
|
264
|
-
used:
|
|
287
|
+
total: toGB(parts[3]), // Convert MB to GB
|
|
288
|
+
free: toGB(parts[4]),
|
|
289
|
+
used: toGB(parts[5])
|
|
265
290
|
},
|
|
266
291
|
computeMode: parts[6] || 'Default',
|
|
267
292
|
pcie: {
|
|
268
|
-
generation:
|
|
269
|
-
width:
|
|
293
|
+
generation: toInt(parts[7]),
|
|
294
|
+
width: toInt(parts[8])
|
|
270
295
|
},
|
|
271
296
|
power: {
|
|
272
|
-
draw:
|
|
273
|
-
limit:
|
|
297
|
+
draw: toFloat(parts[9]),
|
|
298
|
+
limit: toFloat(parts[10])
|
|
274
299
|
},
|
|
275
|
-
temperature:
|
|
300
|
+
temperature: toInt(parts[11]),
|
|
276
301
|
utilization: {
|
|
277
|
-
gpu:
|
|
278
|
-
memory:
|
|
302
|
+
gpu: toInt(parts[12]),
|
|
303
|
+
memory: toInt(parts[13])
|
|
279
304
|
},
|
|
280
305
|
clocks: {
|
|
281
|
-
current:
|
|
282
|
-
max:
|
|
306
|
+
current: toInt(parts[14]),
|
|
307
|
+
max: toInt(parts[15])
|
|
283
308
|
},
|
|
284
309
|
capabilities: this.getGPUCapabilities(parts[1]),
|
|
285
|
-
speedCoefficient: this.calculateSpeedCoefficient(parts[1],
|
|
310
|
+
speedCoefficient: this.calculateSpeedCoefficient(parts[1], memTotalMB)
|
|
286
311
|
};
|
|
287
312
|
|
|
288
313
|
result.gpus.push(gpu);
|
|
@@ -298,15 +323,18 @@ class CUDADetector {
|
|
|
298
323
|
|
|
299
324
|
const lines = simpleQuery.split('\n');
|
|
300
325
|
for (let i = 0; i < lines.length; i++) {
|
|
301
|
-
|
|
302
|
-
const
|
|
326
|
+
if (!lines[i] || !lines[i].trim()) continue;
|
|
327
|
+
const [name, memMB] = lines[i].split(/\s*,\s*/).map(p => p.trim());
|
|
328
|
+
const parsedMB = parseInt(memMB, 10);
|
|
329
|
+
const memMBSafe = Number.isFinite(parsedMB) ? parsedMB : 0;
|
|
330
|
+
const memGB = memMBSafe > 0 ? Math.round(memMBSafe / 1024) : 0;
|
|
303
331
|
|
|
304
332
|
result.gpus.push({
|
|
305
333
|
index: i,
|
|
306
334
|
name: name || 'NVIDIA GPU',
|
|
307
335
|
memory: { total: memGB, free: memGB, used: 0 },
|
|
308
336
|
capabilities: this.getGPUCapabilities(name),
|
|
309
|
-
speedCoefficient: this.calculateSpeedCoefficient(name,
|
|
337
|
+
speedCoefficient: this.calculateSpeedCoefficient(name, memMBSafe)
|
|
310
338
|
});
|
|
311
339
|
result.totalVRAM += memGB;
|
|
312
340
|
}
|
|
@@ -111,8 +111,12 @@ class IntelDetector {
|
|
|
111
111
|
const name = nameMatch[0].replace(/Corporation\s*/i, '').trim();
|
|
112
112
|
const isDedicated = name.toLowerCase().includes('arc');
|
|
113
113
|
|
|
114
|
-
//
|
|
115
|
-
|
|
114
|
+
// Prefer the model-based estimate: getVRAMFromSysfs reads the PCI
|
|
115
|
+
// MMIO BAR size, which is NOT the card's VRAM (a non-Resizable-BAR
|
|
116
|
+
// Arc reports ~256M while having 8-16GB), so a wrong BAR value must
|
|
117
|
+
// not shadow the reliable per-model estimate. BAR is only a last
|
|
118
|
+
// resort when the model can't be recognized.
|
|
119
|
+
let vram = this.estimateVRAM(name) || this.getVRAMFromSysfs(block);
|
|
116
120
|
|
|
117
121
|
const gpu = {
|
|
118
122
|
index: result.gpus.length,
|
|
@@ -942,8 +942,12 @@ class ROCmDetector {
|
|
|
942
942
|
// Try to match device ID to specific variant
|
|
943
943
|
const deviceInfo = ROCmDetector.AMD_DEVICE_IDS[deviceId];
|
|
944
944
|
if (deviceInfo) return deviceInfo.name;
|
|
945
|
-
//
|
|
946
|
-
|
|
945
|
+
// Unknown device ID: lspci groups several SKUs behind one string
|
|
946
|
+
// (e.g. "Radeon RX 7900 XT/7900 XTX/7900M"). Committing to variants[0]
|
|
947
|
+
// mislabels the card as the lowest-tier SKU and yields the wrong VRAM,
|
|
948
|
+
// so keep the full variant list — honestly ambiguous beats confidently
|
|
949
|
+
// wrong.
|
|
950
|
+
return `AMD Radeon ${variants.join('/')}`;
|
|
947
951
|
}
|
|
948
952
|
return `AMD Radeon ${bracketName}`;
|
|
949
953
|
}
|
package/src/hardware/detector.js
CHANGED
|
@@ -85,12 +85,16 @@ class HardwareDetector {
|
|
|
85
85
|
const freeGB = Math.round(memory.free / (1024 ** 3));
|
|
86
86
|
const usedGB = totalGB - freeGB;
|
|
87
87
|
|
|
88
|
+
// Guard against a zero/unknown total (some virtualized or sandboxed hosts
|
|
89
|
+
// report memory.total === 0), which would otherwise make usagePercent NaN.
|
|
90
|
+
const usagePercent = totalGB > 0 ? Math.round((usedGB / totalGB) * 100) : 0;
|
|
91
|
+
|
|
88
92
|
return {
|
|
89
93
|
total: totalGB,
|
|
90
94
|
free: freeGB,
|
|
91
95
|
used: usedGB,
|
|
92
96
|
available: Math.round(memory.available / (1024 ** 3)),
|
|
93
|
-
usagePercent
|
|
97
|
+
usagePercent,
|
|
94
98
|
swapTotal: Math.round(memory.swaptotal / (1024 ** 3)),
|
|
95
99
|
swapUsed: Math.round(memory.swapused / (1024 ** 3)),
|
|
96
100
|
score: this.calculateMemoryScore(totalGB, freeGB)
|
|
@@ -420,7 +424,12 @@ class HardwareDetector {
|
|
|
420
424
|
driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
|
|
421
425
|
};
|
|
422
426
|
} catch (error) {
|
|
423
|
-
// Keep systeminformation-only results when backend-specific detection is
|
|
427
|
+
// Keep systeminformation-only results when backend-specific detection is
|
|
428
|
+
// unavailable. Surface the cause under a debug flag so a genuine bug in the
|
|
429
|
+
// enrichment path is distinguishable from "no backend tools installed".
|
|
430
|
+
if (process.env.DEBUG_GPU || process.env.LLM_CHECKER_DEBUG) {
|
|
431
|
+
console.error('[llm-checker] enrichWithUnifiedHardware failed:', error && error.stack ? error.stack : error);
|
|
432
|
+
}
|
|
424
433
|
}
|
|
425
434
|
}
|
|
426
435
|
|
|
@@ -553,8 +562,23 @@ class HardwareDetector {
|
|
|
553
562
|
|
|
554
563
|
// NVIDIA data-center / workstation
|
|
555
564
|
if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
|
|
565
|
+
|
|
566
|
+
// NVIDIA Blackwell / Ada / Hopper workstation & datacenter cards. These are
|
|
567
|
+
// matched BEFORE the generic "rtx -> 8" fallback so high-VRAM professional
|
|
568
|
+
// GPUs (e.g. "RTX PRO 6000") are not collapsed to 8GB (issue #88).
|
|
569
|
+
if (modelLower.includes('rtx pro 6000') || modelLower.includes('rtx 6000 blackwell')) return 96;
|
|
570
|
+
if (modelLower.includes('rtx 6000 ada') || modelLower.includes('rtx 5000 ada')) return 48;
|
|
571
|
+
if (modelLower.includes('rtx a6000') || modelLower.includes('a6000')) return 48;
|
|
572
|
+
if (modelLower.includes('rtx a5000') || modelLower.includes('a5000')) return 24;
|
|
573
|
+
if (modelLower.includes('l40s') || modelLower.includes('l40')) return 48;
|
|
574
|
+
if (modelLower.includes('h200')) return 141;
|
|
575
|
+
if (modelLower.includes('h100')) return 80;
|
|
576
|
+
if (modelLower.includes('a100') && (modelLower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(modelLower))) return 40;
|
|
577
|
+
if (modelLower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
|
|
578
|
+
if (modelLower.includes('a40')) return 48;
|
|
579
|
+
|
|
556
580
|
if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
|
|
557
|
-
|
|
581
|
+
|
|
558
582
|
// NVIDIA RTX 50 series
|
|
559
583
|
if (modelLower.includes('rtx 5090')) return 32;
|
|
560
584
|
if (modelLower.includes('rtx 5080')) return 16;
|
|
@@ -635,7 +659,7 @@ class HardwareDetector {
|
|
|
635
659
|
else score += totalGB * 2;
|
|
636
660
|
|
|
637
661
|
// Score basado en RAM disponible
|
|
638
|
-
const freePercent = (freeGB / totalGB) * 100;
|
|
662
|
+
const freePercent = totalGB > 0 ? (freeGB / totalGB) * 100 : 0;
|
|
639
663
|
if (freePercent > 50) score += 20;
|
|
640
664
|
else if (freePercent > 30) score += 15;
|
|
641
665
|
else if (freePercent > 20) score += 10;
|
|
@@ -738,34 +762,37 @@ class HardwareDetector {
|
|
|
738
762
|
* Normalize VRAM values (handle different units and wrong totals)
|
|
739
763
|
*/
|
|
740
764
|
normalizeVRAM(vram) {
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
//
|
|
746
|
-
|
|
747
|
-
|
|
765
|
+
const raw = Number(vram);
|
|
766
|
+
if (!Number.isFinite(raw) || raw <= 0) return 0;
|
|
767
|
+
|
|
768
|
+
// Inputs reaching this function come from systeminformation / lspci (which
|
|
769
|
+
// express controller VRAM in megabytes), from raw byte counts on systems
|
|
770
|
+
// that report that way, and increasingly from our own curated GB tables
|
|
771
|
+
// (estimateVRAMFromModel, device-id maps) fed back through here. The unit
|
|
772
|
+
// is inferred from magnitude:
|
|
773
|
+
//
|
|
774
|
+
// > 1e6 -> raw bytes (a 192 GB card is ~2.06e11 bytes, while
|
|
775
|
+
// the same card in MB is ~196,608, well under 1e6).
|
|
776
|
+
// >= 1024 -> megabytes (the smallest dedicated framebuffer that
|
|
777
|
+
// still rounds to >=1 GB; this is the systeminformation
|
|
778
|
+
// reporting range, e.g. 8192, 16384, 16368).
|
|
779
|
+
// 1 <= v <= 256 -> already gigabytes. Real single-GPU VRAM tops out
|
|
780
|
+
// around 192 GB (H200 ~141, B200/MI ~192), so any
|
|
781
|
+
// small integer in this band is a GB value. This is
|
|
782
|
+
// the dead-zone fix for issue #88: normalizeVRAM(96)
|
|
783
|
+
// used to return 0 (treated 96 as 96 MB -> 0 GB).
|
|
784
|
+
// 257 <= v < 1024 -> sub-gigabyte framebuffer in MB (e.g. a 512 MB
|
|
785
|
+
// aperture) -> rounds to 0/1 GB as before.
|
|
786
|
+
if (raw > 1_000_000) {
|
|
787
|
+
return Math.max(0, Math.round(raw / (1024 * 1024 * 1024))); // bytes -> GB
|
|
748
788
|
}
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
if (vramValue >= 1024) {
|
|
752
|
-
// Values >= 1024 are likely MB, convert to GB
|
|
753
|
-
vramValue = Math.round(vramValue / 1024);
|
|
754
|
-
} else if (vramValue >= 512 && vramValue < 1024) {
|
|
755
|
-
// 512-1023 MB, round to 1GB
|
|
756
|
-
vramValue = 1;
|
|
757
|
-
} else if (vramValue > 80) {
|
|
758
|
-
// Values between 80-511 are likely incorrect MB values, treat as MB
|
|
759
|
-
vramValue = Math.round(vramValue / 1024) || 1;
|
|
760
|
-
} else if (vramValue >= 1 && vramValue <= 80) {
|
|
761
|
-
// Values 1-80 are likely already in GB, keep as is
|
|
762
|
-
vramValue = vramValue;
|
|
763
|
-
} else {
|
|
764
|
-
// Values < 1 round to 0
|
|
765
|
-
vramValue = 0;
|
|
789
|
+
if (raw >= 1024) {
|
|
790
|
+
return Math.max(0, Math.round(raw / 1024)); // MB -> GB
|
|
766
791
|
}
|
|
767
|
-
|
|
768
|
-
|
|
792
|
+
if (raw <= 256) {
|
|
793
|
+
return Math.round(raw); // already GB (plausible single-GPU range)
|
|
794
|
+
}
|
|
795
|
+
return Math.max(0, Math.round(raw / 1024)); // 257..1023 MB -> GB
|
|
769
796
|
}
|
|
770
797
|
|
|
771
798
|
/**
|
|
@@ -13,6 +13,21 @@ const si = require('systeminformation');
|
|
|
13
13
|
const { execSync } = require('child_process');
|
|
14
14
|
const { normalizePlatform } = require('../utils/platform');
|
|
15
15
|
|
|
16
|
+
// Recent GPUs whose PCI device id is not yet resolved to a model name by the
|
|
17
|
+
// distro pci.ids database (so lspci / systeminformation report them as a bare
|
|
18
|
+
// "Device <id>"). Mapping the device id lets us (a) give them a real name and
|
|
19
|
+
// (b) collapse the multiple raw views of the SAME card that different detection
|
|
20
|
+
// sources produce into one inventory entry. Unknown ids degrade gracefully to a
|
|
21
|
+
// stable `pci:<id>` match key, so this table only needs the newest cards.
|
|
22
|
+
const PCI_GPU_MAP = {
|
|
23
|
+
// NVIDIA Blackwell (RTX 50 series, desktop)
|
|
24
|
+
'2f04': { family: 'rtx5070', type: 'dedicated', name: 'NVIDIA GeForce RTX 5070' },
|
|
25
|
+
'2c02': { family: 'rtx5080', type: 'dedicated', name: 'NVIDIA GeForce RTX 5080' },
|
|
26
|
+
'2b85': { family: 'rtx5090', type: 'dedicated', name: 'NVIDIA GeForce RTX 5090' },
|
|
27
|
+
// AMD Raphael / Granite Ridge desktop iGPU (Ryzen 7000/9000 non-G)
|
|
28
|
+
'13c0': { family: 'amd-raphael-igpu', type: 'integrated', name: 'AMD Radeon Graphics (Raphael)' }
|
|
29
|
+
};
|
|
30
|
+
|
|
16
31
|
class UnifiedDetector {
|
|
17
32
|
constructor() {
|
|
18
33
|
this.backends = {
|
|
@@ -613,14 +628,19 @@ class UnifiedDetector {
|
|
|
613
628
|
|
|
614
629
|
const normalized = controllers
|
|
615
630
|
.map((controller) => {
|
|
616
|
-
|
|
631
|
+
let name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
|
|
617
632
|
if (!name || name.toLowerCase() === 'unknown') return null;
|
|
618
633
|
if (this.isRemoteDisplayModel(name)) return null;
|
|
619
634
|
|
|
620
635
|
const nameLower = name.toLowerCase();
|
|
621
636
|
if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
|
|
622
637
|
|
|
623
|
-
|
|
638
|
+
// Resolve recent cards that the runtime could only report as a bare
|
|
639
|
+
// "Device <id>" so they get a real name and correct integrated flag.
|
|
640
|
+
const mapped = this.resolveMappedGpu(name) || this.resolveMappedGpu(controller?.deviceId);
|
|
641
|
+
if (mapped) name = mapped.name;
|
|
642
|
+
|
|
643
|
+
const isIntegrated = mapped ? mapped.type === 'integrated' : this.isIntegratedGPUModel(name);
|
|
624
644
|
let vram = isIntegrated
|
|
625
645
|
? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
|
|
626
646
|
: this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
|
|
@@ -711,30 +731,55 @@ class UnifiedDetector {
|
|
|
711
731
|
|
|
712
732
|
if (!isNvidia && !isAMD && !isIntel) continue;
|
|
713
733
|
|
|
714
|
-
const
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
.trim();
|
|
734
|
+
const vendorLabel = isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel');
|
|
735
|
+
const pciId = this.extractPciDeviceId(line);
|
|
736
|
+
const mapped = this.resolveMappedGpu(line);
|
|
718
737
|
|
|
738
|
+
// Prefer the resolved model name inside a trailing "[Model] [vvvv:dddd]"
|
|
739
|
+
// pair (e.g. "[GeForce RTX 4060]"). Otherwise clean the raw lspci line
|
|
740
|
+
// down to a readable device string instead of using the whole line.
|
|
719
741
|
const bracketName = line.match(/\[(?![0-9a-f]{4}:[0-9a-f]{4}\])([^\]]+)\]\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i);
|
|
720
|
-
|
|
721
|
-
|
|
742
|
+
let name = (bracketName?.[1] || '').replace(/\s+/g, ' ').trim();
|
|
743
|
+
|
|
744
|
+
if (!name) {
|
|
745
|
+
name = line
|
|
746
|
+
.replace(/^[0-9a-f]{2,4}:[0-9a-f]{2}\.[0-9a-f]\s+/i, '') // PCI address
|
|
747
|
+
.replace(/^(?:vga compatible|3d|display)\s+controller\s+\[[0-9a-f]{4}\]:\s*/i, '') // class prefix
|
|
748
|
+
.replace(/\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i, '') // [vvvv:dddd]
|
|
749
|
+
.replace(/\s*\(rev\s+[0-9a-f]+\)\s*$/i, '') // (rev xx)
|
|
750
|
+
.replace(/\b(?:corporation|corp\.?|inc\.?|advanced micro devices,?)\b/gi, '')
|
|
751
|
+
.replace(/\[amd\/ati\]/gi, '')
|
|
752
|
+
.replace(/\s+/g, ' ')
|
|
753
|
+
.trim();
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
// If the card could not be resolved to a real model, give it a stable,
|
|
757
|
+
// readable name that carries the PCI id so it dedupes across sources.
|
|
758
|
+
const meaningful = name.replace(/\b(?:nvidia|amd|ati|intel|device|graphics|gpu|controller)\b/gi, '').replace(/[^a-z0-9]/gi, '').trim();
|
|
759
|
+
if (mapped) {
|
|
760
|
+
name = mapped.name;
|
|
761
|
+
} else if (!meaningful) {
|
|
762
|
+
name = pciId ? `${vendorLabel} Device ${pciId.toUpperCase()}` : `${vendorLabel} GPU`;
|
|
763
|
+
}
|
|
722
764
|
|
|
723
|
-
const isIntegrated =
|
|
765
|
+
const isIntegrated = mapped
|
|
766
|
+
? mapped.type === 'integrated'
|
|
767
|
+
: (this.isIntegratedGPUModel(name) || (isIntel && !/\barc\b/i.test(name)));
|
|
724
768
|
let vram = this.estimateFallbackVRAM(name);
|
|
725
769
|
if (isIntegrated) {
|
|
726
770
|
vram = 0;
|
|
727
771
|
}
|
|
728
772
|
|
|
729
|
-
const dedupeKey = `${
|
|
773
|
+
const dedupeKey = `${this.getGpuMatchKey(name)}|${isIntegrated ? 'i' : 'd'}`;
|
|
730
774
|
if (seen.has(dedupeKey)) continue;
|
|
731
775
|
seen.add(dedupeKey);
|
|
732
776
|
|
|
733
777
|
results.push({
|
|
734
778
|
name,
|
|
735
|
-
vendor:
|
|
779
|
+
vendor: vendorLabel,
|
|
736
780
|
type: isIntegrated ? 'integrated' : 'dedicated',
|
|
737
781
|
memory: { total: vram },
|
|
782
|
+
pciId: pciId || null,
|
|
738
783
|
source: 'lspci'
|
|
739
784
|
});
|
|
740
785
|
}
|
|
@@ -746,22 +791,27 @@ class UnifiedDetector {
|
|
|
746
791
|
const num = Number(value);
|
|
747
792
|
if (!Number.isFinite(num) || num <= 0) return 0;
|
|
748
793
|
|
|
749
|
-
//
|
|
750
|
-
|
|
751
|
-
|
|
794
|
+
// Unit inference by magnitude, kept consistent with
|
|
795
|
+
// HardwareDetector.normalizeVRAM so both detection paths agree:
|
|
796
|
+
//
|
|
797
|
+
// > 1e6 -> raw bytes.
|
|
798
|
+
// >= 1024 -> megabytes (systeminformation reporting range).
|
|
799
|
+
// 1 <= v <= 256 -> already gigabytes. The previous "1..80 means GB"
|
|
800
|
+
// band silently returned 0 for legitimate large GB
|
|
801
|
+
// values, so normalizeFallbackVRAM(192) was 0 — the
|
|
802
|
+
// 192 GB box in issue #88 collapsed to nothing. A
|
|
803
|
+
// single GPU realistically tops out around 192 GB.
|
|
804
|
+
// 257 <= v < 1024 -> sub-gigabyte framebuffer in MB -> rounds to 0/1 GB.
|
|
805
|
+
if (num > 1_000_000) {
|
|
806
|
+
return Math.max(0, Math.round(num / (1024 * 1024 * 1024))); // bytes -> GB
|
|
752
807
|
}
|
|
753
|
-
|
|
754
|
-
// MB -> GB
|
|
755
808
|
if (num >= 1024) {
|
|
756
|
-
return Math.round(num / 1024);
|
|
809
|
+
return Math.max(0, Math.round(num / 1024)); // MB -> GB
|
|
757
810
|
}
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
if (num >= 1 && num <= 80) {
|
|
761
|
-
return Math.round(num);
|
|
811
|
+
if (num <= 256) {
|
|
812
|
+
return Math.round(num); // already GB (plausible single-GPU range)
|
|
762
813
|
}
|
|
763
|
-
|
|
764
|
-
return 0;
|
|
814
|
+
return Math.max(0, Math.round(num / 1024)); // 257..1023 MB -> GB
|
|
765
815
|
}
|
|
766
816
|
|
|
767
817
|
isIntegratedGPUModel(model) {
|
|
@@ -799,6 +849,21 @@ class UnifiedDetector {
|
|
|
799
849
|
if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
|
|
800
850
|
if (lower.includes('rx 6700')) return 12;
|
|
801
851
|
|
|
852
|
+
// NVIDIA workstation / datacenter (Blackwell / Ada / Hopper / Ampere).
|
|
853
|
+
// Matched BEFORE the consumer RTX entries and the generic fallbacks so a
|
|
854
|
+
// high-VRAM professional card is not collapsed to a consumer-tier value or
|
|
855
|
+
// 0 (issue #88: dual "RTX PRO 6000" must reach ~192GB total, not ~16GB).
|
|
856
|
+
if (lower.includes('rtx pro 6000') || lower.includes('rtx 6000 blackwell')) return 96;
|
|
857
|
+
if (lower.includes('rtx 6000 ada') || lower.includes('rtx 5000 ada')) return 48;
|
|
858
|
+
if (lower.includes('rtx a6000') || lower.includes('a6000')) return 48;
|
|
859
|
+
if (lower.includes('rtx a5000') || lower.includes('a5000')) return 24;
|
|
860
|
+
if (lower.includes('l40s') || lower.includes('l40')) return 48;
|
|
861
|
+
if (lower.includes('h200')) return 141;
|
|
862
|
+
if (lower.includes('h100')) return 80;
|
|
863
|
+
if (lower.includes('a100') && (lower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(lower))) return 40;
|
|
864
|
+
if (lower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
|
|
865
|
+
if (lower.includes('a40')) return 48;
|
|
866
|
+
|
|
802
867
|
if (lower.includes('rtx 5090')) return 32;
|
|
803
868
|
if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
|
|
804
869
|
if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
|
|
@@ -817,6 +882,15 @@ class UnifiedDetector {
|
|
|
817
882
|
return `${familyMatch[1]}${familyMatch[2]}`;
|
|
818
883
|
}
|
|
819
884
|
|
|
885
|
+
// Different detection sources describe an unresolved card in different
|
|
886
|
+
// ways for the SAME hardware, e.g. systeminformation "Device 2f04" and
|
|
887
|
+
// lspci "...Device [10de:2f04]". Key on the PCI device id (mapped to a
|
|
888
|
+
// canonical family when known) so those collapse to one inventory entry.
|
|
889
|
+
const pciId = this.extractPciDeviceId(name);
|
|
890
|
+
if (pciId) {
|
|
891
|
+
return (PCI_GPU_MAP[pciId] && PCI_GPU_MAP[pciId].family) || `pci:${pciId}`;
|
|
892
|
+
}
|
|
893
|
+
|
|
820
894
|
const concise = lower
|
|
821
895
|
.replace(/nvidia|amd|ati|intel|corporation|geforce|radeon|graphics/g, '')
|
|
822
896
|
.replace(/\s+/g, ' ')
|
|
@@ -825,6 +899,26 @@ class UnifiedDetector {
|
|
|
825
899
|
return concise || lower;
|
|
826
900
|
}
|
|
827
901
|
|
|
902
|
+
/**
|
|
903
|
+
* Extract a 4-hex PCI device id from a GPU name/description, handling both the
|
|
904
|
+
* lspci "[vendor:device]" form and the bare "Device <id>" form that
|
|
905
|
+
* systeminformation emits for cards it cannot name. Returns null when none.
|
|
906
|
+
*/
|
|
907
|
+
extractPciDeviceId(text) {
|
|
908
|
+
const value = String(text || '');
|
|
909
|
+
const bracket = value.match(/\[[0-9a-f]{4}:([0-9a-f]{4})\]/i);
|
|
910
|
+
if (bracket) return bracket[1].toLowerCase();
|
|
911
|
+
const bare = value.match(/\bdevice\s+([0-9a-f]{4})\b/i);
|
|
912
|
+
if (bare) return bare[1].toLowerCase();
|
|
913
|
+
return null;
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
/** Look up a curated mapping for a recent card by PCI device id (or null). */
|
|
917
|
+
resolveMappedGpu(text) {
|
|
918
|
+
const pciId = this.extractPciDeviceId(text);
|
|
919
|
+
return pciId && PCI_GPU_MAP[pciId] ? { pciId, ...PCI_GPU_MAP[pciId] } : null;
|
|
920
|
+
}
|
|
921
|
+
|
|
828
922
|
/**
|
|
829
923
|
* Generate hardware fingerprint for benchmarks
|
|
830
924
|
*/
|
|
@@ -879,9 +973,19 @@ class UnifiedDetector {
|
|
|
879
973
|
summary.bestBackend === 'metal' ||
|
|
880
974
|
(summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
|
|
881
975
|
) {
|
|
882
|
-
|
|
976
|
+
const effectiveMemory = Number(summary.effectiveMemory);
|
|
977
|
+
if (!Number.isFinite(effectiveMemory) || effectiveMemory <= 0) return false;
|
|
978
|
+
return sizeGB <= (effectiveMemory - 2);
|
|
883
979
|
} else {
|
|
884
|
-
const
|
|
980
|
+
const totalVRAM = Number(summary.totalVRAM);
|
|
981
|
+
if (!Number.isFinite(totalVRAM) || totalVRAM <= 0) return false;
|
|
982
|
+
|
|
983
|
+
// Guard the per-GPU divisor: gpuCount can be 0 when the summary was
|
|
984
|
+
// built without resolved GPU memory, which previously produced
|
|
985
|
+
// Infinity (totalVRAM / 0) and made any model "fit".
|
|
986
|
+
const gpuCount = Math.max(1, Number(summary.gpuCount) || 0);
|
|
987
|
+
const availableVRAM = useMultiGPU ? totalVRAM : (totalVRAM / gpuCount);
|
|
988
|
+
if (!Number.isFinite(availableVRAM) || availableVRAM <= 0) return false;
|
|
885
989
|
return sizeGB <= (availableVRAM - 2);
|
|
886
990
|
}
|
|
887
991
|
}
|
package/src/index.js
CHANGED
|
@@ -20,6 +20,17 @@ const {
|
|
|
20
20
|
} = require('./provenance/model-provenance');
|
|
21
21
|
const { normalizePlatform } = require('./utils/platform');
|
|
22
22
|
|
|
23
|
+
function normalizeRecommendationRuntime(runtime = 'auto') {
|
|
24
|
+
const normalized = String(runtime || 'auto').trim().toLowerCase();
|
|
25
|
+
if (['auto', 'all', '*'].includes(normalized)) return 'auto';
|
|
26
|
+
if (['ollama', 'vllm', 'mlx', 'llama.cpp', 'llamacpp', 'llama_cpp', 'transformers', 'hf'].includes(normalized)) {
|
|
27
|
+
if (normalized === 'llamacpp' || normalized === 'llama_cpp') return 'llama.cpp';
|
|
28
|
+
if (normalized === 'hf') return 'transformers';
|
|
29
|
+
return normalized;
|
|
30
|
+
}
|
|
31
|
+
return normalizeRuntime(normalized);
|
|
32
|
+
}
|
|
33
|
+
|
|
23
34
|
class LLMChecker {
|
|
24
35
|
constructor(options = {}) {
|
|
25
36
|
this.hardwareDetector = new HardwareDetector();
|
|
@@ -2467,7 +2478,59 @@ class LLMChecker {
|
|
|
2467
2478
|
async generateIntelligentRecommendations(hardware, options = {}) {
|
|
2468
2479
|
try {
|
|
2469
2480
|
this.logger.info('Generating intelligent recommendations...');
|
|
2470
|
-
const selectedRuntime =
|
|
2481
|
+
const selectedRuntime = normalizeRecommendationRuntime(options.runtime || 'auto');
|
|
2482
|
+
const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
|
|
2483
|
+
|
|
2484
|
+
if (options.registry !== false) {
|
|
2485
|
+
let registryRecommender = null;
|
|
2486
|
+
try {
|
|
2487
|
+
const { RegistryRecommender } = require('./data/registry-recommender');
|
|
2488
|
+
registryRecommender = new RegistryRecommender();
|
|
2489
|
+
await registryRecommender.initialize();
|
|
2490
|
+
|
|
2491
|
+
const registryResult = await registryRecommender.getBestModelsForHardware(hardware, {
|
|
2492
|
+
runtime: selectedRuntime,
|
|
2493
|
+
optimizeFor,
|
|
2494
|
+
limit: 3,
|
|
2495
|
+
poolLimit: options.poolLimit || 20000,
|
|
2496
|
+
localOnly: options.includeGated ? false : true
|
|
2497
|
+
});
|
|
2498
|
+
const recommendations = registryResult.recommendations;
|
|
2499
|
+
const hasRegistryRecommendations = Object.values(recommendations)
|
|
2500
|
+
.some((group) => Array.isArray(group.bestModels) && group.bestModels.length > 0);
|
|
2501
|
+
|
|
2502
|
+
if (hasRegistryRecommendations) {
|
|
2503
|
+
const summary = this.intelligentRecommender.generateRecommendationSummary(
|
|
2504
|
+
recommendations,
|
|
2505
|
+
hardware,
|
|
2506
|
+
{ optimizeFor }
|
|
2507
|
+
);
|
|
2508
|
+
const totalModelsAnalyzed = Number(registryResult.totalModelsAnalyzed) || Object.values(recommendations)
|
|
2509
|
+
.reduce((sum, group) => sum + (Number(group.totalCandidates) || Number(group.totalEvaluated) || 0), 0);
|
|
2510
|
+
|
|
2511
|
+
this.logger.info(`Generated registry recommendations for ${Object.keys(recommendations).length} categories`);
|
|
2512
|
+
|
|
2513
|
+
return {
|
|
2514
|
+
recommendations,
|
|
2515
|
+
summary,
|
|
2516
|
+
optimizeFor: summary.optimize_for || optimizeFor,
|
|
2517
|
+
runtime: selectedRuntime,
|
|
2518
|
+
recommendationSource: 'registry',
|
|
2519
|
+
registryStats: registryResult.registryStats,
|
|
2520
|
+
totalModelsAnalyzed,
|
|
2521
|
+
generatedAt: new Date().toISOString()
|
|
2522
|
+
};
|
|
2523
|
+
}
|
|
2524
|
+
|
|
2525
|
+
this.logger.warn('Registry recommendations were empty, falling back to Ollama catalog');
|
|
2526
|
+
} catch (error) {
|
|
2527
|
+
this.logger.warn('Registry recommendations unavailable, falling back to Ollama catalog', { error: error.message });
|
|
2528
|
+
} finally {
|
|
2529
|
+
if (registryRecommender) {
|
|
2530
|
+
registryRecommender.close();
|
|
2531
|
+
}
|
|
2532
|
+
}
|
|
2533
|
+
}
|
|
2471
2534
|
|
|
2472
2535
|
// Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
|
|
2473
2536
|
const ollamaData = await this.loadOllamaModelData();
|
|
@@ -2479,11 +2542,11 @@ class LLMChecker {
|
|
|
2479
2542
|
}
|
|
2480
2543
|
|
|
2481
2544
|
// Generar recomendaciones inteligentes
|
|
2482
|
-
const
|
|
2545
|
+
const fallbackRuntime = selectedRuntime === 'auto' ? 'ollama' : selectedRuntime;
|
|
2483
2546
|
const recommendations = await this.intelligentRecommender.getBestModelsForHardware(
|
|
2484
2547
|
hardware,
|
|
2485
2548
|
allModels,
|
|
2486
|
-
{ optimizeFor, runtime:
|
|
2549
|
+
{ optimizeFor, runtime: fallbackRuntime }
|
|
2487
2550
|
);
|
|
2488
2551
|
const summary = this.intelligentRecommender.generateRecommendationSummary(
|
|
2489
2552
|
recommendations,
|
|
@@ -2497,7 +2560,8 @@ class LLMChecker {
|
|
|
2497
2560
|
recommendations,
|
|
2498
2561
|
summary,
|
|
2499
2562
|
optimizeFor: summary.optimize_for || optimizeFor,
|
|
2500
|
-
runtime:
|
|
2563
|
+
runtime: fallbackRuntime,
|
|
2564
|
+
recommendationSource: 'ollama_catalog',
|
|
2501
2565
|
totalModelsAnalyzed: allModels.length,
|
|
2502
2566
|
generatedAt: new Date().toISOString()
|
|
2503
2567
|
};
|