llm-checker 3.5.15 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +28 -8
  2. package/analyzer/compatibility.js +5 -0
  3. package/analyzer/performance.js +5 -4
  4. package/bin/cli.js +5 -39
  5. package/bin/enhanced_cli.js +449 -24
  6. package/bin/mcp-server.mjs +266 -101
  7. package/package.json +13 -8
  8. package/src/ai/multi-objective-selector.js +118 -11
  9. package/src/calibration/calibration-manager.js +4 -1
  10. package/src/data/model-database.js +489 -5
  11. package/src/data/registry-ingestors.js +751 -0
  12. package/src/data/registry-recommender.js +514 -0
  13. package/src/data/seed/README.md +11 -3
  14. package/src/data/seed/models.db +0 -0
  15. package/src/data/sync-manager.js +32 -18
  16. package/src/hardware/backends/apple-silicon.js +5 -1
  17. package/src/hardware/backends/cuda-detector.js +47 -19
  18. package/src/hardware/backends/intel-detector.js +6 -2
  19. package/src/hardware/backends/rocm-detector.js +6 -2
  20. package/src/hardware/detector.js +57 -30
  21. package/src/hardware/unified-detector.js +129 -25
  22. package/src/index.js +68 -4
  23. package/src/models/ai-check-selector.js +36 -5
  24. package/src/models/deterministic-selector.js +179 -18
  25. package/src/models/expanded_database.js +9 -5
  26. package/src/models/intelligent-selector.js +87 -1
  27. package/src/models/moe-assumptions.js +11 -0
  28. package/src/models/requirements.js +16 -11
  29. package/src/models/scoring-core.js +341 -0
  30. package/src/models/scoring-engine.js +9 -2
  31. package/src/ollama/capacity-planner.js +15 -2
  32. package/src/ollama/client.js +70 -30
  33. package/src/ollama/enhanced-client.js +20 -2
  34. package/src/ollama/manager.js +14 -2
  35. package/src/policy/cli-policy.js +8 -2
  36. package/src/policy/policy-engine.js +2 -1
  37. package/src/provenance/model-provenance.js +4 -1
  38. package/src/ui/cli-theme.js +47 -7
  39. package/src/ui/interactive-panel.js +162 -24
package/src/hardware/backends/cuda-detector.js CHANGED
@@ -249,40 +249,65 @@ class CUDADetector {
249
249
 
250
250
  const lines = gpuData.split('\n');
251
251
 
252
+ // Older drivers emit fewer columns (e.g. no power/clocks), and the CSV
253
+ // separator can be either ", " or "," depending on driver/locale. Split
254
+ // tolerantly and only require the leading identity + memory columns so a
255
+ // GPU is never dropped just because optional trailing fields are absent.
256
+ const toMB = (value) => {
257
+ const n = parseInt(value, 10);
258
+ return Number.isFinite(n) ? n : 0;
259
+ };
260
+ const toGB = (value) => {
261
+ const mb = toMB(value);
262
+ return mb > 0 ? Math.round(mb / 1024) : 0;
263
+ };
264
+ const toInt = (value) => {
265
+ const n = parseInt(value, 10);
266
+ return Number.isFinite(n) ? n : 0;
267
+ };
268
+ const toFloat = (value) => {
269
+ const n = parseFloat(value);
270
+ return Number.isFinite(n) ? n : 0;
271
+ };
272
+
252
273
  for (const line of lines) {
253
- const parts = line.split(', ').map(p => p.trim());
274
+ if (!line || !line.trim()) continue;
275
+ const parts = line.split(/\s*,\s*/).map(p => p.trim());
276
+
277
+ // Need at least index, name, uuid, memory.total to describe a GPU.
278
+ if (parts.length < 4) continue;
254
279
 
255
- if (parts.length < 10) continue;
280
+ const memTotalMB = toMB(parts[3]);
256
281
 
257
282
  const gpu = {
258
- index: parseInt(parts[0]) || 0,
283
+ index: toInt(parts[0]),
259
284
  name: parts[1] || 'Unknown NVIDIA GPU',
260
285
  uuid: parts[2] || null,
261
286
  memory: {
262
- total: Math.round(parseInt(parts[3]) / 1024) || 0, // Convert MB to GB
263
- free: Math.round(parseInt(parts[4]) / 1024) || 0,
264
- used: Math.round(parseInt(parts[5]) / 1024) || 0
287
+ total: toGB(parts[3]), // Convert MB to GB
288
+ free: toGB(parts[4]),
289
+ used: toGB(parts[5])
265
290
  },
266
291
  computeMode: parts[6] || 'Default',
267
292
  pcie: {
268
- generation: parseInt(parts[7]) || 0,
269
- width: parseInt(parts[8]) || 0
293
+ generation: toInt(parts[7]),
294
+ width: toInt(parts[8])
270
295
  },
271
296
  power: {
272
- draw: parseFloat(parts[9]) || 0,
273
- limit: parseFloat(parts[10]) || 0
297
+ draw: toFloat(parts[9]),
298
+ limit: toFloat(parts[10])
274
299
  },
275
- temperature: parseInt(parts[11]) || 0,
300
+ temperature: toInt(parts[11]),
276
301
  utilization: {
277
- gpu: parseInt(parts[12]) || 0,
278
- memory: parseInt(parts[13]) || 0
302
+ gpu: toInt(parts[12]),
303
+ memory: toInt(parts[13])
279
304
  },
280
305
  clocks: {
281
- current: parseInt(parts[14]) || 0,
282
- max: parseInt(parts[15]) || 0
306
+ current: toInt(parts[14]),
307
+ max: toInt(parts[15])
283
308
  },
284
309
  capabilities: this.getGPUCapabilities(parts[1]),
285
- speedCoefficient: this.calculateSpeedCoefficient(parts[1], parseInt(parts[3]))
310
+ speedCoefficient: this.calculateSpeedCoefficient(parts[1], memTotalMB)
286
311
  };
287
312
 
288
313
  result.gpus.push(gpu);
@@ -298,15 +323,18 @@ class CUDADetector {
298
323
 
299
324
  const lines = simpleQuery.split('\n');
300
325
  for (let i = 0; i < lines.length; i++) {
301
- const [name, memMB] = lines[i].split(', ').map(p => p.trim());
302
- const memGB = Math.round(parseInt(memMB) / 1024) || 0;
326
+ if (!lines[i] || !lines[i].trim()) continue;
327
+ const [name, memMB] = lines[i].split(/\s*,\s*/).map(p => p.trim());
328
+ const parsedMB = parseInt(memMB, 10);
329
+ const memMBSafe = Number.isFinite(parsedMB) ? parsedMB : 0;
330
+ const memGB = memMBSafe > 0 ? Math.round(memMBSafe / 1024) : 0;
303
331
 
304
332
  result.gpus.push({
305
333
  index: i,
306
334
  name: name || 'NVIDIA GPU',
307
335
  memory: { total: memGB, free: memGB, used: 0 },
308
336
  capabilities: this.getGPUCapabilities(name),
309
- speedCoefficient: this.calculateSpeedCoefficient(name, parseInt(memMB))
337
+ speedCoefficient: this.calculateSpeedCoefficient(name, memMBSafe)
310
338
  });
311
339
  result.totalVRAM += memGB;
312
340
  }
package/src/hardware/backends/intel-detector.js CHANGED
@@ -111,8 +111,12 @@ class IntelDetector {
111
111
  const name = nameMatch[0].replace(/Corporation\s*/i, '').trim();
112
112
  const isDedicated = name.toLowerCase().includes('arc');
113
113
 
114
- // Get VRAM from sysfs or estimate
115
- let vram = this.getVRAMFromSysfs(block) || this.estimateVRAM(name);
114
+ // Prefer the model-based estimate: getVRAMFromSysfs reads the PCI
115
+ // MMIO BAR size, which is NOT the card's VRAM (a non-Resizable-BAR
116
+ // Arc reports ~256M while having 8-16GB), so a wrong BAR value must
117
+ // not shadow the reliable per-model estimate. BAR is only a last
118
+ // resort when the model can't be recognized.
119
+ let vram = this.estimateVRAM(name) || this.getVRAMFromSysfs(block);
116
120
 
117
121
  const gpu = {
118
122
  index: result.gpus.length,
package/src/hardware/backends/rocm-detector.js CHANGED
@@ -942,8 +942,12 @@ class ROCmDetector {
942
942
  // Try to match device ID to specific variant
943
943
  const deviceInfo = ROCmDetector.AMD_DEVICE_IDS[deviceId];
944
944
  if (deviceInfo) return deviceInfo.name;
945
- // Default to first variant with "AMD Radeon" prefix
946
- return `AMD Radeon ${variants[0]}`;
945
+ // Unknown device ID: lspci groups several SKUs behind one string
946
+ // (e.g. "Radeon RX 7900 XT/7900 XTX/7900M"). Committing to variants[0]
947
+ // mislabels the card as the lowest-tier SKU and yields the wrong VRAM,
948
+ // so keep the full variant list — honestly ambiguous beats confidently
949
+ // wrong.
950
+ return `AMD Radeon ${variants.join('/')}`;
947
951
  }
948
952
  return `AMD Radeon ${bracketName}`;
949
953
  }
package/src/hardware/detector.js CHANGED
@@ -85,12 +85,16 @@ class HardwareDetector {
85
85
  const freeGB = Math.round(memory.free / (1024 ** 3));
86
86
  const usedGB = totalGB - freeGB;
87
87
 
88
+ // Guard against a zero/unknown total (some virtualized or sandboxed hosts
89
+ // report memory.total === 0), which would otherwise make usagePercent NaN.
90
+ const usagePercent = totalGB > 0 ? Math.round((usedGB / totalGB) * 100) : 0;
91
+
88
92
  return {
89
93
  total: totalGB,
90
94
  free: freeGB,
91
95
  used: usedGB,
92
96
  available: Math.round(memory.available / (1024 ** 3)),
93
- usagePercent: Math.round((usedGB / totalGB) * 100),
97
+ usagePercent,
94
98
  swapTotal: Math.round(memory.swaptotal / (1024 ** 3)),
95
99
  swapUsed: Math.round(memory.swapused / (1024 ** 3)),
96
100
  score: this.calculateMemoryScore(totalGB, freeGB)
@@ -420,7 +424,12 @@ class HardwareDetector {
420
424
  driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
421
425
  };
422
426
  } catch (error) {
423
- // Keep systeminformation-only results when backend-specific detection is unavailable
427
+ // Keep systeminformation-only results when backend-specific detection is
428
+ // unavailable. Surface the cause under a debug flag so a genuine bug in the
429
+ // enrichment path is distinguishable from "no backend tools installed".
430
+ if (process.env.DEBUG_GPU || process.env.LLM_CHECKER_DEBUG) {
431
+ console.error('[llm-checker] enrichWithUnifiedHardware failed:', error && error.stack ? error.stack : error);
432
+ }
424
433
  }
425
434
  }
426
435
 
@@ -553,8 +562,23 @@ class HardwareDetector {
553
562
 
554
563
  // NVIDIA data-center / workstation
555
564
  if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
565
+
566
+ // NVIDIA Blackwell / Ada / Hopper workstation & datacenter cards. These are
567
+ // matched BEFORE the generic "rtx -> 8" fallback so high-VRAM professional
568
+ // GPUs (e.g. "RTX PRO 6000") are not collapsed to 8GB (issue #88).
569
+ if (modelLower.includes('rtx pro 6000') || modelLower.includes('rtx 6000 blackwell')) return 96;
570
+ if (modelLower.includes('rtx 6000 ada') || modelLower.includes('rtx 5000 ada')) return 48;
571
+ if (modelLower.includes('rtx a6000') || modelLower.includes('a6000')) return 48;
572
+ if (modelLower.includes('rtx a5000') || modelLower.includes('a5000')) return 24;
573
+ if (modelLower.includes('l40s') || modelLower.includes('l40')) return 48;
574
+ if (modelLower.includes('h200')) return 141;
575
+ if (modelLower.includes('h100')) return 80;
576
+ if (modelLower.includes('a100') && (modelLower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(modelLower))) return 40;
577
+ if (modelLower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
578
+ if (modelLower.includes('a40')) return 48;
579
+
556
580
  if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
557
-
581
+
558
582
  // NVIDIA RTX 50 series
559
583
  if (modelLower.includes('rtx 5090')) return 32;
560
584
  if (modelLower.includes('rtx 5080')) return 16;
@@ -635,7 +659,7 @@ class HardwareDetector {
635
659
  else score += totalGB * 2;
636
660
 
637
661
  // Score basado en RAM disponible
638
- const freePercent = (freeGB / totalGB) * 100;
662
+ const freePercent = totalGB > 0 ? (freeGB / totalGB) * 100 : 0;
639
663
  if (freePercent > 50) score += 20;
640
664
  else if (freePercent > 30) score += 15;
641
665
  else if (freePercent > 20) score += 10;
@@ -738,34 +762,37 @@ class HardwareDetector {
738
762
  * Normalize VRAM values (handle different units and wrong totals)
739
763
  */
740
764
  normalizeVRAM(vram) {
741
- if (!vram || vram <= 0) return 0;
742
-
743
- let vramValue = vram;
744
-
745
- // Handle VRAM in bytes (some systems report this way)
746
- if (vramValue > 100000) {
747
- vramValue = Math.round(vramValue / (1024 * 1024)); // Convert bytes to MB
765
+ const raw = Number(vram);
766
+ if (!Number.isFinite(raw) || raw <= 0) return 0;
767
+
768
+ // Inputs reaching this function come from systeminformation / lspci (which
769
+ // express controller VRAM in megabytes), from raw byte counts on systems
770
+ // that report that way, and increasingly from our own curated GB tables
771
+ // (estimateVRAMFromModel, device-id maps) fed back through here. The unit
772
+ // is inferred from magnitude:
773
+ //
774
+ // > 1e6 -> raw bytes (a 192 GB card is ~2.06e11 bytes, while
775
+ // the same card in MB is ~196,608, well under 1e6).
776
+ // >= 1024 -> megabytes (the smallest dedicated framebuffer that
777
+ // still rounds to >=1 GB; this is the systeminformation
778
+ // reporting range, e.g. 8192, 16384, 16368).
779
+ // 1 <= v <= 256 -> already gigabytes. Real single-GPU VRAM tops out
780
+ // around 192 GB (H200 ~141, B200/MI ~192), so any
781
+ // small integer in this band is a GB value. This is
782
+ // the dead-zone fix for issue #88: normalizeVRAM(96)
783
+ // used to return 0 (treated 96 as 96 MB -> 0 GB).
784
+ // 257 <= v < 1024 -> sub-gigabyte framebuffer in MB (e.g. a 512 MB
785
+ // aperture) -> rounds to 0/1 GB as before.
786
+ if (raw > 1_000_000) {
787
+ return Math.max(0, Math.round(raw / (1024 * 1024 * 1024))); // bytes -> GB
748
788
  }
749
-
750
- // Now determine if we have MB or GB values
751
- if (vramValue >= 1024) {
752
- // Values >= 1024 are likely MB, convert to GB
753
- vramValue = Math.round(vramValue / 1024);
754
- } else if (vramValue >= 512 && vramValue < 1024) {
755
- // 512-1023 MB, round to 1GB
756
- vramValue = 1;
757
- } else if (vramValue > 80) {
758
- // Values between 80-511 are likely incorrect MB values, treat as MB
759
- vramValue = Math.round(vramValue / 1024) || 1;
760
- } else if (vramValue >= 1 && vramValue <= 80) {
761
- // Values 1-80 are likely already in GB, keep as is
762
- vramValue = vramValue;
763
- } else {
764
- // Values < 1 round to 0
765
- vramValue = 0;
789
+ if (raw >= 1024) {
790
+ return Math.max(0, Math.round(raw / 1024)); // MB -> GB
766
791
  }
767
-
768
- return vramValue;
792
+ if (raw <= 256) {
793
+ return Math.round(raw); // already GB (plausible single-GPU range)
794
+ }
795
+ return Math.max(0, Math.round(raw / 1024)); // 257..1023 MB -> GB
769
796
  }
770
797
 
771
798
  /**
package/src/hardware/unified-detector.js CHANGED
@@ -13,6 +13,21 @@ const si = require('systeminformation');
13
13
  const { execSync } = require('child_process');
14
14
  const { normalizePlatform } = require('../utils/platform');
15
15
 
16
+ // Recent GPUs whose PCI device id is not yet resolved to a model name by the
17
+ // distro pci.ids database (so lspci / systeminformation report them as a bare
18
+ // "Device <id>"). Mapping the device id lets us (a) give them a real name and
19
+ // (b) collapse the multiple raw views of the SAME card that different detection
20
+ // sources produce into one inventory entry. Unknown ids degrade gracefully to a
21
+ // stable `pci:<id>` match key, so this table only needs the newest cards.
22
+ const PCI_GPU_MAP = {
23
+ // NVIDIA Blackwell (RTX 50 series, desktop)
24
+ '2f04': { family: 'rtx5070', type: 'dedicated', name: 'NVIDIA GeForce RTX 5070' },
25
+ '2c02': { family: 'rtx5080', type: 'dedicated', name: 'NVIDIA GeForce RTX 5080' },
26
+ '2b85': { family: 'rtx5090', type: 'dedicated', name: 'NVIDIA GeForce RTX 5090' },
27
+ // AMD Raphael / Granite Ridge desktop iGPU (Ryzen 7000/9000 non-G)
28
+ '13c0': { family: 'amd-raphael-igpu', type: 'integrated', name: 'AMD Radeon Graphics (Raphael)' }
29
+ };
30
+
16
31
  class UnifiedDetector {
17
32
  constructor() {
18
33
  this.backends = {
@@ -613,14 +628,19 @@ class UnifiedDetector {
613
628
 
614
629
  const normalized = controllers
615
630
  .map((controller) => {
616
- const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
631
+ let name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
617
632
  if (!name || name.toLowerCase() === 'unknown') return null;
618
633
  if (this.isRemoteDisplayModel(name)) return null;
619
634
 
620
635
  const nameLower = name.toLowerCase();
621
636
  if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
622
637
 
623
- const isIntegrated = this.isIntegratedGPUModel(name);
638
+ // Resolve recent cards that the runtime could only report as a bare
639
+ // "Device <id>" so they get a real name and correct integrated flag.
640
+ const mapped = this.resolveMappedGpu(name) || this.resolveMappedGpu(controller?.deviceId);
641
+ if (mapped) name = mapped.name;
642
+
643
+ const isIntegrated = mapped ? mapped.type === 'integrated' : this.isIntegratedGPUModel(name);
624
644
  let vram = isIntegrated
625
645
  ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
626
646
  : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
@@ -711,30 +731,55 @@ class UnifiedDetector {
711
731
 
712
732
  if (!isNvidia && !isAMD && !isIntel) continue;
713
733
 
714
- const genericName = line
715
- .replace(/^[0-9a-f:.]+\s+/i, '')
716
- .replace(/\(rev\s+[0-9a-f]+\)$/i, '')
717
- .trim();
734
+ const vendorLabel = isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel');
735
+ const pciId = this.extractPciDeviceId(line);
736
+ const mapped = this.resolveMappedGpu(line);
718
737
 
738
+ // Prefer the resolved model name inside a trailing "[Model] [vvvv:dddd]"
739
+ // pair (e.g. "[GeForce RTX 4060]"). Otherwise clean the raw lspci line
740
+ // down to a readable device string instead of using the whole line.
719
741
  const bracketName = line.match(/\[(?![0-9a-f]{4}:[0-9a-f]{4}\])([^\]]+)\]\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i);
720
- const name = (bracketName?.[1] || genericName || 'Unknown GPU').replace(/\s+/g, ' ').trim();
721
- if (!name || name.toLowerCase() === 'unknown gpu') continue;
742
+ let name = (bracketName?.[1] || '').replace(/\s+/g, ' ').trim();
743
+
744
+ if (!name) {
745
+ name = line
746
+ .replace(/^[0-9a-f]{2,4}:[0-9a-f]{2}\.[0-9a-f]\s+/i, '') // PCI address
747
+ .replace(/^(?:vga compatible|3d|display)\s+controller\s+\[[0-9a-f]{4}\]:\s*/i, '') // class prefix
748
+ .replace(/\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i, '') // [vvvv:dddd]
749
+ .replace(/\s*\(rev\s+[0-9a-f]+\)\s*$/i, '') // (rev xx)
750
+ .replace(/\b(?:corporation|corp\.?|inc\.?|advanced micro devices,?)\b/gi, '')
751
+ .replace(/\[amd\/ati\]/gi, '')
752
+ .replace(/\s+/g, ' ')
753
+ .trim();
754
+ }
755
+
756
+ // If the card could not be resolved to a real model, give it a stable,
757
+ // readable name that carries the PCI id so it dedupes across sources.
758
+ const meaningful = name.replace(/\b(?:nvidia|amd|ati|intel|device|graphics|gpu|controller)\b/gi, '').replace(/[^a-z0-9]/gi, '').trim();
759
+ if (mapped) {
760
+ name = mapped.name;
761
+ } else if (!meaningful) {
762
+ name = pciId ? `${vendorLabel} Device ${pciId.toUpperCase()}` : `${vendorLabel} GPU`;
763
+ }
722
764
 
723
- const isIntegrated = this.isIntegratedGPUModel(name) || isIntel;
765
+ const isIntegrated = mapped
766
+ ? mapped.type === 'integrated'
767
+ : (this.isIntegratedGPUModel(name) || (isIntel && !/\barc\b/i.test(name)));
724
768
  let vram = this.estimateFallbackVRAM(name);
725
769
  if (isIntegrated) {
726
770
  vram = 0;
727
771
  }
728
772
 
729
- const dedupeKey = `${name.toLowerCase()}|${isIntegrated ? 'i' : 'd'}`;
773
+ const dedupeKey = `${this.getGpuMatchKey(name)}|${isIntegrated ? 'i' : 'd'}`;
730
774
  if (seen.has(dedupeKey)) continue;
731
775
  seen.add(dedupeKey);
732
776
 
733
777
  results.push({
734
778
  name,
735
- vendor: isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel'),
779
+ vendor: vendorLabel,
736
780
  type: isIntegrated ? 'integrated' : 'dedicated',
737
781
  memory: { total: vram },
782
+ pciId: pciId || null,
738
783
  source: 'lspci'
739
784
  });
740
785
  }
@@ -746,22 +791,27 @@ class UnifiedDetector {
746
791
  const num = Number(value);
747
792
  if (!Number.isFinite(num) || num <= 0) return 0;
748
793
 
749
- // Bytes -> GB
750
- if (num > 1024 * 1024) {
751
- return Math.round(num / (1024 * 1024 * 1024));
794
+ // Unit inference by magnitude, kept consistent with
795
+ // HardwareDetector.normalizeVRAM so both detection paths agree:
796
+ //
797
+ // > 1e6 -> raw bytes.
798
+ // >= 1024 -> megabytes (systeminformation reporting range).
799
+ // 1 <= v <= 256 -> already gigabytes. The previous "1..80 means GB"
800
+ // band silently returned 0 for legitimate large GB
801
+ // values, so normalizeFallbackVRAM(192) was 0 — the
802
+ // 192 GB box in issue #88 collapsed to nothing. A
803
+ // single GPU realistically tops out around 192 GB.
804
+ // 257 <= v < 1024 -> sub-gigabyte framebuffer in MB -> rounds to 0/1 GB.
805
+ if (num > 1_000_000) {
806
+ return Math.max(0, Math.round(num / (1024 * 1024 * 1024))); // bytes -> GB
752
807
  }
753
-
754
- // MB -> GB
755
808
  if (num >= 1024) {
756
- return Math.round(num / 1024);
809
+ return Math.max(0, Math.round(num / 1024)); // MB -> GB
757
810
  }
758
-
759
- // Likely already GB
760
- if (num >= 1 && num <= 80) {
761
- return Math.round(num);
811
+ if (num <= 256) {
812
+ return Math.round(num); // already GB (plausible single-GPU range)
762
813
  }
763
-
764
- return 0;
814
+ return Math.max(0, Math.round(num / 1024)); // 257..1023 MB -> GB
765
815
  }
766
816
 
767
817
  isIntegratedGPUModel(model) {
@@ -799,6 +849,21 @@ class UnifiedDetector {
799
849
  if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
800
850
  if (lower.includes('rx 6700')) return 12;
801
851
 
852
+ // NVIDIA workstation / datacenter (Blackwell / Ada / Hopper / Ampere).
853
+ // Matched BEFORE the consumer RTX entries and the generic fallbacks so a
854
+ // high-VRAM professional card is not collapsed to a consumer-tier value or
855
+ // 0 (issue #88: dual "RTX PRO 6000" must reach ~192GB total, not ~16GB).
856
+ if (lower.includes('rtx pro 6000') || lower.includes('rtx 6000 blackwell')) return 96;
857
+ if (lower.includes('rtx 6000 ada') || lower.includes('rtx 5000 ada')) return 48;
858
+ if (lower.includes('rtx a6000') || lower.includes('a6000')) return 48;
859
+ if (lower.includes('rtx a5000') || lower.includes('a5000')) return 24;
860
+ if (lower.includes('l40s') || lower.includes('l40')) return 48;
861
+ if (lower.includes('h200')) return 141;
862
+ if (lower.includes('h100')) return 80;
863
+ if (lower.includes('a100') && (lower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(lower))) return 40;
864
+ if (lower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
865
+ if (lower.includes('a40')) return 48;
866
+
802
867
  if (lower.includes('rtx 5090')) return 32;
803
868
  if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
804
869
  if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
@@ -817,6 +882,15 @@ class UnifiedDetector {
817
882
  return `${familyMatch[1]}${familyMatch[2]}`;
818
883
  }
819
884
 
885
+ // Different detection sources describe an unresolved card in different
886
+ // ways for the SAME hardware, e.g. systeminformation "Device 2f04" and
887
+ // lspci "...Device [10de:2f04]". Key on the PCI device id (mapped to a
888
+ // canonical family when known) so those collapse to one inventory entry.
889
+ const pciId = this.extractPciDeviceId(name);
890
+ if (pciId) {
891
+ return (PCI_GPU_MAP[pciId] && PCI_GPU_MAP[pciId].family) || `pci:${pciId}`;
892
+ }
893
+
820
894
  const concise = lower
821
895
  .replace(/nvidia|amd|ati|intel|corporation|geforce|radeon|graphics/g, '')
822
896
  .replace(/\s+/g, ' ')
@@ -825,6 +899,26 @@ class UnifiedDetector {
825
899
  return concise || lower;
826
900
  }
827
901
 
902
+ /**
903
+ * Extract a 4-hex PCI device id from a GPU name/description, handling both the
904
+ * lspci "[vendor:device]" form and the bare "Device <id>" form that
905
+ * systeminformation emits for cards it cannot name. Returns null when none.
906
+ */
907
+ extractPciDeviceId(text) {
908
+ const value = String(text || '');
909
+ const bracket = value.match(/\[[0-9a-f]{4}:([0-9a-f]{4})\]/i);
910
+ if (bracket) return bracket[1].toLowerCase();
911
+ const bare = value.match(/\bdevice\s+([0-9a-f]{4})\b/i);
912
+ if (bare) return bare[1].toLowerCase();
913
+ return null;
914
+ }
915
+
916
+ /** Look up a curated mapping for a recent card by PCI device id (or null). */
917
+ resolveMappedGpu(text) {
918
+ const pciId = this.extractPciDeviceId(text);
919
+ return pciId && PCI_GPU_MAP[pciId] ? { pciId, ...PCI_GPU_MAP[pciId] } : null;
920
+ }
921
+
828
922
  /**
829
923
  * Generate hardware fingerprint for benchmarks
830
924
  */
@@ -879,9 +973,19 @@ class UnifiedDetector {
879
973
  summary.bestBackend === 'metal' ||
880
974
  (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
881
975
  ) {
882
- return sizeGB <= (summary.effectiveMemory - 2);
976
+ const effectiveMemory = Number(summary.effectiveMemory);
977
+ if (!Number.isFinite(effectiveMemory) || effectiveMemory <= 0) return false;
978
+ return sizeGB <= (effectiveMemory - 2);
883
979
  } else {
884
- const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
980
+ const totalVRAM = Number(summary.totalVRAM);
981
+ if (!Number.isFinite(totalVRAM) || totalVRAM <= 0) return false;
982
+
983
+ // Guard the per-GPU divisor: gpuCount can be 0 when the summary was
984
+ // built without resolved GPU memory, which previously produced
985
+ // Infinity (totalVRAM / 0) and made any model "fit".
986
+ const gpuCount = Math.max(1, Number(summary.gpuCount) || 0);
987
+ const availableVRAM = useMultiGPU ? totalVRAM : (totalVRAM / gpuCount);
988
+ if (!Number.isFinite(availableVRAM) || availableVRAM <= 0) return false;
885
989
  return sizeGB <= (availableVRAM - 2);
886
990
  }
887
991
  }
package/src/index.js CHANGED
@@ -20,6 +20,17 @@ const {
20
20
  } = require('./provenance/model-provenance');
21
21
  const { normalizePlatform } = require('./utils/platform');
22
22
 
23
+ function normalizeRecommendationRuntime(runtime = 'auto') {
24
+ const normalized = String(runtime || 'auto').trim().toLowerCase();
25
+ if (['auto', 'all', '*'].includes(normalized)) return 'auto';
26
+ if (['ollama', 'vllm', 'mlx', 'llama.cpp', 'llamacpp', 'llama_cpp', 'transformers', 'hf'].includes(normalized)) {
27
+ if (normalized === 'llamacpp' || normalized === 'llama_cpp') return 'llama.cpp';
28
+ if (normalized === 'hf') return 'transformers';
29
+ return normalized;
30
+ }
31
+ return normalizeRuntime(normalized);
32
+ }
33
+
23
34
  class LLMChecker {
24
35
  constructor(options = {}) {
25
36
  this.hardwareDetector = new HardwareDetector();
@@ -2467,7 +2478,59 @@ class LLMChecker {
2467
2478
  async generateIntelligentRecommendations(hardware, options = {}) {
2468
2479
  try {
2469
2480
  this.logger.info('Generating intelligent recommendations...');
2470
- const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
2481
+ const selectedRuntime = normalizeRecommendationRuntime(options.runtime || 'auto');
2482
+ const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
2483
+
2484
+ if (options.registry !== false) {
2485
+ let registryRecommender = null;
2486
+ try {
2487
+ const { RegistryRecommender } = require('./data/registry-recommender');
2488
+ registryRecommender = new RegistryRecommender();
2489
+ await registryRecommender.initialize();
2490
+
2491
+ const registryResult = await registryRecommender.getBestModelsForHardware(hardware, {
2492
+ runtime: selectedRuntime,
2493
+ optimizeFor,
2494
+ limit: 3,
2495
+ poolLimit: options.poolLimit || 20000,
2496
+ localOnly: options.includeGated ? false : true
2497
+ });
2498
+ const recommendations = registryResult.recommendations;
2499
+ const hasRegistryRecommendations = Object.values(recommendations)
2500
+ .some((group) => Array.isArray(group.bestModels) && group.bestModels.length > 0);
2501
+
2502
+ if (hasRegistryRecommendations) {
2503
+ const summary = this.intelligentRecommender.generateRecommendationSummary(
2504
+ recommendations,
2505
+ hardware,
2506
+ { optimizeFor }
2507
+ );
2508
+ const totalModelsAnalyzed = Number(registryResult.totalModelsAnalyzed) || Object.values(recommendations)
2509
+ .reduce((sum, group) => sum + (Number(group.totalCandidates) || Number(group.totalEvaluated) || 0), 0);
2510
+
2511
+ this.logger.info(`Generated registry recommendations for ${Object.keys(recommendations).length} categories`);
2512
+
2513
+ return {
2514
+ recommendations,
2515
+ summary,
2516
+ optimizeFor: summary.optimize_for || optimizeFor,
2517
+ runtime: selectedRuntime,
2518
+ recommendationSource: 'registry',
2519
+ registryStats: registryResult.registryStats,
2520
+ totalModelsAnalyzed,
2521
+ generatedAt: new Date().toISOString()
2522
+ };
2523
+ }
2524
+
2525
+ this.logger.warn('Registry recommendations were empty, falling back to Ollama catalog');
2526
+ } catch (error) {
2527
+ this.logger.warn('Registry recommendations unavailable, falling back to Ollama catalog', { error: error.message });
2528
+ } finally {
2529
+ if (registryRecommender) {
2530
+ registryRecommender.close();
2531
+ }
2532
+ }
2533
+ }
2471
2534
 
2472
2535
  // Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
2473
2536
  const ollamaData = await this.loadOllamaModelData();
@@ -2479,11 +2542,11 @@ class LLMChecker {
2479
2542
  }
2480
2543
 
2481
2544
  // Generar recomendaciones inteligentes
2482
- const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
2545
+ const fallbackRuntime = selectedRuntime === 'auto' ? 'ollama' : selectedRuntime;
2483
2546
  const recommendations = await this.intelligentRecommender.getBestModelsForHardware(
2484
2547
  hardware,
2485
2548
  allModels,
2486
- { optimizeFor, runtime: selectedRuntime }
2549
+ { optimizeFor, runtime: fallbackRuntime }
2487
2550
  );
2488
2551
  const summary = this.intelligentRecommender.generateRecommendationSummary(
2489
2552
  recommendations,
@@ -2497,7 +2560,8 @@ class LLMChecker {
2497
2560
  recommendations,
2498
2561
  summary,
2499
2562
  optimizeFor: summary.optimize_for || optimizeFor,
2500
- runtime: selectedRuntime,
2563
+ runtime: fallbackRuntime,
2564
+ recommendationSource: 'ollama_catalog',
2501
2565
  totalModelsAnalyzed: allModels.length,
2502
2566
  generatedAt: new Date().toISOString()
2503
2567
  };