npm - llm-checker - Versions diffs - 3.5.15 → 3.7.0 - Mend

llm-checker 3.5.15 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +28 -8
package/analyzer/compatibility.js +5 -0
package/analyzer/performance.js +5 -4
package/bin/cli.js +5 -39
package/bin/enhanced_cli.js +449 -24
package/bin/mcp-server.mjs +266 -101
package/package.json +13 -8
package/src/ai/multi-objective-selector.js +118 -11
package/src/calibration/calibration-manager.js +4 -1
package/src/data/model-database.js +489 -5
package/src/data/registry-ingestors.js +751 -0
package/src/data/registry-recommender.js +514 -0
package/src/data/seed/README.md +11 -3
package/src/data/seed/models.db +0 -0
package/src/data/sync-manager.js +32 -18
package/src/hardware/backends/apple-silicon.js +5 -1
package/src/hardware/backends/cuda-detector.js +47 -19
package/src/hardware/backends/intel-detector.js +6 -2
package/src/hardware/backends/rocm-detector.js +6 -2
package/src/hardware/detector.js +57 -30
package/src/hardware/unified-detector.js +129 -25
package/src/index.js +68 -4
package/src/models/ai-check-selector.js +36 -5
package/src/models/deterministic-selector.js +179 -18
package/src/models/expanded_database.js +9 -5
package/src/models/intelligent-selector.js +87 -1
package/src/models/moe-assumptions.js +11 -0
package/src/models/requirements.js +16 -11
package/src/models/scoring-core.js +341 -0
package/src/models/scoring-engine.js +9 -2
package/src/ollama/capacity-planner.js +15 -2
package/src/ollama/client.js +70 -30
package/src/ollama/enhanced-client.js +20 -2
package/src/ollama/manager.js +14 -2
package/src/policy/cli-policy.js +8 -2
package/src/policy/policy-engine.js +2 -1
package/src/provenance/model-provenance.js +4 -1
package/src/ui/cli-theme.js +47 -7
package/src/ui/interactive-panel.js +162 -24

package/src/hardware/backends/cuda-detector.js CHANGED

@@ -249,40 +249,65 @@ class CUDADetector {
             const lines = gpuData.split('\n');
+            // Older drivers emit fewer columns (e.g. no power/clocks), and the CSV
+            // separator can be either ", " or "," depending on driver/locale. Split
+            // tolerantly and only require the leading identity + memory columns so a
+            // GPU is never dropped just because optional trailing fields are absent.
+            const toMB = (value) => {
+                const n = parseInt(value, 10);
+                return Number.isFinite(n) ? n : 0;
+            };
+            const toGB = (value) => {
+                const mb = toMB(value);
+                return mb > 0 ? Math.round(mb / 1024) : 0;
+            };
+            const toInt = (value) => {
+                const n = parseInt(value, 10);
+                return Number.isFinite(n) ? n : 0;
+            };
+            const toFloat = (value) => {
+                const n = parseFloat(value);
+                return Number.isFinite(n) ? n : 0;
+            };
             for (const line of lines) {
-                const parts = line.split(', ').map(p => p.trim());
+                if (!line || !line.trim()) continue;
+                const parts = line.split(/\s*,\s*/).map(p => p.trim());
+                // Need at least index, name, uuid, memory.total to describe a GPU.
+                if (parts.length < 4) continue;
-                if (parts.length < 10) continue;
+                const memTotalMB = toMB(parts[3]);
                 const gpu = {
-                    index: parseInt(parts[0]) || 0,
+                    index: toInt(parts[0]),
                     name: parts[1] || 'Unknown NVIDIA GPU',
                     uuid: parts[2] || null,
                     memory: {
-                        total: Math.round(parseInt(parts[3]) / 1024) || 0,  // Convert MB to GB
-                        free: Math.round(parseInt(parts[4]) / 1024) || 0,
-                        used: Math.round(parseInt(parts[5]) / 1024) || 0
+                        total: toGB(parts[3]),  // Convert MB to GB
+                        free: toGB(parts[4]),
+                        used: toGB(parts[5])
                     },
                     computeMode: parts[6] || 'Default',
                     pcie: {
-                        generation: parseInt(parts[7]) || 0,
-                        width: parseInt(parts[8]) || 0
+                        generation: toInt(parts[7]),
+                        width: toInt(parts[8])
                     },
                     power: {
-                        draw: parseFloat(parts[9]) || 0,
-                        limit: parseFloat(parts[10]) || 0
+                        draw: toFloat(parts[9]),
+                        limit: toFloat(parts[10])
                     },
-                    temperature: parseInt(parts[11]) || 0,
+                    temperature: toInt(parts[11]),
                     utilization: {
-                        gpu: parseInt(parts[12]) || 0,
-                        memory: parseInt(parts[13]) || 0
+                        gpu: toInt(parts[12]),
+                        memory: toInt(parts[13])
                     },
                     clocks: {
-                        current: parseInt(parts[14]) || 0,
-                        max: parseInt(parts[15]) || 0
+                        current: toInt(parts[14]),
+                        max: toInt(parts[15])
                     },
                     capabilities: this.getGPUCapabilities(parts[1]),
-                    speedCoefficient: this.calculateSpeedCoefficient(parts[1], parseInt(parts[3]))
+                    speedCoefficient: this.calculateSpeedCoefficient(parts[1], memTotalMB)
                 };
                 result.gpus.push(gpu);
@@ -298,15 +323,18 @@ class CUDADetector {
                 const lines = simpleQuery.split('\n');
                 for (let i = 0; i < lines.length; i++) {
-                    const [name, memMB] = lines[i].split(', ').map(p => p.trim());
-                    const memGB = Math.round(parseInt(memMB) / 1024) || 0;
+                    if (!lines[i] || !lines[i].trim()) continue;
+                    const [name, memMB] = lines[i].split(/\s*,\s*/).map(p => p.trim());
+                    const parsedMB = parseInt(memMB, 10);
+                    const memMBSafe = Number.isFinite(parsedMB) ? parsedMB : 0;
+                    const memGB = memMBSafe > 0 ? Math.round(memMBSafe / 1024) : 0;
                     result.gpus.push({
                         index: i,
                         name: name || 'NVIDIA GPU',
                         memory: { total: memGB, free: memGB, used: 0 },
                         capabilities: this.getGPUCapabilities(name),
-                        speedCoefficient: this.calculateSpeedCoefficient(name, parseInt(memMB))
+                        speedCoefficient: this.calculateSpeedCoefficient(name, memMBSafe)
                     });
                     result.totalVRAM += memGB;
                 }

package/src/hardware/backends/intel-detector.js CHANGED

@@ -111,8 +111,12 @@ class IntelDetector {
                 const name = nameMatch[0].replace(/Corporation\s*/i, '').trim();
                 const isDedicated = name.toLowerCase().includes('arc');
-                // Get VRAM from sysfs or estimate
-                let vram = this.getVRAMFromSysfs(block) || this.estimateVRAM(name);
+                // Prefer the model-based estimate: getVRAMFromSysfs reads the PCI
+                // MMIO BAR size, which is NOT the card's VRAM (a non-Resizable-BAR
+                // Arc reports ~256M while having 8-16GB), so a wrong BAR value must
+                // not shadow the reliable per-model estimate. BAR is only a last
+                // resort when the model can't be recognized.
+                let vram = this.estimateVRAM(name) || this.getVRAMFromSysfs(block);
                 const gpu = {
                     index: result.gpus.length,

package/src/hardware/backends/rocm-detector.js CHANGED

@@ -942,8 +942,12 @@ class ROCmDetector {
                 // Try to match device ID to specific variant
                 const deviceInfo = ROCmDetector.AMD_DEVICE_IDS[deviceId];
                 if (deviceInfo) return deviceInfo.name;
-                // Default to first variant with "AMD Radeon" prefix
-                return `AMD Radeon ${variants[0]}`;
+                // Unknown device ID: lspci groups several SKUs behind one string
+                // (e.g. "Radeon RX 7900 XT/7900 XTX/7900M"). Committing to variants[0]
+                // mislabels the card as the lowest-tier SKU and yields the wrong VRAM,
+                // so keep the full variant list — honestly ambiguous beats confidently
+                // wrong.
+                return `AMD Radeon ${variants.join('/')}`;
             }
             return `AMD Radeon ${bracketName}`;
         }

package/src/hardware/detector.js CHANGED

@@ -85,12 +85,16 @@ class HardwareDetector {
         const freeGB = Math.round(memory.free / (1024 ** 3));
         const usedGB = totalGB - freeGB;
+        // Guard against a zero/unknown total (some virtualized or sandboxed hosts
+        // report memory.total === 0), which would otherwise make usagePercent NaN.
+        const usagePercent = totalGB > 0 ? Math.round((usedGB / totalGB) * 100) : 0;
         return {
             total: totalGB,
             free: freeGB,
             used: usedGB,
             available: Math.round(memory.available / (1024 ** 3)),
-            usagePercent: Math.round((usedGB / totalGB) * 100),
+            usagePercent,
             swapTotal: Math.round(memory.swaptotal / (1024 ** 3)),
             swapUsed: Math.round(memory.swapused / (1024 ** 3)),
             score: this.calculateMemoryScore(totalGB, freeGB)
@@ -420,7 +424,12 @@ class HardwareDetector {
                 driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
             };
         } catch (error) {
-            // Keep systeminformation-only results when backend-specific detection is unavailable
+            // Keep systeminformation-only results when backend-specific detection is
+            // unavailable. Surface the cause under a debug flag so a genuine bug in the
+            // enrichment path is distinguishable from "no backend tools installed".
+            if (process.env.DEBUG_GPU || process.env.LLM_CHECKER_DEBUG) {
+                console.error('[llm-checker] enrichWithUnifiedHardware failed:', error && error.stack ? error.stack : error);
+            }
         }
     }
@@ -553,8 +562,23 @@ class HardwareDetector {
         // NVIDIA data-center / workstation
         if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
+        // NVIDIA Blackwell / Ada / Hopper workstation & datacenter cards. These are
+        // matched BEFORE the generic "rtx -> 8" fallback so high-VRAM professional
+        // GPUs (e.g. "RTX PRO 6000") are not collapsed to 8GB (issue #88).
+        if (modelLower.includes('rtx pro 6000') || modelLower.includes('rtx 6000 blackwell')) return 96;
+        if (modelLower.includes('rtx 6000 ada') || modelLower.includes('rtx 5000 ada')) return 48;
+        if (modelLower.includes('rtx a6000') || modelLower.includes('a6000')) return 48;
+        if (modelLower.includes('rtx a5000') || modelLower.includes('a5000')) return 24;
+        if (modelLower.includes('l40s') || modelLower.includes('l40')) return 48;
+        if (modelLower.includes('h200')) return 141;
+        if (modelLower.includes('h100')) return 80;
+        if (modelLower.includes('a100') && (modelLower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(modelLower))) return 40;
+        if (modelLower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
+        if (modelLower.includes('a40')) return 48;
         if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
         // NVIDIA RTX 50 series
         if (modelLower.includes('rtx 5090')) return 32;
         if (modelLower.includes('rtx 5080')) return 16;
@@ -635,7 +659,7 @@ class HardwareDetector {
         else score += totalGB * 2;
         // Score basado en RAM disponible
-        const freePercent = (freeGB / totalGB) * 100;
+        const freePercent = totalGB > 0 ? (freeGB / totalGB) * 100 : 0;
         if (freePercent > 50) score += 20;
         else if (freePercent > 30) score += 15;
         else if (freePercent > 20) score += 10;
@@ -738,34 +762,37 @@ class HardwareDetector {
      * Normalize VRAM values (handle different units and wrong totals)
      */
     normalizeVRAM(vram) {
-        if (!vram || vram <= 0) return 0;
-        let vramValue = vram;
-        // Handle VRAM in bytes (some systems report this way)
-        if (vramValue > 100000) {
-            vramValue = Math.round(vramValue / (1024 * 1024)); // Convert bytes to MB
+        const raw = Number(vram);
+        if (!Number.isFinite(raw) || raw <= 0) return 0;
+        // Inputs reaching this function come from systeminformation / lspci (which
+        // express controller VRAM in megabytes), from raw byte counts on systems
+        // that report that way, and increasingly from our own curated GB tables
+        // (estimateVRAMFromModel, device-id maps) fed back through here. The unit
+        // is inferred from magnitude:
+        //
+        //   > 1e6            -> raw bytes (a 192 GB card is ~2.06e11 bytes, while
+        //                       the same card in MB is ~196,608, well under 1e6).
+        //   >= 1024          -> megabytes (the smallest dedicated framebuffer that
+        //                       still rounds to >=1 GB; this is the systeminformation
+        //                       reporting range, e.g. 8192, 16384, 16368).
+        //   1 <= v <= 256    -> already gigabytes. Real single-GPU VRAM tops out
+        //                       around 192 GB (H200 ~141, B200/MI ~192), so any
+        //                       small integer in this band is a GB value. This is
+        //                       the dead-zone fix for issue #88: normalizeVRAM(96)
+        //                       used to return 0 (treated 96 as 96 MB -> 0 GB).
+        //   257 <= v < 1024  -> sub-gigabyte framebuffer in MB (e.g. a 512 MB
+        //                       aperture) -> rounds to 0/1 GB as before.
+        if (raw > 1_000_000) {
+            return Math.max(0, Math.round(raw / (1024 * 1024 * 1024))); // bytes -> GB
         }
-        // Now determine if we have MB or GB values
-        if (vramValue >= 1024) {
-            // Values >= 1024 are likely MB, convert to GB
-            vramValue = Math.round(vramValue / 1024);
-        } else if (vramValue >= 512 && vramValue < 1024) {
-            // 512-1023 MB, round to 1GB
-            vramValue = 1;
-        } else if (vramValue > 80) {
-            // Values between 80-511 are likely incorrect MB values, treat as MB
-            vramValue = Math.round(vramValue / 1024) || 1;
-        } else if (vramValue >= 1 && vramValue <= 80) {
-            // Values 1-80 are likely already in GB, keep as is
-            vramValue = vramValue;
-        } else {
-            // Values < 1 round to 0
-            vramValue = 0;
+        if (raw >= 1024) {
+            return Math.max(0, Math.round(raw / 1024)); // MB -> GB
         }
-        return vramValue;
+        if (raw <= 256) {
+            return Math.round(raw); // already GB (plausible single-GPU range)
+        }
+        return Math.max(0, Math.round(raw / 1024)); // 257..1023 MB -> GB
     }
     /**

package/src/hardware/unified-detector.js CHANGED

@@ -13,6 +13,21 @@ const si = require('systeminformation');
 const { execSync } = require('child_process');
 const { normalizePlatform } = require('../utils/platform');
+// Recent GPUs whose PCI device id is not yet resolved to a model name by the
+// distro pci.ids database (so lspci / systeminformation report them as a bare
+// "Device <id>"). Mapping the device id lets us (a) give them a real name and
+// (b) collapse the multiple raw views of the SAME card that different detection
+// sources produce into one inventory entry. Unknown ids degrade gracefully to a
+// stable `pci:<id>` match key, so this table only needs the newest cards.
+const PCI_GPU_MAP = {
+    // NVIDIA Blackwell (RTX 50 series, desktop)
+    '2f04': { family: 'rtx5070', type: 'dedicated', name: 'NVIDIA GeForce RTX 5070' },
+    '2c02': { family: 'rtx5080', type: 'dedicated', name: 'NVIDIA GeForce RTX 5080' },
+    '2b85': { family: 'rtx5090', type: 'dedicated', name: 'NVIDIA GeForce RTX 5090' },
+    // AMD Raphael / Granite Ridge desktop iGPU (Ryzen 7000/9000 non-G)
+    '13c0': { family: 'amd-raphael-igpu', type: 'integrated', name: 'AMD Radeon Graphics (Raphael)' }
+};
 class UnifiedDetector {
     constructor() {
         this.backends = {
@@ -613,14 +628,19 @@ class UnifiedDetector {
         const normalized = controllers
             .map((controller) => {
-                const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
+                let name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
                 if (!name || name.toLowerCase() === 'unknown') return null;
                 if (this.isRemoteDisplayModel(name)) return null;
                 const nameLower = name.toLowerCase();
                 if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
-                const isIntegrated = this.isIntegratedGPUModel(name);
+                // Resolve recent cards that the runtime could only report as a bare
+                // "Device <id>" so they get a real name and correct integrated flag.
+                const mapped = this.resolveMappedGpu(name) || this.resolveMappedGpu(controller?.deviceId);
+                if (mapped) name = mapped.name;
+                const isIntegrated = mapped ? mapped.type === 'integrated' : this.isIntegratedGPUModel(name);
                 let vram = isIntegrated
                     ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
                     : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
@@ -711,30 +731,55 @@ class UnifiedDetector {
             if (!isNvidia && !isAMD && !isIntel) continue;
-            const genericName = line
-                .replace(/^[0-9a-f:.]+\s+/i, '')
-                .replace(/\(rev\s+[0-9a-f]+\)$/i, '')
-                .trim();
+            const vendorLabel = isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel');
+            const pciId = this.extractPciDeviceId(line);
+            const mapped = this.resolveMappedGpu(line);
+            // Prefer the resolved model name inside a trailing "[Model] [vvvv:dddd]"
+            // pair (e.g. "[GeForce RTX 4060]"). Otherwise clean the raw lspci line
+            // down to a readable device string instead of using the whole line.
             const bracketName = line.match(/\[(?![0-9a-f]{4}:[0-9a-f]{4}\])([^\]]+)\]\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i);
-            const name = (bracketName?.[1] || genericName || 'Unknown GPU').replace(/\s+/g, ' ').trim();
-            if (!name || name.toLowerCase() === 'unknown gpu') continue;
+            let name = (bracketName?.[1] || '').replace(/\s+/g, ' ').trim();
+            if (!name) {
+                name = line
+                    .replace(/^[0-9a-f]{2,4}:[0-9a-f]{2}\.[0-9a-f]\s+/i, '')                  // PCI address
+                    .replace(/^(?:vga compatible|3d|display)\s+controller\s+\[[0-9a-f]{4}\]:\s*/i, '') // class prefix
+                    .replace(/\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i, '')                            // [vvvv:dddd]
+                    .replace(/\s*\(rev\s+[0-9a-f]+\)\s*$/i, '')                                // (rev xx)
+                    .replace(/\b(?:corporation|corp\.?|inc\.?|advanced micro devices,?)\b/gi, '')
+                    .replace(/\[amd\/ati\]/gi, '')
+                    .replace(/\s+/g, ' ')
+                    .trim();
+            }
+            // If the card could not be resolved to a real model, give it a stable,
+            // readable name that carries the PCI id so it dedupes across sources.
+            const meaningful = name.replace(/\b(?:nvidia|amd|ati|intel|device|graphics|gpu|controller)\b/gi, '').replace(/[^a-z0-9]/gi, '').trim();
+            if (mapped) {
+                name = mapped.name;
+            } else if (!meaningful) {
+                name = pciId ? `${vendorLabel} Device ${pciId.toUpperCase()}` : `${vendorLabel} GPU`;
+            }
-            const isIntegrated = this.isIntegratedGPUModel(name) || isIntel;
+            const isIntegrated = mapped
+                ? mapped.type === 'integrated'
+                : (this.isIntegratedGPUModel(name) || (isIntel && !/\barc\b/i.test(name)));
             let vram = this.estimateFallbackVRAM(name);
             if (isIntegrated) {
                 vram = 0;
             }
-            const dedupeKey = `${name.toLowerCase()}|${isIntegrated ? 'i' : 'd'}`;
+            const dedupeKey = `${this.getGpuMatchKey(name)}|${isIntegrated ? 'i' : 'd'}`;
             if (seen.has(dedupeKey)) continue;
             seen.add(dedupeKey);
             results.push({
                 name,
-                vendor: isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel'),
+                vendor: vendorLabel,
                 type: isIntegrated ? 'integrated' : 'dedicated',
                 memory: { total: vram },
+                pciId: pciId || null,
                 source: 'lspci'
             });
         }
@@ -746,22 +791,27 @@ class UnifiedDetector {
         const num = Number(value);
         if (!Number.isFinite(num) || num <= 0) return 0;
-        // Bytes -> GB
-        if (num > 1024 * 1024) {
-            return Math.round(num / (1024 * 1024 * 1024));
+        // Unit inference by magnitude, kept consistent with
+        // HardwareDetector.normalizeVRAM so both detection paths agree:
+        //
+        //   > 1e6            -> raw bytes.
+        //   >= 1024          -> megabytes (systeminformation reporting range).
+        //   1 <= v <= 256    -> already gigabytes. The previous "1..80 means GB"
+        //                       band silently returned 0 for legitimate large GB
+        //                       values, so normalizeFallbackVRAM(192) was 0 — the
+        //                       192 GB box in issue #88 collapsed to nothing. A
+        //                       single GPU realistically tops out around 192 GB.
+        //   257 <= v < 1024  -> sub-gigabyte framebuffer in MB -> rounds to 0/1 GB.
+        if (num > 1_000_000) {
+            return Math.max(0, Math.round(num / (1024 * 1024 * 1024))); // bytes -> GB
         }
-        // MB -> GB
         if (num >= 1024) {
-            return Math.round(num / 1024);
+            return Math.max(0, Math.round(num / 1024)); // MB -> GB
         }
-        // Likely already GB
-        if (num >= 1 && num <= 80) {
-            return Math.round(num);
+        if (num <= 256) {
+            return Math.round(num); // already GB (plausible single-GPU range)
         }
-        return 0;
+        return Math.max(0, Math.round(num / 1024)); // 257..1023 MB -> GB
     }
     isIntegratedGPUModel(model) {
@@ -799,6 +849,21 @@ class UnifiedDetector {
         if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
         if (lower.includes('rx 6700')) return 12;
+        // NVIDIA workstation / datacenter (Blackwell / Ada / Hopper / Ampere).
+        // Matched BEFORE the consumer RTX entries and the generic fallbacks so a
+        // high-VRAM professional card is not collapsed to a consumer-tier value or
+        // 0 (issue #88: dual "RTX PRO 6000" must reach ~192GB total, not ~16GB).
+        if (lower.includes('rtx pro 6000') || lower.includes('rtx 6000 blackwell')) return 96;
+        if (lower.includes('rtx 6000 ada') || lower.includes('rtx 5000 ada')) return 48;
+        if (lower.includes('rtx a6000') || lower.includes('a6000')) return 48;
+        if (lower.includes('rtx a5000') || lower.includes('a5000')) return 24;
+        if (lower.includes('l40s') || lower.includes('l40')) return 48;
+        if (lower.includes('h200')) return 141;
+        if (lower.includes('h100')) return 80;
+        if (lower.includes('a100') && (lower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(lower))) return 40;
+        if (lower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
+        if (lower.includes('a40')) return 48;
         if (lower.includes('rtx 5090')) return 32;
         if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
         if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
@@ -817,6 +882,15 @@ class UnifiedDetector {
             return `${familyMatch[1]}${familyMatch[2]}`;
         }
+        // Different detection sources describe an unresolved card in different
+        // ways for the SAME hardware, e.g. systeminformation "Device 2f04" and
+        // lspci "...Device [10de:2f04]". Key on the PCI device id (mapped to a
+        // canonical family when known) so those collapse to one inventory entry.
+        const pciId = this.extractPciDeviceId(name);
+        if (pciId) {
+            return (PCI_GPU_MAP[pciId] && PCI_GPU_MAP[pciId].family) || `pci:${pciId}`;
+        }
         const concise = lower
             .replace(/nvidia|amd|ati|intel|corporation|geforce|radeon|graphics/g, '')
             .replace(/\s+/g, ' ')
@@ -825,6 +899,26 @@ class UnifiedDetector {
         return concise || lower;
     }
+    /**
+     * Extract a 4-hex PCI device id from a GPU name/description, handling both the
+     * lspci "[vendor:device]" form and the bare "Device <id>" form that
+     * systeminformation emits for cards it cannot name. Returns null when none.
+     */
+    extractPciDeviceId(text) {
+        const value = String(text || '');
+        const bracket = value.match(/\[[0-9a-f]{4}:([0-9a-f]{4})\]/i);
+        if (bracket) return bracket[1].toLowerCase();
+        const bare = value.match(/\bdevice\s+([0-9a-f]{4})\b/i);
+        if (bare) return bare[1].toLowerCase();
+        return null;
+    }
+    /** Look up a curated mapping for a recent card by PCI device id (or null). */
+    resolveMappedGpu(text) {
+        const pciId = this.extractPciDeviceId(text);
+        return pciId && PCI_GPU_MAP[pciId] ? { pciId, ...PCI_GPU_MAP[pciId] } : null;
+    }
     /**
      * Generate hardware fingerprint for benchmarks
      */
@@ -879,9 +973,19 @@ class UnifiedDetector {
             summary.bestBackend === 'metal' ||
             (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
         ) {
-            return sizeGB <= (summary.effectiveMemory - 2);
+            const effectiveMemory = Number(summary.effectiveMemory);
+            if (!Number.isFinite(effectiveMemory) || effectiveMemory <= 0) return false;
+            return sizeGB <= (effectiveMemory - 2);
         } else {
-            const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
+            const totalVRAM = Number(summary.totalVRAM);
+            if (!Number.isFinite(totalVRAM) || totalVRAM <= 0) return false;
+            // Guard the per-GPU divisor: gpuCount can be 0 when the summary was
+            // built without resolved GPU memory, which previously produced
+            // Infinity (totalVRAM / 0) and made any model "fit".
+            const gpuCount = Math.max(1, Number(summary.gpuCount) || 0);
+            const availableVRAM = useMultiGPU ? totalVRAM : (totalVRAM / gpuCount);
+            if (!Number.isFinite(availableVRAM) || availableVRAM <= 0) return false;
             return sizeGB <= (availableVRAM - 2);
         }
     }

package/src/index.js CHANGED

@@ -20,6 +20,17 @@ const {
 } = require('./provenance/model-provenance');
 const { normalizePlatform } = require('./utils/platform');
+function normalizeRecommendationRuntime(runtime = 'auto') {
+    const normalized = String(runtime || 'auto').trim().toLowerCase();
+    if (['auto', 'all', '*'].includes(normalized)) return 'auto';
+    if (['ollama', 'vllm', 'mlx', 'llama.cpp', 'llamacpp', 'llama_cpp', 'transformers', 'hf'].includes(normalized)) {
+        if (normalized === 'llamacpp' || normalized === 'llama_cpp') return 'llama.cpp';
+        if (normalized === 'hf') return 'transformers';
+        return normalized;
+    }
+    return normalizeRuntime(normalized);
+}
 class LLMChecker {
     constructor(options = {}) {
         this.hardwareDetector = new HardwareDetector();
@@ -2467,7 +2478,59 @@ class LLMChecker {
     async generateIntelligentRecommendations(hardware, options = {}) {
         try {
             this.logger.info('Generating intelligent recommendations...');
-            const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
+            const selectedRuntime = normalizeRecommendationRuntime(options.runtime || 'auto');
+            const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
+            if (options.registry !== false) {
+                let registryRecommender = null;
+                try {
+                    const { RegistryRecommender } = require('./data/registry-recommender');
+                    registryRecommender = new RegistryRecommender();
+                    await registryRecommender.initialize();
+                    const registryResult = await registryRecommender.getBestModelsForHardware(hardware, {
+                        runtime: selectedRuntime,
+                        optimizeFor,
+                        limit: 3,
+                        poolLimit: options.poolLimit || 20000,
+                        localOnly: options.includeGated ? false : true
+                    });
+                    const recommendations = registryResult.recommendations;
+                    const hasRegistryRecommendations = Object.values(recommendations)
+                        .some((group) => Array.isArray(group.bestModels) && group.bestModels.length > 0);
+                    if (hasRegistryRecommendations) {
+                        const summary = this.intelligentRecommender.generateRecommendationSummary(
+                            recommendations,
+                            hardware,
+                            { optimizeFor }
+                        );
+                        const totalModelsAnalyzed = Number(registryResult.totalModelsAnalyzed) || Object.values(recommendations)
+                            .reduce((sum, group) => sum + (Number(group.totalCandidates) || Number(group.totalEvaluated) || 0), 0);
+                        this.logger.info(`Generated registry recommendations for ${Object.keys(recommendations).length} categories`);
+                        return {
+                            recommendations,
+                            summary,
+                            optimizeFor: summary.optimize_for || optimizeFor,
+                            runtime: selectedRuntime,
+                            recommendationSource: 'registry',
+                            registryStats: registryResult.registryStats,
+                            totalModelsAnalyzed,
+                            generatedAt: new Date().toISOString()
+                        };
+                    }
+                    this.logger.warn('Registry recommendations were empty, falling back to Ollama catalog');
+                } catch (error) {
+                    this.logger.warn('Registry recommendations unavailable, falling back to Ollama catalog', { error: error.message });
+                } finally {
+                    if (registryRecommender) {
+                        registryRecommender.close();
+                    }
+                }
+            }
             // Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
             const ollamaData = await this.loadOllamaModelData();
@@ -2479,11 +2542,11 @@ class LLMChecker {
             }
             // Generar recomendaciones inteligentes
-            const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
+            const fallbackRuntime = selectedRuntime === 'auto' ? 'ollama' : selectedRuntime;
             const recommendations = await this.intelligentRecommender.getBestModelsForHardware(
                 hardware,
                 allModels,
-                { optimizeFor, runtime: selectedRuntime }
+                { optimizeFor, runtime: fallbackRuntime }
             );
             const summary = this.intelligentRecommender.generateRecommendationSummary(
                 recommendations,
@@ -2497,7 +2560,8 @@ class LLMChecker {
                 recommendations,
                 summary,
                 optimizeFor: summary.optimize_for || optimizeFor,
-                runtime: selectedRuntime,
+                runtime: fallbackRuntime,
+                recommendationSource: 'ollama_catalog',
                 totalModelsAnalyzed: allModels.length,
                 generatedAt: new Date().toISOString()
             };