npm - llm-checker - Versions diffs - 3.5.14 → 3.6.1 - Mend

llm-checker 3.5.14 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.md +14 -1
package/analyzer/compatibility.js +5 -0
package/analyzer/performance.js +5 -4
package/bin/cli.js +5 -39
package/bin/enhanced_cli.js +88 -19
package/bin/mcp-server.mjs +266 -101
package/package.json +7 -7
package/src/ai/multi-objective-selector.js +118 -11
package/src/calibration/calibration-manager.js +4 -1
package/src/data/model-database.js +39 -5
package/src/data/sync-manager.js +32 -18
package/src/hardware/backends/apple-silicon.js +5 -1
package/src/hardware/backends/cuda-detector.js +47 -19
package/src/hardware/backends/intel-detector.js +6 -2
package/src/hardware/backends/rocm-detector.js +6 -2
package/src/hardware/detector.js +57 -30
package/src/hardware/unified-detector.js +129 -25
package/src/models/ai-check-selector.js +36 -5
package/src/models/deterministic-selector.js +163 -15
package/src/models/expanded_database.js +9 -5
package/src/models/intelligent-selector.js +87 -1
package/src/models/requirements.js +16 -11
package/src/models/scoring-core.js +341 -0
package/src/models/scoring-engine.js +9 -2
package/src/ollama/capacity-planner.js +15 -2
package/src/ollama/client.js +70 -30
package/src/ollama/enhanced-client.js +20 -2
package/src/ollama/manager.js +14 -2
package/src/policy/cli-policy.js +8 -2
package/src/policy/policy-engine.js +2 -1
package/src/provenance/model-provenance.js +4 -1
package/src/ui/cli-theme.js +57 -7
package/src/ui/interactive-panel.js +176 -20

package/src/ai/multi-objective-selector.js CHANGED Viewed

@@ -10,6 +10,7 @@
 const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
 const { normalizePlatform } = require('../utils/platform');
+const { rankModels } = require('../models/scoring-core');
 class MultiObjectiveSelector {
     constructor() {
@@ -40,23 +41,124 @@ class MultiObjectiveSelector {
     }
     /**
-     * Select best models using multi-objective ranking
+     * Select best models using the UNIFIED canonical scoring core (issue #88).
+     *
+     * `check` used to rank through this selector's own multi-objective math,
+     * which diverged from `recommend`/`smart-recommend` and never received the
+     * PR #89 high-capacity right-sizing fix. It now routes the ranking through
+     * the shared DeterministicModelSelector core (via scoring-core.rankModels)
+     * so identical (model, hardware) inputs score identically across all three
+     * commands and the high-capacity floor applies here too.
+     *
+     * The output shape is preserved exactly: `{ compatible, marginal,
+     * incompatible }`, each entry being the ORIGINAL model object spread with
+     * `totalScore`, `components { quality, speed, ttfb, context, hardwareMatch }`
+     * and `reasoning`, so downstream `check` rendering and the regression test
+     * (which calls `estimateModelParams` on the returned object) keep working.
      */
     async selectBestModels(hardware, models, category = 'general', topK = 10) {
-        // Step 1: Hard filters - remove incompatible models
+        const inputModels = Array.isArray(models) ? models.filter(Boolean) : [];
+        if (inputModels.length === 0) {
+            return { compatible: [], marginal: [], incompatible: [] };
+        }
+        let ranking;
+        try {
+            ranking = await rankModels(inputModels, hardware, { category, topN: inputModels.length });
+        } catch (error) {
+            ranking = null;
+        }
+        // Defensive fallback: if the unified core is unavailable for any reason,
+        // fall back to the legacy multi-objective ranking so `check` still works.
+        if (!ranking || !Array.isArray(ranking.candidates)) {
+            return this.selectBestModelsLegacy(hardware, inputModels, category, topK);
+        }
+        const scoredModels = [];
+        const rankedSources = new Set();
+        for (const candidate of ranking.candidates) {
+            const source = candidate?.meta?.__source;
+            if (!source) continue;
+            rankedSources.add(source);
+            scoredModels.push(this.mapCoreCandidateToMultiObjective(candidate, source, hardware, category));
+        }
+        // Models the canonical core dropped (category filter / budget) are not
+        // viable on this hardware for this use case -> treat as incompatible,
+        // mirroring the previous hard-filter semantics.
+        const incompatibleExtras = inputModels
+            .filter((model) => !rankedSources.has(model))
+            .map((model) => ({
+                ...model,
+                totalScore: 0,
+                components: { quality: 0, speed: 0, ttfb: 0, context: 0, hardwareMatch: 0 },
+                reasoning: 'Filtered out by unified scoring core (does not fit hardware/use-case)'
+            }));
+        scoredModels.sort((a, b) => b.totalScore - a.totalScore);
+        const classified = this.classifyResults(scoredModels, topK);
+        classified.incompatible = [...classified.incompatible, ...incompatibleExtras].slice(0, 5);
+        return classified;
+    }
+    /**
+     * Map a unified-core candidate back into this selector's multi-objective
+     * output shape. The 0-100 `score` from the deterministic core becomes
+     * `totalScore`; component sub-scores are normalized to 0-1 to match the
+     * historical `components` contract consumed by `check` rendering.
+     */
+    mapCoreCandidateToMultiObjective(candidate, source, hardware, category) {
+        const components = candidate.components || {};
+        const to01 = (value) => {
+            const num = Number(value);
+            if (!Number.isFinite(num)) return 0;
+            return Math.max(0, Math.min(1, num / 100));
+        };
+        const quality = to01(components.Q);
+        const speed = to01(components.S);
+        const context = to01(components.C);
+        // The deterministic core folds hardware fitness into the `F` (fit) plus
+        // `H` (high-capacity right-sizing) components; surface that as the
+        // historical `hardwareMatch` signal so `check` insights stay meaningful.
+        const hardwareMatch = to01((Number(components.F) || 0) + (Number(components.H) || 0));
+        return {
+            ...source,
+            totalScore: Math.round(candidate.score * 100) / 100,
+            score: Math.round(candidate.score * 100) / 100,
+            components: {
+                quality,
+                speed,
+                ttfb: speed, // ttfb tracks speed; legacy field retained for shape
+                context,
+                hardwareMatch
+            },
+            quant: candidate.quant || source.quant,
+            estimatedRAM: candidate.requiredGB,
+            estimatedTPS: candidate.estTPS,
+            reasoning: candidate.rationale ||
+                this.generateReasoning(source, hardware, quality, hardwareMatch)
+        };
+    }
+    /**
+     * Legacy multi-objective ranking, retained ONLY as a defensive fallback if
+     * the unified core throws. Not used on the normal path.
+     */
+    selectBestModelsLegacy(hardware, models, category = 'general', topK = 10) {
         const compatibleModels = this.applyHardFilters(hardware, models);
         if (compatibleModels.length === 0) {
             return { compatible: [], marginal: [], incompatible: models };
         }
-        // Step 2: Multi-objective scoring
-        const scoredModels = compatibleModels.map(model =>
+        const scoredModels = compatibleModels.map(model =>
             this.calculateMultiObjectiveScore(hardware, model, category)
         ).filter(Boolean);
-        // Step 3: Sort and classify
         scoredModels.sort((a, b) => b.totalScore - a.totalScore);
         return this.classifyResults(scoredModels, topK);
@@ -784,11 +886,16 @@ class MultiObjectiveSelector {
     }
     estimateKVCache(model, contextLength) {
-        // Rough KV cache estimation: 2 * layers * hidden_size * seq_len * 2 bytes
+        // KV cache grows linearly with parameter count and context length. The old
+        // formula derived both "layers" and "hidden size" from params and multiplied
+        // them, making the estimate scale with params^2 — a 70B model at 8k came out
+        // at ~299 GB (real value ~11 GB), which made checkMemoryCompatibility reject
+        // every mid/large model. Use the same calibrated linear factor as the Ollama
+        // capacity planner (~0.08 GB per 1B params at 4k context).
         const params = this.estimateModelParams(model);
-        const layers = Math.round(params * 2); // Rough approximation
-        const hiddenSize = Math.round(params * 1000); // Rough approximation
-        return (2 * layers * hiddenSize * contextLength * 2) / (1024 ** 3); // GB
+        const ctx = Number.isFinite(contextLength) && contextLength > 0 ? contextLength : 4096;
+        const kvFactorPer4k = 0.08; // GB per 1B params at 4k context (fp16)
+        return Math.max(0, params * kvFactorPer4k * (ctx / 4096)); // GB
     }
     estimateTokensPerSecond(hardware, model) {

package/src/calibration/calibration-manager.js CHANGED Viewed

@@ -320,7 +320,10 @@ class CalibrationManager {
                 NO_COLOR: '1'
             }
         });
-        const latencyMs = Number((process.hrtime.bigint() - started) / 1_000_000n);
+        // Convert ns->ms in floating point: dividing the BigInt first floored away
+        // all sub-millisecond precision (a 0.5 ms call read as 0 ms, skewing p50/p95,
+        // ttft and tokens/sec). The ns diff is well within Number's safe range.
+        const latencyMs = Number(process.hrtime.bigint() - started) / 1_000_000;
         if (result.error) {
             const error = new Error(result.error.message || 'Failed to execute runtime prompt.');

package/src/data/model-database.js CHANGED Viewed

@@ -13,6 +13,11 @@ class ModelDatabase {
         this.seedDbPath = options.seedDbPath || path.join(__dirname, 'seed', 'models.db');
         this.db = null;
         this.initialized = false;
+        // Batched-write state: during a bulk sync we defer the (expensive) full
+        // sql.js export-and-write until the batch ends, instead of rewriting the
+        // whole DB file on every single row.
+        this._batchDepth = 0;
+        this._pendingSave = false;
     }
     /**
@@ -148,7 +153,29 @@ class ModelDatabase {
         if (!this.useBetterSqlite && this.db) {
             const data = this.db.export();
             const buffer = Buffer.from(data);
-            fs.writeFileSync(this.dbPath, buffer);
+            // Write to a temp file then atomically rename, so a crash/SIGINT
+            // mid-write can't leave a truncated, unreadable models.db behind.
+            const tmpPath = `${this.dbPath}.tmp`;
+            fs.writeFileSync(tmpPath, buffer);
+            fs.renameSync(tmpPath, this.dbPath);
+            this._pendingSave = false;
+        }
+    }
+    /**
+     * Group many writes so the database file is exported/written once at the end
+     * instead of on every row. Nestable; the outermost endBatch() flushes.
+     */
+    beginBatch() {
+        this._batchDepth += 1;
+    }
+    endBatch() {
+        if (this._batchDepth > 0) {
+            this._batchDepth -= 1;
+        }
+        if (this._batchDepth === 0 && this._pendingSave) {
+            this.saveToFile();
         }
     }
@@ -160,7 +187,11 @@ class ModelDatabase {
             return this.db.prepare(sql).run(...params);
         } else {
             this.db.run(sql, params);
-            this.saveToFile();
+            if (this._batchDepth > 0) {
+                this._pendingSave = true; // defer the full export until endBatch()
+            } else {
+                this.saveToFile();
+            }
         }
     }
@@ -406,9 +437,12 @@ class ModelDatabase {
             params.push(filters.maxSizeGB);
         }
-        // Order by
-        const orderBy = filters.orderBy || 'pulls';
-        const orderDir = filters.orderDir || 'DESC';
+        // Order by — column names and direction can't be parameterized, so whitelist
+        // them. A future caller forwarding a user-supplied sort field would otherwise
+        // be a SQL-injection / crash vector on this public filters API.
+        const ORDERABLE_COLUMNS = new Set(['pulls', 'name', 'tags_count', 'updated_at', 'created_at']);
+        const orderBy = ORDERABLE_COLUMNS.has(filters.orderBy) ? filters.orderBy : 'pulls';
+        const orderDir = String(filters.orderDir).toUpperCase() === 'ASC' ? 'ASC' : 'DESC';
         sql += ` ORDER BY m.${orderBy} ${orderDir}`;
         // Limit

package/src/data/sync-manager.js CHANGED Viewed

@@ -47,22 +47,27 @@ class SyncManager {
         this.onProgress({ phase: 'start', message: 'Starting full sync...' });
-        // Clear existing data
-        this.db.clear();
-        // Scrape all models
-        const result = await this.scraper.scrapeAll((model, variants) => {
-            // Save model as we go
-            this.db.upsertModel(model);
-            // Save variants
-            for (const variant of variants) {
-                this.db.upsertVariant(variant);
-            }
-        });
+        // Batch all writes into a single atomic DB file write at the end. Saving on
+        // every upsert re-exported and rewrote the whole sql.js DB thousands of
+        // times, turning the sync into O(n^2) disk I/O.
+        this.db.beginBatch();
+        try {
+            // Clear existing data
+            this.db.clear();
+            // Scrape all models
+            await this.scraper.scrapeAll((model, variants) => {
+                this.db.upsertModel(model);
+                for (const variant of variants) {
+                    this.db.upsertVariant(variant);
+                }
+            });
-        // Update sync timestamp
-        this.db.setLastSync(new Date().toISOString());
+            // Update sync timestamp
+            this.db.setLastSync(new Date().toISOString());
+        } finally {
+            this.db.endBatch();
+        }
         const stats = this.db.getStats();
@@ -110,6 +115,9 @@ class SyncManager {
         let updated = 0;
         let added = 0;
+        // Batch all upserts into a single atomic DB write at the end (see fullSync).
+        this.db.beginBatch();
+        try {
         // Process new models
         for (const { id } of newModels) {
             try {
@@ -157,12 +165,18 @@ class SyncManager {
                 await this.sleep(100);
             } catch (error) {
-                // Ignore errors during incremental update
+                // Log instead of silently swallowing: a systematic failure here
+                // (network down, schema mismatch) would otherwise report success
+                // with updated: 0 and leave the catalog quietly stale.
+                this.onError(`Error updating ${id}: ${error.message}`);
             }
         }
-        // Update sync timestamp
-        this.db.setLastSync(new Date().toISOString());
+            // Update sync timestamp
+            this.db.setLastSync(new Date().toISOString());
+        } finally {
+            this.db.endBatch();
+        }
         const stats = this.db.getStats();

package/src/hardware/backends/apple-silicon.js CHANGED Viewed

@@ -283,7 +283,11 @@ class AppleSiliconDetector {
         const info = this.detect();
         if (!info) return null;
-        return `apple-${info.chip.toLowerCase().replace(/\s+/g, '-')}-${info.memory.unified}gb`;
+        // info.chip stays null when the sysctl brand-string read fails (sandboxed
+        // env, missing binary); fall back so this can't throw on null.toLowerCase().
+        const chip = info.chip || 'apple-silicon';
+        const unified = info.memory?.unified || 0;
+        return `apple-${chip.toLowerCase().replace(/\s+/g, '-')}-${unified}gb`;
     }
     /**

package/src/hardware/backends/cuda-detector.js CHANGED Viewed

@@ -249,40 +249,65 @@ class CUDADetector {
             const lines = gpuData.split('\n');
+            // Older drivers emit fewer columns (e.g. no power/clocks), and the CSV
+            // separator can be either ", " or "," depending on driver/locale. Split
+            // tolerantly and only require the leading identity + memory columns so a
+            // GPU is never dropped just because optional trailing fields are absent.
+            const toMB = (value) => {
+                const n = parseInt(value, 10);
+                return Number.isFinite(n) ? n : 0;
+            };
+            const toGB = (value) => {
+                const mb = toMB(value);
+                return mb > 0 ? Math.round(mb / 1024) : 0;
+            };
+            const toInt = (value) => {
+                const n = parseInt(value, 10);
+                return Number.isFinite(n) ? n : 0;
+            };
+            const toFloat = (value) => {
+                const n = parseFloat(value);
+                return Number.isFinite(n) ? n : 0;
+            };
             for (const line of lines) {
-                const parts = line.split(', ').map(p => p.trim());
+                if (!line || !line.trim()) continue;
+                const parts = line.split(/\s*,\s*/).map(p => p.trim());
+                // Need at least index, name, uuid, memory.total to describe a GPU.
+                if (parts.length < 4) continue;
-                if (parts.length < 10) continue;
+                const memTotalMB = toMB(parts[3]);
                 const gpu = {
-                    index: parseInt(parts[0]) || 0,
+                    index: toInt(parts[0]),
                     name: parts[1] || 'Unknown NVIDIA GPU',
                     uuid: parts[2] || null,
                     memory: {
-                        total: Math.round(parseInt(parts[3]) / 1024) || 0,  // Convert MB to GB
-                        free: Math.round(parseInt(parts[4]) / 1024) || 0,
-                        used: Math.round(parseInt(parts[5]) / 1024) || 0
+                        total: toGB(parts[3]),  // Convert MB to GB
+                        free: toGB(parts[4]),
+                        used: toGB(parts[5])
                     },
                     computeMode: parts[6] || 'Default',
                     pcie: {
-                        generation: parseInt(parts[7]) || 0,
-                        width: parseInt(parts[8]) || 0
+                        generation: toInt(parts[7]),
+                        width: toInt(parts[8])
                     },
                     power: {
-                        draw: parseFloat(parts[9]) || 0,
-                        limit: parseFloat(parts[10]) || 0
+                        draw: toFloat(parts[9]),
+                        limit: toFloat(parts[10])
                     },
-                    temperature: parseInt(parts[11]) || 0,
+                    temperature: toInt(parts[11]),
                     utilization: {
-                        gpu: parseInt(parts[12]) || 0,
-                        memory: parseInt(parts[13]) || 0
+                        gpu: toInt(parts[12]),
+                        memory: toInt(parts[13])
                     },
                     clocks: {
-                        current: parseInt(parts[14]) || 0,
-                        max: parseInt(parts[15]) || 0
+                        current: toInt(parts[14]),
+                        max: toInt(parts[15])
                     },
                     capabilities: this.getGPUCapabilities(parts[1]),
-                    speedCoefficient: this.calculateSpeedCoefficient(parts[1], parseInt(parts[3]))
+                    speedCoefficient: this.calculateSpeedCoefficient(parts[1], memTotalMB)
                 };
                 result.gpus.push(gpu);
@@ -298,15 +323,18 @@ class CUDADetector {
                 const lines = simpleQuery.split('\n');
                 for (let i = 0; i < lines.length; i++) {
-                    const [name, memMB] = lines[i].split(', ').map(p => p.trim());
-                    const memGB = Math.round(parseInt(memMB) / 1024) || 0;
+                    if (!lines[i] || !lines[i].trim()) continue;
+                    const [name, memMB] = lines[i].split(/\s*,\s*/).map(p => p.trim());
+                    const parsedMB = parseInt(memMB, 10);
+                    const memMBSafe = Number.isFinite(parsedMB) ? parsedMB : 0;
+                    const memGB = memMBSafe > 0 ? Math.round(memMBSafe / 1024) : 0;
                     result.gpus.push({
                         index: i,
                         name: name || 'NVIDIA GPU',
                         memory: { total: memGB, free: memGB, used: 0 },
                         capabilities: this.getGPUCapabilities(name),
-                        speedCoefficient: this.calculateSpeedCoefficient(name, parseInt(memMB))
+                        speedCoefficient: this.calculateSpeedCoefficient(name, memMBSafe)
                     });
                     result.totalVRAM += memGB;
                 }

package/src/hardware/backends/intel-detector.js CHANGED Viewed

@@ -111,8 +111,12 @@ class IntelDetector {
                 const name = nameMatch[0].replace(/Corporation\s*/i, '').trim();
                 const isDedicated = name.toLowerCase().includes('arc');
-                // Get VRAM from sysfs or estimate
-                let vram = this.getVRAMFromSysfs(block) || this.estimateVRAM(name);
+                // Prefer the model-based estimate: getVRAMFromSysfs reads the PCI
+                // MMIO BAR size, which is NOT the card's VRAM (a non-Resizable-BAR
+                // Arc reports ~256M while having 8-16GB), so a wrong BAR value must
+                // not shadow the reliable per-model estimate. BAR is only a last
+                // resort when the model can't be recognized.
+                let vram = this.estimateVRAM(name) || this.getVRAMFromSysfs(block);
                 const gpu = {
                     index: result.gpus.length,

package/src/hardware/backends/rocm-detector.js CHANGED Viewed

@@ -942,8 +942,12 @@ class ROCmDetector {
                 // Try to match device ID to specific variant
                 const deviceInfo = ROCmDetector.AMD_DEVICE_IDS[deviceId];
                 if (deviceInfo) return deviceInfo.name;
-                // Default to first variant with "AMD Radeon" prefix
-                return `AMD Radeon ${variants[0]}`;
+                // Unknown device ID: lspci groups several SKUs behind one string
+                // (e.g. "Radeon RX 7900 XT/7900 XTX/7900M"). Committing to variants[0]
+                // mislabels the card as the lowest-tier SKU and yields the wrong VRAM,
+                // so keep the full variant list — honestly ambiguous beats confidently
+                // wrong.
+                return `AMD Radeon ${variants.join('/')}`;
             }
             return `AMD Radeon ${bracketName}`;
         }

package/src/hardware/detector.js CHANGED Viewed

@@ -85,12 +85,16 @@ class HardwareDetector {
         const freeGB = Math.round(memory.free / (1024 ** 3));
         const usedGB = totalGB - freeGB;
+        // Guard against a zero/unknown total (some virtualized or sandboxed hosts
+        // report memory.total === 0), which would otherwise make usagePercent NaN.
+        const usagePercent = totalGB > 0 ? Math.round((usedGB / totalGB) * 100) : 0;
         return {
             total: totalGB,
             free: freeGB,
             used: usedGB,
             available: Math.round(memory.available / (1024 ** 3)),
-            usagePercent: Math.round((usedGB / totalGB) * 100),
+            usagePercent,
             swapTotal: Math.round(memory.swaptotal / (1024 ** 3)),
             swapUsed: Math.round(memory.swapused / (1024 ** 3)),
             score: this.calculateMemoryScore(totalGB, freeGB)
@@ -420,7 +424,12 @@ class HardwareDetector {
                 driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
             };
         } catch (error) {
-            // Keep systeminformation-only results when backend-specific detection is unavailable
+            // Keep systeminformation-only results when backend-specific detection is
+            // unavailable. Surface the cause under a debug flag so a genuine bug in the
+            // enrichment path is distinguishable from "no backend tools installed".
+            if (process.env.DEBUG_GPU || process.env.LLM_CHECKER_DEBUG) {
+                console.error('[llm-checker] enrichWithUnifiedHardware failed:', error && error.stack ? error.stack : error);
+            }
         }
     }
@@ -553,8 +562,23 @@ class HardwareDetector {
         // NVIDIA data-center / workstation
         if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
+        // NVIDIA Blackwell / Ada / Hopper workstation & datacenter cards. These are
+        // matched BEFORE the generic "rtx -> 8" fallback so high-VRAM professional
+        // GPUs (e.g. "RTX PRO 6000") are not collapsed to 8GB (issue #88).
+        if (modelLower.includes('rtx pro 6000') || modelLower.includes('rtx 6000 blackwell')) return 96;
+        if (modelLower.includes('rtx 6000 ada') || modelLower.includes('rtx 5000 ada')) return 48;
+        if (modelLower.includes('rtx a6000') || modelLower.includes('a6000')) return 48;
+        if (modelLower.includes('rtx a5000') || modelLower.includes('a5000')) return 24;
+        if (modelLower.includes('l40s') || modelLower.includes('l40')) return 48;
+        if (modelLower.includes('h200')) return 141;
+        if (modelLower.includes('h100')) return 80;
+        if (modelLower.includes('a100') && (modelLower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(modelLower))) return 40;
+        if (modelLower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
+        if (modelLower.includes('a40')) return 48;
         if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
         // NVIDIA RTX 50 series
         if (modelLower.includes('rtx 5090')) return 32;
         if (modelLower.includes('rtx 5080')) return 16;
@@ -635,7 +659,7 @@ class HardwareDetector {
         else score += totalGB * 2;
         // Score basado en RAM disponible
-        const freePercent = (freeGB / totalGB) * 100;
+        const freePercent = totalGB > 0 ? (freeGB / totalGB) * 100 : 0;
         if (freePercent > 50) score += 20;
         else if (freePercent > 30) score += 15;
         else if (freePercent > 20) score += 10;
@@ -738,34 +762,37 @@ class HardwareDetector {
      * Normalize VRAM values (handle different units and wrong totals)
      */
     normalizeVRAM(vram) {
-        if (!vram || vram <= 0) return 0;
-        let vramValue = vram;
-        // Handle VRAM in bytes (some systems report this way)
-        if (vramValue > 100000) {
-            vramValue = Math.round(vramValue / (1024 * 1024)); // Convert bytes to MB
+        const raw = Number(vram);
+        if (!Number.isFinite(raw) || raw <= 0) return 0;
+        // Inputs reaching this function come from systeminformation / lspci (which
+        // express controller VRAM in megabytes), from raw byte counts on systems
+        // that report that way, and increasingly from our own curated GB tables
+        // (estimateVRAMFromModel, device-id maps) fed back through here. The unit
+        // is inferred from magnitude:
+        //
+        //   > 1e6            -> raw bytes (a 192 GB card is ~2.06e11 bytes, while
+        //                       the same card in MB is ~196,608, well under 1e6).
+        //   >= 1024          -> megabytes (the smallest dedicated framebuffer that
+        //                       still rounds to >=1 GB; this is the systeminformation
+        //                       reporting range, e.g. 8192, 16384, 16368).
+        //   1 <= v <= 256    -> already gigabytes. Real single-GPU VRAM tops out
+        //                       around 192 GB (H200 ~141, B200/MI ~192), so any
+        //                       small integer in this band is a GB value. This is
+        //                       the dead-zone fix for issue #88: normalizeVRAM(96)
+        //                       used to return 0 (treated 96 as 96 MB -> 0 GB).
+        //   257 <= v < 1024  -> sub-gigabyte framebuffer in MB (e.g. a 512 MB
+        //                       aperture) -> rounds to 0/1 GB as before.
+        if (raw > 1_000_000) {
+            return Math.max(0, Math.round(raw / (1024 * 1024 * 1024))); // bytes -> GB
         }
-        // Now determine if we have MB or GB values
-        if (vramValue >= 1024) {
-            // Values >= 1024 are likely MB, convert to GB
-            vramValue = Math.round(vramValue / 1024);
-        } else if (vramValue >= 512 && vramValue < 1024) {
-            // 512-1023 MB, round to 1GB
-            vramValue = 1;
-        } else if (vramValue > 80) {
-            // Values between 80-511 are likely incorrect MB values, treat as MB
-            vramValue = Math.round(vramValue / 1024) || 1;
-        } else if (vramValue >= 1 && vramValue <= 80) {
-            // Values 1-80 are likely already in GB, keep as is
-            vramValue = vramValue;
-        } else {
-            // Values < 1 round to 0
-            vramValue = 0;
+        if (raw >= 1024) {
+            return Math.max(0, Math.round(raw / 1024)); // MB -> GB
         }
-        return vramValue;
+        if (raw <= 256) {
+            return Math.round(raw); // already GB (plausible single-GPU range)
+        }
+        return Math.max(0, Math.round(raw / 1024)); // 257..1023 MB -> GB
     }
     /**