npm - llm-checker - Versions diffs - 3.5.14 → 3.6.1 - Mend

llm-checker 3.5.14 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.md +14 -1
package/analyzer/compatibility.js +5 -0
package/analyzer/performance.js +5 -4
package/bin/cli.js +5 -39
package/bin/enhanced_cli.js +88 -19
package/bin/mcp-server.mjs +266 -101
package/package.json +7 -7
package/src/ai/multi-objective-selector.js +118 -11
package/src/calibration/calibration-manager.js +4 -1
package/src/data/model-database.js +39 -5
package/src/data/sync-manager.js +32 -18
package/src/hardware/backends/apple-silicon.js +5 -1
package/src/hardware/backends/cuda-detector.js +47 -19
package/src/hardware/backends/intel-detector.js +6 -2
package/src/hardware/backends/rocm-detector.js +6 -2
package/src/hardware/detector.js +57 -30
package/src/hardware/unified-detector.js +129 -25
package/src/models/ai-check-selector.js +36 -5
package/src/models/deterministic-selector.js +163 -15
package/src/models/expanded_database.js +9 -5
package/src/models/intelligent-selector.js +87 -1
package/src/models/requirements.js +16 -11
package/src/models/scoring-core.js +341 -0
package/src/models/scoring-engine.js +9 -2
package/src/ollama/capacity-planner.js +15 -2
package/src/ollama/client.js +70 -30
package/src/ollama/enhanced-client.js +20 -2
package/src/ollama/manager.js +14 -2
package/src/policy/cli-policy.js +8 -2
package/src/policy/policy-engine.js +2 -1
package/src/provenance/model-provenance.js +4 -1
package/src/ui/cli-theme.js +57 -7
package/src/ui/interactive-panel.js +176 -20

package/src/hardware/unified-detector.js CHANGED

@@ -13,6 +13,21 @@ const si = require('systeminformation');
 const { execSync } = require('child_process');
 const { normalizePlatform } = require('../utils/platform');
+// Recent GPUs whose PCI device id is not yet resolved to a model name by the
+// distro pci.ids database (so lspci / systeminformation report them as a bare
+// "Device <id>"). Mapping the device id lets us (a) give them a real name and
+// (b) collapse the multiple raw views of the SAME card that different detection
+// sources produce into one inventory entry. Unknown ids degrade gracefully to a
+// stable `pci:<id>` match key, so this table only needs the newest cards.
+const PCI_GPU_MAP = {
+    // NVIDIA Blackwell (RTX 50 series, desktop)
+    '2f04': { family: 'rtx5070', type: 'dedicated', name: 'NVIDIA GeForce RTX 5070' },
+    '2c02': { family: 'rtx5080', type: 'dedicated', name: 'NVIDIA GeForce RTX 5080' },
+    '2b85': { family: 'rtx5090', type: 'dedicated', name: 'NVIDIA GeForce RTX 5090' },
+    // AMD Raphael / Granite Ridge desktop iGPU (Ryzen 7000/9000 non-G)
+    '13c0': { family: 'amd-raphael-igpu', type: 'integrated', name: 'AMD Radeon Graphics (Raphael)' }
+};
 class UnifiedDetector {
     constructor() {
         this.backends = {
@@ -613,14 +628,19 @@ class UnifiedDetector {
         const normalized = controllers
             .map((controller) => {
-                const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
+                let name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
                 if (!name || name.toLowerCase() === 'unknown') return null;
                 if (this.isRemoteDisplayModel(name)) return null;
                 const nameLower = name.toLowerCase();
                 if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
-                const isIntegrated = this.isIntegratedGPUModel(name);
+                // Resolve recent cards that the runtime could only report as a bare
+                // "Device <id>" so they get a real name and correct integrated flag.
+                const mapped = this.resolveMappedGpu(name) || this.resolveMappedGpu(controller?.deviceId);
+                if (mapped) name = mapped.name;
+                const isIntegrated = mapped ? mapped.type === 'integrated' : this.isIntegratedGPUModel(name);
                 let vram = isIntegrated
                     ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
                     : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
@@ -711,30 +731,55 @@ class UnifiedDetector {
             if (!isNvidia && !isAMD && !isIntel) continue;
-            const genericName = line
-                .replace(/^[0-9a-f:.]+\s+/i, '')
-                .replace(/\(rev\s+[0-9a-f]+\)$/i, '')
-                .trim();
+            const vendorLabel = isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel');
+            const pciId = this.extractPciDeviceId(line);
+            const mapped = this.resolveMappedGpu(line);
+            // Prefer the resolved model name inside a trailing "[Model] [vvvv:dddd]"
+            // pair (e.g. "[GeForce RTX 4060]"). Otherwise clean the raw lspci line
+            // down to a readable device string instead of using the whole line.
             const bracketName = line.match(/\[(?![0-9a-f]{4}:[0-9a-f]{4}\])([^\]]+)\]\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i);
-            const name = (bracketName?.[1] || genericName || 'Unknown GPU').replace(/\s+/g, ' ').trim();
-            if (!name || name.toLowerCase() === 'unknown gpu') continue;
+            let name = (bracketName?.[1] || '').replace(/\s+/g, ' ').trim();
+            if (!name) {
+                name = line
+                    .replace(/^[0-9a-f]{2,4}:[0-9a-f]{2}\.[0-9a-f]\s+/i, '')                  // PCI address
+                    .replace(/^(?:vga compatible|3d|display)\s+controller\s+\[[0-9a-f]{4}\]:\s*/i, '') // class prefix
+                    .replace(/\s*\[[0-9a-f]{4}:[0-9a-f]{4}\]/i, '')                            // [vvvv:dddd]
+                    .replace(/\s*\(rev\s+[0-9a-f]+\)\s*$/i, '')                                // (rev xx)
+                    .replace(/\b(?:corporation|corp\.?|inc\.?|advanced micro devices,?)\b/gi, '')
+                    .replace(/\[amd\/ati\]/gi, '')
+                    .replace(/\s+/g, ' ')
+                    .trim();
+            }
+            // If the card could not be resolved to a real model, give it a stable,
+            // readable name that carries the PCI id so it dedupes across sources.
+            const meaningful = name.replace(/\b(?:nvidia|amd|ati|intel|device|graphics|gpu|controller)\b/gi, '').replace(/[^a-z0-9]/gi, '').trim();
+            if (mapped) {
+                name = mapped.name;
+            } else if (!meaningful) {
+                name = pciId ? `${vendorLabel} Device ${pciId.toUpperCase()}` : `${vendorLabel} GPU`;
+            }
-            const isIntegrated = this.isIntegratedGPUModel(name) || isIntel;
+            const isIntegrated = mapped
+                ? mapped.type === 'integrated'
+                : (this.isIntegratedGPUModel(name) || (isIntel && !/\barc\b/i.test(name)));
             let vram = this.estimateFallbackVRAM(name);
             if (isIntegrated) {
                 vram = 0;
             }
-            const dedupeKey = `${name.toLowerCase()}|${isIntegrated ? 'i' : 'd'}`;
+            const dedupeKey = `${this.getGpuMatchKey(name)}|${isIntegrated ? 'i' : 'd'}`;
             if (seen.has(dedupeKey)) continue;
             seen.add(dedupeKey);
             results.push({
                 name,
-                vendor: isNvidia ? 'NVIDIA' : (isAMD ? 'AMD' : 'Intel'),
+                vendor: vendorLabel,
                 type: isIntegrated ? 'integrated' : 'dedicated',
                 memory: { total: vram },
+                pciId: pciId || null,
                 source: 'lspci'
             });
         }
@@ -746,22 +791,27 @@ class UnifiedDetector {
         const num = Number(value);
         if (!Number.isFinite(num) || num <= 0) return 0;
-        // Bytes -> GB
-        if (num > 1024 * 1024) {
-            return Math.round(num / (1024 * 1024 * 1024));
+        // Unit inference by magnitude, kept consistent with
+        // HardwareDetector.normalizeVRAM so both detection paths agree:
+        //
+        //   > 1e6            -> raw bytes.
+        //   >= 1024          -> megabytes (systeminformation reporting range).
+        //   1 <= v <= 256    -> already gigabytes. The previous "1..80 means GB"
+        //                       band silently returned 0 for legitimate large GB
+        //                       values, so normalizeFallbackVRAM(192) was 0 — the
+        //                       192 GB box in issue #88 collapsed to nothing. A
+        //                       single GPU realistically tops out around 192 GB.
+        //   257 <= v < 1024  -> sub-gigabyte framebuffer in MB -> rounds to 0/1 GB.
+        if (num > 1_000_000) {
+            return Math.max(0, Math.round(num / (1024 * 1024 * 1024))); // bytes -> GB
         }
-        // MB -> GB
         if (num >= 1024) {
-            return Math.round(num / 1024);
+            return Math.max(0, Math.round(num / 1024)); // MB -> GB
         }
-        // Likely already GB
-        if (num >= 1 && num <= 80) {
-            return Math.round(num);
+        if (num <= 256) {
+            return Math.round(num); // already GB (plausible single-GPU range)
         }
-        return 0;
+        return Math.max(0, Math.round(num / 1024)); // 257..1023 MB -> GB
     }
     isIntegratedGPUModel(model) {
@@ -799,6 +849,21 @@ class UnifiedDetector {
         if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
         if (lower.includes('rx 6700')) return 12;
+        // NVIDIA workstation / datacenter (Blackwell / Ada / Hopper / Ampere).
+        // Matched BEFORE the consumer RTX entries and the generic fallbacks so a
+        // high-VRAM professional card is not collapsed to a consumer-tier value or
+        // 0 (issue #88: dual "RTX PRO 6000" must reach ~192GB total, not ~16GB).
+        if (lower.includes('rtx pro 6000') || lower.includes('rtx 6000 blackwell')) return 96;
+        if (lower.includes('rtx 6000 ada') || lower.includes('rtx 5000 ada')) return 48;
+        if (lower.includes('rtx a6000') || lower.includes('a6000')) return 48;
+        if (lower.includes('rtx a5000') || lower.includes('a5000')) return 24;
+        if (lower.includes('l40s') || lower.includes('l40')) return 48;
+        if (lower.includes('h200')) return 141;
+        if (lower.includes('h100')) return 80;
+        if (lower.includes('a100') && (lower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(lower))) return 40;
+        if (lower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
+        if (lower.includes('a40')) return 48;
         if (lower.includes('rtx 5090')) return 32;
         if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
         if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
@@ -817,6 +882,15 @@ class UnifiedDetector {
             return `${familyMatch[1]}${familyMatch[2]}`;
         }
+        // Different detection sources describe an unresolved card in different
+        // ways for the SAME hardware, e.g. systeminformation "Device 2f04" and
+        // lspci "...Device [10de:2f04]". Key on the PCI device id (mapped to a
+        // canonical family when known) so those collapse to one inventory entry.
+        const pciId = this.extractPciDeviceId(name);
+        if (pciId) {
+            return (PCI_GPU_MAP[pciId] && PCI_GPU_MAP[pciId].family) || `pci:${pciId}`;
+        }
         const concise = lower
             .replace(/nvidia|amd|ati|intel|corporation|geforce|radeon|graphics/g, '')
             .replace(/\s+/g, ' ')
@@ -825,6 +899,26 @@ class UnifiedDetector {
         return concise || lower;
     }
+    /**
+     * Extract a 4-hex PCI device id from a GPU name/description, handling both the
+     * lspci "[vendor:device]" form and the bare "Device <id>" form that
+     * systeminformation emits for cards it cannot name. Returns null when none.
+     *
+     * The bracketed "[vvvv:dddd]" pair is tried first, so a string that carries
+     * both forms yields the id from the bracket pair. Matching is
+     * case-insensitive and the result is always lower-cased, which is the key
+     * format PCI_GPU_MAP is indexed by.
+     *
+     * A bare id with neither wrapper (for example just "2f04") is NOT recognised
+     * and yields null.
+     *
+     * @param {*} text - Name or description to scan; falsy values are treated as ''.
+     * @returns {string|null} Lower-case 4-hex device id, or null when none is found.
+     */
+    extractPciDeviceId(text) {
+        const value = String(text || '');
+        const bracket = value.match(/\[[0-9a-f]{4}:([0-9a-f]{4})\]/i);
+        if (bracket) return bracket[1].toLowerCase();
+        const bare = value.match(/\bdevice\s+([0-9a-f]{4})\b/i);
+        if (bare) return bare[1].toLowerCase();
+        return null;
+    }
+    /**
+     * Look up a curated mapping for a recent card by PCI device id (or null).
+     *
+     * The id is pulled out of `text` by extractPciDeviceId, so `text` has to be in
+     * one of the forms that helper understands ("[vvvv:dddd]" or "Device <id>").
+     *
+     * NOTE(review): one caller passes `controller?.deviceId` here. If that field
+     * holds a bare id string (no "Device " prefix, no bracket pair) the lookup
+     * returns null — confirm the format systeminformation reports.
+     *
+     * @param {*} text - GPU name/description that may contain a PCI device id.
+     * @returns {{pciId: string, family: string, type: string, name: string}|null}
+     *     A new object combining the id with its PCI_GPU_MAP entry, or null when
+     *     no id is found or the id has no entry in the table.
+     */
+    resolveMappedGpu(text) {
+        const pciId = this.extractPciDeviceId(text);
+        return pciId && PCI_GPU_MAP[pciId] ? { pciId, ...PCI_GPU_MAP[pciId] } : null;
+    }
     /**
      * Generate hardware fingerprint for benchmarks
      */
@@ -879,9 +973,19 @@ class UnifiedDetector {
             summary.bestBackend === 'metal' ||
             (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
         ) {
-            return sizeGB <= (summary.effectiveMemory - 2);
+            const effectiveMemory = Number(summary.effectiveMemory);
+            if (!Number.isFinite(effectiveMemory) || effectiveMemory <= 0) return false;
+            return sizeGB <= (effectiveMemory - 2);
         } else {
-            const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
+            const totalVRAM = Number(summary.totalVRAM);
+            if (!Number.isFinite(totalVRAM) || totalVRAM <= 0) return false;
+            // Guard the per-GPU divisor: gpuCount can be 0 when the summary was
+            // built without resolved GPU memory, which previously produced
+            // Infinity (totalVRAM / 0) and made any model "fit".
+            const gpuCount = Math.max(1, Number(summary.gpuCount) || 0);
+            const availableVRAM = useMultiGPU ? totalVRAM : (totalVRAM / gpuCount);
+            if (!Number.isFinite(availableVRAM) || availableVRAM <= 0) return false;
             return sizeGB <= (availableVRAM - 2);
         }
     }

package/src/models/ai-check-selector.js CHANGED

@@ -62,6 +62,25 @@ Respond with JSON only, no additional text.`;
+    /**
+     * Normalize the --models option (array, or comma/space-separated string) to a list.
+     *
+     * @param {string[]|string|*} models - An array of names, or any other value
+     *     whose string form is split on runs of commas and/or whitespace. Falsy
+     *     input yields an empty list.
+     * @returns {string[]} Trimmed, lower-cased entries with empty strings removed.
+     */
+    parseModelFilter(models) {
+        if (!models) return [];
+        const list = Array.isArray(models) ? models : String(models).split(/[,\s]+/);
+        return list.map((m) => String(m).trim().toLowerCase()).filter(Boolean);
+    }
+    /**
+     * True when an Ollama DB model matches a user-supplied name fragment.
+     *
+     * Compares against the lower-cased `model_identifier` and `model_name` fields
+     * (a missing field counts as ''). `needle` is used exactly as given, so it
+     * must already be lower-case for the match to be case-insensitive. An empty
+     * `needle` matches every model, because every string contains ''.
+     *
+     * @param {object} model - Record read for `model_identifier` / `model_name`.
+     * @param {string} needle - Name fragment to look for.
+     * @returns {boolean} True when either field equals or contains `needle`.
+     */
+    modelMatchesFilter(model, needle) {
+        const identifier = String(model?.model_identifier || '').toLowerCase();
+        const name = String(model?.model_name || '').toLowerCase();
+        return (
+            identifier === needle ||
+            name === needle ||
+            identifier.includes(needle) ||
+            name.includes(needle)
+        );
+    }
     /**
      * Main AI-Check function
      */
     async aiCheck(options = {}) {
         const {
             category = 'general',
@@ -90,11 +109,23 @@ Respond with JSON only, no additional text.`;
         const budget = hardware.gpu.unified ? hardware.usableMemGB :
                      (hardware.gpu.vramGB || hardware.usableMemGB);
-        // Filter models by category first
-        const categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
-        if (!silent) {
-            console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
+        // Optional explicit model filter (--models qwen2.5,llama3.1). When present
+        // it overrides the category filter: the user asked for specific models.
+        const modelFilter = this.parseModelFilter(options.models);
+        let categoryModels;
+        if (modelFilter.length > 0) {
+            categoryModels = allOllamaModels.filter((model) =>
+                modelFilter.some((needle) => this.modelMatchesFilter(model, needle))
+            );
+            if (!silent) {
+                console.log(chalk.cyan('│') + ` Restricted to ${categoryModels.length} model(s) matching --models`);
+            }
+        } else {
+            // Filter models by category first
+            categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
+            if (!silent) {
+                console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
+            }
         }
         // Evaluate each model using deterministic scoring

package/src/models/deterministic-selector.js CHANGED

@@ -1556,10 +1556,21 @@ class DeterministicModelSelector {
         const S = speedEstimate.score;
         const F = this.calculateFitScore(requiredGB, budget);
         const C = this.calculateContextScore(model, targetCtx);
+        const capacityAdjustment = this.calculateHighCapacitySizeAdjustment(
+            hardware,
+            model,
+            budget,
+            category,
+            optimizeFor
+        );
         // 4. Calculate final weighted score
         const weights = this.getScoringWeights(category, optimizeFor);
-        const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
+        const weightedScore = Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3];
+        const score = Math.max(
+            0,
+            Math.min(100, Math.round((weightedScore + capacityAdjustment.score) * 10) / 10)
+        );
         // 5. Build rationale
         const rationale = this.buildRationale(
@@ -1572,7 +1583,8 @@ class DeterministicModelSelector {
             Q,
             S,
             memoryEstimate,
-            speedEstimate
+            speedEstimate,
+            capacityAdjustment
         );
         return {
@@ -1599,7 +1611,8 @@ class DeterministicModelSelector {
                 runtime: speedEstimate.runtime,
                 moe: speedEstimate.moe
             },
-            components: { Q, S, F, C }
+            components: { Q, S, F, C, H: capacityAdjustment.score },
+            optimizeFor
         };
     }
@@ -1858,6 +1871,9 @@ class DeterministicModelSelector {
         if (hardware.cpu.cores >= 8) base *= 1.1;
         if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
+        const acceleratorScale = this.calculateAcceleratorSpeedScale(hardware, backend);
+        base *= acceleratorScale.multiplier;
         const normalizedRuntime = normalizeMoERuntime(runtime);
         const moe = estimateMoESpeedMultiplier({
             model,
@@ -1880,7 +1896,46 @@ class DeterministicModelSelector {
             estimatedTPS,
             score,
             runtime: normalizedRuntime,
-            moe
+            moe,
+            acceleratorScale
+        };
+    }
+    /**
+     * Derive a speed multiplier from how much accelerator memory (and, for CUDA,
+     * how many GPUs) the machine has.
+     *
+     * Only the 'cuda' and 'metal' backends are scaled; any other backend gets a
+     * neutral multiplier of 1. The values read from `hardware` are presumably in
+     * gigabytes (the locals are named *GB) — TODO confirm for `gpu.vram`,
+     * `gpu.totalVRAM` and `memory.total`.
+     *
+     * @param {object} [hardware={}] - Profile read for `gpu` (vramGB / vram /
+     *     totalVRAM, unified, gpuCount / count) and `memory` (totalGB / total).
+     * @param {string} [backend='cpu_x86'] - Backend identifier.
+     * @returns {{multiplier: number, reason: (string|null)}} Multiplier rounded to
+     *     two decimals, and a label for it when it is greater than 1.
+     */
+    calculateAcceleratorSpeedScale(hardware = {}, backend = 'cpu_x86') {
+        if (backend !== 'cuda' && backend !== 'metal') {
+            return { multiplier: 1, reason: null };
+        }
+        const gpu = hardware.gpu || {};
+        const memory = hardware.memory || {};
+        // Coerce to a number; anything that is not finite becomes `fallback`.
+        const toFiniteNumber = (value, fallback = 0) => {
+            const parsed = Number(value);
+            return Number.isFinite(parsed) ? parsed : fallback;
+        };
+        const vramGB = toFiniteNumber(gpu.vramGB ?? gpu.vram ?? gpu.totalVRAM, 0);
+        const ramGB = toFiniteNumber(memory.totalGB ?? memory.total, 0);
+        // On Metal with unified memory the larger of the VRAM and RAM figures is
+        // used; every other case uses the VRAM figure alone.
+        const acceleratorMemoryGB = backend === 'metal' && Boolean(gpu.unified)
+            ? Math.max(vramGB, ramGB)
+            : vramGB;
+        const gpuCount = Math.max(1, toFiniteNumber(gpu.gpuCount ?? gpu.count, 1));
+        let multiplier = 1;
+        // Memory tiers: the first threshold reached wins; below 24 stays at 1.
+        if (acceleratorMemoryGB >= 160) multiplier *= 3.2;
+        else if (acceleratorMemoryGB >= 96) multiplier *= 2.6;
+        else if (acceleratorMemoryGB >= 80) multiplier *= 2.2;
+        else if (acceleratorMemoryGB >= 48) multiplier *= 1.7;
+        else if (acceleratorMemoryGB >= 24) multiplier *= 1.15;
+        // CUDA only: +0.25 per additional GPU, with the extra factor capped at 1.8.
+        if (backend === 'cuda' && gpuCount > 1) {
+            multiplier *= Math.min(1.8, 1 + ((gpuCount - 1) * 0.25));
+        }
+        const rounded = Math.round(multiplier * 100) / 100;
+        return {
+            multiplier: rounded,
+            reason: rounded > 1
+                ? `${backend.toUpperCase()} capacity x${rounded}`
+                : null
         };
     }
@@ -1888,13 +1943,79 @@ class DeterministicModelSelector {
         const ratio = requiredGB / budgetGB;
         if (ratio <= 0.9) return 100;
         if (ratio <= 1.0) return 70;
-        return 0; // Should be filtered out earlier
+        return 0; // Unreachable in practice: evaluateModel drops requiredGB > budget.
     }
     calculateContextScore(model, targetCtx) {
-        if (model.ctxMax >= targetCtx) return 100;
-        if (model.ctxMax >= targetCtx * 0.5) return 70;
-        return 0; // Should be filtered out earlier
+        const ctxMax = Number(model?.ctxMax) || 0;
+        if (ctxMax >= targetCtx) return 100;
+        if (ctxMax >= targetCtx * 0.5) return 70;
+        // Context is NOT pre-filtered: a model that cannot serve the requested
+        // context still scores here (0 for this component) and stays eligible,
+        // weighted down rather than excluded.
+        return 0;
+    }
+    /**
+     * Pick the parameter-count floor and sweet spot (both in billions, per the
+     * field names) for a given memory budget.
+     *
+     * @param {number} budgetGB - Memory budget. Must be an actual number:
+     *     Number.isFinite does not coerce, so a numeric string yields null.
+     * @param {object} [hardware={}] - Only `hardware.gpu.isMultiGPU` is read.
+     * @returns {{minParamsB: number, sweetSpotParamsB: number}|null} Target for
+     *     the budget tier, or null when the budget is non-finite or below 32.
+     */
+    getHighCapacitySizeTarget(budgetGB, hardware = {}) {
+        if (!Number.isFinite(budgetGB) || budgetGB < 32) return null;
+        const isMultiGPU = Boolean(hardware?.gpu?.isMultiGPU);
+        // NOTE(review): the >= 128 and >= 80 tiers return identical targets.
+        if (budgetGB >= 128) return { minParamsB: 30, sweetSpotParamsB: 70 };
+        if (budgetGB >= 80) return { minParamsB: 30, sweetSpotParamsB: 70 };
+        if (budgetGB >= 48) return { minParamsB: 20, sweetSpotParamsB: 34 };
+        // In the 32..47 band a multi-GPU machine gets a higher floor (30 vs 13).
+        if (budgetGB >= 32 && isMultiGPU) return { minParamsB: 30, sweetSpotParamsB: 30 };
+        if (budgetGB >= 32) return { minParamsB: 13, sweetSpotParamsB: 30 };
+        // Unreachable: every finite budget >= 32 has already returned above.
+        return null;
+    }
+    /**
+     * Compute a score adjustment that favours models sized for a large memory
+     * budget: a penalty for models under the budget tier's parameter floor, a
+     * bonus for models near its sweet spot.
+     *
+     * Returns a zero adjustment when the normalized objective is 'speed', the
+     * category is 'embeddings', getHighCapacitySizeTarget yields no target, or
+     * the model's parameter count cannot be parsed to a positive finite number.
+     *
+     * Below the floor the penalty scales linearly with the shortfall and stays
+     * above -24. At or above the floor the bonus is 12 at the sweet spot and
+     * falls linearly to 0 once the model is a full sweet-spot away from it; a
+     * model more than double the sweet spot gets no bonus and no penalty. Both
+     * are multiplied by 0.6 for the 'multimodal' category and rounded to one
+     * decimal.
+     *
+     * @param {object} hardware - Hardware profile; null/undefined is treated as {}.
+     * @param {object} model - Only `model.paramsB` is read.
+     * @param {number} budgetGB - Memory budget passed to getHighCapacitySizeTarget.
+     * @param {string} category - Use-case category.
+     * @param {string} [optimizeFor='balanced'] - Optimization objective.
+     * @returns {{score: number, reason: (string|null)}} Adjustment and its label.
+     */
+    calculateHighCapacitySizeAdjustment(hardware, model, budgetGB, category, optimizeFor = 'balanced') {
+        const objective = this.normalizeOptimizationObjective(optimizeFor);
+        if (objective === 'speed' || category === 'embeddings') {
+            return { score: 0, reason: null };
+        }
+        const normalizedHardware = this.normalizeHardwareProfile(hardware || {});
+        const tier = this.mapHardwareTier(normalizedHardware);
+        const highCapacityTiers = new Set(['very_high', 'ultra_high', 'extreme', 'flagship']);
+        const target = this.getHighCapacitySizeTarget(budgetGB, normalizedHardware);
+        const hasHighCapacitySignal =
+            Boolean(target) ||
+            highCapacityTiers.has(tier) ||
+            Number(normalizedHardware?.gpu?.vramGB || 0) >= 48;
+        // NOTE(review): this guard reduces to `!target`. When `target` is set the
+        // signal is already true; when it is not, `!target` returns regardless. The
+        // tier and VRAM signals therefore never change the outcome.
+        if (!hasHighCapacitySignal || !target) {
+            return { score: 0, reason: null };
+        }
+        const params = this.parseBillionsValue(model?.paramsB);
+        if (!Number.isFinite(params) || params <= 0) {
+            return { score: 0, reason: null };
+        }
+        const categoryMultiplier = category === 'multimodal' ? 0.6 : 1;
+        if (params < target.minParamsB) {
+            // deficitRatio is in (0, 1) here, so the Math.min cap of 24 is never hit.
+            const deficitRatio = (target.minParamsB - params) / target.minParamsB;
+            const penalty = -Math.min(24, deficitRatio * 24) * categoryMultiplier;
+            const roundedPenalty = Math.round(penalty * 10) / 10;
+            return {
+                score: roundedPenalty,
+                reason: `below ${target.minParamsB}B high-capacity floor`
+            };
+        }
+        // Relative distance from the sweet spot, clamped to at most 1.
+        const distanceRatio = Math.min(
+            1,
+            Math.abs(params - target.sweetSpotParamsB) / target.sweetSpotParamsB
+        );
+        const bonus = Math.max(0, 12 * (1 - distanceRatio)) * categoryMultiplier;
+        const roundedBonus = Math.round(bonus * 10) / 10;
+        return {
+            score: roundedBonus,
+            reason: roundedBonus > 0
+                ? `${target.sweetSpotParamsB}B high-capacity target`
+                : null
+        };
     }
     estimatePracticalMaxParamsForBudget(budgetGB) {
@@ -1994,7 +2115,19 @@ class DeterministicModelSelector {
         return highCapacityPromoted;
     }
-    buildRationale(hardware, model, quant, requiredGB, budget, category, Q, S, memoryEstimate = null, speedEstimate = null) {
+    buildRationale(
+        hardware,
+        model,
+        quant,
+        requiredGB,
+        budget,
+        category,
+        Q,
+        S,
+        memoryEstimate = null,
+        speedEstimate = null,
+        capacityAdjustment = null
+    ) {
         const parts = [];
         // Memory fit
@@ -2027,6 +2160,14 @@ class DeterministicModelSelector {
             const multiplier = Number(speedEstimate.moe.multiplier || 1).toFixed(2);
             parts.push(`MoE speed x${multiplier} (${runtimeLabel})`);
         }
+        if (speedEstimate?.acceleratorScale?.multiplier > 1) {
+            parts.push(speedEstimate.acceleratorScale.reason);
+        }
+        if (capacityAdjustment?.reason) {
+            parts.push(capacityAdjustment.reason);
+        }
         // Size sweet spot
         if (model.paramsB >= 7 && model.paramsB <= 13) {
@@ -2114,14 +2255,21 @@ class DeterministicModelSelector {
     updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category) {
         const normalizedS = this.normalizeTPSToScore(measuredTPS, category);
-        // Recalculate final score with measured speed
-        const weights = this.categoryWeights[category];
-        const { Q, F, C } = candidate.components;
+        // Re-score with the measured speed using the SAME weighting source as
+        // evaluateModel: getScoringWeights honours the user's optimizeFor profile and
+        // falls back to the general weights for categories (e.g. 'talking') that have
+        // no entry in DETERMINISTIC_WEIGHTS — indexing this.categoryWeights[category]
+        // directly threw a TypeError for those. We also re-add the stored capacity
+        // adjustment (H) and clamp, so a probed score stays comparable to a
+        // non-probed one instead of being silently lower.
+        const weights = this.getScoringWeights(category, candidate.optimizeFor || 'balanced');
+        const { Q, F, C, H = 0 } = candidate.components;
         candidate.estTPS = measuredTPS;
         candidate.components.S = normalizedS;
-        candidate.score = Math.round((Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
+        const weighted = Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3];
+        candidate.score = Math.max(0, Math.min(100, Math.round((weighted + H) * 10) / 10));
     }
     normalizeTPSToScore(tps, category) {

package/src/models/expanded_database.js CHANGED

@@ -1007,18 +1007,22 @@ class ExpandedModelsDatabase {
     }
     estimateMemoryUsage(model) {
-        const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
+        // Derive footprint from parameter count, not by stripping the unit off the
+        // size string and treating the bare number as gigabytes — that read a 774M
+        // model ("774M") as ~774 GB and a 22M model as ~22 GB. ~0.7 GB per 1B params
+        // is a reasonable quantized-runtime footprint baseline.
+        const sizeGB = this.extractModelParams(model) * 0.7;
         // Rough estimates including model loading overhead
         return {
-            minimal: Math.round(sizeGB * 1.2), // With quantization
-            typical: Math.round(sizeGB * 1.5), // Standard loading
-            maximum: Math.round(sizeGB * 2.0)  // With full context
+            minimal: Math.max(1, Math.round(sizeGB * 1.2)), // With quantization
+            typical: Math.max(1, Math.round(sizeGB * 1.5)), // Standard loading
+            maximum: Math.max(1, Math.round(sizeGB * 2.0))  // With full context
         };
     }
     estimatePowerConsumption(model, hardware) {
-        const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
+        const sizeGB = this.extractModelParams(model) * 0.7;
         const tier = this.getHardwareTier(hardware);
         const basePower = {