npm - llm-checker - Versions diffs - 3.5.15 → 3.7.0 - Mend

llm-checker 3.5.15 → 3.7.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +28 -8
package/analyzer/compatibility.js +5 -0
package/analyzer/performance.js +5 -4
package/bin/cli.js +5 -39
package/bin/enhanced_cli.js +449 -24
package/bin/mcp-server.mjs +266 -101
package/package.json +13 -8
package/src/ai/multi-objective-selector.js +118 -11
package/src/calibration/calibration-manager.js +4 -1
package/src/data/model-database.js +489 -5
package/src/data/registry-ingestors.js +751 -0
package/src/data/registry-recommender.js +514 -0
package/src/data/seed/README.md +11 -3
package/src/data/seed/models.db +0 -0
package/src/data/sync-manager.js +32 -18
package/src/hardware/backends/apple-silicon.js +5 -1
package/src/hardware/backends/cuda-detector.js +47 -19
package/src/hardware/backends/intel-detector.js +6 -2
package/src/hardware/backends/rocm-detector.js +6 -2
package/src/hardware/detector.js +57 -30
package/src/hardware/unified-detector.js +129 -25
package/src/index.js +68 -4
package/src/models/ai-check-selector.js +36 -5
package/src/models/deterministic-selector.js +179 -18
package/src/models/expanded_database.js +9 -5
package/src/models/intelligent-selector.js +87 -1
package/src/models/moe-assumptions.js +11 -0
package/src/models/requirements.js +16 -11
package/src/models/scoring-core.js +341 -0
package/src/models/scoring-engine.js +9 -2
package/src/ollama/capacity-planner.js +15 -2
package/src/ollama/client.js +70 -30
package/src/ollama/enhanced-client.js +20 -2
package/src/ollama/manager.js +14 -2
package/src/policy/cli-policy.js +8 -2
package/src/policy/policy-engine.js +2 -1
package/src/provenance/model-provenance.js +4 -1
package/src/ui/cli-theme.js +47 -7
package/src/ui/interactive-panel.js +162 -24

package/src/models/ai-check-selector.js CHANGED Viewed

@@ -62,6 +62,25 @@ Respond with JSON only, no additional text.`;
     /**
      * Main AI-Check function
      */
+    /** Normalize the --models option (array, or comma/space-separated string) to a list. */
+    parseModelFilter(models) {
+        if (!models) return [];
+        const list = Array.isArray(models) ? models : String(models).split(/[,\s]+/);
+        return list.map((m) => String(m).trim().toLowerCase()).filter(Boolean);
+    }
+    /** True when an Ollama DB model matches a user-supplied name fragment. */
+    modelMatchesFilter(model, needle) {
+        const identifier = String(model?.model_identifier || '').toLowerCase();
+        const name = String(model?.model_name || '').toLowerCase();
+        return (
+            identifier === needle ||
+            name === needle ||
+            identifier.includes(needle) ||
+            name.includes(needle)
+        );
+    }
     async aiCheck(options = {}) {
         const {
             category = 'general',
@@ -90,11 +109,23 @@ Respond with JSON only, no additional text.`;
         const budget = hardware.gpu.unified ? hardware.usableMemGB :
                      (hardware.gpu.vramGB || hardware.usableMemGB);
-        // Filter models by category first
-        const categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
-        if (!silent) {
-            console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
+        // Optional explicit model filter (--models qwen2.5,llama3.1). When present
+        // it overrides the category filter: the user asked for specific models.
+        const modelFilter = this.parseModelFilter(options.models);
+        let categoryModels;
+        if (modelFilter.length > 0) {
+            categoryModels = allOllamaModels.filter((model) =>
+                modelFilter.some((needle) => this.modelMatchesFilter(model, needle))
+            );
+            if (!silent) {
+                console.log(chalk.cyan('│') + ` Restricted to ${categoryModels.length} model(s) matching --models`);
+            }
+        } else {
+            // Filter models by category first
+            categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
+            if (!silent) {
+                console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
+            }
         }
         // Evaluate each model using deterministic scoring

package/src/models/deterministic-selector.js CHANGED Viewed

@@ -1556,10 +1556,21 @@ class DeterministicModelSelector {
         const S = speedEstimate.score;
         const F = this.calculateFitScore(requiredGB, budget);
         const C = this.calculateContextScore(model, targetCtx);
+        const capacityAdjustment = this.calculateHighCapacitySizeAdjustment(
+            hardware,
+            model,
+            budget,
+            category,
+            optimizeFor
+        );
         // 4. Calculate final weighted score
         const weights = this.getScoringWeights(category, optimizeFor);
-        const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
+        const weightedScore = Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3];
+        const score = Math.max(
+            0,
+            Math.min(100, Math.round((weightedScore + capacityAdjustment.score) * 10) / 10)
+        );
         // 5. Build rationale
         const rationale = this.buildRationale(
@@ -1572,7 +1583,8 @@ class DeterministicModelSelector {
             Q,
             S,
             memoryEstimate,
-            speedEstimate
+            speedEstimate,
+            capacityAdjustment
         );
         return {
@@ -1599,7 +1611,8 @@ class DeterministicModelSelector {
                 runtime: speedEstimate.runtime,
                 moe: speedEstimate.moe
             },
-            components: { Q, S, F, C }
+            components: { Q, S, F, C, H: capacityAdjustment.score },
+            optimizeFor
         };
     }
@@ -1858,6 +1871,9 @@ class DeterministicModelSelector {
         if (hardware.cpu.cores >= 8) base *= 1.1;
         if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
+        const acceleratorScale = this.calculateAcceleratorSpeedScale(hardware, backend);
+        base *= acceleratorScale.multiplier;
         const normalizedRuntime = normalizeMoERuntime(runtime);
         const moe = estimateMoESpeedMultiplier({
             model,
@@ -1880,7 +1896,46 @@ class DeterministicModelSelector {
             estimatedTPS,
             score,
             runtime: normalizedRuntime,
-            moe
+            moe,
+            acceleratorScale
+        };
+    }
+    calculateAcceleratorSpeedScale(hardware = {}, backend = 'cpu_x86') {
+        if (backend !== 'cuda' && backend !== 'metal') {
+            return { multiplier: 1, reason: null };
+        }
+        const gpu = hardware.gpu || {};
+        const memory = hardware.memory || {};
+        const toFiniteNumber = (value, fallback = 0) => {
+            const parsed = Number(value);
+            return Number.isFinite(parsed) ? parsed : fallback;
+        };
+        const vramGB = toFiniteNumber(gpu.vramGB ?? gpu.vram ?? gpu.totalVRAM, 0);
+        const ramGB = toFiniteNumber(memory.totalGB ?? memory.total, 0);
+        const acceleratorMemoryGB = backend === 'metal' && Boolean(gpu.unified)
+            ? Math.max(vramGB, ramGB)
+            : vramGB;
+        const gpuCount = Math.max(1, toFiniteNumber(gpu.gpuCount ?? gpu.count, 1));
+        let multiplier = 1;
+        if (acceleratorMemoryGB >= 160) multiplier *= 3.2;
+        else if (acceleratorMemoryGB >= 96) multiplier *= 2.6;
+        else if (acceleratorMemoryGB >= 80) multiplier *= 2.2;
+        else if (acceleratorMemoryGB >= 48) multiplier *= 1.7;
+        else if (acceleratorMemoryGB >= 24) multiplier *= 1.15;
+        if (backend === 'cuda' && gpuCount > 1) {
+            multiplier *= Math.min(1.8, 1 + ((gpuCount - 1) * 0.25));
+        }
+        const rounded = Math.round(multiplier * 100) / 100;
+        return {
+            multiplier: rounded,
+            reason: rounded > 1
+                ? `${backend.toUpperCase()} capacity x${rounded}`
+                : null
         };
     }
@@ -1888,13 +1943,79 @@ class DeterministicModelSelector {
         const ratio = requiredGB / budgetGB;
         if (ratio <= 0.9) return 100;
         if (ratio <= 1.0) return 70;
-        return 0; // Should be filtered out earlier
+        return 0; // Unreachable in practice: evaluateModel drops requiredGB > budget.
     }
     calculateContextScore(model, targetCtx) {
-        if (model.ctxMax >= targetCtx) return 100;
-        if (model.ctxMax >= targetCtx * 0.5) return 70;
-        return 0; // Should be filtered out earlier
+        const ctxMax = Number(model?.ctxMax) || 0;
+        if (ctxMax >= targetCtx) return 100;
+        if (ctxMax >= targetCtx * 0.5) return 70;
+        // Context is NOT pre-filtered: a model that cannot serve the requested
+        // context still scores here (0 for this component) and stays eligible,
+        // weighted down rather than excluded.
+        return 0;
+    }
+    getHighCapacitySizeTarget(budgetGB, hardware = {}) {
+        if (!Number.isFinite(budgetGB) || budgetGB < 32) return null;
+        const isMultiGPU = Boolean(hardware?.gpu?.isMultiGPU);
+        if (budgetGB >= 128) return { minParamsB: 30, sweetSpotParamsB: 70 };
+        if (budgetGB >= 80) return { minParamsB: 30, sweetSpotParamsB: 70 };
+        if (budgetGB >= 48) return { minParamsB: 20, sweetSpotParamsB: 34 };
+        if (budgetGB >= 32 && isMultiGPU) return { minParamsB: 30, sweetSpotParamsB: 30 };
+        if (budgetGB >= 32) return { minParamsB: 13, sweetSpotParamsB: 30 };
+        return null;
+    }
+    calculateHighCapacitySizeAdjustment(hardware, model, budgetGB, category, optimizeFor = 'balanced') {
+        const objective = this.normalizeOptimizationObjective(optimizeFor);
+        if (objective === 'speed' || category === 'embeddings') {
+            return { score: 0, reason: null };
+        }
+        const normalizedHardware = this.normalizeHardwareProfile(hardware || {});
+        const tier = this.mapHardwareTier(normalizedHardware);
+        const highCapacityTiers = new Set(['very_high', 'ultra_high', 'extreme', 'flagship']);
+        const target = this.getHighCapacitySizeTarget(budgetGB, normalizedHardware);
+        const hasHighCapacitySignal =
+            Boolean(target) ||
+            highCapacityTiers.has(tier) ||
+            Number(normalizedHardware?.gpu?.vramGB || 0) >= 48;
+        if (!hasHighCapacitySignal || !target) {
+            return { score: 0, reason: null };
+        }
+        const params = this.parseBillionsValue(model?.paramsB);
+        if (!Number.isFinite(params) || params <= 0) {
+            return { score: 0, reason: null };
+        }
+        const categoryMultiplier = category === 'multimodal' ? 0.6 : 1;
+        if (params < target.minParamsB) {
+            const deficitRatio = (target.minParamsB - params) / target.minParamsB;
+            const penalty = -Math.min(24, deficitRatio * 24) * categoryMultiplier;
+            const roundedPenalty = Math.round(penalty * 10) / 10;
+            return {
+                score: roundedPenalty,
+                reason: `below ${target.minParamsB}B high-capacity floor`
+            };
+        }
+        const distanceRatio = Math.min(
+            1,
+            Math.abs(params - target.sweetSpotParamsB) / target.sweetSpotParamsB
+        );
+        const bonus = Math.max(0, 12 * (1 - distanceRatio)) * categoryMultiplier;
+        const roundedBonus = Math.round(bonus * 10) / 10;
+        return {
+            score: roundedBonus,
+            reason: roundedBonus > 0
+                ? `${target.sweetSpotParamsB}B high-capacity target`
+                : null
+        };
     }
     estimatePracticalMaxParamsForBudget(budgetGB) {
@@ -1994,7 +2115,19 @@ class DeterministicModelSelector {
         return highCapacityPromoted;
     }
-    buildRationale(hardware, model, quant, requiredGB, budget, category, Q, S, memoryEstimate = null, speedEstimate = null) {
+    buildRationale(
+        hardware,
+        model,
+        quant,
+        requiredGB,
+        budget,
+        category,
+        Q,
+        S,
+        memoryEstimate = null,
+        speedEstimate = null,
+        capacityAdjustment = null
+    ) {
         const parts = [];
         // Memory fit
@@ -2027,6 +2160,14 @@ class DeterministicModelSelector {
             const multiplier = Number(speedEstimate.moe.multiplier || 1).toFixed(2);
             parts.push(`MoE speed x${multiplier} (${runtimeLabel})`);
         }
+        if (speedEstimate?.acceleratorScale?.multiplier > 1) {
+            parts.push(speedEstimate.acceleratorScale.reason);
+        }
+        if (capacityAdjustment?.reason) {
+            parts.push(capacityAdjustment.reason);
+        }
         // Size sweet spot
         if (model.paramsB >= 7 && model.paramsB <= 13) {
@@ -2114,14 +2255,21 @@ class DeterministicModelSelector {
     updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category) {
         const normalizedS = this.normalizeTPSToScore(measuredTPS, category);
-        // Recalculate final score with measured speed
-        const weights = this.categoryWeights[category];
-        const { Q, F, C } = candidate.components;
+        // Re-score with the measured speed using the SAME weighting source as
+        // evaluateModel: getScoringWeights honours the user's optimizeFor profile and
+        // falls back to the general weights for categories (e.g. 'talking') that have
+        // no entry in DETERMINISTIC_WEIGHTS — indexing this.categoryWeights[category]
+        // directly threw a TypeError for those. We also re-add the stored capacity
+        // adjustment (H) and clamp, so a probed score stays comparable to a
+        // non-probed one instead of being silently lower.
+        const weights = this.getScoringWeights(category, candidate.optimizeFor || 'balanced');
+        const { Q, F, C, H = 0 } = candidate.components;
         candidate.estTPS = measuredTPS;
         candidate.components.S = normalizedS;
-        candidate.score = Math.round((Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
+        const weighted = Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3];
+        candidate.score = Math.max(0, Math.min(100, Math.round((weighted + H) * 10) / 10));
     }
     normalizeTPSToScore(tps, category) {
@@ -2194,6 +2342,9 @@ class DeterministicModelSelector {
             estimatedRAM: candidate.requiredGB,
             reasoning: candidate.rationale,
             runtime: candidate.runtime || candidate.speed?.runtime || 'ollama',
+            installCommand: candidate.meta.installCommand || provenance.install_command || '',
+            downloadUrl: candidate.meta.downloadUrl || provenance.download_url || '',
+            artifactFormat: candidate.meta.artifact?.format || '',
             memoryAssumptionSource: candidate.memory?.assumptionSource || 'dense_params',
             speedAssumptions: candidate.speed?.moe ? {
                 applied: Boolean(candidate.speed.moe.applied),
@@ -2375,19 +2526,24 @@ class DeterministicModelSelector {
         Object.entries(recommendations).forEach(([category, data]) => {
             const bestModel = data.bestModels[0];
             if (bestModel) {
+                const command = bestModel.installCommand ||
+                    bestModel.provenance?.install_command ||
+                    `ollama pull ${bestModel.model_identifier}`;
                 summary.by_category[category] = {
                     name: bestModel.model_name || bestModel.name,
                     identifier: bestModel.model_identifier,
                     score: Math.round(bestModel.categoryScore || bestModel.score),
-                    command: `ollama pull ${bestModel.model_identifier}`,
+                    command,
                     size: this.formatModelSize(bestModel),
                     quantization: bestModel.quantization || bestModel.quant || 'Q4_K_M',
+                    runtime: bestModel.runtime || bestModel.provenance?.runtime || 'ollama',
                     pulls: bestModel.pulls || 0,
                     source: bestModel.source || bestModel.provenance?.source || 'unknown',
                     registry: bestModel.registry || bestModel.provenance?.registry || 'unknown',
                     version: bestModel.version || bestModel.provenance?.version || 'unknown',
                     license: bestModel.license || bestModel.provenance?.license || 'unknown',
                     digest: bestModel.digest || bestModel.provenance?.digest || 'unknown',
+                    download_url: bestModel.downloadUrl || bestModel.provenance?.download_url || '',
                     provenance: bestModel.provenance || {
                         source: bestModel.source || 'unknown',
                         registry: bestModel.registry || 'unknown',
@@ -2397,7 +2553,7 @@ class DeterministicModelSelector {
                     }
                 };
-                summary.quick_commands.push(`ollama pull ${bestModel.model_identifier}`);
+                summary.quick_commands.push(command);
                 const isGeneralCategory = ['general', 'coding', 'talking', 'reading'].includes(category);
                 const score = bestModel.categoryScore || bestModel.score || 0;
@@ -2411,18 +2567,23 @@ class DeterministicModelSelector {
         });
         if (bestOverallModel) {
+            const command = bestOverallModel.installCommand ||
+                bestOverallModel.provenance?.install_command ||
+                `ollama pull ${bestOverallModel.model_identifier}`;
             summary.best_overall = {
                 name: bestOverallModel.model_name || bestOverallModel.name,
                 identifier: bestOverallModel.model_identifier,
                 category: bestOverallCategory,
                 score: Math.round(bestOverallScore),
-                command: `ollama pull ${bestOverallModel.model_identifier}`,
+                command,
                 quantization: bestOverallModel.quantization || bestOverallModel.quant || 'Q4_K_M',
+                runtime: bestOverallModel.runtime || bestOverallModel.provenance?.runtime || 'ollama',
                 source: bestOverallModel.source || bestOverallModel.provenance?.source || 'unknown',
                 registry: bestOverallModel.registry || bestOverallModel.provenance?.registry || 'unknown',
                 version: bestOverallModel.version || bestOverallModel.provenance?.version || 'unknown',
                 license: bestOverallModel.license || bestOverallModel.provenance?.license || 'unknown',
                 digest: bestOverallModel.digest || bestOverallModel.provenance?.digest || 'unknown',
+                download_url: bestOverallModel.downloadUrl || bestOverallModel.provenance?.download_url || '',
                 provenance: bestOverallModel.provenance || {
                     source: bestOverallModel.source || 'unknown',
                     registry: bestOverallModel.registry || 'unknown',

package/src/models/expanded_database.js CHANGED Viewed

@@ -1007,18 +1007,22 @@ class ExpandedModelsDatabase {
     }
     estimateMemoryUsage(model) {
-        const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
+        // Derive footprint from parameter count, not by stripping the unit off the
+        // size string and treating the bare number as gigabytes — that read a 774M
+        // model ("774M") as ~774 GB and a 22M model as ~22 GB. ~0.7 GB per 1B params
+        // is a reasonable quantized-runtime footprint baseline.
+        const sizeGB = this.extractModelParams(model) * 0.7;
         // Rough estimates including model loading overhead
         return {
-            minimal: Math.round(sizeGB * 1.2), // With quantization
-            typical: Math.round(sizeGB * 1.5), // Standard loading
-            maximum: Math.round(sizeGB * 2.0)  // With full context
+            minimal: Math.max(1, Math.round(sizeGB * 1.2)), // With quantization
+            typical: Math.max(1, Math.round(sizeGB * 1.5)), // Standard loading
+            maximum: Math.max(1, Math.round(sizeGB * 2.0))  // With full context
         };
     }
     estimatePowerConsumption(model, hardware) {
-        const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
+        const sizeGB = this.extractModelParams(model) * 0.7;
         const tier = this.getHardwareTier(hardware);
         const basePower = {

package/src/models/intelligent-selector.js CHANGED Viewed

@@ -10,6 +10,7 @@ const ScoringEngine = require('./scoring-engine');
 const UnifiedDetector = require('../hardware/unified-detector');
 const PolicyManager = require('../policy/policy-manager');
 const PolicyEngine = require('../policy/policy-engine');
+const { rankModels } = require('./scoring-core');
 function isPlainObject(value) {
     return typeof value === 'object' && value !== null && !Array.isArray(value);
@@ -66,7 +67,9 @@ class IntelligentSelector {
         // Apply filters
         const filtered = this.applyFilters(variants, opts, hardware);
-        // Score all filtered variants
+        // Score all filtered variants. ScoringEngine still produces the
+        // per-variant `score` objects (final/components/meta) consumed by the
+        // smart-recommend display, but the RANKING is unified below.
         const scored = this.scoring.filterAndScore(filtered, hardware, {
             useCase: opts.useCase,
             targetContext: opts.targetContext,
@@ -75,6 +78,12 @@ class IntelligentSelector {
             headroom: opts.headroom || 2
         });
+        // Unify ranking with the canonical scoring core (issue #88): re-order
+        // the scored list and rewrite each item's final score using the shared
+        // DeterministicModelSelector so smart-recommend agrees with
+        // `check`/`recommend` and inherits the PR #89 high-capacity floor.
+        await this.applyUnifiedRanking(scored, hardware, opts);
         const policyEngine = this.resolvePolicyEngine(opts);
         const scoredWithPolicy = policyEngine.evaluateScoredVariants(
             scored,
@@ -114,6 +123,83 @@ class IntelligentSelector {
         };
     }
+    /**
+     * Re-rank the ScoringEngine-scored variants using the canonical scoring
+     * core so smart-recommend's ordering and headline scores match
+     * `check`/`recommend` and inherit the high-capacity right-sizing floor.
+     *
+     * Mutates `scored` in place: it is sorted by the unified score and each
+     * item's `score.final` is overwritten with the canonical 0-100 score.
+     * Component/meta sub-scores are left intact so the existing display (which
+     * shows Q/S/F and estimated TPS) keeps working. If the core cannot rank a
+     * variant (or throws), that item keeps its original ScoringEngine score and
+     * sorts after the unified ones, preserving a sensible fallback ordering.
+     */
+    async applyUnifiedRanking(scored, hardware, opts = {}) {
+        if (!Array.isArray(scored) || scored.length === 0) return scored;
+        let ranking;
+        try {
+            ranking = await rankModels(
+                scored.map((item) => item.variant),
+                hardware,
+                {
+                    category: opts.useCase || 'general',
+                    optimizeFor: opts.optimizeFor || opts.optimize || 'balanced',
+                    runtime: opts.runtime || 'ollama',
+                    topN: scored.length
+                }
+            );
+        } catch (error) {
+            return scored; // Defensive: keep original ScoringEngine ordering.
+        }
+        if (!ranking || !Array.isArray(ranking.candidates)) return scored;
+        // Map each source variant -> its unified score + ordering index.
+        const unifiedByVariant = new Map();
+        ranking.candidates.forEach((candidate, index) => {
+            const source = candidate?.meta?.__source;
+            if (!source) return;
+            unifiedByVariant.set(source, {
+                unifiedScore: Math.round(candidate.score * 10) / 10,
+                rank: index,
+                quant: candidate.quant,
+                estimatedTPS: candidate.estTPS
+            });
+        });
+        for (const item of scored) {
+            const unified = unifiedByVariant.get(item.variant);
+            if (!unified) {
+                // Not ranked by the core (e.g. filtered out): sort last and tag
+                // so any downstream tie-breaks are deterministic.
+                item.__unifiedRank = Number.MAX_SAFE_INTEGER;
+                continue;
+            }
+            item.__unifiedRank = unified.rank;
+            if (item.score) {
+                item.score.final = Math.min(100, Math.max(0, Math.round(unified.unifiedScore)));
+                if (item.score.meta) {
+                    item.score.meta.unifiedScore = unified.unifiedScore;
+                }
+            }
+        }
+        scored.sort((a, b) => {
+            const ra = Number.isFinite(a.__unifiedRank) ? a.__unifiedRank : Number.MAX_SAFE_INTEGER;
+            const rb = Number.isFinite(b.__unifiedRank) ? b.__unifiedRank : Number.MAX_SAFE_INTEGER;
+            if (ra !== rb) return ra - rb;
+            return (b.score?.final || 0) - (a.score?.final || 0);
+        });
+        for (const item of scored) {
+            delete item.__unifiedRank;
+        }
+        return scored;
+    }
     /**
      * Resolve policy engine from explicit options, in-memory policy, or policy file.
      */

package/src/models/moe-assumptions.js CHANGED Viewed

@@ -24,6 +24,14 @@ const MOE_RUNTIME_PROFILES = Object.freeze({
         maxEffectiveGain: 2.65,
         notes: ['optimized scheduler', 'better expert batching', 'lower offload pressure']
     }),
+    transformers: Object.freeze({
+        runtime: 'transformers',
+        routingOverhead: 0.15,
+        communicationOverhead: 0.10,
+        offloadOverhead: 0.06,
+        maxEffectiveGain: 2.45,
+        notes: ['general Hugging Face path', 'broad architecture support', 'higher Python overhead than vLLM']
+    }),
     mlx: Object.freeze({
         runtime: 'mlx',
         routingOverhead: 0.16,
@@ -45,6 +53,9 @@ const MOE_RUNTIME_PROFILES = Object.freeze({
 const RUNTIME_ALIASES = Object.freeze({
     ollama: 'ollama',
     vllm: 'vllm',
+    transformers: 'transformers',
+    'huggingface-transformers': 'transformers',
+    hf: 'transformers',
     mlx: 'mlx',
     'mlx-lm': 'mlx',
     mlx_lm: 'mlx',

package/src/models/requirements.js CHANGED Viewed

@@ -134,17 +134,22 @@ class RequirementsCalculator {
     }
     parseModelSize(sizeString) {
-        const normalized = sizeString.toLowerCase().replace(/[^0-9.kmb]/g, '');
-        if (normalized.includes('k')) {
-            return parseFloat(normalized.replace('k', '')) / 1000;
-        } else if (normalized.includes('m')) {
-            return parseFloat(normalized.replace('m', '')) / 1000;
-        } else if (normalized.includes('b')) {
-            return parseFloat(normalized.replace('b', ''));
-        } else {
-            return parseFloat(normalized);
-        }
+        // Anchor the number to its unit instead of globally stripping every char
+        // that isn't 0-9.kmb: the old approach kept stray k/m/b from model words, so
+        // "Llama 3.2 3B" normalized to "m3.23b" and parsed as 0.003B, and unit-only
+        // inputs produced NaN. Prefer a number that carries a B/M/K unit (the real
+        // size token, "3B") over a bare number (a version like "3.2").
+        const text = String(sizeString || '');
+        const match = text.match(/(\d+(?:\.\d+)?)\s*([kmb])\b/i) || text.match(/(\d+(?:\.\d+)?)/);
+        if (!match) return 1;
+        const value = parseFloat(match[1]);
+        if (!Number.isFinite(value)) return 1;
+        const unit = (match[2] || 'b').toLowerCase();
+        if (unit === 'k') return value / 1_000_000; // thousands of params -> billions
+        if (unit === 'm') return value / 1000;       // millions of params -> billions
+        return value;                                // billions
     }
     getContextMultiplier(contextLength) {