npm - llm-checker - Versions diffs - 3.5.15 → 3.7.0 - Mend

llm-checker 3.5.15 → 3.7.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +28 -8
package/analyzer/compatibility.js +5 -0
package/analyzer/performance.js +5 -4
package/bin/cli.js +5 -39
package/bin/enhanced_cli.js +449 -24
package/bin/mcp-server.mjs +266 -101
package/package.json +13 -8
package/src/ai/multi-objective-selector.js +118 -11
package/src/calibration/calibration-manager.js +4 -1
package/src/data/model-database.js +489 -5
package/src/data/registry-ingestors.js +751 -0
package/src/data/registry-recommender.js +514 -0
package/src/data/seed/README.md +11 -3
package/src/data/seed/models.db +0 -0
package/src/data/sync-manager.js +32 -18
package/src/hardware/backends/apple-silicon.js +5 -1
package/src/hardware/backends/cuda-detector.js +47 -19
package/src/hardware/backends/intel-detector.js +6 -2
package/src/hardware/backends/rocm-detector.js +6 -2
package/src/hardware/detector.js +57 -30
package/src/hardware/unified-detector.js +129 -25
package/src/index.js +68 -4
package/src/models/ai-check-selector.js +36 -5
package/src/models/deterministic-selector.js +179 -18
package/src/models/expanded_database.js +9 -5
package/src/models/intelligent-selector.js +87 -1
package/src/models/moe-assumptions.js +11 -0
package/src/models/requirements.js +16 -11
package/src/models/scoring-core.js +341 -0
package/src/models/scoring-engine.js +9 -2
package/src/ollama/capacity-planner.js +15 -2
package/src/ollama/client.js +70 -30
package/src/ollama/enhanced-client.js +20 -2
package/src/ollama/manager.js +14 -2
package/src/policy/cli-policy.js +8 -2
package/src/policy/policy-engine.js +2 -1
package/src/provenance/model-provenance.js +4 -1
package/src/ui/cli-theme.js +47 -7
package/src/ui/interactive-panel.js +162 -24

package/src/ai/multi-objective-selector.js CHANGED

@@ -10,6 +10,7 @@
 const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
 const { normalizePlatform } = require('../utils/platform');
+const { rankModels } = require('../models/scoring-core');
 class MultiObjectiveSelector {
     constructor() {
@@ -40,23 +41,124 @@ class MultiObjectiveSelector {
     }
     /**
-     * Select best models using multi-objective ranking
+     * Select best models using the UNIFIED canonical scoring core (issue #88).
+     *
+     * `check` used to rank through this selector's own multi-objective math,
+     * which diverged from `recommend`/`smart-recommend` and never received the
+     * PR #89 high-capacity right-sizing fix. It now routes the ranking through
+     * the shared DeterministicModelSelector core (via scoring-core.rankModels)
+     * so identical (model, hardware) inputs score identically across all three
+     * commands and the high-capacity floor applies here too.
+     *
+     * The output shape is preserved exactly: `{ compatible, marginal,
+     * incompatible }`, each entry being the ORIGINAL model object spread with
+     * `totalScore`, `components { quality, speed, ttfb, context, hardwareMatch }`
+     * and `reasoning`, so downstream `check` rendering and the regression test
+     * (which calls `estimateModelParams` on the returned object) keep working.
      */
     async selectBestModels(hardware, models, category = 'general', topK = 10) {
-        // Step 1: Hard filters - remove incompatible models
+        const inputModels = Array.isArray(models) ? models.filter(Boolean) : [];
+        if (inputModels.length === 0) {
+            return { compatible: [], marginal: [], incompatible: [] };
+        }
+        let ranking;
+        try {
+            ranking = await rankModels(inputModels, hardware, { category, topN: inputModels.length });
+        } catch (error) {
+            ranking = null;
+        }
+        // Defensive fallback: if the unified core is unavailable for any reason,
+        // fall back to the legacy multi-objective ranking so `check` still works.
+        if (!ranking || !Array.isArray(ranking.candidates)) {
+            return this.selectBestModelsLegacy(hardware, inputModels, category, topK);
+        }
+        const scoredModels = [];
+        const rankedSources = new Set();
+        for (const candidate of ranking.candidates) {
+            const source = candidate?.meta?.__source;
+            if (!source) continue;
+            rankedSources.add(source);
+            scoredModels.push(this.mapCoreCandidateToMultiObjective(candidate, source, hardware, category));
+        }
+        // Models the canonical core dropped (category filter / budget) are not
+        // viable on this hardware for this use case -> treat as incompatible,
+        // mirroring the previous hard-filter semantics.
+        const incompatibleExtras = inputModels
+            .filter((model) => !rankedSources.has(model))
+            .map((model) => ({
+                ...model,
+                totalScore: 0,
+                components: { quality: 0, speed: 0, ttfb: 0, context: 0, hardwareMatch: 0 },
+                reasoning: 'Filtered out by unified scoring core (does not fit hardware/use-case)'
+            }));
+        scoredModels.sort((a, b) => b.totalScore - a.totalScore);
+        const classified = this.classifyResults(scoredModels, topK);
+        classified.incompatible = [...classified.incompatible, ...incompatibleExtras].slice(0, 5);
+        return classified;
+    }
+    /**
+     * Map a unified-core candidate back into this selector's multi-objective
+     * output shape. The 0-100 `score` from the deterministic core becomes
+     * `totalScore`; component sub-scores are normalized to 0-1 to match the
+     * historical `components` contract consumed by `check` rendering.
+     */
+    mapCoreCandidateToMultiObjective(candidate, source, hardware, category) {
+        const components = candidate.components || {};
+        const to01 = (value) => {
+            const num = Number(value);
+            if (!Number.isFinite(num)) return 0;
+            return Math.max(0, Math.min(1, num / 100));
+        };
+        const quality = to01(components.Q);
+        const speed = to01(components.S);
+        const context = to01(components.C);
+        // The deterministic core folds hardware fitness into the `F` (fit) plus
+        // `H` (high-capacity right-sizing) components; surface that as the
+        // historical `hardwareMatch` signal so `check` insights stay meaningful.
+        const hardwareMatch = to01((Number(components.F) || 0) + (Number(components.H) || 0));
+        return {
+            ...source,
+            totalScore: Math.round(candidate.score * 100) / 100,
+            score: Math.round(candidate.score * 100) / 100,
+            components: {
+                quality,
+                speed,
+                ttfb: speed, // ttfb tracks speed; legacy field retained for shape
+                context,
+                hardwareMatch
+            },
+            quant: candidate.quant || source.quant,
+            estimatedRAM: candidate.requiredGB,
+            estimatedTPS: candidate.estTPS,
+            reasoning: candidate.rationale ||
+                this.generateReasoning(source, hardware, quality, hardwareMatch)
+        };
+    }
+    /**
+     * Legacy multi-objective ranking, retained ONLY as a defensive fallback if
+     * the unified core throws. Not used on the normal path.
+     */
+    selectBestModelsLegacy(hardware, models, category = 'general', topK = 10) {
         const compatibleModels = this.applyHardFilters(hardware, models);
         if (compatibleModels.length === 0) {
             return { compatible: [], marginal: [], incompatible: models };
         }
-        // Step 2: Multi-objective scoring
-        const scoredModels = compatibleModels.map(model =>
+        const scoredModels = compatibleModels.map(model =>
             this.calculateMultiObjectiveScore(hardware, model, category)
         ).filter(Boolean);
-        // Step 3: Sort and classify
         scoredModels.sort((a, b) => b.totalScore - a.totalScore);
         return this.classifyResults(scoredModels, topK);
@@ -784,11 +886,16 @@ class MultiObjectiveSelector {
     }
     estimateKVCache(model, contextLength) {
-        // Rough KV cache estimation: 2 * layers * hidden_size * seq_len * 2 bytes
+        // KV cache grows linearly with parameter count and context length. The old
+        // formula derived both "layers" and "hidden size" from params and multiplied
+        // them, making the estimate scale with params^2 — a 70B model at 8k came out
+        // at ~299 GB (real value ~11 GB), which made checkMemoryCompatibility reject
+        // every mid/large model. Use the same calibrated linear factor as the Ollama
+        // capacity planner (~0.08 GB per 1B params at 4k context).
         const params = this.estimateModelParams(model);
-        const layers = Math.round(params * 2); // Rough approximation
-        const hiddenSize = Math.round(params * 1000); // Rough approximation
-        return (2 * layers * hiddenSize * contextLength * 2) / (1024 ** 3); // GB
+        const ctx = Number.isFinite(contextLength) && contextLength > 0 ? contextLength : 4096;
+        const kvFactorPer4k = 0.08; // GB per 1B params at 4k context (fp16)
+        return Math.max(0, params * kvFactorPer4k * (ctx / 4096)); // GB
     }
     estimateTokensPerSecond(hardware, model) {

package/src/calibration/calibration-manager.js CHANGED

@@ -320,7 +320,10 @@ class CalibrationManager {
                 NO_COLOR: '1'
             }
         });
-        const latencyMs = Number((process.hrtime.bigint() - started) / 1_000_000n);
+        // Convert ns->ms in floating point: dividing the BigInt first floored away
+        // all sub-millisecond precision (a 0.5 ms call read as 0 ms, skewing p50/p95,
+        // ttft and tokens/sec). The ns diff is well within Number's safe range.
+        const latencyMs = Number(process.hrtime.bigint() - started) / 1_000_000;
         if (result.error) {
             const error = new Error(result.error.message || 'Failed to execute runtime prompt.');