npm - llm-checker - Versions diffs - 3.5.15 → 3.6.1 - Mend

llm-checker 3.5.15 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.md +14 -1
package/analyzer/compatibility.js +5 -0
package/analyzer/performance.js +5 -4
package/bin/cli.js +5 -39
package/bin/enhanced_cli.js +88 -19
package/bin/mcp-server.mjs +266 -101
package/package.json +7 -7
package/src/ai/multi-objective-selector.js +118 -11
package/src/calibration/calibration-manager.js +4 -1
package/src/data/model-database.js +39 -5
package/src/data/sync-manager.js +32 -18
package/src/hardware/backends/apple-silicon.js +5 -1
package/src/hardware/backends/cuda-detector.js +47 -19
package/src/hardware/backends/intel-detector.js +6 -2
package/src/hardware/backends/rocm-detector.js +6 -2
package/src/hardware/detector.js +57 -30
package/src/hardware/unified-detector.js +129 -25
package/src/models/ai-check-selector.js +36 -5
package/src/models/deterministic-selector.js +163 -15
package/src/models/expanded_database.js +9 -5
package/src/models/intelligent-selector.js +87 -1
package/src/models/requirements.js +16 -11
package/src/models/scoring-core.js +341 -0
package/src/models/scoring-engine.js +9 -2
package/src/ollama/capacity-planner.js +15 -2
package/src/ollama/client.js +70 -30
package/src/ollama/enhanced-client.js +20 -2
package/src/ollama/manager.js +14 -2
package/src/policy/cli-policy.js +8 -2
package/src/policy/policy-engine.js +2 -1
package/src/provenance/model-provenance.js +4 -1
package/src/ui/cli-theme.js +47 -7
package/src/ui/interactive-panel.js +162 -24

package/README.md CHANGED

@@ -573,6 +573,19 @@ This makes integrated GPUs visible even when the selected runtime backend is sti
 llm-checker recommend
 ```
+As of the scoring unification (#96), `check`, `recommend`, and `smart-recommend`
+all derive their ranking from **one canonical scoring core**
+(`DeterministicModelSelector` via `src/models/scoring-core.js`), so identical
+`(model, hardware)` inputs score identically across all three and the
+high-capacity right-sizing floor applies everywhere. They differ only in their
+model **source** and **presentation**, not in how a given model is ranked:
+| Command | Role | Ranking core |
+|---------|------|--------------|
+| `recommend` | Canonical model recommendations by category | Shared core (reference output) |
+| `check` | Full hardware-compatibility report with a recommendation card | Shared core (consistent ranking, fit-oriented report) |
+| `smart-recommend` | Catalog/DB-backed recommendations with a detailed score breakdown | Shared core (same ordering + scores) |
 Use optimization profiles to steer ranking by intent:
 ```bash
@@ -695,7 +708,7 @@ Three scoring systems are available, each optimized for different workflows:
 | `reasoning` | 60% | 10% | 20% | 10% |
 | `multimodal` | 50% | 15% | 20% | 15% |
-**Scoring Engine** (used by `smart-recommend` and `search`):
+**Scoring Engine** (used by `search` for catalog scoring; `smart-recommend`'s final ranking is produced by the shared scoring core &mdash; see #96):
 | Use Case | Quality | Speed | Fit | Context |
 |----------|:-------:|:-----:|:---:|:-------:|

package/analyzer/compatibility.js CHANGED

@@ -428,6 +428,11 @@ class CompatibilityAnalyzer {
     }
     parseModelSize(sizeString) {
+        // Guard non-string input (undefined / a numeric size) — this runs for every
+        // model in calculateModelCompatibility, so one bad entry must not crash the
+        // whole analysis. Matches the guard in analyzer/performance.js.
+        if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
         const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
         if (!match) return 1;

package/analyzer/performance.js CHANGED

@@ -363,12 +363,13 @@ class PerformanceAnalyzer {
     }
     estimateLoadTime(model, hardware) {
+        // ~2 GB per 1B params (fp16-ish) on-disk approximation.
         const modelSizeGB = this.parseModelSize(model.size) * 2;
-        let loadTimeSeconds = modelSizeGB * 2;
-        loadTimeSeconds *= 0.7;
+        // Fold the previous `* 2` then `* 0.7` two-step (a leftover from an
+        // incomplete edit, with a dead blank line) into one documented factor:
+        // ~1.4 s of load time per GB before hardware adjustments.
+        let loadTimeSeconds = modelSizeGB * 1.4;
         const cpuSpeedFactor = Math.max(0.5, Math.min(1.5, (hardware.cpu.speed || 2.5) / 2.5));
         loadTimeSeconds /= cpuSpeedFactor;

package/bin/cli.js CHANGED

@@ -11,44 +11,10 @@ if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
     process.exit(1);
 }
-function preprocessAiCheckModelsArg(argv) {
-    const normalizedArgs = [];
-    let modelsFilter = null;
-    let sawAiCheck = false;
-    for (let index = 0; index < argv.length; index += 1) {
-        const token = argv[index];
-        if (token === 'ai-check') {
-            sawAiCheck = true;
-        }
-        if (sawAiCheck && token === '--models') {
-            const nextToken = argv[index + 1];
-            if (nextToken && !nextToken.startsWith('-')) {
-                modelsFilter = nextToken;
-                index += 1;
-            }
-            continue;
-        }
-        if (sawAiCheck && token.startsWith('--models=')) {
-            modelsFilter = token.slice('--models='.length);
-            continue;
-        }
-        normalizedArgs.push(token);
-    }
-    return { args: normalizedArgs, modelsFilter };
-}
-const preprocessedArgs = preprocessAiCheckModelsArg(process.argv.slice(2));
-if (typeof preprocessedArgs.modelsFilter === 'string' && preprocessedArgs.modelsFilter.trim()) {
-    process.env.LLM_CHECKER_AI_CHECK_MODELS = preprocessedArgs.modelsFilter.trim();
-}
-process.argv = [process.argv[0], process.argv[1], ...preprocessedArgs.args];
+// `ai-check --models <list>` is now a real commander option handled in
+// enhanced_cli.js (and the AICheckSelector applies it as a candidate filter), so
+// the previous argv-rewriting shim — which stripped the flag and stashed it in an
+// env var that nothing read — is gone. LLM_CHECKER_AI_CHECK_MODELS still works as
+// an explicit fallback for the same filter.
 require('./enhanced_cli');

package/bin/enhanced_cli.js CHANGED

@@ -58,7 +58,7 @@ const calibrationManager = new CalibrationManager();
 const COMMAND_HEADER_LABELS = {
     'hw-detect': 'Hardware Detection',
-    'smart-recommend': 'Smart Recommend',
+    'smart-recommend': 'Smart Recommend (Experimental)',
     search: 'Model Search',
     sync: 'Database Sync',
     'mcp-setup': 'Claude MCP Setup',
@@ -79,6 +79,19 @@ function showAsciiArt(command) {
     renderCommandHeader(label);
 }
+const RECOMMENDATION_COMMAND_NOTES = {
+    check: 'Compatibility report: shows hardware fit first. Use `llm-checker recommend` for canonical ranked model picks.',
+    recommend: 'Canonical recommendations: deterministic hardware-aware selector by category.',
+    'smart-recommend': 'Experimental scoring engine: results can differ from `recommend` while this path is being unified.'
+};
+function displayRecommendationCommandNote(command) {
+    const note = RECOMMENDATION_COMMAND_NOTES[command];
+    if (!note) return;
+    console.log(chalk.gray(`Recommendation mode: ${note}`));
+    console.log('');
+}
 // Function to search Ollama models by use case
 function getOllamaCacheFile(filename) {
     try {
@@ -3295,6 +3308,7 @@ Policy scope:
     )
     .action(async (options) => {
         showAsciiArt('check');
+        displayRecommendationCommandNote('check');
         try {
             // Use verbose progress unless explicitly disabled
             const verboseEnabled = options.verbose !== false;
@@ -3462,11 +3476,8 @@ Policy scope:
 program
     .command('ollama')
-    .description('Manage Ollama integration with hardware compatibility')
-    .option('-l, --list', 'List installed models with compatibility scores')
-    .option('-r, --running', 'Show running models with performance data')
-    .option('-c, --compatible', 'Show only hardware-compatible installed models')
-    .option('--recommendations', 'Show installation recommendations')
+    .description('Check Ollama integration status (use `installed` to rank installed models)')
+    .option('-l, --list', 'List installed models ranked by compatibility (runs `installed`)')
     .action(async (options) => {
         showAsciiArt('ollama');
         const spinner = ora('Checking Ollama integration...').start();
@@ -3492,7 +3503,9 @@ program
             spinner.succeed(`Ollama integration active`);
             if (options.list) {
-                console.log('Ollama models list feature coming soon...');
+                console.log(chalk.cyan('\nRun `llm-checker installed` to rank your installed models by compatibility and use-case.'));
+            } else {
+                console.log(chalk.gray('\nTip: `llm-checker installed` ranks installed models; `llm-checker recommend` suggests models for your hardware.'));
             }
         } catch (error) {
@@ -3520,10 +3533,18 @@ program
             const availability = await ollamaClient.checkOllamaAvailability();
             if (!availability.available) {
                 spinner.fail('Ollama not available');
-                console.log(chalk.red('\n' + availability.error));
-                if (availability.hint) {
-                    console.log(chalk.yellow('Hint: ' + availability.hint));
+                if (options.json) {
+                    // --json must always emit parseable JSON on stdout (the success
+                    // path prints an array); previously these branches printed
+                    // ANSI-colored prose and broke `installed --json | jq`.
+                    console.log(JSON.stringify([], null, 2));
+                } else {
+                    console.log(chalk.red('\n' + availability.error));
+                    if (availability.hint) {
+                        console.log(chalk.yellow('Hint: ' + availability.hint));
+                    }
                 }
+                process.exitCode = 1;
                 return;
             }
@@ -3531,8 +3552,12 @@ program
             const installedModels = await ollamaClient.getLocalModels();
             if (!installedModels || installedModels.length === 0) {
                 spinner.fail('No models installed');
-                console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
-                console.log(chalk.cyan('  ollama pull llama3.2:3b'));
+                if (options.json) {
+                    console.log(JSON.stringify([], null, 2));
+                } else {
+                    console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
+                    console.log(chalk.cyan('  ollama pull llama3.2:3b'));
+                }
                 return;
             }
@@ -3851,6 +3876,7 @@ Calibrated routing examples:
     )
     .action(async (options) => {
         showAsciiArt('recommend');
+        displayRecommendationCommandNote('recommend');
         try {
             const verboseEnabled = options.verbose !== false;
             const checker = new (getLLMChecker())({ verbose: verboseEnabled });
@@ -4365,6 +4391,7 @@ program
     .option('--ctx <number>', 'Target context length', '8192')
     .option('-e, --evaluator <model>', 'Evaluator model (auto for best available)', 'auto')
     .option('-w, --weight <number>', 'AI weight (0.0-1.0, default 0.3)', '0.3')
+    .option('-m, --models <list>', 'Restrict evaluation to these models (comma-separated)')
     .action(async (options) => {
         showAsciiArt('ai-check');
         // Check if Ollama is installed first
@@ -4377,14 +4404,33 @@ program
             const aiCheckSelector = new AICheckSelector();
+            // Validate numeric options up front: bad input (e.g. --weight abc, --top
+            // foo) used to flow through as NaN, which survived the downstream clamp
+            // and made the displayed AI weight and every weighted score NaN.
+            const weight = Number.parseFloat(options.weight);
+            const top = Number.parseInt(options.top, 10);
+            const ctx = options.ctx ? Number.parseInt(options.ctx, 10) : undefined;
+            const invalidNumeric =
+                (!Number.isFinite(weight) || weight < 0 || weight > 1) ? `--weight ${options.weight} (use a number from 0.0 to 1.0)`
+                : (!Number.isInteger(top) || top <= 0) ? `--top ${options.top} (use a positive integer)`
+                : (ctx !== undefined && (!Number.isInteger(ctx) || ctx <= 0)) ? `--ctx ${options.ctx} (use a positive integer)`
+                : null;
+            if (invalidNumeric) {
+                spinner.stop();
+                console.error(chalk.red(`Invalid ${invalidNumeric}`));
+                process.exitCode = 1;
+                return;
+            }
             const checkOptions = {
                 category: options.category,
-                top: parseInt(options.top),
-                ctx: options.ctx ? parseInt(options.ctx) : undefined,
+                top,
+                ctx,
                 evaluator: options.evaluator,
-                weight: parseFloat(options.weight)
+                weight,
+                models: options.models || process.env.LLM_CHECKER_AI_CHECK_MODELS || undefined
             };
             spinner.stop();
             const result = await aiCheckSelector.aiCheck(checkOptions);
@@ -4731,8 +4777,20 @@ program
             });
             if (searchResults.length === 0) {
-                if (spinner) spinner.info('No models found matching your query');
                 syncManager.close();
+                if (options.json) {
+                    // --json must always emit parseable JSON, even on zero matches
+                    // (previously it printed nothing and broke `search ... --json | jq`).
+                    console.log(JSON.stringify({
+                        query,
+                        all: [],
+                        recommendations: [],
+                        insights: [],
+                        meta: { afterFiltering: 0, totalMatches: 0 }
+                    }, null, 2));
+                } else if (spinner) {
+                    spinner.info('No models found matching your query');
+                }
                 return;
             }
@@ -4806,7 +4864,7 @@ program
 program
     .command('smart-recommend')
-    .description('Get intelligent model recommendations using the new scoring engine')
+    .description('Experimental recommendations using the alternate scoring engine')
     .option('-u, --use-case <case>', 'Optimize for use case', 'general')
     .option('-l, --limit <n>', 'Maximum number of recommendations', '5')
     .option('--target-tps <n>', 'Target tokens per second', '20')
@@ -4814,8 +4872,19 @@ program
     .option('--include-vision', 'Include vision/multimodal models')
     .option('--include-embeddings', 'Include embedding models')
     .option('-j, --json', 'Output as JSON')
+    .addHelpText(
+        'after',
+        `
+Recommendation engine note:
+  smart-recommend is experimental and may intentionally differ from recommend.
+  Use "llm-checker recommend" for canonical package recommendations.
+`
+    )
     .action(async (options) => {
-        if (!options.json) showAsciiArt('smart-recommend');
+        if (!options.json) {
+            showAsciiArt('smart-recommend');
+            displayRecommendationCommandNote('smart-recommend');
+        }
         const SyncManager = require('../src/data/sync-manager');
         const IntelligentSelector = require('../src/models/intelligent-selector');
         const UnifiedDetector = require('../src/hardware/unified-detector');