npm - llm-checker - Versions diffs - 3.2.0 → 3.2.1 - Mend

llm-checker 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/README.md +14 -0
package/analyzer/compatibility.js +20 -0
package/bin/cli.js +14 -0
package/bin/enhanced_cli.js +133 -36
package/package.json +5 -3
package/src/ai/multi-objective-selector.js +28 -4
package/src/hardware/backends/cuda-detector.js +32 -11
package/src/hardware/detector.js +107 -5
package/src/hardware/specs.js +8 -1
package/src/index.js +77 -11
package/src/models/expanded_database.js +8 -2
package/src/models/scoring-engine.js +4 -0
package/src/models/speculative-decoding-estimator.js +245 -0
package/src/runtime/runtime-support.js +174 -0
package/bin/CLAUDE.md +0 -27
package/src/CLAUDE.md +0 -18
package/src/data/CLAUDE.md +0 -17
package/src/hardware/CLAUDE.md +0 -18
package/src/hardware/backends/CLAUDE.md +0 -17
package/src/models/CLAUDE.md +0 -23
package/src/ollama/CLAUDE.md +0 -30
package/src/plugins/CLAUDE.md +0 -17
package/src/utils/CLAUDE.md +0 -17

package/README.md CHANGED Viewed

@@ -52,6 +52,20 @@ Choosing the right LLM for your hardware is complex. With thousands of model var
 ---
+## Comparison with Other Tooling (e.g. `llmfit`)
+LLM Checker and `llmfit` solve related but different problems:
+| Tool | Primary Focus | Typical Output |
+|------|---------------|----------------|
+| **LLM Checker** | Hardware-aware **model selection** for local inference | Ranked recommendations, compatibility scores, pull/run commands |
+| **llmfit** | LLM workflow support and model-fit evaluation from another angle | Different optimization workflow and selection heuristics |
+If your goal is: *"What should I run on this exact machine right now?"*, use **LLM Checker** first.
+If your goal is broader experimentation across custom pipelines, using both tools can be complementary.
+---
 ## Installation
 ```bash

package/analyzer/compatibility.js CHANGED Viewed

@@ -1,4 +1,10 @@
 const { getLogger } = require('../src/utils/logger');
+const {
+    normalizeRuntime,
+    getRuntimeDisplayName,
+    runtimeSupportedOnHardware,
+    runtimeSupportsSpeculativeDecoding
+} = require('../src/runtime/runtime-support');
 class CompatibilityAnalyzer {
     constructor() {
@@ -451,6 +457,7 @@ class CompatibilityAnalyzer {
     generateRecommendations(hardware, results, options = {}) {
         const recommendations = [];
+        const runtime = normalizeRuntime(options.runtime || 'ollama');
         const tier = this.getHardwareTier(hardware);
         if (hardware.memory.total < 16) {
@@ -511,6 +518,19 @@ class CompatibilityAnalyzer {
             }
         }
+        if (runtime !== 'ollama') {
+            const runtimeLabel = getRuntimeDisplayName(runtime);
+            if (runtimeSupportedOnHardware(runtime, hardware)) {
+                recommendations.push(`Runtime selected: ${runtimeLabel}`);
+            } else {
+                recommendations.push(`${runtimeLabel} is not recommended on this hardware (fallback to Ollama).`);
+            }
+            if (runtimeSupportsSpeculativeDecoding(runtime)) {
+                recommendations.push(`Enable speculative decoding in ${runtimeLabel} for higher throughput.`);
+            }
+        }
         return recommendations;
     }

package/bin/cli.js ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env node
+'use strict'; // Entry shim: checks the Node.js major version, then loads the real CLI module.
+const majorNodeVersion = Number.parseInt(process.versions.node.split('.')[0], 10); // text before the first '.' of the version string, parsed as base-10
+if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) { // rejects an unparseable (NaN) major version as well as anything below 16
+    console.error(
+        `[llm-checker] Unsupported Node.js version: ${process.versions.node}. ` +
+        'Please use Node.js 16 or newer.'
+    );
+    process.exit(1); // exit with a failure code before ./enhanced_cli is ever required
+}
+require('./enhanced_cli'); // only reached when the version check above passes

package/bin/enhanced_cli.js CHANGED Viewed

@@ -16,6 +16,14 @@ function getLLMChecker() {
 const { getLogger } = require('../src/utils/logger');
 const fs = require('fs');
 const path = require('path');
+const {
+    SUPPORTED_RUNTIMES,
+    normalizeRuntime,
+    runtimeSupportedOnHardware,
+    getRuntimeDisplayName,
+    getRuntimeCommandSet
+} = require('../src/runtime/runtime-support');
+const SpeculativeDecodingEstimator = require('../src/models/speculative-decoding-estimator');
 // ASCII Art for each command - Large text banners
 const ASCII_ART = {
@@ -1406,10 +1414,18 @@ function displaySimplifiedSystemInfo(hardware) {
     console.log(`Hardware Tier: ${tierColor.bold(tier)}`);
 }
-async function displayModelRecommendations(analysis, hardware, useCase = 'general', limit = 1) {
+async function displayModelRecommendations(analysis, hardware, useCase = 'general', limit = 1, runtime = 'ollama') {
     const title = limit === 1 ? 'RECOMMENDED MODEL' : `TOP ${limit} COMPATIBLE MODELS`;
     console.log(chalk.green.bold(`\n${title}`));
     console.log(chalk.gray('─'.repeat(50)));
+    const selectedRuntime = normalizeRuntime(runtime);
+    const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
+    const speculativeEstimator = new SpeculativeDecodingEstimator();
+    const speculativeCandidatePool = [
+        ...(analysis?.compatible || []),
+        ...(analysis?.marginal || [])
+    ];
     // Find the best models from compatible models considering use case
     let selectedModels = [];
@@ -1760,42 +1776,75 @@ async function displayModelRecommendations(analysis, hardware, useCase = 'genera
             if (model.performanceEstimate) {
                 console.log(`Estimated Speed: ${chalk.yellow(model.performanceEstimate.estimatedTokensPerSecond || 'N/A')} tokens/sec`);
             }
-            // Check if it's already installed by comparing with Ollama integration
+            console.log(`Runtime: ${chalk.white(runtimeLabel)}`);
+            const runtimeCommands = getRuntimeCommandSet(model, selectedRuntime);
+            // Check installation only when using Ollama runtime.
             let isInstalled = false;
-            try {
-                isInstalled = await checkIfModelInstalled(model, analysis.ollamaInfo);
-                if (isInstalled) {
-                    console.log(`Status: ${chalk.green('Already installed in Ollama')}`);
-                } else if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
-                    console.log(`Status: ${chalk.gray('Available for installation')}`);
-                } else {
-                    console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
+            if (selectedRuntime === 'ollama') {
+                try {
+                    isInstalled = await checkIfModelInstalled(model, analysis.ollamaInfo);
+                    if (isInstalled) {
+                        console.log(`Status: ${chalk.green('Already installed in Ollama')}`);
+                    } else if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
+                        console.log(`Status: ${chalk.gray('Available for installation')}`);
+                    } else {
+                        console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
+                    }
+                } catch (installCheckError) {
+                    if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
+                        console.log(`Status: ${chalk.gray('Available for installation')}`);
+                    } else {
+                        console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
+                    }
                 }
-            } catch (installCheckError) {
-                // If checking installation status fails, show based on Ollama availability
-                if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
-                    console.log(`Status: ${chalk.gray('Available for installation')}`);
-                } else {
-                    console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
+                const ollamaCommand = getOllamaInstallCommand(model);
+                if (ollamaCommand) {
+                    const modelName = extractModelName(ollamaCommand);
+                    if (isInstalled) {
+                        console.log(`\nRun: ${chalk.cyan.bold(`ollama run ${modelName}`)}`);
+                    } else {
+                        console.log(`\nPull: ${chalk.cyan.bold(ollamaCommand)}`);
+                    }
+                } else if (model.ollamaTag || model.ollamaId) {
+                    const tag = model.ollamaTag || model.ollamaId;
+                    if (isInstalled) {
+                        console.log(`\nRun: ${chalk.cyan.bold(`ollama run ${tag}`)}`);
+                    } else {
+                        console.log(`\nPull: ${chalk.cyan.bold(`ollama pull ${tag}`)}`);
+                    }
+                }
+            } else {
+                console.log(`Status: ${chalk.gray(`${runtimeLabel} runtime selected`)}`);
+                console.log(`\nRun: ${chalk.cyan.bold(runtimeCommands.run)}`);
+                if (index === 0) {
+                    console.log(`Install runtime: ${chalk.cyan.bold(runtimeCommands.install)}`);
+                    console.log(`Fetch model: ${chalk.cyan.bold(runtimeCommands.pull)}`);
                 }
             }
-            // Show pull/run command directly in each model block (Issue #3)
-            const ollamaCommand = getOllamaInstallCommand(model);
-            if (ollamaCommand) {
-                const modelName = extractModelName(ollamaCommand);
-                if (isInstalled) {
-                    console.log(`\nCommand: ${chalk.cyan.bold(`ollama run ${modelName}`)}`);
-                } else {
-                    console.log(`\nCommand: ${chalk.cyan.bold(ollamaCommand)}`);
-                }
-            } else if (model.ollamaTag || model.ollamaId) {
-                const tag = model.ollamaTag || model.ollamaId;
-                if (isInstalled) {
-                    console.log(`\nCommand: ${chalk.cyan.bold(`ollama run ${tag}`)}`);
-                } else {
-                    console.log(`\nCommand: ${chalk.cyan.bold(`ollama pull ${tag}`)}`);
+            const speculativeInfo =
+                model.speculativeDecoding ||
+                speculativeEstimator.estimate({
+                    model,
+                    candidates: speculativeCandidatePool,
+                    hardware,
+                    runtime: selectedRuntime
+                });
+            if (speculativeInfo && speculativeInfo.runtime === selectedRuntime) {
+                if (speculativeInfo.enabled) {
+                    console.log(
+                        `SpecDec: ${chalk.green(`+${speculativeInfo.estimatedThroughputGainPct}%`)} ` +
+                        `(${chalk.gray(`draft: ${speculativeInfo.draftModel}`)})`
+                    );
+                } else if (speculativeInfo.estimatedSpeedup) {
+                    const suggested = speculativeInfo.suggestedDraftModel ? ` with ${speculativeInfo.suggestedDraftModel}` : '';
+                    console.log(
+                        `SpecDec estimate: ${chalk.yellow(`+${speculativeInfo.estimatedThroughputGainPct}%`)}${chalk.gray(suggested)}`
+                    );
                 }
             }
         }
@@ -1807,9 +1856,12 @@ async function displayModelRecommendations(analysis, hardware, useCase = 'genera
     return selectedModels;
 }
-async function displayQuickStartCommands(analysis, recommendedModel = null, allRecommended = null) {
+async function displayQuickStartCommands(analysis, recommendedModel = null, allRecommended = null, runtime = 'ollama') {
     console.log(chalk.yellow.bold('\nQUICK START'));
     console.log(chalk.gray('─'.repeat(50)));
+    const selectedRuntime = normalizeRuntime(runtime);
+    const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
     // Use the first model from allRecommended if available, otherwise fallback to recommendedModel
     let bestModel = (allRecommended && allRecommended.length > 0) ? allRecommended[0] : recommendedModel;
@@ -1824,6 +1876,33 @@ async function displayQuickStartCommands(analysis, recommendedModel = null, allR
         }
     }
+    if (selectedRuntime !== 'ollama') {
+        if (!bestModel) {
+            console.log(`1. Try expanding search: ${chalk.cyan('llm-checker check --include-cloud')}`);
+            return;
+        }
+        const runtimeCommands = getRuntimeCommandSet(bestModel, selectedRuntime);
+        console.log(`1. Install ${runtimeLabel}:`);
+        console.log(`   ${chalk.cyan.bold(runtimeCommands.install)}`);
+        console.log(`2. Fetch model weights:`);
+        console.log(`   ${chalk.cyan.bold(runtimeCommands.pull)}`);
+        console.log(`3. Run model:`);
+        console.log(`   ${chalk.cyan.bold(runtimeCommands.run)}`);
+        const speculative = bestModel.speculativeDecoding;
+        if (speculative && speculative.enabled) {
+            console.log(`4. SpecDec suggestion (${chalk.green(`+${speculative.estimatedThroughputGainPct}%`)}):`);
+            if (selectedRuntime === 'vllm') {
+                console.log(`   ${chalk.cyan.bold(`${runtimeCommands.run} --speculative-model '${speculative.draftModelRef || speculative.draftModel}'`)}`);
+            } else if (selectedRuntime === 'mlx') {
+                console.log(`   ${chalk.gray(`Use draft model ${speculative.draftModelRef || speculative.draftModel} when enabling speculative decoding in MLX-LM`)}`);
+            }
+        }
+        return;
+    }
     if (analysis.ollamaInfo && !analysis.ollamaInfo.available) {
         console.log(`1. Install Ollama: ${chalk.underline('https://ollama.ai')}`);
         console.log(`2. Come back and run this command again`);
@@ -1992,6 +2071,7 @@ program
     .option('--min-size <size>', 'Minimum model size to consider (e.g., "7B" or "7GB")')
     .option('--include-cloud', 'Include cloud models in analysis')
     .option('--ollama-only', 'Only show models available in Ollama')
+    .option('--runtime <runtime>', `Inference runtime (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
     .option('--performance-test', 'Run performance benchmarks')
     .option('--show-ollama-analysis', 'Show detailed Ollama model analysis')
     .option('--no-verbose', 'Disable step-by-step progress display')
@@ -2008,6 +2088,16 @@ program
             }
             const hardware = await checker.getSystemInfo();
+            let selectedRuntime = normalizeRuntime(options.runtime);
+            if (!runtimeSupportedOnHardware(selectedRuntime, hardware)) {
+                const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
+                console.log(
+                    chalk.yellow(
+                        `\nWarning: ${runtimeLabel} is not supported on this hardware. Falling back to Ollama.`
+                    )
+                );
+                selectedRuntime = 'ollama';
+            }
             // Normalize and fix use-case typos
             const normalizeUseCase = (useCase = '') => {
@@ -2049,7 +2139,8 @@ program
                 performanceTest: options.performanceTest,
                 limit: parseInt(options.limit) || 10,
                 maxSize: maxSize,
-                minSize: minSize
+                minSize: minSize,
+                runtime: selectedRuntime
             });
             if (!verboseEnabled) {
@@ -2058,8 +2149,14 @@ program
             // Simplified output - show only essential information
             displaySimplifiedSystemInfo(hardware);
-            const recommendedModels = await displayModelRecommendations(analysis, hardware, normalizedUseCase, parseInt(options.limit) || 1);
-            await displayQuickStartCommands(analysis, recommendedModels[0], recommendedModels);
+            const recommendedModels = await displayModelRecommendations(
+                analysis,
+                hardware,
+                normalizedUseCase,
+                parseInt(options.limit) || 1,
+                selectedRuntime
+            );
+            await displayQuickStartCommands(analysis, recommendedModels[0], recommendedModels, selectedRuntime);
         } catch (error) {
             console.error(chalk.red('\nError:'), error.message);

package/package.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "name": "llm-checker",
-  "version": "3.2.0",
+  "version": "3.2.1",
   "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
   "bin": {
-    "llm-checker": "bin/enhanced_cli.js",
-    "ollama-checker": "bin/enhanced_cli.js",
+    "llm-checker": "bin/cli.js",
+    "ollama-checker": "bin/cli.js",
     "llm-checker-mcp": "bin/mcp-server.mjs"
   },
   "main": "src/index.js",
@@ -13,6 +13,8 @@
     "test:gpu": "node tests/gpu-detection/multi-gpu.test.js",
     "test:platform": "node tests/platform-tests/cross-platform.test.js",
     "test:ui": "node tests/ui-tests/interface.test.js",
+    "test:runtime": "node tests/runtime-specdec-tests.js",
+    "test:hardware-detector": "node tests/hardware-detector-regression.js",
     "test:all": "node tests/run-all-tests.js",
     "build": "echo 'No build needed'",
     "dev": "node bin/enhanced_cli.js",

package/src/ai/multi-objective-selector.js CHANGED Viewed

@@ -387,6 +387,9 @@ class MultiObjectiveSelector {
         // 2) Memory bandwidth (20%) - simplified estimation
         let memBandwidthGBs = 50; // fallback
         const gpu = gpuModel.toLowerCase();
+        if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) memBandwidthGBs = 1000;
+        else if (gpu.includes('h100')) memBandwidthGBs = 3000;
+        else if (gpu.includes('a100')) memBandwidthGBs = 2039;
         if (gpu.includes('m4 pro')) memBandwidthGBs = 273;
         else if (gpu.includes('m4')) memBandwidthGBs = 120;
         else if (gpu.includes('rtx 4090')) memBandwidthGBs = 1008;
@@ -398,7 +401,10 @@ class MultiObjectiveSelector {
         // 3) Compute (20%) - simplified estimation
         let compute = 0;
-        if (gpu.includes('m4 pro')) compute = clamp(28 / 80);  // Match main algorithm
+        if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) compute = clamp(180 / 80);
+        else if (gpu.includes('h100')) compute = clamp(320 / 80);
+        else if (gpu.includes('a100')) compute = clamp(250 / 80);
+        else if (gpu.includes('m4 pro')) compute = clamp(28 / 80);  // Match main algorithm
         else if (gpu.includes('m4')) compute = clamp(15 / 80);
         else if (gpu.includes('rtx 4090')) compute = clamp(165 / 80);
         else if (gpu.includes('rtx 4080')) compute = clamp(121 / 80);
@@ -448,6 +454,10 @@ class MultiObjectiveSelector {
         // Special flagship GPU detection by model name
         if (gpuModel.toLowerCase().includes('rtx 50') ||
+            gpuModel.toLowerCase().includes('gb10') ||
+            gpuModel.toLowerCase().includes('grace blackwell') ||
+            gpuModel.toLowerCase().includes('dgx spark') ||
+            gpuModel.toLowerCase().includes('blackwell') ||
             gpuModel.toLowerCase().includes('h100') ||
             gpuModel.toLowerCase().includes('a100')) {
             tier = 'flagship';
@@ -599,7 +609,11 @@ class MultiObjectiveSelector {
         // NVIDIA GPU optimizations
         if (gpu.includes('nvidia') || gpu.includes('geforce') || gpu.includes('rtx') || gpu.includes('gtx')) {
-            if (gpu.includes('rtx 50')) {
+            if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) {
+                specs.offloadCapacity = Math.min(ramGB * 0.6, 32);
+                specs.memoryEfficiency = 0.96;
+                specs.backendOptimization = 1.25;
+            } else if (gpu.includes('rtx 50')) {
                 // RTX 50xx series - flagship tier with massive VRAM + excellent offload
                 specs.offloadCapacity = Math.min(ramGB * 0.5, 24);
                 specs.memoryEfficiency = 0.95;
@@ -732,7 +746,15 @@ class MultiObjectiveSelector {
         // GPU-based calculation (dedicated GPU only)
         if (vramGB > 0 && !gpuModel.toLowerCase().includes('iris') && !gpuModel.toLowerCase().includes('integrated')) {
             let gpuTPS = 20; // Conservative GPU baseline
-            if (gpuModel.toLowerCase().includes('rtx 50')) {
+            if (gpuModel.toLowerCase().includes('gb10') ||
+                gpuModel.toLowerCase().includes('grace blackwell') ||
+                gpuModel.toLowerCase().includes('dgx spark')) {
+                gpuTPS = 85; // GB10 / Grace Blackwell class
+            } else if (gpuModel.toLowerCase().includes('h100')) {
+                gpuTPS = 120;
+            } else if (gpuModel.toLowerCase().includes('a100')) {
+                gpuTPS = 95;
+            } else if (gpuModel.toLowerCase().includes('rtx 50')) {
                 gpuTPS = 60; // RTX 50 series - more realistic
             } else if (gpuModel.toLowerCase().includes('rtx 40')) {
                 gpuTPS = 45; // RTX 40 series
@@ -740,6 +762,8 @@ class MultiObjectiveSelector {
                 gpuTPS = 35; // RTX 30 series
             } else if (gpuModel.toLowerCase().includes('rtx 20')) {
                 gpuTPS = 25; // RTX 20 series
+            } else if (gpuModel.toLowerCase().includes('p100')) {
+                gpuTPS = 32; // Tesla P100 class
             } else if (vramGB >= 8) {
                 gpuTPS = 30; // Other high-end GPUs
             } else if (vramGB >= 4) {
@@ -817,4 +841,4 @@ class MultiObjectiveSelector {
     }
 }
-module.exports = MultiObjectiveSelector;
+module.exports = MultiObjectiveSelector;

package/src/hardware/backends/cuda-detector.js CHANGED Viewed

@@ -209,8 +209,34 @@ class CUDADetector {
             architecture: 'Unknown'
         };
+        // NVIDIA GB10 / Grace Blackwell (DGX Spark)
+        if (nameLower.includes('gb10') || nameLower.includes('grace blackwell') ||
+            nameLower.includes('dgx spark') || nameLower.includes('blackwell')) {
+            capabilities.tensorCores = true;
+            capabilities.bf16 = true;
+            capabilities.fp8 = true;
+            capabilities.computeCapability = '10.0';
+            capabilities.architecture = 'Grace Blackwell';
+        }
+        // H100 (Hopper)
+        else if (nameLower.includes('h100') || nameLower.includes('h200')) {
+            capabilities.tensorCores = true;
+            capabilities.bf16 = true;
+            capabilities.fp8 = true;
+            capabilities.nvlink = true;
+            capabilities.computeCapability = '9.0';
+            capabilities.architecture = 'Hopper';
+        }
+        // Tesla P100 (Pascal)
+        else if (nameLower.includes('p100') || nameLower.includes('tesla p100')) {
+            capabilities.tensorCores = false;
+            capabilities.bf16 = false;
+            capabilities.fp8 = false;
+            capabilities.computeCapability = '6.0';
+            capabilities.architecture = 'Pascal';
+        }
         // RTX 50 series (Blackwell)
-        if (nameLower.includes('rtx 50') || nameLower.includes('rtx50')) {
+        else if (nameLower.includes('rtx 50') || nameLower.includes('rtx50')) {
             capabilities.tensorCores = true;
             capabilities.bf16 = true;
             capabilities.fp8 = true;
@@ -257,15 +283,6 @@ class CUDADetector {
             capabilities.architecture = 'Volta';
             capabilities.nvlink = true;
         }
-        // H100 (Hopper)
-        else if (nameLower.includes('h100') || nameLower.includes('h200')) {
-            capabilities.tensorCores = true;
-            capabilities.bf16 = true;
-            capabilities.fp8 = true;
-            capabilities.nvlink = true;
-            capabilities.computeCapability = '9.0';
-            capabilities.architecture = 'Hopper';
-        }
         return capabilities;
     }
@@ -311,6 +328,9 @@ class CUDADetector {
             'rtx 2060': 80,
             // Data center
+            'gb10': 95,
+            'grace blackwell': 95,
+            'dgx spark': 95,
             'h100': 400,
             'h200': 450,
             'a100': 300,
@@ -318,7 +338,8 @@ class CUDADetector {
             'l4': 150,
             'a40': 180,
             't4': 70,
-            'v100': 120
+            'v100': 120,
+            'p100': 45
         };
         for (const [model, speed] of Object.entries(speedMap)) {