npm - llm-checker - Versions diffs - 3.2.0 → 3.2.1 - Mend

llm-checker 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/README.md +14 -0
package/analyzer/compatibility.js +20 -0
package/bin/cli.js +14 -0
package/bin/enhanced_cli.js +133 -36
package/package.json +5 -3
package/src/ai/multi-objective-selector.js +28 -4
package/src/hardware/backends/cuda-detector.js +32 -11
package/src/hardware/detector.js +107 -5
package/src/hardware/specs.js +8 -1
package/src/index.js +77 -11
package/src/models/expanded_database.js +8 -2
package/src/models/scoring-engine.js +4 -0
package/src/models/speculative-decoding-estimator.js +245 -0
package/src/runtime/runtime-support.js +174 -0
package/bin/CLAUDE.md +0 -27
package/src/CLAUDE.md +0 -18
package/src/data/CLAUDE.md +0 -17
package/src/hardware/CLAUDE.md +0 -18
package/src/hardware/backends/CLAUDE.md +0 -17
package/src/models/CLAUDE.md +0 -23
package/src/ollama/CLAUDE.md +0 -30
package/src/plugins/CLAUDE.md +0 -17
package/src/utils/CLAUDE.md +0 -17

package/src/hardware/detector.js CHANGED

@@ -1,10 +1,12 @@
 const si = require('systeminformation');
+const UnifiedDetector = require('./unified-detector');
 class HardwareDetector {
     constructor() {
         this.cache = null;
         this.cacheExpiry = 5 * 60 * 1000;
         this.cacheTime = 0;
+        this.unifiedDetector = new UnifiedDetector();
     }
     async getSystemInfo(forceFresh = false) {
@@ -31,6 +33,8 @@ class HardwareDetector {
                 timestamp: Date.now()
             };
+            await this.enrichWithUnifiedHardware(systemInfo);
             this.cache = systemInfo;
             this.cacheTime = Date.now();
@@ -93,9 +97,15 @@ class HardwareDetector {
         const validGPUs = controllers.filter(gpu => {
             const model = (gpu.model || '').toLowerCase();
             const vendor = (gpu.vendor || '').toLowerCase();
+            const hasKnownModelSignature = this.looksLikeRealGPUModel(model);
             // Skip GPUs with empty/invalid data (like virtualized GPUs)
-            if (!model || !vendor || model === 'unknown' || vendor === '') {
+            if (!model || model === 'unknown') {
+                return false;
+            }
+            // Some passthrough/virtualized setups report empty vendor while model is valid
+            if ((!vendor || vendor === '') && !hasKnownModelSignature) {
                 return false;
             }
@@ -181,7 +191,7 @@ class HardwareDetector {
         return {
             model: enhancedModel,
-            vendor: primaryGPU.vendor || 'Unknown',
+            vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
             vram: effectiveVRAM,
             vramPerGPU: vram, // VRAM of primary GPU for reference
             vramDynamic: primaryGPU.vramDynamic || false,
@@ -192,13 +202,54 @@ class HardwareDetector {
             all: controllers.map(gpu => ({
                 model: gpu.model,
                 vram: this.normalizeVRAM(gpu.vram || 0),
-                vendor: gpu.vendor
+                vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
             })),
             displays: displays.length,
             score: this.calculateGPUScore(primaryGPU)
         };
     }
+    async enrichWithUnifiedHardware(systemInfo) {
+        try {
+            const unified = await this.unifiedDetector.detect();
+            if (!unified || !unified.summary || !unified.primary) {
+                return;
+            }
+            const primaryType = unified.primary.type || 'cpu';
+            if (primaryType === 'cpu') {
+                return;
+            }
+            const summary = unified.summary;
+            const backendInfo = unified.backends?.[primaryType]?.info || {};
+            const backendGPUs = Array.isArray(backendInfo.gpus) ? backendInfo.gpus : [];
+            const gpuCount = summary.gpuCount || backendGPUs.length || systemInfo.gpu.gpuCount || 1;
+            const totalVRAM = typeof summary.totalVRAM === 'number' ? summary.totalVRAM : systemInfo.gpu.vram;
+            const perGPUVRAM = backendGPUs[0]?.memory?.total
+                || (gpuCount > 0 && totalVRAM > 0 ? Math.round(totalVRAM / gpuCount) : 0);
+            const modelFromUnified = summary.gpuModel || systemInfo.gpu.model;
+            const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
+            systemInfo.gpu = {
+                ...systemInfo.gpu,
+                model: modelFromUnified,
+                vendor,
+                vram: totalVRAM || systemInfo.gpu.vram,
+                vramPerGPU: perGPUVRAM || systemInfo.gpu.vramPerGPU || 0,
+                dedicated: primaryType !== 'metal',
+                gpuCount,
+                isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
+                backend: primaryType,
+                driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
+            };
+        } catch (error) {
+            // Keep systeminformation-only results when backend-specific detection is unavailable
+        }
+    }
     processSystemInfo(system) {
         return {
             manufacturer: system.manufacturer || 'Unknown',
@@ -298,6 +349,10 @@ class HardwareDetector {
     estimateVRAMFromModel(model) {
         if (!model) return 0;
         const modelLower = model.toLowerCase();
+        // NVIDIA data-center / workstation
+        if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
+        if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
         // NVIDIA RTX 50 series
         if (modelLower.includes('rtx 5090')) return 32;
@@ -398,6 +453,7 @@ class HardwareDetector {
         // Bonus por marcas/modelos específicos
         if (model.includes('rtx 5090')) score += 30;
+        else if (model.includes('gb10') || model.includes('grace blackwell') || model.includes('dgx spark')) score += 28;
         else if (model.includes('rtx 5080')) score += 27;
         else if (model.includes('rtx 5070')) score += 24;
         else if (model.includes('rtx 5060')) score += 21;
@@ -407,6 +463,7 @@ class HardwareDetector {
         else if (model.includes('rtx 30')) score += 18;
         else if (model.includes('rtx 20')) score += 15;
         else if (model.includes('gtx 16')) score += 12;
+        else if (model.includes('tesla p100') || model.includes('p100')) score += 14;
         else if (model.includes('apple m')) score += 15;
         return Math.min(Math.round(score), 100);
@@ -497,9 +554,10 @@ class HardwareDetector {
      */
     getGPUTier(model) {
         const modelLower = model.toLowerCase();
         // NVIDIA RTX series
         if (modelLower.includes('rtx 50')) return 100;
+        if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 98;
         if (modelLower.includes('rtx 4090')) return 95;
         if (modelLower.includes('rtx 40')) return 90;
         if (modelLower.includes('rtx 3090')) return 85;
@@ -511,6 +569,7 @@ class HardwareDetector {
         // NVIDIA Professional
         if (modelLower.includes('a100')) return 98;
         if (modelLower.includes('h100')) return 99;
+        if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 78;
         if (modelLower.includes('tesla')) return 75;
         if (modelLower.includes('quadro')) return 65;
@@ -545,6 +604,49 @@ class HardwareDetector {
         return 0;
     }
+    looksLikeRealGPUModel(model) {
+        if (!model) return false;
+        const modelLower = model.toLowerCase();
+        const gpuMarkers = [
+            'nvidia', 'geforce', 'rtx', 'gtx', 'tesla', 'quadro',
+            'amd', 'radeon', 'rx ', 'instinct',
+            'intel', 'arc', 'iris', 'uhd',
+            'apple', 'm1', 'm2', 'm3', 'm4',
+            'gb10', 'blackwell'
+        ];
+        return gpuMarkers.some(marker => modelLower.includes(marker));
+    }
+    inferVendorFromGPUModel(model, fallback = 'Unknown') {
+        if (!model) return fallback;
+        const modelLower = model.toLowerCase();
+        if (modelLower.includes('nvidia') || modelLower.includes('geforce') ||
+            modelLower.includes('rtx') || modelLower.includes('gtx') ||
+            modelLower.includes('tesla') || modelLower.includes('quadro') ||
+            modelLower.includes('gb10') || modelLower.includes('blackwell')) {
+            return 'NVIDIA';
+        }
+        if (modelLower.includes('amd') || modelLower.includes('radeon') || modelLower.includes('instinct')) {
+            return 'AMD';
+        }
+        if (modelLower.includes('intel') || modelLower.includes('arc') ||
+            modelLower.includes('iris') || modelLower.includes('uhd')) {
+            return 'Intel';
+        }
+        if (modelLower.includes('apple') || modelLower.includes('m1') ||
+            modelLower.includes('m2') || modelLower.includes('m3') || modelLower.includes('m4')) {
+            return 'Apple';
+        }
+        return fallback;
+    }
     async runQuickBenchmark() {
         const start = process.hrtime.bigint();
@@ -576,4 +678,4 @@ class HardwareDetector {
 }
-module.exports = HardwareDetector;
+module.exports = HardwareDetector;

package/src/hardware/specs.js CHANGED

@@ -71,6 +71,13 @@ class HardwareSpecs {
             'NVIDIA GeForce RTX 3060 Ti': { score: 75, vram: 8, tdp: 200, dedicated: true },
             'NVIDIA GeForce RTX 3060': { score: 70, vram: 12, tdp: 170, dedicated: true },
+            // NVIDIA Data Center / Workstation
+            'NVIDIA H100': { score: 100, vram: 80, tdp: 700, dedicated: true },
+            'NVIDIA A100': { score: 94, vram: 80, tdp: 400, dedicated: true },
+            'NVIDIA Tesla P100': { score: 74, vram: 16, tdp: 250, dedicated: true },
+            'NVIDIA GB10 Grace Blackwell': { score: 96, vram: 96, tdp: 140, dedicated: true },
+            'NVIDIA DGX Spark (GB10)': { score: 96, vram: 96, tdp: 140, dedicated: true },
             // AMD RX 7000 Series
             'AMD Radeon RX 7900 XTX': { score: 92, vram: 24, tdp: 355, dedicated: true },
             'AMD Radeon RX 7900 XT': { score: 88, vram: 20, tdp: 300, dedicated: true },
@@ -283,4 +290,4 @@ class HardwareSpecs {
     }
 }
-module.exports = HardwareSpecs;
+module.exports = HardwareSpecs;

package/src/index.js CHANGED

@@ -8,6 +8,12 @@ const OllamaClient = require('./ollama/client');
 const { getLogger } = require('./utils/logger');
 const { getOllamaModelsIntegration, OllamaNativeScraper } = require('./ollama/native-scraper');
 const VerboseProgress = require('./utils/verbose-progress');
+const SpeculativeDecodingEstimator = require('./models/speculative-decoding-estimator');
+const {
+    normalizeRuntime,
+    getRuntimePullCommand,
+    getRuntimeRunCommand
+} = require('./runtime/runtime-support');
 class LLMChecker {
     constructor(options = {}) {
@@ -17,6 +23,7 @@ class LLMChecker {
         this.ollamaScraper = new OllamaNativeScraper();
         this.compatibilityAnalyzer = new CompatibilityAnalyzer();
         this.performanceAnalyzer = new PerformanceAnalyzer();
+        this.speculativeDecodingEstimator = new SpeculativeDecodingEstimator();
         this.ollamaClient = new OllamaClient();
         this.logger = getLogger().createChild('LLMChecker');
         this.verbose = options.verbose !== false; // Default to verbose unless explicitly disabled
@@ -286,7 +293,7 @@ class LLMChecker {
         if (platform === 'apple_silicon') {
             return await this.analyzeWithAppleSiliconHeuristics(hardware, staticModels, ollamaIntegration, options);
         } else {
-            return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration);
+            return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration, options);
         }
     }
@@ -367,7 +374,12 @@ class LLMChecker {
             }))
         };
-        return mappedResults;
+        return this.attachSpeculativeDecodingEstimates(
+            mappedResults,
+            [...mappedResults.compatible, ...mappedResults.marginal],
+            hardware,
+            options.runtime
+        );
     }
     async integrateOllamaModels(hardware, availableModels) {
@@ -473,7 +485,7 @@ class LLMChecker {
         return integration;
     }
-    async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration) {
+    async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration, options = {}) {
         this.logger.info('Using mathematical heuristics combining database + local models');
         try {
@@ -594,8 +606,13 @@ class LLMChecker {
             });
             this.logger.info(`Mathematical heuristic results: ${compatibility.compatible.length} compatible, ${compatibility.marginal.length} marginal, ${compatibility.incompatible.length} incompatible`);
-            return compatibility;
+            return this.attachSpeculativeDecodingEstimates(
+                compatibility,
+                allUniqueModels,
+                hardware,
+                options.runtime
+            );
         } catch (error) {
             this.logger.error('Mathematical heuristic analysis failed, using fallback', { error: error.message });
@@ -715,6 +732,7 @@ class LLMChecker {
         return {
             ...existingModel,
             ollamaId: ollamaModel.model_identifier,
+            frameworks: Array.from(new Set([...(existingModel.frameworks || []), 'ollama', 'vllm', 'mlx'])),
             pulls: ollamaModel.pulls,
             lastUpdated: ollamaModel.last_updated,
             description: ollamaModel.description || existingModel.description,
@@ -726,7 +744,7 @@ class LLMChecker {
             },
             installation: {
                 ...existingModel.installation,
-                ollama: `ollama pull ${ollamaModel.model_identifier}`
+                ...this.createRuntimeInstallationCommands(ollamaModel.model_identifier, ollamaModel.model_name || existingModel.name)
             }
         };
     }
@@ -786,7 +804,7 @@ class LLMChecker {
             type: 'local',
             category: category,
             specialization: specialization,
-            frameworks: ['ollama'],
+            frameworks: ['ollama', 'vllm', 'mlx'],
             requirements: {
                 ram: Math.ceil(sizeNum * 0.6) || 2,
                 vram: Math.ceil(sizeNum * 0.4) || 0,
@@ -794,7 +812,7 @@ class LLMChecker {
                 storage: realStorageSize || Math.ceil(sizeNum * 0.7) || 1
             },
             installation: {
-                ollama: `ollama pull ${ollamaModel.model_identifier}`,
+                ...this.createRuntimeInstallationCommands(ollamaModel.model_identifier, ollamaModel.model_name),
                 description: ollamaModel.description || 'Available in Ollama library'
             },
             description: ollamaModel.description || `${ollamaModel.model_name} from Ollama`,
@@ -919,7 +937,7 @@ class LLMChecker {
             type: 'local',
             category: category,
             specialization: specialization,
-            frameworks: ['ollama'],
+            frameworks: ['ollama', 'vllm', 'mlx'],
             requirements: {
                 ram: Math.ceil((parseFloat(size) || 4) * 0.6),
                 vram: Math.ceil((parseFloat(size) || 4) * 0.4),
@@ -927,7 +945,7 @@ class LLMChecker {
                 storage: Math.ceil((parseFloat(size) || 4) * 0.7)
             },
             installation: {
-                ollama: `ollama pull ${cloudModel.model_identifier}`,
+                ...this.createRuntimeInstallationCommands(cloudModel.model_identifier, cloudModel.model_name),
                 description: cloudModel.description || 'Model from Ollama library'
             },
             year: 2024,
@@ -970,6 +988,54 @@ class LLMChecker {
         });
     }
+    createRuntimeInstallationCommands(modelIdentifier, modelName) {
+        const identifier = String(modelIdentifier || modelName || 'model').trim();
+        const runtimeModel = {
+            model_identifier: identifier,
+            ollamaId: identifier,
+            name: modelName || identifier
+        };
+        return {
+            ollama: `ollama pull ${identifier}`,
+            vllm: getRuntimeRunCommand(runtimeModel, 'vllm'),
+            vllmPull: getRuntimePullCommand(runtimeModel, 'vllm'),
+            mlx: getRuntimeRunCommand(runtimeModel, 'mlx'),
+            mlxPull: getRuntimePullCommand(runtimeModel, 'mlx')
+        };
+    }
+    attachSpeculativeDecodingEstimates(resultGroups, candidates, hardware, runtime = 'ollama') {
+        const selectedRuntime = normalizeRuntime(runtime);
+        const candidatePool = Array.isArray(candidates) ? candidates : [];
+        const withEstimate = (items = []) =>
+            items.map((model) => {
+                const estimate = this.speculativeDecodingEstimator.estimate({
+                    model,
+                    candidates: candidatePool,
+                    hardware,
+                    runtime: selectedRuntime
+                });
+                if (!estimate) {
+                    return model;
+                }
+                return {
+                    ...model,
+                    speculativeDecoding: estimate
+                };
+            });
+        return {
+            ...resultGroups,
+            compatible: withEstimate(resultGroups.compatible),
+            marginal: withEstimate(resultGroups.marginal),
+            incompatible: withEstimate(resultGroups.incompatible)
+        };
+    }
     async generateOllamaRecommendations(hardware, availableModels, installedModels) {
         const recommendations = [];
         const installedNames = new Set(installedModels.map(m => m.name.toLowerCase()));
@@ -2286,4 +2352,4 @@ class LLMChecker {
 }
-module.exports = LLMChecker;
+module.exports = LLMChecker;

package/src/models/expanded_database.js CHANGED

@@ -999,10 +999,16 @@ class ExpandedModelsDatabase {
         } else if (hasDedicatedGPU) {
             // Dedicated GPU - much better performance
             let gpuTPS = 30;
-            if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
+            if (gpuModel.toLowerCase().includes('gb10') ||
+                gpuModel.toLowerCase().includes('grace blackwell') ||
+                gpuModel.toLowerCase().includes('dgx spark')) gpuTPS = 90;
+            else if (gpuModel.toLowerCase().includes('h100')) gpuTPS = 120;
+            else if (gpuModel.toLowerCase().includes('a100')) gpuTPS = 95;
+            else if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
             else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 50;
             else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 40;
             else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 30;
+            else if (gpuModel.toLowerCase().includes('p100')) gpuTPS = 32;
             else if (vramGB >= 16) gpuTPS = 45;
             else if (vramGB >= 8) gpuTPS = 35;
             else if (vramGB >= 4) gpuTPS = 25;
@@ -1139,4 +1145,4 @@ class ExpandedModelsDatabase {
     }
 }
-module.exports = ExpandedModelsDatabase;
+module.exports = ExpandedModelsDatabase;

package/src/models/scoring-engine.js CHANGED

@@ -170,6 +170,7 @@ class ScoringEngine {
             // NVIDIA - based on real llama.cpp/Ollama benchmarks
             'cuda_h100': 120,    // ~100-140 TPS for 7B Q4
             'cuda_a100': 90,     // ~80-100 TPS for 7B Q4
+            'cuda_gb10': 95,     // Grace Blackwell / DGX Spark class
             'cuda_4090': 70,     // ~60-80 TPS for 7B Q4
             'cuda_4080': 55,     // ~50-60 TPS for 7B Q4
             'cuda_3090': 50,     // ~45-55 TPS for 7B Q4
@@ -177,6 +178,7 @@ class ScoringEngine {
             'cuda_3070': 32,     // ~28-35 TPS for 7B Q4
             'cuda_3060': 25,     // ~20-28 TPS for 7B Q4
             'cuda_2080': 28,     // ~25-30 TPS for 7B Q4
+            'cuda_p100': 30,     // Tesla P100 class
             'cuda_default': 30,
             // AMD - slightly lower than equivalent NVIDIA
@@ -518,8 +520,10 @@ class ScoringEngine {
         const gpuModel = (hardware.summary.gpuModel || '').toLowerCase();
         if (backend === 'cuda') {
+            if (gpuModel.includes('gb10') || gpuModel.includes('grace blackwell') || gpuModel.includes('dgx spark')) return 'cuda_gb10';
             if (gpuModel.includes('h100')) return 'cuda_h100';
             if (gpuModel.includes('a100')) return 'cuda_a100';
+            if (gpuModel.includes('p100')) return 'cuda_p100';
             if (gpuModel.includes('4090')) return 'cuda_4090';
             if (gpuModel.includes('4080')) return 'cuda_4080';
             if (gpuModel.includes('3090')) return 'cuda_3090';