npm - llm-checker - Versions diffs - 3.1.0 - Mend

llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/LICENSE +21 -0
package/README.md +418 -0
package/analyzer/compatibility.js +584 -0
package/analyzer/performance.js +505 -0
package/bin/CLAUDE.md +12 -0
package/bin/enhanced_cli.js +3118 -0
package/bin/test-deterministic.js +41 -0
package/package.json +96 -0
package/src/CLAUDE.md +12 -0
package/src/ai/intelligent-selector.js +615 -0
package/src/ai/model-selector.js +312 -0
package/src/ai/multi-objective-selector.js +820 -0
package/src/commands/check.js +58 -0
package/src/data/CLAUDE.md +11 -0
package/src/data/model-database.js +637 -0
package/src/data/sync-manager.js +279 -0
package/src/hardware/CLAUDE.md +12 -0
package/src/hardware/backends/CLAUDE.md +11 -0
package/src/hardware/backends/apple-silicon.js +318 -0
package/src/hardware/backends/cpu-detector.js +490 -0
package/src/hardware/backends/cuda-detector.js +417 -0
package/src/hardware/backends/intel-detector.js +436 -0
package/src/hardware/backends/rocm-detector.js +440 -0
package/src/hardware/detector.js +573 -0
package/src/hardware/pc-optimizer.js +635 -0
package/src/hardware/specs.js +286 -0
package/src/hardware/unified-detector.js +442 -0
package/src/index.js +2289 -0
package/src/models/CLAUDE.md +17 -0
package/src/models/ai-check-selector.js +806 -0
package/src/models/catalog.json +426 -0
package/src/models/deterministic-selector.js +1145 -0
package/src/models/expanded_database.js +1142 -0
package/src/models/intelligent-selector.js +532 -0
package/src/models/requirements.js +310 -0
package/src/models/scoring-config.js +57 -0
package/src/models/scoring-engine.js +715 -0
package/src/ollama/.cache/README.md +33 -0
package/src/ollama/CLAUDE.md +24 -0
package/src/ollama/client.js +438 -0
package/src/ollama/enhanced-client.js +113 -0
package/src/ollama/enhanced-scraper.js +634 -0
package/src/ollama/manager.js +357 -0
package/src/ollama/native-scraper.js +776 -0
package/src/plugins/CLAUDE.md +11 -0
package/src/plugins/examples/custom_model_plugin.js +87 -0
package/src/plugins/index.js +295 -0
package/src/utils/CLAUDE.md +11 -0
package/src/utils/config.js +359 -0
package/src/utils/formatter.js +315 -0
package/src/utils/logger.js +272 -0
package/src/utils/model-classifier.js +167 -0
package/src/utils/verbose-progress.js +266 -0

package/src/models/requirements.js ADDED Viewed

@@ -0,0 +1,310 @@
/**
 * Heuristic estimator of the hardware resources needed to run a language model.
 *
 * All figures are rough rules of thumb derived from the lookup tables built in
 * the constructor: a per-billion-parameter baseline, scaled by architecture,
 * context length and quantization, plus a flat per-framework overhead.
 */
class RequirementsCalculator {
    /** Builds the three lookup tables used by every calculation method. */
    constructor() {
        this.baseRequirements = this.initializeBaseRequirements();
        this.quantizationMultipliers = this.initializeQuantizationMultipliers();
        this.frameworkOverheads = this.initializeFrameworkOverheads();
    }

    /**
     * Baseline figures per billion parameters plus the context-window and
     * architecture scaling tables.
     *
     * @returns {object} Fresh table object (a new copy on every call).
     */
    initializeBaseRequirements() {
        return {
            // Base requirements per billion parameters
            ramPerBillion: 2.0,    // GB RAM per billion parameters (FP16)
            vramPerBillion: 1.5,   // GB VRAM per billion parameters
            cpuCoresBase: 2,       // Minimum CPU cores
            storageMultiplier: 1.1, // Storage overhead factor
            // Context window impact
            contextImpact: {
                '2K': 1.0,
                '4K': 1.1,
                '8K': 1.2,
                '16K': 1.4,
                '32K': 1.6,
                '64K': 1.8,
                '128K': 2.0,
                '200K': 2.5
            },
            // Model architecture impact
            architectureMultipliers: {
                'transformer': 1.0,
                'mixture_of_experts': 0.7, // More efficient due to sparse activation
                'state_space': 0.8,
                'retrieval_augmented': 1.3
            }
        };
    }

    /**
     * Per-quantization scaling factors. `ram`/`vram` scale memory use,
     * `quality` is reported back as the quality impact and `speed` scales the
     * estimated tokens per second.
     *
     * @returns {Object<string, {ram: number, vram: number, quality: number, speed: number}>}
     */
    initializeQuantizationMultipliers() {
        return {
            'FP32': { ram: 1.0, vram: 1.0, quality: 1.0, speed: 0.8 },
            'FP16': { ram: 0.5, vram: 0.5, quality: 0.99, speed: 1.0 },
            'BF16': { ram: 0.5, vram: 0.5, quality: 0.995, speed: 1.0 },
            'INT8': { ram: 0.25, vram: 0.25, quality: 0.95, speed: 1.2 },
            'Q8_0': { ram: 0.25, vram: 0.25, quality: 0.97, speed: 1.1 },
            'Q6_K': { ram: 0.19, vram: 0.19, quality: 0.94, speed: 1.15 },
            'Q5_K_M': { ram: 0.16, vram: 0.16, quality: 0.92, speed: 1.2 },
            'Q5_0': { ram: 0.16, vram: 0.16, quality: 0.90, speed: 1.2 },
            'Q4_K_M': { ram: 0.125, vram: 0.125, quality: 0.88, speed: 1.3 },
            'Q4_0': { ram: 0.125, vram: 0.125, quality: 0.85, speed: 1.3 },
            'Q3_K_M': { ram: 0.09, vram: 0.09, quality: 0.80, speed: 1.4 },
            'Q2_K': { ram: 0.06, vram: 0.06, quality: 0.70, speed: 1.5 }
        };
    }

    /**
     * Per-framework overheads. `ram`/`vram` are added to the memory estimate;
     * `cpu` is a fractional increase applied to the CPU core count.
     *
     * @returns {Object<string, {ram: number, vram: number, cpu: number}>}
     */
    initializeFrameworkOverheads() {
        return {
            'ollama': { ram: 0.5, vram: 0.2, cpu: 0.1 },
            'llama.cpp': { ram: 0.3, vram: 0.1, cpu: 0.05 },
            'transformers': { ram: 1.0, vram: 0.5, cpu: 0.2 },
            'vllm': { ram: 0.8, vram: 0.3, cpu: 0.15 },
            'mlx': { ram: 0.4, vram: 0, cpu: 0.1 }, // Apple Silicon unified memory
            'tensorrt': { ram: 0.6, vram: 0.4, cpu: 0.1 }
        };
    }

    /**
     * Estimates RAM, VRAM, CPU cores and storage for a single model.
     *
     * @param {object} modelConfig
     * @param {string|number} modelConfig.size - Parameter count, e.g. "7B" or "500M".
     * @param {string} [modelConfig.architecture='transformer'] - Unknown values scale by 1.0.
     * @param {number} [modelConfig.contextLength=4096] - Context window in tokens.
     * @param {string} [modelConfig.quantization='FP16'] - Unknown values use the FP16 multipliers.
     * @param {string} [modelConfig.framework='ollama'] - Unknown values use the ollama overheads.
     * @param {string} [modelConfig.specialization] - 'multimodal' and 'code' add extra headroom.
     * @returns {object} Rounded-up minimum and recommended figures plus a
     *   `performance` section. `quantization` and `framework` echo the
     *   requested values, even when a fallback table entry was used.
     */
    calculateModelRequirements(modelConfig) {
        const {
            size,
            architecture = 'transformer',
            contextLength = 4096,
            quantization = 'FP16',
            framework = 'ollama',
            specialization
        } = modelConfig;

        // Parse model size
        const sizeInBillions = this.parseModelSize(size);

        // Get base requirements
        let baseRAM = sizeInBillions * this.baseRequirements.ramPerBillion;
        let baseVRAM = sizeInBillions * this.baseRequirements.vramPerBillion;
        let baseCPUCores = Math.max(
            this.baseRequirements.cpuCoresBase,
            Math.ceil(sizeInBillions / 2)
        );
        let baseStorage = sizeInBillions * this.baseRequirements.storageMultiplier;

        // Apply architecture multiplier
        const archMultiplier = this.baseRequirements.architectureMultipliers[architecture] || 1.0;
        baseRAM *= archMultiplier;
        baseVRAM *= archMultiplier;

        // Apply context length impact (RAM only)
        baseRAM *= this.getContextMultiplier(contextLength);

        // Apply quantization
        const quantMultiplier = this.quantizationMultipliers[quantization] || this.quantizationMultipliers['FP16'];
        baseRAM *= quantMultiplier.ram;
        baseVRAM *= quantMultiplier.vram;

        // Apply framework overhead: flat GB for memory, a fraction for CPU cores
        const frameworkOverhead = this.frameworkOverheads[framework] || this.frameworkOverheads['ollama'];
        baseRAM += frameworkOverhead.ram;
        baseVRAM += frameworkOverhead.vram;
        baseCPUCores = Math.ceil(baseCPUCores * (1 + frameworkOverhead.cpu));

        // Specialization adjustments
        if (specialization === 'multimodal') {
            baseRAM *= 1.3;
            baseVRAM *= 1.5;
            baseStorage *= 1.2;
        } else if (specialization === 'code') {
            baseRAM *= 1.1;
            baseCPUCores += 1;
        }

        // Round up so a fractional requirement is never under-reported
        return {
            ram: Math.ceil(baseRAM),
            vram: Math.ceil(baseVRAM),
            cpu_cores: baseCPUCores,
            storage: Math.ceil(baseStorage),
            recommended_ram: Math.ceil(baseRAM * 1.5),
            recommended_vram: Math.ceil(baseVRAM * 1.3),
            quantization,
            framework,
            performance: {
                estimatedSpeed: this.estimateInferenceSpeed(sizeInBillions, quantization),
                qualityImpact: quantMultiplier.quality
            }
        };
    }

    /**
     * Converts a parameter-count label into billions of parameters.
     *
     * The first number in the label is used, together with the unit letter
     * that directly follows it (optionally after whitespace): `K` = thousands,
     * `M` = millions, `B` or no unit = billions. Matching is case-insensitive.
     *
     * @param {string|number} sizeString - e.g. "7B", "1.5b", "500M"; a number
     *   is taken to already be in billions.
     * @returns {number} Size in billions, or NaN when no number is found.
     */
    parseModelSize(sizeString) {
        if (typeof sizeString === 'number') {
            return sizeString;
        }

        const match = /(\d+(?:\.\d+)?|\.\d+)\s*([kmb])?/.exec(String(sizeString).toLowerCase());
        if (match === null) {
            return Number.NaN;
        }

        const value = Number.parseFloat(match[1]);
        switch (match[2]) {
            case 'k':
                return value / 1e6; // thousands -> billions
            case 'm':
                return value / 1e3; // millions -> billions
            default:
                return value;
        }
    }

    /**
     * Looks up the RAM multiplier for a context window size.
     *
     * Thresholds are decimal (4000, 8000, ...) so that power-of-two windows
     * such as 4096 land in the bucket of the same name.
     *
     * @param {number} contextLength - Context window in tokens.
     * @returns {number} Multiplier of the largest bucket not exceeding
     *   `contextLength`, or the '2K' multiplier below 4000.
     */
    getContextMultiplier(contextLength) {
        const buckets = [
            [200000, '200K'],
            [128000, '128K'],
            [64000, '64K'],
            [32000, '32K'],
            [16000, '16K'],
            [8000, '8K'],
            [4000, '4K']
        ];
        const { contextImpact } = this.baseRequirements;

        for (const [threshold, label] of buckets) {
            if (contextLength >= threshold) {
                return contextImpact[label];
            }
        }
        return contextImpact['2K'];
    }

    /**
     * Rough tokens-per-second estimate: `100 / sqrt(size)` scaled by the
     * quantization speed multiplier.
     *
     * @param {number} sizeInBillions - Model size in billions of parameters.
     * @param {string} quantization - Unknown values use the FP16 multiplier.
     * @returns {{cpuOnly: number, withGPU: number, optimized: number}} Rounded
     *   estimates at 0.3x, 1x and 1.5x of the base speed.
     */
    estimateInferenceSpeed(sizeInBillions, quantization) {
        const quantMultiplier = this.quantizationMultipliers[quantization] || this.quantizationMultipliers['FP16'];
        const baseSpeed = (100 / Math.sqrt(sizeInBillions)) * quantMultiplier.speed; // Rough approximation

        return {
            cpuOnly: Math.round(baseSpeed * 0.3),
            withGPU: Math.round(baseSpeed),
            optimized: Math.round(baseSpeed * 1.5)
        };
    }

    /**
     * Picks the highest-quality quantization whose RAM and VRAM estimates both
     * fit the given hardware.
     *
     * @param {object} hardware - Needs `memory.total`; `gpu.vram` is treated
     *   as 0 when no GPU information is present.
     * @param {object} targetModel - Model config as accepted by
     *   `calculateModelRequirements`; its `quantization` is overridden.
     * @returns {{quantization: string, requirements: object, qualityImpact: number, fitsInMemory: boolean}}
     *   The first fitting level, or Q2_K with `fitsInMemory: false` when none fits.
     */
    getOptimalQuantization(hardware, targetModel) {
        const { memory, gpu } = hardware;
        const availableVRAM = gpu?.vram ?? 0;

        // Ordered from highest to lowest quality
        const quantizationLevels = ['Q8_0', 'Q6_K', 'Q5_K_M', 'Q4_K_M', 'Q4_0', 'Q3_K_M', 'Q2_K'];

        for (const quant of quantizationLevels) {
            const requirements = this.calculateModelRequirements({
                ...targetModel,
                quantization: quant
            });

            if (requirements.ram <= memory.total && requirements.vram <= availableVRAM) {
                return {
                    quantization: quant,
                    requirements,
                    qualityImpact: this.quantizationMultipliers[quant].quality,
                    fitsInMemory: true
                };
            }
        }

        // Nothing fits: report the smallest level so callers can see the shortfall
        return {
            quantization: 'Q2_K',
            requirements: this.calculateModelRequirements({
                ...targetModel,
                quantization: 'Q2_K'
            }),
            qualityImpact: this.quantizationMultipliers['Q2_K'].quality,
            fitsInMemory: false
        };
    }

    /**
     * Aggregates requirements for running several models at once.
     *
     * RAM, VRAM and storage are summed; CPU cores take the maximum of the
     * individual models.
     *
     * @param {object[]} models - Model configs for `calculateModelRequirements`.
     * @param {object} hardware - Needs `memory.total` and `cpu.cores`;
     *   `gpu.vram` is treated as 0 when no GPU information is present.
     * @returns {{models: object[], total: object, canRunAll: boolean, recommendations: string[]}}
     */
    calculateBatchRequirements(models, hardware) {
        const total = { ram: 0, vram: 0, cpu_cores: 0, storage: 0 };
        const modelsWithRequirements = [];

        for (const model of models) {
            const requirements = this.calculateModelRequirements(model);
            total.ram += requirements.ram;
            total.vram += requirements.vram;
            total.cpu_cores = Math.max(total.cpu_cores, requirements.cpu_cores);
            total.storage += requirements.storage;
            modelsWithRequirements.push({ ...model, requirements });
        }

        return {
            models: modelsWithRequirements,
            total,
            canRunAll: total.ram <= hardware.memory.total &&
                total.vram <= (hardware.gpu?.vram ?? 0) &&
                total.cpu_cores <= hardware.cpu.cores,
            recommendations: this.generateBatchRecommendations(modelsWithRequirements, hardware)
        };
    }

    /**
     * Produces human-readable advice for a batch of models.
     *
     * @param {object[]} models - Entries carrying `size` and `requirements.ram`.
     * @param {object} hardware - Needs `memory.total`.
     * @returns {string[]} Zero or more recommendation messages.
     */
    generateBatchRecommendations(models, hardware) {
        const recommendations = [];

        // Check if models can run simultaneously
        const totalRAM = models.reduce((sum, m) => sum + m.requirements.ram, 0);
        if (totalRAM > hardware.memory.total) {
            recommendations.push('Models cannot run simultaneously - consider model swapping');
            recommendations.push('Use ollama for automatic model management');
        }

        // Suggest optimization strategies
        if (models.length > 2) {
            recommendations.push('Consider using smaller variants for background models');
        }

        // Framework recommendations. Sizes go through parseModelSize so that a
        // label such as "500M" is compared as 0.5B rather than as 500.
        const hasLargeModels = models.some((m) => this.parseModelSize(m.size) > 10);
        if (hasLargeModels) {
            recommendations.push('Use vLLM for efficient batched inference');
        }

        return recommendations;
    }

    /**
     * Estimates how long a model takes to load, in seconds.
     *
     * NOTE(review): the parsed parameter count (in billions) is used directly
     * as the size in GB; confirm that this approximation is intended.
     *
     * @param {object} model - Needs `size`.
     * @param {object} hardware - Needs `cpu.cores`; `storage.type` and `gpu`
     *   are optional.
     * @returns {{estimated: number, factors: object}} Rounded estimate plus
     *   the inputs that shaped it.
     */
    estimateLoadTime(model, hardware) {
        const sizeGB = this.parseModelSize(model.size);
        const { gpu, cpu } = hardware;
        const storageType = hardware.storage?.type;
        // Kept as the raw `&&` result, so it may be a falsy non-boolean value
        const gpuTransfer = gpu?.dedicated && gpu.vram >= sizeGB;

        // Base load time factors
        let loadTimeSeconds = sizeGB * 2; // 2 seconds per GB baseline

        // Storage speed impact; unknown or missing types get the HDD penalty
        if (storageType === 'nvme') {
            loadTimeSeconds *= 0.5;
        } else if (storageType === 'ssd') {
            loadTimeSeconds *= 0.7;
        } else {
            loadTimeSeconds *= 1.5; // HDD penalty
        }

        // CPU impact
        if (cpu.cores >= 8) {
            loadTimeSeconds *= 0.8;
        } else if (cpu.cores <= 4) {
            loadTimeSeconds *= 1.2;
        }

        // GPU loading
        if (gpuTransfer) {
            loadTimeSeconds *= 1.3; // GPU transfer overhead
        }

        return {
            estimated: Math.round(loadTimeSeconds),
            factors: {
                modelSize: sizeGB,
                storageType: storageType || 'unknown',
                cpuCores: cpu.cores,
                gpuTransfer
            }
        };
    }
}
+module.exports = RequirementsCalculator;

package/src/models/scoring-config.js ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Centralized Scoring Weight Configuration
+ *
+ * Three scoring systems exist with different weights because they serve
+ * different purposes:
+ *
+ * DETERMINISTIC_WEIGHTS - Used by the primary recommendation engine
+ *   (deterministic-selector.js). Weights are per-category arrays [Q, S, F, C]
+ *   where Q=quality, S=speed, F=fit, C=context.
+ *
+ * MULTI_OBJECTIVE_WEIGHTS - Used by multi-objective-selector.js for
+ *   hardware-aware selection. Uses 5 factors: quality, speed, ttfb, context,
+ *   hardwareMatch. Emphasizes hardware fit more heavily.
+ *
+ * SCORING_ENGINE_WEIGHTS - Used by scoring-engine.js (powers smart-recommend
+ *   and search commands). Uses {Q, S, F, C} objects with additional presets
+ *   for specialized use cases like "fast" and "quality" modes.
+ */
// Category weight vectors consumed by deterministic-selector.js.
// Every row is ordered [Q, S, F, C].
const DETERMINISTIC_WEIGHTS = Object.fromEntries([
    ['general', [0.45, 0.35, 0.15, 0.05]],
    ['coding', [0.55, 0.20, 0.15, 0.10]],
    ['reasoning', [0.60, 0.10, 0.20, 0.10]],
    ['multimodal', [0.50, 0.15, 0.20, 0.15]],
    ['summarization', [0.40, 0.35, 0.15, 0.10]],
    ['reading', [0.40, 0.35, 0.15, 0.10]],
    ['embeddings', [0.30, 0.50, 0.20, 0.00]]
]);
// Category weights consumed by multi-objective-selector.js.
// Table columns: category, quality, speed, ttfb, context, hardwareMatch.
const MULTI_OBJECTIVE_WEIGHTS = Object.fromEntries(
    [
        ['general', 0.45, 0.15, 0.05, 0.05, 0.30],
        ['coding', 0.45, 0.15, 0.05, 0.10, 0.25],
        ['reasoning', 0.50, 0.10, 0.05, 0.15, 0.20],
        ['multimodal', 0.40, 0.10, 0.05, 0.10, 0.35],
        ['longctx', 0.30, 0.10, 0.05, 0.35, 0.20]
    ].map(([category, quality, speed, ttfb, context, hardwareMatch]) => [
        category,
        { quality, speed, ttfb, context, hardwareMatch }
    ])
);
// Weight presets consumed by scoring-engine.js.
// Table columns: preset, Q, S, F, C.
const SCORING_ENGINE_WEIGHTS = Object.fromEntries(
    [
        ['general', 0.40, 0.35, 0.15, 0.10],
        ['coding', 0.55, 0.20, 0.15, 0.10],
        ['reasoning', 0.60, 0.15, 0.10, 0.15],
        ['chat', 0.40, 0.40, 0.15, 0.05],
        ['creative', 0.50, 0.25, 0.15, 0.10],
        ['embeddings', 0.30, 0.50, 0.15, 0.05],
        ['vision', 0.50, 0.25, 0.15, 0.10],
        ['fast', 0.25, 0.55, 0.15, 0.05],
        ['quality', 0.65, 0.10, 0.15, 0.10]
    ].map(([preset, Q, S, F, C]) => [preset, { Q, S, F, C }])
);
+module.exports = {
+    DETERMINISTIC_WEIGHTS,
+    MULTI_OBJECTIVE_WEIGHTS,
+    SCORING_ENGINE_WEIGHTS
+};