llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,310 @@
1
/**
 * Heuristic estimator of the hardware needed to run a language model.
 *
 * All figures are rough rules of thumb derived from the model's parameter
 * count (in billions) and a set of multiplier tables built in the
 * constructor: base per-billion costs, quantization multipliers and
 * per-framework fixed overheads.
 */
class RequirementsCalculator {
    constructor() {
        this.baseRequirements = this.initializeBaseRequirements();
        this.quantizationMultipliers = this.initializeQuantizationMultipliers();
        this.frameworkOverheads = this.initializeFrameworkOverheads();
    }

    /**
     * Build the table of base costs and the context/architecture multipliers.
     *
     * NOTE(review): `ramPerBillion` is labelled "FP16", yet the FP16
     * quantization multiplier is 0.5, so the effective FP16 figure is
     * 2.0 * 0.5 = 1.0 GB per billion parameters before context and framework
     * overhead. If the intent is ~2 bytes per parameter at FP16 the two tables
     * disagree - confirm which one is the reference precision.
     *
     * @returns {object} Base requirement constants.
     */
    initializeBaseRequirements() {
        return {
            // Base requirements per billion parameters
            ramPerBillion: 2.0,      // GB RAM per billion parameters (FP16)
            vramPerBillion: 1.5,     // GB VRAM per billion parameters
            cpuCoresBase: 2,         // Minimum CPU cores
            storageMultiplier: 1.1,  // Storage overhead factor

            // Context window impact (applied to RAM only)
            contextImpact: {
                '2K': 1.0,
                '4K': 1.1,
                '8K': 1.2,
                '16K': 1.4,
                '32K': 1.6,
                '64K': 1.8,
                '128K': 2.0,
                '200K': 2.5
            },

            // Model architecture impact
            architectureMultipliers: {
                'transformer': 1.0,
                'mixture_of_experts': 0.7, // More efficient due to sparse activation
                'state_space': 0.8,
                'retrieval_augmented': 1.3
            }
        };
    }

    /**
     * Build the per-quantization multiplier table. `ram`/`vram` scale the
     * memory estimate, `quality` is reported back to the caller and `speed`
     * scales the inference-speed estimate.
     *
     * @returns {object} Map of quantization name to multipliers.
     */
    initializeQuantizationMultipliers() {
        return {
            'FP32': { ram: 1.0, vram: 1.0, quality: 1.0, speed: 0.8 },
            'FP16': { ram: 0.5, vram: 0.5, quality: 0.99, speed: 1.0 },
            'BF16': { ram: 0.5, vram: 0.5, quality: 0.995, speed: 1.0 },
            'INT8': { ram: 0.25, vram: 0.25, quality: 0.95, speed: 1.2 },
            'Q8_0': { ram: 0.25, vram: 0.25, quality: 0.97, speed: 1.1 },
            'Q6_K': { ram: 0.19, vram: 0.19, quality: 0.94, speed: 1.15 },
            'Q5_K_M': { ram: 0.16, vram: 0.16, quality: 0.92, speed: 1.2 },
            'Q5_0': { ram: 0.16, vram: 0.16, quality: 0.90, speed: 1.2 },
            'Q4_K_M': { ram: 0.125, vram: 0.125, quality: 0.88, speed: 1.3 },
            'Q4_0': { ram: 0.125, vram: 0.125, quality: 0.85, speed: 1.3 },
            'Q3_K_M': { ram: 0.09, vram: 0.09, quality: 0.80, speed: 1.4 },
            'Q2_K': { ram: 0.06, vram: 0.06, quality: 0.70, speed: 1.5 }
        };
    }

    /**
     * Build the fixed per-framework overheads. `ram`/`vram` are added (in GB)
     * to the estimate; `cpu` is a fractional increase of the core count.
     *
     * @returns {object} Map of framework name to overheads.
     */
    initializeFrameworkOverheads() {
        return {
            'ollama': { ram: 0.5, vram: 0.2, cpu: 0.1 },
            'llama.cpp': { ram: 0.3, vram: 0.1, cpu: 0.05 },
            'transformers': { ram: 1.0, vram: 0.5, cpu: 0.2 },
            'vllm': { ram: 0.8, vram: 0.3, cpu: 0.15 },
            'mlx': { ram: 0.4, vram: 0, cpu: 0.1 }, // Apple Silicon unified memory
            'tensorrt': { ram: 0.6, vram: 0.4, cpu: 0.1 }
        };
    }

    /**
     * Read `table[key]` only if `key` is an own property, so that names such
     * as "constructor" or "toString" never resolve to inherited members.
     *
     * @param {object} table - Lookup table.
     * @param {*} key - Candidate key.
     * @returns {*} The stored value, or undefined when the key is absent.
     */
    _ownValue(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    }

    /**
     * Resolve a quantization name to its multipliers. The exact name is tried
     * first, then its upper-cased form (so "q4_K_M" or "fp16" resolve), and
     * finally FP16 is used as the fallback.
     *
     * @param {string} quantization - Quantization label.
     * @returns {{ram: number, vram: number, quality: number, speed: number}}
     */
    _resolveQuantization(quantization) {
        const table = this.quantizationMultipliers;
        return this._ownValue(table, quantization)
            ?? this._ownValue(table, String(quantization).toUpperCase())
            ?? table['FP16'];
    }

    /**
     * Resolve a framework name to its overheads. The exact name is tried
     * first, then its lower-cased form; unknown names fall back to "ollama".
     *
     * @param {string} framework - Framework label.
     * @returns {{ram: number, vram: number, cpu: number}}
     */
    _resolveFramework(framework) {
        const table = this.frameworkOverheads;
        return this._ownValue(table, framework)
            ?? this._ownValue(table, String(framework).toLowerCase())
            ?? table['ollama'];
    }

    /**
     * Estimate RAM, VRAM, CPU and storage needs for one model.
     *
     * Multipliers are applied in a fixed order: architecture, context length
     * (RAM only), quantization, framework overhead, then specialization.
     * Storage is derived from the parameter count alone and is not scaled by
     * quantization.
     *
     * @param {object} modelConfig
     * @param {string|number} modelConfig.size - Parameter count, e.g. "7B".
     * @param {string} [modelConfig.architecture='transformer']
     * @param {number} [modelConfig.contextLength=4096] - Context window in tokens.
     * @param {string} [modelConfig.quantization='FP16']
     * @param {string} [modelConfig.framework='ollama']
     * @param {string} [modelConfig.specialization] - 'multimodal' and 'code' add extra cost.
     * @returns {object} Requirements (GB values rounded up) plus speed/quality estimates.
     *   Numeric fields are NaN when `size` cannot be parsed.
     */
    calculateModelRequirements(modelConfig) {
        const {
            size,
            architecture = 'transformer',
            contextLength = 4096,
            quantization = 'FP16',
            framework = 'ollama',
            specialization
        } = modelConfig;

        const sizeInBillions = this.parseModelSize(size);
        const base = this.baseRequirements;

        let ram = sizeInBillions * base.ramPerBillion;
        let vram = sizeInBillions * base.vramPerBillion;
        let cpuCores = Math.max(base.cpuCoresBase, Math.ceil(sizeInBillions / 2));
        let storage = sizeInBillions * base.storageMultiplier;

        // Architecture: unknown architectures are treated as a plain transformer.
        const archMultiplier = this._ownValue(base.architectureMultipliers, architecture) ?? 1.0;
        ram *= archMultiplier;
        vram *= archMultiplier;

        // Context length only inflates the RAM estimate.
        ram *= this.getContextMultiplier(contextLength);

        // Quantization
        const quantMultiplier = this._resolveQuantization(quantization);
        ram *= quantMultiplier.ram;
        vram *= quantMultiplier.vram;

        // Framework overhead: fixed GB on memory, fractional bump on cores.
        const overhead = this._resolveFramework(framework);
        ram += overhead.ram;
        vram += overhead.vram;
        cpuCores = Math.ceil(cpuCores * (1 + overhead.cpu));

        // Specialization adjustments
        if (specialization === 'multimodal') {
            ram *= 1.3;
            vram *= 1.5;
            storage *= 1.2;
        } else if (specialization === 'code') {
            ram *= 1.1;
            cpuCores += 1;
        }

        return {
            ram: Math.ceil(ram),
            vram: Math.ceil(vram),
            cpu_cores: cpuCores,
            storage: Math.ceil(storage),
            recommended_ram: Math.ceil(ram * 1.5),
            recommended_vram: Math.ceil(vram * 1.3),
            quantization,
            framework,
            performance: {
                estimatedSpeed: this.estimateInferenceSpeed(sizeInBillions, quantization),
                qualityImpact: quantMultiplier.quality
            }
        };
    }

    /**
     * Convert a size label into a parameter count in billions.
     *
     * Accepted forms: a number (already in billions), or a string holding a
     * number with an optional K / M / B suffix directly after it ("500M",
     * "1.5b", "7B params", "13"). A leading "<n>x" factor is multiplied in, so
     * "8x7b" yields the nominal 56. Text that is not attached to the number is
     * ignored rather than being scanned for unit letters.
     *
     * @param {string|number} sizeString - Size label.
     * @returns {number} Billions of parameters, or NaN when nothing parseable is found.
     */
    parseModelSize(sizeString) {
        if (typeof sizeString === 'number') {
            return sizeString;
        }
        if (typeof sizeString !== 'string') {
            return NaN;
        }

        const match = /(?:(\d+)\s*x\s*)?(\d*\.?\d+)\s*([kmb])?/i.exec(sizeString);
        if (!match) {
            return NaN;
        }

        const [, factorText, magnitudeText, unit] = match;
        const factor = factorText ? Number.parseInt(factorText, 10) : 1;

        // Divisors that convert the suffixed unit to billions; a bare number
        // is taken to be billions already.
        const divisors = { k: 1e6, m: 1e3, b: 1 };
        const divisor = unit ? divisors[unit.toLowerCase()] : 1;

        return (Number.parseFloat(magnitudeText) * factor) / divisor;
    }

    /**
     * Pick the RAM multiplier for a context window by finding the highest
     * tier whose token threshold the window reaches.
     *
     * @param {number} contextLength - Context window in tokens.
     * @returns {number} Multiplier from `baseRequirements.contextImpact`.
     */
    getContextMultiplier(contextLength) {
        const { contextImpact } = this.baseRequirements;
        const tiers = [
            [200000, '200K'],
            [128000, '128K'],
            [64000, '64K'],
            [32000, '32K'],
            [16000, '16K'],
            [8000, '8K'],
            [4000, '4K']
        ];

        const tier = tiers.find(([minTokens]) => contextLength >= minTokens);
        return contextImpact[tier ? tier[1] : '2K'];
    }

    /**
     * Rough tokens-per-second estimate: 100 / sqrt(size) scaled by the
     * quantization speed multiplier. A size of 0 yields Infinity and NaN
     * propagates, so callers should pass a parsed, positive size.
     *
     * @param {number} sizeInBillions - Parameter count in billions.
     * @param {string} quantization - Quantization label.
     * @returns {{cpuOnly: number, withGPU: number, optimized: number}}
     */
    estimateInferenceSpeed(sizeInBillions, quantization) {
        const speed = (100 / Math.sqrt(sizeInBillions)) * this._resolveQuantization(quantization).speed;

        return {
            cpuOnly: Math.round(speed * 0.3),
            withGPU: Math.round(speed),
            optimized: Math.round(speed * 1.5)
        };
    }

    /**
     * Find the highest-quality quantization whose RAM and VRAM estimates both
     * fit the given hardware. Missing `memory.total` or `gpu.vram` values are
     * treated as 0, so nothing fits instead of throwing.
     *
     * @param {object} hardware - Expects `memory.total` and `gpu.vram` in GB.
     * @param {object} targetModel - Model config as for calculateModelRequirements.
     * @returns {{quantization: string, requirements: object, qualityImpact: number, fitsInMemory: boolean}}
     *   When nothing fits, the Q2_K figures are returned with `fitsInMemory: false`.
     */
    getOptimalQuantization(hardware, targetModel) {
        const availableRAM = hardware?.memory?.total ?? 0;
        const availableVRAM = hardware?.gpu?.vram ?? 0;

        // Ordered from highest to lowest quality; the last entry is the fallback.
        const quantizationLevels = ['Q8_0', 'Q6_K', 'Q5_K_M', 'Q4_K_M', 'Q4_0', 'Q3_K_M', 'Q2_K'];

        let quantization;
        let requirements;
        let fitsInMemory = false;

        for (quantization of quantizationLevels) {
            requirements = this.calculateModelRequirements({ ...targetModel, quantization });
            fitsInMemory = requirements.ram <= availableRAM && requirements.vram <= availableVRAM;
            if (fitsInMemory) {
                break;
            }
        }

        return {
            quantization,
            requirements,
            qualityImpact: this.quantizationMultipliers[quantization].quality,
            fitsInMemory
        };
    }

    /**
     * Aggregate requirements for running several models together. RAM, VRAM
     * and storage are summed; CPU cores take the maximum of any single model.
     *
     * @param {object[]} models - Model configs.
     * @param {object} hardware - Expects `memory.total`, `gpu.vram`, `cpu.cores`;
     *   missing values count as 0 for the `canRunAll` check.
     * @returns {{models: object[], total: object, canRunAll: boolean, recommendations: string[]}}
     */
    calculateBatchRequirements(models, hardware) {
        const modelsWithRequirements = models.map((model) => ({
            ...model,
            requirements: this.calculateModelRequirements(model)
        }));

        const total = { ram: 0, vram: 0, cpu_cores: 0, storage: 0 };
        for (const { requirements } of modelsWithRequirements) {
            total.ram += requirements.ram;
            total.vram += requirements.vram;
            total.cpu_cores = Math.max(total.cpu_cores, requirements.cpu_cores);
            total.storage += requirements.storage;
        }

        const canRunAll = total.ram <= (hardware?.memory?.total ?? 0) &&
            total.vram <= (hardware?.gpu?.vram ?? 0) &&
            total.cpu_cores <= (hardware?.cpu?.cores ?? 0);

        return {
            models: modelsWithRequirements,
            total,
            canRunAll,
            recommendations: this.generateBatchRecommendations(modelsWithRequirements, hardware)
        };
    }

    /**
     * Produce human-readable advice for a batch of models.
     *
     * @param {object[]} models - Models carrying a `requirements.ram` field and a `size`.
     * @param {object} hardware - Expects `memory.total` in GB; when it is
     *   missing the simultaneous-run advice is simply not emitted.
     * @returns {string[]} Recommendation messages, possibly empty.
     */
    generateBatchRecommendations(models, hardware) {
        const recommendations = [];

        // Check if models can run simultaneously
        const totalRAM = models.reduce((sum, m) => sum + m.requirements.ram, 0);
        if (totalRAM > hardware?.memory?.total) {
            recommendations.push('Models cannot run simultaneously - consider model swapping');
            recommendations.push('Use ollama for automatic model management');
        }

        // Suggest optimization strategies
        if (models.length > 2) {
            recommendations.push('Consider using smaller variants for background models');
        }

        // "Large" means more than 10 billion parameters. Sizes go through
        // parseModelSize so that unit suffixes are honoured ("500M" is 0.5, not 500).
        const hasLargeModels = models.some((m) => this.parseModelSize(m.size) > 10);
        if (hasLargeModels) {
            recommendations.push('Use vLLM for efficient batched inference');
        }

        return recommendations;
    }

    /**
     * Estimate how long a model takes to load, in seconds.
     *
     * The baseline is 2 seconds per unit returned by parseModelSize (billions
     * of parameters, used here as a stand-in for gigabytes), then scaled by
     * storage type, CPU core count and a GPU-transfer penalty.
     *
     * @param {object} model - Needs a `size` label.
     * @param {object} hardware - Optional `storage.type`, `cpu.cores`, `gpu.dedicated`, `gpu.vram`.
     * @returns {{estimated: number, factors: object}}
     */
    estimateLoadTime(model, hardware) {
        const modelSize = this.parseModelSize(model.size);
        const storageType = hardware.storage?.type;
        const cpuCores = hardware.cpu?.cores;
        const gpu = hardware.gpu;

        let loadTimeSeconds = modelSize * 2;

        // Storage speed impact; anything that is not nvme/ssd (including an
        // unknown type) gets the slow-disk penalty.
        if (storageType === 'nvme') {
            loadTimeSeconds *= 0.5;
        } else if (storageType === 'ssd') {
            loadTimeSeconds *= 0.7;
        } else {
            loadTimeSeconds *= 1.5; // HDD penalty
        }

        // CPU impact; 5-7 cores (or an unknown count) leave the estimate unchanged.
        if (cpuCores >= 8) {
            loadTimeSeconds *= 0.8;
        } else if (cpuCores <= 4) {
            loadTimeSeconds *= 1.2;
        }

        // GPU loading: copying into VRAM adds transfer overhead.
        const gpuTransfer = Boolean(gpu?.dedicated && gpu.vram >= modelSize);
        if (gpuTransfer) {
            loadTimeSeconds *= 1.3;
        }

        return {
            estimated: Math.round(loadTimeSeconds),
            factors: {
                modelSize,
                storageType: storageType || 'unknown',
                cpuCores,
                gpuTransfer
            }
        };
    }
}
309
+
310
+ // CommonJS export: the RequirementsCalculator class itself is the module's export value.
+ module.exports = RequirementsCalculator;
@@ -0,0 +1,57 @@
1
/**
 * Scoring weight tables, kept in one place.
 *
 * There are three tables because three separate scoring systems consume
 * them, each with its own purpose and weight shape:
 *
 *  - DETERMINISTIC_WEIGHTS: for the primary recommendation engine
 *    (deterministic-selector.js). Each category maps to an array
 *    [Q, S, F, C] = [quality, speed, fit, context].
 *
 *  - MULTI_OBJECTIVE_WEIGHTS: for multi-objective-selector.js, which does
 *    hardware-aware selection over five factors (quality, speed, ttfb,
 *    context, hardwareMatch) and leans more heavily on hardware fit.
 *
 *  - SCORING_ENGINE_WEIGHTS: for scoring-engine.js (behind the
 *    smart-recommend and search commands). Each entry is a {Q, S, F, C}
 *    object; extra presets such as "fast" and "quality" cover specialized
 *    modes.
 *
 * Each table below is written as rows of `[name, ...weights]` and expanded
 * into its final shape once at module load.
 */

// deterministic-selector.js: category -> [Q, S, F, C]
const DETERMINISTIC_WEIGHTS = Object.fromEntries(
    [
        //  category         Q     S     F     C
        ['general',       0.45, 0.35, 0.15, 0.05],
        ['coding',        0.55, 0.20, 0.15, 0.10],
        ['reasoning',     0.60, 0.10, 0.20, 0.10],
        ['multimodal',    0.50, 0.15, 0.20, 0.15],
        ['summarization', 0.40, 0.35, 0.15, 0.10],
        ['reading',       0.40, 0.35, 0.15, 0.10],
        ['embeddings',    0.30, 0.50, 0.20, 0.00]
    ].map(([category, ...weights]) => [category, weights])
);

// multi-objective-selector.js: category -> {quality, speed, ttfb, context, hardwareMatch}
const MULTI_OBJECTIVE_WEIGHTS = Object.fromEntries(
    [
        //  category    quality speed ttfb  context hardwareMatch
        ['general',    0.45,   0.15, 0.05, 0.05,   0.30],
        ['coding',     0.45,   0.15, 0.05, 0.10,   0.25],
        ['reasoning',  0.50,   0.10, 0.05, 0.15,   0.20],
        ['multimodal', 0.40,   0.10, 0.05, 0.10,   0.35],
        ['longctx',    0.30,   0.10, 0.05, 0.35,   0.20]
    ].map(([category, quality, speed, ttfb, context, hardwareMatch]) => [
        category,
        { quality, speed, ttfb, context, hardwareMatch }
    ])
);

// scoring-engine.js: preset -> {Q, S, F, C}
const SCORING_ENGINE_WEIGHTS = Object.fromEntries(
    [
        //  preset        Q     S     F     C
        ['general',    0.40, 0.35, 0.15, 0.10],
        ['coding',     0.55, 0.20, 0.15, 0.10],
        ['reasoning',  0.60, 0.15, 0.10, 0.15],
        ['chat',       0.40, 0.40, 0.15, 0.05],
        ['creative',   0.50, 0.25, 0.15, 0.10],
        ['embeddings', 0.30, 0.50, 0.15, 0.05],
        ['vision',     0.50, 0.25, 0.15, 0.10],
        ['fast',       0.25, 0.55, 0.15, 0.05],
        ['quality',    0.65, 0.10, 0.15, 0.10]
    ].map(([preset, Q, S, F, C]) => [preset, { Q, S, F, C }])
);
52
+
53
+ // Publish the three weight tables as named CommonJS exports.
+ module.exports = {
54
+ DETERMINISTIC_WEIGHTS,
55
+ MULTI_OBJECTIVE_WEIGHTS,
56
+ SCORING_ENGINE_WEIGHTS
57
+ };