llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,442 @@
1
+ /**
2
+ * Unified Hardware Detector
3
+ * Coordinates all hardware detection backends and provides a unified interface
4
+ * Automatically selects the best backend for LLM inference
5
+ */
6
+
7
+ const AppleSiliconDetector = require('./backends/apple-silicon');
8
+ const CUDADetector = require('./backends/cuda-detector');
9
+ const ROCmDetector = require('./backends/rocm-detector');
10
+ const IntelDetector = require('./backends/intel-detector');
11
+ const CPUDetector = require('./backends/cpu-detector');
12
+
13
+ class UnifiedDetector {
14
+ constructor() {
15
+ this.backends = {
16
+ metal: new AppleSiliconDetector(),
17
+ cuda: new CUDADetector(),
18
+ rocm: new ROCmDetector(),
19
+ intel: new IntelDetector(),
20
+ cpu: new CPUDetector()
21
+ };
22
+
23
+ this.cache = null;
24
+ this.cacheTime = 0;
25
+ this.cacheExpiry = 5 * 60 * 1000; // 5 minutes
26
+ }
27
+
28
+ /**
29
+ * Detect all available hardware and select the best backend
30
+ */
31
+ async detect() {
32
+ if (this.cache && (Date.now() - this.cacheTime < this.cacheExpiry)) {
33
+ return this.cache;
34
+ }
35
+
36
+ const result = {
37
+ backends: {},
38
+ primary: null,
39
+ cpu: null,
40
+ summary: {
41
+ bestBackend: 'cpu',
42
+ totalVRAM: 0,
43
+ effectiveMemory: 0,
44
+ speedCoefficient: 0,
45
+ isMultiGPU: false,
46
+ gpuCount: 0
47
+ },
48
+ fingerprint: null,
49
+ timestamp: Date.now()
50
+ };
51
+
52
+ // Detect CPU first (always available)
53
+ try {
54
+ result.cpu = this.backends.cpu.detect();
55
+ result.backends.cpu = {
56
+ available: true,
57
+ info: result.cpu
58
+ };
59
+ } catch (e) {
60
+ result.backends.cpu = { available: false, error: e.message };
61
+ }
62
+
63
+ // Detect Apple Silicon (macOS ARM only)
64
+ if (process.platform === 'darwin' && process.arch === 'arm64') {
65
+ try {
66
+ const metalInfo = this.backends.metal.detect();
67
+ if (metalInfo) {
68
+ result.backends.metal = {
69
+ available: true,
70
+ info: metalInfo
71
+ };
72
+ }
73
+ } catch (e) {
74
+ result.backends.metal = { available: false, error: e.message };
75
+ }
76
+ }
77
+
78
+ // Detect NVIDIA CUDA
79
+ try {
80
+ if (this.backends.cuda.checkAvailability()) {
81
+ const cudaInfo = this.backends.cuda.detect();
82
+ if (cudaInfo && cudaInfo.gpus.length > 0) {
83
+ result.backends.cuda = {
84
+ available: true,
85
+ info: cudaInfo
86
+ };
87
+ }
88
+ }
89
+ } catch (e) {
90
+ result.backends.cuda = { available: false, error: e.message };
91
+ }
92
+
93
+ // Detect AMD ROCm
94
+ try {
95
+ if (this.backends.rocm.checkAvailability()) {
96
+ const rocmInfo = this.backends.rocm.detect();
97
+ if (rocmInfo && rocmInfo.gpus.length > 0) {
98
+ result.backends.rocm = {
99
+ available: true,
100
+ info: rocmInfo
101
+ };
102
+ }
103
+ }
104
+ } catch (e) {
105
+ result.backends.rocm = { available: false, error: e.message };
106
+ }
107
+
108
+ // Detect Intel (Linux only for now)
109
+ if (process.platform === 'linux') {
110
+ try {
111
+ if (this.backends.intel.checkAvailability()) {
112
+ const intelInfo = this.backends.intel.detect();
113
+ if (intelInfo && intelInfo.gpus.length > 0) {
114
+ result.backends.intel = {
115
+ available: true,
116
+ info: intelInfo
117
+ };
118
+ }
119
+ }
120
+ } catch (e) {
121
+ result.backends.intel = { available: false, error: e.message };
122
+ }
123
+ }
124
+
125
+ // Select the best available backend
126
+ result.primary = this.selectPrimaryBackend(result.backends);
127
+
128
+ // Build summary
129
+ result.summary = this.buildSummary(result);
130
+
131
+ // Generate fingerprint
132
+ result.fingerprint = this.generateFingerprint(result);
133
+
134
+ this.cache = result;
135
+ this.cacheTime = Date.now();
136
+
137
+ return result;
138
+ }
139
+
140
+ /**
141
+ * Select the best backend for LLM inference
142
+ * Priority: CUDA > ROCm > Metal > Intel > CPU
143
+ */
144
+ selectPrimaryBackend(backends) {
145
+ // CUDA is generally the fastest
146
+ if (backends.cuda?.available) {
147
+ return {
148
+ type: 'cuda',
149
+ name: 'NVIDIA CUDA',
150
+ info: backends.cuda.info
151
+ };
152
+ }
153
+
154
+ // ROCm for AMD GPUs
155
+ if (backends.rocm?.available) {
156
+ return {
157
+ type: 'rocm',
158
+ name: 'AMD ROCm',
159
+ info: backends.rocm.info
160
+ };
161
+ }
162
+
163
+ // Metal for Apple Silicon
164
+ if (backends.metal?.available) {
165
+ return {
166
+ type: 'metal',
167
+ name: 'Apple Metal',
168
+ info: backends.metal.info
169
+ };
170
+ }
171
+
172
+ // Intel Arc/Iris
173
+ if (backends.intel?.available && backends.intel.info.hasDedicated) {
174
+ return {
175
+ type: 'intel',
176
+ name: 'Intel oneAPI',
177
+ info: backends.intel.info
178
+ };
179
+ }
180
+
181
+ // Fallback to CPU
182
+ return {
183
+ type: 'cpu',
184
+ name: 'CPU',
185
+ info: backends.cpu?.info || null
186
+ };
187
+ }
188
+
189
+ /**
190
+ * Build hardware summary
191
+ */
192
+ buildSummary(result) {
193
+ const summary = {
194
+ bestBackend: result.primary?.type || 'cpu',
195
+ backendName: result.primary?.name || 'CPU',
196
+ totalVRAM: 0,
197
+ effectiveMemory: 0,
198
+ speedCoefficient: 0,
199
+ isMultiGPU: false,
200
+ gpuCount: 0,
201
+ gpuModel: null,
202
+ cpuModel: result.cpu?.brand || 'Unknown',
203
+ systemRAM: require('os').totalmem() / (1024 ** 3)
204
+ };
205
+
206
+ const primary = result.primary;
207
+
208
+ if (primary?.type === 'cuda' && primary.info) {
209
+ summary.totalVRAM = primary.info.totalVRAM;
210
+ summary.gpuCount = primary.info.gpus.length;
211
+ summary.isMultiGPU = primary.info.isMultiGPU;
212
+ summary.speedCoefficient = primary.info.speedCoefficient;
213
+ summary.gpuModel = primary.info.gpus[0]?.name || 'NVIDIA GPU';
214
+ }
215
+ else if (primary?.type === 'rocm' && primary.info) {
216
+ summary.totalVRAM = primary.info.totalVRAM;
217
+ summary.gpuCount = primary.info.gpus.length;
218
+ summary.isMultiGPU = primary.info.isMultiGPU;
219
+ summary.speedCoefficient = primary.info.speedCoefficient;
220
+ summary.gpuModel = primary.info.gpus[0]?.name || 'AMD GPU';
221
+ }
222
+ else if (primary?.type === 'metal' && primary.info) {
223
+ // Apple Silicon uses unified memory
224
+ summary.totalVRAM = primary.info.memory.unified;
225
+ summary.gpuCount = 1;
226
+ summary.speedCoefficient = primary.info.speedCoefficient;
227
+ summary.gpuModel = primary.info.chip || 'Apple Silicon';
228
+ }
229
+ else if (primary?.type === 'intel' && primary.info) {
230
+ summary.totalVRAM = primary.info.totalVRAM;
231
+ summary.gpuCount = primary.info.gpus.filter(g => g.type === 'dedicated').length;
232
+ summary.speedCoefficient = primary.info.speedCoefficient;
233
+ summary.gpuModel = primary.info.gpus[0]?.name || 'Intel GPU';
234
+ }
235
+ else if (result.cpu) {
236
+ summary.speedCoefficient = result.cpu.speedCoefficient;
237
+ }
238
+
239
+ // Effective memory for LLM loading
240
+ // For GPU: use VRAM; for CPU/Metal: use system RAM
241
+ if (summary.totalVRAM > 0 && primary?.type !== 'metal') {
242
+ summary.effectiveMemory = summary.totalVRAM;
243
+ } else {
244
+ // Use 70% of system RAM for models (leave room for OS)
245
+ summary.effectiveMemory = Math.round(summary.systemRAM * 0.7);
246
+ }
247
+
248
+ return summary;
249
+ }
250
+
251
+ /**
252
+ * Generate hardware fingerprint for benchmarks
253
+ */
254
+ generateFingerprint(result) {
255
+ const primary = result.primary;
256
+
257
+ if (primary?.type === 'cuda') {
258
+ return this.backends.cuda.getFingerprint();
259
+ } else if (primary?.type === 'rocm') {
260
+ return this.backends.rocm.getFingerprint();
261
+ } else if (primary?.type === 'metal') {
262
+ return this.backends.metal.getFingerprint();
263
+ } else if (primary?.type === 'intel') {
264
+ return this.backends.intel.getFingerprint();
265
+ } else {
266
+ return this.backends.cpu.getFingerprint();
267
+ }
268
+ }
269
+
270
+ /**
271
+ * Estimate tokens per second for a model
272
+ */
273
+ estimateTokensPerSecond(paramsB, quantization = 'Q4_K_M') {
274
+ const result = this.cache || { primary: { type: 'cpu' } };
275
+ const primary = result.primary;
276
+
277
+ if (primary?.type === 'cuda') {
278
+ return this.backends.cuda.estimateTokensPerSecond(paramsB, quantization);
279
+ } else if (primary?.type === 'rocm') {
280
+ return this.backends.rocm.estimateTokensPerSecond(paramsB, quantization);
281
+ } else if (primary?.type === 'metal') {
282
+ return this.backends.metal.estimateTokensPerSecond(paramsB, quantization);
283
+ } else if (primary?.type === 'intel') {
284
+ return this.backends.intel.estimateTokensPerSecond(paramsB, quantization);
285
+ } else {
286
+ return this.backends.cpu.estimateTokensPerSecond(paramsB, quantization);
287
+ }
288
+ }
289
+
290
+ /**
291
+ * Check if a model will fit in memory
292
+ */
293
+ willModelFit(sizeGB, useMultiGPU = true) {
294
+ const result = this.cache;
295
+ if (!result) return false;
296
+
297
+ const summary = result.summary;
298
+
299
+ // Leave headroom (2GB for GPU, 20% for RAM)
300
+ if (summary.bestBackend === 'cpu' || summary.bestBackend === 'metal') {
301
+ return sizeGB <= (summary.effectiveMemory - 2);
302
+ } else {
303
+ const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
304
+ return sizeGB <= (availableVRAM - 2);
305
+ }
306
+ }
307
+
308
+ /**
309
+ * Get the maximum model size that can be loaded
310
+ */
311
+ getMaxModelSize(headroomGB = 2) {
312
+ const result = this.cache;
313
+ if (!result) return 0;
314
+
315
+ return Math.max(0, result.summary.effectiveMemory - headroomGB);
316
+ }
317
+
318
+ /**
319
+ * Get hardware tier classification
320
+ */
321
+ getHardwareTier() {
322
+ const result = this.cache;
323
+ if (!result) return 'unknown';
324
+
325
+ const summary = result.summary;
326
+ const effectiveMem = summary.effectiveMemory;
327
+ const speed = summary.speedCoefficient;
328
+
329
+ // Tier based on effective memory and speed
330
+ if (effectiveMem >= 80 && speed >= 300) return 'ultra_high'; // H100, MI300
331
+ if (effectiveMem >= 48 && speed >= 200) return 'very_high'; // 2x3090, 4090
332
+ if (effectiveMem >= 24 && speed >= 150) return 'high'; // 3090, 4090, M2 Max
333
+ if (effectiveMem >= 16 && speed >= 100) return 'medium_high'; // 4080, 3080, M3 Pro
334
+ if (effectiveMem >= 12 && speed >= 80) return 'medium'; // 3060, 4060 Ti
335
+ if (effectiveMem >= 8 && speed >= 50) return 'medium_low'; // 3060, M2
336
+ if (effectiveMem >= 6 && speed >= 30) return 'low'; // GTX 1660, iGPU
337
+ return 'ultra_low'; // CPU only
338
+ }
339
+
340
+ /**
341
+ * Get recommended quantization levels
342
+ */
343
+ getRecommendedQuantizations(paramsB) {
344
+ const result = this.cache;
345
+ if (!result) return ['Q4_K_M'];
346
+
347
+ const maxSize = this.getMaxModelSize();
348
+ const recommendations = [];
349
+
350
+ // Estimate size for each quantization
351
+ const quantSizes = {
352
+ 'FP16': paramsB * 2,
353
+ 'Q8_0': paramsB * 1.1,
354
+ 'Q6_K': paramsB * 0.85,
355
+ 'Q5_K_M': paramsB * 0.75,
356
+ 'Q4_K_M': paramsB * 0.65,
357
+ 'Q4_0': paramsB * 0.55,
358
+ 'Q3_K_M': paramsB * 0.45,
359
+ 'IQ4_XS': paramsB * 0.5,
360
+ 'IQ3_XXS': paramsB * 0.35,
361
+ 'Q2_K': paramsB * 0.35
362
+ };
363
+
364
+ // Quality order (best first)
365
+ const qualityOrder = [
366
+ 'FP16', 'Q8_0', 'Q6_K', 'Q5_K_M', 'Q4_K_M',
367
+ 'IQ4_XS', 'Q4_0', 'Q3_K_M', 'IQ3_XXS', 'Q2_K'
368
+ ];
369
+
370
+ for (const quant of qualityOrder) {
371
+ if (quantSizes[quant] <= maxSize) {
372
+ recommendations.push(quant);
373
+ }
374
+ }
375
+
376
+ // Always suggest at least Q4_K_M if nothing fits
377
+ if (recommendations.length === 0) {
378
+ recommendations.push('Q4_K_M');
379
+ }
380
+
381
+ return recommendations.slice(0, 3); // Return top 3
382
+ }
383
+
384
+ /**
385
+ * Get a simple text description of the hardware
386
+ */
387
+ getHardwareDescription() {
388
+ const result = this.cache;
389
+ if (!result) return 'Unknown hardware';
390
+
391
+ const summary = result.summary;
392
+
393
+ if (summary.bestBackend === 'cuda') {
394
+ const gpuDesc = summary.isMultiGPU
395
+ ? `${summary.gpuCount}x ${summary.gpuModel}`
396
+ : summary.gpuModel;
397
+ return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
398
+ }
399
+ else if (summary.bestBackend === 'rocm') {
400
+ const gpuDesc = summary.isMultiGPU
401
+ ? `${summary.gpuCount}x ${summary.gpuModel}`
402
+ : summary.gpuModel;
403
+ return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
404
+ }
405
+ else if (summary.bestBackend === 'metal') {
406
+ return `${summary.gpuModel} (${summary.totalVRAM}GB Unified Memory)`;
407
+ }
408
+ else if (summary.bestBackend === 'intel') {
409
+ return `${summary.gpuModel} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
410
+ }
411
+ else {
412
+ return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
413
+ }
414
+ }
415
+
416
+ /**
417
+ * Get the active backend instance
418
+ */
419
+ getActiveBackend() {
420
+ const result = this.cache;
421
+ if (!result || !result.primary) return this.backends.cpu;
422
+
423
+ return this.backends[result.primary.type] || this.backends.cpu;
424
+ }
425
+
426
+ /**
427
+ * Clear cache to force re-detection
428
+ */
429
+ clearCache() {
430
+ this.cache = null;
431
+ this.cacheTime = 0;
432
+
433
+ // Clear individual backend caches
434
+ for (const backend of Object.values(this.backends)) {
435
+ if (backend.cache !== undefined) {
436
+ backend.cache = null;
437
+ }
438
+ }
439
+ }
440
+ }
441
+
442
+ module.exports = UnifiedDetector;