llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,440 @@
1
+ /**
2
+ * ROCm Detector
3
+ * Detects AMD GPUs using rocm-smi
4
+ * Supports multi-GPU setups and ROCm capabilities
5
+ */
6
+
7
+ const { execSync } = require('child_process');
8
+
9
/**
 * Detects AMD GPUs through the ROCm command-line tools (`rocm-smi`, with
 * `rocminfo` as a fallback) and derives rough LLM-inference characteristics
 * (VRAM, capability flags, speed coefficient) from what they report.
 *
 * Results of `detect()` are memoized on the instance; create a new detector
 * to force a re-scan.
 */
class ROCmDetector {
    constructor() {
        this.cache = null;       // memoized successful result of getGPUInfo()
        this.isAvailable = null; // null = not probed yet, otherwise boolean
    }

    /**
     * Run a shell command synchronously and return its stdout as a string.
     *
     * All stdio streams are piped so that a missing or failing tool does not
     * write to the parent's stderr. Kept as a method so it is the single
     * place where external processes are spawned.
     *
     * @param {string} command - Command line to execute.
     * @param {number} [timeout=5000] - Timeout in milliseconds.
     * @returns {string} Captured stdout.
     * @throws {Error} Whatever `execSync` throws (non-zero exit, timeout, ENOENT).
     */
    runCommand(command, timeout = 5000) {
        return execSync(command, {
            encoding: 'utf8',
            timeout,
            stdio: ['pipe', 'pipe', 'pipe']
        });
    }

    /**
     * Check if ROCm is available.
     *
     * Probes `rocm-smi --version` first and `rocminfo` second; the outcome is
     * cached so the probe runs at most once per instance.
     *
     * @returns {boolean} True when either tool ran successfully.
     */
    checkAvailability() {
        if (this.isAvailable !== null) {
            return this.isAvailable;
        }

        this.isAvailable = ['rocm-smi --version', 'rocminfo'].some((command) => {
            try {
                this.runCommand(command, 5000);
                return true;
            } catch (e) {
                // Tool missing or failing: try the next probe.
                return false;
            }
        });

        return this.isAvailable;
    }

    /**
     * Detect all AMD GPUs and their capabilities.
     *
     * @returns {object|null} The structure built by `getGPUInfo()`, or null
     *   when ROCm is unavailable or detection failed.
     */
    detect() {
        if (!this.checkAvailability()) {
            return null;
        }

        if (this.cache) {
            return this.cache;
        }

        try {
            const info = this.getGPUInfo();
            this.cache = info;
            return info;
        } catch (error) {
            // Detection is best-effort: callers treat null as "no ROCm GPU".
            return null;
        }
    }

    /**
     * Get detailed GPU information using rocm-smi, falling back to rocminfo.
     *
     * rocminfo is consulted when rocm-smi fails or lists no GPU.
     *
     * @returns {{gpus: object[], rocmVersion: (string|null), totalVRAM: number,
     *   backend: string, isMultiGPU: boolean, speedCoefficient: number}|null}
     *   Null only when both rocm-smi and rocminfo failed to run.
     */
    getGPUInfo() {
        const result = {
            gpus: [],
            rocmVersion: null,
            totalVRAM: 0,
            backend: 'rocm',
            isMultiGPU: false,
            speedCoefficient: 0
        };

        // Version is optional; the first dotted number in the output is used.
        try {
            const versionOutput = this.runCommand('rocm-smi --version', 5000);
            const versionMatch = versionOutput.match(/(\d+\.\d+\.?\d*)/);
            if (versionMatch) {
                result.rocmVersion = versionMatch[1];
            }
        } catch (e) {
            // Continue without version
        }

        let gpus = [];
        let smiFailed = false;
        try {
            gpus = this.queryRocmSmi();
        } catch (e) {
            smiFailed = true;
        }

        if (gpus.length === 0) {
            try {
                gpus = this.parseRocmInfo(this.runCommand('rocminfo', 10000));
            } catch (e2) {
                // Both tools failed: nothing can be reported.
                if (smiFailed) {
                    return null;
                }
            }
        }

        for (const gpu of gpus) {
            result.gpus.push(gpu);
            result.totalVRAM += gpu.memory.total;
        }

        result.isMultiGPU = result.gpus.length > 1;
        result.speedCoefficient = result.gpus.length > 0
            ? Math.max(...result.gpus.map((g) => g.speedCoefficient))
            : 0;

        return result;
    }

    /**
     * Collect per-GPU data from the individual `rocm-smi` queries.
     *
     * Product name and VRAM queries are mandatory (their failure propagates);
     * temperature and utilization are optional extras.
     *
     * @returns {object[]} One GPU descriptor per index seen in the output.
     * @throws {Error} When a mandatory rocm-smi query cannot be executed.
     */
    queryRocmSmi() {
        const names = this.parseProductNames(
            this.runCommand('rocm-smi --showproductname', 10000)
        );
        const memory = this.parseVRAMInfo(
            this.runCommand('rocm-smi --showmeminfo vram', 10000)
        );

        const temps = new Map();
        const utils = new Map();
        try {
            const tempInfo = this.runCommand('rocm-smi --showtemp', 5000);
            for (const m of tempInfo.matchAll(/GPU\[(\d+)\].*?Temperature.*?:\s*(\d+\.?\d*)/g)) {
                temps.set(Number.parseInt(m[1], 10), Number.parseFloat(m[2]));
            }

            const utilInfo = this.runCommand('rocm-smi --showuse', 5000);
            for (const m of utilInfo.matchAll(/GPU\[(\d+)\].*?GPU use.*?:\s*(\d+)/g)) {
                utils.set(Number.parseInt(m[1], 10), Number.parseInt(m[2], 10));
            }
        } catch (e) {
            // Continue without temp/util
        }

        const indexes = [...new Set([...names.keys(), ...memory.keys()])]
            .sort((a, b) => a - b);

        return indexes.map((index) => {
            const name = names.get(index) || `AMD GPU ${index}`;
            const reported = memory.get(index);
            const total = reported && reported.total > 0
                ? reported.total
                : this.estimateVRAMFromModel(name);
            // Used memory is only known when rocm-smi printed it.
            const used = reported && reported.used !== null
                ? Math.min(reported.used, total)
                : 0;

            return {
                index,
                name,
                memory: {
                    total,
                    free: Math.round((total - used) * 100) / 100,
                    used
                },
                temperature: temps.get(index) || 0,
                utilization: utils.get(index) || 0,
                capabilities: this.getGPUCapabilities(name),
                speedCoefficient: this.calculateSpeedCoefficient(name, total)
            };
        });
    }

    /**
     * Parse `rocm-smi --showproductname` output into GPU names.
     *
     * The tool prints several `GPU[n] : <label>: <value>` lines per device
     * (series, model, vendor, SKU, ...). The "Card series" value is preferred,
     * then "Card model", then an unlabelled value; bare hex IDs such as
     * `0x744c` and all other labels are ignored so they cannot replace the
     * marketing name.
     *
     * @param {string} output - Raw command output.
     * @returns {Map<number, (string|null)>} GPU index -> name (null if none found).
     */
    parseProductNames(output) {
        const best = new Map(); // index -> { rank, name }

        for (const line of String(output || '').split(/\r?\n/)) {
            const lineMatch = line.match(/GPU\[(\d+)\][^:]*:\s*(.+)/);
            if (!lineMatch) continue;

            const index = Number.parseInt(lineMatch[1], 10);
            if (!best.has(index)) {
                best.set(index, { rank: 0, name: null });
            }

            const rest = lineMatch[2].trim();
            const labelled = rest.match(/^([^:]+):\s*(.*)$/);
            let rank = 1;
            let value = rest;
            if (labelled) {
                const label = labelled[1].trim().toLowerCase();
                value = labelled[2].trim();
                if (label === 'card series') rank = 3;
                else if (label === 'card model') rank = 2;
                else rank = 0;
            }

            if (rank === 0 || value === '' || /^0x[0-9a-f]+$/i.test(value)) continue;
            if (rank > best.get(index).rank) {
                best.set(index, { rank, name: value });
            }
        }

        const names = new Map();
        for (const [index, entry] of best) {
            names.set(index, entry.name);
        }
        return names;
    }

    /**
     * Parse `rocm-smi --showmeminfo vram` output.
     *
     * Lines look like `GPU[0] : VRAM Total Memory (B): 25753026560` and
     * `GPU[0] : VRAM Total Used Memory (B): 1073741824`. The unit in
     * parentheses is honoured (B / KB / MB / GB, with or without the "i");
     * when no unit is printed the value is treated as MB.
     *
     * @param {string} output - Raw command output.
     * @returns {Map<number, {total: (number|null), used: (number|null)}>}
     *   GPU index -> sizes in GB (total rounded to an integer, used to 2 decimals).
     */
    parseVRAMInfo(output) {
        const divisorToGB = {
            b: 1024 * 1024 * 1024,
            kb: 1024 * 1024,
            kib: 1024 * 1024,
            mb: 1024,
            mib: 1024,
            gb: 1,
            gib: 1
        };
        const memory = new Map();

        for (const line of String(output || '').split(/\r?\n/)) {
            const lineMatch = line.match(/GPU\[(\d+)\].*?Total(.*?):\s*(\d+)/i);
            if (!lineMatch) continue;

            const index = Number.parseInt(lineMatch[1], 10);
            const label = lineMatch[2];
            const unitMatch = label.match(/\(\s*([kmg]?i?b)\s*\)/i);
            const unit = unitMatch ? unitMatch[1].toLowerCase() : 'mb';
            const sizeGB = Number.parseInt(lineMatch[3], 10) / divisorToGB[unit];

            const entry = memory.get(index) || { total: null, used: null };
            if (/used/i.test(label)) {
                // "Total Used Memory" must not be mistaken for the capacity.
                entry.used = Math.round(sizeGB * 100) / 100;
            } else if (entry.total === null) {
                entry.total = Math.round(sizeGB);
            }
            memory.set(index, entry);
        }

        return memory;
    }

    /**
     * Parse `rocminfo` output into GPU descriptors.
     *
     * The output is split into "Agent N" sections; only sections whose
     * "Device Type" is GPU are kept (or, if that line is absent, sections
     * whose agent name starts with "gfx"). Each agent therefore yields at
     * most one GPU, and CPU agents / ISA entries are not counted.
     * VRAM is estimated from the marketing name, then from the gfx target.
     *
     * @param {string} output - Raw `rocminfo` output.
     * @returns {object[]} GPU descriptors in agent order.
     */
    parseRocmInfo(output) {
        const text = String(output || '').replace(/\r/g, '');
        const sections = text.split(/^[ \t]*Agent[ \t]+\d+[ \t]*$/m).slice(1);
        const gpus = [];

        for (const section of sections) {
            const nameMatch = section.match(/^[ \t]*Name:[ \t]*(.*?)[ \t]*$/m);
            const marketingMatch = section.match(/^[ \t]*Marketing Name:[ \t]*(.*?)[ \t]*$/m);
            const typeMatch = section.match(/^[ \t]*Device Type:[ \t]*(\S+)/m);

            const gfxName = nameMatch ? nameMatch[1] : '';
            const marketing = marketingMatch ? marketingMatch[1] : '';
            const isGPU = typeMatch
                ? typeMatch[1].toLowerCase() === 'gpu'
                : /^gfx/i.test(gfxName);
            const name = marketing || gfxName;
            if (!isGPU || !name) continue;

            // Both identifiers feed the lookups so either one can match.
            const descriptor = `${marketing} ${gfxName}`.trim();
            let vram = this.lookupVRAMByModel(marketing);
            if (vram === null) vram = this.lookupVRAMByGfxName(gfxName);
            if (vram === null) vram = 8;

            gpus.push({
                index: gpus.length,
                name,
                memory: { total: vram, free: vram, used: 0 },
                temperature: 0,
                utilization: 0,
                capabilities: this.getGPUCapabilities(descriptor),
                speedCoefficient: this.calculateSpeedCoefficient(descriptor, vram)
            });
        }

        return gpus;
    }

    /**
     * Get GPU capabilities based on model name.
     *
     * Matching is by substring on the lower-cased name, so both marketing
     * names ("RX 7900 XTX") and gfx targets ("gfx1100") are recognised.
     *
     * @param {string} name - GPU name or gfx target.
     * @returns {{fp16: boolean, bf16: boolean, int8: boolean, matrixCores: boolean,
     *   infinityCache: boolean, architecture: string, gfxVersion: (string|null)}}
     */
    getGPUCapabilities(name) {
        const nameLower = (name || '').toLowerCase();
        const has = (...needles) => needles.some((needle) => nameLower.includes(needle));

        const capabilities = {
            fp16: true,
            bf16: false,
            int8: true,
            matrixCores: false,
            infinityCache: false,
            architecture: 'Unknown',
            gfxVersion: null
        };

        if (has('7900', '7800', '7700', '7600', 'gfx1100', 'gfx1101', 'gfx1102')) {
            // RDNA 3 (RX 7000 series)
            capabilities.bf16 = true;
            capabilities.matrixCores = true; // AI Accelerators
            capabilities.infinityCache = true;
            capabilities.architecture = 'RDNA 3';
            capabilities.gfxVersion = 'gfx1100';
        } else if (has('6900', '6800', '6700', '6600', 'gfx1030', 'gfx1031', 'gfx1032')) {
            // RDNA 2 (RX 6000 series)
            capabilities.infinityCache = true;
            capabilities.architecture = 'RDNA 2';
            capabilities.gfxVersion = 'gfx1030';
        } else if (has('mi300', 'mi250', 'mi210', 'gfx940', 'gfx90a')) {
            // CDNA 2/3 (Instinct MI200/MI300 series)
            capabilities.bf16 = true;
            capabilities.matrixCores = true;
            capabilities.architecture = 'CDNA';
            // A device identified only by its gfx940 target is still MI300-class.
            capabilities.gfxVersion = has('mi300', 'gfx940') ? 'gfx940' : 'gfx90a';
        } else if (has('mi100', 'gfx908')) {
            // CDNA (Instinct MI100)
            capabilities.bf16 = true;
            capabilities.matrixCores = true;
            capabilities.architecture = 'CDNA';
            capabilities.gfxVersion = 'gfx908';
        } else if (has('5700', '5600', '5500', 'gfx1010')) {
            // RDNA 1 (RX 5000 series)
            capabilities.architecture = 'RDNA 1';
            capabilities.gfxVersion = 'gfx1010';
        }

        return capabilities;
    }

    /**
     * Look up the VRAM size (GB) of a known marketing model.
     *
     * Entries are checked in order, so more specific names ("7900 xtx")
     * must precede their prefixes ("7900 xt").
     *
     * @param {string} name - GPU marketing name.
     * @returns {number|null} VRAM in GB, or null when the model is unknown.
     */
    lookupVRAMByModel(name) {
        const nameLower = (name || '').toLowerCase();
        const table = [
            // RX 7000 series
            ['7900 xtx', 24], ['7900 xt', 20], ['7900 gre', 16],
            ['7800 xt', 16], ['7700 xt', 12], ['7600', 8],
            // RX 6000 series
            ['6950 xt', 16], ['6900 xt', 16], ['6800 xt', 16], ['6800', 16],
            ['6750 xt', 12], ['6700 xt', 12], ['6700', 10],
            ['6650 xt', 8], ['6600', 8],
            // Instinct series
            ['mi300x', 192], ['mi300', 128], ['mi250x', 128],
            ['mi250', 64], ['mi210', 64], ['mi100', 32]
        ];

        for (const [model, vramGB] of table) {
            if (nameLower.includes(model)) return vramGB;
        }
        return null;
    }

    /**
     * Estimate VRAM from model name.
     *
     * @param {string} name - GPU marketing name.
     * @returns {number} VRAM in GB; 8 when the model is not recognised.
     */
    estimateVRAMFromModel(name) {
        const known = this.lookupVRAMByModel(name);
        return known === null ? 8 : known;
    }

    /**
     * Look up the VRAM size (GB) typically paired with a gfx target.
     *
     * @param {string} name - Name containing a gfx target such as "gfx1100".
     * @returns {number|null} VRAM in GB, or null when the target is unknown.
     */
    lookupVRAMByGfxName(name) {
        const nameLower = (name || '').toLowerCase();
        const table = [
            ['gfx1100', 24], // RX 7900 XTX
            ['gfx1101', 16], // RX 7800
            ['gfx1102', 8],  // RX 7600
            ['gfx1030', 16], // RX 6900/6800
            ['gfx1031', 12], // RX 6700
            ['gfx1032', 8],  // RX 6600
            ['gfx940', 128], // MI300
            ['gfx90a', 64]   // MI250
        ];

        for (const [target, vramGB] of table) {
            if (nameLower.includes(target)) return vramGB;
        }
        return null;
    }

    /**
     * Estimate VRAM from gfx name.
     *
     * @param {string} name - Name containing a gfx target.
     * @returns {number} VRAM in GB; 8 when the target is not recognised.
     */
    estimateVRAMFromGfxName(name) {
        const known = this.lookupVRAMByGfxName(name);
        return known === null ? 8 : known;
    }

    /**
     * Calculate speed coefficient for LLM inference.
     *
     * @param {string} name - GPU name.
     * @param {number} vramGB - VRAM in GB, used when the model is unknown.
     * @returns {number} Speed coefficient (tokens/sec per B params at Q4).
     */
    calculateSpeedCoefficient(name, vramGB) {
        const nameLower = (name || '').toLowerCase();

        // An ordered array, not an object: integer-like object keys such as
        // '6800' are enumerated before string keys such as '6800 xt', which
        // would make the generic entry shadow the more specific one.
        const speedTable = [
            // RX 7000 series (RDNA 3)
            ['7900 xtx', 200], ['7900 xt', 180], ['7900 gre', 160],
            ['7800 xt', 150], ['7700 xt', 120], ['7600', 90],
            // RX 6000 series (RDNA 2)
            ['6950 xt', 150], ['6900 xt', 140], ['6800 xt', 130], ['6800', 120],
            ['6750 xt', 100], ['6700 xt', 90], ['6700', 80],
            ['6600 xt', 70], ['6600', 60],
            // Instinct series
            ['mi300x', 400], ['mi300', 350], ['mi250x', 280],
            ['mi250', 250], ['mi210', 200], ['mi100', 150]
        ];

        for (const [model, speed] of speedTable) {
            if (nameLower.includes(model)) {
                return speed;
            }
        }

        // Estimate based on VRAM if model not found
        if (vramGB >= 24) return 180;
        if (vramGB >= 16) return 140;
        if (vramGB >= 12) return 100;
        if (vramGB >= 8) return 70;
        return 40;
    }

    /**
     * Get primary GPU: the one with the most VRAM, ties broken by the
     * higher speed coefficient.
     *
     * @returns {object|null} GPU descriptor, or null when none was detected.
     */
    getPrimaryGPU() {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return null;

        return info.gpus.reduce((best, gpu) => {
            if (!best) return gpu;
            if (gpu.memory.total > best.memory.total) return gpu;
            if (gpu.memory.total === best.memory.total &&
                gpu.speedCoefficient > best.speedCoefficient) return gpu;
            return best;
        }, null);
    }

    /**
     * Get hardware fingerprint for benchmarks.
     *
     * Format: `rocm-<gpu-name>-<totalVRAM>gb[-x<count>]`, where the name is
     * lower-cased with "amd", "radeon" and "rx" removed.
     *
     * @returns {string|null} Fingerprint, or null when no GPU was detected.
     */
    getFingerprint() {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return null;

        const primary = this.getPrimaryGPU();
        // Trim before collapsing whitespace; otherwise the blanks left by the
        // removed vendor words turn into a leading '-'.
        const gpuName = primary.name.toLowerCase()
            .replace(/amd|radeon|rx/gi, '')
            .trim()
            .replace(/\s+/g, '-');

        return `rocm-${gpuName}-${info.totalVRAM}gb${info.isMultiGPU ? '-x' + info.gpus.length : ''}`;
    }

    /**
     * Estimate inference speed for a model size.
     *
     * @param {number} paramsB - Model size in billions of parameters.
     * @param {string} [quantization='Q4_K_M'] - Quantization label (case-insensitive).
     * @returns {number} Rounded tokens/sec estimate; 0 when no GPU is
     *   available or `paramsB` is not a positive number.
     */
    estimateTokensPerSecond(paramsB, quantization = 'Q4_K_M') {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return 0;
        // Guard the division below against 0 / NaN / undefined sizes.
        if (!(paramsB > 0)) return 0;

        const gpu = this.getPrimaryGPU();

        const quantMult = {
            'FP16': 1.0,
            'Q8_0': 1.4,
            'Q6_K': 1.6,
            'Q5_K_M': 1.8,
            'Q5_0': 1.8,
            'Q4_K_M': 2.2,
            'Q4_0': 2.4,
            'Q3_K_M': 2.6,
            'Q2_K': 3.0,
            'IQ4_XS': 2.3,
            'IQ3_XXS': 2.8
        };

        const quantKey = String(quantization).toUpperCase();
        const mult = Object.prototype.hasOwnProperty.call(quantMult, quantKey)
            ? quantMult[quantKey]
            : 1.8;
        const baseSpeed = gpu.speedCoefficient / paramsB * mult;

        return Math.round(baseSpeed);
    }
}
439
+
440
+ module.exports = ROCmDetector;