llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,417 @@
1
+ /**
2
+ * CUDA Detector
3
+ * Detects NVIDIA GPUs using nvidia-smi
4
+ * Supports multi-GPU setups and detailed CUDA information
5
+ */
6
+
7
+ const { execSync, exec } = require('child_process');
8
+
9
class CUDADetector {
    /**
     * Create a detector with empty caches. Nothing is probed until
     * checkAvailability() or detect() is called.
     */
    constructor() {
        // Memoized getGPUInfo() result; stays null until a detection succeeds.
        this.cache = null;
        // Tri-state: null = not probed yet, otherwise the boolean probe result.
        this.isAvailable = null;
    }

    /**
     * Check if CUDA is available by running `nvidia-smi --version`.
     * The answer is memoized for the lifetime of this instance.
     *
     * @returns {boolean} true when the command exits successfully.
     */
    checkAvailability() {
        if (this.isAvailable !== null) {
            return this.isAvailable;
        }

        try {
            execSync('nvidia-smi --version', {
                encoding: 'utf8',
                timeout: 5000,
                stdio: ['pipe', 'pipe', 'pipe']
            });
            this.isAvailable = true;
        } catch (e) {
            // Missing binary, non-zero exit and timeout all mean "no CUDA here".
            this.isAvailable = false;
        }

        return this.isAvailable;
    }

    /**
     * Detect all NVIDIA GPUs and their capabilities.
     * A successful result is cached and returned on later calls.
     *
     * @returns {object|null} the getGPUInfo() result, or null when
     *   nvidia-smi is unavailable or detection failed.
     */
    detect() {
        if (!this.checkAvailability()) {
            return null;
        }

        if (this.cache) {
            return this.cache;
        }

        try {
            const info = this.getGPUInfo();
            this.cache = info;
            return info;
        } catch (error) {
            // Detection is best-effort: callers treat null as "no CUDA GPU".
            return null;
        }
    }

    /**
     * Get detailed GPU information using nvidia-smi.
     *
     * Tries a detailed per-GPU query first and falls back to a minimal
     * name/memory query when the detailed one fails or yields no GPUs.
     *
     * @returns {object|null} `{ gpus, driver, cuda, totalVRAM, backend,
     *   isMultiGPU, speedCoefficient }`, or null when both queries fail.
     */
    getGPUInfo() {
        const result = {
            gpus: [],
            driver: null,
            cuda: null,
            totalVRAM: 0,
            backend: 'cuda',
            isMultiGPU: false,
            speedCoefficient: 0
        };

        // stderr is piped (as in checkAvailability) so driver warnings are not
        // echoed onto the parent process' stderr.
        const execOptions = (timeout) => ({
            encoding: 'utf8',
            timeout,
            stdio: ['pipe', 'pipe', 'pipe']
        });

        try {
            // One line per GPU; the driver version is the same on every line.
            const driverOutput = execSync(
                'nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits',
                execOptions(5000)
            );
            result.driver = driverOutput.trim().split(/\r?\n/)[0].trim() || null;
        } catch (e) {
            // Continue without driver info
        }

        try {
            // The CUDA version only appears in the plain nvidia-smi banner.
            // The full output is matched in JS instead of piping through
            // `head`, which does not exist on Windows.
            const banner = execSync('nvidia-smi', execOptions(5000));
            const cudaMatch = banner.match(/CUDA Version:\s*([\d.]+)/);
            if (cudaMatch) {
                result.cuda = cudaMatch[1];
            }
        } catch (e) {
            // Continue without CUDA version info
        }

        // Field order here must match the column indices used in
        // parseDetailedGPUOutput().
        const query = [
            'index',
            'name',
            'uuid',
            'memory.total',
            'memory.free',
            'memory.used',
            'compute_mode',
            'pcie.link.gen.current',
            'pcie.link.width.current',
            'power.draw',
            'power.limit',
            'temperature.gpu',
            'utilization.gpu',
            'utilization.memory',
            'clocks.current.sm',
            'clocks.max.sm'
        ].join(',');

        let gpus = [];
        let detailedQueryFailed = false;

        try {
            const gpuData = execSync(
                `nvidia-smi --query-gpu=${query} --format=csv,noheader,nounits`,
                execOptions(10000)
            );
            gpus = this.parseDetailedGPUOutput(gpuData);
        } catch (e) {
            detailedQueryFailed = true;
            gpus = [];
        }

        if (gpus.length === 0) {
            // Fallback to simpler query
            try {
                const simpleData = execSync(
                    'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits',
                    execOptions(5000)
                );
                gpus = this.parseSimpleGPUOutput(simpleData);
            } catch (e2) {
                // Only give up entirely when neither query could be executed.
                if (detailedQueryFailed) {
                    return null;
                }
            }
        }

        result.gpus = gpus;
        result.totalVRAM = gpus.reduce((sum, gpu) => sum + gpu.memory.total, 0);
        result.isMultiGPU = gpus.length > 1;
        result.speedCoefficient = gpus.length > 0
            ? Math.max(...gpus.map((gpu) => gpu.speedCoefficient))
            : 0;

        return result;
    }

    /**
     * Parse the CSV produced by the detailed `--query-gpu` call in getGPUInfo().
     * Internal helper, separated so the parsing can be exercised without a GPU.
     *
     * Lines with fewer than 10 columns are skipped; unparsable numeric fields
     * (for example "[N/A]") become 0.
     *
     * @param {string} csv raw nvidia-smi output (csv,noheader,nounits).
     * @returns {object[]} one GPU descriptor per valid line; memory in whole GB.
     */
    parseDetailedGPUOutput(csv) {
        const toInt = (value) => Number.parseInt(value, 10) || 0;
        const toFloat = (value) => Number.parseFloat(value) || 0;
        // nvidia-smi reports memory in MB; the rest of the class works in GB.
        const mbToGB = (value) => Math.round(Number.parseInt(value, 10) / 1024) || 0;

        const gpus = [];

        for (const line of String(csv ?? '').split(/\r?\n/)) {
            const parts = line.split(',').map((p) => p.trim());

            if (parts.length < 10) continue;

            gpus.push({
                index: toInt(parts[0]),
                name: parts[1] || 'Unknown NVIDIA GPU',
                uuid: parts[2] || null,
                memory: {
                    total: mbToGB(parts[3]),
                    free: mbToGB(parts[4]),
                    used: mbToGB(parts[5])
                },
                computeMode: parts[6] || 'Default',
                pcie: {
                    generation: toInt(parts[7]),
                    width: toInt(parts[8])
                },
                power: {
                    draw: toFloat(parts[9]),
                    limit: toFloat(parts[10])
                },
                temperature: toInt(parts[11]),
                utilization: {
                    gpu: toInt(parts[12]),
                    memory: toInt(parts[13])
                },
                clocks: {
                    current: toInt(parts[14]),
                    max: toInt(parts[15])
                },
                capabilities: this.getGPUCapabilities(parts[1]),
                speedCoefficient: this.calculateSpeedCoefficient(
                    parts[1],
                    Number.parseInt(parts[3], 10)
                )
            });
        }

        return gpus;
    }

    /**
     * Parse the CSV produced by the fallback `name,memory.total` query.
     * Internal helper. Blank lines are ignored so empty output yields no GPUs.
     * Free memory is not reported by this query and is assumed equal to total.
     *
     * @param {string} csv raw nvidia-smi output (csv,noheader,nounits).
     * @returns {object[]} reduced GPU descriptors; memory in whole GB.
     */
    parseSimpleGPUOutput(csv) {
        const rows = String(csv ?? '')
            .split(/\r?\n/)
            .filter((line) => line.trim() !== '');

        return rows.map((line, i) => {
            const [name, memMB] = line.split(',').map((p) => p.trim());
            const memGB = Math.round(Number.parseInt(memMB, 10) / 1024) || 0;

            return {
                index: i,
                name: name || 'NVIDIA GPU',
                memory: { total: memGB, free: memGB, used: 0 },
                capabilities: this.getGPUCapabilities(name),
                speedCoefficient: this.calculateSpeedCoefficient(
                    name,
                    Number.parseInt(memMB, 10)
                )
            };
        });
    }

    /**
     * Get GPU capabilities based on model name.
     *
     * Classification is by case-insensitive substring match on the marketing
     * name, so the order of the checks matters: the more specific workstation
     * names are tested before the generic "rtx 40" / "rtx 50" prefixes they
     * would otherwise collide with (e.g. "Quadro RTX 5000" is Turing).
     *
     * @param {string} [name] GPU name as reported by nvidia-smi.
     * @returns {{tensorCores: boolean, fp16: boolean, bf16: boolean,
     *   int8: boolean, fp8: boolean, nvlink: boolean,
     *   computeCapability: string, architecture: string}}
     *   Unrecognised names get the conservative defaults
     *   (compute capability '5.0', architecture 'Unknown').
     */
    getGPUCapabilities(name) {
        const nameLower = (name || '').toLowerCase();
        const has = (...needles) => needles.some((needle) => nameLower.includes(needle));

        const defaults = {
            tensorCores: false,
            fp16: true,
            bf16: false,
            int8: true,
            fp8: false,
            nvlink: false,
            computeCapability: '5.0',
            architecture: 'Unknown'
        };

        const ada = {
            tensorCores: true,
            bf16: true,
            fp8: true,
            computeCapability: '8.9',
            architecture: 'Ada Lovelace'
        };
        const turingRTX = {
            tensorCores: true,
            computeCapability: '7.5',
            architecture: 'Turing'
        };

        let profile = {};

        // Workstation "RTX nnnn Ada Generation" cards (must precede rtx 20/40/50).
        if (has('ada generation')) {
            profile = ada;
        }
        // Quadro RTX 4000/5000/6000/8000 are Turing (must precede rtx 40/50).
        else if (has('quadro rtx')) {
            profile = turingRTX;
        }
        // RTX 50 series (Blackwell)
        else if (has('rtx 50', 'rtx50')) {
            profile = {
                tensorCores: true,
                bf16: true,
                fp8: true,
                computeCapability: '12.0',
                architecture: 'Blackwell'
            };
        }
        // RTX 40 series (Ada Lovelace)
        else if (has('rtx 40', 'rtx40', 'l40', 'l4')) {
            profile = ada;
        }
        // RTX 30 series (Ampere)
        else if (has('rtx 30', 'rtx30', 'a100', 'a40', 'a30', 'a10')) {
            // A100/A30 are compute capability 8.0; the remaining Ampere parts
            // matched here are 8.6. Word boundaries keep "A3000"/"A1000"
            // from being mistaken for A30/A100.
            const isGA100Class = /\ba(100|30)\b/.test(nameLower);
            profile = {
                tensorCores: true,
                bf16: true,
                computeCapability: isGA100Class ? '8.0' : '8.6',
                architecture: 'Ampere'
            };
            if (has('a100')) {
                profile.nvlink = true;
            }
        }
        // RTX 20 series (Turing)
        else if (has('rtx 20', 'rtx20', 't4')) {
            profile = turingRTX;
        }
        // GTX 16 series (Turing without Tensor Cores)
        else if (has('gtx 16')) {
            profile = {
                computeCapability: '7.5',
                architecture: 'Turing'
            };
        }
        // Tesla V100 (Volta)
        else if (has('v100', 'volta')) {
            profile = {
                tensorCores: true,
                nvlink: true,
                computeCapability: '7.0',
                architecture: 'Volta'
            };
        }
        // H100 (Hopper)
        else if (has('h100', 'h200')) {
            profile = {
                tensorCores: true,
                bf16: true,
                fp8: true,
                nvlink: true,
                computeCapability: '9.0',
                architecture: 'Hopper'
            };
        }

        return { ...defaults, ...profile };
    }

    /**
     * Calculate speed coefficient for LLM inference.
     *
     * Known models are looked up by substring; the table is ordered so that
     * longer names ("rtx 4070 ti", "l40") are tried before their prefixes
     * ("rtx 4070", "l4"). Unknown models are estimated from VRAM size.
     *
     * @param {string} [name] GPU name as reported by nvidia-smi.
     * @param {number} vramMB total memory in MB; NaN falls through to the
     *   lowest estimate.
     * @returns {number} the coefficient used by estimateTokensPerSecond().
     */
    calculateSpeedCoefficient(name, vramMB) {
        const nameLower = (name || '').toLowerCase();
        const vramGB = Math.round(vramMB / 1024);

        // Speed coefficients (tokens/sec per B params at Q4)
        const speedMap = {
            // RTX 50 series
            'rtx 5090': 300,
            'rtx 5080': 260,
            'rtx 5070 ti': 230,
            'rtx 5070': 210,
            'rtx 5060': 180,

            // RTX 40 series
            'rtx 4090': 260,
            'rtx 4080': 220,
            'rtx 4070 ti': 190,
            'rtx 4070': 170,
            'rtx 4060 ti': 150,
            'rtx 4060': 130,

            // RTX 30 series
            'rtx 3090 ti': 220,
            'rtx 3090': 200,
            'rtx 3080 ti': 190,
            'rtx 3080': 180,
            'rtx 3070 ti': 160,
            'rtx 3070': 150,
            'rtx 3060 ti': 130,
            'rtx 3060': 110,

            // RTX 20 series
            'rtx 2080 ti': 140,
            'rtx 2080': 120,
            'rtx 2070': 100,
            'rtx 2060': 80,

            // Data center
            'h100': 400,
            'h200': 450,
            'a100': 300,
            'l40': 220,
            'l4': 150,
            'a40': 180,
            't4': 70,
            'v100': 120
        };

        // First match wins, so insertion order above is significant.
        for (const [model, speed] of Object.entries(speedMap)) {
            if (nameLower.includes(model)) {
                return speed;
            }
        }

        // Estimate based on VRAM if model not found
        if (vramGB >= 24) return 200;
        if (vramGB >= 16) return 150;
        if (vramGB >= 12) return 120;
        if (vramGB >= 8) return 90;
        if (vramGB >= 6) return 60;
        return 40;
    }

    /**
     * Get primary GPU: the one with the most VRAM, ties broken by the higher
     * speed coefficient (the earliest GPU wins a full tie).
     *
     * @returns {object|null} a GPU descriptor, or null when none was detected.
     */
    getPrimaryGPU() {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return null;

        return info.gpus.reduce((best, gpu) => {
            if (!best) return gpu;
            // Prefer higher VRAM, then higher speed coefficient
            if (gpu.memory.total > best.memory.total) return gpu;
            if (gpu.memory.total === best.memory.total &&
                gpu.speedCoefficient > best.speedCoefficient) return gpu;
            return best;
        }, null);
    }

    /**
     * Get hardware fingerprint for benchmarks, e.g. `cuda-rtx-4090-24gb` or
     * `cuda-rtx-4090-48gb-x2` for a multi-GPU system.
     *
     * @returns {string|null} the fingerprint, or null when no GPU was detected.
     */
    getFingerprint() {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return null;

        const primary = this.getPrimaryGPU();
        // Trim BEFORE turning whitespace into dashes; otherwise the blanks left
        // behind by the removed vendor words become a stray leading "-".
        const gpuName = primary.name.toLowerCase()
            .replace(/nvidia|geforce|quadro|tesla/gi, '')
            .trim()
            .replace(/\s+/g, '-');

        return `cuda-${gpuName}-${info.totalVRAM}gb${info.isMultiGPU ? '-x' + info.gpus.length : ''}`;
    }

    /**
     * Estimate inference speed for a model size.
     *
     * @param {number} paramsB model size in billions of parameters.
     * @param {string} [quantization='Q4_K_M'] quantization label
     *   (case-insensitive); unknown labels use a 2.0 multiplier.
     * @param {number|null} [gpuIndex=null] position in the detected GPU list;
     *   null or an out-of-range value selects the primary GPU.
     * @returns {number} rounded tokens/second, or 0 when no GPU is detected or
     *   paramsB is not a positive number.
     */
    estimateTokensPerSecond(paramsB, quantization = 'Q4_K_M', gpuIndex = null) {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return 0;

        // Guards against division by zero / NaN for 0, negative or missing sizes.
        if (!(paramsB > 0)) return 0;

        const gpu = gpuIndex !== null && info.gpus[gpuIndex]
            ? info.gpus[gpuIndex]
            : this.getPrimaryGPU();

        // Quantization multipliers (how much faster vs FP16)
        // NOTE(review): calculateSpeedCoefficient documents its table as
        // "at Q4" while these are relative to FP16 - confirm intended baseline.
        const quantMult = new Map([
            ['FP16', 1.0],
            ['Q8_0', 1.5],
            ['Q6_K', 1.8],
            ['Q5_K_M', 2.0],
            ['Q5_0', 2.0],
            ['Q4_K_M', 2.5],
            ['Q4_0', 2.8],
            ['Q3_K_M', 3.0],
            ['Q2_K', 3.5],
            ['IQ4_XS', 2.6],
            ['IQ3_XXS', 3.2]
        ]);

        const mult = quantMult.get(String(quantization).toUpperCase()) ?? 2.0;
        const baseSpeed = gpu.speedCoefficient / paramsB * mult;

        return Math.round(baseSpeed);
    }

    /**
     * Check if model will fit in VRAM, keeping 2 GB of headroom.
     *
     * @param {number} sizeGB model size in GB.
     * @param {boolean} [useMultiGPU=true] when true, compare against the VRAM
     *   of all GPUs combined; otherwise only the primary GPU.
     * @returns {boolean} false when no GPU information is available.
     */
    willFitInVRAM(sizeGB, useMultiGPU = true) {
        const info = this.detect();
        if (!info) return false;

        const availableVRAM = useMultiGPU
            ? info.totalVRAM
            : (this.getPrimaryGPU()?.memory?.total || 0);
        // Leave 2GB headroom for system
        return sizeGB <= (availableVRAM - 2);
    }
}
416
+
417
+ module.exports = CUDADetector;