llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CUDA Detector
|
|
3
|
+
* Detects NVIDIA GPUs using nvidia-smi
|
|
4
|
+
* Supports multi-GPU setups and detailed CUDA information
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { execSync, exec } = require('child_process');
|
|
8
|
+
|
|
9
|
+
/**
 * Detects NVIDIA GPUs by shelling out to `nvidia-smi` and derives
 * LLM-inference oriented metadata (VRAM, capability flags, speed estimates).
 *
 * Probe results are memoized on the instance: `isAvailable` caches the
 * availability check and `cache` holds the last successful detection.
 */
class CUDADetector {
    constructor() {
        // Memoized result of getGPUInfo(); stays null until a detection succeeds.
        this.cache = null;
        // Tri-state flag: null = not probed yet, true/false = probe outcome.
        this.isAvailable = null;
    }

    /**
     * Check if CUDA is available by running `nvidia-smi --version`.
     * The outcome is cached, so the command runs at most once per instance.
     *
     * @returns {boolean} true when `nvidia-smi` executed successfully.
     */
    checkAvailability() {
        if (this.isAvailable !== null) {
            return this.isAvailable;
        }

        try {
            execSync('nvidia-smi --version', {
                encoding: 'utf8',
                timeout: 5000,
                stdio: ['pipe', 'pipe', 'pipe']
            });
            this.isAvailable = true;
        } catch (e) {
            // Missing binary, non-zero exit or timeout all mean "no usable CUDA".
            this.isAvailable = false;
        }

        return this.isAvailable;
    }

    /**
     * Detect all NVIDIA GPUs and their capabilities.
     *
     * @returns {object|null} the structure built by getGPUInfo(), or null when
     *   nvidia-smi is unavailable or detection failed.
     */
    detect() {
        if (!this.checkAvailability()) {
            return null;
        }

        if (this.cache) {
            return this.cache;
        }

        try {
            const info = this.getGPUInfo();
            this.cache = info;
            return info;
        } catch (error) {
            return null;
        }
    }

    /**
     * Get detailed GPU information using nvidia-smi.
     *
     * Tries a detailed 16-field query first and falls back to a name/memory
     * query if the detailed one throws.
     *
     * @returns {object|null} `{ gpus, driver, cuda, totalVRAM, backend,
     *   isMultiGPU, speedCoefficient }`, or null when both queries fail.
     */
    getGPUInfo() {
        const result = {
            gpus: [],
            driver: null,
            cuda: null,
            totalVRAM: 0,
            backend: 'cuda',
            isMultiGPU: false,
            speedCoefficient: 0
        };

        // Pipe every stream so nvidia-smi diagnostics never leak to the
        // parent's stderr (same stdio setup as checkAvailability()).
        const execOptions = (timeout) => ({
            encoding: 'utf8',
            timeout,
            stdio: ['pipe', 'pipe', 'pipe']
        });
        const toInt = (value) => Number.parseInt(value, 10) || 0;
        const toFloat = (value) => Number.parseFloat(value) || 0;
        // nvidia-smi reports memory in MiB with `nounits`; callers work in whole GB.
        const mibToGB = (value) => Math.round(Number.parseInt(value, 10) / 1024) || 0;
        // Split command output into non-empty, trimmed rows (tolerates CRLF).
        const toRows = (output) => output
            .split('\n')
            .map((row) => row.trim())
            .filter((row) => row.length > 0);
        const toFields = (row) => row.split(', ').map((field) => field.trim());

        // Driver version: best effort, detection continues without it.
        try {
            const driverRows = toRows(execSync(
                'nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits',
                execOptions(5000)
            ));
            result.driver = driverRows[0] || null;
        } catch (e) {
            // Continue without driver info
        }

        // CUDA version: parsed from the banner of plain `nvidia-smi`. No shell
        // pipeline (`| head`) is used so this also works where `head` is missing.
        try {
            const banner = execSync('nvidia-smi', execOptions(5000));
            const cudaMatch = banner.match(/CUDA Version:\s*([\d.]+)/);
            if (cudaMatch) {
                result.cuda = cudaMatch[1];
            }
        } catch (e) {
            // Continue without CUDA version info
        }

        try {
            // Query all GPUs with detailed info; field order defines the
            // indexes used when reading `parts` below.
            const query = [
                'index',
                'name',
                'uuid',
                'memory.total',
                'memory.free',
                'memory.used',
                'compute_mode',
                'pcie.link.gen.current',
                'pcie.link.width.current',
                'power.draw',
                'power.limit',
                'temperature.gpu',
                'utilization.gpu',
                'utilization.memory',
                'clocks.current.sm',
                'clocks.max.sm'
            ].join(',');

            const gpuData = execSync(
                `nvidia-smi --query-gpu=${query} --format=csv,noheader,nounits`,
                execOptions(10000)
            );

            for (const row of toRows(gpuData)) {
                const parts = toFields(row);

                // Rows that are clearly not a full CSV record are ignored.
                if (parts.length < 10) continue;

                const gpu = {
                    index: toInt(parts[0]),
                    name: parts[1] || 'Unknown NVIDIA GPU',
                    uuid: parts[2] || null,
                    memory: {
                        total: mibToGB(parts[3]),
                        free: mibToGB(parts[4]),
                        used: mibToGB(parts[5])
                    },
                    computeMode: parts[6] || 'Default',
                    pcie: {
                        generation: toInt(parts[7]),
                        width: toInt(parts[8])
                    },
                    power: {
                        draw: toFloat(parts[9]),
                        limit: toFloat(parts[10])
                    },
                    temperature: toInt(parts[11]),
                    utilization: {
                        gpu: toInt(parts[12]),
                        memory: toInt(parts[13])
                    },
                    clocks: {
                        current: toInt(parts[14]),
                        max: toInt(parts[15])
                    },
                    capabilities: this.getGPUCapabilities(parts[1]),
                    speedCoefficient: this.calculateSpeedCoefficient(
                        parts[1],
                        Number.parseInt(parts[3], 10)
                    )
                };

                result.gpus.push(gpu);
                result.totalVRAM += gpu.memory.total;
            }
        } catch (e) {
            // Fallback to simpler query. Discard anything collected before the
            // failure so GPUs are never counted twice.
            result.gpus = [];
            result.totalVRAM = 0;

            try {
                const simpleQuery = execSync(
                    'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits',
                    execOptions(5000)
                );

                // Blank rows were dropped by toRows(), so empty output yields
                // zero GPUs instead of a phantom entry.
                toRows(simpleQuery).forEach((row, position) => {
                    const [name, memMB] = toFields(row);
                    const memGB = mibToGB(memMB);

                    result.gpus.push({
                        index: position,
                        name: name || 'NVIDIA GPU',
                        // The simple query has no usage data; assume all memory is free.
                        memory: { total: memGB, free: memGB, used: 0 },
                        capabilities: this.getGPUCapabilities(name),
                        speedCoefficient: this.calculateSpeedCoefficient(
                            name,
                            Number.parseInt(memMB, 10)
                        )
                    });
                    result.totalVRAM += memGB;
                });
            } catch (e2) {
                return null;
            }
        }

        result.isMultiGPU = result.gpus.length > 1;
        // The aggregate coefficient is that of the fastest detected GPU.
        result.speedCoefficient = result.gpus.length > 0
            ? Math.max(...result.gpus.map((g) => g.speedCoefficient))
            : 0;

        return result;
    }

    /**
     * Get GPU capabilities based on model name.
     *
     * Matching is by case-insensitive substring, first matching family wins.
     * Unrecognized names get the conservative defaults declared below.
     *
     * @param {string} name GPU product name (may be null/undefined).
     * @returns {{tensorCores: boolean, fp16: boolean, bf16: boolean,
     *   int8: boolean, fp8: boolean, nvlink: boolean,
     *   computeCapability: string, architecture: string}}
     */
    getGPUCapabilities(name) {
        const nameLower = (name || '').toLowerCase();
        const has = (...needles) => needles.some((needle) => nameLower.includes(needle));

        const capabilities = {
            tensorCores: false,
            fp16: true,
            bf16: false,
            int8: true,
            fp8: false,
            nvlink: false,
            computeCapability: '5.0',
            architecture: 'Unknown'
        };

        if (has('rtx 50', 'rtx50')) {
            // RTX 50 series (Blackwell). GeForce RTX 50 GPUs are listed with
            // compute capability 12.0 in NVIDIA's CUDA GPU table.
            capabilities.tensorCores = true;
            capabilities.bf16 = true;
            capabilities.fp8 = true;
            capabilities.computeCapability = '12.0';
            capabilities.architecture = 'Blackwell';
        } else if (has('rtx 40', 'rtx40', 'l40', 'l4')) {
            // RTX 40 series / L40 / L4 (Ada Lovelace)
            capabilities.tensorCores = true;
            capabilities.bf16 = true;
            capabilities.fp8 = true;
            capabilities.computeCapability = '8.9';
            capabilities.architecture = 'Ada Lovelace';
        } else if (has('rtx 30', 'rtx30', 'a100', 'a40', 'a30', 'a10')) {
            // RTX 30 series and A-series (Ampere). Only A100 and A30 are 8.0;
            // the other Ampere parts matched here are 8.6. Word boundaries keep
            // names like "RTX A3000" from being read as "A30".
            capabilities.tensorCores = true;
            capabilities.bf16 = true;
            capabilities.computeCapability = /\b(a100|a30)\b/.test(nameLower) ? '8.0' : '8.6';
            capabilities.architecture = 'Ampere';
            if (nameLower.includes('a100')) {
                capabilities.nvlink = true;
            }
        } else if (has('rtx 20', 'rtx20', 't4', 'quadro rtx')) {
            // RTX 20 series / T4 / Quadro RTX (Turing)
            capabilities.tensorCores = true;
            capabilities.computeCapability = '7.5';
            capabilities.architecture = 'Turing';
        } else if (has('gtx 16')) {
            // GTX 16 series (Turing without Tensor Cores)
            capabilities.computeCapability = '7.5';
            capabilities.architecture = 'Turing';
        } else if (has('v100', 'volta')) {
            // Tesla V100 (Volta)
            capabilities.tensorCores = true;
            capabilities.computeCapability = '7.0';
            capabilities.architecture = 'Volta';
            capabilities.nvlink = true;
        } else if (has('h100', 'h200')) {
            // H100 / H200 (Hopper)
            capabilities.tensorCores = true;
            capabilities.bf16 = true;
            capabilities.fp8 = true;
            capabilities.nvlink = true;
            capabilities.computeCapability = '9.0';
            capabilities.architecture = 'Hopper';
        }

        return capabilities;
    }

    /**
     * Calculate speed coefficient for LLM inference.
     *
     * Looks the GPU up in a table of known models; unknown models get an
     * estimate derived from the VRAM size.
     *
     * @param {string} name GPU product name (may be null/undefined).
     * @param {number} vramMB total VRAM as reported by nvidia-smi (MiB).
     * @returns {number} speed coefficient (tokens/sec per B params at Q4).
     */
    calculateSpeedCoefficient(name, vramMB) {
        const nameLower = (name || '').toLowerCase();
        const vramGB = Math.round(vramMB / 1024);

        // Ordered table: matching is by substring and the first hit wins, so
        // more specific names ("... ti", "l40") must precede their prefixes.
        const speedTable = [
            // RTX 50 series
            ['rtx 5090', 300],
            ['rtx 5080', 260],
            ['rtx 5070 ti', 230],
            ['rtx 5070', 210],
            ['rtx 5060', 180],

            // RTX 40 series
            ['rtx 4090', 260],
            ['rtx 4080', 220],
            ['rtx 4070 ti', 190],
            ['rtx 4070', 170],
            ['rtx 4060 ti', 150],
            ['rtx 4060', 130],

            // RTX 30 series
            ['rtx 3090 ti', 220],
            ['rtx 3090', 200],
            ['rtx 3080 ti', 190],
            ['rtx 3080', 180],
            ['rtx 3070 ti', 160],
            ['rtx 3070', 150],
            ['rtx 3060 ti', 130],
            ['rtx 3060', 110],

            // RTX 20 series
            ['rtx 2080 ti', 140],
            ['rtx 2080', 120],
            ['rtx 2070', 100],
            ['rtx 2060', 80],

            // Data center
            ['h100', 400],
            ['h200', 450],
            ['a100', 300],
            ['l40', 220],
            ['l4', 150],
            ['a40', 180],
            ['t4', 70],
            ['v100', 120]
        ];

        const known = speedTable.find(([model]) => nameLower.includes(model));
        if (known) {
            return known[1];
        }

        // Estimate based on VRAM if model not found. A NaN size fails every
        // comparison and therefore lands on the lowest tier.
        if (vramGB >= 24) return 200;
        if (vramGB >= 16) return 150;
        if (vramGB >= 12) return 120;
        if (vramGB >= 8) return 90;
        if (vramGB >= 6) return 60;
        return 40;
    }

    /**
     * Get primary GPU: the one with the most VRAM, ties broken by the higher
     * speed coefficient (the earlier GPU wins a full tie).
     *
     * @returns {object|null} GPU entry from detect(), or null when none exist.
     */
    getPrimaryGPU() {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return null;

        let best = null;
        for (const gpu of info.gpus) {
            const moreVRAM = best !== null && gpu.memory.total > best.memory.total;
            const fasterAtSameVRAM = best !== null &&
                gpu.memory.total === best.memory.total &&
                gpu.speedCoefficient > best.speedCoefficient;

            if (best === null || moreVRAM || fasterAtSameVRAM) {
                best = gpu;
            }
        }
        return best;
    }

    /**
     * Get hardware fingerprint for benchmarks.
     *
     * Format: `cuda-<primary-gpu-name>-<totalVRAM>gb[-x<gpuCount>]`, where the
     * name is lower-cased, stripped of vendor/brand words and dash-separated.
     *
     * @returns {string|null} fingerprint, or null when no GPU is detected.
     */
    getFingerprint() {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return null;

        const primary = this.getPrimaryGPU();
        const gpuName = primary.name.toLowerCase()
            .replace(/nvidia|geforce|quadro|tesla/g, '')
            // Trim BEFORE collapsing whitespace: removing the brand words
            // leaves leading spaces that would otherwise become a stray '-'.
            .trim()
            .replace(/\s+/g, '-');

        const multiSuffix = info.isMultiGPU ? `-x${info.gpus.length}` : '';
        return `cuda-${gpuName}-${info.totalVRAM}gb${multiSuffix}`;
    }

    /**
     * Estimate inference speed for a model size.
     *
     * @param {number} paramsB model size in billions of parameters.
     * @param {string} [quantization='Q4_K_M'] quantization label; unknown
     *   labels use a multiplier of 2.0.
     * @param {number|null} [gpuIndex=null] position in the detected GPU list;
     *   when null or out of range the primary GPU is used.
     * @returns {number} rounded tokens/sec estimate; 0 when no GPU is detected
     *   or `paramsB` is not a positive number.
     */
    estimateTokensPerSecond(paramsB, quantization = 'Q4_K_M', gpuIndex = null) {
        const info = this.detect();
        if (!info || info.gpus.length === 0) return 0;

        // Guard the division below: zero/negative/NaN sizes have no estimate.
        if (!(paramsB > 0)) return 0;

        const gpu = gpuIndex !== null && info.gpus[gpuIndex]
            ? info.gpus[gpuIndex]
            : this.getPrimaryGPU();

        // Quantization multipliers (how much faster vs FP16)
        const quantMult = {
            'FP16': 1.0,
            'Q8_0': 1.5,
            'Q6_K': 1.8,
            'Q5_K_M': 2.0,
            'Q5_0': 2.0,
            'Q4_K_M': 2.5,
            'Q4_0': 2.8,
            'Q3_K_M': 3.0,
            'Q2_K': 3.5,
            'IQ4_XS': 2.6,
            'IQ3_XXS': 3.2
        };

        // Own-property check so labels like "constructor" cannot pick up
        // inherited Object.prototype members.
        const isKnownQuant = Object.prototype.hasOwnProperty.call(quantMult, quantization);
        const mult = isKnownQuant ? quantMult[quantization] : 2.0;
        const baseSpeed = gpu.speedCoefficient / paramsB * mult;

        return Math.round(baseSpeed);
    }

    /**
     * Check if model will fit in VRAM.
     *
     * @param {number} sizeGB model size in GB.
     * @param {boolean} [useMultiGPU=true] true to count the VRAM of all GPUs,
     *   false to count only the primary GPU.
     * @returns {boolean} true when the model fits with 2GB of headroom left;
     *   false when no GPU information is available.
     */
    willFitInVRAM(sizeGB, useMultiGPU = true) {
        const info = this.detect();
        if (!info) return false;

        const availableVRAM = useMultiGPU
            ? info.totalVRAM
            : (this.getPrimaryGPU()?.memory?.total || 0);
        // Leave 2GB headroom for system
        return sizeGB <= (availableVRAM - 2);
    }
}
|
|
416
|
+
|
|
417
|
+
module.exports = CUDADetector;
|