llm-checker 3.2.8 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -17
- package/bin/enhanced_cli.js +303 -3
- package/package.json +1 -1
- package/src/calibration/calibration-manager.js +798 -0
- package/src/calibration/policy-routing.js +376 -0
- package/src/calibration/schemas.js +212 -0
- package/src/hardware/backends/cuda-detector.js +355 -5
|
@@ -4,12 +4,15 @@
|
|
|
4
4
|
* Supports multi-GPU setups and detailed CUDA information
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
+
const fs = require('fs');
|
|
8
|
+
const os = require('os');
|
|
7
9
|
const { execSync, exec } = require('child_process');
|
|
8
10
|
|
|
9
11
|
class CUDADetector {
|
|
10
12
|
constructor() {
|
|
11
13
|
this.cache = null;
|
|
12
14
|
this.isAvailable = null;
|
|
15
|
+
this.detectionMode = null;
|
|
13
16
|
}
|
|
14
17
|
|
|
15
18
|
/**
|
|
@@ -20,18 +23,134 @@ class CUDADetector {
|
|
|
20
23
|
return this.isAvailable;
|
|
21
24
|
}
|
|
22
25
|
|
|
26
|
+
if (this.hasNvidiaSMI()) {
|
|
27
|
+
this.isAvailable = true;
|
|
28
|
+
this.detectionMode = 'nvidia-smi';
|
|
29
|
+
return this.isAvailable;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
if (this.isJetsonPlatform() && this.hasJetsonCudaSupport()) {
|
|
33
|
+
this.isAvailable = true;
|
|
34
|
+
this.detectionMode = 'jetson';
|
|
35
|
+
return this.isAvailable;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
this.isAvailable = false;
|
|
39
|
+
this.detectionMode = null;
|
|
40
|
+
|
|
41
|
+
return this.isAvailable;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
hasNvidiaSMI() {
|
|
23
45
|
try {
|
|
24
46
|
execSync('nvidia-smi --version', {
|
|
25
47
|
encoding: 'utf8',
|
|
26
48
|
timeout: 5000,
|
|
27
49
|
stdio: ['pipe', 'pipe', 'pipe']
|
|
28
50
|
});
|
|
29
|
-
|
|
51
|
+
return true;
|
|
30
52
|
} catch (e) {
|
|
31
|
-
|
|
53
|
+
return false;
|
|
32
54
|
}
|
|
55
|
+
}
|
|
33
56
|
|
|
34
|
-
|
|
57
|
+
readFileIfExists(path) {
|
|
58
|
+
try {
|
|
59
|
+
if (!fs.existsSync(path)) return null;
|
|
60
|
+
return fs.readFileSync(path, 'utf8').replace(/\0/g, '').trim();
|
|
61
|
+
} catch (e) {
|
|
62
|
+
return null;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
isJetsonPlatform() {
|
|
67
|
+
if (process.platform !== 'linux') return false;
|
|
68
|
+
|
|
69
|
+
// Strong L4T marker present on Jetson devices
|
|
70
|
+
if (this.readFileIfExists('/etc/nv_tegra_release')) {
|
|
71
|
+
return true;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const modelPaths = [
|
|
75
|
+
'/proc/device-tree/model',
|
|
76
|
+
'/sys/firmware/devicetree/base/model',
|
|
77
|
+
'/proc/device-tree/compatible',
|
|
78
|
+
'/sys/firmware/devicetree/base/compatible'
|
|
79
|
+
];
|
|
80
|
+
|
|
81
|
+
const jetsonMarkers = [
|
|
82
|
+
'jetson',
|
|
83
|
+
'tegra',
|
|
84
|
+
'orin',
|
|
85
|
+
'xavier',
|
|
86
|
+
'p3701',
|
|
87
|
+
'p3767',
|
|
88
|
+
'p2888',
|
|
89
|
+
'p3668',
|
|
90
|
+
'p3448'
|
|
91
|
+
];
|
|
92
|
+
|
|
93
|
+
for (const modelPath of modelPaths) {
|
|
94
|
+
const model = this.readFileIfExists(modelPath);
|
|
95
|
+
if (!model) continue;
|
|
96
|
+
|
|
97
|
+
const modelLower = model.toLowerCase();
|
|
98
|
+
if (jetsonMarkers.some((marker) => modelLower.includes(marker))) {
|
|
99
|
+
return true;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Jetson kernels often include tegra in release string
|
|
104
|
+
const kernelRelease = (os.release() || '').toLowerCase();
|
|
105
|
+
if (kernelRelease.includes('tegra')) {
|
|
106
|
+
return true;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Last-resort utility-based detection for minimal installs
|
|
110
|
+
if (process.arch === 'arm64' && (
|
|
111
|
+
fs.existsSync('/usr/bin/tegrastats') ||
|
|
112
|
+
fs.existsSync('/usr/sbin/nvpmodel')
|
|
113
|
+
)) {
|
|
114
|
+
return true;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const cpuInfo = this.readFileIfExists('/proc/cpuinfo');
|
|
118
|
+
if (cpuInfo) {
|
|
119
|
+
const cpuLower = cpuInfo.toLowerCase();
|
|
120
|
+
if (cpuLower.includes('nvidia') && cpuLower.includes('tegra')) {
|
|
121
|
+
return true;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return false;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
hasJetsonCudaSupport() {
|
|
129
|
+
const runtimeHints = [
|
|
130
|
+
'/usr/local/cuda',
|
|
131
|
+
'/usr/bin/tegrastats',
|
|
132
|
+
'/usr/sbin/nvpmodel',
|
|
133
|
+
'/usr/lib/aarch64-linux-gnu/tegra',
|
|
134
|
+
'/etc/nv_tegra_release',
|
|
135
|
+
'/dev/nvhost-gpu',
|
|
136
|
+
'/dev/nvmap',
|
|
137
|
+
'/proc/driver/nvidia/version'
|
|
138
|
+
];
|
|
139
|
+
|
|
140
|
+
if (runtimeHints.some((hintPath) => fs.existsSync(hintPath))) {
|
|
141
|
+
return true;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
try {
|
|
145
|
+
execSync('nvcc --version', {
|
|
146
|
+
encoding: 'utf8',
|
|
147
|
+
timeout: 5000,
|
|
148
|
+
stdio: ['pipe', 'pipe', 'pipe']
|
|
149
|
+
});
|
|
150
|
+
return true;
|
|
151
|
+
} catch (e) {
|
|
152
|
+
return false;
|
|
153
|
+
}
|
|
35
154
|
}
|
|
36
155
|
|
|
37
156
|
/**
|
|
@@ -47,7 +166,14 @@ class CUDADetector {
|
|
|
47
166
|
}
|
|
48
167
|
|
|
49
168
|
try {
|
|
50
|
-
const info = this.
|
|
169
|
+
const info = this.detectionMode === 'jetson'
|
|
170
|
+
? this.getJetsonGPUInfo()
|
|
171
|
+
: this.getGPUInfo();
|
|
172
|
+
|
|
173
|
+
if (!info || !Array.isArray(info.gpus) || info.gpus.length === 0) {
|
|
174
|
+
return null;
|
|
175
|
+
}
|
|
176
|
+
|
|
51
177
|
this.cache = info;
|
|
52
178
|
return info;
|
|
53
179
|
} catch (error) {
|
|
@@ -192,6 +318,201 @@ class CUDADetector {
|
|
|
192
318
|
return result;
|
|
193
319
|
}
|
|
194
320
|
|
|
321
|
+
getJetsonGPUInfo() {
|
|
322
|
+
const modelRaw = this.readJetsonModel();
|
|
323
|
+
const model = this.normalizeJetsonModel(modelRaw);
|
|
324
|
+
const cudaVersion = this.detectJetsonCudaVersion();
|
|
325
|
+
const driverVersion = this.detectJetsonDriverVersion();
|
|
326
|
+
const totalSystemGB = Math.max(1, Math.round(os.totalmem() / (1024 ** 3)));
|
|
327
|
+
const sharedGpuMemoryGB = Math.max(1, Math.round(totalSystemGB * 0.85));
|
|
328
|
+
const capabilities = this.getJetsonCapabilities(modelRaw || model);
|
|
329
|
+
const speedCoefficient = this.getJetsonSpeedCoefficient(modelRaw || model);
|
|
330
|
+
|
|
331
|
+
return {
|
|
332
|
+
gpus: [
|
|
333
|
+
{
|
|
334
|
+
index: 0,
|
|
335
|
+
name: model,
|
|
336
|
+
uuid: null,
|
|
337
|
+
memory: {
|
|
338
|
+
total: sharedGpuMemoryGB,
|
|
339
|
+
free: Math.max(0, sharedGpuMemoryGB - 1),
|
|
340
|
+
used: Math.min(1, sharedGpuMemoryGB)
|
|
341
|
+
},
|
|
342
|
+
computeMode: 'Default',
|
|
343
|
+
pcie: {
|
|
344
|
+
generation: 0,
|
|
345
|
+
width: 0
|
|
346
|
+
},
|
|
347
|
+
power: {
|
|
348
|
+
draw: 0,
|
|
349
|
+
limit: 0
|
|
350
|
+
},
|
|
351
|
+
temperature: 0,
|
|
352
|
+
utilization: {
|
|
353
|
+
gpu: 0,
|
|
354
|
+
memory: 0
|
|
355
|
+
},
|
|
356
|
+
clocks: {
|
|
357
|
+
current: 0,
|
|
358
|
+
max: 0
|
|
359
|
+
},
|
|
360
|
+
capabilities,
|
|
361
|
+
speedCoefficient
|
|
362
|
+
}
|
|
363
|
+
],
|
|
364
|
+
driver: driverVersion,
|
|
365
|
+
cuda: cudaVersion,
|
|
366
|
+
totalVRAM: sharedGpuMemoryGB,
|
|
367
|
+
backend: 'cuda',
|
|
368
|
+
isMultiGPU: false,
|
|
369
|
+
speedCoefficient
|
|
370
|
+
};
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
readJetsonModel() {
|
|
374
|
+
const sources = [
|
|
375
|
+
'/proc/device-tree/model',
|
|
376
|
+
'/sys/firmware/devicetree/base/model'
|
|
377
|
+
];
|
|
378
|
+
|
|
379
|
+
for (const source of sources) {
|
|
380
|
+
const model = this.readFileIfExists(source);
|
|
381
|
+
if (model) return model;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
return null;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
normalizeJetsonModel(model) {
|
|
388
|
+
const modelLower = (model || '').toLowerCase();
|
|
389
|
+
|
|
390
|
+
if (modelLower.includes('agx orin')) return 'NVIDIA Jetson AGX Orin';
|
|
391
|
+
if (modelLower.includes('orin nx')) return 'NVIDIA Jetson Orin NX';
|
|
392
|
+
if (modelLower.includes('orin nano')) return 'NVIDIA Jetson Orin Nano';
|
|
393
|
+
if (modelLower.includes('orin')) return 'NVIDIA Jetson Orin';
|
|
394
|
+
if (modelLower.includes('xavier nx')) return 'NVIDIA Jetson Xavier NX';
|
|
395
|
+
if (modelLower.includes('agx xavier')) return 'NVIDIA Jetson AGX Xavier';
|
|
396
|
+
if (modelLower.includes('xavier')) return 'NVIDIA Jetson Xavier';
|
|
397
|
+
if (modelLower.includes('jetson nano')) return 'NVIDIA Jetson Nano';
|
|
398
|
+
if (modelLower.includes('tx2')) return 'NVIDIA Jetson TX2';
|
|
399
|
+
|
|
400
|
+
return 'NVIDIA Jetson (CUDA)';
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
detectJetsonCudaVersion() {
|
|
404
|
+
const versionTxt = this.readFileIfExists('/usr/local/cuda/version.txt');
|
|
405
|
+
if (versionTxt) {
|
|
406
|
+
const match = versionTxt.match(/CUDA Version\s+([\d.]+)/i);
|
|
407
|
+
if (match) return match[1];
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
try {
|
|
411
|
+
const nvccVersion = execSync('nvcc --version', {
|
|
412
|
+
encoding: 'utf8',
|
|
413
|
+
timeout: 5000,
|
|
414
|
+
stdio: ['pipe', 'pipe', 'pipe']
|
|
415
|
+
});
|
|
416
|
+
const match = nvccVersion.match(/release\s+([\d.]+)/i);
|
|
417
|
+
if (match) return match[1];
|
|
418
|
+
} catch (e) {
|
|
419
|
+
// Ignore missing nvcc
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
return null;
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
detectJetsonDriverVersion() {
|
|
426
|
+
const versionInfo = this.readFileIfExists('/proc/driver/nvidia/version');
|
|
427
|
+
if (!versionInfo) return null;
|
|
428
|
+
|
|
429
|
+
const match = versionInfo.match(/Kernel Module\s+([0-9.]+)/i);
|
|
430
|
+
return match ? match[1] : null;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
getJetsonCapabilities(model) {
|
|
434
|
+
const modelLower = (model || '').toLowerCase();
|
|
435
|
+
|
|
436
|
+
if (modelLower.includes('orin')) {
|
|
437
|
+
return {
|
|
438
|
+
tensorCores: true,
|
|
439
|
+
fp16: true,
|
|
440
|
+
bf16: true,
|
|
441
|
+
int8: true,
|
|
442
|
+
fp8: false,
|
|
443
|
+
nvlink: false,
|
|
444
|
+
computeCapability: '8.7',
|
|
445
|
+
architecture: 'Ampere'
|
|
446
|
+
};
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
if (modelLower.includes('xavier')) {
|
|
450
|
+
return {
|
|
451
|
+
tensorCores: true,
|
|
452
|
+
fp16: true,
|
|
453
|
+
bf16: false,
|
|
454
|
+
int8: true,
|
|
455
|
+
fp8: false,
|
|
456
|
+
nvlink: false,
|
|
457
|
+
computeCapability: '7.2',
|
|
458
|
+
architecture: 'Volta'
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
if (modelLower.includes('tx2')) {
|
|
463
|
+
return {
|
|
464
|
+
tensorCores: false,
|
|
465
|
+
fp16: true,
|
|
466
|
+
bf16: false,
|
|
467
|
+
int8: true,
|
|
468
|
+
fp8: false,
|
|
469
|
+
nvlink: false,
|
|
470
|
+
computeCapability: '6.2',
|
|
471
|
+
architecture: 'Pascal'
|
|
472
|
+
};
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
if (modelLower.includes('nano')) {
|
|
476
|
+
return {
|
|
477
|
+
tensorCores: false,
|
|
478
|
+
fp16: true,
|
|
479
|
+
bf16: false,
|
|
480
|
+
int8: true,
|
|
481
|
+
fp8: false,
|
|
482
|
+
nvlink: false,
|
|
483
|
+
computeCapability: '5.3',
|
|
484
|
+
architecture: 'Maxwell'
|
|
485
|
+
};
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
return {
|
|
489
|
+
tensorCores: false,
|
|
490
|
+
fp16: true,
|
|
491
|
+
bf16: false,
|
|
492
|
+
int8: true,
|
|
493
|
+
fp8: false,
|
|
494
|
+
nvlink: false,
|
|
495
|
+
computeCapability: '6.2',
|
|
496
|
+
architecture: 'Jetson'
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
getJetsonSpeedCoefficient(model) {
|
|
501
|
+
const modelLower = (model || '').toLowerCase();
|
|
502
|
+
|
|
503
|
+
if (modelLower.includes('agx orin')) return 95;
|
|
504
|
+
if (modelLower.includes('orin nx')) return 75;
|
|
505
|
+
if (modelLower.includes('orin nano')) return 65;
|
|
506
|
+
if (modelLower.includes('orin')) return 70;
|
|
507
|
+
if (modelLower.includes('agx xavier')) return 55;
|
|
508
|
+
if (modelLower.includes('xavier nx')) return 45;
|
|
509
|
+
if (modelLower.includes('xavier')) return 50;
|
|
510
|
+
if (modelLower.includes('tx2')) return 30;
|
|
511
|
+
if (modelLower.includes('nano')) return 24;
|
|
512
|
+
|
|
513
|
+
return 35;
|
|
514
|
+
}
|
|
515
|
+
|
|
195
516
|
/**
|
|
196
517
|
* Get GPU capabilities based on model name
|
|
197
518
|
*/
|
|
@@ -283,6 +604,24 @@ class CUDADetector {
|
|
|
283
604
|
capabilities.architecture = 'Volta';
|
|
284
605
|
capabilities.nvlink = true;
|
|
285
606
|
}
|
|
607
|
+
// Jetson Orin (Ampere)
|
|
608
|
+
else if (nameLower.includes('jetson') && nameLower.includes('orin')) {
|
|
609
|
+
capabilities.tensorCores = true;
|
|
610
|
+
capabilities.bf16 = true;
|
|
611
|
+
capabilities.computeCapability = '8.7';
|
|
612
|
+
capabilities.architecture = 'Ampere';
|
|
613
|
+
}
|
|
614
|
+
// Jetson Xavier (Volta)
|
|
615
|
+
else if (nameLower.includes('jetson') && nameLower.includes('xavier')) {
|
|
616
|
+
capabilities.tensorCores = true;
|
|
617
|
+
capabilities.computeCapability = '7.2';
|
|
618
|
+
capabilities.architecture = 'Volta';
|
|
619
|
+
}
|
|
620
|
+
// Jetson Nano / TX2
|
|
621
|
+
else if (nameLower.includes('jetson') && (nameLower.includes('nano') || nameLower.includes('tx2'))) {
|
|
622
|
+
capabilities.computeCapability = nameLower.includes('tx2') ? '6.2' : '5.3';
|
|
623
|
+
capabilities.architecture = nameLower.includes('tx2') ? 'Pascal' : 'Maxwell';
|
|
624
|
+
}
|
|
286
625
|
|
|
287
626
|
return capabilities;
|
|
288
627
|
}
|
|
@@ -339,7 +678,18 @@ class CUDADetector {
|
|
|
339
678
|
'a40': 180,
|
|
340
679
|
't4': 70,
|
|
341
680
|
'v100': 120,
|
|
342
|
-
'p100': 45
|
|
681
|
+
'p100': 45,
|
|
682
|
+
|
|
683
|
+
// Jetson family
|
|
684
|
+
'jetson agx orin': 95,
|
|
685
|
+
'jetson orin nx': 75,
|
|
686
|
+
'jetson orin nano': 65,
|
|
687
|
+
'jetson orin': 70,
|
|
688
|
+
'jetson agx xavier': 55,
|
|
689
|
+
'jetson xavier nx': 45,
|
|
690
|
+
'jetson xavier': 50,
|
|
691
|
+
'jetson tx2': 30,
|
|
692
|
+
'jetson nano': 24
|
|
343
693
|
};
|
|
344
694
|
|
|
345
695
|
for (const [model, speed] of Object.entries(speedMap)) {
|