llm-checker 3.2.4 → 3.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -6
- package/bin/enhanced_cli.js +13 -2
- package/package.json +4 -4
- package/src/hardware/backends/rocm-detector.js +20 -1
- package/src/hardware/detector.js +75 -10
- package/src/hardware/unified-detector.js +49 -10
- package/src/index.js +19 -4
- package/src/models/deterministic-selector.js +720 -46
- package/src/models/intelligent-selector.js +2 -0
- package/src/models/moe-assumptions.js +311 -0
- package/src/models/scoring-engine.js +38 -13
package/README.md
CHANGED
|
@@ -93,14 +93,19 @@ npm install sql.js
|
|
|
93
93
|
|
|
94
94
|
LLM Checker is published in all primary channels:
|
|
95
95
|
|
|
96
|
-
- npm (latest): [`llm-checker@
|
|
97
|
-
- GitHub
|
|
96
|
+
- npm (latest): [`llm-checker@latest`](https://www.npmjs.com/package/llm-checker)
|
|
97
|
+
- GitHub Releases: [Release history](https://github.com/Pavelevich/llm-checker/releases)
|
|
98
98
|
- GitHub Packages: [`@pavelevich/llm-checker`](https://github.com/users/Pavelevich/packages/npm/package/llm-checker)
|
|
99
99
|
|
|
100
|
-
### v3.2.
|
|
100
|
+
### v3.2.6 Highlights
|
|
101
101
|
|
|
102
|
-
-
|
|
103
|
-
-
|
|
102
|
+
- Recommendation engine now enforces feasible 30B-class coverage on high-capacity discrete multi-GPU setups (for non-speed objectives).
|
|
103
|
+
- Heterogeneous GPU inventories are preserved in output summaries and downstream recommendation inputs.
|
|
104
|
+
- Added and validated fallback mappings/paths for:
|
|
105
|
+
- AMD Radeon AI PRO R9700 (PCI ID `7551`)
|
|
106
|
+
- NVIDIA GTX 1070 Ti (device `1b82`)
|
|
107
|
+
- Linux RX 7900 XTX detection via non-ROCm fallbacks (`lspci`/`sysfs`)
|
|
108
|
+
- Expanded deterministic and hardware regression coverage for multi-GPU and unified-memory edge cases.
|
|
104
109
|
|
|
105
110
|
### Optional: Install from GitHub Packages
|
|
106
111
|
|
|
@@ -110,7 +115,7 @@ echo "@pavelevich:registry=https://npm.pkg.github.com" >> ~/.npmrc
|
|
|
110
115
|
echo "//npm.pkg.github.com/:_authToken=${GITHUB_TOKEN}" >> ~/.npmrc
|
|
111
116
|
|
|
112
117
|
# 2) Install
|
|
113
|
-
npm install -g @pavelevich/llm-checker@
|
|
118
|
+
npm install -g @pavelevich/llm-checker@latest
|
|
114
119
|
```
|
|
115
120
|
|
|
116
121
|
---
|
|
@@ -356,6 +361,16 @@ Metal:
|
|
|
356
361
|
llm-checker recommend
|
|
357
362
|
```
|
|
358
363
|
|
|
364
|
+
Use optimization profiles to steer ranking by intent:
|
|
365
|
+
|
|
366
|
+
```bash
|
|
367
|
+
llm-checker recommend --optimize balanced
|
|
368
|
+
llm-checker recommend --optimize speed
|
|
369
|
+
llm-checker recommend --optimize quality
|
|
370
|
+
llm-checker recommend --optimize context
|
|
371
|
+
llm-checker recommend --optimize coding
|
|
372
|
+
```
|
|
373
|
+
|
|
359
374
|
```
|
|
360
375
|
INTELLIGENT RECOMMENDATIONS BY CATEGORY
|
|
361
376
|
Hardware Tier: HIGH | Models Analyzed: 205
|
|
@@ -465,6 +480,48 @@ Memory requirements are calculated using calibrated bytes-per-parameter values:
|
|
|
465
480
|
|
|
466
481
|
The selector automatically picks the best quantization that fits your available memory.
|
|
467
482
|
|
|
483
|
+
For MoE models, deterministic memory estimation supports explicit sparse metadata when present:
|
|
484
|
+
|
|
485
|
+
- `total_params_b`
|
|
486
|
+
- `active_params_b`
|
|
487
|
+
- `expert_count`
|
|
488
|
+
- `experts_active_per_token`
|
|
489
|
+
|
|
490
|
+
Normalized recommendation variants expose both snake_case and camelCase metadata aliases
|
|
491
|
+
(for example: `total_params_b` + `totalParamsB`) when available.
|
|
492
|
+
|
|
493
|
+
MoE parameter path selection is deterministic and uses this fallback order:
|
|
494
|
+
|
|
495
|
+
1. `active_params_b` (assumption source: `moe_active_metadata`)
|
|
496
|
+
2. `total_params_b * (experts_active_per_token / expert_count)` (assumption source: `moe_derived_expert_ratio`)
|
|
497
|
+
3. `total_params_b` (assumption source: `moe_fallback_total_params`)
|
|
498
|
+
4. Model `paramsB` fallback (assumption source: `moe_fallback_model_params`)
|
|
499
|
+
|
|
500
|
+
Dense models continue to use the dense parameter path (`dense_params`) unchanged.
|
|
501
|
+
|
|
502
|
+
When `active_params_b` (or a derived active-ratio path) is available, inference memory
|
|
503
|
+
uses the sparse-active parameter estimate even if artifact size metadata is present.
|
|
504
|
+
|
|
505
|
+
### Runtime-Aware MoE Speed Estimation
|
|
506
|
+
|
|
507
|
+
MoE speed estimates now include runtime-specific overhead assumptions (routing, communication, offload), instead of using a single fixed MoE boost.
|
|
508
|
+
|
|
509
|
+
- Canonical helper: `src/models/moe-assumptions.js`
|
|
510
|
+
- Applied in both:
|
|
511
|
+
- `src/models/deterministic-selector.js`
|
|
512
|
+
- `src/models/scoring-engine.js`
|
|
513
|
+
|
|
514
|
+
Current runtime profiles:
|
|
515
|
+
|
|
516
|
+
| Runtime | Routing | Communication | Offload | Max Effective Gain |
|
|
517
|
+
|:--------|:-------:|:-------------:|:-------:|:------------------:|
|
|
518
|
+
| `ollama` | 18% | 13% | 8% | 2.35x |
|
|
519
|
+
| `vllm` | 12% | 8% | 4% | 2.65x |
|
|
520
|
+
| `mlx` | 16% | 10% | 5% | 2.45x |
|
|
521
|
+
| `llama.cpp` | 20% | 14% | 9% | 2.30x |
|
|
522
|
+
|
|
523
|
+
Recommendation outputs now expose these assumptions through runtime metadata and MoE speed diagnostics.
|
|
524
|
+
|
|
468
525
|
---
|
|
469
526
|
|
|
470
527
|
## Supported Hardware
|
package/bin/enhanced_cli.js
CHANGED
|
@@ -1027,11 +1027,13 @@ function displayIntelligentRecommendations(intelligentData) {
|
|
|
1027
1027
|
|
|
1028
1028
|
const { summary, recommendations } = intelligentData;
|
|
1029
1029
|
const tier = summary.hardware_tier.replace('_', ' ').toUpperCase();
|
|
1030
|
+
const optimizeProfile = (summary.optimize_for || intelligentData.optimizeFor || 'balanced').toUpperCase();
|
|
1030
1031
|
const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
|
|
1031
1032
|
|
|
1032
1033
|
console.log('\n' + chalk.bgRed.white.bold(' INTELLIGENT RECOMMENDATIONS BY CATEGORY '));
|
|
1033
1034
|
console.log(chalk.red('╭' + '─'.repeat(65)));
|
|
1034
1035
|
console.log(chalk.red('│') + ` Hardware Tier: ${tierColor.bold(tier)} | Models Analyzed: ${chalk.cyan.bold(intelligentData.totalModelsAnalyzed)}`);
|
|
1036
|
+
console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)}`);
|
|
1035
1037
|
console.log(chalk.red('│'));
|
|
1036
1038
|
|
|
1037
1039
|
// Mostrar mejor modelo general
|
|
@@ -1040,6 +1042,7 @@ function displayIntelligentRecommendations(intelligentData) {
|
|
|
1040
1042
|
console.log(chalk.red('│') + ` ${chalk.bold.yellow('BEST OVERALL:')} ${chalk.green.bold(best.name)}`);
|
|
1041
1043
|
console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(best.command)}`);
|
|
1042
1044
|
console.log(chalk.red('│') + ` Score: ${chalk.yellow.bold(best.score)}/100 | Category: ${chalk.magenta(best.category)}`);
|
|
1045
|
+
console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(best.quantization || 'Q4_K_M')}`);
|
|
1043
1046
|
console.log(chalk.red('│'));
|
|
1044
1047
|
}
|
|
1045
1048
|
|
|
@@ -1062,6 +1065,7 @@ function displayIntelligentRecommendations(intelligentData) {
|
|
|
1062
1065
|
console.log(chalk.red('│') + ` ${chalk.bold.white(categoryName)} (${icon}):`);
|
|
1063
1066
|
console.log(chalk.red('│') + ` ${chalk.green(model.name)} (${model.size})`);
|
|
1064
1067
|
console.log(chalk.red('│') + ` Score: ${scoreColor.bold(model.score)}/100 | Pulls: ${chalk.gray(model.pulls?.toLocaleString() || 'N/A')}`);
|
|
1068
|
+
console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(model.quantization || 'Q4_K_M')}`);
|
|
1065
1069
|
console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(model.command)}`);
|
|
1066
1070
|
console.log(chalk.red('│'));
|
|
1067
1071
|
});
|
|
@@ -2303,6 +2307,7 @@ auditCommand
|
|
|
2303
2307
|
.option('--out-dir <path>', 'Output directory when --out is omitted', 'audit-reports')
|
|
2304
2308
|
.option('-u, --use-case <case>', 'Use case when --command check is selected', 'general')
|
|
2305
2309
|
.option('-c, --category <category>', 'Category hint when --command recommend is selected')
|
|
2310
|
+
.option('--optimize <profile>', 'Optimization profile for recommend mode (balanced|speed|quality|context|coding)', 'balanced')
|
|
2306
2311
|
.option('--runtime <runtime>', `Runtime for check mode (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
|
|
2307
2312
|
.option('--include-cloud', 'Include cloud models in check-mode analysis')
|
|
2308
2313
|
.option('--max-size <size>', 'Maximum model size for check mode (e.g., "24B" or "12GB")')
|
|
@@ -2356,7 +2361,9 @@ auditCommand
|
|
|
2356
2361
|
runtimeBackend = selectedRuntime;
|
|
2357
2362
|
policyCandidates = collectCandidatesFromAnalysis(analysisResult);
|
|
2358
2363
|
} else {
|
|
2359
|
-
recommendationResult = await checker.generateIntelligentRecommendations(hardware);
|
|
2364
|
+
recommendationResult = await checker.generateIntelligentRecommendations(hardware, {
|
|
2365
|
+
optimizeFor: options.optimize
|
|
2366
|
+
});
|
|
2360
2367
|
if (!recommendationResult) {
|
|
2361
2368
|
throw new Error('Unable to generate recommendation data for policy audit export.');
|
|
2362
2369
|
}
|
|
@@ -2390,6 +2397,7 @@ auditCommand
|
|
|
2390
2397
|
runtime: runtimeBackend,
|
|
2391
2398
|
use_case: selectedCommand === 'check' ? normalizeUseCaseInput(options.useCase) : null,
|
|
2392
2399
|
category: selectedCommand === 'recommend' ? options.category || null : null,
|
|
2400
|
+
optimize: selectedCommand === 'recommend' ? options.optimize || 'balanced' : null,
|
|
2393
2401
|
include_cloud: Boolean(options.includeCloud)
|
|
2394
2402
|
},
|
|
2395
2403
|
hardware
|
|
@@ -2798,6 +2806,7 @@ program
|
|
|
2798
2806
|
.command('recommend')
|
|
2799
2807
|
.description('Get intelligent model recommendations for your hardware')
|
|
2800
2808
|
.option('-c, --category <category>', 'Get recommendations for specific category (coding, talking, reading, etc.)')
|
|
2809
|
+
.option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
|
|
2801
2810
|
.option('--no-verbose', 'Disable step-by-step progress display')
|
|
2802
2811
|
.option('--policy <file>', 'Evaluate recommendations against a policy file')
|
|
2803
2812
|
.addHelpText(
|
|
@@ -2821,7 +2830,9 @@ Enterprise policy examples:
|
|
|
2821
2830
|
}
|
|
2822
2831
|
|
|
2823
2832
|
const hardware = await checker.getSystemInfo();
|
|
2824
|
-
const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware);
|
|
2833
|
+
const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware, {
|
|
2834
|
+
optimizeFor: options.optimize
|
|
2835
|
+
});
|
|
2825
2836
|
|
|
2826
2837
|
if (!intelligentRecommendations) {
|
|
2827
2838
|
console.error(chalk.red('\nFailed to generate recommendations'));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-checker",
|
|
3
|
-
"version": "3.2.4",
|
|
3
|
+
"version": "3.2.6",
|
|
4
4
|
"description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
|
|
5
5
|
"bin": {
|
|
6
6
|
"llm-checker": "bin/cli.js",
|
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
"main": "src/index.js",
|
|
11
11
|
"scripts": {
|
|
12
12
|
"test": "node tests/run-all-tests.js",
|
|
13
|
-
"test:gpu": "node tests/gpu-detection
|
|
14
|
-
"test:platform": "node tests/
|
|
15
|
-
"test:ui": "node tests/ui-
|
|
13
|
+
"test:gpu": "node tests/amd-gpu-detection.test.js",
|
|
14
|
+
"test:platform": "node tests/hardware-simulation-tests.js",
|
|
15
|
+
"test:ui": "node tests/ui-cli-smoke.test.js",
|
|
16
16
|
"test:runtime": "node tests/runtime-specdec-tests.js",
|
|
17
17
|
"test:deterministic-pool": "node tests/deterministic-model-pool-check.js",
|
|
18
18
|
"test:policy": "node tests/policy-commands.test.js",
package/src/hardware/backends/rocm-detector.js
CHANGED
|
@@ -18,6 +18,8 @@ class ROCmDetector {
|
|
|
18
18
|
|
|
19
19
|
// AMD PCI device IDs for model name resolution
|
|
20
20
|
static AMD_DEVICE_IDS = {
|
|
21
|
+
// RDNA 4 / Radeon AI PRO
|
|
22
|
+
'7551': { name: 'AMD Radeon AI PRO R9700', vram: 32 },
|
|
21
23
|
// RDNA 3 (RX 7000 series)
|
|
22
24
|
'744c': { name: 'AMD Radeon RX 7900 XTX', vram: 24 },
|
|
23
25
|
'7448': { name: 'AMD Radeon RX 7900 XT', vram: 20 },
|
|
@@ -546,8 +548,17 @@ class ROCmDetector {
|
|
|
546
548
|
gfxVersion: null
|
|
547
549
|
};
|
|
548
550
|
|
|
551
|
+
// RDNA 4 / Radeon AI PRO
|
|
552
|
+
if (nameLower.includes('r9700') || nameLower.includes('ai pro') ||
|
|
553
|
+
nameLower.includes('gfx1200') || nameLower.includes('gfx1201')) {
|
|
554
|
+
capabilities.bf16 = true;
|
|
555
|
+
capabilities.matrixCores = true;
|
|
556
|
+
capabilities.infinityCache = true;
|
|
557
|
+
capabilities.architecture = 'RDNA 4';
|
|
558
|
+
capabilities.gfxVersion = 'gfx1200';
|
|
559
|
+
}
|
|
549
560
|
// RDNA 3 (RX 7000 series)
|
|
550
|
-
if (nameLower.includes('7900') || nameLower.includes('7800') ||
|
|
561
|
+
else if (nameLower.includes('7900') || nameLower.includes('7800') ||
|
|
551
562
|
nameLower.includes('7700') || nameLower.includes('7600') ||
|
|
552
563
|
nameLower.includes('gfx1100') || nameLower.includes('gfx1101') ||
|
|
553
564
|
nameLower.includes('gfx1102')) {
|
|
@@ -598,6 +609,9 @@ class ROCmDetector {
|
|
|
598
609
|
estimateVRAMFromModel(name) {
|
|
599
610
|
const nameLower = (name || '').toLowerCase();
|
|
600
611
|
|
|
612
|
+
// RDNA 4 / Radeon AI PRO
|
|
613
|
+
if (nameLower.includes('r9700') || nameLower.includes('ai pro r9700')) return 32;
|
|
614
|
+
|
|
601
615
|
// RX 7000 series
|
|
602
616
|
if (nameLower.includes('7900 xtx')) return 24;
|
|
603
617
|
if (nameLower.includes('7900 xt')) return 20;
|
|
@@ -634,6 +648,7 @@ class ROCmDetector {
|
|
|
634
648
|
estimateVRAMFromGfxName(name) {
|
|
635
649
|
const nameLower = (name || '').toLowerCase();
|
|
636
650
|
|
|
651
|
+
if (nameLower.includes('gfx1200') || nameLower.includes('gfx1201')) return 32; // Radeon AI PRO R9700
|
|
637
652
|
if (nameLower.includes('gfx1100')) return 24; // RX 7900 XTX
|
|
638
653
|
if (nameLower.includes('gfx1101')) return 16; // RX 7800
|
|
639
654
|
if (nameLower.includes('gfx1102')) return 8; // RX 7600
|
|
@@ -654,6 +669,10 @@ class ROCmDetector {
|
|
|
654
669
|
|
|
655
670
|
// Speed coefficients (tokens/sec per B params at Q4)
|
|
656
671
|
const speedMap = {
|
|
672
|
+
// RDNA 4 / Radeon AI PRO
|
|
673
|
+
'r9700': 230,
|
|
674
|
+
'ai pro r9700': 230,
|
|
675
|
+
|
|
657
676
|
// RX 7000 series (RDNA 3)
|
|
658
677
|
'7900 xtx': 200,
|
|
659
678
|
'7900 xt': 180,
|
package/src/hardware/detector.js
CHANGED
|
@@ -86,15 +86,40 @@ class HardwareDetector {
|
|
|
86
86
|
processGPUInfo(graphics) {
|
|
87
87
|
const controllers = graphics.controllers || [];
|
|
88
88
|
const displays = graphics.displays || [];
|
|
89
|
+
|
|
90
|
+
// Enrich weak/placeholder controller entries with device-id fallback.
|
|
91
|
+
const normalizedControllers = controllers.map((gpu) => {
|
|
92
|
+
const normalized = { ...gpu };
|
|
93
|
+
const originalModel = (gpu.model || '').trim();
|
|
94
|
+
const modelLower = originalModel.toLowerCase();
|
|
95
|
+
|
|
96
|
+
const hasGenericModel = !originalModel ||
|
|
97
|
+
modelLower === 'unknown' ||
|
|
98
|
+
modelLower.includes('nvidia corporation device') ||
|
|
99
|
+
/^device\s+[0-9a-f]{4}$/i.test(originalModel);
|
|
100
|
+
|
|
101
|
+
if (hasGenericModel && gpu.deviceId) {
|
|
102
|
+
const mappedModel = this.getGPUModelFromDeviceId(gpu.deviceId);
|
|
103
|
+
if (mappedModel) {
|
|
104
|
+
normalized.model = mappedModel;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if ((!normalized.vendor || normalized.vendor.trim() === '') && normalized.model) {
|
|
109
|
+
normalized.vendor = this.inferVendorFromGPUModel(normalized.model, '');
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return normalized;
|
|
113
|
+
});
|
|
89
114
|
|
|
90
115
|
// Debug logging to help diagnose GPU detection issues
|
|
91
116
|
if (process.env.DEBUG_GPU) {
|
|
92
|
-
console.log('GPU Detection Debug:', JSON.stringify(
|
|
117
|
+
console.log('GPU Detection Debug:', JSON.stringify(normalizedControllers, null, 2));
|
|
93
118
|
}
|
|
94
119
|
|
|
95
120
|
|
|
96
121
|
// Filter out invalid/virtualized GPUs first
|
|
97
|
-
const validGPUs =
|
|
122
|
+
const validGPUs = normalizedControllers.filter(gpu => {
|
|
98
123
|
const model = (gpu.model || '').toLowerCase();
|
|
99
124
|
const vendor = (gpu.vendor || '').toLowerCase();
|
|
100
125
|
const hasKnownModelSignature = this.looksLikeRealGPUModel(model);
|
|
@@ -199,7 +224,7 @@ class HardwareDetector {
|
|
|
199
224
|
driverVersion: primaryGPU.driverVersion || 'Unknown',
|
|
200
225
|
gpuCount: gpuCount > 0 ? gpuCount : (dedicatedGPUs.length > 0 ? dedicatedGPUs.length : 1),
|
|
201
226
|
isMultiGPU: gpuCount > 1,
|
|
202
|
-
all:
|
|
227
|
+
all: normalizedControllers.map(gpu => ({
|
|
203
228
|
model: gpu.model,
|
|
204
229
|
vram: this.normalizeVRAM(gpu.vram || 0),
|
|
205
230
|
vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
|
|
@@ -230,7 +255,7 @@ class HardwareDetector {
|
|
|
230
255
|
const perGPUVRAM = backendGPUs[0]?.memory?.total
|
|
231
256
|
|| (gpuCount > 0 && totalVRAM > 0 ? Math.round(totalVRAM / gpuCount) : 0);
|
|
232
257
|
|
|
233
|
-
const modelFromUnified = summary.gpuModel || systemInfo.gpu.model;
|
|
258
|
+
const modelFromUnified = summary.gpuInventory || summary.gpuModel || systemInfo.gpu.model;
|
|
234
259
|
const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
|
|
235
260
|
|
|
236
261
|
systemInfo.gpu = {
|
|
@@ -242,6 +267,7 @@ class HardwareDetector {
|
|
|
242
267
|
dedicated: primaryType !== 'metal',
|
|
243
268
|
gpuCount,
|
|
244
269
|
isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
|
|
270
|
+
gpuInventory: summary.gpuInventory || null,
|
|
245
271
|
backend: primaryType,
|
|
246
272
|
driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
|
|
247
273
|
};
|
|
@@ -315,10 +341,14 @@ class HardwareDetector {
|
|
|
315
341
|
getGPUModelFromDeviceId(deviceId) {
|
|
316
342
|
if (!deviceId) return null;
|
|
317
343
|
|
|
318
|
-
// Normalize device ID (
|
|
319
|
-
|
|
344
|
+
// Normalize device ID (handle "0x1B82", "10de:1b82", and raw variants)
|
|
345
|
+
let normalizedId = deviceId.toLowerCase().replace('0x', '');
|
|
346
|
+
const trailingHexMatch = normalizedId.match(/([0-9a-f]{4})$/);
|
|
347
|
+
if (trailingHexMatch) {
|
|
348
|
+
normalizedId = trailingHexMatch[1];
|
|
349
|
+
}
|
|
320
350
|
|
|
321
|
-
//
|
|
351
|
+
// Known PCI device-id mappings (subset, focused on common LLM hardware)
|
|
322
352
|
const deviceIdMap = {
|
|
323
353
|
'2d04': 'NVIDIA GeForce RTX 5060 Ti',
|
|
324
354
|
'2d05': 'NVIDIA GeForce RTX 5060',
|
|
@@ -327,7 +357,7 @@ class HardwareDetector {
|
|
|
327
357
|
'2d08': 'NVIDIA GeForce RTX 5080',
|
|
328
358
|
'2d09': 'NVIDIA GeForce RTX 5090',
|
|
329
359
|
|
|
330
|
-
// NVIDIA RTX 40 series
|
|
360
|
+
// NVIDIA RTX 40 series
|
|
331
361
|
'2684': 'NVIDIA GeForce RTX 4090',
|
|
332
362
|
'2685': 'NVIDIA GeForce RTX 4080',
|
|
333
363
|
'2786': 'NVIDIA GeForce RTX 4070 Ti',
|
|
@@ -335,12 +365,32 @@ class HardwareDetector {
|
|
|
335
365
|
'27a0': 'NVIDIA GeForce RTX 4060 Ti',
|
|
336
366
|
'27a1': 'NVIDIA GeForce RTX 4060',
|
|
337
367
|
|
|
338
|
-
// NVIDIA RTX 30 series
|
|
368
|
+
// NVIDIA RTX 30 series
|
|
339
369
|
'2204': 'NVIDIA GeForce RTX 3090',
|
|
340
370
|
'2206': 'NVIDIA GeForce RTX 3080',
|
|
341
371
|
'2484': 'NVIDIA GeForce RTX 3070',
|
|
342
372
|
'2487': 'NVIDIA GeForce RTX 3060 Ti',
|
|
343
|
-
'2504': 'NVIDIA GeForce RTX 3060'
|
|
373
|
+
'2504': 'NVIDIA GeForce RTX 3060',
|
|
374
|
+
|
|
375
|
+
// NVIDIA Pascal (Issue #35)
|
|
376
|
+
'1b82': 'NVIDIA GeForce GTX 1070 Ti',
|
|
377
|
+
'1b81': 'NVIDIA GeForce GTX 1070',
|
|
378
|
+
'1b80': 'NVIDIA GeForce GTX 1080',
|
|
379
|
+
|
|
380
|
+
// AMD RDNA 3 / RDNA 2
|
|
381
|
+
'744c': 'AMD Radeon RX 7900 XTX',
|
|
382
|
+
'7448': 'AMD Radeon RX 7900 XT',
|
|
383
|
+
'7460': 'AMD Radeon RX 7900 GRE',
|
|
384
|
+
'7480': 'AMD Radeon RX 7800 XT',
|
|
385
|
+
'7481': 'AMD Radeon RX 7700 XT',
|
|
386
|
+
'7483': 'AMD Radeon RX 7600',
|
|
387
|
+
'7484': 'AMD Radeon RX 7600 XT',
|
|
388
|
+
'73a3': 'AMD Radeon RX 6800 XT',
|
|
389
|
+
'73a2': 'AMD Radeon RX 6800',
|
|
390
|
+
'73df': 'AMD Radeon RX 6700 XT',
|
|
391
|
+
|
|
392
|
+
// AMD Radeon AI PRO
|
|
393
|
+
'7551': 'AMD Radeon AI PRO R9700'
|
|
344
394
|
};
|
|
345
395
|
|
|
346
396
|
return deviceIdMap[normalizedId] || null;
|
|
@@ -383,6 +433,13 @@ class HardwareDetector {
|
|
|
383
433
|
if (modelLower.includes('rx 7800')) return 16;
|
|
384
434
|
if (modelLower.includes('rx 7700')) return 12;
|
|
385
435
|
if (modelLower.includes('rx 7600')) return 8;
|
|
436
|
+
if (modelLower.includes('r9700') || modelLower.includes('ai pro r9700')) return 32;
|
|
437
|
+
|
|
438
|
+
// NVIDIA GTX Pascal
|
|
439
|
+
if (modelLower.includes('gtx 1080 ti')) return 11;
|
|
440
|
+
if (modelLower.includes('gtx 1080')) return 8;
|
|
441
|
+
if (modelLower.includes('gtx 1070 ti')) return 8;
|
|
442
|
+
if (modelLower.includes('gtx 1070')) return 8;
|
|
386
443
|
|
|
387
444
|
// Generic estimates
|
|
388
445
|
if (modelLower.includes('rtx')) return 8; // Default for RTX
|
|
@@ -462,9 +519,13 @@ class HardwareDetector {
|
|
|
462
519
|
else if (model.includes('rtx 4070')) score += 20;
|
|
463
520
|
else if (model.includes('rtx 30')) score += 18;
|
|
464
521
|
else if (model.includes('rtx 20')) score += 15;
|
|
522
|
+
else if (model.includes('gtx 1080')) score += 14;
|
|
523
|
+
else if (model.includes('gtx 1070 ti')) score += 13;
|
|
524
|
+
else if (model.includes('gtx 1070')) score += 12;
|
|
465
525
|
else if (model.includes('gtx 16')) score += 12;
|
|
466
526
|
else if (model.includes('tesla p100') || model.includes('p100')) score += 14;
|
|
467
527
|
else if (model.includes('apple m')) score += 15;
|
|
528
|
+
else if (model.includes('r9700') || model.includes('ai pro r9700')) score += 23;
|
|
468
529
|
|
|
469
530
|
return Math.min(Math.round(score), 100);
|
|
470
531
|
}
|
|
@@ -563,6 +624,9 @@ class HardwareDetector {
|
|
|
563
624
|
if (modelLower.includes('rtx 3090')) return 85;
|
|
564
625
|
if (modelLower.includes('rtx 30')) return 80;
|
|
565
626
|
if (modelLower.includes('rtx 20')) return 70;
|
|
627
|
+
if (modelLower.includes('gtx 1080')) return 58;
|
|
628
|
+
if (modelLower.includes('gtx 1070 ti')) return 56;
|
|
629
|
+
if (modelLower.includes('gtx 1070')) return 54;
|
|
566
630
|
if (modelLower.includes('gtx 16')) return 60;
|
|
567
631
|
if (modelLower.includes('gtx 10')) return 50;
|
|
568
632
|
|
|
@@ -579,6 +643,7 @@ class HardwareDetector {
|
|
|
579
643
|
if (modelLower.includes('rx 7700')) return 75;
|
|
580
644
|
if (modelLower.includes('rx 6900')) return 70;
|
|
581
645
|
if (modelLower.includes('rx 6800')) return 65;
|
|
646
|
+
if (modelLower.includes('r9700') || modelLower.includes('ai pro r9700')) return 88;
|
|
582
647
|
|
|
583
648
|
// Intel
|
|
584
649
|
if (modelLower.includes('arc a7')) return 55;
package/src/hardware/unified-detector.js
CHANGED
|
@@ -199,6 +199,9 @@ class UnifiedDetector {
|
|
|
199
199
|
isMultiGPU: false,
|
|
200
200
|
gpuCount: 0,
|
|
201
201
|
gpuModel: null,
|
|
202
|
+
gpuInventory: null,
|
|
203
|
+
gpuModels: [],
|
|
204
|
+
hasHeterogeneousGPU: false,
|
|
202
205
|
cpuModel: result.cpu?.brand || 'Unknown',
|
|
203
206
|
systemRAM: require('os').totalmem() / (1024 ** 3)
|
|
204
207
|
};
|
|
@@ -206,18 +209,26 @@ class UnifiedDetector {
|
|
|
206
209
|
const primary = result.primary;
|
|
207
210
|
|
|
208
211
|
if (primary?.type === 'cuda' && primary.info) {
|
|
212
|
+
const inventory = this.summarizeGPUInventory(primary.info.gpus);
|
|
209
213
|
summary.totalVRAM = primary.info.totalVRAM;
|
|
210
214
|
summary.gpuCount = primary.info.gpus.length;
|
|
211
215
|
summary.isMultiGPU = primary.info.isMultiGPU;
|
|
212
216
|
summary.speedCoefficient = primary.info.speedCoefficient;
|
|
213
|
-
summary.gpuModel =
|
|
217
|
+
summary.gpuModel = inventory.primaryModel || 'NVIDIA GPU';
|
|
218
|
+
summary.gpuInventory = inventory.displayName || summary.gpuModel;
|
|
219
|
+
summary.gpuModels = inventory.models;
|
|
220
|
+
summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
|
|
214
221
|
}
|
|
215
222
|
else if (primary?.type === 'rocm' && primary.info) {
|
|
223
|
+
const inventory = this.summarizeGPUInventory(primary.info.gpus);
|
|
216
224
|
summary.totalVRAM = primary.info.totalVRAM;
|
|
217
225
|
summary.gpuCount = primary.info.gpus.length;
|
|
218
226
|
summary.isMultiGPU = primary.info.isMultiGPU;
|
|
219
227
|
summary.speedCoefficient = primary.info.speedCoefficient;
|
|
220
|
-
summary.gpuModel =
|
|
228
|
+
summary.gpuModel = inventory.primaryModel || 'AMD GPU';
|
|
229
|
+
summary.gpuInventory = inventory.displayName || summary.gpuModel;
|
|
230
|
+
summary.gpuModels = inventory.models;
|
|
231
|
+
summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
|
|
221
232
|
}
|
|
222
233
|
else if (primary?.type === 'metal' && primary.info) {
|
|
223
234
|
// Apple Silicon uses unified memory
|
|
@@ -225,12 +236,18 @@ class UnifiedDetector {
|
|
|
225
236
|
summary.gpuCount = 1;
|
|
226
237
|
summary.speedCoefficient = primary.info.speedCoefficient;
|
|
227
238
|
summary.gpuModel = primary.info.chip || 'Apple Silicon';
|
|
239
|
+
summary.gpuInventory = summary.gpuModel;
|
|
240
|
+
summary.gpuModels = [{ name: summary.gpuModel, count: 1 }];
|
|
228
241
|
}
|
|
229
242
|
else if (primary?.type === 'intel' && primary.info) {
|
|
243
|
+
const inventory = this.summarizeGPUInventory(primary.info.gpus);
|
|
230
244
|
summary.totalVRAM = primary.info.totalVRAM;
|
|
231
245
|
summary.gpuCount = primary.info.gpus.filter(g => g.type === 'dedicated').length;
|
|
232
246
|
summary.speedCoefficient = primary.info.speedCoefficient;
|
|
233
|
-
summary.gpuModel =
|
|
247
|
+
summary.gpuModel = inventory.primaryModel || 'Intel GPU';
|
|
248
|
+
summary.gpuInventory = inventory.displayName || summary.gpuModel;
|
|
249
|
+
summary.gpuModels = inventory.models;
|
|
250
|
+
summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
|
|
234
251
|
}
|
|
235
252
|
else if (result.cpu) {
|
|
236
253
|
summary.speedCoefficient = result.cpu.speedCoefficient;
|
|
@@ -248,6 +265,27 @@ class UnifiedDetector {
|
|
|
248
265
|
return summary;
|
|
249
266
|
}
|
|
250
267
|
|
|
268
|
+
summarizeGPUInventory(gpus = []) {
|
|
269
|
+
const counts = new Map();
|
|
270
|
+
|
|
271
|
+
for (const gpu of gpus) {
|
|
272
|
+
const name = (gpu?.name || 'Unknown GPU').replace(/\s+/g, ' ').trim();
|
|
273
|
+
counts.set(name, (counts.get(name) || 0) + 1);
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
const models = Array.from(counts.entries()).map(([name, count]) => ({ name, count }));
|
|
277
|
+
const displayName = models
|
|
278
|
+
.map(({ name, count }) => (count > 1 ? `${count}x ${name}` : name))
|
|
279
|
+
.join(' + ');
|
|
280
|
+
|
|
281
|
+
return {
|
|
282
|
+
primaryModel: models[0]?.name || null,
|
|
283
|
+
displayName: displayName || null,
|
|
284
|
+
models,
|
|
285
|
+
isHeterogeneous: models.length > 1
|
|
286
|
+
};
|
|
287
|
+
}
|
|
288
|
+
|
|
251
289
|
/**
|
|
252
290
|
* Generate hardware fingerprint for benchmarks
|
|
253
291
|
*/
|
|
@@ -391,22 +429,23 @@ class UnifiedDetector {
|
|
|
391
429
|
const summary = result.summary;
|
|
392
430
|
|
|
393
431
|
if (summary.bestBackend === 'cuda') {
|
|
394
|
-
const gpuDesc = summary.
|
|
395
|
-
? `${summary.gpuCount}x ${summary.gpuModel}`
|
|
396
|
-
|
|
432
|
+
const gpuDesc = summary.gpuInventory || (
|
|
433
|
+
summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
|
|
434
|
+
);
|
|
397
435
|
return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
|
|
398
436
|
}
|
|
399
437
|
else if (summary.bestBackend === 'rocm') {
|
|
400
|
-
const gpuDesc = summary.
|
|
401
|
-
? `${summary.gpuCount}x ${summary.gpuModel}`
|
|
402
|
-
|
|
438
|
+
const gpuDesc = summary.gpuInventory || (
|
|
439
|
+
summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
|
|
440
|
+
);
|
|
403
441
|
return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
|
|
404
442
|
}
|
|
405
443
|
else if (summary.bestBackend === 'metal') {
|
|
406
444
|
return `${summary.gpuModel} (${summary.totalVRAM}GB Unified Memory)`;
|
|
407
445
|
}
|
|
408
446
|
else if (summary.bestBackend === 'intel') {
|
|
409
|
-
|
|
447
|
+
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
448
|
+
return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
|
|
410
449
|
}
|
|
411
450
|
else {
|
|
412
451
|
return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
|
package/src/index.js
CHANGED
|
@@ -258,7 +258,10 @@ class LLMChecker {
|
|
|
258
258
|
this.progress.step('Smart Recommendations', 'Generating personalized model suggestions...');
|
|
259
259
|
}
|
|
260
260
|
|
|
261
|
-
const recommendations = await this.generateIntelligentRecommendations(hardware);
|
|
261
|
+
const recommendations = await this.generateIntelligentRecommendations(hardware, {
|
|
262
|
+
optimizeFor: options.optimizeFor || options.optimize,
|
|
263
|
+
runtime: options.runtime
|
|
264
|
+
});
|
|
262
265
|
const intelligentRecommendations = recommendations;
|
|
263
266
|
|
|
264
267
|
if (this.progress) {
|
|
@@ -2382,9 +2385,10 @@ class LLMChecker {
|
|
|
2382
2385
|
}
|
|
2383
2386
|
|
|
2384
2387
|
|
|
2385
|
-
async generateIntelligentRecommendations(hardware) {
|
|
2388
|
+
async generateIntelligentRecommendations(hardware, options = {}) {
|
|
2386
2389
|
try {
|
|
2387
2390
|
this.logger.info('Generating intelligent recommendations...');
|
|
2391
|
+
const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
|
|
2388
2392
|
|
|
2389
2393
|
// Obtener todos los modelos de Ollama
|
|
2390
2394
|
const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
|
|
@@ -2396,14 +2400,25 @@ class LLMChecker {
|
|
|
2396
2400
|
}
|
|
2397
2401
|
|
|
2398
2402
|
// Generar recomendaciones inteligentes
|
|
2399
|
-
const
|
|
2400
|
-
const
|
|
2403
|
+
const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
|
|
2404
|
+
const recommendations = await this.intelligentRecommender.getBestModelsForHardware(
|
|
2405
|
+
hardware,
|
|
2406
|
+
allModels,
|
|
2407
|
+
{ optimizeFor, runtime: selectedRuntime }
|
|
2408
|
+
);
|
|
2409
|
+
const summary = this.intelligentRecommender.generateRecommendationSummary(
|
|
2410
|
+
recommendations,
|
|
2411
|
+
hardware,
|
|
2412
|
+
{ optimizeFor }
|
|
2413
|
+
);
|
|
2401
2414
|
|
|
2402
2415
|
this.logger.info(`Generated recommendations for ${Object.keys(recommendations).length} categories`);
|
|
2403
2416
|
|
|
2404
2417
|
return {
|
|
2405
2418
|
recommendations,
|
|
2406
2419
|
summary,
|
|
2420
|
+
optimizeFor: summary.optimize_for || optimizeFor,
|
|
2421
|
+
runtime: selectedRuntime,
|
|
2407
2422
|
totalModelsAnalyzed: allModels.length,
|
|
2408
2423
|
generatedAt: new Date().toISOString()
|
|
2409
2424
|
};
|