llm-checker 3.2.4 → 3.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -93,14 +93,19 @@ npm install sql.js
93
93
 
94
94
  LLM Checker is published in all primary channels:
95
95
 
96
- - npm (latest): [`llm-checker@3.2.4`](https://www.npmjs.com/package/llm-checker)
97
- - GitHub Release: [`v3.2.4`](https://github.com/Pavelevich/llm-checker/releases/tag/v3.2.4)
96
+ - npm (latest): [`llm-checker@latest`](https://www.npmjs.com/package/llm-checker)
97
+ - GitHub Releases: [Release history](https://github.com/Pavelevich/llm-checker/releases)
98
98
  - GitHub Packages: [`@pavelevich/llm-checker`](https://github.com/users/Pavelevich/packages/npm/package/llm-checker)
99
99
 
100
- ### v3.2.4 Highlights
100
+ ### v3.2.6 Highlights
101
101
 
102
- - Fixed `recommend` hardware-profile handling so discrete VRAM limits are honored consistently.
103
- - Added deterministic selector regression coverage for 24GB VRAM fit behavior.
102
+ - Recommendation engine now enforces feasible 30B-class coverage on high-capacity discrete multi-GPU setups (for non-speed objectives).
103
+ - Heterogeneous GPU inventories are preserved in output summaries and downstream recommendation inputs.
104
+ - Added and validated fallback mappings/paths for:
105
+ - AMD Radeon AI PRO R9700 (PCI ID `7551`)
106
+ - NVIDIA GTX 1070 Ti (device `1b82`)
107
+ - Linux RX 7900 XTX detection via non-ROCm fallbacks (`lspci`/`sysfs`)
108
+ - Expanded deterministic and hardware regression coverage for multi-GPU and unified-memory edge cases.
104
109
 
105
110
  ### Optional: Install from GitHub Packages
106
111
 
@@ -110,7 +115,7 @@ echo "@pavelevich:registry=https://npm.pkg.github.com" >> ~/.npmrc
110
115
  echo "//npm.pkg.github.com/:_authToken=${GITHUB_TOKEN}" >> ~/.npmrc
111
116
 
112
117
  # 2) Install
113
- npm install -g @pavelevich/llm-checker@3.2.4
118
+ npm install -g @pavelevich/llm-checker@latest
114
119
  ```
115
120
 
116
121
  ---
@@ -356,6 +361,16 @@ Metal:
356
361
  llm-checker recommend
357
362
  ```
358
363
 
364
+ Use optimization profiles to steer ranking by intent:
365
+
366
+ ```bash
367
+ llm-checker recommend --optimize balanced
368
+ llm-checker recommend --optimize speed
369
+ llm-checker recommend --optimize quality
370
+ llm-checker recommend --optimize context
371
+ llm-checker recommend --optimize coding
372
+ ```
373
+
359
374
  ```
360
375
  INTELLIGENT RECOMMENDATIONS BY CATEGORY
361
376
  Hardware Tier: HIGH | Models Analyzed: 205
@@ -465,6 +480,48 @@ Memory requirements are calculated using calibrated bytes-per-parameter values:
465
480
 
466
481
  The selector automatically picks the best quantization that fits your available memory.
467
482
 
483
+ For MoE models, deterministic memory estimation supports explicit sparse metadata when present:
484
+
485
+ - `total_params_b`
486
+ - `active_params_b`
487
+ - `expert_count`
488
+ - `experts_active_per_token`
489
+
490
+ Normalized recommendation variants expose both snake_case and camelCase metadata aliases
491
+ (for example: `total_params_b` + `totalParamsB`) when available.
492
+
493
+ MoE parameter path selection is deterministic and uses this fallback order:
494
+
495
+ 1. `active_params_b` (assumption source: `moe_active_metadata`)
496
+ 2. `total_params_b * (experts_active_per_token / expert_count)` (assumption source: `moe_derived_expert_ratio`)
497
+ 3. `total_params_b` (assumption source: `moe_fallback_total_params`)
498
+ 4. Model `paramsB` fallback (assumption source: `moe_fallback_model_params`)
499
+
500
+ Dense models continue to use the dense parameter path (`dense_params`) unchanged.
501
+
502
+ When `active_params_b` (or a derived active-ratio path) is available, inference memory
503
+ uses the sparse-active parameter estimate even if artifact size metadata is present.
504
+
505
+ ### Runtime-Aware MoE Speed Estimation
506
+
507
+ MoE speed estimates now include runtime-specific overhead assumptions (routing, communication, offload) instead of using a single fixed MoE boost.
508
+
509
+ - Canonical helper: `src/models/moe-assumptions.js`
510
+ - Applied in both:
511
+ - `src/models/deterministic-selector.js`
512
+ - `src/models/scoring-engine.js`
513
+
514
+ Current runtime profiles:
515
+
516
+ | Runtime | Routing | Communication | Offload | Max Effective Gain |
517
+ |:--------|:-------:|:-------------:|:-------:|:------------------:|
518
+ | `ollama` | 18% | 13% | 8% | 2.35x |
519
+ | `vllm` | 12% | 8% | 4% | 2.65x |
520
+ | `mlx` | 16% | 10% | 5% | 2.45x |
521
+ | `llama.cpp` | 20% | 14% | 9% | 2.30x |
522
+
523
+ Recommendation outputs now expose these assumptions through runtime metadata and MoE speed diagnostics.
524
+
468
525
  ---
469
526
 
470
527
  ## Supported Hardware
@@ -1027,11 +1027,13 @@ function displayIntelligentRecommendations(intelligentData) {
1027
1027
 
1028
1028
  const { summary, recommendations } = intelligentData;
1029
1029
  const tier = summary.hardware_tier.replace('_', ' ').toUpperCase();
1030
+ const optimizeProfile = (summary.optimize_for || intelligentData.optimizeFor || 'balanced').toUpperCase();
1030
1031
  const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
1031
1032
 
1032
1033
  console.log('\n' + chalk.bgRed.white.bold(' INTELLIGENT RECOMMENDATIONS BY CATEGORY '));
1033
1034
  console.log(chalk.red('╭' + '─'.repeat(65)));
1034
1035
  console.log(chalk.red('│') + ` Hardware Tier: ${tierColor.bold(tier)} | Models Analyzed: ${chalk.cyan.bold(intelligentData.totalModelsAnalyzed)}`);
1036
+ console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)}`);
1035
1037
  console.log(chalk.red('│'));
1036
1038
 
1037
1039
  // Mostrar mejor modelo general
@@ -1040,6 +1042,7 @@ function displayIntelligentRecommendations(intelligentData) {
1040
1042
  console.log(chalk.red('│') + ` ${chalk.bold.yellow('BEST OVERALL:')} ${chalk.green.bold(best.name)}`);
1041
1043
  console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(best.command)}`);
1042
1044
  console.log(chalk.red('│') + ` Score: ${chalk.yellow.bold(best.score)}/100 | Category: ${chalk.magenta(best.category)}`);
1045
+ console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(best.quantization || 'Q4_K_M')}`);
1043
1046
  console.log(chalk.red('│'));
1044
1047
  }
1045
1048
 
@@ -1062,6 +1065,7 @@ function displayIntelligentRecommendations(intelligentData) {
1062
1065
  console.log(chalk.red('│') + ` ${chalk.bold.white(categoryName)} (${icon}):`);
1063
1066
  console.log(chalk.red('│') + ` ${chalk.green(model.name)} (${model.size})`);
1064
1067
  console.log(chalk.red('│') + ` Score: ${scoreColor.bold(model.score)}/100 | Pulls: ${chalk.gray(model.pulls?.toLocaleString() || 'N/A')}`);
1068
+ console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(model.quantization || 'Q4_K_M')}`);
1065
1069
  console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(model.command)}`);
1066
1070
  console.log(chalk.red('│'));
1067
1071
  });
@@ -2303,6 +2307,7 @@ auditCommand
2303
2307
  .option('--out-dir <path>', 'Output directory when --out is omitted', 'audit-reports')
2304
2308
  .option('-u, --use-case <case>', 'Use case when --command check is selected', 'general')
2305
2309
  .option('-c, --category <category>', 'Category hint when --command recommend is selected')
2310
+ .option('--optimize <profile>', 'Optimization profile for recommend mode (balanced|speed|quality|context|coding)', 'balanced')
2306
2311
  .option('--runtime <runtime>', `Runtime for check mode (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
2307
2312
  .option('--include-cloud', 'Include cloud models in check-mode analysis')
2308
2313
  .option('--max-size <size>', 'Maximum model size for check mode (e.g., "24B" or "12GB")')
@@ -2356,7 +2361,9 @@ auditCommand
2356
2361
  runtimeBackend = selectedRuntime;
2357
2362
  policyCandidates = collectCandidatesFromAnalysis(analysisResult);
2358
2363
  } else {
2359
- recommendationResult = await checker.generateIntelligentRecommendations(hardware);
2364
+ recommendationResult = await checker.generateIntelligentRecommendations(hardware, {
2365
+ optimizeFor: options.optimize
2366
+ });
2360
2367
  if (!recommendationResult) {
2361
2368
  throw new Error('Unable to generate recommendation data for policy audit export.');
2362
2369
  }
@@ -2390,6 +2397,7 @@ auditCommand
2390
2397
  runtime: runtimeBackend,
2391
2398
  use_case: selectedCommand === 'check' ? normalizeUseCaseInput(options.useCase) : null,
2392
2399
  category: selectedCommand === 'recommend' ? options.category || null : null,
2400
+ optimize: selectedCommand === 'recommend' ? options.optimize || 'balanced' : null,
2393
2401
  include_cloud: Boolean(options.includeCloud)
2394
2402
  },
2395
2403
  hardware
@@ -2798,6 +2806,7 @@ program
2798
2806
  .command('recommend')
2799
2807
  .description('Get intelligent model recommendations for your hardware')
2800
2808
  .option('-c, --category <category>', 'Get recommendations for specific category (coding, talking, reading, etc.)')
2809
+ .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
2801
2810
  .option('--no-verbose', 'Disable step-by-step progress display')
2802
2811
  .option('--policy <file>', 'Evaluate recommendations against a policy file')
2803
2812
  .addHelpText(
@@ -2821,7 +2830,9 @@ Enterprise policy examples:
2821
2830
  }
2822
2831
 
2823
2832
  const hardware = await checker.getSystemInfo();
2824
- const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware);
2833
+ const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware, {
2834
+ optimizeFor: options.optimize
2835
+ });
2825
2836
 
2826
2837
  if (!intelligentRecommendations) {
2827
2838
  console.error(chalk.red('\nFailed to generate recommendations'));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.2.4",
3
+ "version": "3.2.6",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -10,9 +10,9 @@
10
10
  "main": "src/index.js",
11
11
  "scripts": {
12
12
  "test": "node tests/run-all-tests.js",
13
- "test:gpu": "node tests/gpu-detection/multi-gpu.test.js",
14
- "test:platform": "node tests/platform-tests/cross-platform.test.js",
15
- "test:ui": "node tests/ui-tests/interface.test.js",
13
+ "test:gpu": "node tests/amd-gpu-detection.test.js",
14
+ "test:platform": "node tests/hardware-simulation-tests.js",
15
+ "test:ui": "node tests/ui-cli-smoke.test.js",
16
16
  "test:runtime": "node tests/runtime-specdec-tests.js",
17
17
  "test:deterministic-pool": "node tests/deterministic-model-pool-check.js",
18
18
  "test:policy": "node tests/policy-commands.test.js",
@@ -18,6 +18,8 @@ class ROCmDetector {
18
18
 
19
19
  // AMD PCI device IDs for model name resolution
20
20
  static AMD_DEVICE_IDS = {
21
+ // RDNA 4 / Radeon AI PRO
22
+ '7551': { name: 'AMD Radeon AI PRO R9700', vram: 32 },
21
23
  // RDNA 3 (RX 7000 series)
22
24
  '744c': { name: 'AMD Radeon RX 7900 XTX', vram: 24 },
23
25
  '7448': { name: 'AMD Radeon RX 7900 XT', vram: 20 },
@@ -546,8 +548,17 @@ class ROCmDetector {
546
548
  gfxVersion: null
547
549
  };
548
550
 
551
+ // RDNA 4 / Radeon AI PRO
552
+ if (nameLower.includes('r9700') || nameLower.includes('ai pro') ||
553
+ nameLower.includes('gfx1200') || nameLower.includes('gfx1201')) {
554
+ capabilities.bf16 = true;
555
+ capabilities.matrixCores = true;
556
+ capabilities.infinityCache = true;
557
+ capabilities.architecture = 'RDNA 4';
558
+ capabilities.gfxVersion = 'gfx1200';
559
+ }
549
560
  // RDNA 3 (RX 7000 series)
550
- if (nameLower.includes('7900') || nameLower.includes('7800') ||
561
+ else if (nameLower.includes('7900') || nameLower.includes('7800') ||
551
562
  nameLower.includes('7700') || nameLower.includes('7600') ||
552
563
  nameLower.includes('gfx1100') || nameLower.includes('gfx1101') ||
553
564
  nameLower.includes('gfx1102')) {
@@ -598,6 +609,9 @@ class ROCmDetector {
598
609
  estimateVRAMFromModel(name) {
599
610
  const nameLower = (name || '').toLowerCase();
600
611
 
612
+ // RDNA 4 / Radeon AI PRO
613
+ if (nameLower.includes('r9700') || nameLower.includes('ai pro r9700')) return 32;
614
+
601
615
  // RX 7000 series
602
616
  if (nameLower.includes('7900 xtx')) return 24;
603
617
  if (nameLower.includes('7900 xt')) return 20;
@@ -634,6 +648,7 @@ class ROCmDetector {
634
648
  estimateVRAMFromGfxName(name) {
635
649
  const nameLower = (name || '').toLowerCase();
636
650
 
651
+ if (nameLower.includes('gfx1200') || nameLower.includes('gfx1201')) return 32; // Radeon AI PRO R9700
637
652
  if (nameLower.includes('gfx1100')) return 24; // RX 7900 XTX
638
653
  if (nameLower.includes('gfx1101')) return 16; // RX 7800
639
654
  if (nameLower.includes('gfx1102')) return 8; // RX 7600
@@ -654,6 +669,10 @@ class ROCmDetector {
654
669
 
655
670
  // Speed coefficients (tokens/sec per B params at Q4)
656
671
  const speedMap = {
672
+ // RDNA 4 / Radeon AI PRO
673
+ 'r9700': 230,
674
+ 'ai pro r9700': 230,
675
+
657
676
  // RX 7000 series (RDNA 3)
658
677
  '7900 xtx': 200,
659
678
  '7900 xt': 180,
@@ -86,15 +86,40 @@ class HardwareDetector {
86
86
  processGPUInfo(graphics) {
87
87
  const controllers = graphics.controllers || [];
88
88
  const displays = graphics.displays || [];
89
+
90
+ // Enrich weak/placeholder controller entries with device-id fallback.
91
+ const normalizedControllers = controllers.map((gpu) => {
92
+ const normalized = { ...gpu };
93
+ const originalModel = (gpu.model || '').trim();
94
+ const modelLower = originalModel.toLowerCase();
95
+
96
+ const hasGenericModel = !originalModel ||
97
+ modelLower === 'unknown' ||
98
+ modelLower.includes('nvidia corporation device') ||
99
+ /^device\s+[0-9a-f]{4}$/i.test(originalModel);
100
+
101
+ if (hasGenericModel && gpu.deviceId) {
102
+ const mappedModel = this.getGPUModelFromDeviceId(gpu.deviceId);
103
+ if (mappedModel) {
104
+ normalized.model = mappedModel;
105
+ }
106
+ }
107
+
108
+ if ((!normalized.vendor || normalized.vendor.trim() === '') && normalized.model) {
109
+ normalized.vendor = this.inferVendorFromGPUModel(normalized.model, '');
110
+ }
111
+
112
+ return normalized;
113
+ });
89
114
 
90
115
  // Debug logging to help diagnose GPU detection issues
91
116
  if (process.env.DEBUG_GPU) {
92
- console.log('GPU Detection Debug:', JSON.stringify(controllers, null, 2));
117
+ console.log('GPU Detection Debug:', JSON.stringify(normalizedControllers, null, 2));
93
118
  }
94
119
 
95
120
 
96
121
  // Filter out invalid/virtualized GPUs first
97
- const validGPUs = controllers.filter(gpu => {
122
+ const validGPUs = normalizedControllers.filter(gpu => {
98
123
  const model = (gpu.model || '').toLowerCase();
99
124
  const vendor = (gpu.vendor || '').toLowerCase();
100
125
  const hasKnownModelSignature = this.looksLikeRealGPUModel(model);
@@ -199,7 +224,7 @@ class HardwareDetector {
199
224
  driverVersion: primaryGPU.driverVersion || 'Unknown',
200
225
  gpuCount: gpuCount > 0 ? gpuCount : (dedicatedGPUs.length > 0 ? dedicatedGPUs.length : 1),
201
226
  isMultiGPU: gpuCount > 1,
202
- all: controllers.map(gpu => ({
227
+ all: normalizedControllers.map(gpu => ({
203
228
  model: gpu.model,
204
229
  vram: this.normalizeVRAM(gpu.vram || 0),
205
230
  vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
@@ -230,7 +255,7 @@ class HardwareDetector {
230
255
  const perGPUVRAM = backendGPUs[0]?.memory?.total
231
256
  || (gpuCount > 0 && totalVRAM > 0 ? Math.round(totalVRAM / gpuCount) : 0);
232
257
 
233
- const modelFromUnified = summary.gpuModel || systemInfo.gpu.model;
258
+ const modelFromUnified = summary.gpuInventory || summary.gpuModel || systemInfo.gpu.model;
234
259
  const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
235
260
 
236
261
  systemInfo.gpu = {
@@ -242,6 +267,7 @@ class HardwareDetector {
242
267
  dedicated: primaryType !== 'metal',
243
268
  gpuCount,
244
269
  isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
270
+ gpuInventory: summary.gpuInventory || null,
245
271
  backend: primaryType,
246
272
  driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
247
273
  };
@@ -315,10 +341,14 @@ class HardwareDetector {
315
341
  getGPUModelFromDeviceId(deviceId) {
316
342
  if (!deviceId) return null;
317
343
 
318
- // Normalize device ID (remove 0x prefix if present and convert to lowercase)
319
- const normalizedId = deviceId.toLowerCase().replace('0x', '');
344
+ // Normalize device ID (handle "0x1B82", "10de:1b82", and raw variants)
345
+ let normalizedId = deviceId.toLowerCase().replace('0x', '');
346
+ const trailingHexMatch = normalizedId.match(/([0-9a-f]{4})$/);
347
+ if (trailingHexMatch) {
348
+ normalizedId = trailingHexMatch[1];
349
+ }
320
350
 
321
- // NVIDIA RTX 50 series device IDs
351
+ // Known PCI device-id mappings (subset, focused on common LLM hardware)
322
352
  const deviceIdMap = {
323
353
  '2d04': 'NVIDIA GeForce RTX 5060 Ti',
324
354
  '2d05': 'NVIDIA GeForce RTX 5060',
@@ -327,7 +357,7 @@ class HardwareDetector {
327
357
  '2d08': 'NVIDIA GeForce RTX 5080',
328
358
  '2d09': 'NVIDIA GeForce RTX 5090',
329
359
 
330
- // NVIDIA RTX 40 series device IDs
360
+ // NVIDIA RTX 40 series
331
361
  '2684': 'NVIDIA GeForce RTX 4090',
332
362
  '2685': 'NVIDIA GeForce RTX 4080',
333
363
  '2786': 'NVIDIA GeForce RTX 4070 Ti',
@@ -335,12 +365,32 @@ class HardwareDetector {
335
365
  '27a0': 'NVIDIA GeForce RTX 4060 Ti',
336
366
  '27a1': 'NVIDIA GeForce RTX 4060',
337
367
 
338
- // NVIDIA RTX 30 series device IDs
368
+ // NVIDIA RTX 30 series
339
369
  '2204': 'NVIDIA GeForce RTX 3090',
340
370
  '2206': 'NVIDIA GeForce RTX 3080',
341
371
  '2484': 'NVIDIA GeForce RTX 3070',
342
372
  '2487': 'NVIDIA GeForce RTX 3060 Ti',
343
- '2504': 'NVIDIA GeForce RTX 3060'
373
+ '2504': 'NVIDIA GeForce RTX 3060',
374
+
375
+ // NVIDIA Pascal (Issue #35)
376
+ '1b82': 'NVIDIA GeForce GTX 1070 Ti',
377
+ '1b81': 'NVIDIA GeForce GTX 1070',
378
+ '1b80': 'NVIDIA GeForce GTX 1080',
379
+
380
+ // AMD RDNA 3 / RDNA 2
381
+ '744c': 'AMD Radeon RX 7900 XTX',
382
+ '7448': 'AMD Radeon RX 7900 XT',
383
+ '7460': 'AMD Radeon RX 7900 GRE',
384
+ '7480': 'AMD Radeon RX 7800 XT',
385
+ '7481': 'AMD Radeon RX 7700 XT',
386
+ '7483': 'AMD Radeon RX 7600',
387
+ '7484': 'AMD Radeon RX 7600 XT',
388
+ '73a3': 'AMD Radeon RX 6800 XT',
389
+ '73a2': 'AMD Radeon RX 6800',
390
+ '73df': 'AMD Radeon RX 6700 XT',
391
+
392
+ // AMD Radeon AI PRO
393
+ '7551': 'AMD Radeon AI PRO R9700'
344
394
  };
345
395
 
346
396
  return deviceIdMap[normalizedId] || null;
@@ -383,6 +433,13 @@ class HardwareDetector {
383
433
  if (modelLower.includes('rx 7800')) return 16;
384
434
  if (modelLower.includes('rx 7700')) return 12;
385
435
  if (modelLower.includes('rx 7600')) return 8;
436
+ if (modelLower.includes('r9700') || modelLower.includes('ai pro r9700')) return 32;
437
+
438
+ // NVIDIA GTX Pascal
439
+ if (modelLower.includes('gtx 1080 ti')) return 11;
440
+ if (modelLower.includes('gtx 1080')) return 8;
441
+ if (modelLower.includes('gtx 1070 ti')) return 8;
442
+ if (modelLower.includes('gtx 1070')) return 8;
386
443
 
387
444
  // Generic estimates
388
445
  if (modelLower.includes('rtx')) return 8; // Default for RTX
@@ -462,9 +519,13 @@ class HardwareDetector {
462
519
  else if (model.includes('rtx 4070')) score += 20;
463
520
  else if (model.includes('rtx 30')) score += 18;
464
521
  else if (model.includes('rtx 20')) score += 15;
522
+ else if (model.includes('gtx 1080')) score += 14;
523
+ else if (model.includes('gtx 1070 ti')) score += 13;
524
+ else if (model.includes('gtx 1070')) score += 12;
465
525
  else if (model.includes('gtx 16')) score += 12;
466
526
  else if (model.includes('tesla p100') || model.includes('p100')) score += 14;
467
527
  else if (model.includes('apple m')) score += 15;
528
+ else if (model.includes('r9700') || model.includes('ai pro r9700')) score += 23;
468
529
 
469
530
  return Math.min(Math.round(score), 100);
470
531
  }
@@ -563,6 +624,9 @@ class HardwareDetector {
563
624
  if (modelLower.includes('rtx 3090')) return 85;
564
625
  if (modelLower.includes('rtx 30')) return 80;
565
626
  if (modelLower.includes('rtx 20')) return 70;
627
+ if (modelLower.includes('gtx 1080')) return 58;
628
+ if (modelLower.includes('gtx 1070 ti')) return 56;
629
+ if (modelLower.includes('gtx 1070')) return 54;
566
630
  if (modelLower.includes('gtx 16')) return 60;
567
631
  if (modelLower.includes('gtx 10')) return 50;
568
632
 
@@ -579,6 +643,7 @@ class HardwareDetector {
579
643
  if (modelLower.includes('rx 7700')) return 75;
580
644
  if (modelLower.includes('rx 6900')) return 70;
581
645
  if (modelLower.includes('rx 6800')) return 65;
646
+ if (modelLower.includes('r9700') || modelLower.includes('ai pro r9700')) return 88;
582
647
 
583
648
  // Intel
584
649
  if (modelLower.includes('arc a7')) return 55;
@@ -199,6 +199,9 @@ class UnifiedDetector {
199
199
  isMultiGPU: false,
200
200
  gpuCount: 0,
201
201
  gpuModel: null,
202
+ gpuInventory: null,
203
+ gpuModels: [],
204
+ hasHeterogeneousGPU: false,
202
205
  cpuModel: result.cpu?.brand || 'Unknown',
203
206
  systemRAM: require('os').totalmem() / (1024 ** 3)
204
207
  };
@@ -206,18 +209,26 @@ class UnifiedDetector {
206
209
  const primary = result.primary;
207
210
 
208
211
  if (primary?.type === 'cuda' && primary.info) {
212
+ const inventory = this.summarizeGPUInventory(primary.info.gpus);
209
213
  summary.totalVRAM = primary.info.totalVRAM;
210
214
  summary.gpuCount = primary.info.gpus.length;
211
215
  summary.isMultiGPU = primary.info.isMultiGPU;
212
216
  summary.speedCoefficient = primary.info.speedCoefficient;
213
- summary.gpuModel = primary.info.gpus[0]?.name || 'NVIDIA GPU';
217
+ summary.gpuModel = inventory.primaryModel || 'NVIDIA GPU';
218
+ summary.gpuInventory = inventory.displayName || summary.gpuModel;
219
+ summary.gpuModels = inventory.models;
220
+ summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
214
221
  }
215
222
  else if (primary?.type === 'rocm' && primary.info) {
223
+ const inventory = this.summarizeGPUInventory(primary.info.gpus);
216
224
  summary.totalVRAM = primary.info.totalVRAM;
217
225
  summary.gpuCount = primary.info.gpus.length;
218
226
  summary.isMultiGPU = primary.info.isMultiGPU;
219
227
  summary.speedCoefficient = primary.info.speedCoefficient;
220
- summary.gpuModel = primary.info.gpus[0]?.name || 'AMD GPU';
228
+ summary.gpuModel = inventory.primaryModel || 'AMD GPU';
229
+ summary.gpuInventory = inventory.displayName || summary.gpuModel;
230
+ summary.gpuModels = inventory.models;
231
+ summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
221
232
  }
222
233
  else if (primary?.type === 'metal' && primary.info) {
223
234
  // Apple Silicon uses unified memory
@@ -225,12 +236,18 @@ class UnifiedDetector {
225
236
  summary.gpuCount = 1;
226
237
  summary.speedCoefficient = primary.info.speedCoefficient;
227
238
  summary.gpuModel = primary.info.chip || 'Apple Silicon';
239
+ summary.gpuInventory = summary.gpuModel;
240
+ summary.gpuModels = [{ name: summary.gpuModel, count: 1 }];
228
241
  }
229
242
  else if (primary?.type === 'intel' && primary.info) {
243
+ const inventory = this.summarizeGPUInventory(primary.info.gpus);
230
244
  summary.totalVRAM = primary.info.totalVRAM;
231
245
  summary.gpuCount = primary.info.gpus.filter(g => g.type === 'dedicated').length;
232
246
  summary.speedCoefficient = primary.info.speedCoefficient;
233
- summary.gpuModel = primary.info.gpus[0]?.name || 'Intel GPU';
247
+ summary.gpuModel = inventory.primaryModel || 'Intel GPU';
248
+ summary.gpuInventory = inventory.displayName || summary.gpuModel;
249
+ summary.gpuModels = inventory.models;
250
+ summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
234
251
  }
235
252
  else if (result.cpu) {
236
253
  summary.speedCoefficient = result.cpu.speedCoefficient;
@@ -248,6 +265,27 @@ class UnifiedDetector {
248
265
  return summary;
249
266
  }
250
267
 
268
+ summarizeGPUInventory(gpus = []) {
269
+ const counts = new Map();
270
+
271
+ for (const gpu of gpus) {
272
+ const name = (gpu?.name || 'Unknown GPU').replace(/\s+/g, ' ').trim();
273
+ counts.set(name, (counts.get(name) || 0) + 1);
274
+ }
275
+
276
+ const models = Array.from(counts.entries()).map(([name, count]) => ({ name, count }));
277
+ const displayName = models
278
+ .map(({ name, count }) => (count > 1 ? `${count}x ${name}` : name))
279
+ .join(' + ');
280
+
281
+ return {
282
+ primaryModel: models[0]?.name || null,
283
+ displayName: displayName || null,
284
+ models,
285
+ isHeterogeneous: models.length > 1
286
+ };
287
+ }
288
+
251
289
  /**
252
290
  * Generate hardware fingerprint for benchmarks
253
291
  */
@@ -391,22 +429,23 @@ class UnifiedDetector {
391
429
  const summary = result.summary;
392
430
 
393
431
  if (summary.bestBackend === 'cuda') {
394
- const gpuDesc = summary.isMultiGPU
395
- ? `${summary.gpuCount}x ${summary.gpuModel}`
396
- : summary.gpuModel;
432
+ const gpuDesc = summary.gpuInventory || (
433
+ summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
434
+ );
397
435
  return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
398
436
  }
399
437
  else if (summary.bestBackend === 'rocm') {
400
- const gpuDesc = summary.isMultiGPU
401
- ? `${summary.gpuCount}x ${summary.gpuModel}`
402
- : summary.gpuModel;
438
+ const gpuDesc = summary.gpuInventory || (
439
+ summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
440
+ );
403
441
  return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
404
442
  }
405
443
  else if (summary.bestBackend === 'metal') {
406
444
  return `${summary.gpuModel} (${summary.totalVRAM}GB Unified Memory)`;
407
445
  }
408
446
  else if (summary.bestBackend === 'intel') {
409
- return `${summary.gpuModel} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
447
+ const gpuDesc = summary.gpuInventory || summary.gpuModel;
448
+ return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
410
449
  }
411
450
  else {
412
451
  return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
package/src/index.js CHANGED
@@ -258,7 +258,10 @@ class LLMChecker {
258
258
  this.progress.step('Smart Recommendations', 'Generating personalized model suggestions...');
259
259
  }
260
260
 
261
- const recommendations = await this.generateIntelligentRecommendations(hardware);
261
+ const recommendations = await this.generateIntelligentRecommendations(hardware, {
262
+ optimizeFor: options.optimizeFor || options.optimize,
263
+ runtime: options.runtime
264
+ });
262
265
  const intelligentRecommendations = recommendations;
263
266
 
264
267
  if (this.progress) {
@@ -2382,9 +2385,10 @@ class LLMChecker {
2382
2385
  }
2383
2386
 
2384
2387
 
2385
- async generateIntelligentRecommendations(hardware) {
2388
+ async generateIntelligentRecommendations(hardware, options = {}) {
2386
2389
  try {
2387
2390
  this.logger.info('Generating intelligent recommendations...');
2391
+ const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
2388
2392
 
2389
2393
  // Obtener todos los modelos de Ollama
2390
2394
  const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
@@ -2396,14 +2400,25 @@ class LLMChecker {
2396
2400
  }
2397
2401
 
2398
2402
  // Generar recomendaciones inteligentes
2399
- const recommendations = await this.intelligentRecommender.getBestModelsForHardware(hardware, allModels);
2400
- const summary = this.intelligentRecommender.generateRecommendationSummary(recommendations, hardware);
2403
+ const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
2404
+ const recommendations = await this.intelligentRecommender.getBestModelsForHardware(
2405
+ hardware,
2406
+ allModels,
2407
+ { optimizeFor, runtime: selectedRuntime }
2408
+ );
2409
+ const summary = this.intelligentRecommender.generateRecommendationSummary(
2410
+ recommendations,
2411
+ hardware,
2412
+ { optimizeFor }
2413
+ );
2401
2414
 
2402
2415
  this.logger.info(`Generated recommendations for ${Object.keys(recommendations).length} categories`);
2403
2416
 
2404
2417
  return {
2405
2418
  recommendations,
2406
2419
  summary,
2420
+ optimizeFor: summary.optimize_for || optimizeFor,
2421
+ runtime: selectedRuntime,
2407
2422
  totalModelsAnalyzed: allModels.length,
2408
2423
  generatedAt: new Date().toISOString()
2409
2424
  };