llm-checker 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,12 @@
1
1
  const si = require('systeminformation');
2
+ const UnifiedDetector = require('./unified-detector');
2
3
 
3
4
  class HardwareDetector {
4
5
  constructor() {
5
6
  this.cache = null;
6
7
  this.cacheExpiry = 5 * 60 * 1000;
7
8
  this.cacheTime = 0;
9
+ this.unifiedDetector = new UnifiedDetector();
8
10
  }
9
11
 
10
12
  async getSystemInfo(forceFresh = false) {
@@ -31,6 +33,8 @@ class HardwareDetector {
31
33
  timestamp: Date.now()
32
34
  };
33
35
 
36
+ await this.enrichWithUnifiedHardware(systemInfo);
37
+
34
38
  this.cache = systemInfo;
35
39
  this.cacheTime = Date.now();
36
40
 
@@ -93,9 +97,15 @@ class HardwareDetector {
93
97
  const validGPUs = controllers.filter(gpu => {
94
98
  const model = (gpu.model || '').toLowerCase();
95
99
  const vendor = (gpu.vendor || '').toLowerCase();
100
+ const hasKnownModelSignature = this.looksLikeRealGPUModel(model);
96
101
 
97
102
  // Skip GPUs with empty/invalid data (like virtualized GPUs)
98
- if (!model || !vendor || model === 'unknown' || vendor === '') {
103
+ if (!model || model === 'unknown') {
104
+ return false;
105
+ }
106
+
107
+ // Some passthrough/virtualized setups report empty vendor while model is valid
108
+ if ((!vendor || vendor === '') && !hasKnownModelSignature) {
99
109
  return false;
100
110
  }
101
111
 
@@ -181,7 +191,7 @@ class HardwareDetector {
181
191
 
182
192
  return {
183
193
  model: enhancedModel,
184
- vendor: primaryGPU.vendor || 'Unknown',
194
+ vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
185
195
  vram: effectiveVRAM,
186
196
  vramPerGPU: vram, // VRAM of primary GPU for reference
187
197
  vramDynamic: primaryGPU.vramDynamic || false,
@@ -192,13 +202,54 @@ class HardwareDetector {
192
202
  all: controllers.map(gpu => ({
193
203
  model: gpu.model,
194
204
  vram: this.normalizeVRAM(gpu.vram || 0),
195
- vendor: gpu.vendor
205
+ vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
196
206
  })),
197
207
  displays: displays.length,
198
208
  score: this.calculateGPUScore(primaryGPU)
199
209
  };
200
210
  }
201
211
 
212
+ async enrichWithUnifiedHardware(systemInfo) {
213
+ try {
214
+ const unified = await this.unifiedDetector.detect();
215
+ if (!unified || !unified.summary || !unified.primary) {
216
+ return;
217
+ }
218
+
219
+ const primaryType = unified.primary.type || 'cpu';
220
+ if (primaryType === 'cpu') {
221
+ return;
222
+ }
223
+
224
+ const summary = unified.summary;
225
+ const backendInfo = unified.backends?.[primaryType]?.info || {};
226
+ const backendGPUs = Array.isArray(backendInfo.gpus) ? backendInfo.gpus : [];
227
+ const gpuCount = summary.gpuCount || backendGPUs.length || systemInfo.gpu.gpuCount || 1;
228
+
229
+ const totalVRAM = typeof summary.totalVRAM === 'number' ? summary.totalVRAM : systemInfo.gpu.vram;
230
+ const perGPUVRAM = backendGPUs[0]?.memory?.total
231
+ || (gpuCount > 0 && totalVRAM > 0 ? Math.round(totalVRAM / gpuCount) : 0);
232
+
233
+ const modelFromUnified = summary.gpuModel || systemInfo.gpu.model;
234
+ const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
235
+
236
+ systemInfo.gpu = {
237
+ ...systemInfo.gpu,
238
+ model: modelFromUnified,
239
+ vendor,
240
+ vram: totalVRAM || systemInfo.gpu.vram,
241
+ vramPerGPU: perGPUVRAM || systemInfo.gpu.vramPerGPU || 0,
242
+ dedicated: primaryType !== 'metal',
243
+ gpuCount,
244
+ isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
245
+ backend: primaryType,
246
+ driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
247
+ };
248
+ } catch (error) {
249
+ // Keep systeminformation-only results when backend-specific detection is unavailable
250
+ }
251
+ }
252
+
202
253
  processSystemInfo(system) {
203
254
  return {
204
255
  manufacturer: system.manufacturer || 'Unknown',
@@ -298,6 +349,10 @@ class HardwareDetector {
298
349
  estimateVRAMFromModel(model) {
299
350
  if (!model) return 0;
300
351
  const modelLower = model.toLowerCase();
352
+
353
+ // NVIDIA data-center / workstation
354
+ if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
355
+ if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
301
356
 
302
357
  // NVIDIA RTX 50 series
303
358
  if (modelLower.includes('rtx 5090')) return 32;
@@ -398,6 +453,7 @@ class HardwareDetector {
398
453
 
399
454
  // Bonus for specific brands/models
400
455
  if (model.includes('rtx 5090')) score += 30;
456
+ else if (model.includes('gb10') || model.includes('grace blackwell') || model.includes('dgx spark')) score += 28;
401
457
  else if (model.includes('rtx 5080')) score += 27;
402
458
  else if (model.includes('rtx 5070')) score += 24;
403
459
  else if (model.includes('rtx 5060')) score += 21;
@@ -407,6 +463,7 @@ class HardwareDetector {
407
463
  else if (model.includes('rtx 30')) score += 18;
408
464
  else if (model.includes('rtx 20')) score += 15;
409
465
  else if (model.includes('gtx 16')) score += 12;
466
+ else if (model.includes('tesla p100') || model.includes('p100')) score += 14;
410
467
  else if (model.includes('apple m')) score += 15;
411
468
 
412
469
  return Math.min(Math.round(score), 100);
@@ -497,9 +554,10 @@ class HardwareDetector {
497
554
  */
498
555
  getGPUTier(model) {
499
556
  const modelLower = model.toLowerCase();
500
-
557
+
501
558
  // NVIDIA RTX series
502
559
  if (modelLower.includes('rtx 50')) return 100;
560
+ if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 98;
503
561
  if (modelLower.includes('rtx 4090')) return 95;
504
562
  if (modelLower.includes('rtx 40')) return 90;
505
563
  if (modelLower.includes('rtx 3090')) return 85;
@@ -511,6 +569,7 @@ class HardwareDetector {
511
569
  // NVIDIA Professional
512
570
  if (modelLower.includes('a100')) return 98;
513
571
  if (modelLower.includes('h100')) return 99;
572
+ if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 78;
514
573
  if (modelLower.includes('tesla')) return 75;
515
574
  if (modelLower.includes('quadro')) return 65;
516
575
 
@@ -545,6 +604,49 @@ class HardwareDetector {
545
604
  return 0;
546
605
  }
547
606
 
607
+ looksLikeRealGPUModel(model) {
608
+ if (!model) return false;
609
+ const modelLower = model.toLowerCase();
610
+
611
+ const gpuMarkers = [
612
+ 'nvidia', 'geforce', 'rtx', 'gtx', 'tesla', 'quadro',
613
+ 'amd', 'radeon', 'rx ', 'instinct',
614
+ 'intel', 'arc', 'iris', 'uhd',
615
+ 'apple', 'm1', 'm2', 'm3', 'm4',
616
+ 'gb10', 'blackwell'
617
+ ];
618
+
619
+ return gpuMarkers.some(marker => modelLower.includes(marker));
620
+ }
621
+
622
+ inferVendorFromGPUModel(model, fallback = 'Unknown') {
623
+ if (!model) return fallback;
624
+ const modelLower = model.toLowerCase();
625
+
626
+ if (modelLower.includes('nvidia') || modelLower.includes('geforce') ||
627
+ modelLower.includes('rtx') || modelLower.includes('gtx') ||
628
+ modelLower.includes('tesla') || modelLower.includes('quadro') ||
629
+ modelLower.includes('gb10') || modelLower.includes('blackwell')) {
630
+ return 'NVIDIA';
631
+ }
632
+
633
+ if (modelLower.includes('amd') || modelLower.includes('radeon') || modelLower.includes('instinct')) {
634
+ return 'AMD';
635
+ }
636
+
637
+ if (modelLower.includes('intel') || modelLower.includes('arc') ||
638
+ modelLower.includes('iris') || modelLower.includes('uhd')) {
639
+ return 'Intel';
640
+ }
641
+
642
+ if (modelLower.includes('apple') || modelLower.includes('m1') ||
643
+ modelLower.includes('m2') || modelLower.includes('m3') || modelLower.includes('m4')) {
644
+ return 'Apple';
645
+ }
646
+
647
+ return fallback;
648
+ }
649
+
548
650
  async runQuickBenchmark() {
549
651
 
550
652
  const start = process.hrtime.bigint();
@@ -576,4 +678,4 @@ class HardwareDetector {
576
678
 
577
679
  }
578
680
 
579
- module.exports = HardwareDetector;
681
+ module.exports = HardwareDetector;
@@ -71,6 +71,13 @@ class HardwareSpecs {
71
71
  'NVIDIA GeForce RTX 3060 Ti': { score: 75, vram: 8, tdp: 200, dedicated: true },
72
72
  'NVIDIA GeForce RTX 3060': { score: 70, vram: 12, tdp: 170, dedicated: true },
73
73
 
74
+ // NVIDIA Data Center / Workstation
75
+ 'NVIDIA H100': { score: 100, vram: 80, tdp: 700, dedicated: true },
76
+ 'NVIDIA A100': { score: 94, vram: 80, tdp: 400, dedicated: true },
77
+ 'NVIDIA Tesla P100': { score: 74, vram: 16, tdp: 250, dedicated: true },
78
+ 'NVIDIA GB10 Grace Blackwell': { score: 96, vram: 96, tdp: 140, dedicated: true },
79
+ 'NVIDIA DGX Spark (GB10)': { score: 96, vram: 96, tdp: 140, dedicated: true },
80
+
74
81
  // AMD RX 7000 Series
75
82
  'AMD Radeon RX 7900 XTX': { score: 92, vram: 24, tdp: 355, dedicated: true },
76
83
  'AMD Radeon RX 7900 XT': { score: 88, vram: 20, tdp: 300, dedicated: true },
@@ -283,4 +290,4 @@ class HardwareSpecs {
283
290
  }
284
291
  }
285
292
 
286
- module.exports = HardwareSpecs;
293
+ module.exports = HardwareSpecs;
package/src/index.js CHANGED
@@ -8,6 +8,12 @@ const OllamaClient = require('./ollama/client');
8
8
  const { getLogger } = require('./utils/logger');
9
9
  const { getOllamaModelsIntegration, OllamaNativeScraper } = require('./ollama/native-scraper');
10
10
  const VerboseProgress = require('./utils/verbose-progress');
11
+ const SpeculativeDecodingEstimator = require('./models/speculative-decoding-estimator');
12
+ const {
13
+ normalizeRuntime,
14
+ getRuntimePullCommand,
15
+ getRuntimeRunCommand
16
+ } = require('./runtime/runtime-support');
11
17
 
12
18
  class LLMChecker {
13
19
  constructor(options = {}) {
@@ -17,6 +23,7 @@ class LLMChecker {
17
23
  this.ollamaScraper = new OllamaNativeScraper();
18
24
  this.compatibilityAnalyzer = new CompatibilityAnalyzer();
19
25
  this.performanceAnalyzer = new PerformanceAnalyzer();
26
+ this.speculativeDecodingEstimator = new SpeculativeDecodingEstimator();
20
27
  this.ollamaClient = new OllamaClient();
21
28
  this.logger = getLogger().createChild('LLMChecker');
22
29
  this.verbose = options.verbose !== false; // Default to verbose unless explicitly disabled
@@ -286,7 +293,7 @@ class LLMChecker {
286
293
  if (platform === 'apple_silicon') {
287
294
  return await this.analyzeWithAppleSiliconHeuristics(hardware, staticModels, ollamaIntegration, options);
288
295
  } else {
289
- return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration);
296
+ return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration, options);
290
297
  }
291
298
  }
292
299
 
@@ -367,7 +374,12 @@ class LLMChecker {
367
374
  }))
368
375
  };
369
376
 
370
- return mappedResults;
377
+ return this.attachSpeculativeDecodingEstimates(
378
+ mappedResults,
379
+ [...mappedResults.compatible, ...mappedResults.marginal],
380
+ hardware,
381
+ options.runtime
382
+ );
371
383
  }
372
384
 
373
385
  async integrateOllamaModels(hardware, availableModels) {
@@ -473,7 +485,7 @@ class LLMChecker {
473
485
  return integration;
474
486
  }
475
487
 
476
- async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration) {
488
+ async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration, options = {}) {
477
489
  this.logger.info('Using mathematical heuristics combining database + local models');
478
490
 
479
491
  try {
@@ -594,8 +606,13 @@ class LLMChecker {
594
606
  });
595
607
 
596
608
  this.logger.info(`Mathematical heuristic results: ${compatibility.compatible.length} compatible, ${compatibility.marginal.length} marginal, ${compatibility.incompatible.length} incompatible`);
597
-
598
- return compatibility;
609
+
610
+ return this.attachSpeculativeDecodingEstimates(
611
+ compatibility,
612
+ allUniqueModels,
613
+ hardware,
614
+ options.runtime
615
+ );
599
616
 
600
617
  } catch (error) {
601
618
  this.logger.error('Mathematical heuristic analysis failed, using fallback', { error: error.message });
@@ -715,6 +732,7 @@ class LLMChecker {
715
732
  return {
716
733
  ...existingModel,
717
734
  ollamaId: ollamaModel.model_identifier,
735
+ frameworks: Array.from(new Set([...(existingModel.frameworks || []), 'ollama', 'vllm', 'mlx'])),
718
736
  pulls: ollamaModel.pulls,
719
737
  lastUpdated: ollamaModel.last_updated,
720
738
  description: ollamaModel.description || existingModel.description,
@@ -726,7 +744,7 @@ class LLMChecker {
726
744
  },
727
745
  installation: {
728
746
  ...existingModel.installation,
729
- ollama: `ollama pull ${ollamaModel.model_identifier}`
747
+ ...this.createRuntimeInstallationCommands(ollamaModel.model_identifier, ollamaModel.model_name || existingModel.name)
730
748
  }
731
749
  };
732
750
  }
@@ -786,7 +804,7 @@ class LLMChecker {
786
804
  type: 'local',
787
805
  category: category,
788
806
  specialization: specialization,
789
- frameworks: ['ollama'],
807
+ frameworks: ['ollama', 'vllm', 'mlx'],
790
808
  requirements: {
791
809
  ram: Math.ceil(sizeNum * 0.6) || 2,
792
810
  vram: Math.ceil(sizeNum * 0.4) || 0,
@@ -794,7 +812,7 @@ class LLMChecker {
794
812
  storage: realStorageSize || Math.ceil(sizeNum * 0.7) || 1
795
813
  },
796
814
  installation: {
797
- ollama: `ollama pull ${ollamaModel.model_identifier}`,
815
+ ...this.createRuntimeInstallationCommands(ollamaModel.model_identifier, ollamaModel.model_name),
798
816
  description: ollamaModel.description || 'Available in Ollama library'
799
817
  },
800
818
  description: ollamaModel.description || `${ollamaModel.model_name} from Ollama`,
@@ -919,7 +937,7 @@ class LLMChecker {
919
937
  type: 'local',
920
938
  category: category,
921
939
  specialization: specialization,
922
- frameworks: ['ollama'],
940
+ frameworks: ['ollama', 'vllm', 'mlx'],
923
941
  requirements: {
924
942
  ram: Math.ceil((parseFloat(size) || 4) * 0.6),
925
943
  vram: Math.ceil((parseFloat(size) || 4) * 0.4),
@@ -927,7 +945,7 @@ class LLMChecker {
927
945
  storage: Math.ceil((parseFloat(size) || 4) * 0.7)
928
946
  },
929
947
  installation: {
930
- ollama: `ollama pull ${cloudModel.model_identifier}`,
948
+ ...this.createRuntimeInstallationCommands(cloudModel.model_identifier, cloudModel.model_name),
931
949
  description: cloudModel.description || 'Model from Ollama library'
932
950
  },
933
951
  year: 2024,
@@ -970,6 +988,54 @@ class LLMChecker {
970
988
  });
971
989
  }
972
990
 
991
+ createRuntimeInstallationCommands(modelIdentifier, modelName) {
992
+ const identifier = String(modelIdentifier || modelName || 'model').trim();
993
+ const runtimeModel = {
994
+ model_identifier: identifier,
995
+ ollamaId: identifier,
996
+ name: modelName || identifier
997
+ };
998
+
999
+ return {
1000
+ ollama: `ollama pull ${identifier}`,
1001
+ vllm: getRuntimeRunCommand(runtimeModel, 'vllm'),
1002
+ vllmPull: getRuntimePullCommand(runtimeModel, 'vllm'),
1003
+ mlx: getRuntimeRunCommand(runtimeModel, 'mlx'),
1004
+ mlxPull: getRuntimePullCommand(runtimeModel, 'mlx')
1005
+ };
1006
+ }
1007
+
1008
+ attachSpeculativeDecodingEstimates(resultGroups, candidates, hardware, runtime = 'ollama') {
1009
+ const selectedRuntime = normalizeRuntime(runtime);
1010
+ const candidatePool = Array.isArray(candidates) ? candidates : [];
1011
+
1012
+ const withEstimate = (items = []) =>
1013
+ items.map((model) => {
1014
+ const estimate = this.speculativeDecodingEstimator.estimate({
1015
+ model,
1016
+ candidates: candidatePool,
1017
+ hardware,
1018
+ runtime: selectedRuntime
1019
+ });
1020
+
1021
+ if (!estimate) {
1022
+ return model;
1023
+ }
1024
+
1025
+ return {
1026
+ ...model,
1027
+ speculativeDecoding: estimate
1028
+ };
1029
+ });
1030
+
1031
+ return {
1032
+ ...resultGroups,
1033
+ compatible: withEstimate(resultGroups.compatible),
1034
+ marginal: withEstimate(resultGroups.marginal),
1035
+ incompatible: withEstimate(resultGroups.incompatible)
1036
+ };
1037
+ }
1038
+
973
1039
  async generateOllamaRecommendations(hardware, availableModels, installedModels) {
974
1040
  const recommendations = [];
975
1041
  const installedNames = new Set(installedModels.map(m => m.name.toLowerCase()));
@@ -2286,4 +2352,4 @@ class LLMChecker {
2286
2352
 
2287
2353
  }
2288
2354
 
2289
- module.exports = LLMChecker;
2355
+ module.exports = LLMChecker;
@@ -999,10 +999,16 @@ class ExpandedModelsDatabase {
999
999
  } else if (hasDedicatedGPU) {
1000
1000
  // Dedicated GPU - much better performance
1001
1001
  let gpuTPS = 30;
1002
- if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
1002
+ if (gpuModel.toLowerCase().includes('gb10') ||
1003
+ gpuModel.toLowerCase().includes('grace blackwell') ||
1004
+ gpuModel.toLowerCase().includes('dgx spark')) gpuTPS = 90;
1005
+ else if (gpuModel.toLowerCase().includes('h100')) gpuTPS = 120;
1006
+ else if (gpuModel.toLowerCase().includes('a100')) gpuTPS = 95;
1007
+ else if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
1003
1008
  else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 50;
1004
1009
  else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 40;
1005
1010
  else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 30;
1011
+ else if (gpuModel.toLowerCase().includes('p100')) gpuTPS = 32;
1006
1012
  else if (vramGB >= 16) gpuTPS = 45;
1007
1013
  else if (vramGB >= 8) gpuTPS = 35;
1008
1014
  else if (vramGB >= 4) gpuTPS = 25;
@@ -1139,4 +1145,4 @@ class ExpandedModelsDatabase {
1139
1145
  }
1140
1146
  }
1141
1147
 
1142
- module.exports = ExpandedModelsDatabase;
1148
+ module.exports = ExpandedModelsDatabase;
@@ -170,6 +170,7 @@ class ScoringEngine {
170
170
  // NVIDIA - based on real llama.cpp/Ollama benchmarks
171
171
  'cuda_h100': 120, // ~100-140 TPS for 7B Q4
172
172
  'cuda_a100': 90, // ~80-100 TPS for 7B Q4
173
+ 'cuda_gb10': 95, // Grace Blackwell / DGX Spark class
173
174
  'cuda_4090': 70, // ~60-80 TPS for 7B Q4
174
175
  'cuda_4080': 55, // ~50-60 TPS for 7B Q4
175
176
  'cuda_3090': 50, // ~45-55 TPS for 7B Q4
@@ -177,6 +178,7 @@ class ScoringEngine {
177
178
  'cuda_3070': 32, // ~28-35 TPS for 7B Q4
178
179
  'cuda_3060': 25, // ~20-28 TPS for 7B Q4
179
180
  'cuda_2080': 28, // ~25-30 TPS for 7B Q4
181
+ 'cuda_p100': 30, // Tesla P100 class
180
182
  'cuda_default': 30,
181
183
 
182
184
  // AMD - slightly lower than equivalent NVIDIA
@@ -518,8 +520,10 @@ class ScoringEngine {
518
520
  const gpuModel = (hardware.summary.gpuModel || '').toLowerCase();
519
521
 
520
522
  if (backend === 'cuda') {
523
+ if (gpuModel.includes('gb10') || gpuModel.includes('grace blackwell') || gpuModel.includes('dgx spark')) return 'cuda_gb10';
521
524
  if (gpuModel.includes('h100')) return 'cuda_h100';
522
525
  if (gpuModel.includes('a100')) return 'cuda_a100';
526
+ if (gpuModel.includes('p100')) return 'cuda_p100';
523
527
  if (gpuModel.includes('4090')) return 'cuda_4090';
524
528
  if (gpuModel.includes('4080')) return 'cuda_4080';
525
529
  if (gpuModel.includes('3090')) return 'cuda_3090';