llm-checker 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/analyzer/compatibility.js +20 -0
- package/bin/cli.js +14 -0
- package/bin/enhanced_cli.js +133 -36
- package/package.json +5 -3
- package/src/ai/multi-objective-selector.js +28 -4
- package/src/hardware/backends/cuda-detector.js +32 -11
- package/src/hardware/detector.js +107 -5
- package/src/hardware/specs.js +8 -1
- package/src/index.js +77 -11
- package/src/models/expanded_database.js +8 -2
- package/src/models/scoring-engine.js +4 -0
- package/src/models/speculative-decoding-estimator.js +245 -0
- package/src/runtime/runtime-support.js +174 -0
- package/bin/CLAUDE.md +0 -27
- package/src/CLAUDE.md +0 -18
- package/src/data/CLAUDE.md +0 -17
- package/src/hardware/CLAUDE.md +0 -18
- package/src/hardware/backends/CLAUDE.md +0 -17
- package/src/models/CLAUDE.md +0 -23
- package/src/ollama/CLAUDE.md +0 -30
- package/src/plugins/CLAUDE.md +0 -17
- package/src/utils/CLAUDE.md +0 -17
package/src/hardware/detector.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
const si = require('systeminformation');
|
|
2
|
+
const UnifiedDetector = require('./unified-detector');
|
|
2
3
|
|
|
3
4
|
class HardwareDetector {
|
|
4
5
|
constructor() {
|
|
5
6
|
this.cache = null;
|
|
6
7
|
this.cacheExpiry = 5 * 60 * 1000;
|
|
7
8
|
this.cacheTime = 0;
|
|
9
|
+
this.unifiedDetector = new UnifiedDetector();
|
|
8
10
|
}
|
|
9
11
|
|
|
10
12
|
async getSystemInfo(forceFresh = false) {
|
|
@@ -31,6 +33,8 @@ class HardwareDetector {
|
|
|
31
33
|
timestamp: Date.now()
|
|
32
34
|
};
|
|
33
35
|
|
|
36
|
+
await this.enrichWithUnifiedHardware(systemInfo);
|
|
37
|
+
|
|
34
38
|
this.cache = systemInfo;
|
|
35
39
|
this.cacheTime = Date.now();
|
|
36
40
|
|
|
@@ -93,9 +97,15 @@ class HardwareDetector {
|
|
|
93
97
|
const validGPUs = controllers.filter(gpu => {
|
|
94
98
|
const model = (gpu.model || '').toLowerCase();
|
|
95
99
|
const vendor = (gpu.vendor || '').toLowerCase();
|
|
100
|
+
const hasKnownModelSignature = this.looksLikeRealGPUModel(model);
|
|
96
101
|
|
|
97
102
|
// Skip GPUs with empty/invalid data (like virtualized GPUs)
|
|
98
|
-
if (!model ||
|
|
103
|
+
if (!model || model === 'unknown') {
|
|
104
|
+
return false;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Some passthrough/virtualized setups report empty vendor while model is valid
|
|
108
|
+
if ((!vendor || vendor === '') && !hasKnownModelSignature) {
|
|
99
109
|
return false;
|
|
100
110
|
}
|
|
101
111
|
|
|
@@ -181,7 +191,7 @@ class HardwareDetector {
|
|
|
181
191
|
|
|
182
192
|
return {
|
|
183
193
|
model: enhancedModel,
|
|
184
|
-
vendor: primaryGPU.vendor || 'Unknown',
|
|
194
|
+
vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
|
|
185
195
|
vram: effectiveVRAM,
|
|
186
196
|
vramPerGPU: vram, // VRAM of primary GPU for reference
|
|
187
197
|
vramDynamic: primaryGPU.vramDynamic || false,
|
|
@@ -192,13 +202,54 @@ class HardwareDetector {
|
|
|
192
202
|
all: controllers.map(gpu => ({
|
|
193
203
|
model: gpu.model,
|
|
194
204
|
vram: this.normalizeVRAM(gpu.vram || 0),
|
|
195
|
-
vendor: gpu.vendor
|
|
205
|
+
vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
|
|
196
206
|
})),
|
|
197
207
|
displays: displays.length,
|
|
198
208
|
score: this.calculateGPUScore(primaryGPU)
|
|
199
209
|
};
|
|
200
210
|
}
|
|
201
211
|
|
|
212
|
+
async enrichWithUnifiedHardware(systemInfo) {
|
|
213
|
+
try {
|
|
214
|
+
const unified = await this.unifiedDetector.detect();
|
|
215
|
+
if (!unified || !unified.summary || !unified.primary) {
|
|
216
|
+
return;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
const primaryType = unified.primary.type || 'cpu';
|
|
220
|
+
if (primaryType === 'cpu') {
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
const summary = unified.summary;
|
|
225
|
+
const backendInfo = unified.backends?.[primaryType]?.info || {};
|
|
226
|
+
const backendGPUs = Array.isArray(backendInfo.gpus) ? backendInfo.gpus : [];
|
|
227
|
+
const gpuCount = summary.gpuCount || backendGPUs.length || systemInfo.gpu.gpuCount || 1;
|
|
228
|
+
|
|
229
|
+
const totalVRAM = typeof summary.totalVRAM === 'number' ? summary.totalVRAM : systemInfo.gpu.vram;
|
|
230
|
+
const perGPUVRAM = backendGPUs[0]?.memory?.total
|
|
231
|
+
|| (gpuCount > 0 && totalVRAM > 0 ? Math.round(totalVRAM / gpuCount) : 0);
|
|
232
|
+
|
|
233
|
+
const modelFromUnified = summary.gpuModel || systemInfo.gpu.model;
|
|
234
|
+
const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
|
|
235
|
+
|
|
236
|
+
systemInfo.gpu = {
|
|
237
|
+
...systemInfo.gpu,
|
|
238
|
+
model: modelFromUnified,
|
|
239
|
+
vendor,
|
|
240
|
+
vram: totalVRAM || systemInfo.gpu.vram,
|
|
241
|
+
vramPerGPU: perGPUVRAM || systemInfo.gpu.vramPerGPU || 0,
|
|
242
|
+
dedicated: primaryType !== 'metal',
|
|
243
|
+
gpuCount,
|
|
244
|
+
isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
|
|
245
|
+
backend: primaryType,
|
|
246
|
+
driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
|
|
247
|
+
};
|
|
248
|
+
} catch (error) {
|
|
249
|
+
// Keep systeminformation-only results when backend-specific detection is unavailable
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
202
253
|
processSystemInfo(system) {
|
|
203
254
|
return {
|
|
204
255
|
manufacturer: system.manufacturer || 'Unknown',
|
|
@@ -298,6 +349,10 @@ class HardwareDetector {
|
|
|
298
349
|
estimateVRAMFromModel(model) {
|
|
299
350
|
if (!model) return 0;
|
|
300
351
|
const modelLower = model.toLowerCase();
|
|
352
|
+
|
|
353
|
+
// NVIDIA data-center / workstation
|
|
354
|
+
if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
|
|
355
|
+
if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
|
|
301
356
|
|
|
302
357
|
// NVIDIA RTX 50 series
|
|
303
358
|
if (modelLower.includes('rtx 5090')) return 32;
|
|
@@ -398,6 +453,7 @@ class HardwareDetector {
|
|
|
398
453
|
|
|
399
454
|
// Bonus por marcas/modelos específicos
|
|
400
455
|
if (model.includes('rtx 5090')) score += 30;
|
|
456
|
+
else if (model.includes('gb10') || model.includes('grace blackwell') || model.includes('dgx spark')) score += 28;
|
|
401
457
|
else if (model.includes('rtx 5080')) score += 27;
|
|
402
458
|
else if (model.includes('rtx 5070')) score += 24;
|
|
403
459
|
else if (model.includes('rtx 5060')) score += 21;
|
|
@@ -407,6 +463,7 @@ class HardwareDetector {
|
|
|
407
463
|
else if (model.includes('rtx 30')) score += 18;
|
|
408
464
|
else if (model.includes('rtx 20')) score += 15;
|
|
409
465
|
else if (model.includes('gtx 16')) score += 12;
|
|
466
|
+
else if (model.includes('tesla p100') || model.includes('p100')) score += 14;
|
|
410
467
|
else if (model.includes('apple m')) score += 15;
|
|
411
468
|
|
|
412
469
|
return Math.min(Math.round(score), 100);
|
|
@@ -497,9 +554,10 @@ class HardwareDetector {
|
|
|
497
554
|
*/
|
|
498
555
|
getGPUTier(model) {
|
|
499
556
|
const modelLower = model.toLowerCase();
|
|
500
|
-
|
|
557
|
+
|
|
501
558
|
// NVIDIA RTX series
|
|
502
559
|
if (modelLower.includes('rtx 50')) return 100;
|
|
560
|
+
if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 98;
|
|
503
561
|
if (modelLower.includes('rtx 4090')) return 95;
|
|
504
562
|
if (modelLower.includes('rtx 40')) return 90;
|
|
505
563
|
if (modelLower.includes('rtx 3090')) return 85;
|
|
@@ -511,6 +569,7 @@ class HardwareDetector {
|
|
|
511
569
|
// NVIDIA Professional
|
|
512
570
|
if (modelLower.includes('a100')) return 98;
|
|
513
571
|
if (modelLower.includes('h100')) return 99;
|
|
572
|
+
if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 78;
|
|
514
573
|
if (modelLower.includes('tesla')) return 75;
|
|
515
574
|
if (modelLower.includes('quadro')) return 65;
|
|
516
575
|
|
|
@@ -545,6 +604,49 @@ class HardwareDetector {
|
|
|
545
604
|
return 0;
|
|
546
605
|
}
|
|
547
606
|
|
|
607
|
+
looksLikeRealGPUModel(model) {
|
|
608
|
+
if (!model) return false;
|
|
609
|
+
const modelLower = model.toLowerCase();
|
|
610
|
+
|
|
611
|
+
const gpuMarkers = [
|
|
612
|
+
'nvidia', 'geforce', 'rtx', 'gtx', 'tesla', 'quadro',
|
|
613
|
+
'amd', 'radeon', 'rx ', 'instinct',
|
|
614
|
+
'intel', 'arc', 'iris', 'uhd',
|
|
615
|
+
'apple', 'm1', 'm2', 'm3', 'm4',
|
|
616
|
+
'gb10', 'blackwell'
|
|
617
|
+
];
|
|
618
|
+
|
|
619
|
+
return gpuMarkers.some(marker => modelLower.includes(marker));
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
inferVendorFromGPUModel(model, fallback = 'Unknown') {
|
|
623
|
+
if (!model) return fallback;
|
|
624
|
+
const modelLower = model.toLowerCase();
|
|
625
|
+
|
|
626
|
+
if (modelLower.includes('nvidia') || modelLower.includes('geforce') ||
|
|
627
|
+
modelLower.includes('rtx') || modelLower.includes('gtx') ||
|
|
628
|
+
modelLower.includes('tesla') || modelLower.includes('quadro') ||
|
|
629
|
+
modelLower.includes('gb10') || modelLower.includes('blackwell')) {
|
|
630
|
+
return 'NVIDIA';
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
if (modelLower.includes('amd') || modelLower.includes('radeon') || modelLower.includes('instinct')) {
|
|
634
|
+
return 'AMD';
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
if (modelLower.includes('intel') || modelLower.includes('arc') ||
|
|
638
|
+
modelLower.includes('iris') || modelLower.includes('uhd')) {
|
|
639
|
+
return 'Intel';
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
if (modelLower.includes('apple') || modelLower.includes('m1') ||
|
|
643
|
+
modelLower.includes('m2') || modelLower.includes('m3') || modelLower.includes('m4')) {
|
|
644
|
+
return 'Apple';
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
return fallback;
|
|
648
|
+
}
|
|
649
|
+
|
|
548
650
|
async runQuickBenchmark() {
|
|
549
651
|
|
|
550
652
|
const start = process.hrtime.bigint();
|
|
@@ -576,4 +678,4 @@ class HardwareDetector {
|
|
|
576
678
|
|
|
577
679
|
}
|
|
578
680
|
|
|
579
|
-
module.exports = HardwareDetector;
|
|
681
|
+
module.exports = HardwareDetector;
|
package/src/hardware/specs.js
CHANGED
|
@@ -71,6 +71,13 @@ class HardwareSpecs {
|
|
|
71
71
|
'NVIDIA GeForce RTX 3060 Ti': { score: 75, vram: 8, tdp: 200, dedicated: true },
|
|
72
72
|
'NVIDIA GeForce RTX 3060': { score: 70, vram: 12, tdp: 170, dedicated: true },
|
|
73
73
|
|
|
74
|
+
// NVIDIA Data Center / Workstation
|
|
75
|
+
'NVIDIA H100': { score: 100, vram: 80, tdp: 700, dedicated: true },
|
|
76
|
+
'NVIDIA A100': { score: 94, vram: 80, tdp: 400, dedicated: true },
|
|
77
|
+
'NVIDIA Tesla P100': { score: 74, vram: 16, tdp: 250, dedicated: true },
|
|
78
|
+
'NVIDIA GB10 Grace Blackwell': { score: 96, vram: 96, tdp: 140, dedicated: true },
|
|
79
|
+
'NVIDIA DGX Spark (GB10)': { score: 96, vram: 96, tdp: 140, dedicated: true },
|
|
80
|
+
|
|
74
81
|
// AMD RX 7000 Series
|
|
75
82
|
'AMD Radeon RX 7900 XTX': { score: 92, vram: 24, tdp: 355, dedicated: true },
|
|
76
83
|
'AMD Radeon RX 7900 XT': { score: 88, vram: 20, tdp: 300, dedicated: true },
|
|
@@ -283,4 +290,4 @@ class HardwareSpecs {
|
|
|
283
290
|
}
|
|
284
291
|
}
|
|
285
292
|
|
|
286
|
-
module.exports = HardwareSpecs;
|
|
293
|
+
module.exports = HardwareSpecs;
|
package/src/index.js
CHANGED
|
@@ -8,6 +8,12 @@ const OllamaClient = require('./ollama/client');
|
|
|
8
8
|
const { getLogger } = require('./utils/logger');
|
|
9
9
|
const { getOllamaModelsIntegration, OllamaNativeScraper } = require('./ollama/native-scraper');
|
|
10
10
|
const VerboseProgress = require('./utils/verbose-progress');
|
|
11
|
+
const SpeculativeDecodingEstimator = require('./models/speculative-decoding-estimator');
|
|
12
|
+
const {
|
|
13
|
+
normalizeRuntime,
|
|
14
|
+
getRuntimePullCommand,
|
|
15
|
+
getRuntimeRunCommand
|
|
16
|
+
} = require('./runtime/runtime-support');
|
|
11
17
|
|
|
12
18
|
class LLMChecker {
|
|
13
19
|
constructor(options = {}) {
|
|
@@ -17,6 +23,7 @@ class LLMChecker {
|
|
|
17
23
|
this.ollamaScraper = new OllamaNativeScraper();
|
|
18
24
|
this.compatibilityAnalyzer = new CompatibilityAnalyzer();
|
|
19
25
|
this.performanceAnalyzer = new PerformanceAnalyzer();
|
|
26
|
+
this.speculativeDecodingEstimator = new SpeculativeDecodingEstimator();
|
|
20
27
|
this.ollamaClient = new OllamaClient();
|
|
21
28
|
this.logger = getLogger().createChild('LLMChecker');
|
|
22
29
|
this.verbose = options.verbose !== false; // Default to verbose unless explicitly disabled
|
|
@@ -286,7 +293,7 @@ class LLMChecker {
|
|
|
286
293
|
if (platform === 'apple_silicon') {
|
|
287
294
|
return await this.analyzeWithAppleSiliconHeuristics(hardware, staticModels, ollamaIntegration, options);
|
|
288
295
|
} else {
|
|
289
|
-
return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration);
|
|
296
|
+
return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration, options);
|
|
290
297
|
}
|
|
291
298
|
}
|
|
292
299
|
|
|
@@ -367,7 +374,12 @@ class LLMChecker {
|
|
|
367
374
|
}))
|
|
368
375
|
};
|
|
369
376
|
|
|
370
|
-
return
|
|
377
|
+
return this.attachSpeculativeDecodingEstimates(
|
|
378
|
+
mappedResults,
|
|
379
|
+
[...mappedResults.compatible, ...mappedResults.marginal],
|
|
380
|
+
hardware,
|
|
381
|
+
options.runtime
|
|
382
|
+
);
|
|
371
383
|
}
|
|
372
384
|
|
|
373
385
|
async integrateOllamaModels(hardware, availableModels) {
|
|
@@ -473,7 +485,7 @@ class LLMChecker {
|
|
|
473
485
|
return integration;
|
|
474
486
|
}
|
|
475
487
|
|
|
476
|
-
async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration) {
|
|
488
|
+
async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration, options = {}) {
|
|
477
489
|
this.logger.info('Using mathematical heuristics combining database + local models');
|
|
478
490
|
|
|
479
491
|
try {
|
|
@@ -594,8 +606,13 @@ class LLMChecker {
|
|
|
594
606
|
});
|
|
595
607
|
|
|
596
608
|
this.logger.info(`Mathematical heuristic results: ${compatibility.compatible.length} compatible, ${compatibility.marginal.length} marginal, ${compatibility.incompatible.length} incompatible`);
|
|
597
|
-
|
|
598
|
-
return
|
|
609
|
+
|
|
610
|
+
return this.attachSpeculativeDecodingEstimates(
|
|
611
|
+
compatibility,
|
|
612
|
+
allUniqueModels,
|
|
613
|
+
hardware,
|
|
614
|
+
options.runtime
|
|
615
|
+
);
|
|
599
616
|
|
|
600
617
|
} catch (error) {
|
|
601
618
|
this.logger.error('Mathematical heuristic analysis failed, using fallback', { error: error.message });
|
|
@@ -715,6 +732,7 @@ class LLMChecker {
|
|
|
715
732
|
return {
|
|
716
733
|
...existingModel,
|
|
717
734
|
ollamaId: ollamaModel.model_identifier,
|
|
735
|
+
frameworks: Array.from(new Set([...(existingModel.frameworks || []), 'ollama', 'vllm', 'mlx'])),
|
|
718
736
|
pulls: ollamaModel.pulls,
|
|
719
737
|
lastUpdated: ollamaModel.last_updated,
|
|
720
738
|
description: ollamaModel.description || existingModel.description,
|
|
@@ -726,7 +744,7 @@ class LLMChecker {
|
|
|
726
744
|
},
|
|
727
745
|
installation: {
|
|
728
746
|
...existingModel.installation,
|
|
729
|
-
|
|
747
|
+
...this.createRuntimeInstallationCommands(ollamaModel.model_identifier, ollamaModel.model_name || existingModel.name)
|
|
730
748
|
}
|
|
731
749
|
};
|
|
732
750
|
}
|
|
@@ -786,7 +804,7 @@ class LLMChecker {
|
|
|
786
804
|
type: 'local',
|
|
787
805
|
category: category,
|
|
788
806
|
specialization: specialization,
|
|
789
|
-
frameworks: ['ollama'],
|
|
807
|
+
frameworks: ['ollama', 'vllm', 'mlx'],
|
|
790
808
|
requirements: {
|
|
791
809
|
ram: Math.ceil(sizeNum * 0.6) || 2,
|
|
792
810
|
vram: Math.ceil(sizeNum * 0.4) || 0,
|
|
@@ -794,7 +812,7 @@ class LLMChecker {
|
|
|
794
812
|
storage: realStorageSize || Math.ceil(sizeNum * 0.7) || 1
|
|
795
813
|
},
|
|
796
814
|
installation: {
|
|
797
|
-
|
|
815
|
+
...this.createRuntimeInstallationCommands(ollamaModel.model_identifier, ollamaModel.model_name),
|
|
798
816
|
description: ollamaModel.description || 'Available in Ollama library'
|
|
799
817
|
},
|
|
800
818
|
description: ollamaModel.description || `${ollamaModel.model_name} from Ollama`,
|
|
@@ -919,7 +937,7 @@ class LLMChecker {
|
|
|
919
937
|
type: 'local',
|
|
920
938
|
category: category,
|
|
921
939
|
specialization: specialization,
|
|
922
|
-
frameworks: ['ollama'],
|
|
940
|
+
frameworks: ['ollama', 'vllm', 'mlx'],
|
|
923
941
|
requirements: {
|
|
924
942
|
ram: Math.ceil((parseFloat(size) || 4) * 0.6),
|
|
925
943
|
vram: Math.ceil((parseFloat(size) || 4) * 0.4),
|
|
@@ -927,7 +945,7 @@ class LLMChecker {
|
|
|
927
945
|
storage: Math.ceil((parseFloat(size) || 4) * 0.7)
|
|
928
946
|
},
|
|
929
947
|
installation: {
|
|
930
|
-
|
|
948
|
+
...this.createRuntimeInstallationCommands(cloudModel.model_identifier, cloudModel.model_name),
|
|
931
949
|
description: cloudModel.description || 'Model from Ollama library'
|
|
932
950
|
},
|
|
933
951
|
year: 2024,
|
|
@@ -970,6 +988,54 @@ class LLMChecker {
|
|
|
970
988
|
});
|
|
971
989
|
}
|
|
972
990
|
|
|
991
|
+
createRuntimeInstallationCommands(modelIdentifier, modelName) {
|
|
992
|
+
const identifier = String(modelIdentifier || modelName || 'model').trim();
|
|
993
|
+
const runtimeModel = {
|
|
994
|
+
model_identifier: identifier,
|
|
995
|
+
ollamaId: identifier,
|
|
996
|
+
name: modelName || identifier
|
|
997
|
+
};
|
|
998
|
+
|
|
999
|
+
return {
|
|
1000
|
+
ollama: `ollama pull ${identifier}`,
|
|
1001
|
+
vllm: getRuntimeRunCommand(runtimeModel, 'vllm'),
|
|
1002
|
+
vllmPull: getRuntimePullCommand(runtimeModel, 'vllm'),
|
|
1003
|
+
mlx: getRuntimeRunCommand(runtimeModel, 'mlx'),
|
|
1004
|
+
mlxPull: getRuntimePullCommand(runtimeModel, 'mlx')
|
|
1005
|
+
};
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
attachSpeculativeDecodingEstimates(resultGroups, candidates, hardware, runtime = 'ollama') {
|
|
1009
|
+
const selectedRuntime = normalizeRuntime(runtime);
|
|
1010
|
+
const candidatePool = Array.isArray(candidates) ? candidates : [];
|
|
1011
|
+
|
|
1012
|
+
const withEstimate = (items = []) =>
|
|
1013
|
+
items.map((model) => {
|
|
1014
|
+
const estimate = this.speculativeDecodingEstimator.estimate({
|
|
1015
|
+
model,
|
|
1016
|
+
candidates: candidatePool,
|
|
1017
|
+
hardware,
|
|
1018
|
+
runtime: selectedRuntime
|
|
1019
|
+
});
|
|
1020
|
+
|
|
1021
|
+
if (!estimate) {
|
|
1022
|
+
return model;
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
return {
|
|
1026
|
+
...model,
|
|
1027
|
+
speculativeDecoding: estimate
|
|
1028
|
+
};
|
|
1029
|
+
});
|
|
1030
|
+
|
|
1031
|
+
return {
|
|
1032
|
+
...resultGroups,
|
|
1033
|
+
compatible: withEstimate(resultGroups.compatible),
|
|
1034
|
+
marginal: withEstimate(resultGroups.marginal),
|
|
1035
|
+
incompatible: withEstimate(resultGroups.incompatible)
|
|
1036
|
+
};
|
|
1037
|
+
}
|
|
1038
|
+
|
|
973
1039
|
async generateOllamaRecommendations(hardware, availableModels, installedModels) {
|
|
974
1040
|
const recommendations = [];
|
|
975
1041
|
const installedNames = new Set(installedModels.map(m => m.name.toLowerCase()));
|
|
@@ -2286,4 +2352,4 @@ class LLMChecker {
|
|
|
2286
2352
|
|
|
2287
2353
|
}
|
|
2288
2354
|
|
|
2289
|
-
module.exports = LLMChecker;
|
|
2355
|
+
module.exports = LLMChecker;
|
|
@@ -999,10 +999,16 @@ class ExpandedModelsDatabase {
|
|
|
999
999
|
} else if (hasDedicatedGPU) {
|
|
1000
1000
|
// Dedicated GPU - much better performance
|
|
1001
1001
|
let gpuTPS = 30;
|
|
1002
|
-
if (gpuModel.toLowerCase().includes('
|
|
1002
|
+
if (gpuModel.toLowerCase().includes('gb10') ||
|
|
1003
|
+
gpuModel.toLowerCase().includes('grace blackwell') ||
|
|
1004
|
+
gpuModel.toLowerCase().includes('dgx spark')) gpuTPS = 90;
|
|
1005
|
+
else if (gpuModel.toLowerCase().includes('h100')) gpuTPS = 120;
|
|
1006
|
+
else if (gpuModel.toLowerCase().includes('a100')) gpuTPS = 95;
|
|
1007
|
+
else if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
|
|
1003
1008
|
else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 50;
|
|
1004
1009
|
else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 40;
|
|
1005
1010
|
else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 30;
|
|
1011
|
+
else if (gpuModel.toLowerCase().includes('p100')) gpuTPS = 32;
|
|
1006
1012
|
else if (vramGB >= 16) gpuTPS = 45;
|
|
1007
1013
|
else if (vramGB >= 8) gpuTPS = 35;
|
|
1008
1014
|
else if (vramGB >= 4) gpuTPS = 25;
|
|
@@ -1139,4 +1145,4 @@ class ExpandedModelsDatabase {
|
|
|
1139
1145
|
}
|
|
1140
1146
|
}
|
|
1141
1147
|
|
|
1142
|
-
module.exports = ExpandedModelsDatabase;
|
|
1148
|
+
module.exports = ExpandedModelsDatabase;
|
|
@@ -170,6 +170,7 @@ class ScoringEngine {
|
|
|
170
170
|
// NVIDIA - based on real llama.cpp/Ollama benchmarks
|
|
171
171
|
'cuda_h100': 120, // ~100-140 TPS for 7B Q4
|
|
172
172
|
'cuda_a100': 90, // ~80-100 TPS for 7B Q4
|
|
173
|
+
'cuda_gb10': 95, // Grace Blackwell / DGX Spark class
|
|
173
174
|
'cuda_4090': 70, // ~60-80 TPS for 7B Q4
|
|
174
175
|
'cuda_4080': 55, // ~50-60 TPS for 7B Q4
|
|
175
176
|
'cuda_3090': 50, // ~45-55 TPS for 7B Q4
|
|
@@ -177,6 +178,7 @@ class ScoringEngine {
|
|
|
177
178
|
'cuda_3070': 32, // ~28-35 TPS for 7B Q4
|
|
178
179
|
'cuda_3060': 25, // ~20-28 TPS for 7B Q4
|
|
179
180
|
'cuda_2080': 28, // ~25-30 TPS for 7B Q4
|
|
181
|
+
'cuda_p100': 30, // Tesla P100 class
|
|
180
182
|
'cuda_default': 30,
|
|
181
183
|
|
|
182
184
|
// AMD - slightly lower than equivalent NVIDIA
|
|
@@ -518,8 +520,10 @@ class ScoringEngine {
|
|
|
518
520
|
const gpuModel = (hardware.summary.gpuModel || '').toLowerCase();
|
|
519
521
|
|
|
520
522
|
if (backend === 'cuda') {
|
|
523
|
+
if (gpuModel.includes('gb10') || gpuModel.includes('grace blackwell') || gpuModel.includes('dgx spark')) return 'cuda_gb10';
|
|
521
524
|
if (gpuModel.includes('h100')) return 'cuda_h100';
|
|
522
525
|
if (gpuModel.includes('a100')) return 'cuda_a100';
|
|
526
|
+
if (gpuModel.includes('p100')) return 'cuda_p100';
|
|
523
527
|
if (gpuModel.includes('4090')) return 'cuda_4090';
|
|
524
528
|
if (gpuModel.includes('4080')) return 'cuda_4080';
|
|
525
529
|
if (gpuModel.includes('3090')) return 'cuda_3090';
|