llm-checker 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/analyzer/compatibility.js +20 -0
- package/bin/cli.js +14 -0
- package/bin/enhanced_cli.js +133 -36
- package/package.json +5 -3
- package/src/ai/multi-objective-selector.js +28 -4
- package/src/hardware/backends/cuda-detector.js +32 -11
- package/src/hardware/detector.js +107 -5
- package/src/hardware/specs.js +8 -1
- package/src/index.js +77 -11
- package/src/models/expanded_database.js +8 -2
- package/src/models/scoring-engine.js +4 -0
- package/src/models/speculative-decoding-estimator.js +245 -0
- package/src/runtime/runtime-support.js +174 -0
- package/bin/CLAUDE.md +0 -27
- package/src/CLAUDE.md +0 -18
- package/src/data/CLAUDE.md +0 -17
- package/src/hardware/CLAUDE.md +0 -18
- package/src/hardware/backends/CLAUDE.md +0 -17
- package/src/models/CLAUDE.md +0 -23
- package/src/ollama/CLAUDE.md +0 -30
- package/src/plugins/CLAUDE.md +0 -17
- package/src/utils/CLAUDE.md +0 -17
package/README.md
CHANGED
|
@@ -52,6 +52,20 @@ Choosing the right LLM for your hardware is complex. With thousands of model var
|
|
|
52
52
|
|
|
53
53
|
---
|
|
54
54
|
|
|
55
|
+
## Comparison with Other Tooling (e.g. `llmfit`)
|
|
56
|
+
|
|
57
|
+
LLM Checker and `llmfit` solve related but different problems:
|
|
58
|
+
|
|
59
|
+
| Tool | Primary Focus | Typical Output |
|
|
60
|
+
|------|---------------|----------------|
|
|
61
|
+
| **LLM Checker** | Hardware-aware **model selection** for local inference | Ranked recommendations, compatibility scores, pull/run commands |
|
|
62
|
+
| **llmfit** | LLM workflow support and model-fit evaluation from another angle | Different optimization workflow and selection heuristics |
|
|
63
|
+
|
|
64
|
+
If your goal is: *"What should I run on this exact machine right now?"*, use **LLM Checker** first.
|
|
65
|
+
If your goal is broader experimentation across custom pipelines, using both tools can be complementary.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
55
69
|
## Installation
|
|
56
70
|
|
|
57
71
|
```bash
|
|
package/analyzer/compatibility.js
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
const { getLogger } = require('../src/utils/logger');
|
|
2
|
+
const {
|
|
3
|
+
normalizeRuntime,
|
|
4
|
+
getRuntimeDisplayName,
|
|
5
|
+
runtimeSupportedOnHardware,
|
|
6
|
+
runtimeSupportsSpeculativeDecoding
|
|
7
|
+
} = require('../src/runtime/runtime-support');
|
|
2
8
|
|
|
3
9
|
class CompatibilityAnalyzer {
|
|
4
10
|
constructor() {
|
|
@@ -451,6 +457,7 @@ class CompatibilityAnalyzer {
|
|
|
451
457
|
|
|
452
458
|
generateRecommendations(hardware, results, options = {}) {
|
|
453
459
|
const recommendations = [];
|
|
460
|
+
const runtime = normalizeRuntime(options.runtime || 'ollama');
|
|
454
461
|
const tier = this.getHardwareTier(hardware);
|
|
455
462
|
|
|
456
463
|
if (hardware.memory.total < 16) {
|
|
@@ -511,6 +518,19 @@ class CompatibilityAnalyzer {
|
|
|
511
518
|
}
|
|
512
519
|
}
|
|
513
520
|
|
|
521
|
+
if (runtime !== 'ollama') {
|
|
522
|
+
const runtimeLabel = getRuntimeDisplayName(runtime);
|
|
523
|
+
if (runtimeSupportedOnHardware(runtime, hardware)) {
|
|
524
|
+
recommendations.push(`Runtime selected: ${runtimeLabel}`);
|
|
525
|
+
} else {
|
|
526
|
+
recommendations.push(`${runtimeLabel} is not recommended on this hardware (fallback to Ollama).`);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
if (runtimeSupportsSpeculativeDecoding(runtime)) {
|
|
530
|
+
recommendations.push(`Enable speculative decoding in ${runtimeLabel} for higher throughput.`);
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
|
|
514
534
|
return recommendations;
|
|
515
535
|
}
|
|
516
536
|
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const majorNodeVersion = Number.parseInt(process.versions.node.split('.')[0], 10);
|
|
5
|
+
|
|
6
|
+
if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
|
|
7
|
+
console.error(
|
|
8
|
+
`[llm-checker] Unsupported Node.js version: ${process.versions.node}. ` +
|
|
9
|
+
'Please use Node.js 16 or newer.'
|
|
10
|
+
);
|
|
11
|
+
process.exit(1);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
require('./enhanced_cli');
|
package/bin/enhanced_cli.js
CHANGED
|
@@ -16,6 +16,14 @@ function getLLMChecker() {
|
|
|
16
16
|
const { getLogger } = require('../src/utils/logger');
|
|
17
17
|
const fs = require('fs');
|
|
18
18
|
const path = require('path');
|
|
19
|
+
const {
|
|
20
|
+
SUPPORTED_RUNTIMES,
|
|
21
|
+
normalizeRuntime,
|
|
22
|
+
runtimeSupportedOnHardware,
|
|
23
|
+
getRuntimeDisplayName,
|
|
24
|
+
getRuntimeCommandSet
|
|
25
|
+
} = require('../src/runtime/runtime-support');
|
|
26
|
+
const SpeculativeDecodingEstimator = require('../src/models/speculative-decoding-estimator');
|
|
19
27
|
|
|
20
28
|
// ASCII Art for each command - Large text banners
|
|
21
29
|
const ASCII_ART = {
|
|
@@ -1406,10 +1414,18 @@ function displaySimplifiedSystemInfo(hardware) {
|
|
|
1406
1414
|
console.log(`Hardware Tier: ${tierColor.bold(tier)}`);
|
|
1407
1415
|
}
|
|
1408
1416
|
|
|
1409
|
-
async function displayModelRecommendations(analysis, hardware, useCase = 'general', limit = 1) {
|
|
1417
|
+
async function displayModelRecommendations(analysis, hardware, useCase = 'general', limit = 1, runtime = 'ollama') {
|
|
1410
1418
|
const title = limit === 1 ? 'RECOMMENDED MODEL' : `TOP ${limit} COMPATIBLE MODELS`;
|
|
1411
1419
|
console.log(chalk.green.bold(`\n${title}`));
|
|
1412
1420
|
console.log(chalk.gray('─'.repeat(50)));
|
|
1421
|
+
|
|
1422
|
+
const selectedRuntime = normalizeRuntime(runtime);
|
|
1423
|
+
const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
|
|
1424
|
+
const speculativeEstimator = new SpeculativeDecodingEstimator();
|
|
1425
|
+
const speculativeCandidatePool = [
|
|
1426
|
+
...(analysis?.compatible || []),
|
|
1427
|
+
...(analysis?.marginal || [])
|
|
1428
|
+
];
|
|
1413
1429
|
|
|
1414
1430
|
// Find the best models from compatible models considering use case
|
|
1415
1431
|
let selectedModels = [];
|
|
@@ -1760,42 +1776,75 @@ async function displayModelRecommendations(analysis, hardware, useCase = 'genera
|
|
|
1760
1776
|
if (model.performanceEstimate) {
|
|
1761
1777
|
console.log(`Estimated Speed: ${chalk.yellow(model.performanceEstimate.estimatedTokensPerSecond || 'N/A')} tokens/sec`);
|
|
1762
1778
|
}
|
|
1763
|
-
|
|
1764
|
-
|
|
1779
|
+
|
|
1780
|
+
console.log(`Runtime: ${chalk.white(runtimeLabel)}`);
|
|
1781
|
+
const runtimeCommands = getRuntimeCommandSet(model, selectedRuntime);
|
|
1782
|
+
|
|
1783
|
+
// Check installation only when using Ollama runtime.
|
|
1765
1784
|
let isInstalled = false;
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1785
|
+
if (selectedRuntime === 'ollama') {
|
|
1786
|
+
try {
|
|
1787
|
+
isInstalled = await checkIfModelInstalled(model, analysis.ollamaInfo);
|
|
1788
|
+
if (isInstalled) {
|
|
1789
|
+
console.log(`Status: ${chalk.green('Already installed in Ollama')}`);
|
|
1790
|
+
} else if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
|
|
1791
|
+
console.log(`Status: ${chalk.gray('Available for installation')}`);
|
|
1792
|
+
} else {
|
|
1793
|
+
console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
|
|
1794
|
+
}
|
|
1795
|
+
} catch (installCheckError) {
|
|
1796
|
+
if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
|
|
1797
|
+
console.log(`Status: ${chalk.gray('Available for installation')}`);
|
|
1798
|
+
} else {
|
|
1799
|
+
console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
|
|
1800
|
+
}
|
|
1774
1801
|
}
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
if (
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1802
|
+
|
|
1803
|
+
const ollamaCommand = getOllamaInstallCommand(model);
|
|
1804
|
+
if (ollamaCommand) {
|
|
1805
|
+
const modelName = extractModelName(ollamaCommand);
|
|
1806
|
+
if (isInstalled) {
|
|
1807
|
+
console.log(`\nRun: ${chalk.cyan.bold(`ollama run ${modelName}`)}`);
|
|
1808
|
+
} else {
|
|
1809
|
+
console.log(`\nPull: ${chalk.cyan.bold(ollamaCommand)}`);
|
|
1810
|
+
}
|
|
1811
|
+
} else if (model.ollamaTag || model.ollamaId) {
|
|
1812
|
+
const tag = model.ollamaTag || model.ollamaId;
|
|
1813
|
+
if (isInstalled) {
|
|
1814
|
+
console.log(`\nRun: ${chalk.cyan.bold(`ollama run ${tag}`)}`);
|
|
1815
|
+
} else {
|
|
1816
|
+
console.log(`\nPull: ${chalk.cyan.bold(`ollama pull ${tag}`)}`);
|
|
1817
|
+
}
|
|
1818
|
+
}
|
|
1819
|
+
} else {
|
|
1820
|
+
console.log(`Status: ${chalk.gray(`${runtimeLabel} runtime selected`)}`);
|
|
1821
|
+
console.log(`\nRun: ${chalk.cyan.bold(runtimeCommands.run)}`);
|
|
1822
|
+
if (index === 0) {
|
|
1823
|
+
console.log(`Install runtime: ${chalk.cyan.bold(runtimeCommands.install)}`);
|
|
1824
|
+
console.log(`Fetch model: ${chalk.cyan.bold(runtimeCommands.pull)}`);
|
|
1781
1825
|
}
|
|
1782
1826
|
}
|
|
1783
1827
|
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1828
|
+
const speculativeInfo =
|
|
1829
|
+
model.speculativeDecoding ||
|
|
1830
|
+
speculativeEstimator.estimate({
|
|
1831
|
+
model,
|
|
1832
|
+
candidates: speculativeCandidatePool,
|
|
1833
|
+
hardware,
|
|
1834
|
+
runtime: selectedRuntime
|
|
1835
|
+
});
|
|
1836
|
+
|
|
1837
|
+
if (speculativeInfo && speculativeInfo.runtime === selectedRuntime) {
|
|
1838
|
+
if (speculativeInfo.enabled) {
|
|
1839
|
+
console.log(
|
|
1840
|
+
`SpecDec: ${chalk.green(`+${speculativeInfo.estimatedThroughputGainPct}%`)} ` +
|
|
1841
|
+
`(${chalk.gray(`draft: ${speculativeInfo.draftModel}`)})`
|
|
1842
|
+
);
|
|
1843
|
+
} else if (speculativeInfo.estimatedSpeedup) {
|
|
1844
|
+
const suggested = speculativeInfo.suggestedDraftModel ? ` with ${speculativeInfo.suggestedDraftModel}` : '';
|
|
1845
|
+
console.log(
|
|
1846
|
+
`SpecDec estimate: ${chalk.yellow(`+${speculativeInfo.estimatedThroughputGainPct}%`)}${chalk.gray(suggested)}`
|
|
1847
|
+
);
|
|
1799
1848
|
}
|
|
1800
1849
|
}
|
|
1801
1850
|
}
|
|
@@ -1807,9 +1856,12 @@ async function displayModelRecommendations(analysis, hardware, useCase = 'genera
|
|
|
1807
1856
|
return selectedModels;
|
|
1808
1857
|
}
|
|
1809
1858
|
|
|
1810
|
-
async function displayQuickStartCommands(analysis, recommendedModel = null, allRecommended = null) {
|
|
1859
|
+
async function displayQuickStartCommands(analysis, recommendedModel = null, allRecommended = null, runtime = 'ollama') {
|
|
1811
1860
|
console.log(chalk.yellow.bold('\nQUICK START'));
|
|
1812
1861
|
console.log(chalk.gray('─'.repeat(50)));
|
|
1862
|
+
|
|
1863
|
+
const selectedRuntime = normalizeRuntime(runtime);
|
|
1864
|
+
const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
|
|
1813
1865
|
|
|
1814
1866
|
// Use the first model from allRecommended if available, otherwise fallback to recommendedModel
|
|
1815
1867
|
let bestModel = (allRecommended && allRecommended.length > 0) ? allRecommended[0] : recommendedModel;
|
|
@@ -1824,6 +1876,33 @@ async function displayQuickStartCommands(analysis, recommendedModel = null, allR
|
|
|
1824
1876
|
}
|
|
1825
1877
|
}
|
|
1826
1878
|
|
|
1879
|
+
if (selectedRuntime !== 'ollama') {
|
|
1880
|
+
if (!bestModel) {
|
|
1881
|
+
console.log(`1. Try expanding search: ${chalk.cyan('llm-checker check --include-cloud')}`);
|
|
1882
|
+
return;
|
|
1883
|
+
}
|
|
1884
|
+
|
|
1885
|
+
const runtimeCommands = getRuntimeCommandSet(bestModel, selectedRuntime);
|
|
1886
|
+
console.log(`1. Install ${runtimeLabel}:`);
|
|
1887
|
+
console.log(` ${chalk.cyan.bold(runtimeCommands.install)}`);
|
|
1888
|
+
console.log(`2. Fetch model weights:`);
|
|
1889
|
+
console.log(` ${chalk.cyan.bold(runtimeCommands.pull)}`);
|
|
1890
|
+
console.log(`3. Run model:`);
|
|
1891
|
+
console.log(` ${chalk.cyan.bold(runtimeCommands.run)}`);
|
|
1892
|
+
|
|
1893
|
+
const speculative = bestModel.speculativeDecoding;
|
|
1894
|
+
if (speculative && speculative.enabled) {
|
|
1895
|
+
console.log(`4. SpecDec suggestion (${chalk.green(`+${speculative.estimatedThroughputGainPct}%`)}):`);
|
|
1896
|
+
if (selectedRuntime === 'vllm') {
|
|
1897
|
+
console.log(` ${chalk.cyan.bold(`${runtimeCommands.run} --speculative-model '${speculative.draftModelRef || speculative.draftModel}'`)}`);
|
|
1898
|
+
} else if (selectedRuntime === 'mlx') {
|
|
1899
|
+
console.log(` ${chalk.gray(`Use draft model ${speculative.draftModelRef || speculative.draftModel} when enabling speculative decoding in MLX-LM`)}`);
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
|
|
1903
|
+
return;
|
|
1904
|
+
}
|
|
1905
|
+
|
|
1827
1906
|
if (analysis.ollamaInfo && !analysis.ollamaInfo.available) {
|
|
1828
1907
|
console.log(`1. Install Ollama: ${chalk.underline('https://ollama.ai')}`);
|
|
1829
1908
|
console.log(`2. Come back and run this command again`);
|
|
@@ -1992,6 +2071,7 @@ program
|
|
|
1992
2071
|
.option('--min-size <size>', 'Minimum model size to consider (e.g., "7B" or "7GB")')
|
|
1993
2072
|
.option('--include-cloud', 'Include cloud models in analysis')
|
|
1994
2073
|
.option('--ollama-only', 'Only show models available in Ollama')
|
|
2074
|
+
.option('--runtime <runtime>', `Inference runtime (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
|
|
1995
2075
|
.option('--performance-test', 'Run performance benchmarks')
|
|
1996
2076
|
.option('--show-ollama-analysis', 'Show detailed Ollama model analysis')
|
|
1997
2077
|
.option('--no-verbose', 'Disable step-by-step progress display')
|
|
@@ -2008,6 +2088,16 @@ program
|
|
|
2008
2088
|
}
|
|
2009
2089
|
|
|
2010
2090
|
const hardware = await checker.getSystemInfo();
|
|
2091
|
+
let selectedRuntime = normalizeRuntime(options.runtime);
|
|
2092
|
+
if (!runtimeSupportedOnHardware(selectedRuntime, hardware)) {
|
|
2093
|
+
const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
|
|
2094
|
+
console.log(
|
|
2095
|
+
chalk.yellow(
|
|
2096
|
+
`\nWarning: ${runtimeLabel} is not supported on this hardware. Falling back to Ollama.`
|
|
2097
|
+
)
|
|
2098
|
+
);
|
|
2099
|
+
selectedRuntime = 'ollama';
|
|
2100
|
+
}
|
|
2011
2101
|
|
|
2012
2102
|
// Normalize and fix use-case typos
|
|
2013
2103
|
const normalizeUseCase = (useCase = '') => {
|
|
@@ -2049,7 +2139,8 @@ program
|
|
|
2049
2139
|
performanceTest: options.performanceTest,
|
|
2050
2140
|
limit: parseInt(options.limit) || 10,
|
|
2051
2141
|
maxSize: maxSize,
|
|
2052
|
-
minSize: minSize
|
|
2142
|
+
minSize: minSize,
|
|
2143
|
+
runtime: selectedRuntime
|
|
2053
2144
|
});
|
|
2054
2145
|
|
|
2055
2146
|
if (!verboseEnabled) {
|
|
@@ -2058,8 +2149,14 @@ program
|
|
|
2058
2149
|
|
|
2059
2150
|
// Simplified output - show only essential information
|
|
2060
2151
|
displaySimplifiedSystemInfo(hardware);
|
|
2061
|
-
const recommendedModels = await displayModelRecommendations(
|
|
2062
|
-
|
|
2152
|
+
const recommendedModels = await displayModelRecommendations(
|
|
2153
|
+
analysis,
|
|
2154
|
+
hardware,
|
|
2155
|
+
normalizedUseCase,
|
|
2156
|
+
parseInt(options.limit) || 1,
|
|
2157
|
+
selectedRuntime
|
|
2158
|
+
);
|
|
2159
|
+
await displayQuickStartCommands(analysis, recommendedModels[0], recommendedModels, selectedRuntime);
|
|
2063
2160
|
|
|
2064
2161
|
} catch (error) {
|
|
2065
2162
|
console.error(chalk.red('\nError:'), error.message);
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-checker",
|
|
3
|
-
"version": "3.2.0",
|
|
3
|
+
"version": "3.2.1",
|
|
4
4
|
"description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
|
|
5
5
|
"bin": {
|
|
6
|
-
"llm-checker": "bin/enhanced_cli.js",
|
|
7
|
-
"ollama-checker": "bin/enhanced_cli.js",
|
|
6
|
+
"llm-checker": "bin/cli.js",
|
|
7
|
+
"ollama-checker": "bin/cli.js",
|
|
8
8
|
"llm-checker-mcp": "bin/mcp-server.mjs"
|
|
9
9
|
},
|
|
10
10
|
"main": "src/index.js",
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
"test:gpu": "node tests/gpu-detection/multi-gpu.test.js",
|
|
14
14
|
"test:platform": "node tests/platform-tests/cross-platform.test.js",
|
|
15
15
|
"test:ui": "node tests/ui-tests/interface.test.js",
|
|
16
|
+
"test:runtime": "node tests/runtime-specdec-tests.js",
|
|
17
|
+
"test:hardware-detector": "node tests/hardware-detector-regression.js",
|
|
16
18
|
"test:all": "node tests/run-all-tests.js",
|
|
17
19
|
"build": "echo 'No build needed'",
|
|
18
20
|
"dev": "node bin/enhanced_cli.js",
|
|
package/src/ai/multi-objective-selector.js
CHANGED
|
@@ -387,6 +387,9 @@ class MultiObjectiveSelector {
|
|
|
387
387
|
// 2) Memory bandwidth (20%) - simplified estimation
|
|
388
388
|
let memBandwidthGBs = 50; // fallback
|
|
389
389
|
const gpu = gpuModel.toLowerCase();
|
|
390
|
+
if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) memBandwidthGBs = 1000;
|
|
391
|
+
else if (gpu.includes('h100')) memBandwidthGBs = 3000;
|
|
392
|
+
else if (gpu.includes('a100')) memBandwidthGBs = 2039;
|
|
390
393
|
if (gpu.includes('m4 pro')) memBandwidthGBs = 273;
|
|
391
394
|
else if (gpu.includes('m4')) memBandwidthGBs = 120;
|
|
392
395
|
else if (gpu.includes('rtx 4090')) memBandwidthGBs = 1008;
|
|
@@ -398,7 +401,10 @@ class MultiObjectiveSelector {
|
|
|
398
401
|
|
|
399
402
|
// 3) Compute (20%) - simplified estimation
|
|
400
403
|
let compute = 0;
|
|
401
|
-
if (gpu.includes('m4 pro')) compute = clamp(28 / 80); // Match main algorithm
|
|
404
|
+
if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) compute = clamp(180 / 80);
|
|
405
|
+
else if (gpu.includes('h100')) compute = clamp(320 / 80);
|
|
406
|
+
else if (gpu.includes('a100')) compute = clamp(250 / 80);
|
|
407
|
+
else if (gpu.includes('m4 pro')) compute = clamp(28 / 80); // Match main algorithm
|
|
402
408
|
else if (gpu.includes('m4')) compute = clamp(15 / 80);
|
|
403
409
|
else if (gpu.includes('rtx 4090')) compute = clamp(165 / 80);
|
|
404
410
|
else if (gpu.includes('rtx 4080')) compute = clamp(121 / 80);
|
|
@@ -448,6 +454,10 @@ class MultiObjectiveSelector {
|
|
|
448
454
|
|
|
449
455
|
// Special flagship GPU detection by model name
|
|
450
456
|
if (gpuModel.toLowerCase().includes('rtx 50') ||
|
|
457
|
+
gpuModel.toLowerCase().includes('gb10') ||
|
|
458
|
+
gpuModel.toLowerCase().includes('grace blackwell') ||
|
|
459
|
+
gpuModel.toLowerCase().includes('dgx spark') ||
|
|
460
|
+
gpuModel.toLowerCase().includes('blackwell') ||
|
|
451
461
|
gpuModel.toLowerCase().includes('h100') ||
|
|
452
462
|
gpuModel.toLowerCase().includes('a100')) {
|
|
453
463
|
tier = 'flagship';
|
|
@@ -599,7 +609,11 @@ class MultiObjectiveSelector {
|
|
|
599
609
|
|
|
600
610
|
// NVIDIA GPU optimizations
|
|
601
611
|
if (gpu.includes('nvidia') || gpu.includes('geforce') || gpu.includes('rtx') || gpu.includes('gtx')) {
|
|
602
|
-
if (gpu.includes('rtx 50')) {
|
|
612
|
+
if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) {
|
|
613
|
+
specs.offloadCapacity = Math.min(ramGB * 0.6, 32);
|
|
614
|
+
specs.memoryEfficiency = 0.96;
|
|
615
|
+
specs.backendOptimization = 1.25;
|
|
616
|
+
} else if (gpu.includes('rtx 50')) {
|
|
603
617
|
// RTX 50xx series - flagship tier with massive VRAM + excellent offload
|
|
604
618
|
specs.offloadCapacity = Math.min(ramGB * 0.5, 24);
|
|
605
619
|
specs.memoryEfficiency = 0.95;
|
|
@@ -732,7 +746,15 @@ class MultiObjectiveSelector {
|
|
|
732
746
|
// GPU-based calculation (dedicated GPU only)
|
|
733
747
|
if (vramGB > 0 && !gpuModel.toLowerCase().includes('iris') && !gpuModel.toLowerCase().includes('integrated')) {
|
|
734
748
|
let gpuTPS = 20; // Conservative GPU baseline
|
|
735
|
-
if (gpuModel.toLowerCase().includes('rtx 50')) {
|
|
749
|
+
if (gpuModel.toLowerCase().includes('gb10') ||
|
|
750
|
+
gpuModel.toLowerCase().includes('grace blackwell') ||
|
|
751
|
+
gpuModel.toLowerCase().includes('dgx spark')) {
|
|
752
|
+
gpuTPS = 85; // GB10 / Grace Blackwell class
|
|
753
|
+
} else if (gpuModel.toLowerCase().includes('h100')) {
|
|
754
|
+
gpuTPS = 120;
|
|
755
|
+
} else if (gpuModel.toLowerCase().includes('a100')) {
|
|
756
|
+
gpuTPS = 95;
|
|
757
|
+
} else if (gpuModel.toLowerCase().includes('rtx 50')) {
|
|
736
758
|
gpuTPS = 60; // RTX 50 series - more realistic
|
|
737
759
|
} else if (gpuModel.toLowerCase().includes('rtx 40')) {
|
|
738
760
|
gpuTPS = 45; // RTX 40 series
|
|
@@ -740,6 +762,8 @@ class MultiObjectiveSelector {
|
|
|
740
762
|
gpuTPS = 35; // RTX 30 series
|
|
741
763
|
} else if (gpuModel.toLowerCase().includes('rtx 20')) {
|
|
742
764
|
gpuTPS = 25; // RTX 20 series
|
|
765
|
+
} else if (gpuModel.toLowerCase().includes('p100')) {
|
|
766
|
+
gpuTPS = 32; // Tesla P100 class
|
|
743
767
|
} else if (vramGB >= 8) {
|
|
744
768
|
gpuTPS = 30; // Other high-end GPUs
|
|
745
769
|
} else if (vramGB >= 4) {
|
|
@@ -817,4 +841,4 @@ class MultiObjectiveSelector {
|
|
|
817
841
|
}
|
|
818
842
|
}
|
|
819
843
|
|
|
820
|
-
module.exports = MultiObjectiveSelector;
|
|
844
|
+
module.exports = MultiObjectiveSelector;
|
|
package/src/hardware/backends/cuda-detector.js
CHANGED
|
@@ -209,8 +209,34 @@ class CUDADetector {
|
|
|
209
209
|
architecture: 'Unknown'
|
|
210
210
|
};
|
|
211
211
|
|
|
212
|
+
// NVIDIA GB10 / Grace Blackwell (DGX Spark)
|
|
213
|
+
if (nameLower.includes('gb10') || nameLower.includes('grace blackwell') ||
|
|
214
|
+
nameLower.includes('dgx spark') || nameLower.includes('blackwell')) {
|
|
215
|
+
capabilities.tensorCores = true;
|
|
216
|
+
capabilities.bf16 = true;
|
|
217
|
+
capabilities.fp8 = true;
|
|
218
|
+
capabilities.computeCapability = '10.0';
|
|
219
|
+
capabilities.architecture = 'Grace Blackwell';
|
|
220
|
+
}
|
|
221
|
+
// H100 (Hopper)
|
|
222
|
+
else if (nameLower.includes('h100') || nameLower.includes('h200')) {
|
|
223
|
+
capabilities.tensorCores = true;
|
|
224
|
+
capabilities.bf16 = true;
|
|
225
|
+
capabilities.fp8 = true;
|
|
226
|
+
capabilities.nvlink = true;
|
|
227
|
+
capabilities.computeCapability = '9.0';
|
|
228
|
+
capabilities.architecture = 'Hopper';
|
|
229
|
+
}
|
|
230
|
+
// Tesla P100 (Pascal)
|
|
231
|
+
else if (nameLower.includes('p100') || nameLower.includes('tesla p100')) {
|
|
232
|
+
capabilities.tensorCores = false;
|
|
233
|
+
capabilities.bf16 = false;
|
|
234
|
+
capabilities.fp8 = false;
|
|
235
|
+
capabilities.computeCapability = '6.0';
|
|
236
|
+
capabilities.architecture = 'Pascal';
|
|
237
|
+
}
|
|
212
238
|
// RTX 50 series (Blackwell)
|
|
213
|
-
if (nameLower.includes('rtx 50') || nameLower.includes('rtx50')) {
|
|
239
|
+
else if (nameLower.includes('rtx 50') || nameLower.includes('rtx50')) {
|
|
214
240
|
capabilities.tensorCores = true;
|
|
215
241
|
capabilities.bf16 = true;
|
|
216
242
|
capabilities.fp8 = true;
|
|
@@ -257,15 +283,6 @@ class CUDADetector {
|
|
|
257
283
|
capabilities.architecture = 'Volta';
|
|
258
284
|
capabilities.nvlink = true;
|
|
259
285
|
}
|
|
260
|
-
// H100 (Hopper)
|
|
261
|
-
else if (nameLower.includes('h100') || nameLower.includes('h200')) {
|
|
262
|
-
capabilities.tensorCores = true;
|
|
263
|
-
capabilities.bf16 = true;
|
|
264
|
-
capabilities.fp8 = true;
|
|
265
|
-
capabilities.nvlink = true;
|
|
266
|
-
capabilities.computeCapability = '9.0';
|
|
267
|
-
capabilities.architecture = 'Hopper';
|
|
268
|
-
}
|
|
269
286
|
|
|
270
287
|
return capabilities;
|
|
271
288
|
}
|
|
@@ -311,6 +328,9 @@ class CUDADetector {
|
|
|
311
328
|
'rtx 2060': 80,
|
|
312
329
|
|
|
313
330
|
// Data center
|
|
331
|
+
'gb10': 95,
|
|
332
|
+
'grace blackwell': 95,
|
|
333
|
+
'dgx spark': 95,
|
|
314
334
|
'h100': 400,
|
|
315
335
|
'h200': 450,
|
|
316
336
|
'a100': 300,
|
|
@@ -318,7 +338,8 @@ class CUDADetector {
|
|
|
318
338
|
'l4': 150,
|
|
319
339
|
'a40': 180,
|
|
320
340
|
't4': 70,
|
|
321
|
-
'v100': 120
|
|
341
|
+
'v100': 120,
|
|
342
|
+
'p100': 45
|
|
322
343
|
};
|
|
323
344
|
|
|
324
345
|
for (const [model, speed] of Object.entries(speedMap)) {
|