llm-checker 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,6 +52,20 @@ Choosing the right LLM for your hardware is complex. With thousands of model var
52
52
 
53
53
  ---
54
54
 
55
+ ## Comparison with Other Tooling (e.g. `llmfit`)
56
+
57
+ LLM Checker and `llmfit` solve related but different problems:
58
+
59
+ | Tool | Primary Focus | Typical Output |
60
+ |------|---------------|----------------|
61
+ | **LLM Checker** | Hardware-aware **model selection** for local inference | Ranked recommendations, compatibility scores, pull/run commands |
62
+ | **llmfit** | LLM workflow support and model-fit evaluation from another angle | Different optimization workflow and selection heuristics |
63
+
64
+ If your question is *"What should I run on this exact machine right now?"*, use **LLM Checker** first.
65
+ If your goal is broader experimentation across custom pipelines, the two tools complement each other.
66
+
67
+ ---
68
+
55
69
  ## Installation
56
70
 
57
71
  ```bash
@@ -1,4 +1,10 @@
1
1
  const { getLogger } = require('../src/utils/logger');
2
+ const {
3
+ normalizeRuntime,
4
+ getRuntimeDisplayName,
5
+ runtimeSupportedOnHardware,
6
+ runtimeSupportsSpeculativeDecoding
7
+ } = require('../src/runtime/runtime-support');
2
8
 
3
9
  class CompatibilityAnalyzer {
4
10
  constructor() {
@@ -451,6 +457,7 @@ class CompatibilityAnalyzer {
451
457
 
452
458
  generateRecommendations(hardware, results, options = {}) {
453
459
  const recommendations = [];
460
+ const runtime = normalizeRuntime(options.runtime || 'ollama');
454
461
  const tier = this.getHardwareTier(hardware);
455
462
 
456
463
  if (hardware.memory.total < 16) {
@@ -511,6 +518,19 @@ class CompatibilityAnalyzer {
511
518
  }
512
519
  }
513
520
 
521
+ if (runtime !== 'ollama') {
522
+ const runtimeLabel = getRuntimeDisplayName(runtime);
523
+ if (runtimeSupportedOnHardware(runtime, hardware)) {
524
+ recommendations.push(`Runtime selected: ${runtimeLabel}`);
525
+ } else {
526
+ recommendations.push(`${runtimeLabel} is not recommended on this hardware (fallback to Ollama).`);
527
+ }
528
+
529
+ if (runtimeSupportsSpeculativeDecoding(runtime)) {
530
+ recommendations.push(`Enable speculative decoding in ${runtimeLabel} for higher throughput.`);
531
+ }
532
+ }
533
+
514
534
  return recommendations;
515
535
  }
516
536
 
package/bin/cli.js ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ const majorNodeVersion = Number.parseInt(process.versions.node.split('.')[0], 10);
5
+
6
+ if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
7
+ console.error(
8
+ `[llm-checker] Unsupported Node.js version: ${process.versions.node}. ` +
9
+ 'Please use Node.js 16 or newer.'
10
+ );
11
+ process.exit(1);
12
+ }
13
+
14
+ require('./enhanced_cli');
@@ -16,6 +16,14 @@ function getLLMChecker() {
16
16
  const { getLogger } = require('../src/utils/logger');
17
17
  const fs = require('fs');
18
18
  const path = require('path');
19
+ const {
20
+ SUPPORTED_RUNTIMES,
21
+ normalizeRuntime,
22
+ runtimeSupportedOnHardware,
23
+ getRuntimeDisplayName,
24
+ getRuntimeCommandSet
25
+ } = require('../src/runtime/runtime-support');
26
+ const SpeculativeDecodingEstimator = require('../src/models/speculative-decoding-estimator');
19
27
 
20
28
  // ASCII Art for each command - Large text banners
21
29
  const ASCII_ART = {
@@ -1406,10 +1414,18 @@ function displaySimplifiedSystemInfo(hardware) {
1406
1414
  console.log(`Hardware Tier: ${tierColor.bold(tier)}`);
1407
1415
  }
1408
1416
 
1409
- async function displayModelRecommendations(analysis, hardware, useCase = 'general', limit = 1) {
1417
+ async function displayModelRecommendations(analysis, hardware, useCase = 'general', limit = 1, runtime = 'ollama') {
1410
1418
  const title = limit === 1 ? 'RECOMMENDED MODEL' : `TOP ${limit} COMPATIBLE MODELS`;
1411
1419
  console.log(chalk.green.bold(`\n${title}`));
1412
1420
  console.log(chalk.gray('─'.repeat(50)));
1421
+
1422
+ const selectedRuntime = normalizeRuntime(runtime);
1423
+ const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
1424
+ const speculativeEstimator = new SpeculativeDecodingEstimator();
1425
+ const speculativeCandidatePool = [
1426
+ ...(analysis?.compatible || []),
1427
+ ...(analysis?.marginal || [])
1428
+ ];
1413
1429
 
1414
1430
  // Find the best models from compatible models considering use case
1415
1431
  let selectedModels = [];
@@ -1760,42 +1776,75 @@ async function displayModelRecommendations(analysis, hardware, useCase = 'genera
1760
1776
  if (model.performanceEstimate) {
1761
1777
  console.log(`Estimated Speed: ${chalk.yellow(model.performanceEstimate.estimatedTokensPerSecond || 'N/A')} tokens/sec`);
1762
1778
  }
1763
-
1764
- // Check if it's already installed by comparing with Ollama integration
1779
+
1780
+ console.log(`Runtime: ${chalk.white(runtimeLabel)}`);
1781
+ const runtimeCommands = getRuntimeCommandSet(model, selectedRuntime);
1782
+
1783
+ // Check installation only when using Ollama runtime.
1765
1784
  let isInstalled = false;
1766
- try {
1767
- isInstalled = await checkIfModelInstalled(model, analysis.ollamaInfo);
1768
- if (isInstalled) {
1769
- console.log(`Status: ${chalk.green('Already installed in Ollama')}`);
1770
- } else if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
1771
- console.log(`Status: ${chalk.gray('Available for installation')}`);
1772
- } else {
1773
- console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
1785
+ if (selectedRuntime === 'ollama') {
1786
+ try {
1787
+ isInstalled = await checkIfModelInstalled(model, analysis.ollamaInfo);
1788
+ if (isInstalled) {
1789
+ console.log(`Status: ${chalk.green('Already installed in Ollama')}`);
1790
+ } else if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
1791
+ console.log(`Status: ${chalk.gray('Available for installation')}`);
1792
+ } else {
1793
+ console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
1794
+ }
1795
+ } catch (installCheckError) {
1796
+ if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
1797
+ console.log(`Status: ${chalk.gray('Available for installation')}`);
1798
+ } else {
1799
+ console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
1800
+ }
1774
1801
  }
1775
- } catch (installCheckError) {
1776
- // If checking installation status fails, show based on Ollama availability
1777
- if (analysis.ollamaInfo && analysis.ollamaInfo.available) {
1778
- console.log(`Status: ${chalk.gray('Available for installation')}`);
1779
- } else {
1780
- console.log(`Status: ${chalk.yellow('Requires Ollama (not detected)')}`);
1802
+
1803
+ const ollamaCommand = getOllamaInstallCommand(model);
1804
+ if (ollamaCommand) {
1805
+ const modelName = extractModelName(ollamaCommand);
1806
+ if (isInstalled) {
1807
+ console.log(`\nRun: ${chalk.cyan.bold(`ollama run ${modelName}`)}`);
1808
+ } else {
1809
+ console.log(`\nPull: ${chalk.cyan.bold(ollamaCommand)}`);
1810
+ }
1811
+ } else if (model.ollamaTag || model.ollamaId) {
1812
+ const tag = model.ollamaTag || model.ollamaId;
1813
+ if (isInstalled) {
1814
+ console.log(`\nRun: ${chalk.cyan.bold(`ollama run ${tag}`)}`);
1815
+ } else {
1816
+ console.log(`\nPull: ${chalk.cyan.bold(`ollama pull ${tag}`)}`);
1817
+ }
1818
+ }
1819
+ } else {
1820
+ console.log(`Status: ${chalk.gray(`${runtimeLabel} runtime selected`)}`);
1821
+ console.log(`\nRun: ${chalk.cyan.bold(runtimeCommands.run)}`);
1822
+ if (index === 0) {
1823
+ console.log(`Install runtime: ${chalk.cyan.bold(runtimeCommands.install)}`);
1824
+ console.log(`Fetch model: ${chalk.cyan.bold(runtimeCommands.pull)}`);
1781
1825
  }
1782
1826
  }
1783
1827
 
1784
- // Show pull/run command directly in each model block (Issue #3)
1785
- const ollamaCommand = getOllamaInstallCommand(model);
1786
- if (ollamaCommand) {
1787
- const modelName = extractModelName(ollamaCommand);
1788
- if (isInstalled) {
1789
- console.log(`\nCommand: ${chalk.cyan.bold(`ollama run ${modelName}`)}`);
1790
- } else {
1791
- console.log(`\nCommand: ${chalk.cyan.bold(ollamaCommand)}`);
1792
- }
1793
- } else if (model.ollamaTag || model.ollamaId) {
1794
- const tag = model.ollamaTag || model.ollamaId;
1795
- if (isInstalled) {
1796
- console.log(`\nCommand: ${chalk.cyan.bold(`ollama run ${tag}`)}`);
1797
- } else {
1798
- console.log(`\nCommand: ${chalk.cyan.bold(`ollama pull ${tag}`)}`);
1828
+ const speculativeInfo =
1829
+ model.speculativeDecoding ||
1830
+ speculativeEstimator.estimate({
1831
+ model,
1832
+ candidates: speculativeCandidatePool,
1833
+ hardware,
1834
+ runtime: selectedRuntime
1835
+ });
1836
+
1837
+ if (speculativeInfo && speculativeInfo.runtime === selectedRuntime) {
1838
+ if (speculativeInfo.enabled) {
1839
+ console.log(
1840
+ `SpecDec: ${chalk.green(`+${speculativeInfo.estimatedThroughputGainPct}%`)} ` +
1841
+ `(${chalk.gray(`draft: ${speculativeInfo.draftModel}`)})`
1842
+ );
1843
+ } else if (speculativeInfo.estimatedSpeedup) {
1844
+ const suggested = speculativeInfo.suggestedDraftModel ? ` with ${speculativeInfo.suggestedDraftModel}` : '';
1845
+ console.log(
1846
+ `SpecDec estimate: ${chalk.yellow(`+${speculativeInfo.estimatedThroughputGainPct}%`)}${chalk.gray(suggested)}`
1847
+ );
1799
1848
  }
1800
1849
  }
1801
1850
  }
@@ -1807,9 +1856,12 @@ async function displayModelRecommendations(analysis, hardware, useCase = 'genera
1807
1856
  return selectedModels;
1808
1857
  }
1809
1858
 
1810
- async function displayQuickStartCommands(analysis, recommendedModel = null, allRecommended = null) {
1859
+ async function displayQuickStartCommands(analysis, recommendedModel = null, allRecommended = null, runtime = 'ollama') {
1811
1860
  console.log(chalk.yellow.bold('\nQUICK START'));
1812
1861
  console.log(chalk.gray('─'.repeat(50)));
1862
+
1863
+ const selectedRuntime = normalizeRuntime(runtime);
1864
+ const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
1813
1865
 
1814
1866
  // Use the first model from allRecommended if available, otherwise fallback to recommendedModel
1815
1867
  let bestModel = (allRecommended && allRecommended.length > 0) ? allRecommended[0] : recommendedModel;
@@ -1824,6 +1876,33 @@ async function displayQuickStartCommands(analysis, recommendedModel = null, allR
1824
1876
  }
1825
1877
  }
1826
1878
 
1879
+ if (selectedRuntime !== 'ollama') {
1880
+ if (!bestModel) {
1881
+ console.log(`1. Try expanding search: ${chalk.cyan('llm-checker check --include-cloud')}`);
1882
+ return;
1883
+ }
1884
+
1885
+ const runtimeCommands = getRuntimeCommandSet(bestModel, selectedRuntime);
1886
+ console.log(`1. Install ${runtimeLabel}:`);
1887
+ console.log(` ${chalk.cyan.bold(runtimeCommands.install)}`);
1888
+ console.log(`2. Fetch model weights:`);
1889
+ console.log(` ${chalk.cyan.bold(runtimeCommands.pull)}`);
1890
+ console.log(`3. Run model:`);
1891
+ console.log(` ${chalk.cyan.bold(runtimeCommands.run)}`);
1892
+
1893
+ const speculative = bestModel.speculativeDecoding;
1894
+ if (speculative && speculative.enabled) {
1895
+ console.log(`4. SpecDec suggestion (${chalk.green(`+${speculative.estimatedThroughputGainPct}%`)}):`);
1896
+ if (selectedRuntime === 'vllm') {
1897
+ console.log(` ${chalk.cyan.bold(`${runtimeCommands.run} --speculative-model '${speculative.draftModelRef || speculative.draftModel}'`)}`);
1898
+ } else if (selectedRuntime === 'mlx') {
1899
+ console.log(` ${chalk.gray(`Use draft model ${speculative.draftModelRef || speculative.draftModel} when enabling speculative decoding in MLX-LM`)}`);
1900
+ }
1901
+ }
1902
+
1903
+ return;
1904
+ }
1905
+
1827
1906
  if (analysis.ollamaInfo && !analysis.ollamaInfo.available) {
1828
1907
  console.log(`1. Install Ollama: ${chalk.underline('https://ollama.ai')}`);
1829
1908
  console.log(`2. Come back and run this command again`);
@@ -1992,6 +2071,7 @@ program
1992
2071
  .option('--min-size <size>', 'Minimum model size to consider (e.g., "7B" or "7GB")')
1993
2072
  .option('--include-cloud', 'Include cloud models in analysis')
1994
2073
  .option('--ollama-only', 'Only show models available in Ollama')
2074
+ .option('--runtime <runtime>', `Inference runtime (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
1995
2075
  .option('--performance-test', 'Run performance benchmarks')
1996
2076
  .option('--show-ollama-analysis', 'Show detailed Ollama model analysis')
1997
2077
  .option('--no-verbose', 'Disable step-by-step progress display')
@@ -2008,6 +2088,16 @@ program
2008
2088
  }
2009
2089
 
2010
2090
  const hardware = await checker.getSystemInfo();
2091
+ let selectedRuntime = normalizeRuntime(options.runtime);
2092
+ if (!runtimeSupportedOnHardware(selectedRuntime, hardware)) {
2093
+ const runtimeLabel = getRuntimeDisplayName(selectedRuntime);
2094
+ console.log(
2095
+ chalk.yellow(
2096
+ `\nWarning: ${runtimeLabel} is not supported on this hardware. Falling back to Ollama.`
2097
+ )
2098
+ );
2099
+ selectedRuntime = 'ollama';
2100
+ }
2011
2101
 
2012
2102
  // Normalize and fix use-case typos
2013
2103
  const normalizeUseCase = (useCase = '') => {
@@ -2049,7 +2139,8 @@ program
2049
2139
  performanceTest: options.performanceTest,
2050
2140
  limit: parseInt(options.limit) || 10,
2051
2141
  maxSize: maxSize,
2052
- minSize: minSize
2142
+ minSize: minSize,
2143
+ runtime: selectedRuntime
2053
2144
  });
2054
2145
 
2055
2146
  if (!verboseEnabled) {
@@ -2058,8 +2149,14 @@ program
2058
2149
 
2059
2150
  // Simplified output - show only essential information
2060
2151
  displaySimplifiedSystemInfo(hardware);
2061
- const recommendedModels = await displayModelRecommendations(analysis, hardware, normalizedUseCase, parseInt(options.limit) || 1);
2062
- await displayQuickStartCommands(analysis, recommendedModels[0], recommendedModels);
2152
+ const recommendedModels = await displayModelRecommendations(
2153
+ analysis,
2154
+ hardware,
2155
+ normalizedUseCase,
2156
+ parseInt(options.limit) || 1,
2157
+ selectedRuntime
2158
+ );
2159
+ await displayQuickStartCommands(analysis, recommendedModels[0], recommendedModels, selectedRuntime);
2063
2160
 
2064
2161
  } catch (error) {
2065
2162
  console.error(chalk.red('\nError:'), error.message);
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.2.0",
3
+ "version": "3.2.1",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
- "llm-checker": "bin/enhanced_cli.js",
7
- "ollama-checker": "bin/enhanced_cli.js",
6
+ "llm-checker": "bin/cli.js",
7
+ "ollama-checker": "bin/cli.js",
8
8
  "llm-checker-mcp": "bin/mcp-server.mjs"
9
9
  },
10
10
  "main": "src/index.js",
@@ -13,6 +13,8 @@
13
13
  "test:gpu": "node tests/gpu-detection/multi-gpu.test.js",
14
14
  "test:platform": "node tests/platform-tests/cross-platform.test.js",
15
15
  "test:ui": "node tests/ui-tests/interface.test.js",
16
+ "test:runtime": "node tests/runtime-specdec-tests.js",
17
+ "test:hardware-detector": "node tests/hardware-detector-regression.js",
16
18
  "test:all": "node tests/run-all-tests.js",
17
19
  "build": "echo 'No build needed'",
18
20
  "dev": "node bin/enhanced_cli.js",
@@ -387,6 +387,9 @@ class MultiObjectiveSelector {
387
387
  // 2) Memory bandwidth (20%) - simplified estimation
388
388
  let memBandwidthGBs = 50; // fallback
389
389
  const gpu = gpuModel.toLowerCase();
390
+ if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) memBandwidthGBs = 1000;
391
+ else if (gpu.includes('h100')) memBandwidthGBs = 3000;
392
+ else if (gpu.includes('a100')) memBandwidthGBs = 2039;
390
393
  if (gpu.includes('m4 pro')) memBandwidthGBs = 273;
391
394
  else if (gpu.includes('m4')) memBandwidthGBs = 120;
392
395
  else if (gpu.includes('rtx 4090')) memBandwidthGBs = 1008;
@@ -398,7 +401,10 @@ class MultiObjectiveSelector {
398
401
 
399
402
  // 3) Compute (20%) - simplified estimation
400
403
  let compute = 0;
401
- if (gpu.includes('m4 pro')) compute = clamp(28 / 80); // Match main algorithm
404
+ if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) compute = clamp(180 / 80);
405
+ else if (gpu.includes('h100')) compute = clamp(320 / 80);
406
+ else if (gpu.includes('a100')) compute = clamp(250 / 80);
407
+ else if (gpu.includes('m4 pro')) compute = clamp(28 / 80); // Match main algorithm
402
408
  else if (gpu.includes('m4')) compute = clamp(15 / 80);
403
409
  else if (gpu.includes('rtx 4090')) compute = clamp(165 / 80);
404
410
  else if (gpu.includes('rtx 4080')) compute = clamp(121 / 80);
@@ -448,6 +454,10 @@ class MultiObjectiveSelector {
448
454
 
449
455
  // Special flagship GPU detection by model name
450
456
  if (gpuModel.toLowerCase().includes('rtx 50') ||
457
+ gpuModel.toLowerCase().includes('gb10') ||
458
+ gpuModel.toLowerCase().includes('grace blackwell') ||
459
+ gpuModel.toLowerCase().includes('dgx spark') ||
460
+ gpuModel.toLowerCase().includes('blackwell') ||
451
461
  gpuModel.toLowerCase().includes('h100') ||
452
462
  gpuModel.toLowerCase().includes('a100')) {
453
463
  tier = 'flagship';
@@ -599,7 +609,11 @@ class MultiObjectiveSelector {
599
609
 
600
610
  // NVIDIA GPU optimizations
601
611
  if (gpu.includes('nvidia') || gpu.includes('geforce') || gpu.includes('rtx') || gpu.includes('gtx')) {
602
- if (gpu.includes('rtx 50')) {
612
+ if (gpu.includes('gb10') || gpu.includes('grace blackwell') || gpu.includes('dgx spark')) {
613
+ specs.offloadCapacity = Math.min(ramGB * 0.6, 32);
614
+ specs.memoryEfficiency = 0.96;
615
+ specs.backendOptimization = 1.25;
616
+ } else if (gpu.includes('rtx 50')) {
603
617
  // RTX 50xx series - flagship tier with massive VRAM + excellent offload
604
618
  specs.offloadCapacity = Math.min(ramGB * 0.5, 24);
605
619
  specs.memoryEfficiency = 0.95;
@@ -732,7 +746,15 @@ class MultiObjectiveSelector {
732
746
  // GPU-based calculation (dedicated GPU only)
733
747
  if (vramGB > 0 && !gpuModel.toLowerCase().includes('iris') && !gpuModel.toLowerCase().includes('integrated')) {
734
748
  let gpuTPS = 20; // Conservative GPU baseline
735
- if (gpuModel.toLowerCase().includes('rtx 50')) {
749
+ if (gpuModel.toLowerCase().includes('gb10') ||
750
+ gpuModel.toLowerCase().includes('grace blackwell') ||
751
+ gpuModel.toLowerCase().includes('dgx spark')) {
752
+ gpuTPS = 85; // GB10 / Grace Blackwell class
753
+ } else if (gpuModel.toLowerCase().includes('h100')) {
754
+ gpuTPS = 120;
755
+ } else if (gpuModel.toLowerCase().includes('a100')) {
756
+ gpuTPS = 95;
757
+ } else if (gpuModel.toLowerCase().includes('rtx 50')) {
736
758
  gpuTPS = 60; // RTX 50 series - more realistic
737
759
  } else if (gpuModel.toLowerCase().includes('rtx 40')) {
738
760
  gpuTPS = 45; // RTX 40 series
@@ -740,6 +762,8 @@ class MultiObjectiveSelector {
740
762
  gpuTPS = 35; // RTX 30 series
741
763
  } else if (gpuModel.toLowerCase().includes('rtx 20')) {
742
764
  gpuTPS = 25; // RTX 20 series
765
+ } else if (gpuModel.toLowerCase().includes('p100')) {
766
+ gpuTPS = 32; // Tesla P100 class
743
767
  } else if (vramGB >= 8) {
744
768
  gpuTPS = 30; // Other high-end GPUs
745
769
  } else if (vramGB >= 4) {
@@ -817,4 +841,4 @@ class MultiObjectiveSelector {
817
841
  }
818
842
  }
819
843
 
820
- module.exports = MultiObjectiveSelector;
844
+ module.exports = MultiObjectiveSelector;
@@ -209,8 +209,34 @@ class CUDADetector {
209
209
  architecture: 'Unknown'
210
210
  };
211
211
 
212
+ // NVIDIA GB10 / Grace Blackwell (DGX Spark)
213
+ if (nameLower.includes('gb10') || nameLower.includes('grace blackwell') ||
214
+ nameLower.includes('dgx spark') || nameLower.includes('blackwell')) {
215
+ capabilities.tensorCores = true;
216
+ capabilities.bf16 = true;
217
+ capabilities.fp8 = true;
218
+ capabilities.computeCapability = '10.0';
219
+ capabilities.architecture = 'Grace Blackwell';
220
+ }
221
+ // H100 (Hopper)
222
+ else if (nameLower.includes('h100') || nameLower.includes('h200')) {
223
+ capabilities.tensorCores = true;
224
+ capabilities.bf16 = true;
225
+ capabilities.fp8 = true;
226
+ capabilities.nvlink = true;
227
+ capabilities.computeCapability = '9.0';
228
+ capabilities.architecture = 'Hopper';
229
+ }
230
+ // Tesla P100 (Pascal)
231
+ else if (nameLower.includes('p100') || nameLower.includes('tesla p100')) {
232
+ capabilities.tensorCores = false;
233
+ capabilities.bf16 = false;
234
+ capabilities.fp8 = false;
235
+ capabilities.computeCapability = '6.0';
236
+ capabilities.architecture = 'Pascal';
237
+ }
212
238
  // RTX 50 series (Blackwell)
213
- if (nameLower.includes('rtx 50') || nameLower.includes('rtx50')) {
239
+ else if (nameLower.includes('rtx 50') || nameLower.includes('rtx50')) {
214
240
  capabilities.tensorCores = true;
215
241
  capabilities.bf16 = true;
216
242
  capabilities.fp8 = true;
@@ -257,15 +283,6 @@ class CUDADetector {
257
283
  capabilities.architecture = 'Volta';
258
284
  capabilities.nvlink = true;
259
285
  }
260
- // H100 (Hopper)
261
- else if (nameLower.includes('h100') || nameLower.includes('h200')) {
262
- capabilities.tensorCores = true;
263
- capabilities.bf16 = true;
264
- capabilities.fp8 = true;
265
- capabilities.nvlink = true;
266
- capabilities.computeCapability = '9.0';
267
- capabilities.architecture = 'Hopper';
268
- }
269
286
 
270
287
  return capabilities;
271
288
  }
@@ -311,6 +328,9 @@ class CUDADetector {
311
328
  'rtx 2060': 80,
312
329
 
313
330
  // Data center
331
+ 'gb10': 95,
332
+ 'grace blackwell': 95,
333
+ 'dgx spark': 95,
314
334
  'h100': 400,
315
335
  'h200': 450,
316
336
  'a100': 300,
@@ -318,7 +338,8 @@ class CUDADetector {
318
338
  'l4': 150,
319
339
  'a40': 180,
320
340
  't4': 70,
321
- 'v100': 120
341
+ 'v100': 120,
342
+ 'p100': 45
322
343
  };
323
344
 
324
345
  for (const [model, speed] of Object.entries(speedMap)) {