llm-checker 3.5.11 → 3.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ const chalk = require('chalk');
4
4
  const ora = require('ora');
5
5
  const { table } = require('table');
6
6
  const os = require('os');
7
+ const readline = require('readline');
7
8
  const { spawn } = require('child_process');
8
9
  // LLMChecker is loaded lazily to avoid slow systeminformation init
9
10
  let _LLMChecker = null;
@@ -49,6 +50,7 @@ const {
49
50
  buildComplianceReport,
50
51
  serializeComplianceReport
51
52
  } = require('../src/policy/audit-reporter');
53
+ const { estimateTokenSpeedFromHardware } = require('../src/utils/token-speed-estimator');
52
54
  const { renderCommandHeader, renderPersistentBanner } = require('../src/ui/cli-theme');
53
55
  const { launchInteractivePanel } = require('../src/ui/interactive-panel');
54
56
  const policyManager = new PolicyManager();
@@ -881,6 +883,11 @@ function formatGpuInventoryList(models = []) {
881
883
 
882
884
  // Helper function to get hardware tier for display
883
885
  function getHardwareTierForDisplay(hardware) {
886
+ const canonicalTier = hardware?.summary?.hardwareTier;
887
+ if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
888
+ return canonicalTier.replace(/_/g, ' ').toUpperCase();
889
+ }
890
+
884
891
  const ram = hardware.memory.total;
885
892
  const cores = hardware.cpu.cores;
886
893
  const gpuModel = hardware.gpu?.model || '';
@@ -923,6 +930,21 @@ function getHardwareTierForDisplay(hardware) {
923
930
  return tier;
924
931
  }
925
932
 
933
/**
 * Build the human-readable backend label shown in the hardware summaries.
 *
 * Resolution order:
 *   1. `summary.bestBackendLabel` when it is a non-blank string (returned untouched).
 *   2. `summary.backendName`, else the upper-cased `summary.bestBackend` (default "CPU").
 *   3. When a different `summary.runtimeBackend` is present, the label is suffixed
 *      with "+ <runtime name> assist".
 *
 * @param {object} [hardware] - Hardware report; a missing `summary` is tolerated.
 * @returns {string} Display label for the backend.
 */
function getBackendLabelForDisplay(hardware) {
    const {
        bestBackendLabel,
        backendName,
        bestBackend,
        runtimeBackend,
        runtimeBackendName
    } = hardware?.summary || {};

    const hasExplicitLabel = typeof bestBackendLabel === 'string' && bestBackendLabel.trim() !== '';
    if (hasExplicitLabel) return bestBackendLabel;

    const primary = backendName || String(bestBackend || 'cpu').toUpperCase();
    const needsAssistSuffix = Boolean(runtimeBackend) && runtimeBackend !== bestBackend;

    return needsAssistSuffix
        ? `${primary} + ${runtimeBackendName || runtimeBackend} assist`
        : primary;
}
947
+
926
948
  function formatSpeed(speed) {
927
949
  const speedMap = {
928
950
  'very_fast': 'very_fast',
@@ -963,18 +985,25 @@ function displaySystemInfo(hardware, analysis) {
963
985
  const gpuColor = hardware.gpu.dedicated ? chalk.green : chalk.hex('#FFA500');
964
986
  const integratedList = formatGpuInventoryList(hardware.gpu.integratedGpuModels || hardware.summary?.integratedGpuModels);
965
987
  const dedicatedList = formatGpuInventoryList(hardware.gpu.dedicatedGpuModels || hardware.summary?.dedicatedGpuModels);
988
+ const integratedSharedMemory = hardware.gpu.sharedMemory || hardware.summary?.integratedSharedMemory || 0;
989
+ const vramDisplay = !hardware.gpu.dedicated && integratedSharedMemory > 0
990
+ ? `${integratedSharedMemory}GB shared`
991
+ : (hardware.gpu.vram === 0 && hardware.gpu.model && hardware.gpu.model.toLowerCase().includes('apple')
992
+ ? 'Unified Memory'
993
+ : `${hardware.gpu.vram || 'N/A'}GB`);
966
994
 
967
995
  const lines = [
968
996
  `${chalk.cyan('CPU:')} ${cpuColor(hardware.cpu.brand)} ${chalk.gray(`(${hardware.cpu.cores} cores, ${hardware.cpu.speed}GHz)`)}`,
969
997
  `${chalk.cyan('Architecture:')} ${hardware.cpu.architecture}`,
970
998
  `${chalk.cyan('RAM:')} ${ramColor(hardware.memory.total + 'GB')}`,
971
999
  `${chalk.cyan('GPU:')} ${gpuColor(hardware.gpu.model || 'Not detected')}`,
972
- `${chalk.cyan('VRAM:')} ${hardware.gpu.vram === 0 && hardware.gpu.model && hardware.gpu.model.toLowerCase().includes('apple') ? 'Unified Memory' : `${hardware.gpu.vram || 'N/A'}GB`}${hardware.gpu.dedicated ? chalk.green(' (Dedicated)') : chalk.hex('#FFA500')(' (Integrated)')}`,
1000
+ `${chalk.cyan('Backend:')} ${chalk.white(getBackendLabelForDisplay(hardware))}`,
1001
+ `${chalk.cyan('VRAM:')} ${vramDisplay}${hardware.gpu.dedicated ? chalk.green(' (Dedicated)') : chalk.hex('#FFA500')(' (Integrated)')}`,
973
1002
  `${chalk.cyan('Dedicated GPUs:')} ${chalk.green(dedicatedList)}`,
974
1003
  `${chalk.cyan('Integrated GPUs:')} ${chalk.hex('#FFA500')(integratedList)}`,
975
1004
  ];
976
1005
 
977
- const tier = analysis.summary.hardwareTier?.replace('_', ' ').toUpperCase() || 'UNKNOWN';
1006
+ const tier = analysis.summary.hardwareTier?.replace(/_/g, ' ').toUpperCase() || getHardwareTierForDisplay(hardware);
978
1007
  const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
979
1008
 
980
1009
  lines.push(`${chalk.bold('Hardware Tier:')} ${tierColor.bold(tier)}`);
@@ -1405,6 +1434,266 @@ function displayCalibratedRoutingDecision(commandName, calibratedPolicy, routeDe
1405
1434
  console.log(chalk.blue('╰'));
1406
1435
  }
1407
1436
 
1437
/**
 * Extract a parameter count, expressed in billions, from free-form text such
 * as a model tag ("llama3.1:8b") or a parameter-size label ("7.2B", "137M").
 *
 * The first `<number><unit>` token wins, where unit is b (billions),
 * m (millions) or k (thousands), optionally followed by "+".
 *
 * The unit letter must end its token: a following letter rejects the match so
 * that quantisation markers ("4bit") and file sizes ("512 MB") are not
 * mistaken for parameter counts.
 *
 * NOTE(review): a "k" suffix in model tags (e.g. "128k") may denote a context
 * window rather than a parameter count - confirm whether "k" should be parsed.
 *
 * @param {*} value - Text to scan; coerced with String(), falsy values become "".
 * @returns {number|null} Size in billions of parameters, or null when no
 *   positive, finite size can be found.
 */
function parseAiRunModelSizeB(value) {
    const match = String(value || '').match(/(\d+(?:\.\d+)?)\s*([kmb])\+?(?![a-z])/i);
    if (!match) return null;

    const amount = Number(match[1]);
    if (!Number.isFinite(amount) || amount <= 0) return null;

    switch (match[2].toLowerCase()) {
        case 'b':
            return amount;
        case 'm':
            return amount / 1000;
        case 'k':
            return amount / 1_000_000;
        default:
            return null;
    }
}
1450
+
1451
/**
 * Canonicalise a model name for comparison: trimmed, lower-cased, and with a
 * single trailing ":latest" tag removed.
 *
 * @param {*} value - Model name; falsy values are treated as an empty string.
 * @returns {string} Normalised name (possibly empty).
 */
function normalizeAiRunModelName(value) {
    const LATEST_TAG = ':latest';
    const lowered = String(value || '').trim().toLowerCase();

    return lowered.endsWith(LATEST_TAG)
        ? lowered.slice(0, -LATEST_TAG.length)
        : lowered;
}
1457
+
1458
/**
 * Locate the locally installed model entry that corresponds to `modelName`.
 *
 * Names are compared after normalizeAiRunModelName(). An exact match always
 * wins; only when none exists does the lookup fall back to the first entry
 * whose name contains the target or is contained by it. Doing the exact pass
 * first prevents an earlier near-miss (e.g. "llama3.1:8b") from shadowing a
 * later exact entry (e.g. "llama3").
 *
 * @param {Array<object>} [localModels=[]] - Entries exposing `name` or `model`.
 *   Non-array input and null entries are tolerated.
 * @param {string} [modelName=''] - Name of the model to look up.
 * @returns {object|null} The matching entry, or null when nothing matches.
 */
function findAiRunLocalModel(localModels = [], modelName = '') {
    const target = normalizeAiRunModelName(modelName);
    if (!target || !Array.isArray(localModels)) return null;

    // Normalise each name once and drop entries without a usable name.
    const candidates = localModels
        .map((model) => ({ model, name: normalizeAiRunModelName(model?.name || model?.model) }))
        .filter(({ name }) => name);

    const exact = candidates.find(({ name }) => name === target);
    if (exact) return exact.model;

    const partial = candidates.find(({ name }) => name.includes(target) || target.includes(name));
    return partial ? partial.model : null;
}
1468
+
1469
/**
 * Work out a model's size in billions of parameters, trying sources in order:
 *   1. the local model entry (`details.parameter_size`, else `size`),
 *   2. the model name itself,
 *   3. `aiSelector.estimateModelSize(modelName)` when that method exists,
 *   4. a fixed fallback of 7.
 *
 * @param {string} modelName - Name of the model being described.
 * @param {object} [aiSelector] - Optional selector exposing `estimateModelSize`.
 * @param {object|null} [localModel=null] - Matching local model entry, if any.
 * @returns {number} Size in billions of parameters.
 */
function resolveAiRunModelSizeB(modelName, aiSelector, localModel = null) {
    const FALLBACK_SIZE_B = 7;

    const fromMetadata = parseAiRunModelSizeB(localModel?.details?.parameter_size || localModel?.size);
    if (fromMetadata) return fromMetadata;

    const fromName = parseAiRunModelSizeB(modelName);
    if (fromName) return fromName;

    const canAskSelector = aiSelector && typeof aiSelector.estimateModelSize === 'function';
    if (!canAskSelector) return FALLBACK_SIZE_B;

    const fromSelector = Number(aiSelector.estimateModelSize(modelName));
    return Number.isFinite(fromSelector) && fromSelector > 0 ? fromSelector : FALLBACK_SIZE_B;
}
1484
+
1485
/**
 * Format a numeric value for the AI Run summary lines.
 *
 * The value is rounded to `decimals` places and an all-zero fraction is
 * dropped ("7.0" -> "7", "7.00" -> "7"); other fractions are kept as produced
 * by toFixed ("3.50" stays "3.50").
 *
 * @param {*} value - Value to format; coerced with Number().
 * @param {number} [decimals=1] - Number of fraction digits passed to toFixed.
 * @returns {string} Formatted number, or "N/A" when the value is missing
 *   (null, undefined, blank string) or not a finite number.
 */
function formatAiRunNumber(value, decimals = 1) {
    // Number(null) and Number('') are 0, which would present a missing
    // measurement as a real zero - report those as unavailable instead.
    const isBlank = value === null || (typeof value === 'string' && value.trim() === '');
    if (isBlank) return 'N/A';

    const number = Number(value);
    if (!Number.isFinite(number)) return 'N/A';

    // Strip the fraction only when every digit is zero, whatever `decimals` is.
    return number.toFixed(decimals).replace(/\.0+$/, '');
}
1490
+
1491
/**
 * Rough working-set estimate, in GB, for running a model.
 *
 * Without a known file size the estimate is `modelSizeB * 0.75 + 2`. When
 * `localModel.fileSizeGB` is a positive number, the result is the larger of
 * the file size scaled by 1.15 and 85% of the parameter-based estimate.
 *
 * @param {number} modelSizeB - Model size in billions of parameters.
 * @param {object|null} [localModel=null] - Local model entry; `fileSizeGB` is read if present.
 * @returns {number} Estimated working set in GB.
 */
function estimateAiRunWorkingSetGB(modelSizeB, localModel = null) {
    const fromParameters = (Number(modelSizeB) * 0.75) + 2;
    const onDiskGB = Number(localModel?.fileSizeGB) || 0;

    if (onDiskGB <= 0) return fromParameters;

    return Math.max(onDiskGB * 1.15, fromParameters * 0.85);
}
1499
+
1500
/**
 * Produce the one-line hardware description: "<cpu>[ (N cores)], <memory>, <gpu>".
 *
 * Missing pieces fall back to "CPU", "RAM unknown" and "GPU not detected".
 *
 * @param {object} [systemInfo={}] - System report with optional `cpu`, `memory`, `gpu`.
 * @returns {string} Comma-separated hardware summary.
 */
function formatAiRunHardwareSummary(systemInfo = {}) {
    const { cpu, memory, gpu } = systemInfo;

    const cpuLabel = cpu?.brand || cpu?.model || 'CPU';
    const coreSuffix = cpu?.cores ? ` (${cpu.cores} cores)` : '';

    const segments = [
        `${cpuLabel}${coreSuffix}`,
        memory?.total ? `${memory.total}GB RAM` : 'RAM unknown',
        gpu?.model || 'GPU not detected'
    ];

    return segments.join(', ');
}
1507
+
1508
/**
 * Turn a selection-method identifier into a title-cased label.
 *
 * Runs of "_" and "-" become single spaces and the first character of every
 * word is upper-cased. An empty or missing method is shown as "Selector".
 *
 * @param {string} [method=''] - Method identifier such as "ai_selected".
 * @returns {string} Display label.
 */
function formatAiRunMethod(method = '') {
    const spaced = String(method || 'selector').replace(/[_-]+/g, ' ');
    const capitalise = (initial) => initial.toUpperCase();

    return spaced.replace(/\b\w/g, capitalise);
}
1513
+
1514
/**
 * Prepare the "why this model" text for single-line display.
 *
 * Whitespace runs are collapsed to one space and the text is trimmed. Text
 * longer than 120 characters is cut to its first 117 characters plus "...".
 * Empty input yields a generic explanation.
 *
 * @param {string} [reason=''] - Raw reasoning text from the selector.
 * @returns {string} Display-ready reason.
 */
function formatAiRunReason(reason = '') {
    const MAX_LENGTH = 120;
    const collapsed = String(reason || '').replace(/\s+/g, ' ').trim();

    if (collapsed.length === 0) {
        return 'Selected from local model compatibility scoring.';
    }
    if (collapsed.length <= MAX_LENGTH) {
        return collapsed;
    }
    return `${collapsed.slice(0, 117)}...`;
}
1519
+
1520
/**
 * Describe a benchmark result for the "Measured speed" line.
 *
 * For a successful benchmark the description lists, in order: the eval rate,
 * the end-to-end rate, the plain `tokensPerSecond` rate (only when neither of
 * the first two is available), and the generated-token count. Every piece is
 * optional and pieces are joined with ", ", so the text never starts with a
 * dangling separator when only the token count is known.
 *
 * @param {object|null} [benchmark=null] - Benchmark result with `success`,
 *   `error`, `evalTokensPerSecond`, `endToEndTokensPerSecond`,
 *   `tokensPerSecond` and `tokensGenerated`.
 * @returns {string|null} null when no benchmark was run; a
 *   "not available (...)" message for a failed run; otherwise the joined
 *   description (empty string when nothing measurable was reported).
 */
function formatAiRunMeasuredSpeed(benchmark = null) {
    if (!benchmark) return null;
    if (!benchmark.success) {
        return `not available (${benchmark.error || 'benchmark failed'})`;
    }

    const parts = [];
    if (Number(benchmark.evalTokensPerSecond) > 0) {
        parts.push(`${formatAiRunNumber(benchmark.evalTokensPerSecond)} eval t/s`);
    }
    if (Number(benchmark.endToEndTokensPerSecond) > 0) {
        parts.push(`${formatAiRunNumber(benchmark.endToEndTokensPerSecond)} end-to-end t/s`);
    }
    // The generic rate is only a fallback for results lacking the detailed rates.
    if (parts.length === 0 && Number(benchmark.tokensPerSecond) > 0) {
        parts.push(`${formatAiRunNumber(benchmark.tokensPerSecond)} t/s`);
    }
    if (Number(benchmark.tokensGenerated) > 0) {
        parts.push(`${benchmark.tokensGenerated} tokens`);
    }

    return parts.join(', ');
}
1542
+
1543
/**
 * Print the "AI Run reference" panel describing the selected model.
 *
 * All derived values are computed before anything is printed. The panel then
 * lists, one console line each: task, selected model, best database match
 * (when known), reasoning, confidence and method, evaluated-model counts,
 * hardware, estimated speed, measured speed (when a benchmark description is
 * available), memory reference, and an install hint when the database's
 * preferred model differs from the selection and is flagged as not installed.
 *
 * @param {object} params
 * @param {object} params.result - Selector outcome (`bestModel`, `confidence`,
 *   `method`, `reasoning`/`reason`, `recommendedFromDatabase`,
 *   `isRecommendedInstalled`, `localModelsCount`, `totalModelsEvaluated`).
 * @param {object} params.systemInfo - System report used for the speed estimate and hardware line.
 * @param {string} [params.taskHint] - Task label; shown as "general" when empty.
 * @param {Array} params.candidateModels - Candidate names; its length is the fallback local count.
 * @param {Array} params.localModels - Local model entries used to look up the selected model.
 * @param {object} [params.aiSelector] - Selector used as a model-size fallback.
 * @param {object|null} [params.benchmark] - Optional measured benchmark result.
 * @returns {void}
 */
function displayAiRunReference({ result, systemInfo, taskHint, candidateModels, localModels, aiSelector, benchmark }) {
    const chosen = result.bestModel;
    const installedEntry = findAiRunLocalModel(localModels, chosen);
    const paramsB = resolveAiRunModelSizeB(chosen, aiSelector, installedEntry);
    const estimate = estimateTokenSpeedFromHardware(systemInfo, {
        modelSizeB: paramsB,
        modelName: chosen
    });
    const workingSet = estimateAiRunWorkingSetGB(paramsB, installedEntry);
    const localCount = result.localModelsCount || candidateModels.length;
    const dbCount = result.totalModelsEvaluated;

    const rawConfidence = Number(result.confidence);
    let confidenceText = 'N/A';
    if (Number.isFinite(rawConfidence)) {
        confidenceText = `${Math.round(rawConfidence * 100)}%`;
    }

    const ideal = result.recommendedFromDatabase;
    const usesFallback = ideal && ideal !== chosen && result.isRecommendedInstalled === false;
    const measured = formatAiRunMeasuredSpeed(benchmark);

    // Every detail row shares the "<gray label> <value>" layout.
    const row = (label, value) => console.log(`${chalk.gray(label)} ${value}`);

    console.log('\n' + chalk.bold('AI Run reference'));
    console.log(chalk.gray('----------------'));
    row('Task:', chalk.white(taskHint || 'general'));
    row('Selected local model:', chalk.green.bold(chosen));

    if (ideal) {
        const status = usesFallback ? chalk.yellow('not installed') : chalk.green('available');
        row('Best database match:', `${chalk.cyan(ideal)} ${chalk.gray('(')}${status}${chalk.gray(')')}`);
    }

    row('Why this model:', formatAiRunReason(result.reasoning || result.reason));
    row('Confidence:', `${chalk.white(confidenceText)} ${chalk.gray(`via ${formatAiRunMethod(result.method)}`)}`);
    row('Models evaluated:', `${chalk.white(`${localCount} local`)}${dbCount ? chalk.gray(`, ${dbCount} database`) : ''}`);
    row('Hardware:', formatAiRunHardwareSummary(systemInfo));
    row('Estimated speed:', `${chalk.yellow(`~${estimate.tokensPerSecond} tokens/sec`)} ${chalk.gray(`${estimate.backend}, generation only`)}`);

    if (measured) {
        const paint = benchmark?.success ? chalk.green : chalk.yellow;
        row('Measured speed:', paint(measured));
    }

    row('Memory reference:', chalk.white(`~${formatAiRunNumber(paramsB)}B params, ~${formatAiRunNumber(workingSet)}GB working set`));

    if (usesFallback) {
        row('Install ideal model:', chalk.cyan(`ollama pull ${ideal}`));
    }
}
1588
+
1589
/**
 * Build the bracketed speed suffix appended to each chat reply.
 *
 * A positive `evalTokensPerSecond` is preferred; otherwise `tokensPerSecond`
 * is used. When the chosen rate is not a positive finite number the suffix
 * reads "[speed unavailable]".
 *
 * @param {object} [result={}] - Chat result carrying the rate fields.
 * @returns {string} Suffix such as "[42.5 tokens/sec]".
 */
function formatAiRunTurnSpeed(result = {}) {
    const evalRate = Number(result.evalTokensPerSecond);
    const rate = evalRate > 0 ? evalRate : Number(result.tokensPerSecond);
    const isUsable = Number.isFinite(rate) && rate > 0;

    return isUsable
        ? `[${formatAiRunNumber(rate)} tokens/sec]`
        : '[speed unavailable]';
}
1599
+
1600
/**
 * Send one chat turn to the model, echo the reply to stdout, and append the
 * speed suffix for the turn.
 *
 * Chunks are written as they arrive through the streaming callback. If no
 * chunk was delivered, the full reply text is written once instead. The reply
 * text is taken from `result.response` or, failing that,
 * `result.message.content`, so a reply delivered only through `message` is
 * still shown.
 *
 * @param {object} client - Client exposing
 *   `streamChat(modelName, messages, options, onChunk)`.
 * @param {string} modelName - Model to chat with.
 * @param {Array<{role: string, content: string}>} messages - Conversation so far.
 * @returns {Promise<object>} The result resolved by `client.streamChat`.
 */
async function runAiRunChatTurn(client, modelName, messages) {
    let printed = false;
    const result = await client.streamChat(
        modelName,
        messages,
        {
            keepAlive: '5m',
            timeoutMs: 180000
        },
        (chunk) => {
            printed = true;
            process.stdout.write(chunk);
        }
    );

    const responseText = result.response || result.message?.content || '';

    // Nothing was streamed: print whatever reply text the result carries.
    if (!printed && responseText) {
        process.stdout.write(responseText);
    }

    // Keep the speed suffix from being glued to the last word of the reply.
    const needsSpace = responseText.length > 0 && !/\s$/.test(responseText);
    process.stdout.write(`${needsSpace ? ' ' : ''}${chalk.gray(formatAiRunTurnSpeed(result))}\n\n`);

    return result;
}
1625
+
1626
/**
 * Promise wrapper around `rl.question` that also settles when the interface
 * is closed while the prompt is pending.
 *
 * Whichever happens first wins: an answer resolves with the entered text and
 * removes the temporary 'close' listener; a 'close' event resolves with null.
 * Later notifications are ignored.
 *
 * @param {object} rl - Interface exposing `question`, `once` and `off`.
 * @param {string} promptText - Prompt to display.
 * @returns {Promise<string|null>} The answer, or null if the interface closed first.
 */
function askAiRunQuestion(rl, promptText) {
    return new Promise((resolve) => {
        let done = false;

        function onClose() {
            if (done) return;
            done = true;
            resolve(null);
        }

        function onAnswer(answer) {
            if (done) return;
            done = true;
            rl.off('close', onClose);
            resolve(answer);
        }

        rl.once('close', onClose);
        rl.question(promptText, onAnswer);
    });
}
1645
+
1646
/**
 * Run an interactive chat loop against `modelName` on stdin/stdout.
 *
 * Each non-empty line is sent as a user message together with the running
 * conversation history. The loop ends on "/bye", "/exit", "/quit" or "q",
 * when the readline interface closes, or on Ctrl+C (SIGINT closes the
 * interface). "/?" and "/help" print the command list.
 *
 * When a turn fails, the error is reported and the unanswered user message is
 * removed from the history so the next request does not re-send it as a
 * second consecutive user turn.
 *
 * @param {object} client - Chat client handed to runAiRunChatTurn.
 * @param {string} modelName - Model to chat with.
 * @returns {Promise<void>} Resolves once the session has ended and the
 *   readline interface has been closed.
 */
async function runAiRunInteractiveChat(client, modelName) {
    const rl = readline.createInterface({
        input: process.stdin,
        output: process.stdout
    });
    const messages = [];
    let closed = false;

    rl.on('SIGINT', () => {
        process.stdout.write('\n');
        rl.close();
    });
    rl.on('close', () => {
        closed = true;
    });

    try {
        while (!closed) {
            const input = await askAiRunQuestion(rl, chalk.cyan('>>> '));
            // null means the interface closed while the prompt was pending.
            if (input === null) break;

            const trimmed = String(input || '').trim();
            if (!trimmed) {
                continue;
            }

            const command = trimmed.toLowerCase();
            if (['/bye', '/exit', '/quit', 'q'].includes(command)) {
                break;
            }
            if (['/?', '/help'].includes(command)) {
                console.log('Commands: /bye, /exit, /quit');
                continue;
            }

            messages.push({ role: 'user', content: input });

            try {
                const response = await runAiRunChatTurn(client, modelName, messages);
                const assistantContent = response.response || response.message?.content || '';
                messages.push({ role: 'assistant', content: assistantContent });
            } catch (error) {
                // Roll back the user message that never received a reply.
                messages.pop();
                console.error(chalk.red(`Chat request failed: ${error.message}`));
            }
        }
    } finally {
        rl.close();
    }
}
1696
+
1408
1697
  function displayModelsStats(originalCount, filteredCount, options) {
1409
1698
  console.log('\n' + chalk.bgGreen.white.bold(' DATABASE STATS '));
1410
1699
  console.log(chalk.green('╭' + '─'.repeat(60)));
@@ -1758,6 +2047,7 @@ function displaySimplifiedSystemInfo(hardware) {
1758
2047
  console.log(`Memory: ${chalk.white(memInfo)}`);
1759
2048
  console.log(`GPU: ${chalk.white(gpuInfo)}`);
1760
2049
  console.log(`Architecture: ${chalk.white(hardware.cpu.architecture)}`);
2050
+ console.log(`Backend: ${chalk.white(getBackendLabelForDisplay(hardware))}`);
1761
2051
 
1762
2052
  const tier = getHardwareTierForDisplay(hardware);
1763
2053
  const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
@@ -3182,7 +3472,7 @@ program
3182
3472
  const spinner = ora('Checking Ollama integration...').start();
3183
3473
 
3184
3474
  try {
3185
- const checker = new (getLLMChecker())();
3475
+ const checker = new (getLLMChecker())({ verbose: false });
3186
3476
  const analysis = await checker.analyze();
3187
3477
 
3188
3478
  if (!analysis.ollamaInfo.available) {
@@ -3884,14 +4174,15 @@ program
3884
4174
  .option('--json', 'Output in JSON format')
3885
4175
  .action(async (options) => {
3886
4176
  if (!options.json) showAsciiArt('list-models');
3887
- const spinner = ora('📋 Loading models database...').start();
4177
+ const spinner = options.json ? null : ora('📋 Loading models database...').start();
3888
4178
 
3889
4179
  try {
3890
4180
  const checker = new (getLLMChecker())();
3891
- const data = await checker.ollamaScraper.scrapeAllModels(false);
4181
+ const data = await checker.loadOllamaModelData();
3892
4182
 
3893
4183
  if (!data || !data.models) {
3894
- spinner.fail('No models found in database');
4184
+ if (spinner) spinner.fail('No models found in database');
4185
+ else console.error('No models found in database');
3895
4186
  return;
3896
4187
  }
3897
4188
 
@@ -3982,9 +4273,9 @@ program
3982
4273
  return (b.pulls || 0) - (a.pulls || 0);
3983
4274
  });
3984
4275
 
3985
- spinner.text = `Sorted by hardware compatibility (${getHardwareTierForDisplay(hardware)})`;
4276
+ if (spinner) spinner.text = `Sorted by hardware compatibility (${getHardwareTierForDisplay(hardware)})`;
3986
4277
  } catch (error) {
3987
- console.warn('Could not sort by hardware compatibility:', error.message);
4278
+ if (!options.json) console.warn('Could not sort by hardware compatibility:', error.message);
3988
4279
  // Fallback a ordenar por popularidad
3989
4280
  models.sort((a, b) => (b.pulls || 0) - (a.pulls || 0));
3990
4281
  }
@@ -3997,7 +4288,7 @@ program
3997
4288
  const limit = parseInt(options.limit) || 50;
3998
4289
  const displayModels = models.slice(0, limit);
3999
4290
 
4000
- spinner.succeed(`✅ Found ${models.length} models (showing ${displayModels.length})`);
4291
+ if (spinner) spinner.succeed(`✅ Found ${models.length} models (showing ${displayModels.length})`);
4001
4292
 
4002
4293
  if (options.json) {
4003
4294
  console.log(JSON.stringify(displayModels, null, 2));
@@ -4020,7 +4311,7 @@ program
4020
4311
  }
4021
4312
 
4022
4313
  } catch (error) {
4023
- spinner.fail('Failed to load models');
4314
+ if (spinner) spinner.fail('Failed to load models');
4024
4315
  console.error(chalk.red('Error:'), error.message);
4025
4316
  if (process.env.DEBUG) {
4026
4317
  console.error(error.stack);
@@ -4121,6 +4412,8 @@ program
4121
4412
  '--calibrated [file]',
4122
4413
  'Enable calibrated routing policy (optional file path; defaults to ~/.llm-checker/calibration-policy.{yaml,yml,json})'
4123
4414
  )
4415
+ .option('--benchmark', 'Run a short local speed test before launching')
4416
+ .option('--reference-only', 'Show model choice and speed reference without launching Ollama')
4124
4417
  .action(async (options) => {
4125
4418
  showAsciiArt('ai-run');
4126
4419
  // Check if Ollama is installed first
@@ -4134,6 +4427,14 @@ program
4134
4427
  const aiSelector = new AIModelSelector();
4135
4428
  const checker = new (getLLMChecker())();
4136
4429
  const systemInfo = await checker.getSystemInfo();
4430
+ let ollamaClient = null;
4431
+ const getOllamaClient = () => {
4432
+ if (!ollamaClient) {
4433
+ const OllamaClient = require('../src/ollama/client');
4434
+ ollamaClient = new OllamaClient();
4435
+ }
4436
+ return ollamaClient;
4437
+ };
4137
4438
  const routingPreference = resolveRoutingPolicyPreference({
4138
4439
  policyOption: options.policy,
4139
4440
  calibratedOption: options.calibrated
@@ -4142,18 +4443,18 @@ program
4142
4443
 
4143
4444
  // Get available models or use provided ones
4144
4445
  let candidateModels = options.models;
4446
+ let localModels = [];
4145
4447
 
4146
4448
  if (!candidateModels) {
4147
- spinner.text = '📋 Getting available Ollama models...';
4148
- const OllamaClient = require('../src/ollama/client');
4149
- const client = new OllamaClient();
4449
+ spinner.text = 'Getting available Ollama models...';
4450
+ const client = getOllamaClient();
4150
4451
 
4151
4452
  try {
4152
- const models = await client.getLocalModels();
4153
- candidateModels = models.map(m => m.name || m.model);
4453
+ localModels = await client.getLocalModels();
4454
+ candidateModels = localModels.map(m => m.name || m.model);
4154
4455
 
4155
4456
  if (candidateModels.length === 0) {
4156
- spinner.fail('No Ollama models found');
4457
+ spinner.fail('No Ollama models found');
4157
4458
  console.log('\nInstall some models first:');
4158
4459
  console.log(' ollama pull llama2:7b');
4159
4460
  console.log(' ollama pull mistral:7b');
@@ -4161,7 +4462,7 @@ program
4161
4462
  return;
4162
4463
  }
4163
4464
  } catch (error) {
4164
- spinner.fail('Failed to get Ollama models');
4465
+ spinner.fail('Failed to get Ollama models');
4165
4466
  console.error(chalk.red('Error:'), error.message);
4166
4467
  return;
4167
4468
  }
@@ -4202,28 +4503,62 @@ program
4202
4503
  )}) are not installed locally. Falling back to AI selector.`
4203
4504
  );
4204
4505
  }
4205
- result = await aiSelector.selectBestModel(candidateModels, systemSpecs, taskHint);
4506
+ result = await aiSelector.selectBestModel(candidateModels, systemSpecs, taskHint, { silent: true });
4206
4507
  }
4207
4508
 
4208
4509
  spinner.succeed(`Selected ${chalk.green.bold(result.bestModel)} (${result.method}, ${Math.round(result.confidence * 100)}% confidence)`);
4510
+
4511
+ let benchmark = null;
4512
+ if (options.benchmark) {
4513
+ const benchmarkSpinner = ora(`Measuring local throughput for ${result.bestModel}...`).start();
4514
+ try {
4515
+ benchmark = await getOllamaClient().testModelPerformance(
4516
+ result.bestModel,
4517
+ 'Write one concise sentence about local LLM performance.'
4518
+ );
4519
+
4520
+ if (benchmark.success) {
4521
+ benchmarkSpinner.succeed(`Measured ${formatAiRunNumber(benchmark.tokensPerSecond)} tokens/sec`);
4522
+ } else {
4523
+ benchmarkSpinner.stop();
4524
+ console.log(chalk.yellow(`Benchmark unavailable: ${benchmark.error || 'unknown error'}`));
4525
+ }
4526
+ } catch (error) {
4527
+ benchmark = { success: false, error: error.message };
4528
+ benchmarkSpinner.stop();
4529
+ console.log(chalk.yellow(`Benchmark unavailable: ${error.message}`));
4530
+ }
4531
+ }
4532
+
4209
4533
  displayCalibratedRoutingDecision('ai-run', calibratedPolicy, routeDecision, routingPreference.warnings);
4534
+ displayAiRunReference({
4535
+ result,
4536
+ systemInfo,
4537
+ taskHint,
4538
+ candidateModels,
4539
+ localModels,
4540
+ aiSelector,
4541
+ benchmark
4542
+ });
4543
+
4544
+ if (options.referenceOnly) {
4545
+ console.log(chalk.gray('\nReference-only mode: not launching Ollama.'));
4546
+ return;
4547
+ }
4210
4548
 
4211
- // Execute the selected model
4212
- console.log(chalk.magenta.bold(`\nLaunching ${result.bestModel}...`));
4213
- console.log(chalk.gray(`Tip: Type ${chalk.cyan('/bye')} to exit the chat when finished\n`));
4214
-
4215
- const args = ['run', result.bestModel];
4216
4549
  if (options.prompt) {
4217
- args.push(options.prompt);
4550
+ console.log(chalk.cyan(`\n>>> ${options.prompt}`));
4551
+ await runAiRunChatTurn(
4552
+ getOllamaClient(),
4553
+ result.bestModel,
4554
+ [{ role: 'user', content: options.prompt }]
4555
+ );
4556
+ return;
4218
4557
  }
4219
-
4220
- const ollamaProcess = spawn('ollama', args, {
4221
- stdio: 'inherit'
4222
- });
4223
-
4224
- ollamaProcess.on('error', (error) => {
4225
- console.error(chalk.red('Failed to launch Ollama:'), error.message);
4226
- });
4558
+
4559
+ console.log(chalk.magenta.bold(`\nStarting chat with ${result.bestModel}...`));
4560
+ console.log(chalk.gray(`Tip: Type ${chalk.cyan('/bye')} to exit the chat when finished\n`));
4561
+ await runAiRunInteractiveChat(getOllamaClient(), result.bestModel);
4227
4562
 
4228
4563
  } catch (error) {
4229
4564
  console.error(chalk.red('❌ AI-powered execution failed:'), error.message);
@@ -5058,7 +5393,7 @@ program
5058
5393
  console.log(` ${detector.getHardwareDescription()}`);
5059
5394
  console.log(` Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
5060
5395
  console.log(` Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
5061
- console.log(` Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
5396
+ console.log(` Best backend: ${chalk.cyan(getBackendLabelForDisplay(hardware))}`);
5062
5397
  if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
5063
5398
  console.log(` Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
5064
5399
  }
@@ -5106,9 +5441,23 @@ program
5106
5441
 
5107
5442
  if (backend === 'rocm' && info.info) {
5108
5443
  console.log(` ROCm: ${info.info.rocmVersion}`);
5109
- console.log(` Total VRAM: ${info.info.totalVRAM}GB`);
5444
+ const integratedOnly = (info.info.gpus || []).length > 0 &&
5445
+ (info.info.gpus || []).every((gpu) => gpu.type === 'integrated');
5446
+ if (integratedOnly) {
5447
+ console.log(` Total dedicated aperture: ${info.info.totalVRAM || 0}GB`);
5448
+ console.log(` Total shared memory: ${info.info.totalSharedMemory || 0}GB`);
5449
+ } else {
5450
+ console.log(` Total VRAM: ${info.info.totalVRAM}GB`);
5451
+ }
5110
5452
  for (const gpu of info.info.gpus) {
5111
- console.log(` ${gpu.name}: ${gpu.memory.total}GB`);
5453
+ if (gpu.type === 'integrated') {
5454
+ const dedicated = gpu.memory?.dedicated || 0;
5455
+ const shared = gpu.memory?.shared || gpu.memory?.total || 0;
5456
+ const dedicatedLabel = dedicated > 0 ? `, ${dedicated}GB aperture` : '';
5457
+ console.log(` ${gpu.name}: ${shared}GB shared${dedicatedLabel} (Integrated)`);
5458
+ } else {
5459
+ console.log(` ${gpu.name}: ${gpu.memory.total}GB`);
5460
+ }
5112
5461
  }
5113
5462
  }
5114
5463
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.11",
3
+ "version": "3.5.13",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -36,6 +36,7 @@
36
36
  "list-models": "node bin/enhanced_cli.js list-models",
37
37
  "ai-check": "node bin/enhanced_cli.js ai-check",
38
38
  "ai-run": "node bin/enhanced_cli.js ai-run",
39
+ "sync:seed": "node bin/enhanced_cli.js sync --force --quiet && node scripts/update-seed-db.js",
39
40
  "benchmark": "cd ml-model && python python/benchmark_collector.py",
40
41
  "train-ai": "cd ml-model && python python/train_model.py",
41
42
  "postinstall": "echo 'LLM Checker installed. Run: llm-checker hw-detect'"