llm-checker 3.5.1 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,34 +1,28 @@
1
- <p align="center">
2
- <img src="https://raw.githubusercontent.com/Pavelevich/llm-checker/main/assets/llm-checker-logo.gif" alt="LLM Checker Animated Logo" width="760">
3
- </p>
4
-
5
- <h1 align="center">LLM Checker</h1>
6
- <p align="center"><strong>Intelligent Ollama Model Selector</strong></p>
7
- <p align="center">
8
- AI-powered CLI that analyzes your hardware and recommends optimal LLM models<br/>
9
- Deterministic scoring across <b>200+ dynamic models</b> (35+ curated fallback) with hardware-calibrated memory estimation
10
- </p>
11
-
12
- <p align="center">
13
- <a href="https://www.npmjs.com/package/llm-checker"><img src="https://img.shields.io/npm/v/llm-checker?style=flat-square&color=0066FF" alt="npm version"></a>
14
- <a href="https://www.npmjs.com/package/llm-checker"><img src="https://img.shields.io/npm/dm/llm-checker?style=flat-square&color=0066FF" alt="npm downloads"></a>
15
- <a href="LICENSE"><img src="https://img.shields.io/badge/License-NPDL--1.0-CC3300?style=flat-square" alt="License"></a>
16
- <a href="https://discord.gg/mnmYrA7T"><img src="https://img.shields.io/discord/1457032977849520374?style=flat-square&color=0066FF&label=Discord" alt="Discord"></a>
17
- <a href="https://nodejs.org/"><img src="https://img.shields.io/badge/node-%3E%3D16-0066FF?style=flat-square" alt="Node.js"></a>
18
- </p>
19
-
20
- <p align="center">
21
- <a href="#start-here-2-minutes">Start Here</a> &bull;
22
- <a href="#installation">Installation</a> &bull;
23
- <a href="#quick-start">Quick Start</a> &bull;
24
- <a href="#calibration-quick-start-10-minutes">Calibration Quick Start</a> &bull;
25
- <a href="https://github.com/Pavelevich/llm-checker/tree/main/docs">Docs</a> &bull;
26
- <a href="#claude-code-mcp">Claude MCP</a> &bull;
27
- <a href="#commands">Commands</a> &bull;
28
- <a href="#scoring-system">Scoring</a> &bull;
29
- <a href="#supported-hardware">Hardware</a> &bull;
30
- <a href="https://discord.gg/mnmYrA7T"><img src="https://cdn.simpleicons.org/discord/0066FF" alt="Discord" width="14" height="14"> Discord</a>
31
- </p>
1
+ # LLM Checker
2
+
3
+ ![LLM Checker Animated Logo](https://raw.githubusercontent.com/Pavelevich/llm-checker/main/assets/llm-checker-logo.gif)
4
+
5
+ **Intelligent Ollama Model Selector**
6
+
7
+ AI-powered CLI that analyzes your hardware and recommends optimal LLM models.
8
+ Deterministic scoring across **200+ dynamic models** (35+ curated fallback) with hardware-calibrated memory estimation.
9
+
10
+ [![npm version](https://img.shields.io/npm/v/llm-checker?style=flat-square&color=0066FF)](https://www.npmjs.com/package/llm-checker)
11
+ [![npm downloads](https://img.shields.io/npm/dm/llm-checker?style=flat-square&color=0066FF)](https://www.npmjs.com/package/llm-checker)
12
+ [![License](https://img.shields.io/badge/License-NPDL--1.0-CC3300?style=flat-square)](LICENSE)
13
+ [![Discord](https://img.shields.io/discord/1457032977849520374?style=flat-square&color=0066FF&label=Discord)](https://discord.gg/mnmYrA7T)
14
+ [![Node.js](https://img.shields.io/badge/node-%3E%3D16-0066FF?style=flat-square)](https://nodejs.org/)
15
+
16
+ [Start Here](#start-here-2-minutes) •
17
+ [Installation](#installation) •
18
+ [Quick Start](#quick-start) •
19
+ [Calibration Quick Start](#calibration-quick-start-10-minutes) •
20
+ [Docs](https://github.com/Pavelevich/llm-checker/tree/main/docs) •
21
+ [Claude MCP](#claude-code-mcp) •
22
+ [Commands](#commands) •
23
+ [Scoring](#scoring-system) •
24
+ [Hardware](#supported-hardware) •
25
+ [Discord](https://discord.gg/mnmYrA7T)
32
26
 
33
27
  ---
34
28
 
@@ -355,6 +349,10 @@ llm-checker search "qwen coder" --json
355
349
  | `installed` | Rank your installed Ollama models by compatibility |
356
350
  | `ollama-plan` | Compute safe Ollama runtime env vars (`NUM_CTX`, `NUM_PARALLEL`, `MAX_LOADED_MODELS`) for selected local models |
357
351
  | `mcp-setup` | Print/apply Claude MCP setup command and config snippet (`--apply`, `--json`, `--npx`) |
352
+ | `gpu-plan` | Multi-GPU placement advisor with single/pooled model-size envelopes |
353
+ | `verify-context` | Verify practical context-window limits for a local model |
354
+ | `amd-guard` | AMD/Windows reliability guard with mitigation hints |
355
+ | `toolcheck` | Test tool-calling compatibility for local models |
358
356
 
359
357
  ### Advanced Commands (require `sql.js`)
360
358
 
@@ -873,11 +871,9 @@ See [LICENSE](https://github.com/Pavelevich/llm-checker/blob/main/LICENSE) for f
873
871
 
874
872
  ---
875
873
 
876
- <p align="center">
877
- <a href="https://github.com/Pavelevich/llm-checker">GitHub</a> &bull;
878
- <a href="https://github.com/Pavelevich/llm-checker/releases">Releases</a> &bull;
879
- <a href="https://www.npmjs.com/package/llm-checker">npm</a> &bull;
880
- <a href="https://github.com/users/Pavelevich/packages/npm/package/llm-checker">GitHub Packages</a> &bull;
881
- <a href="https://github.com/Pavelevich/llm-checker/issues">Issues</a> &bull;
882
- <a href="https://discord.gg/mnmYrA7T">Discord</a>
883
- </p>
874
+ [GitHub](https://github.com/Pavelevich/llm-checker) •
875
+ [Releases](https://github.com/Pavelevich/llm-checker/releases) •
876
+ [npm](https://www.npmjs.com/package/llm-checker) •
877
+ [GitHub Packages](https://github.com/users/Pavelevich/packages/npm/package/llm-checker) •
878
+ [Issues](https://github.com/Pavelevich/llm-checker/issues) •
879
+ [Discord](https://discord.gg/mnmYrA7T)
@@ -4267,6 +4267,421 @@ program
4267
4267
  }
4268
4268
  });
4269
4269
 
4270
+ program
4271
+ .command('gpu-plan')
4272
+ .description('Multi-GPU placement advisor with safe model-size envelopes')
4273
+ .option('--model-size <gb>', 'Validate a target model size (e.g. 14 or 14GB)')
4274
+ .option('-j, --json', 'Output as JSON')
4275
+ .action(async (options) => {
4276
+ if (!options.json) showAsciiArt('hw-detect');
4277
+ const spinner = options.json ? null : ora('Building GPU placement plan...').start();
4278
+
4279
+ try {
4280
+ const UnifiedDetector = require('../src/hardware/unified-detector');
4281
+ const { buildGpuPlan } = require('../src/commands/roadmap-tools');
4282
+
4283
+ const detector = new UnifiedDetector();
4284
+ const hardware = await detector.detect();
4285
+
4286
+ const modelSizeGB = options.modelSize !== undefined ? parseFloat(options.modelSize) : null;
4287
+ if (options.modelSize !== undefined && (!Number.isFinite(modelSizeGB) || modelSizeGB <= 0)) {
4288
+ throw new Error('Invalid --model-size value. Use a positive number (GB).');
4289
+ }
4290
+
4291
+ const plan = buildGpuPlan(hardware, { modelSizeGB });
4292
+
4293
+ if (options.json) {
4294
+ console.log(JSON.stringify(plan, null, 2));
4295
+ return;
4296
+ }
4297
+
4298
+ if (spinner) spinner.succeed('GPU placement plan ready');
4299
+
4300
+ console.log(chalk.blue.bold('\n=== Multi-GPU Placement Plan ==='));
4301
+ console.log(`Backend: ${chalk.cyan((plan.backend || 'cpu').toUpperCase())}`);
4302
+ console.log(`Detected GPUs: ${chalk.white(plan.gpuCount)}`);
4303
+ console.log(`Total VRAM/Unified: ${chalk.green(`${plan.totalVRAM}GB`)}`);
4304
+ console.log(`Single-GPU safe envelope: ${chalk.yellow(`${plan.singleMaxModelGB}GB`)}`);
4305
+ console.log(`Pooled safe envelope: ${chalk.yellow(`${plan.pooledMaxModelGB}GB`)}`);
4306
+ console.log(`Strategy: ${chalk.cyan(plan.strategy)} (${plan.strategyReason})`);
4307
+
4308
+ if (plan.gpus.length > 0) {
4309
+ const rows = [
4310
+ ['#', 'Backend', 'GPU', 'VRAM/Unified', 'Speed Coef']
4311
+ ];
4312
+ plan.gpus.forEach((gpu, index) => {
4313
+ rows.push([
4314
+ String(index + 1),
4315
+ gpu.backend.toUpperCase(),
4316
+ gpu.name,
4317
+ `${gpu.vramGB}GB`,
4318
+ String(gpu.speedCoefficient || 0)
4319
+ ]);
4320
+ });
4321
+ console.log('\n' + table(rows));
4322
+ }
4323
+
4324
+ if (plan.fit) {
4325
+ const fit = plan.fit;
4326
+ const status = fit.fitsSingleGPU || fit.fitsPooled ? chalk.green('[OK]') : chalk.red('[FAIL]');
4327
+ console.log(`${status} Target model ${fit.modelSizeGB}GB`);
4328
+ console.log(` Fits single GPU: ${fit.fitsSingleGPU ? 'yes' : 'no'}`);
4329
+ console.log(` Fits pooled setup: ${fit.fitsPooled ? 'yes' : 'no'}`);
4330
+ }
4331
+
4332
+ console.log(chalk.blue.bold('\nRecommended env:'));
4333
+ for (const [key, value] of Object.entries(plan.env || {})) {
4334
+ console.log(chalk.cyan(` export ${key}="${value}"`));
4335
+ }
4336
+
4337
+ if (Array.isArray(plan.recommendations) && plan.recommendations.length > 0) {
4338
+ console.log(chalk.blue.bold('\nRecommendations:'));
4339
+ for (const item of plan.recommendations) {
4340
+ console.log(chalk.gray(` - ${item}`));
4341
+ }
4342
+ }
4343
+
4344
+ console.log('');
4345
+ } catch (error) {
4346
+ if (spinner) spinner.fail('GPU plan failed');
4347
+ console.error(chalk.red('Error:'), error.message);
4348
+ if (process.env.DEBUG) console.error(error.stack);
4349
+ process.exit(1);
4350
+ }
4351
+ });
4352
+
4353
+ program
4354
+ .command('verify-context')
4355
+ .description('Verify practical context window limits for a local Ollama model')
4356
+ .option('-m, --model <name>', 'Model to verify (default: first installed model)')
4357
+ .option('-t, --target <tokens>', 'Target context window tokens to validate', '8192')
4358
+ .option('-j, --json', 'Output as JSON')
4359
+ .action(async (options) => {
4360
+ if (!options.json) showAsciiArt('ollama');
4361
+ const spinner = options.json ? null : ora('Verifying context window...').start();
4362
+
4363
+ try {
4364
+ const OllamaClient = require('../src/ollama/client');
4365
+ const UnifiedDetector = require('../src/hardware/unified-detector');
4366
+ const {
4367
+ buildContextVerification,
4368
+ extractContextWindow
4369
+ } = require('../src/commands/roadmap-tools');
4370
+
4371
+ const targetTokens = parseInt(options.target, 10);
4372
+ if (!Number.isFinite(targetTokens) || targetTokens <= 0) {
4373
+ throw new Error('Invalid --target value. Use a positive integer.');
4374
+ }
4375
+
4376
+ const ollama = new OllamaClient();
4377
+ const availability = await ollama.checkOllamaAvailability();
4378
+ if (!availability.available) {
4379
+ throw new Error(availability.error || 'Ollama is not available');
4380
+ }
4381
+
4382
+ const installed = await ollama.getLocalModels();
4383
+ if (!installed.length) {
4384
+ throw new Error('No local Ollama models installed. Pull one model first.');
4385
+ }
4386
+
4387
+ let selected = installed[0];
4388
+ if (options.model) {
4389
+ const needle = options.model.toLowerCase();
4390
+ selected = installed.find((m) =>
4391
+ m.name.toLowerCase() === needle ||
4392
+ m.name.toLowerCase().startsWith(`${needle}:`) ||
4393
+ m.name.toLowerCase().includes(needle)
4394
+ );
4395
+ if (!selected) {
4396
+ throw new Error(`Model "${options.model}" not found in local Ollama models.`);
4397
+ }
4398
+ }
4399
+
4400
+ let showPayload = null;
4401
+ try {
4402
+ showPayload = await ollama.showModel(selected.name);
4403
+ } catch (err) {
4404
+ // Continue even if show metadata fails; memory verification still works.
4405
+ if (!options.json && spinner) {
4406
+ spinner.info(`Metadata probe warning: ${err.message}`);
4407
+ spinner.start('Continuing with hardware-based estimate...');
4408
+ }
4409
+ }
4410
+
4411
+ const detector = new UnifiedDetector();
4412
+ const hardware = await detector.detect();
4413
+ const declaredContext = extractContextWindow(showPayload);
4414
+
4415
+ const verification = buildContextVerification({
4416
+ modelName: selected.name,
4417
+ targetTokens,
4418
+ declaredContext,
4419
+ modelSizeGB: selected.fileSizeGB || 7,
4420
+ hardware
4421
+ });
4422
+
4423
+ const output = {
4424
+ model: selected.name,
4425
+ targetTokens,
4426
+ declaredContext,
4427
+ modelSizeGB: selected.fileSizeGB || null,
4428
+ verification
4429
+ };
4430
+
4431
+ if (options.json) {
4432
+ console.log(JSON.stringify(output, null, 2));
4433
+ return;
4434
+ }
4435
+
4436
+ if (spinner) spinner.succeed('Context verification complete');
4437
+
4438
+ const statusColor = verification.status === 'pass'
4439
+ ? chalk.green
4440
+ : verification.status === 'warn'
4441
+ ? chalk.yellow
4442
+ : chalk.red;
4443
+
4444
+ console.log(chalk.blue.bold('\n=== Context Verification ==='));
4445
+ console.log(`Model: ${chalk.white.bold(selected.name)}`);
4446
+ console.log(`Target: ${chalk.cyan(`${targetTokens} tokens`)}`);
4447
+ console.log(`Declared context: ${chalk.cyan(declaredContext ? `${declaredContext} tokens` : 'not exposed')}`);
4448
+ console.log(`Estimated memory-safe context: ${chalk.cyan(`${verification.memoryLimitedContext} tokens`)}`);
4449
+ console.log(`Recommended runtime context: ${chalk.cyan(`${verification.recommendedContext} tokens`)}`);
4450
+ console.log(statusColor(`Status: ${verification.status.toUpperCase()}`));
4451
+
4452
+ console.log(chalk.blue.bold('\nChecks:'));
4453
+ for (const check of verification.checks) {
4454
+ const icon = check.status === 'pass' ? chalk.green('[OK]') :
4455
+ check.status === 'warn' ? chalk.yellow('[!]') : chalk.red('[FAIL]');
4456
+ console.log(` ${icon} ${check.message}`);
4457
+ }
4458
+
4459
+ if (verification.suggestions.length > 0) {
4460
+ console.log(chalk.blue.bold('\nSuggestions:'));
4461
+ for (const suggestion of verification.suggestions) {
4462
+ console.log(chalk.gray(` - ${suggestion}`));
4463
+ }
4464
+ }
4465
+
4466
+ console.log(chalk.cyan(`\nSuggested run: ollama run ${selected.name}`));
4467
+ console.log(chalk.cyan(`# with context budget: --ctx-size ${verification.recommendedContext}`));
4468
+ console.log('');
4469
+ } catch (error) {
4470
+ if (spinner) spinner.fail('Context verification failed');
4471
+ console.error(chalk.red('Error:'), error.message);
4472
+ if (process.env.DEBUG) console.error(error.stack);
4473
+ process.exit(1);
4474
+ }
4475
+ });
4476
+
4477
+ program
4478
+ .command('amd-guard')
4479
+ .description('AMD/Windows reliability guard with actionable mitigation hints')
4480
+ .option('-j, --json', 'Output as JSON')
4481
+ .action(async (options) => {
4482
+ if (!options.json) showAsciiArt('hw-detect');
4483
+ const spinner = options.json ? null : ora('Running AMD reliability guard...').start();
4484
+
4485
+ try {
4486
+ const UnifiedDetector = require('../src/hardware/unified-detector');
4487
+ const ROCmDetector = require('../src/hardware/backends/rocm-detector');
4488
+ const { buildAmdGuard } = require('../src/commands/roadmap-tools');
4489
+
4490
+ const detector = new UnifiedDetector();
4491
+ const hardware = await detector.detect();
4492
+
4493
+ const rocmDetector = new ROCmDetector();
4494
+ const rocmAvailable = rocmDetector.checkAvailability();
4495
+ const report = buildAmdGuard({
4496
+ platform: process.platform,
4497
+ hardware,
4498
+ rocmAvailable,
4499
+ rocmDetectionMethod: rocmDetector.detectionMethod
4500
+ });
4501
+
4502
+ if (options.json) {
4503
+ console.log(JSON.stringify(report, null, 2));
4504
+ return;
4505
+ }
4506
+
4507
+ if (spinner) spinner.succeed('AMD guard report ready');
4508
+
4509
+ const statusColor = report.status === 'pass'
4510
+ ? chalk.green
4511
+ : report.status === 'warn'
4512
+ ? chalk.yellow
4513
+ : chalk.red;
4514
+
4515
+ console.log(chalk.blue.bold('\n=== AMD Reliability Guard ==='));
4516
+ console.log(`Platform: ${chalk.white(process.platform)}`);
4517
+ console.log(`Primary backend: ${chalk.cyan((report.primaryBackend || 'cpu').toUpperCase())}`);
4518
+ console.log(`ROCm available: ${chalk.cyan(report.rocmAvailable ? 'yes' : 'no')}`);
4519
+ console.log(`Detection method: ${chalk.cyan(report.rocmDetectionMethod || 'none')}`);
4520
+ console.log(statusColor(`Status: ${report.status.toUpperCase()}`));
4521
+
4522
+ console.log(chalk.blue.bold('\nChecks:'));
4523
+ for (const check of report.checks) {
4524
+ const icon = check.status === 'pass' ? chalk.green('[OK]') :
4525
+ check.status === 'warn' ? chalk.yellow('[!]') : chalk.red('[FAIL]');
4526
+ console.log(` ${icon} ${check.message}`);
4527
+ }
4528
+
4529
+ if (report.recommendations.length > 0) {
4530
+ console.log(chalk.blue.bold('\nRecommendations:'));
4531
+ for (const recommendation of report.recommendations) {
4532
+ console.log(chalk.gray(` - ${recommendation}`));
4533
+ }
4534
+ }
4535
+ console.log('');
4536
+ } catch (error) {
4537
+ if (spinner) spinner.fail('AMD guard failed');
4538
+ console.error(chalk.red('Error:'), error.message);
4539
+ if (process.env.DEBUG) console.error(error.stack);
4540
+ process.exit(1);
4541
+ }
4542
+ });
4543
+
4544
+ program
4545
+ .command('toolcheck')
4546
+ .description('Tool-calling compatibility tester for local Ollama models')
4547
+ .option('-m, --model <name>', 'Test a specific model')
4548
+ .option('--all', 'Test all installed models')
4549
+ .option('--timeout <ms>', 'Per-model timeout in milliseconds', '45000')
4550
+ .option('-j, --json', 'Output as JSON')
4551
+ .action(async (options) => {
4552
+ if (!options.json) showAsciiArt('ollama');
4553
+ const spinner = options.json ? null : ora('Running tool-calling compatibility checks...').start();
4554
+
4555
+ try {
4556
+ const OllamaClient = require('../src/ollama/client');
4557
+ const { evaluateToolCallingResult } = require('../src/commands/roadmap-tools');
4558
+
4559
+ const timeoutMs = parseInt(options.timeout, 10);
4560
+ if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
4561
+ throw new Error('Invalid --timeout value. Use a positive integer in ms.');
4562
+ }
4563
+
4564
+ const ollama = new OllamaClient();
4565
+ const availability = await ollama.checkOllamaAvailability();
4566
+ if (!availability.available) {
4567
+ throw new Error(availability.error || 'Ollama is not available');
4568
+ }
4569
+
4570
+ const installed = await ollama.getLocalModels();
4571
+ if (!installed.length) {
4572
+ throw new Error('No local Ollama models installed.');
4573
+ }
4574
+
4575
+ let targets = installed;
4576
+ if (options.model) {
4577
+ const needle = options.model.toLowerCase();
4578
+ targets = installed.filter((m) =>
4579
+ m.name.toLowerCase() === needle ||
4580
+ m.name.toLowerCase().startsWith(`${needle}:`) ||
4581
+ m.name.toLowerCase().includes(needle)
4582
+ );
4583
+ if (!targets.length) {
4584
+ throw new Error(`Model "${options.model}" not found in local Ollama models.`);
4585
+ }
4586
+ } else if (!options.all) {
4587
+ targets = installed.slice(0, 1);
4588
+ }
4589
+
4590
+ const toolSpec = [
4591
+ {
4592
+ type: 'function',
4593
+ function: {
4594
+ name: 'add_numbers',
4595
+ description: 'Add two integers and return the sum',
4596
+ parameters: {
4597
+ type: 'object',
4598
+ properties: {
4599
+ a: { type: 'integer' },
4600
+ b: { type: 'integer' }
4601
+ },
4602
+ required: ['a', 'b']
4603
+ }
4604
+ }
4605
+ }
4606
+ ];
4607
+
4608
+ const results = [];
4609
+ for (const model of targets) {
4610
+ if (spinner) spinner.text = `Testing ${model.name}...`;
4611
+
4612
+ let payload = null;
4613
+ let err = null;
4614
+ try {
4615
+ payload = await ollama.chat(
4616
+ model.name,
4617
+ [{ role: 'user', content: 'Use the add_numbers tool with a=2 and b=3. Call the tool directly.' }],
4618
+ {
4619
+ tools: toolSpec,
4620
+ timeoutMs,
4621
+ generationOptions: {
4622
+ temperature: 0,
4623
+ num_predict: 64
4624
+ }
4625
+ }
4626
+ );
4627
+ } catch (error) {
4628
+ err = error;
4629
+ }
4630
+
4631
+ const evaluation = evaluateToolCallingResult(payload, err);
4632
+ results.push({
4633
+ model: model.name,
4634
+ status: evaluation.status,
4635
+ score: evaluation.score,
4636
+ reason: evaluation.reason,
4637
+ toolCalls: evaluation.toolCalls
4638
+ });
4639
+ }
4640
+
4641
+ if (options.json) {
4642
+ console.log(JSON.stringify({
4643
+ testedModels: results.length,
4644
+ results
4645
+ }, null, 2));
4646
+ return;
4647
+ }
4648
+
4649
+ if (spinner) spinner.succeed(`Toolcheck completed (${results.length} model${results.length > 1 ? 's' : ''})`);
4650
+
4651
+ const rows = [['Model', 'Status', 'Score', 'Reason']];
4652
+ for (const result of results) {
4653
+ const statusLabel = result.status === 'supported'
4654
+ ? chalk.green('SUPPORTED')
4655
+ : result.status === 'partial'
4656
+ ? chalk.yellow('PARTIAL')
4657
+ : chalk.red('UNSUPPORTED');
4658
+ rows.push([
4659
+ result.model,
4660
+ statusLabel,
4661
+ String(result.score),
4662
+ result.reason
4663
+ ]);
4664
+ }
4665
+
4666
+ console.log('\n' + table(rows));
4667
+
4668
+ const supported = results.filter((r) => r.status === 'supported').length;
4669
+ const partial = results.filter((r) => r.status === 'partial').length;
4670
+ const unsupported = results.filter((r) => r.status === 'unsupported').length;
4671
+
4672
+ console.log(chalk.blue.bold('Summary:'));
4673
+ console.log(chalk.green(` Supported: ${supported}`));
4674
+ console.log(chalk.yellow(` Partial: ${partial}`));
4675
+ console.log(chalk.red(` Unsupported: ${unsupported}`));
4676
+ console.log('');
4677
+ } catch (error) {
4678
+ if (spinner) spinner.fail('Toolcheck failed');
4679
+ console.error(chalk.red('Error:'), error.message);
4680
+ if (process.env.DEBUG) console.error(error.stack);
4681
+ process.exit(1);
4682
+ }
4683
+ });
4684
+
4270
4685
  program
4271
4686
  .command('hw-detect')
4272
4687
  .description('Detect and display detailed hardware capabilities')
@@ -4337,6 +4752,17 @@ program
4337
4752
  console.log(` ${gpu.name}: ${gpu.memory.total}GB`);
4338
4753
  }
4339
4754
  }
4755
+
4756
+ if (backend === 'generic' && info.info) {
4757
+ console.log(` Source: ${info.info.source || 'systeminformation'}`);
4758
+ console.log(` Total dedicated VRAM: ${info.info.totalVRAM || 0}GB`);
4759
+ for (const gpu of info.info.gpus || []) {
4760
+ const memory = gpu.memory?.total || 0;
4761
+ const typeLabel = gpu.type === 'integrated' ? 'Integrated' : 'Discrete';
4762
+ const memoryLabel = memory > 0 ? `${memory}GB` : 'shared/unknown';
4763
+ console.log(` ${gpu.name}: ${memoryLabel} (${typeLabel})`);
4764
+ }
4765
+ }
4340
4766
  }
4341
4767
 
4342
4768
  console.log(chalk.gray(`\nFingerprint: ${hardware.fingerprint}`));
@@ -163,6 +163,10 @@ const ALLOWED_CLI_COMMANDS = new Set([
163
163
  "audit",
164
164
  "calibrate",
165
165
  "check",
166
+ "gpu-plan",
167
+ "verify-context",
168
+ "amd-guard",
169
+ "toolcheck",
166
170
  "ollama",
167
171
  "installed",
168
172
  "ollama-plan",
@@ -274,6 +278,62 @@ server.tool(
274
278
  }
275
279
  );
276
280
 
281
+ server.tool(
282
+ "gpu_plan",
283
+ "Multi-GPU placement advisor that returns safe single/pooled model-size envelopes and recommended Ollama env settings",
284
+ {
285
+ model_size: z.number().optional().describe("Optional target model size in GB to validate"),
286
+ },
287
+ async ({ model_size }) => {
288
+ const args = ["gpu-plan"];
289
+ if (model_size) args.push("--model-size", String(model_size));
290
+ const result = await run(args, 60000);
291
+ return { content: [{ type: "text", text: result }] };
292
+ }
293
+ );
294
+
295
+ server.tool(
296
+ "verify_context",
297
+ "Verify practical context-window limits for a local Ollama model using model metadata and hardware memory budget",
298
+ {
299
+ model: z.string().optional().describe("Model name to validate (default: first installed model)"),
300
+ target_tokens: z.number().optional().describe("Target context window tokens (default: 8192)"),
301
+ },
302
+ async ({ model, target_tokens }) => {
303
+ const args = ["verify-context"];
304
+ if (model) args.push("--model", model);
305
+ if (target_tokens) args.push("--target", String(target_tokens));
306
+ const result = await run(args, 90000);
307
+ return { content: [{ type: "text", text: result }] };
308
+ }
309
+ );
310
+
311
+ server.tool(
312
+ "amd_guard",
313
+ "Run AMD/Windows reliability guard checks and return mitigation hints for unstable GPU paths",
314
+ {},
315
+ async () => {
316
+ const result = await run(["amd-guard"], 60000);
317
+ return { content: [{ type: "text", text: result }] };
318
+ }
319
+ );
320
+
321
+ server.tool(
322
+ "toolcheck",
323
+ "Test tool-calling compatibility of local Ollama models and classify support as supported/partial/unsupported",
324
+ {
325
+ model: z.string().optional().describe("Optional model to test"),
326
+ all: z.boolean().optional().describe("Test all installed models instead of only one"),
327
+ },
328
+ async ({ model, all }) => {
329
+ const args = ["toolcheck"];
330
+ if (model) args.push("--model", model);
331
+ if (all) args.push("--all");
332
+ const result = await run(args, 180000);
333
+ return { content: [{ type: "text", text: result }] };
334
+ }
335
+ );
336
+
277
337
  server.tool(
278
338
  "ollama_plan",
279
339
  "Build an Ollama capacity plan for selected local models and return recommended context/parallel/memory settings",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.1",
3
+ "version": "3.5.2",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -10,6 +10,7 @@
10
10
  "main": "src/index.js",
11
11
  "scripts": {
12
12
  "test": "node tests/run-all-tests.js",
13
+ "test:roadmap": "node tests/roadmap-tools.test.js",
13
14
  "test:gpu": "node tests/amd-gpu-detection.test.js",
14
15
  "test:platform": "node tests/hardware-simulation-tests.js",
15
16
  "test:ui": "node tests/ui-cli-smoke.test.js",
@@ -26,6 +27,10 @@
26
27
  "dev": "node bin/enhanced_cli.js",
27
28
  "start": "node bin/enhanced_cli.js",
28
29
  "check": "node bin/enhanced_cli.js check",
30
+ "gpu-plan": "node bin/enhanced_cli.js gpu-plan",
31
+ "verify-context": "node bin/enhanced_cli.js verify-context",
32
+ "amd-guard": "node bin/enhanced_cli.js amd-guard",
33
+ "toolcheck": "node bin/enhanced_cli.js toolcheck",
29
34
  "recommend": "node bin/enhanced_cli.js recommend",
30
35
  "ollama": "node bin/enhanced_cli.js ollama",
31
36
  "list-models": "node bin/enhanced_cli.js list-models",
@@ -90,7 +95,6 @@
90
95
  "bugs": {
91
96
  "url": "https://github.com/Pavelevich/llm-checker/issues"
92
97
  },
93
- "readmeFilename": "README.md",
94
98
  "engines": {
95
99
  "node": ">=16.0.0",
96
100
  "npm": ">=8.0.0"