llm-checker 3.5.15 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -573,6 +573,19 @@ This makes integrated GPUs visible even when the selected runtime backend is sti
573
573
  llm-checker recommend
574
574
  ```
575
575
 
576
+ As of the scoring unification (#96), `check`, `recommend`, and `smart-recommend`
577
+ all derive their ranking from **one canonical scoring core**
578
+ (`DeterministicModelSelector` via `src/models/scoring-core.js`), so identical
579
+ `(model, hardware)` inputs score identically across all three and the
580
+ high-capacity right-sizing floor applies everywhere. They differ only in their
581
+ model **source** and **presentation**, not in how a given model is ranked:
582
+
583
+ | Command | Role | Ranking core |
584
+ |---------|------|--------------|
585
+ | `recommend` | Canonical model recommendations by category | Shared core (reference output) |
586
+ | `check` | Full hardware-compatibility report with a recommendation card | Shared core (consistent ranking, fit-oriented report) |
587
+ | `smart-recommend` | Catalog/DB-backed recommendations with a detailed score breakdown | Shared core (same ordering + scores) |
588
+
576
589
  Use optimization profiles to steer ranking by intent:
577
590
 
578
591
  ```bash
@@ -695,7 +708,7 @@ Three scoring systems are available, each optimized for different workflows:
695
708
  | `reasoning` | 60% | 10% | 20% | 10% |
696
709
  | `multimodal` | 50% | 15% | 20% | 15% |
697
710
 
698
- **Scoring Engine** (used by `smart-recommend` and `search`):
711
+ **Scoring Engine** (used by `search` for catalog scoring; `smart-recommend`'s final ranking is produced by the shared scoring core — see #96):
699
712
 
700
713
  | Use Case | Quality | Speed | Fit | Context |
701
714
  |----------|:-------:|:-----:|:---:|:-------:|
@@ -428,6 +428,11 @@ class CompatibilityAnalyzer {
428
428
  }
429
429
 
430
430
  parseModelSize(sizeString) {
431
+ // Guard non-string input (undefined / a numeric size) — this runs for every
432
+ // model in calculateModelCompatibility, so one bad entry must not crash the
433
+ // whole analysis. Matches the guard in analyzer/performance.js.
434
+ if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
435
+
431
436
  const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
432
437
  if (!match) return 1;
433
438
 
@@ -363,12 +363,13 @@ class PerformanceAnalyzer {
363
363
  }
364
364
 
365
365
  estimateLoadTime(model, hardware) {
366
+ // ~2 GB per 1B params (fp16-ish) on-disk approximation.
366
367
  const modelSizeGB = this.parseModelSize(model.size) * 2;
367
368
 
368
- let loadTimeSeconds = modelSizeGB * 2;
369
-
370
-
371
- loadTimeSeconds *= 0.7;
369
+ // Fold the previous `* 2` then `* 0.7` two-step (a leftover from an
370
+ // incomplete edit, with a dead blank line) into one documented factor:
371
+ // ~1.4 s of load time per GB before hardware adjustments.
372
+ let loadTimeSeconds = modelSizeGB * 1.4;
372
373
 
373
374
  const cpuSpeedFactor = Math.max(0.5, Math.min(1.5, (hardware.cpu.speed || 2.5) / 2.5));
374
375
  loadTimeSeconds /= cpuSpeedFactor;
package/bin/cli.js CHANGED
@@ -11,44 +11,10 @@ if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
11
11
  process.exit(1);
12
12
  }
13
13
 
14
- function preprocessAiCheckModelsArg(argv) {
15
- const normalizedArgs = [];
16
- let modelsFilter = null;
17
- let sawAiCheck = false;
18
-
19
- for (let index = 0; index < argv.length; index += 1) {
20
- const token = argv[index];
21
-
22
- if (token === 'ai-check') {
23
- sawAiCheck = true;
24
- }
25
-
26
- if (sawAiCheck && token === '--models') {
27
- const nextToken = argv[index + 1];
28
- if (nextToken && !nextToken.startsWith('-')) {
29
- modelsFilter = nextToken;
30
- index += 1;
31
- }
32
- continue;
33
- }
34
-
35
- if (sawAiCheck && token.startsWith('--models=')) {
36
- modelsFilter = token.slice('--models='.length);
37
- continue;
38
- }
39
-
40
- normalizedArgs.push(token);
41
- }
42
-
43
- return { args: normalizedArgs, modelsFilter };
44
- }
45
-
46
- const preprocessedArgs = preprocessAiCheckModelsArg(process.argv.slice(2));
47
-
48
- if (typeof preprocessedArgs.modelsFilter === 'string' && preprocessedArgs.modelsFilter.trim()) {
49
- process.env.LLM_CHECKER_AI_CHECK_MODELS = preprocessedArgs.modelsFilter.trim();
50
- }
51
-
52
- process.argv = [process.argv[0], process.argv[1], ...preprocessedArgs.args];
14
+ // `ai-check --models <list>` is now a real commander option handled in
15
+ // enhanced_cli.js (and the AICheckSelector applies it as a candidate filter), so
16
+ // the previous argv-rewriting shim — which stripped the flag and stashed it in an
17
+ // env var that nothing read — is gone. LLM_CHECKER_AI_CHECK_MODELS still works as
18
+ // an explicit fallback for the same filter.
53
19
 
54
20
  require('./enhanced_cli');
package/bin/enhanced_cli.js CHANGED
@@ -58,7 +58,7 @@ const calibrationManager = new CalibrationManager();
58
58
 
59
59
  const COMMAND_HEADER_LABELS = {
60
60
  'hw-detect': 'Hardware Detection',
61
- 'smart-recommend': 'Smart Recommend',
61
+ 'smart-recommend': 'Smart Recommend (Experimental)',
62
62
  search: 'Model Search',
63
63
  sync: 'Database Sync',
64
64
  'mcp-setup': 'Claude MCP Setup',
@@ -79,6 +79,19 @@ function showAsciiArt(command) {
79
79
  renderCommandHeader(label);
80
80
  }
81
81
 
82
+ const RECOMMENDATION_COMMAND_NOTES = {
83
+ check: 'Compatibility report: shows hardware fit first. Use `llm-checker recommend` for canonical ranked model picks.',
84
+ recommend: 'Canonical recommendations: deterministic hardware-aware selector by category.',
85
+ 'smart-recommend': 'Experimental scoring engine: results can differ from `recommend` while this path is being unified.'
86
+ };
87
+
88
+ function displayRecommendationCommandNote(command) {
89
+ const note = RECOMMENDATION_COMMAND_NOTES[command];
90
+ if (!note) return;
91
+ console.log(chalk.gray(`Recommendation mode: ${note}`));
92
+ console.log('');
93
+ }
94
+
82
95
  // Function to search Ollama models by use case
83
96
  function getOllamaCacheFile(filename) {
84
97
  try {
@@ -3295,6 +3308,7 @@ Policy scope:
3295
3308
  )
3296
3309
  .action(async (options) => {
3297
3310
  showAsciiArt('check');
3311
+ displayRecommendationCommandNote('check');
3298
3312
  try {
3299
3313
  // Use verbose progress unless explicitly disabled
3300
3314
  const verboseEnabled = options.verbose !== false;
@@ -3462,11 +3476,8 @@ Policy scope:
3462
3476
 
3463
3477
  program
3464
3478
  .command('ollama')
3465
- .description('Manage Ollama integration with hardware compatibility')
3466
- .option('-l, --list', 'List installed models with compatibility scores')
3467
- .option('-r, --running', 'Show running models with performance data')
3468
- .option('-c, --compatible', 'Show only hardware-compatible installed models')
3469
- .option('--recommendations', 'Show installation recommendations')
3479
+ .description('Check Ollama integration status (use `installed` to rank installed models)')
3480
+ .option('-l, --list', 'List installed models ranked by compatibility (runs `installed`)')
3470
3481
  .action(async (options) => {
3471
3482
  showAsciiArt('ollama');
3472
3483
  const spinner = ora('Checking Ollama integration...').start();
@@ -3492,7 +3503,9 @@ program
3492
3503
  spinner.succeed(`Ollama integration active`);
3493
3504
 
3494
3505
  if (options.list) {
3495
- console.log('Ollama models list feature coming soon...');
3506
+ console.log(chalk.cyan('\nRun `llm-checker installed` to rank your installed models by compatibility and use-case.'));
3507
+ } else {
3508
+ console.log(chalk.gray('\nTip: `llm-checker installed` ranks installed models; `llm-checker recommend` suggests models for your hardware.'));
3496
3509
  }
3497
3510
 
3498
3511
  } catch (error) {
@@ -3520,10 +3533,18 @@ program
3520
3533
  const availability = await ollamaClient.checkOllamaAvailability();
3521
3534
  if (!availability.available) {
3522
3535
  spinner.fail('Ollama not available');
3523
- console.log(chalk.red('\n' + availability.error));
3524
- if (availability.hint) {
3525
- console.log(chalk.yellow('Hint: ' + availability.hint));
3536
+ if (options.json) {
3537
+ // --json must always emit parseable JSON on stdout (the success
3538
+ // path prints an array); previously these branches printed
3539
+ // ANSI-colored prose and broke `installed --json | jq`.
3540
+ console.log(JSON.stringify([], null, 2));
3541
+ } else {
3542
+ console.log(chalk.red('\n' + availability.error));
3543
+ if (availability.hint) {
3544
+ console.log(chalk.yellow('Hint: ' + availability.hint));
3545
+ }
3526
3546
  }
3547
+ process.exitCode = 1;
3527
3548
  return;
3528
3549
  }
3529
3550
 
@@ -3531,8 +3552,12 @@ program
3531
3552
  const installedModels = await ollamaClient.getLocalModels();
3532
3553
  if (!installedModels || installedModels.length === 0) {
3533
3554
  spinner.fail('No models installed');
3534
- console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
3535
- console.log(chalk.cyan(' ollama pull llama3.2:3b'));
3555
+ if (options.json) {
3556
+ console.log(JSON.stringify([], null, 2));
3557
+ } else {
3558
+ console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
3559
+ console.log(chalk.cyan(' ollama pull llama3.2:3b'));
3560
+ }
3536
3561
  return;
3537
3562
  }
3538
3563
 
@@ -3851,6 +3876,7 @@ Calibrated routing examples:
3851
3876
  )
3852
3877
  .action(async (options) => {
3853
3878
  showAsciiArt('recommend');
3879
+ displayRecommendationCommandNote('recommend');
3854
3880
  try {
3855
3881
  const verboseEnabled = options.verbose !== false;
3856
3882
  const checker = new (getLLMChecker())({ verbose: verboseEnabled });
@@ -4365,6 +4391,7 @@ program
4365
4391
  .option('--ctx <number>', 'Target context length', '8192')
4366
4392
  .option('-e, --evaluator <model>', 'Evaluator model (auto for best available)', 'auto')
4367
4393
  .option('-w, --weight <number>', 'AI weight (0.0-1.0, default 0.3)', '0.3')
4394
+ .option('-m, --models <list>', 'Restrict evaluation to these models (comma-separated)')
4368
4395
  .action(async (options) => {
4369
4396
  showAsciiArt('ai-check');
4370
4397
  // Check if Ollama is installed first
@@ -4377,14 +4404,33 @@ program
4377
4404
 
4378
4405
  const aiCheckSelector = new AICheckSelector();
4379
4406
 
4407
+ // Validate numeric options up front: bad input (e.g. --weight abc, --top
4408
+ // foo) used to flow through as NaN, which survived the downstream clamp
4409
+ // and made the displayed AI weight and every weighted score NaN.
4410
+ const weight = Number.parseFloat(options.weight);
4411
+ const top = Number.parseInt(options.top, 10);
4412
+ const ctx = options.ctx ? Number.parseInt(options.ctx, 10) : undefined;
4413
+ const invalidNumeric =
4414
+ (!Number.isFinite(weight) || weight < 0 || weight > 1) ? `--weight ${options.weight} (use a number from 0.0 to 1.0)`
4415
+ : (!Number.isInteger(top) || top <= 0) ? `--top ${options.top} (use a positive integer)`
4416
+ : (ctx !== undefined && (!Number.isInteger(ctx) || ctx <= 0)) ? `--ctx ${options.ctx} (use a positive integer)`
4417
+ : null;
4418
+ if (invalidNumeric) {
4419
+ spinner.stop();
4420
+ console.error(chalk.red(`Invalid ${invalidNumeric}`));
4421
+ process.exitCode = 1;
4422
+ return;
4423
+ }
4424
+
4380
4425
  const checkOptions = {
4381
4426
  category: options.category,
4382
- top: parseInt(options.top),
4383
- ctx: options.ctx ? parseInt(options.ctx) : undefined,
4427
+ top,
4428
+ ctx,
4384
4429
  evaluator: options.evaluator,
4385
- weight: parseFloat(options.weight)
4430
+ weight,
4431
+ models: options.models || process.env.LLM_CHECKER_AI_CHECK_MODELS || undefined
4386
4432
  };
4387
-
4433
+
4388
4434
  spinner.stop();
4389
4435
 
4390
4436
  const result = await aiCheckSelector.aiCheck(checkOptions);
@@ -4731,8 +4777,20 @@ program
4731
4777
  });
4732
4778
 
4733
4779
  if (searchResults.length === 0) {
4734
- if (spinner) spinner.info('No models found matching your query');
4735
4780
  syncManager.close();
4781
+ if (options.json) {
4782
+ // --json must always emit parseable JSON, even on zero matches
4783
+ // (previously it printed nothing and broke `search ... --json | jq`).
4784
+ console.log(JSON.stringify({
4785
+ query,
4786
+ all: [],
4787
+ recommendations: [],
4788
+ insights: [],
4789
+ meta: { afterFiltering: 0, totalMatches: 0 }
4790
+ }, null, 2));
4791
+ } else if (spinner) {
4792
+ spinner.info('No models found matching your query');
4793
+ }
4736
4794
  return;
4737
4795
  }
4738
4796
 
@@ -4806,7 +4864,7 @@ program
4806
4864
 
4807
4865
  program
4808
4866
  .command('smart-recommend')
4809
- .description('Get intelligent model recommendations using the new scoring engine')
4867
+ .description('Experimental recommendations using the alternate scoring engine')
4810
4868
  .option('-u, --use-case <case>', 'Optimize for use case', 'general')
4811
4869
  .option('-l, --limit <n>', 'Maximum number of recommendations', '5')
4812
4870
  .option('--target-tps <n>', 'Target tokens per second', '20')
@@ -4814,8 +4872,19 @@ program
4814
4872
  .option('--include-vision', 'Include vision/multimodal models')
4815
4873
  .option('--include-embeddings', 'Include embedding models')
4816
4874
  .option('-j, --json', 'Output as JSON')
4875
+ .addHelpText(
4876
+ 'after',
4877
+ `
4878
+ Recommendation engine note:
4879
+ smart-recommend is experimental and may intentionally differ from recommend.
4880
+ Use "llm-checker recommend" for canonical package recommendations.
4881
+ `
4882
+ )
4817
4883
  .action(async (options) => {
4818
- if (!options.json) showAsciiArt('smart-recommend');
4884
+ if (!options.json) {
4885
+ showAsciiArt('smart-recommend');
4886
+ displayRecommendationCommandNote('smart-recommend');
4887
+ }
4819
4888
  const SyncManager = require('../src/data/sync-manager');
4820
4889
  const IntelligentSelector = require('../src/models/intelligent-selector');
4821
4890
  const UnifiedDetector = require('../src/hardware/unified-detector');