llm-checker 3.5.15 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +28 -8
  2. package/analyzer/compatibility.js +5 -0
  3. package/analyzer/performance.js +5 -4
  4. package/bin/cli.js +5 -39
  5. package/bin/enhanced_cli.js +449 -24
  6. package/bin/mcp-server.mjs +266 -101
  7. package/package.json +13 -8
  8. package/src/ai/multi-objective-selector.js +118 -11
  9. package/src/calibration/calibration-manager.js +4 -1
  10. package/src/data/model-database.js +489 -5
  11. package/src/data/registry-ingestors.js +751 -0
  12. package/src/data/registry-recommender.js +514 -0
  13. package/src/data/seed/README.md +11 -3
  14. package/src/data/seed/models.db +0 -0
  15. package/src/data/sync-manager.js +32 -18
  16. package/src/hardware/backends/apple-silicon.js +5 -1
  17. package/src/hardware/backends/cuda-detector.js +47 -19
  18. package/src/hardware/backends/intel-detector.js +6 -2
  19. package/src/hardware/backends/rocm-detector.js +6 -2
  20. package/src/hardware/detector.js +57 -30
  21. package/src/hardware/unified-detector.js +129 -25
  22. package/src/index.js +68 -4
  23. package/src/models/ai-check-selector.js +36 -5
  24. package/src/models/deterministic-selector.js +179 -18
  25. package/src/models/expanded_database.js +9 -5
  26. package/src/models/intelligent-selector.js +87 -1
  27. package/src/models/moe-assumptions.js +11 -0
  28. package/src/models/requirements.js +16 -11
  29. package/src/models/scoring-core.js +341 -0
  30. package/src/models/scoring-engine.js +9 -2
  31. package/src/ollama/capacity-planner.js +15 -2
  32. package/src/ollama/client.js +70 -30
  33. package/src/ollama/enhanced-client.js +20 -2
  34. package/src/ollama/manager.js +14 -2
  35. package/src/policy/cli-policy.js +8 -2
  36. package/src/policy/policy-engine.js +2 -1
  37. package/src/provenance/model-provenance.js +4 -1
  38. package/src/ui/cli-theme.js +47 -7
  39. package/src/ui/interactive-panel.js +162 -24
@@ -58,9 +58,12 @@ const calibrationManager = new CalibrationManager();
58
58
 
59
59
  const COMMAND_HEADER_LABELS = {
60
60
  'hw-detect': 'Hardware Detection',
61
- 'smart-recommend': 'Smart Recommend',
61
+ 'smart-recommend': 'Smart Recommend (Experimental)',
62
62
  search: 'Model Search',
63
63
  sync: 'Database Sync',
64
+ 'registry-sync': 'Model Registry Sync',
65
+ 'registry-search': 'Model Registry Search',
66
+ 'registry-recommend': 'Registry Recommendations',
64
67
  'mcp-setup': 'Claude MCP Setup',
65
68
  check: 'Compatibility Check',
66
69
  installed: 'Installed Models',
@@ -79,6 +82,19 @@ function showAsciiArt(command) {
79
82
  renderCommandHeader(label);
80
83
  }
81
84
 
85
/**
 * One-line explanations of how each recommendation-style command relates to
 * the others. Keys are CLI command names.
 */
const RECOMMENDATION_COMMAND_NOTES = {
    check: 'Compatibility report: shows hardware fit first. Use `llm-checker recommend` for canonical ranked model picks.',
    recommend: 'Canonical recommendations: deterministic hardware-aware selector by category.',
    'smart-recommend': 'Experimental scoring engine: results can differ from `recommend` while this path is being unified.'
};

/**
 * Print the "Recommendation mode" note for a command, followed by a blank line.
 * Commands without a registered note print nothing.
 *
 * @param {string} command - CLI command name, e.g. 'check' or 'recommend'.
 * @returns {void}
 */
function displayRecommendationCommandNote(command) {
    // Own-property check: a bare `NOTES[command]` lookup would resolve names
    // such as 'toString' or 'constructor' to inherited functions and print them.
    if (!Object.prototype.hasOwnProperty.call(RECOMMENDATION_COMMAND_NOTES, command)) return;

    const note = RECOMMENDATION_COMMAND_NOTES[command];
    if (!note) return;

    console.log(chalk.gray(`Recommendation mode: ${note}`));
    console.log('');
}
97
+
82
98
  // Function to search Ollama models by use case
83
99
  function getOllamaCacheFile(filename) {
84
100
  try {
@@ -388,6 +404,41 @@ function getRealSizeFromOllamaCache(model) {
388
404
  }
389
405
  }
390
406
 
407
/**
 * Parse a CLI option value as a finite number greater than zero.
 *
 * @param {*} value - Raw option value (normally a string from the command line).
 * @param {*} [fallback=null] - Returned when `value` is missing or not a
 *   positive finite number.
 * @returns {*} The parsed number, or `fallback`.
 */
function parsePositiveNumberOption(value, fallback = null) {
    if (value === undefined || value === null || value === '') return fallback;

    // Only strings and numbers are meaningful inputs. Without this guard,
    // Number() would coerce `true` to 1 and `[7]` to 7.
    if (typeof value !== 'string' && typeof value !== 'number') return fallback;

    const parsed = Number(value);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}
412
+
413
/**
 * Shorten text to at most `maxLength` characters by replacing the middle
 * with "...". Falsy values are rendered as an empty string.
 *
 * @param {*} value - Value to display; converted with String().
 * @param {number} [maxLength=48] - Maximum length of the result. Negative
 *   values are treated as 0; NaN falls back to the default of 48.
 * @returns {string} The original text if it fits, otherwise a truncated form.
 */
function truncateMiddle(value, maxLength = 48) {
    const text = String(value || '');

    // Normalize the limit: a negative limit would otherwise reach
    // slice(0, -n) and return nearly the whole string, and NaN would
    // produce "..." followed by the full text.
    const requested = Number(maxLength);
    const limit = Number.isNaN(requested) ? 48 : Math.max(0, requested);

    if (text.length <= limit) return text;

    // Too short to fit "..." plus context on both sides: hard cut instead.
    if (limit <= 4) return text.slice(0, limit);

    // Split the remaining budget around the ellipsis; the head gets the
    // extra character when the budget is odd.
    const head = Math.ceil((limit - 3) / 2);
    const tail = Math.floor((limit - 3) / 2);
    return `${text.slice(0, head)}...${text.slice(text.length - tail)}`;
}
421
+
422
/**
 * Format a positive numeric registry field for table display.
 *
 * Values of 100 or more are rounded to an integer; smaller values keep one
 * decimal place. Missing, non-numeric, zero or negative values render as '?'.
 *
 * @param {*} value - Raw value; converted with Number().
 * @param {string} [suffix=''] - Unit appended to the number, e.g. 'B'.
 * @returns {string} Formatted number with suffix, or '?'.
 */
function formatRegistryNumber(value, suffix = '') {
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed <= 0) return '?';

    let rounded = parsed >= 100 ? Math.round(parsed) : Math.round(parsed * 10) / 10;

    // One-decimal rounding collapses small positive values (e.g. 0.022) to 0,
    // which would display as "0B". Keep one significant digit instead.
    if (rounded === 0) rounded = Number(parsed.toPrecision(1));

    return `${rounded}${suffix}`;
}
428
+
429
/**
 * Format an artifact size given in gigabytes, rounded to two decimals.
 *
 * @param {*} value - Size in GB; converted with Number().
 * @returns {string} e.g. '4.57GB'; '<0.01GB' for positive sizes that round
 *   to zero; '?' for missing, non-numeric, zero or negative values.
 */
function formatRegistrySize(value) {
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed <= 0) return '?';

    const rounded = Math.round(parsed * 100) / 100;

    // A known, positive size must not be displayed as "0GB".
    if (rounded === 0) return '<0.01GB';

    return `${rounded}GB`;
}
434
+
435
/**
 * Render an array as a short comma-separated list, collapsing entries
 * beyond `maxItems` into a "+N" count.
 *
 * @param {*} value - Expected to be an array; anything else is treated as empty.
 * @param {number} [maxItems=3] - Maximum entries to print. Truncated to an
 *   integer; zero, negative or NaN values show only the "+N" count.
 * @returns {string} '-' for an empty list, otherwise e.g. 'a, b, +2'.
 */
function formatRegistryList(value, maxItems = 3) {
    const items = Array.isArray(value) ? value : [];
    if (items.length === 0) return '-';

    // Normalize the limit so slice() never receives a negative or fractional
    // bound; those produced a dangling ", +N" or a fractional hidden count.
    const limit = Math.max(0, Math.trunc(Number(maxItems)) || 0);

    if (items.length <= limit) return items.join(', ');

    const hidden = `+${items.length - limit}`;
    return limit === 0 ? hidden : `${items.slice(0, limit).join(', ')}, ${hidden}`;
}
441
+
391
442
  const program = new Command();
392
443
 
393
444
  program
@@ -1272,12 +1323,17 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
1272
1323
  const { summary, recommendations } = intelligentData;
1273
1324
  const tier = summary.hardware_tier.replace('_', ' ').toUpperCase();
1274
1325
  const optimizeProfile = (summary.optimize_for || intelligentData.optimizeFor || 'balanced').toUpperCase();
1326
+ const runtimeLabel = (intelligentData.runtime || summary.best_overall?.runtime || 'auto').toUpperCase();
1327
+ const sourceLabel = intelligentData.recommendationSource === 'registry'
1328
+ ? 'Multi-source registry'
1329
+ : 'Ollama catalog';
1275
1330
  const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
1276
1331
 
1277
1332
  console.log('\n' + chalk.bgRed.white.bold(' INTELLIGENT RECOMMENDATIONS BY CATEGORY '));
1278
1333
  console.log(chalk.red('╭' + '─'.repeat(65)));
1279
1334
  console.log(chalk.red('│') + ` Hardware Tier: ${tierColor.bold(tier)} | Models Analyzed: ${chalk.cyan.bold(intelligentData.totalModelsAnalyzed)}`);
1280
- console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)}`);
1335
+ console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)} | Runtime: ${chalk.cyan.bold(runtimeLabel)}`);
1336
+ console.log(chalk.red('│') + ` Source: ${chalk.white.bold(sourceLabel)}`);
1281
1337
  console.log(chalk.red('│'));
1282
1338
 
1283
1339
  // Mostrar mejor modelo general
@@ -1288,6 +1344,7 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
1288
1344
  console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(best.command)}`);
1289
1345
  console.log(chalk.red('│') + ` Score: ${chalk.yellow.bold(best.score)}/100 | Category: ${chalk.magenta(best.category)}`);
1290
1346
  console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(best.quantization || 'Q4_K_M')}`);
1347
+ console.log(chalk.red('│') + ` Runtime: ${chalk.cyan.bold(best.runtime || intelligentData.runtime || 'ollama')} | Source: ${chalk.gray(best.source || 'unknown')}`);
1291
1348
  console.log(chalk.red('│') + ` Fine-tuning: ${chalk.blue.bold(bestFineTuning.shortLabel)}`);
1292
1349
  console.log(chalk.red('│'));
1293
1350
  }
@@ -1313,6 +1370,7 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
1313
1370
  console.log(chalk.red('│') + ` ${chalk.green(model.name)} (${model.size})`);
1314
1371
  console.log(chalk.red('│') + ` Score: ${scoreColor.bold(model.score)}/100 | Pulls: ${chalk.gray(model.pulls?.toLocaleString() || 'N/A')}`);
1315
1372
  console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(model.quantization || 'Q4_K_M')}`);
1373
+ console.log(chalk.red('│') + ` Runtime: ${chalk.cyan(model.runtime || intelligentData.runtime || 'ollama')} | Source: ${chalk.gray(model.source || 'unknown')}`);
1316
1374
  console.log(chalk.red('│') + ` Fine-tuning: ${chalk.blue.bold(fineTuningSupport.shortLabel)}`);
1317
1375
  console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(model.command)}`);
1318
1376
  console.log(chalk.red('│'));
@@ -3004,7 +3062,7 @@ auditCommand
3004
3062
  .option('-u, --use-case <case>', 'Use case when --command check is selected', 'general')
3005
3063
  .option('-c, --category <category>', 'Category hint when --command recommend is selected')
3006
3064
  .option('--optimize <profile>', 'Optimization profile for recommend mode (balanced|speed|quality|context|coding)', 'balanced')
3007
- .option('--runtime <runtime>', `Runtime for check mode (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
3065
+ .option('--runtime <runtime>', 'Runtime for check/recommend mode (auto|ollama|vllm|mlx|llama.cpp|transformers)', 'auto')
3008
3066
  .option('--include-cloud', 'Include cloud models in check-mode analysis')
3009
3067
  .option('--max-size <size>', 'Maximum model size for check mode (e.g., "24B" or "12GB")')
3010
3068
  .option('--min-size <size>', 'Minimum model size for check mode (e.g., "3B" or "2GB")')
@@ -3058,13 +3116,14 @@ auditCommand
3058
3116
  policyCandidates = collectCandidatesFromAnalysis(analysisResult);
3059
3117
  } else {
3060
3118
  recommendationResult = await checker.generateIntelligentRecommendations(hardware, {
3061
- optimizeFor: options.optimize
3119
+ optimizeFor: options.optimize,
3120
+ runtime: options.runtime
3062
3121
  });
3063
3122
  if (!recommendationResult) {
3064
3123
  throw new Error('Unable to generate recommendation data for policy audit export.');
3065
3124
  }
3066
3125
 
3067
- runtimeBackend = normalizeRuntime(options.runtime || 'ollama');
3126
+ runtimeBackend = recommendationResult.runtime || options.runtime || 'auto';
3068
3127
  policyCandidates = collectCandidatesFromRecommendationData(recommendationResult);
3069
3128
  }
3070
3129
 
@@ -3295,6 +3354,7 @@ Policy scope:
3295
3354
  )
3296
3355
  .action(async (options) => {
3297
3356
  showAsciiArt('check');
3357
+ displayRecommendationCommandNote('check');
3298
3358
  try {
3299
3359
  // Use verbose progress unless explicitly disabled
3300
3360
  const verboseEnabled = options.verbose !== false;
@@ -3462,11 +3522,8 @@ Policy scope:
3462
3522
 
3463
3523
  program
3464
3524
  .command('ollama')
3465
- .description('Manage Ollama integration with hardware compatibility')
3466
- .option('-l, --list', 'List installed models with compatibility scores')
3467
- .option('-r, --running', 'Show running models with performance data')
3468
- .option('-c, --compatible', 'Show only hardware-compatible installed models')
3469
- .option('--recommendations', 'Show installation recommendations')
3525
+ .description('Check Ollama integration status (use `installed` to rank installed models)')
3526
+ .option('-l, --list', 'List installed models ranked by compatibility (runs `installed`)')
3470
3527
  .action(async (options) => {
3471
3528
  showAsciiArt('ollama');
3472
3529
  const spinner = ora('Checking Ollama integration...').start();
@@ -3492,7 +3549,9 @@ program
3492
3549
  spinner.succeed(`Ollama integration active`);
3493
3550
 
3494
3551
  if (options.list) {
3495
- console.log('Ollama models list feature coming soon...');
3552
+ console.log(chalk.cyan('\nRun `llm-checker installed` to rank your installed models by compatibility and use-case.'));
3553
+ } else {
3554
+ console.log(chalk.gray('\nTip: `llm-checker installed` ranks installed models; `llm-checker recommend` suggests models for your hardware.'));
3496
3555
  }
3497
3556
 
3498
3557
  } catch (error) {
@@ -3520,10 +3579,18 @@ program
3520
3579
  const availability = await ollamaClient.checkOllamaAvailability();
3521
3580
  if (!availability.available) {
3522
3581
  spinner.fail('Ollama not available');
3523
- console.log(chalk.red('\n' + availability.error));
3524
- if (availability.hint) {
3525
- console.log(chalk.yellow('Hint: ' + availability.hint));
3582
+ if (options.json) {
3583
+ // --json must always emit parseable JSON on stdout (the success
3584
+ // path prints an array); previously these branches printed
3585
+ // ANSI-colored prose and broke `installed --json | jq`.
3586
+ console.log(JSON.stringify([], null, 2));
3587
+ } else {
3588
+ console.log(chalk.red('\n' + availability.error));
3589
+ if (availability.hint) {
3590
+ console.log(chalk.yellow('Hint: ' + availability.hint));
3591
+ }
3526
3592
  }
3593
+ process.exitCode = 1;
3527
3594
  return;
3528
3595
  }
3529
3596
 
@@ -3531,8 +3598,12 @@ program
3531
3598
  const installedModels = await ollamaClient.getLocalModels();
3532
3599
  if (!installedModels || installedModels.length === 0) {
3533
3600
  spinner.fail('No models installed');
3534
- console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
3535
- console.log(chalk.cyan(' ollama pull llama3.2:3b'));
3601
+ if (options.json) {
3602
+ console.log(JSON.stringify([], null, 2));
3603
+ } else {
3604
+ console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
3605
+ console.log(chalk.cyan(' ollama pull llama3.2:3b'));
3606
+ }
3536
3607
  return;
3537
3608
  }
3538
3609
 
@@ -3819,6 +3890,8 @@ program
3819
3890
  .description('Get intelligent model recommendations for your hardware')
3820
3891
  .option('-c, --category <category>', 'Get recommendations for specific category (coding, talking, reading, etc.)')
3821
3892
  .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
3893
+ .option('--runtime <runtime>', 'Runtime target for registry recommendations (auto|ollama|vllm|mlx|llama.cpp|transformers)', 'auto')
3894
+ .option('--no-registry', 'Use the legacy Ollama catalog recommendation path')
3822
3895
  .option('--no-verbose', 'Disable step-by-step progress display')
3823
3896
  .option('--policy <file>', 'Evaluate recommendations against a policy file')
3824
3897
  .option('--simulate <profile>', 'Simulate a hardware profile instead of detecting real hardware (use "list" to see profiles)')
@@ -3843,6 +3916,11 @@ Hardware simulation:
3843
3916
  $ llm-checker recommend --simulate m4pro24 --category coding
3844
3917
  $ llm-checker recommend --gpu "RTX 5060" --ram 32 --cpu "AMD Ryzen 7 5700X"
3845
3918
 
3919
+ Registry/runtime examples:
3920
+ $ llm-checker recommend --runtime auto --category coding
3921
+ $ llm-checker recommend --runtime vllm --category coding
3922
+ $ llm-checker recommend --runtime mlx --category general
3923
+
3846
3924
  Calibrated routing examples:
3847
3925
  $ llm-checker recommend --calibrated --category coding
3848
3926
  $ llm-checker recommend --calibrated ./calibration-policy.yaml --category reasoning
@@ -3851,6 +3929,7 @@ Calibrated routing examples:
3851
3929
  )
3852
3930
  .action(async (options) => {
3853
3931
  showAsciiArt('recommend');
3932
+ displayRecommendationCommandNote('recommend');
3854
3933
  try {
3855
3934
  const verboseEnabled = options.verbose !== false;
3856
3935
  const checker = new (getLLMChecker())({ verbose: verboseEnabled });
@@ -3919,7 +3998,9 @@ Calibrated routing examples:
3919
3998
 
3920
3999
  const hardware = await checker.getSystemInfo();
3921
4000
  const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware, {
3922
- optimizeFor: options.optimize
4001
+ optimizeFor: options.optimize,
4002
+ runtime: options.runtime,
4003
+ registry: options.registry
3923
4004
  });
3924
4005
 
3925
4006
  if (!intelligentRecommendations) {
@@ -4365,6 +4446,7 @@ program
4365
4446
  .option('--ctx <number>', 'Target context length', '8192')
4366
4447
  .option('-e, --evaluator <model>', 'Evaluator model (auto for best available)', 'auto')
4367
4448
  .option('-w, --weight <number>', 'AI weight (0.0-1.0, default 0.3)', '0.3')
4449
+ .option('-m, --models <list>', 'Restrict evaluation to these models (comma-separated)')
4368
4450
  .action(async (options) => {
4369
4451
  showAsciiArt('ai-check');
4370
4452
  // Check if Ollama is installed first
@@ -4377,14 +4459,33 @@ program
4377
4459
 
4378
4460
  const aiCheckSelector = new AICheckSelector();
4379
4461
 
4462
+ // Validate numeric options up front: bad input (e.g. --weight abc, --top
4463
+ // foo) used to flow through as NaN, which survived the downstream clamp
4464
+ // and made the displayed AI weight and every weighted score NaN.
4465
+ const weight = Number.parseFloat(options.weight);
4466
+ const top = Number.parseInt(options.top, 10);
4467
+ const ctx = options.ctx ? Number.parseInt(options.ctx, 10) : undefined;
4468
+ const invalidNumeric =
4469
+ (!Number.isFinite(weight) || weight < 0 || weight > 1) ? `--weight ${options.weight} (use a number from 0.0 to 1.0)`
4470
+ : (!Number.isInteger(top) || top <= 0) ? `--top ${options.top} (use a positive integer)`
4471
+ : (ctx !== undefined && (!Number.isInteger(ctx) || ctx <= 0)) ? `--ctx ${options.ctx} (use a positive integer)`
4472
+ : null;
4473
+ if (invalidNumeric) {
4474
+ spinner.stop();
4475
+ console.error(chalk.red(`Invalid ${invalidNumeric}`));
4476
+ process.exitCode = 1;
4477
+ return;
4478
+ }
4479
+
4380
4480
  const checkOptions = {
4381
4481
  category: options.category,
4382
- top: parseInt(options.top),
4383
- ctx: options.ctx ? parseInt(options.ctx) : undefined,
4482
+ top,
4483
+ ctx,
4384
4484
  evaluator: options.evaluator,
4385
- weight: parseFloat(options.weight)
4485
+ weight,
4486
+ models: options.models || process.env.LLM_CHECKER_AI_CHECK_MODELS || undefined
4386
4487
  };
4387
-
4488
+
4388
4489
  spinner.stop();
4389
4490
 
4390
4491
  const result = await aiCheckSelector.aiCheck(checkOptions);
@@ -4683,6 +4784,307 @@ program
4683
4784
  }
4684
4785
  });
4685
4786
 
4787
// `registry-sync`: ingest model metadata from the selected sources into the
// local model database, then print a summary (or JSON with --json).
program
    .command('registry-sync')
    .description('Sync the multi-source model registry (Ollama, Hugging Face, GPT4All)')
    .option('-s, --sources <list>', 'Comma-separated sources: ollama,huggingface,gpt4all', 'ollama,huggingface,gpt4all')
    .option('-l, --limit <n>', 'Fallback maximum records per source')
    .option('--hf-limit <n>', 'Maximum Hugging Face repos to ingest', '3000')
    .option('--ollama-limit <n>', 'Maximum Ollama artifacts to ingest', '10000')
    .option('--gpt4all-limit <n>', 'Maximum GPT4All entries to ingest', '1000')
    .option('--query <text>', 'Hugging Face search query')
    .option('--task <task>', 'Hugging Face task/filter, for example text-generation or text-embeddings-inference')
    .option('--dry-run', 'Fetch and normalize without writing to the database')
    .option('-q, --quiet', 'Suppress progress output')
    .option('-j, --json', 'Output as JSON')
    .action(async (options) => {
        const wantsJson = Boolean(options.json);
        const isDryRun = Boolean(options.dryRun);

        // JSON output implies quiet mode so stdout carries only the payload.
        const silent = Boolean(options.quiet || options.json);
        if (!silent) {
            showAsciiArt('registry-sync');
        }

        const ModelDatabase = require('../src/data/model-database');
        const { RegistryIngestor } = require('../src/data/registry-ingestors');
        const registryDb = new ModelDatabase();
        const progress = silent ? null : ora('Preparing model registry sync...').start();

        // Mirror ingestor progress messages onto the spinner when one exists.
        const reportProgress = (info) => {
            if (!progress || !info.message) return;
            progress.text = info.message;
        };

        try {
            await registryDb.initialize();

            const ingestor = new RegistryIngestor({
                database: registryDb,
                onProgress: reportProgress
            });

            const ingestOptions = {
                sources: options.sources,
                limit: parsePositiveNumberOption(options.limit),
                hfLimit: parsePositiveNumberOption(options.hfLimit, 3000),
                ollamaLimit: parsePositiveNumberOption(options.ollamaLimit, 10000),
                gpt4allLimit: parsePositiveNumberOption(options.gpt4allLimit, 1000),
                query: options.query,
                task: options.task,
                dryRun: isDryRun
            };
            const summary = await ingestor.ingest(ingestOptions);

            // A dry run does not query registry totals afterwards.
            const stats = isDryRun ? null : registryDb.getRegistryStats();

            if (wantsJson) {
                console.log(JSON.stringify({ summary, stats }, null, 2));
                return;
            }

            progress?.succeed(
                `Registry ${isDryRun ? 'normalized' : 'synced'}: ${summary.repos} repos, ${summary.artifacts} artifacts`
            );

            console.log(chalk.green('\n[OK] Registry sync complete'));
            const summaryLines = [
                ` Sources touched: ${summary.sources}`,
                ` Collections: ${summary.collections}`,
                ` Repositories: ${summary.repos}`,
                ` Artifacts: ${summary.artifacts}`
            ];
            summaryLines.forEach((line) => console.log(chalk.gray(line)));

            if (stats) {
                console.log(chalk.blue.bold('\nRegistry totals:'));
                const totalLines = [
                    ` Sources: ${stats.sources}`,
                    ` Repositories: ${stats.repos}`,
                    ` Artifacts: ${stats.artifacts}`
                ];
                totalLines.forEach((line) => console.log(chalk.gray(line)));

                if (stats.bySource.length > 0) {
                    const tableRows = [
                        ['Source', 'Artifacts'],
                        ...Array.from(stats.bySource, (entry) => [
                            entry.source_id,
                            String(entry.artifact_count)
                        ])
                    ];
                    console.log(`\n${table(tableRows)}`);
                }
            }

            console.log(chalk.cyan('Try: llm-checker registry-search llama --runtime auto --limit 10'));
        } catch (error) {
            progress?.fail('Registry sync failed');
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exitCode = 1;
        } finally {
            // Runs on every path, including the early JSON return.
            registryDb.close();
        }
    });
4874
+
4875
// `registry-search [query]`: look up stored registry artifacts with optional
// filters and print them as a table (or JSON with --json).
program
    .command('registry-search [query]')
    .description('Search exact downloadable/installable artifacts in the multi-source model registry')
    .option('-s, --source <source>', 'Filter by source: ollama, huggingface, gpt4all')
    .option('--format <format>', 'Filter by artifact format: gguf, safetensors, mlx, ollama')
    .option('--runtime <runtime>', 'Filter by runtime support: auto, ollama, llama.cpp, transformers, vllm, mlx')
    .option('--quant <type>', 'Filter by quantization, for example Q4_K_M or Q8_0')
    .option('--max-size <gb>', 'Maximum artifact size in GB')
    .option('--min-params <billion>', 'Minimum parameter count in billions')
    .option('--max-params <billion>', 'Maximum parameter count in billions')
    .option('--local-only', 'Exclude gated/auth-required artifacts')
    .option('-l, --limit <n>', 'Maximum number of results', '20')
    .option('-j, --json', 'Output as JSON')
    .action(async (query = '', options) => {
        const wantsJson = Boolean(options.json);
        if (!wantsJson) {
            showAsciiArt('registry-search');
        }

        const ModelDatabase = require('../src/data/model-database');
        const registryDb = new ModelDatabase();

        try {
            await registryDb.initialize();

            // The format filter is lower-cased before it is passed on.
            const formatFilter = options.format ? String(options.format).toLowerCase() : undefined;
            const filters = {
                source: options.source,
                format: formatFilter,
                runtime: options.runtime,
                quantization: options.quant,
                maxSizeGB: parsePositiveNumberOption(options.maxSize),
                minParamsB: parsePositiveNumberOption(options.minParams),
                maxParamsB: parsePositiveNumberOption(options.maxParams),
                localOnly: Boolean(options.localOnly),
                limit: parsePositiveNumberOption(options.limit, 20)
            };
            const results = registryDb.searchModelArtifacts(query, filters);
            const stats = registryDb.getRegistryStats();

            if (wantsJson) {
                const payload = {
                    query,
                    filters,
                    count: results.length,
                    stats,
                    results
                };
                console.log(JSON.stringify(payload, null, 2));
                return;
            }

            if (results.length === 0) {
                console.log(chalk.yellow('No registry artifacts found.'));
                // When the registry holds no artifacts at all, point to the sync command.
                if (stats.artifacts === 0) {
                    console.log(chalk.gray('Populate the registry first with: llm-checker registry-sync'));
                }
                return;
            }

            console.log(chalk.blue.bold('\nRegistry Results'));
            console.log(chalk.gray(`Stored registry: ${stats.artifacts} artifacts across ${stats.sources} sources`));
            console.log('');

            const headerRow = ['Source', 'Model', 'Artifact', 'Params', 'Size', 'Format', 'Runtime', 'Install'];
            const toRow = (artifact) => [
                artifact.source_id,
                truncateMiddle(artifact.canonical_model_id, 34),
                truncateMiddle(artifact.artifact_name || artifact.filename, 34),
                formatRegistryNumber(artifact.parameter_count_b, 'B'),
                formatRegistrySize(artifact.size_gb),
                artifact.quantization ? `${artifact.format}/${artifact.quantization}` : artifact.format,
                formatRegistryList(artifact.runtime_support, 2),
                truncateMiddle(artifact.install_command || artifact.download_url, 46)
            ];
            console.log(table([headerRow, ...results.map((artifact) => toRow(artifact))]));

            // The table truncates long URLs, so list up to five of them in full.
            const linked = results.filter((artifact) => artifact.download_url).slice(0, 5);
            if (linked.length > 0) {
                console.log(chalk.blue.bold('Exact download links:'));
                for (const [position, artifact] of linked.entries()) {
                    console.log(chalk.gray(` ${position + 1}. ${artifact.canonical_model_id} -> ${artifact.download_url}`));
                }
            }
        } catch (error) {
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exitCode = 1;
        } finally {
            // Runs on every path, including the early returns above.
            registryDb.close();
        }
    });
4977
+
4978
// `registry-recommend [query]`: detect hardware, ask the registry recommender
// for ranked artifacts, and print a table plus the top pick (or JSON with --json).
program
    .command('registry-recommend [query]')
    .description('Recommend the best exact model artifacts from the multi-source registry for this hardware')
    .option('-c, --category <category>', 'Task category (general, coding, reasoning, embeddings, multimodal)', 'general')
    .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
    .option('--runtime <runtime>', 'Runtime target: auto, ollama, llama.cpp, vllm, mlx, transformers', 'auto')
    .option('-s, --source <source>', 'Filter by source: ollama, huggingface, gpt4all')
    .option('--format <format>', 'Filter by artifact format: gguf, safetensors, mlx, ollama')
    .option('--quant <type>', 'Filter by quantization, for example Q4_K_M or Q8_0')
    .option('--max-size <gb>', 'Maximum artifact size in GB')
    .option('--min-params <billion>', 'Minimum parameter count in billions')
    .option('--max-params <billion>', 'Maximum parameter count in billions')
    .option('--target-context <tokens>', 'Target context window for scoring')
    .option('--include-gated', 'Include gated/auth-required artifacts')
    .option('--pool-limit <n>', 'Maximum registry artifacts to score before ranking', '20000')
    .option('-l, --limit <n>', 'Maximum number of recommendations', '10')
    .option('-j, --json', 'Output as JSON')
    .action(async (query = '', options) => {
        const wantsJson = Boolean(options.json);
        if (!wantsJson) {
            showAsciiArt('registry-recommend');
        }

        const UnifiedDetector = require('../src/hardware/unified-detector');
        const { RegistryRecommender } = require('../src/data/registry-recommender');
        const recommender = new RegistryRecommender();
        const progress = wantsJson ? null : ora('Scoring registry artifacts...').start();

        try {
            await recommender.initialize();

            const hardware = await new UnifiedDetector().detect();
            const category = normalizeTaskName(options.category || 'general');
            const formatFilter = options.format ? String(options.format).toLowerCase() : undefined;

            const result = await recommender.recommend({
                query,
                category,
                optimizeFor: options.optimize,
                runtime: options.runtime,
                source: options.source,
                format: formatFilter,
                quantization: options.quant,
                maxSizeGB: parsePositiveNumberOption(options.maxSize),
                minParamsB: parsePositiveNumberOption(options.minParams),
                maxParamsB: parsePositiveNumberOption(options.maxParams),
                targetContext: parsePositiveNumberOption(options.targetContext),
                // Gated artifacts are excluded unless --include-gated is passed.
                localOnly: !options.includeGated,
                poolLimit: parsePositiveNumberOption(options.poolLimit, 20000),
                limit: parsePositiveNumberOption(options.limit, 10),
                hardware
            });

            if (wantsJson) {
                const payload = {
                    query,
                    hardware: hardware.summary || hardware,
                    ...result
                };
                console.log(JSON.stringify(payload, null, 2));
                return;
            }

            progress?.succeed(
                `Scored ${result.total_evaluated} candidates from ${result.total_artifacts} registry artifacts`
            );

            const picks = result.recommendations;
            if (picks.length === 0) {
                console.log(chalk.yellow('No registry recommendations found for those filters.'));
                // When the registry holds no artifacts at all, point to the sync command.
                if (result.registry.artifacts === 0) {
                    console.log(chalk.gray('Populate the registry first with: llm-checker registry-sync'));
                }
                return;
            }

            console.log(chalk.blue.bold('\nRegistry Recommendations'));
            console.log(chalk.gray(`Registry: ${result.registry.repos} repos, ${result.registry.artifacts} artifacts`));
            console.log(chalk.gray(`Runtime: ${result.runtime} | Category: ${result.category} | Optimize: ${result.optimizeFor}`));
            console.log('');

            const headerRow = ['#', 'Score', 'Source', 'Model', 'Artifact', 'Params', 'Size', 'Install'];
            const bodyRows = picks.map((pick, position) => [
                String(position + 1),
                String(pick.score),
                pick.source,
                truncateMiddle(pick.model, 30),
                truncateMiddle(pick.artifact, 32),
                formatRegistryNumber(pick.params_b, 'B'),
                formatRegistrySize(pick.size_gb),
                truncateMiddle(pick.install_command || pick.download_url, 44)
            ]);
            console.log(table([headerRow, ...bodyRows]));

            // Print the first recommendation in full; the table truncates long values.
            console.log(chalk.blue.bold('Top pick:'));
            const [topPick] = picks;
            console.log(chalk.white.bold(` ${topPick.model}`));
            console.log(chalk.gray(` Artifact: ${topPick.artifact}`));
            console.log(chalk.gray(` Why: ${topPick.rationale}`));
            if (topPick.install_command) {
                console.log(chalk.cyan(` ${topPick.install_command}`));
            }
            if (topPick.download_url) {
                console.log(chalk.gray(` ${topPick.download_url}`));
            }
        } catch (error) {
            progress?.fail('Registry recommendation failed');
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            process.exitCode = 1;
        } finally {
            // Runs on every path, including the early returns above.
            recommender.close();
        }
    });
5087
+
4686
5088
  program
4687
5089
  .command('search <query>')
4688
5090
  .description('Search models in the database with intelligent scoring')
@@ -4731,8 +5133,20 @@ program
4731
5133
  });
4732
5134
 
4733
5135
  if (searchResults.length === 0) {
4734
- if (spinner) spinner.info('No models found matching your query');
4735
5136
  syncManager.close();
5137
+ if (options.json) {
5138
+ // --json must always emit parseable JSON, even on zero matches
5139
+ // (previously it printed nothing and broke `search ... --json | jq`).
5140
+ console.log(JSON.stringify({
5141
+ query,
5142
+ all: [],
5143
+ recommendations: [],
5144
+ insights: [],
5145
+ meta: { afterFiltering: 0, totalMatches: 0 }
5146
+ }, null, 2));
5147
+ } else if (spinner) {
5148
+ spinner.info('No models found matching your query');
5149
+ }
4736
5150
  return;
4737
5151
  }
4738
5152
 
@@ -4806,7 +5220,7 @@ program
4806
5220
 
4807
5221
  program
4808
5222
  .command('smart-recommend')
4809
- .description('Get intelligent model recommendations using the new scoring engine')
5223
+ .description('Experimental recommendations using the alternate scoring engine')
4810
5224
  .option('-u, --use-case <case>', 'Optimize for use case', 'general')
4811
5225
  .option('-l, --limit <n>', 'Maximum number of recommendations', '5')
4812
5226
  .option('--target-tps <n>', 'Target tokens per second', '20')
@@ -4814,8 +5228,19 @@ program
4814
5228
  .option('--include-vision', 'Include vision/multimodal models')
4815
5229
  .option('--include-embeddings', 'Include embedding models')
4816
5230
  .option('-j, --json', 'Output as JSON')
5231
+ .addHelpText(
5232
+ 'after',
5233
+ `
5234
+ Recommendation engine note:
5235
+ smart-recommend is experimental and may intentionally differ from recommend.
5236
+ Use "llm-checker recommend" for canonical package recommendations.
5237
+ `
5238
+ )
4817
5239
  .action(async (options) => {
4818
- if (!options.json) showAsciiArt('smart-recommend');
5240
+ if (!options.json) {
5241
+ showAsciiArt('smart-recommend');
5242
+ displayRecommendationCommandNote('smart-recommend');
5243
+ }
4819
5244
  const SyncManager = require('../src/data/sync-manager');
4820
5245
  const IntelligentSelector = require('../src/models/intelligent-selector');
4821
5246
  const UnifiedDetector = require('../src/hardware/unified-detector');