llm-checker 3.6.1 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -641,30 +641,36 @@ llm-checker search qwen --quant Q4_K_M --max-size 8
641
641
 
642
642
  ## Model Catalog
643
643
 
644
- LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
644
+ LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog plus a multi-source registry of exact downloadable/installable model artifacts. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
645
645
 
646
646
  The packaged snapshot currently includes:
647
647
 
648
648
  - 229 Ollama models
649
649
  - 7176 variants
650
+ - 3259 multi-source registry repositories
651
+ - 33729 exact model artifacts from Hugging Face, Ollama, and GPT4All
652
+ - Hugging Face top 3000 repositories by downloads, fetched with API pagination
650
653
  - pull counts
651
654
  - tag counts
652
655
  - last-updated metadata
653
- - variant params, quantization, size, context, and input type fields when available
656
+ - variant params, quantization, size, context, runtime, install commands, download URLs, license/gated flags, tasks, and modalities when available
654
657
 
655
658
  Refresh it any time:
656
659
 
657
660
  ```bash
658
661
  llm-checker sync
662
+ llm-checker registry-sync --sources ollama,huggingface,gpt4all
663
+ llm-checker registry-search qwen --runtime auto --max-size 8
664
+ llm-checker registry-recommend --category coding --runtime auto --max-size 8
659
665
  ```
660
666
 
661
- For release maintainers, the packaged seed can be regenerated from the synced local DB:
667
+ For release maintainers, the packaged seed can be regenerated from the synced local DB and registry APIs:
662
668
 
663
669
  ```bash
664
670
  npm run sync:seed
665
671
  ```
666
672
 
667
- `recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
673
+ `recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. `registry-search` queries exact artifacts across sources, and `registry-recommend` ranks exact artifacts from the registry with the deterministic hardware-aware selector. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
668
674
 
669
675
  The curated fallback catalog includes 35+ models from the most popular Ollama families:
670
676
 
@@ -836,7 +842,7 @@ LLM Checker uses a deterministic pipeline so the same inputs produce the same ra
836
842
  flowchart LR
837
843
  subgraph Inputs
838
844
  HW["Hardware detector<br/>CPU/GPU/RAM/backend"]
839
- REG["Synced SQLite Ollama catalog<br/>(packaged seed + live sync)"]
845
+ REG["Synced SQLite model catalog<br/>(Ollama seed + multi-source registry)"]
840
846
  LOCAL["Installed local models"]
841
847
  FLAGS["CLI options<br/>use-case/runtime/limits/policy"]
842
848
  end
@@ -952,8 +958,9 @@ src/
952
958
  detector.js # Hardware detection
953
959
  unified-detector.js # Cross-platform detection
954
960
  data/
955
- model-database.js # SQLite storage and packaged seed loading
956
- seed/models.db # npm-packaged Ollama catalog snapshot
961
+ model-database.js # SQLite storage, registry tables, and packaged seed loading
962
+ registry-ingestors.js # Ollama/Hugging Face/GPT4All artifact normalization
963
+ seed/models.db # npm-packaged Ollama + multi-source registry snapshot
957
964
  sync-manager.js # Database sync from Ollama registry
958
965
  bin/
959
966
  enhanced_cli.js # CLI entry point
@@ -61,6 +61,9 @@ const COMMAND_HEADER_LABELS = {
61
61
  'smart-recommend': 'Smart Recommend (Experimental)',
62
62
  search: 'Model Search',
63
63
  sync: 'Database Sync',
64
+ 'registry-sync': 'Model Registry Sync',
65
+ 'registry-search': 'Model Registry Search',
66
+ 'registry-recommend': 'Registry Recommendations',
64
67
  'mcp-setup': 'Claude MCP Setup',
65
68
  check: 'Compatibility Check',
66
69
  installed: 'Installed Models',
@@ -401,6 +404,41 @@ function getRealSizeFromOllamaCache(model) {
401
404
  }
402
405
  }
403
406
 
407
/**
 * Parse a CLI option value into a strictly positive, finite number.
 *
 * @param {*} value - Raw option value (normally a string from the CLI parser).
 * @param {number|null} [fallback=null] - Returned when `value` is missing or
 *   is not a positive finite number.
 * @returns {number|null} The parsed number, or `fallback`.
 */
function parsePositiveNumberOption(value, fallback = null) {
    if (value === undefined || value === null || value === '') return fallback;
    // Only strings and numbers are meaningful here: Number(true) === 1 and
    // Number([5]) === 5, so coercing other types would silently invent a value.
    if (typeof value !== 'string' && typeof value !== 'number') return fallback;
    const parsed = Number(value);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}
412
+
413
/**
 * Shorten a string to at most `maxLength` characters by replacing its middle
 * with "...". Strings that already fit are returned unchanged.
 *
 * @param {*} value - Value to display; null/undefined become an empty string.
 * @param {number} [maxLength=48] - Maximum length of the returned string.
 *   Negative values are treated as 0.
 * @returns {string} The original or shortened text.
 */
function truncateMiddle(value, maxLength = 48) {
    // `??` instead of `||` so a legitimate falsy value such as 0 is still shown.
    const text = String(value ?? '');
    // A negative limit would make slice() count from the end and return
    // more than the limit allows.
    const limit = Math.max(0, maxLength);
    if (text.length <= limit) return text;
    // Too short to hold "..." plus context on both sides: hard cut instead.
    if (limit <= 4) return text.slice(0, limit);
    const keep = limit - 3;
    const head = Math.ceil(keep / 2);
    const tail = Math.floor(keep / 2);
    return `${text.slice(0, head)}...${text.slice(text.length - tail)}`;
}
421
+
422
/**
 * Format a positive numeric registry field (for example a parameter count in
 * billions) for table display.
 *
 * Values >= 100 are rounded to an integer, other values to one decimal.
 * Values too small to survive one-decimal rounding keep two significant
 * digits, so a known small value is never printed as "0".
 *
 * @param {*} value - Raw value; coerced with Number().
 * @param {string} [suffix=''] - Unit suffix appended to the number.
 * @returns {string} Formatted number, or '?' when the value is missing,
 *   non-finite, zero, or negative.
 */
function formatRegistryNumber(value, suffix = '') {
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed <= 0) return '?';
    if (parsed >= 100) return `${Math.round(parsed)}${suffix}`;

    const oneDecimal = Math.round(parsed * 10) / 10;
    // Anything below 0.05 rounds to 0 here, which would read as "no value"
    // even though the value is known; fall back to significant digits.
    const rounded = oneDecimal > 0 ? oneDecimal : Number(parsed.toPrecision(2));
    return `${rounded}${suffix}`;
}
428
+
429
/**
 * Format an artifact size given in gigabytes for table display, rounded to
 * two decimals.
 *
 * @param {*} value - Size in GB; coerced with Number().
 * @returns {string} e.g. "4.37GB"; "<0.01GB" for positive sizes that round
 *   to zero; '?' when the value is missing, non-finite, zero, or negative.
 */
function formatRegistrySize(value) {
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed <= 0) return '?';
    const rounded = Math.round(parsed * 100) / 100;
    // A known but tiny size must not be printed as "0GB".
    return rounded > 0 ? `${rounded}GB` : '<0.01GB';
}
434
+
435
/**
 * Render a list field as a short comma-separated string for table display.
 *
 * @param {*} value - Expected to be an array; anything else is treated as empty.
 * @param {number} [maxItems=3] - Number of entries shown before the rest is
 *   collapsed into a "+N" counter.
 * @returns {string} e.g. "a, b, +2", or '-' when there is nothing to show.
 */
function formatRegistryList(value, maxItems = 3) {
    // Drop blank entries so they neither print as empty items (", ollama")
    // nor inflate the "+N" counter.
    const items = Array.isArray(value)
        ? value.filter((item) => item !== null && item !== undefined && item !== '')
        : [];
    if (items.length === 0) return '-';
    const shown = items.slice(0, maxItems).join(', ');
    return items.length > maxItems ? `${shown}, +${items.length - maxItems}` : shown;
}
441
+
404
442
  const program = new Command();
405
443
 
406
444
  program
@@ -1285,12 +1323,17 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
1285
1323
  const { summary, recommendations } = intelligentData;
1286
1324
  const tier = summary.hardware_tier.replace('_', ' ').toUpperCase();
1287
1325
  const optimizeProfile = (summary.optimize_for || intelligentData.optimizeFor || 'balanced').toUpperCase();
1326
+ const runtimeLabel = (intelligentData.runtime || summary.best_overall?.runtime || 'auto').toUpperCase();
1327
+ const sourceLabel = intelligentData.recommendationSource === 'registry'
1328
+ ? 'Multi-source registry'
1329
+ : 'Ollama catalog';
1288
1330
  const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
1289
1331
 
1290
1332
  console.log('\n' + chalk.bgRed.white.bold(' INTELLIGENT RECOMMENDATIONS BY CATEGORY '));
1291
1333
  console.log(chalk.red('╭' + '─'.repeat(65)));
1292
1334
  console.log(chalk.red('│') + ` Hardware Tier: ${tierColor.bold(tier)} | Models Analyzed: ${chalk.cyan.bold(intelligentData.totalModelsAnalyzed)}`);
1293
- console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)}`);
1335
+ console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)} | Runtime: ${chalk.cyan.bold(runtimeLabel)}`);
1336
+ console.log(chalk.red('│') + ` Source: ${chalk.white.bold(sourceLabel)}`);
1294
1337
  console.log(chalk.red('│'));
1295
1338
 
1296
1339
  // Show the best overall model
@@ -1301,6 +1344,7 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
1301
1344
  console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(best.command)}`);
1302
1345
  console.log(chalk.red('│') + ` Score: ${chalk.yellow.bold(best.score)}/100 | Category: ${chalk.magenta(best.category)}`);
1303
1346
  console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(best.quantization || 'Q4_K_M')}`);
1347
+ console.log(chalk.red('│') + ` Runtime: ${chalk.cyan.bold(best.runtime || intelligentData.runtime || 'ollama')} | Source: ${chalk.gray(best.source || 'unknown')}`);
1304
1348
  console.log(chalk.red('│') + ` Fine-tuning: ${chalk.blue.bold(bestFineTuning.shortLabel)}`);
1305
1349
  console.log(chalk.red('│'));
1306
1350
  }
@@ -1326,6 +1370,7 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
1326
1370
  console.log(chalk.red('│') + ` ${chalk.green(model.name)} (${model.size})`);
1327
1371
  console.log(chalk.red('│') + ` Score: ${scoreColor.bold(model.score)}/100 | Pulls: ${chalk.gray(model.pulls?.toLocaleString() || 'N/A')}`);
1328
1372
  console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(model.quantization || 'Q4_K_M')}`);
1373
+ console.log(chalk.red('│') + ` Runtime: ${chalk.cyan(model.runtime || intelligentData.runtime || 'ollama')} | Source: ${chalk.gray(model.source || 'unknown')}`);
1329
1374
  console.log(chalk.red('│') + ` Fine-tuning: ${chalk.blue.bold(fineTuningSupport.shortLabel)}`);
1330
1375
  console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(model.command)}`);
1331
1376
  console.log(chalk.red('│'));
@@ -3017,7 +3062,7 @@ auditCommand
3017
3062
  .option('-u, --use-case <case>', 'Use case when --command check is selected', 'general')
3018
3063
  .option('-c, --category <category>', 'Category hint when --command recommend is selected')
3019
3064
  .option('--optimize <profile>', 'Optimization profile for recommend mode (balanced|speed|quality|context|coding)', 'balanced')
3020
- .option('--runtime <runtime>', `Runtime for check mode (${SUPPORTED_RUNTIMES.join('|')})`, 'ollama')
3065
+ .option('--runtime <runtime>', 'Runtime for check/recommend mode (auto|ollama|vllm|mlx|llama.cpp|transformers)', 'auto')
3021
3066
  .option('--include-cloud', 'Include cloud models in check-mode analysis')
3022
3067
  .option('--max-size <size>', 'Maximum model size for check mode (e.g., "24B" or "12GB")')
3023
3068
  .option('--min-size <size>', 'Minimum model size for check mode (e.g., "3B" or "2GB")')
@@ -3071,13 +3116,14 @@ auditCommand
3071
3116
  policyCandidates = collectCandidatesFromAnalysis(analysisResult);
3072
3117
  } else {
3073
3118
  recommendationResult = await checker.generateIntelligentRecommendations(hardware, {
3074
- optimizeFor: options.optimize
3119
+ optimizeFor: options.optimize,
3120
+ runtime: options.runtime
3075
3121
  });
3076
3122
  if (!recommendationResult) {
3077
3123
  throw new Error('Unable to generate recommendation data for policy audit export.');
3078
3124
  }
3079
3125
 
3080
- runtimeBackend = normalizeRuntime(options.runtime || 'ollama');
3126
+ runtimeBackend = recommendationResult.runtime || options.runtime || 'auto';
3081
3127
  policyCandidates = collectCandidatesFromRecommendationData(recommendationResult);
3082
3128
  }
3083
3129
 
@@ -3844,6 +3890,8 @@ program
3844
3890
  .description('Get intelligent model recommendations for your hardware')
3845
3891
  .option('-c, --category <category>', 'Get recommendations for specific category (coding, talking, reading, etc.)')
3846
3892
  .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
3893
+ .option('--runtime <runtime>', 'Runtime target for registry recommendations (auto|ollama|vllm|mlx|llama.cpp|transformers)', 'auto')
3894
+ .option('--no-registry', 'Use the legacy Ollama catalog recommendation path')
3847
3895
  .option('--no-verbose', 'Disable step-by-step progress display')
3848
3896
  .option('--policy <file>', 'Evaluate recommendations against a policy file')
3849
3897
  .option('--simulate <profile>', 'Simulate a hardware profile instead of detecting real hardware (use "list" to see profiles)')
@@ -3868,6 +3916,11 @@ Hardware simulation:
3868
3916
  $ llm-checker recommend --simulate m4pro24 --category coding
3869
3917
  $ llm-checker recommend --gpu "RTX 5060" --ram 32 --cpu "AMD Ryzen 7 5700X"
3870
3918
 
3919
+ Registry/runtime examples:
3920
+ $ llm-checker recommend --runtime auto --category coding
3921
+ $ llm-checker recommend --runtime vllm --category coding
3922
+ $ llm-checker recommend --runtime mlx --category general
3923
+
3871
3924
  Calibrated routing examples:
3872
3925
  $ llm-checker recommend --calibrated --category coding
3873
3926
  $ llm-checker recommend --calibrated ./calibration-policy.yaml --category reasoning
@@ -3945,7 +3998,9 @@ Calibrated routing examples:
3945
3998
 
3946
3999
  const hardware = await checker.getSystemInfo();
3947
4000
  const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware, {
3948
- optimizeFor: options.optimize
4001
+ optimizeFor: options.optimize,
4002
+ runtime: options.runtime,
4003
+ registry: options.registry
3949
4004
  });
3950
4005
 
3951
4006
  if (!intelligentRecommendations) {
@@ -4729,6 +4784,307 @@ program
4729
4784
  }
4730
4785
  });
4731
4786
 
4787
// `registry-sync` command: runs RegistryIngestor.ingest() for the requested
// sources and prints either a JSON document ({ summary, stats }) or a
// human-readable summary with per-source artifact counts.
+ program
4788
+ .command('registry-sync')
4789
+ .description('Sync the multi-source model registry (Ollama, Hugging Face, GPT4All)')
4790
+ .option('-s, --sources <list>', 'Comma-separated sources: ollama,huggingface,gpt4all', 'ollama,huggingface,gpt4all')
4791
+ .option('-l, --limit <n>', 'Fallback maximum records per source')
4792
+ .option('--hf-limit <n>', 'Maximum Hugging Face repos to ingest', '3000')
4793
+ .option('--ollama-limit <n>', 'Maximum Ollama artifacts to ingest', '10000')
4794
+ .option('--gpt4all-limit <n>', 'Maximum GPT4All entries to ingest', '1000')
4795
+ .option('--query <text>', 'Hugging Face search query')
4796
+ .option('--task <task>', 'Hugging Face task/filter, for example text-generation or text-embeddings-inference')
4797
+ .option('--dry-run', 'Fetch and normalize without writing to the database')
4798
+ .option('-q, --quiet', 'Suppress progress output')
4799
+ .option('-j, --json', 'Output as JSON')
4800
+ .action(async (options) => {
4801
// --json implies quiet: no banner and no spinner are created in JSON mode.
+ const quiet = Boolean(options.quiet || options.json);
4802
+ if (!quiet) showAsciiArt('registry-sync');
4803
+
4804
// Project modules are required inside the action, i.e. only when this
// command actually runs.
+ const ModelDatabase = require('../src/data/model-database');
4805
+ const { RegistryIngestor } = require('../src/data/registry-ingestors');
4806
+ const database = new ModelDatabase();
4807
+ const spinner = quiet ? null : ora('Preparing model registry sync...').start();
4808
+
4809
+ try {
4810
+ await database.initialize();
4811
+
4812
+ const ingestor = new RegistryIngestor({
4813
+ database,
4814
// Progress messages from the ingestor are mirrored into the spinner text.
+ onProgress: (info) => {
4815
+ if (spinner && info.message) {
4816
+ spinner.text = info.message;
4817
+ }
4818
+ }
4819
+ });
4820
+
4821
// Numeric options arrive as strings; invalid or missing values fall back
// to the same defaults declared on the options above (limit -> null).
+ const summary = await ingestor.ingest({
4822
+ sources: options.sources,
4823
+ limit: parsePositiveNumberOption(options.limit),
4824
+ hfLimit: parsePositiveNumberOption(options.hfLimit, 3000),
4825
+ ollamaLimit: parsePositiveNumberOption(options.ollamaLimit, 10000),
4826
+ gpt4allLimit: parsePositiveNumberOption(options.gpt4allLimit, 1000),
4827
+ query: options.query,
4828
+ task: options.task,
4829
+ dryRun: Boolean(options.dryRun)
4830
+ });
4831
// Registry totals are not read for dry runs; `stats` is null in that case.
+ const stats = options.dryRun ? null : database.getRegistryStats();
4832
+
4833
+ if (options.json) {
4834
+ console.log(JSON.stringify({ summary, stats }, null, 2));
4835
+ return;
4836
+ }
4837
+
4838
+ if (spinner) {
4839
+ const action = options.dryRun ? 'normalized' : 'synced';
4840
+ spinner.succeed(`Registry ${action}: ${summary.repos} repos, ${summary.artifacts} artifacts`);
4841
+ }
4842
+
4843
// NOTE(review): this summary is also printed for --quiet (without --json)
// and for --dry-run — confirm that is intended.
+ console.log(chalk.green('\n[OK] Registry sync complete'));
4844
+ console.log(chalk.gray(` Sources touched: ${summary.sources}`));
4845
+ console.log(chalk.gray(` Collections: ${summary.collections}`));
4846
+ console.log(chalk.gray(` Repositories: ${summary.repos}`));
4847
+ console.log(chalk.gray(` Artifacts: ${summary.artifacts}`));
4848
+
4849
+ if (stats) {
4850
+ console.log(chalk.blue.bold('\nRegistry totals:'));
4851
+ console.log(chalk.gray(` Sources: ${stats.sources}`));
4852
+ console.log(chalk.gray(` Repositories: ${stats.repos}`));
4853
+ console.log(chalk.gray(` Artifacts: ${stats.artifacts}`));
4854
+
4855
// assumes stats.bySource is always an array of { source_id, artifact_count }
// rows — TODO confirm against getRegistryStats().
+ if (stats.bySource.length > 0) {
4856
+ const rows = [['Source', 'Artifacts']];
4857
+ for (const item of stats.bySource) {
4858
+ rows.push([item.source_id, String(item.artifact_count)]);
4859
+ }
4860
+ console.log('\n' + table(rows));
4861
+ }
4862
+ }
4863
+
4864
+ console.log(chalk.cyan('Try: llm-checker registry-search llama --runtime auto --limit 10'));
4865
+ } catch (error) {
4866
+ if (spinner) spinner.fail('Registry sync failed');
4867
+ console.error(chalk.red('Error:'), error.message);
4868
+ if (process.env.DEBUG) console.error(error.stack);
4869
// Sets the exit code instead of exiting, so the finally block below still
// closes the database.
+ process.exitCode = 1;
4870
+ } finally {
4871
+ database.close();
4872
+ }
4873
+ });
4874
+
4875
// `registry-search [query]` command: builds a filter object from the CLI
// options, calls database.searchModelArtifacts(query, filters), and prints
// the matches as JSON or as a table followed by up to five download links.
+ program
4876
+ .command('registry-search [query]')
4877
+ .description('Search exact downloadable/installable artifacts in the multi-source model registry')
4878
+ .option('-s, --source <source>', 'Filter by source: ollama, huggingface, gpt4all')
4879
+ .option('--format <format>', 'Filter by artifact format: gguf, safetensors, mlx, ollama')
4880
+ .option('--runtime <runtime>', 'Filter by runtime support: auto, ollama, llama.cpp, transformers, vllm, mlx')
4881
+ .option('--quant <type>', 'Filter by quantization, for example Q4_K_M or Q8_0')
4882
+ .option('--max-size <gb>', 'Maximum artifact size in GB')
4883
+ .option('--min-params <billion>', 'Minimum parameter count in billions')
4884
+ .option('--max-params <billion>', 'Maximum parameter count in billions')
4885
+ .option('--local-only', 'Exclude gated/auth-required artifacts')
4886
+ .option('-l, --limit <n>', 'Maximum number of results', '20')
4887
+ .option('-j, --json', 'Output as JSON')
4888
// `query` defaults to '' so the command also works with filters only.
+ .action(async (query = '', options) => {
4889
+ if (!options.json) showAsciiArt('registry-search');
4890
+
4891
+ const ModelDatabase = require('../src/data/model-database');
4892
+ const database = new ModelDatabase();
4893
+
4894
+ try {
4895
+ await database.initialize();
4896
+
4897
// Only `format` is lower-cased here; the other string filters are passed
// through as typed. Numeric filters that fail to parse become null.
+ const filters = {
4898
+ source: options.source,
4899
+ format: options.format ? String(options.format).toLowerCase() : undefined,
4900
+ runtime: options.runtime,
4901
+ quantization: options.quant,
4902
+ maxSizeGB: parsePositiveNumberOption(options.maxSize),
4903
+ minParamsB: parsePositiveNumberOption(options.minParams),
4904
+ maxParamsB: parsePositiveNumberOption(options.maxParams),
4905
+ localOnly: Boolean(options.localOnly),
4906
+ limit: parsePositiveNumberOption(options.limit, 20)
4907
+ };
4908
+ const results = database.searchModelArtifacts(query, filters);
4909
+ const stats = database.getRegistryStats();
4910
+
4911
+ if (options.json) {
4912
+ console.log(JSON.stringify({
4913
+ query,
4914
+ filters,
4915
+ count: results.length,
4916
+ stats,
4917
+ results
4918
+ }, null, 2));
4919
+ return;
4920
+ }
4921
+
4922
+ if (results.length === 0) {
4923
+ console.log(chalk.yellow('No registry artifacts found.'));
4924
// An empty registry gets an extra hint pointing at registry-sync.
+ if (stats.artifacts === 0) {
4925
+ console.log(chalk.gray('Populate the registry first with: llm-checker registry-sync'));
4926
+ }
4927
+ return;
4928
+ }
4929
+
4930
+ console.log(chalk.blue.bold('\nRegistry Results'));
4931
+ console.log(chalk.gray(`Stored registry: ${stats.artifacts} artifacts across ${stats.sources} sources`));
4932
+ console.log('');
4933
+
4934
+ const rows = [[
4935
+ 'Source',
4936
+ 'Model',
4937
+ 'Artifact',
4938
+ 'Params',
4939
+ 'Size',
4940
+ 'Format',
4941
+ 'Runtime',
4942
+ 'Install'
4943
+ ]];
4944
+
4945
// One table row per result; long identifiers are shortened in the middle.
+ for (const item of results) {
4946
+ rows.push([
4947
+ item.source_id,
4948
+ truncateMiddle(item.canonical_model_id, 34),
4949
+ truncateMiddle(item.artifact_name || item.filename, 34),
4950
+ formatRegistryNumber(item.parameter_count_b, 'B'),
4951
+ formatRegistrySize(item.size_gb),
4952
+ item.quantization ? `${item.format}/${item.quantization}` : item.format,
4953
+ formatRegistryList(item.runtime_support, 2),
4954
+ truncateMiddle(item.install_command || item.download_url, 46)
4955
+ ]);
4956
+ }
4957
+
4958
+ console.log(table(rows));
4959
+
4960
// The table truncates URLs, so the first five results that have a
// download_url are listed again in full.
+ const links = results
4961
+ .filter((item) => item.download_url)
4962
+ .slice(0, 5);
4963
+ if (links.length > 0) {
4964
+ console.log(chalk.blue.bold('Exact download links:'));
4965
+ links.forEach((item, index) => {
4966
+ console.log(chalk.gray(` ${index + 1}. ${item.canonical_model_id} -> ${item.download_url}`));
4967
+ });
4968
+ }
4969
+ } catch (error) {
4970
+ console.error(chalk.red('Error:'), error.message);
4971
+ if (process.env.DEBUG) console.error(error.stack);
4972
// Sets the exit code instead of exiting, so the finally block below still
// closes the database.
+ process.exitCode = 1;
4973
+ } finally {
4974
+ database.close();
4975
+ }
4976
+ });
4977
+
4978
// `registry-recommend [query]` command: detects the local hardware, asks
// RegistryRecommender.recommend() to rank registry artifacts for it, and
// prints the ranking as JSON or as a table followed by a "Top pick" section.
+ program
4979
+ .command('registry-recommend [query]')
4980
+ .description('Recommend the best exact model artifacts from the multi-source registry for this hardware')
4981
+ .option('-c, --category <category>', 'Task category (general, coding, reasoning, embeddings, multimodal)', 'general')
4982
+ .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
4983
+ .option('--runtime <runtime>', 'Runtime target: auto, ollama, llama.cpp, vllm, mlx, transformers', 'auto')
4984
+ .option('-s, --source <source>', 'Filter by source: ollama, huggingface, gpt4all')
4985
+ .option('--format <format>', 'Filter by artifact format: gguf, safetensors, mlx, ollama')
4986
+ .option('--quant <type>', 'Filter by quantization, for example Q4_K_M or Q8_0')
4987
+ .option('--max-size <gb>', 'Maximum artifact size in GB')
4988
+ .option('--min-params <billion>', 'Minimum parameter count in billions')
4989
+ .option('--max-params <billion>', 'Maximum parameter count in billions')
4990
+ .option('--target-context <tokens>', 'Target context window for scoring')
4991
+ .option('--include-gated', 'Include gated/auth-required artifacts')
4992
+ .option('--pool-limit <n>', 'Maximum registry artifacts to score before ranking', '20000')
4993
+ .option('-l, --limit <n>', 'Maximum number of recommendations', '10')
4994
+ .option('-j, --json', 'Output as JSON')
4995
// `query` defaults to '' so the command also works with filters only.
+ .action(async (query = '', options) => {
4996
+ if (!options.json) showAsciiArt('registry-recommend');
4997
+
4998
+ const UnifiedDetector = require('../src/hardware/unified-detector');
4999
+ const { RegistryRecommender } = require('../src/data/registry-recommender');
5000
+ const recommender = new RegistryRecommender();
5001
// No spinner in JSON mode.
+ const spinner = options.json ? null : ora('Scoring registry artifacts...').start();
5002
+
5003
+ try {
5004
+ await recommender.initialize();
5005
+
5006
+ const detector = new UnifiedDetector();
5007
+ const hardware = await detector.detect();
5008
+ const category = normalizeTaskName(options.category || 'general');
5009
+ const result = await recommender.recommend({
5010
+ query,
5011
+ category,
5012
+ optimizeFor: options.optimize,
5013
+ runtime: options.runtime,
5014
+ source: options.source,
5015
+ format: options.format ? String(options.format).toLowerCase() : undefined,
5016
+ quantization: options.quant,
5017
+ maxSizeGB: parsePositiveNumberOption(options.maxSize),
5018
+ minParamsB: parsePositiveNumberOption(options.minParams),
5019
+ maxParamsB: parsePositiveNumberOption(options.maxParams),
5020
+ targetContext: parsePositiveNumberOption(options.targetContext),
5021
// localOnly is true unless --include-gated is passed; presumably this
// excludes gated/auth-required artifacts — verify in RegistryRecommender.
+ localOnly: !options.includeGated,
5022
+ poolLimit: parsePositiveNumberOption(options.poolLimit, 20000),
5023
+ limit: parsePositiveNumberOption(options.limit, 10),
5024
+ hardware
5025
+ });
5026
+
5027
+ if (options.json) {
5028
+ console.log(JSON.stringify({
5029
+ query,
5030
// Uses hardware.summary when the detector provides one, otherwise the
// full detection result.
+ hardware: hardware.summary || hardware,
5031
+ ...result
5032
+ }, null, 2));
5033
+ return;
5034
+ }
5035
+
5036
+ if (spinner) {
5037
+ spinner.succeed(
5038
+ `Scored ${result.total_evaluated} candidates from ${result.total_artifacts} registry artifacts`
5039
+ );
5040
+ }
5041
+
5042
+ if (result.recommendations.length === 0) {
5043
+ console.log(chalk.yellow('No registry recommendations found for those filters.'));
5044
// An empty registry gets an extra hint pointing at registry-sync.
+ if (result.registry.artifacts === 0) {
5045
+ console.log(chalk.gray('Populate the registry first with: llm-checker registry-sync'));
5046
+ }
5047
+ return;
5048
+ }
5049
+
5050
+ console.log(chalk.blue.bold('\nRegistry Recommendations'));
5051
+ console.log(chalk.gray(`Registry: ${result.registry.repos} repos, ${result.registry.artifacts} artifacts`));
5052
+ console.log(chalk.gray(`Runtime: ${result.runtime} | Category: ${result.category} | Optimize: ${result.optimizeFor}`));
5053
+ console.log('');
5054
+
5055
// One table row per recommendation, numbered from 1 in the order returned.
+ const rows = [['#', 'Score', 'Source', 'Model', 'Artifact', 'Params', 'Size', 'Install']];
5056
+ result.recommendations.forEach((item, index) => {
5057
+ rows.push([
5058
+ String(index + 1),
5059
+ String(item.score),
5060
+ item.source,
5061
+ truncateMiddle(item.model, 30),
5062
+ truncateMiddle(item.artifact, 32),
5063
+ formatRegistryNumber(item.params_b, 'B'),
5064
+ formatRegistrySize(item.size_gb),
5065
+ truncateMiddle(item.install_command || item.download_url, 44)
5066
+ ]);
5067
+ });
5068
+
5069
+ console.log(table(rows));
5070
+
5071
// The first recommendation is repeated untruncated, with its rationale
// and full install command / download URL when present.
+ console.log(chalk.blue.bold('Top pick:'));
5072
+ const best = result.recommendations[0];
5073
+ console.log(chalk.white.bold(` ${best.model}`));
5074
+ console.log(chalk.gray(` Artifact: ${best.artifact}`));
5075
+ console.log(chalk.gray(` Why: ${best.rationale}`));
5076
+ if (best.install_command) console.log(chalk.cyan(` ${best.install_command}`));
5077
+ if (best.download_url) console.log(chalk.gray(` ${best.download_url}`));
5078
+ } catch (error) {
5079
+ if (spinner) spinner.fail('Registry recommendation failed');
5080
+ console.error(chalk.red('Error:'), error.message);
5081
+ if (process.env.DEBUG) console.error(error.stack);
5082
// Sets the exit code instead of exiting, so the finally block below still
// closes the recommender.
+ process.exitCode = 1;
5083
+ } finally {
5084
+ recommender.close();
5085
+ }
5086
+ });
5087
+
4732
5088
  program
4733
5089
  .command('search <query>')
4734
5090
  .description('Search models in the database with intelligent scoring')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.6.1",
3
+ "version": "3.7.0",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -16,6 +16,10 @@
16
16
  "test:ui": "node tests/ui-cli-smoke.test.js",
17
17
  "test:runtime": "node tests/runtime-specdec-tests.js",
18
18
  "test:deterministic-pool": "node tests/deterministic-model-pool-check.js",
19
+ "test:registry": "node tests/model-registry-ingestors.test.js",
20
+ "test:registry-main": "node tests/model-registry-main-flow.test.js",
21
+ "test:registry-recommender": "node tests/model-registry-recommender.test.js",
22
+ "test:registry-seed": "node tests/model-registry-seed.test.js",
19
23
  "test:policy": "node tests/policy-commands.test.js",
20
24
  "test:policy-cli": "node tests/policy-cli-enforcement.js",
21
25
  "test:policy-engine": "node tests/policy-engine.test.js",
@@ -36,7 +40,8 @@
36
40
  "list-models": "node bin/enhanced_cli.js list-models",
37
41
  "ai-check": "node bin/enhanced_cli.js ai-check",
38
42
  "ai-run": "node bin/enhanced_cli.js ai-run",
39
- "sync:seed": "node bin/enhanced_cli.js sync --force --quiet && node scripts/update-seed-db.js",
43
+ "sync:seed": "node bin/enhanced_cli.js sync --force --quiet && node scripts/update-seed-db.js && node scripts/update-registry-seed.js",
44
+ "sync:registry-seed": "node scripts/update-registry-seed.js",
40
45
  "benchmark": "cd ml-model && python python/benchmark_collector.py",
41
46
  "train-ai": "cd ml-model && python python/train_model.py",
42
47
  "postinstall": "echo 'LLM Checker installed. Run: llm-checker hw-detect'"