llm-checker 3.6.1 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -7
- package/bin/enhanced_cli.js +361 -5
- package/package.json +7 -2
- package/src/data/model-database.js +450 -0
- package/src/data/registry-ingestors.js +751 -0
- package/src/data/registry-recommender.js +514 -0
- package/src/data/seed/README.md +11 -3
- package/src/data/seed/models.db +0 -0
- package/src/index.js +68 -4
- package/src/models/deterministic-selector.js +16 -3
- package/src/models/moe-assumptions.js +11 -0
package/README.md
CHANGED
|
@@ -641,30 +641,36 @@ llm-checker search qwen --quant Q4_K_M --max-size 8
|
|
|
641
641
|
|
|
642
642
|
## Model Catalog
|
|
643
643
|
|
|
644
|
-
LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
|
|
644
|
+
LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog plus a multi-source registry of exact downloadable/installable model artifacts. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
|
|
645
645
|
|
|
646
646
|
The packaged snapshot currently includes:
|
|
647
647
|
|
|
648
648
|
- 229 Ollama models
|
|
649
649
|
- 7176 variants
|
|
650
|
+
- 3259 multi-source registry repositories
|
|
651
|
+
- 33729 exact model artifacts from Hugging Face, Ollama, and GPT4All
|
|
652
|
+
- Hugging Face top 3000 repositories by downloads, fetched with API pagination
|
|
650
653
|
- pull counts
|
|
651
654
|
- tag counts
|
|
652
655
|
- last-updated metadata
|
|
653
|
-
- variant params, quantization, size, context,
|
|
656
|
+
- variant params, quantization, size, context, runtime, install commands, download URLs, license/gated flags, tasks, and modalities when available
|
|
654
657
|
|
|
655
658
|
Refresh it any time:
|
|
656
659
|
|
|
657
660
|
```bash
|
|
658
661
|
llm-checker sync
|
|
662
|
+
llm-checker registry-sync --sources ollama,huggingface,gpt4all
|
|
663
|
+
llm-checker registry-search qwen --runtime auto --max-size 8
|
|
664
|
+
llm-checker registry-recommend --category coding --runtime auto --max-size 8
|
|
659
665
|
```
|
|
660
666
|
|
|
661
|
-
For release maintainers, the packaged seed can be regenerated from the synced local DB:
|
|
667
|
+
For release maintainers, the packaged seed can be regenerated from the synced local DB and registry APIs:
|
|
662
668
|
|
|
663
669
|
```bash
|
|
664
670
|
npm run sync:seed
|
|
665
671
|
```
|
|
666
672
|
|
|
667
|
-
`recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
|
|
673
|
+
`recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. `registry-search` queries exact artifacts across sources, and `registry-recommend` ranks exact artifacts from the registry with the deterministic hardware-aware selector. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
|
|
668
674
|
|
|
669
675
|
The curated fallback catalog includes 35+ models from the most popular Ollama families:
|
|
670
676
|
|
|
@@ -836,7 +842,7 @@ LLM Checker uses a deterministic pipeline so the same inputs produce the same ra
|
|
|
836
842
|
flowchart LR
|
|
837
843
|
subgraph Inputs
|
|
838
844
|
HW["Hardware detector<br/>CPU/GPU/RAM/backend"]
|
|
839
|
-
REG["Synced SQLite
|
|
845
|
+
REG["Synced SQLite model catalog<br/>(Ollama seed + multi-source registry)"]
|
|
840
846
|
LOCAL["Installed local models"]
|
|
841
847
|
FLAGS["CLI options<br/>use-case/runtime/limits/policy"]
|
|
842
848
|
end
|
|
@@ -952,8 +958,9 @@ src/
|
|
|
952
958
|
detector.js # Hardware detection
|
|
953
959
|
unified-detector.js # Cross-platform detection
|
|
954
960
|
data/
|
|
955
|
-
model-database.js # SQLite storage and packaged seed loading
|
|
956
|
-
|
|
961
|
+
model-database.js # SQLite storage, registry tables, and packaged seed loading
|
|
962
|
+
registry-ingestors.js # Ollama/Hugging Face/GPT4All artifact normalization
|
|
963
|
+
seed/models.db # npm-packaged Ollama + multi-source registry snapshot
|
|
957
964
|
sync-manager.js # Database sync from Ollama registry
|
|
958
965
|
bin/
|
|
959
966
|
enhanced_cli.js # CLI entry point
|
package/bin/enhanced_cli.js
CHANGED
|
@@ -61,6 +61,9 @@ const COMMAND_HEADER_LABELS = {
|
|
|
61
61
|
'smart-recommend': 'Smart Recommend (Experimental)',
|
|
62
62
|
search: 'Model Search',
|
|
63
63
|
sync: 'Database Sync',
|
|
64
|
+
'registry-sync': 'Model Registry Sync',
|
|
65
|
+
'registry-search': 'Model Registry Search',
|
|
66
|
+
'registry-recommend': 'Registry Recommendations',
|
|
64
67
|
'mcp-setup': 'Claude MCP Setup',
|
|
65
68
|
check: 'Compatibility Check',
|
|
66
69
|
installed: 'Installed Models',
|
|
@@ -401,6 +404,41 @@ function getRealSizeFromOllamaCache(model) {
|
|
|
401
404
|
}
|
|
402
405
|
}
|
|
403
406
|
|
|
407
|
+
/**
 * Parse a raw CLI option value as a strictly positive, finite number.
 *
 * Only real numbers and numeric strings are accepted. Other types (booleans,
 * arrays, objects) are rejected instead of being coerced, because `Number(true)`
 * is 1 and `Number([7])` is 7, which would silently turn a malformed option
 * into a valid-looking limit.
 *
 * @param {*} value - Raw option value (typically a string from the CLI parser).
 * @param {*} [fallback=null] - Returned when `value` is missing, empty, or not
 *   a positive finite number.
 * @returns {number|*} The parsed number, or `fallback`.
 */
function parsePositiveNumberOption(value, fallback = null) {
    let candidate;
    if (typeof value === 'number') {
        candidate = value;
    } else if (typeof value === 'string') {
        const trimmed = value.trim();
        // An empty (or whitespace-only) string means "option not provided".
        if (trimmed === '') return fallback;
        candidate = Number(trimmed);
    } else {
        // undefined, null, booleans, arrays, objects: never coerce.
        return fallback;
    }

    return Number.isFinite(candidate) && candidate > 0 ? candidate : fallback;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Shorten a value for table display by replacing its middle with "...".
 *
 * @param {*} value - Value to display; `null`/`undefined` become an empty string,
 *   everything else is stringified (so `0` renders as "0").
 * @param {number} [maxLength=48] - Maximum length of the returned string.
 *   Fractions are floored, negatives are treated as 0, and a non-numeric value
 *   falls back to the default of 48.
 * @returns {string} The original text when it fits, otherwise a string of at
 *   most `maxLength` characters.
 */
function truncateMiddle(value, maxLength = 48) {
    const text = value === undefined || value === null ? '' : String(value);

    // Normalise the limit so slice() never sees a negative, fractional, or NaN
    // bound (a negative bound would count from the end of the string and
    // return nearly everything).
    const requested = Number(maxLength);
    const limit = Number.isNaN(requested) ? 48 : Math.max(0, Math.floor(requested));

    if (text.length <= limit) return text;

    // Too small to fit "..." plus context on both sides: hard-cut instead.
    if (limit <= 4) return text.slice(0, limit);

    const keep = limit - 3; // characters left once the ellipsis is accounted for
    const head = Math.ceil(keep / 2);
    const tail = Math.floor(keep / 2);
    return `${text.slice(0, head)}...${text.slice(text.length - tail)}`;
}
|
|
421
|
+
|
|
422
|
+
/**
 * Format a positive numeric registry field (for example a parameter count in
 * billions) for table display.
 *
 * Values >= 100 are rounded to an integer, other values to one decimal place.
 * Positive values too small to survive one-decimal rounding keep two
 * significant digits instead of collapsing to "0" (e.g. 0.022 -> "0.022B").
 *
 * @param {*} value - Value to format; coerced with `Number()`.
 * @param {string} [suffix=''] - Text appended to the formatted number.
 * @returns {string} Formatted number with suffix, or '?' when the value is
 *   missing, non-finite, zero, or negative.
 */
function formatRegistryNumber(value, suffix = '') {
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed <= 0) return '?';

    const rounded = parsed >= 100 ? Math.round(parsed) : Math.round(parsed * 10) / 10;
    // A known-positive value must never be shown as zero.
    const display = rounded > 0 ? rounded : Number(parsed.toPrecision(2));
    return `${display}${suffix}`;
}
|
|
428
|
+
|
|
429
|
+
/**
 * Format an artifact size (expressed in GB) for table display.
 *
 * The size is rounded to two decimals. A positive size that rounds down to
 * zero is shown as "<0.01GB" rather than the misleading "0GB".
 *
 * @param {*} value - Size in GB; coerced with `Number()`.
 * @returns {string} e.g. "4.37GB", "<0.01GB", or '?' when the value is
 *   missing, non-finite, zero, or negative.
 */
function formatRegistrySize(value) {
    const parsed = Number(value);
    if (!Number.isFinite(parsed) || parsed <= 0) return '?';

    const rounded = Math.round(parsed * 100) / 100;
    return rounded > 0 ? `${rounded}GB` : '<0.01GB';
}
|
|
434
|
+
|
|
435
|
+
/**
 * Render a list as a short comma-separated summary, e.g. "a, b, +2".
 *
 * @param {*} value - List to render; anything that is not an array is treated
 *   as empty.
 * @param {number} [maxItems=3] - Maximum number of items to spell out.
 *   Fractions are floored, negatives are treated as 0, and a non-numeric value
 *   falls back to the default of 3.
 * @returns {string} '-' for an empty list, otherwise the first `maxItems`
 *   items followed by "+N" for the N items left out.
 */
function formatRegistryList(value, maxItems = 3) {
    const items = Array.isArray(value) ? value : [];
    if (items.length === 0) return '-';

    // Normalise the limit so slice() is never given a negative or fractional
    // bound and the hidden-item count is always a whole number.
    const requested = Number(maxItems);
    const limit = Number.isNaN(requested) ? 3 : Math.max(0, Math.floor(requested));

    const hidden = items.length - Math.min(limit, items.length);
    const shown = items.slice(0, limit).join(', ');
    if (hidden === 0) return shown;

    // With nothing spelled out there is no list to prefix with ", ".
    return limit === 0 ? `+${hidden}` : `${shown}, +${hidden}`;
}
|
|
441
|
+
|
|
404
442
|
const program = new Command();
|
|
405
443
|
|
|
406
444
|
program
|
|
@@ -1285,12 +1323,17 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
|
|
|
1285
1323
|
const { summary, recommendations } = intelligentData;
|
|
1286
1324
|
const tier = summary.hardware_tier.replace('_', ' ').toUpperCase();
|
|
1287
1325
|
const optimizeProfile = (summary.optimize_for || intelligentData.optimizeFor || 'balanced').toUpperCase();
|
|
1326
|
+
const runtimeLabel = (intelligentData.runtime || summary.best_overall?.runtime || 'auto').toUpperCase();
|
|
1327
|
+
const sourceLabel = intelligentData.recommendationSource === 'registry'
|
|
1328
|
+
? 'Multi-source registry'
|
|
1329
|
+
: 'Ollama catalog';
|
|
1288
1330
|
const tierColor = tier.includes('HIGH') ? chalk.green : tier.includes('MEDIUM') ? chalk.yellow : chalk.red;
|
|
1289
1331
|
|
|
1290
1332
|
console.log('\n' + chalk.bgRed.white.bold(' INTELLIGENT RECOMMENDATIONS BY CATEGORY '));
|
|
1291
1333
|
console.log(chalk.red('╭' + '─'.repeat(65)));
|
|
1292
1334
|
console.log(chalk.red('│') + ` Hardware Tier: ${tierColor.bold(tier)} | Models Analyzed: ${chalk.cyan.bold(intelligentData.totalModelsAnalyzed)}`);
|
|
1293
|
-
console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)}`);
|
|
1335
|
+
console.log(chalk.red('│') + ` Optimization: ${chalk.magenta.bold(optimizeProfile)} | Runtime: ${chalk.cyan.bold(runtimeLabel)}`);
|
|
1336
|
+
console.log(chalk.red('│') + ` Source: ${chalk.white.bold(sourceLabel)}`);
|
|
1294
1337
|
console.log(chalk.red('│'));
|
|
1295
1338
|
|
|
1296
1339
|
// Mostrar mejor modelo general
|
|
@@ -1301,6 +1344,7 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
|
|
|
1301
1344
|
console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(best.command)}`);
|
|
1302
1345
|
console.log(chalk.red('│') + ` Score: ${chalk.yellow.bold(best.score)}/100 | Category: ${chalk.magenta(best.category)}`);
|
|
1303
1346
|
console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(best.quantization || 'Q4_K_M')}`);
|
|
1347
|
+
console.log(chalk.red('│') + ` Runtime: ${chalk.cyan.bold(best.runtime || intelligentData.runtime || 'ollama')} | Source: ${chalk.gray(best.source || 'unknown')}`);
|
|
1304
1348
|
console.log(chalk.red('│') + ` Fine-tuning: ${chalk.blue.bold(bestFineTuning.shortLabel)}`);
|
|
1305
1349
|
console.log(chalk.red('│'));
|
|
1306
1350
|
}
|
|
@@ -1326,6 +1370,7 @@ function displayIntelligentRecommendations(intelligentData, hardware = null) {
|
|
|
1326
1370
|
console.log(chalk.red('│') + ` ${chalk.green(model.name)} (${model.size})`);
|
|
1327
1371
|
console.log(chalk.red('│') + ` Score: ${scoreColor.bold(model.score)}/100 | Pulls: ${chalk.gray(model.pulls?.toLocaleString() || 'N/A')}`);
|
|
1328
1372
|
console.log(chalk.red('│') + ` Quantization: ${chalk.white.bold(model.quantization || 'Q4_K_M')}`);
|
|
1373
|
+
console.log(chalk.red('│') + ` Runtime: ${chalk.cyan(model.runtime || intelligentData.runtime || 'ollama')} | Source: ${chalk.gray(model.source || 'unknown')}`);
|
|
1329
1374
|
console.log(chalk.red('│') + ` Fine-tuning: ${chalk.blue.bold(fineTuningSupport.shortLabel)}`);
|
|
1330
1375
|
console.log(chalk.red('│') + ` Command: ${chalk.cyan.bold(model.command)}`);
|
|
1331
1376
|
console.log(chalk.red('│'));
|
|
@@ -3017,7 +3062,7 @@ auditCommand
|
|
|
3017
3062
|
.option('-u, --use-case <case>', 'Use case when --command check is selected', 'general')
|
|
3018
3063
|
.option('-c, --category <category>', 'Category hint when --command recommend is selected')
|
|
3019
3064
|
.option('--optimize <profile>', 'Optimization profile for recommend mode (balanced|speed|quality|context|coding)', 'balanced')
|
|
3020
|
-
.option('--runtime <runtime>',
|
|
3065
|
+
.option('--runtime <runtime>', 'Runtime for check/recommend mode (auto|ollama|vllm|mlx|llama.cpp|transformers)', 'auto')
|
|
3021
3066
|
.option('--include-cloud', 'Include cloud models in check-mode analysis')
|
|
3022
3067
|
.option('--max-size <size>', 'Maximum model size for check mode (e.g., "24B" or "12GB")')
|
|
3023
3068
|
.option('--min-size <size>', 'Minimum model size for check mode (e.g., "3B" or "2GB")')
|
|
@@ -3071,13 +3116,14 @@ auditCommand
|
|
|
3071
3116
|
policyCandidates = collectCandidatesFromAnalysis(analysisResult);
|
|
3072
3117
|
} else {
|
|
3073
3118
|
recommendationResult = await checker.generateIntelligentRecommendations(hardware, {
|
|
3074
|
-
optimizeFor: options.optimize
|
|
3119
|
+
optimizeFor: options.optimize,
|
|
3120
|
+
runtime: options.runtime
|
|
3075
3121
|
});
|
|
3076
3122
|
if (!recommendationResult) {
|
|
3077
3123
|
throw new Error('Unable to generate recommendation data for policy audit export.');
|
|
3078
3124
|
}
|
|
3079
3125
|
|
|
3080
|
-
runtimeBackend =
|
|
3126
|
+
runtimeBackend = recommendationResult.runtime || options.runtime || 'auto';
|
|
3081
3127
|
policyCandidates = collectCandidatesFromRecommendationData(recommendationResult);
|
|
3082
3128
|
}
|
|
3083
3129
|
|
|
@@ -3844,6 +3890,8 @@ program
|
|
|
3844
3890
|
.description('Get intelligent model recommendations for your hardware')
|
|
3845
3891
|
.option('-c, --category <category>', 'Get recommendations for specific category (coding, talking, reading, etc.)')
|
|
3846
3892
|
.option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
|
|
3893
|
+
.option('--runtime <runtime>', 'Runtime target for registry recommendations (auto|ollama|vllm|mlx|llama.cpp|transformers)', 'auto')
|
|
3894
|
+
.option('--no-registry', 'Use the legacy Ollama catalog recommendation path')
|
|
3847
3895
|
.option('--no-verbose', 'Disable step-by-step progress display')
|
|
3848
3896
|
.option('--policy <file>', 'Evaluate recommendations against a policy file')
|
|
3849
3897
|
.option('--simulate <profile>', 'Simulate a hardware profile instead of detecting real hardware (use "list" to see profiles)')
|
|
@@ -3868,6 +3916,11 @@ Hardware simulation:
|
|
|
3868
3916
|
$ llm-checker recommend --simulate m4pro24 --category coding
|
|
3869
3917
|
$ llm-checker recommend --gpu "RTX 5060" --ram 32 --cpu "AMD Ryzen 7 5700X"
|
|
3870
3918
|
|
|
3919
|
+
Registry/runtime examples:
|
|
3920
|
+
$ llm-checker recommend --runtime auto --category coding
|
|
3921
|
+
$ llm-checker recommend --runtime vllm --category coding
|
|
3922
|
+
$ llm-checker recommend --runtime mlx --category general
|
|
3923
|
+
|
|
3871
3924
|
Calibrated routing examples:
|
|
3872
3925
|
$ llm-checker recommend --calibrated --category coding
|
|
3873
3926
|
$ llm-checker recommend --calibrated ./calibration-policy.yaml --category reasoning
|
|
@@ -3945,7 +3998,9 @@ Calibrated routing examples:
|
|
|
3945
3998
|
|
|
3946
3999
|
const hardware = await checker.getSystemInfo();
|
|
3947
4000
|
const intelligentRecommendations = await checker.generateIntelligentRecommendations(hardware, {
|
|
3948
|
-
optimizeFor: options.optimize
|
|
4001
|
+
optimizeFor: options.optimize,
|
|
4002
|
+
runtime: options.runtime,
|
|
4003
|
+
registry: options.registry
|
|
3949
4004
|
});
|
|
3950
4005
|
|
|
3951
4006
|
if (!intelligentRecommendations) {
|
|
@@ -4729,6 +4784,307 @@ program
|
|
|
4729
4784
|
}
|
|
4730
4785
|
});
|
|
4731
4786
|
|
|
4787
|
+
// `registry-sync`: runs RegistryIngestor.ingest() against a ModelDatabase and
// reports the returned summary (plus stored registry totals unless --dry-run).
program
    .command('registry-sync')
    .description('Sync the multi-source model registry (Ollama, Hugging Face, GPT4All)')
    .option('-s, --sources <list>', 'Comma-separated sources: ollama,huggingface,gpt4all', 'ollama,huggingface,gpt4all')
    .option('-l, --limit <n>', 'Fallback maximum records per source')
    .option('--hf-limit <n>', 'Maximum Hugging Face repos to ingest', '3000')
    .option('--ollama-limit <n>', 'Maximum Ollama artifacts to ingest', '10000')
    .option('--gpt4all-limit <n>', 'Maximum GPT4All entries to ingest', '1000')
    .option('--query <text>', 'Hugging Face search query')
    .option('--task <task>', 'Hugging Face task/filter, for example text-generation or text-embeddings-inference')
    .option('--dry-run', 'Fetch and normalize without writing to the database')
    .option('-q, --quiet', 'Suppress progress output')
    .option('-j, --json', 'Output as JSON')
    .action(async (options) => {
        // JSON output implies quiet so stdout carries nothing but the JSON document.
        const silent = Boolean(options.quiet || options.json);
        if (!silent) {
            showAsciiArt('registry-sync');
        }

        const ModelDatabase = require('../src/data/model-database');
        const { RegistryIngestor } = require('../src/data/registry-ingestors');
        const database = new ModelDatabase();
        const spinner = silent ? null : ora('Preparing model registry sync...').start();
        const printGray = (line) => console.log(chalk.gray(line));

        try {
            await database.initialize();

            // Mirror ingestor progress messages onto the spinner when one is active.
            const reportProgress = (info) => {
                if (spinner && info.message) {
                    spinner.text = info.message;
                }
            };
            const ingestor = new RegistryIngestor({ database, onProgress: reportProgress });

            const summary = await ingestor.ingest({
                sources: options.sources,
                limit: parsePositiveNumberOption(options.limit),
                hfLimit: parsePositiveNumberOption(options.hfLimit, 3000),
                ollamaLimit: parsePositiveNumberOption(options.ollamaLimit, 10000),
                gpt4allLimit: parsePositiveNumberOption(options.gpt4allLimit, 1000),
                query: options.query,
                task: options.task,
                dryRun: Boolean(options.dryRun)
            });

            // Stored totals are only read back when the run was not a dry run.
            let stats = null;
            if (!options.dryRun) {
                stats = database.getRegistryStats();
            }

            if (options.json) {
                console.log(JSON.stringify({ summary, stats }, null, 2));
                return;
            }

            if (spinner) {
                const action = options.dryRun ? 'normalized' : 'synced';
                spinner.succeed(`Registry ${action}: ${summary.repos} repos, ${summary.artifacts} artifacts`);
            }

            console.log(chalk.green('\n[OK] Registry sync complete'));
            printGray(` Sources touched: ${summary.sources}`);
            printGray(` Collections: ${summary.collections}`);
            printGray(` Repositories: ${summary.repos}`);
            printGray(` Artifacts: ${summary.artifacts}`);

            if (stats) {
                console.log(chalk.blue.bold('\nRegistry totals:'));
                printGray(` Sources: ${stats.sources}`);
                printGray(` Repositories: ${stats.repos}`);
                printGray(` Artifacts: ${stats.artifacts}`);

                if (stats.bySource.length > 0) {
                    const breakdown = [...stats.bySource].map((entry) => [
                        entry.source_id,
                        String(entry.artifact_count)
                    ]);
                    console.log('\n' + table([['Source', 'Artifacts'], ...breakdown]));
                }
            }

            console.log(chalk.cyan('Try: llm-checker registry-search llama --runtime auto --limit 10'));
        } catch (error) {
            if (spinner) {
                spinner.fail('Registry sync failed');
            }
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            // Set the exit code instead of exiting so the finally block still runs.
            process.exitCode = 1;
        } finally {
            database.close();
        }
    });
|
|
4874
|
+
|
|
4875
|
+
// `registry-search`: queries ModelDatabase.searchModelArtifacts() with the CLI
// filters and prints the matches as JSON or as a table plus download links.
program
    .command('registry-search [query]')
    .description('Search exact downloadable/installable artifacts in the multi-source model registry')
    .option('-s, --source <source>', 'Filter by source: ollama, huggingface, gpt4all')
    .option('--format <format>', 'Filter by artifact format: gguf, safetensors, mlx, ollama')
    .option('--runtime <runtime>', 'Filter by runtime support: auto, ollama, llama.cpp, transformers, vllm, mlx')
    .option('--quant <type>', 'Filter by quantization, for example Q4_K_M or Q8_0')
    .option('--max-size <gb>', 'Maximum artifact size in GB')
    .option('--min-params <billion>', 'Minimum parameter count in billions')
    .option('--max-params <billion>', 'Maximum parameter count in billions')
    .option('--local-only', 'Exclude gated/auth-required artifacts')
    .option('-l, --limit <n>', 'Maximum number of results', '20')
    .option('-j, --json', 'Output as JSON')
    .action(async (query = '', options) => {
        if (!options.json) {
            showAsciiArt('registry-search');
        }

        const ModelDatabase = require('../src/data/model-database');
        const database = new ModelDatabase();

        try {
            await database.initialize();

            // Only the format filter is lower-cased; the other filters are passed through as given.
            const normalizedFormat = options.format ? String(options.format).toLowerCase() : undefined;
            const filters = {
                source: options.source,
                format: normalizedFormat,
                runtime: options.runtime,
                quantization: options.quant,
                maxSizeGB: parsePositiveNumberOption(options.maxSize),
                minParamsB: parsePositiveNumberOption(options.minParams),
                maxParamsB: parsePositiveNumberOption(options.maxParams),
                localOnly: Boolean(options.localOnly),
                limit: parsePositiveNumberOption(options.limit, 20)
            };
            const results = database.searchModelArtifacts(query, filters);
            const stats = database.getRegistryStats();

            if (options.json) {
                const payload = { query, filters, count: results.length, stats, results };
                console.log(JSON.stringify(payload, null, 2));
                return;
            }

            if (results.length === 0) {
                console.log(chalk.yellow('No registry artifacts found.'));
                // An empty registry (as opposed to an empty match) gets a hint to sync first.
                if (stats.artifacts === 0) {
                    console.log(chalk.gray('Populate the registry first with: llm-checker registry-sync'));
                }
                return;
            }

            console.log(chalk.blue.bold('\nRegistry Results'));
            console.log(chalk.gray(`Stored registry: ${stats.artifacts} artifacts across ${stats.sources} sources`));
            console.log('');

            const header = ['Source', 'Model', 'Artifact', 'Params', 'Size', 'Format', 'Runtime', 'Install'];
            const toRow = (item) => {
                const formatLabel = item.quantization
                    ? `${item.format}/${item.quantization}`
                    : item.format;
                return [
                    item.source_id,
                    truncateMiddle(item.canonical_model_id, 34),
                    truncateMiddle(item.artifact_name || item.filename, 34),
                    formatRegistryNumber(item.parameter_count_b, 'B'),
                    formatRegistrySize(item.size_gb),
                    formatLabel,
                    formatRegistryList(item.runtime_support, 2),
                    truncateMiddle(item.install_command || item.download_url, 46)
                ];
            };
            console.log(table([header, ...results.map((item) => toRow(item))]));

            // List up to five full download URLs, since the table column truncates them.
            const links = results.filter((item) => item.download_url).slice(0, 5);
            if (links.length > 0) {
                console.log(chalk.blue.bold('Exact download links:'));
                links.forEach((item, index) => {
                    console.log(chalk.gray(` ${index + 1}. ${item.canonical_model_id} -> ${item.download_url}`));
                });
            }
        } catch (error) {
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            // Set the exit code instead of exiting so the finally block still runs.
            process.exitCode = 1;
        } finally {
            database.close();
        }
    });
|
|
4977
|
+
|
|
4978
|
+
// `registry-recommend`: detects hardware with UnifiedDetector, asks
// RegistryRecommender.recommend() for ranked artifacts, and prints them as JSON
// or as a table followed by details of the top pick.
program
    .command('registry-recommend [query]')
    .description('Recommend the best exact model artifacts from the multi-source registry for this hardware')
    .option('-c, --category <category>', 'Task category (general, coding, reasoning, embeddings, multimodal)', 'general')
    .option('--optimize <profile>', 'Optimization profile (balanced|speed|quality|context|coding)', 'balanced')
    .option('--runtime <runtime>', 'Runtime target: auto, ollama, llama.cpp, vllm, mlx, transformers', 'auto')
    .option('-s, --source <source>', 'Filter by source: ollama, huggingface, gpt4all')
    .option('--format <format>', 'Filter by artifact format: gguf, safetensors, mlx, ollama')
    .option('--quant <type>', 'Filter by quantization, for example Q4_K_M or Q8_0')
    .option('--max-size <gb>', 'Maximum artifact size in GB')
    .option('--min-params <billion>', 'Minimum parameter count in billions')
    .option('--max-params <billion>', 'Maximum parameter count in billions')
    .option('--target-context <tokens>', 'Target context window for scoring')
    .option('--include-gated', 'Include gated/auth-required artifacts')
    .option('--pool-limit <n>', 'Maximum registry artifacts to score before ranking', '20000')
    .option('-l, --limit <n>', 'Maximum number of recommendations', '10')
    .option('-j, --json', 'Output as JSON')
    .action(async (query = '', options) => {
        if (!options.json) {
            showAsciiArt('registry-recommend');
        }

        const UnifiedDetector = require('../src/hardware/unified-detector');
        const { RegistryRecommender } = require('../src/data/registry-recommender');
        const recommender = new RegistryRecommender();
        // No spinner in JSON mode so stdout carries nothing but the JSON document.
        const spinner = options.json ? null : ora('Scoring registry artifacts...').start();

        try {
            await recommender.initialize();

            const detector = new UnifiedDetector();
            const hardware = await detector.detect();
            const category = normalizeTaskName(options.category || 'general');
            const normalizedFormat = options.format ? String(options.format).toLowerCase() : undefined;

            const result = await recommender.recommend({
                query,
                category,
                optimizeFor: options.optimize,
                runtime: options.runtime,
                source: options.source,
                format: normalizedFormat,
                quantization: options.quant,
                maxSizeGB: parsePositiveNumberOption(options.maxSize),
                minParamsB: parsePositiveNumberOption(options.minParams),
                maxParamsB: parsePositiveNumberOption(options.maxParams),
                targetContext: parsePositiveNumberOption(options.targetContext),
                // Gated artifacts are excluded unless --include-gated is passed.
                localOnly: !options.includeGated,
                poolLimit: parsePositiveNumberOption(options.poolLimit, 20000),
                limit: parsePositiveNumberOption(options.limit, 10),
                hardware
            });

            if (options.json) {
                const payload = {
                    query,
                    hardware: hardware.summary || hardware,
                    ...result
                };
                console.log(JSON.stringify(payload, null, 2));
                return;
            }

            if (spinner) {
                spinner.succeed(
                    `Scored ${result.total_evaluated} candidates from ${result.total_artifacts} registry artifacts`
                );
            }

            if (result.recommendations.length === 0) {
                console.log(chalk.yellow('No registry recommendations found for those filters.'));
                // An empty registry (as opposed to an empty match) gets a hint to sync first.
                if (result.registry.artifacts === 0) {
                    console.log(chalk.gray('Populate the registry first with: llm-checker registry-sync'));
                }
                return;
            }

            console.log(chalk.blue.bold('\nRegistry Recommendations'));
            console.log(chalk.gray(`Registry: ${result.registry.repos} repos, ${result.registry.artifacts} artifacts`));
            console.log(chalk.gray(`Runtime: ${result.runtime} | Category: ${result.category} | Optimize: ${result.optimizeFor}`));
            console.log('');

            const header = ['#', 'Score', 'Source', 'Model', 'Artifact', 'Params', 'Size', 'Install'];
            const body = result.recommendations.map((item, index) => [
                String(index + 1),
                String(item.score),
                item.source,
                truncateMiddle(item.model, 30),
                truncateMiddle(item.artifact, 32),
                formatRegistryNumber(item.params_b, 'B'),
                formatRegistrySize(item.size_gb),
                truncateMiddle(item.install_command || item.download_url, 44)
            ]);
            console.log(table([header, ...body]));

            // Repeat the first recommendation untruncated, with its rationale.
            const [best] = result.recommendations;
            console.log(chalk.blue.bold('Top pick:'));
            console.log(chalk.white.bold(` ${best.model}`));
            console.log(chalk.gray(` Artifact: ${best.artifact}`));
            console.log(chalk.gray(` Why: ${best.rationale}`));
            if (best.install_command) {
                console.log(chalk.cyan(` ${best.install_command}`));
            }
            if (best.download_url) {
                console.log(chalk.gray(` ${best.download_url}`));
            }
        } catch (error) {
            if (spinner) {
                spinner.fail('Registry recommendation failed');
            }
            console.error(chalk.red('Error:'), error.message);
            if (process.env.DEBUG) {
                console.error(error.stack);
            }
            // Set the exit code instead of exiting so the finally block still runs.
            process.exitCode = 1;
        } finally {
            recommender.close();
        }
    });
|
|
5087
|
+
|
|
4732
5088
|
program
|
|
4733
5089
|
.command('search <query>')
|
|
4734
5090
|
.description('Search models in the database with intelligent scoring')
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-checker",
|
|
3
|
-
"version": "3.6.1",
|
|
3
|
+
"version": "3.7.0",
|
|
4
4
|
"description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
|
|
5
5
|
"bin": {
|
|
6
6
|
"llm-checker": "bin/cli.js",
|
|
@@ -16,6 +16,10 @@
|
|
|
16
16
|
"test:ui": "node tests/ui-cli-smoke.test.js",
|
|
17
17
|
"test:runtime": "node tests/runtime-specdec-tests.js",
|
|
18
18
|
"test:deterministic-pool": "node tests/deterministic-model-pool-check.js",
|
|
19
|
+
"test:registry": "node tests/model-registry-ingestors.test.js",
|
|
20
|
+
"test:registry-main": "node tests/model-registry-main-flow.test.js",
|
|
21
|
+
"test:registry-recommender": "node tests/model-registry-recommender.test.js",
|
|
22
|
+
"test:registry-seed": "node tests/model-registry-seed.test.js",
|
|
19
23
|
"test:policy": "node tests/policy-commands.test.js",
|
|
20
24
|
"test:policy-cli": "node tests/policy-cli-enforcement.js",
|
|
21
25
|
"test:policy-engine": "node tests/policy-engine.test.js",
|
|
@@ -36,7 +40,8 @@
|
|
|
36
40
|
"list-models": "node bin/enhanced_cli.js list-models",
|
|
37
41
|
"ai-check": "node bin/enhanced_cli.js ai-check",
|
|
38
42
|
"ai-run": "node bin/enhanced_cli.js ai-run",
|
|
39
|
-
"sync:seed": "node bin/enhanced_cli.js sync --force --quiet && node scripts/update-seed-db.js",
|
|
43
|
+
"sync:seed": "node bin/enhanced_cli.js sync --force --quiet && node scripts/update-seed-db.js && node scripts/update-registry-seed.js",
|
|
44
|
+
"sync:registry-seed": "node scripts/update-registry-seed.js",
|
|
40
45
|
"benchmark": "cd ml-model && python python/benchmark_collector.py",
|
|
41
46
|
"train-ai": "cd ml-model && python python/train_model.py",
|
|
42
47
|
"postinstall": "echo 'LLM Checker installed. Run: llm-checker hw-detect'"
|