llm-checker 3.5.14 → 3.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/analyzer/compatibility.js +5 -0
- package/analyzer/performance.js +5 -4
- package/bin/cli.js +5 -39
- package/bin/enhanced_cli.js +88 -19
- package/bin/mcp-server.mjs +266 -101
- package/package.json +7 -7
- package/src/ai/multi-objective-selector.js +118 -11
- package/src/calibration/calibration-manager.js +4 -1
- package/src/data/model-database.js +39 -5
- package/src/data/sync-manager.js +32 -18
- package/src/hardware/backends/apple-silicon.js +5 -1
- package/src/hardware/backends/cuda-detector.js +47 -19
- package/src/hardware/backends/intel-detector.js +6 -2
- package/src/hardware/backends/rocm-detector.js +6 -2
- package/src/hardware/detector.js +57 -30
- package/src/hardware/unified-detector.js +129 -25
- package/src/models/ai-check-selector.js +36 -5
- package/src/models/deterministic-selector.js +163 -15
- package/src/models/expanded_database.js +9 -5
- package/src/models/intelligent-selector.js +87 -1
- package/src/models/requirements.js +16 -11
- package/src/models/scoring-core.js +341 -0
- package/src/models/scoring-engine.js +9 -2
- package/src/ollama/capacity-planner.js +15 -2
- package/src/ollama/client.js +70 -30
- package/src/ollama/enhanced-client.js +20 -2
- package/src/ollama/manager.js +14 -2
- package/src/policy/cli-policy.js +8 -2
- package/src/policy/policy-engine.js +2 -1
- package/src/provenance/model-provenance.js +4 -1
- package/src/ui/cli-theme.js +57 -7
- package/src/ui/interactive-panel.js +176 -20
package/README.md
CHANGED
|
@@ -573,6 +573,19 @@ This makes integrated GPUs visible even when the selected runtime backend is sti
|
|
|
573
573
|
llm-checker recommend
|
|
574
574
|
```
|
|
575
575
|
|
|
576
|
+
As of the scoring unification (#96), `check`, `recommend`, and `smart-recommend`
|
|
577
|
+
all derive their ranking from **one canonical scoring core**
|
|
578
|
+
(`DeterministicModelSelector` via `src/models/scoring-core.js`), so identical
|
|
579
|
+
`(model, hardware)` inputs score identically across all three and the
|
|
580
|
+
high-capacity right-sizing floor applies everywhere. They differ only in their
|
|
581
|
+
model **source** and **presentation**, not in how a given model is ranked:
|
|
582
|
+
|
|
583
|
+
| Command | Role | Ranking core |
|
|
584
|
+
|---------|------|--------------|
|
|
585
|
+
| `recommend` | Canonical model recommendations by category | Shared core (reference output) |
|
|
586
|
+
| `check` | Full hardware-compatibility report with a recommendation card | Shared core (consistent ranking, fit-oriented report) |
|
|
587
|
+
| `smart-recommend` | Catalog/DB-backed recommendations with a detailed score breakdown | Shared core (same ordering + scores) |
|
|
588
|
+
|
|
576
589
|
Use optimization profiles to steer ranking by intent:
|
|
577
590
|
|
|
578
591
|
```bash
|
|
@@ -695,7 +708,7 @@ Three scoring systems are available, each optimized for different workflows:
|
|
|
695
708
|
| `reasoning` | 60% | 10% | 20% | 10% |
|
|
696
709
|
| `multimodal` | 50% | 15% | 20% | 15% |
|
|
697
710
|
|
|
698
|
-
**Scoring Engine** (used by `smart-recommend`
|
|
711
|
+
**Scoring Engine** (used by `search` for catalog scoring; `smart-recommend`'s final ranking is produced by the shared scoring core — see #96):
|
|
699
712
|
|
|
700
713
|
| Use Case | Quality | Speed | Fit | Context |
|
|
701
714
|
|----------|:-------:|:-----:|:---:|:-------:|
|
|
package/analyzer/compatibility.js
CHANGED
|
@@ -428,6 +428,11 @@ class CompatibilityAnalyzer {
|
|
|
428
428
|
}
|
|
429
429
|
|
|
430
430
|
parseModelSize(sizeString) {
|
|
431
|
+
// Guard non-string input (undefined / a numeric size) — this runs for every
|
|
432
|
+
// model in calculateModelCompatibility, so one bad entry must not crash the
|
|
433
|
+
// whole analysis. Matches the guard in analyzer/performance.js.
|
|
434
|
+
if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
|
|
435
|
+
|
|
431
436
|
const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
|
|
432
437
|
if (!match) return 1;
|
|
433
438
|
|
package/analyzer/performance.js
CHANGED
|
@@ -363,12 +363,13 @@ class PerformanceAnalyzer {
|
|
|
363
363
|
}
|
|
364
364
|
|
|
365
365
|
estimateLoadTime(model, hardware) {
|
|
366
|
+
// ~2 GB per 1B params (fp16-ish) on-disk approximation.
|
|
366
367
|
const modelSizeGB = this.parseModelSize(model.size) * 2;
|
|
367
368
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
loadTimeSeconds
|
|
369
|
+
// Fold the previous `* 2` then `* 0.7` two-step (a leftover from an
|
|
370
|
+
// incomplete edit, with a dead blank line) into one documented factor:
|
|
371
|
+
// ~1.4 s of load time per GB before hardware adjustments.
|
|
372
|
+
let loadTimeSeconds = modelSizeGB * 1.4;
|
|
372
373
|
|
|
373
374
|
const cpuSpeedFactor = Math.max(0.5, Math.min(1.5, (hardware.cpu.speed || 2.5) / 2.5));
|
|
374
375
|
loadTimeSeconds /= cpuSpeedFactor;
|
package/bin/cli.js
CHANGED
|
@@ -11,44 +11,10 @@ if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
|
|
|
11
11
|
process.exit(1);
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
for (let index = 0; index < argv.length; index += 1) {
|
|
20
|
-
const token = argv[index];
|
|
21
|
-
|
|
22
|
-
if (token === 'ai-check') {
|
|
23
|
-
sawAiCheck = true;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
if (sawAiCheck && token === '--models') {
|
|
27
|
-
const nextToken = argv[index + 1];
|
|
28
|
-
if (nextToken && !nextToken.startsWith('-')) {
|
|
29
|
-
modelsFilter = nextToken;
|
|
30
|
-
index += 1;
|
|
31
|
-
}
|
|
32
|
-
continue;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
if (sawAiCheck && token.startsWith('--models=')) {
|
|
36
|
-
modelsFilter = token.slice('--models='.length);
|
|
37
|
-
continue;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
normalizedArgs.push(token);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
return { args: normalizedArgs, modelsFilter };
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
const preprocessedArgs = preprocessAiCheckModelsArg(process.argv.slice(2));
|
|
47
|
-
|
|
48
|
-
if (typeof preprocessedArgs.modelsFilter === 'string' && preprocessedArgs.modelsFilter.trim()) {
|
|
49
|
-
process.env.LLM_CHECKER_AI_CHECK_MODELS = preprocessedArgs.modelsFilter.trim();
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
process.argv = [process.argv[0], process.argv[1], ...preprocessedArgs.args];
|
|
14
|
+
// `ai-check --models <list>` is now a real commander option handled in
|
|
15
|
+
// enhanced_cli.js (and the AICheckSelector applies it as a candidate filter), so
|
|
16
|
+
// the previous argv-rewriting shim — which stripped the flag and stashed it in an
|
|
17
|
+
// env var that nothing read — is gone. LLM_CHECKER_AI_CHECK_MODELS still works as
|
|
18
|
+
// an explicit fallback for the same filter.
|
|
53
19
|
|
|
54
20
|
require('./enhanced_cli');
|
package/bin/enhanced_cli.js
CHANGED
|
@@ -58,7 +58,7 @@ const calibrationManager = new CalibrationManager();
|
|
|
58
58
|
|
|
59
59
|
const COMMAND_HEADER_LABELS = {
|
|
60
60
|
'hw-detect': 'Hardware Detection',
|
|
61
|
-
'smart-recommend': 'Smart Recommend',
|
|
61
|
+
'smart-recommend': 'Smart Recommend (Experimental)',
|
|
62
62
|
search: 'Model Search',
|
|
63
63
|
sync: 'Database Sync',
|
|
64
64
|
'mcp-setup': 'Claude MCP Setup',
|
|
@@ -79,6 +79,19 @@ function showAsciiArt(command) {
|
|
|
79
79
|
renderCommandHeader(label);
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
const RECOMMENDATION_COMMAND_NOTES = {
|
|
83
|
+
check: 'Compatibility report: shows hardware fit first. Use `llm-checker recommend` for canonical ranked model picks.',
|
|
84
|
+
recommend: 'Canonical recommendations: deterministic hardware-aware selector by category.',
|
|
85
|
+
'smart-recommend': 'Experimental scoring engine: results can differ from `recommend` while this path is being unified.'
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
function displayRecommendationCommandNote(command) {
|
|
89
|
+
const note = RECOMMENDATION_COMMAND_NOTES[command];
|
|
90
|
+
if (!note) return;
|
|
91
|
+
console.log(chalk.gray(`Recommendation mode: ${note}`));
|
|
92
|
+
console.log('');
|
|
93
|
+
}
|
|
94
|
+
|
|
82
95
|
// Function to search Ollama models by use case
|
|
83
96
|
function getOllamaCacheFile(filename) {
|
|
84
97
|
try {
|
|
@@ -3295,6 +3308,7 @@ Policy scope:
|
|
|
3295
3308
|
)
|
|
3296
3309
|
.action(async (options) => {
|
|
3297
3310
|
showAsciiArt('check');
|
|
3311
|
+
displayRecommendationCommandNote('check');
|
|
3298
3312
|
try {
|
|
3299
3313
|
// Use verbose progress unless explicitly disabled
|
|
3300
3314
|
const verboseEnabled = options.verbose !== false;
|
|
@@ -3462,11 +3476,8 @@ Policy scope:
|
|
|
3462
3476
|
|
|
3463
3477
|
program
|
|
3464
3478
|
.command('ollama')
|
|
3465
|
-
.description('
|
|
3466
|
-
.option('-l, --list', 'List installed models
|
|
3467
|
-
.option('-r, --running', 'Show running models with performance data')
|
|
3468
|
-
.option('-c, --compatible', 'Show only hardware-compatible installed models')
|
|
3469
|
-
.option('--recommendations', 'Show installation recommendations')
|
|
3479
|
+
.description('Check Ollama integration status (use `installed` to rank installed models)')
|
|
3480
|
+
.option('-l, --list', 'List installed models ranked by compatibility (runs `installed`)')
|
|
3470
3481
|
.action(async (options) => {
|
|
3471
3482
|
showAsciiArt('ollama');
|
|
3472
3483
|
const spinner = ora('Checking Ollama integration...').start();
|
|
@@ -3492,7 +3503,9 @@ program
|
|
|
3492
3503
|
spinner.succeed(`Ollama integration active`);
|
|
3493
3504
|
|
|
3494
3505
|
if (options.list) {
|
|
3495
|
-
console.log('
|
|
3506
|
+
console.log(chalk.cyan('\nRun `llm-checker installed` to rank your installed models by compatibility and use-case.'));
|
|
3507
|
+
} else {
|
|
3508
|
+
console.log(chalk.gray('\nTip: `llm-checker installed` ranks installed models; `llm-checker recommend` suggests models for your hardware.'));
|
|
3496
3509
|
}
|
|
3497
3510
|
|
|
3498
3511
|
} catch (error) {
|
|
@@ -3520,10 +3533,18 @@ program
|
|
|
3520
3533
|
const availability = await ollamaClient.checkOllamaAvailability();
|
|
3521
3534
|
if (!availability.available) {
|
|
3522
3535
|
spinner.fail('Ollama not available');
|
|
3523
|
-
|
|
3524
|
-
|
|
3525
|
-
|
|
3536
|
+
if (options.json) {
|
|
3537
|
+
// --json must always emit parseable JSON on stdout (the success
|
|
3538
|
+
// path prints an array); previously these branches printed
|
|
3539
|
+
// ANSI-colored prose and broke `installed --json | jq`.
|
|
3540
|
+
console.log(JSON.stringify([], null, 2));
|
|
3541
|
+
} else {
|
|
3542
|
+
console.log(chalk.red('\n' + availability.error));
|
|
3543
|
+
if (availability.hint) {
|
|
3544
|
+
console.log(chalk.yellow('Hint: ' + availability.hint));
|
|
3545
|
+
}
|
|
3526
3546
|
}
|
|
3547
|
+
process.exitCode = 1;
|
|
3527
3548
|
return;
|
|
3528
3549
|
}
|
|
3529
3550
|
|
|
@@ -3531,8 +3552,12 @@ program
|
|
|
3531
3552
|
const installedModels = await ollamaClient.getLocalModels();
|
|
3532
3553
|
if (!installedModels || installedModels.length === 0) {
|
|
3533
3554
|
spinner.fail('No models installed');
|
|
3534
|
-
|
|
3535
|
-
|
|
3555
|
+
if (options.json) {
|
|
3556
|
+
console.log(JSON.stringify([], null, 2));
|
|
3557
|
+
} else {
|
|
3558
|
+
console.log(chalk.yellow('\nNo Ollama models found. Install one with:'));
|
|
3559
|
+
console.log(chalk.cyan(' ollama pull llama3.2:3b'));
|
|
3560
|
+
}
|
|
3536
3561
|
return;
|
|
3537
3562
|
}
|
|
3538
3563
|
|
|
@@ -3851,6 +3876,7 @@ Calibrated routing examples:
|
|
|
3851
3876
|
)
|
|
3852
3877
|
.action(async (options) => {
|
|
3853
3878
|
showAsciiArt('recommend');
|
|
3879
|
+
displayRecommendationCommandNote('recommend');
|
|
3854
3880
|
try {
|
|
3855
3881
|
const verboseEnabled = options.verbose !== false;
|
|
3856
3882
|
const checker = new (getLLMChecker())({ verbose: verboseEnabled });
|
|
@@ -4365,6 +4391,7 @@ program
|
|
|
4365
4391
|
.option('--ctx <number>', 'Target context length', '8192')
|
|
4366
4392
|
.option('-e, --evaluator <model>', 'Evaluator model (auto for best available)', 'auto')
|
|
4367
4393
|
.option('-w, --weight <number>', 'AI weight (0.0-1.0, default 0.3)', '0.3')
|
|
4394
|
+
.option('-m, --models <list>', 'Restrict evaluation to these models (comma-separated)')
|
|
4368
4395
|
.action(async (options) => {
|
|
4369
4396
|
showAsciiArt('ai-check');
|
|
4370
4397
|
// Check if Ollama is installed first
|
|
@@ -4377,14 +4404,33 @@ program
|
|
|
4377
4404
|
|
|
4378
4405
|
const aiCheckSelector = new AICheckSelector();
|
|
4379
4406
|
|
|
4407
|
+
// Validate numeric options up front: bad input (e.g. --weight abc, --top
|
|
4408
|
+
// foo) used to flow through as NaN, which survived the downstream clamp
|
|
4409
|
+
// and made the displayed AI weight and every weighted score NaN.
|
|
4410
|
+
const weight = Number.parseFloat(options.weight);
|
|
4411
|
+
const top = Number.parseInt(options.top, 10);
|
|
4412
|
+
const ctx = options.ctx ? Number.parseInt(options.ctx, 10) : undefined;
|
|
4413
|
+
const invalidNumeric =
|
|
4414
|
+
(!Number.isFinite(weight) || weight < 0 || weight > 1) ? `--weight ${options.weight} (use a number from 0.0 to 1.0)`
|
|
4415
|
+
: (!Number.isInteger(top) || top <= 0) ? `--top ${options.top} (use a positive integer)`
|
|
4416
|
+
: (ctx !== undefined && (!Number.isInteger(ctx) || ctx <= 0)) ? `--ctx ${options.ctx} (use a positive integer)`
|
|
4417
|
+
: null;
|
|
4418
|
+
if (invalidNumeric) {
|
|
4419
|
+
spinner.stop();
|
|
4420
|
+
console.error(chalk.red(`Invalid ${invalidNumeric}`));
|
|
4421
|
+
process.exitCode = 1;
|
|
4422
|
+
return;
|
|
4423
|
+
}
|
|
4424
|
+
|
|
4380
4425
|
const checkOptions = {
|
|
4381
4426
|
category: options.category,
|
|
4382
|
-
top
|
|
4383
|
-
ctx
|
|
4427
|
+
top,
|
|
4428
|
+
ctx,
|
|
4384
4429
|
evaluator: options.evaluator,
|
|
4385
|
-
weight
|
|
4430
|
+
weight,
|
|
4431
|
+
models: options.models || process.env.LLM_CHECKER_AI_CHECK_MODELS || undefined
|
|
4386
4432
|
};
|
|
4387
|
-
|
|
4433
|
+
|
|
4388
4434
|
spinner.stop();
|
|
4389
4435
|
|
|
4390
4436
|
const result = await aiCheckSelector.aiCheck(checkOptions);
|
|
@@ -4731,8 +4777,20 @@ program
|
|
|
4731
4777
|
});
|
|
4732
4778
|
|
|
4733
4779
|
if (searchResults.length === 0) {
|
|
4734
|
-
if (spinner) spinner.info('No models found matching your query');
|
|
4735
4780
|
syncManager.close();
|
|
4781
|
+
if (options.json) {
|
|
4782
|
+
// --json must always emit parseable JSON, even on zero matches
|
|
4783
|
+
// (previously it printed nothing and broke `search ... --json | jq`).
|
|
4784
|
+
console.log(JSON.stringify({
|
|
4785
|
+
query,
|
|
4786
|
+
all: [],
|
|
4787
|
+
recommendations: [],
|
|
4788
|
+
insights: [],
|
|
4789
|
+
meta: { afterFiltering: 0, totalMatches: 0 }
|
|
4790
|
+
}, null, 2));
|
|
4791
|
+
} else if (spinner) {
|
|
4792
|
+
spinner.info('No models found matching your query');
|
|
4793
|
+
}
|
|
4736
4794
|
return;
|
|
4737
4795
|
}
|
|
4738
4796
|
|
|
@@ -4806,7 +4864,7 @@ program
|
|
|
4806
4864
|
|
|
4807
4865
|
program
|
|
4808
4866
|
.command('smart-recommend')
|
|
4809
|
-
.description('
|
|
4867
|
+
.description('Experimental recommendations using the alternate scoring engine')
|
|
4810
4868
|
.option('-u, --use-case <case>', 'Optimize for use case', 'general')
|
|
4811
4869
|
.option('-l, --limit <n>', 'Maximum number of recommendations', '5')
|
|
4812
4870
|
.option('--target-tps <n>', 'Target tokens per second', '20')
|
|
@@ -4814,8 +4872,19 @@ program
|
|
|
4814
4872
|
.option('--include-vision', 'Include vision/multimodal models')
|
|
4815
4873
|
.option('--include-embeddings', 'Include embedding models')
|
|
4816
4874
|
.option('-j, --json', 'Output as JSON')
|
|
4875
|
+
.addHelpText(
|
|
4876
|
+
'after',
|
|
4877
|
+
`
|
|
4878
|
+
Recommendation engine note:
|
|
4879
|
+
smart-recommend is experimental and may intentionally differ from recommend.
|
|
4880
|
+
Use "llm-checker recommend" for canonical package recommendations.
|
|
4881
|
+
`
|
|
4882
|
+
)
|
|
4817
4883
|
.action(async (options) => {
|
|
4818
|
-
if (!options.json)
|
|
4884
|
+
if (!options.json) {
|
|
4885
|
+
showAsciiArt('smart-recommend');
|
|
4886
|
+
displayRecommendationCommandNote('smart-recommend');
|
|
4887
|
+
}
|
|
4819
4888
|
const SyncManager = require('../src/data/sync-manager');
|
|
4820
4889
|
const IntelligentSelector = require('../src/models/intelligent-selector');
|
|
4821
4890
|
const UnifiedDetector = require('../src/hardware/unified-detector');
|