llm-checker 3.5.15 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -8
- package/analyzer/compatibility.js +5 -0
- package/analyzer/performance.js +5 -4
- package/bin/cli.js +5 -39
- package/bin/enhanced_cli.js +449 -24
- package/bin/mcp-server.mjs +266 -101
- package/package.json +13 -8
- package/src/ai/multi-objective-selector.js +118 -11
- package/src/calibration/calibration-manager.js +4 -1
- package/src/data/model-database.js +489 -5
- package/src/data/registry-ingestors.js +751 -0
- package/src/data/registry-recommender.js +514 -0
- package/src/data/seed/README.md +11 -3
- package/src/data/seed/models.db +0 -0
- package/src/data/sync-manager.js +32 -18
- package/src/hardware/backends/apple-silicon.js +5 -1
- package/src/hardware/backends/cuda-detector.js +47 -19
- package/src/hardware/backends/intel-detector.js +6 -2
- package/src/hardware/backends/rocm-detector.js +6 -2
- package/src/hardware/detector.js +57 -30
- package/src/hardware/unified-detector.js +129 -25
- package/src/index.js +68 -4
- package/src/models/ai-check-selector.js +36 -5
- package/src/models/deterministic-selector.js +179 -18
- package/src/models/expanded_database.js +9 -5
- package/src/models/intelligent-selector.js +87 -1
- package/src/models/moe-assumptions.js +11 -0
- package/src/models/requirements.js +16 -11
- package/src/models/scoring-core.js +341 -0
- package/src/models/scoring-engine.js +9 -2
- package/src/ollama/capacity-planner.js +15 -2
- package/src/ollama/client.js +70 -30
- package/src/ollama/enhanced-client.js +20 -2
- package/src/ollama/manager.js +14 -2
- package/src/policy/cli-policy.js +8 -2
- package/src/policy/policy-engine.js +2 -1
- package/src/provenance/model-provenance.js +4 -1
- package/src/ui/cli-theme.js +47 -7
- package/src/ui/interactive-panel.js +162 -24
package/README.md
CHANGED
|
@@ -573,6 +573,19 @@ This makes integrated GPUs visible even when the selected runtime backend is sti
|
|
|
573
573
|
llm-checker recommend
|
|
574
574
|
```
|
|
575
575
|
|
|
576
|
+
As of the scoring unification (#96), `check`, `recommend`, and `smart-recommend`
|
|
577
|
+
all derive their ranking from **one canonical scoring core**
|
|
578
|
+
(`DeterministicModelSelector` via `src/models/scoring-core.js`), so identical
|
|
579
|
+
`(model, hardware)` inputs score identically across all three and the
|
|
580
|
+
high-capacity right-sizing floor applies everywhere. They differ only in their
|
|
581
|
+
model **source** and **presentation**, not in how a given model is ranked:
|
|
582
|
+
|
|
583
|
+
| Command | Role | Ranking core |
|
|
584
|
+
|---------|------|--------------|
|
|
585
|
+
| `recommend` | Canonical model recommendations by category | Shared core (reference output) |
|
|
586
|
+
| `check` | Full hardware-compatibility report with a recommendation card | Shared core (consistent ranking, fit-oriented report) |
|
|
587
|
+
| `smart-recommend` | Catalog/DB-backed recommendations with a detailed score breakdown | Shared core (same ordering + scores) |
|
|
588
|
+
|
|
576
589
|
Use optimization profiles to steer ranking by intent:
|
|
577
590
|
|
|
578
591
|
```bash
|
|
@@ -628,30 +641,36 @@ llm-checker search qwen --quant Q4_K_M --max-size 8
|
|
|
628
641
|
|
|
629
642
|
## Model Catalog
|
|
630
643
|
|
|
631
|
-
LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
|
|
644
|
+
LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog plus a multi-source registry of exact downloadable/installable model artifacts. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
|
|
632
645
|
|
|
633
646
|
The packaged snapshot currently includes:
|
|
634
647
|
|
|
635
648
|
- 229 Ollama models
|
|
636
649
|
- 7176 variants
|
|
650
|
+
- 3259 multi-source registry repositories
|
|
651
|
+
- 33729 exact model artifacts from Hugging Face, Ollama, and GPT4All
|
|
652
|
+
- Hugging Face top 3000 repositories by downloads, fetched with API pagination
|
|
637
653
|
- pull counts
|
|
638
654
|
- tag counts
|
|
639
655
|
- last-updated metadata
|
|
640
|
-
- variant params, quantization, size, context,
|
|
656
|
+
- variant params, quantization, size, context, runtime, install commands, download URLs, license/gated flags, tasks, and modalities when available
|
|
641
657
|
|
|
642
658
|
Refresh it any time:
|
|
643
659
|
|
|
644
660
|
```bash
|
|
645
661
|
llm-checker sync
|
|
662
|
+
llm-checker registry-sync --sources ollama,huggingface,gpt4all
|
|
663
|
+
llm-checker registry-search qwen --runtime auto --max-size 8
|
|
664
|
+
llm-checker registry-recommend --category coding --runtime auto --max-size 8
|
|
646
665
|
```
|
|
647
666
|
|
|
648
|
-
For release maintainers, the packaged seed can be regenerated from the synced local DB:
|
|
667
|
+
For release maintainers, the packaged seed can be regenerated from the synced local DB and registry APIs:
|
|
649
668
|
|
|
650
669
|
```bash
|
|
651
670
|
npm run sync:seed
|
|
652
671
|
```
|
|
653
672
|
|
|
654
|
-
`recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
|
|
673
|
+
`recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. `registry-search` queries exact artifacts across sources, and `registry-recommend` ranks exact artifacts from the registry with the deterministic hardware-aware selector. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
|
|
655
674
|
|
|
656
675
|
The curated fallback catalog includes 35+ models from the most popular Ollama families:
|
|
657
676
|
|
|
@@ -695,7 +714,7 @@ Three scoring systems are available, each optimized for different workflows:
|
|
|
695
714
|
| `reasoning` | 60% | 10% | 20% | 10% |
|
|
696
715
|
| `multimodal` | 50% | 15% | 20% | 15% |
|
|
697
716
|
|
|
698
|
-
**Scoring Engine** (used by `smart-recommend`
|
|
717
|
+
**Scoring Engine** (used by `search` for catalog scoring; `smart-recommend`'s final ranking is produced by the shared scoring core — see #96):
|
|
699
718
|
|
|
700
719
|
| Use Case | Quality | Speed | Fit | Context |
|
|
701
720
|
|----------|:-------:|:-----:|:---:|:-------:|
|
|
@@ -823,7 +842,7 @@ LLM Checker uses a deterministic pipeline so the same inputs produce the same ra
|
|
|
823
842
|
flowchart LR
|
|
824
843
|
subgraph Inputs
|
|
825
844
|
HW["Hardware detector<br/>CPU/GPU/RAM/backend"]
|
|
826
|
-
REG["Synced SQLite
|
|
845
|
+
REG["Synced SQLite model catalog<br/>(Ollama seed + multi-source registry)"]
|
|
827
846
|
LOCAL["Installed local models"]
|
|
828
847
|
FLAGS["CLI options<br/>use-case/runtime/limits/policy"]
|
|
829
848
|
end
|
|
@@ -939,8 +958,9 @@ src/
|
|
|
939
958
|
detector.js # Hardware detection
|
|
940
959
|
unified-detector.js # Cross-platform detection
|
|
941
960
|
data/
|
|
942
|
-
model-database.js # SQLite storage and packaged seed loading
|
|
943
|
-
|
|
961
|
+
model-database.js # SQLite storage, registry tables, and packaged seed loading
|
|
962
|
+
registry-ingestors.js # Ollama/Hugging Face/GPT4All artifact normalization
|
|
963
|
+
seed/models.db # npm-packaged Ollama + multi-source registry snapshot
|
|
944
964
|
sync-manager.js # Database sync from Ollama registry
|
|
945
965
|
bin/
|
|
946
966
|
enhanced_cli.js # CLI entry point
|
package/analyzer/compatibility.js
CHANGED
|
@@ -428,6 +428,11 @@ class CompatibilityAnalyzer {
|
|
|
428
428
|
}
|
|
429
429
|
|
|
430
430
|
parseModelSize(sizeString) {
|
|
431
|
+
// Guard non-string input (undefined / a numeric size) — this runs for every
|
|
432
|
+
// model in calculateModelCompatibility, so one bad entry must not crash the
|
|
433
|
+
// whole analysis. Matches the guard in analyzer/performance.js.
|
|
434
|
+
if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
|
|
435
|
+
|
|
431
436
|
const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
|
|
432
437
|
if (!match) return 1;
|
|
433
438
|
|
package/analyzer/performance.js
CHANGED
|
@@ -363,12 +363,13 @@ class PerformanceAnalyzer {
|
|
|
363
363
|
}
|
|
364
364
|
|
|
365
365
|
estimateLoadTime(model, hardware) {
|
|
366
|
+
// ~2 GB per 1B params (fp16-ish) on-disk approximation.
|
|
366
367
|
const modelSizeGB = this.parseModelSize(model.size) * 2;
|
|
367
368
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
loadTimeSeconds
|
|
369
|
+
// Fold the previous `* 2` then `* 0.7` two-step (a leftover from an
|
|
370
|
+
// incomplete edit, with a dead blank line) into one documented factor:
|
|
371
|
+
// ~1.4 s of load time per GB before hardware adjustments.
|
|
372
|
+
let loadTimeSeconds = modelSizeGB * 1.4;
|
|
372
373
|
|
|
373
374
|
const cpuSpeedFactor = Math.max(0.5, Math.min(1.5, (hardware.cpu.speed || 2.5) / 2.5));
|
|
374
375
|
loadTimeSeconds /= cpuSpeedFactor;
|
package/bin/cli.js
CHANGED
|
@@ -11,44 +11,10 @@ if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
|
|
|
11
11
|
process.exit(1);
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
for (let index = 0; index < argv.length; index += 1) {
|
|
20
|
-
const token = argv[index];
|
|
21
|
-
|
|
22
|
-
if (token === 'ai-check') {
|
|
23
|
-
sawAiCheck = true;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
if (sawAiCheck && token === '--models') {
|
|
27
|
-
const nextToken = argv[index + 1];
|
|
28
|
-
if (nextToken && !nextToken.startsWith('-')) {
|
|
29
|
-
modelsFilter = nextToken;
|
|
30
|
-
index += 1;
|
|
31
|
-
}
|
|
32
|
-
continue;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
if (sawAiCheck && token.startsWith('--models=')) {
|
|
36
|
-
modelsFilter = token.slice('--models='.length);
|
|
37
|
-
continue;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
normalizedArgs.push(token);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
return { args: normalizedArgs, modelsFilter };
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
const preprocessedArgs = preprocessAiCheckModelsArg(process.argv.slice(2));
|
|
47
|
-
|
|
48
|
-
if (typeof preprocessedArgs.modelsFilter === 'string' && preprocessedArgs.modelsFilter.trim()) {
|
|
49
|
-
process.env.LLM_CHECKER_AI_CHECK_MODELS = preprocessedArgs.modelsFilter.trim();
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
process.argv = [process.argv[0], process.argv[1], ...preprocessedArgs.args];
|
|
14
|
+
// `ai-check --models <list>` is now a real commander option handled in
|
|
15
|
+
// enhanced_cli.js (and the AICheckSelector applies it as a candidate filter), so
|
|
16
|
+
// the previous argv-rewriting shim — which stripped the flag and stashed it in an
|
|
17
|
+
// env var that nothing read — is gone. LLM_CHECKER_AI_CHECK_MODELS still works as
|
|
18
|
+
// an explicit fallback for the same filter.
|
|
53
19
|
|
|
54
20
|
require('./enhanced_cli');
|