llm-checker 3.5.12 → 3.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -17
- package/bin/cli.js +40 -0
- package/bin/enhanced_cli.js +360 -33
- package/package.json +2 -1
- package/src/ai/model-selector.js +47 -16
- package/src/ai/multi-objective-selector.js +55 -9
- package/src/data/model-database.js +92 -1
- package/src/data/seed/README.md +8 -0
- package/src/data/seed/models.db +0 -0
- package/src/hardware/backends/rocm-detector.js +469 -68
- package/src/hardware/unified-detector.js +39 -5
- package/src/index.js +40 -7
- package/src/models/ai-check-selector.js +27 -2
- package/src/models/deterministic-selector.js +80 -7
- package/src/ollama/client.js +121 -0
- package/src/ollama/enhanced-scraper.js +40 -26
- package/src/ollama/native-scraper.js +52 -27
- package/src/ui/cli-theme.js +139 -24
- package/src/ui/interactive-panel.js +1 -18
- package/src/utils/verbose-progress.js +144 -187
package/README.md
CHANGED
@@ -5,7 +5,7 @@
 **Intelligent Ollama Model Selector**
 
 AI-powered CLI that analyzes your hardware and recommends optimal LLM models.
-Deterministic scoring across **200+
+Deterministic scoring across **200+ Ollama models** and **7k+ variants** with a packaged SQLite catalog, live sync, and hardware-calibrated memory estimation.
 
 [](https://www.npmjs.com/package/llm-checker)
 [](https://www.npmjs.com/package/llm-checker)
@@ -38,12 +38,12 @@ Choosing the right LLM for your hardware is complex. With thousands of model var
 
 | | Feature | Description |
 |:---:|---|---|
-| **200+** |
+| **200+** | Packaged Model Catalog | Ships with a synced Ollama SQLite catalog and can refresh from Ollama on demand |
 | **4D** | Scoring Engine | Quality, Speed, Fit, Context — weighted by use case |
 | **Multi-GPU** | Hardware Detection | Apple Silicon, NVIDIA CUDA, AMD ROCm, Intel Arc, CPU, integrated/dedicated inventory visibility |
 | **Calibrated** | Memory Estimation | Bytes-per-parameter formula validated against real Ollama sizes |
 | **Zero** | Native Dependencies | Pure JavaScript — works on any Node.js 16+ system |
-| **
+| **Live** | AI Run Metrics | `ai-run` shows response speed in tokens/sec next to model output |
 
 ---
 
@@ -93,9 +93,10 @@ npm install -g llm-checker
 - Node.js 16+ (any version: 16, 18, 20, 22, 24)
 - [Ollama](https://ollama.ai) installed for running models
 
-
+The package includes a prebuilt model catalog and declares `sql.js` as an optional dependency for SQLite-powered commands. If your package manager skips optional dependencies and database commands report `sql.js` missing, reinstall with optional dependencies enabled:
+
 ```bash
-npm install
+npm install -g llm-checker --include=optional
 ```
 
 ---
@@ -114,7 +115,10 @@ llm-checker hw-detect
 # 3) Get recommendations by category
 llm-checker recommend --category coding
 
-# 4)
+# 4) Refresh the catalog when you want current Ollama references
+llm-checker sync
+
+# 5) Run with auto-selection and tokens/sec metrics
 llm-checker ai-run --category coding --prompt "Write a hello world in Python"
 ```
 
@@ -147,6 +151,21 @@ hash -r
 llm-checker --version
 ```
 
+### v3.5.13 Highlights
+
+- Ships npm packages with a ready-to-use SQLite model catalog:
+  - 229 Ollama models
+  - 7176 variants
+  - real pull counts and `last_updated` metadata
+- `sync` refreshes the local SQLite catalog from Ollama; `recommend`, `list-models`, `ai-run`, and `ai-check` now prefer that synced catalog instead of stale scraper cache data.
+- Recommendation normalization was hardened:
+  - no more `pulls: 0` for the full catalog after sync
+  - `335m` style tags are treated as millions, not billions
+  - ambiguous aliases like `latest`, `small`, `medium`, and `large` are not guessed into fake parameter counts
+  - cloud variants are filtered out of local recommendations
+- `ai-run` streams model responses through Ollama and appends measured tokens/sec so users can compare installed models by real local speed.
+- The interactive panel no longer asks for optional parameters before every command.
+
 ### v3.3.0 Highlights
 
 - Calibrated routing is now first-class in `recommend` and `ai-run`:
@@ -185,7 +204,7 @@ llm-checker check
 # 3. Get intelligent recommendations by category
 llm-checker recommend
 
-# 4.
+# 4. Refresh the catalog when you want current Ollama metadata
 llm-checker sync
 llm-checker search qwen --use-case coding
 ```
@@ -354,6 +373,7 @@ llm-checker search "qwen coder" --json
 | `recommend` | Intelligent recommendations by category (coding, reasoning, multimodal, etc.) |
 | `calibrate` | Generate calibration result + routing policy artifacts from a JSONL prompt suite |
 | `installed` | Rank your installed Ollama models by compatibility |
+| `list-models` | List the synced Ollama catalog by popularity, category, size, or JSON output |
 | `ollama-plan` | Compute safe Ollama runtime env vars (`NUM_CTX`, `NUM_PARALLEL`, `MAX_LOADED_MODELS`) for selected local models |
 | `mcp-setup` | Print/apply Claude MCP setup command and config snippet (`--apply`, `--json`, `--npx`) |
 | `gpu-plan` | Multi-GPU placement advisor with single/pooled model-size envelopes |
@@ -361,12 +381,12 @@ llm-checker search "qwen coder" --json
 | `amd-guard` | AMD/Windows reliability guard with mitigation hints |
 | `toolcheck` | Test tool-calling compatibility for local models |
 
-###
+### Database Commands
 
 | Command | Description |
 |---------|-------------|
-| `sync` |
-| `search <query>` | Search
+| `sync` | Refresh the local SQLite model catalog from Ollama |
+| `search <query>` | Search the synced catalog with filters and intelligent scoring |
 | `smart-recommend` | Advanced recommendations using the full scoring engine |
 
 ### Enterprise Policy Commands
@@ -487,7 +507,29 @@ License values are canonicalized for policy checks (for example `MIT License` ->
 | Command | Description |
 |---------|-------------|
 | `ai-check` | AI-powered model evaluation with meta-analysis |
-| `ai-run` | AI-powered model selection and execution |
+| `ai-run` | AI-powered model selection and execution with live tokens/sec output |
+
+---
+
+### `ai-run` — Auto-Select and Run
+
+```bash
+llm-checker ai-run --category coding --prompt "Write a file parser in Node.js"
+llm-checker ai-run --benchmark --category general
+llm-checker ai-run --reference-only --category reasoning
+```
+
+`ai-run` chooses the best installed model for the requested category, falls back to the best local alternative when the top catalog pick is not installed, and streams through Ollama directly.
+
+When a response completes, the CLI appends measured local speed:
+
+```text
+>>> hi
+Hello! How can I help you today?
+[42.8 tokens/sec]
+```
+
+Use `--reference-only` when you only want the recommendation card and pull command without starting a chat. Use `--benchmark` for a quick measured speed check on the selected local model.
 
 ---
 
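Aside (not part of the package diff): the bracketed tokens/sec line shown above can be reproduced against a local Ollama server from the `eval_count` (generated tokens) and `eval_duration` (nanoseconds) fields that Ollama's `/api/generate` endpoint returns. Whether `ai-run` computes its figure exactly this way is not visible in this diff; the sketch below is only an illustration, and the model tag is a placeholder.

```js
// Illustrative sketch only (Node 18+ for global fetch): derive tokens/sec from
// Ollama's /api/generate response metadata. The model tag below is a placeholder.
async function measureTokensPerSecond(model, prompt) {
    const res = await fetch('http://localhost:11434/api/generate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model, prompt, stream: false })
    });
    const data = await res.json();
    // eval_count = tokens generated, eval_duration = generation time in nanoseconds
    const tokensPerSecond = data.eval_count / (data.eval_duration / 1e9);
    return { response: data.response, tokensPerSecond };
}

measureTokensPerSecond('llama3.2', 'hi').then(({ response, tokensPerSecond }) => {
    console.log(response);
    console.log(`[${tokensPerSecond.toFixed(1)} tokens/sec]`);
});
```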
@@ -586,9 +628,32 @@ llm-checker search qwen --quant Q4_K_M --max-size 8
 
 ## Model Catalog
 
-LLM Checker
+LLM Checker ships with a pre-synced SQLite snapshot of the Ollama catalog. On first run, that snapshot is copied to `~/.llm-checker/models.db`, so recommendations and catalog search work immediately after npm install.
+
+The packaged snapshot currently includes:
+
+- 229 Ollama models
+- 7176 variants
+- pull counts
+- tag counts
+- last-updated metadata
+- variant params, quantization, size, context, and input type fields when available
+
+Refresh it any time:
+
+```bash
+llm-checker sync
+```
+
+For release maintainers, the packaged seed can be regenerated from the synced local DB:
+
+```bash
+npm run sync:seed
+```
+
+`recommend`, `list-models`, `ai-run`, and `ai-check` prefer the synced SQLite catalog. If the SQLite catalog is unavailable, LLM Checker falls back to the scraped cache and then to the curated catalog.
 
-The curated fallback catalog includes 35+ models from the most popular Ollama families
+The curated fallback catalog includes 35+ models from the most popular Ollama families:
 
 | Family | Models | Best For |
 |--------|--------|----------|
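Aside (not part of the package diff): because the synced catalog is a plain SQLite file and `sql.js` is already an optional dependency, the snapshot at `~/.llm-checker/models.db` can be inspected directly. The table and column names in the sketch below (`models`, `name`, `pulls`) are assumptions made for the example; the real schema lives in `src/data/model-database.js`.

```js
// Illustrative sketch only: open the synced catalog with sql.js and list the
// most-pulled models. Table/column names ("models", "name", "pulls") are assumed.
const initSqlJs = require('sql.js');
const fs = require('fs');
const os = require('os');
const path = require('path');

async function topModels(limit = 5) {
    const SQL = await initSqlJs();
    const dbPath = path.join(os.homedir(), '.llm-checker', 'models.db');
    const db = new SQL.Database(fs.readFileSync(dbPath));
    const result = db.exec(`SELECT name, pulls FROM models ORDER BY pulls DESC LIMIT ${limit}`);
    db.close();
    return result.length ? result[0].values : [];
}

topModels().then(rows => rows.forEach(([name, pulls]) => console.log(name, pulls)));
```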
@@ -758,7 +823,7 @@ LLM Checker uses a deterministic pipeline so the same inputs produce the same ra
 flowchart LR
     subgraph Inputs
         HW["Hardware detector<br/>CPU/GPU/RAM/backend"]
-        REG["
+        REG["Synced SQLite Ollama catalog<br/>(packaged seed + live sync)"]
         LOCAL["Installed local models"]
         FLAGS["CLI options<br/>use-case/runtime/limits/policy"]
     end
@@ -807,7 +872,7 @@ flowchart LR
 ### Execution Stages
 
 1. **Hardware profiling**: Detect CPU/GPU/RAM and effective backend capabilities.
-2. **Model pool assembly**: Merge
+2. **Model pool assembly**: Merge the synced SQLite catalog (or fallback cache/catalog) with locally installed models.
 3. **Candidate filtering**: Keep only relevant models for the requested use case.
 4. **Fit selection**: Choose the best quantization for available memory budget.
 5. **Deterministic scoring**: Score each candidate across quality, speed, fit, and context.
@@ -874,7 +939,8 @@ src/
     detector.js          # Hardware detection
     unified-detector.js  # Cross-platform detection
   data/
-    model-database.js    # SQLite storage
+    model-database.js    # SQLite storage and packaged seed loading
+    seed/models.db       # npm-packaged Ollama catalog snapshot
     sync-manager.js      # Database sync from Ollama registry
 bin/
   enhanced_cli.js        # CLI entry point
package/bin/cli.js
CHANGED
@@ -11,4 +11,44 @@ if (!Number.isFinite(majorNodeVersion) || majorNodeVersion < 16) {
     process.exit(1);
 }
 
+function preprocessAiCheckModelsArg(argv) {
+    const normalizedArgs = [];
+    let modelsFilter = null;
+    let sawAiCheck = false;
+
+    for (let index = 0; index < argv.length; index += 1) {
+        const token = argv[index];
+
+        if (token === 'ai-check') {
+            sawAiCheck = true;
+        }
+
+        if (sawAiCheck && token === '--models') {
+            const nextToken = argv[index + 1];
+            if (nextToken && !nextToken.startsWith('-')) {
+                modelsFilter = nextToken;
+                index += 1;
+            }
+            continue;
+        }
+
+        if (sawAiCheck && token.startsWith('--models=')) {
+            modelsFilter = token.slice('--models='.length);
+            continue;
+        }
+
+        normalizedArgs.push(token);
+    }
+
+    return { args: normalizedArgs, modelsFilter };
+}
+
+const preprocessedArgs = preprocessAiCheckModelsArg(process.argv.slice(2));
+
+if (typeof preprocessedArgs.modelsFilter === 'string' && preprocessedArgs.modelsFilter.trim()) {
+    process.env.LLM_CHECKER_AI_CHECK_MODELS = preprocessedArgs.modelsFilter.trim();
+}
+
+process.argv = [process.argv[0], process.argv[1], ...preprocessedArgs.args];
+
 require('./enhanced_cli');