llm-checker 3.4.2 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -2
- package/analyzer/performance.js +40 -94
- package/bin/enhanced_cli.js +320 -254
- package/bin/mcp-server.mjs +0 -0
- package/package.json +1 -1
- package/src/models/ai-check-selector.js +2 -2
- package/src/models/deterministic-selector.js +1 -0
- package/src/models/expanded_database.js +10 -83
- package/src/ollama/client.js +29 -4
- package/src/ui/cli-theme.js +733 -0
- package/src/ui/interactive-panel.js +599 -0
- package/src/utils/fetch.js +17 -0
- package/src/utils/token-speed-estimator.js +207 -0
- package/src/ollama/gpu-placement-planner.js +0 -496
package/README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<p align="center">
|
|
2
|
-
<img src="
|
|
2
|
+
<img src="assets/llm-checker-logo.gif" alt="LLM Checker Animated Logo" width="760">
|
|
3
3
|
</p>
|
|
4
4
|
|
|
5
5
|
<p align="center">
|
|
@@ -254,6 +254,12 @@ npm install -g llm-checker
|
|
|
254
254
|
claude mcp add llm-checker -- llm-checker-mcp
|
|
255
255
|
```
|
|
256
256
|
|
|
257
|
+
Or generate the exact command directly from the CLI:
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
llm-checker mcp-setup
|
|
261
|
+
```
|
|
262
|
+
|
|
257
263
|
Or with npx (no global install needed):
|
|
258
264
|
|
|
259
265
|
```bash
|
|
@@ -320,6 +326,26 @@ Claude will automatically call the right tools and give you actionable results.
|
|
|
320
326
|
|
|
321
327
|
---
|
|
322
328
|
|
|
329
|
+
## Interactive CLI Panel
|
|
330
|
+
|
|
331
|
+
Running `llm-checker` with no arguments now opens an interactive panel (TTY terminals):
|
|
332
|
+
|
|
333
|
+
- animated startup banner
|
|
334
|
+
- main command list with descriptions
|
|
335
|
+
- type `/` to open all commands
|
|
336
|
+
- use up/down arrows to select a command
|
|
337
|
+
- press `Enter` to execute
|
|
338
|
+
- add optional extra flags before run (example: `--json --limit 5`)
|
|
339
|
+
|
|
340
|
+
For scripting and automation, direct command invocation remains unchanged:
|
|
341
|
+
|
|
342
|
+
```bash
|
|
343
|
+
llm-checker check --use-case coding --limit 3
|
|
344
|
+
llm-checker search "qwen coder" --json
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
323
349
|
## Commands
|
|
324
350
|
|
|
325
351
|
### Core Commands
|
|
@@ -332,7 +358,7 @@ Claude will automatically call the right tools and give you actionable results.
|
|
|
332
358
|
| `calibrate` | Generate calibration result + routing policy artifacts from a JSONL prompt suite |
|
|
333
359
|
| `installed` | Rank your installed Ollama models by compatibility |
|
|
334
360
|
| `ollama-plan` | Compute safe Ollama runtime env vars (`NUM_CTX`, `NUM_PARALLEL`, `MAX_LOADED_MODELS`) for selected local models |
|
|
335
|
-
| `
|
|
361
|
+
| `mcp-setup` | Print/apply Claude MCP setup command and config snippet (`--apply`, `--json`, `--npx`) |
|
|
336
362
|
|
|
337
363
|
### Advanced Commands (require `sql.js`)
|
|
338
364
|
|
package/analyzer/performance.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
const { estimateTokenSpeedFromHardware } = require('../src/utils/token-speed-estimator');
|
|
2
|
+
|
|
1
3
|
class PerformanceAnalyzer {
|
|
2
4
|
constructor() {
|
|
3
5
|
this.benchmarkCache = new Map();
|
|
@@ -284,113 +286,57 @@ class PerformanceAnalyzer {
|
|
|
284
286
|
}
|
|
285
287
|
|
|
286
288
|
calculateRealisticPerformance(model, hardware) {
|
|
287
|
-
// Parse model size
|
|
288
289
|
const modelSizeB = this.parseModelSize(model.size);
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
const
|
|
294
|
-
const baseSpeed = hardware.cpu?.speed || 2.4;
|
|
295
|
-
const vramGB = hardware.gpu?.vram || 0;
|
|
296
|
-
const memoryTotal = hardware.memory?.total || 8;
|
|
297
|
-
|
|
298
|
-
// Hardware type detection
|
|
299
|
-
const isAppleSilicon = hardware.cpu?.architecture === 'Apple Silicon' || (
|
|
300
|
-
process.platform === 'darwin' && (
|
|
301
|
-
gpuModel.toLowerCase().includes('apple') ||
|
|
302
|
-
gpuModel.toLowerCase().includes('m1') ||
|
|
303
|
-
gpuModel.toLowerCase().includes('m2') ||
|
|
304
|
-
gpuModel.toLowerCase().includes('m3') ||
|
|
305
|
-
gpuModel.toLowerCase().includes('m4')
|
|
306
|
-
)
|
|
307
|
-
);
|
|
308
|
-
const isIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel);
|
|
309
|
-
const hasDedicatedGPU = vramGB > 0 && !isIntegratedGPU && !isAppleSilicon;
|
|
310
|
-
|
|
311
|
-
let tokensPerSecond;
|
|
312
|
-
|
|
313
|
-
if (isAppleSilicon) {
|
|
314
|
-
// Apple Silicon - realistic but optimistic due to unified memory
|
|
315
|
-
let baseTPS = 20; // More realistic baseline
|
|
316
|
-
if (gpuModel.toLowerCase().includes('m4 pro')) baseTPS = 30;
|
|
317
|
-
else if (gpuModel.toLowerCase().includes('m4')) baseTPS = 25;
|
|
318
|
-
else if (gpuModel.toLowerCase().includes('m3 pro')) baseTPS = 28;
|
|
319
|
-
else if (gpuModel.toLowerCase().includes('m3')) baseTPS = 22;
|
|
320
|
-
else if (gpuModel.toLowerCase().includes('m2 pro')) baseTPS = 25;
|
|
321
|
-
else if (gpuModel.toLowerCase().includes('m2')) baseTPS = 20;
|
|
322
|
-
else if (gpuModel.toLowerCase().includes('m1 pro')) baseTPS = 22;
|
|
323
|
-
else if (gpuModel.toLowerCase().includes('m1')) baseTPS = 18;
|
|
324
|
-
|
|
325
|
-
// Memory scaling for Apple Silicon
|
|
326
|
-
if (memoryTotal >= 64) baseTPS *= 1.2;
|
|
327
|
-
else if (memoryTotal >= 32) baseTPS *= 1.1;
|
|
328
|
-
|
|
329
|
-
tokensPerSecond = Math.max(6, Math.round(baseTPS / Math.max(0.7, modelSizeB)));
|
|
330
|
-
|
|
331
|
-
} else if (hasDedicatedGPU) {
|
|
332
|
-
// Dedicated GPU - much better but still realistic
|
|
333
|
-
let gpuTPS = 25;
|
|
334
|
-
if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 60;
|
|
335
|
-
else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 45;
|
|
336
|
-
else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 35;
|
|
337
|
-
else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 28;
|
|
338
|
-
else if (vramGB >= 16) gpuTPS = 40;
|
|
339
|
-
else if (vramGB >= 8) gpuTPS = 30;
|
|
340
|
-
else if (vramGB >= 4) gpuTPS = 25;
|
|
341
|
-
|
|
342
|
-
tokensPerSecond = Math.max(8, Math.round(gpuTPS / Math.max(0.4, modelSizeB)));
|
|
343
|
-
|
|
344
|
-
} else {
|
|
345
|
-
// CPU-only or integrated GPU - most conservative and realistic
|
|
346
|
-
const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
|
|
347
|
-
(cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
|
|
348
|
-
const hasAVX2 = cpuModel.toLowerCase().includes('intel') || cpuModel.toLowerCase().includes('amd');
|
|
349
|
-
|
|
350
|
-
// Base CPU performance - very conservative
|
|
351
|
-
let cpuK = 1.2; // Much more realistic
|
|
352
|
-
if (hasAVX512) cpuK = 2.0;
|
|
353
|
-
else if (hasAVX2) cpuK = 1.6;
|
|
354
|
-
|
|
355
|
-
// Threading efficiency (realistic diminishing returns)
|
|
356
|
-
const effectiveThreads = Math.min(cores, 6); // CPU inference doesn't scale linearly
|
|
357
|
-
|
|
358
|
-
// iGPU small boost
|
|
359
|
-
const iGpuMultiplier = isIntegratedGPU ? 1.2 : 1.0;
|
|
360
|
-
|
|
361
|
-
// Memory pressure factor
|
|
362
|
-
const memoryPressure = Math.min(1.0, Math.max(0.6, memoryTotal / (modelSizeB * 2)));
|
|
363
|
-
|
|
364
|
-
const baseTPS = (cpuK * baseSpeed * effectiveThreads * iGpuMultiplier * memoryPressure) / Math.max(2.0, modelSizeB);
|
|
365
|
-
|
|
366
|
-
// Realistic CPU caps based on hardware
|
|
367
|
-
const maxCPUTPS = hasAVX512 ? 18 : (isIntegratedGPU ? 12 : 8);
|
|
368
|
-
tokensPerSecond = Math.max(1, Math.min(maxCPUTPS, Math.round(baseTPS)));
|
|
369
|
-
}
|
|
290
|
+
const speedProfile = estimateTokenSpeedFromHardware(hardware, {
|
|
291
|
+
modelSizeB,
|
|
292
|
+
modelName: model.name
|
|
293
|
+
});
|
|
294
|
+
const tokensPerSecond = speedProfile.tokensPerSecond;
|
|
370
295
|
|
|
371
296
|
return {
|
|
372
|
-
estimatedTokensPerSecond:
|
|
297
|
+
estimatedTokensPerSecond: tokensPerSecond,
|
|
373
298
|
confidence: this.calculateConfidence(hardware, model),
|
|
374
299
|
factors: {
|
|
375
|
-
cpu:
|
|
376
|
-
memory:
|
|
377
|
-
gpu:
|
|
300
|
+
cpu: hardware.cpu?.brand || hardware.cpu?.model || 'Unknown CPU',
|
|
301
|
+
memory: hardware.memory?.total || 0,
|
|
302
|
+
gpu: speedProfile.backend,
|
|
378
303
|
modelSize: modelSizeB,
|
|
379
|
-
architecture:
|
|
304
|
+
architecture: hardware.cpu?.architecture || 'unknown',
|
|
305
|
+
sizeScale: speedProfile.sizeScale,
|
|
306
|
+
memoryFactor: speedProfile.memoryFactor
|
|
380
307
|
},
|
|
381
|
-
category: this.categorizePerformance(
|
|
308
|
+
category: this.categorizePerformance(tokensPerSecond),
|
|
382
309
|
loadTimeEstimate: this.estimateLoadTime(model, hardware)
|
|
383
310
|
};
|
|
384
311
|
}
|
|
385
312
|
|
|
386
313
|
parseModelSize(sizeString) {
|
|
387
|
-
|
|
388
|
-
|
|
314
|
+
if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
|
|
315
|
+
|
|
316
|
+
const normalized = sizeString.trim().toUpperCase();
|
|
389
317
|
|
|
390
|
-
|
|
391
|
-
const
|
|
318
|
+
// Parameter notation (e.g. 8B, 774M)
|
|
319
|
+
const paramMatch = normalized.match(/(\d+\.?\d*)\s*([BM])\b/);
|
|
320
|
+
if (paramMatch) {
|
|
321
|
+
const num = parseFloat(paramMatch[1]);
|
|
322
|
+
const unit = paramMatch[2];
|
|
323
|
+
return unit === 'B' ? num : num / 1000;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
// File-size notation fallback (e.g. 4.9GB) -> rough Q4 param estimate
|
|
327
|
+
const gbMatch = normalized.match(/(\d+\.?\d*)\s*GB\b/);
|
|
328
|
+
if (gbMatch) {
|
|
329
|
+
const sizeGB = parseFloat(gbMatch[1]);
|
|
330
|
+
return Math.max(0.5, sizeGB / 0.62);
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
const mbMatch = normalized.match(/(\d+\.?\d*)\s*MB\b/);
|
|
334
|
+
if (mbMatch) {
|
|
335
|
+
const sizeGB = parseFloat(mbMatch[1]) / 1024;
|
|
336
|
+
return Math.max(0.5, sizeGB / 0.62);
|
|
337
|
+
}
|
|
392
338
|
|
|
393
|
-
return
|
|
339
|
+
return 1;
|
|
394
340
|
}
|
|
395
341
|
|
|
396
342
|
calculateConfidence(hardware, model) {
|
|
@@ -502,4 +448,4 @@ class PerformanceAnalyzer {
|
|
|
502
448
|
}
|
|
503
449
|
}
|
|
504
450
|
|
|
505
|
-
module.exports = PerformanceAnalyzer;
|
|
451
|
+
module.exports = PerformanceAnalyzer;
|