llm-checker 3.4.2 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,16 +1,12 @@
  <p align="center">
- <img src="llmlogo.jpg" alt="LLM Checker Logo" width="200">
+ <img src="https://raw.githubusercontent.com/Pavelevich/llm-checker/main/assets/llm-checker-logo.gif" alt="LLM Checker Animated Logo" width="760">
  </p>
 
+ <h1 align="center">LLM Checker</h1>
+ <p align="center"><strong>Intelligent Ollama Model Selector</strong></p>
  <p align="center">
- <h1 align="center">LLM Checker</h1>
- <p align="center">
- <strong>Intelligent Ollama Model Selector</strong>
- </p>
- <p align="center">
- AI-powered CLI that analyzes your hardware and recommends optimal LLM models<br/>
- Deterministic scoring across <b>200+ dynamic models</b> (35+ curated fallback) with hardware-calibrated memory estimation
- </p>
+ AI-powered CLI that analyzes your hardware and recommends optimal LLM models<br/>
+ Deterministic scoring across <b>200+ dynamic models</b> (35+ curated fallback) with hardware-calibrated memory estimation
  </p>
 
  <p align="center">
@@ -254,6 +250,12 @@ npm install -g llm-checker
  claude mcp add llm-checker -- llm-checker-mcp
  ```
 
+ Or generate the exact command directly from the CLI:
+
+ ```bash
+ llm-checker mcp-setup
+ ```
+
  Or with npx (no global install needed):
 
  ```bash
@@ -320,6 +322,26 @@ Claude will automatically call the right tools and give you actionable results.
 
  ---
 
+ ## Interactive CLI Panel
+
+ Running `llm-checker` with no arguments now opens an interactive panel (in TTY terminals):
+
+ - animated startup banner
+ - main command list with descriptions
+ - type `/` to open all commands
+ - use up/down arrows to select a command
+ - press `Enter` to execute
+ - add optional extra flags before running (example: `--json --limit 5`)
+
+ For scripting and automation, direct command invocation remains unchanged:
+
+ ```bash
+ llm-checker check --use-case coding --limit 3
+ llm-checker search "qwen coder" --json
+ ```
+
+ ---
+
  ## Commands
 
  ### Core Commands
@@ -332,7 +354,7 @@ Claude will automatically call the right tools and give you actionable results.
  | `calibrate` | Generate calibration result + routing policy artifacts from a JSONL prompt suite |
  | `installed` | Rank your installed Ollama models by compatibility |
  | `ollama-plan` | Compute safe Ollama runtime env vars (`NUM_CTX`, `NUM_PARALLEL`, `MAX_LOADED_MODELS`) for selected local models |
- | `gpu-plan` | Simulate `pin`/`replica`/`spread` multi-GPU placement with memory-fit and throughput estimates per model |
+ | `mcp-setup` | Print/apply Claude MCP setup command and config snippet (`--apply`, `--json`, `--npx`) |
 
  ### Advanced Commands (require `sql.js`)
 
@@ -1,3 +1,5 @@
+ const { estimateTokenSpeedFromHardware } = require('../src/utils/token-speed-estimator');
+
  class PerformanceAnalyzer {
      constructor() {
          this.benchmarkCache = new Map();
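The `token-speed-estimator` module itself is not part of this diff, so only its call sites are visible. From the usage in the next hunk, it takes the detected hardware plus `{ modelSizeB, modelName }` and returns at least `tokensPerSecond`, `backend`, `sizeScale`, and `memoryFactor`. A minimal sketch of that assumed contract follows; the baselines and scaling formulas are illustrative placeholders, not the shipped heuristics:

```js
// Sketch of the contract implied by the call sites below (shape only; numbers are placeholders).
function estimateTokenSpeedFromHardware(hardware, { modelSizeB = 1, modelName = '' } = {}) {
    const vramGB = hardware?.gpu?.vram || 0;
    const totalRamGB = hardware?.memory?.total || 8;

    // Coarse backend label; the refactored code stores this in factors.gpu.
    const backend = vramGB > 0 ? 'gpu' : 'cpu';

    // Scaling terms surfaced in the result so callers can report them (factors.sizeScale / memoryFactor).
    const sizeScale = 1 / Math.max(0.5, modelSizeB);
    const memoryFactor = Math.min(1, totalRamGB / (modelSizeB * 2));

    const baseline = backend === 'gpu' ? 40 : 10; // placeholder tokens/s, not calibrated values
    const tokensPerSecond = Math.max(1, Math.round(baseline * sizeScale * memoryFactor));

    return { tokensPerSecond, backend, sizeScale, memoryFactor, modelName };
}
```

Moving the CPU/GPU/Apple Silicon branching behind one function keeps `calculateRealisticPerformance` declarative, which is what the larger hunk below does.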
@@ -284,113 +286,57 @@ class PerformanceAnalyzer {
      }
 
      calculateRealisticPerformance(model, hardware) {
-         // Parse model size
          const modelSizeB = this.parseModelSize(model.size);
-
-         // Get hardware specifics
-         const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
-         const gpuModel = hardware.gpu?.model || '';
-         const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
-         const baseSpeed = hardware.cpu?.speed || 2.4;
-         const vramGB = hardware.gpu?.vram || 0;
-         const memoryTotal = hardware.memory?.total || 8;
-
-         // Hardware type detection
-         const isAppleSilicon = hardware.cpu?.architecture === 'Apple Silicon' || (
-             process.platform === 'darwin' && (
-                 gpuModel.toLowerCase().includes('apple') ||
-                 gpuModel.toLowerCase().includes('m1') ||
-                 gpuModel.toLowerCase().includes('m2') ||
-                 gpuModel.toLowerCase().includes('m3') ||
-                 gpuModel.toLowerCase().includes('m4')
-             )
-         );
-         const isIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel);
-         const hasDedicatedGPU = vramGB > 0 && !isIntegratedGPU && !isAppleSilicon;
-
-         let tokensPerSecond;
-
-         if (isAppleSilicon) {
-             // Apple Silicon - realistic but optimistic due to unified memory
-             let baseTPS = 20; // More realistic baseline
-             if (gpuModel.toLowerCase().includes('m4 pro')) baseTPS = 30;
-             else if (gpuModel.toLowerCase().includes('m4')) baseTPS = 25;
-             else if (gpuModel.toLowerCase().includes('m3 pro')) baseTPS = 28;
-             else if (gpuModel.toLowerCase().includes('m3')) baseTPS = 22;
-             else if (gpuModel.toLowerCase().includes('m2 pro')) baseTPS = 25;
-             else if (gpuModel.toLowerCase().includes('m2')) baseTPS = 20;
-             else if (gpuModel.toLowerCase().includes('m1 pro')) baseTPS = 22;
-             else if (gpuModel.toLowerCase().includes('m1')) baseTPS = 18;
-
-             // Memory scaling for Apple Silicon
-             if (memoryTotal >= 64) baseTPS *= 1.2;
-             else if (memoryTotal >= 32) baseTPS *= 1.1;
-
-             tokensPerSecond = Math.max(6, Math.round(baseTPS / Math.max(0.7, modelSizeB)));
-
-         } else if (hasDedicatedGPU) {
-             // Dedicated GPU - much better but still realistic
-             let gpuTPS = 25;
-             if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 60;
-             else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 45;
-             else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 35;
-             else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 28;
-             else if (vramGB >= 16) gpuTPS = 40;
-             else if (vramGB >= 8) gpuTPS = 30;
-             else if (vramGB >= 4) gpuTPS = 25;
-
-             tokensPerSecond = Math.max(8, Math.round(gpuTPS / Math.max(0.4, modelSizeB)));
-
-         } else {
-             // CPU-only or integrated GPU - most conservative and realistic
-             const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
-                 (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
-             const hasAVX2 = cpuModel.toLowerCase().includes('intel') || cpuModel.toLowerCase().includes('amd');
-
-             // Base CPU performance - very conservative
-             let cpuK = 1.2; // Much more realistic
-             if (hasAVX512) cpuK = 2.0;
-             else if (hasAVX2) cpuK = 1.6;
-
-             // Threading efficiency (realistic diminishing returns)
-             const effectiveThreads = Math.min(cores, 6); // CPU inference doesn't scale linearly
-
-             // iGPU small boost
-             const iGpuMultiplier = isIntegratedGPU ? 1.2 : 1.0;
-
-             // Memory pressure factor
-             const memoryPressure = Math.min(1.0, Math.max(0.6, memoryTotal / (modelSizeB * 2)));
-
-             const baseTPS = (cpuK * baseSpeed * effectiveThreads * iGpuMultiplier * memoryPressure) / Math.max(2.0, modelSizeB);
-
-             // Realistic CPU caps based on hardware
-             const maxCPUTPS = hasAVX512 ? 18 : (isIntegratedGPU ? 12 : 8);
-             tokensPerSecond = Math.max(1, Math.min(maxCPUTPS, Math.round(baseTPS)));
-         }
+         const speedProfile = estimateTokenSpeedFromHardware(hardware, {
+             modelSizeB,
+             modelName: model.name
+         });
+         const tokensPerSecond = speedProfile.tokensPerSecond;
 
          return {
-             estimatedTokensPerSecond: Math.round(tokensPerSecond),
+             estimatedTokensPerSecond: tokensPerSecond,
              confidence: this.calculateConfidence(hardware, model),
              factors: {
-                 cpu: cpuModel,
-                 memory: memoryTotal,
-                 gpu: hasDedicatedGPU ? 'dedicated' : (isIntegratedGPU ? 'integrated' : 'cpu_only'),
+                 cpu: hardware.cpu?.brand || hardware.cpu?.model || 'Unknown CPU',
+                 memory: hardware.memory?.total || 0,
+                 gpu: speedProfile.backend,
                  modelSize: modelSizeB,
-                 architecture: isAppleSilicon ? 'Apple Silicon' : 'x86'
+                 architecture: hardware.cpu?.architecture || 'unknown',
+                 sizeScale: speedProfile.sizeScale,
+                 memoryFactor: speedProfile.memoryFactor
              },
-             category: this.categorizePerformance(Math.round(tokensPerSecond)),
+             category: this.categorizePerformance(tokensPerSecond),
              loadTimeEstimate: this.estimateLoadTime(model, hardware)
          };
      }
 
      parseModelSize(sizeString) {
-         const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
-         if (!match) return 1;
+         if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
+
+         const normalized = sizeString.trim().toUpperCase();
 
-         const num = parseFloat(match[1]);
-         const unit = match[0].slice(-1).toUpperCase();
+         // Parameter notation (e.g. 8B, 774M)
+         const paramMatch = normalized.match(/(\d+\.?\d*)\s*([BM])\b/);
+         if (paramMatch) {
+             const num = parseFloat(paramMatch[1]);
+             const unit = paramMatch[2];
+             return unit === 'B' ? num : num / 1000;
+         }
+
+         // File-size notation fallback (e.g. 4.9GB) -> rough Q4 param estimate
+         const gbMatch = normalized.match(/(\d+\.?\d*)\s*GB\b/);
+         if (gbMatch) {
+             const sizeGB = parseFloat(gbMatch[1]);
+             return Math.max(0.5, sizeGB / 0.62);
+         }
+
+         const mbMatch = normalized.match(/(\d+\.?\d*)\s*MB\b/);
+         if (mbMatch) {
+             const sizeGB = parseFloat(mbMatch[1]) / 1024;
+             return Math.max(0.5, sizeGB / 0.62);
+         }
 
-         return unit === 'B' ? num : num / 1000; // Convert M to B
+         return 1;
      }
 
      calculateConfidence(hardware, model) {
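For reference, the rewritten `parseModelSize` accepts both parameter notation and file-size notation; the GB/MB fallback divides the file size by 0.62 GB per billion parameters, a rough density for Q4-quantized weights, so a 4.9 GB blob maps to roughly 7.9B parameters. A quick usage sketch (the require path is hypothetical, and the expected values are just round-trips of the code above):

```js
// Hypothetical path; the diff does not show where this class lives on disk.
const PerformanceAnalyzer = require('./performance-analyzer');

const analyzer = new PerformanceAnalyzer();

analyzer.parseModelSize('8B');      // 8      -> billions of parameters, used directly
analyzer.parseModelSize('774M');    // 0.774  -> millions converted to billions
analyzer.parseModelSize('4.9GB');   // ~7.9   -> 4.9 / 0.62, rough Q4 parameter estimate
analyzer.parseModelSize('512MB');   // ~0.81  -> (512 / 1024) / 0.62
analyzer.parseModelSize('unknown'); // 1      -> fallback default
```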
@@ -502,4 +448,4 @@ class PerformanceAnalyzer {
          }
      }
 
- module.exports = PerformanceAnalyzer;
+ module.exports = PerformanceAnalyzer;