llm-checker 3.4.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  <p align="center">
2
- <img src="llmlogo.jpg" alt="LLM Checker Logo" width="200">
2
+ <img src="assets/llm-checker-logo.gif" alt="LLM Checker Animated Logo" width="760">
3
3
  </p>
4
4
 
5
5
  <p align="center">
@@ -254,6 +254,12 @@ npm install -g llm-checker
254
254
  claude mcp add llm-checker -- llm-checker-mcp
255
255
  ```
256
256
 
257
+ Or generate the exact command directly from the CLI:
258
+
259
+ ```bash
260
+ llm-checker mcp-setup
261
+ ```
262
+
257
263
  Or with npx (no global install needed):
258
264
 
259
265
  ```bash
@@ -320,6 +326,26 @@ Claude will automatically call the right tools and give you actionable results.
320
326
 
321
327
  ---
322
328
 
329
+ ## Interactive CLI Panel
330
+
331
+ Running `llm-checker` with no arguments in an interactive (TTY) terminal now opens an interactive panel:
332
+
333
+ - animated startup banner
334
+ - main command list with descriptions
335
+ - type `/` to open all commands
336
+ - use up/down arrows to select a command
337
+ - press `Enter` to execute
338
+ - add optional extra flags before running (example: `--json --limit 5`)
339
+
340
+ For scripting and automation, direct command invocation remains unchanged:
341
+
342
+ ```bash
343
+ llm-checker check --use-case coding --limit 3
344
+ llm-checker search "qwen coder" --json
345
+ ```
346
+
347
+ ---
348
+
323
349
  ## Commands
324
350
 
325
351
  ### Core Commands
@@ -332,6 +358,7 @@ Claude will automatically call the right tools and give you actionable results.
332
358
  | `calibrate` | Generate calibration result + routing policy artifacts from a JSONL prompt suite |
333
359
  | `installed` | Rank your installed Ollama models by compatibility |
334
360
  | `ollama-plan` | Compute safe Ollama runtime env vars (`NUM_CTX`, `NUM_PARALLEL`, `MAX_LOADED_MODELS`) for selected local models |
361
+ | `mcp-setup` | Print/apply Claude MCP setup command and config snippet (`--apply`, `--json`, `--npx`) |
335
362
 
336
363
  ### Advanced Commands (require `sql.js`)
337
364
 
@@ -1,3 +1,5 @@
1
+ const { estimateTokenSpeedFromHardware } = require('../src/utils/token-speed-estimator');
2
+
1
3
  class PerformanceAnalyzer {
2
4
  constructor() {
3
5
  this.benchmarkCache = new Map();
@@ -284,113 +286,57 @@ class PerformanceAnalyzer {
284
286
  }
285
287
 
286
288
  calculateRealisticPerformance(model, hardware) {
287
- // Parse model size
288
289
  const modelSizeB = this.parseModelSize(model.size);
289
-
290
- // Get hardware specifics
291
- const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
292
- const gpuModel = hardware.gpu?.model || '';
293
- const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
294
- const baseSpeed = hardware.cpu?.speed || 2.4;
295
- const vramGB = hardware.gpu?.vram || 0;
296
- const memoryTotal = hardware.memory?.total || 8;
297
-
298
- // Hardware type detection
299
- const isAppleSilicon = hardware.cpu?.architecture === 'Apple Silicon' || (
300
- process.platform === 'darwin' && (
301
- gpuModel.toLowerCase().includes('apple') ||
302
- gpuModel.toLowerCase().includes('m1') ||
303
- gpuModel.toLowerCase().includes('m2') ||
304
- gpuModel.toLowerCase().includes('m3') ||
305
- gpuModel.toLowerCase().includes('m4')
306
- )
307
- );
308
- const isIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel);
309
- const hasDedicatedGPU = vramGB > 0 && !isIntegratedGPU && !isAppleSilicon;
310
-
311
- let tokensPerSecond;
312
-
313
- if (isAppleSilicon) {
314
- // Apple Silicon - realistic but optimistic due to unified memory
315
- let baseTPS = 20; // More realistic baseline
316
- if (gpuModel.toLowerCase().includes('m4 pro')) baseTPS = 30;
317
- else if (gpuModel.toLowerCase().includes('m4')) baseTPS = 25;
318
- else if (gpuModel.toLowerCase().includes('m3 pro')) baseTPS = 28;
319
- else if (gpuModel.toLowerCase().includes('m3')) baseTPS = 22;
320
- else if (gpuModel.toLowerCase().includes('m2 pro')) baseTPS = 25;
321
- else if (gpuModel.toLowerCase().includes('m2')) baseTPS = 20;
322
- else if (gpuModel.toLowerCase().includes('m1 pro')) baseTPS = 22;
323
- else if (gpuModel.toLowerCase().includes('m1')) baseTPS = 18;
324
-
325
- // Memory scaling for Apple Silicon
326
- if (memoryTotal >= 64) baseTPS *= 1.2;
327
- else if (memoryTotal >= 32) baseTPS *= 1.1;
328
-
329
- tokensPerSecond = Math.max(6, Math.round(baseTPS / Math.max(0.7, modelSizeB)));
330
-
331
- } else if (hasDedicatedGPU) {
332
- // Dedicated GPU - much better but still realistic
333
- let gpuTPS = 25;
334
- if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 60;
335
- else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 45;
336
- else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 35;
337
- else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 28;
338
- else if (vramGB >= 16) gpuTPS = 40;
339
- else if (vramGB >= 8) gpuTPS = 30;
340
- else if (vramGB >= 4) gpuTPS = 25;
341
-
342
- tokensPerSecond = Math.max(8, Math.round(gpuTPS / Math.max(0.4, modelSizeB)));
343
-
344
- } else {
345
- // CPU-only or integrated GPU - most conservative and realistic
346
- const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
347
- (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
348
- const hasAVX2 = cpuModel.toLowerCase().includes('intel') || cpuModel.toLowerCase().includes('amd');
349
-
350
- // Base CPU performance - very conservative
351
- let cpuK = 1.2; // Much more realistic
352
- if (hasAVX512) cpuK = 2.0;
353
- else if (hasAVX2) cpuK = 1.6;
354
-
355
- // Threading efficiency (realistic diminishing returns)
356
- const effectiveThreads = Math.min(cores, 6); // CPU inference doesn't scale linearly
357
-
358
- // iGPU small boost
359
- const iGpuMultiplier = isIntegratedGPU ? 1.2 : 1.0;
360
-
361
- // Memory pressure factor
362
- const memoryPressure = Math.min(1.0, Math.max(0.6, memoryTotal / (modelSizeB * 2)));
363
-
364
- const baseTPS = (cpuK * baseSpeed * effectiveThreads * iGpuMultiplier * memoryPressure) / Math.max(2.0, modelSizeB);
365
-
366
- // Realistic CPU caps based on hardware
367
- const maxCPUTPS = hasAVX512 ? 18 : (isIntegratedGPU ? 12 : 8);
368
- tokensPerSecond = Math.max(1, Math.min(maxCPUTPS, Math.round(baseTPS)));
369
- }
290
+ const speedProfile = estimateTokenSpeedFromHardware(hardware, {
291
+ modelSizeB,
292
+ modelName: model.name
293
+ });
294
+ const tokensPerSecond = speedProfile.tokensPerSecond;
370
295
 
371
296
  return {
372
- estimatedTokensPerSecond: Math.round(tokensPerSecond),
297
+ estimatedTokensPerSecond: tokensPerSecond,
373
298
  confidence: this.calculateConfidence(hardware, model),
374
299
  factors: {
375
- cpu: cpuModel,
376
- memory: memoryTotal,
377
- gpu: hasDedicatedGPU ? 'dedicated' : (isIntegratedGPU ? 'integrated' : 'cpu_only'),
300
+ cpu: hardware.cpu?.brand || hardware.cpu?.model || 'Unknown CPU',
301
+ memory: hardware.memory?.total || 0,
302
+ gpu: speedProfile.backend,
378
303
  modelSize: modelSizeB,
379
- architecture: isAppleSilicon ? 'Apple Silicon' : 'x86'
304
+ architecture: hardware.cpu?.architecture || 'unknown',
305
+ sizeScale: speedProfile.sizeScale,
306
+ memoryFactor: speedProfile.memoryFactor
380
307
  },
381
- category: this.categorizePerformance(Math.round(tokensPerSecond)),
308
+ category: this.categorizePerformance(tokensPerSecond),
382
309
  loadTimeEstimate: this.estimateLoadTime(model, hardware)
383
310
  };
384
311
  }
385
312
 
386
313
  parseModelSize(sizeString) {
387
- const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
388
- if (!match) return 1;
314
+ if (typeof sizeString !== 'string' || !sizeString.trim()) return 1;
315
+
316
+ const normalized = sizeString.trim().toUpperCase();
389
317
 
390
- const num = parseFloat(match[1]);
391
- const unit = match[0].slice(-1).toUpperCase();
318
+ // Parameter notation (e.g. 8B, 774M)
319
+ const paramMatch = normalized.match(/(\d+\.?\d*)\s*([BM])\b/);
320
+ if (paramMatch) {
321
+ const num = parseFloat(paramMatch[1]);
322
+ const unit = paramMatch[2];
323
+ return unit === 'B' ? num : num / 1000;
324
+ }
325
+
326
+ // File-size notation fallback (e.g. 4.9GB) -> rough Q4 param estimate
327
+ const gbMatch = normalized.match(/(\d+\.?\d*)\s*GB\b/);
328
+ if (gbMatch) {
329
+ const sizeGB = parseFloat(gbMatch[1]);
330
+ return Math.max(0.5, sizeGB / 0.62);
331
+ }
332
+
333
+ const mbMatch = normalized.match(/(\d+\.?\d*)\s*MB\b/);
334
+ if (mbMatch) {
335
+ const sizeGB = parseFloat(mbMatch[1]) / 1024;
336
+ return Math.max(0.5, sizeGB / 0.62);
337
+ }
392
338
 
393
- return unit === 'B' ? num : num / 1000; // Convert M to B
339
+ return 1;
394
340
  }
395
341
 
396
342
  calculateConfidence(hardware, model) {
@@ -502,4 +448,4 @@ class PerformanceAnalyzer {
502
448
  }
503
449
  }
504
450
 
505
- module.exports = PerformanceAnalyzer;
451
+ module.exports = PerformanceAnalyzer;