llm-checker 3.5.8 → 3.5.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
187
187
 
188
188
  const server = new McpServer({
189
189
  name: "llm-checker",
190
- version: "3.5.8",
190
+ version: "3.5.9",
191
191
  });
192
192
 
193
193
  // ============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.8",
3
+ "version": "3.5.9",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -7,15 +7,16 @@
7
7
 
8
8
  const DeterministicModelSelector = require('./deterministic-selector');
9
9
  const { OllamaNativeScraper } = require('../ollama/native-scraper');
10
+ const OllamaClient = require('../ollama/client');
10
11
  const crypto = require('crypto');
11
12
  const fs = require('fs');
12
13
  const path = require('path');
13
- const fetch = require('../utils/fetch');
14
14
  const { evaluateFineTuningSupport } = require('./fine-tuning-support');
15
15
 
16
16
  class AICheckSelector {
17
17
  constructor() {
18
18
  this.deterministicSelector = new DeterministicModelSelector();
19
+ this.ollamaClient = new OllamaClient();
19
20
  this.ollamaScraper = new OllamaNativeScraper();
20
21
  this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
21
22
 
@@ -389,17 +390,10 @@ Return JSON with this structure:
389
390
  ]
390
391
  };
391
392
 
392
- const response = await fetch('http://localhost:11434/api/chat', {
393
- method: 'POST',
394
- headers: { 'Content-Type': 'application/json' },
395
- body: JSON.stringify(requestBody)
393
+ const data = await this.ollamaClient.chat(modelId, requestBody.messages, {
394
+ timeoutMs: 45000,
395
+ generationOptions: requestBody.options
396
396
  });
397
-
398
- if (!response.ok) {
399
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
400
- }
401
-
402
- const data = await response.json();
403
397
 
404
398
  if (!data.message || !data.message.content) {
405
399
  throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
@@ -9,7 +9,7 @@ const fs = require('fs');
9
9
  const path = require('path');
10
10
  const os = require('os');
11
11
  const { spawn } = require('child_process');
12
- const fetch = require('../utils/fetch');
12
+ const OllamaClient = require('../ollama/client');
13
13
  const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
14
14
  const {
15
15
  parseBillionsValue: parseMoEBillionsValue,
@@ -24,6 +24,7 @@ class DeterministicModelSelector {
24
24
  constructor() {
25
25
  this.catalogPath = path.join(__dirname, 'catalog.json');
26
26
  this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
27
+ this.ollamaClient = new OllamaClient();
27
28
  this.ollamaCachePaths = [
28
29
  path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
29
30
  path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
@@ -2018,33 +2019,23 @@ class DeterministicModelSelector {
2018
2019
 
2019
2020
  const prompt = prompts[category] || prompts['general'];
2020
2021
  const targetTokens = 128;
2021
-
2022
- const startTime = Date.now();
2023
-
2024
- // Make HTTP request to Ollama API
2025
- const response = await fetch('http://localhost:11434/api/generate', {
2026
- method: 'POST',
2027
- headers: { 'Content-Type': 'application/json' },
2028
- body: JSON.stringify({
2029
- model: modelId,
2030
- prompt: prompt,
2031
- stream: false,
2032
- options: {
2033
- num_predict: targetTokens
2034
- }
2035
- })
2022
+
2023
+ const result = await this.ollamaClient.generate(modelId, prompt, {
2024
+ generationOptions: {
2025
+ num_predict: targetTokens
2026
+ }
2036
2027
  });
2037
-
2038
- if (!response.ok) {
2039
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
2028
+
2029
+ if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
2030
+ return result.tokensPerSecond;
2040
2031
  }
2041
-
2042
- const result = await response.json();
2043
- const elapsedSeconds = (Date.now() - startTime) / 1000;
2044
-
2045
- // Estimate tokens generated (simplified)
2046
- const tokensGenerated = result.response ? result.response.split(' ').length * 1.3 : targetTokens;
2047
-
2032
+
2033
+ const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
2034
+ const estimatedResponseTokens = result.response
2035
+ ? result.response.split(/\s+/).filter(Boolean).length * 1.3
2036
+ : targetTokens;
2037
+ const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
2038
+
2048
2039
  return tokensGenerated / elapsedSeconds;
2049
2040
  }
2050
2041
 
@@ -3,9 +3,9 @@ const fetch = require('../utils/fetch');
3
3
  class OllamaClient {
4
4
  constructor(baseURL = null) {
5
5
  // Support OLLAMA_HOST environment variable (standard Ollama configuration)
6
- // Also support OLLAMA_URL for backwards compatibility
6
+ // Also support OLLAMA_BASE_URL and OLLAMA_URL for backwards compatibility
7
7
  this.preferredBaseURL = this.normalizeBaseURL(
8
- baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
8
+ baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || process.env.OLLAMA_URL || 'http://localhost:11434'
9
9
  );
10
10
  this.baseURL = this.preferredBaseURL;
11
11
 
@@ -473,51 +473,86 @@ class OllamaClient {
473
473
  };
474
474
  }
475
475
 
476
- async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
476
+ async generate(modelName, prompt, options = {}) {
477
477
  const availability = await this.checkOllamaAvailability();
478
478
  if (!availability.available) {
479
479
  throw new Error(`Ollama not available: ${availability.error}`);
480
480
  }
481
481
 
482
+ const {
483
+ timeoutMs = 30000,
484
+ stream = false,
485
+ keepAlive,
486
+ format,
487
+ generationOptions = {}
488
+ } = options;
489
+
490
+ const payload = {
491
+ model: modelName,
492
+ prompt,
493
+ stream: Boolean(stream)
494
+ };
495
+
496
+ if (keepAlive) payload.keep_alive = keepAlive;
497
+ if (format) payload.format = format;
498
+ if (generationOptions && Object.keys(generationOptions).length > 0) {
499
+ payload.options = generationOptions;
500
+ }
501
+
482
502
  const startTime = Date.now();
483
503
 
484
504
  try {
485
505
  const controller = new AbortController();
486
- const timeoutId = setTimeout(() => controller.abort(), 30000);
506
+ const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
487
507
 
488
508
  const response = await fetch(`${this.baseURL}/api/generate`, {
489
509
  method: 'POST',
490
510
  signal: controller.signal,
491
511
  headers: { 'Content-Type': 'application/json' },
492
- body: JSON.stringify({
493
- model: modelName,
494
- prompt: testPrompt,
495
- stream: false,
496
- options: {
497
- num_predict: 50 // Limitar respuesta para test rápido
498
- }
499
- })
512
+ body: JSON.stringify(payload)
500
513
  });
501
514
 
502
515
  clearTimeout(timeoutId);
503
516
 
504
517
  if (!response.ok) {
505
- throw new Error(`Test failed: HTTP ${response.status}`);
518
+ const errorText = await response.text();
519
+ throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
506
520
  }
507
521
 
508
522
  const data = await response.json();
509
- const endTime = Date.now();
523
+ const responseTime = Date.now() - startTime;
524
+ const speed = this.calculateTokensPerSecond(data, responseTime);
510
525
 
511
- const totalTime = endTime - startTime;
526
+ return {
527
+ ...data,
528
+ responseTime,
529
+ tokensPerSecond: speed.tokensPerSecond,
530
+ evalTokensPerSecond: speed.evalTokensPerSecond,
531
+ endToEndTokensPerSecond: speed.endToEndTokensPerSecond
532
+ };
533
+ } catch (error) {
534
+ throw new Error(`Failed to run generate request: ${error.message}`);
535
+ }
536
+ }
537
+
538
+ async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
539
+ const startTime = Date.now();
540
+
541
+ try {
542
+ const data = await this.generate(modelName, testPrompt, {
543
+ timeoutMs: 30000,
544
+ generationOptions: {
545
+ num_predict: 50
546
+ }
547
+ });
512
548
  const tokensGenerated = Number(data.eval_count) || 0;
513
- const speed = this.calculateTokensPerSecond(data, totalTime);
514
549
 
515
550
  return {
516
551
  success: true,
517
- responseTime: totalTime,
518
- tokensPerSecond: speed.tokensPerSecond,
519
- evalTokensPerSecond: speed.evalTokensPerSecond,
520
- endToEndTokensPerSecond: speed.endToEndTokensPerSecond,
552
+ responseTime: data.responseTime,
553
+ tokensPerSecond: data.tokensPerSecond,
554
+ evalTokensPerSecond: data.evalTokensPerSecond,
555
+ endToEndTokensPerSecond: data.endToEndTokensPerSecond,
521
556
  tokensGenerated,
522
557
  loadTime: data.load_duration ? Math.round(data.load_duration / 1000000) : null,
523
558
  evalTime: data.eval_duration ? Math.round(data.eval_duration / 1000000) : null,
@@ -16,7 +16,7 @@ class ConfigManager {
16
16
  return {
17
17
  version: "2.0",
18
18
  ollama: {
19
- baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434",
19
+ baseURL: process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || "http://localhost:11434",
20
20
  timeout: 30000,
21
21
  enabled: true,
22
22
  autoDetect: true,
@@ -176,7 +176,9 @@ class ConfigManager {
176
176
  }
177
177
 
178
178
  // Ollama overrides
179
- if (process.env.OLLAMA_BASE_URL) {
179
+ if (process.env.OLLAMA_HOST) {
180
+ this.config.ollama.baseURL = process.env.OLLAMA_HOST;
181
+ } else if (process.env.OLLAMA_BASE_URL) {
180
182
  this.config.ollama.baseURL = process.env.OLLAMA_BASE_URL;
181
183
  }
182
184
 
@@ -356,4 +358,4 @@ class ConfigManager {
356
358
  }
357
359
  }
358
360
 
359
- module.exports = ConfigManager;
361
+ module.exports = ConfigManager;