llm-checker 3.5.8 → 3.5.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
187
187
 
188
188
  const server = new McpServer({
189
189
  name: "llm-checker",
190
- version: "3.5.8",
190
+ version: "3.5.10",
191
191
  });
192
192
 
193
193
  // ============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.8",
3
+ "version": "3.5.10",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -7,15 +7,16 @@
7
7
 
8
8
  const DeterministicModelSelector = require('./deterministic-selector');
9
9
  const { OllamaNativeScraper } = require('../ollama/native-scraper');
10
+ const OllamaClient = require('../ollama/client');
10
11
  const crypto = require('crypto');
11
12
  const fs = require('fs');
12
13
  const path = require('path');
13
- const fetch = require('../utils/fetch');
14
14
  const { evaluateFineTuningSupport } = require('./fine-tuning-support');
15
15
 
16
16
  class AICheckSelector {
17
17
  constructor() {
18
18
  this.deterministicSelector = new DeterministicModelSelector();
19
+ this.ollamaClient = new OllamaClient();
19
20
  this.ollamaScraper = new OllamaNativeScraper();
20
21
  this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
21
22
 
@@ -389,17 +390,10 @@ Return JSON with this structure:
389
390
  ]
390
391
  };
391
392
 
392
- const response = await fetch('http://localhost:11434/api/chat', {
393
- method: 'POST',
394
- headers: { 'Content-Type': 'application/json' },
395
- body: JSON.stringify(requestBody)
393
+ const data = await this.ollamaClient.chat(modelId, requestBody.messages, {
394
+ timeoutMs: 45000,
395
+ generationOptions: requestBody.options
396
396
  });
397
-
398
- if (!response.ok) {
399
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
400
- }
401
-
402
- const data = await response.json();
403
397
 
404
398
  if (!data.message || !data.message.content) {
405
399
  throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
@@ -9,7 +9,7 @@ const fs = require('fs');
9
9
  const path = require('path');
10
10
  const os = require('os');
11
11
  const { spawn } = require('child_process');
12
- const fetch = require('../utils/fetch');
12
+ const OllamaClient = require('../ollama/client');
13
13
  const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
14
14
  const {
15
15
  parseBillionsValue: parseMoEBillionsValue,
@@ -24,6 +24,7 @@ class DeterministicModelSelector {
24
24
  constructor() {
25
25
  this.catalogPath = path.join(__dirname, 'catalog.json');
26
26
  this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
27
+ this.ollamaClient = new OllamaClient();
27
28
  this.ollamaCachePaths = [
28
29
  path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
29
30
  path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
@@ -2018,33 +2019,23 @@ class DeterministicModelSelector {
2018
2019
 
2019
2020
  const prompt = prompts[category] || prompts['general'];
2020
2021
  const targetTokens = 128;
2021
-
2022
- const startTime = Date.now();
2023
-
2024
- // Make HTTP request to Ollama API
2025
- const response = await fetch('http://localhost:11434/api/generate', {
2026
- method: 'POST',
2027
- headers: { 'Content-Type': 'application/json' },
2028
- body: JSON.stringify({
2029
- model: modelId,
2030
- prompt: prompt,
2031
- stream: false,
2032
- options: {
2033
- num_predict: targetTokens
2034
- }
2035
- })
2022
+
2023
+ const result = await this.ollamaClient.generate(modelId, prompt, {
2024
+ generationOptions: {
2025
+ num_predict: targetTokens
2026
+ }
2036
2027
  });
2037
-
2038
- if (!response.ok) {
2039
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
2028
+
2029
+ if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
2030
+ return result.tokensPerSecond;
2040
2031
  }
2041
-
2042
- const result = await response.json();
2043
- const elapsedSeconds = (Date.now() - startTime) / 1000;
2044
-
2045
- // Estimate tokens generated (simplified)
2046
- const tokensGenerated = result.response ? result.response.split(' ').length * 1.3 : targetTokens;
2047
-
2032
+
2033
+ const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
2034
+ const estimatedResponseTokens = result.response
2035
+ ? result.response.split(/\s+/).filter(Boolean).length * 1.3
2036
+ : targetTokens;
2037
+ const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
2038
+
2048
2039
  return tokensGenerated / elapsedSeconds;
2049
2040
  }
2050
2041
 
@@ -3,9 +3,9 @@ const fetch = require('../utils/fetch');
3
3
  class OllamaClient {
4
4
  constructor(baseURL = null) {
5
5
  // Support OLLAMA_HOST environment variable (standard Ollama configuration)
6
- // Also support OLLAMA_URL for backwards compatibility
6
+ // Also support OLLAMA_BASE_URL and OLLAMA_URL for backwards compatibility
7
7
  this.preferredBaseURL = this.normalizeBaseURL(
8
- baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
8
+ baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || process.env.OLLAMA_URL || 'http://localhost:11434'
9
9
  );
10
10
  this.baseURL = this.preferredBaseURL;
11
11
 
@@ -35,7 +35,8 @@ class OllamaClient {
35
35
  candidates.push(ipv4.toString().replace(/\/$/, ''));
36
36
 
37
37
  const ipv6 = new URL(parsed.toString());
38
- ipv6.hostname = '::1';
38
+ // URL.hostname expects bracketed IPv6 literals when mutating an existing URL.
39
+ ipv6.hostname = '[::1]';
39
40
  candidates.push(ipv6.toString().replace(/\/$/, ''));
40
41
  }
41
42
  } catch (error) {
@@ -473,51 +474,86 @@ class OllamaClient {
473
474
  };
474
475
  }
475
476
 
476
- async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
477
+ async generate(modelName, prompt, options = {}) {
477
478
  const availability = await this.checkOllamaAvailability();
478
479
  if (!availability.available) {
479
480
  throw new Error(`Ollama not available: ${availability.error}`);
480
481
  }
481
482
 
483
+ const {
484
+ timeoutMs = 30000,
485
+ stream = false,
486
+ keepAlive,
487
+ format,
488
+ generationOptions = {}
489
+ } = options;
490
+
491
+ const payload = {
492
+ model: modelName,
493
+ prompt,
494
+ stream: Boolean(stream)
495
+ };
496
+
497
+ if (keepAlive) payload.keep_alive = keepAlive;
498
+ if (format) payload.format = format;
499
+ if (generationOptions && Object.keys(generationOptions).length > 0) {
500
+ payload.options = generationOptions;
501
+ }
502
+
482
503
  const startTime = Date.now();
483
504
 
484
505
  try {
485
506
  const controller = new AbortController();
486
- const timeoutId = setTimeout(() => controller.abort(), 30000);
507
+ const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
487
508
 
488
509
  const response = await fetch(`${this.baseURL}/api/generate`, {
489
510
  method: 'POST',
490
511
  signal: controller.signal,
491
512
  headers: { 'Content-Type': 'application/json' },
492
- body: JSON.stringify({
493
- model: modelName,
494
- prompt: testPrompt,
495
- stream: false,
496
- options: {
497
- num_predict: 50 // Limitar respuesta para test rápido
498
- }
499
- })
513
+ body: JSON.stringify(payload)
500
514
  });
501
515
 
502
516
  clearTimeout(timeoutId);
503
517
 
504
518
  if (!response.ok) {
505
- throw new Error(`Test failed: HTTP ${response.status}`);
519
+ const errorText = await response.text();
520
+ throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
506
521
  }
507
522
 
508
523
  const data = await response.json();
509
- const endTime = Date.now();
524
+ const responseTime = Date.now() - startTime;
525
+ const speed = this.calculateTokensPerSecond(data, responseTime);
510
526
 
511
- const totalTime = endTime - startTime;
527
+ return {
528
+ ...data,
529
+ responseTime,
530
+ tokensPerSecond: speed.tokensPerSecond,
531
+ evalTokensPerSecond: speed.evalTokensPerSecond,
532
+ endToEndTokensPerSecond: speed.endToEndTokensPerSecond
533
+ };
534
+ } catch (error) {
535
+ throw new Error(`Failed to run generate request: ${error.message}`);
536
+ }
537
+ }
538
+
539
+ async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
540
+ const startTime = Date.now();
541
+
542
+ try {
543
+ const data = await this.generate(modelName, testPrompt, {
544
+ timeoutMs: 30000,
545
+ generationOptions: {
546
+ num_predict: 50
547
+ }
548
+ });
512
549
  const tokensGenerated = Number(data.eval_count) || 0;
513
- const speed = this.calculateTokensPerSecond(data, totalTime);
514
550
 
515
551
  return {
516
552
  success: true,
517
- responseTime: totalTime,
518
- tokensPerSecond: speed.tokensPerSecond,
519
- evalTokensPerSecond: speed.evalTokensPerSecond,
520
- endToEndTokensPerSecond: speed.endToEndTokensPerSecond,
553
+ responseTime: data.responseTime,
554
+ tokensPerSecond: data.tokensPerSecond,
555
+ evalTokensPerSecond: data.evalTokensPerSecond,
556
+ endToEndTokensPerSecond: data.endToEndTokensPerSecond,
521
557
  tokensGenerated,
522
558
  loadTime: data.load_duration ? Math.round(data.load_duration / 1000000) : null,
523
559
  evalTime: data.eval_duration ? Math.round(data.eval_duration / 1000000) : null,
@@ -16,7 +16,7 @@ class ConfigManager {
16
16
  return {
17
17
  version: "2.0",
18
18
  ollama: {
19
- baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434",
19
+ baseURL: process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || "http://localhost:11434",
20
20
  timeout: 30000,
21
21
  enabled: true,
22
22
  autoDetect: true,
@@ -176,7 +176,9 @@ class ConfigManager {
176
176
  }
177
177
 
178
178
  // Ollama overrides
179
- if (process.env.OLLAMA_BASE_URL) {
179
+ if (process.env.OLLAMA_HOST) {
180
+ this.config.ollama.baseURL = process.env.OLLAMA_HOST;
181
+ } else if (process.env.OLLAMA_BASE_URL) {
180
182
  this.config.ollama.baseURL = process.env.OLLAMA_BASE_URL;
181
183
  }
182
184
 
@@ -356,4 +358,4 @@ class ConfigManager {
356
358
  }
357
359
  }
358
360
 
359
- module.exports = ConfigManager;
361
+ module.exports = ConfigManager;