llm-checker 3.5.8 → 3.5.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/mcp-server.mjs +1 -1
- package/package.json +1 -1
- package/src/models/ai-check-selector.js +5 -11
- package/src/models/deterministic-selector.js +17 -26
- package/src/ollama/client.js +57 -21
- package/src/utils/config.js +5 -3
package/bin/mcp-server.mjs
CHANGED
package/package.json
CHANGED
package/src/models/ai-check-selector.js
CHANGED

@@ -7,15 +7,16 @@
 
 const DeterministicModelSelector = require('./deterministic-selector');
 const { OllamaNativeScraper } = require('../ollama/native-scraper');
+const OllamaClient = require('../ollama/client');
 const crypto = require('crypto');
 const fs = require('fs');
 const path = require('path');
-const fetch = require('../utils/fetch');
 const { evaluateFineTuningSupport } = require('./fine-tuning-support');
 
 class AICheckSelector {
     constructor() {
         this.deterministicSelector = new DeterministicModelSelector();
+        this.ollamaClient = new OllamaClient();
         this.ollamaScraper = new OllamaNativeScraper();
         this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
 
@@ -389,17 +390,10 @@ Return JSON with this structure:
             ]
         };
 
-        const
-
-
-            body: JSON.stringify(requestBody)
+        const data = await this.ollamaClient.chat(modelId, requestBody.messages, {
+            timeoutMs: 45000,
+            generationOptions: requestBody.options
         });
-
-        if (!response.ok) {
-            throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
-        }
-
-        const data = await response.json();
 
         if (!data.message || !data.message.content) {
             throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
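For readers tracing the AI-check path, a minimal hypothetical sketch of the call shape the new code relies on; only the `chat(modelId, messages, { timeoutMs, generationOptions })` signature and the response validation come from the diff above, while the function name and any concrete values are placeholders.

```js
// Hypothetical driver for the refactored AI check; runAiCheck and the
// concrete contents of requestBody are illustrative, not package code.
const OllamaClient = require('../ollama/client');

async function runAiCheck(modelId, requestBody) {
  const ollamaClient = new OllamaClient();

  // New call site: the selector hands its prepared messages and options to
  // the shared client instead of issuing its own fetch to the Ollama API.
  const data = await ollamaClient.chat(modelId, requestBody.messages, {
    timeoutMs: 45000,
    generationOptions: requestBody.options
  });

  // Response validation stays in the selector, exactly as in the diff.
  if (!data.message || !data.message.content) {
    throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
  }
  return data.message.content;
}
```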
package/src/models/deterministic-selector.js
CHANGED

@@ -9,7 +9,7 @@ const fs = require('fs');
 const path = require('path');
 const os = require('os');
 const { spawn } = require('child_process');
-const
+const OllamaClient = require('../ollama/client');
 const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
 const {
     parseBillionsValue: parseMoEBillionsValue,
@@ -24,6 +24,7 @@ class DeterministicModelSelector {
     constructor() {
         this.catalogPath = path.join(__dirname, 'catalog.json');
         this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
+        this.ollamaClient = new OllamaClient();
         this.ollamaCachePaths = [
             path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
             path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
@@ -2018,33 +2019,23 @@ class DeterministicModelSelector {
 
         const prompt = prompts[category] || prompts['general'];
         const targetTokens = 128;
-
-        const
-
-
-
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({
-                model: modelId,
-                prompt: prompt,
-                stream: false,
-                options: {
-                    num_predict: targetTokens
-                }
-            })
+
+        const result = await this.ollamaClient.generate(modelId, prompt, {
+            generationOptions: {
+                num_predict: targetTokens
+            }
         });
-
-        if (
-
+
+        if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
+            return result.tokensPerSecond;
         }
-
-        const
-        const
-
-
-        const tokensGenerated = result.
-
+
+        const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
+        const estimatedResponseTokens = result.response
+            ? result.response.split(/\s+/).filter(Boolean).length * 1.3
+            : targetTokens;
+        const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
+
         return tokensGenerated / elapsedSeconds;
     }
 
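The replacement benchmark logic is self-contained enough to sketch on its own. A small standalone version, assuming a `generate()` result shaped like the one this release's client returns (`tokensPerSecond`, `responseTime`, `response`, `eval_count`):

```js
// Standalone sketch of the fallback throughput estimate from the diff above;
// the sample result object in the example call is fabricated.
function estimateTokensPerSecond(result, targetTokens = 128) {
  // Prefer the rate the client already computed, when it is usable.
  if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
    return result.tokensPerSecond;
  }

  // Otherwise estimate from wall-clock time plus a rough token count:
  // ~1.3 tokens per whitespace-separated word when eval_count is absent.
  const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
  const estimatedResponseTokens = result.response
    ? result.response.split(/\s+/).filter(Boolean).length * 1.3
    : targetTokens;
  const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
  return tokensGenerated / elapsedSeconds;
}

// Example: 90 words in 2.6 s with no eval_count ≈ (90 * 1.3) / 2.6 ≈ 45 tok/s.
console.log(estimateTokensPerSecond({ responseTime: 2600, response: 'word '.repeat(90) }));
```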
package/src/ollama/client.js
CHANGED
@@ -3,9 +3,9 @@ const fetch = require('../utils/fetch');
 class OllamaClient {
     constructor(baseURL = null) {
         // Support OLLAMA_HOST environment variable (standard Ollama configuration)
-        // Also support OLLAMA_URL for backwards compatibility
+        // Also support OLLAMA_BASE_URL and OLLAMA_URL for backwards compatibility
         this.preferredBaseURL = this.normalizeBaseURL(
-            baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
+            baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || process.env.OLLAMA_URL || 'http://localhost:11434'
         );
         this.baseURL = this.preferredBaseURL;
 
@@ -35,7 +35,8 @@ class OllamaClient {
             candidates.push(ipv4.toString().replace(/\/$/, ''));
 
             const ipv6 = new URL(parsed.toString());
-
+            // URL.hostname expects bracketed IPv6 literals when mutating an existing URL.
+            ipv6.hostname = '[::1]';
             candidates.push(ipv6.toString().replace(/\/$/, ''));
         }
     } catch (error) {
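The bracketed-literal comment added above refers to WHATWG URL behaviour that is easy to trip over; a quick standalone illustration (not package code):

```js
// When mutating an existing URL, the hostname setter only accepts IPv6
// literals in bracketed form; an unbracketed value is silently ignored.
const ipv6 = new URL('http://localhost:11434');

ipv6.hostname = '::1';        // rejected: hostname stays "localhost"
console.log(ipv6.hostname);   // "localhost"

ipv6.hostname = '[::1]';      // accepted: host becomes the IPv6 loopback
console.log(ipv6.toString()); // "http://[::1]:11434/"
```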
@@ -473,51 +474,86 @@ class OllamaClient {
         };
     }
 
-    async
+    async generate(modelName, prompt, options = {}) {
         const availability = await this.checkOllamaAvailability();
         if (!availability.available) {
             throw new Error(`Ollama not available: ${availability.error}`);
         }
 
+        const {
+            timeoutMs = 30000,
+            stream = false,
+            keepAlive,
+            format,
+            generationOptions = {}
+        } = options;
+
+        const payload = {
+            model: modelName,
+            prompt,
+            stream: Boolean(stream)
+        };
+
+        if (keepAlive) payload.keep_alive = keepAlive;
+        if (format) payload.format = format;
+        if (generationOptions && Object.keys(generationOptions).length > 0) {
+            payload.options = generationOptions;
+        }
+
         const startTime = Date.now();
 
         try {
             const controller = new AbortController();
-            const timeoutId = setTimeout(() => controller.abort(),
+            const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
 
             const response = await fetch(`${this.baseURL}/api/generate`, {
                 method: 'POST',
                 signal: controller.signal,
                 headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify(
-                    model: modelName,
-                    prompt: testPrompt,
-                    stream: false,
-                    options: {
-                        num_predict: 50 // Limit the response for a quick test
-                    }
-                })
+                body: JSON.stringify(payload)
             });
 
             clearTimeout(timeoutId);
 
             if (!response.ok) {
-
+                const errorText = await response.text();
+                throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
             }
 
             const data = await response.json();
-            const
+            const responseTime = Date.now() - startTime;
+            const speed = this.calculateTokensPerSecond(data, responseTime);
 
-
+            return {
+                ...data,
+                responseTime,
+                tokensPerSecond: speed.tokensPerSecond,
+                evalTokensPerSecond: speed.evalTokensPerSecond,
+                endToEndTokensPerSecond: speed.endToEndTokensPerSecond
+            };
+        } catch (error) {
+            throw new Error(`Failed to run generate request: ${error.message}`);
+        }
+    }
+
+    async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
+        const startTime = Date.now();
+
+        try {
+            const data = await this.generate(modelName, testPrompt, {
+                timeoutMs: 30000,
+                generationOptions: {
+                    num_predict: 50
+                }
+            });
             const tokensGenerated = Number(data.eval_count) || 0;
-            const speed = this.calculateTokensPerSecond(data, totalTime);
 
             return {
                 success: true,
-                responseTime:
-                tokensPerSecond:
-                evalTokensPerSecond:
-                endToEndTokensPerSecond:
+                responseTime: data.responseTime,
+                tokensPerSecond: data.tokensPerSecond,
+                evalTokensPerSecond: data.evalTokensPerSecond,
+                endToEndTokensPerSecond: data.endToEndTokensPerSecond,
                 tokensGenerated,
                 loadTime: data.load_duration ? Math.round(data.load_duration / 1000000) : null,
                 evalTime: data.eval_duration ? Math.round(data.eval_duration / 1000000) : null,
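Taken together, these hunks split the old quick-test helper into a general `generate()` plus a thin `testModelPerformance()` wrapper. A hedged usage sketch; the require path, model tag, and option values are examples, and only the option names (`timeoutMs`, `keepAlive`, `format`, `generationOptions`) come from the diff above:

```js
// Hypothetical caller of the new generate()/testModelPerformance() pair,
// assumed to run from the package root with a locally pulled model.
const OllamaClient = require('./src/ollama/client');

async function main() {
  const client = new OllamaClient();

  // One-off generation with a custom timeout and generation options.
  const result = await client.generate('llama3.2:3b', 'Say hello in five words.', {
    timeoutMs: 45000,
    keepAlive: '5m',                        // forwarded as keep_alive in the payload
    generationOptions: { num_predict: 64 }  // forwarded as options in the payload
  });
  console.log(result.response, result.tokensPerSecond);

  // The quick benchmark now just delegates to generate() with num_predict: 50.
  const perf = await client.testModelPerformance('llama3.2:3b');
  console.log(perf.success, perf.tokensPerSecond, perf.evalTokensPerSecond);
}

main().catch(console.error);
```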
package/src/utils/config.js
CHANGED
@@ -16,7 +16,7 @@ class ConfigManager {
         return {
             version: "2.0",
             ollama: {
-                baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434",
+                baseURL: process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || "http://localhost:11434",
                 timeout: 30000,
                 enabled: true,
                 autoDetect: true,
@@ -176,7 +176,9 @@ class ConfigManager {
         }
 
         // Ollama overrides
-        if (process.env.
+        if (process.env.OLLAMA_HOST) {
+            this.config.ollama.baseURL = process.env.OLLAMA_HOST;
+        } else if (process.env.OLLAMA_BASE_URL) {
             this.config.ollama.baseURL = process.env.OLLAMA_BASE_URL;
         }
 
@@ -356,4 +358,4 @@ class ConfigManager {
     }
 }
 
-module.exports = ConfigManager;
+module.exports = ConfigManager;