llm-checker 3.5.8 → 3.5.9
This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/bin/mcp-server.mjs +1 -1
- package/package.json +1 -1
- package/src/models/ai-check-selector.js +5 -11
- package/src/models/deterministic-selector.js +17 -26
- package/src/ollama/client.js +55 -20
- package/src/utils/config.js +5 -3
package/bin/mcp-server.mjs
CHANGED
package/package.json
CHANGED
|
@@ -7,15 +7,16 @@
|
|
|
7
7
|
|
|
8
8
|
const DeterministicModelSelector = require('./deterministic-selector');
|
|
9
9
|
const { OllamaNativeScraper } = require('../ollama/native-scraper');
|
|
10
|
+
const OllamaClient = require('../ollama/client');
|
|
10
11
|
const crypto = require('crypto');
|
|
11
12
|
const fs = require('fs');
|
|
12
13
|
const path = require('path');
|
|
13
|
-
const fetch = require('../utils/fetch');
|
|
14
14
|
const { evaluateFineTuningSupport } = require('./fine-tuning-support');
|
|
15
15
|
|
|
16
16
|
class AICheckSelector {
|
|
17
17
|
constructor() {
|
|
18
18
|
this.deterministicSelector = new DeterministicModelSelector();
|
|
19
|
+
this.ollamaClient = new OllamaClient();
|
|
19
20
|
this.ollamaScraper = new OllamaNativeScraper();
|
|
20
21
|
this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
|
|
21
22
|
|
|
@@ -389,17 +390,10 @@ Return JSON with this structure:
|
|
|
389
390
|
]
|
|
390
391
|
};
|
|
391
392
|
|
|
392
|
-
const
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
body: JSON.stringify(requestBody)
|
|
393
|
+
const data = await this.ollamaClient.chat(modelId, requestBody.messages, {
|
|
394
|
+
timeoutMs: 45000,
|
|
395
|
+
generationOptions: requestBody.options
|
|
396
396
|
});
|
|
397
|
-
|
|
398
|
-
if (!response.ok) {
|
|
399
|
-
throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
const data = await response.json();
|
|
403
397
|
|
|
404
398
|
if (!data.message || !data.message.content) {
|
|
405
399
|
throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
|
|
@@ -9,7 +9,7 @@ const fs = require('fs');
|
|
|
9
9
|
const path = require('path');
|
|
10
10
|
const os = require('os');
|
|
11
11
|
const { spawn } = require('child_process');
|
|
12
|
-
const
|
|
12
|
+
const OllamaClient = require('../ollama/client');
|
|
13
13
|
const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
|
|
14
14
|
const {
|
|
15
15
|
parseBillionsValue: parseMoEBillionsValue,
|
|
@@ -24,6 +24,7 @@ class DeterministicModelSelector {
|
|
|
24
24
|
constructor() {
|
|
25
25
|
this.catalogPath = path.join(__dirname, 'catalog.json');
|
|
26
26
|
this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
|
|
27
|
+
this.ollamaClient = new OllamaClient();
|
|
27
28
|
this.ollamaCachePaths = [
|
|
28
29
|
path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
|
|
29
30
|
path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
|
|
@@ -2018,33 +2019,23 @@ class DeterministicModelSelector {
|
|
|
2018
2019
|
|
|
2019
2020
|
const prompt = prompts[category] || prompts['general'];
|
|
2020
2021
|
const targetTokens = 128;
|
|
2021
|
-
|
|
2022
|
-
const
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
method: 'POST',
|
|
2027
|
-
headers: { 'Content-Type': 'application/json' },
|
|
2028
|
-
body: JSON.stringify({
|
|
2029
|
-
model: modelId,
|
|
2030
|
-
prompt: prompt,
|
|
2031
|
-
stream: false,
|
|
2032
|
-
options: {
|
|
2033
|
-
num_predict: targetTokens
|
|
2034
|
-
}
|
|
2035
|
-
})
|
|
2022
|
+
|
|
2023
|
+
const result = await this.ollamaClient.generate(modelId, prompt, {
|
|
2024
|
+
generationOptions: {
|
|
2025
|
+
num_predict: targetTokens
|
|
2026
|
+
}
|
|
2036
2027
|
});
|
|
2037
|
-
|
|
2038
|
-
if (
|
|
2039
|
-
|
|
2028
|
+
|
|
2029
|
+
if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
|
|
2030
|
+
return result.tokensPerSecond;
|
|
2040
2031
|
}
|
|
2041
|
-
|
|
2042
|
-
const
|
|
2043
|
-
const
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
const tokensGenerated = result.
|
|
2047
|
-
|
|
2032
|
+
|
|
2033
|
+
const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
|
|
2034
|
+
const estimatedResponseTokens = result.response
|
|
2035
|
+
? result.response.split(/\s+/).filter(Boolean).length * 1.3
|
|
2036
|
+
: targetTokens;
|
|
2037
|
+
const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
|
|
2038
|
+
|
|
2048
2039
|
return tokensGenerated / elapsedSeconds;
|
|
2049
2040
|
}
|
|
2050
2041
|
|
package/src/ollama/client.js
CHANGED
|
@@ -3,9 +3,9 @@ const fetch = require('../utils/fetch');
|
|
|
3
3
|
class OllamaClient {
|
|
4
4
|
constructor(baseURL = null) {
|
|
5
5
|
// Support OLLAMA_HOST environment variable (standard Ollama configuration)
|
|
6
|
-
// Also support OLLAMA_URL for backwards compatibility
|
|
6
|
+
// Also support OLLAMA_BASE_URL and OLLAMA_URL for backwards compatibility
|
|
7
7
|
this.preferredBaseURL = this.normalizeBaseURL(
|
|
8
|
-
baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
|
|
8
|
+
baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || process.env.OLLAMA_URL || 'http://localhost:11434'
|
|
9
9
|
);
|
|
10
10
|
this.baseURL = this.preferredBaseURL;
|
|
11
11
|
|
|
@@ -473,51 +473,86 @@ class OllamaClient {
|
|
|
473
473
|
};
|
|
474
474
|
}
|
|
475
475
|
|
|
476
|
-
async
|
|
476
|
+
async generate(modelName, prompt, options = {}) {
|
|
477
477
|
const availability = await this.checkOllamaAvailability();
|
|
478
478
|
if (!availability.available) {
|
|
479
479
|
throw new Error(`Ollama not available: ${availability.error}`);
|
|
480
480
|
}
|
|
481
481
|
|
|
482
|
+
const {
|
|
483
|
+
timeoutMs = 30000,
|
|
484
|
+
stream = false,
|
|
485
|
+
keepAlive,
|
|
486
|
+
format,
|
|
487
|
+
generationOptions = {}
|
|
488
|
+
} = options;
|
|
489
|
+
|
|
490
|
+
const payload = {
|
|
491
|
+
model: modelName,
|
|
492
|
+
prompt,
|
|
493
|
+
stream: Boolean(stream)
|
|
494
|
+
};
|
|
495
|
+
|
|
496
|
+
if (keepAlive) payload.keep_alive = keepAlive;
|
|
497
|
+
if (format) payload.format = format;
|
|
498
|
+
if (generationOptions && Object.keys(generationOptions).length > 0) {
|
|
499
|
+
payload.options = generationOptions;
|
|
500
|
+
}
|
|
501
|
+
|
|
482
502
|
const startTime = Date.now();
|
|
483
503
|
|
|
484
504
|
try {
|
|
485
505
|
const controller = new AbortController();
|
|
486
|
-
const timeoutId = setTimeout(() => controller.abort(),
|
|
506
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
487
507
|
|
|
488
508
|
const response = await fetch(`${this.baseURL}/api/generate`, {
|
|
489
509
|
method: 'POST',
|
|
490
510
|
signal: controller.signal,
|
|
491
511
|
headers: { 'Content-Type': 'application/json' },
|
|
492
|
-
body: JSON.stringify(
|
|
493
|
-
model: modelName,
|
|
494
|
-
prompt: testPrompt,
|
|
495
|
-
stream: false,
|
|
496
|
-
options: {
|
|
497
|
-
num_predict: 50 // Limitar respuesta para test rápido
|
|
498
|
-
}
|
|
499
|
-
})
|
|
512
|
+
body: JSON.stringify(payload)
|
|
500
513
|
});
|
|
501
514
|
|
|
502
515
|
clearTimeout(timeoutId);
|
|
503
516
|
|
|
504
517
|
if (!response.ok) {
|
|
505
|
-
|
|
518
|
+
const errorText = await response.text();
|
|
519
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
|
|
506
520
|
}
|
|
507
521
|
|
|
508
522
|
const data = await response.json();
|
|
509
|
-
const
|
|
523
|
+
const responseTime = Date.now() - startTime;
|
|
524
|
+
const speed = this.calculateTokensPerSecond(data, responseTime);
|
|
510
525
|
|
|
511
|
-
|
|
526
|
+
return {
|
|
527
|
+
...data,
|
|
528
|
+
responseTime,
|
|
529
|
+
tokensPerSecond: speed.tokensPerSecond,
|
|
530
|
+
evalTokensPerSecond: speed.evalTokensPerSecond,
|
|
531
|
+
endToEndTokensPerSecond: speed.endToEndTokensPerSecond
|
|
532
|
+
};
|
|
533
|
+
} catch (error) {
|
|
534
|
+
throw new Error(`Failed to run generate request: ${error.message}`);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
|
|
539
|
+
const startTime = Date.now();
|
|
540
|
+
|
|
541
|
+
try {
|
|
542
|
+
const data = await this.generate(modelName, testPrompt, {
|
|
543
|
+
timeoutMs: 30000,
|
|
544
|
+
generationOptions: {
|
|
545
|
+
num_predict: 50
|
|
546
|
+
}
|
|
547
|
+
});
|
|
512
548
|
const tokensGenerated = Number(data.eval_count) || 0;
|
|
513
|
-
const speed = this.calculateTokensPerSecond(data, totalTime);
|
|
514
549
|
|
|
515
550
|
return {
|
|
516
551
|
success: true,
|
|
517
|
-
responseTime:
|
|
518
|
-
tokensPerSecond:
|
|
519
|
-
evalTokensPerSecond:
|
|
520
|
-
endToEndTokensPerSecond:
|
|
552
|
+
responseTime: data.responseTime,
|
|
553
|
+
tokensPerSecond: data.tokensPerSecond,
|
|
554
|
+
evalTokensPerSecond: data.evalTokensPerSecond,
|
|
555
|
+
endToEndTokensPerSecond: data.endToEndTokensPerSecond,
|
|
521
556
|
tokensGenerated,
|
|
522
557
|
loadTime: data.load_duration ? Math.round(data.load_duration / 1000000) : null,
|
|
523
558
|
evalTime: data.eval_duration ? Math.round(data.eval_duration / 1000000) : null,
|
package/src/utils/config.js
CHANGED
|
@@ -16,7 +16,7 @@ class ConfigManager {
|
|
|
16
16
|
return {
|
|
17
17
|
version: "2.0",
|
|
18
18
|
ollama: {
|
|
19
|
-
baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434",
|
|
19
|
+
baseURL: process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || "http://localhost:11434",
|
|
20
20
|
timeout: 30000,
|
|
21
21
|
enabled: true,
|
|
22
22
|
autoDetect: true,
|
|
@@ -176,7 +176,9 @@ class ConfigManager {
|
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
// Ollama overrides
|
|
179
|
-
if (process.env.
|
|
179
|
+
if (process.env.OLLAMA_HOST) {
|
|
180
|
+
this.config.ollama.baseURL = process.env.OLLAMA_HOST;
|
|
181
|
+
} else if (process.env.OLLAMA_BASE_URL) {
|
|
180
182
|
this.config.ollama.baseURL = process.env.OLLAMA_BASE_URL;
|
|
181
183
|
}
|
|
182
184
|
|
|
@@ -356,4 +358,4 @@ class ConfigManager {
|
|
|
356
358
|
}
|
|
357
359
|
}
|
|
358
360
|
|
|
359
|
-
module.exports = ConfigManager;
|
|
361
|
+
module.exports = ConfigManager;
|