llm-checker 3.5.7 → 3.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/enhanced_cli.js +7 -1
- package/bin/mcp-server.mjs +1 -1
- package/package.json +1 -1
- package/src/hardware/detector.js +6 -1
- package/src/hardware/unified-detector.js +71 -3
- package/src/models/ai-check-selector.js +5 -11
- package/src/models/deterministic-selector.js +17 -26
- package/src/models/intelligent-selector.js +3 -2
- package/src/ollama/client.js +159 -51
- package/src/utils/config.js +5 -3
package/bin/enhanced_cli.js
CHANGED
|
@@ -5059,10 +5059,16 @@ program
|
|
|
5059
5059
|
console.log(` Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
|
|
5060
5060
|
console.log(` Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
|
|
5061
5061
|
console.log(` Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
|
|
5062
|
+
if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
|
|
5063
|
+
console.log(` Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
|
|
5064
|
+
}
|
|
5062
5065
|
console.log(` Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
|
|
5063
5066
|
console.log(` Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
|
|
5064
5067
|
if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
|
|
5065
|
-
|
|
5068
|
+
const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
|
|
5069
|
+
? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
|
|
5070
|
+
: 'Integrated/shared-memory GPU detected, runtime remains CPU';
|
|
5071
|
+
console.log(` Assist path: ${chalk.yellow(assistMessage)}`);
|
|
5066
5072
|
}
|
|
5067
5073
|
|
|
5068
5074
|
// CPU
|
package/bin/mcp-server.mjs
CHANGED
package/package.json
CHANGED
package/src/hardware/detector.js
CHANGED
|
@@ -190,7 +190,12 @@ class HardwareDetector {
|
|
|
190
190
|
}
|
|
191
191
|
|
|
192
192
|
// Skip very generic/placeholder entries
|
|
193
|
-
if (
|
|
193
|
+
if (
|
|
194
|
+
model.includes('standard vga') ||
|
|
195
|
+
model.includes('microsoft basic') ||
|
|
196
|
+
model.includes('remote display adapter') ||
|
|
197
|
+
model.includes('basic render driver')
|
|
198
|
+
) {
|
|
194
199
|
return false;
|
|
195
200
|
}
|
|
196
201
|
|
|
@@ -216,6 +216,9 @@ class UnifiedDetector {
|
|
|
216
216
|
const summary = {
|
|
217
217
|
bestBackend: result.primary?.type || 'cpu',
|
|
218
218
|
backendName: result.primary?.name || 'CPU',
|
|
219
|
+
runtimeBackend: result.primary?.type || 'cpu',
|
|
220
|
+
runtimeBackendName: result.primary?.name || 'CPU',
|
|
221
|
+
hasRuntimeAssist: false,
|
|
219
222
|
totalVRAM: 0,
|
|
220
223
|
effectiveMemory: 0,
|
|
221
224
|
speedCoefficient: 0,
|
|
@@ -316,6 +319,11 @@ class UnifiedDetector {
|
|
|
316
319
|
}
|
|
317
320
|
summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
|
|
318
321
|
|
|
322
|
+
const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
|
|
323
|
+
summary.runtimeBackend = runtimeSelection.backend;
|
|
324
|
+
summary.runtimeBackendName = runtimeSelection.name;
|
|
325
|
+
summary.hasRuntimeAssist = runtimeSelection.assisted;
|
|
326
|
+
|
|
319
327
|
// Effective memory for LLM loading
|
|
320
328
|
// For GPU: use VRAM; for CPU/Metal: use system RAM
|
|
321
329
|
if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
|
|
@@ -398,6 +406,7 @@ class UnifiedDetector {
|
|
|
398
406
|
.map((gpu) => {
|
|
399
407
|
const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
|
|
400
408
|
if (!name) return null;
|
|
409
|
+
if (this.isRemoteDisplayModel(name)) return null;
|
|
401
410
|
|
|
402
411
|
let type = gpu?.type;
|
|
403
412
|
if (type !== 'integrated' && type !== 'dedicated') {
|
|
@@ -417,6 +426,61 @@ class UnifiedDetector {
|
|
|
417
426
|
.filter(Boolean);
|
|
418
427
|
}
|
|
419
428
|
|
|
429
|
+
isRemoteDisplayModel(model) {
|
|
430
|
+
const lower = String(model || '').toLowerCase();
|
|
431
|
+
if (!lower) return false;
|
|
432
|
+
|
|
433
|
+
return (
|
|
434
|
+
lower.includes('microsoft remote display adapter') ||
|
|
435
|
+
lower.includes('remote display adapter') ||
|
|
436
|
+
lower.includes('basic render driver')
|
|
437
|
+
);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
inferGpuVendor(name) {
|
|
441
|
+
const lower = String(name || '').toLowerCase();
|
|
442
|
+
if (!lower) return 'unknown';
|
|
443
|
+
if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
|
|
444
|
+
if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
|
|
445
|
+
if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
|
|
446
|
+
if (lower.includes('apple')) return 'apple';
|
|
447
|
+
return 'unknown';
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
detectRuntimeAssistBackend(result, topology = {}) {
|
|
451
|
+
const primaryType = result?.primary?.type || 'cpu';
|
|
452
|
+
const primaryName = result?.primary?.name || 'CPU';
|
|
453
|
+
|
|
454
|
+
if (primaryType !== 'cpu') {
|
|
455
|
+
return {
|
|
456
|
+
backend: primaryType,
|
|
457
|
+
name: primaryName,
|
|
458
|
+
assisted: false
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
const platform = result?.platform || result?.os?.platform || normalizePlatform();
|
|
463
|
+
const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
|
|
464
|
+
const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
|
|
465
|
+
|
|
466
|
+
const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
|
|
467
|
+
const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
|
|
468
|
+
|
|
469
|
+
if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
|
|
470
|
+
return {
|
|
471
|
+
backend: 'vulkan',
|
|
472
|
+
name: 'Vulkan',
|
|
473
|
+
assisted: true
|
|
474
|
+
};
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
return {
|
|
478
|
+
backend: primaryType,
|
|
479
|
+
name: primaryName,
|
|
480
|
+
assisted: false
|
|
481
|
+
};
|
|
482
|
+
}
|
|
483
|
+
|
|
420
484
|
getSystemMemoryGB(memoryInfo) {
|
|
421
485
|
const totalBytes = Number(memoryInfo?.total || 0);
|
|
422
486
|
if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
|
|
@@ -496,6 +560,7 @@ class UnifiedDetector {
|
|
|
496
560
|
.map((controller) => {
|
|
497
561
|
const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
|
|
498
562
|
if (!name || name.toLowerCase() === 'unknown') return null;
|
|
563
|
+
if (this.isRemoteDisplayModel(name)) return null;
|
|
499
564
|
|
|
500
565
|
const nameLower = name.toLowerCase();
|
|
501
566
|
if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
|
|
@@ -867,16 +932,19 @@ class UnifiedDetector {
|
|
|
867
932
|
return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
|
|
868
933
|
}
|
|
869
934
|
else {
|
|
935
|
+
const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
|
|
936
|
+
? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
|
|
937
|
+
: 'CPU backend';
|
|
870
938
|
if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
|
|
871
939
|
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
872
940
|
if (summary.integratedSharedMemory > 0) {
|
|
873
|
-
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory,
|
|
941
|
+
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
874
942
|
}
|
|
875
|
-
return `${gpuDesc} (integrated/shared memory,
|
|
943
|
+
return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
876
944
|
}
|
|
877
945
|
if (summary.gpuModel && summary.gpuCount > 0) {
|
|
878
946
|
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
879
|
-
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected,
|
|
947
|
+
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
880
948
|
}
|
|
881
949
|
return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
|
|
882
950
|
}
|
|
@@ -7,15 +7,16 @@
|
|
|
7
7
|
|
|
8
8
|
const DeterministicModelSelector = require('./deterministic-selector');
|
|
9
9
|
const { OllamaNativeScraper } = require('../ollama/native-scraper');
|
|
10
|
+
const OllamaClient = require('../ollama/client');
|
|
10
11
|
const crypto = require('crypto');
|
|
11
12
|
const fs = require('fs');
|
|
12
13
|
const path = require('path');
|
|
13
|
-
const fetch = require('../utils/fetch');
|
|
14
14
|
const { evaluateFineTuningSupport } = require('./fine-tuning-support');
|
|
15
15
|
|
|
16
16
|
class AICheckSelector {
|
|
17
17
|
constructor() {
|
|
18
18
|
this.deterministicSelector = new DeterministicModelSelector();
|
|
19
|
+
this.ollamaClient = new OllamaClient();
|
|
19
20
|
this.ollamaScraper = new OllamaNativeScraper();
|
|
20
21
|
this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
|
|
21
22
|
|
|
@@ -389,17 +390,10 @@ Return JSON with this structure:
|
|
|
389
390
|
]
|
|
390
391
|
};
|
|
391
392
|
|
|
392
|
-
const
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
body: JSON.stringify(requestBody)
|
|
393
|
+
const data = await this.ollamaClient.chat(modelId, requestBody.messages, {
|
|
394
|
+
timeoutMs: 45000,
|
|
395
|
+
generationOptions: requestBody.options
|
|
396
396
|
});
|
|
397
|
-
|
|
398
|
-
if (!response.ok) {
|
|
399
|
-
throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
const data = await response.json();
|
|
403
397
|
|
|
404
398
|
if (!data.message || !data.message.content) {
|
|
405
399
|
throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
|
|
@@ -9,7 +9,7 @@ const fs = require('fs');
|
|
|
9
9
|
const path = require('path');
|
|
10
10
|
const os = require('os');
|
|
11
11
|
const { spawn } = require('child_process');
|
|
12
|
-
const
|
|
12
|
+
const OllamaClient = require('../ollama/client');
|
|
13
13
|
const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
|
|
14
14
|
const {
|
|
15
15
|
parseBillionsValue: parseMoEBillionsValue,
|
|
@@ -24,6 +24,7 @@ class DeterministicModelSelector {
|
|
|
24
24
|
constructor() {
|
|
25
25
|
this.catalogPath = path.join(__dirname, 'catalog.json');
|
|
26
26
|
this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
|
|
27
|
+
this.ollamaClient = new OllamaClient();
|
|
27
28
|
this.ollamaCachePaths = [
|
|
28
29
|
path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
|
|
29
30
|
path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
|
|
@@ -2018,33 +2019,23 @@ class DeterministicModelSelector {
|
|
|
2018
2019
|
|
|
2019
2020
|
const prompt = prompts[category] || prompts['general'];
|
|
2020
2021
|
const targetTokens = 128;
|
|
2021
|
-
|
|
2022
|
-
const
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
method: 'POST',
|
|
2027
|
-
headers: { 'Content-Type': 'application/json' },
|
|
2028
|
-
body: JSON.stringify({
|
|
2029
|
-
model: modelId,
|
|
2030
|
-
prompt: prompt,
|
|
2031
|
-
stream: false,
|
|
2032
|
-
options: {
|
|
2033
|
-
num_predict: targetTokens
|
|
2034
|
-
}
|
|
2035
|
-
})
|
|
2022
|
+
|
|
2023
|
+
const result = await this.ollamaClient.generate(modelId, prompt, {
|
|
2024
|
+
generationOptions: {
|
|
2025
|
+
num_predict: targetTokens
|
|
2026
|
+
}
|
|
2036
2027
|
});
|
|
2037
|
-
|
|
2038
|
-
if (
|
|
2039
|
-
|
|
2028
|
+
|
|
2029
|
+
if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
|
|
2030
|
+
return result.tokensPerSecond;
|
|
2040
2031
|
}
|
|
2041
|
-
|
|
2042
|
-
const
|
|
2043
|
-
const
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
const tokensGenerated = result.
|
|
2047
|
-
|
|
2032
|
+
|
|
2033
|
+
const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
|
|
2034
|
+
const estimatedResponseTokens = result.response
|
|
2035
|
+
? result.response.split(/\s+/).filter(Boolean).length * 1.3
|
|
2036
|
+
: targetTokens;
|
|
2037
|
+
const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
|
|
2038
|
+
|
|
2048
2039
|
return tokensGenerated / elapsedSeconds;
|
|
2049
2040
|
}
|
|
2050
2041
|
|
|
@@ -98,7 +98,8 @@ class IntelligentSelector {
|
|
|
98
98
|
description: this.detector.getHardwareDescription(),
|
|
99
99
|
tier: this.detector.getHardwareTier(),
|
|
100
100
|
maxSize: this.detector.getMaxModelSize(),
|
|
101
|
-
backend: hardware.summary.bestBackend
|
|
101
|
+
backend: hardware.summary.bestBackend,
|
|
102
|
+
runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
|
|
102
103
|
},
|
|
103
104
|
policy: {
|
|
104
105
|
mode: policyEngine.getMode(),
|
|
@@ -163,7 +164,7 @@ class IntelligentSelector {
|
|
|
163
164
|
|
|
164
165
|
const context = {
|
|
165
166
|
backend: summary.bestBackend || null,
|
|
166
|
-
runtimeBackend: summary.bestBackend || null,
|
|
167
|
+
runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
|
|
167
168
|
ramGB: systemRAM,
|
|
168
169
|
totalRamGB: systemRAM,
|
|
169
170
|
hardware
|
package/src/ollama/client.js
CHANGED
|
@@ -3,14 +3,11 @@ const fetch = require('../utils/fetch');
|
|
|
3
3
|
class OllamaClient {
|
|
4
4
|
constructor(baseURL = null) {
|
|
5
5
|
// Support OLLAMA_HOST environment variable (standard Ollama configuration)
|
|
6
|
-
// Also support OLLAMA_URL for backwards compatibility
|
|
7
|
-
this.
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
this.baseURL = 'http://' + this.baseURL;
|
|
12
|
-
}
|
|
13
|
-
this.baseURL = this.baseURL.replace(/\/$/, '');
|
|
6
|
+
// Also support OLLAMA_BASE_URL and OLLAMA_URL for backwards compatibility
|
|
7
|
+
this.preferredBaseURL = this.normalizeBaseURL(
|
|
8
|
+
baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || process.env.OLLAMA_URL || 'http://localhost:11434'
|
|
9
|
+
);
|
|
10
|
+
this.baseURL = this.preferredBaseURL;
|
|
14
11
|
|
|
15
12
|
this.isAvailable = null;
|
|
16
13
|
this.lastCheck = 0;
|
|
@@ -18,6 +15,53 @@ class OllamaClient {
|
|
|
18
15
|
this._pendingCheck = null;
|
|
19
16
|
}
|
|
20
17
|
|
|
18
|
+
normalizeBaseURL(baseURL) {
|
|
19
|
+
let normalized = String(baseURL || '').trim();
|
|
20
|
+
if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
|
|
21
|
+
normalized = 'http://' + normalized;
|
|
22
|
+
}
|
|
23
|
+
return normalized.replace(/\/$/, '');
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
|
|
27
|
+
const normalized = this.normalizeBaseURL(baseURL);
|
|
28
|
+
const candidates = [normalized];
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
const parsed = new URL(normalized);
|
|
32
|
+
if (parsed.hostname === 'localhost') {
|
|
33
|
+
const ipv4 = new URL(parsed.toString());
|
|
34
|
+
ipv4.hostname = '127.0.0.1';
|
|
35
|
+
candidates.push(ipv4.toString().replace(/\/$/, ''));
|
|
36
|
+
|
|
37
|
+
const ipv6 = new URL(parsed.toString());
|
|
38
|
+
ipv6.hostname = '::1';
|
|
39
|
+
candidates.push(ipv6.toString().replace(/\/$/, ''));
|
|
40
|
+
}
|
|
41
|
+
} catch (error) {
|
|
42
|
+
// Keep the preferred URL only if parsing fails.
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return [...new Set(candidates)];
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
applyResolvedBaseURL(baseURL) {
|
|
49
|
+
this.baseURL = this.normalizeBaseURL(baseURL);
|
|
50
|
+
return this.baseURL;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
 * Decide whether an availability-probe failure looks like a connection
 * problem, based on the error's message text and name.
 *
 * @param {Error|object|null|undefined} error - Error raised by the probe.
 * @returns {boolean} True when the lower-cased message contains one of the
 *   connection-related markers, or the error's name is 'AbortError'.
 */
isRetryableAvailabilityError(error) {
    const text = String(error?.message || '').toLowerCase();
    const connectionMarkers = ['econnrefused', 'fetch failed', 'network', 'socket', 'connect'];

    if (connectionMarkers.some((marker) => text.includes(marker))) {
        return true;
    }

    return error?.name === 'AbortError';
}
|
|
64
|
+
|
|
21
65
|
async checkOllamaAvailability() {
|
|
22
66
|
|
|
23
67
|
if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
|
|
@@ -38,50 +82,79 @@ class OllamaClient {
|
|
|
38
82
|
}
|
|
39
83
|
|
|
40
84
|
async _doAvailabilityCheck() {
|
|
85
|
+
const candidateURLs = this.buildCandidateBaseURLs();
|
|
86
|
+
const attemptedURLs = [];
|
|
87
|
+
let lastError = null;
|
|
41
88
|
|
|
42
|
-
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
89
|
+
for (let index = 0; index < candidateURLs.length; index += 1) {
|
|
90
|
+
const candidateBaseURL = candidateURLs[index];
|
|
91
|
+
attemptedURLs.push(candidateBaseURL);
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
const controller = new AbortController();
|
|
95
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
96
|
+
|
|
97
|
+
const response = await fetch(`${candidateBaseURL}/api/version`, {
|
|
98
|
+
signal: controller.signal,
|
|
99
|
+
headers: { 'Content-Type': 'application/json' }
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
clearTimeout(timeoutId);
|
|
103
|
+
|
|
104
|
+
if (!response.ok) {
|
|
105
|
+
this.isAvailable = {
|
|
106
|
+
available: false,
|
|
107
|
+
error: 'Ollama not responding properly',
|
|
108
|
+
attemptedURL: candidateBaseURL,
|
|
109
|
+
attemptedURLs
|
|
110
|
+
};
|
|
111
|
+
this.lastCheck = Date.now();
|
|
112
|
+
return this.isAvailable;
|
|
113
|
+
}
|
|
52
114
|
|
|
53
|
-
if (response.ok) {
|
|
54
115
|
const data = await response.json();
|
|
55
|
-
this.
|
|
116
|
+
this.applyResolvedBaseURL(candidateBaseURL);
|
|
117
|
+
this.isAvailable = {
|
|
118
|
+
available: true,
|
|
119
|
+
version: data.version || 'unknown',
|
|
120
|
+
attemptedURL: candidateBaseURL,
|
|
121
|
+
attemptedURLs
|
|
122
|
+
};
|
|
56
123
|
this.lastCheck = Date.now();
|
|
57
124
|
return this.isAvailable;
|
|
125
|
+
} catch (error) {
|
|
126
|
+
lastError = error;
|
|
127
|
+
if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
|
|
128
|
+
break;
|
|
129
|
+
}
|
|
58
130
|
}
|
|
131
|
+
}
|
|
59
132
|
|
|
60
|
-
|
|
61
|
-
this.lastCheck = Date.now();
|
|
62
|
-
return this.isAvailable;
|
|
63
|
-
} catch (error) {
|
|
133
|
+
if (lastError) {
|
|
64
134
|
let errorMessage;
|
|
65
135
|
let hint = '';
|
|
136
|
+
const errorText = String(lastError.message || '');
|
|
137
|
+
const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
|
|
66
138
|
|
|
67
|
-
if (
|
|
68
|
-
errorMessage = `Ollama not running at ${
|
|
139
|
+
if (errorText.includes('ECONNREFUSED')) {
|
|
140
|
+
errorMessage = `Ollama not running at ${activeURL}`;
|
|
69
141
|
hint = 'Make sure Ollama is running. Try: ollama serve';
|
|
70
|
-
} else if (
|
|
71
|
-
errorMessage = `Ollama connection timeout at ${
|
|
142
|
+
} else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
|
|
143
|
+
errorMessage = `Ollama connection timeout at ${activeURL}`;
|
|
72
144
|
hint = 'The server is not responding. Check if Ollama is running and accessible.';
|
|
73
|
-
} else if (
|
|
74
|
-
errorMessage = `Cannot resolve host: ${
|
|
145
|
+
} else if (errorText.includes('ENOTFOUND')) {
|
|
146
|
+
errorMessage = `Cannot resolve host: ${activeURL}`;
|
|
75
147
|
hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
|
|
76
148
|
} else {
|
|
77
|
-
errorMessage = error
|
|
149
|
+
errorMessage = errorText || 'Unknown Ollama availability error';
|
|
78
150
|
}
|
|
79
151
|
|
|
80
152
|
this.isAvailable = {
|
|
81
153
|
available: false,
|
|
82
154
|
error: errorMessage,
|
|
83
|
-
hint
|
|
84
|
-
attemptedURL:
|
|
155
|
+
hint,
|
|
156
|
+
attemptedURL: activeURL,
|
|
157
|
+
attemptedURLs
|
|
85
158
|
};
|
|
86
159
|
this.lastCheck = Date.now();
|
|
87
160
|
return this.isAvailable;
|
|
@@ -400,51 +473,86 @@ class OllamaClient {
|
|
|
400
473
|
};
|
|
401
474
|
}
|
|
402
475
|
|
|
403
|
-
async
|
|
476
|
+
async generate(modelName, prompt, options = {}) {
|
|
404
477
|
const availability = await this.checkOllamaAvailability();
|
|
405
478
|
if (!availability.available) {
|
|
406
479
|
throw new Error(`Ollama not available: ${availability.error}`);
|
|
407
480
|
}
|
|
408
481
|
|
|
482
|
+
const {
|
|
483
|
+
timeoutMs = 30000,
|
|
484
|
+
stream = false,
|
|
485
|
+
keepAlive,
|
|
486
|
+
format,
|
|
487
|
+
generationOptions = {}
|
|
488
|
+
} = options;
|
|
489
|
+
|
|
490
|
+
const payload = {
|
|
491
|
+
model: modelName,
|
|
492
|
+
prompt,
|
|
493
|
+
stream: Boolean(stream)
|
|
494
|
+
};
|
|
495
|
+
|
|
496
|
+
if (keepAlive) payload.keep_alive = keepAlive;
|
|
497
|
+
if (format) payload.format = format;
|
|
498
|
+
if (generationOptions && Object.keys(generationOptions).length > 0) {
|
|
499
|
+
payload.options = generationOptions;
|
|
500
|
+
}
|
|
501
|
+
|
|
409
502
|
const startTime = Date.now();
|
|
410
503
|
|
|
411
504
|
try {
|
|
412
505
|
const controller = new AbortController();
|
|
413
|
-
const timeoutId = setTimeout(() => controller.abort(),
|
|
506
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
414
507
|
|
|
415
508
|
const response = await fetch(`${this.baseURL}/api/generate`, {
|
|
416
509
|
method: 'POST',
|
|
417
510
|
signal: controller.signal,
|
|
418
511
|
headers: { 'Content-Type': 'application/json' },
|
|
419
|
-
body: JSON.stringify(
|
|
420
|
-
model: modelName,
|
|
421
|
-
prompt: testPrompt,
|
|
422
|
-
stream: false,
|
|
423
|
-
options: {
|
|
424
|
-
num_predict: 50 // Limitar respuesta para test rápido
|
|
425
|
-
}
|
|
426
|
-
})
|
|
512
|
+
body: JSON.stringify(payload)
|
|
427
513
|
});
|
|
428
514
|
|
|
429
515
|
clearTimeout(timeoutId);
|
|
430
516
|
|
|
431
517
|
if (!response.ok) {
|
|
432
|
-
|
|
518
|
+
const errorText = await response.text();
|
|
519
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
|
|
433
520
|
}
|
|
434
521
|
|
|
435
522
|
const data = await response.json();
|
|
436
|
-
const
|
|
523
|
+
const responseTime = Date.now() - startTime;
|
|
524
|
+
const speed = this.calculateTokensPerSecond(data, responseTime);
|
|
525
|
+
|
|
526
|
+
return {
|
|
527
|
+
...data,
|
|
528
|
+
responseTime,
|
|
529
|
+
tokensPerSecond: speed.tokensPerSecond,
|
|
530
|
+
evalTokensPerSecond: speed.evalTokensPerSecond,
|
|
531
|
+
endToEndTokensPerSecond: speed.endToEndTokensPerSecond
|
|
532
|
+
};
|
|
533
|
+
} catch (error) {
|
|
534
|
+
throw new Error(`Failed to run generate request: ${error.message}`);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
437
537
|
|
|
438
|
-
|
|
538
|
+
async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
|
|
539
|
+
const startTime = Date.now();
|
|
540
|
+
|
|
541
|
+
try {
|
|
542
|
+
const data = await this.generate(modelName, testPrompt, {
|
|
543
|
+
timeoutMs: 30000,
|
|
544
|
+
generationOptions: {
|
|
545
|
+
num_predict: 50
|
|
546
|
+
}
|
|
547
|
+
});
|
|
439
548
|
const tokensGenerated = Number(data.eval_count) || 0;
|
|
440
|
-
const speed = this.calculateTokensPerSecond(data, totalTime);
|
|
441
549
|
|
|
442
550
|
return {
|
|
443
551
|
success: true,
|
|
444
|
-
responseTime:
|
|
445
|
-
tokensPerSecond:
|
|
446
|
-
evalTokensPerSecond:
|
|
447
|
-
endToEndTokensPerSecond:
|
|
552
|
+
responseTime: data.responseTime,
|
|
553
|
+
tokensPerSecond: data.tokensPerSecond,
|
|
554
|
+
evalTokensPerSecond: data.evalTokensPerSecond,
|
|
555
|
+
endToEndTokensPerSecond: data.endToEndTokensPerSecond,
|
|
448
556
|
tokensGenerated,
|
|
449
557
|
loadTime: data.load_duration ? Math.round(data.load_duration / 1000000) : null,
|
|
450
558
|
evalTime: data.eval_duration ? Math.round(data.eval_duration / 1000000) : null,
|
package/src/utils/config.js
CHANGED
|
@@ -16,7 +16,7 @@ class ConfigManager {
|
|
|
16
16
|
return {
|
|
17
17
|
version: "2.0",
|
|
18
18
|
ollama: {
|
|
19
|
-
baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434",
|
|
19
|
+
baseURL: process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || "http://localhost:11434",
|
|
20
20
|
timeout: 30000,
|
|
21
21
|
enabled: true,
|
|
22
22
|
autoDetect: true,
|
|
@@ -176,7 +176,9 @@ class ConfigManager {
|
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
// Ollama overrides
|
|
179
|
-
if (process.env.
|
|
179
|
+
if (process.env.OLLAMA_HOST) {
|
|
180
|
+
this.config.ollama.baseURL = process.env.OLLAMA_HOST;
|
|
181
|
+
} else if (process.env.OLLAMA_BASE_URL) {
|
|
180
182
|
this.config.ollama.baseURL = process.env.OLLAMA_BASE_URL;
|
|
181
183
|
}
|
|
182
184
|
|
|
@@ -356,4 +358,4 @@ class ConfigManager {
|
|
|
356
358
|
}
|
|
357
359
|
}
|
|
358
360
|
|
|
359
|
-
module.exports = ConfigManager;
|
|
361
|
+
module.exports = ConfigManager;
|