llm-checker 3.5.7 → 3.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/enhanced_cli.js
CHANGED
|
@@ -5059,10 +5059,16 @@ program
|
|
|
5059
5059
|
console.log(` Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
|
|
5060
5060
|
console.log(` Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
|
|
5061
5061
|
console.log(` Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
|
|
5062
|
+
if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
|
|
5063
|
+
console.log(` Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
|
|
5064
|
+
}
|
|
5062
5065
|
console.log(` Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
|
|
5063
5066
|
console.log(` Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
|
|
5064
5067
|
if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
|
|
5065
|
-
|
|
5068
|
+
const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
|
|
5069
|
+
? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
|
|
5070
|
+
: 'Integrated/shared-memory GPU detected, runtime remains CPU';
|
|
5071
|
+
console.log(` Assist path: ${chalk.yellow(assistMessage)}`);
|
|
5066
5072
|
}
|
|
5067
5073
|
|
|
5068
5074
|
// CPU
|
package/bin/mcp-server.mjs
CHANGED
package/package.json
CHANGED
package/src/hardware/detector.js
CHANGED
|
@@ -190,7 +190,12 @@ class HardwareDetector {
|
|
|
190
190
|
}
|
|
191
191
|
|
|
192
192
|
// Skip very generic/placeholder entries
|
|
193
|
-
if (
|
|
193
|
+
if (
|
|
194
|
+
model.includes('standard vga') ||
|
|
195
|
+
model.includes('microsoft basic') ||
|
|
196
|
+
model.includes('remote display adapter') ||
|
|
197
|
+
model.includes('basic render driver')
|
|
198
|
+
) {
|
|
194
199
|
return false;
|
|
195
200
|
}
|
|
196
201
|
|
|
@@ -216,6 +216,9 @@ class UnifiedDetector {
|
|
|
216
216
|
const summary = {
|
|
217
217
|
bestBackend: result.primary?.type || 'cpu',
|
|
218
218
|
backendName: result.primary?.name || 'CPU',
|
|
219
|
+
runtimeBackend: result.primary?.type || 'cpu',
|
|
220
|
+
runtimeBackendName: result.primary?.name || 'CPU',
|
|
221
|
+
hasRuntimeAssist: false,
|
|
219
222
|
totalVRAM: 0,
|
|
220
223
|
effectiveMemory: 0,
|
|
221
224
|
speedCoefficient: 0,
|
|
@@ -316,6 +319,11 @@ class UnifiedDetector {
|
|
|
316
319
|
}
|
|
317
320
|
summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
|
|
318
321
|
|
|
322
|
+
const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
|
|
323
|
+
summary.runtimeBackend = runtimeSelection.backend;
|
|
324
|
+
summary.runtimeBackendName = runtimeSelection.name;
|
|
325
|
+
summary.hasRuntimeAssist = runtimeSelection.assisted;
|
|
326
|
+
|
|
319
327
|
// Effective memory for LLM loading
|
|
320
328
|
// For GPU: use VRAM; for CPU/Metal: use system RAM
|
|
321
329
|
if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
|
|
@@ -398,6 +406,7 @@ class UnifiedDetector {
|
|
|
398
406
|
.map((gpu) => {
|
|
399
407
|
const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
|
|
400
408
|
if (!name) return null;
|
|
409
|
+
if (this.isRemoteDisplayModel(name)) return null;
|
|
401
410
|
|
|
402
411
|
let type = gpu?.type;
|
|
403
412
|
if (type !== 'integrated' && type !== 'dedicated') {
|
|
@@ -417,6 +426,61 @@ class UnifiedDetector {
|
|
|
417
426
|
.filter(Boolean);
|
|
418
427
|
}
|
|
419
428
|
|
|
429
|
+
isRemoteDisplayModel(model) {
|
|
430
|
+
const lower = String(model || '').toLowerCase();
|
|
431
|
+
if (!lower) return false;
|
|
432
|
+
|
|
433
|
+
return (
|
|
434
|
+
lower.includes('microsoft remote display adapter') ||
|
|
435
|
+
lower.includes('remote display adapter') ||
|
|
436
|
+
lower.includes('basic render driver')
|
|
437
|
+
);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
inferGpuVendor(name) {
|
|
441
|
+
const lower = String(name || '').toLowerCase();
|
|
442
|
+
if (!lower) return 'unknown';
|
|
443
|
+
if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
|
|
444
|
+
if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
|
|
445
|
+
if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
|
|
446
|
+
if (lower.includes('apple')) return 'apple';
|
|
447
|
+
return 'unknown';
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
detectRuntimeAssistBackend(result, topology = {}) {
|
|
451
|
+
const primaryType = result?.primary?.type || 'cpu';
|
|
452
|
+
const primaryName = result?.primary?.name || 'CPU';
|
|
453
|
+
|
|
454
|
+
if (primaryType !== 'cpu') {
|
|
455
|
+
return {
|
|
456
|
+
backend: primaryType,
|
|
457
|
+
name: primaryName,
|
|
458
|
+
assisted: false
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
const platform = result?.platform || result?.os?.platform || normalizePlatform();
|
|
463
|
+
const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
|
|
464
|
+
const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
|
|
465
|
+
|
|
466
|
+
const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
|
|
467
|
+
const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
|
|
468
|
+
|
|
469
|
+
if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
|
|
470
|
+
return {
|
|
471
|
+
backend: 'vulkan',
|
|
472
|
+
name: 'Vulkan',
|
|
473
|
+
assisted: true
|
|
474
|
+
};
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
return {
|
|
478
|
+
backend: primaryType,
|
|
479
|
+
name: primaryName,
|
|
480
|
+
assisted: false
|
|
481
|
+
};
|
|
482
|
+
}
|
|
483
|
+
|
|
420
484
|
getSystemMemoryGB(memoryInfo) {
|
|
421
485
|
const totalBytes = Number(memoryInfo?.total || 0);
|
|
422
486
|
if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
|
|
@@ -496,6 +560,7 @@ class UnifiedDetector {
|
|
|
496
560
|
.map((controller) => {
|
|
497
561
|
const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
|
|
498
562
|
if (!name || name.toLowerCase() === 'unknown') return null;
|
|
563
|
+
if (this.isRemoteDisplayModel(name)) return null;
|
|
499
564
|
|
|
500
565
|
const nameLower = name.toLowerCase();
|
|
501
566
|
if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
|
|
@@ -867,16 +932,19 @@ class UnifiedDetector {
|
|
|
867
932
|
return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
|
|
868
933
|
}
|
|
869
934
|
else {
|
|
935
|
+
const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
|
|
936
|
+
? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
|
|
937
|
+
: 'CPU backend';
|
|
870
938
|
if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
|
|
871
939
|
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
872
940
|
if (summary.integratedSharedMemory > 0) {
|
|
873
|
-
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory,
|
|
941
|
+
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
874
942
|
}
|
|
875
|
-
return `${gpuDesc} (integrated/shared memory,
|
|
943
|
+
return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
876
944
|
}
|
|
877
945
|
if (summary.gpuModel && summary.gpuCount > 0) {
|
|
878
946
|
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
879
|
-
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected,
|
|
947
|
+
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
880
948
|
}
|
|
881
949
|
return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
|
|
882
950
|
}
|
|
@@ -98,7 +98,8 @@ class IntelligentSelector {
|
|
|
98
98
|
description: this.detector.getHardwareDescription(),
|
|
99
99
|
tier: this.detector.getHardwareTier(),
|
|
100
100
|
maxSize: this.detector.getMaxModelSize(),
|
|
101
|
-
backend: hardware.summary.bestBackend
|
|
101
|
+
backend: hardware.summary.bestBackend,
|
|
102
|
+
runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
|
|
102
103
|
},
|
|
103
104
|
policy: {
|
|
104
105
|
mode: policyEngine.getMode(),
|
|
@@ -163,7 +164,7 @@ class IntelligentSelector {
|
|
|
163
164
|
|
|
164
165
|
const context = {
|
|
165
166
|
backend: summary.bestBackend || null,
|
|
166
|
-
runtimeBackend: summary.bestBackend || null,
|
|
167
|
+
runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
|
|
167
168
|
ramGB: systemRAM,
|
|
168
169
|
totalRamGB: systemRAM,
|
|
169
170
|
hardware
|
package/src/ollama/client.js
CHANGED
|
@@ -4,13 +4,10 @@ class OllamaClient {
|
|
|
4
4
|
constructor(baseURL = null) {
|
|
5
5
|
// Support OLLAMA_HOST environment variable (standard Ollama configuration)
|
|
6
6
|
// Also support OLLAMA_URL for backwards compatibility
|
|
7
|
-
this.
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
this.baseURL = 'http://' + this.baseURL;
|
|
12
|
-
}
|
|
13
|
-
this.baseURL = this.baseURL.replace(/\/$/, '');
|
|
7
|
+
this.preferredBaseURL = this.normalizeBaseURL(
|
|
8
|
+
baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
|
|
9
|
+
);
|
|
10
|
+
this.baseURL = this.preferredBaseURL;
|
|
14
11
|
|
|
15
12
|
this.isAvailable = null;
|
|
16
13
|
this.lastCheck = 0;
|
|
@@ -18,6 +15,53 @@ class OllamaClient {
|
|
|
18
15
|
this._pendingCheck = null;
|
|
19
16
|
}
|
|
20
17
|
|
|
18
|
+
normalizeBaseURL(baseURL) {
|
|
19
|
+
let normalized = String(baseURL || '').trim();
|
|
20
|
+
if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
|
|
21
|
+
normalized = 'http://' + normalized;
|
|
22
|
+
}
|
|
23
|
+
return normalized.replace(/\/$/, '');
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
|
|
27
|
+
const normalized = this.normalizeBaseURL(baseURL);
|
|
28
|
+
const candidates = [normalized];
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
const parsed = new URL(normalized);
|
|
32
|
+
if (parsed.hostname === 'localhost') {
|
|
33
|
+
const ipv4 = new URL(parsed.toString());
|
|
34
|
+
ipv4.hostname = '127.0.0.1';
|
|
35
|
+
candidates.push(ipv4.toString().replace(/\/$/, ''));
|
|
36
|
+
|
|
37
|
+
const ipv6 = new URL(parsed.toString());
|
|
38
|
+
ipv6.hostname = '::1';
|
|
39
|
+
candidates.push(ipv6.toString().replace(/\/$/, ''));
|
|
40
|
+
}
|
|
41
|
+
} catch (error) {
|
|
42
|
+
// Keep the preferred URL only if parsing fails.
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return [...new Set(candidates)];
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
applyResolvedBaseURL(baseURL) {
|
|
49
|
+
this.baseURL = this.normalizeBaseURL(baseURL);
|
|
50
|
+
return this.baseURL;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
 * Decides whether an availability-check failure should be retried against the
 * next candidate URL.
 *
 * An error is retryable when its lowercased message contains one of the
 * connection-related fragments below, or when its name is 'AbortError'.
 *
 * @param {*} error - The caught error; may be null or undefined.
 * @returns {boolean} True when the error is retryable.
 */
isRetryableAvailabilityError(error) {
    const text = String(error?.message || '').toLowerCase();
    const retryableFragments = ['econnrefused', 'fetch failed', 'network', 'socket', 'connect'];
    const messageMatches = retryableFragments.some((fragment) => text.includes(fragment));

    return messageMatches || error?.name === 'AbortError';
}
|
|
64
|
+
|
|
21
65
|
async checkOllamaAvailability() {
|
|
22
66
|
|
|
23
67
|
if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
|
|
@@ -38,50 +82,79 @@ class OllamaClient {
|
|
|
38
82
|
}
|
|
39
83
|
|
|
40
84
|
async _doAvailabilityCheck() {
|
|
85
|
+
const candidateURLs = this.buildCandidateBaseURLs();
|
|
86
|
+
const attemptedURLs = [];
|
|
87
|
+
let lastError = null;
|
|
41
88
|
|
|
42
|
-
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
89
|
+
for (let index = 0; index < candidateURLs.length; index += 1) {
|
|
90
|
+
const candidateBaseURL = candidateURLs[index];
|
|
91
|
+
attemptedURLs.push(candidateBaseURL);
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
const controller = new AbortController();
|
|
95
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
96
|
+
|
|
97
|
+
const response = await fetch(`${candidateBaseURL}/api/version`, {
|
|
98
|
+
signal: controller.signal,
|
|
99
|
+
headers: { 'Content-Type': 'application/json' }
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
clearTimeout(timeoutId);
|
|
103
|
+
|
|
104
|
+
if (!response.ok) {
|
|
105
|
+
this.isAvailable = {
|
|
106
|
+
available: false,
|
|
107
|
+
error: 'Ollama not responding properly',
|
|
108
|
+
attemptedURL: candidateBaseURL,
|
|
109
|
+
attemptedURLs
|
|
110
|
+
};
|
|
111
|
+
this.lastCheck = Date.now();
|
|
112
|
+
return this.isAvailable;
|
|
113
|
+
}
|
|
52
114
|
|
|
53
|
-
if (response.ok) {
|
|
54
115
|
const data = await response.json();
|
|
55
|
-
this.
|
|
116
|
+
this.applyResolvedBaseURL(candidateBaseURL);
|
|
117
|
+
this.isAvailable = {
|
|
118
|
+
available: true,
|
|
119
|
+
version: data.version || 'unknown',
|
|
120
|
+
attemptedURL: candidateBaseURL,
|
|
121
|
+
attemptedURLs
|
|
122
|
+
};
|
|
56
123
|
this.lastCheck = Date.now();
|
|
57
124
|
return this.isAvailable;
|
|
125
|
+
} catch (error) {
|
|
126
|
+
lastError = error;
|
|
127
|
+
if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
|
|
128
|
+
break;
|
|
129
|
+
}
|
|
58
130
|
}
|
|
131
|
+
}
|
|
59
132
|
|
|
60
|
-
|
|
61
|
-
this.lastCheck = Date.now();
|
|
62
|
-
return this.isAvailable;
|
|
63
|
-
} catch (error) {
|
|
133
|
+
if (lastError) {
|
|
64
134
|
let errorMessage;
|
|
65
135
|
let hint = '';
|
|
136
|
+
const errorText = String(lastError.message || '');
|
|
137
|
+
const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
|
|
66
138
|
|
|
67
|
-
if (
|
|
68
|
-
errorMessage = `Ollama not running at ${
|
|
139
|
+
if (errorText.includes('ECONNREFUSED')) {
|
|
140
|
+
errorMessage = `Ollama not running at ${activeURL}`;
|
|
69
141
|
hint = 'Make sure Ollama is running. Try: ollama serve';
|
|
70
|
-
} else if (
|
|
71
|
-
errorMessage = `Ollama connection timeout at ${
|
|
142
|
+
} else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
|
|
143
|
+
errorMessage = `Ollama connection timeout at ${activeURL}`;
|
|
72
144
|
hint = 'The server is not responding. Check if Ollama is running and accessible.';
|
|
73
|
-
} else if (
|
|
74
|
-
errorMessage = `Cannot resolve host: ${
|
|
145
|
+
} else if (errorText.includes('ENOTFOUND')) {
|
|
146
|
+
errorMessage = `Cannot resolve host: ${activeURL}`;
|
|
75
147
|
hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
|
|
76
148
|
} else {
|
|
77
|
-
errorMessage = error
|
|
149
|
+
errorMessage = errorText || 'Unknown Ollama availability error';
|
|
78
150
|
}
|
|
79
151
|
|
|
80
152
|
this.isAvailable = {
|
|
81
153
|
available: false,
|
|
82
154
|
error: errorMessage,
|
|
83
|
-
hint
|
|
84
|
-
attemptedURL:
|
|
155
|
+
hint,
|
|
156
|
+
attemptedURL: activeURL,
|
|
157
|
+
attemptedURLs
|
|
85
158
|
};
|
|
86
159
|
this.lastCheck = Date.now();
|
|
87
160
|
return this.isAvailable;
|