llm-checker 3.5.7 → 3.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5059,10 +5059,16 @@ program
5059
5059
  console.log(` Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
5060
5060
  console.log(` Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
5061
5061
  console.log(` Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
5062
+ if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
5063
+ console.log(` Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
5064
+ }
5062
5065
  console.log(` Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
5063
5066
  console.log(` Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
5064
5067
  if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
5065
- console.log(` Assist path: ${chalk.yellow('Integrated/shared-memory GPU detected, runtime remains CPU')}`);
5068
+ const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
5069
+ ? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
5070
+ : 'Integrated/shared-memory GPU detected, runtime remains CPU';
5071
+ console.log(` Assist path: ${chalk.yellow(assistMessage)}`);
5066
5072
  }
5067
5073
 
5068
5074
  // CPU
@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
187
187
 
188
188
  const server = new McpServer({
189
189
  name: "llm-checker",
190
- version: "3.5.7",
190
+ version: "3.5.8",
191
191
  });
192
192
 
193
193
  // ============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.7",
3
+ "version": "3.5.8",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -190,7 +190,12 @@ class HardwareDetector {
190
190
  }
191
191
 
192
192
  // Skip very generic/placeholder entries
193
- if (model.includes('standard vga') || model.includes('microsoft basic')) {
193
+ if (
194
+ model.includes('standard vga') ||
195
+ model.includes('microsoft basic') ||
196
+ model.includes('remote display adapter') ||
197
+ model.includes('basic render driver')
198
+ ) {
194
199
  return false;
195
200
  }
196
201
 
@@ -216,6 +216,9 @@ class UnifiedDetector {
216
216
  const summary = {
217
217
  bestBackend: result.primary?.type || 'cpu',
218
218
  backendName: result.primary?.name || 'CPU',
219
+ runtimeBackend: result.primary?.type || 'cpu',
220
+ runtimeBackendName: result.primary?.name || 'CPU',
221
+ hasRuntimeAssist: false,
219
222
  totalVRAM: 0,
220
223
  effectiveMemory: 0,
221
224
  speedCoefficient: 0,
@@ -316,6 +319,11 @@ class UnifiedDetector {
316
319
  }
317
320
  summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
318
321
 
322
+ const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
323
+ summary.runtimeBackend = runtimeSelection.backend;
324
+ summary.runtimeBackendName = runtimeSelection.name;
325
+ summary.hasRuntimeAssist = runtimeSelection.assisted;
326
+
319
327
  // Effective memory for LLM loading
320
328
  // For GPU: use VRAM; for CPU/Metal: use system RAM
321
329
  if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
@@ -398,6 +406,7 @@ class UnifiedDetector {
398
406
  .map((gpu) => {
399
407
  const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
400
408
  if (!name) return null;
409
+ if (this.isRemoteDisplayModel(name)) return null;
401
410
 
402
411
  let type = gpu?.type;
403
412
  if (type !== 'integrated' && type !== 'dedicated') {
@@ -417,6 +426,61 @@ class UnifiedDetector {
417
426
  .filter(Boolean);
418
427
  }
419
428
 
429
+ isRemoteDisplayModel(model) {
430
+ const lower = String(model || '').toLowerCase();
431
+ if (!lower) return false;
432
+
433
+ return (
434
+ lower.includes('microsoft remote display adapter') ||
435
+ lower.includes('remote display adapter') ||
436
+ lower.includes('basic render driver')
437
+ );
438
+ }
439
+
440
+ inferGpuVendor(name) {
441
+ const lower = String(name || '').toLowerCase();
442
+ if (!lower) return 'unknown';
443
+ if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
444
+ if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
445
+ if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
446
+ if (lower.includes('apple')) return 'apple';
447
+ return 'unknown';
448
+ }
449
+
450
+ detectRuntimeAssistBackend(result, topology = {}) {
451
+ const primaryType = result?.primary?.type || 'cpu';
452
+ const primaryName = result?.primary?.name || 'CPU';
453
+
454
+ if (primaryType !== 'cpu') {
455
+ return {
456
+ backend: primaryType,
457
+ name: primaryName,
458
+ assisted: false
459
+ };
460
+ }
461
+
462
+ const platform = result?.platform || result?.os?.platform || normalizePlatform();
463
+ const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
464
+ const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
465
+
466
+ const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
467
+ const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
468
+
469
+ if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
470
+ return {
471
+ backend: 'vulkan',
472
+ name: 'Vulkan',
473
+ assisted: true
474
+ };
475
+ }
476
+
477
+ return {
478
+ backend: primaryType,
479
+ name: primaryName,
480
+ assisted: false
481
+ };
482
+ }
483
+
420
484
  getSystemMemoryGB(memoryInfo) {
421
485
  const totalBytes = Number(memoryInfo?.total || 0);
422
486
  if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
@@ -496,6 +560,7 @@ class UnifiedDetector {
496
560
  .map((controller) => {
497
561
  const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
498
562
  if (!name || name.toLowerCase() === 'unknown') return null;
563
+ if (this.isRemoteDisplayModel(name)) return null;
499
564
 
500
565
  const nameLower = name.toLowerCase();
501
566
  if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
@@ -867,16 +932,19 @@ class UnifiedDetector {
867
932
  return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
868
933
  }
869
934
  else {
935
+ const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
936
+ ? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
937
+ : 'CPU backend';
870
938
  if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
871
939
  const gpuDesc = summary.gpuInventory || summary.gpuModel;
872
940
  if (summary.integratedSharedMemory > 0) {
873
- return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, CPU backend) + ${summary.cpuModel}`;
941
+ return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
874
942
  }
875
- return `${gpuDesc} (integrated/shared memory, CPU backend) + ${summary.cpuModel}`;
943
+ return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
876
944
  }
877
945
  if (summary.gpuModel && summary.gpuCount > 0) {
878
946
  const gpuDesc = summary.gpuInventory || summary.gpuModel;
879
- return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, CPU backend) + ${summary.cpuModel}`;
947
+ return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
880
948
  }
881
949
  return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
882
950
  }
@@ -98,7 +98,8 @@ class IntelligentSelector {
98
98
  description: this.detector.getHardwareDescription(),
99
99
  tier: this.detector.getHardwareTier(),
100
100
  maxSize: this.detector.getMaxModelSize(),
101
- backend: hardware.summary.bestBackend
101
+ backend: hardware.summary.bestBackend,
102
+ runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
102
103
  },
103
104
  policy: {
104
105
  mode: policyEngine.getMode(),
@@ -163,7 +164,7 @@ class IntelligentSelector {
163
164
 
164
165
  const context = {
165
166
  backend: summary.bestBackend || null,
166
- runtimeBackend: summary.bestBackend || null,
167
+ runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
167
168
  ramGB: systemRAM,
168
169
  totalRamGB: systemRAM,
169
170
  hardware
@@ -4,13 +4,10 @@ class OllamaClient {
4
4
  constructor(baseURL = null) {
5
5
  // Support OLLAMA_HOST environment variable (standard Ollama configuration)
6
6
  // Also support OLLAMA_URL for backwards compatibility
7
- this.baseURL = baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434';
8
-
9
- // Normalize URL: ensure it has protocol and remove trailing slash
10
- if (!this.baseURL.startsWith('http://') && !this.baseURL.startsWith('https://')) {
11
- this.baseURL = 'http://' + this.baseURL;
12
- }
13
- this.baseURL = this.baseURL.replace(/\/$/, '');
7
+ this.preferredBaseURL = this.normalizeBaseURL(
8
+ baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
9
+ );
10
+ this.baseURL = this.preferredBaseURL;
14
11
 
15
12
  this.isAvailable = null;
16
13
  this.lastCheck = 0;
@@ -18,6 +15,53 @@ class OllamaClient {
18
15
  this._pendingCheck = null;
19
16
  }
20
17
 
18
+ normalizeBaseURL(baseURL) {
19
+ let normalized = String(baseURL || '').trim();
20
+ if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
21
+ normalized = 'http://' + normalized;
22
+ }
23
+ return normalized.replace(/\/$/, '');
24
+ }
25
+
26
+ buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
27
+ const normalized = this.normalizeBaseURL(baseURL);
28
+ const candidates = [normalized];
29
+
30
+ try {
31
+ const parsed = new URL(normalized);
32
+ if (parsed.hostname === 'localhost') {
33
+ const ipv4 = new URL(parsed.toString());
34
+ ipv4.hostname = '127.0.0.1';
35
+ candidates.push(ipv4.toString().replace(/\/$/, ''));
36
+
37
+ const ipv6 = new URL(parsed.toString());
38
+ ipv6.hostname = '::1';
39
+ candidates.push(ipv6.toString().replace(/\/$/, ''));
40
+ }
41
+ } catch (error) {
42
+ // Keep the preferred URL only if parsing fails.
43
+ }
44
+
45
+ return [...new Set(candidates)];
46
+ }
47
+
48
+ applyResolvedBaseURL(baseURL) {
49
+ this.baseURL = this.normalizeBaseURL(baseURL);
50
+ return this.baseURL;
51
+ }
52
+
53
+ isRetryableAvailabilityError(error) {
54
+ const message = String(error?.message || '').toLowerCase();
55
+ return (
56
+ message.includes('econnrefused') ||
57
+ message.includes('fetch failed') ||
58
+ message.includes('network') ||
59
+ message.includes('socket') ||
60
+ message.includes('connect') ||
61
+ error?.name === 'AbortError'
62
+ );
63
+ }
64
+
21
65
  async checkOllamaAvailability() {
22
66
 
23
67
  if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
@@ -38,50 +82,79 @@ class OllamaClient {
38
82
  }
39
83
 
40
84
  async _doAvailabilityCheck() {
85
+ const candidateURLs = this.buildCandidateBaseURLs();
86
+ const attemptedURLs = [];
87
+ let lastError = null;
41
88
 
42
- try {
43
- const controller = new AbortController();
44
- const timeoutId = setTimeout(() => controller.abort(), 5000);
45
-
46
- const response = await fetch(`${this.baseURL}/api/version`, {
47
- signal: controller.signal,
48
- headers: { 'Content-Type': 'application/json' }
49
- });
50
-
51
- clearTimeout(timeoutId);
89
+ for (let index = 0; index < candidateURLs.length; index += 1) {
90
+ const candidateBaseURL = candidateURLs[index];
91
+ attemptedURLs.push(candidateBaseURL);
92
+
93
+ try {
94
+ const controller = new AbortController();
95
+ const timeoutId = setTimeout(() => controller.abort(), 5000);
96
+
97
+ const response = await fetch(`${candidateBaseURL}/api/version`, {
98
+ signal: controller.signal,
99
+ headers: { 'Content-Type': 'application/json' }
100
+ });
101
+
102
+ clearTimeout(timeoutId);
103
+
104
+ if (!response.ok) {
105
+ this.isAvailable = {
106
+ available: false,
107
+ error: 'Ollama not responding properly',
108
+ attemptedURL: candidateBaseURL,
109
+ attemptedURLs
110
+ };
111
+ this.lastCheck = Date.now();
112
+ return this.isAvailable;
113
+ }
52
114
 
53
- if (response.ok) {
54
115
  const data = await response.json();
55
- this.isAvailable = { available: true, version: data.version || 'unknown' };
116
+ this.applyResolvedBaseURL(candidateBaseURL);
117
+ this.isAvailable = {
118
+ available: true,
119
+ version: data.version || 'unknown',
120
+ attemptedURL: candidateBaseURL,
121
+ attemptedURLs
122
+ };
56
123
  this.lastCheck = Date.now();
57
124
  return this.isAvailable;
125
+ } catch (error) {
126
+ lastError = error;
127
+ if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
128
+ break;
129
+ }
58
130
  }
131
+ }
59
132
 
60
- this.isAvailable = { available: false, error: 'Ollama not responding properly' };
61
- this.lastCheck = Date.now();
62
- return this.isAvailable;
63
- } catch (error) {
133
+ if (lastError) {
64
134
  let errorMessage;
65
135
  let hint = '';
136
+ const errorText = String(lastError.message || '');
137
+ const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
66
138
 
67
- if (error.message.includes('ECONNREFUSED')) {
68
- errorMessage = `Ollama not running at ${this.baseURL}`;
139
+ if (errorText.includes('ECONNREFUSED')) {
140
+ errorMessage = `Ollama not running at ${activeURL}`;
69
141
  hint = 'Make sure Ollama is running. Try: ollama serve';
70
- } else if (error.message.includes('timeout') || error.name === 'AbortError') {
71
- errorMessage = `Ollama connection timeout at ${this.baseURL}`;
142
+ } else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
143
+ errorMessage = `Ollama connection timeout at ${activeURL}`;
72
144
  hint = 'The server is not responding. Check if Ollama is running and accessible.';
73
- } else if (error.message.includes('ENOTFOUND')) {
74
- errorMessage = `Cannot resolve host: ${this.baseURL}`;
145
+ } else if (errorText.includes('ENOTFOUND')) {
146
+ errorMessage = `Cannot resolve host: ${activeURL}`;
75
147
  hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
76
148
  } else {
77
- errorMessage = error.message;
149
+ errorMessage = errorText || 'Unknown Ollama availability error';
78
150
  }
79
151
 
80
152
  this.isAvailable = {
81
153
  available: false,
82
154
  error: errorMessage,
83
- hint: hint,
84
- attemptedURL: this.baseURL
155
+ hint,
156
+ attemptedURL: activeURL,
157
+ attemptedURLs
85
158
  };
86
159
  this.lastCheck = Date.now();
87
160
  return this.isAvailable;