llm-checker 3.5.6 → 3.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5059,10 +5059,16 @@ program
5059
5059
  console.log(` Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
5060
5060
  console.log(` Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
5061
5061
  console.log(` Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
5062
+ if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
5063
+ console.log(` Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
5064
+ }
5062
5065
  console.log(` Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
5063
5066
  console.log(` Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
5064
5067
  if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
5065
- console.log(` Assist path: ${chalk.yellow('Integrated/shared-memory GPU detected, runtime remains CPU')}`);
5068
+ const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
5069
+ ? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
5070
+ : 'Integrated/shared-memory GPU detected, runtime remains CPU';
5071
+ console.log(` Assist path: ${chalk.yellow(assistMessage)}`);
5066
5072
  }
5067
5073
 
5068
5074
  // CPU
@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
187
187
 
188
188
  const server = new McpServer({
189
189
  name: "llm-checker",
190
- version: "3.4.0",
190
+ version: "3.5.8",
191
191
  });
192
192
 
193
193
  // ============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.6",
3
+ "version": "3.5.8",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -353,13 +353,16 @@ class MultiObjectiveSelector {
353
353
  const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
354
354
  ? hardware.summary.hasIntegratedGPU
355
355
  : /iris xe|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(`${gpuModel} ${integratedGpuInventory}`);
356
+ const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
357
+ ? hardware.summary.hasDedicatedGPU
358
+ : Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
356
359
  const platform = normalizePlatform(hardware?.os?.platform || process.platform);
357
360
  const isPC = !isAppleSilicon && (platform === 'win32' || platform === 'linux');
358
361
 
359
362
  // 1) Effective memory for model weights (45%) - Apple Silicon & PC optimized
360
363
  let effMem;
361
364
 
362
- if (vramGB > 0 && !unified) {
365
+ if (hasDedicatedGPU && vramGB > 0 && !unified) {
363
366
  // Dedicated GPU path (Windows/Linux with discrete GPU)
364
367
  if (isPC) {
365
368
  // PC-specific GPU memory calculation with offload support
@@ -454,9 +457,9 @@ class MultiObjectiveSelector {
454
457
  tier = bumpTier(tier, +1); // High-end GPU boost
455
458
  } else if (!vramGB && !unified) {
456
459
  tier = bumpTier(tier, -1); // CPU-only penalty (moderate)
457
- } else if (hasIntegratedGPU) {
460
+ } else if (hasIntegratedGPU && !hasDedicatedGPU) {
458
461
  tier = bumpTier(tier, -1); // iGPU penalty
459
- } else if (vramGB > 0 && vramGB < 6) {
462
+ } else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
460
463
  tier = bumpTier(tier, -1); // Low VRAM penalty
461
464
  }
462
465
 
@@ -752,13 +755,16 @@ class MultiObjectiveSelector {
752
755
  const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
753
756
  ? hardware.summary.hasIntegratedGPU
754
757
  : false;
758
+ const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
759
+ ? hardware.summary.hasDedicatedGPU
760
+ : Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
755
761
 
756
762
  // Use improved CPU estimation function for more realistic and varying speeds
757
763
  const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
758
764
  (cpuModel.includes('13th') || cpuModel.includes('14th') || cpuModel.includes('12th'));
759
765
 
760
766
  // GPU-based calculation (dedicated GPU only)
761
- if (vramGB > 0 && !hasIntegratedGPU) {
767
+ if (hasDedicatedGPU && vramGB > 0) {
762
768
  let gpuTPS = 20; // Conservative GPU baseline
763
769
  if (gpuModel.toLowerCase().includes('gb10') ||
764
770
  gpuModel.toLowerCase().includes('grace blackwell') ||
@@ -4,7 +4,7 @@
4
4
  * Focuses on AVX2, AVX512, AMX, and other SIMD extensions
5
5
  */
6
6
 
7
- const { execSync } = require('child_process');
7
+ const childProcess = require('child_process');
8
8
  const os = require('os');
9
9
  const fs = require('fs');
10
10
  const { normalizePlatform } = require('../../utils/platform');
@@ -92,7 +92,7 @@ class CPUDetector {
92
92
 
93
93
  try {
94
94
  if (platform === 'darwin') {
95
- return parseInt(execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
95
+ return parseInt(childProcess.execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
96
96
  } else if (platform === 'linux') {
97
97
  const cpuInfo = fs.readFileSync('/proc/cpuinfo', 'utf8');
98
98
  const coreIds = new Set();
@@ -142,7 +142,37 @@ class CPUDetector {
142
142
  * Execute shell command with consistent options.
143
143
  */
144
144
  runCommand(command) {
145
- return execSync(command, { encoding: 'utf8', timeout: 5000 });
145
+ const baseOptions = {
146
+ encoding: 'utf8',
147
+ timeout: 5000
148
+ };
149
+
150
+ if (normalizePlatform() === 'win32') {
151
+ const result = childProcess.spawnSync(command, {
152
+ ...baseOptions,
153
+ shell: true,
154
+ stdio: ['ignore', 'pipe', 'pipe'],
155
+ windowsHide: true
156
+ });
157
+
158
+ if (result.error) {
159
+ throw result.error;
160
+ }
161
+
162
+ if (result.status !== 0) {
163
+ const stderr = String(result.stderr || '').trim();
164
+ const stdout = String(result.stdout || '').trim();
165
+ const error = new Error(stderr || stdout || `Command failed: ${command}`);
166
+ error.status = result.status;
167
+ error.stdout = result.stdout;
168
+ error.stderr = result.stderr;
169
+ throw error;
170
+ }
171
+
172
+ return result.stdout;
173
+ }
174
+
175
+ return childProcess.execSync(command, baseOptions);
146
176
  }
147
177
 
148
178
  /**
@@ -215,10 +245,10 @@ class CPUDetector {
215
245
 
216
246
  try {
217
247
  if (platform === 'darwin') {
218
- cache.l1d = parseInt(execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
219
- cache.l1i = parseInt(execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
220
- cache.l2 = parseInt(execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
221
- cache.l3 = parseInt(execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
248
+ cache.l1d = parseInt(childProcess.execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
249
+ cache.l1i = parseInt(childProcess.execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
250
+ cache.l2 = parseInt(childProcess.execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
251
+ cache.l3 = parseInt(childProcess.execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
222
252
  } else if (platform === 'linux') {
223
253
  // Parse from /sys/devices/system/cpu/cpu0/cache/
224
254
  const cachePath = '/sys/devices/system/cpu/cpu0/cache';
@@ -288,12 +318,12 @@ class CPUDetector {
288
318
  } else {
289
319
  // Intel Mac - check via sysctl
290
320
  try {
291
- const features = execSync('sysctl -n machdep.cpu.features', {
321
+ const features = childProcess.execSync('sysctl -n machdep.cpu.features', {
292
322
  encoding: 'utf8',
293
323
  timeout: 5000
294
324
  }).toLowerCase();
295
325
 
296
- const leafFeatures = execSync('sysctl -n machdep.cpu.leaf7_features', {
326
+ const leafFeatures = childProcess.execSync('sysctl -n machdep.cpu.leaf7_features', {
297
327
  encoding: 'utf8',
298
328
  timeout: 5000
299
329
  }).toLowerCase();
@@ -15,6 +15,10 @@ class CUDADetector {
15
15
  this.detectionMode = null;
16
16
  }
17
17
 
18
+ execCommand(command, options = {}) {
19
+ return execSync(command, options);
20
+ }
21
+
18
22
  /**
19
23
  * Check if CUDA is available
20
24
  */
@@ -43,7 +47,7 @@ class CUDADetector {
43
47
 
44
48
  hasNvidiaSMI() {
45
49
  try {
46
- execSync('nvidia-smi --version', {
50
+ this.execCommand('nvidia-smi --version', {
47
51
  encoding: 'utf8',
48
52
  timeout: 5000,
49
53
  stdio: ['pipe', 'pipe', 'pipe']
@@ -142,7 +146,7 @@ class CUDADetector {
142
146
  }
143
147
 
144
148
  try {
145
- execSync('nvcc --version', {
149
+ this.execCommand('nvcc --version', {
146
150
  encoding: 'utf8',
147
151
  timeout: 5000,
148
152
  stdio: ['pipe', 'pipe', 'pipe']
@@ -197,17 +201,18 @@ class CUDADetector {
197
201
 
198
202
  try {
199
203
  // Get driver and CUDA version
200
- const versionInfo = execSync('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
204
+ const versionInfo = this.execCommand('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
201
205
  encoding: 'utf8',
202
206
  timeout: 5000
203
207
  }).trim().split('\n')[0];
204
208
  result.driver = versionInfo;
205
209
 
206
- // Get CUDA version from nvidia-smi header
207
- const header = execSync('nvidia-smi | head -n 3', {
210
+ // Parse the nvidia-smi banner in JS so Windows does not require shell-only tools like `head`.
211
+ const banner = this.execCommand('nvidia-smi', {
208
212
  encoding: 'utf8',
209
213
  timeout: 5000
210
214
  });
215
+ const header = banner.split('\n').slice(0, 3).join('\n');
211
216
  const cudaMatch = header.match(/CUDA Version:\s*([\d.]+)/);
212
217
  if (cudaMatch) {
213
218
  result.cuda = cudaMatch[1];
@@ -237,7 +242,7 @@ class CUDADetector {
237
242
  'clocks.max.sm'
238
243
  ].join(',');
239
244
 
240
- const gpuData = execSync(
245
+ const gpuData = this.execCommand(
241
246
  `nvidia-smi --query-gpu=${query} --format=csv,noheader,nounits`,
242
247
  { encoding: 'utf8', timeout: 10000 }
243
248
  ).trim();
@@ -286,7 +291,7 @@ class CUDADetector {
286
291
  } catch (e) {
287
292
  // Fallback to simpler query
288
293
  try {
289
- const simpleQuery = execSync(
294
+ const simpleQuery = this.execCommand(
290
295
  'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits',
291
296
  { encoding: 'utf8', timeout: 5000 }
292
297
  ).trim();
@@ -408,7 +413,7 @@ class CUDADetector {
408
413
  }
409
414
 
410
415
  try {
411
- const nvccVersion = execSync('nvcc --version', {
416
+ const nvccVersion = this.execCommand('nvcc --version', {
412
417
  encoding: 'utf8',
413
418
  timeout: 5000,
414
419
  stdio: ['pipe', 'pipe', 'pipe']
@@ -41,7 +41,7 @@ class HardwareDetector {
41
41
  const systemInfo = {
42
42
  cpu: this.processCPUInfo(cpu),
43
43
  memory: this.processMemoryInfo(memory),
44
- gpu: this.processGPUInfo(graphics),
44
+ gpu: this.processGPUInfo(graphics, memory),
45
45
  system: this.processSystemInfo(system),
46
46
  os: this.processOSInfo(osInfo),
47
47
  timestamp: Date.now()
@@ -97,7 +97,48 @@ class HardwareDetector {
97
97
  };
98
98
  }
99
99
 
100
- processGPUInfo(graphics) {
100
+ getSystemMemoryGB(memoryInfo) {
101
+ const totalBytes = Number(memoryInfo?.total || 0);
102
+ if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
103
+ return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
104
+ }
105
+
106
+ estimateIntegratedSharedMemory(gpu, memoryInfo) {
107
+ const dedicatedAperture = this.normalizeVRAM(gpu?.vram || 0);
108
+ const explicitSharedCandidates = [
109
+ gpu?.memoryTotal,
110
+ gpu?.memory,
111
+ gpu?.sharedMemory,
112
+ gpu?.memoryShared,
113
+ gpu?.memory?.shared,
114
+ gpu?.memory?.total
115
+ ]
116
+ .map((value) => this.normalizeVRAM(value))
117
+ .filter((value) => value > dedicatedAperture);
118
+
119
+ if (explicitSharedCandidates.length > 0) {
120
+ return Math.max(...explicitSharedCandidates);
121
+ }
122
+
123
+ const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
124
+ if (gpu?.vramDynamic || dedicatedAperture <= 2) {
125
+ return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
126
+ }
127
+
128
+ return dedicatedAperture;
129
+ }
130
+
131
+ estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
132
+ const fallback = Math.max(0, Number(fallbackGB) || 0);
133
+ if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
134
+ return fallback;
135
+ }
136
+
137
+ // Integrated GPUs typically expose roughly half of system RAM as a shared pool.
138
+ return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
139
+ }
140
+
141
+ processGPUInfo(graphics, memoryInfo = null) {
101
142
  const controllers = graphics.controllers || [];
102
143
  const displays = graphics.displays || [];
103
144
 
@@ -149,7 +190,12 @@ class HardwareDetector {
149
190
  }
150
191
 
151
192
  // Skip very generic/placeholder entries
152
- if (model.includes('standard vga') || model.includes('microsoft basic')) {
193
+ if (
194
+ model.includes('standard vga') ||
195
+ model.includes('microsoft basic') ||
196
+ model.includes('remote display adapter') ||
197
+ model.includes('basic render driver')
198
+ ) {
153
199
  return false;
154
200
  }
155
201
 
@@ -199,8 +245,14 @@ class HardwareDetector {
199
245
  enhancedModel = this.getGPUModelFromDeviceId(primaryGPU.deviceId) || enhancedModel;
200
246
  }
201
247
 
248
+ const primaryIsIntegrated = this.isIntegratedGPU(enhancedModel);
249
+ const normalizedPrimaryVRAM = this.normalizeVRAM(primaryGPU.vram || 0);
250
+ const estimatedSharedMemory = primaryIsIntegrated
251
+ ? this.estimateIntegratedSharedMemory(primaryGPU, memoryInfo)
252
+ : 0;
253
+
202
254
  // Enhanced VRAM detection using the new normalizeVRAM function
203
- let vram = this.normalizeVRAM(primaryGPU.vram || 0);
255
+ let vram = primaryIsIntegrated ? estimatedSharedMemory : normalizedPrimaryVRAM;
204
256
 
205
257
  // If VRAM is still 0, try to estimate based on model or handle unified memory
206
258
  if (vram === 0 && primaryGPU.model) {
@@ -227,24 +279,41 @@ class HardwareDetector {
227
279
 
228
280
  // If we have multiple dedicated GPUs, use the combined VRAM
229
281
  const effectiveVRAM = gpuCount > 1 ? totalDedicatedVRAM : vram;
282
+ const sharedMemory = primaryIsIntegrated ? effectiveVRAM : 0;
283
+ const dedicatedMemory = primaryIsIntegrated ? normalizedPrimaryVRAM : effectiveVRAM;
284
+ const scoredGPU = {
285
+ ...primaryGPU,
286
+ model: enhancedModel,
287
+ vram: effectiveVRAM,
288
+ sharedMemory,
289
+ dedicatedMemory
290
+ };
230
291
 
231
292
  return {
232
293
  model: enhancedModel,
233
294
  vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
234
295
  vram: effectiveVRAM,
235
296
  vramPerGPU: vram, // VRAM of primary GPU for reference
297
+ sharedMemory,
298
+ dedicatedMemory,
236
299
  vramDynamic: primaryGPU.vramDynamic || false,
237
- dedicated: !this.isIntegratedGPU(enhancedModel),
300
+ dedicated: !primaryIsIntegrated,
238
301
  driverVersion: primaryGPU.driverVersion || 'Unknown',
239
302
  gpuCount: gpuCount > 0 ? gpuCount : (dedicatedGPUs.length > 0 ? dedicatedGPUs.length : 1),
240
303
  isMultiGPU: gpuCount > 1,
241
304
  all: normalizedControllers.map(gpu => ({
242
305
  model: gpu.model,
243
- vram: this.normalizeVRAM(gpu.vram || 0),
306
+ vram: this.isIntegratedGPU(gpu.model)
307
+ ? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
308
+ : this.normalizeVRAM(gpu.vram || 0),
309
+ sharedMemory: this.isIntegratedGPU(gpu.model)
310
+ ? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
311
+ : 0,
312
+ dedicatedMemory: this.normalizeVRAM(gpu.vram || 0),
244
313
  vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
245
314
  })),
246
315
  displays: displays.length,
247
- score: this.calculateGPUScore(primaryGPU)
316
+ score: this.calculateGPUScore(scoredGPU)
248
317
  };
249
318
  }
250
319
 
@@ -303,17 +372,40 @@ class HardwareDetector {
303
372
  const modelFromUnified = summary.gpuModel || fallbackModel || systemInfo.gpu.model;
304
373
  const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
305
374
  const isAppleUnified = primaryType === 'metal';
375
+ const integratedSharedMemory = Math.max(
376
+ Number(systemInfo.gpu.sharedMemory || 0),
377
+ ...(Array.isArray(unified.systemGpu?.gpus)
378
+ ? unified.systemGpu.gpus
379
+ .filter((gpu) => gpu?.type === 'integrated')
380
+ .map((gpu) => Number(gpu?.memory?.total || gpu?.memoryTotal || 0))
381
+ : [0])
382
+ );
306
383
 
307
384
  systemInfo.summary = {
308
- ...summary
385
+ ...summary,
386
+ integratedSharedMemory: typeof summary.integratedSharedMemory === 'number'
387
+ ? summary.integratedSharedMemory
388
+ : integratedSharedMemory
309
389
  };
310
390
 
311
391
  systemInfo.gpu = {
312
392
  ...systemInfo.gpu,
313
393
  model: modelFromUnified,
314
394
  vendor,
315
- vram: isAppleUnified ? systemInfo.gpu.vram : (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : 0),
316
- vramPerGPU: isAppleUnified ? (systemInfo.gpu.vramPerGPU || 0) : (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0),
395
+ vram: isAppleUnified
396
+ ? systemInfo.gpu.vram
397
+ : (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : (integratedSharedMemory || systemInfo.gpu.vram || 0)),
398
+ vramPerGPU: isAppleUnified
399
+ ? (systemInfo.gpu.vramPerGPU || 0)
400
+ : (hasDedicatedGPU
401
+ ? (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0)
402
+ : (integratedSharedMemory || systemInfo.gpu.vramPerGPU || systemInfo.gpu.vram || 0)),
403
+ sharedMemory: isAppleUnified
404
+ ? (systemInfo.gpu.sharedMemory || 0)
405
+ : (hasIntegratedGPU ? Math.max(integratedSharedMemory || 0, systemInfo.gpu.sharedMemory || 0) : 0),
406
+ dedicatedMemory: hasDedicatedGPU
407
+ ? (totalVRAM || systemInfo.gpu.dedicatedMemory || systemInfo.gpu.vram || 0)
408
+ : 0,
317
409
  dedicated: hasDedicatedGPU,
318
410
  gpuCount: summary.gpuCount || gpuCount,
319
411
  isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
@@ -557,13 +649,16 @@ class HardwareDetector {
557
649
 
558
650
  let score = 0;
559
651
  const model = gpu.model.toLowerCase();
560
- const vram = gpu.vram || 0;
652
+ const integrated = this.isIntegratedGPU(gpu.model);
653
+ const vram = integrated
654
+ ? Math.min(gpu.sharedMemory || gpu.vram || 0, 2)
655
+ : (gpu.vram || 0);
561
656
 
562
657
 
563
658
  score += vram * 8;
564
659
 
565
660
 
566
- if (!this.isIntegratedGPU(gpu.model)) {
661
+ if (!integrated) {
567
662
  score += 20;
568
663
  }
569
664
 
@@ -586,6 +681,10 @@ class HardwareDetector {
586
681
  else if (model.includes('apple m')) score += 15;
587
682
  else if (model.includes('r9700') || model.includes('ai pro r9700')) score += 23;
588
683
 
684
+ if (integrated) {
685
+ return Math.min(Math.round(score), 45);
686
+ }
687
+
589
688
  return Math.min(Math.round(score), 100);
590
689
  }
591
690
 
@@ -216,6 +216,9 @@ class UnifiedDetector {
216
216
  const summary = {
217
217
  bestBackend: result.primary?.type || 'cpu',
218
218
  backendName: result.primary?.name || 'CPU',
219
+ runtimeBackend: result.primary?.type || 'cpu',
220
+ runtimeBackendName: result.primary?.name || 'CPU',
221
+ hasRuntimeAssist: false,
219
222
  totalVRAM: 0,
220
223
  effectiveMemory: 0,
221
224
  speedCoefficient: 0,
@@ -231,6 +234,7 @@ class UnifiedDetector {
231
234
  dedicatedGpuCount: 0,
232
235
  integratedGpuModels: [],
233
236
  dedicatedGpuModels: [],
237
+ integratedSharedMemory: 0,
234
238
  cpuModel: result.cpu?.brand || 'Unknown',
235
239
  systemRAM: require('os').totalmem() / (1024 ** 3)
236
240
  };
@@ -303,6 +307,7 @@ class UnifiedDetector {
303
307
  summary.dedicatedGpuCount = topology.dedicatedCount;
304
308
  summary.integratedGpuModels = topology.integratedModels;
305
309
  summary.dedicatedGpuModels = topology.dedicatedModels;
310
+ summary.integratedSharedMemory = topology.integratedSharedMemory;
306
311
  if (!summary.gpuModel) {
307
312
  summary.gpuModel = topology.primaryModel || null;
308
313
  }
@@ -314,6 +319,11 @@ class UnifiedDetector {
314
319
  }
315
320
  summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
316
321
 
322
+ const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
323
+ summary.runtimeBackend = runtimeSelection.backend;
324
+ summary.runtimeBackendName = runtimeSelection.name;
325
+ summary.hasRuntimeAssist = runtimeSelection.assisted;
326
+
317
327
  // Effective memory for LLM loading
318
328
  // For GPU: use VRAM; for CPU/Metal: use system RAM
319
329
  if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
@@ -355,6 +365,9 @@ class UnifiedDetector {
355
365
  models,
356
366
  integratedModels,
357
367
  dedicatedModels,
368
+ integratedSharedMemory: normalized
369
+ .filter((gpu) => gpu.type === 'integrated')
370
+ .reduce((max, gpu) => Math.max(max, Number(gpu?.memory?.total || 0)), 0),
358
371
  integratedCount: normalized.filter((gpu) => gpu.type === 'integrated').length,
359
372
  dedicatedCount: normalized.filter((gpu) => gpu.type === 'dedicated').length,
360
373
  hasIntegratedGPU: normalized.some((gpu) => gpu.type === 'integrated'),
@@ -393,6 +406,7 @@ class UnifiedDetector {
393
406
  .map((gpu) => {
394
407
  const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
395
408
  if (!name) return null;
409
+ if (this.isRemoteDisplayModel(name)) return null;
396
410
 
397
411
  let type = gpu?.type;
398
412
  if (type !== 'integrated' && type !== 'dedicated') {
@@ -412,6 +426,101 @@ class UnifiedDetector {
412
426
  .filter(Boolean);
413
427
  }
414
428
 
429
+ isRemoteDisplayModel(model) {
430
+ const lower = String(model || '').toLowerCase();
431
+ if (!lower) return false;
432
+
433
+ return (
434
+ lower.includes('microsoft remote display adapter') ||
435
+ lower.includes('remote display adapter') ||
436
+ lower.includes('basic render driver')
437
+ );
438
+ }
439
+
440
+ inferGpuVendor(name) {
441
+ const lower = String(name || '').toLowerCase();
442
+ if (!lower) return 'unknown';
443
+ if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
444
+ if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
445
+ if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
446
+ if (lower.includes('apple')) return 'apple';
447
+ return 'unknown';
448
+ }
449
+
450
+ detectRuntimeAssistBackend(result, topology = {}) {
451
+ const primaryType = result?.primary?.type || 'cpu';
452
+ const primaryName = result?.primary?.name || 'CPU';
453
+
454
+ if (primaryType !== 'cpu') {
455
+ return {
456
+ backend: primaryType,
457
+ name: primaryName,
458
+ assisted: false
459
+ };
460
+ }
461
+
462
+ const platform = result?.platform || result?.os?.platform || normalizePlatform();
463
+ const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
464
+ const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
465
+
466
+ const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
467
+ const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
468
+
469
+ if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
470
+ return {
471
+ backend: 'vulkan',
472
+ name: 'Vulkan',
473
+ assisted: true
474
+ };
475
+ }
476
+
477
+ return {
478
+ backend: primaryType,
479
+ name: primaryName,
480
+ assisted: false
481
+ };
482
+ }
483
+
484
+ getSystemMemoryGB(memoryInfo) {
485
+ const totalBytes = Number(memoryInfo?.total || 0);
486
+ if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
487
+ return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
488
+ }
489
+
490
+ estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
491
+ const fallback = Math.max(0, Number(fallbackGB) || 0);
492
+ if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
493
+ return fallback;
494
+ }
495
+
496
+ return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
497
+ }
498
+
499
+ estimateIntegratedFallbackMemory(controller, memoryInfo) {
500
+ const dedicatedAperture = this.normalizeFallbackVRAM(controller?.vram || 0);
501
+ const explicitSharedCandidates = [
502
+ controller?.memoryTotal,
503
+ controller?.memory,
504
+ controller?.sharedMemory,
505
+ controller?.memoryShared,
506
+ controller?.memory?.shared,
507
+ controller?.memory?.total
508
+ ]
509
+ .map((value) => this.normalizeFallbackVRAM(value))
510
+ .filter((value) => value > dedicatedAperture);
511
+
512
+ if (explicitSharedCandidates.length > 0) {
513
+ return Math.max(...explicitSharedCandidates);
514
+ }
515
+
516
+ const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
517
+ if (controller?.vramDynamic || dedicatedAperture <= 2) {
518
+ return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
519
+ }
520
+
521
+ return dedicatedAperture;
522
+ }
523
+
415
524
  mergeGpuInventories(...gpuLists) {
416
525
  const normalizedLists = gpuLists.map((list) => this.normalizeGpuInventory(list));
417
526
  const primaryIndex = normalizedLists.findIndex((list) => list.length > 0);
@@ -433,6 +542,7 @@ class UnifiedDetector {
433
542
 
434
543
  async detectSystemGpuFallback() {
435
544
  const graphics = await si.graphics();
545
+ const memoryInfo = await si.mem().catch(() => null);
436
546
  const controllers = Array.isArray(graphics?.controllers) ? graphics.controllers : [];
437
547
 
438
548
  if (controllers.length === 0) {
@@ -450,12 +560,15 @@ class UnifiedDetector {
450
560
  .map((controller) => {
451
561
  const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
452
562
  if (!name || name.toLowerCase() === 'unknown') return null;
563
+ if (this.isRemoteDisplayModel(name)) return null;
453
564
 
454
565
  const nameLower = name.toLowerCase();
455
566
  if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
456
567
 
457
568
  const isIntegrated = this.isIntegratedGPUModel(name);
458
- let vram = this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
569
+ let vram = isIntegrated
570
+ ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
571
+ : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
459
572
 
460
573
  // For dedicated cards, estimate VRAM from model if runtime did not report memory.
461
574
  if (!isIntegrated && vram === 0) {
@@ -819,13 +932,19 @@ class UnifiedDetector {
819
932
  return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
820
933
  }
821
934
  else {
935
+ const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
936
+ ? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
937
+ : 'CPU backend';
822
938
  if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
823
939
  const gpuDesc = summary.gpuInventory || summary.gpuModel;
824
- return `${gpuDesc} (integrated/shared memory, CPU backend) + ${summary.cpuModel}`;
940
+ if (summary.integratedSharedMemory > 0) {
941
+ return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
942
+ }
943
+ return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
825
944
  }
826
945
  if (summary.gpuModel && summary.gpuCount > 0) {
827
946
  const gpuDesc = summary.gpuInventory || summary.gpuModel;
828
- return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, CPU backend) + ${summary.cpuModel}`;
947
+ return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
829
948
  }
830
949
  return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
831
950
  }
package/src/index.js CHANGED
@@ -1373,6 +1373,15 @@ class LLMChecker {
1373
1373
  // Detect PC platform (Windows/Linux)
1374
1374
  const normalizedPlatform = normalizePlatform(hardware.os?.platform || process.platform);
1375
1375
  const isPC = !isAppleSilicon && (normalizedPlatform === 'win32' || normalizedPlatform === 'linux');
1376
+ const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
1377
+ ? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
1378
+ : '';
1379
+ const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
1380
+ ? hardware.summary.hasIntegratedGPU
1381
+ : /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
1382
+ const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
1383
+ ? (!unified && hardware.summary.hasDedicatedGPU)
1384
+ : Boolean(!unified && (hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU)));
1376
1385
  const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
1377
1386
  (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
1378
1387
  const hasAVX2 = cpuModel.toLowerCase().includes('intel') ||
@@ -1381,7 +1390,7 @@ class LLMChecker {
1381
1390
  // 1) Capacidad efectiva para pesos del modelo (45%)
1382
1391
  let effMem;
1383
1392
 
1384
- if (vramGB > 0 && !unified) {
1393
+ if (hasDedicatedGPU && vramGB > 0 && !unified) {
1385
1394
  // Dedicated GPU path (Windows/Linux with discrete GPU)
1386
1395
  if (isPC) {
1387
1396
  // PC-specific GPU memory calculation with offload support
@@ -1469,16 +1478,6 @@ class LLMChecker {
1469
1478
  score >= 35 ? 'medium' : // 35-54 for mid-range systems
1470
1479
  score >= 20 ? 'low' : 'ultra_low'; // 20-34 for budget systems
1471
1480
 
1472
- const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
1473
- ? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
1474
- : '';
1475
- const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
1476
- ? hardware.summary.hasIntegratedGPU
1477
- : /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
1478
- const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
1479
- ? (!unified && hardware.summary.hasDedicatedGPU)
1480
- : (vramGB > 0 && !hasIntegratedGPU && !unified);
1481
-
1482
1481
  // Debug logging for tier calculation
1483
1482
  if (process.env.DEBUG_TIER) {
1484
1483
  console.log(`GPU Model: "${gpuModel}"`);
@@ -1508,10 +1507,10 @@ class LLMChecker {
1508
1507
  } else if (!vramGB && !unified) {
1509
1508
  // Windows/Linux CPU-only - significativa limitación pero no extrema
1510
1509
  tier = this.bumpTier(tier, -1);
1511
- } else if (hasIntegratedGPU) {
1510
+ } else if (hasIntegratedGPU && !hasDedicatedGPU) {
1512
1511
  // iGPU - limitada pero algo mejor que CPU puro
1513
1512
  tier = this.bumpTier(tier, -1);
1514
- } else if (vramGB > 0 && vramGB < 6) {
1513
+ } else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
1515
1514
  // GPU dedicada con poca VRAM (GTX 1060, etc.)
1516
1515
  tier = this.bumpTier(tier, -1);
1517
1516
  }
@@ -751,40 +751,26 @@ class DeterministicModelSelector {
751
751
 
752
752
  return variants.map((variant) => {
753
753
  const variantTag = variant.tag || fallbackTag;
754
- const paramsB = this.extractParamsFromString(
755
- variant.size,
756
- variantTag,
757
- ollamaModel.main_size,
758
- ollamaModel.model_identifier
759
- );
754
+ const quant = this.resolveVariantQuantization(variant, variantTag);
755
+ const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
760
756
  const moeMetadata = this.extractMoEMetadata(ollamaModel, variant, paramsB, baseText);
761
- const quant = this.normalizeQuantization(
762
- variant.quantization ||
763
- this.extractQuantizationFromTag(variantTag) ||
764
- 'Q4_K_M'
765
- );
766
757
 
767
758
  const variantSizeGB = this.extractVariantSizeGB(variant, paramsB);
768
759
  const modalities = this.inferModalities(ollamaModel, variantTag);
769
760
  const modelTags = this.inferTagsForVariant(derivedTags, variant, variantTag);
770
761
  const sizeByQuant = {};
762
+ const variantIsCloud = this.isCloudVariantTag(variantTag);
771
763
 
772
764
  for (const sibling of variants) {
773
- const siblingParams = this.extractParamsFromString(
774
- sibling.size,
775
- sibling.tag,
776
- ollamaModel.main_size,
777
- ollamaModel.model_identifier
778
- );
765
+ const siblingTag = sibling.tag || fallbackTag;
766
+ if (this.isCloudVariantTag(siblingTag) !== variantIsCloud) continue;
767
+
768
+ const siblingQuant = this.resolveVariantQuantization(sibling, siblingTag);
769
+ const siblingParams = this.resolveVariantParamsB(ollamaModel, sibling, siblingQuant);
779
770
 
780
771
  // Keep quantization map parameter-aware: don't blend 8B/70B/405B sizes.
781
772
  if (Math.abs(siblingParams - paramsB) > 0.25) continue;
782
773
 
783
- const siblingQuant = this.normalizeQuantization(
784
- sibling.quantization ||
785
- this.extractQuantizationFromTag(sibling.tag || '') ||
786
- quant
787
- );
788
774
  const siblingSize = this.extractVariantSizeGB(sibling, siblingParams);
789
775
  if (!Number.isFinite(sizeByQuant[siblingQuant]) || siblingSize < sizeByQuant[siblingQuant]) {
790
776
  sizeByQuant[siblingQuant] = siblingSize;
@@ -1003,17 +989,130 @@ class DeterministicModelSelector {
1003
989
  }
1004
990
 
1005
991
  extractParamsFromString(...values) {
1006
- for (const value of values) {
1007
- if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
1008
- return value;
992
+ const candidates = this.extractParameterCandidates(...values);
993
+ return candidates.length > 0 ? candidates[0] : null;
994
+ }
995
+
996
+ extractParameterCandidates(...values) {
997
+ const candidates = [];
998
+ const seen = new Set();
999
+
1000
+ const pushCandidate = (value) => {
1001
+ if (!Number.isFinite(value) || value <= 0) return;
1002
+ const rounded = Math.round(value * 1000) / 1000;
1003
+ const key = String(rounded);
1004
+ if (seen.has(key)) return;
1005
+ seen.add(key);
1006
+ candidates.push(rounded);
1007
+ };
1008
+
1009
+ const visit = (value) => {
1010
+ if (typeof value === 'number') {
1011
+ pushCandidate(value);
1012
+ return;
1013
+ }
1014
+
1015
+ if (Array.isArray(value)) {
1016
+ value.forEach(visit);
1017
+ return;
1018
+ }
1019
+
1020
+ if (value && typeof value === 'object') {
1021
+ Object.values(value).forEach(visit);
1022
+ return;
1023
+ }
1024
+
1025
+ if (typeof value !== 'string') return;
1026
+
1027
+ const regex = /(\d+\.?\d*)\s*([BbMm])/g;
1028
+ for (const match of value.matchAll(regex)) {
1029
+ const amount = parseFloat(match[1]);
1030
+ const unit = match[2].toUpperCase();
1031
+ pushCandidate(unit === 'M' ? amount / 1000 : amount);
1009
1032
  }
1010
- if (typeof value !== 'string') continue;
1033
+ };
1034
+
1035
+ values.forEach(visit);
1036
+ return candidates;
1037
+ }
1038
+
1039
+ extractArtifactSizeGBFromValue(value) {
1040
+ if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
1041
+ return value;
1042
+ }
1043
+ if (typeof value !== 'string') return null;
1044
+
1045
+ const match = value.match(/(\d+\.?\d*)\s*g(?:i)?b\b/i);
1046
+ if (!match) return null;
1047
+ return parseFloat(match[1]);
1048
+ }
1049
+
1050
+ inferParamsFromArtifactSizeGB(sizeGB, quant = 'Q4_K_M') {
1051
+ const normalizedQuant = this.normalizeQuantization(quant);
1052
+ const bytesPerParam = {
1053
+ 'Q8_0': 1.05,
1054
+ 'Q6_K': 0.80,
1055
+ 'Q5_K_M': 0.68,
1056
+ 'Q4_K_M': 0.58,
1057
+ 'Q3_K': 0.48,
1058
+ 'Q2_K': 0.37
1059
+ };
1060
+ const bpp = bytesPerParam[normalizedQuant] || 0.58;
1061
+ const inferred = sizeGB / bpp;
1062
+ return Math.max(0.5, Math.round(inferred * 2) / 2);
1063
+ }
1064
+
1065
+ isCloudVariantTag(tag = '') {
1066
+ return /:cloud$/i.test(String(tag).trim());
1067
+ }
1068
+
1069
+ resolveVariantQuantization(variant = {}, variantTag = '') {
1070
+ const tagQuant = this.extractQuantizationFromTag(variantTag);
1071
+ if (tagQuant) {
1072
+ return this.normalizeQuantization(tagQuant);
1073
+ }
1074
+
1075
+ return this.normalizeQuantization(
1076
+ variant.quantization ||
1077
+ variant.quant ||
1078
+ 'Q4_K_M'
1079
+ );
1080
+ }
1011
1081
 
1012
- const match = value.match(/(\d+\.?\d*)\s*([BbMm])/);
1013
- if (!match) continue;
1014
- const n = parseFloat(match[1]);
1015
- const unit = match[2].toUpperCase();
1016
- return unit === 'M' ? n / 1000 : n;
1082
+ resolveVariantParamsB(ollamaModel = {}, variant = {}, quant = 'Q4_K_M') {
1083
+ const explicitParams = this.extractParamsFromString(
1084
+ variant.size,
1085
+ variant.tag,
1086
+ variant.label,
1087
+ variant.name,
1088
+ ollamaModel.model_identifier,
1089
+ ollamaModel.model_name,
1090
+ ollamaModel.parameter_size,
1091
+ ollamaModel.parameter_count,
1092
+ ollamaModel.parameters
1093
+ );
1094
+ if (Number.isFinite(explicitParams) && explicitParams > 0) {
1095
+ return explicitParams;
1096
+ }
1097
+
1098
+ const metadataCandidates = this.extractParameterCandidates(
1099
+ ollamaModel.model_sizes,
1100
+ ollamaModel.parameters,
1101
+ ollamaModel.parameter_size,
1102
+ ollamaModel.parameter_count
1103
+ );
1104
+ if (metadataCandidates.length > 0) {
1105
+ return Math.max(...metadataCandidates);
1106
+ }
1107
+
1108
+ const artifactSizeGB = this.extractVariantSizeGB(variant, null);
1109
+ if (!this.isCloudVariantTag(variant.tag) && Number.isFinite(artifactSizeGB) && artifactSizeGB > 0) {
1110
+ return this.inferParamsFromArtifactSizeGB(artifactSizeGB, quant);
1111
+ }
1112
+
1113
+ const modelArtifactSizeGB = this.extractArtifactSizeGBFromValue(ollamaModel.main_size);
1114
+ if (Number.isFinite(modelArtifactSizeGB) && modelArtifactSizeGB > 0) {
1115
+ return this.inferParamsFromArtifactSizeGB(modelArtifactSizeGB, quant);
1017
1116
  }
1018
1117
 
1019
1118
  return 7;
@@ -1038,6 +1137,7 @@ class DeterministicModelSelector {
1038
1137
  extractVariantSizeGB(variant, paramsB) {
1039
1138
  const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
1040
1139
  if (Number.isFinite(candidate) && candidate > 0) return candidate;
1140
+ if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
1041
1141
  return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
1042
1142
  }
1043
1143
 
@@ -2049,6 +2149,20 @@ class DeterministicModelSelector {
2049
2149
  }
2050
2150
 
2051
2151
  mapHardwareTier(hardware = {}) {
2152
+ const summary = hardware?.summary || {};
2153
+ const effectiveMemory = Number(summary.effectiveMemory);
2154
+ const speedCoefficient = Number(summary.speedCoefficient);
2155
+ if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {
2156
+ if (effectiveMemory >= 80 && speedCoefficient >= 300) return 'ultra_high';
2157
+ if (effectiveMemory >= 48 && speedCoefficient >= 200) return 'very_high';
2158
+ if (effectiveMemory >= 24 && speedCoefficient >= 150) return 'high';
2159
+ if (effectiveMemory >= 16 && speedCoefficient >= 100) return 'medium_high';
2160
+ if (effectiveMemory >= 12 && speedCoefficient >= 80) return 'medium';
2161
+ if (effectiveMemory >= 8 && speedCoefficient >= 50) return 'medium_low';
2162
+ if (effectiveMemory >= 6 && speedCoefficient >= 30) return 'low';
2163
+ return 'ultra_low';
2164
+ }
2165
+
2052
2166
  let ram, cores;
2053
2167
 
2054
2168
  if (hardware?.memory?.totalGB) {
@@ -98,7 +98,8 @@ class IntelligentSelector {
98
98
  description: this.detector.getHardwareDescription(),
99
99
  tier: this.detector.getHardwareTier(),
100
100
  maxSize: this.detector.getMaxModelSize(),
101
- backend: hardware.summary.bestBackend
101
+ backend: hardware.summary.bestBackend,
102
+ runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
102
103
  },
103
104
  policy: {
104
105
  mode: policyEngine.getMode(),
@@ -163,7 +164,7 @@ class IntelligentSelector {
163
164
 
164
165
  const context = {
165
166
  backend: summary.bestBackend || null,
166
- runtimeBackend: summary.bestBackend || null,
167
+ runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
167
168
  ramGB: systemRAM,
168
169
  totalRamGB: systemRAM,
169
170
  hardware
@@ -4,13 +4,10 @@ class OllamaClient {
4
4
  constructor(baseURL = null) {
5
5
  // Support OLLAMA_HOST environment variable (standard Ollama configuration)
6
6
  // Also support OLLAMA_URL for backwards compatibility
7
- this.baseURL = baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434';
8
-
9
- // Normalize URL: ensure it has protocol and remove trailing slash
10
- if (!this.baseURL.startsWith('http://') && !this.baseURL.startsWith('https://')) {
11
- this.baseURL = 'http://' + this.baseURL;
12
- }
13
- this.baseURL = this.baseURL.replace(/\/$/, '');
7
+ this.preferredBaseURL = this.normalizeBaseURL(
8
+ baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
9
+ );
10
+ this.baseURL = this.preferredBaseURL;
14
11
 
15
12
  this.isAvailable = null;
16
13
  this.lastCheck = 0;
@@ -18,6 +15,53 @@ class OllamaClient {
18
15
  this._pendingCheck = null;
19
16
  }
20
17
 
18
+ normalizeBaseURL(baseURL) {
19
+ let normalized = String(baseURL || '').trim();
20
+ if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
21
+ normalized = 'http://' + normalized;
22
+ }
23
+ return normalized.replace(/\/$/, '');
24
+ }
25
+
26
+ buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
27
+ const normalized = this.normalizeBaseURL(baseURL);
28
+ const candidates = [normalized];
29
+
30
+ try {
31
+ const parsed = new URL(normalized);
32
+ if (parsed.hostname === 'localhost') {
33
+ const ipv4 = new URL(parsed.toString());
34
+ ipv4.hostname = '127.0.0.1';
35
+ candidates.push(ipv4.toString().replace(/\/$/, ''));
36
+
37
+ const ipv6 = new URL(parsed.toString());
38
+ ipv6.hostname = '::1';
39
+ candidates.push(ipv6.toString().replace(/\/$/, ''));
40
+ }
41
+ } catch (error) {
42
+ // Keep the preferred URL only if parsing fails.
43
+ }
44
+
45
+ return [...new Set(candidates)];
46
+ }
47
+
48
+ applyResolvedBaseURL(baseURL) {
49
+ this.baseURL = this.normalizeBaseURL(baseURL);
50
+ return this.baseURL;
51
+ }
52
+
53
+ isRetryableAvailabilityError(error) {
54
+ const message = String(error?.message || '').toLowerCase();
55
+ return (
56
+ message.includes('econnrefused') ||
57
+ message.includes('fetch failed') ||
58
+ message.includes('network') ||
59
+ message.includes('socket') ||
60
+ message.includes('connect') ||
61
+ error?.name === 'AbortError'
62
+ );
63
+ }
64
+
21
65
  async checkOllamaAvailability() {
22
66
 
23
67
  if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
@@ -38,50 +82,79 @@ class OllamaClient {
38
82
  }
39
83
 
40
84
  async _doAvailabilityCheck() {
85
+ const candidateURLs = this.buildCandidateBaseURLs();
86
+ const attemptedURLs = [];
87
+ let lastError = null;
41
88
 
42
- try {
43
- const controller = new AbortController();
44
- const timeoutId = setTimeout(() => controller.abort(), 5000);
45
-
46
- const response = await fetch(`${this.baseURL}/api/version`, {
47
- signal: controller.signal,
48
- headers: { 'Content-Type': 'application/json' }
49
- });
50
-
51
- clearTimeout(timeoutId);
89
+ for (let index = 0; index < candidateURLs.length; index += 1) {
90
+ const candidateBaseURL = candidateURLs[index];
91
+ attemptedURLs.push(candidateBaseURL);
92
+
93
+ try {
94
+ const controller = new AbortController();
95
+ const timeoutId = setTimeout(() => controller.abort(), 5000);
96
+
97
+ const response = await fetch(`${candidateBaseURL}/api/version`, {
98
+ signal: controller.signal,
99
+ headers: { 'Content-Type': 'application/json' }
100
+ });
101
+
102
+ clearTimeout(timeoutId);
103
+
104
+ if (!response.ok) {
105
+ this.isAvailable = {
106
+ available: false,
107
+ error: 'Ollama not responding properly',
108
+ attemptedURL: candidateBaseURL,
109
+ attemptedURLs
110
+ };
111
+ this.lastCheck = Date.now();
112
+ return this.isAvailable;
113
+ }
52
114
 
53
- if (response.ok) {
54
115
  const data = await response.json();
55
- this.isAvailable = { available: true, version: data.version || 'unknown' };
116
+ this.applyResolvedBaseURL(candidateBaseURL);
117
+ this.isAvailable = {
118
+ available: true,
119
+ version: data.version || 'unknown',
120
+ attemptedURL: candidateBaseURL,
121
+ attemptedURLs
122
+ };
56
123
  this.lastCheck = Date.now();
57
124
  return this.isAvailable;
125
+ } catch (error) {
126
+ lastError = error;
127
+ if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
128
+ break;
129
+ }
58
130
  }
131
+ }
59
132
 
60
- this.isAvailable = { available: false, error: 'Ollama not responding properly' };
61
- this.lastCheck = Date.now();
62
- return this.isAvailable;
63
- } catch (error) {
133
+ if (lastError) {
64
134
  let errorMessage;
65
135
  let hint = '';
136
+ const errorText = String(lastError.message || '');
137
+ const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
66
138
 
67
- if (error.message.includes('ECONNREFUSED')) {
68
- errorMessage = `Ollama not running at ${this.baseURL}`;
139
+ if (errorText.includes('ECONNREFUSED')) {
140
+ errorMessage = `Ollama not running at ${activeURL}`;
69
141
  hint = 'Make sure Ollama is running. Try: ollama serve';
70
- } else if (error.message.includes('timeout') || error.name === 'AbortError') {
71
- errorMessage = `Ollama connection timeout at ${this.baseURL}`;
142
+ } else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
143
+ errorMessage = `Ollama connection timeout at ${activeURL}`;
72
144
  hint = 'The server is not responding. Check if Ollama is running and accessible.';
73
- } else if (error.message.includes('ENOTFOUND')) {
74
- errorMessage = `Cannot resolve host: ${this.baseURL}`;
145
+ } else if (errorText.includes('ENOTFOUND')) {
146
+ errorMessage = `Cannot resolve host: ${activeURL}`;
75
147
  hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
76
148
  } else {
77
- errorMessage = error.message;
149
+ errorMessage = errorText || 'Unknown Ollama availability error';
78
150
  }
79
151
 
80
152
  this.isAvailable = {
81
153
  available: false,
82
154
  error: errorMessage,
83
- hint: hint,
84
- attemptedURL: this.baseURL
155
+ hint,
156
+ attemptedURL: activeURL,
157
+ attemptedURLs
85
158
  };
86
159
  this.lastCheck = Date.now();
87
160
  return this.isAvailable;