npm - llm-checker - Versions diffs - 3.5.6 → 3.5.8 - Mend

llm-checker 3.5.6 → 3.5.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12)

package/bin/enhanced_cli.js +7 -1
package/bin/mcp-server.mjs +1 -1
package/package.json +1 -1
package/src/ai/multi-objective-selector.js +10 -4
package/src/hardware/backends/cpu-detector.js +39 -9
package/src/hardware/backends/cuda-detector.js +13 -8
package/src/hardware/detector.js +111 -12
package/src/hardware/unified-detector.js +122 -3
package/src/index.js +12 -13
package/src/models/deterministic-selector.js +145 -31
package/src/models/intelligent-selector.js +3 -2
package/src/ollama/client.js +105 -32

package/bin/enhanced_cli.js CHANGED Viewed

@@ -5059,10 +5059,16 @@ program
             console.log(`  Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
             console.log(`  Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
             console.log(`  Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
+            if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
+                console.log(`  Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
+            }
             console.log(`  Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
             console.log(`  Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
             if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
-                console.log(`  Assist path: ${chalk.yellow('Integrated/shared-memory GPU detected, runtime remains CPU')}`);
+                const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
+                    ? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
+                    : 'Integrated/shared-memory GPU detected, runtime remains CPU';
+                console.log(`  Assist path: ${chalk.yellow(assistMessage)}`);
             }
             // CPU

package/bin/mcp-server.mjs CHANGED Viewed

@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
 const server = new McpServer({
   name: "llm-checker",
-  version: "3.4.0",
+  version: "3.5.8",
 });
 // ============================================================================

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-checker",
-  "version": "3.5.6",
+  "version": "3.5.8",
   "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
   "bin": {
     "llm-checker": "bin/cli.js",

package/src/ai/multi-objective-selector.js CHANGED Viewed

@@ -353,13 +353,16 @@ class MultiObjectiveSelector {
         const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
             ? hardware.summary.hasIntegratedGPU
             : /iris xe|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(`${gpuModel} ${integratedGpuInventory}`);
+        const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
+            ? hardware.summary.hasDedicatedGPU
+            : Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
         const platform = normalizePlatform(hardware?.os?.platform || process.platform);
         const isPC = !isAppleSilicon && (platform === 'win32' || platform === 'linux');
         // 1) Effective memory for model weights (45%) - Apple Silicon & PC optimized
         let effMem;
-        if (vramGB > 0 && !unified) {
+        if (hasDedicatedGPU && vramGB > 0 && !unified) {
             // Dedicated GPU path (Windows/Linux with discrete GPU)
             if (isPC) {
                 // PC-specific GPU memory calculation with offload support
@@ -454,9 +457,9 @@ class MultiObjectiveSelector {
             tier = bumpTier(tier, +1);  // High-end GPU boost
         } else if (!vramGB && !unified) {
             tier = bumpTier(tier, -1);  // CPU-only penalty (moderate)
-        } else if (hasIntegratedGPU) {
+        } else if (hasIntegratedGPU && !hasDedicatedGPU) {
             tier = bumpTier(tier, -1);  // iGPU penalty
-        } else if (vramGB > 0 && vramGB < 6) {
+        } else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
             tier = bumpTier(tier, -1);  // Low VRAM penalty
         }
@@ -752,13 +755,16 @@ class MultiObjectiveSelector {
         const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
             ? hardware.summary.hasIntegratedGPU
             : false;
+        const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
+            ? hardware.summary.hasDedicatedGPU
+            : Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
         // Use improved CPU estimation function for more realistic and varying speeds
         const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
                          (cpuModel.includes('13th') || cpuModel.includes('14th') || cpuModel.includes('12th'));
         // GPU-based calculation (dedicated GPU only)
-        if (vramGB > 0 && !hasIntegratedGPU) {
+        if (hasDedicatedGPU && vramGB > 0) {
             let gpuTPS = 20; // Conservative GPU baseline
             if (gpuModel.toLowerCase().includes('gb10') ||
                 gpuModel.toLowerCase().includes('grace blackwell') ||

package/src/hardware/backends/cpu-detector.js CHANGED Viewed

@@ -4,7 +4,7 @@
  * Focuses on AVX2, AVX512, AMX, and other SIMD extensions
  */
-const { execSync } = require('child_process');
+const childProcess = require('child_process');
 const os = require('os');
 const fs = require('fs');
 const { normalizePlatform } = require('../../utils/platform');
@@ -92,7 +92,7 @@ class CPUDetector {
         try {
             if (platform === 'darwin') {
-                return parseInt(execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
+                return parseInt(childProcess.execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
             } else if (platform === 'linux') {
                 const cpuInfo = fs.readFileSync('/proc/cpuinfo', 'utf8');
                 const coreIds = new Set();
@@ -142,7 +142,37 @@ class CPUDetector {
      * Execute shell command with consistent options.
      */
     runCommand(command) {
-        return execSync(command, { encoding: 'utf8', timeout: 5000 });
+        const baseOptions = {
+            encoding: 'utf8',
+            timeout: 5000
+        };
+        if (normalizePlatform() === 'win32') {
+            const result = childProcess.spawnSync(command, {
+                ...baseOptions,
+                shell: true,
+                stdio: ['ignore', 'pipe', 'pipe'],
+                windowsHide: true
+            });
+            if (result.error) {
+                throw result.error;
+            }
+            if (result.status !== 0) {
+                const stderr = String(result.stderr || '').trim();
+                const stdout = String(result.stdout || '').trim();
+                const error = new Error(stderr || stdout || `Command failed: ${command}`);
+                error.status = result.status;
+                error.stdout = result.stdout;
+                error.stderr = result.stderr;
+                throw error;
+            }
+            return result.stdout;
+        }
+        return childProcess.execSync(command, baseOptions);
     }
     /**
@@ -215,10 +245,10 @@ class CPUDetector {
         try {
             if (platform === 'darwin') {
-                cache.l1d = parseInt(execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
-                cache.l1i = parseInt(execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
-                cache.l2 = parseInt(execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
-                cache.l3 = parseInt(execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
+                cache.l1d = parseInt(childProcess.execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
+                cache.l1i = parseInt(childProcess.execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
+                cache.l2 = parseInt(childProcess.execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
+                cache.l3 = parseInt(childProcess.execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
             } else if (platform === 'linux') {
                 // Parse from /sys/devices/system/cpu/cpu0/cache/
                 const cachePath = '/sys/devices/system/cpu/cpu0/cache';
@@ -288,12 +318,12 @@ class CPUDetector {
                 } else {
                     // Intel Mac - check via sysctl
                     try {
-                        const features = execSync('sysctl -n machdep.cpu.features', {
+                        const features = childProcess.execSync('sysctl -n machdep.cpu.features', {
                             encoding: 'utf8',
                             timeout: 5000
                         }).toLowerCase();
-                        const leafFeatures = execSync('sysctl -n machdep.cpu.leaf7_features', {
+                        const leafFeatures = childProcess.execSync('sysctl -n machdep.cpu.leaf7_features', {
                             encoding: 'utf8',
                             timeout: 5000
                         }).toLowerCase();

package/src/hardware/backends/cuda-detector.js CHANGED Viewed

@@ -15,6 +15,10 @@ class CUDADetector {
         this.detectionMode = null;
     }
+    execCommand(command, options = {}) {
+        return execSync(command, options);
+    }
     /**
      * Check if CUDA is available
      */
@@ -43,7 +47,7 @@ class CUDADetector {
     hasNvidiaSMI() {
         try {
-            execSync('nvidia-smi --version', {
+            this.execCommand('nvidia-smi --version', {
                 encoding: 'utf8',
                 timeout: 5000,
                 stdio: ['pipe', 'pipe', 'pipe']
@@ -142,7 +146,7 @@ class CUDADetector {
         }
         try {
-            execSync('nvcc --version', {
+            this.execCommand('nvcc --version', {
                 encoding: 'utf8',
                 timeout: 5000,
                 stdio: ['pipe', 'pipe', 'pipe']
@@ -197,17 +201,18 @@ class CUDADetector {
         try {
             // Get driver and CUDA version
-            const versionInfo = execSync('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
+            const versionInfo = this.execCommand('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
                 encoding: 'utf8',
                 timeout: 5000
             }).trim().split('\n')[0];
             result.driver = versionInfo;
-            // Get CUDA version from nvidia-smi header
-            const header = execSync('nvidia-smi | head -n 3', {
+            // Parse the nvidia-smi banner in JS so Windows does not require shell-only tools like `head`.
+            const banner = this.execCommand('nvidia-smi', {
                 encoding: 'utf8',
                 timeout: 5000
             });
+            const header = banner.split('\n').slice(0, 3).join('\n');
             const cudaMatch = header.match(/CUDA Version:\s*([\d.]+)/);
             if (cudaMatch) {
                 result.cuda = cudaMatch[1];
@@ -237,7 +242,7 @@ class CUDADetector {
                 'clocks.max.sm'
             ].join(',');
-            const gpuData = execSync(
+            const gpuData = this.execCommand(
                 `nvidia-smi --query-gpu=${query} --format=csv,noheader,nounits`,
                 { encoding: 'utf8', timeout: 10000 }
             ).trim();
@@ -286,7 +291,7 @@ class CUDADetector {
         } catch (e) {
             // Fallback to simpler query
             try {
-                const simpleQuery = execSync(
+                const simpleQuery = this.execCommand(
                     'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits',
                     { encoding: 'utf8', timeout: 5000 }
                 ).trim();
@@ -408,7 +413,7 @@ class CUDADetector {
         }
         try {
-            const nvccVersion = execSync('nvcc --version', {
+            const nvccVersion = this.execCommand('nvcc --version', {
                 encoding: 'utf8',
                 timeout: 5000,
                 stdio: ['pipe', 'pipe', 'pipe']

package/src/hardware/detector.js CHANGED Viewed

@@ -41,7 +41,7 @@ class HardwareDetector {
             const systemInfo = {
                 cpu: this.processCPUInfo(cpu),
                 memory: this.processMemoryInfo(memory),
-                gpu: this.processGPUInfo(graphics),
+                gpu: this.processGPUInfo(graphics, memory),
                 system: this.processSystemInfo(system),
                 os: this.processOSInfo(osInfo),
                 timestamp: Date.now()
@@ -97,7 +97,48 @@ class HardwareDetector {
         };
     }
-    processGPUInfo(graphics) {
+    getSystemMemoryGB(memoryInfo) {
+        const totalBytes = Number(memoryInfo?.total || 0);
+        if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
+        return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
+    }
+    estimateIntegratedSharedMemory(gpu, memoryInfo) {
+        const dedicatedAperture = this.normalizeVRAM(gpu?.vram || 0);
+        const explicitSharedCandidates = [
+            gpu?.memoryTotal,
+            gpu?.memory,
+            gpu?.sharedMemory,
+            gpu?.memoryShared,
+            gpu?.memory?.shared,
+            gpu?.memory?.total
+        ]
+            .map((value) => this.normalizeVRAM(value))
+            .filter((value) => value > dedicatedAperture);
+        if (explicitSharedCandidates.length > 0) {
+            return Math.max(...explicitSharedCandidates);
+        }
+        const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
+        if (gpu?.vramDynamic || dedicatedAperture <= 2) {
+            return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
+        }
+        return dedicatedAperture;
+    }
+    estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
+        const fallback = Math.max(0, Number(fallbackGB) || 0);
+        if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
+            return fallback;
+        }
+        // Integrated GPUs typically expose roughly half of system RAM as a shared pool.
+        return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
+    }
+    processGPUInfo(graphics, memoryInfo = null) {
         const controllers = graphics.controllers || [];
         const displays = graphics.displays || [];
@@ -149,7 +190,12 @@ class HardwareDetector {
             }
             // Skip very generic/placeholder entries
-            if (model.includes('standard vga') || model.includes('microsoft basic')) {
+            if (
+                model.includes('standard vga') ||
+                model.includes('microsoft basic') ||
+                model.includes('remote display adapter') ||
+                model.includes('basic render driver')
+            ) {
                 return false;
             }
@@ -199,8 +245,14 @@ class HardwareDetector {
             enhancedModel = this.getGPUModelFromDeviceId(primaryGPU.deviceId) || enhancedModel;
         }
+        const primaryIsIntegrated = this.isIntegratedGPU(enhancedModel);
+        const normalizedPrimaryVRAM = this.normalizeVRAM(primaryGPU.vram || 0);
+        const estimatedSharedMemory = primaryIsIntegrated
+            ? this.estimateIntegratedSharedMemory(primaryGPU, memoryInfo)
+            : 0;
         // Enhanced VRAM detection using the new normalizeVRAM function
-        let vram = this.normalizeVRAM(primaryGPU.vram || 0);
+        let vram = primaryIsIntegrated ? estimatedSharedMemory : normalizedPrimaryVRAM;
         // If VRAM is still 0, try to estimate based on model or handle unified memory
         if (vram === 0 && primaryGPU.model) {
@@ -227,24 +279,41 @@ class HardwareDetector {
         // If we have multiple dedicated GPUs, use the combined VRAM
         const effectiveVRAM = gpuCount > 1 ? totalDedicatedVRAM : vram;
+        const sharedMemory = primaryIsIntegrated ? effectiveVRAM : 0;
+        const dedicatedMemory = primaryIsIntegrated ? normalizedPrimaryVRAM : effectiveVRAM;
+        const scoredGPU = {
+            ...primaryGPU,
+            model: enhancedModel,
+            vram: effectiveVRAM,
+            sharedMemory,
+            dedicatedMemory
+        };
         return {
             model: enhancedModel,
             vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
             vram: effectiveVRAM,
             vramPerGPU: vram, // VRAM of primary GPU for reference
+            sharedMemory,
+            dedicatedMemory,
             vramDynamic: primaryGPU.vramDynamic || false,
-            dedicated: !this.isIntegratedGPU(enhancedModel),
+            dedicated: !primaryIsIntegrated,
             driverVersion: primaryGPU.driverVersion || 'Unknown',
             gpuCount: gpuCount > 0 ? gpuCount : (dedicatedGPUs.length > 0 ? dedicatedGPUs.length : 1),
             isMultiGPU: gpuCount > 1,
             all: normalizedControllers.map(gpu => ({
                 model: gpu.model,
-                vram: this.normalizeVRAM(gpu.vram || 0),
+                vram: this.isIntegratedGPU(gpu.model)
+                    ? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
+                    : this.normalizeVRAM(gpu.vram || 0),
+                sharedMemory: this.isIntegratedGPU(gpu.model)
+                    ? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
+                    : 0,
+                dedicatedMemory: this.normalizeVRAM(gpu.vram || 0),
                 vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
             })),
             displays: displays.length,
-            score: this.calculateGPUScore(primaryGPU)
+            score: this.calculateGPUScore(scoredGPU)
         };
     }
@@ -303,17 +372,40 @@ class HardwareDetector {
             const modelFromUnified = summary.gpuModel || fallbackModel || systemInfo.gpu.model;
             const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
             const isAppleUnified = primaryType === 'metal';
+            const integratedSharedMemory = Math.max(
+                Number(systemInfo.gpu.sharedMemory || 0),
+                ...(Array.isArray(unified.systemGpu?.gpus)
+                    ? unified.systemGpu.gpus
+                        .filter((gpu) => gpu?.type === 'integrated')
+                        .map((gpu) => Number(gpu?.memory?.total || gpu?.memoryTotal || 0))
+                    : [0])
+            );
             systemInfo.summary = {
-                ...summary
+                ...summary,
+                integratedSharedMemory: typeof summary.integratedSharedMemory === 'number'
+                    ? summary.integratedSharedMemory
+                    : integratedSharedMemory
             };
             systemInfo.gpu = {
                 ...systemInfo.gpu,
                 model: modelFromUnified,
                 vendor,
-                vram: isAppleUnified ? systemInfo.gpu.vram : (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : 0),
-                vramPerGPU: isAppleUnified ? (systemInfo.gpu.vramPerGPU || 0) : (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0),
+                vram: isAppleUnified
+                    ? systemInfo.gpu.vram
+                    : (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : (integratedSharedMemory || systemInfo.gpu.vram || 0)),
+                vramPerGPU: isAppleUnified
+                    ? (systemInfo.gpu.vramPerGPU || 0)
+                    : (hasDedicatedGPU
+                        ? (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0)
+                        : (integratedSharedMemory || systemInfo.gpu.vramPerGPU || systemInfo.gpu.vram || 0)),
+                sharedMemory: isAppleUnified
+                    ? (systemInfo.gpu.sharedMemory || 0)
+                    : (hasIntegratedGPU ? Math.max(integratedSharedMemory || 0, systemInfo.gpu.sharedMemory || 0) : 0),
+                dedicatedMemory: hasDedicatedGPU
+                    ? (totalVRAM || systemInfo.gpu.dedicatedMemory || systemInfo.gpu.vram || 0)
+                    : 0,
                 dedicated: hasDedicatedGPU,
                 gpuCount: summary.gpuCount || gpuCount,
                 isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
@@ -557,13 +649,16 @@ class HardwareDetector {
         let score = 0;
         const model = gpu.model.toLowerCase();
-        const vram = gpu.vram || 0;
+        const integrated = this.isIntegratedGPU(gpu.model);
+        const vram = integrated
+            ? Math.min(gpu.sharedMemory || gpu.vram || 0, 2)
+            : (gpu.vram || 0);
         score += vram * 8;
-        if (!this.isIntegratedGPU(gpu.model)) {
+        if (!integrated) {
             score += 20;
         }
@@ -586,6 +681,10 @@ class HardwareDetector {
         else if (model.includes('apple m')) score += 15;
         else if (model.includes('r9700') || model.includes('ai pro r9700')) score += 23;
+        if (integrated) {
+            return Math.min(Math.round(score), 45);
+        }
         return Math.min(Math.round(score), 100);
     }

package/src/hardware/unified-detector.js CHANGED Viewed

@@ -216,6 +216,9 @@ class UnifiedDetector {
         const summary = {
             bestBackend: result.primary?.type || 'cpu',
             backendName: result.primary?.name || 'CPU',
+            runtimeBackend: result.primary?.type || 'cpu',
+            runtimeBackendName: result.primary?.name || 'CPU',
+            hasRuntimeAssist: false,
             totalVRAM: 0,
             effectiveMemory: 0,
             speedCoefficient: 0,
@@ -231,6 +234,7 @@ class UnifiedDetector {
             dedicatedGpuCount: 0,
             integratedGpuModels: [],
             dedicatedGpuModels: [],
+            integratedSharedMemory: 0,
             cpuModel: result.cpu?.brand || 'Unknown',
             systemRAM: require('os').totalmem() / (1024 ** 3)
         };
@@ -303,6 +307,7 @@ class UnifiedDetector {
         summary.dedicatedGpuCount = topology.dedicatedCount;
         summary.integratedGpuModels = topology.integratedModels;
         summary.dedicatedGpuModels = topology.dedicatedModels;
+        summary.integratedSharedMemory = topology.integratedSharedMemory;
         if (!summary.gpuModel) {
             summary.gpuModel = topology.primaryModel || null;
         }
@@ -314,6 +319,11 @@ class UnifiedDetector {
         }
         summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
+        const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
+        summary.runtimeBackend = runtimeSelection.backend;
+        summary.runtimeBackendName = runtimeSelection.name;
+        summary.hasRuntimeAssist = runtimeSelection.assisted;
         // Effective memory for LLM loading
         // For GPU: use VRAM; for CPU/Metal: use system RAM
         if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
@@ -355,6 +365,9 @@ class UnifiedDetector {
             models,
             integratedModels,
             dedicatedModels,
+            integratedSharedMemory: normalized
+                .filter((gpu) => gpu.type === 'integrated')
+                .reduce((max, gpu) => Math.max(max, Number(gpu?.memory?.total || 0)), 0),
             integratedCount: normalized.filter((gpu) => gpu.type === 'integrated').length,
             dedicatedCount: normalized.filter((gpu) => gpu.type === 'dedicated').length,
             hasIntegratedGPU: normalized.some((gpu) => gpu.type === 'integrated'),
@@ -393,6 +406,7 @@ class UnifiedDetector {
             .map((gpu) => {
                 const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
                 if (!name) return null;
+                if (this.isRemoteDisplayModel(name)) return null;
                 let type = gpu?.type;
                 if (type !== 'integrated' && type !== 'dedicated') {
@@ -412,6 +426,101 @@ class UnifiedDetector {
             .filter(Boolean);
     }
+    isRemoteDisplayModel(model) {
+        const lower = String(model || '').toLowerCase();
+        if (!lower) return false;
+        return (
+            lower.includes('microsoft remote display adapter') ||
+            lower.includes('remote display adapter') ||
+            lower.includes('basic render driver')
+        );
+    }
+    inferGpuVendor(name) {
+        const lower = String(name || '').toLowerCase();
+        if (!lower) return 'unknown';
+        if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
+        if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
+        if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
+        if (lower.includes('apple')) return 'apple';
+        return 'unknown';
+    }
+    detectRuntimeAssistBackend(result, topology = {}) {
+        const primaryType = result?.primary?.type || 'cpu';
+        const primaryName = result?.primary?.name || 'CPU';
+        if (primaryType !== 'cpu') {
+            return {
+                backend: primaryType,
+                name: primaryName,
+                assisted: false
+            };
+        }
+        const platform = result?.platform || result?.os?.platform || normalizePlatform();
+        const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
+        const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
+        const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
+        const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
+        if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
+            return {
+                backend: 'vulkan',
+                name: 'Vulkan',
+                assisted: true
+            };
+        }
+        return {
+            backend: primaryType,
+            name: primaryName,
+            assisted: false
+        };
+    }
+    getSystemMemoryGB(memoryInfo) {
+        const totalBytes = Number(memoryInfo?.total || 0);
+        if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
+        return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
+    }
+    estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
+        const fallback = Math.max(0, Number(fallbackGB) || 0);
+        if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
+            return fallback;
+        }
+        return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
+    }
+    estimateIntegratedFallbackMemory(controller, memoryInfo) {
+        const dedicatedAperture = this.normalizeFallbackVRAM(controller?.vram || 0);
+        const explicitSharedCandidates = [
+            controller?.memoryTotal,
+            controller?.memory,
+            controller?.sharedMemory,
+            controller?.memoryShared,
+            controller?.memory?.shared,
+            controller?.memory?.total
+        ]
+            .map((value) => this.normalizeFallbackVRAM(value))
+            .filter((value) => value > dedicatedAperture);
+        if (explicitSharedCandidates.length > 0) {
+            return Math.max(...explicitSharedCandidates);
+        }
+        const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
+        if (controller?.vramDynamic || dedicatedAperture <= 2) {
+            return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
+        }
+        return dedicatedAperture;
+    }
     mergeGpuInventories(...gpuLists) {
         const normalizedLists = gpuLists.map((list) => this.normalizeGpuInventory(list));
         const primaryIndex = normalizedLists.findIndex((list) => list.length > 0);
@@ -433,6 +542,7 @@ class UnifiedDetector {
     async detectSystemGpuFallback() {
         const graphics = await si.graphics();
+        const memoryInfo = await si.mem().catch(() => null);
         const controllers = Array.isArray(graphics?.controllers) ? graphics.controllers : [];
         if (controllers.length === 0) {
@@ -450,12 +560,15 @@ class UnifiedDetector {
             .map((controller) => {
                 const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
                 if (!name || name.toLowerCase() === 'unknown') return null;
+                if (this.isRemoteDisplayModel(name)) return null;
                 const nameLower = name.toLowerCase();
                 if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
                 const isIntegrated = this.isIntegratedGPUModel(name);
-                let vram = this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
+                let vram = isIntegrated
+                    ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
+                    : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
                 // For dedicated cards, estimate VRAM from model if runtime did not report memory.
                 if (!isIntegrated && vram === 0) {
@@ -819,13 +932,19 @@ class UnifiedDetector {
             return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
         }
         else {
+            const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
+                ? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
+                : 'CPU backend';
             if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
                 const gpuDesc = summary.gpuInventory || summary.gpuModel;
-                return `${gpuDesc} (integrated/shared memory, CPU backend) + ${summary.cpuModel}`;
+                if (summary.integratedSharedMemory > 0) {
+                    return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
+                }
+                return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
             }
             if (summary.gpuModel && summary.gpuCount > 0) {
                 const gpuDesc = summary.gpuInventory || summary.gpuModel;
-                return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, CPU backend) + ${summary.cpuModel}`;
+                return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
             }
             return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
         }

package/src/index.js CHANGED Viewed

@@ -1373,6 +1373,15 @@ class LLMChecker {
         // Detect PC platform (Windows/Linux)
         const normalizedPlatform = normalizePlatform(hardware.os?.platform || process.platform);
         const isPC = !isAppleSilicon && (normalizedPlatform === 'win32' || normalizedPlatform === 'linux');
+        const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
+            ? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
+            : '';
+        const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
+            ? hardware.summary.hasIntegratedGPU
+            : /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
+        const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
+            ? (!unified && hardware.summary.hasDedicatedGPU)
+            : Boolean(!unified && (hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU)));
         const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
                          (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
         const hasAVX2 = cpuModel.toLowerCase().includes('intel') ||
@@ -1381,7 +1390,7 @@ class LLMChecker {
         // 1) Capacidad efectiva para pesos del modelo (45%)
         let effMem;
-        if (vramGB > 0 && !unified) {
+        if (hasDedicatedGPU && vramGB > 0 && !unified) {
             // Dedicated GPU path (Windows/Linux with discrete GPU)
             if (isPC) {
                 // PC-specific GPU memory calculation with offload support
@@ -1469,16 +1478,6 @@ class LLMChecker {
                   score >= 35 ? 'medium' :         // 35-54 for mid-range systems
                   score >= 20 ? 'low' : 'ultra_low'; // 20-34 for budget systems
-        const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
-            ? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
-            : '';
-        const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
-            ? hardware.summary.hasIntegratedGPU
-            : /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
-        const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
-            ? (!unified && hardware.summary.hasDedicatedGPU)
-            : (vramGB > 0 && !hasIntegratedGPU && !unified);
         // Debug logging for tier calculation
         if (process.env.DEBUG_TIER) {
             console.log(`GPU Model: "${gpuModel}"`);
@@ -1508,10 +1507,10 @@ class LLMChecker {
         } else if (!vramGB && !unified) {
             // Windows/Linux CPU-only - significativa limitación pero no extrema
             tier = this.bumpTier(tier, -1);
-        } else if (hasIntegratedGPU) {
+        } else if (hasIntegratedGPU && !hasDedicatedGPU) {
             // iGPU - limitada pero algo mejor que CPU puro
             tier = this.bumpTier(tier, -1);
-        } else if (vramGB > 0 && vramGB < 6) {
+        } else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
             // GPU dedicada con poca VRAM (GTX 1060, etc.)
             tier = this.bumpTier(tier, -1);
         }

package/src/models/deterministic-selector.js CHANGED Viewed

@@ -751,40 +751,26 @@ class DeterministicModelSelector {
         return variants.map((variant) => {
             const variantTag = variant.tag || fallbackTag;
-            const paramsB = this.extractParamsFromString(
-                variant.size,
-                variantTag,
-                ollamaModel.main_size,
-                ollamaModel.model_identifier
-            );
+            const quant = this.resolveVariantQuantization(variant, variantTag);
+            const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
             const moeMetadata = this.extractMoEMetadata(ollamaModel, variant, paramsB, baseText);
-            const quant = this.normalizeQuantization(
-                variant.quantization ||
-                this.extractQuantizationFromTag(variantTag) ||
-                'Q4_K_M'
-            );
             const variantSizeGB = this.extractVariantSizeGB(variant, paramsB);
             const modalities = this.inferModalities(ollamaModel, variantTag);
             const modelTags = this.inferTagsForVariant(derivedTags, variant, variantTag);
             const sizeByQuant = {};
+            const variantIsCloud = this.isCloudVariantTag(variantTag);
             for (const sibling of variants) {
-                const siblingParams = this.extractParamsFromString(
-                    sibling.size,
-                    sibling.tag,
-                    ollamaModel.main_size,
-                    ollamaModel.model_identifier
-                );
+                const siblingTag = sibling.tag || fallbackTag;
+                if (this.isCloudVariantTag(siblingTag) !== variantIsCloud) continue;
+                const siblingQuant = this.resolveVariantQuantization(sibling, siblingTag);
+                const siblingParams = this.resolveVariantParamsB(ollamaModel, sibling, siblingQuant);
                 // Keep quantization map parameter-aware: don't blend 8B/70B/405B sizes.
                 if (Math.abs(siblingParams - paramsB) > 0.25) continue;
-                const siblingQuant = this.normalizeQuantization(
-                    sibling.quantization ||
-                    this.extractQuantizationFromTag(sibling.tag || '') ||
-                    quant
-                );
                 const siblingSize = this.extractVariantSizeGB(sibling, siblingParams);
                 if (!Number.isFinite(sizeByQuant[siblingQuant]) || siblingSize < sizeByQuant[siblingQuant]) {
                     sizeByQuant[siblingQuant] = siblingSize;
@@ -1003,17 +989,130 @@ class DeterministicModelSelector {
     }
     extractParamsFromString(...values) {
-        for (const value of values) {
-            if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
-                return value;
+        const candidates = this.extractParameterCandidates(...values); // all parsing is delegated; candidates keep first-seen order
+        return candidates.length > 0 ? candidates[0] : null; // first candidate wins; null (no numeric default) when nothing parsed
+    }
+    extractParameterCandidates(...values) { // Collect the distinct positive parameter counts found in the inputs, in first-seen order.
+        const candidates = [];
+        const seen = new Set();
+        const pushCandidate = (value) => {
+            if (!Number.isFinite(value) || value <= 0) return; // drops NaN, Infinity, non-numbers, zero and negatives
+            const rounded = Math.round(value * 1000) / 1000; // 3-decimal rounding so near-equal values share one key
+            const key = String(rounded);
+            if (seen.has(key)) return;
+            seen.add(key);
+            candidates.push(rounded);
+        };
+        const visit = (value) => { // recursive walk: numbers as-is, arrays/objects element by element, strings via regex
+            if (typeof value === 'number') {
+                pushCandidate(value);
+                return;
+            }
+            if (Array.isArray(value)) {
+                value.forEach(visit);
+                return;
+            }
+            if (value && typeof value === 'object') {
+                Object.values(value).forEach(visit); // only property values are inspected, never the keys
+                return;
+            }
+            if (typeof value !== 'string') return;
+            const regex = /(\d+\.?\d*)\s*([BbMm])/g; // a number followed by B/b/M/m, e.g. "7B", "1.5b", "135M"; NOTE(review): also matches the "500M" of "500MB" - confirm byte sizes never reach here
+            for (const match of value.matchAll(regex)) {
+                const amount = parseFloat(match[1]);
+                const unit = match[2].toUpperCase();
+                pushCandidate(unit === 'M' ? amount / 1000 : amount); // "M" amounts are rescaled to the "B" scale (135M -> 0.135)
              }
-            if (typeof value !== 'string') continue;
+        };
+        values.forEach(visit);
+        return candidates;
+    }
+    extractArtifactSizeGBFromValue(value) { // Read a size from a positive number or from the first "<n>GB"/"<n>GiB" token of a string; null otherwise.
+        if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
+            return value; // returned unchanged; presumably already expressed in GB - confirm with callers
+        }
+        if (typeof value !== 'string') return null;
+        const match = value.match(/(\d+\.?\d*)\s*g(?:i)?b\b/i); // case-insensitive; only GB/GiB is recognised, so "500MB" yields null
+        if (!match) return null;
+        return parseFloat(match[1]);
+    }
+    inferParamsFromArtifactSizeGB(sizeGB, quant = 'Q4_K_M') { // Estimate a parameter count by dividing an artifact size by a per-quantization factor.
+        const normalizedQuant = this.normalizeQuantization(quant);
+        const bytesPerParam = { // divisor per normalized quantization label; presumably GB per billion parameters - TODO confirm
+            'Q8_0': 1.05,
+            'Q6_K': 0.80,
+            'Q5_K_M': 0.68,
+            'Q4_K_M': 0.58,
+            'Q3_K': 0.48,
+            'Q2_K': 0.37
+        };
+        const bpp = bytesPerParam[normalizedQuant] || 0.58; // labels missing from the table use the Q4_K_M factor
+        const inferred = sizeGB / bpp;
+        return Math.max(0.5, Math.round(inferred * 2) / 2); // rounded to the nearest 0.5 and never below 0.5
+    }
+    isCloudVariantTag(tag = '') { // True when the stringified, trimmed tag ends with ":cloud" (case-insensitive).
+        return /:cloud$/i.test(String(tag).trim());
+    }
+    resolveVariantQuantization(variant = {}, variantTag = '') { // Pick a variant's quantization: tag-derived value first, then variant fields, then 'Q4_K_M'.
+        const tagQuant = this.extractQuantizationFromTag(variantTag);
+        if (tagQuant) {
+            return this.normalizeQuantization(tagQuant); // a quantization found in the tag takes precedence over variant metadata
+        }
+        return this.normalizeQuantization(
+            variant.quantization ||
+            variant.quant ||
+            'Q4_K_M' // default when neither the tag nor the variant supplies a truthy value
+        );
+    }
-            const match = value.match(/(\d+\.?\d*)\s*([BbMm])/);
-            if (!match) continue;
-            const n = parseFloat(match[1]);
-            const unit = match[2].toUpperCase();
-            return unit === 'M' ? n / 1000 : n;
+    resolveVariantParamsB(ollamaModel = {}, variant = {}, quant = 'Q4_K_M') {
+        const explicitParams = this.extractParamsFromString(
+            variant.size,
+            variant.tag,
+            variant.label,
+            variant.name,
+            ollamaModel.model_identifier,
+            ollamaModel.model_name,
+            ollamaModel.parameter_size,
+            ollamaModel.parameter_count,
+            ollamaModel.parameters
+        );
+        if (Number.isFinite(explicitParams) && explicitParams > 0) {
+            return explicitParams;
+        }
+        const metadataCandidates = this.extractParameterCandidates(
+            ollamaModel.model_sizes,
+            ollamaModel.parameters,
+            ollamaModel.parameter_size,
+            ollamaModel.parameter_count
+        );
+        if (metadataCandidates.length > 0) {
+            return Math.max(...metadataCandidates);
+        }
+        const artifactSizeGB = this.extractVariantSizeGB(variant, null);
+        if (!this.isCloudVariantTag(variant.tag) && Number.isFinite(artifactSizeGB) && artifactSizeGB > 0) {
+            return this.inferParamsFromArtifactSizeGB(artifactSizeGB, quant);
+        }
+        const modelArtifactSizeGB = this.extractArtifactSizeGBFromValue(ollamaModel.main_size);
+        if (Number.isFinite(modelArtifactSizeGB) && modelArtifactSizeGB > 0) {
+            return this.inferParamsFromArtifactSizeGB(modelArtifactSizeGB, quant);
         }
         return 7;
@@ -1038,6 +1137,7 @@ class DeterministicModelSelector {
     extractVariantSizeGB(variant, paramsB) { // Variant size in GB: the reported size when usable, otherwise an estimate derived from paramsB.
         const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN); // real_size_gb wins; NaN when both are null/undefined
         if (Number.isFinite(candidate) && candidate > 0) return candidate;
+        if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5; // no usable parameter count: return the 0.5 floor. NOTE(review): resolveVariantParamsB passes null here and then tests the result with "> 0", which this floor always satisfies - confirm that is intended
         return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10); // paramsB * 0.58 + 0.5, rounded to one decimal, floored at 0.5
     }
@@ -2049,6 +2149,20 @@ class DeterministicModelSelector {
     }
     mapHardwareTier(hardware = {}) {
+        const summary = hardware?.summary || {};
+        const effectiveMemory = Number(summary.effectiveMemory);
+        const speedCoefficient = Number(summary.speedCoefficient);
+        if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {
+            if (effectiveMemory >= 80 && speedCoefficient >= 300) return 'ultra_high';
+            if (effectiveMemory >= 48 && speedCoefficient >= 200) return 'very_high';
+            if (effectiveMemory >= 24 && speedCoefficient >= 150) return 'high';
+            if (effectiveMemory >= 16 && speedCoefficient >= 100) return 'medium_high';
+            if (effectiveMemory >= 12 && speedCoefficient >= 80) return 'medium';
+            if (effectiveMemory >= 8 && speedCoefficient >= 50) return 'medium_low';
+            if (effectiveMemory >= 6 && speedCoefficient >= 30) return 'low';
+            return 'ultra_low';
+        }
         let ram, cores;
         if (hardware?.memory?.totalGB) {

package/src/models/intelligent-selector.js CHANGED Viewed

@@ -98,7 +98,8 @@ class IntelligentSelector {
                 description: this.detector.getHardwareDescription(),
                 tier: this.detector.getHardwareTier(),
                 maxSize: this.detector.getMaxModelSize(),
-                backend: hardware.summary.bestBackend
+                backend: hardware.summary.bestBackend,
+                runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
             },
             policy: {
                 mode: policyEngine.getMode(),
@@ -163,7 +164,7 @@ class IntelligentSelector {
         const context = {
             backend: summary.bestBackend || null,
-            runtimeBackend: summary.bestBackend || null,
+            runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
             ramGB: systemRAM,
             totalRamGB: systemRAM,
             hardware

package/src/ollama/client.js CHANGED Viewed

@@ -4,13 +4,10 @@ class OllamaClient {
     constructor(baseURL = null) {
         // Support OLLAMA_HOST environment variable (standard Ollama configuration)
         // Also support OLLAMA_URL for backwards compatibility
-        this.baseURL = baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434';
-        // Normalize URL: ensure it has protocol and remove trailing slash
-        if (!this.baseURL.startsWith('http://') && !this.baseURL.startsWith('https://')) {
-            this.baseURL = 'http://' + this.baseURL;
-        }
-        this.baseURL = this.baseURL.replace(/\/$/, '');
+        this.preferredBaseURL = this.normalizeBaseURL(
+            baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
+        );
+        this.baseURL = this.preferredBaseURL;
         this.isAvailable = null;
         this.lastCheck = 0;
@@ -18,6 +15,53 @@ class OllamaClient {
         this._pendingCheck = null;
     }
+    normalizeBaseURL(baseURL) { // Return baseURL as a trimmed string that starts with an http(s) scheme and has no final slash.
+        let normalized = String(baseURL || '').trim(); // falsy input becomes the empty string
+        if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
+            normalized = 'http://' + normalized; // the scheme check is case-sensitive; everything else gets the http prefix
+        }
+        return normalized.replace(/\/$/, ''); // removes a single trailing slash only
+    }
+    buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
+        const normalized = this.normalizeBaseURL(baseURL);
+        const candidates = [normalized];
+        try {
+            const parsed = new URL(normalized);
+            if (parsed.hostname === 'localhost') {
+                const ipv4 = new URL(parsed.toString());
+                ipv4.hostname = '127.0.0.1';
+                candidates.push(ipv4.toString().replace(/\/$/, ''));
+                const ipv6 = new URL(parsed.toString());
+                ipv6.hostname = '::1';
+                candidates.push(ipv6.toString().replace(/\/$/, ''));
+            }
+        } catch (error) {
+            // Keep the preferred URL only if parsing fails.
+        }
+        return [...new Set(candidates)];
+    }
+    applyResolvedBaseURL(baseURL) { // Store the normalized URL as this.baseURL and return the stored value.
+        this.baseURL = this.normalizeBaseURL(baseURL);
+        return this.baseURL;
+    }
+    isRetryableAvailabilityError(error) { // True when the error looks like a connection-level failure, judged by message substrings or an AbortError name.
+        const message = String(error?.message || '').toLowerCase(); // lower-cased so the substring checks below are case-insensitive; a missing error yields ''
+        return (
+            message.includes('econnrefused') ||
+            message.includes('fetch failed') ||
+            message.includes('network') ||
+            message.includes('socket') ||
+            message.includes('connect') ||
+            error?.name === 'AbortError' // presumably what the availability check sees when its AbortController timeout fires - confirm
+        );
+    }
     async checkOllamaAvailability() {
         if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
@@ -38,50 +82,79 @@ class OllamaClient {
     }
     async _doAvailabilityCheck() {
+        const candidateURLs = this.buildCandidateBaseURLs();
+        const attemptedURLs = [];
+        let lastError = null;
-        try {
-            const controller = new AbortController();
-            const timeoutId = setTimeout(() => controller.abort(), 5000);
-            const response = await fetch(`${this.baseURL}/api/version`, {
-                signal: controller.signal,
-                headers: { 'Content-Type': 'application/json' }
-            });
-            clearTimeout(timeoutId);
+        for (let index = 0; index < candidateURLs.length; index += 1) {
+            const candidateBaseURL = candidateURLs[index];
+            attemptedURLs.push(candidateBaseURL);
+            try {
+                const controller = new AbortController();
+                const timeoutId = setTimeout(() => controller.abort(), 5000);
+                const response = await fetch(`${candidateBaseURL}/api/version`, {
+                    signal: controller.signal,
+                    headers: { 'Content-Type': 'application/json' }
+                });
+                clearTimeout(timeoutId);
+                if (!response.ok) {
+                    this.isAvailable = {
+                        available: false,
+                        error: 'Ollama not responding properly',
+                        attemptedURL: candidateBaseURL,
+                        attemptedURLs
+                    };
+                    this.lastCheck = Date.now();
+                    return this.isAvailable;
+                }
-            if (response.ok) {
                 const data = await response.json();
-                this.isAvailable = { available: true, version: data.version || 'unknown' };
+                this.applyResolvedBaseURL(candidateBaseURL);
+                this.isAvailable = {
+                    available: true,
+                    version: data.version || 'unknown',
+                    attemptedURL: candidateBaseURL,
+                    attemptedURLs
+                };
                 this.lastCheck = Date.now();
                 return this.isAvailable;
+            } catch (error) {
+                lastError = error;
+                if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
+                    break;
+                }
             }
+        }
-            this.isAvailable = { available: false, error: 'Ollama not responding properly' };
-            this.lastCheck = Date.now();
-            return this.isAvailable;
-        } catch (error) {
+        if (lastError) {
             let errorMessage;
             let hint = '';
+            const errorText = String(lastError.message || '');
+            const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
-            if (error.message.includes('ECONNREFUSED')) {
-                errorMessage = `Ollama not running at ${this.baseURL}`;
+            if (errorText.includes('ECONNREFUSED')) {
+                errorMessage = `Ollama not running at ${activeURL}`;
                 hint = 'Make sure Ollama is running. Try: ollama serve';
-            } else if (error.message.includes('timeout') || error.name === 'AbortError') {
-                errorMessage = `Ollama connection timeout at ${this.baseURL}`;
+            } else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
+                errorMessage = `Ollama connection timeout at ${activeURL}`;
                 hint = 'The server is not responding. Check if Ollama is running and accessible.';
-            } else if (error.message.includes('ENOTFOUND')) {
-                errorMessage = `Cannot resolve host: ${this.baseURL}`;
+            } else if (errorText.includes('ENOTFOUND')) {
+                errorMessage = `Cannot resolve host: ${activeURL}`;
                 hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
             } else {
-                errorMessage = error.message;
+                errorMessage = errorText || 'Unknown Ollama availability error';
             }
             this.isAvailable = {
                 available: false,
                 error: errorMessage,
-                hint: hint,
-                attemptedURL: this.baseURL
+                hint,
+                attemptedURL: activeURL,
+                attemptedURLs
             };
             this.lastCheck = Date.now();
             return this.isAvailable;