npm - llm-checker - Versions diffs - 3.5.7 → 3.5.9 - Mend

llm-checker 3.5.7 → 3.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/bin/enhanced_cli.js +7 -1
package/bin/mcp-server.mjs +1 -1
package/package.json +1 -1
package/src/hardware/detector.js +6 -1
package/src/hardware/unified-detector.js +71 -3
package/src/models/ai-check-selector.js +5 -11
package/src/models/deterministic-selector.js +17 -26
package/src/models/intelligent-selector.js +3 -2
package/src/ollama/client.js +159 -51
package/src/utils/config.js +5 -3

package/bin/enhanced_cli.js CHANGED Viewed

@@ -5059,10 +5059,16 @@ program
             console.log(`  Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
             console.log(`  Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
             console.log(`  Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
+            if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
+                console.log(`  Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
+            }
             console.log(`  Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
             console.log(`  Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
             if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
-                console.log(`  Assist path: ${chalk.yellow('Integrated/shared-memory GPU detected, runtime remains CPU')}`);
+                const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
+                    ? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
+                    : 'Integrated/shared-memory GPU detected, runtime remains CPU';
+                console.log(`  Assist path: ${chalk.yellow(assistMessage)}`);
             }
             // CPU

package/bin/mcp-server.mjs CHANGED Viewed

@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
 const server = new McpServer({
   name: "llm-checker",
-  version: "3.5.7",
+  version: "3.5.9",
 });
 // ============================================================================

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-checker",
-  "version": "3.5.7",
+  "version": "3.5.9",
   "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
   "bin": {
     "llm-checker": "bin/cli.js",

package/src/hardware/detector.js CHANGED Viewed

@@ -190,7 +190,12 @@ class HardwareDetector {
             }
             // Skip very generic/placeholder entries
-            if (model.includes('standard vga') || model.includes('microsoft basic')) {
+            if (
+                model.includes('standard vga') ||
+                model.includes('microsoft basic') ||
+                model.includes('remote display adapter') ||
+                model.includes('basic render driver')
+            ) {
                 return false;
             }

package/src/hardware/unified-detector.js CHANGED Viewed

@@ -216,6 +216,9 @@ class UnifiedDetector {
         const summary = {
             bestBackend: result.primary?.type || 'cpu',
             backendName: result.primary?.name || 'CPU',
+            runtimeBackend: result.primary?.type || 'cpu',
+            runtimeBackendName: result.primary?.name || 'CPU',
+            hasRuntimeAssist: false,
             totalVRAM: 0,
             effectiveMemory: 0,
             speedCoefficient: 0,
@@ -316,6 +319,11 @@ class UnifiedDetector {
         }
         summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
+        const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
+        summary.runtimeBackend = runtimeSelection.backend;
+        summary.runtimeBackendName = runtimeSelection.name;
+        summary.hasRuntimeAssist = runtimeSelection.assisted;
         // Effective memory for LLM loading
         // For GPU: use VRAM; for CPU/Metal: use system RAM
         if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
@@ -398,6 +406,7 @@ class UnifiedDetector {
             .map((gpu) => {
                 const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
                 if (!name) return null;
+                if (this.isRemoteDisplayModel(name)) return null;
                 let type = gpu?.type;
                 if (type !== 'integrated' && type !== 'dedicated') {
@@ -417,6 +426,61 @@ class UnifiedDetector {
             .filter(Boolean);
     }
+    isRemoteDisplayModel(model) {
+        const lower = String(model || '').toLowerCase();
+        if (!lower) return false;
+        return (
+            lower.includes('microsoft remote display adapter') ||
+            lower.includes('remote display adapter') ||
+            lower.includes('basic render driver')
+        );
+    }
+    inferGpuVendor(name) {
+        const lower = String(name || '').toLowerCase();
+        if (!lower) return 'unknown';
+        if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
+        if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
+        if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
+        if (lower.includes('apple')) return 'apple';
+        return 'unknown';
+    }
+    detectRuntimeAssistBackend(result, topology = {}) {
+        const primaryType = result?.primary?.type || 'cpu';
+        const primaryName = result?.primary?.name || 'CPU';
+        if (primaryType !== 'cpu') {
+            return {
+                backend: primaryType,
+                name: primaryName,
+                assisted: false
+            };
+        }
+        const platform = result?.platform || result?.os?.platform || normalizePlatform();
+        const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
+        const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
+        const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
+        const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
+        if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
+            return {
+                backend: 'vulkan',
+                name: 'Vulkan',
+                assisted: true
+            };
+        }
+        return {
+            backend: primaryType,
+            name: primaryName,
+            assisted: false
+        };
+    }
     getSystemMemoryGB(memoryInfo) {
         const totalBytes = Number(memoryInfo?.total || 0);
         if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
@@ -496,6 +560,7 @@ class UnifiedDetector {
             .map((controller) => {
                 const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
                 if (!name || name.toLowerCase() === 'unknown') return null;
+                if (this.isRemoteDisplayModel(name)) return null;
                 const nameLower = name.toLowerCase();
                 if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
@@ -867,16 +932,19 @@ class UnifiedDetector {
             return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
         }
         else {
+            const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
+                ? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
+                : 'CPU backend';
             if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
                 const gpuDesc = summary.gpuInventory || summary.gpuModel;
                 if (summary.integratedSharedMemory > 0) {
-                    return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, CPU backend) + ${summary.cpuModel}`;
+                    return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
                 }
-                return `${gpuDesc} (integrated/shared memory, CPU backend) + ${summary.cpuModel}`;
+                return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
             }
             if (summary.gpuModel && summary.gpuCount > 0) {
                 const gpuDesc = summary.gpuInventory || summary.gpuModel;
-                return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, CPU backend) + ${summary.cpuModel}`;
+                return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
             }
             return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
         }

package/src/models/ai-check-selector.js CHANGED Viewed

@@ -7,15 +7,16 @@
 const DeterministicModelSelector = require('./deterministic-selector');
 const { OllamaNativeScraper } = require('../ollama/native-scraper');
+const OllamaClient = require('../ollama/client');
 const crypto = require('crypto');
 const fs = require('fs');
 const path = require('path');
-const fetch = require('../utils/fetch');
 const { evaluateFineTuningSupport } = require('./fine-tuning-support');
 class AICheckSelector {
     constructor() {
         this.deterministicSelector = new DeterministicModelSelector();
+        this.ollamaClient = new OllamaClient();
         this.ollamaScraper = new OllamaNativeScraper();
         this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
@@ -389,17 +390,10 @@ Return JSON with this structure:
             ]
         };
-        const response = await fetch('http://localhost:11434/api/chat', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify(requestBody)
+        const data = await this.ollamaClient.chat(modelId, requestBody.messages, {
+            timeoutMs: 45000,
+            generationOptions: requestBody.options
         });
-        if (!response.ok) {
-            throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
-        }
-        const data = await response.json();
         if (!data.message || !data.message.content) {
             throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);

package/src/models/deterministic-selector.js CHANGED Viewed

@@ -9,7 +9,7 @@ const fs = require('fs');
 const path = require('path');
 const os = require('os');
 const { spawn } = require('child_process');
-const fetch = require('../utils/fetch');
+const OllamaClient = require('../ollama/client');
 const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
 const {
     parseBillionsValue: parseMoEBillionsValue,
@@ -24,6 +24,7 @@ class DeterministicModelSelector {
     constructor() {
         this.catalogPath = path.join(__dirname, 'catalog.json');
         this.benchCachePath = path.join(os.homedir(), '.llm-checker', 'bench.json');
+        this.ollamaClient = new OllamaClient();
         this.ollamaCachePaths = [
             path.join(os.homedir(), '.llm-checker', 'cache', 'ollama', 'ollama-detailed-models.json'),
             path.join(__dirname, '../ollama/.cache/ollama-detailed-models.json')
@@ -2018,33 +2019,23 @@ class DeterministicModelSelector {
         const prompt = prompts[category] || prompts['general'];
         const targetTokens = 128;
-        const startTime = Date.now();
-        // Make HTTP request to Ollama API
-        const response = await fetch('http://localhost:11434/api/generate', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({
-                model: modelId,
-                prompt: prompt,
-                stream: false,
-                options: {
-                    num_predict: targetTokens
-                }
-            })
+        const result = await this.ollamaClient.generate(modelId, prompt, {
+            generationOptions: {
+                num_predict: targetTokens
+            }
         });
-        if (!response.ok) {
-            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+        if (Number.isFinite(result.tokensPerSecond) && result.tokensPerSecond > 0) {
+            return result.tokensPerSecond;
         }
-        const result = await response.json();
-        const elapsedSeconds = (Date.now() - startTime) / 1000;
-        // Estimate tokens generated (simplified)
-        const tokensGenerated = result.response ? result.response.split(' ').length * 1.3 : targetTokens;
+        const elapsedSeconds = Math.max(0.001, Number(result.responseTime || 0) / 1000);
+        const estimatedResponseTokens = result.response
+            ? result.response.split(/\s+/).filter(Boolean).length * 1.3
+            : targetTokens;
+        const tokensGenerated = Number(result.eval_count) || estimatedResponseTokens;
         return tokensGenerated / elapsedSeconds;
     }

package/src/models/intelligent-selector.js CHANGED Viewed

@@ -98,7 +98,8 @@ class IntelligentSelector {
                 description: this.detector.getHardwareDescription(),
                 tier: this.detector.getHardwareTier(),
                 maxSize: this.detector.getMaxModelSize(),
-                backend: hardware.summary.bestBackend
+                backend: hardware.summary.bestBackend,
+                runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
             },
             policy: {
                 mode: policyEngine.getMode(),
@@ -163,7 +164,7 @@ class IntelligentSelector {
         const context = {
             backend: summary.bestBackend || null,
-            runtimeBackend: summary.bestBackend || null,
+            runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
             ramGB: systemRAM,
             totalRamGB: systemRAM,
             hardware

package/src/ollama/client.js CHANGED Viewed

@@ -3,14 +3,11 @@ const fetch = require('../utils/fetch');
 class OllamaClient {
     constructor(baseURL = null) {
         // Support OLLAMA_HOST environment variable (standard Ollama configuration)
-        // Also support OLLAMA_URL for backwards compatibility
-        this.baseURL = baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434';
-        // Normalize URL: ensure it has protocol and remove trailing slash
-        if (!this.baseURL.startsWith('http://') && !this.baseURL.startsWith('https://')) {
-            this.baseURL = 'http://' + this.baseURL;
-        }
-        this.baseURL = this.baseURL.replace(/\/$/, '');
+        // Also support OLLAMA_BASE_URL and OLLAMA_URL for backwards compatibility
+        this.preferredBaseURL = this.normalizeBaseURL(
+            baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || process.env.OLLAMA_URL || 'http://localhost:11434'
+        );
+        this.baseURL = this.preferredBaseURL;
         this.isAvailable = null;
         this.lastCheck = 0;
@@ -18,6 +15,53 @@ class OllamaClient {
         this._pendingCheck = null;
     }
+    normalizeBaseURL(baseURL) {
+        let normalized = String(baseURL || '').trim();
+        if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
+            normalized = 'http://' + normalized;
+        }
+        return normalized.replace(/\/$/, '');
+    }
+    buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
+        const normalized = this.normalizeBaseURL(baseURL);
+        const candidates = [normalized];
+        try {
+            const parsed = new URL(normalized);
+            if (parsed.hostname === 'localhost') {
+                const ipv4 = new URL(parsed.toString());
+                ipv4.hostname = '127.0.0.1';
+                candidates.push(ipv4.toString().replace(/\/$/, ''));
+                const ipv6 = new URL(parsed.toString());
+                ipv6.hostname = '::1';
+                candidates.push(ipv6.toString().replace(/\/$/, ''));
+            }
+        } catch (error) {
+            // Keep the preferred URL only if parsing fails.
+        }
+        return [...new Set(candidates)];
+    }
+    applyResolvedBaseURL(baseURL) {
+        this.baseURL = this.normalizeBaseURL(baseURL);
+        return this.baseURL;
+    }
+    isRetryableAvailabilityError(error) {
+        const message = String(error?.message || '').toLowerCase();
+        return (
+            message.includes('econnrefused') ||
+            message.includes('fetch failed') ||
+            message.includes('network') ||
+            message.includes('socket') ||
+            message.includes('connect') ||
+            error?.name === 'AbortError'
+        );
+    }
     async checkOllamaAvailability() {
         if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
@@ -38,50 +82,79 @@ class OllamaClient {
     }
     async _doAvailabilityCheck() {
+        const candidateURLs = this.buildCandidateBaseURLs();
+        const attemptedURLs = [];
+        let lastError = null;
-        try {
-            const controller = new AbortController();
-            const timeoutId = setTimeout(() => controller.abort(), 5000);
-            const response = await fetch(`${this.baseURL}/api/version`, {
-                signal: controller.signal,
-                headers: { 'Content-Type': 'application/json' }
-            });
-            clearTimeout(timeoutId);
+        for (let index = 0; index < candidateURLs.length; index += 1) {
+            const candidateBaseURL = candidateURLs[index];
+            attemptedURLs.push(candidateBaseURL);
+            try {
+                const controller = new AbortController();
+                const timeoutId = setTimeout(() => controller.abort(), 5000);
+                const response = await fetch(`${candidateBaseURL}/api/version`, {
+                    signal: controller.signal,
+                    headers: { 'Content-Type': 'application/json' }
+                });
+                clearTimeout(timeoutId);
+                if (!response.ok) {
+                    this.isAvailable = {
+                        available: false,
+                        error: 'Ollama not responding properly',
+                        attemptedURL: candidateBaseURL,
+                        attemptedURLs
+                    };
+                    this.lastCheck = Date.now();
+                    return this.isAvailable;
+                }
-            if (response.ok) {
                 const data = await response.json();
-                this.isAvailable = { available: true, version: data.version || 'unknown' };
+                this.applyResolvedBaseURL(candidateBaseURL);
+                this.isAvailable = {
+                    available: true,
+                    version: data.version || 'unknown',
+                    attemptedURL: candidateBaseURL,
+                    attemptedURLs
+                };
                 this.lastCheck = Date.now();
                 return this.isAvailable;
+            } catch (error) {
+                lastError = error;
+                if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
+                    break;
+                }
             }
+        }
-            this.isAvailable = { available: false, error: 'Ollama not responding properly' };
-            this.lastCheck = Date.now();
-            return this.isAvailable;
-        } catch (error) {
+        if (lastError) {
             let errorMessage;
             let hint = '';
+            const errorText = String(lastError.message || '');
+            const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
-            if (error.message.includes('ECONNREFUSED')) {
-                errorMessage = `Ollama not running at ${this.baseURL}`;
+            if (errorText.includes('ECONNREFUSED')) {
+                errorMessage = `Ollama not running at ${activeURL}`;
                 hint = 'Make sure Ollama is running. Try: ollama serve';
-            } else if (error.message.includes('timeout') || error.name === 'AbortError') {
-                errorMessage = `Ollama connection timeout at ${this.baseURL}`;
+            } else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
+                errorMessage = `Ollama connection timeout at ${activeURL}`;
                 hint = 'The server is not responding. Check if Ollama is running and accessible.';
-            } else if (error.message.includes('ENOTFOUND')) {
-                errorMessage = `Cannot resolve host: ${this.baseURL}`;
+            } else if (errorText.includes('ENOTFOUND')) {
+                errorMessage = `Cannot resolve host: ${activeURL}`;
                 hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
             } else {
-                errorMessage = error.message;
+                errorMessage = errorText || 'Unknown Ollama availability error';
             }
             this.isAvailable = {
                 available: false,
                 error: errorMessage,
-                hint: hint,
-                attemptedURL: this.baseURL
+                hint,
+                attemptedURL: activeURL,
+                attemptedURLs
             };
             this.lastCheck = Date.now();
             return this.isAvailable;
@@ -400,51 +473,86 @@ class OllamaClient {
         };
     }
-    async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
+    async generate(modelName, prompt, options = {}) {
         const availability = await this.checkOllamaAvailability();
         if (!availability.available) {
             throw new Error(`Ollama not available: ${availability.error}`);
         }
+        const {
+            timeoutMs = 30000,
+            stream = false,
+            keepAlive,
+            format,
+            generationOptions = {}
+        } = options;
+        const payload = {
+            model: modelName,
+            prompt,
+            stream: Boolean(stream)
+        };
+        if (keepAlive) payload.keep_alive = keepAlive;
+        if (format) payload.format = format;
+        if (generationOptions && Object.keys(generationOptions).length > 0) {
+            payload.options = generationOptions;
+        }
         const startTime = Date.now();
         try {
             const controller = new AbortController();
-            const timeoutId = setTimeout(() => controller.abort(), 30000);
+            const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
             const response = await fetch(`${this.baseURL}/api/generate`, {
                 method: 'POST',
                 signal: controller.signal,
                 headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({
-                    model: modelName,
-                    prompt: testPrompt,
-                    stream: false,
-                    options: {
-                        num_predict: 50 // Limitar respuesta para test rápido
-                    }
-                })
+                body: JSON.stringify(payload)
             });
             clearTimeout(timeoutId);
             if (!response.ok) {
-                throw new Error(`Test failed: HTTP ${response.status}`);
+                const errorText = await response.text();
+                throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
             }
             const data = await response.json();
-            const endTime = Date.now();
+            const responseTime = Date.now() - startTime;
+            const speed = this.calculateTokensPerSecond(data, responseTime);
+            return {
+                ...data,
+                responseTime,
+                tokensPerSecond: speed.tokensPerSecond,
+                evalTokensPerSecond: speed.evalTokensPerSecond,
+                endToEndTokensPerSecond: speed.endToEndTokensPerSecond
+            };
+        } catch (error) {
+            throw new Error(`Failed to run generate request: ${error.message}`);
+        }
+    }
-            const totalTime = endTime - startTime;
+    async testModelPerformance(modelName, testPrompt = "Hello, how are you?") {
+        const startTime = Date.now();
+        try {
+            const data = await this.generate(modelName, testPrompt, {
+                timeoutMs: 30000,
+                generationOptions: {
+                    num_predict: 50
+                }
+            });
             const tokensGenerated = Number(data.eval_count) || 0;
-            const speed = this.calculateTokensPerSecond(data, totalTime);
             return {
                 success: true,
-                responseTime: totalTime,
-                tokensPerSecond: speed.tokensPerSecond,
-                evalTokensPerSecond: speed.evalTokensPerSecond,
-                endToEndTokensPerSecond: speed.endToEndTokensPerSecond,
+                responseTime: data.responseTime,
+                tokensPerSecond: data.tokensPerSecond,
+                evalTokensPerSecond: data.evalTokensPerSecond,
+                endToEndTokensPerSecond: data.endToEndTokensPerSecond,
                 tokensGenerated,
                 loadTime: data.load_duration ? Math.round(data.load_duration / 1000000) : null,
                 evalTime: data.eval_duration ? Math.round(data.eval_duration / 1000000) : null,

package/src/utils/config.js CHANGED Viewed

@@ -16,7 +16,7 @@ class ConfigManager {
         return {
             version: "2.0",
             ollama: {
-                baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434",
+                baseURL: process.env.OLLAMA_HOST || process.env.OLLAMA_BASE_URL || "http://localhost:11434",
                 timeout: 30000,
                 enabled: true,
                 autoDetect: true,
@@ -176,7 +176,9 @@ class ConfigManager {
         }
         // Ollama overrides
-        if (process.env.OLLAMA_BASE_URL) {
+        if (process.env.OLLAMA_HOST) {
+            this.config.ollama.baseURL = process.env.OLLAMA_HOST;
+        } else if (process.env.OLLAMA_BASE_URL) {
             this.config.ollama.baseURL = process.env.OLLAMA_BASE_URL;
         }
@@ -356,4 +358,4 @@ class ConfigManager {
     }
 }
-module.exports = ConfigManager;
+module.exports = ConfigManager;