npm - llm-checker - Versions diffs - 3.5.11 → 3.5.13 - Mend

llm-checker 3.5.11 → 3.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +83 -17
package/bin/cli.js +40 -0
package/bin/enhanced_cli.js +384 -35
package/package.json +2 -1
package/src/ai/model-selector.js +47 -16
package/src/ai/multi-objective-selector.js +55 -9
package/src/data/model-database.js +92 -1
package/src/data/seed/README.md +8 -0
package/src/data/seed/models.db +0 -0
package/src/hardware/backends/rocm-detector.js +469 -68
package/src/hardware/unified-detector.js +69 -18
package/src/index.js +59 -8
package/src/models/ai-check-selector.js +27 -2
package/src/models/deterministic-selector.js +84 -7
package/src/ollama/client.js +121 -0
package/src/ollama/enhanced-scraper.js +40 -26
package/src/ollama/native-scraper.js +52 -27
package/src/ui/cli-theme.js +139 -24
package/src/ui/interactive-panel.js +1 -18
package/src/utils/verbose-progress.js +144 -187

package/src/hardware/unified-detector.js CHANGED Viewed

@@ -307,7 +307,10 @@ class UnifiedDetector {
         summary.dedicatedGpuCount = topology.dedicatedCount;
         summary.integratedGpuModels = topology.integratedModels;
         summary.dedicatedGpuModels = topology.dedicatedModels;
-        summary.integratedSharedMemory = topology.integratedSharedMemory;
+        summary.integratedSharedMemory = Math.max(
+            topology.integratedSharedMemory,
+            this.getPrimaryIntegratedSharedMemory(primary)
+        );
         if (!summary.gpuModel) {
             summary.gpuModel = topology.primaryModel || null;
         }
@@ -324,18 +327,70 @@ class UnifiedDetector {
         summary.runtimeBackendName = runtimeSelection.name;
         summary.hasRuntimeAssist = runtimeSelection.assisted;
-        // Effective memory for LLM loading
-        // For GPU: use VRAM; for CPU/Metal: use system RAM
-        if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
+        // Effective memory for LLM loading. Integrated ROCm/iGPU devices expose
+        // a small aperture as VRAM and a much larger shared pool for model-fit
+        // decisions, so avoid treating the aperture as dedicated VRAM.
+        if (
+            ['rocm', 'intel'].includes(primary?.type) &&
+            summary.hasIntegratedGPU &&
+            !summary.hasDedicatedGPU &&
+            summary.integratedSharedMemory > 0
+        ) {
+            summary.effectiveMemory = summary.integratedSharedMemory;
+        } else if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
             summary.effectiveMemory = summary.totalVRAM;
         } else {
             // Use 70% of system RAM for models (leave room for OS)
             summary.effectiveMemory = Math.round(summary.systemRAM * 0.7);
         }
+        summary.hardwareTier = this.classifyHardwareTierFromSummary(summary);
+        summary.bestBackendLabel = this.getBestBackendLabel(summary);
         return summary;
     }
+    getPrimaryIntegratedSharedMemory(primary) {
+        const gpus = Array.isArray(primary?.info?.gpus) ? primary.info.gpus : [];
+        return gpus
+            .filter((gpu) => gpu?.type === 'integrated')
+            .reduce((max, gpu) => {
+                const candidates = [
+                    gpu?.sharedMemory,
+                    gpu?.unifiedMemory,
+                    gpu?.memory?.shared,
+                    gpu?.memory?.total
+                ].map(Number).filter((value) => Number.isFinite(value) && value > 0);
+                return Math.max(max, ...candidates, 0);
+            }, 0);
+    }
+    classifyHardwareTierFromSummary(summary = {}) {
+        const effectiveMem = Number(summary.effectiveMemory) || 0;
+        const speed = Number(summary.speedCoefficient) || 0;
+        if (effectiveMem >= 80 && speed >= 300) return 'ultra_high';      // H100, MI300
+        if (effectiveMem >= 48 && speed >= 200) return 'very_high';       // 2x3090, 4090
+        if (effectiveMem >= 24 && speed >= 150) return 'high';            // 3090, 4090, M2 Max
+        if (effectiveMem >= 16 && speed >= 100) return 'medium_high';     // 4080, 3080, M3 Pro
+        if (effectiveMem >= 12 && speed >= 80) return 'medium';           // 3060, 4060 Ti
+        if (effectiveMem >= 8 && speed >= 50) return 'medium_low';        // 3060, M2
+        if (effectiveMem >= 6 && speed >= 30) return 'low';               // GTX 1660, iGPU
+        return 'ultra_low';                                                // CPU only
+    }
+    getBestBackendLabel(summary = {}) {
+        const backendName = summary.backendName || String(summary.bestBackend || 'cpu').toUpperCase();
+        if (
+            summary.hasRuntimeAssist &&
+            summary.runtimeBackend &&
+            summary.runtimeBackend !== summary.bestBackend
+        ) {
+            return `${backendName} + ${summary.runtimeBackendName || summary.runtimeBackend} assist`;
+        }
+        return backendName;
+    }
     summarizeGPUInventory(gpus = []) {
         const normalized = this.normalizeGpuInventory(gpus);
         const counts = new Map();
@@ -819,7 +874,11 @@ class UnifiedDetector {
         const summary = result.summary;
         // Leave headroom (2GB for GPU, 20% for RAM)
-        if (summary.bestBackend === 'cpu' || summary.bestBackend === 'metal') {
+        if (
+            summary.bestBackend === 'cpu' ||
+            summary.bestBackend === 'metal' ||
+            (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
+        ) {
             return sizeGB <= (summary.effectiveMemory - 2);
         } else {
             const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
@@ -844,19 +903,7 @@ class UnifiedDetector {
         const result = this.cache;
         if (!result) return 'unknown';
-        const summary = result.summary;
-        const effectiveMem = summary.effectiveMemory;
-        const speed = summary.speedCoefficient;
-        // Tier based on effective memory and speed
-        if (effectiveMem >= 80 && speed >= 300) return 'ultra_high';      // H100, MI300
-        if (effectiveMem >= 48 && speed >= 200) return 'very_high';       // 2x3090, 4090
-        if (effectiveMem >= 24 && speed >= 150) return 'high';            // 3090, 4090, M2 Max
-        if (effectiveMem >= 16 && speed >= 100) return 'medium_high';     // 4080, 3080, M3 Pro
-        if (effectiveMem >= 12 && speed >= 80) return 'medium';           // 3060, 4060 Ti
-        if (effectiveMem >= 8 && speed >= 50) return 'medium_low';        // 3060, M2
-        if (effectiveMem >= 6 && speed >= 30) return 'low';               // GTX 1660, iGPU
-        return 'ultra_low';                                                // CPU only
+        return result.summary?.hardwareTier || this.classifyHardwareTierFromSummary(result.summary);
     }
     /**
@@ -922,6 +969,10 @@ class UnifiedDetector {
             const gpuDesc = summary.gpuInventory || (
                 summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
             );
+            if (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0) {
+                const dedicatedLabel = summary.totalVRAM > 0 ? `, ${summary.totalVRAM}GB aperture` : '';
+                return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory${dedicatedLabel}) + ${summary.cpuModel}`;
+            }
             return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
         }
         else if (summary.bestBackend === 'metal') {

package/src/index.js CHANGED Viewed

@@ -78,7 +78,6 @@ class LLMChecker {
             // Report hardware detection progress before platform-specific analysis
             if (this.progress) {
                 this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
-                await new Promise(resolve => setTimeout(resolve, 200)); // Small delay for demo
                 const isApple = detectedPlatform === 'darwin';
                 const memLabel = isApple ? 'unified memory' : 'RAM';
                 this.progress.substep(`Memory detected: ${hardware.memory.total}GB ${memLabel}`, true);
@@ -117,7 +116,6 @@ class LLMChecker {
         // Apple Silicon optimized analysis with unified memory consideration
         if (this.progress) {
             this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
-            await new Promise(resolve => setTimeout(resolve, 200));
             this.progress.substep(`Memory detected: ${hardware.memory.total}GB unified memory`, true);
             const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Apple Silicon GPU'}`;
             this.progress.stepComplete(summary);
@@ -131,7 +129,6 @@ class LLMChecker {
         // Windows-specific analysis with discrete GPU / iGPU handling
         if (this.progress) {
             this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
-            await new Promise(resolve => setTimeout(resolve, 200));
             this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
             const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Integrated GPU'}`;
             this.progress.stepComplete(summary);
@@ -145,7 +142,6 @@ class LLMChecker {
         // Linux-specific analysis (similar to Windows but with Linux considerations)
         if (this.progress) {
             this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
-            await new Promise(resolve => setTimeout(resolve, 200));
             this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
             const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'GPU'}`;
             this.progress.stepComplete(summary);
@@ -516,7 +512,7 @@ class LLMChecker {
         try {
             // 1. Obtener TODOS los modelos de la base de datos de Ollama
-            const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
+            const ollamaData = await this.loadOllamaModelData();
             const allOllamaModels = ollamaData.models || [];
             this.logger.info(`Found ${allOllamaModels.length} models in Ollama database`);
@@ -1345,9 +1341,27 @@ class LLMChecker {
     }
     getHardwareTier(hardware) {
+        const canonicalTier = hardware?.summary?.hardwareTier;
+        if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
+            return canonicalTier.trim().toLowerCase().replace(/\s+/g, '_');
+        }
         return this.calculateHardwareScore(hardware).tier;
     }
+    getHardwareTierBucket(hardware) {
+        const tier = this.getHardwareTier(hardware);
+        switch (tier) {
+            case 'very_high':
+                return 'ultra_high';
+            case 'medium_high':
+                return 'high';
+            case 'medium_low':
+                return 'low';
+            default:
+                return tier;
+        }
+    }
     calculateHardwareScore(hardware) {
         const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));
@@ -2003,7 +2017,7 @@ class LLMChecker {
             score -= 15;
         }
-        const hardwareTier = this.getHardwareTier(hardware);
+        const hardwareTier = this.getHardwareTierBucket(hardware);
         switch (hardwareTier) {
             case 'ultra_high':
                 score += 15;
@@ -2412,14 +2426,51 @@ class LLMChecker {
             this.getAllModels().find(m => m.name.toLowerCase().includes(name.toLowerCase()));
     }
+    async loadSyncedOllamaModelData() {
+        const ModelDatabase = require('./data/model-database');
+        const database = new ModelDatabase();
+        try {
+            await database.initialize();
+            const models = database.getAllModelsWithVariants();
+            const stats = database.getStats();
+            if (models.length > 0) {
+                return {
+                    models,
+                    total_count: models.length,
+                    cached_at: stats.lastSync || null,
+                    source: 'ollama_sqlite_database'
+                };
+            }
+        } finally {
+            database.close();
+        }
+        return null;
+    }
+    async loadOllamaModelData() {
+        try {
+            const syncedData = await this.loadSyncedOllamaModelData();
+            if (syncedData?.models?.length > 0) {
+                return syncedData;
+            }
+        } catch (error) {
+            this.logger.warn('Synced SQLite model database unavailable, falling back to Ollama cache', { error: error.message });
+        }
+        return this.ollamaScraper.scrapeAllModels(false);
+    }
     async generateIntelligentRecommendations(hardware, options = {}) {
         try {
             this.logger.info('Generating intelligent recommendations...');
             const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
-            // Obtener todos los modelos de Ollama
-            const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
+            // Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
+            const ollamaData = await this.loadOllamaModelData();
             const allModels = ollamaData.models || [];
             if (allModels.length === 0) {

package/src/models/ai-check-selector.js CHANGED Viewed

@@ -77,8 +77,8 @@ Respond with JSON only, no additional text.`;
         // Phase 1: Get ALL available models from the 177-model Ollama database
         const hardware = await this.deterministicSelector.getHardware();
-        // Use the same large database that check command uses (177 models)
-        const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
+        // Use the same synced database that recommend/check use.
+        const ollamaData = await this.loadModelDatabase();
         const allOllamaModels = ollamaData.models || [];
         if (!silent) {
@@ -248,6 +248,31 @@ Respond with JSON only, no additional text.`;
         };
     }
+    async loadModelDatabase() {
+        try {
+            const ModelDatabase = require('../data/model-database');
+            const database = new ModelDatabase();
+            await database.initialize();
+            try {
+                const models = database.getAllModelsWithVariants();
+                if (models.length > 0) {
+                    return {
+                        models,
+                        total_count: models.length,
+                        source: 'ollama_sqlite_database'
+                    };
+                }
+            } finally {
+                database.close();
+            }
+        } catch {
+            // Fall through to scraper cache.
+        }
+        return this.ollamaScraper.scrapeAllModels(false);
+    }
     /**
      * Pick the best installed evaluator model
      */

package/src/models/deterministic-selector.js CHANGED Viewed

@@ -44,11 +44,16 @@ class DeterministicModelSelector {
         this.familyBumps = {
             'qwen2.5': 2,
             'qwen3': 4,
+            'gemma3': 3,
             'deepseek': 3,
+            'deepseek-r1': 5,
+            'deepseek-coder': 4,
             'mistral': 1,
             'llama3.1': 1,
             'llama3.2': 2,
             'gemma2': 1,
+            'yi': -3,
+            'yi-coder': 1,
             'phi-3': 0,
             'granite': 0,
             'solar': 0,
@@ -750,7 +755,13 @@ class DeterministicModelSelector {
         if (ollamaModel.primary_category === 'reasoning') derivedTags.add('reasoning');
         if (ollamaModel.primary_category === 'creative') derivedTags.add('creative');
-        return variants.map((variant) => {
+        const hasConcreteVariants = variants.some((variant) => this.variantHasConcreteSizeOrParams(variant));
+        const selectableVariants = hasConcreteVariants
+            ? variants.filter((variant) => this.variantHasConcreteSizeOrParams(variant))
+            : variants;
+        return selectableVariants
+            .map((variant) => {
             const variantTag = variant.tag || fallbackTag;
             const quant = this.resolveVariantQuantization(variant, variantTag);
             const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
@@ -821,6 +832,8 @@ class DeterministicModelSelector {
                 modalities,
                 tags: modelTags,
                 model_identifier: variantTag,
+                last_updated: ollamaModel.last_updated || ollamaModel.lastUpdated || '',
+                updated_at: ollamaModel.updated_at || ollamaModel.updatedAt || '',
                 installed: false,
                 pulls: ollamaModel.actual_pulls || ollamaModel.pulls || 0,
                 availableQuantizations,
@@ -842,6 +855,28 @@ class DeterministicModelSelector {
         });
     }
+    variantHasConcreteSizeOrParams(variant = {}) {
+        const params = this.extractParamsFromString(
+            variant.params_b,
+            variant.paramsB,
+            variant.parameter_size,
+            variant.size,
+            variant.tag,
+            variant.label,
+            variant.name
+        );
+        if (Number.isFinite(params) && params > 0) return true;
+        const artifactSize = Number(
+            variant.real_size_gb ??
+            variant.estimated_size_gb ??
+            variant.size_gb ??
+            NaN
+        );
+        return Number.isFinite(artifactSize) && artifactSize > 0;
+    }
     parseBillionsValue(rawValue) {
         return parseMoEBillionsValue(rawValue);
     }
@@ -861,7 +896,26 @@ class DeterministicModelSelector {
     parseDateSafe(value) {
         if (!value || typeof value !== 'string') return null;
-        const parsed = new Date(value);
+        const normalized = value.trim();
+        const relativeMatch = normalized.match(/^(\d+)\s*(minutes?|hours?|days?|weeks?|months?|years?)\s+ago$/i);
+        if (relativeMatch) {
+            const amount = parseInt(relativeMatch[1], 10);
+            const unit = relativeMatch[2].toLowerCase();
+            const days =
+                unit.startsWith('minute') ? amount / (24 * 60) :
+                unit.startsWith('hour') ? amount / 24 :
+                unit.startsWith('day') ? amount :
+                unit.startsWith('week') ? amount * 7 :
+                unit.startsWith('month') ? amount * 30 :
+                unit.startsWith('year') ? amount * 365 :
+                null;
+            if (Number.isFinite(days)) {
+                return new Date(Date.now() - days * 24 * 60 * 60 * 1000);
+            }
+        }
+        const parsed = new Date(normalized);
         if (Number.isNaN(parsed.getTime())) return null;
         return parsed;
     }
@@ -912,8 +966,7 @@ class DeterministicModelSelector {
             model.updatedAt,
             model.release_date,
             model.released_at,
-            model.created_at,
-            model.detailed_scraped_at
+            model.created_at
         ];
         const updatedAt = dateCandidates
@@ -1027,6 +1080,9 @@ class DeterministicModelSelector {
             const regex = /(\d+\.?\d*)\s*([BbMm])/g;
             for (const match of value.matchAll(regex)) {
+                const suffix = value.slice(match.index + match[0].length, match.index + match[0].length + 2);
+                if (/^\s*b\b/i.test(suffix) || /^\s*[gk]b\b/i.test(suffix)) continue;
                 const amount = parseFloat(match[1]);
                 const unit = match[2].toUpperCase();
                 pushCandidate(unit === 'M' ? amount / 1000 : amount);
@@ -1103,7 +1159,7 @@ class DeterministicModelSelector {
             ollamaModel.parameter_count
         );
         if (metadataCandidates.length > 0) {
-            return Math.max(...metadataCandidates);
+            return metadataCandidates[0];
         }
         const artifactSizeGB = this.extractVariantSizeGB(variant, null);
@@ -1136,7 +1192,7 @@ class DeterministicModelSelector {
     }
     extractVariantSizeGB(variant, paramsB) {
-        const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
+        const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? variant.size_gb ?? NaN);
         if (Number.isFinite(candidate) && candidate > 0) return candidate;
         if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
         return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
@@ -1207,11 +1263,14 @@ class DeterministicModelSelector {
         if (name.includes('qwen2.5')) return 'qwen2.5';
         if (name.includes('qwen3')) return 'qwen3';
         if (name.includes('qwen')) return 'qwen2.5';
+        if (name.includes('deepseek-r1')) return 'deepseek-r1';
+        if (name.includes('deepseek-coder')) return 'deepseek-coder';
         if (name.includes('deepseek')) return 'deepseek';
         if (name.includes('llama3.2') || name.includes('llama3.3')) return 'llama3.2';
         if (name.includes('llama3.1')) return 'llama3.1';
         if (name.includes('llama')) return 'llama';
         if (name.includes('mistral')) return 'mistral';
+        if (name.includes('gemma3')) return 'gemma3';
         if (name.includes('gemma')) return 'gemma2';
         if (name.includes('phi')) return 'phi-3';
         if (name.includes('llava')) return 'llava';
@@ -1219,6 +1278,8 @@ class DeterministicModelSelector {
         if (name.includes('solar')) return 'solar';
         if (name.includes('starcoder')) return 'starcoder';
         if (name.includes('minicpm')) return 'minicpm';
+        if (name.includes('yi-coder')) return 'yi-coder';
+        if (name.includes('yi')) return 'yi';
         return 'unknown';
     }
@@ -1351,7 +1412,9 @@ class DeterministicModelSelector {
         const hardware = this.normalizeHardwareProfile(detectedHardware);
         const installed = Array.isArray(installedModels) ? installedModels : await this.getInstalledModels();
         const externalPool = Array.isArray(modelPool) && modelPool.length > 0
-            ? this.normalizeExternalModels(modelPool)
+            ? (modelPool.some(model => typeof model?.paramsB === 'number' && model?.model_identifier)
+                ? modelPool
+                : this.normalizeExternalModels(modelPool))
             : await this.loadModelPool();
         if (!silent) {
@@ -1445,6 +1508,10 @@ class DeterministicModelSelector {
     filterByCategory(models, category) {
         return models.filter(model => {
+            if (this.isCloudVariantTag(model.model_identifier || model.name)) {
+                return false;
+            }
             switch (category) {
                 case 'coding':
                     return model.tags.some(tag => ['coder', 'code', 'instruct'].includes(tag)) ||
@@ -1682,6 +1749,12 @@ class DeterministicModelSelector {
         // Freshness/deprecation adjustment
         const freshnessAdjustment = this.calculateFreshnessAdjustment(model);
         Q += freshnessAdjustment;
+        const pulls = Number(model.pulls || model.actual_pulls || 0);
+        if (pulls >= 100000000) Q += 4;
+        else if (pulls >= 20000000) Q += 3;
+        else if (pulls >= 5000000) Q += 2;
+        else if (pulls >= 1000000) Q += 1;
         // Task alignment bump
         const taskBump = this.getTaskAlignmentBump(model, category);
@@ -2141,6 +2214,10 @@ class DeterministicModelSelector {
     mapHardwareTier(hardware = {}) {
         const summary = hardware?.summary || {};
+        const canonicalTier = summary.hardwareTier || summary.hardware_tier;
+        if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
+            return canonicalTier.trim().toLowerCase().replace(/\s+/g, '_');
+        }
         const effectiveMemory = Number(summary.effectiveMemory);
         const speedCoefficient = Number(summary.speedCoefficient);
         if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {

package/src/ollama/client.js CHANGED Viewed

@@ -668,6 +668,127 @@ class OllamaClient {
             throw new Error(`Failed to run chat request: ${error.message}`);
         }
     }
+    async streamChat(modelName, messages, options = {}, onChunk = null) {
+        const availability = await this.checkOllamaAvailability();
+        if (!availability.available) {
+            throw new Error(`Ollama not available: ${availability.error}`);
+        }
+        const {
+            tools,
+            format,
+            keepAlive,
+            timeoutMs = 120000,
+            generationOptions = {}
+        } = options;
+        const payload = {
+            model: modelName,
+            messages: Array.isArray(messages) ? messages : [],
+            stream: true
+        };
+        if (Array.isArray(tools) && tools.length > 0) payload.tools = tools;
+        if (format) payload.format = format;
+        if (keepAlive) payload.keep_alive = keepAlive;
+        if (generationOptions && Object.keys(generationOptions).length > 0) {
+            payload.options = generationOptions;
+        }
+        const startTime = Date.now();
+        const controller = new AbortController();
+        const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
+        try {
+            const response = await fetch(`${this.baseURL}/api/chat`, {
+                method: 'POST',
+                signal: controller.signal,
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify(payload)
+            });
+            if (!response.ok) {
+                const errorText = await response.text();
+                throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
+            }
+            const decoder = new TextDecoder();
+            let buffer = '';
+            let content = '';
+            let finalData = null;
+            const handleLine = (line) => {
+                if (!line.trim()) return;
+                const data = JSON.parse(line);
+                const chunk = data?.message?.content || '';
+                if (chunk) {
+                    content += chunk;
+                    if (typeof onChunk === 'function') {
+                        onChunk(chunk, data);
+                    }
+                }
+                if (data.done) {
+                    finalData = data;
+                }
+            };
+            if (response.body && typeof response.body.getReader === 'function') {
+                const reader = response.body.getReader();
+                while (true) {
+                    const { done, value } = await reader.read();
+                    if (done) break;
+                    buffer += decoder.decode(value, { stream: true });
+                    const lines = buffer.split('\n');
+                    buffer = lines.pop() || '';
+                    for (const line of lines) {
+                        handleLine(line);
+                    }
+                }
+            } else if (response.body && typeof response.body[Symbol.asyncIterator] === 'function') {
+                for await (const value of response.body) {
+                    buffer += decoder.decode(value, { stream: true });
+                    const lines = buffer.split('\n');
+                    buffer = lines.pop() || '';
+                    for (const line of lines) {
+                        handleLine(line);
+                    }
+                }
+            } else {
+                throw new Error('Streaming response body is not readable');
+            }
+            buffer += decoder.decode();
+            if (buffer.trim()) {
+                handleLine(buffer);
+            }
+            const responseTime = Date.now() - startTime;
+            const speed = this.calculateTokensPerSecond(finalData || {}, responseTime);
+            return {
+                ...(finalData || {}),
+                message: {
+                    role: 'assistant',
+                    content
+                },
+                response: content,
+                responseTime,
+                tokensPerSecond: speed.tokensPerSecond,
+                evalTokensPerSecond: speed.evalTokensPerSecond,
+                endToEndTokensPerSecond: speed.endToEndTokensPerSecond
+            };
+        } catch (error) {
+            throw new Error(`Failed to run streaming chat request: ${error.message}`);
+        } finally {
+            clearTimeout(timeoutId);
+        }
+    }
 }
 module.exports = OllamaClient;