superlocalmemory 3.0.37 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +2 -1
- package/src/superlocalmemory/cli/commands.py +96 -0
- package/src/superlocalmemory/cli/main.py +13 -0
- package/src/superlocalmemory/core/engine.py +63 -0
- package/src/superlocalmemory/core/summarizer.py +4 -26
- package/src/superlocalmemory/hooks/claude_code_hooks.py +175 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +289 -0
- package/src/superlocalmemory/learning/signals.py +326 -0
- package/src/superlocalmemory/llm/backbone.py +14 -5
- package/src/superlocalmemory/mcp/resources.py +26 -1
- package/src/superlocalmemory/mcp/server.py +2 -0
- package/src/superlocalmemory/mcp/tools_active.py +205 -0
- package/src/superlocalmemory/mcp/tools_core.py +51 -0
- package/src/superlocalmemory/server/routes/behavioral.py +20 -5
- package/src/superlocalmemory/server/routes/learning.py +69 -12
- package/src/superlocalmemory/server/routes/stats.py +33 -5
- package/src/superlocalmemory/server/routes/v3_api.py +93 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Sleep-Time Consolidation Worker — background memory maintenance.
|
|
6
|
+
|
|
7
|
+
Runs periodically (every 6 hours or on-demand) to:
|
|
8
|
+
1. Decay confidence on unused facts (floor 0.1)
|
|
9
|
+
2. Deduplicate near-identical facts
|
|
10
|
+
3. Auto-retrain the adaptive ranker when signal threshold is met
|
|
11
|
+
4. Report consolidation stats
|
|
12
|
+
|
|
13
|
+
Inspired by: Letta's sleep-time compute, neuroscience memory consolidation.
|
|
14
|
+
|
|
15
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import sqlite3
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ConsolidationWorker:
    """Background memory maintenance worker.

    Call `run()` periodically (e.g. every 6 hours) or via dashboard button.
    All operations are safe — they improve quality without losing data.
    """

    def __init__(self, memory_db: str | Path, learning_db: str | Path) -> None:
        # Stored as plain strings so they can be handed to sqlite3.connect
        # (and to project helpers that expect string paths) directly.
        self._memory_db = str(memory_db)
        self._learning_db = str(learning_db)

    def run(self, profile_id: str, dry_run: bool = False) -> dict:
        """Run a full consolidation cycle and return a stats dict.

        Args:
            profile_id: Profile whose memories are consolidated.
            dry_run: When True, no database writes occur — only counts
                that can be gathered without mutation are reported.

        Returns:
            Dict with keys: decayed, deduped, retrained, signal_count,
            ranker_phase, timestamp, and (when pattern mining ran)
            patterns_generated.
        """
        stats = {
            "decayed": 0,
            "deduped": 0,
            "retrained": False,
            "signal_count": 0,
            "ranker_phase": 1,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # 1. Confidence decay on unused facts.
        # FIX: decay mutates the DB, so it is skipped entirely under dry_run
        # (previously it wrote even in dry-run mode, unlike the other steps).
        try:
            from superlocalmemory.learning.signals import LearningSignals
            if not dry_run:
                decayed = LearningSignals.decay_confidence(
                    self._memory_db, profile_id, rate=0.001,
                )
                stats["decayed"] = decayed
                logger.info("Confidence decay: %d facts affected", decayed)
        except Exception as exc:
            logger.debug("Decay failed: %s", exc)

        # 2. Deduplication (mark near-identical facts with lower confidence).
        try:
            stats["deduped"] = self._deduplicate(profile_id, dry_run)
        except Exception as exc:
            logger.debug("Dedup failed: %s", exc)

        # 3. Generate behavioral patterns from memories.
        try:
            stats["patterns_generated"] = self._generate_patterns(profile_id, dry_run)
        except Exception as exc:
            logger.debug("Pattern generation failed: %s", exc)

        # 4. Check if the adaptive ranker should retrain.
        try:
            from superlocalmemory.learning.feedback import FeedbackCollector
            collector = FeedbackCollector(Path(self._learning_db))
            signal_count = collector.get_feedback_count(profile_id)
            stats["signal_count"] = signal_count
            # Phase thresholds: <50 → 1, 50..199 → 2, >=200 → 3.
            stats["ranker_phase"] = 1 if signal_count < 50 else (2 if signal_count < 200 else 3)

            # Auto-retrain once the Phase-3 threshold is crossed.
            if signal_count >= 200 and not dry_run:
                stats["retrained"] = self._retrain_ranker(profile_id, signal_count)
        except Exception as exc:
            logger.debug("Retrain check failed: %s", exc)

        return stats

    def _deduplicate(self, profile_id: str, dry_run: bool) -> int:
        """Find and mark near-duplicate facts; return the duplicate count.

        Uses content similarity (exact lowercased 100-char prefix match for
        now). Does NOT delete — duplicates get their confidence halved,
        floored at 0.1. The first fact seen (oldest, by created_at order)
        with a given prefix is treated as canonical.
        """
        try:
            conn = sqlite3.connect(self._memory_db, timeout=10)
        except Exception:
            return 0
        try:
            conn.execute("PRAGMA busy_timeout=5000")
            conn.row_factory = sqlite3.Row

            rows = conn.execute(
                "SELECT fact_id, content FROM atomic_facts "
                "WHERE profile_id = ? ORDER BY created_at",
                (profile_id,),
            ).fetchall()

            seen_prefixes: dict[str, str] = {}
            duplicates: list[str] = []
            for r in rows:
                d = dict(r)
                prefix = d["content"][:100].strip().lower()
                if prefix in seen_prefixes:
                    duplicates.append(d["fact_id"])
                else:
                    seen_prefixes[prefix] = d["fact_id"]

            if duplicates and not dry_run:
                for fid in duplicates:
                    # SQLite scalar MAX() keeps the 0.1 confidence floor.
                    conn.execute(
                        "UPDATE atomic_facts SET confidence = MAX(0.1, confidence * 0.5) "
                        "WHERE fact_id = ?",
                        (fid,),
                    )
                conn.commit()
            return len(duplicates)
        except Exception:
            return 0
        finally:
            # FIX: previously the connection leaked when an exception fired
            # between connect() and close().
            conn.close()

    def _generate_patterns(self, profile_id: str, dry_run: bool) -> int:
        """Mine behavioral patterns from existing memories; return count stored.

        Scans up to the 500 most recent facts to detect:
        - Tech preferences (language/framework mentions)
        - Topic clusters (frequently discussed subjects)
        - Temporal patterns (time-of-day activity)
        """
        try:
            from superlocalmemory.learning.behavioral import BehavioralPatternStore
            import re
            from collections import Counter

            conn = sqlite3.connect(self._memory_db, timeout=10)
            try:
                conn.execute("PRAGMA busy_timeout=5000")
                conn.row_factory = sqlite3.Row
                facts = conn.execute(
                    "SELECT content, created_at FROM atomic_facts "
                    "WHERE profile_id = ? ORDER BY created_at DESC LIMIT 500",
                    (profile_id,),
                ).fetchall()
            finally:
                # FIX: close in finally so a failed query doesn't leak the handle.
                conn.close()

            # Too little data to mine meaningful patterns.
            if len(facts) < 10:
                return 0

            store = BehavioralPatternStore(self._learning_db)
            generated = 0

            # --- Tech preferences: detect technology mentions -------------
            tech_keywords = {
                "python": "Python", "javascript": "JavaScript", "typescript": "TypeScript",
                "react": "React", "vue": "Vue", "angular": "Angular",
                "postgresql": "PostgreSQL", "mysql": "MySQL", "sqlite": "SQLite",
                "docker": "Docker", "kubernetes": "Kubernetes", "aws": "AWS",
                "azure": "Azure", "gcp": "GCP", "node": "Node.js",
                "fastapi": "FastAPI", "django": "Django", "flask": "Flask",
                "rust": "Rust", "go": "Go", "java": "Java",
                "git": "Git", "npm": "npm", "pip": "pip",
                "langchain": "LangChain", "ollama": "Ollama", "pytorch": "PyTorch",
                "claude": "Claude", "openai": "OpenAI", "anthropic": "Anthropic",
            }

            tech_counts = Counter()
            for f in facts:
                content = dict(f)["content"].lower()
                for keyword, label in tech_keywords.items():
                    if keyword in content:
                        tech_counts[label] += 1

            for tech, count in tech_counts.most_common(15):
                if count >= 3 and not dry_run:
                    # 20+ mentions saturates confidence at 1.0.
                    confidence = min(1.0, count / 20)
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="tech_preference",
                        data={"topic": tech, "pattern_key": tech, "value": tech,
                              "key": "tech", "evidence": count},
                        success_rate=confidence,
                        confidence=confidence,
                    )
                    generated += 1

            # --- Topic clusters: most discussed subjects ------------------
            word_counts = Counter()
            stopwords = frozenset({
                "the", "is", "a", "an", "in", "on", "at", "to", "for", "of",
                "and", "or", "not", "with", "that", "this", "was", "are", "be",
                "has", "had", "have", "from", "by", "it", "its", "as", "but",
            })
            for f in facts:
                # Words of 4+ letters only, to skip connective noise.
                words = re.findall(r'\b[a-zA-Z]{4,}\b', dict(f)["content"].lower())
                for w in words:
                    if w not in stopwords:
                        word_counts[w] += 1

            for topic, count in word_counts.most_common(10):
                if count >= 5 and not dry_run:
                    confidence = min(1.0, count / 30)
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="interest",
                        data={"topic": topic, "pattern_key": topic,
                              "count": count, "evidence": count},
                        success_rate=confidence,
                        confidence=confidence,
                    )
                    generated += 1

            # --- Temporal patterns: time-of-day activity ------------------
            hour_counts = Counter()
            for f in facts:
                created = dict(f).get("created_at", "")
                # Only ISO-8601 timestamps ("...T<HH>...") carry an hour.
                if "T" in created:
                    try:
                        hour = int(created.split("T")[1][:2])
                        period = "morning" if 6 <= hour < 12 else (
                            "afternoon" if 12 <= hour < 18 else (
                                "evening" if 18 <= hour < 22 else "night"))
                        hour_counts[period] += 1
                    except (ValueError, IndexError):
                        pass

            for period, count in hour_counts.most_common():
                if count >= 3 and not dry_run:
                    total = sum(hour_counts.values())
                    pct = round(count / total * 100)
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="temporal",
                        data={"topic": period, "pattern_key": period,
                              "value": f"{period} ({pct}%)", "evidence": count,
                              "key": period, "distribution": dict(hour_counts)},
                        success_rate=pct / 100,
                        confidence=min(1.0, count / 20),
                    )
                    generated += 1

            return generated
        except Exception as exc:
            logger.debug("Pattern generation error: %s", exc)
            return 0

    def _retrain_ranker(self, profile_id: str, signal_count: int) -> bool:
        """Retrain the adaptive ranker from accumulated feedback.

        Returns True when the ranker reports a successful training pass.
        Requires at least 200 feedback records (Phase-3 threshold).
        """
        try:
            from superlocalmemory.learning.feedback import FeedbackCollector
            from superlocalmemory.learning.ranker import AdaptiveRanker

            collector = FeedbackCollector(Path(self._learning_db))
            feedback = collector.get_feedback(profile_id, limit=500)

            if len(feedback) < 200:
                return False

            # Build training data from feedback.
            # NOTE(review): features currently mirror the label 1:1, which is
            # degenerate as a training signal — presumably a placeholder until
            # real retrieval features are plumbed through; confirm upstream.
            training_data = []
            for f in feedback:
                label = f.get("signal_value", 0.5)
                training_data.append({
                    "features": {"signal_value": label},
                    "label": label,
                })

            ranker = AdaptiveRanker(signal_count=signal_count)
            trained = ranker.train(training_data)

            if trained:
                logger.info("Ranker retrained with %d examples (Phase 3)", len(training_data))

            return trained
        except Exception as exc:
            logger.debug("Retrain failed: %s", exc)
            return False
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Zero-Cost Learning Signals — mathematical learning without LLM tokens.
|
|
6
|
+
|
|
7
|
+
Four signal types that improve retrieval quality over time:
|
|
8
|
+
|
|
9
|
+
1. Entropy Gap — Surprising content gets deeper indexing.
|
|
10
|
+
2. Co-Retrieval — Memories retrieved together strengthen graph edges.
|
|
11
|
+
3. Channel Credit — Track which retrieval channel works for which query type.
|
|
12
|
+
4. Confidence Lifecycle — Boost on access, decay over time.
|
|
13
|
+
|
|
14
|
+
All signals are computed locally with zero LLM cost.
|
|
15
|
+
Inspired by: Nemori (entropy), A-Mem (link evolution), RMM (citation feedback).
|
|
16
|
+
|
|
17
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import math
|
|
24
|
+
import sqlite3
|
|
25
|
+
import threading
|
|
26
|
+
from datetime import datetime, timezone
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class LearningSignals:
    """Compute and apply zero-cost learning signals.

    Uses the main memory.db via direct sqlite3 (no engine dependency).
    Thread-safe via lock.
    """

    def __init__(self, db_path: str | Path) -> None:
        self._db_path = str(db_path)
        # Serializes all instance-level DB access from this process.
        self._lock = threading.Lock()
        self._ensure_tables()

    # ------------------------------------------------------------------
    # Schema
    # ------------------------------------------------------------------

    def _ensure_tables(self) -> None:
        """Create learning signal tables if they don't exist."""
        with self._lock:
            conn = self._connect()
            try:
                conn.execute(
                    "CREATE TABLE IF NOT EXISTS channel_credits ("
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, "
                    "profile_id TEXT NOT NULL, "
                    "query_type TEXT NOT NULL, "
                    "channel TEXT NOT NULL, "
                    "hits INTEGER DEFAULT 0, "
                    "total INTEGER DEFAULT 0, "
                    "updated_at TEXT NOT NULL)"
                )
                # Unique index backs the ON CONFLICT upsert in credit_channel().
                conn.execute(
                    "CREATE UNIQUE INDEX IF NOT EXISTS idx_channel_credit_unique "
                    "ON channel_credits(profile_id, query_type, channel)"
                )
                conn.execute(
                    "CREATE TABLE IF NOT EXISTS co_retrieval_edges ("
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, "
                    "profile_id TEXT NOT NULL, "
                    "fact_id_a TEXT NOT NULL, "
                    "fact_id_b TEXT NOT NULL, "
                    "co_count INTEGER DEFAULT 1, "
                    "updated_at TEXT NOT NULL)"
                )
                # Unique index backs the ON CONFLICT upsert in record_co_retrieval().
                conn.execute(
                    "CREATE UNIQUE INDEX IF NOT EXISTS idx_co_retrieval_unique "
                    "ON co_retrieval_edges(profile_id, fact_id_a, fact_id_b)"
                )
                conn.commit()
            finally:
                conn.close()

    def _connect(self) -> sqlite3.Connection:
        """Open a WAL-mode connection with a busy timeout; caller closes it."""
        conn = sqlite3.connect(self._db_path, timeout=10)
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=5000")
        conn.row_factory = sqlite3.Row
        return conn

    # ------------------------------------------------------------------
    # Signal 1: Entropy Gap (store-time)
    # ------------------------------------------------------------------

    @staticmethod
    def compute_entropy_gap(
        new_embedding: list[float],
        cluster_embeddings: list[list[float]],
    ) -> float:
        """Compute how surprising new content is relative to existing cluster.

        High gap = surprising content = should get deeper indexing.
        Low gap = redundant content = standard indexing.

        Returns a value in [0.0, 1.0]. >0.7 is 'surprising'.
        """
        if not cluster_embeddings or not new_embedding:
            return 0.5  # neutral when no comparison available

        similarities = [
            _cosine_sim(new_embedding, existing)
            for existing in cluster_embeddings
        ]
        avg_sim = sum(similarities) / len(similarities)
        # Gap is inverse similarity, clamped to [0, 1].
        return max(0.0, min(1.0, 1.0 - avg_sim))

    # ------------------------------------------------------------------
    # Signal 2: Co-Retrieval (recall-time)
    # ------------------------------------------------------------------

    def record_co_retrieval(
        self, profile_id: str, fact_ids: list[str],
    ) -> int:
        """Record that these facts were co-retrieved; return pairs recorded.

        Nearby pairs of facts in the result set (each fact paired with at
        most its next 4 neighbors, to bound the O(n^2) blow-up) get their
        co-retrieval count incremented. This strengthens implicit graph
        edges. Pair members are sorted so (a, b) and (b, a) share one row.
        """
        if len(fact_ids) < 2:
            return 0

        now = datetime.now(timezone.utc).isoformat()
        pairs = []
        for i in range(len(fact_ids)):
            for j in range(i + 1, min(len(fact_ids), i + 5)):
                a, b = sorted([fact_ids[i], fact_ids[j]])
                pairs.append((profile_id, a, b, now, now))

        if not pairs:
            return 0

        with self._lock:
            conn = self._connect()
            try:
                conn.executemany(
                    "INSERT INTO co_retrieval_edges "
                    "(profile_id, fact_id_a, fact_id_b, co_count, updated_at) "
                    "VALUES (?, ?, ?, 1, ?) "
                    "ON CONFLICT(profile_id, fact_id_a, fact_id_b) "
                    "DO UPDATE SET co_count = co_count + 1, updated_at = ?",
                    pairs,
                )
                conn.commit()
                return len(pairs)
            finally:
                conn.close()

    def get_co_retrieval_boost(
        self, profile_id: str, fact_id: str, top_k: int = 5,
    ) -> list[dict]:
        """Get top co-retrieved facts for boosting.

        Returns dicts of {"fact_id": <other fact>, "co_count": <int>},
        strongest edges first.
        """
        with self._lock:
            conn = self._connect()
            try:
                rows = conn.execute(
                    "SELECT fact_id_a, fact_id_b, co_count FROM co_retrieval_edges "
                    "WHERE profile_id = ? AND (fact_id_a = ? OR fact_id_b = ?) "
                    "ORDER BY co_count DESC LIMIT ?",
                    (profile_id, fact_id, fact_id, top_k),
                ).fetchall()
                results = []
                for r in rows:
                    d = dict(r)
                    # Report whichever endpoint is not the queried fact.
                    other = d["fact_id_b"] if d["fact_id_a"] == fact_id else d["fact_id_a"]
                    results.append({"fact_id": other, "co_count": d["co_count"]})
                return results
            finally:
                conn.close()

    # ------------------------------------------------------------------
    # Signal 3: Channel Credit (recall-time)
    # ------------------------------------------------------------------

    def credit_channel(
        self, profile_id: str, query_type: str, channel: str, hit: bool,
    ) -> None:
        """Credit a retrieval channel for a hit or miss."""
        now = datetime.now(timezone.utc).isoformat()
        with self._lock:
            conn = self._connect()
            try:
                hit_val = 1 if hit else 0
                conn.execute(
                    "INSERT INTO channel_credits "
                    "(profile_id, query_type, channel, hits, total, updated_at) "
                    "VALUES (?, ?, ?, ?, 1, ?) "
                    "ON CONFLICT(profile_id, query_type, channel) "
                    "DO UPDATE SET hits = hits + ?, total = total + 1, updated_at = ?",
                    (profile_id, query_type, channel, hit_val, now, hit_val, now),
                )
                conn.commit()
            finally:
                conn.close()

    def get_channel_weights(
        self, profile_id: str, query_type: str,
    ) -> dict[str, float]:
        """Get learned channel weights for a query type.

        Returns weight multipliers based on historical hit rates, mapping
        hit rate [0, 1] linearly onto [0.7, 1.5]. Channels with fewer than
        5 observations are omitted (not enough evidence).
        """
        with self._lock:
            conn = self._connect()
            try:
                rows = conn.execute(
                    "SELECT channel, hits, total FROM channel_credits "
                    "WHERE profile_id = ? AND query_type = ? AND total >= 5",
                    (profile_id, query_type),
                ).fetchall()
                if not rows:
                    return {}
                weights = {}
                for r in rows:
                    d = dict(r)
                    rate = d["hits"] / max(d["total"], 1)
                    weights[d["channel"]] = 0.7 + (rate * 0.8)
                return weights
            finally:
                conn.close()

    # ------------------------------------------------------------------
    # Signal 4: Confidence Lifecycle (store + recall time)
    # ------------------------------------------------------------------

    @staticmethod
    def boost_confidence(db_path: str, fact_id: str, amount: float = 0.02) -> None:
        """Boost a fact's confidence on access (capped at 1.0) and bump access_count.

        Best-effort: all failures are swallowed so a busy DB never breaks recall.
        """
        try:
            conn = sqlite3.connect(db_path, timeout=10)
        except Exception:
            return
        try:
            conn.execute("PRAGMA busy_timeout=5000")
            # Single atomic statement (previously two separate UPDATEs).
            conn.execute(
                "UPDATE atomic_facts SET "
                "confidence = MIN(1.0, confidence + ?), "
                "access_count = access_count + 1 "
                "WHERE fact_id = ?",
                (amount, fact_id),
            )
            conn.commit()
        except Exception:
            pass
        finally:
            # FIX: previously leaked the connection when the UPDATE raised.
            conn.close()

    @staticmethod
    def decay_confidence(db_path: str, profile_id: str, rate: float = 0.001) -> int:
        """Decay confidence on unused facts (floor 0.1); return rows affected.

        Only facts never accessed and older than 7 days decay.
        Best-effort: returns 0 on any failure.
        """
        try:
            conn = sqlite3.connect(db_path, timeout=10)
        except Exception:
            return 0
        try:
            conn.execute("PRAGMA busy_timeout=5000")
            cursor = conn.execute(
                "UPDATE atomic_facts SET confidence = MAX(0.1, confidence - ?) "
                "WHERE profile_id = ? AND access_count = 0 "
                "AND created_at < datetime('now', '-7 days')",
                (rate, profile_id),
            )
            conn.commit()
            return cursor.rowcount
        except Exception:
            return 0
        finally:
            # FIX: previously leaked the connection when the UPDATE raised.
            conn.close()

    # ------------------------------------------------------------------
    # Stats
    # ------------------------------------------------------------------

    def get_signal_stats(self, profile_id: str) -> dict:
        """Get learning signal statistics for dashboard."""
        with self._lock:
            conn = self._connect()
            try:
                co_rows = conn.execute(
                    "SELECT COUNT(*) AS c, COALESCE(SUM(co_count), 0) AS total "
                    "FROM co_retrieval_edges WHERE profile_id = ?",
                    (profile_id,),
                ).fetchone()
                co = dict(co_rows) if co_rows else {"c": 0, "total": 0}

                ch_rows = conn.execute(
                    "SELECT channel, hits, total FROM channel_credits "
                    "WHERE profile_id = ? ORDER BY total DESC",
                    (profile_id,),
                ).fetchall()
                channels = {}
                for r in ch_rows:
                    d = dict(r)
                    channels[d["channel"]] = {
                        "hits": d["hits"],
                        "total": d["total"],
                        "rate": round(d["hits"] / max(d["total"], 1), 3),
                    }

                return {
                    "co_retrieval_edges": co["c"],
                    "co_retrieval_events": co["total"],
                    "channel_performance": channels,
                }
            finally:
                conn.close()
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _cosine_sim(a: list[float], b: list[float]) -> float:
|
|
318
|
+
"""Cosine similarity between two vectors."""
|
|
319
|
+
if len(a) != len(b) or not a:
|
|
320
|
+
return 0.0
|
|
321
|
+
dot = sum(x * y for x, y in zip(a, b))
|
|
322
|
+
norm_a = math.sqrt(sum(x * x for x in a))
|
|
323
|
+
norm_b = math.sqrt(sum(x * x for x in b))
|
|
324
|
+
if norm_a < 1e-10 or norm_b < 1e-10:
|
|
325
|
+
return 0.0
|
|
326
|
+
return dot / (norm_a * norm_b)
|
|
@@ -109,7 +109,7 @@ class LLMBackbone:
|
|
|
109
109
|
host = config.api_base or os.environ.get(
|
|
110
110
|
"OLLAMA_HOST", _OLLAMA_DEFAULT_BASE,
|
|
111
111
|
)
|
|
112
|
-
self._base_url = f"{host.rstrip('/')}/
|
|
112
|
+
self._base_url = f"{host.rstrip('/')}/api/chat"
|
|
113
113
|
elif self._provider == "openrouter":
|
|
114
114
|
self._api_key = config.api_key or os.environ.get(
|
|
115
115
|
_ENV_KEYS.get(self._provider, ""), "",
|
|
@@ -251,13 +251,19 @@ class LLMBackbone:
|
|
|
251
251
|
) -> tuple[str, dict[str, str], dict]:
|
|
252
252
|
messages = self._make_messages(system, prompt)
|
|
253
253
|
headers = {"Content-Type": "application/json"}
|
|
254
|
+
# Native /api/chat format — NOT /v1/chat/completions.
|
|
255
|
+
# The OpenAI-compatible endpoint silently ignores options.num_ctx,
|
|
256
|
+
# causing Ollama to use the model's default (131K for llama3.1 = 30 GB).
|
|
254
257
|
payload = {
|
|
255
258
|
"model": self._model,
|
|
256
259
|
"messages": messages,
|
|
257
|
-
"
|
|
258
|
-
"temperature": temperature,
|
|
260
|
+
"stream": False,
|
|
259
261
|
"keep_alive": "30s",
|
|
260
|
-
"options": {
|
|
262
|
+
"options": {
|
|
263
|
+
"num_predict": max_tokens,
|
|
264
|
+
"temperature": temperature,
|
|
265
|
+
"num_ctx": 4096,
|
|
266
|
+
},
|
|
261
267
|
}
|
|
262
268
|
return self._base_url, headers, payload
|
|
263
269
|
|
|
@@ -308,7 +314,10 @@ class LLMBackbone:
|
|
|
308
314
|
"""Extract text from provider-specific JSON response."""
|
|
309
315
|
if self._provider == "anthropic":
|
|
310
316
|
return data.get("content", [{}])[0].get("text", "").strip()
|
|
311
|
-
|
|
317
|
+
if self._provider == "ollama":
|
|
318
|
+
# Native /api/chat: {"message": {"content": "..."}}
|
|
319
|
+
return data.get("message", {}).get("content", "").strip()
|
|
320
|
+
# OpenAI / Azure share response format.
|
|
312
321
|
choices = data.get("choices", [{}])
|
|
313
322
|
return choices[0].get("message", {}).get("content", "").strip()
|
|
314
323
|
|