npm - loki-mode - Versions diffs - 7.63.1 → 7.65.0 - Mend

loki-mode 7.63.1 → 7.65.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/app-runner.sh +110 -1
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +81 -18
package/docs/INSTALLATION.md +2 -2
package/loki-ts/dist/loki.js +2 -2
package/mcp/__init__.py +1 -1
package/mcp/server.py +27 -3
package/memory/consolidation.py +22 -3
package/memory/engine.py +157 -107
package/memory/retrieval.py +105 -41
package/memory/storage.py +131 -40
package/memory/token_economics.py +38 -9
package/package.json +1 -1
package/plugins/loki-mode/.claude-plugin/plugin.json +1 -1

package/memory/engine.py CHANGED

@@ -332,60 +332,75 @@ class MemoryEngine:
         real topics immediately after a session ends.
         """
         try:
-            index = self.storage.read_json("index.json") or {
-                "version": "1.1.0",
-                "topics": [],
-                "total_memories": 0,
-            }
-            context = episode.get("context", {}) if isinstance(episode.get("context"), dict) else {}
-            phase = (context.get("phase") or episode.get("phase") or "general").lower()
-            goal = (context.get("goal") or episode.get("goal") or "")[:200]
-            # Topic id = phase. Multiple episodes in the same phase share a topic.
-            topic_id = phase or "general"
-            now = datetime.now(timezone.utc).isoformat()
-            episode_id = episode.get("id")
-            cost = float(episode.get("cost_usd", 0) or 0)
-            tokens = int(episode.get("tokens_used", 0) or 0)
-            files = list(episode.get("files_modified", []) or [])
-            found = None
-            for topic in index.get("topics", []):
-                if topic.get("id") == topic_id:
-                    found = topic
-                    break
-            if found is None:
-                index.setdefault("topics", []).append({
-                    "id": topic_id,
-                    "summary": goal or f"Activity in phase {topic_id}",
-                    "episode_ids": [episode_id] if episode_id else [],
-                    "episode_count": 1,
-                    "total_cost_usd": cost,
-                    "total_tokens": tokens,
-                    "files_touched": files[:20],
-                    "first_seen": now,
-                    "last_accessed": now,
-                    "relevance_score": 0.5,
-                })
-                index["total_memories"] = index.get("total_memories", 0) + 1
-            else:
-                # Only count a given episode once. On resume/checkpoint the same
-                # trace id can be re-saved; without this guard episode_count,
-                # total_cost_usd, and total_tokens would inflate on every re-save
-                # even though episode_ids is already de-duplicated.
-                if episode_id and episode_id not in found.get("episode_ids", []):
-                    found.setdefault("episode_ids", []).append(episode_id)
-                    found["episode_count"] = found.get("episode_count", 0) + 1
-                    found["total_cost_usd"] = float(found.get("total_cost_usd", 0) or 0) + cost
-                    found["total_tokens"] = int(found.get("total_tokens", 0) or 0) + tokens
-                merged = set(found.get("files_touched", []) or []) | set(files[:20])
-                found["files_touched"] = sorted(merged)[:50]
-                found["last_accessed"] = now
-            index["last_updated"] = now
-            self.storage.write_json("index.json", index)
+            # H4 lost-update fix (wave-6): hold ONE exclusive lock spanning the
+            # full read-modify-write of index.json. _file_lock is reentrant per
+            # thread (storage._held_locks is threading.local) and cross-process
+            # safe (fcntl.flock), so the inner read_json/write_json calls -- which
+            # re-enter _file_lock on the SAME resolved path -- are no-ops and do
+            # not deadlock. The lock target is derived from storage._resolve_path
+            # so its string key is byte-identical to the one read_json/write_json
+            # compute internally (mismatched keys would self-deadlock).
+            index_lock = Path(self.storage._resolve_path("index.json"))
+            with self.storage._file_lock(index_lock, exclusive=True):
+                index = self.storage.read_json("index.json") or {
+                    "version": "1.1.0",
+                    "topics": [],
+                    "total_memories": 0,
+                }
+                context = episode.get("context", {}) if isinstance(episode.get("context"), dict) else {}
+                phase = (context.get("phase") or episode.get("phase") or "general").lower()
+                goal = (context.get("goal") or episode.get("goal") or "")[:200]
+                # Topic id = phase. Multiple episodes in the same phase share a topic.
+                topic_id = phase or "general"
+                now = datetime.now(timezone.utc).isoformat()
+                episode_id = episode.get("id")
+                cost = float(episode.get("cost_usd", 0) or 0)
+                tokens = int(episode.get("tokens_used", 0) or 0)
+                files = list(episode.get("files_modified", []) or [])
+                found = None
+                for topic in index.get("topics", []):
+                    if topic.get("id") == topic_id:
+                        found = topic
+                        break
+                if found is None:
+                    index.setdefault("topics", []).append({
+                        "id": topic_id,
+                        "summary": goal or f"Activity in phase {topic_id}",
+                        "episode_ids": [episode_id] if episode_id else [],
+                        "episode_count": 1,
+                        "total_cost_usd": cost,
+                        "total_tokens": tokens,
+                        "files_touched": files[:20],
+                        "first_seen": now,
+                        "last_accessed": now,
+                        "relevance_score": 0.5,
+                    })
+                    index["total_memories"] = index.get("total_memories", 0) + 1
+                else:
+                    # Only count a given episode once. On resume/checkpoint the same
+                    # trace id can be re-saved; without this guard episode_count,
+                    # total_cost_usd, and total_tokens would inflate on every re-save
+                    # even though episode_ids is already de-duplicated.
+                    if episode_id and episode_id not in found.get("episode_ids", []):
+                        found.setdefault("episode_ids", []).append(episode_id)
+                        found["episode_count"] = found.get("episode_count", 0) + 1
+                        found["total_cost_usd"] = float(found.get("total_cost_usd", 0) or 0) + cost
+                        found["total_tokens"] = int(found.get("total_tokens", 0) or 0) + tokens
+                    merged = set(found.get("files_touched", []) or []) | set(files[:20])
+                    found["files_touched"] = sorted(merged)[:50]
+                    found["last_accessed"] = now
+                index["last_updated"] = now
+                self.storage.write_json("index.json", index)
         except Exception:  # noqa: BLE001
-            # Never let index update break episode storage.
-            pass
+            # Never let index update break episode storage, but make the
+            # failure observable instead of swallowing it silently (L2).
+            logger.warning(
+                "Failed to update index.json with episode %s",
+                episode.get("id"),
+                exc_info=True,
+            )
     def get_episode(self, episode_id: str) -> Optional[EpisodeTrace]:
         """
@@ -522,8 +537,13 @@ class MemoryEngine:
         for pattern in patterns_data.get("patterns", []):
             if not isinstance(pattern, dict):
                 continue
-            # Filter by confidence
-            if pattern.get("confidence", 0) < min_confidence:
+            # Filter by confidence. Guard against an explicit null confidence
+            # (corrupt/hand-edited record): None < float raises TypeError in
+            # Python 3, so treat a null as 0 (filtered out unless threshold 0).
+            pattern_confidence = pattern.get("confidence")
+            if pattern_confidence is None:
+                pattern_confidence = 0
+            if pattern_confidence < min_confidence:
                 continue
             # Filter by category if specified
@@ -550,8 +570,10 @@ class MemoryEngine:
         if pattern_data is None:
             return
-        # Update fields
-        pattern_data["usage_count"] = pattern_data.get("usage_count", 0) + 1
+        # Update fields. `or 0` guards against an explicit null usage_count
+        # (corrupt/hand-edited record) crashing the increment with a TypeError;
+        # a null and 0 are equivalent here so `or` is safe.
+        pattern_data["usage_count"] = (pattern_data.get("usage_count") or 0) + 1
         pattern_data["last_used"] = datetime.now(timezone.utc).isoformat()
         # Write back via save_pattern which holds an exclusive lock during
@@ -577,9 +599,24 @@ class MemoryEngine:
         skill_id = skill_dict.get("id", f"skill-{self._generate_id()}")
         skill_dict["id"] = skill_id
-        # Generate filename from skill name or ID
-        skill_name = skill_dict.get("name", skill_id)
-        filename = skill_name.lower().replace(" ", "-").replace("_", "-")
+        # Generate filename from skill name or ID.
+        # H3 path-traversal fix (wave-6): the previous filename derivation only
+        # replaced spaces and underscores, so a skill name like
+        # "../../../tmp/pwned" kept its "/" and ".." and escaped the memory root
+        # via the raw open(skill_path, "w") below (which bypasses _resolve_path).
+        # Sanitize to safe chars only, matching storage.save_skill's house style,
+        # and fall back to the skill id when sanitization collapses to empty.
+        skill_name = skill_dict.get("name") or skill_id
+        normalized = skill_name.lower().replace(" ", "-").replace("_", "-")
+        filename = "".join(
+            c if (c.isalnum() or c == "-") else "-"
+            for c in normalized
+        ).strip("-")
+        if not filename:
+            filename = "".join(
+                c if (c.isalnum() or c == "-") else "-"
+                for c in skill_id.lower()
+            ).strip("-") or "skill"
         # Store as markdown
         content = self._skill_to_markdown(skill_dict)
@@ -899,57 +936,65 @@ class MemoryEngine:
         context = episode.get("context", {})
         action_entry = {
             "timestamp": episode.get("timestamp", datetime.now(timezone.utc).isoformat()),
-            "action": context.get("goal", "Task completed")[:100],
+            "action": (context.get("goal") or "Task completed")[:100],
             "outcome": episode.get("outcome", "unknown"),
-            "topic_id": context.get("phase", "general"),
+            "topic_id": context.get("phase") or "general",
         }
         self.storage.update_timeline(action_entry)
     def _update_index_with_pattern(self, pattern: Dict[str, Any]) -> None:
         """Update index with pattern topic."""
-        index = self.storage.read_json("index.json") or {
-            "version": "1.0",
-            "topics": [],
-            "total_memories": 0,
-            "total_tokens_available": 0,
-        }
+        # H4 lost-update fix (wave-6): hold ONE exclusive lock spanning the full
+        # read-modify-write of index.json so concurrent store_pattern (and
+        # store_episode) calls cannot clobber each other. See the matching note
+        # in _update_index_with_episode for why the lock target is derived from
+        # storage._resolve_path and why the inner read_json/write_json calls do
+        # not deadlock (reentrant per-thread, cross-process safe via flock).
+        index_lock = Path(self.storage._resolve_path("index.json"))
+        with self.storage._file_lock(index_lock, exclusive=True):
+            index = self.storage.read_json("index.json") or {
+                "version": "1.0",
+                "topics": [],
+                "total_memories": 0,
+                "total_tokens_available": 0,
+            }
-        category = pattern.get("category", "general")
-        # An index.json that is valid JSON but missing the "topics" key (e.g.
-        # written by an older/partial writer, or hand-edited) would crash here
-        # on index["topics"] because the `or {...}` default only fires when the
-        # whole file is falsy. setdefault matches the defensive pattern used in
-        # the sibling _update_index_with_episode.
-        topics = index.setdefault("topics", [])
-        # Find or create topic
-        topic_found = False
-        for topic in topics:
-            if topic.get("id") == category:
-                topic["last_accessed"] = datetime.now(timezone.utc).isoformat()
-                topic["relevance_score"] = max(
-                    topic.get("relevance_score", 0.5),
-                    pattern.get("confidence", 0.5),
-                )
-                topic_found = True
-                break
+            category = pattern.get("category", "general")
+            # An index.json that is valid JSON but missing the "topics" key (e.g.
+            # written by an older/partial writer, or hand-edited) would crash here
+            # on index["topics"] because the `or {...}` default only fires when the
+            # whole file is falsy. setdefault matches the defensive pattern used in
+            # the sibling _update_index_with_episode.
+            topics = index.setdefault("topics", [])
+            # Find or create topic
+            topic_found = False
+            for topic in topics:
+                if topic.get("id") == category:
+                    topic["last_accessed"] = datetime.now(timezone.utc).isoformat()
+                    topic["relevance_score"] = max(
+                        topic.get("relevance_score", 0.5),
+                        pattern.get("confidence", 0.5),
+                    )
+                    topic_found = True
+                    break
-        if not topic_found:
-            topics.append({
-                "id": category,
-                "summary": f"Patterns for {category}",
-                "relevance_score": pattern.get("confidence", 0.5),
-                "last_accessed": datetime.now(timezone.utc).isoformat(),
-                "token_count": len(json.dumps(pattern)) // 4,
-            })
+            if not topic_found:
+                topics.append({
+                    "id": category,
+                    "summary": f"Patterns for {category}",
+                    "relevance_score": pattern.get("confidence", 0.5),
+                    "last_accessed": datetime.now(timezone.utc).isoformat(),
+                    "token_count": len(json.dumps(pattern)) // 4,
+                })
-        index["last_updated"] = datetime.now(timezone.utc).isoformat()
-        if not topic_found:
-            index["total_memories"] = index.get("total_memories", 0) + 1
+            index["last_updated"] = datetime.now(timezone.utc).isoformat()
+            if not topic_found:
+                index["total_memories"] = index.get("total_memories", 0) + 1
-        self.storage.write_json("index.json", index)
+            self.storage.write_json("index.json", index)
     def _search_episode(self, episode_id: str) -> Optional[EpisodeTrace]:
         """Search for episode across all date directories."""
@@ -1190,9 +1235,13 @@ class MemoryEngine:
         Detect task type from context.
         Uses keyword matching based on goal, action, and phase.
         """
-        goal = context.get("goal", "").lower()
-        action = context.get("action_type", "").lower()
-        phase = context.get("phase", "").lower()
+        # M3 None-guard (wave-6): an explicit null value (e.g. {"goal": None})
+        # makes context.get("goal", "") return None, so None.lower() crashed.
+        # The retrieval.py copy was fixed in v7.61.0; this engine.py copy was
+        # the missed sibling. Coalesce to "" before calling string methods.
+        goal = (context.get("goal") or "").lower()
+        action = (context.get("action_type") or "").lower()
+        phase = (context.get("phase") or "").lower()
         signals = {
             "exploration": {
@@ -1277,7 +1326,8 @@ class MemoryEngine:
             episodes = self.get_recent_episodes(limit=50)
             for ep in episodes:
                 ep_dict = ep.to_dict() if hasattr(ep, "to_dict") else ep.__dict__.copy()
-                goal = ep_dict.get("context", {}).get("goal", "").lower()
+                ep_context = ep_dict.get("context") or {}
+                goal = (ep_context.get("goal") or "").lower()
                 score = sum(1 for kw in keywords if kw in goal)
                 if score > 0:
                     ep_dict["_score"] = score
@@ -1288,7 +1338,7 @@ class MemoryEngine:
             patterns = self.find_patterns(min_confidence=0.3)
             for pattern in patterns:
                 p_dict = pattern.to_dict() if hasattr(pattern, "to_dict") else pattern.__dict__.copy()
-                pattern_text = p_dict.get("pattern", "").lower()
+                pattern_text = (p_dict.get("pattern") or "").lower()
                 score = sum(1 for kw in keywords if kw in pattern_text)
                 if score > 0:
                     p_dict["_score"] = score
@@ -1299,8 +1349,8 @@ class MemoryEngine:
             skills = self.list_skills()
             for skill in skills:
                 s_dict = skill.to_dict() if hasattr(skill, "to_dict") else skill.__dict__.copy()
-                name = s_dict.get("name", "").lower()
-                desc = s_dict.get("description", "").lower()
+                name = (s_dict.get("name") or "").lower()
+                desc = (s_dict.get("description") or "").lower()
                 score = sum(1 for kw in keywords if kw in name or kw in desc)
                 if score > 0:
                     s_dict["_score"] = score

package/memory/retrieval.py CHANGED

@@ -940,8 +940,11 @@ class MemoryRetrieval:
         Returns:
             Weighted score incorporating importance
         """
-        source = result.get("_source", "")
-        base_score = result.get("_score", 0.5)
+        source = result.get("_source") or ""
+        # _score is set internally so null is unlikely, but guard for
+        # uniformity since it feeds the arithmetic below.
+        base_score = result.get("_score")
+        base_score = 0.5 if base_score is None else base_score
         # Map source to weight key
         weight_key = source
@@ -950,11 +953,17 @@ class MemoryRetrieval:
         weight = weights.get(weight_key, 0.0)
-        # Get importance score (default 0.5 if not set)
-        importance = result.get("importance", 0.5)
+        # Get importance score (default 0.5 if not set). Defensive: a
+        # corrupt/hand-edited record may carry importance=null, which would
+        # raise TypeError in the arithmetic below. Use the default only when
+        # missing/null; a legitimate 0.0 is preserved.
+        importance = result.get("importance")
+        importance = 0.5 if importance is None else importance
-        # Get confidence for semantic patterns
-        confidence = result.get("confidence", 1.0)
+        # Get confidence for semantic patterns. Same null guard; default 1.0
+        # only when missing/null, a legitimate 0.0 is preserved.
+        confidence = result.get("confidence")
+        confidence = 1.0 if confidence is None else confidence
         # Combined score: relevance * task_weight * importance * confidence
         # Importance contributes 30% of the final score
@@ -1141,17 +1150,22 @@ class MemoryRetrieval:
                 selected_memories.append(topic)
             budget_remaining -= layer1_tokens
-        # Layer 2: Expand summaries for top topics
-        layer2_budget = int(token_budget * 0.4)  # Reserve 40% for summaries
-        if budget_remaining > layer2_budget * 0.5:
+        # Layer 2: Expand summaries for top topics.
+        # Gate on the remaining budget (not a fraction of the layer-2 reserve)
+        # and trim the summary set to fit via optimize_context, mirroring
+        # Layer 3 below. Previously this admitted summaries all-or-nothing: a
+        # set that exceeded budget_remaining was dropped entirely, and the gate
+        # compared against layer2_budget*0.5 (a fraction of the reserve) rather
+        # than the budget actually left.
+        if budget_remaining > 100:
             summaries = self._get_topic_summaries(relevant_topics[:5], query, weights)
-            layer2_tokens = sum(estimate_memory_tokens(s) for s in summaries)
+            for summary in summaries:
+                summary["_layer"] = 2
-            if layer2_tokens <= budget_remaining:
-                for summary in summaries:
-                    summary["_layer"] = 2
-                    selected_memories.append(summary)
-                budget_remaining -= layer2_tokens
+            # Optimize to fit remaining budget (trimmed set, not all-or-nothing)
+            optimized = optimize_context(summaries, budget_remaining)
+            selected_memories.extend(optimized)
+            budget_remaining -= sum(estimate_memory_tokens(s) for s in optimized)
         # Layer 3: Full details for highest priority items
         if budget_remaining > 100:  # At least 100 tokens remaining
@@ -1189,14 +1203,36 @@ class MemoryRetrieval:
         scored_topics = []
         for topic in topics:
-            topic_name = topic.get("topic", "").lower()
-            memory_type = topic.get("type", "").lower()
+            if not isinstance(topic, dict):
+                continue
+            # The index.json writer (engine.py _stamp_topic at ~368 and
+            # store_pattern at ~978) emits topics keyed by "id" (a phase or
+            # category slug, e.g. "implementation", "auth") and "summary"
+            # (prose: the goal text or "Patterns for <category>"). It does NOT
+            # emit "topic", "type", or "last_updated". Previously this scorer
+            # read only "topic"/"type"/"last_updated", so word overlap, type
+            # weighting, and the recency boost were all silent no-ops on real
+            # data. Score against the real keys (id + summary for word overlap,
+            # id as the type/category for the strategy weight, the real recency
+            # keys), and keep the legacy "topic"/"type"/"last_updated" keys as
+            # fallbacks so any older-shape index still ranks.
+            topic_text = " ".join(
+                str(v) for v in (
+                    topic.get("summary"),
+                    topic.get("id"),
+                    topic.get("topic"),
+                ) if v
+            ).lower()
+            # The category/phase slug doubles as the memory-type weight key
+            # (the writer uses the category name as the id). Fall back to the
+            # legacy "type" key for older-shape indexes.
+            memory_type = (topic.get("id") or topic.get("type") or "").lower()
             # Calculate relevance score
             score = 0.0
             # Word overlap
-            topic_words = set(topic_name.split())
+            topic_words = set(topic_text.split())
             overlap = len(query_words & topic_words)
             score += overlap * 0.3
@@ -1204,8 +1240,11 @@ class MemoryRetrieval:
             type_weight = weights.get(memory_type, 0.1)
             score += type_weight
-            # Recency boost
-            if topic.get("last_updated"):
+            # Recency boost. The writer stamps "last_accessed"/"first_seen";
+            # "last_updated" is the legacy key.
+            if (topic.get("last_accessed")
+                    or topic.get("first_seen")
+                    or topic.get("last_updated")):
                 score += 0.1
             if score > 0:
@@ -1226,8 +1265,15 @@ class MemoryRetrieval:
         summaries = []
         for topic in topics:
-            topic_name = topic.get("topic", "")
-            memory_type = topic.get("type", "episodic")
+            if not isinstance(topic, dict):
+                continue
+            # Mirror _filter_relevant_topics: the writer emits "id"/"summary",
+            # not "topic". Fall back to the legacy "topic" key so both shapes
+            # resolve a usable name. Default type stays "episodic".
+            topic_name = (
+                topic.get("id") or topic.get("topic") or topic.get("summary") or ""
+            )
+            memory_type = topic.get("type") or "episodic"
             # Try to load summary from appropriate collection
             if memory_type == "episodic":
@@ -1426,7 +1472,12 @@ class MemoryRetrieval:
             parts.append(f"action: {context['action_type']}")
         if context.get("files"):
-            parts.append(f"files: {', '.join(context['files'][:3])}")
+            # Defensive: filter to str elements so a list carrying None or
+            # non-str entries (corrupt/hand-edited record) does not raise
+            # TypeError inside join. Mirrors the steps-join in skills search.
+            files = [f for f in context["files"][:3] if isinstance(f, str)]
+            if files:
+                parts.append(f"files: {', '.join(files)}")
         return " ".join(parts) if parts else ""
@@ -1458,13 +1509,16 @@ class MemoryRetrieval:
                 if not data:
                     continue
-                # Score based on keyword matches in goal
-                context = data.get("context", {})
-                goal = context.get("goal", "").lower()
+                # Score based on keyword matches in goal.
+                # Defensive: a corrupt or hand-edited record may carry
+                # context=null or null string fields; (x or "") avoids
+                # AttributeError on None.
+                context = data.get("context") or {}
+                goal = (context.get("goal") or "").lower()
                 score = sum(1 for kw in keywords if kw in goal)
                 # Also check phase
-                phase = context.get("phase", "").lower()
+                phase = (context.get("phase") or "").lower()
                 score += sum(0.5 for kw in keywords if kw in phase)
                 if score > 0:
@@ -1487,16 +1541,21 @@ class MemoryRetrieval:
         for pattern in patterns_data.get("patterns", []):
             if not isinstance(pattern, dict):
                 continue
-            pattern_text = pattern.get("pattern", "").lower()
-            category = pattern.get("category", "").lower()
-            correct = pattern.get("correct_approach", "").lower()
+            # Defensive: corrupt or hand-edited records may carry null
+            # string fields; (x or "") avoids AttributeError on None.
+            pattern_text = (pattern.get("pattern") or "").lower()
+            category = (pattern.get("category") or "").lower()
+            correct = (pattern.get("correct_approach") or "").lower()
             score = sum(1 for kw in keywords if kw in pattern_text)
             score += sum(0.5 for kw in keywords if kw in category)
             score += sum(0.3 for kw in keywords if kw in correct)
-            # Weight by confidence
-            confidence = pattern.get("confidence", 0.5)
+            # Weight by confidence. Defensive: a null confidence would make
+            # score *= None raise TypeError. Use 0.5 only when missing/null;
+            # a legitimate 0.0 is preserved (it correctly zeroes the score).
+            confidence = pattern.get("confidence")
+            confidence = 0.5 if confidence is None else confidence
             score *= confidence
             if score > 0:
@@ -1521,8 +1580,8 @@ class MemoryRetrieval:
             if not data:
                 continue
-            name = data.get("name", "").lower()
-            description = data.get("description", "").lower()
+            name = (data.get("name") or "").lower()
+            description = (data.get("description") or "").lower()
             steps_text = " ".join(
                 s for s in (data.get("steps") or []) if isinstance(s, str)
             ).lower()
@@ -1549,9 +1608,14 @@ class MemoryRetrieval:
         anti_data = self.storage.read_json("semantic/anti-patterns.json") or {}
         for anti in anti_data.get("anti_patterns", []):
-            what_fails = anti.get("what_fails", "").lower()
-            why = anti.get("why", "").lower()
-            prevention = anti.get("prevention", "").lower()
+            # Defensive: mirror the sibling loop below. A corrupt or
+            # hand-edited record may be a non-dict or carry null fields;
+            # the isinstance guard and (x or "") avoid AttributeError.
+            if not isinstance(anti, dict):
+                continue
+            what_fails = (anti.get("what_fails") or "").lower()
+            why = (anti.get("why") or "").lower()
+            prevention = (anti.get("prevention") or "").lower()
             score = sum(2 for kw in keywords if kw in what_fails)
             score += sum(1 for kw in keywords if kw in why)
@@ -1576,10 +1640,10 @@ class MemoryRetrieval:
                 continue
             if pat.get("category") != "anti-pattern":
                 continue
-            what_fails = (pat.get("incorrect_approach", "")
-                          or pat.get("pattern", "")).lower()
-            why = pat.get("description", "").lower()
-            prevention = pat.get("correct_approach", "").lower()
+            what_fails = (pat.get("incorrect_approach")
+                          or pat.get("pattern") or "").lower()
+            why = (pat.get("description") or "").lower()
+            prevention = (pat.get("correct_approach") or "").lower()
             score = sum(2 for kw in keywords if kw in what_fails)
             score += sum(1 for kw in keywords if kw in why)