loki-mode 6.71.1 → 6.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +9 -1
  2. package/SKILL.md +2 -2
  3. package/VERSION +1 -1
  4. package/autonomy/hooks/migration-hooks.sh +26 -0
  5. package/autonomy/loki +429 -92
  6. package/autonomy/run.sh +219 -38
  7. package/dashboard/__init__.py +1 -1
  8. package/dashboard/server.py +101 -19
  9. package/docs/INSTALLATION.md +20 -11
  10. package/docs/bug-fixes/agent-01-cli-fixes.md +101 -0
  11. package/docs/bug-fixes/agent-02-purplelab-fixes.md +88 -0
  12. package/docs/bug-fixes/agent-03-dashboard-fixes.md +119 -0
  13. package/docs/bug-fixes/agent-04-memory-fixes.md +105 -0
  14. package/docs/bug-fixes/agent-05-provider-fixes.md +86 -0
  15. package/docs/bug-fixes/agent-06-integration-fixes.md +101 -0
  16. package/docs/bug-fixes/agent-07-dash-run-fixes.md +101 -0
  17. package/docs/bug-fixes/agent-08-docker-fixes.md +164 -0
  18. package/docs/bug-fixes/agent-09-e2e-build-fixes.md +69 -0
  19. package/docs/bug-fixes/agent-10-e2e-fullstack-fixes.md +102 -0
  20. package/docs/bug-fixes/agent-11-e2e-session-fixes.md +70 -0
  21. package/docs/bug-fixes/agent-12-scenario-fixes.md +120 -0
  22. package/docs/bug-fixes/agent-13-enterprise-fixes.md +143 -0
  23. package/docs/bug-fixes/agent-14-uat-newuser-fixes.md +88 -0
  24. package/docs/bug-fixes/agent-15-uat-poweruser-fixes.md +132 -0
  25. package/docs/bug-fixes/agent-19-code-review.md +316 -0
  26. package/docs/bug-fixes/agent-20-architecture-review.md +331 -0
  27. package/docs/competitive/bolt-new-analysis.md +579 -0
  28. package/docs/competitive/emergence-others-analysis.md +605 -0
  29. package/docs/competitive/replit-lovable-analysis.md +622 -0
  30. package/docs/test-scenarios/edge-cases.md +813 -0
  31. package/docs/test-scenarios/enterprise-scenarios.md +732 -0
  32. package/mcp/__init__.py +1 -1
  33. package/mcp/server.py +49 -5
  34. package/memory/consolidation.py +33 -0
  35. package/memory/embeddings.py +10 -1
  36. package/memory/engine.py +83 -38
  37. package/memory/retrieval.py +36 -0
  38. package/memory/storage.py +56 -4
  39. package/memory/token_economics.py +14 -2
  40. package/memory/vector_index.py +36 -7
  41. package/package.json +1 -1
  42. package/providers/gemini.sh +89 -2
  43. package/templates/README.md +1 -1
  44. package/templates/cli-tool.md +30 -0
  45. package/templates/dashboard.md +4 -0
  46. package/templates/data-pipeline.md +4 -0
  47. package/templates/discord-bot.md +47 -0
  48. package/templates/game.md +4 -0
  49. package/templates/microservice.md +4 -0
  50. package/templates/npm-library.md +4 -0
  51. package/templates/rest-api-auth.md +50 -20
  52. package/templates/rest-api.md +15 -0
  53. package/templates/saas-starter.md +1 -1
  54. package/templates/slack-bot.md +36 -0
  55. package/templates/static-landing-page.md +9 -1
  56. package/templates/web-scraper.md +4 -0
  57. package/web-app/dist/assets/Badge-CeBkFjo6.js +1 -0
  58. package/web-app/dist/assets/Button-yuhqo8Fq.js +1 -0
  59. package/web-app/dist/assets/{Card-B1bV4syB.js → Card-BG17vsX0.js} +1 -1
  60. package/web-app/dist/assets/{HomePage-CZTV6Nea.js → HomePage-BMSQ7Apj.js} +3 -3
  61. package/web-app/dist/assets/{LoginPage-D4UdURJc.js → LoginPage-aH_6iolg.js} +1 -1
  62. package/web-app/dist/assets/{NotFoundPage-CCLSeL6j.js → NotFoundPage-Di8cNtB1.js} +1 -1
  63. package/web-app/dist/assets/ProjectPage-BtRssmw9.js +285 -0
  64. package/web-app/dist/assets/ProjectsPage-B-FTFagc.js +6 -0
  65. package/web-app/dist/assets/{SettingsPage-Xuv8EfAg.js → SettingsPage-DIJPBla4.js} +1 -1
  66. package/web-app/dist/assets/TeamsPage--19fNX7w.js +36 -0
  67. package/web-app/dist/assets/TemplatesPage-ChUQNOOv.js +11 -0
  68. package/web-app/dist/assets/TerminalOutput-Dwrzecyl.js +31 -0
  69. package/web-app/dist/assets/activity-BNRWeu9N.js +6 -0
  70. package/web-app/dist/assets/{arrow-left-CaGtolHc.js → arrow-left-Ce6g1_YE.js} +1 -1
  71. package/web-app/dist/assets/circle-alert-LIndawHL.js +11 -0
  72. package/web-app/dist/assets/clock-Bpj4VPlP.js +6 -0
  73. package/web-app/dist/assets/{external-link-CazyUyav.js → external-link-BhhdF0iQ.js} +1 -1
  74. package/web-app/dist/assets/folder-open-CM2LgfxI.js +11 -0
  75. package/web-app/dist/assets/index-8-KpWWq7.css +1 -0
  76. package/web-app/dist/assets/index-kPDW4e_b.js +236 -0
  77. package/web-app/dist/assets/lock-sAk3Xe54.js +16 -0
  78. package/web-app/dist/assets/search-CR-2i9by.js +6 -0
  79. package/web-app/dist/assets/server-DuFh4ymA.js +26 -0
  80. package/web-app/dist/assets/trash-2-BmkkT8V_.js +11 -0
  81. package/web-app/dist/index.html +2 -2
  82. package/web-app/server.py +1321 -53
  83. package/web-app/dist/assets/Badge-CBUx2PjL.js +0 -6
  84. package/web-app/dist/assets/Button-DsRiznlh.js +0 -21
  85. package/web-app/dist/assets/ProjectPage-D0w_X9tG.js +0 -237
  86. package/web-app/dist/assets/ProjectsPage-ByYxDlKC.js +0 -16
  87. package/web-app/dist/assets/TemplatesPage-BKWN07mc.js +0 -1
  88. package/web-app/dist/assets/TerminalOutput-Dj98V8Z-.js +0 -51
  89. package/web-app/dist/assets/clock-C_CDmobx.js +0 -11
  90. package/web-app/dist/assets/index-D452pFGl.css +0 -1
  91. package/web-app/dist/assets/index-Df4_kgLY.js +0 -196
package/mcp/__init__.py CHANGED
@@ -57,4 +57,4 @@ try:
 except ImportError:
     __all__ = ['mcp']
 
-__version__ = '6.71.1'
+__version__ = '6.72.0'
package/mcp/server.py CHANGED
@@ -74,12 +74,25 @@ def _get_learning_collector():
 
 
 def _get_mcp_state_manager():
-    """Get or create the StateManager instance for MCP server."""
+    """Get or create the StateManager instance for MCP server.
+
+    BUG-PU-002: Recreates the StateManager if the underlying .loki directory
+    has disappeared (e.g., project changed) to prevent stale file handle errors.
+    """
     global _state_manager
     if not STATE_MANAGER_AVAILABLE:
         return None
+    loki_dir = os.path.join(os.getcwd(), '.loki')
+    if _state_manager is not None:
+        # Verify the state manager's directory still matches cwd
+        existing_dir = getattr(_state_manager, 'loki_dir', None) or \
+            getattr(_state_manager, '_loki_dir', None)
+        if existing_dir and os.path.realpath(existing_dir) != os.path.realpath(loki_dir):
+            # Project directory changed, recreate
+            if hasattr(_state_manager, 'close'):
+                _state_manager.close()
+            _state_manager = None
     if _state_manager is None:
-        loki_dir = os.path.join(os.getcwd(), '.loki')
         _state_manager = get_state_manager(
             loki_dir=loki_dir,
             enable_watch=False,  # MCP server doesn't need file watching
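The cwd-change detection above reduces to a realpath comparison between the cached manager's directory and the current working directory. A minimal sketch with an illustrative cached path (the value below is hypothetical):

    import os

    cached_dir = "/projects/alpha/.loki"              # hypothetical cached loki_dir
    current_dir = os.path.join(os.getcwd(), ".loki")
    if os.path.realpath(cached_dir) != os.path.realpath(current_dir):
        print("project changed; close and recreate the StateManager")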
@@ -1312,22 +1325,39 @@ CHROMA_COLLECTION = os.environ.get("LOKI_CHROMA_COLLECTION", "loki-codebase")
 
 
 def _get_chroma_collection():
-    """Get or create ChromaDB collection (lazy connection)."""
+    """Get or create ChromaDB collection (lazy connection).
+
+    BUG-PU-002: Improved reconnection with timeout to prevent hanging
+    when ChromaDB container is stopped or unreachable after idle.
+    """
     global _chroma_client, _chroma_collection
     if _chroma_collection is not None:
         try:
             _chroma_client.heartbeat()
             return _chroma_collection
         except Exception:
+            logger.info("ChromaDB heartbeat failed, reconnecting...")
             _chroma_client = None
             _chroma_collection = None
     try:
         import chromadb
-        _chroma_client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
+        from chromadb.config import Settings
+        _chroma_client = chromadb.HttpClient(
+            host=CHROMA_HOST,
+            port=CHROMA_PORT,
+            settings=Settings(
+                chroma_client_auth_provider=None,
+                anonymized_telemetry=False,
+            ),
+        )
+        # Verify connectivity before returning
+        _chroma_client.heartbeat()
         _chroma_collection = _chroma_client.get_collection(name=CHROMA_COLLECTION)
         return _chroma_collection
     except Exception as e:
         logger.warning(f"ChromaDB not available: {e}")
+        _chroma_client = None
+        _chroma_collection = None
         return None
 
 
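Because the helper now resets its cached client and returns None on any connection failure, callers can degrade gracefully instead of hanging. A hedged usage sketch; the query call follows the standard chromadb collection API, and the fallback payload is illustrative:

    collection = _get_chroma_collection()
    if collection is None:
        results = {"results": [], "warning": "ChromaDB unavailable"}  # graceful fallback
    else:
        results = collection.query(query_texts=["atomic write"], n_results=5)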
@@ -1512,12 +1542,26 @@ async def mem_search(
         context = {"goal": query, "task_type": "exploration"}
         results = retriever.retrieve_task_aware(context, top_k=limit)
 
+        # BUG-MCP-006: Filter results by collection parameter when not "all"
+        # The retrieve_task_aware method returns all collections, but the user
+        # may have requested only a specific collection type
+        collection_type_map = {
+            "episodes": "episode",
+            "patterns": "pattern",
+            "skills": "skill",
+        }
+        filter_type = collection_type_map.get(collection)
+
         # Compact results for token efficiency
         compact = []
         for r in results:
+            result_type = r.get("_type", r.get("type", "unknown"))
+            # Apply collection filter
+            if filter_type and result_type != filter_type:
+                continue
             entry = {
                 "id": r.get("id", ""),
-                "type": r.get("_type", r.get("type", "unknown")),
+                "type": result_type,
                 "summary": (
                     r.get("goal", "") or
                     r.get("pattern", "") or
package/memory/consolidation.py CHANGED
@@ -11,9 +11,11 @@ from __future__ import annotations
 
 import uuid
 import time
+import fcntl
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 from typing import Optional, List, Dict, Any, Tuple
 
 try:
@@ -44,6 +46,7 @@ class ConsolidationResult:
         links_created: Number of Zettelkasten links created
         episodes_processed: Number of episodes that were processed
         duration_seconds: How long the consolidation took
+        vector_index_stale: Whether vector indices need rebuilding
     """
     patterns_created: int = 0
     patterns_merged: int = 0
@@ -51,6 +54,7 @@ class ConsolidationResult:
     links_created: int = 0
     episodes_processed: int = 0
     duration_seconds: float = 0.0
+    vector_index_stale: bool = False
 
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for JSON serialization."""
@@ -61,6 +65,7 @@ class ConsolidationResult:
             "links_created": self.links_created,
             "episodes_processed": self.episodes_processed,
             "duration_seconds": self.duration_seconds,
+            "vector_index_stale": self.vector_index_stale,
         }
 
 
@@ -131,12 +136,34 @@ class ConsolidationPipeline:
         """
         Run the full consolidation pipeline.
 
+        Uses a file lock to prevent concurrent consolidation runs from
+        corrupting data (BUG-MEM-003 fix). If another consolidation is
+        already in progress, this call blocks until it completes.
+
         Args:
             since_hours: Only process episodes from the last N hours
 
         Returns:
             ConsolidationResult with statistics about the consolidation run
         """
+        lock_path = Path(self.base_path) / ".consolidation.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_file = None
+        try:
+            lock_file = open(lock_path, "w")
+            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
+            return self._consolidate_locked(since_hours)
+        finally:
+            if lock_file is not None:
+                fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+                lock_file.close()
+                try:
+                    lock_path.unlink()
+                except OSError:
+                    pass
+
+    def _consolidate_locked(self, since_hours: int) -> ConsolidationResult:
+        """Run the consolidation pipeline under an exclusive lock."""
         start_time = time.time()
         result = ConsolidationResult()
 
@@ -238,6 +265,12 @@
             self.storage.update_pattern(pattern)
         result.links_created += len(links)
 
+        # Flag vector indices as stale when patterns changed (BUG-MEM-007).
+        # Callers should rebuild vector indices when this flag is True to
+        # ensure semantic search returns up-to-date results.
+        if result.patterns_created > 0 or result.patterns_merged > 0 or result.anti_patterns_created > 0:
+            result.vector_index_stale = True
+
         result.duration_seconds = time.time() - start_time
         return result
 
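The new vector_index_stale flag is meant to be consumed by the retrieval layer. A hedged wiring sketch: `consolidate` and `build_indices` are assumed entry-point names, while `vector_index_stale` and `mark_indices_stale` appear verbatim in this diff:

    def consolidate_and_refresh(pipeline, retriever, since_hours: int = 24):
        result = pipeline.consolidate(since_hours=since_hours)  # assumed public entry point
        if result.vector_index_stale:
            retriever.mark_indices_stale()  # from this diff (BUG-MEM-002)
            retriever.build_indices()       # assumed rebuild method; records a fresh timestamp
        return result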
package/memory/embeddings.py CHANGED
@@ -1001,10 +1001,19 @@ class EmbeddingEngine:
                 self._metrics["provider_calls"][provider_name] += 1
 
         except Exception as e:
-            logger.warning(f"Primary provider failed: {e}, trying fallback")
+            logger.warning("Primary provider failed: %s, trying fallback", e)
+            old_dimension = self.dimension
             self._use_fallback()
             embedding = self._primary_provider.embed(text)
             embedding = self._normalize(embedding)
+            # If dimension changed after fallback, log a warning so callers
+            # know existing vector indices may be incompatible (BUG-MEM-006).
+            if self.dimension != old_dimension:
+                logger.warning(
+                    "Embedding dimension changed from %d to %d after fallback. "
+                    "Existing vector indices may need to be rebuilt.",
+                    old_dimension, self.dimension
+                )
 
         # Ensure proper shape and type
         embedding = np.asarray(embedding, dtype=np.float32)
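The dimension warning matters because similarity scores are undefined across dimensions: a 768-dim index cannot score queries embedded at 384 dims after a provider fallback. A caller-side guard, sketched with illustrative numbers (the diff itself only adds the warning log; `dimension` is the engine attribute shown above):

    def ensure_compatible(engine, index_dimension: int) -> bool:
        # True when the engine's output can be scored against the index;
        # on False, callers should rebuild rather than mix dimensions.
        return engine.dimension == index_dimension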
package/memory/engine.py CHANGED
@@ -75,6 +75,10 @@ class MemoryEngine:
     - Procedural memory: Learned action sequences (skills)
     """
 
+    # Supported schema versions (BUG-MEM-004 fix)
+    SUPPORTED_SCHEMA_VERSIONS = {"1.0", "1.1.0"}
+    CURRENT_SCHEMA_VERSION = "1.1.0"
+
     def __init__(
         self,
         storage: Optional[MemoryStorage] = None,
@@ -99,10 +103,36 @@
     # Lifecycle Operations
     # -------------------------------------------------------------------------
 
+    def _validate_schema_version(self, data: Dict[str, Any], source: str) -> None:
+        """
+        Validate that a memory data structure has a supported schema version.
+
+        Logs a warning for unknown versions and upgrades old versions to current.
+        This prevents silent data corruption from loading incompatible formats
+        (BUG-MEM-004 fix).
+
+        Args:
+            data: Memory data dictionary (index.json, timeline.json, patterns.json, etc.)
+            source: Description of the data source (for logging)
+        """
+        version = data.get("version")
+        if version is None:
+            # Legacy data without version -- assign current version
+            data["version"] = self.CURRENT_SCHEMA_VERSION
+            logger.info("Assigned schema version %s to %s (no version found)",
+                        self.CURRENT_SCHEMA_VERSION, source)
+        elif version not in self.SUPPORTED_SCHEMA_VERSIONS:
+            logger.warning(
+                "Unsupported schema version '%s' in %s. "
+                "Supported versions: %s. Data may not load correctly.",
+                version, source, ", ".join(sorted(self.SUPPORTED_SCHEMA_VERSIONS))
+            )
+
     def initialize(self) -> None:
         """
         Initialize the memory system.
         Ensures all required directories and files exist.
+        Validates schema versions on existing data (BUG-MEM-004).
         """
         # Create directory structure
         directories = [
@@ -116,25 +146,29 @@
         for directory in directories:
             self.storage.ensure_directory(directory)
 
-        # Initialize index if not exists
-        if not self.storage.read_json("index.json"):
+        # Initialize index if not exists, validate schema version if it does
+        existing_index = self.storage.read_json("index.json")
+        if not existing_index:
             self.storage.write_json(
                 "index.json",
                 {
-                    "version": "1.0",
+                    "version": self.CURRENT_SCHEMA_VERSION,
                     "last_updated": datetime.now(timezone.utc).isoformat(),
                     "topics": [],
                     "total_memories": 0,
                     "total_tokens_available": 0,
                 },
             )
+        else:
+            self._validate_schema_version(existing_index, "index.json")
 
-        # Initialize timeline if not exists
-        if not self.storage.read_json("timeline.json"):
+        # Initialize timeline if not exists, validate schema version if it does
+        existing_timeline = self.storage.read_json("timeline.json")
+        if not existing_timeline:
             self.storage.write_json(
                 "timeline.json",
                 {
-                    "version": "1.0",
+                    "version": self.CURRENT_SCHEMA_VERSION,
                     "last_updated": datetime.now(timezone.utc).isoformat(),
                     "recent_actions": [],
                     "key_decisions": [],
@@ -145,6 +179,8 @@
                     },
                 },
             )
+        else:
+            self._validate_schema_version(existing_timeline, "timeline.json")
 
         # Initialize semantic patterns if not exists
         if not self.storage.read_json("semantic/patterns.json"):
@@ -282,24 +318,33 @@
         """
         Retrieve an episode by ID.
 
+        Supports multiple ID formats:
+        - ep-YYYY-MM-DD-XXX (standard from EpisodeTrace.create)
+        - {prefix}-YYYY-MM-DD-XXX (variable-length prefix)
+        - Any other format (falls back to directory scan)
+
         Args:
             episode_id: Episode identifier
 
         Returns:
             EpisodeTrace instance or None if not found
         """
-        # Parse date from episode ID (format: ep-YYYY-MM-DD-XXX)
-        parts = episode_id.split("-")
-        if len(parts) >= 5 and len(parts[1]) == 4 and len(parts[2]) == 2 and len(parts[3]) == 2:
-            date_str = f"{parts[1]}-{parts[2]}-{parts[3]}"
-        else:
-            # Non-standard ID format; search all directories
-            return self._search_episode(episode_id)
+        import re
+
+        # Try to extract YYYY-MM-DD from anywhere in the episode ID.
+        # This handles variable-length prefixes (ep-, episode-, etc.)
+        # and avoids the fragile fixed-offset parsing that produced
+        # garbage paths for non-standard prefixes (BUG-MEM-001).
+        date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', episode_id)
+        if date_match:
+            date_str = date_match.group(0)
+            data = self.storage.read_json(f"episodic/{date_str}/task-{episode_id}.json")
+            if data:
+                return self._dict_to_episode(data)
 
-        data = self.storage.read_json(f"episodic/{date_str}/task-{episode_id}.json")
-        if data:
-            return self._dict_to_episode(data)
-        return None
+        # Non-standard ID format or file not found at parsed path;
+        # search all directories as fallback
+        return self._search_episode(episode_id)
 
     def get_recent_episodes(self, limit: int = 10) -> List[EpisodeTrace]:
         """
@@ -416,18 +461,26 @@
         """
         Increment usage count for a pattern.
 
+        Uses the storage layer's pattern update which holds an exclusive lock
+        during the read-modify-write cycle, preventing TOCTOU race conditions
+        when multiple agents update patterns concurrently.
+
         Args:
             pattern_id: Pattern identifier
         """
-        patterns_data = self.storage.read_json("semantic/patterns.json") or {"patterns": []}
+        # Load pattern via storage (which acquires read lock)
+        pattern_data = self.storage.load_pattern(pattern_id)
+        if pattern_data is None:
+            return
 
-        for pattern in patterns_data["patterns"]:
-            if pattern.get("id") == pattern_id:
-                pattern["usage_count"] = pattern.get("usage_count", 0) + 1
-                pattern["last_used"] = datetime.now(timezone.utc).isoformat()
-                break
+        # Update fields
+        pattern_data["usage_count"] = pattern_data.get("usage_count", 0) + 1
+        pattern_data["last_used"] = datetime.now(timezone.utc).isoformat()
 
-        self.storage.write_json("semantic/patterns.json", patterns_data)
+        # Write back via save_pattern which holds an exclusive lock during
+        # the full read-modify-write (upsert) cycle
+        pattern_obj = self._dict_to_pattern(pattern_data)
+        self.storage.save_pattern(pattern_obj)
 
     # -------------------------------------------------------------------------
     # Skill Operations
@@ -758,15 +811,12 @@
         return "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
 
     def _update_timeline_with_episode(self, episode: Dict[str, Any]) -> None:
-        """Update timeline with episode summary."""
-        timeline = self.storage.read_json("timeline.json") or {
-            "version": "1.0",
-            "recent_actions": [],
-            "key_decisions": [],
-            "active_context": {},
-        }
+        """Update timeline with episode summary.
 
-        # Create action summary
+        Delegates to the storage layer's update_timeline method which holds
+        an exclusive lock during the read-modify-write cycle, preventing
+        concurrent timeline corruption.
+        """
         context = episode.get("context", {})
         action_entry = {
             "timestamp": episode.get("timestamp", datetime.now(timezone.utc).isoformat()),
@@ -775,12 +825,7 @@
             "topic_id": context.get("phase", "general"),
         }
 
-        # Add to recent actions (keep last 50)
-        timeline["recent_actions"].insert(0, action_entry)
-        timeline["recent_actions"] = timeline["recent_actions"][:50]
-        timeline["last_updated"] = datetime.now(timezone.utc).isoformat()
-
-        self.storage.write_json("timeline.json", timeline)
+        self.storage.update_timeline(action_entry)
 
     def _update_index_with_pattern(self, pattern: Dict[str, Any]) -> None:
         """Update index with pattern topic."""
package/memory/retrieval.py CHANGED
@@ -285,6 +285,10 @@ class MemoryRetrieval:
         self.vector_indices = vector_indices or {}
         self.base_path = Path(base_path)
         self._namespace = namespace
+        # Track when indices were last built to detect staleness (BUG-MEM-002).
+        # When consolidation modifies patterns, indices become stale and should
+        # be rebuilt before the next similarity search.
+        self._indices_built_at: Optional[float] = None
 
     @property
     def namespace(self) -> Optional[str]:
@@ -692,6 +696,15 @@
     # Multi-Modal Retrieval
     # -------------------------------------------------------------------------
 
+    def mark_indices_stale(self) -> None:
+        """
+        Mark vector indices as stale so they are rebuilt before next search.
+
+        Should be called after consolidation modifies the semantic memory
+        to prevent returning stale results (BUG-MEM-002 fix).
+        """
+        self._indices_built_at = None
+
     def retrieve_by_similarity(
         self,
         query: str,
@@ -702,6 +715,8 @@
         Retrieve by semantic similarity using embeddings.
 
         Falls back to keyword search if embeddings are not available.
+        Checks for index staleness and falls back to keyword search
+        if indices may be stale (BUG-MEM-002 fix).
 
         Args:
             query: Search query text
@@ -717,6 +732,21 @@
         if collection not in self.vector_indices:
             return self.retrieve_by_keyword(query.split(), collection)[:top_k]
 
+        # Check if indices need rebuilding after consolidation (BUG-MEM-002).
+        # If patterns.json was modified more recently than we last built
+        # indices, fall back to keyword search for accuracy.
+        if collection == "semantic" and self._indices_built_at is not None:
+            patterns_path = self.base_path / "semantic" / "patterns.json"
+            if patterns_path.exists():
+                import os
+                patterns_mtime = os.path.getmtime(patterns_path)
+                if patterns_mtime > self._indices_built_at:
+                    logger.info(
+                        "Semantic index is stale (patterns modified after index build). "
+                        "Falling back to keyword search for accuracy."
+                    )
+                    return self.retrieve_by_keyword(query.split(), collection)[:top_k]
+
         # Generate query embedding
         query_embedding = self.embedding_engine.embed(query)
 
@@ -1254,10 +1284,13 @@
 
         Reads all memories and creates vector embeddings for similarity search.
         Requires embedding_engine to be configured.
+        Records build timestamp so staleness can be detected (BUG-MEM-002).
         """
         if self.embedding_engine is None:
             return
 
+        import time as _time
+
         # Build episodic index
         if "episodic" in self.vector_indices:
             self._build_episodic_index()
@@ -1274,6 +1307,9 @@
         if "anti_patterns" in self.vector_indices:
             self._build_anti_patterns_index()
 
+        # Record build timestamp for staleness detection (BUG-MEM-002)
+        self._indices_built_at = _time.time()
+
     def update_index(
         self,
         collection: str,
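The staleness test itself is a single mtime comparison. A self-contained sketch under the assumption that patterns live at <base_path>/semantic/patterns.json, as in this diff (the base path below is hypothetical):

    import os
    import time
    from pathlib import Path

    indices_built_at = time.time()  # recorded when indices were last built
    patterns_path = Path(".loki/memory") / "semantic" / "patterns.json"
    if patterns_path.exists() and os.path.getmtime(patterns_path) > indices_built_at:
        print("semantic index stale; fall back to keyword search until rebuilt")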
package/memory/storage.py CHANGED
@@ -144,6 +144,8 @@ class MemoryStorage:
 
         # Clean up stale lock files from previous crashed processes
         self._cleanup_stale_locks()
+        # BUG-EP-015: Clean up orphaned temp files from kill -9 crashes
+        self._cleanup_stale_tmp_files()
 
     def _cleanup_stale_locks(self) -> None:
         """Remove stale .lock files older than 5 minutes (safe with concurrent processes).
@@ -167,10 +169,46 @@
             except OSError:
                 pass
 
+    def _cleanup_stale_tmp_files(self) -> None:
+        """Remove orphaned .tmp files older than 5 minutes from crash recovery.
+
+        BUG-EP-015: When a process is killed with SIGKILL during an atomic
+        write, the temp file (.tmp_*.json) is left behind because the rename
+        never completes. These accumulate over time.
+        """
+        try:
+            import time
+            now_real = time.time()
+            stale_seconds = 300  # 5 minutes
+            for tmp_file in self.base_path.rglob(".tmp_*.json"):
+                try:
+                    file_mtime = tmp_file.stat().st_mtime
+                    age_seconds = now_real - file_mtime
+                    if age_seconds > stale_seconds:
+                        tmp_file.unlink()
+                except OSError:
+                    pass
+        except OSError:
+            pass
+
     def _ensure_index(self) -> None:
-        """Initialize index.json if it doesn't exist."""
+        """Initialize or repair index.json if it doesn't exist or is corrupted."""
         index_path = self.base_path / "index.json"
-        if not index_path.exists():
+        needs_init = not index_path.exists()
+
+        # BUG-EP-012: Check for corrupted index.json (exists but invalid JSON)
+        if not needs_init:
+            try:
+                text = index_path.read_text(encoding="utf-8", errors="replace")
+                json.loads(text)
+            except (json.JSONDecodeError, OSError):
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Corrupted index.json detected, recreating from scratch"
+                )
+                needs_init = True
+
+        if needs_init:
             initial_index = {
                 "version": self.VERSION,
                 "last_updated": datetime.now(timezone.utc).isoformat(),
@@ -179,9 +217,23 @@
         self._atomic_write(index_path, initial_index)
 
     def _ensure_timeline(self) -> None:
-        """Initialize timeline.json if it doesn't exist."""
+        """Initialize or repair timeline.json if it doesn't exist or is corrupted."""
         timeline_path = self.base_path / "timeline.json"
-        if not timeline_path.exists():
+        needs_init = not timeline_path.exists()
+
+        # BUG-EP-012: Check for corrupted timeline.json (exists but invalid JSON)
+        if not needs_init:
+            try:
+                text = timeline_path.read_text(encoding="utf-8", errors="replace")
+                json.loads(text)
+            except (json.JSONDecodeError, OSError):
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Corrupted timeline.json detected, recreating from scratch"
+                )
+                needs_init = True
+
+        if needs_init:
             initial_timeline = {
                 "version": self.VERSION,
                 "last_updated": datetime.now(timezone.utc).isoformat(),
package/memory/token_economics.py CHANGED
@@ -465,6 +465,12 @@ class TokenEconomics:
 
         self._full_load_baseline: Optional[int] = None
 
+    # Maximum token counter value to prevent unbounded growth in very long
+    # sessions. Python ints don't overflow, but downstream JSON serializers
+    # and dashboard charts can choke on extremely large numbers.
+    # 10 billion tokens is well beyond any realistic single-session usage.
+    _MAX_TOKEN_COUNTER = 10_000_000_000
+
     def record_discovery(self, tokens: int) -> None:
         """
         Record tokens used for memory discovery/creation.
@@ -473,7 +479,10 @@
             tokens: Number of tokens used
         """
         if tokens > 0:
-            self.metrics["discovery_tokens"] += tokens
+            self.metrics["discovery_tokens"] = min(
+                self.metrics["discovery_tokens"] + tokens,
+                self._MAX_TOKEN_COUNTER,
+            )
 
     def record_read(self, tokens: int, layer: int) -> None:
         """
@@ -484,7 +493,10 @@
             layer: Memory layer accessed (1=topic, 2=summary, 3=full)
         """
         if tokens > 0:
-            self.metrics["read_tokens"] += tokens
+            self.metrics["read_tokens"] = min(
+                self.metrics["read_tokens"] + tokens,
+                self._MAX_TOKEN_COUNTER,
+            )
 
         if layer in (1, 2, 3):
             layer_key = f"layer{layer}_loads"
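The clamp is a plain saturating add: the counter can never exceed the cap, no matter how many increments arrive. A worked example with illustrative values:

    _MAX_TOKEN_COUNTER = 10_000_000_000
    current = 9_999_999_990
    print(min(current + 25, _MAX_TOKEN_COUNTER))  # 10000000000, not 10000000015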
package/memory/vector_index.py CHANGED
@@ -277,11 +277,25 @@ class VectorIndex:
         else:
             embeddings_matrix = np.array([]).reshape(0, self.dimension)
 
-        np.savez(
-            f"{path}.npz",
-            embeddings=embeddings_matrix,
-            dimension=np.array([self.dimension])
-        )
+        # Write to temp file then atomically rename to prevent corruption
+        import tempfile
+        npz_path = f"{path}.npz"
+        npz_dir = os.path.dirname(npz_path) or "."
+        tmp_fd, tmp_path = tempfile.mkstemp(dir=npz_dir, suffix=".npz.tmp")
+        os.close(tmp_fd)
+        try:
+            np.savez(
+                tmp_path,
+                embeddings=embeddings_matrix,
+                dimension=np.array([self.dimension])
+            )
+            os.replace(tmp_path, npz_path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
         # Save metadata as JSON sidecar
         sidecar_data = {
@@ -290,8 +304,23 @@
             "dimension": self.dimension
         }
 
-        with open(f"{path}.json", "w", encoding="utf-8") as f:
-            json.dump(sidecar_data, f, indent=2)
+        import tempfile
+        json_path = f"{path}.json"
+        # Use atomic write to avoid corruption on crash (BUG-MEM-013 fix)
+        tmp_fd, tmp_path = tempfile.mkstemp(
+            dir=os.path.dirname(json_path) or ".",
+            suffix=".json.tmp"
+        )
+        try:
+            with os.fdopen(tmp_fd, "w", encoding="utf-8") as f:
+                json.dump(sidecar_data, f, indent=2, ensure_ascii=False)
+            os.replace(tmp_path, json_path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
     def load(self, path: str) -> None:
         """