loki-mode 6.71.1 → 6.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +9 -1
  2. package/SKILL.md +2 -2
  3. package/VERSION +1 -1
  4. package/autonomy/hooks/migration-hooks.sh +26 -0
  5. package/autonomy/loki +429 -92
  6. package/autonomy/run.sh +219 -38
  7. package/dashboard/__init__.py +1 -1
  8. package/dashboard/server.py +101 -19
  9. package/docs/INSTALLATION.md +20 -11
  10. package/docs/bug-fixes/agent-01-cli-fixes.md +101 -0
  11. package/docs/bug-fixes/agent-02-purplelab-fixes.md +88 -0
  12. package/docs/bug-fixes/agent-03-dashboard-fixes.md +119 -0
  13. package/docs/bug-fixes/agent-04-memory-fixes.md +105 -0
  14. package/docs/bug-fixes/agent-05-provider-fixes.md +86 -0
  15. package/docs/bug-fixes/agent-06-integration-fixes.md +101 -0
  16. package/docs/bug-fixes/agent-07-dash-run-fixes.md +101 -0
  17. package/docs/bug-fixes/agent-08-docker-fixes.md +164 -0
  18. package/docs/bug-fixes/agent-09-e2e-build-fixes.md +69 -0
  19. package/docs/bug-fixes/agent-10-e2e-fullstack-fixes.md +102 -0
  20. package/docs/bug-fixes/agent-11-e2e-session-fixes.md +70 -0
  21. package/docs/bug-fixes/agent-12-scenario-fixes.md +120 -0
  22. package/docs/bug-fixes/agent-13-enterprise-fixes.md +143 -0
  23. package/docs/bug-fixes/agent-14-uat-newuser-fixes.md +88 -0
  24. package/docs/bug-fixes/agent-15-uat-poweruser-fixes.md +132 -0
  25. package/docs/bug-fixes/agent-19-code-review.md +316 -0
  26. package/docs/bug-fixes/agent-20-architecture-review.md +331 -0
  27. package/docs/competitive/bolt-new-analysis.md +579 -0
  28. package/docs/competitive/emergence-others-analysis.md +605 -0
  29. package/docs/competitive/replit-lovable-analysis.md +622 -0
  30. package/docs/test-scenarios/edge-cases.md +813 -0
  31. package/docs/test-scenarios/enterprise-scenarios.md +732 -0
  32. package/mcp/__init__.py +1 -1
  33. package/mcp/server.py +49 -5
  34. package/memory/consolidation.py +33 -0
  35. package/memory/embeddings.py +10 -1
  36. package/memory/engine.py +83 -38
  37. package/memory/retrieval.py +36 -0
  38. package/memory/storage.py +56 -4
  39. package/memory/token_economics.py +14 -2
  40. package/memory/vector_index.py +36 -7
  41. package/package.json +1 -1
  42. package/providers/gemini.sh +89 -2
  43. package/templates/README.md +1 -1
  44. package/templates/cli-tool.md +30 -0
  45. package/templates/dashboard.md +4 -0
  46. package/templates/data-pipeline.md +4 -0
  47. package/templates/discord-bot.md +47 -0
  48. package/templates/game.md +4 -0
  49. package/templates/microservice.md +4 -0
  50. package/templates/npm-library.md +4 -0
  51. package/templates/rest-api-auth.md +50 -20
  52. package/templates/rest-api.md +15 -0
  53. package/templates/saas-starter.md +1 -1
  54. package/templates/slack-bot.md +36 -0
  55. package/templates/static-landing-page.md +9 -1
  56. package/templates/web-scraper.md +4 -0
  57. package/web-app/dist/assets/Badge-CeBkFjo6.js +1 -0
  58. package/web-app/dist/assets/Button-yuhqo8Fq.js +1 -0
  59. package/web-app/dist/assets/{Card-B1bV4syB.js → Card-BG17vsX0.js} +1 -1
  60. package/web-app/dist/assets/{HomePage-CZTV6Nea.js → HomePage-BMSQ7Apj.js} +3 -3
  61. package/web-app/dist/assets/{LoginPage-D4UdURJc.js → LoginPage-aH_6iolg.js} +1 -1
  62. package/web-app/dist/assets/{NotFoundPage-CCLSeL6j.js → NotFoundPage-Di8cNtB1.js} +1 -1
  63. package/web-app/dist/assets/ProjectPage-BtRssmw9.js +285 -0
  64. package/web-app/dist/assets/ProjectsPage-B-FTFagc.js +6 -0
  65. package/web-app/dist/assets/{SettingsPage-Xuv8EfAg.js → SettingsPage-DIJPBla4.js} +1 -1
  66. package/web-app/dist/assets/TeamsPage--19fNX7w.js +36 -0
  67. package/web-app/dist/assets/TemplatesPage-ChUQNOOv.js +11 -0
  68. package/web-app/dist/assets/TerminalOutput-Dwrzecyl.js +31 -0
  69. package/web-app/dist/assets/activity-BNRWeu9N.js +6 -0
  70. package/web-app/dist/assets/{arrow-left-CaGtolHc.js → arrow-left-Ce6g1_YE.js} +1 -1
  71. package/web-app/dist/assets/circle-alert-LIndawHL.js +11 -0
  72. package/web-app/dist/assets/clock-Bpj4VPlP.js +6 -0
  73. package/web-app/dist/assets/{external-link-CazyUyav.js → external-link-BhhdF0iQ.js} +1 -1
  74. package/web-app/dist/assets/folder-open-CM2LgfxI.js +11 -0
  75. package/web-app/dist/assets/index-8-KpWWq7.css +1 -0
  76. package/web-app/dist/assets/index-kPDW4e_b.js +236 -0
  77. package/web-app/dist/assets/lock-sAk3Xe54.js +16 -0
  78. package/web-app/dist/assets/search-CR-2i9by.js +6 -0
  79. package/web-app/dist/assets/server-DuFh4ymA.js +26 -0
  80. package/web-app/dist/assets/trash-2-BmkkT8V_.js +11 -0
  81. package/web-app/dist/index.html +2 -2
  82. package/web-app/server.py +1321 -53
  83. package/web-app/dist/assets/Badge-CBUx2PjL.js +0 -6
  84. package/web-app/dist/assets/Button-DsRiznlh.js +0 -21
  85. package/web-app/dist/assets/ProjectPage-D0w_X9tG.js +0 -237
  86. package/web-app/dist/assets/ProjectsPage-ByYxDlKC.js +0 -16
  87. package/web-app/dist/assets/TemplatesPage-BKWN07mc.js +0 -1
  88. package/web-app/dist/assets/TerminalOutput-Dj98V8Z-.js +0 -51
  89. package/web-app/dist/assets/clock-C_CDmobx.js +0 -11
  90. package/web-app/dist/assets/index-D452pFGl.css +0 -1
  91. package/web-app/dist/assets/index-Df4_kgLY.js +0 -196
package/mcp/__init__.py CHANGED
@@ -57,4 +57,4 @@ try:
 except ImportError:
     __all__ = ['mcp']
 
-__version__ = '6.71.1'
+__version__ = '6.72.0'
package/mcp/server.py CHANGED
@@ -74,12 +74,25 @@ def _get_learning_collector():
 
 
 def _get_mcp_state_manager():
-    """Get or create the StateManager instance for MCP server."""
+    """Get or create the StateManager instance for MCP server.
+
+    BUG-PU-002: Recreates the StateManager if the underlying .loki directory
+    has disappeared (e.g., project changed) to prevent stale file handle errors.
+    """
     global _state_manager
     if not STATE_MANAGER_AVAILABLE:
         return None
+    loki_dir = os.path.join(os.getcwd(), '.loki')
+    if _state_manager is not None:
+        # Verify the state manager's directory still matches cwd
+        existing_dir = getattr(_state_manager, 'loki_dir', None) or \
+            getattr(_state_manager, '_loki_dir', None)
+        if existing_dir and os.path.realpath(existing_dir) != os.path.realpath(loki_dir):
+            # Project directory changed, recreate
+            if hasattr(_state_manager, 'close'):
+                _state_manager.close()
+            _state_manager = None
     if _state_manager is None:
-        loki_dir = os.path.join(os.getcwd(), '.loki')
         _state_manager = get_state_manager(
             loki_dir=loki_dir,
             enable_watch=False,  # MCP server doesn't need file watching
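The cwd-change detection above reduces to a realpath comparison between the cached manager's directory and the current working directory. A minimal sketch with an illustrative cached path (the value below is hypothetical):

    import os

    cached_dir = "/projects/alpha/.loki"              # hypothetical cached loki_dir
    current_dir = os.path.join(os.getcwd(), ".loki")
    if os.path.realpath(cached_dir) != os.path.realpath(current_dir):
        print("project changed; close and recreate the StateManager")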
@@ -1312,22 +1325,39 @@ CHROMA_COLLECTION = os.environ.get("LOKI_CHROMA_COLLECTION", "loki-codebase")
 
 
 def _get_chroma_collection():
-    """Get or create ChromaDB collection (lazy connection)."""
+    """Get or create ChromaDB collection (lazy connection).
+
+    BUG-PU-002: Improved reconnection with timeout to prevent hanging
+    when ChromaDB container is stopped or unreachable after idle.
+    """
     global _chroma_client, _chroma_collection
     if _chroma_collection is not None:
         try:
             _chroma_client.heartbeat()
             return _chroma_collection
         except Exception:
+            logger.info("ChromaDB heartbeat failed, reconnecting...")
             _chroma_client = None
             _chroma_collection = None
     try:
         import chromadb
-        _chroma_client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
+        from chromadb.config import Settings
+        _chroma_client = chromadb.HttpClient(
+            host=CHROMA_HOST,
+            port=CHROMA_PORT,
+            settings=Settings(
+                chroma_client_auth_provider=None,
+                anonymized_telemetry=False,
+            ),
+        )
+        # Verify connectivity before returning
+        _chroma_client.heartbeat()
         _chroma_collection = _chroma_client.get_collection(name=CHROMA_COLLECTION)
         return _chroma_collection
     except Exception as e:
         logger.warning(f"ChromaDB not available: {e}")
+        _chroma_client = None
+        _chroma_collection = None
         return None
 
 
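Because the helper now resets its cached client and returns None on any connection failure, callers can degrade gracefully instead of hanging. A hedged usage sketch; the query call follows the standard chromadb collection API, and the fallback payload is illustrative:

    collection = _get_chroma_collection()
    if collection is None:
        results = {"results": [], "warning": "ChromaDB unavailable"}  # graceful fallback
    else:
        results = collection.query(query_texts=["atomic write"], n_results=5)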
@@ -1512,12 +1542,26 @@ async def mem_search(
         context = {"goal": query, "task_type": "exploration"}
         results = retriever.retrieve_task_aware(context, top_k=limit)
 
+        # BUG-MCP-006: Filter results by collection parameter when not "all"
+        # The retrieve_task_aware method returns all collections, but the user
+        # may have requested only a specific collection type
+        collection_type_map = {
+            "episodes": "episode",
+            "patterns": "pattern",
+            "skills": "skill",
+        }
+        filter_type = collection_type_map.get(collection)
+
         # Compact results for token efficiency
         compact = []
         for r in results:
+            result_type = r.get("_type", r.get("type", "unknown"))
+            # Apply collection filter
+            if filter_type and result_type != filter_type:
+                continue
             entry = {
                 "id": r.get("id", ""),
-                "type": r.get("_type", r.get("type", "unknown")),
+                "type": result_type,
                 "summary": (
                     r.get("goal", "") or
                     r.get("pattern", "") or
package/memory/consolidation.py CHANGED
@@ -11,9 +11,11 @@ from __future__ import annotations
 
 import uuid
 import time
+import fcntl
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 from typing import Optional, List, Dict, Any, Tuple
 
 try:
@@ -44,6 +46,7 @@ class ConsolidationResult:
         links_created: Number of Zettelkasten links created
         episodes_processed: Number of episodes that were processed
         duration_seconds: How long the consolidation took
+        vector_index_stale: Whether vector indices need rebuilding
     """
     patterns_created: int = 0
     patterns_merged: int = 0
@@ -51,6 +54,7 @@ class ConsolidationResult:
     links_created: int = 0
     episodes_processed: int = 0
     duration_seconds: float = 0.0
+    vector_index_stale: bool = False
 
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for JSON serialization."""
@@ -61,6 +65,7 @@ class ConsolidationResult:
             "links_created": self.links_created,
             "episodes_processed": self.episodes_processed,
             "duration_seconds": self.duration_seconds,
+            "vector_index_stale": self.vector_index_stale,
         }
 
 
@@ -131,12 +136,34 @@ class ConsolidationPipeline:
         """
         Run the full consolidation pipeline.
 
+        Uses a file lock to prevent concurrent consolidation runs from
+        corrupting data (BUG-MEM-003 fix). If another consolidation is
+        already in progress, this call blocks until it completes.
+
         Args:
             since_hours: Only process episodes from the last N hours
 
         Returns:
             ConsolidationResult with statistics about the consolidation run
         """
+        lock_path = Path(self.base_path) / ".consolidation.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_file = None
+        try:
+            lock_file = open(lock_path, "w")
+            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
+            return self._consolidate_locked(since_hours)
+        finally:
+            if lock_file is not None:
+                fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+                lock_file.close()
+                try:
+                    lock_path.unlink()
+                except OSError:
+                    pass
+
+    def _consolidate_locked(self, since_hours: int) -> ConsolidationResult:
+        """Run the consolidation pipeline under an exclusive lock."""
         start_time = time.time()
         result = ConsolidationResult()
 
@@ -238,6 +265,12 @@
             self.storage.update_pattern(pattern)
         result.links_created += len(links)
 
+        # Flag vector indices as stale when patterns changed (BUG-MEM-007).
+        # Callers should rebuild vector indices when this flag is True to
+        # ensure semantic search returns up-to-date results.
+        if result.patterns_created > 0 or result.patterns_merged > 0 or result.anti_patterns_created > 0:
+            result.vector_index_stale = True
+
         result.duration_seconds = time.time() - start_time
         return result
 
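The new vector_index_stale flag is meant to be consumed by the retrieval layer. A hedged wiring sketch: `consolidate` and `build_indices` are assumed entry-point names, while `vector_index_stale` and `mark_indices_stale` appear verbatim in this diff:

    def consolidate_and_refresh(pipeline, retriever, since_hours: int = 24):
        result = pipeline.consolidate(since_hours=since_hours)  # assumed public entry point
        if result.vector_index_stale:
            retriever.mark_indices_stale()  # from this diff (BUG-MEM-002)
            retriever.build_indices()       # assumed rebuild method; records a fresh timestamp
        return result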
package/memory/embeddings.py CHANGED
@@ -1001,10 +1001,19 @@ class EmbeddingEngine:
                 self._metrics["provider_calls"][provider_name] += 1
 
         except Exception as e:
-            logger.warning(f"Primary provider failed: {e}, trying fallback")
+            logger.warning("Primary provider failed: %s, trying fallback", e)
+            old_dimension = self.dimension
             self._use_fallback()
             embedding = self._primary_provider.embed(text)
             embedding = self._normalize(embedding)
+            # If dimension changed after fallback, log a warning so callers
+            # know existing vector indices may be incompatible (BUG-MEM-006).
+            if self.dimension != old_dimension:
+                logger.warning(
+                    "Embedding dimension changed from %d to %d after fallback. "
+                    "Existing vector indices may need to be rebuilt.",
+                    old_dimension, self.dimension
+                )
 
         # Ensure proper shape and type
         embedding = np.asarray(embedding, dtype=np.float32)
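The dimension warning matters because similarity scores are undefined across dimensions: a 768-dim index cannot score queries embedded at 384 dims after a provider fallback. A caller-side guard, sketched with illustrative numbers (the diff itself only adds the warning log; `dimension` is the engine attribute shown above):

    def ensure_compatible(engine, index_dimension: int) -> bool:
        # True when the engine's output can be scored against the index;
        # on False, callers should rebuild rather than mix dimensions.
        return engine.dimension == index_dimension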
package/memory/engine.py CHANGED
@@ -75,6 +75,10 @@ class MemoryEngine:
     - Procedural memory: Learned action sequences (skills)
     """
 
+    # Supported schema versions (BUG-MEM-004 fix)
+    SUPPORTED_SCHEMA_VERSIONS = {"1.0", "1.1.0"}
+    CURRENT_SCHEMA_VERSION = "1.1.0"
+
     def __init__(
         self,
         storage: Optional[MemoryStorage] = None,
@@ -99,10 +103,36 @@
     # Lifecycle Operations
     # -------------------------------------------------------------------------
 
+    def _validate_schema_version(self, data: Dict[str, Any], source: str) -> None:
+        """
+        Validate that a memory data structure has a supported schema version.
+
+        Logs a warning for unknown versions and upgrades old versions to current.
+        This prevents silent data corruption from loading incompatible formats
+        (BUG-MEM-004 fix).
+
+        Args:
+            data: Memory data dictionary (index.json, timeline.json, patterns.json, etc.)
+            source: Description of the data source (for logging)
+        """
+        version = data.get("version")
+        if version is None:
+            # Legacy data without version -- assign current version
+            data["version"] = self.CURRENT_SCHEMA_VERSION
+            logger.info("Assigned schema version %s to %s (no version found)",
+                        self.CURRENT_SCHEMA_VERSION, source)
+        elif version not in self.SUPPORTED_SCHEMA_VERSIONS:
+            logger.warning(
+                "Unsupported schema version '%s' in %s. "
+                "Supported versions: %s. Data may not load correctly.",
+                version, source, ", ".join(sorted(self.SUPPORTED_SCHEMA_VERSIONS))
+            )
+
     def initialize(self) -> None:
         """
         Initialize the memory system.
         Ensures all required directories and files exist.
+        Validates schema versions on existing data (BUG-MEM-004).
         """
         # Create directory structure
         directories = [
@@ -116,25 +146,29 @@
         for directory in directories:
             self.storage.ensure_directory(directory)
 
-        # Initialize index if not exists
-        if not self.storage.read_json("index.json"):
+        # Initialize index if not exists, validate schema version if it does
+        existing_index = self.storage.read_json("index.json")
+        if not existing_index:
             self.storage.write_json(
                 "index.json",
                 {
-                    "version": "1.0",
+                    "version": self.CURRENT_SCHEMA_VERSION,
                     "last_updated": datetime.now(timezone.utc).isoformat(),
                     "topics": [],
                     "total_memories": 0,
                     "total_tokens_available": 0,
                 },
             )
+        else:
+            self._validate_schema_version(existing_index, "index.json")
 
-        # Initialize timeline if not exists
-        if not self.storage.read_json("timeline.json"):
+        # Initialize timeline if not exists, validate schema version if it does
+        existing_timeline = self.storage.read_json("timeline.json")
+        if not existing_timeline:
             self.storage.write_json(
                 "timeline.json",
                 {
-                    "version": "1.0",
+                    "version": self.CURRENT_SCHEMA_VERSION,
                     "last_updated": datetime.now(timezone.utc).isoformat(),
                     "recent_actions": [],
                     "key_decisions": [],
@@ -145,6 +179,8 @@
                     },
                 },
             )
+        else:
+            self._validate_schema_version(existing_timeline, "timeline.json")
 
         # Initialize semantic patterns if not exists
         if not self.storage.read_json("semantic/patterns.json"):
@@ -282,24 +318,33 @@
         """
         Retrieve an episode by ID.
 
+        Supports multiple ID formats:
+        - ep-YYYY-MM-DD-XXX (standard from EpisodeTrace.create)
+        - {prefix}-YYYY-MM-DD-XXX (variable-length prefix)
+        - Any other format (falls back to directory scan)
+
         Args:
             episode_id: Episode identifier
 
         Returns:
             EpisodeTrace instance or None if not found
         """
-        # Parse date from episode ID (format: ep-YYYY-MM-DD-XXX)
-        parts = episode_id.split("-")
-        if len(parts) >= 5 and len(parts[1]) == 4 and len(parts[2]) == 2 and len(parts[3]) == 2:
-            date_str = f"{parts[1]}-{parts[2]}-{parts[3]}"
-        else:
-            # Non-standard ID format; search all directories
-            return self._search_episode(episode_id)
+        import re
+
+        # Try to extract YYYY-MM-DD from anywhere in the episode ID.
+        # This handles variable-length prefixes (ep-, episode-, etc.)
+        # and avoids the fragile fixed-offset parsing that produced
+        # garbage paths for non-standard prefixes (BUG-MEM-001).
+        date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', episode_id)
+        if date_match:
+            date_str = date_match.group(0)
+            data = self.storage.read_json(f"episodic/{date_str}/task-{episode_id}.json")
+            if data:
+                return self._dict_to_episode(data)
 
-        data = self.storage.read_json(f"episodic/{date_str}/task-{episode_id}.json")
-        if data:
-            return self._dict_to_episode(data)
-        return None
+        # Non-standard ID format or file not found at parsed path;
+        # search all directories as fallback
+        return self._search_episode(episode_id)
 
     def get_recent_episodes(self, limit: int = 10) -> List[EpisodeTrace]:
         """
@@ -416,18 +461,26 @@
         """
         Increment usage count for a pattern.
 
+        Uses the storage layer's pattern update which holds an exclusive lock
+        during the read-modify-write cycle, preventing TOCTOU race conditions
+        when multiple agents update patterns concurrently.
+
         Args:
             pattern_id: Pattern identifier
         """
-        patterns_data = self.storage.read_json("semantic/patterns.json") or {"patterns": []}
+        # Load pattern via storage (which acquires read lock)
+        pattern_data = self.storage.load_pattern(pattern_id)
+        if pattern_data is None:
+            return
 
-        for pattern in patterns_data["patterns"]:
-            if pattern.get("id") == pattern_id:
-                pattern["usage_count"] = pattern.get("usage_count", 0) + 1
-                pattern["last_used"] = datetime.now(timezone.utc).isoformat()
-                break
+        # Update fields
+        pattern_data["usage_count"] = pattern_data.get("usage_count", 0) + 1
+        pattern_data["last_used"] = datetime.now(timezone.utc).isoformat()
 
-        self.storage.write_json("semantic/patterns.json", patterns_data)
+        # Write back via save_pattern which holds an exclusive lock during
+        # the full read-modify-write (upsert) cycle
+        pattern_obj = self._dict_to_pattern(pattern_data)
+        self.storage.save_pattern(pattern_obj)
 
     # -------------------------------------------------------------------------
     # Skill Operations
@@ -758,15 +811,12 @@
         return "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
 
     def _update_timeline_with_episode(self, episode: Dict[str, Any]) -> None:
-        """Update timeline with episode summary."""
-        timeline = self.storage.read_json("timeline.json") or {
-            "version": "1.0",
-            "recent_actions": [],
-            "key_decisions": [],
-            "active_context": {},
-        }
+        """Update timeline with episode summary.
 
-        # Create action summary
+        Delegates to the storage layer's update_timeline method which holds
+        an exclusive lock during the read-modify-write cycle, preventing
+        concurrent timeline corruption.
+        """
         context = episode.get("context", {})
         action_entry = {
             "timestamp": episode.get("timestamp", datetime.now(timezone.utc).isoformat()),
@@ -775,12 +825,7 @@
             "topic_id": context.get("phase", "general"),
         }
 
-        # Add to recent actions (keep last 50)
-        timeline["recent_actions"].insert(0, action_entry)
-        timeline["recent_actions"] = timeline["recent_actions"][:50]
-        timeline["last_updated"] = datetime.now(timezone.utc).isoformat()
-
-        self.storage.write_json("timeline.json", timeline)
+        self.storage.update_timeline(action_entry)
 
     def _update_index_with_pattern(self, pattern: Dict[str, Any]) -> None:
         """Update index with pattern topic."""
package/memory/retrieval.py CHANGED
@@ -285,6 +285,10 @@ class MemoryRetrieval:
         self.vector_indices = vector_indices or {}
         self.base_path = Path(base_path)
         self._namespace = namespace
+        # Track when indices were last built to detect staleness (BUG-MEM-002).
+        # When consolidation modifies patterns, indices become stale and should
+        # be rebuilt before the next similarity search.
+        self._indices_built_at: Optional[float] = None
 
     @property
     def namespace(self) -> Optional[str]:
@@ -692,6 +696,15 @@
     # Multi-Modal Retrieval
     # -------------------------------------------------------------------------
 
+    def mark_indices_stale(self) -> None:
+        """
+        Mark vector indices as stale so they are rebuilt before next search.
+
+        Should be called after consolidation modifies the semantic memory
+        to prevent returning stale results (BUG-MEM-002 fix).
+        """
+        self._indices_built_at = None
+
     def retrieve_by_similarity(
         self,
         query: str,
@@ -702,6 +715,8 @@
         Retrieve by semantic similarity using embeddings.
 
         Falls back to keyword search if embeddings are not available.
+        Checks for index staleness and falls back to keyword search
+        if indices may be stale (BUG-MEM-002 fix).
 
         Args:
             query: Search query text
@@ -717,6 +732,21 @@
         if collection not in self.vector_indices:
             return self.retrieve_by_keyword(query.split(), collection)[:top_k]
 
+        # Check if indices need rebuilding after consolidation (BUG-MEM-002).
+        # If patterns.json was modified more recently than we last built
+        # indices, fall back to keyword search for accuracy.
+        if collection == "semantic" and self._indices_built_at is not None:
+            patterns_path = self.base_path / "semantic" / "patterns.json"
+            if patterns_path.exists():
+                import os
+                patterns_mtime = os.path.getmtime(patterns_path)
+                if patterns_mtime > self._indices_built_at:
+                    logger.info(
+                        "Semantic index is stale (patterns modified after index build). "
+                        "Falling back to keyword search for accuracy."
+                    )
+                    return self.retrieve_by_keyword(query.split(), collection)[:top_k]
+
         # Generate query embedding
         query_embedding = self.embedding_engine.embed(query)
 
@@ -1254,10 +1284,13 @@
 
         Reads all memories and creates vector embeddings for similarity search.
         Requires embedding_engine to be configured.
+        Records build timestamp so staleness can be detected (BUG-MEM-002).
         """
         if self.embedding_engine is None:
             return
 
+        import time as _time
+
         # Build episodic index
         if "episodic" in self.vector_indices:
             self._build_episodic_index()
@@ -1274,6 +1307,9 @@
         if "anti_patterns" in self.vector_indices:
             self._build_anti_patterns_index()
 
+        # Record build timestamp for staleness detection (BUG-MEM-002)
+        self._indices_built_at = _time.time()
+
     def update_index(
         self,
         collection: str,
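The staleness test itself is a single mtime comparison. A self-contained sketch under the assumption that patterns live at <base_path>/semantic/patterns.json, as in this diff (the base path below is hypothetical):

    import os
    import time
    from pathlib import Path

    indices_built_at = time.time()  # recorded when indices were last built
    patterns_path = Path(".loki/memory") / "semantic" / "patterns.json"
    if patterns_path.exists() and os.path.getmtime(patterns_path) > indices_built_at:
        print("semantic index stale; fall back to keyword search until rebuilt")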
package/memory/storage.py CHANGED
@@ -144,6 +144,8 @@ class MemoryStorage:
 
         # Clean up stale lock files from previous crashed processes
         self._cleanup_stale_locks()
+        # BUG-EP-015: Clean up orphaned temp files from kill -9 crashes
+        self._cleanup_stale_tmp_files()
 
     def _cleanup_stale_locks(self) -> None:
         """Remove stale .lock files older than 5 minutes (safe with concurrent processes).
@@ -167,10 +169,46 @@
             except OSError:
                 pass
 
+    def _cleanup_stale_tmp_files(self) -> None:
+        """Remove orphaned .tmp files older than 5 minutes from crash recovery.
+
+        BUG-EP-015: When a process is killed with SIGKILL during an atomic
+        write, the temp file (.tmp_*.json) is left behind because the rename
+        never completes. These accumulate over time.
+        """
+        try:
+            import time
+            now_real = time.time()
+            stale_seconds = 300  # 5 minutes
+            for tmp_file in self.base_path.rglob(".tmp_*.json"):
+                try:
+                    file_mtime = tmp_file.stat().st_mtime
+                    age_seconds = now_real - file_mtime
+                    if age_seconds > stale_seconds:
+                        tmp_file.unlink()
+                except OSError:
+                    pass
+        except OSError:
+            pass
+
     def _ensure_index(self) -> None:
-        """Initialize index.json if it doesn't exist."""
+        """Initialize or repair index.json if it doesn't exist or is corrupted."""
         index_path = self.base_path / "index.json"
-        if not index_path.exists():
+        needs_init = not index_path.exists()
+
+        # BUG-EP-012: Check for corrupted index.json (exists but invalid JSON)
+        if not needs_init:
+            try:
+                text = index_path.read_text(encoding="utf-8", errors="replace")
+                json.loads(text)
+            except (json.JSONDecodeError, OSError):
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Corrupted index.json detected, recreating from scratch"
+                )
+                needs_init = True
+
+        if needs_init:
             initial_index = {
                 "version": self.VERSION,
                 "last_updated": datetime.now(timezone.utc).isoformat(),
@@ -179,9 +217,23 @@
         self._atomic_write(index_path, initial_index)
 
     def _ensure_timeline(self) -> None:
-        """Initialize timeline.json if it doesn't exist."""
+        """Initialize or repair timeline.json if it doesn't exist or is corrupted."""
         timeline_path = self.base_path / "timeline.json"
-        if not timeline_path.exists():
+        needs_init = not timeline_path.exists()
+
+        # BUG-EP-012: Check for corrupted timeline.json (exists but invalid JSON)
+        if not needs_init:
+            try:
+                text = timeline_path.read_text(encoding="utf-8", errors="replace")
+                json.loads(text)
+            except (json.JSONDecodeError, OSError):
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Corrupted timeline.json detected, recreating from scratch"
+                )
+                needs_init = True
+
+        if needs_init:
             initial_timeline = {
                 "version": self.VERSION,
                 "last_updated": datetime.now(timezone.utc).isoformat(),
package/memory/token_economics.py CHANGED
@@ -465,6 +465,12 @@ class TokenEconomics:
 
         self._full_load_baseline: Optional[int] = None
 
+    # Maximum token counter value to prevent unbounded growth in very long
+    # sessions. Python ints don't overflow, but downstream JSON serializers
+    # and dashboard charts can choke on extremely large numbers.
+    # 10 billion tokens is well beyond any realistic single-session usage.
+    _MAX_TOKEN_COUNTER = 10_000_000_000
+
     def record_discovery(self, tokens: int) -> None:
         """
         Record tokens used for memory discovery/creation.
@@ -473,7 +479,10 @@
             tokens: Number of tokens used
         """
         if tokens > 0:
-            self.metrics["discovery_tokens"] += tokens
+            self.metrics["discovery_tokens"] = min(
+                self.metrics["discovery_tokens"] + tokens,
+                self._MAX_TOKEN_COUNTER,
+            )
 
     def record_read(self, tokens: int, layer: int) -> None:
         """
@@ -484,7 +493,10 @@
             layer: Memory layer accessed (1=topic, 2=summary, 3=full)
         """
         if tokens > 0:
-            self.metrics["read_tokens"] += tokens
+            self.metrics["read_tokens"] = min(
+                self.metrics["read_tokens"] + tokens,
+                self._MAX_TOKEN_COUNTER,
+            )
 
         if layer in (1, 2, 3):
             layer_key = f"layer{layer}_loads"
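The clamp is a plain saturating add: the counter can never exceed the cap, no matter how many increments arrive. A worked example with illustrative values:

    _MAX_TOKEN_COUNTER = 10_000_000_000
    current = 9_999_999_990
    print(min(current + 25, _MAX_TOKEN_COUNTER))  # 10000000000, not 10000000015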
package/memory/vector_index.py CHANGED
@@ -277,11 +277,25 @@ class VectorIndex:
         else:
             embeddings_matrix = np.array([]).reshape(0, self.dimension)
 
-        np.savez(
-            f"{path}.npz",
-            embeddings=embeddings_matrix,
-            dimension=np.array([self.dimension])
-        )
+        # Write to temp file then atomically rename to prevent corruption
+        import tempfile
+        npz_path = f"{path}.npz"
+        npz_dir = os.path.dirname(npz_path) or "."
+        tmp_fd, tmp_path = tempfile.mkstemp(dir=npz_dir, suffix=".npz.tmp")
+        os.close(tmp_fd)
+        try:
+            np.savez(
+                tmp_path,
+                embeddings=embeddings_matrix,
+                dimension=np.array([self.dimension])
+            )
+            os.replace(tmp_path, npz_path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
         # Save metadata as JSON sidecar
         sidecar_data = {
@@ -290,8 +304,23 @@
             "dimension": self.dimension
         }
 
-        with open(f"{path}.json", "w", encoding="utf-8") as f:
-            json.dump(sidecar_data, f, indent=2)
+        import tempfile
+        json_path = f"{path}.json"
+        # Use atomic write to avoid corruption on crash (BUG-MEM-013 fix)
+        tmp_fd, tmp_path = tempfile.mkstemp(
+            dir=os.path.dirname(json_path) or ".",
+            suffix=".json.tmp"
+        )
+        try:
+            with os.fdopen(tmp_fd, "w", encoding="utf-8") as f:
+                json.dump(sidecar_data, f, indent=2, ensure_ascii=False)
+            os.replace(tmp_path, json_path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
     def load(self, path: str) -> None:
         """