loki-mode 6.71.1 → 6.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -1
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/hooks/migration-hooks.sh +26 -0
- package/autonomy/loki +429 -92
- package/autonomy/run.sh +219 -38
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +101 -19
- package/docs/INSTALLATION.md +20 -11
- package/docs/bug-fixes/agent-01-cli-fixes.md +101 -0
- package/docs/bug-fixes/agent-02-purplelab-fixes.md +88 -0
- package/docs/bug-fixes/agent-03-dashboard-fixes.md +119 -0
- package/docs/bug-fixes/agent-04-memory-fixes.md +105 -0
- package/docs/bug-fixes/agent-05-provider-fixes.md +86 -0
- package/docs/bug-fixes/agent-06-integration-fixes.md +101 -0
- package/docs/bug-fixes/agent-07-dash-run-fixes.md +101 -0
- package/docs/bug-fixes/agent-08-docker-fixes.md +164 -0
- package/docs/bug-fixes/agent-09-e2e-build-fixes.md +69 -0
- package/docs/bug-fixes/agent-10-e2e-fullstack-fixes.md +102 -0
- package/docs/bug-fixes/agent-11-e2e-session-fixes.md +70 -0
- package/docs/bug-fixes/agent-12-scenario-fixes.md +120 -0
- package/docs/bug-fixes/agent-13-enterprise-fixes.md +143 -0
- package/docs/bug-fixes/agent-14-uat-newuser-fixes.md +88 -0
- package/docs/bug-fixes/agent-15-uat-poweruser-fixes.md +132 -0
- package/docs/bug-fixes/agent-19-code-review.md +316 -0
- package/docs/bug-fixes/agent-20-architecture-review.md +331 -0
- package/docs/competitive/bolt-new-analysis.md +579 -0
- package/docs/competitive/emergence-others-analysis.md +605 -0
- package/docs/competitive/replit-lovable-analysis.md +622 -0
- package/docs/test-scenarios/edge-cases.md +813 -0
- package/docs/test-scenarios/enterprise-scenarios.md +732 -0
- package/mcp/__init__.py +1 -1
- package/mcp/server.py +49 -5
- package/memory/consolidation.py +33 -0
- package/memory/embeddings.py +10 -1
- package/memory/engine.py +83 -38
- package/memory/retrieval.py +36 -0
- package/memory/storage.py +56 -4
- package/memory/token_economics.py +14 -2
- package/memory/vector_index.py +36 -7
- package/package.json +1 -1
- package/providers/gemini.sh +89 -2
- package/templates/README.md +1 -1
- package/templates/cli-tool.md +30 -0
- package/templates/dashboard.md +4 -0
- package/templates/data-pipeline.md +4 -0
- package/templates/discord-bot.md +47 -0
- package/templates/game.md +4 -0
- package/templates/microservice.md +4 -0
- package/templates/npm-library.md +4 -0
- package/templates/rest-api-auth.md +50 -20
- package/templates/rest-api.md +15 -0
- package/templates/saas-starter.md +1 -1
- package/templates/slack-bot.md +36 -0
- package/templates/static-landing-page.md +9 -1
- package/templates/web-scraper.md +4 -0
- package/web-app/dist/assets/Badge-CeBkFjo6.js +1 -0
- package/web-app/dist/assets/Button-yuhqo8Fq.js +1 -0
- package/web-app/dist/assets/{Card-B1bV4syB.js → Card-BG17vsX0.js} +1 -1
- package/web-app/dist/assets/{HomePage-CZTV6Nea.js → HomePage-BMSQ7Apj.js} +3 -3
- package/web-app/dist/assets/{LoginPage-D4UdURJc.js → LoginPage-aH_6iolg.js} +1 -1
- package/web-app/dist/assets/{NotFoundPage-CCLSeL6j.js → NotFoundPage-Di8cNtB1.js} +1 -1
- package/web-app/dist/assets/ProjectPage-BtRssmw9.js +285 -0
- package/web-app/dist/assets/ProjectsPage-B-FTFagc.js +6 -0
- package/web-app/dist/assets/{SettingsPage-Xuv8EfAg.js → SettingsPage-DIJPBla4.js} +1 -1
- package/web-app/dist/assets/TeamsPage--19fNX7w.js +36 -0
- package/web-app/dist/assets/TemplatesPage-ChUQNOOv.js +11 -0
- package/web-app/dist/assets/TerminalOutput-Dwrzecyl.js +31 -0
- package/web-app/dist/assets/activity-BNRWeu9N.js +6 -0
- package/web-app/dist/assets/{arrow-left-CaGtolHc.js → arrow-left-Ce6g1_YE.js} +1 -1
- package/web-app/dist/assets/circle-alert-LIndawHL.js +11 -0
- package/web-app/dist/assets/clock-Bpj4VPlP.js +6 -0
- package/web-app/dist/assets/{external-link-CazyUyav.js → external-link-BhhdF0iQ.js} +1 -1
- package/web-app/dist/assets/folder-open-CM2LgfxI.js +11 -0
- package/web-app/dist/assets/index-8-KpWWq7.css +1 -0
- package/web-app/dist/assets/index-kPDW4e_b.js +236 -0
- package/web-app/dist/assets/lock-sAk3Xe54.js +16 -0
- package/web-app/dist/assets/search-CR-2i9by.js +6 -0
- package/web-app/dist/assets/server-DuFh4ymA.js +26 -0
- package/web-app/dist/assets/trash-2-BmkkT8V_.js +11 -0
- package/web-app/dist/index.html +2 -2
- package/web-app/server.py +1321 -53
- package/web-app/dist/assets/Badge-CBUx2PjL.js +0 -6
- package/web-app/dist/assets/Button-DsRiznlh.js +0 -21
- package/web-app/dist/assets/ProjectPage-D0w_X9tG.js +0 -237
- package/web-app/dist/assets/ProjectsPage-ByYxDlKC.js +0 -16
- package/web-app/dist/assets/TemplatesPage-BKWN07mc.js +0 -1
- package/web-app/dist/assets/TerminalOutput-Dj98V8Z-.js +0 -51
- package/web-app/dist/assets/clock-C_CDmobx.js +0 -11
- package/web-app/dist/assets/index-D452pFGl.css +0 -1
- package/web-app/dist/assets/index-Df4_kgLY.js +0 -196
package/mcp/__init__.py
CHANGED
package/mcp/server.py
CHANGED
```diff
@@ -74,12 +74,25 @@ def _get_learning_collector():
 
 
 def _get_mcp_state_manager():
-    """Get or create the StateManager instance for MCP server."""
+    """Get or create the StateManager instance for MCP server.
+
+    BUG-PU-002: Recreates the StateManager if the underlying .loki directory
+    has disappeared (e.g., project changed) to prevent stale file handle errors.
+    """
     global _state_manager
     if not STATE_MANAGER_AVAILABLE:
         return None
+    loki_dir = os.path.join(os.getcwd(), '.loki')
+    if _state_manager is not None:
+        # Verify the state manager's directory still matches cwd
+        existing_dir = getattr(_state_manager, 'loki_dir', None) or \
+                       getattr(_state_manager, '_loki_dir', None)
+        if existing_dir and os.path.realpath(existing_dir) != os.path.realpath(loki_dir):
+            # Project directory changed, recreate
+            if hasattr(_state_manager, 'close'):
+                _state_manager.close()
+            _state_manager = None
     if _state_manager is None:
-        loki_dir = os.path.join(os.getcwd(), '.loki')
         _state_manager = get_state_manager(
             loki_dir=loki_dir,
             enable_watch=False, # MCP server doesn't need file watching
@@ -1312,22 +1325,39 @@ CHROMA_COLLECTION = os.environ.get("LOKI_CHROMA_COLLECTION", "loki-codebase")
 
 
 def _get_chroma_collection():
-    """Get or create ChromaDB collection (lazy connection)."""
+    """Get or create ChromaDB collection (lazy connection).
+
+    BUG-PU-002: Improved reconnection with timeout to prevent hanging
+    when ChromaDB container is stopped or unreachable after idle.
+    """
     global _chroma_client, _chroma_collection
     if _chroma_collection is not None:
         try:
             _chroma_client.heartbeat()
             return _chroma_collection
         except Exception:
+            logger.info("ChromaDB heartbeat failed, reconnecting...")
             _chroma_client = None
             _chroma_collection = None
     try:
         import chromadb
-
+        from chromadb.config import Settings
+        _chroma_client = chromadb.HttpClient(
+            host=CHROMA_HOST,
+            port=CHROMA_PORT,
+            settings=Settings(
+                chroma_client_auth_provider=None,
+                anonymized_telemetry=False,
+            ),
+        )
+        # Verify connectivity before returning
+        _chroma_client.heartbeat()
         _chroma_collection = _chroma_client.get_collection(name=CHROMA_COLLECTION)
         return _chroma_collection
     except Exception as e:
         logger.warning(f"ChromaDB not available: {e}")
+        _chroma_client = None
+        _chroma_collection = None
         return None
 
 
@@ -1512,12 +1542,26 @@ async def mem_search(
     context = {"goal": query, "task_type": "exploration"}
     results = retriever.retrieve_task_aware(context, top_k=limit)
 
+    # BUG-MCP-006: Filter results by collection parameter when not "all"
+    # The retrieve_task_aware method returns all collections, but the user
+    # may have requested only a specific collection type
+    collection_type_map = {
+        "episodes": "episode",
+        "patterns": "pattern",
+        "skills": "skill",
+    }
+    filter_type = collection_type_map.get(collection)
+
     # Compact results for token efficiency
     compact = []
     for r in results:
+        result_type = r.get("_type", r.get("type", "unknown"))
+        # Apply collection filter
+        if filter_type and result_type != filter_type:
+            continue
         entry = {
            "id": r.get("id", ""),
-            "type":
+            "type": result_type,
            "summary": (
                r.get("goal", "") or
                r.get("pattern", "") or
```
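Both server fixes above share one cache-validation shape: probe the cached handle before reuse, discard it on failure, and verify a rebuilt handle before caching it again. A minimal standalone sketch of that shape, with a hypothetical `Client`/`connect()` standing in for the real ChromaDB client:

```python
from typing import Optional

class Client:
    """Hypothetical stand-in for a network client such as chromadb.HttpClient."""
    def heartbeat(self) -> None:
        ...  # a real client raises on connection failure

def connect() -> Client:
    """Hypothetical factory that opens a fresh connection."""
    return Client()

_client: Optional[Client] = None

def get_client() -> Optional[Client]:
    """Cache-validation pattern from the diff: probe, discard, rebuild, re-probe."""
    global _client
    if _client is not None:
        try:
            _client.heartbeat()   # cheap liveness probe on the cached handle
            return _client
        except Exception:
            _client = None        # stale handle: discard and rebuild below
    try:
        candidate = connect()
        candidate.heartbeat()     # verify connectivity before caching
        _client = candidate
        return _client
    except Exception:
        _client = None            # leave the cache empty so the next call retries
        return None
```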
package/memory/consolidation.py
CHANGED
```diff
@@ -11,9 +11,11 @@ from __future__ import annotations
 
 import uuid
 import time
+import fcntl
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 from typing import Optional, List, Dict, Any, Tuple
 
 try:
@@ -44,6 +46,7 @@ class ConsolidationResult:
         links_created: Number of Zettelkasten links created
         episodes_processed: Number of episodes that were processed
         duration_seconds: How long the consolidation took
+        vector_index_stale: Whether vector indices need rebuilding
     """
     patterns_created: int = 0
     patterns_merged: int = 0
@@ -51,6 +54,7 @@
     links_created: int = 0
     episodes_processed: int = 0
     duration_seconds: float = 0.0
+    vector_index_stale: bool = False
 
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for JSON serialization."""
@@ -61,6 +65,7 @@
             "links_created": self.links_created,
             "episodes_processed": self.episodes_processed,
             "duration_seconds": self.duration_seconds,
+            "vector_index_stale": self.vector_index_stale,
         }
 
 
@@ -131,12 +136,34 @@ class ConsolidationPipeline:
         """
         Run the full consolidation pipeline.
 
+        Uses a file lock to prevent concurrent consolidation runs from
+        corrupting data (BUG-MEM-003 fix). If another consolidation is
+        already in progress, this call blocks until it completes.
+
         Args:
             since_hours: Only process episodes from the last N hours
 
         Returns:
             ConsolidationResult with statistics about the consolidation run
         """
+        lock_path = Path(self.base_path) / ".consolidation.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_file = None
+        try:
+            lock_file = open(lock_path, "w")
+            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
+            return self._consolidate_locked(since_hours)
+        finally:
+            if lock_file is not None:
+                fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+                lock_file.close()
+                try:
+                    lock_path.unlink()
+                except OSError:
+                    pass
+
+    def _consolidate_locked(self, since_hours: int) -> ConsolidationResult:
+        """Run the consolidation pipeline under an exclusive lock."""
         start_time = time.time()
         result = ConsolidationResult()
 
@@ -238,6 +265,12 @@ class ConsolidationPipeline:
         self.storage.update_pattern(pattern)
         result.links_created += len(links)
 
+        # Flag vector indices as stale when patterns changed (BUG-MEM-007).
+        # Callers should rebuild vector indices when this flag is True to
+        # ensure semantic search returns up-to-date results.
+        if result.patterns_created > 0 or result.patterns_merged > 0 or result.anti_patterns_created > 0:
+            result.vector_index_stale = True
+
         result.duration_seconds = time.time() - start_time
         return result
 
```
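The new `consolidate()` wrapper is a plain `fcntl.flock` critical section around the existing pipeline. A self-contained sketch of the same locking shape (POSIX-only, since `fcntl` does not exist on Windows); `do_work` is a placeholder for the locked body:

```python
import fcntl
from pathlib import Path
from typing import Callable, TypeVar

T = TypeVar("T")

def with_file_lock(lock_path: Path, do_work: Callable[[], T]) -> T:
    """Run do_work under an exclusive advisory lock on lock_path.

    LOCK_EX blocks until any other holder releases the lock, matching the
    documented behavior: a second consolidation waits instead of racing.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    lock_file = open(lock_path, "w")
    try:
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
        return do_work()
    finally:
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
        lock_file.close()
        try:
            lock_path.unlink()  # best-effort cleanup, as in the diff
        except OSError:
            pass

if __name__ == "__main__":
    print(with_file_lock(Path("/tmp/.demo-consolidation.lock"), lambda: "consolidated"))
```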
package/memory/embeddings.py
CHANGED
```diff
@@ -1001,10 +1001,19 @@ class EmbeddingEngine:
             self._metrics["provider_calls"][provider_name] += 1
 
         except Exception as e:
-            logger.warning(
+            logger.warning("Primary provider failed: %s, trying fallback", e)
+            old_dimension = self.dimension
             self._use_fallback()
             embedding = self._primary_provider.embed(text)
             embedding = self._normalize(embedding)
+            # If dimension changed after fallback, log a warning so callers
+            # know existing vector indices may be incompatible (BUG-MEM-006).
+            if self.dimension != old_dimension:
+                logger.warning(
+                    "Embedding dimension changed from %d to %d after fallback. "
+                    "Existing vector indices may need to be rebuilt.",
+                    old_dimension, self.dimension
+                )
 
         # Ensure proper shape and type
         embedding = np.asarray(embedding, dtype=np.float32)
```
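The dimension warning matters because vectors produced in different-sized embedding spaces cannot be scored against each other at all; the dot product is a shape error rather than a low score. A short illustration using only numpy, with made-up dimensions (1536 vs. 384):

```python
import numpy as np

rng = np.random.default_rng(0)

# Index vector from the primary provider (e.g., a 1536-dim model)...
index_vec = rng.standard_normal(1536).astype(np.float32)
# ...and a query embedded after falling back to a smaller model (e.g., 384-dim).
query_vec = rng.standard_normal(384).astype(np.float32)

try:
    score = float(index_vec @ query_vec)  # similarity scoring attempt
except ValueError as exc:
    # numpy rejects the mismatched shapes outright: the index has to be
    # rebuilt with the fallback provider's dimension before search works.
    print(f"similarity failed: {exc}")
```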
package/memory/engine.py
CHANGED
```diff
@@ -75,6 +75,10 @@ class MemoryEngine:
     - Procedural memory: Learned action sequences (skills)
     """
 
+    # Supported schema versions (BUG-MEM-004 fix)
+    SUPPORTED_SCHEMA_VERSIONS = {"1.0", "1.1.0"}
+    CURRENT_SCHEMA_VERSION = "1.1.0"
+
     def __init__(
         self,
         storage: Optional[MemoryStorage] = None,
@@ -99,10 +103,36 @@ class MemoryEngine:
     # Lifecycle Operations
     # -------------------------------------------------------------------------
 
+    def _validate_schema_version(self, data: Dict[str, Any], source: str) -> None:
+        """
+        Validate that a memory data structure has a supported schema version.
+
+        Logs a warning for unknown versions and upgrades old versions to current.
+        This prevents silent data corruption from loading incompatible formats
+        (BUG-MEM-004 fix).
+
+        Args:
+            data: Memory data dictionary (index.json, timeline.json, patterns.json, etc.)
+            source: Description of the data source (for logging)
+        """
+        version = data.get("version")
+        if version is None:
+            # Legacy data without version -- assign current version
+            data["version"] = self.CURRENT_SCHEMA_VERSION
+            logger.info("Assigned schema version %s to %s (no version found)",
+                        self.CURRENT_SCHEMA_VERSION, source)
+        elif version not in self.SUPPORTED_SCHEMA_VERSIONS:
+            logger.warning(
+                "Unsupported schema version '%s' in %s. "
+                "Supported versions: %s. Data may not load correctly.",
+                version, source, ", ".join(sorted(self.SUPPORTED_SCHEMA_VERSIONS))
+            )
+
     def initialize(self) -> None:
         """
         Initialize the memory system.
         Ensures all required directories and files exist.
+        Validates schema versions on existing data (BUG-MEM-004).
         """
         # Create directory structure
         directories = [
@@ -116,25 +146,29 @@
         for directory in directories:
             self.storage.ensure_directory(directory)
 
-        # Initialize index if not exists
-        if not self.storage.read_json("index.json"):
+        # Initialize index if not exists, validate schema version if it does
+        existing_index = self.storage.read_json("index.json")
+        if not existing_index:
             self.storage.write_json(
                 "index.json",
                 {
-                    "version":
+                    "version": self.CURRENT_SCHEMA_VERSION,
                     "last_updated": datetime.now(timezone.utc).isoformat(),
                     "topics": [],
                     "total_memories": 0,
                     "total_tokens_available": 0,
                 },
             )
+        else:
+            self._validate_schema_version(existing_index, "index.json")
 
-        # Initialize timeline if not exists
-        if not self.storage.read_json("timeline.json"):
+        # Initialize timeline if not exists, validate schema version if it does
+        existing_timeline = self.storage.read_json("timeline.json")
+        if not existing_timeline:
             self.storage.write_json(
                 "timeline.json",
                 {
-                    "version":
+                    "version": self.CURRENT_SCHEMA_VERSION,
                     "last_updated": datetime.now(timezone.utc).isoformat(),
                     "recent_actions": [],
                     "key_decisions": [],
@@ -145,6 +179,8 @@
                 },
             },
         )
+        else:
+            self._validate_schema_version(existing_timeline, "timeline.json")
 
         # Initialize semantic patterns if not exists
         if not self.storage.read_json("semantic/patterns.json"):
@@ -282,24 +318,33 @@
         """
         Retrieve an episode by ID.
 
+        Supports multiple ID formats:
+        - ep-YYYY-MM-DD-XXX (standard from EpisodeTrace.create)
+        - {prefix}-YYYY-MM-DD-XXX (variable-length prefix)
+        - Any other format (falls back to directory scan)
+
         Args:
             episode_id: Episode identifier
 
         Returns:
             EpisodeTrace instance or None if not found
         """
-
-
-
-
-
-
-
+        import re
+
+        # Try to extract YYYY-MM-DD from anywhere in the episode ID.
+        # This handles variable-length prefixes (ep-, episode-, etc.)
+        # and avoids the fragile fixed-offset parsing that produced
+        # garbage paths for non-standard prefixes (BUG-MEM-001).
+        date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', episode_id)
+        if date_match:
+            date_str = date_match.group(0)
+            data = self.storage.read_json(f"episodic/{date_str}/task-{episode_id}.json")
+            if data:
+                return self._dict_to_episode(data)
 
-
-
-
-        return None
+        # Non-standard ID format or file not found at parsed path;
+        # search all directories as fallback
+        return self._search_episode(episode_id)
 
     def get_recent_episodes(self, limit: int = 10) -> List[EpisodeTrace]:
         """
@@ -416,18 +461,26 @@
         """
         Increment usage count for a pattern.
 
+        Uses the storage layer's pattern update which holds an exclusive lock
+        during the read-modify-write cycle, preventing TOCTOU race conditions
+        when multiple agents update patterns concurrently.
+
         Args:
             pattern_id: Pattern identifier
         """
-
+        # Load pattern via storage (which acquires read lock)
+        pattern_data = self.storage.load_pattern(pattern_id)
+        if pattern_data is None:
+            return
 
-
-
-
-            pattern["last_used"] = datetime.now(timezone.utc).isoformat()
-            break
+        # Update fields
+        pattern_data["usage_count"] = pattern_data.get("usage_count", 0) + 1
+        pattern_data["last_used"] = datetime.now(timezone.utc).isoformat()
 
-
+        # Write back via save_pattern which holds an exclusive lock during
+        # the full read-modify-write (upsert) cycle
+        pattern_obj = self._dict_to_pattern(pattern_data)
+        self.storage.save_pattern(pattern_obj)
 
     # -------------------------------------------------------------------------
     # Skill Operations
@@ -758,15 +811,12 @@
         return "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
 
     def _update_timeline_with_episode(self, episode: Dict[str, Any]) -> None:
-        """Update timeline with episode summary.
-        timeline = self.storage.read_json("timeline.json") or {
-            "version": "1.0",
-            "recent_actions": [],
-            "key_decisions": [],
-            "active_context": {},
-        }
+        """Update timeline with episode summary.
 
-
+        Delegates to the storage layer's update_timeline method which holds
+        an exclusive lock during the read-modify-write cycle, preventing
+        concurrent timeline corruption.
+        """
         context = episode.get("context", {})
         action_entry = {
             "timestamp": episode.get("timestamp", datetime.now(timezone.utc).isoformat()),
@@ -775,12 +825,7 @@
             "topic_id": context.get("phase", "general"),
         }
 
-
-        timeline["recent_actions"].insert(0, action_entry)
-        timeline["recent_actions"] = timeline["recent_actions"][:50]
-        timeline["last_updated"] = datetime.now(timezone.utc).isoformat()
-
-        self.storage.write_json("timeline.json", timeline)
+        self.storage.update_timeline(action_entry)
 
     def _update_index_with_pattern(self, pattern: Dict[str, Any]) -> None:
         """Update index with pattern topic."""
```
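The BUG-MEM-001 change swaps fixed-offset slicing for a date regex, so any ID that embeds a YYYY-MM-DD date maps to the same storage directory. A quick sketch of that parsing rule; `episode_path` is a hypothetical helper mirroring the lookup the diff performs:

```python
import re
from typing import Optional

DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")

def episode_path(episode_id: str) -> Optional[str]:
    """Hypothetical helper mirroring the path get_episode tries first.

    Returns None when no date is embedded, the case that falls through
    to the directory-scan fallback in the diff.
    """
    match = DATE_RE.search(episode_id)
    if match is None:
        return None
    return f"episodic/{match.group(0)}/task-{episode_id}.json"

print(episode_path("ep-2025-01-15-a1b2c3"))       # standard prefix
print(episode_path("episode-2025-01-15-a1b2c3"))  # longer prefix, same directory
print(episode_path("custom-id-without-date"))     # None -> directory scan
```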
package/memory/retrieval.py
CHANGED
```diff
@@ -285,6 +285,10 @@ class MemoryRetrieval:
         self.vector_indices = vector_indices or {}
         self.base_path = Path(base_path)
         self._namespace = namespace
+        # Track when indices were last built to detect staleness (BUG-MEM-002).
+        # When consolidation modifies patterns, indices become stale and should
+        # be rebuilt before the next similarity search.
+        self._indices_built_at: Optional[float] = None
 
     @property
     def namespace(self) -> Optional[str]:
@@ -692,6 +696,15 @@ class MemoryRetrieval:
     # Multi-Modal Retrieval
     # -------------------------------------------------------------------------
 
+    def mark_indices_stale(self) -> None:
+        """
+        Mark vector indices as stale so they are rebuilt before next search.
+
+        Should be called after consolidation modifies the semantic memory
+        to prevent returning stale results (BUG-MEM-002 fix).
+        """
+        self._indices_built_at = None
+
     def retrieve_by_similarity(
         self,
         query: str,
@@ -702,6 +715,8 @@
         Retrieve by semantic similarity using embeddings.
 
         Falls back to keyword search if embeddings are not available.
+        Checks for index staleness and falls back to keyword search
+        if indices may be stale (BUG-MEM-002 fix).
 
         Args:
             query: Search query text
@@ -717,6 +732,21 @@
         if collection not in self.vector_indices:
             return self.retrieve_by_keyword(query.split(), collection)[:top_k]
 
+        # Check if indices need rebuilding after consolidation (BUG-MEM-002).
+        # If patterns.json was modified more recently than we last built
+        # indices, fall back to keyword search for accuracy.
+        if collection == "semantic" and self._indices_built_at is not None:
+            patterns_path = self.base_path / "semantic" / "patterns.json"
+            if patterns_path.exists():
+                import os
+                patterns_mtime = os.path.getmtime(patterns_path)
+                if patterns_mtime > self._indices_built_at:
+                    logger.info(
+                        "Semantic index is stale (patterns modified after index build). "
+                        "Falling back to keyword search for accuracy."
+                    )
+                    return self.retrieve_by_keyword(query.split(), collection)[:top_k]
+
         # Generate query embedding
         query_embedding = self.embedding_engine.embed(query)
 
@@ -1254,10 +1284,13 @@
 
         Reads all memories and creates vector embeddings for similarity search.
         Requires embedding_engine to be configured.
+        Records build timestamp so staleness can be detected (BUG-MEM-002).
         """
         if self.embedding_engine is None:
             return
 
+        import time as _time
+
         # Build episodic index
         if "episodic" in self.vector_indices:
             self._build_episodic_index()
@@ -1274,6 +1307,9 @@
         if "anti_patterns" in self.vector_indices:
             self._build_anti_patterns_index()
 
+        # Record build timestamp for staleness detection (BUG-MEM-002)
+        self._indices_built_at = _time.time()
+
     def update_index(
         self,
         collection: str,
```
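The staleness check reduces to comparing one timestamp captured at index-build time against the data file's mtime. The same idea in isolation, with a hypothetical `patterns.json`; note that mtime granularity is filesystem-dependent:

```python
import os
import time
from pathlib import Path
from typing import Optional

class StalenessTracker:
    """Minimal sketch of the mtime-vs-build-time check (BUG-MEM-002)."""

    def __init__(self) -> None:
        self.built_at: Optional[float] = None

    def mark_built(self) -> None:
        self.built_at = time.time()

    def mark_stale(self) -> None:
        self.built_at = None  # forces the conservative path on the next query

    def is_fresh(self, data_file: Path) -> bool:
        """Fresh only if the index was built after the data last changed."""
        if self.built_at is None or not data_file.exists():
            return False
        return os.path.getmtime(data_file) <= self.built_at

tracker = StalenessTracker()
patterns = Path("patterns.json")  # hypothetical data file
patterns.write_text("{}")
tracker.mark_built()
print(tracker.is_fresh(patterns))   # expected True: built after the last write
time.sleep(0.05)                    # filesystems with coarse mtimes may need longer
patterns.write_text('{"changed": 1}')
print(tracker.is_fresh(patterns))   # expected False: data now newer than the build
```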
package/memory/storage.py
CHANGED
```diff
@@ -144,6 +144,8 @@ class MemoryStorage:
 
         # Clean up stale lock files from previous crashed processes
         self._cleanup_stale_locks()
+        # BUG-EP-015: Clean up orphaned temp files from kill -9 crashes
+        self._cleanup_stale_tmp_files()
 
     def _cleanup_stale_locks(self) -> None:
         """Remove stale .lock files older than 5 minutes (safe with concurrent processes).
@@ -167,10 +169,46 @@
             except OSError:
                 pass
 
+    def _cleanup_stale_tmp_files(self) -> None:
+        """Remove orphaned .tmp files older than 5 minutes from crash recovery.
+
+        BUG-EP-015: When a process is killed with SIGKILL during an atomic
+        write, the temp file (.tmp_*.json) is left behind because the rename
+        never completes. These accumulate over time.
+        """
+        try:
+            import time
+            now_real = time.time()
+            stale_seconds = 300  # 5 minutes
+            for tmp_file in self.base_path.rglob(".tmp_*.json"):
+                try:
+                    file_mtime = tmp_file.stat().st_mtime
+                    age_seconds = now_real - file_mtime
+                    if age_seconds > stale_seconds:
+                        tmp_file.unlink()
+                except OSError:
+                    pass
+        except OSError:
+            pass
+
     def _ensure_index(self) -> None:
-        """Initialize index.json if it doesn't exist."""
+        """Initialize or repair index.json if it doesn't exist or is corrupted."""
         index_path = self.base_path / "index.json"
-        if not index_path.exists():
+        needs_init = not index_path.exists()
+
+        # BUG-EP-012: Check for corrupted index.json (exists but invalid JSON)
+        if not needs_init:
+            try:
+                text = index_path.read_text(encoding="utf-8", errors="replace")
+                json.loads(text)
+            except (json.JSONDecodeError, OSError):
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Corrupted index.json detected, recreating from scratch"
+                )
+                needs_init = True
+
+        if needs_init:
             initial_index = {
                 "version": self.VERSION,
                 "last_updated": datetime.now(timezone.utc).isoformat(),
@@ -179,9 +217,23 @@
             self._atomic_write(index_path, initial_index)
 
     def _ensure_timeline(self) -> None:
-        """Initialize timeline.json if it doesn't exist."""
+        """Initialize or repair timeline.json if it doesn't exist or is corrupted."""
         timeline_path = self.base_path / "timeline.json"
-        if not timeline_path.exists():
+        needs_init = not timeline_path.exists()
+
+        # BUG-EP-012: Check for corrupted timeline.json (exists but invalid JSON)
+        if not needs_init:
+            try:
+                text = timeline_path.read_text(encoding="utf-8", errors="replace")
+                json.loads(text)
+            except (json.JSONDecodeError, OSError):
+                import logging
+                logging.getLogger(__name__).warning(
+                    "Corrupted timeline.json detected, recreating from scratch"
+                )
+                needs_init = True
+
+        if needs_init:
             initial_timeline = {
                 "version": self.VERSION,
                 "last_updated": datetime.now(timezone.utc).isoformat(),
```
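The BUG-EP-012 repair generalizes to any JSON file that must always be loadable: parse on startup and recreate from a default when parsing fails. A compact sketch, with `make_default` as a stand-in for the initial index/timeline builders:

```python
import json
import logging
from pathlib import Path
from typing import Any, Callable, Dict

logger = logging.getLogger(__name__)

def ensure_valid_json(path: Path, make_default: Callable[[], Dict[str, Any]]) -> None:
    """Recreate path from make_default() when it is missing or unparseable.

    Mirrors the _ensure_index/_ensure_timeline shape; the real storage
    layer additionally writes atomically (temp file plus os.replace).
    """
    needs_init = not path.exists()
    if not needs_init:
        try:
            json.loads(path.read_text(encoding="utf-8", errors="replace"))
        except (json.JSONDecodeError, OSError):
            logger.warning("Corrupted %s detected, recreating from scratch", path.name)
            needs_init = True
    if needs_init:
        path.write_text(json.dumps(make_default(), indent=2), encoding="utf-8")

ensure_valid_json(Path("index.json"), lambda: {"version": "1.0", "topics": []})
```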
package/memory/token_economics.py
CHANGED
```diff
@@ -465,6 +465,12 @@ class TokenEconomics:
 
         self._full_load_baseline: Optional[int] = None
 
+    # Maximum token counter value to prevent unbounded growth in very long
+    # sessions. Python ints don't overflow, but downstream JSON serializers
+    # and dashboard charts can choke on extremely large numbers.
+    # 10 billion tokens is well beyond any realistic single-session usage.
+    _MAX_TOKEN_COUNTER = 10_000_000_000
+
     def record_discovery(self, tokens: int) -> None:
         """
         Record tokens used for memory discovery/creation.
@@ -473,7 +479,10 @@
             tokens: Number of tokens used
         """
         if tokens > 0:
-            self.metrics["discovery_tokens"]
+            self.metrics["discovery_tokens"] = min(
+                self.metrics["discovery_tokens"] + tokens,
+                self._MAX_TOKEN_COUNTER,
+            )
 
     def record_read(self, tokens: int, layer: int) -> None:
         """
@@ -484,7 +493,10 @@
             layer: Memory layer accessed (1=topic, 2=summary, 3=full)
         """
         if tokens > 0:
-            self.metrics["read_tokens"]
+            self.metrics["read_tokens"] = min(
+                self.metrics["read_tokens"] + tokens,
+                self._MAX_TOKEN_COUNTER,
+            )
 
         if layer in (1, 2, 3):
             layer_key = f"layer{layer}_loads"
```
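The counter fix is a saturating add, `min(current + delta, cap)`. Its boundary behavior as a standalone check, reusing the diff's 10-billion cap:

```python
_MAX_TOKEN_COUNTER = 10_000_000_000  # cap from the diff

def saturating_add(current: int, delta: int, cap: int = _MAX_TOKEN_COUNTER) -> int:
    """Add delta but never exceed cap -- the diff's min() clamp."""
    return min(current + delta, cap)

assert saturating_add(0, 500) == 500                                      # normal accounting
assert saturating_add(_MAX_TOKEN_COUNTER - 1, 10) == _MAX_TOKEN_COUNTER   # clamps at the cap
assert saturating_add(_MAX_TOKEN_COUNTER, 1) == _MAX_TOKEN_COUNTER        # stays at the cap
```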
package/memory/vector_index.py
CHANGED
```diff
@@ -277,11 +277,25 @@ class VectorIndex:
         else:
             embeddings_matrix = np.array([]).reshape(0, self.dimension)
 
-
-
-
-
-        )
+        # Write to temp file then atomically rename to prevent corruption
+        import tempfile
+        npz_path = f"{path}.npz"
+        npz_dir = os.path.dirname(npz_path) or "."
+        tmp_fd, tmp_path = tempfile.mkstemp(dir=npz_dir, suffix=".npz.tmp")
+        os.close(tmp_fd)
+        try:
+            np.savez(
+                tmp_path,
+                embeddings=embeddings_matrix,
+                dimension=np.array([self.dimension])
+            )
+            os.replace(tmp_path, npz_path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
         # Save metadata as JSON sidecar
         sidecar_data = {
@@ -290,8 +304,23 @@
             "dimension": self.dimension
         }
 
-
-
+        import tempfile
+        json_path = f"{path}.json"
+        # Use atomic write to avoid corruption on crash (BUG-MEM-013 fix)
+        tmp_fd, tmp_path = tempfile.mkstemp(
+            dir=os.path.dirname(json_path) or ".",
+            suffix=".json.tmp"
+        )
+        try:
+            with os.fdopen(tmp_fd, "w", encoding="utf-8") as f:
+                json.dump(sidecar_data, f, indent=2, ensure_ascii=False)
+            os.replace(tmp_path, json_path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
     def load(self, path: str) -> None:
         """
```