claude-memory-agent 2.1.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +11 -1
- package/bin/lib/banner.js +39 -0
- package/bin/lib/environment.js +166 -0
- package/bin/lib/installer.js +291 -0
- package/bin/lib/models.js +95 -0
- package/bin/lib/steps/advanced.js +101 -0
- package/bin/lib/steps/confirm.js +87 -0
- package/bin/lib/steps/model.js +57 -0
- package/bin/lib/steps/provider.js +65 -0
- package/bin/lib/steps/scope.js +59 -0
- package/bin/lib/steps/server.js +74 -0
- package/bin/lib/ui.js +75 -0
- package/bin/onboarding.js +164 -0
- package/bin/postinstall.js +22 -257
- package/config.py +103 -4
- package/dashboard.html +697 -27
- package/hooks/extract_memories.py +439 -0
- package/hooks/pre_compact_hook.py +76 -0
- package/hooks/session_end_hook.py +149 -0
- package/hooks/stop_hook.py +372 -0
- package/install.py +91 -37
- package/main.py +1636 -892
- package/mcp_server.py +451 -0
- package/package.json +14 -3
- package/requirements.txt +12 -8
- package/services/adaptive_ranker.py +272 -0
- package/services/agent_catalog.json +153 -0
- package/services/agent_registry.py +245 -730
- package/services/claude_md_sync.py +320 -4
- package/services/consolidation.py +417 -0
- package/services/database.py +586 -105
- package/services/embedding_pipeline.py +262 -0
- package/services/embeddings.py +493 -85
- package/services/memory_decay.py +408 -0
- package/services/native_memory_paths.py +86 -0
- package/services/native_memory_sync.py +496 -0
- package/services/response_manager.py +183 -0
- package/services/terminal_ui.py +199 -0
- package/services/tier_manager.py +235 -0
- package/services/websocket.py +26 -6
- package/skills/search.py +136 -61
- package/skills/session_review.py +210 -23
- package/skills/store.py +125 -18
- package/terminal_dashboard.py +474 -0
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/curator.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
- package/skills/__pycache__/context.cpython-312.pyc +0 -0
- package/skills/__pycache__/curator.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/test_automation.py +0 -221
- package/test_complete.py +0 -338
- package/test_full.py +0 -322
- package/verify_db.py +0 -134
package/services/database.py
CHANGED
@@ -104,7 +104,7 @@ class SQLiteConnectionPool:
                 self.db_path,
                 timeout=self.timeout,
                 check_same_thread=False,
-                isolation_level=
+                isolation_level="DEFERRED"  # Use DEFERRED transactions for safety
             )
             conn.row_factory = sqlite3.Row
             # Enable WAL mode for better concurrent read/write performance
@@ -323,6 +323,37 @@ class DatabaseService:
             # Fallback for backward compatibility
             yield self.conn
 
+    @contextmanager
+    def transaction(self, conn=None):
+        """Context manager for transactional database operations.
+
+        Wraps a block in BEGIN/COMMIT with automatic ROLLBACK on error.
+        Prevents partial state corruption during multi-step operations.
+
+        Usage:
+            with self.transaction() as conn:
+                cursor = conn.cursor()
+                cursor.execute("INSERT ...")
+                cursor.execute("UPDATE ...")
+                # auto-committed on success, rolled back on exception
+
+        Args:
+            conn: Optional connection to use. If None, uses self.conn.
+        """
+        use_conn = conn or self.conn
+        if use_conn is None:
+            raise ConnectionPoolError("No database connection available")
+        try:
+            use_conn.execute("BEGIN")
+            yield use_conn
+            use_conn.execute("COMMIT")
+        except Exception:
+            try:
+                use_conn.execute("ROLLBACK")
+            except Exception as rollback_err:
+                logger.error(f"Rollback failed: {rollback_err}")
+            raise
+
     async def connect(self):
         """Establish database connection and initialize connection pool."""
         try:
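This transaction() helper is the backbone of the release's write-safety changes: the memory INSERT, the dedup merge, and the relationship upsert further down are all rewritten on top of it. A minimal standalone sketch of the same BEGIN/COMMIT/ROLLBACK pattern against stdlib sqlite3, showing the rollback behavior the docstring promises (table and wiring invented for illustration):

    import sqlite3
    from contextlib import contextmanager

    @contextmanager
    def transaction(conn):
        # Commit the block on success, roll everything back on any exception
        try:
            conn.execute("BEGIN")
            yield conn
            conn.execute("COMMIT")
        except Exception:
            conn.execute("ROLLBACK")
            raise

    conn = sqlite3.connect(":memory:", isolation_level="DEFERRED")
    conn.execute("CREATE TABLE memories (id INTEGER PRIMARY KEY, content TEXT)")

    try:
        with transaction(conn):
            conn.execute("INSERT INTO memories (content) VALUES ('partial')")
            raise RuntimeError("simulated failure mid-write")
    except RuntimeError:
        pass

    # The INSERT was rolled back, so no partial state survives the failure
    assert conn.execute("SELECT COUNT(*) FROM memories").fetchone()[0] == 0

The explicit BEGIN coexists with isolation_level="DEFERRED" because Python's sqlite3 driver only auto-opens a transaction when none is active.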
@@ -333,10 +364,15 @@ class DatabaseService:
                 timeout=DB_TIMEOUT
             )
             # Keep a primary connection for backward compatibility
-            self.conn = sqlite3.connect(
+            self.conn = sqlite3.connect(
+                self.db_path,
+                check_same_thread=False,
+                isolation_level="DEFERRED"
+            )
             self.conn.row_factory = sqlite3.Row
             # Enable WAL mode on primary connection too
             self.conn.execute("PRAGMA journal_mode=WAL")
+            self.conn.execute("PRAGMA synchronous=NORMAL")
             self.conn.execute("PRAGMA busy_timeout=30000")
             logger.info(f"Database connected with pool size {DB_POOL_SIZE}")
         except sqlite3.Error as e:
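With this hunk, the pool connections and the primary connection converge on the same recipe: DEFERRED transactions, WAL journaling, relaxed syncing, and a generous busy timeout. A sketch of that recipe in isolation (file name assumed), with what each setting buys:

    import sqlite3

    conn = sqlite3.connect(
        "memory.db",                # path assumed for illustration
        check_same_thread=False,    # connection is shared across threads
        isolation_level="DEFERRED"  # take locks lazily, at first actual write
    )
    conn.execute("PRAGMA journal_mode=WAL")    # readers no longer block the writer
    conn.execute("PRAGMA synchronous=NORMAL")  # fewer fsyncs; safe in combination with WAL
    conn.execute("PRAGMA busy_timeout=30000")  # wait up to 30s on a lock instead of failing

    print(conn.execute("PRAGMA journal_mode").fetchone()[0])  # -> 'wal'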
@@ -567,6 +603,7 @@ class DatabaseService:
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_type)")
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_success ON memories(success)")
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance)")
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_outcome_status ON memories(outcome_status)")
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_patterns_problem ON patterns(problem_type)")
 
         # Migration helper function
@@ -635,6 +672,66 @@ class DatabaseService:
         safe_add_column("memories", "failed_in", "TEXT")  # JSON array of contexts where solution failed
         safe_add_column("memories", "context_confidence", "REAL")  # Context-specific confidence score
 
+        # Migration: Add CLaRa-inspired tier columns (v2.4.0)
+        # Hierarchical memory tiers: hot (fast access), warm (compressed), cold (archive)
+        safe_add_column("memories", "tier", "TEXT DEFAULT 'hot'")
+        safe_add_column("memories", "tier_changed_at", "TEXT")
+        safe_add_column("memories", "compressed_content", "TEXT")  # Compressed version for warm tier
+
+        # Tier index for fast filtering
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_tier ON memories(tier)")
+
+        # Migration: Consolidate legacy outcome/success into outcome_status (v2.5.0)
+        # Maps: success=1 -> 'success', success=0 -> 'failed', outcome text -> outcome_status
+        # Only updates rows that still have outcome_status='pending' and have legacy data
+        try:
+            cursor.execute("""
+                UPDATE memories
+                SET outcome_status = 'success'
+                WHERE outcome_status = 'pending'
+                  AND success = 1
+                  AND outcome_status != 'success'
+            """)
+            cursor.execute("""
+                UPDATE memories
+                SET outcome_status = 'failed'
+                WHERE outcome_status = 'pending'
+                  AND success = 0
+                  AND outcome_status != 'failed'
+            """)
+            rows_migrated = cursor.rowcount
+            if rows_migrated > 0:
+                logger.info(f"Migration: Consolidated {rows_migrated} legacy success values into outcome_status")
+        except Exception as e:
+            logger.debug(f"Legacy outcome consolidation skipped: {e}")
+
+        # Memory archive table for consolidation (v2.4.0)
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS memory_archive (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                original_id INTEGER,
+                type TEXT,
+                content TEXT,
+                embedding TEXT,
+                project_path TEXT,
+                session_id TEXT,
+                importance INTEGER,
+                access_count INTEGER,
+                decay_factor REAL,
+                metadata TEXT,
+                archive_reason TEXT,
+                relevance_score_at_archive REAL,
+                consolidated_into INTEGER,
+                archived_at TEXT DEFAULT (datetime('now'))
+            )
+        """)
+        # Migration: ensure memory_archive has consolidation column (v2.4.0)
+        # Older schema may lack this column
+        safe_add_column("memory_archive", "consolidated_into", "INTEGER")
+
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_original ON memory_archive(original_id)")
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_consolidated ON memory_archive(consolidated_into)")
+
         # ============================================================
         # SESSION TIMELINE TABLES (Anti-Hallucination Layer)
         # ============================================================
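These migrations lean on the safe_add_column() helper defined earlier in the file; its body is not part of this hunk, but the call sites imply an idempotent ALTER TABLE that tolerates re-runs. A plausible reconstruction, hypothetical where noted:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cursor = conn.cursor()
    cursor.execute("CREATE TABLE memories (id INTEGER PRIMARY KEY)")

    def safe_add_column(table: str, column: str, col_type: str) -> None:
        # Hypothetical body: swallow only the "duplicate column" error, so the
        # migration can run on every startup without tracking schema versions
        try:
            cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}")
        except sqlite3.OperationalError as e:
            if "duplicate column" not in str(e).lower():
                raise

    safe_add_column("memories", "tier", "TEXT DEFAULT 'hot'")
    safe_add_column("memories", "tier", "TEXT DEFAULT 'hot'")  # re-run is a no-op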
@@ -890,32 +987,6 @@ class DatabaseService:
         # MEMORY CLEANUP AND ARCHIVAL TABLES
         # ============================================================
 
-        # Archived memories (soft-deleted for recovery)
-        cursor.execute("""
-            CREATE TABLE IF NOT EXISTS memory_archive (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                original_id INTEGER NOT NULL,
-
-                -- Original memory data
-                type TEXT NOT NULL,
-                content TEXT NOT NULL,
-                embedding TEXT,
-                project_path TEXT,
-                session_id TEXT,
-                importance INTEGER,
-                access_count INTEGER,
-                decay_factor REAL,
-                metadata TEXT,
-
-                -- Archive metadata
-                archive_reason TEXT NOT NULL,
-                archived_at TEXT DEFAULT (datetime('now')),
-                archived_by TEXT,
-                relevance_score_at_archive REAL,
-                expires_at TEXT
-            )
-        """)
-
         # Cleanup configuration per project
         cursor.execute("""
             CREATE TABLE IF NOT EXISTS cleanup_config (
@@ -1125,10 +1196,31 @@ class DatabaseService:
         self.conn.commit()
 
     def _serialize_embedding(self, embedding: List[float]) -> str:
-
+        """Serialize embedding to binary format (base64-encoded struct pack).
+
+        Uses 'b64:' prefix to distinguish from legacy JSON format.
+        ~30-35% smaller than JSON serialization.
+        """
+        import struct
+        import base64
+        packed = struct.pack(f'{len(embedding)}f', *embedding)
+        return 'b64:' + base64.b64encode(packed).decode('ascii')
 
     def _deserialize_embedding(self, embedding_str: str) -> List[float]:
-
+        """Deserialize embedding from binary or JSON format.
+
+        Auto-detects format: 'b64:' prefix = binary, otherwise JSON.
+        Backward compatible with existing JSON-serialized embeddings.
+        """
+        if not embedding_str:
+            return []
+        if embedding_str.startswith('b64:'):
+            import struct
+            import base64
+            raw = base64.b64decode(embedding_str[4:])
+            count = len(raw) // 4  # 4 bytes per float32
+            return list(struct.unpack(f'{count}f', raw))
+        return json.loads(embedding_str)
 
     def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
         a = np.array(vec1)
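The new format is easy to verify in isolation: pack each float as a 4-byte float32, base64 the bytes, and tag the string so the reader can tell it apart from legacy JSON. A round-trip sketch, including the size comparison the docstring claims (vector values invented):

    import base64
    import json
    import struct

    embedding = [0.12, -0.5, 0.33, 0.99] * 96  # stand-in for a 384-dim vector

    packed = struct.pack(f"{len(embedding)}f", *embedding)   # float32, 4 bytes each
    b64 = "b64:" + base64.b64encode(packed).decode("ascii")  # tagged for auto-detection

    raw = base64.b64decode(b64[4:])
    restored = list(struct.unpack(f"{len(raw) // 4}f", raw))

    assert len(restored) == len(embedding)       # values match to float32 precision
    print(len(b64), len(json.dumps(embedding)))  # the binary form is the smaller of the two

The trade is float32 precision for space, which is harmless for cosine-similarity ranking.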
@@ -1193,6 +1285,59 @@ class DatabaseService:
 
         return round(score, 4)
 
+    async def migrate_embeddings_to_binary(self, batch_size: int = 100) -> Dict[str, int]:
+        """Migrate existing JSON-serialized embeddings to binary format.
+
+        Processes in batches to avoid lock contention. Safe to run multiple times.
+
+        Returns:
+            Dict with 'migrated', 'skipped', 'errors' counts.
+        """
+        import struct
+        import base64
+        cursor = self.conn.cursor()
+        migrated = 0
+        skipped = 0
+        errors = 0
+
+        for table in ['memories', 'patterns', 'timeline_events']:
+            try:
+                cursor.execute(f"SELECT id, embedding FROM {table} WHERE embedding IS NOT NULL")
+            except Exception:
+                continue
+
+            batch = []
+            for row in cursor.fetchall():
+                emb_str = row['embedding']
+                if not emb_str or emb_str.startswith('b64:'):
+                    skipped += 1
+                    continue
+                try:
+                    floats = json.loads(emb_str)
+                    packed = struct.pack(f'{len(floats)}f', *floats)
+                    new_val = 'b64:' + base64.b64encode(packed).decode('ascii')
+                    batch.append((new_val, row['id']))
+                except Exception:
+                    errors += 1
+                    continue
+
+                if len(batch) >= batch_size:
+                    cursor.executemany(
+                        f"UPDATE {table} SET embedding = ? WHERE id = ?", batch
+                    )
+                    self.conn.commit()
+                    migrated += len(batch)
+                    batch = []
+
+            if batch:
+                cursor.executemany(
+                    f"UPDATE {table} SET embedding = ? WHERE id = ?", batch
+                )
+                self.conn.commit()
+                migrated += len(batch)
+
+        return {"migrated": migrated, "skipped": skipped, "errors": errors}
+
     async def update_access_stats(self, memory_id: int):
         """Update access statistics for a memory."""
         cursor = self.conn.cursor()
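Nothing in this hunk calls migrate_embeddings_to_binary(); presumably it is wired to an admin or startup path elsewhere in the release. A usage sketch, where db stands for a connected DatabaseService:

    import asyncio

    async def backfill(db):
        # Idempotent: rows already tagged 'b64:' are counted as skipped
        stats = await db.migrate_embeddings_to_binary(batch_size=100)
        print(f"migrated={stats['migrated']} skipped={stats['skipped']} errors={stats['errors']}")

    # asyncio.run(backfill(db))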
@@ -1401,53 +1546,53 @@ class DatabaseService:
         # Clamp confidence to valid range
         confidence = max(0.0, min(1.0, confidence))
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with self.transaction() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT INTO memories (
+                    type, content, embedding, metadata,
+                    project_path, project_name, project_type, tech_stack,
+                    session_id, chat_id,
+                    agent_type, skill_used, tools_used,
+                    outcome, success,
+                    tags, importance, confidence,
+                    outcome_status, fixed, did_not_fix, caused, superseded_by,
+                    worked_in, failed_in, context_confidence
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    memory_type,
+                    content,
+                    self._serialize_embedding(embedding),
+                    json.dumps(metadata or {}),
+                    project_path,
+                    project_name,
+                    project_type,
+                    json.dumps(tech_stack) if tech_stack else None,
+                    session_id,
+                    chat_id,
+                    agent_type,
+                    skill_used,
+                    json.dumps(tools_used) if tools_used else None,
+                    outcome,
+                    1 if success else (0 if success is False else None),
+                    json.dumps(tags) if tags else None,
+                    importance,
+                    confidence,
+                    outcome_status,
+                    json.dumps(fixed) if fixed else None,
+                    json.dumps(did_not_fix) if did_not_fix else None,
+                    json.dumps(caused) if caused else None,
+                    superseded_by,
+                    json.dumps(worked_in) if worked_in else None,
+                    json.dumps(failed_in) if failed_in else None,
+                    context_confidence
+                )
             )
-
-        self.conn.commit()
-        memory_id = cursor.lastrowid
+            memory_id = cursor.lastrowid
 
-        # Add to FAISS index if available
+        # Add to FAISS index if available (outside transaction - index is in-memory)
         if self._memories_index and embedding:
             self._memories_index.add(memory_id, embedding)
 
@@ -1469,7 +1614,10 @@ class DatabaseService:
         include_unreliable: bool = False,
         outcome_status: Optional[str] = None,
         # Context-aware search
-        current_context: Optional[Dict[str, Any]] = None
+        current_context: Optional[Dict[str, Any]] = None,
+        # Adaptive ranking
+        query_text: Optional[str] = None,
+        temperature: Optional[float] = None
     ) -> List[Dict[str, Any]]:
         """Search for similar memories with optional filters.
 
@@ -1653,13 +1801,28 @@ class DatabaseService:
                 "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
             })
 
-        #
-
-
-
-
-
-
+        # Compute decay multipliers
+        from services.memory_decay import calculate_search_decay_multiplier
+        for r in results:
+            r["_decay_multiplier"] = calculate_search_decay_multiplier(r)
+
+        # Adaptive ranking: multi-signal scoring with temperature control
+        try:
+            from services.adaptive_ranker import AdaptiveRanker
+            ranker = AdaptiveRanker(temperature=temperature)
+            results = ranker.rank_results(
+                results,
+                query_text=query_text or '',
+                temperature=temperature
+            )
+        except ImportError:
+            # Fallback to original static formula if adaptive ranker unavailable
+            results.sort(
+                key=lambda x: (
+                    (x["similarity"] * 0.7) + (x["confidence"] * 0.3) + x.get("context_adjustment", 0.0)
+                ) * x.get("_decay_multiplier", 1.0),
+                reverse=True
+            )
 
         # Update last_accessed for returned results
         if results:
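The ImportError fallback preserves the pre-2.2 static formula, now multiplied by the decay term: score = (0.7*similarity + 0.3*confidence + context_adjustment) * decay_multiplier. A toy run showing how decay can reorder two otherwise close hits (numbers invented for illustration):

    def score(x):
        base = x["similarity"] * 0.7 + x["confidence"] * 0.3 + x.get("context_adjustment", 0.0)
        return base * x.get("_decay_multiplier", 1.0)

    results = [
        {"id": 1, "similarity": 0.90, "confidence": 0.80, "_decay_multiplier": 0.6},  # stale memory
        {"id": 2, "similarity": 0.85, "confidence": 0.70, "_decay_multiplier": 1.0},  # fresh memory
    ]

    # id 1: (0.63 + 0.24) * 0.6 = 0.522   id 2: (0.595 + 0.21) * 1.0 = 0.805
    results.sort(key=score, reverse=True)
    print([r["id"] for r in results])  # -> [2, 1]: freshness outweighs raw similarity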
@@ -1807,9 +1970,15 @@ class DatabaseService:
                 "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
             })
 
-        # Sort by combined score including outcome boost
+        # Sort by combined score including outcome boost, context adjustment, and decay
+        from services.memory_decay import calculate_search_decay_multiplier
+        for r in results:
+            r["_decay_multiplier"] = calculate_search_decay_multiplier(r)
+
         results.sort(
-            key=lambda x: (
+            key=lambda x: (
+                (x["similarity"] * 0.7) + (x["confidence"] * 0.3) + x.get("context_adjustment", 0.0)
+            ) * x.get("outcome_boost", 1.0) * x.get("_decay_multiplier", 1.0),
             reverse=True
         )
 
@@ -1870,6 +2039,185 @@ class DatabaseService:
             "message": f"Confidence updated from {old_confidence:.3f} to {confidence:.3f}"
         }
 
+    async def find_similar_for_dedup(
+        self,
+        embedding: List[float],
+        project_path: Optional[str] = None,
+        threshold: float = 0.92,
+        limit: int = 3
+    ) -> List[Dict[str, Any]]:
+        """Lightweight similarity search specifically for dedup at ingest time.
+
+        Optimized for speed: returns minimal fields (id, content length,
+        importance, confidence, similarity) needed for merge decisions.
+
+        Args:
+            embedding: The embedding vector of the new content
+            project_path: Only check within the same project (required for scoping)
+            threshold: Minimum cosine similarity to consider a duplicate
+            limit: Maximum number of matches to return
+
+        Returns:
+            List of dicts with id, content, importance, confidence, similarity,
+            sorted by similarity descending. Empty list if no matches.
+        """
+        # Normalize project path for consistent matching
+        project_path = normalize_path(project_path)
+
+        # Ensure indexes are initialized
+        await self._init_vector_indexes()
+
+        cursor = self.conn.cursor()
+
+        # Try FAISS index first for fast search
+        if self._memories_index and self._memories_index.size() > 0:
+            # Get candidates from FAISS (search broadly, filter by project after)
+            candidate_limit = limit * 10  # Over-fetch to account for project filtering
+            candidates = self._memories_index.search(
+                query_embedding=embedding,
+                k=candidate_limit,
+                threshold=threshold
+            )
+
+            if candidates:
+                candidate_ids = [c[0] for c in candidates]
+                similarity_map = {c[0]: c[1] for c in candidates}
+
+                placeholders = ",".join("?" * len(candidate_ids))
+                query = f"""
+                    SELECT id, content, importance, confidence
+                    FROM memories
+                    WHERE id IN ({placeholders})
+                """
+                params = list(candidate_ids)
+
+                if project_path:
+                    query += " AND project_path = ?"
+                    params.append(project_path)
+
+                # Exclude already-failed memories from dedup
+                query += " AND (outcome_status IS NULL OR outcome_status != 'failed')"
+                query += " AND (failure_count IS NULL OR failure_count < 3)"
+
+                cursor.execute(query, params)
+                rows = cursor.fetchall()
+
+                results = []
+                for row in rows:
+                    similarity = similarity_map.get(row["id"], 0)
+                    if similarity >= threshold:
+                        results.append({
+                            "id": row["id"],
+                            "content": row["content"],
+                            "importance": row["importance"],
+                            "confidence": row["confidence"] if row["confidence"] is not None else 0.5,
+                            "similarity": similarity
+                        })
+
+                results.sort(key=lambda x: x["similarity"], reverse=True)
+                return results[:limit]
+
+        # Fallback: numpy-based linear scan (only within project for speed)
+        query = """
+            SELECT id, content, embedding, importance, confidence
+            FROM memories
+            WHERE embedding IS NOT NULL
+              AND (outcome_status IS NULL OR outcome_status != 'failed')
+              AND (failure_count IS NULL OR failure_count < 3)
+        """
+        params = []
+
+        if project_path:
+            query += " AND project_path = ?"
+            params.append(project_path)
+
+        cursor.execute(query, params)
+        rows = cursor.fetchall()
+
+        results = []
+        for row in rows:
+            stored_embedding = self._deserialize_embedding(row["embedding"])
+            if stored_embedding:
+                similarity = self._cosine_similarity(embedding, stored_embedding)
+                if similarity >= threshold:
+                    results.append({
+                        "id": row["id"],
+                        "content": row["content"],
+                        "importance": row["importance"],
+                        "confidence": row["confidence"] if row["confidence"] is not None else 0.5,
+                        "similarity": similarity
+                    })
+
+        results.sort(key=lambda x: x["similarity"], reverse=True)
+        return results[:limit]
+
+    async def merge_memory(
+        self,
+        existing_id: int,
+        new_content: str,
+        new_importance: int,
+        new_confidence: float
+    ) -> int:
+        """Merge new content into an existing memory (dedup merge).
+
+        Keeps the longer content, takes the higher importance and confidence,
+        increments access_count, and updates the timestamp.
+
+        Args:
+            existing_id: ID of the existing memory to merge into
+            new_content: Content from the new (duplicate) memory
+            new_importance: Importance from the new memory
+            new_confidence: Confidence from the new memory
+
+        Returns:
+            The existing memory ID that was updated
+        """
+        with self.transaction() as conn:
+            cursor = conn.cursor()
+
+            cursor.execute(
+                "SELECT id, content, importance, confidence FROM memories WHERE id = ?",
+                [existing_id]
+            )
+            row = cursor.fetchone()
+
+            if not row:
+                raise ValueError(f"Memory with ID {existing_id} not found for merge")
+
+            # Keep the longer content (more detail is better)
+            merged_content = new_content if len(new_content) > len(row["content"]) else row["content"]
+
+            # Take the higher importance and confidence
+            merged_importance = max(new_importance, row["importance"] or 0)
+            merged_confidence = max(
+                new_confidence,
+                row["confidence"] if row["confidence"] is not None else 0.5
+            )
+            merged_confidence = max(0.0, min(1.0, merged_confidence))
+
+            cursor.execute(
+                """
+                UPDATE memories
+                SET content = ?,
+                    importance = ?,
+                    confidence = ?,
+                    access_count = COALESCE(access_count, 0) + 1,
+                    updated_at = datetime('now'),
+                    last_accessed = datetime('now')
+                WHERE id = ?
+                """,
+                [merged_content, merged_importance, merged_confidence, existing_id]
+            )
+
+            logger.info(
+                f"Merged memory into #{existing_id}: "
+                f"importance {row['importance']}->{merged_importance}, "
+                f"confidence {row['confidence']}->{merged_confidence:.2f}, "
+                f"content_len {len(row['content'])}->{len(merged_content)}"
+            )
+
+            return existing_id
+
     async def keyword_search(
         self,
         query: str,
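Together these two methods give the store path ingest-time deduplication: look for a near-duplicate above the 0.92 cosine threshold, merge into the best match, otherwise insert. The caller is not shown in this hunk, so the wiring below is an assumed sketch (the store_memory name included):

    async def store_or_merge(db, content, embedding, project_path, importance, confidence):
        matches = await db.find_similar_for_dedup(
            embedding, project_path=project_path, threshold=0.92, limit=3
        )
        if matches:
            # Best match first; merge keeps the longer content and the
            # higher of each memory's importance and confidence
            return await db.merge_memory(matches[0]["id"], content, importance, confidence)
        return await db.store_memory(  # normal insert path; method name assumed
            content=content, embedding=embedding, project_path=project_path,
            importance=importance, confidence=confidence
        )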
@@ -3144,28 +3492,30 @@ class DatabaseService:
             return {"success": False, "error": f"Target memory {target_id} not found"}
 
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            with self.transaction() as conn:
+                tx_cursor = conn.cursor()
+                tx_cursor.execute("""
+                    INSERT INTO memory_relationships (source_id, target_id, relationship, strength)
+                    VALUES (?, ?, ?, ?)
+                """, (source_id, target_id, relationship, strength))
+
+                return {
+                    "success": True,
+                    "id": tx_cursor.lastrowid,
+                    "source_id": source_id,
+                    "target_id": target_id,
+                    "relationship": relationship,
+                    "strength": strength
+                }
         except sqlite3.IntegrityError:
             # Relationship already exists, update strength
-
-
-
-
-
-
+            with self.transaction() as conn:
+                tx_cursor = conn.cursor()
+                tx_cursor.execute("""
+                    UPDATE memory_relationships
+                    SET strength = ?, created_at = CURRENT_TIMESTAMP
+                    WHERE source_id = ? AND target_id = ? AND relationship = ?
+                """, (strength, source_id, target_id, relationship))
 
             return {
                 "success": True,
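The relationship write keeps its insert-then-update shape (try the INSERT; when the unique constraint fires, refresh strength instead), but each arm now runs in its own transaction. The pattern in miniature, using the stdlib "with conn" transaction scope in place of the class helper, with DDL assumed from the queries:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("""
        CREATE TABLE memory_relationships (
            source_id INTEGER, target_id INTEGER, relationship TEXT,
            strength REAL, created_at TEXT DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(source_id, target_id, relationship)
        )
    """)

    def link(source_id, target_id, relationship, strength):
        try:
            with conn:  # commit on success, rollback on exception
                conn.execute(
                    "INSERT INTO memory_relationships (source_id, target_id, relationship, strength) "
                    "VALUES (?, ?, ?, ?)",
                    (source_id, target_id, relationship, strength))
        except sqlite3.IntegrityError:
            with conn:
                conn.execute(
                    "UPDATE memory_relationships SET strength = ?, created_at = CURRENT_TIMESTAMP "
                    "WHERE source_id = ? AND target_id = ? AND relationship = ?",
                    (strength, source_id, target_id, relationship))

    link(1, 2, "contradicts", 0.5)
    link(1, 2, "contradicts", 0.9)  # duplicate: updates strength instead of failing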
@@ -3258,6 +3608,137 @@ class DatabaseService:
         await traverse(memory_id, 1)
         return results
 
+    async def get_related_memories_batch(
+        self,
+        memory_ids: List[int],
+        relationship: str = None,
+        direction: str = 'both'
+    ) -> Dict[int, list]:
+        """Get related memories for multiple IDs in a single query batch.
+
+        More efficient than calling get_related_memories() in a loop because
+        it uses IN (...) clauses instead of individual queries per memory_id.
+
+        Args:
+            memory_ids: List of memory IDs to find relationships for
+            relationship: Optional filter by relationship type
+            direction: 'outgoing', 'incoming', or 'both'
+
+        Returns:
+            Dict mapping memory_id -> list of related memories
+        """
+        if not memory_ids:
+            return {}
+
+        cursor = self.conn.cursor()
+        results = {mid: [] for mid in memory_ids}
+        placeholders = ','.join('?' * len(memory_ids))
+
+        queries = []
+        if direction in ('outgoing', 'both'):
+            q = f"""
+                SELECT mr.source_id, mr.target_id as related_id, mr.relationship,
+                       mr.strength, 'outgoing' as direction,
+                       m.type, m.content, m.project_path, m.importance, m.created_at
+                FROM memory_relationships mr
+                JOIN memories m ON m.id = mr.target_id
+                WHERE mr.source_id IN ({placeholders})
+            """
+            params = list(memory_ids)
+            if relationship:
+                q += " AND mr.relationship = ?"
+                params.append(relationship)
+            queries.append((q, params, 'source_id'))
+
+        if direction in ('incoming', 'both'):
+            q = f"""
+                SELECT mr.target_id, mr.source_id as related_id, mr.relationship,
+                       mr.strength, 'incoming' as direction,
+                       m.type, m.content, m.project_path, m.importance, m.created_at
+                FROM memory_relationships mr
+                JOIN memories m ON m.id = mr.source_id
+                WHERE mr.target_id IN ({placeholders})
+            """
+            params = list(memory_ids)
+            if relationship:
+                q += " AND mr.relationship = ?"
+                params.append(relationship)
+            queries.append((q, params, 'target_id'))
+
+        for query, params, id_col in queries:
+            cursor.execute(query, params)
+            for row in cursor.fetchall():
+                owner_id = row[id_col]
+                if owner_id in results:
+                    results[owner_id].append({
+                        "id": row["related_id"],
+                        "relationship": row["relationship"],
+                        "strength": row["strength"],
+                        "direction": row["direction"],
+                        "type": row["type"],
+                        "content": row["content"][:200] + "..." if len(row["content"]) > 200 else row["content"],
+                        "project_path": row["project_path"],
+                        "importance": row["importance"],
+                        "created_at": row["created_at"]
+                    })
+
+        return results
+
+    async def find_contradictions_batch(self, memory_ids: List[int]) -> Dict[int, list]:
+        """Find contradictions for multiple memories in a single query.
+
+        Args:
+            memory_ids: List of memory IDs
+
+        Returns:
+            Dict mapping memory_id -> list of contradicting memories
+        """
+        if not memory_ids:
+            return {}
+
+        cursor = self.conn.cursor()
+        results = {mid: [] for mid in memory_ids}
+        placeholders = ','.join('?' * len(memory_ids))
+
+        # Check both directions of contradiction relationships
+        cursor.execute(f"""
+            SELECT mr.source_id, mr.target_id, mr.strength,
+                   m.id as related_id, m.type, m.content, m.project_path
+            FROM memory_relationships mr
+            JOIN memories m ON m.id = mr.target_id
+            WHERE mr.relationship = 'contradicts'
+              AND mr.source_id IN ({placeholders})
+        """, memory_ids)
+
+        for row in cursor.fetchall():
+            src = row["source_id"]
+            if src in results:
+                results[src].append({
+                    "id": row["related_id"],
+                    "content": row["content"][:200],
+                    "type": row["type"]
+                })
+
+        cursor.execute(f"""
+            SELECT mr.target_id, mr.source_id, mr.strength,
+                   m.id as related_id, m.type, m.content, m.project_path
+            FROM memory_relationships mr
+            JOIN memories m ON m.id = mr.source_id
+            WHERE mr.relationship = 'contradicts'
+              AND mr.target_id IN ({placeholders})
+        """, memory_ids)
+
+        for row in cursor.fetchall():
+            tgt = row["target_id"]
+            if tgt in results:
+                results[tgt].append({
+                    "id": row["related_id"],
+                    "content": row["content"][:200],
+                    "type": row["type"]
+                })
+
+        return results
+
     async def get_causal_chain(self, memory_id: int, max_depth: int = 5) -> dict:
         """Traverse the fixes/caused_by chain to find root cause and all fixes.
 
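The batch variants replace an N-query loop with at most two IN (...) queries per helper, which matters when annotating a whole page of search results with their relationships. Usage is a straight swap; a sketch with invented IDs, result shape per the docstrings above:

    async def annotate(db, memory_ids):
        # Two IN (...) queries instead of one query per memory
        related = await db.get_related_memories_batch(memory_ids, direction="both")
        contradictions = await db.find_contradictions_batch(memory_ids)
        for mid, links in related.items():
            for link in links:
                print(mid, link["relationship"], "->", link["id"], f"({link['direction']})")
        return related, contradictions

    # e.g. related, contradictions = await annotate(db, [101, 102, 103])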