claude-memory-agent 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/bin/cli.js +11 -1
  2. package/bin/lib/banner.js +39 -0
  3. package/bin/lib/environment.js +166 -0
  4. package/bin/lib/installer.js +291 -0
  5. package/bin/lib/models.js +95 -0
  6. package/bin/lib/steps/advanced.js +101 -0
  7. package/bin/lib/steps/confirm.js +87 -0
  8. package/bin/lib/steps/model.js +57 -0
  9. package/bin/lib/steps/provider.js +65 -0
  10. package/bin/lib/steps/scope.js +59 -0
  11. package/bin/lib/steps/server.js +74 -0
  12. package/bin/lib/ui.js +75 -0
  13. package/bin/onboarding.js +164 -0
  14. package/bin/postinstall.js +22 -257
  15. package/config.py +103 -4
  16. package/dashboard.html +697 -27
  17. package/hooks/extract_memories.py +439 -0
  18. package/hooks/pre_compact_hook.py +76 -0
  19. package/hooks/session_end_hook.py +149 -0
  20. package/hooks/stop_hook.py +372 -0
  21. package/install.py +91 -37
  22. package/main.py +1636 -892
  23. package/mcp_server.py +451 -0
  24. package/package.json +14 -3
  25. package/requirements.txt +12 -8
  26. package/services/adaptive_ranker.py +272 -0
  27. package/services/agent_catalog.json +153 -0
  28. package/services/agent_registry.py +245 -730
  29. package/services/claude_md_sync.py +320 -4
  30. package/services/consolidation.py +417 -0
  31. package/services/database.py +586 -105
  32. package/services/embedding_pipeline.py +262 -0
  33. package/services/embeddings.py +493 -85
  34. package/services/memory_decay.py +408 -0
  35. package/services/native_memory_paths.py +86 -0
  36. package/services/native_memory_sync.py +496 -0
  37. package/services/response_manager.py +183 -0
  38. package/services/terminal_ui.py +199 -0
  39. package/services/tier_manager.py +235 -0
  40. package/services/websocket.py +26 -6
  41. package/skills/search.py +136 -61
  42. package/skills/session_review.py +210 -23
  43. package/skills/store.py +125 -18
  44. package/terminal_dashboard.py +474 -0
  45. package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
  46. package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
  47. package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
  48. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  49. package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
  50. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  51. package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
  52. package/services/__pycache__/auth.cpython-312.pyc +0 -0
  53. package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
  54. package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
  55. package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
  56. package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
  57. package/services/__pycache__/confidence.cpython-312.pyc +0 -0
  58. package/services/__pycache__/curator.cpython-312.pyc +0 -0
  59. package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
  60. package/services/__pycache__/database.cpython-312.pyc +0 -0
  61. package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
  62. package/services/__pycache__/insights.cpython-312.pyc +0 -0
  63. package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
  64. package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
  65. package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
  66. package/services/__pycache__/timeline.cpython-312.pyc +0 -0
  67. package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
  68. package/services/__pycache__/websocket.cpython-312.pyc +0 -0
  69. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  70. package/skills/__pycache__/admin.cpython-312.pyc +0 -0
  71. package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
  72. package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
  73. package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
  74. package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
  75. package/skills/__pycache__/context.cpython-312.pyc +0 -0
  76. package/skills/__pycache__/curator.cpython-312.pyc +0 -0
  77. package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
  78. package/skills/__pycache__/insights.cpython-312.pyc +0 -0
  79. package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
  80. package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
  81. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  82. package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
  83. package/skills/__pycache__/state.cpython-312.pyc +0 -0
  84. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  85. package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
  86. package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
  87. package/skills/__pycache__/verification.cpython-312.pyc +0 -0
  88. package/test_automation.py +0 -221
  89. package/test_complete.py +0 -338
  90. package/test_full.py +0 -322
  91. package/verify_db.py +0 -134
@@ -104,7 +104,7 @@ class SQLiteConnectionPool:
             self.db_path,
             timeout=self.timeout,
             check_same_thread=False,
-            isolation_level=None  # Autocommit mode for better concurrency
+            isolation_level="DEFERRED"  # Use DEFERRED transactions for safety
         )
         conn.row_factory = sqlite3.Row
         # Enable WAL mode for better concurrent read/write performance
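A note on the isolation_level change above: with isolation_level=None, sqlite3 autocommits every statement, while "DEFERRED" makes the driver open an implicit transaction before data-modifying statements, so nothing is durable until commit(). A minimal standalone sketch of the difference (plain sqlite3, not package code):

    import sqlite3

    conn = sqlite3.connect(":memory:", isolation_level="DEFERRED")
    conn.execute("CREATE TABLE t (x INTEGER)")
    conn.commit()

    conn.execute("INSERT INTO t VALUES (1)")  # implicitly opens a DEFERRED transaction
    conn.rollback()                           # undoes the INSERT; with isolation_level=None
                                              # it would already have been autocommitted
    print(conn.execute("SELECT COUNT(*) FROM t").fetchone()[0])  # prints 0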
@@ -323,6 +323,37 @@ class DatabaseService:
         # Fallback for backward compatibility
         yield self.conn

+    @contextmanager
+    def transaction(self, conn=None):
+        """Context manager for transactional database operations.
+
+        Wraps a block in BEGIN/COMMIT with automatic ROLLBACK on error.
+        Prevents partial state corruption during multi-step operations.
+
+        Usage:
+            with self.transaction() as conn:
+                cursor = conn.cursor()
+                cursor.execute("INSERT ...")
+                cursor.execute("UPDATE ...")
+                # auto-committed on success, rolled back on exception
+
+        Args:
+            conn: Optional connection to use. If None, uses self.conn.
+        """
+        use_conn = conn or self.conn
+        if use_conn is None:
+            raise ConnectionPoolError("No database connection available")
+        try:
+            use_conn.execute("BEGIN")
+            yield use_conn
+            use_conn.execute("COMMIT")
+        except Exception:
+            try:
+                use_conn.execute("ROLLBACK")
+            except Exception as rollback_err:
+                logger.error(f"Rollback failed: {rollback_err}")
+            raise
+
     async def connect(self):
         """Establish database connection and initialize connection pool."""
         try:
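The practical effect of the new transaction() helper is that a failure mid-block leaves the database untouched. A hedged caller-side sketch, assuming `db` is a connected DatabaseService instance:

    try:
        with db.transaction() as conn:
            cur = conn.cursor()
            cur.execute("UPDATE memories SET importance = importance + 1 WHERE id = ?", (42,))
            raise RuntimeError("simulated failure mid-operation")
    except RuntimeError:
        pass  # the UPDATE was rolled back; memory 42 is unchanged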
@@ -333,10 +364,15 @@ class DatabaseService:
                 timeout=DB_TIMEOUT
             )
             # Keep a primary connection for backward compatibility
-            self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
+            self.conn = sqlite3.connect(
+                self.db_path,
+                check_same_thread=False,
+                isolation_level="DEFERRED"
+            )
             self.conn.row_factory = sqlite3.Row
             # Enable WAL mode on primary connection too
             self.conn.execute("PRAGMA journal_mode=WAL")
+            self.conn.execute("PRAGMA synchronous=NORMAL")
             self.conn.execute("PRAGMA busy_timeout=30000")
             logger.info(f"Database connected with pool size {DB_POOL_SIZE}")
         except sqlite3.Error as e:
@@ -567,6 +603,7 @@ class DatabaseService:
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_type)")
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_success ON memories(success)")
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance)")
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_outcome_status ON memories(outcome_status)")
         cursor.execute("CREATE INDEX IF NOT EXISTS idx_patterns_problem ON patterns(problem_type)")

         # Migration helper function
@@ -635,6 +672,66 @@ class DatabaseService:
         safe_add_column("memories", "failed_in", "TEXT")  # JSON array of contexts where solution failed
         safe_add_column("memories", "context_confidence", "REAL")  # Context-specific confidence score

+        # Migration: Add CLaRa-inspired tier columns (v2.4.0)
+        # Hierarchical memory tiers: hot (fast access), warm (compressed), cold (archive)
+        safe_add_column("memories", "tier", "TEXT DEFAULT 'hot'")
+        safe_add_column("memories", "tier_changed_at", "TEXT")
+        safe_add_column("memories", "compressed_content", "TEXT")  # Compressed version for warm tier
+
+        # Tier index for fast filtering
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_memories_tier ON memories(tier)")
+
+        # Migration: Consolidate legacy outcome/success into outcome_status (v2.5.0)
+        # Maps: success=1 -> 'success', success=0 -> 'failed', outcome text -> outcome_status
+        # Only updates rows that still have outcome_status='pending' and have legacy data
+        try:
+            cursor.execute("""
+                UPDATE memories
+                SET outcome_status = 'success'
+                WHERE outcome_status = 'pending'
+                  AND success = 1
+                  AND outcome_status != 'success'
+            """)
+            cursor.execute("""
+                UPDATE memories
+                SET outcome_status = 'failed'
+                WHERE outcome_status = 'pending'
+                  AND success = 0
+                  AND outcome_status != 'failed'
+            """)
+            rows_migrated = cursor.rowcount
+            if rows_migrated > 0:
+                logger.info(f"Migration: Consolidated {rows_migrated} legacy success values into outcome_status")
+        except Exception as e:
+            logger.debug(f"Legacy outcome consolidation skipped: {e}")
+
+        # Memory archive table for consolidation (v2.4.0)
+        cursor.execute("""
+            CREATE TABLE IF NOT EXISTS memory_archive (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                original_id INTEGER,
+                type TEXT,
+                content TEXT,
+                embedding TEXT,
+                project_path TEXT,
+                session_id TEXT,
+                importance INTEGER,
+                access_count INTEGER,
+                decay_factor REAL,
+                metadata TEXT,
+                archive_reason TEXT,
+                relevance_score_at_archive REAL,
+                consolidated_into INTEGER,
+                archived_at TEXT DEFAULT (datetime('now'))
+            )
+        """)
+        # Migration: ensure memory_archive has consolidation column (v2.4.0)
+        # Older schema may lack this column
+        safe_add_column("memory_archive", "consolidated_into", "INTEGER")
+
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_original ON memory_archive(original_id)")
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_archive_consolidated ON memory_archive(consolidated_into)")
+
         # ============================================================
         # SESSION TIMELINE TABLES (Anti-Hallucination Layer)
         # ============================================================
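The migrations above lean on safe_add_column(), whose body is not shown in this diff (it appears only as the "# Migration helper function" context line in an earlier hunk). A typical shape for such a helper, offered as an assumption rather than the package's actual code:

    def safe_add_column(table: str, column: str, decl: str) -> None:
        # ALTER TABLE ... ADD COLUMN raises OperationalError if the column
        # already exists, so swallow that to keep migrations re-runnable.
        try:
            cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {decl}")
        except sqlite3.OperationalError:
            pass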
@@ -890,32 +987,6 @@ class DatabaseService:
         # MEMORY CLEANUP AND ARCHIVAL TABLES
         # ============================================================

-        # Archived memories (soft-deleted for recovery)
-        cursor.execute("""
-            CREATE TABLE IF NOT EXISTS memory_archive (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                original_id INTEGER NOT NULL,
-
-                -- Original memory data
-                type TEXT NOT NULL,
-                content TEXT NOT NULL,
-                embedding TEXT,
-                project_path TEXT,
-                session_id TEXT,
-                importance INTEGER,
-                access_count INTEGER,
-                decay_factor REAL,
-                metadata TEXT,
-
-                -- Archive metadata
-                archive_reason TEXT NOT NULL,
-                archived_at TEXT DEFAULT (datetime('now')),
-                archived_by TEXT,
-                relevance_score_at_archive REAL,
-                expires_at TEXT
-            )
-        """)
-
         # Cleanup configuration per project
         cursor.execute("""
             CREATE TABLE IF NOT EXISTS cleanup_config (
@@ -1125,10 +1196,31 @@ class DatabaseService:
         self.conn.commit()

     def _serialize_embedding(self, embedding: List[float]) -> str:
-        return json.dumps(embedding)
+        """Serialize embedding to binary format (base64-encoded struct pack).
+
+        Uses 'b64:' prefix to distinguish from legacy JSON format.
+        ~30-35% smaller than JSON serialization.
+        """
+        import struct
+        import base64
+        packed = struct.pack(f'{len(embedding)}f', *embedding)
+        return 'b64:' + base64.b64encode(packed).decode('ascii')

     def _deserialize_embedding(self, embedding_str: str) -> List[float]:
-        return json.loads(embedding_str) if embedding_str else []
+        """Deserialize embedding from binary or JSON format.
+
+        Auto-detects format: 'b64:' prefix = binary, otherwise JSON.
+        Backward compatible with existing JSON-serialized embeddings.
+        """
+        if not embedding_str:
+            return []
+        if embedding_str.startswith('b64:'):
+            import struct
+            import base64
+            raw = base64.b64decode(embedding_str[4:])
+            count = len(raw) // 4  # 4 bytes per float32
+            return list(struct.unpack(f'{count}f', raw))
+        return json.loads(embedding_str)

     def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
         a = np.array(vec1)
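A self-contained round trip of the 'b64:' format introduced above, mirroring the struct/base64 calls in the diff. Note the pack format string uses native byte order, so the stored column would not be portable across architectures of differing endianness:

    import base64
    import struct

    vec = [0.25, -1.5, 3.0]
    packed = struct.pack(f"{len(vec)}f", *vec)               # 4 bytes per float32
    encoded = "b64:" + base64.b64encode(packed).decode("ascii")

    raw = base64.b64decode(encoded[4:])
    decoded = list(struct.unpack(f"{len(raw) // 4}f", raw))
    assert decoded == [0.25, -1.5, 3.0]                      # exact: all three values are float32-representable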
@@ -1193,6 +1285,59 @@ class DatabaseService:

         return round(score, 4)

+    async def migrate_embeddings_to_binary(self, batch_size: int = 100) -> Dict[str, int]:
+        """Migrate existing JSON-serialized embeddings to binary format.
+
+        Processes in batches to avoid lock contention. Safe to run multiple times.
+
+        Returns:
+            Dict with 'migrated', 'skipped', 'errors' counts.
+        """
+        import struct
+        import base64
+        cursor = self.conn.cursor()
+        migrated = 0
+        skipped = 0
+        errors = 0
+
+        for table in ['memories', 'patterns', 'timeline_events']:
+            try:
+                cursor.execute(f"SELECT id, embedding FROM {table} WHERE embedding IS NOT NULL")
+            except Exception:
+                continue
+
+            batch = []
+            for row in cursor.fetchall():
+                emb_str = row['embedding']
+                if not emb_str or emb_str.startswith('b64:'):
+                    skipped += 1
+                    continue
+                try:
+                    floats = json.loads(emb_str)
+                    packed = struct.pack(f'{len(floats)}f', *floats)
+                    new_val = 'b64:' + base64.b64encode(packed).decode('ascii')
+                    batch.append((new_val, row['id']))
+                except Exception:
+                    errors += 1
+                    continue
+
+                if len(batch) >= batch_size:
+                    cursor.executemany(
+                        f"UPDATE {table} SET embedding = ? WHERE id = ?", batch
+                    )
+                    self.conn.commit()
+                    migrated += len(batch)
+                    batch = []
+
+            if batch:
+                cursor.executemany(
+                    f"UPDATE {table} SET embedding = ? WHERE id = ?", batch
+                )
+                self.conn.commit()
+                migrated += len(batch)
+
+        return {"migrated": migrated, "skipped": skipped, "errors": errors}
+
     async def update_access_stats(self, memory_id: int):
         """Update access statistics for a memory."""
         cursor = self.conn.cursor()
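A hedged usage sketch for the one-time migration above; `db` is assumed to be a connected DatabaseService instance and the printed counts are illustrative, not measured:

    import asyncio

    stats = asyncio.run(db.migrate_embeddings_to_binary(batch_size=200))
    # Safe to re-run: already-converted rows carry the 'b64:' prefix and are skipped
    print(stats)  # e.g. {'migrated': 1240, 'skipped': 15, 'errors': 0}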
@@ -1401,53 +1546,53 @@ class DatabaseService:
         # Clamp confidence to valid range
         confidence = max(0.0, min(1.0, confidence))

-        cursor = self.conn.cursor()
-        cursor.execute(
-            """
-            INSERT INTO memories (
-                type, content, embedding, metadata,
-                project_path, project_name, project_type, tech_stack,
-                session_id, chat_id,
-                agent_type, skill_used, tools_used,
-                outcome, success,
-                tags, importance, confidence,
-                outcome_status, fixed, did_not_fix, caused, superseded_by,
-                worked_in, failed_in, context_confidence
-            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-            """,
-            (
-                memory_type,
-                content,
-                self._serialize_embedding(embedding),
-                json.dumps(metadata or {}),
-                project_path,
-                project_name,
-                project_type,
-                json.dumps(tech_stack) if tech_stack else None,
-                session_id,
-                chat_id,
-                agent_type,
-                skill_used,
-                json.dumps(tools_used) if tools_used else None,
-                outcome,
-                1 if success else (0 if success is False else None),
-                json.dumps(tags) if tags else None,
-                importance,
-                confidence,
-                outcome_status,
-                json.dumps(fixed) if fixed else None,
-                json.dumps(did_not_fix) if did_not_fix else None,
-                json.dumps(caused) if caused else None,
-                superseded_by,
-                json.dumps(worked_in) if worked_in else None,
-                json.dumps(failed_in) if failed_in else None,
-                context_confidence
+        with self.transaction() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                """
+                INSERT INTO memories (
+                    type, content, embedding, metadata,
+                    project_path, project_name, project_type, tech_stack,
+                    session_id, chat_id,
+                    agent_type, skill_used, tools_used,
+                    outcome, success,
+                    tags, importance, confidence,
+                    outcome_status, fixed, did_not_fix, caused, superseded_by,
+                    worked_in, failed_in, context_confidence
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    memory_type,
+                    content,
+                    self._serialize_embedding(embedding),
+                    json.dumps(metadata or {}),
+                    project_path,
+                    project_name,
+                    project_type,
+                    json.dumps(tech_stack) if tech_stack else None,
+                    session_id,
+                    chat_id,
+                    agent_type,
+                    skill_used,
+                    json.dumps(tools_used) if tools_used else None,
+                    outcome,
+                    1 if success else (0 if success is False else None),
+                    json.dumps(tags) if tags else None,
+                    importance,
+                    confidence,
+                    outcome_status,
+                    json.dumps(fixed) if fixed else None,
+                    json.dumps(did_not_fix) if did_not_fix else None,
+                    json.dumps(caused) if caused else None,
+                    superseded_by,
+                    json.dumps(worked_in) if worked_in else None,
+                    json.dumps(failed_in) if failed_in else None,
+                    context_confidence
+                )
             )
-        )
-        self.conn.commit()
-        memory_id = cursor.lastrowid
+            memory_id = cursor.lastrowid

-        # Add to FAISS index if available
+        # Add to FAISS index if available (outside transaction - index is in-memory)
         if self._memories_index and embedding:
             self._memories_index.add(memory_id, embedding)
@@ -1469,7 +1614,10 @@ class DatabaseService:
         include_unreliable: bool = False,
         outcome_status: Optional[str] = None,
         # Context-aware search
-        current_context: Optional[Dict[str, Any]] = None
+        current_context: Optional[Dict[str, Any]] = None,
+        # Adaptive ranking
+        query_text: Optional[str] = None,
+        temperature: Optional[float] = None
     ) -> List[Dict[str, Any]]:
         """Search for similar memories with optional filters.

@@ -1653,13 +1801,28 @@ class DatabaseService:
                     "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
                 })

-        # Sort by combined score: (similarity * 0.7) + (confidence * 0.3) + context_adjustment
-        # This ranking prioritizes semantic relevance while boosting high-confidence memories
-        # and adjusting for context compatibility
-        results.sort(
-            key=lambda x: (x["similarity"] * 0.7) + (x["confidence"] * 0.3) + x.get("context_adjustment", 0.0),
-            reverse=True
-        )
+        # Compute decay multipliers
+        from services.memory_decay import calculate_search_decay_multiplier
+        for r in results:
+            r["_decay_multiplier"] = calculate_search_decay_multiplier(r)
+
+        # Adaptive ranking: multi-signal scoring with temperature control
+        try:
+            from services.adaptive_ranker import AdaptiveRanker
+            ranker = AdaptiveRanker(temperature=temperature)
+            results = ranker.rank_results(
+                results,
+                query_text=query_text or '',
+                temperature=temperature
+            )
+        except ImportError:
+            # Fallback to original static formula if adaptive ranker unavailable
+            results.sort(
+                key=lambda x: (
+                    (x["similarity"] * 0.7) + (x["confidence"] * 0.3) + x.get("context_adjustment", 0.0)
+                ) * x.get("_decay_multiplier", 1.0),
+                reverse=True
+            )

         # Update last_accessed for returned results
         if results:
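Worked example of the fallback formula in the except branch above, with illustrative values (the outcome-aware search path in the next hunk additionally multiplies in x.get("outcome_boost", 1.0)):

    similarity, confidence, context_adj = 0.82, 0.9, 0.05
    decay = 0.95
    score = ((similarity * 0.7) + (confidence * 0.3) + context_adj) * decay
    # (0.574 + 0.27 + 0.05) * 0.95 = 0.894 * 0.95 = 0.8493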
@@ -1807,9 +1970,15 @@ class DatabaseService:
                     "metadata": json.loads(row["metadata"]) if row["metadata"] else {}
                 })

-        # Sort by combined score including outcome boost and context adjustment
+        # Sort by combined score including outcome boost, context adjustment, and decay
+        from services.memory_decay import calculate_search_decay_multiplier
+        for r in results:
+            r["_decay_multiplier"] = calculate_search_decay_multiplier(r)
+
         results.sort(
-            key=lambda x: ((x["similarity"] * 0.7) + (x["confidence"] * 0.3) + x.get("context_adjustment", 0.0)) * x.get("outcome_boost", 1.0),
+            key=lambda x: (
+                (x["similarity"] * 0.7) + (x["confidence"] * 0.3) + x.get("context_adjustment", 0.0)
+            ) * x.get("outcome_boost", 1.0) * x.get("_decay_multiplier", 1.0),
             reverse=True
         )

@@ -1870,6 +2039,185 @@ class DatabaseService:
             "message": f"Confidence updated from {old_confidence:.3f} to {confidence:.3f}"
         }

+    async def find_similar_for_dedup(
+        self,
+        embedding: List[float],
+        project_path: Optional[str] = None,
+        threshold: float = 0.92,
+        limit: int = 3
+    ) -> List[Dict[str, Any]]:
+        """Lightweight similarity search specifically for dedup at ingest time.
+
+        Optimized for speed: returns minimal fields (id, content length,
+        importance, confidence, similarity) needed for merge decisions.
+
+        Args:
+            embedding: The embedding vector of the new content
+            project_path: Only check within the same project (required for scoping)
+            threshold: Minimum cosine similarity to consider a duplicate
+            limit: Maximum number of matches to return
+
+        Returns:
+            List of dicts with id, content, importance, confidence, similarity,
+            sorted by similarity descending. Empty list if no matches.
+        """
+        # Normalize project path for consistent matching
+        project_path = normalize_path(project_path)
+
+        # Ensure indexes are initialized
+        await self._init_vector_indexes()
+
+        cursor = self.conn.cursor()
+
+        # Try FAISS index first for fast search
+        if self._memories_index and self._memories_index.size() > 0:
+            # Get candidates from FAISS (search broadly, filter by project after)
+            candidate_limit = limit * 10  # Over-fetch to account for project filtering
+            candidates = self._memories_index.search(
+                query_embedding=embedding,
+                k=candidate_limit,
+                threshold=threshold
+            )
+
+            if candidates:
+                candidate_ids = [c[0] for c in candidates]
+                similarity_map = {c[0]: c[1] for c in candidates}
+
+                placeholders = ",".join("?" * len(candidate_ids))
+                query = f"""
+                    SELECT id, content, importance, confidence
+                    FROM memories
+                    WHERE id IN ({placeholders})
+                """
+                params = list(candidate_ids)
+
+                if project_path:
+                    query += " AND project_path = ?"
+                    params.append(project_path)
+
+                # Exclude already-failed memories from dedup
+                query += " AND (outcome_status IS NULL OR outcome_status != 'failed')"
+                query += " AND (failure_count IS NULL OR failure_count < 3)"
+
+                cursor.execute(query, params)
+                rows = cursor.fetchall()
+
+                results = []
+                for row in rows:
+                    similarity = similarity_map.get(row["id"], 0)
+                    if similarity >= threshold:
+                        results.append({
+                            "id": row["id"],
+                            "content": row["content"],
+                            "importance": row["importance"],
+                            "confidence": row["confidence"] if row["confidence"] is not None else 0.5,
+                            "similarity": similarity
+                        })
+
+                results.sort(key=lambda x: x["similarity"], reverse=True)
+                return results[:limit]
+
+        # Fallback: numpy-based linear scan (only within project for speed)
+        query = """
+            SELECT id, content, embedding, importance, confidence
+            FROM memories
+            WHERE embedding IS NOT NULL
+              AND (outcome_status IS NULL OR outcome_status != 'failed')
+              AND (failure_count IS NULL OR failure_count < 3)
+        """
+        params = []
+
+        if project_path:
+            query += " AND project_path = ?"
+            params.append(project_path)
+
+        cursor.execute(query, params)
+        rows = cursor.fetchall()
+
+        results = []
+        for row in rows:
+            stored_embedding = self._deserialize_embedding(row["embedding"])
+            if stored_embedding:
+                similarity = self._cosine_similarity(embedding, stored_embedding)
+                if similarity >= threshold:
+                    results.append({
+                        "id": row["id"],
+                        "content": row["content"],
+                        "importance": row["importance"],
+                        "confidence": row["confidence"] if row["confidence"] is not None else 0.5,
+                        "similarity": similarity
+                    })
+
+        results.sort(key=lambda x: x["similarity"], reverse=True)
+        return results[:limit]
+
+    async def merge_memory(
+        self,
+        existing_id: int,
+        new_content: str,
+        new_importance: int,
+        new_confidence: float
+    ) -> int:
+        """Merge new content into an existing memory (dedup merge).
+
+        Keeps the longer content, takes the higher importance and confidence,
+        increments access_count, and updates the timestamp.
+
+        Args:
+            existing_id: ID of the existing memory to merge into
+            new_content: Content from the new (duplicate) memory
+            new_importance: Importance from the new memory
+            new_confidence: Confidence from the new memory
+
+        Returns:
+            The existing memory ID that was updated
+        """
+        with self.transaction() as conn:
+            cursor = conn.cursor()
+
+            cursor.execute(
+                "SELECT id, content, importance, confidence FROM memories WHERE id = ?",
+                [existing_id]
+            )
+            row = cursor.fetchone()
+
+            if not row:
+                raise ValueError(f"Memory with ID {existing_id} not found for merge")
+
+            # Keep the longer content (more detail is better)
+            merged_content = new_content if len(new_content) > len(row["content"]) else row["content"]
+
+            # Take the higher importance and confidence
+            merged_importance = max(new_importance, row["importance"] or 0)
+            merged_confidence = max(
+                new_confidence,
+                row["confidence"] if row["confidence"] is not None else 0.5
+            )
+            merged_confidence = max(0.0, min(1.0, merged_confidence))
+
+            cursor.execute(
+                """
+                UPDATE memories
+                SET content = ?,
+                    importance = ?,
+                    confidence = ?,
+                    access_count = COALESCE(access_count, 0) + 1,
+                    updated_at = datetime('now'),
+                    last_accessed = datetime('now')
+                WHERE id = ?
+                """,
+                [merged_content, merged_importance, merged_confidence, existing_id]
+            )
+
+            logger.info(
+                f"Merged memory into #{existing_id}: "
+                f"importance {row['importance']}->{merged_importance}, "
+                f"confidence {row['confidence']}->{merged_confidence:.2f}, "
+                f"content_len {len(row['content'])}->{len(merged_content)}"
+            )
+
+            return existing_id
+
     async def keyword_search(
         self,
         query: str,
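A hedged sketch of the ingest-time flow these two methods enable; store_with_dedup, `db`, and the literal importance/confidence values are assumptions for illustration, not package code:

    async def store_with_dedup(db, content: str, embedding: list, project: str) -> int:
        matches = await db.find_similar_for_dedup(
            embedding, project_path=project, threshold=0.92
        )
        if matches:
            # Near-duplicate found: fold the new content into the best match
            return await db.merge_memory(matches[0]["id"], content, 5, 0.7)
        # Otherwise proceed with the normal insert path (store_memory, not shown here)
        ...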
@@ -3144,28 +3492,30 @@ class DatabaseService:
             return {"success": False, "error": f"Target memory {target_id} not found"}

         try:
-            cursor.execute("""
-                INSERT INTO memory_relationships (source_id, target_id, relationship, strength)
-                VALUES (?, ?, ?, ?)
-            """, (source_id, target_id, relationship, strength))
-            self.conn.commit()
-
-            return {
-                "success": True,
-                "id": cursor.lastrowid,
-                "source_id": source_id,
-                "target_id": target_id,
-                "relationship": relationship,
-                "strength": strength
-            }
+            with self.transaction() as conn:
+                tx_cursor = conn.cursor()
+                tx_cursor.execute("""
+                    INSERT INTO memory_relationships (source_id, target_id, relationship, strength)
+                    VALUES (?, ?, ?, ?)
+                """, (source_id, target_id, relationship, strength))
+
+                return {
+                    "success": True,
+                    "id": tx_cursor.lastrowid,
+                    "source_id": source_id,
+                    "target_id": target_id,
+                    "relationship": relationship,
+                    "strength": strength
+                }
         except sqlite3.IntegrityError:
             # Relationship already exists, update strength
-            cursor.execute("""
-                UPDATE memory_relationships
-                SET strength = ?, created_at = CURRENT_TIMESTAMP
-                WHERE source_id = ? AND target_id = ? AND relationship = ?
-            """, (strength, source_id, target_id, relationship))
-            self.conn.commit()
+            with self.transaction() as conn:
+                tx_cursor = conn.cursor()
+                tx_cursor.execute("""
+                    UPDATE memory_relationships
+                    SET strength = ?, created_at = CURRENT_TIMESTAMP
+                    WHERE source_id = ? AND target_id = ? AND relationship = ?
+                """, (strength, source_id, target_id, relationship))

             return {
                 "success": True,
@@ -3258,6 +3608,137 @@ class DatabaseService:
         await traverse(memory_id, 1)
         return results

+    async def get_related_memories_batch(
+        self,
+        memory_ids: List[int],
+        relationship: str = None,
+        direction: str = 'both'
+    ) -> Dict[int, list]:
+        """Get related memories for multiple IDs in a single query batch.
+
+        More efficient than calling get_related_memories() in a loop because
+        it uses IN (...) clauses instead of individual queries per memory_id.
+
+        Args:
+            memory_ids: List of memory IDs to find relationships for
+            relationship: Optional filter by relationship type
+            direction: 'outgoing', 'incoming', or 'both'
+
+        Returns:
+            Dict mapping memory_id -> list of related memories
+        """
+        if not memory_ids:
+            return {}
+
+        cursor = self.conn.cursor()
+        results = {mid: [] for mid in memory_ids}
+        placeholders = ','.join('?' * len(memory_ids))
+
+        queries = []
+        if direction in ('outgoing', 'both'):
+            q = f"""
+                SELECT mr.source_id, mr.target_id as related_id, mr.relationship,
+                       mr.strength, 'outgoing' as direction,
+                       m.type, m.content, m.project_path, m.importance, m.created_at
+                FROM memory_relationships mr
+                JOIN memories m ON m.id = mr.target_id
+                WHERE mr.source_id IN ({placeholders})
+            """
+            params = list(memory_ids)
+            if relationship:
+                q += " AND mr.relationship = ?"
+                params.append(relationship)
+            queries.append((q, params, 'source_id'))
+
+        if direction in ('incoming', 'both'):
+            q = f"""
+                SELECT mr.target_id, mr.source_id as related_id, mr.relationship,
+                       mr.strength, 'incoming' as direction,
+                       m.type, m.content, m.project_path, m.importance, m.created_at
+                FROM memory_relationships mr
+                JOIN memories m ON m.id = mr.source_id
+                WHERE mr.target_id IN ({placeholders})
+            """
+            params = list(memory_ids)
+            if relationship:
+                q += " AND mr.relationship = ?"
+                params.append(relationship)
+            queries.append((q, params, 'target_id'))
+
+        for query, params, id_col in queries:
+            cursor.execute(query, params)
+            for row in cursor.fetchall():
+                owner_id = row[id_col]
+                if owner_id in results:
+                    results[owner_id].append({
+                        "id": row["related_id"],
+                        "relationship": row["relationship"],
+                        "strength": row["strength"],
+                        "direction": row["direction"],
+                        "type": row["type"],
+                        "content": row["content"][:200] + "..." if len(row["content"]) > 200 else row["content"],
+                        "project_path": row["project_path"],
+                        "importance": row["importance"],
+                        "created_at": row["created_at"]
+                    })
+
+        return results
+
+    async def find_contradictions_batch(self, memory_ids: List[int]) -> Dict[int, list]:
+        """Find contradictions for multiple memories in a single query.
+
+        Args:
+            memory_ids: List of memory IDs
+
+        Returns:
+            Dict mapping memory_id -> list of contradicting memories
+        """
+        if not memory_ids:
+            return {}
+
+        cursor = self.conn.cursor()
+        results = {mid: [] for mid in memory_ids}
+        placeholders = ','.join('?' * len(memory_ids))
+
+        # Check both directions of contradiction relationships
+        cursor.execute(f"""
+            SELECT mr.source_id, mr.target_id, mr.strength,
+                   m.id as related_id, m.type, m.content, m.project_path
+            FROM memory_relationships mr
+            JOIN memories m ON m.id = mr.target_id
+            WHERE mr.relationship = 'contradicts'
+              AND mr.source_id IN ({placeholders})
+        """, memory_ids)
+
+        for row in cursor.fetchall():
+            src = row["source_id"]
+            if src in results:
+                results[src].append({
+                    "id": row["related_id"],
+                    "content": row["content"][:200],
+                    "type": row["type"]
+                })
+
+        cursor.execute(f"""
+            SELECT mr.target_id, mr.source_id, mr.strength,
+                   m.id as related_id, m.type, m.content, m.project_path
+            FROM memory_relationships mr
+            JOIN memories m ON m.id = mr.source_id
+            WHERE mr.relationship = 'contradicts'
+              AND mr.target_id IN ({placeholders})
+        """, memory_ids)
+
+        for row in cursor.fetchall():
+            tgt = row["target_id"]
+            if tgt in results:
+                results[tgt].append({
+                    "id": row["related_id"],
+                    "content": row["content"][:200],
+                    "type": row["type"]
+                })
+
+        return results
+
     async def get_causal_chain(self, memory_id: int, max_depth: int = 5) -> dict:
         """Traverse the fixes/caused_by chain to find root cause and all fixes.
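Closing sketch: how a caller might use the batch helpers above to avoid issuing one query per memory; `db` and the IDs are assumptions for illustration:

    async def annotate(db):
        ids = [11, 12, 13]
        related = await db.get_related_memories_batch(ids, relationship="fixes")
        contradictions = await db.find_contradictions_batch(ids)
        for mid in ids:
            print(mid, len(related[mid]), "related,", len(contradictions[mid]), "contradicting")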