claude-memory-agent 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +206 -200
  2. package/agent_card.py +186 -0
  3. package/bin/cli.js +317 -181
  4. package/bin/postinstall.js +270 -216
  5. package/dashboard.html +4232 -2689
  6. package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
  7. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  8. package/hooks/grounding-hook.py +422 -348
  9. package/hooks/session_end.py +293 -192
  10. package/hooks/session_start.py +227 -227
  11. package/install.py +919 -887
  12. package/main.py +4496 -2859
  13. package/package.json +47 -55
  14. package/services/__init__.py +50 -50
  15. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  16. package/services/__pycache__/curator.cpython-312.pyc +0 -0
  17. package/services/__pycache__/database.cpython-312.pyc +0 -0
  18. package/services/curator.py +1606 -0
  19. package/services/database.py +3637 -2485
  20. package/skills/__init__.py +21 -1
  21. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  22. package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
  23. package/skills/__pycache__/context.cpython-312.pyc +0 -0
  24. package/skills/__pycache__/curator.cpython-312.pyc +0 -0
  25. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  26. package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
  27. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  28. package/skills/confidence_tracker.py +441 -0
  29. package/skills/context.py +675 -0
  30. package/skills/curator.py +348 -0
  31. package/skills/search.py +369 -213
  32. package/skills/session_review.py +418 -0
  33. package/skills/store.py +377 -179
  34. package/update_system.py +829 -817
@@ -0,0 +1,1606 @@
1
+ """Memory Curator Service - Autonomous graph exploration and maintenance.
2
+
3
+ The curator agent traverses the memory knowledge graph, finds duplicates,
4
+ suggests relationships, scores quality, and provides curated context.
5
+ """
6
+ import logging
7
+ import json
8
+ import asyncio
9
+ from typing import Dict, Any, Optional, List, Set, Tuple
10
+ from datetime import datetime, timedelta
11
+ from collections import defaultdict
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class MemoryCurator:
    """
    Autonomous curator agent for memory graph maintenance.

    Capabilities:
    - Graph exploration (BFS/DFS traversal)
    - Duplicate detection (semantic similarity >0.92)
    - Relationship inference (suggest missing links)
    - Quality scoring (usage + connections + confidence)
    - Curated context generation
    - Scheduled maintenance
    """

    # Confidence thresholds for autonomous actions.
    # HIGH_CONFIDENCE gates auto_merge_candidates (find_duplicates) and
    # auto_apply_candidates (suggest_relationships); the lower tiers are used
    # by _get_merge_recommendation to grade merge suggestions.
    HIGH_CONFIDENCE = 0.9  # Auto-execute
    MEDIUM_CONFIDENCE = 0.7  # Suggest with one-click approval
    LOW_CONFIDENCE = 0.5  # Log for manual review only

    # Default configuration.
    # NOTE(review): not referenced anywhere in this chunk — presumably read by
    # the scheduled-maintenance code later in the file; confirm before removal.
    DEFAULT_CONFIG = {
        "auto_dedup_enabled": True,
        "auto_link_enabled": True,
        "dedup_threshold": 0.92,
        "maintenance_interval_hours": 24,
        "curator_active": True
    }
42
+
43
+ def __init__(self, db, embeddings):
44
+ """
45
+ Initialize the curator with database and embedding services.
46
+
47
+ Args:
48
+ db: DatabaseService instance
49
+ embeddings: EmbeddingService instance
50
+ """
51
+ self.db = db
52
+ self.embeddings = embeddings
53
+ self._running = False
54
+ self._last_maintenance: Dict[str, datetime] = {}
55
+
56
+ # ================================================================
57
+ # GRAPH EXPLORATION
58
+ # ================================================================
59
+
60
    async def explore_graph(
        self,
        start_node_id: int,
        max_depth: int = 3,
        mode: str = "bfs",
        relationship_filter: Optional[List[str]] = None,
        include_orphan_check: bool = True
    ) -> Dict[str, Any]:
        """
        Explore the memory graph from a starting node.

        Args:
            start_node_id: ID of the memory to start from
            max_depth: Maximum traversal depth
            mode: 'bfs' (breadth-first) or 'dfs' (depth-first)
            relationship_filter: Only follow these relationship types
            include_orphan_check: Check for orphaned nodes in the exploration

        Returns:
            Dict with explored nodes, edges, clusters, and insights; or a dict
            containing only an "error" key when the start node does not exist.

        Note:
            Nodes first discovered at depth == max_depth are marked visited but
            neither emitted nor expanded, so "edges" may reference ids that are
            absent from "nodes".
        """
        cursor = self.db.conn.cursor()

        # Verify start node exists before traversing.
        cursor.execute("SELECT id, content, type FROM memories WHERE id = ?", (start_node_id,))
        start_node = cursor.fetchone()
        if not start_node:
            return {"error": f"Memory {start_node_id} not found"}

        visited: Set[int] = set()
        nodes: List[Dict] = []
        edges: List[Dict] = []
        # depth_map records the depth at which each node was first discovered.
        depth_map: Dict[int, int] = {start_node_id: 0}

        # BFS/DFS exploration — the branches are identical except the frontier
        # is consumed FIFO (bfs) or LIFO (dfs).
        if mode == "bfs":
            queue = [start_node_id]
            while queue:
                current_id = queue.pop(0)
                if current_id in visited:
                    continue
                visited.add(current_id)

                current_depth = depth_map.get(current_id, 0)
                if current_depth >= max_depth:
                    # Depth cutoff: this node is neither emitted nor expanded.
                    continue

                # Get node info
                node_info = await self._get_node_info(current_id)
                if node_info:
                    node_info["depth"] = current_depth
                    nodes.append(node_info)

                # Get connected nodes
                neighbors = await self._get_neighbors(
                    current_id,
                    relationship_filter
                )

                for neighbor_id, edge_info in neighbors:
                    edges.append(edge_info)
                    if neighbor_id not in visited:
                        queue.append(neighbor_id)
                        # First-discovery depth wins; never overwrite.
                        if neighbor_id not in depth_map:
                            depth_map[neighbor_id] = current_depth + 1
        else:  # DFS
            stack = [start_node_id]
            while stack:
                current_id = stack.pop()
                if current_id in visited:
                    continue
                visited.add(current_id)

                current_depth = depth_map.get(current_id, 0)
                if current_depth >= max_depth:
                    continue

                node_info = await self._get_node_info(current_id)
                if node_info:
                    node_info["depth"] = current_depth
                    nodes.append(node_info)

                neighbors = await self._get_neighbors(
                    current_id,
                    relationship_filter
                )

                for neighbor_id, edge_info in neighbors:
                    edges.append(edge_info)
                    if neighbor_id not in visited:
                        stack.append(neighbor_id)
                        if neighbor_id not in depth_map:
                            depth_map[neighbor_id] = current_depth + 1

        # Identify clusters (connected components) within the explored subgraph.
        clusters = self._identify_clusters(nodes, edges)

        # Find orphans if requested (global query, not limited to this subgraph).
        orphans = []
        if include_orphan_check:
            orphans = await self.find_orphan_memories(limit=10)

        return {
            "start_node": start_node_id,
            "mode": mode,
            "max_depth": max_depth,
            "nodes_explored": len(nodes),
            "edges_found": len(edges),
            "nodes": nodes,
            "edges": edges,
            "clusters": clusters,
            "orphans_nearby": orphans[:5] if orphans else [],
            "exploration_insights": self._generate_exploration_insights(nodes, edges, clusters)
        }
174
+
175
    async def _get_node_info(self, memory_id: int) -> Optional[Dict]:
        """
        Get detailed info for a memory node.

        Returns None when the memory does not exist. Content is truncated to
        200 characters for display; edge counts are reported per direction and
        summed into connection_count.
        """
        cursor = self.db.conn.cursor()
        cursor.execute("""
            SELECT id, type, content, importance, confidence,
                   access_count, decay_factor, project_path, created_at
            FROM memories WHERE id = ?
        """, (memory_id,))
        row = cursor.fetchone()
        if not row:
            return None

        # Get relationship counts, one query per direction.
        cursor.execute("""
            SELECT COUNT(*) as outgoing FROM memory_relationships WHERE source_id = ?
        """, (memory_id,))
        outgoing = cursor.fetchone()["outgoing"]

        cursor.execute("""
            SELECT COUNT(*) as incoming FROM memory_relationships WHERE target_id = ?
        """, (memory_id,))
        incoming = cursor.fetchone()["incoming"]

        return {
            "id": row["id"],
            "type": row["type"],
            # Truncate long content for display; full text stays in the DB.
            "content": row["content"][:200] + "..." if len(row["content"]) > 200 else row["content"],
            "importance": row["importance"],
            "confidence": row["confidence"],
            "access_count": row["access_count"],
            "decay_factor": row["decay_factor"],
            "project_path": row["project_path"],
            "created_at": row["created_at"],
            "connection_count": outgoing + incoming,
            "outgoing_edges": outgoing,
            "incoming_edges": incoming
        }
212
+
213
    async def _get_neighbors(
        self,
        memory_id: int,
        relationship_filter: Optional[List[str]] = None
    ) -> List[Tuple[int, Dict]]:
        """
        Get all neighboring nodes and edge info.

        Both directions are fetched in a single UNION ALL query; the optional
        relationship_filter is applied in Python afterwards.

        Returns:
            List of (neighbor_id, edge_info) tuples. edge_info's source/target
            are oriented to match the direction stored in the database.
        """
        cursor = self.db.conn.cursor()

        query = """
            SELECT target_id as neighbor_id, relationship, strength, 'outgoing' as direction
            FROM memory_relationships WHERE source_id = ?
            UNION ALL
            SELECT source_id as neighbor_id, relationship, strength, 'incoming' as direction
            FROM memory_relationships WHERE target_id = ?
        """
        cursor.execute(query, (memory_id, memory_id))

        neighbors = []
        for row in cursor.fetchall():
            # Drop edges whose relationship type is not in the filter.
            if relationship_filter and row["relationship"] not in relationship_filter:
                continue

            edge_info = {
                # Re-orient so source/target reflect the stored direction.
                "source": memory_id if row["direction"] == "outgoing" else row["neighbor_id"],
                "target": row["neighbor_id"] if row["direction"] == "outgoing" else memory_id,
                "relationship": row["relationship"],
                "strength": row["strength"],
                "direction": row["direction"]
            }
            neighbors.append((row["neighbor_id"], edge_info))

        return neighbors
245
+
246
+ def _identify_clusters(self, nodes: List[Dict], edges: List[Dict]) -> List[Dict]:
247
+ """Identify clusters of tightly connected nodes."""
248
+ if not nodes:
249
+ return []
250
+
251
+ # Build adjacency for clustering
252
+ adjacency = defaultdict(set)
253
+ for edge in edges:
254
+ adjacency[edge["source"]].add(edge["target"])
255
+ adjacency[edge["target"]].add(edge["source"])
256
+
257
+ # Simple connected component analysis
258
+ visited = set()
259
+ clusters = []
260
+
261
+ for node in nodes:
262
+ node_id = node["id"]
263
+ if node_id in visited:
264
+ continue
265
+
266
+ # BFS to find component
267
+ component = []
268
+ queue = [node_id]
269
+ while queue:
270
+ current = queue.pop(0)
271
+ if current in visited:
272
+ continue
273
+ visited.add(current)
274
+ component.append(current)
275
+ for neighbor in adjacency[current]:
276
+ if neighbor not in visited:
277
+ queue.append(neighbor)
278
+
279
+ if len(component) > 1:
280
+ # Determine cluster type based on node types
281
+ node_types = defaultdict(int)
282
+ for nid in component:
283
+ for n in nodes:
284
+ if n["id"] == nid:
285
+ node_types[n["type"]] += 1
286
+ break
287
+
288
+ clusters.append({
289
+ "node_ids": component,
290
+ "size": len(component),
291
+ "dominant_type": max(node_types, key=node_types.get) if node_types else "mixed",
292
+ "type_distribution": dict(node_types)
293
+ })
294
+
295
+ return sorted(clusters, key=lambda c: c["size"], reverse=True)
296
+
297
+ def _generate_exploration_insights(
298
+ self,
299
+ nodes: List[Dict],
300
+ edges: List[Dict],
301
+ clusters: List[Dict]
302
+ ) -> List[str]:
303
+ """Generate insights from the exploration."""
304
+ insights = []
305
+
306
+ if not nodes:
307
+ return ["No nodes found in exploration"]
308
+
309
+ # Type distribution
310
+ type_counts = defaultdict(int)
311
+ for node in nodes:
312
+ type_counts[node["type"]] += 1
313
+
314
+ dominant = max(type_counts, key=type_counts.get)
315
+ insights.append(f"Dominant memory type: {dominant} ({type_counts[dominant]}/{len(nodes)})")
316
+
317
+ # Connection density
318
+ if nodes:
319
+ avg_connections = sum(n.get("connection_count", 0) for n in nodes) / len(nodes)
320
+ if avg_connections < 1:
321
+ insights.append("Low connectivity: Consider adding more relationships")
322
+ elif avg_connections > 5:
323
+ insights.append("High connectivity: Knowledge graph is well-connected")
324
+
325
+ # Cluster analysis
326
+ if clusters:
327
+ largest = clusters[0]
328
+ insights.append(f"Largest cluster: {largest['size']} nodes ({largest['dominant_type']})")
329
+
330
+ # Quality indicators
331
+ low_confidence = [n for n in nodes if n.get("confidence", 0.5) < 0.3]
332
+ if low_confidence:
333
+ insights.append(f"{len(low_confidence)} nodes with low confidence need review")
334
+
335
+ high_importance = [n for n in nodes if n.get("importance", 5) >= 8]
336
+ if high_importance:
337
+ insights.append(f"{len(high_importance)} high-importance nodes in this subgraph")
338
+
339
+ return insights
340
+
341
+ # ================================================================
342
+ # DUPLICATE DETECTION
343
+ # ================================================================
344
+
345
    async def find_duplicates(
        self,
        project_path: Optional[str] = None,
        similarity_threshold: float = 0.92,
        limit: int = 50
    ) -> Dict[str, Any]:
        """
        Find semantically similar (duplicate) memories.

        Scans at most the 500 most-recent memories that have an embedding and
        compares every pair by cosine similarity (O(n^2) worst case).

        Args:
            project_path: Optional project filter
            similarity_threshold: Minimum similarity to consider duplicates (default 0.92)
            limit: Maximum number of duplicate pairs to return

        Returns:
            Dict with duplicate clusters, raw pairs, scan stats, and the
            subset of pairs whose merge confidence is high enough to
            auto-merge.
        """
        cursor = self.db.conn.cursor()

        # Get memories with embeddings (capped at 500 to bound the O(n^2) scan).
        if project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT id, content, type, importance, confidence, embedding, created_at
                FROM memories
                WHERE embedding IS NOT NULL AND project_path = ?
                ORDER BY created_at DESC
                LIMIT 500
            """, (normalized,))
        else:
            cursor.execute("""
                SELECT id, content, type, importance, confidence, embedding, created_at
                FROM memories
                WHERE embedding IS NOT NULL
                ORDER BY created_at DESC
                LIMIT 500
            """)

        memories = cursor.fetchall()
        if len(memories) < 2:
            # Fewer than two embedded memories: nothing to compare.
            return {"duplicate_clusters": [], "total_memories_checked": len(memories)}

        # Parse embeddings (stored as JSON arrays); unparseable rows are skipped.
        memory_data = []
        for mem in memories:
            try:
                embedding = json.loads(mem["embedding"])
                memory_data.append({
                    "id": mem["id"],
                    "content": mem["content"],
                    "type": mem["type"],
                    "importance": mem["importance"],
                    "confidence": mem["confidence"],
                    "embedding": embedding,
                    "created_at": mem["created_at"]
                })
            except (json.JSONDecodeError, TypeError):
                continue

        # Find duplicate pairs.
        import numpy as np
        duplicate_pairs = []
        checked_pairs = set()

        for i, mem1 in enumerate(memory_data):
            for j, mem2 in enumerate(memory_data):
                if i >= j:
                    # Visit each unordered pair exactly once.
                    continue

                # NOTE(review): ids are unique per row, so this set appears
                # redundant with the i >= j guard above; kept as a safeguard.
                pair_key = (min(mem1["id"], mem2["id"]), max(mem1["id"], mem2["id"]))
                if pair_key in checked_pairs:
                    continue
                checked_pairs.add(pair_key)

                # Calculate cosine similarity between the embedding vectors.
                try:
                    vec1 = np.array(mem1["embedding"])
                    vec2 = np.array(mem2["embedding"])
                    similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

                    if similarity >= similarity_threshold:
                        duplicate_pairs.append({
                            "memory1": {
                                "id": mem1["id"],
                                "content": mem1["content"][:150],
                                "type": mem1["type"],
                                "importance": mem1["importance"],
                                "confidence": mem1["confidence"],
                                "created_at": mem1["created_at"]
                            },
                            "memory2": {
                                "id": mem2["id"],
                                "content": mem2["content"][:150],
                                "type": mem2["type"],
                                "importance": mem2["importance"],
                                "confidence": mem2["confidence"],
                                "created_at": mem2["created_at"]
                            },
                            "similarity": float(similarity),
                            "merge_recommendation": self._get_merge_recommendation(mem1, mem2, similarity)
                        })
                except Exception as e:
                    # A bad vector (wrong shape, zero norm) only skips this pair.
                    logger.debug(f"Error calculating similarity: {e}")
                    continue

        # Sort by similarity and limit.
        duplicate_pairs.sort(key=lambda x: x["similarity"], reverse=True)
        duplicate_pairs = duplicate_pairs[:limit]

        # Cluster duplicates (transitive grouping via union-find).
        clusters = self._cluster_duplicates(duplicate_pairs)

        return {
            "duplicate_clusters": clusters,
            "duplicate_pairs": duplicate_pairs,
            "total_memories_checked": len(memory_data),
            "duplicates_found": len(duplicate_pairs),
            "threshold_used": similarity_threshold,
            "auto_merge_candidates": [
                p for p in duplicate_pairs
                if p["merge_recommendation"]["confidence"] >= self.HIGH_CONFIDENCE
            ]
        }
469
+
470
+ def _get_merge_recommendation(
471
+ self,
472
+ mem1: Dict,
473
+ mem2: Dict,
474
+ similarity: float
475
+ ) -> Dict[str, Any]:
476
+ """Determine which memory to keep in a merge."""
477
+ # Scoring: higher is better to keep
478
+ score1 = 0
479
+ score2 = 0
480
+
481
+ # Prefer higher importance
482
+ score1 += mem1["importance"] * 2
483
+ score2 += mem2["importance"] * 2
484
+
485
+ # Prefer higher confidence
486
+ score1 += mem1["confidence"] * 10
487
+ score2 += mem2["confidence"] * 10
488
+
489
+ # Prefer longer content (more detail)
490
+ score1 += min(len(mem1["content"]) / 100, 5)
491
+ score2 += min(len(mem2["content"]) / 100, 5)
492
+
493
+ # Prefer newer for decisions, older for established patterns
494
+ if mem1["type"] == "decision":
495
+ # Newer decisions are more relevant
496
+ score1 += 3 if mem1["created_at"] > mem2["created_at"] else 0
497
+ score2 += 3 if mem2["created_at"] > mem1["created_at"] else 0
498
+ else:
499
+ # Older patterns are more established
500
+ score1 += 2 if mem1["created_at"] < mem2["created_at"] else 0
501
+ score2 += 2 if mem2["created_at"] < mem1["created_at"] else 0
502
+
503
+ keep_id = mem1["id"] if score1 >= score2 else mem2["id"]
504
+ remove_id = mem2["id"] if score1 >= score2 else mem1["id"]
505
+
506
+ # Confidence in recommendation
507
+ score_diff = abs(score1 - score2)
508
+ if score_diff > 10 and similarity > 0.95:
509
+ confidence = self.HIGH_CONFIDENCE
510
+ elif score_diff > 5 and similarity > 0.93:
511
+ confidence = self.MEDIUM_CONFIDENCE
512
+ else:
513
+ confidence = self.LOW_CONFIDENCE
514
+
515
+ return {
516
+ "keep": keep_id,
517
+ "remove": remove_id,
518
+ "confidence": confidence,
519
+ "reason": f"Score {keep_id}={max(score1,score2):.1f} vs {remove_id}={min(score1,score2):.1f}"
520
+ }
521
+
522
+ def _cluster_duplicates(self, pairs: List[Dict]) -> List[Dict]:
523
+ """Cluster duplicate pairs into groups."""
524
+ if not pairs:
525
+ return []
526
+
527
+ # Build union-find
528
+ parent = {}
529
+
530
+ def find(x):
531
+ if x not in parent:
532
+ parent[x] = x
533
+ if parent[x] != x:
534
+ parent[x] = find(parent[x])
535
+ return parent[x]
536
+
537
+ def union(x, y):
538
+ px, py = find(x), find(y)
539
+ if px != py:
540
+ parent[px] = py
541
+
542
+ # Union all pairs
543
+ for pair in pairs:
544
+ union(pair["memory1"]["id"], pair["memory2"]["id"])
545
+
546
+ # Group by root
547
+ clusters_map = defaultdict(list)
548
+ all_ids = set()
549
+ for pair in pairs:
550
+ all_ids.add(pair["memory1"]["id"])
551
+ all_ids.add(pair["memory2"]["id"])
552
+
553
+ for mem_id in all_ids:
554
+ root = find(mem_id)
555
+ clusters_map[root].append(mem_id)
556
+
557
+ # Build cluster objects
558
+ clusters = []
559
+ for root, members in clusters_map.items():
560
+ if len(members) > 1:
561
+ # Find the best candidate to keep
562
+ best_id = None
563
+ best_score = -1
564
+ for pair in pairs:
565
+ if pair["memory1"]["id"] in members:
566
+ rec = pair["merge_recommendation"]
567
+ if rec["keep"] in members and rec["confidence"] > best_score:
568
+ best_id = rec["keep"]
569
+ best_score = rec["confidence"]
570
+
571
+ clusters.append({
572
+ "member_ids": sorted(members),
573
+ "size": len(members),
574
+ "recommended_keep": best_id,
575
+ "merge_confidence": best_score
576
+ })
577
+
578
+ return sorted(clusters, key=lambda c: c["size"], reverse=True)
579
+
580
+ # ================================================================
581
+ # RELATIONSHIP INFERENCE
582
+ # ================================================================
583
+
584
    async def suggest_relationships(
        self,
        memory_id: Optional[int] = None,
        project_path: Optional[str] = None,
        similarity_threshold: float = 0.7,
        limit: int = 20
    ) -> Dict[str, Any]:
        """
        Suggest missing relationships between memories.

        Uses semantic similarity and content analysis to infer
        relationships that should exist but don't. Two modes:
        targeted (memory_id given: compare that memory against up to 200
        unconnected memories) or project-wide (pairwise scan of the top 100
        memories by importance).

        Args:
            memory_id: Optional specific memory to find links for
            project_path: Optional project filter
            similarity_threshold: Minimum similarity for suggestions
            limit: Maximum suggestions to return

        Returns:
            Dict with suggested relationships, sorted by confidence, plus the
            subset confident enough to auto-apply.
        """
        cursor = self.db.conn.cursor()

        suggestions = []

        if memory_id:
            # Targeted mode: find candidate links for one specific memory.
            cursor.execute("""
                SELECT id, content, type, embedding FROM memories WHERE id = ?
            """, (memory_id,))
            source = cursor.fetchone()
            if not source or not source["embedding"]:
                return {"suggestions": [], "error": "Memory not found or has no embedding"}

            source_embedding = json.loads(source["embedding"])

            # Collect already-linked ids (either direction) so only genuinely
            # missing links are suggested.
            cursor.execute("""
                SELECT target_id FROM memory_relationships WHERE source_id = ?
                UNION
                SELECT source_id FROM memory_relationships WHERE target_id = ?
            """, (memory_id, memory_id))
            existing = {row[0] for row in cursor.fetchall()}
            existing.add(memory_id)  # never suggest a self-link

            # Find similar unconnected memories. The interpolated text is only
            # '?' placeholders (one per excluded id), so this stays injection-safe.
            cursor.execute("""
                SELECT id, content, type, embedding, importance
                FROM memories
                WHERE embedding IS NOT NULL AND id NOT IN ({})
                LIMIT 200
            """.format(','.join('?' * len(existing))), tuple(existing))

            import numpy as np
            source_vec = np.array(source_embedding)

            for row in cursor.fetchall():
                try:
                    target_vec = np.array(json.loads(row["embedding"]))
                    # Cosine similarity between the embedding vectors.
                    similarity = np.dot(source_vec, target_vec) / (
                        np.linalg.norm(source_vec) * np.linalg.norm(target_vec)
                    )

                    if similarity >= similarity_threshold:
                        rel_type = self._infer_relationship_type(
                            source["type"], source["content"],
                            row["type"], row["content"]
                        )

                        suggestions.append({
                            "source_id": memory_id,
                            "target_id": row["id"],
                            "relationship": rel_type,
                            "similarity": float(similarity),
                            "confidence": self._calculate_link_confidence(
                                similarity, source["type"], row["type"]
                            ),
                            "source_preview": source["content"][:100],
                            "target_preview": row["content"][:100]
                        })
                except Exception as e:
                    # A bad embedding only skips this candidate.
                    logger.debug(f"Error processing memory {row['id']}: {e}")
                    continue
        else:
            # Project-wide mode: pairwise scan of the most important memories.
            if project_path:
                from services.database import normalize_path
                normalized = normalize_path(project_path)
                cursor.execute("""
                    SELECT id, content, type, embedding, importance
                    FROM memories
                    WHERE embedding IS NOT NULL AND project_path = ?
                    ORDER BY importance DESC
                    LIMIT 100
                """, (normalized,))
            else:
                cursor.execute("""
                    SELECT id, content, type, embedding, importance
                    FROM memories
                    WHERE embedding IS NOT NULL
                    ORDER BY importance DESC
                    LIMIT 100
                """)

            memories = cursor.fetchall()

            # Get all existing relationships to skip already-linked pairs.
            cursor.execute("SELECT source_id, target_id FROM memory_relationships")
            existing_pairs = {(row[0], row[1]) for row in cursor.fetchall()}

            import numpy as np

            # Check pairs for potential relationships (O(n^2) over <= 100 rows,
            # short-circuited once `limit` suggestions are collected).
            for i, mem1 in enumerate(memories):
                if len(suggestions) >= limit:
                    break

                for mem2 in memories[i+1:]:
                    if len(suggestions) >= limit:
                        break

                    pair = (min(mem1["id"], mem2["id"]), max(mem1["id"], mem2["id"]))
                    if pair in existing_pairs or (pair[1], pair[0]) in existing_pairs:
                        continue

                    try:
                        vec1 = np.array(json.loads(mem1["embedding"]))
                        vec2 = np.array(json.loads(mem2["embedding"]))
                        similarity = np.dot(vec1, vec2) / (
                            np.linalg.norm(vec1) * np.linalg.norm(vec2)
                        )

                        if similarity >= similarity_threshold:
                            rel_type = self._infer_relationship_type(
                                mem1["type"], mem1["content"],
                                mem2["type"], mem2["content"]
                            )

                            suggestions.append({
                                "source_id": mem1["id"],
                                "target_id": mem2["id"],
                                "relationship": rel_type,
                                "similarity": float(similarity),
                                "confidence": self._calculate_link_confidence(
                                    similarity, mem1["type"], mem2["type"]
                                ),
                                "source_preview": mem1["content"][:100],
                                "target_preview": mem2["content"][:100]
                            })
                    except Exception:
                        continue

        # Sort by confidence, best first, then trim to the requested limit.
        suggestions.sort(key=lambda x: x["confidence"], reverse=True)
        suggestions = suggestions[:limit]

        return {
            "suggestions": suggestions,
            "total_found": len(suggestions),
            "auto_apply_candidates": [
                s for s in suggestions
                if s["confidence"] >= self.HIGH_CONFIDENCE
            ]
        }
749
+
750
+ def _infer_relationship_type(
751
+ self,
752
+ type1: str, content1: str,
753
+ type2: str, content2: str
754
+ ) -> str:
755
+ """Infer the most likely relationship type between two memories."""
756
+ content1_lower = content1.lower()
757
+ content2_lower = content2.lower()
758
+
759
+ # Error + fix pattern
760
+ if type1 == "error" and type2 in ["code", "decision"]:
761
+ if any(w in content2_lower for w in ["fix", "solve", "resolve", "solution"]):
762
+ return "fixes"
763
+ if type2 == "error" and type1 in ["code", "decision"]:
764
+ if any(w in content1_lower for w in ["fix", "solve", "resolve", "solution"]):
765
+ return "fixes"
766
+
767
+ # Cause-effect pattern
768
+ if any(w in content1_lower for w in ["because", "caused", "led to", "resulted"]):
769
+ return "caused_by"
770
+ if any(w in content2_lower for w in ["because", "caused", "led to", "resulted"]):
771
+ return "caused_by"
772
+
773
+ # Contradiction pattern
774
+ if any(w in content1_lower for w in ["but", "however", "instead", "contrary"]):
775
+ return "contradicts"
776
+ if any(w in content2_lower for w in ["but", "however", "instead", "contrary"]):
777
+ return "contradicts"
778
+
779
+ # Support pattern
780
+ if type1 == type2 == "decision":
781
+ return "supports"
782
+
783
+ # Default to related
784
+ return "related"
785
+
786
+ def _calculate_link_confidence(
787
+ self,
788
+ similarity: float,
789
+ type1: str,
790
+ type2: str
791
+ ) -> float:
792
+ """Calculate confidence score for a suggested link."""
793
+ base = similarity
794
+
795
+ # Boost for complementary types
796
+ complementary = {
797
+ ("error", "code"): 0.1,
798
+ ("error", "decision"): 0.1,
799
+ ("decision", "decision"): 0.05,
800
+ ("code", "code"): 0.05,
801
+ }
802
+
803
+ pair = (type1, type2) if type1 <= type2 else (type2, type1)
804
+ boost = complementary.get(pair, 0)
805
+
806
+ return min(base + boost, 1.0)
807
+
808
+ # ================================================================
809
+ # QUALITY SCORING
810
+ # ================================================================
811
+
812
    async def score_quality(
        self,
        memory_id: Optional[int] = None,
        project_path: Optional[str] = None,
        limit: int = 100
    ) -> Dict[str, Any]:
        """
        Calculate quality scores for memories.

        Quality = f(usage, connections, confidence, age_decay), computed by
        _calculate_quality_score. Scope is a single memory (memory_id), a
        project (project_path), or the most recent `limit` memories overall.

        Args:
            memory_id: Optional specific memory to score
            project_path: Optional project filter
            limit: Maximum memories to score

        Returns:
            Dict with per-memory scores (sorted best first), a summary, and
            the top/bottom slices needing attention.
        """
        cursor = self.db.conn.cursor()

        # Select the scoring scope: one memory, one project, or global.
        if memory_id:
            cursor.execute("""
                SELECT id, content, type, importance, confidence,
                       access_count, decay_factor, created_at
                FROM memories WHERE id = ?
            """, (memory_id,))
            memories = cursor.fetchall()
        elif project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT id, content, type, importance, confidence,
                       access_count, decay_factor, created_at
                FROM memories WHERE project_path = ?
                ORDER BY created_at DESC
                LIMIT ?
            """, (normalized, limit))
            memories = cursor.fetchall()
        else:
            cursor.execute("""
                SELECT id, content, type, importance, confidence,
                       access_count, decay_factor, created_at
                FROM memories
                ORDER BY created_at DESC
                LIMIT ?
            """, (limit,))
            memories = cursor.fetchall()

        scores = []
        for mem in memories:
            # Get connection count (edges in either direction); one query per row.
            cursor.execute("""
                SELECT COUNT(*) as count FROM memory_relationships
                WHERE source_id = ? OR target_id = ?
            """, (mem["id"], mem["id"]))
            connections = cursor.fetchone()["count"]

            # Calculate quality score from the stored metrics plus connectivity.
            quality = self._calculate_quality_score(
                importance=mem["importance"],
                confidence=mem["confidence"],
                access_count=mem["access_count"],
                decay_factor=mem["decay_factor"],
                connections=connections
            )

            scores.append({
                "id": mem["id"],
                "type": mem["type"],
                "content_preview": mem["content"][:100],
                "quality_score": quality,
                "components": {
                    "importance": mem["importance"],
                    "confidence": mem["confidence"],
                    "usage": mem["access_count"],
                    "decay": mem["decay_factor"],
                    "connections": connections
                },
                # Flags mirror the <0.3 / >0.7 buckets used below.
                "needs_attention": quality < 0.3,
                "is_high_quality": quality > 0.7
            })

        scores.sort(key=lambda x: x["quality_score"], reverse=True)

        # Generate insights.
        low_quality = [s for s in scores if s["quality_score"] < 0.3]
        high_quality = [s for s in scores if s["quality_score"] > 0.7]
        avg_quality = sum(s["quality_score"] for s in scores) / len(scores) if scores else 0

        return {
            "scores": scores,
            "summary": {
                "total_scored": len(scores),
                "average_quality": round(avg_quality, 3),
                "high_quality_count": len(high_quality),
                "needs_attention_count": len(low_quality)
            },
            "needs_attention": low_quality[:10],
            "top_quality": high_quality[:10]
        }
913
+
914
+ def _calculate_quality_score(
915
+ self,
916
+ importance: int,
917
+ confidence: float,
918
+ access_count: int,
919
+ decay_factor: float,
920
+ connections: int
921
+ ) -> float:
922
+ """Calculate overall quality score (0-1)."""
923
+ # Normalize components
924
+ importance_norm = (importance or 5) / 10 # 0-1
925
+ confidence_norm = confidence or 0.5 # Already 0-1
926
+ usage_norm = min((access_count or 0) / 20, 1) # Cap at 20 uses
927
+ decay_norm = decay_factor or 1.0 # Already 0-1
928
+ connection_norm = min(connections / 10, 1) # Cap at 10 connections
929
+
930
+ # Weighted average
931
+ weights = {
932
+ "importance": 0.25,
933
+ "confidence": 0.25,
934
+ "usage": 0.15,
935
+ "decay": 0.15,
936
+ "connections": 0.20
937
+ }
938
+
939
+ score = (
940
+ importance_norm * weights["importance"] +
941
+ confidence_norm * weights["confidence"] +
942
+ usage_norm * weights["usage"] +
943
+ decay_norm * weights["decay"] +
944
+ connection_norm * weights["connections"]
945
+ )
946
+
947
+ return round(score, 3)
948
+
949
+ # ================================================================
950
+ # ORPHAN DETECTION
951
+ # ================================================================
952
+
953
+ async def find_orphan_memories(
954
+ self,
955
+ project_path: Optional[str] = None,
956
+ limit: int = 50
957
+ ) -> List[Dict]:
958
+ """Find memories with no relationships."""
959
+ cursor = self.db.conn.cursor()
960
+
961
+ if project_path:
962
+ from services.database import normalize_path
963
+ normalized = normalize_path(project_path)
964
+ cursor.execute("""
965
+ SELECT m.id, m.content, m.type, m.importance, m.confidence, m.created_at
966
+ FROM memories m
967
+ LEFT JOIN memory_relationships mr1 ON m.id = mr1.source_id
968
+ LEFT JOIN memory_relationships mr2 ON m.id = mr2.target_id
969
+ WHERE mr1.id IS NULL AND mr2.id IS NULL AND m.project_path = ?
970
+ ORDER BY m.importance DESC, m.created_at DESC
971
+ LIMIT ?
972
+ """, (normalized, limit))
973
+ else:
974
+ cursor.execute("""
975
+ SELECT m.id, m.content, m.type, m.importance, m.confidence, m.created_at
976
+ FROM memories m
977
+ LEFT JOIN memory_relationships mr1 ON m.id = mr1.source_id
978
+ LEFT JOIN memory_relationships mr2 ON m.id = mr2.target_id
979
+ WHERE mr1.id IS NULL AND mr2.id IS NULL
980
+ ORDER BY m.importance DESC, m.created_at DESC
981
+ LIMIT ?
982
+ """, (limit,))
983
+
984
+ orphans = []
985
+ for row in cursor.fetchall():
986
+ orphans.append({
987
+ "id": row["id"],
988
+ "content": row["content"][:150],
989
+ "type": row["type"],
990
+ "importance": row["importance"],
991
+ "confidence": row["confidence"],
992
+ "created_at": row["created_at"]
993
+ })
994
+
995
+ return orphans
996
+
997
+ # ================================================================
998
+ # CURATED CONTEXT GENERATION
999
+ # ================================================================
1000
+
1001
    async def generate_summary(
        self,
        query: str,
        project_path: Optional[str] = None,
        max_memories: int = 10,
        include_graph: bool = True
    ) -> Dict[str, Any]:
        """
        Generate curated context summary for a query.

        This is what gets injected into the main Claude's context
        via the grounding hook.

        Args:
            query: The topic/query to generate context for
            project_path: Optional project filter
            max_memories: Maximum memories to include
            include_graph: Include relationship graph context

        Returns:
            Dict with curated context summary: markdown-style "context"
            text, the contributing memory ids, optional graph context,
            pending curator review counts, and a generation timestamp.
        """
        # Search for relevant memories (fixed 0.5 relevance threshold).
        from skills.search import semantic_search
        results = await semantic_search(
            db=self.db,
            embeddings=self.embeddings,
            query=query,
            limit=max_memories,
            project_path=project_path,
            threshold=0.5
        )

        memories = results.get("results", [])

        # Nothing relevant: return an empty-context shell (note this early
        # return has no "pending_reviews"/"generated_at" keys).
        if not memories:
            return {
                "query": query,
                "context": "No relevant memories found.",
                "memories": [],
                "graph_context": None
            }

        # Build context sections
        sections = []

        # Group by type; memories without a type default to "chunk".
        by_type = defaultdict(list)
        for mem in memories:
            by_type[mem.get("type", "chunk")].append(mem)

        # Decisions first (most important for context); each section shows
        # at most 3 entries, each truncated to 200 chars.
        if by_type.get("decision"):
            sections.append("**Key Decisions:**")
            for mem in by_type["decision"][:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Errors and fixes
        if by_type.get("error"):
            sections.append("\n**Known Issues:**")
            for mem in by_type["error"][:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Code patterns
        if by_type.get("code"):
            sections.append("\n**Code Patterns:**")
            for mem in by_type["code"][:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Other relevant: anything not already covered above, flattened in
        # by_type insertion order.
        other = [m for t, mems in by_type.items()
                 for m in mems if t not in ["decision", "error", "code"]]
        if other:
            sections.append("\n**Related Context:**")
            for mem in other[:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Build graph context if requested
        graph_context = None
        if include_graph and memories:
            graph_context = await self._build_graph_context(memories)

        # Check for pending curator items (duplicates/links/orphans counts).
        pending = await self._get_pending_reviews(project_path)

        return {
            "query": query,
            "context": "\n".join(sections),
            "memories": [
                {"id": m["id"], "type": m.get("type"), "relevance": m.get("relevance", 0)}
                for m in memories
            ],
            "graph_context": graph_context,
            "pending_reviews": pending,
            "generated_at": datetime.now().isoformat()
        }
1097
+
1098
+ async def _build_graph_context(self, memories: List[Dict]) -> Dict[str, Any]:
1099
+ """Build graph relationship context for memories."""
1100
+ memory_ids = [m["id"] for m in memories if m.get("id")]
1101
+ if not memory_ids:
1102
+ return None
1103
+
1104
+ cursor = self.db.conn.cursor()
1105
+
1106
+ # Get relationships between these memories
1107
+ placeholders = ','.join('?' * len(memory_ids))
1108
+ cursor.execute(f"""
1109
+ SELECT source_id, target_id, relationship, strength
1110
+ FROM memory_relationships
1111
+ WHERE source_id IN ({placeholders}) OR target_id IN ({placeholders})
1112
+ """, memory_ids + memory_ids)
1113
+
1114
+ edges = []
1115
+ for row in cursor.fetchall():
1116
+ edges.append({
1117
+ "source": row["source_id"],
1118
+ "target": row["target_id"],
1119
+ "type": row["relationship"],
1120
+ "strength": row["strength"]
1121
+ })
1122
+
1123
+ # Format as readable context
1124
+ if not edges:
1125
+ return {"edges": [], "summary": "No relationships between these memories"}
1126
+
1127
+ relationship_summary = []
1128
+ for edge in edges[:10]:
1129
+ relationship_summary.append(
1130
+ f"Memory #{edge['source']} {edge['type']} Memory #{edge['target']}"
1131
+ )
1132
+
1133
+ return {
1134
+ "edges": edges,
1135
+ "summary": "; ".join(relationship_summary),
1136
+ "edge_count": len(edges)
1137
+ }
1138
+
1139
+ async def _get_pending_reviews(self, project_path: Optional[str] = None) -> Dict[str, Any]:
1140
+ """Get pending curator review items."""
1141
+ # Check for duplicates
1142
+ duplicates = await self.find_duplicates(
1143
+ project_path=project_path,
1144
+ similarity_threshold=0.92,
1145
+ limit=5
1146
+ )
1147
+
1148
+ # Check for suggested links
1149
+ suggestions = await self.suggest_relationships(
1150
+ project_path=project_path,
1151
+ similarity_threshold=0.8,
1152
+ limit=5
1153
+ )
1154
+
1155
+ # Check for orphans
1156
+ orphans = await self.find_orphan_memories(
1157
+ project_path=project_path,
1158
+ limit=5
1159
+ )
1160
+
1161
+ return {
1162
+ "duplicate_clusters": len(duplicates.get("duplicate_clusters", [])),
1163
+ "suggested_links": len(suggestions.get("suggestions", [])),
1164
+ "orphan_memories": len(orphans),
1165
+ "total_pending": (
1166
+ len(duplicates.get("duplicate_clusters", [])) +
1167
+ len(suggestions.get("suggestions", [])) +
1168
+ len(orphans)
1169
+ )
1170
+ }
1171
+
1172
+ # ================================================================
1173
+ # MERGE OPERATIONS
1174
+ # ================================================================
1175
+
1176
    async def merge_memories(
        self,
        keep_id: int,
        remove_ids: List[int],
        merge_content: bool = False
    ) -> Dict[str, Any]:
        """
        Merge duplicate memories into one.

        For each removed memory: its relationships are re-pointed at the
        kept memory, its row is copied to memory_archive with reason
        'merged', and then deleted.  The kept memory's importance is bumped
        by the number of merges (capped at 10).

        Args:
            keep_id: Memory ID to keep
            remove_ids: Memory IDs to merge into keep_id
            merge_content: If True, append removed content to kept memory

        Returns:
            Dict with merge result, or {"error": ...} if keep_id is missing.
        """
        cursor = self.db.conn.cursor()

        # Verify keep memory exists
        cursor.execute("SELECT * FROM memories WHERE id = ?", (keep_id,))
        keep_memory = cursor.fetchone()
        if not keep_memory:
            return {"error": f"Memory {keep_id} not found"}

        merged_count = 0
        merged_relationships = 0

        for remove_id in remove_ids:
            # Guard: never merge a memory into itself.
            if remove_id == keep_id:
                continue

            # Missing ids are skipped silently rather than failing the batch.
            cursor.execute("SELECT * FROM memories WHERE id = ?", (remove_id,))
            remove_memory = cursor.fetchone()
            if not remove_memory:
                continue

            # Transfer relationships.  OR IGNORE skips rows whose re-pointed
            # edge would violate a uniqueness constraint (i.e. the kept
            # memory already has that edge); leftovers are deleted below.
            # Update outgoing relationships
            cursor.execute("""
                UPDATE OR IGNORE memory_relationships
                SET source_id = ?
                WHERE source_id = ?
            """, (keep_id, remove_id))
            merged_relationships += cursor.rowcount

            # Update incoming relationships
            cursor.execute("""
                UPDATE OR IGNORE memory_relationships
                SET target_id = ?
                WHERE target_id = ?
            """, (keep_id, remove_id))
            merged_relationships += cursor.rowcount

            # Delete any relationships still referencing the removed memory
            # (the ones OR IGNORE could not transfer).
            cursor.execute("""
                DELETE FROM memory_relationships
                WHERE source_id = ? OR target_id = ?
            """, (remove_id, remove_id))

            # Optionally merge content: append with a provenance marker.
            if merge_content:
                cursor.execute("""
                    UPDATE memories
                    SET content = content || '\n\n[Merged from #' || ? || ']: ' || ?
                    WHERE id = ?
                """, (remove_id, remove_memory["content"], keep_id))

            # Archive the removed memory so the merge is recoverable.
            cursor.execute("""
                INSERT INTO memory_archive
                (original_id, type, content, embedding, project_path, session_id,
                 importance, access_count, decay_factor, metadata, archive_reason)
                SELECT id, type, content, embedding, project_path, session_id,
                       importance, access_count, decay_factor, metadata, 'merged'
                FROM memories WHERE id = ?
            """, (remove_id,))

            # Delete the memory
            cursor.execute("DELETE FROM memories WHERE id = ?", (remove_id,))
            merged_count += 1

        self.db.conn.commit()

        # Boost importance: each absorbed memory adds 1, capped at 10.
        if merged_count > 0:
            new_importance = min(keep_memory["importance"] + merged_count, 10)
            cursor.execute("""
                UPDATE memories SET importance = ? WHERE id = ?
            """, (new_importance, keep_id))
            self.db.conn.commit()

        return {
            "success": True,
            "kept_id": keep_id,
            "merged_count": merged_count,
            "relationships_transferred": merged_relationships,
            # Recomputed from the pre-merge snapshot; equals the stored value.
            "new_importance": min(keep_memory["importance"] + merged_count, 10)
        }
1275
+
1276
+ # ================================================================
1277
+ # MAINTENANCE TASKS
1278
+ # ================================================================
1279
+
1280
    async def run_maintenance(
        self,
        project_path: Optional[str] = None,
        tasks: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Run curator maintenance tasks.

        Dedup and link tasks are additionally gated by per-project config
        flags (auto_dedup_enabled / auto_link_enabled); high-confidence
        findings are auto-applied with conservative caps.

        Args:
            project_path: Optional project filter
            tasks: Specific tasks to run, or None for all
                   Options: dedup, orphans, links, decay, quality

        Returns:
            Dict with maintenance report (findings, actions_taken,
            recommendations, timestamps); also persisted via _save_report.
        """
        all_tasks = ["dedup", "orphans", "links", "decay", "quality"]
        tasks_to_run = tasks or all_tasks

        report = {
            "started_at": datetime.now().isoformat(),
            "project_path": project_path,
            "tasks_run": [],
            "findings": {},
            "actions_taken": {},
            "recommendations": []
        }

        # Get config
        config = await self.get_config(project_path)

        if "dedup" in tasks_to_run and config.get("auto_dedup_enabled", True):
            duplicates = await self.find_duplicates(
                project_path=project_path,
                similarity_threshold=config.get("dedup_threshold", 0.92)
            )
            report["findings"]["duplicates"] = duplicates.get("duplicates_found", 0)
            report["tasks_run"].append("dedup")

            # Auto-merge high-confidence duplicates (at most 5 per run).
            auto_merge = duplicates.get("auto_merge_candidates", [])
            if auto_merge:
                for pair in auto_merge[:5]:  # Limit auto-merges
                    rec = pair["merge_recommendation"]
                    await self.merge_memories(
                        keep_id=rec["keep"],
                        remove_ids=[rec["remove"]]
                    )
                report["actions_taken"]["auto_merged"] = len(auto_merge[:5])

        if "orphans" in tasks_to_run:
            orphans = await self.find_orphan_memories(project_path=project_path)
            report["findings"]["orphans"] = len(orphans)
            report["tasks_run"].append("orphans")

            # Orphans are only reported, never auto-archived.
            if orphans:
                report["recommendations"].append(
                    f"Found {len(orphans)} orphan memories - consider linking or archiving"
                )

        if "links" in tasks_to_run and config.get("auto_link_enabled", True):
            suggestions = await self.suggest_relationships(
                project_path=project_path,
                similarity_threshold=0.75
            )
            report["findings"]["suggested_links"] = len(suggestions.get("suggestions", []))
            report["tasks_run"].append("links")

            # Auto-apply high-confidence links (at most 10 per run).
            auto_links = suggestions.get("auto_apply_candidates", [])
            if auto_links:
                for link in auto_links[:10]:
                    await self.db.create_relationship(
                        source_id=link["source_id"],
                        target_id=link["target_id"],
                        relationship=link["relationship"],
                        strength=link["similarity"]
                    )
                report["actions_taken"]["auto_linked"] = len(auto_links[:10])

        if "quality" in tasks_to_run:
            quality = await self.score_quality(project_path=project_path)
            report["findings"]["quality_summary"] = quality.get("summary", {})
            report["tasks_run"].append("quality")

            needs_attention = quality.get("needs_attention", [])
            if needs_attention:
                report["recommendations"].append(
                    f"{len(needs_attention)} memories need attention (low quality score)"
                )

        if "decay" in tasks_to_run:
            # Apply confidence decay to unused memories
            decayed = await self._apply_confidence_decay(project_path)
            report["actions_taken"]["memories_decayed"] = decayed
            report["tasks_run"].append("decay")

        report["completed_at"] = datetime.now().isoformat()

        # Save report
        await self._save_report(report, project_path)

        return report
1383
+
1384
+ async def _apply_confidence_decay(
1385
+ self,
1386
+ project_path: Optional[str] = None,
1387
+ decay_rate: float = 0.95
1388
+ ) -> int:
1389
+ """Apply decay to memories not accessed recently."""
1390
+ cursor = self.db.conn.cursor()
1391
+
1392
+ # Decay memories not accessed in the last 30 days
1393
+ cutoff = (datetime.now() - timedelta(days=30)).isoformat()
1394
+
1395
+ if project_path:
1396
+ from services.database import normalize_path
1397
+ normalized = normalize_path(project_path)
1398
+ cursor.execute("""
1399
+ UPDATE memories
1400
+ SET decay_factor = decay_factor * ?,
1401
+ confidence = confidence * ?
1402
+ WHERE (last_accessed IS NULL OR last_accessed < ?)
1403
+ AND project_path = ?
1404
+ AND decay_factor > 0.1
1405
+ """, (decay_rate, decay_rate, cutoff, normalized))
1406
+ else:
1407
+ cursor.execute("""
1408
+ UPDATE memories
1409
+ SET decay_factor = decay_factor * ?,
1410
+ confidence = confidence * ?
1411
+ WHERE (last_accessed IS NULL OR last_accessed < ?)
1412
+ AND decay_factor > 0.1
1413
+ """, (decay_rate, decay_rate, cutoff))
1414
+
1415
+ decayed = cursor.rowcount
1416
+ self.db.conn.commit()
1417
+ return decayed
1418
+
1419
+ async def _save_report(self, report: Dict, project_path: Optional[str] = None):
1420
+ """Save maintenance report to database."""
1421
+ cursor = self.db.conn.cursor()
1422
+
1423
+ from services.database import normalize_path
1424
+ normalized = normalize_path(project_path) if project_path else None
1425
+
1426
+ cursor.execute("""
1427
+ INSERT INTO curator_reports
1428
+ (project_path, report_type, summary, findings, actions_taken, recommendations)
1429
+ VALUES (?, 'maintenance', ?, ?, ?, ?)
1430
+ """, (
1431
+ normalized,
1432
+ f"Ran tasks: {', '.join(report.get('tasks_run', []))}",
1433
+ json.dumps(report.get("findings", {})),
1434
+ json.dumps(report.get("actions_taken", {})),
1435
+ json.dumps(report.get("recommendations", []))
1436
+ ))
1437
+ self.db.conn.commit()
1438
+
1439
+ # ================================================================
1440
+ # CONFIGURATION
1441
+ # ================================================================
1442
+
1443
+ async def get_config(self, project_path: Optional[str] = None) -> Dict[str, Any]:
1444
+ """Get curator configuration for a project."""
1445
+ cursor = self.db.conn.cursor()
1446
+
1447
+ if project_path:
1448
+ from services.database import normalize_path
1449
+ normalized = normalize_path(project_path)
1450
+ cursor.execute("""
1451
+ SELECT * FROM curator_config WHERE project_path = ?
1452
+ """, (normalized,))
1453
+ row = cursor.fetchone()
1454
+ if row:
1455
+ return dict(row)
1456
+
1457
+ return self.DEFAULT_CONFIG.copy()
1458
+
1459
+ async def update_config(
1460
+ self,
1461
+ project_path: str,
1462
+ **config_updates
1463
+ ) -> Dict[str, Any]:
1464
+ """Update curator configuration for a project."""
1465
+ cursor = self.db.conn.cursor()
1466
+
1467
+ from services.database import normalize_path
1468
+ normalized = normalize_path(project_path)
1469
+
1470
+ # Get existing or default
1471
+ existing = await self.get_config(project_path)
1472
+ existing.update(config_updates)
1473
+
1474
+ cursor.execute("""
1475
+ INSERT INTO curator_config
1476
+ (project_path, auto_dedup_enabled, auto_link_enabled, dedup_threshold,
1477
+ maintenance_interval_hours, curator_active)
1478
+ VALUES (?, ?, ?, ?, ?, ?)
1479
+ ON CONFLICT(project_path) DO UPDATE SET
1480
+ auto_dedup_enabled = excluded.auto_dedup_enabled,
1481
+ auto_link_enabled = excluded.auto_link_enabled,
1482
+ dedup_threshold = excluded.dedup_threshold,
1483
+ maintenance_interval_hours = excluded.maintenance_interval_hours,
1484
+ curator_active = excluded.curator_active
1485
+ """, (
1486
+ normalized,
1487
+ existing.get("auto_dedup_enabled", True),
1488
+ existing.get("auto_link_enabled", True),
1489
+ existing.get("dedup_threshold", 0.92),
1490
+ existing.get("maintenance_interval_hours", 24),
1491
+ existing.get("curator_active", True)
1492
+ ))
1493
+ self.db.conn.commit()
1494
+
1495
+ return existing
1496
+
1497
+ async def get_latest_report(
1498
+ self,
1499
+ project_path: Optional[str] = None
1500
+ ) -> Optional[Dict[str, Any]]:
1501
+ """Get the latest curator report."""
1502
+ cursor = self.db.conn.cursor()
1503
+
1504
+ if project_path:
1505
+ from services.database import normalize_path
1506
+ normalized = normalize_path(project_path)
1507
+ cursor.execute("""
1508
+ SELECT * FROM curator_reports
1509
+ WHERE project_path = ?
1510
+ ORDER BY created_at DESC
1511
+ LIMIT 1
1512
+ """, (normalized,))
1513
+ else:
1514
+ cursor.execute("""
1515
+ SELECT * FROM curator_reports
1516
+ ORDER BY created_at DESC
1517
+ LIMIT 1
1518
+ """)
1519
+
1520
+ row = cursor.fetchone()
1521
+ if not row:
1522
+ return None
1523
+
1524
+ return {
1525
+ "id": row["id"],
1526
+ "project_path": row["project_path"],
1527
+ "report_type": row["report_type"],
1528
+ "created_at": row["created_at"],
1529
+ "summary": row["summary"],
1530
+ "findings": json.loads(row["findings"]) if row["findings"] else {},
1531
+ "actions_taken": json.loads(row["actions_taken"]) if row["actions_taken"] else {},
1532
+ "recommendations": json.loads(row["recommendations"]) if row["recommendations"] else []
1533
+ }
1534
+
1535
+ async def get_status(self) -> Dict[str, Any]:
1536
+ """Get current curator agent status."""
1537
+ cursor = self.db.conn.cursor()
1538
+
1539
+ # Get total memories
1540
+ cursor.execute("SELECT COUNT(*) as total FROM memories")
1541
+ total_memories = cursor.fetchone()["total"]
1542
+
1543
+ # Get total relationships
1544
+ cursor.execute("SELECT COUNT(*) as total FROM memory_relationships")
1545
+ total_relationships = cursor.fetchone()["total"]
1546
+
1547
+ # Get orphan count
1548
+ cursor.execute("""
1549
+ SELECT COUNT(*) as count FROM memories m
1550
+ LEFT JOIN memory_relationships mr1 ON m.id = mr1.source_id
1551
+ LEFT JOIN memory_relationships mr2 ON m.id = mr2.target_id
1552
+ WHERE mr1.id IS NULL AND mr2.id IS NULL
1553
+ """)
1554
+ orphan_count = cursor.fetchone()["count"]
1555
+
1556
+ # Get latest report
1557
+ latest_report = await self.get_latest_report()
1558
+
1559
+ return {
1560
+ "active": True,
1561
+ "total_memories": total_memories,
1562
+ "total_relationships": total_relationships,
1563
+ "orphan_count": orphan_count,
1564
+ "connection_ratio": round(total_relationships / max(total_memories, 1), 2),
1565
+ "last_maintenance": latest_report.get("created_at") if latest_report else None,
1566
+ "last_report_summary": latest_report.get("summary") if latest_report else None
1567
+ }
1568
+
1569
+
1570
# Singleton instance, shared process-wide; created lazily by get_curator().
_curator_instance: Optional[MemoryCurator] = None
1572
+
1573
+
1574
def get_curator(db, embeddings) -> MemoryCurator:
    """Get or create the curator singleton.

    The db/embeddings arguments are only used on first call; later calls
    return the already-constructed instance regardless of arguments.
    """
    global _curator_instance
    if _curator_instance is None:
        _curator_instance = MemoryCurator(db, embeddings)
    return _curator_instance
1580
+
1581
+
1582
async def run_curator_scheduler(
    db,
    embeddings,
    interval_hours: int = 24
):
    """Background scheduler for curator maintenance.

    Sleeps for `interval_hours`, runs a maintenance pass, and repeats
    until cancelled.  Unexpected errors are logged and the loop retries
    after a 5-minute backoff instead of dying.

    Args:
        db: Database service passed through to the curator singleton.
        embeddings: Embedding service passed through to the curator singleton.
        interval_hours: Hours between maintenance runs.
    """
    curator = get_curator(db, embeddings)

    while True:
        try:
            # Wait for the interval
            await asyncio.sleep(interval_hours * 3600)

            # Run maintenance
            logger.info("Running scheduled curator maintenance...")
            report = await curator.run_maintenance()
            # BUG FIX: run_maintenance() returns no 'summary' key, so the
            # old log line always printed an empty string; report the tasks
            # that actually ran instead.
            logger.info(
                f"Curator maintenance complete: tasks={report.get('tasks_run', [])}"
            )

        except asyncio.CancelledError:
            logger.info("Curator scheduler cancelled")
            break
        except Exception as e:
            logger.error(f"Curator scheduler error: {e}")
            # Continue running despite errors
            await asyncio.sleep(300)  # Wait 5 min before retry