claude-memory-agent 2.0.0 → 2.1.0
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/README.md +206 -200
- package/agent_card.py +186 -0
- package/bin/cli.js +317 -181
- package/bin/postinstall.js +270 -216
- package/dashboard.html +4232 -2689
- package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/grounding-hook.py +422 -348
- package/hooks/session_end.py +293 -192
- package/hooks/session_start.py +227 -227
- package/install.py +919 -887
- package/main.py +4496 -2859
- package/package.json +47 -55
- package/services/__init__.py +50 -50
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/curator.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/curator.py +1606 -0
- package/services/database.py +3637 -2485
- package/skills/__init__.py +21 -1
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
- package/skills/__pycache__/context.cpython-312.pyc +0 -0
- package/skills/__pycache__/curator.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/confidence_tracker.py +441 -0
- package/skills/context.py +675 -0
- package/skills/curator.py +348 -0
- package/skills/search.py +369 -213
- package/skills/session_review.py +418 -0
- package/skills/store.py +377 -179
- package/update_system.py +829 -817
--- /dev/null
+++ package/services/curator.py
@@ -0,0 +1,1606 @@
"""Memory Curator Service - Autonomous graph exploration and maintenance.

The curator agent traverses the memory knowledge graph, finds duplicates,
suggests relationships, scores quality, and provides curated context.
"""
import logging
import json
import asyncio
from typing import Dict, Any, Optional, List, Set, Tuple
from datetime import datetime, timedelta
from collections import defaultdict

logger = logging.getLogger(__name__)


class MemoryCurator:
    """
    Autonomous curator agent for memory graph maintenance.

    Capabilities:
    - Graph exploration (BFS/DFS traversal)
    - Duplicate detection (semantic similarity >0.92)
    - Relationship inference (suggest missing links)
    - Quality scoring (usage + connections + confidence)
    - Curated context generation
    - Scheduled maintenance
    """

    # Confidence thresholds for autonomous actions
    HIGH_CONFIDENCE = 0.9    # Auto-execute
    MEDIUM_CONFIDENCE = 0.7  # Suggest with one-click approval
    LOW_CONFIDENCE = 0.5     # Log for manual review only

    # Default configuration
    DEFAULT_CONFIG = {
        "auto_dedup_enabled": True,
        "auto_link_enabled": True,
        "dedup_threshold": 0.92,
        "maintenance_interval_hours": 24,
        "curator_active": True
    }

    def __init__(self, db, embeddings):
        """
        Initialize the curator with database and embedding services.

        Args:
            db: DatabaseService instance
            embeddings: EmbeddingService instance
        """
        self.db = db
        self.embeddings = embeddings
        self._running = False
        self._last_maintenance: Dict[str, datetime] = {}
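
    # Usage sketch (illustrative, not part of the package): constructing the
    # curator from already-initialized services. The DatabaseService and
    # EmbeddingService names come from the docstring above; how they are
    # built is an assumption, since only this file appears in the diff.
    #
    #     curator = MemoryCurator(db, embeddings)
    #     config = await curator.get_config("/path/to/project")
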
    # ================================================================
    # GRAPH EXPLORATION
    # ================================================================

    async def explore_graph(
        self,
        start_node_id: int,
        max_depth: int = 3,
        mode: str = "bfs",
        relationship_filter: Optional[List[str]] = None,
        include_orphan_check: bool = True
    ) -> Dict[str, Any]:
        """
        Explore the memory graph from a starting node.

        Args:
            start_node_id: ID of the memory to start from
            max_depth: Maximum traversal depth
            mode: 'bfs' (breadth-first) or 'dfs' (depth-first)
            relationship_filter: Only follow these relationship types
            include_orphan_check: Check for orphaned nodes in the exploration

        Returns:
            Dict with explored nodes, edges, clusters, and insights
        """
        cursor = self.db.conn.cursor()

        # Verify start node exists
        cursor.execute("SELECT id, content, type FROM memories WHERE id = ?", (start_node_id,))
        start_node = cursor.fetchone()
        if not start_node:
            return {"error": f"Memory {start_node_id} not found"}

        visited: Set[int] = set()
        nodes: List[Dict] = []
        edges: List[Dict] = []
        depth_map: Dict[int, int] = {start_node_id: 0}

        # BFS/DFS exploration
        if mode == "bfs":
            queue = [start_node_id]
            while queue:
                current_id = queue.pop(0)
                if current_id in visited:
                    continue
                visited.add(current_id)

                current_depth = depth_map.get(current_id, 0)
                if current_depth >= max_depth:
                    continue

                # Get node info
                node_info = await self._get_node_info(current_id)
                if node_info:
                    node_info["depth"] = current_depth
                    nodes.append(node_info)

                # Get connected nodes
                neighbors = await self._get_neighbors(
                    current_id,
                    relationship_filter
                )

                for neighbor_id, edge_info in neighbors:
                    edges.append(edge_info)
                    if neighbor_id not in visited:
                        queue.append(neighbor_id)
                        if neighbor_id not in depth_map:
                            depth_map[neighbor_id] = current_depth + 1
        else:  # DFS
            stack = [start_node_id]
            while stack:
                current_id = stack.pop()
                if current_id in visited:
                    continue
                visited.add(current_id)

                current_depth = depth_map.get(current_id, 0)
                if current_depth >= max_depth:
                    continue

                node_info = await self._get_node_info(current_id)
                if node_info:
                    node_info["depth"] = current_depth
                    nodes.append(node_info)

                neighbors = await self._get_neighbors(
                    current_id,
                    relationship_filter
                )

                for neighbor_id, edge_info in neighbors:
                    edges.append(edge_info)
                    if neighbor_id not in visited:
                        stack.append(neighbor_id)
                        if neighbor_id not in depth_map:
                            depth_map[neighbor_id] = current_depth + 1

        # Identify clusters
        clusters = self._identify_clusters(nodes, edges)

        # Find orphans if requested
        orphans = []
        if include_orphan_check:
            orphans = await self.find_orphan_memories(limit=10)

        return {
            "start_node": start_node_id,
            "mode": mode,
            "max_depth": max_depth,
            "nodes_explored": len(nodes),
            "edges_found": len(edges),
            "nodes": nodes,
            "edges": edges,
            "clusters": clusters,
            "orphans_nearby": orphans[:5] if orphans else [],
            "exploration_insights": self._generate_exploration_insights(nodes, edges, clusters)
        }
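
    # Usage sketch (illustrative; awaited from async code, e.g. an MCP tool
    # handler in main.py -- an assumption, since that call site is not shown
    # in this diff):
    #
    #     result = await curator.explore_graph(start_node_id=42, max_depth=2,
    #                                          mode="bfs")
    #     print(result["nodes_explored"], "nodes,", result["edges_found"], "edges")
    #     for line in result["exploration_insights"]:
    #         print("-", line)
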
    async def _get_node_info(self, memory_id: int) -> Optional[Dict]:
        """Get detailed info for a memory node."""
        cursor = self.db.conn.cursor()
        cursor.execute("""
            SELECT id, type, content, importance, confidence,
                   access_count, decay_factor, project_path, created_at
            FROM memories WHERE id = ?
        """, (memory_id,))
        row = cursor.fetchone()
        if not row:
            return None

        # Get relationship counts
        cursor.execute("""
            SELECT COUNT(*) as outgoing FROM memory_relationships WHERE source_id = ?
        """, (memory_id,))
        outgoing = cursor.fetchone()["outgoing"]

        cursor.execute("""
            SELECT COUNT(*) as incoming FROM memory_relationships WHERE target_id = ?
        """, (memory_id,))
        incoming = cursor.fetchone()["incoming"]

        return {
            "id": row["id"],
            "type": row["type"],
            "content": row["content"][:200] + "..." if len(row["content"]) > 200 else row["content"],
            "importance": row["importance"],
            "confidence": row["confidence"],
            "access_count": row["access_count"],
            "decay_factor": row["decay_factor"],
            "project_path": row["project_path"],
            "created_at": row["created_at"],
            "connection_count": outgoing + incoming,
            "outgoing_edges": outgoing,
            "incoming_edges": incoming
        }

    async def _get_neighbors(
        self,
        memory_id: int,
        relationship_filter: Optional[List[str]] = None
    ) -> List[Tuple[int, Dict]]:
        """Get all neighboring nodes and edge info."""
        cursor = self.db.conn.cursor()

        query = """
            SELECT target_id as neighbor_id, relationship, strength, 'outgoing' as direction
            FROM memory_relationships WHERE source_id = ?
            UNION ALL
            SELECT source_id as neighbor_id, relationship, strength, 'incoming' as direction
            FROM memory_relationships WHERE target_id = ?
        """
        cursor.execute(query, (memory_id, memory_id))

        neighbors = []
        for row in cursor.fetchall():
            if relationship_filter and row["relationship"] not in relationship_filter:
                continue

            edge_info = {
                "source": memory_id if row["direction"] == "outgoing" else row["neighbor_id"],
                "target": row["neighbor_id"] if row["direction"] == "outgoing" else memory_id,
                "relationship": row["relationship"],
                "strength": row["strength"],
                "direction": row["direction"]
            }
            neighbors.append((row["neighbor_id"], edge_info))

        return neighbors

    def _identify_clusters(self, nodes: List[Dict], edges: List[Dict]) -> List[Dict]:
        """Identify clusters of tightly connected nodes."""
        if not nodes:
            return []

        # Build adjacency for clustering
        adjacency = defaultdict(set)
        for edge in edges:
            adjacency[edge["source"]].add(edge["target"])
            adjacency[edge["target"]].add(edge["source"])

        # Simple connected component analysis
        visited = set()
        clusters = []

        for node in nodes:
            node_id = node["id"]
            if node_id in visited:
                continue

            # BFS to find component
            component = []
            queue = [node_id]
            while queue:
                current = queue.pop(0)
                if current in visited:
                    continue
                visited.add(current)
                component.append(current)
                for neighbor in adjacency[current]:
                    if neighbor not in visited:
                        queue.append(neighbor)

            if len(component) > 1:
                # Determine cluster type based on node types
                node_types = defaultdict(int)
                for nid in component:
                    for n in nodes:
                        if n["id"] == nid:
                            node_types[n["type"]] += 1
                            break

                clusters.append({
                    "node_ids": component,
                    "size": len(component),
                    "dominant_type": max(node_types, key=node_types.get) if node_types else "mixed",
                    "type_distribution": dict(node_types)
                })

        return sorted(clusters, key=lambda c: c["size"], reverse=True)
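
    # Worked example (illustrative): with edges 1-2 and 2-3 plus an isolated
    # node 4, the connected-component walk yields one cluster {1, 2, 3};
    # node 4 forms a single-node component and is dropped by the
    # `len(component) > 1` check above.
    #
    #     nodes = [{"id": i, "type": "code"} for i in (1, 2, 3, 4)]
    #     edges = [{"source": 1, "target": 2}, {"source": 2, "target": 3}]
    #     clusters = curator._identify_clusters(nodes, edges)
    #     # -> [{"node_ids": [1, 2, 3], "size": 3, "dominant_type": "code", ...}]
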
    def _generate_exploration_insights(
        self,
        nodes: List[Dict],
        edges: List[Dict],
        clusters: List[Dict]
    ) -> List[str]:
        """Generate insights from the exploration."""
        insights = []

        if not nodes:
            return ["No nodes found in exploration"]

        # Type distribution
        type_counts = defaultdict(int)
        for node in nodes:
            type_counts[node["type"]] += 1

        dominant = max(type_counts, key=type_counts.get)
        insights.append(f"Dominant memory type: {dominant} ({type_counts[dominant]}/{len(nodes)})")

        # Connection density
        if nodes:
            avg_connections = sum(n.get("connection_count", 0) for n in nodes) / len(nodes)
            if avg_connections < 1:
                insights.append("Low connectivity: Consider adding more relationships")
            elif avg_connections > 5:
                insights.append("High connectivity: Knowledge graph is well-connected")

        # Cluster analysis
        if clusters:
            largest = clusters[0]
            insights.append(f"Largest cluster: {largest['size']} nodes ({largest['dominant_type']})")

        # Quality indicators
        low_confidence = [n for n in nodes if n.get("confidence", 0.5) < 0.3]
        if low_confidence:
            insights.append(f"{len(low_confidence)} nodes with low confidence need review")

        high_importance = [n for n in nodes if n.get("importance", 5) >= 8]
        if high_importance:
            insights.append(f"{len(high_importance)} high-importance nodes in this subgraph")

        return insights

    # ================================================================
    # DUPLICATE DETECTION
    # ================================================================

    async def find_duplicates(
        self,
        project_path: Optional[str] = None,
        similarity_threshold: float = 0.92,
        limit: int = 50
    ) -> Dict[str, Any]:
        """
        Find semantically similar (duplicate) memories.

        Args:
            project_path: Optional project filter
            similarity_threshold: Minimum similarity to consider duplicates (default 0.92)
            limit: Maximum number of duplicate pairs to return

        Returns:
            Dict with duplicate clusters and merge suggestions
        """
        cursor = self.db.conn.cursor()

        # Get memories with embeddings
        if project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT id, content, type, importance, confidence, embedding, created_at
                FROM memories
                WHERE embedding IS NOT NULL AND project_path = ?
                ORDER BY created_at DESC
                LIMIT 500
            """, (normalized,))
        else:
            cursor.execute("""
                SELECT id, content, type, importance, confidence, embedding, created_at
                FROM memories
                WHERE embedding IS NOT NULL
                ORDER BY created_at DESC
                LIMIT 500
            """)

        memories = cursor.fetchall()
        if len(memories) < 2:
            return {"duplicate_clusters": [], "total_memories_checked": len(memories)}

        # Parse embeddings
        memory_data = []
        for mem in memories:
            try:
                embedding = json.loads(mem["embedding"])
                memory_data.append({
                    "id": mem["id"],
                    "content": mem["content"],
                    "type": mem["type"],
                    "importance": mem["importance"],
                    "confidence": mem["confidence"],
                    "embedding": embedding,
                    "created_at": mem["created_at"]
                })
            except (json.JSONDecodeError, TypeError):
                continue

        # Find duplicate pairs
        import numpy as np
        duplicate_pairs = []
        checked_pairs = set()

        for i, mem1 in enumerate(memory_data):
            for j, mem2 in enumerate(memory_data):
                if i >= j:
                    continue

                pair_key = (min(mem1["id"], mem2["id"]), max(mem1["id"], mem2["id"]))
                if pair_key in checked_pairs:
                    continue
                checked_pairs.add(pair_key)

                # Calculate cosine similarity
                try:
                    vec1 = np.array(mem1["embedding"])
                    vec2 = np.array(mem2["embedding"])
                    similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

                    if similarity >= similarity_threshold:
                        duplicate_pairs.append({
                            "memory1": {
                                "id": mem1["id"],
                                "content": mem1["content"][:150],
                                "type": mem1["type"],
                                "importance": mem1["importance"],
                                "confidence": mem1["confidence"],
                                "created_at": mem1["created_at"]
                            },
                            "memory2": {
                                "id": mem2["id"],
                                "content": mem2["content"][:150],
                                "type": mem2["type"],
                                "importance": mem2["importance"],
                                "confidence": mem2["confidence"],
                                "created_at": mem2["created_at"]
                            },
                            "similarity": float(similarity),
                            "merge_recommendation": self._get_merge_recommendation(mem1, mem2, similarity)
                        })
                except Exception as e:
                    logger.debug(f"Error calculating similarity: {e}")
                    continue

        # Sort by similarity and limit
        duplicate_pairs.sort(key=lambda x: x["similarity"], reverse=True)
        duplicate_pairs = duplicate_pairs[:limit]

        # Cluster duplicates (transitive grouping)
        clusters = self._cluster_duplicates(duplicate_pairs)

        return {
            "duplicate_clusters": clusters,
            "duplicate_pairs": duplicate_pairs,
            "total_memories_checked": len(memory_data),
            "duplicates_found": len(duplicate_pairs),
            "threshold_used": similarity_threshold,
            "auto_merge_candidates": [
                p for p in duplicate_pairs
                if p["merge_recommendation"]["confidence"] >= self.HIGH_CONFIDENCE
            ]
        }
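
    # Cosine similarity, as computed above, is dot(v1, v2) / (|v1| * |v2|).
    # Worked example (illustrative, made-up vectors):
    #
    #     import numpy as np
    #     v1 = np.array([1.0, 0.0, 1.0])
    #     v2 = np.array([1.0, 1.0, 1.0])
    #     sim = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    #     # 2 / (1.414 * 1.732) ~= 0.816 -- well below the 0.92 dedup
    #     # threshold, so this pair would not be flagged as duplicates.
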
    def _get_merge_recommendation(
        self,
        mem1: Dict,
        mem2: Dict,
        similarity: float
    ) -> Dict[str, Any]:
        """Determine which memory to keep in a merge."""
        # Scoring: higher is better to keep
        score1 = 0
        score2 = 0

        # Prefer higher importance
        score1 += mem1["importance"] * 2
        score2 += mem2["importance"] * 2

        # Prefer higher confidence
        score1 += mem1["confidence"] * 10
        score2 += mem2["confidence"] * 10

        # Prefer longer content (more detail)
        score1 += min(len(mem1["content"]) / 100, 5)
        score2 += min(len(mem2["content"]) / 100, 5)

        # Prefer newer for decisions, older for established patterns
        if mem1["type"] == "decision":
            # Newer decisions are more relevant
            score1 += 3 if mem1["created_at"] > mem2["created_at"] else 0
            score2 += 3 if mem2["created_at"] > mem1["created_at"] else 0
        else:
            # Older patterns are more established
            score1 += 2 if mem1["created_at"] < mem2["created_at"] else 0
            score2 += 2 if mem2["created_at"] < mem1["created_at"] else 0

        keep_id = mem1["id"] if score1 >= score2 else mem2["id"]
        remove_id = mem2["id"] if score1 >= score2 else mem1["id"]

        # Confidence in recommendation
        score_diff = abs(score1 - score2)
        if score_diff > 10 and similarity > 0.95:
            confidence = self.HIGH_CONFIDENCE
        elif score_diff > 5 and similarity > 0.93:
            confidence = self.MEDIUM_CONFIDENCE
        else:
            confidence = self.LOW_CONFIDENCE

        return {
            "keep": keep_id,
            "remove": remove_id,
            "confidence": confidence,
            "reason": f"Score {keep_id}={max(score1, score2):.1f} vs {remove_id}={min(score1, score2):.1f}"
        }

    def _cluster_duplicates(self, pairs: List[Dict]) -> List[Dict]:
        """Cluster duplicate pairs into groups."""
        if not pairs:
            return []

        # Build union-find
        parent = {}

        def find(x):
            if x not in parent:
                parent[x] = x
            if parent[x] != x:
                parent[x] = find(parent[x])
            return parent[x]

        def union(x, y):
            px, py = find(x), find(y)
            if px != py:
                parent[px] = py

        # Union all pairs
        for pair in pairs:
            union(pair["memory1"]["id"], pair["memory2"]["id"])

        # Group by root
        clusters_map = defaultdict(list)
        all_ids = set()
        for pair in pairs:
            all_ids.add(pair["memory1"]["id"])
            all_ids.add(pair["memory2"]["id"])

        for mem_id in all_ids:
            root = find(mem_id)
            clusters_map[root].append(mem_id)

        # Build cluster objects
        clusters = []
        for root, members in clusters_map.items():
            if len(members) > 1:
                # Find the best candidate to keep
                best_id = None
                best_score = -1
                for pair in pairs:
                    if pair["memory1"]["id"] in members:
                        rec = pair["merge_recommendation"]
                        if rec["keep"] in members and rec["confidence"] > best_score:
                            best_id = rec["keep"]
                            best_score = rec["confidence"]

                clusters.append({
                    "member_ids": sorted(members),
                    "size": len(members),
                    "recommended_keep": best_id,
                    "merge_confidence": best_score
                })

        return sorted(clusters, key=lambda c: c["size"], reverse=True)
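
    # Transitive grouping via union-find, worked example (illustrative):
    # pairs (1, 2) and (2, 5) share memory 2, so find() resolves 1, 2 and 5
    # to the same root and they become one 3-member cluster, even though
    # memories 1 and 5 were never directly compared.
    #
    #     union(1, 2); union(2, 5)
    #     find(1) == find(5)  # True -> {1, 2, 5} is one duplicate cluster
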
    # ================================================================
    # RELATIONSHIP INFERENCE
    # ================================================================

    async def suggest_relationships(
        self,
        memory_id: Optional[int] = None,
        project_path: Optional[str] = None,
        similarity_threshold: float = 0.7,
        limit: int = 20
    ) -> Dict[str, Any]:
        """
        Suggest missing relationships between memories.

        Uses semantic similarity and content analysis to infer
        relationships that should exist but don't.

        Args:
            memory_id: Optional specific memory to find links for
            project_path: Optional project filter
            similarity_threshold: Minimum similarity for suggestions
            limit: Maximum suggestions to return

        Returns:
            Dict with suggested relationships
        """
        cursor = self.db.conn.cursor()

        suggestions = []

        if memory_id:
            # Find relationships for a specific memory
            cursor.execute("""
                SELECT id, content, type, embedding FROM memories WHERE id = ?
            """, (memory_id,))
            source = cursor.fetchone()
            if not source or not source["embedding"]:
                return {"suggestions": [], "error": "Memory not found or has no embedding"}

            source_embedding = json.loads(source["embedding"])

            # Get existing relationships
            cursor.execute("""
                SELECT target_id FROM memory_relationships WHERE source_id = ?
                UNION
                SELECT source_id FROM memory_relationships WHERE target_id = ?
            """, (memory_id, memory_id))
            existing = {row[0] for row in cursor.fetchall()}
            existing.add(memory_id)

            # Find similar unconnected memories
            cursor.execute("""
                SELECT id, content, type, embedding, importance
                FROM memories
                WHERE embedding IS NOT NULL AND id NOT IN ({})
                LIMIT 200
            """.format(','.join('?' * len(existing))), tuple(existing))

            import numpy as np
            source_vec = np.array(source_embedding)

            for row in cursor.fetchall():
                try:
                    target_vec = np.array(json.loads(row["embedding"]))
                    similarity = np.dot(source_vec, target_vec) / (
                        np.linalg.norm(source_vec) * np.linalg.norm(target_vec)
                    )

                    if similarity >= similarity_threshold:
                        rel_type = self._infer_relationship_type(
                            source["type"], source["content"],
                            row["type"], row["content"]
                        )

                        suggestions.append({
                            "source_id": memory_id,
                            "target_id": row["id"],
                            "relationship": rel_type,
                            "similarity": float(similarity),
                            "confidence": self._calculate_link_confidence(
                                similarity, source["type"], row["type"]
                            ),
                            "source_preview": source["content"][:100],
                            "target_preview": row["content"][:100]
                        })
                except Exception as e:
                    logger.debug(f"Error processing memory {row['id']}: {e}")
                    continue
        else:
            # Find suggestions across the project
            if project_path:
                from services.database import normalize_path
                normalized = normalize_path(project_path)
                cursor.execute("""
                    SELECT id, content, type, embedding, importance
                    FROM memories
                    WHERE embedding IS NOT NULL AND project_path = ?
                    ORDER BY importance DESC
                    LIMIT 100
                """, (normalized,))
            else:
                cursor.execute("""
                    SELECT id, content, type, embedding, importance
                    FROM memories
                    WHERE embedding IS NOT NULL
                    ORDER BY importance DESC
                    LIMIT 100
                """)

            memories = cursor.fetchall()

            # Get all existing relationships
            cursor.execute("SELECT source_id, target_id FROM memory_relationships")
            existing_pairs = {(row[0], row[1]) for row in cursor.fetchall()}

            import numpy as np

            # Check pairs for potential relationships
            for i, mem1 in enumerate(memories):
                if len(suggestions) >= limit:
                    break

                for mem2 in memories[i+1:]:
                    if len(suggestions) >= limit:
                        break

                    pair = (min(mem1["id"], mem2["id"]), max(mem1["id"], mem2["id"]))
                    if pair in existing_pairs or (pair[1], pair[0]) in existing_pairs:
                        continue

                    try:
                        vec1 = np.array(json.loads(mem1["embedding"]))
                        vec2 = np.array(json.loads(mem2["embedding"]))
                        similarity = np.dot(vec1, vec2) / (
                            np.linalg.norm(vec1) * np.linalg.norm(vec2)
                        )

                        if similarity >= similarity_threshold:
                            rel_type = self._infer_relationship_type(
                                mem1["type"], mem1["content"],
                                mem2["type"], mem2["content"]
                            )

                            suggestions.append({
                                "source_id": mem1["id"],
                                "target_id": mem2["id"],
                                "relationship": rel_type,
                                "similarity": float(similarity),
                                "confidence": self._calculate_link_confidence(
                                    similarity, mem1["type"], mem2["type"]
                                ),
                                "source_preview": mem1["content"][:100],
                                "target_preview": mem2["content"][:100]
                            })
                    except Exception:
                        continue

        # Sort by confidence
        suggestions.sort(key=lambda x: x["confidence"], reverse=True)
        suggestions = suggestions[:limit]

        return {
            "suggestions": suggestions,
            "total_found": len(suggestions),
            "auto_apply_candidates": [
                s for s in suggestions
                if s["confidence"] >= self.HIGH_CONFIDENCE
            ]
        }
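
    # Usage sketch (illustrative): request missing links around one memory,
    # then persist only the high-confidence ones. The create_relationship
    # signature matches the call made in run_maintenance later in this file.
    #
    #     result = await curator.suggest_relationships(memory_id=42)
    #     for s in result["auto_apply_candidates"]:
    #         await curator.db.create_relationship(
    #             source_id=s["source_id"], target_id=s["target_id"],
    #             relationship=s["relationship"], strength=s["similarity"])
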
    def _infer_relationship_type(
        self,
        type1: str, content1: str,
        type2: str, content2: str
    ) -> str:
        """Infer the most likely relationship type between two memories."""
        content1_lower = content1.lower()
        content2_lower = content2.lower()

        # Error + fix pattern
        if type1 == "error" and type2 in ["code", "decision"]:
            if any(w in content2_lower for w in ["fix", "solve", "resolve", "solution"]):
                return "fixes"
        if type2 == "error" and type1 in ["code", "decision"]:
            if any(w in content1_lower for w in ["fix", "solve", "resolve", "solution"]):
                return "fixes"

        # Cause-effect pattern
        if any(w in content1_lower for w in ["because", "caused", "led to", "resulted"]):
            return "caused_by"
        if any(w in content2_lower for w in ["because", "caused", "led to", "resulted"]):
            return "caused_by"

        # Contradiction pattern
        if any(w in content1_lower for w in ["but", "however", "instead", "contrary"]):
            return "contradicts"
        if any(w in content2_lower for w in ["but", "however", "instead", "contrary"]):
            return "contradicts"

        # Support pattern
        if type1 == type2 == "decision":
            return "supports"

        # Default to related
        return "related"

    def _calculate_link_confidence(
        self,
        similarity: float,
        type1: str,
        type2: str
    ) -> float:
        """Calculate confidence score for a suggested link."""
        base = similarity

        # Boost for complementary types
        complementary = {
            ("error", "code"): 0.1,
            ("error", "decision"): 0.1,
            ("decision", "decision"): 0.05,
            ("code", "code"): 0.05,
        }

        pair = (type1, type2) if type1 <= type2 else (type2, type1)
        boost = complementary.get(pair, 0)

        return min(base + boost, 1.0)
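
    # Worked example (illustrative): similarity 0.85 between two "decision"
    # memories yields pair ("decision", "decision"), so confidence is
    # min(0.85 + 0.05, 1.0) = 0.90, exactly HIGH_CONFIDENCE, which passes
    # the >= check for auto-apply. Note that `pair` is lexicographically
    # sorted: ("error", "code") normalizes to ("code", "error"), which is
    # not a key in the table above, so the error/code and error/decision
    # boosts only fire if the lookup sees them in sorted order.
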
    # ================================================================
    # QUALITY SCORING
    # ================================================================

    async def score_quality(
        self,
        memory_id: Optional[int] = None,
        project_path: Optional[str] = None,
        limit: int = 100
    ) -> Dict[str, Any]:
        """
        Calculate quality scores for memories.

        Quality = f(usage, connections, confidence, age_decay)

        Args:
            memory_id: Optional specific memory to score
            project_path: Optional project filter
            limit: Maximum memories to score

        Returns:
            Dict with quality scores and insights
        """
        cursor = self.db.conn.cursor()

        if memory_id:
            cursor.execute("""
                SELECT id, content, type, importance, confidence,
                       access_count, decay_factor, created_at
                FROM memories WHERE id = ?
            """, (memory_id,))
            memories = cursor.fetchall()
        elif project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT id, content, type, importance, confidence,
                       access_count, decay_factor, created_at
                FROM memories WHERE project_path = ?
                ORDER BY created_at DESC
                LIMIT ?
            """, (normalized, limit))
            memories = cursor.fetchall()
        else:
            cursor.execute("""
                SELECT id, content, type, importance, confidence,
                       access_count, decay_factor, created_at
                FROM memories
                ORDER BY created_at DESC
                LIMIT ?
            """, (limit,))
            memories = cursor.fetchall()

        scores = []
        for mem in memories:
            # Get connection count
            cursor.execute("""
                SELECT COUNT(*) as count FROM memory_relationships
                WHERE source_id = ? OR target_id = ?
            """, (mem["id"], mem["id"]))
            connections = cursor.fetchone()["count"]

            # Calculate quality score
            quality = self._calculate_quality_score(
                importance=mem["importance"],
                confidence=mem["confidence"],
                access_count=mem["access_count"],
                decay_factor=mem["decay_factor"],
                connections=connections
            )

            scores.append({
                "id": mem["id"],
                "type": mem["type"],
                "content_preview": mem["content"][:100],
                "quality_score": quality,
                "components": {
                    "importance": mem["importance"],
                    "confidence": mem["confidence"],
                    "usage": mem["access_count"],
                    "decay": mem["decay_factor"],
                    "connections": connections
                },
                "needs_attention": quality < 0.3,
                "is_high_quality": quality > 0.7
            })

        scores.sort(key=lambda x: x["quality_score"], reverse=True)

        # Generate insights
        low_quality = [s for s in scores if s["quality_score"] < 0.3]
        high_quality = [s for s in scores if s["quality_score"] > 0.7]
        avg_quality = sum(s["quality_score"] for s in scores) / len(scores) if scores else 0

        return {
            "scores": scores,
            "summary": {
                "total_scored": len(scores),
                "average_quality": round(avg_quality, 3),
                "high_quality_count": len(high_quality),
                "needs_attention_count": len(low_quality)
            },
            "needs_attention": low_quality[:10],
            "top_quality": high_quality[:10]
        }

    def _calculate_quality_score(
        self,
        importance: int,
        confidence: float,
        access_count: int,
        decay_factor: float,
        connections: int
    ) -> float:
        """Calculate overall quality score (0-1)."""
        # Normalize components
        importance_norm = (importance or 5) / 10       # 0-1
        confidence_norm = confidence or 0.5            # Already 0-1
        usage_norm = min((access_count or 0) / 20, 1)  # Cap at 20 uses
        decay_norm = decay_factor or 1.0               # Already 0-1
        connection_norm = min(connections / 10, 1)     # Cap at 10 connections

        # Weighted average
        weights = {
            "importance": 0.25,
            "confidence": 0.25,
            "usage": 0.15,
            "decay": 0.15,
            "connections": 0.20
        }

        score = (
            importance_norm * weights["importance"] +
            confidence_norm * weights["confidence"] +
            usage_norm * weights["usage"] +
            decay_norm * weights["decay"] +
            connection_norm * weights["connections"]
        )

        return round(score, 3)
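
    # Worked example (illustrative): importance 8, confidence 0.9,
    # access_count 10, decay_factor 1.0, connections 4 gives
    #     0.8*0.25 + 0.9*0.25 + 0.5*0.15 + 1.0*0.15 + 0.4*0.20
    #   = 0.200 + 0.225 + 0.075 + 0.150 + 0.080 = 0.730
    # which lands just above the 0.7 "high quality" cutoff used above.
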
    # ================================================================
    # ORPHAN DETECTION
    # ================================================================

    async def find_orphan_memories(
        self,
        project_path: Optional[str] = None,
        limit: int = 50
    ) -> List[Dict]:
        """Find memories with no relationships."""
        cursor = self.db.conn.cursor()

        if project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT m.id, m.content, m.type, m.importance, m.confidence, m.created_at
                FROM memories m
                LEFT JOIN memory_relationships mr1 ON m.id = mr1.source_id
                LEFT JOIN memory_relationships mr2 ON m.id = mr2.target_id
                WHERE mr1.id IS NULL AND mr2.id IS NULL AND m.project_path = ?
                ORDER BY m.importance DESC, m.created_at DESC
                LIMIT ?
            """, (normalized, limit))
        else:
            cursor.execute("""
                SELECT m.id, m.content, m.type, m.importance, m.confidence, m.created_at
                FROM memories m
                LEFT JOIN memory_relationships mr1 ON m.id = mr1.source_id
                LEFT JOIN memory_relationships mr2 ON m.id = mr2.target_id
                WHERE mr1.id IS NULL AND mr2.id IS NULL
                ORDER BY m.importance DESC, m.created_at DESC
                LIMIT ?
            """, (limit,))

        orphans = []
        for row in cursor.fetchall():
            orphans.append({
                "id": row["id"],
                "content": row["content"][:150],
                "type": row["type"],
                "importance": row["importance"],
                "confidence": row["confidence"],
                "created_at": row["created_at"]
            })

        return orphans

    # ================================================================
    # CURATED CONTEXT GENERATION
    # ================================================================

    async def generate_summary(
        self,
        query: str,
        project_path: Optional[str] = None,
        max_memories: int = 10,
        include_graph: bool = True
    ) -> Dict[str, Any]:
        """
        Generate curated context summary for a query.

        This is what gets injected into the main Claude's context
        via the grounding hook.

        Args:
            query: The topic/query to generate context for
            project_path: Optional project filter
            max_memories: Maximum memories to include
            include_graph: Include relationship graph context

        Returns:
            Dict with curated context summary
        """
        # Search for relevant memories
        from skills.search import semantic_search
        results = await semantic_search(
            db=self.db,
            embeddings=self.embeddings,
            query=query,
            limit=max_memories,
            project_path=project_path,
            threshold=0.5
        )

        memories = results.get("results", [])

        if not memories:
            return {
                "query": query,
                "context": "No relevant memories found.",
                "memories": [],
                "graph_context": None
            }

        # Build context sections
        sections = []

        # Group by type
        by_type = defaultdict(list)
        for mem in memories:
            by_type[mem.get("type", "chunk")].append(mem)

        # Decisions first (most important for context)
        if by_type.get("decision"):
            sections.append("**Key Decisions:**")
            for mem in by_type["decision"][:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Errors and fixes
        if by_type.get("error"):
            sections.append("\n**Known Issues:**")
            for mem in by_type["error"][:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Code patterns
        if by_type.get("code"):
            sections.append("\n**Code Patterns:**")
            for mem in by_type["code"][:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Other relevant
        other = [m for t, mems in by_type.items()
                 for m in mems if t not in ["decision", "error", "code"]]
        if other:
            sections.append("\n**Related Context:**")
            for mem in other[:3]:
                sections.append(f"- {mem['content'][:200]}")

        # Build graph context if requested
        graph_context = None
        if include_graph and memories:
            graph_context = await self._build_graph_context(memories)

        # Check for pending curator items
        pending = await self._get_pending_reviews(project_path)

        return {
            "query": query,
            "context": "\n".join(sections),
            "memories": [
                {"id": m["id"], "type": m.get("type"), "relevance": m.get("relevance", 0)}
                for m in memories
            ],
            "graph_context": graph_context,
            "pending_reviews": pending,
            "generated_at": datetime.now().isoformat()
        }
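
    # Usage sketch (illustrative; the grounding hook referenced in the
    # docstring lives in hooks/grounding-hook.py per the file list above,
    # but its call site is not shown in this diff):
    #
    #     summary = await curator.generate_summary(
    #         query="database migrations",
    #         project_path="/work/app",
    #         max_memories=10,
    #     )
    #     print(summary["context"])           # grouped markdown sections
    #     print(summary["pending_reviews"])   # duplicate/link/orphan counts
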
    async def _build_graph_context(self, memories: List[Dict]) -> Dict[str, Any]:
        """Build graph relationship context for memories."""
        memory_ids = [m["id"] for m in memories if m.get("id")]
        if not memory_ids:
            return None

        cursor = self.db.conn.cursor()

        # Get relationships between these memories
        placeholders = ','.join('?' * len(memory_ids))
        cursor.execute(f"""
            SELECT source_id, target_id, relationship, strength
            FROM memory_relationships
            WHERE source_id IN ({placeholders}) OR target_id IN ({placeholders})
        """, memory_ids + memory_ids)

        edges = []
        for row in cursor.fetchall():
            edges.append({
                "source": row["source_id"],
                "target": row["target_id"],
                "type": row["relationship"],
                "strength": row["strength"]
            })

        # Format as readable context
        if not edges:
            return {"edges": [], "summary": "No relationships between these memories"}

        relationship_summary = []
        for edge in edges[:10]:
            relationship_summary.append(
                f"Memory #{edge['source']} {edge['type']} Memory #{edge['target']}"
            )

        return {
            "edges": edges,
            "summary": "; ".join(relationship_summary),
            "edge_count": len(edges)
        }

    async def _get_pending_reviews(self, project_path: Optional[str] = None) -> Dict[str, Any]:
        """Get pending curator review items."""
        # Check for duplicates
        duplicates = await self.find_duplicates(
            project_path=project_path,
            similarity_threshold=0.92,
            limit=5
        )

        # Check for suggested links
        suggestions = await self.suggest_relationships(
            project_path=project_path,
            similarity_threshold=0.8,
            limit=5
        )

        # Check for orphans
        orphans = await self.find_orphan_memories(
            project_path=project_path,
            limit=5
        )

        return {
            "duplicate_clusters": len(duplicates.get("duplicate_clusters", [])),
            "suggested_links": len(suggestions.get("suggestions", [])),
            "orphan_memories": len(orphans),
            "total_pending": (
                len(duplicates.get("duplicate_clusters", [])) +
                len(suggestions.get("suggestions", [])) +
                len(orphans)
            )
        }

    # ================================================================
    # MERGE OPERATIONS
    # ================================================================

    async def merge_memories(
        self,
        keep_id: int,
        remove_ids: List[int],
        merge_content: bool = False
    ) -> Dict[str, Any]:
        """
        Merge duplicate memories into one.

        Args:
            keep_id: Memory ID to keep
            remove_ids: Memory IDs to merge into keep_id
            merge_content: If True, append removed content to kept memory

        Returns:
            Dict with merge result
        """
        cursor = self.db.conn.cursor()

        # Verify keep memory exists
        cursor.execute("SELECT * FROM memories WHERE id = ?", (keep_id,))
        keep_memory = cursor.fetchone()
        if not keep_memory:
            return {"error": f"Memory {keep_id} not found"}

        merged_count = 0
        merged_relationships = 0

        for remove_id in remove_ids:
            if remove_id == keep_id:
                continue

            cursor.execute("SELECT * FROM memories WHERE id = ?", (remove_id,))
            remove_memory = cursor.fetchone()
            if not remove_memory:
                continue

            # Transfer relationships
            # Update outgoing relationships
            cursor.execute("""
                UPDATE OR IGNORE memory_relationships
                SET source_id = ?
                WHERE source_id = ?
            """, (keep_id, remove_id))
            merged_relationships += cursor.rowcount

            # Update incoming relationships
            cursor.execute("""
                UPDATE OR IGNORE memory_relationships
                SET target_id = ?
                WHERE target_id = ?
            """, (keep_id, remove_id))
            merged_relationships += cursor.rowcount

            # Delete duplicate relationships
            cursor.execute("""
                DELETE FROM memory_relationships
                WHERE source_id = ? OR target_id = ?
            """, (remove_id, remove_id))

            # Optionally merge content
            if merge_content:
                cursor.execute("""
                    UPDATE memories
                    SET content = content || '\n\n[Merged from #' || ? || ']: ' || ?
                    WHERE id = ?
                """, (remove_id, remove_memory["content"], keep_id))

            # Archive the removed memory
            cursor.execute("""
                INSERT INTO memory_archive
                (original_id, type, content, embedding, project_path, session_id,
                 importance, access_count, decay_factor, metadata, archive_reason)
                SELECT id, type, content, embedding, project_path, session_id,
                       importance, access_count, decay_factor, metadata, 'merged'
                FROM memories WHERE id = ?
            """, (remove_id,))

            # Delete the memory
            cursor.execute("DELETE FROM memories WHERE id = ?", (remove_id,))
            merged_count += 1

        self.db.conn.commit()

        # Update importance if we merged several
        if merged_count > 0:
            new_importance = min(keep_memory["importance"] + merged_count, 10)
            cursor.execute("""
                UPDATE memories SET importance = ? WHERE id = ?
            """, (new_importance, keep_id))
            self.db.conn.commit()

        return {
            "success": True,
            "kept_id": keep_id,
            "merged_count": merged_count,
            "relationships_transferred": merged_relationships,
            "new_importance": min(keep_memory["importance"] + merged_count, 10)
        }
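
    # Usage sketch (illustrative): merging the top pair recommended by
    # find_duplicates. The removed memory ends up in memory_archive with
    # archive_reason = 'merged', so the operation is recoverable.
    #
    #     dups = await curator.find_duplicates(project_path="/work/app")
    #     pair = dups["auto_merge_candidates"][0]
    #     rec = pair["merge_recommendation"]
    #     result = await curator.merge_memories(keep_id=rec["keep"],
    #                                           remove_ids=[rec["remove"]])
    #     # result["relationships_transferred"] counts re-pointed edges
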
    # ================================================================
    # MAINTENANCE TASKS
    # ================================================================

    async def run_maintenance(
        self,
        project_path: Optional[str] = None,
        tasks: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Run curator maintenance tasks.

        Args:
            project_path: Optional project filter
            tasks: Specific tasks to run, or None for all
                   Options: dedup, orphans, links, decay, quality

        Returns:
            Dict with maintenance report
        """
        all_tasks = ["dedup", "orphans", "links", "decay", "quality"]
        tasks_to_run = tasks or all_tasks

        report = {
            "started_at": datetime.now().isoformat(),
            "project_path": project_path,
            "tasks_run": [],
            "findings": {},
            "actions_taken": {},
            "recommendations": []
        }

        # Get config
        config = await self.get_config(project_path)

        if "dedup" in tasks_to_run and config.get("auto_dedup_enabled", True):
            duplicates = await self.find_duplicates(
                project_path=project_path,
                similarity_threshold=config.get("dedup_threshold", 0.92)
            )
            report["findings"]["duplicates"] = duplicates.get("duplicates_found", 0)
            report["tasks_run"].append("dedup")

            # Auto-merge high-confidence duplicates
            auto_merge = duplicates.get("auto_merge_candidates", [])
            if auto_merge:
                for pair in auto_merge[:5]:  # Limit auto-merges
                    rec = pair["merge_recommendation"]
                    await self.merge_memories(
                        keep_id=rec["keep"],
                        remove_ids=[rec["remove"]]
                    )
                report["actions_taken"]["auto_merged"] = len(auto_merge[:5])

        if "orphans" in tasks_to_run:
            orphans = await self.find_orphan_memories(project_path=project_path)
            report["findings"]["orphans"] = len(orphans)
            report["tasks_run"].append("orphans")

            if orphans:
                report["recommendations"].append(
                    f"Found {len(orphans)} orphan memories - consider linking or archiving"
                )

        if "links" in tasks_to_run and config.get("auto_link_enabled", True):
            suggestions = await self.suggest_relationships(
                project_path=project_path,
                similarity_threshold=0.75
            )
            report["findings"]["suggested_links"] = len(suggestions.get("suggestions", []))
            report["tasks_run"].append("links")

            # Auto-apply high-confidence links
            auto_links = suggestions.get("auto_apply_candidates", [])
            if auto_links:
                for link in auto_links[:10]:
                    await self.db.create_relationship(
                        source_id=link["source_id"],
                        target_id=link["target_id"],
                        relationship=link["relationship"],
                        strength=link["similarity"]
                    )
                report["actions_taken"]["auto_linked"] = len(auto_links[:10])

        if "quality" in tasks_to_run:
            quality = await self.score_quality(project_path=project_path)
            report["findings"]["quality_summary"] = quality.get("summary", {})
            report["tasks_run"].append("quality")

            needs_attention = quality.get("needs_attention", [])
            if needs_attention:
                report["recommendations"].append(
                    f"{len(needs_attention)} memories need attention (low quality score)"
                )

        if "decay" in tasks_to_run:
            # Apply confidence decay to unused memories
            decayed = await self._apply_confidence_decay(project_path)
            report["actions_taken"]["memories_decayed"] = decayed
            report["tasks_run"].append("decay")

        report["completed_at"] = datetime.now().isoformat()

        # Save report
        await self._save_report(report, project_path)

        return report
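
    # Usage sketch (illustrative): running the full maintenance pass, or a
    # subset, from any async context:
    #
    #     report = await curator.run_maintenance(
    #         project_path="/work/app",
    #         tasks=["dedup", "decay"],
    #     )
    #     print(report["findings"], report["actions_taken"])
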
    async def _apply_confidence_decay(
        self,
        project_path: Optional[str] = None,
        decay_rate: float = 0.95
    ) -> int:
        """Apply decay to memories not accessed recently."""
        cursor = self.db.conn.cursor()

        # Decay memories not accessed in the last 30 days
        cutoff = (datetime.now() - timedelta(days=30)).isoformat()

        if project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                UPDATE memories
                SET decay_factor = decay_factor * ?,
                    confidence = confidence * ?
                WHERE (last_accessed IS NULL OR last_accessed < ?)
                  AND project_path = ?
                  AND decay_factor > 0.1
            """, (decay_rate, decay_rate, cutoff, normalized))
        else:
            cursor.execute("""
                UPDATE memories
                SET decay_factor = decay_factor * ?,
                    confidence = confidence * ?
                WHERE (last_accessed IS NULL OR last_accessed < ?)
                  AND decay_factor > 0.1
            """, (decay_rate, decay_rate, cutoff))

        decayed = cursor.rowcount
        self.db.conn.commit()
        return decayed
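
    # Decay arithmetic (illustrative): with the default rate of 0.95, an
    # untouched memory loses ~5% per maintenance pass. Starting from
    # decay_factor 1.0, after n passes the factor is 0.95**n: ~0.77 after 5,
    # ~0.60 after 10. The `decay_factor > 0.1` guard stops further decay
    # once a memory has faded to a tenth of its original weight.
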
async def _save_report(self, report: Dict, project_path: Optional[str] = None):
|
|
1420
|
+
"""Save maintenance report to database."""
|
|
1421
|
+
cursor = self.db.conn.cursor()
|
|
1422
|
+
|
|
1423
|
+
from services.database import normalize_path
|
|
1424
|
+
normalized = normalize_path(project_path) if project_path else None
|
|
1425
|
+
|
|
1426
|
+
cursor.execute("""
|
|
1427
|
+
INSERT INTO curator_reports
|
|
1428
|
+
(project_path, report_type, summary, findings, actions_taken, recommendations)
|
|
1429
|
+
VALUES (?, 'maintenance', ?, ?, ?, ?)
|
|
1430
|
+
""", (
|
|
1431
|
+
normalized,
|
|
1432
|
+
f"Ran tasks: {', '.join(report.get('tasks_run', []))}",
|
|
1433
|
+
json.dumps(report.get("findings", {})),
|
|
1434
|
+
json.dumps(report.get("actions_taken", {})),
|
|
1435
|
+
json.dumps(report.get("recommendations", []))
|
|
1436
|
+
))
|
|
1437
|
+
self.db.conn.commit()
|
|
1438
|
+
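    # The findings/actions_taken/recommendations columns are stored as JSON
    # text here and decoded again by get_latest_report() below, so any
    # JSON-serializable report content round-trips losslessly.
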
    # ================================================================
    # CONFIGURATION
    # ================================================================

    async def get_config(self, project_path: Optional[str] = None) -> Dict[str, Any]:
        """Get curator configuration for a project."""
        cursor = self.db.conn.cursor()

        if project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT * FROM curator_config WHERE project_path = ?
            """, (normalized,))
            row = cursor.fetchone()
            if row:
                return dict(row)

        return self.DEFAULT_CONFIG.copy()

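    # Note: the fallback returns a copy, so callers may mutate the result
    # without touching the class-level defaults, e.g. (hypothetical values):
    #
    #   cfg = await curator.get_config()   # no per-project row -> defaults
    #   cfg["dedup_threshold"] = 0.95      # DEFAULT_CONFIG is unaffected
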
    async def update_config(
        self,
        project_path: str,
        **config_updates
    ) -> Dict[str, Any]:
        """Update curator configuration for a project."""
        cursor = self.db.conn.cursor()

        from services.database import normalize_path
        normalized = normalize_path(project_path)

        # Get existing or default
        existing = await self.get_config(project_path)
        existing.update(config_updates)

        cursor.execute("""
            INSERT INTO curator_config
            (project_path, auto_dedup_enabled, auto_link_enabled, dedup_threshold,
             maintenance_interval_hours, curator_active)
            VALUES (?, ?, ?, ?, ?, ?)
            ON CONFLICT(project_path) DO UPDATE SET
                auto_dedup_enabled = excluded.auto_dedup_enabled,
                auto_link_enabled = excluded.auto_link_enabled,
                dedup_threshold = excluded.dedup_threshold,
                maintenance_interval_hours = excluded.maintenance_interval_hours,
                curator_active = excluded.curator_active
        """, (
            normalized,
            existing.get("auto_dedup_enabled", True),
            existing.get("auto_link_enabled", True),
            existing.get("dedup_threshold", 0.92),
            existing.get("maintenance_interval_hours", 24),
            existing.get("curator_active", True)
        ))
        self.db.conn.commit()

        return existing

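    # Usage sketch (hypothetical project path and values):
    #
    #   cfg = await curator.update_config(
    #       "/path/to/project",
    #       dedup_threshold=0.95,
    #       auto_link_enabled=False,
    #   )
    #
    # Fields omitted from the call keep their stored (or default) values,
    # because updates are merged into the existing config before the upsert.
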
    async def get_latest_report(
        self,
        project_path: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Get the latest curator report."""
        cursor = self.db.conn.cursor()

        if project_path:
            from services.database import normalize_path
            normalized = normalize_path(project_path)
            cursor.execute("""
                SELECT * FROM curator_reports
                WHERE project_path = ?
                ORDER BY created_at DESC
                LIMIT 1
            """, (normalized,))
        else:
            cursor.execute("""
                SELECT * FROM curator_reports
                ORDER BY created_at DESC
                LIMIT 1
            """)

        row = cursor.fetchone()
        if not row:
            return None

        return {
            "id": row["id"],
            "project_path": row["project_path"],
            "report_type": row["report_type"],
            "created_at": row["created_at"],
            "summary": row["summary"],
            "findings": json.loads(row["findings"]) if row["findings"] else {},
            "actions_taken": json.loads(row["actions_taken"]) if row["actions_taken"] else {},
            "recommendations": json.loads(row["recommendations"]) if row["recommendations"] else []
        }

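    # Subscripting rows by column name (row["findings"], etc.) assumes the
    # connection's row_factory is sqlite3.Row or an equivalent mapping row;
    # with the default tuple rows this method would raise a TypeError.
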
    async def get_status(self) -> Dict[str, Any]:
        """Get current curator agent status."""
        cursor = self.db.conn.cursor()

        # Get total memories
        cursor.execute("SELECT COUNT(*) as total FROM memories")
        total_memories = cursor.fetchone()["total"]

        # Get total relationships
        cursor.execute("SELECT COUNT(*) as total FROM memory_relationships")
        total_relationships = cursor.fetchone()["total"]

        # Get orphan count
        cursor.execute("""
            SELECT COUNT(*) as count FROM memories m
            LEFT JOIN memory_relationships mr1 ON m.id = mr1.source_id
            LEFT JOIN memory_relationships mr2 ON m.id = mr2.target_id
            WHERE mr1.id IS NULL AND mr2.id IS NULL
        """)
        orphan_count = cursor.fetchone()["count"]

        # Get latest report
        latest_report = await self.get_latest_report()

        return {
            "active": True,
            "total_memories": total_memories,
            "total_relationships": total_relationships,
            "orphan_count": orphan_count,
            "connection_ratio": round(total_relationships / max(total_memories, 1), 2),
            "last_maintenance": latest_report.get("created_at") if latest_report else None,
            "last_report_summary": latest_report.get("summary") if latest_report else None
        }


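# Example get_status() payload (shape follows the code above; values are
# illustrative):
#
#   {
#       "active": True,
#       "total_memories": 412,
#       "total_relationships": 389,
#       "orphan_count": 57,
#       "connection_ratio": 0.94,
#       "last_maintenance": "2025-06-01T03:00:00",
#       "last_report_summary": "Ran tasks: quality, decay"
#   }
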
# Singleton instance
_curator_instance: Optional[MemoryCurator] = None


def get_curator(db, embeddings) -> MemoryCurator:
    """Get or create the curator singleton."""
    global _curator_instance
    if _curator_instance is None:
        _curator_instance = MemoryCurator(db, embeddings)
    return _curator_instance


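# Note: once the singleton exists, get_curator() ignores its arguments and
# always returns the same instance (hypothetical `db`/`embeddings` objects):
#
#   curator = get_curator(db, embeddings)
#   assert curator is get_curator(db, embeddings)
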
async def run_curator_scheduler(
    db,
    embeddings,
    interval_hours: int = 24
):
    """Background scheduler for curator maintenance."""
    curator = get_curator(db, embeddings)

    while True:
        try:
            # Wait for the interval
            await asyncio.sleep(interval_hours * 3600)

            # Run maintenance
            logger.info("Running scheduled curator maintenance...")
            report = await curator.run_maintenance()
            logger.info(f"Curator maintenance complete: {report.get('summary', '')}")

        except asyncio.CancelledError:
            logger.info("Curator scheduler cancelled")
            break
        except Exception as e:
            logger.error(f"Curator scheduler error: {e}")
            # Continue running despite errors
            await asyncio.sleep(300)  # Wait 5 min before retry
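

# Typical wiring (a minimal sketch; `db`, `embeddings`, and `serve_forever`
# stand in for objects the host application provides):
#
#   async def main():
#       task = asyncio.create_task(run_curator_scheduler(db, embeddings))
#       try:
#           await serve_forever()
#       finally:
#           task.cancel()  # raises CancelledError inside the scheduler loop
#           await asyncio.gather(task, return_exceptions=True)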