claude-memory-agent 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +107 -0
- package/README.md +200 -0
- package/agent_card.py +512 -0
- package/bin/cli.js +181 -0
- package/bin/postinstall.js +216 -0
- package/config.py +104 -0
- package/dashboard.html +2689 -0
- package/hooks/README.md +196 -0
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/hooks/auto-detect-response.py +348 -0
- package/hooks/auto_capture.py +255 -0
- package/hooks/detect-correction.py +173 -0
- package/hooks/grounding-hook.py +348 -0
- package/hooks/log-tool-use.py +234 -0
- package/hooks/log-user-request.py +208 -0
- package/hooks/pre-tool-decision.py +218 -0
- package/hooks/problem-detector.py +343 -0
- package/hooks/session_end.py +192 -0
- package/hooks/session_start.py +227 -0
- package/install.py +887 -0
- package/main.py +2859 -0
- package/manager.py +997 -0
- package/package.json +55 -0
- package/requirements.txt +8 -0
- package/run_server.py +136 -0
- package/services/__init__.py +50 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/services/agent_registry.py +753 -0
- package/services/auth.py +331 -0
- package/services/auto_inject.py +250 -0
- package/services/claude_md_sync.py +275 -0
- package/services/cleanup.py +667 -0
- package/services/compaction_flush.py +447 -0
- package/services/confidence.py +301 -0
- package/services/daily_log.py +333 -0
- package/services/database.py +2485 -0
- package/services/embeddings.py +358 -0
- package/services/insights.py +632 -0
- package/services/llm_analyzer.py +595 -0
- package/services/memory_md_sync.py +409 -0
- package/services/retry_queue.py +453 -0
- package/services/timeline.py +579 -0
- package/services/vector_index.py +398 -0
- package/services/websocket.py +257 -0
- package/skills/__init__.py +6 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/skills/admin.py +469 -0
- package/skills/checkpoint.py +198 -0
- package/skills/claude_md.py +363 -0
- package/skills/cleanup.py +241 -0
- package/skills/grounding.py +801 -0
- package/skills/insights.py +231 -0
- package/skills/natural_language.py +277 -0
- package/skills/retrieve.py +67 -0
- package/skills/search.py +213 -0
- package/skills/state.py +182 -0
- package/skills/store.py +179 -0
- package/skills/summarize.py +588 -0
- package/skills/timeline.py +387 -0
- package/skills/verification.py +391 -0
- package/start_daemon.py +155 -0
- package/test_automation.py +221 -0
- package/test_complete.py +338 -0
- package/test_full.py +322 -0
- package/update_system.py +817 -0
- package/verify_db.py +134 -0
|
@@ -0,0 +1,667 @@
|
|
|
1
|
+
"""Memory cleanup and pruning service.
|
|
2
|
+
|
|
3
|
+
Handles automatic cleanup of old, low-value, and duplicate memories.
|
|
4
|
+
Supports archival before deletion and configurable retention policies.
|
|
5
|
+
"""
|
|
6
|
+
import json
|
|
7
|
+
from datetime import datetime, timedelta
|
|
8
|
+
from typing import Dict, Any, Optional, List, Tuple
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CleanupService:
    """Service for memory cleanup, deduplication, and archival.

    Features:
    - Relevance-based cleanup (low-scoring memories)
    - Age-based retention (older than N days)
    - Duplicate detection and merging
    - Soft-delete with archive for recovery
    - Per-project configuration
    - Dry-run mode for preview
    - Audit logging
    """

    def __init__(self, db, embeddings=None):
        # db: database wrapper exposing a sqlite3-style `.conn` plus a
        # `calculate_relevance_score(...)` helper — TODO confirm exact interface.
        self.db = db
        # Optional embeddings service; dedup currently reads precomputed
        # embeddings straight from DB rows, so this may go unused.
        self.embeddings = embeddings
|
|
28
|
+
|
|
29
|
+
async def get_config(
|
|
30
|
+
self,
|
|
31
|
+
project_path: Optional[str] = None
|
|
32
|
+
) -> Dict[str, Any]:
|
|
33
|
+
"""Get cleanup configuration for a project or global default."""
|
|
34
|
+
cursor = self.db.conn.cursor()
|
|
35
|
+
|
|
36
|
+
if project_path:
|
|
37
|
+
cursor.execute(
|
|
38
|
+
"SELECT * FROM cleanup_config WHERE project_path = ?",
|
|
39
|
+
(project_path,)
|
|
40
|
+
)
|
|
41
|
+
row = cursor.fetchone()
|
|
42
|
+
if row:
|
|
43
|
+
return dict(row)
|
|
44
|
+
|
|
45
|
+
# Return defaults
|
|
46
|
+
return {
|
|
47
|
+
"retention_days": 90,
|
|
48
|
+
"min_relevance_score": 0.1,
|
|
49
|
+
"keep_high_importance": True,
|
|
50
|
+
"importance_threshold": 7,
|
|
51
|
+
"dedup_enabled": True,
|
|
52
|
+
"dedup_threshold": 0.95,
|
|
53
|
+
"archive_before_delete": True,
|
|
54
|
+
"archive_retention_days": 365,
|
|
55
|
+
"auto_cleanup_enabled": False
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
    async def save_config(
        self,
        project_path: Optional[str],
        config: Dict[str, Any]
    ) -> bool:
        """Save cleanup configuration for a project.

        Upserts one row per project_path.  Booleans are stored as 0/1
        integers, and any field missing from *config* falls back to the
        same defaults that get_config() would return.
        """
        cursor = self.db.conn.cursor()

        cursor.execute(
            """
            INSERT INTO cleanup_config (
                project_path, retention_days, min_relevance_score,
                keep_high_importance, importance_threshold,
                dedup_enabled, dedup_threshold,
                archive_before_delete, archive_retention_days,
                auto_cleanup_enabled
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(project_path) DO UPDATE SET
                retention_days = excluded.retention_days,
                min_relevance_score = excluded.min_relevance_score,
                keep_high_importance = excluded.keep_high_importance,
                importance_threshold = excluded.importance_threshold,
                dedup_enabled = excluded.dedup_enabled,
                dedup_threshold = excluded.dedup_threshold,
                archive_before_delete = excluded.archive_before_delete,
                archive_retention_days = excluded.archive_retention_days,
                auto_cleanup_enabled = excluded.auto_cleanup_enabled,
                updated_at = datetime('now')
            """,
            (
                project_path,
                config.get("retention_days", 90),
                config.get("min_relevance_score", 0.1),
                1 if config.get("keep_high_importance", True) else 0,
                config.get("importance_threshold", 7),
                1 if config.get("dedup_enabled", True) else 0,
                config.get("dedup_threshold", 0.95),
                1 if config.get("archive_before_delete", True) else 0,
                config.get("archive_retention_days", 365),
                1 if config.get("auto_cleanup_enabled", False) else 0
            )
        )
        self.db.conn.commit()
        return True
|
|
102
|
+
|
|
103
|
+
    async def run_cleanup(
        self,
        project_path: Optional[str] = None,
        dry_run: bool = False
    ) -> Dict[str, Any]:
        """Run full cleanup job.

        Orchestrates the three cleanup passes in order — low-relevance,
        age-based expiry, then deduplication — and aggregates their counts.

        Args:
            project_path: Filter to specific project (None = all)
            dry_run: If True, only preview what would be cleaned

        Returns:
            Cleanup results with counts and details
        """
        config = await self.get_config(project_path)
        results = {
            "dry_run": dry_run,
            "project_path": project_path,
            "config": config,
            "low_relevance": {"count": 0, "ids": []},
            "expired": {"count": 0, "ids": []},
            "duplicates": {"count": 0, "groups": []},
            "total_archived": 0,
            "total_deleted": 0,
            "total_merged": 0
        }

        # 1. Clean up low-relevance memories
        low_rel_result = await self._cleanup_low_relevance(
            project_path=project_path,
            min_score=config["min_relevance_score"],
            keep_high_importance=config["keep_high_importance"],
            importance_threshold=config["importance_threshold"],
            archive=config["archive_before_delete"],
            dry_run=dry_run
        )
        results["low_relevance"] = low_rel_result
        results["total_archived"] += low_rel_result.get("archived", 0)
        results["total_deleted"] += low_rel_result.get("deleted", 0)

        # 2. Clean up expired memories
        expired_result = await self._cleanup_expired(
            project_path=project_path,
            retention_days=config["retention_days"],
            keep_high_importance=config["keep_high_importance"],
            importance_threshold=config["importance_threshold"],
            archive=config["archive_before_delete"],
            dry_run=dry_run
        )
        results["expired"] = expired_result
        results["total_archived"] += expired_result.get("archived", 0)
        results["total_deleted"] += expired_result.get("deleted", 0)

        # 3. Deduplicate memories
        # NOTE: config may come from a DB row, so this flag can be a 0/1
        # integer rather than a bool; truthiness handles both.
        if config["dedup_enabled"]:
            dedup_result = await self._deduplicate_memories(
                project_path=project_path,
                threshold=config["dedup_threshold"],
                archive=config["archive_before_delete"],
                dry_run=dry_run
            )
            results["duplicates"] = dedup_result
            results["total_merged"] += dedup_result.get("merged", 0)

        # 4. Log the cleanup (audit trail only written for real runs)
        if not dry_run:
            await self._log_cleanup(
                cleanup_type="full",
                project_path=project_path,
                archived=results["total_archived"],
                deleted=results["total_deleted"],
                merged=results["total_merged"],
                details=json.dumps(results)
            )

            # Update last cleanup timestamp
            cursor = self.db.conn.cursor()
            if project_path:
                cursor.execute(
                    """
                    UPDATE cleanup_config
                    SET last_cleanup_at = datetime('now')
                    WHERE project_path = ?
                    """,
                    (project_path,)
                )
                self.db.conn.commit()

        return results
|
|
192
|
+
|
|
193
|
+
    async def _cleanup_low_relevance(
        self,
        project_path: Optional[str],
        min_score: float,
        keep_high_importance: bool,
        importance_threshold: int,
        archive: bool,
        dry_run: bool
    ) -> Dict[str, Any]:
        """Clean up memories with low relevance scores.

        Loads candidate rows, scores each with the DB's relevance model,
        and archives/deletes those scoring below *min_score*.

        Args:
            project_path: Restrict to one project (None = all projects).
            min_score: Relevance threshold; anything below is cleaned.
            keep_high_importance: If True, rows at/above
                *importance_threshold* are excluded from cleanup entirely.
            importance_threshold: Importance cutoff used with the above.
            archive: Copy each row to memory_archive before deleting.
            dry_run: Report candidates without modifying the database.

        Returns:
            Dict with candidate count/ids and archived/deleted tallies.
        """
        cursor = self.db.conn.cursor()

        # Build query to find low-relevance memories.  The relevance score
        # itself is computed in Python below, so SQL only pre-filters by
        # project and (optionally) importance.
        query = """
            SELECT id, type, content, embedding, project_path, session_id,
                   importance, access_count, decay_factor, metadata,
                   created_at, last_accessed
            FROM memories
            WHERE 1=1
        """
        params = []

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        if keep_high_importance:
            query += " AND importance < ?"
            params.append(importance_threshold)

        cursor.execute(query, tuple(params))
        memories = [dict(row) for row in cursor.fetchall()]

        # Filter by calculated relevance score
        to_clean = []
        for mem in memories:
            score = self.db.calculate_relevance_score(
                importance=mem.get("importance", 5),
                created_at=mem.get("created_at"),
                last_accessed=mem.get("last_accessed"),
                access_count=mem.get("access_count", 0),
                decay_factor=mem.get("decay_factor", 1.0)
            )
            if score < min_score:
                # Keep the score on the row so _archive_memory records it.
                mem["relevance_score"] = score
                to_clean.append(mem)

        result = {
            "count": len(to_clean),
            "ids": [m["id"] for m in to_clean],
            "archived": 0,
            "deleted": 0
        }

        if dry_run or not to_clean:
            return result

        # Archive and/or delete
        for mem in to_clean:
            if archive:
                await self._archive_memory(mem, reason="low_relevance")
                result["archived"] += 1

            cursor.execute("DELETE FROM memories WHERE id = ?", (mem["id"],))
            result["deleted"] += 1

        # Single commit covers all archive inserts and deletes.
        self.db.conn.commit()
        return result
|
|
261
|
+
|
|
262
|
+
    async def _cleanup_expired(
        self,
        project_path: Optional[str],
        retention_days: int,
        keep_high_importance: bool,
        importance_threshold: int,
        archive: bool,
        dry_run: bool
    ) -> Dict[str, Any]:
        """Clean up memories older than retention period.

        Args:
            project_path: Restrict to one project (None = all projects).
            retention_days: Rows created more than this many days ago are
                candidates for removal.
            keep_high_importance: If True, rows at/above
                *importance_threshold* are never expired.
            importance_threshold: Importance cutoff used with the above.
            archive: Copy each row to memory_archive before deleting.
            dry_run: Report candidates without modifying the database.

        Returns:
            Dict with candidate count/ids, cutoff date, and tallies.
        """
        cursor = self.db.conn.cursor()
        # String comparison against created_at — assumes created_at is
        # stored in an ISO-like, lexicographically sortable format (TODO
        # confirm the separator matches datetime.isoformat()'s 'T').
        cutoff = (datetime.now() - timedelta(days=retention_days)).isoformat()

        query = """
            SELECT id, type, content, embedding, project_path, session_id,
                   importance, access_count, decay_factor, metadata,
                   created_at, last_accessed
            FROM memories
            WHERE created_at < ?
        """
        params = [cutoff]

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        if keep_high_importance:
            query += " AND importance < ?"
            params.append(importance_threshold)

        cursor.execute(query, tuple(params))
        memories = [dict(row) for row in cursor.fetchall()]

        result = {
            "count": len(memories),
            "ids": [m["id"] for m in memories],
            "cutoff_date": cutoff,
            "archived": 0,
            "deleted": 0
        }

        if dry_run or not memories:
            return result

        for mem in memories:
            if archive:
                # Calculate relevance at archive time so the archive row
                # records how valuable the memory was when removed.
                score = self.db.calculate_relevance_score(
                    importance=mem.get("importance", 5),
                    created_at=mem.get("created_at"),
                    last_accessed=mem.get("last_accessed"),
                    access_count=mem.get("access_count", 0),
                    decay_factor=mem.get("decay_factor", 1.0)
                )
                mem["relevance_score"] = score
                await self._archive_memory(mem, reason="expired")
                result["archived"] += 1

            cursor.execute("DELETE FROM memories WHERE id = ?", (mem["id"],))
            result["deleted"] += 1

        # Single commit covers all archive inserts and deletes.
        self.db.conn.commit()
        return result
|
|
325
|
+
|
|
326
|
+
    async def _deduplicate_memories(
        self,
        project_path: Optional[str],
        threshold: float,
        archive: bool,
        dry_run: bool
    ) -> Dict[str, Any]:
        """Find and merge duplicate memories.

        Groups near-identical memories by cosine similarity of their
        stored embeddings, keeps the best row of each group, and deletes
        the rest (optionally archiving them first).

        Args:
            project_path: Restrict to one project (None = all projects).
            threshold: Cosine similarity at/above which two memories
                count as duplicates.
            archive: Copy duplicates to memory_archive before deleting.
            dry_run: Report duplicate groups without modifying anything.

        Returns:
            Dict with duplicate count, per-group previews, merged tally.
        """
        cursor = self.db.conn.cursor()

        # Get memories with embeddings
        query = """
            SELECT id, type, content, embedding, project_path, session_id,
                   importance, access_count, created_at
            FROM memories
            WHERE embedding IS NOT NULL
        """
        params = []

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        # Ordering matters: the first (highest-importance, most-accessed)
        # member of each cluster becomes the row that is kept.
        query += " ORDER BY importance DESC, access_count DESC"

        cursor.execute(query, tuple(params))
        memories = [dict(row) for row in cursor.fetchall()]

        if len(memories) < 2:
            return {"count": 0, "groups": [], "merged": 0}

        # Find duplicate groups using greedy clustering: each unclaimed
        # memory seeds a group and absorbs every later row whose
        # similarity meets the threshold.  O(n^2) pairwise comparisons.
        groups = []
        used = set()

        for i, mem in enumerate(memories):
            if mem["id"] in used:
                continue

            emb1 = self._parse_embedding(mem.get("embedding"))
            if not emb1:
                continue

            group = [mem]
            used.add(mem["id"])

            for j, other in enumerate(memories[i+1:], start=i+1):
                if other["id"] in used:
                    continue

                emb2 = self._parse_embedding(other.get("embedding"))
                if not emb2:
                    continue

                similarity = self._cosine_similarity(emb1, emb2)
                if similarity >= threshold:
                    group.append(other)
                    used.add(other["id"])

            # Singleton groups are not duplicates — drop them.
            if len(group) > 1:
                groups.append(group)

        result = {
            "count": sum(len(g) - 1 for g in groups),  # Duplicates to remove
            "groups": [
                {
                    "keep_id": g[0]["id"],
                    "merge_ids": [m["id"] for m in g[1:]],
                    "content_preview": g[0]["content"][:100]
                }
                for g in groups
            ],
            "merged": 0
        }

        if dry_run or not groups:
            return result

        # Merge duplicates - keep highest importance, aggregate access count
        for group in groups:
            keep = group[0]
            duplicates = group[1:]

            # Aggregate stats while archiving/deleting each duplicate.
            total_access = keep.get("access_count", 0)
            for dup in duplicates:
                total_access += dup.get("access_count", 0)

                if archive:
                    await self._archive_memory(
                        dup,
                        reason="duplicate",
                        archived_by=f"merged_into_{keep['id']}"
                    )

                cursor.execute("DELETE FROM memories WHERE id = ?", (dup["id"],))
                result["merged"] += 1

            # Update the kept memory with aggregated access count
            cursor.execute(
                "UPDATE memories SET access_count = ? WHERE id = ?",
                (total_access, keep["id"])
            )

        self.db.conn.commit()
        return result
|
|
432
|
+
|
|
433
|
+
    async def _archive_memory(
        self,
        memory: Dict[str, Any],
        reason: str,
        archived_by: Optional[str] = None
    ):
        """Archive a memory before deletion.

        Copies the row into memory_archive with the given *reason* and a
        computed expiry date.  Does NOT commit — callers commit after the
        accompanying delete so the archive insert and delete land together.

        Args:
            memory: The memory row (as a dict) being archived.
            reason: Why it is archived (e.g. "expired", "duplicate").
            archived_by: Optional provenance tag (e.g. "merged_into_<id>").
        """
        cursor = self.db.conn.cursor()

        # Calculate expiration date from the project's archive retention.
        config = await self.get_config(memory.get("project_path"))
        expires_at = (
            datetime.now() + timedelta(days=config["archive_retention_days"])
        ).isoformat()

        cursor.execute(
            """
            INSERT INTO memory_archive (
                original_id, type, content, embedding, project_path,
                session_id, importance, access_count, decay_factor,
                metadata, archive_reason, archived_by,
                relevance_score_at_archive, expires_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                memory.get("id"),
                memory.get("type"),
                memory.get("content"),
                memory.get("embedding"),
                memory.get("project_path"),
                memory.get("session_id"),
                memory.get("importance"),
                memory.get("access_count"),
                memory.get("decay_factor"),
                memory.get("metadata"),
                reason,
                archived_by,
                # None when the caller did not pre-compute a score.
                memory.get("relevance_score"),
                expires_at
            )
        )
|
|
474
|
+
|
|
475
|
+
async def _log_cleanup(
|
|
476
|
+
self,
|
|
477
|
+
cleanup_type: str,
|
|
478
|
+
project_path: Optional[str],
|
|
479
|
+
archived: int,
|
|
480
|
+
deleted: int,
|
|
481
|
+
merged: int,
|
|
482
|
+
details: str
|
|
483
|
+
):
|
|
484
|
+
"""Log cleanup action for audit trail."""
|
|
485
|
+
cursor = self.db.conn.cursor()
|
|
486
|
+
cursor.execute(
|
|
487
|
+
"""
|
|
488
|
+
INSERT INTO cleanup_log (
|
|
489
|
+
cleanup_type, project_path, memories_archived,
|
|
490
|
+
memories_deleted, memories_merged, details
|
|
491
|
+
) VALUES (?, ?, ?, ?, ?, ?)
|
|
492
|
+
""",
|
|
493
|
+
(cleanup_type, project_path, archived, deleted, merged, details)
|
|
494
|
+
)
|
|
495
|
+
self.db.conn.commit()
|
|
496
|
+
|
|
497
|
+
async def get_archived_memories(
|
|
498
|
+
self,
|
|
499
|
+
project_path: Optional[str] = None,
|
|
500
|
+
reason: Optional[str] = None,
|
|
501
|
+
limit: int = 50
|
|
502
|
+
) -> List[Dict[str, Any]]:
|
|
503
|
+
"""Get archived memories for potential recovery."""
|
|
504
|
+
cursor = self.db.conn.cursor()
|
|
505
|
+
|
|
506
|
+
query = "SELECT * FROM memory_archive WHERE 1=1"
|
|
507
|
+
params = []
|
|
508
|
+
|
|
509
|
+
if project_path:
|
|
510
|
+
query += " AND project_path = ?"
|
|
511
|
+
params.append(project_path)
|
|
512
|
+
|
|
513
|
+
if reason:
|
|
514
|
+
query += " AND archive_reason = ?"
|
|
515
|
+
params.append(reason)
|
|
516
|
+
|
|
517
|
+
query += " ORDER BY archived_at DESC LIMIT ?"
|
|
518
|
+
params.append(limit)
|
|
519
|
+
|
|
520
|
+
cursor.execute(query, tuple(params))
|
|
521
|
+
return [dict(row) for row in cursor.fetchall()]
|
|
522
|
+
|
|
523
|
+
async def restore_memory(
|
|
524
|
+
self,
|
|
525
|
+
archive_id: int
|
|
526
|
+
) -> Dict[str, Any]:
|
|
527
|
+
"""Restore an archived memory."""
|
|
528
|
+
cursor = self.db.conn.cursor()
|
|
529
|
+
|
|
530
|
+
# Get archived memory
|
|
531
|
+
cursor.execute("SELECT * FROM memory_archive WHERE id = ?", (archive_id,))
|
|
532
|
+
archived = cursor.fetchone()
|
|
533
|
+
|
|
534
|
+
if not archived:
|
|
535
|
+
return {"success": False, "error": "Archived memory not found"}
|
|
536
|
+
|
|
537
|
+
archived = dict(archived)
|
|
538
|
+
|
|
539
|
+
# Restore to memories table
|
|
540
|
+
cursor.execute(
|
|
541
|
+
"""
|
|
542
|
+
INSERT INTO memories (
|
|
543
|
+
type, content, embedding, project_path, session_id,
|
|
544
|
+
importance, access_count, decay_factor, metadata
|
|
545
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
546
|
+
""",
|
|
547
|
+
(
|
|
548
|
+
archived.get("type"),
|
|
549
|
+
archived.get("content"),
|
|
550
|
+
archived.get("embedding"),
|
|
551
|
+
archived.get("project_path"),
|
|
552
|
+
archived.get("session_id"),
|
|
553
|
+
archived.get("importance"),
|
|
554
|
+
archived.get("access_count"),
|
|
555
|
+
archived.get("decay_factor"),
|
|
556
|
+
archived.get("metadata")
|
|
557
|
+
)
|
|
558
|
+
)
|
|
559
|
+
new_id = cursor.lastrowid
|
|
560
|
+
|
|
561
|
+
# Remove from archive
|
|
562
|
+
cursor.execute("DELETE FROM memory_archive WHERE id = ?", (archive_id,))
|
|
563
|
+
self.db.conn.commit()
|
|
564
|
+
|
|
565
|
+
return {
|
|
566
|
+
"success": True,
|
|
567
|
+
"restored_id": new_id,
|
|
568
|
+
"original_id": archived.get("original_id"),
|
|
569
|
+
"archive_reason": archived.get("archive_reason")
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
async def purge_expired_archives(self) -> Dict[str, Any]:
|
|
573
|
+
"""Permanently delete archives past their expiration date."""
|
|
574
|
+
cursor = self.db.conn.cursor()
|
|
575
|
+
|
|
576
|
+
# Count expired
|
|
577
|
+
cursor.execute(
|
|
578
|
+
"SELECT COUNT(*) as count FROM memory_archive WHERE expires_at < datetime('now')"
|
|
579
|
+
)
|
|
580
|
+
count = cursor.fetchone()["count"]
|
|
581
|
+
|
|
582
|
+
if count > 0:
|
|
583
|
+
cursor.execute(
|
|
584
|
+
"DELETE FROM memory_archive WHERE expires_at < datetime('now')"
|
|
585
|
+
)
|
|
586
|
+
self.db.conn.commit()
|
|
587
|
+
|
|
588
|
+
return {
|
|
589
|
+
"success": True,
|
|
590
|
+
"purged_count": count
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
async def get_cleanup_stats(self) -> Dict[str, Any]:
|
|
594
|
+
"""Get overall cleanup statistics."""
|
|
595
|
+
cursor = self.db.conn.cursor()
|
|
596
|
+
|
|
597
|
+
# Memory counts
|
|
598
|
+
cursor.execute("SELECT COUNT(*) as count FROM memories")
|
|
599
|
+
memory_count = cursor.fetchone()["count"]
|
|
600
|
+
|
|
601
|
+
cursor.execute("SELECT COUNT(*) as count FROM memory_archive")
|
|
602
|
+
archive_count = cursor.fetchone()["count"]
|
|
603
|
+
|
|
604
|
+
# Recent cleanup log
|
|
605
|
+
cursor.execute(
|
|
606
|
+
"""
|
|
607
|
+
SELECT * FROM cleanup_log
|
|
608
|
+
ORDER BY created_at DESC
|
|
609
|
+
LIMIT 10
|
|
610
|
+
"""
|
|
611
|
+
)
|
|
612
|
+
recent_cleanups = [dict(row) for row in cursor.fetchall()]
|
|
613
|
+
|
|
614
|
+
# Totals from logs
|
|
615
|
+
cursor.execute(
|
|
616
|
+
"""
|
|
617
|
+
SELECT
|
|
618
|
+
SUM(memories_archived) as total_archived,
|
|
619
|
+
SUM(memories_deleted) as total_deleted,
|
|
620
|
+
SUM(memories_merged) as total_merged
|
|
621
|
+
FROM cleanup_log
|
|
622
|
+
"""
|
|
623
|
+
)
|
|
624
|
+
totals = dict(cursor.fetchone())
|
|
625
|
+
|
|
626
|
+
return {
|
|
627
|
+
"current_memories": memory_count,
|
|
628
|
+
"archived_memories": archive_count,
|
|
629
|
+
"total_archived": totals.get("total_archived") or 0,
|
|
630
|
+
"total_deleted": totals.get("total_deleted") or 0,
|
|
631
|
+
"total_merged": totals.get("total_merged") or 0,
|
|
632
|
+
"recent_cleanups": recent_cleanups
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
def _parse_embedding(self, embedding_str) -> Optional[List[float]]:
|
|
636
|
+
"""Parse embedding from string or list."""
|
|
637
|
+
if not embedding_str:
|
|
638
|
+
return None
|
|
639
|
+
if isinstance(embedding_str, list):
|
|
640
|
+
return embedding_str
|
|
641
|
+
try:
|
|
642
|
+
return json.loads(embedding_str)
|
|
643
|
+
except:
|
|
644
|
+
return None
|
|
645
|
+
|
|
646
|
+
def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
|
|
647
|
+
"""Calculate cosine similarity between two vectors."""
|
|
648
|
+
import numpy as np
|
|
649
|
+
a = np.array(vec1)
|
|
650
|
+
b = np.array(vec2)
|
|
651
|
+
norm_a = np.linalg.norm(a)
|
|
652
|
+
norm_b = np.linalg.norm(b)
|
|
653
|
+
if norm_a == 0 or norm_b == 0:
|
|
654
|
+
return 0.0
|
|
655
|
+
return float(np.dot(a, b) / (norm_a * norm_b))
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
# Global instance (lazy singleton; created by get_cleanup_service)
_cleanup: Optional[CleanupService] = None


def get_cleanup_service(db, embeddings=None) -> CleanupService:
    """Get the global cleanup service instance.

    NOTE(review): the singleton is created on the first call; `db` and
    `embeddings` passed on later calls are silently ignored — confirm
    callers always supply the same database handle.
    """
    global _cleanup
    if _cleanup is None:
        _cleanup = CleanupService(db, embeddings)
    return _cleanup
|