claude-memory-agent 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.env.example +107 -0
  2. package/README.md +200 -0
  3. package/agent_card.py +512 -0
  4. package/bin/cli.js +181 -0
  5. package/bin/postinstall.js +216 -0
  6. package/config.py +104 -0
  7. package/dashboard.html +2689 -0
  8. package/hooks/README.md +196 -0
  9. package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
  10. package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
  11. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  12. package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
  13. package/hooks/auto-detect-response.py +348 -0
  14. package/hooks/auto_capture.py +255 -0
  15. package/hooks/detect-correction.py +173 -0
  16. package/hooks/grounding-hook.py +348 -0
  17. package/hooks/log-tool-use.py +234 -0
  18. package/hooks/log-user-request.py +208 -0
  19. package/hooks/pre-tool-decision.py +218 -0
  20. package/hooks/problem-detector.py +343 -0
  21. package/hooks/session_end.py +192 -0
  22. package/hooks/session_start.py +227 -0
  23. package/install.py +887 -0
  24. package/main.py +2859 -0
  25. package/manager.py +997 -0
  26. package/package.json +55 -0
  27. package/requirements.txt +8 -0
  28. package/run_server.py +136 -0
  29. package/services/__init__.py +50 -0
  30. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  31. package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
  32. package/services/__pycache__/auth.cpython-312.pyc +0 -0
  33. package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
  34. package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
  35. package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
  36. package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
  37. package/services/__pycache__/confidence.cpython-312.pyc +0 -0
  38. package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
  39. package/services/__pycache__/database.cpython-312.pyc +0 -0
  40. package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
  41. package/services/__pycache__/insights.cpython-312.pyc +0 -0
  42. package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
  43. package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
  44. package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
  45. package/services/__pycache__/timeline.cpython-312.pyc +0 -0
  46. package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
  47. package/services/__pycache__/websocket.cpython-312.pyc +0 -0
  48. package/services/agent_registry.py +753 -0
  49. package/services/auth.py +331 -0
  50. package/services/auto_inject.py +250 -0
  51. package/services/claude_md_sync.py +275 -0
  52. package/services/cleanup.py +667 -0
  53. package/services/compaction_flush.py +447 -0
  54. package/services/confidence.py +301 -0
  55. package/services/daily_log.py +333 -0
  56. package/services/database.py +2485 -0
  57. package/services/embeddings.py +358 -0
  58. package/services/insights.py +632 -0
  59. package/services/llm_analyzer.py +595 -0
  60. package/services/memory_md_sync.py +409 -0
  61. package/services/retry_queue.py +453 -0
  62. package/services/timeline.py +579 -0
  63. package/services/vector_index.py +398 -0
  64. package/services/websocket.py +257 -0
  65. package/skills/__init__.py +6 -0
  66. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  67. package/skills/__pycache__/admin.cpython-312.pyc +0 -0
  68. package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
  69. package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
  70. package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
  71. package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
  72. package/skills/__pycache__/insights.cpython-312.pyc +0 -0
  73. package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
  74. package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
  75. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  76. package/skills/__pycache__/state.cpython-312.pyc +0 -0
  77. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  78. package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
  79. package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
  80. package/skills/__pycache__/verification.cpython-312.pyc +0 -0
  81. package/skills/admin.py +469 -0
  82. package/skills/checkpoint.py +198 -0
  83. package/skills/claude_md.py +363 -0
  84. package/skills/cleanup.py +241 -0
  85. package/skills/grounding.py +801 -0
  86. package/skills/insights.py +231 -0
  87. package/skills/natural_language.py +277 -0
  88. package/skills/retrieve.py +67 -0
  89. package/skills/search.py +213 -0
  90. package/skills/state.py +182 -0
  91. package/skills/store.py +179 -0
  92. package/skills/summarize.py +588 -0
  93. package/skills/timeline.py +387 -0
  94. package/skills/verification.py +391 -0
  95. package/start_daemon.py +155 -0
  96. package/test_automation.py +221 -0
  97. package/test_complete.py +338 -0
  98. package/test_full.py +322 -0
  99. package/update_system.py +817 -0
  100. package/verify_db.py +134 -0
@@ -0,0 +1,667 @@
1
+ """Memory cleanup and pruning service.
2
+
3
+ Handles automatic cleanup of old, low-value, and duplicate memories.
4
+ Supports archival before deletion and configurable retention policies.
5
+ """
6
import json
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Tuple
10
+
11
+
12
class CleanupService:
    """Service for memory cleanup, deduplication, and archival.

    Features:
    - Relevance-based cleanup (low-scoring memories)
    - Age-based retention (older than N days)
    - Duplicate detection and merging
    - Soft-delete with archive for recovery
    - Per-project configuration
    - Dry-run mode for preview
    - Audit logging
    """

    def __init__(self, db, embeddings=None):
        # db exposes .conn (a sqlite3 connection whose rows support dict())
        # and calculate_relevance_score(); the embeddings service is kept
        # for callers but dedup below works off embeddings stored in rows.
        self.db = db
        self.embeddings = embeddings

    async def get_config(
        self,
        project_path: Optional[str] = None
    ) -> Dict[str, Any]:
        """Get cleanup configuration for a project or global default.

        Args:
            project_path: Project to look up; None (or a missing row)
                falls back to the built-in defaults.

        Returns:
            Dict of cleanup settings.
        """
        cursor = self.db.conn.cursor()

        if project_path:
            cursor.execute(
                "SELECT * FROM cleanup_config WHERE project_path = ?",
                (project_path,)
            )
            row = cursor.fetchone()
            if row:
                return dict(row)

        # Return defaults
        return {
            "retention_days": 90,
            "min_relevance_score": 0.1,
            "keep_high_importance": True,
            "importance_threshold": 7,
            "dedup_enabled": True,
            "dedup_threshold": 0.95,
            "archive_before_delete": True,
            "archive_retention_days": 365,
            "auto_cleanup_enabled": False
        }

    async def save_config(
        self,
        project_path: Optional[str],
        config: Dict[str, Any]
    ) -> bool:
        """Save (upsert) cleanup configuration for a project.

        Booleans are stored as 0/1 integers; missing keys fall back to the
        same defaults get_config() returns.
        """
        cursor = self.db.conn.cursor()

        cursor.execute(
            """
            INSERT INTO cleanup_config (
                project_path, retention_days, min_relevance_score,
                keep_high_importance, importance_threshold,
                dedup_enabled, dedup_threshold,
                archive_before_delete, archive_retention_days,
                auto_cleanup_enabled
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(project_path) DO UPDATE SET
                retention_days = excluded.retention_days,
                min_relevance_score = excluded.min_relevance_score,
                keep_high_importance = excluded.keep_high_importance,
                importance_threshold = excluded.importance_threshold,
                dedup_enabled = excluded.dedup_enabled,
                dedup_threshold = excluded.dedup_threshold,
                archive_before_delete = excluded.archive_before_delete,
                archive_retention_days = excluded.archive_retention_days,
                auto_cleanup_enabled = excluded.auto_cleanup_enabled,
                updated_at = datetime('now')
            """,
            (
                project_path,
                config.get("retention_days", 90),
                config.get("min_relevance_score", 0.1),
                1 if config.get("keep_high_importance", True) else 0,
                config.get("importance_threshold", 7),
                1 if config.get("dedup_enabled", True) else 0,
                config.get("dedup_threshold", 0.95),
                1 if config.get("archive_before_delete", True) else 0,
                config.get("archive_retention_days", 365),
                1 if config.get("auto_cleanup_enabled", False) else 0
            )
        )
        self.db.conn.commit()
        return True

    async def run_cleanup(
        self,
        project_path: Optional[str] = None,
        dry_run: bool = False
    ) -> Dict[str, Any]:
        """Run full cleanup job.

        Runs, in order: low-relevance sweep, age-based expiry, and (if
        enabled) embedding-based deduplication; then writes an audit log
        entry and stamps last_cleanup_at.

        Args:
            project_path: Filter to specific project (None = all)
            dry_run: If True, only preview what would be cleaned

        Returns:
            Cleanup results with counts and details
        """
        config = await self.get_config(project_path)
        results = {
            "dry_run": dry_run,
            "project_path": project_path,
            "config": config,
            "low_relevance": {"count": 0, "ids": []},
            "expired": {"count": 0, "ids": []},
            "duplicates": {"count": 0, "groups": []},
            "total_archived": 0,
            "total_deleted": 0,
            "total_merged": 0
        }

        # 1. Clean up low-relevance memories
        low_rel_result = await self._cleanup_low_relevance(
            project_path=project_path,
            min_score=config["min_relevance_score"],
            keep_high_importance=config["keep_high_importance"],
            importance_threshold=config["importance_threshold"],
            archive=config["archive_before_delete"],
            dry_run=dry_run
        )
        results["low_relevance"] = low_rel_result
        results["total_archived"] += low_rel_result.get("archived", 0)
        results["total_deleted"] += low_rel_result.get("deleted", 0)

        # 2. Clean up expired memories
        expired_result = await self._cleanup_expired(
            project_path=project_path,
            retention_days=config["retention_days"],
            keep_high_importance=config["keep_high_importance"],
            importance_threshold=config["importance_threshold"],
            archive=config["archive_before_delete"],
            dry_run=dry_run
        )
        results["expired"] = expired_result
        results["total_archived"] += expired_result.get("archived", 0)
        results["total_deleted"] += expired_result.get("deleted", 0)

        # 3. Deduplicate memories
        if config["dedup_enabled"]:
            dedup_result = await self._deduplicate_memories(
                project_path=project_path,
                threshold=config["dedup_threshold"],
                archive=config["archive_before_delete"],
                dry_run=dry_run
            )
            results["duplicates"] = dedup_result
            results["total_merged"] += dedup_result.get("merged", 0)

        # 4. Log the cleanup (skipped entirely for previews)
        if not dry_run:
            await self._log_cleanup(
                cleanup_type="full",
                project_path=project_path,
                archived=results["total_archived"],
                deleted=results["total_deleted"],
                merged=results["total_merged"],
                details=json.dumps(results)
            )

            # Update last cleanup timestamp
            cursor = self.db.conn.cursor()
            if project_path:
                cursor.execute(
                    """
                    UPDATE cleanup_config
                    SET last_cleanup_at = datetime('now')
                    WHERE project_path = ?
                    """,
                    (project_path,)
                )
                self.db.conn.commit()

        return results

    async def _cleanup_low_relevance(
        self,
        project_path: Optional[str],
        min_score: float,
        keep_high_importance: bool,
        importance_threshold: int,
        archive: bool,
        dry_run: bool
    ) -> Dict[str, Any]:
        """Clean up memories whose computed relevance score falls below
        ``min_score``.

        Relevance cannot be filtered in SQL (it is computed in Python by
        db.calculate_relevance_score), so candidate rows are fetched and
        scored here.
        """
        cursor = self.db.conn.cursor()

        # Build query to find candidate memories
        query = """
            SELECT id, type, content, embedding, project_path, session_id,
                   importance, access_count, decay_factor, metadata,
                   created_at, last_accessed
            FROM memories
            WHERE 1=1
        """
        params = []

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        if keep_high_importance:
            # High-importance memories are exempt from relevance pruning.
            query += " AND importance < ?"
            params.append(importance_threshold)

        cursor.execute(query, tuple(params))
        memories = [dict(row) for row in cursor.fetchall()]

        # Filter by calculated relevance score
        to_clean = []
        for mem in memories:
            score = self.db.calculate_relevance_score(
                importance=mem.get("importance", 5),
                created_at=mem.get("created_at"),
                last_accessed=mem.get("last_accessed"),
                access_count=mem.get("access_count", 0),
                decay_factor=mem.get("decay_factor", 1.0)
            )
            if score < min_score:
                mem["relevance_score"] = score
                to_clean.append(mem)

        result = {
            "count": len(to_clean),
            "ids": [m["id"] for m in to_clean],
            "archived": 0,
            "deleted": 0
        }

        if dry_run or not to_clean:
            return result

        # Archive and/or delete
        for mem in to_clean:
            if archive:
                await self._archive_memory(mem, reason="low_relevance")
                result["archived"] += 1

            cursor.execute("DELETE FROM memories WHERE id = ?", (mem["id"],))
            result["deleted"] += 1

        self.db.conn.commit()
        return result

    async def _cleanup_expired(
        self,
        project_path: Optional[str],
        retention_days: int,
        keep_high_importance: bool,
        importance_threshold: int,
        archive: bool,
        dry_run: bool
    ) -> Dict[str, Any]:
        """Clean up memories older than the retention period."""
        cursor = self.db.conn.cursor()
        # NOTE(review): cutoff is naive local time in ISO format; this assumes
        # memories.created_at is also written with datetime.now().isoformat().
        # If created_at comes from SQLite's datetime('now') (UTC,
        # space-separated) the string comparison drifts by the local UTC
        # offset -- confirm against the writer in services/database.py.
        cutoff = (datetime.now() - timedelta(days=retention_days)).isoformat()

        query = """
            SELECT id, type, content, embedding, project_path, session_id,
                   importance, access_count, decay_factor, metadata,
                   created_at, last_accessed
            FROM memories
            WHERE created_at < ?
        """
        params = [cutoff]

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        if keep_high_importance:
            query += " AND importance < ?"
            params.append(importance_threshold)

        cursor.execute(query, tuple(params))
        memories = [dict(row) for row in cursor.fetchall()]

        result = {
            "count": len(memories),
            "ids": [m["id"] for m in memories],
            "cutoff_date": cutoff,
            "archived": 0,
            "deleted": 0
        }

        if dry_run or not memories:
            return result

        for mem in memories:
            if archive:
                # Calculate relevance at archive time so the archive row
                # records why the memory was considered disposable.
                score = self.db.calculate_relevance_score(
                    importance=mem.get("importance", 5),
                    created_at=mem.get("created_at"),
                    last_accessed=mem.get("last_accessed"),
                    access_count=mem.get("access_count", 0),
                    decay_factor=mem.get("decay_factor", 1.0)
                )
                mem["relevance_score"] = score
                await self._archive_memory(mem, reason="expired")
                result["archived"] += 1

            cursor.execute("DELETE FROM memories WHERE id = ?", (mem["id"],))
            result["deleted"] += 1

        self.db.conn.commit()
        return result

    async def _deduplicate_memories(
        self,
        project_path: Optional[str],
        threshold: float,
        archive: bool,
        dry_run: bool
    ) -> Dict[str, Any]:
        """Find and merge near-duplicate memories.

        Uses greedy clustering over cosine similarity of stored embeddings
        (O(n^2) pairwise compare). Rows are ordered by importance then
        access_count so the first member of each group -- the one kept --
        is the most valuable.
        """
        cursor = self.db.conn.cursor()

        # Get memories with embeddings
        query = """
            SELECT id, type, content, embedding, project_path, session_id,
                   importance, access_count, created_at
            FROM memories
            WHERE embedding IS NOT NULL
        """
        params = []

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        query += " ORDER BY importance DESC, access_count DESC"

        cursor.execute(query, tuple(params))
        memories = [dict(row) for row in cursor.fetchall()]

        if len(memories) < 2:
            return {"count": 0, "groups": [], "merged": 0}

        # Find duplicate groups using greedy clustering
        groups = []
        used = set()

        for i, mem in enumerate(memories):
            if mem["id"] in used:
                continue

            emb1 = self._parse_embedding(mem.get("embedding"))
            if not emb1:
                continue

            group = [mem]
            used.add(mem["id"])

            for other in memories[i+1:]:
                if other["id"] in used:
                    continue

                emb2 = self._parse_embedding(other.get("embedding"))
                if not emb2:
                    continue

                similarity = self._cosine_similarity(emb1, emb2)
                if similarity >= threshold:
                    group.append(other)
                    used.add(other["id"])

            if len(group) > 1:
                groups.append(group)

        result = {
            "count": sum(len(g) - 1 for g in groups),  # Duplicates to remove
            "groups": [
                {
                    "keep_id": g[0]["id"],
                    "merge_ids": [m["id"] for m in g[1:]],
                    # Guard against NULL content; slicing None raises.
                    "content_preview": (g[0]["content"] or "")[:100]
                }
                for g in groups
            ],
            "merged": 0
        }

        if dry_run or not groups:
            return result

        # Merge duplicates - keep highest importance, aggregate access count
        for group in groups:
            keep = group[0]
            duplicates = group[1:]

            # Aggregate stats
            total_access = keep.get("access_count", 0)
            for dup in duplicates:
                total_access += dup.get("access_count", 0)

                if archive:
                    await self._archive_memory(
                        dup,
                        reason="duplicate",
                        archived_by=f"merged_into_{keep['id']}"
                    )

                cursor.execute("DELETE FROM memories WHERE id = ?", (dup["id"],))
                result["merged"] += 1

            # Update the kept memory with aggregated access count
            cursor.execute(
                "UPDATE memories SET access_count = ? WHERE id = ?",
                (total_access, keep["id"])
            )

        self.db.conn.commit()
        return result

    async def _archive_memory(
        self,
        memory: Dict[str, Any],
        reason: str,
        archived_by: Optional[str] = None
    ):
        """Archive a memory before deletion.

        Does not commit; callers commit once per batch.
        """
        cursor = self.db.conn.cursor()

        # Calculate expiration date. purge_expired_archives() compares
        # expires_at against SQLite's datetime('now') as strings, and
        # datetime('now') is UTC in "YYYY-MM-DD HH:MM:SS" form -- so the
        # value stored here must use the same timezone and text format
        # (a naive-local isoformat() would sort inconsistently against it).
        config = await self.get_config(memory.get("project_path"))
        expires_at = (
            datetime.now(timezone.utc)
            + timedelta(days=config["archive_retention_days"])
        ).strftime("%Y-%m-%d %H:%M:%S")

        cursor.execute(
            """
            INSERT INTO memory_archive (
                original_id, type, content, embedding, project_path,
                session_id, importance, access_count, decay_factor,
                metadata, archive_reason, archived_by,
                relevance_score_at_archive, expires_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                memory.get("id"),
                memory.get("type"),
                memory.get("content"),
                memory.get("embedding"),
                memory.get("project_path"),
                memory.get("session_id"),
                memory.get("importance"),
                memory.get("access_count"),
                memory.get("decay_factor"),
                memory.get("metadata"),
                reason,
                archived_by,
                memory.get("relevance_score"),
                expires_at
            )
        )

    async def _log_cleanup(
        self,
        cleanup_type: str,
        project_path: Optional[str],
        archived: int,
        deleted: int,
        merged: int,
        details: str
    ):
        """Log cleanup action for audit trail."""
        cursor = self.db.conn.cursor()
        cursor.execute(
            """
            INSERT INTO cleanup_log (
                cleanup_type, project_path, memories_archived,
                memories_deleted, memories_merged, details
            ) VALUES (?, ?, ?, ?, ?, ?)
            """,
            (cleanup_type, project_path, archived, deleted, merged, details)
        )
        self.db.conn.commit()

    async def get_archived_memories(
        self,
        project_path: Optional[str] = None,
        reason: Optional[str] = None,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get archived memories for potential recovery, newest first."""
        cursor = self.db.conn.cursor()

        query = "SELECT * FROM memory_archive WHERE 1=1"
        params = []

        if project_path:
            query += " AND project_path = ?"
            params.append(project_path)

        if reason:
            query += " AND archive_reason = ?"
            params.append(reason)

        query += " ORDER BY archived_at DESC LIMIT ?"
        params.append(limit)

        cursor.execute(query, tuple(params))
        return [dict(row) for row in cursor.fetchall()]

    async def restore_memory(
        self,
        archive_id: int
    ) -> Dict[str, Any]:
        """Restore an archived memory back into the memories table.

        The restored row gets a fresh id (and fresh created_at per the
        memories table defaults); the archive row is removed.
        """
        cursor = self.db.conn.cursor()

        # Get archived memory
        cursor.execute("SELECT * FROM memory_archive WHERE id = ?", (archive_id,))
        archived = cursor.fetchone()

        if not archived:
            return {"success": False, "error": "Archived memory not found"}

        archived = dict(archived)

        # Restore to memories table
        cursor.execute(
            """
            INSERT INTO memories (
                type, content, embedding, project_path, session_id,
                importance, access_count, decay_factor, metadata
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                archived.get("type"),
                archived.get("content"),
                archived.get("embedding"),
                archived.get("project_path"),
                archived.get("session_id"),
                archived.get("importance"),
                archived.get("access_count"),
                archived.get("decay_factor"),
                archived.get("metadata")
            )
        )
        new_id = cursor.lastrowid

        # Remove from archive
        cursor.execute("DELETE FROM memory_archive WHERE id = ?", (archive_id,))
        self.db.conn.commit()

        return {
            "success": True,
            "restored_id": new_id,
            "original_id": archived.get("original_id"),
            "archive_reason": archived.get("archive_reason")
        }

    async def purge_expired_archives(self) -> Dict[str, Any]:
        """Permanently delete archives past their expiration date."""
        cursor = self.db.conn.cursor()

        # Count expired
        cursor.execute(
            "SELECT COUNT(*) as count FROM memory_archive WHERE expires_at < datetime('now')"
        )
        count = cursor.fetchone()["count"]

        if count > 0:
            cursor.execute(
                "DELETE FROM memory_archive WHERE expires_at < datetime('now')"
            )
            self.db.conn.commit()

        return {
            "success": True,
            "purged_count": count
        }

    async def get_cleanup_stats(self) -> Dict[str, Any]:
        """Get overall cleanup statistics (counts, lifetime totals, recent log)."""
        cursor = self.db.conn.cursor()

        # Memory counts
        cursor.execute("SELECT COUNT(*) as count FROM memories")
        memory_count = cursor.fetchone()["count"]

        cursor.execute("SELECT COUNT(*) as count FROM memory_archive")
        archive_count = cursor.fetchone()["count"]

        # Recent cleanup log
        cursor.execute(
            """
            SELECT * FROM cleanup_log
            ORDER BY created_at DESC
            LIMIT 10
            """
        )
        recent_cleanups = [dict(row) for row in cursor.fetchall()]

        # Totals from logs (SUM over an empty table yields NULL -> 0 below)
        cursor.execute(
            """
            SELECT
                SUM(memories_archived) as total_archived,
                SUM(memories_deleted) as total_deleted,
                SUM(memories_merged) as total_merged
            FROM cleanup_log
            """
        )
        totals = dict(cursor.fetchone())

        return {
            "current_memories": memory_count,
            "archived_memories": archive_count,
            "total_archived": totals.get("total_archived") or 0,
            "total_deleted": totals.get("total_deleted") or 0,
            "total_merged": totals.get("total_merged") or 0,
            "recent_cleanups": recent_cleanups
        }

    def _parse_embedding(self, embedding_str) -> Optional[List[float]]:
        """Parse an embedding from its stored JSON string (lists pass through)."""
        if not embedding_str:
            return None
        if isinstance(embedding_str, list):
            return embedding_str
        try:
            return json.loads(embedding_str)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; an unparseable blob is
            # treated as "no embedding" rather than aborting cleanup.
            return None

    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate cosine similarity between two vectors (0.0 if either is zero)."""
        import numpy as np
        a = np.array(vec1)
        b = np.array(vec2)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))
656
+
657
+
658
# Lazily-created module-level singleton.
_cleanup: Optional[CleanupService] = None


def get_cleanup_service(db, embeddings=None) -> CleanupService:
    """Return the shared CleanupService, creating it on first use.

    Note: ``db`` and ``embeddings`` are only consulted on the first call;
    subsequent calls return the existing instance unchanged.
    """
    global _cleanup
    if _cleanup is not None:
        return _cleanup
    _cleanup = CleanupService(db, embeddings)
    return _cleanup