claude-memory-agent 2.1.0 → 2.2.1

Files changed (91)
  1. package/bin/cli.js +11 -1
  2. package/bin/lib/banner.js +39 -0
  3. package/bin/lib/environment.js +166 -0
  4. package/bin/lib/installer.js +291 -0
  5. package/bin/lib/models.js +95 -0
  6. package/bin/lib/steps/advanced.js +101 -0
  7. package/bin/lib/steps/confirm.js +87 -0
  8. package/bin/lib/steps/model.js +57 -0
  9. package/bin/lib/steps/provider.js +65 -0
  10. package/bin/lib/steps/scope.js +59 -0
  11. package/bin/lib/steps/server.js +74 -0
  12. package/bin/lib/ui.js +75 -0
  13. package/bin/onboarding.js +164 -0
  14. package/bin/postinstall.js +22 -257
  15. package/config.py +103 -4
  16. package/dashboard.html +697 -27
  17. package/hooks/extract_memories.py +439 -0
  18. package/hooks/pre_compact_hook.py +76 -0
  19. package/hooks/session_end_hook.py +149 -0
  20. package/hooks/stop_hook.py +372 -0
  21. package/install.py +91 -37
  22. package/main.py +1636 -892
  23. package/mcp_server.py +451 -0
  24. package/package.json +14 -3
  25. package/requirements.txt +12 -8
  26. package/services/adaptive_ranker.py +272 -0
  27. package/services/agent_catalog.json +153 -0
  28. package/services/agent_registry.py +245 -730
  29. package/services/claude_md_sync.py +320 -4
  30. package/services/consolidation.py +417 -0
  31. package/services/database.py +586 -105
  32. package/services/embedding_pipeline.py +262 -0
  33. package/services/embeddings.py +493 -85
  34. package/services/memory_decay.py +408 -0
  35. package/services/native_memory_paths.py +86 -0
  36. package/services/native_memory_sync.py +496 -0
  37. package/services/response_manager.py +183 -0
  38. package/services/terminal_ui.py +199 -0
  39. package/services/tier_manager.py +235 -0
  40. package/services/websocket.py +26 -6
  41. package/skills/search.py +136 -61
  42. package/skills/session_review.py +210 -23
  43. package/skills/store.py +125 -18
  44. package/terminal_dashboard.py +474 -0
  45. package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
  46. package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
  47. package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
  48. package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
  49. package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
  50. package/services/__pycache__/__init__.cpython-312.pyc +0 -0
  51. package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
  52. package/services/__pycache__/auth.cpython-312.pyc +0 -0
  53. package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
  54. package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
  55. package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
  56. package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
  57. package/services/__pycache__/confidence.cpython-312.pyc +0 -0
  58. package/services/__pycache__/curator.cpython-312.pyc +0 -0
  59. package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
  60. package/services/__pycache__/database.cpython-312.pyc +0 -0
  61. package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
  62. package/services/__pycache__/insights.cpython-312.pyc +0 -0
  63. package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
  64. package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
  65. package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
  66. package/services/__pycache__/timeline.cpython-312.pyc +0 -0
  67. package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
  68. package/services/__pycache__/websocket.cpython-312.pyc +0 -0
  69. package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
  70. package/skills/__pycache__/admin.cpython-312.pyc +0 -0
  71. package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
  72. package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
  73. package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
  74. package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
  75. package/skills/__pycache__/context.cpython-312.pyc +0 -0
  76. package/skills/__pycache__/curator.cpython-312.pyc +0 -0
  77. package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
  78. package/skills/__pycache__/insights.cpython-312.pyc +0 -0
  79. package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
  80. package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
  81. package/skills/__pycache__/search.cpython-312.pyc +0 -0
  82. package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
  83. package/skills/__pycache__/state.cpython-312.pyc +0 -0
  84. package/skills/__pycache__/store.cpython-312.pyc +0 -0
  85. package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
  86. package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
  87. package/skills/__pycache__/verification.cpython-312.pyc +0 -0
  88. package/test_automation.py +0 -221
  89. package/test_complete.py +0 -338
  90. package/test_full.py +0 -322
  91. package/verify_db.py +0 -134
package/services/consolidation.py
@@ -0,0 +1,417 @@
+"""Memory Consolidation Service - CLaRa-inspired salient compression.
+
+Clusters similar warm-tier memories and consolidates them into single
+compressed memories, preserving the most salient information.
+
+Inspired by CLaRa's approach of compressing documents into fixed-size
+memory tokens while maintaining semantic quality through salience scoring.
+
+Process:
+1. Find clusters of similar warm-tier memories (cosine sim >= threshold)
+2. Score each memory by salience (outcome, importance, confidence, access)
+3. Preserve the top 2 in full, summarize the rest
+4. Create a consolidated memory with a weighted-average embedding
+5. Archive the originals with a reference to the consolidated memory
+"""
+import json
+import logging
+import math
+import numpy as np
+from datetime import datetime
+from typing import Dict, Any, List, Optional, Tuple
+
+from config import config
+
+logger = logging.getLogger(__name__)
+
+
+class ConsolidationService:
+    """Consolidates similar memories to reduce redundancy and improve search.
+
+    Only operates on warm-tier memories to avoid disrupting hot (active) content.
+    """
+
+    def __init__(self, db, embeddings=None):
+        self.db = db
+        self.embeddings = embeddings
+        self.similarity_threshold = config.CONSOLIDATION_THRESHOLD
+        self.min_group_size = config.CONSOLIDATION_MIN_GROUP
+        self.max_group_size = config.CONSOLIDATION_MAX_GROUP
+        self.max_per_run = config.CONSOLIDATION_MAX_PER_RUN
+
+    def _calculate_salience(self, memory: dict) -> float:
+        """Calculate salience score for a memory.
+
+        Salience = outcome bonus + success bonus + importance/10 + confidence
+                   + 0.2 * log(1 + capped access count)
+
+        Args:
+            memory: Memory dict with outcome, importance, confidence, access_count
+
+        Returns:
+            Float salience score (higher = more important to preserve)
+        """
+        score = 0.0
+
+        # Outcome success bonus
+        outcome_status = memory.get('outcome_status', 'pending')
+        if outcome_status == 'success':
+            score += 3.0
+        elif outcome_status == 'partial':
+            score += 1.5
+
+        success = memory.get('success')
+        if success:
+            score += 2.0
+
+        # Importance (normalized to 0-1)
+        importance = memory.get('importance', 5) or 5
+        score += importance / 10.0
+
+        # Confidence
+        confidence = memory.get('confidence', 0.5) or 0.5
+        score += confidence
+
+        # Access frequency (log scale, capped at 50)
+        access_count = memory.get('access_count', 0) or 0
+        score += 0.2 * math.log(1 + min(access_count, 50))
+
+        return round(score, 4)
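+    # Worked example (illustrative numbers, not from the package): a memory
+    # with outcome_status='success' (no separate success flag), importance 8,
+    # confidence 0.9 and access_count 20 scores 3.0 + 0.8 + 0.9 + 0.2*ln(21)
+    # ~= 5.31; a 'pending' memory with defaults (5, 0.5, 0 accesses) scores 1.0.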
+
+    async def find_consolidation_candidates(self) -> List[List[dict]]:
+        """Find groups of similar warm-tier memories that could be consolidated.
+
+        Uses pairwise cosine similarity on warm-tier memory embeddings.
+
+        Returns:
+            List of groups, where each group is a list of similar memory dicts.
+        """
+        cursor = self.db.conn.cursor()
+
+        # Get warm-tier memories with embeddings
+        cursor.execute("""
+            SELECT id, type, content, embedding, importance, confidence,
+                   access_count, outcome_status, success, created_at,
+                   project_path, metadata, tags, outcome
+            FROM memories
+            WHERE (tier = 'warm' OR (tier IS NULL AND importance < 7))
+              AND embedding IS NOT NULL
+            ORDER BY created_at DESC
+            LIMIT 500
+        """)
+
+        rows = cursor.fetchall()
+        if len(rows) < self.min_group_size:
+            return []
+
+        # Deserialize embeddings
+        memories = []
+        embeddings_list = []
+        for row in rows:
+            emb = self.db._deserialize_embedding(row['embedding'])
+            if emb:
+                memories.append(dict(row))
+                embeddings_list.append(emb)
+
+        if len(memories) < self.min_group_size:
+            return []
+
+        # Compute pairwise cosine similarity matrix
+        emb_matrix = np.array(embeddings_list, dtype=np.float32)
+        # Normalize rows to unit length
+        norms = np.linalg.norm(emb_matrix, axis=1, keepdims=True)
+        norms[norms == 0] = 1  # Avoid division by zero
+        emb_matrix = emb_matrix / norms
+
+        similarity_matrix = emb_matrix @ emb_matrix.T
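+        # With unit-normalized rows, this single matrix product yields the
+        # cosine similarity of every memory pair (at most 500x500 entries,
+        # given the LIMIT above).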
+
+        # Greedy clustering: find groups above threshold
+        used = set()
+        groups = []
+
+        for i in range(len(memories)):
+            if i in used:
+                continue
+
+            group_indices = [i]
+            for j in range(i + 1, len(memories)):
+                if j in used:
+                    continue
+                if similarity_matrix[i][j] >= self.similarity_threshold:
+                    group_indices.append(j)
+                    if len(group_indices) >= self.max_group_size:
+                        break
+
+            if len(group_indices) >= self.min_group_size:
+                group = [memories[idx] for idx in group_indices]
+                groups.append(group)
+                used.update(group_indices)
+
+        return groups
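+    # Note: the single greedy pass assigns each memory to at most one group,
+    # anchored on the newest unused memory (rows are ordered created_at DESC),
+    # so clusters never overlap and each row is considered once per run.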
+
+    async def consolidate_group(self, group: List[dict]) -> Optional[Dict[str, Any]]:
+        """Consolidate a group of similar memories into one.
+
+        Strategy:
+        1. Score each by salience
+        2. Top 2: preserve full content
+        3. Remaining: first 100 chars as summary
+        4. Embedding: weighted average by salience
+        5. Best metadata from the group
+
+        Args:
+            group: List of similar memory dicts
+
+        Returns:
+            Dict with consolidated memory info, or None on failure
+        """
+        if len(group) < self.min_group_size:
+            return None
+
+        # Score by salience
+        scored = [(mem, self._calculate_salience(mem)) for mem in group]
+        scored.sort(key=lambda x: x[1], reverse=True)
+
+        # Build consolidated content
+        content_parts = []
+        for i, (mem, salience) in enumerate(scored):
+            if i < 2:
+                # Top 2: full content
+                content_parts.append(mem['content'])
+            else:
+                # Rest: truncated summary
+                truncated = mem['content'][:100]
+                if len(mem['content']) > 100:
+                    truncated += '...'
+                content_parts.append(f"[Related] {truncated}")
+
+        consolidated_content = '\n\n---\n\n'.join(content_parts)
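+        # For a 4-memory group this yields two full texts followed by two
+        # "[Related] ..." stubs, separated by '---' dividers.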
+
+        # Weighted average embedding
+        embeddings = []
+        weights = []
+        for mem, salience in scored:
+            emb = self.db._deserialize_embedding(mem.get('embedding', ''))
+            if emb:
+                embeddings.append(emb)
+                weights.append(salience)
+
+        consolidated_embedding = None
+        if embeddings:
+            emb_array = np.array(embeddings, dtype=np.float32)
+            weight_array = np.array(weights, dtype=np.float32)
+            weight_array = weight_array / weight_array.sum()  # Normalize weights to sum to 1
+            consolidated_embedding = (emb_array * weight_array[:, np.newaxis]).sum(axis=0).tolist()
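+        # Caveat: a salience-weighted mean of unit vectors is generally not
+        # unit-length itself, so cosine-based lookups against this vector
+        # would need to renormalize (or tolerate the shorter norm) downstream.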
+
+        # Best metadata from group
+        best_mem, best_salience = scored[0]
+        best_importance = max(m.get('importance', 5) or 5 for m, _ in scored)
+        best_confidence = max(m.get('confidence', 0.5) or 0.5 for m, _ in scored)
+        best_outcome = None
+        for m, _ in scored:
+            if m.get('outcome_status') == 'success':
+                best_outcome = 'success'
+                break
+            elif m.get('outcome_status') == 'partial':
+                best_outcome = 'partial'
+
+        source_ids = [m['id'] for m, _ in scored]
+
+        # Merge metadata
+        merged_metadata = {
+            'consolidated': True,
+            'source_ids': source_ids,
+            'consolidation_strategy': 'salient_compression',
+            'consolidated_at': datetime.now().isoformat(),
+            'group_size': len(group),
+            'salience_scores': {m['id']: s for m, s in scored}
+        }
+
+        # Insert consolidated memory
+        cursor = self.db.conn.cursor()
+        try:
+            embedding_str = self.db._serialize_embedding(consolidated_embedding) if consolidated_embedding else None
+
+            cursor.execute("""
+                INSERT INTO memories
+                    (type, content, embedding, project_path, importance, confidence,
+                     metadata, outcome_status, tier, tier_changed_at, tags, outcome)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'warm', ?, ?, ?)
+            """, (
+                best_mem.get('type', 'chunk'),
+                consolidated_content,
+                embedding_str,
+                best_mem.get('project_path'),
+                best_importance,
+                best_confidence,
+                json.dumps(merged_metadata),
+                best_outcome or 'pending',
+                datetime.now().isoformat(),
+                best_mem.get('tags'),
+                best_mem.get('outcome')
+            ))
+
+            consolidated_id = cursor.lastrowid
+
+            # Archive originals
+            for mem, salience in scored:
+                cursor.execute("""
+                    INSERT INTO memory_archive
+                        (original_id, type, content, embedding, project_path, session_id,
+                         importance, access_count, decay_factor, metadata,
+                         archive_reason, consolidated_into)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'consolidated', ?)
+                """, (
+                    mem['id'], mem.get('type'), mem['content'], mem.get('embedding'),
+                    mem.get('project_path'), mem.get('session_id'),
+                    mem.get('importance'), mem.get('access_count'),
+                    mem.get('decay_factor'), mem.get('metadata'),
+                    consolidated_id
+                ))
+
+                # Delete original from active memories
+                cursor.execute("DELETE FROM memories WHERE id = ?", (mem['id'],))
+
+            self.db.conn.commit()
+
+            # Add to FAISS index if available
+            if consolidated_embedding and hasattr(self.db, '_memories_index') and self.db._memories_index:
+                self.db._memories_index.add(consolidated_id, consolidated_embedding)
+
+            logger.info(
+                f"Consolidated {len(group)} memories into memory {consolidated_id} "
+                f"(sources: {source_ids})"
+            )
+
+            return {
+                'consolidated_id': consolidated_id,
+                'source_ids': source_ids,
+                'group_size': len(group),
+                'content_length': len(consolidated_content),
+                'best_importance': best_importance,
+                'best_confidence': best_confidence
+            }
+
+        except Exception as e:
+            self.db.conn.rollback()
+            logger.error(f"Failed to consolidate group: {e}")
+            return None
+
+    async def run_consolidation(self) -> Dict[str, Any]:
+        """Run a consolidation pass.
+
+        Finds candidates and consolidates up to max_per_run groups.
+
+        Returns:
+            Dict with consolidation statistics
+        """
+        groups = await self.find_consolidation_candidates()
+
+        results = {
+            'candidates_found': len(groups),
+            'consolidated': 0,
+            'memories_archived': 0,
+            'consolidations': [],
+            'timestamp': datetime.now().isoformat()
+        }
+
+        for group in groups[:self.max_per_run]:
+            result = await self.consolidate_group(group)
+            if result:
+                results['consolidated'] += 1
+                results['memories_archived'] += result['group_size']
+                results['consolidations'].append(result)
+
+        return results
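+    # Sketch of a caller (illustrative only, not part of this file; assumes
+    # `db` is the package's database wrapper passed to the constructor):
+    #     service = ConsolidationService(db)
+    #     stats = await service.run_consolidation()
+    #     # -> {'candidates_found': N, 'consolidated': M, 'memories_archived': K, ...}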
+
+    async def deconsolidate(self, consolidated_id: int) -> Dict[str, Any]:
+        """Restore original memories from a consolidated memory.
+
+        Args:
+            consolidated_id: ID of the consolidated memory
+
+        Returns:
+            Dict with restoration details
+        """
+        cursor = self.db.conn.cursor()
+
+        # Find archived originals
+        cursor.execute("""
+            SELECT * FROM memory_archive
+            WHERE consolidated_into = ?
+        """, (consolidated_id,))
+
+        archived = cursor.fetchall()
+        if not archived:
+            return {'success': False, 'error': 'No archived memories found for this consolidation'}
+
+        restored_ids = []
+        try:
+            for row in archived:
+                # Restore to memories table
+                cursor.execute("""
+                    INSERT INTO memories
+                        (type, content, embedding, project_path, session_id,
+                         importance, access_count, decay_factor, metadata, tier)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 'warm')
+                """, (
+                    row['type'], row['content'], row['embedding'],
+                    row['project_path'], row['session_id'],
+                    row['importance'], row['access_count'],
+                    row['decay_factor'], row['metadata']
+                ))
+                restored_ids.append(cursor.lastrowid)
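+                # Restored rows get fresh ids (original_id is not reused),
+                # so callers should rely on the returned restored_ids.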
+
+                # Remove from archive
+                cursor.execute("DELETE FROM memory_archive WHERE id = ?", (row['id'],))
+
+            # Delete the consolidated memory
+            cursor.execute("DELETE FROM memories WHERE id = ?", (consolidated_id,))
+
+            self.db.conn.commit()
+
+            return {
+                'success': True,
+                'consolidated_id': consolidated_id,
+                'restored_count': len(restored_ids),
+                'restored_ids': restored_ids
+            }
+
+        except Exception as e:
+            self.db.conn.rollback()
+            logger.error(f"Failed to deconsolidate memory {consolidated_id}: {e}")
+            return {'success': False, 'error': str(e)}
+
+    async def get_consolidation_stats(self) -> Dict[str, Any]:
+        """Get statistics about consolidation activity."""
+        cursor = self.db.conn.cursor()
+
+        # Count consolidated memories
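+        # (json.dumps writes booleans with this exact spacing, so the LIKE
+        # pattern below matches metadata written by consolidate_group)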
+        cursor.execute("""
+            SELECT COUNT(*) as count FROM memories
+            WHERE metadata LIKE '%"consolidated": true%'
+        """)
+        consolidated_count = cursor.fetchone()['count']
+
+        # Count archived by consolidation
+        cursor.execute("""
+            SELECT COUNT(*) as count FROM memory_archive
+            WHERE archive_reason = 'consolidated'
+        """)
+        archived_count = cursor.fetchone()['count']
+
+        # Average group size
+        cursor.execute("""
+            SELECT AVG(json_extract(metadata, '$.group_size')) as avg_group_size
+            FROM memories
+            WHERE metadata LIKE '%"consolidated": true%'
+        """)
+        row = cursor.fetchone()
+        avg_group_size = round(row['avg_group_size'] or 0, 1)
+
+        return {
+            'consolidated_memories': consolidated_count,
+            'archived_originals': archived_count,
+            'avg_group_size': avg_group_size,
+            'space_savings_estimate': f"{archived_count - consolidated_count} memories removed",
+            'timestamp': datetime.now().isoformat()
+        }