claude-memory-agent 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +11 -1
- package/bin/lib/banner.js +39 -0
- package/bin/lib/environment.js +166 -0
- package/bin/lib/installer.js +291 -0
- package/bin/lib/models.js +95 -0
- package/bin/lib/steps/advanced.js +101 -0
- package/bin/lib/steps/confirm.js +87 -0
- package/bin/lib/steps/model.js +57 -0
- package/bin/lib/steps/provider.js +65 -0
- package/bin/lib/steps/scope.js +59 -0
- package/bin/lib/steps/server.js +74 -0
- package/bin/lib/ui.js +75 -0
- package/bin/onboarding.js +164 -0
- package/bin/postinstall.js +22 -257
- package/config.py +103 -4
- package/dashboard.html +697 -27
- package/hooks/extract_memories.py +439 -0
- package/hooks/pre_compact_hook.py +76 -0
- package/hooks/session_end_hook.py +149 -0
- package/hooks/stop_hook.py +372 -0
- package/install.py +85 -32
- package/main.py +1636 -892
- package/mcp_server.py +451 -0
- package/package.json +14 -3
- package/requirements.txt +12 -8
- package/services/adaptive_ranker.py +272 -0
- package/services/agent_catalog.json +153 -0
- package/services/agent_registry.py +245 -730
- package/services/claude_md_sync.py +320 -4
- package/services/consolidation.py +417 -0
- package/services/database.py +586 -105
- package/services/embedding_pipeline.py +262 -0
- package/services/embeddings.py +493 -85
- package/services/memory_decay.py +408 -0
- package/services/native_memory_paths.py +86 -0
- package/services/native_memory_sync.py +496 -0
- package/services/response_manager.py +183 -0
- package/services/terminal_ui.py +199 -0
- package/services/tier_manager.py +235 -0
- package/services/websocket.py +26 -6
- package/skills/search.py +136 -61
- package/skills/session_review.py +210 -23
- package/skills/store.py +125 -18
- package/terminal_dashboard.py +474 -0
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/curator.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
- package/skills/__pycache__/context.cpython-312.pyc +0 -0
- package/skills/__pycache__/curator.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/test_automation.py +0 -221
- package/test_complete.py +0 -338
- package/test_full.py +0 -322
- package/verify_db.py +0 -134
package/services/embedding_pipeline.py (new file)
@@ -0,0 +1,262 @@
+"""Embedding Pipeline - LRU cache, batch generation, and pre-computation.
+
+CLaRa-inspired pre-processing pipeline for embeddings:
+1. LRU cache for query embeddings (common searches return instantly)
+2. Batch generation via asyncio.gather for throughput
+3. Background pre-computation for memories missing embeddings
+"""
+import hashlib
+import logging
+import asyncio
+from collections import OrderedDict
+from typing import List, Optional, Dict, Any
+
+from config import config
+
+logger = logging.getLogger(__name__)
+
+
+class EmbeddingCache:
+    """LRU cache for embedding queries.
+
+    MD5 hash of text -> embedding vector.
+    ~1.5MB footprint for 500 entries at 768 dimensions.
+    """
+
+    def __init__(self, max_size: int = None):
+        self.max_size = max_size or config.EMBEDDING_CACHE_SIZE
+        self._cache: OrderedDict[str, List[float]] = OrderedDict()
+        self._hits = 0
+        self._misses = 0
+
+    def _key(self, text: str) -> str:
+        """Generate cache key from text."""
+        return hashlib.md5(text.encode('utf-8')).hexdigest()
+
+    def get(self, text: str) -> Optional[List[float]]:
+        """Get cached embedding for text.
+
+        Args:
+            text: Input text
+
+        Returns:
+            Cached embedding or None
+        """
+        key = self._key(text)
+        if key in self._cache:
+            self._hits += 1
+            # Move to end (most recently used)
+            self._cache.move_to_end(key)
+            return self._cache[key]
+        self._misses += 1
+        return None
+
+    def put(self, text: str, embedding: List[float]):
+        """Cache an embedding.
+
+        Args:
+            text: Input text
+            embedding: Embedding vector
+        """
+        key = self._key(text)
+        if key in self._cache:
+            self._cache.move_to_end(key)
+        else:
+            if len(self._cache) >= self.max_size:
+                self._cache.popitem(last=False)  # Remove oldest
+        self._cache[key] = embedding
+
+    def clear(self):
+        """Clear the cache."""
+        self._cache.clear()
+        self._hits = 0
+        self._misses = 0
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache statistics."""
+        total = self._hits + self._misses
+        return {
+            'size': len(self._cache),
+            'max_size': self.max_size,
+            'hits': self._hits,
+            'misses': self._misses,
+            'hit_rate': round(self._hits / total, 4) if total > 0 else 0.0,
+            'estimated_memory_mb': round(len(self._cache) * 768 * 4 / 1024 / 1024, 2)
+        }
+
+
+class EmbeddingPipeline:
+    """Manages embedding generation with caching and batch processing.
+
+    Wraps an EmbeddingService with:
+    - LRU query cache
+    - Batch generation via asyncio.gather
+    - Background pre-computation for missing embeddings
+    """
+
+    def __init__(self, embedding_service, db=None):
+        self.embedding_service = embedding_service
+        self.db = db
+        self.cache = EmbeddingCache()
+        self.batch_size = config.EMBEDDING_BATCH_SIZE
+        self._precompute_running = False
+
+    async def generate_embedding(self, text: str) -> Optional[List[float]]:
+        """Generate embedding with LRU cache.
+
+        Args:
+            text: Text to embed
+
+        Returns:
+            Embedding vector or None if service unavailable
+        """
+        # Check cache first
+        cached = self.cache.get(text)
+        if cached is not None:
+            return cached
+
+        # Generate new embedding
+        embedding = await self.embedding_service.generate_embedding(text)
+
+        # Cache if successful
+        if embedding is not None:
+            self.cache.put(text, embedding)
+
+        return embedding
+
+    async def generate_embeddings_batch(self, texts: List[str]) -> List[Optional[List[float]]]:
+        """Generate embeddings for multiple texts using concurrent batches.
+
+        Processes in batches of self.batch_size; each batch runs concurrently
+        via asyncio.gather against Ollama.
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of embeddings (or None for failed ones)
+        """
+        results = [None] * len(texts)
+
+        # Check cache for existing embeddings
+        uncached_indices = []
+        for i, text in enumerate(texts):
+            cached = self.cache.get(text)
+            if cached is not None:
+                results[i] = cached
+            else:
+                uncached_indices.append(i)
+
+        if not uncached_indices:
+            return results
+
+        # Process uncached texts in concurrent batches
+        for batch_start in range(0, len(uncached_indices), self.batch_size):
+            batch_indices = uncached_indices[batch_start:batch_start + self.batch_size]
+            batch_texts = [texts[i] for i in batch_indices]
+
+            # Generate all in parallel
+            batch_results = await asyncio.gather(
+                *[self.embedding_service.generate_embedding(text) for text in batch_texts],
+                return_exceptions=True
+            )
+
+            for idx, emb_result in zip(batch_indices, batch_results):
+                if isinstance(emb_result, Exception):
+                    logger.debug(f"Batch embedding failed for index {idx}: {emb_result}")
+                    continue
+                if emb_result is not None:
+                    results[idx] = emb_result
+                    self.cache.put(texts[idx], emb_result)
+
+        return results
+
+    async def precompute_missing_embeddings(self) -> Dict[str, Any]:
+        """Background task: find memories with NULL embeddings and generate them.
+
+        Only runs when Ollama is healthy (not degraded).
+
+        Returns:
+            Dict with precomputation stats
+        """
+        if self._precompute_running:
+            return {'skipped': True, 'reason': 'already_running'}
+
+        if self.embedding_service.is_degraded():
+            return {'skipped': True, 'reason': 'ollama_degraded'}
+
+        if not self.db:
+            return {'skipped': True, 'reason': 'no_db'}
+
+        self._precompute_running = True
+        try:
+            cursor = self.db.conn.cursor()
+
+            # Find memories with missing embeddings
+            cursor.execute("""
+                SELECT id, content FROM memories
+                WHERE embedding IS NULL
+                ORDER BY importance DESC, created_at DESC
+                LIMIT ?
+            """, (self.batch_size * 5,))  # Process up to 50 at a time
+
+            rows = cursor.fetchall()
+            if not rows:
+                return {'generated': 0, 'message': 'all_embeddings_present'}
+
+            # Generate embeddings in batch
+            texts = [row['content'] for row in rows]
+            embeddings = await self.generate_embeddings_batch(texts)
+
+            # Update database
+            updated = 0
+            for row, emb in zip(rows, embeddings):
+                if emb is not None:
+                    emb_str = self.db._serialize_embedding(emb)
+                    cursor.execute(
+                        "UPDATE memories SET embedding = ? WHERE id = ?",
+                        (emb_str, row['id'])
+                    )
+
+                    # Add to FAISS index if available
+                    if hasattr(self.db, '_memories_index') and self.db._memories_index:
+                        self.db._memories_index.add(row['id'], emb)
+
+                    updated += 1
+
+            if updated:
+                self.db.conn.commit()
+
+            return {
+                'found_missing': len(rows),
+                'generated': updated,
+                'failed': len(rows) - updated
+            }
+
+        except Exception as e:
+            logger.error(f"Precompute failed: {e}")
+            return {'error': str(e)}
+
+        finally:
+            self._precompute_running = False
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get pipeline statistics."""
+        return {
+            'cache': self.cache.get_stats(),
+            'batch_size': self.batch_size,
+            'precompute_running': self._precompute_running,
+            'service_degraded': self.embedding_service.is_degraded()
+        }
+
+
+# Global pipeline instance
+_pipeline: Optional[EmbeddingPipeline] = None
+
+
+def get_embedding_pipeline(embedding_service=None, db=None) -> EmbeddingPipeline:
+    """Get or create the global embedding pipeline."""
+    global _pipeline
+    if _pipeline is None and embedding_service:
+        _pipeline = EmbeddingPipeline(embedding_service, db)
+    return _pipeline
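
As a quick sanity check of the new module, the sketch below drives EmbeddingPipeline with a stubbed embedding service. StubEmbeddingService and the assertions are hypothetical reviewer scaffolding, not code shipped in the package; the real service is the Ollama-backed one in package/services/embeddings.py, and the import assumes the script runs from the unpacked package root so that the module's own `from config import config` resolves. (The docstring's "~1.5MB" estimate checks out: 500 entries x 768 floats x 4 bytes ~= 1.46 MB, which is exactly what `estimated_memory_mb` computes.)

# Reviewer's sketch (hypothetical, not part of the package): exercises the
# cache and batch paths of the new EmbeddingPipeline with a stub service.
import asyncio

from services.embedding_pipeline import EmbeddingPipeline


class StubEmbeddingService:
    """Deterministic stand-in for the Ollama-backed EmbeddingService."""

    def __init__(self):
        self.calls = 0

    async def generate_embedding(self, text):
        self.calls += 1
        return [float(len(text))] * 768  # dummy 768-dim vector

    def is_degraded(self):
        return False


async def main():
    service = StubEmbeddingService()
    pipeline = EmbeddingPipeline(service)  # no db: precompute is skipped

    await pipeline.generate_embedding("hello")   # cache miss -> service call
    await pipeline.generate_embedding("hello")   # cache hit -> no service call
    assert service.calls == 1

    # Batch path: "hello" is already cached; the other two texts fan out
    # concurrently via asyncio.gather inside generate_embeddings_batch.
    await pipeline.generate_embeddings_batch(["hello", "world", "again"])
    assert service.calls == 3

    print(pipeline.get_stats())


asyncio.run(main())

One reviewer note on the design: the cache key is an exact MD5 hash of the input text, so only verbatim repeat queries hit the cache; any whitespace or casing difference is a miss and triggers a fresh service call.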