claude-memory-agent 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +107 -0
- package/README.md +200 -0
- package/agent_card.py +512 -0
- package/bin/cli.js +181 -0
- package/bin/postinstall.js +216 -0
- package/config.py +104 -0
- package/dashboard.html +2689 -0
- package/hooks/README.md +196 -0
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/hooks/auto-detect-response.py +348 -0
- package/hooks/auto_capture.py +255 -0
- package/hooks/detect-correction.py +173 -0
- package/hooks/grounding-hook.py +348 -0
- package/hooks/log-tool-use.py +234 -0
- package/hooks/log-user-request.py +208 -0
- package/hooks/pre-tool-decision.py +218 -0
- package/hooks/problem-detector.py +343 -0
- package/hooks/session_end.py +192 -0
- package/hooks/session_start.py +227 -0
- package/install.py +887 -0
- package/main.py +2859 -0
- package/manager.py +997 -0
- package/package.json +55 -0
- package/requirements.txt +8 -0
- package/run_server.py +136 -0
- package/services/__init__.py +50 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/services/agent_registry.py +753 -0
- package/services/auth.py +331 -0
- package/services/auto_inject.py +250 -0
- package/services/claude_md_sync.py +275 -0
- package/services/cleanup.py +667 -0
- package/services/compaction_flush.py +447 -0
- package/services/confidence.py +301 -0
- package/services/daily_log.py +333 -0
- package/services/database.py +2485 -0
- package/services/embeddings.py +358 -0
- package/services/insights.py +632 -0
- package/services/llm_analyzer.py +595 -0
- package/services/memory_md_sync.py +409 -0
- package/services/retry_queue.py +453 -0
- package/services/timeline.py +579 -0
- package/services/vector_index.py +398 -0
- package/services/websocket.py +257 -0
- package/skills/__init__.py +6 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/skills/admin.py +469 -0
- package/skills/checkpoint.py +198 -0
- package/skills/claude_md.py +363 -0
- package/skills/cleanup.py +241 -0
- package/skills/grounding.py +801 -0
- package/skills/insights.py +231 -0
- package/skills/natural_language.py +277 -0
- package/skills/retrieve.py +67 -0
- package/skills/search.py +213 -0
- package/skills/state.py +182 -0
- package/skills/store.py +179 -0
- package/skills/summarize.py +588 -0
- package/skills/timeline.py +387 -0
- package/skills/verification.py +391 -0
- package/start_daemon.py +155 -0
- package/test_automation.py +221 -0
- package/test_complete.py +338 -0
- package/test_full.py +322 -0
- package/update_system.py +817 -0
- package/verify_db.py +134 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
"""Embedding service using Ollama with multi-model support.
|
|
2
|
+
|
|
3
|
+
Includes health checks, graceful degradation, and model switching capabilities.
|
|
4
|
+
"""
|
|
5
|
+
import os
|
|
6
|
+
import time
|
|
7
|
+
import asyncio
|
|
8
|
+
from typing import List, Optional, Dict, Any
|
|
9
|
+
import ollama
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
|
|
12
|
+
load_dotenv()
|
|
13
|
+
|
|
14
|
+
# Ollama connection and default model selection; all overridable via .env.
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
DEFAULT_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")
# Seconds to wait for a health probe before declaring Ollama unhealthy.
HEALTH_CHECK_TIMEOUT = float(os.getenv("OLLAMA_HEALTH_TIMEOUT", "2.0"))
# Seconds a health-check result (good or bad) is reused before re-probing.
HEALTH_CACHE_TTL = float(os.getenv("OLLAMA_HEALTH_CACHE_TTL", "30.0"))

# Model configurations: model_name -> dimension
# Entries with "alias_for" are resolved to the target model name before use.
MODEL_CONFIGS = {
    "nomic-embed-text": {"dimension": 768, "description": "General purpose, fast"},
    "mxbai-embed-large": {"dimension": 1024, "description": "Higher quality, larger"},
    "all-minilm": {"dimension": 384, "description": "Lightweight, fast"},
    "snowflake-arctic-embed": {"dimension": 1024, "description": "High quality, multilingual"},
    "bge-m3": {"dimension": 1024, "description": "Multilingual, dense retrieval"},
    "default": {"alias_for": "nomic-embed-text"},
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EmbeddingService:
    """Service for generating embeddings using Ollama with multi-model support.

    Features:
    - Multiple model support with automatic dimension handling
    - Health check with caching to avoid hammering Ollama
    - Graceful degradation: returns None when Ollama unavailable
    - Timeout handling for unresponsive Ollama instances
    - Model switching without data loss
    """

    def __init__(self, model: Optional[str] = None):
        """Bind the service to one Ollama host and a default model.

        Args:
            model: Embedding model name or alias; defaults to the
                EMBEDDING_MODEL environment setting when omitted.
        """
        self.host = OLLAMA_HOST
        self.client = ollama.Client(host=OLLAMA_HOST)

        # Resolve model (handle aliases such as "default")
        self.model = self._resolve_model(model or DEFAULT_MODEL)
        self._model_config = self._get_model_config(self.model)

        # Health check caching — avoids probing Ollama on every call.
        self._health_status: Optional[bool] = None
        self._health_last_check: float = 0
        self._health_cache_ttl = HEALTH_CACHE_TTL
        self._health_error: Optional[str] = None

        # Degraded mode tracking (set while Ollama is unreachable/unresponsive)
        self._degraded_mode = False
        self._degraded_since: Optional[float] = None

        # Cache of model names reported by the Ollama server
        self._available_models: Optional[List[str]] = None
        self._models_last_check: float = 0

    def _resolve_model(self, model: str) -> str:
        """Resolve model aliases (e.g. "default") to actual model names."""
        config = MODEL_CONFIGS.get(model, {})
        if "alias_for" in config:
            return config["alias_for"]
        return model

    def _get_model_config(self, model: str) -> Dict[str, Any]:
        """Get configuration for a model.

        Unknown models fall back to a 768-dimension default config.
        """
        if model in MODEL_CONFIGS:
            return MODEL_CONFIGS[model]
        # Default config for unknown models
        return {"dimension": 768, "description": "Unknown model"}

    def _health_failure(self, now: float, error: str) -> Dict[str, Any]:
        """Record a failed health probe and build the failure report.

        Factored out of check_health: the timeout and generic-exception
        branches previously duplicated this bookkeeping and dict verbatim.
        """
        self._health_status = False
        self._health_last_check = now
        self._health_error = error
        self._enter_degraded_mode()
        return {
            "healthy": False,
            "cached": False,
            "model": self.model,
            "host": self.host,
            "error": self._health_error,
            "degraded_mode": True
        }

    async def check_health(self, force: bool = False) -> Dict[str, Any]:
        """Check if Ollama is healthy and responsive.

        Args:
            force: If True, bypass cache and check immediately

        Returns:
            Dict with 'healthy', 'model_loaded', 'latency_ms', 'error' keys.
            Cached and failure results omit 'model_loaded'/'latency_ms'.
        """
        now = time.time()

        # Return cached result if within TTL
        if not force and self._health_status is not None:
            if (now - self._health_last_check) < self._health_cache_ttl:
                return {
                    "healthy": self._health_status,
                    "cached": True,
                    "model": self.model,
                    "host": self.host,
                    "error": self._health_error,
                    "degraded_mode": self._degraded_mode
                }

        # Perform health check with timeout
        start_time = time.time()
        try:
            # get_running_loop is the supported call inside a coroutine
            # (get_event_loop is deprecated for this use since 3.10).
            loop = asyncio.get_running_loop()

            def _check():
                # Try to list models to verify Ollama is responding.
                # Tolerate both response field names ('name' vs 'model')
                # used by different ollama client versions.
                models = self.client.list()
                model_names = [m.get('name', m.get('model', '')) for m in models.get('models', [])]
                # Substring match so tagged names like "<model>:latest" count.
                model_loaded = any(self.model in name for name in model_names)
                return models, model_loaded, model_names

            # The ollama client is synchronous; run it off-loop with a timeout.
            models, model_loaded, model_names = await asyncio.wait_for(
                loop.run_in_executor(None, _check),
                timeout=HEALTH_CHECK_TIMEOUT
            )

            latency_ms = (time.time() - start_time) * 1000

            self._health_status = True
            self._health_last_check = now
            self._health_error = None
            self._available_models = model_names
            self._models_last_check = now

            # Exit degraded mode if we were in it
            if self._degraded_mode:
                self._degraded_mode = False
                self._degraded_since = None

            return {
                "healthy": True,
                "cached": False,
                "model": self.model,
                "model_loaded": model_loaded,
                "host": self.host,
                "latency_ms": round(latency_ms, 2),
                "error": None,
                "degraded_mode": False,
                "available_models": model_names
            }

        except asyncio.TimeoutError:
            return self._health_failure(now, f"Timeout after {HEALTH_CHECK_TIMEOUT}s")

        except Exception as e:
            return self._health_failure(now, str(e))

    def _enter_degraded_mode(self):
        """Enter degraded mode when Ollama is unavailable."""
        if not self._degraded_mode:
            self._degraded_mode = True
            self._degraded_since = time.time()

    def is_degraded(self) -> bool:
        """Check if service is in degraded mode."""
        return self._degraded_mode

    def get_degraded_duration(self) -> Optional[float]:
        """Get how long (seconds) service has been in degraded mode, or None."""
        if self._degraded_since:
            return time.time() - self._degraded_since
        return None

    async def generate_embedding(
        self,
        text: str,
        model: Optional[str] = None,
        fallback_on_error: bool = True
    ) -> Optional[List[float]]:
        """Generate embedding for a single text.

        Args:
            text: Text to embed
            model: Optional model override (uses default if not specified)
            fallback_on_error: If True, return None instead of raising on error

        Returns:
            List of floats (embedding) or None if Ollama unavailable and fallback enabled

        Raises:
            ConnectionError: Ollama is unavailable and fallback_on_error is False.
        """
        use_model = self._resolve_model(model) if model else self.model

        # Quick check if we're in degraded mode
        if self._degraded_mode:
            # Retry a real health probe once the cache TTL has elapsed;
            # otherwise short-circuit without touching Ollama at all.
            if time.time() - self._health_last_check >= self._health_cache_ttl:
                health = await self.check_health(force=True)
                if not health["healthy"]:
                    if fallback_on_error:
                        return None
                    raise ConnectionError(f"Ollama unavailable: {health['error']}")
            elif fallback_on_error:
                return None
            else:
                raise ConnectionError(f"Ollama unavailable (degraded mode): {self._health_error}")

        try:
            loop = asyncio.get_running_loop()

            def _embed():
                response = self.client.embeddings(model=use_model, prompt=text)
                return response["embedding"]

            # Run with timeout
            embedding = await asyncio.wait_for(
                loop.run_in_executor(None, _embed),
                timeout=30.0  # 30s timeout for embedding generation
            )
            return embedding

        except asyncio.TimeoutError:
            self._enter_degraded_mode()
            self._health_error = "Embedding generation timed out"
            if fallback_on_error:
                return None
            raise

        except Exception as e:
            # Only connection-style failures flip us into degraded mode;
            # other errors (e.g. unknown model) are surfaced/fallback as-is.
            error_str = str(e).lower()
            if "connection" in error_str or "refused" in error_str or "timeout" in error_str:
                self._enter_degraded_mode()
                self._health_error = str(e)

            if fallback_on_error:
                return None
            raise

    async def generate_embeddings(
        self,
        texts: List[str],
        model: Optional[str] = None,
        fallback_on_error: bool = True
    ) -> List[Optional[List[float]]]:
        """Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed
            model: Optional model override
            fallback_on_error: If True, include None for failed embeddings

        Returns:
            List of embeddings (or None for failed ones if fallback enabled)
        """
        embeddings = []
        for text in texts:
            embedding = await self.generate_embedding(text, model, fallback_on_error)
            embeddings.append(embedding)
        return embeddings

    def get_dimension(self, model: Optional[str] = None) -> int:
        """Return the embedding dimension for a model (default: current model)."""
        use_model = self._resolve_model(model) if model else self.model
        config = self._get_model_config(use_model)
        return config.get("dimension", 768)

    def get_current_model(self) -> str:
        """Get the current default model."""
        return self.model

    def set_model(self, model: str):
        """Set the default model.

        Note: This only changes the default for new embeddings.
        Existing embeddings are not affected.
        """
        self.model = self._resolve_model(model)
        self._model_config = self._get_model_config(self.model)

    def get_available_models(self) -> List[Dict[str, Any]]:
        """Get list of available embedding models with their configurations."""
        models = []
        for name, config in MODEL_CONFIGS.items():
            if "alias_for" in config:
                continue  # Skip aliases
            models.append({
                "name": name,
                "dimension": config.get("dimension", 768),
                "description": config.get("description", ""),
                "is_current": name == self.model,
                # None (unknown) until a successful server listing is cached.
                "available_in_ollama": (
                    any(name in m for m in (self._available_models or []))
                    if self._available_models else None
                )
            })
        return models

    async def get_ollama_models(self) -> List[str]:
        """Get list of models currently available in Ollama.

        Results are cached for 60 seconds; on failure the last known
        listing (or an empty list) is returned.
        """
        if self._available_models and (time.time() - self._models_last_check) < 60:
            return self._available_models

        try:
            loop = asyncio.get_running_loop()
            models = await loop.run_in_executor(None, self.client.list)
            model_names = [m.get('name', m.get('model', '')) for m in models.get('models', [])]
            self._available_models = model_names
            self._models_last_check = time.time()
            return model_names
        except Exception:
            # Was a bare `except:` — that also swallowed KeyboardInterrupt/
            # SystemExit. Best-effort fallback to the cached listing stays.
            return self._available_models or []

    def get_status(self) -> Dict[str, Any]:
        """Get current service status."""
        return {
            "model": self.model,
            "dimension": self.get_dimension(),
            "host": self.host,
            "degraded_mode": self._degraded_mode,
            "degraded_since": self._degraded_since,
            "degraded_duration_seconds": self.get_degraded_duration(),
            "last_health_check": self._health_last_check,
            "last_health_status": self._health_status,
            "last_health_error": self._health_error,
            "available_models_in_ollama": self._available_models
        }
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# Module-level cache: one shared EmbeddingService instance per model name.
_embedding_services: Dict[str, EmbeddingService] = {}


def get_embedding_service(model: Optional[str] = None) -> EmbeddingService:
    """Return the shared embedding service for *model*.

    A single instance is kept per model name so that health-check and
    degraded-mode state persist across callers.
    """
    key = model if model else DEFAULT_MODEL

    service = _embedding_services.get(key)
    if service is None:
        service = EmbeddingService(key)
        _embedding_services[key] = service
    return service
|