alma-memory 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -194
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -322
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -264
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -81
- alma/graph/backends/__init__.py +32 -18
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -432
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -511
- alma/observability/__init__.py +91 -0
- alma/observability/config.py +302 -0
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -366
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -61
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1048
- alma/storage/base.py +1083 -525
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +614 -619
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1452
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1358
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.0.dist-info/RECORD +0 -76
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/storage/chroma.py
CHANGED
|
@@ -1,1443 +1,1443 @@
|
|
|
1
|
-
"""
|
|
2
|
-
ALMA Chroma Storage Backend.
|
|
3
|
-
|
|
4
|
-
Vector database storage using ChromaDB for semantic search capabilities.
|
|
5
|
-
Supports both persistent local storage and client-server mode.
|
|
6
|
-
|
|
7
|
-
Recommended for:
|
|
8
|
-
- Semantic search-focused deployments
|
|
9
|
-
- Local development with vector search
|
|
10
|
-
- Small to medium scale applications
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
import json
|
|
14
|
-
import logging
|
|
15
|
-
import os
|
|
16
|
-
from datetime import datetime, timezone
|
|
17
|
-
from typing import Any, Dict, List, Optional
|
|
18
|
-
|
|
19
|
-
from alma.storage.base import StorageBackend
|
|
20
|
-
from alma.types import (
|
|
21
|
-
AntiPattern,
|
|
22
|
-
DomainKnowledge,
|
|
23
|
-
Heuristic,
|
|
24
|
-
Outcome,
|
|
25
|
-
UserPreference,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
logger = logging.getLogger(__name__)
|
|
29
|
-
|
|
30
|
-
# Try to import chromadb
|
|
31
|
-
try:
|
|
32
|
-
import chromadb
|
|
33
|
-
from chromadb.config import Settings
|
|
34
|
-
|
|
35
|
-
CHROMADB_AVAILABLE = True
|
|
36
|
-
except ImportError:
|
|
37
|
-
CHROMADB_AVAILABLE = False
|
|
38
|
-
logger.warning(
|
|
39
|
-
"chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class ChromaStorage(StorageBackend):
|
|
44
|
-
"""
|
|
45
|
-
ChromaDB storage backend with native vector search.
|
|
46
|
-
|
|
47
|
-
Uses ChromaDB collections for each memory type with built-in
|
|
48
|
-
embedding storage and similarity search.
|
|
49
|
-
|
|
50
|
-
Collections:
|
|
51
|
-
- alma_heuristics: Learned strategies
|
|
52
|
-
- alma_outcomes: Task execution records
|
|
53
|
-
- alma_preferences: User preferences
|
|
54
|
-
- alma_domain_knowledge: Domain facts
|
|
55
|
-
- alma_anti_patterns: Patterns to avoid
|
|
56
|
-
|
|
57
|
-
Modes:
|
|
58
|
-
- Persistent: Local storage with persist_directory
|
|
59
|
-
- Client-Server: Remote server with host/port
|
|
60
|
-
- Ephemeral: In-memory (for testing)
|
|
61
|
-
"""
|
|
62
|
-
|
|
63
|
-
# Collection names
|
|
64
|
-
HEURISTICS_COLLECTION = "alma_heuristics"
|
|
65
|
-
OUTCOMES_COLLECTION = "alma_outcomes"
|
|
66
|
-
PREFERENCES_COLLECTION = "alma_preferences"
|
|
67
|
-
DOMAIN_KNOWLEDGE_COLLECTION = "alma_domain_knowledge"
|
|
68
|
-
ANTI_PATTERNS_COLLECTION = "alma_anti_patterns"
|
|
69
|
-
|
|
70
|
-
def __init__(
|
|
71
|
-
self,
|
|
72
|
-
persist_directory: Optional[str] = None,
|
|
73
|
-
host: Optional[str] = None,
|
|
74
|
-
port: Optional[int] = None,
|
|
75
|
-
embedding_dim: int = 384,
|
|
76
|
-
collection_metadata: Optional[Dict[str, Any]] = None,
|
|
77
|
-
):
|
|
78
|
-
"""
|
|
79
|
-
Initialize Chroma storage.
|
|
80
|
-
|
|
81
|
-
Args:
|
|
82
|
-
persist_directory: Path for persistent local storage (mutually exclusive with host/port)
|
|
83
|
-
host: Chroma server host (mutually exclusive with persist_directory)
|
|
84
|
-
port: Chroma server port (required if host is specified)
|
|
85
|
-
embedding_dim: Dimension of embedding vectors (for validation)
|
|
86
|
-
collection_metadata: Optional metadata for collections (e.g., distance function)
|
|
87
|
-
"""
|
|
88
|
-
if not CHROMADB_AVAILABLE:
|
|
89
|
-
raise ImportError(
|
|
90
|
-
"chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
self.embedding_dim = embedding_dim
|
|
94
|
-
self._collection_metadata = collection_metadata or {
|
|
95
|
-
"hnsw:space": "cosine" # Use cosine similarity
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
# Initialize client based on mode
|
|
99
|
-
if host and port:
|
|
100
|
-
# Client-server mode
|
|
101
|
-
self._client = chromadb.HttpClient(host=host, port=port)
|
|
102
|
-
self._mode = "client-server"
|
|
103
|
-
logger.info(f"ChromaDB client-server mode: {host}:{port}")
|
|
104
|
-
elif persist_directory:
|
|
105
|
-
# Persistent local mode
|
|
106
|
-
self._client = chromadb.PersistentClient(
|
|
107
|
-
path=persist_directory,
|
|
108
|
-
settings=Settings(anonymized_telemetry=False),
|
|
109
|
-
)
|
|
110
|
-
self._mode = "persistent"
|
|
111
|
-
logger.info(f"ChromaDB persistent mode: {persist_directory}")
|
|
112
|
-
else:
|
|
113
|
-
# Ephemeral mode (in-memory, for testing)
|
|
114
|
-
self._client = chromadb.Client(
|
|
115
|
-
settings=Settings(anonymized_telemetry=False),
|
|
116
|
-
)
|
|
117
|
-
self._mode = "ephemeral"
|
|
118
|
-
logger.info("ChromaDB ephemeral mode (in-memory)")
|
|
119
|
-
|
|
120
|
-
# Initialize collections
|
|
121
|
-
self._init_collections()
|
|
122
|
-
|
|
123
|
-
@classmethod
|
|
124
|
-
def from_config(cls, config: Dict[str, Any]) -> "ChromaStorage":
|
|
125
|
-
"""Create instance from configuration."""
|
|
126
|
-
chroma_config = config.get("chroma", {})
|
|
127
|
-
|
|
128
|
-
# Support environment variable expansion
|
|
129
|
-
def get_value(key: str, default: Any = None) -> Any:
|
|
130
|
-
value = chroma_config.get(key, default)
|
|
131
|
-
if (
|
|
132
|
-
isinstance(value, str)
|
|
133
|
-
and value.startswith("${")
|
|
134
|
-
and value.endswith("}")
|
|
135
|
-
):
|
|
136
|
-
env_var = value[2:-1]
|
|
137
|
-
return os.environ.get(env_var, default)
|
|
138
|
-
return value
|
|
139
|
-
|
|
140
|
-
persist_directory = get_value("persist_directory")
|
|
141
|
-
host = get_value("host")
|
|
142
|
-
port = get_value("port")
|
|
143
|
-
|
|
144
|
-
if port is not None:
|
|
145
|
-
port = int(port)
|
|
146
|
-
|
|
147
|
-
return cls(
|
|
148
|
-
persist_directory=persist_directory,
|
|
149
|
-
host=host,
|
|
150
|
-
port=port,
|
|
151
|
-
embedding_dim=int(config.get("embedding_dim", 384)),
|
|
152
|
-
collection_metadata=chroma_config.get("collection_metadata"),
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
def _init_collections(self):
|
|
156
|
-
"""Initialize or get all collections."""
|
|
157
|
-
self._heuristics = self._client.get_or_create_collection(
|
|
158
|
-
name=self.HEURISTICS_COLLECTION,
|
|
159
|
-
metadata=self._collection_metadata,
|
|
160
|
-
)
|
|
161
|
-
self._outcomes = self._client.get_or_create_collection(
|
|
162
|
-
name=self.OUTCOMES_COLLECTION,
|
|
163
|
-
metadata=self._collection_metadata,
|
|
164
|
-
)
|
|
165
|
-
self._preferences = self._client.get_or_create_collection(
|
|
166
|
-
name=self.PREFERENCES_COLLECTION,
|
|
167
|
-
metadata=self._collection_metadata,
|
|
168
|
-
)
|
|
169
|
-
self._domain_knowledge = self._client.get_or_create_collection(
|
|
170
|
-
name=self.DOMAIN_KNOWLEDGE_COLLECTION,
|
|
171
|
-
metadata=self._collection_metadata,
|
|
172
|
-
)
|
|
173
|
-
self._anti_patterns = self._client.get_or_create_collection(
|
|
174
|
-
name=self.ANTI_PATTERNS_COLLECTION,
|
|
175
|
-
metadata=self._collection_metadata,
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
def _format_get_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
|
|
179
|
-
"""Reformat get() results to match query() format."""
|
|
180
|
-
emb = results.get("embeddings")
|
|
181
|
-
has_embeddings = emb is not None and (
|
|
182
|
-
(hasattr(emb, "__len__") and len(emb) > 0)
|
|
183
|
-
or (hasattr(emb, "size") and emb.size > 0)
|
|
184
|
-
)
|
|
185
|
-
return {
|
|
186
|
-
"ids": [results.get("ids", [])],
|
|
187
|
-
"metadatas": [results.get("metadatas", [])],
|
|
188
|
-
"documents": [results.get("documents", [])],
|
|
189
|
-
"embeddings": [emb] if has_embeddings else None,
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
def _datetime_to_str(self, dt: Optional[datetime]) -> Optional[str]:
|
|
193
|
-
"""Convert datetime to ISO string for storage."""
|
|
194
|
-
if dt is None:
|
|
195
|
-
return None
|
|
196
|
-
return dt.isoformat()
|
|
197
|
-
|
|
198
|
-
def _str_to_datetime(self, s: Optional[str]) -> Optional[datetime]:
|
|
199
|
-
"""Convert ISO string to datetime."""
|
|
200
|
-
if s is None:
|
|
201
|
-
return None
|
|
202
|
-
try:
|
|
203
|
-
return datetime.fromisoformat(s.replace("Z", "+00:00"))
|
|
204
|
-
except (ValueError, AttributeError):
|
|
205
|
-
return datetime.now(timezone.utc)
|
|
206
|
-
|
|
207
|
-
# ==================== WRITE OPERATIONS ====================
|
|
208
|
-
|
|
209
|
-
def save_heuristic(self, heuristic: Heuristic) -> str:
|
|
210
|
-
"""Save a heuristic."""
|
|
211
|
-
metadata = {
|
|
212
|
-
"agent": heuristic.agent,
|
|
213
|
-
"project_id": heuristic.project_id,
|
|
214
|
-
"condition": heuristic.condition,
|
|
215
|
-
"strategy": heuristic.strategy,
|
|
216
|
-
"confidence": heuristic.confidence,
|
|
217
|
-
"occurrence_count": heuristic.occurrence_count,
|
|
218
|
-
"success_count": heuristic.success_count,
|
|
219
|
-
"last_validated": self._datetime_to_str(heuristic.last_validated),
|
|
220
|
-
"created_at": self._datetime_to_str(heuristic.created_at),
|
|
221
|
-
"extra_metadata": json.dumps(heuristic.metadata)
|
|
222
|
-
if heuristic.metadata
|
|
223
|
-
else "{}",
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
# Chroma requires documents - use condition + strategy as document
|
|
227
|
-
document = f"{heuristic.condition}\n{heuristic.strategy}"
|
|
228
|
-
|
|
229
|
-
if heuristic.embedding:
|
|
230
|
-
self._heuristics.upsert(
|
|
231
|
-
ids=[heuristic.id],
|
|
232
|
-
embeddings=[heuristic.embedding],
|
|
233
|
-
metadatas=[metadata],
|
|
234
|
-
documents=[document],
|
|
235
|
-
)
|
|
236
|
-
else:
|
|
237
|
-
self._heuristics.upsert(
|
|
238
|
-
ids=[heuristic.id],
|
|
239
|
-
metadatas=[metadata],
|
|
240
|
-
documents=[document],
|
|
241
|
-
)
|
|
242
|
-
|
|
243
|
-
logger.debug(f"Saved heuristic: {heuristic.id}")
|
|
244
|
-
return heuristic.id
|
|
245
|
-
|
|
246
|
-
def save_outcome(self, outcome: Outcome) -> str:
|
|
247
|
-
"""Save an outcome."""
|
|
248
|
-
metadata = {
|
|
249
|
-
"agent": outcome.agent,
|
|
250
|
-
"project_id": outcome.project_id,
|
|
251
|
-
"task_type": outcome.task_type or "general",
|
|
252
|
-
"success": outcome.success,
|
|
253
|
-
"strategy_used": outcome.strategy_used or "",
|
|
254
|
-
"duration_ms": outcome.duration_ms or 0,
|
|
255
|
-
"error_message": outcome.error_message or "",
|
|
256
|
-
"user_feedback": outcome.user_feedback or "",
|
|
257
|
-
"timestamp": self._datetime_to_str(outcome.timestamp),
|
|
258
|
-
"extra_metadata": json.dumps(outcome.metadata)
|
|
259
|
-
if outcome.metadata
|
|
260
|
-
else "{}",
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
document = outcome.task_description
|
|
264
|
-
|
|
265
|
-
if outcome.embedding:
|
|
266
|
-
self._outcomes.upsert(
|
|
267
|
-
ids=[outcome.id],
|
|
268
|
-
embeddings=[outcome.embedding],
|
|
269
|
-
metadatas=[metadata],
|
|
270
|
-
documents=[document],
|
|
271
|
-
)
|
|
272
|
-
else:
|
|
273
|
-
self._outcomes.upsert(
|
|
274
|
-
ids=[outcome.id],
|
|
275
|
-
metadatas=[metadata],
|
|
276
|
-
documents=[document],
|
|
277
|
-
)
|
|
278
|
-
|
|
279
|
-
logger.debug(f"Saved outcome: {outcome.id}")
|
|
280
|
-
return outcome.id
|
|
281
|
-
|
|
282
|
-
def save_user_preference(self, preference: UserPreference) -> str:
|
|
283
|
-
"""Save a user preference."""
|
|
284
|
-
metadata = {
|
|
285
|
-
"user_id": preference.user_id,
|
|
286
|
-
"category": preference.category or "general",
|
|
287
|
-
"source": preference.source or "unknown",
|
|
288
|
-
"confidence": preference.confidence,
|
|
289
|
-
"timestamp": self._datetime_to_str(preference.timestamp),
|
|
290
|
-
"extra_metadata": json.dumps(preference.metadata)
|
|
291
|
-
if preference.metadata
|
|
292
|
-
else "{}",
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
document = preference.preference
|
|
296
|
-
|
|
297
|
-
self._preferences.upsert(
|
|
298
|
-
ids=[preference.id],
|
|
299
|
-
metadatas=[metadata],
|
|
300
|
-
documents=[document],
|
|
301
|
-
)
|
|
302
|
-
|
|
303
|
-
logger.debug(f"Saved preference: {preference.id}")
|
|
304
|
-
return preference.id
|
|
305
|
-
|
|
306
|
-
def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
|
|
307
|
-
"""Save domain knowledge."""
|
|
308
|
-
metadata = {
|
|
309
|
-
"agent": knowledge.agent,
|
|
310
|
-
"project_id": knowledge.project_id,
|
|
311
|
-
"domain": knowledge.domain or "general",
|
|
312
|
-
"source": knowledge.source or "unknown",
|
|
313
|
-
"confidence": knowledge.confidence,
|
|
314
|
-
"last_verified": self._datetime_to_str(knowledge.last_verified),
|
|
315
|
-
"extra_metadata": json.dumps(knowledge.metadata)
|
|
316
|
-
if knowledge.metadata
|
|
317
|
-
else "{}",
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
document = knowledge.fact
|
|
321
|
-
|
|
322
|
-
if knowledge.embedding:
|
|
323
|
-
self._domain_knowledge.upsert(
|
|
324
|
-
ids=[knowledge.id],
|
|
325
|
-
embeddings=[knowledge.embedding],
|
|
326
|
-
metadatas=[metadata],
|
|
327
|
-
documents=[document],
|
|
328
|
-
)
|
|
329
|
-
else:
|
|
330
|
-
self._domain_knowledge.upsert(
|
|
331
|
-
ids=[knowledge.id],
|
|
332
|
-
metadatas=[metadata],
|
|
333
|
-
documents=[document],
|
|
334
|
-
)
|
|
335
|
-
|
|
336
|
-
logger.debug(f"Saved domain knowledge: {knowledge.id}")
|
|
337
|
-
return knowledge.id
|
|
338
|
-
|
|
339
|
-
def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
|
|
340
|
-
"""Save an anti-pattern."""
|
|
341
|
-
metadata = {
|
|
342
|
-
"agent": anti_pattern.agent,
|
|
343
|
-
"project_id": anti_pattern.project_id,
|
|
344
|
-
"why_bad": anti_pattern.why_bad or "",
|
|
345
|
-
"better_alternative": anti_pattern.better_alternative or "",
|
|
346
|
-
"occurrence_count": anti_pattern.occurrence_count,
|
|
347
|
-
"last_seen": self._datetime_to_str(anti_pattern.last_seen),
|
|
348
|
-
"created_at": self._datetime_to_str(anti_pattern.created_at),
|
|
349
|
-
"extra_metadata": json.dumps(anti_pattern.metadata)
|
|
350
|
-
if anti_pattern.metadata
|
|
351
|
-
else "{}",
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
document = anti_pattern.pattern
|
|
355
|
-
|
|
356
|
-
if anti_pattern.embedding:
|
|
357
|
-
self._anti_patterns.upsert(
|
|
358
|
-
ids=[anti_pattern.id],
|
|
359
|
-
embeddings=[anti_pattern.embedding],
|
|
360
|
-
metadatas=[metadata],
|
|
361
|
-
documents=[document],
|
|
362
|
-
)
|
|
363
|
-
else:
|
|
364
|
-
self._anti_patterns.upsert(
|
|
365
|
-
ids=[anti_pattern.id],
|
|
366
|
-
metadatas=[metadata],
|
|
367
|
-
documents=[document],
|
|
368
|
-
)
|
|
369
|
-
|
|
370
|
-
logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
|
|
371
|
-
return anti_pattern.id
|
|
372
|
-
|
|
373
|
-
# ==================== BATCH WRITE OPERATIONS ====================
|
|
374
|
-
|
|
375
|
-
def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
|
|
376
|
-
"""Save multiple heuristics in a batch."""
|
|
377
|
-
if not heuristics:
|
|
378
|
-
return []
|
|
379
|
-
|
|
380
|
-
ids = []
|
|
381
|
-
embeddings = []
|
|
382
|
-
metadatas = []
|
|
383
|
-
documents = []
|
|
384
|
-
has_embeddings = False
|
|
385
|
-
|
|
386
|
-
for h in heuristics:
|
|
387
|
-
ids.append(h.id)
|
|
388
|
-
metadatas.append(
|
|
389
|
-
{
|
|
390
|
-
"agent": h.agent,
|
|
391
|
-
"project_id": h.project_id,
|
|
392
|
-
"condition": h.condition,
|
|
393
|
-
"strategy": h.strategy,
|
|
394
|
-
"confidence": h.confidence,
|
|
395
|
-
"occurrence_count": h.occurrence_count,
|
|
396
|
-
"success_count": h.success_count,
|
|
397
|
-
"last_validated": self._datetime_to_str(h.last_validated),
|
|
398
|
-
"created_at": self._datetime_to_str(h.created_at),
|
|
399
|
-
"extra_metadata": json.dumps(h.metadata) if h.metadata else "{}",
|
|
400
|
-
}
|
|
401
|
-
)
|
|
402
|
-
documents.append(f"{h.condition}\n{h.strategy}")
|
|
403
|
-
if h.embedding:
|
|
404
|
-
embeddings.append(h.embedding)
|
|
405
|
-
has_embeddings = True
|
|
406
|
-
else:
|
|
407
|
-
embeddings.append(None)
|
|
408
|
-
|
|
409
|
-
if has_embeddings and all(e is not None for e in embeddings):
|
|
410
|
-
self._heuristics.upsert(
|
|
411
|
-
ids=ids,
|
|
412
|
-
embeddings=embeddings,
|
|
413
|
-
metadatas=metadatas,
|
|
414
|
-
documents=documents,
|
|
415
|
-
)
|
|
416
|
-
else:
|
|
417
|
-
self._heuristics.upsert(
|
|
418
|
-
ids=ids,
|
|
419
|
-
metadatas=metadatas,
|
|
420
|
-
documents=documents,
|
|
421
|
-
)
|
|
422
|
-
|
|
423
|
-
logger.debug(f"Batch saved {len(heuristics)} heuristics")
|
|
424
|
-
return ids
|
|
425
|
-
|
|
426
|
-
def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
|
|
427
|
-
"""Save multiple outcomes in a batch."""
|
|
428
|
-
if not outcomes:
|
|
429
|
-
return []
|
|
430
|
-
|
|
431
|
-
ids = []
|
|
432
|
-
embeddings = []
|
|
433
|
-
metadatas = []
|
|
434
|
-
documents = []
|
|
435
|
-
has_embeddings = False
|
|
436
|
-
|
|
437
|
-
for o in outcomes:
|
|
438
|
-
ids.append(o.id)
|
|
439
|
-
metadatas.append(
|
|
440
|
-
{
|
|
441
|
-
"agent": o.agent,
|
|
442
|
-
"project_id": o.project_id,
|
|
443
|
-
"task_type": o.task_type or "general",
|
|
444
|
-
"success": o.success,
|
|
445
|
-
"strategy_used": o.strategy_used or "",
|
|
446
|
-
"duration_ms": o.duration_ms or 0,
|
|
447
|
-
"error_message": o.error_message or "",
|
|
448
|
-
"user_feedback": o.user_feedback or "",
|
|
449
|
-
"timestamp": self._datetime_to_str(o.timestamp),
|
|
450
|
-
"extra_metadata": json.dumps(o.metadata) if o.metadata else "{}",
|
|
451
|
-
}
|
|
452
|
-
)
|
|
453
|
-
documents.append(o.task_description)
|
|
454
|
-
if o.embedding:
|
|
455
|
-
embeddings.append(o.embedding)
|
|
456
|
-
has_embeddings = True
|
|
457
|
-
else:
|
|
458
|
-
embeddings.append(None)
|
|
459
|
-
|
|
460
|
-
if has_embeddings and all(e is not None for e in embeddings):
|
|
461
|
-
self._outcomes.upsert(
|
|
462
|
-
ids=ids,
|
|
463
|
-
embeddings=embeddings,
|
|
464
|
-
metadatas=metadatas,
|
|
465
|
-
documents=documents,
|
|
466
|
-
)
|
|
467
|
-
else:
|
|
468
|
-
self._outcomes.upsert(
|
|
469
|
-
ids=ids,
|
|
470
|
-
metadatas=metadatas,
|
|
471
|
-
documents=documents,
|
|
472
|
-
)
|
|
473
|
-
|
|
474
|
-
logger.debug(f"Batch saved {len(outcomes)} outcomes")
|
|
475
|
-
return ids
|
|
476
|
-
|
|
477
|
-
def save_domain_knowledge_batch(
|
|
478
|
-
self, knowledge_items: List[DomainKnowledge]
|
|
479
|
-
) -> List[str]:
|
|
480
|
-
"""Save multiple domain knowledge items in a batch."""
|
|
481
|
-
if not knowledge_items:
|
|
482
|
-
return []
|
|
483
|
-
|
|
484
|
-
ids = []
|
|
485
|
-
embeddings = []
|
|
486
|
-
metadatas = []
|
|
487
|
-
documents = []
|
|
488
|
-
has_embeddings = False
|
|
489
|
-
|
|
490
|
-
for k in knowledge_items:
|
|
491
|
-
ids.append(k.id)
|
|
492
|
-
metadatas.append(
|
|
493
|
-
{
|
|
494
|
-
"agent": k.agent,
|
|
495
|
-
"project_id": k.project_id,
|
|
496
|
-
"domain": k.domain or "general",
|
|
497
|
-
"source": k.source or "unknown",
|
|
498
|
-
"confidence": k.confidence,
|
|
499
|
-
"last_verified": self._datetime_to_str(k.last_verified),
|
|
500
|
-
"extra_metadata": json.dumps(k.metadata) if k.metadata else "{}",
|
|
501
|
-
}
|
|
502
|
-
)
|
|
503
|
-
documents.append(k.fact)
|
|
504
|
-
if k.embedding:
|
|
505
|
-
embeddings.append(k.embedding)
|
|
506
|
-
has_embeddings = True
|
|
507
|
-
else:
|
|
508
|
-
embeddings.append(None)
|
|
509
|
-
|
|
510
|
-
if has_embeddings and all(e is not None for e in embeddings):
|
|
511
|
-
self._domain_knowledge.upsert(
|
|
512
|
-
ids=ids,
|
|
513
|
-
embeddings=embeddings,
|
|
514
|
-
metadatas=metadatas,
|
|
515
|
-
documents=documents,
|
|
516
|
-
)
|
|
517
|
-
else:
|
|
518
|
-
self._domain_knowledge.upsert(
|
|
519
|
-
ids=ids,
|
|
520
|
-
metadatas=metadatas,
|
|
521
|
-
documents=documents,
|
|
522
|
-
)
|
|
523
|
-
|
|
524
|
-
logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
|
|
525
|
-
return ids
|
|
526
|
-
|
|
527
|
-
# ==================== READ OPERATIONS ====================
|
|
528
|
-
|
|
529
|
-
def _build_where_filter(
|
|
530
|
-
self,
|
|
531
|
-
project_id: Optional[str] = None,
|
|
532
|
-
agent: Optional[str] = None,
|
|
533
|
-
user_id: Optional[str] = None,
|
|
534
|
-
domain: Optional[str] = None,
|
|
535
|
-
task_type: Optional[str] = None,
|
|
536
|
-
min_confidence: Optional[float] = None,
|
|
537
|
-
success_only: bool = False,
|
|
538
|
-
) -> Optional[Dict[str, Any]]:
|
|
539
|
-
"""Build Chroma where filter from parameters."""
|
|
540
|
-
conditions = []
|
|
541
|
-
|
|
542
|
-
if project_id:
|
|
543
|
-
conditions.append({"project_id": {"$eq": project_id}})
|
|
544
|
-
if agent:
|
|
545
|
-
conditions.append({"agent": {"$eq": agent}})
|
|
546
|
-
if user_id:
|
|
547
|
-
conditions.append({"user_id": {"$eq": user_id}})
|
|
548
|
-
if domain:
|
|
549
|
-
conditions.append({"domain": {"$eq": domain}})
|
|
550
|
-
if task_type:
|
|
551
|
-
conditions.append({"task_type": {"$eq": task_type}})
|
|
552
|
-
if min_confidence is not None and min_confidence > 0:
|
|
553
|
-
conditions.append({"confidence": {"$gte": min_confidence}})
|
|
554
|
-
if success_only:
|
|
555
|
-
conditions.append({"success": {"$eq": True}})
|
|
556
|
-
|
|
557
|
-
if not conditions:
|
|
558
|
-
return None
|
|
559
|
-
if len(conditions) == 1:
|
|
560
|
-
return conditions[0]
|
|
561
|
-
return {"$and": conditions}
|
|
562
|
-
|
|
563
|
-
def _has_embedding(self, emb: Any) -> bool:
|
|
564
|
-
"""Safely check if embedding is not None/empty (handles numpy arrays)."""
|
|
565
|
-
if emb is None:
|
|
566
|
-
return False
|
|
567
|
-
if hasattr(emb, "__len__"):
|
|
568
|
-
try:
|
|
569
|
-
return len(emb) > 0
|
|
570
|
-
except (TypeError, ValueError):
|
|
571
|
-
pass
|
|
572
|
-
if hasattr(emb, "size"):
|
|
573
|
-
return emb.size > 0
|
|
574
|
-
return True
|
|
575
|
-
|
|
576
|
-
    def _get_embedding_list(self, results: Dict[str, Any], num_ids: int) -> List[Any]:
        """Safely extract embeddings list from results.

        Returns one entry per id; positions with no usable embedding are
        None. Accepts both the nested query() shape ([[e1, e2, ...]]) and
        flat shapes, including numpy arrays.
        """
        emb_data = results.get("embeddings")
        if emb_data is None:
            return [None] * num_ids
        # Handle both nested list format (query results) and flat format
        if isinstance(emb_data, list) and len(emb_data) > 0:
            first = emb_data[0]
            # Check if it's a nested list (query format: [[emb1, emb2, ...]])
            if isinstance(first, list) or (
                hasattr(first, "__iter__") and not isinstance(first, (str, bytes))
            ):
                # Could be list of embeddings or numpy array
                try:
                    if hasattr(first, "tolist"):
                        # numpy array
                        return list(emb_data[0])
                    # NOTE(review): for a non-list iterable without tolist
                    # this returns [first], i.e. a single-element list even
                    # when num_ids > 1 — presumably that shape never occurs
                    # in practice; confirm against Chroma's return types.
                    return list(first) if isinstance(first, list) else [first]
                except (TypeError, IndexError):
                    return [None] * num_ids
            # Flat list of scalars/embeddings: use as-is.
            return list(emb_data)
        return [None] * num_ids
|
598
|
-
|
|
599
|
-
    def _results_to_heuristics(self, results: Dict[str, Any]) -> List[Heuristic]:
        """Convert Chroma query results to Heuristic objects.

        Expects the nested query() shape (lists wrapped in an outer
        single-element list). Returns an empty list when there are no hits.
        Note: condition/strategy are read from metadata, not the document.
        """
        heuristics = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return heuristics

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        embeddings = self._get_embedding_list(results, len(ids))

        for i, id_ in enumerate(ids):
            # Guard against ragged result lists.
            meta = metadatas[i] if i < len(metadatas) else {}
            emb = embeddings[i] if i < len(embeddings) else None

            # extra_metadata is stored as a JSON string (Chroma metadata
            # values must be scalars).
            extra = json.loads(meta.get("extra_metadata", "{}"))

            heuristics.append(
                Heuristic(
                    id=id_,
                    agent=meta.get("agent", ""),
                    project_id=meta.get("project_id", ""),
                    condition=meta.get("condition", ""),
                    strategy=meta.get("strategy", ""),
                    confidence=meta.get("confidence", 0.0),
                    occurrence_count=meta.get("occurrence_count", 0),
                    success_count=meta.get("success_count", 0),
                    # Missing/unparseable timestamps default to "now".
                    last_validated=self._str_to_datetime(meta.get("last_validated"))
                    or datetime.now(timezone.utc),
                    created_at=self._str_to_datetime(meta.get("created_at"))
                    or datetime.now(timezone.utc),
                    # Normalize numpy arrays to plain lists.
                    embedding=list(emb)
                    if emb is not None and hasattr(emb, "__iter__")
                    else emb,
                    metadata=extra,
                )
            )

        return heuristics
|
637
|
-
|
|
638
|
-
    def _results_to_outcomes(self, results: Dict[str, Any]) -> List[Outcome]:
        """Convert Chroma query results to Outcome objects.

        Expects the nested query() shape. The stored document is the
        task description; empty-string sentinels for error_message and
        user_feedback are mapped back to None.
        """
        outcomes = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return outcomes

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        documents = results.get("documents", [[]])[0]
        embeddings = self._get_embedding_list(results, len(ids))

        for i, id_ in enumerate(ids):
            # Guard against ragged result lists.
            meta = metadatas[i] if i < len(metadatas) else {}
            doc = documents[i] if i < len(documents) else ""
            emb = embeddings[i] if i < len(embeddings) else None

            # extra_metadata is stored as a JSON string.
            extra = json.loads(meta.get("extra_metadata", "{}"))

            outcomes.append(
                Outcome(
                    id=id_,
                    agent=meta.get("agent", ""),
                    project_id=meta.get("project_id", ""),
                    task_type=meta.get("task_type", "general"),
                    task_description=doc,
                    success=meta.get("success", False),
                    strategy_used=meta.get("strategy_used", ""),
                    duration_ms=meta.get("duration_ms"),
                    # "" was written for missing values; restore None.
                    error_message=meta.get("error_message") or None,
                    user_feedback=meta.get("user_feedback") or None,
                    timestamp=self._str_to_datetime(meta.get("timestamp"))
                    or datetime.now(timezone.utc),
                    # Normalize numpy arrays to plain lists.
                    embedding=list(emb)
                    if emb is not None and hasattr(emb, "__iter__")
                    else emb,
                    metadata=extra,
                )
            )

        return outcomes
|
678
|
-
|
|
679
|
-
    def _results_to_preferences(self, results: Dict[str, Any]) -> List[UserPreference]:
        """Convert Chroma query results to UserPreference objects.

        Expects the nested query() shape. The stored document is the
        preference text; no embeddings are read back for preferences.
        """
        preferences = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return preferences

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        documents = results.get("documents", [[]])[0]

        for i, id_ in enumerate(ids):
            # Guard against ragged result lists.
            meta = metadatas[i] if i < len(metadatas) else {}
            doc = documents[i] if i < len(documents) else ""

            # extra_metadata is stored as a JSON string.
            extra = json.loads(meta.get("extra_metadata", "{}"))

            preferences.append(
                UserPreference(
                    id=id_,
                    user_id=meta.get("user_id", ""),
                    category=meta.get("category", "general"),
                    preference=doc,
                    source=meta.get("source", "unknown"),
                    confidence=meta.get("confidence", 1.0),
                    timestamp=self._str_to_datetime(meta.get("timestamp"))
                    or datetime.now(timezone.utc),
                    metadata=extra,
                )
            )

        return preferences
|
710
|
-
|
|
711
|
-
def _results_to_domain_knowledge(
|
|
712
|
-
self, results: Dict[str, Any]
|
|
713
|
-
) -> List[DomainKnowledge]:
|
|
714
|
-
"""Convert Chroma query results to DomainKnowledge objects."""
|
|
715
|
-
knowledge = []
|
|
716
|
-
if not results or not results.get("ids") or not results["ids"][0]:
|
|
717
|
-
return knowledge
|
|
718
|
-
|
|
719
|
-
ids = results["ids"][0]
|
|
720
|
-
metadatas = results.get("metadatas", [[]])[0]
|
|
721
|
-
documents = results.get("documents", [[]])[0]
|
|
722
|
-
embeddings = self._get_embedding_list(results, len(ids))
|
|
723
|
-
|
|
724
|
-
for i, id_ in enumerate(ids):
|
|
725
|
-
meta = metadatas[i] if i < len(metadatas) else {}
|
|
726
|
-
doc = documents[i] if i < len(documents) else ""
|
|
727
|
-
emb = embeddings[i] if i < len(embeddings) else None
|
|
728
|
-
|
|
729
|
-
extra = json.loads(meta.get("extra_metadata", "{}"))
|
|
730
|
-
|
|
731
|
-
knowledge.append(
|
|
732
|
-
DomainKnowledge(
|
|
733
|
-
id=id_,
|
|
734
|
-
agent=meta.get("agent", ""),
|
|
735
|
-
project_id=meta.get("project_id", ""),
|
|
736
|
-
domain=meta.get("domain", "general"),
|
|
737
|
-
fact=doc,
|
|
738
|
-
source=meta.get("source", "unknown"),
|
|
739
|
-
confidence=meta.get("confidence", 1.0),
|
|
740
|
-
last_verified=self._str_to_datetime(meta.get("last_verified"))
|
|
741
|
-
or datetime.now(timezone.utc),
|
|
742
|
-
embedding=list(emb)
|
|
743
|
-
if emb is not None and hasattr(emb, "__iter__")
|
|
744
|
-
else emb,
|
|
745
|
-
metadata=extra,
|
|
746
|
-
)
|
|
747
|
-
)
|
|
748
|
-
|
|
749
|
-
return knowledge
|
|
750
|
-
|
|
751
|
-
def _results_to_anti_patterns(self, results: Dict[str, Any]) -> List[AntiPattern]:
|
|
752
|
-
"""Convert Chroma query results to AntiPattern objects."""
|
|
753
|
-
patterns = []
|
|
754
|
-
if not results or not results.get("ids") or not results["ids"][0]:
|
|
755
|
-
return patterns
|
|
756
|
-
|
|
757
|
-
ids = results["ids"][0]
|
|
758
|
-
metadatas = results.get("metadatas", [[]])[0]
|
|
759
|
-
documents = results.get("documents", [[]])[0]
|
|
760
|
-
embeddings = self._get_embedding_list(results, len(ids))
|
|
761
|
-
|
|
762
|
-
for i, id_ in enumerate(ids):
|
|
763
|
-
meta = metadatas[i] if i < len(metadatas) else {}
|
|
764
|
-
doc = documents[i] if i < len(documents) else ""
|
|
765
|
-
emb = embeddings[i] if i < len(embeddings) else None
|
|
766
|
-
|
|
767
|
-
extra = json.loads(meta.get("extra_metadata", "{}"))
|
|
768
|
-
|
|
769
|
-
patterns.append(
|
|
770
|
-
AntiPattern(
|
|
771
|
-
id=id_,
|
|
772
|
-
agent=meta.get("agent", ""),
|
|
773
|
-
project_id=meta.get("project_id", ""),
|
|
774
|
-
pattern=doc,
|
|
775
|
-
why_bad=meta.get("why_bad", ""),
|
|
776
|
-
better_alternative=meta.get("better_alternative", ""),
|
|
777
|
-
occurrence_count=meta.get("occurrence_count", 1),
|
|
778
|
-
last_seen=self._str_to_datetime(meta.get("last_seen"))
|
|
779
|
-
or datetime.now(timezone.utc),
|
|
780
|
-
created_at=self._str_to_datetime(meta.get("created_at"))
|
|
781
|
-
or datetime.now(timezone.utc),
|
|
782
|
-
embedding=list(emb)
|
|
783
|
-
if emb is not None and hasattr(emb, "__iter__")
|
|
784
|
-
else emb,
|
|
785
|
-
metadata=extra,
|
|
786
|
-
)
|
|
787
|
-
)
|
|
788
|
-
|
|
789
|
-
return patterns
|
|
790
|
-
|
|
791
|
-
def get_heuristics(
|
|
792
|
-
self,
|
|
793
|
-
project_id: str,
|
|
794
|
-
agent: Optional[str] = None,
|
|
795
|
-
embedding: Optional[List[float]] = None,
|
|
796
|
-
top_k: int = 5,
|
|
797
|
-
min_confidence: float = 0.0,
|
|
798
|
-
) -> List[Heuristic]:
|
|
799
|
-
"""Get heuristics with optional vector search."""
|
|
800
|
-
where_filter = self._build_where_filter(
|
|
801
|
-
project_id=project_id,
|
|
802
|
-
agent=agent,
|
|
803
|
-
min_confidence=min_confidence,
|
|
804
|
-
)
|
|
805
|
-
|
|
806
|
-
if embedding:
|
|
807
|
-
results = self._heuristics.query(
|
|
808
|
-
query_embeddings=[embedding],
|
|
809
|
-
n_results=top_k,
|
|
810
|
-
where=where_filter,
|
|
811
|
-
include=["metadatas", "documents", "embeddings"],
|
|
812
|
-
)
|
|
813
|
-
else:
|
|
814
|
-
results = self._heuristics.get(
|
|
815
|
-
where=where_filter,
|
|
816
|
-
limit=top_k,
|
|
817
|
-
include=["metadatas", "documents", "embeddings"],
|
|
818
|
-
)
|
|
819
|
-
results = self._format_get_results(results)
|
|
820
|
-
|
|
821
|
-
return self._results_to_heuristics(results)
|
|
822
|
-
|
|
823
|
-
def get_outcomes(
|
|
824
|
-
self,
|
|
825
|
-
project_id: str,
|
|
826
|
-
agent: Optional[str] = None,
|
|
827
|
-
task_type: Optional[str] = None,
|
|
828
|
-
embedding: Optional[List[float]] = None,
|
|
829
|
-
top_k: int = 5,
|
|
830
|
-
success_only: bool = False,
|
|
831
|
-
) -> List[Outcome]:
|
|
832
|
-
"""Get outcomes with optional vector search."""
|
|
833
|
-
where_filter = self._build_where_filter(
|
|
834
|
-
project_id=project_id,
|
|
835
|
-
agent=agent,
|
|
836
|
-
task_type=task_type,
|
|
837
|
-
success_only=success_only,
|
|
838
|
-
)
|
|
839
|
-
|
|
840
|
-
if embedding:
|
|
841
|
-
results = self._outcomes.query(
|
|
842
|
-
query_embeddings=[embedding],
|
|
843
|
-
n_results=top_k,
|
|
844
|
-
where=where_filter,
|
|
845
|
-
include=["metadatas", "documents", "embeddings"],
|
|
846
|
-
)
|
|
847
|
-
else:
|
|
848
|
-
results = self._outcomes.get(
|
|
849
|
-
where=where_filter,
|
|
850
|
-
limit=top_k,
|
|
851
|
-
include=["metadatas", "documents", "embeddings"],
|
|
852
|
-
)
|
|
853
|
-
results = self._format_get_results(results)
|
|
854
|
-
|
|
855
|
-
return self._results_to_outcomes(results)
|
|
856
|
-
|
|
857
|
-
def get_user_preferences(
|
|
858
|
-
self,
|
|
859
|
-
user_id: str,
|
|
860
|
-
category: Optional[str] = None,
|
|
861
|
-
) -> List[UserPreference]:
|
|
862
|
-
"""Get user preferences."""
|
|
863
|
-
where_filter = self._build_where_filter(user_id=user_id)
|
|
864
|
-
if category:
|
|
865
|
-
if where_filter:
|
|
866
|
-
where_filter = {"$and": [where_filter, {"category": {"$eq": category}}]}
|
|
867
|
-
else:
|
|
868
|
-
where_filter = {"category": {"$eq": category}}
|
|
869
|
-
|
|
870
|
-
results = self._preferences.get(
|
|
871
|
-
where=where_filter,
|
|
872
|
-
include=["metadatas", "documents"],
|
|
873
|
-
)
|
|
874
|
-
results = {
|
|
875
|
-
"ids": [results.get("ids", [])],
|
|
876
|
-
"metadatas": [results.get("metadatas", [])],
|
|
877
|
-
"documents": [results.get("documents", [])],
|
|
878
|
-
}
|
|
879
|
-
|
|
880
|
-
return self._results_to_preferences(results)
|
|
881
|
-
|
|
882
|
-
def get_domain_knowledge(
|
|
883
|
-
self,
|
|
884
|
-
project_id: str,
|
|
885
|
-
agent: Optional[str] = None,
|
|
886
|
-
domain: Optional[str] = None,
|
|
887
|
-
embedding: Optional[List[float]] = None,
|
|
888
|
-
top_k: int = 5,
|
|
889
|
-
) -> List[DomainKnowledge]:
|
|
890
|
-
"""Get domain knowledge with optional vector search."""
|
|
891
|
-
where_filter = self._build_where_filter(
|
|
892
|
-
project_id=project_id,
|
|
893
|
-
agent=agent,
|
|
894
|
-
domain=domain,
|
|
895
|
-
)
|
|
896
|
-
|
|
897
|
-
if embedding:
|
|
898
|
-
results = self._domain_knowledge.query(
|
|
899
|
-
query_embeddings=[embedding],
|
|
900
|
-
n_results=top_k,
|
|
901
|
-
where=where_filter,
|
|
902
|
-
include=["metadatas", "documents", "embeddings"],
|
|
903
|
-
)
|
|
904
|
-
else:
|
|
905
|
-
results = self._domain_knowledge.get(
|
|
906
|
-
where=where_filter,
|
|
907
|
-
limit=top_k,
|
|
908
|
-
include=["metadatas", "documents", "embeddings"],
|
|
909
|
-
)
|
|
910
|
-
results = self._format_get_results(results)
|
|
911
|
-
|
|
912
|
-
return self._results_to_domain_knowledge(results)
|
|
913
|
-
|
|
914
|
-
def get_anti_patterns(
|
|
915
|
-
self,
|
|
916
|
-
project_id: str,
|
|
917
|
-
agent: Optional[str] = None,
|
|
918
|
-
embedding: Optional[List[float]] = None,
|
|
919
|
-
top_k: int = 5,
|
|
920
|
-
) -> List[AntiPattern]:
|
|
921
|
-
"""Get anti-patterns with optional vector search."""
|
|
922
|
-
where_filter = self._build_where_filter(
|
|
923
|
-
project_id=project_id,
|
|
924
|
-
agent=agent,
|
|
925
|
-
)
|
|
926
|
-
|
|
927
|
-
if embedding:
|
|
928
|
-
results = self._anti_patterns.query(
|
|
929
|
-
query_embeddings=[embedding],
|
|
930
|
-
n_results=top_k,
|
|
931
|
-
where=where_filter,
|
|
932
|
-
include=["metadatas", "documents", "embeddings"],
|
|
933
|
-
)
|
|
934
|
-
else:
|
|
935
|
-
results = self._anti_patterns.get(
|
|
936
|
-
where=where_filter,
|
|
937
|
-
limit=top_k,
|
|
938
|
-
include=["metadatas", "documents", "embeddings"],
|
|
939
|
-
)
|
|
940
|
-
results = self._format_get_results(results)
|
|
941
|
-
|
|
942
|
-
return self._results_to_anti_patterns(results)
|
|
943
|
-
|
|
944
|
-
# ==================== MULTI-AGENT MEMORY SHARING ====================
|
|
945
|
-
|
|
946
|
-
def _build_agents_filter(
|
|
947
|
-
self,
|
|
948
|
-
project_id: str,
|
|
949
|
-
agents: List[str],
|
|
950
|
-
**kwargs: Any,
|
|
951
|
-
) -> Optional[Dict[str, Any]]:
|
|
952
|
-
"""Build filter for multiple agents."""
|
|
953
|
-
if not agents:
|
|
954
|
-
return None
|
|
955
|
-
|
|
956
|
-
agent_conditions = [{"agent": {"$eq": a}} for a in agents]
|
|
957
|
-
agents_filter = (
|
|
958
|
-
{"$or": agent_conditions}
|
|
959
|
-
if len(agent_conditions) > 1
|
|
960
|
-
else agent_conditions[0]
|
|
961
|
-
)
|
|
962
|
-
|
|
963
|
-
base_filter = self._build_where_filter(project_id=project_id, **kwargs)
|
|
964
|
-
|
|
965
|
-
if base_filter:
|
|
966
|
-
return {"$and": [base_filter, agents_filter]}
|
|
967
|
-
return {"$and": [{"project_id": {"$eq": project_id}}, agents_filter]}
|
|
968
|
-
|
|
969
|
-
def get_heuristics_for_agents(
|
|
970
|
-
self,
|
|
971
|
-
project_id: str,
|
|
972
|
-
agents: List[str],
|
|
973
|
-
embedding: Optional[List[float]] = None,
|
|
974
|
-
top_k: int = 5,
|
|
975
|
-
min_confidence: float = 0.0,
|
|
976
|
-
) -> List[Heuristic]:
|
|
977
|
-
"""Get heuristics from multiple agents."""
|
|
978
|
-
if not agents:
|
|
979
|
-
return []
|
|
980
|
-
|
|
981
|
-
where_filter = self._build_agents_filter(
|
|
982
|
-
project_id=project_id,
|
|
983
|
-
agents=agents,
|
|
984
|
-
min_confidence=min_confidence,
|
|
985
|
-
)
|
|
986
|
-
|
|
987
|
-
if embedding:
|
|
988
|
-
results = self._heuristics.query(
|
|
989
|
-
query_embeddings=[embedding],
|
|
990
|
-
n_results=top_k * len(agents),
|
|
991
|
-
where=where_filter,
|
|
992
|
-
include=["metadatas", "documents", "embeddings"],
|
|
993
|
-
)
|
|
994
|
-
else:
|
|
995
|
-
results = self._heuristics.get(
|
|
996
|
-
where=where_filter,
|
|
997
|
-
limit=top_k * len(agents),
|
|
998
|
-
include=["metadatas", "documents", "embeddings"],
|
|
999
|
-
)
|
|
1000
|
-
results = self._format_get_results(results)
|
|
1001
|
-
|
|
1002
|
-
return self._results_to_heuristics(results)
|
|
1003
|
-
|
|
1004
|
-
def get_outcomes_for_agents(
|
|
1005
|
-
self,
|
|
1006
|
-
project_id: str,
|
|
1007
|
-
agents: List[str],
|
|
1008
|
-
task_type: Optional[str] = None,
|
|
1009
|
-
embedding: Optional[List[float]] = None,
|
|
1010
|
-
top_k: int = 5,
|
|
1011
|
-
success_only: bool = False,
|
|
1012
|
-
) -> List[Outcome]:
|
|
1013
|
-
"""Get outcomes from multiple agents."""
|
|
1014
|
-
if not agents:
|
|
1015
|
-
return []
|
|
1016
|
-
|
|
1017
|
-
where_filter = self._build_agents_filter(
|
|
1018
|
-
project_id=project_id,
|
|
1019
|
-
agents=agents,
|
|
1020
|
-
task_type=task_type,
|
|
1021
|
-
success_only=success_only,
|
|
1022
|
-
)
|
|
1023
|
-
|
|
1024
|
-
if embedding:
|
|
1025
|
-
results = self._outcomes.query(
|
|
1026
|
-
query_embeddings=[embedding],
|
|
1027
|
-
n_results=top_k * len(agents),
|
|
1028
|
-
where=where_filter,
|
|
1029
|
-
include=["metadatas", "documents", "embeddings"],
|
|
1030
|
-
)
|
|
1031
|
-
else:
|
|
1032
|
-
results = self._outcomes.get(
|
|
1033
|
-
where=where_filter,
|
|
1034
|
-
limit=top_k * len(agents),
|
|
1035
|
-
include=["metadatas", "documents", "embeddings"],
|
|
1036
|
-
)
|
|
1037
|
-
results = self._format_get_results(results)
|
|
1038
|
-
|
|
1039
|
-
return self._results_to_outcomes(results)
|
|
1040
|
-
|
|
1041
|
-
def get_domain_knowledge_for_agents(
|
|
1042
|
-
self,
|
|
1043
|
-
project_id: str,
|
|
1044
|
-
agents: List[str],
|
|
1045
|
-
domain: Optional[str] = None,
|
|
1046
|
-
embedding: Optional[List[float]] = None,
|
|
1047
|
-
top_k: int = 5,
|
|
1048
|
-
) -> List[DomainKnowledge]:
|
|
1049
|
-
"""Get domain knowledge from multiple agents."""
|
|
1050
|
-
if not agents:
|
|
1051
|
-
return []
|
|
1052
|
-
|
|
1053
|
-
where_filter = self._build_agents_filter(
|
|
1054
|
-
project_id=project_id,
|
|
1055
|
-
agents=agents,
|
|
1056
|
-
domain=domain,
|
|
1057
|
-
)
|
|
1058
|
-
|
|
1059
|
-
if embedding:
|
|
1060
|
-
results = self._domain_knowledge.query(
|
|
1061
|
-
query_embeddings=[embedding],
|
|
1062
|
-
n_results=top_k * len(agents),
|
|
1063
|
-
where=where_filter,
|
|
1064
|
-
include=["metadatas", "documents", "embeddings"],
|
|
1065
|
-
)
|
|
1066
|
-
else:
|
|
1067
|
-
results = self._domain_knowledge.get(
|
|
1068
|
-
where=where_filter,
|
|
1069
|
-
limit=top_k * len(agents),
|
|
1070
|
-
include=["metadatas", "documents", "embeddings"],
|
|
1071
|
-
)
|
|
1072
|
-
results = self._format_get_results(results)
|
|
1073
|
-
|
|
1074
|
-
return self._results_to_domain_knowledge(results)
|
|
1075
|
-
|
|
1076
|
-
def get_anti_patterns_for_agents(
|
|
1077
|
-
self,
|
|
1078
|
-
project_id: str,
|
|
1079
|
-
agents: List[str],
|
|
1080
|
-
embedding: Optional[List[float]] = None,
|
|
1081
|
-
top_k: int = 5,
|
|
1082
|
-
) -> List[AntiPattern]:
|
|
1083
|
-
"""Get anti-patterns from multiple agents."""
|
|
1084
|
-
if not agents:
|
|
1085
|
-
return []
|
|
1086
|
-
|
|
1087
|
-
where_filter = self._build_agents_filter(
|
|
1088
|
-
project_id=project_id,
|
|
1089
|
-
agents=agents,
|
|
1090
|
-
)
|
|
1091
|
-
|
|
1092
|
-
if embedding:
|
|
1093
|
-
results = self._anti_patterns.query(
|
|
1094
|
-
query_embeddings=[embedding],
|
|
1095
|
-
n_results=top_k * len(agents),
|
|
1096
|
-
where=where_filter,
|
|
1097
|
-
include=["metadatas", "documents", "embeddings"],
|
|
1098
|
-
)
|
|
1099
|
-
else:
|
|
1100
|
-
results = self._anti_patterns.get(
|
|
1101
|
-
where=where_filter,
|
|
1102
|
-
limit=top_k * len(agents),
|
|
1103
|
-
include=["metadatas", "documents", "embeddings"],
|
|
1104
|
-
)
|
|
1105
|
-
results = self._format_get_results(results)
|
|
1106
|
-
|
|
1107
|
-
return self._results_to_anti_patterns(results)
|
|
1108
|
-
|
|
1109
|
-
# ==================== UPDATE OPERATIONS ====================
|
|
1110
|
-
|
|
1111
|
-
def update_heuristic(
|
|
1112
|
-
self,
|
|
1113
|
-
heuristic_id: str,
|
|
1114
|
-
updates: Dict[str, Any],
|
|
1115
|
-
) -> bool:
|
|
1116
|
-
"""Update a heuristic's fields."""
|
|
1117
|
-
if not updates:
|
|
1118
|
-
return False
|
|
1119
|
-
|
|
1120
|
-
try:
|
|
1121
|
-
# Get existing heuristic
|
|
1122
|
-
existing = self._heuristics.get(
|
|
1123
|
-
ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
|
|
1124
|
-
)
|
|
1125
|
-
if not existing or not existing.get("ids"):
|
|
1126
|
-
return False
|
|
1127
|
-
|
|
1128
|
-
metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
|
|
1129
|
-
document = existing["documents"][0] if existing.get("documents") else ""
|
|
1130
|
-
emb_list = existing.get("embeddings")
|
|
1131
|
-
embedding = (
|
|
1132
|
-
emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
|
|
1133
|
-
)
|
|
1134
|
-
|
|
1135
|
-
# Apply updates
|
|
1136
|
-
for key, value in updates.items():
|
|
1137
|
-
if key == "condition":
|
|
1138
|
-
metadata["condition"] = value
|
|
1139
|
-
# Update document as well
|
|
1140
|
-
parts = document.split("\n", 1)
|
|
1141
|
-
document = f"{value}\n{parts[1] if len(parts) > 1 else ''}"
|
|
1142
|
-
elif key == "strategy":
|
|
1143
|
-
metadata["strategy"] = value
|
|
1144
|
-
parts = document.split("\n", 1)
|
|
1145
|
-
document = f"{parts[0] if parts else ''}\n{value}"
|
|
1146
|
-
elif key == "metadata":
|
|
1147
|
-
metadata["extra_metadata"] = json.dumps(value)
|
|
1148
|
-
elif key in ("last_validated", "created_at") and isinstance(
|
|
1149
|
-
value, datetime
|
|
1150
|
-
):
|
|
1151
|
-
metadata[key] = value.isoformat()
|
|
1152
|
-
elif key in metadata:
|
|
1153
|
-
metadata[key] = value
|
|
1154
|
-
|
|
1155
|
-
# Upsert with updated values
|
|
1156
|
-
if self._has_embedding(embedding):
|
|
1157
|
-
self._heuristics.upsert(
|
|
1158
|
-
ids=[heuristic_id],
|
|
1159
|
-
embeddings=[
|
|
1160
|
-
list(embedding) if hasattr(embedding, "__iter__") else embedding
|
|
1161
|
-
],
|
|
1162
|
-
metadatas=[metadata],
|
|
1163
|
-
documents=[document],
|
|
1164
|
-
)
|
|
1165
|
-
else:
|
|
1166
|
-
self._heuristics.upsert(
|
|
1167
|
-
ids=[heuristic_id],
|
|
1168
|
-
metadatas=[metadata],
|
|
1169
|
-
documents=[document],
|
|
1170
|
-
)
|
|
1171
|
-
|
|
1172
|
-
return True
|
|
1173
|
-
except Exception as e:
|
|
1174
|
-
logger.
|
|
1175
|
-
return False
|
|
1176
|
-
|
|
1177
|
-
def increment_heuristic_occurrence(
|
|
1178
|
-
self,
|
|
1179
|
-
heuristic_id: str,
|
|
1180
|
-
success: bool,
|
|
1181
|
-
) -> bool:
|
|
1182
|
-
"""Increment heuristic occurrence count."""
|
|
1183
|
-
try:
|
|
1184
|
-
existing = self._heuristics.get(
|
|
1185
|
-
ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
|
|
1186
|
-
)
|
|
1187
|
-
if not existing or not existing.get("ids"):
|
|
1188
|
-
return False
|
|
1189
|
-
|
|
1190
|
-
metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
|
|
1191
|
-
document = existing["documents"][0] if existing.get("documents") else ""
|
|
1192
|
-
emb_list = existing.get("embeddings")
|
|
1193
|
-
embedding = (
|
|
1194
|
-
emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
|
|
1195
|
-
)
|
|
1196
|
-
|
|
1197
|
-
metadata["occurrence_count"] = metadata.get("occurrence_count", 0) + 1
|
|
1198
|
-
if success:
|
|
1199
|
-
metadata["success_count"] = metadata.get("success_count", 0) + 1
|
|
1200
|
-
metadata["last_validated"] = datetime.now(timezone.utc).isoformat()
|
|
1201
|
-
|
|
1202
|
-
if self._has_embedding(embedding):
|
|
1203
|
-
self._heuristics.upsert(
|
|
1204
|
-
ids=[heuristic_id],
|
|
1205
|
-
embeddings=[
|
|
1206
|
-
list(embedding) if hasattr(embedding, "__iter__") else embedding
|
|
1207
|
-
],
|
|
1208
|
-
metadatas=[metadata],
|
|
1209
|
-
documents=[document],
|
|
1210
|
-
)
|
|
1211
|
-
else:
|
|
1212
|
-
self._heuristics.upsert(
|
|
1213
|
-
ids=[heuristic_id],
|
|
1214
|
-
metadatas=[metadata],
|
|
1215
|
-
documents=[document],
|
|
1216
|
-
)
|
|
1217
|
-
|
|
1218
|
-
return True
|
|
1219
|
-
except Exception as e:
|
|
1220
|
-
logger.
|
|
1221
|
-
return False
|
|
1222
|
-
|
|
1223
|
-
def update_heuristic_confidence(
|
|
1224
|
-
self,
|
|
1225
|
-
heuristic_id: str,
|
|
1226
|
-
new_confidence: float,
|
|
1227
|
-
) -> bool:
|
|
1228
|
-
"""Update a heuristic's confidence value."""
|
|
1229
|
-
return self.update_heuristic(heuristic_id, {"confidence": new_confidence})
|
|
1230
|
-
|
|
1231
|
-
def update_knowledge_confidence(
|
|
1232
|
-
self,
|
|
1233
|
-
knowledge_id: str,
|
|
1234
|
-
new_confidence: float,
|
|
1235
|
-
) -> bool:
|
|
1236
|
-
"""Update domain knowledge confidence value."""
|
|
1237
|
-
try:
|
|
1238
|
-
existing = self._domain_knowledge.get(
|
|
1239
|
-
ids=[knowledge_id], include=["metadatas", "documents", "embeddings"]
|
|
1240
|
-
)
|
|
1241
|
-
if not existing or not existing.get("ids"):
|
|
1242
|
-
return False
|
|
1243
|
-
|
|
1244
|
-
metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
|
|
1245
|
-
document = existing["documents"][0] if existing.get("documents") else ""
|
|
1246
|
-
emb_list = existing.get("embeddings")
|
|
1247
|
-
embedding = (
|
|
1248
|
-
emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
|
|
1249
|
-
)
|
|
1250
|
-
|
|
1251
|
-
metadata["confidence"] = new_confidence
|
|
1252
|
-
|
|
1253
|
-
if self._has_embedding(embedding):
|
|
1254
|
-
self._domain_knowledge.upsert(
|
|
1255
|
-
ids=[knowledge_id],
|
|
1256
|
-
embeddings=[
|
|
1257
|
-
list(embedding) if hasattr(embedding, "__iter__") else embedding
|
|
1258
|
-
],
|
|
1259
|
-
metadatas=[metadata],
|
|
1260
|
-
documents=[document],
|
|
1261
|
-
)
|
|
1262
|
-
else:
|
|
1263
|
-
self._domain_knowledge.upsert(
|
|
1264
|
-
ids=[knowledge_id],
|
|
1265
|
-
metadatas=[metadata],
|
|
1266
|
-
documents=[document],
|
|
1267
|
-
)
|
|
1268
|
-
|
|
1269
|
-
return True
|
|
1270
|
-
except Exception as e:
|
|
1271
|
-
logger.
|
|
1272
|
-
return False
|
|
1273
|
-
|
|
1274
|
-
# ==================== DELETE OPERATIONS ====================
|
|
1275
|
-
|
|
1276
|
-
def delete_heuristic(self, heuristic_id: str) -> bool:
|
|
1277
|
-
"""Delete a heuristic by ID."""
|
|
1278
|
-
try:
|
|
1279
|
-
existing = self._heuristics.get(ids=[heuristic_id])
|
|
1280
|
-
if not existing or not existing.get("ids"):
|
|
1281
|
-
return False
|
|
1282
|
-
self._heuristics.delete(ids=[heuristic_id])
|
|
1283
|
-
logger.debug(f"Deleted heuristic: {heuristic_id}")
|
|
1284
|
-
return True
|
|
1285
|
-
except Exception as e:
|
|
1286
|
-
logger.
|
|
1287
|
-
return False
|
|
1288
|
-
|
|
1289
|
-
def delete_outcome(self, outcome_id: str) -> bool:
|
|
1290
|
-
"""Delete an outcome by ID."""
|
|
1291
|
-
try:
|
|
1292
|
-
existing = self._outcomes.get(ids=[outcome_id])
|
|
1293
|
-
if not existing or not existing.get("ids"):
|
|
1294
|
-
return False
|
|
1295
|
-
self._outcomes.delete(ids=[outcome_id])
|
|
1296
|
-
logger.debug(f"Deleted outcome: {outcome_id}")
|
|
1297
|
-
return True
|
|
1298
|
-
except Exception as e:
|
|
1299
|
-
logger.
|
|
1300
|
-
return False
|
|
1301
|
-
|
|
1302
|
-
def delete_domain_knowledge(self, knowledge_id: str) -> bool:
|
|
1303
|
-
"""Delete domain knowledge by ID."""
|
|
1304
|
-
try:
|
|
1305
|
-
existing = self._domain_knowledge.get(ids=[knowledge_id])
|
|
1306
|
-
if not existing or not existing.get("ids"):
|
|
1307
|
-
return False
|
|
1308
|
-
self._domain_knowledge.delete(ids=[knowledge_id])
|
|
1309
|
-
logger.debug(f"Deleted domain knowledge: {knowledge_id}")
|
|
1310
|
-
return True
|
|
1311
|
-
except Exception as e:
|
|
1312
|
-
logger.
|
|
1313
|
-
return False
|
|
1314
|
-
|
|
1315
|
-
def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
|
|
1316
|
-
"""Delete an anti-pattern by ID."""
|
|
1317
|
-
try:
|
|
1318
|
-
existing = self._anti_patterns.get(ids=[anti_pattern_id])
|
|
1319
|
-
if not existing or not existing.get("ids"):
|
|
1320
|
-
return False
|
|
1321
|
-
self._anti_patterns.delete(ids=[anti_pattern_id])
|
|
1322
|
-
logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
|
|
1323
|
-
return True
|
|
1324
|
-
except Exception as e:
|
|
1325
|
-
logger.
|
|
1326
|
-
return False
|
|
1327
|
-
|
|
1328
|
-
def delete_outcomes_older_than(
|
|
1329
|
-
self,
|
|
1330
|
-
project_id: str,
|
|
1331
|
-
older_than: datetime,
|
|
1332
|
-
agent: Optional[str] = None,
|
|
1333
|
-
) -> int:
|
|
1334
|
-
"""Delete old outcomes."""
|
|
1335
|
-
where_filter = self._build_where_filter(project_id=project_id, agent=agent)
|
|
1336
|
-
|
|
1337
|
-
# Get all matching outcomes
|
|
1338
|
-
results = self._outcomes.get(
|
|
1339
|
-
where=where_filter,
|
|
1340
|
-
include=["metadatas"],
|
|
1341
|
-
)
|
|
1342
|
-
|
|
1343
|
-
if not results or not results.get("ids"):
|
|
1344
|
-
return 0
|
|
1345
|
-
|
|
1346
|
-
older_than_str = older_than.isoformat()
|
|
1347
|
-
ids_to_delete = []
|
|
1348
|
-
|
|
1349
|
-
for i, id_ in enumerate(results["ids"]):
|
|
1350
|
-
meta = (
|
|
1351
|
-
results["metadatas"][i] if i < len(results.get("metadatas", [])) else {}
|
|
1352
|
-
)
|
|
1353
|
-
timestamp_str = meta.get("timestamp", "")
|
|
1354
|
-
if timestamp_str and timestamp_str < older_than_str:
|
|
1355
|
-
ids_to_delete.append(id_)
|
|
1356
|
-
|
|
1357
|
-
if ids_to_delete:
|
|
1358
|
-
self._outcomes.delete(ids=ids_to_delete)
|
|
1359
|
-
|
|
1360
|
-
logger.info(f"Deleted {len(ids_to_delete)} old outcomes")
|
|
1361
|
-
return len(ids_to_delete)
|
|
1362
|
-
|
|
1363
|
-
def delete_low_confidence_heuristics(
|
|
1364
|
-
self,
|
|
1365
|
-
project_id: str,
|
|
1366
|
-
below_confidence: float,
|
|
1367
|
-
agent: Optional[str] = None,
|
|
1368
|
-
) -> int:
|
|
1369
|
-
"""Delete low-confidence heuristics."""
|
|
1370
|
-
where_filter = self._build_where_filter(project_id=project_id, agent=agent)
|
|
1371
|
-
|
|
1372
|
-
# Get all matching heuristics
|
|
1373
|
-
results = self._heuristics.get(
|
|
1374
|
-
where=where_filter,
|
|
1375
|
-
include=["metadatas"],
|
|
1376
|
-
)
|
|
1377
|
-
|
|
1378
|
-
if not results or not results.get("ids"):
|
|
1379
|
-
return 0
|
|
1380
|
-
|
|
1381
|
-
ids_to_delete = []
|
|
1382
|
-
|
|
1383
|
-
for i, id_ in enumerate(results["ids"]):
|
|
1384
|
-
meta = (
|
|
1385
|
-
results["metadatas"][i] if i < len(results.get("metadatas", [])) else {}
|
|
1386
|
-
)
|
|
1387
|
-
confidence = meta.get("confidence", 0.0)
|
|
1388
|
-
if confidence < below_confidence:
|
|
1389
|
-
ids_to_delete.append(id_)
|
|
1390
|
-
|
|
1391
|
-
if ids_to_delete:
|
|
1392
|
-
self._heuristics.delete(ids=ids_to_delete)
|
|
1393
|
-
|
|
1394
|
-
logger.info(f"Deleted {len(ids_to_delete)} low-confidence heuristics")
|
|
1395
|
-
return len(ids_to_delete)
|
|
1396
|
-
|
|
1397
|
-
# ==================== STATS ====================
|
|
1398
|
-
|
|
1399
|
-
def get_stats(
|
|
1400
|
-
self,
|
|
1401
|
-
project_id: str,
|
|
1402
|
-
agent: Optional[str] = None,
|
|
1403
|
-
) -> Dict[str, Any]:
|
|
1404
|
-
"""Get memory statistics."""
|
|
1405
|
-
stats = {
|
|
1406
|
-
"project_id": project_id,
|
|
1407
|
-
"agent": agent,
|
|
1408
|
-
"storage_type": "chroma",
|
|
1409
|
-
"mode": self._mode,
|
|
1410
|
-
}
|
|
1411
|
-
|
|
1412
|
-
where_filter = self._build_where_filter(project_id=project_id, agent=agent)
|
|
1413
|
-
|
|
1414
|
-
# Count items in each collection
|
|
1415
|
-
for name, collection in [
|
|
1416
|
-
("heuristics", self._heuristics),
|
|
1417
|
-
("outcomes", self._outcomes),
|
|
1418
|
-
("domain_knowledge", self._domain_knowledge),
|
|
1419
|
-
("anti_patterns", self._anti_patterns),
|
|
1420
|
-
]:
|
|
1421
|
-
try:
|
|
1422
|
-
results = collection.get(where=where_filter)
|
|
1423
|
-
stats[f"{name}_count"] = len(results.get("ids", []))
|
|
1424
|
-
except Exception:
|
|
1425
|
-
stats[f"{name}_count"] = 0
|
|
1426
|
-
|
|
1427
|
-
# Preferences don't have project_id filter
|
|
1428
|
-
try:
|
|
1429
|
-
results = self._preferences.get()
|
|
1430
|
-
stats["preferences_count"] = len(results.get("ids", []))
|
|
1431
|
-
except Exception:
|
|
1432
|
-
stats["preferences_count"] = 0
|
|
1433
|
-
|
|
1434
|
-
stats["total_count"] = sum(
|
|
1435
|
-
stats.get(k, 0) for k in stats if k.endswith("_count")
|
|
1436
|
-
)
|
|
1437
|
-
|
|
1438
|
-
return stats
|
|
1439
|
-
|
|
1440
|
-
def close(self):
|
|
1441
|
-
"""Close the Chroma client (if applicable)."""
|
|
1442
|
-
# ChromaDB handles cleanup automatically
|
|
1443
|
-
logger.info("ChromaDB storage closed")
|
|
1
|
+
"""
|
|
2
|
+
ALMA Chroma Storage Backend.
|
|
3
|
+
|
|
4
|
+
Vector database storage using ChromaDB for semantic search capabilities.
|
|
5
|
+
Supports both persistent local storage and client-server mode.
|
|
6
|
+
|
|
7
|
+
Recommended for:
|
|
8
|
+
- Semantic search-focused deployments
|
|
9
|
+
- Local development with vector search
|
|
10
|
+
- Small to medium scale applications
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
from alma.storage.base import StorageBackend
|
|
20
|
+
from alma.types import (
|
|
21
|
+
AntiPattern,
|
|
22
|
+
DomainKnowledge,
|
|
23
|
+
Heuristic,
|
|
24
|
+
Outcome,
|
|
25
|
+
UserPreference,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
# Try to import chromadb
|
|
31
|
+
try:
|
|
32
|
+
import chromadb
|
|
33
|
+
from chromadb.config import Settings
|
|
34
|
+
|
|
35
|
+
CHROMADB_AVAILABLE = True
|
|
36
|
+
except ImportError:
|
|
37
|
+
CHROMADB_AVAILABLE = False
|
|
38
|
+
logger.warning(
|
|
39
|
+
"chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ChromaStorage(StorageBackend):
|
|
44
|
+
"""
|
|
45
|
+
ChromaDB storage backend with native vector search.
|
|
46
|
+
|
|
47
|
+
Uses ChromaDB collections for each memory type with built-in
|
|
48
|
+
embedding storage and similarity search.
|
|
49
|
+
|
|
50
|
+
Collections:
|
|
51
|
+
- alma_heuristics: Learned strategies
|
|
52
|
+
- alma_outcomes: Task execution records
|
|
53
|
+
- alma_preferences: User preferences
|
|
54
|
+
- alma_domain_knowledge: Domain facts
|
|
55
|
+
- alma_anti_patterns: Patterns to avoid
|
|
56
|
+
|
|
57
|
+
Modes:
|
|
58
|
+
- Persistent: Local storage with persist_directory
|
|
59
|
+
- Client-Server: Remote server with host/port
|
|
60
|
+
- Ephemeral: In-memory (for testing)
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
# Collection names
|
|
64
|
+
HEURISTICS_COLLECTION = "alma_heuristics"
|
|
65
|
+
OUTCOMES_COLLECTION = "alma_outcomes"
|
|
66
|
+
PREFERENCES_COLLECTION = "alma_preferences"
|
|
67
|
+
DOMAIN_KNOWLEDGE_COLLECTION = "alma_domain_knowledge"
|
|
68
|
+
ANTI_PATTERNS_COLLECTION = "alma_anti_patterns"
|
|
69
|
+
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
persist_directory: Optional[str] = None,
|
|
73
|
+
host: Optional[str] = None,
|
|
74
|
+
port: Optional[int] = None,
|
|
75
|
+
embedding_dim: int = 384,
|
|
76
|
+
collection_metadata: Optional[Dict[str, Any]] = None,
|
|
77
|
+
):
|
|
78
|
+
"""
|
|
79
|
+
Initialize Chroma storage.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
persist_directory: Path for persistent local storage (mutually exclusive with host/port)
|
|
83
|
+
host: Chroma server host (mutually exclusive with persist_directory)
|
|
84
|
+
port: Chroma server port (required if host is specified)
|
|
85
|
+
embedding_dim: Dimension of embedding vectors (for validation)
|
|
86
|
+
collection_metadata: Optional metadata for collections (e.g., distance function)
|
|
87
|
+
"""
|
|
88
|
+
if not CHROMADB_AVAILABLE:
|
|
89
|
+
raise ImportError(
|
|
90
|
+
"chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
self.embedding_dim = embedding_dim
|
|
94
|
+
self._collection_metadata = collection_metadata or {
|
|
95
|
+
"hnsw:space": "cosine" # Use cosine similarity
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
# Initialize client based on mode
|
|
99
|
+
if host and port:
|
|
100
|
+
# Client-server mode
|
|
101
|
+
self._client = chromadb.HttpClient(host=host, port=port)
|
|
102
|
+
self._mode = "client-server"
|
|
103
|
+
logger.info(f"ChromaDB client-server mode: {host}:{port}")
|
|
104
|
+
elif persist_directory:
|
|
105
|
+
# Persistent local mode
|
|
106
|
+
self._client = chromadb.PersistentClient(
|
|
107
|
+
path=persist_directory,
|
|
108
|
+
settings=Settings(anonymized_telemetry=False),
|
|
109
|
+
)
|
|
110
|
+
self._mode = "persistent"
|
|
111
|
+
logger.info(f"ChromaDB persistent mode: {persist_directory}")
|
|
112
|
+
else:
|
|
113
|
+
# Ephemeral mode (in-memory, for testing)
|
|
114
|
+
self._client = chromadb.Client(
|
|
115
|
+
settings=Settings(anonymized_telemetry=False),
|
|
116
|
+
)
|
|
117
|
+
self._mode = "ephemeral"
|
|
118
|
+
logger.info("ChromaDB ephemeral mode (in-memory)")
|
|
119
|
+
|
|
120
|
+
# Initialize collections
|
|
121
|
+
self._init_collections()
|
|
122
|
+
|
|
123
|
+
@classmethod
|
|
124
|
+
def from_config(cls, config: Dict[str, Any]) -> "ChromaStorage":
|
|
125
|
+
"""Create instance from configuration."""
|
|
126
|
+
chroma_config = config.get("chroma", {})
|
|
127
|
+
|
|
128
|
+
# Support environment variable expansion
|
|
129
|
+
def get_value(key: str, default: Any = None) -> Any:
|
|
130
|
+
value = chroma_config.get(key, default)
|
|
131
|
+
if (
|
|
132
|
+
isinstance(value, str)
|
|
133
|
+
and value.startswith("${")
|
|
134
|
+
and value.endswith("}")
|
|
135
|
+
):
|
|
136
|
+
env_var = value[2:-1]
|
|
137
|
+
return os.environ.get(env_var, default)
|
|
138
|
+
return value
|
|
139
|
+
|
|
140
|
+
persist_directory = get_value("persist_directory")
|
|
141
|
+
host = get_value("host")
|
|
142
|
+
port = get_value("port")
|
|
143
|
+
|
|
144
|
+
if port is not None:
|
|
145
|
+
port = int(port)
|
|
146
|
+
|
|
147
|
+
return cls(
|
|
148
|
+
persist_directory=persist_directory,
|
|
149
|
+
host=host,
|
|
150
|
+
port=port,
|
|
151
|
+
embedding_dim=int(config.get("embedding_dim", 384)),
|
|
152
|
+
collection_metadata=chroma_config.get("collection_metadata"),
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
def _init_collections(self):
|
|
156
|
+
"""Initialize or get all collections."""
|
|
157
|
+
self._heuristics = self._client.get_or_create_collection(
|
|
158
|
+
name=self.HEURISTICS_COLLECTION,
|
|
159
|
+
metadata=self._collection_metadata,
|
|
160
|
+
)
|
|
161
|
+
self._outcomes = self._client.get_or_create_collection(
|
|
162
|
+
name=self.OUTCOMES_COLLECTION,
|
|
163
|
+
metadata=self._collection_metadata,
|
|
164
|
+
)
|
|
165
|
+
self._preferences = self._client.get_or_create_collection(
|
|
166
|
+
name=self.PREFERENCES_COLLECTION,
|
|
167
|
+
metadata=self._collection_metadata,
|
|
168
|
+
)
|
|
169
|
+
self._domain_knowledge = self._client.get_or_create_collection(
|
|
170
|
+
name=self.DOMAIN_KNOWLEDGE_COLLECTION,
|
|
171
|
+
metadata=self._collection_metadata,
|
|
172
|
+
)
|
|
173
|
+
self._anti_patterns = self._client.get_or_create_collection(
|
|
174
|
+
name=self.ANTI_PATTERNS_COLLECTION,
|
|
175
|
+
metadata=self._collection_metadata,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
def _format_get_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
|
|
179
|
+
"""Reformat get() results to match query() format."""
|
|
180
|
+
emb = results.get("embeddings")
|
|
181
|
+
has_embeddings = emb is not None and (
|
|
182
|
+
(hasattr(emb, "__len__") and len(emb) > 0)
|
|
183
|
+
or (hasattr(emb, "size") and emb.size > 0)
|
|
184
|
+
)
|
|
185
|
+
return {
|
|
186
|
+
"ids": [results.get("ids", [])],
|
|
187
|
+
"metadatas": [results.get("metadatas", [])],
|
|
188
|
+
"documents": [results.get("documents", [])],
|
|
189
|
+
"embeddings": [emb] if has_embeddings else None,
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
def _datetime_to_str(self, dt: Optional[datetime]) -> Optional[str]:
|
|
193
|
+
"""Convert datetime to ISO string for storage."""
|
|
194
|
+
if dt is None:
|
|
195
|
+
return None
|
|
196
|
+
return dt.isoformat()
|
|
197
|
+
|
|
198
|
+
def _str_to_datetime(self, s: Optional[str]) -> Optional[datetime]:
|
|
199
|
+
"""Convert ISO string to datetime."""
|
|
200
|
+
if s is None:
|
|
201
|
+
return None
|
|
202
|
+
try:
|
|
203
|
+
return datetime.fromisoformat(s.replace("Z", "+00:00"))
|
|
204
|
+
except (ValueError, AttributeError):
|
|
205
|
+
return datetime.now(timezone.utc)
|
|
206
|
+
|
|
207
|
+
# ==================== WRITE OPERATIONS ====================
|
|
208
|
+
|
|
209
|
+
def save_heuristic(self, heuristic: Heuristic) -> str:
|
|
210
|
+
"""Save a heuristic."""
|
|
211
|
+
metadata = {
|
|
212
|
+
"agent": heuristic.agent,
|
|
213
|
+
"project_id": heuristic.project_id,
|
|
214
|
+
"condition": heuristic.condition,
|
|
215
|
+
"strategy": heuristic.strategy,
|
|
216
|
+
"confidence": heuristic.confidence,
|
|
217
|
+
"occurrence_count": heuristic.occurrence_count,
|
|
218
|
+
"success_count": heuristic.success_count,
|
|
219
|
+
"last_validated": self._datetime_to_str(heuristic.last_validated),
|
|
220
|
+
"created_at": self._datetime_to_str(heuristic.created_at),
|
|
221
|
+
"extra_metadata": json.dumps(heuristic.metadata)
|
|
222
|
+
if heuristic.metadata
|
|
223
|
+
else "{}",
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
# Chroma requires documents - use condition + strategy as document
|
|
227
|
+
document = f"{heuristic.condition}\n{heuristic.strategy}"
|
|
228
|
+
|
|
229
|
+
if heuristic.embedding:
|
|
230
|
+
self._heuristics.upsert(
|
|
231
|
+
ids=[heuristic.id],
|
|
232
|
+
embeddings=[heuristic.embedding],
|
|
233
|
+
metadatas=[metadata],
|
|
234
|
+
documents=[document],
|
|
235
|
+
)
|
|
236
|
+
else:
|
|
237
|
+
self._heuristics.upsert(
|
|
238
|
+
ids=[heuristic.id],
|
|
239
|
+
metadatas=[metadata],
|
|
240
|
+
documents=[document],
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
logger.debug(f"Saved heuristic: {heuristic.id}")
|
|
244
|
+
return heuristic.id
|
|
245
|
+
|
|
246
|
+
def save_outcome(self, outcome: Outcome) -> str:
|
|
247
|
+
"""Save an outcome."""
|
|
248
|
+
metadata = {
|
|
249
|
+
"agent": outcome.agent,
|
|
250
|
+
"project_id": outcome.project_id,
|
|
251
|
+
"task_type": outcome.task_type or "general",
|
|
252
|
+
"success": outcome.success,
|
|
253
|
+
"strategy_used": outcome.strategy_used or "",
|
|
254
|
+
"duration_ms": outcome.duration_ms or 0,
|
|
255
|
+
"error_message": outcome.error_message or "",
|
|
256
|
+
"user_feedback": outcome.user_feedback or "",
|
|
257
|
+
"timestamp": self._datetime_to_str(outcome.timestamp),
|
|
258
|
+
"extra_metadata": json.dumps(outcome.metadata)
|
|
259
|
+
if outcome.metadata
|
|
260
|
+
else "{}",
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
document = outcome.task_description
|
|
264
|
+
|
|
265
|
+
if outcome.embedding:
|
|
266
|
+
self._outcomes.upsert(
|
|
267
|
+
ids=[outcome.id],
|
|
268
|
+
embeddings=[outcome.embedding],
|
|
269
|
+
metadatas=[metadata],
|
|
270
|
+
documents=[document],
|
|
271
|
+
)
|
|
272
|
+
else:
|
|
273
|
+
self._outcomes.upsert(
|
|
274
|
+
ids=[outcome.id],
|
|
275
|
+
metadatas=[metadata],
|
|
276
|
+
documents=[document],
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
logger.debug(f"Saved outcome: {outcome.id}")
|
|
280
|
+
return outcome.id
|
|
281
|
+
|
|
282
|
+
def save_user_preference(self, preference: UserPreference) -> str:
|
|
283
|
+
"""Save a user preference."""
|
|
284
|
+
metadata = {
|
|
285
|
+
"user_id": preference.user_id,
|
|
286
|
+
"category": preference.category or "general",
|
|
287
|
+
"source": preference.source or "unknown",
|
|
288
|
+
"confidence": preference.confidence,
|
|
289
|
+
"timestamp": self._datetime_to_str(preference.timestamp),
|
|
290
|
+
"extra_metadata": json.dumps(preference.metadata)
|
|
291
|
+
if preference.metadata
|
|
292
|
+
else "{}",
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
document = preference.preference
|
|
296
|
+
|
|
297
|
+
self._preferences.upsert(
|
|
298
|
+
ids=[preference.id],
|
|
299
|
+
metadatas=[metadata],
|
|
300
|
+
documents=[document],
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
logger.debug(f"Saved preference: {preference.id}")
|
|
304
|
+
return preference.id
|
|
305
|
+
|
|
306
|
+
def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
|
|
307
|
+
"""Save domain knowledge."""
|
|
308
|
+
metadata = {
|
|
309
|
+
"agent": knowledge.agent,
|
|
310
|
+
"project_id": knowledge.project_id,
|
|
311
|
+
"domain": knowledge.domain or "general",
|
|
312
|
+
"source": knowledge.source or "unknown",
|
|
313
|
+
"confidence": knowledge.confidence,
|
|
314
|
+
"last_verified": self._datetime_to_str(knowledge.last_verified),
|
|
315
|
+
"extra_metadata": json.dumps(knowledge.metadata)
|
|
316
|
+
if knowledge.metadata
|
|
317
|
+
else "{}",
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
document = knowledge.fact
|
|
321
|
+
|
|
322
|
+
if knowledge.embedding:
|
|
323
|
+
self._domain_knowledge.upsert(
|
|
324
|
+
ids=[knowledge.id],
|
|
325
|
+
embeddings=[knowledge.embedding],
|
|
326
|
+
metadatas=[metadata],
|
|
327
|
+
documents=[document],
|
|
328
|
+
)
|
|
329
|
+
else:
|
|
330
|
+
self._domain_knowledge.upsert(
|
|
331
|
+
ids=[knowledge.id],
|
|
332
|
+
metadatas=[metadata],
|
|
333
|
+
documents=[document],
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
logger.debug(f"Saved domain knowledge: {knowledge.id}")
|
|
337
|
+
return knowledge.id
|
|
338
|
+
|
|
339
|
+
def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
|
|
340
|
+
"""Save an anti-pattern."""
|
|
341
|
+
metadata = {
|
|
342
|
+
"agent": anti_pattern.agent,
|
|
343
|
+
"project_id": anti_pattern.project_id,
|
|
344
|
+
"why_bad": anti_pattern.why_bad or "",
|
|
345
|
+
"better_alternative": anti_pattern.better_alternative or "",
|
|
346
|
+
"occurrence_count": anti_pattern.occurrence_count,
|
|
347
|
+
"last_seen": self._datetime_to_str(anti_pattern.last_seen),
|
|
348
|
+
"created_at": self._datetime_to_str(anti_pattern.created_at),
|
|
349
|
+
"extra_metadata": json.dumps(anti_pattern.metadata)
|
|
350
|
+
if anti_pattern.metadata
|
|
351
|
+
else "{}",
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
document = anti_pattern.pattern
|
|
355
|
+
|
|
356
|
+
if anti_pattern.embedding:
|
|
357
|
+
self._anti_patterns.upsert(
|
|
358
|
+
ids=[anti_pattern.id],
|
|
359
|
+
embeddings=[anti_pattern.embedding],
|
|
360
|
+
metadatas=[metadata],
|
|
361
|
+
documents=[document],
|
|
362
|
+
)
|
|
363
|
+
else:
|
|
364
|
+
self._anti_patterns.upsert(
|
|
365
|
+
ids=[anti_pattern.id],
|
|
366
|
+
metadatas=[metadata],
|
|
367
|
+
documents=[document],
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
|
|
371
|
+
return anti_pattern.id
|
|
372
|
+
|
|
373
|
+
# ==================== BATCH WRITE OPERATIONS ====================
|
|
374
|
+
|
|
375
|
+
def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
|
|
376
|
+
"""Save multiple heuristics in a batch."""
|
|
377
|
+
if not heuristics:
|
|
378
|
+
return []
|
|
379
|
+
|
|
380
|
+
ids = []
|
|
381
|
+
embeddings = []
|
|
382
|
+
metadatas = []
|
|
383
|
+
documents = []
|
|
384
|
+
has_embeddings = False
|
|
385
|
+
|
|
386
|
+
for h in heuristics:
|
|
387
|
+
ids.append(h.id)
|
|
388
|
+
metadatas.append(
|
|
389
|
+
{
|
|
390
|
+
"agent": h.agent,
|
|
391
|
+
"project_id": h.project_id,
|
|
392
|
+
"condition": h.condition,
|
|
393
|
+
"strategy": h.strategy,
|
|
394
|
+
"confidence": h.confidence,
|
|
395
|
+
"occurrence_count": h.occurrence_count,
|
|
396
|
+
"success_count": h.success_count,
|
|
397
|
+
"last_validated": self._datetime_to_str(h.last_validated),
|
|
398
|
+
"created_at": self._datetime_to_str(h.created_at),
|
|
399
|
+
"extra_metadata": json.dumps(h.metadata) if h.metadata else "{}",
|
|
400
|
+
}
|
|
401
|
+
)
|
|
402
|
+
documents.append(f"{h.condition}\n{h.strategy}")
|
|
403
|
+
if h.embedding:
|
|
404
|
+
embeddings.append(h.embedding)
|
|
405
|
+
has_embeddings = True
|
|
406
|
+
else:
|
|
407
|
+
embeddings.append(None)
|
|
408
|
+
|
|
409
|
+
if has_embeddings and all(e is not None for e in embeddings):
|
|
410
|
+
self._heuristics.upsert(
|
|
411
|
+
ids=ids,
|
|
412
|
+
embeddings=embeddings,
|
|
413
|
+
metadatas=metadatas,
|
|
414
|
+
documents=documents,
|
|
415
|
+
)
|
|
416
|
+
else:
|
|
417
|
+
self._heuristics.upsert(
|
|
418
|
+
ids=ids,
|
|
419
|
+
metadatas=metadatas,
|
|
420
|
+
documents=documents,
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
logger.debug(f"Batch saved {len(heuristics)} heuristics")
|
|
424
|
+
return ids
|
|
425
|
+
|
|
426
|
+
def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
|
|
427
|
+
"""Save multiple outcomes in a batch."""
|
|
428
|
+
if not outcomes:
|
|
429
|
+
return []
|
|
430
|
+
|
|
431
|
+
ids = []
|
|
432
|
+
embeddings = []
|
|
433
|
+
metadatas = []
|
|
434
|
+
documents = []
|
|
435
|
+
has_embeddings = False
|
|
436
|
+
|
|
437
|
+
for o in outcomes:
|
|
438
|
+
ids.append(o.id)
|
|
439
|
+
metadatas.append(
|
|
440
|
+
{
|
|
441
|
+
"agent": o.agent,
|
|
442
|
+
"project_id": o.project_id,
|
|
443
|
+
"task_type": o.task_type or "general",
|
|
444
|
+
"success": o.success,
|
|
445
|
+
"strategy_used": o.strategy_used or "",
|
|
446
|
+
"duration_ms": o.duration_ms or 0,
|
|
447
|
+
"error_message": o.error_message or "",
|
|
448
|
+
"user_feedback": o.user_feedback or "",
|
|
449
|
+
"timestamp": self._datetime_to_str(o.timestamp),
|
|
450
|
+
"extra_metadata": json.dumps(o.metadata) if o.metadata else "{}",
|
|
451
|
+
}
|
|
452
|
+
)
|
|
453
|
+
documents.append(o.task_description)
|
|
454
|
+
if o.embedding:
|
|
455
|
+
embeddings.append(o.embedding)
|
|
456
|
+
has_embeddings = True
|
|
457
|
+
else:
|
|
458
|
+
embeddings.append(None)
|
|
459
|
+
|
|
460
|
+
if has_embeddings and all(e is not None for e in embeddings):
|
|
461
|
+
self._outcomes.upsert(
|
|
462
|
+
ids=ids,
|
|
463
|
+
embeddings=embeddings,
|
|
464
|
+
metadatas=metadatas,
|
|
465
|
+
documents=documents,
|
|
466
|
+
)
|
|
467
|
+
else:
|
|
468
|
+
self._outcomes.upsert(
|
|
469
|
+
ids=ids,
|
|
470
|
+
metadatas=metadatas,
|
|
471
|
+
documents=documents,
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
logger.debug(f"Batch saved {len(outcomes)} outcomes")
|
|
475
|
+
return ids
|
|
476
|
+
|
|
477
|
+
def save_domain_knowledge_batch(
|
|
478
|
+
self, knowledge_items: List[DomainKnowledge]
|
|
479
|
+
) -> List[str]:
|
|
480
|
+
"""Save multiple domain knowledge items in a batch."""
|
|
481
|
+
if not knowledge_items:
|
|
482
|
+
return []
|
|
483
|
+
|
|
484
|
+
ids = []
|
|
485
|
+
embeddings = []
|
|
486
|
+
metadatas = []
|
|
487
|
+
documents = []
|
|
488
|
+
has_embeddings = False
|
|
489
|
+
|
|
490
|
+
for k in knowledge_items:
|
|
491
|
+
ids.append(k.id)
|
|
492
|
+
metadatas.append(
|
|
493
|
+
{
|
|
494
|
+
"agent": k.agent,
|
|
495
|
+
"project_id": k.project_id,
|
|
496
|
+
"domain": k.domain or "general",
|
|
497
|
+
"source": k.source or "unknown",
|
|
498
|
+
"confidence": k.confidence,
|
|
499
|
+
"last_verified": self._datetime_to_str(k.last_verified),
|
|
500
|
+
"extra_metadata": json.dumps(k.metadata) if k.metadata else "{}",
|
|
501
|
+
}
|
|
502
|
+
)
|
|
503
|
+
documents.append(k.fact)
|
|
504
|
+
if k.embedding:
|
|
505
|
+
embeddings.append(k.embedding)
|
|
506
|
+
has_embeddings = True
|
|
507
|
+
else:
|
|
508
|
+
embeddings.append(None)
|
|
509
|
+
|
|
510
|
+
if has_embeddings and all(e is not None for e in embeddings):
|
|
511
|
+
self._domain_knowledge.upsert(
|
|
512
|
+
ids=ids,
|
|
513
|
+
embeddings=embeddings,
|
|
514
|
+
metadatas=metadatas,
|
|
515
|
+
documents=documents,
|
|
516
|
+
)
|
|
517
|
+
else:
|
|
518
|
+
self._domain_knowledge.upsert(
|
|
519
|
+
ids=ids,
|
|
520
|
+
metadatas=metadatas,
|
|
521
|
+
documents=documents,
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
|
|
525
|
+
return ids
|
|
526
|
+
|
|
527
|
+
# ==================== READ OPERATIONS ====================
|
|
528
|
+
|
|
529
|
+
def _build_where_filter(
|
|
530
|
+
self,
|
|
531
|
+
project_id: Optional[str] = None,
|
|
532
|
+
agent: Optional[str] = None,
|
|
533
|
+
user_id: Optional[str] = None,
|
|
534
|
+
domain: Optional[str] = None,
|
|
535
|
+
task_type: Optional[str] = None,
|
|
536
|
+
min_confidence: Optional[float] = None,
|
|
537
|
+
success_only: bool = False,
|
|
538
|
+
) -> Optional[Dict[str, Any]]:
|
|
539
|
+
"""Build Chroma where filter from parameters."""
|
|
540
|
+
conditions = []
|
|
541
|
+
|
|
542
|
+
if project_id:
|
|
543
|
+
conditions.append({"project_id": {"$eq": project_id}})
|
|
544
|
+
if agent:
|
|
545
|
+
conditions.append({"agent": {"$eq": agent}})
|
|
546
|
+
if user_id:
|
|
547
|
+
conditions.append({"user_id": {"$eq": user_id}})
|
|
548
|
+
if domain:
|
|
549
|
+
conditions.append({"domain": {"$eq": domain}})
|
|
550
|
+
if task_type:
|
|
551
|
+
conditions.append({"task_type": {"$eq": task_type}})
|
|
552
|
+
if min_confidence is not None and min_confidence > 0:
|
|
553
|
+
conditions.append({"confidence": {"$gte": min_confidence}})
|
|
554
|
+
if success_only:
|
|
555
|
+
conditions.append({"success": {"$eq": True}})
|
|
556
|
+
|
|
557
|
+
if not conditions:
|
|
558
|
+
return None
|
|
559
|
+
if len(conditions) == 1:
|
|
560
|
+
return conditions[0]
|
|
561
|
+
return {"$and": conditions}
|
|
562
|
+
|
|
563
|
+
def _has_embedding(self, emb: Any) -> bool:
|
|
564
|
+
"""Safely check if embedding is not None/empty (handles numpy arrays)."""
|
|
565
|
+
if emb is None:
|
|
566
|
+
return False
|
|
567
|
+
if hasattr(emb, "__len__"):
|
|
568
|
+
try:
|
|
569
|
+
return len(emb) > 0
|
|
570
|
+
except (TypeError, ValueError):
|
|
571
|
+
pass
|
|
572
|
+
if hasattr(emb, "size"):
|
|
573
|
+
return emb.size > 0
|
|
574
|
+
return True
|
|
575
|
+
|
|
576
|
+
    def _get_embedding_list(self, results: Dict[str, Any], num_ids: int) -> List[Any]:
        """Safely extract embeddings list from results.

        Returns one entry per id (``None`` placeholders when embeddings are
        missing), accepting either the nested query() shape
        (``[[emb1, emb2, ...]]``) or a flat list, and tolerating numpy
        arrays in place of plain lists.
        """
        emb_data = results.get("embeddings")
        if emb_data is None:
            return [None] * num_ids
        # Handle both nested list format (query results) and flat format
        if isinstance(emb_data, list) and len(emb_data) > 0:
            first = emb_data[0]
            # Check if it's a nested list (query format: [[emb1, emb2, ...]])
            if isinstance(first, list) or (
                hasattr(first, "__iter__") and not isinstance(first, (str, bytes))
            ):
                # Could be list of embeddings or numpy array
                try:
                    if hasattr(first, "tolist"):
                        # numpy array
                        return list(emb_data[0])
                    # NOTE(review): a flat list of list-embeddings also takes this
                    # branch and would return first's floats — presumably callers
                    # only pass the nested shape (see _format_get_results); verify.
                    return list(first) if isinstance(first, list) else [first]
                except (TypeError, IndexError):
                    return [None] * num_ids
            return list(emb_data)
        return [None] * num_ids
|
|
598
|
+
|
|
599
|
+
    def _results_to_heuristics(self, results: Dict[str, Any]) -> List[Heuristic]:
        """Convert Chroma query results to Heuristic objects.

        Expects the nested query() shape (``results["ids"][0]`` is the id
        list of the single query). Missing metadata fields fall back to
        neutral defaults; unparsable/absent timestamps become now(UTC).
        """
        heuristics = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return heuristics

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        embeddings = self._get_embedding_list(results, len(ids))

        for i, id_ in enumerate(ids):
            # Guard against rows shorter than the id list.
            meta = metadatas[i] if i < len(metadatas) else {}
            emb = embeddings[i] if i < len(embeddings) else None

            # extra_metadata is stored JSON-encoded (Chroma metadata is flat).
            extra = json.loads(meta.get("extra_metadata", "{}"))

            heuristics.append(
                Heuristic(
                    id=id_,
                    agent=meta.get("agent", ""),
                    project_id=meta.get("project_id", ""),
                    condition=meta.get("condition", ""),
                    strategy=meta.get("strategy", ""),
                    confidence=meta.get("confidence", 0.0),
                    occurrence_count=meta.get("occurrence_count", 0),
                    success_count=meta.get("success_count", 0),
                    last_validated=self._str_to_datetime(meta.get("last_validated"))
                    or datetime.now(timezone.utc),
                    created_at=self._str_to_datetime(meta.get("created_at"))
                    or datetime.now(timezone.utc),
                    # Normalize numpy arrays to plain lists.
                    embedding=list(emb)
                    if emb is not None and hasattr(emb, "__iter__")
                    else emb,
                    metadata=extra,
                )
            )

        return heuristics
|
|
637
|
+
|
|
638
|
+
    def _results_to_outcomes(self, results: Dict[str, Any]) -> List[Outcome]:
        """Convert Chroma query results to Outcome objects.

        Expects the nested query() shape. The document text is restored as
        ``task_description``; empty-string sentinels stored for optional
        fields are mapped back to ``None``.
        """
        outcomes = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return outcomes

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        documents = results.get("documents", [[]])[0]
        embeddings = self._get_embedding_list(results, len(ids))

        for i, id_ in enumerate(ids):
            # Guard against rows shorter than the id list.
            meta = metadatas[i] if i < len(metadatas) else {}
            doc = documents[i] if i < len(documents) else ""
            emb = embeddings[i] if i < len(embeddings) else None

            # extra_metadata is stored JSON-encoded (Chroma metadata is flat).
            extra = json.loads(meta.get("extra_metadata", "{}"))

            outcomes.append(
                Outcome(
                    id=id_,
                    agent=meta.get("agent", ""),
                    project_id=meta.get("project_id", ""),
                    task_type=meta.get("task_type", "general"),
                    task_description=doc,
                    success=meta.get("success", False),
                    strategy_used=meta.get("strategy_used", ""),
                    duration_ms=meta.get("duration_ms"),
                    # "" was stored for missing values; restore None.
                    error_message=meta.get("error_message") or None,
                    user_feedback=meta.get("user_feedback") or None,
                    timestamp=self._str_to_datetime(meta.get("timestamp"))
                    or datetime.now(timezone.utc),
                    # Normalize numpy arrays to plain lists.
                    embedding=list(emb)
                    if emb is not None and hasattr(emb, "__iter__")
                    else emb,
                    metadata=extra,
                )
            )

        return outcomes
|
|
678
|
+
|
|
679
|
+
    def _results_to_preferences(self, results: Dict[str, Any]) -> List[UserPreference]:
        """Convert Chroma query results to UserPreference objects.

        Expects the nested query() shape. The document text is restored as
        the preference text; embeddings are not used for preferences.
        """
        preferences = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return preferences

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        documents = results.get("documents", [[]])[0]

        for i, id_ in enumerate(ids):
            # Guard against rows shorter than the id list.
            meta = metadatas[i] if i < len(metadatas) else {}
            doc = documents[i] if i < len(documents) else ""

            # extra_metadata is stored JSON-encoded (Chroma metadata is flat).
            extra = json.loads(meta.get("extra_metadata", "{}"))

            preferences.append(
                UserPreference(
                    id=id_,
                    user_id=meta.get("user_id", ""),
                    category=meta.get("category", "general"),
                    preference=doc,
                    source=meta.get("source", "unknown"),
                    confidence=meta.get("confidence", 1.0),
                    timestamp=self._str_to_datetime(meta.get("timestamp"))
                    or datetime.now(timezone.utc),
                    metadata=extra,
                )
            )

        return preferences
|
|
710
|
+
|
|
711
|
+
    def _results_to_domain_knowledge(
        self, results: Dict[str, Any]
    ) -> List[DomainKnowledge]:
        """Convert Chroma query results to DomainKnowledge objects.

        Expects the nested query() shape. The document text is restored as
        the fact; unparsable/absent timestamps become now(UTC).
        """
        knowledge = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return knowledge

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        documents = results.get("documents", [[]])[0]
        embeddings = self._get_embedding_list(results, len(ids))

        for i, id_ in enumerate(ids):
            # Guard against rows shorter than the id list.
            meta = metadatas[i] if i < len(metadatas) else {}
            doc = documents[i] if i < len(documents) else ""
            emb = embeddings[i] if i < len(embeddings) else None

            # extra_metadata is stored JSON-encoded (Chroma metadata is flat).
            extra = json.loads(meta.get("extra_metadata", "{}"))

            knowledge.append(
                DomainKnowledge(
                    id=id_,
                    agent=meta.get("agent", ""),
                    project_id=meta.get("project_id", ""),
                    domain=meta.get("domain", "general"),
                    fact=doc,
                    source=meta.get("source", "unknown"),
                    confidence=meta.get("confidence", 1.0),
                    last_verified=self._str_to_datetime(meta.get("last_verified"))
                    or datetime.now(timezone.utc),
                    # Normalize numpy arrays to plain lists.
                    embedding=list(emb)
                    if emb is not None and hasattr(emb, "__iter__")
                    else emb,
                    metadata=extra,
                )
            )

        return knowledge
|
|
750
|
+
|
|
751
|
+
    def _results_to_anti_patterns(self, results: Dict[str, Any]) -> List[AntiPattern]:
        """Convert Chroma query results to AntiPattern objects.

        Expects the nested query() shape. The document text is restored as
        the pattern; unparsable/absent timestamps become now(UTC).
        """
        patterns = []
        if not results or not results.get("ids") or not results["ids"][0]:
            return patterns

        ids = results["ids"][0]
        metadatas = results.get("metadatas", [[]])[0]
        documents = results.get("documents", [[]])[0]
        embeddings = self._get_embedding_list(results, len(ids))

        for i, id_ in enumerate(ids):
            # Guard against rows shorter than the id list.
            meta = metadatas[i] if i < len(metadatas) else {}
            doc = documents[i] if i < len(documents) else ""
            emb = embeddings[i] if i < len(embeddings) else None

            # extra_metadata is stored JSON-encoded (Chroma metadata is flat).
            extra = json.loads(meta.get("extra_metadata", "{}"))

            patterns.append(
                AntiPattern(
                    id=id_,
                    agent=meta.get("agent", ""),
                    project_id=meta.get("project_id", ""),
                    pattern=doc,
                    why_bad=meta.get("why_bad", ""),
                    better_alternative=meta.get("better_alternative", ""),
                    occurrence_count=meta.get("occurrence_count", 1),
                    last_seen=self._str_to_datetime(meta.get("last_seen"))
                    or datetime.now(timezone.utc),
                    created_at=self._str_to_datetime(meta.get("created_at"))
                    or datetime.now(timezone.utc),
                    # Normalize numpy arrays to plain lists.
                    embedding=list(emb)
                    if emb is not None and hasattr(emb, "__iter__")
                    else emb,
                    metadata=extra,
                )
            )

        return patterns
|
|
790
|
+
|
|
791
|
+
    def get_heuristics(
        self,
        project_id: str,
        agent: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
        min_confidence: float = 0.0,
    ) -> List[Heuristic]:
        """Get heuristics with optional vector search.

        With *embedding*, runs a similarity-ranked query; without it, does a
        metadata-filtered get() (unranked) and reshapes the flat result to
        the nested query() layout before conversion.
        """
        where_filter = self._build_where_filter(
            project_id=project_id,
            agent=agent,
            min_confidence=min_confidence,
        )

        if embedding:
            results = self._heuristics.query(
                query_embeddings=[embedding],
                n_results=top_k,
                where=where_filter,
                include=["metadatas", "documents", "embeddings"],
            )
        else:
            results = self._heuristics.get(
                where=where_filter,
                limit=top_k,
                include=["metadatas", "documents", "embeddings"],
            )
            # get() returns flat lists; normalize to query()'s nested shape.
            results = self._format_get_results(results)

        return self._results_to_heuristics(results)
|
|
822
|
+
|
|
823
|
+
    def get_outcomes(
        self,
        project_id: str,
        agent: Optional[str] = None,
        task_type: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
        success_only: bool = False,
    ) -> List[Outcome]:
        """Get outcomes with optional vector search.

        With *embedding*, runs a similarity-ranked query; without it, does a
        metadata-filtered get() (unranked) and reshapes the flat result to
        the nested query() layout. ``success_only`` filters to successes.
        """
        where_filter = self._build_where_filter(
            project_id=project_id,
            agent=agent,
            task_type=task_type,
            success_only=success_only,
        )

        if embedding:
            results = self._outcomes.query(
                query_embeddings=[embedding],
                n_results=top_k,
                where=where_filter,
                include=["metadatas", "documents", "embeddings"],
            )
        else:
            results = self._outcomes.get(
                where=where_filter,
                limit=top_k,
                include=["metadatas", "documents", "embeddings"],
            )
            # get() returns flat lists; normalize to query()'s nested shape.
            results = self._format_get_results(results)

        return self._results_to_outcomes(results)
|
|
856
|
+
|
|
857
|
+
def get_user_preferences(
|
|
858
|
+
self,
|
|
859
|
+
user_id: str,
|
|
860
|
+
category: Optional[str] = None,
|
|
861
|
+
) -> List[UserPreference]:
|
|
862
|
+
"""Get user preferences."""
|
|
863
|
+
where_filter = self._build_where_filter(user_id=user_id)
|
|
864
|
+
if category:
|
|
865
|
+
if where_filter:
|
|
866
|
+
where_filter = {"$and": [where_filter, {"category": {"$eq": category}}]}
|
|
867
|
+
else:
|
|
868
|
+
where_filter = {"category": {"$eq": category}}
|
|
869
|
+
|
|
870
|
+
results = self._preferences.get(
|
|
871
|
+
where=where_filter,
|
|
872
|
+
include=["metadatas", "documents"],
|
|
873
|
+
)
|
|
874
|
+
results = {
|
|
875
|
+
"ids": [results.get("ids", [])],
|
|
876
|
+
"metadatas": [results.get("metadatas", [])],
|
|
877
|
+
"documents": [results.get("documents", [])],
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
return self._results_to_preferences(results)
|
|
881
|
+
|
|
882
|
+
    def get_domain_knowledge(
        self,
        project_id: str,
        agent: Optional[str] = None,
        domain: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
    ) -> List[DomainKnowledge]:
        """Get domain knowledge with optional vector search.

        With *embedding*, runs a similarity-ranked query; without it, does a
        metadata-filtered get() (unranked) and reshapes the flat result to
        the nested query() layout before conversion.
        """
        where_filter = self._build_where_filter(
            project_id=project_id,
            agent=agent,
            domain=domain,
        )

        if embedding:
            results = self._domain_knowledge.query(
                query_embeddings=[embedding],
                n_results=top_k,
                where=where_filter,
                include=["metadatas", "documents", "embeddings"],
            )
        else:
            results = self._domain_knowledge.get(
                where=where_filter,
                limit=top_k,
                include=["metadatas", "documents", "embeddings"],
            )
            # get() returns flat lists; normalize to query()'s nested shape.
            results = self._format_get_results(results)

        return self._results_to_domain_knowledge(results)
|
|
913
|
+
|
|
914
|
+
    def get_anti_patterns(
        self,
        project_id: str,
        agent: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        top_k: int = 5,
    ) -> List[AntiPattern]:
        """Get anti-patterns with optional vector search.

        With *embedding*, runs a similarity-ranked query; without it, does a
        metadata-filtered get() (unranked) and reshapes the flat result to
        the nested query() layout before conversion.
        """
        where_filter = self._build_where_filter(
            project_id=project_id,
            agent=agent,
        )

        if embedding:
            results = self._anti_patterns.query(
                query_embeddings=[embedding],
                n_results=top_k,
                where=where_filter,
                include=["metadatas", "documents", "embeddings"],
            )
        else:
            results = self._anti_patterns.get(
                where=where_filter,
                limit=top_k,
                include=["metadatas", "documents", "embeddings"],
            )
            # get() returns flat lists; normalize to query()'s nested shape.
            results = self._format_get_results(results)

        return self._results_to_anti_patterns(results)
|
|
943
|
+
|
|
944
|
+
# ==================== MULTI-AGENT MEMORY SHARING ====================
|
|
945
|
+
|
|
946
|
+
def _build_agents_filter(
|
|
947
|
+
self,
|
|
948
|
+
project_id: str,
|
|
949
|
+
agents: List[str],
|
|
950
|
+
**kwargs: Any,
|
|
951
|
+
) -> Optional[Dict[str, Any]]:
|
|
952
|
+
"""Build filter for multiple agents."""
|
|
953
|
+
if not agents:
|
|
954
|
+
return None
|
|
955
|
+
|
|
956
|
+
agent_conditions = [{"agent": {"$eq": a}} for a in agents]
|
|
957
|
+
agents_filter = (
|
|
958
|
+
{"$or": agent_conditions}
|
|
959
|
+
if len(agent_conditions) > 1
|
|
960
|
+
else agent_conditions[0]
|
|
961
|
+
)
|
|
962
|
+
|
|
963
|
+
base_filter = self._build_where_filter(project_id=project_id, **kwargs)
|
|
964
|
+
|
|
965
|
+
if base_filter:
|
|
966
|
+
return {"$and": [base_filter, agents_filter]}
|
|
967
|
+
return {"$and": [{"project_id": {"$eq": project_id}}, agents_filter]}
|
|
968
|
+
|
|
969
|
+
def get_heuristics_for_agents(
    self,
    project_id: str,
    agents: List[str],
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
    min_confidence: float = 0.0,
) -> List[Heuristic]:
    """Fetch heuristics contributed by any of the given agents.

    Args:
        project_id: Project scope for the lookup.
        agents: Agent names to include; an empty list short-circuits to [].
        embedding: Optional query vector; enables similarity search.
        top_k: Per-agent result budget.
        min_confidence: Minimum confidence forwarded into the filter.

    Returns:
        The matching Heuristic objects.
    """
    if not agents:
        return []

    scope = self._build_agents_filter(
        project_id=project_id,
        agents=agents,
        min_confidence=min_confidence,
    )
    fields = ["metadatas", "documents", "embeddings"]
    # Scale the budget so each agent can contribute up to top_k results.
    budget = top_k * len(agents)

    if embedding:
        raw = self._heuristics.query(
            query_embeddings=[embedding],
            n_results=budget,
            where=scope,
            include=fields,
        )
    else:
        raw = self._heuristics.get(where=scope, limit=budget, include=fields)
        raw = self._format_get_results(raw)

    return self._results_to_heuristics(raw)
|
|
1003
|
+
|
|
1004
|
+
def get_outcomes_for_agents(
    self,
    project_id: str,
    agents: List[str],
    task_type: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
    success_only: bool = False,
) -> List[Outcome]:
    """Fetch outcomes recorded by any of the given agents.

    Args:
        project_id: Project scope for the lookup.
        agents: Agent names to include; an empty list short-circuits to [].
        task_type: Optional task-type filter forwarded into the where clause.
        embedding: Optional query vector; enables similarity search.
        top_k: Per-agent result budget.
        success_only: When True, restrict to successful outcomes.

    Returns:
        The matching Outcome objects.
    """
    if not agents:
        return []

    scope = self._build_agents_filter(
        project_id=project_id,
        agents=agents,
        task_type=task_type,
        success_only=success_only,
    )
    fields = ["metadatas", "documents", "embeddings"]
    # Scale the budget so each agent can contribute up to top_k results.
    budget = top_k * len(agents)

    if embedding:
        raw = self._outcomes.query(
            query_embeddings=[embedding],
            n_results=budget,
            where=scope,
            include=fields,
        )
    else:
        raw = self._outcomes.get(where=scope, limit=budget, include=fields)
        raw = self._format_get_results(raw)

    return self._results_to_outcomes(raw)
|
|
1040
|
+
|
|
1041
|
+
def get_domain_knowledge_for_agents(
    self,
    project_id: str,
    agents: List[str],
    domain: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
) -> List[DomainKnowledge]:
    """Fetch domain knowledge contributed by any of the given agents.

    Args:
        project_id: Project scope for the lookup.
        agents: Agent names to include; an empty list short-circuits to [].
        domain: Optional domain filter forwarded into the where clause.
        embedding: Optional query vector; enables similarity search.
        top_k: Per-agent result budget.

    Returns:
        The matching DomainKnowledge objects.
    """
    if not agents:
        return []

    scope = self._build_agents_filter(
        project_id=project_id,
        agents=agents,
        domain=domain,
    )
    fields = ["metadatas", "documents", "embeddings"]
    # Scale the budget so each agent can contribute up to top_k results.
    budget = top_k * len(agents)

    if embedding:
        raw = self._domain_knowledge.query(
            query_embeddings=[embedding],
            n_results=budget,
            where=scope,
            include=fields,
        )
    else:
        raw = self._domain_knowledge.get(where=scope, limit=budget, include=fields)
        raw = self._format_get_results(raw)

    return self._results_to_domain_knowledge(raw)
|
|
1075
|
+
|
|
1076
|
+
def get_anti_patterns_for_agents(
    self,
    project_id: str,
    agents: List[str],
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
) -> List[AntiPattern]:
    """Fetch anti-patterns recorded by any of the given agents.

    Args:
        project_id: Project scope for the lookup.
        agents: Agent names to include; an empty list short-circuits to [].
        embedding: Optional query vector; enables similarity search.
        top_k: Per-agent result budget.

    Returns:
        The matching AntiPattern objects.
    """
    if not agents:
        return []

    scope = self._build_agents_filter(project_id=project_id, agents=agents)
    fields = ["metadatas", "documents", "embeddings"]
    # Scale the budget so each agent can contribute up to top_k results.
    budget = top_k * len(agents)

    if embedding:
        raw = self._anti_patterns.query(
            query_embeddings=[embedding],
            n_results=budget,
            where=scope,
            include=fields,
        )
    else:
        raw = self._anti_patterns.get(where=scope, limit=budget, include=fields)
        raw = self._format_get_results(raw)

    return self._results_to_anti_patterns(raw)
|
|
1108
|
+
|
|
1109
|
+
# ==================== UPDATE OPERATIONS ====================
|
|
1110
|
+
|
|
1111
|
+
def update_heuristic(
    self,
    heuristic_id: str,
    updates: Dict[str, Any],
) -> bool:
    """Apply field updates to a stored heuristic.

    Special keys: "condition"/"strategy" also rewrite the corresponding line
    of the two-line document; "metadata" is JSON-encoded into the
    "extra_metadata" field; datetime values for "last_validated"/"created_at"
    are stored as ISO strings. Any other key is applied only if it already
    exists in the stored metadata; unknown keys are silently ignored.

    Args:
        heuristic_id: ID of the heuristic to update.
        updates: Mapping of field name to new value.

    Returns:
        True on success; False when *updates* is empty, the record is
        missing, or a storage error occurs (logged and swallowed).
    """
    if not updates:
        return False

    try:
        record = self._heuristics.get(
            ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
        )
        if not (record and record.get("ids")):
            return False

        meta = record["metadatas"][0] if record.get("metadatas") else {}
        doc = record["documents"][0] if record.get("documents") else ""
        vectors = record.get("embeddings")
        vector = vectors[0] if vectors is not None and len(vectors) > 0 else None

        for field, new_value in updates.items():
            if field == "condition":
                meta["condition"] = new_value
                # First document line mirrors the condition.
                tail = doc.split("\n", 1)
                doc = f"{new_value}\n{tail[1] if len(tail) > 1 else ''}"
            elif field == "strategy":
                meta["strategy"] = new_value
                # Second document line mirrors the strategy.
                head = doc.split("\n", 1)
                doc = f"{head[0] if head else ''}\n{new_value}"
            elif field == "metadata":
                meta["extra_metadata"] = json.dumps(new_value)
            elif field in ("last_validated", "created_at") and isinstance(
                new_value, datetime
            ):
                meta[field] = new_value.isoformat()
            elif field in meta:
                meta[field] = new_value
            # Unknown fields fall through and are deliberately ignored.

        # Re-attach the original embedding when one exists; Chroma upsert
        # without embeddings would otherwise recompute/drop it.
        if self._has_embedding(vector):
            self._heuristics.upsert(
                ids=[heuristic_id],
                embeddings=[list(vector) if hasattr(vector, "__iter__") else vector],
                metadatas=[meta],
                documents=[doc],
            )
        else:
            self._heuristics.upsert(
                ids=[heuristic_id], metadatas=[meta], documents=[doc]
            )
        return True
    except Exception as e:
        logger.warning(f"Failed to update heuristic {heuristic_id}: {e}")
        return False
|
|
1176
|
+
|
|
1177
|
+
def increment_heuristic_occurrence(
    self,
    heuristic_id: str,
    success: bool,
) -> bool:
    """Bump a heuristic's occurrence counter; on success also bump the
    success counter and refresh the last-validated timestamp.

    Args:
        heuristic_id: ID of the heuristic to touch.
        success: Whether this occurrence was a successful application.

    Returns:
        True on success; False when the record is missing or a storage
        error occurs (logged and swallowed).
    """
    try:
        record = self._heuristics.get(
            ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
        )
        if not (record and record.get("ids")):
            return False

        meta = record["metadatas"][0] if record.get("metadatas") else {}
        doc = record["documents"][0] if record.get("documents") else ""
        vectors = record.get("embeddings")
        vector = vectors[0] if vectors is not None and len(vectors) > 0 else None

        meta["occurrence_count"] = meta.get("occurrence_count", 0) + 1
        if success:
            meta["success_count"] = meta.get("success_count", 0) + 1
            # A successful use counts as a fresh validation.
            meta["last_validated"] = datetime.now(timezone.utc).isoformat()

        # Preserve the stored embedding across the upsert when one exists.
        if self._has_embedding(vector):
            self._heuristics.upsert(
                ids=[heuristic_id],
                embeddings=[list(vector) if hasattr(vector, "__iter__") else vector],
                metadatas=[meta],
                documents=[doc],
            )
        else:
            self._heuristics.upsert(
                ids=[heuristic_id], metadatas=[meta], documents=[doc]
            )
        return True
    except Exception as e:
        logger.warning(f"Failed to increment occurrence for {heuristic_id}: {e}")
        return False
|
|
1222
|
+
|
|
1223
|
+
def update_heuristic_confidence(
    self,
    heuristic_id: str,
    new_confidence: float,
) -> bool:
    """Set a heuristic's confidence score via the generic update path.

    Returns:
        Whatever update_heuristic reports (False when missing/failed).
    """
    payload = {"confidence": new_confidence}
    return self.update_heuristic(heuristic_id, payload)
|
|
1230
|
+
|
|
1231
|
+
def update_knowledge_confidence(
    self,
    knowledge_id: str,
    new_confidence: float,
) -> bool:
    """Overwrite the confidence score on a domain-knowledge record.

    Args:
        knowledge_id: ID of the domain-knowledge record.
        new_confidence: Replacement confidence value.

    Returns:
        True on success; False when the record is missing or a storage
        error occurs (logged and swallowed).
    """
    try:
        record = self._domain_knowledge.get(
            ids=[knowledge_id], include=["metadatas", "documents", "embeddings"]
        )
        if not (record and record.get("ids")):
            return False

        meta = record["metadatas"][0] if record.get("metadatas") else {}
        doc = record["documents"][0] if record.get("documents") else ""
        vectors = record.get("embeddings")
        vector = vectors[0] if vectors is not None and len(vectors) > 0 else None

        meta["confidence"] = new_confidence

        # Preserve the stored embedding across the upsert when one exists.
        if self._has_embedding(vector):
            self._domain_knowledge.upsert(
                ids=[knowledge_id],
                embeddings=[list(vector) if hasattr(vector, "__iter__") else vector],
                metadatas=[meta],
                documents=[doc],
            )
        else:
            self._domain_knowledge.upsert(
                ids=[knowledge_id], metadatas=[meta], documents=[doc]
            )
        return True
    except Exception as e:
        logger.warning(f"Failed to update knowledge confidence {knowledge_id}: {e}")
        return False
|
|
1273
|
+
|
|
1274
|
+
# ==================== DELETE OPERATIONS ====================
|
|
1275
|
+
|
|
1276
|
+
def delete_heuristic(self, heuristic_id: str) -> bool:
    """Remove a heuristic by ID.

    Returns:
        True when the record existed and was deleted; False otherwise
        (including storage errors, which are logged and swallowed).
    """
    try:
        found = self._heuristics.get(ids=[heuristic_id])
        if not (found and found.get("ids")):
            return False
        self._heuristics.delete(ids=[heuristic_id])
        logger.debug(f"Deleted heuristic: {heuristic_id}")
        return True
    except Exception as e:
        logger.warning(f"Failed to delete heuristic {heuristic_id}: {e}")
        return False
|
|
1288
|
+
|
|
1289
|
+
def delete_outcome(self, outcome_id: str) -> bool:
    """Remove an outcome by ID.

    Returns:
        True when the record existed and was deleted; False otherwise
        (including storage errors, which are logged and swallowed).
    """
    try:
        found = self._outcomes.get(ids=[outcome_id])
        if not (found and found.get("ids")):
            return False
        self._outcomes.delete(ids=[outcome_id])
        logger.debug(f"Deleted outcome: {outcome_id}")
        return True
    except Exception as e:
        logger.warning(f"Failed to delete outcome {outcome_id}: {e}")
        return False
|
|
1301
|
+
|
|
1302
|
+
def delete_domain_knowledge(self, knowledge_id: str) -> bool:
    """Remove a domain-knowledge record by ID.

    Returns:
        True when the record existed and was deleted; False otherwise
        (including storage errors, which are logged and swallowed).
    """
    try:
        found = self._domain_knowledge.get(ids=[knowledge_id])
        if not (found and found.get("ids")):
            return False
        self._domain_knowledge.delete(ids=[knowledge_id])
        logger.debug(f"Deleted domain knowledge: {knowledge_id}")
        return True
    except Exception as e:
        logger.warning(f"Failed to delete domain knowledge {knowledge_id}: {e}")
        return False
|
|
1314
|
+
|
|
1315
|
+
def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
    """Remove an anti-pattern by ID.

    Returns:
        True when the record existed and was deleted; False otherwise
        (including storage errors, which are logged and swallowed).
    """
    try:
        found = self._anti_patterns.get(ids=[anti_pattern_id])
        if not (found and found.get("ids")):
            return False
        self._anti_patterns.delete(ids=[anti_pattern_id])
        logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
        return True
    except Exception as e:
        logger.warning(f"Failed to delete anti-pattern {anti_pattern_id}: {e}")
        return False
|
|
1327
|
+
|
|
1328
|
+
def delete_outcomes_older_than(
    self,
    project_id: str,
    older_than: datetime,
    agent: Optional[str] = None,
) -> int:
    """Delete outcomes whose timestamp predates *older_than*.

    Args:
        project_id: Project scope for the purge.
        older_than: Cutoff; records strictly older are removed.
        agent: Optional agent name narrowing the filter.

    Returns:
        Number of outcomes deleted.
    """
    scope = self._build_where_filter(project_id=project_id, agent=agent)

    candidates = self._outcomes.get(where=scope, include=["metadatas"])
    if not (candidates and candidates.get("ids")):
        return 0

    # ISO-8601 strings sort lexicographically, so string comparison is a
    # valid date comparison as long as all stored timestamps share the
    # same format/offset style — presumably they do; verify against writers.
    cutoff = older_than.isoformat()
    metas = candidates.get("metadatas", [])
    stale = []
    for idx, rec_id in enumerate(candidates["ids"]):
        meta = metas[idx] if idx < len(metas) else {}
        stamp = meta.get("timestamp", "")
        if stamp and stamp < cutoff:
            stale.append(rec_id)

    if stale:
        self._outcomes.delete(ids=stale)

    logger.info(f"Deleted {len(stale)} old outcomes")
    return len(stale)
|
|
1362
|
+
|
|
1363
|
+
def delete_low_confidence_heuristics(
    self,
    project_id: str,
    below_confidence: float,
    agent: Optional[str] = None,
) -> int:
    """Delete heuristics whose confidence is strictly below the threshold.

    Args:
        project_id: Project scope for the purge.
        below_confidence: Records with confidence < this value are removed.
        agent: Optional agent name narrowing the filter.

    Returns:
        Number of heuristics deleted.
    """
    scope = self._build_where_filter(project_id=project_id, agent=agent)

    candidates = self._heuristics.get(where=scope, include=["metadatas"])
    if not (candidates and candidates.get("ids")):
        return 0

    # Filter client-side: missing confidence defaults to 0.0, so untagged
    # records are treated as lowest-confidence and purged.
    metas = candidates.get("metadatas", [])
    doomed = []
    for idx, rec_id in enumerate(candidates["ids"]):
        meta = metas[idx] if idx < len(metas) else {}
        if meta.get("confidence", 0.0) < below_confidence:
            doomed.append(rec_id)

    if doomed:
        self._heuristics.delete(ids=doomed)

    logger.info(f"Deleted {len(doomed)} low-confidence heuristics")
    return len(doomed)
|
|
1396
|
+
|
|
1397
|
+
# ==================== STATS ====================
|
|
1398
|
+
|
|
1399
|
+
def get_stats(
    self,
    project_id: str,
    agent: Optional[str] = None,
) -> Dict[str, Any]:
    """Summarize record counts across every collection for a project.

    Args:
        project_id: Project scope for the counts.
        agent: Optional agent name narrowing the filter.

    Returns:
        Dict with per-collection ``*_count`` entries, a ``total_count``,
        and the backend identity ("chroma") plus its mode.
    """
    stats: Dict[str, Any] = {
        "project_id": project_id,
        "agent": agent,
        "storage_type": "chroma",
        "mode": self._mode,
    }

    scope = self._build_where_filter(project_id=project_id, agent=agent)

    collections = (
        ("heuristics", self._heuristics),
        ("outcomes", self._outcomes),
        ("domain_knowledge", self._domain_knowledge),
        ("anti_patterns", self._anti_patterns),
    )
    for label, coll in collections:
        # A failing collection contributes zero rather than failing the report.
        try:
            found = coll.get(where=scope)
            stats[f"{label}_count"] = len(found.get("ids", []))
        except Exception:
            stats[f"{label}_count"] = 0

    # Preferences are global (no project_id in their metadata), so no filter.
    try:
        stats["preferences_count"] = len(self._preferences.get().get("ids", []))
    except Exception:
        stats["preferences_count"] = 0

    stats["total_count"] = sum(
        count for key, count in stats.items() if key.endswith("_count")
    )
    return stats
|
|
1439
|
+
|
|
1440
|
+
def close(self):
    """Close the Chroma client (if applicable).

    Nothing is released explicitly here: ChromaDB manages its own
    resource lifecycle, so this only records the shutdown.
    """
    logger.info("ChromaDB storage closed")
|