alma-memory 0.5.0-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -194
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -322
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -264
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -81
  26. alma/graph/backends/__init__.py +32 -18
  27. alma/graph/backends/kuzu.py +624 -0
  28. alma/graph/backends/memgraph.py +432 -0
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -432
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -511
  55. alma/observability/__init__.py +91 -0
  56. alma/observability/config.py +302 -0
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -0
  59. alma/observability/metrics.py +583 -0
  60. alma/observability/tracing.py +440 -0
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -366
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -61
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1048
  80. alma/storage/base.py +1083 -525
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -0
  83. alma/storage/file_based.py +614 -619
  84. alma/storage/migrations/__init__.py +21 -0
  85. alma/storage/migrations/base.py +321 -0
  86. alma/storage/migrations/runner.py +323 -0
  87. alma/storage/migrations/version_stores.py +337 -0
  88. alma/storage/migrations/versions/__init__.py +11 -0
  89. alma/storage/migrations/versions/v1_0_0.py +373 -0
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1452
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1358
  95. alma/testing/__init__.py +46 -0
  96. alma/testing/factories.py +301 -0
  97. alma/testing/mocks.py +389 -0
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.0.dist-info/RECORD +0 -76
  110. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/storage/chroma.py CHANGED
@@ -1,1443 +1,1443 @@
1
- """
2
- ALMA Chroma Storage Backend.
3
-
4
- Vector database storage using ChromaDB for semantic search capabilities.
5
- Supports both persistent local storage and client-server mode.
6
-
7
- Recommended for:
8
- - Semantic search-focused deployments
9
- - Local development with vector search
10
- - Small to medium scale applications
11
- """
12
-
13
- import json
14
- import logging
15
- import os
16
- from datetime import datetime, timezone
17
- from typing import Any, Dict, List, Optional
18
-
19
- from alma.storage.base import StorageBackend
20
- from alma.types import (
21
- AntiPattern,
22
- DomainKnowledge,
23
- Heuristic,
24
- Outcome,
25
- UserPreference,
26
- )
27
-
28
- logger = logging.getLogger(__name__)
29
-
30
- # Try to import chromadb
31
- try:
32
- import chromadb
33
- from chromadb.config import Settings
34
-
35
- CHROMADB_AVAILABLE = True
36
- except ImportError:
37
- CHROMADB_AVAILABLE = False
38
- logger.warning(
39
- "chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
40
- )
41
-
42
-
43
- class ChromaStorage(StorageBackend):
44
- """
45
- ChromaDB storage backend with native vector search.
46
-
47
- Uses ChromaDB collections for each memory type with built-in
48
- embedding storage and similarity search.
49
-
50
- Collections:
51
- - alma_heuristics: Learned strategies
52
- - alma_outcomes: Task execution records
53
- - alma_preferences: User preferences
54
- - alma_domain_knowledge: Domain facts
55
- - alma_anti_patterns: Patterns to avoid
56
-
57
- Modes:
58
- - Persistent: Local storage with persist_directory
59
- - Client-Server: Remote server with host/port
60
- - Ephemeral: In-memory (for testing)
61
- """
62
-
63
- # Collection names
64
- HEURISTICS_COLLECTION = "alma_heuristics"
65
- OUTCOMES_COLLECTION = "alma_outcomes"
66
- PREFERENCES_COLLECTION = "alma_preferences"
67
- DOMAIN_KNOWLEDGE_COLLECTION = "alma_domain_knowledge"
68
- ANTI_PATTERNS_COLLECTION = "alma_anti_patterns"
69
-
70
- def __init__(
71
- self,
72
- persist_directory: Optional[str] = None,
73
- host: Optional[str] = None,
74
- port: Optional[int] = None,
75
- embedding_dim: int = 384,
76
- collection_metadata: Optional[Dict[str, Any]] = None,
77
- ):
78
- """
79
- Initialize Chroma storage.
80
-
81
- Args:
82
- persist_directory: Path for persistent local storage (mutually exclusive with host/port)
83
- host: Chroma server host (mutually exclusive with persist_directory)
84
- port: Chroma server port (required if host is specified)
85
- embedding_dim: Dimension of embedding vectors (for validation)
86
- collection_metadata: Optional metadata for collections (e.g., distance function)
87
- """
88
- if not CHROMADB_AVAILABLE:
89
- raise ImportError(
90
- "chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
91
- )
92
-
93
- self.embedding_dim = embedding_dim
94
- self._collection_metadata = collection_metadata or {
95
- "hnsw:space": "cosine" # Use cosine similarity
96
- }
97
-
98
- # Initialize client based on mode
99
- if host and port:
100
- # Client-server mode
101
- self._client = chromadb.HttpClient(host=host, port=port)
102
- self._mode = "client-server"
103
- logger.info(f"ChromaDB client-server mode: {host}:{port}")
104
- elif persist_directory:
105
- # Persistent local mode
106
- self._client = chromadb.PersistentClient(
107
- path=persist_directory,
108
- settings=Settings(anonymized_telemetry=False),
109
- )
110
- self._mode = "persistent"
111
- logger.info(f"ChromaDB persistent mode: {persist_directory}")
112
- else:
113
- # Ephemeral mode (in-memory, for testing)
114
- self._client = chromadb.Client(
115
- settings=Settings(anonymized_telemetry=False),
116
- )
117
- self._mode = "ephemeral"
118
- logger.info("ChromaDB ephemeral mode (in-memory)")
119
-
120
- # Initialize collections
121
- self._init_collections()
122
-
123
- @classmethod
124
- def from_config(cls, config: Dict[str, Any]) -> "ChromaStorage":
125
- """Create instance from configuration."""
126
- chroma_config = config.get("chroma", {})
127
-
128
- # Support environment variable expansion
129
- def get_value(key: str, default: Any = None) -> Any:
130
- value = chroma_config.get(key, default)
131
- if (
132
- isinstance(value, str)
133
- and value.startswith("${")
134
- and value.endswith("}")
135
- ):
136
- env_var = value[2:-1]
137
- return os.environ.get(env_var, default)
138
- return value
139
-
140
- persist_directory = get_value("persist_directory")
141
- host = get_value("host")
142
- port = get_value("port")
143
-
144
- if port is not None:
145
- port = int(port)
146
-
147
- return cls(
148
- persist_directory=persist_directory,
149
- host=host,
150
- port=port,
151
- embedding_dim=int(config.get("embedding_dim", 384)),
152
- collection_metadata=chroma_config.get("collection_metadata"),
153
- )
154
-
155
- def _init_collections(self):
156
- """Initialize or get all collections."""
157
- self._heuristics = self._client.get_or_create_collection(
158
- name=self.HEURISTICS_COLLECTION,
159
- metadata=self._collection_metadata,
160
- )
161
- self._outcomes = self._client.get_or_create_collection(
162
- name=self.OUTCOMES_COLLECTION,
163
- metadata=self._collection_metadata,
164
- )
165
- self._preferences = self._client.get_or_create_collection(
166
- name=self.PREFERENCES_COLLECTION,
167
- metadata=self._collection_metadata,
168
- )
169
- self._domain_knowledge = self._client.get_or_create_collection(
170
- name=self.DOMAIN_KNOWLEDGE_COLLECTION,
171
- metadata=self._collection_metadata,
172
- )
173
- self._anti_patterns = self._client.get_or_create_collection(
174
- name=self.ANTI_PATTERNS_COLLECTION,
175
- metadata=self._collection_metadata,
176
- )
177
-
178
- def _format_get_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
179
- """Reformat get() results to match query() format."""
180
- emb = results.get("embeddings")
181
- has_embeddings = emb is not None and (
182
- (hasattr(emb, "__len__") and len(emb) > 0)
183
- or (hasattr(emb, "size") and emb.size > 0)
184
- )
185
- return {
186
- "ids": [results.get("ids", [])],
187
- "metadatas": [results.get("metadatas", [])],
188
- "documents": [results.get("documents", [])],
189
- "embeddings": [emb] if has_embeddings else None,
190
- }
191
-
192
- def _datetime_to_str(self, dt: Optional[datetime]) -> Optional[str]:
193
- """Convert datetime to ISO string for storage."""
194
- if dt is None:
195
- return None
196
- return dt.isoformat()
197
-
198
- def _str_to_datetime(self, s: Optional[str]) -> Optional[datetime]:
199
- """Convert ISO string to datetime."""
200
- if s is None:
201
- return None
202
- try:
203
- return datetime.fromisoformat(s.replace("Z", "+00:00"))
204
- except (ValueError, AttributeError):
205
- return datetime.now(timezone.utc)
206
-
207
- # ==================== WRITE OPERATIONS ====================
208
-
209
- def save_heuristic(self, heuristic: Heuristic) -> str:
210
- """Save a heuristic."""
211
- metadata = {
212
- "agent": heuristic.agent,
213
- "project_id": heuristic.project_id,
214
- "condition": heuristic.condition,
215
- "strategy": heuristic.strategy,
216
- "confidence": heuristic.confidence,
217
- "occurrence_count": heuristic.occurrence_count,
218
- "success_count": heuristic.success_count,
219
- "last_validated": self._datetime_to_str(heuristic.last_validated),
220
- "created_at": self._datetime_to_str(heuristic.created_at),
221
- "extra_metadata": json.dumps(heuristic.metadata)
222
- if heuristic.metadata
223
- else "{}",
224
- }
225
-
226
- # Chroma requires documents - use condition + strategy as document
227
- document = f"{heuristic.condition}\n{heuristic.strategy}"
228
-
229
- if heuristic.embedding:
230
- self._heuristics.upsert(
231
- ids=[heuristic.id],
232
- embeddings=[heuristic.embedding],
233
- metadatas=[metadata],
234
- documents=[document],
235
- )
236
- else:
237
- self._heuristics.upsert(
238
- ids=[heuristic.id],
239
- metadatas=[metadata],
240
- documents=[document],
241
- )
242
-
243
- logger.debug(f"Saved heuristic: {heuristic.id}")
244
- return heuristic.id
245
-
246
- def save_outcome(self, outcome: Outcome) -> str:
247
- """Save an outcome."""
248
- metadata = {
249
- "agent": outcome.agent,
250
- "project_id": outcome.project_id,
251
- "task_type": outcome.task_type or "general",
252
- "success": outcome.success,
253
- "strategy_used": outcome.strategy_used or "",
254
- "duration_ms": outcome.duration_ms or 0,
255
- "error_message": outcome.error_message or "",
256
- "user_feedback": outcome.user_feedback or "",
257
- "timestamp": self._datetime_to_str(outcome.timestamp),
258
- "extra_metadata": json.dumps(outcome.metadata)
259
- if outcome.metadata
260
- else "{}",
261
- }
262
-
263
- document = outcome.task_description
264
-
265
- if outcome.embedding:
266
- self._outcomes.upsert(
267
- ids=[outcome.id],
268
- embeddings=[outcome.embedding],
269
- metadatas=[metadata],
270
- documents=[document],
271
- )
272
- else:
273
- self._outcomes.upsert(
274
- ids=[outcome.id],
275
- metadatas=[metadata],
276
- documents=[document],
277
- )
278
-
279
- logger.debug(f"Saved outcome: {outcome.id}")
280
- return outcome.id
281
-
282
- def save_user_preference(self, preference: UserPreference) -> str:
283
- """Save a user preference."""
284
- metadata = {
285
- "user_id": preference.user_id,
286
- "category": preference.category or "general",
287
- "source": preference.source or "unknown",
288
- "confidence": preference.confidence,
289
- "timestamp": self._datetime_to_str(preference.timestamp),
290
- "extra_metadata": json.dumps(preference.metadata)
291
- if preference.metadata
292
- else "{}",
293
- }
294
-
295
- document = preference.preference
296
-
297
- self._preferences.upsert(
298
- ids=[preference.id],
299
- metadatas=[metadata],
300
- documents=[document],
301
- )
302
-
303
- logger.debug(f"Saved preference: {preference.id}")
304
- return preference.id
305
-
306
- def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
307
- """Save domain knowledge."""
308
- metadata = {
309
- "agent": knowledge.agent,
310
- "project_id": knowledge.project_id,
311
- "domain": knowledge.domain or "general",
312
- "source": knowledge.source or "unknown",
313
- "confidence": knowledge.confidence,
314
- "last_verified": self._datetime_to_str(knowledge.last_verified),
315
- "extra_metadata": json.dumps(knowledge.metadata)
316
- if knowledge.metadata
317
- else "{}",
318
- }
319
-
320
- document = knowledge.fact
321
-
322
- if knowledge.embedding:
323
- self._domain_knowledge.upsert(
324
- ids=[knowledge.id],
325
- embeddings=[knowledge.embedding],
326
- metadatas=[metadata],
327
- documents=[document],
328
- )
329
- else:
330
- self._domain_knowledge.upsert(
331
- ids=[knowledge.id],
332
- metadatas=[metadata],
333
- documents=[document],
334
- )
335
-
336
- logger.debug(f"Saved domain knowledge: {knowledge.id}")
337
- return knowledge.id
338
-
339
- def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
340
- """Save an anti-pattern."""
341
- metadata = {
342
- "agent": anti_pattern.agent,
343
- "project_id": anti_pattern.project_id,
344
- "why_bad": anti_pattern.why_bad or "",
345
- "better_alternative": anti_pattern.better_alternative or "",
346
- "occurrence_count": anti_pattern.occurrence_count,
347
- "last_seen": self._datetime_to_str(anti_pattern.last_seen),
348
- "created_at": self._datetime_to_str(anti_pattern.created_at),
349
- "extra_metadata": json.dumps(anti_pattern.metadata)
350
- if anti_pattern.metadata
351
- else "{}",
352
- }
353
-
354
- document = anti_pattern.pattern
355
-
356
- if anti_pattern.embedding:
357
- self._anti_patterns.upsert(
358
- ids=[anti_pattern.id],
359
- embeddings=[anti_pattern.embedding],
360
- metadatas=[metadata],
361
- documents=[document],
362
- )
363
- else:
364
- self._anti_patterns.upsert(
365
- ids=[anti_pattern.id],
366
- metadatas=[metadata],
367
- documents=[document],
368
- )
369
-
370
- logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
371
- return anti_pattern.id
372
-
373
- # ==================== BATCH WRITE OPERATIONS ====================
374
-
375
- def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
376
- """Save multiple heuristics in a batch."""
377
- if not heuristics:
378
- return []
379
-
380
- ids = []
381
- embeddings = []
382
- metadatas = []
383
- documents = []
384
- has_embeddings = False
385
-
386
- for h in heuristics:
387
- ids.append(h.id)
388
- metadatas.append(
389
- {
390
- "agent": h.agent,
391
- "project_id": h.project_id,
392
- "condition": h.condition,
393
- "strategy": h.strategy,
394
- "confidence": h.confidence,
395
- "occurrence_count": h.occurrence_count,
396
- "success_count": h.success_count,
397
- "last_validated": self._datetime_to_str(h.last_validated),
398
- "created_at": self._datetime_to_str(h.created_at),
399
- "extra_metadata": json.dumps(h.metadata) if h.metadata else "{}",
400
- }
401
- )
402
- documents.append(f"{h.condition}\n{h.strategy}")
403
- if h.embedding:
404
- embeddings.append(h.embedding)
405
- has_embeddings = True
406
- else:
407
- embeddings.append(None)
408
-
409
- if has_embeddings and all(e is not None for e in embeddings):
410
- self._heuristics.upsert(
411
- ids=ids,
412
- embeddings=embeddings,
413
- metadatas=metadatas,
414
- documents=documents,
415
- )
416
- else:
417
- self._heuristics.upsert(
418
- ids=ids,
419
- metadatas=metadatas,
420
- documents=documents,
421
- )
422
-
423
- logger.debug(f"Batch saved {len(heuristics)} heuristics")
424
- return ids
425
-
426
- def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
427
- """Save multiple outcomes in a batch."""
428
- if not outcomes:
429
- return []
430
-
431
- ids = []
432
- embeddings = []
433
- metadatas = []
434
- documents = []
435
- has_embeddings = False
436
-
437
- for o in outcomes:
438
- ids.append(o.id)
439
- metadatas.append(
440
- {
441
- "agent": o.agent,
442
- "project_id": o.project_id,
443
- "task_type": o.task_type or "general",
444
- "success": o.success,
445
- "strategy_used": o.strategy_used or "",
446
- "duration_ms": o.duration_ms or 0,
447
- "error_message": o.error_message or "",
448
- "user_feedback": o.user_feedback or "",
449
- "timestamp": self._datetime_to_str(o.timestamp),
450
- "extra_metadata": json.dumps(o.metadata) if o.metadata else "{}",
451
- }
452
- )
453
- documents.append(o.task_description)
454
- if o.embedding:
455
- embeddings.append(o.embedding)
456
- has_embeddings = True
457
- else:
458
- embeddings.append(None)
459
-
460
- if has_embeddings and all(e is not None for e in embeddings):
461
- self._outcomes.upsert(
462
- ids=ids,
463
- embeddings=embeddings,
464
- metadatas=metadatas,
465
- documents=documents,
466
- )
467
- else:
468
- self._outcomes.upsert(
469
- ids=ids,
470
- metadatas=metadatas,
471
- documents=documents,
472
- )
473
-
474
- logger.debug(f"Batch saved {len(outcomes)} outcomes")
475
- return ids
476
-
477
- def save_domain_knowledge_batch(
478
- self, knowledge_items: List[DomainKnowledge]
479
- ) -> List[str]:
480
- """Save multiple domain knowledge items in a batch."""
481
- if not knowledge_items:
482
- return []
483
-
484
- ids = []
485
- embeddings = []
486
- metadatas = []
487
- documents = []
488
- has_embeddings = False
489
-
490
- for k in knowledge_items:
491
- ids.append(k.id)
492
- metadatas.append(
493
- {
494
- "agent": k.agent,
495
- "project_id": k.project_id,
496
- "domain": k.domain or "general",
497
- "source": k.source or "unknown",
498
- "confidence": k.confidence,
499
- "last_verified": self._datetime_to_str(k.last_verified),
500
- "extra_metadata": json.dumps(k.metadata) if k.metadata else "{}",
501
- }
502
- )
503
- documents.append(k.fact)
504
- if k.embedding:
505
- embeddings.append(k.embedding)
506
- has_embeddings = True
507
- else:
508
- embeddings.append(None)
509
-
510
- if has_embeddings and all(e is not None for e in embeddings):
511
- self._domain_knowledge.upsert(
512
- ids=ids,
513
- embeddings=embeddings,
514
- metadatas=metadatas,
515
- documents=documents,
516
- )
517
- else:
518
- self._domain_knowledge.upsert(
519
- ids=ids,
520
- metadatas=metadatas,
521
- documents=documents,
522
- )
523
-
524
- logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
525
- return ids
526
-
527
- # ==================== READ OPERATIONS ====================
528
-
529
- def _build_where_filter(
530
- self,
531
- project_id: Optional[str] = None,
532
- agent: Optional[str] = None,
533
- user_id: Optional[str] = None,
534
- domain: Optional[str] = None,
535
- task_type: Optional[str] = None,
536
- min_confidence: Optional[float] = None,
537
- success_only: bool = False,
538
- ) -> Optional[Dict[str, Any]]:
539
- """Build Chroma where filter from parameters."""
540
- conditions = []
541
-
542
- if project_id:
543
- conditions.append({"project_id": {"$eq": project_id}})
544
- if agent:
545
- conditions.append({"agent": {"$eq": agent}})
546
- if user_id:
547
- conditions.append({"user_id": {"$eq": user_id}})
548
- if domain:
549
- conditions.append({"domain": {"$eq": domain}})
550
- if task_type:
551
- conditions.append({"task_type": {"$eq": task_type}})
552
- if min_confidence is not None and min_confidence > 0:
553
- conditions.append({"confidence": {"$gte": min_confidence}})
554
- if success_only:
555
- conditions.append({"success": {"$eq": True}})
556
-
557
- if not conditions:
558
- return None
559
- if len(conditions) == 1:
560
- return conditions[0]
561
- return {"$and": conditions}
562
-
563
- def _has_embedding(self, emb: Any) -> bool:
564
- """Safely check if embedding is not None/empty (handles numpy arrays)."""
565
- if emb is None:
566
- return False
567
- if hasattr(emb, "__len__"):
568
- try:
569
- return len(emb) > 0
570
- except (TypeError, ValueError):
571
- pass
572
- if hasattr(emb, "size"):
573
- return emb.size > 0
574
- return True
575
-
576
- def _get_embedding_list(self, results: Dict[str, Any], num_ids: int) -> List[Any]:
577
- """Safely extract embeddings list from results."""
578
- emb_data = results.get("embeddings")
579
- if emb_data is None:
580
- return [None] * num_ids
581
- # Handle both nested list format (query results) and flat format
582
- if isinstance(emb_data, list) and len(emb_data) > 0:
583
- first = emb_data[0]
584
- # Check if it's a nested list (query format: [[emb1, emb2, ...]])
585
- if isinstance(first, list) or (
586
- hasattr(first, "__iter__") and not isinstance(first, (str, bytes))
587
- ):
588
- # Could be list of embeddings or numpy array
589
- try:
590
- if hasattr(first, "tolist"):
591
- # numpy array
592
- return list(emb_data[0])
593
- return list(first) if isinstance(first, list) else [first]
594
- except (TypeError, IndexError):
595
- return [None] * num_ids
596
- return list(emb_data)
597
- return [None] * num_ids
598
-
599
- def _results_to_heuristics(self, results: Dict[str, Any]) -> List[Heuristic]:
600
- """Convert Chroma query results to Heuristic objects."""
601
- heuristics = []
602
- if not results or not results.get("ids") or not results["ids"][0]:
603
- return heuristics
604
-
605
- ids = results["ids"][0]
606
- metadatas = results.get("metadatas", [[]])[0]
607
- embeddings = self._get_embedding_list(results, len(ids))
608
-
609
- for i, id_ in enumerate(ids):
610
- meta = metadatas[i] if i < len(metadatas) else {}
611
- emb = embeddings[i] if i < len(embeddings) else None
612
-
613
- extra = json.loads(meta.get("extra_metadata", "{}"))
614
-
615
- heuristics.append(
616
- Heuristic(
617
- id=id_,
618
- agent=meta.get("agent", ""),
619
- project_id=meta.get("project_id", ""),
620
- condition=meta.get("condition", ""),
621
- strategy=meta.get("strategy", ""),
622
- confidence=meta.get("confidence", 0.0),
623
- occurrence_count=meta.get("occurrence_count", 0),
624
- success_count=meta.get("success_count", 0),
625
- last_validated=self._str_to_datetime(meta.get("last_validated"))
626
- or datetime.now(timezone.utc),
627
- created_at=self._str_to_datetime(meta.get("created_at"))
628
- or datetime.now(timezone.utc),
629
- embedding=list(emb)
630
- if emb is not None and hasattr(emb, "__iter__")
631
- else emb,
632
- metadata=extra,
633
- )
634
- )
635
-
636
- return heuristics
637
-
638
- def _results_to_outcomes(self, results: Dict[str, Any]) -> List[Outcome]:
639
- """Convert Chroma query results to Outcome objects."""
640
- outcomes = []
641
- if not results or not results.get("ids") or not results["ids"][0]:
642
- return outcomes
643
-
644
- ids = results["ids"][0]
645
- metadatas = results.get("metadatas", [[]])[0]
646
- documents = results.get("documents", [[]])[0]
647
- embeddings = self._get_embedding_list(results, len(ids))
648
-
649
- for i, id_ in enumerate(ids):
650
- meta = metadatas[i] if i < len(metadatas) else {}
651
- doc = documents[i] if i < len(documents) else ""
652
- emb = embeddings[i] if i < len(embeddings) else None
653
-
654
- extra = json.loads(meta.get("extra_metadata", "{}"))
655
-
656
- outcomes.append(
657
- Outcome(
658
- id=id_,
659
- agent=meta.get("agent", ""),
660
- project_id=meta.get("project_id", ""),
661
- task_type=meta.get("task_type", "general"),
662
- task_description=doc,
663
- success=meta.get("success", False),
664
- strategy_used=meta.get("strategy_used", ""),
665
- duration_ms=meta.get("duration_ms"),
666
- error_message=meta.get("error_message") or None,
667
- user_feedback=meta.get("user_feedback") or None,
668
- timestamp=self._str_to_datetime(meta.get("timestamp"))
669
- or datetime.now(timezone.utc),
670
- embedding=list(emb)
671
- if emb is not None and hasattr(emb, "__iter__")
672
- else emb,
673
- metadata=extra,
674
- )
675
- )
676
-
677
- return outcomes
678
-
679
- def _results_to_preferences(self, results: Dict[str, Any]) -> List[UserPreference]:
680
- """Convert Chroma query results to UserPreference objects."""
681
- preferences = []
682
- if not results or not results.get("ids") or not results["ids"][0]:
683
- return preferences
684
-
685
- ids = results["ids"][0]
686
- metadatas = results.get("metadatas", [[]])[0]
687
- documents = results.get("documents", [[]])[0]
688
-
689
- for i, id_ in enumerate(ids):
690
- meta = metadatas[i] if i < len(metadatas) else {}
691
- doc = documents[i] if i < len(documents) else ""
692
-
693
- extra = json.loads(meta.get("extra_metadata", "{}"))
694
-
695
- preferences.append(
696
- UserPreference(
697
- id=id_,
698
- user_id=meta.get("user_id", ""),
699
- category=meta.get("category", "general"),
700
- preference=doc,
701
- source=meta.get("source", "unknown"),
702
- confidence=meta.get("confidence", 1.0),
703
- timestamp=self._str_to_datetime(meta.get("timestamp"))
704
- or datetime.now(timezone.utc),
705
- metadata=extra,
706
- )
707
- )
708
-
709
- return preferences
710
-
711
- def _results_to_domain_knowledge(
712
- self, results: Dict[str, Any]
713
- ) -> List[DomainKnowledge]:
714
- """Convert Chroma query results to DomainKnowledge objects."""
715
- knowledge = []
716
- if not results or not results.get("ids") or not results["ids"][0]:
717
- return knowledge
718
-
719
- ids = results["ids"][0]
720
- metadatas = results.get("metadatas", [[]])[0]
721
- documents = results.get("documents", [[]])[0]
722
- embeddings = self._get_embedding_list(results, len(ids))
723
-
724
- for i, id_ in enumerate(ids):
725
- meta = metadatas[i] if i < len(metadatas) else {}
726
- doc = documents[i] if i < len(documents) else ""
727
- emb = embeddings[i] if i < len(embeddings) else None
728
-
729
- extra = json.loads(meta.get("extra_metadata", "{}"))
730
-
731
- knowledge.append(
732
- DomainKnowledge(
733
- id=id_,
734
- agent=meta.get("agent", ""),
735
- project_id=meta.get("project_id", ""),
736
- domain=meta.get("domain", "general"),
737
- fact=doc,
738
- source=meta.get("source", "unknown"),
739
- confidence=meta.get("confidence", 1.0),
740
- last_verified=self._str_to_datetime(meta.get("last_verified"))
741
- or datetime.now(timezone.utc),
742
- embedding=list(emb)
743
- if emb is not None and hasattr(emb, "__iter__")
744
- else emb,
745
- metadata=extra,
746
- )
747
- )
748
-
749
- return knowledge
750
-
751
- def _results_to_anti_patterns(self, results: Dict[str, Any]) -> List[AntiPattern]:
752
- """Convert Chroma query results to AntiPattern objects."""
753
- patterns = []
754
- if not results or not results.get("ids") or not results["ids"][0]:
755
- return patterns
756
-
757
- ids = results["ids"][0]
758
- metadatas = results.get("metadatas", [[]])[0]
759
- documents = results.get("documents", [[]])[0]
760
- embeddings = self._get_embedding_list(results, len(ids))
761
-
762
- for i, id_ in enumerate(ids):
763
- meta = metadatas[i] if i < len(metadatas) else {}
764
- doc = documents[i] if i < len(documents) else ""
765
- emb = embeddings[i] if i < len(embeddings) else None
766
-
767
- extra = json.loads(meta.get("extra_metadata", "{}"))
768
-
769
- patterns.append(
770
- AntiPattern(
771
- id=id_,
772
- agent=meta.get("agent", ""),
773
- project_id=meta.get("project_id", ""),
774
- pattern=doc,
775
- why_bad=meta.get("why_bad", ""),
776
- better_alternative=meta.get("better_alternative", ""),
777
- occurrence_count=meta.get("occurrence_count", 1),
778
- last_seen=self._str_to_datetime(meta.get("last_seen"))
779
- or datetime.now(timezone.utc),
780
- created_at=self._str_to_datetime(meta.get("created_at"))
781
- or datetime.now(timezone.utc),
782
- embedding=list(emb)
783
- if emb is not None and hasattr(emb, "__iter__")
784
- else emb,
785
- metadata=extra,
786
- )
787
- )
788
-
789
- return patterns
790
-
791
- def get_heuristics(
792
- self,
793
- project_id: str,
794
- agent: Optional[str] = None,
795
- embedding: Optional[List[float]] = None,
796
- top_k: int = 5,
797
- min_confidence: float = 0.0,
798
- ) -> List[Heuristic]:
799
- """Get heuristics with optional vector search."""
800
- where_filter = self._build_where_filter(
801
- project_id=project_id,
802
- agent=agent,
803
- min_confidence=min_confidence,
804
- )
805
-
806
- if embedding:
807
- results = self._heuristics.query(
808
- query_embeddings=[embedding],
809
- n_results=top_k,
810
- where=where_filter,
811
- include=["metadatas", "documents", "embeddings"],
812
- )
813
- else:
814
- results = self._heuristics.get(
815
- where=where_filter,
816
- limit=top_k,
817
- include=["metadatas", "documents", "embeddings"],
818
- )
819
- results = self._format_get_results(results)
820
-
821
- return self._results_to_heuristics(results)
822
-
823
- def get_outcomes(
824
- self,
825
- project_id: str,
826
- agent: Optional[str] = None,
827
- task_type: Optional[str] = None,
828
- embedding: Optional[List[float]] = None,
829
- top_k: int = 5,
830
- success_only: bool = False,
831
- ) -> List[Outcome]:
832
- """Get outcomes with optional vector search."""
833
- where_filter = self._build_where_filter(
834
- project_id=project_id,
835
- agent=agent,
836
- task_type=task_type,
837
- success_only=success_only,
838
- )
839
-
840
- if embedding:
841
- results = self._outcomes.query(
842
- query_embeddings=[embedding],
843
- n_results=top_k,
844
- where=where_filter,
845
- include=["metadatas", "documents", "embeddings"],
846
- )
847
- else:
848
- results = self._outcomes.get(
849
- where=where_filter,
850
- limit=top_k,
851
- include=["metadatas", "documents", "embeddings"],
852
- )
853
- results = self._format_get_results(results)
854
-
855
- return self._results_to_outcomes(results)
856
-
857
- def get_user_preferences(
858
- self,
859
- user_id: str,
860
- category: Optional[str] = None,
861
- ) -> List[UserPreference]:
862
- """Get user preferences."""
863
- where_filter = self._build_where_filter(user_id=user_id)
864
- if category:
865
- if where_filter:
866
- where_filter = {"$and": [where_filter, {"category": {"$eq": category}}]}
867
- else:
868
- where_filter = {"category": {"$eq": category}}
869
-
870
- results = self._preferences.get(
871
- where=where_filter,
872
- include=["metadatas", "documents"],
873
- )
874
- results = {
875
- "ids": [results.get("ids", [])],
876
- "metadatas": [results.get("metadatas", [])],
877
- "documents": [results.get("documents", [])],
878
- }
879
-
880
- return self._results_to_preferences(results)
881
-
882
- def get_domain_knowledge(
883
- self,
884
- project_id: str,
885
- agent: Optional[str] = None,
886
- domain: Optional[str] = None,
887
- embedding: Optional[List[float]] = None,
888
- top_k: int = 5,
889
- ) -> List[DomainKnowledge]:
890
- """Get domain knowledge with optional vector search."""
891
- where_filter = self._build_where_filter(
892
- project_id=project_id,
893
- agent=agent,
894
- domain=domain,
895
- )
896
-
897
- if embedding:
898
- results = self._domain_knowledge.query(
899
- query_embeddings=[embedding],
900
- n_results=top_k,
901
- where=where_filter,
902
- include=["metadatas", "documents", "embeddings"],
903
- )
904
- else:
905
- results = self._domain_knowledge.get(
906
- where=where_filter,
907
- limit=top_k,
908
- include=["metadatas", "documents", "embeddings"],
909
- )
910
- results = self._format_get_results(results)
911
-
912
- return self._results_to_domain_knowledge(results)
913
-
914
- def get_anti_patterns(
915
- self,
916
- project_id: str,
917
- agent: Optional[str] = None,
918
- embedding: Optional[List[float]] = None,
919
- top_k: int = 5,
920
- ) -> List[AntiPattern]:
921
- """Get anti-patterns with optional vector search."""
922
- where_filter = self._build_where_filter(
923
- project_id=project_id,
924
- agent=agent,
925
- )
926
-
927
- if embedding:
928
- results = self._anti_patterns.query(
929
- query_embeddings=[embedding],
930
- n_results=top_k,
931
- where=where_filter,
932
- include=["metadatas", "documents", "embeddings"],
933
- )
934
- else:
935
- results = self._anti_patterns.get(
936
- where=where_filter,
937
- limit=top_k,
938
- include=["metadatas", "documents", "embeddings"],
939
- )
940
- results = self._format_get_results(results)
941
-
942
- return self._results_to_anti_patterns(results)
943
-
944
- # ==================== MULTI-AGENT MEMORY SHARING ====================
945
-
946
- def _build_agents_filter(
947
- self,
948
- project_id: str,
949
- agents: List[str],
950
- **kwargs: Any,
951
- ) -> Optional[Dict[str, Any]]:
952
- """Build filter for multiple agents."""
953
- if not agents:
954
- return None
955
-
956
- agent_conditions = [{"agent": {"$eq": a}} for a in agents]
957
- agents_filter = (
958
- {"$or": agent_conditions}
959
- if len(agent_conditions) > 1
960
- else agent_conditions[0]
961
- )
962
-
963
- base_filter = self._build_where_filter(project_id=project_id, **kwargs)
964
-
965
- if base_filter:
966
- return {"$and": [base_filter, agents_filter]}
967
- return {"$and": [{"project_id": {"$eq": project_id}}, agents_filter]}
968
-
969
- def get_heuristics_for_agents(
970
- self,
971
- project_id: str,
972
- agents: List[str],
973
- embedding: Optional[List[float]] = None,
974
- top_k: int = 5,
975
- min_confidence: float = 0.0,
976
- ) -> List[Heuristic]:
977
- """Get heuristics from multiple agents."""
978
- if not agents:
979
- return []
980
-
981
- where_filter = self._build_agents_filter(
982
- project_id=project_id,
983
- agents=agents,
984
- min_confidence=min_confidence,
985
- )
986
-
987
- if embedding:
988
- results = self._heuristics.query(
989
- query_embeddings=[embedding],
990
- n_results=top_k * len(agents),
991
- where=where_filter,
992
- include=["metadatas", "documents", "embeddings"],
993
- )
994
- else:
995
- results = self._heuristics.get(
996
- where=where_filter,
997
- limit=top_k * len(agents),
998
- include=["metadatas", "documents", "embeddings"],
999
- )
1000
- results = self._format_get_results(results)
1001
-
1002
- return self._results_to_heuristics(results)
1003
-
1004
- def get_outcomes_for_agents(
1005
- self,
1006
- project_id: str,
1007
- agents: List[str],
1008
- task_type: Optional[str] = None,
1009
- embedding: Optional[List[float]] = None,
1010
- top_k: int = 5,
1011
- success_only: bool = False,
1012
- ) -> List[Outcome]:
1013
- """Get outcomes from multiple agents."""
1014
- if not agents:
1015
- return []
1016
-
1017
- where_filter = self._build_agents_filter(
1018
- project_id=project_id,
1019
- agents=agents,
1020
- task_type=task_type,
1021
- success_only=success_only,
1022
- )
1023
-
1024
- if embedding:
1025
- results = self._outcomes.query(
1026
- query_embeddings=[embedding],
1027
- n_results=top_k * len(agents),
1028
- where=where_filter,
1029
- include=["metadatas", "documents", "embeddings"],
1030
- )
1031
- else:
1032
- results = self._outcomes.get(
1033
- where=where_filter,
1034
- limit=top_k * len(agents),
1035
- include=["metadatas", "documents", "embeddings"],
1036
- )
1037
- results = self._format_get_results(results)
1038
-
1039
- return self._results_to_outcomes(results)
1040
-
1041
- def get_domain_knowledge_for_agents(
1042
- self,
1043
- project_id: str,
1044
- agents: List[str],
1045
- domain: Optional[str] = None,
1046
- embedding: Optional[List[float]] = None,
1047
- top_k: int = 5,
1048
- ) -> List[DomainKnowledge]:
1049
- """Get domain knowledge from multiple agents."""
1050
- if not agents:
1051
- return []
1052
-
1053
- where_filter = self._build_agents_filter(
1054
- project_id=project_id,
1055
- agents=agents,
1056
- domain=domain,
1057
- )
1058
-
1059
- if embedding:
1060
- results = self._domain_knowledge.query(
1061
- query_embeddings=[embedding],
1062
- n_results=top_k * len(agents),
1063
- where=where_filter,
1064
- include=["metadatas", "documents", "embeddings"],
1065
- )
1066
- else:
1067
- results = self._domain_knowledge.get(
1068
- where=where_filter,
1069
- limit=top_k * len(agents),
1070
- include=["metadatas", "documents", "embeddings"],
1071
- )
1072
- results = self._format_get_results(results)
1073
-
1074
- return self._results_to_domain_knowledge(results)
1075
-
1076
- def get_anti_patterns_for_agents(
1077
- self,
1078
- project_id: str,
1079
- agents: List[str],
1080
- embedding: Optional[List[float]] = None,
1081
- top_k: int = 5,
1082
- ) -> List[AntiPattern]:
1083
- """Get anti-patterns from multiple agents."""
1084
- if not agents:
1085
- return []
1086
-
1087
- where_filter = self._build_agents_filter(
1088
- project_id=project_id,
1089
- agents=agents,
1090
- )
1091
-
1092
- if embedding:
1093
- results = self._anti_patterns.query(
1094
- query_embeddings=[embedding],
1095
- n_results=top_k * len(agents),
1096
- where=where_filter,
1097
- include=["metadatas", "documents", "embeddings"],
1098
- )
1099
- else:
1100
- results = self._anti_patterns.get(
1101
- where=where_filter,
1102
- limit=top_k * len(agents),
1103
- include=["metadatas", "documents", "embeddings"],
1104
- )
1105
- results = self._format_get_results(results)
1106
-
1107
- return self._results_to_anti_patterns(results)
1108
-
1109
- # ==================== UPDATE OPERATIONS ====================
1110
-
1111
- def update_heuristic(
1112
- self,
1113
- heuristic_id: str,
1114
- updates: Dict[str, Any],
1115
- ) -> bool:
1116
- """Update a heuristic's fields."""
1117
- if not updates:
1118
- return False
1119
-
1120
- try:
1121
- # Get existing heuristic
1122
- existing = self._heuristics.get(
1123
- ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
1124
- )
1125
- if not existing or not existing.get("ids"):
1126
- return False
1127
-
1128
- metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
1129
- document = existing["documents"][0] if existing.get("documents") else ""
1130
- emb_list = existing.get("embeddings")
1131
- embedding = (
1132
- emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
1133
- )
1134
-
1135
- # Apply updates
1136
- for key, value in updates.items():
1137
- if key == "condition":
1138
- metadata["condition"] = value
1139
- # Update document as well
1140
- parts = document.split("\n", 1)
1141
- document = f"{value}\n{parts[1] if len(parts) > 1 else ''}"
1142
- elif key == "strategy":
1143
- metadata["strategy"] = value
1144
- parts = document.split("\n", 1)
1145
- document = f"{parts[0] if parts else ''}\n{value}"
1146
- elif key == "metadata":
1147
- metadata["extra_metadata"] = json.dumps(value)
1148
- elif key in ("last_validated", "created_at") and isinstance(
1149
- value, datetime
1150
- ):
1151
- metadata[key] = value.isoformat()
1152
- elif key in metadata:
1153
- metadata[key] = value
1154
-
1155
- # Upsert with updated values
1156
- if self._has_embedding(embedding):
1157
- self._heuristics.upsert(
1158
- ids=[heuristic_id],
1159
- embeddings=[
1160
- list(embedding) if hasattr(embedding, "__iter__") else embedding
1161
- ],
1162
- metadatas=[metadata],
1163
- documents=[document],
1164
- )
1165
- else:
1166
- self._heuristics.upsert(
1167
- ids=[heuristic_id],
1168
- metadatas=[metadata],
1169
- documents=[document],
1170
- )
1171
-
1172
- return True
1173
- except Exception as e:
1174
- logger.error(f"Failed to update heuristic {heuristic_id}: {e}")
1175
- return False
1176
-
1177
- def increment_heuristic_occurrence(
1178
- self,
1179
- heuristic_id: str,
1180
- success: bool,
1181
- ) -> bool:
1182
- """Increment heuristic occurrence count."""
1183
- try:
1184
- existing = self._heuristics.get(
1185
- ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
1186
- )
1187
- if not existing or not existing.get("ids"):
1188
- return False
1189
-
1190
- metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
1191
- document = existing["documents"][0] if existing.get("documents") else ""
1192
- emb_list = existing.get("embeddings")
1193
- embedding = (
1194
- emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
1195
- )
1196
-
1197
- metadata["occurrence_count"] = metadata.get("occurrence_count", 0) + 1
1198
- if success:
1199
- metadata["success_count"] = metadata.get("success_count", 0) + 1
1200
- metadata["last_validated"] = datetime.now(timezone.utc).isoformat()
1201
-
1202
- if self._has_embedding(embedding):
1203
- self._heuristics.upsert(
1204
- ids=[heuristic_id],
1205
- embeddings=[
1206
- list(embedding) if hasattr(embedding, "__iter__") else embedding
1207
- ],
1208
- metadatas=[metadata],
1209
- documents=[document],
1210
- )
1211
- else:
1212
- self._heuristics.upsert(
1213
- ids=[heuristic_id],
1214
- metadatas=[metadata],
1215
- documents=[document],
1216
- )
1217
-
1218
- return True
1219
- except Exception as e:
1220
- logger.error(f"Failed to increment occurrence for {heuristic_id}: {e}")
1221
- return False
1222
-
1223
- def update_heuristic_confidence(
1224
- self,
1225
- heuristic_id: str,
1226
- new_confidence: float,
1227
- ) -> bool:
1228
- """Update a heuristic's confidence value."""
1229
- return self.update_heuristic(heuristic_id, {"confidence": new_confidence})
1230
-
1231
- def update_knowledge_confidence(
1232
- self,
1233
- knowledge_id: str,
1234
- new_confidence: float,
1235
- ) -> bool:
1236
- """Update domain knowledge confidence value."""
1237
- try:
1238
- existing = self._domain_knowledge.get(
1239
- ids=[knowledge_id], include=["metadatas", "documents", "embeddings"]
1240
- )
1241
- if not existing or not existing.get("ids"):
1242
- return False
1243
-
1244
- metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
1245
- document = existing["documents"][0] if existing.get("documents") else ""
1246
- emb_list = existing.get("embeddings")
1247
- embedding = (
1248
- emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
1249
- )
1250
-
1251
- metadata["confidence"] = new_confidence
1252
-
1253
- if self._has_embedding(embedding):
1254
- self._domain_knowledge.upsert(
1255
- ids=[knowledge_id],
1256
- embeddings=[
1257
- list(embedding) if hasattr(embedding, "__iter__") else embedding
1258
- ],
1259
- metadatas=[metadata],
1260
- documents=[document],
1261
- )
1262
- else:
1263
- self._domain_knowledge.upsert(
1264
- ids=[knowledge_id],
1265
- metadatas=[metadata],
1266
- documents=[document],
1267
- )
1268
-
1269
- return True
1270
- except Exception as e:
1271
- logger.error(f"Failed to update knowledge confidence {knowledge_id}: {e}")
1272
- return False
1273
-
1274
- # ==================== DELETE OPERATIONS ====================
1275
-
1276
- def delete_heuristic(self, heuristic_id: str) -> bool:
1277
- """Delete a heuristic by ID."""
1278
- try:
1279
- existing = self._heuristics.get(ids=[heuristic_id])
1280
- if not existing or not existing.get("ids"):
1281
- return False
1282
- self._heuristics.delete(ids=[heuristic_id])
1283
- logger.debug(f"Deleted heuristic: {heuristic_id}")
1284
- return True
1285
- except Exception as e:
1286
- logger.error(f"Failed to delete heuristic {heuristic_id}: {e}")
1287
- return False
1288
-
1289
- def delete_outcome(self, outcome_id: str) -> bool:
1290
- """Delete an outcome by ID."""
1291
- try:
1292
- existing = self._outcomes.get(ids=[outcome_id])
1293
- if not existing or not existing.get("ids"):
1294
- return False
1295
- self._outcomes.delete(ids=[outcome_id])
1296
- logger.debug(f"Deleted outcome: {outcome_id}")
1297
- return True
1298
- except Exception as e:
1299
- logger.error(f"Failed to delete outcome {outcome_id}: {e}")
1300
- return False
1301
-
1302
- def delete_domain_knowledge(self, knowledge_id: str) -> bool:
1303
- """Delete domain knowledge by ID."""
1304
- try:
1305
- existing = self._domain_knowledge.get(ids=[knowledge_id])
1306
- if not existing or not existing.get("ids"):
1307
- return False
1308
- self._domain_knowledge.delete(ids=[knowledge_id])
1309
- logger.debug(f"Deleted domain knowledge: {knowledge_id}")
1310
- return True
1311
- except Exception as e:
1312
- logger.error(f"Failed to delete domain knowledge {knowledge_id}: {e}")
1313
- return False
1314
-
1315
- def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
1316
- """Delete an anti-pattern by ID."""
1317
- try:
1318
- existing = self._anti_patterns.get(ids=[anti_pattern_id])
1319
- if not existing or not existing.get("ids"):
1320
- return False
1321
- self._anti_patterns.delete(ids=[anti_pattern_id])
1322
- logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
1323
- return True
1324
- except Exception as e:
1325
- logger.error(f"Failed to delete anti-pattern {anti_pattern_id}: {e}")
1326
- return False
1327
-
1328
- def delete_outcomes_older_than(
1329
- self,
1330
- project_id: str,
1331
- older_than: datetime,
1332
- agent: Optional[str] = None,
1333
- ) -> int:
1334
- """Delete old outcomes."""
1335
- where_filter = self._build_where_filter(project_id=project_id, agent=agent)
1336
-
1337
- # Get all matching outcomes
1338
- results = self._outcomes.get(
1339
- where=where_filter,
1340
- include=["metadatas"],
1341
- )
1342
-
1343
- if not results or not results.get("ids"):
1344
- return 0
1345
-
1346
- older_than_str = older_than.isoformat()
1347
- ids_to_delete = []
1348
-
1349
- for i, id_ in enumerate(results["ids"]):
1350
- meta = (
1351
- results["metadatas"][i] if i < len(results.get("metadatas", [])) else {}
1352
- )
1353
- timestamp_str = meta.get("timestamp", "")
1354
- if timestamp_str and timestamp_str < older_than_str:
1355
- ids_to_delete.append(id_)
1356
-
1357
- if ids_to_delete:
1358
- self._outcomes.delete(ids=ids_to_delete)
1359
-
1360
- logger.info(f"Deleted {len(ids_to_delete)} old outcomes")
1361
- return len(ids_to_delete)
1362
-
1363
- def delete_low_confidence_heuristics(
1364
- self,
1365
- project_id: str,
1366
- below_confidence: float,
1367
- agent: Optional[str] = None,
1368
- ) -> int:
1369
- """Delete low-confidence heuristics."""
1370
- where_filter = self._build_where_filter(project_id=project_id, agent=agent)
1371
-
1372
- # Get all matching heuristics
1373
- results = self._heuristics.get(
1374
- where=where_filter,
1375
- include=["metadatas"],
1376
- )
1377
-
1378
- if not results or not results.get("ids"):
1379
- return 0
1380
-
1381
- ids_to_delete = []
1382
-
1383
- for i, id_ in enumerate(results["ids"]):
1384
- meta = (
1385
- results["metadatas"][i] if i < len(results.get("metadatas", [])) else {}
1386
- )
1387
- confidence = meta.get("confidence", 0.0)
1388
- if confidence < below_confidence:
1389
- ids_to_delete.append(id_)
1390
-
1391
- if ids_to_delete:
1392
- self._heuristics.delete(ids=ids_to_delete)
1393
-
1394
- logger.info(f"Deleted {len(ids_to_delete)} low-confidence heuristics")
1395
- return len(ids_to_delete)
1396
-
1397
- # ==================== STATS ====================
1398
-
1399
- def get_stats(
1400
- self,
1401
- project_id: str,
1402
- agent: Optional[str] = None,
1403
- ) -> Dict[str, Any]:
1404
- """Get memory statistics."""
1405
- stats = {
1406
- "project_id": project_id,
1407
- "agent": agent,
1408
- "storage_type": "chroma",
1409
- "mode": self._mode,
1410
- }
1411
-
1412
- where_filter = self._build_where_filter(project_id=project_id, agent=agent)
1413
-
1414
- # Count items in each collection
1415
- for name, collection in [
1416
- ("heuristics", self._heuristics),
1417
- ("outcomes", self._outcomes),
1418
- ("domain_knowledge", self._domain_knowledge),
1419
- ("anti_patterns", self._anti_patterns),
1420
- ]:
1421
- try:
1422
- results = collection.get(where=where_filter)
1423
- stats[f"{name}_count"] = len(results.get("ids", []))
1424
- except Exception:
1425
- stats[f"{name}_count"] = 0
1426
-
1427
- # Preferences don't have project_id filter
1428
- try:
1429
- results = self._preferences.get()
1430
- stats["preferences_count"] = len(results.get("ids", []))
1431
- except Exception:
1432
- stats["preferences_count"] = 0
1433
-
1434
- stats["total_count"] = sum(
1435
- stats.get(k, 0) for k in stats if k.endswith("_count")
1436
- )
1437
-
1438
- return stats
1439
-
1440
- def close(self):
1441
- """Close the Chroma client (if applicable)."""
1442
- # ChromaDB handles cleanup automatically
1443
- logger.info("ChromaDB storage closed")
1
+ """
2
+ ALMA Chroma Storage Backend.
3
+
4
+ Vector database storage using ChromaDB for semantic search capabilities.
5
+ Supports both persistent local storage and client-server mode.
6
+
7
+ Recommended for:
8
+ - Semantic search-focused deployments
9
+ - Local development with vector search
10
+ - Small to medium scale applications
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ import os
16
+ from datetime import datetime, timezone
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from alma.storage.base import StorageBackend
20
+ from alma.types import (
21
+ AntiPattern,
22
+ DomainKnowledge,
23
+ Heuristic,
24
+ Outcome,
25
+ UserPreference,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Try to import chromadb
31
+ try:
32
+ import chromadb
33
+ from chromadb.config import Settings
34
+
35
+ CHROMADB_AVAILABLE = True
36
+ except ImportError:
37
+ CHROMADB_AVAILABLE = False
38
+ logger.warning(
39
+ "chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
40
+ )
41
+
42
+
43
+ class ChromaStorage(StorageBackend):
44
+ """
45
+ ChromaDB storage backend with native vector search.
46
+
47
+ Uses ChromaDB collections for each memory type with built-in
48
+ embedding storage and similarity search.
49
+
50
+ Collections:
51
+ - alma_heuristics: Learned strategies
52
+ - alma_outcomes: Task execution records
53
+ - alma_preferences: User preferences
54
+ - alma_domain_knowledge: Domain facts
55
+ - alma_anti_patterns: Patterns to avoid
56
+
57
+ Modes:
58
+ - Persistent: Local storage with persist_directory
59
+ - Client-Server: Remote server with host/port
60
+ - Ephemeral: In-memory (for testing)
61
+ """
62
+
63
+ # Collection names
64
+ HEURISTICS_COLLECTION = "alma_heuristics"
65
+ OUTCOMES_COLLECTION = "alma_outcomes"
66
+ PREFERENCES_COLLECTION = "alma_preferences"
67
+ DOMAIN_KNOWLEDGE_COLLECTION = "alma_domain_knowledge"
68
+ ANTI_PATTERNS_COLLECTION = "alma_anti_patterns"
69
+
70
+ def __init__(
71
+ self,
72
+ persist_directory: Optional[str] = None,
73
+ host: Optional[str] = None,
74
+ port: Optional[int] = None,
75
+ embedding_dim: int = 384,
76
+ collection_metadata: Optional[Dict[str, Any]] = None,
77
+ ):
78
+ """
79
+ Initialize Chroma storage.
80
+
81
+ Args:
82
+ persist_directory: Path for persistent local storage (mutually exclusive with host/port)
83
+ host: Chroma server host (mutually exclusive with persist_directory)
84
+ port: Chroma server port (required if host is specified)
85
+ embedding_dim: Dimension of embedding vectors (for validation)
86
+ collection_metadata: Optional metadata for collections (e.g., distance function)
87
+ """
88
+ if not CHROMADB_AVAILABLE:
89
+ raise ImportError(
90
+ "chromadb not installed. Install with: pip install 'alma-memory[chroma]'"
91
+ )
92
+
93
+ self.embedding_dim = embedding_dim
94
+ self._collection_metadata = collection_metadata or {
95
+ "hnsw:space": "cosine" # Use cosine similarity
96
+ }
97
+
98
+ # Initialize client based on mode
99
+ if host and port:
100
+ # Client-server mode
101
+ self._client = chromadb.HttpClient(host=host, port=port)
102
+ self._mode = "client-server"
103
+ logger.info(f"ChromaDB client-server mode: {host}:{port}")
104
+ elif persist_directory:
105
+ # Persistent local mode
106
+ self._client = chromadb.PersistentClient(
107
+ path=persist_directory,
108
+ settings=Settings(anonymized_telemetry=False),
109
+ )
110
+ self._mode = "persistent"
111
+ logger.info(f"ChromaDB persistent mode: {persist_directory}")
112
+ else:
113
+ # Ephemeral mode (in-memory, for testing)
114
+ self._client = chromadb.Client(
115
+ settings=Settings(anonymized_telemetry=False),
116
+ )
117
+ self._mode = "ephemeral"
118
+ logger.info("ChromaDB ephemeral mode (in-memory)")
119
+
120
+ # Initialize collections
121
+ self._init_collections()
122
+
123
+ @classmethod
124
+ def from_config(cls, config: Dict[str, Any]) -> "ChromaStorage":
125
+ """Create instance from configuration."""
126
+ chroma_config = config.get("chroma", {})
127
+
128
+ # Support environment variable expansion
129
+ def get_value(key: str, default: Any = None) -> Any:
130
+ value = chroma_config.get(key, default)
131
+ if (
132
+ isinstance(value, str)
133
+ and value.startswith("${")
134
+ and value.endswith("}")
135
+ ):
136
+ env_var = value[2:-1]
137
+ return os.environ.get(env_var, default)
138
+ return value
139
+
140
+ persist_directory = get_value("persist_directory")
141
+ host = get_value("host")
142
+ port = get_value("port")
143
+
144
+ if port is not None:
145
+ port = int(port)
146
+
147
+ return cls(
148
+ persist_directory=persist_directory,
149
+ host=host,
150
+ port=port,
151
+ embedding_dim=int(config.get("embedding_dim", 384)),
152
+ collection_metadata=chroma_config.get("collection_metadata"),
153
+ )
154
+
155
+ def _init_collections(self):
156
+ """Initialize or get all collections."""
157
+ self._heuristics = self._client.get_or_create_collection(
158
+ name=self.HEURISTICS_COLLECTION,
159
+ metadata=self._collection_metadata,
160
+ )
161
+ self._outcomes = self._client.get_or_create_collection(
162
+ name=self.OUTCOMES_COLLECTION,
163
+ metadata=self._collection_metadata,
164
+ )
165
+ self._preferences = self._client.get_or_create_collection(
166
+ name=self.PREFERENCES_COLLECTION,
167
+ metadata=self._collection_metadata,
168
+ )
169
+ self._domain_knowledge = self._client.get_or_create_collection(
170
+ name=self.DOMAIN_KNOWLEDGE_COLLECTION,
171
+ metadata=self._collection_metadata,
172
+ )
173
+ self._anti_patterns = self._client.get_or_create_collection(
174
+ name=self.ANTI_PATTERNS_COLLECTION,
175
+ metadata=self._collection_metadata,
176
+ )
177
+
178
+ def _format_get_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
179
+ """Reformat get() results to match query() format."""
180
+ emb = results.get("embeddings")
181
+ has_embeddings = emb is not None and (
182
+ (hasattr(emb, "__len__") and len(emb) > 0)
183
+ or (hasattr(emb, "size") and emb.size > 0)
184
+ )
185
+ return {
186
+ "ids": [results.get("ids", [])],
187
+ "metadatas": [results.get("metadatas", [])],
188
+ "documents": [results.get("documents", [])],
189
+ "embeddings": [emb] if has_embeddings else None,
190
+ }
191
+
192
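Worked example of the reshaping done by _format_get_results(): get() returns flat lists while query() nests them one level deeper, so the flat form is wrapped accordingly (storage is the instance from the earlier sketch; values are illustrative).

    flat = {"ids": ["h1"], "metadatas": [{"agent": "helena"}], "documents": ["doc text"]}
    nested = storage._format_get_results(flat)
    # nested == {"ids": [["h1"]], "metadatas": [[{"agent": "helena"}]],
    #            "documents": [["doc text"]], "embeddings": None}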
+ def _datetime_to_str(self, dt: Optional[datetime]) -> Optional[str]:
193
+ """Convert datetime to ISO string for storage."""
194
+ if dt is None:
195
+ return None
196
+ return dt.isoformat()
197
+
198
+ def _str_to_datetime(self, s: Optional[str]) -> Optional[datetime]:
199
+ """Convert ISO string to datetime."""
200
+ if s is None:
201
+ return None
202
+ try:
203
+ return datetime.fromisoformat(s.replace("Z", "+00:00"))
204
+ except (ValueError, AttributeError):
205
+ return datetime.now(timezone.utc)
206
+
207
+ # ==================== WRITE OPERATIONS ====================
208
+
209
+ def save_heuristic(self, heuristic: Heuristic) -> str:
210
+ """Save a heuristic."""
211
+ metadata = {
212
+ "agent": heuristic.agent,
213
+ "project_id": heuristic.project_id,
214
+ "condition": heuristic.condition,
215
+ "strategy": heuristic.strategy,
216
+ "confidence": heuristic.confidence,
217
+ "occurrence_count": heuristic.occurrence_count,
218
+ "success_count": heuristic.success_count,
219
+ "last_validated": self._datetime_to_str(heuristic.last_validated),
220
+ "created_at": self._datetime_to_str(heuristic.created_at),
221
+ "extra_metadata": json.dumps(heuristic.metadata)
222
+ if heuristic.metadata
223
+ else "{}",
224
+ }
225
+
226
+ # Chroma requires documents - use condition + strategy as document
227
+ document = f"{heuristic.condition}\n{heuristic.strategy}"
228
+
229
+ if heuristic.embedding:
230
+ self._heuristics.upsert(
231
+ ids=[heuristic.id],
232
+ embeddings=[heuristic.embedding],
233
+ metadatas=[metadata],
234
+ documents=[document],
235
+ )
236
+ else:
237
+ self._heuristics.upsert(
238
+ ids=[heuristic.id],
239
+ metadatas=[metadata],
240
+ documents=[document],
241
+ )
242
+
243
+ logger.debug(f"Saved heuristic: {heuristic.id}")
244
+ return heuristic.id
245
+
246
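A hedged usage sketch for save_heuristic(); the field names mirror the metadata keys written above, but the real Heuristic dataclass (its import path is not shown in this diff) may have different required arguments and defaults, and every value here is invented.

    from datetime import datetime, timezone

    h = Heuristic(                       # assuming Heuristic is importable from the package
        id="heur-001",
        agent="helena",
        project_id="alma-demo",
        condition="user asks for a summary of a long document",
        strategy="chunk the document, summarize chunks, then merge",
        confidence=0.7,
        occurrence_count=1,
        success_count=1,
        last_validated=datetime.now(timezone.utc),
        created_at=datetime.now(timezone.utc),
        embedding=None,                  # no precomputed vector in this sketch
        metadata={"source": "example"},
    )
    storage.save_heuristic(h)            # upserts id, metadata, and "condition\nstrategy" as the document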
+ def save_outcome(self, outcome: Outcome) -> str:
247
+ """Save an outcome."""
248
+ metadata = {
249
+ "agent": outcome.agent,
250
+ "project_id": outcome.project_id,
251
+ "task_type": outcome.task_type or "general",
252
+ "success": outcome.success,
253
+ "strategy_used": outcome.strategy_used or "",
254
+ "duration_ms": outcome.duration_ms or 0,
255
+ "error_message": outcome.error_message or "",
256
+ "user_feedback": outcome.user_feedback or "",
257
+ "timestamp": self._datetime_to_str(outcome.timestamp),
258
+ "extra_metadata": json.dumps(outcome.metadata)
259
+ if outcome.metadata
260
+ else "{}",
261
+ }
262
+
263
+ document = outcome.task_description
264
+
265
+ if outcome.embedding:
266
+ self._outcomes.upsert(
267
+ ids=[outcome.id],
268
+ embeddings=[outcome.embedding],
269
+ metadatas=[metadata],
270
+ documents=[document],
271
+ )
272
+ else:
273
+ self._outcomes.upsert(
274
+ ids=[outcome.id],
275
+ metadatas=[metadata],
276
+ documents=[document],
277
+ )
278
+
279
+ logger.debug(f"Saved outcome: {outcome.id}")
280
+ return outcome.id
281
+
282
+ def save_user_preference(self, preference: UserPreference) -> str:
283
+ """Save a user preference."""
284
+ metadata = {
285
+ "user_id": preference.user_id,
286
+ "category": preference.category or "general",
287
+ "source": preference.source or "unknown",
288
+ "confidence": preference.confidence,
289
+ "timestamp": self._datetime_to_str(preference.timestamp),
290
+ "extra_metadata": json.dumps(preference.metadata)
291
+ if preference.metadata
292
+ else "{}",
293
+ }
294
+
295
+ document = preference.preference
296
+
297
+ self._preferences.upsert(
298
+ ids=[preference.id],
299
+ metadatas=[metadata],
300
+ documents=[document],
301
+ )
302
+
303
+ logger.debug(f"Saved preference: {preference.id}")
304
+ return preference.id
305
+
306
+ def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
307
+ """Save domain knowledge."""
308
+ metadata = {
309
+ "agent": knowledge.agent,
310
+ "project_id": knowledge.project_id,
311
+ "domain": knowledge.domain or "general",
312
+ "source": knowledge.source or "unknown",
313
+ "confidence": knowledge.confidence,
314
+ "last_verified": self._datetime_to_str(knowledge.last_verified),
315
+ "extra_metadata": json.dumps(knowledge.metadata)
316
+ if knowledge.metadata
317
+ else "{}",
318
+ }
319
+
320
+ document = knowledge.fact
321
+
322
+ if knowledge.embedding:
323
+ self._domain_knowledge.upsert(
324
+ ids=[knowledge.id],
325
+ embeddings=[knowledge.embedding],
326
+ metadatas=[metadata],
327
+ documents=[document],
328
+ )
329
+ else:
330
+ self._domain_knowledge.upsert(
331
+ ids=[knowledge.id],
332
+ metadatas=[metadata],
333
+ documents=[document],
334
+ )
335
+
336
+ logger.debug(f"Saved domain knowledge: {knowledge.id}")
337
+ return knowledge.id
338
+
339
+ def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
340
+ """Save an anti-pattern."""
341
+ metadata = {
342
+ "agent": anti_pattern.agent,
343
+ "project_id": anti_pattern.project_id,
344
+ "why_bad": anti_pattern.why_bad or "",
345
+ "better_alternative": anti_pattern.better_alternative or "",
346
+ "occurrence_count": anti_pattern.occurrence_count,
347
+ "last_seen": self._datetime_to_str(anti_pattern.last_seen),
348
+ "created_at": self._datetime_to_str(anti_pattern.created_at),
349
+ "extra_metadata": json.dumps(anti_pattern.metadata)
350
+ if anti_pattern.metadata
351
+ else "{}",
352
+ }
353
+
354
+ document = anti_pattern.pattern
355
+
356
+ if anti_pattern.embedding:
357
+ self._anti_patterns.upsert(
358
+ ids=[anti_pattern.id],
359
+ embeddings=[anti_pattern.embedding],
360
+ metadatas=[metadata],
361
+ documents=[document],
362
+ )
363
+ else:
364
+ self._anti_patterns.upsert(
365
+ ids=[anti_pattern.id],
366
+ metadatas=[metadata],
367
+ documents=[document],
368
+ )
369
+
370
+ logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
371
+ return anti_pattern.id
372
+
373
+ # ==================== BATCH WRITE OPERATIONS ====================
374
+
375
+ def save_heuristics(self, heuristics: List[Heuristic]) -> List[str]:
376
+ """Save multiple heuristics in a batch."""
377
+ if not heuristics:
378
+ return []
379
+
380
+ ids = []
381
+ embeddings = []
382
+ metadatas = []
383
+ documents = []
384
+ has_embeddings = False
385
+
386
+ for h in heuristics:
387
+ ids.append(h.id)
388
+ metadatas.append(
389
+ {
390
+ "agent": h.agent,
391
+ "project_id": h.project_id,
392
+ "condition": h.condition,
393
+ "strategy": h.strategy,
394
+ "confidence": h.confidence,
395
+ "occurrence_count": h.occurrence_count,
396
+ "success_count": h.success_count,
397
+ "last_validated": self._datetime_to_str(h.last_validated),
398
+ "created_at": self._datetime_to_str(h.created_at),
399
+ "extra_metadata": json.dumps(h.metadata) if h.metadata else "{}",
400
+ }
401
+ )
402
+ documents.append(f"{h.condition}\n{h.strategy}")
403
+ if h.embedding:
404
+ embeddings.append(h.embedding)
405
+ has_embeddings = True
406
+ else:
407
+ embeddings.append(None)
408
+
409
+ if has_embeddings and all(e is not None for e in embeddings):
410
+ self._heuristics.upsert(
411
+ ids=ids,
412
+ embeddings=embeddings,
413
+ metadatas=metadatas,
414
+ documents=documents,
415
+ )
416
+ else:
417
+ self._heuristics.upsert(
418
+ ids=ids,
419
+ metadatas=metadatas,
420
+ documents=documents,
421
+ )
422
+
423
+ logger.debug(f"Batch saved {len(heuristics)} heuristics")
424
+ return ids
425
+
426
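Worth noting about the batch path: embeddings are passed to upsert() only when every heuristic in the batch carries one, so a single missing vector causes the whole batch to be stored without embeddings (placeholder variables, illustrative only).

    ids = storage.save_heuristics([h_with_vector, h_without_vector])  # upserted without vectors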
+ def save_outcomes(self, outcomes: List[Outcome]) -> List[str]:
427
+ """Save multiple outcomes in a batch."""
428
+ if not outcomes:
429
+ return []
430
+
431
+ ids = []
432
+ embeddings = []
433
+ metadatas = []
434
+ documents = []
435
+ has_embeddings = False
436
+
437
+ for o in outcomes:
438
+ ids.append(o.id)
439
+ metadatas.append(
440
+ {
441
+ "agent": o.agent,
442
+ "project_id": o.project_id,
443
+ "task_type": o.task_type or "general",
444
+ "success": o.success,
445
+ "strategy_used": o.strategy_used or "",
446
+ "duration_ms": o.duration_ms or 0,
447
+ "error_message": o.error_message or "",
448
+ "user_feedback": o.user_feedback or "",
449
+ "timestamp": self._datetime_to_str(o.timestamp),
450
+ "extra_metadata": json.dumps(o.metadata) if o.metadata else "{}",
451
+ }
452
+ )
453
+ documents.append(o.task_description)
454
+ if o.embedding:
455
+ embeddings.append(o.embedding)
456
+ has_embeddings = True
457
+ else:
458
+ embeddings.append(None)
459
+
460
+ if has_embeddings and all(e is not None for e in embeddings):
461
+ self._outcomes.upsert(
462
+ ids=ids,
463
+ embeddings=embeddings,
464
+ metadatas=metadatas,
465
+ documents=documents,
466
+ )
467
+ else:
468
+ self._outcomes.upsert(
469
+ ids=ids,
470
+ metadatas=metadatas,
471
+ documents=documents,
472
+ )
473
+
474
+ logger.debug(f"Batch saved {len(outcomes)} outcomes")
475
+ return ids
476
+
477
+ def save_domain_knowledge_batch(
478
+ self, knowledge_items: List[DomainKnowledge]
479
+ ) -> List[str]:
480
+ """Save multiple domain knowledge items in a batch."""
481
+ if not knowledge_items:
482
+ return []
483
+
484
+ ids = []
485
+ embeddings = []
486
+ metadatas = []
487
+ documents = []
488
+ has_embeddings = False
489
+
490
+ for k in knowledge_items:
491
+ ids.append(k.id)
492
+ metadatas.append(
493
+ {
494
+ "agent": k.agent,
495
+ "project_id": k.project_id,
496
+ "domain": k.domain or "general",
497
+ "source": k.source or "unknown",
498
+ "confidence": k.confidence,
499
+ "last_verified": self._datetime_to_str(k.last_verified),
500
+ "extra_metadata": json.dumps(k.metadata) if k.metadata else "{}",
501
+ }
502
+ )
503
+ documents.append(k.fact)
504
+ if k.embedding:
505
+ embeddings.append(k.embedding)
506
+ has_embeddings = True
507
+ else:
508
+ embeddings.append(None)
509
+
510
+ if has_embeddings and all(e is not None for e in embeddings):
511
+ self._domain_knowledge.upsert(
512
+ ids=ids,
513
+ embeddings=embeddings,
514
+ metadatas=metadatas,
515
+ documents=documents,
516
+ )
517
+ else:
518
+ self._domain_knowledge.upsert(
519
+ ids=ids,
520
+ metadatas=metadatas,
521
+ documents=documents,
522
+ )
523
+
524
+ logger.debug(f"Batch saved {len(knowledge_items)} domain knowledge items")
525
+ return ids
526
+
527
+ # ==================== READ OPERATIONS ====================
528
+
529
+ def _build_where_filter(
530
+ self,
531
+ project_id: Optional[str] = None,
532
+ agent: Optional[str] = None,
533
+ user_id: Optional[str] = None,
534
+ domain: Optional[str] = None,
535
+ task_type: Optional[str] = None,
536
+ min_confidence: Optional[float] = None,
537
+ success_only: bool = False,
538
+ ) -> Optional[Dict[str, Any]]:
539
+ """Build Chroma where filter from parameters."""
540
+ conditions = []
541
+
542
+ if project_id:
543
+ conditions.append({"project_id": {"$eq": project_id}})
544
+ if agent:
545
+ conditions.append({"agent": {"$eq": agent}})
546
+ if user_id:
547
+ conditions.append({"user_id": {"$eq": user_id}})
548
+ if domain:
549
+ conditions.append({"domain": {"$eq": domain}})
550
+ if task_type:
551
+ conditions.append({"task_type": {"$eq": task_type}})
552
+ if min_confidence is not None and min_confidence > 0:
553
+ conditions.append({"confidence": {"$gte": min_confidence}})
554
+ if success_only:
555
+ conditions.append({"success": {"$eq": True}})
556
+
557
+ if not conditions:
558
+ return None
559
+ if len(conditions) == 1:
560
+ return conditions[0]
561
+ return {"$and": conditions}
562
+
563
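Illustrative output of _build_where_filter() for a typical heuristics lookup (argument values invented):

    f = storage._build_where_filter(project_id="alma-demo", agent="helena", min_confidence=0.5)
    # f == {"$and": [{"project_id": {"$eq": "alma-demo"}},
    #                {"agent": {"$eq": "helena"}},
    #                {"confidence": {"$gte": 0.5}}]}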
+ def _has_embedding(self, emb: Any) -> bool:
564
+ """Safely check if embedding is not None/empty (handles numpy arrays)."""
565
+ if emb is None:
566
+ return False
567
+ if hasattr(emb, "__len__"):
568
+ try:
569
+ return len(emb) > 0
570
+ except (TypeError, ValueError):
571
+ pass
572
+ if hasattr(emb, "size"):
573
+ return emb.size > 0
574
+ return True
575
+
576
+ def _get_embedding_list(self, results: Dict[str, Any], num_ids: int) -> List[Any]:
577
+ """Safely extract embeddings list from results."""
578
+ emb_data = results.get("embeddings")
579
+ if emb_data is None:
580
+ return [None] * num_ids
581
+ # Handle both nested list format (query results) and flat format
582
+ if isinstance(emb_data, list) and len(emb_data) > 0:
583
+ first = emb_data[0]
584
+ # Check if it's a nested list (query format: [[emb1, emb2, ...]])
585
+ if isinstance(first, list) or (
586
+ hasattr(first, "__iter__") and not isinstance(first, (str, bytes))
587
+ ):
588
+ # Could be list of embeddings or numpy array
589
+ try:
590
+ if hasattr(first, "tolist"):
591
+ # numpy array
592
+ return list(emb_data[0])
593
+ return list(first) if isinstance(first, list) else [first]
594
+ except (TypeError, IndexError):
595
+ return [None] * num_ids
596
+ return list(emb_data)
597
+ return [None] * num_ids
598
+
599
+ def _results_to_heuristics(self, results: Dict[str, Any]) -> List[Heuristic]:
600
+ """Convert Chroma query results to Heuristic objects."""
601
+ heuristics = []
602
+ if not results or not results.get("ids") or not results["ids"][0]:
603
+ return heuristics
604
+
605
+ ids = results["ids"][0]
606
+ metadatas = results.get("metadatas", [[]])[0]
607
+ embeddings = self._get_embedding_list(results, len(ids))
608
+
609
+ for i, id_ in enumerate(ids):
610
+ meta = metadatas[i] if i < len(metadatas) else {}
611
+ emb = embeddings[i] if i < len(embeddings) else None
612
+
613
+ extra = json.loads(meta.get("extra_metadata", "{}"))
614
+
615
+ heuristics.append(
616
+ Heuristic(
617
+ id=id_,
618
+ agent=meta.get("agent", ""),
619
+ project_id=meta.get("project_id", ""),
620
+ condition=meta.get("condition", ""),
621
+ strategy=meta.get("strategy", ""),
622
+ confidence=meta.get("confidence", 0.0),
623
+ occurrence_count=meta.get("occurrence_count", 0),
624
+ success_count=meta.get("success_count", 0),
625
+ last_validated=self._str_to_datetime(meta.get("last_validated"))
626
+ or datetime.now(timezone.utc),
627
+ created_at=self._str_to_datetime(meta.get("created_at"))
628
+ or datetime.now(timezone.utc),
629
+ embedding=list(emb)
630
+ if emb is not None and hasattr(emb, "__iter__")
631
+ else emb,
632
+ metadata=extra,
633
+ )
634
+ )
635
+
636
+ return heuristics
637
+
638
+ def _results_to_outcomes(self, results: Dict[str, Any]) -> List[Outcome]:
639
+ """Convert Chroma query results to Outcome objects."""
640
+ outcomes = []
641
+ if not results or not results.get("ids") or not results["ids"][0]:
642
+ return outcomes
643
+
644
+ ids = results["ids"][0]
645
+ metadatas = results.get("metadatas", [[]])[0]
646
+ documents = results.get("documents", [[]])[0]
647
+ embeddings = self._get_embedding_list(results, len(ids))
648
+
649
+ for i, id_ in enumerate(ids):
650
+ meta = metadatas[i] if i < len(metadatas) else {}
651
+ doc = documents[i] if i < len(documents) else ""
652
+ emb = embeddings[i] if i < len(embeddings) else None
653
+
654
+ extra = json.loads(meta.get("extra_metadata", "{}"))
655
+
656
+ outcomes.append(
657
+ Outcome(
658
+ id=id_,
659
+ agent=meta.get("agent", ""),
660
+ project_id=meta.get("project_id", ""),
661
+ task_type=meta.get("task_type", "general"),
662
+ task_description=doc,
663
+ success=meta.get("success", False),
664
+ strategy_used=meta.get("strategy_used", ""),
665
+ duration_ms=meta.get("duration_ms"),
666
+ error_message=meta.get("error_message") or None,
667
+ user_feedback=meta.get("user_feedback") or None,
668
+ timestamp=self._str_to_datetime(meta.get("timestamp"))
669
+ or datetime.now(timezone.utc),
670
+ embedding=list(emb)
671
+ if emb is not None and hasattr(emb, "__iter__")
672
+ else emb,
673
+ metadata=extra,
674
+ )
675
+ )
676
+
677
+ return outcomes
678
+
679
+ def _results_to_preferences(self, results: Dict[str, Any]) -> List[UserPreference]:
680
+ """Convert Chroma query results to UserPreference objects."""
681
+ preferences = []
682
+ if not results or not results.get("ids") or not results["ids"][0]:
683
+ return preferences
684
+
685
+ ids = results["ids"][0]
686
+ metadatas = results.get("metadatas", [[]])[0]
687
+ documents = results.get("documents", [[]])[0]
688
+
689
+ for i, id_ in enumerate(ids):
690
+ meta = metadatas[i] if i < len(metadatas) else {}
691
+ doc = documents[i] if i < len(documents) else ""
692
+
693
+ extra = json.loads(meta.get("extra_metadata", "{}"))
694
+
695
+ preferences.append(
696
+ UserPreference(
697
+ id=id_,
698
+ user_id=meta.get("user_id", ""),
699
+ category=meta.get("category", "general"),
700
+ preference=doc,
701
+ source=meta.get("source", "unknown"),
702
+ confidence=meta.get("confidence", 1.0),
703
+ timestamp=self._str_to_datetime(meta.get("timestamp"))
704
+ or datetime.now(timezone.utc),
705
+ metadata=extra,
706
+ )
707
+ )
708
+
709
+ return preferences
710
+
711
+ def _results_to_domain_knowledge(
712
+ self, results: Dict[str, Any]
713
+ ) -> List[DomainKnowledge]:
714
+ """Convert Chroma query results to DomainKnowledge objects."""
715
+ knowledge = []
716
+ if not results or not results.get("ids") or not results["ids"][0]:
717
+ return knowledge
718
+
719
+ ids = results["ids"][0]
720
+ metadatas = results.get("metadatas", [[]])[0]
721
+ documents = results.get("documents", [[]])[0]
722
+ embeddings = self._get_embedding_list(results, len(ids))
723
+
724
+ for i, id_ in enumerate(ids):
725
+ meta = metadatas[i] if i < len(metadatas) else {}
726
+ doc = documents[i] if i < len(documents) else ""
727
+ emb = embeddings[i] if i < len(embeddings) else None
728
+
729
+ extra = json.loads(meta.get("extra_metadata", "{}"))
730
+
731
+ knowledge.append(
732
+ DomainKnowledge(
733
+ id=id_,
734
+ agent=meta.get("agent", ""),
735
+ project_id=meta.get("project_id", ""),
736
+ domain=meta.get("domain", "general"),
737
+ fact=doc,
738
+ source=meta.get("source", "unknown"),
739
+ confidence=meta.get("confidence", 1.0),
740
+ last_verified=self._str_to_datetime(meta.get("last_verified"))
741
+ or datetime.now(timezone.utc),
742
+ embedding=list(emb)
743
+ if emb is not None and hasattr(emb, "__iter__")
744
+ else emb,
745
+ metadata=extra,
746
+ )
747
+ )
748
+
749
+ return knowledge
750
+
751
+ def _results_to_anti_patterns(self, results: Dict[str, Any]) -> List[AntiPattern]:
752
+ """Convert Chroma query results to AntiPattern objects."""
753
+ patterns = []
754
+ if not results or not results.get("ids") or not results["ids"][0]:
755
+ return patterns
756
+
757
+ ids = results["ids"][0]
758
+ metadatas = results.get("metadatas", [[]])[0]
759
+ documents = results.get("documents", [[]])[0]
760
+ embeddings = self._get_embedding_list(results, len(ids))
761
+
762
+ for i, id_ in enumerate(ids):
763
+ meta = metadatas[i] if i < len(metadatas) else {}
764
+ doc = documents[i] if i < len(documents) else ""
765
+ emb = embeddings[i] if i < len(embeddings) else None
766
+
767
+ extra = json.loads(meta.get("extra_metadata", "{}"))
768
+
769
+ patterns.append(
770
+ AntiPattern(
771
+ id=id_,
772
+ agent=meta.get("agent", ""),
773
+ project_id=meta.get("project_id", ""),
774
+ pattern=doc,
775
+ why_bad=meta.get("why_bad", ""),
776
+ better_alternative=meta.get("better_alternative", ""),
777
+ occurrence_count=meta.get("occurrence_count", 1),
778
+ last_seen=self._str_to_datetime(meta.get("last_seen"))
779
+ or datetime.now(timezone.utc),
780
+ created_at=self._str_to_datetime(meta.get("created_at"))
781
+ or datetime.now(timezone.utc),
782
+ embedding=list(emb)
783
+ if emb is not None and hasattr(emb, "__iter__")
784
+ else emb,
785
+ metadata=extra,
786
+ )
787
+ )
788
+
789
+ return patterns
790
+
791
+ def get_heuristics(
792
+ self,
793
+ project_id: str,
794
+ agent: Optional[str] = None,
795
+ embedding: Optional[List[float]] = None,
796
+ top_k: int = 5,
797
+ min_confidence: float = 0.0,
798
+ ) -> List[Heuristic]:
799
+ """Get heuristics with optional vector search."""
800
+ where_filter = self._build_where_filter(
801
+ project_id=project_id,
802
+ agent=agent,
803
+ min_confidence=min_confidence,
804
+ )
805
+
806
+ if embedding:
807
+ results = self._heuristics.query(
808
+ query_embeddings=[embedding],
809
+ n_results=top_k,
810
+ where=where_filter,
811
+ include=["metadatas", "documents", "embeddings"],
812
+ )
813
+ else:
814
+ results = self._heuristics.get(
815
+ where=where_filter,
816
+ limit=top_k,
817
+ include=["metadatas", "documents", "embeddings"],
818
+ )
819
+ results = self._format_get_results(results)
820
+
821
+ return self._results_to_heuristics(results)
822
+
823
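Hedged retrieval sketch: passing an embedding turns the call into a vector query, while omitting it falls back to a filtered scan; the stand-in vector below simply matches the default embedding_dim of 384.

    query_vector = [0.0] * 384  # stand-in for an embedding produced elsewhere

    ranked = storage.get_heuristics("alma-demo", agent="helena",
                                    embedding=query_vector, top_k=3, min_confidence=0.5)
    unranked = storage.get_heuristics("alma-demo", agent="helena", top_k=10)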
+ def get_outcomes(
824
+ self,
825
+ project_id: str,
826
+ agent: Optional[str] = None,
827
+ task_type: Optional[str] = None,
828
+ embedding: Optional[List[float]] = None,
829
+ top_k: int = 5,
830
+ success_only: bool = False,
831
+ ) -> List[Outcome]:
832
+ """Get outcomes with optional vector search."""
833
+ where_filter = self._build_where_filter(
834
+ project_id=project_id,
835
+ agent=agent,
836
+ task_type=task_type,
837
+ success_only=success_only,
838
+ )
839
+
840
+ if embedding:
841
+ results = self._outcomes.query(
842
+ query_embeddings=[embedding],
843
+ n_results=top_k,
844
+ where=where_filter,
845
+ include=["metadatas", "documents", "embeddings"],
846
+ )
847
+ else:
848
+ results = self._outcomes.get(
849
+ where=where_filter,
850
+ limit=top_k,
851
+ include=["metadatas", "documents", "embeddings"],
852
+ )
853
+ results = self._format_get_results(results)
854
+
855
+ return self._results_to_outcomes(results)
856
+
857
+ def get_user_preferences(
858
+ self,
859
+ user_id: str,
860
+ category: Optional[str] = None,
861
+ ) -> List[UserPreference]:
862
+ """Get user preferences."""
863
+ where_filter = self._build_where_filter(user_id=user_id)
864
+ if category:
865
+ if where_filter:
866
+ where_filter = {"$and": [where_filter, {"category": {"$eq": category}}]}
867
+ else:
868
+ where_filter = {"category": {"$eq": category}}
869
+
870
+ results = self._preferences.get(
871
+ where=where_filter,
872
+ include=["metadatas", "documents"],
873
+ )
874
+ results = {
875
+ "ids": [results.get("ids", [])],
876
+ "metadatas": [results.get("metadatas", [])],
877
+ "documents": [results.get("documents", [])],
878
+ }
879
+
880
+ return self._results_to_preferences(results)
881
+
882
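Hedged example: a category narrows the user_id filter with an extra $and clause (identifiers invented).

    prefs = storage.get_user_preferences("user-42", category="formatting")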
+ def get_domain_knowledge(
883
+ self,
884
+ project_id: str,
885
+ agent: Optional[str] = None,
886
+ domain: Optional[str] = None,
887
+ embedding: Optional[List[float]] = None,
888
+ top_k: int = 5,
889
+ ) -> List[DomainKnowledge]:
890
+ """Get domain knowledge with optional vector search."""
891
+ where_filter = self._build_where_filter(
892
+ project_id=project_id,
893
+ agent=agent,
894
+ domain=domain,
895
+ )
896
+
897
+ if embedding:
898
+ results = self._domain_knowledge.query(
899
+ query_embeddings=[embedding],
900
+ n_results=top_k,
901
+ where=where_filter,
902
+ include=["metadatas", "documents", "embeddings"],
903
+ )
904
+ else:
905
+ results = self._domain_knowledge.get(
906
+ where=where_filter,
907
+ limit=top_k,
908
+ include=["metadatas", "documents", "embeddings"],
909
+ )
910
+ results = self._format_get_results(results)
911
+
912
+ return self._results_to_domain_knowledge(results)
913
+
914
+ def get_anti_patterns(
915
+ self,
916
+ project_id: str,
917
+ agent: Optional[str] = None,
918
+ embedding: Optional[List[float]] = None,
919
+ top_k: int = 5,
920
+ ) -> List[AntiPattern]:
921
+ """Get anti-patterns with optional vector search."""
922
+ where_filter = self._build_where_filter(
923
+ project_id=project_id,
924
+ agent=agent,
925
+ )
926
+
927
+ if embedding:
928
+ results = self._anti_patterns.query(
929
+ query_embeddings=[embedding],
930
+ n_results=top_k,
931
+ where=where_filter,
932
+ include=["metadatas", "documents", "embeddings"],
933
+ )
934
+ else:
935
+ results = self._anti_patterns.get(
936
+ where=where_filter,
937
+ limit=top_k,
938
+ include=["metadatas", "documents", "embeddings"],
939
+ )
940
+ results = self._format_get_results(results)
941
+
942
+ return self._results_to_anti_patterns(results)
943
+
944
+ # ==================== MULTI-AGENT MEMORY SHARING ====================
945
+
946
+ def _build_agents_filter(
947
+ self,
948
+ project_id: str,
949
+ agents: List[str],
950
+ **kwargs: Any,
951
+ ) -> Optional[Dict[str, Any]]:
952
+ """Build filter for multiple agents."""
953
+ if not agents:
954
+ return None
955
+
956
+ agent_conditions = [{"agent": {"$eq": a}} for a in agents]
957
+ agents_filter = (
958
+ {"$or": agent_conditions}
959
+ if len(agent_conditions) > 1
960
+ else agent_conditions[0]
961
+ )
962
+
963
+ base_filter = self._build_where_filter(project_id=project_id, **kwargs)
964
+
965
+ if base_filter:
966
+ return {"$and": [base_filter, agents_filter]}
967
+ return {"$and": [{"project_id": {"$eq": project_id}}, agents_filter]}
968
+
969
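Illustrative output of _build_agents_filter() for two agents (agent names borrowed from the package's integration modules, everything else invented):

    f = storage._build_agents_filter("alma-demo", ["helena", "victor"])
    # f == {"$and": [{"project_id": {"$eq": "alma-demo"}},
    #                {"$or": [{"agent": {"$eq": "helena"}},
    #                         {"agent": {"$eq": "victor"}}]}]}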
+ def get_heuristics_for_agents(
970
+ self,
971
+ project_id: str,
972
+ agents: List[str],
973
+ embedding: Optional[List[float]] = None,
974
+ top_k: int = 5,
975
+ min_confidence: float = 0.0,
976
+ ) -> List[Heuristic]:
977
+ """Get heuristics from multiple agents."""
978
+ if not agents:
979
+ return []
980
+
981
+ where_filter = self._build_agents_filter(
982
+ project_id=project_id,
983
+ agents=agents,
984
+ min_confidence=min_confidence,
985
+ )
986
+
987
+ if embedding:
988
+ results = self._heuristics.query(
989
+ query_embeddings=[embedding],
990
+ n_results=top_k * len(agents),
991
+ where=where_filter,
992
+ include=["metadatas", "documents", "embeddings"],
993
+ )
994
+ else:
995
+ results = self._heuristics.get(
996
+ where=where_filter,
997
+ limit=top_k * len(agents),
998
+ include=["metadatas", "documents", "embeddings"],
999
+ )
1000
+ results = self._format_get_results(results)
1001
+
1002
+ return self._results_to_heuristics(results)
1003
+
1004
+ def get_outcomes_for_agents(
1005
+ self,
1006
+ project_id: str,
1007
+ agents: List[str],
1008
+ task_type: Optional[str] = None,
1009
+ embedding: Optional[List[float]] = None,
1010
+ top_k: int = 5,
1011
+ success_only: bool = False,
1012
+ ) -> List[Outcome]:
1013
+ """Get outcomes from multiple agents."""
1014
+ if not agents:
1015
+ return []
1016
+
1017
+ where_filter = self._build_agents_filter(
1018
+ project_id=project_id,
1019
+ agents=agents,
1020
+ task_type=task_type,
1021
+ success_only=success_only,
1022
+ )
1023
+
1024
+ if embedding:
1025
+ results = self._outcomes.query(
1026
+ query_embeddings=[embedding],
1027
+ n_results=top_k * len(agents),
1028
+ where=where_filter,
1029
+ include=["metadatas", "documents", "embeddings"],
1030
+ )
1031
+ else:
1032
+ results = self._outcomes.get(
1033
+ where=where_filter,
1034
+ limit=top_k * len(agents),
1035
+ include=["metadatas", "documents", "embeddings"],
1036
+ )
1037
+ results = self._format_get_results(results)
1038
+
1039
+ return self._results_to_outcomes(results)
1040
+
1041
+ def get_domain_knowledge_for_agents(
1042
+ self,
1043
+ project_id: str,
1044
+ agents: List[str],
1045
+ domain: Optional[str] = None,
1046
+ embedding: Optional[List[float]] = None,
1047
+ top_k: int = 5,
1048
+ ) -> List[DomainKnowledge]:
1049
+ """Get domain knowledge from multiple agents."""
1050
+ if not agents:
1051
+ return []
1052
+
1053
+ where_filter = self._build_agents_filter(
1054
+ project_id=project_id,
1055
+ agents=agents,
1056
+ domain=domain,
1057
+ )
1058
+
1059
+ if embedding:
1060
+ results = self._domain_knowledge.query(
1061
+ query_embeddings=[embedding],
1062
+ n_results=top_k * len(agents),
1063
+ where=where_filter,
1064
+ include=["metadatas", "documents", "embeddings"],
1065
+ )
1066
+ else:
1067
+ results = self._domain_knowledge.get(
1068
+ where=where_filter,
1069
+ limit=top_k * len(agents),
1070
+ include=["metadatas", "documents", "embeddings"],
1071
+ )
1072
+ results = self._format_get_results(results)
1073
+
1074
+ return self._results_to_domain_knowledge(results)
1075
+
1076
+ def get_anti_patterns_for_agents(
1077
+ self,
1078
+ project_id: str,
1079
+ agents: List[str],
1080
+ embedding: Optional[List[float]] = None,
1081
+ top_k: int = 5,
1082
+ ) -> List[AntiPattern]:
1083
+ """Get anti-patterns from multiple agents."""
1084
+ if not agents:
1085
+ return []
1086
+
1087
+ where_filter = self._build_agents_filter(
1088
+ project_id=project_id,
1089
+ agents=agents,
1090
+ )
1091
+
1092
+ if embedding:
1093
+ results = self._anti_patterns.query(
1094
+ query_embeddings=[embedding],
1095
+ n_results=top_k * len(agents),
1096
+ where=where_filter,
1097
+ include=["metadatas", "documents", "embeddings"],
1098
+ )
1099
+ else:
1100
+ results = self._anti_patterns.get(
1101
+ where=where_filter,
1102
+ limit=top_k * len(agents),
1103
+ include=["metadatas", "documents", "embeddings"],
1104
+ )
1105
+ results = self._format_get_results(results)
1106
+
1107
+ return self._results_to_anti_patterns(results)
1108
+
1109
+ # ==================== UPDATE OPERATIONS ====================
1110
+
1111
+ def update_heuristic(
1112
+ self,
1113
+ heuristic_id: str,
1114
+ updates: Dict[str, Any],
1115
+ ) -> bool:
1116
+ """Update a heuristic's fields."""
1117
+ if not updates:
1118
+ return False
1119
+
1120
+ try:
1121
+ # Get existing heuristic
1122
+ existing = self._heuristics.get(
1123
+ ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
1124
+ )
1125
+ if not existing or not existing.get("ids"):
1126
+ return False
1127
+
1128
+ metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
1129
+ document = existing["documents"][0] if existing.get("documents") else ""
1130
+ emb_list = existing.get("embeddings")
1131
+ embedding = (
1132
+ emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
1133
+ )
1134
+
1135
+ # Apply updates
1136
+ for key, value in updates.items():
1137
+ if key == "condition":
1138
+ metadata["condition"] = value
1139
+ # Update document as well
1140
+ parts = document.split("\n", 1)
1141
+ document = f"{value}\n{parts[1] if len(parts) > 1 else ''}"
1142
+ elif key == "strategy":
1143
+ metadata["strategy"] = value
1144
+ parts = document.split("\n", 1)
1145
+ document = f"{parts[0] if parts else ''}\n{value}"
1146
+ elif key == "metadata":
1147
+ metadata["extra_metadata"] = json.dumps(value)
1148
+ elif key in ("last_validated", "created_at") and isinstance(
1149
+ value, datetime
1150
+ ):
1151
+ metadata[key] = value.isoformat()
1152
+ elif key in metadata:
1153
+ metadata[key] = value
1154
+
1155
+ # Upsert with updated values
1156
+ if self._has_embedding(embedding):
1157
+ self._heuristics.upsert(
1158
+ ids=[heuristic_id],
1159
+ embeddings=[
1160
+ list(embedding) if hasattr(embedding, "__iter__") else embedding
1161
+ ],
1162
+ metadatas=[metadata],
1163
+ documents=[document],
1164
+ )
1165
+ else:
1166
+ self._heuristics.upsert(
1167
+ ids=[heuristic_id],
1168
+ metadatas=[metadata],
1169
+ documents=[document],
1170
+ )
1171
+
1172
+ return True
1173
+ except Exception as e:
1174
+ logger.warning(f"Failed to update heuristic {heuristic_id}: {e}")
1175
+ return False
1176
+
1177
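Hedged update sketch: changing "strategy" rewrites both the metadata field and the second line of the stored document, while plain metadata keys such as "confidence" are overwritten in place (id reused from the earlier sketch).

    ok = storage.update_heuristic("heur-001", {
        "strategy": "summarize chunks hierarchically",
        "confidence": 0.85,
    })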
+ def increment_heuristic_occurrence(
1178
+ self,
1179
+ heuristic_id: str,
1180
+ success: bool,
1181
+ ) -> bool:
1182
+ """Increment heuristic occurrence count."""
1183
+ try:
1184
+ existing = self._heuristics.get(
1185
+ ids=[heuristic_id], include=["metadatas", "documents", "embeddings"]
1186
+ )
1187
+ if not existing or not existing.get("ids"):
1188
+ return False
1189
+
1190
+ metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
1191
+ document = existing["documents"][0] if existing.get("documents") else ""
1192
+ emb_list = existing.get("embeddings")
1193
+ embedding = (
1194
+ emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
1195
+ )
1196
+
1197
+ metadata["occurrence_count"] = metadata.get("occurrence_count", 0) + 1
1198
+ if success:
1199
+ metadata["success_count"] = metadata.get("success_count", 0) + 1
1200
+ metadata["last_validated"] = datetime.now(timezone.utc).isoformat()
1201
+
1202
+ if self._has_embedding(embedding):
1203
+ self._heuristics.upsert(
1204
+ ids=[heuristic_id],
1205
+ embeddings=[
1206
+ list(embedding) if hasattr(embedding, "__iter__") else embedding
1207
+ ],
1208
+ metadatas=[metadata],
1209
+ documents=[document],
1210
+ )
1211
+ else:
1212
+ self._heuristics.upsert(
1213
+ ids=[heuristic_id],
1214
+ metadatas=[metadata],
1215
+ documents=[document],
1216
+ )
1217
+
1218
+ return True
1219
+ except Exception as e:
1220
+ logger.warning(f"Failed to increment occurrence for {heuristic_id}: {e}")
1221
+ return False
1222
+
1223
+ def update_heuristic_confidence(
1224
+ self,
1225
+ heuristic_id: str,
1226
+ new_confidence: float,
1227
+ ) -> bool:
1228
+ """Update a heuristic's confidence value."""
1229
+ return self.update_heuristic(heuristic_id, {"confidence": new_confidence})
1230
+
1231
+ def update_knowledge_confidence(
1232
+ self,
1233
+ knowledge_id: str,
1234
+ new_confidence: float,
1235
+ ) -> bool:
1236
+ """Update domain knowledge confidence value."""
1237
+ try:
1238
+ existing = self._domain_knowledge.get(
1239
+ ids=[knowledge_id], include=["metadatas", "documents", "embeddings"]
1240
+ )
1241
+ if not existing or not existing.get("ids"):
1242
+ return False
1243
+
1244
+ metadata = existing["metadatas"][0] if existing.get("metadatas") else {}
1245
+ document = existing["documents"][0] if existing.get("documents") else ""
1246
+ emb_list = existing.get("embeddings")
1247
+ embedding = (
1248
+ emb_list[0] if emb_list is not None and len(emb_list) > 0 else None
1249
+ )
1250
+
1251
+ metadata["confidence"] = new_confidence
1252
+
1253
+ if self._has_embedding(embedding):
1254
+ self._domain_knowledge.upsert(
1255
+ ids=[knowledge_id],
1256
+ embeddings=[
1257
+ list(embedding) if hasattr(embedding, "__iter__") else embedding
1258
+ ],
1259
+ metadatas=[metadata],
1260
+ documents=[document],
1261
+ )
1262
+ else:
1263
+ self._domain_knowledge.upsert(
1264
+ ids=[knowledge_id],
1265
+ metadatas=[metadata],
1266
+ documents=[document],
1267
+ )
1268
+
1269
+ return True
1270
+ except Exception as e:
1271
+ logger.warning(f"Failed to update knowledge confidence {knowledge_id}: {e}")
1272
+ return False
1273
+
1274
+ # ==================== DELETE OPERATIONS ====================
1275
+
1276
+ def delete_heuristic(self, heuristic_id: str) -> bool:
1277
+ """Delete a heuristic by ID."""
1278
+ try:
1279
+ existing = self._heuristics.get(ids=[heuristic_id])
1280
+ if not existing or not existing.get("ids"):
1281
+ return False
1282
+ self._heuristics.delete(ids=[heuristic_id])
1283
+ logger.debug(f"Deleted heuristic: {heuristic_id}")
1284
+ return True
1285
+ except Exception as e:
1286
+ logger.warning(f"Failed to delete heuristic {heuristic_id}: {e}")
1287
+ return False
1288
+
1289
+ def delete_outcome(self, outcome_id: str) -> bool:
1290
+ """Delete an outcome by ID."""
1291
+ try:
1292
+ existing = self._outcomes.get(ids=[outcome_id])
1293
+ if not existing or not existing.get("ids"):
1294
+ return False
1295
+ self._outcomes.delete(ids=[outcome_id])
1296
+ logger.debug(f"Deleted outcome: {outcome_id}")
1297
+ return True
1298
+ except Exception as e:
1299
+ logger.warning(f"Failed to delete outcome {outcome_id}: {e}")
1300
+ return False
1301
+
1302
+ def delete_domain_knowledge(self, knowledge_id: str) -> bool:
1303
+ """Delete domain knowledge by ID."""
1304
+ try:
1305
+ existing = self._domain_knowledge.get(ids=[knowledge_id])
1306
+ if not existing or not existing.get("ids"):
1307
+ return False
1308
+ self._domain_knowledge.delete(ids=[knowledge_id])
1309
+ logger.debug(f"Deleted domain knowledge: {knowledge_id}")
1310
+ return True
1311
+ except Exception as e:
1312
+ logger.warning(f"Failed to delete domain knowledge {knowledge_id}: {e}")
1313
+ return False
1314
+
1315
+ def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
1316
+ """Delete an anti-pattern by ID."""
1317
+ try:
1318
+ existing = self._anti_patterns.get(ids=[anti_pattern_id])
1319
+ if not existing or not existing.get("ids"):
1320
+ return False
1321
+ self._anti_patterns.delete(ids=[anti_pattern_id])
1322
+ logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
1323
+ return True
1324
+ except Exception as e:
1325
+ logger.warning(f"Failed to delete anti-pattern {anti_pattern_id}: {e}")
1326
+ return False
1327
+
1328
+ def delete_outcomes_older_than(
1329
+ self,
1330
+ project_id: str,
1331
+ older_than: datetime,
1332
+ agent: Optional[str] = None,
1333
+ ) -> int:
1334
+ """Delete old outcomes."""
1335
+ where_filter = self._build_where_filter(project_id=project_id, agent=agent)
1336
+
1337
+ # Get all matching outcomes
1338
+ results = self._outcomes.get(
1339
+ where=where_filter,
1340
+ include=["metadatas"],
1341
+ )
1342
+
1343
+ if not results or not results.get("ids"):
1344
+ return 0
1345
+
1346
+ older_than_str = older_than.isoformat()
1347
+ ids_to_delete = []
1348
+
1349
+ for i, id_ in enumerate(results["ids"]):
1350
+ meta = (
1351
+ results["metadatas"][i] if i < len(results.get("metadatas", [])) else {}
1352
+ )
1353
+ timestamp_str = meta.get("timestamp", "")
1354
+ if timestamp_str and timestamp_str < older_than_str:
1355
+ ids_to_delete.append(id_)
1356
+
1357
+ if ids_to_delete:
1358
+ self._outcomes.delete(ids=ids_to_delete)
1359
+
1360
+ logger.info(f"Deleted {len(ids_to_delete)} old outcomes")
1361
+ return len(ids_to_delete)
1362
+
1363
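Hedged retention sketch: timestamps are compared as ISO-8601 strings, so a timezone-aware cutoff keeps the comparison well ordered (project id invented).

    from datetime import datetime, timedelta, timezone

    cutoff = datetime.now(timezone.utc) - timedelta(days=90)
    removed = storage.delete_outcomes_older_than("alma-demo", older_than=cutoff)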
+ def delete_low_confidence_heuristics(
1364
+ self,
1365
+ project_id: str,
1366
+ below_confidence: float,
1367
+ agent: Optional[str] = None,
1368
+ ) -> int:
1369
+ """Delete low-confidence heuristics."""
1370
+ where_filter = self._build_where_filter(project_id=project_id, agent=agent)
1371
+
1372
+ # Get all matching heuristics
1373
+ results = self._heuristics.get(
1374
+ where=where_filter,
1375
+ include=["metadatas"],
1376
+ )
1377
+
1378
+ if not results or not results.get("ids"):
1379
+ return 0
1380
+
1381
+ ids_to_delete = []
1382
+
1383
+ for i, id_ in enumerate(results["ids"]):
1384
+ meta = (
1385
+ results["metadatas"][i] if i < len(results.get("metadatas", [])) else {}
1386
+ )
1387
+ confidence = meta.get("confidence", 0.0)
1388
+ if confidence < below_confidence:
1389
+ ids_to_delete.append(id_)
1390
+
1391
+ if ids_to_delete:
1392
+ self._heuristics.delete(ids=ids_to_delete)
1393
+
1394
+ logger.info(f"Deleted {len(ids_to_delete)} low-confidence heuristics")
1395
+ return len(ids_to_delete)
1396
+
1397
+ # ==================== STATS ====================
1398
+
1399
+ def get_stats(
1400
+ self,
1401
+ project_id: str,
1402
+ agent: Optional[str] = None,
1403
+ ) -> Dict[str, Any]:
1404
+ """Get memory statistics."""
1405
+ stats = {
1406
+ "project_id": project_id,
1407
+ "agent": agent,
1408
+ "storage_type": "chroma",
1409
+ "mode": self._mode,
1410
+ }
1411
+
1412
+ where_filter = self._build_where_filter(project_id=project_id, agent=agent)
1413
+
1414
+ # Count items in each collection
1415
+ for name, collection in [
1416
+ ("heuristics", self._heuristics),
1417
+ ("outcomes", self._outcomes),
1418
+ ("domain_knowledge", self._domain_knowledge),
1419
+ ("anti_patterns", self._anti_patterns),
1420
+ ]:
1421
+ try:
1422
+ results = collection.get(where=where_filter)
1423
+ stats[f"{name}_count"] = len(results.get("ids", []))
1424
+ except Exception:
1425
+ stats[f"{name}_count"] = 0
1426
+
1427
+ # Preferences are keyed by user_id, not project_id, so count them without a filter
1428
+ try:
1429
+ results = self._preferences.get()
1430
+ stats["preferences_count"] = len(results.get("ids", []))
1431
+ except Exception:
1432
+ stats["preferences_count"] = 0
1433
+
1434
+ stats["total_count"] = sum(
1435
+ stats.get(k, 0) for k in stats if k.endswith("_count")
1436
+ )
1437
+
1438
+ return stats
1439
+
1440
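Rough shape of the returned statistics (counts invented; total_count sums every *_count key, including preferences_count):

    stats = storage.get_stats("alma-demo", agent="helena")
    # {"project_id": "alma-demo", "agent": "helena", "storage_type": "chroma",
    #  "mode": "persistent", "heuristics_count": 12, ..., "total_count": 57}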
+ def close(self):
1441
+ """Close the Chroma client (if applicable)."""
1442
+ # ChromaDB clients release their resources automatically; no explicit teardown is needed
1443
+ logger.info("ChromaDB storage closed")