alma-memory 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,972 @@
1
+ """
2
+ ALMA Azure Cosmos DB Storage Backend.
3
+
4
+ Production storage using Azure Cosmos DB with vector search capabilities.
5
+ Uses Azure Key Vault for secrets management.
6
+
7
+ Requirements:
8
+ pip install azure-cosmos azure-identity azure-keyvault-secrets
9
+
10
+ Configuration (config.yaml):
11
+ alma:
12
+ storage: azure
13
+ azure:
14
+ endpoint: ${AZURE_COSMOS_ENDPOINT}
15
+ key: ${KEYVAULT:cosmos-db-key}
16
+ database: alma-memory
17
+ embedding_dim: 384
18
+ """
19
+
20
+ import json
21
+ import logging
22
+ from datetime import datetime, timezone
23
+ from typing import Optional, List, Dict, Any, Tuple
24
+ from dataclasses import asdict
25
+
26
+ from alma.types import (
27
+ Heuristic,
28
+ Outcome,
29
+ UserPreference,
30
+ DomainKnowledge,
31
+ AntiPattern,
32
+ )
33
+ from alma.storage.base import StorageBackend
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ # Try to import Azure SDK
38
+ try:
39
+ from azure.cosmos import CosmosClient, PartitionKey, exceptions
40
+ from azure.cosmos.container import ContainerProxy
41
+ from azure.cosmos.database import DatabaseProxy
42
+ AZURE_COSMOS_AVAILABLE = True
43
+ except ImportError:
44
+ AZURE_COSMOS_AVAILABLE = False
45
+ logger.warning(
46
+ "azure-cosmos package not installed. "
47
+ "Install with: pip install azure-cosmos"
48
+ )
49
+
50
+
51
class AzureCosmosStorage(StorageBackend):
    """
    ALMA storage backend backed by Azure Cosmos DB (NoSQL API).

    Every memory type is kept in its own container. Similarity lookups
    are issued as ``VectorDistance`` queries over the stored embeddings.

    Containers:
        - alma-heuristics: heuristics, with embeddings
        - alma-outcomes: task outcomes, with embeddings
        - alma-preferences: user preferences, no embeddings
        - alma-knowledge: domain knowledge, with embeddings
        - alma-antipatterns: anti-patterns, with embeddings
    """

    # Logical memory type (as used by the methods of this class) mapped to
    # the id of the Cosmos DB container that stores it.
    CONTAINER_NAMES = dict(
        heuristics="alma-heuristics",
        outcomes="alma-outcomes",
        preferences="alma-preferences",
        knowledge="alma-knowledge",
        antipatterns="alma-antipatterns",
    )
75
+
76
def __init__(
    self,
    endpoint: str,
    key: str,
    database_name: str = "alma-memory",
    embedding_dim: int = 384,
    create_if_not_exists: bool = True,
):
    """
    Connect to Cosmos DB and cache one client per container.

    Args:
        endpoint: Cosmos DB account endpoint.
        key: Cosmos DB account key, passed to the client as credential.
        database_name: Name of the database to use.
        embedding_dim: Dimension of the embedding vectors.
        create_if_not_exists: When true, create the database and the
            containers if they are missing; otherwise only obtain a
            client for the named database.

    Raises:
        ImportError: If the azure-cosmos package could not be imported.
    """
    if not AZURE_COSMOS_AVAILABLE:
        raise ImportError(
            "azure-cosmos package required. Install with: pip install azure-cosmos"
        )

    self.endpoint = endpoint
    self.database_name = database_name
    self.embedding_dim = embedding_dim
    self.client = CosmosClient(endpoint, credential=key)

    if not create_if_not_exists:
        self.database = self.client.get_database_client(database_name)
    else:
        self.database = self.client.create_database_if_not_exists(
            id=database_name
        )
        self._init_containers()

    # One container client per logical memory type, looked up later by
    # _get_container().
    self._containers: Dict[str, ContainerProxy] = {
        logical_name: self.database.get_container_client(container_id)
        for logical_name, container_id in self.CONTAINER_NAMES.items()
    }

    logger.info(f"Connected to Azure Cosmos DB: {database_name}")
123
+
124
@classmethod
def from_config(cls, config: Dict[str, Any]) -> "AzureCosmosStorage":
    """
    Build a storage instance from a configuration mapping.

    Reads the ``azure`` section of ``config``: ``endpoint`` and ``key``
    are required; ``database`` (default ``"alma-memory"``),
    ``embedding_dim`` (default 384) and ``create_if_not_exists``
    (default True) are optional.

    Raises:
        ValueError: If the endpoint or the key is missing or empty.
    """
    # An ``azure:`` key with no children is loaded as None, so fall back
    # to an empty mapping and let the check below report the problem.
    azure_config = config.get("azure") or {}

    endpoint = azure_config.get("endpoint")
    key = azure_config.get("key")
    if not endpoint or not key:
        raise ValueError(
            "Azure Cosmos DB requires 'azure.endpoint' and 'azure.key' in config"
        )

    return cls(
        endpoint=endpoint,
        key=key,
        database_name=azure_config.get("database", "alma-memory"),
        embedding_dim=azure_config.get("embedding_dim", 384),
        create_if_not_exists=azure_config.get("create_if_not_exists", True),
    )
144
+
145
def _init_containers(self):
    """
    Create every container that does not exist yet.

    Each container gets its partition key and a consistent indexing
    policy. Containers that store embeddings additionally get a vector
    embedding policy (float32, cosine, ``self.embedding_dim`` dimensions)
    and a DiskANN vector index on the embedding path; the embedding path
    is excluded from regular indexing.

    Raises:
        exceptions.CosmosHttpResponseError: For any service error other
            than a 409 conflict.
    """
    container_configs = {
        "heuristics": {
            "partition_key": "/project_id",
            "vector_path": "/embedding",
            "vector_indexes": True,
        },
        "outcomes": {
            "partition_key": "/project_id",
            "vector_path": "/embedding",
            "vector_indexes": True,
        },
        "preferences": {
            "partition_key": "/user_id",
            "vector_path": None,
            "vector_indexes": False,
        },
        "knowledge": {
            "partition_key": "/project_id",
            "vector_path": "/embedding",
            "vector_indexes": True,
        },
        "antipatterns": {
            "partition_key": "/project_id",
            "vector_path": "/embedding",
            "vector_indexes": True,
        },
    }

    for key_name, cfg in container_configs.items():
        container_name = self.CONTAINER_NAMES[key_name]
        vector_path = cfg["vector_path"] if cfg["vector_indexes"] else None

        indexing_policy = {
            "indexingMode": "consistent",
            "automatic": True,
            "includedPaths": [{"path": "/*"}],
            "excludedPaths": [{"path": '/"_etag"/?'}],
        }
        container_properties = {
            "id": container_name,
            "partition_key": PartitionKey(path=cfg["partition_key"]),
            "indexing_policy": indexing_policy,
        }

        if vector_path:
            # Keep the raw vector out of the regular (range) index.
            indexing_policy["excludedPaths"].append({"path": f"{vector_path}/*"})
            # Previously missing: without this entry no vector index is
            # built, despite the "vector_indexes" flag being set.
            indexing_policy["vectorIndexes"] = [
                {"path": vector_path, "type": "diskANN"}
            ]
            container_properties["vector_embedding_policy"] = {
                "vectorEmbeddings": [
                    {
                        "path": vector_path,
                        "dataType": "float32",
                        "dimensions": self.embedding_dim,
                        "distanceFunction": "cosine",
                    }
                ]
            }

        try:
            self.database.create_container_if_not_exists(**container_properties)
            logger.debug(f"Container ready: {container_name}")
        except exceptions.CosmosHttpResponseError as e:
            # A conflict means the container is already there; anything
            # else is a real failure.
            if e.status_code != 409:
                raise
            logger.debug(f"Container already exists: {container_name}")
227
+
228
+ def _get_container(self, container_key: str) -> ContainerProxy:
229
+ """Get container client by key."""
230
+ return self._containers[container_key]
231
+
232
+ # ==================== WRITE OPERATIONS ====================
233
+
234
def save_heuristic(self, heuristic: Heuristic) -> str:
    """
    Upsert a heuristic into the heuristics container.

    Timestamps are written as ISO-8601 strings (or None when unset) and
    missing metadata is written as an empty dict.

    Returns:
        The id of the saved heuristic.
    """
    target = self._get_container("heuristics")

    def iso_or_none(moment):
        return moment.isoformat() if moment else None

    record = {
        "id": heuristic.id,
        "agent": heuristic.agent,
        "project_id": heuristic.project_id,
        "condition": heuristic.condition,
        "strategy": heuristic.strategy,
        "confidence": heuristic.confidence,
        "occurrence_count": heuristic.occurrence_count,
        "success_count": heuristic.success_count,
        "last_validated": iso_or_none(heuristic.last_validated),
        "created_at": iso_or_none(heuristic.created_at),
        "metadata": heuristic.metadata or {},
        "embedding": heuristic.embedding,
        "type": "heuristic",
    }

    target.upsert_item(record)
    logger.debug(f"Saved heuristic: {heuristic.id}")
    return heuristic.id
261
+
262
def save_outcome(self, outcome: Outcome) -> str:
    """
    Upsert a task outcome into the outcomes container.

    The timestamp is written as an ISO-8601 string (or None when unset)
    and missing metadata is written as an empty dict.

    Returns:
        The id of the saved outcome.
    """
    target = self._get_container("outcomes")

    when = outcome.timestamp.isoformat() if outcome.timestamp else None
    record = {
        "id": outcome.id,
        "agent": outcome.agent,
        "project_id": outcome.project_id,
        "task_type": outcome.task_type,
        "task_description": outcome.task_description,
        "success": outcome.success,
        "strategy_used": outcome.strategy_used,
        "duration_ms": outcome.duration_ms,
        "error_message": outcome.error_message,
        "user_feedback": outcome.user_feedback,
        "timestamp": when,
        "metadata": outcome.metadata or {},
        "embedding": outcome.embedding,
        "type": "outcome",
    }

    target.upsert_item(record)
    logger.debug(f"Saved outcome: {outcome.id}")
    return outcome.id
286
+
287
def save_user_preference(self, preference: UserPreference) -> str:
    """
    Upsert a user preference into the preferences container.

    The timestamp is written as an ISO-8601 string (or None when unset)
    and missing metadata is written as an empty dict. No embedding is
    stored for preferences.

    Returns:
        The id of the saved preference.
    """
    target = self._get_container("preferences")

    when = preference.timestamp.isoformat() if preference.timestamp else None
    record = {
        "id": preference.id,
        "user_id": preference.user_id,
        "category": preference.category,
        "preference": preference.preference,
        "source": preference.source,
        "confidence": preference.confidence,
        "timestamp": when,
        "metadata": preference.metadata or {},
        "type": "preference",
    }

    target.upsert_item(record)
    logger.debug(f"Saved preference: {preference.id}")
    return preference.id
308
+
309
def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
    """
    Upsert a domain-knowledge fact into the knowledge container.

    ``last_verified`` is written as an ISO-8601 string (or None when
    unset) and missing metadata is written as an empty dict.

    Returns:
        The id of the saved knowledge item.
    """
    target = self._get_container("knowledge")

    verified = (
        knowledge.last_verified.isoformat() if knowledge.last_verified else None
    )
    record = {
        "id": knowledge.id,
        "agent": knowledge.agent,
        "project_id": knowledge.project_id,
        "domain": knowledge.domain,
        "fact": knowledge.fact,
        "source": knowledge.source,
        "confidence": knowledge.confidence,
        "last_verified": verified,
        "metadata": knowledge.metadata or {},
        "embedding": knowledge.embedding,
        "type": "domain_knowledge",
    }

    target.upsert_item(record)
    logger.debug(f"Saved domain knowledge: {knowledge.id}")
    return knowledge.id
332
+
333
def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
    """
    Upsert an anti-pattern into the antipatterns container.

    Timestamps are written as ISO-8601 strings (or None when unset) and
    missing metadata is written as an empty dict.

    Returns:
        The id of the saved anti-pattern.
    """
    target = self._get_container("antipatterns")

    def iso_or_none(moment):
        return moment.isoformat() if moment else None

    record = {
        "id": anti_pattern.id,
        "agent": anti_pattern.agent,
        "project_id": anti_pattern.project_id,
        "pattern": anti_pattern.pattern,
        "why_bad": anti_pattern.why_bad,
        "better_alternative": anti_pattern.better_alternative,
        "occurrence_count": anti_pattern.occurrence_count,
        "last_seen": iso_or_none(anti_pattern.last_seen),
        "created_at": iso_or_none(anti_pattern.created_at),
        "metadata": anti_pattern.metadata or {},
        "embedding": anti_pattern.embedding,
        "type": "anti_pattern",
    }

    target.upsert_item(record)
    logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
    return anti_pattern.id
359
+
360
+ # ==================== READ OPERATIONS ====================
361
+
362
def get_heuristics(
    self,
    project_id: str,
    agent: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
    min_confidence: float = 0.0,
) -> List[Heuristic]:
    """
    Fetch up to ``top_k`` heuristics of one project.

    Only heuristics with confidence >= ``min_confidence`` are returned,
    optionally restricted to one agent. With a non-empty ``embedding``
    the results are ordered by ``VectorDistance`` to it; otherwise by
    confidence, highest first. The query is scoped to the project's
    partition.
    """
    container = self._get_container("heuristics")

    sql = """
        SELECT TOP @top_k *
        FROM c
        WHERE c.project_id = @project_id
        AND c.confidence >= @min_confidence
    """
    bindings = [
        {"name": "@top_k", "value": top_k},
        {"name": "@project_id", "value": project_id},
        {"name": "@min_confidence", "value": min_confidence},
    ]

    if agent:
        sql += " AND c.agent = @agent"

    if embedding:
        sql += " ORDER BY VectorDistance(c.embedding, @embedding)"
        bindings.append({"name": "@embedding", "value": embedding})
    else:
        sql += " ORDER BY c.confidence DESC"

    if agent:
        bindings.append({"name": "@agent", "value": agent})

    documents = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=project_id,
        )
    )
    return [self._doc_to_heuristic(document) for document in documents]
424
+
425
def get_outcomes(
    self,
    project_id: str,
    agent: Optional[str] = None,
    task_type: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
    success_only: bool = False,
) -> List[Outcome]:
    """
    Fetch up to ``top_k`` outcomes of one project.

    Results can be restricted by agent, task type and success. With a
    non-empty ``embedding`` they are ordered by ``VectorDistance`` to it;
    otherwise by timestamp, newest first. The query is scoped to the
    project's partition.
    """
    container = self._get_container("outcomes")

    sql = """
        SELECT TOP @top_k *
        FROM c
        WHERE c.project_id = @project_id
    """
    bindings = [
        {"name": "@top_k", "value": top_k},
        {"name": "@project_id", "value": project_id},
    ]
    if embedding:
        bindings.append({"name": "@embedding", "value": embedding})

    if agent:
        sql += " AND c.agent = @agent"
        bindings.append({"name": "@agent", "value": agent})
    if task_type:
        sql += " AND c.task_type = @task_type"
        bindings.append({"name": "@task_type", "value": task_type})
    if success_only:
        sql += " AND c.success = true"

    if embedding:
        sql += " ORDER BY VectorDistance(c.embedding, @embedding)"
    else:
        sql += " ORDER BY c.timestamp DESC"

    documents = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=project_id,
        )
    )
    return [self._doc_to_outcome(document) for document in documents]
494
+
495
def get_user_preferences(
    self,
    user_id: str,
    category: Optional[str] = None,
) -> List[UserPreference]:
    """
    Fetch all preferences of one user, optionally for a single category.

    The query is scoped to the user's partition.
    """
    container = self._get_container("preferences")

    sql = "SELECT * FROM c WHERE c.user_id = @user_id"
    bindings = [{"name": "@user_id", "value": user_id}]
    if category:
        sql += " AND c.category = @category"
        bindings.append({"name": "@category", "value": category})

    documents = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=user_id,
        )
    )
    return [self._doc_to_preference(document) for document in documents]
520
+
521
def get_domain_knowledge(
    self,
    project_id: str,
    agent: Optional[str] = None,
    domain: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
) -> List[DomainKnowledge]:
    """
    Fetch up to ``top_k`` domain-knowledge items of one project.

    Results can be restricted by agent and domain. With a non-empty
    ``embedding`` they are ordered by ``VectorDistance`` to it; otherwise
    by confidence, highest first. The query is scoped to the project's
    partition.
    """
    container = self._get_container("knowledge")

    sql = """
        SELECT TOP @top_k *
        FROM c
        WHERE c.project_id = @project_id
    """
    bindings = [
        {"name": "@top_k", "value": top_k},
        {"name": "@project_id", "value": project_id},
    ]
    if embedding:
        bindings.append({"name": "@embedding", "value": embedding})

    if agent:
        sql += " AND c.agent = @agent"
        bindings.append({"name": "@agent", "value": agent})
    if domain:
        sql += " AND c.domain = @domain"
        bindings.append({"name": "@domain", "value": domain})

    if embedding:
        sql += " ORDER BY VectorDistance(c.embedding, @embedding)"
    else:
        sql += " ORDER BY c.confidence DESC"

    documents = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=project_id,
        )
    )
    return [self._doc_to_domain_knowledge(document) for document in documents]
583
+
584
def get_anti_patterns(
    self,
    project_id: str,
    agent: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
) -> List[AntiPattern]:
    """
    Fetch up to ``top_k`` anti-patterns of one project.

    Results can be restricted to one agent. With a non-empty
    ``embedding`` they are ordered by ``VectorDistance`` to it; otherwise
    by occurrence count, highest first. The query is scoped to the
    project's partition.
    """
    container = self._get_container("antipatterns")

    sql = """
        SELECT TOP @top_k *
        FROM c
        WHERE c.project_id = @project_id
    """
    bindings = [
        {"name": "@top_k", "value": top_k},
        {"name": "@project_id", "value": project_id},
    ]
    if embedding:
        bindings.append({"name": "@embedding", "value": embedding})

    if agent:
        sql += " AND c.agent = @agent"
        bindings.append({"name": "@agent", "value": agent})

    if embedding:
        sql += " ORDER BY VectorDistance(c.embedding, @embedding)"
    else:
        sql += " ORDER BY c.occurrence_count DESC"

    documents = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=project_id,
        )
    )
    return [self._doc_to_anti_pattern(document) for document in documents]
639
+
640
+ # ==================== UPDATE OPERATIONS ====================
641
+
642
def update_heuristic(
    self,
    heuristic_id: str,
    updates: Dict[str, Any],
) -> bool:
    """
    Overwrite fields of a stored heuristic.

    The caller only supplies the id, so the document is first located
    with a cross-partition query. Each entry of ``updates`` is then
    written onto the document (``datetime`` values as ISO-8601 strings)
    and the whole document is replaced.

    Args:
        heuristic_id: Id of the heuristic to update.
        updates: Field name -> new value.

    Returns:
        True if the heuristic was found and replaced, False if no
        document with that id exists.
    """
    container = self._get_container("heuristics")

    matches = list(
        container.query_items(
            query="SELECT * FROM c WHERE c.id = @id",
            parameters=[{"name": "@id", "value": heuristic_id}],
            enable_cross_partition_query=True,
        )
    )
    if not matches:
        return False

    doc = matches[0]
    for field, value in updates.items():
        doc[field] = value.isoformat() if isinstance(value, datetime) else value

    container.replace_item(item=heuristic_id, body=doc)
    return True
676
+
677
def increment_heuristic_occurrence(
    self,
    heuristic_id: str,
    success: bool,
) -> bool:
    """
    Record one more occurrence of a heuristic.

    The document is located by id with a cross-partition query. Its
    ``occurrence_count`` is incremented, ``success_count`` is incremented
    as well when ``success`` is true, and ``last_validated`` is set to
    the current UTC time before the document is replaced.

    Returns:
        True if the heuristic was found and replaced, False otherwise.
    """
    container = self._get_container("heuristics")

    matches = list(
        container.query_items(
            query="SELECT * FROM c WHERE c.id = @id",
            parameters=[{"name": "@id", "value": heuristic_id}],
            enable_cross_partition_query=True,
        )
    )
    if not matches:
        return False

    record = matches[0]
    counters = ["occurrence_count"]
    if success:
        counters.append("success_count")
    for counter in counters:
        record[counter] = record.get(counter, 0) + 1
    record["last_validated"] = datetime.now(timezone.utc).isoformat()

    container.replace_item(item=heuristic_id, body=record)
    return True
706
+
707
+ # ==================== DELETE OPERATIONS ====================
708
+
709
def delete_outcomes_older_than(
    self,
    project_id: str,
    older_than: datetime,
    agent: Optional[str] = None,
) -> int:
    """
    Delete a project's outcomes whose timestamp is before ``older_than``.

    The cutoff is compared as an ISO-8601 string against the stored
    timestamp. Matching ids are collected first and then deleted one by
    one; an item that is already gone is skipped and not counted.

    Returns:
        The number of outcomes actually deleted.
    """
    container = self._get_container("outcomes")

    sql = """
        SELECT c.id FROM c
        WHERE c.project_id = @project_id
        AND c.timestamp < @older_than
    """
    bindings = [
        {"name": "@project_id", "value": project_id},
        {"name": "@older_than", "value": older_than.isoformat()},
    ]
    if agent:
        sql += " AND c.agent = @agent"
        bindings.append({"name": "@agent", "value": agent})

    candidates = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=project_id,
        )
    )

    removed = 0
    for row in candidates:
        try:
            container.delete_item(item=row["id"], partition_key=project_id)
        except exceptions.CosmosResourceNotFoundError:
            continue
        removed += 1

    logger.info(f"Deleted {removed} old outcomes")
    return removed
751
+
752
def delete_low_confidence_heuristics(
    self,
    project_id: str,
    below_confidence: float,
    agent: Optional[str] = None,
) -> int:
    """
    Delete a project's heuristics with confidence below a threshold.

    Matching ids are collected first and then deleted one by one; an
    item that is already gone is skipped and not counted.

    Returns:
        The number of heuristics actually deleted.
    """
    container = self._get_container("heuristics")

    sql = """
        SELECT c.id FROM c
        WHERE c.project_id = @project_id
        AND c.confidence < @below_confidence
    """
    bindings = [
        {"name": "@project_id", "value": project_id},
        {"name": "@below_confidence", "value": below_confidence},
    ]
    if agent:
        sql += " AND c.agent = @agent"
        bindings.append({"name": "@agent", "value": agent})

    candidates = list(
        container.query_items(
            query=sql,
            parameters=bindings,
            enable_cross_partition_query=False,
            partition_key=project_id,
        )
    )

    removed = 0
    for row in candidates:
        try:
            container.delete_item(item=row["id"], partition_key=project_id)
        except exceptions.CosmosResourceNotFoundError:
            continue
        removed += 1

    logger.info(f"Deleted {removed} low-confidence heuristics")
    return removed
794
+
795
def delete_heuristic(self, heuristic_id: str) -> bool:
    """
    Delete one heuristic by id.

    The partition key (project_id) is not supplied by the caller, so it
    is looked up with a cross-partition query before the delete.

    Returns:
        True if the heuristic was deleted, False if it was not found
        (either by the lookup or by the delete itself).
    """
    container = self._get_container("heuristics")

    owners = list(
        container.query_items(
            query="SELECT c.project_id FROM c WHERE c.id = @id",
            parameters=[{"name": "@id", "value": heuristic_id}],
            enable_cross_partition_query=True,
        )
    )
    if not owners:
        return False

    partition = owners[0]["project_id"]
    try:
        container.delete_item(item=heuristic_id, partition_key=partition)
    except exceptions.CosmosResourceNotFoundError:
        return False
    return True
819
+
820
+ # ==================== STATS ====================
821
+
822
def get_stats(
    self,
    project_id: str,
    agent: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Collect item counts for a project.

    Heuristics, outcomes, knowledge and anti-patterns are counted within
    the project's partition, restricted to ``agent`` when one is given.
    Preferences are counted across all partitions with no filter.
    ``total_count`` is the sum of every ``*_count`` entry.
    """
    stats = {
        "project_id": project_id,
        "agent": agent,
        "storage_type": "azure_cosmos",
        "database": self.database_name,
    }

    for name in ("heuristics", "outcomes", "knowledge", "antipatterns"):
        bindings = [{"name": "@project_id", "value": project_id}]
        if agent:
            sql = """
                SELECT VALUE COUNT(1) FROM c
                WHERE c.project_id = @project_id AND c.agent = @agent
            """
            bindings.append({"name": "@agent", "value": agent})
        else:
            sql = "SELECT VALUE COUNT(1) FROM c WHERE c.project_id = @project_id"

        counted = list(
            self._get_container(name).query_items(
                query=sql,
                parameters=bindings,
                enable_cross_partition_query=False,
                partition_key=project_id,
            )
        )
        stats[f"{name}_count"] = counted[0] if counted else 0

    # Preferences are keyed by user, not project, so they are counted
    # over the whole container.
    counted = list(
        self._get_container("preferences").query_items(
            query="SELECT VALUE COUNT(1) FROM c",
            enable_cross_partition_query=True,
        )
    )
    stats["preferences_count"] = counted[0] if counted else 0

    stats["total_count"] = sum(
        value for label, value in stats.items() if label.endswith("_count")
    )
    return stats
874
+
875
+ # ==================== HELPERS ====================
876
+
877
+ def _parse_datetime(self, value: Any) -> Optional[datetime]:
878
+ """Parse datetime from string."""
879
+ if value is None:
880
+ return None
881
+ if isinstance(value, datetime):
882
+ return value
883
+ try:
884
+ return datetime.fromisoformat(value.replace("Z", "+00:00"))
885
+ except (ValueError, AttributeError):
886
+ return None
887
+
888
def _doc_to_heuristic(self, doc: Dict[str, Any]) -> Heuristic:
    """
    Build a Heuristic from a Cosmos DB document.

    ``id``, ``agent``, ``project_id``, ``condition`` and ``strategy``
    must be present. Counters and confidence default to zero, and a
    missing or unparseable timestamp is replaced by the current UTC time.
    """
    validated = self._parse_datetime(doc.get("last_validated"))
    if not validated:
        validated = datetime.now(timezone.utc)

    created = self._parse_datetime(doc.get("created_at"))
    if not created:
        created = datetime.now(timezone.utc)

    return Heuristic(
        id=doc["id"],
        agent=doc["agent"],
        project_id=doc["project_id"],
        condition=doc["condition"],
        strategy=doc["strategy"],
        confidence=doc.get("confidence", 0.0),
        occurrence_count=doc.get("occurrence_count", 0),
        success_count=doc.get("success_count", 0),
        last_validated=validated,
        created_at=created,
        embedding=doc.get("embedding"),
        metadata=doc.get("metadata", {}),
    )
906
+
907
def _doc_to_outcome(self, doc: Dict[str, Any]) -> Outcome:
    """
    Build an Outcome from a Cosmos DB document.

    ``id``, ``agent``, ``project_id`` and ``task_description`` must be
    present. Other fields fall back to defaults, and a missing or
    unparseable timestamp is replaced by the current UTC time.
    """
    recorded = self._parse_datetime(doc.get("timestamp"))
    if not recorded:
        recorded = datetime.now(timezone.utc)

    return Outcome(
        id=doc["id"],
        agent=doc["agent"],
        project_id=doc["project_id"],
        task_type=doc.get("task_type", "general"),
        task_description=doc["task_description"],
        success=doc.get("success", False),
        strategy_used=doc.get("strategy_used", ""),
        duration_ms=doc.get("duration_ms"),
        error_message=doc.get("error_message"),
        user_feedback=doc.get("user_feedback"),
        timestamp=recorded,
        embedding=doc.get("embedding"),
        metadata=doc.get("metadata", {}),
    )
925
+
926
def _doc_to_preference(self, doc: Dict[str, Any]) -> UserPreference:
    """
    Build a UserPreference from a Cosmos DB document.

    ``id``, ``user_id`` and ``preference`` must be present. Other fields
    fall back to defaults, and a missing or unparseable timestamp is
    replaced by the current UTC time.
    """
    recorded = self._parse_datetime(doc.get("timestamp"))
    if not recorded:
        recorded = datetime.now(timezone.utc)

    return UserPreference(
        id=doc["id"],
        user_id=doc["user_id"],
        category=doc.get("category", "general"),
        preference=doc["preference"],
        source=doc.get("source", "unknown"),
        confidence=doc.get("confidence", 1.0),
        timestamp=recorded,
        metadata=doc.get("metadata", {}),
    )
939
+
940
def _doc_to_domain_knowledge(self, doc: Dict[str, Any]) -> DomainKnowledge:
    """
    Build a DomainKnowledge from a Cosmos DB document.

    ``id``, ``agent``, ``project_id`` and ``fact`` must be present.
    Other fields fall back to defaults, and a missing or unparseable
    ``last_verified`` is replaced by the current UTC time.
    """
    verified = self._parse_datetime(doc.get("last_verified"))
    if not verified:
        verified = datetime.now(timezone.utc)

    return DomainKnowledge(
        id=doc["id"],
        agent=doc["agent"],
        project_id=doc["project_id"],
        domain=doc.get("domain", "general"),
        fact=doc["fact"],
        source=doc.get("source", "unknown"),
        confidence=doc.get("confidence", 1.0),
        last_verified=verified,
        embedding=doc.get("embedding"),
        metadata=doc.get("metadata", {}),
    )
955
+
956
def _doc_to_anti_pattern(self, doc: Dict[str, Any]) -> AntiPattern:
    """
    Build an AntiPattern from a Cosmos DB document.

    ``id``, ``agent``, ``project_id`` and ``pattern`` must be present.
    Other fields fall back to defaults, and a missing or unparseable
    timestamp is replaced by the current UTC time.
    """
    seen = self._parse_datetime(doc.get("last_seen"))
    if not seen:
        seen = datetime.now(timezone.utc)

    created = self._parse_datetime(doc.get("created_at"))
    if not created:
        created = datetime.now(timezone.utc)

    return AntiPattern(
        id=doc["id"],
        agent=doc["agent"],
        project_id=doc["project_id"],
        pattern=doc["pattern"],
        why_bad=doc.get("why_bad", ""),
        better_alternative=doc.get("better_alternative", ""),
        occurrence_count=doc.get("occurrence_count", 1),
        last_seen=seen,
        created_at=created,
        embedding=doc.get("embedding"),
        metadata=doc.get("metadata", {}),
    )