alma-memory 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +121 -45
- alma/confidence/__init__.py +1 -1
- alma/confidence/engine.py +92 -58
- alma/confidence/types.py +34 -14
- alma/config/loader.py +3 -2
- alma/consolidation/__init__.py +23 -0
- alma/consolidation/engine.py +678 -0
- alma/consolidation/prompts.py +84 -0
- alma/core.py +136 -28
- alma/domains/__init__.py +6 -6
- alma/domains/factory.py +12 -9
- alma/domains/schemas.py +17 -3
- alma/domains/types.py +8 -4
- alma/events/__init__.py +75 -0
- alma/events/emitter.py +284 -0
- alma/events/storage_mixin.py +246 -0
- alma/events/types.py +126 -0
- alma/events/webhook.py +425 -0
- alma/exceptions.py +49 -0
- alma/extraction/__init__.py +31 -0
- alma/extraction/auto_learner.py +265 -0
- alma/extraction/extractor.py +420 -0
- alma/graph/__init__.py +106 -0
- alma/graph/backends/__init__.py +32 -0
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -0
- alma/graph/backends/neo4j.py +417 -0
- alma/graph/base.py +159 -0
- alma/graph/extraction.py +198 -0
- alma/graph/store.py +860 -0
- alma/harness/__init__.py +4 -4
- alma/harness/base.py +18 -9
- alma/harness/domains.py +27 -11
- alma/initializer/__init__.py +1 -1
- alma/initializer/initializer.py +51 -43
- alma/initializer/types.py +25 -17
- alma/integration/__init__.py +9 -9
- alma/integration/claude_agents.py +32 -20
- alma/integration/helena.py +32 -22
- alma/integration/victor.py +57 -33
- alma/learning/__init__.py +27 -27
- alma/learning/forgetting.py +198 -148
- alma/learning/heuristic_extractor.py +40 -24
- alma/learning/protocols.py +65 -17
- alma/learning/validation.py +7 -2
- alma/mcp/__init__.py +4 -4
- alma/mcp/__main__.py +2 -1
- alma/mcp/resources.py +17 -16
- alma/mcp/server.py +102 -44
- alma/mcp/tools.py +180 -45
- alma/observability/__init__.py +84 -0
- alma/observability/config.py +302 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +3 -3
- alma/progress/tracker.py +26 -20
- alma/progress/types.py +8 -12
- alma/py.typed +0 -0
- alma/retrieval/__init__.py +11 -11
- alma/retrieval/cache.py +20 -21
- alma/retrieval/embeddings.py +4 -4
- alma/retrieval/engine.py +179 -39
- alma/retrieval/scoring.py +73 -63
- alma/session/__init__.py +2 -2
- alma/session/manager.py +5 -5
- alma/session/types.py +5 -4
- alma/storage/__init__.py +70 -0
- alma/storage/azure_cosmos.py +414 -133
- alma/storage/base.py +215 -4
- alma/storage/chroma.py +1443 -0
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +59 -28
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/pinecone.py +1080 -0
- alma/storage/postgresql.py +1559 -0
- alma/storage/qdrant.py +1306 -0
- alma/storage/sqlite_local.py +504 -60
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +62 -14
- alma_memory-0.5.1.dist-info/METADATA +939 -0
- alma_memory-0.5.1.dist-info/RECORD +93 -0
- {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/WHEEL +1 -1
- alma_memory-0.4.0.dist-info/METADATA +0 -488
- alma_memory-0.4.0.dist-info/RECORD +0 -52
- {alma_memory-0.4.0.dist-info → alma_memory-0.5.1.dist-info}/top_level.txt +0 -0
alma/storage/azure_cosmos.py
CHANGED
|
@@ -17,20 +17,19 @@ Configuration (config.yaml):
|
|
|
17
17
|
embedding_dim: 384
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
-
import json
|
|
21
20
|
import logging
|
|
22
21
|
from datetime import datetime, timezone
|
|
23
|
-
from typing import
|
|
24
|
-
from dataclasses import asdict
|
|
22
|
+
from typing import Any, Dict, List, Optional
|
|
25
23
|
|
|
24
|
+
from alma.storage.base import StorageBackend
|
|
25
|
+
from alma.storage.constants import AZURE_COSMOS_CONTAINER_NAMES, MemoryType
|
|
26
26
|
from alma.types import (
|
|
27
|
+
AntiPattern,
|
|
28
|
+
DomainKnowledge,
|
|
27
29
|
Heuristic,
|
|
28
30
|
Outcome,
|
|
29
31
|
UserPreference,
|
|
30
|
-
DomainKnowledge,
|
|
31
|
-
AntiPattern,
|
|
32
32
|
)
|
|
33
|
-
from alma.storage.base import StorageBackend
|
|
34
33
|
|
|
35
34
|
logger = logging.getLogger(__name__)
|
|
36
35
|
|
|
@@ -39,6 +38,7 @@ try:
|
|
|
39
38
|
from azure.cosmos import CosmosClient, PartitionKey, exceptions
|
|
40
39
|
from azure.cosmos.container import ContainerProxy
|
|
41
40
|
from azure.cosmos.database import DatabaseProxy
|
|
41
|
+
|
|
42
42
|
AZURE_COSMOS_AVAILABLE = True
|
|
43
43
|
except ImportError:
|
|
44
44
|
AZURE_COSMOS_AVAILABLE = False
|
|
@@ -49,8 +49,7 @@ except ImportError:
|
|
|
49
49
|
ContainerProxy = Any # type: ignore
|
|
50
50
|
DatabaseProxy = Any # type: ignore
|
|
51
51
|
logger.warning(
|
|
52
|
-
"azure-cosmos package not installed. "
|
|
53
|
-
"Install with: pip install azure-cosmos"
|
|
52
|
+
"azure-cosmos package not installed. Install with: pip install azure-cosmos"
|
|
54
53
|
)
|
|
55
54
|
|
|
56
55
|
|
|
@@ -63,21 +62,19 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
63
62
|
- DiskANN vector indexing for similarity search
|
|
64
63
|
- Partition key: project_id for efficient queries
|
|
65
64
|
|
|
66
|
-
Container structure:
|
|
67
|
-
-
|
|
68
|
-
-
|
|
69
|
-
-
|
|
70
|
-
-
|
|
71
|
-
-
|
|
65
|
+
Container structure (uses canonical memory type names with alma_ prefix):
|
|
66
|
+
- alma_heuristics: Heuristics with vector embeddings
|
|
67
|
+
- alma_outcomes: Task outcomes with vector embeddings
|
|
68
|
+
- alma_preferences: User preferences (no vectors)
|
|
69
|
+
- alma_domain_knowledge: Domain knowledge with vector embeddings
|
|
70
|
+
- alma_anti_patterns: Anti-patterns with vector embeddings
|
|
71
|
+
|
|
72
|
+
Container names are derived from alma.storage.constants.AZURE_COSMOS_CONTAINER_NAMES
|
|
73
|
+
for consistency across all storage backends.
|
|
72
74
|
"""
|
|
73
75
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
"outcomes": "alma-outcomes",
|
|
77
|
-
"preferences": "alma-preferences",
|
|
78
|
-
"knowledge": "alma-knowledge",
|
|
79
|
-
"antipatterns": "alma-antipatterns",
|
|
80
|
-
}
|
|
76
|
+
# Use canonical container names from constants
|
|
77
|
+
CONTAINER_NAMES = AZURE_COSMOS_CONTAINER_NAMES
|
|
81
78
|
|
|
82
79
|
def __init__(
|
|
83
80
|
self,
|
|
@@ -111,9 +108,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
111
108
|
|
|
112
109
|
# Get or create database
|
|
113
110
|
if create_if_not_exists:
|
|
114
|
-
self.database = self.client.create_database_if_not_exists(
|
|
115
|
-
id=database_name
|
|
116
|
-
)
|
|
111
|
+
self.database = self.client.create_database_if_not_exists(id=database_name)
|
|
117
112
|
self._init_containers()
|
|
118
113
|
else:
|
|
119
114
|
self.database = self.client.get_database_client(database_name)
|
|
@@ -125,6 +120,14 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
125
120
|
container_name
|
|
126
121
|
)
|
|
127
122
|
|
|
123
|
+
# Cache for partition key mappings: {container_key: {doc_id: partition_key}}
|
|
124
|
+
# This reduces RU consumption by avoiding cross-partition queries
|
|
125
|
+
self._partition_key_cache: Dict[str, Dict[str, str]] = {
|
|
126
|
+
mt: {} for mt in MemoryType.ALL
|
|
127
|
+
}
|
|
128
|
+
# Maximum cache size per container to prevent memory issues
|
|
129
|
+
self._cache_max_size = 1000
|
|
130
|
+
|
|
128
131
|
logger.info(f"Connected to Azure Cosmos DB: {database_name}")
|
|
129
132
|
|
|
130
133
|
@classmethod
|
|
@@ -150,29 +153,29 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
150
153
|
|
|
151
154
|
def _init_containers(self):
|
|
152
155
|
"""Initialize containers with vector search indexing."""
|
|
153
|
-
# Container configs with indexing policies
|
|
156
|
+
# Container configs with indexing policies (using canonical memory types)
|
|
154
157
|
container_configs = {
|
|
155
|
-
|
|
158
|
+
MemoryType.HEURISTICS: {
|
|
156
159
|
"partition_key": "/project_id",
|
|
157
160
|
"vector_path": "/embedding",
|
|
158
161
|
"vector_indexes": True,
|
|
159
162
|
},
|
|
160
|
-
|
|
163
|
+
MemoryType.OUTCOMES: {
|
|
161
164
|
"partition_key": "/project_id",
|
|
162
165
|
"vector_path": "/embedding",
|
|
163
166
|
"vector_indexes": True,
|
|
164
167
|
},
|
|
165
|
-
|
|
168
|
+
MemoryType.PREFERENCES: {
|
|
166
169
|
"partition_key": "/user_id",
|
|
167
170
|
"vector_path": None,
|
|
168
171
|
"vector_indexes": False,
|
|
169
172
|
},
|
|
170
|
-
|
|
173
|
+
MemoryType.DOMAIN_KNOWLEDGE: {
|
|
171
174
|
"partition_key": "/project_id",
|
|
172
175
|
"vector_path": "/embedding",
|
|
173
176
|
"vector_indexes": True,
|
|
174
177
|
},
|
|
175
|
-
|
|
178
|
+
MemoryType.ANTI_PATTERNS: {
|
|
176
179
|
"partition_key": "/project_id",
|
|
177
180
|
"vector_path": "/embedding",
|
|
178
181
|
"vector_indexes": True,
|
|
@@ -195,7 +198,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
195
198
|
if cfg["vector_indexes"] and cfg["vector_path"]:
|
|
196
199
|
# Exclude vector path from regular indexing
|
|
197
200
|
indexing_policy["excludedPaths"].append(
|
|
198
|
-
{"path": f
|
|
201
|
+
{"path": f"{cfg['vector_path']}/*"}
|
|
199
202
|
)
|
|
200
203
|
|
|
201
204
|
# Vector embedding policy for DiskANN
|
|
@@ -235,11 +238,123 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
235
238
|
"""Get container client by key."""
|
|
236
239
|
return self._containers[container_key]
|
|
237
240
|
|
|
241
|
+
def _cache_partition_key(
    self, container_key: str, doc_id: str, partition_key: str
) -> None:
    """
    Cache the partition key mapping for a document.

    The cached value lets later operations on the same document use a point
    read instead of an expensive cross-partition query, reducing RU
    consumption.

    The per-container cache is bounded by ``self._cache_max_size``. When a
    new document would push it past that bound, the oldest entries (in
    insertion order) are evicted first.

    Args:
        container_key: Key of the container the document lives in
        doc_id: The document ID
        partition_key: Partition key value to remember for the document
    """
    # Create the per-container cache on demand, mirroring the tolerant
    # lookups used by the other partition key cache helpers.
    cache = self._partition_key_cache.setdefault(container_key, {})

    # Overwriting an existing entry does not grow the cache, so only evict
    # when a new document is about to be added to a full cache.
    if doc_id not in cache and len(cache) >= self._cache_max_size:
        # Drop the oldest ~10% of entries (dicts preserve insertion order),
        # but always at least one so that max sizes below 10 stay bounded.
        evict_count = max(1, self._cache_max_size // 10)
        for stale_id in list(cache)[:evict_count]:
            del cache[stale_id]

    cache[doc_id] = partition_key
|
|
260
|
+
|
|
261
|
+
def _get_cached_partition_key(
    self, container_key: str, doc_id: str
) -> Optional[str]:
    """
    Look up the cached partition key for a document.

    Args:
        container_key: Key of the container the document lives in
        doc_id: The document ID

    Returns:
        The cached partition key, or None when either the container or the
        document has no cache entry.
    """
    # An unknown container is treated like an empty cache.
    container_cache = self._partition_key_cache.get(container_key, {})
    return container_cache.get(doc_id)
|
|
270
|
+
|
|
271
|
+
def _invalidate_partition_key_cache(self, container_key: str, doc_id: str) -> None:
    """
    Forget the cached partition key for a document.

    Does nothing when the container or the document has no cache entry.
    """
    self._partition_key_cache.get(container_key, {}).pop(doc_id, None)
|
|
275
|
+
|
|
276
|
+
def _point_read_document(
    self,
    container_key: str,
    doc_id: str,
    partition_key: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Load a document by ID, preferring a point read over a query.

    Resolution order for the partition key:
      1. the ``partition_key`` argument, when given;
      2. the value cached for ``doc_id`` in this container;
      3. none available - a cross-partition query by ID is issued instead.

    A failed point read (document missing, wrong partition key, or any
    other error) also falls back to the cross-partition query.

    Args:
        container_key: Key identifying the container to read from
        doc_id: The document ID
        partition_key: Optional partition key for a direct point read

    Returns:
        The document if found, None otherwise
    """
    container = self._get_container(container_key)

    # Explicit argument wins; otherwise see whether the key was cached.
    resolved_key = partition_key
    if resolved_key is None:
        resolved_key = self._get_cached_partition_key(container_key, doc_id)

    if resolved_key is not None:
        try:
            document = container.read_item(item=doc_id, partition_key=resolved_key)
            # The key is confirmed good, so (re)record it in the cache.
            self._cache_partition_key(container_key, doc_id, resolved_key)
            return document
        except exceptions.CosmosResourceNotFoundError:
            # Either the document is gone or the key was wrong; drop the
            # cache entry and let the query below decide.
            self._invalidate_partition_key_cache(container_key, doc_id)
        except Exception as e:
            # Best effort: report the problem and try the query instead.
            logger.warning(f"Point read failed for {doc_id}: {e}")

    # No usable partition key: search every partition by document ID.
    logger.debug(
        f"Using cross-partition query for {doc_id} in {container_key} "
        "(consider providing project_id for better performance)"
    )
    matches = list(
        container.query_items(
            query="SELECT * FROM c WHERE c.id = @id",
            parameters=[{"name": "@id", "value": doc_id}],
            enable_cross_partition_query=True,
        )
    )
    if not matches:
        return None

    document = matches[0]
    # Remember the partition key carried by the document for next time.
    pk_field = self._get_partition_key_field(container_key)
    if pk_field and pk_field in document:
        self._cache_partition_key(container_key, doc_id, document[pk_field])
    return document
|
|
341
|
+
|
|
342
|
+
def _get_partition_key_field(self, container_key: str) -> Optional[str]:
    """
    Return the name of the document field used as partition key.

    Args:
        container_key: Key identifying the container

    Returns:
        "user_id" for the preferences container, "project_id" for the
        heuristics, outcomes, domain knowledge and anti-patterns
        containers, and None for any other key.
    """
    # Every container is partitioned by project except preferences.
    pk_fields = dict.fromkeys(
        (
            MemoryType.HEURISTICS,
            MemoryType.OUTCOMES,
            MemoryType.DOMAIN_KNOWLEDGE,
            MemoryType.ANTI_PATTERNS,
        ),
        "project_id",
    )
    pk_fields[MemoryType.PREFERENCES] = "user_id"
    return pk_fields.get(container_key)
|
|
352
|
+
|
|
238
353
|
# ==================== WRITE OPERATIONS ====================
|
|
239
354
|
|
|
240
355
|
def save_heuristic(self, heuristic: Heuristic) -> str:
|
|
241
356
|
"""Save a heuristic."""
|
|
242
|
-
container = self._get_container(
|
|
357
|
+
container = self._get_container(MemoryType.HEURISTICS)
|
|
243
358
|
|
|
244
359
|
doc = {
|
|
245
360
|
"id": heuristic.id,
|
|
@@ -250,24 +365,30 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
250
365
|
"confidence": heuristic.confidence,
|
|
251
366
|
"occurrence_count": heuristic.occurrence_count,
|
|
252
367
|
"success_count": heuristic.success_count,
|
|
253
|
-
"last_validated":
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
368
|
+
"last_validated": (
|
|
369
|
+
heuristic.last_validated.isoformat()
|
|
370
|
+
if heuristic.last_validated
|
|
371
|
+
else None
|
|
372
|
+
),
|
|
373
|
+
"created_at": (
|
|
374
|
+
heuristic.created_at.isoformat() if heuristic.created_at else None
|
|
375
|
+
),
|
|
259
376
|
"metadata": heuristic.metadata or {},
|
|
260
377
|
"embedding": heuristic.embedding,
|
|
261
378
|
"type": "heuristic",
|
|
262
379
|
}
|
|
263
380
|
|
|
264
381
|
container.upsert_item(doc)
|
|
382
|
+
# Cache partition key for efficient future updates
|
|
383
|
+
self._cache_partition_key(
|
|
384
|
+
MemoryType.HEURISTICS, heuristic.id, heuristic.project_id
|
|
385
|
+
)
|
|
265
386
|
logger.debug(f"Saved heuristic: {heuristic.id}")
|
|
266
387
|
return heuristic.id
|
|
267
388
|
|
|
268
389
|
def save_outcome(self, outcome: Outcome) -> str:
|
|
269
390
|
"""Save an outcome."""
|
|
270
|
-
container = self._get_container(
|
|
391
|
+
container = self._get_container(MemoryType.OUTCOMES)
|
|
271
392
|
|
|
272
393
|
doc = {
|
|
273
394
|
"id": outcome.id,
|
|
@@ -287,12 +408,14 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
287
408
|
}
|
|
288
409
|
|
|
289
410
|
container.upsert_item(doc)
|
|
411
|
+
# Cache partition key for efficient future updates
|
|
412
|
+
self._cache_partition_key(MemoryType.OUTCOMES, outcome.id, outcome.project_id)
|
|
290
413
|
logger.debug(f"Saved outcome: {outcome.id}")
|
|
291
414
|
return outcome.id
|
|
292
415
|
|
|
293
416
|
def save_user_preference(self, preference: UserPreference) -> str:
|
|
294
417
|
"""Save a user preference."""
|
|
295
|
-
container = self._get_container(
|
|
418
|
+
container = self._get_container(MemoryType.PREFERENCES)
|
|
296
419
|
|
|
297
420
|
doc = {
|
|
298
421
|
"id": preference.id,
|
|
@@ -301,20 +424,24 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
301
424
|
"preference": preference.preference,
|
|
302
425
|
"source": preference.source,
|
|
303
426
|
"confidence": preference.confidence,
|
|
304
|
-
"timestamp":
|
|
305
|
-
|
|
306
|
-
|
|
427
|
+
"timestamp": (
|
|
428
|
+
preference.timestamp.isoformat() if preference.timestamp else None
|
|
429
|
+
),
|
|
307
430
|
"metadata": preference.metadata or {},
|
|
308
431
|
"type": "preference",
|
|
309
432
|
}
|
|
310
433
|
|
|
311
434
|
container.upsert_item(doc)
|
|
435
|
+
# Cache partition key for efficient future updates
|
|
436
|
+
self._cache_partition_key(
|
|
437
|
+
MemoryType.PREFERENCES, preference.id, preference.user_id
|
|
438
|
+
)
|
|
312
439
|
logger.debug(f"Saved preference: {preference.id}")
|
|
313
440
|
return preference.id
|
|
314
441
|
|
|
315
442
|
def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
|
|
316
443
|
"""Save domain knowledge."""
|
|
317
|
-
container = self._get_container(
|
|
444
|
+
container = self._get_container(MemoryType.DOMAIN_KNOWLEDGE)
|
|
318
445
|
|
|
319
446
|
doc = {
|
|
320
447
|
"id": knowledge.id,
|
|
@@ -324,21 +451,25 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
324
451
|
"fact": knowledge.fact,
|
|
325
452
|
"source": knowledge.source,
|
|
326
453
|
"confidence": knowledge.confidence,
|
|
327
|
-
"last_verified":
|
|
328
|
-
|
|
329
|
-
|
|
454
|
+
"last_verified": (
|
|
455
|
+
knowledge.last_verified.isoformat() if knowledge.last_verified else None
|
|
456
|
+
),
|
|
330
457
|
"metadata": knowledge.metadata or {},
|
|
331
458
|
"embedding": knowledge.embedding,
|
|
332
459
|
"type": "domain_knowledge",
|
|
333
460
|
}
|
|
334
461
|
|
|
335
462
|
container.upsert_item(doc)
|
|
463
|
+
# Cache partition key for efficient future updates
|
|
464
|
+
self._cache_partition_key(
|
|
465
|
+
MemoryType.DOMAIN_KNOWLEDGE, knowledge.id, knowledge.project_id
|
|
466
|
+
)
|
|
336
467
|
logger.debug(f"Saved domain knowledge: {knowledge.id}")
|
|
337
468
|
return knowledge.id
|
|
338
469
|
|
|
339
470
|
def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
|
|
340
471
|
"""Save an anti-pattern."""
|
|
341
|
-
container = self._get_container(
|
|
472
|
+
container = self._get_container(MemoryType.ANTI_PATTERNS)
|
|
342
473
|
|
|
343
474
|
doc = {
|
|
344
475
|
"id": anti_pattern.id,
|
|
@@ -348,18 +479,22 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
348
479
|
"why_bad": anti_pattern.why_bad,
|
|
349
480
|
"better_alternative": anti_pattern.better_alternative,
|
|
350
481
|
"occurrence_count": anti_pattern.occurrence_count,
|
|
351
|
-
"last_seen":
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
"created_at":
|
|
355
|
-
|
|
356
|
-
|
|
482
|
+
"last_seen": (
|
|
483
|
+
anti_pattern.last_seen.isoformat() if anti_pattern.last_seen else None
|
|
484
|
+
),
|
|
485
|
+
"created_at": (
|
|
486
|
+
anti_pattern.created_at.isoformat() if anti_pattern.created_at else None
|
|
487
|
+
),
|
|
357
488
|
"metadata": anti_pattern.metadata or {},
|
|
358
489
|
"embedding": anti_pattern.embedding,
|
|
359
490
|
"type": "anti_pattern",
|
|
360
491
|
}
|
|
361
492
|
|
|
362
493
|
container.upsert_item(doc)
|
|
494
|
+
# Cache partition key for efficient future updates
|
|
495
|
+
self._cache_partition_key(
|
|
496
|
+
MemoryType.ANTI_PATTERNS, anti_pattern.id, anti_pattern.project_id
|
|
497
|
+
)
|
|
363
498
|
logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
|
|
364
499
|
return anti_pattern.id
|
|
365
500
|
|
|
@@ -374,7 +509,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
374
509
|
min_confidence: float = 0.0,
|
|
375
510
|
) -> List[Heuristic]:
|
|
376
511
|
"""Get heuristics with optional vector search."""
|
|
377
|
-
container = self._get_container(
|
|
512
|
+
container = self._get_container(MemoryType.HEURISTICS)
|
|
378
513
|
|
|
379
514
|
if embedding:
|
|
380
515
|
# Vector search query
|
|
@@ -426,6 +561,12 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
426
561
|
)
|
|
427
562
|
)
|
|
428
563
|
|
|
564
|
+
# Cache partition keys for efficient future updates
|
|
565
|
+
for doc in items:
|
|
566
|
+
self._cache_partition_key(
|
|
567
|
+
MemoryType.HEURISTICS, doc["id"], doc["project_id"]
|
|
568
|
+
)
|
|
569
|
+
|
|
429
570
|
return [self._doc_to_heuristic(doc) for doc in items]
|
|
430
571
|
|
|
431
572
|
def get_outcomes(
|
|
@@ -438,7 +579,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
438
579
|
success_only: bool = False,
|
|
439
580
|
) -> List[Outcome]:
|
|
440
581
|
"""Get outcomes with optional vector search."""
|
|
441
|
-
container = self._get_container(
|
|
582
|
+
container = self._get_container(MemoryType.OUTCOMES)
|
|
442
583
|
|
|
443
584
|
if embedding:
|
|
444
585
|
# Vector search query
|
|
@@ -496,6 +637,10 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
496
637
|
)
|
|
497
638
|
)
|
|
498
639
|
|
|
640
|
+
# Cache partition keys for efficient future updates
|
|
641
|
+
for doc in items:
|
|
642
|
+
self._cache_partition_key(MemoryType.OUTCOMES, doc["id"], doc["project_id"])
|
|
643
|
+
|
|
499
644
|
return [self._doc_to_outcome(doc) for doc in items]
|
|
500
645
|
|
|
501
646
|
def get_user_preferences(
|
|
@@ -504,7 +649,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
504
649
|
category: Optional[str] = None,
|
|
505
650
|
) -> List[UserPreference]:
|
|
506
651
|
"""Get user preferences."""
|
|
507
|
-
container = self._get_container(
|
|
652
|
+
container = self._get_container(MemoryType.PREFERENCES)
|
|
508
653
|
|
|
509
654
|
query = "SELECT * FROM c WHERE c.user_id = @user_id"
|
|
510
655
|
parameters = [{"name": "@user_id", "value": user_id}]
|
|
@@ -522,6 +667,10 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
522
667
|
)
|
|
523
668
|
)
|
|
524
669
|
|
|
670
|
+
# Cache partition keys for efficient future updates
|
|
671
|
+
for doc in items:
|
|
672
|
+
self._cache_partition_key(MemoryType.PREFERENCES, doc["id"], doc["user_id"])
|
|
673
|
+
|
|
525
674
|
return [self._doc_to_preference(doc) for doc in items]
|
|
526
675
|
|
|
527
676
|
def get_domain_knowledge(
|
|
@@ -533,7 +682,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
533
682
|
top_k: int = 5,
|
|
534
683
|
) -> List[DomainKnowledge]:
|
|
535
684
|
"""Get domain knowledge with optional vector search."""
|
|
536
|
-
container = self._get_container(
|
|
685
|
+
container = self._get_container(MemoryType.DOMAIN_KNOWLEDGE)
|
|
537
686
|
|
|
538
687
|
if embedding:
|
|
539
688
|
query = """
|
|
@@ -585,6 +734,12 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
585
734
|
)
|
|
586
735
|
)
|
|
587
736
|
|
|
737
|
+
# Cache partition keys for efficient future updates
|
|
738
|
+
for doc in items:
|
|
739
|
+
self._cache_partition_key(
|
|
740
|
+
MemoryType.DOMAIN_KNOWLEDGE, doc["id"], doc["project_id"]
|
|
741
|
+
)
|
|
742
|
+
|
|
588
743
|
return [self._doc_to_domain_knowledge(doc) for doc in items]
|
|
589
744
|
|
|
590
745
|
def get_anti_patterns(
|
|
@@ -595,7 +750,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
595
750
|
top_k: int = 5,
|
|
596
751
|
) -> List[AntiPattern]:
|
|
597
752
|
"""Get anti-patterns with optional vector search."""
|
|
598
|
-
container = self._get_container(
|
|
753
|
+
container = self._get_container(MemoryType.ANTI_PATTERNS)
|
|
599
754
|
|
|
600
755
|
if embedding:
|
|
601
756
|
query = """
|
|
@@ -641,6 +796,12 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
641
796
|
)
|
|
642
797
|
)
|
|
643
798
|
|
|
799
|
+
# Cache partition keys for efficient future updates
|
|
800
|
+
for doc in items:
|
|
801
|
+
self._cache_partition_key(
|
|
802
|
+
MemoryType.ANTI_PATTERNS, doc["id"], doc["project_id"]
|
|
803
|
+
)
|
|
804
|
+
|
|
644
805
|
return [self._doc_to_anti_pattern(doc) for doc in items]
|
|
645
806
|
|
|
646
807
|
# ==================== UPDATE OPERATIONS ====================
|
|
@@ -649,26 +810,28 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
649
810
|
self,
|
|
650
811
|
heuristic_id: str,
|
|
651
812
|
updates: Dict[str, Any],
|
|
813
|
+
project_id: Optional[str] = None,
|
|
652
814
|
) -> bool:
|
|
653
|
-
"""
|
|
654
|
-
|
|
815
|
+
"""
|
|
816
|
+
Update a heuristic's fields.
|
|
655
817
|
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
818
|
+
Args:
|
|
819
|
+
heuristic_id: The heuristic document ID
|
|
820
|
+
updates: Dictionary of fields to update
|
|
821
|
+
project_id: Optional partition key for efficient point read (1 RU).
|
|
822
|
+
If not provided, will attempt cache lookup, then
|
|
823
|
+
fall back to cross-partition query (more expensive).
|
|
824
|
+
|
|
825
|
+
Returns:
|
|
826
|
+
True if update succeeded, False if document not found
|
|
827
|
+
"""
|
|
828
|
+
container = self._get_container(MemoryType.HEURISTICS)
|
|
666
829
|
|
|
667
|
-
|
|
668
|
-
|
|
830
|
+
# Use optimized point read with cache fallback
|
|
831
|
+
doc = self._point_read_document(MemoryType.HEURISTICS, heuristic_id, project_id)
|
|
669
832
|
|
|
670
|
-
|
|
671
|
-
|
|
833
|
+
if not doc:
|
|
834
|
+
return False
|
|
672
835
|
|
|
673
836
|
# Apply updates
|
|
674
837
|
for key, value in updates.items():
|
|
@@ -684,24 +847,29 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
684
847
|
self,
|
|
685
848
|
heuristic_id: str,
|
|
686
849
|
success: bool,
|
|
850
|
+
project_id: Optional[str] = None,
|
|
687
851
|
) -> bool:
|
|
688
|
-
"""
|
|
689
|
-
|
|
852
|
+
"""
|
|
853
|
+
Increment heuristic occurrence count.
|
|
690
854
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
855
|
+
Args:
|
|
856
|
+
heuristic_id: The heuristic document ID
|
|
857
|
+
success: Whether this occurrence was successful
|
|
858
|
+
project_id: Optional partition key for efficient point read (1 RU).
|
|
859
|
+
If not provided, will attempt cache lookup, then
|
|
860
|
+
fall back to cross-partition query (more expensive).
|
|
861
|
+
|
|
862
|
+
Returns:
|
|
863
|
+
True if update succeeded, False if document not found
|
|
864
|
+
"""
|
|
865
|
+
container = self._get_container(MemoryType.HEURISTICS)
|
|
700
866
|
|
|
701
|
-
|
|
867
|
+
# Use optimized point read with cache fallback
|
|
868
|
+
doc = self._point_read_document(MemoryType.HEURISTICS, heuristic_id, project_id)
|
|
869
|
+
|
|
870
|
+
if not doc:
|
|
702
871
|
return False
|
|
703
872
|
|
|
704
|
-
doc = items[0]
|
|
705
873
|
doc["occurrence_count"] = doc.get("occurrence_count", 0) + 1
|
|
706
874
|
if success:
|
|
707
875
|
doc["success_count"] = doc.get("success_count", 0) + 1
|
|
@@ -710,6 +878,88 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
710
878
|
container.replace_item(item=heuristic_id, body=doc)
|
|
711
879
|
return True
|
|
712
880
|
|
|
881
|
+
def update_heuristic_confidence(
    self,
    heuristic_id: str,
    new_confidence: float,
    project_id: Optional[str] = None,
) -> bool:
    """
    Set a new confidence score on a stored heuristic.

    The document is loaded, its ``confidence`` field is overwritten, and the
    whole document is written back with ``replace_item``.

    Args:
        heuristic_id: The heuristic document ID
        new_confidence: The new confidence value
        project_id: Optional partition key. When given (or when the key is
            already cached) the document is loaded with a point read;
            otherwise a more expensive cross-partition query is used.

    Returns:
        True if update succeeded, False if document not found
    """
    heuristics = self._get_container(MemoryType.HEURISTICS)

    # Point read when the partition key is known or cached; the helper
    # falls back to a cross-partition query otherwise.
    stored = self._point_read_document(
        MemoryType.HEURISTICS, heuristic_id, project_id
    )
    if not stored:
        return False

    stored["confidence"] = new_confidence
    heuristics.replace_item(item=heuristic_id, body=stored)

    logger.debug(f"Updated heuristic confidence: {heuristic_id} -> {new_confidence}")
    return True
|
|
920
|
+
|
|
921
|
+
def update_knowledge_confidence(
    self,
    knowledge_id: str,
    new_confidence: float,
    project_id: Optional[str] = None,
) -> bool:
    """
    Set a new confidence score on a stored domain knowledge item.

    The document is loaded, its ``confidence`` field is overwritten, and the
    whole document is written back with ``replace_item``.

    Args:
        knowledge_id: The knowledge document ID
        new_confidence: The new confidence value
        project_id: Optional partition key. When given (or when the key is
            already cached) the document is loaded with a point read;
            otherwise a more expensive cross-partition query is used.

    Returns:
        True if update succeeded, False if document not found
    """
    knowledge_container = self._get_container(MemoryType.DOMAIN_KNOWLEDGE)

    # Point read when the partition key is known or cached; the helper
    # falls back to a cross-partition query otherwise.
    stored = self._point_read_document(
        MemoryType.DOMAIN_KNOWLEDGE, knowledge_id, project_id
    )
    if not stored:
        return False

    stored["confidence"] = new_confidence
    knowledge_container.replace_item(item=knowledge_id, body=stored)

    logger.debug(f"Updated knowledge confidence: {knowledge_id} -> {new_confidence}")
    return True
|
|
962
|
+
|
|
713
963
|
# ==================== DELETE OPERATIONS ====================
|
|
714
964
|
|
|
715
965
|
def delete_outcomes_older_than(
|
|
@@ -719,7 +969,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
719
969
|
agent: Optional[str] = None,
|
|
720
970
|
) -> int:
|
|
721
971
|
"""Delete old outcomes."""
|
|
722
|
-
container = self._get_container(
|
|
972
|
+
container = self._get_container(MemoryType.OUTCOMES)
|
|
723
973
|
|
|
724
974
|
query = """
|
|
725
975
|
SELECT c.id FROM c
|
|
@@ -762,7 +1012,7 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
762
1012
|
agent: Optional[str] = None,
|
|
763
1013
|
) -> int:
|
|
764
1014
|
"""Delete low-confidence heuristics."""
|
|
765
|
-
container = self._get_container(
|
|
1015
|
+
container = self._get_container(MemoryType.HEURISTICS)
|
|
766
1016
|
|
|
767
1017
|
query = """
|
|
768
1018
|
SELECT c.id FROM c
|
|
@@ -798,27 +1048,59 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
798
1048
|
logger.info(f"Deleted {deleted} low-confidence heuristics")
|
|
799
1049
|
return deleted
|
|
800
1050
|
|
|
801
|
-
def delete_heuristic(
|
|
802
|
-
|
|
803
|
-
|
|
1051
|
+
def delete_heuristic(
|
|
1052
|
+
self, heuristic_id: str, project_id: Optional[str] = None
|
|
1053
|
+
) -> bool:
|
|
1054
|
+
"""
|
|
1055
|
+
Delete a specific heuristic by ID.
|
|
804
1056
|
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
1057
|
+
Args:
|
|
1058
|
+
heuristic_id: The heuristic document ID
|
|
1059
|
+
project_id: Optional partition key for efficient point read (1 RU).
|
|
1060
|
+
If not provided, will attempt cache lookup, then
|
|
1061
|
+
fall back to cross-partition query (more expensive).
|
|
1062
|
+
|
|
1063
|
+
Returns:
|
|
1064
|
+
True if deletion succeeded, False if document not found
|
|
1065
|
+
"""
|
|
1066
|
+
container = self._get_container(MemoryType.HEURISTICS)
|
|
1067
|
+
|
|
1068
|
+
# Try to get partition key from cache if not provided
|
|
1069
|
+
if project_id is None:
|
|
1070
|
+
project_id = self._get_cached_partition_key(
|
|
1071
|
+
MemoryType.HEURISTICS, heuristic_id
|
|
812
1072
|
)
|
|
1073
|
+
|
|
1074
|
+
# If we have a partition key, try direct delete
|
|
1075
|
+
if project_id is not None:
|
|
1076
|
+
try:
|
|
1077
|
+
container.delete_item(item=heuristic_id, partition_key=project_id)
|
|
1078
|
+
self._invalidate_partition_key_cache(
|
|
1079
|
+
MemoryType.HEURISTICS, heuristic_id
|
|
1080
|
+
)
|
|
1081
|
+
return True
|
|
1082
|
+
except exceptions.CosmosResourceNotFoundError:
|
|
1083
|
+
# Document not found or partition key was wrong
|
|
1084
|
+
self._invalidate_partition_key_cache(
|
|
1085
|
+
MemoryType.HEURISTICS, heuristic_id
|
|
1086
|
+
)
|
|
1087
|
+
# Fall through to cross-partition lookup
|
|
1088
|
+
|
|
1089
|
+
# Fallback: Cross-partition query to find the document
|
|
1090
|
+
logger.debug(
|
|
1091
|
+
f"Using cross-partition query for delete {heuristic_id} "
|
|
1092
|
+
"(consider providing project_id for better performance)"
|
|
813
1093
|
)
|
|
1094
|
+
doc = self._point_read_document(MemoryType.HEURISTICS, heuristic_id, None)
|
|
814
1095
|
|
|
815
|
-
if not
|
|
1096
|
+
if not doc:
|
|
816
1097
|
return False
|
|
817
1098
|
|
|
818
|
-
project_id =
|
|
1099
|
+
project_id = doc["project_id"]
|
|
819
1100
|
|
|
820
1101
|
try:
|
|
821
1102
|
container.delete_item(item=heuristic_id, partition_key=project_id)
|
|
1103
|
+
self._invalidate_partition_key_cache(MemoryType.HEURISTICS, heuristic_id)
|
|
822
1104
|
return True
|
|
823
1105
|
except exceptions.CosmosResourceNotFoundError:
|
|
824
1106
|
return False
|
|
@@ -838,39 +1120,38 @@ class AzureCosmosStorage(StorageBackend):
|
|
|
838
1120
|
"database": self.database_name,
|
|
839
1121
|
}
|
|
840
1122
|
|
|
841
|
-
# Count items in each container
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
container = self._get_container(key)
|
|
845
|
-
query = "SELECT VALUE COUNT(1) FROM c WHERE c.project_id = @project_id"
|
|
846
|
-
parameters = [{"name": "@project_id", "value": project_id}]
|
|
847
|
-
|
|
848
|
-
if agent and key != "preferences":
|
|
849
|
-
query = """
|
|
850
|
-
SELECT VALUE COUNT(1) FROM c
|
|
851
|
-
WHERE c.project_id = @project_id AND c.agent = @agent
|
|
852
|
-
"""
|
|
853
|
-
parameters.append({"name": "@agent", "value": agent})
|
|
1123
|
+
# Count items in each container using canonical memory types
|
|
1124
|
+
for memory_type in MemoryType.ALL:
|
|
1125
|
+
container = self._get_container(memory_type)
|
|
854
1126
|
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
1127
|
+
if memory_type == MemoryType.PREFERENCES:
|
|
1128
|
+
# Preferences use user_id, not project_id
|
|
1129
|
+
result = list(
|
|
1130
|
+
container.query_items(
|
|
1131
|
+
query="SELECT VALUE COUNT(1) FROM c",
|
|
1132
|
+
enable_cross_partition_query=True,
|
|
1133
|
+
)
|
|
861
1134
|
)
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
1135
|
+
else:
|
|
1136
|
+
query = "SELECT VALUE COUNT(1) FROM c WHERE c.project_id = @project_id"
|
|
1137
|
+
parameters = [{"name": "@project_id", "value": project_id}]
|
|
1138
|
+
|
|
1139
|
+
if agent:
|
|
1140
|
+
query = """
|
|
1141
|
+
SELECT VALUE COUNT(1) FROM c
|
|
1142
|
+
WHERE c.project_id = @project_id AND c.agent = @agent
|
|
1143
|
+
"""
|
|
1144
|
+
parameters.append({"name": "@agent", "value": agent})
|
|
1145
|
+
|
|
1146
|
+
result = list(
|
|
1147
|
+
container.query_items(
|
|
1148
|
+
query=query,
|
|
1149
|
+
parameters=parameters,
|
|
1150
|
+
enable_cross_partition_query=False,
|
|
1151
|
+
partition_key=project_id,
|
|
1152
|
+
)
|
|
1153
|
+
)
|
|
1154
|
+
stats[f"{memory_type}_count"] = result[0] if result else 0
|
|
874
1155
|
|
|
875
1156
|
stats["total_count"] = sum(
|
|
876
1157
|
stats.get(k, 0) for k in stats if k.endswith("_count")
|