alma-memory 0.5.1-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (111)
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/retrieval/cache.py CHANGED
@@ -1,1061 +1,1304 @@
1
- """
2
- ALMA Retrieval Cache.
3
-
4
- Multi-backend caching layer for retrieval results with TTL-based expiration.
5
- Supports in-memory and Redis backends with performance monitoring.
6
- """
7
-
8
- import hashlib
9
- import json
10
- import logging
11
- import threading
12
- import time
13
- from abc import ABC, abstractmethod
14
- from dataclasses import dataclass, field
15
- from datetime import datetime, timezone
16
- from typing import Any, Callable, Dict, List, Optional
17
-
18
- from alma.types import MemorySlice
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
- # ==================== DATA STRUCTURES ====================
24
-
25
-
26
- @dataclass
27
- class CacheEntry:
28
- """A cached retrieval result with metadata."""
29
-
30
- result: MemorySlice
31
- created_at: float # time.time() timestamp
32
- expires_at: float
33
- hit_count: int = 0
34
- query_hash: str = ""
35
- # Metadata for selective invalidation
36
- agent: str = ""
37
- project_id: str = ""
38
- user_id: str = ""
39
-
40
-
41
- @dataclass
42
- class CacheStats:
43
- """Statistics about cache performance."""
44
-
45
- hits: int = 0
46
- misses: int = 0
47
- evictions: int = 0
48
- current_size: int = 0
49
- max_size: int = 0
50
- # Performance metrics
51
- avg_get_time_ms: float = 0.0
52
- avg_set_time_ms: float = 0.0
53
- p95_get_time_ms: float = 0.0
54
- p95_set_time_ms: float = 0.0
55
- total_get_calls: int = 0
56
- total_set_calls: int = 0
57
-
58
- @property
59
- def hit_rate(self) -> float:
60
- """Calculate cache hit rate."""
61
- total = self.hits + self.misses
62
- return self.hits / total if total > 0 else 0.0
63
-
64
- def to_dict(self) -> Dict[str, Any]:
65
- """Convert to dictionary."""
66
- return {
67
- "hits": self.hits,
68
- "misses": self.misses,
69
- "evictions": self.evictions,
70
- "hit_rate": f"{self.hit_rate:.2%}",
71
- "current_size": self.current_size,
72
- "max_size": self.max_size,
73
- "avg_get_time_ms": round(self.avg_get_time_ms, 2),
74
- "avg_set_time_ms": round(self.avg_set_time_ms, 2),
75
- "p95_get_time_ms": round(self.p95_get_time_ms, 2),
76
- "p95_set_time_ms": round(self.p95_set_time_ms, 2),
77
- "total_get_calls": self.total_get_calls,
78
- "total_set_calls": self.total_set_calls,
79
- }
80
-
81
-
82
- @dataclass
83
- class PerformanceMetrics:
84
- """Tracks timing metrics for performance analysis."""
85
-
86
- get_times: List[float] = field(default_factory=list)
87
- set_times: List[float] = field(default_factory=list)
88
- max_samples: int = 1000
89
-
90
- def record_get(self, duration_ms: float):
91
- """Record a get operation time."""
92
- self.get_times.append(duration_ms)
93
- if len(self.get_times) > self.max_samples:
94
- self.get_times = self.get_times[-self.max_samples :]
95
-
96
- def record_set(self, duration_ms: float):
97
- """Record a set operation time."""
98
- self.set_times.append(duration_ms)
99
- if len(self.set_times) > self.max_samples:
100
- self.set_times = self.set_times[-self.max_samples :]
101
-
102
- def get_percentile(self, times: List[float], percentile: float) -> float:
103
- """Calculate percentile from timing data."""
104
- if not times:
105
- return 0.0
106
- sorted_times = sorted(times)
107
- idx = int(len(sorted_times) * percentile / 100)
108
- return sorted_times[min(idx, len(sorted_times) - 1)]
109
-
110
- def get_avg(self, times: List[float]) -> float:
111
- """Calculate average from timing data."""
112
- if not times:
113
- return 0.0
114
- return sum(times) / len(times)
115
-
116
-
117
- # ==================== CACHE INTERFACE ====================
118
-
119
-
120
- class CacheBackend(ABC):
121
- """Abstract interface for cache backends."""
122
-
123
- @abstractmethod
124
- def get(
125
- self,
126
- query: str,
127
- agent: str,
128
- project_id: str,
129
- user_id: Optional[str] = None,
130
- top_k: int = 5,
131
- ) -> Optional[MemorySlice]:
132
- """Get cached result if available."""
133
- pass
134
-
135
- @abstractmethod
136
- def set(
137
- self,
138
- query: str,
139
- agent: str,
140
- project_id: str,
141
- result: MemorySlice,
142
- user_id: Optional[str] = None,
143
- top_k: int = 5,
144
- ttl_override: Optional[int] = None,
145
- ) -> None:
146
- """Cache a retrieval result."""
147
- pass
148
-
149
- @abstractmethod
150
- def invalidate(
151
- self,
152
- agent: Optional[str] = None,
153
- project_id: Optional[str] = None,
154
- ) -> int:
155
- """Invalidate cache entries. Returns count of invalidated entries."""
156
- pass
157
-
158
- @abstractmethod
159
- def get_stats(self) -> CacheStats:
160
- """Get cache statistics."""
161
- pass
162
-
163
- @abstractmethod
164
- def clear(self) -> None:
165
- """Clear all cache entries."""
166
- pass
167
-
168
-
169
- # ==================== IN-MEMORY CACHE ====================
170
-
171
-
172
- class RetrievalCache(CacheBackend):
173
- """
174
- In-memory cache for retrieval results.
175
-
176
- Features:
177
- - TTL-based expiration
178
- - LRU eviction when max size reached
179
- - Thread-safe operations
180
- - Selective cache invalidation by agent/project
181
- - Performance metrics tracking
182
- - Monitoring hooks
183
- """
184
-
185
- def __init__(
186
- self,
187
- ttl_seconds: int = 300,
188
- max_entries: int = 1000,
189
- cleanup_interval: int = 60,
190
- enable_metrics: bool = True,
191
- ):
192
- """
193
- Initialize cache.
194
-
195
- Args:
196
- ttl_seconds: Time-to-live for cache entries (default: 5 minutes)
197
- max_entries: Maximum number of cached entries before eviction
198
- cleanup_interval: Seconds between cleanup cycles for expired entries
199
- enable_metrics: Whether to track performance metrics
200
- """
201
- self.ttl = ttl_seconds
202
- self.max_entries = max_entries
203
- self.cleanup_interval = cleanup_interval
204
- self.enable_metrics = enable_metrics
205
-
206
- self._cache: Dict[str, CacheEntry] = {}
207
- # Index for selective invalidation: agent -> set of cache keys
208
- self._agent_index: Dict[str, set] = {}
209
- # Index for selective invalidation: project_id -> set of cache keys
210
- self._project_index: Dict[str, set] = {}
211
-
212
- self._lock = threading.RLock()
213
- self._stats = CacheStats(max_size=max_entries)
214
- self._metrics = PerformanceMetrics() if enable_metrics else None
215
- self._last_cleanup = time.time()
216
-
217
- # Monitoring hooks
218
- self._on_hit: Optional[Callable[[str, float], None]] = None
219
- self._on_miss: Optional[Callable[[str], None]] = None
220
- self._on_eviction: Optional[Callable[[int], None]] = None
221
-
222
- def set_hooks(
223
- self,
224
- on_hit: Optional[Callable[[str, float], None]] = None,
225
- on_miss: Optional[Callable[[str], None]] = None,
226
- on_eviction: Optional[Callable[[int], None]] = None,
227
- ):
228
- """
229
- Set monitoring hooks for cache events.
230
-
231
- Args:
232
- on_hit: Called on cache hit with (query_hash, latency_ms)
233
- on_miss: Called on cache miss with (query_hash)
234
- on_eviction: Called on eviction with (count)
235
- """
236
- self._on_hit = on_hit
237
- self._on_miss = on_miss
238
- self._on_eviction = on_eviction
239
-
240
- def _generate_key(
241
- self,
242
- query: str,
243
- agent: str,
244
- project_id: str,
245
- user_id: Optional[str] = None,
246
- top_k: int = 5,
247
- ) -> str:
248
- """Generate a unique cache key for the query parameters."""
249
- key_parts = [
250
- query.lower().strip(),
251
- agent,
252
- project_id,
253
- user_id or "",
254
- str(top_k),
255
- ]
256
- key_string = "|".join(key_parts)
257
- return hashlib.sha256(key_string.encode()).hexdigest()[:32]
258
-
259
- def get(
260
- self,
261
- query: str,
262
- agent: str,
263
- project_id: str,
264
- user_id: Optional[str] = None,
265
- top_k: int = 5,
266
- ) -> Optional[MemorySlice]:
267
- """Get cached result if available and not expired."""
268
- start_time = time.time()
269
- key = self._generate_key(query, agent, project_id, user_id, top_k)
270
- now = time.time()
271
-
272
- with self._lock:
273
- # Periodic cleanup
274
- if now - self._last_cleanup > self.cleanup_interval:
275
- self._cleanup_expired()
276
-
277
- entry = self._cache.get(key)
278
-
279
- if entry is None:
280
- self._stats.misses += 1
281
- if self._on_miss:
282
- self._on_miss(key)
283
- self._record_get_time(start_time)
284
- return None
285
-
286
- if now > entry.expires_at:
287
- # Entry expired
288
- self._remove_entry(key, entry)
289
- self._stats.misses += 1
290
- if self._on_miss:
291
- self._on_miss(key)
292
- self._record_get_time(start_time)
293
- return None
294
-
295
- # Cache hit
296
- entry.hit_count += 1
297
- self._stats.hits += 1
298
- latency_ms = (time.time() - start_time) * 1000
299
- if self._on_hit:
300
- self._on_hit(key, latency_ms)
301
- self._record_get_time(start_time)
302
- logger.debug(f"Cache hit for query: {query[:50]}...")
303
- return entry.result
304
-
305
- def set(
306
- self,
307
- query: str,
308
- agent: str,
309
- project_id: str,
310
- result: MemorySlice,
311
- user_id: Optional[str] = None,
312
- top_k: int = 5,
313
- ttl_override: Optional[int] = None,
314
- ) -> None:
315
- """Cache a retrieval result."""
316
- start_time = time.time()
317
- key = self._generate_key(query, agent, project_id, user_id, top_k)
318
- now = time.time()
319
- ttl = ttl_override or self.ttl
320
-
321
- with self._lock:
322
- # Check if we need to evict entries
323
- if len(self._cache) >= self.max_entries and key not in self._cache:
324
- self._evict_lru()
325
-
326
- entry = CacheEntry(
327
- result=result,
328
- created_at=now,
329
- expires_at=now + ttl,
330
- hit_count=0,
331
- query_hash=key,
332
- agent=agent,
333
- project_id=project_id,
334
- user_id=user_id or "",
335
- )
336
-
337
- self._cache[key] = entry
338
-
339
- # Update indexes
340
- if agent not in self._agent_index:
341
- self._agent_index[agent] = set()
342
- self._agent_index[agent].add(key)
343
-
344
- if project_id not in self._project_index:
345
- self._project_index[project_id] = set()
346
- self._project_index[project_id].add(key)
347
-
348
- self._stats.current_size = len(self._cache)
349
- self._record_set_time(start_time)
350
- logger.debug(f"Cached result for query: {query[:50]}...")
351
-
352
- def invalidate(
353
- self,
354
- agent: Optional[str] = None,
355
- project_id: Optional[str] = None,
356
- ) -> int:
357
- """
358
- Invalidate cache entries matching criteria.
359
-
360
- If no criteria provided, clears entire cache.
361
-
362
- Args:
363
- agent: Invalidate entries for this agent
364
- project_id: Invalidate entries for this project
365
-
366
- Returns:
367
- Number of entries invalidated
368
- """
369
- with self._lock:
370
- if agent is None and project_id is None:
371
- # Clear all
372
- count = len(self._cache)
373
- self._cache.clear()
374
- self._agent_index.clear()
375
- self._project_index.clear()
376
- self._stats.evictions += count
377
- self._stats.current_size = 0
378
- if self._on_eviction and count > 0:
379
- self._on_eviction(count)
380
- logger.info(f"Invalidated entire cache ({count} entries)")
381
- return count
382
-
383
- keys_to_remove: set = set()
384
-
385
- # Collect keys matching agent
386
- if agent and agent in self._agent_index:
387
- keys_to_remove.update(self._agent_index[agent])
388
-
389
- # Collect keys matching project (intersection if both specified)
390
- if project_id and project_id in self._project_index:
391
- project_keys = self._project_index[project_id]
392
- if agent:
393
- # Intersection: both agent AND project must match
394
- keys_to_remove = keys_to_remove.intersection(project_keys)
395
- else:
396
- keys_to_remove.update(project_keys)
397
-
398
- # Remove matched entries
399
- count = 0
400
- for key in keys_to_remove:
401
- if key in self._cache:
402
- entry = self._cache[key]
403
- self._remove_entry(key, entry)
404
- count += 1
405
-
406
- self._stats.evictions += count
407
- if self._on_eviction and count > 0:
408
- self._on_eviction(count)
409
- logger.info(
410
- f"Invalidated {count} cache entries for agent={agent}, project={project_id}"
411
- )
412
- return count
413
-
414
- def _remove_entry(self, key: str, entry: CacheEntry) -> None:
415
- """Remove an entry from cache and indexes."""
416
- del self._cache[key]
417
-
418
- # Update indexes
419
- if entry.agent in self._agent_index:
420
- self._agent_index[entry.agent].discard(key)
421
- if not self._agent_index[entry.agent]:
422
- del self._agent_index[entry.agent]
423
-
424
- if entry.project_id in self._project_index:
425
- self._project_index[entry.project_id].discard(key)
426
- if not self._project_index[entry.project_id]:
427
- del self._project_index[entry.project_id]
428
-
429
- self._stats.current_size = len(self._cache)
430
-
431
- def _cleanup_expired(self) -> None:
432
- """Remove all expired entries."""
433
- now = time.time()
434
- expired = [
435
- (key, entry) for key, entry in self._cache.items() if now > entry.expires_at
436
- ]
437
-
438
- for key, entry in expired:
439
- self._remove_entry(key, entry)
440
-
441
- if expired:
442
- self._stats.evictions += len(expired)
443
- if self._on_eviction:
444
- self._on_eviction(len(expired))
445
- logger.debug(f"Cleaned up {len(expired)} expired cache entries")
446
-
447
- self._last_cleanup = now
448
-
449
- def _evict_lru(self) -> None:
450
- """Evict least recently used entry (based on hit count and age)."""
451
- if not self._cache:
452
- return
453
-
454
- # Find entry with lowest score (hit_count / age)
455
- now = time.time()
456
- worst_key = None
457
- worst_entry = None
458
- worst_score = float("inf")
459
-
460
- for key, entry in self._cache.items():
461
- age = now - entry.created_at + 1 # +1 to avoid division by zero
462
- score = (entry.hit_count + 1) / age
463
- if score < worst_score:
464
- worst_score = score
465
- worst_key = key
466
- worst_entry = entry
467
-
468
- if worst_key and worst_entry:
469
- self._remove_entry(worst_key, worst_entry)
470
- self._stats.evictions += 1
471
- if self._on_eviction:
472
- self._on_eviction(1)
473
- logger.debug("Evicted LRU cache entry")
474
-
475
- def _record_get_time(self, start_time: float) -> None:
476
- """Record get operation timing."""
477
- if self._metrics:
478
- duration_ms = (time.time() - start_time) * 1000
479
- self._metrics.record_get(duration_ms)
480
- self._stats.total_get_calls += 1
481
-
482
- def _record_set_time(self, start_time: float) -> None:
483
- """Record set operation timing."""
484
- if self._metrics:
485
- duration_ms = (time.time() - start_time) * 1000
486
- self._metrics.record_set(duration_ms)
487
- self._stats.total_set_calls += 1
488
-
489
- def get_stats(self) -> CacheStats:
490
- """Get cache statistics with performance metrics."""
491
- with self._lock:
492
- self._stats.current_size = len(self._cache)
493
-
494
- if self._metrics:
495
- self._stats.avg_get_time_ms = self._metrics.get_avg(
496
- self._metrics.get_times
497
- )
498
- self._stats.avg_set_time_ms = self._metrics.get_avg(
499
- self._metrics.set_times
500
- )
501
- self._stats.p95_get_time_ms = self._metrics.get_percentile(
502
- self._metrics.get_times, 95
503
- )
504
- self._stats.p95_set_time_ms = self._metrics.get_percentile(
505
- self._metrics.set_times, 95
506
- )
507
-
508
- return self._stats
509
-
510
- def clear(self) -> None:
511
- """Clear all cache entries."""
512
- with self._lock:
513
- count = len(self._cache)
514
- self._cache.clear()
515
- self._agent_index.clear()
516
- self._project_index.clear()
517
- self._stats = CacheStats(max_size=self.max_entries)
518
- if self._metrics:
519
- self._metrics = PerformanceMetrics()
520
- logger.info(f"Cleared cache ({count} entries)")
521
-
522
-
523
- # ==================== REDIS CACHE ====================
524
-
525
-
526
- class RedisCache(CacheBackend):
527
- """
528
- Redis-based cache for distributed deployments.
529
-
530
- Features:
531
- - Distributed caching across multiple instances
532
- - Built-in TTL via Redis EXPIRE
533
- - Selective invalidation using key prefixes and patterns
534
- - Performance metrics tracking
535
- - Automatic reconnection handling
536
- """
537
-
538
- def __init__(
539
- self,
540
- host: str = "localhost",
541
- port: int = 6379,
542
- db: int = 0,
543
- password: Optional[str] = None,
544
- ttl_seconds: int = 300,
545
- key_prefix: str = "alma:cache:",
546
- connection_pool_size: int = 10,
547
- enable_metrics: bool = True,
548
- ):
549
- """
550
- Initialize Redis cache.
551
-
552
- Args:
553
- host: Redis host
554
- port: Redis port
555
- db: Redis database number
556
- password: Redis password (optional)
557
- ttl_seconds: Default TTL for cache entries
558
- key_prefix: Prefix for all cache keys
559
- connection_pool_size: Size of connection pool
560
- enable_metrics: Whether to track performance metrics
561
- """
562
- self.ttl = ttl_seconds
563
- self.key_prefix = key_prefix
564
- self.enable_metrics = enable_metrics
565
-
566
- self._stats = CacheStats()
567
- self._metrics = PerformanceMetrics() if enable_metrics else None
568
- self._lock = threading.RLock()
569
-
570
- # Monitoring hooks
571
- self._on_hit: Optional[Callable[[str, float], None]] = None
572
- self._on_miss: Optional[Callable[[str], None]] = None
573
- self._on_eviction: Optional[Callable[[int], None]] = None
574
-
575
- # Try to import redis
576
- try:
577
- import redis
578
-
579
- self._redis = redis.Redis(
580
- host=host,
581
- port=port,
582
- db=db,
583
- password=password,
584
- max_connections=connection_pool_size,
585
- decode_responses=False, # We handle encoding ourselves
586
- )
587
- # Test connection
588
- self._redis.ping()
589
- logger.info(f"Connected to Redis at {host}:{port}")
590
- except ImportError as err:
591
- raise ImportError(
592
- "redis package required for RedisCache. Install with: pip install redis"
593
- ) from err
594
- except Exception as e:
595
- raise ConnectionError(f"Failed to connect to Redis: {e}") from e
596
-
597
- def set_hooks(
598
- self,
599
- on_hit: Optional[Callable[[str, float], None]] = None,
600
- on_miss: Optional[Callable[[str], None]] = None,
601
- on_eviction: Optional[Callable[[int], None]] = None,
602
- ):
603
- """Set monitoring hooks for cache events."""
604
- self._on_hit = on_hit
605
- self._on_miss = on_miss
606
- self._on_eviction = on_eviction
607
-
608
- def _generate_key(
609
- self,
610
- query: str,
611
- agent: str,
612
- project_id: str,
613
- user_id: Optional[str] = None,
614
- top_k: int = 5,
615
- ) -> str:
616
- """Generate a unique cache key with prefix for pattern matching."""
617
- key_parts = [
618
- query.lower().strip(),
619
- user_id or "",
620
- str(top_k),
621
- ]
622
- hash_part = hashlib.sha256("|".join(key_parts).encode()).hexdigest()[:16]
623
- # Structure: prefix:project:agent:hash
624
- # This enables pattern-based invalidation
625
- return f"{self.key_prefix}{project_id}:{agent}:{hash_part}"
626
-
627
- def _serialize_result(self, result: MemorySlice) -> bytes:
628
- """Serialize MemorySlice to bytes."""
629
- data = {
630
- "query": result.query,
631
- "agent": result.agent,
632
- "retrieval_time_ms": result.retrieval_time_ms,
633
- "heuristics": [
634
- {
635
- "id": h.id,
636
- "agent": h.agent,
637
- "project_id": h.project_id,
638
- "condition": h.condition,
639
- "strategy": h.strategy,
640
- "confidence": h.confidence,
641
- "occurrence_count": h.occurrence_count,
642
- "success_count": h.success_count,
643
- "last_validated": (
644
- h.last_validated.isoformat() if h.last_validated else None
645
- ),
646
- "created_at": h.created_at.isoformat() if h.created_at else None,
647
- }
648
- for h in result.heuristics
649
- ],
650
- "outcomes": [
651
- {
652
- "id": o.id,
653
- "agent": o.agent,
654
- "project_id": o.project_id,
655
- "task_type": o.task_type,
656
- "task_description": o.task_description,
657
- "success": o.success,
658
- "strategy_used": o.strategy_used,
659
- "duration_ms": o.duration_ms,
660
- "timestamp": o.timestamp.isoformat() if o.timestamp else None,
661
- }
662
- for o in result.outcomes
663
- ],
664
- "preferences": [
665
- {
666
- "id": p.id,
667
- "user_id": p.user_id,
668
- "category": p.category,
669
- "preference": p.preference,
670
- "source": p.source,
671
- "confidence": p.confidence,
672
- }
673
- for p in result.preferences
674
- ],
675
- "domain_knowledge": [
676
- {
677
- "id": dk.id,
678
- "agent": dk.agent,
679
- "project_id": dk.project_id,
680
- "domain": dk.domain,
681
- "fact": dk.fact,
682
- "source": dk.source,
683
- "confidence": dk.confidence,
684
- }
685
- for dk in result.domain_knowledge
686
- ],
687
- "anti_patterns": [
688
- {
689
- "id": ap.id,
690
- "agent": ap.agent,
691
- "project_id": ap.project_id,
692
- "pattern": ap.pattern,
693
- "why_bad": ap.why_bad,
694
- "better_alternative": ap.better_alternative,
695
- "occurrence_count": ap.occurrence_count,
696
- }
697
- for ap in result.anti_patterns
698
- ],
699
- }
700
- return json.dumps(data).encode("utf-8")
701
-
702
- def _deserialize_result(self, data: bytes) -> MemorySlice:
703
- """Deserialize bytes to MemorySlice."""
704
- from alma.types import (
705
- AntiPattern,
706
- DomainKnowledge,
707
- Heuristic,
708
- Outcome,
709
- UserPreference,
710
- )
711
-
712
- obj = json.loads(data.decode("utf-8"))
713
-
714
- def parse_datetime(s):
715
- if s is None:
716
- return datetime.now(timezone.utc)
717
- return datetime.fromisoformat(s.replace("Z", "+00:00"))
718
-
719
- heuristics = [
720
- Heuristic(
721
- id=h["id"],
722
- agent=h["agent"],
723
- project_id=h["project_id"],
724
- condition=h["condition"],
725
- strategy=h["strategy"],
726
- confidence=h["confidence"],
727
- occurrence_count=h["occurrence_count"],
728
- success_count=h["success_count"],
729
- last_validated=parse_datetime(h.get("last_validated")),
730
- created_at=parse_datetime(h.get("created_at")),
731
- )
732
- for h in obj.get("heuristics", [])
733
- ]
734
-
735
- outcomes = [
736
- Outcome(
737
- id=o["id"],
738
- agent=o["agent"],
739
- project_id=o["project_id"],
740
- task_type=o["task_type"],
741
- task_description=o["task_description"],
742
- success=o["success"],
743
- strategy_used=o["strategy_used"],
744
- duration_ms=o.get("duration_ms"),
745
- timestamp=parse_datetime(o.get("timestamp")),
746
- )
747
- for o in obj.get("outcomes", [])
748
- ]
749
-
750
- preferences = [
751
- UserPreference(
752
- id=p["id"],
753
- user_id=p["user_id"],
754
- category=p["category"],
755
- preference=p["preference"],
756
- source=p["source"],
757
- confidence=p.get("confidence", 1.0),
758
- )
759
- for p in obj.get("preferences", [])
760
- ]
761
-
762
- domain_knowledge = [
763
- DomainKnowledge(
764
- id=dk["id"],
765
- agent=dk["agent"],
766
- project_id=dk["project_id"],
767
- domain=dk["domain"],
768
- fact=dk["fact"],
769
- source=dk["source"],
770
- confidence=dk.get("confidence", 1.0),
771
- )
772
- for dk in obj.get("domain_knowledge", [])
773
- ]
774
-
775
- anti_patterns = [
776
- AntiPattern(
777
- id=ap["id"],
778
- agent=ap["agent"],
779
- project_id=ap["project_id"],
780
- pattern=ap["pattern"],
781
- why_bad=ap["why_bad"],
782
- better_alternative=ap["better_alternative"],
783
- occurrence_count=ap["occurrence_count"],
784
- last_seen=datetime.now(timezone.utc),
785
- )
786
- for ap in obj.get("anti_patterns", [])
787
- ]
788
-
789
- return MemorySlice(
790
- heuristics=heuristics,
791
- outcomes=outcomes,
792
- preferences=preferences,
793
- domain_knowledge=domain_knowledge,
794
- anti_patterns=anti_patterns,
795
- query=obj.get("query"),
796
- agent=obj.get("agent"),
797
- retrieval_time_ms=obj.get("retrieval_time_ms"),
798
- )
799
-
800
- def get(
801
- self,
802
- query: str,
803
- agent: str,
804
- project_id: str,
805
- user_id: Optional[str] = None,
806
- top_k: int = 5,
807
- ) -> Optional[MemorySlice]:
808
- """Get cached result from Redis."""
809
- start_time = time.time()
810
- key = self._generate_key(query, agent, project_id, user_id, top_k)
811
-
812
- try:
813
- data = self._redis.get(key)
814
-
815
- if data is None:
816
- with self._lock:
817
- self._stats.misses += 1
818
- if self._on_miss:
819
- self._on_miss(key)
820
- self._record_get_time(start_time)
821
- return None
822
-
823
- result = self._deserialize_result(data)
824
- with self._lock:
825
- self._stats.hits += 1
826
- latency_ms = (time.time() - start_time) * 1000
827
- if self._on_hit:
828
- self._on_hit(key, latency_ms)
829
- self._record_get_time(start_time)
830
- logger.debug(f"Redis cache hit for query: {query[:50]}...")
831
- return result
832
-
833
- except Exception as e:
834
- logger.error(f"Redis get error: {e}")
835
- with self._lock:
836
- self._stats.misses += 1
837
- self._record_get_time(start_time)
838
- return None
839
-
840
- def set(
841
- self,
842
- query: str,
843
- agent: str,
844
- project_id: str,
845
- result: MemorySlice,
846
- user_id: Optional[str] = None,
847
- top_k: int = 5,
848
- ttl_override: Optional[int] = None,
849
- ) -> None:
850
- """Cache a retrieval result in Redis."""
851
- start_time = time.time()
852
- key = self._generate_key(query, agent, project_id, user_id, top_k)
853
- ttl = ttl_override or self.ttl
854
-
855
- try:
856
- data = self._serialize_result(result)
857
- self._redis.setex(key, ttl, data)
858
- self._record_set_time(start_time)
859
- logger.debug(f"Redis cached result for query: {query[:50]}...")
860
-
861
- except Exception as e:
862
- logger.error(f"Redis set error: {e}")
863
- self._record_set_time(start_time)
864
-
865
- def invalidate(
866
- self,
867
- agent: Optional[str] = None,
868
- project_id: Optional[str] = None,
869
- ) -> int:
870
- """
871
- Invalidate cache entries using Redis pattern matching.
872
-
873
- Pattern structure: prefix:project:agent:hash
874
- """
875
- try:
876
- if agent is None and project_id is None:
877
- # Clear all ALMA cache keys
878
- pattern = f"{self.key_prefix}*"
879
- elif project_id and agent:
880
- # Specific project and agent
881
- pattern = f"{self.key_prefix}{project_id}:{agent}:*"
882
- elif project_id:
883
- # All agents for a project
884
- pattern = f"{self.key_prefix}{project_id}:*"
885
- elif agent:
886
- # Specific agent across all projects
887
- pattern = f"{self.key_prefix}*:{agent}:*"
888
- else:
889
- return 0
890
-
891
- # Use SCAN for safe iteration over keys
892
- count = 0
893
- cursor = 0
894
- while True:
895
- cursor, keys = self._redis.scan(cursor, match=pattern, count=100)
896
- if keys:
897
- self._redis.delete(*keys)
898
- count += len(keys)
899
- if cursor == 0:
900
- break
901
-
902
- with self._lock:
903
- self._stats.evictions += count
904
- if self._on_eviction and count > 0:
905
- self._on_eviction(count)
906
- logger.info(
907
- f"Invalidated {count} Redis cache entries for agent={agent}, project={project_id}"
908
- )
909
- return count
910
-
911
- except Exception as e:
912
- logger.error(f"Redis invalidate error: {e}")
913
- return 0
914
-
915
- def _record_get_time(self, start_time: float) -> None:
916
- """Record get operation timing."""
917
- if self._metrics:
918
- with self._lock:
919
- duration_ms = (time.time() - start_time) * 1000
920
- self._metrics.record_get(duration_ms)
921
- self._stats.total_get_calls += 1
922
-
923
- def _record_set_time(self, start_time: float) -> None:
924
- """Record set operation timing."""
925
- if self._metrics:
926
- with self._lock:
927
- duration_ms = (time.time() - start_time) * 1000
928
- self._metrics.record_set(duration_ms)
929
- self._stats.total_set_calls += 1
930
-
931
- def get_stats(self) -> CacheStats:
932
- """Get cache statistics."""
933
- try:
934
- # Get current cache size from Redis
935
- pattern = f"{self.key_prefix}*"
936
- cursor = 0
937
- count = 0
938
- while True:
939
- cursor, keys = self._redis.scan(cursor, match=pattern, count=100)
940
- count += len(keys)
941
- if cursor == 0:
942
- break
943
-
944
- with self._lock:
945
- self._stats.current_size = count
946
-
947
- if self._metrics:
948
- self._stats.avg_get_time_ms = self._metrics.get_avg(
949
- self._metrics.get_times
950
- )
951
- self._stats.avg_set_time_ms = self._metrics.get_avg(
952
- self._metrics.set_times
953
- )
954
- self._stats.p95_get_time_ms = self._metrics.get_percentile(
955
- self._metrics.get_times, 95
956
- )
957
- self._stats.p95_set_time_ms = self._metrics.get_percentile(
958
- self._metrics.set_times, 95
959
- )
960
-
961
- return self._stats
962
-
963
- except Exception as e:
964
- logger.error(f"Redis get_stats error: {e}")
965
- return self._stats
966
-
967
- def clear(self) -> None:
968
- """Clear all ALMA cache entries from Redis."""
969
- try:
970
- count = self.invalidate()
971
- with self._lock:
972
- self._stats = CacheStats()
973
- if self._metrics:
974
- self._metrics = PerformanceMetrics()
975
- logger.info(f"Cleared Redis cache ({count} entries)")
976
- except Exception as e:
977
- logger.error(f"Redis clear error: {e}")
978
-
979
-
980
- # ==================== NULL CACHE ====================
981
-
982
-
983
- class NullCache(CacheBackend):
984
- """
985
- A no-op cache implementation for testing or when caching is disabled.
986
-
987
- All operations are valid but don't actually cache anything.
988
- """
989
-
990
- def __init__(self):
991
- """Initialize null cache."""
992
- self._stats = CacheStats()
993
-
994
- def get(self, *args, **kwargs) -> Optional[MemorySlice]:
995
- """Always returns None (cache miss)."""
996
- self._stats.misses += 1
997
- return None
998
-
999
- def set(self, *args, **kwargs) -> None:
1000
- """No-op."""
1001
- pass
1002
-
1003
- def invalidate(self, *args, **kwargs) -> int:
1004
- """No-op."""
1005
- return 0
1006
-
1007
- def get_stats(self) -> CacheStats:
1008
- """Get cache statistics."""
1009
- return self._stats
1010
-
1011
- def clear(self) -> None:
1012
- """No-op."""
1013
- pass
1014
-
1015
-
1016
- # ==================== CACHE FACTORY ====================
1017
-
1018
-
1019
- def create_cache(
1020
- backend: str = "memory",
1021
- ttl_seconds: int = 300,
1022
- max_entries: int = 1000,
1023
- redis_host: str = "localhost",
1024
- redis_port: int = 6379,
1025
- redis_password: Optional[str] = None,
1026
- redis_db: int = 0,
1027
- enable_metrics: bool = True,
1028
- ) -> CacheBackend:
1029
- """
1030
- Factory function to create a cache backend.
1031
-
1032
- Args:
1033
- backend: "memory", "redis", or "null"
1034
- ttl_seconds: TTL for cache entries
1035
- max_entries: Max entries for memory cache
1036
- redis_host: Redis host (for redis backend)
1037
- redis_port: Redis port (for redis backend)
1038
- redis_password: Redis password (for redis backend)
1039
- redis_db: Redis database number (for redis backend)
1040
- enable_metrics: Whether to track performance metrics
1041
-
1042
- Returns:
1043
- Configured CacheBackend instance
1044
- """
1045
- if backend == "redis":
1046
- return RedisCache(
1047
- host=redis_host,
1048
- port=redis_port,
1049
- db=redis_db,
1050
- password=redis_password,
1051
- ttl_seconds=ttl_seconds,
1052
- enable_metrics=enable_metrics,
1053
- )
1054
- elif backend == "null":
1055
- return NullCache()
1056
- else:
1057
- return RetrievalCache(
1058
- ttl_seconds=ttl_seconds,
1059
- max_entries=max_entries,
1060
- enable_metrics=enable_metrics,
1061
- )
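
For orientation, a minimal sketch of how the create_cache factory at the end of the 0.5.1 listing above is typically called (the argument values are illustrative, and the 0.7.0 signature is not fully shown in this diff, so it may accept additional options):

    from alma.retrieval.cache import create_cache

    # In-memory backend with a 5-minute TTL and eviction once 1000 entries are cached.
    cache = create_cache(backend="memory", ttl_seconds=300, max_entries=1000)

    # The "null" backend returns a NullCache that never stores anything (handy in tests).
    noop_cache = create_cache(backend="null")
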
1
+ """
2
+ ALMA Retrieval Cache.
3
+
4
+ Multi-backend caching layer for retrieval results with TTL-based expiration.
5
+ Supports in-memory and Redis backends with performance monitoring.
6
+
7
+ Key Features:
8
+ - Collision-resistant cache key generation using SHA-256
9
+ - Namespace support for multi-agent/multi-tenant isolation
10
+ - TTL-based expiration with configurable cleanup
11
+ - LRU eviction when max entries reached
12
+ - Performance metrics tracking
13
+ """
14
+
15
+ import hashlib
16
+ import json
17
+ import logging
18
+ import struct
19
+ import threading
20
+ import time
21
+ from abc import ABC, abstractmethod
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime, timezone
24
+ from typing import Any, Callable, Dict, List, Optional, Tuple
25
+
26
+ from alma.types import MemorySlice
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ # ==================== CACHE KEY GENERATION ====================
32
+
33
+
34
+ class CacheKeyGenerator:
35
+ """
36
+ Collision-resistant cache key generator with namespace support.
37
+
38
+ Uses SHA-256 hashing with length-prefixed encoding to prevent
39
+ delimiter-based collision attacks. Supports namespaces for
40
+ multi-agent/multi-tenant isolation.
41
+
42
+ Key Structure:
43
+ {namespace}:{version}:{full_sha256_hash}
44
+
45
+ The hash is computed over length-prefixed components to ensure
46
+ that "a|b" + "c" cannot collide with "a" + "b|c".
47
+ """
48
+
49
+ # Version for cache key format - increment if algorithm changes
50
+ KEY_VERSION = "v1"
51
+
52
+ # Default namespace for single-agent deployments
53
+ DEFAULT_NAMESPACE = "alma"
54
+
55
+ def __init__(self, namespace: Optional[str] = None):
56
+ """
57
+ Initialize the cache key generator.
58
+
59
+ Args:
60
+ namespace: Optional namespace for cache isolation.
61
+ Useful for multi-agent or multi-tenant deployments.
62
+ Defaults to "alma".
63
+ """
64
+ self.namespace = namespace or self.DEFAULT_NAMESPACE
65
+
66
+ @staticmethod
67
+ def _length_prefix_encode(value: str) -> bytes:
68
+ """
69
+ Encode a string with its length prefix for collision resistance.
70
+
71
+ This prevents collision attacks where "a|b" + "c" could match "a" + "b|c"
72
+ when using simple delimiter-based concatenation.
73
+
74
+ Args:
75
+ value: String to encode
76
+
77
+ Returns:
78
+ Bytes with 4-byte big-endian length prefix followed by UTF-8 encoded value
79
+ """
80
+ encoded = value.encode("utf-8")
81
+ length = len(encoded)
82
+ return struct.pack(">I", length) + encoded
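
To illustrate why the length prefix matters, here is a small self-contained sketch of the collision pair mentioned in the docstring above; it re-implements the helper locally rather than importing the private method:

    import struct

    def length_prefix_encode(value: str) -> bytes:
        encoded = value.encode("utf-8")
        return struct.pack(">I", len(encoded)) + encoded

    # Naive delimiter joining collapses both inputs to the same key material ...
    assert "|".join(["a|b", "c"]) == "|".join(["a", "b|c"])  # both become "a|b|c"

    # ... while length-prefixed encoding keeps the component boundaries distinct.
    assert (length_prefix_encode("a|b") + length_prefix_encode("c")
            != length_prefix_encode("a") + length_prefix_encode("b|c"))
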
83
+
84
+ @staticmethod
85
+ def _normalize_query(query: str) -> str:
86
+ """
87
+ Normalize query string for consistent cache key generation.
88
+
89
+ - Converts to lowercase
90
+ - Strips leading/trailing whitespace
91
+ - Normalizes internal whitespace to single spaces
92
+
93
+ Args:
94
+ query: Raw query string
95
+
96
+ Returns:
97
+ Normalized query string
98
+ """
99
+ return " ".join(query.lower().split())
100
+
101
+ def generate(
102
+ self,
103
+ query: str,
104
+ agent: str,
105
+ project_id: str,
106
+ user_id: Optional[str] = None,
107
+ top_k: int = 5,
108
+ extra_context: Optional[Dict[str, str]] = None,
109
+ ) -> str:
110
+ """
111
+ Generate a collision-resistant cache key.
112
+
113
+ Uses length-prefixed encoding of all components followed by SHA-256
114
+ hashing to prevent delimiter-based collision attacks.
115
+
116
+ Args:
117
+ query: The search query (will be normalized)
118
+ agent: Agent identifier
119
+ project_id: Project identifier
120
+ user_id: Optional user identifier
121
+ top_k: Number of results requested
122
+ extra_context: Optional extra context for key generation
123
+
124
+ Returns:
125
+ Cache key in format: {namespace}:{version}:{sha256_hash}
126
+ """
127
+ # Normalize query
128
+ normalized_query = self._normalize_query(query)
129
+
130
+ # Build the hash input using length-prefixed encoding
131
+ # This ensures "a|b" + "c" cannot collide with "a" + "b|c"
132
+ hash_input = b""
133
+ hash_input += self._length_prefix_encode(normalized_query)
134
+ hash_input += self._length_prefix_encode(agent)
135
+ hash_input += self._length_prefix_encode(project_id)
136
+ hash_input += self._length_prefix_encode(user_id or "")
137
+ hash_input += struct.pack(">I", top_k) # 4-byte big-endian integer
138
+
139
+ # Add extra context if provided (sorted for determinism)
140
+ if extra_context:
141
+ for key in sorted(extra_context.keys()):
142
+ hash_input += self._length_prefix_encode(key)
143
+ hash_input += self._length_prefix_encode(extra_context[key])
144
+
145
+ # Compute full SHA-256 hash (64 hex chars = 256 bits)
146
+ hash_hex = hashlib.sha256(hash_input).hexdigest()
147
+
148
+ # Return namespaced key with version
149
+ return f"{self.namespace}:{self.KEY_VERSION}:{hash_hex}"
150
+
151
+ def generate_pattern(
152
+ self,
153
+ agent: Optional[str] = None,
154
+ project_id: Optional[str] = None,
155
+ ) -> str:
156
+ """
157
+ Generate a pattern for bulk cache invalidation.
158
+
159
+ This is primarily useful for Redis-style pattern matching.
160
+ For in-memory caches, use the index-based invalidation instead.
161
+
162
+ Args:
163
+ agent: Optional agent to match
164
+ project_id: Optional project to match
165
+
166
+ Returns:
167
+ Pattern string (e.g., "alma:v1:*" for all keys in namespace)
168
+ """
169
+ return f"{self.namespace}:{self.KEY_VERSION}:*"
170
+
171
+ def parse_key(self, key: str) -> Tuple[str, str, str]:
172
+ """
173
+ Parse a cache key into its components.
174
+
175
+ Args:
176
+ key: Cache key to parse
177
+
178
+ Returns:
179
+ Tuple of (namespace, version, hash)
180
+
181
+ Raises:
182
+ ValueError: If key format is invalid
183
+ """
184
+ parts = key.split(":", 2)
185
+ if len(parts) != 3:
186
+ raise ValueError(f"Invalid cache key format: {key}")
187
+ return (parts[0], parts[1], parts[2])
188
+
189
+ def is_valid_key(self, key: str) -> bool:
190
+ """
191
+ Check if a key matches this generator's namespace and version.
192
+
193
+ Args:
194
+ key: Cache key to validate
195
+
196
+ Returns:
197
+ True if key is valid for this generator
198
+ """
199
+ try:
200
+ namespace, version, hash_part = self.parse_key(key)
201
+ return (
202
+ namespace == self.namespace
203
+ and version == self.KEY_VERSION
204
+ and len(hash_part) == 64 # SHA-256 hex
205
+ )
206
+ except ValueError:
207
+ return False
208
+
209
+
210
+ # Global default key generator
211
+ _default_key_generator = CacheKeyGenerator()
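
Illustrative usage of the CacheKeyGenerator added above (the namespace, query, and identifiers below are made-up values):

    from alma.retrieval.cache import CacheKeyGenerator

    gen = CacheKeyGenerator(namespace="tenant-a")
    key = gen.generate(
        query="How do I reset a password?",
        agent="support-bot",
        project_id="proj-42",
        user_id="user-7",
        top_k=5,
    )
    # Keys look like "tenant-a:v1:<64 hex chars>".
    namespace, version, digest = gen.parse_key(key)
    assert gen.is_valid_key(key)
    # A generator with a different namespace (here the default "alma") rejects the key.
    assert not CacheKeyGenerator().is_valid_key(key)
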
212
+
213
+
214
+ # ==================== DATA STRUCTURES ====================
215
+
216
+
217
+ @dataclass
218
+ class CacheEntry:
219
+ """A cached retrieval result with metadata."""
220
+
221
+ result: MemorySlice
222
+ created_at: float # time.time() timestamp
223
+ expires_at: float
224
+ hit_count: int = 0
225
+ query_hash: str = ""
226
+ # Metadata for selective invalidation
227
+ agent: str = ""
228
+ project_id: str = ""
229
+ user_id: str = ""
230
+
231
+
232
+ @dataclass
233
+ class CacheStats:
234
+ """Statistics about cache performance."""
235
+
236
+ hits: int = 0
237
+ misses: int = 0
238
+ evictions: int = 0
239
+ current_size: int = 0
240
+ max_size: int = 0
241
+ # Performance metrics
242
+ avg_get_time_ms: float = 0.0
243
+ avg_set_time_ms: float = 0.0
244
+ p95_get_time_ms: float = 0.0
245
+ p95_set_time_ms: float = 0.0
246
+ total_get_calls: int = 0
247
+ total_set_calls: int = 0
248
+
249
+ @property
250
+ def hit_rate(self) -> float:
251
+ """Calculate cache hit rate."""
252
+ total = self.hits + self.misses
253
+ return self.hits / total if total > 0 else 0.0
254
+
255
+ def to_dict(self) -> Dict[str, Any]:
256
+ """Convert to dictionary."""
257
+ return {
258
+ "hits": self.hits,
259
+ "misses": self.misses,
260
+ "evictions": self.evictions,
261
+ "hit_rate": f"{self.hit_rate:.2%}",
262
+ "current_size": self.current_size,
263
+ "max_size": self.max_size,
264
+ "avg_get_time_ms": round(self.avg_get_time_ms, 2),
265
+ "avg_set_time_ms": round(self.avg_set_time_ms, 2),
266
+ "p95_get_time_ms": round(self.p95_get_time_ms, 2),
267
+ "p95_set_time_ms": round(self.p95_set_time_ms, 2),
268
+ "total_get_calls": self.total_get_calls,
269
+ "total_set_calls": self.total_set_calls,
270
+ }
271
+
272
+
273
+ @dataclass
274
+ class PerformanceMetrics:
275
+ """Tracks timing metrics for performance analysis."""
276
+
277
+ get_times: List[float] = field(default_factory=list)
278
+ set_times: List[float] = field(default_factory=list)
279
+ max_samples: int = 1000
280
+
281
+ def record_get(self, duration_ms: float):
282
+ """Record a get operation time."""
283
+ self.get_times.append(duration_ms)
284
+ if len(self.get_times) > self.max_samples:
285
+ self.get_times = self.get_times[-self.max_samples :]
286
+
287
+ def record_set(self, duration_ms: float):
288
+ """Record a set operation time."""
289
+ self.set_times.append(duration_ms)
290
+ if len(self.set_times) > self.max_samples:
291
+ self.set_times = self.set_times[-self.max_samples :]
292
+
293
+ def get_percentile(self, times: List[float], percentile: float) -> float:
294
+ """Calculate percentile from timing data."""
295
+ if not times:
296
+ return 0.0
297
+ sorted_times = sorted(times)
298
+ idx = int(len(sorted_times) * percentile / 100)
299
+ return sorted_times[min(idx, len(sorted_times) - 1)]
300
+
301
+ def get_avg(self, times: List[float]) -> float:
302
+ """Calculate average from timing data."""
303
+ if not times:
304
+ return 0.0
305
+ return sum(times) / len(times)
306
+
307
+
308
+ # ==================== CACHE INTERFACE ====================
309
+
310
+
311
+ class CacheBackend(ABC):
312
+ """Abstract interface for cache backends."""
313
+
314
+ @abstractmethod
315
+ def get(
316
+ self,
317
+ query: str,
318
+ agent: str,
319
+ project_id: str,
320
+ user_id: Optional[str] = None,
321
+ top_k: int = 5,
322
+ ) -> Optional[MemorySlice]:
323
+ """Get cached result if available."""
324
+ pass
325
+
326
+ @abstractmethod
327
+ def set(
328
+ self,
329
+ query: str,
330
+ agent: str,
331
+ project_id: str,
332
+ result: MemorySlice,
333
+ user_id: Optional[str] = None,
334
+ top_k: int = 5,
335
+ ttl_override: Optional[int] = None,
336
+ ) -> None:
337
+ """Cache a retrieval result."""
338
+ pass
339
+
340
+ @abstractmethod
341
+ def invalidate(
342
+ self,
343
+ agent: Optional[str] = None,
344
+ project_id: Optional[str] = None,
345
+ ) -> int:
346
+ """Invalidate cache entries. Returns count of invalidated entries."""
347
+ pass
348
+
349
+ @abstractmethod
350
+ def get_stats(self) -> CacheStats:
351
+ """Get cache statistics."""
352
+ pass
353
+
354
+ @abstractmethod
355
+ def clear(self) -> None:
356
+ """Clear all cache entries."""
357
+ pass
358
+
359
+
360
+ # ==================== IN-MEMORY CACHE ====================
361
+
362
+
363
+ class RetrievalCache(CacheBackend):
364
+ """
365
+ In-memory cache for retrieval results.
366
+
367
+ Features:
368
+ - Collision-resistant cache key generation using SHA-256
369
+ - Namespace support for multi-agent/multi-tenant isolation
370
+ - TTL-based expiration
371
+ - LRU eviction when max size reached
372
+ - Thread-safe operations
373
+ - Selective cache invalidation by agent/project
374
+ - Performance metrics tracking
375
+ - Monitoring hooks
376
+ """
377
+
378
+ def __init__(
379
+ self,
380
+ ttl_seconds: int = 300,
381
+ max_entries: int = 1000,
382
+ cleanup_interval: int = 60,
383
+ enable_metrics: bool = True,
384
+ namespace: Optional[str] = None,
385
+ ):
386
+ """
387
+ Initialize cache.
388
+
389
+ Args:
390
+ ttl_seconds: Time-to-live for cache entries (default: 5 minutes)
391
+ max_entries: Maximum number of cached entries before eviction
392
+ cleanup_interval: Seconds between cleanup cycles for expired entries
393
+ enable_metrics: Whether to track performance metrics
394
+ namespace: Optional namespace for cache isolation (default: "alma")
395
+ """
396
+ self.ttl = ttl_seconds
397
+ self.max_entries = max_entries
398
+ self.cleanup_interval = cleanup_interval
399
+ self.enable_metrics = enable_metrics
400
+ self.namespace = namespace
401
+
402
+ # Initialize collision-resistant key generator with namespace
403
+ self._key_generator = CacheKeyGenerator(namespace=namespace)
404
+
405
+ self._cache: Dict[str, CacheEntry] = {}
406
+ # Index for selective invalidation: agent -> set of cache keys
407
+ self._agent_index: Dict[str, set] = {}
408
+ # Index for selective invalidation: project_id -> set of cache keys
409
+ self._project_index: Dict[str, set] = {}
410
+
411
+ self._lock = threading.RLock()
412
+ self._stats = CacheStats(max_size=max_entries)
413
+ self._metrics = PerformanceMetrics() if enable_metrics else None
414
+ self._last_cleanup = time.time()
415
+
416
+ # Monitoring hooks
417
+ self._on_hit: Optional[Callable[[str, float], None]] = None
418
+ self._on_miss: Optional[Callable[[str], None]] = None
419
+ self._on_eviction: Optional[Callable[[int], None]] = None
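
A construction-and-lookup sketch for the updated in-memory cache; memory_slice stands in for a MemorySlice produced by the retrieval engine, and the other values are illustrative:

    from alma.retrieval.cache import RetrievalCache

    cache = RetrievalCache(ttl_seconds=300, max_entries=1000, namespace="tenant-a")

    # memory_slice is assumed to be a MemorySlice obtained from a retrieval call.
    cache.set("deployment strategy", agent="planner", project_id="proj-42", result=memory_slice)

    # Query normalization lowercases and collapses whitespace, so this lookup still hits.
    hit = cache.get("Deployment   Strategy", agent="planner", project_id="proj-42")
    assert hit is memory_slice

    print(cache.get_stats().to_dict()["hit_rate"])  # e.g. "100.00%"
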
420
+
421
+ def set_hooks(
422
+ self,
423
+ on_hit: Optional[Callable[[str, float], None]] = None,
424
+ on_miss: Optional[Callable[[str], None]] = None,
425
+ on_eviction: Optional[Callable[[int], None]] = None,
426
+ ):
427
+ """
428
+ Set monitoring hooks for cache events.
429
+
430
+ Args:
431
+ on_hit: Called on cache hit with (query_hash, latency_ms)
432
+ on_miss: Called on cache miss with (query_hash)
433
+ on_eviction: Called on eviction with (count)
434
+ """
435
+ self._on_hit = on_hit
436
+ self._on_miss = on_miss
437
+ self._on_eviction = on_eviction
438
+
439
+ def _generate_key(
440
+ self,
441
+ query: str,
442
+ agent: str,
443
+ project_id: str,
444
+ user_id: Optional[str] = None,
445
+ top_k: int = 5,
446
+ ) -> str:
447
+ """
448
+ Generate a collision-resistant cache key for the query parameters.
449
+
450
+ Uses the CacheKeyGenerator with length-prefixed encoding and full
451
+ SHA-256 hashing to prevent delimiter-based collision attacks.
452
+
453
+ Args:
454
+ query: The search query (will be normalized)
455
+ agent: Agent identifier
456
+ project_id: Project identifier
457
+ user_id: Optional user identifier
458
+ top_k: Number of results requested
459
+
460
+ Returns:
461
+ Cache key in format: {namespace}:{version}:{sha256_hash}
462
+ """
463
+ return self._key_generator.generate(
464
+ query=query,
465
+ agent=agent,
466
+ project_id=project_id,
467
+ user_id=user_id,
468
+ top_k=top_k,
469
+ )
470
+
471
+ def get(
472
+ self,
473
+ query: str,
474
+ agent: str,
475
+ project_id: str,
476
+ user_id: Optional[str] = None,
477
+ top_k: int = 5,
478
+ ) -> Optional[MemorySlice]:
479
+ """Get cached result if available and not expired."""
480
+ start_time = time.time()
481
+ key = self._generate_key(query, agent, project_id, user_id, top_k)
482
+ now = time.time()
483
+
484
+ with self._lock:
485
+ # Periodic cleanup
486
+ if now - self._last_cleanup > self.cleanup_interval:
487
+ self._cleanup_expired()
488
+
489
+ entry = self._cache.get(key)
490
+
491
+ if entry is None:
492
+ self._stats.misses += 1
493
+ if self._on_miss:
494
+ self._on_miss(key)
495
+ self._record_get_time(start_time)
496
+ return None
497
+
498
+ if now > entry.expires_at:
499
+ # Entry expired
500
+ self._remove_entry(key, entry)
501
+ self._stats.misses += 1
502
+ if self._on_miss:
503
+ self._on_miss(key)
504
+ self._record_get_time(start_time)
505
+ return None
506
+
507
+ # Cache hit
508
+ entry.hit_count += 1
509
+ self._stats.hits += 1
510
+ latency_ms = (time.time() - start_time) * 1000
511
+ if self._on_hit:
512
+ self._on_hit(key, latency_ms)
513
+ self._record_get_time(start_time)
514
+ logger.debug(f"Cache hit for query: {query[:50]}...")
515
+ return entry.result
516
+
517
+ def set(
518
+ self,
519
+ query: str,
520
+ agent: str,
521
+ project_id: str,
522
+ result: MemorySlice,
523
+ user_id: Optional[str] = None,
524
+ top_k: int = 5,
525
+ ttl_override: Optional[int] = None,
526
+ ) -> None:
527
+ """Cache a retrieval result."""
528
+ start_time = time.time()
529
+ key = self._generate_key(query, agent, project_id, user_id, top_k)
530
+ now = time.time()
531
+ ttl = ttl_override or self.ttl
532
+
533
+ with self._lock:
534
+ # Check if we need to evict entries
535
+ if len(self._cache) >= self.max_entries and key not in self._cache:
536
+ self._evict_lru()
537
+
538
+ entry = CacheEntry(
539
+ result=result,
540
+ created_at=now,
541
+ expires_at=now + ttl,
542
+ hit_count=0,
543
+ query_hash=key,
544
+ agent=agent,
545
+ project_id=project_id,
546
+ user_id=user_id or "",
547
+ )
548
+
549
+ self._cache[key] = entry
550
+
551
+ # Update indexes
552
+ if agent not in self._agent_index:
553
+ self._agent_index[agent] = set()
554
+ self._agent_index[agent].add(key)
555
+
556
+ if project_id not in self._project_index:
557
+ self._project_index[project_id] = set()
558
+ self._project_index[project_id].add(key)
559
+
560
+ self._stats.current_size = len(self._cache)
561
+ self._record_set_time(start_time)
562
+ logger.debug(f"Cached result for query: {query[:50]}...")
563
+
564
+ def invalidate(
565
+ self,
566
+ agent: Optional[str] = None,
567
+ project_id: Optional[str] = None,
568
+ ) -> int:
569
+ """
570
+ Invalidate cache entries matching criteria.
571
+
572
+ If no criteria provided, clears entire cache.
573
+
574
+ Args:
575
+ agent: Invalidate entries for this agent
576
+ project_id: Invalidate entries for this project
577
+
578
+ Returns:
579
+ Number of entries invalidated
580
+ """
581
+ with self._lock:
582
+ if agent is None and project_id is None:
583
+ # Clear all
584
+ count = len(self._cache)
585
+ self._cache.clear()
586
+ self._agent_index.clear()
587
+ self._project_index.clear()
588
+ self._stats.evictions += count
589
+ self._stats.current_size = 0
590
+ if self._on_eviction and count > 0:
591
+ self._on_eviction(count)
592
+ logger.info(f"Invalidated entire cache ({count} entries)")
593
+ return count
594
+
595
+ keys_to_remove: set = set()
596
+
597
+ # Collect keys matching agent
598
+ if agent and agent in self._agent_index:
599
+ keys_to_remove.update(self._agent_index[agent])
600
+
601
+ # Collect keys matching project (intersection if both specified)
602
+ if project_id and project_id in self._project_index:
603
+ project_keys = self._project_index[project_id]
604
+ if agent:
605
+ # Intersection: both agent AND project must match
606
+ keys_to_remove = keys_to_remove.intersection(project_keys)
607
+ else:
608
+ keys_to_remove.update(project_keys)
609
+
610
+ # Remove matched entries
611
+ count = 0
612
+ for key in keys_to_remove:
613
+ if key in self._cache:
614
+ entry = self._cache[key]
615
+ self._remove_entry(key, entry)
616
+ count += 1
617
+
618
+ self._stats.evictions += count
619
+ if self._on_eviction and count > 0:
620
+ self._on_eviction(count)
621
+ logger.info(
622
+ f"Invalidated {count} cache entries for agent={agent}, project={project_id}"
623
+ )
624
+ return count
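
Continuing the sketch above, selective invalidation and the eviction hook can be exercised like this (values illustrative):

    cache.set_hooks(on_eviction=lambda count: print(f"evicted {count} entries"))

    # With both criteria given, only entries matching the agent AND the project are removed.
    removed = cache.invalidate(agent="planner", project_id="proj-42")

    cache.invalidate(agent="planner")   # every entry cached by this agent
    cache.invalidate()                  # no criteria: clears the whole cache
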
625
+
626
+ def _remove_entry(self, key: str, entry: CacheEntry) -> None:
627
+ """Remove an entry from cache and indexes."""
628
+ del self._cache[key]
629
+
630
+ # Update indexes
631
+ if entry.agent in self._agent_index:
632
+ self._agent_index[entry.agent].discard(key)
633
+ if not self._agent_index[entry.agent]:
634
+ del self._agent_index[entry.agent]
635
+
636
+ if entry.project_id in self._project_index:
637
+ self._project_index[entry.project_id].discard(key)
638
+ if not self._project_index[entry.project_id]:
639
+ del self._project_index[entry.project_id]
640
+
641
+ self._stats.current_size = len(self._cache)
642
+
643
+ def _cleanup_expired(self) -> None:
644
+ """Remove all expired entries."""
645
+ now = time.time()
646
+ expired = [
647
+ (key, entry) for key, entry in self._cache.items() if now > entry.expires_at
648
+ ]
649
+
650
+ for key, entry in expired:
651
+ self._remove_entry(key, entry)
652
+
653
+ if expired:
654
+ self._stats.evictions += len(expired)
655
+ if self._on_eviction:
656
+ self._on_eviction(len(expired))
657
+ logger.debug(f"Cleaned up {len(expired)} expired cache entries")
658
+
659
+ self._last_cleanup = now
660
+
661
+ def _evict_lru(self) -> None:
662
+ """Evict least recently used entry (based on hit count and age)."""
663
+ if not self._cache:
664
+ return
665
+
666
+ # Find entry with lowest score (hit_count / age)
667
+ now = time.time()
668
+ worst_key = None
669
+ worst_entry = None
670
+ worst_score = float("inf")
671
+
672
+ for key, entry in self._cache.items():
673
+ age = now - entry.created_at + 1 # +1 to avoid division by zero
674
+ score = (entry.hit_count + 1) / age
675
+ if score < worst_score:
676
+ worst_score = score
677
+ worst_key = key
678
+ worst_entry = entry
679
+
680
+ if worst_key and worst_entry:
681
+ self._remove_entry(worst_key, worst_entry)
682
+ self._stats.evictions += 1
683
+ if self._on_eviction:
684
+ self._on_eviction(1)
685
+ logger.debug("Evicted LRU cache entry")
686
+
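A small worked example of the eviction score used above, score = (hit_count + 1) / (age + 1), with invented numbers:

    # Two hypothetical entries competing for eviction:
    score_a = (9 + 1) / (100 + 1)   # ~0.099: 9 hits, ~100 s old
    score_b = (1 + 1) / (10 + 1)    # ~0.182: 1 hit, ~10 s old
    # score_a < score_b, so entry A is evicted despite having more total hits;
    # the policy keeps entries that are hit often relative to how long they have existed.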
687
+ def _record_get_time(self, start_time: float) -> None:
688
+ """Record get operation timing."""
689
+ if self._metrics:
690
+ duration_ms = (time.time() - start_time) * 1000
691
+ self._metrics.record_get(duration_ms)
692
+ self._stats.total_get_calls += 1
693
+
694
+ def _record_set_time(self, start_time: float) -> None:
695
+ """Record set operation timing."""
696
+ if self._metrics:
697
+ duration_ms = (time.time() - start_time) * 1000
698
+ self._metrics.record_set(duration_ms)
699
+ self._stats.total_set_calls += 1
700
+
701
+ def get_stats(self) -> CacheStats:
702
+ """Get cache statistics with performance metrics."""
703
+ with self._lock:
704
+ self._stats.current_size = len(self._cache)
705
+
706
+ if self._metrics:
707
+ self._stats.avg_get_time_ms = self._metrics.get_avg(
708
+ self._metrics.get_times
709
+ )
710
+ self._stats.avg_set_time_ms = self._metrics.get_avg(
711
+ self._metrics.set_times
712
+ )
713
+ self._stats.p95_get_time_ms = self._metrics.get_percentile(
714
+ self._metrics.get_times, 95
715
+ )
716
+ self._stats.p95_set_time_ms = self._metrics.get_percentile(
717
+ self._metrics.set_times, 95
718
+ )
719
+
720
+ return self._stats
721
+
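A brief sketch of consuming the returned statistics (the field names are the ones updated above; the hit-rate computation is illustrative):

    stats = cache.get_stats()
    hit_rate = stats.hits / max(stats.hits + stats.misses, 1)   # guard against division by zero
    print(stats.current_size, stats.evictions, stats.avg_get_time_ms, stats.p95_get_time_ms)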
722
+ def clear(self) -> None:
723
+ """Clear all cache entries."""
724
+ with self._lock:
725
+ count = len(self._cache)
726
+ self._cache.clear()
727
+ self._agent_index.clear()
728
+ self._project_index.clear()
729
+ self._stats = CacheStats(max_size=self.max_entries)
730
+ if self._metrics:
731
+ self._metrics = PerformanceMetrics()
732
+ logger.info(f"Cleared cache ({count} entries)")
733
+
734
+
735
+ # ==================== REDIS CACHE ====================
736
+
737
+
738
+ class RedisCache(CacheBackend):
739
+ """
740
+ Redis-based cache for distributed deployments.
741
+
742
+ Features:
743
+ - Distributed caching across multiple instances
744
+ - Built-in TTL via Redis EXPIRE
745
+ - Selective invalidation using key prefixes and patterns
746
+ - Performance metrics tracking
747
+ - Automatic reconnection handling
748
+ """
749
+
750
+ def __init__(
751
+ self,
752
+ host: str = "localhost",
753
+ port: int = 6379,
754
+ db: int = 0,
755
+ password: Optional[str] = None,
756
+ ttl_seconds: int = 300,
757
+ key_prefix: str = "alma:cache:",
758
+ connection_pool_size: int = 10,
759
+ enable_metrics: bool = True,
760
+ namespace: Optional[str] = None,
761
+ ):
762
+ """
763
+ Initialize Redis cache.
764
+
765
+ Args:
766
+ host: Redis host
767
+ port: Redis port
768
+ db: Redis database number
769
+ password: Redis password (optional)
770
+ ttl_seconds: Default TTL for cache entries
771
+ key_prefix: Prefix for all cache keys
772
+ connection_pool_size: Size of connection pool
773
+ enable_metrics: Whether to track performance metrics
774
+ namespace: Optional namespace for cache isolation (default: "alma")
775
+ """
776
+ self.ttl = ttl_seconds
777
+ self.key_prefix = key_prefix
778
+ self.enable_metrics = enable_metrics
779
+ self.namespace = namespace
780
+
781
+ # Initialize collision-resistant key generator with namespace
782
+ self._key_generator = CacheKeyGenerator(namespace=namespace)
783
+
784
+ self._stats = CacheStats()
785
+ self._metrics = PerformanceMetrics() if enable_metrics else None
786
+ self._lock = threading.RLock()
787
+
788
+ # Monitoring hooks
789
+ self._on_hit: Optional[Callable[[str, float], None]] = None
790
+ self._on_miss: Optional[Callable[[str], None]] = None
791
+ self._on_eviction: Optional[Callable[[int], None]] = None
792
+
793
+ # Try to import redis
794
+ try:
795
+ import redis
796
+
797
+ self._redis = redis.Redis(
798
+ host=host,
799
+ port=port,
800
+ db=db,
801
+ password=password,
802
+ max_connections=connection_pool_size,
803
+ decode_responses=False, # We handle encoding ourselves
804
+ )
805
+ # Test connection
806
+ self._redis.ping()
807
+ logger.info(f"Connected to Redis at {host}:{port}")
808
+ except ImportError as err:
809
+ raise ImportError(
810
+ "redis package required for RedisCache. Install with: pip install redis"
811
+ ) from err
812
+ except Exception as e:
813
+ raise ConnectionError(f"Failed to connect to Redis: {e}") from e
814
+
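As a rough construction sketch (host and port values are placeholders; NullCache is defined later in this module), the two exception types raised in __init__ above can be caught to degrade to a no-op cache when Redis is unavailable:

    try:
        cache = RedisCache(host="localhost", port=6379, ttl_seconds=600, namespace="alma")
    except (ImportError, ConnectionError):
        cache = NullCache()   # fall back to no caching rather than failing startup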
815
+ def set_hooks(
816
+ self,
817
+ on_hit: Optional[Callable[[str, float], None]] = None,
818
+ on_miss: Optional[Callable[[str], None]] = None,
819
+ on_eviction: Optional[Callable[[int], None]] = None,
820
+ ):
821
+ """Set monitoring hooks for cache events."""
822
+ self._on_hit = on_hit
823
+ self._on_miss = on_miss
824
+ self._on_eviction = on_eviction
825
+
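A minimal example of wiring the monitoring hooks (the hook signatures mirror set_hooks above; the logging targets are illustrative):

    cache.set_hooks(
        on_hit=lambda key, latency_ms: logger.debug(f"cache hit {key} ({latency_ms:.1f} ms)"),
        on_miss=lambda key: logger.debug(f"cache miss {key}"),
        on_eviction=lambda count: logger.debug(f"evicted {count} cache entries"),
    )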
826
+ def _generate_key(
827
+ self,
828
+ query: str,
829
+ agent: str,
830
+ project_id: str,
831
+ user_id: Optional[str] = None,
832
+ top_k: int = 5,
833
+ ) -> str:
834
+ """
835
+ Generate a collision-resistant cache key with Redis prefix for pattern matching.
836
+
837
+ Uses the CacheKeyGenerator for the hash component, then wraps it with
838
+ Redis-specific prefix structure for pattern-based invalidation.
839
+
840
+ Structure: {redis_prefix}{project}:{agent}:{namespace}:{version}:{hash}
841
+
842
+ Args:
843
+ query: The search query (will be normalized)
844
+ agent: Agent identifier
845
+ project_id: Project identifier
846
+ user_id: Optional user identifier
847
+ top_k: Number of results requested
848
+
849
+ Returns:
850
+ Redis cache key with prefix for pattern matching
851
+ """
852
+ # Generate collision-resistant key
853
+ base_key = self._key_generator.generate(
854
+ query=query,
855
+ agent=agent,
856
+ project_id=project_id,
857
+ user_id=user_id,
858
+ top_k=top_k,
859
+ )
860
+
861
+ # Structure: prefix:project:agent:base_key
862
+ # This enables pattern-based invalidation by project or agent
863
+ return f"{self.key_prefix}{project_id}:{agent}:{base_key}"
864
+
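Illustratively, assuming CacheKeyGenerator (constructed earlier in this class) returns a namespace:version:hash string as the docstring above suggests, a generated key and the patterns matched by invalidate() below would look roughly like this (hash shortened, names invented):

    # alma:cache:proj-1:helena:alma:v1:3fa9c2...   <- key_prefix + project_id + agent + base_key (shape assumed)
    # alma:cache:proj-1:helena:*                   <- invalidate(agent="helena", project_id="proj-1")
    # alma:cache:proj-1:*                          <- invalidate(project_id="proj-1")
    # alma:cache:*:helena:*                        <- invalidate(agent="helena")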
865
+ def _serialize_result(self, result: MemorySlice) -> bytes:
866
+ """Serialize MemorySlice to bytes."""
867
+ data = {
868
+ "query": result.query,
869
+ "agent": result.agent,
870
+ "retrieval_time_ms": result.retrieval_time_ms,
871
+ "heuristics": [
872
+ {
873
+ "id": h.id,
874
+ "agent": h.agent,
875
+ "project_id": h.project_id,
876
+ "condition": h.condition,
877
+ "strategy": h.strategy,
878
+ "confidence": h.confidence,
879
+ "occurrence_count": h.occurrence_count,
880
+ "success_count": h.success_count,
881
+ "last_validated": (
882
+ h.last_validated.isoformat() if h.last_validated else None
883
+ ),
884
+ "created_at": h.created_at.isoformat() if h.created_at else None,
885
+ }
886
+ for h in result.heuristics
887
+ ],
888
+ "outcomes": [
889
+ {
890
+ "id": o.id,
891
+ "agent": o.agent,
892
+ "project_id": o.project_id,
893
+ "task_type": o.task_type,
894
+ "task_description": o.task_description,
895
+ "success": o.success,
896
+ "strategy_used": o.strategy_used,
897
+ "duration_ms": o.duration_ms,
898
+ "timestamp": o.timestamp.isoformat() if o.timestamp else None,
899
+ }
900
+ for o in result.outcomes
901
+ ],
902
+ "preferences": [
903
+ {
904
+ "id": p.id,
905
+ "user_id": p.user_id,
906
+ "category": p.category,
907
+ "preference": p.preference,
908
+ "source": p.source,
909
+ "confidence": p.confidence,
910
+ }
911
+ for p in result.preferences
912
+ ],
913
+ "domain_knowledge": [
914
+ {
915
+ "id": dk.id,
916
+ "agent": dk.agent,
917
+ "project_id": dk.project_id,
918
+ "domain": dk.domain,
919
+ "fact": dk.fact,
920
+ "source": dk.source,
921
+ "confidence": dk.confidence,
922
+ }
923
+ for dk in result.domain_knowledge
924
+ ],
925
+ "anti_patterns": [
926
+ {
927
+ "id": ap.id,
928
+ "agent": ap.agent,
929
+ "project_id": ap.project_id,
930
+ "pattern": ap.pattern,
931
+ "why_bad": ap.why_bad,
932
+ "better_alternative": ap.better_alternative,
933
+ "occurrence_count": ap.occurrence_count,
934
+ }
935
+ for ap in result.anti_patterns
936
+ ],
937
+ }
938
+ return json.dumps(data).encode("utf-8")
939
+
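For reference, the JSON payload produced above has roughly this shape (abridged; values invented, field names as in _serialize_result):

    # {
    #   "query": "...", "agent": "...", "retrieval_time_ms": 12.3,
    #   "heuristics":       [{"id": "...", "condition": "...", "strategy": "...", "confidence": 0.8, ...}],
    #   "outcomes":         [{"id": "...", "task_type": "...", "success": true, ...}],
    #   "preferences":      [{"id": "...", "category": "...", "preference": "...", ...}],
    #   "domain_knowledge": [{"id": "...", "domain": "...", "fact": "...", ...}],
    #   "anti_patterns":    [{"id": "...", "pattern": "...", "why_bad": "...", ...}]
    # }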
940
+ def _deserialize_result(self, data: bytes) -> MemorySlice:
941
+ """Deserialize bytes to MemorySlice."""
942
+ from alma.types import (
943
+ AntiPattern,
944
+ DomainKnowledge,
945
+ Heuristic,
946
+ Outcome,
947
+ UserPreference,
948
+ )
949
+
950
+ obj = json.loads(data.decode("utf-8"))
951
+
952
+ def parse_datetime(s):
953
+ if s is None:
954
+ return datetime.now(timezone.utc)
955
+ return datetime.fromisoformat(s.replace("Z", "+00:00"))
956
+
957
+ heuristics = [
958
+ Heuristic(
959
+ id=h["id"],
960
+ agent=h["agent"],
961
+ project_id=h["project_id"],
962
+ condition=h["condition"],
963
+ strategy=h["strategy"],
964
+ confidence=h["confidence"],
965
+ occurrence_count=h["occurrence_count"],
966
+ success_count=h["success_count"],
967
+ last_validated=parse_datetime(h.get("last_validated")),
968
+ created_at=parse_datetime(h.get("created_at")),
969
+ )
970
+ for h in obj.get("heuristics", [])
971
+ ]
972
+
973
+ outcomes = [
974
+ Outcome(
975
+ id=o["id"],
976
+ agent=o["agent"],
977
+ project_id=o["project_id"],
978
+ task_type=o["task_type"],
979
+ task_description=o["task_description"],
980
+ success=o["success"],
981
+ strategy_used=o["strategy_used"],
982
+ duration_ms=o.get("duration_ms"),
983
+ timestamp=parse_datetime(o.get("timestamp")),
984
+ )
985
+ for o in obj.get("outcomes", [])
986
+ ]
987
+
988
+ preferences = [
989
+ UserPreference(
990
+ id=p["id"],
991
+ user_id=p["user_id"],
992
+ category=p["category"],
993
+ preference=p["preference"],
994
+ source=p["source"],
995
+ confidence=p.get("confidence", 1.0),
996
+ )
997
+ for p in obj.get("preferences", [])
998
+ ]
999
+
1000
+ domain_knowledge = [
1001
+ DomainKnowledge(
1002
+ id=dk["id"],
1003
+ agent=dk["agent"],
1004
+ project_id=dk["project_id"],
1005
+ domain=dk["domain"],
1006
+ fact=dk["fact"],
1007
+ source=dk["source"],
1008
+ confidence=dk.get("confidence", 1.0),
1009
+ )
1010
+ for dk in obj.get("domain_knowledge", [])
1011
+ ]
1012
+
1013
+ anti_patterns = [
1014
+ AntiPattern(
1015
+ id=ap["id"],
1016
+ agent=ap["agent"],
1017
+ project_id=ap["project_id"],
1018
+ pattern=ap["pattern"],
1019
+ why_bad=ap["why_bad"],
1020
+ better_alternative=ap["better_alternative"],
1021
+ occurrence_count=ap["occurrence_count"],
1022
+ last_seen=datetime.now(timezone.utc),
1023
+ )
1024
+ for ap in obj.get("anti_patterns", [])
1025
+ ]
1026
+
1027
+ return MemorySlice(
1028
+ heuristics=heuristics,
1029
+ outcomes=outcomes,
1030
+ preferences=preferences,
1031
+ domain_knowledge=domain_knowledge,
1032
+ anti_patterns=anti_patterns,
1033
+ query=obj.get("query"),
1034
+ agent=obj.get("agent"),
1035
+ retrieval_time_ms=obj.get("retrieval_time_ms"),
1036
+ )
1037
+
1038
+ def get(
1039
+ self,
1040
+ query: str,
1041
+ agent: str,
1042
+ project_id: str,
1043
+ user_id: Optional[str] = None,
1044
+ top_k: int = 5,
1045
+ ) -> Optional[MemorySlice]:
1046
+ """Get cached result from Redis."""
1047
+ start_time = time.time()
1048
+ key = self._generate_key(query, agent, project_id, user_id, top_k)
1049
+
1050
+ try:
1051
+ data = self._redis.get(key)
1052
+
1053
+ if data is None:
1054
+ with self._lock:
1055
+ self._stats.misses += 1
1056
+ if self._on_miss:
1057
+ self._on_miss(key)
1058
+ self._record_get_time(start_time)
1059
+ return None
1060
+
1061
+ result = self._deserialize_result(data)
1062
+ with self._lock:
1063
+ self._stats.hits += 1
1064
+ latency_ms = (time.time() - start_time) * 1000
1065
+ if self._on_hit:
1066
+ self._on_hit(key, latency_ms)
1067
+ self._record_get_time(start_time)
1068
+ logger.debug(f"Redis cache hit for query: {query[:50]}...")
1069
+ return result
1070
+
1071
+ except Exception as e:
1072
+ logger.warning(f"Redis get error: {e}")
1073
+ with self._lock:
1074
+ self._stats.misses += 1
1075
+ self._record_get_time(start_time)
1076
+ return None
1077
+
1078
+ def set(
1079
+ self,
1080
+ query: str,
1081
+ agent: str,
1082
+ project_id: str,
1083
+ result: MemorySlice,
1084
+ user_id: Optional[str] = None,
1085
+ top_k: int = 5,
1086
+ ttl_override: Optional[int] = None,
1087
+ ) -> None:
1088
+ """Cache a retrieval result in Redis."""
1089
+ start_time = time.time()
1090
+ key = self._generate_key(query, agent, project_id, user_id, top_k)
1091
+ ttl = ttl_override or self.ttl
1092
+
1093
+ try:
1094
+ data = self._serialize_result(result)
1095
+ self._redis.setex(key, ttl, data)
1096
+ self._record_set_time(start_time)
1097
+ logger.debug(f"Redis cached result for query: {query[:50]}...")
1098
+
1099
+ except Exception as e:
1100
+ logger.warning(f"Redis set error: {e}")
1101
+ self._record_set_time(start_time)
1102
+
1103
+ def invalidate(
1104
+ self,
1105
+ agent: Optional[str] = None,
1106
+ project_id: Optional[str] = None,
1107
+ ) -> int:
1108
+ """
1109
+ Invalidate cache entries using Redis pattern matching.
1110
+
1111
+ Pattern structure: prefix:project:agent:hash
1112
+ """
1113
+ try:
1114
+ if agent is None and project_id is None:
1115
+ # Clear all ALMA cache keys
1116
+ pattern = f"{self.key_prefix}*"
1117
+ elif project_id and agent:
1118
+ # Specific project and agent
1119
+ pattern = f"{self.key_prefix}{project_id}:{agent}:*"
1120
+ elif project_id:
1121
+ # All agents for a project
1122
+ pattern = f"{self.key_prefix}{project_id}:*"
1123
+ elif agent:
1124
+ # Specific agent across all projects
1125
+ pattern = f"{self.key_prefix}*:{agent}:*"
1126
+ else:
1127
+ return 0
1128
+
1129
+ # Use SCAN for safe iteration over keys
1130
+ count = 0
1131
+ cursor = 0
1132
+ while True:
1133
+ cursor, keys = self._redis.scan(cursor, match=pattern, count=100)
1134
+ if keys:
1135
+ self._redis.delete(*keys)
1136
+ count += len(keys)
1137
+ if cursor == 0:
1138
+ break
1139
+
1140
+ with self._lock:
1141
+ self._stats.evictions += count
1142
+ if self._on_eviction and count > 0:
1143
+ self._on_eviction(count)
1144
+ logger.info(
1145
+ f"Invalidated {count} Redis cache entries for agent={agent}, project={project_id}"
1146
+ )
1147
+ return count
1148
+
1149
+ except Exception as e:
1150
+ logger.warning(f"Redis invalidate error: {e}")
1151
+ return 0
1152
+
1153
+ def _record_get_time(self, start_time: float) -> None:
1154
+ """Record get operation timing."""
1155
+ if self._metrics:
1156
+ with self._lock:
1157
+ duration_ms = (time.time() - start_time) * 1000
1158
+ self._metrics.record_get(duration_ms)
1159
+ self._stats.total_get_calls += 1
1160
+
1161
+ def _record_set_time(self, start_time: float) -> None:
1162
+ """Record set operation timing."""
1163
+ if self._metrics:
1164
+ with self._lock:
1165
+ duration_ms = (time.time() - start_time) * 1000
1166
+ self._metrics.record_set(duration_ms)
1167
+ self._stats.total_set_calls += 1
1168
+
1169
+ def get_stats(self) -> CacheStats:
1170
+ """Get cache statistics."""
1171
+ try:
1172
+ # Get current cache size from Redis
1173
+ pattern = f"{self.key_prefix}*"
1174
+ cursor = 0
1175
+ count = 0
1176
+ while True:
1177
+ cursor, keys = self._redis.scan(cursor, match=pattern, count=100)
1178
+ count += len(keys)
1179
+ if cursor == 0:
1180
+ break
1181
+
1182
+ with self._lock:
1183
+ self._stats.current_size = count
1184
+
1185
+ if self._metrics:
1186
+ self._stats.avg_get_time_ms = self._metrics.get_avg(
1187
+ self._metrics.get_times
1188
+ )
1189
+ self._stats.avg_set_time_ms = self._metrics.get_avg(
1190
+ self._metrics.set_times
1191
+ )
1192
+ self._stats.p95_get_time_ms = self._metrics.get_percentile(
1193
+ self._metrics.get_times, 95
1194
+ )
1195
+ self._stats.p95_set_time_ms = self._metrics.get_percentile(
1196
+ self._metrics.set_times, 95
1197
+ )
1198
+
1199
+ return self._stats
1200
+
1201
+ except Exception as e:
1202
+ logger.warning(f"Redis get_stats error: {e}")
1203
+ return self._stats
1204
+
1205
+ def clear(self) -> None:
1206
+ """Clear all ALMA cache entries from Redis."""
1207
+ try:
1208
+ count = self.invalidate()
1209
+ with self._lock:
1210
+ self._stats = CacheStats()
1211
+ if self._metrics:
1212
+ self._metrics = PerformanceMetrics()
1213
+ logger.info(f"Cleared Redis cache ({count} entries)")
1214
+ except Exception as e:
1215
+ logger.warning(f"Redis clear error: {e}")
1216
+
1217
+
1218
+ # ==================== NULL CACHE ====================
1219
+
1220
+
1221
+ class NullCache(CacheBackend):
1222
+ """
1223
+ A no-op cache implementation for testing or when caching is disabled.
1224
+
1225
+ All operations succeed, but nothing is stored, so every get() is a cache miss.
1226
+ """
1227
+
1228
+ def __init__(self):
1229
+ """Initialize null cache."""
1230
+ self._stats = CacheStats()
1231
+
1232
+ def get(self, *args, **kwargs) -> Optional[MemorySlice]:
1233
+ """Always returns None (cache miss)."""
1234
+ self._stats.misses += 1
1235
+ return None
1236
+
1237
+ def set(self, *args, **kwargs) -> None:
1238
+ """No-op."""
1239
+ pass
1240
+
1241
+ def invalidate(self, *args, **kwargs) -> int:
1242
+ """No-op; reports zero invalidated entries."""
1243
+ return 0
1244
+
1245
+ def get_stats(self) -> CacheStats:
1246
+ """Get cache statistics."""
1247
+ return self._stats
1248
+
1249
+ def clear(self) -> None:
1250
+ """No-op."""
1251
+ pass
1252
+
1253
+
1254
+ # ==================== CACHE FACTORY ====================
1255
+
1256
+
1257
+ def create_cache(
1258
+ backend: str = "memory",
1259
+ ttl_seconds: int = 300,
1260
+ max_entries: int = 1000,
1261
+ redis_host: str = "localhost",
1262
+ redis_port: int = 6379,
1263
+ redis_password: Optional[str] = None,
1264
+ redis_db: int = 0,
1265
+ enable_metrics: bool = True,
1266
+ namespace: Optional[str] = None,
1267
+ ) -> CacheBackend:
1268
+ """
1269
+ Factory function to create a cache backend.
1270
+
1271
+ Args:
1272
+ backend: "memory" (default), "redis", or "null"; unrecognized values fall back to memory
1273
+ ttl_seconds: TTL for cache entries
1274
+ max_entries: Max entries for memory cache
1275
+ redis_host: Redis host (for redis backend)
1276
+ redis_port: Redis port (for redis backend)
1277
+ redis_password: Redis password (for redis backend)
1278
+ redis_db: Redis database number (for redis backend)
1279
+ enable_metrics: Whether to track performance metrics
1280
+ namespace: Optional namespace for cache isolation (default: "alma").
1281
+ Useful for multi-agent or multi-tenant deployments.
1282
+
1283
+ Returns:
1284
+ Configured CacheBackend instance
1285
+ """
1286
+ if backend == "redis":
1287
+ return RedisCache(
1288
+ host=redis_host,
1289
+ port=redis_port,
1290
+ db=redis_db,
1291
+ password=redis_password,
1292
+ ttl_seconds=ttl_seconds,
1293
+ enable_metrics=enable_metrics,
1294
+ namespace=namespace,
1295
+ )
1296
+ elif backend == "null":
1297
+ return NullCache()
1298
+ else:
1299
+ return RetrievalCache(
1300
+ ttl_seconds=ttl_seconds,
1301
+ max_entries=max_entries,
1302
+ enable_metrics=enable_metrics,
1303
+ namespace=namespace,
1304
+ )
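To close, a short usage sketch of the factory (argument values are illustrative):

    cache = create_cache(backend="memory", ttl_seconds=300, max_entries=1000)
    # cache = create_cache(backend="redis", redis_host="redis.internal", namespace="tenant-a")
    # cache = create_cache(backend="null")   # disable caching, e.g. in tests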