alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,525 @@
1
+ """
2
+ ALMA Token Budget Management.
3
+
4
+ Implements attention budget tracking for retrieval to prevent context overflow.
5
+ Based on context engineering principles: "Context window space is finite and expensive."
6
+
7
+ Features:
8
+ - Token counting with configurable estimator
9
+ - Priority-based inclusion (must-see → should-see → fetch-on-demand)
10
+ - Budget enforcement with graceful degradation
11
+ - Tracking and metrics for optimization
12
+ """
13
+
14
+ import logging
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+ from typing import Any, Callable, Dict, List, Optional, Tuple
18
+
19
+ from alma.types import (
20
+ AntiPattern,
21
+ DomainKnowledge,
22
+ Heuristic,
23
+ MemorySlice,
24
+ Outcome,
25
+ UserPreference,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class PriorityTier(Enum):
    """
    Priority levels used when allocating the attention budget.

    Members (following context engineering guidance):
        MUST_SEE: Present in the window on every step, without exception.
        SHOULD_SEE: Important, but may be summarized when space is tight.
        FETCH_ON_DEMAND: Only referenced, not included; the agent can ask
            for it.
        EXCLUDE: Already processed and compressed; never read again.
    """

    MUST_SEE = 1
    SHOULD_SEE = 2
    FETCH_ON_DEMAND = 3
    EXCLUDE = 4
46
+
47
+
48
@dataclass
class BudgetConfig:
    """Settings that control how the token budget is sized and divided."""

    # Overall ceiling, in tokens
    max_tokens: int = 4000

    # Fraction of max_tokens granted to each priority tier
    must_see_pct: float = 0.4  # critical memories get 40%
    should_see_pct: float = 0.35  # important memories get 35%
    fetch_on_demand_pct: float = 0.25  # 25% held back for on-demand items

    # Caps on item count per memory type (applied within the tier allocation)
    max_heuristics: int = 10
    max_outcomes: int = 10
    max_knowledge: int = 5
    max_anti_patterns: int = 5
    max_preferences: int = 5

    # Characters assumed per token when estimating (rough: 4 chars = 1 token)
    chars_per_token: int = 4

    # Truncation of individual items
    truncate_long_content: bool = True
    max_content_chars: int = 500

    def get_tier_budget(self, tier: PriorityTier) -> int:
        """
        Return the token allowance for ``tier``.

        The allowance is ``max_tokens`` multiplied by the tier's configured
        fraction and truncated to an int. Any tier without a configured
        fraction (such as EXCLUDE) yields 0.
        """
        if tier == PriorityTier.MUST_SEE:
            share = self.must_see_pct
        elif tier == PriorityTier.SHOULD_SEE:
            share = self.should_see_pct
        elif tier == PriorityTier.FETCH_ON_DEMAND:
            share = self.fetch_on_demand_pct
        else:
            return 0
        return int(self.max_tokens * share)
83
+
84
+
85
@dataclass
class BudgetedItem:
    """One memory item paired with the budgeting decision recorded for it."""

    item: Any  # the wrapped memory object
    memory_type: str  # type label such as "heuristic" or "outcome"
    priority: PriorityTier  # tier the item is charged against
    estimated_tokens: int  # estimated token cost of the item
    included: bool = True  # False when the item did not make it in
    truncated: bool = False  # marks an item whose content was shortened
    summary_only: bool = False  # marks an item carried as a summary only
96
+
97
+
98
@dataclass
class BudgetReport:
    """Summary of how the token budget was spent during a retrieval."""

    total_budget: int
    used_tokens: int
    remaining_tokens: int

    # Usage and allowance broken down by tier
    must_see_used: int = 0
    must_see_budget: int = 0
    should_see_used: int = 0
    should_see_budget: int = 0
    fetch_on_demand_count: int = 0

    # Item counts grouped by status
    included_count: int = 0
    excluded_count: int = 0
    truncated_count: int = 0
    summary_only_count: int = 0

    # Warning indicators
    budget_exceeded: bool = False
    items_dropped: List[str] = field(default_factory=list)

    @property
    def utilization_pct(self) -> float:
        """Percentage of the total budget that was used (0.0 for a zero budget)."""
        if self.total_budget != 0:
            return (self.used_tokens / self.total_budget) * 100
        return 0.0
129
+
130
+
131
class TokenEstimator:
    """Approximates token counts for memory items from their text length."""

    def __init__(self, chars_per_token: int = 4):
        self.chars_per_token = chars_per_token

    def estimate(self, item: Any) -> int:
        """
        Return an estimated token count for ``item``.

        Known memory types are routed to their dedicated estimator, plain
        strings are measured directly, and anything else gets a flat 50.
        """
        # Checked in order; the first matching type wins.
        dispatch = (
            (Heuristic, self._estimate_heuristic),
            (Outcome, self._estimate_outcome),
            (DomainKnowledge, self._estimate_knowledge),
            (AntiPattern, self._estimate_anti_pattern),
            (UserPreference, self._estimate_preference),
        )
        for memory_cls, handler in dispatch:
            if isinstance(item, memory_cls):
                return handler(item)

        if isinstance(item, str):
            return len(item) // self.chars_per_token

        return 50  # fallback for unrecognized items

    def _estimate_heuristic(self, h: Heuristic) -> int:
        """Token estimate for a heuristic: condition plus strategy text."""
        body = f"{h.condition} {h.strategy}"
        overhead = 20  # allowance for metadata
        return len(body) // self.chars_per_token + overhead

    def _estimate_outcome(self, o: Outcome) -> int:
        """Token estimate for an outcome, including its error message if any."""
        body = f"{o.task_type} {o.task_description} {o.strategy_used}"
        if o.error_message:
            body += f" {o.error_message}"
        return len(body) // self.chars_per_token + 15

    def _estimate_knowledge(self, k: DomainKnowledge) -> int:
        """Token estimate for domain knowledge: domain plus stringified fact."""
        # The fact may be a structured value, so it is stringified first;
        # a falsy fact contributes nothing.
        rendered_fact = str(k.fact) if k.fact else ""
        body = f"{k.domain} {rendered_fact}"
        return len(body) // self.chars_per_token + 10

    def _estimate_anti_pattern(self, ap: AntiPattern) -> int:
        """Token estimate for an anti-pattern and its explanation."""
        body = f"{ap.pattern} {ap.why_bad} {ap.better_alternative}"
        return len(body) // self.chars_per_token + 15

    def _estimate_preference(self, p: UserPreference) -> int:
        """Token estimate for a user preference."""
        body = f"{p.category} {p.preference} {p.context or ''}"
        return len(body) // self.chars_per_token + 10

    def estimate_slice(self, memory_slice: MemorySlice) -> int:
        """Return the summed token estimate of every item in a MemorySlice."""
        return (
            0
            + sum(map(self._estimate_heuristic, memory_slice.heuristics))
            + sum(map(self._estimate_outcome, memory_slice.outcomes))
            + sum(map(self._estimate_knowledge, memory_slice.domain_knowledge))
            + sum(map(self._estimate_anti_pattern, memory_slice.anti_patterns))
            + sum(map(self._estimate_preference, memory_slice.preferences))
        )
196
+
197
+
198
class RetrievalBudget:
    """
    Manages token budget for memory retrieval.

    Ensures retrieval results fit within context window limits
    while prioritizing the most important memories.

    Note: ``priority_classifier`` is stored on the instance for callers to
    use; ``apply_budget`` itself assigns tiers per memory *type* (via
    ``type_priorities``), not per item.

    Usage:
        budget = RetrievalBudget(config=BudgetConfig(max_tokens=4000))

        # Check before including
        if budget.can_include(memory, priority=PriorityTier.MUST_SEE):
            budget.include(memory, "heuristic", PriorityTier.MUST_SEE)

        # Or process entire slice
        budgeted_slice, report = budget.apply_budget(memory_slice, priorities)
    """

    def __init__(
        self,
        config: Optional[BudgetConfig] = None,
        estimator: Optional[TokenEstimator] = None,
        priority_classifier: Optional[Callable[[Any, str], PriorityTier]] = None,
    ):
        """
        Create a budget tracker.

        Args:
            config: Budget settings; a default BudgetConfig is used if omitted.
            estimator: Token estimator; defaults to a TokenEstimator built
                from ``config.chars_per_token``.
            priority_classifier: Callable mapping (item, memory_type) to a
                PriorityTier; defaults to ``_default_classifier``.
        """
        self.config = config or BudgetConfig()
        self.estimator = estimator or TokenEstimator(self.config.chars_per_token)
        self.priority_classifier = priority_classifier or self._default_classifier

        # Tracking
        self._used_tokens = 0
        self._tier_usage: Dict[PriorityTier, int] = {
            PriorityTier.MUST_SEE: 0,
            PriorityTier.SHOULD_SEE: 0,
            PriorityTier.FETCH_ON_DEMAND: 0,
        }
        self._items: List[BudgetedItem] = []
        self._excluded: List[str] = []

    def reset(self) -> None:
        """Reset budget tracking for new retrieval."""
        self._used_tokens = 0
        self._tier_usage = {
            PriorityTier.MUST_SEE: 0,
            PriorityTier.SHOULD_SEE: 0,
            PriorityTier.FETCH_ON_DEMAND: 0,
        }
        self._items = []
        self._excluded = []

    @property
    def remaining_tokens(self) -> int:
        """Tokens remaining in total budget (never negative)."""
        return max(0, self.config.max_tokens - self._used_tokens)

    @property
    def used_tokens(self) -> int:
        """Tokens used so far."""
        return self._used_tokens

    def _fits(self, estimated: int, priority: PriorityTier) -> bool:
        """Return True if ``estimated`` tokens fit both the tier and total budget."""
        if priority == PriorityTier.EXCLUDE:
            return False

        # Tier allowance first, then the overall ceiling.
        tier_budget = self.config.get_tier_budget(priority)
        if self._tier_usage.get(priority, 0) + estimated > tier_budget:
            return False
        return self._used_tokens + estimated <= self.config.max_tokens

    def can_include(
        self,
        item: Any,
        priority: PriorityTier = PriorityTier.SHOULD_SEE,
    ) -> bool:
        """
        Check if an item can be included within budget.

        Args:
            item: Memory item to test.
            priority: Tier whose allowance the item would be charged to.

        Returns:
            False for EXCLUDE or when the item would overflow the tier or
            total budget; True otherwise. Tracking state is not modified.
        """
        if priority == PriorityTier.EXCLUDE:
            # Skip estimation entirely; excluded items never fit.
            return False
        return self._fits(self.estimator.estimate(item), priority)

    def include(
        self,
        item: Any,
        memory_type: str,
        priority: PriorityTier = PriorityTier.SHOULD_SEE,
        force: bool = False,
    ) -> BudgetedItem:
        """
        Include an item in the budget.

        Args:
            item: Memory item to include
            memory_type: Type name (heuristic, outcome, etc.)
            priority: Priority tier for allocation
            force: Include even if over budget (for MUST_SEE items)

        Returns:
            BudgetedItem with inclusion status
        """
        # Estimate once and reuse the figure for both the fit check and the
        # bookkeeping, so the two can never disagree.
        estimated = self.estimator.estimate(item)
        included = force or self._fits(estimated, priority)

        budgeted = BudgetedItem(
            item=item,
            memory_type=memory_type,
            priority=priority,
            estimated_tokens=estimated,
            included=included,
        )

        if included:
            self._used_tokens += estimated
            self._tier_usage[priority] = self._tier_usage.get(priority, 0) + estimated
        else:
            self._excluded.append(f"{memory_type}:{getattr(item, 'id', 'unknown')}")

        self._items.append(budgeted)
        return budgeted

    def _include_batch(
        self,
        items: List[Any],
        memory_type: str,
        priority: PriorityTier,
    ) -> List[Any]:
        """Offer each item to the budget in order; return those that were included."""
        return [
            candidate
            for candidate in items
            if self.include(candidate, memory_type, priority).included
        ]

    def apply_budget(
        self,
        memory_slice: MemorySlice,
        type_priorities: Optional[Dict[str, PriorityTier]] = None,
    ) -> Tuple[MemorySlice, BudgetReport]:
        """
        Apply budget constraints to a MemorySlice.

        Tracking is reset first, so each call is an independent retrieval.
        Types are processed in a fixed order (preferences, anti-patterns,
        heuristics, outcomes, domain knowledge), each capped by its
        ``max_*`` setting in the config.

        Args:
            memory_slice: Raw retrieval results
            type_priorities: Optional priority overrides per type

        Returns:
            Tuple of (budgeted MemorySlice, BudgetReport)
        """
        self.reset()

        # Default priorities
        priorities = type_priorities or {
            "heuristic": PriorityTier.MUST_SEE,
            "outcome": PriorityTier.SHOULD_SEE,
            "domain_knowledge": PriorityTier.SHOULD_SEE,
            "anti_pattern": PriorityTier.MUST_SEE,  # Important for avoiding mistakes
            "preference": PriorityTier.MUST_SEE,  # User prefs are critical
        }
        cfg = self.config

        # MUST_SEE by default: preferences, anti-patterns, heuristics
        included_preferences = self._include_batch(
            memory_slice.preferences[: cfg.max_preferences],
            "preference",
            priorities.get("preference", PriorityTier.MUST_SEE),
        )
        included_anti_patterns = self._include_batch(
            memory_slice.anti_patterns[: cfg.max_anti_patterns],
            "anti_pattern",
            priorities.get("anti_pattern", PriorityTier.MUST_SEE),
        )
        included_heuristics = self._include_batch(
            memory_slice.heuristics[: cfg.max_heuristics],
            "heuristic",
            priorities.get("heuristic", PriorityTier.MUST_SEE),
        )

        # SHOULD_SEE by default: outcomes, knowledge
        included_outcomes = self._include_batch(
            memory_slice.outcomes[: cfg.max_outcomes],
            "outcome",
            priorities.get("outcome", PriorityTier.SHOULD_SEE),
        )
        included_knowledge = self._include_batch(
            memory_slice.domain_knowledge[: cfg.max_knowledge],
            "domain_knowledge",
            priorities.get("domain_knowledge", PriorityTier.SHOULD_SEE),
        )

        # Build report
        tracked = self._items
        report = BudgetReport(
            total_budget=cfg.max_tokens,
            used_tokens=self._used_tokens,
            remaining_tokens=self.remaining_tokens,
            must_see_used=self._tier_usage.get(PriorityTier.MUST_SEE, 0),
            must_see_budget=cfg.get_tier_budget(PriorityTier.MUST_SEE),
            should_see_used=self._tier_usage.get(PriorityTier.SHOULD_SEE, 0),
            should_see_budget=cfg.get_tier_budget(PriorityTier.SHOULD_SEE),
            # Number of tracked items assigned to the fetch-on-demand tier
            # (same population that get_fetch_on_demand_ids draws from).
            fetch_on_demand_count=sum(
                1 for i in tracked if i.priority == PriorityTier.FETCH_ON_DEMAND
            ),
            included_count=sum(1 for i in tracked if i.included),
            excluded_count=sum(1 for i in tracked if not i.included),
            truncated_count=sum(1 for i in tracked if i.truncated),
            summary_only_count=sum(1 for i in tracked if i.summary_only),
            budget_exceeded=self._used_tokens > cfg.max_tokens,
            # Copy so later include() calls cannot mutate a report that has
            # already been handed to the caller.
            items_dropped=list(self._excluded),
        )

        # Build budgeted slice
        budgeted_slice = MemorySlice(
            heuristics=included_heuristics,
            outcomes=included_outcomes,
            preferences=included_preferences,
            domain_knowledge=included_knowledge,
            anti_patterns=included_anti_patterns,
            query=memory_slice.query,
            agent=memory_slice.agent,
            retrieval_time_ms=memory_slice.retrieval_time_ms,
        )

        # Add budget metadata
        budgeted_slice.metadata["budget_report"] = {
            "total_budget": report.total_budget,
            "used_tokens": report.used_tokens,
            "utilization_pct": report.utilization_pct,
            "items_dropped": len(report.items_dropped),
        }

        # Lazy %-formatting: the message is only built if INFO is enabled.
        logger.info(
            "Budget applied: %s/%s tokens (%.1f%%), %s included, %s excluded",
            report.used_tokens,
            report.total_budget,
            report.utilization_pct,
            report.included_count,
            report.excluded_count,
        )

        return budgeted_slice, report

    def _default_classifier(self, item: Any, memory_type: str) -> PriorityTier:
        """
        Default priority classification based on memory type and attributes.

        Args:
            item: Memory item; ``confidence`` / ``success`` attributes are
                consulted when present.
            memory_type: Type name (heuristic, outcome, etc.)

        Returns:
            The PriorityTier for the item; unknown types map to SHOULD_SEE.
        """
        # Anti-patterns and preferences are always high priority
        if memory_type in ("anti_pattern", "preference"):
            return PriorityTier.MUST_SEE

        # High-confidence heuristics are must-see
        if memory_type == "heuristic":
            if hasattr(item, "confidence") and item.confidence >= 0.8:
                return PriorityTier.MUST_SEE
            return PriorityTier.SHOULD_SEE

        # Successful outcomes are kept in view; the rest are on-demand
        if memory_type == "outcome":
            if hasattr(item, "success") and item.success:
                return PriorityTier.SHOULD_SEE
            return PriorityTier.FETCH_ON_DEMAND

        # Domain knowledge by confidence
        if memory_type == "domain_knowledge":
            if hasattr(item, "confidence") and item.confidence >= 0.7:
                return PriorityTier.SHOULD_SEE
            return PriorityTier.FETCH_ON_DEMAND

        return PriorityTier.SHOULD_SEE

    def get_fetch_on_demand_ids(self) -> List[str]:
        """Get IDs of tracked items in the fetch-on-demand tier that expose an ``id``."""
        return [
            tracked.item.id
            for tracked in self._items
            if tracked.priority == PriorityTier.FETCH_ON_DEMAND
            and hasattr(tracked.item, "id")
        ]
472
+
473
+
474
class BudgetAwareRetrieval:
    """
    Thin wrapper that runs a retrieval engine and then budgets its results.

    Usage:
        budget_retrieval = BudgetAwareRetrieval(
            retrieval_engine,
            budget_config=BudgetConfig(max_tokens=4000)
        )

        result, report = budget_retrieval.retrieve_with_budget(
            query="...",
            agent="helena",
            project_id="my-project"
        )
    """

    def __init__(
        self,
        retrieval_engine: Any,  # RetrievalEngine
        budget_config: Optional[BudgetConfig] = None,
    ):
        """Store the engine and build a RetrievalBudget from ``budget_config``."""
        self.engine = retrieval_engine
        self.budget = RetrievalBudget(config=budget_config)

    def retrieve_with_budget(
        self,
        query: str,
        agent: str,
        project_id: str,
        user_id: Optional[str] = None,
        top_k: int = 10,
        **kwargs,
    ) -> Tuple[MemorySlice, BudgetReport]:
        """
        Run the engine's retrieval and enforce the token budget on the result.

        Args:
            query: Query passed to the engine.
            agent: Agent name passed to the engine.
            project_id: Project identifier passed to the engine.
            user_id: Optional user identifier passed to the engine.
            top_k: Requested result count; the engine is asked for twice
                this many so budget filtering has candidates to choose from.
            **kwargs: Forwarded unchanged to ``engine.retrieve``.

        Returns:
            Tuple of (budgeted MemorySlice, BudgetReport)
        """
        # Over-fetch so that dropping items still leaves enough results.
        oversampled_k = top_k * 2
        candidates = self.engine.retrieve(
            query=query,
            agent=agent,
            project_id=project_id,
            user_id=user_id,
            top_k=oversampled_k,
            **kwargs,
        )

        budgeted, report = self.budget.apply_budget(candidates)
        return budgeted, report