alma-memory 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -194
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -322
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -264
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -81
- alma/graph/backends/__init__.py +32 -18
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -432
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -511
- alma/observability/__init__.py +91 -0
- alma/observability/config.py +302 -0
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -366
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -61
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1048
- alma/storage/base.py +1083 -525
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +614 -619
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1452
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1358
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.0.dist-info/RECORD +0 -76
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/retrieval/budget.py
ADDED
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ALMA Token Budget Management.
|
|
3
|
+
|
|
4
|
+
Implements attention budget tracking for retrieval to prevent context overflow.
|
|
5
|
+
Based on context engineering principles: "Context window space is finite and expensive."
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Token counting with configurable estimator
|
|
9
|
+
- Priority-based inclusion (must-see → should-see → fetch-on-demand)
|
|
10
|
+
- Budget enforcement with graceful degradation
|
|
11
|
+
- Tracking and metrics for optimization
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
18
|
+
|
|
19
|
+
from alma.types import (
|
|
20
|
+
AntiPattern,
|
|
21
|
+
DomainKnowledge,
|
|
22
|
+
Heuristic,
|
|
23
|
+
MemorySlice,
|
|
24
|
+
Outcome,
|
|
25
|
+
UserPreference,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PriorityTier(Enum):
    """
    Tiers used to decide how attention budget is allocated to a memory.

    Following context engineering guidance, the tiers are:

    - MUST_SEE: present in the window on every step, without exception
    - SHOULD_SEE: important, but may be summarized when needed
    - FETCH_ON_DEMAND: referenced only; the agent can request the content
    - EXCLUDE: already processed and compressed; never read again
    """

    MUST_SEE = 1  # always in the window
    SHOULD_SEE = 2  # important, may be summarized
    FETCH_ON_DEMAND = 3  # referenced, not included
    EXCLUDE = 4  # never read again
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class BudgetConfig:
    """Tunable settings for token budget management."""

    # Overall token ceiling for one retrieval
    max_tokens: int = 4000

    # Fraction of max_tokens granted to each tier
    must_see_pct: float = 0.4  # critical memories (40%)
    should_see_pct: float = 0.35  # important memories (35%)
    fetch_on_demand_pct: float = 0.25  # reserved for on-demand items (25%)

    # Per-type item caps (applied within the tier allocation)
    max_heuristics: int = 10
    max_outcomes: int = 10
    max_knowledge: int = 5
    max_anti_patterns: int = 5
    max_preferences: int = 5

    # Token estimation: rough rule of thumb, 4 characters ~ 1 token
    chars_per_token: int = 4

    # Truncation of individual items
    truncate_long_content: bool = True
    max_content_chars: int = 500

    def get_tier_budget(self, tier: PriorityTier) -> int:
        """
        Return the token allowance for a priority tier.

        Args:
            tier: Tier whose allowance is requested

        Returns:
            ``max_tokens`` scaled by the tier's fraction and truncated to an
            int; 0 for any tier without an allocation (such as EXCLUDE).
        """
        tier_shares = (
            (PriorityTier.MUST_SEE, self.must_see_pct),
            (PriorityTier.SHOULD_SEE, self.should_see_pct),
            (PriorityTier.FETCH_ON_DEMAND, self.fetch_on_demand_pct),
        )
        for candidate, share in tier_shares:
            if tier == candidate:
                return int(self.max_tokens * share)
        return 0
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class BudgetedItem:
    """A memory item paired with the bookkeeping produced by budgeting."""

    # The wrapped memory object (any type)
    item: Any
    # Type label supplied by the caller, e.g. "heuristic" or "outcome"
    memory_type: str
    # Tier the item was budgeted under
    priority: PriorityTier
    # Token estimate recorded for the item
    estimated_tokens: int

    # Status flags
    included: bool = True
    truncated: bool = False
    summary_only: bool = False
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass
class BudgetReport:
    """Summary of how the token budget was spent during a retrieval."""

    total_budget: int
    used_tokens: int
    remaining_tokens: int

    # Usage versus allowance, per tier
    must_see_used: int = 0
    must_see_budget: int = 0
    should_see_used: int = 0
    should_see_budget: int = 0
    fetch_on_demand_count: int = 0

    # Item counts by status
    included_count: int = 0
    excluded_count: int = 0
    truncated_count: int = 0
    summary_only_count: int = 0

    # Warnings
    budget_exceeded: bool = False
    items_dropped: List[str] = field(default_factory=list)

    @property
    def utilization_pct(self) -> float:
        """Used tokens as a percentage of the total budget (0.0 if the budget is zero)."""
        if self.total_budget != 0:
            return (self.used_tokens / self.total_budget) * 100
        return 0.0
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class TokenEstimator:
    """
    Estimates token count for memory items.

    The estimate is the length of the item's relevant text divided (floor)
    by ``chars_per_token``, plus a fixed per-type overhead.
    """

    # Fixed per-type token overheads added on top of the text estimate
    _HEURISTIC_OVERHEAD = 20  # metadata
    _OUTCOME_OVERHEAD = 15
    _KNOWLEDGE_OVERHEAD = 10
    _ANTI_PATTERN_OVERHEAD = 15
    _PREFERENCE_OVERHEAD = 10

    # Estimate returned for objects of an unrecognized type
    _DEFAULT_ESTIMATE = 50

    def __init__(self, chars_per_token: int = 4):
        """
        Args:
            chars_per_token: Number of characters assumed to make up one token

        Raises:
            ValueError: If ``chars_per_token`` is zero or negative
        """
        # Zero would raise ZeroDivisionError on the first estimate; a negative
        # ratio would yield negative estimates, making every item "fit".
        if chars_per_token <= 0:
            raise ValueError(
                f"chars_per_token must be positive, got {chars_per_token!r}"
            )
        self.chars_per_token = chars_per_token

    def _text_tokens(self, text: str, overhead: int = 0) -> int:
        """Return floor(len(text) / chars_per_token) plus ``overhead``."""
        return len(text) // self.chars_per_token + overhead

    def estimate(self, item: Any) -> int:
        """
        Estimate tokens for any memory item.

        Args:
            item: A Heuristic, Outcome, DomainKnowledge, AntiPattern,
                UserPreference, or plain string

        Returns:
            Estimated token count; a flat default for unrecognized types
        """
        # Checked in order; the first matching type wins.
        dispatch = (
            (Heuristic, self._estimate_heuristic),
            (Outcome, self._estimate_outcome),
            (DomainKnowledge, self._estimate_knowledge),
            (AntiPattern, self._estimate_anti_pattern),
            (UserPreference, self._estimate_preference),
            (str, self._text_tokens),
        )
        for item_type, handler in dispatch:
            if isinstance(item, item_type):
                return handler(item)
        return self._DEFAULT_ESTIMATE

    def _estimate_heuristic(self, h: Heuristic) -> int:
        """Estimate tokens for a heuristic (condition + strategy)."""
        return self._text_tokens(
            f"{h.condition} {h.strategy}", self._HEURISTIC_OVERHEAD
        )

    def _estimate_outcome(self, o: Outcome) -> int:
        """Estimate tokens for an outcome, counting the error message if set."""
        text = f"{o.task_type} {o.task_description} {o.strategy_used}"
        if o.error_message:
            text += f" {o.error_message}"
        return self._text_tokens(text, self._OUTCOME_OVERHEAD)

    def _estimate_knowledge(self, k: DomainKnowledge) -> int:
        """Estimate tokens for domain knowledge (domain + stringified fact)."""
        # 'fact' may be a complex structure, so measure its str() form
        fact_str = str(k.fact) if k.fact else ""
        return self._text_tokens(f"{k.domain} {fact_str}", self._KNOWLEDGE_OVERHEAD)

    def _estimate_anti_pattern(self, ap: AntiPattern) -> int:
        """Estimate tokens for an anti-pattern."""
        return self._text_tokens(
            f"{ap.pattern} {ap.why_bad} {ap.better_alternative}",
            self._ANTI_PATTERN_OVERHEAD,
        )

    def _estimate_preference(self, p: UserPreference) -> int:
        """Estimate tokens for a preference; a missing context counts as empty."""
        return self._text_tokens(
            f"{p.category} {p.preference} {p.context or ''}",
            self._PREFERENCE_OVERHEAD,
        )

    def estimate_slice(self, memory_slice: MemorySlice) -> int:
        """Estimate total tokens for every item held in a MemorySlice."""
        return (
            sum(self._estimate_heuristic(h) for h in memory_slice.heuristics)
            + sum(self._estimate_outcome(o) for o in memory_slice.outcomes)
            + sum(self._estimate_knowledge(k) for k in memory_slice.domain_knowledge)
            + sum(self._estimate_anti_pattern(ap) for ap in memory_slice.anti_patterns)
            + sum(self._estimate_preference(p) for p in memory_slice.preferences)
        )
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class RetrievalBudget:
    """
    Manages token budget for memory retrieval.

    Ensures retrieval results fit within context window limits
    while prioritizing the most important memories.

    Usage:
        budget = RetrievalBudget(config=BudgetConfig(max_tokens=4000))

        # Check before including
        if budget.can_include(memory, priority=PriorityTier.MUST_SEE):
            budget.include(memory, "heuristic", PriorityTier.MUST_SEE)

        # Or process entire slice
        budgeted_slice, report = budget.apply_budget(memory_slice, priorities)

    Note:
        ``priority_classifier`` is stored on the instance but is not consulted
        by ``apply_budget``, which assigns tiers per memory type.
    """

    def __init__(
        self,
        config: Optional[BudgetConfig] = None,
        estimator: Optional[TokenEstimator] = None,
        priority_classifier: Optional[Callable[[Any, str], PriorityTier]] = None,
    ):
        """
        Args:
            config: Budget settings; defaults to ``BudgetConfig()``
            estimator: Token estimator; defaults to a ``TokenEstimator`` built
                from ``config.chars_per_token``
            priority_classifier: Callable mapping (item, memory_type) to a
                tier; defaults to the built-in classifier
        """
        self.config = config or BudgetConfig()
        self.estimator = estimator or TokenEstimator(self.config.chars_per_token)
        self.priority_classifier = priority_classifier or self._default_classifier

        # Tracking
        self._used_tokens = 0
        self._tier_usage: Dict[PriorityTier, int] = {
            PriorityTier.MUST_SEE: 0,
            PriorityTier.SHOULD_SEE: 0,
            PriorityTier.FETCH_ON_DEMAND: 0,
        }
        self._items: List[BudgetedItem] = []
        self._excluded: List[str] = []

    def reset(self) -> None:
        """Reset budget tracking for new retrieval."""
        self._used_tokens = 0
        self._tier_usage = {
            PriorityTier.MUST_SEE: 0,
            PriorityTier.SHOULD_SEE: 0,
            PriorityTier.FETCH_ON_DEMAND: 0,
        }
        self._items = []
        self._excluded = []

    @property
    def remaining_tokens(self) -> int:
        """Tokens remaining in total budget (never negative)."""
        return max(0, self.config.max_tokens - self._used_tokens)

    @property
    def used_tokens(self) -> int:
        """Tokens used so far."""
        return self._used_tokens

    def can_include(
        self,
        item: Any,
        priority: PriorityTier = PriorityTier.SHOULD_SEE,
    ) -> bool:
        """
        Check if an item can be included within budget.

        Args:
            item: Memory item to size up
            priority: Tier whose allowance the item would draw from

        Returns:
            True when the item fits both its tier budget and the total
            budget; always False for ``PriorityTier.EXCLUDE``
        """
        if priority == PriorityTier.EXCLUDE:
            return False

        estimated = self.estimator.estimate(item)
        tier_budget = self.config.get_tier_budget(priority)
        tier_used = self._tier_usage.get(priority, 0)

        # Check tier budget
        if tier_used + estimated > tier_budget:
            return False

        # Check total budget
        if self._used_tokens + estimated > self.config.max_tokens:
            return False

        return True

    def include(
        self,
        item: Any,
        memory_type: str,
        priority: PriorityTier = PriorityTier.SHOULD_SEE,
        force: bool = False,
    ) -> BudgetedItem:
        """
        Include an item in the budget.

        Args:
            item: Memory item to include
            memory_type: Type name (heuristic, outcome, etc.)
            priority: Priority tier for allocation
            force: Include even if over budget (for MUST_SEE items)

        Returns:
            BudgetedItem with inclusion status
        """
        estimated = self.estimator.estimate(item)
        can_fit = self.can_include(item, priority)

        budgeted = BudgetedItem(
            item=item,
            memory_type=memory_type,
            priority=priority,
            estimated_tokens=estimated,
            included=can_fit or force,
        )

        if budgeted.included:
            self._used_tokens += estimated
            self._tier_usage[priority] = self._tier_usage.get(priority, 0) + estimated
        else:
            # Remember what was dropped so the report can list it
            item_desc = f"{memory_type}:{getattr(item, 'id', 'unknown')}"
            self._excluded.append(item_desc)

        self._items.append(budgeted)
        return budgeted

    def apply_budget(
        self,
        memory_slice: MemorySlice,
        type_priorities: Optional[Dict[str, PriorityTier]] = None,
    ) -> Tuple[MemorySlice, BudgetReport]:
        """
        Apply budget constraints to a MemorySlice.

        Tracking is reset first, so each call reports on this slice only.

        Args:
            memory_slice: Raw retrieval results
            type_priorities: Optional priority overrides per type

        Returns:
            Tuple of (budgeted MemorySlice, BudgetReport)
        """
        self.reset()

        # Default priorities
        priorities = type_priorities or {
            "heuristic": PriorityTier.MUST_SEE,
            "outcome": PriorityTier.SHOULD_SEE,
            "domain_knowledge": PriorityTier.SHOULD_SEE,
            "anti_pattern": PriorityTier.MUST_SEE,  # Important for avoiding mistakes
            "preference": PriorityTier.MUST_SEE,  # User prefs are critical
        }

        cfg = self.config
        # (type name, candidates capped at the per-type limit, fallback tier),
        # listed in processing order: MUST_SEE types first, then SHOULD_SEE.
        plan = (
            (
                "preference",
                memory_slice.preferences[: cfg.max_preferences],
                PriorityTier.MUST_SEE,
            ),
            (
                "anti_pattern",
                memory_slice.anti_patterns[: cfg.max_anti_patterns],
                PriorityTier.MUST_SEE,
            ),
            (
                "heuristic",
                memory_slice.heuristics[: cfg.max_heuristics],
                PriorityTier.MUST_SEE,
            ),
            (
                "outcome",
                memory_slice.outcomes[: cfg.max_outcomes],
                PriorityTier.SHOULD_SEE,
            ),
            (
                "domain_knowledge",
                memory_slice.domain_knowledge[: cfg.max_knowledge],
                PriorityTier.SHOULD_SEE,
            ),
        )

        kept: Dict[str, List[Any]] = {}
        for memory_type, candidates, fallback_tier in plan:
            tier = priorities.get(memory_type, fallback_tier)
            survivors: List[Any] = []
            for candidate in candidates:
                if self.include(candidate, memory_type, tier).included:
                    survivors.append(candidate)
            kept[memory_type] = survivors

        # Build report
        report = BudgetReport(
            total_budget=cfg.max_tokens,
            used_tokens=self._used_tokens,
            remaining_tokens=self.remaining_tokens,
            must_see_used=self._tier_usage.get(PriorityTier.MUST_SEE, 0),
            must_see_budget=cfg.get_tier_budget(PriorityTier.MUST_SEE),
            should_see_used=self._tier_usage.get(PriorityTier.SHOULD_SEE, 0),
            should_see_budget=cfg.get_tier_budget(PriorityTier.SHOULD_SEE),
            # Number of tracked items assigned to the FETCH_ON_DEMAND tier
            fetch_on_demand_count=sum(
                1 for i in self._items if i.priority == PriorityTier.FETCH_ON_DEMAND
            ),
            included_count=sum(1 for i in self._items if i.included),
            excluded_count=sum(1 for i in self._items if not i.included),
            truncated_count=sum(1 for i in self._items if i.truncated),
            summary_only_count=sum(1 for i in self._items if i.summary_only),
            budget_exceeded=self._used_tokens > cfg.max_tokens,
            # Copy so later include() calls cannot mutate an issued report
            items_dropped=list(self._excluded),
        )

        # Build budgeted slice
        budgeted_slice = MemorySlice(
            heuristics=kept["heuristic"],
            outcomes=kept["outcome"],
            preferences=kept["preference"],
            domain_knowledge=kept["domain_knowledge"],
            anti_patterns=kept["anti_pattern"],
            query=memory_slice.query,
            agent=memory_slice.agent,
            retrieval_time_ms=memory_slice.retrieval_time_ms,
        )

        # Add budget metadata
        budgeted_slice.metadata["budget_report"] = {
            "total_budget": report.total_budget,
            "used_tokens": report.used_tokens,
            "utilization_pct": report.utilization_pct,
            "items_dropped": len(report.items_dropped),
        }

        logger.info(
            "Budget applied: %s/%s tokens (%.1f%%), %s included, %s excluded",
            report.used_tokens,
            report.total_budget,
            report.utilization_pct,
            report.included_count,
            report.excluded_count,
        )

        return budgeted_slice, report

    def _default_classifier(self, item: Any, memory_type: str) -> PriorityTier:
        """Default priority classification based on memory type and attributes."""
        # Anti-patterns and preferences are always high priority
        if memory_type in ("anti_pattern", "preference"):
            return PriorityTier.MUST_SEE

        # High-confidence heuristics are must-see
        if memory_type == "heuristic":
            if hasattr(item, "confidence") and item.confidence >= 0.8:
                return PriorityTier.MUST_SEE
            return PriorityTier.SHOULD_SEE

        # Successful outcomes are should-see; the rest are fetched on demand
        if memory_type == "outcome":
            if hasattr(item, "success") and item.success:
                return PriorityTier.SHOULD_SEE
            return PriorityTier.FETCH_ON_DEMAND

        # Domain knowledge by confidence
        if memory_type == "domain_knowledge":
            if hasattr(item, "confidence") and item.confidence >= 0.7:
                return PriorityTier.SHOULD_SEE
            return PriorityTier.FETCH_ON_DEMAND

        return PriorityTier.SHOULD_SEE

    def get_fetch_on_demand_ids(self) -> List[str]:
        """Get IDs of tracked items marked fetch-on-demand (items lacking an ``id`` are skipped)."""
        return [
            i.item.id
            for i in self._items
            if i.priority == PriorityTier.FETCH_ON_DEMAND and hasattr(i.item, "id")
        ]
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
class BudgetAwareRetrieval:
    """
    Thin wrapper that runs a retrieval engine and then budgets its results.

    Usage:
        budget_retrieval = BudgetAwareRetrieval(
            retrieval_engine,
            budget_config=BudgetConfig(max_tokens=4000)
        )

        result, report = budget_retrieval.retrieve_with_budget(
            query="...",
            agent="helena",
            project_id="my-project"
        )
    """

    def __init__(
        self,
        retrieval_engine: Any,  # RetrievalEngine
        budget_config: Optional[BudgetConfig] = None,
    ):
        """
        Args:
            retrieval_engine: Object exposing a ``retrieve(...)`` method
            budget_config: Settings handed to the internal RetrievalBudget
        """
        self.engine = retrieval_engine
        self.budget = RetrievalBudget(config=budget_config)

    def retrieve_with_budget(
        self,
        query: str,
        agent: str,
        project_id: str,
        user_id: Optional[str] = None,
        top_k: int = 10,
        **kwargs,
    ) -> Tuple[MemorySlice, BudgetReport]:
        """
        Fetch memories from the engine and enforce the token budget on them.

        Args:
            query: Forwarded to the engine
            agent: Forwarded to the engine
            project_id: Forwarded to the engine
            user_id: Forwarded to the engine
            top_k: Requested result count; the engine is asked for twice this
            **kwargs: Extra keyword arguments forwarded to the engine

        Returns:
            Tuple of (budgeted MemorySlice, BudgetReport)
        """
        # Over-fetch so budget filtering has spare candidates to choose from
        oversampled_k = top_k * 2
        unbudgeted = self.engine.retrieve(
            query=query,
            agent=agent,
            project_id=project_id,
            user_id=user_id,
            top_k=oversampled_k,
            **kwargs,
        )

        # Trim the raw results down to the configured budget
        return self.budget.apply_budget(unbudgeted)
|