alma_memory-0.5.1-py3-none-any.whl → alma_memory-0.7.0-py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- alma/__init__.py +296 -226
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -430
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -265
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -106
- alma/graph/backends/__init__.py +32 -32
- alma/graph/backends/kuzu.py +624 -624
- alma/graph/backends/memgraph.py +432 -432
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -444
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -509
- alma/observability/__init__.py +91 -84
- alma/observability/config.py +302 -302
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -424
- alma/observability/metrics.py +583 -583
- alma/observability/tracing.py +440 -440
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -427
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -90
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1259
- alma/storage/base.py +1083 -583
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -103
- alma/storage/file_based.py +614 -614
- alma/storage/migrations/__init__.py +21 -21
- alma/storage/migrations/base.py +321 -321
- alma/storage/migrations/runner.py +323 -323
- alma/storage/migrations/version_stores.py +337 -337
- alma/storage/migrations/versions/__init__.py +11 -11
- alma/storage/migrations/versions/v1_0_0.py +373 -373
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1559
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1457
- alma/testing/__init__.py +46 -46
- alma/testing/factories.py +301 -301
- alma/testing/mocks.py +389 -389
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.1.dist-info/RECORD +0 -93
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/retrieval/scoring.py
CHANGED
@@ -1,344 +1,344 @@

All 344 lines are removed and re-added with identical rendered content (a whole-file rewrite with no visible code change), so the file is shown once:

"""
ALMA Memory Scoring.

Combines semantic similarity, recency, and success rate for optimal retrieval.
"""

import math
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, List, Optional

from alma.types import AntiPattern, DomainKnowledge, Heuristic, Outcome


@dataclass
class ScoringWeights:
    """
    Configurable weights for memory scoring.

    All weights should sum to 1.0 for normalized scores.
    """

    similarity: float = 0.4  # Semantic relevance to query
    recency: float = 0.3  # How recently the memory was validated/used
    success_rate: float = 0.2  # Historical success rate
    confidence: float = 0.1  # Stored confidence score

    def __post_init__(self):
        """Validate weights sum to approximately 1.0."""
        total = self.similarity + self.recency + self.success_rate + self.confidence
        if not (0.99 <= total <= 1.01):
            # Normalize if not summing to 1
            self.similarity /= total
            self.recency /= total
            self.success_rate /= total
            self.confidence /= total


@dataclass
class ScoredItem:
    """A memory item with its computed score."""

    item: Any
    score: float
    similarity_score: float
    recency_score: float
    success_score: float
    confidence_score: float


class MemoryScorer:
    """
    Scores memories based on multiple factors for optimal retrieval.

    Factors:
    - Semantic similarity (from vector search)
    - Recency (newer memories preferred, with decay)
    - Success rate (for heuristics and outcomes)
    - Confidence (stored confidence values)
    """

    def __init__(
        self,
        weights: Optional[ScoringWeights] = None,
        recency_half_life_days: float = 30.0,
    ):
        """
        Initialize scorer.

        Args:
            weights: Scoring weights for each factor
            recency_half_life_days: Days after which recency score is halved
        """
        self.weights = weights or ScoringWeights()
        self.recency_half_life = recency_half_life_days

    def score_heuristics(
        self,
        heuristics: List[Heuristic],
        similarities: Optional[List[float]] = None,
    ) -> List[ScoredItem]:
        """
        Score and rank heuristics.

        Args:
            heuristics: List of heuristics to score
            similarities: Optional pre-computed similarity scores (0-1)

        Returns:
            Sorted list of ScoredItems (highest first)
        """
        if not heuristics:
            return []

        similarities = similarities or [1.0] * len(heuristics)
        scored = []

        for h, sim in zip(heuristics, similarities, strict=False):
            recency = self._compute_recency_score(h.last_validated)
            success = h.success_rate
            confidence = h.confidence

            total = (
                self.weights.similarity * sim
                + self.weights.recency * recency
                + self.weights.success_rate * success
                + self.weights.confidence * confidence
            )

            scored.append(
                ScoredItem(
                    item=h,
                    score=total,
                    similarity_score=sim,
                    recency_score=recency,
                    success_score=success,
                    confidence_score=confidence,
                )
            )

        return sorted(scored, key=lambda x: -x.score)

    def score_outcomes(
        self,
        outcomes: List[Outcome],
        similarities: Optional[List[float]] = None,
    ) -> List[ScoredItem]:
        """
        Score and rank outcomes.

        Successful outcomes score higher, but failures are still included
        for learning purposes.

        Args:
            outcomes: List of outcomes to score
            similarities: Optional pre-computed similarity scores (0-1)

        Returns:
            Sorted list of ScoredItems (highest first)
        """
        if not outcomes:
            return []

        similarities = similarities or [1.0] * len(outcomes)
        scored = []

        for o, sim in zip(outcomes, similarities, strict=False):
            recency = self._compute_recency_score(o.timestamp)
            # Success gets full score, failure gets partial (still useful to learn from)
            success = 1.0 if o.success else 0.3
            # Outcomes don't have stored confidence, default to 1.0
            confidence = 1.0

            total = (
                self.weights.similarity * sim
                + self.weights.recency * recency
                + self.weights.success_rate * success
                + self.weights.confidence * confidence
            )

            scored.append(
                ScoredItem(
                    item=o,
                    score=total,
                    similarity_score=sim,
                    recency_score=recency,
                    success_score=success,
                    confidence_score=confidence,
                )
            )

        return sorted(scored, key=lambda x: -x.score)

    def score_domain_knowledge(
        self,
        knowledge: List[DomainKnowledge],
        similarities: Optional[List[float]] = None,
    ) -> List[ScoredItem]:
        """
        Score and rank domain knowledge.

        Args:
            knowledge: List of domain knowledge to score
            similarities: Optional pre-computed similarity scores (0-1)

        Returns:
            Sorted list of ScoredItems (highest first)
        """
        if not knowledge:
            return []

        similarities = similarities or [1.0] * len(knowledge)
        scored = []

        for dk, sim in zip(knowledge, similarities, strict=False):
            recency = self._compute_recency_score(dk.last_verified)
            # Knowledge doesn't have success rate, use 1.0
            success = 1.0
            confidence = dk.confidence

            total = (
                self.weights.similarity * sim
                + self.weights.recency * recency
                + self.weights.success_rate * success
                + self.weights.confidence * confidence
            )

            scored.append(
                ScoredItem(
                    item=dk,
                    score=total,
                    similarity_score=sim,
                    recency_score=recency,
                    success_score=success,
                    confidence_score=confidence,
                )
            )

        return sorted(scored, key=lambda x: -x.score)

    def score_anti_patterns(
        self,
        anti_patterns: List[AntiPattern],
        similarities: Optional[List[float]] = None,
    ) -> List[ScoredItem]:
        """
        Score and rank anti-patterns.

        Anti-patterns that were seen recently are more relevant.

        Args:
            anti_patterns: List of anti-patterns to score
            similarities: Optional pre-computed similarity scores (0-1)

        Returns:
            Sorted list of ScoredItems (highest first)
        """
        if not anti_patterns:
            return []

        similarities = similarities or [1.0] * len(anti_patterns)
        scored = []

        for ap, sim in zip(anti_patterns, similarities, strict=False):
            recency = self._compute_recency_score(ap.last_seen)
            # More occurrences = more important to avoid
            # Normalize occurrence count (cap at 10 for scoring)
            success = min(ap.occurrence_count / 10.0, 1.0)
            confidence = 1.0

            total = (
                self.weights.similarity * sim
                + self.weights.recency * recency
                + self.weights.success_rate * success
                + self.weights.confidence * confidence
            )

            scored.append(
                ScoredItem(
                    item=ap,
                    score=total,
                    similarity_score=sim,
                    recency_score=recency,
                    success_score=success,
                    confidence_score=confidence,
                )
            )

        return sorted(scored, key=lambda x: -x.score)

    def _compute_recency_score(self, timestamp: datetime) -> float:
        """
        Compute recency score using exponential decay.

        Score = 0.5 ^ (days_ago / half_life)

        Args:
            timestamp: When the memory was last validated/used

        Returns:
            Score between 0 and 1 (1 = now, decays over time)
        """
        now = datetime.now(timezone.utc)

        # Handle naive datetimes
        if timestamp.tzinfo is None:
            timestamp = timestamp.replace(tzinfo=timezone.utc)

        delta = now - timestamp
        days_ago = delta.total_seconds() / (24 * 60 * 60)

        # Exponential decay: score halves every half_life days
        return math.pow(0.5, days_ago / self.recency_half_life)

    def apply_score_threshold(
        self,
        scored_items: List[ScoredItem],
        min_score: float = 0.2,
    ) -> List[ScoredItem]:
        """
        Filter out items below a minimum score threshold.

        Args:
            scored_items: List of scored items
            min_score: Minimum score to keep (0-1)

        Returns:
            Filtered list
        """
        return [item for item in scored_items if item.score >= min_score]


def compute_composite_score(
    similarity: float,
    recency_days: float,
    success_rate: float,
    confidence: float,
    weights: Optional[ScoringWeights] = None,
    recency_half_life: float = 30.0,
) -> float:
    """
    Convenience function to compute a single composite score.

    Args:
        similarity: Semantic similarity (0-1)
        recency_days: Days since last validation
        success_rate: Historical success rate (0-1)
        confidence: Stored confidence (0-1)
        weights: Optional scoring weights
        recency_half_life: Days after which recency score halves

    Returns:
        Composite score (0-1)
    """
    weights = weights or ScoringWeights()

    recency_score = math.pow(0.5, recency_days / recency_half_life)

    return (
        weights.similarity * similarity
        + weights.recency * recency_score
        + weights.success_rate * success_rate
        + weights.confidence * confidence
    )
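
For reference, a minimal usage sketch of the API above; it is not part of the package. The import path follows the file path in this diff, and FakeHeuristic is a hypothetical stand-in exposing only the attributes score_heuristics actually reads:

# Minimal usage sketch (not part of the package). Assumes alma-memory is
# installed so the import below resolves; FakeHeuristic is a hypothetical
# stand-in carrying only the attributes the scorer reads via duck typing.
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone

from alma.retrieval.scoring import MemoryScorer, compute_composite_score


@dataclass
class FakeHeuristic:
    last_validated: datetime  # read by _compute_recency_score
    success_rate: float
    confidence: float


now = datetime.now(timezone.utc)
fresh = FakeHeuristic(last_validated=now, success_rate=0.9, confidence=0.8)
stale = FakeHeuristic(
    last_validated=now - timedelta(days=60), success_rate=0.9, confidence=0.8
)

scorer = MemoryScorer(recency_half_life_days=30.0)
ranked = scorer.score_heuristics([fresh, stale], similarities=[0.7, 0.7])
# Identical except for age, so the fresh item ranks first on recency alone:
# recency 0.5^(0/30) = 1.0 vs 0.5^(60/30) = 0.25.
kept = scorer.apply_score_threshold(ranked, min_score=0.2)
for s in kept:
    print(round(s.score, 2))  # ≈ 0.84, then ≈ 0.61

# The module-level helper computes the same weighted sum directly. With the
# default weights (0.4/0.3/0.2/0.1) and recency 0.5^(30/30) = 0.5:
# 0.4*0.9 + 0.3*0.5 + 0.2*0.8 + 0.1*0.7 = 0.36 + 0.15 + 0.16 + 0.07 = 0.74
score = compute_composite_score(
    similarity=0.9, recency_days=30.0, success_rate=0.8, confidence=0.7
)
print(f"{score:.2f}")  # 0.74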