roampal 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roampal/__init__.py +29 -0
- roampal/__main__.py +6 -0
- roampal/backend/__init__.py +1 -0
- roampal/backend/modules/__init__.py +1 -0
- roampal/backend/modules/memory/__init__.py +43 -0
- roampal/backend/modules/memory/chromadb_adapter.py +623 -0
- roampal/backend/modules/memory/config.py +102 -0
- roampal/backend/modules/memory/content_graph.py +543 -0
- roampal/backend/modules/memory/context_service.py +455 -0
- roampal/backend/modules/memory/embedding_service.py +96 -0
- roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
- roampal/backend/modules/memory/memory_bank_service.py +433 -0
- roampal/backend/modules/memory/memory_types.py +296 -0
- roampal/backend/modules/memory/outcome_service.py +400 -0
- roampal/backend/modules/memory/promotion_service.py +473 -0
- roampal/backend/modules/memory/routing_service.py +444 -0
- roampal/backend/modules/memory/scoring_service.py +324 -0
- roampal/backend/modules/memory/search_service.py +646 -0
- roampal/backend/modules/memory/tests/__init__.py +1 -0
- roampal/backend/modules/memory/tests/conftest.py +12 -0
- roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
- roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
- roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
- roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
- roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
- roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
- roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
- roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
- roampal/backend/modules/memory/unified_memory_system.py +1277 -0
- roampal/cli.py +638 -0
- roampal/hooks/__init__.py +16 -0
- roampal/hooks/session_manager.py +587 -0
- roampal/hooks/stop_hook.py +176 -0
- roampal/hooks/user_prompt_submit_hook.py +103 -0
- roampal/mcp/__init__.py +7 -0
- roampal/mcp/server.py +611 -0
- roampal/server/__init__.py +7 -0
- roampal/server/main.py +744 -0
- roampal-0.1.4.dist-info/METADATA +179 -0
- roampal-0.1.4.dist-info/RECORD +44 -0
- roampal-0.1.4.dist-info/WHEEL +5 -0
- roampal-0.1.4.dist-info/entry_points.txt +2 -0
- roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
- roampal-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Scoring Service
|
|
3
|
+
|
|
4
|
+
Handles all score calculations for the memory system including:
|
|
5
|
+
- Wilson score lower bound calculation
|
|
6
|
+
- Final rank score calculation with dynamic weighting
|
|
7
|
+
- Memory maturity-based weight adjustments
|
|
8
|
+
|
|
9
|
+
Extracted from UnifiedMemorySystem lines 47-90 (wilson_score_lower) and
|
|
10
|
+
lines 1514-1656 (scoring logic in search()).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import math
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Dict, Any, Tuple, Optional
|
|
17
|
+
from scipy import stats
|
|
18
|
+
|
|
19
|
+
from .config import MemoryConfig
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def wilson_score_lower(successes: float, total: int, confidence: float = 0.95) -> float:
    """
    Calculate the lower bound of the Wilson score confidence interval (v0.2.5).

    This solves the "cold start" ranking problem where a memory with 1 success / 1 use
    (100%) would outrank a proven memory with 90/100 (90%). The Wilson interval uses
    statistical confidence to favor proven records over lucky new ones.

    Args:
        successes: Number of successful outcomes (works + partial).
        total: Total number of uses.
        confidence: Confidence level (0.95 = 95% confidence interval).

    Returns:
        Lower bound of the confidence interval (0.0 to 1.0):
        - 1/1 success  -> ~0.21 (low confidence due to small sample)
        - 90/100       -> ~0.83 (high confidence due to large sample)
        - 0/0          -> 0.5   (neutral for untested memories)

    Formula (Wilson score interval, lower bound):
        p~ = (p + z^2/2n - z*sqrt(p(1-p)/n + z^2/4n^2)) / (1 + z^2/n)

    Reference:
        https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval
    """
    # Guard against zero AND negative totals: either way there is no usable
    # sample, so return the neutral prior. (Checking only `== 0` let a
    # negative total reach the math below, producing a negative proportion
    # and potentially a math domain error in sqrt.)
    if total <= 0:
        return 0.5  # Neutral score for untested memories

    # Two-sided z-score for the requested confidence level (1.96 for 95%).
    z = stats.norm.ppf(1 - (1 - confidence) / 2)

    p = successes / total  # Observed success proportion
    n = total

    # Wilson score formula components.
    denominator = 1 + z * z / n
    center = p + z * z / (2 * n)

    # Variance term under the square root.
    variance = p * (1 - p) / n + z * z / (4 * n * n)

    # Lower bound of the confidence interval.
    lower_bound = (center - z * math.sqrt(variance)) / denominator

    # Clamp at 0: for very small samples the interval can dip below zero.
    return max(0.0, lower_bound)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ScoringService:
    """
    Service for calculating memory scores.

    Responsibilities:
    - Wilson score calculation for statistical confidence
    - Final rank score calculation with dynamic weighting
    - Memory maturity-based weight adjustments
    """

    def __init__(self, config: Optional["MemoryConfig"] = None):
        """
        Initialize the scoring service.

        Args:
            config: Memory configuration. Uses defaults if not provided.
        """
        # Explicit `is None` check (not `config or ...`) so a falsy-but-valid
        # config object supplied by a caller is never silently replaced.
        self.config = config if config is not None else MemoryConfig()

    def calculate_wilson_score(
        self,
        successes: float,
        total: int,
        confidence: Optional[float] = None
    ) -> float:
        """
        Calculate Wilson score lower bound.

        Args:
            successes: Number of successful outcomes
            total: Total number of uses
            confidence: Confidence level (uses config default if not provided)

        Returns:
            Wilson score lower bound (0.0 to 1.0)
        """
        # `confidence or default` would discard a caller-supplied falsy value
        # (e.g. 0.0) and fall back to the config default; test None explicitly.
        conf = self.config.wilson_confidence if confidence is None else confidence
        return wilson_score_lower(successes, total, conf)

    def count_successes_from_history(self, outcome_history: str) -> float:
        """
        Count successes from outcome history JSON string.

        Args:
            outcome_history: JSON string of outcome entries

        Returns:
            Number of successes (worked=1, partial=0.5)
        """
        successes = 0.0
        try:
            history = json.loads(outcome_history) if outcome_history else []
            for entry in history:
                if isinstance(entry, dict):
                    outcome = entry.get("outcome", "")
                    if outcome == "worked":
                        successes += 1.0
                    elif outcome == "partial":
                        successes += 0.5
        except (json.JSONDecodeError, TypeError):
            # Malformed history is treated as "no recorded outcomes" rather
            # than failing the whole scoring pass.
            pass
        return successes

    def calculate_learned_score(
        self,
        raw_score: float,
        uses: int,
        outcome_history: str = ""
    ) -> Tuple[float, float]:
        """
        Calculate learned score with Wilson score blending.

        Args:
            raw_score: Raw score from metadata
            uses: Number of times memory was used
            outcome_history: JSON string of outcome history

        Returns:
            Tuple of (learned_score, wilson_score)
        """
        # Count successes from outcome history
        successes = self.count_successes_from_history(outcome_history)

        # Fallback: estimate successes from the raw score when there is no
        # usable history. NOTE(review): assumes raw_score approximates a
        # success rate; a history of only failures also takes this path.
        if successes == 0 and uses > 0:
            successes = raw_score * uses

        # Calculate Wilson score
        wilson = self.calculate_wilson_score(successes, uses)

        # Blend Wilson score with raw score based on sample size: trust the
        # raw score for unused memories, shift toward Wilson as uses grow.
        if uses == 0:
            learned = raw_score
        elif uses < 3:
            blend = uses / 3  # 0.33 for 1 use, 0.67 for 2 uses
            learned = (1 - blend) * raw_score + blend * wilson
        else:
            learned = wilson

        return learned, wilson

    def get_dynamic_weights(
        self,
        uses: int,
        learned_score: float,
        collection: str,
        importance: float = 0.7,
        confidence: float = 0.7
    ) -> Tuple[float, float]:
        """
        Get dynamic embedding/learned weights based on memory maturity.

        Args:
            uses: Number of times memory was used
            learned_score: Calculated learned score
            collection: Collection name
            importance: Memory importance (for memory_bank)
            confidence: Memory confidence (for memory_bank)

        Returns:
            Tuple of (embedding_weight, learned_weight)
        """
        if uses >= 5 and learned_score >= 0.8:
            # PROVEN HIGH-VALUE MEMORY: trust the track record most.
            return (self.config.embedding_weight_proven, self.config.learned_weight_proven)

        elif uses >= 3 and learned_score >= 0.7:
            # ESTABLISHED MEMORY
            return (0.25, 0.75)

        elif uses >= 2 and learned_score >= 0.5:
            # EMERGING PATTERN (positive)
            return (0.35, 0.65)

        elif uses >= 2:
            # FAILING PATTERN: lean back on embedding similarity.
            return (0.7, 0.3)

        elif collection == "memory_bank":
            # MEMORY BANK SPECIAL CASE - quality-based ranking
            quality = importance * confidence
            if quality >= 0.8:
                return (0.45, 0.55)
            else:
                return (0.5, 0.5)

        else:
            # NEW/UNKNOWN MEMORY
            return (self.config.embedding_weight_new, self.config.learned_weight_new)

    def calculate_final_score(
        self,
        metadata: Dict[str, Any],
        distance: float,
        collection: str
    ) -> Dict[str, float]:
        """
        Calculate final rank score for a search result.

        This is the main scoring function that combines:
        - Embedding similarity (from distance)
        - Learned score (from outcome history with Wilson scoring)
        - Dynamic weighting based on memory maturity

        Args:
            metadata: Memory metadata dict
            distance: L2 distance from embedding search
            collection: Collection name

        Returns:
            Dict with scoring details:
            {
                "final_rank_score": combined score,
                "wilson_score": statistical confidence,
                "embedding_similarity": 1/(1+distance),
                "learned_score": outcome-based score,
                "embedding_weight": weight used,
                "learned_weight": weight used
            }
        """
        raw_score = metadata.get("score", 0.5)
        uses = metadata.get("uses", 0)
        outcome_history = metadata.get("outcome_history", "")
        importance = metadata.get("importance", 0.7)
        confidence = metadata.get("confidence", 0.7)

        # Ensure numeric types. Metadata stores may return these as strings;
        # coerce raw_score/uses too (previously only importance/confidence
        # were coerced, so a string "uses" raised TypeError below).
        try:
            importance = float(importance) if not isinstance(importance, (int, float)) else importance
            confidence = float(confidence) if not isinstance(confidence, (int, float)) else confidence
        except (ValueError, TypeError):
            importance = 0.7
            confidence = 0.7
        try:
            raw_score = float(raw_score) if not isinstance(raw_score, (int, float)) else raw_score
            uses = int(uses) if not isinstance(uses, int) else uses
        except (ValueError, TypeError):
            raw_score = 0.5
            uses = 0

        # Calculate learned score with Wilson blending
        learned_score, wilson_score = self.calculate_learned_score(
            raw_score, uses, outcome_history
        )

        # Special case: memory_bank uses quality as learned score
        if collection == "memory_bank":
            quality = importance * confidence
            learned_score = quality

        # Convert L2 distance to a similarity in (0, 1].
        embedding_similarity = 1.0 / (1.0 + distance)

        # Get dynamic weights
        embedding_weight, learned_weight = self.get_dynamic_weights(
            uses, learned_score, collection, importance, confidence
        )

        # Calculate combined score
        final_score = (embedding_weight * embedding_similarity) + (learned_weight * learned_score)

        return {
            "final_rank_score": final_score,
            "wilson_score": wilson_score,
            "embedding_similarity": embedding_similarity,
            "learned_score": learned_score,
            "embedding_weight": embedding_weight,
            "learned_weight": learned_weight,
        }

    def apply_scoring_to_results(
        self,
        results: list,
        sort: bool = True
    ) -> list:
        """
        Apply scoring to a list of search results.

        Args:
            results: List of search result dicts
            sort: Whether to sort by final_rank_score (descending)

        Returns:
            List of results with scoring fields added (mutates in place)
        """
        for r in results:
            metadata = r.get("metadata", {})
            distance = r.get("distance", 1.0)
            collection = r.get("collection", "")

            scores = self.calculate_final_score(metadata, distance, collection)

            # Add scores to result
            r.update(scores)
            r["original_distance"] = distance
            r["uses"] = metadata.get("uses", 0)

        if sort:
            results.sort(key=lambda x: x.get("final_rank_score", 0.0), reverse=True)

        return results
|