roampal 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. roampal/__init__.py +29 -0
  2. roampal/__main__.py +6 -0
  3. roampal/backend/__init__.py +1 -0
  4. roampal/backend/modules/__init__.py +1 -0
  5. roampal/backend/modules/memory/__init__.py +43 -0
  6. roampal/backend/modules/memory/chromadb_adapter.py +623 -0
  7. roampal/backend/modules/memory/config.py +102 -0
  8. roampal/backend/modules/memory/content_graph.py +543 -0
  9. roampal/backend/modules/memory/context_service.py +455 -0
  10. roampal/backend/modules/memory/embedding_service.py +96 -0
  11. roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
  12. roampal/backend/modules/memory/memory_bank_service.py +433 -0
  13. roampal/backend/modules/memory/memory_types.py +296 -0
  14. roampal/backend/modules/memory/outcome_service.py +400 -0
  15. roampal/backend/modules/memory/promotion_service.py +473 -0
  16. roampal/backend/modules/memory/routing_service.py +444 -0
  17. roampal/backend/modules/memory/scoring_service.py +324 -0
  18. roampal/backend/modules/memory/search_service.py +646 -0
  19. roampal/backend/modules/memory/tests/__init__.py +1 -0
  20. roampal/backend/modules/memory/tests/conftest.py +12 -0
  21. roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
  22. roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
  23. roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
  24. roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
  25. roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
  26. roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
  27. roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
  28. roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
  29. roampal/backend/modules/memory/unified_memory_system.py +1277 -0
  30. roampal/cli.py +638 -0
  31. roampal/hooks/__init__.py +16 -0
  32. roampal/hooks/session_manager.py +587 -0
  33. roampal/hooks/stop_hook.py +176 -0
  34. roampal/hooks/user_prompt_submit_hook.py +103 -0
  35. roampal/mcp/__init__.py +7 -0
  36. roampal/mcp/server.py +611 -0
  37. roampal/server/__init__.py +7 -0
  38. roampal/server/main.py +744 -0
  39. roampal-0.1.4.dist-info/METADATA +179 -0
  40. roampal-0.1.4.dist-info/RECORD +44 -0
  41. roampal-0.1.4.dist-info/WHEEL +5 -0
  42. roampal-0.1.4.dist-info/entry_points.txt +2 -0
  43. roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
  44. roampal-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,324 @@
1
+ """
2
+ Scoring Service
3
+
4
+ Handles all score calculations for the memory system including:
5
+ - Wilson score lower bound calculation
6
+ - Final rank score calculation with dynamic weighting
7
+ - Memory maturity-based weight adjustments
8
+
9
+ Extracted from UnifiedMemorySystem lines 47-90 (wilson_score_lower) and
10
+ lines 1514-1656 (scoring logic in search()).
11
+ """
12
+
13
+ import math
14
+ import json
15
+ import logging
16
+ from typing import Dict, Any, Tuple, Optional
17
+ from scipy import stats
18
+
19
+ from .config import MemoryConfig
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
def wilson_score_lower(successes: float, total: int, confidence: float = 0.95) -> float:
    """
    Calculate the lower bound of the Wilson score confidence interval (v0.2.5).

    This solves the "cold start" ranking problem where a memory with 1 success / 1 use (100%)
    would outrank a proven memory with 90/100 (90%). Wilson score uses statistical confidence
    intervals to favor proven records over lucky new ones.

    Args:
        successes: Number of successful outcomes (works + partial).
            Values outside [0, total] are clamped — callers may estimate
            successes as ``raw_score * uses`` and a corrupt score > 1.0 would
            otherwise make the variance term negative and math.sqrt raise.
        total: Total number of uses.
        confidence: Confidence level (0.95 = 95% confidence interval).

    Returns:
        Lower bound of confidence interval (0.0 to 1.0)
        - 1/1 success → ~0.21 (low confidence due to small sample)
        - 90/100 success → ~0.83 (high confidence due to large sample)
        - 0/0 → 0.5 (neutral for untested memories)

    Formula: Wilson score interval lower bound
        p̃ = (p + z²/2n - z√(p(1-p)/n + z²/4n²)) / (1 + z²/n)

    Reference: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval
    """
    if total <= 0:
        return 0.5  # Neutral score for untested memories (also guards bad negative totals)

    # z-score for confidence level (1.96 for 95% confidence)
    z = stats.norm.ppf(1 - (1 - confidence) / 2)

    # Clamp the observed proportion into [0, 1]: p outside that range would
    # make p*(1-p) negative and the sqrt below raise ValueError.
    p = min(max(successes / total, 0.0), 1.0)
    n = total

    # Wilson score formula
    denominator = 1 + z * z / n
    center = p + z * z / (2 * n)

    # Variance term under the square root
    variance = p * (1 - p) / n + z * z / (4 * n * n)

    # Lower bound of confidence interval
    lower_bound = (center - z * math.sqrt(variance)) / denominator

    return max(0.0, lower_bound)  # Ensure non-negative
68
+
69
+
70
+ class ScoringService:
71
+ """
72
+ Service for calculating memory scores.
73
+
74
+ Responsibilities:
75
+ - Wilson score calculation for statistical confidence
76
+ - Final rank score calculation with dynamic weighting
77
+ - Memory maturity-based weight adjustments
78
+ """
79
+
80
+ def __init__(self, config: Optional[MemoryConfig] = None):
81
+ """
82
+ Initialize the scoring service.
83
+
84
+ Args:
85
+ config: Memory configuration. Uses defaults if not provided.
86
+ """
87
+ self.config = config or MemoryConfig()
88
+
89
+ def calculate_wilson_score(
90
+ self,
91
+ successes: float,
92
+ total: int,
93
+ confidence: Optional[float] = None
94
+ ) -> float:
95
+ """
96
+ Calculate Wilson score lower bound.
97
+
98
+ Args:
99
+ successes: Number of successful outcomes
100
+ total: Total number of uses
101
+ confidence: Confidence level (uses config default if not provided)
102
+
103
+ Returns:
104
+ Wilson score lower bound (0.0 to 1.0)
105
+ """
106
+ conf = confidence or self.config.wilson_confidence
107
+ return wilson_score_lower(successes, total, conf)
108
+
109
+ def count_successes_from_history(self, outcome_history: str) -> float:
110
+ """
111
+ Count successes from outcome history JSON string.
112
+
113
+ Args:
114
+ outcome_history: JSON string of outcome entries
115
+
116
+ Returns:
117
+ Number of successes (worked=1, partial=0.5)
118
+ """
119
+ successes = 0.0
120
+ try:
121
+ history = json.loads(outcome_history) if outcome_history else []
122
+ for entry in history:
123
+ if isinstance(entry, dict):
124
+ outcome = entry.get("outcome", "")
125
+ if outcome == "worked":
126
+ successes += 1.0
127
+ elif outcome == "partial":
128
+ successes += 0.5
129
+ except (json.JSONDecodeError, TypeError):
130
+ pass
131
+ return successes
132
+
133
+ def calculate_learned_score(
134
+ self,
135
+ raw_score: float,
136
+ uses: int,
137
+ outcome_history: str = ""
138
+ ) -> Tuple[float, float]:
139
+ """
140
+ Calculate learned score with Wilson score blending.
141
+
142
+ Args:
143
+ raw_score: Raw score from metadata
144
+ uses: Number of times memory was used
145
+ outcome_history: JSON string of outcome history
146
+
147
+ Returns:
148
+ Tuple of (learned_score, wilson_score)
149
+ """
150
+ # Count successes from outcome history
151
+ successes = self.count_successes_from_history(outcome_history)
152
+
153
+ # Fallback: estimate from raw score if no history
154
+ if successes == 0 and uses > 0:
155
+ successes = raw_score * uses
156
+
157
+ # Calculate Wilson score
158
+ wilson = self.calculate_wilson_score(successes, uses)
159
+
160
+ # Blend Wilson score with raw score based on sample size
161
+ if uses == 0:
162
+ learned = raw_score
163
+ elif uses < 3:
164
+ blend = uses / 3 # 0.33 for 1 use, 0.67 for 2 uses
165
+ learned = (1 - blend) * raw_score + blend * wilson
166
+ else:
167
+ learned = wilson
168
+
169
+ return learned, wilson
170
+
171
+ def get_dynamic_weights(
172
+ self,
173
+ uses: int,
174
+ learned_score: float,
175
+ collection: str,
176
+ importance: float = 0.7,
177
+ confidence: float = 0.7
178
+ ) -> Tuple[float, float]:
179
+ """
180
+ Get dynamic embedding/learned weights based on memory maturity.
181
+
182
+ Args:
183
+ uses: Number of times memory was used
184
+ learned_score: Calculated learned score
185
+ collection: Collection name
186
+ importance: Memory importance (for memory_bank)
187
+ confidence: Memory confidence (for memory_bank)
188
+
189
+ Returns:
190
+ Tuple of (embedding_weight, learned_weight)
191
+ """
192
+ if uses >= 5 and learned_score >= 0.8:
193
+ # PROVEN HIGH-VALUE MEMORY
194
+ return (self.config.embedding_weight_proven, self.config.learned_weight_proven)
195
+
196
+ elif uses >= 3 and learned_score >= 0.7:
197
+ # ESTABLISHED MEMORY
198
+ return (0.25, 0.75)
199
+
200
+ elif uses >= 2 and learned_score >= 0.5:
201
+ # EMERGING PATTERN (positive)
202
+ return (0.35, 0.65)
203
+
204
+ elif uses >= 2:
205
+ # FAILING PATTERN
206
+ return (0.7, 0.3)
207
+
208
+ elif collection == "memory_bank":
209
+ # MEMORY BANK SPECIAL CASE - quality-based ranking
210
+ quality = importance * confidence
211
+ if quality >= 0.8:
212
+ return (0.45, 0.55)
213
+ else:
214
+ return (0.5, 0.5)
215
+
216
+ else:
217
+ # NEW/UNKNOWN MEMORY
218
+ return (self.config.embedding_weight_new, self.config.learned_weight_new)
219
+
220
+ def calculate_final_score(
221
+ self,
222
+ metadata: Dict[str, Any],
223
+ distance: float,
224
+ collection: str
225
+ ) -> Dict[str, float]:
226
+ """
227
+ Calculate final rank score for a search result.
228
+
229
+ This is the main scoring function that combines:
230
+ - Embedding similarity (from distance)
231
+ - Learned score (from outcome history with Wilson scoring)
232
+ - Dynamic weighting based on memory maturity
233
+
234
+ Args:
235
+ metadata: Memory metadata dict
236
+ distance: L2 distance from embedding search
237
+ collection: Collection name
238
+
239
+ Returns:
240
+ Dict with scoring details:
241
+ {
242
+ "final_rank_score": combined score,
243
+ "wilson_score": statistical confidence,
244
+ "embedding_similarity": 1/(1+distance),
245
+ "learned_score": outcome-based score,
246
+ "embedding_weight": weight used,
247
+ "learned_weight": weight used
248
+ }
249
+ """
250
+ raw_score = metadata.get("score", 0.5)
251
+ uses = metadata.get("uses", 0)
252
+ outcome_history = metadata.get("outcome_history", "")
253
+ importance = metadata.get("importance", 0.7)
254
+ confidence = metadata.get("confidence", 0.7)
255
+
256
+ # Ensure numeric types
257
+ try:
258
+ importance = float(importance) if not isinstance(importance, (int, float)) else importance
259
+ confidence = float(confidence) if not isinstance(confidence, (int, float)) else confidence
260
+ except (ValueError, TypeError):
261
+ importance = 0.7
262
+ confidence = 0.7
263
+
264
+ # Calculate learned score with Wilson blending
265
+ learned_score, wilson_score = self.calculate_learned_score(
266
+ raw_score, uses, outcome_history
267
+ )
268
+
269
+ # Special case: memory_bank uses quality as learned score
270
+ if collection == "memory_bank":
271
+ quality = importance * confidence
272
+ learned_score = quality
273
+
274
+ # Convert distance to similarity
275
+ embedding_similarity = 1.0 / (1.0 + distance)
276
+
277
+ # Get dynamic weights
278
+ embedding_weight, learned_weight = self.get_dynamic_weights(
279
+ uses, learned_score, collection, importance, confidence
280
+ )
281
+
282
+ # Calculate combined score
283
+ final_score = (embedding_weight * embedding_similarity) + (learned_weight * learned_score)
284
+
285
+ return {
286
+ "final_rank_score": final_score,
287
+ "wilson_score": wilson_score,
288
+ "embedding_similarity": embedding_similarity,
289
+ "learned_score": learned_score,
290
+ "embedding_weight": embedding_weight,
291
+ "learned_weight": learned_weight,
292
+ }
293
+
294
+ def apply_scoring_to_results(
295
+ self,
296
+ results: list,
297
+ sort: bool = True
298
+ ) -> list:
299
+ """
300
+ Apply scoring to a list of search results.
301
+
302
+ Args:
303
+ results: List of search result dicts
304
+ sort: Whether to sort by final_rank_score (descending)
305
+
306
+ Returns:
307
+ List of results with scoring fields added
308
+ """
309
+ for r in results:
310
+ metadata = r.get("metadata", {})
311
+ distance = r.get("distance", 1.0)
312
+ collection = r.get("collection", "")
313
+
314
+ scores = self.calculate_final_score(metadata, distance, collection)
315
+
316
+ # Add scores to result
317
+ r.update(scores)
318
+ r["original_distance"] = distance
319
+ r["uses"] = metadata.get("uses", 0)
320
+
321
+ if sort:
322
+ results.sort(key=lambda x: x.get("final_rank_score", 0.0), reverse=True)
323
+
324
+ return results