roampal 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. roampal/__init__.py +29 -0
  2. roampal/__main__.py +6 -0
  3. roampal/backend/__init__.py +1 -0
  4. roampal/backend/modules/__init__.py +1 -0
  5. roampal/backend/modules/memory/__init__.py +43 -0
  6. roampal/backend/modules/memory/chromadb_adapter.py +623 -0
  7. roampal/backend/modules/memory/config.py +102 -0
  8. roampal/backend/modules/memory/content_graph.py +543 -0
  9. roampal/backend/modules/memory/context_service.py +455 -0
  10. roampal/backend/modules/memory/embedding_service.py +96 -0
  11. roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
  12. roampal/backend/modules/memory/memory_bank_service.py +433 -0
  13. roampal/backend/modules/memory/memory_types.py +296 -0
  14. roampal/backend/modules/memory/outcome_service.py +400 -0
  15. roampal/backend/modules/memory/promotion_service.py +473 -0
  16. roampal/backend/modules/memory/routing_service.py +444 -0
  17. roampal/backend/modules/memory/scoring_service.py +324 -0
  18. roampal/backend/modules/memory/search_service.py +646 -0
  19. roampal/backend/modules/memory/tests/__init__.py +1 -0
  20. roampal/backend/modules/memory/tests/conftest.py +12 -0
  21. roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
  22. roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
  23. roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
  24. roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
  25. roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
  26. roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
  27. roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
  28. roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
  29. roampal/backend/modules/memory/unified_memory_system.py +1277 -0
  30. roampal/cli.py +638 -0
  31. roampal/hooks/__init__.py +16 -0
  32. roampal/hooks/session_manager.py +587 -0
  33. roampal/hooks/stop_hook.py +176 -0
  34. roampal/hooks/user_prompt_submit_hook.py +103 -0
  35. roampal/mcp/__init__.py +7 -0
  36. roampal/mcp/server.py +611 -0
  37. roampal/server/__init__.py +7 -0
  38. roampal/server/main.py +744 -0
  39. roampal-0.1.4.dist-info/METADATA +179 -0
  40. roampal-0.1.4.dist-info/RECORD +44 -0
  41. roampal-0.1.4.dist-info/WHEEL +5 -0
  42. roampal-0.1.4.dist-info/entry_points.txt +2 -0
  43. roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
  44. roampal-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,444 @@
1
+ """
2
+ Routing Service - Intelligent collection routing using learned KG patterns.
3
+
4
+ Extracted from UnifiedMemorySystem as part of refactoring.
5
+
6
+ Responsibilities:
7
+ - Query preprocessing (acronym expansion)
8
+ - Intelligent routing based on learned patterns
9
+ - Tier score calculation for collections
10
+ - Tier recommendations for insights
11
+ """
12
+
13
+ import logging
14
+ import re
15
+ from datetime import datetime
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ from .config import MemoryConfig
19
+ from .knowledge_graph_service import KnowledgeGraphService
20
+
21
logger = logging.getLogger(__name__)


# Collection names for routing
ALL_COLLECTIONS = ["working", "patterns", "history", "books", "memory_bank"]


class RoutingService:
    """
    Intelligent collection routing using learned KG patterns.

    Implements architecture.md specification with learning phases:
    - Phase 1 (Exploration): total_score < 0.5 -> all 5 collections
    - Phase 2 (Medium Confidence): 0.5 <= total_score < 2.0 -> top 2-3 collections
    - Phase 3 (High Confidence): total_score >= 2.0 -> top 1-2 collections
    """

    # Acronym dictionary for query expansion
    ACRONYM_DICT = {
        # Technology
        "api": "application programming interface",
        "apis": "application programming interfaces",
        "sdk": "software development kit",
        "sdks": "software development kits",
        "ui": "user interface",
        "ux": "user experience",
        "db": "database",
        "sql": "structured query language",
        "html": "hypertext markup language",
        "css": "cascading style sheets",
        "js": "javascript",
        "ts": "typescript",
        "ml": "machine learning",
        "ai": "artificial intelligence",
        "llm": "large language model",
        "nlp": "natural language processing",
        "gpu": "graphics processing unit",
        "cpu": "central processing unit",
        "ram": "random access memory",
        "ssd": "solid state drive",
        "hdd": "hard disk drive",
        "os": "operating system",
        "ide": "integrated development environment",
        "cli": "command line interface",
        "gui": "graphical user interface",
        "ci": "continuous integration",
        "cd": "continuous deployment",
        "devops": "development operations",
        "qa": "quality assurance",
        "uat": "user acceptance testing",
        "mvp": "minimum viable product",
        "poc": "proof of concept",
        "saas": "software as a service",
        "paas": "platform as a service",
        "iaas": "infrastructure as a service",
        "iot": "internet of things",
        "vpn": "virtual private network",
        "dns": "domain name system",
        "http": "hypertext transfer protocol",
        "https": "hypertext transfer protocol secure",
        "ftp": "file transfer protocol",
        "ssh": "secure shell",
        "ssl": "secure sockets layer",
        "tls": "transport layer security",
        "jwt": "json web token",
        "oauth": "open authorization",
        "rest": "representational state transfer",
        "crud": "create read update delete",
        "orm": "object relational mapping",
        "json": "javascript object notation",
        "xml": "extensible markup language",
        "yaml": "yaml ain't markup language",
        "csv": "comma separated values",
        "pdf": "portable document format",
        "svg": "scalable vector graphics",
        "png": "portable network graphics",
        "jpg": "joint photographic experts group",
        "gif": "graphics interchange format",
        "aws": "amazon web services",
        "gcp": "google cloud platform",
        "vm": "virtual machine",
        "k8s": "kubernetes",
        "npm": "node package manager",
        "pip": "pip installs packages",
        "git": "git",  # Keep as is, well known
        "pr": "pull request",
        "mr": "merge request",
        "env": "environment",
        "prod": "production",
        "dev": "development",
        "repo": "repository",
        "config": "configuration",
        "auth": "authentication",
        "async": "asynchronous",
        "sync": "synchronous",

        # Locations
        "nyc": "new york city",
        "la": "los angeles",
        "sf": "san francisco",
        "dc": "washington dc",
        "uk": "united kingdom",
        "usa": "united states of america",
        "us": "united states",

        # Organizations
        "nasa": "national aeronautics and space administration",
        "fbi": "federal bureau of investigation",
        "cia": "central intelligence agency",
        "fda": "food and drug administration",
        "cdc": "centers for disease control",
        "mit": "massachusetts institute of technology",
        "ucla": "university of california los angeles",
        "stanford": "stanford university",
        "harvard": "harvard university",

        # Business
        "ceo": "chief executive officer",
        "cto": "chief technology officer",
        "cfo": "chief financial officer",
        "coo": "chief operating officer",
        "vp": "vice president",
        "hr": "human resources",
        "roi": "return on investment",
        "kpi": "key performance indicator",
        "okr": "objectives and key results",
        "b2b": "business to business",
        "b2c": "business to consumer",
        "erp": "enterprise resource planning",
        "crm": "customer relationship management",
        "eod": "end of day",
        "asap": "as soon as possible",
        "eta": "estimated time of arrival",
        "fyi": "for your information",
        "tbd": "to be determined",
        "pov": "point of view",
        "wfh": "work from home",
    }

    # Build reverse mapping (expansion -> acronym) for bidirectional matching
    EXPANSION_TO_ACRONYM = {v.lower(): k.lower() for k, v in ACRONYM_DICT.items()}

    # Punctuation stripped from both ends of a word before the acronym lookup
    _WORD_STRIP_CHARS = ".,!?;:'\"()"

    # Routing phase boundaries on the summed tier score (see class docstring)
    _EXPLORATION_MAX_SCORE = 0.5
    _MEDIUM_MAX_SCORE = 2.0

    def __init__(
        self,
        kg_service: "KnowledgeGraphService",
        config: Optional["MemoryConfig"] = None,
    ):
        """
        Initialize RoutingService.

        Args:
            kg_service: KnowledgeGraphService for concept extraction and KG access
            config: Optional MemoryConfig for thresholds; a default
                MemoryConfig() is created when omitted (or falsy)
        """
        self.kg_service = kg_service
        self.config = config or MemoryConfig()

    @property
    def knowledge_graph(self) -> Dict[str, Any]:
        """Access the knowledge graph from KG service."""
        return self.kg_service.knowledge_graph

    # =========================================================================
    # Query Preprocessing
    # =========================================================================

    def preprocess_query(self, query: str) -> str:
        """
        Preprocess search query for better retrieval:
        1. Expand acronyms (API -> "API application programming interface")
        2. Normalize whitespace

        This improves recall when user queries with acronyms but facts stored
        with full names. The original words are kept; expansions are appended
        after them, each at most once.

        Args:
            query: Original search query

        Returns:
            Whitespace-normalized query, followed by the expansions of any
            known acronyms whose expansion does not already occur in it.
            Falsy input is returned unchanged.
        """
        if not query:
            return query

        # Normalize whitespace
        query = " ".join(query.split())
        query_lower = query.lower()

        # NOTE(review): lookup is a plain dictionary match, so everyday words
        # that double as keys ("us", "rest", "la") are expanded too.
        expansions_to_add: List[str] = []
        for word in query.split():
            token = word.lower().strip(self._WORD_STRIP_CHARS)
            expansion = self.ACRONYM_DICT.get(token)
            if expansion is None:
                continue

            # Skip when the expansion is already spelled out in the query, or
            # was already queued by an earlier occurrence of the same acronym.
            if expansion.lower() in query_lower or expansion in expansions_to_add:
                continue

            expansions_to_add.append(expansion)
            logger.debug("[QUERY_PREPROCESS] Expanded '%s' -> '%s'", word, expansion)

        if not expansions_to_add:
            return query

        # Append expansions to query (keeps original + adds expanded versions)
        enhanced_query = query + " " + " ".join(expansions_to_add)
        logger.debug(
            "[QUERY_PREPROCESS] Enhanced query: '%s' -> '%s'", query, enhanced_query
        )
        return enhanced_query

    # =========================================================================
    # Tier Score Calculation
    # =========================================================================

    def calculate_tier_scores(self, concepts: List[str]) -> Dict[str, float]:
        """
        Calculate tier scores for each collection based on learned patterns.
        Implements architecture.md tier scoring formula:

            tier_score = success_rate * confidence
            where:
                success_rate = successes / (successes + failures)
                confidence = min(total_uses / 10, 1.0)

        total_uses is successes + failures + partials; the separate "total"
        counter maintained by _track_routing_usage is not read here.

        Args:
            concepts: Concepts to look up in the KG "routing_patterns" table

        Returns:
            Dict mapping every name in ALL_COLLECTIONS to its score summed
            over all matching concepts (0.0 when nothing matched).
        """
        collection_scores = dict.fromkeys(ALL_COLLECTIONS, 0.0)
        routing_patterns = self.knowledge_graph.get("routing_patterns", {})

        # Aggregate scores across all concepts
        for concept in concepts:
            if concept not in routing_patterns:
                continue

            collections_used = routing_patterns[concept].get("collections_used", {})
            for collection, stats in collections_used.items():
                if collection not in collection_scores:
                    continue  # Skip unknown collections

                successes = stats.get("successes", 0)
                failures = stats.get("failures", 0)
                partials = stats.get("partials", 0)
                total_uses = successes + failures + partials

                # Calculate success_rate (exclude partials from denominator)
                confirmed = successes + failures
                if confirmed > 0:
                    success_rate = successes / confirmed
                else:
                    success_rate = 0.5  # Neutral for no confirmed outcomes

                # Calculate confidence (reaches 1.0 after 10 uses)
                confidence = min(total_uses / 10.0, 1.0)

                collection_scores[collection] += success_rate * confidence

        return collection_scores

    def _rank_collections(self, concepts: List[str]) -> tuple:
        """Return (collections sorted by score, highest first; summed score)."""
        collection_scores = self.calculate_tier_scores(concepts)
        total_score = sum(collection_scores.values())
        # sorted() is stable, so ties keep ALL_COLLECTIONS order.
        sorted_collections = sorted(
            collection_scores.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        return sorted_collections, total_score

    def _select_collections(self, sorted_collections: List[Any], total_score: float) -> tuple:
        """
        Apply the routing phase thresholds to ranked collection scores.

        Args:
            sorted_collections: (collection, score) pairs, highest score first
            total_score: Sum of all collection scores

        Returns:
            (confidence_level, selected) where confidence_level is one of
            "exploration", "medium" or "high" and selected is the list of
            collection names to search.
        """
        if total_score < self._EXPLORATION_MAX_SCORE:
            # EXPLORATION PHASE: No learned patterns yet, search everything
            return "exploration", ALL_COLLECTIONS.copy()

        if total_score < self._MEDIUM_MAX_SCORE:
            # MEDIUM CONFIDENCE: top collections with score > 0.1, up to 3
            level, limit, min_score = "medium", 3, 0.1
        else:
            # HIGH CONFIDENCE: top collections with score > 0.5, up to 2
            level, limit, min_score = "high", 2, 0.5

        selected = [
            coll for coll, score in sorted_collections[:limit]
            if score > min_score
        ]
        if not selected:
            selected = [sorted_collections[0][0]]  # At least take top 1
        return level, selected

    # =========================================================================
    # Query Routing
    # =========================================================================

    def route_query(self, query: str) -> List[str]:
        """
        Intelligent routing using learned KG patterns.
        Implements architecture.md specification with learning phases:

        Phase 1 (Exploration): total_score < 0.5 -> search all 5 collections
        Phase 2 (Medium Confidence): 0.5 <= total_score < 2.0 -> search top 2-3 collections
        Phase 3 (High Confidence): total_score >= 2.0 -> search top 1-2 collections

        Side effect: when concepts are extracted, usage counters in the KG
        routing patterns are updated via _track_routing_usage.

        Args:
            query: Search query passed to kg_service.extract_concepts

        Returns:
            List of collection names to search.
        """
        # Extract concepts from query using KG service
        concepts = self.kg_service.extract_concepts(query)

        if not concepts:
            logger.debug("[Routing] No concepts extracted, searching all collections")
            return ALL_COLLECTIONS.copy()

        sorted_collections, total_score = self._rank_collections(concepts)
        level, selected = self._select_collections(sorted_collections, total_score)

        if level == "exploration":
            logger.info(
                "[Routing] Exploration phase (score=%.2f): searching all collections",
                total_score,
            )
        elif level == "medium":
            logger.info(
                "[Routing] Medium confidence (score=%.2f): searching %s",
                total_score, selected,
            )
        else:
            logger.info(
                "[Routing] High confidence (score=%.2f): searching %s",
                total_score, selected,
            )

        # Log concept extraction and scores for debugging
        logger.debug("[Routing] Concepts: %s...", concepts[:5])
        logger.debug("[Routing] Scores: %s", dict(sorted_collections[:3]))

        # Track usage for KG visualization (increment 'total' for used patterns)
        # This makes MCP-searched patterns visible in UI even without explicit outcome feedback
        self._track_routing_usage(concepts, selected)

        return selected

    def _track_routing_usage(self, concepts: List[str], selected_collections: List[str]):
        """
        Track routing usage in KG for visualization.

        For every concept that already has a routing pattern, increments the
        "total" counter of each selected collection (creating a zeroed stats
        entry when the collection is not tracked yet) and refreshes the
        pattern's "last_used" timestamp. Concepts without a pattern are
        ignored.

        Args:
            concepts: Concepts extracted from the query
            selected_collections: Collections chosen for the search
        """
        routing_patterns = self.knowledge_graph.get("routing_patterns", {})
        # NOTE(review): naive local timestamp, as in the original code;
        # confirm that consumers of "last_used" do not expect UTC.
        timestamp = datetime.now().isoformat()

        for concept in concepts:
            if concept not in routing_patterns:
                continue

            pattern = routing_patterns[concept]
            # setdefault (not get) so that counters for a pattern lacking
            # "collections_used" are stored on the pattern instead of being
            # written to a throwaway dict.
            collections_used = pattern.setdefault("collections_used", {})

            # Increment total for each collection that was selected for search
            for collection in selected_collections:
                if collection in collections_used:
                    stats = collections_used[collection]
                    stats["total"] = stats.get("total", 0) + 1
                else:
                    # Initialize if this collection not tracked yet
                    collections_used[collection] = {
                        "successes": 0,
                        "failures": 0,
                        "partials": 0,
                        "total": 1,
                    }

            # Update last_used timestamp
            pattern["last_used"] = timestamp

    # =========================================================================
    # Tier Recommendations (for get_context_insights)
    # =========================================================================

    def get_tier_recommendations(self, concepts: List[str]) -> Dict[str, Any]:
        """
        Query Routing KG for best collections given concepts (v0.2.6 Directive Insights).

        Uses the same logic as route_query but returns recommendations
        for get_context_insights output. Unlike route_query it does not
        update usage counters.

        Args:
            concepts: List of extracted concepts from user query

        Returns:
            Dict with:
                top_collections: collection names to search
                match_count: number of concepts that have a routing pattern
                confidence_level: "exploration", "medium" or "high"
                total_score: sum of all collection scores
                scores: the three highest collection scores (empty when no
                    concepts were given)
        """
        if not concepts:
            return {
                "top_collections": ALL_COLLECTIONS.copy(),
                "match_count": 0,
                "confidence_level": "exploration",
                # Same keys as the scored path so callers can rely on them.
                "total_score": 0.0,
                "scores": {},
            }

        sorted_collections, total_score = self._rank_collections(concepts)
        confidence_level, top_collections = self._select_collections(
            sorted_collections, total_score
        )

        # Count matched patterns
        routing_patterns = self.knowledge_graph.get("routing_patterns", {})
        match_count = sum(1 for concept in concepts if concept in routing_patterns)

        return {
            "top_collections": top_collections,
            "match_count": match_count,
            "confidence_level": confidence_level,
            "total_score": total_score,
            "scores": dict(sorted_collections[:3]),  # Top 3 for visibility
        }