roampal 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. roampal/__init__.py +29 -0
  2. roampal/__main__.py +6 -0
  3. roampal/backend/__init__.py +1 -0
  4. roampal/backend/modules/__init__.py +1 -0
  5. roampal/backend/modules/memory/__init__.py +43 -0
  6. roampal/backend/modules/memory/chromadb_adapter.py +623 -0
  7. roampal/backend/modules/memory/config.py +102 -0
  8. roampal/backend/modules/memory/content_graph.py +543 -0
  9. roampal/backend/modules/memory/context_service.py +455 -0
  10. roampal/backend/modules/memory/embedding_service.py +96 -0
  11. roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
  12. roampal/backend/modules/memory/memory_bank_service.py +433 -0
  13. roampal/backend/modules/memory/memory_types.py +296 -0
  14. roampal/backend/modules/memory/outcome_service.py +400 -0
  15. roampal/backend/modules/memory/promotion_service.py +473 -0
  16. roampal/backend/modules/memory/routing_service.py +444 -0
  17. roampal/backend/modules/memory/scoring_service.py +324 -0
  18. roampal/backend/modules/memory/search_service.py +646 -0
  19. roampal/backend/modules/memory/tests/__init__.py +1 -0
  20. roampal/backend/modules/memory/tests/conftest.py +12 -0
  21. roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
  22. roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
  23. roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
  24. roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
  25. roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
  26. roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
  27. roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
  28. roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
  29. roampal/backend/modules/memory/unified_memory_system.py +1277 -0
  30. roampal/cli.py +638 -0
  31. roampal/hooks/__init__.py +16 -0
  32. roampal/hooks/session_manager.py +587 -0
  33. roampal/hooks/stop_hook.py +176 -0
  34. roampal/hooks/user_prompt_submit_hook.py +103 -0
  35. roampal/mcp/__init__.py +7 -0
  36. roampal/mcp/server.py +611 -0
  37. roampal/server/__init__.py +7 -0
  38. roampal/server/main.py +744 -0
  39. roampal-0.1.4.dist-info/METADATA +179 -0
  40. roampal-0.1.4.dist-info/RECORD +44 -0
  41. roampal-0.1.4.dist-info/WHEEL +5 -0
  42. roampal-0.1.4.dist-info/entry_points.txt +2 -0
  43. roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
  44. roampal-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1052 @@
1
+ """
2
+ Knowledge Graph Service - Manages dual KG system (Routing KG + Content KG).
3
+
4
+ Extracted from UnifiedMemorySystem as part of refactoring.
5
+ Includes race condition fix for debounced KG saves.
6
+
7
+ Responsibilities:
8
+ - Loading/saving both routing KG and content KG
9
+ - Concept extraction from text
10
+ - Building concept relationships
11
+ - Tracking problem-solution patterns
12
+ - KG cleanup operations
13
+ - Entity/relationship queries for visualization
14
+ """
15
+
16
+ import asyncio
17
+ import json
18
+ import logging
19
+ import math
20
+ import re
21
+ from collections import defaultdict
22
+ from datetime import datetime
23
+ from pathlib import Path
24
+ from typing import Any, Callable, Dict, List, Optional, Set
25
+
26
+ from filelock import FileLock
27
+
28
+ from .config import MemoryConfig
29
+ from .content_graph import ContentGraph
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ class KnowledgeGraphService:
35
+ """
36
+ Manages dual Knowledge Graph system.
37
+
38
+ Dual KG Architecture:
39
+ - Routing KG: Query patterns -> collection routing decisions
40
+ - Content KG: Memory content -> entity relationships
41
+
42
+ Includes race condition fix: Uses asyncio.Lock to protect
43
+ concurrent access to _kg_save_task in debounced saves.
44
+ """
45
+
46
    def __init__(
        self,
        kg_path: Path,
        content_graph_path: Path,
        relationships_path: Path,
        config: Optional[MemoryConfig] = None,
    ):
        """
        Initialize KnowledgeGraphService.

        Note: all three persisted structures are loaded from disk here,
        so construction performs file I/O.

        Args:
            kg_path: Path to routing KG JSON file
            content_graph_path: Path to content KG JSON file
            relationships_path: Path to memory relationships JSON file
            config: Optional MemoryConfig for thresholds; a default
                MemoryConfig is created when omitted
        """
        self.config = config or MemoryConfig()
        self.kg_path = kg_path
        self.content_graph_path = content_graph_path
        self.relationships_path = relationships_path

        # Load graphs (each loader falls back to an empty structure when
        # its file is missing or unreadable)
        self.knowledge_graph = self._load_kg()
        self.content_graph = self._load_content_graph()
        self.relationships = self._load_relationships()

        # Debounced save state with RACE CONDITION FIX
        self._kg_save_task: Optional[asyncio.Task] = None
        self._kg_save_pending = False
        self._kg_save_lock = asyncio.Lock()  # FIX: Protect concurrent task access

        logger.info(f"KnowledgeGraphService initialized from {kg_path}")
78
+
79
+ # =========================================================================
80
+ # Loading Methods
81
+ # =========================================================================
82
+
83
+ def _load_content_graph(self) -> ContentGraph:
84
+ """
85
+ Load Content Knowledge Graph from disk.
86
+
87
+ CRITICAL: This is a core feature for entity relationship mapping.
88
+ Do not disable or remove - required for dual KG visualization.
89
+ """
90
+ if self.content_graph_path.exists():
91
+ try:
92
+ return ContentGraph.load_from_file(str(self.content_graph_path))
93
+ except Exception as e:
94
+ logger.warning(f"Failed to load content graph, creating new: {e}")
95
+ return ContentGraph()
96
+
97
+ def _load_kg(self) -> Dict[str, Any]:
98
+ """Load knowledge graph routing patterns."""
99
+ default_kg = {
100
+ "routing_patterns": {}, # concept -> best_collection
101
+ "success_rates": {}, # collection -> success_rate
102
+ "failure_patterns": {}, # concept -> failure_reasons
103
+ "problem_categories": {}, # problem_type -> preferred_collections
104
+ "problem_solutions": {}, # problem_signature -> [solution_ids]
105
+ "solution_patterns": {}, # pattern_hash -> {problem, solution, success_rate}
106
+ # v0.2.1 Causal Learning: Action-level effectiveness tracking
107
+ "context_action_effectiveness": {} # (context, action, collection) -> {success, fail, success_rate}
108
+ }
109
+
110
+ if self.kg_path.exists():
111
+ try:
112
+ with open(self.kg_path, 'r') as f:
113
+ loaded_kg = json.load(f)
114
+ # Ensure all required keys exist
115
+ for key in default_kg:
116
+ if key not in loaded_kg:
117
+ loaded_kg[key] = default_kg[key]
118
+ return loaded_kg
119
+ except Exception:
120
+ pass
121
+ return default_kg
122
+
123
+ def _load_relationships(self) -> Dict[str, Any]:
124
+ """Load memory relationships."""
125
+ if self.relationships_path.exists():
126
+ try:
127
+ with open(self.relationships_path, 'r') as f:
128
+ return json.load(f)
129
+ except Exception:
130
+ pass
131
+ return {
132
+ "related": {}, # doc_id -> [related_doc_ids]
133
+ "evolution": {}, # doc_id -> {parent, children}
134
+ "conflicts": {} # doc_id -> [conflicting_doc_ids]
135
+ }
136
+
137
    def reload_kg(self):
        """Reload KG from disk to pick up changes from other processes."""
        # Replaces self.knowledge_graph wholesale; any in-memory mutations
        # not yet saved are discarded in favor of the on-disk state.
        self.knowledge_graph = self._load_kg()
140
+
141
+ # =========================================================================
142
+ # Saving Methods (with race condition fix)
143
+ # =========================================================================
144
+
145
    def _save_kg_sync(self):
        """
        Synchronous save of both routing KG and content KG.

        CRITICAL: Saves both graphs together to maintain consistency.
        Do not remove content graph save - it's required for entity tracking.

        Intended to run in a worker thread (see _save_kg). The routing KG
        write is guarded by a cross-process FileLock and performed as
        write-to-temp-then-rename so readers never observe a partial file.
        All errors are logged and swallowed so a failed save cannot crash
        the caller.
        """
        # Save routing KG
        lock_path = str(self.kg_path) + ".lock"
        try:
            with FileLock(lock_path, timeout=10):
                self.kg_path.parent.mkdir(exist_ok=True, parents=True)
                # Write to temp file first then rename (atomic operation)
                temp_path = self.kg_path.with_suffix('.tmp')
                with open(temp_path, 'w') as f:
                    json.dump(self.knowledge_graph, f, indent=2)
                temp_path.replace(self.kg_path)
        except PermissionError as e:
            logger.error(f"Permission denied saving routing KG: {e}")
        except Exception as e:
            logger.error(f"Failed to save routing KG: {e}", exc_info=True)

        # CRITICAL: Save content KG (entity relationships).
        # Saved even if the routing KG write above failed.
        try:
            self.content_graph.save_to_file(str(self.content_graph_path))
        except Exception as e:
            logger.error(f"Failed to save content KG: {e}", exc_info=True)
172
+
173
    async def _save_kg(self):
        """Save knowledge graph asynchronously."""
        # Offload the blocking file I/O to a thread so the event loop
        # is not stalled by disk writes or the file lock.
        await asyncio.to_thread(self._save_kg_sync)
176
+
177
    async def _debounced_save_kg(self):
        """
        Debounce KG saves, batching them within a window of
        config.kg_debounce_seconds to reduce file I/O.

        RACE CONDITION FIX: Uses asyncio.Lock to protect concurrent access
        to _kg_save_task. Without this, multiple coroutines could both
        cancel the task and create new ones simultaneously.
        """
        async with self._kg_save_lock:  # FIX: Serialize access
            # Cancel existing pending save task
            if self._kg_save_task and not self._kg_save_task.done():
                self._kg_save_task.cancel()
                try:
                    # Await so cancellation fully settles before we
                    # schedule the replacement task
                    await self._kg_save_task
                except asyncio.CancelledError:
                    pass

            # Create new delayed save task
            async def delayed_save():
                try:
                    await asyncio.sleep(self.config.kg_debounce_seconds)
                    await self._save_kg()
                    self._kg_save_pending = False
                except asyncio.CancelledError:
                    # Superseded by a newer save request; nothing to do
                    pass

            self._kg_save_pending = True
            self._kg_save_task = asyncio.create_task(delayed_save())
205
+
206
    async def debounced_save_kg(self):
        """Public alias for debounced KG save (used by OutcomeService)."""
        await self._debounced_save_kg()
209
+
210
+ def update_success_rate(self, doc_id: str, outcome: str):
211
+ """
212
+ Update success rate tracking for a document in the routing KG.
213
+
214
+ This tracks which documents (by ID) lead to successful outcomes,
215
+ enabling the routing system to prefer historically successful sources.
216
+
217
+ Args:
218
+ doc_id: Document ID that had an outcome
219
+ outcome: "worked", "failed", or "partial"
220
+ """
221
+ # Extract collection from doc_id (e.g., "working_abc123" -> "working")
222
+ parts = doc_id.split("_")
223
+ if len(parts) < 2:
224
+ return
225
+
226
+ collection = parts[0]
227
+
228
+ # Track in routing_patterns
229
+ if collection not in self.knowledge_graph["routing_patterns"]:
230
+ self.knowledge_graph["routing_patterns"][collection] = {
231
+ "successes": 0,
232
+ "failures": 0,
233
+ "partials": 0,
234
+ "total": 0,
235
+ "success_rate": 0.5
236
+ }
237
+
238
+ stats = self.knowledge_graph["routing_patterns"][collection]
239
+ stats["total"] += 1
240
+
241
+ if outcome == "worked":
242
+ stats["successes"] += 1
243
+ elif outcome == "failed":
244
+ stats["failures"] += 1
245
+ else:
246
+ stats["partials"] += 1
247
+
248
+ # Recalculate success rate (partials count as 0.5)
249
+ if stats["total"] > 0:
250
+ weighted = stats["successes"] + (stats["partials"] * 0.5)
251
+ stats["success_rate"] = weighted / stats["total"]
252
+
253
+ def add_relationship(self, doc_id: str, rel_type: str, data: Dict[str, Any]):
254
+ """
255
+ Add a relationship to the relationships tracking structure.
256
+
257
+ Args:
258
+ doc_id: Document ID
259
+ rel_type: Relationship type (e.g., "evolution", "related", "conflicts")
260
+ data: Relationship data
261
+ """
262
+ if rel_type not in self.relationships:
263
+ self.relationships[rel_type] = {}
264
+
265
+ if doc_id not in self.relationships[rel_type]:
266
+ self.relationships[rel_type][doc_id] = []
267
+
268
+ self.relationships[rel_type][doc_id].append(data)
269
+
270
+
271
+
272
    def _save_relationships_sync(self):
        """
        Synchronous save of memory relationships - to be called in a thread
        (see save_relationships).

        Uses a cross-process FileLock and a temp-file rename so readers
        never observe a partially written file; errors are logged, never
        raised.
        """
        lock_path = str(self.relationships_path) + ".lock"
        try:
            with FileLock(lock_path, timeout=10):
                self.relationships_path.parent.mkdir(exist_ok=True, parents=True)
                # Atomic write: temp file then rename
                temp_path = self.relationships_path.with_suffix('.tmp')
                with open(temp_path, 'w') as f:
                    json.dump(self.relationships, f, indent=2)
                temp_path.replace(self.relationships_path)
        except PermissionError as e:
            logger.error(f"Permission denied saving relationships: {e}")
        except Exception as e:
            logger.error(f"Failed to save relationships: {e}", exc_info=True)
287
+
288
    async def save_relationships(self):
        """Save relationships asynchronously."""
        # Offload blocking file I/O (lock + write) to a worker thread.
        await asyncio.to_thread(self._save_relationships_sync)
291
+
292
+ # =========================================================================
293
+ # Concept Extraction
294
+ # =========================================================================
295
+
296
+ def extract_concepts(self, text: str) -> List[str]:
297
+ """
298
+ Extract N-grams (unigrams, bigrams, trigrams) from text for KG routing.
299
+ Implements architecture.md specification for concept extraction.
300
+ """
301
+ concepts: Set[str] = set()
302
+
303
+ # Normalize and tokenize
304
+ text_lower = text.lower()
305
+ # Remove punctuation except hyphens and underscores
306
+ text_clean = re.sub(r'[^\w\s\-_]', ' ', text_lower)
307
+ words = text_clean.split()
308
+
309
+ # Stop words (expanded set)
310
+ stop_words = {
311
+ "the", "a", "an", "is", "are", "was", "were", "to", "for", "of",
312
+ "with", "in", "on", "at", "by", "from", "as", "be", "this", "that",
313
+ "it", "i", "you", "we", "they", "my", "your", "our", "their", "what",
314
+ "when", "where", "how", "why", "can", "could", "would", "should"
315
+ }
316
+
317
+ # v0.2.1: Blocklist for MCP tool names and internal function-like patterns
318
+ # These pollute the Content KG with non-semantic entities
319
+ tool_blocklist = {
320
+ # MCP tool names
321
+ "search_memory", "add_to_memory_bank", "create_memory", "update_memory",
322
+ "archive_memory", "get_context_insights", "record_response", "validated",
323
+ # Common patterns from tool descriptions
324
+ "memory_bank", "working", "history", "patterns", "books",
325
+ # Internal function-like terms
326
+ "function", "parameter", "response", "request", "query", "result",
327
+ "collection", "collections", "metadata", "timestamp", "document"
328
+ }
329
+
330
+ # Filter stop words
331
+ filtered_words = [w for w in words if w not in stop_words and len(w) > 2]
332
+
333
+ # 1. Extract UNIGRAMS (single words)
334
+ for word in filtered_words:
335
+ if len(word) > 3: # Only meaningful words
336
+ concepts.add(word)
337
+
338
+ # 2. Extract BIGRAMS (2-word phrases)
339
+ for i in range(len(filtered_words) - 1):
340
+ bigram = f"{filtered_words[i]}_{filtered_words[i+1]}"
341
+ concepts.add(bigram)
342
+
343
+ # 3. Extract TRIGRAMS (3-word phrases)
344
+ for i in range(len(filtered_words) - 2):
345
+ trigram = f"{filtered_words[i]}_{filtered_words[i+1]}_{filtered_words[i+2]}"
346
+ concepts.add(trigram)
347
+
348
+ # Filter out blocklisted terms
349
+ filtered_concepts = [
350
+ c for c in concepts
351
+ if not any(blocked in c for blocked in tool_blocklist)
352
+ ]
353
+
354
+ return filtered_concepts
355
+
356
+ # =========================================================================
357
+ # Concept Relationships
358
+ # =========================================================================
359
+
360
+ def build_concept_relationships(self, concepts: List[str]):
361
+ """Build relationships between co-occurring concepts."""
362
+ if "relationships" not in self.knowledge_graph:
363
+ self.knowledge_graph["relationships"] = {}
364
+
365
+ # Build relationships between all concept pairs
366
+ for i, concept1 in enumerate(concepts):
367
+ for concept2 in concepts[i+1:]:
368
+ # Create bidirectional relationship key (sorted for consistency)
369
+ rel_key = "|".join(sorted([concept1, concept2]))
370
+
371
+ if rel_key not in self.knowledge_graph["relationships"]:
372
+ self.knowledge_graph["relationships"][rel_key] = {
373
+ "co_occurrence": 0,
374
+ "success_together": 0,
375
+ "failure_together": 0
376
+ }
377
+
378
+ # Increment co-occurrence
379
+ self.knowledge_graph["relationships"][rel_key]["co_occurrence"] += 1
380
+
381
    async def update_kg_routing(self, query: str, collection: str, outcome: str):
        """
        Update KG routing patterns and relationships based on outcome.

        For every concept extracted from the query: records the collection
        used and the outcome, then recomputes that concept's best collection
        across all collections seen so far. Concept-pair relationships get
        success/failure counts. Finishes with a debounced save.

        Args:
            query: Query text whose concepts are being trained; no-op if empty
            collection: Collection the query was routed to
            outcome: "worked" / "failed"; any other value only bumps totals
        """
        if not query:
            return

        concepts = self.extract_concepts(query)

        # Build relationships between concepts
        self.build_concept_relationships(concepts)

        for concept in concepts:
            if concept not in self.knowledge_graph["routing_patterns"]:
                self.knowledge_graph["routing_patterns"][concept] = {
                    "collections_used": {},
                    "best_collection": collection,
                    "success_rate": 0.5
                }

            pattern = self.knowledge_graph["routing_patterns"][concept]

            # Track collection performance
            if collection not in pattern["collections_used"]:
                pattern["collections_used"][collection] = {
                    "successes": 0,
                    "failures": 0,
                    "total": 0
                }

            stats = pattern["collections_used"][collection]
            stats["total"] += 1

            if outcome == "worked":
                stats["successes"] += 1
            elif outcome == "failed":
                stats["failures"] += 1

            # Update best collection: re-scan every collection's rate.
            # Starts from the current collection so ties resolve to it.
            best_collection = collection
            best_rate = 0.0

            for coll_name, coll_stats in pattern["collections_used"].items():
                # Calculate success rate: successes / (successes + failures)
                # Excludes "partial" and "unknown" outcomes per v0.1.6 spec
                total_with_feedback = coll_stats["successes"] + coll_stats["failures"]

                if total_with_feedback > 0:
                    rate = coll_stats["successes"] / total_with_feedback
                else:
                    rate = 0.5  # Neutral baseline (50%) for untested patterns

                if rate > best_rate:
                    best_rate = rate
                    best_collection = coll_name

            pattern["best_collection"] = best_collection
            # Default to 0.5 if no collections have been tested with explicit feedback
            pattern["success_rate"] = best_rate if best_rate > 0 else 0.5

        # Update relationship outcomes for concept pairs from this query
        # (entries were just ensured by build_concept_relationships above)
        for i, concept1 in enumerate(concepts):
            for concept2 in concepts[i+1:]:
                rel_key = "|".join(sorted([concept1, concept2]))
                if rel_key in self.knowledge_graph.get("relationships", {}):
                    rel_data = self.knowledge_graph["relationships"][rel_key]
                    if outcome == "worked":
                        rel_data["success_together"] += 1
                    elif outcome == "failed":
                        rel_data["failure_together"] += 1

        # Save KG with proper await (debounced)
        await self._debounced_save_kg()
452
+
453
+
454
+ def add_problem_category(self, problem_key: str, doc_id: str):
455
+ """Add a document to a problem category."""
456
+ if "problem_categories" not in self.knowledge_graph:
457
+ self.knowledge_graph["problem_categories"] = {}
458
+
459
+ if problem_key not in self.knowledge_graph["problem_categories"]:
460
+ self.knowledge_graph["problem_categories"][problem_key] = []
461
+
462
+ if doc_id not in self.knowledge_graph["problem_categories"][problem_key]:
463
+ self.knowledge_graph["problem_categories"][problem_key].append(doc_id)
464
+
465
    def get_problem_categories(self) -> Dict[str, List[str]]:
        """Get all problem categories.

        Returns the live dict from the in-memory KG (not a copy), or an
        empty dict when the key is absent.
        """
        return self.knowledge_graph.get("problem_categories", {})
468
+
469
+ # =========================================================================
470
+ # Problem-Solution Tracking
471
+ # =========================================================================
472
+
473
    async def find_known_solutions(self, query: str, get_fragment_fn: Callable) -> List[Dict[str, Any]]:
        """
        Find known solutions for similar problems.

        Looks up the exact problem signature first, then scans all stored
        signatures for partial matches (3+ overlapping concepts). Matched
        documents get their "distance" multiplied down as a boost
        (assumes lower distance = better match - TODO confirm against the
        search layer's scoring convention).

        Args:
            query: Search query text
            get_fragment_fn: Function to retrieve document by ID
                (collection_name, doc_id) -> doc dict or None

        Returns:
            Up to 3 exact-match docs plus at most one doc per partially
            matching signature; [] on empty query or any error.
        """
        try:
            if not query:
                return []

            # Extract concepts from the query
            query_concepts = self.extract_concepts(query)
            query_signature = "_".join(sorted(query_concepts[:5]))

            known_solutions = []

            # Ensure problem_solutions exists in knowledge graph
            if "problem_solutions" not in self.knowledge_graph:
                self.knowledge_graph["problem_solutions"] = {}

            # Look for exact problem matches
            if query_signature in self.knowledge_graph["problem_solutions"]:
                solutions = self.knowledge_graph["problem_solutions"][query_signature]

                # Sort by success count and recency (last_used is an ISO
                # timestamp, so lexicographic comparison orders by time)
                sorted_solutions = sorted(
                    solutions,
                    key=lambda x: (x.get("success_count", 0), x.get("last_used", "")),
                    reverse=True
                )

                # Add top solutions to results
                for solution in sorted_solutions[:3]:
                    doc_id = solution.get("doc_id")
                    if doc_id:
                        # Try to find the actual document by matching the
                        # collection prefix encoded in the doc_id
                        for coll_name in ["patterns", "history", "memory_bank", "books"]:
                            if doc_id.startswith(coll_name):
                                doc = get_fragment_fn(coll_name, doc_id)
                                if doc:
                                    # Boost the score for known solutions
                                    doc["distance"] = doc.get("distance", 1.0) * 0.5  # 50% boost
                                    doc["is_known_solution"] = True
                                    doc["solution_success_count"] = solution.get("success_count", 0)
                                    known_solutions.append(doc)
                                    logger.info(f"Found known solution: {doc_id} (used {solution['success_count']} times)")
                                break

            # Also check for partial matches (3+ concept overlap).
            # NOTE(review): splitting the signature on "_" fragments
            # bigram/trigram concepts back into single words, so the
            # overlap below effectively only counts unigram matches -
            # confirm this is intended.
            for problem_sig, solutions in self.knowledge_graph["problem_solutions"].items():
                if problem_sig != query_signature:
                    problem_concepts_stored = set(problem_sig.split("_"))
                    overlap = len(set(query_concepts) & problem_concepts_stored)

                    if overlap >= 3:  # Significant overlap
                        for solution in solutions[:1]:  # Take best from partial matches
                            doc_id = solution.get("doc_id")
                            # NOTE(review): dedup key is "id", but docs
                            # appended above are never assigned an "id"
                            # here - verify fragment dicts carry one, else
                            # duplicates are possible.
                            if doc_id and doc_id not in [s.get("id") for s in known_solutions]:
                                for coll_name in ["patterns", "history", "memory_bank", "books"]:
                                    if doc_id.startswith(coll_name):
                                        doc = get_fragment_fn(coll_name, doc_id)
                                        if doc:
                                            doc["distance"] = doc.get("distance", 1.0) * 0.7  # 30% boost
                                            doc["is_partial_solution"] = True
                                            doc["concept_overlap"] = overlap
                                            known_solutions.append(doc)
                                        break

            return known_solutions

        except Exception as e:
            # Any failure mid-scan discards results gathered so far
            logger.error(f"Error finding known solutions: {e}")
            return []
548
+
549
    async def track_problem_solution(
        self,
        doc_id: str,
        metadata: Dict[str, Any],
        context: Optional[Dict[str, Any]]
    ):
        """Track successful problem->solution patterns for future reuse.

        Derives a problem signature from the originating query/context and
        records (or reinforces) its mapping to this document, then schedules
        a debounced save. Silently no-ops when problem or solution text is
        missing, or when no concepts can be extracted.

        Args:
            doc_id: Document ID of the solution
            metadata: Assumed to carry "original_context" or "query"
                (problem text) and "text" (solution text) - schema inferred
                from usage here; confirm against callers.
            context: Optional context dict appended to the solution's
                context history (deduplicated by equality)
        """
        try:
            # Extract problem signature from the original query/context
            problem_text = metadata.get("original_context", "") or metadata.get("query", "")
            solution_text = metadata.get("text", "")

            if not problem_text or not solution_text:
                return

            # Create problem signature (simplified concepts)
            problem_concepts = self.extract_concepts(problem_text)
            problem_signature = "_".join(sorted(problem_concepts[:5]))  # Top 5 concepts

            if not problem_signature:
                return

            # Store problem->solution mapping
            if problem_signature not in self.knowledge_graph["problem_solutions"]:
                self.knowledge_graph["problem_solutions"][problem_signature] = []

            # Candidate entry; only appended if this doc isn't already
            # recorded for this problem (see loop below)
            solution_entry = {
                "doc_id": doc_id,
                "solution": solution_text,  # Store abbreviated solution
                "success_count": 1,
                "last_used": datetime.now().isoformat(),
                "contexts": [context] if context else []
            }

            # Check if this solution already exists for this problem
            existing_solutions = self.knowledge_graph["problem_solutions"][problem_signature]
            solution_found = False

            for existing in existing_solutions:
                if existing["doc_id"] == doc_id:
                    # Update existing solution in place
                    existing["success_count"] += 1
                    existing["last_used"] = datetime.now().isoformat()
                    if context and context not in existing.get("contexts", []):
                        existing.setdefault("contexts", []).append(context)
                    solution_found = True
                    break

            if not solution_found:
                existing_solutions.append(solution_entry)

            # Track solution patterns (for pattern matching)
            pattern_hash = f"{problem_signature}::{doc_id}"
            if pattern_hash not in self.knowledge_graph["solution_patterns"]:
                self.knowledge_graph["solution_patterns"][pattern_hash] = {
                    "problem": problem_text,
                    "solution": solution_text,
                    "success_count": 0,
                    "failure_count": 0,
                    "contexts": []
                }

            pattern = self.knowledge_graph["solution_patterns"][pattern_hash]
            pattern["success_count"] += 1
            # Denominator is >= 1 here since success_count was just bumped
            pattern["success_rate"] = pattern["success_count"] / (pattern["success_count"] + pattern["failure_count"])

            # Save updated KG with proper await (debounced)
            await self._debounced_save_kg()

            logger.info(f"Tracked problem->solution pattern: {problem_signature[:30]}... -> {doc_id}")

        except Exception as e:
            logger.error(f"Error tracking problem->solution: {e}")
622
+
623
+ # =========================================================================
624
+ # Cleanup Methods
625
+ # =========================================================================
626
+
627
    async def cleanup_dead_references(self, doc_exists_fn: Callable[[str], bool]) -> int:
        """
        Remove doc_id references that no longer exist in collections.

        Cleans problem_categories, problem_solutions, and never-used
        routing_patterns; saves immediately (not debounced) when anything
        was removed.

        Args:
            doc_exists_fn: Function to check if doc exists (doc_id) -> bool.
                NOTE(review): may be invoked with None when a solution entry
                lacks "doc_id" - confirm the callback tolerates that.

        Returns:
            Count of removed references/patterns (0 on error).
        """
        try:
            cleaned = 0

            # Clean problem_categories (iterate a snapshot since keys may
            # be deleted while looping)
            for problem_key, doc_ids in list(self.knowledge_graph.get("problem_categories", {}).items()):
                valid_ids = [doc_id for doc_id in doc_ids if doc_exists_fn(doc_id)]
                if len(valid_ids) < len(doc_ids):
                    cleaned += len(doc_ids) - len(valid_ids)
                    if valid_ids:
                        self.knowledge_graph["problem_categories"][problem_key] = valid_ids
                    else:
                        # No survivors: drop the whole category
                        del self.knowledge_graph["problem_categories"][problem_key]

            # Clean problem_solutions
            for problem_sig, solutions in list(self.knowledge_graph.get("problem_solutions", {}).items()):
                valid_solutions = [s for s in solutions if doc_exists_fn(s.get("doc_id"))]
                if len(valid_solutions) < len(solutions):
                    cleaned += len(solutions) - len(valid_solutions)
                    if valid_solutions:
                        self.knowledge_graph["problem_solutions"][problem_sig] = valid_solutions
                    else:
                        del self.knowledge_graph["problem_solutions"][problem_sig]

            # Clean routing_patterns (remove patterns with 0 total uses)
            for concept, pattern in list(self.knowledge_graph.get("routing_patterns", {}).items()):
                collections_used = pattern.get("collections_used", {})
                total_uses = sum(stats.get("total", 0) for stats in collections_used.values())
                if total_uses == 0:
                    del self.knowledge_graph["routing_patterns"][concept]
                    cleaned += 1

            if cleaned > 0:
                logger.info(f"KG cleanup: removed {cleaned} dead references")
                await self._save_kg()  # Immediate save for cleanup operation

            return cleaned
        except Exception as e:
            logger.error(f"Error cleaning KG dead references: {e}")
            return 0
673
+
674
+ async def cleanup_action_kg_for_doc_ids(self, doc_ids: List[str]) -> int:
675
+ """
676
+ Remove Action KG examples referencing specific doc_ids (v0.2.6).
677
+
678
+ Called when books are deleted to prevent stale doc_id references
679
+ in context_action_effectiveness examples.
680
+
681
+ Args:
682
+ doc_ids: List of document IDs to remove from Action KG examples
683
+
684
+ Returns:
685
+ Number of examples removed
686
+ """
687
+ if not doc_ids:
688
+ return 0
689
+
690
+ try:
691
+ doc_id_set = set(doc_ids)
692
+ cleaned = 0
693
+
694
+ for key, stats in self.knowledge_graph.get("context_action_effectiveness", {}).items():
695
+ examples = stats.get("examples", [])
696
+ original_count = len(examples)
697
+
698
+ # Filter out examples with matching doc_ids
699
+ stats["examples"] = [
700
+ ex for ex in examples
701
+ if ex.get("doc_id") not in doc_id_set
702
+ ]
703
+
704
+ cleaned += original_count - len(stats["examples"])
705
+
706
+ if cleaned > 0:
707
+ logger.info(f"Action KG cleanup: removed {cleaned} examples for deleted doc_ids")
708
+ await self._save_kg()
709
+
710
+ return cleaned
711
+ except Exception as e:
712
+ logger.error(f"Error cleaning Action KG for doc_ids: {e}")
713
+ return 0
714
+
715
+ # =========================================================================
716
+ # Entity/Relationship Queries (for visualization)
717
+ # =========================================================================
718
+
719
+ async def get_kg_entities(
720
+ self,
721
+ filter_text: Optional[str] = None,
722
+ limit: int = 200
723
+ ) -> List[Dict[str, Any]]:
724
+ """
725
+ Get entities from DUAL knowledge graph (Routing KG + Content KG merged).
726
+
727
+ CRITICAL: This merges both graphs to provide complete entity view.
728
+ - Routing KG: Query patterns -> collection routing
729
+ - Content KG: Entity relationships from memory_bank content
730
+ - Entities in both graphs get source="both" (purple nodes in UI)
731
+
732
+ NOTE: Reloads KG from disk to pick up changes from MCP process.
733
+ """
734
+ # Reload KG from disk to pick up changes from MCP process
735
+ self.reload_kg()
736
+
737
+ entities_map: Dict[str, Dict[str, Any]] = {}
738
+
739
+ # STEP 1: Get routing KG entities (query-based patterns)
740
+ for concept, pattern in self.knowledge_graph.get("routing_patterns", {}).items():
741
+ if filter_text and filter_text.lower() not in concept.lower():
742
+ continue
743
+
744
+ # Count routing connections
745
+ routing_connections = 0
746
+ for rel_key in self.knowledge_graph.get("relationships", {}).keys():
747
+ if concept in rel_key.split("|"):
748
+ routing_connections += 1
749
+
750
+ # Get total usage across all collections
751
+ collections_used = pattern.get("collections_used", {})
752
+ total_usage = sum(c.get("total", 0) for c in collections_used.values())
753
+
754
+ entities_map[concept] = {
755
+ "entity": concept,
756
+ "source": "routing", # Will be updated if also in content KG
757
+ "routing_connections": routing_connections,
758
+ "content_connections": 0,
759
+ "total_connections": routing_connections,
760
+ "success_rate": pattern.get("success_rate", 0.5),
761
+ "best_collection": pattern.get("best_collection"),
762
+ "collections_used": collections_used,
763
+ "usage_count": total_usage,
764
+ "mentions": 0, # From content KG
765
+ "last_used": pattern.get("last_used"),
766
+ "created_at": pattern.get("created_at")
767
+ }
768
+
769
+ # STEP 2: Get content KG entities (memory-based relationships)
770
+ # CRITICAL: Do not skip this step - provides green/purple nodes in UI
771
+ content_entities = self.content_graph.get_all_entities(min_mentions=1)
772
+
773
+ # v0.2.1: Blocklist for filtering out tool-like entities
774
+ tool_blocklist_terms = {
775
+ "search_memory", "add_to_memory_bank", "create_memory", "update_memory",
776
+ "archive_memory", "get_context_insights", "record_response", "validated",
777
+ "memory_bank", "working", "history", "patterns", "books",
778
+ "function", "parameter", "response", "request", "query", "result",
779
+ "collection", "collections", "metadata", "timestamp", "document"
780
+ }
781
+
782
+ for entity_data in content_entities:
783
+ entity_name = entity_data["entity"]
784
+
785
+ # v0.2.1: Skip entities that look like tool names
786
+ is_tool_like = any(term in entity_name for term in tool_blocklist_terms)
787
+ if is_tool_like:
788
+ continue
789
+
790
+ if filter_text and filter_text.lower() not in entity_name.lower():
791
+ continue
792
+
793
+ # Count content connections
794
+ content_rels = self.content_graph.get_entity_relationships(entity_name, min_strength=0.0)
795
+ content_connections = len(content_rels)
796
+
797
+ if entity_name in entities_map:
798
+ # Entity exists in BOTH graphs -> source="both" (purple node)
799
+ entities_map[entity_name]["source"] = "both"
800
+ entities_map[entity_name]["content_connections"] = content_connections
801
+ entities_map[entity_name]["total_connections"] += content_connections
802
+ entities_map[entity_name]["mentions"] = entity_data["mentions"]
803
+ else:
804
+ # Entity only in content KG -> source="content" (green node)
805
+ entities_map[entity_name] = {
806
+ "entity": entity_name,
807
+ "source": "content", # Content KG only
808
+ "routing_connections": 0,
809
+ "content_connections": content_connections,
810
+ "total_connections": content_connections,
811
+ "success_rate": 0.5, # Neutral (no routing data)
812
+ "best_collection": "memory_bank", # Content entities are from memory_bank
813
+ "collections_used": {"memory_bank": {"total": entity_data["mentions"]}},
814
+ "usage_count": entity_data["mentions"],
815
+ "mentions": entity_data["mentions"],
816
+ "last_used": entity_data.get("last_seen"),
817
+ "created_at": entity_data.get("first_seen")
818
+ }
819
+
820
+ # STEP 3: Get Action Effectiveness KG entities (context|action|collection patterns)
821
+ # v0.2.1: Orange nodes showing action success rates per context
822
+ for key, stats in self.knowledge_graph.get("context_action_effectiveness", {}).items():
823
+ parts = key.split("|")
824
+ if len(parts) >= 2:
825
+ context_type = parts[0]
826
+ action_type = parts[1]
827
+ collection = parts[2] if len(parts) > 2 else "*"
828
+
829
+ # Create readable label
830
+ label = f"{action_type}@{context_type}"
831
+ if collection != "*":
832
+ label += f"->{collection}"
833
+
834
+ if filter_text and filter_text.lower() not in label.lower():
835
+ continue
836
+
837
+ total_uses = stats.get("successes", 0) + stats.get("failures", 0)
838
+ if total_uses == 0:
839
+ continue # Skip unused patterns
840
+
841
+ success_rate = stats.get("success_rate", 0.5)
842
+
843
+ # Don't overwrite routing/content entities with same name
844
+ if label not in entities_map:
845
+ entities_map[label] = {
846
+ "entity": label,
847
+ "source": "action", # Orange nodes for action effectiveness
848
+ "routing_connections": 0,
849
+ "content_connections": 0,
850
+ "total_connections": 0,
851
+ "success_rate": success_rate,
852
+ "best_collection": collection if collection != "*" else None,
853
+ "collections_used": {collection: {"total": total_uses, "successes": stats.get("successes", 0), "failures": stats.get("failures", 0)}},
854
+ "usage_count": total_uses,
855
+ "mentions": 0,
856
+ "last_used": stats.get("last_used"),
857
+ "created_at": stats.get("first_used"),
858
+ # Action-specific metadata
859
+ "context_type": context_type,
860
+ "action_type": action_type,
861
+ "partials": stats.get("partials", 0)
862
+ }
863
+
864
+ # Convert to list and sort by usage
865
+ entities = list(entities_map.values())
866
+ entities.sort(key=lambda x: x["usage_count"], reverse=True)
867
+ return entities[:limit]
868
+
869
+ async def get_kg_relationships(self, entity: str) -> List[Dict[str, Any]]:
870
+ """
871
+ Get relationships for a specific entity (DUAL KG merged).
872
+
873
+ CRITICAL: Merges routing + content relationships for complete view.
874
+ """
875
+ relationships_map: Dict[str, Dict[str, Any]] = {}
876
+
877
+ # STEP 1: Get routing KG relationships
878
+ for rel_key, rel_data in self.knowledge_graph.get("relationships", {}).items():
879
+ concepts = rel_key.split("|")
880
+ if entity in concepts:
881
+ related = concepts[1] if concepts[0] == entity else concepts[0]
882
+ relationships_map[related] = {
883
+ "related_entity": related,
884
+ "source": "routing", # Will update if also in content
885
+ "strength": rel_data.get("co_occurrence", 0),
886
+ "total_strength": rel_data.get("co_occurrence", 0),
887
+ "success_together": rel_data.get("success_together", 0),
888
+ "failure_together": rel_data.get("failure_together", 0),
889
+ "content_strength": 0 # From content KG
890
+ }
891
+
892
+ # STEP 2: Get content KG relationships
893
+ # CRITICAL: Do not skip - provides entity relationship visualization
894
+ content_rels = self.content_graph.get_entity_relationships(entity, min_strength=0.0)
895
+ for rel_data in content_rels:
896
+ related = rel_data["related_entity"]
897
+ content_strength = rel_data["strength"]
898
+
899
+ if related in relationships_map:
900
+ # Relationship exists in BOTH graphs
901
+ relationships_map[related]["source"] = "both"
902
+ relationships_map[related]["content_strength"] = content_strength
903
+ relationships_map[related]["total_strength"] += content_strength
904
+ else:
905
+ # Relationship only in content KG
906
+ relationships_map[related] = {
907
+ "related_entity": related,
908
+ "source": "content", # Content KG only
909
+ "strength": 0, # No routing data
910
+ "total_strength": content_strength,
911
+ "success_together": 0,
912
+ "failure_together": 0,
913
+ "content_strength": content_strength
914
+ }
915
+
916
+ relationships = list(relationships_map.values())
917
+ relationships.sort(key=lambda x: x["total_strength"], reverse=True)
918
+ return relationships
919
+
920
+ # =========================================================================
921
+ # Content Graph Integration
922
+ # =========================================================================
923
+
924
+ def add_failure_pattern(self, failure_reason: str, doc_id: str, problem_text: str):
925
+ """Track a failure pattern for learning."""
926
+ if "failure_patterns" not in self.knowledge_graph:
927
+ self.knowledge_graph["failure_patterns"] = {}
928
+
929
+ if failure_reason not in self.knowledge_graph["failure_patterns"]:
930
+ self.knowledge_graph["failure_patterns"][failure_reason] = []
931
+
932
+ self.knowledge_graph["failure_patterns"][failure_reason].append({
933
+ "doc_id": doc_id,
934
+ "problem_text": problem_text
935
+ })
936
+
937
+ def add_problem_solution(
938
+ self,
939
+ problem_signature: str,
940
+ doc_id: str,
941
+ solution_text: str,
942
+ context: Optional[Dict[str, Any]] = None
943
+ ):
944
+ """Track a successful problem->solution mapping."""
945
+ if "problem_solutions" not in self.knowledge_graph:
946
+ self.knowledge_graph["problem_solutions"] = {}
947
+
948
+ if problem_signature not in self.knowledge_graph["problem_solutions"]:
949
+ self.knowledge_graph["problem_solutions"][problem_signature] = []
950
+
951
+ # Add if not already present
952
+ existing = self.knowledge_graph["problem_solutions"][problem_signature]
953
+ if doc_id not in existing:
954
+ existing.append(doc_id)
955
+
956
+ def add_solution_pattern(
957
+ self,
958
+ doc_id: str,
959
+ solution_text: str,
960
+ score: float,
961
+ problem_keys: List[str],
962
+ solution_concepts: List[str]
963
+ ):
964
+ """Track a solution pattern for reuse."""
965
+ if "solution_patterns" not in self.knowledge_graph:
966
+ self.knowledge_graph["solution_patterns"] = {}
967
+
968
+ pattern_key = f"{doc_id}::{'_'.join(problem_keys[:3])}"
969
+
970
+ self.knowledge_graph["solution_patterns"][pattern_key] = {
971
+ "doc_id": doc_id,
972
+ "solution_text": solution_text[:500],
973
+ "score": score,
974
+ "problem_keys": problem_keys,
975
+ "solution_concepts": solution_concepts,
976
+ "uses": 1
977
+ }
978
+
979
+ def add_solution_pattern_entry(
980
+ self,
981
+ pattern_hash: str,
982
+ problem_text: str,
983
+ solution_text: str,
984
+ outcome: str
985
+ ):
986
+ """Add or update a solution pattern entry."""
987
+ if "solution_patterns" not in self.knowledge_graph:
988
+ self.knowledge_graph["solution_patterns"] = {}
989
+
990
+ if pattern_hash not in self.knowledge_graph["solution_patterns"]:
991
+ self.knowledge_graph["solution_patterns"][pattern_hash] = {
992
+ "problem_text": problem_text[:200],
993
+ "solution_text": solution_text[:500],
994
+ "successes": 0,
995
+ "failures": 0,
996
+ "success_rate": 0.5
997
+ }
998
+
999
+ entry = self.knowledge_graph["solution_patterns"][pattern_hash]
1000
+
1001
+ if outcome == "worked":
1002
+ entry["successes"] = entry.get("successes", 0) + 1
1003
+ elif outcome == "failed":
1004
+ entry["failures"] = entry.get("failures", 0) + 1
1005
+
1006
+ total = entry.get("successes", 0) + entry.get("failures", 0)
1007
+ if total > 0:
1008
+ entry["success_rate"] = entry.get("successes", 0) / total
1009
+
1010
+
1011
+ def add_entities_from_text(
1012
+ self,
1013
+ text: str,
1014
+ doc_id: str,
1015
+ collection: str,
1016
+ quality_score: Optional[float] = None
1017
+ ) -> List[str]:
1018
+ """
1019
+ Add entities from text to content graph.
1020
+ Wrapper around ContentGraph.add_entities_from_text.
1021
+ """
1022
+ return self.content_graph.add_entities_from_text(
1023
+ text=text,
1024
+ doc_id=doc_id,
1025
+ collection=collection,
1026
+ extract_concepts_fn=self.extract_concepts,
1027
+ quality_score=quality_score
1028
+ )
1029
+
1030
+ def remove_entity_mention(self, doc_id: str):
1031
+ """Remove a document's entity mentions from content graph."""
1032
+ self.content_graph.remove_entity_mention(doc_id)
1033
+
1034
+ # =========================================================================
1035
+ # Shutdown
1036
+ # =========================================================================
1037
+
1038
+ async def cleanup(self):
1039
+ """Clean shutdown - save pending changes."""
1040
+ async with self._kg_save_lock:
1041
+ if self._kg_save_task and not self._kg_save_task.done():
1042
+ self._kg_save_task.cancel()
1043
+ try:
1044
+ await self._kg_save_task
1045
+ except asyncio.CancelledError:
1046
+ pass
1047
+
1048
+ # Final save if pending
1049
+ if self._kg_save_pending:
1050
+ await self._save_kg()
1051
+
1052
+ logger.info("KnowledgeGraphService cleaned up")