roampal 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. roampal/__init__.py +29 -0
  2. roampal/__main__.py +6 -0
  3. roampal/backend/__init__.py +1 -0
  4. roampal/backend/modules/__init__.py +1 -0
  5. roampal/backend/modules/memory/__init__.py +43 -0
  6. roampal/backend/modules/memory/chromadb_adapter.py +623 -0
  7. roampal/backend/modules/memory/config.py +102 -0
  8. roampal/backend/modules/memory/content_graph.py +543 -0
  9. roampal/backend/modules/memory/context_service.py +455 -0
  10. roampal/backend/modules/memory/embedding_service.py +96 -0
  11. roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
  12. roampal/backend/modules/memory/memory_bank_service.py +433 -0
  13. roampal/backend/modules/memory/memory_types.py +296 -0
  14. roampal/backend/modules/memory/outcome_service.py +400 -0
  15. roampal/backend/modules/memory/promotion_service.py +473 -0
  16. roampal/backend/modules/memory/routing_service.py +444 -0
  17. roampal/backend/modules/memory/scoring_service.py +324 -0
  18. roampal/backend/modules/memory/search_service.py +646 -0
  19. roampal/backend/modules/memory/tests/__init__.py +1 -0
  20. roampal/backend/modules/memory/tests/conftest.py +12 -0
  21. roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
  22. roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
  23. roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
  24. roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
  25. roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
  26. roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
  27. roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
  28. roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
  29. roampal/backend/modules/memory/unified_memory_system.py +1277 -0
  30. roampal/cli.py +638 -0
  31. roampal/hooks/__init__.py +16 -0
  32. roampal/hooks/session_manager.py +587 -0
  33. roampal/hooks/stop_hook.py +176 -0
  34. roampal/hooks/user_prompt_submit_hook.py +103 -0
  35. roampal/mcp/__init__.py +7 -0
  36. roampal/mcp/server.py +611 -0
  37. roampal/server/__init__.py +7 -0
  38. roampal/server/main.py +744 -0
  39. roampal-0.1.4.dist-info/METADATA +179 -0
  40. roampal-0.1.4.dist-info/RECORD +44 -0
  41. roampal-0.1.4.dist-info/WHEEL +5 -0
  42. roampal-0.1.4.dist-info/entry_points.txt +2 -0
  43. roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
  44. roampal-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1277 @@
1
+ """
2
+ UnifiedMemorySystem - Facade coordinating memory services
3
+
4
+ Simplified from Roampal ui-implementation for roampal-core.
5
+ Stripped: Ollama, KG service, complex routing.
6
+ Kept: Core search, outcome tracking, memory bank operations.
7
+ """
8
+
9
+ import logging
10
+ import json
11
+ import uuid
12
+ import os
13
+ from pathlib import Path
14
+ from datetime import datetime
15
+ from typing import List, Dict, Any, Optional, Literal
16
+ from dataclasses import dataclass, field
17
+
18
+ from .config import MemoryConfig
19
+ from .chromadb_adapter import ChromaDBAdapter
20
+ from .embedding_service import EmbeddingService
21
+ from .scoring_service import ScoringService
22
+ from .outcome_service import OutcomeService
23
+ from .memory_bank_service import MemoryBankService
24
+ from .context_service import ContextService
25
+ from .promotion_service import PromotionService
26
+ from .routing_service import RoutingService
27
+ from .knowledge_graph_service import KnowledgeGraphService
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ CollectionName = Literal["books", "working", "history", "patterns", "memory_bank"]
32
+
33
+ # ContextType is any string - LLM discovers topics organically (coding, fitness, finance, etc.)
34
+ ContextType = str
35
+
36
+
37
@dataclass
class ActionOutcome:
    """
    Tracks individual action outcomes with topic-based context awareness (v0.2.1 Causal Learning).

    Enables learning: "In topic X, action Y leads to outcome Z"

    Examples:
        - For CODING: search_memory → 92% success (searching code patterns works well)
        - For FITNESS: create_memory → 88% success (storing workout logs works well)

    Context is detected from conversation (coding, fitness, finance, creative_writing, etc.)
    """
    action_type: str  # Tool name: "search_memory", "create_memory", "score_response", etc.
    context_type: str  # Topic (ContextType alias == str): "coding", "fitness", "finance", etc.
    outcome: Literal["worked", "failed", "partial"]
    timestamp: datetime = field(default_factory=datetime.now)

    # Action details
    action_params: Dict[str, Any] = field(default_factory=dict)
    doc_id: Optional[str] = None
    collection: Optional[str] = None

    # Outcome details
    failure_reason: Optional[str] = None
    success_context: Optional[Dict[str, Any]] = None

    # Causal attribution
    chain_position: int = 0  # Position in action chain (0 = first action)
    chain_length: int = 1  # Total actions in chain
    caused_final_outcome: bool = True  # Did this action cause the final outcome?

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict for KG storage (timestamp as ISO string)."""
        return {
            "action_type": self.action_type,
            "context_type": self.context_type,
            "outcome": self.outcome,
            "timestamp": self.timestamp.isoformat(),
            "action_params": self.action_params,
            "doc_id": self.doc_id,
            "collection": self.collection,
            "failure_reason": self.failure_reason,
            "success_context": self.success_context,
            "chain_position": self.chain_position,
            "chain_length": self.chain_length,
            "caused_final_outcome": self.caused_final_outcome,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ActionOutcome":
        """
        Deserialize from a dict produced by to_dict().

        Robustness fix: unknown keys are ignored instead of raising TypeError,
        so KG payloads written by newer versions still load.
        """
        from dataclasses import fields as dc_fields
        known = {f.name for f in dc_fields(cls)}
        payload = {k: v for k, v in data.items() if k in known}
        payload["timestamp"] = datetime.fromisoformat(payload["timestamp"])
        return cls(**payload)
92
+
93
+
94
+ class UnifiedMemorySystem:
95
+ """
96
+ The unified memory system for roampal-core.
97
+
98
+ Coordinates services for:
99
+ - Multi-collection vector search
100
+ - Outcome-based learning
101
+ - Memory bank (user facts)
102
+ - Context analysis for hook injection
103
+
104
+ 5 Collections:
105
+ - books: Uploaded reference material (never decays)
106
+ - working: Current session context (session-scoped)
107
+ - history: Past conversations (auto-promoted to patterns)
108
+ - patterns: Proven solutions (what actually worked)
109
+ - memory_bank: Persistent user facts (LLM-controlled, never decays)
110
+ """
111
+
112
+ def __init__(
113
+ self,
114
+ data_path: str = None,
115
+ config: Optional[MemoryConfig] = None
116
+ ):
117
+ """
118
+ Initialize UnifiedMemorySystem.
119
+
120
+ Args:
121
+ data_path: Path for ChromaDB storage. Defaults to %APPDATA%/Roampal/data
122
+ config: Memory configuration
123
+ """
124
+ self.config = config or MemoryConfig()
125
+
126
+ # Default data path - same as Roampal Desktop
127
+ # Can override with ROAMPAL_DATA_PATH env var
128
+ # Windows: %APPDATA%/Roampal/data (or dev-data for dev mode)
129
+ # macOS: ~/Library/Application Support/Roampal/data
130
+ # Linux: ~/.local/share/Roampal/data
131
+ if data_path is None:
132
+ # Check for env override first
133
+ data_path = os.environ.get('ROAMPAL_DATA_PATH')
134
+
135
+ if data_path is None:
136
+ # Check for dev mode - matches Desktop's ROAMPAL_DATA_DIR convention
137
+ # DEV: Roampal_DEV/data, PROD: Roampal/data
138
+ dev_mode = os.environ.get('ROAMPAL_DEV', '').lower() in ('1', 'true', 'yes')
139
+ app_folder = 'Roampal_DEV' if dev_mode else 'Roampal'
140
+
141
+ if os.name == 'nt': # Windows
142
+ appdata = os.environ.get('APPDATA', os.path.expanduser('~'))
143
+ data_path = os.path.join(appdata, app_folder, 'data')
144
+ elif os.uname().sysname == 'Darwin': # macOS
145
+ data_path = os.path.join(os.path.expanduser('~'), 'Library', 'Application Support', app_folder, 'data')
146
+ else: # Linux
147
+ data_path = os.path.join(os.path.expanduser('~'), '.local', 'share', app_folder.lower(), 'data')
148
+ self.data_path = Path(data_path)
149
+ self.data_path.mkdir(parents=True, exist_ok=True)
150
+
151
+ # Services (lazy initialized)
152
+ self._embedding_service: Optional[EmbeddingService] = None
153
+ self._scoring_service: Optional[ScoringService] = None
154
+ self._promotion_service: Optional[PromotionService] = None
155
+ self._outcome_service: Optional[OutcomeService] = None
156
+ self._memory_bank_service: Optional[MemoryBankService] = None
157
+ self._context_service: Optional[ContextService] = None
158
+
159
+ # Collections
160
+ self.collections: Dict[str, ChromaDBAdapter] = {}
161
+ self.initialized = False
162
+
163
+ # Knowledge Graph - shared with Desktop
164
+ self.kg_path = self.data_path / "knowledge_graph.json"
165
+ self.knowledge_graph = self._load_kg()
166
+
167
+ def _load_kg(self) -> Dict[str, Any]:
168
+ """Load knowledge graph from disk, or return default structure."""
169
+ if self.kg_path.exists():
170
+ try:
171
+ with open(self.kg_path, 'r') as f:
172
+ return json.load(f)
173
+ except Exception as e:
174
+ logger.warning(f"Failed to load KG from {self.kg_path}: {e}")
175
+
176
+ # Default empty KG structure
177
+ return {
178
+ "routing_patterns": {},
179
+ "success_rates": {},
180
+ "failure_patterns": {},
181
+ "problem_categories": {},
182
+ "problem_solutions": {},
183
+ "solution_patterns": {},
184
+ "context_action_effectiveness": {}
185
+ }
186
+
187
+ async def initialize(self):
188
+ """Initialize collections and services."""
189
+ if self.initialized:
190
+ return
191
+
192
+ logger.info("Initializing UnifiedMemorySystem...")
193
+
194
+ # Initialize embedding service
195
+ self._embedding_service = EmbeddingService()
196
+ await self._embedding_service.prewarm()
197
+
198
+ # Initialize collections - use roampal_ prefix to match Desktop data
199
+ # Keys are short names (for code), values use roampal_ prefix in chromadb
200
+ collection_mapping = {
201
+ "books": "roampal_books",
202
+ "working": "roampal_working",
203
+ "history": "roampal_history",
204
+ "patterns": "roampal_patterns",
205
+ "memory_bank": "roampal_memory_bank"
206
+ }
207
+ for short_name, chroma_name in collection_mapping.items():
208
+ self.collections[short_name] = ChromaDBAdapter(
209
+ collection_name=chroma_name,
210
+ persist_directory=str(self.data_path / "chromadb")
211
+ )
212
+ logger.info(f"Initialized collection: {short_name} -> {chroma_name}")
213
+
214
+ # Initialize services with dependencies
215
+ self._scoring_service = ScoringService(config=self.config)
216
+
217
+ # KG service (Desktop parity)
218
+ self._kg_service = KnowledgeGraphService(
219
+ kg_path=self.data_path / "knowledge_graph.json",
220
+ content_graph_path=self.data_path / "content_graph.json",
221
+ relationships_path=self.data_path / "memory_relationships.json",
222
+ config=self.config
223
+ )
224
+
225
+ # Routing service (Desktop parity)
226
+ self._routing_service = RoutingService(
227
+ kg_service=self._kg_service,
228
+ config=self.config
229
+ )
230
+
231
+ # Initialize PromotionService for auto-deletion/promotion when scores drop
232
+ self._promotion_service = PromotionService(
233
+ collections=self.collections,
234
+ embed_fn=self._embedding_service.embed_text,
235
+ config=self.config
236
+ )
237
+
238
+ # OutcomeService wired to PromotionService and KG - garbage now gets deleted
239
+ self._outcome_service = OutcomeService(
240
+ collections=self.collections,
241
+ kg_service=self._kg_service,
242
+ promotion_service=self._promotion_service,
243
+ config=self.config
244
+ )
245
+
246
+ self._memory_bank_service = MemoryBankService(
247
+ collection=self.collections["memory_bank"],
248
+ embed_fn=self._embedding_service.embed_text,
249
+ config=self.config
250
+ )
251
+
252
+ self._context_service = ContextService(
253
+ collections=self.collections,
254
+ embed_fn=self._embedding_service.embed_text,
255
+ config=self.config
256
+ )
257
+
258
+ self.initialized = True
259
+ logger.info("UnifiedMemorySystem initialized successfully")
260
+
261
+ # Startup cleanup: delete garbage memories (score < 0.2)
262
+ await self._startup_cleanup()
263
+
264
+ async def _startup_cleanup(self):
265
+ """
266
+ Delete garbage memories on startup.
267
+
268
+ Scans working, history, and patterns collections for items with score < 0.2
269
+ and deletes them. This ensures garbage doesn't pile up between sessions.
270
+ """
271
+ deleted_count = 0
272
+ collections_to_clean = ["working", "history", "patterns"]
273
+
274
+ for coll_name in collections_to_clean:
275
+ adapter = self.collections.get(coll_name)
276
+ if not adapter or not adapter.collection:
277
+ continue
278
+
279
+ try:
280
+ # Get all documents in collection
281
+ results = adapter.collection.get(include=["metadatas"])
282
+ ids = results.get("ids", [])
283
+ metadatas = results.get("metadatas", [])
284
+
285
+ ids_to_delete = []
286
+ for i, doc_id in enumerate(ids):
287
+ if i < len(metadatas):
288
+ score = metadatas[i].get("score", 0.5)
289
+ if score < self.config.deletion_score_threshold:
290
+ ids_to_delete.append(doc_id)
291
+
292
+ if ids_to_delete:
293
+ adapter.delete_vectors(ids_to_delete)
294
+ deleted_count += len(ids_to_delete)
295
+ logger.info(f"Startup cleanup: deleted {len(ids_to_delete)} garbage items from {coll_name}")
296
+
297
+ except Exception as e:
298
+ logger.warning(f"Startup cleanup error for {coll_name}: {e}")
299
+
300
+ if deleted_count > 0:
301
+ logger.info(f"Startup cleanup: {deleted_count} garbage deleted")
302
+
303
+ # Also clean old working memories (> 24 hours)
304
+ await self._promotion_service.cleanup_old_working_memory(max_age_hours=24.0)
305
+
306
+ # ==================== Core Search ====================
307
+
308
+ async def search(
309
+ self,
310
+ query: str,
311
+ limit: int = 10,
312
+ collections: Optional[List[CollectionName]] = None,
313
+ metadata_filters: Optional[Dict[str, Any]] = None
314
+ ) -> List[Dict[str, Any]]:
315
+ """
316
+ Search memory with optional collection filtering.
317
+
318
+ Args:
319
+ query: Search query
320
+ limit: Max results per collection
321
+ collections: Which collections to search (default: all)
322
+ metadata_filters: Optional metadata filters
323
+
324
+ Returns:
325
+ Ranked results with scores
326
+ """
327
+ if not self.initialized:
328
+ await self.initialize()
329
+
330
+ if collections is None:
331
+ collections = list(self.collections.keys())
332
+
333
+ # Get query embedding
334
+ query_vector = await self._embedding_service.embed_text(query)
335
+
336
+ all_results = []
337
+ for coll_name in collections:
338
+ if coll_name not in self.collections:
339
+ continue
340
+
341
+ try:
342
+ results = await self.collections[coll_name].query_vectors(
343
+ query_vector=query_vector,
344
+ top_k=limit,
345
+ filters=metadata_filters
346
+ )
347
+
348
+ for r in results:
349
+ # Add collection info
350
+ r["collection"] = coll_name
351
+
352
+ # Get metadata and calculate base similarity
353
+ metadata = r.get("metadata", {})
354
+ distance = r.get("distance", 1.0)
355
+ embedding_similarity = 1.0 / (1.0 + distance)
356
+
357
+ # Calculate quality score based on collection type
358
+ if coll_name == "memory_bank":
359
+ # Memory bank: use importance × confidence
360
+ importance = float(metadata.get("importance", 0.7))
361
+ confidence = float(metadata.get("confidence", 0.7))
362
+ quality = importance * confidence
363
+ else:
364
+ # Other collections: use learned score from outcome history
365
+ quality = float(metadata.get("score", 0.5))
366
+
367
+ # Apply quality-weighted scoring (Desktop-compatible formula)
368
+ # Distance boost: adjust distance by quality
369
+ quality_boost = 1.0 - (quality * 0.8) # High quality = lower effective distance
370
+ adjusted_distance = distance * quality_boost
371
+ adjusted_similarity = 1.0 / (1.0 + adjusted_distance)
372
+
373
+ # Final score: blend embedding similarity with quality
374
+ # This ensures high-quality results rank above low-quality semantic matches
375
+ final_rank_score = adjusted_similarity * (1.0 + quality)
376
+
377
+ # Store Desktop-compatible field names
378
+ r["embedding_similarity"] = embedding_similarity
379
+ r["final_rank_score"] = final_rank_score
380
+ r["quality"] = quality
381
+
382
+ # Keep legacy fields for backwards compatibility
383
+ r["combined_score"] = final_rank_score
384
+ r["similarity"] = embedding_similarity
385
+
386
+ all_results.append(r)
387
+
388
+ except Exception as e:
389
+ logger.warning(f"Error searching {coll_name}: {e}")
390
+
391
+ # Sort by final_rank_score (quality-weighted)
392
+ all_results.sort(key=lambda x: x.get("final_rank_score", 0), reverse=True)
393
+
394
+ return all_results[:limit * 2] # Return top across all collections
395
+
396
+ # ==================== Generic Store (Desktop-compatible wrapper) ====================
397
+
398
+ async def store(
399
+ self,
400
+ text: str,
401
+ collection: str = "working",
402
+ metadata: Optional[Dict[str, Any]] = None
403
+ ) -> str:
404
+ """
405
+ Store text in a collection (Desktop-compatible wrapper).
406
+
407
+ Args:
408
+ text: Text to store
409
+ collection: Target collection (working, books, history, patterns, memory_bank)
410
+ metadata: Optional metadata
411
+
412
+ Returns:
413
+ Document ID
414
+ """
415
+ if not self.initialized:
416
+ await self.initialize()
417
+
418
+ # Route to appropriate collection-specific method
419
+ if collection == "working":
420
+ return await self.store_working(content=text, metadata=metadata)
421
+ elif collection == "books":
422
+ # store_book returns list of chunk IDs; return first for Desktop compat
423
+ chunk_ids = await self.store_book(text)
424
+ return chunk_ids[0] if chunk_ids else None
425
+ elif collection == "memory_bank":
426
+ tags = metadata.get("tags", []) if metadata else []
427
+ importance = metadata.get("importance", 0.7) if metadata else 0.7
428
+ confidence = metadata.get("confidence", 0.7) if metadata else 0.7
429
+ return await self.store_memory_bank(text, tags, importance, confidence)
430
+ elif collection in ("patterns", "history"):
431
+ # Direct store to patterns/history (Desktop-compatible)
432
+ import uuid
433
+ doc_id = f"{collection}_{uuid.uuid4().hex[:8]}"
434
+ final_metadata = {
435
+ "text": text,
436
+ "content": text,
437
+ "score": 0.5,
438
+ "uses": 0,
439
+ "timestamp": datetime.now().isoformat(),
440
+ **(metadata or {})
441
+ }
442
+ embedding = await self._embedding_service.embed_text(text)
443
+ await self.collections[collection].upsert_vectors(
444
+ ids=[doc_id],
445
+ vectors=[embedding],
446
+ metadatas=[final_metadata]
447
+ )
448
+ return doc_id
449
+ else:
450
+ # Unknown collection - default to working
451
+ return await self.store_working(content=text, metadata=metadata)
452
+
453
+ # ==================== Memory Bank Operations ====================
454
+
455
+ async def store_memory_bank(
456
+ self,
457
+ text: str,
458
+ tags: List[str] = None,
459
+ importance: float = 0.7,
460
+ confidence: float = 0.7
461
+ ) -> str:
462
+ """
463
+ Store a fact in memory_bank.
464
+
465
+ Args:
466
+ text: The fact to remember
467
+ tags: Categories (identity, preference, goal, project)
468
+ importance: How critical (0.0-1.0)
469
+ confidence: How certain (0.0-1.0)
470
+
471
+ Returns:
472
+ Document ID
473
+ """
474
+ if not self.initialized:
475
+ await self.initialize()
476
+
477
+ return await self._memory_bank_service.store(
478
+ text=text,
479
+ tags=tags or [],
480
+ importance=importance,
481
+ confidence=confidence
482
+ )
483
+
484
+ async def update_memory_bank(
485
+ self,
486
+ old_content: str,
487
+ new_content: str
488
+ ) -> Optional[str]:
489
+ """
490
+ Update a memory_bank entry.
491
+
492
+ Args:
493
+ old_content: Content to find (semantic match)
494
+ new_content: New content
495
+
496
+ Returns:
497
+ Document ID or None if not found
498
+ """
499
+ if not self.initialized:
500
+ await self.initialize()
501
+
502
+ return await self._memory_bank_service.update(old_content, new_content)
503
+
504
+ async def archive_memory_bank(self, content: str) -> bool:
505
+ """
506
+ Archive a memory_bank entry.
507
+
508
+ Args:
509
+ content: Content to archive (semantic match)
510
+
511
+ Returns:
512
+ Success status
513
+ """
514
+ if not self.initialized:
515
+ await self.initialize()
516
+
517
+ return await self._memory_bank_service.archive(content)
518
+
519
+ # ==================== Outcome Recording ====================
520
+
521
+ async def record_outcome(
522
+ self,
523
+ doc_ids: List[str] = None,
524
+ outcome: Literal["worked", "failed", "partial"] = "worked",
525
+ failure_reason: Optional[str] = None,
526
+ # Desktop-compatible parameters
527
+ doc_id: str = None,
528
+ context: Dict[str, Any] = None
529
+ ) -> Dict[str, Any]:
530
+ """
531
+ Record outcome for searched documents.
532
+
533
+ This updates scores for memories that were used in a response.
534
+ Called by record_response MCP tool.
535
+
536
+ Args:
537
+ doc_ids: Documents that were used (Core style)
538
+ outcome: Whether the response worked
539
+ failure_reason: Reason if failed
540
+ doc_id: Single document ID (Desktop-compatible)
541
+ context: Additional context (Desktop-compatible, unused)
542
+
543
+ Returns:
544
+ Summary of updates
545
+ """
546
+ if not self.initialized:
547
+ await self.initialize()
548
+
549
+ # Desktop compatibility: handle single doc_id
550
+ if doc_id is not None and doc_ids is None:
551
+ doc_ids = [doc_id]
552
+ elif doc_ids is None:
553
+ doc_ids = []
554
+
555
+ updates = []
556
+ for single_doc_id in doc_ids:
557
+ result = await self._outcome_service.record_outcome(
558
+ doc_id=single_doc_id,
559
+ outcome=outcome,
560
+ failure_reason=failure_reason
561
+ )
562
+ if result:
563
+ updates.append({
564
+ "doc_id": single_doc_id,
565
+ "new_score": result.get("score"),
566
+ "outcome": outcome
567
+ })
568
+
569
+ logger.info(f"Recorded outcome '{outcome}' for {len(updates)} documents")
570
+ return {
571
+ "outcome": outcome,
572
+ "documents_updated": len(updates),
573
+ "updates": updates
574
+ }
575
+
576
+ # ==================== Context Analysis (for Hooks) ====================
577
+
578
+ async def get_context_for_injection(
579
+ self,
580
+ query: str,
581
+ conversation_id: str = None,
582
+ recent_conversation: List[Dict[str, Any]] = None
583
+ ) -> Dict[str, Any]:
584
+ """
585
+ Get context to inject into LLM prompt via hooks.
586
+
587
+ Uses KG-routed unified search: searches ALL collections, ranks by Wilson score,
588
+ returns top 5 most relevant/proven memories regardless of collection.
589
+
590
+ Args:
591
+ query: The user's message
592
+ conversation_id: Current conversation ID
593
+ recent_conversation: Recent messages for continuity
594
+
595
+ Returns:
596
+ Dict with memories, doc_ids for scoring, and formatted injection
597
+ """
598
+ if not self.initialized:
599
+ await self.initialize()
600
+
601
+ result = {
602
+ "memories": [],
603
+ "formatted_injection": "",
604
+ "doc_ids": []
605
+ }
606
+
607
+ # 1. Extract concepts for KG routing insight
608
+ concepts = self._extract_concepts(query)
609
+
610
+ # 2. Get KG recommendations (informational - we still search all)
611
+ kg_recs = self.get_tier_recommendations(concepts)
612
+
613
+ # 3. Unified search across ALL collections
614
+ all_collections = ["working", "patterns", "history", "books", "memory_bank"]
615
+ search_results = await self.search(
616
+ query=query,
617
+ limit=5,
618
+ collections=all_collections
619
+ )
620
+
621
+ # 4. Apply Wilson scoring for proper ranking
622
+ scored_results = self._scoring_service.apply_scoring_to_results(search_results)
623
+
624
+ # 5. Take top 5 across all collections
625
+ top_memories = scored_results[:5]
626
+
627
+ # 6. Enrich with Action KG effectiveness stats
628
+ for mem in top_memories:
629
+ coll = mem.get("collection", "unknown")
630
+ eff = self.get_action_effectiveness("general", "search", coll)
631
+ if eff:
632
+ mem["effectiveness"] = eff.get("success_rate", 0)
633
+
634
+ result["memories"] = top_memories
635
+ result["relevant_memories"] = top_memories # Alias for selective scoring in hooks
636
+ result["doc_ids"] = [m.get("id") for m in top_memories if m.get("id")]
637
+ result["formatted_injection"] = self._format_context_injection(result)
638
+
639
+ return result
640
+
641
+
642
+ def _format_context_injection(self, context: Dict[str, Any]) -> str:
643
+ """
644
+ Format context for injection into LLM prompt.
645
+
646
+ Shows top 5 memories across all collections with effectiveness stats.
647
+ """
648
+ parts = []
649
+
650
+ memories = context.get("memories", [])
651
+ if memories:
652
+ parts.append("═══ KNOWN CONTEXT ═══")
653
+ for mem in memories[:5]:
654
+ # Get content from various possible locations
655
+ content = mem.get("content") or mem.get("text") or mem.get("metadata", {}).get("text", "")
656
+ collection = mem.get("collection", "unknown")
657
+
658
+ # Get Wilson score and effectiveness
659
+ wilson = mem.get("wilson_score", 0)
660
+ effectiveness = mem.get("effectiveness", 0)
661
+
662
+ # Format with collection and score info
663
+ if wilson >= 0.7:
664
+ parts.append(f"• {content} ({int(wilson*100)}% proven, {collection})")
665
+ elif effectiveness > 0:
666
+ parts.append(f"• {content} ({int(effectiveness*100)}% effective, {collection})")
667
+ else:
668
+ parts.append(f"• {content} ({collection})")
669
+
670
+ parts.append("═══ END CONTEXT ═══")
671
+ parts.append("")
672
+ return "\n".join(parts)
673
+
674
+ return ""
675
+
676
+ # ==================== Book/Document Operations ====================
677
+
678
+ async def store_book(
679
+ self,
680
+ content: str,
681
+ title: str = None,
682
+ source: str = None,
683
+ chunk_size: int = 1000,
684
+ chunk_overlap: int = 200
685
+ ) -> List[str]:
686
+ """
687
+ Store a document in the books collection.
688
+
689
+ Documents are chunked for better retrieval.
690
+
691
+ Args:
692
+ content: Full document text
693
+ title: Document title
694
+ source: Source file path or URL
695
+ chunk_size: Characters per chunk
696
+ chunk_overlap: Overlap between chunks
697
+
698
+ Returns:
699
+ List of chunk document IDs
700
+ """
701
+ if not self.initialized:
702
+ await self.initialize()
703
+
704
+ # Simple chunking by character count with overlap
705
+ chunks = []
706
+ start = 0
707
+ while start < len(content):
708
+ end = start + chunk_size
709
+ chunk = content[start:end]
710
+ chunks.append(chunk)
711
+ start = end - chunk_overlap
712
+ if start < 0:
713
+ start = 0
714
+
715
+ doc_ids = []
716
+ base_id = f"book_{uuid.uuid4().hex[:8]}"
717
+
718
+ for i, chunk in enumerate(chunks):
719
+ doc_id = f"{base_id}_chunk_{i}"
720
+ embedding = await self._embedding_service.embed_text(chunk)
721
+
722
+ meta = {
723
+ "content": chunk,
724
+ "text": chunk,
725
+ "title": title or "Untitled",
726
+ "source": source or "unknown",
727
+ "chunk_index": i,
728
+ "total_chunks": len(chunks),
729
+ "created_at": datetime.now().isoformat()
730
+ }
731
+
732
+ await self.collections["books"].upsert_vectors(
733
+ ids=[doc_id],
734
+ vectors=[embedding],
735
+ metadatas=[meta]
736
+ )
737
+ doc_ids.append(doc_id)
738
+
739
+ logger.info(f"Stored book '{title}' in {len(chunks)} chunks")
740
+ return doc_ids
741
+
742
+ async def remove_book(self, title: str) -> Dict[str, Any]:
743
+ """
744
+ Remove a book by title.
745
+
746
+ Deletes all chunks from ChromaDB and cleans Action KG references.
747
+
748
+ Args:
749
+ title: The book title to remove
750
+
751
+ Returns:
752
+ Dict with removal stats
753
+ """
754
+ if not self.initialized:
755
+ await self.initialize()
756
+
757
+ books_collection = self.collections.get("books")
758
+ if not books_collection or books_collection.collection is None:
759
+ return {"removed": 0, "error": "Books collection not initialized"}
760
+
761
+ # Find all chunks with this title
762
+ try:
763
+ results = books_collection.collection.get(
764
+ where={"title": title},
765
+ include=["metadatas"]
766
+ )
767
+ except Exception as e:
768
+ return {"removed": 0, "error": str(e)}
769
+
770
+ doc_ids = results.get("ids", [])
771
+ if not doc_ids:
772
+ return {"removed": 0, "message": f"No book found with title '{title}'"}
773
+
774
+ # Delete from ChromaDB
775
+ try:
776
+ books_collection.delete_vectors(doc_ids)
777
+ logger.info(f"Removed {len(doc_ids)} chunks for book '{title}'")
778
+ except Exception as e:
779
+ logger.error(f"Failed to delete book chunks from ChromaDB: {e}")
780
+ return {"removed": 0, "error": f"ChromaDB delete failed: {str(e)}"}
781
+
782
+ # Clean Action KG references
783
+ cleaned_refs = await self.cleanup_action_kg_for_doc_ids(doc_ids)
784
+
785
+ return {
786
+ "removed": len(doc_ids),
787
+ "title": title,
788
+ "cleaned_kg_refs": cleaned_refs
789
+ }
790
+
791
+ async def cleanup_action_kg_for_doc_ids(self, doc_ids: List[str]) -> int:
792
+ """
793
+ Remove Action KG examples that reference specific doc_ids.
794
+
795
+ Called when books are deleted to maintain KG integrity.
796
+
797
+ Args:
798
+ doc_ids: List of document IDs being deleted
799
+
800
+ Returns:
801
+ Number of examples removed
802
+ """
803
+ if not doc_ids:
804
+ return 0
805
+
806
+ try:
807
+ doc_id_set = set(doc_ids)
808
+ cleaned = 0
809
+
810
+ for key, stats in self.knowledge_graph.get("context_action_effectiveness", {}).items():
811
+ examples = stats.get("examples", [])
812
+ original_count = len(examples)
813
+ stats["examples"] = [
814
+ ex for ex in examples
815
+ if ex.get("doc_id") not in doc_id_set
816
+ ]
817
+ cleaned += original_count - len(stats["examples"])
818
+
819
+ if cleaned > 0:
820
+ logger.info(f"Action KG cleanup: removed {cleaned} examples for deleted doc_ids")
821
+ self._save_kg()
822
+
823
+ return cleaned
824
+ except Exception as e:
825
+ logger.error(f"Error cleaning Action KG for doc_ids: {e}")
826
+ return 0
827
+
828
+ async def list_books(self) -> List[Dict[str, Any]]:
829
+ """
830
+ List all books with metadata.
831
+
832
+ Returns:
833
+ List of book info dicts grouped by title
834
+ """
835
+ if not self.initialized:
836
+ await self.initialize()
837
+
838
+ books_collection = self.collections.get("books")
839
+ if not books_collection or books_collection.collection is None:
840
+ return []
841
+
842
+ try:
843
+ results = books_collection.collection.get(include=["metadatas"])
844
+ except Exception:
845
+ return []
846
+
847
+ # Group by title
848
+ books_by_title = {}
849
+ for i, doc_id in enumerate(results.get("ids", [])):
850
+ metadata = results.get("metadatas", [])[i] if i < len(results.get("metadatas", [])) else {}
851
+ title = metadata.get("title", "Untitled")
852
+
853
+ if title not in books_by_title:
854
+ books_by_title[title] = {
855
+ "title": title,
856
+ "source": metadata.get("source", "unknown"),
857
+ "created_at": metadata.get("created_at", ""),
858
+ "chunk_count": 0
859
+ }
860
+ books_by_title[title]["chunk_count"] += 1
861
+
862
+ return list(books_by_title.values())
863
+
864
+ def _save_kg(self):
865
+ """Save knowledge graph to disk."""
866
+ try:
867
+ with open(self.kg_path, 'w') as f:
868
+ json.dump(self.knowledge_graph, f, indent=2)
869
+ except Exception as e:
870
+ logger.error(f"Failed to save knowledge graph: {e}")
871
+
872
+ # ==================== Working Memory Operations ====================
873
+
874
+ async def store_working(
875
+ self,
876
+ content: str,
877
+ conversation_id: str = None,
878
+ metadata: Dict[str, Any] = None,
879
+ initial_score: float = 0.5
880
+ ) -> str:
881
+ """
882
+ Store content in working memory.
883
+
884
+ Used for session context that may be promoted to patterns.
885
+
886
+ Args:
887
+ content: Content to store
888
+ conversation_id: Session identifier
889
+ metadata: Additional metadata
890
+ initial_score: Starting score (default 0.5, can be boosted/demoted based on outcome)
891
+
892
+ Returns:
893
+ Document ID
894
+ """
895
+ if not self.initialized:
896
+ await self.initialize()
897
+
898
+ doc_id = f"working_{uuid.uuid4().hex[:8]}"
899
+ embedding = await self._embedding_service.embed_text(content)
900
+
901
+ meta = {
902
+ "content": content,
903
+ "text": content,
904
+ "score": initial_score, # Can be adjusted based on outcome at creation time
905
+ "uses": 0,
906
+ "created_at": datetime.now().isoformat(),
907
+ "conversation_id": conversation_id or "unknown"
908
+ }
909
+ if metadata:
910
+ meta.update(metadata)
911
+
912
+ await self.collections["working"].upsert_vectors(
913
+ ids=[doc_id],
914
+ vectors=[embedding],
915
+ metadatas=[meta]
916
+ )
917
+
918
+ return doc_id
919
+
920
+ # ==================== Query Routing (Desktop-compatible) ====================
921
+
922
+ def _route_query(self, query: str) -> List[str]:
923
+ """Route query to appropriate collections (delegates to routing service)."""
924
+ if self._routing_service:
925
+ return self._routing_service.route_query(query)
926
+ return ["working", "patterns", "history", "books", "memory_bank"]
927
+
928
+ # ==================== Stats and Diagnostics ====================
929
+
930
+ def get_stats(self) -> Dict[str, Any]:
931
+ """Get memory system statistics."""
932
+ stats = {
933
+ "initialized": self.initialized,
934
+ "data_path": str(self.data_path),
935
+ "collections": {}
936
+ }
937
+
938
+ for name, adapter in self.collections.items():
939
+ try:
940
+ count = adapter.collection.count() if adapter.collection else 0
941
+ stats["collections"][name] = {"count": count}
942
+ except Exception as e:
943
+ stats["collections"][name] = {"error": str(e)}
944
+
945
+ return stats
946
+
947
+ # ==================== Knowledge Graph Methods ====================
948
+
949
+ def _extract_concepts(self, text: str) -> List[str]:
950
+ """
951
+ Extract concepts from text using basic extraction.
952
+
953
+ Returns: List of lowercase concept keywords
954
+ """
955
+ if not text:
956
+ return []
957
+
958
+ import re
959
+ words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())
960
+
961
+ stop_words = {'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had',
962
+ 'her', 'was', 'one', 'our', 'out', 'has', 'have', 'been', 'this', 'that',
963
+ 'with', 'they', 'from', 'what', 'when', 'where', 'which', 'how', 'why',
964
+ 'just', 'will', 'would', 'could', 'should', 'there', 'their', 'about'}
965
+
966
+ concepts = [w for w in words if w not in stop_words]
967
+ return concepts[:10]
968
+
969
+ def get_tier_recommendations(self, concepts: List[str]) -> Dict[str, Any]:
970
+ """Query Routing KG for best collections given concepts."""
971
+ ALL_COLLECTIONS = ["working", "patterns", "history", "books", "memory_bank"]
972
+
973
+ if not concepts:
974
+ return {"top_collections": ALL_COLLECTIONS.copy(), "match_count": 0, "confidence_level": "exploration"}
975
+
976
+ collection_scores = {c: 0.0 for c in ALL_COLLECTIONS}
977
+ match_count = 0
978
+
979
+ routing_patterns = self.knowledge_graph.get("routing_patterns", {})
980
+ problem_categories = self.knowledge_graph.get("problem_categories", {})
981
+
982
+ for concept in concepts:
983
+ if concept in routing_patterns:
984
+ pattern_data = routing_patterns[concept]
985
+ # Handle both old format (string) and new format (dict with best_collection)
986
+ if isinstance(pattern_data, dict):
987
+ best_coll = pattern_data.get("best_collection", "")
988
+ else:
989
+ best_coll = pattern_data
990
+ if best_coll in collection_scores:
991
+ collection_scores[best_coll] += 1.0
992
+ match_count += 1
993
+
994
+ if concept in problem_categories:
995
+ preferred = problem_categories[concept]
996
+ if isinstance(preferred, list):
997
+ for coll in preferred:
998
+ if coll in collection_scores:
999
+ collection_scores[coll] += 0.5
1000
+ match_count += 1
1001
+
1002
+ sorted_collections = sorted(collection_scores.items(), key=lambda x: x[1], reverse=True)
1003
+
1004
+ if match_count >= 3:
1005
+ confidence = "high"
1006
+ elif match_count >= 1:
1007
+ confidence = "medium"
1008
+ else:
1009
+ confidence = "exploration"
1010
+
1011
+ return {"top_collections": [c[0] for c in sorted_collections], "match_count": match_count, "confidence_level": confidence}
1012
+
1013
+ def get_action_effectiveness(self, context_type: str, action_type: str, collection: Optional[str] = None) -> Optional[Dict[str, Any]]:
1014
+ """Get effectiveness stats for an action in a specific context."""
1015
+ key = f"{context_type}|{action_type}|{collection or '*'}"
1016
+ return self.knowledge_graph.get("context_action_effectiveness", {}).get(key)
1017
+
1018
+ async def get_facts_for_entities(self, entities: List[str], limit: int = 2) -> List[Dict[str, Any]]:
1019
+ """Query Content KG to retrieve matching memory_bank facts."""
1020
+ if not self.initialized:
1021
+ await self.initialize()
1022
+
1023
+ facts = []
1024
+ seen_ids = set()
1025
+
1026
+ for entity in entities:
1027
+ if len(facts) >= limit:
1028
+ break
1029
+
1030
+ try:
1031
+ results = await self.search(query=entity, collections=["memory_bank"], limit=2)
1032
+
1033
+ for result in results:
1034
+ if len(facts) >= limit:
1035
+ break
1036
+
1037
+ doc_id = result.get("id")
1038
+ if doc_id and doc_id not in seen_ids:
1039
+ seen_ids.add(doc_id)
1040
+ effectiveness = self._get_doc_effectiveness(doc_id)
1041
+ facts.append({
1042
+ "id": doc_id,
1043
+ "content": result.get("content", result.get("text", "")),
1044
+ "entity": entity,
1045
+ "effectiveness": effectiveness
1046
+ })
1047
+ except Exception as e:
1048
+ logger.warning(f"Error getting facts for entity '{entity}': {e}")
1049
+
1050
+ return facts
1051
+
1052
+ def _get_doc_effectiveness(self, doc_id: str) -> Optional[Dict[str, Any]]:
1053
+ """Get doc effectiveness from Action KG examples."""
1054
+ successes = failures = 0
1055
+
1056
+ for key, stats in self.knowledge_graph.get("context_action_effectiveness", {}).items():
1057
+ for ex in stats.get("examples", []):
1058
+ if ex.get("doc_id") == doc_id:
1059
+ if ex.get("outcome") == "worked":
1060
+ successes += 1
1061
+ elif ex.get("outcome") == "failed":
1062
+ failures += 1
1063
+
1064
+ total = successes + failures
1065
+ return {"success_rate": successes / total, "total_uses": total, "successes": successes, "failures": failures} if total else None
1066
+
1067
+ async def detect_context_type(self, system_prompts: List[str] = None, recent_messages: List[Dict[str, Any]] = None) -> str:
1068
+ """Detect context type from conversation (coding, fitness, general, etc.)."""
1069
+ all_text = ""
1070
+ if system_prompts:
1071
+ all_text += " ".join(system_prompts)
1072
+ if recent_messages:
1073
+ for msg in recent_messages:
1074
+ all_text += " " + msg.get("content", "")
1075
+
1076
+ all_text = all_text.lower()
1077
+
1078
+ context_keywords = {
1079
+ "coding": ["code", "function", "class", "error", "debug", "api", "database", "python", "javascript", "typescript", "react", "git", "build", "test"],
1080
+ "fitness": ["workout", "exercise", "gym", "weight", "muscle", "cardio", "diet", "protein", "calories"],
1081
+ "finance": ["money", "budget", "invest", "stock", "savings", "expense", "income", "tax"],
1082
+ "learning": ["learn", "study", "course", "book", "tutorial", "understand", "concept"],
1083
+ "writing": ["write", "essay", "article", "blog", "content", "draft", "edit"]
1084
+ }
1085
+
1086
+ scores = {ctx: sum(1 for kw in kws if kw in all_text) for ctx, kws in context_keywords.items()}
1087
+ best_ctx = max(scores.items(), key=lambda x: x[1])
1088
+ return best_ctx[0] if best_ctx[1] > 0 else "general"
1089
+
1090
    async def analyze_conversation_context(self, current_message: str, recent_conversation: List[Dict[str, Any]], conversation_id: str) -> Dict[str, Any]:
        """Analyze conversation context for organic memory injection.

        Builds a best-effort context dict from the knowledge graph:
            relevant_patterns: past solutions with score >= 0.7 whose last
                outcome was "worked"
            past_outcomes: recorded failure patterns overlapping the
                current message's concepts
            topic_continuity / proactive_insights: reserved, left empty here
            matched_concepts: keyword concepts extracted from current_message

        NOTE(review): recent_conversation and conversation_id are accepted
        but not read by this implementation — confirm callers expect that.
        Any internal error is logged and a (possibly partial) context dict
        is still returned.
        """
        context = {"relevant_patterns": [], "past_outcomes": [], "topic_continuity": [], "proactive_insights": [], "matched_concepts": []}

        try:
            current_concepts = self._extract_concepts(current_message)
            context["matched_concepts"] = current_concepts

            if current_concepts:
                # First three concepts, sorted, form a stable lookup signature.
                pattern_signature = "_".join(sorted(current_concepts[:3]))

                if pattern_signature in self.knowledge_graph.get("problem_categories", {}):
                    past_solutions = self.knowledge_graph["problem_categories"][pattern_signature]

                    # Inspect at most two past solutions for this signature.
                    for doc_id in past_solutions[:2]:
                        for coll_name in ["patterns", "history"]:
                            if coll_name in self.collections:
                                doc = self.collections[coll_name].get_fragment(doc_id)
                                if doc:
                                    metadata = doc.get("metadata", {})
                                    score = metadata.get("score", 0.5)
                                    uses = metadata.get("uses", 0)
                                    last_outcome = metadata.get("last_outcome", "unknown")

                                    # Only surface proven winners.
                                    if score >= 0.7 and last_outcome == "worked":
                                        context["relevant_patterns"].append({
                                            "text": doc.get("content", ""),
                                            "score": score,
                                            "uses": uses,
                                            "collection": coll_name,
                                            "insight": f"Based on {uses} past use(s), this approach had a {int(score*100)}% success rate"
                                        })
                                    # Stop at the first collection holding the doc,
                                    # whether or not it passed the score filter.
                                    break

                # Warn about approaches that previously failed on these concepts.
                failure_patterns = self.knowledge_graph.get("failure_patterns", {})
                for failure_key, failures in failure_patterns.items():
                    if any(concept in failure_key.lower() for concept in current_concepts):
                        # Only the two most recent recorded failures per key.
                        for failure in failures[-2:]:
                            context["past_outcomes"].append({
                                "outcome": "failed",
                                "reason": failure_key,
                                "when": failure.get("timestamp", ""),
                                "insight": f"Note: Similar approach failed before due to: {failure_key}"
                            })

        except Exception as e:
            logger.warning(f"Error analyzing conversation context: {e}")

        return context
1139
+
1140
+ # ==================== Action KG Tracking ====================
1141
+
1142
+ async def record_action_outcome(self, action: ActionOutcome):
1143
+ """
1144
+ Record action-level outcome with context awareness (v0.2.1 Causal Learning).
1145
+
1146
+ This enables learning: "In context X, action Y on collection Z leads to outcome W"
1147
+ Example: In "coding" context, search_memory on books → 90% success
1148
+
1149
+ Works for ALL collections including memory_bank and books (at collection level,
1150
+ not individual doc scoring).
1151
+
1152
+ Args:
1153
+ action: ActionOutcome with context type, action type, outcome, and causal attribution
1154
+ """
1155
+ # Build key for context-action-collection effectiveness tracking
1156
+ # Format: "{context_type}|{action_type}|{collection}"
1157
+ key = f"{action.context_type}|{action.action_type}|{action.collection or '*'}"
1158
+
1159
+ # Initialize tracking structure if needed
1160
+ if key not in self.knowledge_graph["context_action_effectiveness"]:
1161
+ self.knowledge_graph["context_action_effectiveness"][key] = {
1162
+ "successes": 0,
1163
+ "failures": 0,
1164
+ "partials": 0,
1165
+ "success_rate": 0.0,
1166
+ "total_uses": 0,
1167
+ "first_seen": datetime.now().isoformat(),
1168
+ "last_used": datetime.now().isoformat(),
1169
+ "examples": []
1170
+ }
1171
+
1172
+ stats = self.knowledge_graph["context_action_effectiveness"][key]
1173
+
1174
+ # Update counts based on outcome
1175
+ if action.outcome == "worked":
1176
+ stats["successes"] += 1
1177
+ elif action.outcome == "failed":
1178
+ stats["failures"] += 1
1179
+ else: # partial
1180
+ stats["partials"] += 1
1181
+
1182
+ stats["total_uses"] += 1
1183
+ stats["last_used"] = datetime.now().isoformat()
1184
+
1185
+ # Calculate success rate (successes / total, treating partials as 0.5)
1186
+ total = stats["successes"] + stats["failures"] + stats["partials"]
1187
+ if total > 0:
1188
+ weighted_successes = stats["successes"] + (stats["partials"] * 0.5)
1189
+ stats["success_rate"] = weighted_successes / total
1190
+
1191
+ # Store example for debugging (keep last 5)
1192
+ example = {
1193
+ "timestamp": action.timestamp.isoformat(),
1194
+ "outcome": action.outcome,
1195
+ "doc_id": action.doc_id,
1196
+ "params": action.action_params,
1197
+ "chain_position": action.chain_position,
1198
+ "chain_length": action.chain_length,
1199
+ "caused_final": action.caused_final_outcome
1200
+ }
1201
+ if action.failure_reason:
1202
+ example["failure_reason"] = action.failure_reason
1203
+
1204
+ stats["examples"] = (stats.get("examples", []) + [example])[-5:]
1205
+
1206
+ # Log learning for transparency
1207
+ logger.info(
1208
+ f"[Causal Learning] {key}: {action.outcome} "
1209
+ f"(rate={stats['success_rate']:.2%}, uses={stats['total_uses']}, "
1210
+ f"chain={action.chain_position+1}/{action.chain_length})"
1211
+ )
1212
+
1213
+ # Save KG to disk
1214
+ self._save_kg()
1215
+
1216
+ async def _update_kg_routing(self, query: str, collection: str, outcome: str):
1217
+ """
1218
+ Update KG routing patterns based on outcome.
1219
+
1220
+ Learns which collections work best for which query patterns.
1221
+ Works for ALL collections including memory_bank and books.
1222
+
1223
+ Example: "Python tutorial" queries that work on books → routes future
1224
+ similar queries to books first.
1225
+ """
1226
+ if not query:
1227
+ return
1228
+
1229
+ concepts = self._extract_concepts(query)
1230
+
1231
+ for concept in concepts:
1232
+ if concept not in self.knowledge_graph["routing_patterns"]:
1233
+ self.knowledge_graph["routing_patterns"][concept] = {
1234
+ "collections_used": {},
1235
+ "best_collection": collection,
1236
+ "success_rate": 0.5
1237
+ }
1238
+
1239
+ pattern = self.knowledge_graph["routing_patterns"][concept]
1240
+
1241
+ # Track collection performance
1242
+ if collection not in pattern["collections_used"]:
1243
+ pattern["collections_used"][collection] = {
1244
+ "successes": 0,
1245
+ "failures": 0,
1246
+ "total": 0
1247
+ }
1248
+
1249
+ stats = pattern["collections_used"][collection]
1250
+ stats["total"] += 1
1251
+
1252
+ if outcome == "worked":
1253
+ stats["successes"] += 1
1254
+ elif outcome == "failed":
1255
+ stats["failures"] += 1
1256
+
1257
+ # Update best collection based on success rates
1258
+ best_collection = collection
1259
+ best_rate = 0.0
1260
+
1261
+ for coll_name, coll_stats in pattern["collections_used"].items():
1262
+ total_with_feedback = coll_stats["successes"] + coll_stats["failures"]
1263
+ if total_with_feedback > 0:
1264
+ rate = coll_stats["successes"] / total_with_feedback
1265
+ else:
1266
+ rate = 0.5 # Neutral baseline
1267
+
1268
+ if rate > best_rate:
1269
+ best_rate = rate
1270
+ best_collection = coll_name
1271
+
1272
+ pattern["best_collection"] = best_collection
1273
+ pattern["success_rate"] = best_rate if best_rate > 0 else 0.5
1274
+
1275
+ # Save KG
1276
+ self._save_kg()
1277
+ logger.info(f"[Routing KG] Updated patterns for '{query[:50]}' → {collection} ({outcome})")