roampal 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. roampal/__init__.py +29 -0
  2. roampal/__main__.py +6 -0
  3. roampal/backend/__init__.py +1 -0
  4. roampal/backend/modules/__init__.py +1 -0
  5. roampal/backend/modules/memory/__init__.py +43 -0
  6. roampal/backend/modules/memory/chromadb_adapter.py +623 -0
  7. roampal/backend/modules/memory/config.py +102 -0
  8. roampal/backend/modules/memory/content_graph.py +543 -0
  9. roampal/backend/modules/memory/context_service.py +455 -0
  10. roampal/backend/modules/memory/embedding_service.py +96 -0
  11. roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
  12. roampal/backend/modules/memory/memory_bank_service.py +433 -0
  13. roampal/backend/modules/memory/memory_types.py +296 -0
  14. roampal/backend/modules/memory/outcome_service.py +400 -0
  15. roampal/backend/modules/memory/promotion_service.py +473 -0
  16. roampal/backend/modules/memory/routing_service.py +444 -0
  17. roampal/backend/modules/memory/scoring_service.py +324 -0
  18. roampal/backend/modules/memory/search_service.py +646 -0
  19. roampal/backend/modules/memory/tests/__init__.py +1 -0
  20. roampal/backend/modules/memory/tests/conftest.py +12 -0
  21. roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
  22. roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
  23. roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
  24. roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
  25. roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
  26. roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
  27. roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
  28. roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
  29. roampal/backend/modules/memory/unified_memory_system.py +1277 -0
  30. roampal/cli.py +638 -0
  31. roampal/hooks/__init__.py +16 -0
  32. roampal/hooks/session_manager.py +587 -0
  33. roampal/hooks/stop_hook.py +176 -0
  34. roampal/hooks/user_prompt_submit_hook.py +103 -0
  35. roampal/mcp/__init__.py +7 -0
  36. roampal/mcp/server.py +611 -0
  37. roampal/server/__init__.py +7 -0
  38. roampal/server/main.py +744 -0
  39. roampal-0.1.4.dist-info/METADATA +179 -0
  40. roampal-0.1.4.dist-info/RECORD +44 -0
  41. roampal-0.1.4.dist-info/WHEEL +5 -0
  42. roampal-0.1.4.dist-info/entry_points.txt +2 -0
  43. roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
  44. roampal-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,296 @@
1
+ """
2
+ Memory System Type Definitions
3
+
4
+ Centralizes all type definitions, dataclasses, and type aliases used throughout
5
+ the memory system. Extracted from UnifiedMemorySystem module-level definitions.
6
+
7
+ Replaces loose JSON string serialization with proper typed dataclasses.
8
+ """
9
+
10
+ from dataclasses import dataclass, field, asdict
11
+ from datetime import datetime
12
+ from typing import Dict, Any, List, Optional, Literal
13
+ import json
14
+
15
+
16
# Type aliases (from lines 45, 94)
# Closed set of collection identifiers used as keys into the memory stores.
CollectionName = Literal["books", "working", "history", "patterns", "memory_bank"]
# Free-form topic label; the LLM discovers topics organically (coding, fitness, finance, etc.)
ContextType = str
19
+
20
+
21
@dataclass
class OutcomeEntry:
    """
    One recorded outcome event for a memory.

    Typed replacement for ad-hoc JSON strings such as:
        {"outcome": "worked", "timestamp": "2024-12-10T...", "context": "coding"}
    """
    outcome: Literal["worked", "failed", "partial", "unknown"]  # what happened
    timestamp: str  # ISO-8601 time the event was recorded
    context: Optional[str] = None  # optional topic/context label
    confidence: float = 1.0  # strength of the signal
    implicit: bool = False  # True when inferred rather than explicit feedback
    reason: Optional[str] = None  # optional explanation (e.g. failure reason)
35
+
36
+
37
@dataclass
class OutcomeHistory:
    """
    Ordered record of outcome events for a memory item.

    Handles (de)serialization to and from the JSON strings stored in
    ChromaDB metadata, replacing raw JSON-string manipulation at call sites.
    """
    entries: List[OutcomeEntry] = field(default_factory=list)

    def to_json(self) -> str:
        """Serialize all entries to a JSON string for ChromaDB storage."""
        return json.dumps([asdict(entry) for entry in self.entries])

    @classmethod
    def from_json(cls, data: str) -> "OutcomeHistory":
        """Parse a JSON string; empty or malformed input yields an empty history."""
        if not data:
            return cls()
        try:
            parsed = [OutcomeEntry(**item) for item in json.loads(data)]
        except (json.JSONDecodeError, TypeError):
            # Corrupt payload or unexpected keys -> start fresh.
            return cls()
        return cls(entries=parsed)

    def add_outcome(
        self,
        outcome: Literal["worked", "failed", "partial", "unknown"],
        context: Optional[str] = None,
        confidence: float = 1.0,
        implicit: bool = False,
        reason: Optional[str] = None
    ):
        """Append a new entry stamped with the current local time."""
        entry = OutcomeEntry(
            outcome=outcome,
            timestamp=datetime.now().isoformat(),
            context=context,
            confidence=confidence,
            implicit=implicit,
            reason=reason,
        )
        self.entries.append(entry)

    @property
    def success_count(self) -> int:
        """Number of entries counted as successes (worked or partial)."""
        return len([e for e in self.entries if e.outcome in ("worked", "partial")])

    @property
    def failure_count(self) -> int:
        """Number of failed entries."""
        return len([e for e in self.entries if e.outcome == "failed"])

    @property
    def total_count(self) -> int:
        """Total number of recorded entries."""
        return len(self.entries)
94
+
95
+
96
@dataclass
class PromotionRecord:
    """A single promotion/demotion event between collections."""
    from_collection: str  # collection the item left
    to_collection: str  # collection the item entered
    timestamp: str  # ISO-8601 time of the move
    score: float  # item score at time of the move
    uses: int  # item use count at time of the move
    reason: str = "score_threshold"  # why the move happened
105
+
106
+
107
@dataclass
class PromotionHistory:
    """
    Ordered log of promotion/demotion events for a memory item.
    """
    promotions: List[PromotionRecord] = field(default_factory=list)

    def to_json(self) -> str:
        """Serialize the log to a JSON string for ChromaDB storage."""
        return json.dumps([asdict(record) for record in self.promotions])

    @classmethod
    def from_json(cls, data: str) -> "PromotionHistory":
        """Parse a JSON string; empty or malformed input yields an empty log."""
        if not data:
            return cls()
        try:
            records = [PromotionRecord(**item) for item in json.loads(data)]
        except (json.JSONDecodeError, TypeError):
            # Corrupt payload or unexpected keys -> start fresh.
            return cls()
        return cls(promotions=records)

    def add_promotion(
        self,
        from_collection: str,
        to_collection: str,
        score: float,
        uses: int,
        reason: str = "score_threshold"
    ):
        """Append a promotion/demotion event stamped with the current local time."""
        record = PromotionRecord(
            from_collection=from_collection,
            to_collection=to_collection,
            timestamp=datetime.now().isoformat(),
            score=score,
            uses=uses,
            reason=reason,
        )
        self.promotions.append(record)
146
+
147
+
148
@dataclass
class ActionOutcome:
    """
    Tracks individual action outcomes with topic-based context awareness (v0.2.1 Causal Learning).

    Copied from original UnifiedMemorySystem lines 97-153.

    Enables learning: "In topic X, action Y leads to outcome Z"

    Examples:
        - For CODING: search_memory → 92% success (searching code patterns works well)
        - For FITNESS: create_memory → 88% success (storing workout logs works well)
        - For FINANCE: archive_memory → 75% success (archiving expenses works well)
    """
    action_type: str  # Tool name: "search_memory", "create_memory", "update_memory", etc.
    context_type: ContextType  # LLM-classified topic: "coding", "fitness", "finance", etc.
    outcome: Literal["worked", "failed", "partial"]
    timestamp: datetime = field(default_factory=datetime.now)

    # Action details
    action_params: Dict[str, Any] = field(default_factory=dict)  # Tool parameters
    doc_id: Optional[str] = None  # If action involved a document
    collection: Optional[str] = None  # Which collection was accessed

    # Outcome details
    failure_reason: Optional[str] = None
    success_context: Optional[Dict[str, Any]] = None

    # Causal attribution
    chain_position: int = 0  # Position in action chain (0 = first action)
    chain_length: int = 1  # Total actions in chain
    caused_final_outcome: bool = True  # Did this action cause the final outcome?

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict for KG storage (timestamp as ISO string)."""
        head = ("action_type", "context_type", "outcome")
        tail = (
            "action_params", "doc_id", "collection", "failure_reason",
            "success_context", "chain_position", "chain_length",
            "caused_final_outcome",
        )
        data: Dict[str, Any] = {name: getattr(self, name) for name in head}
        data["timestamp"] = self.timestamp.isoformat()
        for name in tail:
            data[name] = getattr(self, name)
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ActionOutcome":
        """Rebuild from a dict produced by to_dict (parses the ISO timestamp)."""
        payload = dict(data)
        payload["timestamp"] = datetime.fromisoformat(payload["timestamp"])
        return cls(**payload)
204
+
205
+
206
@dataclass
class MemoryMetadata:
    """
    Structured metadata for a memory item.

    Provides type-safe access to commonly used metadata fields.
    """
    id: str
    timestamp: str
    collection: str
    score: float = 0.5
    uses: int = 0
    importance: float = 0.7
    confidence: float = 0.7
    tags: List[str] = field(default_factory=list)
    outcome_history: Optional[str] = None  # JSON string
    promotion_history: Optional[str] = None  # JSON string
    conversation_id: Optional[str] = None
    context_type: Optional[str] = None
    source: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Dict for ChromaDB storage; None-valued fields are omitted."""
        return {key: value for key, value in asdict(self).items() if value is not None}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MemoryMetadata":
        """Build from a ChromaDB metadata dict, silently ignoring unknown keys."""
        known = set(cls.__dataclass_fields__)
        return cls(**{key: value for key, value in data.items() if key in known})
238
+
239
+
240
@dataclass
class SearchResult:
    """
    Structured search result.

    Provides type-safe access to search result fields.
    """
    text: str
    collection: str
    distance: float
    metadata: Dict[str, Any]
    final_rank_score: float = 0.0
    wilson_score: float = 0.5
    embedding_similarity: float = 0.0
    learned_score: float = 0.5
    ce_score: Optional[float] = None  # Cross-encoder score

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SearchResult":
        """Build from a raw search-result dict, filling in field defaults."""
        defaults = {
            "text": "",
            "collection": "",
            "distance": 0.0,
            "metadata": {},
            "final_rank_score": 0.0,
            "wilson_score": 0.5,
            "embedding_similarity": 0.0,
            "learned_score": 0.5,
            "ce_score": None,
        }
        return cls(**{name: data.get(name, default) for name, default in defaults.items()})
271
+
272
+
273
# Type aliases for search results
# Generic, untyped memory-result dict (SearchResult is the structured form).
MemoryResult = Dict[str, Any]
275
+
276
+
277
@dataclass
class SearchMetadata:
    """
    Metadata describing how a search operation was executed.

    Returned alongside results when return_metadata=True.
    """
    query: str  # original query string
    collections_searched: List[str]  # collections that were queried
    total_results: int  # number of results returned
    routing_phase: str = "unknown"  # exploration, medium, high
    tier_scores: Dict[str, float] = field(default_factory=dict)
    cached_doc_ids: List[str] = field(default_factory=list)
    entity_boost_applied: bool = False
    cross_encoder_used: bool = False
    search_time_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Flatten to a plain dict for JSON serialization."""
        return asdict(self)
@@ -0,0 +1,400 @@
1
+ """
2
+ OutcomeService - Extracted from UnifiedMemorySystem
3
+
4
+ Handles outcome recording, score updates, and learning from feedback.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime
10
+ from typing import Dict, Any, Optional, List, Literal, Callable, Awaitable
11
+
12
+ from .config import MemoryConfig
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class OutcomeService:
    """
    Service for recording outcomes and updating memory scores.

    Extracted from UnifiedMemorySystem.record_outcome and related methods.
    Handles:
    - Time-weighted score updates
    - Outcome history tracking
    - KG routing updates
    - Problem-solution pattern tracking
    """

    def __init__(
        self,
        collections: Dict[str, Any],
        kg_service: Any = None,
        promotion_service: Any = None,
        config: Optional[MemoryConfig] = None
    ):
        """
        Initialize OutcomeService.

        Args:
            collections: Dict of collection name -> adapter
            kg_service: KnowledgeGraphService for routing updates
            promotion_service: PromotionService for promotion handling
            config: Memory configuration (a default MemoryConfig is built if omitted)
        """
        self.collections = collections
        self.kg_service = kg_service
        self.promotion_service = promotion_service
        self.config = config or MemoryConfig()

    async def record_outcome(
        self,
        doc_id: str,
        outcome: Literal["worked", "failed", "partial"],
        failure_reason: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Record outcome and trigger learning.

        Args:
            doc_id: Document that was used
            outcome: Whether it worked
            failure_reason: Reason for failure (if applicable)
            context: Additional context for learning; a "quiz_question" key,
                if present, overrides the stored query for KG routing

        Returns:
            Updated metadata, or None if the document was not found or it
            belongs to a non-scorable collection (books, memory_bank)
        """
        # Find collection and document FIRST (needed for KG routing update)
        collection_name = None
        doc = None

        for coll_name, adapter in self.collections.items():
            if doc_id.startswith(coll_name):
                collection_name = coll_name
                doc = adapter.get_fragment(doc_id)
                break

        # UPDATE KG ROUTING FIRST - even for books/memory_bank
        # This allows KG to learn which collections answer which queries
        if doc and collection_name and self.kg_service:
            metadata = doc.get("metadata", {})
            # For quiz retrievals, use quiz_question from context; otherwise use stored query
            problem_text = ""
            if context and "quiz_question" in context:
                problem_text = context["quiz_question"]
            else:
                problem_text = metadata.get("query", "") or metadata.get("text", "")[:200]

            if problem_text:
                await self.kg_service.update_kg_routing(problem_text, collection_name, outcome)
                logger.info(f"[KG] Updated routing for '{problem_text[:50]}' -> {collection_name} (outcome={outcome})")

        # SAFEGUARD: Books are reference material, not scorable memories
        # But we still updated KG routing above so system learns to route to books
        if doc_id.startswith("books_"):
            logger.info(f"[KG] Learned routing pattern for books, but skipping score update (static reference material)")
            return None

        # SAFEGUARD: Memory bank is user identity/facts, not scorable patterns
        # But we still updated KG routing above so system learns to route to memory_bank
        if doc_id.startswith("memory_bank_"):
            logger.info(f"[KG] Learned routing pattern for memory_bank, but skipping score update (persistent user facts)")
            return None

        if not doc:
            logger.warning(f"Document {doc_id} not found")
            return None

        # Calculate score update
        metadata = doc.get("metadata", {})
        current_score = metadata.get("score", 0.5)
        uses = metadata.get("uses", 0)

        # Time-weighted score update: feedback on stale memories moves the score less
        time_weight = self._calculate_time_weight(metadata.get("last_used"))
        score_delta, new_score, uses = self._calculate_score_update(
            outcome, current_score, uses, time_weight
        )

        # Update context tracking (successes keep their context, failures their reason)
        if outcome == "worked" and context:
            contexts = json.loads(metadata.get("success_contexts", "[]"))
            contexts.append(context)
            metadata["success_contexts"] = json.dumps(contexts)
        elif outcome == "failed" and failure_reason:
            reasons = json.loads(metadata.get("failure_reasons", "[]"))
            reasons.append({
                "reason": failure_reason,
                "timestamp": datetime.now().isoformat()
            })
            metadata["failure_reasons"] = json.dumps(reasons)

        # Update outcome history
        outcome_history = json.loads(metadata.get("outcome_history", "[]"))
        outcome_history.append({
            "outcome": outcome,
            "timestamp": datetime.now().isoformat(),
            "reason": failure_reason
        })
        outcome_history = outcome_history[-10:]  # Keep last 10

        # Update metadata
        metadata.update({
            "score": new_score,
            "uses": uses,
            "last_outcome": outcome,
            "last_used": datetime.now().isoformat(),
            "outcome_history": json.dumps(outcome_history)
        })

        # Persist to collection
        self.collections[collection_name].update_fragment_metadata(doc_id, metadata)

        logger.info(
            f"Score update [{collection_name}]: {current_score:.2f} → {new_score:.2f} "
            f"(outcome={outcome}, delta={score_delta:+.2f}, time_weight={time_weight:.2f}, uses={uses})"
        )

        # Update KG routing if service available
        # NOTE(review): _update_kg_with_outcome calls update_kg_routing again for
        # the same outcome (the block near the top already did) — confirm the
        # double count is intended before changing.
        if self.kg_service:
            problem_text = metadata.get("query", "")
            await self._update_kg_with_outcome(
                doc_id, outcome, problem_text, doc.get("content", ""),
                new_score, metadata, failure_reason, context
            )

        # Handle promotion/demotion if service available
        if self.promotion_service:
            collection_size = self.collections[collection_name].collection.count()
            await self.promotion_service.handle_promotion(
                doc_id=doc_id,
                collection=collection_name,
                score=new_score,
                uses=uses,
                metadata=metadata,
                collection_size=collection_size
            )

        logger.info(f"Outcome recorded: {doc_id} -> {outcome} (score: {new_score:.2f})")
        return metadata

    def _calculate_time_weight(self, last_used: Optional[str]) -> float:
        """
        Weight outcome feedback by recency of last use.

        Returns 1.0 when there is no usable timestamp, decaying toward 0
        as the last use ages (half weight at ~30 days).
        """
        if not last_used:
            return 1.0

        try:
            age_days = (datetime.now() - datetime.fromisoformat(last_used)).days
            return 1.0 / (1 + age_days / 30)  # Decay over month
        except (ValueError, TypeError):
            # Was a bare `except:` — narrow to the parse failures
            # datetime.fromisoformat can actually raise, so we no longer
            # swallow KeyboardInterrupt/SystemExit.
            return 1.0

    def _calculate_score_update(
        self,
        outcome: str,
        current_score: float,
        uses: int,
        time_weight: float
    ) -> tuple:
        """
        Calculate score delta and new values.

        worked: +0.2 * time_weight (capped at 1.0), counts as a use
        failed: -0.3 * time_weight (floored at 0.0), not a use
        partial: +0.05 * time_weight (capped at 1.0), counts as a use

        Returns:
            Tuple of (score_delta, new_score, new_uses)
        """
        if outcome == "worked":
            score_delta = 0.2 * time_weight
            new_score = min(1.0, current_score + score_delta)
            uses += 1
        elif outcome == "failed":
            score_delta = -0.3 * time_weight
            new_score = max(0.0, current_score + score_delta)
        else:  # partial
            score_delta = 0.05 * time_weight
            new_score = min(1.0, current_score + score_delta)
            uses += 1

        return score_delta, new_score, uses

    async def _update_kg_with_outcome(
        self,
        doc_id: str,
        outcome: str,
        problem_text: str,
        solution_text: str,
        new_score: float,
        metadata: Dict[str, Any],
        failure_reason: Optional[str],
        context: Optional[Dict[str, Any]]
    ):
        """
        Update knowledge graph based on outcome.

        Records routing, concept relationships, solution patterns, and
        success/failure rates, then triggers a debounced KG save.
        """
        if not self.kg_service:
            return

        # Update routing patterns
        # Extract collection name from doc_id (handles memory_bank correctly)
        # doc_id format: collection_uuid or collection_name_uuid
        if doc_id.startswith("memory_bank_"):
            collection_name = "memory_bank"
        elif doc_id.startswith("books_"):
            collection_name = "books"
        elif doc_id.startswith("working_"):
            collection_name = "working"
        elif doc_id.startswith("history_"):
            collection_name = "history"
        elif doc_id.startswith("patterns_"):
            collection_name = "patterns"
        else:
            collection_name = doc_id.split("_")[0] if "_" in doc_id else "unknown"
        await self.kg_service.update_kg_routing(problem_text, collection_name, outcome)

        if outcome == "worked" and problem_text and solution_text:
            # Extract concepts
            problem_concepts = self.kg_service.extract_concepts(problem_text)
            solution_concepts = self.kg_service.extract_concepts(solution_text)
            all_concepts = list(set(problem_concepts + solution_concepts))

            # Build relationships
            self.kg_service.build_concept_relationships(all_concepts)

            # Track problem category
            problem_key = "_".join(sorted(problem_concepts)[:3])
            self.kg_service.add_problem_category(problem_key, doc_id)

            # Track solution pattern
            self.kg_service.add_solution_pattern(
                doc_id, solution_text, new_score,
                [problem_key], solution_concepts[:5]
            )

            # Update success rate
            self.kg_service.update_success_rate(doc_id, outcome)

            # Track problem-solution mapping
            await self._track_problem_solution(doc_id, metadata, context)

        elif outcome == "failed":
            # Track failure
            self.kg_service.update_success_rate(doc_id, outcome)

            if failure_reason:
                self.kg_service.add_failure_pattern(
                    failure_reason[:50], doc_id, problem_text[:100]
                )

        elif outcome == "partial":
            self.kg_service.update_success_rate(doc_id, outcome)

        # Save KG (debounced)
        await self.kg_service.debounced_save_kg()

    async def _track_problem_solution(
        self,
        doc_id: str,
        metadata: Dict[str, Any],
        context: Optional[Dict[str, Any]]
    ):
        """
        Track successful problem→solution patterns for future reuse.

        Best-effort: any error is logged and swallowed so outcome recording
        never fails because of KG bookkeeping.
        """
        if not self.kg_service:
            return

        try:
            problem_text = metadata.get("original_context", "") or metadata.get("query", "")
            solution_text = metadata.get("text", "")

            if not problem_text or not solution_text:
                return

            # Create problem signature
            problem_concepts = self.kg_service.extract_concepts(problem_text)
            problem_signature = "_".join(sorted(problem_concepts[:5]))

            if not problem_signature:
                return

            # Track in KG
            self.kg_service.add_problem_solution(
                problem_signature=problem_signature,
                doc_id=doc_id,
                solution_text=solution_text,
                context=context
            )

            # Track solution pattern
            pattern_hash = f"{problem_signature}::{doc_id}"
            self.kg_service.add_solution_pattern_entry(
                pattern_hash=pattern_hash,
                problem_text=problem_text,
                solution_text=solution_text,
                outcome="worked"
            )

            logger.info(f"Tracked problem→solution: {problem_signature[:30]}... -> {doc_id}")

        except Exception as e:
            logger.error(f"Error tracking problem→solution: {e}")

    def count_successes_from_history(self, outcome_history_json: str) -> float:
        """
        Count successes from outcome history JSON.

        Args:
            outcome_history_json: JSON string of outcome history

        Returns:
            Weighted success count (worked=1.0, partial=0.5); 0.0 when the
            history is empty or malformed
        """
        if not outcome_history_json or outcome_history_json == "[]":
            return 0.0  # was `0`: keep the documented float return type

        try:
            history = json.loads(outcome_history_json)
            successes = 0.0
            for entry in history:
                outcome = entry.get("outcome", "")
                if outcome == "worked":
                    successes += 1.0
                elif outcome == "partial":
                    successes += 0.5
            return successes
        except json.JSONDecodeError:
            return 0.0  # was `0`: keep the documented float return type

    def get_outcome_stats(self, doc_id: str) -> Dict[str, Any]:
        """
        Get outcome statistics for a document.

        Args:
            doc_id: Document ID

        Returns:
            Dict with outcome stats, or {"doc_id": ..., "error": "not_found"}
            when no collection holds the document
        """
        for coll_name, adapter in self.collections.items():
            if doc_id.startswith(coll_name):
                doc = adapter.get_fragment(doc_id)
                if doc:
                    metadata = doc.get("metadata", {})
                    outcome_history = json.loads(metadata.get("outcome_history", "[]"))

                    worked = sum(1 for o in outcome_history if o.get("outcome") == "worked")
                    failed = sum(1 for o in outcome_history if o.get("outcome") == "failed")
                    partial = sum(1 for o in outcome_history if o.get("outcome") == "partial")

                    return {
                        "doc_id": doc_id,
                        "collection": coll_name,
                        "score": metadata.get("score", 0.5),
                        "uses": metadata.get("uses", 0),
                        "last_outcome": metadata.get("last_outcome"),
                        "outcomes": {
                            "worked": worked,
                            "failed": failed,
                            "partial": partial
                        },
                        "total_outcomes": len(outcome_history)
                    }

        return {"doc_id": doc_id, "error": "not_found"}