roampal 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roampal/__init__.py +29 -0
- roampal/__main__.py +6 -0
- roampal/backend/__init__.py +1 -0
- roampal/backend/modules/__init__.py +1 -0
- roampal/backend/modules/memory/__init__.py +43 -0
- roampal/backend/modules/memory/chromadb_adapter.py +623 -0
- roampal/backend/modules/memory/config.py +102 -0
- roampal/backend/modules/memory/content_graph.py +543 -0
- roampal/backend/modules/memory/context_service.py +455 -0
- roampal/backend/modules/memory/embedding_service.py +96 -0
- roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
- roampal/backend/modules/memory/memory_bank_service.py +433 -0
- roampal/backend/modules/memory/memory_types.py +296 -0
- roampal/backend/modules/memory/outcome_service.py +400 -0
- roampal/backend/modules/memory/promotion_service.py +473 -0
- roampal/backend/modules/memory/routing_service.py +444 -0
- roampal/backend/modules/memory/scoring_service.py +324 -0
- roampal/backend/modules/memory/search_service.py +646 -0
- roampal/backend/modules/memory/tests/__init__.py +1 -0
- roampal/backend/modules/memory/tests/conftest.py +12 -0
- roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
- roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
- roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
- roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
- roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
- roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
- roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
- roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
- roampal/backend/modules/memory/unified_memory_system.py +1277 -0
- roampal/cli.py +638 -0
- roampal/hooks/__init__.py +16 -0
- roampal/hooks/session_manager.py +587 -0
- roampal/hooks/stop_hook.py +176 -0
- roampal/hooks/user_prompt_submit_hook.py +103 -0
- roampal/mcp/__init__.py +7 -0
- roampal/mcp/server.py +611 -0
- roampal/server/__init__.py +7 -0
- roampal/server/main.py +744 -0
- roampal-0.1.4.dist-info/METADATA +179 -0
- roampal-0.1.4.dist-info/RECORD +44 -0
- roampal-0.1.4.dist-info/WHEEL +5 -0
- roampal-0.1.4.dist-info/entry_points.txt +2 -0
- roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
- roampal-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory System Type Definitions
|
|
3
|
+
|
|
4
|
+
Centralizes all type definitions, dataclasses, and type aliases used throughout
|
|
5
|
+
the memory system. Extracted from UnifiedMemorySystem module-level definitions.
|
|
6
|
+
|
|
7
|
+
Replaces loose JSON string serialization with proper typed dataclasses.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field, asdict
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from typing import Dict, Any, List, Optional, Literal
|
|
13
|
+
import json
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Type aliases shared across the memory system.
CollectionName = Literal["books", "working", "history", "patterns", "memory_bank"]
ContextType = str  # LLM discovers topics organically (coding, fitness, finance, etc.)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class OutcomeEntry:
    """
    A single outcome event for a memory.

    Replaces loose JSON strings like:
        {"outcome": "worked", "timestamp": "2024-12-10T...", "context": "coding"}
    """
    outcome: Literal["worked", "failed", "partial", "unknown"]
    timestamp: str  # ISO-format timestamp (produced via datetime.isoformat())
    context: Optional[str] = None  # topic label, e.g. "coding"
    confidence: float = 1.0  # strength of the outcome signal
    implicit: bool = False  # True when the outcome was inferred rather than explicitly reported
    reason: Optional[str] = None  # free-text explanation (e.g. failure reason)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class OutcomeHistory:
    """
    Ordered record of outcome events for a single memory item.

    Wraps the raw JSON string kept in ChromaDB metadata with typed
    (de)serialization helpers and aggregate counters.
    """
    entries: List[OutcomeEntry] = field(default_factory=list)

    def to_json(self) -> str:
        """Serialize every entry to a JSON string for ChromaDB storage."""
        serialized = [asdict(entry) for entry in self.entries]
        return json.dumps(serialized)

    @classmethod
    def from_json(cls, data: str) -> "OutcomeHistory":
        """Parse a JSON string; empty or malformed input yields an empty history."""
        if not data:
            return cls()
        try:
            parsed = [OutcomeEntry(**item) for item in json.loads(data)]
        except (json.JSONDecodeError, TypeError):
            # Corrupt metadata degrades gracefully to an empty history.
            return cls()
        return cls(entries=parsed)

    def add_outcome(
        self,
        outcome: Literal["worked", "failed", "partial", "unknown"],
        context: Optional[str] = None,
        confidence: float = 1.0,
        implicit: bool = False,
        reason: Optional[str] = None
    ):
        """Append a new outcome entry stamped with the current time."""
        entry = OutcomeEntry(
            outcome=outcome,
            timestamp=datetime.now().isoformat(),
            context=context,
            confidence=confidence,
            implicit=implicit,
            reason=reason,
        )
        self.entries.append(entry)

    @property
    def success_count(self) -> int:
        """Number of entries whose outcome was worked or partial."""
        positive = ("worked", "partial")
        return sum(1 for entry in self.entries if entry.outcome in positive)

    @property
    def failure_count(self) -> int:
        """Number of entries whose outcome was failed."""
        return sum(1 for entry in self.entries if entry.outcome == "failed")

    @property
    def total_count(self) -> int:
        """Total number of recorded outcomes."""
        return len(self.entries)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass
class PromotionRecord:
    """Record of a single promotion/demotion event between collections."""
    from_collection: str  # collection the item moved out of
    to_collection: str  # collection the item moved into
    timestamp: str  # ISO-format time of the move
    score: float  # item score at the time of the move
    uses: int  # usage count at the time of the move
    reason: str = "score_threshold"  # why the move was triggered
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@dataclass
class PromotionHistory:
    """
    Chronological record of promotion/demotion events for a memory item,
    with JSON (de)serialization helpers for ChromaDB metadata storage.
    """
    promotions: List[PromotionRecord] = field(default_factory=list)

    def to_json(self) -> str:
        """Serialize every promotion record to a JSON string."""
        serialized = [asdict(record) for record in self.promotions]
        return json.dumps(serialized)

    @classmethod
    def from_json(cls, data: str) -> "PromotionHistory":
        """Parse a JSON string; empty or malformed input yields an empty history."""
        if not data:
            return cls()
        try:
            records = [PromotionRecord(**item) for item in json.loads(data)]
        except (json.JSONDecodeError, TypeError):
            # Corrupt metadata degrades gracefully to an empty history.
            return cls()
        return cls(promotions=records)

    def add_promotion(
        self,
        from_collection: str,
        to_collection: str,
        score: float,
        uses: int,
        reason: str = "score_threshold"
    ):
        """Append a promotion/demotion event stamped with the current time."""
        record = PromotionRecord(
            from_collection=from_collection,
            to_collection=to_collection,
            timestamp=datetime.now().isoformat(),
            score=score,
            uses=uses,
            reason=reason,
        )
        self.promotions.append(record)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@dataclass
class ActionOutcome:
    """
    Tracks an individual action outcome with topic-based context awareness
    (v0.2.1 Causal Learning). Copied from original UnifiedMemorySystem
    lines 97-153.

    Enables learning of the form "in topic X, action Y leads to outcome Z",
    e.g.:
        - coding: search_memory → 92% success (searching code patterns works well)
        - fitness: create_memory → 88% success (storing workout logs works well)
        - finance: archive_memory → 75% success (archiving expenses works well)
    """
    action_type: str  # Tool name: "search_memory", "create_memory", "update_memory", etc.
    context_type: ContextType  # LLM-classified topic: "coding", "fitness", "finance", etc.
    outcome: Literal["worked", "failed", "partial"]
    timestamp: datetime = field(default_factory=datetime.now)

    # Action details
    action_params: Dict[str, Any] = field(default_factory=dict)  # tool parameters
    doc_id: Optional[str] = None  # document involved in the action, if any
    collection: Optional[str] = None  # collection that was accessed, if any

    # Outcome details
    failure_reason: Optional[str] = None
    success_context: Optional[Dict[str, Any]] = None

    # Causal attribution
    chain_position: int = 0  # position in the action chain (0 = first action)
    chain_length: int = 1  # total actions in the chain
    caused_final_outcome: bool = True  # whether this action caused the final outcome

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict for KG storage (timestamp becomes an ISO string)."""
        payload: Dict[str, Any] = {}
        for name in (
            "action_type", "context_type", "outcome", "timestamp",
            "action_params", "doc_id", "collection", "failure_reason",
            "success_context", "chain_position", "chain_length",
            "caused_final_outcome",
        ):
            value = getattr(self, name)
            payload[name] = value.isoformat() if name == "timestamp" else value
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ActionOutcome":
        """Deserialize from a dict produced by to_dict()."""
        payload = dict(data)
        payload["timestamp"] = datetime.fromisoformat(payload["timestamp"])
        return cls(**payload)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@dataclass
class MemoryMetadata:
    """
    Typed view of a memory item's metadata.

    Gives type-safe access to the fields commonly stored alongside a
    memory in ChromaDB.
    """
    id: str
    timestamp: str
    collection: str
    score: float = 0.5
    uses: int = 0
    importance: float = 0.7
    confidence: float = 0.7
    tags: List[str] = field(default_factory=list)
    outcome_history: Optional[str] = None  # JSON string
    promotion_history: Optional[str] = None  # JSON string
    conversation_id: Optional[str] = None
    context_type: Optional[str] = None
    source: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dict for ChromaDB storage, dropping unset (None) fields."""
        serialized = asdict(self)
        return {key: value for key, value in serialized.items() if value is not None}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MemoryMetadata":
        """Build from a ChromaDB metadata dict, silently ignoring unknown keys."""
        known = set(cls.__dataclass_fields__)
        kwargs = {key: value for key, value in data.items() if key in known}
        return cls(**kwargs)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@dataclass
class SearchResult:
    """
    Typed wrapper for a single search hit.

    Gives type-safe access to the scoring fields attached to each result.
    """
    text: str
    collection: str
    distance: float
    metadata: Dict[str, Any]
    final_rank_score: float = 0.0
    wilson_score: float = 0.5
    embedding_similarity: float = 0.0
    learned_score: float = 0.5
    ce_score: Optional[float] = None  # Cross-encoder score

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SearchResult":
        """Build from a raw search-result dict, filling absent keys with defaults."""
        fallbacks = {
            "text": "",
            "collection": "",
            "distance": 0.0,
            "metadata": {},
            "final_rank_score": 0.0,
            "wilson_score": 0.5,
            "embedding_similarity": 0.0,
            "learned_score": 0.5,
            "ce_score": None,
        }
        kwargs = {key: data.get(key, default) for key, default in fallbacks.items()}
        return cls(**kwargs)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# Type aliases for search results.
MemoryResult = Dict[str, Any]  # Generic memory result dict (untyped counterpart of SearchResult)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
@dataclass
class SearchMetadata:
    """
    Metadata about a search operation.

    Returned alongside results when return_metadata=True.
    """
    query: str  # the query text that was searched
    collections_searched: List[str]  # collections that were queried
    total_results: int  # number of results returned
    routing_phase: str = "unknown"  # exploration, medium, high
    tier_scores: Dict[str, float] = field(default_factory=dict)  # presumably keyed by collection/tier — verify against caller
    cached_doc_ids: List[str] = field(default_factory=list)
    entity_boost_applied: bool = False
    cross_encoder_used: bool = False
    search_time_ms: float = 0.0  # wall-clock search duration in milliseconds

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dict for JSON serialization."""
        return asdict(self)
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OutcomeService - Extracted from UnifiedMemorySystem
|
|
3
|
+
|
|
4
|
+
Handles outcome recording, score updates, and learning from feedback.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Dict, Any, Optional, List, Literal, Callable, Awaitable
|
|
11
|
+
|
|
12
|
+
from .config import MemoryConfig
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OutcomeService:
    """
    Service for recording outcomes and updating memory scores.

    Extracted from UnifiedMemorySystem.record_outcome and related methods.
    Handles:
    - Time-weighted score updates
    - Outcome history tracking
    - KG routing updates
    - Problem-solution pattern tracking
    """

    def __init__(
        self,
        collections: Dict[str, Any],
        kg_service: Any = None,
        promotion_service: Any = None,
        config: Optional[MemoryConfig] = None
    ):
        """
        Initialize OutcomeService.

        Args:
            collections: Dict of collection name -> adapter
            kg_service: KnowledgeGraphService for routing updates
            promotion_service: PromotionService for promotion/demotion handling
            config: Memory configuration (a default MemoryConfig is created if omitted)
        """
        self.collections = collections
        self.kg_service = kg_service
        self.promotion_service = promotion_service
        self.config = config or MemoryConfig()

    async def record_outcome(
        self,
        doc_id: str,
        outcome: Literal["worked", "failed", "partial"],
        failure_reason: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Record an outcome for a document and trigger learning.

        Args:
            doc_id: Document that was used (id is prefixed with its collection name)
            outcome: Whether the document worked, failed, or partially worked
            failure_reason: Reason for failure (if applicable)
            context: Additional context for learning (may carry "quiz_question")

        Returns:
            Updated metadata dict, or None if the document was not found or
            belongs to a non-scorable collection (books, memory_bank).
        """
        # Find collection and document FIRST (needed for KG routing update).
        collection_name = None
        doc = None
        for coll_name, adapter in self.collections.items():
            if doc_id.startswith(coll_name):
                collection_name = coll_name
                doc = adapter.get_fragment(doc_id)
                break

        # Update KG routing BEFORE the scorability safeguards below, so the
        # KG still learns which collections answer which queries — even for
        # books/memory_bank, whose scores are never touched.
        if doc and collection_name and self.kg_service:
            metadata = doc.get("metadata", {})
            # For quiz retrievals, use quiz_question from context; otherwise
            # the stored query (falling back to the first 200 chars of text).
            if context and "quiz_question" in context:
                problem_text = context["quiz_question"]
            else:
                problem_text = metadata.get("query", "") or metadata.get("text", "")[:200]

            if problem_text:
                await self.kg_service.update_kg_routing(problem_text, collection_name, outcome)
                logger.info(f"[KG] Updated routing for '{problem_text[:50]}' -> {collection_name} (outcome={outcome})")

        # SAFEGUARD: Books are reference material, not scorable memories.
        # Routing was still learned above.
        if doc_id.startswith("books_"):
            logger.info(f"[KG] Learned routing pattern for books, but skipping score update (static reference material)")
            return None

        # SAFEGUARD: Memory bank is user identity/facts, not scorable patterns.
        # Routing was still learned above.
        if doc_id.startswith("memory_bank_"):
            logger.info(f"[KG] Learned routing pattern for memory_bank, but skipping score update (persistent user facts)")
            return None

        if not doc:
            logger.warning(f"Document {doc_id} not found")
            return None

        # Time-weighted score update.
        metadata = doc.get("metadata", {})
        current_score = metadata.get("score", 0.5)
        uses = metadata.get("uses", 0)
        time_weight = self._calculate_time_weight(metadata.get("last_used"))
        score_delta, new_score, uses = self._calculate_score_update(
            outcome, current_score, uses, time_weight
        )

        # Track success contexts / failure reasons as JSON lists in metadata.
        if outcome == "worked" and context:
            contexts = json.loads(metadata.get("success_contexts", "[]"))
            contexts.append(context)
            metadata["success_contexts"] = json.dumps(contexts)
        elif outcome == "failed" and failure_reason:
            reasons = json.loads(metadata.get("failure_reasons", "[]"))
            reasons.append({
                "reason": failure_reason,
                "timestamp": datetime.now().isoformat()
            })
            metadata["failure_reasons"] = json.dumps(reasons)

        # Append to the rolling outcome history (only the last 10 are kept).
        outcome_history = json.loads(metadata.get("outcome_history", "[]"))
        outcome_history.append({
            "outcome": outcome,
            "timestamp": datetime.now().isoformat(),
            "reason": failure_reason
        })
        outcome_history = outcome_history[-10:]

        metadata.update({
            "score": new_score,
            "uses": uses,
            "last_outcome": outcome,
            "last_used": datetime.now().isoformat(),
            "outcome_history": json.dumps(outcome_history)
        })

        # Persist to collection.
        self.collections[collection_name].update_fragment_metadata(doc_id, metadata)

        logger.info(
            f"Score update [{collection_name}]: {current_score:.2f} → {new_score:.2f} "
            f"(outcome={outcome}, delta={score_delta:+.2f}, time_weight={time_weight:.2f}, uses={uses})"
        )

        # Update KG patterns if service available.
        # NOTE(review): update_kg_routing was already called above for this
        # doc; _update_kg_with_outcome calls it again with the stored query,
        # so routing counts this outcome twice — confirm that is intended.
        if self.kg_service:
            problem_text = metadata.get("query", "")
            await self._update_kg_with_outcome(
                doc_id, outcome, problem_text, doc.get("content", ""),
                new_score, metadata, failure_reason, context
            )

        # Handle promotion/demotion if service available.
        if self.promotion_service:
            collection_size = self.collections[collection_name].collection.count()
            await self.promotion_service.handle_promotion(
                doc_id=doc_id,
                collection=collection_name,
                score=new_score,
                uses=uses,
                metadata=metadata,
                collection_size=collection_size
            )

        logger.info(f"Outcome recorded: {doc_id} -> {outcome} (score: {new_score:.2f})")
        return metadata

    def _calculate_time_weight(self, last_used: Optional[str]) -> float:
        """
        Compute a recency multiplier for score updates.

        Fresh (or never-used) memories get full weight 1.0; older memories
        decay as 1 / (1 + age_days / 30), i.e. half weight at ~30 days.

        Args:
            last_used: ISO-format timestamp of last use, or None.

        Returns:
            Multiplier applied to score deltas; 1.0 on any parse error.
        """
        if not last_used:
            return 1.0

        try:
            age_days = (datetime.now() - datetime.fromisoformat(last_used)).days
            return 1.0 / (1 + age_days / 30)  # decay over a month
        except (ValueError, TypeError, ZeroDivisionError):
            # Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit. Unparseable timestamps (or a
            # pathological future date hitting the zero denominator) are
            # treated as fresh.
            return 1.0

    def _calculate_score_update(
        self,
        outcome: str,
        current_score: float,
        uses: int,
        time_weight: float
    ) -> tuple:
        """
        Calculate the score delta and updated score/uses for an outcome.

        Deltas are scaled by time_weight and the resulting score is clamped
        to [0.0, 1.0]. Note: "failed" does not increment uses; only
        "worked" and "partial" do.

        Returns:
            Tuple of (score_delta, new_score, new_uses)
        """
        if outcome == "worked":
            score_delta = 0.2 * time_weight
            new_score = min(1.0, current_score + score_delta)
            uses += 1
        elif outcome == "failed":
            score_delta = -0.3 * time_weight
            new_score = max(0.0, current_score + score_delta)
        else:  # partial
            score_delta = 0.05 * time_weight
            new_score = min(1.0, current_score + score_delta)
            uses += 1

        return score_delta, new_score, uses

    async def _update_kg_with_outcome(
        self,
        doc_id: str,
        outcome: str,
        problem_text: str,
        solution_text: str,
        new_score: float,
        metadata: Dict[str, Any],
        failure_reason: Optional[str],
        context: Optional[Dict[str, Any]]
    ):
        """
        Update the knowledge graph after an outcome: routing, concept
        relationships, problem/solution patterns, and success rates.
        """
        if not self.kg_service:
            return

        # Derive the collection name from the doc_id prefix.
        # doc_id format: <collection>_<uuid>; multi-word collection names
        # (memory_bank) must be matched before the generic "first token"
        # fallback, which would mis-split them.
        for known in ("memory_bank", "books", "working", "history", "patterns"):
            if doc_id.startswith(known + "_"):
                collection_name = known
                break
        else:
            collection_name = doc_id.split("_")[0] if "_" in doc_id else "unknown"
        await self.kg_service.update_kg_routing(problem_text, collection_name, outcome)

        if outcome == "worked" and problem_text and solution_text:
            # Extract and link concepts from both problem and solution.
            problem_concepts = self.kg_service.extract_concepts(problem_text)
            solution_concepts = self.kg_service.extract_concepts(solution_text)
            all_concepts = list(set(problem_concepts + solution_concepts))
            self.kg_service.build_concept_relationships(all_concepts)

            # Track problem category keyed by its first three sorted concepts.
            problem_key = "_".join(sorted(problem_concepts)[:3])
            self.kg_service.add_problem_category(problem_key, doc_id)

            # Track solution pattern.
            self.kg_service.add_solution_pattern(
                doc_id, solution_text, new_score,
                [problem_key], solution_concepts[:5]
            )

            # Update success rate.
            self.kg_service.update_success_rate(doc_id, outcome)

            # Track problem-solution mapping for future reuse.
            await self._track_problem_solution(doc_id, metadata, context)

        elif outcome == "failed":
            self.kg_service.update_success_rate(doc_id, outcome)
            if failure_reason:
                self.kg_service.add_failure_pattern(
                    failure_reason[:50], doc_id, problem_text[:100]
                )

        elif outcome == "partial":
            self.kg_service.update_success_rate(doc_id, outcome)

        # Save KG (debounced).
        await self.kg_service.debounced_save_kg()

    async def _track_problem_solution(
        self,
        doc_id: str,
        metadata: Dict[str, Any],
        context: Optional[Dict[str, Any]]
    ):
        """Track successful problem→solution patterns for future reuse."""
        if not self.kg_service:
            return

        try:
            problem_text = metadata.get("original_context", "") or metadata.get("query", "")
            solution_text = metadata.get("text", "")

            if not problem_text or not solution_text:
                return

            # Create a problem signature from the first five concepts, sorted.
            problem_concepts = self.kg_service.extract_concepts(problem_text)
            problem_signature = "_".join(sorted(problem_concepts[:5]))

            if not problem_signature:
                return

            # Track in KG.
            self.kg_service.add_problem_solution(
                problem_signature=problem_signature,
                doc_id=doc_id,
                solution_text=solution_text,
                context=context
            )

            # Track solution pattern keyed by signature + doc.
            pattern_hash = f"{problem_signature}::{doc_id}"
            self.kg_service.add_solution_pattern_entry(
                pattern_hash=pattern_hash,
                problem_text=problem_text,
                solution_text=solution_text,
                outcome="worked"
            )

            logger.info(f"Tracked problem→solution: {problem_signature[:30]}... -> {doc_id}")

        except Exception as e:
            # Best-effort tracking: never let KG bookkeeping break outcome recording.
            logger.error(f"Error tracking problem→solution: {e}")

    def count_successes_from_history(self, outcome_history_json: str) -> float:
        """
        Count successes from outcome history JSON.

        Args:
            outcome_history_json: JSON string of outcome history

        Returns:
            Weighted success count (worked=1.0, partial=0.5); 0.0 for
            empty or malformed input.
        """
        if not outcome_history_json or outcome_history_json == "[]":
            return 0.0

        try:
            history = json.loads(outcome_history_json)
        except json.JSONDecodeError:
            return 0.0

        weights = {"worked": 1.0, "partial": 0.5}
        return sum(weights.get(entry.get("outcome", ""), 0.0) for entry in history)

    def get_outcome_stats(self, doc_id: str) -> Dict[str, Any]:
        """
        Get outcome statistics for a document.

        Args:
            doc_id: Document ID

        Returns:
            Dict with score, uses, and per-outcome counts, or
            {"doc_id": ..., "error": "not_found"} when the document
            cannot be located.
        """
        for coll_name, adapter in self.collections.items():
            if doc_id.startswith(coll_name):
                doc = adapter.get_fragment(doc_id)
                if doc:
                    metadata = doc.get("metadata", {})
                    try:
                        outcome_history = json.loads(metadata.get("outcome_history", "[]"))
                    except json.JSONDecodeError:
                        # Corrupt history metadata: report zero outcomes
                        # instead of raising.
                        outcome_history = []

                    worked = sum(1 for o in outcome_history if o.get("outcome") == "worked")
                    failed = sum(1 for o in outcome_history if o.get("outcome") == "failed")
                    partial = sum(1 for o in outcome_history if o.get("outcome") == "partial")

                    return {
                        "doc_id": doc_id,
                        "collection": coll_name,
                        "score": metadata.get("score", 0.5),
                        "uses": metadata.get("uses", 0),
                        "last_outcome": metadata.get("last_outcome"),
                        "outcomes": {
                            "worked": worked,
                            "failed": failed,
                            "partial": partial
                        },
                        "total_outcomes": len(outcome_history)
                    }

        return {"doc_id": doc_id, "error": "not_found"}
|