roampal 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roampal/__init__.py +29 -0
- roampal/__main__.py +6 -0
- roampal/backend/__init__.py +1 -0
- roampal/backend/modules/__init__.py +1 -0
- roampal/backend/modules/memory/__init__.py +43 -0
- roampal/backend/modules/memory/chromadb_adapter.py +623 -0
- roampal/backend/modules/memory/config.py +102 -0
- roampal/backend/modules/memory/content_graph.py +543 -0
- roampal/backend/modules/memory/context_service.py +455 -0
- roampal/backend/modules/memory/embedding_service.py +96 -0
- roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
- roampal/backend/modules/memory/memory_bank_service.py +433 -0
- roampal/backend/modules/memory/memory_types.py +296 -0
- roampal/backend/modules/memory/outcome_service.py +400 -0
- roampal/backend/modules/memory/promotion_service.py +473 -0
- roampal/backend/modules/memory/routing_service.py +444 -0
- roampal/backend/modules/memory/scoring_service.py +324 -0
- roampal/backend/modules/memory/search_service.py +646 -0
- roampal/backend/modules/memory/tests/__init__.py +1 -0
- roampal/backend/modules/memory/tests/conftest.py +12 -0
- roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
- roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
- roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
- roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
- roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
- roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
- roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
- roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
- roampal/backend/modules/memory/unified_memory_system.py +1277 -0
- roampal/cli.py +638 -0
- roampal/hooks/__init__.py +16 -0
- roampal/hooks/session_manager.py +587 -0
- roampal/hooks/stop_hook.py +176 -0
- roampal/hooks/user_prompt_submit_hook.py +103 -0
- roampal/mcp/__init__.py +7 -0
- roampal/mcp/server.py +611 -0
- roampal/server/__init__.py +7 -0
- roampal/server/main.py +744 -0
- roampal-0.1.4.dist-info/METADATA +179 -0
- roampal-0.1.4.dist-info/RECORD +44 -0
- roampal-0.1.4.dist-info/WHEEL +5 -0
- roampal-0.1.4.dist-info/entry_points.txt +2 -0
- roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
- roampal-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1052 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Graph Service - Manages dual KG system (Routing KG + Content KG).
|
|
3
|
+
|
|
4
|
+
Extracted from UnifiedMemorySystem as part of refactoring.
|
|
5
|
+
Includes race condition fix for debounced KG saves.
|
|
6
|
+
|
|
7
|
+
Responsibilities:
|
|
8
|
+
- Loading/saving both routing KG and content KG
|
|
9
|
+
- Concept extraction from text
|
|
10
|
+
- Building concept relationships
|
|
11
|
+
- Tracking problem-solution patterns
|
|
12
|
+
- KG cleanup operations
|
|
13
|
+
- Entity/relationship queries for visualization
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
import math
|
|
20
|
+
import re
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
from datetime import datetime
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any, Callable, Dict, List, Optional, Set
|
|
25
|
+
|
|
26
|
+
from filelock import FileLock
|
|
27
|
+
|
|
28
|
+
from .config import MemoryConfig
|
|
29
|
+
from .content_graph import ContentGraph
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class KnowledgeGraphService:
|
|
35
|
+
"""
|
|
36
|
+
Manages dual Knowledge Graph system.
|
|
37
|
+
|
|
38
|
+
Dual KG Architecture:
|
|
39
|
+
- Routing KG: Query patterns -> collection routing decisions
|
|
40
|
+
- Content KG: Memory content -> entity relationships
|
|
41
|
+
|
|
42
|
+
Includes race condition fix: Uses asyncio.Lock to protect
|
|
43
|
+
concurrent access to _kg_save_task in debounced saves.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
    def __init__(
        self,
        kg_path: Path,
        content_graph_path: Path,
        relationships_path: Path,
        config: Optional[MemoryConfig] = None,
    ):
        """
        Initialize KnowledgeGraphService.

        Eagerly loads all three persisted structures from disk (each loader
        falls back to an empty default on missing/corrupt files) and prepares
        the debounced-save machinery used by the async mutators.

        Args:
            kg_path: Path to routing KG JSON file
            content_graph_path: Path to content KG JSON file
            relationships_path: Path to memory relationships JSON file
            config: Optional MemoryConfig for thresholds (a default
                MemoryConfig is constructed when omitted)
        """
        self.config = config or MemoryConfig()
        self.kg_path = kg_path
        self.content_graph_path = content_graph_path
        self.relationships_path = relationships_path

        # Load graphs (loaders never raise; they log and return defaults)
        self.knowledge_graph = self._load_kg()
        self.content_graph = self._load_content_graph()
        self.relationships = self._load_relationships()

        # Debounced save state with RACE CONDITION FIX:
        # _kg_save_task holds the currently scheduled delayed save (if any);
        # _kg_save_pending tracks whether a save is still outstanding;
        # _kg_save_lock serializes cancel/recreate of the task across
        # concurrent coroutines.
        self._kg_save_task: Optional[asyncio.Task] = None
        self._kg_save_pending = False
        self._kg_save_lock = asyncio.Lock()  # FIX: Protect concurrent task access

        logger.info(f"KnowledgeGraphService initialized from {kg_path}")
|
78
|
+
|
|
79
|
+
# =========================================================================
|
|
80
|
+
# Loading Methods
|
|
81
|
+
# =========================================================================
|
|
82
|
+
|
|
83
|
+
    def _load_content_graph(self) -> ContentGraph:
        """
        Load Content Knowledge Graph from disk.

        CRITICAL: This is a core feature for entity relationship mapping.
        Do not disable or remove - required for dual KG visualization.

        Returns:
            The ContentGraph parsed from content_graph_path, or a freshly
            constructed empty ContentGraph when the file is missing or
            fails to parse (the failure is logged, never raised).
        """
        if self.content_graph_path.exists():
            try:
                return ContentGraph.load_from_file(str(self.content_graph_path))
            except Exception as e:
                # Corrupt/unreadable file: warn and fall through to empty graph
                logger.warning(f"Failed to load content graph, creating new: {e}")
        return ContentGraph()
|
96
|
+
|
|
97
|
+
def _load_kg(self) -> Dict[str, Any]:
|
|
98
|
+
"""Load knowledge graph routing patterns."""
|
|
99
|
+
default_kg = {
|
|
100
|
+
"routing_patterns": {}, # concept -> best_collection
|
|
101
|
+
"success_rates": {}, # collection -> success_rate
|
|
102
|
+
"failure_patterns": {}, # concept -> failure_reasons
|
|
103
|
+
"problem_categories": {}, # problem_type -> preferred_collections
|
|
104
|
+
"problem_solutions": {}, # problem_signature -> [solution_ids]
|
|
105
|
+
"solution_patterns": {}, # pattern_hash -> {problem, solution, success_rate}
|
|
106
|
+
# v0.2.1 Causal Learning: Action-level effectiveness tracking
|
|
107
|
+
"context_action_effectiveness": {} # (context, action, collection) -> {success, fail, success_rate}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
if self.kg_path.exists():
|
|
111
|
+
try:
|
|
112
|
+
with open(self.kg_path, 'r') as f:
|
|
113
|
+
loaded_kg = json.load(f)
|
|
114
|
+
# Ensure all required keys exist
|
|
115
|
+
for key in default_kg:
|
|
116
|
+
if key not in loaded_kg:
|
|
117
|
+
loaded_kg[key] = default_kg[key]
|
|
118
|
+
return loaded_kg
|
|
119
|
+
except Exception:
|
|
120
|
+
pass
|
|
121
|
+
return default_kg
|
|
122
|
+
|
|
123
|
+
def _load_relationships(self) -> Dict[str, Any]:
|
|
124
|
+
"""Load memory relationships."""
|
|
125
|
+
if self.relationships_path.exists():
|
|
126
|
+
try:
|
|
127
|
+
with open(self.relationships_path, 'r') as f:
|
|
128
|
+
return json.load(f)
|
|
129
|
+
except Exception:
|
|
130
|
+
pass
|
|
131
|
+
return {
|
|
132
|
+
"related": {}, # doc_id -> [related_doc_ids]
|
|
133
|
+
"evolution": {}, # doc_id -> {parent, children}
|
|
134
|
+
"conflicts": {} # doc_id -> [conflicting_doc_ids]
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
    def reload_kg(self):
        """Reload KG from disk to pick up changes from other processes.

        Only the routing KG is re-read; the content graph and relationships
        structures keep whatever was loaded at init.
        """
        self.knowledge_graph = self._load_kg()
|
|
140
|
+
|
|
141
|
+
# =========================================================================
|
|
142
|
+
# Saving Methods (with race condition fix)
|
|
143
|
+
# =========================================================================
|
|
144
|
+
|
|
145
|
+
    def _save_kg_sync(self):
        """
        Synchronous save of both routing KG and content KG.

        CRITICAL: Saves both graphs to maintain consistency.
        Do not remove content graph save - it's required for entity tracking.

        Intended to run on a worker thread (see _save_kg). Uses a
        cross-process FileLock plus a write-temp-then-rename pattern so
        concurrent readers never observe a partially written JSON file.
        All failures are logged, never raised.
        """
        # Save routing KG
        lock_path = str(self.kg_path) + ".lock"
        try:
            with FileLock(lock_path, timeout=10):
                self.kg_path.parent.mkdir(exist_ok=True, parents=True)
                # Write to temp file first then rename (atomic operation;
                # Path.replace overwrites any existing destination)
                temp_path = self.kg_path.with_suffix('.tmp')
                with open(temp_path, 'w') as f:
                    json.dump(self.knowledge_graph, f, indent=2)
                temp_path.replace(self.kg_path)
        except PermissionError as e:
            logger.error(f"Permission denied saving routing KG: {e}")
        except Exception as e:
            logger.error(f"Failed to save routing KG: {e}", exc_info=True)

        # CRITICAL: Save content KG (entity relationships).
        # NOTE(review): ContentGraph.save_to_file handles its own locking /
        # atomicity if any — not visible from here; confirm in content_graph.py.
        try:
            self.content_graph.save_to_file(str(self.content_graph_path))
        except Exception as e:
            logger.error(f"Failed to save content KG: {e}", exc_info=True)
|
|
172
|
+
|
|
173
|
+
    async def _save_kg(self):
        """Save knowledge graph asynchronously.

        Offloads the blocking file-lock/JSON write (_save_kg_sync) to a
        worker thread so the event loop stays responsive.
        """
        await asyncio.to_thread(self._save_kg_sync)
|
|
176
|
+
|
|
177
|
+
    async def _debounced_save_kg(self):
        """
        Debounce KG saves to batch within a window of
        self.config.kg_debounce_seconds, reducing file I/O.

        RACE CONDITION FIX: Uses asyncio.Lock to protect concurrent access
        to _kg_save_task. Without this, multiple coroutines could both
        cancel the task and create new ones simultaneously.
        """
        async with self._kg_save_lock:  # FIX: Serialize access
            # Cancel existing pending save task so the debounce window
            # restarts from the latest call
            if self._kg_save_task and not self._kg_save_task.done():
                self._kg_save_task.cancel()
                try:
                    await self._kg_save_task
                except asyncio.CancelledError:
                    pass  # expected: we cancelled it ourselves

            # Create new delayed save task; sleep then save. CancelledError
            # means a later call superseded this one — swallow it.
            async def delayed_save():
                try:
                    await asyncio.sleep(self.config.kg_debounce_seconds)
                    await self._save_kg()
                    self._kg_save_pending = False
                except asyncio.CancelledError:
                    pass

            self._kg_save_pending = True
            self._kg_save_task = asyncio.create_task(delayed_save())
|
|
205
|
+
|
|
206
|
+
    async def debounced_save_kg(self):
        """Public alias for debounced KG save (used by OutcomeService).

        Returns immediately after scheduling; the actual disk write happens
        after the debounce window elapses.
        """
        await self._debounced_save_kg()
|
|
209
|
+
|
|
210
|
+
def update_success_rate(self, doc_id: str, outcome: str):
|
|
211
|
+
"""
|
|
212
|
+
Update success rate tracking for a document in the routing KG.
|
|
213
|
+
|
|
214
|
+
This tracks which documents (by ID) lead to successful outcomes,
|
|
215
|
+
enabling the routing system to prefer historically successful sources.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
doc_id: Document ID that had an outcome
|
|
219
|
+
outcome: "worked", "failed", or "partial"
|
|
220
|
+
"""
|
|
221
|
+
# Extract collection from doc_id (e.g., "working_abc123" -> "working")
|
|
222
|
+
parts = doc_id.split("_")
|
|
223
|
+
if len(parts) < 2:
|
|
224
|
+
return
|
|
225
|
+
|
|
226
|
+
collection = parts[0]
|
|
227
|
+
|
|
228
|
+
# Track in routing_patterns
|
|
229
|
+
if collection not in self.knowledge_graph["routing_patterns"]:
|
|
230
|
+
self.knowledge_graph["routing_patterns"][collection] = {
|
|
231
|
+
"successes": 0,
|
|
232
|
+
"failures": 0,
|
|
233
|
+
"partials": 0,
|
|
234
|
+
"total": 0,
|
|
235
|
+
"success_rate": 0.5
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
stats = self.knowledge_graph["routing_patterns"][collection]
|
|
239
|
+
stats["total"] += 1
|
|
240
|
+
|
|
241
|
+
if outcome == "worked":
|
|
242
|
+
stats["successes"] += 1
|
|
243
|
+
elif outcome == "failed":
|
|
244
|
+
stats["failures"] += 1
|
|
245
|
+
else:
|
|
246
|
+
stats["partials"] += 1
|
|
247
|
+
|
|
248
|
+
# Recalculate success rate (partials count as 0.5)
|
|
249
|
+
if stats["total"] > 0:
|
|
250
|
+
weighted = stats["successes"] + (stats["partials"] * 0.5)
|
|
251
|
+
stats["success_rate"] = weighted / stats["total"]
|
|
252
|
+
|
|
253
|
+
def add_relationship(self, doc_id: str, rel_type: str, data: Dict[str, Any]):
|
|
254
|
+
"""
|
|
255
|
+
Add a relationship to the relationships tracking structure.
|
|
256
|
+
|
|
257
|
+
Args:
|
|
258
|
+
doc_id: Document ID
|
|
259
|
+
rel_type: Relationship type (e.g., "evolution", "related", "conflicts")
|
|
260
|
+
data: Relationship data
|
|
261
|
+
"""
|
|
262
|
+
if rel_type not in self.relationships:
|
|
263
|
+
self.relationships[rel_type] = {}
|
|
264
|
+
|
|
265
|
+
if doc_id not in self.relationships[rel_type]:
|
|
266
|
+
self.relationships[rel_type][doc_id] = []
|
|
267
|
+
|
|
268
|
+
self.relationships[rel_type][doc_id].append(data)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
    def _save_relationships_sync(self):
        """Synchronous save of memory relationships.

        Intended to run on a worker thread (see save_relationships). Uses a
        cross-process FileLock plus write-temp-then-rename so readers never
        see a partially written file. Failures are logged, never raised.
        """
        lock_path = str(self.relationships_path) + ".lock"
        try:
            with FileLock(lock_path, timeout=10):
                self.relationships_path.parent.mkdir(exist_ok=True, parents=True)
                # Atomic write: temp file then rename over the original
                temp_path = self.relationships_path.with_suffix('.tmp')
                with open(temp_path, 'w') as f:
                    json.dump(self.relationships, f, indent=2)
                temp_path.replace(self.relationships_path)
        except PermissionError as e:
            logger.error(f"Permission denied saving relationships: {e}")
        except Exception as e:
            logger.error(f"Failed to save relationships: {e}", exc_info=True)
|
|
287
|
+
|
|
288
|
+
    async def save_relationships(self):
        """Save relationships asynchronously.

        Offloads the blocking locked write (_save_relationships_sync) to a
        worker thread so the event loop stays responsive.
        """
        await asyncio.to_thread(self._save_relationships_sync)
|
|
291
|
+
|
|
292
|
+
# =========================================================================
|
|
293
|
+
# Concept Extraction
|
|
294
|
+
# =========================================================================
|
|
295
|
+
|
|
296
|
+
def extract_concepts(self, text: str) -> List[str]:
|
|
297
|
+
"""
|
|
298
|
+
Extract N-grams (unigrams, bigrams, trigrams) from text for KG routing.
|
|
299
|
+
Implements architecture.md specification for concept extraction.
|
|
300
|
+
"""
|
|
301
|
+
concepts: Set[str] = set()
|
|
302
|
+
|
|
303
|
+
# Normalize and tokenize
|
|
304
|
+
text_lower = text.lower()
|
|
305
|
+
# Remove punctuation except hyphens and underscores
|
|
306
|
+
text_clean = re.sub(r'[^\w\s\-_]', ' ', text_lower)
|
|
307
|
+
words = text_clean.split()
|
|
308
|
+
|
|
309
|
+
# Stop words (expanded set)
|
|
310
|
+
stop_words = {
|
|
311
|
+
"the", "a", "an", "is", "are", "was", "were", "to", "for", "of",
|
|
312
|
+
"with", "in", "on", "at", "by", "from", "as", "be", "this", "that",
|
|
313
|
+
"it", "i", "you", "we", "they", "my", "your", "our", "their", "what",
|
|
314
|
+
"when", "where", "how", "why", "can", "could", "would", "should"
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
# v0.2.1: Blocklist for MCP tool names and internal function-like patterns
|
|
318
|
+
# These pollute the Content KG with non-semantic entities
|
|
319
|
+
tool_blocklist = {
|
|
320
|
+
# MCP tool names
|
|
321
|
+
"search_memory", "add_to_memory_bank", "create_memory", "update_memory",
|
|
322
|
+
"archive_memory", "get_context_insights", "record_response", "validated",
|
|
323
|
+
# Common patterns from tool descriptions
|
|
324
|
+
"memory_bank", "working", "history", "patterns", "books",
|
|
325
|
+
# Internal function-like terms
|
|
326
|
+
"function", "parameter", "response", "request", "query", "result",
|
|
327
|
+
"collection", "collections", "metadata", "timestamp", "document"
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
# Filter stop words
|
|
331
|
+
filtered_words = [w for w in words if w not in stop_words and len(w) > 2]
|
|
332
|
+
|
|
333
|
+
# 1. Extract UNIGRAMS (single words)
|
|
334
|
+
for word in filtered_words:
|
|
335
|
+
if len(word) > 3: # Only meaningful words
|
|
336
|
+
concepts.add(word)
|
|
337
|
+
|
|
338
|
+
# 2. Extract BIGRAMS (2-word phrases)
|
|
339
|
+
for i in range(len(filtered_words) - 1):
|
|
340
|
+
bigram = f"{filtered_words[i]}_{filtered_words[i+1]}"
|
|
341
|
+
concepts.add(bigram)
|
|
342
|
+
|
|
343
|
+
# 3. Extract TRIGRAMS (3-word phrases)
|
|
344
|
+
for i in range(len(filtered_words) - 2):
|
|
345
|
+
trigram = f"{filtered_words[i]}_{filtered_words[i+1]}_{filtered_words[i+2]}"
|
|
346
|
+
concepts.add(trigram)
|
|
347
|
+
|
|
348
|
+
# Filter out blocklisted terms
|
|
349
|
+
filtered_concepts = [
|
|
350
|
+
c for c in concepts
|
|
351
|
+
if not any(blocked in c for blocked in tool_blocklist)
|
|
352
|
+
]
|
|
353
|
+
|
|
354
|
+
return filtered_concepts
|
|
355
|
+
|
|
356
|
+
# =========================================================================
|
|
357
|
+
# Concept Relationships
|
|
358
|
+
# =========================================================================
|
|
359
|
+
|
|
360
|
+
def build_concept_relationships(self, concepts: List[str]):
|
|
361
|
+
"""Build relationships between co-occurring concepts."""
|
|
362
|
+
if "relationships" not in self.knowledge_graph:
|
|
363
|
+
self.knowledge_graph["relationships"] = {}
|
|
364
|
+
|
|
365
|
+
# Build relationships between all concept pairs
|
|
366
|
+
for i, concept1 in enumerate(concepts):
|
|
367
|
+
for concept2 in concepts[i+1:]:
|
|
368
|
+
# Create bidirectional relationship key (sorted for consistency)
|
|
369
|
+
rel_key = "|".join(sorted([concept1, concept2]))
|
|
370
|
+
|
|
371
|
+
if rel_key not in self.knowledge_graph["relationships"]:
|
|
372
|
+
self.knowledge_graph["relationships"][rel_key] = {
|
|
373
|
+
"co_occurrence": 0,
|
|
374
|
+
"success_together": 0,
|
|
375
|
+
"failure_together": 0
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
# Increment co-occurrence
|
|
379
|
+
self.knowledge_graph["relationships"][rel_key]["co_occurrence"] += 1
|
|
380
|
+
|
|
381
|
+
    async def update_kg_routing(self, query: str, collection: str, outcome: str):
        """Update KG routing patterns and relationships based on outcome.

        Extracts concepts from the query, records per-concept collection
        performance, re-derives each concept's best collection, updates
        pairwise relationship outcome counters, then schedules a debounced
        save.

        Args:
            query: Original query text (no-op when falsy)
            collection: Collection that served the query
            outcome: "worked", "failed", or other (other values are ignored
                by the success/failure tallies but still bump "total")
        """
        if not query:
            return

        concepts = self.extract_concepts(query)

        # Build relationships between concepts
        self.build_concept_relationships(concepts)

        for concept in concepts:
            if concept not in self.knowledge_graph["routing_patterns"]:
                self.knowledge_graph["routing_patterns"][concept] = {
                    "collections_used": {},
                    "best_collection": collection,
                    "success_rate": 0.5
                }

            pattern = self.knowledge_graph["routing_patterns"][concept]

            # Track collection performance
            if collection not in pattern["collections_used"]:
                pattern["collections_used"][collection] = {
                    "successes": 0,
                    "failures": 0,
                    "total": 0
                }

            stats = pattern["collections_used"][collection]
            stats["total"] += 1

            if outcome == "worked":
                stats["successes"] += 1
            elif outcome == "failed":
                stats["failures"] += 1

            # Update best collection: scan all collections seen for this
            # concept and keep the one with the highest explicit-feedback rate
            best_collection = collection
            best_rate = 0.0

            for coll_name, coll_stats in pattern["collections_used"].items():
                # Calculate success rate: successes / (successes + failures)
                # Excludes "partial" and "unknown" outcomes per v0.1.6 spec
                total_with_feedback = coll_stats["successes"] + coll_stats["failures"]

                if total_with_feedback > 0:
                    rate = coll_stats["successes"] / total_with_feedback
                else:
                    rate = 0.5  # Neutral baseline (50%) for untested patterns

                if rate > best_rate:
                    best_rate = rate
                    best_collection = coll_name

            pattern["best_collection"] = best_collection
            # Default to 0.5 if no collections have been tested with explicit feedback
            pattern["success_rate"] = best_rate if best_rate > 0 else 0.5

        # Update relationship outcomes for every concept pair in this query
        # (only pairs already created by build_concept_relationships above)
        for i, concept1 in enumerate(concepts):
            for concept2 in concepts[i+1:]:
                rel_key = "|".join(sorted([concept1, concept2]))
                if rel_key in self.knowledge_graph.get("relationships", {}):
                    rel_data = self.knowledge_graph["relationships"][rel_key]
                    if outcome == "worked":
                        rel_data["success_together"] += 1
                    elif outcome == "failed":
                        rel_data["failure_together"] += 1

        # Save KG with proper await (debounced)
        await self._debounced_save_kg()
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def add_problem_category(self, problem_key: str, doc_id: str):
|
|
455
|
+
"""Add a document to a problem category."""
|
|
456
|
+
if "problem_categories" not in self.knowledge_graph:
|
|
457
|
+
self.knowledge_graph["problem_categories"] = {}
|
|
458
|
+
|
|
459
|
+
if problem_key not in self.knowledge_graph["problem_categories"]:
|
|
460
|
+
self.knowledge_graph["problem_categories"][problem_key] = []
|
|
461
|
+
|
|
462
|
+
if doc_id not in self.knowledge_graph["problem_categories"][problem_key]:
|
|
463
|
+
self.knowledge_graph["problem_categories"][problem_key].append(doc_id)
|
|
464
|
+
|
|
465
|
+
    def get_problem_categories(self) -> Dict[str, List[str]]:
        """Return mapping of problem category key -> list of doc_ids.

        Returns a shared reference into the live KG (not a copy); empty
        dict when no categories have been recorded.
        """
        return self.knowledge_graph.get("problem_categories", {})
|
|
468
|
+
|
|
469
|
+
# =========================================================================
|
|
470
|
+
# Problem-Solution Tracking
|
|
471
|
+
# =========================================================================
|
|
472
|
+
|
|
473
|
+
    async def find_known_solutions(self, query: str, get_fragment_fn: Callable) -> List[Dict[str, Any]]:
        """
        Find known solutions for similar problems.

        Looks up the query's problem signature in problem_solutions for
        exact matches (top 3, score-boosted 50%), then scans all other
        signatures for partial matches with 3+ overlapping concepts
        (best one each, boosted 30%). Returns [] on any error.

        Args:
            query: Search query text
            get_fragment_fn: Function to retrieve document by ID
                (collection_name, doc_id) -> doc dict or None
        """
        try:
            if not query:
                return []

            # Extract concepts from the query
            query_concepts = self.extract_concepts(query)
            # Signature = first 5 extracted concepts, sorted and joined
            query_signature = "_".join(sorted(query_concepts[:5]))

            known_solutions = []

            # Ensure problem_solutions exists in knowledge graph
            if "problem_solutions" not in self.knowledge_graph:
                self.knowledge_graph["problem_solutions"] = {}

            # Look for exact problem matches
            if query_signature in self.knowledge_graph["problem_solutions"]:
                solutions = self.knowledge_graph["problem_solutions"][query_signature]

                # Sort by success count and recency (ISO timestamps sort
                # lexicographically)
                sorted_solutions = sorted(
                    solutions,
                    key=lambda x: (x.get("success_count", 0), x.get("last_used", "")),
                    reverse=True
                )

                # Add top solutions to results
                for solution in sorted_solutions[:3]:
                    doc_id = solution.get("doc_id")
                    if doc_id:
                        # Try to find the actual document; doc_id prefix
                        # identifies the owning collection
                        for coll_name in ["patterns", "history", "memory_bank", "books"]:
                            if doc_id.startswith(coll_name):
                                doc = get_fragment_fn(coll_name, doc_id)
                                if doc:
                                    # Boost the score for known solutions
                                    # (lower distance = better match)
                                    doc["distance"] = doc.get("distance", 1.0) * 0.5  # 50% boost
                                    doc["is_known_solution"] = True
                                    doc["solution_success_count"] = solution.get("success_count", 0)
                                    known_solutions.append(doc)
                                    # NOTE(review): direct ['success_count'] here (vs .get above)
                                    # would KeyError on an entry lacking the key and abort the
                                    # whole lookup via the outer except — confirm entries always
                                    # carry it (track_problem_solution sets it at creation).
                                    logger.info(f"Found known solution: {doc_id} (used {solution['success_count']} times)")
                                    break

            # Also check for partial matches (3+ concept overlap)
            for problem_sig, solutions in self.knowledge_graph["problem_solutions"].items():
                if problem_sig != query_signature:
                    problem_concepts_stored = set(problem_sig.split("_"))
                    overlap = len(set(query_concepts) & problem_concepts_stored)

                    if overlap >= 3:  # Significant overlap
                        for solution in solutions[:1]:  # Take best from partial matches
                            doc_id = solution.get("doc_id")
                            # NOTE(review): dedupe compares against docs' "id" key —
                            # presumably get_fragment_fn sets it; verify against caller.
                            if doc_id and doc_id not in [s.get("id") for s in known_solutions]:
                                for coll_name in ["patterns", "history", "memory_bank", "books"]:
                                    if doc_id.startswith(coll_name):
                                        doc = get_fragment_fn(coll_name, doc_id)
                                        if doc:
                                            doc["distance"] = doc.get("distance", 1.0) * 0.7  # 30% boost
                                            doc["is_partial_solution"] = True
                                            doc["concept_overlap"] = overlap
                                            known_solutions.append(doc)
                                            break

            return known_solutions

        except Exception as e:
            logger.error(f"Error finding known solutions: {e}")
            return []
|
|
548
|
+
|
|
549
|
+
    async def track_problem_solution(
        self,
        doc_id: str,
        metadata: Dict[str, Any],
        context: Optional[Dict[str, Any]]
    ):
        """Track successful problem->solution patterns for future reuse.

        Derives a problem signature from the memory's original context/query,
        upserts a solution entry under problem_solutions, updates the
        per-pair solution_patterns stats, and schedules a debounced save.
        Errors are logged and swallowed (best-effort tracking).

        Args:
            doc_id: Document ID of the solution memory
            metadata: Memory metadata; reads "original_context"/"query" for
                the problem text and "text" for the solution text
            context: Optional context dict recorded alongside the solution
        """
        try:
            # Extract problem signature from the original query/context
            problem_text = metadata.get("original_context", "") or metadata.get("query", "")
            solution_text = metadata.get("text", "")

            # Nothing to track without both sides of the mapping
            if not problem_text or not solution_text:
                return

            # Create problem signature (simplified concepts)
            problem_concepts = self.extract_concepts(problem_text)
            problem_signature = "_".join(sorted(problem_concepts[:5]))  # Top 5 concepts

            if not problem_signature:
                return

            # Store problem->solution mapping
            if problem_signature not in self.knowledge_graph["problem_solutions"]:
                self.knowledge_graph["problem_solutions"][problem_signature] = []

            solution_entry = {
                "doc_id": doc_id,
                "solution": solution_text,  # Store abbreviated solution
                "success_count": 1,
                "last_used": datetime.now().isoformat(),
                "contexts": [context] if context else []
            }

            # Check if this solution already exists for this problem
            existing_solutions = self.knowledge_graph["problem_solutions"][problem_signature]
            solution_found = False

            for existing in existing_solutions:
                if existing["doc_id"] == doc_id:
                    # Update existing solution: bump count, refresh recency,
                    # and record the context if it's new
                    existing["success_count"] += 1
                    existing["last_used"] = datetime.now().isoformat()
                    if context and context not in existing.get("contexts", []):
                        existing.setdefault("contexts", []).append(context)
                    solution_found = True
                    break

            if not solution_found:
                existing_solutions.append(solution_entry)

            # Track solution patterns (for pattern matching)
            pattern_hash = f"{problem_signature}::{doc_id}"
            if pattern_hash not in self.knowledge_graph["solution_patterns"]:
                self.knowledge_graph["solution_patterns"][pattern_hash] = {
                    "problem": problem_text,
                    "solution": solution_text,
                    "success_count": 0,
                    "failure_count": 0,
                    "contexts": []
                }

            pattern = self.knowledge_graph["solution_patterns"][pattern_hash]
            pattern["success_count"] += 1
            # Denominator is always >= 1 here because success_count was just bumped
            pattern["success_rate"] = pattern["success_count"] / (pattern["success_count"] + pattern["failure_count"])

            # Save updated KG with proper await (debounced)
            await self._debounced_save_kg()

            logger.info(f"Tracked problem->solution pattern: {problem_signature[:30]}... -> {doc_id}")

        except Exception as e:
            logger.error(f"Error tracking problem->solution: {e}")
|
|
622
|
+
|
|
623
|
+
# =========================================================================
|
|
624
|
+
# Cleanup Methods
|
|
625
|
+
# =========================================================================
|
|
626
|
+
|
|
627
|
+
async def cleanup_dead_references(self, doc_exists_fn: Callable[[str], bool]) -> int:
|
|
628
|
+
"""
|
|
629
|
+
Remove doc_id references that no longer exist in collections.
|
|
630
|
+
|
|
631
|
+
Args:
|
|
632
|
+
doc_exists_fn: Function to check if doc exists (doc_id) -> bool
|
|
633
|
+
"""
|
|
634
|
+
try:
|
|
635
|
+
cleaned = 0
|
|
636
|
+
|
|
637
|
+
# Clean problem_categories
|
|
638
|
+
for problem_key, doc_ids in list(self.knowledge_graph.get("problem_categories", {}).items()):
|
|
639
|
+
valid_ids = [doc_id for doc_id in doc_ids if doc_exists_fn(doc_id)]
|
|
640
|
+
if len(valid_ids) < len(doc_ids):
|
|
641
|
+
cleaned += len(doc_ids) - len(valid_ids)
|
|
642
|
+
if valid_ids:
|
|
643
|
+
self.knowledge_graph["problem_categories"][problem_key] = valid_ids
|
|
644
|
+
else:
|
|
645
|
+
del self.knowledge_graph["problem_categories"][problem_key]
|
|
646
|
+
|
|
647
|
+
# Clean problem_solutions
|
|
648
|
+
for problem_sig, solutions in list(self.knowledge_graph.get("problem_solutions", {}).items()):
|
|
649
|
+
valid_solutions = [s for s in solutions if doc_exists_fn(s.get("doc_id"))]
|
|
650
|
+
if len(valid_solutions) < len(solutions):
|
|
651
|
+
cleaned += len(solutions) - len(valid_solutions)
|
|
652
|
+
if valid_solutions:
|
|
653
|
+
self.knowledge_graph["problem_solutions"][problem_sig] = valid_solutions
|
|
654
|
+
else:
|
|
655
|
+
del self.knowledge_graph["problem_solutions"][problem_sig]
|
|
656
|
+
|
|
657
|
+
# Clean routing_patterns (remove patterns with 0 total uses)
|
|
658
|
+
for concept, pattern in list(self.knowledge_graph.get("routing_patterns", {}).items()):
|
|
659
|
+
collections_used = pattern.get("collections_used", {})
|
|
660
|
+
total_uses = sum(stats.get("total", 0) for stats in collections_used.values())
|
|
661
|
+
if total_uses == 0:
|
|
662
|
+
del self.knowledge_graph["routing_patterns"][concept]
|
|
663
|
+
cleaned += 1
|
|
664
|
+
|
|
665
|
+
if cleaned > 0:
|
|
666
|
+
logger.info(f"KG cleanup: removed {cleaned} dead references")
|
|
667
|
+
await self._save_kg() # Immediate save for cleanup operation
|
|
668
|
+
|
|
669
|
+
return cleaned
|
|
670
|
+
except Exception as e:
|
|
671
|
+
logger.error(f"Error cleaning KG dead references: {e}")
|
|
672
|
+
return 0
|
|
673
|
+
|
|
674
|
+
async def cleanup_action_kg_for_doc_ids(self, doc_ids: List[str]) -> int:
    """
    Remove Action KG examples referencing specific doc_ids (v0.2.6).

    Called when books are deleted so that context_action_effectiveness
    examples do not keep pointing at documents that no longer exist.

    Args:
        doc_ids: List of document IDs to purge from Action KG examples

    Returns:
        Number of examples removed (0 on error or empty input)
    """
    if not doc_ids:
        return 0

    try:
        stale_ids = set(doc_ids)
        removed_total = 0

        action_kg = self.knowledge_graph.get("context_action_effectiveness", {})
        for stats in action_kg.values():
            before = stats.get("examples", [])
            kept = [ex for ex in before if ex.get("doc_id") not in stale_ids]
            stats["examples"] = kept
            removed_total += len(before) - len(kept)

        if removed_total > 0:
            logger.info(f"Action KG cleanup: removed {removed_total} examples for deleted doc_ids")
            await self._save_kg()

        return removed_total
    except Exception as e:
        logger.error(f"Error cleaning Action KG for doc_ids: {e}")
        return 0
|
|
714
|
+
|
|
715
|
+
# =========================================================================
|
|
716
|
+
# Entity/Relationship Queries (for visualization)
|
|
717
|
+
# =========================================================================
|
|
718
|
+
|
|
719
|
+
async def get_kg_entities(
    self,
    filter_text: Optional[str] = None,
    limit: int = 200
) -> List[Dict[str, Any]]:
    """
    Get entities from DUAL knowledge graph (Routing KG + Content KG merged).

    CRITICAL: This merges both graphs to provide complete entity view.
    - Routing KG: Query patterns -> collection routing
    - Content KG: Entity relationships from memory_bank content
    - Entities in both graphs get source="both" (purple nodes in UI)

    NOTE: Reloads KG from disk to pick up changes from MCP process.

    Args:
        filter_text: Case-insensitive substring filter applied to entity
            names / action labels; None disables filtering.
        limit: Maximum number of entities returned (after sorting).

    Returns:
        List of entity dicts sorted by "usage_count" descending, each with
        source ("routing" | "content" | "both" | "action"), connection
        counts, success_rate, and collection usage stats.
    """
    # Reload KG from disk to pick up changes from MCP process
    self.reload_kg()

    entities_map: Dict[str, Dict[str, Any]] = {}

    # STEP 1: Get routing KG entities (query-based patterns)
    for concept, pattern in self.knowledge_graph.get("routing_patterns", {}).items():
        if filter_text and filter_text.lower() not in concept.lower():
            continue

        # Count routing connections (relationship keys are "a|b" pairs)
        routing_connections = 0
        for rel_key in self.knowledge_graph.get("relationships", {}).keys():
            if concept in rel_key.split("|"):
                routing_connections += 1

        # Get total usage across all collections
        collections_used = pattern.get("collections_used", {})
        total_usage = sum(c.get("total", 0) for c in collections_used.values())

        entities_map[concept] = {
            "entity": concept,
            "source": "routing",  # Will be updated if also in content KG
            "routing_connections": routing_connections,
            "content_connections": 0,
            "total_connections": routing_connections,
            "success_rate": pattern.get("success_rate", 0.5),
            "best_collection": pattern.get("best_collection"),
            "collections_used": collections_used,
            "usage_count": total_usage,
            "mentions": 0,  # From content KG
            "last_used": pattern.get("last_used"),
            "created_at": pattern.get("created_at")
        }

    # STEP 2: Get content KG entities (memory-based relationships)
    # CRITICAL: Do not skip this step - provides green/purple nodes in UI
    content_entities = self.content_graph.get_all_entities(min_mentions=1)

    # v0.2.1: Blocklist for filtering out tool-like entities
    tool_blocklist_terms = {
        "search_memory", "add_to_memory_bank", "create_memory", "update_memory",
        "archive_memory", "get_context_insights", "record_response", "validated",
        "memory_bank", "working", "history", "patterns", "books",
        "function", "parameter", "response", "request", "query", "result",
        "collection", "collections", "metadata", "timestamp", "document"
    }

    for entity_data in content_entities:
        entity_name = entity_data["entity"]

        # v0.2.1: Skip entities that look like tool names
        # NOTE: substring match, so e.g. "working dog" is also skipped
        is_tool_like = any(term in entity_name for term in tool_blocklist_terms)
        if is_tool_like:
            continue

        if filter_text and filter_text.lower() not in entity_name.lower():
            continue

        # Count content connections
        content_rels = self.content_graph.get_entity_relationships(entity_name, min_strength=0.0)
        content_connections = len(content_rels)

        if entity_name in entities_map:
            # Entity exists in BOTH graphs -> source="both" (purple node)
            entities_map[entity_name]["source"] = "both"
            entities_map[entity_name]["content_connections"] = content_connections
            entities_map[entity_name]["total_connections"] += content_connections
            entities_map[entity_name]["mentions"] = entity_data["mentions"]
        else:
            # Entity only in content KG -> source="content" (green node)
            entities_map[entity_name] = {
                "entity": entity_name,
                "source": "content",  # Content KG only
                "routing_connections": 0,
                "content_connections": content_connections,
                "total_connections": content_connections,
                "success_rate": 0.5,  # Neutral (no routing data)
                "best_collection": "memory_bank",  # Content entities are from memory_bank
                "collections_used": {"memory_bank": {"total": entity_data["mentions"]}},
                "usage_count": entity_data["mentions"],
                "mentions": entity_data["mentions"],
                "last_used": entity_data.get("last_seen"),
                "created_at": entity_data.get("first_seen")
            }

    # STEP 3: Get Action Effectiveness KG entities (context|action|collection patterns)
    # v0.2.1: Orange nodes showing action success rates per context
    for key, stats in self.knowledge_graph.get("context_action_effectiveness", {}).items():
        parts = key.split("|")
        if len(parts) >= 2:
            context_type = parts[0]
            action_type = parts[1]
            collection = parts[2] if len(parts) > 2 else "*"

            # Create readable label, e.g. "search@debugging->history"
            label = f"{action_type}@{context_type}"
            if collection != "*":
                label += f"->{collection}"

            if filter_text and filter_text.lower() not in label.lower():
                continue

            total_uses = stats.get("successes", 0) + stats.get("failures", 0)
            if total_uses == 0:
                continue  # Skip unused patterns

            success_rate = stats.get("success_rate", 0.5)

            # Don't overwrite routing/content entities with same name
            if label not in entities_map:
                entities_map[label] = {
                    "entity": label,
                    "source": "action",  # Orange nodes for action effectiveness
                    "routing_connections": 0,
                    "content_connections": 0,
                    "total_connections": 0,
                    "success_rate": success_rate,
                    "best_collection": collection if collection != "*" else None,
                    "collections_used": {collection: {"total": total_uses, "successes": stats.get("successes", 0), "failures": stats.get("failures", 0)}},
                    "usage_count": total_uses,
                    "mentions": 0,
                    "last_used": stats.get("last_used"),
                    "created_at": stats.get("first_used"),
                    # Action-specific metadata
                    "context_type": context_type,
                    "action_type": action_type,
                    "partials": stats.get("partials", 0)
                }

    # Convert to list and sort by usage
    entities = list(entities_map.values())
    entities.sort(key=lambda x: x["usage_count"], reverse=True)
    return entities[:limit]
|
|
868
|
+
|
|
869
|
+
async def get_kg_relationships(self, entity: str) -> List[Dict[str, Any]]:
    """
    Get relationships for a specific entity (DUAL KG merged).

    CRITICAL: Merges routing + content relationships for complete view.
    Entries present in both graphs are marked source="both" and their
    strengths are summed into "total_strength".
    """
    merged: Dict[str, Dict[str, Any]] = {}

    # STEP 1: routing KG edges ("a|b" pair keys with co-occurrence stats)
    for pair_key, pair_stats in self.knowledge_graph.get("relationships", {}).items():
        members = pair_key.split("|")
        if entity not in members:
            continue
        other = members[0] if members[1] == entity else members[1]
        co_occurrence = pair_stats.get("co_occurrence", 0)
        merged[other] = {
            "related_entity": other,
            "source": "routing",  # Will update if also in content
            "strength": co_occurrence,
            "total_strength": co_occurrence,
            "success_together": pair_stats.get("success_together", 0),
            "failure_together": pair_stats.get("failure_together", 0),
            "content_strength": 0  # From content KG
        }

    # STEP 2: content KG edges
    # CRITICAL: Do not skip - provides entity relationship visualization
    for edge in self.content_graph.get_entity_relationships(entity, min_strength=0.0):
        other = edge["related_entity"]
        weight = edge["strength"]

        existing = merged.get(other)
        if existing is not None:
            # Relationship exists in BOTH graphs
            existing["source"] = "both"
            existing["content_strength"] = weight
            existing["total_strength"] += weight
        else:
            # Relationship only in content KG
            merged[other] = {
                "related_entity": other,
                "source": "content",  # Content KG only
                "strength": 0,  # No routing data
                "total_strength": weight,
                "success_together": 0,
                "failure_together": 0,
                "content_strength": weight
            }

    # Strongest combined relationships first
    return sorted(merged.values(), key=lambda e: e["total_strength"], reverse=True)
|
|
919
|
+
|
|
920
|
+
# =========================================================================
|
|
921
|
+
# Content Graph Integration
|
|
922
|
+
# =========================================================================
|
|
923
|
+
|
|
924
|
+
def add_failure_pattern(self, failure_reason: str, doc_id: str, problem_text: str):
    """Record a failure pattern for later learning, bucketed by reason."""
    bucket = self.knowledge_graph.setdefault("failure_patterns", {})
    bucket.setdefault(failure_reason, []).append({
        "doc_id": doc_id,
        "problem_text": problem_text
    })
|
|
936
|
+
|
|
937
|
+
def add_problem_solution(
    self,
    problem_signature: str,
    doc_id: str,
    solution_text: str,
    context: Optional[Dict[str, Any]] = None
):
    """
    Track a successful problem->solution mapping.

    Entries are stored as dicts with a "doc_id" key (previously bare
    strings were appended, which broke cleanup_kg_dead_references — it
    prunes via s.get("doc_id") and a str has no .get). The solution text
    and context are now persisted instead of being silently dropped.

    Args:
        problem_signature: Normalized signature identifying the problem.
        doc_id: Document ID of the solution; deduplicated per signature.
        solution_text: Solution text (truncated to 500 chars for storage).
        context: Optional metadata about where the solution applied.
    """
    solutions = self.knowledge_graph.setdefault("problem_solutions", {}).setdefault(problem_signature, [])

    # Dedupe by doc_id; tolerate legacy entries stored as bare strings.
    known_ids = {s.get("doc_id") if isinstance(s, dict) else s for s in solutions}
    if doc_id not in known_ids:
        solutions.append({
            "doc_id": doc_id,
            "solution_text": solution_text[:500],
            "context": context or {}
        })
|
|
955
|
+
|
|
956
|
+
def add_solution_pattern(
    self,
    doc_id: str,
    solution_text: str,
    score: float,
    problem_keys: List[str],
    solution_concepts: List[str]
):
    """Record a reusable solution pattern, keyed by doc_id plus the first
    three problem keys; overwrites any previous entry under the same key."""
    key = f"{doc_id}::{'_'.join(problem_keys[:3])}"
    patterns = self.knowledge_graph.setdefault("solution_patterns", {})
    patterns[key] = {
        "doc_id": doc_id,
        "solution_text": solution_text[:500],  # cap stored text at 500 chars
        "score": score,
        "problem_keys": problem_keys,
        "solution_concepts": solution_concepts,
        "uses": 1
    }
|
|
978
|
+
|
|
979
|
+
def add_solution_pattern_entry(
    self,
    pattern_hash: str,
    problem_text: str,
    solution_text: str,
    outcome: str
):
    """Create or update a solution-pattern entry and refresh its success rate.

    Outcomes other than "worked"/"failed" leave the counters untouched.
    """
    patterns = self.knowledge_graph.setdefault("solution_patterns", {})
    entry = patterns.setdefault(pattern_hash, {
        "problem_text": problem_text[:200],
        "solution_text": solution_text[:500],
        "successes": 0,
        "failures": 0,
        "success_rate": 0.5  # neutral prior until an outcome is recorded
    })

    if outcome == "worked":
        entry["successes"] = entry.get("successes", 0) + 1
    elif outcome == "failed":
        entry["failures"] = entry.get("failures", 0) + 1

    observed = entry.get("successes", 0) + entry.get("failures", 0)
    if observed > 0:
        entry["success_rate"] = entry.get("successes", 0) / observed
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
def add_entities_from_text(
    self,
    text: str,
    doc_id: str,
    collection: str,
    quality_score: Optional[float] = None
) -> List[str]:
    """
    Add entities from text to content graph.
    Wrapper around ContentGraph.add_entities_from_text.

    Supplies this service's extract_concepts as the extraction function.

    Args:
        text: Raw text to extract entities from.
        doc_id: Document ID the mentions are attributed to.
        collection: Collection the document belongs to.
        quality_score: Optional quality weighting passed through to the
            content graph.

    Returns:
        Whatever ContentGraph.add_entities_from_text returns (annotated as
        a list of entity names — see that method for the exact contract).
    """
    return self.content_graph.add_entities_from_text(
        text=text,
        doc_id=doc_id,
        collection=collection,
        extract_concepts_fn=self.extract_concepts,
        quality_score=quality_score
    )
|
|
1029
|
+
|
|
1030
|
+
def remove_entity_mention(self, doc_id: str) -> None:
    """Remove a document's entity mentions from content graph.

    Thin delegate to ContentGraph.remove_entity_mention for the given
    doc_id; this service keeps no additional per-document state here.
    """
    self.content_graph.remove_entity_mention(doc_id)
|
|
1033
|
+
|
|
1034
|
+
# =========================================================================
|
|
1035
|
+
# Shutdown
|
|
1036
|
+
# =========================================================================
|
|
1037
|
+
|
|
1038
|
+
async def cleanup(self):
    """Clean shutdown - save pending changes.

    Cancels any in-flight debounced KG save task (awaiting it so the
    cancellation completes), then performs one final synchronous save if
    changes are still pending. All of this happens under _kg_save_lock so
    it cannot race a concurrent save.
    """
    async with self._kg_save_lock:
        # Cancel the debounced background save, if one is still running.
        if self._kg_save_task and not self._kg_save_task.done():
            self._kg_save_task.cancel()
            try:
                await self._kg_save_task
            except asyncio.CancelledError:
                pass  # expected: we just cancelled it

        # Final save if pending
        if self._kg_save_pending:
            await self._save_kg()

    logger.info("KnowledgeGraphService cleaned up")
|