roampal 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roampal/__init__.py +29 -0
- roampal/__main__.py +6 -0
- roampal/backend/__init__.py +1 -0
- roampal/backend/modules/__init__.py +1 -0
- roampal/backend/modules/memory/__init__.py +43 -0
- roampal/backend/modules/memory/chromadb_adapter.py +623 -0
- roampal/backend/modules/memory/config.py +102 -0
- roampal/backend/modules/memory/content_graph.py +543 -0
- roampal/backend/modules/memory/context_service.py +455 -0
- roampal/backend/modules/memory/embedding_service.py +96 -0
- roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
- roampal/backend/modules/memory/memory_bank_service.py +433 -0
- roampal/backend/modules/memory/memory_types.py +296 -0
- roampal/backend/modules/memory/outcome_service.py +400 -0
- roampal/backend/modules/memory/promotion_service.py +473 -0
- roampal/backend/modules/memory/routing_service.py +444 -0
- roampal/backend/modules/memory/scoring_service.py +324 -0
- roampal/backend/modules/memory/search_service.py +646 -0
- roampal/backend/modules/memory/tests/__init__.py +1 -0
- roampal/backend/modules/memory/tests/conftest.py +12 -0
- roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
- roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
- roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
- roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
- roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
- roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
- roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
- roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
- roampal/backend/modules/memory/unified_memory_system.py +1277 -0
- roampal/cli.py +638 -0
- roampal/hooks/__init__.py +16 -0
- roampal/hooks/session_manager.py +587 -0
- roampal/hooks/stop_hook.py +176 -0
- roampal/hooks/user_prompt_submit_hook.py +103 -0
- roampal/mcp/__init__.py +7 -0
- roampal/mcp/server.py +611 -0
- roampal/server/__init__.py +7 -0
- roampal/server/main.py +744 -0
- roampal-0.1.4.dist-info/METADATA +179 -0
- roampal-0.1.4.dist-info/RECORD +44 -0
- roampal-0.1.4.dist-info/WHEEL +5 -0
- roampal-0.1.4.dist-info/entry_points.txt +2 -0
- roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
- roampal-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ContextService - Extracted from UnifiedMemorySystem
|
|
3
|
+
|
|
4
|
+
Handles conversation context analysis for organic memory injection.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Dict, Any, Optional, List, Callable, Awaitable
|
|
10
|
+
|
|
11
|
+
from .config import MemoryConfig
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ContextService:
|
|
17
|
+
"""
|
|
18
|
+
Service for analyzing conversation context.
|
|
19
|
+
|
|
20
|
+
Extracted from UnifiedMemorySystem.analyze_conversation_context and related.
|
|
21
|
+
Provides:
|
|
22
|
+
- Pattern recognition from past conversations
|
|
23
|
+
- Failure awareness for similar approaches
|
|
24
|
+
- Topic continuity detection
|
|
25
|
+
- Proactive insights based on routing patterns
|
|
26
|
+
- Repetition detection
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
collections: Dict[str, Any],
|
|
32
|
+
kg_service: Any = None,
|
|
33
|
+
embed_fn: Optional[Callable[[str], Awaitable[List[float]]]] = None,
|
|
34
|
+
config: Optional[MemoryConfig] = None
|
|
35
|
+
):
|
|
36
|
+
"""
|
|
37
|
+
Initialize ContextService.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
collections: Dict of collection name -> adapter
|
|
41
|
+
kg_service: KnowledgeGraphService for pattern/routing access
|
|
42
|
+
embed_fn: Async function to embed text for similarity search
|
|
43
|
+
config: Memory configuration
|
|
44
|
+
"""
|
|
45
|
+
self.collections = collections
|
|
46
|
+
self.kg_service = kg_service
|
|
47
|
+
self.embed_fn = embed_fn
|
|
48
|
+
self.config = config or MemoryConfig()
|
|
49
|
+
|
|
50
|
+
async def analyze_conversation_context(
|
|
51
|
+
self,
|
|
52
|
+
current_message: str,
|
|
53
|
+
recent_conversation: List[Dict[str, Any]],
|
|
54
|
+
conversation_id: str
|
|
55
|
+
) -> Dict[str, Any]:
|
|
56
|
+
"""
|
|
57
|
+
Analyze conversation context for organic memory injection.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
current_message: The current user message
|
|
61
|
+
recent_conversation: List of recent conversation messages
|
|
62
|
+
conversation_id: Current conversation ID
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
Dict with relevant patterns, past outcomes, continuity, insights
|
|
66
|
+
"""
|
|
67
|
+
context = {
|
|
68
|
+
"relevant_patterns": [],
|
|
69
|
+
"past_outcomes": [],
|
|
70
|
+
"topic_continuity": [],
|
|
71
|
+
"proactive_insights": []
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
# Extract concepts from current message
|
|
76
|
+
current_concepts = self._extract_concepts(current_message)
|
|
77
|
+
|
|
78
|
+
# 1. Pattern Recognition
|
|
79
|
+
patterns = await self._find_relevant_patterns(current_concepts)
|
|
80
|
+
context["relevant_patterns"] = patterns
|
|
81
|
+
|
|
82
|
+
# 2. Failure Awareness
|
|
83
|
+
past_outcomes = self._check_failure_patterns(current_concepts)
|
|
84
|
+
context["past_outcomes"] = past_outcomes
|
|
85
|
+
|
|
86
|
+
# 3. Topic Continuity
|
|
87
|
+
continuity = self._detect_topic_continuity(
|
|
88
|
+
current_concepts, recent_conversation
|
|
89
|
+
)
|
|
90
|
+
context["topic_continuity"] = continuity
|
|
91
|
+
|
|
92
|
+
# 4. Proactive Insights
|
|
93
|
+
insights = self._get_proactive_insights(current_concepts)
|
|
94
|
+
context["proactive_insights"] = insights
|
|
95
|
+
|
|
96
|
+
# 5. Repetition Detection
|
|
97
|
+
if self.embed_fn and "working" in self.collections:
|
|
98
|
+
repetitions = await self._detect_repetition(
|
|
99
|
+
current_message, conversation_id
|
|
100
|
+
)
|
|
101
|
+
context["proactive_insights"].extend(repetitions[:1])
|
|
102
|
+
|
|
103
|
+
except Exception as e:
|
|
104
|
+
logger.error(f"Error analyzing conversation context: {e}")
|
|
105
|
+
|
|
106
|
+
return context
|
|
107
|
+
|
|
108
|
+
async def _find_relevant_patterns(
|
|
109
|
+
self,
|
|
110
|
+
concepts: List[str]
|
|
111
|
+
) -> List[Dict[str, Any]]:
|
|
112
|
+
"""Find relevant patterns from past conversations."""
|
|
113
|
+
patterns = []
|
|
114
|
+
|
|
115
|
+
if not self.kg_service or not concepts:
|
|
116
|
+
return patterns
|
|
117
|
+
|
|
118
|
+
# Create pattern signature from concepts
|
|
119
|
+
pattern_signature = "_".join(sorted(concepts[:3]))
|
|
120
|
+
|
|
121
|
+
# Check problem categories
|
|
122
|
+
problem_categories = self.kg_service.get_problem_categories()
|
|
123
|
+
if pattern_signature in problem_categories:
|
|
124
|
+
past_solutions = problem_categories[pattern_signature]
|
|
125
|
+
|
|
126
|
+
for doc_id in past_solutions[:2]:
|
|
127
|
+
# Look in patterns and history collections
|
|
128
|
+
for coll_name in ["patterns", "history"]:
|
|
129
|
+
if coll_name not in self.collections:
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
doc = self.collections[coll_name].get_fragment(doc_id)
|
|
133
|
+
if doc:
|
|
134
|
+
metadata = doc.get("metadata", {})
|
|
135
|
+
score = metadata.get("score", 0.5)
|
|
136
|
+
uses = metadata.get("uses", 0)
|
|
137
|
+
last_outcome = metadata.get("last_outcome", "unknown")
|
|
138
|
+
|
|
139
|
+
# Only include proven patterns
|
|
140
|
+
if score >= self.config.promotion_score_threshold and last_outcome == "worked":
|
|
141
|
+
# v0.2.8: Full content, no truncation
|
|
142
|
+
patterns.append({
|
|
143
|
+
"text": doc.get("content", ""),
|
|
144
|
+
"score": score,
|
|
145
|
+
"uses": uses,
|
|
146
|
+
"collection": coll_name,
|
|
147
|
+
"insight": f"Based on {uses} past use(s), this approach had a {int(score*100)}% success rate"
|
|
148
|
+
})
|
|
149
|
+
break
|
|
150
|
+
|
|
151
|
+
return patterns
|
|
152
|
+
|
|
153
|
+
def _check_failure_patterns(
|
|
154
|
+
self,
|
|
155
|
+
concepts: List[str]
|
|
156
|
+
) -> List[Dict[str, Any]]:
|
|
157
|
+
"""Check if similar attempts failed before."""
|
|
158
|
+
past_outcomes = []
|
|
159
|
+
|
|
160
|
+
if not self.kg_service:
|
|
161
|
+
return past_outcomes
|
|
162
|
+
|
|
163
|
+
failure_patterns = self.kg_service.get_failure_patterns()
|
|
164
|
+
|
|
165
|
+
for failure_key, failures in failure_patterns.items():
|
|
166
|
+
# Check if current message relates to past failures
|
|
167
|
+
if any(concept in failure_key.lower() for concept in concepts):
|
|
168
|
+
recent_failures = [f for f in failures if f.get("timestamp", "")][-2:]
|
|
169
|
+
|
|
170
|
+
for failure in recent_failures:
|
|
171
|
+
# v0.2.8: Full content, no truncation
|
|
172
|
+
past_outcomes.append({
|
|
173
|
+
"outcome": "failed",
|
|
174
|
+
"reason": failure_key,
|
|
175
|
+
"when": failure.get("timestamp", ""),
|
|
176
|
+
"insight": f"Note: Similar approach failed before due to: {failure_key}"
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
return past_outcomes
|
|
180
|
+
|
|
181
|
+
def _detect_topic_continuity(
|
|
182
|
+
self,
|
|
183
|
+
current_concepts: List[str],
|
|
184
|
+
recent_conversation: List[Dict[str, Any]]
|
|
185
|
+
) -> List[Dict[str, Any]]:
|
|
186
|
+
"""Detect if continuing or switching topics."""
|
|
187
|
+
continuity = []
|
|
188
|
+
|
|
189
|
+
if not recent_conversation or len(recent_conversation) < 2:
|
|
190
|
+
return continuity
|
|
191
|
+
|
|
192
|
+
# Get last user message
|
|
193
|
+
last_messages = [
|
|
194
|
+
msg for msg in recent_conversation[-3:]
|
|
195
|
+
if msg.get("role") == "user"
|
|
196
|
+
]
|
|
197
|
+
|
|
198
|
+
if not last_messages:
|
|
199
|
+
return continuity
|
|
200
|
+
|
|
201
|
+
last_message = last_messages[-1].get("content", "")
|
|
202
|
+
last_concepts = self._extract_concepts(last_message)
|
|
203
|
+
|
|
204
|
+
# Check concept overlap
|
|
205
|
+
overlap = set(current_concepts) & set(last_concepts)
|
|
206
|
+
if overlap:
|
|
207
|
+
continuity.append({
|
|
208
|
+
"continuing": True,
|
|
209
|
+
"common_concepts": list(overlap),
|
|
210
|
+
"insight": f"Continuing discussion about: {', '.join(list(overlap)[:3])}"
|
|
211
|
+
})
|
|
212
|
+
else:
|
|
213
|
+
continuity.append({
|
|
214
|
+
"continuing": False,
|
|
215
|
+
"insight": "Topic shift detected - loading new context"
|
|
216
|
+
})
|
|
217
|
+
|
|
218
|
+
return continuity
|
|
219
|
+
|
|
220
|
+
def _get_proactive_insights(
|
|
221
|
+
self,
|
|
222
|
+
concepts: List[str]
|
|
223
|
+
) -> List[Dict[str, Any]]:
|
|
224
|
+
"""Get proactive insights based on routing patterns."""
|
|
225
|
+
insights = []
|
|
226
|
+
|
|
227
|
+
if not self.kg_service:
|
|
228
|
+
return insights
|
|
229
|
+
|
|
230
|
+
routing_patterns = self.kg_service.get_routing_patterns()
|
|
231
|
+
|
|
232
|
+
for concept in concepts[:3]:
|
|
233
|
+
if concept in routing_patterns:
|
|
234
|
+
pattern = routing_patterns[concept]
|
|
235
|
+
success_rate = pattern.get("success_rate", 0)
|
|
236
|
+
best_collection = pattern.get("best_collection", "unknown")
|
|
237
|
+
|
|
238
|
+
if success_rate > 0.7:
|
|
239
|
+
insights.append({
|
|
240
|
+
"concept": concept,
|
|
241
|
+
"success_rate": success_rate,
|
|
242
|
+
"recommendation": f"For '{concept}', check {best_collection} collection (historically {int(success_rate*100)}% effective)"
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
return insights
|
|
246
|
+
|
|
247
|
+
async def _detect_repetition(
|
|
248
|
+
self,
|
|
249
|
+
current_message: str,
|
|
250
|
+
conversation_id: str
|
|
251
|
+
) -> List[Dict[str, Any]]:
|
|
252
|
+
"""Detect if user asked similar question recently."""
|
|
253
|
+
repetitions = []
|
|
254
|
+
|
|
255
|
+
if not self.embed_fn or "working" not in self.collections:
|
|
256
|
+
return repetitions
|
|
257
|
+
|
|
258
|
+
try:
|
|
259
|
+
# Get embedding
|
|
260
|
+
query_vector = await self.embed_fn(current_message)
|
|
261
|
+
|
|
262
|
+
# Search working memory
|
|
263
|
+
working_items = await self.collections["working"].query_vectors(
|
|
264
|
+
query_vector=query_vector,
|
|
265
|
+
top_k=3
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
for item in working_items:
|
|
269
|
+
metadata = item.get("metadata", {})
|
|
270
|
+
if metadata.get("conversation_id") == conversation_id:
|
|
271
|
+
# Calculate similarity
|
|
272
|
+
similarity = 1.0 / (1.0 + item.get("distance", 1.0))
|
|
273
|
+
if similarity > 0.85: # Very similar
|
|
274
|
+
# v0.2.8: Full content, no truncation
|
|
275
|
+
repetitions.append({
|
|
276
|
+
"text": item.get("content", ""),
|
|
277
|
+
"similarity": similarity,
|
|
278
|
+
"insight": f"You mentioned something similar recently (similarity: {int(similarity*100)}%)"
|
|
279
|
+
})
|
|
280
|
+
|
|
281
|
+
except Exception as e:
|
|
282
|
+
logger.warning(f"Error detecting repetition: {e}")
|
|
283
|
+
|
|
284
|
+
return repetitions
|
|
285
|
+
|
|
286
|
+
def _extract_concepts(self, text: str) -> List[str]:
|
|
287
|
+
"""
|
|
288
|
+
Extract concepts from text.
|
|
289
|
+
|
|
290
|
+
Uses KG service if available, otherwise basic extraction.
|
|
291
|
+
"""
|
|
292
|
+
if self.kg_service:
|
|
293
|
+
return self.kg_service.extract_concepts(text)
|
|
294
|
+
|
|
295
|
+
# Basic extraction fallback
|
|
296
|
+
return self._basic_concept_extraction(text)
|
|
297
|
+
|
|
298
|
+
def _basic_concept_extraction(self, text: str) -> List[str]:
|
|
299
|
+
"""Basic concept extraction without KG service."""
|
|
300
|
+
if not text:
|
|
301
|
+
return []
|
|
302
|
+
|
|
303
|
+
# Simple word extraction (lowercase, alphabetic only)
|
|
304
|
+
words = text.lower().split()
|
|
305
|
+
concepts = []
|
|
306
|
+
|
|
307
|
+
# Filter stopwords and short words
|
|
308
|
+
stopwords = {
|
|
309
|
+
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
|
|
310
|
+
'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
|
|
311
|
+
'would', 'could', 'should', 'may', 'might', 'must', 'shall',
|
|
312
|
+
'can', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
|
|
313
|
+
'from', 'up', 'about', 'into', 'through', 'during', 'before',
|
|
314
|
+
'after', 'above', 'below', 'between', 'under', 'again',
|
|
315
|
+
'further', 'then', 'once', 'here', 'there', 'when', 'where',
|
|
316
|
+
'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other',
|
|
317
|
+
'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same',
|
|
318
|
+
'so', 'than', 'too', 'very', 's', 't', 'just', 'don', 'now',
|
|
319
|
+
'it', 'its', 'and', 'but', 'or', 'if', 'as', 'this', 'that',
|
|
320
|
+
'i', 'me', 'my', 'you', 'your', 'he', 'she', 'we', 'they'
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
for word in words:
|
|
324
|
+
# Clean word
|
|
325
|
+
clean_word = ''.join(c for c in word if c.isalnum())
|
|
326
|
+
if len(clean_word) >= 3 and clean_word not in stopwords:
|
|
327
|
+
concepts.append(clean_word)
|
|
328
|
+
|
|
329
|
+
return concepts[:10]
|
|
330
|
+
|
|
331
|
+
async def find_known_solutions(self, query: str) -> List[Dict[str, Any]]:
    """
    Find known solutions for similar problems.

    Looks up the query's concept signature in the knowledge graph's
    problem->solution map. Exact signature matches get a 50% relevance
    boost (distance halved); partial matches with at least 3 overlapping
    concepts get a 30% boost. Returns an empty list on any error.

    Args:
        query: The problem query

    Returns:
        List of known solutions with boost information
    """
    if not query or not self.kg_service:
        return []

    try:
        # Extract concepts
        query_concepts = self._extract_concepts(query)
        # Signature: first five concepts, sorted and underscore-joined.
        query_signature = "_".join(sorted(query_concepts[:5]))

        known_solutions = []
        problem_solutions = self.kg_service.get_problem_solutions()

        # Look for exact matches
        if query_signature in problem_solutions:
            solutions = problem_solutions[query_signature]

            # Sort by success count and recency
            sorted_solutions = sorted(
                solutions,
                key=lambda x: (x.get("success_count", 0), x.get("last_used", "")),
                reverse=True
            )

            # Get actual documents (top three by the sort above)
            for solution in sorted_solutions[:3]:
                doc_id = solution.get("doc_id")
                if doc_id:
                    doc = self._get_document(doc_id)
                    if doc:
                        # Halving the distance promotes this doc in ranking.
                        doc["distance"] = doc.get("distance", 1.0) * 0.5  # 50% boost
                        doc["is_known_solution"] = True
                        doc["solution_success_count"] = solution.get("success_count", 0)
                        known_solutions.append(doc)
                        logger.info(f"Found known solution: {doc_id}")

        # Check partial matches
        for problem_sig, solutions in problem_solutions.items():
            if problem_sig != query_signature:
                # Stored signatures are underscore-joined concept lists.
                problem_concepts_stored = set(problem_sig.split("_"))
                overlap = len(set(query_concepts) & problem_concepts_stored)

                if overlap >= 3:  # Significant overlap
                    for solution in solutions[:1]:
                        doc_id = solution.get("doc_id")
                        # Avoid re-adding docs already found via exact match.
                        existing_ids = [s.get("id") for s in known_solutions]

                        if doc_id and doc_id not in existing_ids:
                            doc = self._get_document(doc_id)
                            if doc:
                                # Smaller boost than an exact match.
                                doc["distance"] = doc.get("distance", 1.0) * 0.7  # 30% boost
                                doc["is_partial_solution"] = True
                                doc["concept_overlap"] = overlap
                                known_solutions.append(doc)

        return known_solutions

    except Exception as e:
        logger.error(f"Error finding known solutions: {e}")
        return []
|
|
399
|
+
|
|
400
|
+
def _get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
|
|
401
|
+
"""Get document from appropriate collection."""
|
|
402
|
+
for coll_name, adapter in self.collections.items():
|
|
403
|
+
if doc_id.startswith(coll_name):
|
|
404
|
+
doc = adapter.get_fragment(doc_id)
|
|
405
|
+
if doc:
|
|
406
|
+
return {
|
|
407
|
+
"id": doc_id,
|
|
408
|
+
"content": doc.get("content", ""),
|
|
409
|
+
"metadata": doc.get("metadata", {}),
|
|
410
|
+
"distance": 1.0,
|
|
411
|
+
"collection": coll_name
|
|
412
|
+
}
|
|
413
|
+
return None
|
|
414
|
+
|
|
415
|
+
def get_context_summary(
|
|
416
|
+
self,
|
|
417
|
+
context: Dict[str, Any]
|
|
418
|
+
) -> str:
|
|
419
|
+
"""
|
|
420
|
+
Generate a human-readable summary of context analysis.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
context: Result from analyze_conversation_context
|
|
424
|
+
|
|
425
|
+
Returns:
|
|
426
|
+
Summary string
|
|
427
|
+
"""
|
|
428
|
+
parts = []
|
|
429
|
+
|
|
430
|
+
# Patterns
|
|
431
|
+
patterns = context.get("relevant_patterns", [])
|
|
432
|
+
if patterns:
|
|
433
|
+
parts.append(f"Found {len(patterns)} relevant pattern(s) from past conversations")
|
|
434
|
+
|
|
435
|
+
# Failures
|
|
436
|
+
failures = context.get("past_outcomes", [])
|
|
437
|
+
if failures:
|
|
438
|
+
parts.append(f"Warning: {len(failures)} similar approach(es) failed before")
|
|
439
|
+
|
|
440
|
+
# Continuity
|
|
441
|
+
continuity = context.get("topic_continuity", [])
|
|
442
|
+
if continuity:
|
|
443
|
+
cont = continuity[0]
|
|
444
|
+
if cont.get("continuing"):
|
|
445
|
+
concepts = cont.get("common_concepts", [])
|
|
446
|
+
parts.append(f"Continuing discussion: {', '.join(concepts[:3])}")
|
|
447
|
+
else:
|
|
448
|
+
parts.append("New topic detected")
|
|
449
|
+
|
|
450
|
+
# Insights
|
|
451
|
+
insights = context.get("proactive_insights", [])
|
|
452
|
+
if insights:
|
|
453
|
+
parts.append(f"{len(insights)} proactive insight(s) available")
|
|
454
|
+
|
|
455
|
+
return " | ".join(parts) if parts else "No significant context detected"
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embedding Service
|
|
3
|
+
|
|
4
|
+
Handles text embedding using sentence-transformers.
|
|
5
|
+
Uses the same bundled model as Roampal: paraphrase-multilingual-mpnet-base-v2
|
|
6
|
+
|
|
7
|
+
Simplified from Roampal - removed Ollama embedding fallback since roampal-core
|
|
8
|
+
uses bundled model only.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
from sentence_transformers import SentenceTransformer
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
# Default model - same as Roampal
DEFAULT_MODEL = "paraphrase-multilingual-mpnet-base-v2"


class EmbeddingService:
    """
    Service for generating text embeddings.

    Uses sentence-transformers with a multilingual model that works well
    for code and natural language. The model is loaded lazily on first
    use so constructing the service stays cheap.
    """

    # Output dimension of the default model; used for the zero-vector
    # placeholder returned for empty input without loading the model.
    # NOTE(review): assumes the configured model also emits 768-dim
    # vectors when a non-default model_name is supplied - confirm.
    _DEFAULT_DIMENSION = 768

    def __init__(self, model_name: str = DEFAULT_MODEL):
        """
        Initialize embedding service.

        Args:
            model_name: Name of sentence-transformers model to use
        """
        self.model_name = model_name
        # Annotation quoted so the class is definable even before the
        # sentence_transformers symbol is needed at runtime.
        self._model: Optional["SentenceTransformer"] = None

    @property
    def model(self) -> "SentenceTransformer":
        """Lazy-load model on first use."""
        if self._model is None:
            logger.info(f"Loading embedding model: {self.model_name}")
            self._model = SentenceTransformer(self.model_name)
            logger.info(f"Embedding model loaded: {self.model_name}")
        return self._model

    async def embed_text(self, text: str) -> List[float]:
        """
        Generate embedding for a single text.

        Args:
            text: Text to embed

        Returns:
            List of floats representing the embedding vector; a zero
            vector for empty/whitespace-only input (model not loaded).
        """
        if not text or not text.strip():
            logger.warning("Empty text provided for embedding")
            # Zero-vector placeholder avoids loading the model for a no-op.
            return [0.0] * self._DEFAULT_DIMENSION

        # Generate embedding
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()

    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts (batch).

        The result is positionally aligned with the input: empty or
        whitespace-only entries map to zero vectors instead of being
        silently dropped (bug fix - previously a mixed batch returned
        fewer vectors than inputs, breaking index correspondence).

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors, one per input text.
        """
        if not texts:
            return []

        # Indices of entries that actually contain text.
        valid_indices = [i for i, t in enumerate(texts) if t and t.strip()]
        if not valid_indices:
            return [[0.0] * self._DEFAULT_DIMENSION for _ in texts]

        # Batch-embed only the non-empty entries.
        embeddings = self.model.encode(
            [texts[i] for i in valid_indices], convert_to_numpy=True
        )
        vectors = [e.tolist() for e in embeddings]

        # Re-assemble in input order, padding empty slots with zero
        # vectors of the same dimension as the real embeddings.
        dimension = len(vectors[0]) if vectors else self._DEFAULT_DIMENSION
        result = [[0.0] * dimension for _ in texts]
        for i, vec in zip(valid_indices, vectors):
            result[i] = vec
        return result

    def get_embedding_dimension(self) -> int:
        """Get the dimension of embeddings produced by this model."""
        return self.model.get_sentence_embedding_dimension()

    async def prewarm(self):
        """Pre-warm the model by loading it."""
        _ = self.model
        logger.info(f"Embedding model pre-warmed: {self.model_name}")
|