roampal-0.1.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- roampal/__init__.py +29 -0
- roampal/__main__.py +6 -0
- roampal/backend/__init__.py +1 -0
- roampal/backend/modules/__init__.py +1 -0
- roampal/backend/modules/memory/__init__.py +43 -0
- roampal/backend/modules/memory/chromadb_adapter.py +623 -0
- roampal/backend/modules/memory/config.py +102 -0
- roampal/backend/modules/memory/content_graph.py +543 -0
- roampal/backend/modules/memory/context_service.py +455 -0
- roampal/backend/modules/memory/embedding_service.py +96 -0
- roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
- roampal/backend/modules/memory/memory_bank_service.py +433 -0
- roampal/backend/modules/memory/memory_types.py +296 -0
- roampal/backend/modules/memory/outcome_service.py +400 -0
- roampal/backend/modules/memory/promotion_service.py +473 -0
- roampal/backend/modules/memory/routing_service.py +444 -0
- roampal/backend/modules/memory/scoring_service.py +324 -0
- roampal/backend/modules/memory/search_service.py +646 -0
- roampal/backend/modules/memory/tests/__init__.py +1 -0
- roampal/backend/modules/memory/tests/conftest.py +12 -0
- roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
- roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
- roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
- roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
- roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
- roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
- roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
- roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
- roampal/backend/modules/memory/unified_memory_system.py +1277 -0
- roampal/cli.py +638 -0
- roampal/hooks/__init__.py +16 -0
- roampal/hooks/session_manager.py +587 -0
- roampal/hooks/stop_hook.py +176 -0
- roampal/hooks/user_prompt_submit_hook.py +103 -0
- roampal/mcp/__init__.py +7 -0
- roampal/mcp/server.py +611 -0
- roampal/server/__init__.py +7 -0
- roampal/server/main.py +744 -0
- roampal-0.1.4.dist-info/METADATA +179 -0
- roampal-0.1.4.dist-info/RECORD +44 -0
- roampal-0.1.4.dist-info/WHEEL +5 -0
- roampal-0.1.4.dist-info/entry_points.txt +2 -0
- roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
- roampal-0.1.4.dist-info/top_level.txt +1 -0
roampal/server/main.py
ADDED
@@ -0,0 +1,744 @@
"""
Roampal FastAPI Server

Provides:
- /api/hooks/get-context: Called by UserPromptSubmit hook (injects scoring prompt)
- /api/hooks/stop: Called by Stop hook (stores exchange, enforces record_response)
- /api/health: Health check endpoint
- MCP server (optional, for power users)

Hook Enforcement Flow:
1. UserPromptSubmit hook calls /api/hooks/get-context
   - If previous exchange unscored, injects scoring prompt
   - Adds relevant memories to context
2. LLM MUST call record_response(outcome) to score previous exchange
3. Stop hook calls /api/hooks/stop
   - Stores current exchange with doc_id
   - Can block (exit 2) if record_response wasn't called
"""

import logging
import json
import asyncio
import os
from datetime import datetime
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn

# Import memory system and session manager
from roampal.backend.modules.memory import UnifiedMemorySystem
from roampal.backend.modules.memory.unified_memory_system import ActionOutcome
from roampal.hooks import SessionManager

logger = logging.getLogger(__name__)

# Global instances
_memory: Optional[UnifiedMemorySystem] = None
_session_manager: Optional[SessionManager] = None

# Search result cache for outcome scoring (session_id -> doc_ids)
_search_cache: Dict[str, Dict[str, Any]] = {}


async def _build_cold_start_profile() -> Optional[str]:
    """
    Build the cold start user profile injection.

    Dumps ALL memory_bank facts to give the LLM a complete picture
    of who the user is at the start of each session.

    Returns:
        Formatted user profile string, or None if no facts exist
    """
    if not _memory:
        return None

    try:
        # Get memory_bank facts - use a broad query to find user-related facts.
        # An empty query returns random results, so we search for user-specific
        # content, including "Logan name who is" to find identity facts.
        all_facts = await _memory.search(
            query="Logan name who is identity preference goal project communication style",
            collections=["memory_bank"],
            limit=50  # Get up to 50 facts for cold start
        )

        if not all_facts:
            return None

        # Group facts by tags for organization
        identity_facts = []
        preference_facts = []
        goal_facts = []
        project_facts = []
        other_facts = []

        for fact in all_facts:
            # Content can be in 'text', 'content', or metadata
            content = fact.get("text") or fact.get("content") or fact.get("metadata", {}).get("content", "")
            # Tags can be a list or a JSON-encoded string
            tags_raw = fact.get("metadata", {}).get("tags", [])
            if isinstance(tags_raw, str):
                try:
                    tags = json.loads(tags_raw)
                except (ValueError, TypeError):
                    tags = []
            else:
                tags = tags_raw

            # Debug logging
            logger.debug(f"Cold start fact: content={content[:50]}..., tags={tags}")

            if "identity" in tags:
                identity_facts.append(content)
            elif "preference" in tags:
                preference_facts.append(content)
            elif "goal" in tags:
                goal_facts.append(content)
            elif "project" in tags:
                project_facts.append(content)
            else:
                other_facts.append(content)

        # Build formatted profile
        profile_parts = ["<roampal-user-profile>"]
        profile_parts.append("COLD START: Here's everything you know about this user:\n")

        if identity_facts:
            profile_parts.append("IDENTITY:")
            for fact in identity_facts:
                profile_parts.append(f"  - {fact}")

        if preference_facts:
            profile_parts.append("\nPREFERENCES:")
            for fact in preference_facts:
                profile_parts.append(f"  - {fact}")

        if goal_facts:
            profile_parts.append("\nGOALS:")
            for fact in goal_facts:
                profile_parts.append(f"  - {fact}")

        if project_facts:
            profile_parts.append("\nPROJECTS:")
            for fact in project_facts:
                profile_parts.append(f"  - {fact}")

        if other_facts:
            profile_parts.append("\nOTHER FACTS:")
            for fact in other_facts:
                profile_parts.append(f"  - {fact}")

        profile_parts.append("\nUse this context to personalize your responses.")
        profile_parts.append("</roampal-user-profile>\n")

        return "\n".join(profile_parts)

    except Exception as e:
        logger.error(f"Error building cold start profile: {e}")
        return None
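
# For illustration only: with one (hypothetical) fact per bucket, the profile
# built above would look roughly like this, depending on the facts stored:
#
#   <roampal-user-profile>
#   COLD START: Here's everything you know about this user:
#
#   IDENTITY:
#     - User's name is Logan
#
#   PREFERENCES:
#     - Prefers concise answers
#
#   Use this context to personalize your responses.
#   </roampal-user-profile>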


# ==================== Request/Response Models ====================

class GetContextRequest(BaseModel):
    """Request for hook context injection."""
    query: str
    conversation_id: Optional[str] = None
    recent_messages: Optional[List[Dict[str, Any]]] = None


class GetContextResponse(BaseModel):
    """Response with context to inject."""
    formatted_injection: str
    user_facts: List[Dict[str, Any]]
    relevant_memories: List[Dict[str, Any]]
    context_summary: str
    scoring_required: bool = False  # True if previous exchange needs scoring


class StopHookRequest(BaseModel):
    """Request from Stop hook after LLM responds."""
    conversation_id: str
    user_message: str
    assistant_response: str
    transcript: Optional[str] = None  # Full transcript to check for record_response call


class StopHookResponse(BaseModel):
    """Response to Stop hook."""
    stored: bool
    doc_id: str
    scoring_complete: bool  # Did the LLM call record_response?
    should_block: bool  # Should hook block with exit code 2?
    block_message: Optional[str] = None


class SearchRequest(BaseModel):
    """Request for searching memory."""
    query: str
    conversation_id: Optional[str] = None
    collections: Optional[List[str]] = None
    limit: int = 10


class MemoryBankAddRequest(BaseModel):
    """Request to add to memory bank."""
    content: str
    tags: Optional[List[str]] = None
    importance: float = 0.7
    confidence: float = 0.7


class MemoryBankUpdateRequest(BaseModel):
    """Request to update memory bank."""
    old_content: str
    new_content: str


class RecordOutcomeRequest(BaseModel):
    """Request to record outcome for scoring."""
    conversation_id: str
    outcome: str  # worked, failed, partial, unknown
    related: Optional[List[str]] = None  # Optional: filter to only score these doc_ids
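
# Illustrative sketch (not in the published file): pydantic validates incoming
# JSON against these models, so a minimal valid RecordOutcomeRequest looks like:
#
#   req = RecordOutcomeRequest(conversation_id="session-abc", outcome="worked")
#   assert req.related is None  # optional doc_id filter defaults to None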


# ==================== Lifecycle ====================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage memory system lifecycle."""
    global _memory, _session_manager
    logger.info("Starting Roampal server...")

    # Check for dev mode or custom data path
    dev_mode = os.environ.get("ROAMPAL_DEV", "").lower() in ("1", "true", "yes")
    data_path = os.environ.get("ROAMPAL_DATA_PATH")

    if dev_mode:
        logger.info("DEV MODE enabled - using Roampal_DEV data directory")
    if data_path:
        logger.info(f"Using custom data path: {data_path}")

    # Initialize memory system
    _memory = UnifiedMemorySystem(data_path=data_path)
    await _memory.initialize()
    logger.info("Memory system initialized")

    # Initialize session manager (uses same data path)
    _session_manager = SessionManager(_memory.data_path)
    logger.info("Session manager initialized")

    yield

    # Cleanup
    logger.info("Shutting down Roampal server...")


def create_app() -> FastAPI:
    """Create FastAPI application."""
    app = FastAPI(
        title="Roampal",
        description="Persistent memory for AI coding tools",
        version="0.1.0",
        lifespan=lifespan
    )

    # CORS for local development
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # ==================== Hook Endpoints ====================

    @app.post("/api/hooks/get-context", response_model=GetContextResponse)
    async def get_context(request: GetContextRequest):
        """
        Called by UserPromptSubmit hook BEFORE the LLM sees the message.

        Returns:
            1. Cold start user profile (on first message of session)
            2. Scoring prompt if previous exchange needs scoring AND assistant completed
            3. Relevant memories from search
            4. User facts from memory_bank
        """
        if not _memory or not _session_manager:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        try:
            formatted_parts = []
            scoring_required = False
            conversation_id = request.conversation_id or "default"

            # 0. Check for cold start (first message of session)
            is_cold_start = _session_manager.is_first_message(conversation_id)

            if is_cold_start:
                # Dump full user profile on first message
                cold_start_profile = await _build_cold_start_profile()
                if cold_start_profile:
                    formatted_parts.append(cold_start_profile)
                    logger.info(f"Cold start: injected user profile for {conversation_id}")

                # Mark first message as seen
                _session_manager.mark_first_message_seen(conversation_id)

            # 1. Get memory context FIRST (needed for scoring prompt to include surfaced memories)
            context = await _memory.get_context_for_injection(
                query=request.query,
                conversation_id=conversation_id,
                recent_conversation=request.recent_messages
            )

            # 2. Check if assistant completed a response (vs user interrupting mid-work)
            assistant_completed = _session_manager.check_and_clear_completed(conversation_id)

            # Only inject scoring prompt if:
            # - Assistant completed their previous response (not mid-work interruption)
            # - There's an unscored exchange to score
            # - NOT a cold start (no previous exchange to score on first message)
            if assistant_completed and not is_cold_start:
                previous = await _session_manager.get_previous_exchange(conversation_id)

                if previous and not previous.get("scored", False):
                    # Build list of surfaced memories for selective scoring
                    surfaced_memories = context.get("relevant_memories", [])

                    # Inject scoring prompt with surfaced memories
                    scoring_prompt = _session_manager.build_scoring_prompt(
                        previous_exchange=previous,
                        current_user_message=request.query,
                        surfaced_memories=surfaced_memories if surfaced_memories else None
                    )
                    formatted_parts.append(scoring_prompt)
                    scoring_required = True
                    # Track that we injected scoring prompt (for Stop hook to check)
                    _session_manager.set_scoring_required(conversation_id, True)
                    logger.info(f"Injecting scoring prompt for conversation {conversation_id} with {len(surfaced_memories)} memories")
                else:
                    # No unscored exchange, but assistant did complete
                    _session_manager.set_scoring_required(conversation_id, False)
            else:
                # User interrupted mid-work OR cold start - no scoring needed
                _session_manager.set_scoring_required(conversation_id, False)
                if not is_cold_start:
                    logger.info(f"Skipping scoring - user interrupted mid-work for {conversation_id}")

            # 3. Add memory context after scoring prompt
            if context.get("formatted_injection"):
                formatted_parts.append(context["formatted_injection"])

            # 4. Cache doc_ids for outcome scoring via record_response.
            # This ensures hook-injected memories can be scored later.
            injected_doc_ids = context.get("doc_ids", [])
            if injected_doc_ids:
                _search_cache[conversation_id] = {
                    "doc_ids": injected_doc_ids,
                    "query": request.query,
                    "source": "hook_injection",
                    "timestamp": datetime.now().isoformat()
                }
                logger.info(f"Cached {len(injected_doc_ids)} doc_ids from hook injection for {conversation_id}")

            return GetContextResponse(
                formatted_injection="\n".join(formatted_parts),
                user_facts=context.get("user_facts", []),
                relevant_memories=context.get("relevant_memories", []),
                context_summary=context.get("context_summary", ""),
                scoring_required=scoring_required
            )

        except Exception as e:
            logger.error(f"Error getting context: {e}")
            raise HTTPException(status_code=500, detail=str(e))
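
    # For illustration: on a non-cold-start turn where scoring is due, the
    # injected text is assembled in this order:
    #
    #   formatted_parts = [scoring_prompt, memory_context]
    #
    # and on the first message of a session (no scoring prompt):
    #
    #   formatted_parts = [cold_start_profile, memory_context]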

    @app.post("/api/hooks/stop", response_model=StopHookResponse)
    async def stop_hook(request: StopHookRequest):
        """
        Called by Stop hook AFTER the LLM responds.

        1. Stores the exchange with doc_id for later scoring
        2. Checks if record_response was called (with retry for race condition)
        3. Returns should_block=True if scoring was required but not done
        """
        if not _memory or not _session_manager:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        try:
            conversation_id = request.conversation_id or "default"

            # Store the exchange in working memory
            content = f"User: {request.user_message}\n\nAssistant: {request.assistant_response}"
            doc_id = await _memory.store_working(
                content=content,
                conversation_id=conversation_id,
                metadata={
                    "turn_type": "exchange",
                    "timestamp": datetime.now().isoformat()
                }
            )

            # Store exchange in session file
            await _session_manager.store_exchange(
                conversation_id=conversation_id,
                user_message=request.user_message,
                assistant_response=request.assistant_response,
                doc_id=doc_id
            )

            # IMPORTANT: Check scoring flags BEFORE set_completed() resets them
            scoring_was_required = _session_manager.was_scoring_required(conversation_id)

            # Race condition fix: if scoring was required, wait briefly for the
            # record_response MCP tool call to complete. The tool might be
            # in-flight when the Stop hook fires.
            scored_this_turn = _session_manager.was_scored_this_turn(conversation_id)

            if scoring_was_required and not scored_this_turn:
                # Wait up to 500ms in 50ms intervals for the MCP tool to complete
                for attempt in range(10):
                    await asyncio.sleep(0.05)  # 50ms
                    scored_this_turn = _session_manager.was_scored_this_turn(conversation_id)
                    if scored_this_turn:
                        logger.info(f"Race condition resolved: record_response completed after {(attempt + 1) * 50}ms")
                        break

            # Mark assistant as completed - this signals UserPromptSubmit that
            # scoring is needed on the NEXT user message.
            # Note: this resets scoring_required, so we checked it above first.
            _session_manager.set_completed(conversation_id)
            logger.info(f"Marked assistant as completed for {conversation_id}")

            # Determine blocking based on scoring state
            scoring_complete = False
            should_block = False
            block_message = None

            if scored_this_turn:
                scoring_complete = True
                logger.info(f"record_response was called this turn for {conversation_id}")
            elif scoring_was_required:
                # Scoring was required this turn but the LLM didn't call record_response.
                # SOFT ENFORCE: log a warning but don't block (prompt injection does
                # 95% of the work). We trade guaranteed enforcement for smooth UX,
                # so no block_message either.
                should_block = False
                block_message = None  # Don't send any message to avoid UI noise
                logger.warning(f"Soft enforce: record_response not called for {conversation_id}")
            else:
                # Scoring wasn't required (user interrupted mid-work) - don't block
                logger.info(f"No scoring required this turn for {conversation_id} - not blocking")

            return StopHookResponse(
                stored=True,
                doc_id=doc_id,
                scoring_complete=scoring_complete,
                should_block=should_block,
                block_message=block_message
            )

        except Exception as e:
            logger.error(f"Error in stop hook: {e}")
            raise HTTPException(status_code=500, detail=str(e))
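
    # Illustrative: the Stop hook process maps this response to exit codes
    # (should_block=True would mean "block with exit 2"), but with soft
    # enforcement the server always answers along these lines:
    #
    #   {"stored": true, "doc_id": "working_abc123", "scoring_complete": false,
    #    "should_block": false, "block_message": null}
    #
    # (doc_id shown is hypothetical)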

    # ==================== Memory API Endpoints ====================

    @app.post("/api/search")
    async def search_memory(request: SearchRequest):
        """Search across memory collections."""
        if not _memory:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        try:
            results = await _memory.search(
                query=request.query,
                collections=request.collections,
                limit=request.limit
            )

            # Cache doc_ids for outcome scoring
            if request.conversation_id:
                doc_ids = [r.get("id") for r in results if r.get("id")]
                _search_cache[request.conversation_id] = {
                    "doc_ids": doc_ids,
                    "query": request.query,
                    "timestamp": datetime.now().isoformat()
                }

            return {
                "query": request.query,
                "count": len(results),
                "results": results
            }

        except Exception as e:
            logger.error(f"Error searching: {e}")
            raise HTTPException(status_code=500, detail=str(e))
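
    # Illustrative: a direct search against a running server, e.g.
    #
    #   curl -s http://127.0.0.1:27182/api/search \
    #        -H 'Content-Type: application/json' \
    #        -d '{"query": "chromadb errors", "limit": 5}'
    #
    # Passing conversation_id additionally caches the returned doc_ids so a
    # later /api/record-outcome call can score them.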

    @app.post("/api/memory-bank/add")
    async def add_to_memory_bank(request: MemoryBankAddRequest):
        """Add a fact to memory bank."""
        if not _memory:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        try:
            doc_id = await _memory.store_memory_bank(
                text=request.content,
                tags=request.tags,
                importance=request.importance,
                confidence=request.confidence
            )

            return {"success": True, "doc_id": doc_id}

        except Exception as e:
            logger.error(f"Error adding to memory bank: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    @app.post("/api/memory-bank/update")
    async def update_memory_bank(request: MemoryBankUpdateRequest):
        """Update a memory bank entry."""
        if not _memory:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        try:
            doc_id = await _memory.update_memory_bank(
                old_content=request.old_content,
                new_content=request.new_content
            )

            return {
                "success": doc_id is not None,
                "doc_id": doc_id
            }

        except Exception as e:
            logger.error(f"Error updating memory bank: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    @app.post("/api/memory-bank/archive")
    async def archive_memory_bank(request: Dict[str, str]):
        """Archive a memory bank entry."""
        if not _memory:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        content = request.get("content", "")
        if not content:
            raise HTTPException(status_code=400, detail="Content required")

        try:
            success = await _memory.archive_memory_bank(content)
            return {"success": success}

        except Exception as e:
            logger.error(f"Error archiving: {e}")
            raise HTTPException(status_code=500, detail=str(e))
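
    # Illustrative: adding a tagged fact; the tags drive the cold-start
    # grouping in _build_cold_start_profile above, e.g.
    #
    #   curl -s http://127.0.0.1:27182/api/memory-bank/add \
    #        -H 'Content-Type: application/json' \
    #        -d '{"content": "Prefers pytest over unittest",
    #             "tags": ["preference"], "importance": 0.8}'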

    @app.post("/api/ingest")
    async def ingest_document(request: Dict[str, Any]):
        """
        Ingest a document into the books collection.

        Called by the CLI when the server is running, so data is immediately searchable.

        Request body:
            content: Document text
            title: Document title
            source: Source file path
            chunk_size: Characters per chunk (default 1000)
            chunk_overlap: Overlap between chunks (default 200)
        """
        if not _memory:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        content = request.get("content", "")
        title = request.get("title", "Untitled")
        source = request.get("source", "unknown")
        chunk_size = request.get("chunk_size", 1000)
        chunk_overlap = request.get("chunk_overlap", 200)

        if not content:
            raise HTTPException(status_code=400, detail="Content required")

        try:
            doc_ids = await _memory.store_book(
                content=content,
                title=title,
                source=source,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap
            )

            logger.info(f"Ingested '{title}' in {len(doc_ids)} chunks")

            return {
                "success": True,
                "title": title,
                "chunks": len(doc_ids),
                "doc_ids": doc_ids
            }

        except Exception as e:
            logger.error(f"Error ingesting: {e}")
            raise HTTPException(status_code=500, detail=str(e))
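
    # Illustrative: assuming a sliding-window chunker, the defaults advance
    # roughly chunk_size - chunk_overlap = 800 characters per chunk, so a
    # 10,000-character document yields about 12-13 chunks:
    #
    #   curl -s http://127.0.0.1:27182/api/ingest \
    #        -H 'Content-Type: application/json' \
    #        -d '{"content": "...", "title": "Team style guide",
    #             "chunk_size": 1000, "chunk_overlap": 200}'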

    @app.post("/api/record-outcome")
    async def record_outcome(request: RecordOutcomeRequest):
        """
        Record outcome for learning.

        Called by the score_response MCP tool.
        Scores:
        1. Most recent unscored exchange (across ALL sessions - handles MCP/hook ID mismatch)
        2. Cached search results (from _search_cache)
        """
        if not _memory or not _session_manager:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        try:
            conversation_id = request.conversation_id or "default"
            doc_ids_scored = []

            # Track that score_response was called this turn (for Stop hook blocking)
            _session_manager.set_scored_this_turn(conversation_id, True)

            # 1. Find the most recent unscored exchange across ALL sessions.
            # This handles the MCP/hook session ID mismatch.
            previous = await _session_manager.get_most_recent_unscored_exchange()
            if previous and previous.get("doc_id"):
                await _session_manager.mark_scored(
                    conversation_id=previous.get("conversation_id", conversation_id),
                    doc_id=previous["doc_id"],
                    outcome=request.outcome
                )
                doc_ids_scored.append(previous["doc_id"])
                logger.info(f"Scored exchange from session {previous.get('conversation_id')}")

            # 2. Score cached search results.
            # First try exact conversation_id match.
            cached = _search_cache.get(conversation_id, {})
            cached_doc_ids = cached.get("doc_ids", [])
            cache_key_used = conversation_id

            # If no cache for this ID, find the most recent cache entry.
            # This handles MCP using "default" while the hook caches under the real session_id.
            if not cached_doc_ids and _search_cache:
                most_recent_key = max(_search_cache.keys(),
                                      key=lambda k: _search_cache[k].get("timestamp", ""))
                cached = _search_cache.get(most_recent_key, {})
                cached_doc_ids = cached.get("doc_ids", [])
                cache_key_used = most_recent_key
                if cached_doc_ids:
                    logger.info(f"Using cache from session {most_recent_key} (MCP used {conversation_id})")

            # 2a. If related doc_ids provided, use them directly (bypass stale cache).
            # This fixes the timing issue where the cache is overwritten before scoring.
            if request.related:
                doc_ids_scored.extend(request.related)
                logger.info(f"Direct scoring: {len(request.related)} doc_ids from related param")
            elif cached_doc_ids:
                # No related filter - score all cached (backwards compatible)
                doc_ids_scored.extend(cached_doc_ids)
                logger.info(f"Cache scoring: {len(cached_doc_ids)} doc_ids")

            # 3. Apply outcome to filtered documents
            if doc_ids_scored and request.outcome in ["worked", "failed", "partial"]:
                result = await _memory.record_outcome(
                    doc_ids=doc_ids_scored,
                    outcome=request.outcome
                )
                logger.info(f"Scored {len(doc_ids_scored)} documents with outcome '{request.outcome}'")

                # 4. Track in Action KG (works for ALL collections including memory_bank/books).
                # Detect context type from recent activity.
                context_type = await _memory.detect_context_type() or "general"

                # Get cached query for routing updates
                cached_query = cached.get("query", "")
                collections_updated = set()

                for doc_id in doc_ids_scored:
                    # Extract collection from doc_id prefix
                    collection = None
                    for coll_name in ["memory_bank", "books", "working", "history", "patterns"]:
                        if doc_id.startswith(coll_name):
                            collection = coll_name
                            break

                    # Track in Action KG
                    action = ActionOutcome(
                        action_type="score_response",
                        context_type=context_type,
                        outcome=request.outcome,
                        doc_id=doc_id,
                        collection=collection
                    )
                    await _memory.record_action_outcome(action)

                    # Track collection for routing update (once per collection)
                    if collection:
                        collections_updated.add(collection)

                # 5. Update Routing KG (learns which collections work for which queries).
                # This benefits ALL collections including memory_bank/books.
                if cached_query:
                    for collection in collections_updated:
                        await _memory._update_kg_routing(cached_query, collection, request.outcome)
            else:
                result = {"outcome": request.outcome, "documents_updated": 0}

            # Clear the search cache for the key we used
            if cache_key_used in _search_cache:
                del _search_cache[cache_key_used]

            return {
                "success": True,
                "outcome": request.outcome,
                "documents_scored": len(doc_ids_scored),
                **result
            }

        except Exception as e:
            logger.error(f"Error recording outcome: {e}")
            raise HTTPException(status_code=500, detail=str(e))
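
    # Illustrative: the scoring call the record_response/score_response tool
    # ends up making, optionally narrowed to specific doc_ids (the doc_id
    # below is hypothetical):
    #
    #   curl -s http://127.0.0.1:27182/api/record-outcome \
    #        -H 'Content-Type: application/json' \
    #        -d '{"conversation_id": "default", "outcome": "worked",
    #             "related": ["memory_bank_1a2b"]}'
    #
    # An outcome of "unknown" is accepted but skips document scoring (step 3 above).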

    # ==================== Health/Status Endpoints ====================

    @app.get("/api/health")
    async def health_check():
        """Health check endpoint."""
        return {
            "status": "healthy",
            "memory_initialized": _memory is not None and _memory.initialized,
            "session_manager_ready": _session_manager is not None,
            "timestamp": datetime.now().isoformat()
        }

    @app.get("/api/stats")
    async def get_stats():
        """Get memory system statistics."""
        if not _memory:
            raise HTTPException(status_code=503, detail="Memory system not ready")

        return _memory.get_stats()

    return app


def start_server(host: str = "127.0.0.1", port: int = 27182):
    """Start the Roampal server."""
    app = create_app()
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    start_server()