remdb 0.3.163__py3-none-any.whl → 0.3.200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/agent_manager.py +2 -1
- rem/agentic/context.py +101 -0
- rem/agentic/context_builder.py +30 -8
- rem/agentic/mcp/tool_wrapper.py +43 -14
- rem/agentic/providers/pydantic_ai.py +76 -34
- rem/agentic/schema.py +4 -3
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +75 -14
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +476 -155
- rem/api/routers/auth.py +11 -6
- rem/api/routers/chat/completions.py +52 -10
- rem/api/routers/chat/sse_events.py +2 -2
- rem/api/routers/chat/streaming.py +162 -19
- rem/api/routers/messages.py +96 -23
- rem/auth/middleware.py +59 -42
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +93 -101
- rem/schemas/agents/core/agent-builder.yaml +143 -42
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +17 -6
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +32 -21
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +7 -1
- rem/services/session/pydantic_messages.py +210 -0
- rem/services/user_service.py +12 -9
- rem/settings.py +7 -1
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +148 -11
- rem/sql/migrations/002_install_models.sql +162 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +51 -13
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/METADATA +1 -1
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/RECORD +48 -46
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/WHEEL +0 -0
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/entry_points.txt +0 -0
rem/api/mcp_router/tools.py
CHANGED
|
@@ -20,6 +20,7 @@ Available Tools:
|
|
|
20
20
|
- get_schema: Get detailed schema for a table (columns, types, indexes)
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
import json
|
|
23
24
|
from functools import wraps
|
|
24
25
|
from typing import Any, Callable, Literal, cast
|
|
25
26
|
|
|
@@ -116,7 +117,8 @@ def mcp_tool_error_handler(func: Callable) -> Callable:
|
|
|
116
117
|
# Otherwise wrap in success response
|
|
117
118
|
return {"status": "success", **result}
|
|
118
119
|
except Exception as e:
|
|
119
|
-
|
|
120
|
+
# Use {} placeholders with positional args so curly braces in the error message are not treated as format fields
|
|
121
|
+
logger.opt(exception=True).error("{} failed: {}", func.__name__, str(e))
|
|
120
122
|
return {
|
|
121
123
|
"status": "error",
|
|
122
124
|
"error": str(e),
|
|
@@ -127,201 +129,228 @@ def mcp_tool_error_handler(func: Callable) -> Callable:
|
|
|
127
129
|
|
|
128
130
|
@mcp_tool_error_handler
|
|
129
131
|
async def search_rem(
|
|
130
|
-
|
|
131
|
-
# LOOKUP parameters
|
|
132
|
-
entity_key: str | None = None,
|
|
133
|
-
# FUZZY parameters
|
|
134
|
-
query_text: str | None = None,
|
|
135
|
-
threshold: float = 0.7,
|
|
136
|
-
# SEARCH parameters
|
|
137
|
-
table: str | None = None,
|
|
132
|
+
query: str,
|
|
138
133
|
limit: int = 20,
|
|
139
|
-
# SQL parameters
|
|
140
|
-
sql_query: str | None = None,
|
|
141
|
-
# TRAVERSE parameters
|
|
142
|
-
initial_query: str | None = None,
|
|
143
|
-
edge_types: list[str] | None = None,
|
|
144
|
-
depth: int = 1,
|
|
145
|
-
# Optional context override (defaults to authenticated user)
|
|
146
|
-
user_id: str | None = None,
|
|
147
134
|
) -> dict[str, Any]:
|
|
148
135
|
"""
|
|
149
|
-
Execute REM
|
|
150
|
-
|
|
151
|
-
REM supports multiple query types for different retrieval patterns:
|
|
136
|
+
Execute a REM query using the REM query dialect.
|
|
152
137
|
|
|
153
|
-
**
|
|
154
|
-
- Fast exact match across all tables
|
|
155
|
-
- Uses indexed label_vector for instant retrieval
|
|
156
|
-
- Example: LOOKUP "Sarah Chen" returns all entities named "Sarah Chen"
|
|
138
|
+
**REM Query Syntax:**
|
|
157
139
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
140
|
+
LOOKUP <entity_key>
|
|
141
|
+
Find entity by exact name/key. Searches across all tables.
|
|
142
|
+
Example: LOOKUP phq-9-procedure
|
|
143
|
+
Example: LOOKUP sertraline
|
|
161
144
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
145
|
+
SEARCH <text> IN <table>
|
|
146
|
+
Semantic vector search within a specific table.
|
|
147
|
+
Tables: 'ontologies' (clinical knowledge, procedures, drugs, DSM criteria)
|
|
148
|
+
'resources' (documents, files, user content)
|
|
149
|
+
Example: SEARCH depression IN ontologies
|
|
150
|
+
Example: SEARCH Module F IN ontologies
|
|
165
151
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
152
|
+
FUZZY <text>
|
|
153
|
+
Fuzzy text matching for partial matches and typos.
|
|
154
|
+
Example: FUZZY setraline
|
|
169
155
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
- Example: TRAVERSE "Sarah Chen" edge_types=["manages", "reports_to"] depth=2
|
|
156
|
+
TRAVERSE <start_entity>
|
|
157
|
+
Graph traversal from a starting entity.
|
|
158
|
+
Example: TRAVERSE sarah-chen
|
|
174
159
|
|
|
175
160
|
Args:
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
query_text: Search text for FUZZY or SEARCH
|
|
179
|
-
threshold: Similarity threshold for FUZZY (0.0-1.0)
|
|
180
|
-
table: Target table for SEARCH (resources, moments, users, etc.)
|
|
181
|
-
limit: Max results for SEARCH
|
|
182
|
-
sql_query: SQL WHERE clause for SQL type (e.g. "id = '123'")
|
|
183
|
-
initial_query: Starting entity for TRAVERSE
|
|
184
|
-
edge_types: Edge types to follow for TRAVERSE (e.g., ["manages", "reports_to"])
|
|
185
|
-
depth: Traversal depth for TRAVERSE (0=plan only, 1-5=actual traversal)
|
|
186
|
-
user_id: Optional user identifier (defaults to authenticated user or "default")
|
|
161
|
+
query: REM query string (e.g., "LOOKUP phq-9-procedure", "SEARCH depression IN ontologies")
|
|
162
|
+
limit: Maximum results to return (default: 20)
|
|
187
163
|
|
|
188
164
|
Returns:
|
|
189
|
-
Dict with query results
|
|
165
|
+
Dict with query results and metadata. If no results found, includes
|
|
166
|
+
'suggestions' with alternative search strategies.
|
|
190
167
|
|
|
191
168
|
Examples:
|
|
192
|
-
|
|
193
|
-
search_rem(
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
# Semantic search
|
|
199
|
-
search_rem(
|
|
200
|
-
query_type="search",
|
|
201
|
-
query_text="database migration",
|
|
202
|
-
table="resources",
|
|
203
|
-
limit=10
|
|
204
|
-
)
|
|
205
|
-
|
|
206
|
-
# SQL query (WHERE clause only)
|
|
207
|
-
search_rem(
|
|
208
|
-
query_type="sql",
|
|
209
|
-
table="resources",
|
|
210
|
-
sql_query="category = 'document'"
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
# Graph traversal
|
|
214
|
-
search_rem(
|
|
215
|
-
query_type="traverse",
|
|
216
|
-
initial_query="Sarah Chen",
|
|
217
|
-
edge_types=["manages", "reports_to"],
|
|
218
|
-
depth=2
|
|
219
|
-
)
|
|
169
|
+
search_rem("LOOKUP phq-9-procedure")
|
|
170
|
+
search_rem("SEARCH depression IN ontologies")
|
|
171
|
+
search_rem("SEARCH anxiety treatment IN ontologies", limit=10)
|
|
172
|
+
search_rem("FUZZY setraline")
|
|
220
173
|
"""
|
|
221
174
|
# Get RemService instance (lazy initialization)
|
|
222
175
|
rem_service = await get_rem_service()
|
|
223
176
|
|
|
224
|
-
# Get user_id from context
|
|
225
|
-
|
|
226
|
-
|
|
177
|
+
# Get user_id from context
|
|
178
|
+
user_id = AgentContext.get_user_id_or_default(None, source="search_rem")
|
|
179
|
+
|
|
180
|
+
# Parse the REM query string
|
|
181
|
+
if not query or not query.strip():
|
|
182
|
+
return {
|
|
183
|
+
"status": "error",
|
|
184
|
+
"error": "Empty query. Use REM syntax: LOOKUP <key>, SEARCH <text> IN <table>, FUZZY <text>, or TRAVERSE <entity>",
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
query = query.strip()
|
|
188
|
+
parts = query.split(None, 1) # Split on first whitespace
|
|
227
189
|
|
|
228
|
-
|
|
229
|
-
|
|
190
|
+
if len(parts) < 2:
|
|
191
|
+
return {
|
|
192
|
+
"status": "error",
|
|
193
|
+
"error": f"Invalid query format: '{query}'. Expected: LOOKUP <key>, SEARCH <text> IN <table>, FUZZY <text>, or TRAVERSE <entity>",
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
query_type = parts[0].upper()
|
|
197
|
+
remainder = parts[1].strip()
|
|
230
198
|
|
|
231
199
|
# Build RemQuery based on query_type
|
|
232
|
-
if query_type == "
|
|
233
|
-
if not
|
|
234
|
-
return {
|
|
200
|
+
if query_type == "LOOKUP":
|
|
201
|
+
if not remainder:
|
|
202
|
+
return {
|
|
203
|
+
"status": "error",
|
|
204
|
+
"error": "LOOKUP requires an entity key. Example: LOOKUP phq-9-procedure",
|
|
205
|
+
}
|
|
235
206
|
|
|
236
|
-
|
|
207
|
+
rem_query = RemQuery(
|
|
237
208
|
query_type=QueryType.LOOKUP,
|
|
238
209
|
parameters=LookupParameters(
|
|
239
|
-
key=
|
|
210
|
+
key=remainder,
|
|
240
211
|
user_id=user_id,
|
|
241
212
|
),
|
|
242
213
|
user_id=user_id,
|
|
243
214
|
)
|
|
215
|
+
table = None # LOOKUP searches all tables
|
|
216
|
+
|
|
217
|
+
elif query_type == "SEARCH":
|
|
218
|
+
# Parse "text IN table" format
|
|
219
|
+
if " IN " in remainder.upper():
|
|
220
|
+
# Find the last " IN " to handle cases like "SEARCH pain IN back IN ontologies"
|
|
221
|
+
in_pos = remainder.upper().rfind(" IN ")
|
|
222
|
+
search_text = remainder[:in_pos].strip()
|
|
223
|
+
table = remainder[in_pos + 4:].strip().lower()
|
|
224
|
+
else:
|
|
225
|
+
return {
|
|
226
|
+
"status": "error",
|
|
227
|
+
"error": f"SEARCH requires table: SEARCH <text> IN <table>. "
|
|
228
|
+
"Use 'ontologies' for clinical knowledge or 'resources' for documents. "
|
|
229
|
+
f"Example: SEARCH {remainder} IN ontologies",
|
|
230
|
+
}
|
|
244
231
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
query_type=QueryType.FUZZY,
|
|
251
|
-
parameters=FuzzyParameters(
|
|
252
|
-
query_text=query_text,
|
|
253
|
-
threshold=threshold,
|
|
254
|
-
limit=limit, # Limit was missing in original logic but likely intended
|
|
255
|
-
),
|
|
256
|
-
user_id=user_id,
|
|
257
|
-
)
|
|
258
|
-
|
|
259
|
-
elif query_type == "search":
|
|
260
|
-
if not query_text:
|
|
261
|
-
return {"status": "error", "error": "query_text required for SEARCH"}
|
|
262
|
-
if not table:
|
|
263
|
-
return {"status": "error", "error": "table required for SEARCH"}
|
|
232
|
+
if not search_text:
|
|
233
|
+
return {
|
|
234
|
+
"status": "error",
|
|
235
|
+
"error": "SEARCH requires search text. Example: SEARCH depression IN ontologies",
|
|
236
|
+
}
|
|
264
237
|
|
|
265
|
-
|
|
238
|
+
rem_query = RemQuery(
|
|
266
239
|
query_type=QueryType.SEARCH,
|
|
267
240
|
parameters=SearchParameters(
|
|
268
|
-
query_text=
|
|
241
|
+
query_text=search_text,
|
|
269
242
|
table_name=table,
|
|
270
243
|
limit=limit,
|
|
271
244
|
),
|
|
272
245
|
user_id=user_id,
|
|
273
246
|
)
|
|
274
247
|
|
|
275
|
-
elif query_type == "
|
|
276
|
-
if not
|
|
277
|
-
return {
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
parameters=SQLParameters(
|
|
288
|
-
table_name=table,
|
|
289
|
-
where_clause=sql_query,
|
|
248
|
+
elif query_type == "FUZZY":
|
|
249
|
+
if not remainder:
|
|
250
|
+
return {
|
|
251
|
+
"status": "error",
|
|
252
|
+
"error": "FUZZY requires search text. Example: FUZZY setraline",
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
rem_query = RemQuery(
|
|
256
|
+
query_type=QueryType.FUZZY,
|
|
257
|
+
parameters=FuzzyParameters(
|
|
258
|
+
query_text=remainder,
|
|
259
|
+
threshold=0.3, # pg_trgm similarity - 0.3 is reasonable for typo correction
|
|
290
260
|
limit=limit,
|
|
291
261
|
),
|
|
292
262
|
user_id=user_id,
|
|
293
263
|
)
|
|
264
|
+
table = None
|
|
294
265
|
|
|
295
|
-
elif query_type == "
|
|
296
|
-
if not
|
|
266
|
+
elif query_type == "TRAVERSE":
|
|
267
|
+
if not remainder:
|
|
297
268
|
return {
|
|
298
269
|
"status": "error",
|
|
299
|
-
"error": "
|
|
270
|
+
"error": "TRAVERSE requires a starting entity. Example: TRAVERSE sarah-chen",
|
|
300
271
|
}
|
|
301
272
|
|
|
302
|
-
|
|
273
|
+
rem_query = RemQuery(
|
|
303
274
|
query_type=QueryType.TRAVERSE,
|
|
304
275
|
parameters=TraverseParameters(
|
|
305
|
-
initial_query=
|
|
306
|
-
edge_types=
|
|
307
|
-
max_depth=
|
|
276
|
+
initial_query=remainder,
|
|
277
|
+
edge_types=[],
|
|
278
|
+
max_depth=1,
|
|
308
279
|
),
|
|
309
280
|
user_id=user_id,
|
|
310
281
|
)
|
|
282
|
+
table = None
|
|
311
283
|
|
|
312
284
|
else:
|
|
313
|
-
return {
|
|
285
|
+
return {
|
|
286
|
+
"status": "error",
|
|
287
|
+
"error": f"Unknown query type: '{query_type}'. Valid types: LOOKUP, SEARCH, FUZZY, TRAVERSE. "
|
|
288
|
+
"Examples: LOOKUP phq-9-procedure, SEARCH depression IN ontologies",
|
|
289
|
+
}
|
|
314
290
|
|
|
315
291
|
# Execute query (errors handled by decorator)
|
|
316
292
|
logger.info(f"Executing REM query: {query_type} for user {user_id}")
|
|
317
|
-
result = await rem_service.execute_query(
|
|
293
|
+
result = await rem_service.execute_query(rem_query)
|
|
318
294
|
|
|
319
295
|
logger.info(f"Query completed successfully: {query_type}")
|
|
320
|
-
|
|
296
|
+
|
|
297
|
+
# Provide helpful guidance when no results found
|
|
298
|
+
response: dict[str, Any] = {
|
|
321
299
|
"query_type": query_type,
|
|
322
300
|
"results": result,
|
|
323
301
|
}
|
|
324
302
|
|
|
303
|
+
# Check if results are empty - handle both list and dict result formats
|
|
304
|
+
is_empty = False
|
|
305
|
+
if not result:
|
|
306
|
+
is_empty = True
|
|
307
|
+
elif isinstance(result, list) and len(result) == 0:
|
|
308
|
+
is_empty = True
|
|
309
|
+
elif isinstance(result, dict):
|
|
310
|
+
# RemService returns dict with 'results' key containing actual matches
|
|
311
|
+
inner_results = result.get("results", [])
|
|
312
|
+
count = result.get("count", len(inner_results) if isinstance(inner_results, list) else 0)
|
|
313
|
+
is_empty = count == 0 or (isinstance(inner_results, list) and len(inner_results) == 0)
|
|
314
|
+
|
|
315
|
+
if is_empty:
|
|
316
|
+
# Build helpful suggestions based on query type
|
|
317
|
+
suggestions = []
|
|
318
|
+
|
|
319
|
+
if query_type in ("LOOKUP", "FUZZY"):
|
|
320
|
+
suggestions.append(
|
|
321
|
+
"LOOKUP/FUZZY searches across ALL tables. If you expected results, "
|
|
322
|
+
"verify the entity name is spelled correctly."
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
if query_type == "SEARCH":
|
|
326
|
+
if table == "resources":
|
|
327
|
+
suggestions.append(
|
|
328
|
+
"No results in 'resources' table. Try: SEARCH <text> IN ontologies - "
|
|
329
|
+
"clinical procedures, drug info, and diagnostic criteria are stored there."
|
|
330
|
+
)
|
|
331
|
+
elif table == "ontologies":
|
|
332
|
+
suggestions.append(
|
|
333
|
+
"No results in 'ontologies' table. Try: SEARCH <text> IN resources - "
|
|
334
|
+
"for user-uploaded documents and general content."
|
|
335
|
+
)
|
|
336
|
+
else:
|
|
337
|
+
suggestions.append(
|
|
338
|
+
"Try: SEARCH <text> IN ontologies (clinical knowledge, procedures, drugs) "
|
|
339
|
+
"or SEARCH <text> IN resources (documents, files)."
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
# Always suggest both tables if no specific table guidance given
|
|
343
|
+
if not suggestions:
|
|
344
|
+
suggestions.append(
|
|
345
|
+
"No results found. Try: SEARCH <text> IN ontologies (clinical procedures, drugs) "
|
|
346
|
+
"or SEARCH <text> IN resources (documents, files)."
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
response["suggestions"] = suggestions
|
|
350
|
+
response["hint"] = "0 results returned. See 'suggestions' for alternative search strategies."
|
|
351
|
+
|
|
352
|
+
return response
|
|
353
|
+
|
|
325
354
|
|
|
326
355
|
@mcp_tool_error_handler
|
|
327
356
|
async def ask_rem_agent(
|
|
@@ -372,19 +401,45 @@ async def ask_rem_agent(
|
|
|
372
401
|
query="Show me Sarah's reporting chain and their recent projects"
|
|
373
402
|
)
|
|
374
403
|
"""
|
|
375
|
-
# Get user_id from context if not provided
|
|
376
|
-
# TODO: Extract from authenticated session context when auth is enabled
|
|
377
|
-
user_id = AgentContext.get_user_id_or_default(user_id, source="ask_rem_agent")
|
|
378
|
-
|
|
379
404
|
from ...agentic import create_agent
|
|
405
|
+
from ...agentic.context import get_current_context
|
|
380
406
|
from ...utils.schema_loader import load_agent_schema
|
|
381
407
|
|
|
382
|
-
#
|
|
383
|
-
context
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
408
|
+
# Get parent context for multi-agent support
|
|
409
|
+
# This enables context propagation from parent agent to child agent
|
|
410
|
+
parent_context = get_current_context()
|
|
411
|
+
|
|
412
|
+
# Build child context: inherit from parent if available, otherwise use defaults
|
|
413
|
+
if parent_context is not None:
|
|
414
|
+
# Inherit user_id, tenant_id, session_id, is_eval from parent
|
|
415
|
+
# Allow explicit user_id override if provided
|
|
416
|
+
effective_user_id = user_id or parent_context.user_id
|
|
417
|
+
context = parent_context.child_context(agent_schema_uri=agent_schema)
|
|
418
|
+
if user_id is not None:
|
|
419
|
+
# Override user_id if explicitly provided
|
|
420
|
+
context = AgentContext(
|
|
421
|
+
user_id=user_id,
|
|
422
|
+
tenant_id=parent_context.tenant_id,
|
|
423
|
+
session_id=parent_context.session_id,
|
|
424
|
+
default_model=parent_context.default_model,
|
|
425
|
+
agent_schema_uri=agent_schema,
|
|
426
|
+
is_eval=parent_context.is_eval,
|
|
427
|
+
)
|
|
428
|
+
logger.debug(
|
|
429
|
+
f"ask_rem_agent inheriting context from parent: "
|
|
430
|
+
f"user_id={context.user_id}, session_id={context.session_id}"
|
|
431
|
+
)
|
|
432
|
+
else:
|
|
433
|
+
# No parent context - create fresh context (backwards compatible)
|
|
434
|
+
effective_user_id = AgentContext.get_user_id_or_default(
|
|
435
|
+
user_id, source="ask_rem_agent"
|
|
436
|
+
)
|
|
437
|
+
context = AgentContext(
|
|
438
|
+
user_id=effective_user_id,
|
|
439
|
+
tenant_id=effective_user_id or "default",
|
|
440
|
+
default_model=settings.llm.default_model,
|
|
441
|
+
agent_schema_uri=agent_schema,
|
|
442
|
+
)
|
|
388
443
|
|
|
389
444
|
# Load agent schema
|
|
390
445
|
try:
|
|
@@ -424,15 +479,18 @@ async def ingest_into_rem(
|
|
|
424
479
|
category: str | None = None,
|
|
425
480
|
tags: list[str] | None = None,
|
|
426
481
|
is_local_server: bool = False,
|
|
427
|
-
user_id: str | None = None,
|
|
428
482
|
resource_type: str | None = None,
|
|
429
483
|
) -> dict[str, Any]:
|
|
430
484
|
"""
|
|
431
|
-
Ingest file into REM, creating searchable resources and embeddings.
|
|
485
|
+
Ingest file into REM, creating searchable PUBLIC resources and embeddings.
|
|
486
|
+
|
|
487
|
+
**IMPORTANT: All ingested data is PUBLIC by default.** This is correct for
|
|
488
|
+
shared knowledge bases (ontologies, procedures, reference data). Private
|
|
489
|
+
user-scoped data requires different handling via the CLI with --make-private.
|
|
432
490
|
|
|
433
491
|
This tool provides the complete file ingestion pipeline:
|
|
434
492
|
1. **Read**: File from local/S3/HTTP
|
|
435
|
-
2. **Store**: To
|
|
493
|
+
2. **Store**: To internal storage (public namespace)
|
|
436
494
|
3. **Parse**: Extract content, metadata, tables, images
|
|
437
495
|
4. **Chunk**: Semantic chunking for embeddings
|
|
438
496
|
5. **Embed**: Create Resource chunks with vector embeddings
|
|
@@ -451,7 +509,6 @@ async def ingest_into_rem(
|
|
|
451
509
|
category: Optional category (document, code, audio, etc.)
|
|
452
510
|
tags: Optional tags for file
|
|
453
511
|
is_local_server: True if running as local/stdio MCP server
|
|
454
|
-
user_id: Optional user identifier (defaults to authenticated user or "default")
|
|
455
512
|
resource_type: Optional resource type for storing chunks (case-insensitive).
|
|
456
513
|
Supports flexible naming:
|
|
457
514
|
- "resource", "resources", "Resource" → Resource (default)
|
|
@@ -470,10 +527,10 @@ async def ingest_into_rem(
|
|
|
470
527
|
- message: Human-readable status message
|
|
471
528
|
|
|
472
529
|
Examples:
|
|
473
|
-
# Ingest local file (local server only
|
|
530
|
+
# Ingest local file (local server only)
|
|
474
531
|
ingest_into_rem(
|
|
475
|
-
file_uri="/Users/me/
|
|
476
|
-
category="
|
|
532
|
+
file_uri="/Users/me/procedure.pdf",
|
|
533
|
+
category="medical",
|
|
477
534
|
is_local_server=True
|
|
478
535
|
)
|
|
479
536
|
|
|
@@ -497,15 +554,14 @@ async def ingest_into_rem(
|
|
|
497
554
|
"""
|
|
498
555
|
from ...services.content import ContentService
|
|
499
556
|
|
|
500
|
-
#
|
|
501
|
-
#
|
|
502
|
-
user_id = AgentContext.get_user_id_or_default(user_id, source="ingest_into_rem")
|
|
557
|
+
# Data is PUBLIC by default (user_id=None)
|
|
558
|
+
# Private user-scoped data requires CLI with --make-private flag
|
|
503
559
|
|
|
504
560
|
# Delegate to ContentService for centralized ingestion (errors handled by decorator)
|
|
505
561
|
content_service = ContentService()
|
|
506
562
|
result = await content_service.ingest_file(
|
|
507
563
|
file_uri=file_uri,
|
|
508
|
-
user_id=
|
|
564
|
+
user_id=None, # PUBLIC - all ingested data is shared/public
|
|
509
565
|
category=category,
|
|
510
566
|
tags=tags,
|
|
511
567
|
is_local_server=is_local_server,
|
|
@@ -615,6 +671,8 @@ async def register_metadata(
|
|
|
615
671
|
recommended_action: str | None = None,
|
|
616
672
|
# Generic extension - any additional key-value pairs
|
|
617
673
|
extra: dict[str, Any] | None = None,
|
|
674
|
+
# Agent schema (auto-populated from context if not provided)
|
|
675
|
+
agent_schema: str | None = None,
|
|
618
676
|
) -> dict[str, Any]:
|
|
619
677
|
"""
|
|
620
678
|
Register response metadata to be emitted as an SSE MetadataEvent.
|
|
@@ -655,6 +713,8 @@ async def register_metadata(
|
|
|
655
713
|
extra: Dict of arbitrary additional metadata. Use this for any
|
|
656
714
|
domain-specific fields not covered by the standard parameters.
|
|
657
715
|
Example: {"topics_detected": ["anxiety", "sleep"], "session_count": 5}
|
|
716
|
+
agent_schema: Optional agent schema name. If not provided, automatically
|
|
717
|
+
populated from the current agent context (for multi-agent tracing).
|
|
658
718
|
|
|
659
719
|
Returns:
|
|
660
720
|
Dict with:
|
|
@@ -698,10 +758,17 @@ async def register_metadata(
|
|
|
698
758
|
}
|
|
699
759
|
)
|
|
700
760
|
"""
|
|
761
|
+
# Auto-populate agent_schema from context if not provided
|
|
762
|
+
if agent_schema is None:
|
|
763
|
+
from ...agentic.context import get_current_context
|
|
764
|
+
current_context = get_current_context()
|
|
765
|
+
if current_context and current_context.agent_schema_uri:
|
|
766
|
+
agent_schema = current_context.agent_schema_uri
|
|
767
|
+
|
|
701
768
|
logger.debug(
|
|
702
769
|
f"Registering metadata: confidence={confidence}, "
|
|
703
770
|
f"risk_level={risk_level}, refs={len(references or [])}, "
|
|
704
|
-
f"sources={len(sources or [])}"
|
|
771
|
+
f"sources={len(sources or [])}, agent_schema={agent_schema}"
|
|
705
772
|
)
|
|
706
773
|
|
|
707
774
|
result = {
|
|
@@ -711,6 +778,7 @@ async def register_metadata(
|
|
|
711
778
|
"references": references,
|
|
712
779
|
"sources": sources,
|
|
713
780
|
"flags": flags,
|
|
781
|
+
"agent_schema": agent_schema, # Include agent schema for tracing
|
|
714
782
|
}
|
|
715
783
|
|
|
716
784
|
# Add session name if provided
|
|
@@ -1130,3 +1198,256 @@ async def save_agent(
|
|
|
1130
1198
|
result["message"] = f"Agent '{name}' saved. Use `/custom-agent {name}` to chat with it."
|
|
1131
1199
|
|
|
1132
1200
|
return result
|
|
1201
|
+
|
|
1202
|
+
|
|
1203
|
+
# =============================================================================
|
|
1204
|
+
# Multi-Agent Tools
|
|
1205
|
+
# =============================================================================
|
|
1206
|
+
|
|
1207
|
+
|
|
1208
|
+
@mcp_tool_error_handler
|
|
1209
|
+
async def ask_agent(
|
|
1210
|
+
agent_name: str,
|
|
1211
|
+
input_text: str,
|
|
1212
|
+
input_data: dict[str, Any] | None = None,
|
|
1213
|
+
user_id: str | None = None,
|
|
1214
|
+
timeout_seconds: int = 300,
|
|
1215
|
+
) -> dict[str, Any]:
|
|
1216
|
+
"""
|
|
1217
|
+
Invoke another agent by name and return its response.
|
|
1218
|
+
|
|
1219
|
+
This tool enables multi-agent orchestration by allowing one agent to call
|
|
1220
|
+
another. The child agent inherits the parent's context (user_id, session_id,
|
|
1221
|
+
tenant_id, is_eval) for proper scoping and continuity.
|
|
1222
|
+
|
|
1223
|
+
Use Cases:
|
|
1224
|
+
- Orchestrator agents that delegate to specialized sub-agents
|
|
1225
|
+
- Workflow agents that chain multiple processing steps
|
|
1226
|
+
- Ensemble agents that aggregate responses from multiple specialists
|
|
1227
|
+
|
|
1228
|
+
Args:
|
|
1229
|
+
agent_name: Name of the agent to invoke. Can be:
|
|
1230
|
+
- A user-created agent (saved via save_agent)
|
|
1231
|
+
- A system agent (e.g., "ask_rem", "knowledge-query")
|
|
1232
|
+
input_text: The user message/query to send to the agent
|
|
1233
|
+
input_data: Optional structured input data for the agent
|
|
1234
|
+
user_id: Optional user override (defaults to parent's user_id)
|
|
1235
|
+
timeout_seconds: Maximum execution time (default: 300s)
|
|
1236
|
+
|
|
1237
|
+
Returns:
|
|
1238
|
+
Dict with:
|
|
1239
|
+
- status: "success" or "error"
|
|
1240
|
+
- output: Agent's structured output (if using output schema)
|
|
1241
|
+
- text_response: Agent's text response
|
|
1242
|
+
- agent_schema: Name of the invoked agent
|
|
1243
|
+
- input_text: Echo of the input_text argument (the returned dict has no 'metadata' key)
|
|
1244
|
+
|
|
1245
|
+
Examples:
|
|
1246
|
+
# Simple delegation
|
|
1247
|
+
ask_agent(
|
|
1248
|
+
agent_name="sentiment-analyzer",
|
|
1249
|
+
input_text="I love this product! Best purchase ever."
|
|
1250
|
+
)
|
|
1251
|
+
# Returns: {"status": "success", "output": {"sentiment": "positive"}, ...}
|
|
1252
|
+
|
|
1253
|
+
# Orchestrator pattern
|
|
1254
|
+
ask_agent(
|
|
1255
|
+
agent_name="knowledge-query",
|
|
1256
|
+
input_text="What are the latest Q3 results?"
|
|
1257
|
+
)
|
|
1258
|
+
|
|
1259
|
+
# Chain with structured input
|
|
1260
|
+
ask_agent(
|
|
1261
|
+
agent_name="summarizer",
|
|
1262
|
+
input_text="Summarize this document",
|
|
1263
|
+
input_data={"document_id": "doc-123", "max_length": 500}
|
|
1264
|
+
)
|
|
1265
|
+
"""
|
|
1266
|
+
import asyncio
|
|
1267
|
+
from ...agentic import create_agent
|
|
1268
|
+
from ...agentic.context import get_current_context, agent_context_scope
|
|
1269
|
+
from ...agentic.agents.agent_manager import get_agent
|
|
1270
|
+
from ...utils.schema_loader import load_agent_schema
|
|
1271
|
+
|
|
1272
|
+
# Get parent context for inheritance
|
|
1273
|
+
parent_context = get_current_context()
|
|
1274
|
+
|
|
1275
|
+
# Determine effective user_id
|
|
1276
|
+
if parent_context is not None:
|
|
1277
|
+
effective_user_id = user_id or parent_context.user_id
|
|
1278
|
+
else:
|
|
1279
|
+
effective_user_id = AgentContext.get_user_id_or_default(
|
|
1280
|
+
user_id, source="ask_agent"
|
|
1281
|
+
)
|
|
1282
|
+
|
|
1283
|
+
# Build child context
|
|
1284
|
+
if parent_context is not None:
|
|
1285
|
+
child_context = parent_context.child_context(agent_schema_uri=agent_name)
|
|
1286
|
+
if user_id is not None:
|
|
1287
|
+
# Explicit user_id override
|
|
1288
|
+
child_context = AgentContext(
|
|
1289
|
+
user_id=user_id,
|
|
1290
|
+
tenant_id=parent_context.tenant_id,
|
|
1291
|
+
session_id=parent_context.session_id,
|
|
1292
|
+
default_model=parent_context.default_model,
|
|
1293
|
+
agent_schema_uri=agent_name,
|
|
1294
|
+
is_eval=parent_context.is_eval,
|
|
1295
|
+
)
|
|
1296
|
+
logger.debug(
|
|
1297
|
+
f"ask_agent '{agent_name}' inheriting context: "
|
|
1298
|
+
f"user_id={child_context.user_id}, session_id={child_context.session_id}"
|
|
1299
|
+
)
|
|
1300
|
+
else:
|
|
1301
|
+
child_context = AgentContext(
|
|
1302
|
+
user_id=effective_user_id,
|
|
1303
|
+
tenant_id=effective_user_id or "default",
|
|
1304
|
+
default_model=settings.llm.default_model,
|
|
1305
|
+
agent_schema_uri=agent_name,
|
|
1306
|
+
)
|
|
1307
|
+
|
|
1308
|
+
# Try to load agent schema from:
|
|
1309
|
+
# 1. Database (user-created or system agents)
|
|
1310
|
+
# 2. File system (packaged agents)
|
|
1311
|
+
schema = None
|
|
1312
|
+
|
|
1313
|
+
# Try database first
|
|
1314
|
+
if effective_user_id:
|
|
1315
|
+
schema = await get_agent(agent_name, user_id=effective_user_id)
|
|
1316
|
+
if schema:
|
|
1317
|
+
logger.debug(f"Loaded agent '{agent_name}' from database")
|
|
1318
|
+
|
|
1319
|
+
# Fall back to file system
|
|
1320
|
+
if schema is None:
|
|
1321
|
+
try:
|
|
1322
|
+
schema = load_agent_schema(agent_name)
|
|
1323
|
+
logger.debug(f"Loaded agent '{agent_name}' from file system")
|
|
1324
|
+
except FileNotFoundError:
|
|
1325
|
+
pass
|
|
1326
|
+
|
|
1327
|
+
if schema is None:
|
|
1328
|
+
return {
|
|
1329
|
+
"status": "error",
|
|
1330
|
+
"error": f"Agent not found: {agent_name}",
|
|
1331
|
+
"hint": "Use list_agents to see available agents, or save_agent to create one",
|
|
1332
|
+
}
|
|
1333
|
+
|
|
1334
|
+
# Create agent runtime
|
|
1335
|
+
agent_runtime = await create_agent(
|
|
1336
|
+
context=child_context,
|
|
1337
|
+
agent_schema_override=schema,
|
|
1338
|
+
)
|
|
1339
|
+
|
|
1340
|
+
# Build prompt with optional input_data
|
|
1341
|
+
prompt = input_text
|
|
1342
|
+
if input_data:
|
|
1343
|
+
prompt = f"{input_text}\n\nInput data: {json.dumps(input_data)}"
|
|
1344
|
+
|
|
1345
|
+
# Run agent with timeout and context propagation
|
|
1346
|
+
logger.info(f"Invoking agent '{agent_name}' with prompt: {prompt[:100]}...")
|
|
1347
|
+
|
|
1348
|
+
try:
|
|
1349
|
+
# Set child context for nested tool calls
|
|
1350
|
+
with agent_context_scope(child_context):
|
|
1351
|
+
result = await asyncio.wait_for(
|
|
1352
|
+
agent_runtime.run(prompt),
|
|
1353
|
+
timeout=timeout_seconds
|
|
1354
|
+
)
|
|
1355
|
+
except asyncio.TimeoutError:
|
|
1356
|
+
return {
|
|
1357
|
+
"status": "error",
|
|
1358
|
+
"error": f"Agent '{agent_name}' timed out after {timeout_seconds}s",
|
|
1359
|
+
"agent_schema": agent_name,
|
|
1360
|
+
}
|
|
1361
|
+
|
|
1362
|
+
# Serialize output
|
|
1363
|
+
from rem.agentic.serialization import serialize_agent_result
|
|
1364
|
+
output = serialize_agent_result(result.output)
|
|
1365
|
+
|
|
1366
|
+
logger.info(f"Agent '{agent_name}' completed successfully")
|
|
1367
|
+
|
|
1368
|
+
return {
|
|
1369
|
+
"status": "success",
|
|
1370
|
+
"output": output,
|
|
1371
|
+
"text_response": str(result.output),
|
|
1372
|
+
"agent_schema": agent_name,
|
|
1373
|
+
"input_text": input_text,
|
|
1374
|
+
}
|
|
1375
|
+
|
|
1376
|
+
|
|
1377
|
+
# =============================================================================
|
|
1378
|
+
# Test/Debug Tools (for development only)
|
|
1379
|
+
# =============================================================================
|
|
1380
|
+
|
|
1381
|
+
@mcp_tool_error_handler
|
|
1382
|
+
async def test_error_handling(
|
|
1383
|
+
error_type: Literal["exception", "error_response", "timeout", "success"] = "success",
|
|
1384
|
+
delay_seconds: float = 0,
|
|
1385
|
+
error_message: str = "Test error occurred",
|
|
1386
|
+
) -> dict[str, Any]:
|
|
1387
|
+
"""
|
|
1388
|
+
Test tool for simulating different error scenarios.
|
|
1389
|
+
|
|
1390
|
+
**FOR DEVELOPMENT/TESTING ONLY** - This tool helps verify that error
|
|
1391
|
+
handling works correctly through the streaming layer.
|
|
1392
|
+
|
|
1393
|
+
Args:
|
|
1394
|
+
error_type: Type of error to simulate:
|
|
1395
|
+
- "success": Returns successful response (default)
|
|
1396
|
+
- "exception": Raises an exception (tests @mcp_tool_error_handler)
|
|
1397
|
+
- "error_response": Returns {"status": "error", ...} dict
|
|
1398
|
+
- "timeout": Delays for 60 seconds (simulates timeout)
|
|
1399
|
+
delay_seconds: Optional delay before responding, in seconds (values above 10 are capped at 10)
|
|
1400
|
+
error_message: Custom error message for error scenarios
|
|
1401
|
+
|
|
1402
|
+
Returns:
|
|
1403
|
+
Dict with test results or error information
|
|
1404
|
+
|
|
1405
|
+
Examples:
|
|
1406
|
+
# Test successful response
|
|
1407
|
+
test_error_handling(error_type="success")
|
|
1408
|
+
|
|
1409
|
+
# Test exception handling
|
|
1410
|
+
test_error_handling(error_type="exception", error_message="Database connection failed")
|
|
1411
|
+
|
|
1412
|
+
# Test error response format
|
|
1413
|
+
test_error_handling(error_type="error_response", error_message="Resource not found")
|
|
1414
|
+
|
|
1415
|
+
# Test with delay
|
|
1416
|
+
test_error_handling(error_type="success", delay_seconds=2)
|
|
1417
|
+
"""
|
|
1418
|
+
import asyncio
|
|
1419
|
+
|
|
1420
|
+
logger.info(f"test_error_handling called: type={error_type}, delay={delay_seconds}")
|
|
1421
|
+
|
|
1422
|
+
# Apply delay (capped at 10 seconds for safety)
|
|
1423
|
+
if delay_seconds > 0:
|
|
1424
|
+
await asyncio.sleep(min(delay_seconds, 10))
|
|
1425
|
+
|
|
1426
|
+
if error_type == "exception":
|
|
1427
|
+
# This tests the @mcp_tool_error_handler decorator
|
|
1428
|
+
raise RuntimeError(f"TEST EXCEPTION: {error_message}")
|
|
1429
|
+
|
|
1430
|
+
elif error_type == "error_response":
|
|
1431
|
+
# This tests how the streaming layer handles error status responses
|
|
1432
|
+
return {
|
|
1433
|
+
"status": "error",
|
|
1434
|
+
"error": error_message,
|
|
1435
|
+
"error_code": "TEST_ERROR",
|
|
1436
|
+
"recoverable": True,
|
|
1437
|
+
}
|
|
1438
|
+
|
|
1439
|
+
elif error_type == "timeout":
|
|
1440
|
+
# Simulate a very long operation (for testing client-side timeouts)
|
|
1441
|
+
await asyncio.sleep(60)
|
|
1442
|
+
return {"status": "success", "message": "Timeout test completed (should not reach here)"}
|
|
1443
|
+
|
|
1444
|
+
else: # success
|
|
1445
|
+
return {
|
|
1446
|
+
"status": "success",
|
|
1447
|
+
"message": "Test completed successfully",
|
|
1448
|
+
"test_data": {
|
|
1449
|
+
"error_type": error_type,
|
|
1450
|
+
"delay_applied": delay_seconds,
|
|
1451
|
+
"timestamp": str(asyncio.get_event_loop().time()),
|
|
1452
|
+
},
|
|
1453
|
+
}
|