remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +23 -3
- rem/agentic/mcp/tool_wrapper.py +126 -15
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +122 -43
- rem/agentic/schema.py +4 -1
- rem/api/mcp_router/tools.py +13 -2
- rem/api/routers/chat/completions.py +250 -4
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +35 -1
- rem/api/routers/feedback.py +134 -14
- rem/auth/middleware.py +66 -1
- rem/cli/commands/cluster.py +590 -82
- rem/cli/commands/configure.py +3 -4
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
- rem/services/session/compression.py +7 -0
- rem/settings.py +260 -17
- rem/sql/migrations/002_install_models.sql +91 -91
- rem/sql/migrations/004_cache_system.sql +1 -1
- rem/utils/README.md +45 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +94 -3
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +2 -1
- rem/workers/db_listener.py +579 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
|
@@ -553,7 +553,7 @@ async def create_agent(
|
|
|
553
553
|
if agent_schema:
|
|
554
554
|
system_prompt = get_system_prompt(agent_schema)
|
|
555
555
|
metadata = get_metadata(agent_schema)
|
|
556
|
-
mcp_server_configs = [s.model_dump() for s in metadata.mcp_servers] if hasattr(metadata, 'mcp_servers') else []
|
|
556
|
+
mcp_server_configs = [s.model_dump() for s in metadata.mcp_servers] if hasattr(metadata, 'mcp_servers') and metadata.mcp_servers else []
|
|
557
557
|
resource_configs = metadata.resources if hasattr(metadata, 'resources') else []
|
|
558
558
|
|
|
559
559
|
if metadata.system_prompt:
|
|
@@ -564,6 +564,38 @@ async def create_agent(
|
|
|
564
564
|
mcp_server_configs = []
|
|
565
565
|
resource_configs = []
|
|
566
566
|
|
|
567
|
+
# Auto-detect local MCP server if not explicitly configured
|
|
568
|
+
# This makes mcp_servers config optional - agents get tools automatically
|
|
569
|
+
if not mcp_server_configs:
|
|
570
|
+
import importlib
|
|
571
|
+
import os
|
|
572
|
+
import sys
|
|
573
|
+
|
|
574
|
+
# Ensure current working directory is in sys.path for local imports
|
|
575
|
+
cwd = os.getcwd()
|
|
576
|
+
if cwd not in sys.path:
|
|
577
|
+
sys.path.insert(0, cwd)
|
|
578
|
+
|
|
579
|
+
# Try common local MCP server module paths first
|
|
580
|
+
auto_detect_modules = [
|
|
581
|
+
"tools.mcp_server", # Convention: tools/mcp_server.py
|
|
582
|
+
"mcp_server", # Alternative: mcp_server.py in root
|
|
583
|
+
]
|
|
584
|
+
for module_path in auto_detect_modules:
|
|
585
|
+
try:
|
|
586
|
+
mcp_module = importlib.import_module(module_path)
|
|
587
|
+
if hasattr(mcp_module, "mcp"):
|
|
588
|
+
logger.info(f"Auto-detected local MCP server: {module_path}")
|
|
589
|
+
mcp_server_configs = [{"type": "local", "module": module_path, "id": "auto-detected"}]
|
|
590
|
+
break
|
|
591
|
+
except ImportError:
|
|
592
|
+
continue
|
|
593
|
+
|
|
594
|
+
# Fall back to REM's default MCP server if no local server found
|
|
595
|
+
if not mcp_server_configs:
|
|
596
|
+
logger.debug("No local MCP server found, using REM default")
|
|
597
|
+
mcp_server_configs = [{"type": "local", "module": "rem.mcp_server", "id": "rem"}]
|
|
598
|
+
|
|
567
599
|
# Extract temperature and max_iterations from schema metadata (with fallback to settings defaults)
|
|
568
600
|
if metadata:
|
|
569
601
|
temperature = metadata.override_temperature if metadata.override_temperature is not None else settings.llm.default_temperature
|
|
@@ -608,50 +640,97 @@ async def create_agent(
|
|
|
608
640
|
search_rem_suffix += f"Example: `SEARCH \"your query\" FROM {default_table} LIMIT 10`"
|
|
609
641
|
|
|
610
642
|
# Add tools from MCP server (in-process, no subprocess)
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
description_suffix=tool_suffix,
|
|
641
|
-
)
|
|
642
|
-
tools.append(wrapped_tool)
|
|
643
|
-
logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema suffix)" if tool_suffix else ""))
|
|
644
|
-
|
|
645
|
-
logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
|
|
646
|
-
|
|
647
|
-
except Exception as e:
|
|
648
|
-
logger.error(f"Failed to load MCP server {server_id}: {e}", exc_info=True)
|
|
649
|
-
else:
|
|
650
|
-
logger.warning(f"Unsupported MCP server type: {server_type}")
|
|
643
|
+
# Track loaded MCP servers for resource resolution
|
|
644
|
+
loaded_mcp_server = None
|
|
645
|
+
|
|
646
|
+
for server_config in mcp_server_configs:
|
|
647
|
+
server_type = server_config.get("type")
|
|
648
|
+
server_id = server_config.get("id", "mcp-server")
|
|
649
|
+
|
|
650
|
+
if server_type == "local":
|
|
651
|
+
# Import MCP server directly (in-process)
|
|
652
|
+
module_path = server_config.get("module", "rem.mcp_server")
|
|
653
|
+
|
|
654
|
+
try:
|
|
655
|
+
# Dynamic import of MCP server module
|
|
656
|
+
import importlib
|
|
657
|
+
mcp_module = importlib.import_module(module_path)
|
|
658
|
+
mcp_server = mcp_module.mcp
|
|
659
|
+
|
|
660
|
+
# Store the loaded server for resource resolution
|
|
661
|
+
loaded_mcp_server = mcp_server
|
|
662
|
+
|
|
663
|
+
# Extract tools from MCP server (get_tools is async)
|
|
664
|
+
from ..mcp.tool_wrapper import create_mcp_tool_wrapper
|
|
665
|
+
|
|
666
|
+
# Await async get_tools() call
|
|
667
|
+
mcp_tools_dict = await mcp_server.get_tools()
|
|
668
|
+
|
|
669
|
+
for tool_name, tool_func in mcp_tools_dict.items():
|
|
670
|
+
# Add description suffix to search_rem tool if schema specifies a default table
|
|
671
|
+
tool_suffix = search_rem_suffix if tool_name == "search_rem" else None
|
|
651
672
|
|
|
673
|
+
wrapped_tool = create_mcp_tool_wrapper(
|
|
674
|
+
tool_name,
|
|
675
|
+
tool_func,
|
|
676
|
+
user_id=context.user_id if context else None,
|
|
677
|
+
description_suffix=tool_suffix,
|
|
678
|
+
)
|
|
679
|
+
tools.append(wrapped_tool)
|
|
680
|
+
logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema suffix)" if tool_suffix else ""))
|
|
681
|
+
|
|
682
|
+
logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
|
|
683
|
+
|
|
684
|
+
except Exception as e:
|
|
685
|
+
logger.error(f"Failed to load MCP server {server_id}: {e}", exc_info=True)
|
|
686
|
+
else:
|
|
687
|
+
logger.warning(f"Unsupported MCP server type: {server_type}")
|
|
688
|
+
|
|
689
|
+
# Convert resources to tools (MCP convenience syntax)
|
|
690
|
+
# Resources declared in agent YAML become callable tools - eliminates
|
|
691
|
+
# the artificial MCP distinction between tools and resources
|
|
692
|
+
#
|
|
693
|
+
# Supports both concrete and template URIs:
|
|
694
|
+
# - Concrete: "rem://schemas" -> no-param tool
|
|
695
|
+
# - Template: "patient-profile://field/{field_key}" -> tool with field_key param
|
|
696
|
+
from ..mcp.tool_wrapper import create_resource_tool
|
|
697
|
+
|
|
698
|
+
# Collect all resource URIs from both resources section AND tools section
|
|
699
|
+
resource_uris = []
|
|
700
|
+
|
|
701
|
+
# From resources section (legacy format)
|
|
652
702
|
if resource_configs:
|
|
653
|
-
|
|
654
|
-
|
|
703
|
+
for resource_config in resource_configs:
|
|
704
|
+
if hasattr(resource_config, 'uri'):
|
|
705
|
+
uri = resource_config.uri
|
|
706
|
+
usage = resource_config.description or ""
|
|
707
|
+
else:
|
|
708
|
+
uri = resource_config.get("uri", "")
|
|
709
|
+
usage = resource_config.get("description", "")
|
|
710
|
+
if uri:
|
|
711
|
+
resource_uris.append((uri, usage))
|
|
712
|
+
|
|
713
|
+
# From tools section - detect URIs (anything with ://)
|
|
714
|
+
# This allows unified syntax: resources as tools
|
|
715
|
+
tool_configs = metadata.tools if metadata and hasattr(metadata, 'tools') else []
|
|
716
|
+
for tool_config in tool_configs:
|
|
717
|
+
if hasattr(tool_config, 'name'):
|
|
718
|
+
tool_name = tool_config.name
|
|
719
|
+
tool_desc = tool_config.description or ""
|
|
720
|
+
else:
|
|
721
|
+
tool_name = tool_config.get("name", "")
|
|
722
|
+
tool_desc = tool_config.get("description", "")
|
|
723
|
+
|
|
724
|
+
# Auto-detect resource URIs (anything with :// scheme)
|
|
725
|
+
if "://" in tool_name:
|
|
726
|
+
resource_uris.append((tool_name, tool_desc))
|
|
727
|
+
|
|
728
|
+
# Create tools from collected resource URIs
|
|
729
|
+
# Pass the loaded MCP server so resources can be resolved from it
|
|
730
|
+
for uri, usage in resource_uris:
|
|
731
|
+
resource_tool = create_resource_tool(uri, usage, mcp_server=loaded_mcp_server)
|
|
732
|
+
tools.append(resource_tool)
|
|
733
|
+
logger.debug(f"Loaded resource as tool: {uri}")
|
|
655
734
|
|
|
656
735
|
# Create dynamic result_type from schema if not provided
|
|
657
736
|
# Note: use_structured_output is set earlier from metadata.structured_output
|
rem/agentic/schema.py
CHANGED
|
@@ -154,8 +154,10 @@ class MCPServerConfig(BaseModel):
|
|
|
154
154
|
)
|
|
155
155
|
|
|
156
156
|
id: str = Field(
|
|
157
|
+
default="mcp-server",
|
|
157
158
|
description=(
|
|
158
159
|
"Server identifier for logging and debugging. "
|
|
160
|
+
"Defaults to 'mcp-server' if not specified. "
|
|
159
161
|
"Example: 'rem-local'"
|
|
160
162
|
)
|
|
161
163
|
)
|
|
@@ -228,7 +230,8 @@ class AgentSchemaMetadata(BaseModel):
|
|
|
228
230
|
description=(
|
|
229
231
|
"MCP server configurations for dynamic tool loading. "
|
|
230
232
|
"Servers are loaded in-process at agent creation time. "
|
|
231
|
-
"All tools from configured servers become available to the agent."
|
|
233
|
+
"All tools from configured servers become available to the agent. "
|
|
234
|
+
"If not specified, defaults to rem.mcp_server (REM's built-in tools)."
|
|
232
235
|
),
|
|
233
236
|
)
|
|
234
237
|
|
rem/api/mcp_router/tools.py
CHANGED
|
@@ -606,7 +606,9 @@ async def register_metadata(
|
|
|
606
606
|
references: list[str] | None = None,
|
|
607
607
|
sources: list[str] | None = None,
|
|
608
608
|
flags: list[str] | None = None,
|
|
609
|
-
#
|
|
609
|
+
# Session naming
|
|
610
|
+
session_name: str | None = None,
|
|
611
|
+
# Risk assessment fields (used by specialized agents)
|
|
610
612
|
risk_level: str | None = None,
|
|
611
613
|
risk_score: int | None = None,
|
|
612
614
|
risk_reasoning: str | None = None,
|
|
@@ -639,6 +641,11 @@ async def register_metadata(
|
|
|
639
641
|
flags: Optional flags for the response (e.g., "needs_review",
|
|
640
642
|
"uncertain", "incomplete", "crisis_alert").
|
|
641
643
|
|
|
644
|
+
session_name: Short 1-3 phrase name describing the session topic.
|
|
645
|
+
Used by the UI to label conversations in the sidebar.
|
|
646
|
+
Examples: "Prescription Drug Questions", "AWS Setup Help",
|
|
647
|
+
"Python Code Review", "Travel Planning".
|
|
648
|
+
|
|
642
649
|
risk_level: Risk level indicator (e.g., "green", "orange", "red").
|
|
643
650
|
Used by mental health agents for C-SSRS style assessment.
|
|
644
651
|
risk_score: Numeric risk score (e.g., 0-6 for C-SSRS).
|
|
@@ -663,7 +670,7 @@ async def register_metadata(
|
|
|
663
670
|
sources=["REM database lookup"]
|
|
664
671
|
)
|
|
665
672
|
|
|
666
|
-
#
|
|
673
|
+
# Risk assessment example
|
|
667
674
|
register_metadata(
|
|
668
675
|
confidence=0.9,
|
|
669
676
|
risk_level="green",
|
|
@@ -706,6 +713,10 @@ async def register_metadata(
|
|
|
706
713
|
"flags": flags,
|
|
707
714
|
}
|
|
708
715
|
|
|
716
|
+
# Add session name if provided
|
|
717
|
+
if session_name is not None:
|
|
718
|
+
result["session_name"] = session_name
|
|
719
|
+
|
|
709
720
|
# Add risk assessment fields if provided
|
|
710
721
|
if risk_level is not None:
|
|
711
722
|
result["risk_level"] = risk_level
|
|
@@ -1,13 +1,94 @@
|
|
|
1
1
|
"""
|
|
2
2
|
OpenAI-compatible chat completions router for REM.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
Quick Start (Local Development)
|
|
5
|
+
===============================
|
|
6
|
+
|
|
7
|
+
NOTE: Local dev uses LOCAL databases (Postgres via Docker Compose on port 5050).
|
|
8
|
+
Do NOT port-forward databases. Only port-forward observability services.
|
|
9
|
+
|
|
10
|
+
IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
|
|
11
|
+
storage issues and feedback will not work correctly.
|
|
12
|
+
|
|
13
|
+
1. Port Forwarding (REQUIRED for trace capture and Phoenix sync):
|
|
14
|
+
|
|
15
|
+
# Terminal 1: OTEL Collector (HTTP) - sends traces to Phoenix
|
|
16
|
+
kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
|
|
17
|
+
|
|
18
|
+
# Terminal 2: Phoenix UI - view traces at http://localhost:6006
|
|
19
|
+
kubectl port-forward -n siggy svc/phoenix 6006:6006
|
|
20
|
+
|
|
21
|
+
2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
|
|
22
|
+
|
|
23
|
+
export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
|
|
24
|
+
-o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
|
|
25
|
+
|
|
26
|
+
3. Start API with OTEL and Phoenix enabled:
|
|
27
|
+
|
|
28
|
+
cd /path/to/remstack/rem
|
|
29
|
+
source .venv/bin/activate
|
|
30
|
+
OTEL__ENABLED=true \\
|
|
31
|
+
PHOENIX__ENABLED=true \\
|
|
32
|
+
PHOENIX_API_KEY="$PHOENIX_API_KEY" \\
|
|
33
|
+
uvicorn rem.api.main:app --host 0.0.0.0 --port 8000 --app-dir src
|
|
34
|
+
|
|
35
|
+
4. Test Chat Request (session_id MUST be a UUID):
|
|
36
|
+
|
|
37
|
+
SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
|
|
38
|
+
curl -s -N -X POST http://localhost:8000/api/v1/chat/completions \\
|
|
39
|
+
-H 'Content-Type: application/json' \\
|
|
40
|
+
-H "X-Session-Id: $SESSION_ID" \\
|
|
41
|
+
-H 'X-Agent-Schema: rem' \\
|
|
42
|
+
-d '{"messages": [{"role": "user", "content": "Hello"}], "stream": true}'
|
|
43
|
+
|
|
44
|
+
# Note: Use 'rem' agent schema (default) for real LLM responses.
|
|
45
|
+
# The 'simulator' agent is for testing SSE events without LLM calls.
|
|
46
|
+
|
|
47
|
+
5. Submit Feedback on Response:
|
|
48
|
+
|
|
49
|
+
The metadata SSE event contains message_id and trace_id for feedback:
|
|
50
|
+
event: metadata
|
|
51
|
+
data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
|
|
52
|
+
|
|
53
|
+
Use session_id (UUID you generated) and message_id to submit feedback:
|
|
54
|
+
|
|
55
|
+
curl -X POST http://localhost:8000/api/v1/messages/feedback \\
|
|
56
|
+
-H 'Content-Type: application/json' \\
|
|
57
|
+
-H 'X-Tenant-Id: default' \\
|
|
58
|
+
-d '{
|
|
59
|
+
"session_id": "<your-uuid-session-id>",
|
|
60
|
+
"message_id": "<message-id-from-metadata>",
|
|
61
|
+
"rating": 1,
|
|
62
|
+
"categories": ["helpful"],
|
|
63
|
+
"comment": "Good response"
|
|
64
|
+
}'
|
|
65
|
+
|
|
66
|
+
Expected response (201 = synced to Phoenix):
|
|
67
|
+
{"phoenix_synced": true, "trace_id": "e53c701c...", "span_id": "6432d497..."}
|
|
68
|
+
|
|
69
|
+
OTEL Architecture
|
|
70
|
+
=================
|
|
71
|
+
|
|
72
|
+
REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
|
|
73
|
+
(port 4318) (k8s: observability) (k8s: siggy)
|
|
74
|
+
|
|
75
|
+
Environment Variables:
|
|
76
|
+
OTEL__ENABLED=true Enable OTEL tracing (required for trace capture)
|
|
77
|
+
PHOENIX__ENABLED=true Enable Phoenix integration (required for feedback sync)
|
|
78
|
+
PHOENIX_API_KEY=<jwt> Phoenix API key (required for feedback->Phoenix sync)
|
|
79
|
+
OTEL__COLLECTOR_ENDPOINT Default: http://localhost:4318
|
|
80
|
+
OTEL__PROTOCOL Default: http (use port 4318, not gRPC 4317)
|
|
81
|
+
|
|
82
|
+
Design Pattern
|
|
83
|
+
==============
|
|
84
|
+
|
|
85
|
+
- Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema, X-Is-Eval)
|
|
6
86
|
- ContextBuilder centralizes message construction with user profile + session history
|
|
7
87
|
- Body.model is the LLM model for Pydantic AI
|
|
8
88
|
- X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
|
|
9
89
|
- Support for streaming (SSE) and non-streaming modes
|
|
10
90
|
- Response format control (text vs json_object)
|
|
91
|
+
- OpenAI-compatible body fields: metadata, store, reasoning_effort, etc.
|
|
11
92
|
|
|
12
93
|
Context Building Flow:
|
|
13
94
|
1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
|
|
@@ -25,9 +106,10 @@ Context Building Flow:
|
|
|
25
106
|
Headers Mapping
|
|
26
107
|
X-User-Id → AgentContext.user_id
|
|
27
108
|
X-Tenant-Id → AgentContext.tenant_id
|
|
28
|
-
X-Session-Id → AgentContext.session_id
|
|
109
|
+
X-Session-Id → AgentContext.session_id (use UUID for new sessions)
|
|
29
110
|
X-Model-Name → AgentContext.default_model (overrides body.model)
|
|
30
111
|
X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
|
|
112
|
+
X-Is-Eval → AgentContext.is_eval (sets session mode to EVALUATION)
|
|
31
113
|
|
|
32
114
|
Default Agent:
|
|
33
115
|
If X-Agent-Schema header is not provided, the system loads 'rem' schema,
|
|
@@ -42,6 +124,7 @@ Example Request:
|
|
|
42
124
|
POST /api/v1/chat/completions
|
|
43
125
|
X-Tenant-Id: acme-corp
|
|
44
126
|
X-User-Id: user123
|
|
127
|
+
X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890 # UUID
|
|
45
128
|
X-Agent-Schema: rem # Optional, this is the default
|
|
46
129
|
|
|
47
130
|
{
|
|
@@ -67,7 +150,9 @@ from loguru import logger
|
|
|
67
150
|
from ....agentic.context import AgentContext
|
|
68
151
|
from ....agentic.context_builder import ContextBuilder
|
|
69
152
|
from ....agentic.providers.pydantic_ai import create_agent
|
|
153
|
+
from ....models.entities.session import Session, SessionMode
|
|
70
154
|
from ....services.audio.transcriber import AudioTranscriber
|
|
155
|
+
from ....services.postgres.repository import Repository
|
|
71
156
|
from ....services.session import SessionMessageStore, reload_session
|
|
72
157
|
from ....settings import settings
|
|
73
158
|
from ....utils.schema_loader import load_agent_schema, load_agent_schema_async
|
|
@@ -87,6 +172,105 @@ router = APIRouter(prefix="/api/v1", tags=["chat"])
|
|
|
87
172
|
DEFAULT_AGENT_SCHEMA = "rem"
|
|
88
173
|
|
|
89
174
|
|
|
175
|
+
def get_current_trace_context() -> tuple[str | None, str | None]:
|
|
176
|
+
"""Get trace_id and span_id from current OTEL context.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
|
|
180
|
+
"""
|
|
181
|
+
try:
|
|
182
|
+
from opentelemetry import trace
|
|
183
|
+
span = trace.get_current_span()
|
|
184
|
+
if span and span.get_span_context().is_valid:
|
|
185
|
+
ctx = span.get_span_context()
|
|
186
|
+
trace_id = format(ctx.trace_id, '032x')
|
|
187
|
+
span_id = format(ctx.span_id, '016x')
|
|
188
|
+
return trace_id, span_id
|
|
189
|
+
except Exception:
|
|
190
|
+
pass
|
|
191
|
+
return None, None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def get_tracer():
    """Return the OpenTelemetry tracer used for chat completions.

    Returns:
        The tracer registered under the name "rem.chat.completions", or
        None when the OpenTelemetry API cannot be imported or the lookup
        raises.
    """
    tracer = None
    try:
        from opentelemetry import trace as otel_trace

        tracer = otel_trace.get_tracer("rem.chat.completions")
    except Exception:
        # Any failure means "no tracer"; the result stays None.
        tracer = None
    return tracer
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def ensure_session_with_metadata(
    session_id: str,
    user_id: str | None,
    tenant_id: str,
    is_eval: bool,
    request_metadata: dict[str, str] | None,
    agent_schema: str | None = None,
) -> None:
    """
    Ensure session exists and update with metadata/mode.

    If is_eval is true, sets session mode to EVALUATION.
    Merges request metadata with existing session metadata (request keys win).
    Does nothing when Postgres is disabled in settings.

    This is non-critical bookkeeping: any exception is logged and swallowed
    so the chat request itself is never failed by it.

    Args:
        session_id: Session identifier (maps to Session.name)
        user_id: User identifier
        tenant_id: Tenant identifier
        is_eval: Whether this is an evaluation session
        request_metadata: Metadata from request body to merge
        agent_schema: Optional agent schema being used (stored only when a
            new session is created)
    """
    if not settings.postgres.enabled:
        return

    try:
        repo = Repository(Session, table_name="sessions")

        # Try to load existing session by name (session_id is the name field)
        existing_list = await repo.find(
            filters={"name": session_id, "tenant_id": tenant_id},
            limit=1,
        )
        existing = existing_list[0] if existing_list else None

        if not existing:
            # Create new session
            session = Session(
                name=session_id,
                mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
                user_id=user_id,
                tenant_id=tenant_id,
                agent_schema_uri=agent_schema,
                metadata=request_metadata or {},
            )
            await repo.upsert(session)
            logger.info(f"Created session {session_id} (eval={is_eval})")
            return

        # Build a new dict so the loaded metadata is not mutated in place.
        merged_metadata = {**(existing.metadata or {}), **(request_metadata or {})}

        # Update session only if the eval flag flips or new metadata arrived
        needs_update = False
        if is_eval and existing.mode != SessionMode.EVALUATION:
            existing.mode = SessionMode.EVALUATION
            needs_update = True
        if request_metadata:
            existing.metadata = merged_metadata
            needs_update = True

        if needs_update:
            await repo.upsert(existing)
            logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")

    except Exception as e:
        # Non-critical - log but don't fail the request.
        # loguru ignores stdlib's exc_info=True and, given any kwarg, runs
        # str.format on the message, which can raise if the exception text
        # contains braces. opt(exception=True) attaches the traceback and the
        # positional placeholder keeps the exception text out of the template.
        logger.opt(exception=True).error("Failed to ensure session metadata: {}", e)
|
|
272
|
+
|
|
273
|
+
|
|
90
274
|
@router.post("/chat/completions", response_model=None)
|
|
91
275
|
async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
92
276
|
"""
|
|
@@ -102,6 +286,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
102
286
|
| X-Tenant-Id | Tenant identifier (multi-tenancy) | AgentContext.tenant_id | "default" |
|
|
103
287
|
| X-Session-Id | Session/conversation identifier | AgentContext.session_id | None |
|
|
104
288
|
| X-Agent-Schema | Agent schema name | AgentContext.agent_schema_uri | "rem" |
|
|
289
|
+
| X-Is-Eval | Mark as evaluation session | AgentContext.is_eval | false |
|
|
290
|
+
|
|
291
|
+
Additional OpenAI-compatible Body Fields:
|
|
292
|
+
- metadata: Key-value pairs merged with session metadata (max 16 keys)
|
|
293
|
+
- store: Whether to store for distillation/evaluation
|
|
294
|
+
- max_completion_tokens: Max tokens to generate (replaces max_tokens)
|
|
295
|
+
- seed: Seed for deterministic sampling
|
|
296
|
+
- top_p: Nucleus sampling probability
|
|
297
|
+
- logprobs: Return log probabilities
|
|
298
|
+
- reasoning_effort: low/medium/high for o-series models
|
|
299
|
+
- service_tier: auto/flex/priority/default
|
|
105
300
|
|
|
106
301
|
Example Models:
|
|
107
302
|
- anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
|
|
@@ -127,6 +322,12 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
127
322
|
- If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
|
|
128
323
|
- New messages saved to database with compression for session continuity
|
|
129
324
|
- When Postgres is disabled, session management is skipped
|
|
325
|
+
|
|
326
|
+
Evaluation Sessions:
|
|
327
|
+
- Set X-Is-Eval: true header to mark session as evaluation
|
|
328
|
+
- Session mode will be set to EVALUATION
|
|
329
|
+
- Request metadata is merged with session metadata
|
|
330
|
+
- Useful for A/B testing, model comparison, and feedback collection
|
|
130
331
|
"""
|
|
131
332
|
# Load agent schema: use header value from context or default
|
|
132
333
|
# Extract AgentContext first to get schema name
|
|
@@ -151,6 +352,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
151
352
|
new_messages=new_messages,
|
|
152
353
|
)
|
|
153
354
|
|
|
355
|
+
# Ensure session exists with metadata and eval mode if applicable
|
|
356
|
+
if context.session_id:
|
|
357
|
+
await ensure_session_with_metadata(
|
|
358
|
+
session_id=context.session_id,
|
|
359
|
+
user_id=context.user_id,
|
|
360
|
+
tenant_id=context.tenant_id,
|
|
361
|
+
is_eval=context.is_eval,
|
|
362
|
+
request_metadata=body.metadata,
|
|
363
|
+
agent_schema="simulator",
|
|
364
|
+
)
|
|
365
|
+
|
|
154
366
|
# Get the last user message as prompt
|
|
155
367
|
prompt = body.messages[-1].content if body.messages else "demo"
|
|
156
368
|
request_id = f"sim-{uuid.uuid4().hex[:24]}"
|
|
@@ -301,6 +513,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
301
513
|
|
|
302
514
|
logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
|
|
303
515
|
|
|
516
|
+
# Ensure session exists with metadata and eval mode if applicable
|
|
517
|
+
if context.session_id:
|
|
518
|
+
await ensure_session_with_metadata(
|
|
519
|
+
session_id=context.session_id,
|
|
520
|
+
user_id=context.user_id,
|
|
521
|
+
tenant_id=context.tenant_id,
|
|
522
|
+
is_eval=context.is_eval,
|
|
523
|
+
request_metadata=body.metadata,
|
|
524
|
+
agent_schema=schema_name,
|
|
525
|
+
)
|
|
526
|
+
|
|
304
527
|
# Create agent with schema and model override
|
|
305
528
|
agent = await create_agent(
|
|
306
529
|
context=context,
|
|
@@ -351,7 +574,26 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
351
574
|
)
|
|
352
575
|
|
|
353
576
|
# Non-streaming mode
|
|
354
|
-
|
|
577
|
+
# Create a parent span to capture trace context for message storage
|
|
578
|
+
trace_id, span_id = None, None
|
|
579
|
+
tracer = get_tracer()
|
|
580
|
+
|
|
581
|
+
if tracer:
|
|
582
|
+
with tracer.start_as_current_span(
|
|
583
|
+
"chat_completion",
|
|
584
|
+
attributes={
|
|
585
|
+
"session.id": context.session_id or "",
|
|
586
|
+
"user.id": context.user_id or "",
|
|
587
|
+
"model": body.model,
|
|
588
|
+
"agent.schema": context.agent_schema_uri or DEFAULT_AGENT_SCHEMA,
|
|
589
|
+
}
|
|
590
|
+
) as span:
|
|
591
|
+
# Capture trace context from the span we just created
|
|
592
|
+
trace_id, span_id = get_current_trace_context()
|
|
593
|
+
result = await agent.run(prompt)
|
|
594
|
+
else:
|
|
595
|
+
# No tracer available, run without tracing
|
|
596
|
+
result = await agent.run(prompt)
|
|
355
597
|
|
|
356
598
|
# Determine content format based on response_format request
|
|
357
599
|
if body.response_format and body.response_format.type == "json_object":
|
|
@@ -374,12 +616,16 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
374
616
|
"role": "user",
|
|
375
617
|
"content": body.messages[-1].content if body.messages else "",
|
|
376
618
|
"timestamp": datetime.utcnow().isoformat(),
|
|
619
|
+
"trace_id": trace_id,
|
|
620
|
+
"span_id": span_id,
|
|
377
621
|
}
|
|
378
622
|
|
|
379
623
|
assistant_message = {
|
|
380
624
|
"role": "assistant",
|
|
381
625
|
"content": content,
|
|
382
626
|
"timestamp": datetime.utcnow().isoformat(),
|
|
627
|
+
"trace_id": trace_id,
|
|
628
|
+
"span_id": span_id,
|
|
383
629
|
}
|
|
384
630
|
|
|
385
631
|
try:
|