remdb 0.3.118__py3-none-any.whl → 0.3.141__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (40)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +126 -15
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +122 -43
  7. rem/agentic/schema.py +4 -1
  8. rem/api/mcp_router/tools.py +13 -2
  9. rem/api/routers/chat/completions.py +250 -4
  10. rem/api/routers/chat/models.py +81 -7
  11. rem/api/routers/chat/otel_utils.py +33 -0
  12. rem/api/routers/chat/sse_events.py +17 -1
  13. rem/api/routers/chat/streaming.py +35 -1
  14. rem/api/routers/feedback.py +134 -14
  15. rem/cli/commands/cluster.py +590 -82
  16. rem/cli/commands/configure.py +3 -4
  17. rem/cli/commands/experiments.py +436 -30
  18. rem/cli/commands/session.py +336 -0
  19. rem/cli/dreaming.py +2 -2
  20. rem/cli/main.py +2 -0
  21. rem/config.py +8 -1
  22. rem/models/core/experiment.py +54 -0
  23. rem/models/entities/ontology.py +1 -1
  24. rem/models/entities/ontology_config.py +1 -1
  25. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  26. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  27. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  28. rem/services/phoenix/client.py +59 -18
  29. rem/services/session/compression.py +7 -0
  30. rem/settings.py +236 -13
  31. rem/sql/migrations/002_install_models.sql +91 -91
  32. rem/sql/migrations/004_cache_system.sql +1 -1
  33. rem/utils/schema_loader.py +94 -3
  34. rem/utils/vision.py +1 -1
  35. rem/workers/__init__.py +2 -1
  36. rem/workers/db_listener.py +579 -0
  37. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/METADATA +156 -144
  38. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/RECORD +40 -37
  39. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
  40. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0

rem/agentic/providers/pydantic_ai.py CHANGED
@@ -553,7 +553,7 @@ async def create_agent(
      if agent_schema:
          system_prompt = get_system_prompt(agent_schema)
          metadata = get_metadata(agent_schema)
-         mcp_server_configs = [s.model_dump() for s in metadata.mcp_servers] if hasattr(metadata, 'mcp_servers') else []
+         mcp_server_configs = [s.model_dump() for s in metadata.mcp_servers] if hasattr(metadata, 'mcp_servers') and metadata.mcp_servers else []
          resource_configs = metadata.resources if hasattr(metadata, 'resources') else []
  
          if metadata.system_prompt:
@@ -564,6 +564,38 @@ async def create_agent(
          mcp_server_configs = []
          resource_configs = []
  
+     # Auto-detect local MCP server if not explicitly configured
+     # This makes mcp_servers config optional - agents get tools automatically
+     if not mcp_server_configs:
+         import importlib
+         import os
+         import sys
+ 
+         # Ensure current working directory is in sys.path for local imports
+         cwd = os.getcwd()
+         if cwd not in sys.path:
+             sys.path.insert(0, cwd)
+ 
+         # Try common local MCP server module paths first
+         auto_detect_modules = [
+             "tools.mcp_server",  # Convention: tools/mcp_server.py
+             "mcp_server",        # Alternative: mcp_server.py in root
+         ]
+         for module_path in auto_detect_modules:
+             try:
+                 mcp_module = importlib.import_module(module_path)
+                 if hasattr(mcp_module, "mcp"):
+                     logger.info(f"Auto-detected local MCP server: {module_path}")
+                     mcp_server_configs = [{"type": "local", "module": module_path, "id": "auto-detected"}]
+                     break
+             except ImportError:
+                 continue
+ 
+         # Fall back to REM's default MCP server if no local server found
+         if not mcp_server_configs:
+             logger.debug("No local MCP server found, using REM default")
+             mcp_server_configs = [{"type": "local", "module": "rem.mcp_server", "id": "rem"}]
+ 
      # Extract temperature and max_iterations from schema metadata (with fallback to settings defaults)
      if metadata:
          temperature = metadata.override_temperature if metadata.override_temperature is not None else settings.llm.default_temperature
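
The auto-detection above only requires that a module importable as tools.mcp_server or mcp_server exposes a module-level attribute named mcp. A minimal sketch of such a local server, assuming the FastMCP library; the tool name and function are illustrative, and any object providing tool registration plus an async get_tools() would satisfy the loader's check:

    # tools/mcp_server.py - minimal local server the auto-detection would pick up (sketch).
    # Assumes the fastmcp package; the loader above only checks for a module-level `mcp`.
    from fastmcp import FastMCP

    mcp = FastMCP("my-local-tools")

    @mcp.tool()
    def word_count(text: str) -> int:
        """Count whitespace-separated words."""
        return len(text.split())
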
@@ -608,50 +640,97 @@ async def create_agent(
          search_rem_suffix += f"Example: `SEARCH \"your query\" FROM {default_table} LIMIT 10`"
  
      # Add tools from MCP server (in-process, no subprocess)
-     if mcp_server_configs:
-         for server_config in mcp_server_configs:
-             server_type = server_config.get("type")
-             server_id = server_config.get("id", "mcp-server")
- 
-             if server_type == "local":
-                 # Import MCP server directly (in-process)
-                 module_path = server_config.get("module", "rem.mcp_server")
- 
-                 try:
-                     # Dynamic import of MCP server module
-                     import importlib
-                     mcp_module = importlib.import_module(module_path)
-                     mcp_server = mcp_module.mcp
- 
-                     # Extract tools from MCP server (get_tools is async)
-                     from ..mcp.tool_wrapper import create_mcp_tool_wrapper
- 
-                     # Await async get_tools() call
-                     mcp_tools_dict = await mcp_server.get_tools()
- 
-                     for tool_name, tool_func in mcp_tools_dict.items():
-                         # Add description suffix to search_rem tool if schema specifies a default table
-                         tool_suffix = search_rem_suffix if tool_name == "search_rem" else None
- 
-                         wrapped_tool = create_mcp_tool_wrapper(
-                             tool_name,
-                             tool_func,
-                             user_id=context.user_id if context else None,
-                             description_suffix=tool_suffix,
-                         )
-                         tools.append(wrapped_tool)
-                         logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema suffix)" if tool_suffix else ""))
- 
-                     logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
- 
-                 except Exception as e:
-                     logger.error(f"Failed to load MCP server {server_id}: {e}", exc_info=True)
-             else:
-                 logger.warning(f"Unsupported MCP server type: {server_type}")
+     # Track loaded MCP servers for resource resolution
+     loaded_mcp_server = None
+ 
+     for server_config in mcp_server_configs:
+         server_type = server_config.get("type")
+         server_id = server_config.get("id", "mcp-server")
+ 
+         if server_type == "local":
+             # Import MCP server directly (in-process)
+             module_path = server_config.get("module", "rem.mcp_server")
+ 
+             try:
+                 # Dynamic import of MCP server module
+                 import importlib
+                 mcp_module = importlib.import_module(module_path)
+                 mcp_server = mcp_module.mcp
+ 
+                 # Store the loaded server for resource resolution
+                 loaded_mcp_server = mcp_server
+ 
+                 # Extract tools from MCP server (get_tools is async)
+                 from ..mcp.tool_wrapper import create_mcp_tool_wrapper
+ 
+                 # Await async get_tools() call
+                 mcp_tools_dict = await mcp_server.get_tools()
+ 
+                 for tool_name, tool_func in mcp_tools_dict.items():
+                     # Add description suffix to search_rem tool if schema specifies a default table
+                     tool_suffix = search_rem_suffix if tool_name == "search_rem" else None
  
+                     wrapped_tool = create_mcp_tool_wrapper(
+                         tool_name,
+                         tool_func,
+                         user_id=context.user_id if context else None,
+                         description_suffix=tool_suffix,
+                     )
+                     tools.append(wrapped_tool)
+                     logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema suffix)" if tool_suffix else ""))
+ 
+                 logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
+ 
+             except Exception as e:
+                 logger.error(f"Failed to load MCP server {server_id}: {e}", exc_info=True)
+         else:
+             logger.warning(f"Unsupported MCP server type: {server_type}")
+ 
+     # Convert resources to tools (MCP convenience syntax)
+     # Resources declared in agent YAML become callable tools - eliminates
+     # the artificial MCP distinction between tools and resources
+     #
+     # Supports both concrete and template URIs:
+     #   - Concrete: "rem://schemas" -> no-param tool
+     #   - Template: "patient-profile://field/{field_key}" -> tool with field_key param
+     from ..mcp.tool_wrapper import create_resource_tool
+ 
+     # Collect all resource URIs from both resources section AND tools section
+     resource_uris = []
+ 
+     # From resources section (legacy format)
      if resource_configs:
-         # TODO: Convert resources to tools (MCP convenience syntax)
-         pass
+         for resource_config in resource_configs:
+             if hasattr(resource_config, 'uri'):
+                 uri = resource_config.uri
+                 usage = resource_config.description or ""
+             else:
+                 uri = resource_config.get("uri", "")
+                 usage = resource_config.get("description", "")
+             if uri:
+                 resource_uris.append((uri, usage))
+ 
+     # From tools section - detect URIs (anything with ://)
+     # This allows unified syntax: resources as tools
+     tool_configs = metadata.tools if metadata and hasattr(metadata, 'tools') else []
+     for tool_config in tool_configs:
+         if hasattr(tool_config, 'name'):
+             tool_name = tool_config.name
+             tool_desc = tool_config.description or ""
+         else:
+             tool_name = tool_config.get("name", "")
+             tool_desc = tool_config.get("description", "")
+ 
+         # Auto-detect resource URIs (anything with :// scheme)
+         if "://" in tool_name:
+             resource_uris.append((tool_name, tool_desc))
+ 
+     # Create tools from collected resource URIs
+     # Pass the loaded MCP server so resources can be resolved from it
+     for uri, usage in resource_uris:
+         resource_tool = create_resource_tool(uri, usage, mcp_server=loaded_mcp_server)
+         tools.append(resource_tool)
+         logger.debug(f"Loaded resource as tool: {uri}")
  
      # Create dynamic result_type from schema if not provided
      # Note: use_structured_output is set earlier from metadata.structured_output
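
The comment block above distinguishes concrete URIs from template URIs. A rough illustration of that distinction (not the actual create_resource_tool implementation): the {placeholder} names in a template URI become the parameters of the generated tool.

    # Illustration only - placeholder names in a URI template map to tool parameters.
    import re

    def template_params(uri: str) -> list[str]:
        """Return the {placeholder} names found in a resource URI template."""
        return re.findall(r"\{(\w+)\}", uri)

    print(template_params("rem://schemas"))                         # [] -> no-param tool
    print(template_params("patient-profile://field/{field_key}"))   # ['field_key'] -> one-param tool
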
rem/agentic/schema.py CHANGED
@@ -154,8 +154,10 @@ class MCPServerConfig(BaseModel):
      )
  
      id: str = Field(
+         default="mcp-server",
          description=(
              "Server identifier for logging and debugging. "
+             "Defaults to 'mcp-server' if not specified. "
              "Example: 'rem-local'"
          )
      )
@@ -228,7 +230,8 @@ class AgentSchemaMetadata(BaseModel):
          description=(
              "MCP server configurations for dynamic tool loading. "
              "Servers are loaded in-process at agent creation time. "
-             "All tools from configured servers become available to the agent."
+             "All tools from configured servers become available to the agent. "
+             "If not specified, defaults to rem.mcp_server (REM's built-in tools)."
          ),
      )
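
With the new default, id can be omitted when declaring a server. A minimal sketch, assuming MCPServerConfig also declares the type and module fields that create_agent reads from model_dump() (those fields are not shown in this hunk):

    from rem.agentic.schema import MCPServerConfig

    cfg = MCPServerConfig(type="local", module="rem.mcp_server")  # type/module are assumed fields
    assert cfg.id == "mcp-server"    # default applies when id is omitted
    config_dict = cfg.model_dump()   # dict form consumed by the agent loader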
 
rem/api/mcp_router/tools.py CHANGED
@@ -606,7 +606,9 @@ async def register_metadata(
      references: list[str] | None = None,
      sources: list[str] | None = None,
      flags: list[str] | None = None,
-     # Risk assessment fields (used by mental health agents like Siggy)
+     # Session naming
+     session_name: str | None = None,
+     # Risk assessment fields (used by specialized agents)
      risk_level: str | None = None,
      risk_score: int | None = None,
      risk_reasoning: str | None = None,
@@ -639,6 +641,11 @@ async def register_metadata(
          flags: Optional flags for the response (e.g., "needs_review",
              "uncertain", "incomplete", "crisis_alert").
  
+         session_name: Short 1-3 phrase name describing the session topic.
+             Used by the UI to label conversations in the sidebar.
+             Examples: "Prescription Drug Questions", "AWS Setup Help",
+             "Python Code Review", "Travel Planning".
+ 
          risk_level: Risk level indicator (e.g., "green", "orange", "red").
              Used by mental health agents for C-SSRS style assessment.
          risk_score: Numeric risk score (e.g., 0-6 for C-SSRS).
@@ -663,7 +670,7 @@ async def register_metadata(
              sources=["REM database lookup"]
          )
  
-         # Mental health risk assessment (Siggy-style)
+         # Risk assessment example
          register_metadata(
              confidence=0.9,
              risk_level="green",
@@ -706,6 +713,10 @@ async def register_metadata(
          "flags": flags,
      }
  
+     # Add session name if provided
+     if session_name is not None:
+         result["session_name"] = session_name
+ 
      # Add risk assessment fields if provided
      if risk_level is not None:
          result["risk_level"] = risk_level
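
An example call using the new parameter, based on the signature and docstring examples above (other optional fields omitted):

    await register_metadata(
        confidence=0.95,
        session_name="AWS Setup Help",      # short label shown in the UI sidebar
        sources=["REM database lookup"],
    )
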
rem/api/routers/chat/completions.py CHANGED
@@ -1,13 +1,94 @@
  """
  OpenAI-compatible chat completions router for REM.
  
- Design Pattern:
- - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema)
+ Quick Start (Local Development)
+ ===============================
+ 
+ NOTE: Local dev uses LOCAL databases (Postgres via Docker Compose on port 5050).
+ Do NOT port-forward databases. Only port-forward observability services.
+ 
+ IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
+ storage issues and feedback will not work correctly.
+ 
+ 1. Port Forwarding (REQUIRED for trace capture and Phoenix sync):
+ 
+     # Terminal 1: OTEL Collector (HTTP) - sends traces to Phoenix
+     kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
+ 
+     # Terminal 2: Phoenix UI - view traces at http://localhost:6006
+     kubectl port-forward -n siggy svc/phoenix 6006:6006
+ 
+ 2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
+ 
+     export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
+         -o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
+ 
+ 3. Start API with OTEL and Phoenix enabled:
+ 
+     cd /path/to/remstack/rem
+     source .venv/bin/activate
+     OTEL__ENABLED=true \\
+     PHOENIX__ENABLED=true \\
+     PHOENIX_API_KEY="$PHOENIX_API_KEY" \\
+     uvicorn rem.api.main:app --host 0.0.0.0 --port 8000 --app-dir src
+ 
+ 4. Test Chat Request (session_id MUST be a UUID):
+ 
+     SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
+     curl -s -N -X POST http://localhost:8000/api/v1/chat/completions \\
+         -H 'Content-Type: application/json' \\
+         -H "X-Session-Id: $SESSION_ID" \\
+         -H 'X-Agent-Schema: rem' \\
+         -d '{"messages": [{"role": "user", "content": "Hello"}], "stream": true}'
+ 
+     # Note: Use 'rem' agent schema (default) for real LLM responses.
+     # The 'simulator' agent is for testing SSE events without LLM calls.
+ 
+ 5. Submit Feedback on Response:
+ 
+     The metadata SSE event contains message_id and trace_id for feedback:
+         event: metadata
+         data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
+ 
+     Use session_id (UUID you generated) and message_id to submit feedback:
+ 
+     curl -X POST http://localhost:8000/api/v1/messages/feedback \\
+         -H 'Content-Type: application/json' \\
+         -H 'X-Tenant-Id: default' \\
+         -d '{
+             "session_id": "<your-uuid-session-id>",
+             "message_id": "<message-id-from-metadata>",
+             "rating": 1,
+             "categories": ["helpful"],
+             "comment": "Good response"
+         }'
+ 
+     Expected response (201 = synced to Phoenix):
+         {"phoenix_synced": true, "trace_id": "e53c701c...", "span_id": "6432d497..."}
+ 
+ OTEL Architecture
+ =================
+ 
+     REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
+              (port 4318)     (k8s: observability)        (k8s: siggy)
+ 
+ Environment Variables:
+     OTEL__ENABLED=true          Enable OTEL tracing (required for trace capture)
+     PHOENIX__ENABLED=true       Enable Phoenix integration (required for feedback sync)
+     PHOENIX_API_KEY=<jwt>       Phoenix API key (required for feedback->Phoenix sync)
+     OTEL__COLLECTOR_ENDPOINT    Default: http://localhost:4318
+     OTEL__PROTOCOL              Default: http (use port 4318, not gRPC 4317)
+ 
+ Design Pattern
+ ==============
+ 
+ - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema, X-Is-Eval)
  - ContextBuilder centralizes message construction with user profile + session history
  - Body.model is the LLM model for Pydantic AI
  - X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
  - Support for streaming (SSE) and non-streaming modes
  - Response format control (text vs json_object)
+ - OpenAI-compatible body fields: metadata, store, reasoning_effort, etc.
  
  Context Building Flow:
  1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
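
The same quick-start request expressed in Python rather than curl; a sketch assuming the requests package (any HTTP client that can stream the SSE response works):

    import uuid
    import requests

    session_id = str(uuid.uuid4())  # session IDs MUST be UUIDs

    resp = requests.post(
        "http://localhost:8000/api/v1/chat/completions",
        headers={"X-Session-Id": session_id, "X-Agent-Schema": "rem"},
        json={"messages": [{"role": "user", "content": "Hello"}], "stream": True},
        stream=True,
    )
    for line in resp.iter_lines():
        if line:
            print(line.decode())  # SSE frames, including the metadata event with message_id/trace_id
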
@@ -25,9 +106,10 @@ Context Building Flow:
  Headers Mapping
      X-User-Id → AgentContext.user_id
      X-Tenant-Id → AgentContext.tenant_id
-     X-Session-Id → AgentContext.session_id
+     X-Session-Id → AgentContext.session_id (use UUID for new sessions)
      X-Model-Name → AgentContext.default_model (overrides body.model)
      X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
+     X-Is-Eval → AgentContext.is_eval (sets session mode to EVALUATION)
  
  Default Agent:
      If X-Agent-Schema header is not provided, the system loads 'rem' schema,
@@ -42,6 +124,7 @@ Example Request:
      POST /api/v1/chat/completions
      X-Tenant-Id: acme-corp
      X-User-Id: user123
+     X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890  # UUID
      X-Agent-Schema: rem  # Optional, this is the default
  
      {
@@ -67,7 +150,9 @@ from loguru import logger
  from ....agentic.context import AgentContext
  from ....agentic.context_builder import ContextBuilder
  from ....agentic.providers.pydantic_ai import create_agent
+ from ....models.entities.session import Session, SessionMode
  from ....services.audio.transcriber import AudioTranscriber
+ from ....services.postgres.repository import Repository
  from ....services.session import SessionMessageStore, reload_session
  from ....settings import settings
  from ....utils.schema_loader import load_agent_schema, load_agent_schema_async
@@ -87,6 +172,105 @@ router = APIRouter(prefix="/api/v1", tags=["chat"])
  DEFAULT_AGENT_SCHEMA = "rem"
  
  
+ def get_current_trace_context() -> tuple[str | None, str | None]:
+     """Get trace_id and span_id from current OTEL context.
+ 
+     Returns:
+         Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
+     """
+     try:
+         from opentelemetry import trace
+         span = trace.get_current_span()
+         if span and span.get_span_context().is_valid:
+             ctx = span.get_span_context()
+             trace_id = format(ctx.trace_id, '032x')
+             span_id = format(ctx.span_id, '016x')
+             return trace_id, span_id
+     except Exception:
+         pass
+     return None, None
+ 
+ 
+ def get_tracer():
+     """Get the OpenTelemetry tracer for chat completions."""
+     try:
+         from opentelemetry import trace
+         return trace.get_tracer("rem.chat.completions")
+     except Exception:
+         return None
+ 
+ 
+ async def ensure_session_with_metadata(
+     session_id: str,
+     user_id: str | None,
+     tenant_id: str,
+     is_eval: bool,
+     request_metadata: dict[str, str] | None,
+     agent_schema: str | None = None,
+ ) -> None:
+     """
+     Ensure session exists and update with metadata/mode.
+ 
+     If X-Is-Eval header is true, sets session mode to EVALUATION.
+     Merges request metadata with existing session metadata.
+ 
+     Args:
+         session_id: Session identifier (maps to Session.name)
+         user_id: User identifier
+         tenant_id: Tenant identifier
+         is_eval: Whether this is an evaluation session
+         request_metadata: Metadata from request body to merge
+         agent_schema: Optional agent schema being used
+     """
+     if not settings.postgres.enabled:
+         return
+ 
+     try:
+         repo = Repository(Session, table_name="sessions")
+ 
+         # Try to load existing session by name (session_id is the name field)
+         existing_list = await repo.find(
+             filters={"name": session_id, "tenant_id": tenant_id},
+             limit=1,
+         )
+         existing = existing_list[0] if existing_list else None
+ 
+         if existing:
+             # Merge metadata if provided
+             merged_metadata = existing.metadata or {}
+             if request_metadata:
+                 merged_metadata.update(request_metadata)
+ 
+             # Update session if eval flag or new metadata
+             needs_update = False
+             if is_eval and existing.mode != SessionMode.EVALUATION:
+                 existing.mode = SessionMode.EVALUATION
+                 needs_update = True
+             if request_metadata:
+                 existing.metadata = merged_metadata
+                 needs_update = True
+ 
+             if needs_update:
+                 await repo.upsert(existing)
+                 logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")
+         else:
+             # Create new session
+             session = Session(
+                 name=session_id,
+                 mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
+                 user_id=user_id,
+                 tenant_id=tenant_id,
+                 agent_schema_uri=agent_schema,
+                 metadata=request_metadata or {},
+             )
+             await repo.upsert(session)
+             logger.info(f"Created session {session_id} (eval={is_eval})")
+ 
+     except Exception as e:
+         # Non-critical - log but don't fail the request
+         logger.error(f"Failed to ensure session metadata: {e}", exc_info=True)
+ 
+ 
  @router.post("/chat/completions", response_model=None)
  async def chat_completions(body: ChatCompletionRequest, request: Request):
      """
@@ -102,6 +286,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
      | X-Tenant-Id | Tenant identifier (multi-tenancy) | AgentContext.tenant_id | "default" |
      | X-Session-Id | Session/conversation identifier | AgentContext.session_id | None |
      | X-Agent-Schema | Agent schema name | AgentContext.agent_schema_uri | "rem" |
+     | X-Is-Eval | Mark as evaluation session | AgentContext.is_eval | false |
+ 
+     Additional OpenAI-compatible Body Fields:
+     - metadata: Key-value pairs merged with session metadata (max 16 keys)
+     - store: Whether to store for distillation/evaluation
+     - max_completion_tokens: Max tokens to generate (replaces max_tokens)
+     - seed: Seed for deterministic sampling
+     - top_p: Nucleus sampling probability
+     - logprobs: Return log probabilities
+     - reasoning_effort: low/medium/high for o-series models
+     - service_tier: auto/flex/priority/default
  
      Example Models:
      - anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
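
A request body exercising the additional fields might look like the following sketch (field names are those listed above; values are illustrative):

    body = {
        "model": "anthropic:claude-sonnet-4-5-20250929",
        "messages": [{"role": "user", "content": "Summarize my last session"}],
        "stream": False,
        "metadata": {"experiment": "prompt-v2", "cohort": "beta"},  # merged into session metadata
        "store": True,
        "max_completion_tokens": 512,
        "seed": 42,
        "top_p": 0.9,
        "reasoning_effort": "medium",
        "service_tier": "auto",
    }
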
@@ -127,6 +322,12 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
      - If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
      - New messages saved to database with compression for session continuity
      - When Postgres is disabled, session management is skipped
+ 
+     Evaluation Sessions:
+     - Set X-Is-Eval: true header to mark session as evaluation
+     - Session mode will be set to EVALUATION
+     - Request metadata is merged with session metadata
+     - Useful for A/B testing, model comparison, and feedback collection
      """
      # Load agent schema: use header value from context or default
      # Extract AgentContext first to get schema name
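
Marking a run as an evaluation session only requires the X-Is-Eval header alongside the usual ones; a sketch of the request headers (values illustrative):

    import uuid

    eval_headers = {
        "X-Tenant-Id": "acme-corp",
        "X-User-Id": "user123",
        "X-Session-Id": str(uuid.uuid4()),
        "X-Agent-Schema": "rem",
        "X-Is-Eval": "true",   # session mode becomes EVALUATION
    }
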
@@ -151,6 +352,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
              new_messages=new_messages,
          )
  
+         # Ensure session exists with metadata and eval mode if applicable
+         if context.session_id:
+             await ensure_session_with_metadata(
+                 session_id=context.session_id,
+                 user_id=context.user_id,
+                 tenant_id=context.tenant_id,
+                 is_eval=context.is_eval,
+                 request_metadata=body.metadata,
+                 agent_schema="simulator",
+             )
+ 
          # Get the last user message as prompt
          prompt = body.messages[-1].content if body.messages else "demo"
          request_id = f"sim-{uuid.uuid4().hex[:24]}"
@@ -301,6 +513,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
  
      logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
  
+     # Ensure session exists with metadata and eval mode if applicable
+     if context.session_id:
+         await ensure_session_with_metadata(
+             session_id=context.session_id,
+             user_id=context.user_id,
+             tenant_id=context.tenant_id,
+             is_eval=context.is_eval,
+             request_metadata=body.metadata,
+             agent_schema=schema_name,
+         )
+ 
      # Create agent with schema and model override
      agent = await create_agent(
          context=context,
@@ -351,7 +574,26 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
          )
  
      # Non-streaming mode
-     result = await agent.run(prompt)
+     # Create a parent span to capture trace context for message storage
+     trace_id, span_id = None, None
+     tracer = get_tracer()
+ 
+     if tracer:
+         with tracer.start_as_current_span(
+             "chat_completion",
+             attributes={
+                 "session.id": context.session_id or "",
+                 "user.id": context.user_id or "",
+                 "model": body.model,
+                 "agent.schema": context.agent_schema_uri or DEFAULT_AGENT_SCHEMA,
+             }
+         ) as span:
+             # Capture trace context from the span we just created
+             trace_id, span_id = get_current_trace_context()
+             result = await agent.run(prompt)
+     else:
+         # No tracer available, run without tracing
+         result = await agent.run(prompt)
  
      # Determine content format based on response_format request
      if body.response_format and body.response_format.type == "json_object":
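
The trace_id and span_id captured here are the standard OTEL hex encodings, which is why the metadata event and feedback response show values like "e53c701c...": a 128-bit trace id renders as 32 hex characters and a 64-bit span id as 16. A small illustration (the integers below are made up):

    trace_id_int = 0xE53C701C00000000000000000000ABCD  # made-up 128-bit value
    span_id_int = 0x6432D49700000001                   # made-up 64-bit value
    print(format(trace_id_int, "032x"))  # 'e53c701c00000000000000000000abcd'
    print(format(span_id_int, "016x"))   # '6432d49700000001'
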
@@ -374,12 +616,16 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
          "role": "user",
          "content": body.messages[-1].content if body.messages else "",
          "timestamp": datetime.utcnow().isoformat(),
+         "trace_id": trace_id,
+         "span_id": span_id,
      }
  
      assistant_message = {
          "role": "assistant",
          "content": content,
          "timestamp": datetime.utcnow().isoformat(),
+         "trace_id": trace_id,
+         "span_id": span_id,
      }
  
      try: