remdb 0.3.114__py3-none-any.whl → 0.3.127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (41)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +29 -3
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/pydantic_ai.py +26 -2
  6. rem/api/main.py +4 -1
  7. rem/api/mcp_router/server.py +9 -3
  8. rem/api/mcp_router/tools.py +324 -2
  9. rem/api/routers/admin.py +218 -1
  10. rem/api/routers/chat/completions.py +250 -4
  11. rem/api/routers/chat/models.py +81 -7
  12. rem/api/routers/chat/otel_utils.py +33 -0
  13. rem/api/routers/chat/sse_events.py +17 -1
  14. rem/api/routers/chat/streaming.py +35 -1
  15. rem/api/routers/feedback.py +134 -14
  16. rem/api/routers/query.py +6 -3
  17. rem/cli/commands/README.md +42 -0
  18. rem/cli/commands/cluster.py +617 -168
  19. rem/cli/commands/configure.py +1 -3
  20. rem/cli/commands/db.py +66 -22
  21. rem/cli/commands/experiments.py +242 -26
  22. rem/cli/commands/schema.py +6 -5
  23. rem/config.py +8 -1
  24. rem/services/phoenix/client.py +59 -18
  25. rem/services/postgres/diff_service.py +108 -3
  26. rem/services/postgres/schema_generator.py +205 -4
  27. rem/services/session/compression.py +7 -0
  28. rem/settings.py +150 -18
  29. rem/sql/migrations/001_install.sql +156 -0
  30. rem/sql/migrations/002_install_models.sql +1864 -1
  31. rem/sql/migrations/004_cache_system.sql +548 -0
  32. rem/utils/__init__.py +18 -0
  33. rem/utils/schema_loader.py +94 -3
  34. rem/utils/sql_paths.py +146 -0
  35. rem/workers/__init__.py +3 -1
  36. rem/workers/db_listener.py +579 -0
  37. rem/workers/unlogged_maintainer.py +463 -0
  38. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/METADATA +213 -177
  39. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/RECORD +41 -36
  40. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/WHEEL +0 -0
  41. {remdb-0.3.114.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
rem/api/routers/admin.py CHANGED
@@ -9,6 +9,9 @@ Endpoints:
9
9
  GET /api/admin/messages - List all messages across users (admin only)
10
10
  GET /api/admin/stats - System statistics (admin only)
11
11
 
12
+ Internal Endpoints (hidden from Swagger, secret-protected):
13
+ POST /api/admin/internal/rebuild-kv - Trigger kv_store rebuild (called by pg_net)
14
+
12
15
  All endpoints require:
13
16
  1. Authentication (valid session)
14
17
  2. Admin role in user's roles list
@@ -17,11 +20,14 @@ Design Pattern:
17
20
  - Uses require_admin dependency for role enforcement
18
21
  - Cross-tenant queries (no user_id filtering)
19
22
  - Audit logging for admin actions
23
+ - Internal endpoints use X-Internal-Secret header for authentication
20
24
  """
21
25
 
26
+ import asyncio
27
+ import threading
22
28
  from typing import Literal
23
29
 
24
- from fastapi import APIRouter, Depends, HTTPException, Query
30
+ from fastapi import APIRouter, Depends, Header, HTTPException, Query, BackgroundTasks
25
31
  from loguru import logger
26
32
  from pydantic import BaseModel
27
33
 
@@ -32,6 +38,12 @@ from ...settings import settings
32
38
 
33
39
  router = APIRouter(prefix="/api/admin", tags=["admin"])
34
40
 
41
+ # =============================================================================
42
+ # Internal Router (hidden from Swagger)
43
+ # =============================================================================
44
+
45
+ internal_router = APIRouter(prefix="/internal", include_in_schema=False)
46
+
35
47
 
36
48
  # =============================================================================
37
49
  # Response Models
@@ -275,3 +287,208 @@ async def get_system_stats(
275
287
  active_sessions_24h=0, # TODO: implement
276
288
  messages_24h=0, # TODO: implement
277
289
  )
290
+
291
+
292
+ # =============================================================================
293
+ # Internal Endpoints (hidden from Swagger, secret-protected)
294
+ # =============================================================================
295
+
296
+
297
+ class RebuildKVRequest(BaseModel):
298
+ """Request body for kv_store rebuild trigger."""
299
+
300
+ user_id: str | None = None
301
+ triggered_by: str = "api"
302
+ timestamp: str | None = None
303
+
304
+
305
+ class RebuildKVResponse(BaseModel):
306
+ """Response from kv_store rebuild trigger."""
307
+
308
+ status: Literal["submitted", "started", "skipped"]
309
+ message: str
310
+ job_method: str | None = None # "sqs" or "thread"
311
+
312
+
313
+ async def _get_internal_secret() -> str | None:
314
+ """
315
+ Get the internal API secret from cache_system_state table.
316
+
317
+ Returns None if the table doesn't exist or secret not found.
318
+ """
319
+ from ...services.postgres import get_postgres_service
320
+
321
+ db = get_postgres_service()
322
+ if not db:
323
+ return None
324
+
325
+ try:
326
+ await db.connect()
327
+ secret = await db.fetchval("SELECT rem_get_cache_api_secret()")
328
+ return secret
329
+ except Exception as e:
330
+ logger.warning(f"Could not get internal API secret: {e}")
331
+ return None
332
+ finally:
333
+ await db.disconnect()
334
+
335
+
336
+ async def _validate_internal_secret(x_internal_secret: str | None = Header(None)):
337
+ """
338
+ Dependency to validate the X-Internal-Secret header.
339
+
340
+ Raises 401 if secret is missing or invalid.
341
+ """
342
+ if not x_internal_secret:
343
+ logger.warning("Internal endpoint called without X-Internal-Secret header")
344
+ raise HTTPException(status_code=401, detail="Missing X-Internal-Secret header")
345
+
346
+ expected_secret = await _get_internal_secret()
347
+ if not expected_secret:
348
+ logger.error("Could not retrieve internal secret from database")
349
+ raise HTTPException(status_code=503, detail="Internal secret not configured")
350
+
351
+ if x_internal_secret != expected_secret:
352
+ logger.warning("Internal endpoint called with invalid secret")
353
+ raise HTTPException(status_code=401, detail="Invalid X-Internal-Secret")
354
+
355
+ return True
356
+
357
+
358
+ def _run_rebuild_in_thread():
359
+ """
360
+ Run the kv_store rebuild in a background thread.
361
+
362
+ This is the fallback when SQS is not available.
363
+ """
364
+
365
+ def rebuild_task():
366
+ """Thread target function."""
367
+ import asyncio
368
+ from ...workers.unlogged_maintainer import UnloggedMaintainer
369
+
370
+ async def _run():
371
+ maintainer = UnloggedMaintainer()
372
+ if not maintainer.db:
373
+ logger.error("Database not configured, cannot rebuild")
374
+ return
375
+ try:
376
+ await maintainer.db.connect()
377
+ await maintainer.rebuild_with_lock()
378
+ except Exception as e:
379
+ logger.error(f"Background rebuild failed: {e}")
380
+ finally:
381
+ await maintainer.db.disconnect()
382
+
383
+ # Create new event loop for this thread
384
+ loop = asyncio.new_event_loop()
385
+ asyncio.set_event_loop(loop)
386
+ try:
387
+ loop.run_until_complete(_run())
388
+ finally:
389
+ loop.close()
390
+
391
+ thread = threading.Thread(target=rebuild_task, name="kv-rebuild-worker")
392
+ thread.daemon = True
393
+ thread.start()
394
+ logger.info(f"Started background rebuild thread: {thread.name}")
395
+
396
+
397
+ def _submit_sqs_rebuild_job_sync(request: RebuildKVRequest) -> bool:
398
+ """
399
+ Submit rebuild job to SQS queue (synchronous).
400
+
401
+ Returns True if job was submitted, False if SQS unavailable.
402
+ """
403
+ import json
404
+
405
+ import boto3
406
+ from botocore.exceptions import ClientError
407
+
408
+ if not settings.sqs.queue_url:
409
+ logger.debug("SQS queue URL not configured, cannot submit SQS job")
410
+ return False
411
+
412
+ try:
413
+ sqs = boto3.client("sqs", region_name=settings.sqs.region)
414
+
415
+ message_body = {
416
+ "action": "rebuild_kv_store",
417
+ "user_id": request.user_id,
418
+ "triggered_by": request.triggered_by,
419
+ "timestamp": request.timestamp,
420
+ }
421
+
422
+ response = sqs.send_message(
423
+ QueueUrl=settings.sqs.queue_url,
424
+ MessageBody=json.dumps(message_body),
425
+ MessageAttributes={
426
+ "action": {"DataType": "String", "StringValue": "rebuild_kv_store"},
427
+ },
428
+ )
429
+
430
+ message_id = response.get("MessageId")
431
+ logger.info(f"Submitted rebuild job to SQS: {message_id}")
432
+ return True
433
+
434
+ except ClientError as e:
435
+ logger.warning(f"Failed to submit SQS job: {e}")
436
+ return False
437
+ except Exception as e:
438
+ logger.warning(f"SQS submission error: {e}")
439
+ return False
440
+
441
+
442
+ async def _submit_sqs_rebuild_job(request: RebuildKVRequest) -> bool:
443
+ """
444
+ Submit rebuild job to SQS queue (async wrapper).
445
+
446
+ Runs boto3 call in thread pool to avoid blocking event loop.
447
+ """
448
+ import asyncio
449
+
450
+ return await asyncio.to_thread(_submit_sqs_rebuild_job_sync, request)
451
+
452
+
453
+ @internal_router.post("/rebuild-kv", response_model=RebuildKVResponse)
454
+ async def trigger_kv_rebuild(
455
+ request: RebuildKVRequest,
456
+ _: bool = Depends(_validate_internal_secret),
457
+ ) -> RebuildKVResponse:
458
+ """
459
+ Trigger kv_store rebuild (internal endpoint, not shown in Swagger).
460
+
461
+ Called by pg_net from PostgreSQL when self-healing detects empty cache.
462
+ Authentication: X-Internal-Secret header must match secret in cache_system_state.
463
+
464
+ Priority:
465
+ 1. Submit job to SQS (if configured) - scales with KEDA
466
+ 2. Fallback to background thread - runs in same process
467
+
468
+ Note: This endpoint returns immediately. Rebuild happens asynchronously.
469
+ """
470
+ logger.info(
471
+ f"Rebuild kv_store requested by {request.triggered_by} "
472
+ f"(user_id={request.user_id})"
473
+ )
474
+
475
+ # Try SQS first
476
+ if await _submit_sqs_rebuild_job(request):
477
+ return RebuildKVResponse(
478
+ status="submitted",
479
+ message="Rebuild job submitted to SQS queue",
480
+ job_method="sqs",
481
+ )
482
+
483
+ # Fallback to background thread
484
+ _run_rebuild_in_thread()
485
+
486
+ return RebuildKVResponse(
487
+ status="started",
488
+ message="Rebuild started in background thread (SQS unavailable)",
489
+ job_method="thread",
490
+ )
491
+
492
+
493
+ # Include internal router in main router
494
+ router.include_router(internal_router)
rem/api/routers/chat/completions.py CHANGED
@@ -1,13 +1,94 @@
1
1
  """
2
2
  OpenAI-compatible chat completions router for REM.
3
3
 
4
- Design Pattern:
5
- - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema)
4
+ Quick Start (Local Development)
5
+ ===============================
6
+
7
+ NOTE: Local dev uses LOCAL databases (Postgres via Docker Compose on port 5050).
8
+ Do NOT port-forward databases. Only port-forward observability services.
9
+
10
+ IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
11
+ storage issues and feedback will not work correctly.
12
+
13
+ 1. Port Forwarding (REQUIRED for trace capture and Phoenix sync):
14
+
15
+ # Terminal 1: OTEL Collector (HTTP) - sends traces to Phoenix
16
+ kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
17
+
18
+ # Terminal 2: Phoenix UI - view traces at http://localhost:6006
19
+ kubectl port-forward -n siggy svc/phoenix 6006:6006
20
+
21
+ 2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
22
+
23
+ export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
24
+ -o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
25
+
26
+ 3. Start API with OTEL and Phoenix enabled:
27
+
28
+ cd /path/to/remstack/rem
29
+ source .venv/bin/activate
30
+ OTEL__ENABLED=true \\
31
+ PHOENIX__ENABLED=true \\
32
+ PHOENIX_API_KEY="$PHOENIX_API_KEY" \\
33
+ uvicorn rem.api.main:app --host 0.0.0.0 --port 8000 --app-dir src
34
+
35
+ 4. Test Chat Request (session_id MUST be a UUID):
36
+
37
+ SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
38
+ curl -s -N -X POST http://localhost:8000/api/v1/chat/completions \\
39
+ -H 'Content-Type: application/json' \\
40
+ -H "X-Session-Id: $SESSION_ID" \\
41
+ -H 'X-Agent-Schema: rem' \\
42
+ -d '{"messages": [{"role": "user", "content": "Hello"}], "stream": true}'
43
+
44
+ # Note: Use 'rem' agent schema (default) for real LLM responses.
45
+ # The 'simulator' agent is for testing SSE events without LLM calls.
46
+
47
+ 5. Submit Feedback on Response:
48
+
49
+ The metadata SSE event contains message_id and trace_id for feedback:
50
+ event: metadata
51
+ data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
52
+
53
+ Use session_id (UUID you generated) and message_id to submit feedback:
54
+
55
+ curl -X POST http://localhost:8000/api/v1/messages/feedback \\
56
+ -H 'Content-Type: application/json' \\
57
+ -H 'X-Tenant-Id: default' \\
58
+ -d '{
59
+ "session_id": "<your-uuid-session-id>",
60
+ "message_id": "<message-id-from-metadata>",
61
+ "rating": 1,
62
+ "categories": ["helpful"],
63
+ "comment": "Good response"
64
+ }'
65
+
66
+ Expected response (201 = synced to Phoenix):
67
+ {"phoenix_synced": true, "trace_id": "e53c701c...", "span_id": "6432d497..."}
68
+
69
+ OTEL Architecture
70
+ =================
71
+
72
+ REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
73
+ (port 4318) (k8s: observability) (k8s: siggy)
74
+
75
+ Environment Variables:
76
+ OTEL__ENABLED=true Enable OTEL tracing (required for trace capture)
77
+ PHOENIX__ENABLED=true Enable Phoenix integration (required for feedback sync)
78
+ PHOENIX_API_KEY=<jwt> Phoenix API key (required for feedback->Phoenix sync)
79
+ OTEL__COLLECTOR_ENDPOINT Default: http://localhost:4318
80
+ OTEL__PROTOCOL Default: http (use port 4318, not gRPC 4317)
81
+
82
+ Design Pattern
83
+ ==============
84
+
85
+ - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema, X-Is-Eval)
6
86
  - ContextBuilder centralizes message construction with user profile + session history
7
87
  - Body.model is the LLM model for Pydantic AI
8
88
  - X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
9
89
  - Support for streaming (SSE) and non-streaming modes
10
90
  - Response format control (text vs json_object)
91
+ - OpenAI-compatible body fields: metadata, store, reasoning_effort, etc.
11
92
 
12
93
  Context Building Flow:
13
94
  1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
@@ -25,9 +106,10 @@ Context Building Flow:
25
106
  Headers Mapping
26
107
  X-User-Id → AgentContext.user_id
27
108
  X-Tenant-Id → AgentContext.tenant_id
28
- X-Session-Id → AgentContext.session_id
109
+ X-Session-Id → AgentContext.session_id (use UUID for new sessions)
29
110
  X-Model-Name → AgentContext.default_model (overrides body.model)
30
111
  X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
112
+ X-Is-Eval → AgentContext.is_eval (sets session mode to EVALUATION)
31
113
 
32
114
  Default Agent:
33
115
  If X-Agent-Schema header is not provided, the system loads 'rem' schema,
@@ -42,6 +124,7 @@ Example Request:
42
124
  POST /api/v1/chat/completions
43
125
  X-Tenant-Id: acme-corp
44
126
  X-User-Id: user123
127
+ X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890 # UUID
45
128
  X-Agent-Schema: rem # Optional, this is the default
46
129
 
47
130
  {
@@ -67,7 +150,9 @@ from loguru import logger
67
150
  from ....agentic.context import AgentContext
68
151
  from ....agentic.context_builder import ContextBuilder
69
152
  from ....agentic.providers.pydantic_ai import create_agent
153
+ from ....models.entities.session import Session, SessionMode
70
154
  from ....services.audio.transcriber import AudioTranscriber
155
+ from ....services.postgres.repository import Repository
71
156
  from ....services.session import SessionMessageStore, reload_session
72
157
  from ....settings import settings
73
158
  from ....utils.schema_loader import load_agent_schema, load_agent_schema_async
@@ -87,6 +172,105 @@ router = APIRouter(prefix="/api/v1", tags=["chat"])
87
172
  DEFAULT_AGENT_SCHEMA = "rem"
88
173
 
89
174
 
175
+ def get_current_trace_context() -> tuple[str | None, str | None]:
176
+ """Get trace_id and span_id from current OTEL context.
177
+
178
+ Returns:
179
+ Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
180
+ """
181
+ try:
182
+ from opentelemetry import trace
183
+ span = trace.get_current_span()
184
+ if span and span.get_span_context().is_valid:
185
+ ctx = span.get_span_context()
186
+ trace_id = format(ctx.trace_id, '032x')
187
+ span_id = format(ctx.span_id, '016x')
188
+ return trace_id, span_id
189
+ except Exception:
190
+ pass
191
+ return None, None
192
+
193
+
194
+ def get_tracer():
195
+ """Get the OpenTelemetry tracer for chat completions."""
196
+ try:
197
+ from opentelemetry import trace
198
+ return trace.get_tracer("rem.chat.completions")
199
+ except Exception:
200
+ return None
201
+
202
+
203
+ async def ensure_session_with_metadata(
204
+ session_id: str,
205
+ user_id: str | None,
206
+ tenant_id: str,
207
+ is_eval: bool,
208
+ request_metadata: dict[str, str] | None,
209
+ agent_schema: str | None = None,
210
+ ) -> None:
211
+ """
212
+ Ensure session exists and update with metadata/mode.
213
+
214
+ If X-Is-Eval header is true, sets session mode to EVALUATION.
215
+ Merges request metadata with existing session metadata.
216
+
217
+ Args:
218
+ session_id: Session identifier (maps to Session.name)
219
+ user_id: User identifier
220
+ tenant_id: Tenant identifier
221
+ is_eval: Whether this is an evaluation session
222
+ request_metadata: Metadata from request body to merge
223
+ agent_schema: Optional agent schema being used
224
+ """
225
+ if not settings.postgres.enabled:
226
+ return
227
+
228
+ try:
229
+ repo = Repository(Session, table_name="sessions")
230
+
231
+ # Try to load existing session by name (session_id is the name field)
232
+ existing_list = await repo.find(
233
+ filters={"name": session_id, "tenant_id": tenant_id},
234
+ limit=1,
235
+ )
236
+ existing = existing_list[0] if existing_list else None
237
+
238
+ if existing:
239
+ # Merge metadata if provided
240
+ merged_metadata = existing.metadata or {}
241
+ if request_metadata:
242
+ merged_metadata.update(request_metadata)
243
+
244
+ # Update session if eval flag or new metadata
245
+ needs_update = False
246
+ if is_eval and existing.mode != SessionMode.EVALUATION:
247
+ existing.mode = SessionMode.EVALUATION
248
+ needs_update = True
249
+ if request_metadata:
250
+ existing.metadata = merged_metadata
251
+ needs_update = True
252
+
253
+ if needs_update:
254
+ await repo.upsert(existing)
255
+ logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")
256
+ else:
257
+ # Create new session
258
+ session = Session(
259
+ name=session_id,
260
+ mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
261
+ user_id=user_id,
262
+ tenant_id=tenant_id,
263
+ agent_schema_uri=agent_schema,
264
+ metadata=request_metadata or {},
265
+ )
266
+ await repo.upsert(session)
267
+ logger.info(f"Created session {session_id} (eval={is_eval})")
268
+
269
+ except Exception as e:
270
+ # Non-critical - log but don't fail the request
271
+ logger.error(f"Failed to ensure session metadata: {e}", exc_info=True)
272
+
273
+
90
274
  @router.post("/chat/completions", response_model=None)
91
275
  async def chat_completions(body: ChatCompletionRequest, request: Request):
92
276
  """
@@ -102,6 +286,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
102
286
  | X-Tenant-Id | Tenant identifier (multi-tenancy) | AgentContext.tenant_id | "default" |
103
287
  | X-Session-Id | Session/conversation identifier | AgentContext.session_id | None |
104
288
  | X-Agent-Schema | Agent schema name | AgentContext.agent_schema_uri | "rem" |
289
+ | X-Is-Eval | Mark as evaluation session | AgentContext.is_eval | false |
290
+
291
+ Additional OpenAI-compatible Body Fields:
292
+ - metadata: Key-value pairs merged with session metadata (max 16 keys)
293
+ - store: Whether to store for distillation/evaluation
294
+ - max_completion_tokens: Max tokens to generate (replaces max_tokens)
295
+ - seed: Seed for deterministic sampling
296
+ - top_p: Nucleus sampling probability
297
+ - logprobs: Return log probabilities
298
+ - reasoning_effort: low/medium/high for o-series models
299
+ - service_tier: auto/flex/priority/default
105
300
 
106
301
  Example Models:
107
302
  - anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
@@ -127,6 +322,12 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
127
322
  - If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
128
323
  - New messages saved to database with compression for session continuity
129
324
  - When Postgres is disabled, session management is skipped
325
+
326
+ Evaluation Sessions:
327
+ - Set X-Is-Eval: true header to mark session as evaluation
328
+ - Session mode will be set to EVALUATION
329
+ - Request metadata is merged with session metadata
330
+ - Useful for A/B testing, model comparison, and feedback collection
130
331
  """
131
332
  # Load agent schema: use header value from context or default
132
333
  # Extract AgentContext first to get schema name
@@ -151,6 +352,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
151
352
  new_messages=new_messages,
152
353
  )
153
354
 
355
+ # Ensure session exists with metadata and eval mode if applicable
356
+ if context.session_id:
357
+ await ensure_session_with_metadata(
358
+ session_id=context.session_id,
359
+ user_id=context.user_id,
360
+ tenant_id=context.tenant_id,
361
+ is_eval=context.is_eval,
362
+ request_metadata=body.metadata,
363
+ agent_schema="simulator",
364
+ )
365
+
154
366
  # Get the last user message as prompt
155
367
  prompt = body.messages[-1].content if body.messages else "demo"
156
368
  request_id = f"sim-{uuid.uuid4().hex[:24]}"
@@ -301,6 +513,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
301
513
 
302
514
  logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
303
515
 
516
+ # Ensure session exists with metadata and eval mode if applicable
517
+ if context.session_id:
518
+ await ensure_session_with_metadata(
519
+ session_id=context.session_id,
520
+ user_id=context.user_id,
521
+ tenant_id=context.tenant_id,
522
+ is_eval=context.is_eval,
523
+ request_metadata=body.metadata,
524
+ agent_schema=schema_name,
525
+ )
526
+
304
527
  # Create agent with schema and model override
305
528
  agent = await create_agent(
306
529
  context=context,
@@ -351,7 +574,26 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
351
574
  )
352
575
 
353
576
  # Non-streaming mode
354
- result = await agent.run(prompt)
577
+ # Create a parent span to capture trace context for message storage
578
+ trace_id, span_id = None, None
579
+ tracer = get_tracer()
580
+
581
+ if tracer:
582
+ with tracer.start_as_current_span(
583
+ "chat_completion",
584
+ attributes={
585
+ "session.id": context.session_id or "",
586
+ "user.id": context.user_id or "",
587
+ "model": body.model,
588
+ "agent.schema": context.agent_schema_uri or DEFAULT_AGENT_SCHEMA,
589
+ }
590
+ ) as span:
591
+ # Capture trace context from the span we just created
592
+ trace_id, span_id = get_current_trace_context()
593
+ result = await agent.run(prompt)
594
+ else:
595
+ # No tracer available, run without tracing
596
+ result = await agent.run(prompt)
355
597
 
356
598
  # Determine content format based on response_format request
357
599
  if body.response_format and body.response_format.type == "json_object":
@@ -374,12 +616,16 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
374
616
  "role": "user",
375
617
  "content": body.messages[-1].content if body.messages else "",
376
618
  "timestamp": datetime.utcnow().isoformat(),
619
+ "trace_id": trace_id,
620
+ "span_id": span_id,
377
621
  }
378
622
 
379
623
  assistant_message = {
380
624
  "role": "assistant",
381
625
  "content": content,
382
626
  "timestamp": datetime.utcnow().isoformat(),
627
+ "trace_id": trace_id,
628
+ "span_id": span_id,
383
629
  }
384
630
 
385
631
  try: