remdb 0.3.180__py3-none-any.whl → 0.3.258__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. rem/agentic/README.md +36 -2
  2. rem/agentic/__init__.py +10 -1
  3. rem/agentic/context.py +185 -1
  4. rem/agentic/context_builder.py +56 -35
  5. rem/agentic/mcp/tool_wrapper.py +2 -2
  6. rem/agentic/providers/pydantic_ai.py +303 -111
  7. rem/agentic/schema.py +2 -2
  8. rem/api/main.py +1 -1
  9. rem/api/mcp_router/resources.py +223 -0
  10. rem/api/mcp_router/server.py +4 -0
  11. rem/api/mcp_router/tools.py +608 -166
  12. rem/api/routers/admin.py +30 -4
  13. rem/api/routers/auth.py +219 -20
  14. rem/api/routers/chat/child_streaming.py +393 -0
  15. rem/api/routers/chat/completions.py +77 -40
  16. rem/api/routers/chat/sse_events.py +7 -3
  17. rem/api/routers/chat/streaming.py +381 -291
  18. rem/api/routers/chat/streaming_utils.py +325 -0
  19. rem/api/routers/common.py +18 -0
  20. rem/api/routers/dev.py +7 -1
  21. rem/api/routers/feedback.py +11 -3
  22. rem/api/routers/messages.py +176 -38
  23. rem/api/routers/models.py +9 -1
  24. rem/api/routers/query.py +17 -15
  25. rem/api/routers/shared_sessions.py +16 -0
  26. rem/auth/jwt.py +19 -4
  27. rem/auth/middleware.py +42 -28
  28. rem/cli/README.md +62 -0
  29. rem/cli/commands/ask.py +205 -114
  30. rem/cli/commands/db.py +55 -31
  31. rem/cli/commands/experiments.py +1 -1
  32. rem/cli/commands/process.py +179 -43
  33. rem/cli/commands/query.py +109 -0
  34. rem/cli/commands/session.py +117 -0
  35. rem/cli/main.py +2 -0
  36. rem/models/core/experiment.py +1 -1
  37. rem/models/entities/ontology.py +18 -20
  38. rem/models/entities/session.py +1 -0
  39. rem/schemas/agents/core/agent-builder.yaml +1 -1
  40. rem/schemas/agents/rem.yaml +1 -1
  41. rem/schemas/agents/test_orchestrator.yaml +42 -0
  42. rem/schemas/agents/test_structured_output.yaml +52 -0
  43. rem/services/content/providers.py +151 -49
  44. rem/services/content/service.py +18 -5
  45. rem/services/embeddings/worker.py +26 -12
  46. rem/services/postgres/__init__.py +28 -3
  47. rem/services/postgres/diff_service.py +57 -5
  48. rem/services/postgres/programmable_diff_service.py +635 -0
  49. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  50. rem/services/postgres/register_type.py +11 -10
  51. rem/services/postgres/repository.py +39 -28
  52. rem/services/postgres/schema_generator.py +5 -5
  53. rem/services/postgres/sql_builder.py +6 -5
  54. rem/services/rem/README.md +4 -3
  55. rem/services/rem/parser.py +7 -10
  56. rem/services/rem/service.py +47 -0
  57. rem/services/session/__init__.py +8 -1
  58. rem/services/session/compression.py +47 -5
  59. rem/services/session/pydantic_messages.py +310 -0
  60. rem/services/session/reload.py +2 -1
  61. rem/settings.py +92 -7
  62. rem/sql/migrations/001_install.sql +125 -7
  63. rem/sql/migrations/002_install_models.sql +159 -149
  64. rem/sql/migrations/004_cache_system.sql +10 -276
  65. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  66. rem/utils/schema_loader.py +180 -120
  67. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
  68. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
  69. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
  70. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,393 @@
+ """
+ Child Agent Event Handling.
+
+ Handles events from child agents during multi-agent orchestration.
+
+ Event Flow:
+ ```
+ Parent Agent (Orchestrator)
+         │
+         ▼
+    ask_agent tool
+         │
+         ├──────────────────────────────────┐
+         ▼                                  │
+ Child Agent (intake_diverge)               │
+         │                                  │
+         ├── child_tool_start ──────────────┼──► Event Sink (Queue)
+         ├── child_content ─────────────────┤
+         └── child_tool_result ─────────────┘
+                                            │
+                                            ▼
+                                   drain_child_events()
+                                            │
+                                            ├── SSE to client
+                                            └── DB persistence
+ ```
+
+ IMPORTANT: When child_content is streamed, parent text output should be SKIPPED
+ to prevent content duplication.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import json
+ import uuid
+ from typing import TYPE_CHECKING, Any, AsyncGenerator
+
+ from loguru import logger
+
+ from .streaming_utils import StreamingState, build_content_chunk
+ from .sse_events import MetadataEvent, ToolCallEvent, format_sse_event
+ from ....services.session import SessionMessageStore
+ from ....settings import settings
+ from ....utils.date_utils import to_iso, utc_now
+
+ if TYPE_CHECKING:
+     from ....agentic.context import AgentContext
+
+
+ async def handle_child_tool_start(
+     state: StreamingState,
+     child_agent: str,
+     tool_name: str,
+     arguments: dict | str | None,
+     session_id: str | None,
+     user_id: str | None,
+ ) -> AsyncGenerator[str, None]:
+     """
+     Handle child_tool_start event.
+
+     Actions:
+     1. Log the tool call
+     2. Emit SSE event
+     3. Save to database (with tool_arguments in metadata for consistency with parent)
+     """
+     full_tool_name = f"{child_agent}:{tool_name}"
+     tool_id = f"call_{uuid.uuid4().hex[:8]}"
+
+     # Normalize arguments - may come as JSON string from ToolCallPart.args
+     if isinstance(arguments, str):
+         try:
+             arguments = json.loads(arguments)
+         except json.JSONDecodeError:
+             arguments = None
+     elif not isinstance(arguments, dict):
+         arguments = None
+
+     # 1. LOG
+     logger.info(f"🔧 {full_tool_name}")
+
+     # 2. EMIT SSE
+     yield format_sse_event(ToolCallEvent(
+         tool_name=full_tool_name,
+         tool_id=tool_id,
+         status="started",
+         arguments=arguments,
+     ))
+
+     # 3. SAVE TO DB - content contains args as JSON (pydantic_messages.py parses it)
+     if session_id and settings.postgres.enabled:
+         try:
+             store = SessionMessageStore(
+                 user_id=user_id or settings.test.effective_user_id
+             )
+             tool_msg = {
+                 "role": "tool",
+                 # Content is the tool call args as JSON - this is what the agent sees on reload
+                 # and what pydantic_messages.py parses for ToolCallPart.args
+                 "content": json.dumps(arguments) if arguments else "",
+                 "timestamp": to_iso(utc_now()),
+                 "tool_call_id": tool_id,
+                 "tool_name": full_tool_name,
+             }
+             await store.store_session_messages(
+                 session_id=session_id,
+                 messages=[tool_msg],
+                 user_id=user_id,
+                 compress=False,
+             )
+         except Exception as e:
+             logger.warning(f"Failed to save child tool call: {e}")
+
+
+ def handle_child_content(
+     state: StreamingState,
+     child_agent: str,
+     content: str,
+ ) -> str | None:
+     """
+     Handle child_content event.
+
+     CRITICAL: Sets state.child_content_streamed = True
+     This flag is used to skip parent text output and prevent duplication.
+
+     Returns:
+         SSE chunk or None if content is empty
+     """
+     if not content:
+         return None
+
+     # Track that child content was streamed
+     # Parent text output should be SKIPPED when this is True
+     state.child_content_streamed = True
+     state.responding_agent = child_agent
+
+     return build_content_chunk(state, content)
+
+
+ async def handle_child_tool_result(
+     state: StreamingState,
+     child_agent: str,
+     result: Any,
+     message_id: str | None,
+     session_id: str | None,
+     agent_schema: str | None,
+ ) -> AsyncGenerator[str, None]:
+     """
+     Handle child_tool_result event.
+
+     Actions:
+     1. Log metadata if present
+     2. Emit metadata event if present
+     3. Emit tool completion event
+     """
+     # Check for metadata registration
+     if isinstance(result, dict) and result.get("_metadata_event"):
+         risk = result.get("risk_level", "")
+         conf = result.get("confidence", "")
+         logger.info(f"📊 {child_agent} metadata: risk={risk}, confidence={conf}")
+
+         # Update responding agent from child
+         if result.get("agent_schema"):
+             state.responding_agent = result.get("agent_schema")
+
+         # Build extra dict with risk fields
+         extra_data = {}
+         if risk:
+             extra_data["risk_level"] = risk
+
+         yield format_sse_event(MetadataEvent(
+             message_id=message_id,
+             session_id=session_id,
+             agent_schema=agent_schema,
+             responding_agent=state.responding_agent,
+             confidence=result.get("confidence"),
+             extra=extra_data if extra_data else None,
+         ))
+
+     # Emit tool completion
+     # Preserve full result for dict/list types (needed for frontend)
+     if isinstance(result, (dict, list)):
+         result_for_sse = result
+     else:
+         result_for_sse = str(result) if result else None
+
+     yield format_sse_event(ToolCallEvent(
+         tool_name=f"{child_agent}:tool",
+         tool_id=f"call_{uuid.uuid4().hex[:8]}",
+         status="completed",
+         result=result_for_sse,
+     ))
+
+
+ async def drain_child_events(
+     event_sink: asyncio.Queue,
+     state: StreamingState,
+     session_id: str | None = None,
+     user_id: str | None = None,
+     message_id: str | None = None,
+     agent_schema: str | None = None,
+ ) -> AsyncGenerator[str, None]:
+     """
+     Drain all pending child events from the event sink.
+
+     This is called during tool execution to process events
+     pushed by child agents via ask_agent.
+
+     IMPORTANT: When child_content events are processed, this sets
+     state.child_content_streamed = True. Callers should check this
+     flag and skip parent text output to prevent duplication.
+     """
+     while not event_sink.empty():
+         try:
+             child_event = event_sink.get_nowait()
+             async for chunk in process_child_event(
+                 child_event, state, session_id, user_id, message_id, agent_schema
+             ):
+                 yield chunk
+         except Exception as e:
+             logger.warning(f"Error processing child event: {e}")
+
+
+ async def process_child_event(
+     child_event: dict,
+     state: StreamingState,
+     session_id: str | None = None,
+     user_id: str | None = None,
+     message_id: str | None = None,
+     agent_schema: str | None = None,
+ ) -> AsyncGenerator[str, None]:
+     """Process a single child event and yield SSE chunks."""
+     event_type = child_event.get("type", "")
+     child_agent = child_event.get("agent_name", "child")
+
+     if event_type == "child_tool_start":
+         async for chunk in handle_child_tool_start(
+             state=state,
+             child_agent=child_agent,
+             tool_name=child_event.get("tool_name", "tool"),
+             arguments=child_event.get("arguments"),
+             session_id=session_id,
+             user_id=user_id,
+         ):
+             yield chunk
+
+     elif event_type == "child_content":
+         chunk = handle_child_content(
+             state=state,
+             child_agent=child_agent,
+             content=child_event.get("content", ""),
+         )
+         if chunk:
+             yield chunk
+
+     elif event_type == "child_tool_result":
+         async for chunk in handle_child_tool_result(
+             state=state,
+             child_agent=child_agent,
+             result=child_event.get("result"),
+             message_id=message_id,
+             session_id=session_id,
+             agent_schema=agent_schema,
+         ):
+             yield chunk
+
+
+ async def stream_with_child_events(
+     tools_stream,
+     child_event_sink: asyncio.Queue,
+     state: StreamingState,
+     session_id: str | None = None,
+     user_id: str | None = None,
+     message_id: str | None = None,
+     agent_schema: str | None = None,
+ ) -> AsyncGenerator[tuple[str, Any], None]:
+     """
+     Multiplex tool events with child events using asyncio.wait().
+
+     This is the key fix for child agent streaming - instead of draining
+     the queue synchronously during tool event iteration, we concurrently
+     listen to both sources and yield events as they arrive.
+
+     Yields:
+         Tuples of (event_type, event_data) where event_type is either
+         "tool" or "child", allowing the caller to handle each appropriately.
+     """
+     tool_iter = tools_stream.__aiter__()
+
+     # Create initial tasks
+     pending_tool: asyncio.Task | None = None
+     pending_child: asyncio.Task | None = None
+
+     try:
+         pending_tool = asyncio.create_task(tool_iter.__anext__())
+     except StopAsyncIteration:
+         # No tool events, just drain any remaining child events
+         while not child_event_sink.empty():
+             try:
+                 child_event = child_event_sink.get_nowait()
+                 yield ("child", child_event)
+             except asyncio.QueueEmpty:
+                 break
+         return
+
+     # Start listening for child events with a short timeout
+     pending_child = asyncio.create_task(
+         _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+     )
+
+     try:
+         while True:
+             # Wait for either source to produce an event
+             tasks = {t for t in [pending_tool, pending_child] if t is not None}
+             if not tasks:
+                 break
+
+             done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+
+             for task in done:
+                 try:
+                     result = task.result()
+                 except asyncio.TimeoutError:
+                     # Child queue timeout - restart listener
+                     if task is pending_child:
+                         pending_child = asyncio.create_task(
+                             _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+                         )
+                     continue
+                 except StopAsyncIteration:
+                     # Tool stream exhausted
+                     if task is pending_tool:
+                         pending_tool = None
+                         # Final drain of any remaining child events
+                         if pending_child:
+                             pending_child.cancel()
+                             try:
+                                 await pending_child
+                             except asyncio.CancelledError:
+                                 pass
+                         while not child_event_sink.empty():
+                             try:
+                                 child_event = child_event_sink.get_nowait()
+                                 yield ("child", child_event)
+                             except asyncio.QueueEmpty:
+                                 break
+                         return
+                     continue
+
+                 if task is pending_child and result is not None:
+                     # Got a child event
+                     yield ("child", result)
+                     # Restart child listener
+                     pending_child = asyncio.create_task(
+                         _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+                     )
+                 elif task is pending_tool:
+                     # Got a tool event
+                     yield ("tool", result)
+                     # Get next tool event
+                     try:
+                         pending_tool = asyncio.create_task(tool_iter.__anext__())
+                     except StopAsyncIteration:
+                         pending_tool = None
+                 elif task is pending_child and result is None:
+                     # Timeout with no event - restart listener
+                     pending_child = asyncio.create_task(
+                         _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+                     )
+     finally:
+         # Cleanup any pending tasks
+         for task in [pending_tool, pending_child]:
+             if task and not task.done():
+                 task.cancel()
+                 try:
+                     await task
+                 except asyncio.CancelledError:
+                     pass
+
+
+ async def _get_child_event_with_timeout(
+     queue: asyncio.Queue, timeout: float = 0.05
+ ) -> dict | None:
+     """
+     Get an event from the queue with a timeout.
+
+     Returns None on timeout (no event available).
+     This allows the multiplexer to check for tool events regularly.
+     """
+     try:
+         return await asyncio.wait_for(queue.get(), timeout=timeout)
+     except asyncio.TimeoutError:
+         return None
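The multiplexer above is the core of the new module: rather than draining the child queue only when a tool event happens to arrive, it races both sources with asyncio.wait(). The following self-contained toy reproduces that pattern outside the package so it can be run directly; `fake_tool_stream`, `fake_child_agent`, and `merge` are hypothetical stand-ins for the real tool stream, ask_agent child, and `stream_with_child_events`, not remdb code:

```python
import asyncio
from typing import Any, AsyncGenerator


async def fake_tool_stream() -> AsyncGenerator[str, None]:
    # Stand-in for the parent agent's tool event stream.
    for i in range(3):
        await asyncio.sleep(0.10)
        yield f"tool-event-{i}"


async def fake_child_agent(sink: asyncio.Queue) -> None:
    # Stand-in for a child agent pushing events through the event sink.
    for i in range(3):
        await asyncio.sleep(0.07)
        await sink.put({"type": "child_content", "content": f"chunk-{i}"})


async def merge(tool_stream, sink: asyncio.Queue) -> AsyncGenerator[tuple[str, Any], None]:
    # Same shape as stream_with_child_events: yield ("tool" | "child", data)
    # as soon as either source produces something.
    tool_iter = tool_stream.__aiter__()
    pending_tool = asyncio.ensure_future(tool_iter.__anext__())
    pending_child = asyncio.ensure_future(sink.get())
    while pending_tool is not None:
        done, _ = await asyncio.wait(
            {pending_tool, pending_child}, return_when=asyncio.FIRST_COMPLETED
        )
        if pending_child in done:
            yield ("child", pending_child.result())
            pending_child = asyncio.ensure_future(sink.get())
        if pending_tool in done:
            try:
                yield ("tool", pending_tool.result())
                pending_tool = asyncio.ensure_future(tool_iter.__anext__())
            except StopAsyncIteration:
                pending_tool = None
    pending_child.cancel()
    while not sink.empty():  # final drain, as in the real module
        yield ("child", sink.get_nowait())


async def main() -> None:
    sink: asyncio.Queue = asyncio.Queue()
    child = asyncio.create_task(fake_child_agent(sink))
    async for kind, data in merge(fake_tool_stream(), sink):
        print(kind, data)  # child and tool events interleave by arrival time
    await child


asyncio.run(main())
```

Re-arming `sink.get()` after each child event serves the same purpose as the 0.05 s polling timeout in the real `_get_child_event_with_timeout`: neither source can starve the other while one of them is idle.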
@@ -16,11 +16,11 @@ IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
  kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
 
  # Terminal 2: Phoenix UI - view traces at http://localhost:6006
- kubectl port-forward -n siggy svc/phoenix 6006:6006
+ kubectl port-forward -n rem svc/phoenix 6006:6006
 
  2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
 
- export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
+ export PHOENIX_API_KEY=$(kubectl get secret -n rem rem-phoenix-api-key \\
      -o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
 
  3. Start API with OTEL and Phoenix enabled:
@@ -70,7 +70,7 @@ OTEL Architecture
 =================
 
  REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
- (port 4318)              (k8s: observability)       (k8s: siggy)
+ (port 4318)              (k8s: observability)       (k8s: rem)
 
  Environment Variables:
    OTEL__ENABLED=true      Enable OTEL tracing (required for trace capture)
@@ -164,7 +164,7 @@ from .models import (
      ChatCompletionUsage,
      ChatMessage,
  )
- from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response
+ from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response, save_user_message
 
  router = APIRouter(prefix="/api/v1", tags=["chat"])
 
@@ -215,7 +215,7 @@ async def ensure_session_with_metadata(
      Merges request metadata with existing session metadata.
 
      Args:
-         session_id: Session identifier (maps to Session.name)
+         session_id: Session UUID from X-Session-Id header
          user_id: User identifier
          tenant_id: Tenant identifier
          is_eval: Whether this is an evaluation session
@@ -228,12 +228,8 @@ async def ensure_session_with_metadata(
      try:
          repo = Repository(Session, table_name="sessions")
 
-         # Try to load existing session by name (session_id is the name field)
-         existing_list = await repo.find(
-             filters={"name": session_id, "tenant_id": tenant_id},
-             limit=1,
-         )
-         existing = existing_list[0] if existing_list else None
+         # Look up session by UUID (id field)
+         existing = await repo.get_by_id(session_id)
 
          if existing:
              # Merge metadata if provided
@@ -254,9 +250,10 @@ async def ensure_session_with_metadata(
              await repo.upsert(existing)
              logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")
          else:
-             # Create new session
+             # Create new session with the provided UUID as the id
              session = Session(
-                 name=session_id,
+                 id=session_id,  # Use the provided UUID as session id
+                 name=session_id,  # Default name to UUID, can be updated later with LLM-generated name
                  mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
                  user_id=user_id,
                  tenant_id=tenant_id,
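These two hunks switch session lookup from a name-based `find` to a UUID-keyed `get_by_id`, and new sessions now adopt the client-supplied id directly, which is why the hunk header above warns that session IDs must be UUIDs. A minimal guard sketch for callers; `validate_session_id` is a hypothetical helper, not part of the package:

```python
import uuid


def validate_session_id(session_id: str) -> str:
    """Reject non-UUID session ids early; Session.id is now UUID-keyed."""
    try:
        return str(uuid.UUID(session_id))
    except ValueError:
        raise ValueError(f"session_id must be a UUID, got {session_id!r}") from None


print(validate_session_id("2f1f9df2-3f3a-4f57-9d2c-6f0a1c0e7b42"))  # prints the normalized UUID
```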
@@ -503,16 +500,51 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
              logger.error(f"Failed to transcribe audio: {e}")
              # Fall through with original content (will likely fail at agent)
 
-     # Use ContextBuilder to construct complete message list with:
-     # 1. System context hint (date + user profile)
-     # 2. Session history (if session_id provided)
-     # 3. New messages from request body (transcribed if audio)
+     # Use ContextBuilder to construct context and basic messages
+     # Note: We load session history separately for proper pydantic-ai message_history
      context, messages = await ContextBuilder.build_from_headers(
          headers=dict(request.headers),
          new_messages=new_messages,
          user_id=temp_context.user_id,  # From JWT token (source of truth)
      )
 
+     # Load raw session history for proper pydantic-ai message_history format
+     # This enables proper tool call/return pairing for LLM API compatibility
+     from ....services.session import SessionMessageStore, session_to_pydantic_messages, audit_session_history
+     from ....agentic.schema import get_system_prompt
+
+     pydantic_message_history = None
+     if context.session_id and settings.postgres.enabled:
+         try:
+             store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
+             raw_session_history = await store.load_session_messages(
+                 session_id=context.session_id,
+                 user_id=context.user_id,
+                 compress_on_load=False,  # Don't compress - we need full data for reconstruction
+             )
+             if raw_session_history:
+                 # CRITICAL: Extract and pass the agent's system prompt
+                 # pydantic-ai only auto-adds system prompts when message_history is empty
+                 # When we pass message_history, we must include the system prompt ourselves
+                 agent_system_prompt = get_system_prompt(agent_schema) if agent_schema else None
+                 pydantic_message_history = session_to_pydantic_messages(
+                     raw_session_history,
+                     system_prompt=agent_system_prompt,
+                 )
+                 logger.debug(f"Converted {len(raw_session_history)} session messages to {len(pydantic_message_history)} pydantic-ai messages (with system prompt)")
+
+                 # Audit session history if enabled (for debugging)
+                 audit_session_history(
+                     session_id=context.session_id,
+                     agent_name=schema_name or "default",
+                     prompt=body.messages[-1].content if body.messages else "",
+                     raw_session_history=raw_session_history,
+                     pydantic_messages_count=len(pydantic_message_history),
+                 )
+         except Exception as e:
+             logger.warning(f"Failed to load session history for message_history: {e}")
+             # Fall back to old behavior (concatenated prompt)
+
      logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
 
      # Ensure session exists with metadata and eval mode if applicable
@@ -533,33 +565,30 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
          model_override=body.model,  # type: ignore[arg-type]
      )
 
-     # Combine all messages into single prompt for agent
-     # ContextBuilder already assembled: system context + history + new messages
-     prompt = "\n".join(msg.content for msg in messages)
+     # Build the prompt for the agent
+     # If we have proper message_history, use just the latest user message as prompt
+     # Otherwise, fall back to concatenating all messages (legacy behavior)
+     if pydantic_message_history:
+         # Use the latest user message as the prompt, with history passed separately
+         user_prompt = body.messages[-1].content if body.messages else ""
+         prompt = user_prompt
+         logger.debug(f"Using message_history with {len(pydantic_message_history)} messages")
+     else:
+         # Legacy: Combine all messages into single prompt for agent
+         prompt = "\n".join(msg.content for msg in messages)
 
      # Generate OpenAI-compatible request ID
      request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
      # Streaming mode
      if body.stream:
-         # Save user message before streaming starts
-         if settings.postgres.enabled and context.session_id:
-             user_message = {
-                 "role": "user",
-                 "content": body.messages[-1].content if body.messages else "",
-                 "timestamp": datetime.utcnow().isoformat(),
-             }
-             try:
-                 store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
-                 await store.store_session_messages(
-                     session_id=context.session_id,
-                     messages=[user_message],
-                     user_id=context.user_id,
-                     compress=False,  # User messages are typically short
-                 )
-                 logger.debug(f"Saved user message to session {context.session_id}")
-             except Exception as e:
-                 logger.error(f"Failed to save user message: {e}", exc_info=True)
+         # Save user message before streaming starts (using shared utility)
+         if context.session_id:
+             await save_user_message(
+                 session_id=context.session_id,
+                 user_id=context.user_id,
+                 content=body.messages[-1].content if body.messages else "",
+             )
 
          return StreamingResponse(
              stream_openai_response_with_save(
@@ -570,6 +599,8 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
                  agent_schema=schema_name,
                  session_id=context.session_id,
                  user_id=context.user_id,
+                 agent_context=context,  # Pass context for multi-agent support
+                 message_history=pydantic_message_history,  # Native pydantic-ai message history
              ),
              media_type="text/event-stream",
              headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
@@ -592,10 +623,16 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
          ) as span:
              # Capture trace context from the span we just created
              trace_id, span_id = get_current_trace_context()
-             result = await agent.run(prompt)
+             if pydantic_message_history:
+                 result = await agent.run(prompt, message_history=pydantic_message_history)
+             else:
+                 result = await agent.run(prompt)
      else:
          # No tracer available, run without tracing
-         result = await agent.run(prompt)
+         if pydantic_message_history:
+             result = await agent.run(prompt, message_history=pydantic_message_history)
+         else:
+             result = await agent.run(prompt)
 
      # Determine content format based on response_format request
      if body.response_format and body.response_format.type == "json_object":
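The split these hunks introduce (prior turns passed as `message_history`, only the newest user turn as the prompt) follows pydantic-ai's native API. A hedged standalone sketch of that contract: the model name and message contents are illustrative, running it requires a real API key, and `result.output` assumes a recent pydantic-ai release. Note the system prompt rides inside the history, since pydantic-ai only injects it when `message_history` is empty:

```python
import asyncio

from pydantic_ai import Agent
from pydantic_ai.messages import (
    ModelRequest,
    ModelResponse,
    SystemPromptPart,
    TextPart,
    UserPromptPart,
)

agent = Agent("openai:gpt-4o")

# Reconstructed history must carry the system prompt explicitly.
history = [
    ModelRequest(parts=[
        SystemPromptPart(content="You are REM, a helpful assistant."),
        UserPromptPart(content="Remember: my favorite color is teal."),
    ]),
    ModelResponse(parts=[TextPart(content="Noted - teal it is.")]),
]


async def main() -> None:
    # Only the latest user message goes in as the prompt; prior turns ride
    # along as message_history, preserving tool call/return pairing.
    result = await agent.run("What is my favorite color?", message_history=history)
    print(result.output)


asyncio.run(main())
```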
@@ -321,7 +321,11 @@ class MetadataEvent(BaseModel):
      # Agent info
      agent_schema: str | None = Field(
          default=None,
-         description="Name of the agent schema used for this response (e.g., 'rem', 'query-assistant')"
+         description="Name of the top-level agent schema (e.g., 'rem', 'intake')"
+     )
+     responding_agent: str | None = Field(
+         default=None,
+         description="Name of the agent that produced this response (may differ from agent_schema if delegated via ask_agent)"
      )
 
      # Session info
@@ -409,9 +413,9 @@ class ToolCallEvent(BaseModel):
          default=None,
          description="Tool arguments (for 'started' status)"
      )
-     result: str | None = Field(
+     result: str | dict[str, Any] | list[Any] | None = Field(
          default=None,
-         description="Tool result summary (for 'completed' status)"
+         description="Tool result - full dict/list for structured data, string for simple results"
      )
      error: str | None = Field(
          default=None,
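The widened `result` union is what lets structured child results reach the frontend intact instead of being flattened to a string. A standalone sketch of the effect; the abbreviated `ToolCallEventSketch` model is a stand-in, not the package's class:

```python
from typing import Any

from pydantic import BaseModel, Field


class ToolCallEventSketch(BaseModel):
    # Abbreviated stand-in for ToolCallEvent with the widened result union.
    tool_name: str
    status: str
    result: str | dict[str, Any] | list[Any] | None = Field(default=None)


evt = ToolCallEventSketch(
    tool_name="intake_diverge:tool",
    status="completed",
    result={"risk_level": "low", "confidence": 0.92},
)

# With result: str | None the dict would have had to be str()-flattened upstream;
# now it serializes as real JSON for the SSE consumer.
print(evt.model_dump_json())
```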