remdb 0.3.180-py3-none-any.whl → 0.3.258-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +36 -2
- rem/agentic/__init__.py +10 -1
- rem/agentic/context.py +185 -1
- rem/agentic/context_builder.py +56 -35
- rem/agentic/mcp/tool_wrapper.py +2 -2
- rem/agentic/providers/pydantic_ai.py +303 -111
- rem/agentic/schema.py +2 -2
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/server.py +4 -0
- rem/api/mcp_router/tools.py +608 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +219 -20
- rem/api/routers/chat/child_streaming.py +393 -0
- rem/api/routers/chat/completions.py +77 -40
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +381 -291
- rem/api/routers/chat/streaming_utils.py +325 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +11 -3
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +17 -15
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +205 -114
- rem/cli/commands/db.py +55 -31
- rem/cli/commands/experiments.py +1 -1
- rem/cli/commands/process.py +179 -43
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +1 -1
- rem/models/entities/ontology.py +18 -20
- rem/models/entities/session.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +1 -1
- rem/schemas/agents/rem.yaml +1 -1
- rem/schemas/agents/test_orchestrator.yaml +42 -0
- rem/schemas/agents/test_structured_output.yaml +52 -0
- rem/services/content/providers.py +151 -49
- rem/services/content/service.py +18 -5
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +11 -10
- rem/services/postgres/repository.py +39 -28
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/rem/README.md +4 -3
- rem/services/rem/parser.py +7 -10
- rem/services/rem/service.py +47 -0
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +47 -5
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +2 -1
- rem/settings.py +92 -7
- rem/sql/migrations/001_install.sql +125 -7
- rem/sql/migrations/002_install_models.sql +159 -149
- rem/sql/migrations/004_cache_system.sql +10 -276
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/schema_loader.py +180 -120
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/child_streaming.py (new file)

@@ -0,0 +1,393 @@
+"""
+Child Agent Event Handling.
+
+Handles events from child agents during multi-agent orchestration.
+
+Event Flow:
+```
+Parent Agent (Orchestrator)
+      │
+      ▼
+  ask_agent tool
+      │
+      ├──────────────────────────────────┐
+      ▼                                  │
+  Child Agent (intake_diverge)           │
+      │                                  │
+      ├── child_tool_start ──────────────┼──► Event Sink (Queue)
+      ├── child_content ─────────────────┤
+      └── child_tool_result ─────────────┘
+                                         │
+                                         ▼
+                                drain_child_events()
+                                         │
+                                         ├── SSE to client
+                                         └── DB persistence
+```
+
+IMPORTANT: When child_content is streamed, parent text output should be SKIPPED
+to prevent content duplication.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import uuid
+from typing import TYPE_CHECKING, Any, AsyncGenerator
+
+from loguru import logger
+
+from .streaming_utils import StreamingState, build_content_chunk
+from .sse_events import MetadataEvent, ToolCallEvent, format_sse_event
+from ....services.session import SessionMessageStore
+from ....settings import settings
+from ....utils.date_utils import to_iso, utc_now
+
+if TYPE_CHECKING:
+    from ....agentic.context import AgentContext
+
+
+async def handle_child_tool_start(
+    state: StreamingState,
+    child_agent: str,
+    tool_name: str,
+    arguments: dict | str | None,
+    session_id: str | None,
+    user_id: str | None,
+) -> AsyncGenerator[str, None]:
+    """
+    Handle child_tool_start event.
+
+    Actions:
+    1. Log the tool call
+    2. Emit SSE event
+    3. Save to database (with tool_arguments in metadata for consistency with parent)
+    """
+    full_tool_name = f"{child_agent}:{tool_name}"
+    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+
+    # Normalize arguments - may come as JSON string from ToolCallPart.args
+    if isinstance(arguments, str):
+        try:
+            arguments = json.loads(arguments)
+        except json.JSONDecodeError:
+            arguments = None
+    elif not isinstance(arguments, dict):
+        arguments = None
+
+    # 1. LOG
+    logger.info(f"🔧 {full_tool_name}")
+
+    # 2. EMIT SSE
+    yield format_sse_event(ToolCallEvent(
+        tool_name=full_tool_name,
+        tool_id=tool_id,
+        status="started",
+        arguments=arguments,
+    ))
+
+    # 3. SAVE TO DB - content contains args as JSON (pydantic_messages.py parses it)
+    if session_id and settings.postgres.enabled:
+        try:
+            store = SessionMessageStore(
+                user_id=user_id or settings.test.effective_user_id
+            )
+            tool_msg = {
+                "role": "tool",
+                # Content is the tool call args as JSON - this is what the agent sees on reload
+                # and what pydantic_messages.py parses for ToolCallPart.args
+                "content": json.dumps(arguments) if arguments else "",
+                "timestamp": to_iso(utc_now()),
+                "tool_call_id": tool_id,
+                "tool_name": full_tool_name,
+            }
+            await store.store_session_messages(
+                session_id=session_id,
+                messages=[tool_msg],
+                user_id=user_id,
+                compress=False,
+            )
+        except Exception as e:
+            logger.warning(f"Failed to save child tool call: {e}")
+
+
+def handle_child_content(
+    state: StreamingState,
+    child_agent: str,
+    content: str,
+) -> str | None:
+    """
+    Handle child_content event.
+
+    CRITICAL: Sets state.child_content_streamed = True
+    This flag is used to skip parent text output and prevent duplication.
+
+    Returns:
+        SSE chunk or None if content is empty
+    """
+    if not content:
+        return None
+
+    # Track that child content was streamed
+    # Parent text output should be SKIPPED when this is True
+    state.child_content_streamed = True
+    state.responding_agent = child_agent
+
+    return build_content_chunk(state, content)
+
+
+async def handle_child_tool_result(
+    state: StreamingState,
+    child_agent: str,
+    result: Any,
+    message_id: str | None,
+    session_id: str | None,
+    agent_schema: str | None,
+) -> AsyncGenerator[str, None]:
+    """
+    Handle child_tool_result event.
+
+    Actions:
+    1. Log metadata if present
+    2. Emit metadata event if present
+    3. Emit tool completion event
+    """
+    # Check for metadata registration
+    if isinstance(result, dict) and result.get("_metadata_event"):
+        risk = result.get("risk_level", "")
+        conf = result.get("confidence", "")
+        logger.info(f"📊 {child_agent} metadata: risk={risk}, confidence={conf}")
+
+        # Update responding agent from child
+        if result.get("agent_schema"):
+            state.responding_agent = result.get("agent_schema")
+
+        # Build extra dict with risk fields
+        extra_data = {}
+        if risk:
+            extra_data["risk_level"] = risk
+
+        yield format_sse_event(MetadataEvent(
+            message_id=message_id,
+            session_id=session_id,
+            agent_schema=agent_schema,
+            responding_agent=state.responding_agent,
+            confidence=result.get("confidence"),
+            extra=extra_data if extra_data else None,
+        ))
+
+    # Emit tool completion
+    # Preserve full result for dict/list types (needed for frontend)
+    if isinstance(result, (dict, list)):
+        result_for_sse = result
+    else:
+        result_for_sse = str(result) if result else None
+
+    yield format_sse_event(ToolCallEvent(
+        tool_name=f"{child_agent}:tool",
+        tool_id=f"call_{uuid.uuid4().hex[:8]}",
+        status="completed",
+        result=result_for_sse,
+    ))
+
+
+async def drain_child_events(
+    event_sink: asyncio.Queue,
+    state: StreamingState,
+    session_id: str | None = None,
+    user_id: str | None = None,
+    message_id: str | None = None,
+    agent_schema: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Drain all pending child events from the event sink.
+
+    This is called during tool execution to process events
+    pushed by child agents via ask_agent.
+
+    IMPORTANT: When child_content events are processed, this sets
+    state.child_content_streamed = True. Callers should check this
+    flag and skip parent text output to prevent duplication.
+    """
+    while not event_sink.empty():
+        try:
+            child_event = event_sink.get_nowait()
+            async for chunk in process_child_event(
+                child_event, state, session_id, user_id, message_id, agent_schema
+            ):
+                yield chunk
+        except Exception as e:
+            logger.warning(f"Error processing child event: {e}")
+
+
+async def process_child_event(
+    child_event: dict,
+    state: StreamingState,
+    session_id: str | None = None,
+    user_id: str | None = None,
+    message_id: str | None = None,
+    agent_schema: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """Process a single child event and yield SSE chunks."""
+    event_type = child_event.get("type", "")
+    child_agent = child_event.get("agent_name", "child")
+
+    if event_type == "child_tool_start":
+        async for chunk in handle_child_tool_start(
+            state=state,
+            child_agent=child_agent,
+            tool_name=child_event.get("tool_name", "tool"),
+            arguments=child_event.get("arguments"),
+            session_id=session_id,
+            user_id=user_id,
+        ):
+            yield chunk
+
+    elif event_type == "child_content":
+        chunk = handle_child_content(
+            state=state,
+            child_agent=child_agent,
+            content=child_event.get("content", ""),
+        )
+        if chunk:
+            yield chunk
+
+    elif event_type == "child_tool_result":
+        async for chunk in handle_child_tool_result(
+            state=state,
+            child_agent=child_agent,
+            result=child_event.get("result"),
+            message_id=message_id,
+            session_id=session_id,
+            agent_schema=agent_schema,
+        ):
+            yield chunk
+
+
+async def stream_with_child_events(
+    tools_stream,
+    child_event_sink: asyncio.Queue,
+    state: StreamingState,
+    session_id: str | None = None,
+    user_id: str | None = None,
+    message_id: str | None = None,
+    agent_schema: str | None = None,
+) -> AsyncGenerator[tuple[str, Any], None]:
+    """
+    Multiplex tool events with child events using asyncio.wait().
+
+    This is the key fix for child agent streaming - instead of draining
+    the queue synchronously during tool event iteration, we concurrently
+    listen to both sources and yield events as they arrive.
+
+    Yields:
+        Tuples of (event_type, event_data) where event_type is either
+        "tool" or "child", allowing the caller to handle each appropriately.
+    """
+    tool_iter = tools_stream.__aiter__()
+
+    # Create initial tasks
+    pending_tool: asyncio.Task | None = None
+    pending_child: asyncio.Task | None = None
+
+    try:
+        pending_tool = asyncio.create_task(tool_iter.__anext__())
+    except StopAsyncIteration:
+        # No tool events, just drain any remaining child events
+        while not child_event_sink.empty():
+            try:
+                child_event = child_event_sink.get_nowait()
+                yield ("child", child_event)
+            except asyncio.QueueEmpty:
+                break
+        return
+
+    # Start listening for child events with a short timeout
+    pending_child = asyncio.create_task(
+        _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+    )
+
+    try:
+        while True:
+            # Wait for either source to produce an event
+            tasks = {t for t in [pending_tool, pending_child] if t is not None}
+            if not tasks:
+                break
+
+            done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+
+            for task in done:
+                try:
+                    result = task.result()
+                except asyncio.TimeoutError:
+                    # Child queue timeout - restart listener
+                    if task is pending_child:
+                        pending_child = asyncio.create_task(
+                            _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+                        )
+                    continue
+                except StopAsyncIteration:
+                    # Tool stream exhausted
+                    if task is pending_tool:
+                        pending_tool = None
+                        # Final drain of any remaining child events
+                        if pending_child:
+                            pending_child.cancel()
+                            try:
+                                await pending_child
+                            except asyncio.CancelledError:
+                                pass
+                        while not child_event_sink.empty():
+                            try:
+                                child_event = child_event_sink.get_nowait()
+                                yield ("child", child_event)
+                            except asyncio.QueueEmpty:
+                                break
+                        return
+                    continue
+
+                if task is pending_child and result is not None:
+                    # Got a child event
+                    yield ("child", result)
+                    # Restart child listener
+                    pending_child = asyncio.create_task(
+                        _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+                    )
+                elif task is pending_tool:
+                    # Got a tool event
+                    yield ("tool", result)
+                    # Get next tool event
+                    try:
+                        pending_tool = asyncio.create_task(tool_iter.__anext__())
+                    except StopAsyncIteration:
+                        pending_tool = None
+                elif task is pending_child and result is None:
+                    # Timeout with no event - restart listener
+                    pending_child = asyncio.create_task(
+                        _get_child_event_with_timeout(child_event_sink, timeout=0.05)
+                    )
+    finally:
+        # Cleanup any pending tasks
+        for task in [pending_tool, pending_child]:
+            if task and not task.done():
+                task.cancel()
+                try:
+                    await task
+                except asyncio.CancelledError:
+                    pass
+
+
+async def _get_child_event_with_timeout(
+    queue: asyncio.Queue, timeout: float = 0.05
+) -> dict | None:
+    """
+    Get an event from the queue with a timeout.
+
+    Returns None on timeout (no event available).
+    This allows the multiplexer to check for tool events regularly.
+    """
+    try:
+        return await asyncio.wait_for(queue.get(), timeout=timeout)
+    except asyncio.TimeoutError:
+        return None
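The heart of the new module is the `stream_with_child_events` multiplexer. Since `StreamingState` and the SSE helpers live elsewhere in the package, here is a minimal, self-contained sketch of the same pattern; `fake_tool_stream`, `fake_child_agent`, and `multiplex` are illustrative stand-ins, not package names:

```python
import asyncio
from typing import Any, AsyncGenerator


async def fake_tool_stream() -> AsyncGenerator[str, None]:
    """Stands in for the parent agent's tool event stream."""
    for name in ("plan", "search", "summarize"):
        await asyncio.sleep(0.1)
        yield f"tool:{name}"


async def fake_child_agent(sink: asyncio.Queue) -> None:
    """Stands in for a child agent pushing events via the ask_agent sink."""
    for i in range(5):
        await asyncio.sleep(0.04)
        await sink.put({"type": "child_content", "agent_name": "intake", "content": f"chunk {i}"})


async def get_with_timeout(queue: asyncio.Queue, timeout: float = 0.05) -> dict | None:
    """Mirror of _get_child_event_with_timeout: None signals 'no event yet'."""
    try:
        return await asyncio.wait_for(queue.get(), timeout=timeout)
    except asyncio.TimeoutError:
        return None


async def multiplex(tool_stream, sink: asyncio.Queue) -> AsyncGenerator[tuple[str, Any], None]:
    """Simplified stream_with_child_events: race the next tool event
    against a short-timeout read of the child queue."""
    tool_iter = tool_stream.__aiter__()
    pending_tool: asyncio.Task | None = asyncio.ensure_future(tool_iter.__anext__())
    pending_child = asyncio.ensure_future(get_with_timeout(sink))
    while pending_tool is not None:
        done, _ = await asyncio.wait(
            {pending_tool, pending_child}, return_when=asyncio.FIRST_COMPLETED
        )
        if pending_child in done:
            event = pending_child.result()
            if event is not None:  # None means the read timed out
                yield ("child", event)
            pending_child = asyncio.ensure_future(get_with_timeout(sink))
        if pending_tool in done:
            try:
                yield ("tool", pending_tool.result())
                pending_tool = asyncio.ensure_future(tool_iter.__anext__())
            except StopAsyncIteration:
                pending_tool = None
    # A completed-but-unconsumed child read can drop one event here;
    # the real implementation drains more carefully.
    pending_child.cancel()
    try:
        await pending_child
    except asyncio.CancelledError:
        pass
    while not sink.empty():  # final drain after the tool stream ends
        yield ("child", sink.get_nowait())


async def main() -> None:
    sink: asyncio.Queue = asyncio.Queue()
    child = asyncio.ensure_future(fake_child_agent(sink))
    async for kind, event in multiplex(fake_tool_stream(), sink):
        print(kind, event)
    await child


asyncio.run(main())
```

The short `wait_for` timeout keeps the child-queue read from blocking the tool stream indefinitely; the trade-off is a periodic wakeup (every 50 ms here) even when no child events arrive.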
rem/api/routers/chat/completions.py

@@ -16,11 +16,11 @@ IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
 kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
 
 # Terminal 2: Phoenix UI - view traces at http://localhost:6006
-kubectl port-forward -n
+kubectl port-forward -n rem svc/phoenix 6006:6006
 
 2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
 
-export PHOENIX_API_KEY=$(kubectl get secret -n
+export PHOENIX_API_KEY=$(kubectl get secret -n rem rem-phoenix-api-key \\
 -o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
 
 3. Start API with OTEL and Phoenix enabled:

@@ -70,7 +70,7 @@ OTEL Architecture
 =================
 
 REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
-  (port 4318)            (k8s: observability)        (k8s:
+  (port 4318)            (k8s: observability)        (k8s: rem)
 
 Environment Variables:
   OTEL__ENABLED=true    Enable OTEL tracing (required for trace capture)

@@ -164,7 +164,7 @@ from .models import (
     ChatCompletionUsage,
     ChatMessage,
 )
-from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response
+from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response, save_user_message
 
 router = APIRouter(prefix="/api/v1", tags=["chat"])
 

@@ -215,7 +215,7 @@ async def ensure_session_with_metadata(
     Merges request metadata with existing session metadata.
 
     Args:
-        session_id: Session
+        session_id: Session UUID from X-Session-Id header
         user_id: User identifier
         tenant_id: Tenant identifier
         is_eval: Whether this is an evaluation session

@@ -228,12 +228,8 @@ async def ensure_session_with_metadata(
     try:
         repo = Repository(Session, table_name="sessions")
 
-        #
-
-            filters={"name": session_id, "tenant_id": tenant_id},
-            limit=1,
-        )
-        existing = existing_list[0] if existing_list else None
+        # Look up session by UUID (id field)
+        existing = await repo.get_by_id(session_id)
 
         if existing:
             # Merge metadata if provided

@@ -254,9 +250,10 @@ async def ensure_session_with_metadata(
             await repo.upsert(existing)
             logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")
         else:
-            # Create new session
+            # Create new session with the provided UUID as the id
             session = Session(
-
+                id=session_id,  # Use the provided UUID as session id
+                name=session_id,  # Default name to UUID, can be updated later with LLM-generated name
                 mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
                 user_id=user_id,
                 tenant_id=tenant_id,
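Taken together, the three `ensure_session_with_metadata` hunks implement a get-or-create keyed on the session UUID. A condensed sketch, assuming `Repository`, `Session`, and `SessionMode` as imported in completions.py; the metadata merge is simplified and the `metadata` attribute name is a guess:

```python
async def ensure_session(
    session_id: str,
    user_id: str | None,
    tenant_id: str | None,
    metadata: dict | None = None,
    is_eval: bool = False,
) -> "Session":
    """Illustrative condensation of ensure_session_with_metadata, not its exact body."""
    repo = Repository(Session, table_name="sessions")

    existing = await repo.get_by_id(session_id)  # lookup by UUID primary key
    if existing:
        if metadata:
            # Merge request metadata over what is already stored (assumed field name)
            existing.metadata = {**(existing.metadata or {}), **metadata}
        await repo.upsert(existing)
        return existing

    session = Session(
        id=session_id,    # client-supplied UUID becomes the primary key
        name=session_id,  # placeholder name until an LLM-generated one replaces it
        mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
        user_id=user_id,
        tenant_id=tenant_id,
    )
    await repo.upsert(session)
    return session
```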
@@ -503,16 +500,51 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
             logger.error(f"Failed to transcribe audio: {e}")
             # Fall through with original content (will likely fail at agent)
 
-    # Use ContextBuilder to construct
-    #
-    # 2. Session history (if session_id provided)
-    # 3. New messages from request body (transcribed if audio)
+    # Use ContextBuilder to construct context and basic messages
+    # Note: We load session history separately for proper pydantic-ai message_history
     context, messages = await ContextBuilder.build_from_headers(
         headers=dict(request.headers),
         new_messages=new_messages,
         user_id=temp_context.user_id,  # From JWT token (source of truth)
     )
 
+    # Load raw session history for proper pydantic-ai message_history format
+    # This enables proper tool call/return pairing for LLM API compatibility
+    from ....services.session import SessionMessageStore, session_to_pydantic_messages, audit_session_history
+    from ....agentic.schema import get_system_prompt
+
+    pydantic_message_history = None
+    if context.session_id and settings.postgres.enabled:
+        try:
+            store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
+            raw_session_history = await store.load_session_messages(
+                session_id=context.session_id,
+                user_id=context.user_id,
+                compress_on_load=False,  # Don't compress - we need full data for reconstruction
+            )
+            if raw_session_history:
+                # CRITICAL: Extract and pass the agent's system prompt
+                # pydantic-ai only auto-adds system prompts when message_history is empty
+                # When we pass message_history, we must include the system prompt ourselves
+                agent_system_prompt = get_system_prompt(agent_schema) if agent_schema else None
+                pydantic_message_history = session_to_pydantic_messages(
+                    raw_session_history,
+                    system_prompt=agent_system_prompt,
+                )
+                logger.debug(f"Converted {len(raw_session_history)} session messages to {len(pydantic_message_history)} pydantic-ai messages (with system prompt)")
+
+                # Audit session history if enabled (for debugging)
+                audit_session_history(
+                    session_id=context.session_id,
+                    agent_name=schema_name or "default",
+                    prompt=body.messages[-1].content if body.messages else "",
+                    raw_session_history=raw_session_history,
+                    pydantic_messages_count=len(pydantic_message_history),
+                )
+        except Exception as e:
+            logger.warning(f"Failed to load session history for message_history: {e}")
+            # Fall back to old behavior (concatenated prompt)
+
     logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
 
     # Ensure session exists with metadata and eval mode if applicable
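`session_to_pydantic_messages` itself lives in the new `rem/services/session/pydantic_messages.py` (+310 lines, not shown in this excerpt). Based on the comments above, its core job is plausibly something like the following sketch built on pydantic-ai's message types; the tool call/return pairing is the part this hunk cares about, and it is only gestured at here:

```python
from pydantic_ai.messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    SystemPromptPart,
    TextPart,
    UserPromptPart,
)


def to_pydantic_messages(
    rows: list[dict], system_prompt: str | None = None
) -> list[ModelMessage]:
    """Hypothetical reconstruction: stored role/content dicts -> pydantic-ai history."""
    messages: list[ModelMessage] = []
    if system_prompt:
        # pydantic-ai only auto-adds the system prompt when message_history is
        # empty, so a restored history must carry it explicitly (see comment above).
        messages.append(ModelRequest(parts=[SystemPromptPart(content=system_prompt)]))
    for row in rows:
        if row.get("role") == "user":
            messages.append(ModelRequest(parts=[UserPromptPart(content=row["content"])]))
        elif row.get("role") == "assistant":
            messages.append(ModelResponse(parts=[TextPart(content=row["content"])]))
        # "tool" rows would be rebuilt into ToolCallPart/ToolReturnPart pairs so the
        # LLM API sees every tool call matched with its return; omitted here.
    return messages
```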
@@ -533,33 +565,30 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
         model_override=body.model,  # type: ignore[arg-type]
     )
 
-    #
-    #
-
+    # Build the prompt for the agent
+    # If we have proper message_history, use just the latest user message as prompt
+    # Otherwise, fall back to concatenating all messages (legacy behavior)
+    if pydantic_message_history:
+        # Use the latest user message as the prompt, with history passed separately
+        user_prompt = body.messages[-1].content if body.messages else ""
+        prompt = user_prompt
+        logger.debug(f"Using message_history with {len(pydantic_message_history)} messages")
+    else:
+        # Legacy: Combine all messages into single prompt for agent
+        prompt = "\n".join(msg.content for msg in messages)
 
     # Generate OpenAI-compatible request ID
     request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
     # Streaming mode
     if body.stream:
-        # Save user message before streaming starts
-        if
-
-
-
-        "
-
-        try:
-            store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
-            await store.store_session_messages(
-                session_id=context.session_id,
-                messages=[user_message],
-                user_id=context.user_id,
-                compress=False,  # User messages are typically short
-            )
-            logger.debug(f"Saved user message to session {context.session_id}")
-        except Exception as e:
-            logger.error(f"Failed to save user message: {e}", exc_info=True)
+        # Save user message before streaming starts (using shared utility)
+        if context.session_id:
+            await save_user_message(
+                session_id=context.session_id,
+                user_id=context.user_id,
+                content=body.messages[-1].content if body.messages else "",
+            )
 
         return StreamingResponse(
             stream_openai_response_with_save(
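`save_user_message` is imported from `.streaming` above; its body is outside this excerpt. Given the inline code it replaces, a plausible shape is the following sketch (the message dict mirrors the `tool_msg` built in child_streaming.py; treat field names as assumptions):

```python
async def save_user_message(
    session_id: str, user_id: str | None, content: str
) -> None:
    """Sketch of the shared utility; the packaged version may differ."""
    try:
        store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
        await store.store_session_messages(
            session_id=session_id,
            messages=[{"role": "user", "content": content, "timestamp": to_iso(utc_now())}],
            user_id=user_id,
            compress=False,  # user messages are typically short
        )
        logger.debug(f"Saved user message to session {session_id}")
    except Exception as e:
        # Persistence failures must not break the SSE response
        logger.error(f"Failed to save user message: {e}", exc_info=True)
```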
@@ -570,6 +599,8 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
                 agent_schema=schema_name,
                 session_id=context.session_id,
                 user_id=context.user_id,
+                agent_context=context,  # Pass context for multi-agent support
+                message_history=pydantic_message_history,  # Native pydantic-ai message history
             ),
             media_type="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
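From the client side, the streaming branch behaves like any OpenAI-compatible SSE endpoint. A consumption sketch with httpx — the `/api/v1/chat/completions` path is inferred from the router prefix above, the `X-Session-Id` header from the `ensure_session_with_metadata` docstring, and the payload fields are illustrative:

```python
import asyncio
import uuid

import httpx


async def main() -> None:
    payload = {
        "model": "gpt-4o",  # illustrative; use whatever model the deployment accepts
        "stream": True,
        "messages": [{"role": "user", "content": "hello"}],
    }
    headers = {"X-Session-Id": str(uuid.uuid4())}  # MUST be a UUID (see above)
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST", "http://localhost:8000/api/v1/chat/completions",
            json=payload, headers=headers,
        ) as resp:
            async for line in resp.aiter_lines():
                if line.startswith("data: "):
                    print(line[len("data: "):])  # content/tool_call/metadata events


asyncio.run(main())
```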
@@ -592,10 +623,16 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
         ) as span:
             # Capture trace context from the span we just created
             trace_id, span_id = get_current_trace_context()
-
+            if pydantic_message_history:
+                result = await agent.run(prompt, message_history=pydantic_message_history)
+            else:
+                result = await agent.run(prompt)
     else:
         # No tracer available, run without tracing
-
+        if pydantic_message_history:
+            result = await agent.run(prompt, message_history=pydantic_message_history)
+        else:
+            result = await agent.run(prompt)
 
     # Determine content format based on response_format request
     if body.response_format and body.response_format.type == "json_object":
rem/api/routers/chat/sse_events.py

@@ -321,7 +321,11 @@ class MetadataEvent(BaseModel):
     # Agent info
     agent_schema: str | None = Field(
         default=None,
-        description="Name of the agent schema
+        description="Name of the top-level agent schema (e.g., 'rem', 'intake')"
+    )
+    responding_agent: str | None = Field(
+        default=None,
+        description="Name of the agent that produced this response (may differ from agent_schema if delegated via ask_agent)"
     )
 
     # Session info
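The new field makes delegation visible to clients: when the top-level agent hands off via `ask_agent`, the two fields diverge. For example, assuming the `MetadataEvent` constructor shown above (values illustrative):

```python
event = MetadataEvent(
    agent_schema="rem",          # schema the request was routed to
    responding_agent="intake",   # child that actually produced the answer
    confidence=0.82,
)
```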
@@ -409,9 +413,9 @@ class ToolCallEvent(BaseModel):
         default=None,
         description="Tool arguments (for 'started' status)"
     )
-    result: str | None = Field(
+    result: str | dict[str, Any] | list[Any] | None = Field(
         default=None,
-        description="Tool result
+        description="Tool result - full dict/list for structured data, string for simple results"
     )
     error: str | None = Field(
         default=None,