remdb 0.3.202__py3-none-any.whl → 0.3.245__py3-none-any.whl
- rem/agentic/README.md +36 -2
- rem/agentic/context.py +86 -3
- rem/agentic/context_builder.py +39 -33
- rem/agentic/mcp/tool_wrapper.py +2 -2
- rem/agentic/providers/pydantic_ai.py +68 -51
- rem/agentic/schema.py +2 -2
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/tools.py +170 -18
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +175 -18
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +24 -29
- rem/api/routers/chat/sse_events.py +5 -1
- rem/api/routers/chat/streaming.py +242 -272
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +9 -1
- rem/api/routers/messages.py +80 -15
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +17 -15
- rem/api/routers/shared_sessions.py +16 -0
- rem/cli/commands/ask.py +205 -114
- rem/cli/commands/process.py +12 -4
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/entities/session.py +1 -0
- rem/schemas/agents/rem.yaml +1 -1
- rem/services/postgres/repository.py +7 -7
- rem/services/rem/service.py +47 -0
- rem/services/session/__init__.py +2 -1
- rem/services/session/compression.py +14 -12
- rem/services/session/pydantic_messages.py +111 -11
- rem/services/session/reload.py +2 -1
- rem/settings.py +71 -0
- rem/sql/migrations/001_install.sql +4 -4
- rem/sql/migrations/004_cache_system.sql +3 -1
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/schema_loader.py +139 -111
- {remdb-0.3.202.dist-info → remdb-0.3.245.dist-info}/METADATA +2 -2
- {remdb-0.3.202.dist-info → remdb-0.3.245.dist-info}/RECORD +44 -39
- {remdb-0.3.202.dist-info → remdb-0.3.245.dist-info}/WHEEL +0 -0
- {remdb-0.3.202.dist-info → remdb-0.3.245.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/streaming.py

````diff
@@ -1,42 +1,36 @@
 """
 OpenAI-compatible streaming relay for Pydantic AI agents.
 
-event: reasoning\ndata: {"type": "reasoning", "content": "..."}\n\n
-event: tool_call\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\n\n
-event: progress\ndata: {"type": "progress", "step": 1, "total_steps": 3}\n\n
-event: metadata\ndata: {"type": "metadata", "confidence": 0.95}\n\n
-
-See sse_events.py for the full event type definitions.
+Architecture:
+```
+User Request → stream_openai_response → agent.iter() → SSE Events → Client
+                     │
+                     ├── Parent agent events (text, tool calls)
+                     │
+                     └── Child agent events (via ask_agent tool)
+                              │
+                              ▼
+                     Event Sink (asyncio.Queue)
+                              │
+                              ▼
+                     drain_child_events() → SSE + DB
+```
+
+Modules:
+- streaming.py: Main workflow orchestrator (this file)
+- streaming_utils.py: Pure utility functions, StreamingState dataclass
+- child_streaming.py: Child agent event handling
+
+Key Design Decision (DUPLICATION FIX):
+When child_content is streamed, state.child_content_streamed is set True.
+Parent TextPartDelta events are SKIPPED when this flag is True,
+preventing content from being emitted twice.
 """
 
 from __future__ import annotations
 
+import asyncio
 import json
-import time
 import uuid
 from typing import TYPE_CHECKING, AsyncGenerator
 
````
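The new docstring's event sink depends on `set_event_sink` from `rem/agentic/context.py` (+86 -3 in this release, not shown here). A minimal sketch of how such a sink could be exposed through a context variable, matching the `set_current_context`/`get_current_context` pattern this file already uses; the variable name `_event_sink` and the helper `emit_child_event` are illustrative assumptions, not the package's confirmed API:

```python
import asyncio
import contextvars

# Illustrative sketch: the real implementation lives in rem/agentic/context.py.
# A context variable lets a child agent running in the same task tree find the
# parent's queue without explicit parameter plumbing.
_event_sink: contextvars.ContextVar[asyncio.Queue | None] = contextvars.ContextVar(
    "event_sink", default=None
)


def set_event_sink(sink: asyncio.Queue | None) -> None:
    """Install the queue that child-agent events are pushed to (None clears it)."""
    _event_sink.set(sink)


async def emit_child_event(event: dict) -> None:
    """Hypothetical helper a child agent tool would call; no-op when nothing is streaming."""
    sink = _event_sink.get()
    if sink is not None:
        await sink.put(event)
```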
```diff
@@ -54,7 +48,17 @@ from pydantic_ai.messages import (
     ToolCallPart,
 )
 
-from .otel_utils import get_current_trace_context
+from .child_streaming import drain_child_events, stream_with_child_events, process_child_event
+from .streaming_utils import (
+    StreamingState,
+    build_content_chunk,
+    build_progress_event,
+    build_tool_start_event,
+    extract_metadata_from_result,
+    extract_tool_args,
+    log_tool_call,
+)
+from .otel_utils import get_current_trace_context
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
```
```diff
@@ -69,6 +73,9 @@ from .sse_events import (
     ToolCallEvent,
     format_sse_event,
 )
+from ....services.session import SessionMessageStore
+from ....settings import settings
+from ....utils.date_utils import to_iso, utc_now
 
 if TYPE_CHECKING:
     from ....agentic.context import AgentContext
```
```diff
@@ -147,48 +154,34 @@ async def stream_openai_response(
         event: done
         data: {"type": "done", "reason": "stop"}
     """
-    is_first_chunk = True
-    reasoning_step = 0
-    current_step = 0
-    total_steps = 3  # Model request, tool execution (optional), final response
-    token_count = 0
-
-    # Track active tool calls for completion events
-    # Maps index -> (tool_name, tool_id) for correlating start/end events
-    active_tool_calls: dict[int, tuple[str, str]] = {}
-    # Queue of tool calls awaiting completion (FIFO for matching)
-    pending_tool_completions: list[tuple[str, str]] = []
-    # Track if metadata was registered via register_metadata tool
-    metadata_registered = False
-    # Track pending tool calls with full data for persistence
-    # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
-    pending_tool_data: dict[str, dict] = {}
+    # Initialize streaming state
+    state = StreamingState.create(model=model, request_id=request_id)
+
+    # Get effective user_id for database operations
+    effective_user_id = agent_context.user_id if agent_context else None
 
     # Import context functions for multi-agent support
-    from ....agentic.context import set_current_context
+    from ....agentic.context import set_current_context, set_event_sink
 
     # Set up context for multi-agent propagation
-    # This allows child agents (via ask_agent tool) to access parent context
     previous_context = None
     if agent_context is not None:
         from ....agentic.context import get_current_context
         previous_context = get_current_context()
         set_current_context(agent_context)
 
+    # Set up event sink for child agent event proxying
+    child_event_sink: asyncio.Queue = asyncio.Queue()
+    set_event_sink(child_event_sink)
+
     try:
         # Emit initial progress event
-        current_step = 1
-        yield format_sse_event(ProgressEvent(
-            step=current_step,
-            total_steps=total_steps,
+        state.current_step = 1
+        yield build_progress_event(
+            step=state.current_step,
+            total_steps=state.total_steps,
             label="Processing request",
-        ))
+        )
 
         # Use agent.iter() to get complete execution with tool calls
         # Pass message_history if available for proper tool call/return pairing
```
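`StreamingState` lives in the new `streaming_utils.py` (+327 lines, listed above but not shown in this diff). From its call sites in this file (`StreamingState.create(...)`, `state.latency_ms()`, and the fields read and written throughout the hunks below), a plausible sketch of the dataclass is:

```python
import time
from dataclasses import dataclass, field


@dataclass
class StreamingState:
    """Sketch reconstructed from usage in streaming.py; field defaults assumed."""
    model: str
    request_id: str
    created_at: int          # epoch seconds used in ChatCompletionStreamResponse
    start_time: float        # monotonic-ish start used for latency_ms()
    is_first_chunk: bool = True
    reasoning_step: int = 0
    current_step: int = 0
    total_steps: int = 3
    token_count: int = 0
    metadata_registered: bool = False
    child_content_streamed: bool = False
    responding_agent: str | None = None
    active_tool_calls: dict[int, tuple[str, str]] = field(default_factory=dict)
    pending_tool_completions: list[tuple[str, str]] = field(default_factory=list)
    pending_tool_data: dict[str, dict] = field(default_factory=dict)

    @classmethod
    def create(cls, model: str, request_id: str) -> "StreamingState":
        now = time.time()
        return cls(model=model, request_id=request_id,
                   created_at=int(now), start_time=now)

    def latency_ms(self) -> int:
        return int((time.time() - self.start_time) * 1000)
```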
```diff
@@ -214,11 +207,11 @@ async def stream_openai_response(
                             if isinstance(event, PartStartEvent) and isinstance(
                                 event.part, ThinkingPart
                             ):
-                                reasoning_step += 1
+                                state.reasoning_step += 1
                                 if event.part.content:
                                     yield format_sse_event(ReasoningEvent(
                                         content=event.part.content,
-                                        step=reasoning_step
+                                        step=state.reasoning_step
                                     ))
 
                             # Reasoning delta (streaming thinking)
```
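`format_sse_event` comes from `sse_events.py` (+5 -1 in this release, not shown). A sketch consistent with the wire examples the old docstring carried (`event: reasoning` / `data: {"type": "reasoning", ...}`), assuming the event classes are Pydantic models with a `type` field; the real encoder may differ in detail:

```python
import json


def format_sse_event(event) -> str:
    """Render one SSE frame, e.g.:

    event: reasoning
    data: {"type": "reasoning", "content": "..."}

    Sketch inferred from the docstring's wire examples.
    """
    payload = event.model_dump(exclude_none=True)  # assumes Pydantic event models
    return f"event: {payload['type']}\ndata: {json.dumps(payload)}\n\n"
```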
```diff
@@ -228,7 +221,7 @@ async def stream_openai_response(
                                 if event.delta.content_delta:
                                     yield format_sse_event(ReasoningEvent(
                                         content=event.delta.content_delta,
-                                        step=reasoning_step
+                                        step=state.reasoning_step
                                     ))
 
                             # ============================================
```
```diff
@@ -237,28 +230,11 @@ async def stream_openai_response(
                             elif isinstance(event, PartStartEvent) and isinstance(
                                 event.part, TextPart
                             ):
+                                # Skip if child already streamed content
+                                if state.child_content_streamed:
+                                    continue
                                 if event.part.content:
-                                    content = event.part.content
-                                    token_count += len(content.split())
-
-                                    content_chunk = ChatCompletionStreamResponse(
-                                        id=request_id,
-                                        created=created_at,
-                                        model=model,
-                                        choices=[
-                                            ChatCompletionStreamChoice(
-                                                index=0,
-                                                delta=ChatCompletionMessageDelta(
-                                                    role="assistant" if is_first_chunk else None,
-                                                    content=content,
-                                                ),
-                                                finish_reason=None,
-                                            )
-                                        ],
-                                    )
-                                    is_first_chunk = False
-                                    yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                    yield build_content_chunk(state, event.part.content)
 
                             # ============================================
                             # TOOL CALL START EVENTS
```
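`build_content_chunk` folds that removed twenty-line chunk builder into one call. Reconstructed from the deleted block (the helper itself sits in `streaming_utils.py`, not shown), it plausibly looks like:

```python
from .models import (
    ChatCompletionMessageDelta,
    ChatCompletionStreamChoice,
    ChatCompletionStreamResponse,
)


def build_content_chunk(state: StreamingState, content: str) -> str:
    """One OpenAI-style SSE data line for a content delta (mirrors the removed inline code)."""
    state.token_count += len(content.split())
    chunk = ChatCompletionStreamResponse(
        id=state.request_id,
        created=state.created_at,
        model=state.model,
        choices=[
            ChatCompletionStreamChoice(
                index=0,
                delta=ChatCompletionMessageDelta(
                    # Only the first chunk carries the assistant role marker
                    role="assistant" if state.is_first_chunk else None,
                    content=content,
                ),
                finish_reason=None,
            )
        ],
    )
    state.is_first_chunk = False
    return f"data: {chunk.model_dump_json()}\n\n"
```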
```diff
@@ -268,94 +244,39 @@ async def stream_openai_response(
                             ):
                                 tool_name = event.part.tool_name
 
-                                # Handle final_result
-                                # internal tool for structured output
+                                # Handle final_result (Pydantic AI's internal tool)
                                 if tool_name == "final_result":
-                                    args_dict = None
-                                    if event.part.args is not None:
-                                        if hasattr(event.part.args, 'args_dict'):
-                                            args_dict = event.part.args.args_dict
-                                        elif isinstance(event.part.args, dict):
-                                            args_dict = event.part.args
-
+                                    args_dict = extract_tool_args(event.part)
                                     if args_dict:
-                                        # Emit the structured result as JSON content
                                         result_json = json.dumps(args_dict, indent=2)
-                                        content_chunk = ChatCompletionStreamResponse(
-                                            id=request_id,
-                                            created=created_at,
-                                            model=model,
-                                            choices=[
-                                                ChatCompletionStreamChoice(
-                                                    index=0,
-                                                    delta=ChatCompletionMessageDelta(
-                                                        role="assistant" if is_first_chunk else None,
-                                                        content=result_json,
-                                                    ),
-                                                    finish_reason=None,
-                                                )
-                                            ],
-                                        )
-                                        is_first_chunk = False
-                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
-                                    continue  # Skip regular tool call handling
+                                        yield build_content_chunk(state, result_json)
+                                    continue
 
                                 tool_id = f"call_{uuid.uuid4().hex[:8]}"
-                                active_tool_calls[event.index] = (tool_name, tool_id)
-
-                                pending_tool_completions.append((tool_name, tool_id))
-
-                                # Emit tool_call SSE event (started)
-                                # Try to get arguments as dict
-                                args_dict = None
-                                if event.part.args is not None:
-                                    if hasattr(event.part.args, 'args_dict'):
-                                        args_dict = event.part.args.args_dict
-                                    elif isinstance(event.part.args, dict):
-                                        args_dict = event.part.args
-                                    elif isinstance(event.part.args, str):
-                                        # Parse JSON string args (common with pydantic-ai)
-                                        try:
-                                            args_dict = json.loads(event.part.args)
-                                        except json.JSONDecodeError:
-                                            logger.warning(f"Failed to parse tool args as JSON: {event.part.args[:100]}")
-
-                                # Log tool call with key parameters
-                                if args_dict and tool_name == "search_rem":
-                                    query_type = args_dict.get("query_type", "?")
-                                    limit = args_dict.get("limit", 20)
-                                    table = args_dict.get("table", "")
-                                    query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
-                                    if query_text and len(query_text) > 50:
-                                        query_text = query_text[:50] + "..."
-                                    logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
-                                else:
-                                    logger.info(f"🔧 {tool_name}")
+                                state.active_tool_calls[event.index] = (tool_name, tool_id)
+                                state.pending_tool_completions.append((tool_name, tool_id))
 
-                                yield format_sse_event(ToolCallEvent(
-                                    tool_name=tool_name,
-                                    tool_id=tool_id,
-                                    status="started",
-                                    arguments=args_dict
-                                ))
+                                # Extract and log arguments
+                                args_dict = extract_tool_args(event.part)
+                                log_tool_call(tool_name, args_dict)
 
-                                pending_tool_data[tool_id] = {
+                                yield build_tool_start_event(tool_name, tool_id, args_dict)
+
+                                # Track for persistence
+                                state.pending_tool_data[tool_id] = {
                                     "tool_name": tool_name,
                                     "tool_id": tool_id,
                                     "arguments": args_dict,
                                 }
 
                                 # Update progress
-                                current_step = 2
-                                total_steps = 4
-                                yield format_sse_event(ProgressEvent(
-                                    step=current_step,
-                                    total_steps=total_steps,
+                                state.current_step = 2
+                                state.total_steps = 4
+                                yield build_progress_event(
+                                    step=state.current_step,
+                                    total_steps=state.total_steps,
                                     label=f"Calling {tool_name}",
-                                ))
+                                )
 
                             # ============================================
                             # TOOL CALL COMPLETION (PartEndEvent)
```
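Both new helpers consolidate logic that is still visible in the deleted lines above. Sketches reconstructed from that removed code (the real versions live in `streaming_utils.py`):

```python
import json
import logging

logger = logging.getLogger(__name__)


def extract_tool_args(part) -> dict | None:
    """Normalize ToolCallPart.args (args object, dict, or JSON string) to a dict."""
    args = getattr(part, "args", None)
    if args is None:
        return None
    if hasattr(args, "args_dict"):
        return args.args_dict
    if isinstance(args, dict):
        return args
    if isinstance(args, str) and args:
        try:
            return json.loads(args)
        except json.JSONDecodeError:
            logger.warning(f"Failed to parse tool args as JSON: {args[:100]}")
    return None


def log_tool_call(tool_name: str, args_dict: dict | None) -> None:
    """Log a tool call, with query details for search_rem (as the removed code did)."""
    if args_dict and tool_name == "search_rem":
        query_type = args_dict.get("query_type", "?")
        limit = args_dict.get("limit", 20)
        table = args_dict.get("table", "")
        query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
        if query_text and len(query_text) > 50:
            query_text = query_text[:50] + "..."
        logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
    else:
        logger.info(f"🔧 {tool_name}")
```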
```diff
@@ -363,28 +284,14 @@ async def stream_openai_response(
                             elif isinstance(event, PartEndEvent) and isinstance(
                                 event.part, ToolCallPart
                             ):
-                                if event.index in active_tool_calls:
-                                    tool_name, tool_id = active_tool_calls[event.index]
-                                    args_dict = None
-                                    if event.part.args is not None:
-                                        if hasattr(event.part.args, 'args_dict'):
-                                            args_dict = event.part.args.args_dict
-                                        elif isinstance(event.part.args, dict):
-                                            args_dict = event.part.args
-                                        elif isinstance(event.part.args, str) and event.part.args:
-                                            try:
-                                                args_dict = json.loads(event.part.args)
-                                            except json.JSONDecodeError:
-                                                logger.warning(f"Failed to parse tool args: {event.part.args[:100]}")
-
-                                    # Update pending_tool_data with complete args
-                                    if tool_id in pending_tool_data:
-                                        pending_tool_data[tool_id]["arguments"] = args_dict
-
-                                    del active_tool_calls[event.index]
+                                if event.index in state.active_tool_calls:
+                                    tool_name, tool_id = state.active_tool_calls[event.index]
+                                    args_dict = extract_tool_args(event.part)
+
+                                    if tool_id in state.pending_tool_data:
+                                        state.pending_tool_data[tool_id]["arguments"] = args_dict
+
+                                    del state.active_tool_calls[event.index]
 
                             # ============================================
                             # TEXT CONTENT DELTA
```
```diff
@@ -392,116 +299,117 @@ async def stream_openai_response(
                             elif isinstance(event, PartDeltaEvent) and isinstance(
                                 event.delta, TextPartDelta
                             ):
+                                # DUPLICATION FIX: Skip parent text if child already streamed content
+                                # Child agents stream via child_content events in ask_agent tool.
+                                # If parent tries to echo that content, skip it.
+                                if state.child_content_streamed:
+                                    logger.debug("Skipping parent TextPartDelta - child content already streamed")
+                                    continue
+
                                 content = event.delta.content_delta
-                                token_count += len(content.split())
-
-                                content_chunk = ChatCompletionStreamResponse(
-                                    id=request_id,
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=content,
-                                            ),
-                                            finish_reason=None,
-                                        )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                yield build_content_chunk(state, content)
 
                     # ============================================
                     # TOOL EXECUTION NODE
                     # ============================================
                     elif Agent.is_call_tools_node(node):
                         async with node.stream(agent_run.ctx) as tools_stream:
-                            async for tool_event in tools_stream:
+                            # Use concurrent multiplexer to handle both tool events
+                            # and child agent events as they arrive (fixes streaming lag)
+                            async for event_type, event_data in stream_with_child_events(
+                                tools_stream=tools_stream,
+                                child_event_sink=child_event_sink,
+                                state=state,
+                                session_id=session_id,
+                                user_id=effective_user_id,
+                                message_id=message_id,
+                                agent_schema=agent_schema,
+                            ):
+                                # Handle child events (streamed from ask_agent)
+                                if event_type == "child":
+                                    async for chunk in process_child_event(
+                                        child_event=event_data,
+                                        state=state,
+                                        session_id=session_id,
+                                        user_id=effective_user_id,
+                                        message_id=message_id,
+                                        agent_schema=agent_schema,
+                                    ):
+                                        yield chunk
+                                    continue
+
+                                # Handle tool events
+                                tool_event = event_data
+
                                 # Tool result event - emit completion
                                 if isinstance(tool_event, FunctionToolResultEvent):
                                     # Get the tool name/id from the pending queue (FIFO)
-                                    if pending_tool_completions:
-                                        tool_name, tool_id = pending_tool_completions.pop(0)
+                                    if state.pending_tool_completions:
+                                        tool_name, tool_id = state.pending_tool_completions.pop(0)
                                     else:
-                                        # Fallback if queue is empty (shouldn't happen)
                                         tool_name = "tool"
                                         tool_id = f"call_{uuid.uuid4().hex[:8]}"
 
-                                    # Check if this is a register_metadata tool result
-                                    # It returns a dict with _metadata_event: True marker
                                     result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
                                     is_metadata_event = False
 
-                                    if isinstance(result_content, dict) and result_content.get("_metadata_event"):
+                                    # Handle register_metadata tool results
+                                    metadata = extract_metadata_from_result(result_content)
+                                    if metadata:
                                         is_metadata_event = True
-                                        metadata_registered = True
-                                        registered_confidence = result_content.get("confidence")
-                                        registered_sources = result_content.get("sources")
-                                        registered_flags = result_content.get("flags")
-                                        # Session naming
-                                        registered_session_name = result_content.get("session_name")
-                                        # Risk assessment fields
-                                        registered_risk_level = result_content.get("risk_level")
-                                        registered_risk_score = result_content.get("risk_score")
-                                        registered_risk_reasoning = result_content.get("risk_reasoning")
-                                        registered_recommended_action = result_content.get("recommended_action")
-                                        # Extra fields
-                                        registered_extra = result_content.get("extra")
+                                        state.metadata_registered = True
+
+                                        # Only set responding_agent if not already set by child
+                                        if not state.responding_agent and metadata.get("agent_schema"):
+                                            state.responding_agent = metadata["agent_schema"]
 
                                         logger.info(
-                                            f"📊 Metadata ..."
-                                            f"risk_level={registered_risk_level}, sources={registered_sources}"
+                                            f"📊 Metadata: confidence={metadata.get('confidence')}, "
+                                            f"risk_level={metadata.get('risk_level')}"
                                         )
 
-                                        # Build extra dict with risk fields
+                                        # Build extra dict with risk fields
                                         extra_data = {}
-                                        if registered_risk_level is not None:
-                                            extra_data["risk_level"] = registered_risk_level
-                                        if registered_risk_score is not None:
-                                            extra_data["risk_score"] = registered_risk_score
-                                        if registered_risk_reasoning is not None:
-                                            extra_data["risk_reasoning"] = registered_risk_reasoning
-                                        if registered_recommended_action is not None:
-                                            extra_data["recommended_action"] = registered_recommended_action
-                                        if registered_extra:
-                                            extra_data.update(registered_extra)
-
-                                        # Emit metadata event immediately
+                                        for field in ["risk_level", "risk_score", "risk_reasoning", "recommended_action"]:
+                                            if metadata.get(field) is not None:
+                                                extra_data[field] = metadata[field]
+                                        if metadata.get("extra"):
+                                            extra_data.update(metadata["extra"])
+
                                         yield format_sse_event(MetadataEvent(
                                             message_id=message_id,
                                             in_reply_to=in_reply_to,
                                             session_id=session_id,
                                             agent_schema=agent_schema,
-                                            session_name=registered_session_name,
-                                            confidence=registered_confidence,
-                                            sources=registered_sources,
+                                            responding_agent=state.responding_agent,
+                                            session_name=metadata.get("session_name"),
+                                            confidence=metadata.get("confidence"),
+                                            sources=metadata.get("sources"),
                                             model_version=model,
-                                            flags=registered_flags,
+                                            flags=metadata.get("flags"),
                                             extra=extra_data if extra_data else None,
                                             hidden=False,
                                         ))
 
-                                    # Get complete args from pending_tool_data
-                                    # (captured at PartEndEvent with full args)
+                                    # Get complete args from pending_tool_data
                                     completed_args = None
-                                    if tool_id in pending_tool_data:
-                                        completed_args = pending_tool_data[tool_id].get("arguments")
+                                    if tool_id in state.pending_tool_data:
+                                        completed_args = state.pending_tool_data[tool_id].get("arguments")
 
-                                    # Capture tool call
-                                    if tool_calls_out is not None and tool_id in pending_tool_data:
-                                        tool_data = pending_tool_data[tool_id]
+                                    # Capture tool call for persistence
+                                    if tool_calls_out is not None and tool_id in state.pending_tool_data:
+                                        tool_data = state.pending_tool_data[tool_id]
                                         tool_data["result"] = result_content
                                         tool_data["is_metadata"] = is_metadata_event
                                         tool_calls_out.append(tool_data)
-                                        del pending_tool_data[tool_id]
+                                        del state.pending_tool_data[tool_id]
 
                                     if not is_metadata_event:
+                                        # NOTE: text_response fallback is DISABLED
+                                        # Child agents now stream content via child_content events (above)
+                                        # which provides real-time streaming. The text_response in tool
+                                        # result would duplicate that content, so we skip it entirely.
+
                                         # Normal tool completion - emit ToolCallEvent
                                         # For finalize_intake, send full result dict for frontend
                                         if tool_name == "finalize_intake" and isinstance(result_content, dict):
```
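`stream_with_child_events` is defined in the new `child_streaming.py` (+394 lines, not shown). Per the comment, it multiplexes tool events and child-agent events concurrently instead of draining the queue only after the tool stream ends. A simplified sketch of that pattern, ignoring the persistence parameters (`state`, `session_id`, and so on) the real function also takes:

```python
import asyncio
from typing import Any, AsyncGenerator, AsyncIterator


async def stream_with_child_events(
    tools_stream: AsyncIterator[Any],
    child_event_sink: asyncio.Queue,
) -> AsyncGenerator[tuple[str, Any], None]:
    """Yield ("tool", event) and ("child", event) in arrival order.

    Races the next tool event against the next queued child event, so content
    a child agent pushes through the sink is relayed immediately instead of
    after the tool call returns (the streaming lag the refactor fixes).
    """
    tool_task = asyncio.ensure_future(anext(tools_stream))
    child_task = asyncio.ensure_future(child_event_sink.get())
    while True:
        done, _ = await asyncio.wait(
            {tool_task, child_task}, return_when=asyncio.FIRST_COMPLETED
        )
        if child_task in done:
            yield ("child", child_task.result())
            child_task = asyncio.ensure_future(child_event_sink.get())
            continue
        try:
            event = tool_task.result()
        except StopAsyncIteration:
            child_task.cancel()
            # Relay any child events that were already queued before stopping.
            while not child_event_sink.empty():
                yield ("child", child_event_sink.get_nowait())
            return
        yield ("tool", event)
        tool_task = asyncio.ensure_future(anext(tools_stream))
```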
```diff
@@ -544,10 +452,10 @@ async def stream_openai_response(
                                         ))
 
                                     # Update progress after tool completion
-                                    current_step = 3
+                                    state.current_step = 3
                                     yield format_sse_event(ProgressEvent(
-                                        step=current_step,
-                                        total_steps=total_steps,
+                                        step=state.current_step,
+                                        total_steps=state.total_steps,
                                         label="Generating response",
                                         status="in_progress"
                                     ))
```
```diff
@@ -576,36 +484,36 @@ async def stream_openai_response(
                 result_dict = {"result": str(output)}
 
             result_json = json.dumps(result_dict, indent=2, default=str)
-            token_count += len(result_json.split())
+            state.token_count += len(result_json.split())
 
             # Emit structured result as content
             result_chunk = ChatCompletionStreamResponse(
-                id=request_id,
-                created=created_at,
+                id=state.request_id,
+                created=state.created_at,
                 model=model,
                 choices=[
                     ChatCompletionStreamChoice(
                         index=0,
                         delta=ChatCompletionMessageDelta(
-                            role="assistant" if is_first_chunk else None,
+                            role="assistant" if state.is_first_chunk else None,
                             content=result_json,
                         ),
                         finish_reason=None,
                     )
                 ],
             )
-            is_first_chunk = False
+            state.is_first_chunk = False
             yield f"data: {result_chunk.model_dump_json()}\n\n"
         except Exception as e:
             logger.debug(f"No structured result available: {e}")
 
         # Calculate latency
-        latency_ms = ...
+        latency_ms = state.latency_ms()
 
         # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
-            id=request_id,
-            created=created_at,
+            id=state.request_id,
+            created=state.created_at,
             model=model,
             choices=[
                 ChatCompletionStreamChoice(
```
```diff
@@ -618,27 +526,28 @@ async def stream_openai_response(
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
         # Emit metadata event only if not already registered via register_metadata tool
-        if not metadata_registered:
+        if not state.metadata_registered:
             yield format_sse_event(MetadataEvent(
                 message_id=message_id,
                 in_reply_to=in_reply_to,
                 session_id=session_id,
                 agent_schema=agent_schema,
+                responding_agent=state.responding_agent,
                 confidence=1.0,  # Default to 100% confidence
                 model_version=model,
                 latency_ms=latency_ms,
-                token_count=token_count,
+                token_count=state.token_count,
                 # Include deterministic trace context captured from OTEL
                 trace_id=captured_trace_id,
                 span_id=captured_span_id,
             ))
 
         # Mark all progress complete
-        for step in range(1, total_steps + 1):
+        for step in range(1, state.total_steps + 1):
             yield format_sse_event(ProgressEvent(
                 step=step,
-                total_steps=total_steps,
-                label="Complete" if step == total_steps else f"Step {step}",
+                total_steps=state.total_steps,
+                label="Complete" if step == state.total_steps else f"Step {step}",
                 status="completed"
             ))
```
```diff
@@ -716,6 +625,8 @@ async def stream_openai_response(
         yield "data: [DONE]\n\n"
 
     finally:
+        # Clean up event sink for multi-agent streaming
+        set_event_sink(None)
        # Restore previous context for multi-agent support
        # This ensures nested agent calls don't pollute the parent's context
        if agent_context is not None:
```
```diff
@@ -823,6 +734,37 @@ async def stream_minimal_simulator(
         yield sse_string
 
 
+async def save_user_message(
+    session_id: str,
+    user_id: str | None,
+    content: str,
+) -> None:
+    """
+    Save user message to database before streaming.
+
+    Shared utility used by both API and CLI for consistent user message storage.
+    """
+    if not settings.postgres.enabled or not session_id:
+        return
+
+    user_msg = {
+        "role": "user",
+        "content": content,
+        "timestamp": to_iso(utc_now()),
+    }
+    try:
+        store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+        await store.store_session_messages(
+            session_id=session_id,
+            messages=[user_msg],
+            user_id=user_id,
+            compress=False,
+        )
+        logger.debug(f"Saved user message to session {session_id}")
+    except Exception as e:
+        logger.error(f"Failed to save user message: {e}", exc_info=True)
+
+
 async def stream_openai_response_with_save(
     agent: Agent,
     prompt: str,
```
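The NOTE added to `stream_openai_response_with_save` (next hunk) spells out the intended call order. A hypothetical call site; everything other than `save_user_message` and `stream_openai_response_with_save` (including the keyword arguments beyond `agent` and `prompt`) is illustrative rather than copied from the package:

```python
# Hypothetical caller sketch for the split persistence responsibility.
async def handle_chat(agent, prompt: str, session_id: str, user_id: str | None):
    # 1. Persist the user's message first; the streaming helper only saves
    #    tool calls and the assistant response.
    await save_user_message(session_id=session_id, user_id=user_id, content=prompt)

    # 2. Then relay the agent's SSE stream, which saves its own side on completion.
    async for sse_chunk in stream_openai_response_with_save(
        agent=agent,
        prompt=prompt,
        session_id=session_id,   # assumed parameter
        user_id=user_id,         # assumed parameter
    ):
        yield sse_chunk
```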
```diff
@@ -842,6 +784,9 @@ async def stream_openai_response_with_save(
     This accumulates all text content during streaming and saves it to the database
     after the stream completes.
 
+    NOTE: Call save_user_message() BEFORE this function to save the user's message.
+    This function only saves tool calls and assistant responses.
+
     Args:
         agent: Pydantic AI agent instance
         prompt: User prompt
```
```diff
@@ -855,10 +800,6 @@ async def stream_openai_response_with_save(
     Yields:
         SSE-formatted strings
     """
-    from ....utils.date_utils import utc_now, to_iso
-    from ....services.session import SessionMessageStore
-    from ....settings import settings
-
     # Pre-generate message_id so it can be sent in metadata event
     # This allows frontend to use it for feedback before DB persistence
     message_id = str(uuid.uuid4())
```
```diff
@@ -899,6 +840,9 @@ async def stream_openai_response_with_save(
                 delta = data["choices"][0].get("delta", {})
                 content = delta.get("content")
                 if content:
+                    # DEBUG: Check for [Calling markers in content
+                    if "[Calling" in content:
+                        logger.warning(f"DEBUG: Found [Calling in content chunk: {repr(content[:100])}")
                     accumulated_content.append(content)
             except (json.JSONDecodeError, KeyError, IndexError):
                 pass  # Skip non-JSON or malformed chunks
```
```diff
@@ -931,8 +875,34 @@ async def stream_openai_response_with_save(
             messages_to_store.append(tool_message)
 
         # Then store assistant text response (if any)
+        # Priority: direct TextPartDelta content > tool call text_response
+        # When an agent delegates via ask_agent, the child's text_response becomes
+        # the parent's assistant response (the parent is just orchestrating)
+        full_content = None
+
         if accumulated_content:
             full_content = "".join(accumulated_content)
+            logger.warning(f"DEBUG: Using accumulated_content ({len(accumulated_content)} chunks, {len(full_content)} chars)")
+            logger.warning(f"DEBUG: First 200 chars: {repr(full_content[:200])}")
+        else:
+            logger.warning("DEBUG: accumulated_content is empty, checking text_response fallback")
+            # No direct text from TextPartDelta - check tool results for text_response
+            # This handles multi-agent delegation where child agent output is the response
+            for tool_call in tool_calls:
+                if not tool_call:
+                    continue
+                result = tool_call.get("result")
+                if isinstance(result, dict) and result.get("text_response"):
+                    text_response = result["text_response"]
+                    if text_response and str(text_response).strip():
+                        full_content = str(text_response)
+                        logger.debug(
+                            f"Using text_response from {tool_call.get('tool_name', 'tool')} "
+                            f"({len(full_content)} chars) as assistant message"
+                        )
+                        break
+
+        if full_content:
             assistant_message = {
                 "id": message_id,  # Use pre-generated ID for consistency with metadata event
                 "role": "assistant",
```
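For reference, the shape of a `tool_calls` entry that triggers this fallback; the keys match what this file stores in `pending_tool_data` and enriches at tool completion, while the values are illustrative:

```python
# Illustrative entry only: keys from this diff, values made up for the example.
tool_call = {
    "tool_name": "ask_agent",
    "tool_id": "call_ab12cd34",
    "arguments": {"agent": "...", "question": "..."},
    "result": {"text_response": "Answer produced by the child agent."},
    "is_metadata": False,
}
```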
```diff
@@ -954,7 +924,7 @@ async def stream_openai_response_with_save(
             )
             logger.debug(
                 f"Saved {len(tool_calls)} tool calls and "
-                f"{'assistant response' if accumulated_content else 'no text'} "
+                f"{'assistant response' if full_content else 'no text'} "
                 f"to session {session_id}"
             )
         except Exception as e:
```