remdb 0.3.226__py3-none-any.whl → 0.3.245__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release. This version of remdb might be problematic.
- rem/agentic/README.md +22 -248
- rem/agentic/context.py +13 -2
- rem/agentic/context_builder.py +39 -33
- rem/agentic/providers/pydantic_ai.py +67 -50
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/tools.py +25 -9
- rem/api/routers/auth.py +112 -9
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/streaming.py +166 -357
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/query.py +5 -14
- rem/cli/commands/ask.py +144 -33
- rem/cli/commands/process.py +9 -1
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/entities/session.py +1 -0
- rem/services/postgres/repository.py +7 -17
- rem/services/rem/service.py +47 -0
- rem/services/session/compression.py +7 -3
- rem/services/session/pydantic_messages.py +45 -11
- rem/services/session/reload.py +2 -1
- rem/settings.py +43 -0
- rem/sql/migrations/004_cache_system.sql +3 -1
- rem/utils/schema_loader.py +99 -99
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/METADATA +2 -2
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/RECORD +29 -26
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/WHEEL +0 -0
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/entry_points.txt +0 -0
--- rem/api/routers/chat/streaming.py (remdb 0.3.226)
+++ rem/api/routers/chat/streaming.py (remdb 0.3.245)
@@ -1,42 +1,36 @@
 """
 OpenAI-compatible streaming relay for Pydantic AI agents.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-event: reasoning\ndata: {"type": "reasoning", "content": "..."}\n\n
-event: tool_call\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\n\n
-event: progress\ndata: {"type": "progress", "step": 1, "total_steps": 3}\n\n
-event: metadata\ndata: {"type": "metadata", "confidence": 0.95}\n\n
-
-See sse_events.py for the full event type definitions.
+Architecture:
+```
+User Request → stream_openai_response → agent.iter() → SSE Events → Client
+                                              │
+                                              ├── Parent agent events (text, tool calls)
+                                              │
+                                              └── Child agent events (via ask_agent tool)
+                                                        │
+                                                        ▼
+                                              Event Sink (asyncio.Queue)
+                                                        │
+                                                        ▼
+                                              drain_child_events() → SSE + DB
+```
+
+Modules:
+- streaming.py: Main workflow orchestrator (this file)
+- streaming_utils.py: Pure utility functions, StreamingState dataclass
+- child_streaming.py: Child agent event handling
+
+Key Design Decision (DUPLICATION FIX):
+When child_content is streamed, state.child_content_streamed is set True.
+Parent TextPartDelta events are SKIPPED when this flag is True,
+preventing content from being emitted twice.
 """
 
 from __future__ import annotations
 
+import asyncio
 import json
-import time
 import uuid
 from typing import TYPE_CHECKING, AsyncGenerator
 
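The new docstring's architecture centers on an asyncio.Queue "event sink": child agents push events in, and the parent stream drains them between its own events. A minimal self-contained sketch of that pattern, with hypothetical producer/consumer names (the real wiring goes through set_event_sink and drain_child_events, which this diff only references):

```python
import asyncio

async def child_agent(sink: asyncio.Queue) -> None:
    # A child agent (e.g. invoked via the ask_agent tool) reports progress.
    await sink.put({"type": "child_content", "content": "partial answer"})
    await sink.put({"type": "child_tool_result", "result": {"ok": True}})

async def parent_stream(sink: asyncio.Queue) -> None:
    child = asyncio.create_task(child_agent(sink))
    while not child.done() or not sink.empty():
        try:
            event = await asyncio.wait_for(sink.get(), timeout=0.1)
        except asyncio.TimeoutError:
            continue  # no child event yet; parent work would continue here
        print(f"event: {event['type']}")  # stand-in for an SSE yield

asyncio.run(parent_stream(asyncio.Queue()))
```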
@@ -54,7 +48,17 @@ from pydantic_ai.messages import (
     ToolCallPart,
 )
 
-from .
+from .child_streaming import drain_child_events, stream_with_child_events, process_child_event
+from .streaming_utils import (
+    StreamingState,
+    build_content_chunk,
+    build_progress_event,
+    build_tool_start_event,
+    extract_metadata_from_result,
+    extract_tool_args,
+    log_tool_call,
+)
+from .otel_utils import get_current_trace_context
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
@@ -69,6 +73,9 @@ from .sse_events import (
     ToolCallEvent,
     format_sse_event,
 )
+from ....services.session import SessionMessageStore
+from ....settings import settings
+from ....utils.date_utils import to_iso, utc_now
 
 if TYPE_CHECKING:
     from ....agentic.context import AgentContext
@@ -147,35 +154,16 @@ async def stream_openai_response(
         event: done
         data: {"type": "done", "reason": "stop"}
     """
-
-
-
-
-
-    is_first_chunk = True
-    reasoning_step = 0
-    current_step = 0
-    total_steps = 3  # Model request, tool execution (optional), final response
-    token_count = 0
-
-    # Track active tool calls for completion events
-    # Maps index -> (tool_name, tool_id) for correlating start/end events
-    active_tool_calls: dict[int, tuple[str, str]] = {}
-    # Queue of tool calls awaiting completion (FIFO for matching)
-    pending_tool_completions: list[tuple[str, str]] = []
-    # Track if metadata was registered via register_metadata tool
-    metadata_registered = False
-    # Track which agent is actually responding (may be child agent if delegated)
-    responding_agent: str | None = None
-    # Track pending tool calls with full data for persistence
-    # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
-    pending_tool_data: dict[str, dict] = {}
+    # Initialize streaming state
+    state = StreamingState.create(model=model, request_id=request_id)
+
+    # Get effective user_id for database operations
+    effective_user_id = agent_context.user_id if agent_context else None
 
     # Import context functions for multi-agent support
     from ....agentic.context import set_current_context, set_event_sink
 
     # Set up context for multi-agent propagation
-    # This allows child agents (via ask_agent tool) to access parent context
     previous_context = None
     if agent_context is not None:
         from ....agentic.context import get_current_context
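The block of loose locals deleted above is folded into a single StreamingState object. The actual dataclass lives in streaming_utils.py, which this diff does not show; the following is a hypothetical reconstruction assembled only from the fields the later hunks touch (is_first_chunk, created_at, token_count, latency_ms(), child_content_streamed, and the three tool-tracking collections):

```python
import time
from dataclasses import dataclass, field

@dataclass
class StreamingState:
    """Mutable per-request streaming state (hypothetical reconstruction)."""
    model: str
    request_id: str
    created_at: int
    start_time: float
    is_first_chunk: bool = True
    reasoning_step: int = 0
    current_step: int = 0
    total_steps: int = 3  # model request, tool execution (optional), final response
    token_count: int = 0
    metadata_registered: bool = False
    child_content_streamed: bool = False
    responding_agent: str | None = None
    active_tool_calls: dict[int, tuple[str, str]] = field(default_factory=dict)
    pending_tool_completions: list[tuple[str, str]] = field(default_factory=list)
    pending_tool_data: dict[str, dict] = field(default_factory=dict)

    @classmethod
    def create(cls, model: str, request_id: str) -> "StreamingState":
        now = time.time()
        return cls(model=model, request_id=request_id,
                   created_at=int(now), start_time=now)

    def latency_ms(self) -> int:
        # Matches the latency_ms = state.latency_ms() call in a later hunk.
        return int((time.time() - self.start_time) * 1000)
```

Centralizing the state also removes the risk of one handler mutating a counter that another handler reads stale, since everything flows through one object.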
@@ -183,20 +171,17 @@
         set_current_context(agent_context)
 
     # Set up event sink for child agent event proxying
-    # Child agents (via ask_agent) will push their events here
-    import asyncio
     child_event_sink: asyncio.Queue = asyncio.Queue()
     set_event_sink(child_event_sink)
 
     try:
         # Emit initial progress event
-        current_step = 1
-        yield
-            step=current_step,
-            total_steps=total_steps,
+        state.current_step = 1
+        yield build_progress_event(
+            step=state.current_step,
+            total_steps=state.total_steps,
             label="Processing request",
-
-        ))
+        )
 
         # Use agent.iter() to get complete execution with tool calls
         # Pass message_history if available for proper tool call/return pairing
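build_progress_event replaces the repeated format_sse_event(ProgressEvent(...)) boilerplate. A minimal sketch of what such a wrapper plausibly looks like, assuming the ProgressEvent fields visible in the hunks (step, total_steps, label, status); the real helper lives in streaming_utils.py:

```python
from .sse_events import ProgressEvent, format_sse_event

def build_progress_event(step: int, total_steps: int, label: str,
                         status: str = "in_progress") -> str:
    """Wrap a ProgressEvent in SSE framing, replacing the inline calls."""
    return format_sse_event(ProgressEvent(
        step=step,
        total_steps=total_steps,
        label=label,
        status=status,
    ))
```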
@@ -222,11 +207,11 @@
                     if isinstance(event, PartStartEvent) and isinstance(
                         event.part, ThinkingPart
                     ):
-                        reasoning_step += 1
+                        state.reasoning_step += 1
                         if event.part.content:
                             yield format_sse_event(ReasoningEvent(
                                 content=event.part.content,
-                                step=reasoning_step
+                                step=state.reasoning_step
                             ))
 
                     # Reasoning delta (streaming thinking)
@@ -236,7 +221,7 @@
                         if event.delta.content_delta:
                             yield format_sse_event(ReasoningEvent(
                                 content=event.delta.content_delta,
-                                step=reasoning_step
+                                step=state.reasoning_step
                             ))
 
                     # ============================================
@@ -245,28 +230,11 @@
                     elif isinstance(event, PartStartEvent) and isinstance(
                         event.part, TextPart
                     ):
-                        #
+                        # Skip if child already streamed content
+                        if state.child_content_streamed:
+                            continue
                         if event.part.content:
-
-                            token_count += len(content.split())
-
-                            content_chunk = ChatCompletionStreamResponse(
-                                id=request_id,
-                                created=created_at,
-                                model=model,
-                                choices=[
-                                    ChatCompletionStreamChoice(
-                                        index=0,
-                                        delta=ChatCompletionMessageDelta(
-                                            role="assistant" if is_first_chunk else None,
-                                            content=content,
-                                        ),
-                                        finish_reason=None,
-                                    )
-                                ],
-                            )
-                            is_first_chunk = False
-                            yield f"data: {content_chunk.model_dump_json()}\n\n"
+                            yield build_content_chunk(state, event.part.content)
 
                     # ============================================
                     # TOOL CALL START EVENTS
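build_content_chunk collapses the repeated ChatCompletionStreamResponse block this hunk deletes (the same block recurred in the text-delta and child-content paths below) into one call that also maintains is_first_chunk and token_count. A sketch, assuming it mirrors the deleted block; the real helper lives in streaming_utils.py:

```python
from .models import (
    ChatCompletionMessageDelta,
    ChatCompletionStreamChoice,
    ChatCompletionStreamResponse,
)

def build_content_chunk(state: "StreamingState", content: str) -> str:
    """Build one OpenAI-style SSE data line and update streaming state."""
    state.token_count += len(content.split())
    chunk = ChatCompletionStreamResponse(
        id=state.request_id,
        created=state.created_at,
        model=state.model,
        choices=[
            ChatCompletionStreamChoice(
                index=0,
                delta=ChatCompletionMessageDelta(
                    # Only the first chunk carries the assistant role
                    role="assistant" if state.is_first_chunk else None,
                    content=content,
                ),
                finish_reason=None,
            )
        ],
    )
    state.is_first_chunk = False
    return f"data: {chunk.model_dump_json()}\n\n"
```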
@@ -276,94 +244,39 @@
                     ):
                         tool_name = event.part.tool_name
 
-                        # Handle final_result
-                        # internal tool for structured output
+                        # Handle final_result (Pydantic AI's internal tool)
                         if tool_name == "final_result":
-
-                            args_dict = None
-                            if event.part.args is not None:
-                                if hasattr(event.part.args, 'args_dict'):
-                                    args_dict = event.part.args.args_dict
-                                elif isinstance(event.part.args, dict):
-                                    args_dict = event.part.args
-
+                            args_dict = extract_tool_args(event.part)
                             if args_dict:
-                                # Emit the structured result as JSON content
                                 result_json = json.dumps(args_dict, indent=2)
-
-
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=result_json,
-                                            ),
-                                            finish_reason=None,
-                                        )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {content_chunk.model_dump_json()}\n\n"
-                                continue  # Skip regular tool call handling
+                                yield build_content_chunk(state, result_json)
+                            continue
 
                         tool_id = f"call_{uuid.uuid4().hex[:8]}"
-                        active_tool_calls[event.index] = (tool_name, tool_id)
-
-                        pending_tool_completions.append((tool_name, tool_id))
-
-                        # Emit tool_call SSE event (started)
-                        # Try to get arguments as dict
-                        args_dict = None
-                        if event.part.args is not None:
-                            if hasattr(event.part.args, 'args_dict'):
-                                args_dict = event.part.args.args_dict
-                            elif isinstance(event.part.args, dict):
-                                args_dict = event.part.args
-                            elif isinstance(event.part.args, str):
-                                # Parse JSON string args (common with pydantic-ai)
-                                try:
-                                    args_dict = json.loads(event.part.args)
-                                except json.JSONDecodeError:
-                                    logger.warning(f"Failed to parse tool args as JSON: {event.part.args[:100]}")
-
-                        # Log tool call with key parameters
-                        if args_dict and tool_name == "search_rem":
-                            query_type = args_dict.get("query_type", "?")
-                            limit = args_dict.get("limit", 20)
-                            table = args_dict.get("table", "")
-                            query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
-                            if query_text and len(query_text) > 50:
-                                query_text = query_text[:50] + "..."
-                            logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
-                        else:
-                            logger.info(f"🔧 {tool_name}")
+                        state.active_tool_calls[event.index] = (tool_name, tool_id)
+                        state.pending_tool_completions.append((tool_name, tool_id))
 
-
-
-
-
-
-                        ))
+                        # Extract and log arguments
+                        args_dict = extract_tool_args(event.part)
+                        log_tool_call(tool_name, args_dict)
+
+                        yield build_tool_start_event(tool_name, tool_id, args_dict)
 
-                        # Track
-                        pending_tool_data[tool_id] = {
+                        # Track for persistence
+                        state.pending_tool_data[tool_id] = {
                             "tool_name": tool_name,
                             "tool_id": tool_id,
                             "arguments": args_dict,
                         }
 
                         # Update progress
-                        current_step = 2
-                        total_steps = 4
-                        yield
-                            step=current_step,
-                            total_steps=total_steps,
+                        state.current_step = 2
+                        state.total_steps = 4
+                        yield build_progress_event(
+                            step=state.current_step,
+                            total_steps=state.total_steps,
                             label=f"Calling {tool_name}",
-
-                        ))
+                        )
 
                     # ============================================
                     # TOOL CALL COMPLETION (PartEndEvent)
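extract_tool_args and log_tool_call consolidate the argument-normalization and logging branches deleted in this hunk (a near-identical normalization copy is deleted from the PartEndEvent handler below). Sketches that mirror those deleted branches; the real helpers live in streaming_utils.py and may differ in detail:

```python
import json
import logging

logger = logging.getLogger(__name__)

def extract_tool_args(part) -> dict | None:
    """Normalize tool-call args from a ToolCallPart-like object."""
    args = getattr(part, "args", None)
    if args is None:
        return None
    if hasattr(args, "args_dict"):
        return args.args_dict
    if isinstance(args, dict):
        return args
    if isinstance(args, str) and args:
        # Parse JSON string args (common with pydantic-ai)
        try:
            return json.loads(args)
        except json.JSONDecodeError:
            logger.warning(f"Failed to parse tool args as JSON: {args[:100]}")
    return None

def log_tool_call(tool_name: str, args_dict: dict | None) -> None:
    """One-line tool-call log, with extra detail for search_rem."""
    if args_dict and tool_name == "search_rem":
        query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
        if query_text and len(query_text) > 50:
            query_text = query_text[:50] + "..."
        logger.info(
            f"🔧 {tool_name} {args_dict.get('query_type', '?').upper()} "
            f"'{query_text}' table={args_dict.get('table', '')} "
            f"limit={args_dict.get('limit', 20)}"
        )
    else:
        logger.info(f"🔧 {tool_name}")
```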
@@ -371,28 +284,14 @@
                     elif isinstance(event, PartEndEvent) and isinstance(
                         event.part, ToolCallPart
                     ):
-                        if event.index in active_tool_calls:
-                            tool_name, tool_id = active_tool_calls[event.index]
-
-
-
-
-
-
-                                    args_dict = event.part.args.args_dict
-                                elif isinstance(event.part.args, dict):
-                                    args_dict = event.part.args
-                                elif isinstance(event.part.args, str) and event.part.args:
-                                    try:
-                                        args_dict = json.loads(event.part.args)
-                                    except json.JSONDecodeError:
-                                        logger.warning(f"Failed to parse tool args: {event.part.args[:100]}")
-
-                            # Update pending_tool_data with complete args
-                            if tool_id in pending_tool_data:
-                                pending_tool_data[tool_id]["arguments"] = args_dict
-
-                            del active_tool_calls[event.index]
+                        if event.index in state.active_tool_calls:
+                            tool_name, tool_id = state.active_tool_calls[event.index]
+                            args_dict = extract_tool_args(event.part)
+
+                            if tool_id in state.pending_tool_data:
+                                state.pending_tool_data[tool_id]["arguments"] = args_dict
+
+                            del state.active_tool_calls[event.index]
 
                     # ============================================
                     # TEXT CONTENT DELTA
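The next hunk swaps the inline child-event loop for stream_with_child_events, described in its comment as a concurrent multiplexer that interleaves tool events and child-agent events as each arrives. Its real implementation is in child_streaming.py (+394 lines, not shown in this diff); below is a minimal sketch of the two-source merge it implies, with a hypothetical name and a simplified signature (the real helper also takes session/message kwargs):

```python
import asyncio
from typing import Any, AsyncGenerator, AsyncIterator

async def merge_tool_and_child_events(
    tools_stream: AsyncIterator[Any],
    child_event_sink: "asyncio.Queue[Any]",
) -> AsyncGenerator[tuple[str, Any], None]:
    """Yield ("tool", ev) / ("child", ev) pairs as each source produces one,
    so child output is not held back until the tool stream finishes."""
    tool_task = asyncio.ensure_future(tools_stream.__anext__())
    child_task = asyncio.ensure_future(child_event_sink.get())
    while tool_task is not None:
        done, _ = await asyncio.wait(
            {tool_task, child_task}, return_when=asyncio.FIRST_COMPLETED
        )
        if child_task in done:
            yield "child", child_task.result()
            child_task = asyncio.ensure_future(child_event_sink.get())
        if tool_task in done:
            try:
                result = tool_task.result()
            except StopAsyncIteration:
                tool_task = None  # tool stream exhausted; exit loop
            else:
                yield "tool", result
                tool_task = asyncio.ensure_future(tools_stream.__anext__())
    child_task.cancel()  # abandon the pending queue read
    while not child_event_sink.empty():
        yield "child", child_event_sink.get_nowait()  # drain stragglers
```

This waits on both sources at once instead of draining the queue only after each tool event, which is the "streaming lag" the new comment says the refactor fixes.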
@@ -400,186 +299,110 @@
                     elif isinstance(event, PartDeltaEvent) and isinstance(
                         event.delta, TextPartDelta
                     ):
+                        # DUPLICATION FIX: Skip parent text if child already streamed content
+                        # Child agents stream via child_content events in ask_agent tool.
+                        # If parent tries to echo that content, skip it.
+                        if state.child_content_streamed:
+                            logger.debug("Skipping parent TextPartDelta - child content already streamed")
+                            continue
+
                         content = event.delta.content_delta
-
-
-                        content_chunk = ChatCompletionStreamResponse(
-                            id=request_id,
-                            created=created_at,
-                            model=model,
-                            choices=[
-                                ChatCompletionStreamChoice(
-                                    index=0,
-                                    delta=ChatCompletionMessageDelta(
-                                        role="assistant" if is_first_chunk else None,
-                                        content=content,
-                                    ),
-                                    finish_reason=None,
-                                )
-                            ],
-                        )
-                        is_first_chunk = False
-                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                        yield build_content_chunk(state, content)
 
             # ============================================
             # TOOL EXECUTION NODE
             # ============================================
             elif Agent.is_call_tools_node(node):
                 async with node.stream(agent_run.ctx) as tools_stream:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            content = child_event.get("content", "")
-                            if content:
-                                content_chunk = ChatCompletionStreamResponse(
-                                    id=request_id,
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=content,
-                                            ),
-                                            finish_reason=None,
-                                        )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {content_chunk.model_dump_json()}\n\n"
-                        elif event_type == "child_tool_result":
-                            # Emit child tool completion
-                            result = child_event.get("result", {})
-                            # Emit metadata event for child agent if it registered metadata
-                            if isinstance(result, dict) and result.get("_metadata_event"):
-                                responding_agent = result.get("agent_schema") or responding_agent
-                                yield format_sse_event(MetadataEvent(
-                                    message_id=message_id,
-                                    session_id=session_id,
-                                    agent_schema=agent_schema,
-                                    responding_agent=responding_agent,
-                                    confidence=result.get("confidence"),
-                                    extra={"risk_level": result.get("risk_level")} if result.get("risk_level") else None,
-                                ))
-                            yield format_sse_event(ToolCallEvent(
-                                tool_name=f"{child_agent}:tool",
-                                tool_id=f"call_{uuid.uuid4().hex[:8]}",
-                                status="completed",
-                                result=str(result)[:200] if result else None,
-                            ))
-                    except Exception as e:
-                        logger.warning(f"Error processing child event: {e}")
+                    # Use concurrent multiplexer to handle both tool events
+                    # and child agent events as they arrive (fixes streaming lag)
+                    async for event_type, event_data in stream_with_child_events(
+                        tools_stream=tools_stream,
+                        child_event_sink=child_event_sink,
+                        state=state,
+                        session_id=session_id,
+                        user_id=effective_user_id,
+                        message_id=message_id,
+                        agent_schema=agent_schema,
+                    ):
+                        # Handle child events (streamed from ask_agent)
+                        if event_type == "child":
+                            async for chunk in process_child_event(
+                                child_event=event_data,
+                                state=state,
+                                session_id=session_id,
+                                user_id=effective_user_id,
+                                message_id=message_id,
+                                agent_schema=agent_schema,
+                            ):
+                                yield chunk
+                            continue
+
+                        # Handle tool events
+                        tool_event = event_data
 
                         # Tool result event - emit completion
                         if isinstance(tool_event, FunctionToolResultEvent):
                             # Get the tool name/id from the pending queue (FIFO)
-                            if pending_tool_completions:
-                                tool_name, tool_id = pending_tool_completions.pop(0)
+                            if state.pending_tool_completions:
+                                tool_name, tool_id = state.pending_tool_completions.pop(0)
                             else:
-                                # Fallback if queue is empty (shouldn't happen)
                                 tool_name = "tool"
                                 tool_id = f"call_{uuid.uuid4().hex[:8]}"
 
-                            # Check if this is a register_metadata tool result
-                            # It returns a dict with _metadata_event: True marker
                             result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
                             is_metadata_event = False
 
-
+                            # Handle register_metadata tool results
+                            metadata = extract_metadata_from_result(result_content)
+                            if metadata:
                                 is_metadata_event = True
-                                metadata_registered = True
-
-
-
-
-                                registered_flags = result_content.get("flags")
-                                # Session naming
-                                registered_session_name = result_content.get("session_name")
-                                # Risk assessment fields
-                                registered_risk_level = result_content.get("risk_level")
-                                registered_risk_score = result_content.get("risk_score")
-                                registered_risk_reasoning = result_content.get("risk_reasoning")
-                                registered_recommended_action = result_content.get("recommended_action")
-                                # Extra fields
-                                registered_extra = result_content.get("extra")
-                                # Only set responding_agent if not already set by child events
-                                # Child agents should take precedence - they're the actual responders
-                                if not responding_agent:
-                                    responding_agent = result_content.get("agent_schema")
+                                state.metadata_registered = True
+
+                                # Only set responding_agent if not already set by child
+                                if not state.responding_agent and metadata.get("agent_schema"):
+                                    state.responding_agent = metadata["agent_schema"]
 
                                 logger.info(
-                                    f"📊 Metadata
-                                    f"
-                                    f"risk_level={registered_risk_level}, sources={registered_sources}"
+                                    f"📊 Metadata: confidence={metadata.get('confidence')}, "
+                                    f"risk_level={metadata.get('risk_level')}"
                                 )
 
-                                # Build extra dict with risk fields
+                                # Build extra dict with risk fields
                                 extra_data = {}
-
-
-
-
-
-
-                                if registered_recommended_action is not None:
-                                    extra_data["recommended_action"] = registered_recommended_action
-                                if registered_extra:
-                                    extra_data.update(registered_extra)
-
-                                # Emit metadata event immediately
+                                for field in ["risk_level", "risk_score", "risk_reasoning", "recommended_action"]:
+                                    if metadata.get(field) is not None:
+                                        extra_data[field] = metadata[field]
+                                if metadata.get("extra"):
+                                    extra_data.update(metadata["extra"])
+
                                 yield format_sse_event(MetadataEvent(
                                     message_id=message_id,
                                     in_reply_to=in_reply_to,
                                     session_id=session_id,
                                     agent_schema=agent_schema,
-                                    responding_agent=responding_agent,
-                                    session_name=
-                                    confidence=
-                                    sources=
+                                    responding_agent=state.responding_agent,
+                                    session_name=metadata.get("session_name"),
+                                    confidence=metadata.get("confidence"),
+                                    sources=metadata.get("sources"),
                                     model_version=model,
-                                    flags=
+                                    flags=metadata.get("flags"),
                                     extra=extra_data if extra_data else None,
                                     hidden=False,
                                 ))
 
-                            # Get complete args from pending_tool_data
-                            # (captured at PartEndEvent with full args)
+                            # Get complete args from pending_tool_data
                             completed_args = None
-                            if tool_id in pending_tool_data:
-                                completed_args = pending_tool_data[tool_id].get("arguments")
+                            if tool_id in state.pending_tool_data:
+                                completed_args = state.pending_tool_data[tool_id].get("arguments")
 
-                            # Capture tool call
-
-
-                                tool_data = pending_tool_data[tool_id]
+                            # Capture tool call for persistence
+                            if tool_calls_out is not None and tool_id in state.pending_tool_data:
+                                tool_data = state.pending_tool_data[tool_id]
                                 tool_data["result"] = result_content
                                 tool_data["is_metadata"] = is_metadata_event
                                 tool_calls_out.append(tool_data)
-                                del pending_tool_data[tool_id]
+                                del state.pending_tool_data[tool_id]
 
                             if not is_metadata_event:
                                 # NOTE: text_response fallback is DISABLED
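extract_metadata_from_result replaces the deleted marker check. Judging from the old code in this hunk, which looked for a dict carrying `_metadata_event: True`, the helper plausibly reduces to something like this (a sketch; the real function is in streaming_utils.py):

```python
from typing import Any

def extract_metadata_from_result(result_content: Any) -> dict | None:
    """Return the metadata dict if this is a register_metadata tool result.

    The old inline code treated any dict with a truthy `_metadata_event`
    key as registered metadata, so this sketch does the same.
    """
    if isinstance(result_content, dict) and result_content.get("_metadata_event"):
        return result_content
    return None
```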
@@ -629,10 +452,10 @@
                             ))
 
                             # Update progress after tool completion
-                            current_step = 3
+                            state.current_step = 3
                             yield format_sse_event(ProgressEvent(
-                                step=current_step,
-                                total_steps=total_steps,
+                                step=state.current_step,
+                                total_steps=state.total_steps,
                                 label="Generating response",
                                 status="in_progress"
                             ))
@@ -661,36 +484,36 @@
                     result_dict = {"result": str(output)}
 
                 result_json = json.dumps(result_dict, indent=2, default=str)
-                token_count += len(result_json.split())
+                state.token_count += len(result_json.split())
 
                 # Emit structured result as content
                 result_chunk = ChatCompletionStreamResponse(
-                    id=request_id,
-                    created=created_at,
+                    id=state.request_id,
+                    created=state.created_at,
                     model=model,
                     choices=[
                         ChatCompletionStreamChoice(
                             index=0,
                             delta=ChatCompletionMessageDelta(
-                                role="assistant" if is_first_chunk else None,
+                                role="assistant" if state.is_first_chunk else None,
                                 content=result_json,
                             ),
                             finish_reason=None,
                         )
                     ],
                 )
-                is_first_chunk = False
+                state.is_first_chunk = False
                 yield f"data: {result_chunk.model_dump_json()}\n\n"
             except Exception as e:
                 logger.debug(f"No structured result available: {e}")
 
         # Calculate latency
-        latency_ms =
+        latency_ms = state.latency_ms()
 
         # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
-            id=request_id,
-            created=created_at,
+            id=state.request_id,
+            created=state.created_at,
             model=model,
             choices=[
                 ChatCompletionStreamChoice(
@@ -703,28 +526,28 @@
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
         # Emit metadata event only if not already registered via register_metadata tool
-        if not metadata_registered:
+        if not state.metadata_registered:
             yield format_sse_event(MetadataEvent(
                 message_id=message_id,
                 in_reply_to=in_reply_to,
                 session_id=session_id,
                 agent_schema=agent_schema,
-                responding_agent=responding_agent,
+                responding_agent=state.responding_agent,
                 confidence=1.0,  # Default to 100% confidence
                 model_version=model,
                 latency_ms=latency_ms,
-                token_count=token_count,
+                token_count=state.token_count,
                 # Include deterministic trace context captured from OTEL
                 trace_id=captured_trace_id,
                 span_id=captured_span_id,
             ))
 
         # Mark all progress complete
-        for step in range(1, total_steps + 1):
+        for step in range(1, state.total_steps + 1):
             yield format_sse_event(ProgressEvent(
                 step=step,
-                total_steps=total_steps,
-                label="Complete" if step == total_steps else f"Step {step}",
+                total_steps=state.total_steps,
+                label="Complete" if step == state.total_steps else f"Step {step}",
                 status="completed"
             ))
 
@@ -919,18 +742,8 @@ async def save_user_message(
     """
     Save user message to database before streaming.
 
-
-    user message storage.
-
-    Args:
-        session_id: Session ID for message storage
-        user_id: User ID for message storage
-        content: The user's message content
+    Shared utility used by both API and CLI for consistent user message storage.
     """
-    from ....utils.date_utils import utc_now, to_iso
-    from ....services.session import SessionMessageStore
-    from ....settings import settings
-
     if not settings.postgres.enabled or not session_id:
         return
 
@@ -987,10 +800,6 @@ async def stream_openai_response_with_save(
     Yields:
         SSE-formatted strings
     """
-    from ....utils.date_utils import utc_now, to_iso
-    from ....services.session import SessionMessageStore
-    from ....settings import settings
-
     # Pre-generate message_id so it can be sent in metadata event
     # This allows frontend to use it for feedback before DB persistence
     message_id = str(uuid.uuid4())