remdb 0.3.180__py3-none-any.whl → 0.3.258__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +36 -2
- rem/agentic/__init__.py +10 -1
- rem/agentic/context.py +185 -1
- rem/agentic/context_builder.py +56 -35
- rem/agentic/mcp/tool_wrapper.py +2 -2
- rem/agentic/providers/pydantic_ai.py +303 -111
- rem/agentic/schema.py +2 -2
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/server.py +4 -0
- rem/api/mcp_router/tools.py +608 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +219 -20
- rem/api/routers/chat/child_streaming.py +393 -0
- rem/api/routers/chat/completions.py +77 -40
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +381 -291
- rem/api/routers/chat/streaming_utils.py +325 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +11 -3
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +17 -15
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +205 -114
- rem/cli/commands/db.py +55 -31
- rem/cli/commands/experiments.py +1 -1
- rem/cli/commands/process.py +179 -43
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +1 -1
- rem/models/entities/ontology.py +18 -20
- rem/models/entities/session.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +1 -1
- rem/schemas/agents/rem.yaml +1 -1
- rem/schemas/agents/test_orchestrator.yaml +42 -0
- rem/schemas/agents/test_structured_output.yaml +52 -0
- rem/services/content/providers.py +151 -49
- rem/services/content/service.py +18 -5
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +11 -10
- rem/services/postgres/repository.py +39 -28
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/rem/README.md +4 -3
- rem/services/rem/parser.py +7 -10
- rem/services/rem/service.py +47 -0
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +47 -5
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +2 -1
- rem/settings.py +92 -7
- rem/sql/migrations/001_install.sql +125 -7
- rem/sql/migrations/002_install_models.sql +159 -149
- rem/sql/migrations/004_cache_system.sql +10 -276
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/schema_loader.py +180 -120
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/streaming.py:

@@ -1,37 +1,38 @@
 """
 OpenAI-compatible streaming relay for Pydantic AI agents.
 
-… (old docstring body, lines 4-27, not rendered in the source diff)
-See sse_events.py for the full event type definitions.
+Architecture:
+```
+User Request → stream_openai_response → agent.iter() → SSE Events → Client
+                     │
+                     ├── Parent agent events (text, tool calls)
+                     │
+                     └── Child agent events (via ask_agent tool)
+                              │
+                              ▼
+                     Event Sink (asyncio.Queue)
+                              │
+                              ▼
+                     drain_child_events() → SSE + DB
+```
+
+Modules:
+- streaming.py: Main workflow orchestrator (this file)
+- streaming_utils.py: Pure utility functions, StreamingState dataclass
+- child_streaming.py: Child agent event handling
+
+Key Design Decision (DUPLICATION FIX):
+When child_content is streamed, state.child_content_streamed is set True.
+Parent TextPartDelta events are SKIPPED when this flag is True,
+preventing content from being emitted twice.
 """
 
+from __future__ import annotations
+
+import asyncio
 import json
-import time
 import uuid
-from typing import AsyncGenerator
+from typing import TYPE_CHECKING, AsyncGenerator
 
 from loguru import logger
 from pydantic_ai.agent import Agent
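
The event-sink design sketched in the new docstring — child agents push events into an `asyncio.Queue` that the parent streamer drains — can be illustrated standalone. The sketch below assumes a `ContextVar`-based implementation; `rem/agentic/context.py` (changed +185 -1 in this release) is not shown in this diff, so these bodies are an assumption, not the package's actual code.

```python
# Minimal sketch of the event-sink pattern (assumed implementation,
# not the actual rem/agentic/context.py).
import asyncio
from contextvars import ContextVar

_event_sink: ContextVar[asyncio.Queue | None] = ContextVar("event_sink", default=None)

def set_event_sink(queue: asyncio.Queue | None) -> None:
    """Install (or clear) the queue that child agent events are proxied through."""
    _event_sink.set(queue)

def get_event_sink() -> asyncio.Queue | None:
    return _event_sink.get()

async def emit_child_event(event: dict) -> None:
    """Called from a child agent (e.g. inside an ask_agent tool).

    No-op when no parent streamer is listening.
    """
    sink = get_event_sink()
    if sink is not None:
        await sink.put(event)
```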
@@ -47,7 +48,17 @@ from pydantic_ai.messages (
     ToolCallPart,
 )
 
-from .
+from .child_streaming import drain_child_events, stream_with_child_events, process_child_event
+from .streaming_utils import (
+    StreamingState,
+    build_content_chunk,
+    build_progress_event,
+    build_tool_start_event,
+    extract_metadata_from_result,
+    extract_tool_args,
+    log_tool_call,
+)
+from .otel_utils import get_current_trace_context
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
@@ -55,12 +66,19 @@ from .models import (
 )
 from .sse_events import (
     DoneEvent,
+    ErrorEvent,
     MetadataEvent,
     ProgressEvent,
     ReasoningEvent,
     ToolCallEvent,
     format_sse_event,
 )
+from ....services.session import SessionMessageStore
+from ....settings import settings
+from ....utils.date_utils import to_iso, utc_now
+
+if TYPE_CHECKING:
+    from ....agentic.context import AgentContext
 
 
 async def stream_openai_response(
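
`StreamingState` is imported from streaming_utils.py (+325 lines in this release) but never defined in this file. From the fields this diff reads and writes (`current_step`, `token_count`, `is_first_chunk`, `latency_ms()`, and the tool-tracking containers), it is plausibly a dataclass along these lines — a reconstruction for orientation only, not the actual definition:

```python
# Hypothetical shape of StreamingState, inferred from usage in this diff.
import time
from dataclasses import dataclass, field

@dataclass
class StreamingState:
    model: str
    request_id: str
    created_at: int                 # Unix timestamp for OpenAI chunk envelopes
    start_time: float               # stream start, used by latency_ms()
    current_step: int = 1
    total_steps: int = 3
    reasoning_step: int = 0
    token_count: int = 0
    is_first_chunk: bool = True
    child_content_streamed: bool = False
    metadata_registered: bool = False
    responding_agent: str | None = None
    active_tool_calls: dict[int, tuple[str, str]] = field(default_factory=dict)
    pending_tool_completions: list[tuple[str, str]] = field(default_factory=list)
    pending_tool_data: dict[str, dict] = field(default_factory=dict)

    @classmethod
    def create(cls, model: str, request_id: str) -> "StreamingState":
        now = time.time()
        return cls(model=model, request_id=request_id,
                   created_at=int(now), start_time=now)

    def latency_ms(self) -> int:
        return int((time.time() - self.start_time) * 1000)
```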
@@ -79,6 +97,11 @@ async def stream_openai_response(
     # Mutable container to capture tool calls for persistence
     # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
     tool_calls_out: list | None = None,
+    # Agent context for multi-agent propagation
+    # When set, enables child agents to access parent context via get_current_context()
+    agent_context: "AgentContext | None" = None,
+    # Pydantic-ai native message history for proper tool call/return pairing
+    message_history: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Stream Pydantic AI agent responses with rich SSE events.
@@ -131,40 +154,39 @@ async def stream_openai_response(
     event: done
     data: {"type": "done", "reason": "stop"}
     """
-… (old lines 134-149 not rendered in the source diff)
-    #
-…
-…
-    # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
-    pending_tool_data: dict[str, dict] = {}
+    # Initialize streaming state
+    state = StreamingState.create(model=model, request_id=request_id)
+
+    # Get effective user_id for database operations
+    effective_user_id = agent_context.user_id if agent_context else None
+
+    # Import context functions for multi-agent support
+    from ....agentic.context import set_current_context, set_event_sink
+
+    # Set up context for multi-agent propagation
+    previous_context = None
+    if agent_context is not None:
+        from ....agentic.context import get_current_context
+        previous_context = get_current_context()
+        set_current_context(agent_context)
+
+    # Set up event sink for child agent event proxying
+    child_event_sink: asyncio.Queue = asyncio.Queue()
+    set_event_sink(child_event_sink)
 
     try:
         # Emit initial progress event
-        current_step = 1
-        yield
-            step=current_step,
-            total_steps=total_steps,
+        state.current_step = 1
+        yield build_progress_event(
+            step=state.current_step,
+            total_steps=state.total_steps,
             label="Processing request",
-…
-        ))
+        )
 
         # Use agent.iter() to get complete execution with tool calls
-…
+        # Pass message_history if available for proper tool call/return pairing
+        iter_kwargs = {"message_history": message_history} if message_history else {}
+        async with agent.iter(prompt, **iter_kwargs) as agent_run:
             # Capture trace context IMMEDIATELY inside agent execution
             # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
             # NOT dependent on any AI-generated content
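
The context handling above follows the standard save/install/restore pattern for ambient state: capture whatever context the caller had, install the new one for the duration of the stream, and restore it in the `finally` block added at the end of this function. Condensed into a self-contained sketch (the `ContextVar` plumbing is an assumption; only the pattern itself is taken from the diff):

```python
# Save/install/restore pattern for ambient agent context (sketch).
from contextvars import ContextVar
from typing import Any, AsyncGenerator, Callable

_current_context: ContextVar[Any] = ContextVar("agent_context", default=None)

def get_current_context() -> Any:
    return _current_context.get()

def set_current_context(ctx: Any) -> None:
    _current_context.set(ctx)

async def stream_in_context(
    agent_context: Any,
    do_stream: Callable[[], AsyncGenerator[str, None]],
) -> AsyncGenerator[str, None]:
    previous = get_current_context()    # capture the caller's context
    set_current_context(agent_context)  # child agents now see this one
    try:
        async for chunk in do_stream():
            yield chunk
    finally:
        set_current_context(previous)   # never leak into the parent
```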
@@ -185,11 +207,11 @@ async def stream_openai_response(
                             if isinstance(event, PartStartEvent) and isinstance(
                                 event.part, ThinkingPart
                             ):
-                                reasoning_step += 1
+                                state.reasoning_step += 1
                                 if event.part.content:
                                     yield format_sse_event(ReasoningEvent(
                                         content=event.part.content,
-                                        step=reasoning_step
+                                        step=state.reasoning_step
                                     ))
 
                             # Reasoning delta (streaming thinking)
@@ -199,7 +221,7 @@ async def stream_openai_response(
                                 if event.delta.content_delta:
                                     yield format_sse_event(ReasoningEvent(
                                         content=event.delta.content_delta,
-                                        step=reasoning_step
+                                        step=state.reasoning_step
                                     ))
 
                             # ============================================
@@ -208,28 +230,11 @@ async def stream_openai_response(
                             elif isinstance(event, PartStartEvent) and isinstance(
                                 event.part, TextPart
                             ):
-                                #
+                                # Skip if child already streamed content
+                                if state.child_content_streamed:
+                                    continue
                                 if event.part.content:
-…
-                                    token_count += len(content.split())
-
-                                    content_chunk = ChatCompletionStreamResponse(
-                                        id=request_id,
-                                        created=created_at,
-                                        model=model,
-                                        choices=[
-                                            ChatCompletionStreamChoice(
-                                                index=0,
-                                                delta=ChatCompletionMessageDelta(
-                                                    role="assistant" if is_first_chunk else None,
-                                                    content=content,
-                                                ),
-                                                finish_reason=None,
-                                            )
-                                        ],
-                                    )
-                                    is_first_chunk = False
-                                    yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                    yield build_content_chunk(state, event.part.content)
 
                             # ============================================
                             # TOOL CALL START EVENTS
@@ -239,88 +244,39 @@ async def stream_openai_response(
                             ):
                                 tool_name = event.part.tool_name
 
-                                # Handle final_result
-                                # internal tool for structured output
+                                # Handle final_result (Pydantic AI's internal tool)
                                 if tool_name == "final_result":
-…
-                                    args_dict = None
-                                    if event.part.args is not None:
-                                        if hasattr(event.part.args, 'args_dict'):
-                                            args_dict = event.part.args.args_dict
-                                        elif isinstance(event.part.args, dict):
-                                            args_dict = event.part.args
-
+                                    args_dict = extract_tool_args(event.part)
                                     if args_dict:
-                                        # Emit the structured result as JSON content
                                         result_json = json.dumps(args_dict, indent=2)
-…
-…
-                                            created=created_at,
-                                            model=model,
-                                            choices=[
-                                                ChatCompletionStreamChoice(
-                                                    index=0,
-                                                    delta=ChatCompletionMessageDelta(
-                                                        role="assistant" if is_first_chunk else None,
-                                                        content=result_json,
-                                                    ),
-                                                    finish_reason=None,
-                                                )
-                                            ],
-                                        )
-                                        is_first_chunk = False
-                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
-                                        continue  # Skip regular tool call handling
+                                        yield build_content_chunk(state, result_json)
+                                    continue
 
                                 tool_id = f"call_{uuid.uuid4().hex[:8]}"
-                                active_tool_calls[event.index] = (tool_name, tool_id)
-
-                                pending_tool_completions.append((tool_name, tool_id))
-
-                                # Emit tool_call SSE event (started)
-                                # Try to get arguments as dict
-                                args_dict = None
-                                if event.part.args is not None:
-                                    if hasattr(event.part.args, 'args_dict'):
-                                        args_dict = event.part.args.args_dict
-                                    elif isinstance(event.part.args, dict):
-                                        args_dict = event.part.args
-
-                                # Log tool call with key parameters
-                                if args_dict and tool_name == "search_rem":
-                                    query_type = args_dict.get("query_type", "?")
-                                    limit = args_dict.get("limit", 20)
-                                    table = args_dict.get("table", "")
-                                    query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
-                                    if query_text and len(query_text) > 50:
-                                        query_text = query_text[:50] + "..."
-                                    logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
-                                else:
-                                    logger.info(f"🔧 {tool_name}")
+                                state.active_tool_calls[event.index] = (tool_name, tool_id)
+                                state.pending_tool_completions.append((tool_name, tool_id))
 
-…
-…
-…
-                                    status="started",
-                                    arguments=args_dict
-                                ))
+                                # Extract and log arguments
+                                args_dict = extract_tool_args(event.part)
+                                log_tool_call(tool_name, args_dict)
 
-…
-…
+                                yield build_tool_start_event(tool_name, tool_id, args_dict)
+
+                                # Track for persistence
+                                state.pending_tool_data[tool_id] = {
                                     "tool_name": tool_name,
                                     "tool_id": tool_id,
                                     "arguments": args_dict,
                                 }
 
                                 # Update progress
-                                current_step = 2
-                                total_steps = 4
-                                yield
-                                    step=current_step,
-                                    total_steps=total_steps,
+                                state.current_step = 2
+                                state.total_steps = 4
+                                yield build_progress_event(
+                                    step=state.current_step,
+                                    total_steps=state.total_steps,
                                     label=f"Calling {tool_name}",
-…
-                                ))
+                                )
 
                             # ============================================
                             # TOOL CALL COMPLETION (PartEndEvent)
@@ -328,11 +284,14 @@ async def stream_openai_response(
                             elif isinstance(event, PartEndEvent) and isinstance(
                                 event.part, ToolCallPart
                             ):
-                                if event.index in active_tool_calls:
-                                    tool_name, tool_id = active_tool_calls[event.index]
-…
-…
-…
+                                if event.index in state.active_tool_calls:
+                                    tool_name, tool_id = state.active_tool_calls[event.index]
+                                    args_dict = extract_tool_args(event.part)
+
+                                    if tool_id in state.pending_tool_data:
+                                        state.pending_tool_data[tool_id]["arguments"] = args_dict
+
+                                    del state.active_tool_calls[event.index]
 
                             # ============================================
                             # TEXT CONTENT DELTA
@@ -340,151 +299,158 @@ async def stream_openai_response(
                             elif isinstance(event, PartDeltaEvent) and isinstance(
                                 event.delta, TextPartDelta
                             ):
+                                # DUPLICATION FIX: Skip parent text if child already streamed content
+                                # Child agents stream via child_content events in ask_agent tool.
+                                # If parent tries to echo that content, skip it.
+                                if state.child_content_streamed:
+                                    logger.debug("Skipping parent TextPartDelta - child content already streamed")
+                                    continue
+
                                 content = event.delta.content_delta
-…
-
-                                content_chunk = ChatCompletionStreamResponse(
-                                    id=request_id,
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=content,
-                                            ),
-                                            finish_reason=None,
-                                        )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                yield build_content_chunk(state, content)
 
                 # ============================================
                 # TOOL EXECUTION NODE
                 # ============================================
                 elif Agent.is_call_tools_node(node):
                     async with node.stream(agent_run.ctx) as tools_stream:
-…
+                        # Use concurrent multiplexer to handle both tool events
+                        # and child agent events as they arrive (fixes streaming lag)
+                        async for event_type, event_data in stream_with_child_events(
+                            tools_stream=tools_stream,
+                            child_event_sink=child_event_sink,
+                            state=state,
+                            session_id=session_id,
+                            user_id=effective_user_id,
+                            message_id=message_id,
+                            agent_schema=agent_schema,
+                        ):
+                            # Handle child events (streamed from ask_agent)
+                            if event_type == "child":
+                                async for chunk in process_child_event(
+                                    child_event=event_data,
+                                    state=state,
+                                    session_id=session_id,
+                                    user_id=effective_user_id,
+                                    message_id=message_id,
+                                    agent_schema=agent_schema,
+                                ):
+                                    yield chunk
+                                continue
+
+                            # Handle tool events
+                            tool_event = event_data
 
                             # Tool result event - emit completion
                             if isinstance(tool_event, FunctionToolResultEvent):
                                 # Get the tool name/id from the pending queue (FIFO)
-                                if pending_tool_completions:
-                                    tool_name, tool_id = pending_tool_completions.pop(0)
+                                if state.pending_tool_completions:
+                                    tool_name, tool_id = state.pending_tool_completions.pop(0)
                                 else:
-                                    # Fallback if queue is empty (shouldn't happen)
                                     tool_name = "tool"
                                     tool_id = f"call_{uuid.uuid4().hex[:8]}"
 
-                                # Check if this is a register_metadata tool result
-                                # It returns a dict with _metadata_event: True marker
                                 result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
                                 is_metadata_event = False
 
-…
+                                # Handle register_metadata tool results
+                                metadata = extract_metadata_from_result(result_content)
+                                if metadata:
                                     is_metadata_event = True
-                                    metadata_registered = True
-… (old lines 388-391 not rendered in the source diff)
-                                    registered_flags = result_content.get("flags")
-                                    # Session naming
-                                    registered_session_name = result_content.get("session_name")
-                                    # Risk assessment fields
-                                    registered_risk_level = result_content.get("risk_level")
-                                    registered_risk_score = result_content.get("risk_score")
-                                    registered_risk_reasoning = result_content.get("risk_reasoning")
-                                    registered_recommended_action = result_content.get("recommended_action")
-                                    # Extra fields
-                                    registered_extra = result_content.get("extra")
+                                    state.metadata_registered = True
+
+                                    # Only set responding_agent if not already set by child
+                                    if not state.responding_agent and metadata.get("agent_schema"):
+                                        state.responding_agent = metadata["agent_schema"]
 
                                     logger.info(
-                                        f"📊 Metadata
-                                        f"
-                                        f"risk_level={registered_risk_level}, sources={registered_sources}"
+                                        f"📊 Metadata: confidence={metadata.get('confidence')}, "
+                                        f"risk_level={metadata.get('risk_level')}"
                                     )
 
-                                    # Build extra dict with risk fields
+                                    # Build extra dict with risk fields
                                     extra_data = {}
-… (old lines 411-416 not rendered in the source diff)
-                                    if registered_recommended_action is not None:
-                                        extra_data["recommended_action"] = registered_recommended_action
-                                    if registered_extra:
-                                        extra_data.update(registered_extra)
-
-                                    # Emit metadata event immediately
+                                    for field in ["risk_level", "risk_score", "risk_reasoning", "recommended_action"]:
+                                        if metadata.get(field) is not None:
+                                            extra_data[field] = metadata[field]
+                                    if metadata.get("extra"):
+                                        extra_data.update(metadata["extra"])
+
                                     yield format_sse_event(MetadataEvent(
                                         message_id=message_id,
                                         in_reply_to=in_reply_to,
                                         session_id=session_id,
                                         agent_schema=agent_schema,
-…
-…
-…
+                                        responding_agent=state.responding_agent,
+                                        session_name=metadata.get("session_name"),
+                                        confidence=metadata.get("confidence"),
+                                        sources=metadata.get("sources"),
                                         model_version=model,
-                                        flags=
+                                        flags=metadata.get("flags"),
                                         extra=extra_data if extra_data else None,
                                         hidden=False,
                                     ))
 
-                                #
-…
-                                if
-…
+                                # Get complete args from pending_tool_data
+                                completed_args = None
+                                if tool_id in state.pending_tool_data:
+                                    completed_args = state.pending_tool_data[tool_id].get("arguments")
+
+                                # Capture tool call for persistence
+                                if tool_calls_out is not None and tool_id in state.pending_tool_data:
+                                    tool_data = state.pending_tool_data[tool_id]
                                     tool_data["result"] = result_content
                                     tool_data["is_metadata"] = is_metadata_event
                                     tool_calls_out.append(tool_data)
-                                    del pending_tool_data[tool_id]
-… (old lines 445-481 not rendered in the source diff)
+                                    del state.pending_tool_data[tool_id]
+
+                                # Always emit ToolCallEvent completion for frontend tracking
+                                # Send full result for dict/list types, stringify others
+                                if isinstance(result_content, (dict, list)):
+                                    result_for_sse = result_content
+                                else:
+                                    result_for_sse = str(result_content)
+
+                                # Log result count for search_rem
+                                if tool_name == "search_rem" and isinstance(result_content, dict):
+                                    results = result_content.get("results", {})
+                                    # Handle nested result structure: results may be a dict with 'results' list and 'count'
+                                    if isinstance(results, dict):
+                                        count = results.get("count", len(results.get("results", [])))
+                                        query_type = results.get("query_type", "?")
+                                        query_text = results.get("query_text", results.get("key", ""))
+                                        table = results.get("table_name", "")
+                                    elif isinstance(results, list):
+                                        count = len(results)
+                                        query_type = "?"
+                                        query_text = ""
+                                        table = ""
+                                    else:
+                                        count = "?"
+                                        query_type = "?"
+                                        query_text = ""
+                                        table = ""
+                                    status = result_content.get("status", "unknown")
+                                    # Truncate query text for logging
+                                    if query_text and len(str(query_text)) > 40:
+                                        query_text = str(query_text)[:40] + "..."
+                                    logger.info(f" ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
+
+                                # Always emit ToolCallEvent completion for frontend tracking
+                                # This includes register_metadata calls so they turn green in the UI
+                                yield format_sse_event(ToolCallEvent(
+                                    tool_name=tool_name,
+                                    tool_id=tool_id,
+                                    status="completed",
+                                    arguments=completed_args,
+                                    result=result_for_sse
+                                ))
 
                                 # Update progress after tool completion
-                                current_step = 3
+                                state.current_step = 3
                                 yield format_sse_event(ProgressEvent(
-                                    step=current_step,
-                                    total_steps=total_steps,
+                                    step=state.current_step,
+                                    total_steps=state.total_steps,
                                     label="Generating response",
                                     status="in_progress"
                                 ))
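
Tool starts and tool results are paired FIFO through `state.pending_tool_completions`: each start pushes `(tool_name, tool_id)`, and each `FunctionToolResultEvent` pops from the front, with a synthetic fallback when the queue is unexpectedly empty. A toy illustration of that invariant:

```python
# FIFO pairing of tool-start and tool-result events, mirroring the diff's queue.
import uuid

pending: list[tuple[str, str]] = []

def on_tool_start(tool_name: str) -> str:
    tool_id = f"call_{uuid.uuid4().hex[:8]}"
    pending.append((tool_name, tool_id))           # enqueue in call order
    return tool_id

def on_tool_result() -> tuple[str, str]:
    if pending:
        return pending.pop(0)                      # results assumed to arrive in order
    return "tool", f"call_{uuid.uuid4().hex[:8]}"  # fallback: queue empty

a = on_tool_start("search_rem")
b = on_tool_start("register_metadata")
assert on_tool_result()[1] == a                    # first result pairs with first call
assert on_tool_result()[1] == b
```

Note this pairing assumes sequential tool execution; tools that completed out of order would be mislabeled.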
@@ -513,36 +479,36 @@ async def stream_openai_response(
                 result_dict = {"result": str(output)}
 
             result_json = json.dumps(result_dict, indent=2, default=str)
-            token_count += len(result_json.split())
+            state.token_count += len(result_json.split())
 
             # Emit structured result as content
             result_chunk = ChatCompletionStreamResponse(
-                id=request_id,
-                created=created_at,
+                id=state.request_id,
+                created=state.created_at,
                 model=model,
                 choices=[
                     ChatCompletionStreamChoice(
                         index=0,
                         delta=ChatCompletionMessageDelta(
-                            role="assistant" if is_first_chunk else None,
+                            role="assistant" if state.is_first_chunk else None,
                             content=result_json,
                         ),
                         finish_reason=None,
                     )
                 ],
             )
-            is_first_chunk = False
+            state.is_first_chunk = False
             yield f"data: {result_chunk.model_dump_json()}\n\n"
         except Exception as e:
             logger.debug(f"No structured result available: {e}")
 
         # Calculate latency
-        latency_ms =
+        latency_ms = state.latency_ms()
 
         # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
-            id=request_id,
-            created=created_at,
+            id=state.request_id,
+            created=state.created_at,
             model=model,
             choices=[
                 ChatCompletionStreamChoice(
@@ -555,27 +521,28 @@ async def stream_openai_response(
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
         # Emit metadata event only if not already registered via register_metadata tool
-        if not metadata_registered:
+        if not state.metadata_registered:
             yield format_sse_event(MetadataEvent(
                 message_id=message_id,
                 in_reply_to=in_reply_to,
                 session_id=session_id,
                 agent_schema=agent_schema,
+                responding_agent=state.responding_agent,
                 confidence=1.0,  # Default to 100% confidence
                 model_version=model,
                 latency_ms=latency_ms,
-                token_count=token_count,
+                token_count=state.token_count,
                 # Include deterministic trace context captured from OTEL
                 trace_id=captured_trace_id,
                 span_id=captured_span_id,
             ))
 
         # Mark all progress complete
-        for step in range(1, total_steps + 1):
+        for step in range(1, state.total_steps + 1):
             yield format_sse_event(ProgressEvent(
                 step=step,
-                total_steps=total_steps,
-                label="Complete" if step == total_steps else f"Step {step}",
+                total_steps=state.total_steps,
+                label="Complete" if step == state.total_steps else f"Step {step}",
                 status="completed"
             ))
 
@@ -587,25 +554,79 @@ async def stream_openai_response(
 
     except Exception as e:
         import traceback
+        import re
 
         error_msg = str(e)
-… (old error-handling lines 592-603 not rendered in the source diff)
+
+        # Parse error details for better client handling
+        error_code = "stream_error"
+        error_details: dict = {}
+        recoverable = True
+
+        # Check for rate limit errors (OpenAI 429)
+        if "429" in error_msg or "rate_limit" in error_msg.lower() or "RateLimitError" in type(e).__name__:
+            error_code = "rate_limit_exceeded"
+            recoverable = True
+
+            # Extract retry-after time from error message
+            # Pattern: "Please try again in X.XXs" or "Please try again in Xs"
+            retry_match = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
+            if retry_match:
+                retry_seconds = float(retry_match.group(1))
+                error_details["retry_after_seconds"] = retry_seconds
+                error_details["retry_after_ms"] = int(retry_seconds * 1000)
+
+            # Extract token usage info if available
+            used_match = re.search(r"Used (\d+)", error_msg)
+            limit_match = re.search(r"Limit (\d+)", error_msg)
+            requested_match = re.search(r"Requested (\d+)", error_msg)
+            if used_match:
+                error_details["tokens_used"] = int(used_match.group(1))
+            if limit_match:
+                error_details["tokens_limit"] = int(limit_match.group(1))
+            if requested_match:
+                error_details["tokens_requested"] = int(requested_match.group(1))
+
+            logger.error(f"🔴 Streaming error: status_code: 429, model_name: {model}, body: {error_msg[:200]}")
+
+        # Check for authentication errors
+        elif "401" in error_msg or "AuthenticationError" in type(e).__name__:
+            error_code = "authentication_error"
+            recoverable = False
+            logger.error(f"🔴 Streaming error: Authentication failed")
+
+        # Check for model not found / invalid model
+        elif "404" in error_msg or "model" in error_msg.lower() and "not found" in error_msg.lower():
+            error_code = "model_not_found"
+            recoverable = False
+            logger.error(f"🔴 Streaming error: Model not found")
+
+        # Generic error
+        else:
+            logger.error(f"🔴 Streaming error: {error_msg}")
+
+        logger.error(f"🔴 {traceback.format_exc()}")
+
+        # Emit proper ErrorEvent via SSE (with event: prefix for client parsing)
+        yield format_sse_event(ErrorEvent(
+            code=error_code,
+            message=error_msg,
+            details=error_details if error_details else None,
+            recoverable=recoverable,
+        ))
 
         # Emit done event with error reason
        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
 
+    finally:
+        # Clean up event sink for multi-agent streaming
+        set_event_sink(None)
+        # Restore previous context for multi-agent support
+        # This ensures nested agent calls don't pollute the parent's context
+        if agent_context is not None:
+            set_current_context(previous_context)
+
 
 async def stream_simulator_response(
     prompt: str,
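
The new error branch classifies failures purely by pattern-matching the provider's error text. The snippet below exercises the same regexes against a representative OpenAI-style 429 message (the message wording is illustrative, not captured from a real response):

```python
import re

# Illustrative 429 message in the OpenAI style; wording is assumed.
error_msg = (
    "Error code: 429 - Rate limit reached. "
    "Limit 30000, Used 29500, Requested 1200. Please try again in 1.34s."
)

retry_match = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
assert retry_match is not None and float(retry_match.group(1)) == 1.34

used = re.search(r"Used (\d+)", error_msg)
limit = re.search(r"Limit (\d+)", error_msg)
requested = re.search(r"Requested (\d+)", error_msg)
assert (limit.group(1), used.group(1), requested.group(1)) == ("30000", "29500", "1200")
```

Since the classification keys on substrings such as "429" appearing anywhere in `str(e)`, a message that merely mentions those digits would be misclassified; matching on exception types where available (as the `RateLimitError` check does) is the more robust path.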
@@ -708,6 +729,37 @@ async def stream_minimal_simulator(
     yield sse_string
 
 
+async def save_user_message(
+    session_id: str,
+    user_id: str | None,
+    content: str,
+) -> None:
+    """
+    Save user message to database before streaming.
+
+    Shared utility used by both API and CLI for consistent user message storage.
+    """
+    if not settings.postgres.enabled or not session_id:
+        return
+
+    user_msg = {
+        "role": "user",
+        "content": content,
+        "timestamp": to_iso(utc_now()),
+    }
+    try:
+        store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+        await store.store_session_messages(
+            session_id=session_id,
+            messages=[user_msg],
+            user_id=user_id,
+            compress=False,
+        )
+        logger.debug(f"Saved user message to session {session_id}")
+    except Exception as e:
+        logger.error(f"Failed to save user message: {e}", exc_info=True)
+
+
 async def stream_openai_response_with_save(
     agent: Agent,
     prompt: str,
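
Per the NOTE added to the wrapper's docstring below, callers are now expected to persist the user's turn themselves before streaming. A hypothetical call site (the real endpoint wiring lives in completions.py and ask.py, which are not part of this hunk):

```python
# Hypothetical caller; names and signatures outside this file are assumptions.
async def handle_chat(agent, prompt: str, session_id: str, user_id: str):
    # 1. Persist the user's message first (new shared utility above).
    await save_user_message(session_id=session_id, user_id=user_id, content=prompt)
    # 2. Stream; the wrapper saves tool calls + assistant text afterwards.
    async for sse_chunk in stream_openai_response_with_save(
        agent=agent,
        prompt=prompt,
        session_id=session_id,
        user_id=user_id,
    ):
        yield sse_chunk
```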
@@ -716,6 +768,10 @@ async def stream_openai_response_with_save(
     agent_schema: str | None = None,
     session_id: str | None = None,
     user_id: str | None = None,
+    # Agent context for multi-agent propagation
+    agent_context: "AgentContext | None" = None,
+    # Pydantic-ai native message history for proper tool call/return pairing
+    message_history: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Wrapper around stream_openai_response that saves the assistant response after streaming.
@@ -723,6 +779,9 @@ async def stream_openai_response_with_save(
     This accumulates all text content during streaming and saves it to the database
     after the stream completes.
 
+    NOTE: Call save_user_message() BEFORE this function to save the user's message.
+    This function only saves tool calls and assistant responses.
+
     Args:
         agent: Pydantic AI agent instance
         prompt: User prompt
@@ -731,14 +790,11 @@ async def stream_openai_response_with_save(
         agent_schema: Agent schema name
         session_id: Session ID for message storage
         user_id: User ID for message storage
+        agent_context: Agent context for multi-agent propagation (enables child agents)
 
     Yields:
         SSE-formatted strings
     """
-    from ....utils.date_utils import utc_now, to_iso
-    from ....services.session import SessionMessageStore
-    from ....settings import settings
-
     # Pre-generate message_id so it can be sent in metadata event
     # This allows frontend to use it for feedback before DB persistence
     message_id = str(uuid.uuid4())
@@ -763,6 +819,8 @@ async def stream_openai_response_with_save(
         message_id=message_id,
         trace_context_out=trace_context,  # Pass container to capture trace IDs
         tool_calls_out=tool_calls,  # Capture tool calls for persistence
+        agent_context=agent_context,  # Pass context for multi-agent support
+        message_history=message_history,  # Native pydantic-ai message history
     ):
         yield chunk
 
@@ -777,6 +835,9 @@ async def stream_openai_response_with_save(
                 delta = data["choices"][0].get("delta", {})
                 content = delta.get("content")
                 if content:
+                    # DEBUG: Check for [Calling markers in content
+                    if "[Calling" in content:
+                        logger.warning(f"DEBUG: Found [Calling in content chunk: {repr(content[:100])}")
                     accumulated_content.append(content)
             except (json.JSONDecodeError, KeyError, IndexError):
                 pass  # Skip non-JSON or malformed chunks
@@ -793,6 +854,8 @@ async def stream_openai_response_with_save(
 
         # First, store tool call messages (message_type: "tool")
         for tool_call in tool_calls:
+            if not tool_call:
+                continue
             tool_message = {
                 "role": "tool",
                 "content": json.dumps(tool_call.get("result", {}), default=str),
@@ -807,8 +870,34 @@ async def stream_openai_response_with_save(
             messages_to_store.append(tool_message)
 
         # Then store assistant text response (if any)
+        # Priority: direct TextPartDelta content > tool call text_response
+        # When an agent delegates via ask_agent, the child's text_response becomes
+        # the parent's assistant response (the parent is just orchestrating)
+        full_content = None
+
         if accumulated_content:
             full_content = "".join(accumulated_content)
+            logger.warning(f"DEBUG: Using accumulated_content ({len(accumulated_content)} chunks, {len(full_content)} chars)")
+            logger.warning(f"DEBUG: First 200 chars: {repr(full_content[:200])}")
+        else:
+            logger.warning("DEBUG: accumulated_content is empty, checking text_response fallback")
+            # No direct text from TextPartDelta - check tool results for text_response
+            # This handles multi-agent delegation where child agent output is the response
+            for tool_call in tool_calls:
+                if not tool_call:
+                    continue
+                result = tool_call.get("result")
+                if isinstance(result, dict) and result.get("text_response"):
+                    text_response = result["text_response"]
+                    if text_response and str(text_response).strip():
+                        full_content = str(text_response)
+                        logger.debug(
+                            f"Using text_response from {tool_call.get('tool_name', 'tool')} "
+                            f"({len(full_content)} chars) as assistant message"
+                        )
+                        break
+
+        if full_content:
             assistant_message = {
                 "id": message_id,  # Use pre-generated ID for consistency with metadata event
                 "role": "assistant",
@@ -830,7 +919,7 @@ async def stream_openai_response_with_save(
             )
             logger.debug(
                 f"Saved {len(tool_calls)} tool calls and "
-                f"{'assistant response' if
+                f"{'assistant response' if full_content else 'no text'} "
                 f"to session {session_id}"
             )
         except Exception as e:
@@ -838,8 +927,9 @@ async def stream_openai_response_with_save(
 
     # Update session description with session_name (non-blocking, after all yields)
     for tool_call in tool_calls:
-        if tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
-…
+        if tool_call and tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
+            arguments = tool_call.get("arguments") or {}
+            session_name = arguments.get("session_name")
             if session_name:
                 try:
                     from ....models.entities import Session