remdb 0.3.180__py3-none-any.whl → 0.3.258__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. rem/agentic/README.md +36 -2
  2. rem/agentic/__init__.py +10 -1
  3. rem/agentic/context.py +185 -1
  4. rem/agentic/context_builder.py +56 -35
  5. rem/agentic/mcp/tool_wrapper.py +2 -2
  6. rem/agentic/providers/pydantic_ai.py +303 -111
  7. rem/agentic/schema.py +2 -2
  8. rem/api/main.py +1 -1
  9. rem/api/mcp_router/resources.py +223 -0
  10. rem/api/mcp_router/server.py +4 -0
  11. rem/api/mcp_router/tools.py +608 -166
  12. rem/api/routers/admin.py +30 -4
  13. rem/api/routers/auth.py +219 -20
  14. rem/api/routers/chat/child_streaming.py +393 -0
  15. rem/api/routers/chat/completions.py +77 -40
  16. rem/api/routers/chat/sse_events.py +7 -3
  17. rem/api/routers/chat/streaming.py +381 -291
  18. rem/api/routers/chat/streaming_utils.py +325 -0
  19. rem/api/routers/common.py +18 -0
  20. rem/api/routers/dev.py +7 -1
  21. rem/api/routers/feedback.py +11 -3
  22. rem/api/routers/messages.py +176 -38
  23. rem/api/routers/models.py +9 -1
  24. rem/api/routers/query.py +17 -15
  25. rem/api/routers/shared_sessions.py +16 -0
  26. rem/auth/jwt.py +19 -4
  27. rem/auth/middleware.py +42 -28
  28. rem/cli/README.md +62 -0
  29. rem/cli/commands/ask.py +205 -114
  30. rem/cli/commands/db.py +55 -31
  31. rem/cli/commands/experiments.py +1 -1
  32. rem/cli/commands/process.py +179 -43
  33. rem/cli/commands/query.py +109 -0
  34. rem/cli/commands/session.py +117 -0
  35. rem/cli/main.py +2 -0
  36. rem/models/core/experiment.py +1 -1
  37. rem/models/entities/ontology.py +18 -20
  38. rem/models/entities/session.py +1 -0
  39. rem/schemas/agents/core/agent-builder.yaml +1 -1
  40. rem/schemas/agents/rem.yaml +1 -1
  41. rem/schemas/agents/test_orchestrator.yaml +42 -0
  42. rem/schemas/agents/test_structured_output.yaml +52 -0
  43. rem/services/content/providers.py +151 -49
  44. rem/services/content/service.py +18 -5
  45. rem/services/embeddings/worker.py +26 -12
  46. rem/services/postgres/__init__.py +28 -3
  47. rem/services/postgres/diff_service.py +57 -5
  48. rem/services/postgres/programmable_diff_service.py +635 -0
  49. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  50. rem/services/postgres/register_type.py +11 -10
  51. rem/services/postgres/repository.py +39 -28
  52. rem/services/postgres/schema_generator.py +5 -5
  53. rem/services/postgres/sql_builder.py +6 -5
  54. rem/services/rem/README.md +4 -3
  55. rem/services/rem/parser.py +7 -10
  56. rem/services/rem/service.py +47 -0
  57. rem/services/session/__init__.py +8 -1
  58. rem/services/session/compression.py +47 -5
  59. rem/services/session/pydantic_messages.py +310 -0
  60. rem/services/session/reload.py +2 -1
  61. rem/settings.py +92 -7
  62. rem/sql/migrations/001_install.sql +125 -7
  63. rem/sql/migrations/002_install_models.sql +159 -149
  64. rem/sql/migrations/004_cache_system.sql +10 -276
  65. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  66. rem/utils/schema_loader.py +180 -120
  67. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
  68. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
  69. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
  70. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
@@ -1,37 +1,38 @@
1
1
  """
2
2
  OpenAI-compatible streaming relay for Pydantic AI agents.
3
3
 
4
- Design Pattern:
5
- - Uses Pydantic AI's agent.iter() to capture full execution including tool calls
6
- - Emits rich SSE events: reasoning, tool_call, progress, metadata, text_delta
7
- - Proper OpenAI SSE format with data: prefix and [DONE] terminator
8
- - Error handling with graceful degradation
9
-
10
- Key Insight
11
- - agent.run_stream() stops after first output, missing tool calls
12
- - agent.iter() provides complete execution with tool call visibility
13
- - Use PartStartEvent to detect tool calls and thinking parts
14
- - Use PartDeltaEvent with TextPartDelta/ThinkingPartDelta for streaming
15
- - Use PartEndEvent to detect tool completion
16
- - Use FunctionToolResultEvent to get tool results
17
-
18
- SSE Format (OpenAI-compatible):
19
- data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
20
- data: [DONE]\\n\\n
21
-
22
- Extended SSE Format (Custom Events):
23
- event: reasoning\\ndata: {"type": "reasoning", "content": "..."}\\n\\n
24
- event: tool_call\\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\\n\\n
25
- event: progress\\ndata: {"type": "progress", "step": 1, "total_steps": 3}\\n\\n
26
- event: metadata\\ndata: {"type": "metadata", "confidence": 0.95}\\n\\n
27
-
28
- See sse_events.py for the full event type definitions.
4
+ Architecture:
5
+ ```
6
+ User Request stream_openai_response agent.iter() SSE Events → Client
7
+
8
+ ├── Parent agent events (text, tool calls)
9
+
10
+ └── Child agent events (via ask_agent tool)
11
+
12
+
13
+ Event Sink (asyncio.Queue)
14
+
15
+
16
+ drain_child_events() SSE + DB
17
+ ```
18
+
19
+ Modules:
20
+ - streaming.py: Main workflow orchestrator (this file)
21
+ - streaming_utils.py: Pure utility functions, StreamingState dataclass
22
+ - child_streaming.py: Child agent event handling
23
+
24
+ Key Design Decision (DUPLICATION FIX):
25
+ When child_content is streamed, state.child_content_streamed is set True.
26
+ Parent TextPartDelta events are SKIPPED when this flag is True,
27
+ preventing content from being emitted twice.
29
28
  """
30
29
 
30
+ from __future__ import annotations
31
+
32
+ import asyncio
31
33
  import json
32
- import time
33
34
  import uuid
34
- from typing import AsyncGenerator
35
+ from typing import TYPE_CHECKING, AsyncGenerator
35
36
 
36
37
  from loguru import logger
37
38
  from pydantic_ai.agent import Agent
@@ -47,7 +48,17 @@ from pydantic_ai.messages import (
47
48
  ToolCallPart,
48
49
  )
49
50
 
50
- from .otel_utils import get_current_trace_context, get_tracer
51
+ from .child_streaming import drain_child_events, stream_with_child_events, process_child_event
52
+ from .streaming_utils import (
53
+ StreamingState,
54
+ build_content_chunk,
55
+ build_progress_event,
56
+ build_tool_start_event,
57
+ extract_metadata_from_result,
58
+ extract_tool_args,
59
+ log_tool_call,
60
+ )
61
+ from .otel_utils import get_current_trace_context
51
62
  from .models import (
52
63
  ChatCompletionMessageDelta,
53
64
  ChatCompletionStreamChoice,
@@ -55,12 +66,19 @@ from .models import (
55
66
  )
56
67
  from .sse_events import (
57
68
  DoneEvent,
69
+ ErrorEvent,
58
70
  MetadataEvent,
59
71
  ProgressEvent,
60
72
  ReasoningEvent,
61
73
  ToolCallEvent,
62
74
  format_sse_event,
63
75
  )
76
+ from ....services.session import SessionMessageStore
77
+ from ....settings import settings
78
+ from ....utils.date_utils import to_iso, utc_now
79
+
80
+ if TYPE_CHECKING:
81
+ from ....agentic.context import AgentContext
64
82
 
65
83
 
66
84
  async def stream_openai_response(
@@ -79,6 +97,11 @@ async def stream_openai_response(
79
97
  # Mutable container to capture tool calls for persistence
80
98
  # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
81
99
  tool_calls_out: list | None = None,
100
+ # Agent context for multi-agent propagation
101
+ # When set, enables child agents to access parent context via get_current_context()
102
+ agent_context: "AgentContext | None" = None,
103
+ # Pydantic-ai native message history for proper tool call/return pairing
104
+ message_history: list | None = None,
82
105
  ) -> AsyncGenerator[str, None]:
83
106
  """
84
107
  Stream Pydantic AI agent responses with rich SSE events.
@@ -131,40 +154,39 @@ async def stream_openai_response(
131
154
  event: done
132
155
  data: {"type": "done", "reason": "stop"}
133
156
  """
134
- if request_id is None:
135
- request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
136
-
137
- created_at = int(time.time())
138
- start_time = time.time()
139
- is_first_chunk = True
140
- reasoning_step = 0
141
- current_step = 0
142
- total_steps = 3 # Model request, tool execution (optional), final response
143
- token_count = 0
144
-
145
- # Track active tool calls for completion events
146
- # Maps index -> (tool_name, tool_id) for correlating start/end events
147
- active_tool_calls: dict[int, tuple[str, str]] = {}
148
- # Queue of tool calls awaiting completion (FIFO for matching)
149
- pending_tool_completions: list[tuple[str, str]] = []
150
- # Track if metadata was registered via register_metadata tool
151
- metadata_registered = False
152
- # Track pending tool calls with full data for persistence
153
- # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
154
- pending_tool_data: dict[str, dict] = {}
157
+ # Initialize streaming state
158
+ state = StreamingState.create(model=model, request_id=request_id)
159
+
160
+ # Get effective user_id for database operations
161
+ effective_user_id = agent_context.user_id if agent_context else None
162
+
163
+ # Import context functions for multi-agent support
164
+ from ....agentic.context import set_current_context, set_event_sink
165
+
166
+ # Set up context for multi-agent propagation
167
+ previous_context = None
168
+ if agent_context is not None:
169
+ from ....agentic.context import get_current_context
170
+ previous_context = get_current_context()
171
+ set_current_context(agent_context)
172
+
173
+ # Set up event sink for child agent event proxying
174
+ child_event_sink: asyncio.Queue = asyncio.Queue()
175
+ set_event_sink(child_event_sink)
155
176
 
156
177
  try:
157
178
  # Emit initial progress event
158
- current_step = 1
159
- yield format_sse_event(ProgressEvent(
160
- step=current_step,
161
- total_steps=total_steps,
179
+ state.current_step = 1
180
+ yield build_progress_event(
181
+ step=state.current_step,
182
+ total_steps=state.total_steps,
162
183
  label="Processing request",
163
- status="in_progress"
164
- ))
184
+ )
165
185
 
166
186
  # Use agent.iter() to get complete execution with tool calls
167
- async with agent.iter(prompt) as agent_run:
187
+ # Pass message_history if available for proper tool call/return pairing
188
+ iter_kwargs = {"message_history": message_history} if message_history else {}
189
+ async with agent.iter(prompt, **iter_kwargs) as agent_run:
168
190
  # Capture trace context IMMEDIATELY inside agent execution
169
191
  # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
170
192
  # NOT dependent on any AI-generated content
@@ -185,11 +207,11 @@ async def stream_openai_response(
185
207
  if isinstance(event, PartStartEvent) and isinstance(
186
208
  event.part, ThinkingPart
187
209
  ):
188
- reasoning_step += 1
210
+ state.reasoning_step += 1
189
211
  if event.part.content:
190
212
  yield format_sse_event(ReasoningEvent(
191
213
  content=event.part.content,
192
- step=reasoning_step
214
+ step=state.reasoning_step
193
215
  ))
194
216
 
195
217
  # Reasoning delta (streaming thinking)
@@ -199,7 +221,7 @@ async def stream_openai_response(
199
221
  if event.delta.content_delta:
200
222
  yield format_sse_event(ReasoningEvent(
201
223
  content=event.delta.content_delta,
202
- step=reasoning_step
224
+ step=state.reasoning_step
203
225
  ))
204
226
 
205
227
  # ============================================
@@ -208,28 +230,11 @@ async def stream_openai_response(
208
230
  elif isinstance(event, PartStartEvent) and isinstance(
209
231
  event.part, TextPart
210
232
  ):
211
- # TextPart may contain initial content that needs to be emitted
233
+ # Skip if child already streamed content
234
+ if state.child_content_streamed:
235
+ continue
212
236
  if event.part.content:
213
- content = event.part.content
214
- token_count += len(content.split())
215
-
216
- content_chunk = ChatCompletionStreamResponse(
217
- id=request_id,
218
- created=created_at,
219
- model=model,
220
- choices=[
221
- ChatCompletionStreamChoice(
222
- index=0,
223
- delta=ChatCompletionMessageDelta(
224
- role="assistant" if is_first_chunk else None,
225
- content=content,
226
- ),
227
- finish_reason=None,
228
- )
229
- ],
230
- )
231
- is_first_chunk = False
232
- yield f"data: {content_chunk.model_dump_json()}\n\n"
237
+ yield build_content_chunk(state, event.part.content)
233
238
 
234
239
  # ============================================
235
240
  # TOOL CALL START EVENTS
@@ -239,88 +244,39 @@ async def stream_openai_response(
239
244
  ):
240
245
  tool_name = event.part.tool_name
241
246
 
242
- # Handle final_result specially - it's Pydantic AI's
243
- # internal tool for structured output
247
+ # Handle final_result (Pydantic AI's internal tool)
244
248
  if tool_name == "final_result":
245
- # Extract the structured result and emit as content
246
- args_dict = None
247
- if event.part.args is not None:
248
- if hasattr(event.part.args, 'args_dict'):
249
- args_dict = event.part.args.args_dict
250
- elif isinstance(event.part.args, dict):
251
- args_dict = event.part.args
252
-
249
+ args_dict = extract_tool_args(event.part)
253
250
  if args_dict:
254
- # Emit the structured result as JSON content
255
251
  result_json = json.dumps(args_dict, indent=2)
256
- content_chunk = ChatCompletionStreamResponse(
257
- id=request_id,
258
- created=created_at,
259
- model=model,
260
- choices=[
261
- ChatCompletionStreamChoice(
262
- index=0,
263
- delta=ChatCompletionMessageDelta(
264
- role="assistant" if is_first_chunk else None,
265
- content=result_json,
266
- ),
267
- finish_reason=None,
268
- )
269
- ],
270
- )
271
- is_first_chunk = False
272
- yield f"data: {content_chunk.model_dump_json()}\n\n"
273
- continue # Skip regular tool call handling
252
+ yield build_content_chunk(state, result_json)
253
+ continue
274
254
 
275
255
  tool_id = f"call_{uuid.uuid4().hex[:8]}"
276
- active_tool_calls[event.index] = (tool_name, tool_id)
277
- # Queue for completion matching (FIFO)
278
- pending_tool_completions.append((tool_name, tool_id))
279
-
280
- # Emit tool_call SSE event (started)
281
- # Try to get arguments as dict
282
- args_dict = None
283
- if event.part.args is not None:
284
- if hasattr(event.part.args, 'args_dict'):
285
- args_dict = event.part.args.args_dict
286
- elif isinstance(event.part.args, dict):
287
- args_dict = event.part.args
288
-
289
- # Log tool call with key parameters
290
- if args_dict and tool_name == "search_rem":
291
- query_type = args_dict.get("query_type", "?")
292
- limit = args_dict.get("limit", 20)
293
- table = args_dict.get("table", "")
294
- query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
295
- if query_text and len(query_text) > 50:
296
- query_text = query_text[:50] + "..."
297
- logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
298
- else:
299
- logger.info(f"🔧 {tool_name}")
256
+ state.active_tool_calls[event.index] = (tool_name, tool_id)
257
+ state.pending_tool_completions.append((tool_name, tool_id))
300
258
 
301
- yield format_sse_event(ToolCallEvent(
302
- tool_name=tool_name,
303
- tool_id=tool_id,
304
- status="started",
305
- arguments=args_dict
306
- ))
259
+ # Extract and log arguments
260
+ args_dict = extract_tool_args(event.part)
261
+ log_tool_call(tool_name, args_dict)
307
262
 
308
- # Track tool call data for persistence (especially register_metadata)
309
- pending_tool_data[tool_id] = {
263
+ yield build_tool_start_event(tool_name, tool_id, args_dict)
264
+
265
+ # Track for persistence
266
+ state.pending_tool_data[tool_id] = {
310
267
  "tool_name": tool_name,
311
268
  "tool_id": tool_id,
312
269
  "arguments": args_dict,
313
270
  }
314
271
 
315
272
  # Update progress
316
- current_step = 2
317
- total_steps = 4 # Added tool execution step
318
- yield format_sse_event(ProgressEvent(
319
- step=current_step,
320
- total_steps=total_steps,
273
+ state.current_step = 2
274
+ state.total_steps = 4
275
+ yield build_progress_event(
276
+ step=state.current_step,
277
+ total_steps=state.total_steps,
321
278
  label=f"Calling {tool_name}",
322
- status="in_progress"
323
- ))
279
+ )
324
280
 
325
281
  # ============================================
326
282
  # TOOL CALL COMPLETION (PartEndEvent)
@@ -328,11 +284,14 @@ async def stream_openai_response(
328
284
  elif isinstance(event, PartEndEvent) and isinstance(
329
285
  event.part, ToolCallPart
330
286
  ):
331
- if event.index in active_tool_calls:
332
- tool_name, tool_id = active_tool_calls[event.index]
333
- # Note: result comes from FunctionToolResultEvent below
334
- # For now, mark as completed without result
335
- del active_tool_calls[event.index]
287
+ if event.index in state.active_tool_calls:
288
+ tool_name, tool_id = state.active_tool_calls[event.index]
289
+ args_dict = extract_tool_args(event.part)
290
+
291
+ if tool_id in state.pending_tool_data:
292
+ state.pending_tool_data[tool_id]["arguments"] = args_dict
293
+
294
+ del state.active_tool_calls[event.index]
336
295
 
337
296
  # ============================================
338
297
  # TEXT CONTENT DELTA
@@ -340,151 +299,158 @@ async def stream_openai_response(
340
299
  elif isinstance(event, PartDeltaEvent) and isinstance(
341
300
  event.delta, TextPartDelta
342
301
  ):
302
+ # DUPLICATION FIX: Skip parent text if child already streamed content
303
+ # Child agents stream via child_content events in ask_agent tool.
304
+ # If parent tries to echo that content, skip it.
305
+ if state.child_content_streamed:
306
+ logger.debug("Skipping parent TextPartDelta - child content already streamed")
307
+ continue
308
+
343
309
  content = event.delta.content_delta
344
- token_count += len(content.split()) # Rough token estimate
345
-
346
- content_chunk = ChatCompletionStreamResponse(
347
- id=request_id,
348
- created=created_at,
349
- model=model,
350
- choices=[
351
- ChatCompletionStreamChoice(
352
- index=0,
353
- delta=ChatCompletionMessageDelta(
354
- role="assistant" if is_first_chunk else None,
355
- content=content,
356
- ),
357
- finish_reason=None,
358
- )
359
- ],
360
- )
361
- is_first_chunk = False
362
- yield f"data: {content_chunk.model_dump_json()}\n\n"
310
+ yield build_content_chunk(state, content)
363
311
 
364
312
  # ============================================
365
313
  # TOOL EXECUTION NODE
366
314
  # ============================================
367
315
  elif Agent.is_call_tools_node(node):
368
316
  async with node.stream(agent_run.ctx) as tools_stream:
369
- async for tool_event in tools_stream:
317
+ # Use concurrent multiplexer to handle both tool events
318
+ # and child agent events as they arrive (fixes streaming lag)
319
+ async for event_type, event_data in stream_with_child_events(
320
+ tools_stream=tools_stream,
321
+ child_event_sink=child_event_sink,
322
+ state=state,
323
+ session_id=session_id,
324
+ user_id=effective_user_id,
325
+ message_id=message_id,
326
+ agent_schema=agent_schema,
327
+ ):
328
+ # Handle child events (streamed from ask_agent)
329
+ if event_type == "child":
330
+ async for chunk in process_child_event(
331
+ child_event=event_data,
332
+ state=state,
333
+ session_id=session_id,
334
+ user_id=effective_user_id,
335
+ message_id=message_id,
336
+ agent_schema=agent_schema,
337
+ ):
338
+ yield chunk
339
+ continue
340
+
341
+ # Handle tool events
342
+ tool_event = event_data
343
+
370
344
  # Tool result event - emit completion
371
345
  if isinstance(tool_event, FunctionToolResultEvent):
372
346
  # Get the tool name/id from the pending queue (FIFO)
373
- if pending_tool_completions:
374
- tool_name, tool_id = pending_tool_completions.pop(0)
347
+ if state.pending_tool_completions:
348
+ tool_name, tool_id = state.pending_tool_completions.pop(0)
375
349
  else:
376
- # Fallback if queue is empty (shouldn't happen)
377
350
  tool_name = "tool"
378
351
  tool_id = f"call_{uuid.uuid4().hex[:8]}"
379
352
 
380
- # Check if this is a register_metadata tool result
381
- # It returns a dict with _metadata_event: True marker
382
353
  result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
383
354
  is_metadata_event = False
384
355
 
385
- if isinstance(result_content, dict) and result_content.get("_metadata_event"):
356
+ # Handle register_metadata tool results
357
+ metadata = extract_metadata_from_result(result_content)
358
+ if metadata:
386
359
  is_metadata_event = True
387
- metadata_registered = True # Skip default metadata at end
388
- # Emit MetadataEvent with registered values
389
- registered_confidence = result_content.get("confidence")
390
- registered_sources = result_content.get("sources")
391
- registered_references = result_content.get("references")
392
- registered_flags = result_content.get("flags")
393
- # Session naming
394
- registered_session_name = result_content.get("session_name")
395
- # Risk assessment fields
396
- registered_risk_level = result_content.get("risk_level")
397
- registered_risk_score = result_content.get("risk_score")
398
- registered_risk_reasoning = result_content.get("risk_reasoning")
399
- registered_recommended_action = result_content.get("recommended_action")
400
- # Extra fields
401
- registered_extra = result_content.get("extra")
360
+ state.metadata_registered = True
361
+
362
+ # Only set responding_agent if not already set by child
363
+ if not state.responding_agent and metadata.get("agent_schema"):
364
+ state.responding_agent = metadata["agent_schema"]
402
365
 
403
366
  logger.info(
404
- f"📊 Metadata registered: confidence={registered_confidence}, "
405
- f"session_name={registered_session_name}, "
406
- f"risk_level={registered_risk_level}, sources={registered_sources}"
367
+ f"📊 Metadata: confidence={metadata.get('confidence')}, "
368
+ f"risk_level={metadata.get('risk_level')}"
407
369
  )
408
370
 
409
- # Build extra dict with risk fields and any custom extras
371
+ # Build extra dict with risk fields
410
372
  extra_data = {}
411
- if registered_risk_level is not None:
412
- extra_data["risk_level"] = registered_risk_level
413
- if registered_risk_score is not None:
414
- extra_data["risk_score"] = registered_risk_score
415
- if registered_risk_reasoning is not None:
416
- extra_data["risk_reasoning"] = registered_risk_reasoning
417
- if registered_recommended_action is not None:
418
- extra_data["recommended_action"] = registered_recommended_action
419
- if registered_extra:
420
- extra_data.update(registered_extra)
421
-
422
- # Emit metadata event immediately
373
+ for field in ["risk_level", "risk_score", "risk_reasoning", "recommended_action"]:
374
+ if metadata.get(field) is not None:
375
+ extra_data[field] = metadata[field]
376
+ if metadata.get("extra"):
377
+ extra_data.update(metadata["extra"])
378
+
423
379
  yield format_sse_event(MetadataEvent(
424
380
  message_id=message_id,
425
381
  in_reply_to=in_reply_to,
426
382
  session_id=session_id,
427
383
  agent_schema=agent_schema,
428
- session_name=registered_session_name,
429
- confidence=registered_confidence,
430
- sources=registered_sources,
384
+ responding_agent=state.responding_agent,
385
+ session_name=metadata.get("session_name"),
386
+ confidence=metadata.get("confidence"),
387
+ sources=metadata.get("sources"),
431
388
  model_version=model,
432
- flags=registered_flags,
389
+ flags=metadata.get("flags"),
433
390
  extra=extra_data if extra_data else None,
434
391
  hidden=False,
435
392
  ))
436
393
 
437
- # Capture tool call with result for persistence
438
- # Special handling for register_metadata - always capture full data
439
- if tool_calls_out is not None and tool_id in pending_tool_data:
440
- tool_data = pending_tool_data[tool_id]
394
+ # Get complete args from pending_tool_data
395
+ completed_args = None
396
+ if tool_id in state.pending_tool_data:
397
+ completed_args = state.pending_tool_data[tool_id].get("arguments")
398
+
399
+ # Capture tool call for persistence
400
+ if tool_calls_out is not None and tool_id in state.pending_tool_data:
401
+ tool_data = state.pending_tool_data[tool_id]
441
402
  tool_data["result"] = result_content
442
403
  tool_data["is_metadata"] = is_metadata_event
443
404
  tool_calls_out.append(tool_data)
444
- del pending_tool_data[tool_id]
445
-
446
- if not is_metadata_event:
447
- # Normal tool completion - emit ToolCallEvent
448
- result_str = str(result_content)
449
- result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
450
-
451
- # Log result count for search_rem
452
- if tool_name == "search_rem" and isinstance(result_content, dict):
453
- results = result_content.get("results", {})
454
- # Handle nested result structure: results may be a dict with 'results' list and 'count'
455
- if isinstance(results, dict):
456
- count = results.get("count", len(results.get("results", [])))
457
- query_type = results.get("query_type", "?")
458
- query_text = results.get("query_text", results.get("key", ""))
459
- table = results.get("table_name", "")
460
- elif isinstance(results, list):
461
- count = len(results)
462
- query_type = "?"
463
- query_text = ""
464
- table = ""
465
- else:
466
- count = "?"
467
- query_type = "?"
468
- query_text = ""
469
- table = ""
470
- status = result_content.get("status", "unknown")
471
- # Truncate query text for logging
472
- if query_text and len(str(query_text)) > 40:
473
- query_text = str(query_text)[:40] + "..."
474
- logger.info(f" ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
475
-
476
- yield format_sse_event(ToolCallEvent(
477
- tool_name=tool_name,
478
- tool_id=tool_id,
479
- status="completed",
480
- result=result_summary
481
- ))
405
+ del state.pending_tool_data[tool_id]
406
+
407
+ # Always emit ToolCallEvent completion for frontend tracking
408
+ # Send full result for dict/list types, stringify others
409
+ if isinstance(result_content, (dict, list)):
410
+ result_for_sse = result_content
411
+ else:
412
+ result_for_sse = str(result_content)
413
+
414
+ # Log result count for search_rem
415
+ if tool_name == "search_rem" and isinstance(result_content, dict):
416
+ results = result_content.get("results", {})
417
+ # Handle nested result structure: results may be a dict with 'results' list and 'count'
418
+ if isinstance(results, dict):
419
+ count = results.get("count", len(results.get("results", [])))
420
+ query_type = results.get("query_type", "?")
421
+ query_text = results.get("query_text", results.get("key", ""))
422
+ table = results.get("table_name", "")
423
+ elif isinstance(results, list):
424
+ count = len(results)
425
+ query_type = "?"
426
+ query_text = ""
427
+ table = ""
428
+ else:
429
+ count = "?"
430
+ query_type = "?"
431
+ query_text = ""
432
+ table = ""
433
+ status = result_content.get("status", "unknown")
434
+ # Truncate query text for logging
435
+ if query_text and len(str(query_text)) > 40:
436
+ query_text = str(query_text)[:40] + "..."
437
+ logger.info(f" ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
438
+
439
+ # Always emit ToolCallEvent completion for frontend tracking
440
+ # This includes register_metadata calls so they turn green in the UI
441
+ yield format_sse_event(ToolCallEvent(
442
+ tool_name=tool_name,
443
+ tool_id=tool_id,
444
+ status="completed",
445
+ arguments=completed_args,
446
+ result=result_for_sse
447
+ ))
482
448
 
483
449
  # Update progress after tool completion
484
- current_step = 3
450
+ state.current_step = 3
485
451
  yield format_sse_event(ProgressEvent(
486
- step=current_step,
487
- total_steps=total_steps,
452
+ step=state.current_step,
453
+ total_steps=state.total_steps,
488
454
  label="Generating response",
489
455
  status="in_progress"
490
456
  ))
@@ -513,36 +479,36 @@ async def stream_openai_response(
513
479
  result_dict = {"result": str(output)}
514
480
 
515
481
  result_json = json.dumps(result_dict, indent=2, default=str)
516
- token_count += len(result_json.split())
482
+ state.token_count += len(result_json.split())
517
483
 
518
484
  # Emit structured result as content
519
485
  result_chunk = ChatCompletionStreamResponse(
520
- id=request_id,
521
- created=created_at,
486
+ id=state.request_id,
487
+ created=state.created_at,
522
488
  model=model,
523
489
  choices=[
524
490
  ChatCompletionStreamChoice(
525
491
  index=0,
526
492
  delta=ChatCompletionMessageDelta(
527
- role="assistant" if is_first_chunk else None,
493
+ role="assistant" if state.is_first_chunk else None,
528
494
  content=result_json,
529
495
  ),
530
496
  finish_reason=None,
531
497
  )
532
498
  ],
533
499
  )
534
- is_first_chunk = False
500
+ state.is_first_chunk = False
535
501
  yield f"data: {result_chunk.model_dump_json()}\n\n"
536
502
  except Exception as e:
537
503
  logger.debug(f"No structured result available: {e}")
538
504
 
539
505
  # Calculate latency
540
- latency_ms = int((time.time() - start_time) * 1000)
506
+ latency_ms = state.latency_ms()
541
507
 
542
508
  # Final OpenAI chunk with finish_reason
543
509
  final_chunk = ChatCompletionStreamResponse(
544
- id=request_id,
545
- created=created_at,
510
+ id=state.request_id,
511
+ created=state.created_at,
546
512
  model=model,
547
513
  choices=[
548
514
  ChatCompletionStreamChoice(
@@ -555,27 +521,28 @@ async def stream_openai_response(
555
521
  yield f"data: {final_chunk.model_dump_json()}\n\n"
556
522
 
557
523
  # Emit metadata event only if not already registered via register_metadata tool
558
- if not metadata_registered:
524
+ if not state.metadata_registered:
559
525
  yield format_sse_event(MetadataEvent(
560
526
  message_id=message_id,
561
527
  in_reply_to=in_reply_to,
562
528
  session_id=session_id,
563
529
  agent_schema=agent_schema,
530
+ responding_agent=state.responding_agent,
564
531
  confidence=1.0, # Default to 100% confidence
565
532
  model_version=model,
566
533
  latency_ms=latency_ms,
567
- token_count=token_count,
534
+ token_count=state.token_count,
568
535
  # Include deterministic trace context captured from OTEL
569
536
  trace_id=captured_trace_id,
570
537
  span_id=captured_span_id,
571
538
  ))
572
539
 
573
540
  # Mark all progress complete
574
- for step in range(1, total_steps + 1):
541
+ for step in range(1, state.total_steps + 1):
575
542
  yield format_sse_event(ProgressEvent(
576
543
  step=step,
577
- total_steps=total_steps,
578
- label="Complete" if step == total_steps else f"Step {step}",
544
+ total_steps=state.total_steps,
545
+ label="Complete" if step == state.total_steps else f"Step {step}",
579
546
  status="completed"
580
547
  ))
581
548
 
@@ -587,25 +554,79 @@ async def stream_openai_response(
587
554
 
588
555
  except Exception as e:
589
556
  import traceback
557
+ import re
590
558
 
591
559
  error_msg = str(e)
592
- logger.error(f"Streaming error: {error_msg}")
593
- logger.error(traceback.format_exc())
594
-
595
- # Send error as final chunk
596
- error_data = {
597
- "error": {
598
- "message": error_msg,
599
- "type": "internal_error",
600
- "code": "stream_error",
601
- }
602
- }
603
- yield f"data: {json.dumps(error_data)}\n\n"
560
+
561
+ # Parse error details for better client handling
562
+ error_code = "stream_error"
563
+ error_details: dict = {}
564
+ recoverable = True
565
+
566
+ # Check for rate limit errors (OpenAI 429)
567
+ if "429" in error_msg or "rate_limit" in error_msg.lower() or "RateLimitError" in type(e).__name__:
568
+ error_code = "rate_limit_exceeded"
569
+ recoverable = True
570
+
571
+ # Extract retry-after time from error message
572
+ # Pattern: "Please try again in X.XXs" or "Please try again in Xs"
573
+ retry_match = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
574
+ if retry_match:
575
+ retry_seconds = float(retry_match.group(1))
576
+ error_details["retry_after_seconds"] = retry_seconds
577
+ error_details["retry_after_ms"] = int(retry_seconds * 1000)
578
+
579
+ # Extract token usage info if available
580
+ used_match = re.search(r"Used (\d+)", error_msg)
581
+ limit_match = re.search(r"Limit (\d+)", error_msg)
582
+ requested_match = re.search(r"Requested (\d+)", error_msg)
583
+ if used_match:
584
+ error_details["tokens_used"] = int(used_match.group(1))
585
+ if limit_match:
586
+ error_details["tokens_limit"] = int(limit_match.group(1))
587
+ if requested_match:
588
+ error_details["tokens_requested"] = int(requested_match.group(1))
589
+
590
+ logger.error(f"🔴 Streaming error: status_code: 429, model_name: {model}, body: {error_msg[:200]}")
591
+
592
+ # Check for authentication errors
593
+ elif "401" in error_msg or "AuthenticationError" in type(e).__name__:
594
+ error_code = "authentication_error"
595
+ recoverable = False
596
+ logger.error(f"🔴 Streaming error: Authentication failed")
597
+
598
+ # Check for model not found / invalid model
599
+ elif "404" in error_msg or "model" in error_msg.lower() and "not found" in error_msg.lower():
600
+ error_code = "model_not_found"
601
+ recoverable = False
602
+ logger.error(f"🔴 Streaming error: Model not found")
603
+
604
+ # Generic error
605
+ else:
606
+ logger.error(f"🔴 Streaming error: {error_msg}")
607
+
608
+ logger.error(f"🔴 {traceback.format_exc()}")
609
+
610
+ # Emit proper ErrorEvent via SSE (with event: prefix for client parsing)
611
+ yield format_sse_event(ErrorEvent(
612
+ code=error_code,
613
+ message=error_msg,
614
+ details=error_details if error_details else None,
615
+ recoverable=recoverable,
616
+ ))
604
617
 
605
618
  # Emit done event with error reason
606
619
  yield format_sse_event(DoneEvent(reason="error"))
607
620
  yield "data: [DONE]\n\n"
608
621
 
622
+ finally:
623
+ # Clean up event sink for multi-agent streaming
624
+ set_event_sink(None)
625
+ # Restore previous context for multi-agent support
626
+ # This ensures nested agent calls don't pollute the parent's context
627
+ if agent_context is not None:
628
+ set_current_context(previous_context)
629
+
609
630
 
610
631
  async def stream_simulator_response(
611
632
  prompt: str,
@@ -708,6 +729,37 @@ async def stream_minimal_simulator(
708
729
  yield sse_string
709
730
 
710
731
 
732
async def save_user_message(
    session_id: str,
    user_id: str | None,
    content: str,
) -> None:
    """
    Persist a user's message to the session store prior to streaming.

    Shared helper that keeps user-message storage consistent between the
    API and CLI entry points. It is a best-effort write: storage failures
    are logged but never propagated, and the call is a no-op when Postgres
    is disabled or no session id was supplied.

    Args:
        session_id: Target session to append the message to.
        user_id: Owner of the message; falls back to the configured
            test user id when absent.
        content: Raw text of the user's message.
    """
    # Nothing to do without a backing database or a session to attach to.
    if not settings.postgres.enabled or not session_id:
        return

    record = {
        "role": "user",
        "content": content,
        "timestamp": to_iso(utc_now()),
    }
    try:
        message_store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
        await message_store.store_session_messages(
            session_id=session_id,
            messages=[record],
            user_id=user_id,
            compress=False,
        )
        logger.debug(f"Saved user message to session {session_id}")
    except Exception as e:
        # Best-effort: a failed save must never abort the streaming response.
        logger.error(f"Failed to save user message: {e}", exc_info=True)
761
+
762
+
711
763
  async def stream_openai_response_with_save(
712
764
  agent: Agent,
713
765
  prompt: str,
@@ -716,6 +768,10 @@ async def stream_openai_response_with_save(
716
768
  agent_schema: str | None = None,
717
769
  session_id: str | None = None,
718
770
  user_id: str | None = None,
771
+ # Agent context for multi-agent propagation
772
+ agent_context: "AgentContext | None" = None,
773
+ # Pydantic-ai native message history for proper tool call/return pairing
774
+ message_history: list | None = None,
719
775
  ) -> AsyncGenerator[str, None]:
720
776
  """
721
777
  Wrapper around stream_openai_response that saves the assistant response after streaming.
@@ -723,6 +779,9 @@ async def stream_openai_response_with_save(
723
779
  This accumulates all text content during streaming and saves it to the database
724
780
  after the stream completes.
725
781
 
782
+ NOTE: Call save_user_message() BEFORE this function to save the user's message.
783
+ This function only saves tool calls and assistant responses.
784
+
726
785
  Args:
727
786
  agent: Pydantic AI agent instance
728
787
  prompt: User prompt
@@ -731,14 +790,11 @@ async def stream_openai_response_with_save(
731
790
  agent_schema: Agent schema name
732
791
  session_id: Session ID for message storage
733
792
  user_id: User ID for message storage
793
+ agent_context: Agent context for multi-agent propagation (enables child agents)
734
794
 
735
795
  Yields:
736
796
  SSE-formatted strings
737
797
  """
738
- from ....utils.date_utils import utc_now, to_iso
739
- from ....services.session import SessionMessageStore
740
- from ....settings import settings
741
-
742
798
  # Pre-generate message_id so it can be sent in metadata event
743
799
  # This allows frontend to use it for feedback before DB persistence
744
800
  message_id = str(uuid.uuid4())
@@ -763,6 +819,8 @@ async def stream_openai_response_with_save(
763
819
  message_id=message_id,
764
820
  trace_context_out=trace_context, # Pass container to capture trace IDs
765
821
  tool_calls_out=tool_calls, # Capture tool calls for persistence
822
+ agent_context=agent_context, # Pass context for multi-agent support
823
+ message_history=message_history, # Native pydantic-ai message history
766
824
  ):
767
825
  yield chunk
768
826
 
@@ -777,6 +835,9 @@ async def stream_openai_response_with_save(
777
835
  delta = data["choices"][0].get("delta", {})
778
836
  content = delta.get("content")
779
837
  if content:
838
+ # DEBUG: Check for [Calling markers in content
839
+ if "[Calling" in content:
840
+ logger.warning(f"DEBUG: Found [Calling in content chunk: {repr(content[:100])}")
780
841
  accumulated_content.append(content)
781
842
  except (json.JSONDecodeError, KeyError, IndexError):
782
843
  pass # Skip non-JSON or malformed chunks
@@ -793,6 +854,8 @@ async def stream_openai_response_with_save(
793
854
 
794
855
  # First, store tool call messages (message_type: "tool")
795
856
  for tool_call in tool_calls:
857
+ if not tool_call:
858
+ continue
796
859
  tool_message = {
797
860
  "role": "tool",
798
861
  "content": json.dumps(tool_call.get("result", {}), default=str),
@@ -807,8 +870,34 @@ async def stream_openai_response_with_save(
807
870
  messages_to_store.append(tool_message)
808
871
 
809
872
  # Then store assistant text response (if any)
873
+ # Priority: direct TextPartDelta content > tool call text_response
874
+ # When an agent delegates via ask_agent, the child's text_response becomes
875
+ # the parent's assistant response (the parent is just orchestrating)
876
+ full_content = None
877
+
810
878
  if accumulated_content:
811
879
  full_content = "".join(accumulated_content)
880
+ logger.warning(f"DEBUG: Using accumulated_content ({len(accumulated_content)} chunks, {len(full_content)} chars)")
881
+ logger.warning(f"DEBUG: First 200 chars: {repr(full_content[:200])}")
882
+ else:
883
+ logger.warning("DEBUG: accumulated_content is empty, checking text_response fallback")
884
+ # No direct text from TextPartDelta - check tool results for text_response
885
+ # This handles multi-agent delegation where child agent output is the response
886
+ for tool_call in tool_calls:
887
+ if not tool_call:
888
+ continue
889
+ result = tool_call.get("result")
890
+ if isinstance(result, dict) and result.get("text_response"):
891
+ text_response = result["text_response"]
892
+ if text_response and str(text_response).strip():
893
+ full_content = str(text_response)
894
+ logger.debug(
895
+ f"Using text_response from {tool_call.get('tool_name', 'tool')} "
896
+ f"({len(full_content)} chars) as assistant message"
897
+ )
898
+ break
899
+
900
+ if full_content:
812
901
  assistant_message = {
813
902
  "id": message_id, # Use pre-generated ID for consistency with metadata event
814
903
  "role": "assistant",
@@ -830,7 +919,7 @@ async def stream_openai_response_with_save(
830
919
  )
831
920
  logger.debug(
832
921
  f"Saved {len(tool_calls)} tool calls and "
833
- f"{'assistant response' if accumulated_content else 'no text'} "
922
+ f"{'assistant response' if full_content else 'no text'} "
834
923
  f"to session {session_id}"
835
924
  )
836
925
  except Exception as e:
@@ -838,8 +927,9 @@ async def stream_openai_response_with_save(
838
927
 
839
928
  # Update session description with session_name (non-blocking, after all yields)
840
929
  for tool_call in tool_calls:
841
- if tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
842
- session_name = tool_call.get("arguments", {}).get("session_name")
930
+ if tool_call and tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
931
+ arguments = tool_call.get("arguments") or {}
932
+ session_name = arguments.get("session_name")
843
933
  if session_name:
844
934
  try:
845
935
  from ....models.entities import Session