remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,950 @@
1
+ """
2
+ OpenAI-compatible streaming relay for Pydantic AI agents.
3
+
4
+ Architecture:
5
+ ```
6
+ User Request → stream_openai_response → agent.iter() → SSE Events → Client
7
+
8
+ ├── Parent agent events (text, tool calls)
9
+
10
+ └── Child agent events (via ask_agent tool)
11
+
12
+
13
+ Event Sink (asyncio.Queue)
14
+
15
+
16
+ drain_child_events() → SSE + DB
17
+ ```
18
+
19
+ Modules:
20
+ - streaming.py: Main workflow orchestrator (this file)
21
+ - streaming_utils.py: Pure utility functions, StreamingState dataclass
22
+ - child_streaming.py: Child agent event handling
23
+
24
+ Key Design Decision (DUPLICATION FIX):
25
+ When child_content is streamed, state.child_content_streamed is set True.
26
+ Parent TextPartDelta events are SKIPPED when this flag is True,
27
+ preventing content from being emitted twice.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import asyncio
33
+ import json
34
+ import uuid
35
+ from typing import TYPE_CHECKING, AsyncGenerator
36
+
37
+ from loguru import logger
38
+ from pydantic_ai.agent import Agent
39
+ from pydantic_ai.messages import (
40
+ FunctionToolResultEvent,
41
+ PartDeltaEvent,
42
+ PartEndEvent,
43
+ PartStartEvent,
44
+ TextPart,
45
+ TextPartDelta,
46
+ ThinkingPart,
47
+ ThinkingPartDelta,
48
+ ToolCallPart,
49
+ )
50
+
51
+ from .child_streaming import drain_child_events, stream_with_child_events, process_child_event
52
+ from .streaming_utils import (
53
+ StreamingState,
54
+ build_content_chunk,
55
+ build_progress_event,
56
+ build_tool_start_event,
57
+ extract_metadata_from_result,
58
+ extract_tool_args,
59
+ log_tool_call,
60
+ )
61
+ from .otel_utils import get_current_trace_context
62
+ from .models import (
63
+ ChatCompletionMessageDelta,
64
+ ChatCompletionStreamChoice,
65
+ ChatCompletionStreamResponse,
66
+ )
67
+ from .sse_events import (
68
+ DoneEvent,
69
+ ErrorEvent,
70
+ MetadataEvent,
71
+ ProgressEvent,
72
+ ReasoningEvent,
73
+ ToolCallEvent,
74
+ format_sse_event,
75
+ )
76
+ from ....services.session import SessionMessageStore
77
+ from ....settings import settings
78
+ from ....utils.date_utils import to_iso, utc_now
79
+
80
+ if TYPE_CHECKING:
81
+ from ....agentic.context import AgentContext
82
+
83
+
84
+ async def stream_openai_response(
85
+ agent: Agent,
86
+ prompt: str,
87
+ model: str,
88
+ request_id: str | None = None,
89
+ # Message correlation IDs for metadata
90
+ message_id: str | None = None,
91
+ in_reply_to: str | None = None,
92
+ session_id: str | None = None,
93
+ # Agent info for metadata
94
+ agent_schema: str | None = None,
95
+ # Mutable container to capture trace context (deterministic, not AI-dependent)
96
+ trace_context_out: dict | None = None,
97
+ # Mutable container to capture tool calls for persistence
98
+ # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
99
+ tool_calls_out: list | None = None,
100
+ # Agent context for multi-agent propagation
101
+ # When set, enables child agents to access parent context via get_current_context()
102
+ agent_context: "AgentContext | None" = None,
103
+ # Pydantic-ai native message history for proper tool call/return pairing
104
+ message_history: list | None = None,
105
+ ) -> AsyncGenerator[str, None]:
106
+ """
107
+ Stream Pydantic AI agent responses with rich SSE events.
108
+
109
+ Emits all SSE event types matching the simulator:
110
+ - reasoning: Model thinking/chain-of-thought (from ThinkingPart)
111
+ - tool_call: Tool invocation start/complete (from ToolCallPart, FunctionToolResultEvent)
112
+ - progress: Step indicators for multi-step execution
113
+ - text_delta: Streamed content (OpenAI-compatible format)
114
+ - metadata: Message IDs, model info, performance metrics
115
+ - done: Stream completion
116
+
117
+ Design Pattern:
118
+ 1. Use agent.iter() for complete execution (not run_stream())
119
+ 2. Iterate over nodes to capture model requests and tool executions
120
+ 3. Emit rich SSE events for reasoning, tools, progress
121
+ 4. Stream text content in OpenAI-compatible format
122
+ 5. Send metadata and done events at completion
123
+
124
+ Args:
125
+ agent: Pydantic AI agent instance
126
+ prompt: User prompt to run
127
+ model: Model name for response metadata
128
+ request_id: Optional request ID (generates UUID if not provided)
129
+ message_id: Database ID of the assistant message being streamed
130
+ in_reply_to: Database ID of the user message this responds to
131
+ session_id: Session ID for conversation correlation
132
+
133
+ Yields:
134
+ SSE-formatted strings
135
+
136
+ Example Stream:
137
+ event: progress
138
+ data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Processing", "status": "in_progress"}
139
+
140
+ event: reasoning
141
+ data: {"type": "reasoning", "content": "Analyzing the request..."}
142
+
143
+ event: tool_call
144
+ data: {"type": "tool_call", "tool_name": "search", "status": "started", "arguments": {...}}
145
+
146
+ event: tool_call
147
+ data: {"type": "tool_call", "tool_name": "search", "status": "completed", "result": "..."}
148
+
149
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}
150
+
151
+ event: metadata
152
+ data: {"type": "metadata", "message_id": "...", "latency_ms": 1234}
153
+
154
+ event: done
155
+ data: {"type": "done", "reason": "stop"}
156
+ """
157
+ # Initialize streaming state
158
+ state = StreamingState.create(model=model, request_id=request_id)
159
+
160
+ # Get effective user_id for database operations
161
+ effective_user_id = agent_context.user_id if agent_context else None
162
+
163
+ # Import context functions for multi-agent support
164
+ from ....agentic.context import set_current_context, set_event_sink
165
+
166
+ # Set up context for multi-agent propagation
167
+ previous_context = None
168
+ if agent_context is not None:
169
+ from ....agentic.context import get_current_context
170
+ previous_context = get_current_context()
171
+ set_current_context(agent_context)
172
+
173
+ # Set up event sink for child agent event proxying
174
+ child_event_sink: asyncio.Queue = asyncio.Queue()
175
+ set_event_sink(child_event_sink)
176
+
177
+ try:
178
+ # Emit initial progress event
179
+ state.current_step = 1
180
+ yield build_progress_event(
181
+ step=state.current_step,
182
+ total_steps=state.total_steps,
183
+ label="Processing request",
184
+ )
185
+
186
+ # Use agent.iter() to get complete execution with tool calls
187
+ # Pass message_history if available for proper tool call/return pairing
188
+ iter_kwargs = {"message_history": message_history} if message_history else {}
189
+ async with agent.iter(prompt, **iter_kwargs) as agent_run:
190
+ # Capture trace context IMMEDIATELY inside agent execution
191
+ # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
192
+ # NOT dependent on any AI-generated content
193
+ captured_trace_id, captured_span_id = get_current_trace_context()
194
+ if trace_context_out is not None:
195
+ trace_context_out["trace_id"] = captured_trace_id
196
+ trace_context_out["span_id"] = captured_span_id
197
+
198
+ async for node in agent_run:
199
+ # Check if this is a model request node (includes tool calls)
200
+ if Agent.is_model_request_node(node):
201
+ # Stream events from model request
202
+ async with node.stream(agent_run.ctx) as request_stream:
203
+ async for event in request_stream:
204
+ # ============================================
205
+ # REASONING EVENTS (ThinkingPart)
206
+ # ============================================
207
+ if isinstance(event, PartStartEvent) and isinstance(
208
+ event.part, ThinkingPart
209
+ ):
210
+ state.reasoning_step += 1
211
+ if event.part.content:
212
+ yield format_sse_event(ReasoningEvent(
213
+ content=event.part.content,
214
+ step=state.reasoning_step
215
+ ))
216
+
217
+ # Reasoning delta (streaming thinking)
218
+ elif isinstance(event, PartDeltaEvent) and isinstance(
219
+ event.delta, ThinkingPartDelta
220
+ ):
221
+ if event.delta.content_delta:
222
+ yield format_sse_event(ReasoningEvent(
223
+ content=event.delta.content_delta,
224
+ step=state.reasoning_step
225
+ ))
226
+
227
+ # ============================================
228
+ # TEXT CONTENT START (initial text chunk)
229
+ # ============================================
230
+ elif isinstance(event, PartStartEvent) and isinstance(
231
+ event.part, TextPart
232
+ ):
233
+ # Skip if child already streamed content
234
+ if state.child_content_streamed:
235
+ continue
236
+ if event.part.content:
237
+ yield build_content_chunk(state, event.part.content)
238
+
239
+ # ============================================
240
+ # TOOL CALL START EVENTS
241
+ # ============================================
242
+ elif isinstance(event, PartStartEvent) and isinstance(
243
+ event.part, ToolCallPart
244
+ ):
245
+ tool_name = event.part.tool_name
246
+
247
+ # Handle final_result (Pydantic AI's internal tool)
248
+ if tool_name == "final_result":
249
+ args_dict = extract_tool_args(event.part)
250
+ if args_dict:
251
+ result_json = json.dumps(args_dict, indent=2)
252
+ yield build_content_chunk(state, result_json)
253
+ continue
254
+
255
+ tool_id = f"call_{uuid.uuid4().hex[:8]}"
256
+ state.active_tool_calls[event.index] = (tool_name, tool_id)
257
+ state.pending_tool_completions.append((tool_name, tool_id))
258
+
259
+ # Extract and log arguments
260
+ args_dict = extract_tool_args(event.part)
261
+ log_tool_call(tool_name, args_dict)
262
+
263
+ yield build_tool_start_event(tool_name, tool_id, args_dict)
264
+
265
+ # Track for persistence
266
+ state.pending_tool_data[tool_id] = {
267
+ "tool_name": tool_name,
268
+ "tool_id": tool_id,
269
+ "arguments": args_dict,
270
+ }
271
+
272
+ # Update progress
273
+ state.current_step = 2
274
+ state.total_steps = 4
275
+ yield build_progress_event(
276
+ step=state.current_step,
277
+ total_steps=state.total_steps,
278
+ label=f"Calling {tool_name}",
279
+ )
280
+
281
+ # ============================================
282
+ # TOOL CALL COMPLETION (PartEndEvent)
283
+ # ============================================
284
+ elif isinstance(event, PartEndEvent) and isinstance(
285
+ event.part, ToolCallPart
286
+ ):
287
+ if event.index in state.active_tool_calls:
288
+ tool_name, tool_id = state.active_tool_calls[event.index]
289
+ args_dict = extract_tool_args(event.part)
290
+
291
+ if tool_id in state.pending_tool_data:
292
+ state.pending_tool_data[tool_id]["arguments"] = args_dict
293
+
294
+ del state.active_tool_calls[event.index]
295
+
296
+ # ============================================
297
+ # TEXT CONTENT DELTA
298
+ # ============================================
299
+ elif isinstance(event, PartDeltaEvent) and isinstance(
300
+ event.delta, TextPartDelta
301
+ ):
302
+ # DUPLICATION FIX: Skip parent text if child already streamed content
303
+ # Child agents stream via child_content events in ask_agent tool.
304
+ # If parent tries to echo that content, skip it.
305
+ if state.child_content_streamed:
306
+ logger.debug("Skipping parent TextPartDelta - child content already streamed")
307
+ continue
308
+
309
+ content = event.delta.content_delta
310
+ yield build_content_chunk(state, content)
311
+
312
+ # ============================================
313
+ # TOOL EXECUTION NODE
314
+ # ============================================
315
+ elif Agent.is_call_tools_node(node):
316
+ async with node.stream(agent_run.ctx) as tools_stream:
317
+ # Use concurrent multiplexer to handle both tool events
318
+ # and child agent events as they arrive (fixes streaming lag)
319
+ async for event_type, event_data in stream_with_child_events(
320
+ tools_stream=tools_stream,
321
+ child_event_sink=child_event_sink,
322
+ state=state,
323
+ session_id=session_id,
324
+ user_id=effective_user_id,
325
+ message_id=message_id,
326
+ agent_schema=agent_schema,
327
+ ):
328
+ # Handle child events (streamed from ask_agent)
329
+ if event_type == "child":
330
+ async for chunk in process_child_event(
331
+ child_event=event_data,
332
+ state=state,
333
+ session_id=session_id,
334
+ user_id=effective_user_id,
335
+ message_id=message_id,
336
+ agent_schema=agent_schema,
337
+ ):
338
+ yield chunk
339
+ continue
340
+
341
+ # Handle tool events
342
+ tool_event = event_data
343
+
344
+ # Tool result event - emit completion
345
+ if isinstance(tool_event, FunctionToolResultEvent):
346
+ # Get the tool name/id from the pending queue (FIFO)
347
+ if state.pending_tool_completions:
348
+ tool_name, tool_id = state.pending_tool_completions.pop(0)
349
+ else:
350
+ tool_name = "tool"
351
+ tool_id = f"call_{uuid.uuid4().hex[:8]}"
352
+
353
+ result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
354
+ is_metadata_event = False
355
+
356
+ # Handle register_metadata tool results
357
+ metadata = extract_metadata_from_result(result_content)
358
+ if metadata:
359
+ is_metadata_event = True
360
+ state.metadata_registered = True
361
+
362
+ # Only set responding_agent if not already set by child
363
+ if not state.responding_agent and metadata.get("agent_schema"):
364
+ state.responding_agent = metadata["agent_schema"]
365
+
366
+ logger.info(
367
+ f"📊 Metadata: confidence={metadata.get('confidence')}, "
368
+ f"risk_level={metadata.get('risk_level')}"
369
+ )
370
+
371
+ # Build extra dict with risk fields
372
+ extra_data = {}
373
+ for field in ["risk_level", "risk_score", "risk_reasoning", "recommended_action"]:
374
+ if metadata.get(field) is not None:
375
+ extra_data[field] = metadata[field]
376
+ if metadata.get("extra"):
377
+ extra_data.update(metadata["extra"])
378
+
379
+ yield format_sse_event(MetadataEvent(
380
+ message_id=message_id,
381
+ in_reply_to=in_reply_to,
382
+ session_id=session_id,
383
+ agent_schema=agent_schema,
384
+ responding_agent=state.responding_agent,
385
+ session_name=metadata.get("session_name"),
386
+ confidence=metadata.get("confidence"),
387
+ sources=metadata.get("sources"),
388
+ model_version=model,
389
+ flags=metadata.get("flags"),
390
+ extra=extra_data if extra_data else None,
391
+ hidden=False,
392
+ ))
393
+
394
+ # Get complete args from pending_tool_data
395
+ completed_args = None
396
+ if tool_id in state.pending_tool_data:
397
+ completed_args = state.pending_tool_data[tool_id].get("arguments")
398
+
399
+ # Capture tool call for persistence
400
+ if tool_calls_out is not None and tool_id in state.pending_tool_data:
401
+ tool_data = state.pending_tool_data[tool_id]
402
+ tool_data["result"] = result_content
403
+ tool_data["is_metadata"] = is_metadata_event
404
+ tool_calls_out.append(tool_data)
405
+ del state.pending_tool_data[tool_id]
406
+
407
+ if not is_metadata_event:
408
+ # NOTE: text_response fallback is DISABLED
409
+ # Child agents now stream content via child_content events (above)
410
+ # which provides real-time streaming. The text_response in tool
411
+ # result would duplicate that content, so we skip it entirely.
412
+
413
+ # Normal tool completion - emit ToolCallEvent
414
+ # For finalize_intake, send full result dict for frontend
415
+ if tool_name == "finalize_intake" and isinstance(result_content, dict):
416
+ result_for_sse = result_content
417
+ else:
418
+ result_str = str(result_content)
419
+ result_for_sse = result_str[:200] + "..." if len(result_str) > 200 else result_str
420
+
421
+ # Log result count for search_rem
422
+ if tool_name == "search_rem" and isinstance(result_content, dict):
423
+ results = result_content.get("results", {})
424
+ # Handle nested result structure: results may be a dict with 'results' list and 'count'
425
+ if isinstance(results, dict):
426
+ count = results.get("count", len(results.get("results", [])))
427
+ query_type = results.get("query_type", "?")
428
+ query_text = results.get("query_text", results.get("key", ""))
429
+ table = results.get("table_name", "")
430
+ elif isinstance(results, list):
431
+ count = len(results)
432
+ query_type = "?"
433
+ query_text = ""
434
+ table = ""
435
+ else:
436
+ count = "?"
437
+ query_type = "?"
438
+ query_text = ""
439
+ table = ""
440
+ status = result_content.get("status", "unknown")
441
+ # Truncate query text for logging
442
+ if query_text and len(str(query_text)) > 40:
443
+ query_text = str(query_text)[:40] + "..."
444
+ logger.info(f" ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
445
+
446
+ yield format_sse_event(ToolCallEvent(
447
+ tool_name=tool_name,
448
+ tool_id=tool_id,
449
+ status="completed",
450
+ arguments=completed_args,
451
+ result=result_for_sse
452
+ ))
453
+
454
+ # Update progress after tool completion
455
+ state.current_step = 3
456
+ yield format_sse_event(ProgressEvent(
457
+ step=state.current_step,
458
+ total_steps=state.total_steps,
459
+ label="Generating response",
460
+ status="in_progress"
461
+ ))
462
+
463
+ # After iteration completes, check for structured result
464
+ # This handles agents with result_type (structured output)
465
+ # Skip for plain text output - already streamed via TextPartDelta
466
+ try:
467
+ result = agent_run.result
468
+ if result is not None and hasattr(result, 'output'):
469
+ output = result.output
470
+
471
+ # Skip plain string output - already streamed via TextPartDelta
472
+ # Non-structured output agents (structured_output: false) return strings
473
+ if isinstance(output, str):
474
+ logger.debug("Plain text output already streamed via TextPartDelta, skipping final emission")
475
+ else:
476
+ # Serialize the structured output (Pydantic models)
477
+ if hasattr(output, 'model_dump'):
478
+ # Pydantic model
479
+ result_dict = output.model_dump()
480
+ elif hasattr(output, '__dict__'):
481
+ result_dict = output.__dict__
482
+ else:
483
+ # Fallback for unknown types
484
+ result_dict = {"result": str(output)}
485
+
486
+ result_json = json.dumps(result_dict, indent=2, default=str)
487
+ state.token_count += len(result_json.split())
488
+
489
+ # Emit structured result as content
490
+ result_chunk = ChatCompletionStreamResponse(
491
+ id=state.request_id,
492
+ created=state.created_at,
493
+ model=model,
494
+ choices=[
495
+ ChatCompletionStreamChoice(
496
+ index=0,
497
+ delta=ChatCompletionMessageDelta(
498
+ role="assistant" if state.is_first_chunk else None,
499
+ content=result_json,
500
+ ),
501
+ finish_reason=None,
502
+ )
503
+ ],
504
+ )
505
+ state.is_first_chunk = False
506
+ yield f"data: {result_chunk.model_dump_json()}\n\n"
507
+ except Exception as e:
508
+ logger.debug(f"No structured result available: {e}")
509
+
510
+ # Calculate latency
511
+ latency_ms = state.latency_ms()
512
+
513
+ # Final OpenAI chunk with finish_reason
514
+ final_chunk = ChatCompletionStreamResponse(
515
+ id=state.request_id,
516
+ created=state.created_at,
517
+ model=model,
518
+ choices=[
519
+ ChatCompletionStreamChoice(
520
+ index=0,
521
+ delta=ChatCompletionMessageDelta(),
522
+ finish_reason="stop",
523
+ )
524
+ ],
525
+ )
526
+ yield f"data: {final_chunk.model_dump_json()}\n\n"
527
+
528
+ # Emit metadata event only if not already registered via register_metadata tool
529
+ if not state.metadata_registered:
530
+ yield format_sse_event(MetadataEvent(
531
+ message_id=message_id,
532
+ in_reply_to=in_reply_to,
533
+ session_id=session_id,
534
+ agent_schema=agent_schema,
535
+ responding_agent=state.responding_agent,
536
+ confidence=1.0, # Default to 100% confidence
537
+ model_version=model,
538
+ latency_ms=latency_ms,
539
+ token_count=state.token_count,
540
+ # Include deterministic trace context captured from OTEL
541
+ trace_id=captured_trace_id,
542
+ span_id=captured_span_id,
543
+ ))
544
+
545
+ # Mark all progress complete
546
+ for step in range(1, state.total_steps + 1):
547
+ yield format_sse_event(ProgressEvent(
548
+ step=step,
549
+ total_steps=state.total_steps,
550
+ label="Complete" if step == state.total_steps else f"Step {step}",
551
+ status="completed"
552
+ ))
553
+
554
+ # Emit done event
555
+ yield format_sse_event(DoneEvent(reason="stop"))
556
+
557
+ # OpenAI termination marker (for compatibility)
558
+ yield "data: [DONE]\n\n"
559
+
560
+ except Exception as e:
561
+ import traceback
562
+ import re
563
+
564
+ error_msg = str(e)
565
+
566
+ # Parse error details for better client handling
567
+ error_code = "stream_error"
568
+ error_details: dict = {}
569
+ recoverable = True
570
+
571
+ # Check for rate limit errors (OpenAI 429)
572
+ if "429" in error_msg or "rate_limit" in error_msg.lower() or "RateLimitError" in type(e).__name__:
573
+ error_code = "rate_limit_exceeded"
574
+ recoverable = True
575
+
576
+ # Extract retry-after time from error message
577
+ # Pattern: "Please try again in X.XXs" or "Please try again in Xs"
578
+ retry_match = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
579
+ if retry_match:
580
+ retry_seconds = float(retry_match.group(1))
581
+ error_details["retry_after_seconds"] = retry_seconds
582
+ error_details["retry_after_ms"] = int(retry_seconds * 1000)
583
+
584
+ # Extract token usage info if available
585
+ used_match = re.search(r"Used (\d+)", error_msg)
586
+ limit_match = re.search(r"Limit (\d+)", error_msg)
587
+ requested_match = re.search(r"Requested (\d+)", error_msg)
588
+ if used_match:
589
+ error_details["tokens_used"] = int(used_match.group(1))
590
+ if limit_match:
591
+ error_details["tokens_limit"] = int(limit_match.group(1))
592
+ if requested_match:
593
+ error_details["tokens_requested"] = int(requested_match.group(1))
594
+
595
+ logger.error(f"🔴 Streaming error: status_code: 429, model_name: {model}, body: {error_msg[:200]}")
596
+
597
+ # Check for authentication errors
598
+ elif "401" in error_msg or "AuthenticationError" in type(e).__name__:
599
+ error_code = "authentication_error"
600
+ recoverable = False
601
+ logger.error(f"🔴 Streaming error: Authentication failed")
602
+
603
+ # Check for model not found / invalid model
604
+ elif "404" in error_msg or "model" in error_msg.lower() and "not found" in error_msg.lower():
605
+ error_code = "model_not_found"
606
+ recoverable = False
607
+ logger.error(f"🔴 Streaming error: Model not found")
608
+
609
+ # Generic error
610
+ else:
611
+ logger.error(f"🔴 Streaming error: {error_msg}")
612
+
613
+ logger.error(f"🔴 {traceback.format_exc()}")
614
+
615
+ # Emit proper ErrorEvent via SSE (with event: prefix for client parsing)
616
+ yield format_sse_event(ErrorEvent(
617
+ code=error_code,
618
+ message=error_msg,
619
+ details=error_details if error_details else None,
620
+ recoverable=recoverable,
621
+ ))
622
+
623
+ # Emit done event with error reason
624
+ yield format_sse_event(DoneEvent(reason="error"))
625
+ yield "data: [DONE]\n\n"
626
+
627
+ finally:
628
+ # Clean up event sink for multi-agent streaming
629
+ set_event_sink(None)
630
+ # Restore previous context for multi-agent support
631
+ # This ensures nested agent calls don't pollute the parent's context
632
+ if agent_context is not None:
633
+ set_current_context(previous_context)
634
+
635
+
636
async def stream_simulator_response(
    prompt: str,
    model: str = "simulator-v1.0.0",
    request_id: str | None = None,
    delay_ms: int = 50,
    include_reasoning: bool = True,
    include_progress: bool = True,
    include_tool_calls: bool = True,
    include_actions: bool = True,
    include_metadata: bool = True,
    # Message correlation IDs
    message_id: str | None = None,
    in_reply_to: str | None = None,
    session_id: str | None = None,
) -> AsyncGenerator[str, None]:
    """
    Stream SSE simulator events for testing and demonstration.

    Thin relay over the SSE simulator: it produces formatted SSE strings
    ready for HTTP streaming, and no LLM calls are ever made.

    The simulated event sequence is rich on purpose:
    1. Reasoning events (model thinking)
    2. Progress events (step indicators)
    3. Tool call events (simulated tool usage)
    4. Text delta events (streamed content)
    5. Metadata events (confidence, sources, message IDs)
    6. Action request events (user interaction)
    7. Done event

    Args:
        prompt: User prompt (passed to simulator)
        model: Model name for metadata
        request_id: Optional request ID
        delay_ms: Delay between events in milliseconds
        include_reasoning: Whether to emit reasoning events
        include_progress: Whether to emit progress events
        include_tool_calls: Whether to emit tool call events
        include_actions: Whether to emit action request at end
        include_metadata: Whether to emit metadata event
        message_id: Database ID of the assistant message being streamed
        in_reply_to: Database ID of the user message this responds to
        session_id: Session ID for conversation correlation

    Yields:
        SSE-formatted strings ready for HTTP response

    Example:
        ```python
        from starlette.responses import StreamingResponse

        async def simulator_endpoint():
            return StreamingResponse(
                stream_simulator_response("demo"),
                media_type="text/event-stream"
            )
        ```
    """
    from rem.agentic.agents.sse_simulator import stream_simulator_events

    # Gather the simulator configuration in one place, then delegate.
    # The simulator yields SSE-formatted strings directly (OpenAI-compatible).
    simulator_kwargs = dict(
        prompt=prompt,
        delay_ms=delay_ms,
        include_reasoning=include_reasoning,
        include_progress=include_progress,
        include_tool_calls=include_tool_calls,
        include_actions=include_actions,
        include_metadata=include_metadata,
        # Message correlation IDs
        message_id=message_id,
        in_reply_to=in_reply_to,
        session_id=session_id,
        model=model,
    )
    async for event in stream_simulator_events(**simulator_kwargs):
        yield event
712
+
713
+
714
async def stream_minimal_simulator(
    content: str = "Hello from the simulator!",
    delay_ms: int = 30,
) -> AsyncGenerator[str, None]:
    """
    Stream minimal simulator output (text + done only).

    Useful for simple testing without the full event sequence.

    Args:
        content: Text content to stream
        delay_ms: Delay between chunks

    Yields:
        SSE-formatted strings
    """
    from rem.agentic.agents.sse_simulator import stream_minimal_demo

    # The demo already emits OpenAI-compatible SSE strings; just relay them.
    async for line in stream_minimal_demo(content=content, delay_ms=delay_ms):
        yield line
735
+
736
+
737
async def save_user_message(
    session_id: str,
    user_id: str | None,
    content: str,
) -> None:
    """
    Save user message to database before streaming.

    Shared utility used by both API and CLI for consistent user message
    storage. A no-op when Postgres is disabled or no session is given;
    storage failures are logged and never propagated to the caller.
    """
    # Nothing to persist to, or nothing to attach the message to.
    if not (settings.postgres.enabled and session_id):
        return

    message = {
        "role": "user",
        "content": content,
        "timestamp": to_iso(utc_now()),
    }
    try:
        effective_user = user_id or settings.test.effective_user_id
        store = SessionMessageStore(user_id=effective_user)
        await store.store_session_messages(
            session_id=session_id,
            messages=[message],
            user_id=user_id,
            compress=False,
        )
    except Exception as e:
        # Best-effort persistence: streaming must proceed even if storage fails.
        logger.error(f"Failed to save user message: {e}", exc_info=True)
    else:
        logger.debug(f"Saved user message to session {session_id}")
766
+
767
+
768
async def stream_openai_response_with_save(
    agent: Agent,
    prompt: str,
    model: str,
    request_id: str | None = None,
    agent_schema: str | None = None,
    session_id: str | None = None,
    user_id: str | None = None,
    # Agent context for multi-agent propagation
    agent_context: "AgentContext | None" = None,
    # Pydantic-ai native message history for proper tool call/return pairing
    message_history: list | None = None,
) -> AsyncGenerator[str, None]:
    """
    Wrapper around stream_openai_response that saves the assistant response after streaming.

    This accumulates all text content during streaming and saves it to the database
    after the stream completes.

    NOTE: Call save_user_message() BEFORE this function to save the user's message.
    This function only saves tool calls and assistant responses.

    Args:
        agent: Pydantic AI agent instance
        prompt: User prompt
        model: Model name
        request_id: Optional request ID
        agent_schema: Agent schema name
        session_id: Session ID for message storage
        user_id: User ID for message storage
        agent_context: Agent context for multi-agent propagation (enables child agents)
        message_history: Native pydantic-ai message history for tool call/return pairing

    Yields:
        SSE-formatted strings
    """
    # Pre-generate message_id so it can be sent in the metadata event.
    # This allows the frontend to use it for feedback before DB persistence.
    message_id = str(uuid.uuid4())

    # Mutable container for capturing trace context from inside agent execution.
    # This is deterministic - captured from OTEL instrumentation, not AI-generated.
    trace_context: dict = {}

    # Text content accumulated across streamed chunks.
    accumulated_content: list[str] = []

    # Capture tool calls for persistence (especially register_metadata).
    tool_calls: list = []

    async for chunk in stream_openai_response(
        agent=agent,
        prompt=prompt,
        model=model,
        request_id=request_id,
        agent_schema=agent_schema,
        session_id=session_id,
        message_id=message_id,
        trace_context_out=trace_context,  # Pass container to capture trace IDs
        tool_calls_out=tool_calls,  # Capture tool calls for persistence
        agent_context=agent_context,  # Pass context for multi-agent support
        message_history=message_history,  # Native pydantic-ai message history
    ):
        yield chunk

        # Extract text content from OpenAI-format chunks.
        # Format: data: {"choices": [{"delta": {"content": "..."}}]}
        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
            try:
                data_str = chunk[6:].strip()  # Remove "data: " prefix
                if data_str:
                    data = json.loads(data_str)
                    if "choices" in data and data["choices"]:
                        delta = data["choices"][0].get("delta", {})
                        content = delta.get("content")
                        if content:
                            # "[Calling" markers indicate tool-call text leaking
                            # into the visible stream. Fix: this leftover debug
                            # instrumentation was logged at WARNING level with a
                            # "DEBUG:" prefix; downgraded to debug level.
                            if "[Calling" in content:
                                logger.debug(f"Found [Calling in content chunk: {repr(content[:100])}")
                            accumulated_content.append(content)
            except (json.JSONDecodeError, KeyError, IndexError):
                pass  # Skip non-JSON or malformed chunks

    # After streaming completes, save tool calls and assistant response.
    # Note: All messages stored UNCOMPRESSED. Compression happens on reload.
    if settings.postgres.enabled and session_id:
        # Get captured trace context from container (deterministically captured
        # inside agent execution).
        captured_trace_id = trace_context.get("trace_id")
        captured_span_id = trace_context.get("span_id")
        timestamp = to_iso(utc_now())

        messages_to_store = []

        # First, store tool call messages (message_type: "tool").
        for tool_call in tool_calls:
            if not tool_call:
                continue
            tool_message = {
                "role": "tool",
                "content": json.dumps(tool_call.get("result", {}), default=str),
                "timestamp": timestamp,
                "trace_id": captured_trace_id,
                "span_id": captured_span_id,
                # Store tool call details in a way that can be reconstructed.
                "tool_call_id": tool_call.get("tool_id"),
                "tool_name": tool_call.get("tool_name"),
                "tool_arguments": tool_call.get("arguments"),
            }
            messages_to_store.append(tool_message)

        # Then store assistant text response (if any).
        # Priority: direct TextPartDelta content > tool call text_response.
        # When an agent delegates via ask_agent, the child's text_response becomes
        # the parent's assistant response (the parent is just orchestrating).
        full_content = None

        if accumulated_content:
            full_content = "".join(accumulated_content)
            # Fix: leftover logger.warning("DEBUG: ...") instrumentation
            # downgraded to debug level to keep production logs clean.
            logger.debug(f"Using accumulated_content ({len(accumulated_content)} chunks, {len(full_content)} chars)")
            logger.debug(f"First 200 chars: {repr(full_content[:200])}")
        else:
            logger.debug("accumulated_content is empty, checking text_response fallback")
            # No direct text from TextPartDelta - check tool results for text_response.
            # This handles multi-agent delegation where child agent output is the response.
            for tool_call in tool_calls:
                if not tool_call:
                    continue
                result = tool_call.get("result")
                if isinstance(result, dict) and result.get("text_response"):
                    text_response = result["text_response"]
                    if text_response and str(text_response).strip():
                        full_content = str(text_response)
                        logger.debug(
                            f"Using text_response from {tool_call.get('tool_name', 'tool')} "
                            f"({len(full_content)} chars) as assistant message"
                        )
                        break

        if full_content:
            assistant_message = {
                "id": message_id,  # Use pre-generated ID for consistency with metadata event
                "role": "assistant",
                "content": full_content,
                "timestamp": timestamp,
                "trace_id": captured_trace_id,
                "span_id": captured_span_id,
            }
            messages_to_store.append(assistant_message)

        if messages_to_store:
            try:
                store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
                await store.store_session_messages(
                    session_id=session_id,
                    messages=messages_to_store,
                    user_id=user_id,
                    compress=False,  # Store uncompressed; compression happens on reload
                )
                logger.debug(
                    f"Saved {len(tool_calls)} tool calls and "
                    f"{'assistant response' if full_content else 'no text'} "
                    f"to session {session_id}"
                )
            except Exception as e:
                logger.error(f"Failed to save session messages: {e}", exc_info=True)

        # Update session description with session_name (non-blocking, after all yields).
        # Only the first register_metadata tool call is considered.
        for tool_call in tool_calls:
            if tool_call and tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
                arguments = tool_call.get("arguments") or {}
                session_name = arguments.get("session_name")
                if session_name:
                    try:
                        from ....models.entities import Session
                        from ....services.postgres import Repository
                        repo = Repository(Session, table_name="sessions")
                        session = await repo.get_by_id(session_id)
                        if session and session.description != session_name:
                            session.description = session_name
                            await repo.update(session)
                            logger.debug(f"Updated session {session_id} description to '{session_name}'")
                    except Exception as e:
                        # Description update is cosmetic; never fail the stream over it.
                        logger.warning(f"Failed to update session description: {e}")
                break