remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +16 -2
  4. rem/agentic/agents/sse_simulator.py +502 -0
  5. rem/agentic/context.py +51 -27
  6. rem/agentic/llm_provider_models.py +301 -0
  7. rem/agentic/mcp/tool_wrapper.py +112 -17
  8. rem/agentic/otel/setup.py +93 -4
  9. rem/agentic/providers/phoenix.py +302 -109
  10. rem/agentic/providers/pydantic_ai.py +215 -26
  11. rem/agentic/schema.py +361 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +215 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +132 -40
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +26 -5
  18. rem/api/mcp_router/tools.py +465 -7
  19. rem/api/routers/admin.py +494 -0
  20. rem/api/routers/auth.py +70 -0
  21. rem/api/routers/chat/completions.py +402 -20
  22. rem/api/routers/chat/models.py +88 -10
  23. rem/api/routers/chat/otel_utils.py +33 -0
  24. rem/api/routers/chat/sse_events.py +542 -0
  25. rem/api/routers/chat/streaming.py +642 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +268 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +360 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +237 -64
  34. rem/cli/commands/cluster.py +1808 -0
  35. rem/cli/commands/configure.py +1 -3
  36. rem/cli/commands/db.py +386 -143
  37. rem/cli/commands/experiments.py +418 -27
  38. rem/cli/commands/process.py +14 -8
  39. rem/cli/commands/schema.py +97 -50
  40. rem/cli/main.py +27 -6
  41. rem/config.py +10 -3
  42. rem/models/core/core_model.py +7 -1
  43. rem/models/core/experiment.py +54 -0
  44. rem/models/core/rem_query.py +5 -2
  45. rem/models/entities/__init__.py +21 -0
  46. rem/models/entities/domain_resource.py +38 -0
  47. rem/models/entities/feedback.py +123 -0
  48. rem/models/entities/message.py +30 -1
  49. rem/models/entities/session.py +83 -0
  50. rem/models/entities/shared_session.py +180 -0
  51. rem/registry.py +10 -4
  52. rem/schemas/agents/rem.yaml +7 -3
  53. rem/services/content/service.py +92 -20
  54. rem/services/embeddings/api.py +4 -4
  55. rem/services/embeddings/worker.py +16 -16
  56. rem/services/phoenix/client.py +154 -14
  57. rem/services/postgres/README.md +159 -15
  58. rem/services/postgres/__init__.py +2 -1
  59. rem/services/postgres/diff_service.py +531 -0
  60. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  61. rem/services/postgres/repository.py +132 -0
  62. rem/services/postgres/schema_generator.py +205 -4
  63. rem/services/postgres/service.py +6 -6
  64. rem/services/rem/parser.py +44 -9
  65. rem/services/rem/service.py +36 -2
  66. rem/services/session/compression.py +24 -1
  67. rem/services/session/reload.py +1 -1
  68. rem/settings.py +324 -23
  69. rem/sql/background_indexes.sql +21 -16
  70. rem/sql/migrations/001_install.sql +387 -54
  71. rem/sql/migrations/002_install_models.sql +2320 -393
  72. rem/sql/migrations/003_optional_extensions.sql +326 -0
  73. rem/sql/migrations/004_cache_system.sql +548 -0
  74. rem/utils/__init__.py +18 -0
  75. rem/utils/date_utils.py +2 -2
  76. rem/utils/model_helpers.py +156 -1
  77. rem/utils/schema_loader.py +220 -22
  78. rem/utils/sql_paths.py +146 -0
  79. rem/utils/sql_types.py +3 -1
  80. rem/workers/__init__.py +3 -1
  81. rem/workers/db_listener.py +579 -0
  82. rem/workers/unlogged_maintainer.py +463 -0
  83. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
  84. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
  85. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
  86. rem/sql/002_install_models.sql +0 -1068
  87. rem/sql/install_models.sql +0 -1051
  88. rem/sql/migrations/003_seed_default_user.sql +0 -48
  89. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
@@ -3,20 +3,29 @@ OpenAI-compatible streaming relay for Pydantic AI agents.
 
 Design Pattern:
 - Uses Pydantic AI's agent.iter() to capture full execution including tool calls
-- Streams tool call events with [Calling: tool_name] markers
-- Streams text content deltas as they arrive
+- Emits rich SSE events: reasoning, tool_call, progress, metadata, text_delta
 - Proper OpenAI SSE format with data: prefix and [DONE] terminator
 - Error handling with graceful degradation
 
-Key Insight
+Key Insight
 - agent.run_stream() stops after first output, missing tool calls
 - agent.iter() provides complete execution with tool call visibility
-- Use PartStartEvent to detect tool calls
-- Use PartDeltaEvent with TextPartDelta for content streaming
+- Use PartStartEvent to detect tool calls and thinking parts
+- Use PartDeltaEvent with TextPartDelta/ThinkingPartDelta for streaming
+- Use PartEndEvent to detect tool completion
+- Use FunctionToolResultEvent to get tool results
 
-SSE Format:
+SSE Format (OpenAI-compatible):
 data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
 data: [DONE]\\n\\n
+
+Extended SSE Format (Custom Events):
+event: reasoning\\ndata: {"type": "reasoning", "content": "..."}\\n\\n
+event: tool_call\\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\\n\\n
+event: progress\\ndata: {"type": "progress", "step": 1, "total_steps": 3}\\n\\n
+event: metadata\\ndata: {"type": "metadata", "confidence": 0.95}\\n\\n
+
+See sse_events.py for the full event type definitions.
 """
 
 import json
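The docstring above defines the wire format: bare `data:` lines for OpenAI-compatible chunks, interleaved with named `event:`/`data:` pairs for the custom event types. As a rough illustration of what a consumer has to handle — a minimal sketch, not code from this package; event names are taken from the docstring above — a client-side parser might look like:

```python
import json
from typing import Iterable, Iterator

def parse_sse_stream(lines: Iterable[str]) -> Iterator[tuple[str, dict]]:
    """Yield (event_type, payload) pairs from the mixed SSE stream sketched above."""
    event_type = "text_delta"  # bare "data:" frames are OpenAI-style deltas
    for line in lines:
        line = line.rstrip("\n")
        if line.startswith("event: "):
            event_type = line[len("event: "):]  # named custom event
        elif line.startswith("data: "):
            data = line[len("data: "):]
            if data == "[DONE]":
                return  # OpenAI termination marker
            yield event_type, json.loads(data)
            event_type = "text_delta"  # reset after each data frame
        # blank lines separate events; nothing to do
```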
@@ -27,17 +36,31 @@ from typing import AsyncGenerator
 from loguru import logger
 from pydantic_ai.agent import Agent
 from pydantic_ai.messages import (
+    FunctionToolResultEvent,
     PartDeltaEvent,
+    PartEndEvent,
     PartStartEvent,
+    TextPart,
     TextPartDelta,
+    ThinkingPart,
+    ThinkingPartDelta,
     ToolCallPart,
 )
 
+from .otel_utils import get_current_trace_context, get_tracer
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
     ChatCompletionStreamResponse,
 )
+from .sse_events import (
+    DoneEvent,
+    MetadataEvent,
+    ProgressEvent,
+    ReasoningEvent,
+    ToolCallEvent,
+    format_sse_event,
+)
 
 
 async def stream_openai_response(
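The `format_sse_event` helper and the event models imported here are defined in `rem/api/routers/chat/sse_events.py` (+542 lines in this diff), which is not shown. A minimal sketch of the shape implied by the call sites below — field names are inferred from usage, so treat them as assumptions rather than the actual API:

```python
# Hypothetical sketch of sse_events.py helpers; the real definitions live in
# rem/api/routers/chat/sse_events.py and may differ.
from pydantic import BaseModel

class ProgressEvent(BaseModel):
    type: str = "progress"
    step: int
    total_steps: int
    label: str | None = None
    status: str | None = None  # "in_progress" or "completed" at the call sites

def format_sse_event(event: BaseModel) -> str:
    # Named SSE frame: "event: <type>\ndata: <json>\n\n", matching the
    # Extended SSE Format described in the module docstring.
    return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n"
```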
@@ -45,82 +68,268 @@ async def stream_openai_response(
     prompt: str,
     model: str,
     request_id: str | None = None,
+    # Message correlation IDs for metadata
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+    # Agent info for metadata
+    agent_schema: str | None = None,
+    # Mutable container to capture trace context (deterministic, not AI-dependent)
+    trace_context_out: dict | None = None,
 ) -> AsyncGenerator[str, None]:
     """
-    Stream Pydantic AI agent responses in OpenAI SSE format with tool call events.
+    Stream Pydantic AI agent responses with rich SSE events.
+
+    Emits all SSE event types matching the simulator:
+    - reasoning: Model thinking/chain-of-thought (from ThinkingPart)
+    - tool_call: Tool invocation start/complete (from ToolCallPart, FunctionToolResultEvent)
+    - progress: Step indicators for multi-step execution
+    - text_delta: Streamed content (OpenAI-compatible format)
+    - metadata: Message IDs, model info, performance metrics
+    - done: Stream completion
 
     Design Pattern:
     1. Use agent.iter() for complete execution (not run_stream())
     2. Iterate over nodes to capture model requests and tool executions
-    3. Stream tool call start events as [Calling: tool_name]
-    4. Stream text content deltas as they arrive
-    5. Send final chunk with finish_reason="stop"
-    6. Send OpenAI termination marker [DONE]
+    3. Emit rich SSE events for reasoning, tools, progress
+    4. Stream text content in OpenAI-compatible format
+    5. Send metadata and done events at completion
 
     Args:
         agent: Pydantic AI agent instance
         prompt: User prompt to run
        model: Model name for response metadata
         request_id: Optional request ID (generates UUID if not provided)
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
 
     Yields:
-        SSE-formatted strings: "data: {json}\\n\\n"
+        SSE-formatted strings
 
     Example Stream:
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
+        event: progress
+        data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Processing", "status": "in_progress"}
 
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search]"}}]}
+        event: reasoning
+        data: {"type": "reasoning", "content": "Analyzing the request..."}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "started", "arguments": {...}}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "completed", "result": "..."}
 
         data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}
 
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
+        event: metadata
+        data: {"type": "metadata", "message_id": "...", "latency_ms": 1234}
 
-        data: [DONE]
+        event: done
+        data: {"type": "done", "reason": "stop"}
     """
     if request_id is None:
         request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
     created_at = int(time.time())
+    start_time = time.time()
     is_first_chunk = True
+    reasoning_step = 0
+    current_step = 0
+    total_steps = 3  # Model request, tool execution (optional), final response
+    token_count = 0
+
+    # Track active tool calls for completion events
+    # Maps index -> (tool_name, tool_id) for correlating start/end events
+    active_tool_calls: dict[int, tuple[str, str]] = {}
+    # Queue of tool calls awaiting completion (FIFO for matching)
+    pending_tool_completions: list[tuple[str, str]] = []
+    # Track if metadata was registered via register_metadata tool
+    metadata_registered = False
 
     try:
+        # Emit initial progress event
+        current_step = 1
+        yield format_sse_event(ProgressEvent(
+            step=current_step,
+            total_steps=total_steps,
+            label="Processing request",
+            status="in_progress"
+        ))
+
         # Use agent.iter() to get complete execution with tool calls
-        # run_stream() stops after first output, missing tool calls
         async with agent.iter(prompt) as agent_run:
+            # Capture trace context IMMEDIATELY inside agent execution
+            # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
+            # NOT dependent on any AI-generated content
+            captured_trace_id, captured_span_id = get_current_trace_context()
+            if trace_context_out is not None:
+                trace_context_out["trace_id"] = captured_trace_id
+                trace_context_out["span_id"] = captured_span_id
+
             async for node in agent_run:
                 # Check if this is a model request node (includes tool calls)
                 if Agent.is_model_request_node(node):
                     # Stream events from model request
                     async with node.stream(agent_run.ctx) as request_stream:
                         async for event in request_stream:
-                            # Tool call start event
+                            # ============================================
+                            # REASONING EVENTS (ThinkingPart)
+                            # ============================================
                             if isinstance(event, PartStartEvent) and isinstance(
+                                event.part, ThinkingPart
+                            ):
+                                reasoning_step += 1
+                                if event.part.content:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.part.content,
+                                        step=reasoning_step
+                                    ))
+
+                            # Reasoning delta (streaming thinking)
+                            elif isinstance(event, PartDeltaEvent) and isinstance(
+                                event.delta, ThinkingPartDelta
+                            ):
+                                if event.delta.content_delta:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.delta.content_delta,
+                                        step=reasoning_step
+                                    ))
+
+                            # ============================================
+                            # TEXT CONTENT START (initial text chunk)
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
+                                event.part, TextPart
+                            ):
+                                # TextPart may contain initial content that needs to be emitted
+                                if event.part.content:
+                                    content = event.part.content
+                                    token_count += len(content.split())
+
+                                    content_chunk = ChatCompletionStreamResponse(
+                                        id=request_id,
+                                        created=created_at,
+                                        model=model,
+                                        choices=[
+                                            ChatCompletionStreamChoice(
+                                                index=0,
+                                                delta=ChatCompletionMessageDelta(
+                                                    role="assistant" if is_first_chunk else None,
+                                                    content=content,
+                                                ),
+                                                finish_reason=None,
+                                            )
+                                        ],
+                                    )
+                                    is_first_chunk = False
+                                    yield f"data: {content_chunk.model_dump_json()}\n\n"
+
+                            # ============================================
+                            # TOOL CALL START EVENTS
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
                                 event.part, ToolCallPart
                             ):
-                                logger.info(f"🔧 {event.part.tool_name}")
+                                tool_name = event.part.tool_name
 
-                                tool_call_chunk = ChatCompletionStreamResponse(
-                                    id=request_id,
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=f"[Calling: {event.part.tool_name}]",
-                                            ),
-                                            finish_reason=None,
+                                # Handle final_result specially - it's Pydantic AI's
+                                # internal tool for structured output
+                                if tool_name == "final_result":
+                                    # Extract the structured result and emit as content
+                                    args_dict = None
+                                    if event.part.args is not None:
+                                        if hasattr(event.part.args, 'args_dict'):
+                                            args_dict = event.part.args.args_dict
+                                        elif isinstance(event.part.args, dict):
+                                            args_dict = event.part.args
+
+                                    if args_dict:
+                                        # Emit the structured result as JSON content
+                                        result_json = json.dumps(args_dict, indent=2)
+                                        content_chunk = ChatCompletionStreamResponse(
+                                            id=request_id,
+                                            created=created_at,
+                                            model=model,
+                                            choices=[
+                                                ChatCompletionStreamChoice(
+                                                    index=0,
+                                                    delta=ChatCompletionMessageDelta(
+                                                        role="assistant" if is_first_chunk else None,
+                                                        content=result_json,
+                                                    ),
+                                                    finish_reason=None,
+                                                )
+                                            ],
                                         )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {tool_call_chunk.model_dump_json()}\n\n"
+                                        is_first_chunk = False
+                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                    continue  # Skip regular tool call handling
+
+                                tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                active_tool_calls[event.index] = (tool_name, tool_id)
+                                # Queue for completion matching (FIFO)
+                                pending_tool_completions.append((tool_name, tool_id))
+
+                                # Emit tool_call SSE event (started)
+                                # Try to get arguments as dict
+                                args_dict = None
+                                if event.part.args is not None:
+                                    if hasattr(event.part.args, 'args_dict'):
+                                        args_dict = event.part.args.args_dict
+                                    elif isinstance(event.part.args, dict):
+                                        args_dict = event.part.args
+
+                                # Log tool call with key parameters
+                                if args_dict and tool_name == "search_rem":
+                                    query_type = args_dict.get("query_type", "?")
+                                    limit = args_dict.get("limit", 20)
+                                    table = args_dict.get("table", "")
+                                    query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
+                                    if query_text and len(query_text) > 50:
+                                        query_text = query_text[:50] + "..."
+                                    logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
+                                else:
+                                    logger.info(f"🔧 {tool_name}")
 
-                            # Text content delta
+                                yield format_sse_event(ToolCallEvent(
+                                    tool_name=tool_name,
+                                    tool_id=tool_id,
+                                    status="started",
+                                    arguments=args_dict
+                                ))
+
+                                # Update progress
+                                current_step = 2
+                                total_steps = 4  # Added tool execution step
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label=f"Calling {tool_name}",
+                                    status="in_progress"
+                                ))
+
+                            # ============================================
+                            # TOOL CALL COMPLETION (PartEndEvent)
+                            # ============================================
+                            elif isinstance(event, PartEndEvent) and isinstance(
+                                event.part, ToolCallPart
+                            ):
+                                if event.index in active_tool_calls:
+                                    tool_name, tool_id = active_tool_calls[event.index]
+                                    # Note: result comes from FunctionToolResultEvent below
+                                    # For now, mark as completed without result
+                                    del active_tool_calls[event.index]
+
+                            # ============================================
+                            # TEXT CONTENT DELTA
+                            # ============================================
                             elif isinstance(event, PartDeltaEvent) and isinstance(
                                 event.delta, TextPartDelta
                             ):
+                                content = event.delta.content_delta
+                                token_count += len(content.split())  # Rough token estimate
+
                                 content_chunk = ChatCompletionStreamResponse(
                                     id=request_id,
                                     created=created_at,
@@ -130,7 +339,7 @@ async def stream_openai_response(
                                     index=0,
                                     delta=ChatCompletionMessageDelta(
                                         role="assistant" if is_first_chunk else None,
-                                        content=event.delta.content_delta,
+                                        content=content,
                                     ),
                                     finish_reason=None,
                                 )
@@ -139,16 +348,176 @@ async def stream_openai_response(
                                 is_first_chunk = False
                                 yield f"data: {content_chunk.model_dump_json()}\n\n"
 
-                # Check if this is a tool execution node
+                # ============================================
+                # TOOL EXECUTION NODE
+                # ============================================
                 elif Agent.is_call_tools_node(node):
-                    # Stream tool execution - tools complete here
                     async with node.stream(agent_run.ctx) as tools_stream:
-                        async for event in tools_stream:
-                            # We can log tool completion here if needed
-                            # For now, we already logged the call start above
-                            pass
+                        async for tool_event in tools_stream:
+                            # Tool result event - emit completion
+                            if isinstance(tool_event, FunctionToolResultEvent):
+                                # Get the tool name/id from the pending queue (FIFO)
+                                if pending_tool_completions:
+                                    tool_name, tool_id = pending_tool_completions.pop(0)
+                                else:
+                                    # Fallback if queue is empty (shouldn't happen)
+                                    tool_name = "tool"
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+
+                                # Check if this is a register_metadata tool result
+                                # It returns a dict with _metadata_event: True marker
+                                result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
+                                is_metadata_event = False
+
+                                if isinstance(result_content, dict) and result_content.get("_metadata_event"):
+                                    is_metadata_event = True
+                                    metadata_registered = True  # Skip default metadata at end
+                                    # Emit MetadataEvent with registered values
+                                    registered_confidence = result_content.get("confidence")
+                                    registered_sources = result_content.get("sources")
+                                    registered_references = result_content.get("references")
+                                    registered_flags = result_content.get("flags")
+                                    # Session naming
+                                    registered_session_name = result_content.get("session_name")
+                                    # Risk assessment fields
+                                    registered_risk_level = result_content.get("risk_level")
+                                    registered_risk_score = result_content.get("risk_score")
+                                    registered_risk_reasoning = result_content.get("risk_reasoning")
+                                    registered_recommended_action = result_content.get("recommended_action")
+                                    # Extra fields
+                                    registered_extra = result_content.get("extra")
+
+                                    logger.info(
+                                        f"📊 Metadata registered: confidence={registered_confidence}, "
+                                        f"session_name={registered_session_name}, "
+                                        f"risk_level={registered_risk_level}, sources={registered_sources}"
+                                    )
+
+                                    # Build extra dict with risk fields and any custom extras
+                                    extra_data = {}
+                                    if registered_risk_level is not None:
+                                        extra_data["risk_level"] = registered_risk_level
+                                    if registered_risk_score is not None:
+                                        extra_data["risk_score"] = registered_risk_score
+                                    if registered_risk_reasoning is not None:
+                                        extra_data["risk_reasoning"] = registered_risk_reasoning
+                                    if registered_recommended_action is not None:
+                                        extra_data["recommended_action"] = registered_recommended_action
+                                    if registered_extra:
+                                        extra_data.update(registered_extra)
+
+                                    # Emit metadata event immediately
+                                    yield format_sse_event(MetadataEvent(
+                                        message_id=message_id,
+                                        in_reply_to=in_reply_to,
+                                        session_id=session_id,
+                                        agent_schema=agent_schema,
+                                        session_name=registered_session_name,
+                                        confidence=registered_confidence,
+                                        sources=registered_sources,
+                                        model_version=model,
+                                        flags=registered_flags,
+                                        extra=extra_data if extra_data else None,
+                                        hidden=False,
+                                    ))
+
+                                if not is_metadata_event:
+                                    # Normal tool completion - emit ToolCallEvent
+                                    result_str = str(result_content)
+                                    result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
+
+                                    # Log result count for search_rem
+                                    if tool_name == "search_rem" and isinstance(result_content, dict):
+                                        results = result_content.get("results", {})
+                                        # Handle nested result structure: results may be a dict with 'results' list and 'count'
+                                        if isinstance(results, dict):
+                                            count = results.get("count", len(results.get("results", [])))
+                                            query_type = results.get("query_type", "?")
+                                            query_text = results.get("query_text", results.get("key", ""))
+                                            table = results.get("table_name", "")
+                                        elif isinstance(results, list):
+                                            count = len(results)
+                                            query_type = "?"
+                                            query_text = ""
+                                            table = ""
+                                        else:
+                                            count = "?"
+                                            query_type = "?"
+                                            query_text = ""
+                                            table = ""
+                                        status = result_content.get("status", "unknown")
+                                        # Truncate query text for logging
+                                        if query_text and len(str(query_text)) > 40:
+                                            query_text = str(query_text)[:40] + "..."
+                                        logger.info(f"  ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
+
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=tool_name,
+                                        tool_id=tool_id,
+                                        status="completed",
+                                        result=result_summary
+                                    ))
+
+                                # Update progress after tool completion
+                                current_step = 3
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label="Generating response",
+                                    status="in_progress"
+                                ))
+
+            # After iteration completes, check for structured result
+            # This handles agents with result_type (structured output)
+            # Skip for plain text output - already streamed via TextPartDelta
+            try:
+                result = agent_run.result
+                if result is not None and hasattr(result, 'output'):
+                    output = result.output
+
+                    # Skip plain string output - already streamed via TextPartDelta
+                    # Non-structured output agents (structured_output: false) return strings
+                    if isinstance(output, str):
+                        logger.debug("Plain text output already streamed via TextPartDelta, skipping final emission")
+                    else:
+                        # Serialize the structured output (Pydantic models)
+                        if hasattr(output, 'model_dump'):
+                            # Pydantic model
+                            result_dict = output.model_dump()
+                        elif hasattr(output, '__dict__'):
+                            result_dict = output.__dict__
+                        else:
+                            # Fallback for unknown types
+                            result_dict = {"result": str(output)}
+
+                        result_json = json.dumps(result_dict, indent=2, default=str)
+                        token_count += len(result_json.split())
+
+                        # Emit structured result as content
+                        result_chunk = ChatCompletionStreamResponse(
+                            id=request_id,
+                            created=created_at,
+                            model=model,
+                            choices=[
+                                ChatCompletionStreamChoice(
+                                    index=0,
+                                    delta=ChatCompletionMessageDelta(
+                                        role="assistant" if is_first_chunk else None,
+                                        content=result_json,
+                                    ),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
+                        is_first_chunk = False
+                        yield f"data: {result_chunk.model_dump_json()}\n\n"
+            except Exception as e:
+                logger.debug(f"No structured result available: {e}")
 
-        # Final chunk with finish_reason
+        # Calculate latency
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
             id=request_id,
             created=created_at,
@@ -163,7 +532,35 @@ async def stream_openai_response(
         )
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
-        # OpenAI termination marker
+        # Emit metadata event only if not already registered via register_metadata tool
+        if not metadata_registered:
+            yield format_sse_event(MetadataEvent(
+                message_id=message_id,
+                in_reply_to=in_reply_to,
+                session_id=session_id,
+                agent_schema=agent_schema,
+                confidence=1.0,  # Default to 100% confidence
+                model_version=model,
+                latency_ms=latency_ms,
+                token_count=token_count,
+                # Include deterministic trace context captured from OTEL
+                trace_id=captured_trace_id,
+                span_id=captured_span_id,
+            ))
+
+        # Mark all progress complete
+        for step in range(1, total_steps + 1):
+            yield format_sse_event(ProgressEvent(
+                step=step,
+                total_steps=total_steps,
+                label="Complete" if step == total_steps else f"Step {step}",
+                status="completed"
+            ))
+
+        # Emit done event
+        yield format_sse_event(DoneEvent(reason="stop"))
+
+        # OpenAI termination marker (for compatibility)
         yield "data: [DONE]\n\n"
 
     except Exception as e:
@@ -182,4 +579,204 @@ async def stream_openai_response(
                 }
             }
             yield f"data: {json.dumps(error_data)}\n\n"
+
+        # Emit done event with error reason
+        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
+
+
+async def stream_simulator_response(
+    prompt: str,
+    model: str = "simulator-v1.0.0",
+    request_id: str | None = None,
+    delay_ms: int = 50,
+    include_reasoning: bool = True,
+    include_progress: bool = True,
+    include_tool_calls: bool = True,
+    include_actions: bool = True,
+    include_metadata: bool = True,
+    # Message correlation IDs
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream SSE simulator events for testing and demonstration.
+
+    This function wraps the SSE simulator to produce formatted SSE strings
+    ready for HTTP streaming. No LLM calls are made.
+
+    The simulator produces a rich sequence of events:
+    1. Reasoning events (model thinking)
+    2. Progress events (step indicators)
+    3. Tool call events (simulated tool usage)
+    4. Text delta events (streamed content)
+    5. Metadata events (confidence, sources, message IDs)
+    6. Action request events (user interaction)
+    7. Done event
+
+    Args:
+        prompt: User prompt (passed to simulator)
+        model: Model name for metadata
+        request_id: Optional request ID
+        delay_ms: Delay between events in milliseconds
+        include_reasoning: Whether to emit reasoning events
+        include_progress: Whether to emit progress events
+        include_tool_calls: Whether to emit tool call events
+        include_actions: Whether to emit action request at end
+        include_metadata: Whether to emit metadata event
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
+
+    Yields:
+        SSE-formatted strings ready for HTTP response
+
+    Example:
+        ```python
+        from starlette.responses import StreamingResponse
+
+        async def simulator_endpoint():
+            return StreamingResponse(
+                stream_simulator_response("demo"),
+                media_type="text/event-stream"
+            )
+        ```
+    """
+    from rem.agentic.agents.sse_simulator import stream_simulator_events
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_simulator_events(
+        prompt=prompt,
+        delay_ms=delay_ms,
+        include_reasoning=include_reasoning,
+        include_progress=include_progress,
+        include_tool_calls=include_tool_calls,
+        include_actions=include_actions,
+        include_metadata=include_metadata,
+        # Pass message correlation IDs
+        message_id=message_id,
+        in_reply_to=in_reply_to,
+        session_id=session_id,
+        model=model,
+    ):
+        yield sse_string
+
+
+async def stream_minimal_simulator(
+    content: str = "Hello from the simulator!",
+    delay_ms: int = 30,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream minimal simulator output (text + done only).
+
+    Useful for simple testing without the full event sequence.
+
+    Args:
+        content: Text content to stream
+        delay_ms: Delay between chunks
+
+    Yields:
+        SSE-formatted strings
+    """
+    from rem.agentic.agents.sse_simulator import stream_minimal_demo
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
+        yield sse_string
+
+
+async def stream_openai_response_with_save(
+    agent: Agent,
+    prompt: str,
+    model: str,
+    request_id: str | None = None,
+    agent_schema: str | None = None,
+    session_id: str | None = None,
+    user_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Wrapper around stream_openai_response that saves the assistant response after streaming.
+
+    This accumulates all text content during streaming and saves it to the database
+    after the stream completes.
+
+    Args:
+        agent: Pydantic AI agent instance
+        prompt: User prompt
+        model: Model name
+        request_id: Optional request ID
+        agent_schema: Agent schema name
+        session_id: Session ID for message storage
+        user_id: User ID for message storage
+
+    Yields:
+        SSE-formatted strings
+    """
+    from ....utils.date_utils import utc_now, to_iso
+    from ....services.session import SessionMessageStore
+    from ....settings import settings
+
+    # Pre-generate message_id so it can be sent in metadata event
+    # This allows frontend to use it for feedback before DB persistence
+    message_id = str(uuid.uuid4())
+
+    # Mutable container for capturing trace context from inside agent execution
+    # This is deterministic - captured from OTEL instrumentation, not AI-generated
+    trace_context: dict = {}
+
+    # Accumulate content during streaming
+    accumulated_content = []
+
+    async for chunk in stream_openai_response(
+        agent=agent,
+        prompt=prompt,
+        model=model,
+        request_id=request_id,
+        agent_schema=agent_schema,
+        session_id=session_id,
+        message_id=message_id,
+        trace_context_out=trace_context,  # Pass container to capture trace IDs
+    ):
+        yield chunk
+
+        # Extract text content from OpenAI-format chunks
+        # Format: data: {"choices": [{"delta": {"content": "..."}}]}
+        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+            try:
+                data_str = chunk[6:].strip()  # Remove "data: " prefix
+                if data_str:
+                    data = json.loads(data_str)
+                    if "choices" in data and data["choices"]:
+                        delta = data["choices"][0].get("delta", {})
+                        content = delta.get("content")
+                        if content:
+                            accumulated_content.append(content)
+            except (json.JSONDecodeError, KeyError, IndexError):
+                pass  # Skip non-JSON or malformed chunks
+
+    # After streaming completes, save the assistant response
+    if settings.postgres.enabled and session_id and accumulated_content:
+        full_content = "".join(accumulated_content)
+        # Get captured trace context from container (deterministically captured inside agent execution)
+        captured_trace_id = trace_context.get("trace_id")
+        captured_span_id = trace_context.get("span_id")
+        assistant_message = {
+            "id": message_id,  # Use pre-generated ID for consistency with metadata event
+            "role": "assistant",
+            "content": full_content,
+            "timestamp": to_iso(utc_now()),
+            "trace_id": captured_trace_id,
+            "span_id": captured_span_id,
+        }
+        try:
+            store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+            await store.store_session_messages(
+                session_id=session_id,
+                messages=[assistant_message],
+                user_id=user_id,
+                compress=True,  # Compress long assistant responses
+            )
+            logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
+        except Exception as e:
+            logger.error(f"Failed to save assistant response: {e}", exc_info=True)