remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +32 -2
  4. rem/agentic/agents/agent_manager.py +310 -0
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -27
  7. rem/agentic/context_builder.py +5 -3
  8. rem/agentic/llm_provider_models.py +301 -0
  9. rem/agentic/mcp/tool_wrapper.py +155 -18
  10. rem/agentic/otel/setup.py +93 -4
  11. rem/agentic/providers/phoenix.py +371 -108
  12. rem/agentic/providers/pydantic_ai.py +280 -57
  13. rem/agentic/schema.py +361 -21
  14. rem/agentic/tools/rem_tools.py +3 -3
  15. rem/api/README.md +215 -1
  16. rem/api/deps.py +255 -0
  17. rem/api/main.py +132 -40
  18. rem/api/mcp_router/resources.py +1 -1
  19. rem/api/mcp_router/server.py +28 -5
  20. rem/api/mcp_router/tools.py +555 -7
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +278 -4
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +697 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/__init__.py +13 -3
  35. rem/auth/middleware.py +186 -22
  36. rem/auth/providers/__init__.py +4 -1
  37. rem/auth/providers/email.py +215 -0
  38. rem/cli/commands/README.md +237 -64
  39. rem/cli/commands/cluster.py +1808 -0
  40. rem/cli/commands/configure.py +4 -7
  41. rem/cli/commands/db.py +386 -143
  42. rem/cli/commands/experiments.py +468 -76
  43. rem/cli/commands/process.py +14 -8
  44. rem/cli/commands/schema.py +97 -50
  45. rem/cli/commands/session.py +336 -0
  46. rem/cli/dreaming.py +2 -2
  47. rem/cli/main.py +29 -6
  48. rem/config.py +10 -3
  49. rem/models/core/core_model.py +7 -1
  50. rem/models/core/experiment.py +58 -14
  51. rem/models/core/rem_query.py +5 -2
  52. rem/models/entities/__init__.py +25 -0
  53. rem/models/entities/domain_resource.py +38 -0
  54. rem/models/entities/feedback.py +123 -0
  55. rem/models/entities/message.py +30 -1
  56. rem/models/entities/ontology.py +1 -1
  57. rem/models/entities/ontology_config.py +1 -1
  58. rem/models/entities/session.py +83 -0
  59. rem/models/entities/shared_session.py +180 -0
  60. rem/models/entities/subscriber.py +175 -0
  61. rem/models/entities/user.py +1 -0
  62. rem/registry.py +10 -4
  63. rem/schemas/agents/core/agent-builder.yaml +134 -0
  64. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  65. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  66. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  67. rem/schemas/agents/rem.yaml +7 -3
  68. rem/services/__init__.py +3 -1
  69. rem/services/content/service.py +92 -19
  70. rem/services/email/__init__.py +10 -0
  71. rem/services/email/service.py +459 -0
  72. rem/services/email/templates.py +360 -0
  73. rem/services/embeddings/api.py +4 -4
  74. rem/services/embeddings/worker.py +16 -16
  75. rem/services/phoenix/client.py +154 -14
  76. rem/services/postgres/README.md +197 -15
  77. rem/services/postgres/__init__.py +2 -1
  78. rem/services/postgres/diff_service.py +547 -0
  79. rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
  80. rem/services/postgres/repository.py +132 -0
  81. rem/services/postgres/schema_generator.py +205 -4
  82. rem/services/postgres/service.py +6 -6
  83. rem/services/rem/parser.py +44 -9
  84. rem/services/rem/service.py +36 -2
  85. rem/services/session/compression.py +137 -51
  86. rem/services/session/reload.py +15 -8
  87. rem/settings.py +515 -27
  88. rem/sql/background_indexes.sql +21 -16
  89. rem/sql/migrations/001_install.sql +387 -54
  90. rem/sql/migrations/002_install_models.sql +2304 -377
  91. rem/sql/migrations/003_optional_extensions.sql +326 -0
  92. rem/sql/migrations/004_cache_system.sql +548 -0
  93. rem/sql/migrations/005_schema_update.sql +145 -0
  94. rem/utils/README.md +45 -0
  95. rem/utils/__init__.py +18 -0
  96. rem/utils/date_utils.py +2 -2
  97. rem/utils/files.py +157 -1
  98. rem/utils/model_helpers.py +156 -1
  99. rem/utils/schema_loader.py +220 -22
  100. rem/utils/sql_paths.py +146 -0
  101. rem/utils/sql_types.py +3 -1
  102. rem/utils/vision.py +1 -1
  103. rem/workers/__init__.py +3 -1
  104. rem/workers/db_listener.py +579 -0
  105. rem/workers/unlogged_maintainer.py +463 -0
  106. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
  107. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
  108. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
  109. rem/sql/002_install_models.sql +0 -1068
  110. rem/sql/install_models.sql +0 -1051
  111. rem/sql/migrations/003_seed_default_user.sql +0 -48
  112. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/streaming.py

@@ -3,20 +3,29 @@ OpenAI-compatible streaming relay for Pydantic AI agents.
 
 Design Pattern:
 - Uses Pydantic AI's agent.iter() to capture full execution including tool calls
-- Streams tool call events with [Calling: tool_name] markers
-- Streams text content deltas as they arrive
+- Emits rich SSE events: reasoning, tool_call, progress, metadata, text_delta
 - Proper OpenAI SSE format with data: prefix and [DONE] terminator
 - Error handling with graceful degradation
 
-Key Insight
+Key Insight
 - agent.run_stream() stops after first output, missing tool calls
 - agent.iter() provides complete execution with tool call visibility
-- Use PartStartEvent to detect tool calls
-- Use PartDeltaEvent with TextPartDelta for content streaming
+- Use PartStartEvent to detect tool calls and thinking parts
+- Use PartDeltaEvent with TextPartDelta/ThinkingPartDelta for streaming
+- Use PartEndEvent to detect tool completion
+- Use FunctionToolResultEvent to get tool results
 
-SSE Format:
+SSE Format (OpenAI-compatible):
 data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
 data: [DONE]\\n\\n
+
+Extended SSE Format (Custom Events):
+event: reasoning\\ndata: {"type": "reasoning", "content": "..."}\\n\\n
+event: tool_call\\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\\n\\n
+event: progress\\ndata: {"type": "progress", "step": 1, "total_steps": 3}\\n\\n
+event: metadata\\ndata: {"type": "metadata", "confidence": 0.95}\\n\\n
+
+See sse_events.py for the full event type definitions.
 """
 
 import json
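Not part of the diff: a minimal client sketch for the mixed format the docstring describes, assuming an HTTP endpoint that serves this generator (the URL and payload are placeholders). It shows the one parsing rule the format imposes: an `event:` field, when present, names the frame, while plain OpenAI chunks arrive with no `event:` field at all.

```python
import json
import httpx

async def consume_chat_stream(url: str, payload: dict) -> None:
    """Print text deltas and named custom events from one SSE stream."""
    event_name = "message"  # SSE default when no "event:" field precedes "data:"
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as response:
            async for line in response.aiter_lines():
                if line.startswith("event: "):
                    event_name = line[len("event: "):]
                elif line.startswith("data: "):
                    data = line[len("data: "):]
                    if data == "[DONE]":
                        return  # OpenAI termination marker
                    body = json.loads(data)
                    if event_name == "message" and body.get("choices"):
                        # Plain OpenAI chunk: print the content delta, if any
                        delta = body["choices"][0].get("delta", {}).get("content")
                        if delta:
                            print(delta, end="")
                    elif event_name != "message":
                        print(f"\n[{event_name}] {body}")
                elif line == "":
                    event_name = "message"  # blank line closes the frame
```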
@@ -27,17 +36,31 @@ from typing import AsyncGenerator
 from loguru import logger
 from pydantic_ai.agent import Agent
 from pydantic_ai.messages import (
+    FunctionToolResultEvent,
     PartDeltaEvent,
+    PartEndEvent,
     PartStartEvent,
+    TextPart,
     TextPartDelta,
+    ThinkingPart,
+    ThinkingPartDelta,
     ToolCallPart,
 )
 
+from .otel_utils import get_current_trace_context, get_tracer
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
     ChatCompletionStreamResponse,
 )
+from .sse_events import (
+    DoneEvent,
+    MetadataEvent,
+    ProgressEvent,
+    ReasoningEvent,
+    ToolCallEvent,
+    format_sse_event,
+)
 
 
 async def stream_openai_response(
@@ -45,82 +68,281 @@ async def stream_openai_response(
     prompt: str,
     model: str,
     request_id: str | None = None,
+    # Message correlation IDs for metadata
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+    # Agent info for metadata
+    agent_schema: str | None = None,
+    # Mutable container to capture trace context (deterministic, not AI-dependent)
+    trace_context_out: dict | None = None,
+    # Mutable container to capture tool calls for persistence
+    # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
+    tool_calls_out: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
-    Stream Pydantic AI agent responses in OpenAI SSE format with tool call events.
+    Stream Pydantic AI agent responses with rich SSE events.
+
+    Emits all SSE event types matching the simulator:
+    - reasoning: Model thinking/chain-of-thought (from ThinkingPart)
+    - tool_call: Tool invocation start/complete (from ToolCallPart, FunctionToolResultEvent)
+    - progress: Step indicators for multi-step execution
+    - text_delta: Streamed content (OpenAI-compatible format)
+    - metadata: Message IDs, model info, performance metrics
+    - done: Stream completion
 
     Design Pattern:
     1. Use agent.iter() for complete execution (not run_stream())
     2. Iterate over nodes to capture model requests and tool executions
-    3. Stream tool call start events as [Calling: tool_name]
-    4. Stream text content deltas as they arrive
-    5. Send final chunk with finish_reason="stop"
-    6. Send OpenAI termination marker [DONE]
+    3. Emit rich SSE events for reasoning, tools, progress
+    4. Stream text content in OpenAI-compatible format
+    5. Send metadata and done events at completion
 
     Args:
         agent: Pydantic AI agent instance
         prompt: User prompt to run
         model: Model name for response metadata
         request_id: Optional request ID (generates UUID if not provided)
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
 
     Yields:
-        SSE-formatted strings: "data: {json}\\n\\n"
+        SSE-formatted strings
 
     Example Stream:
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
+        event: progress
+        data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Processing", "status": "in_progress"}
 
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search]"}}]}
+        event: reasoning
+        data: {"type": "reasoning", "content": "Analyzing the request..."}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "started", "arguments": {...}}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "completed", "result": "..."}
 
         data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}
 
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
+        event: metadata
+        data: {"type": "metadata", "message_id": "...", "latency_ms": 1234}
 
-        data: [DONE]
+        event: done
+        data: {"type": "done", "reason": "stop"}
     """
     if request_id is None:
         request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
     created_at = int(time.time())
+    start_time = time.time()
     is_first_chunk = True
+    reasoning_step = 0
+    current_step = 0
+    total_steps = 3  # Model request, tool execution (optional), final response
+    token_count = 0
+
+    # Track active tool calls for completion events
+    # Maps index -> (tool_name, tool_id) for correlating start/end events
+    active_tool_calls: dict[int, tuple[str, str]] = {}
+    # Queue of tool calls awaiting completion (FIFO for matching)
+    pending_tool_completions: list[tuple[str, str]] = []
+    # Track if metadata was registered via register_metadata tool
+    metadata_registered = False
+    # Track pending tool calls with full data for persistence
+    # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
+    pending_tool_data: dict[str, dict] = {}
 
     try:
+        # Emit initial progress event
+        current_step = 1
+        yield format_sse_event(ProgressEvent(
+            step=current_step,
+            total_steps=total_steps,
+            label="Processing request",
+            status="in_progress"
+        ))
+
         # Use agent.iter() to get complete execution with tool calls
-        # run_stream() stops after first output, missing tool calls
        async with agent.iter(prompt) as agent_run:
+            # Capture trace context IMMEDIATELY inside agent execution
+            # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
+            # NOT dependent on any AI-generated content
+            captured_trace_id, captured_span_id = get_current_trace_context()
+            if trace_context_out is not None:
+                trace_context_out["trace_id"] = captured_trace_id
+                trace_context_out["span_id"] = captured_span_id
+
             async for node in agent_run:
                 # Check if this is a model request node (includes tool calls)
                 if Agent.is_model_request_node(node):
                     # Stream events from model request
                     async with node.stream(agent_run.ctx) as request_stream:
                         async for event in request_stream:
-                            # Tool call start event
+                            # ============================================
+                            # REASONING EVENTS (ThinkingPart)
+                            # ============================================
                             if isinstance(event, PartStartEvent) and isinstance(
+                                event.part, ThinkingPart
+                            ):
+                                reasoning_step += 1
+                                if event.part.content:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.part.content,
+                                        step=reasoning_step
+                                    ))
+
+                            # Reasoning delta (streaming thinking)
+                            elif isinstance(event, PartDeltaEvent) and isinstance(
+                                event.delta, ThinkingPartDelta
+                            ):
+                                if event.delta.content_delta:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.delta.content_delta,
+                                        step=reasoning_step
+                                    ))
+
+                            # ============================================
+                            # TEXT CONTENT START (initial text chunk)
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
+                                event.part, TextPart
+                            ):
+                                # TextPart may contain initial content that needs to be emitted
+                                if event.part.content:
+                                    content = event.part.content
+                                    token_count += len(content.split())
+
+                                    content_chunk = ChatCompletionStreamResponse(
+                                        id=request_id,
+                                        created=created_at,
+                                        model=model,
+                                        choices=[
+                                            ChatCompletionStreamChoice(
+                                                index=0,
+                                                delta=ChatCompletionMessageDelta(
+                                                    role="assistant" if is_first_chunk else None,
+                                                    content=content,
+                                                ),
+                                                finish_reason=None,
+                                            )
+                                        ],
+                                    )
+                                    is_first_chunk = False
+                                    yield f"data: {content_chunk.model_dump_json()}\n\n"
+
+                            # ============================================
+                            # TOOL CALL START EVENTS
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
                                 event.part, ToolCallPart
                             ):
-                                logger.info(f"🔧 {event.part.tool_name}")
+                                tool_name = event.part.tool_name
 
-                                tool_call_chunk = ChatCompletionStreamResponse(
-                                    id=request_id,
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=f"[Calling: {event.part.tool_name}]",
-                                            ),
-                                            finish_reason=None,
+                                # Handle final_result specially - it's Pydantic AI's
+                                # internal tool for structured output
+                                if tool_name == "final_result":
+                                    # Extract the structured result and emit as content
+                                    args_dict = None
+                                    if event.part.args is not None:
+                                        if hasattr(event.part.args, 'args_dict'):
+                                            args_dict = event.part.args.args_dict
+                                        elif isinstance(event.part.args, dict):
+                                            args_dict = event.part.args
+
+                                    if args_dict:
+                                        # Emit the structured result as JSON content
+                                        result_json = json.dumps(args_dict, indent=2)
+                                        content_chunk = ChatCompletionStreamResponse(
+                                            id=request_id,
+                                            created=created_at,
+                                            model=model,
+                                            choices=[
+                                                ChatCompletionStreamChoice(
+                                                    index=0,
+                                                    delta=ChatCompletionMessageDelta(
+                                                        role="assistant" if is_first_chunk else None,
+                                                        content=result_json,
+                                                    ),
+                                                    finish_reason=None,
+                                                )
+                                            ],
                                         )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {tool_call_chunk.model_dump_json()}\n\n"
+                                        is_first_chunk = False
+                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                    continue  # Skip regular tool call handling
+
+                                tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                active_tool_calls[event.index] = (tool_name, tool_id)
+                                # Queue for completion matching (FIFO)
+                                pending_tool_completions.append((tool_name, tool_id))
+
+                                # Emit tool_call SSE event (started)
+                                # Try to get arguments as dict
+                                args_dict = None
+                                if event.part.args is not None:
+                                    if hasattr(event.part.args, 'args_dict'):
+                                        args_dict = event.part.args.args_dict
+                                    elif isinstance(event.part.args, dict):
+                                        args_dict = event.part.args
+
+                                # Log tool call with key parameters
+                                if args_dict and tool_name == "search_rem":
+                                    query_type = args_dict.get("query_type", "?")
+                                    limit = args_dict.get("limit", 20)
+                                    table = args_dict.get("table", "")
+                                    query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
+                                    if query_text and len(query_text) > 50:
+                                        query_text = query_text[:50] + "..."
+                                    logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
+                                else:
+                                    logger.info(f"🔧 {tool_name}")
 
-                            # Text content delta
+                                yield format_sse_event(ToolCallEvent(
+                                    tool_name=tool_name,
+                                    tool_id=tool_id,
+                                    status="started",
+                                    arguments=args_dict
+                                ))
+
+                                # Track tool call data for persistence (especially register_metadata)
+                                pending_tool_data[tool_id] = {
+                                    "tool_name": tool_name,
+                                    "tool_id": tool_id,
+                                    "arguments": args_dict,
+                                }
+
+                                # Update progress
+                                current_step = 2
+                                total_steps = 4  # Added tool execution step
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label=f"Calling {tool_name}",
+                                    status="in_progress"
+                                ))
+
+                            # ============================================
+                            # TOOL CALL COMPLETION (PartEndEvent)
+                            # ============================================
+                            elif isinstance(event, PartEndEvent) and isinstance(
+                                event.part, ToolCallPart
+                            ):
+                                if event.index in active_tool_calls:
+                                    tool_name, tool_id = active_tool_calls[event.index]
+                                    # Note: result comes from FunctionToolResultEvent below
+                                    # For now, mark as completed without result
+                                    del active_tool_calls[event.index]
+
+                            # ============================================
+                            # TEXT CONTENT DELTA
+                            # ============================================
                             elif isinstance(event, PartDeltaEvent) and isinstance(
                                 event.delta, TextPartDelta
                             ):
+                                content = event.delta.content_delta
+                                token_count += len(content.split())  # Rough token estimate
+
                                 content_chunk = ChatCompletionStreamResponse(
                                     id=request_id,
                                     created=created_at,
@@ -130,7 +352,7 @@ async def stream_openai_response(
                                     index=0,
                                     delta=ChatCompletionMessageDelta(
                                         role="assistant" if is_first_chunk else None,
-                                        content=event.delta.content_delta,
+                                        content=content,
                                     ),
                                     finish_reason=None,
                                 )
@@ -139,16 +361,185 @@ async def stream_openai_response(
                             is_first_chunk = False
                             yield f"data: {content_chunk.model_dump_json()}\n\n"
 
-                # Check if this is a tool execution node
+                # ============================================
+                # TOOL EXECUTION NODE
+                # ============================================
                 elif Agent.is_call_tools_node(node):
-                    # Stream tool execution - tools complete here
                     async with node.stream(agent_run.ctx) as tools_stream:
-                        async for event in tools_stream:
-                            # We can log tool completion here if needed
-                            # For now, we already logged the call start above
-                            pass
+                        async for tool_event in tools_stream:
+                            # Tool result event - emit completion
+                            if isinstance(tool_event, FunctionToolResultEvent):
+                                # Get the tool name/id from the pending queue (FIFO)
+                                if pending_tool_completions:
+                                    tool_name, tool_id = pending_tool_completions.pop(0)
+                                else:
+                                    # Fallback if queue is empty (shouldn't happen)
+                                    tool_name = "tool"
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+
+                                # Check if this is a register_metadata tool result
+                                # It returns a dict with _metadata_event: True marker
+                                result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
+                                is_metadata_event = False
+
+                                if isinstance(result_content, dict) and result_content.get("_metadata_event"):
+                                    is_metadata_event = True
+                                    metadata_registered = True  # Skip default metadata at end
+                                    # Emit MetadataEvent with registered values
+                                    registered_confidence = result_content.get("confidence")
+                                    registered_sources = result_content.get("sources")
+                                    registered_references = result_content.get("references")
+                                    registered_flags = result_content.get("flags")
+                                    # Session naming
+                                    registered_session_name = result_content.get("session_name")
+                                    # Risk assessment fields
+                                    registered_risk_level = result_content.get("risk_level")
+                                    registered_risk_score = result_content.get("risk_score")
+                                    registered_risk_reasoning = result_content.get("risk_reasoning")
+                                    registered_recommended_action = result_content.get("recommended_action")
+                                    # Extra fields
+                                    registered_extra = result_content.get("extra")
+
+                                    logger.info(
+                                        f"📊 Metadata registered: confidence={registered_confidence}, "
+                                        f"session_name={registered_session_name}, "
+                                        f"risk_level={registered_risk_level}, sources={registered_sources}"
+                                    )
+
+                                    # Build extra dict with risk fields and any custom extras
+                                    extra_data = {}
+                                    if registered_risk_level is not None:
+                                        extra_data["risk_level"] = registered_risk_level
+                                    if registered_risk_score is not None:
+                                        extra_data["risk_score"] = registered_risk_score
+                                    if registered_risk_reasoning is not None:
+                                        extra_data["risk_reasoning"] = registered_risk_reasoning
+                                    if registered_recommended_action is not None:
+                                        extra_data["recommended_action"] = registered_recommended_action
+                                    if registered_extra:
+                                        extra_data.update(registered_extra)
 
-        # Final chunk with finish_reason
+                                    # Emit metadata event immediately
+                                    yield format_sse_event(MetadataEvent(
+                                        message_id=message_id,
+                                        in_reply_to=in_reply_to,
+                                        session_id=session_id,
+                                        agent_schema=agent_schema,
+                                        session_name=registered_session_name,
+                                        confidence=registered_confidence,
+                                        sources=registered_sources,
+                                        model_version=model,
+                                        flags=registered_flags,
+                                        extra=extra_data if extra_data else None,
+                                        hidden=False,
+                                    ))
+
+                                # Capture tool call with result for persistence
+                                # Special handling for register_metadata - always capture full data
+                                if tool_calls_out is not None and tool_id in pending_tool_data:
+                                    tool_data = pending_tool_data[tool_id]
+                                    tool_data["result"] = result_content
+                                    tool_data["is_metadata"] = is_metadata_event
+                                    tool_calls_out.append(tool_data)
+                                    del pending_tool_data[tool_id]
+
+                                if not is_metadata_event:
+                                    # Normal tool completion - emit ToolCallEvent
+                                    result_str = str(result_content)
+                                    result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
+
+                                    # Log result count for search_rem
+                                    if tool_name == "search_rem" and isinstance(result_content, dict):
+                                        results = result_content.get("results", {})
+                                        # Handle nested result structure: results may be a dict with 'results' list and 'count'
+                                        if isinstance(results, dict):
+                                            count = results.get("count", len(results.get("results", [])))
+                                            query_type = results.get("query_type", "?")
+                                            query_text = results.get("query_text", results.get("key", ""))
+                                            table = results.get("table_name", "")
+                                        elif isinstance(results, list):
+                                            count = len(results)
+                                            query_type = "?"
+                                            query_text = ""
+                                            table = ""
+                                        else:
+                                            count = "?"
+                                            query_type = "?"
+                                            query_text = ""
+                                            table = ""
+                                        status = result_content.get("status", "unknown")
+                                        # Truncate query text for logging
+                                        if query_text and len(str(query_text)) > 40:
+                                            query_text = str(query_text)[:40] + "..."
+                                        logger.info(f"  ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
+
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=tool_name,
+                                        tool_id=tool_id,
+                                        status="completed",
+                                        result=result_summary
+                                    ))
+
+                                # Update progress after tool completion
+                                current_step = 3
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label="Generating response",
+                                    status="in_progress"
+                                ))
+
+            # After iteration completes, check for structured result
+            # This handles agents with result_type (structured output)
+            # Skip for plain text output - already streamed via TextPartDelta
+            try:
+                result = agent_run.result
+                if result is not None and hasattr(result, 'output'):
+                    output = result.output
+
+                    # Skip plain string output - already streamed via TextPartDelta
+                    # Non-structured output agents (structured_output: false) return strings
+                    if isinstance(output, str):
+                        logger.debug("Plain text output already streamed via TextPartDelta, skipping final emission")
+                    else:
+                        # Serialize the structured output (Pydantic models)
+                        if hasattr(output, 'model_dump'):
+                            # Pydantic model
+                            result_dict = output.model_dump()
+                        elif hasattr(output, '__dict__'):
+                            result_dict = output.__dict__
+                        else:
+                            # Fallback for unknown types
+                            result_dict = {"result": str(output)}
+
+                        result_json = json.dumps(result_dict, indent=2, default=str)
+                        token_count += len(result_json.split())
+
+                        # Emit structured result as content
+                        result_chunk = ChatCompletionStreamResponse(
+                            id=request_id,
+                            created=created_at,
+                            model=model,
+                            choices=[
+                                ChatCompletionStreamChoice(
+                                    index=0,
+                                    delta=ChatCompletionMessageDelta(
+                                        role="assistant" if is_first_chunk else None,
+                                        content=result_json,
+                                    ),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
+                        is_first_chunk = False
+                        yield f"data: {result_chunk.model_dump_json()}\n\n"
+            except Exception as e:
+                logger.debug(f"No structured result available: {e}")
+
+        # Calculate latency
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
             id=request_id,
             created=created_at,
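The `_metadata_event` branch above implies a `register_metadata` tool whose return value is a marker dict rather than ordinary tool output; the tool itself is not shown in this hunk. A sketch of the contract the handler reads, with field names taken directly from the `.get(...)` calls above (the signature is hypothetical):

```python
async def register_metadata(
    confidence: float | None = None,
    session_name: str | None = None,
    sources: list | None = None,
    risk_level: str | None = None,
    risk_score: float | None = None,
    extra: dict | None = None,
) -> dict:
    # The streaming relay detects the marker below and converts this result
    # into a MetadataEvent instead of a normal tool_call completion event.
    return {
        "_metadata_event": True,
        "confidence": confidence,
        "session_name": session_name,
        "sources": sources,
        "risk_level": risk_level,
        "risk_score": risk_score,
        "extra": extra,
    }
```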
@@ -163,7 +554,35 @@ async def stream_openai_response(
         )
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
-        # OpenAI termination marker
+        # Emit metadata event only if not already registered via register_metadata tool
+        if not metadata_registered:
+            yield format_sse_event(MetadataEvent(
+                message_id=message_id,
+                in_reply_to=in_reply_to,
+                session_id=session_id,
+                agent_schema=agent_schema,
+                confidence=1.0,  # Default to 100% confidence
+                model_version=model,
+                latency_ms=latency_ms,
+                token_count=token_count,
+                # Include deterministic trace context captured from OTEL
+                trace_id=captured_trace_id,
+                span_id=captured_span_id,
+            ))
+
+        # Mark all progress complete
+        for step in range(1, total_steps + 1):
+            yield format_sse_event(ProgressEvent(
+                step=step,
+                total_steps=total_steps,
+                label="Complete" if step == total_steps else f"Step {step}",
+                status="completed"
+            ))
+
+        # Emit done event
+        yield format_sse_event(DoneEvent(reason="stop"))
+
+        # OpenAI termination marker (for compatibility)
         yield "data: [DONE]\n\n"
 
     except Exception as e:
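Taken together, the completion path above ends every successful stream with the same tail: the final OpenAI chunk, a metadata frame (unless a register_metadata call already emitted one), the progress-completion frames, the done frame, and the [DONE] marker. Illustrative values only:

```
data: {"id": "chatcmpl-...", "choices": [{"delta": {}, "finish_reason": "stop"}]}

event: metadata
data: {"type": "metadata", "message_id": "...", "confidence": 1.0, "latency_ms": 842, "token_count": 156}

event: progress
data: {"type": "progress", "step": 3, "total_steps": 3, "label": "Complete", "status": "completed"}

event: done
data: {"type": "done", "reason": "stop"}

data: [DONE]
```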
@@ -182,4 +601,237 @@ async def stream_openai_response(
             }
         }
         yield f"data: {json.dumps(error_data)}\n\n"
+
+        # Emit done event with error reason
+        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
+
+
+async def stream_simulator_response(
+    prompt: str,
+    model: str = "simulator-v1.0.0",
+    request_id: str | None = None,
+    delay_ms: int = 50,
+    include_reasoning: bool = True,
+    include_progress: bool = True,
+    include_tool_calls: bool = True,
+    include_actions: bool = True,
+    include_metadata: bool = True,
+    # Message correlation IDs
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream SSE simulator events for testing and demonstration.
+
+    This function wraps the SSE simulator to produce formatted SSE strings
+    ready for HTTP streaming. No LLM calls are made.
+
+    The simulator produces a rich sequence of events:
+    1. Reasoning events (model thinking)
+    2. Progress events (step indicators)
+    3. Tool call events (simulated tool usage)
+    4. Text delta events (streamed content)
+    5. Metadata events (confidence, sources, message IDs)
+    6. Action request events (user interaction)
+    7. Done event
+
+    Args:
+        prompt: User prompt (passed to simulator)
+        model: Model name for metadata
+        request_id: Optional request ID
+        delay_ms: Delay between events in milliseconds
+        include_reasoning: Whether to emit reasoning events
+        include_progress: Whether to emit progress events
+        include_tool_calls: Whether to emit tool call events
+        include_actions: Whether to emit action request at end
+        include_metadata: Whether to emit metadata event
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
+
+    Yields:
+        SSE-formatted strings ready for HTTP response
+
+    Example:
+        ```python
+        from starlette.responses import StreamingResponse
+
+        async def simulator_endpoint():
+            return StreamingResponse(
+                stream_simulator_response("demo"),
+                media_type="text/event-stream"
+            )
+        ```
+    """
+    from rem.agentic.agents.sse_simulator import stream_simulator_events
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_simulator_events(
+        prompt=prompt,
+        delay_ms=delay_ms,
+        include_reasoning=include_reasoning,
+        include_progress=include_progress,
+        include_tool_calls=include_tool_calls,
+        include_actions=include_actions,
+        include_metadata=include_metadata,
+        # Pass message correlation IDs
+        message_id=message_id,
+        in_reply_to=in_reply_to,
+        session_id=session_id,
+        model=model,
+    ):
+        yield sse_string
+
+
+async def stream_minimal_simulator(
+    content: str = "Hello from the simulator!",
+    delay_ms: int = 30,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream minimal simulator output (text + done only).
+
+    Useful for simple testing without the full event sequence.
+
+    Args:
+        content: Text content to stream
+        delay_ms: Delay between chunks
+
+    Yields:
+        SSE-formatted strings
+    """
+    from rem.agentic.agents.sse_simulator import stream_minimal_demo
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
+        yield sse_string
+
+
+async def stream_openai_response_with_save(
+    agent: Agent,
+    prompt: str,
+    model: str,
+    request_id: str | None = None,
+    agent_schema: str | None = None,
+    session_id: str | None = None,
+    user_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Wrapper around stream_openai_response that saves the assistant response after streaming.
+
+    This accumulates all text content during streaming and saves it to the database
+    after the stream completes.
+
+    Args:
+        agent: Pydantic AI agent instance
+        prompt: User prompt
+        model: Model name
+        request_id: Optional request ID
+        agent_schema: Agent schema name
+        session_id: Session ID for message storage
+        user_id: User ID for message storage
+
+    Yields:
+        SSE-formatted strings
+    """
+    from ....utils.date_utils import utc_now, to_iso
+    from ....services.session import SessionMessageStore
+    from ....settings import settings
+
+    # Pre-generate message_id so it can be sent in metadata event
+    # This allows frontend to use it for feedback before DB persistence
+    message_id = str(uuid.uuid4())
+
+    # Mutable container for capturing trace context from inside agent execution
+    # This is deterministic - captured from OTEL instrumentation, not AI-generated
+    trace_context: dict = {}
+
+    # Accumulate content during streaming
+    accumulated_content = []
+
+    # Capture tool calls for persistence (especially register_metadata)
+    tool_calls: list = []
+
+    async for chunk in stream_openai_response(
+        agent=agent,
+        prompt=prompt,
+        model=model,
+        request_id=request_id,
+        agent_schema=agent_schema,
+        session_id=session_id,
+        message_id=message_id,
+        trace_context_out=trace_context,  # Pass container to capture trace IDs
+        tool_calls_out=tool_calls,  # Capture tool calls for persistence
+    ):
+        yield chunk
+
+        # Extract text content from OpenAI-format chunks
+        # Format: data: {"choices": [{"delta": {"content": "..."}}]}
+        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+            try:
+                data_str = chunk[6:].strip()  # Remove "data: " prefix
+                if data_str:
+                    data = json.loads(data_str)
+                    if "choices" in data and data["choices"]:
+                        delta = data["choices"][0].get("delta", {})
+                        content = delta.get("content")
+                        if content:
+                            accumulated_content.append(content)
+            except (json.JSONDecodeError, KeyError, IndexError):
+                pass  # Skip non-JSON or malformed chunks
+
+    # After streaming completes, save tool calls and assistant response
+    # Note: All messages stored UNCOMPRESSED. Compression happens on reload.
+    if settings.postgres.enabled and session_id:
+        # Get captured trace context from container (deterministically captured inside agent execution)
+        captured_trace_id = trace_context.get("trace_id")
+        captured_span_id = trace_context.get("span_id")
+        timestamp = to_iso(utc_now())
+
+        messages_to_store = []
+
+        # First, store tool call messages (message_type: "tool")
+        for tool_call in tool_calls:
+            tool_message = {
+                "role": "tool",
+                "content": json.dumps(tool_call.get("result", {}), default=str),
+                "timestamp": timestamp,
+                "trace_id": captured_trace_id,
+                "span_id": captured_span_id,
+                # Store tool call details in a way that can be reconstructed
+                "tool_call_id": tool_call.get("tool_id"),
+                "tool_name": tool_call.get("tool_name"),
+                "tool_arguments": tool_call.get("arguments"),
+            }
+            messages_to_store.append(tool_message)
+
+        # Then store assistant text response (if any)
+        if accumulated_content:
+            full_content = "".join(accumulated_content)
+            assistant_message = {
+                "id": message_id,  # Use pre-generated ID for consistency with metadata event
+                "role": "assistant",
+                "content": full_content,
+                "timestamp": timestamp,
+                "trace_id": captured_trace_id,
+                "span_id": captured_span_id,
+            }
+            messages_to_store.append(assistant_message)
+
+        if messages_to_store:
+            try:
+                store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+                await store.store_session_messages(
+                    session_id=session_id,
+                    messages=messages_to_store,
+                    user_id=user_id,
+                    compress=False,  # Store uncompressed; compression happens on reload
+                )
+                logger.debug(
+                    f"Saved {len(tool_calls)} tool calls and "
+                    f"{'assistant response' if accumulated_content else 'no text'} "
+                    f"to session {session_id}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to save session messages: {e}", exc_info=True)