remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (104)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/mcp/tool_wrapper.py +29 -3
  9. rem/agentic/otel/setup.py +92 -4
  10. rem/agentic/providers/phoenix.py +32 -43
  11. rem/agentic/providers/pydantic_ai.py +168 -24
  12. rem/agentic/schema.py +358 -21
  13. rem/agentic/tools/rem_tools.py +3 -3
  14. rem/api/README.md +238 -1
  15. rem/api/deps.py +255 -0
  16. rem/api/main.py +154 -37
  17. rem/api/mcp_router/resources.py +1 -1
  18. rem/api/mcp_router/server.py +26 -5
  19. rem/api/mcp_router/tools.py +454 -7
  20. rem/api/middleware/tracking.py +172 -0
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +124 -0
  23. rem/api/routers/chat/completions.py +152 -16
  24. rem/api/routers/chat/models.py +7 -3
  25. rem/api/routers/chat/sse_events.py +526 -0
  26. rem/api/routers/chat/streaming.py +608 -45
  27. rem/api/routers/dev.py +81 -0
  28. rem/api/routers/feedback.py +148 -0
  29. rem/api/routers/messages.py +473 -0
  30. rem/api/routers/models.py +78 -0
  31. rem/api/routers/query.py +360 -0
  32. rem/api/routers/shared_sessions.py +406 -0
  33. rem/auth/middleware.py +126 -27
  34. rem/cli/commands/README.md +237 -64
  35. rem/cli/commands/ask.py +15 -11
  36. rem/cli/commands/cluster.py +1300 -0
  37. rem/cli/commands/configure.py +170 -97
  38. rem/cli/commands/db.py +396 -139
  39. rem/cli/commands/experiments.py +278 -96
  40. rem/cli/commands/process.py +22 -15
  41. rem/cli/commands/scaffold.py +47 -0
  42. rem/cli/commands/schema.py +97 -50
  43. rem/cli/main.py +37 -6
  44. rem/config.py +2 -2
  45. rem/models/core/core_model.py +7 -1
  46. rem/models/core/rem_query.py +5 -2
  47. rem/models/entities/__init__.py +21 -0
  48. rem/models/entities/domain_resource.py +38 -0
  49. rem/models/entities/feedback.py +123 -0
  50. rem/models/entities/message.py +30 -1
  51. rem/models/entities/session.py +83 -0
  52. rem/models/entities/shared_session.py +180 -0
  53. rem/models/entities/user.py +10 -3
  54. rem/registry.py +373 -0
  55. rem/schemas/agents/rem.yaml +7 -3
  56. rem/services/content/providers.py +94 -140
  57. rem/services/content/service.py +115 -24
  58. rem/services/dreaming/affinity_service.py +2 -16
  59. rem/services/dreaming/moment_service.py +2 -15
  60. rem/services/embeddings/api.py +24 -17
  61. rem/services/embeddings/worker.py +16 -16
  62. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  63. rem/services/phoenix/client.py +252 -19
  64. rem/services/postgres/README.md +159 -15
  65. rem/services/postgres/__init__.py +2 -1
  66. rem/services/postgres/diff_service.py +531 -0
  67. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  68. rem/services/postgres/repository.py +132 -0
  69. rem/services/postgres/schema_generator.py +291 -9
  70. rem/services/postgres/service.py +6 -6
  71. rem/services/rate_limit.py +113 -0
  72. rem/services/rem/README.md +14 -0
  73. rem/services/rem/parser.py +44 -9
  74. rem/services/rem/service.py +36 -2
  75. rem/services/session/compression.py +17 -1
  76. rem/services/session/reload.py +1 -1
  77. rem/services/user_service.py +98 -0
  78. rem/settings.py +169 -22
  79. rem/sql/background_indexes.sql +21 -16
  80. rem/sql/migrations/001_install.sql +387 -54
  81. rem/sql/migrations/002_install_models.sql +2320 -393
  82. rem/sql/migrations/003_optional_extensions.sql +326 -0
  83. rem/sql/migrations/004_cache_system.sql +548 -0
  84. rem/utils/__init__.py +18 -0
  85. rem/utils/constants.py +97 -0
  86. rem/utils/date_utils.py +228 -0
  87. rem/utils/embeddings.py +17 -4
  88. rem/utils/files.py +167 -0
  89. rem/utils/mime_types.py +158 -0
  90. rem/utils/model_helpers.py +156 -1
  91. rem/utils/schema_loader.py +284 -21
  92. rem/utils/sql_paths.py +146 -0
  93. rem/utils/sql_types.py +3 -1
  94. rem/utils/vision.py +9 -14
  95. rem/workers/README.md +14 -14
  96. rem/workers/__init__.py +2 -1
  97. rem/workers/db_maintainer.py +74 -0
  98. rem/workers/unlogged_maintainer.py +463 -0
  99. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
  100. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
  101. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
  102. rem/sql/002_install_models.sql +0 -1068
  103. rem/sql/install_models.sql +0 -1038
  104. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
@@ -3,20 +3,29 @@ OpenAI-compatible streaming relay for Pydantic AI agents.
3
3
 
4
4
  Design Pattern:
5
5
  - Uses Pydantic AI's agent.iter() to capture full execution including tool calls
6
- - Streams tool call events with [Calling: tool_name] markers
7
- - Streams text content deltas as they arrive
6
+ - Emits rich SSE events: reasoning, tool_call, progress, metadata, text_delta
8
7
  - Proper OpenAI SSE format with data: prefix and [DONE] terminator
9
8
  - Error handling with graceful degradation
10
9
 
11
- Key Insight
10
+ Key Insight
12
11
  - agent.run_stream() stops after first output, missing tool calls
13
12
  - agent.iter() provides complete execution with tool call visibility
14
- - Use PartStartEvent to detect tool calls
15
- - Use PartDeltaEvent with TextPartDelta for content streaming
13
+ - Use PartStartEvent to detect tool calls and thinking parts
14
+ - Use PartDeltaEvent with TextPartDelta/ThinkingPartDelta for streaming
15
+ - Use PartEndEvent to detect tool completion
16
+ - Use FunctionToolResultEvent to get tool results
16
17
 
17
- SSE Format:
18
+ SSE Format (OpenAI-compatible):
18
19
  data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
19
20
  data: [DONE]\\n\\n
21
+
22
+ Extended SSE Format (Custom Events):
23
+ event: reasoning\\ndata: {"type": "reasoning", "content": "..."}\\n\\n
24
+ event: tool_call\\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\\n\\n
25
+ event: progress\\ndata: {"type": "progress", "step": 1, "total_steps": 3}\\n\\n
26
+ event: metadata\\ndata: {"type": "metadata", "confidence": 0.95}\\n\\n
27
+
28
+ See sse_events.py for the full event type definitions.
20
29
  """
21
30
 
22
31
  import json
@@ -27,9 +36,14 @@ from typing import AsyncGenerator
27
36
  from loguru import logger
28
37
  from pydantic_ai.agent import Agent
29
38
  from pydantic_ai.messages import (
39
+ FunctionToolResultEvent,
30
40
  PartDeltaEvent,
41
+ PartEndEvent,
31
42
  PartStartEvent,
43
+ TextPart,
32
44
  TextPartDelta,
45
+ ThinkingPart,
46
+ ThinkingPartDelta,
33
47
  ToolCallPart,
34
48
  )
35
49
 
@@ -38,6 +52,14 @@ from .models import (
38
52
  ChatCompletionStreamChoice,
39
53
  ChatCompletionStreamResponse,
40
54
  )
55
+ from .sse_events import (
56
+ DoneEvent,
57
+ MetadataEvent,
58
+ ProgressEvent,
59
+ ReasoningEvent,
60
+ ToolCallEvent,
61
+ format_sse_event,
62
+ )
41
63
 
42
64
 
43
65
  async def stream_openai_response(
@@ -45,47 +67,94 @@ async def stream_openai_response(
45
67
  prompt: str,
46
68
  model: str,
47
69
  request_id: str | None = None,
70
+ # Message correlation IDs for metadata
71
+ message_id: str | None = None,
72
+ in_reply_to: str | None = None,
73
+ session_id: str | None = None,
74
+ # Agent info for metadata
75
+ agent_schema: str | None = None,
48
76
  ) -> AsyncGenerator[str, None]:
49
77
  """
50
- Stream Pydantic AI agent responses in OpenAI SSE format with tool call events.
78
+ Stream Pydantic AI agent responses with rich SSE events.
79
+
80
+ Emits all SSE event types matching the simulator:
81
+ - reasoning: Model thinking/chain-of-thought (from ThinkingPart)
82
+ - tool_call: Tool invocation start/complete (from ToolCallPart, FunctionToolResultEvent)
83
+ - progress: Step indicators for multi-step execution
84
+ - text_delta: Streamed content (OpenAI-compatible format)
85
+ - metadata: Message IDs, model info, performance metrics
86
+ - done: Stream completion
51
87
 
52
88
  Design Pattern:
53
89
  1. Use agent.iter() for complete execution (not run_stream())
54
90
  2. Iterate over nodes to capture model requests and tool executions
55
- 3. Stream tool call start events as [Calling: tool_name]
56
- 4. Stream text content deltas as they arrive
57
- 5. Send final chunk with finish_reason="stop"
58
- 6. Send OpenAI termination marker [DONE]
91
+ 3. Emit rich SSE events for reasoning, tools, progress
92
+ 4. Stream text content in OpenAI-compatible format
93
+ 5. Send metadata and done events at completion
59
94
 
60
95
  Args:
61
96
  agent: Pydantic AI agent instance
62
97
  prompt: User prompt to run
63
98
  model: Model name for response metadata
64
99
  request_id: Optional request ID (generates UUID if not provided)
100
+ message_id: Database ID of the assistant message being streamed
101
+ in_reply_to: Database ID of the user message this responds to
102
+ session_id: Session ID for conversation correlation
65
103
 
66
104
  Yields:
67
- SSE-formatted strings: "data: {json}\\n\\n"
105
+ SSE-formatted strings
68
106
 
69
107
  Example Stream:
70
- data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
108
+ event: progress
109
+ data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Processing", "status": "in_progress"}
71
110
 
72
- data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search]"}}]}
111
+ event: reasoning
112
+ data: {"type": "reasoning", "content": "Analyzing the request..."}
113
+
114
+ event: tool_call
115
+ data: {"type": "tool_call", "tool_name": "search", "status": "started", "arguments": {...}}
116
+
117
+ event: tool_call
118
+ data: {"type": "tool_call", "tool_name": "search", "status": "completed", "result": "..."}
73
119
 
74
120
  data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}
75
121
 
76
- data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
122
+ event: metadata
123
+ data: {"type": "metadata", "message_id": "...", "latency_ms": 1234}
77
124
 
78
- data: [DONE]
125
+ event: done
126
+ data: {"type": "done", "reason": "stop"}
79
127
  """
80
128
  if request_id is None:
81
129
  request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
82
130
 
83
131
  created_at = int(time.time())
132
+ start_time = time.time()
84
133
  is_first_chunk = True
134
+ reasoning_step = 0
135
+ current_step = 0
136
+ total_steps = 3 # Model request, tool execution (optional), final response
137
+ token_count = 0
138
+
139
+ # Track active tool calls for completion events
140
+ # Maps index -> (tool_name, tool_id) for correlating start/end events
141
+ active_tool_calls: dict[int, tuple[str, str]] = {}
142
+ # Queue of tool calls awaiting completion (FIFO for matching)
143
+ pending_tool_completions: list[tuple[str, str]] = []
144
+ # Track if metadata was registered via register_metadata tool
145
+ metadata_registered = False
85
146
 
86
147
  try:
148
+ # Emit initial progress event
149
+ current_step = 1
150
+ yield format_sse_event(ProgressEvent(
151
+ step=current_step,
152
+ total_steps=total_steps,
153
+ label="Processing request",
154
+ status="in_progress"
155
+ ))
156
+
87
157
  # Use agent.iter() to get complete execution with tool calls
88
- # run_stream() stops after first output, missing tool calls
89
158
  async with agent.iter(prompt) as agent_run:
90
159
  async for node in agent_run:
91
160
  # Check if this is a model request node (includes tool calls)
@@ -93,34 +162,163 @@ async def stream_openai_response(
93
162
  # Stream events from model request
94
163
  async with node.stream(agent_run.ctx) as request_stream:
95
164
  async for event in request_stream:
96
- # Tool call start event
165
+ # ============================================
166
+ # REASONING EVENTS (ThinkingPart)
167
+ # ============================================
97
168
  if isinstance(event, PartStartEvent) and isinstance(
169
+ event.part, ThinkingPart
170
+ ):
171
+ reasoning_step += 1
172
+ if event.part.content:
173
+ yield format_sse_event(ReasoningEvent(
174
+ content=event.part.content,
175
+ step=reasoning_step
176
+ ))
177
+
178
+ # Reasoning delta (streaming thinking)
179
+ elif isinstance(event, PartDeltaEvent) and isinstance(
180
+ event.delta, ThinkingPartDelta
181
+ ):
182
+ if event.delta.content_delta:
183
+ yield format_sse_event(ReasoningEvent(
184
+ content=event.delta.content_delta,
185
+ step=reasoning_step
186
+ ))
187
+
188
+ # ============================================
189
+ # TEXT CONTENT START (initial text chunk)
190
+ # ============================================
191
+ elif isinstance(event, PartStartEvent) and isinstance(
192
+ event.part, TextPart
193
+ ):
194
+ # TextPart may contain initial content that needs to be emitted
195
+ if event.part.content:
196
+ content = event.part.content
197
+ token_count += len(content.split())
198
+
199
+ content_chunk = ChatCompletionStreamResponse(
200
+ id=request_id,
201
+ created=created_at,
202
+ model=model,
203
+ choices=[
204
+ ChatCompletionStreamChoice(
205
+ index=0,
206
+ delta=ChatCompletionMessageDelta(
207
+ role="assistant" if is_first_chunk else None,
208
+ content=content,
209
+ ),
210
+ finish_reason=None,
211
+ )
212
+ ],
213
+ )
214
+ is_first_chunk = False
215
+ yield f"data: {content_chunk.model_dump_json()}\n\n"
216
+
217
+ # ============================================
218
+ # TOOL CALL START EVENTS
219
+ # ============================================
220
+ elif isinstance(event, PartStartEvent) and isinstance(
98
221
  event.part, ToolCallPart
99
222
  ):
100
- logger.info(f"🔧 {event.part.tool_name}")
223
+ tool_name = event.part.tool_name
101
224
 
102
- tool_call_chunk = ChatCompletionStreamResponse(
103
- id=request_id,
104
- created=created_at,
105
- model=model,
106
- choices=[
107
- ChatCompletionStreamChoice(
108
- index=0,
109
- delta=ChatCompletionMessageDelta(
110
- role="assistant" if is_first_chunk else None,
111
- content=f"[Calling: {event.part.tool_name}]",
112
- ),
113
- finish_reason=None,
225
+ # Handle final_result specially - it's Pydantic AI's
226
+ # internal tool for structured output
227
+ if tool_name == "final_result":
228
+ # Extract the structured result and emit as content
229
+ args_dict = None
230
+ if event.part.args is not None:
231
+ if hasattr(event.part.args, 'args_dict'):
232
+ args_dict = event.part.args.args_dict
233
+ elif isinstance(event.part.args, dict):
234
+ args_dict = event.part.args
235
+
236
+ if args_dict:
237
+ # Emit the structured result as JSON content
238
+ result_json = json.dumps(args_dict, indent=2)
239
+ content_chunk = ChatCompletionStreamResponse(
240
+ id=request_id,
241
+ created=created_at,
242
+ model=model,
243
+ choices=[
244
+ ChatCompletionStreamChoice(
245
+ index=0,
246
+ delta=ChatCompletionMessageDelta(
247
+ role="assistant" if is_first_chunk else None,
248
+ content=result_json,
249
+ ),
250
+ finish_reason=None,
251
+ )
252
+ ],
114
253
  )
115
- ],
116
- )
117
- is_first_chunk = False
118
- yield f"data: {tool_call_chunk.model_dump_json()}\n\n"
254
+ is_first_chunk = False
255
+ yield f"data: {content_chunk.model_dump_json()}\n\n"
256
+ continue # Skip regular tool call handling
257
+
258
+ tool_id = f"call_{uuid.uuid4().hex[:8]}"
259
+ active_tool_calls[event.index] = (tool_name, tool_id)
260
+ # Queue for completion matching (FIFO)
261
+ pending_tool_completions.append((tool_name, tool_id))
262
+
263
+ # Emit tool_call SSE event (started)
264
+ # Try to get arguments as dict
265
+ args_dict = None
266
+ if event.part.args is not None:
267
+ if hasattr(event.part.args, 'args_dict'):
268
+ args_dict = event.part.args.args_dict
269
+ elif isinstance(event.part.args, dict):
270
+ args_dict = event.part.args
271
+
272
+ # Log tool call with key parameters
273
+ if args_dict and tool_name == "search_rem":
274
+ query_type = args_dict.get("query_type", "?")
275
+ limit = args_dict.get("limit", 20)
276
+ table = args_dict.get("table", "")
277
+ query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
278
+ if query_text and len(query_text) > 50:
279
+ query_text = query_text[:50] + "..."
280
+ logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
281
+ else:
282
+ logger.info(f"🔧 {tool_name}")
283
+
284
+ yield format_sse_event(ToolCallEvent(
285
+ tool_name=tool_name,
286
+ tool_id=tool_id,
287
+ status="started",
288
+ arguments=args_dict
289
+ ))
119
290
 
120
- # Text content delta
291
+ # Update progress
292
+ current_step = 2
293
+ total_steps = 4 # Added tool execution step
294
+ yield format_sse_event(ProgressEvent(
295
+ step=current_step,
296
+ total_steps=total_steps,
297
+ label=f"Calling {tool_name}",
298
+ status="in_progress"
299
+ ))
300
+
301
+ # ============================================
302
+ # TOOL CALL COMPLETION (PartEndEvent)
303
+ # ============================================
304
+ elif isinstance(event, PartEndEvent) and isinstance(
305
+ event.part, ToolCallPart
306
+ ):
307
+ if event.index in active_tool_calls:
308
+ tool_name, tool_id = active_tool_calls[event.index]
309
+ # Note: result comes from FunctionToolResultEvent below
310
+ # For now, mark as completed without result
311
+ del active_tool_calls[event.index]
312
+
313
+ # ============================================
314
+ # TEXT CONTENT DELTA
315
+ # ============================================
121
316
  elif isinstance(event, PartDeltaEvent) and isinstance(
122
317
  event.delta, TextPartDelta
123
318
  ):
319
+ content = event.delta.content_delta
320
+ token_count += len(content.split()) # Rough token estimate
321
+
124
322
  content_chunk = ChatCompletionStreamResponse(
125
323
  id=request_id,
126
324
  created=created_at,
@@ -130,7 +328,7 @@ async def stream_openai_response(
130
328
  index=0,
131
329
  delta=ChatCompletionMessageDelta(
132
330
  role="assistant" if is_first_chunk else None,
133
- content=event.delta.content_delta,
331
+ content=content,
134
332
  ),
135
333
  finish_reason=None,
136
334
  )
@@ -139,16 +337,172 @@ async def stream_openai_response(
139
337
  is_first_chunk = False
140
338
  yield f"data: {content_chunk.model_dump_json()}\n\n"
141
339
 
142
- # Check if this is a tool execution node
340
+ # ============================================
341
+ # TOOL EXECUTION NODE
342
+ # ============================================
143
343
  elif Agent.is_call_tools_node(node):
144
- # Stream tool execution - tools complete here
145
344
  async with node.stream(agent_run.ctx) as tools_stream:
146
- async for event in tools_stream:
147
- # We can log tool completion here if needed
148
- # For now, we already logged the call start above
149
- pass
345
+ async for tool_event in tools_stream:
346
+ # Tool result event - emit completion
347
+ if isinstance(tool_event, FunctionToolResultEvent):
348
+ # Get the tool name/id from the pending queue (FIFO)
349
+ if pending_tool_completions:
350
+ tool_name, tool_id = pending_tool_completions.pop(0)
351
+ else:
352
+ # Fallback if queue is empty (shouldn't happen)
353
+ tool_name = "tool"
354
+ tool_id = f"call_{uuid.uuid4().hex[:8]}"
355
+
356
+ # Check if this is a register_metadata tool result
357
+ # It returns a dict with _metadata_event: True marker
358
+ result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
359
+ is_metadata_event = False
360
+
361
+ if isinstance(result_content, dict) and result_content.get("_metadata_event"):
362
+ is_metadata_event = True
363
+ metadata_registered = True # Skip default metadata at end
364
+ # Emit MetadataEvent with registered values
365
+ registered_confidence = result_content.get("confidence")
366
+ registered_sources = result_content.get("sources")
367
+ registered_references = result_content.get("references")
368
+ registered_flags = result_content.get("flags")
369
+ # Risk assessment fields
370
+ registered_risk_level = result_content.get("risk_level")
371
+ registered_risk_score = result_content.get("risk_score")
372
+ registered_risk_reasoning = result_content.get("risk_reasoning")
373
+ registered_recommended_action = result_content.get("recommended_action")
374
+ # Extra fields
375
+ registered_extra = result_content.get("extra")
376
+
377
+ logger.info(
378
+ f"📊 Metadata registered: confidence={registered_confidence}, "
379
+ f"risk_level={registered_risk_level}, sources={registered_sources}"
380
+ )
381
+
382
+ # Build extra dict with risk fields and any custom extras
383
+ extra_data = {}
384
+ if registered_risk_level is not None:
385
+ extra_data["risk_level"] = registered_risk_level
386
+ if registered_risk_score is not None:
387
+ extra_data["risk_score"] = registered_risk_score
388
+ if registered_risk_reasoning is not None:
389
+ extra_data["risk_reasoning"] = registered_risk_reasoning
390
+ if registered_recommended_action is not None:
391
+ extra_data["recommended_action"] = registered_recommended_action
392
+ if registered_extra:
393
+ extra_data.update(registered_extra)
394
+
395
+ # Emit metadata event immediately
396
+ yield format_sse_event(MetadataEvent(
397
+ message_id=message_id,
398
+ in_reply_to=in_reply_to,
399
+ session_id=session_id,
400
+ agent_schema=agent_schema,
401
+ confidence=registered_confidence,
402
+ sources=registered_sources,
403
+ model_version=model,
404
+ flags=registered_flags,
405
+ extra=extra_data if extra_data else None,
406
+ hidden=False,
407
+ ))
408
+
409
+ if not is_metadata_event:
410
+ # Normal tool completion - emit ToolCallEvent
411
+ result_str = str(result_content)
412
+ result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
413
+
414
+ # Log result count for search_rem
415
+ if tool_name == "search_rem" and isinstance(result_content, dict):
416
+ results = result_content.get("results", {})
417
+ # Handle nested result structure: results may be a dict with 'results' list and 'count'
418
+ if isinstance(results, dict):
419
+ count = results.get("count", len(results.get("results", [])))
420
+ query_type = results.get("query_type", "?")
421
+ query_text = results.get("query_text", results.get("key", ""))
422
+ table = results.get("table_name", "")
423
+ elif isinstance(results, list):
424
+ count = len(results)
425
+ query_type = "?"
426
+ query_text = ""
427
+ table = ""
428
+ else:
429
+ count = "?"
430
+ query_type = "?"
431
+ query_text = ""
432
+ table = ""
433
+ status = result_content.get("status", "unknown")
434
+ # Truncate query text for logging
435
+ if query_text and len(str(query_text)) > 40:
436
+ query_text = str(query_text)[:40] + "..."
437
+ logger.info(f" ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
438
+
439
+ yield format_sse_event(ToolCallEvent(
440
+ tool_name=tool_name,
441
+ tool_id=tool_id,
442
+ status="completed",
443
+ result=result_summary
444
+ ))
445
+
446
+ # Update progress after tool completion
447
+ current_step = 3
448
+ yield format_sse_event(ProgressEvent(
449
+ step=current_step,
450
+ total_steps=total_steps,
451
+ label="Generating response",
452
+ status="in_progress"
453
+ ))
454
+
455
+ # After iteration completes, check for structured result
456
+ # This handles agents with result_type (structured output)
457
+ # Skip for plain text output - already streamed via TextPartDelta
458
+ try:
459
+ result = agent_run.result
460
+ if result is not None and hasattr(result, 'output'):
461
+ output = result.output
462
+
463
+ # Skip plain string output - already streamed via TextPartDelta
464
+ # Non-structured output agents (structured_output: false) return strings
465
+ if isinstance(output, str):
466
+ logger.debug("Plain text output already streamed via TextPartDelta, skipping final emission")
467
+ else:
468
+ # Serialize the structured output (Pydantic models)
469
+ if hasattr(output, 'model_dump'):
470
+ # Pydantic model
471
+ result_dict = output.model_dump()
472
+ elif hasattr(output, '__dict__'):
473
+ result_dict = output.__dict__
474
+ else:
475
+ # Fallback for unknown types
476
+ result_dict = {"result": str(output)}
477
+
478
+ result_json = json.dumps(result_dict, indent=2, default=str)
479
+ token_count += len(result_json.split())
480
+
481
+ # Emit structured result as content
482
+ result_chunk = ChatCompletionStreamResponse(
483
+ id=request_id,
484
+ created=created_at,
485
+ model=model,
486
+ choices=[
487
+ ChatCompletionStreamChoice(
488
+ index=0,
489
+ delta=ChatCompletionMessageDelta(
490
+ role="assistant" if is_first_chunk else None,
491
+ content=result_json,
492
+ ),
493
+ finish_reason=None,
494
+ )
495
+ ],
496
+ )
497
+ is_first_chunk = False
498
+ yield f"data: {result_chunk.model_dump_json()}\n\n"
499
+ except Exception as e:
500
+ logger.debug(f"No structured result available: {e}")
150
501
 
151
- # Final chunk with finish_reason
502
+ # Calculate latency
503
+ latency_ms = int((time.time() - start_time) * 1000)
504
+
505
+ # Final OpenAI chunk with finish_reason
152
506
  final_chunk = ChatCompletionStreamResponse(
153
507
  id=request_id,
154
508
  created=created_at,
@@ -163,7 +517,32 @@ async def stream_openai_response(
163
517
  )
164
518
  yield f"data: {final_chunk.model_dump_json()}\n\n"
165
519
 
166
- # OpenAI termination marker
520
+ # Emit metadata event only if not already registered via register_metadata tool
521
+ if not metadata_registered:
522
+ yield format_sse_event(MetadataEvent(
523
+ message_id=message_id,
524
+ in_reply_to=in_reply_to,
525
+ session_id=session_id,
526
+ agent_schema=agent_schema,
527
+ confidence=1.0, # Default to 100% confidence
528
+ model_version=model,
529
+ latency_ms=latency_ms,
530
+ token_count=token_count,
531
+ ))
532
+
533
+ # Mark all progress complete
534
+ for step in range(1, total_steps + 1):
535
+ yield format_sse_event(ProgressEvent(
536
+ step=step,
537
+ total_steps=total_steps,
538
+ label="Complete" if step == total_steps else f"Step {step}",
539
+ status="completed"
540
+ ))
541
+
542
+ # Emit done event
543
+ yield format_sse_event(DoneEvent(reason="stop"))
544
+
545
+ # OpenAI termination marker (for compatibility)
167
546
  yield "data: [DONE]\n\n"
168
547
 
169
548
  except Exception as e:
@@ -182,4 +561,188 @@ async def stream_openai_response(
182
561
  }
183
562
  }
184
563
  yield f"data: {json.dumps(error_data)}\n\n"
564
+
565
+ # Emit done event with error reason
566
+ yield format_sse_event(DoneEvent(reason="error"))
185
567
  yield "data: [DONE]\n\n"
568
+
569
+
570
+ async def stream_simulator_response(
571
+ prompt: str,
572
+ model: str = "simulator-v1.0.0",
573
+ request_id: str | None = None,
574
+ delay_ms: int = 50,
575
+ include_reasoning: bool = True,
576
+ include_progress: bool = True,
577
+ include_tool_calls: bool = True,
578
+ include_actions: bool = True,
579
+ include_metadata: bool = True,
580
+ # Message correlation IDs
581
+ message_id: str | None = None,
582
+ in_reply_to: str | None = None,
583
+ session_id: str | None = None,
584
+ ) -> AsyncGenerator[str, None]:
585
+ """
586
+ Stream SSE simulator events for testing and demonstration.
587
+
588
+ This function wraps the SSE simulator to produce formatted SSE strings
589
+ ready for HTTP streaming. No LLM calls are made.
590
+
591
+ The simulator produces a rich sequence of events:
592
+ 1. Reasoning events (model thinking)
593
+ 2. Progress events (step indicators)
594
+ 3. Tool call events (simulated tool usage)
595
+ 4. Text delta events (streamed content)
596
+ 5. Metadata events (confidence, sources, message IDs)
597
+ 6. Action request events (user interaction)
598
+ 7. Done event
599
+
600
+ Args:
601
+ prompt: User prompt (passed to simulator)
602
+ model: Model name for metadata
603
+ request_id: Optional request ID
604
+ delay_ms: Delay between events in milliseconds
605
+ include_reasoning: Whether to emit reasoning events
606
+ include_progress: Whether to emit progress events
607
+ include_tool_calls: Whether to emit tool call events
608
+ include_actions: Whether to emit action request at end
609
+ include_metadata: Whether to emit metadata event
610
+ message_id: Database ID of the assistant message being streamed
611
+ in_reply_to: Database ID of the user message this responds to
612
+ session_id: Session ID for conversation correlation
613
+
614
+ Yields:
615
+ SSE-formatted strings ready for HTTP response
616
+
617
+ Example:
618
+ ```python
619
+ from starlette.responses import StreamingResponse
620
+
621
+ async def simulator_endpoint():
622
+ return StreamingResponse(
623
+ stream_simulator_response("demo"),
624
+ media_type="text/event-stream"
625
+ )
626
+ ```
627
+ """
628
+ from rem.agentic.agents.sse_simulator import stream_simulator_events
629
+
630
+ # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
631
+ async for sse_string in stream_simulator_events(
632
+ prompt=prompt,
633
+ delay_ms=delay_ms,
634
+ include_reasoning=include_reasoning,
635
+ include_progress=include_progress,
636
+ include_tool_calls=include_tool_calls,
637
+ include_actions=include_actions,
638
+ include_metadata=include_metadata,
639
+ # Pass message correlation IDs
640
+ message_id=message_id,
641
+ in_reply_to=in_reply_to,
642
+ session_id=session_id,
643
+ model=model,
644
+ ):
645
+ yield sse_string
646
+
647
+
648
+ async def stream_minimal_simulator(
649
+ content: str = "Hello from the simulator!",
650
+ delay_ms: int = 30,
651
+ ) -> AsyncGenerator[str, None]:
652
+ """
653
+ Stream minimal simulator output (text + done only).
654
+
655
+ Useful for simple testing without the full event sequence.
656
+
657
+ Args:
658
+ content: Text content to stream
659
+ delay_ms: Delay between chunks
660
+
661
+ Yields:
662
+ SSE-formatted strings
663
+ """
664
+ from rem.agentic.agents.sse_simulator import stream_minimal_demo
665
+
666
+ # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
667
+ async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
668
+ yield sse_string
669
+
670
+
671
+ async def stream_openai_response_with_save(
672
+ agent: Agent,
673
+ prompt: str,
674
+ model: str,
675
+ request_id: str | None = None,
676
+ agent_schema: str | None = None,
677
+ session_id: str | None = None,
678
+ user_id: str | None = None,
679
+ ) -> AsyncGenerator[str, None]:
680
+ """
681
+ Wrapper around stream_openai_response that saves the assistant response after streaming.
682
+
683
+ This accumulates all text content during streaming and saves it to the database
684
+ after the stream completes.
685
+
686
+ Args:
687
+ agent: Pydantic AI agent instance
688
+ prompt: User prompt
689
+ model: Model name
690
+ request_id: Optional request ID
691
+ agent_schema: Agent schema name
692
+ session_id: Session ID for message storage
693
+ user_id: User ID for message storage
694
+
695
+ Yields:
696
+ SSE-formatted strings
697
+ """
698
+ from ....utils.date_utils import utc_now, to_iso
699
+ from ....services.session import SessionMessageStore
700
+ from ....settings import settings
701
+
702
+ # Accumulate content during streaming
703
+ accumulated_content = []
704
+
705
+ async for chunk in stream_openai_response(
706
+ agent=agent,
707
+ prompt=prompt,
708
+ model=model,
709
+ request_id=request_id,
710
+ agent_schema=agent_schema,
711
+ session_id=session_id,
712
+ ):
713
+ yield chunk
714
+
715
+ # Extract text content from OpenAI-format chunks
716
+ # Format: data: {"choices": [{"delta": {"content": "..."}}]}
717
+ if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
718
+ try:
719
+ data_str = chunk[6:].strip() # Remove "data: " prefix
720
+ if data_str:
721
+ data = json.loads(data_str)
722
+ if "choices" in data and data["choices"]:
723
+ delta = data["choices"][0].get("delta", {})
724
+ content = delta.get("content")
725
+ if content:
726
+ accumulated_content.append(content)
727
+ except (json.JSONDecodeError, KeyError, IndexError):
728
+ pass # Skip non-JSON or malformed chunks
729
+
730
+ # After streaming completes, save the assistant response
731
+ if settings.postgres.enabled and session_id and accumulated_content:
732
+ full_content = "".join(accumulated_content)
733
+ assistant_message = {
734
+ "role": "assistant",
735
+ "content": full_content,
736
+ "timestamp": to_iso(utc_now()),
737
+ }
738
+ try:
739
+ store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
740
+ await store.store_session_messages(
741
+ session_id=session_id,
742
+ messages=[assistant_message],
743
+ user_id=user_id,
744
+ compress=True, # Compress long assistant responses
745
+ )
746
+ logger.debug(f"Saved assistant response to session {session_id} ({len(full_content)} chars)")
747
+ except Exception as e:
748
+ logger.error(f"Failed to save assistant response: {e}", exc_info=True)