remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of remdb might be problematic.

Files changed (82)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +7 -5
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/providers/phoenix.py +32 -43
  9. rem/agentic/providers/pydantic_ai.py +84 -10
  10. rem/api/README.md +238 -1
  11. rem/api/deps.py +255 -0
  12. rem/api/main.py +70 -22
  13. rem/api/mcp_router/server.py +8 -1
  14. rem/api/mcp_router/tools.py +80 -0
  15. rem/api/middleware/tracking.py +172 -0
  16. rem/api/routers/admin.py +277 -0
  17. rem/api/routers/auth.py +124 -0
  18. rem/api/routers/chat/completions.py +123 -14
  19. rem/api/routers/chat/models.py +7 -3
  20. rem/api/routers/chat/sse_events.py +526 -0
  21. rem/api/routers/chat/streaming.py +468 -45
  22. rem/api/routers/dev.py +81 -0
  23. rem/api/routers/feedback.py +455 -0
  24. rem/api/routers/messages.py +473 -0
  25. rem/api/routers/models.py +78 -0
  26. rem/api/routers/shared_sessions.py +406 -0
  27. rem/auth/middleware.py +126 -27
  28. rem/cli/commands/ask.py +15 -11
  29. rem/cli/commands/configure.py +169 -94
  30. rem/cli/commands/db.py +53 -7
  31. rem/cli/commands/experiments.py +278 -96
  32. rem/cli/commands/process.py +8 -7
  33. rem/cli/commands/scaffold.py +47 -0
  34. rem/cli/commands/schema.py +9 -9
  35. rem/cli/main.py +10 -0
  36. rem/config.py +2 -2
  37. rem/models/core/core_model.py +7 -1
  38. rem/models/entities/__init__.py +21 -0
  39. rem/models/entities/domain_resource.py +38 -0
  40. rem/models/entities/feedback.py +123 -0
  41. rem/models/entities/message.py +30 -1
  42. rem/models/entities/session.py +83 -0
  43. rem/models/entities/shared_session.py +206 -0
  44. rem/models/entities/user.py +10 -3
  45. rem/registry.py +367 -0
  46. rem/schemas/agents/rem.yaml +7 -3
  47. rem/services/content/providers.py +94 -140
  48. rem/services/content/service.py +85 -16
  49. rem/services/dreaming/affinity_service.py +2 -16
  50. rem/services/dreaming/moment_service.py +2 -15
  51. rem/services/embeddings/api.py +20 -13
  52. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  53. rem/services/phoenix/client.py +252 -19
  54. rem/services/postgres/README.md +29 -10
  55. rem/services/postgres/repository.py +132 -0
  56. rem/services/postgres/schema_generator.py +86 -5
  57. rem/services/rate_limit.py +113 -0
  58. rem/services/rem/README.md +14 -0
  59. rem/services/session/compression.py +17 -1
  60. rem/services/user_service.py +98 -0
  61. rem/settings.py +115 -17
  62. rem/sql/background_indexes.sql +10 -0
  63. rem/sql/migrations/001_install.sql +152 -2
  64. rem/sql/migrations/002_install_models.sql +580 -231
  65. rem/sql/migrations/003_seed_default_user.sql +48 -0
  66. rem/utils/constants.py +97 -0
  67. rem/utils/date_utils.py +228 -0
  68. rem/utils/embeddings.py +17 -4
  69. rem/utils/files.py +167 -0
  70. rem/utils/mime_types.py +158 -0
  71. rem/utils/model_helpers.py +156 -1
  72. rem/utils/schema_loader.py +273 -14
  73. rem/utils/sql_types.py +3 -1
  74. rem/utils/vision.py +9 -14
  75. rem/workers/README.md +14 -14
  76. rem/workers/db_maintainer.py +74 -0
  77. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
  78. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
  79. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
  80. rem/sql/002_install_models.sql +0 -1068
  81. rem/sql/install_models.sql +0 -1038
  82. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/streaming.py

@@ -3,20 +3,29 @@ OpenAI-compatible streaming relay for Pydantic AI agents.
 
 Design Pattern:
 - Uses Pydantic AI's agent.iter() to capture full execution including tool calls
-- Streams tool call events with [Calling: tool_name] markers
-- Streams text content deltas as they arrive
+- Emits rich SSE events: reasoning, tool_call, progress, metadata, text_delta
 - Proper OpenAI SSE format with data: prefix and [DONE] terminator
 - Error handling with graceful degradation
 
-Key Insight
+Key Insight
 - agent.run_stream() stops after first output, missing tool calls
 - agent.iter() provides complete execution with tool call visibility
-- Use PartStartEvent to detect tool calls
-- Use PartDeltaEvent with TextPartDelta for content streaming
+- Use PartStartEvent to detect tool calls and thinking parts
+- Use PartDeltaEvent with TextPartDelta/ThinkingPartDelta for streaming
+- Use PartEndEvent to detect tool completion
+- Use FunctionToolResultEvent to get tool results
 
-SSE Format:
+SSE Format (OpenAI-compatible):
 data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\n\n
 data: [DONE]\n\n
+
+Extended SSE Format (Custom Events):
+event: reasoning\ndata: {"type": "reasoning", "content": "..."}\n\n
+event: tool_call\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\n\n
+event: progress\ndata: {"type": "progress", "step": 1, "total_steps": 3}\n\n
+event: metadata\ndata: {"type": "metadata", "confidence": 0.95}\n\n
+
+See sse_events.py for the full event type definitions.
 """
 
 import json
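The docstring above documents two framings on the same stream: bare OpenAI `data:` chunks and named `event:` frames. A minimal client-side sketch for telling them apart (`parse_sse_frames` is a hypothetical helper, not part of remdb; it assumes frames are separated by blank lines, per the usual SSE convention):

```python
import json
from collections.abc import Iterator


def parse_sse_frames(raw: str) -> Iterator[tuple[str, dict | None]]:
    """Yield (event_name, payload) pairs from a raw SSE stream."""
    for frame in raw.strip().split("\n\n"):
        event_name = "message"  # SSE default when no "event:" field is present
        data: str | None = None
        for line in frame.splitlines():
            if line.startswith("event: "):
                event_name = line.removeprefix("event: ")
            elif line.startswith("data: "):
                data = line.removeprefix("data: ")
        if data is None:
            continue  # not a data-bearing frame
        if data == "[DONE]":
            yield event_name, None  # OpenAI termination marker, not JSON
        else:
            yield event_name, json.loads(data)


# Named frames come out under their event name, plain chunks under "message":
raw = 'event: reasoning\ndata: {"type": "reasoning", "content": "hi"}\n\ndata: [DONE]\n\n'
print(list(parse_sse_frames(raw)))
# [('reasoning', {'type': 'reasoning', 'content': 'hi'}), ('message', None)]
```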
@@ -27,9 +36,14 @@ from typing import AsyncGenerator
 from loguru import logger
 from pydantic_ai.agent import Agent
 from pydantic_ai.messages import (
+    FunctionToolResultEvent,
     PartDeltaEvent,
+    PartEndEvent,
     PartStartEvent,
+    TextPart,
     TextPartDelta,
+    ThinkingPart,
+    ThinkingPartDelta,
     ToolCallPart,
 )
 
@@ -38,6 +52,14 @@ from .models import (
     ChatCompletionStreamChoice,
     ChatCompletionStreamResponse,
 )
+from .sse_events import (
+    DoneEvent,
+    MetadataEvent,
+    ProgressEvent,
+    ReasoningEvent,
+    ToolCallEvent,
+    format_sse_event,
+)
 
 
 async def stream_openai_response(
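`sse_events.py` itself ships elsewhere in this release (file 20 in the list above, +526 lines) and is not shown in this excerpt. As a reading aid, here is a reduced sketch of what the imported names could look like, inferred only from the event payloads documented in this file; every field beyond those payloads is an assumption, not remdb's actual API:

```python
from pydantic import BaseModel


class ReasoningEvent(BaseModel):
    type: str = "reasoning"
    content: str
    step: int | None = None  # assumed field, matching the step= kwarg used below


class ToolCallEvent(BaseModel):
    type: str = "tool_call"
    tool_name: str
    tool_id: str | None = None
    status: str = "started"  # "started" | "completed"
    arguments: dict | None = None
    result: str | None = None


def format_sse_event(event: BaseModel) -> str:
    # Named SSE frame: an "event:" line, a "data:" JSON line, blank-line terminator
    name = getattr(event, "type", "message")
    return f"event: {name}\ndata: {event.model_dump_json()}\n\n"
```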
@@ -45,47 +67,92 @@ async def stream_openai_response(
     prompt: str,
     model: str,
     request_id: str | None = None,
+    # Message correlation IDs for metadata
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
 ) -> AsyncGenerator[str, None]:
     """
-    Stream Pydantic AI agent responses in OpenAI SSE format with tool call events.
+    Stream Pydantic AI agent responses with rich SSE events.
+
+    Emits all SSE event types matching the simulator:
+    - reasoning: Model thinking/chain-of-thought (from ThinkingPart)
+    - tool_call: Tool invocation start/complete (from ToolCallPart, FunctionToolResultEvent)
+    - progress: Step indicators for multi-step execution
+    - text_delta: Streamed content (OpenAI-compatible format)
+    - metadata: Message IDs, model info, performance metrics
+    - done: Stream completion
 
     Design Pattern:
     1. Use agent.iter() for complete execution (not run_stream())
     2. Iterate over nodes to capture model requests and tool executions
-    3. Stream tool call start events as [Calling: tool_name]
-    4. Stream text content deltas as they arrive
-    5. Send final chunk with finish_reason="stop"
-    6. Send OpenAI termination marker [DONE]
+    3. Emit rich SSE events for reasoning, tools, progress
+    4. Stream text content in OpenAI-compatible format
+    5. Send metadata and done events at completion
 
     Args:
         agent: Pydantic AI agent instance
         prompt: User prompt to run
        model: Model name for response metadata
         request_id: Optional request ID (generates UUID if not provided)
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
 
     Yields:
-        SSE-formatted strings: "data: {json}\n\n"
+        SSE-formatted strings
 
     Example Stream:
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
+        event: progress
+        data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Processing", "status": "in_progress"}
+
+        event: reasoning
+        data: {"type": "reasoning", "content": "Analyzing the request..."}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "started", "arguments": {...}}
 
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search]"}}]}
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "completed", "result": "..."}
 
         data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}
 
-        data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
+        event: metadata
+        data: {"type": "metadata", "message_id": "...", "latency_ms": 1234}
 
-        data: [DONE]
+        event: done
+        data: {"type": "done", "reason": "stop"}
     """
     if request_id is None:
         request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
     created_at = int(time.time())
+    start_time = time.time()
     is_first_chunk = True
+    reasoning_step = 0
+    current_step = 0
+    total_steps = 3  # Model request, tool execution (optional), final response
+    token_count = 0
+
+    # Track active tool calls for completion events
+    # Maps index -> (tool_name, tool_id) for correlating start/end events
+    active_tool_calls: dict[int, tuple[str, str]] = {}
+    # Queue of tool calls awaiting completion (FIFO for matching)
+    pending_tool_completions: list[tuple[str, str]] = []
+    # Track if metadata was registered via register_metadata tool
+    metadata_registered = False
 
     try:
+        # Emit initial progress event
+        current_step = 1
+        yield format_sse_event(ProgressEvent(
+            step=current_step,
+            total_steps=total_steps,
+            label="Processing request",
+            status="in_progress"
+        ))
+
         # Use agent.iter() to get complete execution with tool calls
-        # run_stream() stops after first output, missing tool calls
         async with agent.iter(prompt) as agent_run:
             async for node in agent_run:
                 # Check if this is a model request node (includes tool calls)
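For orientation before the larger hunks below: the new event handling hangs off pydantic-ai's documented `agent.iter()` node-iteration pattern, sketched here in skeleton form (the model string and function name are illustrative, not from remdb):

```python
from pydantic_ai import Agent

agent = Agent("openai:gpt-4o")  # illustrative model string


async def outline(prompt: str) -> None:
    async with agent.iter(prompt) as agent_run:
        async for node in agent_run:
            if Agent.is_model_request_node(node):
                # Streams PartStartEvent / PartDeltaEvent / PartEndEvent
                async with node.stream(agent_run.ctx) as request_stream:
                    async for event in request_stream:
                        ...  # dispatch on part/delta type, as the hunks below do
            elif Agent.is_call_tools_node(node):
                # Streams tool events, including FunctionToolResultEvent
                async with node.stream(agent_run.ctx) as tools_stream:
                    async for tool_event in tools_stream:
                        ...  # emit tool completion events, as the hunks below do
```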
@@ -93,34 +160,153 @@ async def stream_openai_response(
                     # Stream events from model request
                     async with node.stream(agent_run.ctx) as request_stream:
                         async for event in request_stream:
-                            # Tool call start event
+                            # ============================================
+                            # REASONING EVENTS (ThinkingPart)
+                            # ============================================
                             if isinstance(event, PartStartEvent) and isinstance(
+                                event.part, ThinkingPart
+                            ):
+                                reasoning_step += 1
+                                if event.part.content:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.part.content,
+                                        step=reasoning_step
+                                    ))
+
+                            # Reasoning delta (streaming thinking)
+                            elif isinstance(event, PartDeltaEvent) and isinstance(
+                                event.delta, ThinkingPartDelta
+                            ):
+                                if event.delta.content_delta:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.delta.content_delta,
+                                        step=reasoning_step
+                                    ))
+
+                            # ============================================
+                            # TEXT CONTENT START (initial text chunk)
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
+                                event.part, TextPart
+                            ):
+                                # TextPart may contain initial content that needs to be emitted
+                                if event.part.content:
+                                    content = event.part.content
+                                    token_count += len(content.split())
+
+                                    content_chunk = ChatCompletionStreamResponse(
+                                        id=request_id,
+                                        created=created_at,
+                                        model=model,
+                                        choices=[
+                                            ChatCompletionStreamChoice(
+                                                index=0,
+                                                delta=ChatCompletionMessageDelta(
+                                                    role="assistant" if is_first_chunk else None,
+                                                    content=content,
+                                                ),
+                                                finish_reason=None,
+                                            )
+                                        ],
+                                    )
+                                    is_first_chunk = False
+                                    yield f"data: {content_chunk.model_dump_json()}\n\n"
+
+                            # ============================================
+                            # TOOL CALL START EVENTS
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
                                 event.part, ToolCallPart
                             ):
-                                logger.info(f"🔧 {event.part.tool_name}")
+                                tool_name = event.part.tool_name
 
-                                tool_call_chunk = ChatCompletionStreamResponse(
-                                    id=request_id,
-                                    created=created_at,
-                                    model=model,
-                                    choices=[
-                                        ChatCompletionStreamChoice(
-                                            index=0,
-                                            delta=ChatCompletionMessageDelta(
-                                                role="assistant" if is_first_chunk else None,
-                                                content=f"[Calling: {event.part.tool_name}]",
-                                            ),
-                                            finish_reason=None,
+                                # Handle final_result specially - it's Pydantic AI's
+                                # internal tool for structured output
+                                if tool_name == "final_result":
+                                    # Extract the structured result and emit as content
+                                    args_dict = None
+                                    if event.part.args is not None:
+                                        if hasattr(event.part.args, 'args_dict'):
+                                            args_dict = event.part.args.args_dict
+                                        elif isinstance(event.part.args, dict):
+                                            args_dict = event.part.args
+
+                                    if args_dict:
+                                        # Emit the structured result as JSON content
+                                        result_json = json.dumps(args_dict, indent=2)
+                                        content_chunk = ChatCompletionStreamResponse(
+                                            id=request_id,
+                                            created=created_at,
+                                            model=model,
+                                            choices=[
+                                                ChatCompletionStreamChoice(
+                                                    index=0,
+                                                    delta=ChatCompletionMessageDelta(
+                                                        role="assistant" if is_first_chunk else None,
+                                                        content=result_json,
+                                                    ),
+                                                    finish_reason=None,
+                                                )
+                                            ],
                                         )
-                                    ],
-                                )
-                                is_first_chunk = False
-                                yield f"data: {tool_call_chunk.model_dump_json()}\n\n"
+                                        is_first_chunk = False
+                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                    continue  # Skip regular tool call handling
+
+                                tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                active_tool_calls[event.index] = (tool_name, tool_id)
+                                # Queue for completion matching (FIFO)
+                                pending_tool_completions.append((tool_name, tool_id))
+
+                                logger.info(f"🔧 {tool_name}")
 
-                            # Text content delta
+                                # Emit tool_call SSE event (started)
+                                # Try to get arguments as dict
+                                args_dict = None
+                                if event.part.args is not None:
+                                    if hasattr(event.part.args, 'args_dict'):
+                                        args_dict = event.part.args.args_dict
+                                    elif isinstance(event.part.args, dict):
+                                        args_dict = event.part.args
+
+                                yield format_sse_event(ToolCallEvent(
+                                    tool_name=tool_name,
+                                    tool_id=tool_id,
+                                    status="started",
+                                    arguments=args_dict
+                                ))
+
+                                # Update progress
+                                current_step = 2
+                                total_steps = 4  # Added tool execution step
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label=f"Calling {tool_name}",
+                                    status="in_progress"
+                                ))
+
+                            # ============================================
+                            # TOOL CALL COMPLETION (PartEndEvent)
+                            # ============================================
+                            elif isinstance(event, PartEndEvent) and isinstance(
+                                event.part, ToolCallPart
+                            ):
+                                if event.index in active_tool_calls:
+                                    tool_name, tool_id = active_tool_calls[event.index]
+                                    # Note: result comes from FunctionToolResultEvent below
+                                    # For now, mark as completed without result
+                                    del active_tool_calls[event.index]
+
+                            # ============================================
+                            # TEXT CONTENT DELTA
+                            # ============================================
                             elif isinstance(event, PartDeltaEvent) and isinstance(
                                 event.delta, TextPartDelta
                             ):
+                                content = event.delta.content_delta
+                                token_count += len(content.split())  # Rough token estimate
+
                                 content_chunk = ChatCompletionStreamResponse(
                                     id=request_id,
                                     created=created_at,
@@ -130,7 +316,7 @@ async def stream_openai_response(
                                             index=0,
                                             delta=ChatCompletionMessageDelta(
                                                 role="assistant" if is_first_chunk else None,
-                                                content=event.delta.content_delta,
+                                                content=content,
                                             ),
                                             finish_reason=None,
                                         )
@@ -139,16 +325,125 @@ async def stream_openai_response(
                                 is_first_chunk = False
                                 yield f"data: {content_chunk.model_dump_json()}\n\n"
 
-                # Check if this is a tool execution node
+                # ============================================
+                # TOOL EXECUTION NODE
+                # ============================================
                 elif Agent.is_call_tools_node(node):
-                    # Stream tool execution - tools complete here
                     async with node.stream(agent_run.ctx) as tools_stream:
-                        async for event in tools_stream:
-                            # We can log tool completion here if needed
-                            # For now, we already logged the call start above
-                            pass
+                        async for tool_event in tools_stream:
+                            # Tool result event - emit completion
+                            if isinstance(tool_event, FunctionToolResultEvent):
+                                # Get the tool name/id from the pending queue (FIFO)
+                                if pending_tool_completions:
+                                    tool_name, tool_id = pending_tool_completions.pop(0)
+                                else:
+                                    # Fallback if queue is empty (shouldn't happen)
+                                    tool_name = "tool"
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+
+                                # Check if this is a register_metadata tool result
+                                # It returns a dict with _metadata_event: True marker
+                                result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
+                                is_metadata_event = False
+
+                                if isinstance(result_content, dict) and result_content.get("_metadata_event"):
+                                    is_metadata_event = True
+                                    metadata_registered = True  # Skip default metadata at end
+                                    # Emit MetadataEvent with registered values
+                                    registered_confidence = result_content.get("confidence")
+                                    registered_sources = result_content.get("sources")
+                                    registered_references = result_content.get("references")
+                                    registered_flags = result_content.get("flags")
+
+                                    logger.info(
+                                        f"📊 Metadata registered: confidence={registered_confidence}, "
+                                        f"sources={registered_sources}"
+                                    )
 
-        # Final chunk with finish_reason
+                                    # Emit metadata event immediately
+                                    yield format_sse_event(MetadataEvent(
+                                        message_id=message_id,
+                                        in_reply_to=in_reply_to,
+                                        session_id=session_id,
+                                        confidence=registered_confidence,
+                                        sources=registered_sources,
+                                        model_version=model,
+                                        flags=registered_flags,
+                                        hidden=False,
+                                    ))
+
+                                if not is_metadata_event:
+                                    # Normal tool completion - emit ToolCallEvent
+                                    result_str = str(result_content)
+                                    result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
+
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=tool_name,
+                                        tool_id=tool_id,
+                                        status="completed",
+                                        result=result_summary
+                                    ))
+
+                                # Update progress after tool completion
+                                current_step = 3
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label="Generating response",
+                                    status="in_progress"
+                                ))
+
+            # After iteration completes, check for structured result
+            # This handles agents with result_type (structured output)
+            # Skip for plain text output - already streamed via TextPartDelta
+            try:
+                result = agent_run.result
+                if result is not None and hasattr(result, 'output'):
+                    output = result.output
+
+                    # Skip plain string output - already streamed via TextPartDelta
+                    # Non-structured output agents (structured_output: false) return strings
+                    if isinstance(output, str):
+                        logger.debug("Plain text output already streamed via TextPartDelta, skipping final emission")
+                    else:
+                        # Serialize the structured output (Pydantic models)
+                        if hasattr(output, 'model_dump'):
+                            # Pydantic model
+                            result_dict = output.model_dump()
+                        elif hasattr(output, '__dict__'):
+                            result_dict = output.__dict__
+                        else:
+                            # Fallback for unknown types
+                            result_dict = {"result": str(output)}
+
+                        result_json = json.dumps(result_dict, indent=2, default=str)
+                        token_count += len(result_json.split())
+
+                        # Emit structured result as content
+                        result_chunk = ChatCompletionStreamResponse(
+                            id=request_id,
+                            created=created_at,
+                            model=model,
+                            choices=[
+                                ChatCompletionStreamChoice(
+                                    index=0,
+                                    delta=ChatCompletionMessageDelta(
+                                        role="assistant" if is_first_chunk else None,
+                                        content=result_json,
+                                    ),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
+                        is_first_chunk = False
+                        yield f"data: {result_chunk.model_dump_json()}\n\n"
+            except Exception as e:
+                logger.debug(f"No structured result available: {e}")
+
+        # Calculate latency
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
             id=request_id,
             created=created_at,
@@ -163,7 +458,31 @@ async def stream_openai_response(
         )
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
-        # OpenAI termination marker
+        # Emit metadata event only if not already registered via register_metadata tool
+        if not metadata_registered:
+            yield format_sse_event(MetadataEvent(
+                message_id=message_id,
+                in_reply_to=in_reply_to,
+                session_id=session_id,
+                confidence=1.0,  # Default to 100% confidence
+                model_version=model,
+                latency_ms=latency_ms,
+                token_count=token_count,
+            ))
+
+        # Mark all progress complete
+        for step in range(1, total_steps + 1):
+            yield format_sse_event(ProgressEvent(
+                step=step,
+                total_steps=total_steps,
+                label="Complete" if step == total_steps else f"Step {step}",
+                status="completed"
+            ))
+
+        # Emit done event
+        yield format_sse_event(DoneEvent(reason="stop"))
+
+        # OpenAI termination marker (for compatibility)
         yield "data: [DONE]\n\n"
 
     except Exception as e:
@@ -182,4 +501,108 @@ async def stream_openai_response(
             }
         }
         yield f"data: {json.dumps(error_data)}\n\n"
+
+        # Emit done event with error reason
+        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
+
+
+async def stream_simulator_response(
+    prompt: str,
+    model: str = "simulator-v1.0.0",
+    request_id: str | None = None,
+    delay_ms: int = 50,
+    include_reasoning: bool = True,
+    include_progress: bool = True,
+    include_tool_calls: bool = True,
+    include_actions: bool = True,
+    include_metadata: bool = True,
+    # Message correlation IDs
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream SSE simulator events for testing and demonstration.
+
+    This function wraps the SSE simulator to produce formatted SSE strings
+    ready for HTTP streaming. No LLM calls are made.
+
+    The simulator produces a rich sequence of events:
+    1. Reasoning events (model thinking)
+    2. Progress events (step indicators)
+    3. Tool call events (simulated tool usage)
+    4. Text delta events (streamed content)
+    5. Metadata events (confidence, sources, message IDs)
+    6. Action request events (user interaction)
+    7. Done event
+
+    Args:
+        prompt: User prompt (passed to simulator)
+        model: Model name for metadata
+        request_id: Optional request ID
+        delay_ms: Delay between events in milliseconds
+        include_reasoning: Whether to emit reasoning events
+        include_progress: Whether to emit progress events
+        include_tool_calls: Whether to emit tool call events
+        include_actions: Whether to emit action request at end
+        include_metadata: Whether to emit metadata event
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
+
+    Yields:
+        SSE-formatted strings ready for HTTP response
+
+    Example:
+        ```python
+        from starlette.responses import StreamingResponse
+
+        async def simulator_endpoint():
+            return StreamingResponse(
+                stream_simulator_response("demo"),
+                media_type="text/event-stream"
+            )
+        ```
+    """
+    from rem.agentic.agents.sse_simulator import stream_simulator_events
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_simulator_events(
+        prompt=prompt,
+        delay_ms=delay_ms,
+        include_reasoning=include_reasoning,
+        include_progress=include_progress,
+        include_tool_calls=include_tool_calls,
+        include_actions=include_actions,
+        include_metadata=include_metadata,
+        # Pass message correlation IDs
+        message_id=message_id,
+        in_reply_to=in_reply_to,
+        session_id=session_id,
+        model=model,
+    ):
+        yield sse_string
+
+
+async def stream_minimal_simulator(
+    content: str = "Hello from the simulator!",
+    delay_ms: int = 30,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream minimal simulator output (text + done only).
+
+    Useful for simple testing without the full event sequence.
+
+    Args:
+        content: Text content to stream
+        delay_ms: Delay between chunks
+
+    Yields:
+        SSE-formatted strings
+    """
+    from rem.agentic.agents.sse_simulator import stream_minimal_demo
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
+        yield sse_string
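To eyeball the simulator's event sequence without standing up an HTTP server, the generator can be driven directly. A small sketch, assuming the module path implied by the file list above:

```python
import asyncio

from rem.api.routers.chat.streaming import stream_simulator_response


async def main() -> None:
    # delay_ms=0 skips the pacing delays so the full sequence prints at once
    async for frame in stream_simulator_response("demo", delay_ms=0):
        print(frame, end="")


asyncio.run(main())
```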