remdb 0.3.0__py3-none-any.whl → 0.3.127__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of remdb has been flagged by the registry.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -25
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +5 -6
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +293 -73
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +302 -28
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +313 -29
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +282 -35
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/METADATA +464 -289
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/RECORD +104 -73
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
Diff of `rem/api/routers/chat/streaming.py` (+642 -45):

````diff
--- a/rem/api/routers/chat/streaming.py
+++ b/rem/api/routers/chat/streaming.py
@@ -3,20 +3,29 @@ OpenAI-compatible streaming relay for Pydantic AI agents.
 
 Design Pattern:
 - Uses Pydantic AI's agent.iter() to capture full execution including tool calls
--
-- Streams text content deltas as they arrive
+- Emits rich SSE events: reasoning, tool_call, progress, metadata, text_delta
 - Proper OpenAI SSE format with data: prefix and [DONE] terminator
 - Error handling with graceful degradation
 
-Key Insight
+Key Insight
 - agent.run_stream() stops after first output, missing tool calls
 - agent.iter() provides complete execution with tool call visibility
-- Use PartStartEvent to detect tool calls
-- Use PartDeltaEvent with TextPartDelta for
+- Use PartStartEvent to detect tool calls and thinking parts
+- Use PartDeltaEvent with TextPartDelta/ThinkingPartDelta for streaming
+- Use PartEndEvent to detect tool completion
+- Use FunctionToolResultEvent to get tool results
 
-SSE Format:
+SSE Format (OpenAI-compatible):
 data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
 data: [DONE]\\n\\n
+
+Extended SSE Format (Custom Events):
+event: reasoning\\ndata: {"type": "reasoning", "content": "..."}\\n\\n
+event: tool_call\\ndata: {"type": "tool_call", "tool_name": "...", "status": "started"}\\n\\n
+event: progress\\ndata: {"type": "progress", "step": 1, "total_steps": 3}\\n\\n
+event: metadata\\ndata: {"type": "metadata", "confidence": 0.95}\\n\\n
+
+See sse_events.py for the full event type definitions.
 """
 
 import json
````
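The new docstring points at `sse_events.py` (added in this release: `rem/api/routers/chat/sse_events.py`, +542 lines) without showing it. As a rough sketch of the shape the docstring examples imply, a typed event plus the `format_sse_event` helper might look like this (class and field names are inferred, not taken from the actual module):

```python
# Hypothetical sketch of the sse_events.py event types; names inferred from the
# docstring examples above, not from the real module.
from pydantic import BaseModel


class SSEEvent(BaseModel):
    type: str


class ReasoningEvent(SSEEvent):
    type: str = "reasoning"
    content: str
    step: int | None = None


def format_sse_event(event: SSEEvent) -> str:
    # Named frame: "event: <type>" routes the frame, "data: <json>" carries the
    # payload, and the blank line terminates the frame per the SSE wire format.
    return f"event: {event.type}\ndata: {event.model_dump_json()}\n\n"


print(format_sse_event(ReasoningEvent(content="Analyzing the request...", step=1)))
```

Named `event:` frames let an SSE client route custom event types, while the plain `data:` frames keep OpenAI-compatible clients working unchanged.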
````diff
@@ -27,17 +36,31 @@ from typing import AsyncGenerator
 from loguru import logger
 from pydantic_ai.agent import Agent
 from pydantic_ai.messages import (
+    FunctionToolResultEvent,
     PartDeltaEvent,
+    PartEndEvent,
     PartStartEvent,
+    TextPart,
     TextPartDelta,
+    ThinkingPart,
+    ThinkingPartDelta,
     ToolCallPart,
 )
 
+from .otel_utils import get_current_trace_context, get_tracer
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
     ChatCompletionStreamResponse,
 )
+from .sse_events import (
+    DoneEvent,
+    MetadataEvent,
+    ProgressEvent,
+    ReasoningEvent,
+    ToolCallEvent,
+    format_sse_event,
+)
 
 
 async def stream_openai_response(
````
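The new `get_current_trace_context` import comes from `otel_utils.py` (+33 lines, also added in this release but not shown here). Judging by how `streaming.py` consumes it, a plausible sketch using only the standard OpenTelemetry API (the function body is an assumption):

```python
# Hypothetical sketch of otel_utils.get_current_trace_context; the OpenTelemetry
# calls are real API, but the function shape is inferred from its call sites.
from opentelemetry import trace


def get_current_trace_context() -> tuple[str | None, str | None]:
    """Return (trace_id, span_id) of the active span as hex strings, or (None, None)."""
    ctx = trace.get_current_span().get_span_context()
    if not ctx.is_valid:
        return None, None
    return format(ctx.trace_id, "032x"), format(ctx.span_id, "016x")
```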
````diff
@@ -45,82 +68,268 @@ async def stream_openai_response(
     prompt: str,
     model: str,
     request_id: str | None = None,
+    # Message correlation IDs for metadata
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+    # Agent info for metadata
+    agent_schema: str | None = None,
+    # Mutable container to capture trace context (deterministic, not AI-dependent)
+    trace_context_out: dict | None = None,
 ) -> AsyncGenerator[str, None]:
     """
-    Stream Pydantic AI agent responses
+    Stream Pydantic AI agent responses with rich SSE events.
+
+    Emits all SSE event types matching the simulator:
+    - reasoning: Model thinking/chain-of-thought (from ThinkingPart)
+    - tool_call: Tool invocation start/complete (from ToolCallPart, FunctionToolResultEvent)
+    - progress: Step indicators for multi-step execution
+    - text_delta: Streamed content (OpenAI-compatible format)
+    - metadata: Message IDs, model info, performance metrics
+    - done: Stream completion
 
     Design Pattern:
     1. Use agent.iter() for complete execution (not run_stream())
     2. Iterate over nodes to capture model requests and tool executions
-    3.
-    4. Stream text content
-    5. Send
-    6. Send OpenAI termination marker [DONE]
+    3. Emit rich SSE events for reasoning, tools, progress
+    4. Stream text content in OpenAI-compatible format
+    5. Send metadata and done events at completion
 
     Args:
         agent: Pydantic AI agent instance
         prompt: User prompt to run
         model: Model name for response metadata
         request_id: Optional request ID (generates UUID if not provided)
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
 
     Yields:
-        SSE-formatted strings
+        SSE-formatted strings
 
     Example Stream:
-
+        event: progress
+        data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Processing", "status": "in_progress"}
 
-
+        event: reasoning
+        data: {"type": "reasoning", "content": "Analyzing the request..."}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "started", "arguments": {...}}
+
+        event: tool_call
+        data: {"type": "tool_call", "tool_name": "search", "status": "completed", "result": "..."}
 
         data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}
 
-
+        event: metadata
+        data: {"type": "metadata", "message_id": "...", "latency_ms": 1234}
 
-
+        event: done
+        data: {"type": "done", "reason": "stop"}
     """
     if request_id is None:
         request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
 
     created_at = int(time.time())
+    start_time = time.time()
     is_first_chunk = True
+    reasoning_step = 0
+    current_step = 0
+    total_steps = 3  # Model request, tool execution (optional), final response
+    token_count = 0
+
+    # Track active tool calls for completion events
+    # Maps index -> (tool_name, tool_id) for correlating start/end events
+    active_tool_calls: dict[int, tuple[str, str]] = {}
+    # Queue of tool calls awaiting completion (FIFO for matching)
+    pending_tool_completions: list[tuple[str, str]] = []
+    # Track if metadata was registered via register_metadata tool
+    metadata_registered = False
 
     try:
+        # Emit initial progress event
+        current_step = 1
+        yield format_sse_event(ProgressEvent(
+            step=current_step,
+            total_steps=total_steps,
+            label="Processing request",
+            status="in_progress"
+        ))
+
         # Use agent.iter() to get complete execution with tool calls
-        # run_stream() stops after first output, missing tool calls
         async with agent.iter(prompt) as agent_run:
+            # Capture trace context IMMEDIATELY inside agent execution
+            # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
+            # NOT dependent on any AI-generated content
+            captured_trace_id, captured_span_id = get_current_trace_context()
+            if trace_context_out is not None:
+                trace_context_out["trace_id"] = captured_trace_id
+                trace_context_out["span_id"] = captured_span_id
+
             async for node in agent_run:
                 # Check if this is a model request node (includes tool calls)
                 if Agent.is_model_request_node(node):
                     # Stream events from model request
                     async with node.stream(agent_run.ctx) as request_stream:
                         async for event in request_stream:
-                            #
+                            # ============================================
+                            # REASONING EVENTS (ThinkingPart)
+                            # ============================================
                             if isinstance(event, PartStartEvent) and isinstance(
+                                event.part, ThinkingPart
+                            ):
+                                reasoning_step += 1
+                                if event.part.content:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.part.content,
+                                        step=reasoning_step
+                                    ))
+
+                            # Reasoning delta (streaming thinking)
+                            elif isinstance(event, PartDeltaEvent) and isinstance(
+                                event.delta, ThinkingPartDelta
+                            ):
+                                if event.delta.content_delta:
+                                    yield format_sse_event(ReasoningEvent(
+                                        content=event.delta.content_delta,
+                                        step=reasoning_step
+                                    ))
+
+                            # ============================================
+                            # TEXT CONTENT START (initial text chunk)
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
+                                event.part, TextPart
+                            ):
+                                # TextPart may contain initial content that needs to be emitted
+                                if event.part.content:
+                                    content = event.part.content
+                                    token_count += len(content.split())
+
+                                    content_chunk = ChatCompletionStreamResponse(
+                                        id=request_id,
+                                        created=created_at,
+                                        model=model,
+                                        choices=[
+                                            ChatCompletionStreamChoice(
+                                                index=0,
+                                                delta=ChatCompletionMessageDelta(
+                                                    role="assistant" if is_first_chunk else None,
+                                                    content=content,
+                                                ),
+                                                finish_reason=None,
+                                            )
+                                        ],
+                                    )
+                                    is_first_chunk = False
+                                    yield f"data: {content_chunk.model_dump_json()}\n\n"
+
+                            # ============================================
+                            # TOOL CALL START EVENTS
+                            # ============================================
+                            elif isinstance(event, PartStartEvent) and isinstance(
                                 event.part, ToolCallPart
                             ):
-
+                                tool_name = event.part.tool_name
 
-
-
-
-
-
-
-
-
-
-
-
+                                # Handle final_result specially - it's Pydantic AI's
+                                # internal tool for structured output
+                                if tool_name == "final_result":
+                                    # Extract the structured result and emit as content
+                                    args_dict = None
+                                    if event.part.args is not None:
+                                        if hasattr(event.part.args, 'args_dict'):
+                                            args_dict = event.part.args.args_dict
+                                        elif isinstance(event.part.args, dict):
+                                            args_dict = event.part.args
+
+                                    if args_dict:
+                                        # Emit the structured result as JSON content
+                                        result_json = json.dumps(args_dict, indent=2)
+                                        content_chunk = ChatCompletionStreamResponse(
+                                            id=request_id,
+                                            created=created_at,
+                                            model=model,
+                                            choices=[
+                                                ChatCompletionStreamChoice(
+                                                    index=0,
+                                                    delta=ChatCompletionMessageDelta(
+                                                        role="assistant" if is_first_chunk else None,
+                                                        content=result_json,
+                                                    ),
+                                                    finish_reason=None,
+                                                )
+                                            ],
                                         )
-
-
-
-
+                                        is_first_chunk = False
+                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                        continue  # Skip regular tool call handling
+
+                                tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                active_tool_calls[event.index] = (tool_name, tool_id)
+                                # Queue for completion matching (FIFO)
+                                pending_tool_completions.append((tool_name, tool_id))
+
+                                # Emit tool_call SSE event (started)
+                                # Try to get arguments as dict
+                                args_dict = None
+                                if event.part.args is not None:
+                                    if hasattr(event.part.args, 'args_dict'):
+                                        args_dict = event.part.args.args_dict
+                                    elif isinstance(event.part.args, dict):
+                                        args_dict = event.part.args
+
+                                # Log tool call with key parameters
+                                if args_dict and tool_name == "search_rem":
+                                    query_type = args_dict.get("query_type", "?")
+                                    limit = args_dict.get("limit", 20)
+                                    table = args_dict.get("table", "")
+                                    query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
+                                    if query_text and len(query_text) > 50:
+                                        query_text = query_text[:50] + "..."
+                                    logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
+                                else:
+                                    logger.info(f"🔧 {tool_name}")
 
-
+                                yield format_sse_event(ToolCallEvent(
+                                    tool_name=tool_name,
+                                    tool_id=tool_id,
+                                    status="started",
+                                    arguments=args_dict
+                                ))
+
+                                # Update progress
+                                current_step = 2
+                                total_steps = 4  # Added tool execution step
+                                yield format_sse_event(ProgressEvent(
+                                    step=current_step,
+                                    total_steps=total_steps,
+                                    label=f"Calling {tool_name}",
+                                    status="in_progress"
+                                ))
+
+                            # ============================================
+                            # TOOL CALL COMPLETION (PartEndEvent)
+                            # ============================================
+                            elif isinstance(event, PartEndEvent) and isinstance(
+                                event.part, ToolCallPart
+                            ):
+                                if event.index in active_tool_calls:
+                                    tool_name, tool_id = active_tool_calls[event.index]
+                                    # Note: result comes from FunctionToolResultEvent below
+                                    # For now, mark as completed without result
+                                    del active_tool_calls[event.index]
+
+                            # ============================================
+                            # TEXT CONTENT DELTA
+                            # ============================================
                             elif isinstance(event, PartDeltaEvent) and isinstance(
                                 event.delta, TextPartDelta
                             ):
+                                content = event.delta.content_delta
+                                token_count += len(content.split())  # Rough token estimate
+
                                 content_chunk = ChatCompletionStreamResponse(
                                     id=request_id,
                                     created=created_at,
````
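For reviewers unfamiliar with the `agent.iter()` pattern this hunk is built around, here is a minimal self-contained version of the same node-walking loop (model name and prompt are illustrative):

```python
# Minimal agent.iter() loop: walk execution nodes, stream text deltas as they arrive.
import asyncio

from pydantic_ai import Agent
from pydantic_ai.messages import PartDeltaEvent, TextPartDelta


async def main() -> None:
    agent = Agent("openai:gpt-4o")  # any configured model works
    async with agent.iter("What is SSE?") as agent_run:
        async for node in agent_run:
            # Only model-request nodes produce streamable text/tool-call parts
            if Agent.is_model_request_node(node):
                async with node.stream(agent_run.ctx) as stream:
                    async for event in stream:
                        if isinstance(event, PartDeltaEvent) and isinstance(
                            event.delta, TextPartDelta
                        ):
                            print(event.delta.content_delta, end="", flush=True)


asyncio.run(main())
```

Unlike `agent.run_stream()`, which stops after the first final output, this loop also visits the tool-call nodes, which is what lets the new code emit `tool_call` events.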
````diff
@@ -130,7 +339,7 @@ async def stream_openai_response(
                                     index=0,
                                     delta=ChatCompletionMessageDelta(
                                         role="assistant" if is_first_chunk else None,
-                                        content=
+                                        content=content,
                                     ),
                                     finish_reason=None,
                                 )
````
````diff
@@ -139,16 +348,176 @@ async def stream_openai_response(
                             is_first_chunk = False
                             yield f"data: {content_chunk.model_dump_json()}\n\n"
 
-                #
+                # ============================================
+                # TOOL EXECUTION NODE
+                # ============================================
                 elif Agent.is_call_tools_node(node):
-                    # Stream tool execution - tools complete here
                     async with node.stream(agent_run.ctx) as tools_stream:
-                        async for
-                            #
-
-
+                        async for tool_event in tools_stream:
+                            # Tool result event - emit completion
+                            if isinstance(tool_event, FunctionToolResultEvent):
+                                # Get the tool name/id from the pending queue (FIFO)
+                                if pending_tool_completions:
+                                    tool_name, tool_id = pending_tool_completions.pop(0)
+                                else:
+                                    # Fallback if queue is empty (shouldn't happen)
+                                    tool_name = "tool"
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+
+                                # Check if this is a register_metadata tool result
+                                # It returns a dict with _metadata_event: True marker
+                                result_content = tool_event.result.content if hasattr(tool_event.result, 'content') else tool_event.result
+                                is_metadata_event = False
+
+                                if isinstance(result_content, dict) and result_content.get("_metadata_event"):
+                                    is_metadata_event = True
+                                    metadata_registered = True  # Skip default metadata at end
+                                    # Emit MetadataEvent with registered values
+                                    registered_confidence = result_content.get("confidence")
+                                    registered_sources = result_content.get("sources")
+                                    registered_references = result_content.get("references")
+                                    registered_flags = result_content.get("flags")
+                                    # Session naming
+                                    registered_session_name = result_content.get("session_name")
+                                    # Risk assessment fields
+                                    registered_risk_level = result_content.get("risk_level")
+                                    registered_risk_score = result_content.get("risk_score")
+                                    registered_risk_reasoning = result_content.get("risk_reasoning")
+                                    registered_recommended_action = result_content.get("recommended_action")
+                                    # Extra fields
+                                    registered_extra = result_content.get("extra")
+
+                                    logger.info(
+                                        f"📊 Metadata registered: confidence={registered_confidence}, "
+                                        f"session_name={registered_session_name}, "
+                                        f"risk_level={registered_risk_level}, sources={registered_sources}"
+                                    )
+
+                                    # Build extra dict with risk fields and any custom extras
+                                    extra_data = {}
+                                    if registered_risk_level is not None:
+                                        extra_data["risk_level"] = registered_risk_level
+                                    if registered_risk_score is not None:
+                                        extra_data["risk_score"] = registered_risk_score
+                                    if registered_risk_reasoning is not None:
+                                        extra_data["risk_reasoning"] = registered_risk_reasoning
+                                    if registered_recommended_action is not None:
+                                        extra_data["recommended_action"] = registered_recommended_action
+                                    if registered_extra:
+                                        extra_data.update(registered_extra)
+
+                                    # Emit metadata event immediately
+                                    yield format_sse_event(MetadataEvent(
+                                        message_id=message_id,
+                                        in_reply_to=in_reply_to,
+                                        session_id=session_id,
+                                        agent_schema=agent_schema,
+                                        session_name=registered_session_name,
+                                        confidence=registered_confidence,
+                                        sources=registered_sources,
+                                        model_version=model,
+                                        flags=registered_flags,
+                                        extra=extra_data if extra_data else None,
+                                        hidden=False,
+                                    ))
+
+                                if not is_metadata_event:
+                                    # Normal tool completion - emit ToolCallEvent
+                                    result_str = str(result_content)
+                                    result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
+
+                                    # Log result count for search_rem
+                                    if tool_name == "search_rem" and isinstance(result_content, dict):
+                                        results = result_content.get("results", {})
+                                        # Handle nested result structure: results may be a dict with 'results' list and 'count'
+                                        if isinstance(results, dict):
+                                            count = results.get("count", len(results.get("results", [])))
+                                            query_type = results.get("query_type", "?")
+                                            query_text = results.get("query_text", results.get("key", ""))
+                                            table = results.get("table_name", "")
+                                        elif isinstance(results, list):
+                                            count = len(results)
+                                            query_type = "?"
+                                            query_text = ""
+                                            table = ""
+                                        else:
+                                            count = "?"
+                                            query_type = "?"
+                                            query_text = ""
+                                            table = ""
+                                        status = result_content.get("status", "unknown")
+                                        # Truncate query text for logging
+                                        if query_text and len(str(query_text)) > 40:
+                                            query_text = str(query_text)[:40] + "..."
+                                        logger.info(f"  ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
+
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=tool_name,
+                                        tool_id=tool_id,
+                                        status="completed",
+                                        result=result_summary
+                                    ))
+
+                                    # Update progress after tool completion
+                                    current_step = 3
+                                    yield format_sse_event(ProgressEvent(
+                                        step=current_step,
+                                        total_steps=total_steps,
+                                        label="Generating response",
+                                        status="in_progress"
+                                    ))
+
+            # After iteration completes, check for structured result
+            # This handles agents with result_type (structured output)
+            # Skip for plain text output - already streamed via TextPartDelta
+            try:
+                result = agent_run.result
+                if result is not None and hasattr(result, 'output'):
+                    output = result.output
+
+                    # Skip plain string output - already streamed via TextPartDelta
+                    # Non-structured output agents (structured_output: false) return strings
+                    if isinstance(output, str):
+                        logger.debug("Plain text output already streamed via TextPartDelta, skipping final emission")
+                    else:
+                        # Serialize the structured output (Pydantic models)
+                        if hasattr(output, 'model_dump'):
+                            # Pydantic model
+                            result_dict = output.model_dump()
+                        elif hasattr(output, '__dict__'):
+                            result_dict = output.__dict__
+                        else:
+                            # Fallback for unknown types
+                            result_dict = {"result": str(output)}
+
+                        result_json = json.dumps(result_dict, indent=2, default=str)
+                        token_count += len(result_json.split())
+
+                        # Emit structured result as content
+                        result_chunk = ChatCompletionStreamResponse(
+                            id=request_id,
+                            created=created_at,
+                            model=model,
+                            choices=[
+                                ChatCompletionStreamChoice(
+                                    index=0,
+                                    delta=ChatCompletionMessageDelta(
+                                        role="assistant" if is_first_chunk else None,
+                                        content=result_json,
+                                    ),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
+                        is_first_chunk = False
+                        yield f"data: {result_chunk.model_dump_json()}\n\n"
+            except Exception as e:
+                logger.debug(f"No structured result available: {e}")
 
-        #
+        # Calculate latency
+        latency_ms = int((time.time() - start_time) * 1000)
+
+        # Final OpenAI chunk with finish_reason
         final_chunk = ChatCompletionStreamResponse(
             id=request_id,
             created=created_at,
````
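The `_metadata_event` marker handled above implies a `register_metadata` tool defined elsewhere in rem (it is not part of this file's diff). A hypothetical sketch of such a tool, to show the contract the streaming relay is checking for:

```python
# Hypothetical register_metadata tool; the real one lives elsewhere in rem.
# The only contract visible in this diff is the "_metadata_event": True marker
# plus the keys the relay reads (confidence, session_name, risk_level, ...).
from pydantic_ai import Agent

agent = Agent("openai:gpt-4o")


@agent.tool_plain
def register_metadata(
    confidence: float | None = None,
    session_name: str | None = None,
    risk_level: str | None = None,
) -> dict:
    """Let the model attach response metadata; relayed to clients as an SSE metadata event."""
    return {
        "_metadata_event": True,  # marker the streaming relay keys off
        "confidence": confidence,
        "session_name": session_name,
        "risk_level": risk_level,
    }
```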
````diff
@@ -163,7 +532,35 @@ async def stream_openai_response(
         )
         yield f"data: {final_chunk.model_dump_json()}\n\n"
 
-        #
+        # Emit metadata event only if not already registered via register_metadata tool
+        if not metadata_registered:
+            yield format_sse_event(MetadataEvent(
+                message_id=message_id,
+                in_reply_to=in_reply_to,
+                session_id=session_id,
+                agent_schema=agent_schema,
+                confidence=1.0,  # Default to 100% confidence
+                model_version=model,
+                latency_ms=latency_ms,
+                token_count=token_count,
+                # Include deterministic trace context captured from OTEL
+                trace_id=captured_trace_id,
+                span_id=captured_span_id,
+            ))
+
+        # Mark all progress complete
+        for step in range(1, total_steps + 1):
+            yield format_sse_event(ProgressEvent(
+                step=step,
+                total_steps=total_steps,
+                label="Complete" if step == total_steps else f"Step {step}",
+                status="completed"
+            ))
+
+        # Emit done event
+        yield format_sse_event(DoneEvent(reason="stop"))
+
+        # OpenAI termination marker (for compatibility)
         yield "data: [DONE]\n\n"
 
     except Exception as e:
````
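Because the stream now interleaves named custom frames with plain OpenAI `data:` chunks, a client has to track the optional `event:` field per frame. A sketch of consuming the mixed stream (endpoint URL and payload are illustrative):

```python
# Consume the mixed SSE stream: unnamed data: frames are OpenAI chunks,
# named frames (event: reasoning/tool_call/progress/metadata/done) are custom.
import json

import httpx


async def consume(url: str) -> None:
    payload = {"model": "gpt-4o", "stream": True,
               "messages": [{"role": "user", "content": "hi"}]}
    event_name = "message"  # SSE default when no event: field is present
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as resp:
            async for line in resp.aiter_lines():
                if line.startswith("event: "):
                    event_name = line[len("event: "):]
                elif line.startswith("data: "):
                    data = line[len("data: "):]
                    if data == "[DONE]":
                        return
                    if event_name == "message":  # plain OpenAI chunk
                        delta = json.loads(data)["choices"][0]["delta"]
                        print(delta.get("content") or "", end="")
                    else:  # reasoning / tool_call / progress / metadata / done
                        print(f"\n[{event_name}] {data}")
                elif line == "":
                    event_name = "message"  # blank line closes the frame
```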
````diff
@@ -182,4 +579,204 @@ async def stream_openai_response(
             }
         }
         yield f"data: {json.dumps(error_data)}\n\n"
+
+        # Emit done event with error reason
+        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
+
+
+async def stream_simulator_response(
+    prompt: str,
+    model: str = "simulator-v1.0.0",
+    request_id: str | None = None,
+    delay_ms: int = 50,
+    include_reasoning: bool = True,
+    include_progress: bool = True,
+    include_tool_calls: bool = True,
+    include_actions: bool = True,
+    include_metadata: bool = True,
+    # Message correlation IDs
+    message_id: str | None = None,
+    in_reply_to: str | None = None,
+    session_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream SSE simulator events for testing and demonstration.
+
+    This function wraps the SSE simulator to produce formatted SSE strings
+    ready for HTTP streaming. No LLM calls are made.
+
+    The simulator produces a rich sequence of events:
+    1. Reasoning events (model thinking)
+    2. Progress events (step indicators)
+    3. Tool call events (simulated tool usage)
+    4. Text delta events (streamed content)
+    5. Metadata events (confidence, sources, message IDs)
+    6. Action request events (user interaction)
+    7. Done event
+
+    Args:
+        prompt: User prompt (passed to simulator)
+        model: Model name for metadata
+        request_id: Optional request ID
+        delay_ms: Delay between events in milliseconds
+        include_reasoning: Whether to emit reasoning events
+        include_progress: Whether to emit progress events
+        include_tool_calls: Whether to emit tool call events
+        include_actions: Whether to emit action request at end
+        include_metadata: Whether to emit metadata event
+        message_id: Database ID of the assistant message being streamed
+        in_reply_to: Database ID of the user message this responds to
+        session_id: Session ID for conversation correlation
+
+    Yields:
+        SSE-formatted strings ready for HTTP response
+
+    Example:
+        ```python
+        from starlette.responses import StreamingResponse
+
+        async def simulator_endpoint():
+            return StreamingResponse(
+                stream_simulator_response("demo"),
+                media_type="text/event-stream"
+            )
+        ```
+    """
+    from rem.agentic.agents.sse_simulator import stream_simulator_events
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_simulator_events(
+        prompt=prompt,
+        delay_ms=delay_ms,
+        include_reasoning=include_reasoning,
+        include_progress=include_progress,
+        include_tool_calls=include_tool_calls,
+        include_actions=include_actions,
+        include_metadata=include_metadata,
+        # Pass message correlation IDs
+        message_id=message_id,
+        in_reply_to=in_reply_to,
+        session_id=session_id,
+        model=model,
+    ):
+        yield sse_string
+
+
+async def stream_minimal_simulator(
+    content: str = "Hello from the simulator!",
+    delay_ms: int = 30,
+) -> AsyncGenerator[str, None]:
+    """
+    Stream minimal simulator output (text + done only).
+
+    Useful for simple testing without the full event sequence.
+
+    Args:
+        content: Text content to stream
+        delay_ms: Delay between chunks
+
+    Yields:
+        SSE-formatted strings
+    """
+    from rem.agentic.agents.sse_simulator import stream_minimal_demo
+
+    # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
+    async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
+        yield sse_string
+
+
+async def stream_openai_response_with_save(
+    agent: Agent,
+    prompt: str,
+    model: str,
+    request_id: str | None = None,
+    agent_schema: str | None = None,
+    session_id: str | None = None,
+    user_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Wrapper around stream_openai_response that saves the assistant response after streaming.
+
+    This accumulates all text content during streaming and saves it to the database
+    after the stream completes.
+
+    Args:
+        agent: Pydantic AI agent instance
+        prompt: User prompt
+        model: Model name
+        request_id: Optional request ID
+        agent_schema: Agent schema name
+        session_id: Session ID for message storage
+        user_id: User ID for message storage
+
+    Yields:
+        SSE-formatted strings
+    """
+    from ....utils.date_utils import utc_now, to_iso
+    from ....services.session import SessionMessageStore
+    from ....settings import settings
+
+    # Pre-generate message_id so it can be sent in metadata event
+    # This allows frontend to use it for feedback before DB persistence
+    message_id = str(uuid.uuid4())
+
+    # Mutable container for capturing trace context from inside agent execution
+    # This is deterministic - captured from OTEL instrumentation, not AI-generated
+    trace_context: dict = {}
+
+    # Accumulate content during streaming
+    accumulated_content = []
+
+    async for chunk in stream_openai_response(
+        agent=agent,
+        prompt=prompt,
+        model=model,
+        request_id=request_id,
+        agent_schema=agent_schema,
+        session_id=session_id,
+        message_id=message_id,
+        trace_context_out=trace_context,  # Pass container to capture trace IDs
+    ):
+        yield chunk
+
+        # Extract text content from OpenAI-format chunks
+        # Format: data: {"choices": [{"delta": {"content": "..."}}]}
+        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+            try:
+                data_str = chunk[6:].strip()  # Remove "data: " prefix
+                if data_str:
+                    data = json.loads(data_str)
+                    if "choices" in data and data["choices"]:
+                        delta = data["choices"][0].get("delta", {})
+                        content = delta.get("content")
+                        if content:
+                            accumulated_content.append(content)
+            except (json.JSONDecodeError, KeyError, IndexError):
+                pass  # Skip non-JSON or malformed chunks
+
+    # After streaming completes, save the assistant response
+    if settings.postgres.enabled and session_id and accumulated_content:
+        full_content = "".join(accumulated_content)
+        # Get captured trace context from container (deterministically captured inside agent execution)
+        captured_trace_id = trace_context.get("trace_id")
+        captured_span_id = trace_context.get("span_id")
+        assistant_message = {
+            "id": message_id,  # Use pre-generated ID for consistency with metadata event
+            "role": "assistant",
+            "content": full_content,
+            "timestamp": to_iso(utc_now()),
+            "trace_id": captured_trace_id,
+            "span_id": captured_span_id,
+        }
+        try:
+            store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+            await store.store_session_messages(
+                session_id=session_id,
+                messages=[assistant_message],
+                user_id=user_id,
+                compress=True,  # Compress long assistant responses
+            )
+            logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
+        except Exception as e:
+            logger.error(f"Failed to save assistant response: {e}", exc_info=True)
````