remdb 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of remdb might be problematic.
- rem/agentic/README.md +262 -2
- rem/agentic/context.py +173 -0
- rem/agentic/context_builder.py +12 -2
- rem/agentic/mcp/tool_wrapper.py +39 -16
- rem/agentic/providers/pydantic_ai.py +46 -43
- rem/agentic/schema.py +2 -2
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +64 -8
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +621 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +114 -15
- rem/api/routers/chat/completions.py +66 -18
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +254 -22
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +9 -1
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +12 -1
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +91 -101
- rem/schemas/agents/rem.yaml +1 -1
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +11 -2
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +46 -25
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +40 -2
- rem/services/session/pydantic_messages.py +276 -0
- rem/settings.py +28 -0
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +157 -10
- rem/sql/migrations/002_install_models.sql +160 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +6 -6
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/METADATA +1 -1
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/RECORD +57 -53
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/WHEEL +0 -0
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/streaming.py
CHANGED

@@ -15,6 +15,11 @@ Key Insight
 - Use PartEndEvent to detect tool completion
 - Use FunctionToolResultEvent to get tool results
 
+Multi-Agent Context Propagation:
+- AgentContext is set via agent_context_scope() before agent.iter()
+- Child agents (via ask_agent tool) can access parent context via get_current_context()
+- Context includes user_id, tenant_id, session_id, is_eval for proper scoping
+
 SSE Format (OpenAI-compatible):
 data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
 data: [DONE]\\n\\n
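Note: the docstring addition above is the closest thing in this release to prose documentation of the new context plumbing. As a rough usage sketch — the import path and names come from this diff, while the AgentContext constructor signature and the assumption that agent_context_scope() is a context manager wrapping set_current_context() are not confirmed by it:

    # Sketch only; see rem/agentic/context.py (added in this release) for the real API.
    from rem.agentic.context import AgentContext, agent_context_scope, get_current_context

    def child_tool_body() -> str:
        # Would run inside a child agent invoked via the ask_agent tool.
        ctx = get_current_context()
        return f"user={ctx.user_id} tenant={ctx.tenant_id} session={ctx.session_id}"

    # Assumed constructor fields, taken from the docstring's list above.
    ctx = AgentContext(user_id="u-1", tenant_id="t-1", session_id="s-1", is_eval=False)
    with agent_context_scope(ctx):   # parent sets context before agent.iter()
        print(child_tool_body())     # child reads the parent's scoping info back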
@@ -28,10 +33,12 @@ Extended SSE Format (Custom Events):
 See sse_events.py for the full event type definitions.
 """
 
+from __future__ import annotations
+
 import json
 import time
 import uuid
-from typing import AsyncGenerator
+from typing import TYPE_CHECKING, AsyncGenerator
 
 from loguru import logger
 from pydantic_ai.agent import Agent
@@ -55,6 +62,7 @@ from .models import (
 )
 from .sse_events import (
     DoneEvent,
+    ErrorEvent,
     MetadataEvent,
     ProgressEvent,
     ReasoningEvent,
@@ -62,6 +70,9 @@ from .sse_events import (
     format_sse_event,
 )
 
+if TYPE_CHECKING:
+    from ....agentic.context import AgentContext
+
 
 async def stream_openai_response(
     agent: Agent,
@@ -79,6 +90,11 @@ async def stream_openai_response(
     # Mutable container to capture tool calls for persistence
     # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
     tool_calls_out: list | None = None,
+    # Agent context for multi-agent propagation
+    # When set, enables child agents to access parent context via get_current_context()
+    agent_context: "AgentContext | None" = None,
+    # Pydantic-ai native message history for proper tool call/return pairing
+    message_history: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Stream Pydantic AI agent responses with rich SSE events.
@@ -149,10 +165,29 @@ async def stream_openai_response(
     pending_tool_completions: list[tuple[str, str]] = []
     # Track if metadata was registered via register_metadata tool
     metadata_registered = False
+    # Track which agent is actually responding (may be child agent if delegated)
+    responding_agent: str | None = None
     # Track pending tool calls with full data for persistence
     # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
     pending_tool_data: dict[str, dict] = {}
 
+    # Import context functions for multi-agent support
+    from ....agentic.context import set_current_context, set_event_sink
+
+    # Set up context for multi-agent propagation
+    # This allows child agents (via ask_agent tool) to access parent context
+    previous_context = None
+    if agent_context is not None:
+        from ....agentic.context import get_current_context
+        previous_context = get_current_context()
+        set_current_context(agent_context)
+
+    # Set up event sink for child agent event proxying
+    # Child agents (via ask_agent) will push their events here
+    import asyncio
+    child_event_sink: asyncio.Queue = asyncio.Queue()
+    set_event_sink(child_event_sink)
+
     try:
         # Emit initial progress event
         current_step = 1
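Note: set_current_context() and set_event_sink() come from the new rem/agentic/context.py (+173 lines), which this excerpt does not include. A plausible minimal reconstruction, assuming a contextvars-based design — hypothetical, not the shipped module:

    # Hypothetical sketch of rem/agentic/context.py internals (assumed, not shown in this diff).
    import asyncio
    import contextvars
    from dataclasses import dataclass

    @dataclass
    class AgentContext:
        user_id: str | None = None
        tenant_id: str | None = None
        session_id: str | None = None
        is_eval: bool = False

    _context: contextvars.ContextVar["AgentContext | None"] = contextvars.ContextVar("agent_context", default=None)
    _event_sink: contextvars.ContextVar["asyncio.Queue | None"] = contextvars.ContextVar("event_sink", default=None)

    def get_current_context() -> "AgentContext | None":
        return _context.get()

    def set_current_context(ctx: "AgentContext | None") -> None:
        _context.set(ctx)

    def set_event_sink(queue: "asyncio.Queue | None") -> None:
        _event_sink.set(queue)

    def get_event_sink() -> "asyncio.Queue | None":
        # A child agent's tool wrapper would read this to push proxy events (see the drain loop below).
        return _event_sink.get()

With contextvars, values set before agent.iter() are visible to tools running in the same task, which is what would let ask_agent's child see the parent's scope.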
@@ -164,7 +199,9 @@
         ))
 
         # Use agent.iter() to get complete execution with tool calls
-        async with agent.iter(prompt) as agent_run:
+        # Pass message_history if available for proper tool call/return pairing
+        iter_kwargs = {"message_history": message_history} if message_history else {}
+        async with agent.iter(prompt, **iter_kwargs) as agent_run:
             # Capture trace context IMMEDIATELY inside agent execution
             # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
             # NOT dependent on any AI-generated content
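Note: the new message_history kwarg is forwarded to agent.iter() as native pydantic-ai history. A hedged sketch of what a caller might pass — the ModelRequest/ModelResponse part types are pydantic-ai's; the conversion from stored session rows presumably lives in the new rem/services/session/pydantic_messages.py (+276 lines), which is not shown here:

    # Illustrative only: building a pydantic-ai message_history by hand.
    from pydantic_ai.messages import (
        ModelRequest,
        ModelResponse,
        TextPart,
        UserPromptPart,
    )

    message_history = [
        ModelRequest(parts=[UserPromptPart(content="What is REM?")]),
        ModelResponse(parts=[TextPart(content="REM is a memory layer for agents.")]),
    ]
    # Then: async with agent.iter(prompt, message_history=message_history) as agent_run: ...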
@@ -285,6 +322,12 @@
                             args_dict = event.part.args.args_dict
                         elif isinstance(event.part.args, dict):
                             args_dict = event.part.args
+                        elif isinstance(event.part.args, str):
+                            # Parse JSON string args (common with pydantic-ai)
+                            try:
+                                args_dict = json.loads(event.part.args)
+                            except json.JSONDecodeError:
+                                logger.warning(f"Failed to parse tool args as JSON: {event.part.args[:100]}")
 
                         # Log tool call with key parameters
                         if args_dict and tool_name == "search_rem":
@@ -330,8 +373,25 @@
                         ):
                             if event.index in active_tool_calls:
                                 tool_name, tool_id = active_tool_calls[event.index]
-
-                                #
+
+                                # Extract full args from completed ToolCallPart
+                                # (PartStartEvent only has empty/partial args during streaming)
+                                args_dict = None
+                                if event.part.args is not None:
+                                    if hasattr(event.part.args, 'args_dict'):
+                                        args_dict = event.part.args.args_dict
+                                    elif isinstance(event.part.args, dict):
+                                        args_dict = event.part.args
+                                    elif isinstance(event.part.args, str) and event.part.args:
+                                        try:
+                                            args_dict = json.loads(event.part.args)
+                                        except json.JSONDecodeError:
+                                            logger.warning(f"Failed to parse tool args: {event.part.args[:100]}")
+
+                                # Update pending_tool_data with complete args
+                                if tool_id in pending_tool_data:
+                                    pending_tool_data[tool_id]["arguments"] = args_dict
+
                                 del active_tool_calls[event.index]
 
                             # ============================================
@@ -367,6 +427,69 @@
             elif Agent.is_call_tools_node(node):
                 async with node.stream(agent_run.ctx) as tools_stream:
                     async for tool_event in tools_stream:
+                        # First, drain any child agent events that were pushed while tool was executing
+                        # This handles ask_agent streaming - child events are proxied here
+                        while not child_event_sink.empty():
+                            try:
+                                child_event = child_event_sink.get_nowait()
+                                event_type = child_event.get("type", "")
+                                child_agent = child_event.get("agent_name", "child")
+
+                                if event_type == "child_tool_start":
+                                    # Emit child tool start as a nested tool call
+                                    child_tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=f"{child_agent}:{child_event.get('tool_name', 'tool')}",
+                                        tool_id=child_tool_id,
+                                        status="started",
+                                        arguments=child_event.get("arguments"),
+                                    ))
+                                elif event_type == "child_content":
+                                    # Emit child content as assistant content
+                                    # Track which child agent is responding
+                                    responding_agent = child_agent
+                                    content = child_event.get("content", "")
+                                    if content:
+                                        content_chunk = ChatCompletionStreamResponse(
+                                            id=request_id,
+                                            created=created_at,
+                                            model=model,
+                                            choices=[
+                                                ChatCompletionStreamChoice(
+                                                    index=0,
+                                                    delta=ChatCompletionMessageDelta(
+                                                        role="assistant" if is_first_chunk else None,
+                                                        content=content,
+                                                    ),
+                                                    finish_reason=None,
+                                                )
+                                            ],
+                                        )
+                                        is_first_chunk = False
+                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                elif event_type == "child_tool_result":
+                                    # Emit child tool completion
+                                    result = child_event.get("result", {})
+                                    # Emit metadata event for child agent if it registered metadata
+                                    if isinstance(result, dict) and result.get("_metadata_event"):
+                                        responding_agent = result.get("agent_schema") or responding_agent
+                                        yield format_sse_event(MetadataEvent(
+                                            message_id=message_id,
+                                            session_id=session_id,
+                                            agent_schema=agent_schema,
+                                            responding_agent=responding_agent,
+                                            confidence=result.get("confidence"),
+                                            extra={"risk_level": result.get("risk_level")} if result.get("risk_level") else None,
+                                        ))
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=f"{child_agent}:tool",
+                                        tool_id=f"call_{uuid.uuid4().hex[:8]}",
+                                        status="completed",
+                                        result=str(result)[:200] if result else None,
+                                    ))
+                            except Exception as e:
+                                logger.warning(f"Error processing child event: {e}")
+
                         # Tool result event - emit completion
                         if isinstance(tool_event, FunctionToolResultEvent):
                             # Get the tool name/id from the pending queue (FIFO)
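Note: the drain loop above dispatches on three event types. Their dict shapes, reconstructed purely from the .get() calls in the diff (the producer side lives in the ask_agent tool, not shown here; all values are illustrative):

    # Shapes inferred from the consumer code above; values are made up.
    child_tool_start = {
        "type": "child_tool_start",
        "agent_name": "researcher",
        "tool_name": "search_rem",
        "arguments": {"query": "..."},
    }
    child_content = {
        "type": "child_content",
        "agent_name": "researcher",
        "content": "partial text streamed by the child agent",
    }
    child_tool_result = {
        "type": "child_tool_result",
        "agent_name": "researcher",
        "result": {
            "_metadata_event": True,      # triggers the MetadataEvent branch
            "agent_schema": "researcher",
            "confidence": 0.9,
            "risk_level": "low",
        },
    }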
@@ -399,6 +522,10 @@
                                     registered_recommended_action = result_content.get("recommended_action")
                                     # Extra fields
                                     registered_extra = result_content.get("extra")
+                                    # Only set responding_agent if not already set by child events
+                                    # Child agents should take precedence - they're the actual responders
+                                    if not responding_agent:
+                                        responding_agent = result_content.get("agent_schema")
 
                                     logger.info(
                                         f"📊 Metadata registered: confidence={registered_confidence}, "
@@ -425,6 +552,7 @@
                                         in_reply_to=in_reply_to,
                                         session_id=session_id,
                                         agent_schema=agent_schema,
+                                        responding_agent=responding_agent,
                                         session_name=registered_session_name,
                                         confidence=registered_confidence,
                                         sources=registered_sources,
@@ -434,6 +562,12 @@
                                         hidden=False,
                                     ))
 
+                                # Get complete args from pending_tool_data BEFORE deleting
+                                # (captured at PartEndEvent with full args)
+                                completed_args = None
+                                if tool_id in pending_tool_data:
+                                    completed_args = pending_tool_data[tool_id].get("arguments")
+
                                 # Capture tool call with result for persistence
                                 # Special handling for register_metadata - always capture full data
                                 if tool_calls_out is not None and tool_id in pending_tool_data:
@@ -444,9 +578,18 @@
                                     del pending_tool_data[tool_id]
 
                                 if not is_metadata_event:
+                                    # NOTE: text_response fallback is DISABLED
+                                    # Child agents now stream content via child_content events (above)
+                                    # which provides real-time streaming. The text_response in tool
+                                    # result would duplicate that content, so we skip it entirely.
+
                                     # Normal tool completion - emit ToolCallEvent
-
-
+                                    # For finalize_intake, send full result dict for frontend
+                                    if tool_name == "finalize_intake" and isinstance(result_content, dict):
+                                        result_for_sse = result_content
+                                    else:
+                                        result_str = str(result_content)
+                                        result_for_sse = result_str[:200] + "..." if len(result_str) > 200 else result_str
 
                                     # Log result count for search_rem
                                     if tool_name == "search_rem" and isinstance(result_content, dict):
@@ -477,7 +620,8 @@
                                         tool_name=tool_name,
                                         tool_id=tool_id,
                                         status="completed",
-
+                                        arguments=completed_args,
+                                        result=result_for_sse
                                     ))
 
                                     # Update progress after tool completion
@@ -561,6 +705,7 @@
                 in_reply_to=in_reply_to,
                 session_id=session_id,
                 agent_schema=agent_schema,
+                responding_agent=responding_agent,
                 confidence=1.0,  # Default to 100% confidence
                 model_version=model,
                 latency_ms=latency_ms,
@@ -587,25 +732,79 @@
 
     except Exception as e:
         import traceback
+        import re
 
         error_msg = str(e)
-
-
-
-
-
-
-
-
-
-
-
-
+
+        # Parse error details for better client handling
+        error_code = "stream_error"
+        error_details: dict = {}
+        recoverable = True
+
+        # Check for rate limit errors (OpenAI 429)
+        if "429" in error_msg or "rate_limit" in error_msg.lower() or "RateLimitError" in type(e).__name__:
+            error_code = "rate_limit_exceeded"
+            recoverable = True
+
+            # Extract retry-after time from error message
+            # Pattern: "Please try again in X.XXs" or "Please try again in Xs"
+            retry_match = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
+            if retry_match:
+                retry_seconds = float(retry_match.group(1))
+                error_details["retry_after_seconds"] = retry_seconds
+                error_details["retry_after_ms"] = int(retry_seconds * 1000)
+
+            # Extract token usage info if available
+            used_match = re.search(r"Used (\d+)", error_msg)
+            limit_match = re.search(r"Limit (\d+)", error_msg)
+            requested_match = re.search(r"Requested (\d+)", error_msg)
+            if used_match:
+                error_details["tokens_used"] = int(used_match.group(1))
+            if limit_match:
+                error_details["tokens_limit"] = int(limit_match.group(1))
+            if requested_match:
+                error_details["tokens_requested"] = int(requested_match.group(1))
+
+            logger.error(f"🔴 Streaming error: status_code: 429, model_name: {model}, body: {error_msg[:200]}")
+
+        # Check for authentication errors
+        elif "401" in error_msg or "AuthenticationError" in type(e).__name__:
+            error_code = "authentication_error"
+            recoverable = False
+            logger.error(f"🔴 Streaming error: Authentication failed")
+
+        # Check for model not found / invalid model
+        elif "404" in error_msg or "model" in error_msg.lower() and "not found" in error_msg.lower():
+            error_code = "model_not_found"
+            recoverable = False
+            logger.error(f"🔴 Streaming error: Model not found")
+
+        # Generic error
+        else:
+            logger.error(f"🔴 Streaming error: {error_msg}")
+
+        logger.error(f"🔴 {traceback.format_exc()}")
+
+        # Emit proper ErrorEvent via SSE (with event: prefix for client parsing)
+        yield format_sse_event(ErrorEvent(
+            code=error_code,
+            message=error_msg,
+            details=error_details if error_details else None,
+            recoverable=recoverable,
+        ))
 
         # Emit done event with error reason
        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
 
+    finally:
+        # Clean up event sink for multi-agent streaming
+        set_event_sink(None)
+        # Restore previous context for multi-agent support
+        # This ensures nested agent calls don't pollute the parent's context
+        if agent_context is not None:
+            set_current_context(previous_context)
+
 
 async def stream_simulator_response(
     prompt: str,
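Note: the 429 branch above scrapes retry and token figures out of the provider's error text. A standalone sanity check of those exact regexes against a representative OpenAI-style message (the message string is invented to match the patterns the comments describe):

    import re

    error_msg = (
        "Error code: 429 - Rate limit reached. Limit 30000, Used 29500, "
        "Requested 1200. Please try again in 1.34s."
    )
    retry = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
    assert retry is not None and float(retry.group(1)) == 1.34
    assert int(re.search(r"Used (\d+)", error_msg).group(1)) == 29500
    assert int(re.search(r"Limit (\d+)", error_msg).group(1)) == 30000
    assert int(re.search(r"Requested (\d+)", error_msg).group(1)) == 1200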
@@ -716,6 +915,10 @@ async def stream_openai_response_with_save(
     agent_schema: str | None = None,
     session_id: str | None = None,
     user_id: str | None = None,
+    # Agent context for multi-agent propagation
+    agent_context: "AgentContext | None" = None,
+    # Pydantic-ai native message history for proper tool call/return pairing
+    message_history: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Wrapper around stream_openai_response that saves the assistant response after streaming.
@@ -731,6 +934,7 @@
         agent_schema: Agent schema name
         session_id: Session ID for message storage
         user_id: User ID for message storage
+        agent_context: Agent context for multi-agent propagation (enables child agents)
 
     Yields:
         SSE-formatted strings
@@ -763,6 +967,8 @@
         message_id=message_id,
         trace_context_out=trace_context,  # Pass container to capture trace IDs
         tool_calls_out=tool_calls,  # Capture tool calls for persistence
+        agent_context=agent_context,  # Pass context for multi-agent support
+        message_history=message_history,  # Native pydantic-ai message history
     ):
         yield chunk
 
@@ -793,6 +999,8 @@
 
         # First, store tool call messages (message_type: "tool")
         for tool_call in tool_calls:
+            if not tool_call:
+                continue
             tool_message = {
                 "role": "tool",
                 "content": json.dumps(tool_call.get("result", {}), default=str),
@@ -807,8 +1015,31 @@
             messages_to_store.append(tool_message)
 
         # Then store assistant text response (if any)
+        # Priority: direct TextPartDelta content > tool call text_response
+        # When an agent delegates via ask_agent, the child's text_response becomes
+        # the parent's assistant response (the parent is just orchestrating)
+        full_content = None
+
         if accumulated_content:
             full_content = "".join(accumulated_content)
+        else:
+            # No direct text from TextPartDelta - check tool results for text_response
+            # This handles multi-agent delegation where child agent output is the response
+            for tool_call in tool_calls:
+                if not tool_call:
+                    continue
+                result = tool_call.get("result")
+                if isinstance(result, dict) and result.get("text_response"):
+                    text_response = result["text_response"]
+                    if text_response and str(text_response).strip():
+                        full_content = str(text_response)
+                        logger.debug(
+                            f"Using text_response from {tool_call.get('tool_name', 'tool')} "
+                            f"({len(full_content)} chars) as assistant message"
+                        )
+                        break
+
+        if full_content:
             assistant_message = {
                 "id": message_id,  # Use pre-generated ID for consistency with metadata event
                 "role": "assistant",
@@ -830,7 +1061,7 @@
         )
         logger.debug(
             f"Saved {len(tool_calls)} tool calls and "
-            f"{'assistant response' if
+            f"{'assistant response' if full_content else 'no text'} "
             f"to session {session_id}"
         )
     except Exception as e:
@@ -838,8 +1069,9 @@
 
     # Update session description with session_name (non-blocking, after all yields)
     for tool_call in tool_calls:
-        if tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
-
+        if tool_call and tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
+            arguments = tool_call.get("arguments") or {}
+            session_name = arguments.get("session_name")
             if session_name:
                 try:
                     from ....models.entities import Session
rem/api/routers/common.py
ADDED

@@ -0,0 +1,18 @@
+"""
+Common models shared across API routers.
+"""
+
+from pydantic import BaseModel, Field
+
+
+class ErrorResponse(BaseModel):
+    """Standard error response format for HTTPException errors.
+
+    This is different from FastAPI's HTTPValidationError which is used
+    for Pydantic validation failures (422 errors with loc/msg/type array).
+
+    HTTPException errors return this simpler format:
+    {"detail": "Error message here"}
+    """
+
+    detail: str = Field(description="Error message describing what went wrong")
rem/api/routers/dev.py
CHANGED
@@ -11,6 +11,7 @@ Endpoints:
 from fastapi import APIRouter, HTTPException, Request
 from loguru import logger
 
+from .common import ErrorResponse
 from ...settings import settings
 
 router = APIRouter(prefix="/api/dev", tags=["dev"])

@@ -45,7 +46,12 @@
     return token == expected
 
 
-@router.get(
+@router.get(
+    "/token",
+    responses={
+        401: {"model": ErrorResponse, "description": "Dev tokens not available in production"},
+    },
+)
 async def get_dev_token(request: Request):
     """
     Get a development token for testing (non-production only).
rem/api/routers/feedback.py
CHANGED
@@ -63,6 +63,8 @@ from fastapi import APIRouter, Header, HTTPException, Request, Response
 from loguru import logger
 from pydantic import BaseModel, Field
 
+from .common import ErrorResponse
+
 from ..deps import get_user_id_from_request
 from ...models.entities import Feedback
 from ...services.postgres import Repository

@@ -121,7 +123,13 @@ class FeedbackResponse(BaseModel):
 # =============================================================================
 
 
-@router.post(
+@router.post(
+    "/messages/feedback",
+    response_model=FeedbackResponse,
+    responses={
+        503: {"model": ErrorResponse, "description": "Database not enabled"},
+    },
+)
 async def submit_feedback(
     request: Request,
     response: Response,