remdb 0.3.172__py3-none-any.whl → 0.3.223__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of remdb might be problematic.
- rem/agentic/README.md +262 -2
- rem/agentic/context.py +173 -0
- rem/agentic/context_builder.py +12 -2
- rem/agentic/mcp/tool_wrapper.py +39 -16
- rem/agentic/providers/pydantic_ai.py +46 -43
- rem/agentic/schema.py +2 -2
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +64 -8
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +621 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +114 -15
- rem/api/routers/chat/completions.py +66 -18
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +254 -22
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +9 -1
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +12 -1
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +91 -101
- rem/schemas/agents/rem.yaml +1 -1
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +11 -2
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +46 -25
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +40 -2
- rem/services/session/pydantic_messages.py +276 -0
- rem/settings.py +28 -0
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +157 -10
- rem/sql/migrations/002_install_models.sql +160 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +6 -6
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/METADATA +1 -1
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/RECORD +57 -53
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/WHEEL +0 -0
- {remdb-0.3.172.dist-info → remdb-0.3.223.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/streaming.py
CHANGED

@@ -15,6 +15,11 @@ Key Insight
 - Use PartEndEvent to detect tool completion
 - Use FunctionToolResultEvent to get tool results
 
+Multi-Agent Context Propagation:
+- AgentContext is set via agent_context_scope() before agent.iter()
+- Child agents (via ask_agent tool) can access parent context via get_current_context()
+- Context includes user_id, tenant_id, session_id, is_eval for proper scoping
+
 SSE Format (OpenAI-compatible):
 data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
 data: [DONE]\\n\\n
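Note: the docstring addition above is the closest thing in this release to prose documentation of the new context plumbing. As a rough usage sketch — the import path and names come from this diff, while the AgentContext constructor signature and the assumption that agent_context_scope() is a context manager wrapping set_current_context() are not confirmed by it:

    # Sketch only; see rem/agentic/context.py (added in this release) for the real API.
    from rem.agentic.context import AgentContext, agent_context_scope, get_current_context

    def child_tool_body() -> str:
        # Would run inside a child agent invoked via the ask_agent tool.
        ctx = get_current_context()
        return f"user={ctx.user_id} tenant={ctx.tenant_id} session={ctx.session_id}"

    # Assumed constructor fields, taken from the docstring's list above.
    ctx = AgentContext(user_id="u-1", tenant_id="t-1", session_id="s-1", is_eval=False)
    with agent_context_scope(ctx):   # parent sets context before agent.iter()
        print(child_tool_body())     # child reads the parent's scoping info back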
@@ -28,10 +33,12 @@ Extended SSE Format (Custom Events):
 See sse_events.py for the full event type definitions.
 """
 
+from __future__ import annotations
+
 import json
 import time
 import uuid
-from typing import AsyncGenerator
+from typing import TYPE_CHECKING, AsyncGenerator
 
 from loguru import logger
 from pydantic_ai.agent import Agent
@@ -55,6 +62,7 @@ from .models import (
 )
 from .sse_events import (
     DoneEvent,
+    ErrorEvent,
     MetadataEvent,
     ProgressEvent,
     ReasoningEvent,
@@ -62,6 +70,9 @@ from .sse_events import (
     format_sse_event,
 )
 
+if TYPE_CHECKING:
+    from ....agentic.context import AgentContext
+
 
 async def stream_openai_response(
     agent: Agent,
@@ -79,6 +90,11 @@ async def stream_openai_response(
     # Mutable container to capture tool calls for persistence
     # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
     tool_calls_out: list | None = None,
+    # Agent context for multi-agent propagation
+    # When set, enables child agents to access parent context via get_current_context()
+    agent_context: "AgentContext | None" = None,
+    # Pydantic-ai native message history for proper tool call/return pairing
+    message_history: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Stream Pydantic AI agent responses with rich SSE events.
@@ -149,10 +165,29 @@ async def stream_openai_response(
     pending_tool_completions: list[tuple[str, str]] = []
     # Track if metadata was registered via register_metadata tool
     metadata_registered = False
+    # Track which agent is actually responding (may be child agent if delegated)
+    responding_agent: str | None = None
     # Track pending tool calls with full data for persistence
     # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
     pending_tool_data: dict[str, dict] = {}
 
+    # Import context functions for multi-agent support
+    from ....agentic.context import set_current_context, set_event_sink
+
+    # Set up context for multi-agent propagation
+    # This allows child agents (via ask_agent tool) to access parent context
+    previous_context = None
+    if agent_context is not None:
+        from ....agentic.context import get_current_context
+        previous_context = get_current_context()
+        set_current_context(agent_context)
+
+    # Set up event sink for child agent event proxying
+    # Child agents (via ask_agent) will push their events here
+    import asyncio
+    child_event_sink: asyncio.Queue = asyncio.Queue()
+    set_event_sink(child_event_sink)
+
     try:
         # Emit initial progress event
         current_step = 1
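Note: set_current_context() and set_event_sink() come from the new rem/agentic/context.py (+173 lines), which this excerpt does not include. A plausible minimal reconstruction, assuming a contextvars-based design — hypothetical, not the shipped module:

    # Hypothetical sketch of rem/agentic/context.py internals (assumed, not shown in this diff).
    import asyncio
    import contextvars
    from dataclasses import dataclass

    @dataclass
    class AgentContext:
        user_id: str | None = None
        tenant_id: str | None = None
        session_id: str | None = None
        is_eval: bool = False

    _context: contextvars.ContextVar["AgentContext | None"] = contextvars.ContextVar("agent_context", default=None)
    _event_sink: contextvars.ContextVar["asyncio.Queue | None"] = contextvars.ContextVar("event_sink", default=None)

    def get_current_context() -> "AgentContext | None":
        return _context.get()

    def set_current_context(ctx: "AgentContext | None") -> None:
        _context.set(ctx)

    def set_event_sink(queue: "asyncio.Queue | None") -> None:
        _event_sink.set(queue)

    def get_event_sink() -> "asyncio.Queue | None":
        # A child agent's tool wrapper would read this to push proxy events (see the drain loop below).
        return _event_sink.get()

With contextvars, values set before agent.iter() are visible to tools running in the same task, which is what would let ask_agent's child see the parent's scope.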
@@ -164,7 +199,9 @@
         ))
 
         # Use agent.iter() to get complete execution with tool calls
-        async with agent.iter(prompt) as agent_run:
+        # Pass message_history if available for proper tool call/return pairing
+        iter_kwargs = {"message_history": message_history} if message_history else {}
+        async with agent.iter(prompt, **iter_kwargs) as agent_run:
             # Capture trace context IMMEDIATELY inside agent execution
             # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
             # NOT dependent on any AI-generated content
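Note: the new message_history kwarg is forwarded to agent.iter() as native pydantic-ai history. A hedged sketch of what a caller might pass — the ModelRequest/ModelResponse part types are pydantic-ai's; the conversion from stored session rows presumably lives in the new rem/services/session/pydantic_messages.py (+276 lines), which is not shown here:

    # Illustrative only: building a pydantic-ai message_history by hand.
    from pydantic_ai.messages import (
        ModelRequest,
        ModelResponse,
        TextPart,
        UserPromptPart,
    )

    message_history = [
        ModelRequest(parts=[UserPromptPart(content="What is REM?")]),
        ModelResponse(parts=[TextPart(content="REM is a memory layer for agents.")]),
    ]
    # Then: async with agent.iter(prompt, message_history=message_history) as agent_run: ...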
@@ -285,6 +322,12 @@
                             args_dict = event.part.args.args_dict
                         elif isinstance(event.part.args, dict):
                             args_dict = event.part.args
+                        elif isinstance(event.part.args, str):
+                            # Parse JSON string args (common with pydantic-ai)
+                            try:
+                                args_dict = json.loads(event.part.args)
+                            except json.JSONDecodeError:
+                                logger.warning(f"Failed to parse tool args as JSON: {event.part.args[:100]}")
 
                         # Log tool call with key parameters
                         if args_dict and tool_name == "search_rem":
@@ -330,8 +373,25 @@
                         ):
                             if event.index in active_tool_calls:
                                 tool_name, tool_id = active_tool_calls[event.index]
-
-                                #
+
+                                # Extract full args from completed ToolCallPart
+                                # (PartStartEvent only has empty/partial args during streaming)
+                                args_dict = None
+                                if event.part.args is not None:
+                                    if hasattr(event.part.args, 'args_dict'):
+                                        args_dict = event.part.args.args_dict
+                                    elif isinstance(event.part.args, dict):
+                                        args_dict = event.part.args
+                                    elif isinstance(event.part.args, str) and event.part.args:
+                                        try:
+                                            args_dict = json.loads(event.part.args)
+                                        except json.JSONDecodeError:
+                                            logger.warning(f"Failed to parse tool args: {event.part.args[:100]}")
+
+                                # Update pending_tool_data with complete args
+                                if tool_id in pending_tool_data:
+                                    pending_tool_data[tool_id]["arguments"] = args_dict
+
                                 del active_tool_calls[event.index]
 
                             # ============================================
@@ -367,6 +427,69 @@
             elif Agent.is_call_tools_node(node):
                 async with node.stream(agent_run.ctx) as tools_stream:
                     async for tool_event in tools_stream:
+                        # First, drain any child agent events that were pushed while tool was executing
+                        # This handles ask_agent streaming - child events are proxied here
+                        while not child_event_sink.empty():
+                            try:
+                                child_event = child_event_sink.get_nowait()
+                                event_type = child_event.get("type", "")
+                                child_agent = child_event.get("agent_name", "child")
+
+                                if event_type == "child_tool_start":
+                                    # Emit child tool start as a nested tool call
+                                    child_tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=f"{child_agent}:{child_event.get('tool_name', 'tool')}",
+                                        tool_id=child_tool_id,
+                                        status="started",
+                                        arguments=child_event.get("arguments"),
+                                    ))
+                                elif event_type == "child_content":
+                                    # Emit child content as assistant content
+                                    # Track which child agent is responding
+                                    responding_agent = child_agent
+                                    content = child_event.get("content", "")
+                                    if content:
+                                        content_chunk = ChatCompletionStreamResponse(
+                                            id=request_id,
+                                            created=created_at,
+                                            model=model,
+                                            choices=[
+                                                ChatCompletionStreamChoice(
+                                                    index=0,
+                                                    delta=ChatCompletionMessageDelta(
+                                                        role="assistant" if is_first_chunk else None,
+                                                        content=content,
+                                                    ),
+                                                    finish_reason=None,
+                                                )
+                                            ],
+                                        )
+                                        is_first_chunk = False
+                                        yield f"data: {content_chunk.model_dump_json()}\n\n"
+                                elif event_type == "child_tool_result":
+                                    # Emit child tool completion
+                                    result = child_event.get("result", {})
+                                    # Emit metadata event for child agent if it registered metadata
+                                    if isinstance(result, dict) and result.get("_metadata_event"):
+                                        responding_agent = result.get("agent_schema") or responding_agent
+                                        yield format_sse_event(MetadataEvent(
+                                            message_id=message_id,
+                                            session_id=session_id,
+                                            agent_schema=agent_schema,
+                                            responding_agent=responding_agent,
+                                            confidence=result.get("confidence"),
+                                            extra={"risk_level": result.get("risk_level")} if result.get("risk_level") else None,
+                                        ))
+                                    yield format_sse_event(ToolCallEvent(
+                                        tool_name=f"{child_agent}:tool",
+                                        tool_id=f"call_{uuid.uuid4().hex[:8]}",
+                                        status="completed",
+                                        result=str(result)[:200] if result else None,
+                                    ))
+                            except Exception as e:
+                                logger.warning(f"Error processing child event: {e}")
+
                         # Tool result event - emit completion
                         if isinstance(tool_event, FunctionToolResultEvent):
                             # Get the tool name/id from the pending queue (FIFO)
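Note: the drain loop above dispatches on three event types. Their dict shapes, reconstructed purely from the .get() calls in the diff (the producer side lives in the ask_agent tool, not shown here; all values are illustrative):

    # Shapes inferred from the consumer code above; values are made up.
    child_tool_start = {
        "type": "child_tool_start",
        "agent_name": "researcher",
        "tool_name": "search_rem",
        "arguments": {"query": "..."},
    }
    child_content = {
        "type": "child_content",
        "agent_name": "researcher",
        "content": "partial text streamed by the child agent",
    }
    child_tool_result = {
        "type": "child_tool_result",
        "agent_name": "researcher",
        "result": {
            "_metadata_event": True,      # triggers the MetadataEvent branch
            "agent_schema": "researcher",
            "confidence": 0.9,
            "risk_level": "low",
        },
    }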
@@ -399,6 +522,10 @@
                                     registered_recommended_action = result_content.get("recommended_action")
                                     # Extra fields
                                     registered_extra = result_content.get("extra")
+                                    # Only set responding_agent if not already set by child events
+                                    # Child agents should take precedence - they're the actual responders
+                                    if not responding_agent:
+                                        responding_agent = result_content.get("agent_schema")
 
                                     logger.info(
                                         f"📊 Metadata registered: confidence={registered_confidence}, "
@@ -425,6 +552,7 @@
                                         in_reply_to=in_reply_to,
                                         session_id=session_id,
                                         agent_schema=agent_schema,
+                                        responding_agent=responding_agent,
                                         session_name=registered_session_name,
                                         confidence=registered_confidence,
                                         sources=registered_sources,
@@ -434,6 +562,12 @@
                                         hidden=False,
                                     ))
 
+                                # Get complete args from pending_tool_data BEFORE deleting
+                                # (captured at PartEndEvent with full args)
+                                completed_args = None
+                                if tool_id in pending_tool_data:
+                                    completed_args = pending_tool_data[tool_id].get("arguments")
+
                                 # Capture tool call with result for persistence
                                 # Special handling for register_metadata - always capture full data
                                 if tool_calls_out is not None and tool_id in pending_tool_data:
@@ -444,9 +578,18 @@
                                     del pending_tool_data[tool_id]
 
                                 if not is_metadata_event:
+                                    # NOTE: text_response fallback is DISABLED
+                                    # Child agents now stream content via child_content events (above)
+                                    # which provides real-time streaming. The text_response in tool
+                                    # result would duplicate that content, so we skip it entirely.
+
                                     # Normal tool completion - emit ToolCallEvent
-
-
+                                    # For finalize_intake, send full result dict for frontend
+                                    if tool_name == "finalize_intake" and isinstance(result_content, dict):
+                                        result_for_sse = result_content
+                                    else:
+                                        result_str = str(result_content)
+                                        result_for_sse = result_str[:200] + "..." if len(result_str) > 200 else result_str
 
                                     # Log result count for search_rem
                                     if tool_name == "search_rem" and isinstance(result_content, dict):
@@ -477,7 +620,8 @@
                                         tool_name=tool_name,
                                         tool_id=tool_id,
                                         status="completed",
-
+                                        arguments=completed_args,
+                                        result=result_for_sse
                                     ))
 
                                     # Update progress after tool completion
@@ -561,6 +705,7 @@
                 in_reply_to=in_reply_to,
                 session_id=session_id,
                 agent_schema=agent_schema,
+                responding_agent=responding_agent,
                 confidence=1.0,  # Default to 100% confidence
                 model_version=model,
                 latency_ms=latency_ms,
@@ -587,25 +732,79 @@
 
     except Exception as e:
         import traceback
+        import re
 
         error_msg = str(e)
-
-
-
-
-
-
-
-
-
-
-
-
+
+        # Parse error details for better client handling
+        error_code = "stream_error"
+        error_details: dict = {}
+        recoverable = True
+
+        # Check for rate limit errors (OpenAI 429)
+        if "429" in error_msg or "rate_limit" in error_msg.lower() or "RateLimitError" in type(e).__name__:
+            error_code = "rate_limit_exceeded"
+            recoverable = True
+
+            # Extract retry-after time from error message
+            # Pattern: "Please try again in X.XXs" or "Please try again in Xs"
+            retry_match = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
+            if retry_match:
+                retry_seconds = float(retry_match.group(1))
+                error_details["retry_after_seconds"] = retry_seconds
+                error_details["retry_after_ms"] = int(retry_seconds * 1000)
+
+            # Extract token usage info if available
+            used_match = re.search(r"Used (\d+)", error_msg)
+            limit_match = re.search(r"Limit (\d+)", error_msg)
+            requested_match = re.search(r"Requested (\d+)", error_msg)
+            if used_match:
+                error_details["tokens_used"] = int(used_match.group(1))
+            if limit_match:
+                error_details["tokens_limit"] = int(limit_match.group(1))
+            if requested_match:
+                error_details["tokens_requested"] = int(requested_match.group(1))
+
+            logger.error(f"🔴 Streaming error: status_code: 429, model_name: {model}, body: {error_msg[:200]}")
+
+        # Check for authentication errors
+        elif "401" in error_msg or "AuthenticationError" in type(e).__name__:
+            error_code = "authentication_error"
+            recoverable = False
+            logger.error(f"🔴 Streaming error: Authentication failed")
+
+        # Check for model not found / invalid model
+        elif "404" in error_msg or "model" in error_msg.lower() and "not found" in error_msg.lower():
+            error_code = "model_not_found"
+            recoverable = False
+            logger.error(f"🔴 Streaming error: Model not found")
+
+        # Generic error
+        else:
+            logger.error(f"🔴 Streaming error: {error_msg}")
+
+        logger.error(f"🔴 {traceback.format_exc()}")
+
+        # Emit proper ErrorEvent via SSE (with event: prefix for client parsing)
+        yield format_sse_event(ErrorEvent(
+            code=error_code,
+            message=error_msg,
+            details=error_details if error_details else None,
+            recoverable=recoverable,
+        ))
 
         # Emit done event with error reason
        yield format_sse_event(DoneEvent(reason="error"))
         yield "data: [DONE]\n\n"
 
+    finally:
+        # Clean up event sink for multi-agent streaming
+        set_event_sink(None)
+        # Restore previous context for multi-agent support
+        # This ensures nested agent calls don't pollute the parent's context
+        if agent_context is not None:
+            set_current_context(previous_context)
+
 
 async def stream_simulator_response(
     prompt: str,
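Note: the 429 branch above scrapes retry and token figures out of the provider's error text. A standalone sanity check of those exact regexes against a representative OpenAI-style message (the message string is invented to match the patterns the comments describe):

    import re

    error_msg = (
        "Error code: 429 - Rate limit reached. Limit 30000, Used 29500, "
        "Requested 1200. Please try again in 1.34s."
    )
    retry = re.search(r"try again in (\d+(?:\.\d+)?)\s*s", error_msg)
    assert retry is not None and float(retry.group(1)) == 1.34
    assert int(re.search(r"Used (\d+)", error_msg).group(1)) == 29500
    assert int(re.search(r"Limit (\d+)", error_msg).group(1)) == 30000
    assert int(re.search(r"Requested (\d+)", error_msg).group(1)) == 1200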
@@ -716,6 +915,10 @@ async def stream_openai_response_with_save(
     agent_schema: str | None = None,
     session_id: str | None = None,
     user_id: str | None = None,
+    # Agent context for multi-agent propagation
+    agent_context: "AgentContext | None" = None,
+    # Pydantic-ai native message history for proper tool call/return pairing
+    message_history: list | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Wrapper around stream_openai_response that saves the assistant response after streaming.
@@ -731,6 +934,7 @@
         agent_schema: Agent schema name
         session_id: Session ID for message storage
         user_id: User ID for message storage
+        agent_context: Agent context for multi-agent propagation (enables child agents)
 
     Yields:
         SSE-formatted strings
@@ -763,6 +967,8 @@
         message_id=message_id,
         trace_context_out=trace_context,  # Pass container to capture trace IDs
         tool_calls_out=tool_calls,  # Capture tool calls for persistence
+        agent_context=agent_context,  # Pass context for multi-agent support
+        message_history=message_history,  # Native pydantic-ai message history
     ):
         yield chunk
 
@@ -793,6 +999,8 @@
 
         # First, store tool call messages (message_type: "tool")
         for tool_call in tool_calls:
+            if not tool_call:
+                continue
             tool_message = {
                 "role": "tool",
                 "content": json.dumps(tool_call.get("result", {}), default=str),
@@ -807,8 +1015,31 @@
             messages_to_store.append(tool_message)
 
         # Then store assistant text response (if any)
+        # Priority: direct TextPartDelta content > tool call text_response
+        # When an agent delegates via ask_agent, the child's text_response becomes
+        # the parent's assistant response (the parent is just orchestrating)
+        full_content = None
+
         if accumulated_content:
             full_content = "".join(accumulated_content)
+        else:
+            # No direct text from TextPartDelta - check tool results for text_response
+            # This handles multi-agent delegation where child agent output is the response
+            for tool_call in tool_calls:
+                if not tool_call:
+                    continue
+                result = tool_call.get("result")
+                if isinstance(result, dict) and result.get("text_response"):
+                    text_response = result["text_response"]
+                    if text_response and str(text_response).strip():
+                        full_content = str(text_response)
+                        logger.debug(
+                            f"Using text_response from {tool_call.get('tool_name', 'tool')} "
+                            f"({len(full_content)} chars) as assistant message"
+                        )
+                        break
+
+        if full_content:
             assistant_message = {
                 "id": message_id,  # Use pre-generated ID for consistency with metadata event
                 "role": "assistant",
@@ -830,7 +1061,7 @@
         )
         logger.debug(
             f"Saved {len(tool_calls)} tool calls and "
-            f"{'assistant response' if
+            f"{'assistant response' if full_content else 'no text'} "
             f"to session {session_id}"
         )
     except Exception as e:
@@ -838,8 +1069,9 @@
 
     # Update session description with session_name (non-blocking, after all yields)
     for tool_call in tool_calls:
-        if tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
-
+        if tool_call and tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
+            arguments = tool_call.get("arguments") or {}
+            session_name = arguments.get("session_name")
             if session_name:
                 try:
                     from ....models.entities import Session
rem/api/routers/common.py
ADDED

@@ -0,0 +1,18 @@
+"""
+Common models shared across API routers.
+"""
+
+from pydantic import BaseModel, Field
+
+
+class ErrorResponse(BaseModel):
+    """Standard error response format for HTTPException errors.
+
+    This is different from FastAPI's HTTPValidationError which is used
+    for Pydantic validation failures (422 errors with loc/msg/type array).
+
+    HTTPException errors return this simpler format:
+    {"detail": "Error message here"}
+    """
+
+    detail: str = Field(description="Error message describing what went wrong")
rem/api/routers/dev.py
CHANGED
@@ -11,6 +11,7 @@ Endpoints:
 from fastapi import APIRouter, HTTPException, Request
 from loguru import logger
 
+from .common import ErrorResponse
 from ...settings import settings
 
 router = APIRouter(prefix="/api/dev", tags=["dev"])

@@ -45,7 +46,12 @@
     return token == expected
 
 
-@router.get(
+@router.get(
+    "/token",
+    responses={
+        401: {"model": ErrorResponse, "description": "Dev tokens not available in production"},
+    },
+)
 async def get_dev_token(request: Request):
     """
     Get a development token for testing (non-production only).
rem/api/routers/feedback.py
CHANGED
@@ -63,6 +63,8 @@ from fastapi import APIRouter, Header, HTTPException, Request, Response
 from loguru import logger
 from pydantic import BaseModel, Field
 
+from .common import ErrorResponse
+
 from ..deps import get_user_id_from_request
 from ...models.entities import Feedback
 from ...services.postgres import Repository

@@ -121,7 +123,13 @@ class FeedbackResponse(BaseModel):
 # =============================================================================
 
 
-@router.post(
+@router.post(
+    "/messages/feedback",
+    response_model=FeedbackResponse,
+    responses={
+        503: {"model": ErrorResponse, "description": "Database not enabled"},
+    },
+)
 async def submit_feedback(
     request: Request,
     response: Response,