PyPI - remdb - Versions diffs - 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl - Mend

remdb 0.3.230py3-none-any.whl → 0.3.258py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

rem/agentic/__init__.py +10 -1
rem/agentic/context.py +13 -2
rem/agentic/context_builder.py +45 -34
rem/agentic/providers/pydantic_ai.py +302 -110
rem/api/mcp_router/resources.py +223 -0
rem/api/mcp_router/tools.py +76 -10
rem/api/routers/auth.py +113 -10
rem/api/routers/chat/child_streaming.py +22 -8
rem/api/routers/chat/completions.py +3 -3
rem/api/routers/chat/sse_events.py +3 -3
rem/api/routers/chat/streaming.py +40 -45
rem/api/routers/chat/streaming_utils.py +5 -7
rem/api/routers/feedback.py +2 -2
rem/api/routers/query.py +5 -14
rem/cli/commands/ask.py +144 -33
rem/cli/commands/experiments.py +1 -1
rem/cli/commands/process.py +9 -1
rem/cli/commands/query.py +109 -0
rem/cli/commands/session.py +117 -0
rem/cli/main.py +2 -0
rem/models/core/experiment.py +1 -1
rem/models/entities/session.py +1 -0
rem/schemas/agents/core/agent-builder.yaml +1 -1
rem/schemas/agents/test_orchestrator.yaml +42 -0
rem/schemas/agents/test_structured_output.yaml +52 -0
rem/services/content/providers.py +151 -49
rem/services/postgres/repository.py +1 -0
rem/services/rem/README.md +4 -3
rem/services/rem/parser.py +7 -10
rem/services/rem/service.py +47 -0
rem/services/session/compression.py +7 -3
rem/services/session/pydantic_messages.py +25 -7
rem/services/session/reload.py +2 -1
rem/settings.py +64 -7
rem/sql/migrations/004_cache_system.sql +3 -1
rem/utils/schema_loader.py +135 -103
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/METADATA +6 -5
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/RECORD +40 -37
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0

rem/agentic/providers/pydantic_ai.py CHANGED Viewed

@@ -35,66 +35,41 @@ Unique Design:
     - Tools and resources loaded from MCP servers via schema config
     - Stripped descriptions to avoid LLM schema bloat
-TODO:
-    Model Cache Implementation (Critical for Production Scale)
-    Current bottleneck: Every agent.run() call creates a new Agent instance with
-    model initialization overhead. At scale (100+ requests/sec), this becomes expensive.
+Caching Implementation:
+    Agent instance caching is now implemented to reduce latency from repeated
+    agent creation. See the _agent_cache module-level variables and helpers.
-    Need two-tier caching strategy:
+    Cache Features:
+    - Oldest-entry eviction (by creation time) when max size (50) exceeded
+    - 5-minute TTL for cache entries
+    - Coroutine-safe via asyncio.Lock (asyncio locks are not thread-safe)
+    - Cache key: hash(schema) + model + user_id
+    Usage:
+        # Normal usage (cache enabled by default)
+        agent = await create_agent(context, agent_schema_override=schema)
+        # Bypass cache for testing
+        agent = await create_agent(context, use_cache=False)
+        # Clear cache
+        await clear_agent_cache()  # Clear all
+        await clear_agent_cache("siggy")  # Clear specific schema
+        # Monitor cache
+        stats = get_agent_cache_stats()
+    Future Improvements:
     1. Schema Cache (see rem/utils/schema_loader.py TODO):
        - Filesystem schemas: LRU cache, no TTL (immutable)
        - Database schemas: TTL cache (5-15 min)
        - Reduces disk I/O and DB queries
-    2. Model Instance Cache (THIS TODO):
-       - Cache Pydantic AI Model() instances (connection pools, tokenizers)
-       - Key: (provider, model_name) → Model instance
-       - Benefits:
-         * Reuse HTTP connection pools (httpx.AsyncClient)
-         * Reuse tokenizer instances
-         * Faster model initialization
-         * Lower memory footprint
-       - Implementation:
-         ```python
-         _model_cache: dict[tuple[str, str], Model] = {}
-         def get_or_create_model(model_name: str) -> Model:
-             cache_key = _parse_model_name(model_name)  # ("anthropic", "claude-3-5-sonnet")
-             if cache_key not in _model_cache:
-                 _model_cache[cache_key] = Model(model_name)
-             return _model_cache[cache_key]
-         ```
-       - Considerations:
-         * Max cache size (LRU eviction, e.g., 20 models)
-         * Thread safety (asyncio.Lock for cache access)
-         * Model warmup on server startup for hot paths
-         * Clear cache on model config changes
-    3. Agent Instance Caching (Advanced):
-       - Cache complete Agent instances (model + schema + tools)
-       - Key: (schema_name, model_name) → Agent instance
-       - Benefits:
-         * Skip schema parsing and model creation entirely
-         * Fastest possible agent.run() latency
-       - Challenges:
-         * Agent state management (stateless required)
-         * Tool/resource updates (cache invalidation)
-         * Memory usage (agents are heavier than models)
-       - Recommendation: Start with Model cache, add Agent cache if profiling shows benefit
-    Profiling Targets (measure before optimizing):
-    - schema_loader.load_agent_schema() calls per request
-    - create_agent() execution time (model init overhead)
-    - Model() instance creation time by provider
-    - Agent.run() total latency breakdown
-    Related Files:
-    - rem/utils/schema_loader.py (schema caching TODO)
-    - rem/agentic/providers/pydantic_ai.py:339 (create_agent - this file)
-    - rem/services/schema_repository.py (database schema loading)
-    Priority: HIGH (blocks production scaling beyond 50 req/sec)
+    2. Model Instance Cache:
+       - Cache Pydantic AI Model() instances separately
+       - Would allow sharing models across different agent schemas
+    Priority: MEDIUM (agent cache handles the critical path)
     4. Response Format Control (structured_output enhancement):
        - Current: structured_output is bool (True=strict schema, False=free-form text)
@@ -147,6 +122,10 @@ Example Agent Schema:
 }
 """
+import asyncio
+import hashlib
+import json
+import time
 from typing import Any
 from loguru import logger
@@ -169,6 +148,120 @@ from ..context import AgentContext
 from ...settings import settings
+# =============================================================================
+# AGENT INSTANCE CACHE
+# =============================================================================
+# Caches AgentRuntime instances to avoid repeated MCP tool loading and agent
+# creation overhead. Cache key is based on schema content hash + model name +
+# user ID (tools may be user-scoped).
+#
+# Design:
+# - Oldest-entry eviction (by creation time) when max size exceeded
+# - Optional TTL for cache entries (0 disables expiry)
+# - Coroutine-safe via asyncio.Lock (asyncio locks are not thread-safe)
+# - Cache can be cleared manually or on schema updates
+# =============================================================================
# Tunables for the agent instance cache.
_AGENT_CACHE_MAX_SIZE: int = 50  # maximum number of agents kept in the cache
_AGENT_CACHE_TTL_SECONDS: int = 300  # entry lifetime in seconds (5 minutes); 0 = no TTL

# Maps cache key -> (agent runtime, time.time() value recorded when it was stored).
_agent_cache: dict[str, tuple["AgentRuntime", float]] = {}

# Held by the async cache helpers while they read or modify _agent_cache.
_agent_cache_lock = asyncio.Lock()
+def _compute_cache_key(
+    agent_schema: dict[str, Any] | None,
+    model: str,
+    user_id: str | None,
+) -> str:
+    """
+    Compute cache key for an agent configuration.
+    Key components:
+    - Schema content hash (captures prompt + tools + output schema)
+    - Model name
+    - User ID (tools may be user-scoped)
+    """
+    # Hash the schema content for stable key
+    if agent_schema:
+        # Sort keys for deterministic hashing
+        schema_str = json.dumps(agent_schema, sort_keys=True)
+        schema_hash = hashlib.md5(schema_str.encode()).hexdigest()[:12]
+    else:
+        schema_hash = "no-schema"
+    user_part = user_id[:8] if user_id else "no-user"
+    return f"{schema_hash}:{model}:{user_part}"
async def _get_cached_agent(cache_key: str) -> "AgentRuntime | None":
    """
    Look up a cached agent.

    Args:
        cache_key: Key to look up

    Returns:
        The cached agent, or None when the key is absent or its entry is older
        than the TTL (an expired entry is removed from the cache).
    """
    async with _agent_cache_lock:
        entry = _agent_cache.get(cache_key)
        if entry is None:
            return None

        runtime, stored_at = entry

        # A TTL of 0 (or less) means entries never expire.
        if _AGENT_CACHE_TTL_SECONDS > 0:
            age = time.time() - stored_at
            if age > _AGENT_CACHE_TTL_SECONDS:
                del _agent_cache[cache_key]
                logger.debug(f"Agent cache expired: {cache_key} (age={age:.1f}s)")
                return None

        logger.debug(f"Agent cache hit: {cache_key}")
        return runtime
async def _cache_agent(cache_key: str, agent: "AgentRuntime") -> None:
    """
    Store an agent in the cache, evicting old entries when the cache is full.

    Eviction removes the entry with the oldest creation timestamp; lookups do
    not refresh that timestamp, so this is oldest-first rather than true LRU.

    Args:
        cache_key: Key to store the agent under
        agent: Agent runtime to cache
    """
    async with _agent_cache_lock:
        # Replacing an existing entry does not grow the cache, so drop the old
        # entry first; otherwise a full cache would evict an unrelated entry.
        _agent_cache.pop(cache_key, None)

        # Evict oldest entries until there is room for the new one. The
        # emptiness check keeps min() from raising if the max size is <= 0.
        while _agent_cache and len(_agent_cache) >= _AGENT_CACHE_MAX_SIZE:
            oldest_key = min(_agent_cache, key=lambda k: _agent_cache[k][1])
            del _agent_cache[oldest_key]
            logger.debug(f"Agent cache evicted: {oldest_key}")

        _agent_cache[cache_key] = (agent, time.time())
        logger.debug(f"Agent cached: {cache_key} (total={len(_agent_cache)})")
async def clear_agent_cache(schema_name: str | None = None) -> int:
    """
    Remove entries from the agent cache.

    Args:
        schema_name: When given, only entries whose cache key contains this
                    text are removed. When None, the whole cache is emptied.

    Returns:
        Number of entries removed.
    """
    async with _agent_cache_lock:
        if schema_name is not None:
            # NOTE(review): this is a plain substring test against the whole
            # cache key. Keys built by _compute_cache_key begin with a hash of
            # the schema content rather than the schema's name, so a
            # human-readable name may not match as intended - TODO confirm.
            keys_to_remove = []
            for k in list(_agent_cache):
                if schema_name in k:
                    keys_to_remove.append(k)
                    _agent_cache.pop(k)
            logger.info(f"Agent cache cleared for '{schema_name}': {len(keys_to_remove)} entries")
            return len(keys_to_remove)

        # No filter: drop everything.
        count = len(_agent_cache)
        _agent_cache.clear()
        logger.info(f"Agent cache cleared: {count} entries")
        return count
def get_agent_cache_stats() -> dict[str, Any]:
    """
    Report the current state of the agent cache for monitoring.

    Returns:
        Dict with the number of cached entries ("size"), the configured
        limits ("max_size", "ttl_seconds") and the list of cache keys ("keys").
    """
    # Snapshot the keys once; this function does not take the cache lock.
    cached_keys = list(_agent_cache)
    stats: dict[str, Any] = {
        "size": len(cached_keys),
        "max_size": _AGENT_CACHE_MAX_SIZE,
        "ttl_seconds": _AGENT_CACHE_TTL_SECONDS,
        "keys": cached_keys,
    }
    return stats
 class AgentRuntime:
     """
     Agent runtime configuration bundle with delegation pattern.
@@ -349,6 +442,68 @@ def _prepare_schema_for_qwen(schema: dict[str, Any]) -> dict[str, Any]:
     return schema_copy
def _schema_type_names(node: Any) -> list[str]:
    """
    Return the declared JSON Schema type(s) of a schema node as a list of names.

    ``type`` may be a single string or a list of strings (for example
    ``["string", "null"]``). A node that is not a dict, or that declares no
    type, is reported as ``["any"]``.
    """
    if not isinstance(node, dict):
        return ["any"]
    declared = node.get("type", "any")
    if isinstance(declared, (list, tuple)):
        return [str(name) for name in declared] or ["any"]
    return [str(declared)]


def _render_schema_recursive(schema: dict[str, Any], indent: int = 0) -> list[str]:
    """
    Recursively render a JSON schema as YAML-like text with exact field names.

    This ensures the LLM sees the actual field names (e.g., 'title', 'description')
    for nested objects, not just high-level descriptions.

    Only object schemas produce output: each property becomes one line
    (``name: type``), optionally followed by a ``# description`` comment line
    and, for nested objects or arrays of objects, the indented nested fields.
    A ``type`` given as a list (e.g. ``["object", "null"]``) is handled by
    treating the field as an object/array when that name is in the list and
    otherwise rendering the names joined with `` | ``.

    Args:
        schema: JSON Schema dict; anything that is not an object schema
                renders as no lines
        indent: Current indentation level (two spaces per level)

    Returns:
        List of lines representing the schema
    """
    lines: list[str] = []
    if not isinstance(schema, dict) or "object" not in _schema_type_names(schema):
        return lines

    prefix = "  " * indent
    # "or" guards against explicit nulls such as `required: null`.
    required = schema.get("required") or []
    properties = schema.get("properties") or {}

    for field_name, field_def in properties.items():
        type_names = _schema_type_names(field_def)
        if not isinstance(field_def, dict):
            # Boolean or otherwise malformed property schema: nothing to read.
            field_def = {}
        field_desc = field_def.get("description", "")
        req_marker = " (required)" if field_name in required else ""

        if "object" in type_names:
            lines.append(f"{prefix}{field_name}:{req_marker}")
            if field_desc:
                lines.append(f"{prefix}  # {field_desc}")
            # Recurse into the nested object
            lines.extend(_render_schema_recursive(field_def, indent + 1))
        elif "array" in type_names:
            items = field_def.get("items", {})
            item_types = _schema_type_names(items)
            items_label = " | ".join(item_types)
            lines.append(f"{prefix}{field_name}: [{items_label}]{req_marker}")
            if field_desc:
                lines.append(f"{prefix}  # {field_desc}")
            # If array items are objects, show their structure
            if "object" in item_types:
                lines.append(f"{prefix}  # Each item has:")
                lines.extend(_render_schema_recursive(items, indent + 2))
        else:
            # Primitive type, optionally restricted to an enum
            type_str = " | ".join(type_names)
            enum_vals = field_def.get("enum")
            if enum_vals:
                choices = ", ".join(str(v) for v in enum_vals)
                type_str = f"{type_str} (one of: {choices})"
            lines.append(f"{prefix}{field_name}: {type_str}{req_marker}")
            if field_desc:
                lines.append(f"{prefix}  # {field_desc}")

    return lines
 def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
     """
     Convert schema properties to prompt guidance text.
@@ -357,56 +512,71 @@ def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
     definition into natural language guidance that informs the agent
     about the expected response structure without forcing JSON output.
+    CRITICAL: This function now recursively renders nested schemas so the LLM
+    can see exact field names (e.g., 'title' vs 'name' in treatment options).
     Args:
         properties: JSON Schema properties dict
     Returns:
         Prompt text describing the expected response elements
-    Example:
-        properties = {
-            "answer": {"type": "string", "description": "The answer"},
-            "confidence": {"type": "number", "description": "Confidence 0-1"}
-        }
-        # Returns:
-        # "## Response Structure\n\nYour response should include:\n- **answer**: The answer\n..."
     """
     if not properties:
         return ""
-    lines = ["## Response Guidelines", "", "Your response should address the following elements:"]
-    for field_name, field_def in properties.items():
-        field_type = field_def.get("type", "any")
-        description = field_def.get("description", "")
-        # Format based on type
-        if field_type == "array":
-            type_hint = "list"
-        elif field_type == "number":
-            type_hint = "number"
-            # Include min/max if specified
-            if "minimum" in field_def or "maximum" in field_def:
-                min_val = field_def.get("minimum", "")
-                max_val = field_def.get("maximum", "")
-                if min_val != "" and max_val != "":
-                    type_hint = f"number ({min_val}-{max_val})"
-        elif field_type == "boolean":
-            type_hint = "yes/no"
-        else:
-            type_hint = field_type
+    # Separate answer (output) from other fields (internal tracking)
+    answer_field = properties.get("answer")
+    internal_fields = {k: v for k, v in properties.items() if k != "answer"}
-        # Build field description
-        field_line = f"- **{field_name}**"
-        if type_hint and type_hint != "string":
-            field_line += f" ({type_hint})"
-        if description:
-            field_line += f": {description}"
+    lines = ["## Internal Thinking Structure (DO NOT output these labels)"]
+    lines.append("")
+    lines.append("Use this structure to organize your thinking, but ONLY output the answer content:")
+    lines.append("")
+    # If there's an answer field, emphasize it's the ONLY output
+    if answer_field:
+        answer_desc = answer_field.get("description", "Your response")
+        lines.append(f"**OUTPUT (what the user sees):** {answer_desc}")
+        lines.append("")
+    # Document internal fields with FULL recursive schema
+    if internal_fields:
+        lines.append("**INTERNAL (for your tracking only - do NOT include in output):**")
+        lines.append("")
+        lines.append("Schema (use these EXACT field names):")
+        lines.append("```yaml")
+        # Render each internal field recursively
+        for field_name, field_def in internal_fields.items():
+            field_type = field_def.get("type", "any")
+            field_desc = field_def.get("description", "")
+            if field_type == "object":
+                lines.append(f"{field_name}:")
+                if field_desc:
+                    lines.append(f"  # {field_desc}")
+                nested_lines = _render_schema_recursive(field_def, indent=1)
+                lines.extend(nested_lines)
+            elif field_type == "array":
+                items = field_def.get("items", {})
+                items_type = items.get("type", "any")
+                lines.append(f"{field_name}: [{items_type}]")
+                if field_desc:
+                    lines.append(f"  # {field_desc}")
+                if items_type == "object":
+                    lines.append(f"  # Each item has:")
+                    nested_lines = _render_schema_recursive(items, indent=2)
+                    lines.extend(nested_lines)
+            else:
+                lines.append(f"{field_name}: {field_type}")
+                if field_desc:
+                    lines.append(f"  # {field_desc}")
-        lines.append(field_line)
+        lines.append("```")
     lines.append("")
-    lines.append("Respond naturally in prose, addressing these elements where relevant.")
+    lines.append("⚠️ CRITICAL: Your response must be ONLY the conversational answer text.")
+    lines.append("Do NOT output field names like 'answer:' or 'diverge_output:' - just the response itself.")
     return "\n".join(lines)
@@ -509,6 +679,7 @@ async def create_agent(
     model_override: KnownModelName | Model | None = None,
     result_type: type[BaseModel] | None = None,
     strip_model_description: bool = True,
+    use_cache: bool = True,
 ) -> AgentRuntime:
     """
     Create agent from context with dynamic schema loading.
@@ -532,6 +703,7 @@ async def create_agent(
         model_override: Optional explicit model (bypasses context.default_model)
         result_type: Optional Pydantic model for structured output
         strip_model_description: If True, removes model docstring from LLM schema
+        use_cache: If True, use agent instance cache (default: True)
     Returns:
         Configured Pydantic.AI Agent with MCP tools
@@ -555,6 +727,9 @@ async def create_agent(
             agent_schema_override=schema,
             result_type=Output
         )
+        # Bypass cache for testing
+        agent = await create_agent(context, use_cache=False)
     """
     # Initialize OTEL instrumentation if enabled (idempotent)
     if settings.otel.enabled:
@@ -576,6 +751,17 @@ async def create_agent(
     default_model = context.default_model if context else settings.llm.default_model
     model = get_valid_model_or_default(model_override, default_model)
+    # Check cache first (if enabled and no custom result_type)
+    # Note: Custom result_type bypasses cache since it changes the agent's output schema
+    user_id = context.user_id if context else None
+    if use_cache and result_type is None:
+        cache_key = _compute_cache_key(agent_schema, str(model), user_id)
+        cached_agent = await _get_cached_agent(cache_key)
+        if cached_agent is not None:
+            return cached_agent
+    else:
+        cache_key = None
     # Extract schema fields using typed helpers
     from ..schema import get_system_prompt, get_metadata
@@ -664,26 +850,26 @@ async def create_agent(
         set_agent_resource_attributes(agent_schema=agent_schema)
-    # Extract schema metadata for search_rem tool description suffix
-    # This allows entity schemas to add context-specific notes to the search_rem tool
-    search_rem_suffix = None
-    if metadata:
-        # Check for default_search_table in metadata (set by entity schemas)
-        extra = agent_schema.get("json_schema_extra", {}) if agent_schema else {}
-        default_table = extra.get("default_search_table")
-        has_embeddings = extra.get("has_embeddings", False)
-        if default_table:
-            # Build description suffix for search_rem
-            search_rem_suffix = f"\n\nFor this schema, use `search_rem` to query `{default_table}`. "
-            if has_embeddings:
-                search_rem_suffix += f"SEARCH works well on {default_table} (has embeddings). "
-            search_rem_suffix += f"Example: `SEARCH \"your query\" FROM {default_table} LIMIT 10`"
     # Add tools from MCP server (in-process, no subprocess)
     # Track loaded MCP servers for resource resolution
     loaded_mcp_server = None
+    # Build map of tool_name → schema description from agent schema tools section
+    # This allows agent-specific tool guidance to override/augment MCP tool descriptions
+    schema_tool_descriptions: dict[str, str] = {}
+    tool_configs = metadata.tools if metadata and hasattr(metadata, 'tools') else []
+    for tool_config in tool_configs:
+        if hasattr(tool_config, 'name'):
+            t_name = tool_config.name
+            t_desc = tool_config.description or ""
+        else:
+            t_name = tool_config.get("name", "")
+            t_desc = tool_config.get("description", "")
+        # Skip resource URIs (handled separately below)
+        if t_name and "://" not in t_name and t_desc:
+            schema_tool_descriptions[t_name] = t_desc
+            logger.debug(f"Schema tool description for '{t_name}': {len(t_desc)} chars")
     for server_config in mcp_server_configs:
         server_type = server_config.get("type")
         server_id = server_config.get("id", "mcp-server")
@@ -708,8 +894,8 @@ async def create_agent(
                 mcp_tools_dict = await mcp_server.get_tools()
                 for tool_name, tool_func in mcp_tools_dict.items():
-                    # Add description suffix to search_rem tool if schema specifies a default table
-                    tool_suffix = search_rem_suffix if tool_name == "search_rem" else None
+                    # Get schema description suffix if agent schema defines one for this tool
+                    tool_suffix = schema_tool_descriptions.get(tool_name)
                     wrapped_tool = create_mcp_tool_wrapper(
                         tool_name,
@@ -718,7 +904,7 @@ async def create_agent(
                         description_suffix=tool_suffix,
                     )
                     tools.append(wrapped_tool)
-                    logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema suffix)" if tool_suffix else ""))
+                    logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema desc)" if tool_suffix else ""))
                 logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
@@ -830,8 +1016,14 @@ async def create_agent(
     #     from ..otel import set_agent_context_attributes
     #     set_agent_context_attributes(context)
-    return AgentRuntime(
+    agent_runtime = AgentRuntime(
         agent=agent,
         temperature=temperature,
         max_iterations=max_iterations,
     )
+    # Cache the agent if caching is enabled
+    if cache_key is not None:
+        await _cache_agent(cache_key, agent_runtime)
+    return agent_runtime

remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl

remdb 0.3.230py3-none-any.whl → 0.3.258py3-none-any.whl