remdb 0.3.226-py3-none-any.whl → 0.3.245-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of remdb has been flagged as potentially problematic by the registry.
- rem/agentic/README.md +22 -248
- rem/agentic/context.py +13 -2
- rem/agentic/context_builder.py +39 -33
- rem/agentic/providers/pydantic_ai.py +67 -50
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/tools.py +25 -9
- rem/api/routers/auth.py +112 -9
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/streaming.py +166 -357
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/query.py +5 -14
- rem/cli/commands/ask.py +144 -33
- rem/cli/commands/process.py +9 -1
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/entities/session.py +1 -0
- rem/services/postgres/repository.py +7 -17
- rem/services/rem/service.py +47 -0
- rem/services/session/compression.py +7 -3
- rem/services/session/pydantic_messages.py +45 -11
- rem/services/session/reload.py +2 -1
- rem/settings.py +43 -0
- rem/sql/migrations/004_cache_system.sql +3 -1
- rem/utils/schema_loader.py +99 -99
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/METADATA +2 -2
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/RECORD +29 -26
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/WHEEL +0 -0
- {remdb-0.3.226.dist-info → remdb-0.3.245.dist-info}/entry_points.txt +0 -0
rem/cli/commands/query.py  ADDED
@@ -0,0 +1,109 @@
+"""
+REM query command.
+
+Usage:
+    rem query --sql 'LOOKUP "Sarah Chen"'
+    rem query --sql 'SEARCH resources "API design" LIMIT 10'
+    rem query --sql "SELECT * FROM resources LIMIT 5"
+    rem query --file queries/my_query.sql
+
+This tool connects to the configured PostgreSQL instance and executes the
+provided REM dialect query, printing results as JSON (default) or plain dicts.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from typing import List
+
+import click
+from loguru import logger
+
+from ...services.rem import QueryExecutionError
+from ...services.rem.service import RemService
+
+
+@click.command("query")
+@click.option("--sql", "-s", default=None, help="REM query string (LOOKUP, SEARCH, FUZZY, TRAVERSE, or SQL)")
+@click.option(
+    "--file",
+    "-f",
+    "sql_file",
+    type=click.Path(exists=True, path_type=Path),
+    default=None,
+    help="Path to file containing REM query",
+)
+@click.option("--no-json", is_flag=True, default=False, help="Print rows as Python dicts instead of JSON")
+@click.option("--user-id", "-u", default=None, help="Scope query to a specific user")
+def query_command(sql: str | None, sql_file: Path | None, no_json: bool, user_id: str | None):
+    """
+    Execute a REM query against the database.
+
+    Supports REM dialect queries (LOOKUP, SEARCH, FUZZY, TRAVERSE) and raw SQL.
+    Either --sql or --file must be provided.
+    """
+    if not sql and not sql_file:
+        click.secho("Error: either --sql or --file is required", fg="red")
+        raise click.Abort()
+
+    # Read query from file if provided
+    if sql_file:
+        query_text = sql_file.read_text(encoding="utf-8")
+    else:
+        query_text = sql  # type: ignore[assignment]
+
+    try:
+        asyncio.run(_run_query_async(query_text, not no_json, user_id))
+    except Exception as exc:  # pragma: no cover - CLI error path
+        logger.exception("Query failed")
+        click.secho(f"✗ Query failed: {exc}", fg="red")
+        raise click.Abort()
+
+
+async def _run_query_async(query_text: str, as_json: bool, user_id: str | None) -> None:
+    """
+    Execute the query using RemService.execute_query_string().
+    """
+    from ...services.postgres import get_postgres_service
+
+    db = get_postgres_service()
+    if not db:
+        click.secho("✗ PostgreSQL is disabled in settings. Enable with POSTGRES__ENABLED=true", fg="red")
+        raise click.Abort()
+
+    if db.pool is None:
+        await db.connect()
+
+    rem_service = RemService(db)
+
+    try:
+        # Use the unified execute_query_string method
+        result = await rem_service.execute_query_string(query_text, user_id=user_id)
+        output_rows = result.get("results", [])
+    except QueryExecutionError as qe:
+        logger.exception("Query execution failed")
+        click.secho(f"✗ Query execution failed: {qe}. Please check the query you provided and try again.", fg="red")
+        raise click.Abort()
+    except ValueError as ve:
+        # Parse errors from the query parser
+        click.secho(f"✗ Invalid query: {ve}", fg="red")
+        raise click.Abort()
+    except Exception as exc:  # pragma: no cover - CLI error path
+        logger.exception("Unexpected error during query execution")
+        click.secho("✗ An unexpected error occurred while executing the query. Please check the query you provided and try again.", fg="red")
+        raise click.Abort()
+
+    if as_json:
+        click.echo(json.dumps(output_rows, default=str, indent=2))
+    else:
+        for r in output_rows:
+            click.echo(str(r))
+
+
+def register_command(cli_group):
+    """Register the query command on the given CLI group (top-level)."""
+    cli_group.add_command(query_command)
+
+
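
For reference, a minimal sketch of driving the new command from Python via Click's test runner. The import path follows the file added above and assumes the installed package exposes it as a module; it also assumes PostgreSQL is enabled, so treat this as illustrative rather than part of the release:

from click.testing import CliRunner

from rem.cli.commands.query import query_command  # path per the file added above (assumption)

runner = CliRunner()
# Equivalent to: rem query --sql 'LOOKUP "Sarah Chen"'
result = runner.invoke(query_command, ["--sql", 'LOOKUP "Sarah Chen"'])
print(result.exit_code)   # 0 on success, non-zero when click.Abort() is raised
print(result.output)      # JSON rows by default; pass --no-json for plain dicts
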
rem/cli/commands/session.py  CHANGED
@@ -331,6 +331,123 @@ async def _show_async(
         raise
 
 
+@session.command("clone")
+@click.argument("session_id")
+@click.option("--to-turn", "-t", type=int, help="Clone up to turn N (counting user messages only)")
+@click.option("--name", "-n", help="Name/description for the cloned session")
+def clone(session_id: str, to_turn: int | None, name: str | None):
+    """
+    Clone a session for exploring alternate conversation paths.
+
+    SESSION_ID: The session ID to clone.
+
+    Examples:
+
+        # Clone entire session
+        rem session clone 810f1f2d-d5a1-4c02-83b6-67040b47f7c0
+
+        # Clone up to turn 3 (first 3 user messages and their responses)
+        rem session clone 810f1f2d-d5a1-4c02-83b6-67040b47f7c0 --to-turn 3
+
+        # Clone with a descriptive name
+        rem session clone 810f1f2d-d5a1-4c02-83b6-67040b47f7c0 -n "Alternate anxiety path"
+    """
+    asyncio.run(_clone_async(session_id, to_turn, name))
+
+
+async def _clone_async(
+    session_id: str,
+    to_turn: int | None,
+    name: str | None,
+):
+    """Async implementation of clone command."""
+    from uuid import uuid4
+    from ...models.entities.session import Session, SessionMode
+
+    pg = get_postgres_service()
+    if not pg:
+        logger.error("PostgreSQL not available")
+        return
+
+    await pg.connect()
+
+    try:
+        # Load original session messages
+        message_repo = Repository(Message, "messages", db=pg)
+        messages = await message_repo.find(
+            filters={"session_id": session_id},
+            order_by="created_at ASC",
+            limit=1000,
+        )
+
+        if not messages:
+            logger.error(f"No messages found for session {session_id}")
+            return
+
+        # If --to-turn specified, filter messages up to that turn (user messages)
+        if to_turn is not None:
+            user_count = 0
+            cutoff_idx = len(messages)
+            for idx, msg in enumerate(messages):
+                if msg.message_type == "user":
+                    user_count += 1
+                    if user_count > to_turn:
+                        cutoff_idx = idx
+                        break
+            messages = messages[:cutoff_idx]
+            logger.info(f"Cloning {len(messages)} messages (up to turn {to_turn})")
+        else:
+            logger.info(f"Cloning all {len(messages)} messages")
+
+        # Generate new session ID
+        new_session_id = str(uuid4())
+
+        # Get user_id and tenant_id from first message
+        first_msg = messages[0]
+        user_id = first_msg.user_id
+        tenant_id = first_msg.tenant_id or "default"
+
+        # Create Session record with CLONE mode and lineage
+        session_repo = Repository(Session, "sessions", db=pg)
+        new_session = Session(
+            id=uuid4(),
+            name=name or f"Clone of {session_id[:8]}",
+            mode=SessionMode.CLONE,
+            original_trace_id=session_id,
+            description=f"Cloned from session {session_id}" + (f" at turn {to_turn}" if to_turn else ""),
+            user_id=user_id,
+            tenant_id=tenant_id,
+            message_count=len(messages),
+        )
+        await session_repo.upsert(new_session)
+        logger.info(f"Created session record: {new_session.id}")
+
+        # Copy messages with new session_id
+        for msg in messages:
+            new_msg = Message(
+                id=uuid4(),
+                user_id=msg.user_id,
+                tenant_id=msg.tenant_id,
+                session_id=str(new_session.id),
+                content=msg.content,
+                message_type=msg.message_type,
+                metadata=msg.metadata,
+            )
+            await message_repo.upsert(new_msg)
+
+        click.echo(f"\n✅ Cloned session successfully!")
+        click.echo(f"  Original: {session_id}")
+        click.echo(f"  New: {new_session.id}")
+        click.echo(f"  Messages: {len(messages)}")
+        if to_turn:
+            click.echo(f"  Turns: {to_turn}")
+        click.echo(f"\nContinue this session with:")
+        click.echo(f"  rem ask <agent> \"your message\" --session-id {new_session.id}")
+
+    finally:
+        await pg.disconnect()
+
+
 def register_command(cli_group):
     """Register the session command group."""
     cli_group.add_command(session)

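To illustrate the --to-turn cutoff used by _clone_async above, here is a small self-contained sketch of the same counting logic on plain dicts (the sample history is made up; the real command operates on Message entities):

def cutoff_for_turn(messages: list[dict], to_turn: int) -> list[dict]:
    """Keep everything up to and including turn `to_turn` (a turn starts at a user message)."""
    user_count = 0
    cutoff_idx = len(messages)
    for idx, msg in enumerate(messages):
        if msg["role"] == "user":
            user_count += 1
            if user_count > to_turn:  # first user message *after* the requested turn
                cutoff_idx = idx
                break
    return messages[:cutoff_idx]

history = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello"},
    {"role": "user", "content": "tell me more"},
    {"role": "assistant", "content": "sure..."},
    {"role": "user", "content": "and then?"},
]
print(len(cutoff_for_turn(history, 2)))  # 4 -> the third user turn is dropped
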
rem/cli/main.py  CHANGED
@@ -97,6 +97,7 @@ from .commands.mcp import register_command as register_mcp_command
 from .commands.scaffold import scaffold as scaffold_command
 from .commands.cluster import register_commands as register_cluster_commands
 from .commands.session import register_command as register_session_command
+from .commands.query import register_command as register_query_command
 
 register_schema_commands(schema)
 register_db_commands(db)
@@ -107,6 +108,7 @@ register_ask_command(cli)
 register_configure_command(cli)
 register_serve_command(cli)
 register_mcp_command(cli)
+register_query_command(cli)
 cli.add_command(experiments_group)
 cli.add_command(scaffold_command)
 register_session_command(cli)

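The register_*_command hooks follow Click's standard group registration pattern; a minimal standalone sketch (all names here are illustrative, not from the package):

import click

@click.group()
def cli():
    """Toy CLI group standing in for `rem`."""

@click.command("hello")
def hello_command():
    click.echo("hello")

def register_command(cli_group):
    # Same shape as the register_*_command helpers imported in main.py
    cli_group.add_command(hello_command)

register_command(cli)
# cli(["hello"], standalone_mode=False) would now dispatch to hello_command.
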
rem/models/entities/session.py  CHANGED (+1 line)

rem/services/postgres/repository.py  CHANGED
@@ -31,27 +31,17 @@ if TYPE_CHECKING:
     from .service import PostgresService
 
 
-# Singleton instance for connection pool reuse
-_postgres_instance: "PostgresService | None" = None
-
-
 def get_postgres_service() -> "PostgresService | None":
     """
-    Get PostgresService singleton
+    Get PostgresService singleton from parent module.
 
-
-
+    Uses late import to avoid circular import issues.
+    Previously had a separate _postgres_instance here which caused
+    "pool not connected" errors due to duplicate connection pools.
     """
-
-
-
-        return None
-
-    if _postgres_instance is None:
-        from .service import PostgresService
-        _postgres_instance = PostgresService()
-
-    return _postgres_instance
+    # Late import to avoid circular import (repository.py imported by __init__.py)
+    from rem.services.postgres import get_postgres_service as _get_singleton
+    return _get_singleton()
 
 T = TypeVar("T", bound=BaseModel)
 

rem/services/rem/service.py  CHANGED
@@ -478,6 +478,53 @@ class RemService:
         parser = RemQueryParser()
         return parser.parse(query_string)
 
+    async def execute_query_string(
+        self, query_string: str, user_id: str | None = None
+    ) -> dict[str, Any]:
+        """
+        Execute a REM dialect query string directly.
+
+        This is the unified entry point for executing REM queries from both
+        the CLI and API. It handles parsing the query string, creating the
+        RemQuery model, and executing it.
+
+        Args:
+            query_string: REM dialect query (e.g., 'LOOKUP "Sarah Chen"',
+                'SEARCH resources "API design"', 'SELECT * FROM users')
+            user_id: Optional user ID for query isolation
+
+        Returns:
+            Dict with query results and metadata:
+            - query_type: The type of query executed
+            - results: List of result rows
+            - count: Number of results
+            - Additional fields depending on query type
+
+        Raises:
+            ValueError: If the query string is invalid
+            QueryExecutionError: If query execution fails
+
+        Example:
+            >>> result = await rem_service.execute_query_string(
+            ...     'LOOKUP "Sarah Chen"',
+            ...     user_id="user-123"
+            ... )
+            >>> print(result["count"])
+            1
+        """
+        # Parse the query string into type and parameters
+        query_type, parameters = self._parse_query_string(query_string)
+
+        # Create and validate the RemQuery model
+        rem_query = RemQuery.model_validate({
+            "query_type": query_type,
+            "parameters": parameters,
+            "user_id": user_id,
+        })
+
+        # Execute and return results
+        return await self.execute_query(rem_query)
+
     async def ask_rem(
         self, natural_query: str, tenant_id: str, llm_model: str | None = None, plan_mode: bool = False
     ) -> dict[str, Any]:

rem/services/session/compression.py  CHANGED
@@ -96,7 +96,7 @@ class MessageCompressor:
         Returns:
             Compressed message dict
         """
-        content = message.get("content"
+        content = message.get("content") or ""
 
         # Don't compress short messages or system messages
         if (
@@ -242,7 +242,7 @@ class SessionMessageStore:
         # Use pre-generated id from message dict if available (for frontend feedback)
         msg = Message(
             id=message.get("id"),  # Use pre-generated ID if provided
-            content=message.get("content"
+            content=message.get("content") or "",
             message_type=message.get("role", "assistant"),
             session_id=session_id,
             tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
@@ -337,7 +337,7 @@ class SessionMessageStore:
         compressed_messages = []
 
         for idx, message in enumerate(messages):
-            content = message.get("content"
+            content = message.get("content") or ""
 
             # Only store and compress long assistant responses
             if (
@@ -368,6 +368,8 @@ class SessionMessageStore:
             }
 
             # For tool messages, include tool call details in metadata
+            # Note: tool_arguments is stored only when provided (parent tool calls)
+            # For child tool calls (e.g., register_metadata), args are in content as JSON
             if message.get("role") == "tool":
                 if message.get("tool_call_id"):
                     msg_metadata["tool_call_id"] = message.get("tool_call_id")
@@ -436,6 +438,8 @@ class SessionMessageStore:
             }
 
             # For tool messages, reconstruct tool call metadata
+            # Note: tool_arguments may be in metadata (parent calls) or parsed from
+            # content (child calls like register_metadata) by pydantic_messages.py
             if role == "tool" and msg.metadata:
                 if msg.metadata.get("tool_call_id"):
                     msg_dict["tool_call_id"] = msg.metadata["tool_call_id"]

rem/services/session/pydantic_messages.py  CHANGED
@@ -5,12 +5,16 @@ storage format into pydantic-ai's native ModelRequest/ModelResponse types.
 
 Key insight: When we store tool results, we only store the result (ToolReturnPart).
 But LLM APIs require matching ToolCallPart for each ToolReturnPart. So we synthesize
-the ToolCallPart from stored metadata (tool_name, tool_call_id
+the ToolCallPart from stored metadata (tool_name, tool_call_id) and arguments.
+
+Tool arguments can come from two places:
+- Parent tool calls (ask_agent): tool_arguments stored in metadata (content = result)
+- Child tool calls (register_metadata): arguments parsed from content (content = args as JSON)
 
 Storage format (our simplified format):
     {"role": "user", "content": "..."}
     {"role": "assistant", "content": "..."}
-    {"role": "tool", "content": "{...}", "tool_name": "...", "tool_call_id": "...", "tool_arguments": {...}}
+    {"role": "tool", "content": "{...}", "tool_name": "...", "tool_call_id": "...", "tool_arguments": {...}}  # optional
 
 Pydantic-ai format (what the LLM expects):
     ModelRequest(parts=[UserPromptPart(content="...")])
@@ -31,6 +35,7 @@ Example usage:
 """
 
 import json
+import re
 from typing import Any
 
 from loguru import logger
@@ -46,6 +51,15 @@ from pydantic_ai.messages import (
 )
 
 
+def _sanitize_tool_name(tool_name: str) -> str:
+    """Sanitize tool name for OpenAI API compatibility.
+
+    OpenAI requires tool names to match pattern: ^[a-zA-Z0-9_-]+$
+    This replaces invalid characters (like colons) with underscores.
+    """
+    return re.sub(r'[^a-zA-Z0-9_-]', '_', tool_name)
+
+
 def session_to_pydantic_messages(
     session_history: list[dict[str, Any]],
     system_prompt: str | None = None,
@@ -92,7 +106,7 @@ def session_to_pydantic_messages(
     while i < len(session_history):
         msg = session_history[i]
         role = msg.get("role", "")
-        content = msg.get("content"
+        content = msg.get("content") or ""
 
         if role == "user":
             # User messages become ModelRequest with UserPromptPart
@@ -110,8 +124,15 @@ def session_to_pydantic_messages(
                 tool_msg = session_history[j]
                 tool_name = tool_msg.get("tool_name", "unknown_tool")
                 tool_call_id = tool_msg.get("tool_call_id", f"call_{j}")
-
-
+                tool_content = tool_msg.get("content") or "{}"
+
+                # tool_arguments: prefer explicit field, fallback to parsing content
+                tool_arguments = tool_msg.get("tool_arguments")
+                if tool_arguments is None and isinstance(tool_content, str) and tool_content:
+                    try:
+                        tool_arguments = json.loads(tool_content)
+                    except json.JSONDecodeError:
+                        tool_arguments = {}
 
                 # Parse tool content if it's a JSON string
                 if isinstance(tool_content, str):
@@ -122,16 +143,19 @@ def session_to_pydantic_messages(
                 else:
                     tool_result = tool_content
 
+                # Sanitize tool name for OpenAI API compatibility
+                safe_tool_name = _sanitize_tool_name(tool_name)
+
                 # Synthesize ToolCallPart (what the model "called")
                 tool_calls.append(ToolCallPart(
-                    tool_name=
+                    tool_name=safe_tool_name,
                     args=tool_arguments if tool_arguments else {},
                     tool_call_id=tool_call_id,
                 ))
 
                 # Create ToolReturnPart (the actual result)
                 tool_returns.append(ToolReturnPart(
-                    tool_name=
+                    tool_name=safe_tool_name,
                     content=tool_result,
                     tool_call_id=tool_call_id,
                 ))
@@ -166,8 +190,15 @@ def session_to_pydantic_messages(
             # Orphan tool message (no preceding assistant) - synthesize both parts
             tool_name = msg.get("tool_name", "unknown_tool")
             tool_call_id = msg.get("tool_call_id", f"call_{i}")
-
-
+            tool_content = msg.get("content") or "{}"
+
+            # tool_arguments: prefer explicit field, fallback to parsing content
+            tool_arguments = msg.get("tool_arguments")
+            if tool_arguments is None and isinstance(tool_content, str) and tool_content:
+                try:
+                    tool_arguments = json.loads(tool_content)
+                except json.JSONDecodeError:
+                    tool_arguments = {}
 
             # Parse tool content
             if isinstance(tool_content, str):
@@ -178,10 +209,13 @@ def session_to_pydantic_messages(
             else:
                 tool_result = tool_content
 
+            # Sanitize tool name for OpenAI API compatibility
+            safe_tool_name = _sanitize_tool_name(tool_name)
+
             # Synthesize the tool call (ModelResponse with ToolCallPart)
             messages.append(ModelResponse(
                 parts=[ToolCallPart(
-                    tool_name=
+                    tool_name=safe_tool_name,
                    args=tool_arguments if tool_arguments else {},
                     tool_call_id=tool_call_id,
                 )],
@@ -191,7 +225,7 @@ def session_to_pydantic_messages(
             # Add the tool return (ModelRequest with ToolReturnPart)
             messages.append(ModelRequest(
                 parts=[ToolReturnPart(
-                    tool_name=
+                    tool_name=safe_tool_name,
                     content=tool_result,
                     tool_call_id=tool_call_id,
                 )]

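A small self-contained sketch of the reconstruction logic added in pydantic_messages.py: sanitizing tool names and recovering tool_arguments from stored content when the explicit field is absent. The sample stored message is illustrative, not taken from the package:

import json
import re

def _sanitize_tool_name(tool_name: str) -> str:
    # OpenAI tool names must match ^[a-zA-Z0-9_-]+$
    return re.sub(r'[^a-zA-Z0-9_-]', '_', tool_name)

# Hypothetical stored child tool call: args live in content as JSON, no tool_arguments field
stored = {"role": "tool", "tool_name": "rem:register_metadata",
          "tool_call_id": "call_1", "content": '{"key": "topic", "value": "anxiety"}'}

tool_arguments = stored.get("tool_arguments")
if tool_arguments is None:
    try:
        tool_arguments = json.loads(stored.get("content") or "{}")
    except json.JSONDecodeError:
        tool_arguments = {}

print(_sanitize_tool_name(stored["tool_name"]))  # rem_register_metadata
print(tool_arguments)                            # {'key': 'topic', 'value': 'anxiety'}

The related `message.get("content") or ""` changes in compression.py serve a similar defensive purpose: unlike `get("content", "")`, they also normalize an explicit None value to an empty string.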
rem/services/session/reload.py  CHANGED
@@ -12,7 +12,8 @@ Design Pattern:
 
 Message Types on Reload:
 - user: Returned as-is
-- tool: Returned
+- tool: Returned with metadata (tool_call_id, tool_name). tool_arguments may be in
+  metadata (parent calls) or parsed from content (child calls) by pydantic_messages.py
 - assistant: Compressed on load if long (>400 chars), with REM LOOKUP for recovery
 """
 

rem/settings.py  CHANGED
@@ -424,6 +424,49 @@ class AuthSettings(BaseSettings):
     google: GoogleOAuthSettings = Field(default_factory=GoogleOAuthSettings)
     microsoft: MicrosoftOAuthSettings = Field(default_factory=MicrosoftOAuthSettings)
 
+    # Pre-approved login codes (bypass email verification)
+    # Format: comma-separated codes with prefix A=admin, B=normal user
+    # Example: "A12345,A67890,B11111,B22222"
+    preapproved_codes: str = Field(
+        default="",
+        description=(
+            "Comma-separated list of pre-approved login codes. "
+            "Prefix A = admin user, B = normal user. "
+            "Example: 'A12345,A67890,B11111'. "
+            "Users can login with these codes without email verification."
+        ),
+    )
+
+    def check_preapproved_code(self, code: str) -> dict | None:
+        """
+        Check if a code is in the pre-approved list.
+
+        Args:
+            code: The code to check (including prefix)
+
+        Returns:
+            Dict with 'role' key if valid, None if not found.
+            - A prefix -> role='admin'
+            - B prefix -> role='user'
+        """
+        if not self.preapproved_codes:
+            return None
+
+        codes = [c.strip().upper() for c in self.preapproved_codes.split(",") if c.strip()]
+        code_upper = code.strip().upper()
+
+        if code_upper not in codes:
+            return None
+
+        # Parse prefix to determine role
+        if code_upper.startswith("A"):
+            return {"role": "admin", "code": code_upper}
+        elif code_upper.startswith("B"):
+            return {"role": "user", "code": code_upper}
+        else:
+            # Unknown prefix, treat as user
+            return {"role": "user", "code": code_upper}
+
     @field_validator("session_secret", mode="before")
     @classmethod
     def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:

rem/sql/migrations/004_cache_system.sql  CHANGED
@@ -64,9 +64,11 @@ CREATE OR REPLACE FUNCTION rem_kv_store_empty(p_user_id TEXT)
 RETURNS BOOLEAN AS $$
 BEGIN
     -- Quick existence check - very fast with index
+    -- Check for user-specific OR public (NULL user_id) entries
+    -- This ensures self-healing triggers correctly for public ontologies
     RETURN NOT EXISTS (
         SELECT 1 FROM kv_store
-        WHERE user_id = p_user_id
+        WHERE user_id = p_user_id OR user_id IS NULL
        LIMIT 1
     );
 END;