PyPI - remdb - Versions diffs - 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl - Mend

remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (55) hide show

rem/agentic/context.py +28 -24
rem/agentic/mcp/tool_wrapper.py +29 -3
rem/agentic/otel/setup.py +92 -4
rem/agentic/providers/pydantic_ai.py +88 -18
rem/agentic/schema.py +358 -21
rem/agentic/tools/rem_tools.py +3 -3
rem/api/main.py +85 -16
rem/api/mcp_router/resources.py +1 -1
rem/api/mcp_router/server.py +18 -4
rem/api/mcp_router/tools.py +383 -16
rem/api/routers/admin.py +218 -1
rem/api/routers/chat/completions.py +30 -3
rem/api/routers/chat/streaming.py +143 -3
rem/api/routers/feedback.py +12 -319
rem/api/routers/query.py +360 -0
rem/api/routers/shared_sessions.py +13 -13
rem/cli/commands/README.md +237 -64
rem/cli/commands/cluster.py +1300 -0
rem/cli/commands/configure.py +1 -3
rem/cli/commands/db.py +354 -143
rem/cli/commands/process.py +14 -8
rem/cli/commands/schema.py +92 -45
rem/cli/main.py +27 -6
rem/models/core/rem_query.py +5 -2
rem/models/entities/shared_session.py +2 -28
rem/registry.py +10 -4
rem/services/content/service.py +30 -8
rem/services/embeddings/api.py +4 -4
rem/services/embeddings/worker.py +16 -16
rem/services/postgres/README.md +151 -26
rem/services/postgres/__init__.py +2 -1
rem/services/postgres/diff_service.py +531 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
rem/services/postgres/schema_generator.py +205 -4
rem/services/postgres/service.py +6 -6
rem/services/rem/parser.py +44 -9
rem/services/rem/service.py +36 -2
rem/services/session/reload.py +1 -1
rem/settings.py +56 -7
rem/sql/background_indexes.sql +19 -24
rem/sql/migrations/001_install.sql +252 -69
rem/sql/migrations/002_install_models.sql +2171 -593
rem/sql/migrations/003_optional_extensions.sql +326 -0
rem/sql/migrations/004_cache_system.sql +548 -0
rem/utils/__init__.py +18 -0
rem/utils/date_utils.py +2 -2
rem/utils/schema_loader.py +17 -13
rem/utils/sql_paths.py +146 -0
rem/workers/__init__.py +2 -1
rem/workers/unlogged_maintainer.py +463 -0
{remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/METADATA +149 -76
{remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/RECORD +54 -48
rem/sql/migrations/003_seed_default_user.sql +0 -48
{remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/WHEEL +0 -0
{remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0

rem/agentic/schema.py CHANGED Viewed

@@ -13,7 +13,7 @@ The schema protocol serves as:
 """
 from typing import Any, Literal
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 class MCPToolReference(BaseModel):
@@ -23,11 +23,21 @@ class MCPToolReference(BaseModel):
     Tools are functions that agents can call during execution to
     interact with external systems, retrieve data, or perform actions.
-    Example:
+    Two usage patterns:
+    1. With mcp_servers config: Just declare name + description, tools loaded from MCP servers
+    2. Explicit MCP server: Specify mcp_server to load tool from specific server
+    Example (declarative with mcp_servers):
+        {
+            "name": "search_rem",
+            "description": "Execute REM queries for entity lookup and search"
+        }
+    Example (explicit server):
         {
             "name": "lookup_entity",
             "mcp_server": "rem",
-            "description": "Lookup entities by exact key with O(1) performance"
+            "description": "Lookup entities by exact key"
         }
     """
@@ -38,20 +48,20 @@ class MCPToolReference(BaseModel):
         )
     )
-    mcp_server: str = Field(
+    mcp_server: str | None = Field(
+        default=None,
         description=(
-            "MCP server identifier. Resolved via environment variable: "
-            "MCP_SERVER_{NAME} or MCP__{NAME}__URL. "
-            "Common values: 'rem' (REM knowledge graph), 'filesystem', 'web'."
+            "MCP server identifier (optional when using mcp_servers config). "
+            "If not specified, tool is expected from configured mcp_servers. "
+            "Resolved via environment variable: MCP_SERVER_{NAME} or MCP__{NAME}__URL."
         )
     )
     description: str | None = Field(
         default=None,
         description=(
-            "Optional description override. If provided, replaces the tool's "
-            "description from the MCP server in the agent's context. "
-            "Use this to provide agent-specific guidance on tool usage."
+            "Tool description for the agent. Explains what the tool does "
+            "and when to use it. This is visible to the LLM."
         ),
     )
@@ -63,29 +73,90 @@ class MCPResourceReference(BaseModel):
     Resources are data sources that can be read by agents, such as
     knowledge graph entities, files, or API endpoints.
-    Example:
+    Two formats supported:
+    1. uri: Exact URI or URI with query params
+    2. uri_pattern: Regex pattern for flexible matching
+    Example (exact URI):
+        {
+            "uri": "rem://schemas",
+            "name": "Agent Schemas",
+            "description": "List all available agent schemas"
+        }
+    Example (pattern):
         {
             "uri_pattern": "rem://resources/.*",
             "mcp_server": "rem"
         }
     """
-    uri_pattern: str = Field(
+    # Support both exact URI and pattern
+    uri: str | None = Field(
+        default=None,
+        description=(
+            "Exact resource URI or URI with query parameters. "
+            "Examples: 'rem://schemas', 'rem://resources?category=drug.*'"
+        )
+    )
+    uri_pattern: str | None = Field(
+        default=None,
         description=(
             "Regex pattern matching resource URIs. "
-            "Examples: "
-            "'rem://resources/.*' (all resources), "
-            "'rem://moments/.*' (all moments), "
-            "'file:///data/.*' (local files). "
-            "Supports full regex syntax for flexible matching."
+            "Examples: 'rem://resources/.*' (all resources). "
+            "Use uri for exact URIs, uri_pattern for regex matching."
+        )
+    )
+    name: str | None = Field(
+        default=None,
+        description="Human-readable name for the resource."
+    )
+    description: str | None = Field(
+        default=None,
+        description="Description of what the resource provides."
+    )
+    mcp_server: str | None = Field(
+        default=None,
+        description=(
+            "MCP server identifier (optional when using mcp_servers config). "
+            "Resolved via environment variable MCP_SERVER_{NAME}."
+        )
+    )
+class MCPServerConfig(BaseModel):
+    """
+    MCP server configuration for in-process tool loading.
+    Example:
+        {
+            "type": "local",
+            "module": "rem.mcp_server",
+            "id": "rem-local"
+        }
+    """
+    type: Literal["local"] = Field(
+        default="local",
+        description="Server type. Currently only 'local' (in-process) is supported.",
+    )
+    module: str = Field(
+        description=(
+            "Python module path containing the MCP server. "
+            "The module must export an 'mcp' object that supports get_tools(). "
+            "Example: 'rem.mcp_server'"
         )
     )
-    mcp_server: str = Field(
+    id: str = Field(
         description=(
-            "MCP server identifier that provides these resources. "
-            "Resolved via environment variable MCP_SERVER_{NAME}. "
-            "The server must expose resources matching the uri_pattern."
+            "Server identifier for logging and debugging. "
+            "Example: 'rem-local'"
         )
     )
@@ -130,6 +201,37 @@ class AgentSchemaMetadata(BaseModel):
         ),
     )
+    # System prompt override (takes precedence over description when present)
+    system_prompt: str | None = Field(
+        default=None,
+        description=(
+            "Custom system prompt that overrides or extends the schema description. "
+            "When present, this is combined with the main schema.description field "
+            "to form the complete system prompt. Use this for detailed instructions "
+            "that you don't want in the public schema description."
+        ),
+    )
+    # Structured output toggle
+    structured_output: bool = Field(
+        default=True,
+        description=(
+            "Whether to enforce structured JSON output. "
+            "When False, the agent produces free-form text and schema properties "
+            "are converted to prompt guidance instead. Default: True (JSON output)."
+        ),
+    )
+    # MCP server configurations (for dynamic tool loading)
+    mcp_servers: list[MCPServerConfig] = Field(
+        default_factory=list,
+        description=(
+            "MCP server configurations for dynamic tool loading. "
+            "Servers are loaded in-process at agent creation time. "
+            "All tools from configured servers become available to the agent."
+        ),
+    )
     tools: list[MCPToolReference] = Field(
         default_factory=list,
         description=(
@@ -394,3 +496,238 @@ def create_agent_schema(
         json_schema_extra=metadata.model_dump(),
         **kwargs,
     )
+# =============================================================================
+# YAML and Database Serialization
+# =============================================================================
+def schema_to_dict(schema: AgentSchema, exclude_none: bool = True) -> dict[str, Any]:
+    """
+    Convert an AgentSchema into a plain dictionary.
+    The dictionary is exactly what ``schema.model_dump`` returns and is the
+    shape intended for YAML files, the database spec column and API responses.
+    Args:
+        schema: The AgentSchema instance to convert.
+        exclude_none: When True (the default), entries whose value is None
+            are left out of the result.
+    Returns:
+        The dictionary form of ``schema``.
+    Example:
+        >>> schema = AgentSchema(
+        ...     description="System prompt...",
+        ...     properties={"answer": {"type": "string"}},
+        ...     json_schema_extra={"name": "my-agent", "structured_output": False}
+        ... )
+        >>> schema_to_dict(schema)["json_schema_extra"]["name"]
+        'my-agent'
+    """
+    dumped: dict[str, Any] = schema.model_dump(exclude_none=exclude_none)
+    return dumped
+def schema_from_dict(data: dict[str, Any]) -> AgentSchema:
+    """
+    Build a validated AgentSchema from a plain dictionary.
+    Typical inputs are the result of ``yaml.safe_load()`` on a schema file,
+    the JSON stored in the database spec column, or an API request body.
+    Args:
+        data: Dictionary holding the schema definition.
+    Returns:
+        The AgentSchema produced by validating ``data``.
+    Raises:
+        ValidationError: If ``data`` does not match the schema structure.
+    Example:
+        >>> data = {"type": "object", "description": "...", "properties": {}, "json_schema_extra": {"name": "test"}}
+        >>> schema_from_dict(data).json_schema_extra["name"]
+        'test'
+    """
+    validated = AgentSchema.model_validate(data)
+    return validated
+def schema_to_yaml(schema: AgentSchema) -> str:
+    """
+    Render an AgentSchema as a YAML document.
+    The schema is first converted with ``schema_to_dict`` and then dumped in
+    block style, keeping unicode characters unescaped and preserving the key
+    order of the dictionary. The layout follows the schema file format:
+    ```yaml
+    type: object
+    description: |
+      System prompt here...
+    properties:
+      answer:
+        type: string
+    json_schema_extra:
+      name: my-agent
+      system_prompt: |
+        Extended prompt here...
+    ```
+    Args:
+        schema: The AgentSchema instance to render.
+    Returns:
+        The YAML text for ``schema``.
+    Example:
+        >>> schema = create_agent_schema(
+        ...     description="You are a test agent",
+        ...     properties={"answer": {"type": "string"}},
+        ...     required=["answer"],
+        ...     name="test-agent"
+        ... )
+        >>> "test-agent" in schema_to_yaml(schema)
+        True
+    """
+    import yaml
+    # Block style, raw unicode, and no key re-sorting.
+    dump_options = {
+        "default_flow_style": False,
+        "allow_unicode": True,
+        "sort_keys": False,
+    }
+    payload = schema_to_dict(schema)
+    return yaml.dump(payload, **dump_options)
+def schema_from_yaml(yaml_content: str) -> AgentSchema:
+    """
+    Parse a YAML document and validate it as an AgentSchema.
+    The text is parsed with ``yaml.safe_load`` and the parsed value is handed
+    to ``schema_from_dict`` for validation.
+    Args:
+        yaml_content: YAML text containing the schema definition.
+    Returns:
+        The validated AgentSchema.
+    Raises:
+        yaml.YAMLError: If the text is not valid YAML.
+        ValidationError: If the parsed data is not a valid schema.
+    Example:
+        >>> yaml_str = '''
+        ... type: object
+        ... description: Test agent
+        ... properties:
+        ...   answer:
+        ...     type: string
+        ... json_schema_extra:
+        ...   name: test
+        ... '''
+        >>> schema_from_yaml(yaml_str).json_schema_extra["name"]
+        'test'
+    """
+    import yaml
+    return schema_from_dict(yaml.safe_load(yaml_content))
+def schema_from_yaml_file(file_path: str) -> AgentSchema:
+    """
+    Load AgentSchema from a YAML file.
+    The file is read as UTF-8 text and its content is passed to
+    ``schema_from_yaml``.
+    Args:
+        file_path: Path to YAML file
+    Returns:
+        Validated AgentSchema instance
+    Raises:
+        FileNotFoundError: If file doesn't exist
+        UnicodeDecodeError: If the file is not valid UTF-8
+        yaml.YAMLError: If YAML parsing fails
+        ValidationError: If schema structure is invalid
+    Example:
+        >>> schema = schema_from_yaml_file("schemas/agents/rem.yaml")
+        >>> schema.json_schema_extra["name"]
+        'rem'
+    """
+    # Decode explicitly as UTF-8: without an encoding argument open() uses the
+    # platform locale encoding, which can mis-decode non-ASCII schema text
+    # (schema_to_yaml emits unescaped unicode).
+    with open(file_path, "r", encoding="utf-8") as f:
+        return schema_from_yaml(f.read())
+def get_system_prompt(schema: AgentSchema | dict[str, Any]) -> str:
+    """
+    Extract the complete system prompt from a schema.
+    Combines:
+    1. schema.description (base system prompt / public description)
+    2. json_schema_extra.system_prompt (extended instructions if present)
+    When both are present they are joined with a blank line; when only one is
+    present it is returned alone; when neither is present the result is "".
+    Args:
+        schema: AgentSchema instance or raw dict
+    Returns:
+        Complete system prompt string (never None)
+    Example:
+        >>> schema = AgentSchema(
+        ...     description="Base description",
+        ...     properties={},
+        ...     json_schema_extra={"name": "test", "system_prompt": "Extended instructions"}
+        ... )
+        >>> prompt = get_system_prompt(schema)
+        >>> "Base description" in prompt and "Extended instructions" in prompt
+        True
+    """
+    if isinstance(schema, dict):
+        base = schema.get("description")
+        extra = schema.get("json_schema_extra")
+    else:
+        base = schema.description
+        extra = schema.json_schema_extra
+    # The metadata may be a raw dict or an already-validated model, whichever
+    # form the schema itself takes; anything else carries no custom prompt.
+    if isinstance(extra, dict):
+        custom = extra.get("system_prompt")
+    elif isinstance(extra, AgentSchemaMetadata):
+        custom = extra.system_prompt
+    else:
+        custom = None
+    # A null or missing description must not leak None out of a function
+    # that is annotated to return str.
+    base = base or ""
+    if custom:
+        return f"{base}\n\n{custom}" if base else custom
+    return base
+def get_metadata(schema: AgentSchema | dict[str, Any]) -> AgentSchemaMetadata:
+    """
+    Return the schema's ``json_schema_extra`` as an AgentSchemaMetadata.
+    For a dict the ``json_schema_extra`` key is read (an empty dict is used
+    when the key is absent); otherwise the ``json_schema_extra`` attribute is
+    read. A value that is already an AgentSchemaMetadata is returned as-is,
+    anything else is validated into one.
+    Args:
+        schema: AgentSchema instance or raw dict.
+    Returns:
+        The AgentSchemaMetadata for the schema.
+    Example:
+        >>> schema = {"json_schema_extra": {"name": "test", "system_prompt": "hello"}}
+        >>> meta = get_metadata(schema)
+        >>> meta.name
+        'test'
+        >>> meta.system_prompt
+        'hello'
+    """
+    raw = (
+        schema.get("json_schema_extra", {})
+        if isinstance(schema, dict)
+        else schema.json_schema_extra
+    )
+    if not isinstance(raw, AgentSchemaMetadata):
+        return AgentSchemaMetadata.model_validate(raw)
+    return raw

rem/agentic/tools/rem_tools.py CHANGED Viewed

@@ -162,10 +162,10 @@ async def search_rem_tool(
             return {"status": "error", "error": f"Unknown query_type: {query_type}"}
         # Execute query
-        logger.info(f"Executing REM query: {query_type} for user {user_id}")
+        logger.debug(f"Executing REM query: {query_type} for user {user_id}")
         result = await rem_service.execute_query(query)
-        logger.info(f"Query completed: {query_type}")
+        logger.debug(f"Query completed: {query_type}")
         return {
             "status": "success",
             "query_type": query_type,
@@ -212,7 +212,7 @@ async def ingest_file_tool(
             is_local_server=is_local_server,
         )
-        logger.info(
+        logger.debug(
             f"File ingestion complete: {result['file_name']} "
             f"(status: {result['processing_status']}, "
             f"resources: {result['resources_created']})"

rem/api/main.py CHANGED Viewed

@@ -26,10 +26,10 @@ Endpoints:
 - /health                    : Health check
 - /api/v1/mcp                : MCP endpoint (HTTP transport)
 - /api/v1/chat/completions   : OpenAI-compatible chat completions (streaming & non-streaming)
-- /api/v1/query              : REM query execution (TODO)
+- /api/v1/query              : REM query execution (rem-dialect or natural-language)
 - /api/v1/resources          : Resource CRUD (TODO)
 - /api/v1/moments            : Moment CRUD (TODO)
-- /api/auth/*                : OAuth/OIDC authentication (TODO)
+- /api/auth/*                : OAuth/OIDC authentication
 - /docs                      : OpenAPI documentation
 Headers → AgentContext Mapping:
@@ -59,8 +59,16 @@ Running:
     hypercorn rem.api.main:app --bind 0.0.0.0:8000
 """
+import importlib.metadata
 import secrets
+import sys
 import time
+# Get package version for API responses
+try:
+    __version__ = importlib.metadata.version("remdb")
+except importlib.metadata.PackageNotFoundError:
+    __version__ = "0.0.0-dev"
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, Request
@@ -73,6 +81,23 @@ from starlette.middleware.sessions import SessionMiddleware
 from .mcp_router.server import create_mcp_server
 from ..settings import settings
+# Configure loguru based on settings
+# Remove default handler and add one with configured level
+logger.remove()
+# Configure level icons - only warnings and errors get visual indicators
+logger.level("DEBUG", icon=" ")
+logger.level("INFO", icon=" ")
+logger.level("WARNING", icon="🟠")
+logger.level("ERROR", icon="🔴")
+logger.level("CRITICAL", icon="🔴")
+logger.add(
+    sys.stderr,
+    level=settings.api.log_level.upper(),
+    format="<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | {level.icon} <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
+)
 class RequestLoggingMiddleware(BaseHTTPMiddleware):
     """
@@ -82,26 +107,64 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
     - Logs request method, path, client, user-agent
     - Logs response status, content-type, duration
     - Essential for debugging OAuth flow and MCP sessions
+    - Health checks and 404s logged at DEBUG level to reduce noise
+    - Scanner/exploit attempts (common vulnerability probes) logged at DEBUG
     """
+    # Paths to log at DEBUG level (health checks, probes)
+    DEBUG_PATHS = {"/health", "/healthz", "/ready", "/readyz", "/livez"}
+    # Path patterns that indicate vulnerability scanners (log at DEBUG)
+    SCANNER_PATTERNS = (
+        "/vendor/",      # PHP composer exploits
+        "/.git/",        # Git config exposure
+        "/.env",         # Environment file exposure
+        "/wp-",          # WordPress exploits
+        "/phpunit/",     # PHPUnit RCE
+        "/eval-stdin",   # PHP eval exploits
+        "/console/",     # Console exposure
+        "/actuator/",    # Spring Boot actuator
+        "/debug/",       # Debug endpoints
+        "/admin/",       # Admin panel probes (when we don't have one)
+    )
+    def _should_log_at_debug(self, path: str, status_code: int) -> bool:
+        """Return True when the request should be logged at DEBUG level.
+        That is the case when the path is one of DEBUG_PATHS, when the
+        response status is 404, or when the path contains any of the
+        SCANNER_PATTERNS substrings.
+        """
+        return (
+            path in self.DEBUG_PATHS
+            or status_code == 404
+            or any(marker in path for marker in self.SCANNER_PATTERNS)
+        )
     async def dispatch(self, request: Request, call_next):
         start_time = time.time()
+        path = request.url.path
-        # Log incoming request
+        # Log incoming request (preliminary - may adjust after response)
         client_host = request.client.host if request.client else "unknown"
-        logger.info(
-            f"→ REQUEST: {request.method} {request.url.path} | "
-            f"Client: {client_host} | "
-            f"User-Agent: {request.headers.get('user-agent', 'unknown')[:100]}"
-        )
+        user_agent = request.headers.get('user-agent', 'unknown')[:100]
         # Process request
         response = await call_next(request)
-        # Log response
+        # Determine log level based on path AND response status
         duration_ms = (time.time() - start_time) * 1000
-        logger.info(
-            f"← RESPONSE: {request.method} {request.url.path} | "
+        use_debug = self._should_log_at_debug(path, response.status_code)
+        log_fn = logger.debug if use_debug else logger.info
+        # Log request and response together
+        log_fn(
+            f"→ REQUEST: {request.method} {path} | "
+            f"Client: {client_host} | "
+            f"User-Agent: {user_agent}"
+        )
+        log_fn(
+            f"← RESPONSE: {request.method} {path} | "
             f"Status: {response.status_code} | "
             f"Duration: {duration_ms:.2f}ms"
         )
@@ -154,7 +217,8 @@ async def lifespan(app: FastAPI):
             "and history lookups are unavailable. Enable database with POSTGRES__ENABLED=true"
         )
     else:
-        logger.info(f"Database enabled: {settings.postgres.connection_string}")
+        # Log database host only - never log credentials
+        logger.info(f"Database enabled: {settings.postgres.host}:{settings.postgres.port}/{settings.postgres.database}")
     yield
@@ -216,7 +280,7 @@ def create_app() -> FastAPI:
     app = FastAPI(
         title=f"{settings.app_name} API",
         description=f"{settings.app_name} - Resources Entities Moments system for agentic AI",
-        version="0.1.0",
+        version=__version__,
         lifespan=combined_lifespan,
         root_path=settings.root_path if settings.root_path else "",
         redirect_slashes=False,  # Don't redirect /mcp/ -> /mcp
@@ -290,7 +354,7 @@ def create_app() -> FastAPI:
         # TODO: If auth enabled and no user, return 401 with WWW-Authenticate
         return {
             "name": f"{settings.app_name} API",
-            "version": "0.1.0",
+            "version": __version__,
             "mcp_endpoint": "/api/v1/mcp",
             "docs": "/docs",
         }
@@ -299,7 +363,7 @@ def create_app() -> FastAPI:
     @app.get("/health")
     async def health():
         """Health check endpoint."""
-        return {"status": "healthy", "version": "0.1.0"}
+        return {"status": "healthy", "version": __version__}
     # Register API routers
     from .routers.chat import router as chat_router
@@ -308,13 +372,18 @@ def create_app() -> FastAPI:
     from .routers.feedback import router as feedback_router
     from .routers.admin import router as admin_router
     from .routers.shared_sessions import router as shared_sessions_router
+    from .routers.query import router as query_router
     app.include_router(chat_router)
     app.include_router(models_router)
+    # shared_sessions_router MUST be before messages_router
+    # because messages_router has /sessions/{session_id} which would match
+    # before the more specific /sessions/shared-with-me routes
+    app.include_router(shared_sessions_router)
     app.include_router(messages_router)
     app.include_router(feedback_router)
     app.include_router(admin_router)
-    app.include_router(shared_sessions_router)
+    app.include_router(query_router)
     # Register auth router (if enabled)
     if settings.auth.enabled:

rem/api/mcp_router/resources.py CHANGED Viewed

@@ -181,7 +181,7 @@ Parameters:
 - table_name (required): Table to search (resources, moments, etc.)
 - field_name (optional): Field to search (defaults to "content")
 - provider (optional): Embedding provider (default: from LLM__EMBEDDING_PROVIDER setting)
-- min_similarity (optional): Minimum similarity 0.0-1.0 (default: 0.7)
+- min_similarity (optional): Minimum similarity 0.0-1.0 (default: 0.3)
 - limit (optional): Max results (default: 10)
 - user_id (optional): User scoping

remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl

Potentially problematic release.

remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl