PyPI - remdb - Versions diffs - 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl - Mend

remdb 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107)

rem/__init__.py +129 -2
rem/agentic/README.md +76 -0
rem/agentic/__init__.py +15 -0
rem/agentic/agents/__init__.py +16 -2
rem/agentic/agents/sse_simulator.py +502 -0
rem/agentic/context.py +51 -25
rem/agentic/llm_provider_models.py +301 -0
rem/agentic/mcp/tool_wrapper.py +112 -17
rem/agentic/otel/setup.py +93 -4
rem/agentic/providers/phoenix.py +314 -132
rem/agentic/providers/pydantic_ai.py +215 -26
rem/agentic/schema.py +361 -21
rem/agentic/tools/rem_tools.py +3 -3
rem/api/README.md +238 -1
rem/api/deps.py +255 -0
rem/api/main.py +154 -37
rem/api/mcp_router/resources.py +1 -1
rem/api/mcp_router/server.py +26 -5
rem/api/mcp_router/tools.py +465 -7
rem/api/middleware/tracking.py +172 -0
rem/api/routers/admin.py +494 -0
rem/api/routers/auth.py +124 -0
rem/api/routers/chat/completions.py +402 -20
rem/api/routers/chat/models.py +88 -10
rem/api/routers/chat/otel_utils.py +33 -0
rem/api/routers/chat/sse_events.py +542 -0
rem/api/routers/chat/streaming.py +642 -45
rem/api/routers/dev.py +81 -0
rem/api/routers/feedback.py +268 -0
rem/api/routers/messages.py +473 -0
rem/api/routers/models.py +78 -0
rem/api/routers/query.py +360 -0
rem/api/routers/shared_sessions.py +406 -0
rem/auth/middleware.py +126 -27
rem/cli/commands/README.md +237 -64
rem/cli/commands/ask.py +13 -10
rem/cli/commands/cluster.py +1808 -0
rem/cli/commands/configure.py +5 -6
rem/cli/commands/db.py +396 -139
rem/cli/commands/experiments.py +469 -74
rem/cli/commands/process.py +22 -15
rem/cli/commands/scaffold.py +47 -0
rem/cli/commands/schema.py +97 -50
rem/cli/main.py +29 -6
rem/config.py +10 -3
rem/models/core/core_model.py +7 -1
rem/models/core/experiment.py +54 -0
rem/models/core/rem_query.py +5 -2
rem/models/entities/__init__.py +21 -0
rem/models/entities/domain_resource.py +38 -0
rem/models/entities/feedback.py +123 -0
rem/models/entities/message.py +30 -1
rem/models/entities/session.py +83 -0
rem/models/entities/shared_session.py +180 -0
rem/models/entities/user.py +10 -3
rem/registry.py +373 -0
rem/schemas/agents/rem.yaml +7 -3
rem/services/content/providers.py +92 -133
rem/services/content/service.py +92 -20
rem/services/dreaming/affinity_service.py +2 -16
rem/services/dreaming/moment_service.py +2 -15
rem/services/embeddings/api.py +24 -17
rem/services/embeddings/worker.py +16 -16
rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
rem/services/phoenix/client.py +302 -28
rem/services/postgres/README.md +159 -15
rem/services/postgres/__init__.py +2 -1
rem/services/postgres/diff_service.py +531 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
rem/services/postgres/repository.py +132 -0
rem/services/postgres/schema_generator.py +291 -9
rem/services/postgres/service.py +6 -6
rem/services/rate_limit.py +113 -0
rem/services/rem/README.md +14 -0
rem/services/rem/parser.py +44 -9
rem/services/rem/service.py +36 -2
rem/services/session/compression.py +24 -1
rem/services/session/reload.py +1 -1
rem/services/user_service.py +98 -0
rem/settings.py +399 -29
rem/sql/background_indexes.sql +21 -16
rem/sql/migrations/001_install.sql +387 -54
rem/sql/migrations/002_install_models.sql +2320 -393
rem/sql/migrations/003_optional_extensions.sql +326 -0
rem/sql/migrations/004_cache_system.sql +548 -0
rem/utils/__init__.py +18 -0
rem/utils/constants.py +97 -0
rem/utils/date_utils.py +228 -0
rem/utils/embeddings.py +17 -4
rem/utils/files.py +167 -0
rem/utils/mime_types.py +158 -0
rem/utils/model_helpers.py +156 -1
rem/utils/schema_loader.py +282 -35
rem/utils/sql_paths.py +146 -0
rem/utils/sql_types.py +3 -1
rem/utils/vision.py +9 -14
rem/workers/README.md +14 -14
rem/workers/__init__.py +3 -1
rem/workers/db_listener.py +579 -0
rem/workers/db_maintainer.py +74 -0
rem/workers/unlogged_maintainer.py +463 -0
{remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/METADATA +460 -303
{remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/RECORD +105 -74
{remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
rem/sql/002_install_models.sql +0 -1068
rem/sql/install_models.sql +0 -1038
{remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0

rem/api/mcp_router/tools.py CHANGED Viewed

@@ -15,6 +15,9 @@ Available Tools:
 - ask_rem_agent: Natural language to REM query conversion via agent
 - ingest_into_rem: Full file ingestion pipeline (read + store + parse + chunk)
 - read_resource: Access MCP resources (for Claude Desktop compatibility)
+- register_metadata: Register response metadata for SSE MetadataEvent
+- list_schema: List all schemas (tables, agents) in the database with row counts
+- get_schema: Get detailed schema for a table (columns, types, indexes)
 """
 from functools import wraps
@@ -53,7 +56,7 @@ def init_services(postgres_service: PostgresService, rem_service: RemService):
     """
     _service_cache["postgres"] = postgres_service
     _service_cache["rem"] = rem_service
-    logger.info("MCP tools initialized with service instances")
+    logger.debug("MCP tools initialized with service instances")
 async def get_rem_service() -> RemService:
@@ -79,7 +82,7 @@ async def get_rem_service() -> RemService:
     _service_cache["postgres"] = postgres_service
     _service_cache["rem"] = rem_service
-    logger.info("MCP tools: lazy initialized services")
+    logger.debug("MCP tools: lazy initialized services")
     return rem_service
@@ -399,14 +402,14 @@ async def ask_rem_agent(
     )
     # Run agent (errors handled by decorator)
-    logger.info(f"Running ask_rem agent for query: {query[:100]}...")
+    logger.debug(f"Running ask_rem agent for query: {query[:100]}...")
     result = await agent_runtime.run(query)
     # Extract output
     from rem.agentic.serialization import serialize_agent_result
     query_output = serialize_agent_result(result.output)
-    logger.info("Agent execution completed successfully")
+    logger.debug("Agent execution completed successfully")
     return {
         "response": str(result.output),
@@ -422,6 +425,7 @@ async def ingest_into_rem(
     tags: list[str] | None = None,
     is_local_server: bool = False,
     user_id: str | None = None,
+    resource_type: str | None = None,
 ) -> dict[str, Any]:
     """
     Ingest file into REM, creating searchable resources and embeddings.
@@ -448,6 +452,11 @@ async def ingest_into_rem(
         tags: Optional tags for file
         is_local_server: True if running as local/stdio MCP server
         user_id: Optional user identifier (defaults to authenticated user or "default")
+        resource_type: Optional resource type for storing chunks (case-insensitive).
+            Supports flexible naming:
+            - "resource", "resources", "Resource" → Resource (default)
+            - "domain-resource", "domain_resource", "DomainResource",
+              "domain-resources" → DomainResource (curated internal knowledge)
     Returns:
         Dict with:
@@ -478,6 +487,13 @@ async def ingest_into_rem(
             file_uri="https://example.com/whitepaper.pdf",
             tags=["research", "whitepaper"]
         )
+        # Ingest as curated domain knowledge
+        ingest_into_rem(
+            file_uri="s3://bucket/internal/procedures.pdf",
+            resource_type="domain-resource",
+            category="procedures"
+        )
     """
     from ...services.content import ContentService
@@ -493,9 +509,10 @@ async def ingest_into_rem(
         category=category,
         tags=tags,
         is_local_server=is_local_server,
+        resource_type=resource_type,
     )
-    logger.info(
+    logger.debug(
         f"MCP ingestion complete: {result['file_name']} "
         f"(status: {result['processing_status']}, "
         f"resources: {result['resources_created']})"
@@ -550,7 +567,7 @@ async def read_resource(uri: str) -> dict[str, Any]:
         # Check system status
         read_resource(uri="rem://status")
     """
-    logger.info(f"📖 Reading resource: {uri}")
+    logger.debug(f"Reading resource: {uri}")
     # Import here to avoid circular dependency
     from .resources import load_resource
@@ -558,7 +575,7 @@ async def read_resource(uri: str) -> dict[str, Any]:
     # Load resource using the existing resource handler (errors handled by decorator)
     result = await load_resource(uri)
-    logger.info(f"✓ Resource loaded successfully: {uri}")
+    logger.debug(f"Resource loaded successfully: {uri}")
     # If result is already a dict, return it
     if isinstance(result, dict):
@@ -582,3 +599,444 @@ async def read_resource(uri: str) -> dict[str, Any]:
             "uri": uri,
             "data": {"content": result},
         }
+async def register_metadata(
+    confidence: float | None = None,
+    references: list[str] | None = None,
+    sources: list[str] | None = None,
+    flags: list[str] | None = None,
+    # Session naming
+    session_name: str | None = None,
+    # Risk assessment fields (used by specialized agents)
+    risk_level: str | None = None,
+    risk_score: int | None = None,
+    risk_reasoning: str | None = None,
+    recommended_action: str | None = None,
+    # Generic extension - any additional key-value pairs
+    extra: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """
+    Register response metadata to be emitted as an SSE MetadataEvent.
+    Call this tool BEFORE generating your final response to provide structured
+    metadata that will be sent to the client alongside your natural language output.
+    This allows you to stream conversational responses while still providing
+    machine-readable confidence scores, references, and other metadata.
+    **Design Pattern**: Agents can call this once before their final response to
+    register metadata that the streaming layer will emit as a MetadataEvent.
+    This decouples structured metadata from the response format.
+    Args:
+        confidence: Confidence score (0.0-1.0) for the response quality.
+            - 0.9-1.0: High confidence, answer is well-supported
+            - 0.7-0.9: Medium confidence, some uncertainty
+            - 0.5-0.7: Low confidence, significant gaps
+            - <0.5: Very uncertain, may need clarification
+        references: List of reference identifiers (file paths, document IDs,
+            entity labels) that support the response.
+        sources: List of source descriptions (e.g., "REM database",
+            "search results", "user context").
+        flags: Optional flags for the response (e.g., "needs_review",
+            "uncertain", "incomplete", "crisis_alert").
+        session_name: Short 1-3 phrase name describing the session topic.
+            Used by the UI to label conversations in the sidebar.
+            Examples: "Prescription Drug Questions", "AWS Setup Help",
+            "Python Code Review", "Travel Planning".
+        risk_level: Risk level indicator (e.g., "green", "orange", "red").
+            Used by mental health agents for C-SSRS style assessment.
+        risk_score: Numeric risk score (e.g., 0-6 for C-SSRS).
+        risk_reasoning: Brief explanation of risk assessment.
+        recommended_action: Suggested next steps based on assessment.
+        extra: Dict of arbitrary additional metadata. Use this for any
+            domain-specific fields not covered by the standard parameters.
+            Example: {"topics_detected": ["anxiety", "sleep"], "session_count": 5}
+    Returns:
+        Dict with:
+        - status: "success"
+        - _metadata_event: True (marker for streaming layer)
+        - All provided fields merged into response
+    Examples:
+        # High confidence answer with references
+        register_metadata(
+            confidence=0.95,
+            references=["sarah-chen", "q3-report-2024"],
+            sources=["REM database lookup"]
+        )
+        # Risk assessment example
+        register_metadata(
+            confidence=0.9,
+            risk_level="green",
+            risk_score=0,
+            risk_reasoning="No risk indicators detected in message",
+            sources=["mental_health_resources"]
+        )
+        # Orange risk with recommended action
+        register_metadata(
+            risk_level="orange",
+            risk_score=2,
+            risk_reasoning="Passive ideation detected - 'feeling hopeless'",
+            recommended_action="Schedule care team check-in within 24-48 hours",
+            flags=["care_team_alert"]
+        )
+        # Custom domain-specific metadata
+        register_metadata(
+            confidence=0.8,
+            extra={
+                "topics_detected": ["medication", "side_effects"],
+                "drug_mentioned": "sertraline",
+                "sentiment": "concerned"
+            }
+        )
+    """
+    logger.debug(
+        f"Registering metadata: confidence={confidence}, "
+        f"risk_level={risk_level}, refs={len(references or [])}, "
+        f"sources={len(sources or [])}"
+    )
+    result = {
+        "status": "success",
+        "_metadata_event": True,  # Marker for streaming layer
+        "confidence": confidence,
+        "references": references,
+        "sources": sources,
+        "flags": flags,
+    }
+    # Add session name if provided
+    if session_name is not None:
+        result["session_name"] = session_name
+    # Add risk assessment fields if provided
+    if risk_level is not None:
+        result["risk_level"] = risk_level
+    if risk_score is not None:
+        result["risk_score"] = risk_score
+    if risk_reasoning is not None:
+        result["risk_reasoning"] = risk_reasoning
+    if recommended_action is not None:
+        result["recommended_action"] = recommended_action
+    # Merge any extra fields
+    if extra:
+        result["extra"] = extra
+    return result
+@mcp_tool_error_handler
+async def list_schema(
+    include_system: bool = False,
+    user_id: str | None = None,
+) -> dict[str, Any]:
+    """
+    List all schemas (tables) in the REM database.
+    Returns metadata about all available tables including their names,
+    row counts, and descriptions. Use this to discover what data is
+    available before constructing queries.
+    Args:
+        include_system: If True, include PostgreSQL system tables (pg_*, information_schema).
+                       Default False shows only REM application tables.
+        user_id: Optional user identifier (defaults to authenticated user or "default")
+    Returns:
+        Dict with:
+        - status: "success" or "error"
+        - tables: List of table metadata dicts with:
+            - name: Table name
+            - schema: Schema name (usually "public")
+            - estimated_rows: Approximate row count
+            - description: Table comment if available
+    Examples:
+        # List all REM schemas
+        list_schema()
+        # Include system tables
+        list_schema(include_system=True)
+    """
+    rem_service = await get_rem_service()
+    user_id = AgentContext.get_user_id_or_default(user_id, source="list_schema")
+    # Query information_schema for tables
+    schema_filter = ""
+    if not include_system:
+        schema_filter = """
+            AND table_schema = 'public'
+            AND table_name NOT LIKE 'pg_%'
+            AND table_name NOT LIKE '_pg_%'
+        """
+    query = f"""
+        SELECT
+            t.table_schema,
+            t.table_name,
+            pg_catalog.obj_description(
+                (quote_ident(t.table_schema) || '.' || quote_ident(t.table_name))::regclass,
+                'pg_class'
+            ) as description,
+            (
+                SELECT reltuples::bigint
+                FROM pg_class c
+                JOIN pg_namespace n ON n.oid = c.relnamespace
+                WHERE c.relname = t.table_name
+                AND n.nspname = t.table_schema
+            ) as estimated_rows
+        FROM information_schema.tables t
+        WHERE t.table_type = 'BASE TABLE'
+        {schema_filter}
+        ORDER BY t.table_schema, t.table_name
+    """
+    # Access postgres service directly from cache
+    postgres_service = _service_cache.get("postgres")
+    if not postgres_service:
+        postgres_service = rem_service._postgres
+    rows = await postgres_service.fetch(query)
+    tables = []
+    for row in rows:
+        tables.append({
+            "name": row["table_name"],
+            "schema": row["table_schema"],
+            "estimated_rows": int(row["estimated_rows"]) if row["estimated_rows"] else 0,
+            "description": row["description"],
+        })
+    logger.info(f"Listed {len(tables)} schemas for user {user_id}")
+    return {
+        "tables": tables,
+        "count": len(tables),
+    }
+@mcp_tool_error_handler
+async def get_schema(
+    table_name: str,
+    include_indexes: bool = True,
+    include_constraints: bool = True,
+    columns: list[str] | None = None,
+    user_id: str | None = None,
+) -> dict[str, Any]:
+    """
+    Get detailed schema information for a specific table.
+    Returns column definitions, data types, constraints, and indexes.
+    Use this to understand table structure before writing SQL queries.
+    Args:
+        table_name: Name of the table to inspect (e.g., "resources", "moments")
+        include_indexes: Include index information (default True)
+        include_constraints: Include constraint information (default True)
+        columns: Optional list of specific columns to return. If None, returns all columns.
+        user_id: Optional user identifier (defaults to authenticated user or "default")
+    Returns:
+        Dict with:
+        - status: "success" or "error"
+        - table_name: Name of the table
+        - columns: List of column definitions with:
+            - name: Column name
+            - type: PostgreSQL data type
+            - nullable: Whether NULL is allowed
+            - default: Default value if any
+            - description: Column comment if available
+        - indexes: List of indexes (if include_indexes=True)
+        - constraints: List of constraints (if include_constraints=True)
+        - primary_key: Primary key column(s)
+    Examples:
+        # Get full schema for resources table
+        get_schema(table_name="resources")
+        # Get only specific columns
+        get_schema(
+            table_name="resources",
+            columns=["id", "name", "created_at"]
+        )
+        # Get schema without indexes
+        get_schema(
+            table_name="moments",
+            include_indexes=False
+        )
+    """
+    rem_service = await get_rem_service()
+    user_id = AgentContext.get_user_id_or_default(user_id, source="get_schema")
+    # Access postgres service
+    postgres_service = _service_cache.get("postgres")
+    if not postgres_service:
+        postgres_service = rem_service._postgres
+    # Verify table exists
+    exists_query = """
+        SELECT EXISTS (
+            SELECT 1 FROM information_schema.tables
+            WHERE table_schema = 'public' AND table_name = $1
+        )
+    """
+    exists = await postgres_service.fetchval(exists_query, table_name)
+    if not exists:
+        return {
+            "status": "error",
+            "error": f"Table '{table_name}' not found in public schema",
+        }
+    # Get columns
+    columns_filter = ""
+    if columns:
+        placeholders = ", ".join(f"${i+2}" for i in range(len(columns)))
+        columns_filter = f"AND column_name IN ({placeholders})"
+    columns_query = f"""
+        SELECT
+            c.column_name,
+            c.data_type,
+            c.udt_name,
+            c.is_nullable,
+            c.column_default,
+            c.character_maximum_length,
+            c.numeric_precision,
+            pg_catalog.col_description(
+                (quote_ident(c.table_schema) || '.' || quote_ident(c.table_name))::regclass,
+                c.ordinal_position
+            ) as description
+        FROM information_schema.columns c
+        WHERE c.table_schema = 'public'
+        AND c.table_name = $1
+        {columns_filter}
+        ORDER BY c.ordinal_position
+    """
+    params = [table_name]
+    if columns:
+        params.extend(columns)
+    column_rows = await postgres_service.fetch(columns_query, *params)
+    column_defs = []
+    for row in column_rows:
+        # Build a more readable type string
+        data_type = row["data_type"]
+        if row["character_maximum_length"]:
+            data_type = f"{data_type}({row['character_maximum_length']})"
+        elif row["udt_name"] in ("int4", "int8", "float4", "float8"):
+            # Use common type names
+            type_map = {"int4": "integer", "int8": "bigint", "float4": "real", "float8": "double precision"}
+            data_type = type_map.get(row["udt_name"], data_type)
+        elif row["udt_name"] == "vector":
+            data_type = "vector"
+        column_defs.append({
+            "name": row["column_name"],
+            "type": data_type,
+            "nullable": row["is_nullable"] == "YES",
+            "default": row["column_default"],
+            "description": row["description"],
+        })
+    result = {
+        "table_name": table_name,
+        "columns": column_defs,
+        "column_count": len(column_defs),
+    }
+    # Get primary key
+    pk_query = """
+        SELECT a.attname as column_name
+        FROM pg_index i
+        JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
+        WHERE i.indrelid = $1::regclass
+        AND i.indisprimary
+        ORDER BY array_position(i.indkey, a.attnum)
+    """
+    pk_rows = await postgres_service.fetch(pk_query, table_name)
+    result["primary_key"] = [row["column_name"] for row in pk_rows]
+    # Get indexes
+    if include_indexes:
+        indexes_query = """
+            SELECT
+                i.relname as index_name,
+                am.amname as index_type,
+                ix.indisunique as is_unique,
+                ix.indisprimary as is_primary,
+                array_agg(a.attname ORDER BY array_position(ix.indkey, a.attnum)) as columns
+            FROM pg_index ix
+            JOIN pg_class i ON i.oid = ix.indexrelid
+            JOIN pg_class t ON t.oid = ix.indrelid
+            JOIN pg_am am ON am.oid = i.relam
+            JOIN pg_attribute a ON a.attrelid = t.oid AND a.attnum = ANY(ix.indkey)
+            WHERE t.relname = $1
+            GROUP BY i.relname, am.amname, ix.indisunique, ix.indisprimary
+            ORDER BY i.relname
+        """
+        index_rows = await postgres_service.fetch(indexes_query, table_name)
+        result["indexes"] = [
+            {
+                "name": row["index_name"],
+                "type": row["index_type"],
+                "unique": row["is_unique"],
+                "primary": row["is_primary"],
+                "columns": row["columns"],
+            }
+            for row in index_rows
+        ]
+    # Get constraints
+    if include_constraints:
+        constraints_query = """
+            SELECT
+                con.conname as constraint_name,
+                con.contype as constraint_type,
+                array_agg(a.attname ORDER BY array_position(con.conkey, a.attnum)) as columns,
+                pg_get_constraintdef(con.oid) as definition
+            FROM pg_constraint con
+            JOIN pg_class t ON t.oid = con.conrelid
+            JOIN pg_attribute a ON a.attrelid = t.oid AND a.attnum = ANY(con.conkey)
+            WHERE t.relname = $1
+            GROUP BY con.conname, con.contype, con.oid
+            ORDER BY con.contype, con.conname
+        """
+        constraint_rows = await postgres_service.fetch(constraints_query, table_name)
+        # Map constraint types to readable names
+        type_map = {
+            "p": "PRIMARY KEY",
+            "u": "UNIQUE",
+            "f": "FOREIGN KEY",
+            "c": "CHECK",
+            "x": "EXCLUSION",
+        }
+        result["constraints"] = []
+        for row in constraint_rows:
+            # contype is returned as bytes (char type), decode it
+            con_type = row["constraint_type"]
+            if isinstance(con_type, bytes):
+                con_type = con_type.decode("utf-8")
+            result["constraints"].append({
+                "name": row["constraint_name"],
+                "type": type_map.get(con_type, con_type),
+                "columns": row["columns"],
+                "definition": row["definition"],
+            })
+    logger.info(f"Retrieved schema for table '{table_name}' with {len(column_defs)} columns")
+    return result

remdb 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl

remdb 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl