remdb 0.3.200__py3-none-any.whl → 0.3.226__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

@@ -1,12 +1,13 @@
1
1
  """Session management services for conversation persistence and compression."""
2
2
 
3
3
  from .compression import MessageCompressor, SessionMessageStore
4
- from .pydantic_messages import session_to_pydantic_messages
4
+ from .pydantic_messages import audit_session_history, session_to_pydantic_messages
5
5
  from .reload import reload_session
6
6
 
7
7
  __all__ = [
8
8
  "MessageCompressor",
9
9
  "SessionMessageStore",
10
+ "audit_session_history",
10
11
  "reload_session",
11
12
  "session_to_pydantic_messages",
12
13
  ]
@@ -65,7 +65,7 @@ def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
65
65
  logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
66
66
  return truncated
67
67
 
68
- from rem.models.entities import Message
68
+ from rem.models.entities import Message, Session
69
69
  from rem.services.postgres import PostgresService, Repository
70
70
  from rem.settings import settings
71
71
 
@@ -177,6 +177,39 @@ class SessionMessageStore:
177
177
  self.user_id = user_id
178
178
  self.compressor = compressor or MessageCompressor()
179
179
  self.repo = Repository(Message)
180
+ self._session_repo = Repository(Session, table_name="sessions")
181
+
182
+ async def _ensure_session_exists(
183
+ self,
184
+ session_id: str,
185
+ user_id: str | None = None,
186
+ ) -> None:
187
+ """
188
+ Ensure session exists, creating it if necessary.
189
+
190
+ Args:
191
+ session_id: Session UUID from X-Session-Id header
192
+ user_id: Optional user identifier
193
+ """
194
+ try:
195
+ # Check if session already exists by UUID
196
+ existing = await self._session_repo.get_by_id(session_id)
197
+ if existing:
198
+ return # Session already exists
199
+
200
+ # Create new session with the provided UUID as id
201
+ session = Session(
202
+ id=session_id, # Use the provided UUID as session id
203
+ name=session_id, # Default name to UUID, can be updated later
204
+ user_id=user_id or self.user_id,
205
+ tenant_id=self.user_id, # tenant_id set to user_id for scoping
206
+ )
207
+ await self._session_repo.upsert(session)
208
+ logger.info(f"Created session {session_id} for user {user_id or self.user_id}")
209
+
210
+ except Exception as e:
211
+ # Log but don't fail - session creation is best-effort
212
+ logger.warning(f"Failed to ensure session exists: {e}")
180
213
 
181
214
  async def store_message(
182
215
  self,
@@ -283,8 +316,10 @@ class SessionMessageStore:
283
316
  """
284
317
  Store all session messages and return compressed versions.
285
318
 
319
+ Ensures session exists before storing messages.
320
+
286
321
  Args:
287
- session_id: Session identifier
322
+ session_id: Session UUID
288
323
  messages: List of messages to store
289
324
  user_id: Optional user identifier
290
325
  compress: Whether to compress messages (default: True)
@@ -296,6 +331,9 @@ class SessionMessageStore:
296
331
  logger.debug("Postgres disabled, returning messages uncompressed")
297
332
  return messages
298
333
 
334
+ # Ensure session exists before storing messages
335
+ await self._ensure_session_exists(session_id, user_id)
336
+
299
337
  compressed_messages = []
300
338
 
301
339
  for idx, message in enumerate(messages):
@@ -208,3 +208,69 @@ def session_to_pydantic_messages(
208
208
 
209
209
  logger.debug(f"Converted {len(session_history)} stored messages to {len(messages)} pydantic-ai messages")
210
210
  return messages
211
+
212
+
213
+ def audit_session_history(
214
+ session_id: str,
215
+ agent_name: str,
216
+ prompt: str,
217
+ raw_session_history: list[dict[str, Any]],
218
+ pydantic_messages_count: int,
219
+ ) -> None:
220
+ """
221
+ Dump session history to a YAML file for debugging.
222
+
223
+ Only runs when DEBUG__AUDIT_SESSION=true. Writes to DEBUG__AUDIT_DIR (default /tmp).
224
+ Appends to the same file for a session, so all agent invocations are in one place.
225
+
226
+ Args:
227
+ session_id: The session identifier
228
+ agent_name: Name of the agent being invoked
229
+ prompt: The prompt being sent to the agent
230
+ raw_session_history: The raw session messages from the database
231
+ pydantic_messages_count: Count of converted pydantic-ai messages
232
+ """
233
+ from ...settings import settings
234
+
235
+ if not settings.debug.audit_session:
236
+ return
237
+
238
+ try:
239
+ import yaml
240
+ from pathlib import Path
241
+ from ...utils.date_utils import utc_now, to_iso
242
+
243
+ audit_dir = Path(settings.debug.audit_dir)
244
+ audit_dir.mkdir(parents=True, exist_ok=True)
245
+ audit_file = audit_dir / f"{session_id}.yaml"
246
+
247
+ # Create entry for this agent invocation
248
+ entry = {
249
+ "timestamp": to_iso(utc_now()),
250
+ "agent_name": agent_name,
251
+ "prompt": prompt,
252
+ "raw_history_count": len(raw_session_history),
253
+ "pydantic_messages_count": pydantic_messages_count,
254
+ "raw_session_history": raw_session_history,
255
+ }
256
+
257
+ # Load existing data or create new
258
+ existing_data: dict[str, Any] = {"session_id": session_id, "invocations": []}
259
+ if audit_file.exists():
260
+ with open(audit_file) as f:
261
+ loaded = yaml.safe_load(f)
262
+ if loaded:
263
+ # Ensure session_id is always present (backfill if missing)
264
+ existing_data = {
265
+ "session_id": loaded.get("session_id", session_id),
266
+ "invocations": loaded.get("invocations", []),
267
+ }
268
+
269
+ # Append this invocation
270
+ existing_data["invocations"].append(entry)
271
+
272
+ with open(audit_file, "w") as f:
273
+ yaml.dump(existing_data, f, default_flow_style=False, allow_unicode=True)
274
+ logger.info(f"DEBUG: Session audit updated: {audit_file}")
275
+ except Exception as e:
276
+ logger.warning(f"DEBUG: Failed to dump session audit: {e}")
rem/settings.py CHANGED
@@ -1651,6 +1651,33 @@ class EmailSettings(BaseSettings):
1651
1651
  return kwargs
1652
1652
 
1653
1653
 
1654
+ class DebugSettings(BaseSettings):
1655
+ """
1656
+ Debug settings for development and troubleshooting.
1657
+
1658
+ Environment variables:
1659
+ DEBUG__AUDIT_SESSION - Dump session history to /tmp/{session_id}.yaml
1660
+ DEBUG__AUDIT_DIR - Directory for session audit files (default: /tmp)
1661
+ """
1662
+
1663
+ model_config = SettingsConfigDict(
1664
+ env_prefix="DEBUG__",
1665
+ env_file=".env",
1666
+ env_file_encoding="utf-8",
1667
+ extra="ignore",
1668
+ )
1669
+
1670
+ audit_session: bool = Field(
1671
+ default=False,
1672
+ description="When true, dump full session history to audit files for debugging",
1673
+ )
1674
+
1675
+ audit_dir: str = Field(
1676
+ default="/tmp",
1677
+ description="Directory for session audit files",
1678
+ )
1679
+
1680
+
1654
1681
  class TestSettings(BaseSettings):
1655
1682
  """
1656
1683
  Test environment settings.
@@ -1767,6 +1794,7 @@ class Settings(BaseSettings):
1767
1794
  schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
1768
1795
  email: EmailSettings = Field(default_factory=EmailSettings)
1769
1796
  test: TestSettings = Field(default_factory=TestSettings)
1797
+ debug: DebugSettings = Field(default_factory=DebugSettings)
1770
1798
 
1771
1799
 
1772
1800
  # Auto-load .env file from current directory if it exists
@@ -822,6 +822,7 @@ COMMENT ON FUNCTION fn_get_shared_messages IS
822
822
  -- Function to list sessions with user details (name, email) for admin views
823
823
 
824
824
  -- List sessions with user info, CTE pagination
825
+ -- Note: messages.session_id stores the session UUID (sessions.id)
825
826
  CREATE OR REPLACE FUNCTION fn_list_sessions_with_user(
826
827
  p_user_id VARCHAR(256) DEFAULT NULL, -- Filter by user_id (NULL = all users, admin only)
827
828
  p_user_name VARCHAR(256) DEFAULT NULL, -- Filter by user name (partial match, admin only)
@@ -847,7 +848,15 @@ RETURNS TABLE(
847
848
  ) AS $$
848
849
  BEGIN
849
850
  RETURN QUERY
850
- WITH filtered_sessions AS (
851
+ WITH session_msg_counts AS (
852
+ -- Count messages per session (joining on session UUID)
853
+ SELECT
854
+ m.session_id,
855
+ COUNT(*)::INTEGER as actual_message_count
856
+ FROM messages m
857
+ GROUP BY m.session_id
858
+ ),
859
+ filtered_sessions AS (
851
860
  SELECT
852
861
  s.id,
853
862
  s.name,
@@ -856,13 +865,14 @@ BEGIN
856
865
  s.user_id,
857
866
  COALESCE(u.name, s.user_id)::VARCHAR(256) AS user_name,
858
867
  u.email::VARCHAR(256) AS user_email,
859
- s.message_count,
868
+ COALESCE(mc.actual_message_count, 0) AS message_count,
860
869
  s.total_tokens,
861
870
  s.created_at,
862
871
  s.updated_at,
863
872
  s.metadata
864
873
  FROM sessions s
865
874
  LEFT JOIN users u ON u.id::text = s.user_id
875
+ LEFT JOIN session_msg_counts mc ON mc.session_id = s.id::text
866
876
  WHERE s.deleted_at IS NULL
867
877
  AND (p_user_id IS NULL OR s.user_id = p_user_id)
868
878
  AND (p_user_name IS NULL OR u.name ILIKE '%' || p_user_name || '%')
@@ -895,7 +905,7 @@ END;
895
905
  $$ LANGUAGE plpgsql STABLE;
896
906
 
897
907
  COMMENT ON FUNCTION fn_list_sessions_with_user IS
898
- 'List sessions with user details (name, email). Supports filtering by user_id, user_name, user_email, and mode.';
908
+ 'List sessions with user details and computed message counts. Joins messages on session UUID (sessions.id). Supports filtering by user_id, user_name, user_email, and mode.';
899
909
 
900
910
  -- ============================================================================
901
911
  -- RECORD INSTALLATION
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
3
  -- Source: model registry
4
- -- Generated at: 2025-12-15T09:58:08.880060
4
+ -- Generated at: 2025-12-22T17:34:54.187339
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -2088,32 +2088,30 @@ Domain-specific knowledge - either agent-extracted or direct-loaded.
2088
2088
  tags=["cv", "engineering"]
2089
2089
  )
2090
2090
 
2091
- # Direct-loaded: Medical knowledge base from git
2092
- disorder_ontology = Ontology(
2093
- name="panic-disorder",
2094
- uri="git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md",
2095
- content="# Panic Disorder\n\nPanic disorder is characterized by...",
2091
+ # Direct-loaded: Knowledge base from git
2092
+ api_docs = Ontology(
2093
+ name="rest-api-guide",
2094
+ uri="git://example-org/docs/api/rest-api-guide.md",
2095
+ content="# REST API Guide\n\nThis guide covers RESTful API design...",
2096
2096
  extracted_data={
2097
- "type": "disorder",
2098
- "category": "anxiety",
2099
- "icd10": "F41.0",
2100
- "dsm5_criteria": ["A", "B", "C", "D"],
2097
+ "type": "documentation",
2098
+ "category": "api",
2099
+ "version": "2.0",
2101
2100
  },
2102
- tags=["disorder", "anxiety", "dsm5"]
2101
+ tags=["api", "rest", "documentation"]
2103
2102
  )
2104
2103
 
2105
- # Direct-loaded: Clinical procedure from git
2106
- scid_node = Ontology(
2107
- name="scid-5-f1",
2108
- uri="git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md",
2109
- content="# scid-5-f1: Panic Attack Screening\n\n...",
2104
+ # Direct-loaded: Technical spec from git
2105
+ config_spec = Ontology(
2106
+ name="config-schema",
2107
+ uri="git://example-org/docs/specs/config-schema.md",
2108
+ content="# Configuration Schema\n\nThis document defines...",
2110
2109
  extracted_data={
2111
- "type": "procedure",
2112
- "module": "F",
2113
- "section": "Panic Disorder",
2114
- "dsm5_criterion": "Panic Attack Specifier",
2110
+ "type": "specification",
2111
+ "format": "yaml",
2112
+ "version": "1.0",
2115
2113
  },
2116
- tags=["scid-5", "procedure", "anxiety"]
2114
+ tags=["config", "schema", "specification"]
2117
2115
  )
2118
2116
 
2119
2117
 
@@ -2227,7 +2225,7 @@ This schema includes the `search_rem` tool which supports:
2227
2225
  - **Optional**
2228
2226
 
2229
2227
  ',
2230
- '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Medical knowledge base from git\n disorder_ontology = Ontology(\n name=\"panic-disorder\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md\",\n content=\"# Panic Disorder\\n\\nPanic disorder is characterized by...\",\n extracted_data={\n \"type\": \"disorder\",\n \"category\": \"anxiety\",\n \"icd10\": \"F41.0\",\n \"dsm5_criteria\": [\"A\", \"B\", \"C\", \"D\"],\n 
},\n tags=[\"disorder\", \"anxiety\", \"dsm5\"]\n )\n\n # Direct-loaded: Clinical procedure from git\n scid_node = Ontology(\n name=\"scid-5-f1\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md\",\n content=\"# scid-5-f1: Panic Attack Screening\\n\\n...\",\n extracted_data={\n \"type\": \"procedure\",\n \"module\": \"F\",\n \"section\": \"Panic Disorder\",\n \"dsm5_criterion\": \"Panic Attack Specifier\",\n },\n tags=[\"scid-5\", \"procedure\", \"anxiety\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2228
+ '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n 
config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2231
2229
  'entity',
2232
2230
  '{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2233
2231
  )
@@ -0,0 +1,45 @@
1
+ -- Migration: Update messages.session_id from session name to session UUID
2
+ -- This fixes the bug where messages were stored with session.name instead of session.id
3
+ --
4
+ -- Run this migration AFTER deploying the code fixes in remdb 0.3.204+
5
+ -- The code now correctly stores session.id (UUID), but existing data needs migration.
6
+
7
+ BEGIN;
8
+
9
+ -- First, count how many messages need to be updated
10
+ DO $$
11
+ DECLARE
12
+ count_to_migrate INTEGER;
13
+ BEGIN
14
+ SELECT COUNT(*) INTO count_to_migrate
15
+ FROM messages m
16
+ JOIN sessions s ON m.session_id = s.name
17
+ WHERE m.session_id != s.id::text;
18
+
19
+ RAISE NOTICE 'Messages needing migration: %', count_to_migrate;
20
+ END $$;
21
+
22
+ -- Update messages.session_id from session name to session UUID
23
+ UPDATE messages m
24
+ SET session_id = s.id::text
25
+ FROM sessions s
26
+ WHERE m.session_id = s.name
27
+ AND m.session_id != s.id::text;
28
+
29
+ -- NOTE(review): this separate DO block cannot observe the UPDATE above — GET DIAGNOSTICS ROW_COUNT
+ -- only reflects the last SQL command executed inside the same plpgsql block, so this reports 0.
+ -- Move the UPDATE into this block (or re-count with a SELECT) to report the real number updated.
30
+ DO $$
31
+ DECLARE
32
+ updated_count INTEGER;
33
+ BEGIN
34
+ GET DIAGNOSTICS updated_count = ROW_COUNT;
35
+ RAISE NOTICE 'Messages updated: %', updated_count;
36
+ END $$;
37
+
38
+ COMMIT;
39
+
40
+ -- Verify the fix - all messages should now join by UUID
41
+ SELECT
42
+ 'Messages matching sessions by UUID' as status,
43
+ COUNT(*) as count
44
+ FROM messages m
45
+ JOIN sessions s ON m.session_id = s.id::text;
@@ -147,15 +147,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
147
147
  try:
148
148
  await db.connect()
149
149
 
150
- query = """
151
- SELECT spec FROM schemas
152
- WHERE LOWER(name) = LOWER($1)
153
- AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
154
- LIMIT 1
155
- """
156
- logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
157
-
158
- row = await db.fetchrow(query, schema_name, user_id)
150
+ # Query for public schemas (user_id IS NULL) and optionally user-specific
151
+ if user_id:
152
+ query = """
153
+ SELECT spec FROM schemas
154
+ WHERE LOWER(name) = LOWER($1)
155
+ AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
156
+ LIMIT 1
157
+ """
158
+ row = await db.fetchrow(query, schema_name, user_id)
159
+ else:
160
+ # No user_id - only search public schemas
161
+ query = """
162
+ SELECT spec FROM schemas
163
+ WHERE LOWER(name) = LOWER($1)
164
+ AND (user_id = 'system' OR user_id IS NULL)
165
+ LIMIT 1
166
+ """
167
+ row = await db.fetchrow(query, schema_name)
168
+ logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
159
169
 
160
170
  if row:
161
171
  spec = row.get("spec")
@@ -193,17 +203,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
193
203
  try:
194
204
  await db.connect()
195
205
 
196
- # Query schemas table directly by name
197
- # Note: Schema name lookup is case-insensitive for user convenience
198
- query = """
199
- SELECT spec FROM schemas
200
- WHERE LOWER(name) = LOWER($1)
201
- AND (user_id = $2 OR user_id = 'system')
202
- LIMIT 1
203
- """
204
- logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
205
-
206
- row = await db.fetchrow(query, schema_name, user_id)
206
+ # Query for public schemas (user_id IS NULL) and optionally user-specific
207
+ if user_id:
208
+ query = """
209
+ SELECT spec FROM schemas
210
+ WHERE LOWER(name) = LOWER($1)
211
+ AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
212
+ LIMIT 1
213
+ """
214
+ row = await db.fetchrow(query, schema_name, user_id)
215
+ else:
216
+ # No user_id - only search public schemas
217
+ query = """
218
+ SELECT spec FROM schemas
219
+ WHERE LOWER(name) = LOWER($1)
220
+ AND (user_id = 'system' OR user_id IS NULL)
221
+ LIMIT 1
222
+ """
223
+ row = await db.fetchrow(query, schema_name)
224
+ logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
207
225
 
208
226
  if row:
209
227
  spec = row.get("spec")
@@ -365,13 +383,14 @@ def load_agent_schema(
365
383
  logger.debug(f"Could not load from {search_path}: {e}")
366
384
  continue
367
385
 
368
- # 5. Try database LOOKUP fallback (if enabled and user_id provided)
369
- if enable_db_fallback and user_id:
386
+ # 5. Try database LOOKUP fallback (if enabled)
387
+ # Always search for public schemas (user_id IS NULL), plus user-specific if user_id provided
388
+ if enable_db_fallback:
370
389
  try:
371
- logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
390
+ logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id or 'public'})")
372
391
  db_schema = _load_schema_from_database(base_name, user_id)
373
392
  if db_schema:
374
- logger.info(f"✅ Loaded schema from database: {base_name} (user_id={user_id})")
393
+ logger.info(f"✅ Loaded schema from database: {base_name}")
375
394
  return db_schema
376
395
  except Exception as e:
377
396
  logger.debug(f"Database schema lookup failed: {e}")
@@ -387,9 +406,9 @@ def load_agent_schema(
387
406
  db_search_note = ""
388
407
  if enable_db_fallback:
389
408
  if user_id:
390
- db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id='{user_id}' (no match)"
409
+ db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('{user_id}', 'system', NULL) (no match)"
391
410
  else:
392
- db_search_note = "\n - Database: (skipped - no user_id provided)"
411
+ db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('system', NULL) (no match)"
393
412
 
394
413
  raise FileNotFoundError(
395
414
  f"Schema not found: {schema_name_or_path}\n"
@@ -484,19 +503,19 @@ async def load_agent_schema_async(
484
503
  except Exception:
485
504
  continue
486
505
 
487
- # Try database lookup
488
- if user_id:
489
- from rem.services.postgres import get_postgres_service
490
-
491
- should_disconnect = False
492
- if db is None:
493
- db = get_postgres_service()
494
- if db:
495
- await db.connect()
496
- should_disconnect = True
506
+ # Try database lookup - always search public schemas, plus user-specific if user_id provided
507
+ from rem.services.postgres import get_postgres_service
497
508
 
509
+ should_disconnect = False
510
+ if db is None:
511
+ db = get_postgres_service()
498
512
  if db:
499
- try:
513
+ await db.connect()
514
+ should_disconnect = True
515
+
516
+ if db:
517
+ try:
518
+ if user_id:
500
519
  query = """
501
520
  SELECT spec FROM schemas
502
521
  WHERE LOWER(name) = LOWER($1)
@@ -504,14 +523,23 @@ async def load_agent_schema_async(
504
523
  LIMIT 1
505
524
  """
506
525
  row = await db.fetchrow(query, base_name, user_id)
507
- if row:
508
- spec = row.get("spec")
509
- if spec and isinstance(spec, dict):
510
- logger.info(f"✅ Loaded schema from database: {base_name}")
511
- return spec
512
- finally:
513
- if should_disconnect:
514
- await db.disconnect()
526
+ else:
527
+ # No user_id - only search public schemas
528
+ query = """
529
+ SELECT spec FROM schemas
530
+ WHERE LOWER(name) = LOWER($1)
531
+ AND (user_id = 'system' OR user_id IS NULL)
532
+ LIMIT 1
533
+ """
534
+ row = await db.fetchrow(query, base_name)
535
+ if row:
536
+ spec = row.get("spec")
537
+ if spec and isinstance(spec, dict):
538
+ logger.info(f"✅ Loaded schema from database: {base_name}")
539
+ return spec
540
+ finally:
541
+ if should_disconnect:
542
+ await db.disconnect()
515
543
 
516
544
  # Not found
517
545
  raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: remdb
3
- Version: 0.3.200
3
+ Version: 0.3.226
4
4
  Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
5
5
  Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
6
6
  Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md