PyPI - remdb - Versions diffs - 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl - Mend

remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

rem/agentic/__init__.py +10 -1
rem/agentic/context.py +13 -2
rem/agentic/context_builder.py +45 -34
rem/agentic/providers/pydantic_ai.py +302 -110
rem/api/mcp_router/resources.py +223 -0
rem/api/mcp_router/tools.py +76 -10
rem/api/routers/auth.py +113 -10
rem/api/routers/chat/child_streaming.py +22 -8
rem/api/routers/chat/completions.py +3 -3
rem/api/routers/chat/sse_events.py +3 -3
rem/api/routers/chat/streaming.py +40 -45
rem/api/routers/chat/streaming_utils.py +5 -7
rem/api/routers/feedback.py +2 -2
rem/api/routers/query.py +5 -14
rem/cli/commands/ask.py +144 -33
rem/cli/commands/experiments.py +1 -1
rem/cli/commands/process.py +9 -1
rem/cli/commands/query.py +109 -0
rem/cli/commands/session.py +117 -0
rem/cli/main.py +2 -0
rem/models/core/experiment.py +1 -1
rem/models/entities/session.py +1 -0
rem/schemas/agents/core/agent-builder.yaml +1 -1
rem/schemas/agents/test_orchestrator.yaml +42 -0
rem/schemas/agents/test_structured_output.yaml +52 -0
rem/services/content/providers.py +151 -49
rem/services/postgres/repository.py +1 -0
rem/services/rem/README.md +4 -3
rem/services/rem/parser.py +7 -10
rem/services/rem/service.py +47 -0
rem/services/session/compression.py +7 -3
rem/services/session/pydantic_messages.py +25 -7
rem/services/session/reload.py +2 -1
rem/settings.py +64 -7
rem/sql/migrations/004_cache_system.sql +3 -1
rem/utils/schema_loader.py +135 -103
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/METADATA +6 -5
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/RECORD +40 -37
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0

rem/services/session/compression.py CHANGED Viewed

@@ -96,7 +96,7 @@ class MessageCompressor:
         Returns:
             Compressed message dict
         """
-        content = message.get("content", "")
+        content = message.get("content") or ""
         # Don't compress short messages or system messages
         if (
@@ -242,7 +242,7 @@ class SessionMessageStore:
         # Use pre-generated id from message dict if available (for frontend feedback)
         msg = Message(
             id=message.get("id"),  # Use pre-generated ID if provided
-            content=message.get("content", ""),
+            content=message.get("content") or "",
             message_type=message.get("role", "assistant"),
             session_id=session_id,
             tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
@@ -337,7 +337,7 @@ class SessionMessageStore:
         compressed_messages = []
         for idx, message in enumerate(messages):
-            content = message.get("content", "")
+            content = message.get("content") or ""
             # Only store and compress long assistant responses
             if (
@@ -368,6 +368,8 @@ class SessionMessageStore:
                 }
                 # For tool messages, include tool call details in metadata
+                # Note: tool_arguments is stored only when provided (parent tool calls)
+                # For child tool calls (e.g., register_metadata), args are in content as JSON
                 if message.get("role") == "tool":
                     if message.get("tool_call_id"):
                         msg_metadata["tool_call_id"] = message.get("tool_call_id")
@@ -436,6 +438,8 @@ class SessionMessageStore:
                 }
                 # For tool messages, reconstruct tool call metadata
+                # Note: tool_arguments may be in metadata (parent calls) or parsed from
+                # content (child calls like register_metadata) by pydantic_messages.py
                 if role == "tool" and msg.metadata:
                     if msg.metadata.get("tool_call_id"):
                         msg_dict["tool_call_id"] = msg.metadata["tool_call_id"]

rem/services/session/pydantic_messages.py CHANGED Viewed

@@ -5,12 +5,16 @@ storage format into pydantic-ai's native ModelRequest/ModelResponse types.
 Key insight: When we store tool results, we only store the result (ToolReturnPart).
 But LLM APIs require matching ToolCallPart for each ToolReturnPart. So we synthesize
-the ToolCallPart from stored metadata (tool_name, tool_call_id, tool_arguments).
+the ToolCallPart from stored metadata (tool_name, tool_call_id) and arguments.
+Tool arguments can come from two places:
+- Parent tool calls (ask_agent): tool_arguments stored in metadata (content = result)
+- Child tool calls (register_metadata): arguments parsed from content (content = args as JSON)
 Storage format (our simplified format):
     {"role": "user", "content": "..."}
     {"role": "assistant", "content": "..."}
-    {"role": "tool", "content": "{...}", "tool_name": "...", "tool_call_id": "...", "tool_arguments": {...}}
+    {"role": "tool", "content": "{...}", "tool_name": "...", "tool_call_id": "...", "tool_arguments": {...}}  # optional
 Pydantic-ai format (what the LLM expects):
     ModelRequest(parts=[UserPromptPart(content="...")])
@@ -102,7 +106,7 @@ def session_to_pydantic_messages(
     while i < len(session_history):
         msg = session_history[i]
         role = msg.get("role", "")
-        content = msg.get("content", "")
+        content = msg.get("content") or ""
         if role == "user":
             # User messages become ModelRequest with UserPromptPart
@@ -120,8 +124,15 @@ def session_to_pydantic_messages(
                 tool_msg = session_history[j]
                 tool_name = tool_msg.get("tool_name", "unknown_tool")
                 tool_call_id = tool_msg.get("tool_call_id", f"call_{j}")
-                tool_arguments = tool_msg.get("tool_arguments", {})
-                tool_content = tool_msg.get("content", "{}")
+                tool_content = tool_msg.get("content") or "{}"
+                # tool_arguments: prefer explicit field, fallback to parsing content
+                tool_arguments = tool_msg.get("tool_arguments")
+                if tool_arguments is None and isinstance(tool_content, str) and tool_content:
+                    try:
+                        tool_arguments = json.loads(tool_content)
+                    except json.JSONDecodeError:
+                        tool_arguments = {}
                 # Parse tool content if it's a JSON string
                 if isinstance(tool_content, str):
@@ -179,8 +190,15 @@ def session_to_pydantic_messages(
             # Orphan tool message (no preceding assistant) - synthesize both parts
             tool_name = msg.get("tool_name", "unknown_tool")
             tool_call_id = msg.get("tool_call_id", f"call_{i}")
-            tool_arguments = msg.get("tool_arguments", {})
-            tool_content = msg.get("content", "{}")
+            tool_content = msg.get("content") or "{}"
+            # tool_arguments: prefer explicit field, fallback to parsing content
+            tool_arguments = msg.get("tool_arguments")
+            if tool_arguments is None and isinstance(tool_content, str) and tool_content:
+                try:
+                    tool_arguments = json.loads(tool_content)
+                except json.JSONDecodeError:
+                    tool_arguments = {}
             # Parse tool content
             if isinstance(tool_content, str):

rem/services/session/reload.py CHANGED Viewed

@@ -12,7 +12,8 @@ Design Pattern:
 Message Types on Reload:
 - user: Returned as-is
-- tool: Returned as-is with metadata (tool_call_id, tool_name, tool_arguments)
+- tool: Returned with metadata (tool_call_id, tool_name). tool_arguments may be in
+  metadata (parent calls) or parsed from content (child calls) by pydantic_messages.py
 - assistant: Compressed on load if long (>400 chars), with REM LOOKUP for recovery
 """

rem/settings.py CHANGED Viewed

@@ -424,6 +424,49 @@ class AuthSettings(BaseSettings):
     google: GoogleOAuthSettings = Field(default_factory=GoogleOAuthSettings)
     microsoft: MicrosoftOAuthSettings = Field(default_factory=MicrosoftOAuthSettings)
+    # Pre-approved login codes (bypass email verification)
+    # Format: comma-separated codes with prefix A=admin, B=normal user
+    # Example: "A12345,A67890,B11111,B22222"
+    preapproved_codes: str = Field(
+        default="",
+        description=(
+            "Comma-separated list of pre-approved login codes. "
+            "Prefix A = admin user, B = normal user. "
+            "Example: 'A12345,A67890,B11111'. "
+            "Users can login with these codes without email verification."
+        ),
+    )
+    def check_preapproved_code(self, code: str) -> dict | None:
+        """
+        Check if a code is in the pre-approved list.
+        Args:
+            code: The code to check (including prefix)
+        Returns:
+            Dict with 'role' key if valid, None if not found.
+            - A prefix -> role='admin'
+            - B prefix -> role='user'
+        """
+        if not self.preapproved_codes:
+            return None
+        codes = [c.strip().upper() for c in self.preapproved_codes.split(",") if c.strip()]
+        code_upper = code.strip().upper()
+        if code_upper not in codes:
+            return None
+        # Parse prefix to determine role
+        if code_upper.startswith("A"):
+            return {"role": "admin", "code": code_upper}
+        elif code_upper.startswith("B"):
+            return {"role": "user", "code": code_upper}
+        else:
+            # Unknown prefix, treat as user
+            return {"role": "user", "code": code_upper}
     @field_validator("session_secret", mode="before")
     @classmethod
     def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:
@@ -722,7 +765,7 @@ class DataLakeSettings(BaseSettings):
         │       └── cpt/                # CPT codes
         └── calibration/                # Agent calibration
             ├── experiments/            # Experiment configs + results
-            │   └── {agent}/{task}/     # e.g., siggy/risk-assessment
+            │   └── {agent}/{task}/     # e.g., rem/risk-assessment
             └── datasets/               # Shared evaluation datasets
     Experiment Storage:
@@ -1598,7 +1641,7 @@ class EmailSettings(BaseSettings):
             "Existing users can always login regardless of domain. "
             "New users must have an email from a trusted domain. "
             "Empty string means all domains are allowed. "
-            "Example: 'siggymd.ai,example.com'"
+            "Example: 'mycompany.com,example.com'"
         ),
     )
@@ -1797,14 +1840,28 @@ class Settings(BaseSettings):
     debug: DebugSettings = Field(default_factory=DebugSettings)
-# Auto-load .env file from current directory if it exists
-# This happens BEFORE config file loading, so .env takes precedence
+# Auto-load .env file from current directory or parent directories
+# This happens BEFORE config file loading, so .env takes precedence over shell env vars
 from pathlib import Path
 from dotenv import load_dotenv
-_dotenv_path = Path(".env")
-if _dotenv_path.exists():
-    load_dotenv(_dotenv_path, override=False)  # Don't override existing env vars
+def _find_dotenv() -> Path | None:
+    """Search for .env in current dir and up to 3 parent directories."""
+    current = Path.cwd()
+    for _ in range(4):  # Current + 3 parents
+        env_path = current / ".env"
+        if env_path.exists():
+            return env_path
+        if current.parent == current:  # Reached root
+            break
+        current = current.parent
+    return None
+_dotenv_path = _find_dotenv()
+if _dotenv_path:
+    load_dotenv(_dotenv_path, override=True)  # .env takes precedence over shell env vars
     logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
 # Load configuration from ~/.rem/config.yaml before initializing settings

rem/sql/migrations/004_cache_system.sql CHANGED Viewed

@@ -64,9 +64,11 @@ CREATE OR REPLACE FUNCTION rem_kv_store_empty(p_user_id TEXT)
 RETURNS BOOLEAN AS $$
 BEGIN
     -- Quick existence check - very fast with index
+    -- Check for user-specific OR public (NULL user_id) entries
+    -- This ensures self-healing triggers correctly for public ontologies
     RETURN NOT EXISTS (
         SELECT 1 FROM kv_store
-        WHERE user_id = p_user_id
+        WHERE user_id = p_user_id OR user_id IS NULL
         LIMIT 1
     );
 END;

rem/utils/schema_loader.py CHANGED Viewed

@@ -84,6 +84,7 @@ Schema Caching Status:
 """
 import importlib.resources
+import time
 from pathlib import Path
 from typing import Any, cast
@@ -104,10 +105,32 @@ SCHEMA_SEARCH_PATHS = [
 # In-memory cache for filesystem schemas (no TTL - immutable)
 _fs_schema_cache: dict[str, dict[str, Any]] = {}
-# Future: Database schema cache (with TTL - mutable)
-# Will be used when loading schemas from database (SchemaRepository)
-# _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
-# _db_schema_ttl: int = 300  # 5 minutes in seconds
+# Database schema cache (with TTL - mutable, supports hot-reload)
+# Cache key: (schema_name, user_id or "public") → (schema_dict, timestamp)
+_db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
+_db_schema_ttl: int = 300  # 5 minutes in seconds
+def _get_cached_db_schema(schema_name: str, user_id: str | None) -> dict[str, Any] | None:
+    """Get schema from DB cache if exists and not expired."""
+    cache_key = (schema_name.lower(), user_id or "public")
+    if cache_key in _db_schema_cache:
+        schema, timestamp = _db_schema_cache[cache_key]
+        if time.time() - timestamp < _db_schema_ttl:
+            logger.debug(f"Schema cache hit: {schema_name} (age: {time.time() - timestamp:.0f}s)")
+            return schema
+        else:
+            # Expired, remove from cache
+            del _db_schema_cache[cache_key]
+            logger.debug(f"Schema cache expired: {schema_name}")
+    return None
+def _cache_db_schema(schema_name: str, user_id: str | None, schema: dict[str, Any]) -> None:
+    """Add schema to DB cache with current timestamp."""
+    cache_key = (schema_name.lower(), user_id or "public")
+    _db_schema_cache[cache_key] = (schema, time.time())
+    logger.debug(f"Schema cached: {schema_name} (TTL: {_db_schema_ttl}s)")
 def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any] | None:
@@ -249,73 +272,65 @@ def load_agent_schema(
     enable_db_fallback: bool = True,
 ) -> dict[str, Any]:
     """
-    Load agent schema from YAML file with unified search logic and caching.
+    Load agent schema with database-first priority for hot-reloading support.
     Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.
-    Filesystem schemas are cached indefinitely (immutable, versioned with code).
-    Database schemas (future) will be cached with TTL for invalidation.
+    **IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
+    of schema updates without redeploying the application. This allows operators to
+    update schemas via `rem process ingest` and have changes take effect immediately.
     Handles path resolution automatically:
-    - "rem" → searches schemas/agents/rem.yaml (top-level)
-    - "moment-builder" → searches schemas/agents/core/moment-builder.yaml
-    - "contract-analyzer" → searches schemas/agents/examples/contract-analyzer.yaml
-    - "core/moment-builder" → searches schemas/agents/core/moment-builder.yaml
-    - "/absolute/path.yaml" → loads directly
-    - "relative/path.yaml" → loads relative to cwd
+    - "rem" → searches database, then schemas/agents/rem.yaml
+    - "moment-builder" → searches database, then schemas/agents/core/moment-builder.yaml
+    - "/absolute/path.yaml" → loads directly from filesystem (exact paths skip database)
+    - "relative/path.yaml" → loads relative to cwd (exact paths skip database)
     Search Order:
-    1. Check cache (if use_cache=True and schema found in FS cache)
-    2. Exact path if it exists (absolute or relative)
-    3. Custom paths from rem.register_schema_path() and SCHEMA__PATHS env var
-    4. Package resources: schemas/agents/{name}.yaml (top-level)
-    5. Package resources: schemas/agents/core/{name}.yaml
-    6. Package resources: schemas/agents/examples/{name}.yaml
-    7. Package resources: schemas/evaluators/{name}.yaml
-    8. Package resources: schemas/{name}.yaml
-    9. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)
+    1. Exact path if it exists (absolute or relative) - skips database
+    2. Database LOOKUP: schemas table (if enable_db_fallback=True) - PREFERRED for hot-reload
+    3. Check cache (if use_cache=True and schema found in FS cache)
+    4. Custom paths from rem.register_schema_path() and SCHEMA__PATHS env var
+    5. Package resources: schemas/agents/{name}.yaml (top-level)
+    6. Package resources: schemas/agents/core/{name}.yaml
+    7. Package resources: schemas/agents/examples/{name}.yaml
+    8. Package resources: schemas/evaluators/{name}.yaml
+    9. Package resources: schemas/{name}.yaml
     Args:
         schema_name_or_path: Schema name or file path (case-invariant for names)
             Examples: "rem-query-agent", "Contract-Analyzer", "./my-schema.yaml"
         use_cache: If True, uses in-memory cache for filesystem schemas
-        user_id: User ID for database schema lookup (required for DB fallback)
-        enable_db_fallback: If True, falls back to database LOOKUP when file not found
+        user_id: User ID for database schema lookup
+        enable_db_fallback: If True, checks database FIRST for schema (default: True)
     Returns:
         Agent schema as dictionary
     Raises:
-        FileNotFoundError: If schema not found in any search location (filesystem + database)
+        FileNotFoundError: If schema not found in any search location (database + filesystem)
         yaml.YAMLError: If schema file is invalid YAML
     Examples:
-        >>> # Load by short name (cached after first load) - case invariant
-        >>> schema = load_agent_schema("Contract-Analyzer")  # same as "contract-analyzer"
+        >>> # Load by short name - checks database first for hot-reload support
+        >>> schema = load_agent_schema("Contract-Analyzer")  # case invariant
         >>>
-        >>> # Load from custom path (not cached - custom paths may change)
+        >>> # Load from custom path (skips database - exact paths always use filesystem)
         >>> schema = load_agent_schema("./my-agent.yaml")
         >>>
-        >>> # Load evaluator schema (cached)
+        >>> # Load evaluator schema
         >>> schema = load_agent_schema("rem-lookup-correctness")
-        >>>
-        >>> # Load custom user schema from database (case invariant)
-        >>> schema = load_agent_schema("My-Agent", user_id="user-123")  # same as "my-agent"
     """
     # Normalize the name for cache key (lowercase for case-invariant lookups)
     cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '').lower()
     if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
         cache_key = cache_key.rsplit('.', 1)[0]
-    # Check cache first (only for package resources, not custom paths)
     path = Path(schema_name_or_path)
     is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
-    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
-        logger.debug(f"Loading schema from cache: {cache_key}")
-        return _fs_schema_cache[cache_key]
     # 1. Try exact path first (absolute or relative to cwd) - must be a file, not directory
+    # Exact paths skip database lookup (explicit file reference)
     if path.exists() and path.is_file():
         logger.debug(f"Loading schema from exact path: {path}")
         with open(path, "r") as f:
@@ -324,10 +339,28 @@ def load_agent_schema(
         # Don't cache custom paths (they may change)
         return cast(dict[str, Any], schema)
-    # 2. Normalize name for package resource search (lowercase)
+    # 2. Normalize name for lookups (lowercase)
     base_name = cache_key
-    # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
+    # 3. Try database FIRST (if enabled) - enables hot-reload without redeploy
+    # Database schemas are NOT cached to ensure hot-reload works immediately
+    if enable_db_fallback and not is_custom_path:
+        try:
+            logger.debug(f"Checking database for schema: {base_name} (user_id={user_id or 'public'})")
+            db_schema = _load_schema_from_database(base_name, user_id)
+            if db_schema:
+                logger.info(f"✅ Loaded schema from database: {base_name}")
+                return db_schema
+        except Exception as e:
+            logger.debug(f"Database schema lookup failed: {e}")
+            # Fall through to filesystem search
+    # 4. Check filesystem cache (only for package resources, not custom paths)
+    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
+        logger.debug(f"Loading schema from cache: {cache_key}")
+        return _fs_schema_cache[cache_key]
+    # 5. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
     from ..registry import get_schema_paths
     custom_paths = get_schema_paths()
@@ -358,7 +391,7 @@ def load_agent_schema(
                 # Don't cache custom paths (they may change during development)
                 return cast(dict[str, Any], schema)
-    # 4. Try package resources with standard search paths
+    # 6. Try package resources with standard search paths
     for search_pattern in SCHEMA_SEARCH_PATHS:
         search_path = search_pattern.format(name=base_name)
@@ -383,20 +416,7 @@ def load_agent_schema(
             logger.debug(f"Could not load from {search_path}: {e}")
             continue
-    # 5. Try database LOOKUP fallback (if enabled)
-    # Always search for public schemas (user_id IS NULL), plus user-specific if user_id provided
-    if enable_db_fallback:
-        try:
-            logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id or 'public'})")
-            db_schema = _load_schema_from_database(base_name, user_id)
-            if db_schema:
-                logger.info(f"✅ Loaded schema from database: {base_name}")
-                return db_schema
-        except Exception as e:
-            logger.debug(f"Database schema lookup failed: {e}")
-            # Fall through to error below
-    # 6. Schema not found in any location
+    # 7. Schema not found in any location
     searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
     custom_paths_note = ""
@@ -424,18 +444,21 @@ async def load_agent_schema_async(
     schema_name_or_path: str,
     user_id: str | None = None,
     db=None,
+    enable_db_fallback: bool = True,
 ) -> dict[str, Any]:
     """
-    Async version of load_agent_schema for use in async contexts.
+    Async version of load_agent_schema with database-first priority.
     Schema names are case-invariant - "MyAgent", "myagent", "MYAGENT" all resolve to the same schema.
-    This version accepts an existing database connection to avoid creating new connections.
+    **IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
+    of schema updates without redeploying the application.
     Args:
         schema_name_or_path: Schema name or file path (case-invariant for names)
         user_id: User ID for database schema lookup
         db: Optional existing PostgresService connection (if None, will create one)
+        enable_db_fallback: If True, checks database FIRST for schema (default: True)
     Returns:
         Agent schema as dictionary
@@ -443,7 +466,6 @@ async def load_agent_schema_async(
     Raises:
         FileNotFoundError: If schema not found
     """
-    # First try filesystem search (sync operations are fine)
     path = Path(schema_name_or_path)
     # Normalize the name for cache key (lowercase for case-invariant lookups)
@@ -453,12 +475,7 @@ async def load_agent_schema_async(
     is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
-    # Check cache
-    if not is_custom_path and cache_key in _fs_schema_cache:
-        logger.debug(f"Loading schema from cache: {cache_key}")
-        return _fs_schema_cache[cache_key]
-    # Try exact path (must be a file, not directory)
+    # 1. Try exact path first (skips database - explicit file reference)
     if path.exists() and path.is_file():
         logger.debug(f"Loading schema from exact path: {path}")
         with open(path, "r") as f:
@@ -467,7 +484,60 @@ async def load_agent_schema_async(
     base_name = cache_key
-    # Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
+    # 2. Try database FIRST (if enabled) - enables hot-reload without redeploy
+    if enable_db_fallback and not is_custom_path:
+        # Check DB schema cache first (TTL-based)
+        cached_schema = _get_cached_db_schema(base_name, user_id)
+        if cached_schema is not None:
+            logger.info(f"✅ Loaded schema from cache: {base_name}")
+            return cached_schema
+        # Cache miss - query database
+        from rem.services.postgres import get_postgres_service
+        should_disconnect = False
+        if db is None:
+            db = get_postgres_service()
+            if db:
+                await db.connect()
+                should_disconnect = True
+        if db:
+            try:
+                if user_id:
+                    query = """
+                        SELECT spec FROM schemas
+                        WHERE LOWER(name) = LOWER($1)
+                        AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
+                        LIMIT 1
+                    """
+                    row = await db.fetchrow(query, base_name, user_id)
+                else:
+                    # No user_id - only search public schemas
+                    query = """
+                        SELECT spec FROM schemas
+                        WHERE LOWER(name) = LOWER($1)
+                        AND (user_id = 'system' OR user_id IS NULL)
+                        LIMIT 1
+                    """
+                    row = await db.fetchrow(query, base_name)
+                if row:
+                    spec = row.get("spec")
+                    if spec and isinstance(spec, dict):
+                        # Cache the schema for future requests
+                        _cache_db_schema(base_name, user_id, spec)
+                        logger.info(f"✅ Loaded schema from database: {base_name}")
+                        return spec
+            finally:
+                if should_disconnect:
+                    await db.disconnect()
+    # 3. Check filesystem cache
+    if not is_custom_path and cache_key in _fs_schema_cache:
+        logger.debug(f"Loading schema from cache: {cache_key}")
+        return _fs_schema_cache[cache_key]
+    # 4. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
     from ..registry import get_schema_paths
     custom_paths = get_schema_paths()
@@ -489,7 +559,7 @@ async def load_agent_schema_async(
                     schema = yaml.safe_load(f)
                 return cast(dict[str, Any], schema)
-    # Try package resources
+    # 5. Try package resources
     for search_pattern in SCHEMA_SEARCH_PATHS:
         search_path = search_pattern.format(name=base_name)
         try:
@@ -503,44 +573,6 @@ async def load_agent_schema_async(
         except Exception:
             continue
-    # Try database lookup - always search public schemas, plus user-specific if user_id provided
-    from rem.services.postgres import get_postgres_service
-    should_disconnect = False
-    if db is None:
-        db = get_postgres_service()
-        if db:
-            await db.connect()
-            should_disconnect = True
-    if db:
-        try:
-            if user_id:
-                query = """
-                    SELECT spec FROM schemas
-                    WHERE LOWER(name) = LOWER($1)
-                    AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
-                    LIMIT 1
-                """
-                row = await db.fetchrow(query, base_name, user_id)
-            else:
-                # No user_id - only search public schemas
-                query = """
-                    SELECT spec FROM schemas
-                    WHERE LOWER(name) = LOWER($1)
-                    AND (user_id = 'system' OR user_id IS NULL)
-                    LIMIT 1
-                """
-                row = await db.fetchrow(query, base_name)
-            if row:
-                spec = row.get("spec")
-                if spec and isinstance(spec, dict):
-                    logger.info(f"✅ Loaded schema from database: {base_name}")
-                    return spec
-        finally:
-            if should_disconnect:
-                await db.disconnect()
     # Not found
     raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")

{remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: remdb
-Version: 0.3.230
+Version: 0.3.258
 Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
 Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
 Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
@@ -28,7 +28,7 @@ Requires-Dist: gitpython>=3.1.45
 Requires-Dist: hypercorn>=0.17.0
 Requires-Dist: itsdangerous>=2.0.0
 Requires-Dist: json-schema-to-pydantic>=0.2.0
-Requires-Dist: kreuzberg<4.0.0,>=3.21.0
+Requires-Dist: kreuzberg>=4.0.5
 Requires-Dist: loguru>=0.7.0
 Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
 Requires-Dist: opentelemetry-api>=1.28.0
@@ -1300,15 +1300,16 @@ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
   available   : Stage 1+
   example     : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
-SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
+SearchQuery ::= SEARCH <text:string> [IN|TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
   text        : Semantic query text
-  table       : Target table (default: "resources")
+  table       : Target table (default: "resources"). Use IN or TABLE keyword.
   clause      : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
   limit       : Max results (default: 10)
   performance : Indexed (pgvector)
   available   : Stage 3+
   examples    :
-    - SEARCH "database migration" TABLE resources LIMIT 10
+    - SEARCH "database migration" IN resources LIMIT 10
+    - SEARCH "parcel delivery" IN ontologies
     - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
     - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
     - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10

remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl

remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl