PyPI - remdb - Versions diffs - 0.3.7__py3-none-any.whl → 0.3.14__py3-none-any.whl - Mend

remdb 0.3.7__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

rem/__init__.py +129 -2
rem/agentic/context.py +7 -5
rem/agentic/providers/phoenix.py +32 -43
rem/api/README.md +23 -0
rem/api/main.py +27 -2
rem/api/middleware/tracking.py +172 -0
rem/api/routers/auth.py +54 -0
rem/api/routers/chat/completions.py +1 -1
rem/cli/commands/ask.py +13 -10
rem/cli/commands/configure.py +4 -3
rem/cli/commands/db.py +17 -3
rem/cli/commands/experiments.py +76 -72
rem/cli/commands/process.py +8 -7
rem/cli/commands/scaffold.py +47 -0
rem/cli/main.py +2 -0
rem/models/entities/user.py +10 -3
rem/registry.py +367 -0
rem/services/content/providers.py +92 -133
rem/services/dreaming/affinity_service.py +2 -16
rem/services/dreaming/moment_service.py +2 -15
rem/services/embeddings/api.py +20 -13
rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
rem/services/phoenix/client.py +148 -14
rem/services/postgres/schema_generator.py +86 -5
rem/services/rate_limit.py +113 -0
rem/services/rem/README.md +14 -0
rem/services/user_service.py +98 -0
rem/settings.py +79 -10
rem/sql/install_models.sql +13 -0
rem/sql/migrations/003_seed_default_user.sql +48 -0
rem/utils/constants.py +97 -0
rem/utils/date_utils.py +228 -0
rem/utils/embeddings.py +17 -4
rem/utils/files.py +167 -0
rem/utils/mime_types.py +158 -0
rem/utils/schema_loader.py +63 -14
rem/utils/vision.py +9 -14
rem/workers/README.md +14 -14
rem/workers/db_maintainer.py +74 -0
{remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/METADATA +169 -121
{remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/RECORD +43 -32
{remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/WHEEL +0 -0
{remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/entry_points.txt +0 -0

rem/utils/files.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""
+File utilities for consistent file handling throughout REM.
+Provides context managers and helpers for temporary file operations,
+ensuring proper cleanup and consistent patterns.
+"""
+import tempfile
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Generator, Optional
+from loguru import logger
+@contextmanager
+def temp_file_from_bytes(
+    content: bytes,
+    suffix: str = "",
+    prefix: str = "rem_",
+    dir: Optional[str] = None,
+) -> Generator[Path, None, None]:
+    """
+    Create a temporary file from bytes, yield path, cleanup automatically.
+    The file is removed when the ``with`` block exits, whether normally or
+    through an exception, and also when writing ``content`` itself fails.
+    Args:
+        content: Bytes to write to the temporary file
+        suffix: File extension (e.g., ".pdf", ".wav")
+        prefix: Prefix for the temp file name
+        dir: Directory for temp file (uses system temp if None)
+    Yields:
+        Path to the temporary file
+    Example:
+        >>> with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
+        ...     result = process_pdf(tmp_path)
+        # File is automatically cleaned up after the block
+    Note:
+        The file is created with delete=False so we control cleanup.
+        The handle is closed before the path is yielded, so the file can be
+        reopened by name, including by external processes.
+    """
+    tmp_path: Optional[Path] = None
+    try:
+        with tempfile.NamedTemporaryFile(
+            suffix=suffix,
+            prefix=prefix,
+            dir=dir,
+            delete=False,
+        ) as tmp:
+            # Record the path BEFORE writing: with delete=False a failed write
+            # would otherwise leave an orphaned file that cleanup cannot find.
+            tmp_path = Path(tmp.name)
+            tmp.write(content)
+        yield tmp_path
+    finally:
+        if tmp_path is not None:
+            try:
+                tmp_path.unlink(missing_ok=True)
+            except Exception as e:
+                # Best-effort cleanup: never mask the caller's own exception.
+                logger.warning(f"Failed to cleanup temp file {tmp_path}: {e}")
+@contextmanager
+def temp_file_empty(
+    suffix: str = "",
+    prefix: str = "rem_",
+    dir: Optional[str] = None,
+) -> Generator[Path, None, None]:
+    """
+    Yield the path of a fresh zero-byte temporary file and delete it on exit.
+    Intended for cases where the caller, or an external process, fills the
+    file in after it has been created.
+    Args:
+        suffix: File extension
+        prefix: Prefix for the temp file name
+        dir: Directory for temp file
+    Yields:
+        Path to the empty temporary file
+    """
+    created: Optional[Path] = None
+    try:
+        # delete=False: removal happens in the outer finally, not in tempfile.
+        handle = tempfile.NamedTemporaryFile(
+            suffix=suffix, prefix=prefix, dir=dir, delete=False
+        )
+        with handle:
+            created = Path(handle.name)
+        yield created
+    finally:
+        if created is not None:
+            try:
+                created.unlink(missing_ok=True)
+            except Exception as e:
+                logger.warning(f"Failed to cleanup temp file {created}: {e}")
+@contextmanager
+def temp_directory(
+    prefix: str = "rem_",
+    dir: Optional[str] = None,
+) -> Generator[Path, None, None]:
+    """
+    Create a temporary directory, yield path, cleanup automatically.
+    The directory and everything inside it is removed when the ``with``
+    block exits. Cleanup is best-effort: a failure is logged as a warning
+    and never raised to the caller.
+    Args:
+        prefix: Prefix for the temp directory name
+        dir: Parent directory for temp directory
+    Yields:
+        Path to the temporary directory
+    """
+    import shutil
+    tmp_dir: Optional[Path] = None
+    try:
+        tmp_dir = Path(tempfile.mkdtemp(prefix=prefix, dir=dir))
+        yield tmp_dir
+    finally:
+        if tmp_dir is not None:
+            try:
+                # No ignore_errors=True here: it would swallow every failure
+                # and make the warning below unreachable.
+                shutil.rmtree(tmp_dir)
+            except FileNotFoundError:
+                # Already removed by the caller; nothing left to clean up.
+                pass
+            except Exception as e:
+                logger.warning(f"Failed to cleanup temp directory {tmp_dir}: {e}")
+def ensure_parent_exists(path: Path) -> Path:
+    """
+    Create the directory that should contain ``path`` if it is missing.
+    Missing intermediate directories are created too, and an existing
+    directory is left untouched. The file at ``path`` itself is not created.
+    Args:
+        path: File path whose parent should exist
+    Returns:
+        The original path (for chaining)
+    """
+    parent_dir = path.parent
+    parent_dir.mkdir(parents=True, exist_ok=True)
+    return path
+def safe_delete(path: Path) -> bool:
+    """
+    Remove a file without raising, reporting whether the removal succeeded.
+    A path that does not exist counts as success. Any error raised while
+    unlinking is logged as a warning and reported as failure.
+    Args:
+        path: Path to delete
+    Returns:
+        True if deleted or didn't exist, False on error
+    """
+    try:
+        path.unlink(missing_ok=True)
+    except Exception as e:
+        logger.warning(f"Failed to delete {path}: {e}")
+        return False
+    else:
+        return True

rem/utils/mime_types.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""
+Centralized MIME type mappings for file format detection.
+Provides bidirectional mappings between file extensions and MIME types.
+Use these constants throughout the codebase instead of inline dictionaries.
+"""
+# Extension to MIME type mapping (extension includes leading dot)
+# Every key is lowercase, so lowercase an extension before looking it up here.
+# Several extensions may share one MIME type (.jpg/.jpeg, .md/.markdown,
+# .yaml/.yml).
+EXTENSION_TO_MIME: dict[str, str] = {
+    # Images
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".bmp": "image/bmp",
+    ".tiff": "image/tiff",
+    ".svg": "image/svg+xml",
+    # Documents
+    ".pdf": "application/pdf",
+    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    ".doc": "application/msword",
+    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    ".ppt": "application/vnd.ms-powerpoint",
+    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    ".xls": "application/vnd.ms-excel",
+    # Audio
+    ".wav": "audio/wav",
+    ".mp3": "audio/mpeg",
+    ".m4a": "audio/x-m4a",
+    ".flac": "audio/flac",
+    ".ogg": "audio/ogg",
+    ".aac": "audio/aac",
+    # Video
+    ".mp4": "video/mp4",
+    ".webm": "video/webm",
+    ".avi": "video/x-msvideo",
+    ".mov": "video/quicktime",
+    # Text/Code
+    ".txt": "text/plain",
+    ".md": "text/markdown",
+    ".markdown": "text/markdown",
+    ".json": "application/json",
+    ".yaml": "application/x-yaml",
+    ".yml": "application/x-yaml",
+    ".xml": "application/xml",
+    ".html": "text/html",
+    ".css": "text/css",
+    ".js": "application/javascript",
+    ".py": "text/x-python",
+    ".ts": "application/typescript",
+    ".csv": "text/csv",
+}
+# MIME type to extension mapping (reverse of above). Where several extensions
+# share a MIME type, exactly one is listed here: .jpg (not .jpeg), .md (not
+# .markdown) and .yaml (not .yml).
+MIME_TO_EXTENSION: dict[str, str] = {
+    # Images
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/gif": ".gif",
+    "image/webp": ".webp",
+    "image/bmp": ".bmp",
+    "image/tiff": ".tiff",
+    "image/svg+xml": ".svg",
+    # Documents
+    "application/pdf": ".pdf",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+    "application/msword": ".doc",
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+    "application/vnd.ms-powerpoint": ".ppt",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+    "application/vnd.ms-excel": ".xls",
+    # Audio
+    "audio/wav": ".wav",
+    "audio/mpeg": ".mp3",
+    "audio/x-m4a": ".m4a",
+    # Extra alias for .m4a; the extension-to-MIME table maps .m4a only to
+    # audio/x-m4a, so this entry is one-way.
+    "audio/mp4": ".m4a",
+    "audio/flac": ".flac",
+    "audio/ogg": ".ogg",
+    "audio/aac": ".aac",
+    # Video
+    "video/mp4": ".mp4",
+    "video/webm": ".webm",
+    "video/x-msvideo": ".avi",
+    "video/quicktime": ".mov",
+    # Text/Code
+    "text/plain": ".txt",
+    "text/markdown": ".md",
+    "application/json": ".json",
+    "application/x-yaml": ".yaml",
+    "application/xml": ".xml",
+    "text/html": ".html",
+    "text/css": ".css",
+    "application/javascript": ".js",
+    "text/x-python": ".py",
+    "application/typescript": ".ts",
+    "text/csv": ".csv",
+}
+# Grouped by category for convenience
+# Each set repeats, by hand, the lowercase dotted extensions of one category
+# of the extension-to-MIME table; update both when adding a new format.
+IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"}
+DOCUMENT_EXTENSIONS = {".pdf", ".docx", ".doc", ".pptx", ".ppt", ".xlsx", ".xls"}
+AUDIO_EXTENSIONS = {".wav", ".mp3", ".m4a", ".flac", ".ogg", ".aac"}
+VIDEO_EXTENSIONS = {".mp4", ".webm", ".avi", ".mov"}
+TEXT_EXTENSIONS = {".txt", ".md", ".markdown", ".json", ".yaml", ".yml", ".xml", ".html", ".css", ".js", ".py", ".ts", ".csv"}
+def get_extension(mime_type: str, default: str = ".bin") -> str:
+    """
+    Get file extension for a MIME type.
+    The lookup ignores letter case, surrounding whitespace and any
+    parameters after ``;`` (e.g., "text/html; charset=utf-8"), so values
+    taken straight from a Content-Type header resolve correctly.
+    Args:
+        mime_type: MIME type string (e.g., "image/png")
+        default: Default extension if MIME type not found
+    Returns:
+        File extension with leading dot (e.g., ".png")
+    """
+    # Media types are case-insensitive and may carry parameters; reduce the
+    # input to the bare lowercase "type/subtype" that the table is keyed by.
+    normalized = mime_type.split(";", 1)[0].strip().lower()
+    return MIME_TO_EXTENSION.get(normalized, default)
+def get_mime_type(extension: str, default: str = "application/octet-stream") -> str:
+    """
+    Look up the MIME type registered for a file extension.
+    The extension is matched case-insensitively, and a missing leading dot
+    is added before the lookup.
+    Args:
+        extension: File extension with or without leading dot
+        default: Default MIME type if extension not found
+    Returns:
+        MIME type string (e.g., "image/png")
+    """
+    key = extension.lower()
+    if not key.startswith("."):
+        # Table keys always carry the leading dot.
+        key = "." + key
+    return EXTENSION_TO_MIME.get(key, default)
+def is_image(extension_or_mime: str) -> bool:
+    """
+    Check if extension or MIME type represents an image.
+    A value starting with "." is treated as an extension and matched
+    case-insensitively against IMAGE_EXTENSIONS; anything else is treated
+    as a MIME type and must start with "image/".
+    """
+    looks_like_extension = extension_or_mime.startswith(".")
+    if not looks_like_extension:
+        return extension_or_mime.startswith("image/")
+    return extension_or_mime.lower() in IMAGE_EXTENSIONS
+def is_audio(extension_or_mime: str) -> bool:
+    """
+    Check if extension or MIME type represents audio.
+    A value starting with "." is treated as an extension and matched
+    case-insensitively against AUDIO_EXTENSIONS; anything else is treated
+    as a MIME type and must start with "audio/".
+    """
+    looks_like_extension = extension_or_mime.startswith(".")
+    if not looks_like_extension:
+        return extension_or_mime.startswith("audio/")
+    return extension_or_mime.lower() in AUDIO_EXTENSIONS
+def is_document(extension_or_mime: str) -> bool:
+    """
+    Check if extension or MIME type represents a document.
+    A value starting with "." is treated as an extension and matched
+    case-insensitively against DOCUMENT_EXTENSIONS. Anything else is
+    treated as a MIME type: PDF, the legacy Microsoft Office types and any
+    type containing "officedocument" (the OOXML family) count as documents.
+    """
+    if extension_or_mime.startswith("."):
+        return extension_or_mime.lower() in DOCUMENT_EXTENSIONS
+    # Legacy Office MIME types carry no "officedocument" marker, so each one
+    # is listed explicitly; this keeps .ppt/.xls consistent between the
+    # extension check above and the MIME check here.
+    doc_mimes = {
+        "application/pdf",
+        "application/msword",
+        "application/vnd.ms-powerpoint",
+        "application/vnd.ms-excel",
+    }
+    return extension_or_mime in doc_mimes or "officedocument" in extension_or_mime

rem/utils/schema_loader.py CHANGED Viewed

@@ -9,7 +9,7 @@ Design Pattern:
 - Support short names: "contract-analyzer" → "schemas/agents/contract-analyzer.yaml"
 - Support relative/absolute paths
 - Consistent error messages and logging
-i
 Usage:
     # From API
     schema = load_agent_schema("rem")
@@ -20,6 +20,26 @@ Usage:
     # From agent factory
     schema = load_agent_schema("contract-analyzer")
+TODO: Git FS Integration
+    The schema loader currently uses importlib.resources for package schemas
+    and direct filesystem access for custom paths. The FS abstraction layer
+    (rem.services.fs.FS) could be used to abstract storage backends:
+    - Local filesystem (current)
+    - Git repositories (GitService)
+    - S3 (via FS provider)
+    This would enable loading schemas from versioned Git repos or S3 buckets
+    without changing the API. The FS provider pattern already exists and just
+    needs integration testing with the schema loader.
+    Example future usage:
+        # Load from Git at specific version
+        schema = load_agent_schema("git://rem/schemas/agents/rem.yaml?ref=v1.0.0")
+        # Load from S3
+        schema = load_agent_schema("s3://rem-schemas/agents/cv-parser.yaml")
 Schema Caching Status:
     ✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
@@ -71,13 +91,14 @@ import yaml
 from loguru import logger
-# Standard search paths for agent schemas (in priority order)
+# Standard search paths for agent/evaluator schemas (in priority order)
 SCHEMA_SEARCH_PATHS = [
     "schemas/agents/{name}.yaml",          # Top-level agents (e.g., rem.yaml)
     "schemas/agents/core/{name}.yaml",     # Core system agents
     "schemas/agents/examples/{name}.yaml", # Example agents
-    "schemas/evaluators/{name}.yaml",
-    "schemas/{name}.yaml",
+    "schemas/evaluators/{name}.yaml",      # Nested evaluators (e.g., hello-world/default)
+    "schemas/evaluators/rem/{name}.yaml",  # REM evaluators (e.g., lookup-correctness)
+    "schemas/{name}.yaml",                 # Generic schemas
 ]
 # In-memory cache for filesystem schemas (no TTL - immutable)
@@ -188,12 +209,13 @@ def load_agent_schema(
     Search Order:
     1. Check cache (if use_cache=True and schema found in FS cache)
     2. Exact path if it exists (absolute or relative)
-    3. Package resources: schemas/agents/{name}.yaml (top-level)
-    4. Package resources: schemas/agents/core/{name}.yaml
-    5. Package resources: schemas/agents/examples/{name}.yaml
-    6. Package resources: schemas/evaluators/{name}.yaml
-    7. Package resources: schemas/{name}.yaml
-    8. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)
+    3. Custom paths from rem.register_schema_path() and SCHEMA__PATHS env var
+    4. Package resources: schemas/agents/{name}.yaml (top-level)
+    5. Package resources: schemas/agents/core/{name}.yaml
+    6. Package resources: schemas/agents/examples/{name}.yaml
+    7. Package resources: schemas/evaluators/{name}.yaml
+    8. Package resources: schemas/{name}.yaml
+    9. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)
     Args:
         schema_name_or_path: Schema name or file path
@@ -247,7 +269,28 @@ def load_agent_schema(
     # 2. Normalize name for package resource search
     base_name = cache_key
-    # 3. Try package resources with standard search paths
+    # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var)
+    from ..registry import get_schema_paths
+    custom_paths = get_schema_paths()
+    for custom_dir in custom_paths:
+        # Try various patterns within each custom directory
+        for pattern in [
+            f"{base_name}.yaml",
+            f"{base_name}.yml",
+            f"agents/{base_name}.yaml",
+            f"evaluators/{base_name}.yaml",
+        ]:
+            custom_path = Path(custom_dir) / pattern
+            if custom_path.exists():
+                logger.debug(f"Loading schema from custom path: {custom_path}")
+                with open(custom_path, "r") as f:
+                    schema = yaml.safe_load(f)
+                logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
+                # Don't cache custom paths (they may change during development)
+                return cast(dict[str, Any], schema)
+    # 4. Try package resources with standard search paths
     for search_pattern in SCHEMA_SEARCH_PATHS:
         search_path = search_pattern.format(name=base_name)
@@ -272,7 +315,7 @@ def load_agent_schema(
             logger.debug(f"Could not load from {search_path}: {e}")
             continue
-    # 4. Try database LOOKUP fallback (if enabled and user_id provided)
+    # 5. Try database LOOKUP fallback (if enabled and user_id provided)
     if enable_db_fallback and user_id:
         try:
             logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
@@ -284,8 +327,13 @@ def load_agent_schema(
             logger.debug(f"Database schema lookup failed: {e}")
             # Fall through to error below
-    # 5. Schema not found in any location
+    # 6. Schema not found in any location
     searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
+    custom_paths_note = ""
+    if custom_paths:
+        custom_paths_note = f"\n  - Custom paths: {', '.join(custom_paths)}"
     db_search_note = ""
     if enable_db_fallback:
         if user_id:
@@ -296,7 +344,8 @@ def load_agent_schema(
     raise FileNotFoundError(
         f"Schema not found: {schema_name_or_path}\n"
         f"Searched locations:\n"
-        f"  - Exact path: {path}\n"
+        f"  - Exact path: {path}"
+        f"{custom_paths_note}\n"
         f"  - Package resources: {', '.join(searched_paths)}"
         f"{db_search_note}"
     )

rem/utils/vision.py CHANGED Viewed

@@ -11,7 +11,6 @@ markdown descriptions of images.
 """
 import base64
-import os
 from enum import Enum
 from pathlib import Path
 from typing import Optional
@@ -19,6 +18,9 @@ from typing import Optional
 import requests
 from loguru import logger
+from rem.utils.constants import HTTP_TIMEOUT_LONG, VISION_MAX_TOKENS
+from rem.utils.mime_types import EXTENSION_TO_MIME
 class VisionProvider(str, Enum):
     """Supported vision providers."""
@@ -141,14 +143,7 @@ class ImageAnalyzer:
         # Detect media type
         suffix = image_path.suffix.lower()
-        media_type_map = {
-            ".png": "image/png",
-            ".jpg": "image/jpeg",
-            ".jpeg": "image/jpeg",
-            ".gif": "image/gif",
-            ".webp": "image/webp",
-        }
-        media_type = media_type_map.get(suffix, "image/png")
+        media_type = EXTENSION_TO_MIME.get(suffix, "image/png")
         logger.info(f"Analyzing {image_path.name} with {self.provider.value} ({self.model})")
@@ -190,7 +185,7 @@ class ImageAnalyzer:
         body = {
             "model": self.model,
-            "max_tokens": 2048,
+            "max_tokens": VISION_MAX_TOKENS,
             "messages": [
                 {
                     "role": "user",
@@ -216,7 +211,7 @@ class ImageAnalyzer:
             "https://api.anthropic.com/v1/messages",
             headers=headers,
             json=body,
-            timeout=60.0,
+            timeout=HTTP_TIMEOUT_LONG,
         )
         if response.status_code != 200:
@@ -261,7 +256,7 @@ class ImageAnalyzer:
             url,
             params=params,
             json=body,
-            timeout=60.0,
+            timeout=HTTP_TIMEOUT_LONG,
         )
         if response.status_code != 200:
@@ -311,14 +306,14 @@ class ImageAnalyzer:
                     ],
                 }
             ],
-            "max_tokens": 2048,
+            "max_tokens": VISION_MAX_TOKENS,
         }
         response = requests.post(
             url,
             headers=headers,
             json=body,
-            timeout=60.0,
+            timeout=HTTP_TIMEOUT_LONG,
         )
         if response.status_code != 200:

rem/workers/README.md CHANGED Viewed

@@ -207,7 +207,7 @@ Reads recent activity to generate comprehensive user profiles.
 **CLI:**
 ```bash
-rem-dreaming user-model --tenant-id=tenant-123
+rem-dreaming user-model
 ```
 **Frequency:** Daily (runs as part of full workflow)
@@ -235,13 +235,13 @@ Extracts temporal narratives from resources.
 **CLI:**
 ```bash
 # Process last 24 hours
-rem-dreaming moments --tenant-id=tenant-123
+rem-dreaming moments
 # Custom lookback
-rem-dreaming moments --tenant-id=tenant-123 --lookback-hours=48
+rem-dreaming moments --lookback-hours=48
 # Limit resources processed
-rem-dreaming moments --tenant-id=tenant-123 --limit=100
+rem-dreaming moments --limit=100
 ```
 **Frequency:** Daily or on-demand
@@ -283,13 +283,13 @@ Builds semantic relationships between resources.
 **CLI:**
 ```bash
 # Semantic mode (fast, cheap)
-rem-dreaming affinity --tenant-id=tenant-123
+rem-dreaming affinity
 # LLM mode (intelligent, expensive)
-rem-dreaming affinity --tenant-id=tenant-123 --use-llm --limit=100
+rem-dreaming affinity --use-llm --limit=100
 # Custom lookback
-rem-dreaming affinity --tenant-id=tenant-123 --lookback-hours=168
+rem-dreaming affinity --lookback-hours=168
 ```
 **Frequency:**
@@ -308,13 +308,13 @@ Runs all operations in sequence.
 **CLI:**
 ```bash
 # Single tenant
-rem-dreaming full --tenant-id=tenant-123
+rem-dreaming full
 # All active tenants (daily cron)
 rem-dreaming full --all-tenants
 # Use LLM affinity mode
-rem-dreaming full --tenant-id=tenant-123 --use-llm-affinity
+rem-dreaming full --use-llm-affinity
 ```
 **Frequency:** Daily at 3 AM UTC
@@ -455,16 +455,16 @@ export REM_API_URL=http://localhost:8000
 export OPENAI_API_KEY=sk-...
 # Run user model update
-python -m rem.cli.dreaming user-model --tenant-id=tenant-test
+python -m rem.cli.dreaming user-model
 # Run moment construction
-python -m rem.cli.dreaming moments --tenant-id=tenant-test --lookback-hours=24
+python -m rem.cli.dreaming moments --lookback-hours=24
 # Run affinity (semantic mode)
-python -m rem.cli.dreaming affinity --tenant-id=tenant-test
+python -m rem.cli.dreaming affinity
 # Run full workflow
-python -m rem.cli.dreaming full --tenant-id=tenant-test
+python -m rem.cli.dreaming full
 ```
 ### Testing with Docker
@@ -478,7 +478,7 @@ docker run --rm \
   -e REM_API_URL=http://host.docker.internal:8000 \
   -e OPENAI_API_KEY=$OPENAI_API_KEY \
   rem-stack:latest \
-  python -m rem.cli.dreaming full --tenant-id=tenant-test
+  python -m rem.cli.dreaming full
 ```
 ## Architecture Decisions

remdb 0.3.7__py3-none-any.whl → 0.3.14__py3-none-any.whl

remdb 0.3.7__py3-none-any.whl → 0.3.14__py3-none-any.whl