PyPI - remdb - Versions diffs - 0.3.141__py3-none-any.whl → 0.3.163__py3-none-any.whl - Mend

remdb 0.3.141__py3-none-any.whl → 0.3.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (44)

rem/agentic/agents/__init__.py +16 -0
rem/agentic/agents/agent_manager.py +310 -0
rem/agentic/context.py +81 -3
rem/agentic/context_builder.py +18 -3
rem/api/deps.py +3 -5
rem/api/main.py +22 -3
rem/api/mcp_router/server.py +2 -0
rem/api/mcp_router/tools.py +90 -0
rem/api/middleware/tracking.py +5 -5
rem/api/routers/auth.py +346 -5
rem/api/routers/chat/completions.py +4 -2
rem/api/routers/chat/streaming.py +77 -22
rem/api/routers/messages.py +24 -15
rem/auth/__init__.py +13 -3
rem/auth/jwt.py +352 -0
rem/auth/middleware.py +108 -6
rem/auth/providers/__init__.py +4 -1
rem/auth/providers/email.py +215 -0
rem/cli/commands/experiments.py +32 -46
rem/models/core/experiment.py +4 -14
rem/models/entities/__init__.py +4 -0
rem/models/entities/subscriber.py +175 -0
rem/models/entities/user.py +1 -0
rem/schemas/agents/core/agent-builder.yaml +134 -0
rem/services/__init__.py +3 -1
rem/services/content/service.py +4 -3
rem/services/email/__init__.py +10 -0
rem/services/email/service.py +511 -0
rem/services/email/templates.py +360 -0
rem/services/postgres/README.md +38 -0
rem/services/postgres/diff_service.py +19 -3
rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
rem/services/postgres/repository.py +5 -4
rem/services/session/compression.py +113 -50
rem/services/session/reload.py +14 -7
rem/services/user_service.py +29 -0
rem/settings.py +199 -4
rem/sql/migrations/005_schema_update.sql +145 -0
rem/utils/README.md +45 -0
rem/utils/files.py +157 -1
{remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/METADATA +7 -5
{remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/RECORD +44 -35
{remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/WHEEL +0 -0
{remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/entry_points.txt +0 -0

rem/auth/middleware.py CHANGED Viewed

@@ -1,13 +1,16 @@
 """
-OAuth Authentication Middleware for FastAPI.
+Authentication Middleware for FastAPI.
-Protects API endpoints by requiring valid session.
-Supports anonymous access with rate limiting when allow_anonymous=True.
+Protects API endpoints by requiring valid authentication.
+Supports multiple auth methods: JWT, API Key, Session, Dev Token.
+Anonymous access with rate limiting when allow_anonymous=True.
 MCP endpoints are always protected unless explicitly disabled.
 Design Pattern:
+- Check X-API-Key header first (if API key auth enabled)
+- Check JWT token in Authorization header (Bearer token)
+- Check dev token (non-production only, starts with "dev_")
 - Check session for user on protected paths
-- Check Bearer token for dev token (non-production only)
 - MCP paths always require authentication (protected service)
 - If allow_anonymous=True: Allow unauthenticated requests (marked as ANONYMOUS tier)
 - If allow_anonymous=False: Return 401 for API calls, redirect browsers to login
@@ -20,6 +23,12 @@ Access Modes (configured in settings.auth):
 - mcp_requires_auth=true (default): MCP always requires login regardless of allow_anonymous
 - mcp_requires_auth=false: MCP follows normal allow_anonymous rules (dev only)
+API Key Authentication (configured in settings.api):
+- api_key_enabled=true: Require X-API-Key header for protected endpoints
+- api_key: The secret key to validate against
+- Provides simple programmatic access without OAuth flow
+- X-API-Key header takes precedence over session auth
 Dev Token Support (non-production only):
 - GET /api/auth/dev/token returns a Bearer token for test-user
 - Include as: Authorization: Bearer dev_<signature>
@@ -82,6 +91,67 @@ class AuthMiddleware(BaseHTTPMiddleware):
         self.mcp_requires_auth = mcp_requires_auth
         self.mcp_path = mcp_path
+    def _check_api_key(self, request: Request) -> dict | None:
+        """
+        Check for valid X-API-Key header.
+        Returns:
+            API key user dict if valid, None otherwise
+        """
+        # Only check if API key auth is enabled
+        if not settings.api.api_key_enabled:
+            return None
+        # Check for X-API-Key header
+        api_key = request.headers.get("x-api-key")
+        if not api_key:
+            return None
+        # Validate against configured API key
+        if settings.api.api_key and api_key == settings.api.api_key:
+            logger.debug("X-API-Key authenticated")
+            return {
+                "id": "api-key-user",
+                "email": "api@rem.local",
+                "name": "API Key User",
+                "provider": "api-key",
+                "tenant_id": "default",
+                "tier": "pro",  # API key users get full access
+                "roles": ["user"],
+            }
+        # Invalid API key
+        logger.warning("Invalid X-API-Key provided")
+        return None
+    def _check_jwt_token(self, request: Request) -> dict | None:
+        """
+        Check for valid JWT in Authorization header.
+        Returns:
+            User dict if valid JWT, None otherwise
+        """
+        auth_header = request.headers.get("authorization", "")
+        if not auth_header.startswith("Bearer "):
+            return None
+        token = auth_header[7:]  # Strip "Bearer "
+        # Skip dev tokens (handled separately)
+        if token.startswith("dev_"):
+            return None
+        # Verify JWT token
+        from .jwt import get_jwt_service
+        jwt_service = get_jwt_service()
+        user = jwt_service.verify_token(token)
+        if user:
+            logger.debug(f"JWT authenticated: {user.get('email')}")
+            return user
+        return None
     def _check_dev_token(self, request: Request) -> dict | None:
         """
         Check for valid dev token in Authorization header (non-production only).
@@ -105,7 +175,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
         # Verify dev token
         from ..api.routers.dev import verify_dev_token
         if verify_dev_token(token):
-            logger.debug(f"Dev token authenticated as test-user")
+            logger.debug("Dev token authenticated as test-user")
             return {
                 "id": "test-user",
                 "email": "test@rem.local",
@@ -142,6 +212,38 @@ class AuthMiddleware(BaseHTTPMiddleware):
         if not is_protected or is_excluded:
             return await call_next(request)
+        # Check for X-API-Key header first (if enabled)
+        api_key_user = self._check_api_key(request)
+        if api_key_user:
+            request.state.user = api_key_user
+            request.state.is_anonymous = False
+            return await call_next(request)
+        # If API key auth is enabled but no valid key provided, reject immediately
+        if settings.api.api_key_enabled:
+            # Check if X-API-Key header was provided but invalid
+            if request.headers.get("x-api-key"):
+                logger.warning(f"Invalid X-API-Key for: {path}")
+                return JSONResponse(
+                    status_code=401,
+                    content={"detail": "Invalid API key"},
+                    headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
+                )
+            # No API key provided when required
+            logger.debug(f"Missing X-API-Key for: {path}")
+            return JSONResponse(
+                status_code=401,
+                content={"detail": "API key required. Include X-API-Key header."},
+                headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
+            )
+        # Check for JWT token in Authorization header
+        jwt_user = self._check_jwt_token(request)
+        if jwt_user:
+            request.state.user = jwt_user
+            request.state.is_anonymous = False
+            return await call_next(request)
         # Check for dev token (non-production only)
         dev_user = self._check_dev_token(request)
         if dev_user:
@@ -149,7 +251,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
             request.state.is_anonymous = False
             return await call_next(request)
-        # Check for valid session
+        # Check for valid session (backward compatibility)
         user = request.session.get("user")
         if user:

rem/auth/providers/__init__.py CHANGED Viewed

@@ -1,6 +1,7 @@
-"""OAuth provider implementations."""
+"""Authentication provider implementations."""
 from .base import OAuthProvider, OAuthTokens, OAuthUserInfo
+from .email import EmailAuthProvider, EmailAuthResult
 from .google import GoogleOAuthProvider
 from .microsoft import MicrosoftOAuthProvider
@@ -8,6 +9,8 @@ __all__ = [
     "OAuthProvider",
     "OAuthTokens",
     "OAuthUserInfo",
+    "EmailAuthProvider",
+    "EmailAuthResult",
     "GoogleOAuthProvider",
     "MicrosoftOAuthProvider",
 ]

rem/auth/providers/email.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""
+Email Authentication Provider.
+Passwordless authentication using email verification codes.
+Unlike OAuth providers, this handles the full flow internally.
+Flow:
+1. User requests login with email address
+2. System generates code, upserts user, sends email
+3. User enters code
+4. System verifies code and creates session
+Design:
+- Uses EmailService for sending codes
+- Creates users with deterministic UUID from email hash
+- Stores challenge in user metadata
+- No external OAuth dependencies
+"""
+from typing import TYPE_CHECKING
+from pydantic import BaseModel, Field
+from loguru import logger
+from ...services.email import EmailService
+if TYPE_CHECKING:
+    from ...services.postgres import PostgresService
+class EmailAuthResult(BaseModel):
+    """Result of email authentication operations."""
+    success: bool = Field(description="Whether operation succeeded")
+    email: str = Field(description="Email address")
+    user_id: str | None = Field(default=None, description="User ID if authenticated")
+    error: str | None = Field(default=None, description="Error message if failed")
+    message: str | None = Field(default=None, description="User-friendly message")
+class EmailAuthProvider:
+    """
+    Email-based passwordless authentication provider.
+    Handles the complete email login flow:
+    1. send_code() - Generate and send verification code
+    2. verify_code() - Verify code and return user info
+    """
+    def __init__(
+        self,
+        email_service: EmailService | None = None,
+        template_kwargs: dict | None = None,
+    ):
+        """
+        Initialize EmailAuthProvider.
+        Args:
+            email_service: EmailService instance (creates new one if not provided)
+            template_kwargs: Customization for email templates (colors, branding, etc.)
+        """
+        self._email_service = email_service or EmailService()
+        self._template_kwargs = template_kwargs or {}
+    @property
+    def is_configured(self) -> bool:
+        """Check if email auth is properly configured."""
+        return self._email_service.is_configured
+    async def send_code(
+        self,
+        email: str,
+        db: "PostgresService",
+        tenant_id: str = "default",
+    ) -> EmailAuthResult:
+        """
+        Send a verification code to an email address.
+        Creates user if not exists (using deterministic UUID from email).
+        Stores code in user metadata.
+        Args:
+            email: Email address to send code to
+            db: PostgresService instance
+            tenant_id: Tenant identifier
+        Returns:
+            EmailAuthResult with success status
+        """
+        if not self.is_configured:
+            return EmailAuthResult(
+                success=False,
+                email=email,
+                error="Email service not configured",
+                message="Email login is not available. Please try another method.",
+            )
+        try:
+            result = await self._email_service.send_login_code(
+                email=email,
+                db=db,
+                tenant_id=tenant_id,
+                template_kwargs=self._template_kwargs,
+            )
+            if result["success"]:
+                return EmailAuthResult(
+                    success=True,
+                    email=email,
+                    user_id=result["user_id"],
+                    message=f"Verification code sent to {email}. Check your inbox.",
+                )
+            else:
+                return EmailAuthResult(
+                    success=False,
+                    email=email,
+                    error=result.get("error", "Failed to send code"),
+                    message="Failed to send verification code. Please try again.",
+                )
+        except Exception as e:
+            logger.error(f"Error sending login code: {e}")
+            return EmailAuthResult(
+                success=False,
+                email=email,
+                error=str(e),
+                message="An error occurred. Please try again.",
+            )
+    async def verify_code(
+        self,
+        email: str,
+        code: str,
+        db: "PostgresService",
+        tenant_id: str = "default",
+    ) -> EmailAuthResult:
+        """
+        Verify a login code and authenticate user.
+        Args:
+            email: Email address
+            code: 6-digit verification code
+            db: PostgresService instance
+            tenant_id: Tenant identifier
+        Returns:
+            EmailAuthResult with user_id if successful
+        """
+        try:
+            result = await self._email_service.verify_login_code(
+                email=email,
+                code=code,
+                db=db,
+                tenant_id=tenant_id,
+            )
+            if result["valid"]:
+                return EmailAuthResult(
+                    success=True,
+                    email=email,
+                    user_id=result["user_id"],
+                    message="Successfully authenticated!",
+                )
+            else:
+                error = result.get("error", "Invalid code")
+                # User-friendly error messages
+                if error == "Login code expired":
+                    message = "Your code has expired. Please request a new one."
+                elif error == "Invalid login code":
+                    message = "Invalid code. Please check and try again."
+                elif error == "No login code requested":
+                    message = "No code was requested for this email. Please request a new code."
+                elif error == "User not found":
+                    message = "Email not found. Please request a login code first."
+                else:
+                    message = "Verification failed. Please try again."
+                return EmailAuthResult(
+                    success=False,
+                    email=email,
+                    error=error,
+                    message=message,
+                )
+        except Exception as e:
+            logger.error(f"Error verifying login code: {e}")
+            return EmailAuthResult(
+                success=False,
+                email=email,
+                error=str(e),
+                message="An error occurred. Please try again.",
+            )
+    def get_user_dict(self, email: str, user_id: str) -> dict:
+        """
+        Create a user dict for session storage.
+        Compatible with OAuth user format for consistent session handling.
+        Args:
+            email: User's email
+            user_id: User's UUID
+        Returns:
+            User dict for session
+        """
+        return {
+            "id": user_id,
+            "email": email,
+            "email_verified": True,  # Email is verified through code
+            "name": email.split("@")[0],  # Use email prefix as name
+            "provider": "email",
+            "tenant_id": "default",
+            "tier": "free",  # Email users start at free tier
+            "roles": ["user"],
+        }

rem/cli/commands/experiments.py CHANGED Viewed

@@ -125,19 +125,17 @@ def create(
         # Resolve base path: CLI arg > EXPERIMENTS_HOME env var > default "experiments"
         if base_path is None:
             base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
-        # Build dataset reference
+        # Build dataset reference (format auto-detected from file extension)
         if dataset_location == "git":
             dataset_ref = DatasetReference(
                 location=DatasetLocation.GIT,
                 path="ground-truth/dataset.csv",
-                format="csv",
                 description="Ground truth Q&A dataset for evaluation"
             )
         else:  # s3 or hybrid
             dataset_ref = DatasetReference(
                 location=DatasetLocation(dataset_location),
                 path=f"s3://rem-experiments/{name}/datasets/ground_truth.parquet",
-                format="parquet",
                 schema_path="datasets/schema.yaml" if dataset_location == "hybrid" else None,
                 description="Ground truth dataset for evaluation"
             )
@@ -930,58 +928,46 @@ def run(
                 raise click.Abort()
             click.echo("✓ Evaluator credentials validated")
-        # Load dataset using Polars
-        import polars as pl
+        # Load dataset using read_dataframe utility (auto-detects format from extension)
+        from rem.utils.files import read_dataframe
         click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
         dataset_ref = list(config.datasets.values())[0]
-        if dataset_ref.location.value == "git":
-            # Load from Git (local filesystem)
-            dataset_path = Path(base_path) / name / dataset_ref.path
-            if not dataset_path.exists():
-                click.echo(f"Error: Dataset not found: {dataset_path}")
-                raise click.Abort()
-            if dataset_ref.format == "csv":
-                dataset_df = pl.read_csv(dataset_path)
-            elif dataset_ref.format == "parquet":
-                dataset_df = pl.read_parquet(dataset_path)
-            elif dataset_ref.format == "jsonl":
-                dataset_df = pl.read_ndjson(dataset_path)
-            else:
-                click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
-                raise click.Abort()
-        elif dataset_ref.location.value in ["s3", "hybrid"]:
-            # Load from S3 using FS provider
-            from rem.services.fs import FS
-            from io import BytesIO
+        try:
+            if dataset_ref.location.value == "git":
+                # Load from Git (local filesystem)
+                dataset_path = Path(base_path) / name / dataset_ref.path
+                if not dataset_path.exists():
+                    click.echo(f"Error: Dataset not found: {dataset_path}")
+                    raise click.Abort()
-            fs = FS()
+                dataset_df = read_dataframe(dataset_path)
-            try:
-                if dataset_ref.format == "csv":
-                    content = fs.read(dataset_ref.path)
-                    dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
-                elif dataset_ref.format == "parquet":
-                    content_bytes = fs.read(dataset_ref.path)
-                    dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
-                elif dataset_ref.format == "jsonl":
-                    content = fs.read(dataset_ref.path)
-                    dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
-                else:
-                    click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
-                    raise click.Abort()
+            elif dataset_ref.location.value in ["s3", "hybrid"]:
+                # Load from S3 using FS provider
+                from rem.services.fs import FS
+                fs = FS()
+                content = fs.read(dataset_ref.path)
+                # Ensure we have bytes
+                if isinstance(content, str):
+                    content = content.encode()
+                dataset_df = read_dataframe(content, filename=dataset_ref.path)
                 click.echo(f"✓ Loaded dataset from S3")
-            except Exception as e:
-                logger.error(f"Failed to load dataset from S3: {e}")
-                click.echo(f"Error: Could not load dataset from S3")
-                click.echo(f"  Path: {dataset_ref.path}")
-                click.echo(f"  Format: {dataset_ref.format}")
+            else:
+                click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
                 raise click.Abort()
-        else:
-            click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
+        except ValueError as e:
+            # Unsupported format error from read_dataframe
+            click.echo(f"Error: {e}")
+            raise click.Abort()
+        except Exception as e:
+            logger.error(f"Failed to load dataset: {e}")
+            click.echo(f"Error: Could not load dataset")
+            click.echo(f"  Path: {dataset_ref.path}")
             raise click.Abort()
         click.echo(f"✓ Loaded dataset: {len(dataset_df)} examples")

rem/models/core/experiment.py CHANGED Viewed

@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
     path: str = Field(
         description=(
-            "Path to dataset:\n"
+            "Path to dataset. Format is inferred from file extension.\n"
+            "Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
             "- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
-            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/ground_truth.csv')\n"
+            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
             "- Hybrid: S3 URI for data, Git path for schema"
         )
     )
-    format: Literal["csv", "jsonl", "parquet", "json"] = Field(
-        default="csv",
-        description="Dataset file format"
-    )
     schema_path: str | None = Field(
         default=None,
         description=(
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
     datasets:
       ground_truth:
         location: git
-        path: datasets/ground_truth.csv
-        format: csv
+        path: datasets/ground_truth.csv  # format inferred from extension
     results:
       location: git
       base_path: results/
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
       ground_truth:
         location: s3
         path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
-        format: parquet
         schema_path: datasets/schema.yaml  # Schema in Git for documentation
       test_cases:
         location: s3
         path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
-        format: jsonl
     results:
       location: hybrid
       base_path: s3://rem-prod/experiments/cv-parser-production/results/
@@ -558,7 +551,6 @@ class ExperimentConfig(BaseModel):
 - **Location**: `{dataset.location.value}`
 - **Path**: `{dataset.path}`
-- **Format**: `{dataset.format}`
 """
             if dataset.description:
                 readme += f"- **Description**: {dataset.description}\n"
@@ -629,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.GIT,
             path="datasets/ground_truth.csv",
-            format="csv",
             description="10 manually curated test cases"
         )
     },
@@ -659,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.S3,
             path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
-            format="parquet",
             schema_path="datasets/schema.yaml",
             description="10,000 CV/resume pairs with ground truth extractions"
         )

rem/models/entities/__init__.py CHANGED Viewed

@@ -39,6 +39,7 @@ from .shared_session import (
     SharedWithMeResponse,
     SharedWithMeSummary,
 )
+from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
 from .user import User, UserTier
 __all__ = [
@@ -56,6 +57,9 @@ __all__ = [
     "FeedbackCategory",
     "User",
     "UserTier",
+    "Subscriber",
+    "SubscriberStatus",
+    "SubscriberOrigin",
     "File",
     "Moment",
     "Schema",

remdb 0.3.141__py3-none-any.whl → 0.3.163__py3-none-any.whl

Potentially problematic release.

remdb 0.3.141__py3-none-any.whl → 0.3.163__py3-none-any.whl