PyPI - remdb - Versions diffs - 0.3.133__py3-none-any.whl → 0.3.157__py3-none-any.whl - Mend

remdb 0.3.133__py3-none-any.whl → 0.3.157__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

rem/agentic/agents/__init__.py +16 -0
rem/agentic/agents/agent_manager.py +310 -0
rem/agentic/context_builder.py +5 -3
rem/agentic/mcp/tool_wrapper.py +48 -6
rem/agentic/providers/phoenix.py +91 -21
rem/agentic/providers/pydantic_ai.py +77 -43
rem/api/deps.py +2 -2
rem/api/main.py +1 -1
rem/api/mcp_router/server.py +2 -0
rem/api/mcp_router/tools.py +90 -0
rem/api/routers/auth.py +208 -4
rem/api/routers/chat/streaming.py +77 -22
rem/auth/__init__.py +13 -3
rem/auth/middleware.py +66 -1
rem/auth/providers/__init__.py +4 -1
rem/auth/providers/email.py +215 -0
rem/cli/commands/configure.py +3 -4
rem/cli/commands/experiments.py +50 -49
rem/cli/commands/session.py +336 -0
rem/cli/dreaming.py +2 -2
rem/cli/main.py +2 -0
rem/models/core/experiment.py +4 -14
rem/models/entities/__init__.py +4 -0
rem/models/entities/ontology.py +1 -1
rem/models/entities/ontology_config.py +1 -1
rem/models/entities/subscriber.py +175 -0
rem/models/entities/user.py +1 -0
rem/schemas/agents/core/agent-builder.yaml +134 -0
rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
rem/schemas/agents/examples/contract-extractor.yaml +1 -1
rem/schemas/agents/examples/cv-parser.yaml +1 -1
rem/services/__init__.py +3 -1
rem/services/content/service.py +4 -3
rem/services/email/__init__.py +10 -0
rem/services/email/service.py +459 -0
rem/services/email/templates.py +360 -0
rem/services/postgres/README.md +38 -0
rem/services/postgres/diff_service.py +19 -3
rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
rem/services/session/compression.py +113 -50
rem/services/session/reload.py +14 -7
rem/settings.py +191 -4
rem/sql/migrations/002_install_models.sql +91 -91
rem/sql/migrations/005_schema_update.sql +145 -0
rem/utils/README.md +45 -0
rem/utils/files.py +157 -1
rem/utils/vision.py +1 -1
{remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/METADATA +7 -5
{remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/RECORD +51 -42
{remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/WHEEL +0 -0
{remdb-0.3.133.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0

rem/cli/commands/session.py ADDED Viewed

@@ -0,0 +1,336 @@
+"""
+CLI command for viewing and simulating session conversations.
+Usage:
+    rem session show <user_id> [--session-id] [--role user|assistant|system]
+    rem session show <user_id> --simulate-next [--save] [--custom-sim-prompt "..."]
+Examples:
+    # Show all messages for a user
+    rem session show 11111111-1111-1111-1111-111111111001
+    # Show only user messages
+    rem session show 11111111-1111-1111-1111-111111111001 --role user
+    # Simulate next user message
+    rem session show 11111111-1111-1111-1111-111111111001 --simulate-next
+    # Simulate with custom prompt and save
+    rem session show 11111111-1111-1111-1111-111111111001 --simulate-next --save \
+        --custom-sim-prompt "Respond as an anxious patient"
+"""
+import asyncio
+from pathlib import Path
+from typing import Literal
+import click
+import yaml
+from loguru import logger
+from ...models.entities.user import User
+from ...models.entities.message import Message
+from ...services.postgres import get_postgres_service
+from ...services.postgres.repository import Repository
+from ...settings import settings
+# Default prompt template for the patient simulation. The {user_profile} and
+# {conversation_history} placeholders are filled in by _simulate_next_message.
+SIMULATOR_PROMPT = """You are simulating a patient in a mental health conversation.
+## Context
+You are continuing a conversation with a clinical evaluation agent. Based on the
+user profile and conversation history below, generate the next realistic patient message.
+## User Profile
+{user_profile}
+## Conversation History
+{conversation_history}
+## Instructions
+- Stay in character as the patient described in the profile
+- Your response should be natural, conversational, and consistent with the patient's presentation
+- Consider the patient's risk level, symptoms, and communication style
+- Do NOT include any metadata or role labels - just the raw message content
+- Keep responses concise (1-3 sentences typical for conversation)
+Generate the next patient message:"""
+async def _load_user_and_messages(
+    user_id: str,
+    session_id: str | None = None,
+    role_filter: str | None = None,
+    limit: int = 100,
+) -> tuple[User | None, list[Message]]:
+    """Fetch a user's profile together with their stored messages.
+    Returns ``(None, [])`` when no PostgreSQL service is available. Messages
+    are queried oldest-first and capped at ``limit``; ``role_filter`` is applied
+    afterwards, so fewer than ``limit`` messages may be returned.
+    """
+    db = get_postgres_service()
+    if not db:
+        logger.error("PostgreSQL not available")
+        return None, []
+    # Query criteria: always scoped to the user, optionally to one session.
+    criteria = {"user_id": user_id}
+    if session_id:
+        criteria["session_id"] = session_id
+    await db.connect()
+    try:
+        profile = await Repository(User, "users", db=db).get_by_id(user_id, tenant_id="default")
+        found = await Repository(Message, "messages", db=db).find(
+            filters=criteria,
+            order_by="created_at ASC",
+            limit=limit,
+        )
+        if not role_filter:
+            return profile, found
+        return profile, [msg for msg in found if msg.message_type == role_filter]
+    finally:
+        await db.disconnect()
+def _format_user_yaml(user: User | None) -> str:
+    """Render the displayed profile fields of ``user`` as a YAML document.
+    A placeholder comment line is returned when there is no user.
+    """
+    if not user:
+        return "# No user found"
+    profile = {"id": str(user.id)}
+    # The remaining fields are copied over unchanged.
+    for attr in ("name", "summary", "interests", "preferred_topics", "metadata"):
+        profile[attr] = getattr(user, attr)
+    return yaml.dump(profile, default_flow_style=False, allow_unicode=True)
+def _format_messages_yaml(messages: list[Message]) -> str:
+    """Serialize messages to a YAML list of role/content/session/timestamp records.
+    A placeholder comment line is returned when the list is empty.
+    """
+    if not messages:
+        return "# No messages found"
+    records = [
+        {
+            "role": item.message_type or "unknown",
+            "content": item.content,
+            "session_id": item.session_id,
+            "created_at": item.created_at.isoformat() if item.created_at else None,
+        }
+        for item in messages
+    ]
+    return yaml.dump(records, default_flow_style=False, allow_unicode=True)
+def _format_conversation_for_llm(messages: list[Message]) -> str:
+    """Flatten the conversation into ``[ROLE]: content`` paragraphs for a prompt.
+    Messages without a type are labelled UNKNOWN; an empty history yields a
+    fixed placeholder sentence.
+    """
+    if not messages:
+        return "(No previous messages)"
+    return "\n\n".join(
+        f"[{(entry.message_type or 'unknown').upper()}]: {entry.content}"
+        for entry in messages
+    )
+async def _simulate_next_message(
+    user: User | None,
+    messages: list[Message],
+    custom_prompt: str | None = None,
+) -> str:
+    """Use an LLM to generate the next patient message for a conversation.
+    Args:
+        user: Profile of the simulated patient, or None if unknown.
+        messages: Conversation so far, used as prompt context.
+        custom_prompt: Optional prompt template, given either inline or as the
+            path of a file containing it. ``{user_profile}`` and
+            ``{conversation_history}`` are substituted literally.
+    Returns:
+        The output of the simulation agent run.
+    """
+    from pydantic_ai import Agent
+    # Build context
+    user_profile = _format_user_yaml(user) if user else "Unknown patient"
+    conversation_history = _format_conversation_for_llm(messages)
+    if custom_prompt:
+        # Only treat the value as a path when it names a regular file. A long
+        # inline prompt can make the filesystem probe itself fail (e.g.
+        # "File name too long"), and a directory cannot be read as text; in
+        # those cases the value is used as the prompt text itself.
+        try:
+            is_prompt_file = Path(custom_prompt).is_file()
+        except (OSError, ValueError):
+            is_prompt_file = False
+        if is_prompt_file:
+            prompt_template = Path(custom_prompt).read_text()
+        else:
+            prompt_template = custom_prompt
+        # Plain replacement rather than str.format, so other braces in a
+        # custom template are left untouched.
+        prompt = prompt_template.replace("{user_profile}", user_profile)
+        prompt = prompt.replace("{conversation_history}", conversation_history)
+    else:
+        prompt = SIMULATOR_PROMPT.format(
+            user_profile=user_profile,
+            conversation_history=conversation_history,
+        )
+    # Create simple agent for simulation
+    agent = Agent(
+        model=settings.llm.default_model,
+        system_prompt="You are a patient simulator. Generate realistic patient responses.",
+    )
+    result = await agent.run(prompt)
+    return result.output
+async def _save_message(
+    user_id: str,
+    session_id: str | None,
+    content: str,
+    role: str = "user",
+) -> Message:
+    """Persist one message for ``user_id`` and return the stored model.
+    A fresh session id is generated when ``session_id`` is not given.
+    Raises RuntimeError if no PostgreSQL service is available.
+    """
+    from uuid import uuid4
+    db = get_postgres_service()
+    if not db:
+        raise RuntimeError("PostgreSQL not available")
+    await db.connect()
+    try:
+        repo = Repository(Message, "messages", db=db)
+        fields = {
+            "id": uuid4(),
+            "user_id": user_id,
+            "tenant_id": "default",
+            "session_id": session_id or str(uuid4()),
+            "content": content,
+            "message_type": role,
+        }
+        record = Message(**fields)
+        await repo.upsert(record)
+        return record
+    finally:
+        await db.disconnect()
+# Top-level "session" command group; subcommands attach to it via
+# @session.command. The docstring below is the group's help text.
+@click.group()
+def session():
+    """Session viewing and simulation commands."""
+    pass
+@session.command("show")
+@click.argument("user_id")
+@click.option("--session-id", "-s", help="Filter by session ID")
+@click.option(
+    "--role", "-r",
+    type=click.Choice(["user", "assistant", "system", "tool"]),
+    help="Filter messages by role",
+)
+@click.option("--limit", "-l", default=100, help="Max messages to load")
+@click.option("--simulate-next", is_flag=True, help="Simulate the next patient message")
+@click.option("--save", is_flag=True, help="Save simulated message to database")
+@click.option(
+    "--custom-sim-prompt", "-p",
+    help="Custom simulation prompt (text or file path)",
+)
+def show(
+    user_id: str,
+    session_id: str | None,
+    role: str | None,
+    limit: int,
+    simulate_next: bool,
+    save: bool,
+    custom_sim_prompt: str | None,
+):
+    """
+    Show user profile and session messages.
+    USER_ID: The user identifier to load.
+    Examples:
+        # Show user and all messages
+        rem session show 11111111-1111-1111-1111-111111111001
+        # Show only assistant responses
+        rem session show 11111111-1111-1111-1111-111111111001 --role assistant
+        # Simulate next patient message
+        rem session show 11111111-1111-1111-1111-111111111001 --simulate-next
+        # Simulate and save to database
+        rem session show 11111111-1111-1111-1111-111111111001 --simulate-next --save
+    """
+    # This callback is synchronous, so run the async implementation to
+    # completion here. The --role option is forwarded as role_filter.
+    asyncio.run(_show_async(
+        user_id=user_id,
+        session_id=session_id,
+        role_filter=role,
+        limit=limit,
+        simulate_next=simulate_next,
+        save=save,
+        custom_sim_prompt=custom_sim_prompt,
+    ))
+async def _show_async(
+    user_id: str,
+    session_id: str | None,
+    role_filter: str | None,
+    limit: int,
+    simulate_next: bool,
+    save: bool,
+    custom_sim_prompt: str | None,
+):
+    """Async implementation of the ``show`` command.
+    Prints the user profile and messages as YAML. With ``simulate_next`` it
+    also prints a simulated next user message and, if ``save`` is set, stores
+    that message. Errors during simulation or saving are logged and re-raised.
+    """
+    from textwrap import indent
+    # Simulation needs the full history as context, so when simulating the
+    # role filter is applied only to what is displayed, not to what is loaded.
+    user, messages = await _load_user_and_messages(
+        user_id=user_id,
+        session_id=session_id,
+        role_filter=None if simulate_next else role_filter,
+        limit=limit,
+    )
+    # Display user profile
+    click.echo("\n# User Profile")
+    click.echo("---")
+    click.echo(_format_user_yaml(user))
+    # Display messages (apply filter for display if simulating)
+    display_messages = messages
+    if simulate_next and role_filter:
+        display_messages = [m for m in messages if m.message_type == role_filter]
+    click.echo("\n# Messages")
+    click.echo("---")
+    click.echo(_format_messages_yaml(display_messages))
+    if not simulate_next:
+        return
+    click.echo("\n# Simulated Next Message")
+    click.echo("---")
+    try:
+        simulated = await _simulate_next_message(
+            user=user,
+            messages=messages,
+            custom_prompt=custom_sim_prompt,
+        )
+        click.echo("role: user")
+        # Indent every line of the message, not just the first, so a
+        # multi-line reply stays inside the YAML block scalar.
+        click.echo("content: |\n" + indent(str(simulated), "  "))
+        if save:
+            saved_msg = await _save_message(
+                user_id=user_id,
+                session_id=session_id,
+                content=simulated,
+                role="user",
+            )
+            logger.success(f"Saved message: {saved_msg.id}")
+    except Exception as e:
+        logger.error(f"Simulation failed: {e}")
+        raise
+def register_command(cli_group):
+    """Attach the ``session`` command group to the given parent CLI group."""
+    cli_group.add_command(session)

rem/cli/dreaming.py CHANGED Viewed

@@ -43,7 +43,7 @@ rem-dreaming full --user-id=user-123 --rem-api-url=http://localhost:8000
 Environment Variables:
 - REM_API_URL: REM API endpoint (default: http://rem-api:8000)
 - REM_EMBEDDING_PROVIDER: Embedding provider (default: text-embedding-3-small)
-- REM_DEFAULT_MODEL: LLM model (default: gpt-4o)
+- REM_DEFAULT_MODEL: LLM model (default: gpt-4.1)
 - REM_LOOKBACK_HOURS: Default lookback window (default: 24)
 - OPENAI_API_KEY: OpenAI API key
@@ -83,7 +83,7 @@ def get_worker() -> DreamingWorker:
         embedding_provider=os.getenv(
             "REM_EMBEDDING_PROVIDER", "text-embedding-3-small"
         ),
-        default_model=os.getenv("REM_DEFAULT_MODEL", "gpt-4o"),
+        default_model=os.getenv("REM_DEFAULT_MODEL", "gpt-4.1"),
         lookback_hours=int(os.getenv("REM_LOOKBACK_HOURS", "24")),
     )

rem/cli/main.py CHANGED Viewed

@@ -96,6 +96,7 @@ from .commands.serve import register_command as register_serve_command
 from .commands.mcp import register_command as register_mcp_command
 from .commands.scaffold import scaffold as scaffold_command
 from .commands.cluster import register_commands as register_cluster_commands
+from .commands.session import register_command as register_session_command
 register_schema_commands(schema)
 register_db_commands(db)
@@ -108,6 +109,7 @@ register_serve_command(cli)
 register_mcp_command(cli)
 cli.add_command(experiments_group)
 cli.add_command(scaffold_command)
+register_session_command(cli)
 def main():

rem/models/core/experiment.py CHANGED Viewed

@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
     path: str = Field(
         description=(
-            "Path to dataset:\n"
+            "Path to dataset. Format is inferred from file extension.\n"
+            "Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
             "- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
-            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/ground_truth.csv')\n"
+            "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
             "- Hybrid: S3 URI for data, Git path for schema"
         )
     )
-    format: Literal["csv", "jsonl", "parquet", "json"] = Field(
-        default="csv",
-        description="Dataset file format"
-    )
     schema_path: str | None = Field(
         default=None,
         description=(
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
     datasets:
       ground_truth:
         location: git
-        path: datasets/ground_truth.csv
-        format: csv
+        path: datasets/ground_truth.csv  # format inferred from extension
     results:
       location: git
       base_path: results/
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
       ground_truth:
         location: s3
         path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
-        format: parquet
         schema_path: datasets/schema.yaml  # Schema in Git for documentation
       test_cases:
         location: s3
         path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
-        format: jsonl
     results:
       location: hybrid
       base_path: s3://rem-prod/experiments/cv-parser-production/results/
@@ -558,7 +551,6 @@ class ExperimentConfig(BaseModel):
 - **Location**: `{dataset.location.value}`
 - **Path**: `{dataset.path}`
-- **Format**: `{dataset.format}`
 """
             if dataset.description:
                 readme += f"- **Description**: {dataset.description}\n"
@@ -629,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.GIT,
             path="datasets/ground_truth.csv",
-            format="csv",
             description="10 manually curated test cases"
         )
     },
@@ -659,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
         "ground_truth": DatasetReference(
             location=DatasetLocation.S3,
             path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
-            format="parquet",
             schema_path="datasets/schema.yaml",
             description="10,000 CV/resume pairs with ground truth extractions"
         )

rem/models/entities/__init__.py CHANGED Viewed

@@ -39,6 +39,7 @@ from .shared_session import (
     SharedWithMeResponse,
     SharedWithMeSummary,
 )
+from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
 from .user import User, UserTier
 __all__ = [
@@ -56,6 +57,9 @@ __all__ = [
     "FeedbackCategory",
     "User",
     "UserTier",
+    "Subscriber",
+    "SubscriberStatus",
+    "SubscriberOrigin",
     "File",
     "Moment",
     "Schema",

rem/models/entities/ontology.py CHANGED Viewed

@@ -129,7 +129,7 @@ class Ontology(CoreModel):
             file_id="file-uuid-456",
             agent_schema_id="contract-parser-v2",
             provider_name="openai",
-            model_name="gpt-4o",
+            model_name="gpt-4.1",
             extracted_data={
                 "contract_type": "supplier_agreement",
                 "parties": [

rem/models/entities/ontology_config.py CHANGED Viewed

@@ -74,7 +74,7 @@ class OntologyConfig(CoreModel):
             priority=200,  # Higher priority = runs first
             enabled=True,
             provider_name="openai",  # Override default provider
-            model_name="gpt-4o",
+            model_name="gpt-4.1",
             tenant_id="acme-corp",
             tags=["legal", "procurement"]
         )

rem/models/entities/subscriber.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""
+Subscriber - Email subscription management.
+This model stores subscribers who sign up via websites/apps.
+Subscribers can be collected before user registration for newsletters,
+updates, and approval-based access control.
+Key features:
+- Deterministic UUID from email (same email = same ID)
+- Approval workflow for access control
+- Tags for segmentation
+- Origin tracking for analytics
+"""
+import uuid
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional
+from pydantic import Field, EmailStr, model_validator
+from ..core import CoreModel
+# The str mixin makes each member compare equal to its plain string value.
+class SubscriberStatus(str, Enum):
+    """Subscription status."""
+    ACTIVE = "active"           # Actively subscribed
+    UNSUBSCRIBED = "unsubscribed"  # User unsubscribed
+    BOUNCED = "bounced"         # Email bounced
+    PENDING = "pending"         # Pending confirmation (if double opt-in)
+# The str mixin makes each member compare equal to its plain string value.
+class SubscriberOrigin(str, Enum):
+    """Where the subscription originated from."""
+    WEBSITE = "website"         # Main website subscribe form
+    LANDING_PAGE = "landing_page"  # Campaign landing page
+    APP = "app"                 # In-app subscription
+    IMPORT = "import"           # Bulk import
+    REFERRAL = "referral"       # Referred by another user
+    OTHER = "other"             # Any source not listed above
+class Subscriber(CoreModel):
+    """
+    Email subscriber for newsletters and access control.
+    This model captures subscribers who sign up via the website, landing pages,
+    or in-app prompts. Uses deterministic UUID from email for natural upserts.
+    Access control via `approved` field:
+    - When email auth checks subscriber status, only approved subscribers
+      can complete login (if approval is enabled in settings).
+    - Subscribers can be pre-approved, or approved manually/automatically.
+    Usage:
+        from rem.services.postgres import Repository
+        from rem.models.entities import Subscriber, SubscriberStatus
+        repo = Repository(Subscriber, db=db)
+        # Create subscriber (ID auto-generated from email)
+        subscriber = Subscriber(
+            email="user@example.com",
+            name="John Doe",
+            origin=SubscriberOrigin.WEBSITE,
+        )
+        await repo.upsert(subscriber)
+        # Check if approved for login
+        subscriber = await repo.get_by_id(subscriber.id, tenant_id="default")
+        if subscriber and subscriber.approved:
+            # Allow login
+            pass
+    """
+    # Required field
+    email: EmailStr = Field(
+        description="Subscriber's email address (unique identifier)"
+    )
+    # Optional fields
+    name: Optional[str] = Field(
+        default=None,
+        description="Subscriber's name (optional)"
+    )
+    comment: Optional[str] = Field(
+        default=None,
+        max_length=500,
+        description="Optional comment or message from subscriber"
+    )
+    status: SubscriberStatus = Field(
+        default=SubscriberStatus.ACTIVE,
+        description="Current subscription status"
+    )
+    # Access control
+    # New subscribers are unapproved by default.
+    approved: bool = Field(
+        default=False,
+        description="Whether subscriber is approved for login (for approval workflows)"
+    )
+    approved_at: Optional[datetime] = Field(
+        default=None,
+        description="When the subscriber was approved"
+    )
+    approved_by: Optional[str] = Field(
+        default=None,
+        description="Who approved the subscriber (user ID or 'system')"
+    )
+    # Origin tracking
+    origin: SubscriberOrigin = Field(
+        default=SubscriberOrigin.WEBSITE,
+        description="Where the subscription originated"
+    )
+    origin_detail: Optional[str] = Field(
+        default=None,
+        description="Additional origin context (e.g., campaign name, page URL)"
+    )
+    # Timestamps
+    # subscribed_at defaults to the current time in UTC (timezone-aware).
+    subscribed_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        description="When the subscription was created"
+    )
+    unsubscribed_at: Optional[datetime] = Field(
+        default=None,
+        description="When the user unsubscribed (if applicable)"
+    )
+    # Compliance
+    ip_address: Optional[str] = Field(
+        default=None,
+        description="IP address at subscription time (for compliance)"
+    )
+    user_agent: Optional[str] = Field(
+        default=None,
+        description="Browser user agent at subscription time"
+    )
+    # Segmentation
+    tags: list[str] = Field(
+        default_factory=list,
+        description="Tags for segmentation (e.g., ['early-access', 'beta'])"
+    )
+    @staticmethod
+    def email_to_uuid(email: str) -> uuid.UUID:
+        """Generate a deterministic UUID from an email address.
+        Uses UUID v5 with DNS namespace for consistency with
+        EmailService.generate_user_id_from_email().
+        Args:
+            email: Email address
+        Returns:
+            Deterministic UUID
+        """
+        # Lower-case and strip the address first so that case or surrounding
+        # whitespace differences map to the same UUID.
+        # NOTE(review): EmailService is not visible here - confirm it uses the
+        # same namespace and normalisation.
+        return uuid.uuid5(uuid.NAMESPACE_DNS, email.lower().strip())
+    # Runs after field validation. Whenever email is set, this replaces the
+    # model's id with the email-derived UUID, including an id passed by the caller.
+    @model_validator(mode="after")
+    def set_id_from_email(self) -> "Subscriber":
+        """Auto-generate deterministic ID from email for natural upsert."""
+        if self.email:
+            self.id = self.email_to_uuid(self.email)
+        return self

rem/models/entities/user.py CHANGED Viewed

@@ -22,6 +22,7 @@ from ..core import CoreModel
 class UserTier(str, Enum):
     """User subscription tier for feature gating."""
+    BLOCKED = "blocked"  # User is blocked from logging in
     ANONYMOUS = "anonymous"
     FREE = "free"
     BASIC = "basic"

remdb 0.3.133__py3-none-any.whl → 0.3.157__py3-none-any.whl

remdb 0.3.133__py3-none-any.whl → 0.3.157__py3-none-any.whl