remdb 0.3.7__py3-none-any.whl → 0.3.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +129 -2
- rem/agentic/context.py +7 -5
- rem/agentic/providers/phoenix.py +32 -43
- rem/api/README.md +23 -0
- rem/api/main.py +27 -2
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/auth.py +54 -0
- rem/api/routers/chat/completions.py +1 -1
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/configure.py +4 -3
- rem/cli/commands/db.py +17 -3
- rem/cli/commands/experiments.py +76 -72
- rem/cli/commands/process.py +8 -7
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/main.py +2 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +367 -0
- rem/services/content/providers.py +92 -133
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +20 -13
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +148 -14
- rem/services/postgres/schema_generator.py +86 -5
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/user_service.py +98 -0
- rem/settings.py +79 -10
- rem/sql/install_models.sql +13 -0
- rem/sql/migrations/003_seed_default_user.sql +48 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/schema_loader.py +63 -14
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/db_maintainer.py +74 -0
- {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/METADATA +169 -121
- {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/RECORD +43 -32
- {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/WHEEL +0 -0
- {remdb-0.3.7.dist-info → remdb-0.3.14.dist-info}/entry_points.txt +0 -0
rem/cli/commands/ask.py
CHANGED
@@ -89,8 +89,8 @@ async def run_agent_streaming(
         context: Optional AgentContext for session persistence
         max_iterations: Maximum iterations/requests (from agent schema or settings)
     """
-    from datetime import datetime, timezone
     from pydantic_ai import UsageLimits
+    from rem.utils.date_utils import to_iso_with_z, utc_now
 
     logger.info("Running agent in streaming mode...")
 
@@ -151,13 +151,13 @@ async def run_agent_streaming(
     user_message = {
         "role": "user",
         "content": user_message_content,
-        "timestamp": …
+        "timestamp": to_iso_with_z(utc_now()),
     }
 
     assistant_message = {
         "role": "assistant",
         "content": "".join(assistant_response_parts),
-        "timestamp": …
+        "timestamp": to_iso_with_z(utc_now()),
     }
 
     # Store messages with compression
@@ -200,8 +200,8 @@ async def run_agent_non_streaming(
     Returns:
         Output data if successful, None otherwise
     """
-    from datetime import datetime, timezone
     from pydantic_ai import UsageLimits
+    from rem.utils.date_utils import to_iso_with_z, utc_now
 
     logger.info("Running agent in non-streaming mode...")
 
@@ -248,13 +248,13 @@ async def run_agent_non_streaming(
     user_message = {
         "role": "user",
         "content": user_message_content,
-        "timestamp": …
+        "timestamp": to_iso_with_z(utc_now()),
     }
 
     assistant_message = {
         "role": "assistant",
         "content": assistant_content,
-        "timestamp": …
+        "timestamp": to_iso_with_z(utc_now()),
     }
 
     # Store messages with compression
@@ -357,8 +357,8 @@ async def _save_output_file(file_path: Path, data: dict[str, Any]) -> None:
 )
 @click.option(
     "--user-id",
-    default=…
-    help="User ID for context (default: test…
+    default=None,
+    help="User ID for context (default: from settings.test.effective_user_id)",
 )
 @click.option(
     "--session-id",
@@ -393,7 +393,7 @@ def ask(
     max_turns: int,
     version: str | None,
     stream: bool,
-    user_id: str,
+    user_id: str | None,
     session_id: str | None,
     input_file: Path | None,
     output_file: Path | None,
@@ -434,6 +434,9 @@
         # Two arguments provided
         name = name_or_query
 
+    # Resolve user_id from settings if not provided
+    effective_user_id = user_id or settings.test.effective_user_id
+
     asyncio.run(
         _ask_async(
             name=name,
@@ -443,7 +446,7 @@
             max_turns=max_turns,
             version=version,
             stream=stream,
-            user_id=…
+            user_id=effective_user_id,
             session_id=session_id,
             input_file=input_file,
             output_file=output_file,
rem/cli/commands/configure.py
CHANGED
@@ -49,7 +49,7 @@ def prompt_postgres_config(use_defaults: bool = False) -> dict:
 
     # Default values
     host = "localhost"
-    port = …
+    port = 5051
     database = "rem"
     username = "rem"
     password = "rem"
@@ -431,8 +431,9 @@ def configure_command(install: bool, claude_desktop: bool, show: bool, edit: boo…
     if os.name == "nt":  # Windows
         config_dir = Path.home() / "AppData/Roaming/Claude"
     elif os.name == "posix":
-        …
+        macos_path = Path.home() / "Library/Application Support/Claude"
+        if macos_path.exists():
+            config_dir = macos_path
         else:
             config_dir = Path.home() / ".config/Claude"
     else:
rem/cli/commands/db.py
CHANGED
@@ -382,9 +382,9 @@ def rebuild_cache(connection: str | None):
 
 @click.command()
 @click.argument("file_path", type=click.Path(exists=True, path_type=Path))
-@click.option("--user-id", default=…
+@click.option("--user-id", default=None, help="User ID for loaded data (default: from settings)")
 @click.option("--dry-run", is_flag=True, help="Show what would be loaded without loading")
-def load(file_path: Path, user_id: str, dry_run: bool):
+def load(file_path: Path, user_id: str | None, dry_run: bool):
     """
     Load data from YAML file into database.
 
@@ -400,7 +400,11 @@ def load(file_path: Path, user_id: str, dry_run: bool):
         rem db load data.yaml --user-id my-user
         rem db load data.yaml --dry-run
     """
-    …
+    from ...settings import settings
+
+    # Resolve user_id from settings if not provided
+    effective_user_id = user_id or settings.test.effective_user_id
+    asyncio.run(_load_async(file_path, effective_user_id, dry_run))
 
 
 async def _load_async(file_path: Path, user_id: str, dry_run: bool):
@@ -467,6 +471,16 @@ async def _load_async(file_path: Path, user_id: str, dry_run: bool):
                     for edge in row_data["graph_edges"]
                 ]
 
+                # Convert any ISO timestamp strings with Z suffix to naive datetime
+                # This handles fields like starts_timestamp, ends_timestamp, etc.
+                from ...utils.date_utils import parse_iso
+                for key, value in list(row_data.items()):
+                    if isinstance(value, str) and (key.endswith("_timestamp") or key.endswith("_at")):
+                        try:
+                            row_data[key] = parse_iso(value)
+                        except (ValueError, TypeError):
+                            pass  # Not a valid datetime string, leave as-is
+
                 # Create model instance and upsert via repository
                 from ...services.postgres.repository import Repository
 
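
The normalization pass above depends on parse_iso, which is also not shown in this diff. Given the hunk's comment ("Convert any ISO timestamp strings with Z suffix to naive datetime"), a plausible sketch plus a worked example of the loop:

```python
# parse_iso is confirmed as an import from rem.utils.date_utils; this body is
# an assumption consistent with the "naive datetime" comment in the hunk above.
from datetime import datetime, timezone


def parse_iso(value: str) -> datetime:
    """Parse ISO-8601 (tolerating a trailing 'Z') into a naive UTC datetime."""
    dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt


# The loop rewrites only *_timestamp / *_at string fields in place:
row_data = {"name": "demo", "starts_timestamp": "2025-05-01T12:00:00Z"}
for key, value in list(row_data.items()):
    if isinstance(value, str) and (key.endswith("_timestamp") or key.endswith("_at")):
        try:
            row_data[key] = parse_iso(value)
        except (ValueError, TypeError):
            pass  # not a datetime string, leave as-is
print(row_data["starts_timestamp"])  # -> 2025-05-01 12:00:00 (naive)
```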
rem/cli/commands/experiments.py
CHANGED
@@ -578,8 +578,7 @@ def run(
     from rem.services.git import GitService
     from rem.services.phoenix import PhoenixClient
     from rem.agentic.providers.phoenix import create_evaluator_from_schema
-    from …
-    import pandas as pd
+    from rem.utils.date_utils import utc_now, to_iso, format_timestamp_for_experiment
     import os
 
     try:
@@ -615,36 +614,22 @@
         click.echo(f"  Mode: DRY RUN (no data will be saved)")
         click.echo()
 
-    # Load agent schema
+    # Load agent schema using centralized schema loader
     agent_name = config.agent_schema_ref.name
     agent_version = config.agent_schema_ref.version
 
     click.echo(f"Loading agent schema: {agent_name} (version: {agent_version or 'latest'})")
 
-    …
-    agent_schema = None
-    try:
-        git_svc = GitService()
-        agent_schema = git_svc.load_schema(agent_name, version=agent_version)
-        click.echo(f"✓ Loaded agent schema from Git")
-    except Exception as e:
-        logger.debug(f"Git not available, trying filesystem: {e}")
+    from rem.utils.schema_loader import load_agent_schema
 
-    …
-    except Exception as fs_error:
-        logger.error(f"Failed to load agent schema: Git: {e}, FS: {fs_error}")
-        click.echo(f"Error: Could not load agent schema '{agent_name}'")
-        click.echo(f"  Tried Git: {e}")
-        click.echo(f"  Tried filesystem: {schema_path}")
-        click.echo(f"  Make sure the schema exists")
-        raise click.Abort()
+    try:
+        agent_schema = load_agent_schema(agent_name)
+        click.echo(f"✓ Loaded agent schema: {agent_name}")
+    except FileNotFoundError as e:
+        logger.error(f"Failed to load agent schema: {e}")
+        click.echo(f"Error: Could not load agent schema '{agent_name}'")
+        click.echo(f"  {e}")
+        raise click.Abort()
 
     # Create agent function from schema
     from rem.agentic.providers.pydantic_ai import create_agent
@@ -683,73 +668,85 @@ def run(
             return {"output": serialized}
         return serialized if isinstance(serialized, dict) else {"output": str(serialized)}
 
-    # Load evaluator schema
+    # Load evaluator schema using centralized schema loader
     evaluator_name = config.evaluator_schema_ref.name
     evaluator_version = config.evaluator_schema_ref.version
 
-    # Resolve evaluator path (evaluators are organized by agent name)
-    evaluator_schema_path = f"rem/schemas/evaluators/{agent_name}/{evaluator_name}.yaml"
-
     click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
 
-    …
-    )
-    …
+    # Try multiple evaluator path patterns (agent-specific, then generic)
+    evaluator_paths_to_try = [
+        f"{agent_name}/{evaluator_name}",  # e.g., hello-world/default
+        f"{agent_name}-{evaluator_name}",  # e.g., hello-world-default
+        evaluator_name,  # e.g., default (generic)
+    ]
+
+    evaluator_fn = None
+    evaluator_load_error = None
+
+    for evaluator_path in evaluator_paths_to_try:
+        try:
+            evaluator_fn = create_evaluator_from_schema(
+                evaluator_schema_path=evaluator_path,
+                model_name=None,  # Use default from schema
+            )
+            click.echo(f"✓ Loaded evaluator schema: {evaluator_path}")
+            break
+        except FileNotFoundError as e:
+            evaluator_load_error = e
+            logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
+            continue
+        except Exception as e:
+            evaluator_load_error = e
+            logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
+            continue
+
+    if evaluator_fn is None:
+        click.echo(f"Error: Could not load evaluator schema '{evaluator_name}'")
+        click.echo(f"  Tried paths: {evaluator_paths_to_try}")
+        if evaluator_load_error:
+            click.echo(f"  Last error: {evaluator_load_error}")
         raise click.Abort()
 
-    # Load dataset
+    # Load dataset using Polars
+    import polars as pl
+
     click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
     dataset_ref = list(config.datasets.values())[0]
 
     if dataset_ref.location.value == "git":
-        # Load from Git
+        # Load from Git (local filesystem)
         dataset_path = Path(base_path) / name / dataset_ref.path
         if not dataset_path.exists():
             click.echo(f"Error: Dataset not found: {dataset_path}")
             raise click.Abort()
 
         if dataset_ref.format == "csv":
-            dataset_df = …
+            dataset_df = pl.read_csv(dataset_path)
         elif dataset_ref.format == "parquet":
-            dataset_df = …
+            dataset_df = pl.read_parquet(dataset_path)
         elif dataset_ref.format == "jsonl":
-            dataset_df = …
+            dataset_df = pl.read_ndjson(dataset_path)
         else:
             click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
             raise click.Abort()
     elif dataset_ref.location.value in ["s3", "hybrid"]:
         # Load from S3 using FS provider
         from rem.services.fs import FS
+        from io import BytesIO
 
         fs = FS()
 
         try:
             if dataset_ref.format == "csv":
                 content = fs.read(dataset_ref.path)
-                …
-                dataset_df = pd.read_csv(StringIO(content))
+                dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
             elif dataset_ref.format == "parquet":
-                …
-                with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
-                    tmp_path = tmp.name
-                    # Download via FS
-                    content_bytes = fs.read(dataset_ref.path)
-                    tmp.write(content_bytes)
-                dataset_df = pd.read_parquet(tmp_path)
-                Path(tmp_path).unlink()  # Clean up temp file
+                content_bytes = fs.read(dataset_ref.path)
+                dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
             elif dataset_ref.format == "jsonl":
                 content = fs.read(dataset_ref.path)
-                …
-                dataset_df = pd.read_json(StringIO(content), lines=True)
+                dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
             else:
                 click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
                 raise click.Abort()
@@ -793,13 +790,13 @@ def run(
 
     client = PhoenixClient(config=phoenix_config)
 
-    experiment_name = f"{config.name}-{…
+    experiment_name = f"{config.name}-{format_timestamp_for_experiment()}"
 
     click.echo(f"\n⏳ Running experiment: {experiment_name}")
     click.echo(f"   This may take several minutes...")
 
     experiment = client.run_experiment(
-        dataset=dataset_df,
+        dataset=dataset_df,
         task=task_fn,
         evaluators=[evaluator_fn],
         experiment_name=experiment_name,
@@ -809,12 +806,15 @@
             "evaluator": config.evaluator_schema_ref.name,
             "experiment_config": config.name,
             **config.metadata
-        }
+        },
+        # Smart column detection for DataFrame -> Phoenix Dataset conversion
+        input_keys=["input"] if "input" in dataset_df.columns else None,
+        output_keys=["expected_output"] if "expected_output" in dataset_df.columns else None,
     )
 
     # Update experiment status
     config.status = ExperimentStatus.COMPLETED
-    config.last_run_at = …
+    config.last_run_at = utc_now()
     if not version:  # Only save if not loading from Git
         config.save(base_path)
 
@@ -835,7 +835,7 @@
         "agent": config.agent_schema_ref.name,
         "evaluator": config.evaluator_schema_ref.name,
         "dataset_size": len(dataset_df),
-        "completed_at": …
+        "completed_at": to_iso(utc_now()),
         "phoenix_url": getattr(experiment, "url", None),
         "task_runs": len(exp_data.get("task_runs", [])),
     }
@@ -1015,20 +1015,24 @@ def dataset_add(
         --output-keys expected_label,expected_type
     """
     from rem.services.phoenix import PhoenixClient
-    import …
+    import polars as pl
 
     try:
         client = PhoenixClient()
 
-        # Load CSV
-        df = …
+        # Load CSV with Polars
+        df = pl.read_csv(from_csv)
+        records = df.to_dicts()
 
         # Extract data
-        …
+        input_cols = input_keys.split(",")
+        output_cols = output_keys.split(",")
+        inputs = [{k: row.get(k) for k in input_cols} for row in records]
+        outputs = [{k: row.get(k) for k in output_cols} for row in records]
         metadata = None
         if metadata_keys:
-            …
+            meta_cols = metadata_keys.split(",")
+            metadata = [{k: row.get(k) for k in meta_cols} for row in records]
 
         # Add to dataset
         dataset = client.add_examples_to_dataset(
@@ -1269,12 +1273,12 @@ def trace_list(
         rem experiments trace list --project rem-agents --days 7 --limit 50
     """
     from rem.services.phoenix import PhoenixClient
-    from …
+    from rem.utils.date_utils import days_ago
 
     try:
         client = PhoenixClient()
 
-        start_time = …
+        start_time = days_ago(days)
 
         traces_df = client.get_traces(
             project_name=project,
rem/cli/commands/process.py
CHANGED
@@ -192,15 +192,13 @@ def process_uri(uri: str, output: str, save: str | None):
 
 
 @click.command(name="files")
-@click.option("--…
-@click.option("--user-id", help="Filter by user ID")
+@click.option("--user-id", default=None, help="User ID (default: from settings)")
 @click.option("--status", type=click.Choice(["pending", "processing", "completed", "failed"]), help="Filter by status")
 @click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
 @click.option("--limit", type=int, help="Max files to process")
 @click.option("--provider", help="Optional LLM provider override")
 @click.option("--model", help="Optional model override")
 def process_files(
-    tenant_id: str,
     user_id: Optional[str],
     status: Optional[str],
     extractor: Optional[str],
@@ -217,19 +215,22 @@ def process_files(
 
     \b
     # List completed files
-    rem process files --…
+    rem process files --status completed
 
     \b
     # Extract from CV files
-    rem process files --…
+    rem process files --extractor cv-parser-v1 --limit 10
 
     \b
     # Extract with provider override
-    rem process files --…
+    rem process files --extractor contract-analyzer-v1 \\
         --provider anthropic --model claude-sonnet-4-5
     """
+    from ...settings import settings
+    effective_user_id = user_id or settings.test.effective_user_id
+
     logger.warning("Not implemented yet")
-    logger.info(f"Would process files for …
+    logger.info(f"Would process files for user: {effective_user_id}")
 
     if user_id:
         logger.info(f"Filter: user_id={user_id}")
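
ask.py, db.py, and process.py now share the same fallback idiom for the default user. Reduced to its core (settings.test.effective_user_id is confirmed by the hunks; the standalone helper is only illustrative):

```python
def resolve_user_id(user_id: str | None, settings) -> str:
    """Use the explicit --user-id when given, else the configured test user."""
    return user_id or settings.test.effective_user_id
```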
rem/cli/commands/scaffold.py
ADDED
@@ -0,0 +1,47 @@
+"""
+Scaffold command - generate project structure for REM-based applications.
+
+TODO: Implement this command to generate:
+- my_app/main.py (entry point with create_app)
+- my_app/models.py (example CoreModel subclass)
+- my_app/routers/ (example FastAPI router)
+- schemas/agents/ (example agent schema)
+- schemas/evaluators/ (example evaluator)
+- sql/migrations/ (empty migrations directory)
+- pyproject.toml (with remdb dependency)
+- README.md (basic usage instructions)
+
+Usage:
+    rem scaffold my-app
+    rem scaffold my-app --with-examples  # Include example models/routers/tools
+"""
+
+import click
+
+
+@click.command()
+@click.argument("name")
+@click.option("--with-examples", is_flag=True, help="Include example code")
+def scaffold(name: str, with_examples: bool) -> None:
+    """
+    Generate a new REM-based project structure.
+
+    NAME is the project directory name to create.
+    """
+    click.echo(f"TODO: Scaffold command not yet implemented")
+    click.echo(f"Would create project: {name}")
+    click.echo(f"With examples: {with_examples}")
+    click.echo()
+    click.echo("For now, manually create this structure:")
+    click.echo(f"""
+{name}/
+├── {name.replace('-', '_')}/
+│   ├── main.py          # Entry point (create_app + extensions)
+│   ├── models.py        # Custom models (inherit CoreModel)
+│   └── routers/         # Custom FastAPI routers
+├── schemas/
+│   ├── agents/          # Custom agent YAML schemas
+│   └── evaluators/      # Custom evaluator schemas
+├── sql/migrations/      # Custom SQL migrations
+└── pyproject.toml
+""")
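
The command is still a stub, so the printed tree is the only contract. A minimal sketch of what the TODO implementation might start with (layout taken from the docstring; everything else is assumed):

```python
from pathlib import Path


def create_skeleton(name: str) -> None:
    """Create the empty directory tree the stub currently only prints."""
    pkg = name.replace("-", "_")
    root = Path(name)
    for sub in (
        root / pkg / "routers",
        root / "schemas" / "agents",
        root / "schemas" / "evaluators",
        root / "sql" / "migrations",
    ):
        sub.mkdir(parents=True, exist_ok=True)
    (root / pkg / "main.py").touch()    # entry point (create_app + extensions)
    (root / pkg / "models.py").touch()  # custom models (inherit CoreModel)
    (root / "pyproject.toml").touch()   # would declare the remdb dependency
```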
rem/cli/main.py
CHANGED
@@ -75,6 +75,7 @@ from .commands.experiments import experiments as experiments_group
 from .commands.configure import register_command as register_configure_command
 from .commands.serve import register_command as register_serve_command
 from .commands.mcp import register_command as register_mcp_command
+from .commands.scaffold import scaffold as scaffold_command
 
 register_schema_commands(schema)
 register_db_commands(db)
@@ -85,6 +86,7 @@ register_configure_command(cli)
 register_serve_command(cli)
 register_mcp_command(cli)
 cli.add_command(experiments_group)
+cli.add_command(scaffold_command)
 
 
 def main():
rem/models/entities/user.py
CHANGED
@@ -22,9 +22,12 @@ from ..core import CoreModel
 class UserTier(str, Enum):
     """User subscription tier for feature gating."""
 
+    ANONYMOUS = "anonymous"
     FREE = "free"
-    …
+    BASIC = "basic"
+    PRO = "pro"
+    SILVER = "silver"  # Deprecated? Keeping for backward compatibility if needed
+    GOLD = "gold"  # Deprecated? Keeping for backward compatibility if needed
 
 
 class User(CoreModel):
@@ -57,7 +60,11 @@ class User(CoreModel):
     )
     tier: UserTier = Field(
         default=UserTier.FREE,
-        description="User subscription tier (free, …
+        description="User subscription tier (free, basic, pro) for feature gating",
+    )
+    anonymous_ids: list[str] = Field(
+        default_factory=list,
+        description="Linked anonymous session IDs used for merging history",
     )
     sec_policy: dict = Field(
         default_factory=dict,