PyPI - remdb - Versions diffs - 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl - Mend

remdb 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (83) hide show

rem/agentic/agents/__init__.py +16 -0
rem/agentic/agents/agent_manager.py +311 -0
rem/agentic/agents/sse_simulator.py +2 -0
rem/agentic/context.py +103 -5
rem/agentic/context_builder.py +36 -9
rem/agentic/mcp/tool_wrapper.py +161 -18
rem/agentic/otel/setup.py +1 -0
rem/agentic/providers/phoenix.py +371 -108
rem/agentic/providers/pydantic_ai.py +172 -30
rem/agentic/schema.py +8 -4
rem/api/deps.py +3 -5
rem/api/main.py +26 -4
rem/api/mcp_router/resources.py +15 -10
rem/api/mcp_router/server.py +11 -3
rem/api/mcp_router/tools.py +418 -4
rem/api/middleware/tracking.py +5 -5
rem/api/routers/admin.py +218 -1
rem/api/routers/auth.py +349 -6
rem/api/routers/chat/completions.py +255 -7
rem/api/routers/chat/models.py +81 -7
rem/api/routers/chat/otel_utils.py +33 -0
rem/api/routers/chat/sse_events.py +17 -1
rem/api/routers/chat/streaming.py +126 -19
rem/api/routers/feedback.py +134 -14
rem/api/routers/messages.py +24 -15
rem/api/routers/query.py +6 -3
rem/auth/__init__.py +13 -3
rem/auth/jwt.py +352 -0
rem/auth/middleware.py +115 -10
rem/auth/providers/__init__.py +4 -1
rem/auth/providers/email.py +215 -0
rem/cli/commands/README.md +42 -0
rem/cli/commands/cluster.py +617 -168
rem/cli/commands/configure.py +4 -7
rem/cli/commands/db.py +66 -22
rem/cli/commands/experiments.py +468 -76
rem/cli/commands/schema.py +6 -5
rem/cli/commands/session.py +336 -0
rem/cli/dreaming.py +2 -2
rem/cli/main.py +2 -0
rem/config.py +8 -1
rem/models/core/experiment.py +58 -14
rem/models/entities/__init__.py +4 -0
rem/models/entities/ontology.py +1 -1
rem/models/entities/ontology_config.py +1 -1
rem/models/entities/subscriber.py +175 -0
rem/models/entities/user.py +1 -0
rem/schemas/agents/core/agent-builder.yaml +235 -0
rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
rem/schemas/agents/examples/contract-extractor.yaml +1 -1
rem/schemas/agents/examples/cv-parser.yaml +1 -1
rem/services/__init__.py +3 -1
rem/services/content/service.py +4 -3
rem/services/email/__init__.py +10 -0
rem/services/email/service.py +513 -0
rem/services/email/templates.py +360 -0
rem/services/phoenix/client.py +59 -18
rem/services/postgres/README.md +38 -0
rem/services/postgres/diff_service.py +127 -6
rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
rem/services/postgres/repository.py +5 -4
rem/services/postgres/schema_generator.py +205 -4
rem/services/session/compression.py +120 -50
rem/services/session/reload.py +14 -7
rem/services/user_service.py +41 -9
rem/settings.py +442 -23
rem/sql/migrations/001_install.sql +156 -0
rem/sql/migrations/002_install_models.sql +1951 -88
rem/sql/migrations/004_cache_system.sql +548 -0
rem/sql/migrations/005_schema_update.sql +145 -0
rem/utils/README.md +45 -0
rem/utils/__init__.py +18 -0
rem/utils/files.py +157 -1
rem/utils/schema_loader.py +139 -10
rem/utils/sql_paths.py +146 -0
rem/utils/vision.py +1 -1
rem/workers/__init__.py +3 -1
rem/workers/db_listener.py +579 -0
rem/workers/unlogged_maintainer.py +463 -0
{remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/METADATA +218 -180
{remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/RECORD +83 -68
{remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/WHEEL +0 -0
{remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/entry_points.txt +0 -0

rem/cli/commands/experiments.py CHANGED Viewed

@@ -63,6 +63,7 @@ def experiments():
 @experiments.command("create")
 @click.argument("name")
 @click.option("--agent", "-a", required=True, help="Agent schema name (e.g., 'cv-parser')")
+@click.option("--task", "-t", default="general", help="Task name for organizing experiments (e.g., 'risk-assessment')")
 @click.option("--evaluator", "-e", default="default", help="Evaluator schema name (default: 'default')")
 @click.option("--description", "-d", help="Experiment description")
 @click.option("--dataset-location", type=click.Choice(["git", "s3", "hybrid"]), default="git",
@@ -74,6 +75,7 @@ def experiments():
 def create(
     name: str,
     agent: str,
+    task: str,
     evaluator: str,
     description: Optional[str],
     dataset_location: str,
@@ -123,19 +125,17 @@ def create(
         # Resolve base path: CLI arg > EXPERIMENTS_HOME env var > default "experiments"
         if base_path is None:
             base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
-        # Build dataset reference
+        # Build dataset reference (format auto-detected from file extension)
         if dataset_location == "git":
             dataset_ref = DatasetReference(
                 location=DatasetLocation.GIT,
                 path="ground-truth/dataset.csv",
-                format="csv",
                 description="Ground truth Q&A dataset for evaluation"
             )
         else:  # s3 or hybrid
             dataset_ref = DatasetReference(
                 location=DatasetLocation(dataset_location),
                 path=f"s3://rem-experiments/{name}/datasets/ground_truth.parquet",
-                format="parquet",
                 schema_path="datasets/schema.yaml" if dataset_location == "hybrid" else None,
                 description="Ground truth dataset for evaluation"
             )
@@ -170,7 +170,8 @@ def create(
         # Create experiment config
         config = ExperimentConfig(
             name=name,
-            description=description or f"Evaluation experiment for {agent} agent",
+            task=task,
+            description=description or f"Evaluation experiment for {agent} agent ({task} task)",
             agent_schema_ref=SchemaReference(
                 name=agent,
                 version=None,  # Use latest by default
@@ -514,6 +515,159 @@ def show(name: str, base_path: Optional[str]):
         raise click.Abort()
+# =============================================================================
+# VIBES MODE HELPER
+# =============================================================================
+def _run_vibes_mode(
+    config: Any,
+    dataset_df: Any,
+    task_fn: Any,
+    base_path: str,
+    limit: Optional[int],
+    evaluator_schema_path: Path,
+) -> None:
+    """Run experiment in vibes mode - execute agent and export for AI evaluation.
+    Vibes mode runs the agent on each example and saves results to a JSONL file.
+    The AI assistant (e.g., Claude Code) then acts as the judge using the
+    evaluator schema to evaluate results.
+    Args:
+        config: ExperimentConfig object
+        dataset_df: Polars DataFrame with ground truth examples
+        task_fn: Function to run agent on each example
+        base_path: Base directory for experiments
+        limit: Optional limit on number of examples to process
+        evaluator_schema_path: Path to the evaluator schema YAML file
+    """
+    from rem.utils.date_utils import format_timestamp_for_experiment, utc_now, to_iso
+    import json
+    # Apply limit if specified
+    if limit:
+        dataset_df = dataset_df.head(limit)
+        click.echo(f"  (Limited to {limit} examples)")
+    # Create results directory
+    timestamp = format_timestamp_for_experiment()
+    results_dir = Path(base_path) / config.name / "results" / timestamp
+    results_dir.mkdir(parents=True, exist_ok=True)
+    click.echo(f"\n⏳ Running agent on {len(dataset_df)} examples...")
+    click.echo(f"   Results will be saved to: {results_dir}")
+    click.echo()
+    # Run agent on each example and collect results
+    results = []
+    records = dataset_df.to_dicts()
+    for i, record in enumerate(records, 1):
+        example_id = record.get("id", i)
+        click.echo(f"  [{i}/{len(records)}] Processing example {example_id}...", nl=False)
+        try:
+            # Prepare input for agent
+            input_text = record.get("text", record.get("input", record.get("query", "")))
+            example_input = {"query": input_text} if isinstance(input_text, str) else input_text
+            # Run agent
+            output = task_fn({"input": example_input})
+            result = {
+                "id": example_id,
+                "input": input_text,
+                "ground_truth": record.get("ground_truth", record.get("expected_output", "")),
+                "category": record.get("category", ""),
+                "agent_output": output,
+                "status": "success",
+            }
+            click.echo(" ✓")
+        except Exception as e:
+            result = {
+                "id": example_id,
+                "input": record.get("text", record.get("input", "")),
+                "ground_truth": record.get("ground_truth", record.get("expected_output", "")),
+                "category": record.get("category", ""),
+                "agent_output": None,
+                "status": "error",
+                "error": str(e),
+            }
+            click.echo(f" ✗ ({e})")
+        results.append(result)
+    # Save results to JSONL
+    results_file = results_dir / "vibes-results.jsonl"
+    with open(results_file, "w") as f:
+        for result in results:
+            f.write(json.dumps(result) + "\n")
+    # Copy evaluator schema to results dir for easy reference
+    import shutil
+    evaluator_copy = results_dir / "evaluator-schema.yaml"
+    shutil.copy(evaluator_schema_path, evaluator_copy)
+    # Save run metadata
+    run_info = {
+        "experiment": config.name,
+        "agent": config.agent_schema_ref.name,
+        "evaluator": config.evaluator_schema_ref.name,
+        "mode": "vibes",
+        "timestamp": timestamp,
+        "total_examples": len(records),
+        "successful": len([r for r in results if r["status"] == "success"]),
+        "failed": len([r for r in results if r["status"] == "error"]),
+        "completed_at": to_iso(utc_now()),
+    }
+    run_info_file = results_dir / "run-info.json"
+    with open(run_info_file, "w") as f:
+        json.dump(run_info, f, indent=2)
+    # Print summary and instructions
+    success_count = run_info["successful"]
+    fail_count = run_info["failed"]
+    click.echo(f"\n{'=' * 60}")
+    click.echo(f"VIBES MODE COMPLETE")
+    click.echo(f"{'=' * 60}")
+    click.echo(f"\nResults: {success_count} successful, {fail_count} failed")
+    click.echo(f"\nFiles saved to: {results_dir}/")
+    click.echo(f"  - vibes-results.jsonl    (agent outputs)")
+    click.echo(f"  - evaluator-schema.yaml  (evaluation criteria)")
+    click.echo(f"  - run-info.json          (run metadata)")
+    click.echo(f"\n{'=' * 60}")
+    click.echo(f"NEXT STEP: Ask your AI assistant to evaluate")
+    click.echo(f"{'=' * 60}")
+    click.echo(f"""
+Copy this prompt to Claude Code or your AI assistant:
+    Please evaluate the experiment results in:
+    {results_dir}/
+    Read the vibes-results.jsonl file and evaluate each example
+    using the evaluator schema in evaluator-schema.yaml.
+    For each example, provide:
+    1. extracted_classification
+    2. exact_match (vs ground_truth)
+    3. semantic_match
+    4. reasoning_quality_score
+    5. overall_score
+    6. pass/fail
+    Then provide summary metrics:
+    - Exact match accuracy
+    - Semantic match accuracy
+    - Average overall score
+    - Pass rate
+""")
 # =============================================================================
 # RUN COMMAND
 # =============================================================================
@@ -524,6 +678,8 @@ def show(name: str, base_path: Optional[str]):
 @click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
 @click.option("--version", help="Git tag version to load (e.g., 'experiments/my-exp/v1.0.0')")
 @click.option("--dry-run", is_flag=True, help="Test on small subset without saving")
+@click.option("--only-vibes", is_flag=True, help="Run agent locally, export results for AI evaluation (no Phoenix)")
+@click.option("--limit", "-n", type=int, help="Limit number of examples to evaluate (useful with --only-vibes)")
 @click.option("--update-prompts", is_flag=True, help="Update prompts in Phoenix before running")
 @click.option("--phoenix-url", help="Phoenix server URL (overrides PHOENIX_BASE_URL env var)")
 @click.option("--phoenix-api-key", help="Phoenix API key (overrides PHOENIX_API_KEY env var)")
@@ -532,14 +688,45 @@ def run(
     base_path: Optional[str],
     version: Optional[str],
     dry_run: bool,
+    only_vibes: bool,
+    limit: Optional[int],
     update_prompts: bool,
     phoenix_url: Optional[str],
     phoenix_api_key: Optional[str],
 ):
-    """Run an experiment using Phoenix provider.
+    """Run an experiment using Phoenix provider or local vibes mode.
     Loads configuration, executes agent and evaluator, saves results.
+    Vibes Mode (--only-vibes):
+        Run agent locally without Phoenix infrastructure. Agent outputs are saved
+        to a JSONL file along with the evaluator schema. Your AI assistant (e.g.,
+        Claude Code) then acts as the judge to evaluate results.
+        This enables seamless switching between:
+        - Local evaluation: Quick iteration with AI-as-judge
+        - Phoenix evaluation: Production metrics and dashboards
+        Usage:
+            rem experiments run my-experiment --only-vibes
+            rem experiments run my-experiment --only-vibes --limit 5
+        The command will:
+        1. Run the agent on each ground-truth example
+        2. Save results to results/{timestamp}/vibes-results.jsonl
+        3. Print the evaluator prompt and schema
+        4. Instruct you to ask your AI assistant to evaluate
+        Example workflow with Claude Code:
+            $ rem experiments run mental-health-classifier --only-vibes --limit 3
+            # ... agent runs ...
+            # Results saved to: .experiments/mental-health-classifier/results/20241203-143022/
+            # Then ask Claude Code:
+            "Please evaluate the experiment results in
+             .experiments/mental-health-classifier/results/20241203-143022/
+             using the evaluator schema provided"
     Phoenix Connection:
         Commands respect PHOENIX_BASE_URL and PHOENIX_API_KEY environment variables.
         Defaults to localhost:6006 for local development.
@@ -562,6 +749,12 @@ def run(
         # Run experiment with latest schemas
         rem experiments run hello-world-validation
+        # Quick local evaluation (vibes mode)
+        rem experiments run hello-world-validation --only-vibes
+        # Vibes mode with limited examples
+        rem experiments run hello-world-validation --only-vibes --limit 5
         # Run specific version
         rem experiments run hello-world-validation \\
             --version experiments/hello-world-validation/v1.0.0
@@ -674,92 +867,107 @@ def run(
         click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
-        # Try multiple evaluator path patterns (agent-specific, then generic)
-        evaluator_paths_to_try = [
-            f"{agent_name}/{evaluator_name}",  # e.g., hello-world/default
-            f"{agent_name}-{evaluator_name}",  # e.g., hello-world-default
-            evaluator_name,                     # e.g., default (generic)
-        ]
+        # Find evaluator schema file path
+        from rem.utils.schema_loader import get_evaluator_schema_path
+        evaluator_schema_path = get_evaluator_schema_path(evaluator_name)
+        if not evaluator_schema_path or not evaluator_schema_path.exists():
+            click.echo(f"Error: Could not find evaluator schema '{evaluator_name}'")
+            raise click.Abort()
+        click.echo(f"✓ Found evaluator schema: {evaluator_schema_path}")
+        # For Phoenix mode, also load evaluator function
         evaluator_fn = None
-        evaluator_load_error = None
+        if not only_vibes:
+            # Try multiple evaluator path patterns (agent-specific, then generic)
+            evaluator_paths_to_try = [
+                f"{agent_name}/{evaluator_name}",  # e.g., hello-world/default
+                f"{agent_name}-{evaluator_name}",  # e.g., hello-world-default
+                evaluator_name,                     # e.g., default (generic)
+            ]
-        for evaluator_path in evaluator_paths_to_try:
-            try:
-                evaluator_fn = create_evaluator_from_schema(
-                    evaluator_schema_path=evaluator_path,
-                    model_name=None,  # Use default from schema
-                )
-                click.echo(f"✓ Loaded evaluator schema: {evaluator_path}")
-                break
-            except FileNotFoundError as e:
-                evaluator_load_error = e
-                logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
-                continue
-            except Exception as e:
-                evaluator_load_error = e
-                logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
-                continue
+            evaluator_load_error = None
+            for evaluator_path in evaluator_paths_to_try:
+                try:
+                    evaluator_fn = create_evaluator_from_schema(
+                        evaluator_schema_path=evaluator_path,
+                        model_name=None,  # Use default from schema
+                    )
+                    click.echo(f"✓ Loaded evaluator function: {evaluator_path}")
+                    break
+                except FileNotFoundError as e:
+                    evaluator_load_error = e
+                    logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
+                    continue
+                except Exception as e:
+                    evaluator_load_error = e
+                    logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
+                    continue
-        if evaluator_fn is None:
-            click.echo(f"Error: Could not load evaluator schema '{evaluator_name}'")
+        if evaluator_fn is None and not only_vibes:
+            click.echo(f"Error: Could not load evaluator function '{evaluator_name}'")
             click.echo(f"  Tried paths: {evaluator_paths_to_try}")
             if evaluator_load_error:
                 click.echo(f"  Last error: {evaluator_load_error}")
             raise click.Abort()
-        # Load dataset using Polars
-        import polars as pl
+        # Validate evaluator credentials before running expensive agent tasks
+        if evaluator_fn is not None and not only_vibes:
+            from rem.agentic.providers.phoenix import validate_evaluator_credentials
+            click.echo("Validating evaluator credentials...")
+            is_valid, error_msg = validate_evaluator_credentials()
+            if not is_valid:
+                click.echo(click.style(f"\n⚠️  Evaluator validation failed: {error_msg}", fg="yellow"))
+                click.echo("\nOptions:")
+                click.echo("  1. Fix the credentials issue and re-run")
+                click.echo("  2. Run with --only-vibes to skip LLM evaluation")
+                click.echo("  3. Use --evaluator-model to specify a different model")
+                raise click.Abort()
+            click.echo("✓ Evaluator credentials validated")
+        # Load dataset using read_dataframe utility (auto-detects format from extension)
+        from rem.utils.files import read_dataframe
         click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
         dataset_ref = list(config.datasets.values())[0]
-        if dataset_ref.location.value == "git":
-            # Load from Git (local filesystem)
-            dataset_path = Path(base_path) / name / dataset_ref.path
-            if not dataset_path.exists():
-                click.echo(f"Error: Dataset not found: {dataset_path}")
-                raise click.Abort()
-            if dataset_ref.format == "csv":
-                dataset_df = pl.read_csv(dataset_path)
-            elif dataset_ref.format == "parquet":
-                dataset_df = pl.read_parquet(dataset_path)
-            elif dataset_ref.format == "jsonl":
-                dataset_df = pl.read_ndjson(dataset_path)
-            else:
-                click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
-                raise click.Abort()
-        elif dataset_ref.location.value in ["s3", "hybrid"]:
-            # Load from S3 using FS provider
-            from rem.services.fs import FS
-            from io import BytesIO
+        try:
+            if dataset_ref.location.value == "git":
+                # Load from Git (local filesystem)
+                dataset_path = Path(base_path) / name / dataset_ref.path
+                if not dataset_path.exists():
+                    click.echo(f"Error: Dataset not found: {dataset_path}")
+                    raise click.Abort()
-            fs = FS()
+                dataset_df = read_dataframe(dataset_path)
-            try:
-                if dataset_ref.format == "csv":
-                    content = fs.read(dataset_ref.path)
-                    dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
-                elif dataset_ref.format == "parquet":
-                    content_bytes = fs.read(dataset_ref.path)
-                    dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
-                elif dataset_ref.format == "jsonl":
-                    content = fs.read(dataset_ref.path)
-                    dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
-                else:
-                    click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
-                    raise click.Abort()
+            elif dataset_ref.location.value in ["s3", "hybrid"]:
+                # Load from S3 using FS provider
+                from rem.services.fs import FS
+                fs = FS()
+                content = fs.read(dataset_ref.path)
+                # Ensure we have bytes
+                if isinstance(content, str):
+                    content = content.encode()
+                dataset_df = read_dataframe(content, filename=dataset_ref.path)
                 click.echo(f"✓ Loaded dataset from S3")
-            except Exception as e:
-                logger.error(f"Failed to load dataset from S3: {e}")
-                click.echo(f"Error: Could not load dataset from S3")
-                click.echo(f"  Path: {dataset_ref.path}")
-                click.echo(f"  Format: {dataset_ref.format}")
+            else:
+                click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
                 raise click.Abort()
-        else:
-            click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
+        except ValueError as e:
+            # Unsupported format error from read_dataframe
+            click.echo(f"Error: {e}")
+            raise click.Abort()
+        except Exception as e:
+            logger.error(f"Failed to load dataset: {e}")
+            click.echo(f"Error: Could not load dataset")
+            click.echo(f"  Path: {dataset_ref.path}")
             raise click.Abort()
         click.echo(f"✓ Loaded dataset: {len(dataset_df)} examples")
@@ -769,6 +977,18 @@ def run(
             # TODO: Implement prompt updating
             click.echo("⚠  --update-prompts not yet implemented")
+        # Vibes mode: run agent and export for AI evaluation
+        if only_vibes:
+            _run_vibes_mode(
+                config=config,
+                dataset_df=dataset_df,
+                task_fn=task_fn,
+                base_path=base_path,
+                limit=limit,
+                evaluator_schema_path=evaluator_schema_path,
+            )
+            return
         # Run experiment via Phoenix
         if not dry_run:
             # Create Phoenix client with optional overrides
@@ -1067,7 +1287,7 @@ def prompt():
 @click.option("--system-prompt", "-s", required=True, help="System prompt text")
 @click.option("--description", "-d", help="Prompt description")
 @click.option("--model-provider", default="OPENAI", help="Model provider (OPENAI, ANTHROPIC)")
-@click.option("--model-name", "-m", help="Model name (e.g., gpt-4o, claude-sonnet-4-5)")
+@click.option("--model-name", "-m", help="Model name (e.g., gpt-4.1, claude-sonnet-4-5)")
 @click.option("--type", "-t", "prompt_type", default="Agent", help="Prompt type (Agent or Evaluator)")
 def prompt_create(
     name: str,
@@ -1083,7 +1303,7 @@ def prompt_create(
         # Create agent prompt
         rem experiments prompt create hello-world \\
             --system-prompt "You are a helpful assistant." \\
-            --model-name gpt-4o
+            --model-name gpt-4.1
         # Create evaluator prompt
         rem experiments prompt create correctness-evaluator \\
@@ -1101,7 +1321,7 @@ def prompt_create(
     try:
         # Set default model if not specified
         if not model_name:
-            model_name = "gpt-4o" if model_provider == "OPENAI" else "claude-sonnet-4-5-20250929"
+            model_name = "gpt-4.1" if model_provider == "OPENAI" else "claude-sonnet-4-5-20250929"
         # Get config
         phoenix_client = PhoenixClient()
@@ -1304,3 +1524,175 @@ def trace_list(
         logger.error(f"Failed to list traces: {e}")
         click.echo(f"Error: {e}", err=True)
         raise click.Abort()
+# =============================================================================
+# EXPORT COMMAND
+# =============================================================================
+@experiments.command("export")
+@click.argument("name")
+@click.option("--base-path", help="Base directory for experiments (default: EXPERIMENTS_HOME or 'experiments')")
+@click.option("--bucket", "-b", help="S3 bucket name (default: DATA_LAKE__BUCKET_NAME)")
+@click.option("--version", "-v", default="v0", help="Data lake version prefix (default: v0)")
+@click.option("--plan", is_flag=True, help="Show what would be exported without uploading")
+@click.option("--include-results", is_flag=True, help="Include results directory in export")
+def export(
+    name: str,
+    base_path: Optional[str],
+    bucket: Optional[str],
+    version: str,
+    plan: bool,
+    include_results: bool,
+):
+    """Export experiment to S3 data lake.
+    Exports experiment configuration, ground truth, and optionally results
+    to the S3 data lake following the convention:
+        s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
+    The export includes:
+    - experiment.yaml (configuration)
+    - README.md (documentation)
+    - ground-truth/ (evaluation datasets)
+    - seed-data/ (optional seed data)
+    - results/ (optional, with --include-results)
+    Examples:
+        # Preview what would be exported
+        rem experiments export my-experiment --plan
+        # Export to configured data lake bucket
+        rem experiments export my-experiment
+        # Export to specific bucket
+        rem experiments export my-experiment --bucket siggy-data
+        # Include results in export
+        rem experiments export my-experiment --include-results
+        # Export with custom version prefix
+        rem experiments export my-experiment --version v1
+    """
+    from rem.models.core.experiment import ExperimentConfig
+    from rem.settings import settings
+    from rem.services.fs.s3_provider import S3Provider
+    import os
+    import json
+    try:
+        # Resolve base path
+        if base_path is None:
+            base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
+        # Load experiment configuration
+        config_path = Path(base_path) / name / "experiment.yaml"
+        if not config_path.exists():
+            click.echo(f"Experiment not found: {name}")
+            click.echo(f"  Looked in: {config_path}")
+            raise click.Abort()
+        config = ExperimentConfig.from_yaml(config_path)
+        click.echo(f"✓ Loaded experiment: {name}")
+        # Resolve bucket
+        if bucket is None:
+            bucket = settings.data_lake.bucket_name
+            if bucket is None:
+                click.echo("Error: No S3 bucket configured.")
+                click.echo("  Set DATA_LAKE__BUCKET_NAME environment variable or use --bucket option")
+                raise click.Abort()
+        # Build S3 paths
+        s3_base = config.get_s3_export_path(bucket, version)
+        exp_dir = config.get_experiment_dir(base_path)
+        # Collect files to export
+        files_to_export = []
+        # Always include these files
+        required_files = [
+            ("experiment.yaml", exp_dir / "experiment.yaml"),
+            ("README.md", exp_dir / "README.md"),
+        ]
+        for s3_name, local_path in required_files:
+            if local_path.exists():
+                files_to_export.append((s3_name, local_path))
+        # Include ground-truth directory
+        ground_truth_dir = exp_dir / "ground-truth"
+        if ground_truth_dir.exists():
+            for f in ground_truth_dir.rglob("*"):
+                if f.is_file():
+                    relative = f.relative_to(exp_dir)
+                    files_to_export.append((str(relative), f))
+        # Include seed-data directory
+        seed_data_dir = exp_dir / "seed-data"
+        if seed_data_dir.exists():
+            for f in seed_data_dir.rglob("*"):
+                if f.is_file():
+                    relative = f.relative_to(exp_dir)
+                    files_to_export.append((str(relative), f))
+        # Optionally include results
+        if include_results:
+            results_dir = exp_dir / "results"
+            if results_dir.exists():
+                for f in results_dir.rglob("*"):
+                    if f.is_file():
+                        relative = f.relative_to(exp_dir)
+                        files_to_export.append((str(relative), f))
+        # Display export plan
+        click.echo(f"\n{'=' * 60}")
+        click.echo(f"EXPORT {'PLAN' if plan else 'TO S3'}")
+        click.echo(f"{'=' * 60}")
+        click.echo(f"\nExperiment: {config.name}")
+        click.echo(f"Agent: {config.agent_schema_ref.name}")
+        click.echo(f"Task: {config.task}")
+        click.echo(f"Evaluator file: {config.get_evaluator_filename()}")
+        click.echo(f"\nDestination: {s3_base}/")
+        click.echo(f"\nFiles to export ({len(files_to_export)}):")
+        for s3_name, local_path in files_to_export:
+            s3_uri = f"{s3_base}/{s3_name}"
+            if plan:
+                click.echo(f"  {local_path}")
+                click.echo(f"    → {s3_uri}")
+            else:
+                click.echo(f"  {s3_name}")
+        if plan:
+            click.echo(f"\n[PLAN MODE] No files were uploaded.")
+            click.echo(f"Run without --plan to execute the export.")
+            return
+        # Execute export
+        click.echo(f"\n⏳ Uploading to S3...")
+        s3 = S3Provider()
+        uploaded = 0
+        for s3_name, local_path in files_to_export:
+            s3_uri = f"{s3_base}/{s3_name}"
+            try:
+                s3.copy(str(local_path), s3_uri)
+                uploaded += 1
+                click.echo(f"  ✓ {s3_name}")
+            except Exception as e:
+                click.echo(f"  ✗ {s3_name}: {e}")
+        click.echo(f"\n✓ Exported {uploaded}/{len(files_to_export)} files to {s3_base}/")
+        # Show next steps
+        click.echo(f"\nNext steps:")
+        click.echo(f"  - View in S3: aws s3 ls {s3_base}/ --recursive")
+        click.echo(f"  - Download: aws s3 sync {s3_base}/ ./{config.agent_schema_ref.name}/{config.task}/")
+    except Exception as e:
+        logger.error(f"Failed to export experiment: {e}")
+        click.echo(f"Error: {e}", err=True)
+        raise click.Abort()

remdb 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl

Potentially problematic release.

remdb 0.3.114__py3-none-any.whl → 0.3.172__py3-none-any.whl