alchemist-nrel 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- alchemist_core/__init__.py +2 -2
- alchemist_core/acquisition/botorch_acquisition.py +83 -126
- alchemist_core/data/experiment_manager.py +181 -12
- alchemist_core/models/botorch_model.py +292 -63
- alchemist_core/models/sklearn_model.py +145 -13
- alchemist_core/session.py +3330 -31
- alchemist_core/utils/__init__.py +3 -1
- alchemist_core/utils/acquisition_utils.py +60 -0
- alchemist_core/visualization/__init__.py +45 -0
- alchemist_core/visualization/helpers.py +130 -0
- alchemist_core/visualization/plots.py +1449 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/METADATA +13 -13
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +31 -26
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
- api/main.py +1 -1
- api/models/requests.py +52 -0
- api/models/responses.py +79 -2
- api/routers/experiments.py +333 -8
- api/routers/sessions.py +84 -9
- api/routers/visualizations.py +6 -4
- api/routers/websocket.py +2 -2
- api/services/session_store.py +295 -71
- api/static/assets/index-B6Cf6s_b.css +1 -0
- api/static/assets/{index-DWfIKU9j.js → index-B7njvc9r.js} +201 -196
- api/static/index.html +2 -2
- ui/gpr_panel.py +11 -5
- ui/target_column_dialog.py +299 -0
- ui/ui.py +52 -5
- api/static/assets/index-sMIa_1hV.css +0 -1
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +0 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -0
api/routers/experiments.py
CHANGED
@@ -3,15 +3,27 @@ Experiments router - Experimental data management.
 """
 
 from fastapi import APIRouter, Depends, UploadFile, File, Query
-from ..models.requests import
+from ..models.requests import (
+    AddExperimentRequest,
+    AddExperimentsBatchRequest,
+    InitialDesignRequest,
+    StageExperimentRequest,
+    StageExperimentsBatchRequest,
+    CompleteStagedExperimentsRequest
+)
 from ..models.responses import (
     ExperimentResponse,
     ExperimentsListResponse,
     ExperimentsSummaryResponse,
-    InitialDesignResponse
+    InitialDesignResponse,
+    StagedExperimentResponse,
+    StagedExperimentsListResponse,
+    StagedExperimentsClearResponse,
+    StagedExperimentsCompletedResponse
 )
 from ..dependencies import get_session
 from ..middleware.error_handlers import NoVariablesError
+from .websocket import broadcast_to_session
 from alchemist_core.session import OptimizationSession
 import logging
 import pandas as pd

@@ -96,6 +108,17 @@ async def add_experiment(
             logger.error(f"Auto-train failed for session {session_id}: {e}")
             # Don't fail the whole request, just log it
 
+    # Broadcast experiment update to WebSocket clients
+    await broadcast_to_session(session_id, {
+        "event": "experiments_updated",
+        "n_experiments": n_experiments
+    })
+    if model_trained:
+        await broadcast_to_session(session_id, {
+            "event": "model_trained",
+            "metrics": training_metrics
+        })
+
     return ExperimentResponse(
         message="Experiment added successfully",
         n_experiments=n_experiments,

@@ -158,6 +181,17 @@ async def add_experiments_batch(
         except Exception as e:
             logger.error(f"Auto-train failed for session {session_id}: {e}")
 
+    # Broadcast experiment update to WebSocket clients
+    await broadcast_to_session(session_id, {
+        "event": "experiments_updated",
+        "n_experiments": n_experiments
+    })
+    if model_trained:
+        await broadcast_to_session(session_id, {
+            "event": "model_trained",
+            "metrics": training_metrics
+        })
+
     return ExperimentResponse(
         message=f"Added {len(batch.experiments)} experiments successfully",
         n_experiments=n_experiments,

@@ -227,19 +261,84 @@ async def list_experiments(
     )
 
 
+@router.post("/{session_id}/experiments/preview")
+async def preview_csv_columns(
+    session_id: str,
+    file: UploadFile = File(...),
+    session: OptimizationSession = Depends(get_session)
+):
+    """
+    Preview CSV file columns before uploading to check for target columns.
+
+    Returns:
+        - available_columns: List of all columns in CSV
+        - has_output: Whether 'Output' column exists
+        - recommended_target: Suggested target column if 'Output' missing
+    """
+    # Save uploaded file temporarily
+    with tempfile.NamedTemporaryFile(mode='wb', delete=False, suffix='.csv') as tmp:
+        content = await file.read()
+        tmp.write(content)
+        tmp_path = tmp.name
+
+    try:
+        # Read CSV to get column names
+        df = pd.read_csv(tmp_path)
+        columns = df.columns.tolist()
+
+        # Check for 'Output' column
+        has_output = 'Output' in columns
+
+        # Filter out metadata columns
+        metadata_cols = {'Iteration', 'Reason', 'Noise'}
+        available_targets = [col for col in columns if col not in metadata_cols]
+
+        # Recommend target column
+        recommended = None
+        if not has_output:
+            # Look for common target column names
+            common_names = ['output', 'y', 'target', 'yield', 'response']
+            for name in common_names:
+                if name in [col.lower() for col in available_targets]:
+                    recommended = [col for col in available_targets if col.lower() == name][0]
+                    break
+
+            # If no common name found, use first numeric column
+            if not recommended and available_targets:
+                # Check if first available column is numeric
+                if pd.api.types.is_numeric_dtype(df[available_targets[0]]):
+                    recommended = available_targets[0]
+
+        return {
+            "columns": columns,
+            "available_targets": available_targets,
+            "has_output": has_output,
+            "recommended_target": recommended,
+            "n_rows": len(df)
+        }
+
+    finally:
+        # Clean up temp file
+        if os.path.exists(tmp_path):
+            os.unlink(tmp_path)
+
+
 @router.post("/{session_id}/experiments/upload")
 async def upload_experiments(
     session_id: str,
     file: UploadFile = File(...),
-
+    target_columns: str = "Output",  # Note: API accepts string, will be normalized by Session API
     session: OptimizationSession = Depends(get_session)
 ):
     """
     Upload experimental data from CSV file.
 
     The CSV should have columns matching the variable names,
-    plus
-
+    plus target column(s) (default: "Output") and optional noise column ("Noise").
+
+    Args:
+        target_columns: Target column name (single-objective) or comma-separated names (multi-objective).
+            Examples: "Output", "yield", "yield,selectivity"
     """
     # Check if variables are defined
     if len(session.search_space.variables) == 0:

@@ -252,17 +351,26 @@ async def upload_experiments(
         tmp_path = tmp.name
 
     try:
+        # Parse target_columns (handle comma-separated for future multi-objective support)
+        target_cols_parsed = target_columns.split(',') if ',' in target_columns else target_columns
+
         # Load data using session's load_data method
-        session.load_data(tmp_path,
+        session.load_data(tmp_path, target_columns=target_cols_parsed)
 
         n_experiments = len(session.experiment_manager.df)
         logger.info(f"Loaded {n_experiments} experiments from CSV for session {session_id}")
-
+
+        # Broadcast experiment update to WebSocket clients
+        await broadcast_to_session(session_id, {
+            "event": "experiments_updated",
+            "n_experiments": n_experiments
+        })
+
         return {
             "message": f"Loaded {n_experiments} experiments successfully",
            "n_experiments": n_experiments
         }
-
+
     finally:
         # Clean up temp file
         if os.path.exists(tmp_path):
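Taken together, the preview and upload endpoints above form a two-step flow: inspect the CSV first, then upload with an explicit target column. A minimal client sketch of that flow follows; the host, mount prefix, session id, and file name are assumptions, and target_columns is passed as a query parameter (FastAPI's default for a plain string parameter):

import requests

BASE = "http://localhost:8000"  # assumed host and mount prefix
SID = "my-session-id"           # hypothetical session id

with open("runs.csv", "rb") as f:  # hypothetical data file
    preview = requests.post(f"{BASE}/{SID}/experiments/preview",
                            files={"file": f}).json()

# Fall back to the server's recommendation when no 'Output' column exists
target = "Output" if preview["has_output"] else preview["recommended_target"]

with open("runs.csv", "rb") as f:
    result = requests.post(f"{BASE}/{SID}/experiments/upload",
                           params={"target_columns": target},
                           files={"file": f}).json()
print(result["message"])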
@@ -280,3 +388,220 @@ async def get_experiments_summary(
     Returns sample size, target variable statistics, and feature information.
     """
     return session.get_data_summary()
+
+
+# ============================================================
+# Staged Experiments Endpoints
+# ============================================================
+
+@router.post("/{session_id}/experiments/staged", response_model=StagedExperimentResponse)
+async def stage_experiment(
+    session_id: str,
+    request: StageExperimentRequest,
+    session: OptimizationSession = Depends(get_session)
+):
+    """
+    Stage an experiment for later execution.
+
+    Staged experiments are stored in a queue awaiting evaluation.
+    This is useful for autonomous workflows where the controller
+    needs to track which experiments are pending execution.
+
+    Use GET /experiments/staged to retrieve staged experiments,
+    and POST /experiments/staged/complete to finalize them with outputs.
+    """
+    # Check if variables are defined
+    if len(session.search_space.variables) == 0:
+        raise NoVariablesError("No variables defined. Add variables to search space first.")
+
+    # Add reason metadata if provided
+    inputs_with_meta = dict(request.inputs)
+    if request.reason:
+        inputs_with_meta['_reason'] = request.reason
+
+    session.add_staged_experiment(inputs_with_meta)
+
+    n_staged = len(session.get_staged_experiments())
+    logger.info(f"Staged experiment for session {session_id}. Total staged: {n_staged}")
+
+    return StagedExperimentResponse(
+        message="Experiment staged successfully",
+        n_staged=n_staged,
+        staged_inputs=request.inputs
+    )
+
+
+@router.post("/{session_id}/experiments/staged/batch", response_model=StagedExperimentsListResponse)
+async def stage_experiments_batch(
+    session_id: str,
+    request: StageExperimentsBatchRequest,
+    session: OptimizationSession = Depends(get_session)
+):
+    """
+    Stage multiple experiments at once.
+
+    Useful after acquisition functions suggest multiple points for parallel execution.
+    The `reason` parameter is stored as metadata and will be used when completing
+    the experiments (recorded in the 'Reason' column of the experiment data).
+    """
+    # Check if variables are defined
+    if len(session.search_space.variables) == 0:
+        raise NoVariablesError("No variables defined. Add variables to search space first.")
+
+    for inputs in request.experiments:
+        inputs_with_meta = dict(inputs)
+        if request.reason:
+            inputs_with_meta['_reason'] = request.reason
+        session.add_staged_experiment(inputs_with_meta)
+
+    logger.info(f"Staged {len(request.experiments)} experiments for session {session_id}. Total staged: {len(session.get_staged_experiments())}")
+
+    # Return clean experiments (without metadata) for client use
+    return StagedExperimentsListResponse(
+        experiments=request.experiments,  # Return the original clean inputs
+        n_staged=len(session.get_staged_experiments()),
+        reason=request.reason
+    )
+
+
+@router.get("/{session_id}/experiments/staged", response_model=StagedExperimentsListResponse)
+async def get_staged_experiments(
+    session_id: str,
+    session: OptimizationSession = Depends(get_session)
+):
+    """
+    Get all staged experiments awaiting execution.
+
+    Returns the list of experiments that have been queued but not yet
+    completed with output values. The response includes:
+    - experiments: Clean variable inputs only (no metadata)
+    - reason: The strategy/reason for these experiments (if provided when staging)
+    """
+    staged = session.get_staged_experiments()
+
+    # Extract reason from first experiment (if present) and clean all experiments
+    reason = None
+    clean_experiments = []
+    for exp in staged:
+        if '_reason' in exp and reason is None:
+            reason = exp['_reason']
+        # Return only variable values, not metadata
+        clean_exp = {k: v for k, v in exp.items() if not k.startswith('_')}
+        clean_experiments.append(clean_exp)
+
+    return StagedExperimentsListResponse(
+        experiments=clean_experiments,
+        n_staged=len(staged),
+        reason=reason
+    )
+
+
+@router.delete("/{session_id}/experiments/staged", response_model=StagedExperimentsClearResponse)
+async def clear_staged_experiments(
+    session_id: str,
+    session: OptimizationSession = Depends(get_session)
+):
+    """
+    Clear all staged experiments.
+
+    Use this to reset the staging queue if experiments were cancelled
+    or need to be regenerated.
+    """
+    n_cleared = session.clear_staged_experiments()
+    logger.info(f"Cleared {n_cleared} staged experiments for session {session_id}")
+
+    return StagedExperimentsClearResponse(
+        message="Staged experiments cleared",
+        n_cleared=n_cleared
+    )
+
+
+@router.post("/{session_id}/experiments/staged/complete", response_model=StagedExperimentsCompletedResponse)
+async def complete_staged_experiments(
+    session_id: str,
+    request: CompleteStagedExperimentsRequest,
+    auto_train: bool = Query(False, description="Auto-train model after adding data"),
+    training_backend: Optional[str] = Query(None, description="Model backend (sklearn/botorch)"),
+    training_kernel: Optional[str] = Query(None, description="Kernel type (rbf/matern)"),
+    session: OptimizationSession = Depends(get_session)
+):
+    """
+    Complete staged experiments by providing output values.
+
+    This pairs the staged experiment inputs with the provided outputs,
+    adds them to the experiment dataset, and clears the staging queue.
+
+    The number of outputs must match the number of staged experiments.
+    Outputs should be in the same order as the staged experiments were added.
+
+    Args:
+        auto_train: If True, retrain model after adding data
+        training_backend: Model backend (uses last if None)
+        training_kernel: Kernel type (uses last or 'rbf' if None)
+    """
+    staged = session.get_staged_experiments()
+
+    if len(staged) == 0:
+        return StagedExperimentsCompletedResponse(
+            message="No staged experiments to complete",
+            n_added=0,
+            n_experiments=len(session.experiment_manager.df),
+            model_trained=False
+        )
+
+    if len(request.outputs) != len(staged):
+        raise ValueError(
+            f"Number of outputs ({len(request.outputs)}) must match "
+            f"number of staged experiments ({len(staged)})"
+        )
+
+    # Use the core Session method to move staged experiments to dataset
+    n_added = session.move_staged_to_experiments(
+        outputs=request.outputs,
+        noises=request.noises,
+        iteration=request.iteration,
+        reason=request.reason
+    )
+
+    n_experiments = len(session.experiment_manager.df)
+    logger.info(f"Completed {n_added} staged experiments for session {session_id}. Total: {n_experiments}")
+
+    # Auto-train if requested
+    model_trained = False
+    training_metrics = None
+
+    if auto_train and n_experiments >= 5:
+        try:
+            backend = training_backend or (session.model_backend if session.model else "sklearn")
+            kernel = training_kernel or "rbf"
+
+            result = session.train_model(backend=backend, kernel=kernel)
+            model_trained = True
+            metrics = result.get("metrics", {})
+            training_metrics = {
+                "rmse": metrics.get("rmse"),
+                "r2": metrics.get("r2"),
+                "backend": backend
+            }
+            logger.info(f"Auto-trained model for session {session_id}: {training_metrics}")
+        except Exception as e:
+            logger.error(f"Auto-train failed for session {session_id}: {e}")
+
+    # Broadcast experiment update to WebSocket clients
+    await broadcast_to_session(session_id, {
+        "event": "experiments_updated",
+        "n_experiments": n_experiments
+    })
+    if model_trained:
+        await broadcast_to_session(session_id, {
+            "event": "model_trained",
+            "metrics": training_metrics
+        })
+
+    return StagedExperimentsCompletedResponse(
+        message="Staged experiments completed and added to dataset",
+        n_added=n_added,
+        n_experiments=n_experiments,
+        model_trained=model_trained,
+        training_metrics=training_metrics
+    )
api/routers/sessions.py
CHANGED
@@ -39,8 +39,7 @@ async def create_session():
 
     return SessionCreateResponse(
         session_id=session_id,
-        created_at=session_info["created_at"],
-        expires_at=session_info["expires_at"]
+        created_at=session_info["created_at"]
     )
 
 

@@ -123,10 +122,8 @@ async def extend_session(session_id: str, hours: int = 24):
            detail=f"Session {session_id} not found"
        )
 
-    info = session_store.get_info(session_id)
    return {
-        "message": "Session TTL extended",
-        "expires_at": info["expires_at"]
+        "message": "Session TTL extended (legacy endpoint - no longer has effect)"
    }
 
 

@@ -194,8 +191,7 @@ async def import_session(file: UploadFile = File(...)):
        session_info = session_store.get_info(session_id)
        return SessionCreateResponse(
            session_id=session_id,
-            created_at=session_info["created_at"],
-            expires_at=session_info["expires_at"]
+            created_at=session_info["created_at"]
        )
 
    except Exception as e:

@@ -452,8 +448,7 @@ async def upload_session(file: UploadFile = File(...)):
 
        return SessionCreateResponse(
            session_id=new_session_id,
-            created_at=session_info["created_at"],
-            expires_at=session_info["expires_at"]
+            created_at=session_info["created_at"]
        )
 
    finally:

@@ -468,6 +463,86 @@ async def upload_session(file: UploadFile = File(...)):
        )
 
 
+# ============================================================
+# Recovery / Backup Endpoints
+# ============================================================
+
+@router.post("/sessions/{session_id}/backup", status_code=status.HTTP_200_OK)
+async def create_recovery_backup(session_id: str):
+    """
+    Create a silent recovery backup for crash protection.
+
+    Called automatically by frontend every 30 seconds while user has session open.
+    User never sees these backups unless browser crashes and recovery is needed.
+    """
+    success = session_store.save_recovery_backup(session_id)
+    if not success:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Session {session_id} not found or backup failed"
+        )
+    return {"message": "Recovery backup created"}
+
+
+@router.delete("/sessions/{session_id}/backup", status_code=status.HTTP_200_OK)
+async def clear_recovery_backup(session_id: str):
+    """
+    Clear recovery backups for a session.
+
+    Called after user successfully saves their session to their computer.
+    This prevents recovery prompt from appearing unnecessarily.
+    """
+    deleted = session_store.clear_recovery_backup(session_id)
+    return {"message": "Recovery backup cleared", "deleted": deleted}
+
+
+@router.get("/recovery/list")
+async def list_recovery_sessions():
+    """
+    List available recovery sessions.
+
+    Called on app startup to check if there are any unsaved sessions
+    that can be recovered from a crash.
+    """
+    recoveries = session_store.list_recovery_sessions()
+    return {"recoveries": recoveries, "count": len(recoveries)}
+
+
+@router.post("/recovery/{session_id}/restore", response_model=SessionCreateResponse, status_code=status.HTTP_201_CREATED)
+async def restore_recovery_session(session_id: str):
+    """
+    Restore a session from recovery backup.
+
+    Called when user clicks "Restore" on the recovery banner.
+    Creates a new active session from the recovery file.
+    """
+    new_session_id = session_store.restore_from_recovery(session_id)
+
+    if new_session_id is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"No recovery data found for session {session_id}"
+        )
+
+    session_info = session_store.get_info(new_session_id)
+    return SessionCreateResponse(
+        session_id=new_session_id,
+        created_at=session_info["created_at"]
+    )
+
+
+@router.delete("/recovery/cleanup")
+async def cleanup_old_recoveries(max_age_hours: int = 24):
+    """
+    Clean up old recovery files.
+
+    Deletes recovery files older than specified hours.
+    Can be called manually or via scheduled task.
+    """
+    session_store.cleanup_old_recoveries(max_age_hours)
+    return {"message": f"Cleaned up recovery files older than {max_age_hours} hours"}
+
+
 # ============================================================
 # Session Locking Endpoints
 # ============================================================
api/routers/visualizations.py
CHANGED
@@ -194,9 +194,10 @@ async def get_contour_data(
    # CRITICAL FIX: Reorder columns to match training data
    # The model was trained with a specific column order, we must match it.
    # Exclude metadata columns that are part of the experiments table but
-    # are not model input features (e.g., Iteration, Reason,
+    # are not model input features (e.g., Iteration, Reason, target columns, Noise).
    train_data = session.experiment_manager.get_data()
-
+    target_cols = set(session.experiment_manager.target_columns)
+    metadata_cols = {'Iteration', 'Reason', 'Noise'} | target_cols
    feature_cols = [col for col in train_data.columns if col not in metadata_cols]
 
    # Safely align the prediction grid to the model feature order.

@@ -216,11 +217,12 @@ async def get_contour_data(
    experiments_data = None
    if request.include_experiments and len(session.experiment_manager) > 0:
        exp_df = session.experiment_manager.get_data()
-
+        target_col = session.experiment_manager.target_columns[0]  # Use first target for visualization
+        if request.x_var in exp_df.columns and request.y_var in exp_df.columns and target_col in exp_df.columns:
            experiments_data = {
                "x": exp_df[request.x_var].tolist(),
                "y": exp_df[request.y_var].tolist(),
-                "output": exp_df[
+                "output": exp_df[target_col].tolist()
            }
 
    # Get suggestion data if requested (would need to be stored in session)
api/routers/websocket.py
CHANGED
@@ -32,7 +32,7 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
        session_id: Session ID to subscribe to
    """
    await websocket.accept()
-    logger.
+    logger.debug(f"WebSocket connected: session_id={session_id}")
 
    # Register this connection for this session
    if session_id not in active_connections:

@@ -60,7 +60,7 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
                logger.warning(f"Invalid JSON from client: {data}")
 
    except WebSocketDisconnect:
-        logger.
+        logger.debug(f"WebSocket disconnected: session_id={session_id}")
    finally:
        # Clean up on disconnect
        if session_id in active_connections: