PyPI - openadapt-ml - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

openadapt_ml/baselines/__init__.py +121 -0
openadapt_ml/baselines/adapter.py +185 -0
openadapt_ml/baselines/cli.py +314 -0
openadapt_ml/baselines/config.py +448 -0
openadapt_ml/baselines/parser.py +922 -0
openadapt_ml/baselines/prompts.py +787 -0
openadapt_ml/benchmarks/__init__.py +13 -107
openadapt_ml/benchmarks/agent.py +297 -374
openadapt_ml/benchmarks/azure.py +62 -24
openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
openadapt_ml/benchmarks/cli.py +1874 -751
openadapt_ml/benchmarks/trace_export.py +631 -0
openadapt_ml/benchmarks/viewer.py +1236 -0
openadapt_ml/benchmarks/vm_monitor.py +1111 -0
openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
openadapt_ml/cloud/azure_inference.py +3 -5
openadapt_ml/cloud/lambda_labs.py +722 -307
openadapt_ml/cloud/local.py +3194 -89
openadapt_ml/cloud/ssh_tunnel.py +595 -0
openadapt_ml/datasets/next_action.py +125 -96
openadapt_ml/evals/grounding.py +32 -9
openadapt_ml/evals/plot_eval_metrics.py +15 -13
openadapt_ml/evals/trajectory_matching.py +120 -57
openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
openadapt_ml/experiments/representation_shootout/config.py +390 -0
openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
openadapt_ml/experiments/representation_shootout/runner.py +687 -0
openadapt_ml/experiments/waa_demo/__init__.py +10 -0
openadapt_ml/experiments/waa_demo/demos.py +357 -0
openadapt_ml/experiments/waa_demo/runner.py +732 -0
openadapt_ml/experiments/waa_demo/tasks.py +151 -0
openadapt_ml/export/__init__.py +9 -0
openadapt_ml/export/__main__.py +6 -0
openadapt_ml/export/cli.py +89 -0
openadapt_ml/export/parquet.py +277 -0
openadapt_ml/grounding/detector.py +18 -14
openadapt_ml/ingest/__init__.py +11 -10
openadapt_ml/ingest/capture.py +97 -86
openadapt_ml/ingest/loader.py +120 -69
openadapt_ml/ingest/synthetic.py +344 -193
openadapt_ml/models/api_adapter.py +14 -4
openadapt_ml/models/base_adapter.py +10 -2
openadapt_ml/models/providers/__init__.py +288 -0
openadapt_ml/models/providers/anthropic.py +266 -0
openadapt_ml/models/providers/base.py +299 -0
openadapt_ml/models/providers/google.py +376 -0
openadapt_ml/models/providers/openai.py +342 -0
openadapt_ml/models/qwen_vl.py +46 -19
openadapt_ml/perception/__init__.py +35 -0
openadapt_ml/perception/integration.py +399 -0
openadapt_ml/retrieval/README.md +226 -0
openadapt_ml/retrieval/USAGE.md +391 -0
openadapt_ml/retrieval/__init__.py +91 -0
openadapt_ml/retrieval/demo_retriever.py +843 -0
openadapt_ml/retrieval/embeddings.py +630 -0
openadapt_ml/retrieval/index.py +194 -0
openadapt_ml/retrieval/retriever.py +162 -0
openadapt_ml/runtime/__init__.py +50 -0
openadapt_ml/runtime/policy.py +27 -14
openadapt_ml/runtime/safety_gate.py +471 -0
openadapt_ml/schema/__init__.py +113 -0
openadapt_ml/schema/converters.py +588 -0
openadapt_ml/schema/episode.py +470 -0
openadapt_ml/scripts/capture_screenshots.py +530 -0
openadapt_ml/scripts/compare.py +102 -61
openadapt_ml/scripts/demo_policy.py +4 -1
openadapt_ml/scripts/eval_policy.py +19 -14
openadapt_ml/scripts/make_gif.py +1 -1
openadapt_ml/scripts/prepare_synthetic.py +16 -17
openadapt_ml/scripts/train.py +98 -75
openadapt_ml/segmentation/README.md +920 -0
openadapt_ml/segmentation/__init__.py +97 -0
openadapt_ml/segmentation/adapters/__init__.py +5 -0
openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
openadapt_ml/segmentation/annotator.py +610 -0
openadapt_ml/segmentation/cache.py +290 -0
openadapt_ml/segmentation/cli.py +674 -0
openadapt_ml/segmentation/deduplicator.py +656 -0
openadapt_ml/segmentation/frame_describer.py +788 -0
openadapt_ml/segmentation/pipeline.py +340 -0
openadapt_ml/segmentation/schemas.py +622 -0
openadapt_ml/segmentation/segment_extractor.py +634 -0
openadapt_ml/training/azure_ops_viewer.py +1097 -0
openadapt_ml/training/benchmark_viewer.py +3255 -19
openadapt_ml/training/shared_ui.py +7 -7
openadapt_ml/training/stub_provider.py +57 -35
openadapt_ml/training/trainer.py +255 -441
openadapt_ml/training/trl_trainer.py +403 -0
openadapt_ml/training/viewer.py +323 -108
openadapt_ml/training/viewer_components.py +180 -0
{openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
openadapt_ml-0.2.1.dist-info/RECORD +116 -0
openadapt_ml/benchmarks/base.py +0 -366
openadapt_ml/benchmarks/data_collection.py +0 -432
openadapt_ml/benchmarks/runner.py +0 -381
openadapt_ml/benchmarks/waa.py +0 -704
openadapt_ml/schemas/__init__.py +0 -53
openadapt_ml/schemas/sessions.py +0 -122
openadapt_ml/schemas/validation.py +0 -252
openadapt_ml-0.1.0.dist-info/RECORD +0 -55
{openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
{openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0

openadapt_ml/training/trainer.py CHANGED Viewed

@@ -4,15 +4,9 @@ import json
 import time
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List
-import torch
-from torch.optim import Optimizer
-from torch.optim.lr_scheduler import LambdaLR
-from torch.utils.data import DataLoader, Dataset
-from openadapt_ml.models.base_adapter import BaseVLMAdapter
-from openadapt_ml.schemas.sessions import Episode
+from openadapt_ml.schema import ActionType
 from openadapt_ml.training.shared_ui import (
     get_shared_header_css as _get_shared_header_css,
     generate_shared_header_html as _generate_shared_header_html,
@@ -21,6 +15,10 @@ from openadapt_ml.training.shared_ui import (
 from openadapt_ml.training.viewer import (
     generate_unified_viewer_from_output_dir,
 )
+from openadapt_ml.training.benchmark_viewer import (
+    _get_azure_jobs_panel_css,
+    _get_azure_jobs_panel_html,
+)
 def setup_job_directory(base_dir: str | Path, job_id: str) -> Path:
@@ -110,12 +108,18 @@ class TrainingConfig:
 @dataclass
 class TrainingState:
     """Tracks training progress for visualization."""
     # Job identification
     job_id: str = field(default_factory=lambda: time.strftime("%Y%m%d_%H%M%S"))
-    hostname: str = field(default_factory=lambda: __import__('socket').gethostname())
+    hostname: str = field(default_factory=lambda: __import__("socket").gethostname())
     capture_path: str = ""
     config_path: str = ""
     goal: str = ""  # Task goal/description for the training run
+    # Model configuration
+    model_name: str = ""  # e.g. "Qwen/Qwen3-VL-2B-Instruct"
+    lora_r: int = 0  # LoRA rank
+    lora_alpha: int = 0  # LoRA alpha
+    load_in_4bit: bool = False  # Quantization
     # Training progress
     epoch: int = 0
     step: int = 0
@@ -139,7 +143,9 @@ class TrainingState:
     setup_status: str = ""  # e.g. "booting", "installing", "training", "complete"
     setup_logs: List[str] = field(default_factory=list)  # Setup progress messages
     # Termination tracking
-    termination_status: str = ""  # e.g. "auto_low_loss", "auto_complete", "user_stop", "running"
+    termination_status: str = (
+        ""  # e.g. "auto_low_loss", "auto_complete", "user_stop", "running"
+    )
     termination_message: str = ""  # Human-readable termination reason
     def log_step(self, epoch: int, step: int, loss: float, lr: float = 0.0) -> None:
@@ -148,33 +154,46 @@ class TrainingState:
         self.step = step
         self.loss = loss
         self.learning_rate = lr
-        self.losses.append({
-            "epoch": epoch,
-            "step": step,
-            "loss": loss,
-            "lr": lr,
-            "time": time.time() - self.start_time,
-        })
-    def log_evaluation(self, epoch: int, sample_idx: int, image_path: str,
-                       human_action: Dict, predicted_action: Dict) -> None:
+        self.losses.append(
+            {
+                "epoch": epoch,
+                "step": step,
+                "loss": loss,
+                "lr": lr,
+                "time": time.time() - self.start_time,
+            }
+        )
+    def log_evaluation(
+        self,
+        epoch: int,
+        sample_idx: int,
+        image_path: str,
+        human_action: Dict,
+        predicted_action: Dict,
+    ) -> None:
         """Log an evaluation sample."""
         # Calculate distance for click actions
         distance = 0.0
-        if human_action.get("type") == "click" and predicted_action.get("type") == "click":
+        if (
+            human_action.get("type") == "click"
+            and predicted_action.get("type") == "click"
+        ):
             hx, hy = human_action.get("x", 0), human_action.get("y", 0)
             px, py = predicted_action.get("x", 0), predicted_action.get("y", 0)
             distance = ((hx - px) ** 2 + (hy - py) ** 2) ** 0.5
-        self.evaluations.append({
-            "epoch": epoch,
-            "sample_idx": sample_idx,
-            "image_path": image_path,
-            "human_action": human_action,
-            "predicted_action": predicted_action,
-            "distance": distance,
-            "correct": distance < 50,  # Within 50 pixels is "correct"
-        })
+        self.evaluations.append(
+            {
+                "epoch": epoch,
+                "sample_idx": sample_idx,
+                "image_path": image_path,
+                "human_action": human_action,
+                "predicted_action": predicted_action,
+                "distance": distance,
+                "correct": distance < 50,  # Within 50 pixels is "correct"
+            }
+        )
     def to_dict(self) -> Dict[str, Any]:
         """Convert state to serializable dict."""
@@ -185,9 +204,16 @@ class TrainingState:
             "capture_path": self.capture_path,
             "config_path": self.config_path,
             "goal": self.goal,
+            # Model configuration
+            "model_name": self.model_name,
+            "lora_r": self.lora_r,
+            "lora_alpha": self.lora_alpha,
+            "load_in_4bit": self.load_in_4bit,
             "instance_type": self.instance_type,
             "instance_ip": self.instance_ip,
-            "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.start_time)),
+            "started_at": time.strftime(
+                "%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.start_time)
+            ),
             # Cloud provider info
             "cloud_provider": self.cloud_provider,
             "cloud_dashboard_url": self.cloud_dashboard_url,
@@ -227,6 +253,11 @@ class TrainingLogger:
         cloud_dashboard_url: str = "",
         cloud_instance_id: str = "",
         job_id: str = "",
+        # Model configuration
+        model_name: str = "",
+        lora_r: int = 0,
+        lora_alpha: int = 0,
+        load_in_4bit: bool = False,
     ):
         # Generate job_id if not provided
         if not job_id:
@@ -242,6 +273,10 @@ class TrainingLogger:
             capture_path=capture_path,
             config_path=config_path,
             goal=goal,
+            model_name=model_name,
+            lora_r=lora_r,
+            lora_alpha=lora_alpha,
+            load_in_4bit=load_in_4bit,
             instance_ip=instance_ip,
             instance_type=instance_type,
             total_epochs=config.num_train_epochs,
@@ -299,6 +334,7 @@ class TrainingLogger:
     def _save_config_snapshot(self) -> None:
         """Save training config snapshot to JSON."""
         from dataclasses import asdict
         config_file = self.output_dir / "config.json"
         config_dict = asdict(self.config)
         with open(config_file, "w") as f:
@@ -316,32 +352,45 @@ class TrainingLogger:
         dashboard_path.write_text(html)
-def _generate_termination_status_html(state: TrainingState, is_training_complete: bool) -> str:
+def _generate_termination_status_html(
+    state: TrainingState, is_training_complete: bool
+) -> str:
     """Generate HTML for termination status section."""
     # Check if we have termination info
     if state.termination_status:
         # Map termination status to colors and icons
         status_styles = {
-            "auto_complete": {"color": "#22c55e", "icon": "✓", "label": "Training Complete"},
-            "auto_low_loss": {"color": "#22c55e", "icon": "✓", "label": "Auto-Stopped (Low Loss)"},
+            "auto_complete": {
+                "color": "#22c55e",
+                "icon": "✓",
+                "label": "Training Complete",
+            },
+            "auto_low_loss": {
+                "color": "#22c55e",
+                "icon": "✓",
+                "label": "Auto-Stopped (Low Loss)",
+            },
             "user_stop": {"color": "#f59e0b", "icon": "■", "label": "Stopped by User"},
         }
-        style = status_styles.get(state.termination_status, {"color": "#22c55e", "icon": "✓", "label": "Complete"})
+        style = status_styles.get(
+            state.termination_status,
+            {"color": "#22c55e", "icon": "✓", "label": "Complete"},
+        )
-        return f'''<div style="display: flex; flex-direction: column; gap: 8px;">
-            <div style="display: flex; align-items: center; gap: 8px; color: {style['color']};">
-                <span style="font-size: 1.2rem;">{style['icon']}</span>
-                <span style="font-weight: 600;">{style['label']}</span>
+        return f"""<div style="display: flex; flex-direction: column; gap: 8px;">
+            <div style="display: flex; align-items: center; gap: 8px; color: {style["color"]};">
+                <span style="font-size: 1.2rem;">{style["icon"]}</span>
+                <span style="font-weight: 600;">{style["label"]}</span>
             </div>
-            {f'<div style="font-size: 0.85rem; color: var(--text-muted); margin-left: 28px;">{state.termination_message}</div>' if state.termination_message else ''}
-        </div>'''
+            {f'<div style="font-size: 0.85rem; color: var(--text-muted); margin-left: 28px;">{state.termination_message}</div>' if state.termination_message else ""}
+        </div>"""
     elif is_training_complete:
-        return '''<div style="display: flex; align-items: center; gap: 8px; color: #22c55e;">
+        return """<div style="display: flex; align-items: center; gap: 8px; color: #22c55e;">
             <span style="font-size: 1.2rem;">✓</span>
             <span style="font-weight: 600;">Training Complete</span>
-        </div>'''
+        </div>"""
     else:
-        return '''<button id="stop-training-btn" onclick="stopTraining()" style="
+        return """<button id="stop-training-btn" onclick="stopTraining()" style="
             background: linear-gradient(135deg, #ef4444 0%, #dc2626 100%);
             color: white;
             border: none;
@@ -357,53 +406,65 @@ def _generate_termination_status_html(state: TrainingState, is_training_complete
         ">
             <span style="font-size: 1.1rem;">■</span> Stop Training
         </button>
-        <p id="stop-status" style="margin-top: 8px; font-size: 0.75rem; color: var(--text-muted);"></p>'''
+        <p id="stop-status" style="margin-top: 8px; font-size: 0.75rem; color: var(--text-muted);"></p>"""
 def generate_training_dashboard(state: TrainingState, config: TrainingConfig) -> str:
     """Generate an HTML dashboard for training visualization."""
     losses_json = json.dumps(state.losses)
     # Use stored elapsed_time if available (historical data), otherwise calculate
-    elapsed = state.elapsed_time if state.elapsed_time > 0 else time.time() - state.start_time
+    elapsed = (
+        state.elapsed_time if state.elapsed_time > 0 else time.time() - state.start_time
+    )
     elapsed_str = f"{int(elapsed // 60)}m {int(elapsed % 60)}s"
     # Calculate stats
     if state.losses:
-        min_loss = min(l["loss"] for l in state.losses)
-        avg_loss = sum(l["loss"] for l in state.losses) / len(state.losses)
+        min_loss = min(loss["loss"] for loss in state.losses)
+        sum(loss["loss"] for loss in state.losses) / len(state.losses)
         recent_losses = state.losses[-10:] if len(state.losses) >= 10 else state.losses
-        recent_avg = sum(l["loss"] for l in recent_losses) / len(recent_losses)
+        recent_avg = sum(loss["loss"] for loss in recent_losses) / len(recent_losses)
         # Calculate step times
         step_times = []
         for i in range(1, len(state.losses)):
-            step_times.append(state.losses[i]["time"] - state.losses[i-1]["time"])
+            step_times.append(state.losses[i]["time"] - state.losses[i - 1]["time"])
         avg_step_time = sum(step_times) / len(step_times) if step_times else 0
         # Loss by epoch
         epoch_losses: dict = {}
-        for l in state.losses:
-            ep = l["epoch"]
+        for loss in state.losses:
+            ep = loss["epoch"]
             if ep not in epoch_losses:
                 epoch_losses[ep] = []
-            epoch_losses[ep].append(l["loss"])
-        epoch_avg = {ep: sum(losses)/len(losses) for ep, losses in epoch_losses.items()}
+            epoch_losses[ep].append(loss["loss"])
+        epoch_avg = {
+            ep: sum(losses) / len(losses) for ep, losses in epoch_losses.items()
+        }
         # Estimate ETA
         # Steps per epoch = steps in completed epochs / completed epochs
         completed_epochs = state.epoch
-        steps_in_completed = sum(1 for l in state.losses if l["epoch"] < completed_epochs)
+        steps_in_completed = sum(
+            1 for loss in state.losses if loss["epoch"] < completed_epochs
+        )
         if completed_epochs > 0 and steps_in_completed > 0:
             steps_per_epoch = steps_in_completed / completed_epochs
         else:
             # Estimate from current epoch progress
-            steps_per_epoch = len(state.losses) / (state.epoch + 1) if state.epoch >= 0 else len(state.losses)
+            steps_per_epoch = (
+                len(state.losses) / (state.epoch + 1)
+                if state.epoch >= 0
+                else len(state.losses)
+            )
-        total_epochs = state.total_epochs if state.total_epochs > 0 else config.num_train_epochs
+        total_epochs = (
+            state.total_epochs if state.total_epochs > 0 else config.num_train_epochs
+        )
         total_steps_estimate = steps_per_epoch * total_epochs
         remaining_steps = max(0, total_steps_estimate - len(state.losses))
         eta_seconds = remaining_steps * avg_step_time if avg_step_time > 0 else 0
         # Check if training is complete (all steps done)
         is_training_complete = remaining_steps == 0 and len(state.losses) > 0
     else:
-        min_loss = avg_loss = recent_avg = avg_step_time = 0.0
+        min_loss = recent_avg = avg_step_time = 0.0
         epoch_avg = {}
         eta_seconds = 0
         steps_per_epoch = 0
@@ -414,10 +475,9 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
     epoch_avg_json = json.dumps(list(epoch_avg.items()))
     # Generate comparison viewer preview if capture path available
-    comparison_viewer_path = ""
     if state.capture_path:
         try:
-            from openadapt_ml.scripts.compare import generate_comparison_html, generate_comparison_data
+            from openadapt_ml.scripts.compare import generate_comparison_html
             from openadapt_ml.ingest.capture import capture_to_episode
             capture_path = Path(state.capture_path)
@@ -428,14 +488,20 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
                 # Generate comparison data with null predictions (shows "— No prediction")
                 comparison_data = []
                 for i, step in enumerate(episode.steps):
+                    # Extract normalized coordinates if available
+                    action_x, action_y = None, None
+                    if step.action.normalized_coordinates:
+                        action_x, action_y = step.action.normalized_coordinates
                     step_data = {
                         "index": i,
-                        "time": step.t,
-                        "image_path": step.observation.image_path,
+                        "time": step.step_index,
+                        "image_path": step.observation.screenshot_path,
                         "human_action": {
-                            "type": step.action.type,
-                            "x": step.action.x,
-                            "y": step.action.y,
+                            "type": step.action.type.value
+                            if isinstance(step.action.type, ActionType)
+                            else step.action.type,
+                            "x": action_x,
+                            "y": action_y,
                             "text": step.action.text,
                         },
                         "predicted_action": None,  # Shows "— No prediction" in viewer
@@ -444,15 +510,21 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
                     comparison_data.append(step_data)
                 # Generate comparison HTML
-                output_dir = Path(config.output_dir) if hasattr(config, 'output_dir') else Path("training_output")
+                output_dir = (
+                    Path(config.output_dir)
+                    if hasattr(config, "output_dir")
+                    else Path("training_output")
+                )
                 output_dir.mkdir(parents=True, exist_ok=True)
                 comparison_output = output_dir / "comparison_preview.html"
-                generate_comparison_html(capture_path, episode, comparison_data, comparison_output)
-                comparison_viewer_path = str(comparison_output.name)  # Relative path
-        except Exception as e:
+                generate_comparison_html(
+                    capture_path, episode, comparison_data, comparison_output
+                )
+                str(comparison_output.name)  # Relative path
+        except Exception:
             pass  # Fail silently if comparison viewer can't be generated
-    html = f'''<!DOCTYPE html>
+    html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
@@ -596,6 +668,42 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
         .setup-log-line.current {{
             color: var(--accent);
         }}
+        .config-panel {{
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 12px;
+            padding: 16px 20px;
+            margin-bottom: 24px;
+        }}
+        .config-grid {{
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+            gap: 16px;
+        }}
+        .config-item {{
+            display: flex;
+            flex-direction: column;
+            gap: 4px;
+        }}
+        .config-label {{
+            font-size: 0.7rem;
+            color: var(--text-secondary);
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+        }}
+        .config-value {{
+            font-family: "SF Mono", Monaco, monospace;
+            font-size: 0.85rem;
+            color: var(--text-primary);
+        }}
+        .config-value.model {{
+            color: var(--accent);
+        }}
+        .config-value.goal {{
+            font-family: -apple-system, BlinkMacSystemFont, "Inter", sans-serif;
+            font-size: 0.8rem;
+            opacity: 0.9;
+        }}
         .status {{
             display: flex;
             align-items: center;
@@ -754,6 +862,8 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
         }}
         /* Shared header styles (injected from _get_shared_header_css) */
         {_get_shared_header_css()}
+        /* Azure ML Jobs panel styles */
+        {_get_azure_jobs_panel_css()}
         .eval-panel {{
             background: var(--bg-secondary);
             border: 1px solid var(--border-color);
@@ -1073,10 +1183,10 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
     <div class="container">
         <header>
             <div>
-                <h1>Training Dashboard{f' <a href="{state.cloud_dashboard_url}" target="_blank" class="cloud-link cloud-badge"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 10h-1.26A8 8 0 1 0 9 20h9a5 5 0 0 0 0-10z"/></svg>{state.cloud_provider.title()} Cloud</a>' if state.cloud_dashboard_url else ''}</h1>
+                <h1>Training Dashboard{f' <a href="{state.cloud_dashboard_url}" target="_blank" class="cloud-link cloud-badge"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 10h-1.26A8 8 0 1 0 9 20h9a5 5 0 0 0 0-10z"/></svg>{state.cloud_provider.title()} Cloud</a>' if state.cloud_dashboard_url else ""}</h1>
                 <div class="job-info" id="job-info">
-                    <span class="job-host">{state.hostname or 'stub-local'} @ {state.instance_ip or '127.0.0.1'}</span>
-                    {f'<span class="job-config">{state.instance_type}</span>' if state.instance_type else ''}
+                    <span class="job-host">{state.hostname or "stub-local"} @ {state.instance_ip or "127.0.0.1"}</span>
+                    {f'<span class="job-config">{state.instance_type}</span>' if state.instance_type else ""}
                 </div>
             </div>
             <div class="status" id="status">
@@ -1085,13 +1195,40 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
             </div>
         </header>
-        <div class="setup-panel{' hidden' if not state.setup_logs else ''}" id="setup-panel">
+        <div class="setup-panel{" hidden" if not state.setup_logs else ""}" id="setup-panel">
             <div class="setup-header">
                 <h2>Setup Progress</h2>
-                <span class="setup-status-badge {state.setup_status}" id="setup-status-badge">{state.setup_status or 'initializing'}</span>
+                <span class="setup-status-badge {state.setup_status}" id="setup-status-badge">{state.setup_status or "initializing"}</span>
             </div>
             <div class="setup-logs" id="setup-logs">
-                {''.join(f'<div class="setup-log-line{" current" if i == len(state.setup_logs) - 1 else ""}">{log}</div>' for i, log in enumerate(state.setup_logs)) if state.setup_logs else '<div class="setup-log-line">Waiting for setup logs...</div>'}
+                {"".join(f'<div class="setup-log-line{" current" if i == len(state.setup_logs) - 1 else ""}">{log}</div>' for i, log in enumerate(state.setup_logs)) if state.setup_logs else '<div class="setup-log-line">Waiting for setup logs...</div>'}
+            </div>
+        </div>
+        {_get_azure_jobs_panel_html()}
+        <div class="config-panel" id="config-panel">
+            <div class="config-grid">
+                <div class="config-item">
+                    <span class="config-label">Model</span>
+                    <span class="config-value model" id="config-model">{state.model_name or "Not specified"}</span>
+                </div>
+                <div class="config-item">
+                    <span class="config-label">Goal</span>
+                    <span class="config-value goal" id="config-goal">{state.goal or "Not specified"}</span>
+                </div>
+                <div class="config-item">
+                    <span class="config-label">LoRA</span>
+                    <span class="config-value" id="config-lora">{f"r={state.lora_r}, α={state.lora_alpha}" if state.lora_r else "Not specified"}</span>
+                </div>
+                <div class="config-item">
+                    <span class="config-label">Quantization</span>
+                    <span class="config-value" id="config-quant">{"4-bit" if state.load_in_4bit else "None"}</span>
+                </div>
+                <div class="config-item">
+                    <span class="config-label">Config</span>
+                    <span class="config-value" id="config-path">{state.config_path or "Not specified"}</span>
+                </div>
             </div>
         </div>
@@ -1437,7 +1574,7 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
         let etaSeconds = {eta_seconds};
         let avgStepTime = {avg_step_time};
         let remainingSteps = {remaining_steps};
-        let isTrainingComplete = {'true' if is_training_complete else 'false'};
+        let isTrainingComplete = {"true" if is_training_complete else "false"};
         // Auto-stop when loss <= threshold (INVARIANT: training should stop when loss <= 1.0)
         const AUTO_STOP_LOSS_THRESHOLD = 1.0;
@@ -1519,6 +1656,28 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
                     }}
                 }}
+                // Update config panel
+                const configModel = document.getElementById('config-model');
+                const configGoal = document.getElementById('config-goal');
+                const configLora = document.getElementById('config-lora');
+                const configQuant = document.getElementById('config-quant');
+                const configPath = document.getElementById('config-path');
+                if (configModel && data.model_name) {{
+                    configModel.textContent = data.model_name;
+                }}
+                if (configGoal && data.goal) {{
+                    configGoal.textContent = data.goal;
+                }}
+                if (configLora && (data.lora_r || data.lora_alpha)) {{
+                    configLora.textContent = `r=${{data.lora_r || 0}}, α=${{data.lora_alpha || 0}}`;
+                }}
+                if (configQuant) {{
+                    configQuant.textContent = data.load_in_4bit ? '4-bit' : 'None';
+                }}
+                if (configPath && data.config_path) {{
+                    configPath.textContent = data.config_path;
+                }}
                 // Update setup panel if setup logs present
                 if (data.setup_logs && data.setup_logs.length > 0) {{
                     const setupPanel = document.getElementById('setup-panel');
@@ -1914,7 +2073,7 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
         setInterval(updateStatusIndicator, 1000);  // Update LIVE/STALE indicator every second
     </script>
 </body>
-</html>'''
+</html>"""
     return html
@@ -1952,6 +2111,7 @@ def regenerate_all_dashboards(output_dir: str | Path) -> list[Path]:
     except Exception as e:
         print(f"Warning: Failed to generate unified viewer: {e}")
         import traceback
         traceback.print_exc()
     return regenerated
@@ -1994,7 +2154,9 @@ def regenerate_local_dashboard(
     state = TrainingState(
         job_id=data.get("job_id", "unknown"),
         hostname=data.get("hostname", ""),
-        capture_path=str(capture_path) if capture_path else data.get("capture_path", ""),
+        capture_path=str(capture_path)
+        if capture_path
+        else data.get("capture_path", ""),
         config_path=data.get("config_path", ""),
         epoch=data.get("epoch", 0),
         step=data.get("step", 0),
@@ -2037,30 +2199,33 @@ def regenerate_local_dashboard(
     if training_status == "COMPLETED":
         html = html.replace(
             '<div class="status" id="status">',
-            '<div class="status complete" id="status">'
+            '<div class="status complete" id="status">',
         )
         html = html.replace(
             '<span id="status-text">Training in progress</span>',
-            '<span id="status-text">COMPLETED</span>'
+            '<span id="status-text">COMPLETED</span>',
         )
     elif training_status == "STOPPED":
         html = html.replace(
-            '<div class="status" id="status">',
-            '<div class="status stale" id="status">'
+            '<div class="status" id="status">', '<div class="status stale" id="status">'
         )
         html = html.replace(
             '<span id="status-text">Training in progress</span>',
-            '<span id="status-text">STOPPED (Epoch {}/{})'.format(current_epoch + 1, total_epochs) + '</span>'
+            '<span id="status-text">STOPPED (Epoch {}/{})'.format(
+                current_epoch + 1, total_epochs
+            )
+            + "</span>",
         )
     # Fix ETA display for completed/stopped training
     import re
     if training_status in ("COMPLETED", "STOPPED"):
         # Replace "calculating..." with appropriate status
         html = re.sub(
             r'(<div class="stat-value" id="stat-eta">)[^<]*(</div>)',
-            r'\1—\2' if training_status == "STOPPED" else r'\1complete\2',
-            html
+            r"\1—\2" if training_status == "STOPPED" else r"\1complete\2",
+            html,
         )
     # Replace dynamic nav with static unified header
@@ -2071,20 +2236,20 @@ def regenerate_local_dashboard(
     # This is critical for file:// protocol where fetch() doesn't work
     html = html.replace(
         "setInterval(fetchAndUpdate, 3000);",
-        "// fetchAndUpdate disabled for static dashboard"
+        "// fetchAndUpdate disabled for static dashboard",
     )
     html = html.replace(
         "setInterval(updateElapsedDisplay, 1000);",
-        "// updateElapsedDisplay disabled for static dashboard"
+        "// updateElapsedDisplay disabled for static dashboard",
     )
     html = html.replace(
         "setInterval(updateStatusIndicator, 1000);",
-        "// updateStatusIndicator disabled for static dashboard"
+        "// updateStatusIndicator disabled for static dashboard",
     )
     # CRITICAL: Disable discoverDashboards() - it overwrites static nav on file:// protocol
     html = html.replace(
         "discoverDashboards();",
-        "// discoverDashboards disabled - using static nav for file:// protocol"
+        "// discoverDashboards disabled - using static nav for file:// protocol",
     )
     # Write output
@@ -2093,354 +2258,3 @@ def regenerate_local_dashboard(
     print(f"Regenerated dashboard: {dashboard_path}")
     return dashboard_path
-def run_epoch_evaluation(
-    adapter: BaseVLMAdapter,
-    episode: Episode,
-    epoch: int,
-    config: TrainingConfig,
-    logger: "TrainingLogger",
-    sample_indices: Optional[List[int]] = None,
-) -> Path:
-    """Run inference evaluation on sample steps after an epoch.
-    This generates a comparison_epoch{N}.html file showing human vs predicted actions.
-    Args:
-        adapter: Trained adapter to use for inference
-        episode: Episode with steps to evaluate
-        epoch: Current epoch number
-        config: Training configuration
-        logger: Training logger for state tracking
-        sample_indices: Specific step indices to evaluate (default: evenly spaced)
-    Returns:
-        Path to generated comparison HTML file
-    """
-    from openadapt_ml.scripts.compare import generate_comparison_html, predict_action, format_action
-    output_dir = Path(config.output_dir)
-    output_dir.mkdir(parents=True, exist_ok=True)
-    # Select sample indices if not provided
-    num_samples = min(config.eval_samples, len(episode.steps))
-    if sample_indices is None:
-        if num_samples >= len(episode.steps):
-            sample_indices = list(range(len(episode.steps)))
-        else:
-            # Evenly space samples across the episode
-            step_size = len(episode.steps) // num_samples
-            sample_indices = [i * step_size for i in range(num_samples)]
-    print(f"  Running inference on {len(sample_indices)} sample steps...")
-    # Switch adapter to eval mode
-    adapter.eval()
-    comparison_data = []
-    action_history: List[str] = []
-    total_steps = len(episode.steps)
-    for i, step in enumerate(episode.steps):
-        step_data = {
-            "index": i,
-            "time": step.t,
-            "image_path": step.observation.image_path,
-            "human_action": {
-                "type": step.action.type,
-                "x": step.action.x,
-                "y": step.action.y,
-                "text": step.action.text,
-            },
-            "predicted_action": None,
-            "match": None,
-        }
-        # Only run inference on selected samples (for speed)
-        if i in sample_indices and step.observation.image_path:
-            try:
-                predicted = predict_action(
-                    adapter,
-                    step.observation.image_path,
-                    episode.goal,
-                    step_index=i,
-                    total_steps=total_steps,
-                    action_history=action_history.copy(),
-                )
-                step_data["predicted_action"] = predicted
-                # Check match and calculate distance
-                if predicted and predicted.get("type") == step.action.type:
-                    step_data["match"] = True
-                    # Calculate distance for click actions
-                    if step.action.type == "click":
-                        hx, hy = step.action.x or 0, step.action.y or 0
-                        px, py = predicted.get("x", 0), predicted.get("y", 0)
-                        distance = ((hx - px) ** 2 + (hy - py) ** 2) ** 0.5
-                        # Log evaluation to training state
-                        logger.state.log_evaluation(
-                            epoch=epoch,
-                            sample_idx=i,
-                            image_path=step.observation.image_path,
-                            human_action=step_data["human_action"],
-                            predicted_action=predicted,
-                        )
-                else:
-                    step_data["match"] = False
-                print(f"    Step {i}: {step.action.type} -> {predicted.get('type') if predicted else 'none'}")
-            except Exception as e:
-                print(f"    Step {i}: inference failed - {e}")
-        # Build action history for context
-        action_history.append(format_action(step.action, use_som=False))
-        comparison_data.append(step_data)
-    # Switch back to train mode
-    adapter.train()
-    # Generate comparison HTML
-    output_path = output_dir / f"comparison_epoch{epoch}.html"
-    capture_path = Path(logger.state.capture_path) if logger.state.capture_path else Path(".")
-    generate_comparison_html(capture_path, episode, comparison_data, output_path)
-    print(f"  Comparison saved: {output_path}")
-    # Also regenerate all dashboards to update navigation
-    regenerate_all_dashboards(output_dir)
-    return output_path
-def _create_dataloader(dataset: Dataset, batch_size: int) -> DataLoader:
-    # Use an identity collate_fn so that each batch is a List[Dict], matching
-    # the expectations of adapters that operate on SFT-style samples.
-    return DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)
-def _create_lr_scheduler(
-    optimizer: Optimizer,
-    config: TrainingConfig,
-    num_training_steps: int,
-) -> Optional[LambdaLR]:
-    """Create learning rate scheduler based on config.
-    Args:
-        optimizer: The optimizer to schedule.
-        config: Training configuration with lr_scheduler_type and warmup_ratio.
-        num_training_steps: Total number of training steps.
-    Returns:
-        LambdaLR scheduler or None if scheduler_type is "none" or "constant".
-    """
-    scheduler_type = config.lr_scheduler_type.lower()
-    if scheduler_type in ("none", "constant"):
-        return None
-    num_warmup_steps = int(num_training_steps * config.warmup_ratio)
-    if scheduler_type == "linear":
-        def lr_lambda(current_step: int) -> float:
-            if current_step < num_warmup_steps:
-                # Linear warmup
-                return float(current_step) / float(max(1, num_warmup_steps))
-            # Linear decay
-            return max(
-                0.0,
-                float(num_training_steps - current_step) / float(max(1, num_training_steps - num_warmup_steps))
-            )
-    elif scheduler_type == "cosine":
-        import math
-        def lr_lambda(current_step: int) -> float:
-            if current_step < num_warmup_steps:
-                # Linear warmup
-                return float(current_step) / float(max(1, num_warmup_steps))
-            # Cosine decay
-            progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
-            return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress)))
-    else:
-        raise ValueError(f"Unknown lr_scheduler_type: {scheduler_type}. Use 'linear', 'cosine', 'constant', or 'none'.")
-    return LambdaLR(optimizer, lr_lambda)
-def train_supervised(
-    adapter: BaseVLMAdapter,
-    dataset: Dataset,
-    config: TrainingConfig,
-    optimizer: Optional[Optimizer] = None,
-    logger: Optional[TrainingLogger] = None,
-    episode: Optional[Episode] = None,
-) -> bool:
-    """Minimal supervised training loop skeleton.
-    This assumes that `adapter.prepare_inputs` and `adapter.compute_loss` are
-    implemented. It will raise if those methods are not implemented.
-    Args:
-        adapter: VLM adapter to train.
-        dataset: Training dataset.
-        config: Training configuration.
-        optimizer: Optional optimizer (default: AdamW).
-        logger: Optional training logger for visualization.
-        episode: Optional episode for periodic evaluation (generates comparison_epoch{N}.html).
-    Returns:
-        True if training completed successfully, False if aborted due to NaN/Inf loss.
-    """
-    device = adapter.device  # type: ignore[attr-defined]
-    dataloader = _create_dataloader(dataset, batch_size=config.per_device_train_batch_size)
-    if optimizer is None:
-        optimizer = torch.optim.AdamW(
-            adapter.model.parameters(),  # type: ignore[arg-type]
-            lr=config.learning_rate,
-            weight_decay=config.weight_decay,
-        )
-    # Create logger if not provided
-    if logger is None:
-        logger = TrainingLogger(config.output_dir, config)
-    # Calculate total training steps for scheduler
-    num_training_steps = len(dataloader) * config.num_train_epochs // config.gradient_accumulation_steps
-    # Create learning rate scheduler
-    lr_scheduler = _create_lr_scheduler(optimizer, config, num_training_steps)
-    total_steps = 0
-    adapter.train()
-    # Early stopping tracking
-    consecutive_low_loss = 0
-    early_stopped = False
-    user_stopped = False
-    for epoch in range(config.num_train_epochs):
-        if early_stopped or user_stopped:
-            break
-        for _, batch in enumerate(dataloader):
-            # Check for stop signal from dashboard
-            stop_file = Path(config.output_dir) / "STOP_TRAINING"
-            if stop_file.exists():
-                msg = "Stop signal received from dashboard. Stopping training..."
-                print(msg)
-                logger._log_to_terminal(msg)
-                # Set termination status for dashboard
-                logger.state.termination_status = "user_stop"
-                logger.state.termination_message = "Training stopped by user via dashboard"
-                logger.save()
-                user_stopped = True
-                stop_file.unlink()  # Remove signal file
-                break
-            # Batch is a List[Dict[str, Any]] of SFT-style samples; adapter is
-            # responsible for converting it into model inputs.
-            samples: List[Dict[str, Any]] = batch
-            inputs = adapter.prepare_inputs(samples)
-            inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-            loss = adapter.compute_loss(inputs)
-            # Guard against invalid losses to avoid propagating NaNs/Infs
-            if torch.isnan(loss) or torch.isinf(loss):
-                msg = f"Encountered invalid loss at epoch={epoch} step={total_steps + 1}: {loss.item()}"
-                print(msg)
-                logger._log_to_terminal(msg)
-                logger.on_train_end()
-                return False
-            loss.backward()
-            if (total_steps + 1) % config.gradient_accumulation_steps == 0:
-                torch.nn.utils.clip_grad_norm_(adapter.model.parameters(), config.max_grad_norm)  # type: ignore[arg-type]
-                optimizer.step()
-                if lr_scheduler is not None:
-                    lr_scheduler.step()
-                optimizer.zero_grad()
-            total_steps += 1
-            loss_val = loss.item()
-            # Get current learning rate from optimizer
-            current_lr = optimizer.param_groups[0]['lr']
-            # Log step
-            logger.on_step(epoch, total_steps, loss_val, current_lr)
-            if config.logging_steps and total_steps % config.logging_steps == 0:
-                msg = f"epoch={epoch} step={total_steps} loss={loss_val:.4f} lr={current_lr:.6f}"
-                print(msg)
-                logger._log_to_terminal(msg)
-            # Early stopping check
-            if loss_val < config.early_stop_loss:
-                consecutive_low_loss += 1
-                if consecutive_low_loss >= config.early_stop_patience:
-                    msg = (
-                        f"Early stopping: loss ({loss_val:.6f}) below threshold "
-                        f"({config.early_stop_loss}) for {config.early_stop_patience} consecutive steps"
-                    )
-                    print(msg)
-                    logger._log_to_terminal(msg)
-                    # Set termination status for dashboard
-                    logger.state.termination_status = "auto_low_loss"
-                    logger.state.termination_message = (
-                        f"Loss reached {loss_val:.6f} (< {config.early_stop_loss}) "
-                        f"for {config.early_stop_patience} consecutive steps"
-                    )
-                    logger.save()
-                    early_stopped = True
-                    break
-            else:
-                consecutive_low_loss = 0
-        # End of epoch
-        logger.on_epoch_end(epoch)
-        # Save checkpoint at end of each epoch
-        if config.save_checkpoint_every_epoch:
-            checkpoint_path = Path(config.checkpoint_dir) / f"epoch_{epoch}"
-            checkpoint_path.mkdir(parents=True, exist_ok=True)
-            try:
-                adapter.save_checkpoint(str(checkpoint_path))
-                msg = f"Checkpoint saved to {checkpoint_path}"
-                print(msg)
-                logger._log_to_terminal(msg)
-            except Exception as e:
-                msg = f"Warning: Failed to save checkpoint: {e}"
-                print(msg)
-                logger._log_to_terminal(msg)
-        # Run evaluation after each epoch (generates comparison_epoch{N}.html)
-        if config.eval_every_epoch and episode is not None:
-            try:
-                print(f"Running epoch {epoch} evaluation...")
-                run_epoch_evaluation(
-                    adapter=adapter,
-                    episode=episode,
-                    epoch=epoch,
-                    config=config,
-                    logger=logger,
-                )
-            except Exception as e:
-                print(f"Warning: Epoch evaluation failed: {e}")
-                import traceback
-                traceback.print_exc()
-    # Set termination status if not already set (normal completion)
-    if not logger.state.termination_status:
-        logger.state.termination_status = "auto_complete"
-        logger.state.termination_message = f"Training completed all {config.num_train_epochs} epochs"
-        logger.save()
-    logger.on_train_end()
-    return True

openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl