PyPI - openadapt-ml - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

openadapt_ml/baselines/__init__.py +121 -0
openadapt_ml/baselines/adapter.py +185 -0
openadapt_ml/baselines/cli.py +314 -0
openadapt_ml/baselines/config.py +448 -0
openadapt_ml/baselines/parser.py +922 -0
openadapt_ml/baselines/prompts.py +787 -0
openadapt_ml/benchmarks/__init__.py +13 -115
openadapt_ml/benchmarks/agent.py +265 -421
openadapt_ml/benchmarks/azure.py +28 -19
openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
openadapt_ml/benchmarks/cli.py +1722 -4847
openadapt_ml/benchmarks/trace_export.py +631 -0
openadapt_ml/benchmarks/viewer.py +22 -5
openadapt_ml/benchmarks/vm_monitor.py +530 -29
openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
openadapt_ml/cloud/azure_inference.py +3 -5
openadapt_ml/cloud/lambda_labs.py +722 -307
openadapt_ml/cloud/local.py +2038 -487
openadapt_ml/cloud/ssh_tunnel.py +68 -26
openadapt_ml/datasets/next_action.py +40 -30
openadapt_ml/evals/grounding.py +8 -3
openadapt_ml/evals/plot_eval_metrics.py +15 -13
openadapt_ml/evals/trajectory_matching.py +41 -26
openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
openadapt_ml/experiments/representation_shootout/config.py +390 -0
openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
openadapt_ml/experiments/representation_shootout/runner.py +687 -0
openadapt_ml/experiments/waa_demo/runner.py +29 -14
openadapt_ml/export/parquet.py +36 -24
openadapt_ml/grounding/detector.py +18 -14
openadapt_ml/ingest/__init__.py +8 -6
openadapt_ml/ingest/capture.py +25 -22
openadapt_ml/ingest/loader.py +7 -4
openadapt_ml/ingest/synthetic.py +189 -100
openadapt_ml/models/api_adapter.py +14 -4
openadapt_ml/models/base_adapter.py +10 -2
openadapt_ml/models/providers/__init__.py +288 -0
openadapt_ml/models/providers/anthropic.py +266 -0
openadapt_ml/models/providers/base.py +299 -0
openadapt_ml/models/providers/google.py +376 -0
openadapt_ml/models/providers/openai.py +342 -0
openadapt_ml/models/qwen_vl.py +46 -19
openadapt_ml/perception/__init__.py +35 -0
openadapt_ml/perception/integration.py +399 -0
openadapt_ml/retrieval/demo_retriever.py +50 -24
openadapt_ml/retrieval/embeddings.py +9 -8
openadapt_ml/retrieval/retriever.py +3 -1
openadapt_ml/runtime/__init__.py +50 -0
openadapt_ml/runtime/policy.py +18 -5
openadapt_ml/runtime/safety_gate.py +471 -0
openadapt_ml/schema/__init__.py +9 -0
openadapt_ml/schema/converters.py +74 -27
openadapt_ml/schema/episode.py +31 -18
openadapt_ml/scripts/capture_screenshots.py +530 -0
openadapt_ml/scripts/compare.py +85 -54
openadapt_ml/scripts/demo_policy.py +4 -1
openadapt_ml/scripts/eval_policy.py +15 -9
openadapt_ml/scripts/make_gif.py +1 -1
openadapt_ml/scripts/prepare_synthetic.py +3 -1
openadapt_ml/scripts/train.py +21 -9
openadapt_ml/segmentation/README.md +920 -0
openadapt_ml/segmentation/__init__.py +97 -0
openadapt_ml/segmentation/adapters/__init__.py +5 -0
openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
openadapt_ml/segmentation/annotator.py +610 -0
openadapt_ml/segmentation/cache.py +290 -0
openadapt_ml/segmentation/cli.py +674 -0
openadapt_ml/segmentation/deduplicator.py +656 -0
openadapt_ml/segmentation/frame_describer.py +788 -0
openadapt_ml/segmentation/pipeline.py +340 -0
openadapt_ml/segmentation/schemas.py +622 -0
openadapt_ml/segmentation/segment_extractor.py +634 -0
openadapt_ml/training/azure_ops_viewer.py +1097 -0
openadapt_ml/training/benchmark_viewer.py +52 -41
openadapt_ml/training/shared_ui.py +7 -7
openadapt_ml/training/stub_provider.py +57 -35
openadapt_ml/training/trainer.py +143 -86
openadapt_ml/training/trl_trainer.py +70 -21
openadapt_ml/training/viewer.py +323 -108
openadapt_ml/training/viewer_components.py +180 -0
{openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
openadapt_ml-0.2.1.dist-info/RECORD +116 -0
openadapt_ml/benchmarks/base.py +0 -366
openadapt_ml/benchmarks/data_collection.py +0 -432
openadapt_ml/benchmarks/live_tracker.py +0 -180
openadapt_ml/benchmarks/runner.py +0 -418
openadapt_ml/benchmarks/waa.py +0 -761
openadapt_ml/benchmarks/waa_live.py +0 -619
openadapt_ml-0.2.0.dist-info/RECORD +0 -86
{openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
{openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0

openadapt_ml/training/trainer.py CHANGED Viewed

@@ -4,9 +4,9 @@ import json
 import time
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List
-from openadapt_ml.schema import Episode, Step, Action, ActionType
+from openadapt_ml.schema import ActionType
 from openadapt_ml.training.shared_ui import (
     get_shared_header_css as _get_shared_header_css,
     generate_shared_header_html as _generate_shared_header_html,
@@ -108,9 +108,10 @@ class TrainingConfig:
 @dataclass
 class TrainingState:
     """Tracks training progress for visualization."""
     # Job identification
     job_id: str = field(default_factory=lambda: time.strftime("%Y%m%d_%H%M%S"))
-    hostname: str = field(default_factory=lambda: __import__('socket').gethostname())
+    hostname: str = field(default_factory=lambda: __import__("socket").gethostname())
     capture_path: str = ""
     config_path: str = ""
     goal: str = ""  # Task goal/description for the training run
@@ -142,7 +143,9 @@ class TrainingState:
     setup_status: str = ""  # e.g. "booting", "installing", "training", "complete"
     setup_logs: List[str] = field(default_factory=list)  # Setup progress messages
     # Termination tracking
-    termination_status: str = ""  # e.g. "auto_low_loss", "auto_complete", "user_stop", "running"
+    termination_status: str = (
+        ""  # e.g. "auto_low_loss", "auto_complete", "user_stop", "running"
+    )
     termination_message: str = ""  # Human-readable termination reason
     def log_step(self, epoch: int, step: int, loss: float, lr: float = 0.0) -> None:
@@ -151,33 +154,46 @@ class TrainingState:
         self.step = step
         self.loss = loss
         self.learning_rate = lr
-        self.losses.append({
-            "epoch": epoch,
-            "step": step,
-            "loss": loss,
-            "lr": lr,
-            "time": time.time() - self.start_time,
-        })
-    def log_evaluation(self, epoch: int, sample_idx: int, image_path: str,
-                       human_action: Dict, predicted_action: Dict) -> None:
+        self.losses.append(
+            {
+                "epoch": epoch,
+                "step": step,
+                "loss": loss,
+                "lr": lr,
+                "time": time.time() - self.start_time,
+            }
+        )
+    def log_evaluation(
+        self,
+        epoch: int,
+        sample_idx: int,
+        image_path: str,
+        human_action: Dict,
+        predicted_action: Dict,
+    ) -> None:
         """Log an evaluation sample."""
         # Calculate distance for click actions
         distance = 0.0
-        if human_action.get("type") == "click" and predicted_action.get("type") == "click":
+        if (
+            human_action.get("type") == "click"
+            and predicted_action.get("type") == "click"
+        ):
             hx, hy = human_action.get("x", 0), human_action.get("y", 0)
             px, py = predicted_action.get("x", 0), predicted_action.get("y", 0)
             distance = ((hx - px) ** 2 + (hy - py) ** 2) ** 0.5
-        self.evaluations.append({
-            "epoch": epoch,
-            "sample_idx": sample_idx,
-            "image_path": image_path,
-            "human_action": human_action,
-            "predicted_action": predicted_action,
-            "distance": distance,
-            "correct": distance < 50,  # Within 50 pixels is "correct"
-        })
+        self.evaluations.append(
+            {
+                "epoch": epoch,
+                "sample_idx": sample_idx,
+                "image_path": image_path,
+                "human_action": human_action,
+                "predicted_action": predicted_action,
+                "distance": distance,
+                "correct": distance < 50,  # Within 50 pixels is "correct"
+            }
+        )
     def to_dict(self) -> Dict[str, Any]:
         """Convert state to serializable dict."""
@@ -195,7 +211,9 @@ class TrainingState:
             "load_in_4bit": self.load_in_4bit,
             "instance_type": self.instance_type,
             "instance_ip": self.instance_ip,
-            "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.start_time)),
+            "started_at": time.strftime(
+                "%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.start_time)
+            ),
             # Cloud provider info
             "cloud_provider": self.cloud_provider,
             "cloud_dashboard_url": self.cloud_dashboard_url,
@@ -316,6 +334,7 @@ class TrainingLogger:
     def _save_config_snapshot(self) -> None:
         """Save training config snapshot to JSON."""
         from dataclasses import asdict
         config_file = self.output_dir / "config.json"
         config_dict = asdict(self.config)
         with open(config_file, "w") as f:
@@ -333,32 +352,45 @@ class TrainingLogger:
         dashboard_path.write_text(html)
-def _generate_termination_status_html(state: TrainingState, is_training_complete: bool) -> str:
+def _generate_termination_status_html(
+    state: TrainingState, is_training_complete: bool
+) -> str:
     """Generate HTML for termination status section."""
     # Check if we have termination info
     if state.termination_status:
         # Map termination status to colors and icons
         status_styles = {
-            "auto_complete": {"color": "#22c55e", "icon": "✓", "label": "Training Complete"},
-            "auto_low_loss": {"color": "#22c55e", "icon": "✓", "label": "Auto-Stopped (Low Loss)"},
+            "auto_complete": {
+                "color": "#22c55e",
+                "icon": "✓",
+                "label": "Training Complete",
+            },
+            "auto_low_loss": {
+                "color": "#22c55e",
+                "icon": "✓",
+                "label": "Auto-Stopped (Low Loss)",
+            },
             "user_stop": {"color": "#f59e0b", "icon": "■", "label": "Stopped by User"},
         }
-        style = status_styles.get(state.termination_status, {"color": "#22c55e", "icon": "✓", "label": "Complete"})
+        style = status_styles.get(
+            state.termination_status,
+            {"color": "#22c55e", "icon": "✓", "label": "Complete"},
+        )
-        return f'''<div style="display: flex; flex-direction: column; gap: 8px;">
-            <div style="display: flex; align-items: center; gap: 8px; color: {style['color']};">
-                <span style="font-size: 1.2rem;">{style['icon']}</span>
-                <span style="font-weight: 600;">{style['label']}</span>
+        return f"""<div style="display: flex; flex-direction: column; gap: 8px;">
+            <div style="display: flex; align-items: center; gap: 8px; color: {style["color"]};">
+                <span style="font-size: 1.2rem;">{style["icon"]}</span>
+                <span style="font-weight: 600;">{style["label"]}</span>
             </div>
-            {f'<div style="font-size: 0.85rem; color: var(--text-muted); margin-left: 28px;">{state.termination_message}</div>' if state.termination_message else ''}
-        </div>'''
+            {f'<div style="font-size: 0.85rem; color: var(--text-muted); margin-left: 28px;">{state.termination_message}</div>' if state.termination_message else ""}
+        </div>"""
     elif is_training_complete:
-        return '''<div style="display: flex; align-items: center; gap: 8px; color: #22c55e;">
+        return """<div style="display: flex; align-items: center; gap: 8px; color: #22c55e;">
             <span style="font-size: 1.2rem;">✓</span>
             <span style="font-weight: 600;">Training Complete</span>
-        </div>'''
+        </div>"""
     else:
-        return '''<button id="stop-training-btn" onclick="stopTraining()" style="
+        return """<button id="stop-training-btn" onclick="stopTraining()" style="
             background: linear-gradient(135deg, #ef4444 0%, #dc2626 100%);
             color: white;
             border: none;
@@ -374,53 +406,65 @@ def _generate_termination_status_html(state: TrainingState, is_training_complete
         ">
             <span style="font-size: 1.1rem;">■</span> Stop Training
         </button>
-        <p id="stop-status" style="margin-top: 8px; font-size: 0.75rem; color: var(--text-muted);"></p>'''
+        <p id="stop-status" style="margin-top: 8px; font-size: 0.75rem; color: var(--text-muted);"></p>"""
 def generate_training_dashboard(state: TrainingState, config: TrainingConfig) -> str:
     """Generate an HTML dashboard for training visualization."""
     losses_json = json.dumps(state.losses)
     # Use stored elapsed_time if available (historical data), otherwise calculate
-    elapsed = state.elapsed_time if state.elapsed_time > 0 else time.time() - state.start_time
+    elapsed = (
+        state.elapsed_time if state.elapsed_time > 0 else time.time() - state.start_time
+    )
     elapsed_str = f"{int(elapsed // 60)}m {int(elapsed % 60)}s"
     # Calculate stats
     if state.losses:
-        min_loss = min(l["loss"] for l in state.losses)
-        avg_loss = sum(l["loss"] for l in state.losses) / len(state.losses)
+        min_loss = min(loss["loss"] for loss in state.losses)
+        sum(loss["loss"] for loss in state.losses) / len(state.losses)
         recent_losses = state.losses[-10:] if len(state.losses) >= 10 else state.losses
-        recent_avg = sum(l["loss"] for l in recent_losses) / len(recent_losses)
+        recent_avg = sum(loss["loss"] for loss in recent_losses) / len(recent_losses)
         # Calculate step times
         step_times = []
         for i in range(1, len(state.losses)):
-            step_times.append(state.losses[i]["time"] - state.losses[i-1]["time"])
+            step_times.append(state.losses[i]["time"] - state.losses[i - 1]["time"])
         avg_step_time = sum(step_times) / len(step_times) if step_times else 0
         # Loss by epoch
         epoch_losses: dict = {}
-        for l in state.losses:
-            ep = l["epoch"]
+        for loss in state.losses:
+            ep = loss["epoch"]
             if ep not in epoch_losses:
                 epoch_losses[ep] = []
-            epoch_losses[ep].append(l["loss"])
-        epoch_avg = {ep: sum(losses)/len(losses) for ep, losses in epoch_losses.items()}
+            epoch_losses[ep].append(loss["loss"])
+        epoch_avg = {
+            ep: sum(losses) / len(losses) for ep, losses in epoch_losses.items()
+        }
         # Estimate ETA
         # Steps per epoch = steps in completed epochs / completed epochs
         completed_epochs = state.epoch
-        steps_in_completed = sum(1 for l in state.losses if l["epoch"] < completed_epochs)
+        steps_in_completed = sum(
+            1 for loss in state.losses if loss["epoch"] < completed_epochs
+        )
         if completed_epochs > 0 and steps_in_completed > 0:
             steps_per_epoch = steps_in_completed / completed_epochs
         else:
             # Estimate from current epoch progress
-            steps_per_epoch = len(state.losses) / (state.epoch + 1) if state.epoch >= 0 else len(state.losses)
-        total_epochs = state.total_epochs if state.total_epochs > 0 else config.num_train_epochs
+            steps_per_epoch = (
+                len(state.losses) / (state.epoch + 1)
+                if state.epoch >= 0
+                else len(state.losses)
+            )
+        total_epochs = (
+            state.total_epochs if state.total_epochs > 0 else config.num_train_epochs
+        )
         total_steps_estimate = steps_per_epoch * total_epochs
         remaining_steps = max(0, total_steps_estimate - len(state.losses))
         eta_seconds = remaining_steps * avg_step_time if avg_step_time > 0 else 0
         # Check if training is complete (all steps done)
         is_training_complete = remaining_steps == 0 and len(state.losses) > 0
     else:
-        min_loss = avg_loss = recent_avg = avg_step_time = 0.0
+        min_loss = recent_avg = avg_step_time = 0.0
         epoch_avg = {}
         eta_seconds = 0
         steps_per_epoch = 0
@@ -431,10 +475,9 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
     epoch_avg_json = json.dumps(list(epoch_avg.items()))
     # Generate comparison viewer preview if capture path available
-    comparison_viewer_path = ""
     if state.capture_path:
         try:
-            from openadapt_ml.scripts.compare import generate_comparison_html, generate_comparison_data
+            from openadapt_ml.scripts.compare import generate_comparison_html
             from openadapt_ml.ingest.capture import capture_to_episode
             capture_path = Path(state.capture_path)
@@ -454,7 +497,9 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
                         "time": step.step_index,
                         "image_path": step.observation.screenshot_path,
                         "human_action": {
-                            "type": step.action.type.value if isinstance(step.action.type, ActionType) else step.action.type,
+                            "type": step.action.type.value
+                            if isinstance(step.action.type, ActionType)
+                            else step.action.type,
                             "x": action_x,
                             "y": action_y,
                             "text": step.action.text,
@@ -465,15 +510,21 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
                     comparison_data.append(step_data)
                 # Generate comparison HTML
-                output_dir = Path(config.output_dir) if hasattr(config, 'output_dir') else Path("training_output")
+                output_dir = (
+                    Path(config.output_dir)
+                    if hasattr(config, "output_dir")
+                    else Path("training_output")
+                )
                 output_dir.mkdir(parents=True, exist_ok=True)
                 comparison_output = output_dir / "comparison_preview.html"
-                generate_comparison_html(capture_path, episode, comparison_data, comparison_output)
-                comparison_viewer_path = str(comparison_output.name)  # Relative path
-        except Exception as e:
+                generate_comparison_html(
+                    capture_path, episode, comparison_data, comparison_output
+                )
+                str(comparison_output.name)  # Relative path
+        except Exception:
             pass  # Fail silently if comparison viewer can't be generated
-    html = f'''<!DOCTYPE html>
+    html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
@@ -1132,10 +1183,10 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
     <div class="container">
         <header>
             <div>
-                <h1>Training Dashboard{f' <a href="{state.cloud_dashboard_url}" target="_blank" class="cloud-link cloud-badge"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 10h-1.26A8 8 0 1 0 9 20h9a5 5 0 0 0 0-10z"/></svg>{state.cloud_provider.title()} Cloud</a>' if state.cloud_dashboard_url else ''}</h1>
+                <h1>Training Dashboard{f' <a href="{state.cloud_dashboard_url}" target="_blank" class="cloud-link cloud-badge"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M18 10h-1.26A8 8 0 1 0 9 20h9a5 5 0 0 0 0-10z"/></svg>{state.cloud_provider.title()} Cloud</a>' if state.cloud_dashboard_url else ""}</h1>
                 <div class="job-info" id="job-info">
-                    <span class="job-host">{state.hostname or 'stub-local'} @ {state.instance_ip or '127.0.0.1'}</span>
-                    {f'<span class="job-config">{state.instance_type}</span>' if state.instance_type else ''}
+                    <span class="job-host">{state.hostname or "stub-local"} @ {state.instance_ip or "127.0.0.1"}</span>
+                    {f'<span class="job-config">{state.instance_type}</span>' if state.instance_type else ""}
                 </div>
             </div>
             <div class="status" id="status">
@@ -1144,13 +1195,13 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
             </div>
         </header>
-        <div class="setup-panel{' hidden' if not state.setup_logs else ''}" id="setup-panel">
+        <div class="setup-panel{" hidden" if not state.setup_logs else ""}" id="setup-panel">
             <div class="setup-header">
                 <h2>Setup Progress</h2>
-                <span class="setup-status-badge {state.setup_status}" id="setup-status-badge">{state.setup_status or 'initializing'}</span>
+                <span class="setup-status-badge {state.setup_status}" id="setup-status-badge">{state.setup_status or "initializing"}</span>
             </div>
             <div class="setup-logs" id="setup-logs">
-                {''.join(f'<div class="setup-log-line{" current" if i == len(state.setup_logs) - 1 else ""}">{log}</div>' for i, log in enumerate(state.setup_logs)) if state.setup_logs else '<div class="setup-log-line">Waiting for setup logs...</div>'}
+                {"".join(f'<div class="setup-log-line{" current" if i == len(state.setup_logs) - 1 else ""}">{log}</div>' for i, log in enumerate(state.setup_logs)) if state.setup_logs else '<div class="setup-log-line">Waiting for setup logs...</div>'}
             </div>
         </div>
@@ -1160,23 +1211,23 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
             <div class="config-grid">
                 <div class="config-item">
                     <span class="config-label">Model</span>
-                    <span class="config-value model" id="config-model">{state.model_name or 'Not specified'}</span>
+                    <span class="config-value model" id="config-model">{state.model_name or "Not specified"}</span>
                 </div>
                 <div class="config-item">
                     <span class="config-label">Goal</span>
-                    <span class="config-value goal" id="config-goal">{state.goal or 'Not specified'}</span>
+                    <span class="config-value goal" id="config-goal">{state.goal or "Not specified"}</span>
                 </div>
                 <div class="config-item">
                     <span class="config-label">LoRA</span>
-                    <span class="config-value" id="config-lora">{f'r={state.lora_r}, α={state.lora_alpha}' if state.lora_r else 'Not specified'}</span>
+                    <span class="config-value" id="config-lora">{f"r={state.lora_r}, α={state.lora_alpha}" if state.lora_r else "Not specified"}</span>
                 </div>
                 <div class="config-item">
                     <span class="config-label">Quantization</span>
-                    <span class="config-value" id="config-quant">{'4-bit' if state.load_in_4bit else 'None'}</span>
+                    <span class="config-value" id="config-quant">{"4-bit" if state.load_in_4bit else "None"}</span>
                 </div>
                 <div class="config-item">
                     <span class="config-label">Config</span>
-                    <span class="config-value" id="config-path">{state.config_path or 'Not specified'}</span>
+                    <span class="config-value" id="config-path">{state.config_path or "Not specified"}</span>
                 </div>
             </div>
         </div>
@@ -1523,7 +1574,7 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
         let etaSeconds = {eta_seconds};
         let avgStepTime = {avg_step_time};
         let remainingSteps = {remaining_steps};
-        let isTrainingComplete = {'true' if is_training_complete else 'false'};
+        let isTrainingComplete = {"true" if is_training_complete else "false"};
         // Auto-stop when loss <= threshold (INVARIANT: training should stop when loss <= 1.0)
         const AUTO_STOP_LOSS_THRESHOLD = 1.0;
@@ -2022,7 +2073,7 @@ def generate_training_dashboard(state: TrainingState, config: TrainingConfig) ->
         setInterval(updateStatusIndicator, 1000);  // Update LIVE/STALE indicator every second
     </script>
 </body>
-</html>'''
+</html>"""
     return html
@@ -2060,6 +2111,7 @@ def regenerate_all_dashboards(output_dir: str | Path) -> list[Path]:
     except Exception as e:
         print(f"Warning: Failed to generate unified viewer: {e}")
         import traceback
         traceback.print_exc()
     return regenerated
@@ -2102,7 +2154,9 @@ def regenerate_local_dashboard(
     state = TrainingState(
         job_id=data.get("job_id", "unknown"),
         hostname=data.get("hostname", ""),
-        capture_path=str(capture_path) if capture_path else data.get("capture_path", ""),
+        capture_path=str(capture_path)
+        if capture_path
+        else data.get("capture_path", ""),
         config_path=data.get("config_path", ""),
         epoch=data.get("epoch", 0),
         step=data.get("step", 0),
@@ -2145,30 +2199,33 @@ def regenerate_local_dashboard(
     if training_status == "COMPLETED":
         html = html.replace(
             '<div class="status" id="status">',
-            '<div class="status complete" id="status">'
+            '<div class="status complete" id="status">',
         )
         html = html.replace(
             '<span id="status-text">Training in progress</span>',
-            '<span id="status-text">COMPLETED</span>'
+            '<span id="status-text">COMPLETED</span>',
         )
     elif training_status == "STOPPED":
         html = html.replace(
-            '<div class="status" id="status">',
-            '<div class="status stale" id="status">'
+            '<div class="status" id="status">', '<div class="status stale" id="status">'
         )
         html = html.replace(
             '<span id="status-text">Training in progress</span>',
-            '<span id="status-text">STOPPED (Epoch {}/{})'.format(current_epoch + 1, total_epochs) + '</span>'
+            '<span id="status-text">STOPPED (Epoch {}/{})'.format(
+                current_epoch + 1, total_epochs
+            )
+            + "</span>",
         )
     # Fix ETA display for completed/stopped training
     import re
     if training_status in ("COMPLETED", "STOPPED"):
         # Replace "calculating..." with appropriate status
         html = re.sub(
             r'(<div class="stat-value" id="stat-eta">)[^<]*(</div>)',
-            r'\1—\2' if training_status == "STOPPED" else r'\1complete\2',
-            html
+            r"\1—\2" if training_status == "STOPPED" else r"\1complete\2",
+            html,
         )
     # Replace dynamic nav with static unified header
@@ -2179,20 +2236,20 @@ def regenerate_local_dashboard(
     # This is critical for file:// protocol where fetch() doesn't work
     html = html.replace(
         "setInterval(fetchAndUpdate, 3000);",
-        "// fetchAndUpdate disabled for static dashboard"
+        "// fetchAndUpdate disabled for static dashboard",
     )
     html = html.replace(
         "setInterval(updateElapsedDisplay, 1000);",
-        "// updateElapsedDisplay disabled for static dashboard"
+        "// updateElapsedDisplay disabled for static dashboard",
     )
     html = html.replace(
         "setInterval(updateStatusIndicator, 1000);",
-        "// updateStatusIndicator disabled for static dashboard"
+        "// updateStatusIndicator disabled for static dashboard",
     )
     # CRITICAL: Disable discoverDashboards() - it overwrites static nav on file:// protocol
     html = html.replace(
         "discoverDashboards();",
-        "// discoverDashboards disabled - using static nav for file:// protocol"
+        "// discoverDashboards disabled - using static nav for file:// protocol",
     )
     # Write output

openadapt_ml/training/trl_trainer.py CHANGED Viewed

@@ -91,7 +91,9 @@ def _load_unsloth_model(config: TRLTrainingConfig):
         # Enable training mode
         FastVisionModel.for_training(model)
-        print(f"✓ Loaded {config.model_name} with Unsloth (4-bit: {config.load_in_4bit})")
+        print(
+            f"✓ Loaded {config.model_name} with Unsloth (4-bit: {config.load_in_4bit})"
+        )
         return model, tokenizer, True
     except ImportError:
@@ -100,26 +102,70 @@ def _load_unsloth_model(config: TRLTrainingConfig):
 def _load_standard_model(config: TRLTrainingConfig):
-    """Fallback: Load model with standard transformers + peft."""
-    from transformers import AutoModelForCausalLM, AutoProcessor
+    """Fallback: Load model with standard transformers + peft.
+    Automatically detects vision-language models and uses the appropriate
+    model class (Qwen2VLForConditionalGeneration for VL models,
+    AutoModelForCausalLM for text-only models).
+    """
+    from transformers import AutoConfig, AutoProcessor
     from peft import LoraConfig, get_peft_model
     import torch
-    model = AutoModelForCausalLM.from_pretrained(
-        config.model_name,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        trust_remote_code=True,
+    # Check if this is a vision-language model
+    model_config = AutoConfig.from_pretrained(
+        config.model_name, trust_remote_code=True
+    )
+    is_vl_model = (
+        "VL" in config.model_name.upper()
+        or "vision" in config.model_name.lower()
+        or hasattr(model_config, "vision_config")
     )
+    if is_vl_model:
+        # Vision-language model - use Qwen2VLForConditionalGeneration or AutoModelForVision2Seq
+        try:
+            from transformers import Qwen2VLForConditionalGeneration
+            model = Qwen2VLForConditionalGeneration.from_pretrained(
+                config.model_name,
+                torch_dtype=torch.bfloat16,
+                device_map="auto",
+                trust_remote_code=True,
+            )
+            print("  Using Qwen2VLForConditionalGeneration for VL model")
+        except (ImportError, ValueError, RuntimeError, TypeError):
+            # Fallback to AutoModelForVision2Seq for other VL models
+            from transformers import AutoModelForVision2Seq
+            model = AutoModelForVision2Seq.from_pretrained(
+                config.model_name,
+                torch_dtype=torch.bfloat16,
+                device_map="auto",
+                trust_remote_code=True,
+            )
+            print("  Using AutoModelForVision2Seq for VL model")
+    else:
+        # Text-only model
+        from transformers import AutoModelForCausalLM
+        model = AutoModelForCausalLM.from_pretrained(
+            config.model_name,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            trust_remote_code=True,
+        )
+        print("  Using AutoModelForCausalLM for text-only model")
     processor = AutoProcessor.from_pretrained(config.model_name, trust_remote_code=True)
-    # Apply LoRA
+    # Apply LoRA - use SEQ_2_SEQ_LM for VL models, CAUSAL_LM for text-only
     peft_config = LoraConfig(
         r=config.lora_r,
         lora_alpha=config.lora_alpha,
         lora_dropout=config.lora_dropout,
         target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
-        task_type="CAUSAL_LM",
+        task_type="SEQ_2_SEQ_LM" if is_vl_model else "CAUSAL_LM",
     )
     model = get_peft_model(model, peft_config)
@@ -161,10 +207,12 @@ def _convert_samples_to_trl_format(
         if not pil_images:
             continue  # Skip samples with missing images
-        trl_samples.append({
-            "images": pil_images,
-            "messages": sample["messages"],
-        })
+        trl_samples.append(
+            {
+                "images": pil_images,
+                "messages": sample["messages"],
+            }
+        )
     return trl_samples
@@ -261,7 +309,7 @@ def train_with_trl(
                 logging_steps=config.logging_steps,
                 save_strategy=config.save_strategy,
                 max_length=None,  # Critical for VLMs
-                assistant_only_loss=True,
+                assistant_only_loss=False,  # Not supported for VL models yet
             )
             trainer = SFTTrainer(
@@ -270,15 +318,15 @@ def train_with_trl(
                 args=training_args,
             )
-        print(f"\n{'='*50}")
-        print(f"Starting training:")
+        print(f"\n{'=' * 50}")
+        print("Starting training:")
         print(f"  Model: {config.model_name}")
         print(f"  Samples: {len(trl_samples)}")
         print(f"  Epochs: {config.num_epochs}")
         print(f"  Batch size: {config.batch_size}")
         print(f"  Unsloth: {is_unsloth}")
         print(f"  Output: {config.output_dir}")
-        print(f"{'='*50}\n")
+        print(f"{'=' * 50}\n")
         trainer.train()
@@ -291,8 +339,7 @@ def train_with_trl(
     except ImportError as e:
         raise ImportError(
-            f"TRL not installed. Install with: pip install trl\n"
-            f"Original error: {e}"
+            f"TRL not installed. Install with: pip install trl\nOriginal error: {e}"
         )
@@ -333,7 +380,9 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Train VLM with TRL + Unsloth")
     parser.add_argument("--parquet", required=True, help="Path to parquet file")
     parser.add_argument("--output", default="checkpoints", help="Output directory")
-    parser.add_argument("--model", default="unsloth/Qwen2.5-VL-7B-Instruct", help="Model name")
+    parser.add_argument(
+        "--model", default="unsloth/Qwen2.5-VL-7B-Instruct", help="Model name"
+    )
     parser.add_argument("--epochs", type=int, default=3, help="Number of epochs")
     parser.add_argument("--use-som", action="store_true", help="Use Set-of-Marks DSL")

openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl