openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -12,7 +12,7 @@ from pathlib import Path
 
 def _get_background_tasks_panel_css() -> str:
     """Return CSS for background tasks panel."""
-    return
+    return """
     .tasks-panel {
         background: linear-gradient(135deg, rgba(100, 100, 255, 0.1) 0%, rgba(100, 100, 255, 0.05) 100%);
         border: 1px solid rgba(100, 100, 255, 0.3);
@@ -287,12 +287,12 @@ def _get_background_tasks_panel_css() -> str:
         border-radius: 3px;
         transition: width 0.5s ease;
     }
-
+    """
 
 
 def _get_background_tasks_panel_html() -> str:
     """Return HTML for background tasks panel with JS polling and improved styling."""
-    return
+    return """
     <div class="tasks-panel" id="tasks-panel">
         <div class="tasks-header">
             <div class="tasks-title">
@@ -701,12 +701,12 @@ def _get_background_tasks_panel_html() -> str:
         fetchBackgroundTasks();
         setInterval(fetchBackgroundTasks, 10000);
     </script>
-
+    """
 
 
 def _get_live_evaluation_panel_css() -> str:
     """Return CSS for live evaluation progress panel."""
-    return
+    return """
     .live-eval-panel {
         background: linear-gradient(135deg, rgba(139, 92, 246, 0.15) 0%, rgba(139, 92, 246, 0.05) 100%);
         border: 1px solid rgba(139, 92, 246, 0.3);
@@ -859,12 +859,12 @@ def _get_live_evaluation_panel_css() -> str:
         0%, 100% { opacity: 1; }
         50% { opacity: 0.3; }
     }
-
+    """
 
 
 def _get_live_evaluation_panel_html() -> str:
    """Return HTML for live evaluation panel with SSE and polling fallback."""
-    return
+    return """
     <div class="live-eval-panel" id="live-eval-panel">
         <div class="live-eval-header">
             <div class="live-eval-title">
@@ -1237,12 +1237,12 @@ def _get_live_evaluation_panel_html() -> str:
         if (window.sseManager) window.sseManager.disconnect();
     });
     </script>
-
+    """
 
 
 def _get_azure_jobs_panel_css() -> str:
     """Return CSS for the Azure jobs status panel with color-coded status indicators."""
-    return
+    return """
     .azure-jobs-panel {
         background: linear-gradient(135deg, rgba(0, 120, 212, 0.15) 0%, rgba(0, 120, 212, 0.05) 100%);
         border: 1px solid rgba(0, 120, 212, 0.3);
@@ -1535,7 +1535,7 @@ def _get_azure_jobs_panel_css() -> str:
     @keyframes spin {
         to { transform: rotate(360deg); }
     }
-
+    """
 
 
 def _get_azure_jobs_panel_html() -> str:
@@ -1545,7 +1545,7 @@ def _get_azure_jobs_panel_html() -> str:
     is used for training jobs, not for WAA benchmarks (which require nested virtualization
     that managed compute doesn't support).
     """
-    return
+    return """
     <div class="azure-jobs-panel collapsed" id="azure-jobs-panel">
         <div class="azure-jobs-header" onclick="toggleAzureJobsPanel()" title="Azure ML training jobs">
             <div class="azure-jobs-title">
@@ -1928,12 +1928,12 @@ def _get_azure_jobs_panel_html() -> str:
         setInterval(fetchAzureJobs, 30000);
         setInterval(fetchJobLogs, 5000); // Poll logs every 5 seconds
     </script>
-
+    """
 
 
 def _get_vm_discovery_panel_css() -> str:
     """Return CSS for VM Discovery panel with prominent VNC button."""
-    return
+    return """
     .vm-discovery-panel {
         background: linear-gradient(135deg, rgba(16, 185, 129, 0.15) 0%, rgba(5, 150, 105, 0.05) 100%);
         border: 1px solid rgba(16, 185, 129, 0.3);
@@ -2262,12 +2262,12 @@ def _get_vm_discovery_panel_css() -> str:
         cursor: pointer;
         font-size: 0.85rem;
     }
-
+    """
 
 
 def _get_vm_discovery_panel_html() -> str:
     """Return HTML for VM Discovery panel with prominent VNC button and loading states."""
-    return
+    return """
     <div class="vm-discovery-panel" id="vm-discovery-panel">
         <div class="vm-discovery-header">
             <div class="vm-discovery-title">
@@ -2571,12 +2571,12 @@ def _get_vm_discovery_panel_html() -> str:
         fetchVMs();
         setInterval(fetchVMs, 10000);
     </script>
-
+    """
 
 
 def _get_run_benchmark_panel_css() -> str:
     """Return CSS for the Run Benchmark configuration panel."""
-    return
+    return """
     .run-benchmark-panel {
         background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(16, 185, 129, 0.05) 100%);
         border: 1px solid rgba(16, 185, 129, 0.3);
@@ -2742,12 +2742,12 @@ def _get_run_benchmark_panel_css() -> str:
         color: #6ee7b7;
         border: 1px solid rgba(16, 185, 129, 0.3);
     }
-
+    """
 
 
 def _get_run_benchmark_panel_html() -> str:
     """Return HTML for the Run Benchmark configuration panel."""
-    return
+    return """
     <div class="run-benchmark-panel" id="run-benchmark-panel">
         <div class="run-benchmark-header">
             <div class="run-benchmark-title">
@@ -2828,7 +2828,7 @@ def _get_run_benchmark_panel_html() -> str:
 
         <div class="run-benchmark-status" id="run-benchmark-status"></div>
     </div>
-
+    """
 
 
 def _get_run_benchmark_panel_js(include_script_tags: bool = True) -> str:
@@ -2838,7 +2838,7 @@ def _get_run_benchmark_panel_js(include_script_tags: bool = True) -> str:
         include_script_tags: If True, wrap JS in <script> tags. Set to False when
             inserting into an existing script block.
     """
-    js_code =
+    js_code = """
     // Handle model dropdown change to show/hide custom input
     function handleModelChange() {
         const select = document.getElementById('benchmark-model');
@@ -2958,9 +2958,9 @@ def _get_run_benchmark_panel_js(include_script_tags: bool = True) -> str:
     document.addEventListener('DOMContentLoaded', function() {
         updateTaskSelectionState();
     });
-
+    """
     if include_script_tags:
-        return f
+        return f"<script>{js_code}</script>"
     return js_code
 
 
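The `_get_run_benchmark_panel_js` hunk above is the one panel helper that does more than return a literal: it builds the JS body once and then optionally wraps it in `<script>` tags. A minimal sketch of that pattern, reusing the signature and docstring shown in the hunk; the JS body below is a placeholder, not the package's actual script:

```python
def _get_run_benchmark_panel_js(include_script_tags: bool = True) -> str:
    """Return the Run Benchmark panel JS.

    include_script_tags: If True, wrap JS in <script> tags. Set to False when
        inserting into an existing script block.
    """
    # Placeholder body; the real helper returns a few hundred lines of JS.
    js_code = """
    // Handle model dropdown change to show/hide custom input
    function handleModelChange() {
        const select = document.getElementById('benchmark-model');
        // ...
    }
    """
    if include_script_tags:
        return f"<script>{js_code}</script>"
    return js_code
```

Callers that already emit a `<script>` block would pass `include_script_tags=False` and splice the raw JS in directly.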
@@ -3045,7 +3045,10 @@ def generate_benchmark_viewer(
         task_results.append(task_result)
 
     # Import shared header components from trainer
-    from openadapt_ml.training.trainer import
+    from openadapt_ml.training.trainer import (
+        _get_shared_header_css,
+        _generate_shared_header_html,
+    )
 
     # Generate HTML
     html = _generate_benchmark_viewer_html(
@@ -3127,21 +3130,26 @@ def generate_multi_run_benchmark_viewer(
         }
         task_results.append(task_result)
 
-        all_runs.append(
-
-
-
-
-
-
-
-
+        all_runs.append(
+            {
+                "run_name": metadata.get("run_name", benchmark_dir.name),
+                "model_id": metadata.get("model_id", "unknown"),
+                "created_at": metadata.get("created_at", ""),
+                "benchmark_name": metadata.get("benchmark_name", ""),
+                "dir_name": benchmark_dir.name,  # For screenshot paths
+                "summary": summary,
+                "tasks": task_results,
+            }
+        )
 
     if not all_runs:
         return generate_empty_benchmark_viewer(output_path)
 
     # Import shared header components from trainer
-    from openadapt_ml.training.trainer import
+    from openadapt_ml.training.trainer import (
+        _get_shared_header_css,
+        _generate_shared_header_html,
+    )
 
     # Generate HTML
     html = _generate_multi_run_benchmark_viewer_html(
@@ -3167,7 +3175,10 @@ def generate_empty_benchmark_viewer(output_path: Path | str) -> Path:
     output_path = Path(output_path)
 
     # Import shared header components from trainer
-    from openadapt_ml.training.trainer import
+    from openadapt_ml.training.trainer import (
+        _get_shared_header_css,
+        _generate_shared_header_html,
+    )
 
     shared_header_css = _get_shared_header_css()
     shared_header_html = _generate_shared_header_html("benchmarks")
@@ -3182,7 +3193,7 @@ def generate_empty_benchmark_viewer(output_path: Path | str) -> Path:
     vm_discovery_css = _get_vm_discovery_panel_css()
     vm_discovery_html = _get_vm_discovery_panel_html()
 
-    html = f
+    html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="UTF-8">
@@ -3331,7 +3342,7 @@ uv run python -m openadapt_ml.benchmarks.cli run-azure --workers 4</code>
         </div>
     </div>
 </body>
-</html>
+</html>"""
 
     output_path.write_text(html)
     return output_path
@@ -3368,7 +3379,7 @@ def _generate_benchmark_viewer_html(
     domains_json = json.dumps(domains)
 
     # Generate HTML
-    html = f
+    html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="UTF-8">
@@ -3987,7 +3998,7 @@ def _generate_benchmark_viewer_html(
     init();
 </script>
 </body>
-</html>
+</html>"""
 
     return html
 
@@ -4039,7 +4050,7 @@ def _generate_multi_run_benchmark_viewer_html(
     run_options_html = "\n".join(run_options)
 
     # Generate HTML
-    html = f
+    html = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="UTF-8">
@@ -4758,6 +4769,6 @@ def _generate_multi_run_benchmark_viewer_html(
     init();
 </script>
 </body>
-</html>
+</html>"""
 
     return html
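Taken together, these viewer hunks follow a single pattern: each dashboard panel exposes a `_get_*_panel_css()` / `_get_*_panel_html()` pair returning a triple-quoted fragment, and the `generate_*_benchmark_viewer` builders stitch those fragments (plus the shared header imported from `openadapt_ml.training.trainer`) into one self-contained HTML page written with `output_path.write_text(html)`. A simplified sketch of that composition, with tiny stand-in fragments in place of the real CSS/HTML and without the task-result interpolation the real builders do:

```python
from pathlib import Path


def _get_demo_panel_css() -> str:
    """Stand-in for the _get_*_panel_css() helpers: return a CSS fragment."""
    return """
    .demo-panel { border: 1px solid rgba(16, 185, 129, 0.3); }
    """


def _get_demo_panel_html() -> str:
    """Stand-in for the _get_*_panel_html() helpers: return an HTML fragment."""
    return """
    <div class="demo-panel" id="demo-panel">Demo panel</div>
    """


def generate_demo_viewer(output_path: Path | str) -> Path:
    """Compose panel fragments into a single self-contained HTML page (illustrative)."""
    output_path = Path(output_path)
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<style>{_get_demo_panel_css()}</style>
</head>
<body>
{_get_demo_panel_html()}
</body>
</html>"""
    output_path.write_text(html)
    return output_path


if __name__ == "__main__":
    print(generate_demo_viewer("viewer_demo.html"))
```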
@@ -13,7 +13,7 @@ def get_shared_header_css() -> str:
     This CSS is used by both the Training Dashboard and the Viewer.
     Any changes here will affect all dashboards consistently.
     """
-    return
+    return """
     .unified-header {
         display: flex;
         align-items: center;
@@ -101,7 +101,7 @@ def get_shared_header_css() -> str:
         color: var(--text-muted);
         font-family: "SF Mono", Monaco, monospace;
     }
-
+    """
 
 
 def generate_shared_header_html(
@@ -125,14 +125,14 @@ def generate_shared_header_html(
 
     controls_section = ""
     if controls_html or meta_html:
-        controls_section = f
+        controls_section = f"""
         <div class="controls-section">
             {controls_html}
-            {f'<span class="header-meta">{meta_html}</span>' if meta_html else
+            {f'<span class="header-meta">{meta_html}</span>' if meta_html else ""}
         </div>
-
+        """
 
-    return f
+    return f"""
     <div class="unified-header">
         <div class="nav-tabs">
             <a href="dashboard.html" class="nav-tab {training_active}">Training</a>
@@ -141,7 +141,7 @@ def generate_shared_header_html(
         </div>
         {controls_section}
     </div>
-
+    """
 
 
 def build_nav_links() -> list[tuple[str, str]]:
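The hunks above line up with the `openadapt_ml/training/shared_ui.py +7 -7` entry in the file list: `get_shared_header_css()` and `generate_shared_header_html()` now return properly closed triple-quoted strings, and the header itself is a nav-tab bar plus an optional controls section. A hedged reconstruction of `generate_shared_header_html` built only from what these hunks show; the `active_tab` parameter, its default, and the single nav tab are assumptions, since the full signature and tab list are not visible in this diff:

```python
def generate_shared_header_html(
    active_tab: str = "training",
    controls_html: str = "",
    meta_html: str = "",
) -> str:
    """Illustrative reconstruction of the shared dashboard header."""
    training_active = "active" if active_tab == "training" else ""

    controls_section = ""
    if controls_html or meta_html:
        controls_section = f"""
        <div class="controls-section">
            {controls_html}
            {f'<span class="header-meta">{meta_html}</span>' if meta_html else ""}
        </div>
        """

    return f"""
    <div class="unified-header">
        <div class="nav-tabs">
            <a href="dashboard.html" class="nav-tab {training_active}">Training</a>
        </div>
        {controls_section}
    </div>
    """


if __name__ == "__main__":
    print(generate_shared_header_html(controls_html="<button>Refresh</button>", meta_html="run-42"))
```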
@@ -2,7 +2,6 @@
 
 import json
 import random
-import sys
 import time
 from datetime import datetime
 from pathlib import Path
@@ -92,13 +91,15 @@ class StubTrainingProvider:
 
         elapsed = time.time() - self.start_time
 
-        self.losses.append(
-
-
-
-
-
-
+        self.losses.append(
+            {
+                "epoch": self.current_epoch,
+                "step": self.current_step + 1,
+                "loss": loss,
+                "lr": 5e-5,
+                "time": elapsed,
+            }
+        )
 
         self.current_step += 1
 
@@ -123,32 +124,37 @@ class StubTrainingProvider:
         if not sample_path.exists():
             # Try to copy from common capture location
             import shutil
-
+
+            capture_screenshots = (
+                Path.home() / "oa/src/openadapt-capture/turn-off-nightshift/screenshots"
+            )
             if capture_screenshots.exists():
                 sample_path.parent.mkdir(parents=True, exist_ok=True)
                 for img in capture_screenshots.glob("*.png"):
                     shutil.copy(img, sample_path)
                     break  # Just copy the first one
 
-        self.evaluations.append(
-
-
-
-
-            "
-
-
-
-
-
-            "
-
-
-
-
-
-
-
+        self.evaluations.append(
+            {
+                "epoch": self.current_epoch,
+                "sample_idx": 7,  # Match the real training sample
+                "image_path": "screenshots/sample.png",
+                "human_action": {
+                    "type": "click",
+                    "x": 0.65,
+                    "y": 0.65,
+                    "text": None,
+                },
+                "predicted_action": {
+                    "type": "click",
+                    "x": 0.65 + random.uniform(-0.15, 0.15) * (1 - accuracy_boost),
+                    "y": 0.65 + random.uniform(-0.15, 0.15) * (1 - accuracy_boost),
+                    "raw_output": f"Thought: [Stub] Epoch {self.current_epoch} - analyzing screenshot to find target element. The model is learning to identify UI components.\nAction: CLICK(x=0.65, y=0.65)",
+                },
+                "distance": random.uniform(0.05, 0.2) * (1 - accuracy_boost),
+                "correct": random.random() > (0.5 - accuracy_boost),
+            }
+        )
 
     def get_status(self) -> dict:
         """Return current training status.
@@ -161,7 +167,11 @@ class StubTrainingProvider:
 
         # Determine status
         if self.termination_status:
-            status =
+            status = (
+                "completed"
+                if self.termination_status == "auto_complete"
+                else self.termination_status
+            )
         elif self.is_complete():
             status = "completed"
         else:
@@ -215,11 +225,17 @@ class StubTrainingProvider:
         Args:
             callback: Optional function called after each step with status dict
         """
-        self._log(
+        self._log(
+            f"[Stub] Starting simulated training: {self.epochs} epochs, {self.steps_per_epoch} steps/epoch"
+        )
         self._log(f"[Stub] Output: {self.output_dir}")
-        self._log(
+        self._log(
+            f"[Stub] Step delay: {self.step_delay}s (total ~{self.epochs * self.steps_per_epoch * self.step_delay:.0f}s)"
+        )
         if self.early_stop_loss > 0:
-            self._log(
+            self._log(
+                f"[Stub] Early stop: loss < {self.early_stop_loss} for {self.early_stop_patience} steps"
+            )
         self._log("")
 
         while not self.is_complete():
@@ -239,9 +255,13 @@ class StubTrainingProvider:
            if self.early_stop_loss > 0 and loss < self.early_stop_loss:
                 self.consecutive_low_loss += 1
                 if self.consecutive_low_loss >= self.early_stop_patience:
-                    self._log(
+                    self._log(
+                        f"\n[Stub] Auto-stopped: loss ({loss:.4f}) < {self.early_stop_loss} for {self.early_stop_patience} steps"
+                    )
                     self.termination_status = "auto_low_loss"
-                    self.termination_message =
+                    self.termination_message = (
+                        f"Loss reached {loss:.4f} (< {self.early_stop_loss})"
+                    )
                     self.write_status()
                     break
             else:
@@ -253,7 +273,9 @@
             epoch = status["epoch"]
             step = status["step"]
             display_epoch = min(epoch + 1, self.epochs)  # Cap at max for display
-            self._log(
+            self._log(
+                f"  Epoch {display_epoch}/{self.epochs} | Step {step} | Loss: {loss:.4f}"
+            )
 
             if callback:
                 callback(status)
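The final group of hunks (apparently `openadapt_ml/training/stub_provider.py`) is mostly formatter-driven reflow of multi-line `append(...)` and `_log(...)` calls, plus removal of an unused `import sys`, but it does document the shape of the simulated training loop: a per-step loss record, an early-stop check against `early_stop_loss` / `early_stop_patience`, and an optional per-step status callback. A self-contained toy version of that loop pattern; this is not the package's `StubTrainingProvider`, and the names and defaults below are illustrative:

```python
import random
import time


class MiniStubTrainer:
    """Toy simulated trainer mirroring the loop pattern visible in the hunks above."""

    def __init__(self, epochs: int = 2, steps_per_epoch: int = 5, step_delay: float = 0.0,
                 early_stop_loss: float = 0.05, early_stop_patience: int = 3) -> None:
        self.epochs = epochs
        self.steps_per_epoch = steps_per_epoch
        self.step_delay = step_delay
        self.early_stop_loss = early_stop_loss
        self.early_stop_patience = early_stop_patience
        self.losses: list[dict] = []
        self.current_epoch = 0
        self.current_step = 0
        self.consecutive_low_loss = 0
        self.start_time = time.time()

    def is_complete(self) -> bool:
        return self.current_epoch >= self.epochs

    def _step(self) -> float:
        # Fake loss curve: decays toward zero with a little noise.
        progress = self.current_step / max(1, self.epochs * self.steps_per_epoch)
        loss = max(0.01, 1.5 * (1 - progress) + random.uniform(-0.05, 0.05))
        self.losses.append({
            "epoch": self.current_epoch,
            "step": self.current_step + 1,
            "loss": loss,
            "lr": 5e-5,
            "time": time.time() - self.start_time,
        })
        self.current_step += 1
        if self.current_step % self.steps_per_epoch == 0:
            self.current_epoch += 1
        return loss

    def run(self, callback=None) -> None:
        """Run the simulated loop, invoking callback with a status dict after each step."""
        while not self.is_complete():
            loss = self._step()
            # Stop early once the loss stays under the threshold long enough.
            if self.early_stop_loss > 0 and loss < self.early_stop_loss:
                self.consecutive_low_loss += 1
                if self.consecutive_low_loss >= self.early_stop_patience:
                    break
            else:
                self.consecutive_low_loss = 0
            if callback:
                callback({"epoch": self.current_epoch, "step": self.current_step, "loss": loss})
            time.sleep(self.step_delay)


if __name__ == "__main__":
    MiniStubTrainer().run(callback=print)
```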