openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/scripts/compare.py
CHANGED
@@ -17,9 +17,12 @@ from pathlib import Path
 from typing import Any
 
 from openadapt_ml.ingest.capture import capture_to_episode
-from openadapt_ml.schema import Episode,
+from openadapt_ml.schema import Episode, ActionType
 from openadapt_ml.datasets.next_action import SYSTEM_PROMPT, format_action
-from openadapt_ml.training.trainer import
+from openadapt_ml.training.trainer import (
+    _get_shared_header_css,
+    _generate_shared_header_html,
+)
 
 
 def load_model(checkpoint_path: str | None, config_path: str | None = None):
@@ -50,6 +53,7 @@ def load_model(checkpoint_path: str | None, config_path: str | None = None):
     except Exception as e:
         print(f"Warning: Could not load model: {e}")
         import traceback
+
         traceback.print_exc()
         return None
 
@@ -79,7 +83,9 @@ def predict_action(
             history_text += f" {i}. {action_text}\n"
         history_text += f"\nThis is step {step_index + 1} of {total_steps}. "
     else:
-        history_text =
+        history_text = (
+            f"This is step 1 of {total_steps} (no actions completed yet). "
+        )
 
     # Match training prompt format exactly
     user_content = (
@@ -87,7 +93,7 @@ def predict_action(
         f"{history_text}"
         "Look at the screenshot and determine the NEXT action.\n\n"
         "Thought: [what element to interact with and why]\n"
-
+        'Action: [CLICK(x=..., y=...) or TYPE(text="...") or WAIT() or DONE()]'
     )
 
     # Build sample in the format expected by the adapter
@@ -107,14 +113,20 @@ def predict_action(
 
     # Try to extract coordinates from output
     # Match patterns like: CLICK(x=0.42, y=0.31) or click at (0.42, 0.31)
-    click_match = re.search(
+    click_match = re.search(
+        r"CLICK\s*\(\s*x\s*=\s*([\d.]+)\s*,\s*y\s*=\s*([\d.]+)\s*\)",
+        result,
+        re.IGNORECASE,
+    )
     if not click_match:
-        click_match = re.search(
+        click_match = re.search(
+            r"click.*?\(\s*([\d.]+)\s*,\s*([\d.]+)\s*\)", result, re.IGNORECASE
+        )
     if not click_match:
         # Try to find any two decimal numbers
-        nums = re.findall(r
+        nums = re.findall(r"(0\.\d+)", result)
         if len(nums) >= 2:
-            click_match = type(
+            click_match = type("Match", (), {"group": lambda s, i: nums[i - 1]})()
 
     if click_match:
         action["x"] = float(click_match.group(1))
@@ -124,6 +136,7 @@ def predict_action(
         return action
     except Exception as e:
         import traceback
+
         traceback.print_exc()
         return {"type": "error", "error": str(e)}
 
@@ -145,7 +158,11 @@ def generate_comparison_data(
         action_x, action_y = None, None
         if step.action.normalized_coordinates:
             action_x, action_y = step.action.normalized_coordinates
-        action_type_str =
+        action_type_str = (
+            step.action.type.value
+            if isinstance(step.action.type, ActionType)
+            else step.action.type
+        )
         step_data = {
             "index": i,
             "time": step.step_index,
@@ -204,7 +221,7 @@ def generate_comparison_html(
     comparison_json = json.dumps(comparison_data)
 
     # Add comparison panel above screenshot in main content
-    comparison_panel =
+    comparison_panel = """
     <div class="comparison-panel" id="comparison-panel">
       <div class="comparison-header">
         <h2>Action Comparison</h2>
@@ -223,9 +240,9 @@ def generate_comparison_html(
         <div class="match-indicator" id="match-indicator"></div>
       </div>
     </div>
-
+    """
 
-    comparison_styles =
+    comparison_styles = """
     <style>
     /* Navigation bar */
     .nav-bar {
@@ -432,9 +449,9 @@ def generate_comparison_html(
         border-color: var(--accent);
     }
     </style>
-
+    """
 
-    comparison_script = f
+    comparison_script = f"""
    <script>
    // Consolidated viewer script - all variables and functions in one scope
    // Export to window for cross-script access (for checkpoint dropdown script)
@@ -714,32 +731,33 @@ def generate_comparison_html(
        // Note: Nav is now injected via shared header HTML, no need for discoverDashboards()
     }}, 100);
     </script>
-
+    """
 
     # Insert into HTML
     # Add shared header CSS and comparison styles before </head>
-    shared_header_css = f
-    html = base_html.replace(
+    shared_header_css = f"<style>{_get_shared_header_css()}</style>"
+    html = base_html.replace(
+        "</head>", shared_header_css + comparison_styles + "</head>"
+    )
 
     # Add shared header HTML after container div
     shared_header_html = _generate_shared_header_html("viewer")
     html = html.replace(
-        '<div class="container">',
-        '<div class="container">\n' + shared_header_html
+        '<div class="container">', '<div class="container">\n' + shared_header_html
     )
 
     # Add comparison panel as full-width row BEFORE the main-content/sidebar flex row
     # Insert right BEFORE <div class="main-content"> as a sibling
     html = html.replace(
         '<div class="main-content">',
-        comparison_panel + '\n    <div class="main-content">'
+        comparison_panel + '\n    <div class="main-content">',
     )
 
     # Add script before </body>
-    html = html.replace(
+    html = html.replace("</body>", comparison_script + "</body>")
 
     # Write output
-    output_path.write_text(html, encoding=
+    output_path.write_text(html, encoding="utf-8")
     print(f"Generated comparison viewer: {output_path}")
 
     except ImportError:
@@ -752,20 +770,24 @@ def main():
         description="Compare human actions vs model predictions on a capture."
     )
     parser.add_argument(
-        "--capture",
+        "--capture",
+        "-c",
         required=True,
         help="Path to openadapt-capture recording directory",
     )
     parser.add_argument(
-        "--checkpoint",
+        "--checkpoint",
+        "-m",
         help="Path to trained model checkpoint (optional)",
     )
     parser.add_argument(
-        "--output",
+        "--output",
+        "-o",
         help="Output HTML path (default: capture_dir/comparison.html)",
     )
     parser.add_argument(
-        "--goal",
+        "--goal",
+        "-g",
         help="Task goal/description (auto-detected from capture if not provided)",
     )
     parser.add_argument(
@@ -797,7 +819,7 @@ def main():
     matches = sum(1 for d in comparison_data if d.get("match") is True)
     total = sum(1 for d in comparison_data if d.get("match") is not None)
     if total > 0:
-        print(f"Match rate: {matches}/{total} ({100*matches/total:.1f}%)")
+        print(f"Match rate: {matches}/{total} ({100 * matches / total:.1f}%)")
 
     # Generate HTML
     output_path = Path(args.output) if args.output else capture_path / "comparison.html"
@@ -806,6 +828,7 @@ def main():
     # Open in browser
     if args.open:
         import webbrowser
+
         webbrowser.open(f"file://{output_path.absolute()}")
 
     return 0
@@ -842,11 +865,13 @@ def generate_unified_viewer(
     capture_id = capture_path.name if capture_path else "unknown"
 
     if available_captures is None:
-        available_captures = [
-
-
-
-
+        available_captures = [
+            {
+                "id": capture_id,
+                "name": episode.instruction or "Untitled",
+                "steps": len(episode.steps),
+            }
+        ]
 
     # Prepare base capture data (human actions only, no predictions)
     base_data = []
@@ -855,18 +880,24 @@ def generate_unified_viewer(
         action_x, action_y = None, None
         if step.action.normalized_coordinates:
             action_x, action_y = step.action.normalized_coordinates
-        action_type_str =
-
-
-
-
-
-
-        "
-        "
-        "
-
-
+        action_type_str = (
+            step.action.type.value
+            if isinstance(step.action.type, ActionType)
+            else step.action.type
+        )
+        base_data.append(
+            {
+                "index": i,
+                "time": step.step_index,
+                "image_path": step.observation.screenshot_path,
+                "human_action": {
+                    "type": action_type_str,
+                    "x": action_x,
+                    "y": action_y,
+                    "text": step.action.text,
+                },
+            }
+        )
 
     # JSON encode all data
     base_data_json = json.dumps(base_data)
@@ -875,7 +906,7 @@ def generate_unified_viewer(
     current_capture_json = json.dumps(capture_id)
 
     # Unified viewer styles and controls
-    unified_styles =
+    unified_styles = """
     <style>
     /* Navigation bar */
     .nav-bar {
@@ -1129,10 +1160,10 @@ def generate_unified_viewer(
         border-color: var(--accent);
     }
     </style>
-
+    """
 
     # Comparison panel HTML
-    comparison_panel =
+    comparison_panel = """
     <div class="viewer-controls" id="viewer-controls">
       <div class="control-group">
         <span class="control-label">Training Example:</span>
@@ -1162,10 +1193,10 @@ def generate_unified_viewer(
         <div class="match-indicator" id="match-indicator"></div>
      </div>
    </div>
-
+    """
 
     # Unified viewer script
-    unified_script = f
+    unified_script = f"""
    <script>
    // Consolidated unified viewer script - all variables in one scope
    // Data
@@ -1477,18 +1508,18 @@ def generate_unified_viewer(
        updateComparison(currentIndex);
     }}, 100);
     </script>
-
+    """
 
     # Inject into HTML
-    html = base_html.replace(
+    html = base_html.replace("</head>", unified_styles + "</head>")
     html = html.replace(
         '<div class="main-content">',
-        comparison_panel + '\n    <div class="main-content">'
+        comparison_panel + '\n    <div class="main-content">',
     )
-    html = html.replace(
+    html = html.replace("</body>", unified_script + "</body>")
 
     # Write output
-    output_path.write_text(html, encoding=
+    output_path.write_text(html, encoding="utf-8")
     print(f"Generated unified viewer: {output_path}")
 
     except ImportError:

openadapt_ml/scripts/demo_policy.py
CHANGED
@@ -20,7 +20,9 @@ def main() -> None:
     args = parser.parse_args()
 
     # Use synthetic data to build one SFT-style sample
-    sessions = generate_synthetic_sessions(
+    sessions = generate_synthetic_sessions(
+        num_sessions=1, seed=99, output_dir="synthetic/demo"
+    )
     episodes = [ep for sess in sessions for ep in sess.episodes]
     samples = build_next_action_sft_samples(episodes)
 
@@ -58,5 +60,6 @@ def main() -> None:
     print("State:", state)
     print("Raw output:", raw_text)
 
+
 if __name__ == "__main__":
     main()

openadapt_ml/scripts/eval_policy.py
CHANGED
@@ -3,11 +3,11 @@ from __future__ import annotations
 import argparse
 import json
 from pathlib import Path
-from typing import Any, Dict,
+from typing import Any, Dict, Optional
 
 import yaml
 
-from openadapt_ml.datasets.next_action import build_next_action_sft_samples
+from openadapt_ml.datasets.next_action import build_next_action_sft_samples
 from openadapt_ml.evals.trajectory_matching import evaluate_policy_on_episodes
 from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
 from openadapt_ml.models.dummy_adapter import DummyAdapter
@@ -199,7 +199,9 @@ def main(
             "mean_episode_step_score": metrics.mean_episode_step_score,
             "weak_episode_success_rate": metrics.weak_episode_success_rate,
             "state_success_rate": metrics.state_success_rate,
-            "element_accuracy": metrics.element_accuracy
+            "element_accuracy": metrics.element_accuracy
+            if hasattr(metrics, "element_accuracy")
+            else None,
         },
     }
     out_path = Path(output_json)
@@ -210,8 +212,12 @@ def main(
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Evaluate a policy on synthetic episodes."
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to YAML config file."
+    )
     parser.add_argument(
         "--backend",
         type=str,
@@ -248,19 +254,19 @@ if __name__ == "__main__":
         choices=["coord", "som"],
         default="coord",
         help="DSL mode: 'coord' for coordinate-based (CLICK(x=..., y=...)), "
-
+        "'som' for Set-of-Marks index-based (CLICK([1])). Default: coord.",
     )
     parser.add_argument(
         "--overfit",
         action="store_true",
         help="Evaluate on training data to check memorization/overfitting. "
-
+        "If not set, generates fresh data to test generalization.",
     )
     parser.add_argument(
         "--no-jitter",
         action="store_true",
         help="Disable jitter for deterministic UI layouts. "
-
+        "Useful for testing memorization of fixed layouts.",
     )
     parser.add_argument(
         "--scenario",
@@ -268,7 +274,7 @@ if __name__ == "__main__":
         choices=["login", "registration"],
         default=None,
         help="Scenario type: 'login' (6 steps, 3 elements) or 'registration' (12 steps, 6 elements). "
-
+        "Overrides config if provided.",
     )
     args = parser.parse_args()
 
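
The element_accuracy guard added to eval_policy.py above keeps the metrics dict JSON-serializable when the metrics object predates that field. A minimal standalone sketch of the same pattern; the SimpleNamespace here is a stand-in for illustration, not the project's actual metrics class:

    from types import SimpleNamespace

    # Stand-in metrics result that happens to lack element_accuracy.
    metrics = SimpleNamespace(state_success_rate=0.75)

    summary = {
        "state_success_rate": metrics.state_success_rate,
        # Same guard as in the diff: fall back to None when the attribute is absent.
        "element_accuracy": metrics.element_accuracy
        if hasattr(metrics, "element_accuracy")
        else None,
    }
    print(summary)  # {'state_success_rate': 0.75, 'element_accuracy': None}

An equivalent one-liner is getattr(metrics, "element_accuracy", None); the diff keeps the explicit conditional, which mirrors the surrounding dict entries.
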
openadapt_ml/scripts/make_gif.py
CHANGED
@@ -8,7 +8,9 @@ from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
 
 def main() -> None:
     output_dir = Path("synthetic") / "debug"
-    episodes = generate_synthetic_episodes(
+    episodes = generate_synthetic_episodes(
+        num_episodes=2, seed=42, output_dir=output_dir
+    )
 
     print(f"Generated {len(episodes)} episodes into {output_dir.resolve()}")
 
openadapt_ml/scripts/train.py
CHANGED
@@ -126,6 +126,7 @@ def main(
     # Disable Unsloth if requested
     if not use_unsloth:
         import os
+
         os.environ["OPENADAPT_DISABLE_UNSLOTH"] = "1"
 
     base_path = Path(capture_path).parent if capture_path else None
@@ -142,6 +143,7 @@ def main(
     # Open dashboard in browser if requested
     if open_dashboard:
         import webbrowser
+
         dashboard_path = Path(output_dir) / "dashboard.html"
         if dashboard_path.exists():
             webbrowser.open(f"file://{dashboard_path.absolute()}")
@@ -153,22 +155,32 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Train Qwen-VL adapter on synthetic data or openadapt-capture recordings."
     )
-    parser.add_argument(
-
-
-    parser.add_argument(
-
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to YAML config file."
+    )
+    parser.add_argument(
+        "--capture", type=str, help="Path to openadapt-capture recording directory."
+    )
+    parser.add_argument(
+        "--goal",
+        type=str,
+        help="Task goal/description (overrides recording's task description).",
+    )
+    parser.add_argument(
+        "--output-dir", type=str, help="Output directory for logs and dashboard."
+    )
+    parser.add_argument(
+        "--open", action="store_true", help="Open training dashboard in browser."
+    )
 
     parser.add_argument(
         "--use-unsloth",
         action="store_true",
         default=True,
-        help="Enable Unsloth optimizations (default)."
+        help="Enable Unsloth optimizations (default).",
     )
     parser.add_argument(
-        "--no-unsloth",
-        action="store_true",
-        help="Disable Unsloth optimizations."
+        "--no-unsloth", action="store_true", help="Disable Unsloth optimizations."
     )
     args = parser.parse_args()
 
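
train.py's CLI now spells out its arguments one per add_argument call, and main() disables Unsloth by exporting OPENADAPT_DISABLE_UNSLOTH=1 when use_unsloth is false. A hedged sketch of how the paired flags can be resolved; only the two flags and the environment variable appear in the diff, and the combination rule below is an assumption for illustration:

    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--use-unsloth", action="store_true", default=True,
                        help="Enable Unsloth optimizations (default).")
    parser.add_argument("--no-unsloth", action="store_true",
                        help="Disable Unsloth optimizations.")
    args = parser.parse_args(["--no-unsloth"])

    # Assumed resolution rule: an explicit --no-unsloth wins over the default-on flag.
    use_unsloth = args.use_unsloth and not args.no_unsloth
    if not use_unsloth:
        # Same environment toggle main() sets in the diff.
        os.environ["OPENADAPT_DISABLE_UNSLOTH"] = "1"
    print(use_unsloth, os.environ.get("OPENADAPT_DISABLE_UNSLOTH"))  # False 1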