openadapt-ml 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.2.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/experiments/waa_demo/runner.py
CHANGED

@@ -26,7 +26,6 @@ import sys
 from typing import TYPE_CHECKING, Any
 
 from openadapt_ml.experiments.waa_demo.demos import (
-    DEMOS,
     format_demo_for_prompt,
     get_complete_demos,
     get_demo,
@@ -34,14 +33,16 @@ from openadapt_ml.experiments.waa_demo.demos import (
 )
 from openadapt_ml.experiments.waa_demo.tasks import (
     TASKS,
-    WATask,
-    get_manual_tasks,
     get_recorded_tasks,
     get_task,
 )
 
 if TYPE_CHECKING:
-    from
+    from openadapt_evals import (
+        BenchmarkAction,
+        BenchmarkObservation,
+        BenchmarkTask,
+    )
 
 logger = logging.getLogger(__name__)
 
@@ -73,7 +74,9 @@ def cmd_list(args: argparse.Namespace) -> int:
     print()
     print("Tasks needing recorded demos on Windows:")
     for task in get_recorded_tasks():
-        print(
+        print(
+            f" - #{list(TASKS.keys())[list(TASKS.values()).index(task)]}: {task.instruction}"
+        )
 
     return 0
 
@@ -122,7 +125,9 @@ def cmd_prompt(args: argparse.Namespace) -> int:
     else:
         print(f"Task: {task.instruction}")
         print()
-        print(
+        print(
+            "Analyze the screenshot and provide the next action to complete this task."
+        )
     if demo and "[PLACEHOLDER" in demo:
         print()
         print("[Note: Demo not available - this would be zero-shot]")
@@ -208,6 +213,7 @@ Think step by step, then output the action on a new line starting with "ACTION:"
         """Lazily initialize the API adapter."""
         if self._adapter is None:
             from openadapt_ml.models.api_adapter import ApiVLMAdapter
+
             self._adapter = ApiVLMAdapter(
                 provider=self.provider,
                 api_key=self.api_key,
@@ -261,7 +267,7 @@ Think step by step, then output the action on a new line starting with "ACTION:"
         Returns:
             BenchmarkAction parsed from VLM response
         """
-        from
+        from openadapt_evals import BenchmarkAction
 
         adapter = self._get_adapter()
 
@@ -325,7 +331,9 @@ Think step by step, then output the action on a new line starting with "ACTION:"
             history_str = self._format_history(history)
             content_parts.append(f"Previous actions:\n{history_str}")
 
-        content_parts.append(
+        content_parts.append(
+            "\nAnalyze the current screenshot and provide the next action."
+        )
 
         sample: dict[str, Any] = {
             "messages": [
@@ -401,7 +409,7 @@ Think step by step, then output the action on a new line starting with "ACTION:"
         Uses the same parsing logic as APIBenchmarkAgent.
         """
         import re
-        from
+        from openadapt_evals import BenchmarkAction
 
         raw_action = {"response": response}
 
@@ -457,7 +465,9 @@ Think step by step, then output the action on a new line starting with "ACTION:"
             r"TYPE\s*\(\s*[\"'](.+?)[\"']\s*\)", action_line, re.IGNORECASE
         )
         if type_match:
-            return BenchmarkAction(
+            return BenchmarkAction(
+                type="type", text=type_match.group(1), raw_action=raw_action
+            )
 
         # Parse KEY
         key_match = re.match(r"KEY\s*\(\s*(.+?)\s*\)", action_line, re.IGNORECASE)
@@ -502,11 +512,12 @@ def cmd_run(args: argparse.Namespace) -> int:
     This integrates with the benchmarks infrastructure to run either
     zero-shot or demo-conditioned evaluation on WAA tasks.
     """
-    from
+    from openadapt_evals import (
+        EvaluationConfig,
         WAAMockAdapter,
         compute_metrics,
+        evaluate_agent_on_benchmark,
     )
-    from openadapt_ml.benchmarks.runner import EvaluationConfig, evaluate_agent_on_benchmark
 
     print("WAA Demo-Conditioned Experiment Runner")
     print("=" * 80)
@@ -539,7 +550,7 @@ def cmd_run(args: argparse.Namespace) -> int:
     print(f"Running {len(task_ids)} tasks with complete demos")
 
     # Check for mock mode or real WAA
-    use_mock = getattr(args,
+    use_mock = getattr(args, "mock", False)
 
     if use_mock:
         print("Using mock adapter (no Windows required)")
@@ -599,7 +610,11 @@ def cmd_run(args: argparse.Namespace) -> int:
     except Exception as e:
         print(f"Error during evaluation: {e}")
         if "API key" in str(e) or "api_key" in str(e).lower():
-            key_name =
+            key_name = (
+                "ANTHROPIC_API_KEY"
+                if args.provider == "anthropic"
+                else "OPENAI_API_KEY"
+            )
             print(f"\nMake sure {key_name} is set in your environment or .env file.")
             return 1
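Note: the dominant change in this runner is a dependency migration. Benchmark types (`BenchmarkAction`, `BenchmarkObservation`, `BenchmarkTask`) and the evaluation entry points now come from the external `openadapt_evals` package, replacing the removed `openadapt_ml.benchmarks.runner`, `base`, and `waa` modules (see the deletions in the file list above). A minimal sketch of the new-style wiring, with assumptions flagged in comments:

```python
# Sketch only. The imports are confirmed by this diff; EvaluationConfig's
# fields and evaluate_agent_on_benchmark's argument order are assumptions.
from openadapt_evals import (
    EvaluationConfig,
    WAAMockAdapter,
    compute_metrics,
    evaluate_agent_on_benchmark,
)

adapter = WAAMockAdapter()   # mock adapter: "no Windows required"
config = EvaluationConfig()  # field values not shown in this diff
agent = ...                  # e.g. the APIBenchmarkAgent defined in this module
results = evaluate_agent_on_benchmark(agent, adapter, config)  # order assumed
metrics = compute_metrics(results)
```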
openadapt_ml/export/parquet.py
CHANGED

@@ -7,7 +7,6 @@ Episode JSON remains the canonical representation.
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -73,7 +72,11 @@ def to_parquet(
             # Extract action type value (enum -> string)
             action_type = None
             if step.action:
-                action_type =
+                action_type = (
+                    step.action.type.value
+                    if hasattr(step.action.type, "value")
+                    else step.action.type
+                )
 
             row = {
                 "episode_id": episode.episode_id,
@@ -84,8 +87,12 @@
                 "action_type": action_type,
                 "x": x,
                 "y": y,
-                "end_x": step.action.normalized_end[0]
-
+                "end_x": step.action.normalized_end[0]
+                if step.action and step.action.normalized_end
+                else None,
+                "end_y": step.action.normalized_end[1]
+                if step.action and step.action.normalized_end
+                else None,
                 "text": getattr(step.action, "text", None) if step.action else None,
                 "key": getattr(step.action, "key", None) if step.action else None,
                 "scroll_direction": (
@@ -131,33 +138,37 @@ def _write_summary(episodes: list[Episode], output_path: str) -> None:
     for episode in episodes:
         first_t = episode.steps[0].timestamp if episode.steps else None
         last_t = episode.steps[-1].timestamp if episode.steps else None
-        duration = (
+        duration = (
+            (last_t - first_t) if first_t is not None and last_t is not None else None
+        )
 
         # Extract action type values (enum -> string)
         first_action_type = None
         last_action_type = None
         if episode.steps and episode.steps[0].action:
             t = episode.steps[0].action.type
-            first_action_type = t.value if hasattr(t,
+            first_action_type = t.value if hasattr(t, "value") else t
         if episode.steps and episode.steps[-1].action:
             t = episode.steps[-1].action.type
-            last_action_type = t.value if hasattr(t,
-
-        summary_rows.append(
-
-
-
-
-
-
-
-
-
-
-
-
-
+            last_action_type = t.value if hasattr(t, "value") else t
+
+        summary_rows.append(
+            {
+                "episode_id": episode.episode_id,
+                "instruction": episode.instruction,
+                "task_id": getattr(episode, "task_id", None),
+                "step_count": len(episode.steps),
+                "duration": duration,
+                "success": getattr(episode, "success", None),
+                "first_action_type": first_action_type,
+                "last_action_type": last_action_type,
+                "metadata": (
+                    json.dumps(episode.metadata)
+                    if hasattr(episode, "metadata") and episode.metadata
+                    else None
+                ),
+            }
+        )
 
     summary_table = pa.Table.from_pylist(summary_rows)
     summary_path = str(output_path).replace(".parquet", "_summary.parquet")
@@ -255,7 +266,8 @@ def from_parquet(parquet_path: str) -> list[Episode]:
 
         episode = Episode(
             episode_id=str(episode_id),
-            instruction=group.iloc[0].get("instruction")
+            instruction=group.iloc[0].get("instruction")
+            or group.iloc[0].get("goal", ""),
             steps=steps,
             task_id=group.iloc[0].get("task_id"),
             metadata=metadata,
openadapt_ml/grounding/detector.py
CHANGED

@@ -20,7 +20,7 @@ from openadapt_ml.config import settings
 from openadapt_ml.grounding.base import GroundingModule, RegionCandidate
 
 if TYPE_CHECKING:
-    from PIL import Image
+    from PIL import Image
 
 
 class GeminiGrounder(GroundingModule):
@@ -104,7 +104,7 @@ class GeminiGrounder(GroundingModule):
 
         # Try to parse JSON from the response
         # Look for JSON array or object in the response
-        json_match = re.search(r
+        json_match = re.search(r"\[[\s\S]*\]|\{[\s\S]*\}", response_text)
         if not json_match:
             return candidates
 
@@ -340,11 +340,11 @@ Example output format:
             response_text = response.text
 
             # Try to extract JSON array from response
-            json_match = re.search(r
+            json_match = re.search(r"\[[\s\S]*\]", response_text)
             if not json_match:
                 # Maybe it's just a plain array
-                if response_text.strip().startswith(
-                    json_match = re.match(r
+                if response_text.strip().startswith("["):
+                    json_match = re.match(r".*", response_text)
                 else:
                     return []
 
@@ -369,13 +369,18 @@ Example output format:
                     max(0, min(1, y2 / screenshot.height)),
                 ]
 
-                normalized_elements.append(
-
-
-
-
-
-
+                normalized_elements.append(
+                    {
+                        "id": elem.get("id", len(normalized_elements) + 1),
+                        "label": elem.get(
+                            "label",
+                            f"Element {elem.get('id', len(normalized_elements) + 1)}",
+                        ),
+                        "bbox": norm_bbox,
+                        "type": elem.get("type", "other"),
+                        "text": elem.get("text", ""),
+                    }
+                )
 
             return normalized_elements
 
@@ -549,8 +554,7 @@ class DetectorGrounder(GroundingModule):
             self._backend = GeminiGrounder(**kwargs)
         elif backend == "omniparser":
             raise NotImplementedError(
-                "OmniParser backend not yet implemented. "
-                "Use backend='gemini' for now."
+                "OmniParser backend not yet implemented. Use backend='gemini' for now."
             )
         else:
             raise ValueError(f"Unknown backend: {backend}")
openadapt_ml/ingest/__init__.py
CHANGED

@@ -27,16 +27,18 @@ __all__ = [
 
 # Conditionally export capture functions if openadapt-capture is installed
 try:
-    from openadapt_ml.ingest.capture import (
+    from openadapt_ml.ingest.capture import (  # noqa: F401
         capture_to_episode,
         capture_to_session,
         load_captures_as_sessions,
     )
 
-    __all__.extend(
-
-
-
-
+    __all__.extend(
+        [
+            "capture_to_episode",
+            "capture_to_session",
+            "load_captures_as_sessions",
+        ]
+    )
 except ImportError:
     pass
openadapt_ml/ingest/capture.py
CHANGED

@@ -6,7 +6,6 @@ and convert them to the Episode/Step format used by openadapt-ml for training.
 
 from __future__ import annotations
 
-import uuid
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -101,7 +100,7 @@ def capture_to_episode(
     """
     try:
         from openadapt_capture import Capture
-        from openadapt_capture.events import (
+        from openadapt_capture.events import (  # noqa: F401
            EventType,
            KeyTypeEvent,
            MouseClickEvent,
@@ -135,7 +134,9 @@ def capture_to_episode(
     dir_name = capture_path.name
     if dir_name and dir_name != "capture":
         # Convert kebab-case/snake_case to readable text
-        instruction =
+        instruction = (
+            dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
+        )
     else:
         instruction = "Complete the recorded workflow"
 
@@ -155,9 +156,7 @@
         screenshot_path = _save_screenshot(screenshot, output_dir, episode_id, idx)
 
         # Normalize coordinates
-        norm_coords = _normalize_coords(
-            action.x, action.y, screen_width, screen_height
-        )
+        norm_coords = _normalize_coords(action.x, action.y, screen_width, screen_height)
 
         # Map event type to openadapt-ml ActionType
         event_type = action.type
@@ -174,15 +173,15 @@
         if isinstance(action.event, MouseDragEvent):
             end_x = action.event.x + action.event.dx
             end_y = action.event.y + action.event.dy
-            norm_end = _normalize_coords(
-
+            norm_end = _normalize_coords(end_x, end_y, screen_width, screen_height)
+            ml_action = ml_action.model_copy(
+                update={
+                    "normalized_end": norm_end,
+                    "raw": {
+                        "button": action.event.button,
+                    },
+                }
             )
-            ml_action = ml_action.model_copy(update={
-                "normalized_end": norm_end,
-                "raw": {
-                    "button": action.event.button,
-                },
-            })
 
         # Handle scroll events
         if isinstance(action.event, MouseScrollEvent):
@@ -197,13 +196,15 @@
             elif action.event.dx < 0:
                 scroll_direction = "left"
 
-            ml_action = ml_action.model_copy(
-
-
-            "
-
-
-
+            ml_action = ml_action.model_copy(
+                update={
+                    "scroll_direction": scroll_direction,
+                    "raw": {
+                        "dx": action.event.dx,
+                        "dy": action.event.dy,
+                    },
+                }
+            )
 
         # Handle keyboard events - include key names for special keys
         if action.keys:
@@ -227,7 +228,9 @@
         last_step = steps[-1]
         done_step = Step(
             step_index=len(steps),
-            observation=Observation(
+            observation=Observation(
+                screenshot_path=last_step.observation.screenshot_path
+            ),
             action=Action(type=ActionType.DONE),
             reasoning="Workflow complete.",
             timestamp=(last_step.timestamp or 0) + 0.1,
openadapt_ml/ingest/loader.py
CHANGED

@@ -8,9 +8,8 @@ from __future__ import annotations
 
 import json
 from pathlib import Path
-from typing import Any, Dict, List,
+from typing import Any, Dict, List, Union
 
-from pydantic import ValidationError
 
 from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
 
@@ -143,7 +142,8 @@ def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
         # Parse observation
         obs_data = step_data.get("observation", {})
         observation = Observation(
-            screenshot_path=obs_data.get("screenshot_path")
+            screenshot_path=obs_data.get("screenshot_path")
+            or obs_data.get("image_path"),
             raw=obs_data.get("raw") or obs_data.get("meta"),
             a11y_tree=obs_data.get("a11y_tree") or obs_data.get("accessibility_tree"),
             dom=obs_data.get("dom") or obs_data.get("dom_html"),
@@ -169,7 +169,10 @@ def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
         normalized_end = None
         if action_data.get("normalized_end"):
             normalized_end = tuple(action_data["normalized_end"])
-        elif
+        elif (
+            action_data.get("end_x") is not None
+            and action_data.get("end_y") is not None
+        ):
             normalized_end = (action_data["end_x"], action_data["end_y"])
 
         action = Action(