openadapt-ml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. openadapt_ml/__init__.py +0 -0
  2. openadapt_ml/benchmarks/__init__.py +125 -0
  3. openadapt_ml/benchmarks/agent.py +825 -0
  4. openadapt_ml/benchmarks/azure.py +761 -0
  5. openadapt_ml/benchmarks/base.py +366 -0
  6. openadapt_ml/benchmarks/cli.py +884 -0
  7. openadapt_ml/benchmarks/data_collection.py +432 -0
  8. openadapt_ml/benchmarks/runner.py +381 -0
  9. openadapt_ml/benchmarks/waa.py +704 -0
  10. openadapt_ml/cloud/__init__.py +5 -0
  11. openadapt_ml/cloud/azure_inference.py +441 -0
  12. openadapt_ml/cloud/lambda_labs.py +2445 -0
  13. openadapt_ml/cloud/local.py +790 -0
  14. openadapt_ml/config.py +56 -0
  15. openadapt_ml/datasets/__init__.py +0 -0
  16. openadapt_ml/datasets/next_action.py +507 -0
  17. openadapt_ml/evals/__init__.py +23 -0
  18. openadapt_ml/evals/grounding.py +241 -0
  19. openadapt_ml/evals/plot_eval_metrics.py +174 -0
  20. openadapt_ml/evals/trajectory_matching.py +486 -0
  21. openadapt_ml/grounding/__init__.py +45 -0
  22. openadapt_ml/grounding/base.py +236 -0
  23. openadapt_ml/grounding/detector.py +570 -0
  24. openadapt_ml/ingest/__init__.py +43 -0
  25. openadapt_ml/ingest/capture.py +312 -0
  26. openadapt_ml/ingest/loader.py +232 -0
  27. openadapt_ml/ingest/synthetic.py +1102 -0
  28. openadapt_ml/models/__init__.py +0 -0
  29. openadapt_ml/models/api_adapter.py +171 -0
  30. openadapt_ml/models/base_adapter.py +59 -0
  31. openadapt_ml/models/dummy_adapter.py +42 -0
  32. openadapt_ml/models/qwen_vl.py +426 -0
  33. openadapt_ml/runtime/__init__.py +0 -0
  34. openadapt_ml/runtime/policy.py +182 -0
  35. openadapt_ml/schemas/__init__.py +53 -0
  36. openadapt_ml/schemas/sessions.py +122 -0
  37. openadapt_ml/schemas/validation.py +252 -0
  38. openadapt_ml/scripts/__init__.py +0 -0
  39. openadapt_ml/scripts/compare.py +1490 -0
  40. openadapt_ml/scripts/demo_policy.py +62 -0
  41. openadapt_ml/scripts/eval_policy.py +287 -0
  42. openadapt_ml/scripts/make_gif.py +153 -0
  43. openadapt_ml/scripts/prepare_synthetic.py +43 -0
  44. openadapt_ml/scripts/run_qwen_login_benchmark.py +192 -0
  45. openadapt_ml/scripts/train.py +174 -0
  46. openadapt_ml/training/__init__.py +0 -0
  47. openadapt_ml/training/benchmark_viewer.py +1538 -0
  48. openadapt_ml/training/shared_ui.py +157 -0
  49. openadapt_ml/training/stub_provider.py +276 -0
  50. openadapt_ml/training/trainer.py +2446 -0
  51. openadapt_ml/training/viewer.py +2970 -0
  52. openadapt_ml-0.1.0.dist-info/METADATA +818 -0
  53. openadapt_ml-0.1.0.dist-info/RECORD +55 -0
  54. openadapt_ml-0.1.0.dist-info/WHEEL +4 -0
  55. openadapt_ml-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,312 @@
1
+ """Adapter for converting openadapt-capture recordings to openadapt-ml Episode format.
2
+
3
+ This module provides functions to ingest real GUI recordings from openadapt-capture
4
+ and convert them to the Episode/Step format used by openadapt-ml for training.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import uuid
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ from openadapt_ml.schemas.sessions import Action, Episode, Observation, Session, Step
14
+
15
+ if TYPE_CHECKING:
16
+ from PIL import Image
17
+
18
# Maps openadapt-capture event type strings to openadapt-ml action types.
# NOTE: both "key.down" and "key.up" map to "key_press", so one physical
# keystroke recorded as separate down/up events yields two actions.
EVENT_TYPE_MAP = {
    "mouse.singleclick": "click",
    "mouse.click": "click",
    "mouse.doubleclick": "double_click",
    "mouse.drag": "drag",
    "mouse.scroll": "scroll",
    "key.type": "type",
    "key.down": "key_press",
    "key.up": "key_press",
}
29
+
30
+
31
+ def _normalize_coords(
32
+ x: float | None,
33
+ y: float | None,
34
+ screen_width: int,
35
+ screen_height: int,
36
+ ) -> tuple[float | None, float | None]:
37
+ """Normalize pixel coordinates to [0, 1] range.
38
+
39
+ Args:
40
+ x: X coordinate in pixels.
41
+ y: Y coordinate in pixels.
42
+ screen_width: Screen width in pixels.
43
+ screen_height: Screen height in pixels.
44
+
45
+ Returns:
46
+ Tuple of (normalized_x, normalized_y).
47
+ """
48
+ if x is None or y is None:
49
+ return None, None
50
+ return x / screen_width, y / screen_height
51
+
52
+
53
+ def _save_screenshot(
54
+ image: "Image",
55
+ output_dir: Path,
56
+ episode_id: str,
57
+ step_idx: int,
58
+ ) -> str:
59
+ """Save a screenshot and return its path.
60
+
61
+ Args:
62
+ image: PIL Image to save.
63
+ output_dir: Directory to save images to.
64
+ episode_id: Episode identifier.
65
+ step_idx: Step index.
66
+
67
+ Returns:
68
+ Path to saved image.
69
+ """
70
+ output_dir.mkdir(parents=True, exist_ok=True)
71
+ filename = f"{episode_id}_step_{step_idx}.png"
72
+ filepath = output_dir / filename
73
+ image.save(filepath)
74
+ return str(filepath)
75
+
76
+
77
def capture_to_episode(
    capture_path: str | Path,
    output_dir: str | Path | None = None,
    goal: str | None = None,
    episode_id: str | None = None,
    include_moves: bool = False,
) -> Episode:
    """Convert an openadapt-capture recording to an Episode.

    Args:
        capture_path: Path to the capture directory.
        output_dir: Directory to save extracted screenshots. If None, uses
            capture_path/screenshots.
        goal: Task description/goal for the episode. If None, uses capture's
            task_description, a readable form of the capture directory name,
            or a generic message.
        episode_id: Identifier for the episode. If None, derived from the
            capture's own id.
        include_moves: Whether to include mouse move events.

    Returns:
        Episode containing Steps with Observations and Actions, terminated by
        a synthetic "done" step when at least one real step was converted.

    Raises:
        ImportError: If openadapt-capture is not installed.
        FileNotFoundError: If capture doesn't exist.
    """
    try:
        # Import only what is referenced below; the previous version also
        # imported EventType, KeyTypeEvent, MouseClickEvent and
        # MouseDoubleClickEvent without ever using them.
        from openadapt_capture import Capture
        from openadapt_capture.events import MouseDragEvent, MouseScrollEvent
    except ImportError as e:
        raise ImportError(
            "openadapt-capture is required. Install with: pip install openadapt-capture"
        ) from e

    capture_path = Path(capture_path)
    if output_dir is None:
        output_dir = capture_path / "screenshots"
    output_dir = Path(output_dir)

    capture = Capture.load(capture_path)
    try:
        if episode_id is None:
            episode_id = f"capture_{capture.id}"

        # Goal priority: explicit arg > capture metadata > directory name > generic.
        if goal is None:
            if capture.task_description:
                goal = capture.task_description
            else:
                # Derive goal from the directory name
                # (e.g. "turn-off-nightshift" -> "Turn off nightshift").
                dir_name = capture_path.name
                if dir_name and dir_name != "capture":
                    goal = (
                        dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
                    )
                else:
                    goal = "Complete the recorded workflow"

        # Screen dimensions are needed to normalize pixel coords to [0, 1].
        screen_width, screen_height = capture.screen_size

        steps: list[Step] = []
        start_time = capture.started_at

        for idx, action in enumerate(capture.actions(include_moves=include_moves)):
            screenshot = action.screenshot
            if screenshot is None:
                # No frame to ground the action on; skip it.
                continue

            image_path = _save_screenshot(screenshot, output_dir, episode_id, idx)

            norm_x, norm_y = _normalize_coords(
                action.x, action.y, screen_width, screen_height
            )

            # Unknown event types fall back to "click".
            action_type = EVENT_TYPE_MAP.get(action.type, "click")

            ml_action = Action(
                type=action_type,
                x=norm_x,
                y=norm_y,
                text=action.text,
            )

            # Drag events: store normalized end coordinates in `raw`.
            if isinstance(action.event, MouseDragEvent):
                end_x = action.event.x + action.event.dx
                end_y = action.event.y + action.event.dy
                norm_end_x, norm_end_y = _normalize_coords(
                    end_x, end_y, screen_width, screen_height
                )
                ml_action.raw = {
                    "end_x": norm_end_x,
                    "end_y": norm_end_y,
                    "button": action.event.button,
                }

            # Scroll events: store raw (un-normalized) deltas.
            if isinstance(action.event, MouseScrollEvent):
                ml_action.raw = {
                    "dx": action.event.dx,
                    "dy": action.event.dy,
                }

            # Keyboard events: include key names for special keys.
            if action.keys:
                if ml_action.raw is None:
                    ml_action.raw = {}
                ml_action.raw["keys"] = action.keys

            steps.append(
                Step(
                    t=action.timestamp - start_time,
                    observation=Observation(image_path=image_path),
                    action=ml_action,
                    thought=None,  # Real recordings don't have thoughts.
                )
            )

        # Append a terminal DONE action reusing the last screenshot.
        if steps:
            last_step = steps[-1]
            steps.append(
                Step(
                    t=last_step.t + 0.1,
                    observation=Observation(image_path=last_step.observation.image_path),
                    action=Action(type="done"),
                    thought="Workflow complete.",
                )
            )

        workflow_id = capture.id
    finally:
        # Always release the capture's resources, even when conversion fails
        # part-way; previously close() was skipped on any exception.
        capture.close()

    return Episode(
        id=episode_id,
        goal=goal,
        steps=steps,
        summary=f"Real recording with {len(steps)} steps",
        success=True,
        workflow_id=workflow_id,
    )
231
+
232
+
233
def capture_to_session(
    capture_path: str | Path,
    output_dir: str | Path | None = None,
    goal: str | None = None,
    session_id: str | None = None,
    include_moves: bool = False,
) -> Session:
    """Wrap a single openadapt-capture recording in a Session.

    Args:
        capture_path: Path to the capture directory.
        output_dir: Directory to save extracted screenshots.
        goal: Task description/goal for the episode.
        session_id: Identifier for the session. If None, generates a UUID.
        include_moves: Whether to include mouse move events.

    Returns:
        Session containing a single Episode.
    """
    converted = capture_to_episode(
        capture_path=capture_path,
        output_dir=output_dir,
        goal=goal,
        include_moves=include_moves,
    )

    # Fall back to a short random id when the caller did not supply one.
    if session_id is None:
        session_id = f"session_{uuid.uuid4().hex[:8]}"

    session_meta = {
        "source": "openadapt-capture",
        "capture_path": str(capture_path),
    }
    return Session(id=session_id, episodes=[converted], meta=session_meta)
270
+
271
+
272
def load_captures_as_sessions(
    captures_dir: str | Path,
    output_dir: str | Path | None = None,
    include_moves: bool = False,
) -> list[Session]:
    """Discover and convert every capture under *captures_dir*.

    A capture is recognized by the presence of a ``capture.db`` file in a
    subdirectory. Captures that fail to convert are skipped with a warning
    rather than aborting the whole scan.

    Args:
        captures_dir: Directory containing capture subdirectories.
        output_dir: Base directory for screenshots. Each capture gets a subdirectory.
        include_moves: Whether to include mouse move events.

    Returns:
        List of Sessions, one per capture.
    """
    root = Path(captures_dir)
    sessions: list[Session] = []

    for db_file in root.glob("**/capture.db"):
        capture_dir = db_file.parent

        # Each capture's screenshots land in its own subdirectory.
        per_capture_out = (
            Path(output_dir) / capture_dir.name if output_dir is not None else None
        )

        try:
            sessions.append(
                capture_to_session(
                    capture_path=capture_dir,
                    output_dir=per_capture_out,
                    include_moves=include_moves,
                )
            )
        except Exception as e:
            # Best-effort: report and keep scanning the remaining captures.
            print(f"Warning: Failed to load {capture_dir}: {e}")

    return sessions
@@ -0,0 +1,232 @@
1
+ """Episode loading utilities for openadapt-ml.
2
+
3
+ Load Episodes from JSON files exported by external systems.
4
+ This is the primary entry point for users who have their own data.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, Union
12
+
13
+ from openadapt_ml.schemas.sessions import Action, Episode, Observation, Step
14
+ from openadapt_ml.schemas.validation import validate_episodes, summarize_episodes
15
+
16
+
17
def load_episodes(
    path: Union[str, Path],
    validate: bool = True,
    check_images: bool = False,
) -> List[Episode]:
    """Load Episodes from a directory or JSON file.

    Supports two formats:
    1. Single JSON file containing a list of episodes
    2. Directory containing multiple JSON files (one episode per file, or batched)

    Args:
        path: Path to directory or JSON file containing episode data.
        validate: If True, validate episodes against schema (default True).
        check_images: If True, verify image files exist on disk (default False).

    Returns:
        List of Episode objects ready for training.

    Raises:
        FileNotFoundError: If path doesn't exist.
        ValidationError: If validate=True and data fails validation.
        ValueError: If JSON format is invalid.

    Example:
        >>> episodes = load_episodes("exported_data/")
        >>> print(f"Loaded {len(episodes)} episodes")
        >>> print(f"Total steps: {sum(len(e.steps) for e in episodes)}")
    """
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"Path not found: {source}")

    loaded: List[Episode] = []
    if source.is_file():
        # A single JSON file.
        loaded = _load_episodes_from_file(source)
    elif source.is_dir():
        # A directory of JSON files, processed in sorted order.
        json_files = sorted(source.glob("*.json"))
        if not json_files:
            raise ValueError(f"No JSON files found in {source}")
        for json_file in json_files:
            loaded.extend(_load_episodes_from_file(json_file))
    else:
        raise ValueError(f"Path must be a file or directory: {source}")

    if validate:
        warnings = validate_episodes(loaded, check_images=check_images)
        if warnings:
            # Warnings are informational; only the first 10 are echoed.
            print(f"Validation warnings ({len(warnings)}):")
            for w in warnings[:10]:
                print(f" - {w}")
            if len(warnings) > 10:
                print(f" ... and {len(warnings) - 10} more")

    return loaded
78
+
79
+
80
+ def _load_episodes_from_file(path: Path) -> List[Episode]:
81
+ """Load episodes from a single JSON file."""
82
+ with open(path, "r") as f:
83
+ data = json.load(f)
84
+
85
+ # Handle different JSON structures
86
+ if isinstance(data, list):
87
+ # List of episodes
88
+ return [_dict_to_episode(ep) for ep in data]
89
+ elif isinstance(data, dict):
90
+ # Single episode or wrapped format
91
+ if "episodes" in data:
92
+ return [_dict_to_episode(ep) for ep in data["episodes"]]
93
+ elif "id" in data and "goal" in data:
94
+ # Single episode
95
+ return [_dict_to_episode(data)]
96
+ else:
97
+ raise ValueError(f"Unrecognized JSON format in {path}")
98
+ else:
99
+ raise ValueError(f"Expected list or dict in {path}, got {type(data)}")
100
+
101
+
102
def _dict_to_episode(data: Dict[str, Any]) -> Episode:
    """Deserialize a plain dict (parsed JSON) into an Episode object.

    Missing keys fall back to None (or "unknown"/"" for required-ish fields),
    so partially-populated exports still load.
    """
    observation_fields = (
        "image_path",
        "meta",
        "accessibility_tree",
        "dom_html",
        "url",
        "window_title",
        "app_name",
        "focused_element",
    )
    action_fields = (
        "x",
        "y",
        "text",
        "raw",
        "element_index",
        "target_node_id",
        "target_role",
        "target_name",
        "key",
        "modifiers",
        "scroll_direction",
        "scroll_amount",
        "end_x",
        "end_y",
        "answer",
    )

    parsed_steps = []
    for raw_step in data.get("steps", []):
        obs_raw = raw_step.get("observation", {})
        observation = Observation(
            **{field: obs_raw.get(field) for field in observation_fields}
        )

        act_raw = raw_step.get("action", {})
        bbox_val = act_raw.get("bbox")
        action = Action(
            type=act_raw.get("type", "unknown"),
            # JSON has no tuple type, so bbox round-trips as a list.
            bbox=tuple(bbox_val) if bbox_val else None,
            **{field: act_raw.get(field) for field in action_fields},
        )

        parsed_steps.append(
            Step(
                t=raw_step.get("t", 0.0),
                observation=observation,
                action=action,
                thought=raw_step.get("thought"),
            )
        )

    return Episode(
        id=data.get("id", "unknown"),
        goal=data.get("goal", ""),
        steps=parsed_steps,
        summary=data.get("summary"),
        success=data.get("success"),
        workflow_id=data.get("workflow_id"),
    )
157
+
158
+
159
def save_episodes(
    episodes: List[Episode],
    path: Union[str, Path],
    pretty: bool = True,
) -> None:
    """Serialize Episodes to a JSON file.

    Args:
        episodes: List of Episode objects to save.
        path: Output file path (parent directories are created as needed).
        pretty: If True, format JSON with indentation.

    Example:
        >>> save_episodes(episodes, "output/episodes.json")
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    payload = [_episode_to_dict(ep) for ep in episodes]

    # indent=None produces the same compact output as omitting the argument.
    with open(target, "w") as f:
        json.dump(payload, f, indent=2 if pretty else None)
184
+
185
+
186
+ def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
187
+ """Convert an Episode object to a dictionary."""
188
+ steps = []
189
+ for step in episode.steps:
190
+ step_dict = {
191
+ "t": step.t,
192
+ "observation": {
193
+ "image_path": step.observation.image_path,
194
+ "meta": step.observation.meta,
195
+ "accessibility_tree": step.observation.accessibility_tree,
196
+ "dom_html": step.observation.dom_html,
197
+ "url": step.observation.url,
198
+ "window_title": step.observation.window_title,
199
+ "app_name": step.observation.app_name,
200
+ "focused_element": step.observation.focused_element,
201
+ },
202
+ "action": {
203
+ "type": step.action.type,
204
+ "x": step.action.x,
205
+ "y": step.action.y,
206
+ "text": step.action.text,
207
+ "raw": step.action.raw,
208
+ "bbox": list(step.action.bbox) if step.action.bbox else None,
209
+ "element_index": step.action.element_index,
210
+ "target_node_id": step.action.target_node_id,
211
+ "target_role": step.action.target_role,
212
+ "target_name": step.action.target_name,
213
+ "key": step.action.key,
214
+ "modifiers": step.action.modifiers,
215
+ "scroll_direction": step.action.scroll_direction,
216
+ "scroll_amount": step.action.scroll_amount,
217
+ "end_x": step.action.end_x,
218
+ "end_y": step.action.end_y,
219
+ "answer": step.action.answer,
220
+ },
221
+ "thought": step.thought,
222
+ }
223
+ steps.append(step_dict)
224
+
225
+ return {
226
+ "id": episode.id,
227
+ "goal": episode.goal,
228
+ "steps": steps,
229
+ "summary": episode.summary,
230
+ "success": episode.success,
231
+ "workflow_id": episode.workflow_id,
232
+ }