openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (112)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -6,25 +6,24 @@ and convert them to the Episode/Step format used by openadapt-ml for training.
 
 from __future__ import annotations
 
-import uuid
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-from openadapt_ml.schemas.sessions import Action, Episode, Observation, Session, Step
+from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
 
 if TYPE_CHECKING:
     from PIL import Image
 
-# Event type mapping from openadapt-capture to openadapt-ml
+# Event type mapping from openadapt-capture to openadapt-ml ActionType
 EVENT_TYPE_MAP = {
-    "mouse.singleclick": "click",
-    "mouse.click": "click",
-    "mouse.doubleclick": "double_click",
-    "mouse.drag": "drag",
-    "mouse.scroll": "scroll",
-    "key.type": "type",
-    "key.down": "key_press",
-    "key.up": "key_press",
+    "mouse.singleclick": ActionType.CLICK,
+    "mouse.click": ActionType.CLICK,
+    "mouse.doubleclick": ActionType.DOUBLE_CLICK,
+    "mouse.drag": ActionType.DRAG,
+    "mouse.scroll": ActionType.SCROLL,
+    "key.type": ActionType.TYPE,
+    "key.down": ActionType.KEY,
+    "key.up": ActionType.KEY,
 }
 
 
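Note: the map now resolves capture event strings to ActionType enum members instead of bare strings. A minimal sketch of the lookup, mirroring the `.get(event_type, ActionType.CLICK)` fallback that appears later in this file (EVENT_TYPE_MAP is module-internal):

    from openadapt_ml.schema import ActionType

    EVENT_TYPE_MAP.get("key.down", ActionType.CLICK)    # ActionType.KEY
    EVENT_TYPE_MAP.get("mouse.move", ActionType.CLICK)  # unmapped -> ActionType.CLICK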
@@ -33,7 +32,7 @@ def _normalize_coords(
     y: float | None,
     screen_width: int,
     screen_height: int,
-) -> tuple[float | None, float | None]:
+) -> tuple[float, float] | None:
     """Normalize pixel coordinates to [0, 1] range.
 
     Args:
@@ -43,11 +42,11 @@ def _normalize_coords(
         screen_height: Screen height in pixels.
 
     Returns:
-        Tuple of (normalized_x, normalized_y).
+        Tuple of (normalized_x, normalized_y) or None if coords are None.
     """
     if x is None or y is None:
-        return None, None
-    return x / screen_width, y / screen_height
+        return None
+    return (x / screen_width, y / screen_height)
 
 
 def _save_screenshot(
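Note: the return-type change is behavioral, not cosmetic. Callers previously unpacked a `(None, None)` pair; the new version returns a single `None` when either coordinate is missing. A minimal sketch of the new contract (the 1920x1080 screen size is illustrative):

    _normalize_coords(960, 540, 1920, 1080)    # (0.5, 0.5)
    _normalize_coords(None, None, 1920, 1080)  # None (was (None, None) in 0.1.0)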
@@ -77,7 +76,7 @@ def _save_screenshot(
 def capture_to_episode(
     capture_path: str | Path,
     output_dir: str | Path | None = None,
-    goal: str | None = None,
+    instruction: str | None = None,
     episode_id: str | None = None,
     include_moves: bool = False,
 ) -> Episode:
@@ -87,8 +86,8 @@ def capture_to_episode(
         capture_path: Path to the capture directory.
         output_dir: Directory to save extracted screenshots. If None, uses
             capture_path/screenshots.
-        goal: Task description/goal for the episode. If None, uses capture's
-            task_description or a generic message.
+        instruction: Task description/instruction for the episode. If None, uses
+            capture's task_description or a generic message.
         episode_id: Identifier for the episode. If None, generates a UUID.
         include_moves: Whether to include mouse move events.
 
@@ -101,7 +100,7 @@ def capture_to_episode(
     """
     try:
         from openadapt_capture import Capture
-        from openadapt_capture.events import (
+        from openadapt_capture.events import (  # noqa: F401
             EventType,
             KeyTypeEvent,
             MouseClickEvent,
@@ -126,18 +125,20 @@ def capture_to_episode(
     if episode_id is None:
         episode_id = f"capture_{capture.id}"
 
-    # Get goal from capture or derive from context
-    if goal is None:
+    # Get instruction from capture or derive from context
+    if instruction is None:
         if capture.task_description:
-            goal = capture.task_description
+            instruction = capture.task_description
         else:
-            # Try to derive goal from directory name (e.g., "turn-off-nightshift" -> "Turn off nightshift")
+            # Try to derive instruction from directory name (e.g., "turn-off-nightshift" -> "Turn off nightshift")
             dir_name = capture_path.name
             if dir_name and dir_name != "capture":
                 # Convert kebab-case/snake_case to readable text
-                goal = dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
+                instruction = (
+                    dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
+                )
             else:
-                goal = "Complete the recorded workflow"
+                instruction = "Complete the recorded workflow"
 
     # Get screen dimensions for coordinate normalization
     screen_width, screen_height = capture.screen_size
@@ -152,22 +153,19 @@ def capture_to_episode(
             continue
 
         # Save screenshot
-        image_path = _save_screenshot(screenshot, output_dir, episode_id, idx)
+        screenshot_path = _save_screenshot(screenshot, output_dir, episode_id, idx)
 
         # Normalize coordinates
-        norm_x, norm_y = _normalize_coords(
-            action.x, action.y, screen_width, screen_height
-        )
+        norm_coords = _normalize_coords(action.x, action.y, screen_width, screen_height)
 
-        # Map event type to openadapt-ml action type
+        # Map event type to openadapt-ml ActionType
        event_type = action.type
-        action_type = EVENT_TYPE_MAP.get(event_type, "click")
+        action_type = EVENT_TYPE_MAP.get(event_type, ActionType.CLICK)
 
         # Build Action object
         ml_action = Action(
             type=action_type,
-            x=norm_x,
-            y=norm_y,
+            normalized_coordinates=norm_coords,
             text=action.text,
         )
 
@@ -175,34 +173,52 @@ def capture_to_episode(
         if isinstance(action.event, MouseDragEvent):
             end_x = action.event.x + action.event.dx
             end_y = action.event.y + action.event.dy
-            norm_end_x, norm_end_y = _normalize_coords(
-                end_x, end_y, screen_width, screen_height
+            norm_end = _normalize_coords(end_x, end_y, screen_width, screen_height)
+            ml_action = ml_action.model_copy(
+                update={
+                    "normalized_end": norm_end,
+                    "raw": {
+                        "button": action.event.button,
+                    },
+                }
             )
-            ml_action.raw = {
-                "end_x": norm_end_x,
-                "end_y": norm_end_y,
-                "button": action.event.button,
-            }
 
         # Handle scroll events
         if isinstance(action.event, MouseScrollEvent):
-            ml_action.raw = {
-                "dx": action.event.dx,
-                "dy": action.event.dy,
-            }
+            # Determine scroll direction from dx/dy
+            scroll_direction = None
+            if action.event.dy > 0:
+                scroll_direction = "down"
+            elif action.event.dy < 0:
+                scroll_direction = "up"
+            elif action.event.dx > 0:
+                scroll_direction = "right"
+            elif action.event.dx < 0:
+                scroll_direction = "left"
+
+            ml_action = ml_action.model_copy(
+                update={
+                    "scroll_direction": scroll_direction,
+                    "raw": {
+                        "dx": action.event.dx,
+                        "dy": action.event.dy,
+                    },
+                }
+            )
 
         # Handle keyboard events - include key names for special keys
         if action.keys:
-            if ml_action.raw is None:
-                ml_action.raw = {}
-            ml_action.raw["keys"] = action.keys
+            raw = ml_action.raw or {}
+            raw["keys"] = action.keys
+            ml_action = ml_action.model_copy(update={"raw": raw})
 
         # Create Step
         step = Step(
-            t=action.timestamp - start_time,
-            observation=Observation(image_path=image_path),
+            step_index=idx,
+            observation=Observation(screenshot_path=screenshot_path),
             action=ml_action,
-            thought=None,  # Real recordings don't have thoughts
+            reasoning=None,  # Real recordings don't have reasoning
+            timestamp=action.timestamp - start_time,
         )
         steps.append(step)
 
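Note the switch from mutating `ml_action.raw` in place to rebuilding the action via `model_copy(update=...)`, which suggests the new `Action` is an immutable (frozen) pydantic v2 model. A hedged sketch of the pattern — the frozen-model assumption is ours:

    from openadapt_ml.schema import Action, ActionType

    action = Action(type=ActionType.SCROLL)
    # model_copy returns a new instance with the given fields replaced,
    # so it works even if the model config forbids field assignment.
    action = action.model_copy(
        update={"scroll_direction": "down", "raw": {"dx": 0, "dy": 3}}
    )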
@@ -211,69 +227,64 @@ def capture_to_episode(
         # Use the last screenshot for the done action
         last_step = steps[-1]
         done_step = Step(
-            t=last_step.t + 0.1,
-            observation=Observation(image_path=last_step.observation.image_path),
-            action=Action(type="done"),
-            thought="Workflow complete.",
+            step_index=len(steps),
+            observation=Observation(
+                screenshot_path=last_step.observation.screenshot_path
+            ),
+            action=Action(type=ActionType.DONE),
+            reasoning="Workflow complete.",
+            timestamp=(last_step.timestamp or 0) + 0.1,
         )
         steps.append(done_step)
 
     capture.close()
 
     return Episode(
-        id=episode_id,
-        goal=goal,
+        episode_id=episode_id,
+        instruction=instruction,
         steps=steps,
-        summary=f"Real recording with {len(steps)} steps",
         success=True,
-        workflow_id=capture.id,
+        metadata={
+            "summary": f"Real recording with {len(steps)} steps",
+            "workflow_id": capture.id,
+        },
     )
 
 
-def capture_to_session(
+def capture_to_episodes(
     capture_path: str | Path,
     output_dir: str | Path | None = None,
-    goal: str | None = None,
-    session_id: str | None = None,
+    instruction: str | None = None,
     include_moves: bool = False,
-) -> Session:
-    """Convert an openadapt-capture recording to a Session.
+) -> list[Episode]:
+    """Convert an openadapt-capture recording to a list with one Episode.
+
+    This is a convenience function that returns episodes as a list for consistency
+    with the new schema (which uses list[Episode] instead of Session).
 
     Args:
         capture_path: Path to the capture directory.
         output_dir: Directory to save extracted screenshots.
-        goal: Task description/goal for the episode.
-        session_id: Identifier for the session. If None, generates a UUID.
+        instruction: Task description/instruction for the episode.
         include_moves: Whether to include mouse move events.
 
     Returns:
-        Session containing a single Episode.
+        List containing a single Episode.
     """
     episode = capture_to_episode(
         capture_path=capture_path,
         output_dir=output_dir,
-        goal=goal,
+        instruction=instruction,
         include_moves=include_moves,
     )
-
-    if session_id is None:
-        session_id = f"session_{uuid.uuid4().hex[:8]}"
-
-    return Session(
-        id=session_id,
-        episodes=[episode],
-        meta={
-            "source": "openadapt-capture",
-            "capture_path": str(capture_path),
-        },
-    )
+    return [episode]
 
 
-def load_captures_as_sessions(
+def load_captures_as_episodes(
     captures_dir: str | Path,
     output_dir: str | Path | None = None,
     include_moves: bool = False,
-) -> list[Session]:
+) -> list[Episode]:
     """Load multiple captures from a directory.
 
     Scans for subdirectories containing capture.db files.
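For callers migrating from 0.1.0, the Session wrapper is gone: `capture_to_session` becomes `capture_to_episodes`, which returns a plain `list[Episode]`, and the `goal`/`session_id` arguments are replaced by `instruction`. A before/after sketch (the capture path is illustrative):

    # 0.1.0
    # session = capture_to_session("captures/turn-off-nightshift")
    # episode = session.episodes[0]

    # 0.2.1
    episodes = capture_to_episodes("captures/turn-off-nightshift")
    episode = episodes[0]
    print(episode.instruction)  # derived from the directory name if not supplied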
@@ -284,10 +295,10 @@ def load_captures_as_sessions(
         include_moves: Whether to include mouse move events.
 
     Returns:
-        List of Sessions, one per capture.
+        List of Episodes, one per capture.
     """
     captures_dir = Path(captures_dir)
-    sessions = []
+    episodes = []
 
     # Find all capture.db files
     for db_path in captures_dir.glob("**/capture.db"):
@@ -300,13 +311,13 @@ def load_captures_as_sessions(
             capture_output = None
 
         try:
-            session = capture_to_session(
+            episode = capture_to_episode(
                 capture_path=capture_path,
                 output_dir=capture_output,
                 include_moves=include_moves,
             )
-            sessions.append(session)
+            episodes.append(episode)
         except Exception as e:
             print(f"Warning: Failed to load {capture_path}: {e}")
 
-    return sessions
+    return episodes
@@ -8,10 +8,10 @@ from __future__ import annotations
 
 import json
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Union
 
-from openadapt_ml.schemas.sessions import Action, Episode, Observation, Step
-from openadapt_ml.schemas.validation import validate_episodes, summarize_episodes
+
+from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
 
 
 def load_episodes(
@@ -52,7 +52,7 @@ def load_episodes(
 
     if path.is_file():
         # Single JSON file
-        episodes = _load_episodes_from_file(path)
+        episodes = _load_episodes_from_file(path, validate=validate)
     elif path.is_dir():
         # Directory of JSON files
         json_files = sorted(path.glob("*.json"))
@@ -60,15 +60,15 @@ def load_episodes(
             raise ValueError(f"No JSON files found in {path}")
 
         for json_file in json_files:
-            file_episodes = _load_episodes_from_file(json_file)
+            file_episodes = _load_episodes_from_file(json_file, validate=validate)
             episodes.extend(file_episodes)
     else:
         raise ValueError(f"Path must be a file or directory: {path}")
 
-    if validate:
-        warnings = validate_episodes(episodes, check_images=check_images)
+    if check_images:
+        warnings = _check_episode_images(episodes)
         if warnings:
-            print(f"Validation warnings ({len(warnings)}):")
+            print(f"Image warnings ({len(warnings)}):")
             for w in warnings[:10]:  # Show first 10
                 print(f"  - {w}")
             if len(warnings) > 10:
@@ -77,7 +77,21 @@ def load_episodes(
     return episodes
 
 
-def _load_episodes_from_file(path: Path) -> List[Episode]:
+def _check_episode_images(episodes: List[Episode]) -> List[str]:
+    """Check that all referenced images exist on disk."""
+    warnings = []
+    for ep in episodes:
+        for step in ep.steps:
+            if step.observation.screenshot_path:
+                if not Path(step.observation.screenshot_path).exists():
+                    warnings.append(
+                        f"Episode {ep.episode_id}, step {step.step_index}: "
+                        f"Image not found: {step.observation.screenshot_path}"
+                    )
+    return warnings
+
+
+def _load_episodes_from_file(path: Path, validate: bool = True) -> List[Episode]:
     """Load episodes from a single JSON file."""
     with open(path, "r") as f:
         data = json.load(f)
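With validation now handled by the schema itself, `validate` controls pydantic validation per episode and `check_images` triggers the new on-disk image check. A sketch of the call, using the parameters shown above (the file path is illustrative):

    episodes = load_episodes(
        "data/episodes.json",
        validate=True,      # Episode.model_validate on each parsed episode
        check_images=True,  # warn about missing screenshot files
    )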
@@ -85,75 +99,123 @@ def _load_episodes_from_file(path: Path) -> List[Episode]:
     # Handle different JSON structures
     if isinstance(data, list):
         # List of episodes
-        return [_dict_to_episode(ep) for ep in data]
+        return [_dict_to_episode(ep, validate=validate) for ep in data]
     elif isinstance(data, dict):
         # Single episode or wrapped format
         if "episodes" in data:
-            return [_dict_to_episode(ep) for ep in data["episodes"]]
-        elif "id" in data and "goal" in data:
-            # Single episode
-            return [_dict_to_episode(data)]
+            return [_dict_to_episode(ep, validate=validate) for ep in data["episodes"]]
+        elif "episode_id" in data or "id" in data:
+            # Single episode (support both old and new field names)
+            return [_dict_to_episode(data, validate=validate)]
         else:
             raise ValueError(f"Unrecognized JSON format in {path}")
     else:
         raise ValueError(f"Expected list or dict in {path}, got {type(data)}")
 
 
-def _dict_to_episode(data: Dict[str, Any]) -> Episode:
+def _parse_action_type(type_str: str) -> ActionType:
+    """Parse action type string to ActionType enum."""
+    # Handle common mappings from old format
+    type_map = {
+        "unknown": ActionType.CLICK,
+        "double_click": ActionType.DOUBLE_CLICK,
+        "right_click": ActionType.RIGHT_CLICK,
+        "key_press": ActionType.KEY,
+    }
+
+    type_lower = type_str.lower()
+    if type_lower in type_map:
+        return type_map[type_lower]
+
+    # Try direct enum lookup
+    try:
+        return ActionType(type_lower)
+    except ValueError:
+        # Default to CLICK for unknown types
+        return ActionType.CLICK
+
+
+def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
     """Convert a dictionary to an Episode object."""
     steps = []
-    for step_data in data.get("steps", []):
+    for step_idx, step_data in enumerate(data.get("steps", [])):
         # Parse observation
         obs_data = step_data.get("observation", {})
         observation = Observation(
-            image_path=obs_data.get("image_path"),
-            meta=obs_data.get("meta"),
-            accessibility_tree=obs_data.get("accessibility_tree"),
-            dom_html=obs_data.get("dom_html"),
-            url=obs_data.get("url"),
+            screenshot_path=obs_data.get("screenshot_path")
+            or obs_data.get("image_path"),
+            raw=obs_data.get("raw") or obs_data.get("meta"),
+            a11y_tree=obs_data.get("a11y_tree") or obs_data.get("accessibility_tree"),
+            dom=obs_data.get("dom") or obs_data.get("dom_html"),
             window_title=obs_data.get("window_title"),
-            app_name=obs_data.get("app_name"),
             focused_element=obs_data.get("focused_element"),
         )
 
         # Parse action
         action_data = step_data.get("action", {})
+
+        # Handle action type (string -> enum)
+        action_type_raw = action_data.get("type", "click")
+        action_type = _parse_action_type(action_type_raw)
+
+        # Handle coordinates: convert x,y to normalized_coordinates tuple
+        normalized_coords = None
+        if action_data.get("normalized_coordinates"):
+            normalized_coords = tuple(action_data["normalized_coordinates"])
+        elif action_data.get("x") is not None and action_data.get("y") is not None:
+            normalized_coords = (action_data["x"], action_data["y"])
+
+        # Handle end coordinates for drag actions
+        normalized_end = None
+        if action_data.get("normalized_end"):
+            normalized_end = tuple(action_data["normalized_end"])
+        elif (
+            action_data.get("end_x") is not None
+            and action_data.get("end_y") is not None
+        ):
+            normalized_end = (action_data["end_x"], action_data["end_y"])
+
         action = Action(
-            type=action_data.get("type", "unknown"),
-            x=action_data.get("x"),
-            y=action_data.get("y"),
+            type=action_type,
+            normalized_coordinates=normalized_coords,
+            normalized_end=normalized_end,
             text=action_data.get("text"),
             raw=action_data.get("raw"),
-            bbox=tuple(action_data["bbox"]) if action_data.get("bbox") else None,
-            element_index=action_data.get("element_index"),
-            target_node_id=action_data.get("target_node_id"),
-            target_role=action_data.get("target_role"),
-            target_name=action_data.get("target_name"),
             key=action_data.get("key"),
             modifiers=action_data.get("modifiers"),
             scroll_direction=action_data.get("scroll_direction"),
             scroll_amount=action_data.get("scroll_amount"),
-            end_x=action_data.get("end_x"),
-            end_y=action_data.get("end_y"),
-            answer=action_data.get("answer"),
         )
 
+        # Handle step index and timestamp
+        step_index = step_data.get("step_index", step_idx)
+        timestamp = step_data.get("timestamp") or step_data.get("t")
+
         step = Step(
-            t=step_data.get("t", 0.0),
+            step_index=step_index,
             observation=observation,
             action=action,
-            thought=step_data.get("thought"),
+            reasoning=step_data.get("reasoning") or step_data.get("thought"),
+            timestamp=timestamp,
         )
         steps.append(step)
 
-    return Episode(
-        id=data.get("id", "unknown"),
-        goal=data.get("goal", ""),
-        steps=steps,
-        summary=data.get("summary"),
-        success=data.get("success"),
-        workflow_id=data.get("workflow_id"),
-    )
+    # Build episode with field mapping (old -> new)
+    episode_data = {
+        "episode_id": data.get("episode_id") or data.get("id", "unknown"),
+        "instruction": data.get("instruction") or data.get("goal", ""),
+        "steps": steps,
+        "success": data.get("success"),
+        "metadata": {
+            "summary": data.get("summary"),
+            "workflow_id": data.get("workflow_id"),
+        },
+    }
+
+    if validate:
+        return Episode.model_validate(episode_data)
+    else:
+        return Episode(**episode_data)
 
 
 def save_episodes(
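The loader keeps 0.1.0 JSON loadable: old field names (`id`, `goal`, `t`, `thought`, `x`/`y`, `image_path`) are mapped onto the new schema during parsing. A minimal sketch of an old-format dict surviving into the new Episode (field values are illustrative, and this assumes the new schema accepts the mapped fields as shown):

    old = {
        "id": "ep-001",                # -> episode_id
        "goal": "Open settings",       # -> instruction
        "steps": [{
            "t": 0.0,                  # -> timestamp; step_index falls back to enumerate order
            "observation": {"image_path": "shots/0.png"},          # -> screenshot_path
            "action": {"type": "key_press", "x": 0.4, "y": 0.6},
            "thought": "Press Enter",  # -> reasoning
        }],
    }
    ep = _dict_to_episode(old)
    assert ep.episode_id == "ep-001"
    assert ep.steps[0].action.type is ActionType.KEY                # "key_press" remapped
    assert ep.steps[0].action.normalized_coordinates == (0.4, 0.6)  # x,y folded into a tuple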
@@ -178,9 +240,9 @@ def save_episodes(
 
     with open(path, "w") as f:
         if pretty:
-            json.dump(data, f, indent=2)
+            json.dump(data, f, indent=2, default=str)
         else:
-            json.dump(data, f)
+            json.dump(data, f, default=str)
 
 
 def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
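The added `default=str` makes `json.dump` stringify any value it cannot serialize natively — relevant now that `metadata` can carry arbitrary objects such as datetimes or Paths. A standalone illustration:

    import datetime
    import json

    json.dumps({"saved_at": datetime.datetime(2025, 1, 1)}, default=str)
    # '{"saved_at": "2025-01-01 00:00:00"}'  (raises TypeError without default=str)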
@@ -188,45 +250,34 @@ def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
     steps = []
     for step in episode.steps:
         step_dict = {
-            "t": step.t,
+            "step_index": step.step_index,
+            "timestamp": step.timestamp,
             "observation": {
-                "image_path": step.observation.image_path,
-                "meta": step.observation.meta,
-                "accessibility_tree": step.observation.accessibility_tree,
-                "dom_html": step.observation.dom_html,
-                "url": step.observation.url,
+                "screenshot_path": step.observation.screenshot_path,
+                "raw": step.observation.raw,
+                "a11y_tree": step.observation.a11y_tree,
+                "dom": step.observation.dom,
                 "window_title": step.observation.window_title,
-                "app_name": step.observation.app_name,
-                "focused_element": step.observation.focused_element,
             },
             "action": {
-                "type": step.action.type,
-                "x": step.action.x,
-                "y": step.action.y,
+                "type": step.action.type.value,
+                "normalized_coordinates": step.action.normalized_coordinates,
+                "normalized_end": step.action.normalized_end,
                 "text": step.action.text,
                 "raw": step.action.raw,
-                "bbox": list(step.action.bbox) if step.action.bbox else None,
-                "element_index": step.action.element_index,
-                "target_node_id": step.action.target_node_id,
-                "target_role": step.action.target_role,
-                "target_name": step.action.target_name,
                 "key": step.action.key,
                 "modifiers": step.action.modifiers,
                 "scroll_direction": step.action.scroll_direction,
                 "scroll_amount": step.action.scroll_amount,
-                "end_x": step.action.end_x,
-                "end_y": step.action.end_y,
-                "answer": step.action.answer,
             },
-            "thought": step.thought,
+            "reasoning": step.reasoning,
         }
         steps.append(step_dict)
 
     return {
-        "id": episode.id,
-        "goal": episode.goal,
+        "episode_id": episode.episode_id,
+        "instruction": episode.instruction,
         "steps": steps,
-        "summary": episode.summary,
         "success": episode.success,
-        "workflow_id": episode.workflow_id,
+        "metadata": episode.metadata,
     }
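Taken together, the serializer and loader should round-trip the new field names (enum types are written via `.value` and re-parsed by `_parse_action_type`). A hedged sketch — the `save_episodes` argument order and names beyond `pretty` are assumed, since the full signature is not shown in this diff:

    save_episodes(episodes, "out/episodes.json", pretty=True)
    reloaded = load_episodes("out/episodes.json")
    assert reloaded[0].episode_id == episodes[0].episode_id
    assert reloaded[0].steps[0].action.type is episodes[0].steps[0].action.type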