openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -10,21 +10,21 @@ import uuid
10
10
  from pathlib import Path
11
11
  from typing import TYPE_CHECKING
12
12
 
13
- from openadapt_ml.schemas.sessions import Action, Episode, Observation, Session, Step
13
+ from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from PIL import Image
17
17
 
18
- # Event type mapping from openadapt-capture to openadapt-ml
18
+ # Event type mapping from openadapt-capture to openadapt-ml ActionType
19
19
  EVENT_TYPE_MAP = {
20
- "mouse.singleclick": "click",
21
- "mouse.click": "click",
22
- "mouse.doubleclick": "double_click",
23
- "mouse.drag": "drag",
24
- "mouse.scroll": "scroll",
25
- "key.type": "type",
26
- "key.down": "key_press",
27
- "key.up": "key_press",
20
+ "mouse.singleclick": ActionType.CLICK,
21
+ "mouse.click": ActionType.CLICK,
22
+ "mouse.doubleclick": ActionType.DOUBLE_CLICK,
23
+ "mouse.drag": ActionType.DRAG,
24
+ "mouse.scroll": ActionType.SCROLL,
25
+ "key.type": ActionType.TYPE,
26
+ "key.down": ActionType.KEY,
27
+ "key.up": ActionType.KEY,
28
28
  }
29
29
 
30
30
 
@@ -33,7 +33,7 @@ def _normalize_coords(
33
33
  y: float | None,
34
34
  screen_width: int,
35
35
  screen_height: int,
36
- ) -> tuple[float | None, float | None]:
36
+ ) -> tuple[float, float] | None:
37
37
  """Normalize pixel coordinates to [0, 1] range.
38
38
 
39
39
  Args:
@@ -43,11 +43,11 @@ def _normalize_coords(
43
43
  screen_height: Screen height in pixels.
44
44
 
45
45
  Returns:
46
- Tuple of (normalized_x, normalized_y).
46
+ Tuple of (normalized_x, normalized_y) or None if coords are None.
47
47
  """
48
48
  if x is None or y is None:
49
- return None, None
50
- return x / screen_width, y / screen_height
49
+ return None
50
+ return (x / screen_width, y / screen_height)
51
51
 
52
52
 
53
53
  def _save_screenshot(
@@ -77,7 +77,7 @@ def _save_screenshot(
77
77
  def capture_to_episode(
78
78
  capture_path: str | Path,
79
79
  output_dir: str | Path | None = None,
80
- goal: str | None = None,
80
+ instruction: str | None = None,
81
81
  episode_id: str | None = None,
82
82
  include_moves: bool = False,
83
83
  ) -> Episode:
@@ -87,8 +87,8 @@ def capture_to_episode(
87
87
  capture_path: Path to the capture directory.
88
88
  output_dir: Directory to save extracted screenshots. If None, uses
89
89
  capture_path/screenshots.
90
- goal: Task description/goal for the episode. If None, uses capture's
91
- task_description or a generic message.
90
+ instruction: Task description/instruction for the episode. If None, uses
91
+ capture's task_description or a generic message.
92
92
  episode_id: Identifier for the episode. If None, generates a UUID.
93
93
  include_moves: Whether to include mouse move events.
94
94
 
@@ -126,18 +126,18 @@ def capture_to_episode(
126
126
  if episode_id is None:
127
127
  episode_id = f"capture_{capture.id}"
128
128
 
129
- # Get goal from capture or derive from context
130
- if goal is None:
129
+ # Get instruction from capture or derive from context
130
+ if instruction is None:
131
131
  if capture.task_description:
132
- goal = capture.task_description
132
+ instruction = capture.task_description
133
133
  else:
134
- # Try to derive goal from directory name (e.g., "turn-off-nightshift" -> "Turn off nightshift")
134
+ # Try to derive instruction from directory name (e.g., "turn-off-nightshift" -> "Turn off nightshift")
135
135
  dir_name = capture_path.name
136
136
  if dir_name and dir_name != "capture":
137
137
  # Convert kebab-case/snake_case to readable text
138
- goal = dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
138
+ instruction = dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
139
139
  else:
140
- goal = "Complete the recorded workflow"
140
+ instruction = "Complete the recorded workflow"
141
141
 
142
142
  # Get screen dimensions for coordinate normalization
143
143
  screen_width, screen_height = capture.screen_size
@@ -152,22 +152,21 @@ def capture_to_episode(
152
152
  continue
153
153
 
154
154
  # Save screenshot
155
- image_path = _save_screenshot(screenshot, output_dir, episode_id, idx)
155
+ screenshot_path = _save_screenshot(screenshot, output_dir, episode_id, idx)
156
156
 
157
157
  # Normalize coordinates
158
- norm_x, norm_y = _normalize_coords(
158
+ norm_coords = _normalize_coords(
159
159
  action.x, action.y, screen_width, screen_height
160
160
  )
161
161
 
162
- # Map event type to openadapt-ml action type
162
+ # Map event type to openadapt-ml ActionType
163
163
  event_type = action.type
164
- action_type = EVENT_TYPE_MAP.get(event_type, "click")
164
+ action_type = EVENT_TYPE_MAP.get(event_type, ActionType.CLICK)
165
165
 
166
166
  # Build Action object
167
167
  ml_action = Action(
168
168
  type=action_type,
169
- x=norm_x,
170
- y=norm_y,
169
+ normalized_coordinates=norm_coords,
171
170
  text=action.text,
172
171
  )
173
172
 
@@ -175,34 +174,50 @@ def capture_to_episode(
175
174
  if isinstance(action.event, MouseDragEvent):
176
175
  end_x = action.event.x + action.event.dx
177
176
  end_y = action.event.y + action.event.dy
178
- norm_end_x, norm_end_y = _normalize_coords(
177
+ norm_end = _normalize_coords(
179
178
  end_x, end_y, screen_width, screen_height
180
179
  )
181
- ml_action.raw = {
182
- "end_x": norm_end_x,
183
- "end_y": norm_end_y,
184
- "button": action.event.button,
185
- }
180
+ ml_action = ml_action.model_copy(update={
181
+ "normalized_end": norm_end,
182
+ "raw": {
183
+ "button": action.event.button,
184
+ },
185
+ })
186
186
 
187
187
  # Handle scroll events
188
188
  if isinstance(action.event, MouseScrollEvent):
189
- ml_action.raw = {
190
- "dx": action.event.dx,
191
- "dy": action.event.dy,
192
- }
189
+ # Determine scroll direction from dx/dy
190
+ scroll_direction = None
191
+ if action.event.dy > 0:
192
+ scroll_direction = "down"
193
+ elif action.event.dy < 0:
194
+ scroll_direction = "up"
195
+ elif action.event.dx > 0:
196
+ scroll_direction = "right"
197
+ elif action.event.dx < 0:
198
+ scroll_direction = "left"
199
+
200
+ ml_action = ml_action.model_copy(update={
201
+ "scroll_direction": scroll_direction,
202
+ "raw": {
203
+ "dx": action.event.dx,
204
+ "dy": action.event.dy,
205
+ },
206
+ })
193
207
 
194
208
  # Handle keyboard events - include key names for special keys
195
209
  if action.keys:
196
- if ml_action.raw is None:
197
- ml_action.raw = {}
198
- ml_action.raw["keys"] = action.keys
210
+ raw = ml_action.raw or {}
211
+ raw["keys"] = action.keys
212
+ ml_action = ml_action.model_copy(update={"raw": raw})
199
213
 
200
214
  # Create Step
201
215
  step = Step(
202
- t=action.timestamp - start_time,
203
- observation=Observation(image_path=image_path),
216
+ step_index=idx,
217
+ observation=Observation(screenshot_path=screenshot_path),
204
218
  action=ml_action,
205
- thought=None, # Real recordings don't have thoughts
219
+ reasoning=None, # Real recordings don't have reasoning
220
+ timestamp=action.timestamp - start_time,
206
221
  )
207
222
  steps.append(step)
208
223
 
@@ -211,69 +226,62 @@ def capture_to_episode(
211
226
  # Use the last screenshot for the done action
212
227
  last_step = steps[-1]
213
228
  done_step = Step(
214
- t=last_step.t + 0.1,
215
- observation=Observation(image_path=last_step.observation.image_path),
216
- action=Action(type="done"),
217
- thought="Workflow complete.",
229
+ step_index=len(steps),
230
+ observation=Observation(screenshot_path=last_step.observation.screenshot_path),
231
+ action=Action(type=ActionType.DONE),
232
+ reasoning="Workflow complete.",
233
+ timestamp=(last_step.timestamp or 0) + 0.1,
218
234
  )
219
235
  steps.append(done_step)
220
236
 
221
237
  capture.close()
222
238
 
223
239
  return Episode(
224
- id=episode_id,
225
- goal=goal,
240
+ episode_id=episode_id,
241
+ instruction=instruction,
226
242
  steps=steps,
227
- summary=f"Real recording with {len(steps)} steps",
228
243
  success=True,
229
- workflow_id=capture.id,
244
+ metadata={
245
+ "summary": f"Real recording with {len(steps)} steps",
246
+ "workflow_id": capture.id,
247
+ },
230
248
  )
231
249
 
232
250
 
233
- def capture_to_session(
251
+ def capture_to_episodes(
234
252
  capture_path: str | Path,
235
253
  output_dir: str | Path | None = None,
236
- goal: str | None = None,
237
- session_id: str | None = None,
254
+ instruction: str | None = None,
238
255
  include_moves: bool = False,
239
- ) -> Session:
240
- """Convert an openadapt-capture recording to a Session.
256
+ ) -> list[Episode]:
257
+ """Convert an openadapt-capture recording to a list with one Episode.
258
+
259
+ This is a convenience function that returns episodes as a list for consistency
260
+ with the new schema (which uses list[Episode] instead of Session).
241
261
 
242
262
  Args:
243
263
  capture_path: Path to the capture directory.
244
264
  output_dir: Directory to save extracted screenshots.
245
- goal: Task description/goal for the episode.
246
- session_id: Identifier for the session. If None, generates a UUID.
265
+ instruction: Task description/instruction for the episode.
247
266
  include_moves: Whether to include mouse move events.
248
267
 
249
268
  Returns:
250
- Session containing a single Episode.
269
+ List containing a single Episode.
251
270
  """
252
271
  episode = capture_to_episode(
253
272
  capture_path=capture_path,
254
273
  output_dir=output_dir,
255
- goal=goal,
274
+ instruction=instruction,
256
275
  include_moves=include_moves,
257
276
  )
258
-
259
- if session_id is None:
260
- session_id = f"session_{uuid.uuid4().hex[:8]}"
261
-
262
- return Session(
263
- id=session_id,
264
- episodes=[episode],
265
- meta={
266
- "source": "openadapt-capture",
267
- "capture_path": str(capture_path),
268
- },
269
- )
277
+ return [episode]
270
278
 
271
279
 
272
- def load_captures_as_sessions(
280
+ def load_captures_as_episodes(
273
281
  captures_dir: str | Path,
274
282
  output_dir: str | Path | None = None,
275
283
  include_moves: bool = False,
276
- ) -> list[Session]:
284
+ ) -> list[Episode]:
277
285
  """Load multiple captures from a directory.
278
286
 
279
287
  Scans for subdirectories containing capture.db files.
@@ -284,10 +292,10 @@ def load_captures_as_sessions(
284
292
  include_moves: Whether to include mouse move events.
285
293
 
286
294
  Returns:
287
- List of Sessions, one per capture.
295
+ List of Episodes, one per capture.
288
296
  """
289
297
  captures_dir = Path(captures_dir)
290
- sessions = []
298
+ episodes = []
291
299
 
292
300
  # Find all capture.db files
293
301
  for db_path in captures_dir.glob("**/capture.db"):
@@ -300,13 +308,13 @@ def load_captures_as_sessions(
300
308
  capture_output = None
301
309
 
302
310
  try:
303
- session = capture_to_session(
311
+ episode = capture_to_episode(
304
312
  capture_path=capture_path,
305
313
  output_dir=capture_output,
306
314
  include_moves=include_moves,
307
315
  )
308
- sessions.append(session)
316
+ episodes.append(episode)
309
317
  except Exception as e:
310
318
  print(f"Warning: Failed to load {capture_path}: {e}")
311
319
 
312
- return sessions
320
+ return episodes
@@ -10,8 +10,9 @@ import json
10
10
  from pathlib import Path
11
11
  from typing import Any, Dict, List, Optional, Union
12
12
 
13
- from openadapt_ml.schemas.sessions import Action, Episode, Observation, Step
14
- from openadapt_ml.schemas.validation import validate_episodes, summarize_episodes
13
+ from pydantic import ValidationError
14
+
15
+ from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step
15
16
 
16
17
 
17
18
  def load_episodes(
@@ -52,7 +53,7 @@ def load_episodes(
52
53
 
53
54
  if path.is_file():
54
55
  # Single JSON file
55
- episodes = _load_episodes_from_file(path)
56
+ episodes = _load_episodes_from_file(path, validate=validate)
56
57
  elif path.is_dir():
57
58
  # Directory of JSON files
58
59
  json_files = sorted(path.glob("*.json"))
@@ -60,15 +61,15 @@ def load_episodes(
60
61
  raise ValueError(f"No JSON files found in {path}")
61
62
 
62
63
  for json_file in json_files:
63
- file_episodes = _load_episodes_from_file(json_file)
64
+ file_episodes = _load_episodes_from_file(json_file, validate=validate)
64
65
  episodes.extend(file_episodes)
65
66
  else:
66
67
  raise ValueError(f"Path must be a file or directory: {path}")
67
68
 
68
- if validate:
69
- warnings = validate_episodes(episodes, check_images=check_images)
69
+ if check_images:
70
+ warnings = _check_episode_images(episodes)
70
71
  if warnings:
71
- print(f"Validation warnings ({len(warnings)}):")
72
+ print(f"Image warnings ({len(warnings)}):")
72
73
  for w in warnings[:10]: # Show first 10
73
74
  print(f" - {w}")
74
75
  if len(warnings) > 10:
@@ -77,7 +78,21 @@ def load_episodes(
77
78
  return episodes
78
79
 
79
80
 
80
- def _load_episodes_from_file(path: Path) -> List[Episode]:
81
+ def _check_episode_images(episodes: List[Episode]) -> List[str]:
82
+ """Check that all referenced images exist on disk."""
83
+ warnings = []
84
+ for ep in episodes:
85
+ for step in ep.steps:
86
+ if step.observation.screenshot_path:
87
+ if not Path(step.observation.screenshot_path).exists():
88
+ warnings.append(
89
+ f"Episode {ep.episode_id}, step {step.step_index}: "
90
+ f"Image not found: {step.observation.screenshot_path}"
91
+ )
92
+ return warnings
93
+
94
+
95
+ def _load_episodes_from_file(path: Path, validate: bool = True) -> List[Episode]:
81
96
  """Load episodes from a single JSON file."""
82
97
  with open(path, "r") as f:
83
98
  data = json.load(f)
@@ -85,75 +100,119 @@ def _load_episodes_from_file(path: Path) -> List[Episode]:
85
100
  # Handle different JSON structures
86
101
  if isinstance(data, list):
87
102
  # List of episodes
88
- return [_dict_to_episode(ep) for ep in data]
103
+ return [_dict_to_episode(ep, validate=validate) for ep in data]
89
104
  elif isinstance(data, dict):
90
105
  # Single episode or wrapped format
91
106
  if "episodes" in data:
92
- return [_dict_to_episode(ep) for ep in data["episodes"]]
93
- elif "id" in data and "goal" in data:
94
- # Single episode
95
- return [_dict_to_episode(data)]
107
+ return [_dict_to_episode(ep, validate=validate) for ep in data["episodes"]]
108
+ elif "episode_id" in data or "id" in data:
109
+ # Single episode (support both old and new field names)
110
+ return [_dict_to_episode(data, validate=validate)]
96
111
  else:
97
112
  raise ValueError(f"Unrecognized JSON format in {path}")
98
113
  else:
99
114
  raise ValueError(f"Expected list or dict in {path}, got {type(data)}")
100
115
 
101
116
 
102
- def _dict_to_episode(data: Dict[str, Any]) -> Episode:
117
+ def _parse_action_type(type_str: str) -> ActionType:
118
+ """Parse action type string to ActionType enum."""
119
+ # Handle common mappings from old format
120
+ type_map = {
121
+ "unknown": ActionType.CLICK,
122
+ "double_click": ActionType.DOUBLE_CLICK,
123
+ "right_click": ActionType.RIGHT_CLICK,
124
+ "key_press": ActionType.KEY,
125
+ }
126
+
127
+ type_lower = type_str.lower()
128
+ if type_lower in type_map:
129
+ return type_map[type_lower]
130
+
131
+ # Try direct enum lookup
132
+ try:
133
+ return ActionType(type_lower)
134
+ except ValueError:
135
+ # Default to CLICK for unknown types
136
+ return ActionType.CLICK
137
+
138
+
139
+ def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
103
140
  """Convert a dictionary to an Episode object."""
104
141
  steps = []
105
- for step_data in data.get("steps", []):
142
+ for step_idx, step_data in enumerate(data.get("steps", [])):
106
143
  # Parse observation
107
144
  obs_data = step_data.get("observation", {})
108
145
  observation = Observation(
109
- image_path=obs_data.get("image_path"),
110
- meta=obs_data.get("meta"),
111
- accessibility_tree=obs_data.get("accessibility_tree"),
112
- dom_html=obs_data.get("dom_html"),
113
- url=obs_data.get("url"),
146
+ screenshot_path=obs_data.get("screenshot_path") or obs_data.get("image_path"),
147
+ raw=obs_data.get("raw") or obs_data.get("meta"),
148
+ a11y_tree=obs_data.get("a11y_tree") or obs_data.get("accessibility_tree"),
149
+ dom=obs_data.get("dom") or obs_data.get("dom_html"),
114
150
  window_title=obs_data.get("window_title"),
115
- app_name=obs_data.get("app_name"),
116
151
  focused_element=obs_data.get("focused_element"),
117
152
  )
118
153
 
119
154
  # Parse action
120
155
  action_data = step_data.get("action", {})
156
+
157
+ # Handle action type (string -> enum)
158
+ action_type_raw = action_data.get("type", "click")
159
+ action_type = _parse_action_type(action_type_raw)
160
+
161
+ # Handle coordinates: convert x,y to normalized_coordinates tuple
162
+ normalized_coords = None
163
+ if action_data.get("normalized_coordinates"):
164
+ normalized_coords = tuple(action_data["normalized_coordinates"])
165
+ elif action_data.get("x") is not None and action_data.get("y") is not None:
166
+ normalized_coords = (action_data["x"], action_data["y"])
167
+
168
+ # Handle end coordinates for drag actions
169
+ normalized_end = None
170
+ if action_data.get("normalized_end"):
171
+ normalized_end = tuple(action_data["normalized_end"])
172
+ elif action_data.get("end_x") is not None and action_data.get("end_y") is not None:
173
+ normalized_end = (action_data["end_x"], action_data["end_y"])
174
+
121
175
  action = Action(
122
- type=action_data.get("type", "unknown"),
123
- x=action_data.get("x"),
124
- y=action_data.get("y"),
176
+ type=action_type,
177
+ normalized_coordinates=normalized_coords,
178
+ normalized_end=normalized_end,
125
179
  text=action_data.get("text"),
126
180
  raw=action_data.get("raw"),
127
- bbox=tuple(action_data["bbox"]) if action_data.get("bbox") else None,
128
- element_index=action_data.get("element_index"),
129
- target_node_id=action_data.get("target_node_id"),
130
- target_role=action_data.get("target_role"),
131
- target_name=action_data.get("target_name"),
132
181
  key=action_data.get("key"),
133
182
  modifiers=action_data.get("modifiers"),
134
183
  scroll_direction=action_data.get("scroll_direction"),
135
184
  scroll_amount=action_data.get("scroll_amount"),
136
- end_x=action_data.get("end_x"),
137
- end_y=action_data.get("end_y"),
138
- answer=action_data.get("answer"),
139
185
  )
140
186
 
187
+ # Handle step index and timestamp
188
+ step_index = step_data.get("step_index", step_idx)
189
+ timestamp = step_data.get("timestamp") or step_data.get("t")
190
+
141
191
  step = Step(
142
- t=step_data.get("t", 0.0),
192
+ step_index=step_index,
143
193
  observation=observation,
144
194
  action=action,
145
- thought=step_data.get("thought"),
195
+ reasoning=step_data.get("reasoning") or step_data.get("thought"),
196
+ timestamp=timestamp,
146
197
  )
147
198
  steps.append(step)
148
199
 
149
- return Episode(
150
- id=data.get("id", "unknown"),
151
- goal=data.get("goal", ""),
152
- steps=steps,
153
- summary=data.get("summary"),
154
- success=data.get("success"),
155
- workflow_id=data.get("workflow_id"),
156
- )
200
+ # Build episode with field mapping (old -> new)
201
+ episode_data = {
202
+ "episode_id": data.get("episode_id") or data.get("id", "unknown"),
203
+ "instruction": data.get("instruction") or data.get("goal", ""),
204
+ "steps": steps,
205
+ "success": data.get("success"),
206
+ "metadata": {
207
+ "summary": data.get("summary"),
208
+ "workflow_id": data.get("workflow_id"),
209
+ },
210
+ }
211
+
212
+ if validate:
213
+ return Episode.model_validate(episode_data)
214
+ else:
215
+ return Episode(**episode_data)
157
216
 
158
217
 
159
218
  def save_episodes(
@@ -178,9 +237,9 @@ def save_episodes(
178
237
 
179
238
  with open(path, "w") as f:
180
239
  if pretty:
181
- json.dump(data, f, indent=2)
240
+ json.dump(data, f, indent=2, default=str)
182
241
  else:
183
- json.dump(data, f)
242
+ json.dump(data, f, default=str)
184
243
 
185
244
 
186
245
  def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
@@ -188,45 +247,34 @@ def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
188
247
  steps = []
189
248
  for step in episode.steps:
190
249
  step_dict = {
191
- "t": step.t,
250
+ "step_index": step.step_index,
251
+ "timestamp": step.timestamp,
192
252
  "observation": {
193
- "image_path": step.observation.image_path,
194
- "meta": step.observation.meta,
195
- "accessibility_tree": step.observation.accessibility_tree,
196
- "dom_html": step.observation.dom_html,
197
- "url": step.observation.url,
253
+ "screenshot_path": step.observation.screenshot_path,
254
+ "raw": step.observation.raw,
255
+ "a11y_tree": step.observation.a11y_tree,
256
+ "dom": step.observation.dom,
198
257
  "window_title": step.observation.window_title,
199
- "app_name": step.observation.app_name,
200
- "focused_element": step.observation.focused_element,
201
258
  },
202
259
  "action": {
203
- "type": step.action.type,
204
- "x": step.action.x,
205
- "y": step.action.y,
260
+ "type": step.action.type.value,
261
+ "normalized_coordinates": step.action.normalized_coordinates,
262
+ "normalized_end": step.action.normalized_end,
206
263
  "text": step.action.text,
207
264
  "raw": step.action.raw,
208
- "bbox": list(step.action.bbox) if step.action.bbox else None,
209
- "element_index": step.action.element_index,
210
- "target_node_id": step.action.target_node_id,
211
- "target_role": step.action.target_role,
212
- "target_name": step.action.target_name,
213
265
  "key": step.action.key,
214
266
  "modifiers": step.action.modifiers,
215
267
  "scroll_direction": step.action.scroll_direction,
216
268
  "scroll_amount": step.action.scroll_amount,
217
- "end_x": step.action.end_x,
218
- "end_y": step.action.end_y,
219
- "answer": step.action.answer,
220
269
  },
221
- "thought": step.thought,
270
+ "reasoning": step.reasoning,
222
271
  }
223
272
  steps.append(step_dict)
224
273
 
225
274
  return {
226
- "id": episode.id,
227
- "goal": episode.goal,
275
+ "episode_id": episode.episode_id,
276
+ "instruction": episode.instruction,
228
277
  "steps": steps,
229
- "summary": episode.summary,
230
278
  "success": episode.success,
231
- "workflow_id": episode.workflow_id,
279
+ "metadata": episode.metadata,
232
280
  }