openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,588 @@
1
+ """
2
+ Converters for benchmark-specific episode formats.
3
+
4
+ Supported formats:
5
+ - WAA (Windows Agent Arena)
6
+ - WebArena (coming soon)
7
+ - OSWorld (coming soon)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Any, Optional, Union
16
+
17
+ from openadapt_ml.schema.episode import (
18
+ Action,
19
+ ActionType,
20
+ BenchmarkSource,
21
+ Coordinates,
22
+ Episode,
23
+ Observation,
24
+ Step,
25
+ UIElement,
26
+ )
27
+
28
+
29
+ # ============================================================================
30
+ # WAA (Windows Agent Arena) Converter
31
+ # ============================================================================
32
+
33
+
34
def _split_call_args(args_str: str) -> tuple[list[Any], dict[str, Any]]:
    """Split a pyautogui-style argument list into positional and keyword args.

    Commas inside quoted strings are preserved, and a backslash-escaped quote
    inside a string (e.g. ``\\'``) no longer terminates the string early.
    """
    parts: list[str] = []
    current = ""
    quote: Optional[str] = None
    escaped = False

    for char in args_str:
        if escaped:
            # Previous char was a backslash inside a string: keep literally.
            current += char
            escaped = False
            continue
        if quote is not None and char == "\\":
            current += char
            escaped = True
            continue
        if char in "'\"" and quote is None:
            quote = char
        elif char == quote:
            quote = None
        elif char == "," and quote is None:
            if current.strip():
                parts.append(current.strip())
            current = ""
            continue
        current += char

    if current.strip():
        parts.append(current.strip())

    args: list[Any] = []
    kwargs: dict[str, Any] = {}
    for part in parts:
        kw_match = re.match(r"([A-Za-z_]\w*)\s*=\s*(.+)$", part)
        if kw_match and part[0] not in "'\"":
            kwargs[kw_match.group(1)] = _coerce_arg(kw_match.group(2).strip())
        else:
            args.append(_coerce_arg(part))
    return args, kwargs


def _coerce_arg(arg: str) -> Any:
    """Convert one argument token to an unquoted str, int, or float."""
    if len(arg) >= 2 and arg[0] == arg[-1] and arg[0] in "'\"":
        return arg[1:-1]
    try:
        return int(arg)
    except ValueError:
        pass
    try:
        return float(arg)
    except ValueError:
        return arg


def _extract_xy(args: list[Any], kwargs: dict[str, Any]) -> Optional[Coordinates]:
    """Build Coordinates from the first two positional args or x=/y= kwargs."""
    pair = None
    if len(args) >= 2:
        pair = (args[0], args[1])
    elif "x" in kwargs and "y" in kwargs:
        pair = (kwargs["x"], kwargs["y"])
    if pair is None:
        return None
    try:
        return Coordinates(x=int(pair[0]), y=int(pair[1]))
    except (TypeError, ValueError):
        # Non-numeric coordinate tokens: treat as "no coordinates" rather
        # than raising (the previous code raised ValueError here).
        return None


def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
    """Parse WAA action string into ActionType and parameters.

    WAA action format examples:
        - pyautogui.click(100, 200)
        - pyautogui.click(x=100, y=200)   (keyword form now supported)
        - pyautogui.write('hello')
        - pyautogui.press('enter')
        - pyautogui.hotkey('ctrl', 'c')
        - pyautogui.scroll(3)
        - DONE
        - FAIL

    Returns:
        Tuple of (ActionType, params dict). The params dict never contains
        a ``raw`` key: the old fallback returned one, which made
        ``Action(..., raw=..., **params)`` in ``from_waa_trajectory`` raise
        ``TypeError`` for a duplicate keyword argument.
    """
    action_str = action_str.strip()

    # Meta actions
    if action_str == "DONE":
        return ActionType.DONE, {}
    if action_str == "FAIL":
        return ActionType.FAIL, {}

    # Parse pyautogui calls; fall back to a bare function-call form.
    match = re.match(r"pyautogui\.(\w+)\((.*)\)", action_str)
    if not match:
        match = re.match(r"(\w+)\((.*)\)", action_str)

    if match:
        func_name = match.group(1).lower()
        args, kwargs = _split_call_args(match.group(2))

        # Pointer actions that may carry (x, y) coordinates.
        pointer_types = {
            "click": ActionType.CLICK,
            "doubleclick": ActionType.DOUBLE_CLICK,
            "rightclick": ActionType.RIGHT_CLICK,
            "moveto": ActionType.HOVER,
        }
        if func_name in pointer_types:
            coords = _extract_xy(args, kwargs)
            params: dict[str, Any] = {"coordinates": coords} if coords else {}
            return pointer_types[func_name], params

        if func_name in ("drag", "dragto"):
            coords = _extract_xy(args, kwargs)
            return ActionType.DRAG, {"end_coordinates": coords} if coords else {}

        if func_name in ("write", "typewrite"):
            return ActionType.TYPE, {"text": args[0] if args else ""}

        if func_name == "press":
            return ActionType.KEY, {"key": args[0] if args else ""}

        if func_name == "hotkey":
            if len(args) >= 2:
                return ActionType.HOTKEY, {
                    "key": args[-1],
                    "modifiers": list(args[:-1]),
                }
            return ActionType.KEY, {"key": args[0] if args else ""}

        if func_name == "scroll":
            amount = args[0] if args else 0
            if not isinstance(amount, (int, float)):
                # Non-numeric scroll argument: treat as a zero scroll instead
                # of raising TypeError on the comparison below.
                amount = 0
            direction = "up" if amount > 0 else "down"
            return ActionType.SCROLL, {
                "scroll_direction": direction,
                "scroll_amount": abs(int(amount)) * 100,  # wheel clicks -> pixels
            }

    # Fallback - treat as raw text if nothing matched. The original string is
    # preserved by the caller in Action.raw, so nothing is lost here.
    return ActionType.TYPE, {"text": action_str}
169
+
170
+
171
def from_waa_trajectory(
    trajectory: list[dict[str, Any]],
    task_info: dict[str, Any],
    episode_id: Optional[str] = None,
) -> Episode:
    """Convert WAA trajectory format to Episode.

    Args:
        trajectory: List of WAA step dictionaries with keys like:
            - screenshot_path: Path to screenshot
            - action: Action string (pyautogui format)
            - a11y_tree: Accessibility tree (optional)
            - thought: Agent reasoning (optional)
        task_info: Task metadata with keys like:
            - id: Task ID
            - instruction: Task instruction
            - domain: Task domain (file_explorer, etc.)
        episode_id: Optional explicit episode ID; derived from the task ID
            when omitted.

    Returns:
        Episode instance
    """
    steps = []

    for i, step_data in enumerate(trajectory):
        # Parse observation
        observation = Observation(
            screenshot_path=step_data.get("screenshot_path"),
            a11y_tree=step_data.get("a11y_tree"),
            window_title=step_data.get("window_title"),
            raw=step_data.get("observation_raw"),
        )

        # Parse action
        action_str = step_data.get("action", "")
        action_type, action_params = _parse_waa_action(action_str)

        # BUGFIX: the parser's fallback path could return a "raw" key in
        # action_params, and `Action(..., raw=..., **action_params)` then
        # raised TypeError for a duplicate keyword argument. Pop and merge
        # any parser-supplied raw payload instead.
        raw: dict[str, Any] = {"original": action_str}
        extra_raw = action_params.pop("raw", None)
        if isinstance(extra_raw, dict):
            raw.update(extra_raw)

        action = Action(
            type=action_type,
            raw=raw,
            **action_params,
        )

        # Create step
        step = Step(
            step_index=i,
            observation=observation,
            action=action,
            reasoning=step_data.get("thought") or step_data.get("reasoning"),
            reward=step_data.get("reward"),
            done=step_data.get("done"),
        )
        steps.append(step)

    # Extract task info
    task_id = task_info.get("id") or task_info.get("task_id")
    instruction = task_info.get("instruction") or task_info.get("goal", "")

    if episode_id is None:
        episode_id = f"waa_{task_id}" if task_id else f"waa_episode_{id(trajectory)}"

    return Episode(
        episode_id=episode_id,
        task_id=task_id,
        instruction=instruction,
        steps=steps,
        success=task_info.get("success"),
        source=BenchmarkSource.WAA,
        metadata={
            "domain": task_info.get("domain"),
            "difficulty": task_info.get("difficulty"),
            # Pass through any task_info keys not already mapped above.
            **{
                k: v
                for k, v in task_info.items()
                if k
                not in [
                    "id",
                    "task_id",
                    "instruction",
                    "goal",
                    "success",
                    "domain",
                    "difficulty",
                ]
            },
        },
    )
257
+
258
+
259
def to_waa_trajectory(episode: Episode) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Convert Episode to WAA trajectory format.

    Args:
        episode: Episode instance

    Returns:
        Tuple of (trajectory, task_info)
    """
    trajectory: list[dict[str, Any]] = []

    for step in episode.steps:
        obs = step.observation
        entry: dict[str, Any] = {
            "screenshot_path": obs.screenshot_path,
            "a11y_tree": obs.a11y_tree,
            "window_title": obs.window_title,
        }

        # Prefer the original pyautogui string when the action still carries
        # it; otherwise re-serialize from the structured fields.
        act = step.action
        if act.raw and "original" in act.raw:
            entry["action"] = act.raw["original"]
        else:
            entry["action"] = _action_to_pyautogui(act)

        # Optional per-step fields are only emitted when present.
        if step.reasoning:
            entry["thought"] = step.reasoning
        if step.reward is not None:
            entry["reward"] = step.reward
        if step.done is not None:
            entry["done"] = step.done

        trajectory.append(entry)

    task_info: dict[str, Any] = {
        "id": episode.task_id,
        "instruction": episode.instruction,
        "success": episode.success,
    }
    if episode.metadata:
        task_info.update(episode.metadata)

    return trajectory, task_info
305
+
306
+
307
def _action_to_pyautogui(action: Action) -> str:
    """Convert Action to pyautogui string format.

    Inverse of ``_parse_waa_action`` for the supported action types; unknown
    types are emitted as a comment line.
    """
    if action.type == ActionType.DONE:
        return "DONE"
    if action.type == ActionType.FAIL:
        return "FAIL"

    if action.type == ActionType.CLICK:
        if action.coordinates:
            return f"pyautogui.click({action.coordinates.x}, {action.coordinates.y})"
        return "pyautogui.click()"

    if action.type == ActionType.DOUBLE_CLICK:
        if action.coordinates:
            return (
                f"pyautogui.doubleClick({action.coordinates.x}, {action.coordinates.y})"
            )
        return "pyautogui.doubleClick()"

    if action.type == ActionType.RIGHT_CLICK:
        if action.coordinates:
            return (
                f"pyautogui.rightClick({action.coordinates.x}, {action.coordinates.y})"
            )
        return "pyautogui.rightClick()"

    if action.type == ActionType.TYPE:
        text = action.text or ""
        # BUGFIX: escape backslashes BEFORE quotes. The old code only
        # escaped quotes, so text containing backslashes (e.g. a Windows
        # path) produced an invalid/lossy string literal.
        text = text.replace("\\", "\\\\").replace("'", "\\'")
        return f"pyautogui.write('{text}')"

    if action.type == ActionType.KEY:
        key = action.key or ""  # avoid emitting the literal string 'None'
        return f"pyautogui.press('{key}')"

    if action.type == ActionType.HOTKEY:
        modifiers = action.modifiers or []
        keys = modifiers + [action.key]
        keys_str = ", ".join(f"'{k}'" for k in keys)
        return f"pyautogui.hotkey({keys_str})"

    if action.type == ActionType.SCROLL:
        pixels = action.scroll_amount or 100
        # Pixels -> wheel clicks (inverse of the *100 in _parse_waa_action).
        # BUGFIX: floor division turned e.g. 150px-down into -2 clicks and
        # <100px into 0 clicks; round and emit at least one click instead.
        clicks = max(1, round(pixels / 100))
        if action.scroll_direction in ("down", "right"):
            clicks = -clicks
        return f"pyautogui.scroll({clicks})"

    if action.type == ActionType.HOVER:
        if action.coordinates:
            return f"pyautogui.moveTo({action.coordinates.x}, {action.coordinates.y})"
        return "pyautogui.moveTo()"

    if action.type == ActionType.DRAG:
        if action.end_coordinates:
            return f"pyautogui.dragTo({action.end_coordinates.x}, {action.end_coordinates.y})"
        return "pyautogui.drag()"

    return f"# Unknown action: {action.type}"
365
+
366
+
367
+ # ============================================================================
368
+ # Internal Format Converter (openadapt_ml.schemas.sessions)
369
+ # ============================================================================
370
+
371
+
372
def from_internal_episode(
    internal_episode: Any,
    episode_id: Optional[str] = None,
) -> Episode:
    """Convert from internal training format (openadapt_ml.schemas.sessions.Episode).

    This converts from the dataclass-based format used by the training pipeline
    to the Pydantic-based Episode format used for external interoperability.

    Args:
        internal_episode: An openadapt_ml.schemas.sessions.Episode instance
        episode_id: Override episode ID (defaults to internal_episode.id)

    Returns:
        Episode instance in the new format
    """
    # Map internal action-type strings to ActionType. Hoisted out of the
    # loop (the old code rebuilt this dict on every iteration).
    action_type_map = {
        "click": ActionType.CLICK,
        "double_click": ActionType.DOUBLE_CLICK,
        "right_click": ActionType.RIGHT_CLICK,
        "drag": ActionType.DRAG,
        "scroll": ActionType.SCROLL,
        "type": ActionType.TYPE,
        "key": ActionType.KEY,
        "wait": ActionType.WAIT,
        "done": ActionType.DONE,
        "failed": ActionType.FAIL,
        "answer": ActionType.DONE,  # Map answer to done
    }

    steps = []
    for i, step in enumerate(internal_episode.steps):
        # Convert observation
        obs = Observation(
            screenshot_path=step.observation.image_path,
            a11y_tree=step.observation.accessibility_tree,
            dom=step.observation.dom_html,
            window_title=step.observation.window_title,
            raw=step.observation.meta,
        )

        # Convert action - note: internal format uses normalized coords in x/y
        src = step.action
        action_type = action_type_map.get(src.type, ActionType.CLICK)

        has_xy = src.x is not None and src.y is not None
        has_end = src.end_x is not None and src.end_y is not None

        action = Action(
            type=action_type,
            # Store normalized coords from internal format
            normalized_coordinates=(src.x, src.y) if has_xy else None,
            text=src.text,
            key=src.key,
            modifiers=src.modifiers,
            # NOTE: a scroll_amount of 0 (or None) maps to None, matching
            # the original conversion's truthiness check.
            scroll_amount=int(src.scroll_amount) if src.scroll_amount else None,
            scroll_direction=src.scroll_direction,
            normalized_end=(src.end_x, src.end_y) if has_end else None,
            element=UIElement(
                element_id=src.target_node_id,
                role=src.target_role,
                name=src.target_name,
            )
            if src.target_node_id
            else None,
            raw=src.raw,
        )

        steps.append(
            Step(
                step_index=i,
                observation=obs,
                action=action,
                reasoning=step.thought,
                timestamp=step.t,
            )
        )

    # Only attach metadata when there is something to record.
    metadata = None
    if internal_episode.workflow_id or internal_episode.summary:
        metadata = {
            "workflow_id": internal_episode.workflow_id,
            "summary": internal_episode.summary,
        }

    return Episode(
        episode_id=episode_id or internal_episode.id,
        instruction=internal_episode.goal,
        steps=steps,
        success=internal_episode.success,
        metadata=metadata,
    )
463
+
464
+
465
def to_internal_episode(episode: Episode) -> dict:
    """Convert Episode to internal training format (as dict).

    Returns a dict matching openadapt_ml.schemas.sessions.Episode structure.
    The caller can construct the dataclass from this dict.

    Args:
        episode: Episode in new format

    Returns:
        Dict matching internal Episode structure
    """
    steps = []
    for step in episode.steps:
        act = step.action
        obs = step.observation

        # Normalized coordinates; pixel-only coordinates cannot be converted
        # without the screen size, so they are left as None (the raw action
        # payload still carries them).
        norm_x = norm_y = None
        if act.normalized_coordinates:
            norm_x, norm_y = act.normalized_coordinates

        end_x = end_y = None
        if act.normalized_end:
            end_x, end_y = act.normalized_end

        target_node_id = target_role = target_name = None
        if act.element:
            target_node_id = act.element.element_id
            target_role = act.element.role
            target_name = act.element.name

        steps.append(
            {
                "t": step.timestamp or float(step.step_index),
                "observation": {
                    "image_path": obs.screenshot_path,
                    "accessibility_tree": obs.a11y_tree,
                    "dom_html": obs.dom,
                    "window_title": obs.window_title,
                    "meta": obs.raw,
                },
                "action": {
                    "type": act.type.value,
                    "x": norm_x,
                    "y": norm_y,
                    "text": act.text,
                    "key": act.key,
                    "modifiers": act.modifiers,
                    "scroll_direction": act.scroll_direction,
                    "scroll_amount": act.scroll_amount,
                    "end_x": end_x,
                    "end_y": end_y,
                    "target_node_id": target_node_id,
                    "target_role": target_role,
                    "target_name": target_name,
                    "raw": act.raw,
                },
                "thought": step.reasoning,
            }
        )

    meta = episode.metadata or {}
    return {
        "id": episode.episode_id,
        "goal": episode.instruction,
        "steps": steps,
        "success": episode.success,
        "workflow_id": meta.get("workflow_id"),
        "summary": meta.get("summary"),
    }
537
+
538
+
539
def load_waa_result(result_dir: Union[str, Path]) -> Episode:
    """Load episode from WAA result directory.

    WAA result directories contain:
        - result.txt: Final score
        - trajectory.json or similar: Step-by-step data

    Args:
        result_dir: Path to WAA result directory

    Returns:
        Episode instance
    """
    result_dir = Path(result_dir)

    trajectory: list = []
    task_info: dict = {}

    # Look for a step-by-step data file; *trajectory*.json candidates are
    # considered before *steps*.json ones.
    candidates = [
        *result_dir.glob("*trajectory*.json"),
        *result_dir.glob("*steps*.json"),
    ]
    if candidates:
        data = json.loads(candidates[0].read_text())
        if isinstance(data, list):
            trajectory = data
        elif isinstance(data, dict):
            trajectory = data.get("steps", data.get("trajectory", []))
            task_info = {
                key: value
                for key, value in data.items()
                if key not in ("steps", "trajectory")
            }

    # A result.txt holding a single numeric score marks success when > 0;
    # unparseable contents are ignored.
    result_file = result_dir / "result.txt"
    if result_file.exists():
        score_text = result_file.read_text().strip()
        try:
            task_info["success"] = float(score_text) > 0
        except ValueError:
            pass

    # Fall back to the directory name as the task ID.
    task_id = result_dir.name
    if task_id and "task_id" not in task_info:
        task_info["task_id"] = task_id

    return from_waa_trajectory(trajectory, task_info, episode_id=f"waa_{task_id}")