openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -107
- openadapt_ml/benchmarks/agent.py +297 -374
- openadapt_ml/benchmarks/azure.py +62 -24
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1874 -751
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +1236 -0
- openadapt_ml/benchmarks/vm_monitor.py +1111 -0
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +3194 -89
- openadapt_ml/cloud/ssh_tunnel.py +595 -0
- openadapt_ml/datasets/next_action.py +125 -96
- openadapt_ml/evals/grounding.py +32 -9
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +120 -57
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +732 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +277 -0
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +11 -10
- openadapt_ml/ingest/capture.py +97 -86
- openadapt_ml/ingest/loader.py +120 -69
- openadapt_ml/ingest/synthetic.py +344 -193
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +843 -0
- openadapt_ml/retrieval/embeddings.py +630 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +162 -0
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +27 -14
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +113 -0
- openadapt_ml/schema/converters.py +588 -0
- openadapt_ml/schema/episode.py +470 -0
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +102 -61
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +19 -14
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +16 -17
- openadapt_ml/scripts/train.py +98 -75
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +3255 -19
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +255 -441
- openadapt_ml/training/trl_trainer.py +403 -0
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/runner.py +0 -381
- openadapt_ml/benchmarks/waa.py +0 -704
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,588 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Converters for benchmark-specific episode formats.
|
|
3
|
+
|
|
4
|
+
Supported formats:
|
|
5
|
+
- WAA (Windows Agent Arena)
|
|
6
|
+
- WebArena (coming soon)
|
|
7
|
+
- OSWorld (coming soon)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Optional, Union
|
|
16
|
+
|
|
17
|
+
from openadapt_ml.schema.episode import (
|
|
18
|
+
Action,
|
|
19
|
+
ActionType,
|
|
20
|
+
BenchmarkSource,
|
|
21
|
+
Coordinates,
|
|
22
|
+
Episode,
|
|
23
|
+
Observation,
|
|
24
|
+
Step,
|
|
25
|
+
UIElement,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ============================================================================
|
|
30
|
+
# WAA (Windows Agent Arena) Converter
|
|
31
|
+
# ============================================================================
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _split_waa_call_args(args_str: str) -> list[str]:
    """Split the text between a call's parentheses on top-level commas.

    Commas inside single- or double-quoted strings do not split, and a
    backslash-escaped character inside a string never terminates it.
    Pieces are whitespace-stripped and empty pieces are dropped.
    """
    pieces: list[str] = []
    current: list[str] = []
    quote = None  # Quote character of the string being scanned, if any.
    escaped = False  # True when the previous in-string char was a backslash.

    for char in args_str:
        if quote is not None:
            current.append(char)
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                quote = None
        elif char in "'\"":
            quote = char
            current.append(char)
        elif char == ",":
            pieces.append("".join(current).strip())
            current = []
        else:
            current.append(char)

    pieces.append("".join(current).strip())
    return [piece for piece in pieces if piece]


def _coerce_waa_arg(token: str) -> Any:
    """Convert one non-empty argument token to str (unquoted), int or float."""
    if token[0] in "'\"" and token[-1] == token[0]:
        quote = token[0]
        # Undo backslash-escaping of the enclosing quote character, which is
        # the only escaping _action_to_pyautogui applies when writing text.
        return token[1:-1].replace("\\" + quote, quote)
    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            continue
    return token


def _waa_point(x: Any, y: Any) -> Optional[Coordinates]:
    """Build Coordinates from two parsed arguments, or None if not numeric."""
    try:
        px, py = int(x), int(y)
    except (TypeError, ValueError, OverflowError):
        return None
    return Coordinates(x=px, y=py)


def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
    """Parse WAA action string into ActionType and parameters.

    WAA action format examples:
    - pyautogui.click(100, 200)
    - pyautogui.click(x=100, y=200)
    - pyautogui.write('hello')
    - pyautogui.press('enter')
    - pyautogui.hotkey('ctrl', 'c')
    - pyautogui.scroll(3)
    - DONE
    - FAIL

    The ``pyautogui.`` prefix is optional and the function name is matched
    case-insensitively. Both positional and ``name=value`` arguments are
    understood; keyword arguments that a given action does not use are
    ignored.

    Args:
        action_str: The raw action string.

    Returns:
        A ``(action_type, params)`` tuple where ``params`` holds keyword
        arguments for ``Action``. Pointer actions omit their coordinates when
        two numeric values are not available. Anything unrecognized is
        returned as ``ActionType.TYPE`` with the stripped input as ``text``
        and a ``raw`` entry holding the same string.
    """
    action_str = action_str.strip()

    # Meta actions
    if action_str == "DONE":
        return ActionType.DONE, {}
    if action_str == "FAIL":
        return ActionType.FAIL, {}

    # Parse pyautogui calls, falling back to a call without the prefix.
    match = re.match(r"pyautogui\.(\w+)\((.*)\)", action_str) or re.match(
        r"(\w+)\((.*)\)", action_str
    )

    if match:
        func_name = match.group(1).lower()

        positional: list[Any] = []
        keywords: dict[str, Any] = {}
        for token in _split_waa_call_args(match.group(2)):
            # Quoted tokens start with a quote and so never match this pattern.
            keyword = re.match(r"([A-Za-z_]\w*)\s*=(?!=)\s*(.+)", token, re.DOTALL)
            if keyword:
                keywords[keyword.group(1)] = _coerce_waa_arg(keyword.group(2))
            else:
                positional.append(_coerce_waa_arg(token))

        def argument(index: int, name: str, default: Any = None) -> Any:
            """Return the keyword argument *name*, else positional *index*."""
            if name in keywords:
                return keywords[name]
            if index < len(positional):
                return positional[index]
            return default

        # Actions whose only parameter is a screen position.
        pointer_actions = {
            "click": (ActionType.CLICK, "coordinates"),
            "doubleclick": (ActionType.DOUBLE_CLICK, "coordinates"),
            "rightclick": (ActionType.RIGHT_CLICK, "coordinates"),
            "moveto": (ActionType.HOVER, "coordinates"),
            "drag": (ActionType.DRAG, "end_coordinates"),
            "dragto": (ActionType.DRAG, "end_coordinates"),
        }
        if func_name in pointer_actions:
            action_type, field = pointer_actions[func_name]
            params: dict[str, Any] = {}
            point = _waa_point(argument(0, "x"), argument(1, "y"))
            if point is not None:
                params[field] = point
            return action_type, params

        if func_name in ("write", "typewrite"):
            return ActionType.TYPE, {"text": argument(0, "message", "")}

        if func_name == "press":
            return ActionType.KEY, {"key": argument(0, "keys", "")}

        if func_name == "hotkey":
            # Only positional arguments are keys; the last one is the main key.
            if len(positional) >= 2:
                return ActionType.HOTKEY, {
                    "key": positional[-1],
                    "modifiers": list(positional[:-1]),
                }
            return ActionType.KEY, {"key": positional[0] if positional else ""}

        if func_name == "scroll":
            amount = argument(0, "clicks", 0)
            if not isinstance(amount, (int, float)):
                # Non-numeric amount: treat like a missing one instead of
                # failing on the comparison below.
                amount = 0
            try:
                magnitude = abs(int(amount))
            except (ValueError, OverflowError):  # nan / inf
                amount, magnitude = 0, 0
            direction = "up" if amount > 0 else "down"
            return ActionType.SCROLL, {
                "scroll_direction": direction,
                "scroll_amount": magnitude * 100,  # Convert to pixels
            }

    # Fallback - treat as raw text if nothing matched
    return ActionType.TYPE, {"text": action_str, "raw": {"original": action_str}}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def from_waa_trajectory(
    trajectory: list[dict[str, Any]],
    task_info: dict[str, Any],
    episode_id: Optional[str] = None,
) -> Episode:
    """Convert WAA trajectory format to Episode.

    Args:
        trajectory: List of WAA step dictionaries with keys like:
            - screenshot_path: Path to screenshot
            - action: Action string (pyautogui format)
            - a11y_tree: Accessibility tree (optional)
            - thought: Agent reasoning (optional; ``reasoning`` is also read)
            - window_title, observation_raw, reward, done (optional)
        task_info: Task metadata with keys like:
            - id: Task ID (``task_id`` is also read)
            - instruction: Task instruction (``goal`` is also read)
            - domain: Task domain (file_explorer, etc.)
            - success, difficulty (optional)
          Any other keys are copied into the episode metadata.
        episode_id: Explicit episode ID. When omitted it is ``waa_<task id>``,
            or an ID derived from ``id(trajectory)`` if there is no task ID.

    Returns:
        Episode instance
    """
    steps = []

    for i, step_data in enumerate(trajectory):
        # Parse observation
        observation = Observation(
            screenshot_path=step_data.get("screenshot_path"),
            a11y_tree=step_data.get("a11y_tree"),
            window_title=step_data.get("window_title"),
            raw=step_data.get("observation_raw"),
        )

        # Parse action; a missing or null action is parsed as an empty string.
        action_str = step_data.get("action") or ""
        action_type, action_params = _parse_waa_action(action_str)

        # The parser's fallback result carries its own "raw" entry. Drop it so
        # it cannot collide with the explicit raw= keyword below, which would
        # raise TypeError for every unrecognized action string.
        action_params = {k: v for k, v in action_params.items() if k != "raw"}

        action = Action(
            type=action_type,
            raw={"original": action_str},
            **action_params,
        )

        # Create step
        step = Step(
            step_index=i,
            observation=observation,
            action=action,
            reasoning=step_data.get("thought") or step_data.get("reasoning"),
            reward=step_data.get("reward"),
            done=step_data.get("done"),
        )
        steps.append(step)

    # Extract task info
    task_id = task_info.get("id") or task_info.get("task_id")
    instruction = task_info.get("instruction") or task_info.get("goal", "")

    if episode_id is None:
        episode_id = f"waa_{task_id}" if task_id else f"waa_episode_{id(trajectory)}"

    # Keys that are mapped to dedicated Episode/metadata fields above.
    consumed_keys = (
        "id",
        "task_id",
        "instruction",
        "goal",
        "success",
        "domain",
        "difficulty",
    )

    return Episode(
        episode_id=episode_id,
        task_id=task_id,
        instruction=instruction,
        steps=steps,
        success=task_info.get("success"),
        source=BenchmarkSource.WAA,
        metadata={
            "domain": task_info.get("domain"),
            "difficulty": task_info.get("difficulty"),
            **{k: v for k, v in task_info.items() if k not in consumed_keys},
        },
    )
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def to_waa_trajectory(episode: Episode) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Serialize an Episode into WAA ``(trajectory, task_info)`` form.

    Each step becomes a dict with ``screenshot_path``, ``a11y_tree``,
    ``window_title`` and ``action``; ``thought`` is added when the step has
    non-empty reasoning, and ``reward`` / ``done`` when they are not None.

    Args:
        episode: Episode instance

    Returns:
        Tuple of (trajectory, task_info). ``task_info`` holds ``id``,
        ``instruction`` and ``success``, overlaid with the episode metadata
        when there is any.
    """
    records = []

    for item in episode.steps:
        obs = item.observation
        act = item.action

        # Prefer the verbatim source string when one was recorded; otherwise
        # regenerate a pyautogui call from the structured action.
        has_original = bool(act.raw) and "original" in act.raw
        record = {
            "screenshot_path": obs.screenshot_path,
            "a11y_tree": obs.a11y_tree,
            "window_title": obs.window_title,
            "action": act.raw["original"]
            if has_original
            else _action_to_pyautogui(act),
        }

        if item.reasoning:
            record["thought"] = item.reasoning

        for field in ("reward", "done"):
            value = getattr(item, field)
            if value is not None:
                record[field] = value

        records.append(record)

    info = {
        "id": episode.task_id,
        "instruction": episode.instruction,
        "success": episode.success,
        **(episode.metadata or {}),
    }

    return records, info
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _action_to_pyautogui(action: Action) -> str:
    """Convert Action to pyautogui string format.

    Args:
        action: The action to render.

    Returns:
        ``"DONE"`` / ``"FAIL"`` for the meta actions, a ``pyautogui.<call>(...)``
        string for the supported action types (with empty parentheses when a
        pointer action has no coordinates), or a ``# Unknown action: ...``
        comment string for any other type.
    """
    if action.type == ActionType.DONE:
        return "DONE"
    if action.type == ActionType.FAIL:
        return "FAIL"

    if action.type == ActionType.CLICK:
        if action.coordinates:
            return f"pyautogui.click({action.coordinates.x}, {action.coordinates.y})"
        return "pyautogui.click()"

    if action.type == ActionType.DOUBLE_CLICK:
        if action.coordinates:
            return (
                f"pyautogui.doubleClick({action.coordinates.x}, {action.coordinates.y})"
            )
        return "pyautogui.doubleClick()"

    if action.type == ActionType.RIGHT_CLICK:
        if action.coordinates:
            return (
                f"pyautogui.rightClick({action.coordinates.x}, {action.coordinates.y})"
            )
        return "pyautogui.rightClick()"

    if action.type == ActionType.TYPE:
        text = action.text or ""
        # Escape single quotes
        text = text.replace("'", "\\'")
        return f"pyautogui.write('{text}')"

    if action.type == ActionType.KEY:
        return f"pyautogui.press('{action.key}')"

    if action.type == ActionType.HOTKEY:
        # Unpack rather than concatenate so any sequence of modifiers works.
        keys = [*(action.modifiers or []), action.key]
        keys_str = ", ".join(f"'{k}'" for k in keys)
        return f"pyautogui.hotkey({keys_str})"

    if action.type == ActionType.SCROLL:
        amount = action.scroll_amount or 100
        if action.scroll_direction in ("down", "right"):
            amount = -amount
        # Divide the magnitude and re-apply the sign afterwards: floor
        # division of a negative amount rounds away from zero, so 150 px
        # would otherwise be 1 click up but 2 clicks down.
        clicks = abs(amount) // 100
        if amount < 0:
            clicks = -clicks
        return f"pyautogui.scroll({clicks})"

    if action.type == ActionType.HOVER:
        if action.coordinates:
            return f"pyautogui.moveTo({action.coordinates.x}, {action.coordinates.y})"
        return "pyautogui.moveTo()"

    if action.type == ActionType.DRAG:
        if action.end_coordinates:
            return f"pyautogui.dragTo({action.end_coordinates.x}, {action.end_coordinates.y})"
        return "pyautogui.drag()"

    return f"# Unknown action: {action.type}"
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
# ============================================================================
|
|
368
|
+
# Internal Format Converter (openadapt_ml.schemas.sessions)
|
|
369
|
+
# ============================================================================
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def from_internal_episode(
    internal_episode: Any,
    episode_id: Optional[str] = None,
) -> Episode:
    """Build an Episode from the internal training representation.

    The input is expected to be an openadapt_ml.schemas.sessions.Episode
    (the dataclass-based format used by the training pipeline); the result is
    the Pydantic-based Episode used for external interoperability.
    NOTE(review): confirm that module path is still current for this release.

    Args:
        internal_episode: Object exposing ``id``, ``goal``, ``success``,
            ``workflow_id``, ``summary`` and ``steps``; each step exposes
            ``observation``, ``action``, ``thought`` and ``t``.
        episode_id: Override episode ID (defaults to internal_episode.id)

    Returns:
        Episode instance in the new format
    """
    # Internal action type strings -> ActionType. Types not listed here fall
    # back to CLICK when looked up below.
    type_lookup = {
        "click": ActionType.CLICK,
        "double_click": ActionType.DOUBLE_CLICK,
        "right_click": ActionType.RIGHT_CLICK,
        "drag": ActionType.DRAG,
        "scroll": ActionType.SCROLL,
        "type": ActionType.TYPE,
        "key": ActionType.KEY,
        "wait": ActionType.WAIT,
        "done": ActionType.DONE,
        "failed": ActionType.FAIL,
        "answer": ActionType.DONE,  # Map answer to done
    }

    converted = []
    for index, src in enumerate(internal_episode.steps):
        src_obs = src.observation
        src_act = src.action

        observation = Observation(
            screenshot_path=src_obs.image_path,
            a11y_tree=src_obs.accessibility_tree,
            dom=src_obs.dom_html,
            window_title=src_obs.window_title,
            raw=src_obs.meta,
        )

        # The internal format keeps normalized coordinates in x/y; a pair is
        # only emitted when both halves are present.
        start = None
        if src_act.x is not None and src_act.y is not None:
            start = (src_act.x, src_act.y)

        end = None
        if src_act.end_x is not None and src_act.end_y is not None:
            end = (src_act.end_x, src_act.end_y)

        amount = None
        if src_act.scroll_amount:
            amount = int(src_act.scroll_amount)

        target = None
        if src_act.target_node_id:
            target = UIElement(
                element_id=src_act.target_node_id,
                role=src_act.target_role,
                name=src_act.target_name,
            )

        action = Action(
            type=type_lookup.get(src_act.type, ActionType.CLICK),
            normalized_coordinates=start,
            text=src_act.text,
            key=src_act.key,
            modifiers=src_act.modifiers,
            scroll_direction=src_act.scroll_direction,
            scroll_amount=amount,
            normalized_end=end,
            element=target,
            raw=src_act.raw,
        )

        converted.append(
            Step(
                step_index=index,
                observation=observation,
                action=action,
                reasoning=src.thought,
                timestamp=src.t,
            )
        )

    # Metadata is only attached when at least one of its fields is set.
    extra = None
    if internal_episode.workflow_id or internal_episode.summary:
        extra = {
            "workflow_id": internal_episode.workflow_id,
            "summary": internal_episode.summary,
        }

    return Episode(
        episode_id=episode_id or internal_episode.id,
        instruction=internal_episode.goal,
        steps=converted,
        success=internal_episode.success,
        metadata=extra,
    )
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def to_internal_episode(episode: Episode) -> dict:
    """Convert Episode to internal training format (as dict).

    Returns a dict intended to match the openadapt_ml.schemas.sessions.Episode
    structure. The caller can construct the dataclass from this dict.

    Only normalized coordinates are exported: pixel coordinates cannot be
    normalized here because the screen size is not known, so ``x`` / ``y``
    are None for actions that carry pixel coordinates only.

    Args:
        episode: Episode in new format

    Returns:
        Dict with ``id``, ``goal``, ``steps``, ``success``, ``workflow_id``
        and ``summary``. Each step dict has ``t``, ``observation``,
        ``action`` and ``thought``.
    """
    steps = []
    for step in episode.steps:
        action = step.action
        observation = step.observation

        # Get normalized coordinates
        norm_x, norm_y = None, None
        if action.normalized_coordinates:
            norm_x, norm_y = action.normalized_coordinates

        end = action.normalized_end
        element = action.element

        # Fall back to the step index only when no timestamp was recorded;
        # a timestamp of 0.0 is a real value and must be kept.
        timestamp = step.timestamp
        if timestamp is None:
            timestamp = float(step.step_index)

        step_dict = {
            "t": timestamp,
            "observation": {
                "image_path": observation.screenshot_path,
                "accessibility_tree": observation.a11y_tree,
                "dom_html": observation.dom,
                "window_title": observation.window_title,
                "meta": observation.raw,
            },
            "action": {
                "type": action.type.value,
                "x": norm_x,
                "y": norm_y,
                "text": action.text,
                "key": action.key,
                "modifiers": action.modifiers,
                "scroll_direction": action.scroll_direction,
                "scroll_amount": action.scroll_amount,
                "end_x": end[0] if end else None,
                "end_y": end[1] if end else None,
                "target_node_id": element.element_id if element else None,
                "target_role": element.role if element else None,
                "target_name": element.name if element else None,
                "raw": action.raw,
            },
            "thought": step.reasoning,
        }
        steps.append(step_dict)

    metadata = episode.metadata or {}

    return {
        "id": episode.episode_id,
        "goal": episode.instruction,
        "steps": steps,
        "success": episode.success,
        "workflow_id": metadata.get("workflow_id"),
        "summary": metadata.get("summary"),
    }
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def load_waa_result(result_dir: Union[str, Path]) -> Episode:
    """Load episode from WAA result directory.

    WAA result directories contain:
    - result.txt: Final score
    - trajectory.json or similar: Step-by-step data

    The first ``*trajectory*.json`` file (by sorted name) is used, falling
    back to the first ``*steps*.json`` file. The file may hold either a list
    of steps, or a dict whose ``steps`` / ``trajectory`` entry is the step
    list and whose remaining keys are treated as task info. If no such file
    exists the episode has no steps.

    Args:
        result_dir: Path to WAA result directory

    Returns:
        Episode instance whose ID is ``waa_<directory name>``. ``success`` is
        set from result.txt (score > 0) when that file holds a number.
    """
    result_dir = Path(result_dir)

    # Sort each glob so the chosen file does not depend on directory listing
    # order; trajectory files are still preferred over steps files.
    trajectory_files = sorted(result_dir.glob("*trajectory*.json")) + sorted(
        result_dir.glob("*steps*.json")
    )

    trajectory = []
    task_info = {}

    if trajectory_files:
        # Decode explicitly rather than with the platform default encoding;
        # "utf-8-sig" also accepts files written with a byte-order mark.
        with open(trajectory_files[0], encoding="utf-8-sig") as f:
            data = json.load(f)
        if isinstance(data, list):
            trajectory = data
        elif isinstance(data, dict):
            # A null "steps"/"trajectory" value is treated as no steps.
            trajectory = data.get("steps", data.get("trajectory", [])) or []
            task_info = {
                k: v for k, v in data.items() if k not in ["steps", "trajectory"]
            }

    # Try to read result
    result_file = result_dir / "result.txt"
    if result_file.exists():
        result_str = result_file.read_text(encoding="utf-8-sig").strip()
        try:
            task_info["success"] = float(result_str) > 0
        except ValueError:
            # Non-numeric result: leave "success" as provided by the
            # trajectory file, if at all.
            pass

    # Try to get task info from parent directory name
    task_id = result_dir.name
    if task_id and "task_id" not in task_info:
        task_info["task_id"] = task_id

    return from_waa_trajectory(trajectory, task_info, episode_id=f"waa_{task_id}")
|