openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
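The file list above shows the 0.1.0 `openadapt_ml/schemas/` package (sessions, validation) being removed and replaced by a new `openadapt_ml/schema/` package (episode, converters). Based on the field names visible in the diffs below, a minimal sketch of the new episode model in use might look like the following; exact defaults, validators, and required fields are assumptions, and the screenshot paths are hypothetical.

```python
# Minimal sketch of the new openadapt_ml.schema episode model (0.2.0), inferred from
# the diffs below. Field names come from the diff; defaults/validation are assumptions.
from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step

steps = [
    Step(
        step_index=0,
        observation=Observation(screenshot_path="screenshots/demo/step_0.png"),  # hypothetical path
        action=Action(type=ActionType.CLICK, normalized_coordinates=(0.42, 0.17)),
        reasoning=None,
        timestamp=0.0,
    ),
    Step(
        step_index=1,
        observation=Observation(screenshot_path="screenshots/demo/step_0.png"),
        action=Action(type=ActionType.DONE),
        reasoning="Workflow complete.",
        timestamp=0.1,
    ),
]

episode = Episode(
    episode_id="capture_demo",
    instruction="Turn off nightshift",
    steps=steps,
    success=True,
    metadata={"summary": f"Real recording with {len(steps)} steps", "workflow_id": "demo"},
)
```

The per-file diffs that follow show how `ingest/capture.py` and `ingest/loader.py` were migrated from the old `Session`-based schema onto these types.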
openadapt_ml/ingest/capture.py
CHANGED

@@ -10,21 +10,21 @@ import uuid
 from pathlib import Path
 from typing import TYPE_CHECKING

-from openadapt_ml.
+from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step

 if TYPE_CHECKING:
     from PIL import Image

-# Event type mapping from openadapt-capture to openadapt-ml
+# Event type mapping from openadapt-capture to openadapt-ml ActionType
 EVENT_TYPE_MAP = {
-    "mouse.singleclick":
-    "mouse.click":
-    "mouse.doubleclick":
-    "mouse.drag":
-    "mouse.scroll":
-    "key.type":
-    "key.down":
-    "key.up":
+    "mouse.singleclick": ActionType.CLICK,
+    "mouse.click": ActionType.CLICK,
+    "mouse.doubleclick": ActionType.DOUBLE_CLICK,
+    "mouse.drag": ActionType.DRAG,
+    "mouse.scroll": ActionType.SCROLL,
+    "key.type": ActionType.TYPE,
+    "key.down": ActionType.KEY,
+    "key.up": ActionType.KEY,
 }


@@ -33,7 +33,7 @@ def _normalize_coords(
     y: float | None,
     screen_width: int,
     screen_height: int,
-) -> tuple[float
+) -> tuple[float, float] | None:
     """Normalize pixel coordinates to [0, 1] range.

     Args:
@@ -43,11 +43,11 @@ def _normalize_coords(
         screen_height: Screen height in pixels.

     Returns:
-        Tuple of (normalized_x, normalized_y).
+        Tuple of (normalized_x, normalized_y) or None if coords are None.
     """
     if x is None or y is None:
-        return None
-    return x / screen_width, y / screen_height
+        return None
+    return (x / screen_width, y / screen_height)


 def _save_screenshot(
@@ -77,7 +77,7 @@ def _save_screenshot(
 def capture_to_episode(
     capture_path: str | Path,
     output_dir: str | Path | None = None,
-
+    instruction: str | None = None,
     episode_id: str | None = None,
     include_moves: bool = False,
 ) -> Episode:
@@ -87,8 +87,8 @@ def capture_to_episode(
         capture_path: Path to the capture directory.
         output_dir: Directory to save extracted screenshots. If None, uses
             capture_path/screenshots.
-
-
+        instruction: Task description/instruction for the episode. If None, uses
+            capture's task_description or a generic message.
         episode_id: Identifier for the episode. If None, generates a UUID.
         include_moves: Whether to include mouse move events.

@@ -126,18 +126,18 @@ def capture_to_episode(
     if episode_id is None:
         episode_id = f"capture_{capture.id}"

-    # Get
-    if
+    # Get instruction from capture or derive from context
+    if instruction is None:
         if capture.task_description:
-
+            instruction = capture.task_description
         else:
-            # Try to derive
+            # Try to derive instruction from directory name (e.g., "turn-off-nightshift" -> "Turn off nightshift")
             dir_name = capture_path.name
             if dir_name and dir_name != "capture":
                 # Convert kebab-case/snake_case to readable text
-
+                instruction = dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
             else:
-
+                instruction = "Complete the recorded workflow"

     # Get screen dimensions for coordinate normalization
     screen_width, screen_height = capture.screen_size
@@ -152,22 +152,21 @@ def capture_to_episode(
             continue

         # Save screenshot
-
+        screenshot_path = _save_screenshot(screenshot, output_dir, episode_id, idx)

         # Normalize coordinates
-
+        norm_coords = _normalize_coords(
             action.x, action.y, screen_width, screen_height
         )

-        # Map event type to openadapt-ml
+        # Map event type to openadapt-ml ActionType
         event_type = action.type
-        action_type = EVENT_TYPE_MAP.get(event_type,
+        action_type = EVENT_TYPE_MAP.get(event_type, ActionType.CLICK)

         # Build Action object
         ml_action = Action(
             type=action_type,
-
-            y=norm_y,
+            normalized_coordinates=norm_coords,
             text=action.text,
         )

@@ -175,34 +174,50 @@ def capture_to_episode(
         if isinstance(action.event, MouseDragEvent):
             end_x = action.event.x + action.event.dx
             end_y = action.event.y + action.event.dy
-
+            norm_end = _normalize_coords(
                 end_x, end_y, screen_width, screen_height
             )
-            ml_action
-            "
-            "
-
-
+            ml_action = ml_action.model_copy(update={
+                "normalized_end": norm_end,
+                "raw": {
+                    "button": action.event.button,
+                },
+            })

         # Handle scroll events
         if isinstance(action.event, MouseScrollEvent):
-
-
-
-
+            # Determine scroll direction from dx/dy
+            scroll_direction = None
+            if action.event.dy > 0:
+                scroll_direction = "down"
+            elif action.event.dy < 0:
+                scroll_direction = "up"
+            elif action.event.dx > 0:
+                scroll_direction = "right"
+            elif action.event.dx < 0:
+                scroll_direction = "left"
+
+            ml_action = ml_action.model_copy(update={
+                "scroll_direction": scroll_direction,
+                "raw": {
+                    "dx": action.event.dx,
+                    "dy": action.event.dy,
+                },
+            })

         # Handle keyboard events - include key names for special keys
         if action.keys:
-
-
-            ml_action.raw
+            raw = ml_action.raw or {}
+            raw["keys"] = action.keys
+            ml_action = ml_action.model_copy(update={"raw": raw})

         # Create Step
         step = Step(
-
-            observation=Observation(
+            step_index=idx,
+            observation=Observation(screenshot_path=screenshot_path),
             action=ml_action,
-
+            reasoning=None,  # Real recordings don't have reasoning
+            timestamp=action.timestamp - start_time,
         )
         steps.append(step)

@@ -211,69 +226,62 @@ def capture_to_episode(
         # Use the last screenshot for the done action
         last_step = steps[-1]
         done_step = Step(
-
-            observation=Observation(
-            action=Action(type=
-
+            step_index=len(steps),
+            observation=Observation(screenshot_path=last_step.observation.screenshot_path),
+            action=Action(type=ActionType.DONE),
+            reasoning="Workflow complete.",
+            timestamp=(last_step.timestamp or 0) + 0.1,
        )
         steps.append(done_step)

     capture.close()

     return Episode(
-
-
+        episode_id=episode_id,
+        instruction=instruction,
         steps=steps,
-        summary=f"Real recording with {len(steps)} steps",
         success=True,
-
+        metadata={
+            "summary": f"Real recording with {len(steps)} steps",
+            "workflow_id": capture.id,
+        },
     )


-def
+def capture_to_episodes(
     capture_path: str | Path,
     output_dir: str | Path | None = None,
-
-    session_id: str | None = None,
+    instruction: str | None = None,
     include_moves: bool = False,
-) ->
-    """Convert an openadapt-capture recording to a
+) -> list[Episode]:
+    """Convert an openadapt-capture recording to a list with one Episode.
+
+    This is a convenience function that returns episodes as a list for consistency
+    with the new schema (which uses list[Episode] instead of Session).

     Args:
         capture_path: Path to the capture directory.
         output_dir: Directory to save extracted screenshots.
-
-        session_id: Identifier for the session. If None, generates a UUID.
+        instruction: Task description/instruction for the episode.
         include_moves: Whether to include mouse move events.

     Returns:
-
+        List containing a single Episode.
     """
     episode = capture_to_episode(
         capture_path=capture_path,
         output_dir=output_dir,
-
+        instruction=instruction,
         include_moves=include_moves,
     )
-
-    if session_id is None:
-        session_id = f"session_{uuid.uuid4().hex[:8]}"
-
-    return Session(
-        id=session_id,
-        episodes=[episode],
-        meta={
-            "source": "openadapt-capture",
-            "capture_path": str(capture_path),
-        },
-    )
+    return [episode]


-def
+def load_captures_as_episodes(
     captures_dir: str | Path,
     output_dir: str | Path | None = None,
     include_moves: bool = False,
-) -> list[
+) -> list[Episode]:
     """Load multiple captures from a directory.

     Scans for subdirectories containing capture.db files.
@@ -284,10 +292,10 @@ def load_captures_as_sessions(
         include_moves: Whether to include mouse move events.

     Returns:
-        List of
+        List of Episodes, one per capture.
     """
     captures_dir = Path(captures_dir)
-
+    episodes = []

     # Find all capture.db files
     for db_path in captures_dir.glob("**/capture.db"):
@@ -300,13 +308,13 @@ def load_captures_as_sessions(
         capture_output = None

         try:
-
+            episode = capture_to_episode(
                 capture_path=capture_path,
                 output_dir=capture_output,
                 include_moves=include_moves,
             )
-
+            episodes.append(episode)
         except Exception as e:
             print(f"Warning: Failed to load {capture_path}: {e}")

-    return
+    return episodes
openadapt_ml/ingest/loader.py
CHANGED

@@ -10,8 +10,9 @@ import json
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union

-from
-
+from pydantic import ValidationError
+
+from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step


 def load_episodes(
@@ -52,7 +53,7 @@ def load_episodes(

     if path.is_file():
         # Single JSON file
-        episodes = _load_episodes_from_file(path)
+        episodes = _load_episodes_from_file(path, validate=validate)
     elif path.is_dir():
         # Directory of JSON files
         json_files = sorted(path.glob("*.json"))
@@ -60,15 +61,15 @@ def load_episodes(
             raise ValueError(f"No JSON files found in {path}")

         for json_file in json_files:
-            file_episodes = _load_episodes_from_file(json_file)
+            file_episodes = _load_episodes_from_file(json_file, validate=validate)
             episodes.extend(file_episodes)
     else:
         raise ValueError(f"Path must be a file or directory: {path}")

-    if
-        warnings =
+    if check_images:
+        warnings = _check_episode_images(episodes)
         if warnings:
-            print(f"
+            print(f"Image warnings ({len(warnings)}):")
             for w in warnings[:10]:  # Show first 10
                 print(f"  - {w}")
             if len(warnings) > 10:
@@ -77,7 +78,21 @@ def load_episodes(
     return episodes


-def
+def _check_episode_images(episodes: List[Episode]) -> List[str]:
+    """Check that all referenced images exist on disk."""
+    warnings = []
+    for ep in episodes:
+        for step in ep.steps:
+            if step.observation.screenshot_path:
+                if not Path(step.observation.screenshot_path).exists():
+                    warnings.append(
+                        f"Episode {ep.episode_id}, step {step.step_index}: "
+                        f"Image not found: {step.observation.screenshot_path}"
+                    )
+    return warnings
+
+
+def _load_episodes_from_file(path: Path, validate: bool = True) -> List[Episode]:
     """Load episodes from a single JSON file."""
     with open(path, "r") as f:
         data = json.load(f)
@@ -85,75 +100,119 @@ def _load_episodes_from_file(path: Path) -> List[Episode]:
     # Handle different JSON structures
     if isinstance(data, list):
         # List of episodes
-        return [_dict_to_episode(ep) for ep in data]
+        return [_dict_to_episode(ep, validate=validate) for ep in data]
     elif isinstance(data, dict):
         # Single episode or wrapped format
         if "episodes" in data:
-            return [_dict_to_episode(ep) for ep in data["episodes"]]
-        elif "
-            # Single episode
-            return [_dict_to_episode(data)]
+            return [_dict_to_episode(ep, validate=validate) for ep in data["episodes"]]
+        elif "episode_id" in data or "id" in data:
+            # Single episode (support both old and new field names)
+            return [_dict_to_episode(data, validate=validate)]
         else:
             raise ValueError(f"Unrecognized JSON format in {path}")
     else:
         raise ValueError(f"Expected list or dict in {path}, got {type(data)}")


-def
+def _parse_action_type(type_str: str) -> ActionType:
+    """Parse action type string to ActionType enum."""
+    # Handle common mappings from old format
+    type_map = {
+        "unknown": ActionType.CLICK,
+        "double_click": ActionType.DOUBLE_CLICK,
+        "right_click": ActionType.RIGHT_CLICK,
+        "key_press": ActionType.KEY,
+    }
+
+    type_lower = type_str.lower()
+    if type_lower in type_map:
+        return type_map[type_lower]
+
+    # Try direct enum lookup
+    try:
+        return ActionType(type_lower)
+    except ValueError:
+        # Default to CLICK for unknown types
+        return ActionType.CLICK
+
+
+def _dict_to_episode(data: Dict[str, Any], validate: bool = True) -> Episode:
     """Convert a dictionary to an Episode object."""
     steps = []
-    for step_data in data.get("steps", []):
+    for step_idx, step_data in enumerate(data.get("steps", [])):
         # Parse observation
         obs_data = step_data.get("observation", {})
         observation = Observation(
-
-
-
-
-            url=obs_data.get("url"),
+            screenshot_path=obs_data.get("screenshot_path") or obs_data.get("image_path"),
+            raw=obs_data.get("raw") or obs_data.get("meta"),
+            a11y_tree=obs_data.get("a11y_tree") or obs_data.get("accessibility_tree"),
+            dom=obs_data.get("dom") or obs_data.get("dom_html"),
             window_title=obs_data.get("window_title"),
-            app_name=obs_data.get("app_name"),
             focused_element=obs_data.get("focused_element"),
         )

         # Parse action
         action_data = step_data.get("action", {})
+
+        # Handle action type (string -> enum)
+        action_type_raw = action_data.get("type", "click")
+        action_type = _parse_action_type(action_type_raw)
+
+        # Handle coordinates: convert x,y to normalized_coordinates tuple
+        normalized_coords = None
+        if action_data.get("normalized_coordinates"):
+            normalized_coords = tuple(action_data["normalized_coordinates"])
+        elif action_data.get("x") is not None and action_data.get("y") is not None:
+            normalized_coords = (action_data["x"], action_data["y"])
+
+        # Handle end coordinates for drag actions
+        normalized_end = None
+        if action_data.get("normalized_end"):
+            normalized_end = tuple(action_data["normalized_end"])
+        elif action_data.get("end_x") is not None and action_data.get("end_y") is not None:
+            normalized_end = (action_data["end_x"], action_data["end_y"])
+
         action = Action(
-            type=
-
-
+            type=action_type,
+            normalized_coordinates=normalized_coords,
+            normalized_end=normalized_end,
             text=action_data.get("text"),
             raw=action_data.get("raw"),
-            bbox=tuple(action_data["bbox"]) if action_data.get("bbox") else None,
-            element_index=action_data.get("element_index"),
-            target_node_id=action_data.get("target_node_id"),
-            target_role=action_data.get("target_role"),
-            target_name=action_data.get("target_name"),
             key=action_data.get("key"),
             modifiers=action_data.get("modifiers"),
             scroll_direction=action_data.get("scroll_direction"),
             scroll_amount=action_data.get("scroll_amount"),
-            end_x=action_data.get("end_x"),
-            end_y=action_data.get("end_y"),
-            answer=action_data.get("answer"),
         )

+        # Handle step index and timestamp
+        step_index = step_data.get("step_index", step_idx)
+        timestamp = step_data.get("timestamp") or step_data.get("t")
+
         step = Step(
-
+            step_index=step_index,
             observation=observation,
             action=action,
-
+            reasoning=step_data.get("reasoning") or step_data.get("thought"),
+            timestamp=timestamp,
         )
         steps.append(step)

-
-
-
-
-
-        success
-
-
+    # Build episode with field mapping (old -> new)
+    episode_data = {
+        "episode_id": data.get("episode_id") or data.get("id", "unknown"),
+        "instruction": data.get("instruction") or data.get("goal", ""),
+        "steps": steps,
+        "success": data.get("success"),
+        "metadata": {
+            "summary": data.get("summary"),
+            "workflow_id": data.get("workflow_id"),
+        },
+    }
+
+    if validate:
+        return Episode.model_validate(episode_data)
+    else:
+        return Episode(**episode_data)


 def save_episodes(
@@ -178,9 +237,9 @@ def save_episodes(

     with open(path, "w") as f:
         if pretty:
-            json.dump(data, f, indent=2)
+            json.dump(data, f, indent=2, default=str)
         else:
-            json.dump(data, f)
+            json.dump(data, f, default=str)


 def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
@@ -188,45 +247,34 @@ def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
     steps = []
     for step in episode.steps:
         step_dict = {
-            "
+            "step_index": step.step_index,
+            "timestamp": step.timestamp,
             "observation": {
-                "
-                "
-                "
-                "
-                "url": step.observation.url,
+                "screenshot_path": step.observation.screenshot_path,
+                "raw": step.observation.raw,
+                "a11y_tree": step.observation.a11y_tree,
+                "dom": step.observation.dom,
                 "window_title": step.observation.window_title,
-                "app_name": step.observation.app_name,
-                "focused_element": step.observation.focused_element,
             },
             "action": {
-                "type": step.action.type,
-                "
-                "
+                "type": step.action.type.value,
+                "normalized_coordinates": step.action.normalized_coordinates,
+                "normalized_end": step.action.normalized_end,
                 "text": step.action.text,
                 "raw": step.action.raw,
-                "bbox": list(step.action.bbox) if step.action.bbox else None,
-                "element_index": step.action.element_index,
-                "target_node_id": step.action.target_node_id,
-                "target_role": step.action.target_role,
-                "target_name": step.action.target_name,
                 "key": step.action.key,
                 "modifiers": step.action.modifiers,
                 "scroll_direction": step.action.scroll_direction,
                 "scroll_amount": step.action.scroll_amount,
-                "end_x": step.action.end_x,
-                "end_y": step.action.end_y,
-                "answer": step.action.answer,
             },
-            "
+            "reasoning": step.reasoning,
         }
         steps.append(step_dict)

     return {
-        "
-        "
+        "episode_id": episode.episode_id,
+        "instruction": episode.instruction,
         "steps": steps,
-        "summary": episode.summary,
         "success": episode.success,
-        "
+        "metadata": episode.metadata,
     }