openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/schema/episode.py
ADDED

@@ -0,0 +1,457 @@
+"""
+Episode Schema for GUI Trajectory Data
+
+Canonical contract for episode/demonstration data in GUI automation. Designed for
+interoperability across training pipelines, benchmarks, and human demonstrations.
+
+Features:
+- Pydantic models with runtime validation
+- JSON Schema export for language-agnostic tooling
+- Supports pixel coordinates AND normalized (0-1) coordinates
+- Extensible via `raw` and `metadata` fields
+- Converters for common formats (WAA, WebArena, etc.)
+
+Quick Start:
+    from openadapt_ml.schema import Episode, Step, Action, Observation, ActionType
+
+    episode = Episode(
+        episode_id="demo_001",
+        instruction="Open the Settings app and enable Dark Mode",
+        steps=[
+            Step(
+                step_index=0,
+                observation=Observation(screenshot_path="step_0.png"),
+                action=Action(
+                    type=ActionType.CLICK,
+                    coordinates={"x": 512, "y": 384},
+                    # Or use normalized coords for resolution independence:
+                    # normalized_coordinates=(0.5, 0.375),
+                ),
+                reasoning="Click on Settings icon",
+            ),
+        ],
+        success=True,
+    )
+
+    # Validate any dict against the schema
+    from openadapt_ml.schema import validate_episode
+    is_valid, error = validate_episode(data)
+
+    # Export JSON Schema for external tools
+    from openadapt_ml.schema import export_json_schema
+    export_json_schema("episode.schema.json")
+
+Schema Version: 1.0.0
+- Core models: Episode, Step, Action, Observation
+- 24 action types covering mouse, keyboard, navigation, and system actions
+- Support for both pixel and normalized coordinates
+- Extension points: raw, metadata fields
+
+Evolution Policy (SemVer):
+- PATCH (1.0.x): Documentation, bug fixes (no schema changes)
+- MINOR (1.x.0): New optional fields with defaults (backward compatible)
+- MAJOR (x.0.0): Breaking changes (field removal, type changes, new required fields)
+
+Migration Guide:
+- MINOR bumps: No action needed, old data validates
+- MAJOR bumps: Use converters or migration scripts (provided in release notes)
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any, Literal, Optional, Union
+
+from pydantic import BaseModel, Field, field_validator, model_validator
+
+
+# Schema version - follows semver
+SCHEMA_VERSION = "1.0.0"
+
+
+class ActionType(str, Enum):
+    """Supported action types for GUI automation."""
+
+    # Mouse actions
+    CLICK = "click"
+    DOUBLE_CLICK = "double_click"
+    RIGHT_CLICK = "right_click"
+    DRAG = "drag"
+    SCROLL = "scroll"
+    HOVER = "hover"
+
+    # Keyboard actions
+    TYPE = "type"
+    KEY = "key"
+    HOTKEY = "hotkey"
+
+    # Combined/special actions
+    CLICK_AND_TYPE = "click_and_type"
+    WAIT = "wait"
+    SCREENSHOT = "screenshot"
+
+    # Navigation (for web)
+    GOTO = "goto"
+    BACK = "back"
+    FORWARD = "forward"
+    REFRESH = "refresh"
+
+    # System actions
+    OPEN_APP = "open_app"
+    CLOSE_APP = "close_app"
+    SELECT_MONITOR = "select_monitor"  # Multi-monitor: focus a specific display
+    WINDOW_FOCUS = "window_focus"  # Focus a specific window
+    WINDOW_RESIZE = "window_resize"  # Resize window
+    WINDOW_MOVE = "window_move"  # Move window
+
+    # Meta actions
+    DONE = "done"
+    FAIL = "fail"
+
+
+class BenchmarkSource(str, Enum):
+    """Source benchmark/dataset for the episode."""
+
+    WAA = "waa"  # Windows Agent Arena
+    WEBARENA = "webarena"
+    OSWORLD = "osworld"
+    MINIWOB = "miniwob"
+    HUMAN = "human"  # Human demonstration
+    SYNTHETIC = "synthetic"  # Generated/augmented
+
+
+class Coordinates(BaseModel):
+    """Screen coordinates for mouse actions."""
+
+    x: int = Field(..., description="X coordinate (pixels from left)")
+    y: int = Field(..., description="Y coordinate (pixels from top)")
+
+    @field_validator("x", "y")
+    @classmethod
+    def validate_non_negative(cls, v: int) -> int:
+        if v < 0:
+            raise ValueError("Coordinates must be non-negative")
+        return v
+
+
+class BoundingBox(BaseModel):
+    """Bounding box for UI elements."""
+
+    x: int = Field(..., description="Left edge X coordinate")
+    y: int = Field(..., description="Top edge Y coordinate")
+    width: int = Field(..., ge=0, description="Width in pixels")
+    height: int = Field(..., ge=0, description="Height in pixels")
+
+    @property
+    def center(self) -> Coordinates:
+        """Get center point of bounding box."""
+        return Coordinates(x=self.x + self.width // 2, y=self.y + self.height // 2)
+
+
+class UIElement(BaseModel):
+    """UI element information from accessibility tree or DOM."""
+
+    role: Optional[str] = Field(None, description="Element role (button, textbox, etc.)")
+    name: Optional[str] = Field(None, description="Element accessible name")
+    value: Optional[str] = Field(None, description="Element value (for inputs)")
+    bounds: Optional[BoundingBox] = Field(None, description="Element bounding box")
+    element_id: Optional[str] = Field(None, description="Unique element identifier")
+    xpath: Optional[str] = Field(None, description="XPath selector (web)")
+    selector: Optional[str] = Field(None, description="CSS selector (web)")
+    automation_id: Optional[str] = Field(None, description="Automation ID (Windows)")
+
+
+class Action(BaseModel):
+    """An action taken by the agent."""
+
+    type: ActionType = Field(..., description="Type of action")
+
+    # Mouse action parameters
+    coordinates: Optional[Coordinates] = Field(
+        None, description="Target coordinates for mouse actions"
+    )
+    start_coordinates: Optional[Coordinates] = Field(
+        None, description="Start coordinates for drag actions"
+    )
+    end_coordinates: Optional[Coordinates] = Field(
+        None, description="End coordinates for drag actions"
+    )
+    scroll_direction: Optional[Literal["up", "down", "left", "right"]] = Field(
+        None, description="Scroll direction"
+    )
+    scroll_amount: Optional[int] = Field(None, description="Scroll amount in pixels")
+
+    # Keyboard action parameters
+    text: Optional[str] = Field(None, description="Text to type")
+    key: Optional[str] = Field(None, description="Key to press (e.g., 'enter', 'tab')")
+    modifiers: Optional[list[str]] = Field(
+        None, description="Modifier keys (ctrl, alt, shift, meta)"
+    )
+
+    # Element targeting (alternative to coordinates)
+    element: Optional[UIElement] = Field(
+        None, description="Target element (for element-based actions)"
+    )
+
+    # Additional parameters
+    url: Optional[str] = Field(None, description="URL for goto action")
+    app_name: Optional[str] = Field(None, description="Application name for open/close")
+    duration: Optional[float] = Field(None, description="Duration in seconds (for wait)")
+    monitor_id: Optional[int] = Field(None, description="Monitor ID for select_monitor action")
+    window_title: Optional[str] = Field(None, description="Window title for window_focus action")
+
+    # Normalized coordinates (0.0-1.0) - alternative to pixel coordinates
+    # Useful for resolution-independent recordings
+    normalized_coordinates: Optional[tuple[float, float]] = Field(
+        None, description="Normalized (x, y) coordinates (0.0-1.0 range)"
+    )
+    normalized_start: Optional[tuple[float, float]] = Field(
+        None, description="Normalized start coordinates for drag (0.0-1.0 range)"
+    )
+    normalized_end: Optional[tuple[float, float]] = Field(
+        None, description="Normalized end coordinates for drag (0.0-1.0 range)"
+    )
+
+    # Raw/original action data
+    raw: Optional[dict[str, Any]] = Field(
+        None, description="Original action data from source format"
+    )
+
+    @model_validator(mode="after")
+    def validate_action_params(self) -> "Action":
+        """Validate that required parameters are present for action type."""
+        if self.type in {ActionType.CLICK, ActionType.DOUBLE_CLICK, ActionType.RIGHT_CLICK}:
+            if self.coordinates is None and self.element is None:
+                # Allow missing coordinates - can be inferred from context
+                pass
+
+        if self.type == ActionType.TYPE and self.text is None:
+            raise ValueError("TYPE action requires 'text' parameter")
+
+        if self.type == ActionType.KEY and self.key is None:
+            raise ValueError("KEY action requires 'key' parameter")
+
+        if self.type == ActionType.GOTO and self.url is None:
+            raise ValueError("GOTO action requires 'url' parameter")
+
+        return self
+
+
+class Observation(BaseModel):
+    """An observation of the environment state."""
+
+    # Visual observation
+    screenshot_path: Optional[str] = Field(
+        None, description="Path to screenshot image file"
+    )
+    screenshot_base64: Optional[str] = Field(
+        None, description="Base64-encoded screenshot (for inline storage)"
+    )
+
+    # Structured observations
+    a11y_tree: Optional[dict[str, Any]] = Field(
+        None, description="Accessibility tree snapshot"
+    )
+    dom: Optional[str] = Field(None, description="DOM HTML snapshot (web)")
+
+    # Window/screen info
+    window_title: Optional[str] = Field(None, description="Active window title")
+    app_name: Optional[str] = Field(None, description="Application name (e.g., 'Chrome', 'System Settings')")
+    url: Optional[str] = Field(None, description="Current URL (for web apps)")
+    screen_size: Optional[tuple[int, int]] = Field(
+        None, description="Screen dimensions (width, height)"
+    )
+
+    # Focused element
+    focused_element: Optional[UIElement] = Field(
+        None, description="Currently focused UI element"
+    )
+
+    # Additional metadata
+    timestamp: Optional[float] = Field(None, description="Unix timestamp")
+    raw: Optional[dict[str, Any]] = Field(
+        None, description="Original observation data from source format"
+    )
+
+
+class Step(BaseModel):
+    """A single step in an episode (observation -> action pair)."""
+
+    step_index: int = Field(..., ge=0, description="Step number (0-indexed)")
+
+    # Core data
+    observation: Observation = Field(..., description="State observation before action")
+    action: Action = Field(..., description="Action taken")
+
+    # Agent reasoning (for demos/training)
+    reasoning: Optional[str] = Field(
+        None, description="Agent's reasoning for the action (chain-of-thought)"
+    )
+
+    # Outcome
+    reward: Optional[float] = Field(None, description="Reward signal (if available)")
+    done: Optional[bool] = Field(None, description="Whether episode ended after this step")
+
+    # Timing
+    timestamp: Optional[float] = Field(None, description="Unix timestamp of action")
+    duration_ms: Optional[int] = Field(
+        None, description="Time taken for this step in milliseconds"
+    )
+
+
+class Episode(BaseModel):
+    """A complete episode/demonstration for GUI automation.
+
+    This is the canonical format for storing and exchanging GUI trajectory data.
+    All benchmark-specific formats should be converted to/from this format.
+    """
+
+    # Schema metadata
+    schema_version: str = Field(
+        default=SCHEMA_VERSION,
+        description="Schema version for compatibility checking"
+    )
+
+    # Episode identification
+    episode_id: str = Field(..., description="Unique episode identifier")
+    task_id: Optional[str] = Field(None, description="Task identifier (from benchmark)")
+
+    # Task specification
+    instruction: str = Field(..., description="Natural language task instruction")
+    goal: Optional[str] = Field(
+        None, description="Detailed goal description (if different from instruction)"
+    )
+
+    # Episode data
+    steps: list[Step] = Field(..., description="Sequence of steps in the episode")
+
+    # Outcome
+    success: Optional[bool] = Field(None, description="Whether task was completed successfully")
+    final_reward: Optional[float] = Field(None, description="Final reward/score")
+
+    # Provenance
+    source: Optional[BenchmarkSource] = Field(
+        None, description="Source benchmark/dataset"
+    )
+    source_file: Optional[str] = Field(
+        None, description="Original source file path"
+    )
+
+    # Metadata
+    created_at: Optional[datetime] = Field(
+        default_factory=datetime.utcnow,
+        description="When episode was created/recorded"
+    )
+    agent_model: Optional[str] = Field(
+        None, description="Model that generated this episode (e.g., 'gpt-4o')"
+    )
+    environment: Optional[str] = Field(
+        None, description="Environment info (OS, browser, etc.)"
+    )
+    tags: Optional[list[str]] = Field(
+        None, description="Tags for categorization"
+    )
+
+    # Extension point for benchmark-specific data
+    metadata: Optional[dict[str, Any]] = Field(
+        None, description="Additional metadata from source"
+    )
+
+    @property
+    def num_steps(self) -> int:
+        """Number of steps in the episode."""
+        return len(self.steps)
+
+    @property
+    def action_types(self) -> list[ActionType]:
+        """List of action types in this episode."""
+        return [step.action.type for step in self.steps]
+
+    def to_json(self, indent: int = 2) -> str:
+        """Serialize to JSON string."""
+        return self.model_dump_json(indent=indent)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> "Episode":
+        """Deserialize from JSON string."""
+        return cls.model_validate_json(json_str)
+
+    @classmethod
+    def json_schema(cls) -> dict[str, Any]:
+        """Get JSON Schema for Episode format."""
+        return cls.model_json_schema()
+
+
+# ============================================================================
+# Utility Functions
+# ============================================================================
+
+def validate_episode(data: dict[str, Any]) -> tuple[bool, Optional[str]]:
+    """Validate episode data against schema.
+
+    Args:
+        data: Episode data as dictionary
+
+    Returns:
+        Tuple of (is_valid, error_message)
+    """
+    try:
+        Episode.model_validate(data)
+        return True, None
+    except Exception as e:
+        return False, str(e)
+
+
+def load_episode(path: Union[str, Path]) -> Episode:
+    """Load episode from JSON file.
+
+    Args:
+        path: Path to JSON file
+
+    Returns:
+        Episode instance
+    """
+    path = Path(path)
+    with open(path, "r") as f:
+        data = json.load(f)
+
+    episode = Episode.model_validate(data)
+
+    # Set source_file if not already set
+    if episode.source_file is None:
+        episode = episode.model_copy(update={"source_file": str(path)})
+
+    return episode


+def save_episode(episode: Episode, path: Union[str, Path], indent: int = 2) -> None:
+    """Save episode to JSON file.
+
+    Args:
+        episode: Episode to save
+        path: Output path
+        indent: JSON indentation
+    """
+    path = Path(path)
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(path, "w") as f:
+        f.write(episode.to_json(indent=indent))
+
+
+def export_json_schema(path: Union[str, Path]) -> None:
+    """Export JSON Schema to file for documentation/tooling.
+
+    Args:
+        path: Output path for schema file
+    """
+    path = Path(path)
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    schema = Episode.json_schema()
+
+    with open(path, "w") as f:
+        json.dump(schema, f, indent=2)
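Taken together, episode.py defines a round-trippable format: build an Episode, save it, reload it, and validate untrusted dicts. A minimal usage sketch, importing straight from the module (the package-level re-exports shown in the docstring should work equally well); the file path and episode contents are illustrative:

    from openadapt_ml.schema.episode import (
        Action, ActionType, Episode, Observation, Step,
        load_episode, save_episode, validate_episode,
    )

    # One-step episode, using normalized coordinates for resolution independence.
    episode = Episode(
        episode_id="demo_001",
        instruction="Open the Settings app and enable Dark Mode",
        steps=[
            Step(
                step_index=0,
                observation=Observation(screenshot_path="step_0.png"),
                action=Action(type=ActionType.CLICK, normalized_coordinates=(0.5, 0.375)),
                reasoning="Click on Settings icon",
            ),
        ],
        success=True,
    )

    # Round-trip through JSON on disk; load_episode backfills source_file.
    save_episode(episode, "episodes/demo_001.json")
    loaded = load_episode("episodes/demo_001.json")
    assert loaded.num_steps == 1 and loaded.source_file is not None

    # validate_episode reports errors as (is_valid, message) instead of raising.
    ok, err = validate_episode(loaded.model_dump(mode="json"))
    assert ok, err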
openadapt_ml/scripts/compare.py
CHANGED
@@ -17,7 +17,7 @@ from pathlib import Path
 from typing import Any
 
 from openadapt_ml.ingest.capture import capture_to_episode
-from openadapt_ml.
+from openadapt_ml.schema import Episode, Step, ActionType
 from openadapt_ml.datasets.next_action import SYSTEM_PROMPT, format_action
 from openadapt_ml.training.trainer import _get_shared_header_css, _generate_shared_header_html
 
@@ -141,14 +141,19 @@ def generate_comparison_data(
     total_steps = len(episode.steps)
 
     for i, step in enumerate(episode.steps):
+        # Extract normalized coordinates if available
+        action_x, action_y = None, None
+        if step.action.normalized_coordinates:
+            action_x, action_y = step.action.normalized_coordinates
+        action_type_str = step.action.type.value if isinstance(step.action.type, ActionType) else step.action.type
         step_data = {
             "index": i,
-            "time": step.
-            "image_path": step.observation.
+            "time": step.step_index,
+            "image_path": step.observation.screenshot_path,
             "human_action": {
-                "type":
-                "x":
-                "y":
+                "type": action_type_str,
+                "x": action_x,
+                "y": action_y,
                 "text": step.action.text,
             },
             "predicted_action": None,
@@ -156,11 +161,11 @@
         }
 
         # Get prediction if model available
-        if model and step.observation.
+        if model and step.observation.screenshot_path:
            predicted = predict_action(
                 model,
-                step.observation.
-                episode.
+                step.observation.screenshot_path,
+                episode.instruction,
                 step_index=i,
                 total_steps=total_steps,
                 action_history=action_history.copy(),
@@ -168,7 +173,7 @@
             step_data["predicted_action"] = predicted
 
             # Check if prediction matches human action
-            if predicted and predicted.get("type") ==
+            if predicted and predicted.get("type") == action_type_str:
                 step_data["match"] = True
             else:
                 step_data["match"] = False
@@ -839,21 +844,26 @@ def generate_unified_viewer(
     if available_captures is None:
         available_captures = [{
             "id": capture_id,
-            "name": episode.
+            "name": episode.instruction or "Untitled",
             "steps": len(episode.steps),
         }]
 
     # Prepare base capture data (human actions only, no predictions)
     base_data = []
     for i, step in enumerate(episode.steps):
+        # Extract normalized coordinates if available
+        action_x, action_y = None, None
+        if step.action.normalized_coordinates:
+            action_x, action_y = step.action.normalized_coordinates
+        action_type_str = step.action.type.value if isinstance(step.action.type, ActionType) else step.action.type
         base_data.append({
             "index": i,
-            "time": step.
-            "image_path": step.observation.
+            "time": step.step_index,
+            "image_path": step.observation.screenshot_path,
             "human_action": {
-                "type":
-                "x":
-                "y":
+                "type": action_type_str,
+                "x": action_x,
+                "y": action_y,
                 "text": step.action.text,
             },
         })
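Because compare.py now emits normalized (0-1) coordinates in its step data, anything that draws overlays has to scale them back to pixels. A small sketch of that conversion; the helper name and fallback size are hypothetical, while normalized_coordinates and screen_size are fields of the schema above:

    from openadapt_ml.schema.episode import Step

    def to_pixels(step: Step, default_size: tuple[int, int] = (1920, 1080)) -> tuple[int, int] | None:
        # Hypothetical helper: scale normalized (0-1) action coordinates by the
        # observation's screen size, falling back to an assumed resolution.
        norm = step.action.normalized_coordinates
        if norm is None:
            return None
        width, height = step.observation.screen_size or default_size
        return round(norm[0] * width), round(norm[1] * height)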
openadapt_ml/scripts/eval_policy.py
CHANGED

@@ -9,7 +9,7 @@ import yaml
 
 from openadapt_ml.datasets.next_action import build_next_action_sft_samples, parse_action_som
 from openadapt_ml.evals.trajectory_matching import evaluate_policy_on_episodes
-from openadapt_ml.ingest.synthetic import
+from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
 from openadapt_ml.models.dummy_adapter import DummyAdapter
 from openadapt_ml.models.qwen_vl import QwenVLAdapter
 from openadapt_ml.models.api_adapter import ApiVLMAdapter
@@ -63,9 +63,9 @@ def main(
     # Determine scenario: CLI arg takes precedence, then config, then default "login"
     scenario_to_use = scenario if scenario else synth_cfg.get("scenario", "login")
 
-    # Generate
-
-
+    # Generate episodes with SoM if requested
+    episodes = generate_synthetic_episodes(
+        num_episodes=num_sessions,
         seed=seed,
         output_dir=output_dir,
         use_som=use_som,
@@ -73,7 +73,6 @@
         scenario=scenario_to_use,
     )
     print(f"[INFO] Scenario: {scenario_to_use}")
-    episodes = [ep for sess in sessions for ep in sess.episodes]
 
     # Build samples with appropriate DSL mode
     samples = build_next_action_sft_samples(episodes, use_som=use_som)
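The script now consumes a flat list of episodes instead of flattening nested sessions (the removed comprehension above). A sketch of the new call shape as implied by the hunks; the keyword names come from the diff, the values are illustrative:

    from pathlib import Path

    from openadapt_ml.ingest.synthetic import generate_synthetic_episodes

    # 0.2.0 returns list[Episode] directly; 0.1.0 returned session objects
    # that had to be flattened with a nested comprehension.
    episodes = generate_synthetic_episodes(
        num_episodes=3,
        seed=0,
        output_dir=Path("synthetic") / "smoke",
        use_som=False,
        scenario="login",
    )
    print(f"{len(episodes)} episodes, {sum(len(ep.steps) for ep in episodes)} steps")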
openadapt_ml/scripts/prepare_synthetic.py
CHANGED

@@ -3,32 +3,29 @@ from __future__ import annotations
 import os
 from pathlib import Path
 
-from openadapt_ml.ingest.synthetic import
+from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
 
 
 def main() -> None:
     output_dir = Path("synthetic") / "debug"
-
+    episodes = generate_synthetic_episodes(num_episodes=2, seed=42, output_dir=output_dir)
 
-    print(f"Generated {len(
+    print(f"Generated {len(episodes)} episodes into {output_dir.resolve()}")
 
-    total_episodes = 0
     total_steps = 0
     missing_images: list[str] = []
 
-    for
-
-        for
-
-
-            path
-
-
-
-
-
-
-    print(f"Episodes: {total_episodes}, Steps: {total_steps}")
+    for episode in episodes:
+        total_steps += len(episode.steps)
+        for step in episode.steps:
+            path = step.observation.screenshot_path
+            if not path:
+                missing_images.append(f"[no path] in episode {episode.episode_id}")
+                continue
+            if not os.path.exists(path):
+                missing_images.append(path)
+
+    print(f"Episodes: {len(episodes)}, Steps: {total_steps}")
 
     if missing_images:
         print("Missing images:")