openadapt-ml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. openadapt_ml/__init__.py +0 -0
  2. openadapt_ml/benchmarks/__init__.py +125 -0
  3. openadapt_ml/benchmarks/agent.py +825 -0
  4. openadapt_ml/benchmarks/azure.py +761 -0
  5. openadapt_ml/benchmarks/base.py +366 -0
  6. openadapt_ml/benchmarks/cli.py +884 -0
  7. openadapt_ml/benchmarks/data_collection.py +432 -0
  8. openadapt_ml/benchmarks/runner.py +381 -0
  9. openadapt_ml/benchmarks/waa.py +704 -0
  10. openadapt_ml/cloud/__init__.py +5 -0
  11. openadapt_ml/cloud/azure_inference.py +441 -0
  12. openadapt_ml/cloud/lambda_labs.py +2445 -0
  13. openadapt_ml/cloud/local.py +790 -0
  14. openadapt_ml/config.py +56 -0
  15. openadapt_ml/datasets/__init__.py +0 -0
  16. openadapt_ml/datasets/next_action.py +507 -0
  17. openadapt_ml/evals/__init__.py +23 -0
  18. openadapt_ml/evals/grounding.py +241 -0
  19. openadapt_ml/evals/plot_eval_metrics.py +174 -0
  20. openadapt_ml/evals/trajectory_matching.py +486 -0
  21. openadapt_ml/grounding/__init__.py +45 -0
  22. openadapt_ml/grounding/base.py +236 -0
  23. openadapt_ml/grounding/detector.py +570 -0
  24. openadapt_ml/ingest/__init__.py +43 -0
  25. openadapt_ml/ingest/capture.py +312 -0
  26. openadapt_ml/ingest/loader.py +232 -0
  27. openadapt_ml/ingest/synthetic.py +1102 -0
  28. openadapt_ml/models/__init__.py +0 -0
  29. openadapt_ml/models/api_adapter.py +171 -0
  30. openadapt_ml/models/base_adapter.py +59 -0
  31. openadapt_ml/models/dummy_adapter.py +42 -0
  32. openadapt_ml/models/qwen_vl.py +426 -0
  33. openadapt_ml/runtime/__init__.py +0 -0
  34. openadapt_ml/runtime/policy.py +182 -0
  35. openadapt_ml/schemas/__init__.py +53 -0
  36. openadapt_ml/schemas/sessions.py +122 -0
  37. openadapt_ml/schemas/validation.py +252 -0
  38. openadapt_ml/scripts/__init__.py +0 -0
  39. openadapt_ml/scripts/compare.py +1490 -0
  40. openadapt_ml/scripts/demo_policy.py +62 -0
  41. openadapt_ml/scripts/eval_policy.py +287 -0
  42. openadapt_ml/scripts/make_gif.py +153 -0
  43. openadapt_ml/scripts/prepare_synthetic.py +43 -0
  44. openadapt_ml/scripts/run_qwen_login_benchmark.py +192 -0
  45. openadapt_ml/scripts/train.py +174 -0
  46. openadapt_ml/training/__init__.py +0 -0
  47. openadapt_ml/training/benchmark_viewer.py +1538 -0
  48. openadapt_ml/training/shared_ui.py +157 -0
  49. openadapt_ml/training/stub_provider.py +276 -0
  50. openadapt_ml/training/trainer.py +2446 -0
  51. openadapt_ml/training/viewer.py +2970 -0
  52. openadapt_ml-0.1.0.dist-info/METADATA +818 -0
  53. openadapt_ml-0.1.0.dist-info/RECORD +55 -0
  54. openadapt_ml-0.1.0.dist-info/WHEEL +4 -0
  55. openadapt_ml-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,312 @@
1
+ """Adapter for converting openadapt-capture recordings to openadapt-ml Episode format.
2
+
3
+ This module provides functions to ingest real GUI recordings from openadapt-capture
4
+ and convert them to the Episode/Step format used by openadapt-ml for training.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import uuid
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ from openadapt_ml.schemas.sessions import Action, Episode, Observation, Session, Step
14
+
15
+ if TYPE_CHECKING:
16
+ from PIL import Image
17
+
18
# Maps openadapt-capture event type strings to openadapt-ml action types.
# NOTE: both "key.down" and "key.up" map to "key_press", so one physical
# keystroke recorded as separate down/up events yields two actions.
EVENT_TYPE_MAP = {
    "mouse.singleclick": "click",
    "mouse.click": "click",
    "mouse.doubleclick": "double_click",
    "mouse.drag": "drag",
    "mouse.scroll": "scroll",
    "key.type": "type",
    "key.down": "key_press",
    "key.up": "key_press",
}
29
+
30
+
31
+ def _normalize_coords(
32
+ x: float | None,
33
+ y: float | None,
34
+ screen_width: int,
35
+ screen_height: int,
36
+ ) -> tuple[float | None, float | None]:
37
+ """Normalize pixel coordinates to [0, 1] range.
38
+
39
+ Args:
40
+ x: X coordinate in pixels.
41
+ y: Y coordinate in pixels.
42
+ screen_width: Screen width in pixels.
43
+ screen_height: Screen height in pixels.
44
+
45
+ Returns:
46
+ Tuple of (normalized_x, normalized_y).
47
+ """
48
+ if x is None or y is None:
49
+ return None, None
50
+ return x / screen_width, y / screen_height
51
+
52
+
53
+ def _save_screenshot(
54
+ image: "Image",
55
+ output_dir: Path,
56
+ episode_id: str,
57
+ step_idx: int,
58
+ ) -> str:
59
+ """Save a screenshot and return its path.
60
+
61
+ Args:
62
+ image: PIL Image to save.
63
+ output_dir: Directory to save images to.
64
+ episode_id: Episode identifier.
65
+ step_idx: Step index.
66
+
67
+ Returns:
68
+ Path to saved image.
69
+ """
70
+ output_dir.mkdir(parents=True, exist_ok=True)
71
+ filename = f"{episode_id}_step_{step_idx}.png"
72
+ filepath = output_dir / filename
73
+ image.save(filepath)
74
+ return str(filepath)
75
+
76
+
77
def capture_to_episode(
    capture_path: str | Path,
    output_dir: str | Path | None = None,
    goal: str | None = None,
    episode_id: str | None = None,
    include_moves: bool = False,
) -> Episode:
    """Convert an openadapt-capture recording to an Episode.

    Args:
        capture_path: Path to the capture directory.
        output_dir: Directory to save extracted screenshots. If None, uses
            capture_path/screenshots.
        goal: Task description/goal for the episode. If None, uses capture's
            task_description, a readable form of the capture directory name,
            or a generic message.
        episode_id: Identifier for the episode. If None, derived from the
            capture's own id.
        include_moves: Whether to include mouse move events.

    Returns:
        Episode containing Steps with Observations and Actions, terminated by
        a synthetic "done" step when at least one real step was converted.

    Raises:
        ImportError: If openadapt-capture is not installed.
        FileNotFoundError: If capture doesn't exist.
    """
    try:
        # Import only what is referenced below; the previous version also
        # imported EventType, KeyTypeEvent, MouseClickEvent and
        # MouseDoubleClickEvent without ever using them.
        from openadapt_capture import Capture
        from openadapt_capture.events import MouseDragEvent, MouseScrollEvent
    except ImportError as e:
        raise ImportError(
            "openadapt-capture is required. Install with: pip install openadapt-capture"
        ) from e

    capture_path = Path(capture_path)
    if output_dir is None:
        output_dir = capture_path / "screenshots"
    output_dir = Path(output_dir)

    capture = Capture.load(capture_path)
    try:
        if episode_id is None:
            episode_id = f"capture_{capture.id}"

        # Goal priority: explicit arg > capture metadata > directory name > generic.
        if goal is None:
            if capture.task_description:
                goal = capture.task_description
            else:
                # Derive goal from the directory name
                # (e.g. "turn-off-nightshift" -> "Turn off nightshift").
                dir_name = capture_path.name
                if dir_name and dir_name != "capture":
                    goal = (
                        dir_name.replace("-", " ").replace("_", " ").strip().capitalize()
                    )
                else:
                    goal = "Complete the recorded workflow"

        # Screen dimensions are needed to normalize pixel coords to [0, 1].
        screen_width, screen_height = capture.screen_size

        steps: list[Step] = []
        start_time = capture.started_at

        for idx, action in enumerate(capture.actions(include_moves=include_moves)):
            screenshot = action.screenshot
            if screenshot is None:
                # No frame to ground the action on; skip it.
                continue

            image_path = _save_screenshot(screenshot, output_dir, episode_id, idx)

            norm_x, norm_y = _normalize_coords(
                action.x, action.y, screen_width, screen_height
            )

            # Unknown event types fall back to "click".
            action_type = EVENT_TYPE_MAP.get(action.type, "click")

            ml_action = Action(
                type=action_type,
                x=norm_x,
                y=norm_y,
                text=action.text,
            )

            # Drag events: store normalized end coordinates in `raw`.
            if isinstance(action.event, MouseDragEvent):
                end_x = action.event.x + action.event.dx
                end_y = action.event.y + action.event.dy
                norm_end_x, norm_end_y = _normalize_coords(
                    end_x, end_y, screen_width, screen_height
                )
                ml_action.raw = {
                    "end_x": norm_end_x,
                    "end_y": norm_end_y,
                    "button": action.event.button,
                }

            # Scroll events: store raw (un-normalized) deltas.
            if isinstance(action.event, MouseScrollEvent):
                ml_action.raw = {
                    "dx": action.event.dx,
                    "dy": action.event.dy,
                }

            # Keyboard events: include key names for special keys.
            if action.keys:
                if ml_action.raw is None:
                    ml_action.raw = {}
                ml_action.raw["keys"] = action.keys

            steps.append(
                Step(
                    t=action.timestamp - start_time,
                    observation=Observation(image_path=image_path),
                    action=ml_action,
                    thought=None,  # Real recordings don't have thoughts.
                )
            )

        # Append a terminal DONE action reusing the last screenshot.
        if steps:
            last_step = steps[-1]
            steps.append(
                Step(
                    t=last_step.t + 0.1,
                    observation=Observation(image_path=last_step.observation.image_path),
                    action=Action(type="done"),
                    thought="Workflow complete.",
                )
            )

        workflow_id = capture.id
    finally:
        # Always release the capture's resources, even when conversion fails
        # part-way; previously close() was skipped on any exception.
        capture.close()

    return Episode(
        id=episode_id,
        goal=goal,
        steps=steps,
        summary=f"Real recording with {len(steps)} steps",
        success=True,
        workflow_id=workflow_id,
    )
231
+
232
+
233
def capture_to_session(
    capture_path: str | Path,
    output_dir: str | Path | None = None,
    goal: str | None = None,
    session_id: str | None = None,
    include_moves: bool = False,
) -> Session:
    """Wrap a single openadapt-capture recording in a Session.

    Args:
        capture_path: Path to the capture directory.
        output_dir: Directory to save extracted screenshots.
        goal: Task description/goal for the episode.
        session_id: Identifier for the session. If None, generates a UUID.
        include_moves: Whether to include mouse move events.

    Returns:
        Session containing a single Episode.
    """
    converted = capture_to_episode(
        capture_path=capture_path,
        output_dir=output_dir,
        goal=goal,
        include_moves=include_moves,
    )

    # Fall back to a short random id when the caller did not supply one.
    if session_id is None:
        session_id = f"session_{uuid.uuid4().hex[:8]}"

    session_meta = {
        "source": "openadapt-capture",
        "capture_path": str(capture_path),
    }
    return Session(id=session_id, episodes=[converted], meta=session_meta)
270
+
271
+
272
def load_captures_as_sessions(
    captures_dir: str | Path,
    output_dir: str | Path | None = None,
    include_moves: bool = False,
) -> list[Session]:
    """Discover and convert every capture under *captures_dir*.

    A capture is recognized by the presence of a ``capture.db`` file in a
    subdirectory. Captures that fail to convert are skipped with a warning
    rather than aborting the whole scan.

    Args:
        captures_dir: Directory containing capture subdirectories.
        output_dir: Base directory for screenshots. Each capture gets a subdirectory.
        include_moves: Whether to include mouse move events.

    Returns:
        List of Sessions, one per capture.
    """
    root = Path(captures_dir)
    sessions: list[Session] = []

    for db_file in root.glob("**/capture.db"):
        capture_dir = db_file.parent

        # Each capture's screenshots land in its own subdirectory.
        per_capture_out = (
            Path(output_dir) / capture_dir.name if output_dir is not None else None
        )

        try:
            sessions.append(
                capture_to_session(
                    capture_path=capture_dir,
                    output_dir=per_capture_out,
                    include_moves=include_moves,
                )
            )
        except Exception as e:
            # Best-effort: report and keep scanning the remaining captures.
            print(f"Warning: Failed to load {capture_dir}: {e}")

    return sessions
@@ -0,0 +1,232 @@
1
+ """Episode loading utilities for openadapt-ml.
2
+
3
+ Load Episodes from JSON files exported by external systems.
4
+ This is the primary entry point for users who have their own data.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, Union
12
+
13
+ from openadapt_ml.schemas.sessions import Action, Episode, Observation, Step
14
+ from openadapt_ml.schemas.validation import validate_episodes, summarize_episodes
15
+
16
+
17
def load_episodes(
    path: Union[str, Path],
    validate: bool = True,
    check_images: bool = False,
) -> List[Episode]:
    """Load Episodes from a directory or JSON file.

    Supports two formats:
    1. Single JSON file containing a list of episodes
    2. Directory containing multiple JSON files (one episode per file, or batched)

    Args:
        path: Path to directory or JSON file containing episode data.
        validate: If True, validate episodes against schema (default True).
        check_images: If True, verify image files exist on disk (default False).

    Returns:
        List of Episode objects ready for training.

    Raises:
        FileNotFoundError: If path doesn't exist.
        ValidationError: If validate=True and data fails validation.
        ValueError: If JSON format is invalid.

    Example:
        >>> episodes = load_episodes("exported_data/")
        >>> print(f"Loaded {len(episodes)} episodes")
        >>> print(f"Total steps: {sum(len(e.steps) for e in episodes)}")
    """
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"Path not found: {source}")

    loaded: List[Episode] = []
    if source.is_file():
        # A single JSON file.
        loaded = _load_episodes_from_file(source)
    elif source.is_dir():
        # A directory of JSON files, processed in sorted order.
        json_files = sorted(source.glob("*.json"))
        if not json_files:
            raise ValueError(f"No JSON files found in {source}")
        for json_file in json_files:
            loaded.extend(_load_episodes_from_file(json_file))
    else:
        raise ValueError(f"Path must be a file or directory: {source}")

    if validate:
        warnings = validate_episodes(loaded, check_images=check_images)
        if warnings:
            # Warnings are informational; only the first 10 are echoed.
            print(f"Validation warnings ({len(warnings)}):")
            for w in warnings[:10]:
                print(f" - {w}")
            if len(warnings) > 10:
                print(f" ... and {len(warnings) - 10} more")

    return loaded
78
+
79
+
80
+ def _load_episodes_from_file(path: Path) -> List[Episode]:
81
+ """Load episodes from a single JSON file."""
82
+ with open(path, "r") as f:
83
+ data = json.load(f)
84
+
85
+ # Handle different JSON structures
86
+ if isinstance(data, list):
87
+ # List of episodes
88
+ return [_dict_to_episode(ep) for ep in data]
89
+ elif isinstance(data, dict):
90
+ # Single episode or wrapped format
91
+ if "episodes" in data:
92
+ return [_dict_to_episode(ep) for ep in data["episodes"]]
93
+ elif "id" in data and "goal" in data:
94
+ # Single episode
95
+ return [_dict_to_episode(data)]
96
+ else:
97
+ raise ValueError(f"Unrecognized JSON format in {path}")
98
+ else:
99
+ raise ValueError(f"Expected list or dict in {path}, got {type(data)}")
100
+
101
+
102
def _dict_to_episode(data: Dict[str, Any]) -> Episode:
    """Deserialize a plain dict (parsed JSON) into an Episode object.

    Missing keys fall back to None (or "unknown"/"" for required-ish fields),
    so partially-populated exports still load.
    """
    observation_fields = (
        "image_path",
        "meta",
        "accessibility_tree",
        "dom_html",
        "url",
        "window_title",
        "app_name",
        "focused_element",
    )
    action_fields = (
        "x",
        "y",
        "text",
        "raw",
        "element_index",
        "target_node_id",
        "target_role",
        "target_name",
        "key",
        "modifiers",
        "scroll_direction",
        "scroll_amount",
        "end_x",
        "end_y",
        "answer",
    )

    parsed_steps = []
    for raw_step in data.get("steps", []):
        obs_raw = raw_step.get("observation", {})
        observation = Observation(
            **{field: obs_raw.get(field) for field in observation_fields}
        )

        act_raw = raw_step.get("action", {})
        bbox_val = act_raw.get("bbox")
        action = Action(
            type=act_raw.get("type", "unknown"),
            # JSON has no tuple type, so bbox round-trips as a list.
            bbox=tuple(bbox_val) if bbox_val else None,
            **{field: act_raw.get(field) for field in action_fields},
        )

        parsed_steps.append(
            Step(
                t=raw_step.get("t", 0.0),
                observation=observation,
                action=action,
                thought=raw_step.get("thought"),
            )
        )

    return Episode(
        id=data.get("id", "unknown"),
        goal=data.get("goal", ""),
        steps=parsed_steps,
        summary=data.get("summary"),
        success=data.get("success"),
        workflow_id=data.get("workflow_id"),
    )
157
+
158
+
159
def save_episodes(
    episodes: List[Episode],
    path: Union[str, Path],
    pretty: bool = True,
) -> None:
    """Serialize Episodes to a JSON file.

    Args:
        episodes: List of Episode objects to save.
        path: Output file path (parent directories are created as needed).
        pretty: If True, format JSON with indentation.

    Example:
        >>> save_episodes(episodes, "output/episodes.json")
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    payload = [_episode_to_dict(ep) for ep in episodes]

    # indent=None produces the same compact output as omitting the argument.
    with open(target, "w") as f:
        json.dump(payload, f, indent=2 if pretty else None)
184
+
185
+
186
+ def _episode_to_dict(episode: Episode) -> Dict[str, Any]:
187
+ """Convert an Episode object to a dictionary."""
188
+ steps = []
189
+ for step in episode.steps:
190
+ step_dict = {
191
+ "t": step.t,
192
+ "observation": {
193
+ "image_path": step.observation.image_path,
194
+ "meta": step.observation.meta,
195
+ "accessibility_tree": step.observation.accessibility_tree,
196
+ "dom_html": step.observation.dom_html,
197
+ "url": step.observation.url,
198
+ "window_title": step.observation.window_title,
199
+ "app_name": step.observation.app_name,
200
+ "focused_element": step.observation.focused_element,
201
+ },
202
+ "action": {
203
+ "type": step.action.type,
204
+ "x": step.action.x,
205
+ "y": step.action.y,
206
+ "text": step.action.text,
207
+ "raw": step.action.raw,
208
+ "bbox": list(step.action.bbox) if step.action.bbox else None,
209
+ "element_index": step.action.element_index,
210
+ "target_node_id": step.action.target_node_id,
211
+ "target_role": step.action.target_role,
212
+ "target_name": step.action.target_name,
213
+ "key": step.action.key,
214
+ "modifiers": step.action.modifiers,
215
+ "scroll_direction": step.action.scroll_direction,
216
+ "scroll_amount": step.action.scroll_amount,
217
+ "end_x": step.action.end_x,
218
+ "end_y": step.action.end_y,
219
+ "answer": step.action.answer,
220
+ },
221
+ "thought": step.thought,
222
+ }
223
+ steps.append(step_dict)
224
+
225
+ return {
226
+ "id": episode.id,
227
+ "goal": episode.goal,
228
+ "steps": steps,
229
+ "summary": episode.summary,
230
+ "success": episode.success,
231
+ "workflow_id": episode.workflow_id,
232
+ }