openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,457 @@
1
+ """
2
+ Episode Schema for GUI Trajectory Data
3
+
4
+ Canonical contract for episode/demonstration data in GUI automation. Designed for
5
+ interoperability across training pipelines, benchmarks, and human demonstrations.
6
+
7
+ Features:
8
+ - Pydantic models with runtime validation
9
+ - JSON Schema export for language-agnostic tooling
10
+ - Supports pixel coordinates AND normalized (0-1) coordinates
11
+ - Extensible via `raw` and `metadata` fields
12
+ - Converters for common formats (WAA, WebArena, etc.)
13
+
14
+ Quick Start:
15
+ from openadapt_ml.schema import Episode, Step, Action, Observation, ActionType
16
+
17
+ episode = Episode(
18
+ episode_id="demo_001",
19
+ instruction="Open the Settings app and enable Dark Mode",
20
+ steps=[
21
+ Step(
22
+ step_index=0,
23
+ observation=Observation(screenshot_path="step_0.png"),
24
+ action=Action(
25
+ type=ActionType.CLICK,
26
+ coordinates={"x": 512, "y": 384},
27
+ # Or use normalized coords for resolution independence:
28
+ # normalized_coordinates=(0.5, 0.375),
29
+ ),
30
+ reasoning="Click on Settings icon",
31
+ ),
32
+ ],
33
+ success=True,
34
+ )
35
+
36
+ # Validate any dict against the schema
37
+ from openadapt_ml.schema import validate_episode
38
+ is_valid, error = validate_episode(data)
39
+
40
+ # Export JSON Schema for external tools
41
+ from openadapt_ml.schema import export_json_schema
42
+ export_json_schema("episode.schema.json")
43
+
44
+ Schema Version: 1.0.0
45
+ - Core models: Episode, Step, Action, Observation
46
+ - 24 action types covering mouse, keyboard, navigation, and system actions
47
+ - Support for both pixel and normalized coordinates
48
+ - Extension points: raw, metadata fields
49
+
50
+ Evolution Policy (SemVer):
51
+ - PATCH (1.0.x): Documentation, bug fixes (no schema changes)
52
+ - MINOR (1.x.0): New optional fields with defaults (backward compatible)
53
+ - MAJOR (x.0.0): Breaking changes (field removal, type changes, new required fields)
54
+
55
+ Migration Guide:
56
+ - MINOR bumps: No action needed, old data validates
57
+ - MAJOR bumps: Use converters or migration scripts (provided in release notes)
58
+ """
59
+
60
from __future__ import annotations

import json
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Literal, Optional, Union

from pydantic import BaseModel, Field, field_validator, model_validator
69
+
70
+
71
+ # Schema version - follows semver
72
+ SCHEMA_VERSION = "1.0.0"
73
+
74
+
75
class ActionType(str, Enum):
    """Supported action types for GUI automation.

    Inherits from ``str`` so members compare equal to their string values
    and serialize as plain strings in JSON.
    """

    # Mouse actions
    CLICK = "click"
    DOUBLE_CLICK = "double_click"
    RIGHT_CLICK = "right_click"
    DRAG = "drag"
    SCROLL = "scroll"
    HOVER = "hover"

    # Keyboard actions
    TYPE = "type"
    KEY = "key"
    HOTKEY = "hotkey"

    # Combined/special actions
    CLICK_AND_TYPE = "click_and_type"
    WAIT = "wait"
    SCREENSHOT = "screenshot"

    # Navigation (for web)
    GOTO = "goto"
    BACK = "back"
    FORWARD = "forward"
    REFRESH = "refresh"

    # System actions
    OPEN_APP = "open_app"
    CLOSE_APP = "close_app"
    SELECT_MONITOR = "select_monitor"  # Multi-monitor: focus a specific display
    WINDOW_FOCUS = "window_focus"  # Focus a specific window
    WINDOW_RESIZE = "window_resize"  # Resize window
    WINDOW_MOVE = "window_move"  # Move window

    # Meta actions (terminal states reported by the agent)
    DONE = "done"
    FAIL = "fail"
113
+
114
+
115
class BenchmarkSource(str, Enum):
    """Source benchmark/dataset for the episode.

    String-valued enum so provenance round-trips cleanly through JSON.
    """

    WAA = "waa"  # Windows Agent Arena
    WEBARENA = "webarena"
    OSWORLD = "osworld"
    MINIWOB = "miniwob"
    HUMAN = "human"  # Human demonstration
    SYNTHETIC = "synthetic"  # Generated/augmented
124
+
125
+
126
class Coordinates(BaseModel):
    """Screen coordinates for mouse actions (pixels, origin at top-left)."""

    x: int = Field(..., description="X coordinate (pixels from left)")
    y: int = Field(..., description="Y coordinate (pixels from top)")

    @field_validator("x", "y")
    @classmethod
    def validate_non_negative(cls, v: int) -> int:
        """Reject negative pixel positions for either axis."""
        if v >= 0:
            return v
        raise ValueError("Coordinates must be non-negative")
138
+
139
+
140
class BoundingBox(BaseModel):
    """Bounding box for UI elements (pixel units, origin at top-left)."""

    x: int = Field(..., description="Left edge X coordinate")
    y: int = Field(..., description="Top edge Y coordinate")
    width: int = Field(..., ge=0, description="Width in pixels")
    height: int = Field(..., ge=0, description="Height in pixels")

    @property
    def center(self) -> Coordinates:
        """Get center point of bounding box (integer division, rounds down)."""
        center_x = self.x + self.width // 2
        center_y = self.y + self.height // 2
        return Coordinates(x=center_x, y=center_y)
152
+
153
+
154
class UIElement(BaseModel):
    """UI element information from accessibility tree or DOM.

    Every field is optional because availability depends on the source
    platform (web DOM vs. Windows automation vs. generic a11y tree).
    """

    role: Optional[str] = Field(None, description="Element role (button, textbox, etc.)")
    name: Optional[str] = Field(None, description="Element accessible name")
    value: Optional[str] = Field(None, description="Element value (for inputs)")
    bounds: Optional[BoundingBox] = Field(None, description="Element bounding box")
    element_id: Optional[str] = Field(None, description="Unique element identifier")
    xpath: Optional[str] = Field(None, description="XPath selector (web)")
    selector: Optional[str] = Field(None, description="CSS selector (web)")
    automation_id: Optional[str] = Field(None, description="Automation ID (Windows)")
165
+
166
+
167
class Action(BaseModel):
    """An action taken by the agent.

    Exactly one action ``type`` is set; which of the optional parameter
    fields are meaningful depends on that type (see ``validate_action_params``
    for the required ones). ``raw`` preserves the original source-format
    payload for lossless round-tripping.
    """

    type: ActionType = Field(..., description="Type of action")

    # Mouse action parameters
    coordinates: Optional[Coordinates] = Field(
        None, description="Target coordinates for mouse actions"
    )
    start_coordinates: Optional[Coordinates] = Field(
        None, description="Start coordinates for drag actions"
    )
    end_coordinates: Optional[Coordinates] = Field(
        None, description="End coordinates for drag actions"
    )
    scroll_direction: Optional[Literal["up", "down", "left", "right"]] = Field(
        None, description="Scroll direction"
    )
    scroll_amount: Optional[int] = Field(None, description="Scroll amount in pixels")

    # Keyboard action parameters
    text: Optional[str] = Field(None, description="Text to type")
    key: Optional[str] = Field(None, description="Key to press (e.g., 'enter', 'tab')")
    modifiers: Optional[list[str]] = Field(
        None, description="Modifier keys (ctrl, alt, shift, meta)"
    )

    # Element targeting (alternative to coordinates)
    element: Optional[UIElement] = Field(
        None, description="Target element (for element-based actions)"
    )

    # Additional parameters
    url: Optional[str] = Field(None, description="URL for goto action")
    app_name: Optional[str] = Field(None, description="Application name for open/close")
    duration: Optional[float] = Field(None, description="Duration in seconds (for wait)")
    monitor_id: Optional[int] = Field(None, description="Monitor ID for select_monitor action")
    window_title: Optional[str] = Field(None, description="Window title for window_focus action")

    # Normalized coordinates (0.0-1.0) - alternative to pixel coordinates
    # Useful for resolution-independent recordings
    normalized_coordinates: Optional[tuple[float, float]] = Field(
        None, description="Normalized (x, y) coordinates (0.0-1.0 range)"
    )
    normalized_start: Optional[tuple[float, float]] = Field(
        None, description="Normalized start coordinates for drag (0.0-1.0 range)"
    )
    normalized_end: Optional[tuple[float, float]] = Field(
        None, description="Normalized end coordinates for drag (0.0-1.0 range)"
    )

    # Raw/original action data
    raw: Optional[dict[str, Any]] = Field(
        None, description="Original action data from source format"
    )

    @model_validator(mode="after")
    def validate_action_params(self) -> "Action":
        """Validate that required parameters are present for action type.

        Click-style actions (CLICK, DOUBLE_CLICK, RIGHT_CLICK) are
        deliberately NOT required to carry coordinates or an element: the
        target may be inferred from context downstream, so the previous
        no-op check for them has been removed as dead code.

        Raises:
            ValueError: If a TYPE action lacks ``text``, a KEY action lacks
                ``key``, or a GOTO action lacks ``url``.
        """
        if self.type == ActionType.TYPE and self.text is None:
            raise ValueError("TYPE action requires 'text' parameter")

        if self.type == ActionType.KEY and self.key is None:
            raise ValueError("KEY action requires 'key' parameter")

        if self.type == ActionType.GOTO and self.url is None:
            raise ValueError("GOTO action requires 'url' parameter")

        return self
241
+
242
+
243
class Observation(BaseModel):
    """An observation of the environment state.

    All fields are optional; a recording pipeline fills in whichever
    channels it captured (screenshot, a11y tree, DOM, window info, ...).
    ``raw`` preserves the original source-format payload.
    """

    # Visual observation (either a file path or inline base64, not required)
    screenshot_path: Optional[str] = Field(
        None, description="Path to screenshot image file"
    )
    screenshot_base64: Optional[str] = Field(
        None, description="Base64-encoded screenshot (for inline storage)"
    )

    # Structured observations
    a11y_tree: Optional[dict[str, Any]] = Field(
        None, description="Accessibility tree snapshot"
    )
    dom: Optional[str] = Field(None, description="DOM HTML snapshot (web)")

    # Window/screen info
    window_title: Optional[str] = Field(None, description="Active window title")
    app_name: Optional[str] = Field(None, description="Application name (e.g., 'Chrome', 'System Settings')")
    url: Optional[str] = Field(None, description="Current URL (for web apps)")
    screen_size: Optional[tuple[int, int]] = Field(
        None, description="Screen dimensions (width, height)"
    )

    # Focused element
    focused_element: Optional[UIElement] = Field(
        None, description="Currently focused UI element"
    )

    # Additional metadata
    timestamp: Optional[float] = Field(None, description="Unix timestamp")
    raw: Optional[dict[str, Any]] = Field(
        None, description="Original observation data from source format"
    )
278
+
279
+
280
class Step(BaseModel):
    """A single step in an episode (observation -> action pair).

    The observation is the state *before* the action was taken; reasoning,
    reward, and timing fields are optional annotations used mainly for
    demonstrations and training data.
    """

    step_index: int = Field(..., ge=0, description="Step number (0-indexed)")

    # Core data
    observation: Observation = Field(..., description="State observation before action")
    action: Action = Field(..., description="Action taken")

    # Agent reasoning (for demos/training)
    reasoning: Optional[str] = Field(
        None, description="Agent's reasoning for the action (chain-of-thought)"
    )

    # Outcome
    reward: Optional[float] = Field(None, description="Reward signal (if available)")
    done: Optional[bool] = Field(None, description="Whether episode ended after this step")

    # Timing
    timestamp: Optional[float] = Field(None, description="Unix timestamp of action")
    duration_ms: Optional[int] = Field(
        None, description="Time taken for this step in milliseconds"
    )
303
+
304
+
305
class Episode(BaseModel):
    """A complete episode/demonstration for GUI automation.

    This is the canonical format for storing and exchanging GUI trajectory data.
    All benchmark-specific formats should be converted to/from this format.
    """

    # Schema metadata
    schema_version: str = Field(
        default=SCHEMA_VERSION,
        description="Schema version for compatibility checking"
    )

    # Episode identification
    episode_id: str = Field(..., description="Unique episode identifier")
    task_id: Optional[str] = Field(None, description="Task identifier (from benchmark)")

    # Task specification
    instruction: str = Field(..., description="Natural language task instruction")
    goal: Optional[str] = Field(
        None, description="Detailed goal description (if different from instruction)"
    )

    # Episode data
    steps: list[Step] = Field(..., description="Sequence of steps in the episode")

    # Outcome
    success: Optional[bool] = Field(None, description="Whether task was completed successfully")
    final_reward: Optional[float] = Field(None, description="Final reward/score")

    # Provenance
    source: Optional[BenchmarkSource] = Field(
        None, description="Source benchmark/dataset"
    )
    source_file: Optional[str] = Field(
        None, description="Original source file path"
    )

    # Metadata
    # datetime.utcnow() is deprecated (Python 3.12+) and produces a *naive*
    # timestamp; use an explicit timezone-aware UTC timestamp instead.
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="When episode was created/recorded"
    )
    agent_model: Optional[str] = Field(
        None, description="Model that generated this episode (e.g., 'gpt-4o')"
    )
    environment: Optional[str] = Field(
        None, description="Environment info (OS, browser, etc.)"
    )
    tags: Optional[list[str]] = Field(
        None, description="Tags for categorization"
    )

    # Extension point for benchmark-specific data
    metadata: Optional[dict[str, Any]] = Field(
        None, description="Additional metadata from source"
    )

    @property
    def num_steps(self) -> int:
        """Number of steps in the episode."""
        return len(self.steps)

    @property
    def action_types(self) -> list[ActionType]:
        """List of action types in this episode."""
        return [step.action.type for step in self.steps]

    def to_json(self, indent: int = 2) -> str:
        """Serialize to JSON string."""
        return self.model_dump_json(indent=indent)

    @classmethod
    def from_json(cls, json_str: str) -> "Episode":
        """Deserialize from JSON string."""
        return cls.model_validate_json(json_str)

    @classmethod
    def json_schema(cls) -> dict[str, Any]:
        """Get JSON Schema for Episode format."""
        return cls.model_json_schema()
386
+
387
+
388
+ # ============================================================================
389
+ # Utility Functions
390
+ # ============================================================================
391
+
392
def validate_episode(data: dict[str, Any]) -> tuple[bool, Optional[str]]:
    """Validate episode data against schema.

    Args:
        data: Episode data as dictionary

    Returns:
        Tuple of (is_valid, error_message); error_message is None on success.
    """
    # Broad catch is intentional: this is a boundary API that must never
    # raise, only report.
    try:
        Episode.model_validate(data)
    except Exception as exc:
        return False, str(exc)
    return True, None
406
+
407
+
408
def load_episode(path: Union[str, Path]) -> Episode:
    """Load episode from JSON file.

    Args:
        path: Path to JSON file

    Returns:
        Episode instance, with ``source_file`` backfilled to *path* when the
        file itself did not record one.
    """
    source = Path(path)
    episode = Episode.model_validate(json.loads(source.read_text()))

    # Preserve provenance without overwriting an explicit source_file.
    if episode.source_file is None:
        episode = episode.model_copy(update={"source_file": str(source)})

    return episode
428
+
429
+
430
def save_episode(episode: Episode, path: Union[str, Path], indent: int = 2) -> None:
    """Save episode to JSON file.

    Args:
        episode: Episode to save
        path: Output path (parent directories are created as needed)
        indent: JSON indentation
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(episode.to_json(indent=indent))
443
+
444
+
445
def export_json_schema(path: Union[str, Path]) -> None:
    """Export JSON Schema to file for documentation/tooling.

    Args:
        path: Output path for schema file (parent directories are created)
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(Episode.json_schema(), indent=2))
@@ -17,7 +17,7 @@ from pathlib import Path
17
17
  from typing import Any
18
18
 
19
19
  from openadapt_ml.ingest.capture import capture_to_episode
20
- from openadapt_ml.schemas.sessions import Episode, Step
20
+ from openadapt_ml.schema import Episode, Step, ActionType
21
21
  from openadapt_ml.datasets.next_action import SYSTEM_PROMPT, format_action
22
22
  from openadapt_ml.training.trainer import _get_shared_header_css, _generate_shared_header_html
23
23
 
@@ -141,14 +141,19 @@ def generate_comparison_data(
141
141
  total_steps = len(episode.steps)
142
142
 
143
143
  for i, step in enumerate(episode.steps):
144
+ # Extract normalized coordinates if available
145
+ action_x, action_y = None, None
146
+ if step.action.normalized_coordinates:
147
+ action_x, action_y = step.action.normalized_coordinates
148
+ action_type_str = step.action.type.value if isinstance(step.action.type, ActionType) else step.action.type
144
149
  step_data = {
145
150
  "index": i,
146
- "time": step.t,
147
- "image_path": step.observation.image_path,
151
+ "time": step.step_index,
152
+ "image_path": step.observation.screenshot_path,
148
153
  "human_action": {
149
- "type": step.action.type,
150
- "x": step.action.x,
151
- "y": step.action.y,
154
+ "type": action_type_str,
155
+ "x": action_x,
156
+ "y": action_y,
152
157
  "text": step.action.text,
153
158
  },
154
159
  "predicted_action": None,
@@ -156,11 +161,11 @@ def generate_comparison_data(
156
161
  }
157
162
 
158
163
  # Get prediction if model available
159
- if model and step.observation.image_path:
164
+ if model and step.observation.screenshot_path:
160
165
  predicted = predict_action(
161
166
  model,
162
- step.observation.image_path,
163
- episode.goal,
167
+ step.observation.screenshot_path,
168
+ episode.instruction,
164
169
  step_index=i,
165
170
  total_steps=total_steps,
166
171
  action_history=action_history.copy(),
@@ -168,7 +173,7 @@ def generate_comparison_data(
168
173
  step_data["predicted_action"] = predicted
169
174
 
170
175
  # Check if prediction matches human action
171
- if predicted and predicted.get("type") == step.action.type:
176
+ if predicted and predicted.get("type") == action_type_str:
172
177
  step_data["match"] = True
173
178
  else:
174
179
  step_data["match"] = False
@@ -839,21 +844,26 @@ def generate_unified_viewer(
839
844
  if available_captures is None:
840
845
  available_captures = [{
841
846
  "id": capture_id,
842
- "name": episode.goal or "Untitled",
847
+ "name": episode.instruction or "Untitled",
843
848
  "steps": len(episode.steps),
844
849
  }]
845
850
 
846
851
  # Prepare base capture data (human actions only, no predictions)
847
852
  base_data = []
848
853
  for i, step in enumerate(episode.steps):
854
+ # Extract normalized coordinates if available
855
+ action_x, action_y = None, None
856
+ if step.action.normalized_coordinates:
857
+ action_x, action_y = step.action.normalized_coordinates
858
+ action_type_str = step.action.type.value if isinstance(step.action.type, ActionType) else step.action.type
849
859
  base_data.append({
850
860
  "index": i,
851
- "time": step.t,
852
- "image_path": step.observation.image_path,
861
+ "time": step.step_index,
862
+ "image_path": step.observation.screenshot_path,
853
863
  "human_action": {
854
- "type": step.action.type,
855
- "x": step.action.x,
856
- "y": step.action.y,
864
+ "type": action_type_str,
865
+ "x": action_x,
866
+ "y": action_y,
857
867
  "text": step.action.text,
858
868
  },
859
869
  })
@@ -9,7 +9,7 @@ import yaml
9
9
 
10
10
  from openadapt_ml.datasets.next_action import build_next_action_sft_samples, parse_action_som
11
11
  from openadapt_ml.evals.trajectory_matching import evaluate_policy_on_episodes
12
- from openadapt_ml.ingest.synthetic import generate_synthetic_sessions
12
+ from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
13
13
  from openadapt_ml.models.dummy_adapter import DummyAdapter
14
14
  from openadapt_ml.models.qwen_vl import QwenVLAdapter
15
15
  from openadapt_ml.models.api_adapter import ApiVLMAdapter
@@ -63,9 +63,9 @@ def main(
63
63
  # Determine scenario: CLI arg takes precedence, then config, then default "login"
64
64
  scenario_to_use = scenario if scenario else synth_cfg.get("scenario", "login")
65
65
 
66
- # Generate sessions with SoM if requested
67
- sessions = generate_synthetic_sessions(
68
- num_sessions=num_sessions,
66
+ # Generate episodes with SoM if requested
67
+ episodes = generate_synthetic_episodes(
68
+ num_episodes=num_sessions,
69
69
  seed=seed,
70
70
  output_dir=output_dir,
71
71
  use_som=use_som,
@@ -73,7 +73,6 @@ def main(
73
73
  scenario=scenario_to_use,
74
74
  )
75
75
  print(f"[INFO] Scenario: {scenario_to_use}")
76
- episodes = [ep for sess in sessions for ep in sess.episodes]
77
76
 
78
77
  # Build samples with appropriate DSL mode
79
78
  samples = build_next_action_sft_samples(episodes, use_som=use_som)
@@ -3,32 +3,29 @@ from __future__ import annotations
3
3
  import os
4
4
  from pathlib import Path
5
5
 
6
- from openadapt_ml.ingest.synthetic import generate_synthetic_sessions
6
+ from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
7
7
 
8
8
 
9
9
  def main() -> None:
10
10
  output_dir = Path("synthetic") / "debug"
11
- sessions = generate_synthetic_sessions(num_sessions=2, seed=42, output_dir=output_dir)
11
+ episodes = generate_synthetic_episodes(num_episodes=2, seed=42, output_dir=output_dir)
12
12
 
13
- print(f"Generated {len(sessions)} sessions into {output_dir.resolve()}")
13
+ print(f"Generated {len(episodes)} episodes into {output_dir.resolve()}")
14
14
 
15
- total_episodes = 0
16
15
  total_steps = 0
17
16
  missing_images: list[str] = []
18
17
 
19
- for session in sessions:
20
- total_episodes += len(session.episodes)
21
- for episode in session.episodes:
22
- total_steps += len(episode.steps)
23
- for step in episode.steps:
24
- path = step.observation.image_path
25
- if not path:
26
- missing_images.append(f"[no path] in episode {episode.id}")
27
- continue
28
- if not os.path.exists(path):
29
- missing_images.append(path)
30
-
31
- print(f"Episodes: {total_episodes}, Steps: {total_steps}")
18
+ for episode in episodes:
19
+ total_steps += len(episode.steps)
20
+ for step in episode.steps:
21
+ path = step.observation.screenshot_path
22
+ if not path:
23
+ missing_images.append(f"[no path] in episode {episode.episode_id}")
24
+ continue
25
+ if not os.path.exists(path):
26
+ missing_images.append(path)
27
+
28
+ print(f"Episodes: {len(episodes)}, Steps: {total_steps}")
32
29
 
33
30
  if missing_images:
34
31
  print("Missing images:")