openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. openadapt_ml/benchmarks/__init__.py +8 -0
  2. openadapt_ml/benchmarks/agent.py +90 -11
  3. openadapt_ml/benchmarks/azure.py +35 -6
  4. openadapt_ml/benchmarks/cli.py +4449 -201
  5. openadapt_ml/benchmarks/live_tracker.py +180 -0
  6. openadapt_ml/benchmarks/runner.py +41 -4
  7. openadapt_ml/benchmarks/viewer.py +1219 -0
  8. openadapt_ml/benchmarks/vm_monitor.py +610 -0
  9. openadapt_ml/benchmarks/waa.py +61 -4
  10. openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
  11. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  12. openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
  13. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  14. openadapt_ml/benchmarks/waa_live.py +619 -0
  15. openadapt_ml/cloud/local.py +1555 -1
  16. openadapt_ml/cloud/ssh_tunnel.py +553 -0
  17. openadapt_ml/datasets/next_action.py +87 -68
  18. openadapt_ml/evals/grounding.py +26 -8
  19. openadapt_ml/evals/trajectory_matching.py +84 -36
  20. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  21. openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
  22. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  23. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  24. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  25. openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
  26. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  27. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  28. openadapt_ml/experiments/waa_demo/runner.py +717 -0
  29. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  30. openadapt_ml/export/__init__.py +9 -0
  31. openadapt_ml/export/__main__.py +6 -0
  32. openadapt_ml/export/cli.py +89 -0
  33. openadapt_ml/export/parquet.py +265 -0
  34. openadapt_ml/ingest/__init__.py +3 -4
  35. openadapt_ml/ingest/capture.py +89 -81
  36. openadapt_ml/ingest/loader.py +116 -68
  37. openadapt_ml/ingest/synthetic.py +221 -159
  38. openadapt_ml/retrieval/README.md +226 -0
  39. openadapt_ml/retrieval/USAGE.md +391 -0
  40. openadapt_ml/retrieval/__init__.py +91 -0
  41. openadapt_ml/retrieval/demo_retriever.py +817 -0
  42. openadapt_ml/retrieval/embeddings.py +629 -0
  43. openadapt_ml/retrieval/index.py +194 -0
  44. openadapt_ml/retrieval/retriever.py +160 -0
  45. openadapt_ml/runtime/policy.py +10 -10
  46. openadapt_ml/schema/__init__.py +104 -0
  47. openadapt_ml/schema/converters.py +541 -0
  48. openadapt_ml/schema/episode.py +457 -0
  49. openadapt_ml/scripts/compare.py +26 -16
  50. openadapt_ml/scripts/eval_policy.py +4 -5
  51. openadapt_ml/scripts/prepare_synthetic.py +14 -17
  52. openadapt_ml/scripts/train.py +81 -70
  53. openadapt_ml/training/benchmark_viewer.py +3225 -0
  54. openadapt_ml/training/trainer.py +120 -363
  55. openadapt_ml/training/trl_trainer.py +354 -0
  56. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
  57. openadapt_ml-0.2.0.dist-info/RECORD +86 -0
  58. openadapt_ml/schemas/__init__.py +0 -53
  59. openadapt_ml/schemas/sessions.py +0 -122
  60. openadapt_ml/schemas/validation.py +0 -252
  61. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  62. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
  63. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,151 @@
1
+ """Task definitions for WAA demo experiment.
2
+
3
+ 10 carefully selected tasks across 4 enterprise-relevant domains.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from enum import Enum
10
+ from typing import Optional
11
+
12
+
13
class Difficulty(Enum):
    """Closed set of task difficulty levels."""

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
17
+
18
+
19
class Domain(Enum):
    """Application domains for tasks.

    Values match the WAA repo's ``examples/<domain>/`` directory names used
    in each task's ``json_path``.
    """

    BROWSER = "msedge"
    OFFICE_CALC = "libreoffice_calc"
    OFFICE_WRITER = "libreoffice_writer"
    SETTINGS = "settings"
    FILE_EXPLORER = "file_explorer"
25
+
26
+
27
@dataclass
class WATask:
    """A Windows Agent Arena task definition."""

    task_id: str            # Full WAA task identifier (UUID plus a "-wos"/"-WOS" suffix)
    instruction: str        # Natural-language goal given to the agent
    domain: Domain          # Application the task targets (see Domain)
    difficulty: Difficulty  # Rough difficulty bucket (see Difficulty)
    first_action_hint: str  # Hint describing the expected first UI action
    demo_method: str        # "manual" (hand-written demo) or "recorded" (captured demo)
    json_path: str          # Path in WAA repo
38
+
39
+
40
+ TASKS: dict[str, WATask] = {
41
+ "1": WATask(
42
+ task_id="004587f8-6028-4656-94c1-681481abbc9c-wos",
43
+ instruction="Enable the 'Do Not Track' feature in Edge",
44
+ domain=Domain.BROWSER,
45
+ difficulty=Difficulty.MEDIUM,
46
+ first_action_hint="Click three-dot menu in Edge",
47
+ demo_method="manual",
48
+ json_path="examples/msedge/004587f8-6028-4656-94c1-681481abbc9c-wos.json",
49
+ ),
50
+ "2": WATask(
51
+ task_id="049d3788-c979-4ea6-934d-3a35c4630faf-WOS",
52
+ instruction="Save this webpage to bookmarks bar",
53
+ domain=Domain.BROWSER,
54
+ difficulty=Difficulty.EASY,
55
+ first_action_hint="Click star/bookmark icon or Ctrl+D",
56
+ demo_method="manual",
57
+ json_path="examples/msedge/049d3788-c979-4ea6-934d-3a35c4630faf-WOS.json",
58
+ ),
59
+ "3": WATask(
60
+ task_id="2acd62b4-a2ab-44a7-a7e3-f5227bbd8324-wos",
61
+ instruction="Set default font size to largest for grandmother",
62
+ domain=Domain.BROWSER,
63
+ difficulty=Difficulty.MEDIUM,
64
+ first_action_hint="Open Settings > Appearance",
65
+ demo_method="manual",
66
+ json_path="examples/msedge/2acd62b4-a2ab-44a7-a7e3-f5227bbd8324-wos.json",
67
+ ),
68
+ "4": WATask(
69
+ task_id="01b269ae-2111-4a07-81fd-3fcd711993b0-WOS",
70
+ instruction="Fill all blank cells with value from cell above",
71
+ domain=Domain.OFFICE_CALC,
72
+ difficulty=Difficulty.HARD,
73
+ first_action_hint="Select cells, use Go To Special > Blanks",
74
+ demo_method="recorded",
75
+ json_path="examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0-WOS.json",
76
+ ),
77
+ "5": WATask(
78
+ task_id="0a2e43bf-b26c-4631-a966-af9dfa12c9e5-WOS",
79
+ instruction="Calculate monthly totals and create line chart",
80
+ domain=Domain.OFFICE_CALC,
81
+ difficulty=Difficulty.HARD,
82
+ first_action_hint="Click cell for SUM formula",
83
+ demo_method="recorded",
84
+ json_path="examples/libreoffice_calc/0a2e43bf-b26c-4631-a966-af9dfa12c9e5-WOS.json",
85
+ ),
86
+ "6": WATask(
87
+ task_id="3ef2b351-8a84-4ff2-8724-d86eae9b842e-WOS",
88
+ instruction="Center align the heading in LibreOffice Writer",
89
+ domain=Domain.OFFICE_WRITER,
90
+ difficulty=Difficulty.EASY,
91
+ first_action_hint="Select text, click center align button",
92
+ demo_method="manual",
93
+ json_path="examples/libreoffice_writer/3ef2b351-8a84-4ff2-8724-d86eae9b842e-WOS.json",
94
+ ),
95
+ "7": WATask(
96
+ task_id="37e10fc4-b4c5-4b02-a65c-bfae8bc51d3f-wos",
97
+ instruction="Turn off notifications for system",
98
+ domain=Domain.SETTINGS,
99
+ difficulty=Difficulty.MEDIUM,
100
+ first_action_hint="Open Settings > System > Notifications",
101
+ demo_method="manual",
102
+ json_path="examples/settings/37e10fc4-b4c5-4b02-a65c-bfae8bc51d3f-wos.json",
103
+ ),
104
+ "8": WATask(
105
+ task_id="46adf721-2949-4426-b069-010b7c128d8f-wos",
106
+ instruction="Enable Night Light: on at 7PM, off at 7AM",
107
+ domain=Domain.SETTINGS,
108
+ difficulty=Difficulty.MEDIUM,
109
+ first_action_hint="Open Settings > Display > Night Light",
110
+ demo_method="manual",
111
+ json_path="examples/settings/46adf721-2949-4426-b069-010b7c128d8f-wos.json",
112
+ ),
113
+ "9": WATask(
114
+ task_id="0c9dda13-428c-492b-900b-f48562111f93-WOS",
115
+ instruction="Create Archive folder and move all .docx files",
116
+ domain=Domain.FILE_EXPLORER,
117
+ difficulty=Difficulty.MEDIUM,
118
+ first_action_hint="Right-click > New Folder, then select and move files",
119
+ demo_method="recorded",
120
+ json_path="examples/file_explorer/0c9dda13-428c-492b-900b-f48562111f93-WOS.json",
121
+ ),
122
+ "10": WATask(
123
+ task_id="34a4fee9-e52e-4a4a-96d2-68d35091504a-WOS",
124
+ instruction="Change view to Details view",
125
+ domain=Domain.FILE_EXPLORER,
126
+ difficulty=Difficulty.EASY,
127
+ first_action_hint="Click View menu or dropdown",
128
+ demo_method="manual",
129
+ json_path="examples/file_explorer/34a4fee9-e52e-4a4a-96d2-68d35091504a-WOS.json",
130
+ ),
131
+ }
132
+
133
+
134
def get_task(task_num: str | int) -> Optional[WATask]:
    """Look up a task by its number (1-10); None if the number is unknown."""
    key = str(task_num)
    return TASKS.get(key)
137
+
138
+
139
def get_tasks_by_method(method: str) -> list[WATask]:
    """Get all tasks that use a specific demo method."""
    matching: list[WATask] = []
    for task in TASKS.values():
        if task.demo_method == method:
            matching.append(task)
    return matching
142
+
143
+
144
def get_manual_tasks() -> list[WATask]:
    """Get tasks requiring manual demo writing."""
    return [task for task in TASKS.values() if task.demo_method == "manual"]
147
+
148
+
149
def get_recorded_tasks() -> list[WATask]:
    """Get tasks requiring recorded demos."""
    return [task for task in TASKS.values() if task.demo_method == "recorded"]
@@ -0,0 +1,9 @@
1
+ """Export utilities for Episode data.
2
+
3
+ This module provides tools to export Episode trajectories to various formats
4
+ for analytics, training, and sharing.
5
+ """
6
+
7
+ from openadapt_ml.export.parquet import to_parquet, from_parquet
8
+
9
+ __all__ = ["to_parquet", "from_parquet"]
@@ -0,0 +1,6 @@
1
+ """Allow running export module as python -m openadapt_ml.export."""
2
+
3
+ from openadapt_ml.export.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1,89 @@
1
+ """CLI for export utilities."""
2
+
3
+ import argparse
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
def main() -> int:
    """Entry point for the export CLI.

    Returns:
        Process exit status: 0 on success, 1 on error or unknown command.
    """
    arg_parser = argparse.ArgumentParser(
        description="Export Episode data to various formats",
        prog="python -m openadapt_ml.export",
    )
    commands = arg_parser.add_subparsers(dest="command", help="Export format")

    # "parquet" subcommand: step-level analytics export.
    pq_cmd = commands.add_parser(
        "parquet",
        help="Export to Parquet format for analytics",
    )
    pq_cmd.add_argument(
        "--input",
        "-i",
        required=True,
        help="Directory containing Episode JSON files",
    )
    pq_cmd.add_argument(
        "--output",
        "-o",
        required=True,
        help="Output path for .parquet file",
    )
    pq_cmd.add_argument(
        "--include-summary",
        action="store_true",
        help="Also generate episode-level summary table",
    )

    args = arg_parser.parse_args()

    # No (or unknown) subcommand: show usage and signal failure.
    if args.command != "parquet":
        arg_parser.print_help()
        return 1
    return export_parquet(args)
46
+
47
+
48
def export_parquet(args: argparse.Namespace) -> int:
    """Handle the ``parquet`` subcommand.

    Loads episodes from ``args.input``, writes them to ``args.output`` as
    Parquet, and optionally emits a summary table.

    Returns:
        Process exit status: 0 on success, 1 on error.
    """
    # Import lazily so a missing optional dependency yields a clean error.
    try:
        from openadapt_ml.export import to_parquet
        from openadapt_ml.ingest import load_episodes
    except ImportError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input path does not exist: {source}", file=sys.stderr)
        return 1

    print(f"Loading episodes from: {source}")
    episodes = load_episodes(str(source))
    print(f"Loaded {len(episodes)} episodes")

    if not episodes:
        print("Warning: No episodes found", file=sys.stderr)
        return 1

    step_total = sum(len(ep.steps) for ep in episodes)
    print(f"Total steps: {step_total}")

    print(f"Exporting to: {args.output}")
    to_parquet(episodes, args.output, include_summary=args.include_summary)

    print("Done!")
    if args.include_summary:
        # Mirrors the naming convention used by the parquet module.
        summary_path = args.output.replace(".parquet", "_summary.parquet")
        print(f"Summary written to: {summary_path}")

    return 0
86
+
87
+
88
+ if __name__ == "__main__":
89
+ sys.exit(main())
@@ -0,0 +1,265 @@
1
+ """Parquet export utilities for Episode trajectories.
2
+
3
+ Parquet is a derived format for analytics and governance.
4
+ Episode JSON remains the canonical representation.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from openadapt_ml.schema import Episode
15
+
16
+
17
def to_parquet(
    episodes: list[Episode],
    output_path: str,
    flatten_steps: bool = True,
    include_summary: bool = False,
) -> None:
    """Export Episodes to Parquet for analytics.

    Creates a step-level Parquet file with one row per step.
    Episode-level fields are repeated for each step.

    Args:
        episodes: List of Episode objects to export.
        output_path: Path to output .parquet file.
        flatten_steps: If True, one row per step. If False, one row per episode
            with steps as nested structure (not yet implemented).
        include_summary: If True, also generate {output_path}_summary.parquet
            with episode-level aggregations.

    Raises:
        ImportError: If pyarrow is not installed.
        ValueError: If flatten_steps is False (not yet implemented).

    Example:
        >>> from openadapt_ml.ingest import load_episodes
        >>> from openadapt_ml.export import to_parquet
        >>> episodes = load_episodes("workflow_exports/")
        >>> to_parquet(episodes, "episodes.parquet")
    """
    try:
        import pyarrow as pa
        import pyarrow.parquet as pq
    except ImportError as err:
        # Chain the original error so the missing-module detail is preserved.
        raise ImportError(
            "Parquet export requires pyarrow. "
            "Install with: pip install openadapt-ml[parquet]"
        ) from err

    if not flatten_steps:
        raise ValueError(
            "flatten_steps=False is not yet implemented. "
            "Use flatten_steps=True for step-level rows."
        )

    rows = []
    for episode in episodes:
        # Serialize metadata once per episode; it is repeated on every row.
        episode_metadata = None
        if hasattr(episode, "metadata") and episode.metadata:
            episode_metadata = json.dumps(episode.metadata)

        for step in episode.steps:
            rows.append(_step_row(episode, step, episode_metadata))

    table = pa.Table.from_pylist(rows)
    pq.write_table(table, output_path)

    if include_summary:
        _write_summary(episodes, output_path)


def _step_row(episode: Episode, step, episode_metadata: str | None) -> dict:
    """Build one flat row dict for a single step of *episode*."""
    action = step.action
    observation = step.observation

    # Normalized coordinates (and drag end point), when the action has them.
    x, y = None, None
    if action and action.normalized_coordinates:
        x, y = action.normalized_coordinates

    end_x = end_y = None
    if action and action.normalized_end:
        end_x, end_y = action.normalized_end

    # Action type may be an enum or a plain string; store the string value.
    action_type = None
    if action:
        action_type = action.type.value if hasattr(action.type, 'value') else action.type

    return {
        "episode_id": episode.episode_id,
        "instruction": episode.instruction,
        "task_id": getattr(episode, "task_id", None),
        "step_index": step.step_index,
        "timestamp": step.timestamp,
        "action_type": action_type,
        "x": x,
        "y": y,
        "end_x": end_x,
        "end_y": end_y,
        "text": getattr(action, "text", None) if action else None,
        "key": getattr(action, "key", None) if action else None,
        "scroll_direction": (
            getattr(action, "scroll_direction", None) if action else None
        ),
        "screenshot_path": (
            observation.screenshot_path if observation else None
        ),
        "window_title": (
            getattr(observation, "window_title", None) if observation else None
        ),
        # Not in new schema at Observation level.
        "app_name": None,
        "url": None,
        "reasoning": getattr(step, "reasoning", None),
        "episode_metadata": episode_metadata,
    }
+
121
+
122
+ def _write_summary(episodes: list[Episode], output_path: str) -> None:
123
+ """Write episode-level summary Parquet file."""
124
+ try:
125
+ import pyarrow as pa
126
+ import pyarrow.parquet as pq
127
+ except ImportError:
128
+ return
129
+
130
+ summary_rows = []
131
+ for episode in episodes:
132
+ first_t = episode.steps[0].timestamp if episode.steps else None
133
+ last_t = episode.steps[-1].timestamp if episode.steps else None
134
+ duration = (last_t - first_t) if first_t is not None and last_t is not None else None
135
+
136
+ # Extract action type values (enum -> string)
137
+ first_action_type = None
138
+ last_action_type = None
139
+ if episode.steps and episode.steps[0].action:
140
+ t = episode.steps[0].action.type
141
+ first_action_type = t.value if hasattr(t, 'value') else t
142
+ if episode.steps and episode.steps[-1].action:
143
+ t = episode.steps[-1].action.type
144
+ last_action_type = t.value if hasattr(t, 'value') else t
145
+
146
+ summary_rows.append({
147
+ "episode_id": episode.episode_id,
148
+ "instruction": episode.instruction,
149
+ "task_id": getattr(episode, "task_id", None),
150
+ "step_count": len(episode.steps),
151
+ "duration": duration,
152
+ "success": getattr(episode, "success", None),
153
+ "first_action_type": first_action_type,
154
+ "last_action_type": last_action_type,
155
+ "metadata": (
156
+ json.dumps(episode.metadata)
157
+ if hasattr(episode, "metadata") and episode.metadata
158
+ else None
159
+ ),
160
+ })
161
+
162
+ summary_table = pa.Table.from_pylist(summary_rows)
163
+ summary_path = str(output_path).replace(".parquet", "_summary.parquet")
164
+ pq.write_table(summary_table, summary_path)
165
+
166
+
167
def from_parquet(parquet_path: str) -> list[Episode]:
    """Load Episodes from Parquet (inverse of to_parquet).

    This is a lossy reconstruction. For full fidelity, always keep
    Episode JSON as the source of truth.

    Args:
        parquet_path: Path to the Parquet file created by to_parquet().

    Returns:
        List of reconstructed Episode objects.

    Raises:
        ImportError: If pyarrow is not installed.

    Note:
        - Metadata fields are deserialized from JSON strings
        - Step ordering is recovered from step_index
        - Episode boundaries are recovered from episode_id grouping
        - Null cells come back from pandas as NaN for float columns; these
          are normalized to None before reconstructing actions/steps.
    """
    try:
        import pyarrow.parquet as pq
    except ImportError as err:
        raise ImportError(
            "Parquet import requires pyarrow. "
            "Install with: pip install openadapt-ml[parquet]"
        ) from err

    from openadapt_ml.schema import Action, ActionType, Episode, Observation, Step

    def _scalar(value):
        """Normalize pandas missing values (None or NaN) to None.

        Bug fix: `table.to_pandas()` returns NaN (not None) for null float
        columns, so bare `is not None` checks previously produced (nan, nan)
        coordinates, NaN timestamps, and `int(nan)` errors.
        """
        if value is None:
            return None
        # NaN is the only float value not equal to itself.
        if isinstance(value, float) and value != value:
            return None
        return value

    table = pq.read_table(parquet_path)
    df = table.to_pandas()

    episodes = []
    for episode_id, group in df.groupby("episode_id"):
        group = group.sort_values("step_index")

        steps = []
        for _, row in group.iterrows():
            observation = Observation(
                screenshot_path=(
                    _scalar(row.get("screenshot_path"))
                    or _scalar(row.get("image_path"))
                ),
                window_title=_scalar(row.get("window_title")),
            )

            action = None
            action_type_str = _scalar(row.get("action_type"))
            if action_type_str:
                # Convert string action type to ActionType enum.
                try:
                    action_type = ActionType(action_type_str)
                except ValueError:
                    action_type = ActionType.CLICK  # Default fallback

                # Build normalized coordinates tuple if x and y are present.
                x = _scalar(row.get("x"))
                y = _scalar(row.get("y"))
                normalized_coords = (
                    (float(x), float(y))
                    if x is not None and y is not None
                    else None
                )

                # Build normalized end coordinates for drag.
                end_x = _scalar(row.get("end_x"))
                end_y = _scalar(row.get("end_y"))
                normalized_end = (
                    (float(end_x), float(end_y))
                    if end_x is not None and end_y is not None
                    else None
                )

                action = Action(
                    type=action_type,
                    normalized_coordinates=normalized_coords,
                    normalized_end=normalized_end,
                    text=_scalar(row.get("text")),
                    key=_scalar(row.get("key")),
                    scroll_direction=_scalar(row.get("scroll_direction")),
                )

            step_index = _scalar(row.get("step_index"))
            step = Step(
                step_index=int(step_index) if step_index is not None else 0,
                observation=observation,
                action=action,
                reasoning=_scalar(row.get("reasoning")) or _scalar(row.get("thought")),
                timestamp=_scalar(row.get("timestamp")),
            )
            steps.append(step)

        # Parse metadata if present (only meaningful when it is a JSON string).
        metadata = None
        raw_meta = _scalar(group.iloc[0].get("episode_metadata"))
        if raw_meta:
            try:
                metadata = json.loads(raw_meta)
            except (json.JSONDecodeError, TypeError):
                pass

        episode = Episode(
            episode_id=str(episode_id),
            instruction=group.iloc[0].get("instruction") or group.iloc[0].get("goal", ""),
            steps=steps,
            task_id=_scalar(group.iloc[0].get("task_id")),
            metadata=metadata,
        )
        episodes.append(episode)

    return episodes
@@ -6,7 +6,6 @@ and converting them to the format used for training.
6
6
  Data Model:
7
7
  - Episode: A single task attempt (e.g., "log into the app"). Contains a sequence
8
8
  of Steps, each with an Observation (screenshot) and Action (click/type/etc).
9
- - Session: A container grouping one or more Episodes with shared metadata.
10
9
 
11
10
  Functions:
12
11
  - load_episodes(): Load Episodes from JSON files (primary entry point)
@@ -14,16 +13,16 @@ Functions:
14
13
  - capture_to_episode(): Converts one openadapt-capture recording → one Episode
15
14
  - capture_to_session(): Converts one recording → Session containing one Episode
16
15
  - load_captures_as_sessions(): Loads multiple recordings → list of Sessions
17
- - generate_synthetic_sessions(): Creates synthetic training data
16
+ - generate_synthetic_episodes(): Creates synthetic training data
18
17
  """
19
18
 
20
19
  from openadapt_ml.ingest.loader import load_episodes, save_episodes
21
- from openadapt_ml.ingest.synthetic import generate_synthetic_sessions
20
+ from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
22
21
 
23
22
  __all__ = [
24
23
  "load_episodes",
25
24
  "save_episodes",
26
- "generate_synthetic_sessions",
25
+ "generate_synthetic_episodes",
27
26
  ]
28
27
 
29
28
  # Conditionally export capture functions if openadapt-capture is installed