openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -30,6 +30,7 @@ from openadapt_ml.schema.episode import (
|
|
|
30
30
|
# WAA (Windows Agent Arena) Converter
|
|
31
31
|
# ============================================================================
|
|
32
32
|
|
|
33
|
+
|
|
33
34
|
def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
|
|
34
35
|
"""Parse WAA action string into ActionType and parameters.
|
|
35
36
|
|
|
@@ -104,19 +105,25 @@ def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
|
|
|
104
105
|
if func_name == "click":
|
|
105
106
|
params = {}
|
|
106
107
|
if len(cleaned_args) >= 2:
|
|
107
|
-
params["coordinates"] = Coordinates(
|
|
108
|
+
params["coordinates"] = Coordinates(
|
|
109
|
+
x=int(cleaned_args[0]), y=int(cleaned_args[1])
|
|
110
|
+
)
|
|
108
111
|
return ActionType.CLICK, params
|
|
109
112
|
|
|
110
113
|
elif func_name == "doubleclick":
|
|
111
114
|
params = {}
|
|
112
115
|
if len(cleaned_args) >= 2:
|
|
113
|
-
params["coordinates"] = Coordinates(
|
|
116
|
+
params["coordinates"] = Coordinates(
|
|
117
|
+
x=int(cleaned_args[0]), y=int(cleaned_args[1])
|
|
118
|
+
)
|
|
114
119
|
return ActionType.DOUBLE_CLICK, params
|
|
115
120
|
|
|
116
121
|
elif func_name == "rightclick":
|
|
117
122
|
params = {}
|
|
118
123
|
if len(cleaned_args) >= 2:
|
|
119
|
-
params["coordinates"] = Coordinates(
|
|
124
|
+
params["coordinates"] = Coordinates(
|
|
125
|
+
x=int(cleaned_args[0]), y=int(cleaned_args[1])
|
|
126
|
+
)
|
|
120
127
|
return ActionType.RIGHT_CLICK, params
|
|
121
128
|
|
|
122
129
|
elif func_name in ("write", "typewrite"):
|
|
@@ -144,7 +151,9 @@ def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
|
|
|
144
151
|
elif func_name == "moveto":
|
|
145
152
|
params = {}
|
|
146
153
|
if len(cleaned_args) >= 2:
|
|
147
|
-
params["coordinates"] = Coordinates(
|
|
154
|
+
params["coordinates"] = Coordinates(
|
|
155
|
+
x=int(cleaned_args[0]), y=int(cleaned_args[1])
|
|
156
|
+
)
|
|
148
157
|
return ActionType.HOVER, params
|
|
149
158
|
|
|
150
159
|
elif func_name == "drag" or func_name == "dragto":
|
|
@@ -229,7 +238,20 @@ def from_waa_trajectory(
|
|
|
229
238
|
metadata={
|
|
230
239
|
"domain": task_info.get("domain"),
|
|
231
240
|
"difficulty": task_info.get("difficulty"),
|
|
232
|
-
**{
|
|
241
|
+
**{
|
|
242
|
+
k: v
|
|
243
|
+
for k, v in task_info.items()
|
|
244
|
+
if k
|
|
245
|
+
not in [
|
|
246
|
+
"id",
|
|
247
|
+
"task_id",
|
|
248
|
+
"instruction",
|
|
249
|
+
"goal",
|
|
250
|
+
"success",
|
|
251
|
+
"domain",
|
|
252
|
+
"difficulty",
|
|
253
|
+
]
|
|
254
|
+
},
|
|
233
255
|
},
|
|
234
256
|
)
|
|
235
257
|
|
|
@@ -296,12 +318,16 @@ def _action_to_pyautogui(action: Action) -> str:
|
|
|
296
318
|
|
|
297
319
|
if action.type == ActionType.DOUBLE_CLICK:
|
|
298
320
|
if action.coordinates:
|
|
299
|
-
return
|
|
321
|
+
return (
|
|
322
|
+
f"pyautogui.doubleClick({action.coordinates.x}, {action.coordinates.y})"
|
|
323
|
+
)
|
|
300
324
|
return "pyautogui.doubleClick()"
|
|
301
325
|
|
|
302
326
|
if action.type == ActionType.RIGHT_CLICK:
|
|
303
327
|
if action.coordinates:
|
|
304
|
-
return
|
|
328
|
+
return (
|
|
329
|
+
f"pyautogui.rightClick({action.coordinates.x}, {action.coordinates.y})"
|
|
330
|
+
)
|
|
305
331
|
return "pyautogui.rightClick()"
|
|
306
332
|
|
|
307
333
|
if action.type == ActionType.TYPE:
|
|
@@ -342,6 +368,7 @@ def _action_to_pyautogui(action: Action) -> str:
|
|
|
342
368
|
# Internal Format Converter (openadapt_ml.schemas.sessions)
|
|
343
369
|
# ============================================================================
|
|
344
370
|
|
|
371
|
+
|
|
345
372
|
def from_internal_episode(
|
|
346
373
|
internal_episode: Any,
|
|
347
374
|
episode_id: Optional[str] = None,
|
|
@@ -395,7 +422,9 @@ def from_internal_episode(
|
|
|
395
422
|
key=step.action.key,
|
|
396
423
|
modifiers=step.action.modifiers,
|
|
397
424
|
scroll_direction=step.action.scroll_direction,
|
|
398
|
-
scroll_amount=int(step.action.scroll_amount)
|
|
425
|
+
scroll_amount=int(step.action.scroll_amount)
|
|
426
|
+
if step.action.scroll_amount
|
|
427
|
+
else None,
|
|
399
428
|
normalized_end=(step.action.end_x, step.action.end_y)
|
|
400
429
|
if step.action.end_x is not None and step.action.end_y is not None
|
|
401
430
|
else None,
|
|
@@ -403,17 +432,21 @@ def from_internal_episode(
|
|
|
403
432
|
element_id=step.action.target_node_id,
|
|
404
433
|
role=step.action.target_role,
|
|
405
434
|
name=step.action.target_name,
|
|
406
|
-
)
|
|
435
|
+
)
|
|
436
|
+
if step.action.target_node_id
|
|
437
|
+
else None,
|
|
407
438
|
raw=step.action.raw,
|
|
408
439
|
)
|
|
409
440
|
|
|
410
|
-
steps.append(
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
441
|
+
steps.append(
|
|
442
|
+
Step(
|
|
443
|
+
step_index=i,
|
|
444
|
+
observation=obs,
|
|
445
|
+
action=action,
|
|
446
|
+
reasoning=step.thought,
|
|
447
|
+
timestamp=step.t,
|
|
448
|
+
)
|
|
449
|
+
)
|
|
417
450
|
|
|
418
451
|
return Episode(
|
|
419
452
|
episode_id=episode_id or internal_episode.id,
|
|
@@ -423,7 +456,9 @@ def from_internal_episode(
|
|
|
423
456
|
metadata={
|
|
424
457
|
"workflow_id": internal_episode.workflow_id,
|
|
425
458
|
"summary": internal_episode.summary,
|
|
426
|
-
}
|
|
459
|
+
}
|
|
460
|
+
if internal_episode.workflow_id or internal_episode.summary
|
|
461
|
+
else None,
|
|
427
462
|
)
|
|
428
463
|
|
|
429
464
|
|
|
@@ -468,11 +503,21 @@ def to_internal_episode(episode: Episode) -> dict:
|
|
|
468
503
|
"modifiers": step.action.modifiers,
|
|
469
504
|
"scroll_direction": step.action.scroll_direction,
|
|
470
505
|
"scroll_amount": step.action.scroll_amount,
|
|
471
|
-
"end_x": step.action.normalized_end[0]
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
"
|
|
475
|
-
|
|
506
|
+
"end_x": step.action.normalized_end[0]
|
|
507
|
+
if step.action.normalized_end
|
|
508
|
+
else None,
|
|
509
|
+
"end_y": step.action.normalized_end[1]
|
|
510
|
+
if step.action.normalized_end
|
|
511
|
+
else None,
|
|
512
|
+
"target_node_id": step.action.element.element_id
|
|
513
|
+
if step.action.element
|
|
514
|
+
else None,
|
|
515
|
+
"target_role": step.action.element.role
|
|
516
|
+
if step.action.element
|
|
517
|
+
else None,
|
|
518
|
+
"target_name": step.action.element.name
|
|
519
|
+
if step.action.element
|
|
520
|
+
else None,
|
|
476
521
|
"raw": step.action.raw,
|
|
477
522
|
},
|
|
478
523
|
"thought": step.reasoning,
|
|
@@ -484,7 +529,9 @@ def to_internal_episode(episode: Episode) -> dict:
|
|
|
484
529
|
"goal": episode.instruction,
|
|
485
530
|
"steps": steps,
|
|
486
531
|
"success": episode.success,
|
|
487
|
-
"workflow_id": episode.metadata.get("workflow_id")
|
|
532
|
+
"workflow_id": episode.metadata.get("workflow_id")
|
|
533
|
+
if episode.metadata
|
|
534
|
+
else None,
|
|
488
535
|
"summary": episode.metadata.get("summary") if episode.metadata else None,
|
|
489
536
|
}
|
|
490
537
|
|
|
@@ -519,7 +566,9 @@ def load_waa_result(result_dir: Union[str, Path]) -> Episode:
|
|
|
519
566
|
trajectory = data
|
|
520
567
|
elif isinstance(data, dict):
|
|
521
568
|
trajectory = data.get("steps", data.get("trajectory", []))
|
|
522
|
-
task_info = {
|
|
569
|
+
task_info = {
|
|
570
|
+
k: v for k, v in data.items() if k not in ["steps", "trajectory"]
|
|
571
|
+
}
|
|
523
572
|
|
|
524
573
|
# Try to read result
|
|
525
574
|
result_file = result_dir / "result.txt"
|
|
@@ -536,6 +585,4 @@ def load_waa_result(result_dir: Union[str, Path]) -> Episode:
|
|
|
536
585
|
if task_id and "task_id" not in task_info:
|
|
537
586
|
task_info["task_id"] = task_id
|
|
538
587
|
|
|
539
|
-
return from_waa_trajectory(
|
|
540
|
-
trajectory, task_info, episode_id=f"waa_{task_id}"
|
|
541
|
-
)
|
|
588
|
+
return from_waa_trajectory(trajectory, task_info, episode_id=f"waa_{task_id}")
|
openadapt_ml/schema/episode.py
CHANGED
|
@@ -154,7 +154,9 @@ class BoundingBox(BaseModel):
|
|
|
154
154
|
class UIElement(BaseModel):
|
|
155
155
|
"""UI element information from accessibility tree or DOM."""
|
|
156
156
|
|
|
157
|
-
role: Optional[str] = Field(
|
|
157
|
+
role: Optional[str] = Field(
|
|
158
|
+
None, description="Element role (button, textbox, etc.)"
|
|
159
|
+
)
|
|
158
160
|
name: Optional[str] = Field(None, description="Element accessible name")
|
|
159
161
|
value: Optional[str] = Field(None, description="Element value (for inputs)")
|
|
160
162
|
bounds: Optional[BoundingBox] = Field(None, description="Element bounding box")
|
|
@@ -199,9 +201,15 @@ class Action(BaseModel):
|
|
|
199
201
|
# Additional parameters
|
|
200
202
|
url: Optional[str] = Field(None, description="URL for goto action")
|
|
201
203
|
app_name: Optional[str] = Field(None, description="Application name for open/close")
|
|
202
|
-
duration: Optional[float] = Field(
|
|
203
|
-
|
|
204
|
-
|
|
204
|
+
duration: Optional[float] = Field(
|
|
205
|
+
None, description="Duration in seconds (for wait)"
|
|
206
|
+
)
|
|
207
|
+
monitor_id: Optional[int] = Field(
|
|
208
|
+
None, description="Monitor ID for select_monitor action"
|
|
209
|
+
)
|
|
210
|
+
window_title: Optional[str] = Field(
|
|
211
|
+
None, description="Window title for window_focus action"
|
|
212
|
+
)
|
|
205
213
|
|
|
206
214
|
# Normalized coordinates (0.0-1.0) - alternative to pixel coordinates
|
|
207
215
|
# Useful for resolution-independent recordings
|
|
@@ -223,7 +231,11 @@ class Action(BaseModel):
|
|
|
223
231
|
@model_validator(mode="after")
|
|
224
232
|
def validate_action_params(self) -> "Action":
|
|
225
233
|
"""Validate that required parameters are present for action type."""
|
|
226
|
-
if self.type in {
|
|
234
|
+
if self.type in {
|
|
235
|
+
ActionType.CLICK,
|
|
236
|
+
ActionType.DOUBLE_CLICK,
|
|
237
|
+
ActionType.RIGHT_CLICK,
|
|
238
|
+
}:
|
|
227
239
|
if self.coordinates is None and self.element is None:
|
|
228
240
|
# Allow missing coordinates - can be inferred from context
|
|
229
241
|
pass
|
|
@@ -259,7 +271,9 @@ class Observation(BaseModel):
|
|
|
259
271
|
|
|
260
272
|
# Window/screen info
|
|
261
273
|
window_title: Optional[str] = Field(None, description="Active window title")
|
|
262
|
-
app_name: Optional[str] = Field(
|
|
274
|
+
app_name: Optional[str] = Field(
|
|
275
|
+
None, description="Application name (e.g., 'Chrome', 'System Settings')"
|
|
276
|
+
)
|
|
263
277
|
url: Optional[str] = Field(None, description="Current URL (for web apps)")
|
|
264
278
|
screen_size: Optional[tuple[int, int]] = Field(
|
|
265
279
|
None, description="Screen dimensions (width, height)"
|
|
@@ -293,7 +307,9 @@ class Step(BaseModel):
|
|
|
293
307
|
|
|
294
308
|
# Outcome
|
|
295
309
|
reward: Optional[float] = Field(None, description="Reward signal (if available)")
|
|
296
|
-
done: Optional[bool] = Field(
|
|
310
|
+
done: Optional[bool] = Field(
|
|
311
|
+
None, description="Whether episode ended after this step"
|
|
312
|
+
)
|
|
297
313
|
|
|
298
314
|
# Timing
|
|
299
315
|
timestamp: Optional[float] = Field(None, description="Unix timestamp of action")
|
|
@@ -311,8 +327,7 @@ class Episode(BaseModel):
|
|
|
311
327
|
|
|
312
328
|
# Schema metadata
|
|
313
329
|
schema_version: str = Field(
|
|
314
|
-
default=SCHEMA_VERSION,
|
|
315
|
-
description="Schema version for compatibility checking"
|
|
330
|
+
default=SCHEMA_VERSION, description="Schema version for compatibility checking"
|
|
316
331
|
)
|
|
317
332
|
|
|
318
333
|
# Episode identification
|
|
@@ -329,21 +344,20 @@ class Episode(BaseModel):
|
|
|
329
344
|
steps: list[Step] = Field(..., description="Sequence of steps in the episode")
|
|
330
345
|
|
|
331
346
|
# Outcome
|
|
332
|
-
success: Optional[bool] = Field(
|
|
347
|
+
success: Optional[bool] = Field(
|
|
348
|
+
None, description="Whether task was completed successfully"
|
|
349
|
+
)
|
|
333
350
|
final_reward: Optional[float] = Field(None, description="Final reward/score")
|
|
334
351
|
|
|
335
352
|
# Provenance
|
|
336
353
|
source: Optional[BenchmarkSource] = Field(
|
|
337
354
|
None, description="Source benchmark/dataset"
|
|
338
355
|
)
|
|
339
|
-
source_file: Optional[str] = Field(
|
|
340
|
-
None, description="Original source file path"
|
|
341
|
-
)
|
|
356
|
+
source_file: Optional[str] = Field(None, description="Original source file path")
|
|
342
357
|
|
|
343
358
|
# Metadata
|
|
344
359
|
created_at: Optional[datetime] = Field(
|
|
345
|
-
default_factory=datetime.utcnow,
|
|
346
|
-
description="When episode was created/recorded"
|
|
360
|
+
default_factory=datetime.utcnow, description="When episode was created/recorded"
|
|
347
361
|
)
|
|
348
362
|
agent_model: Optional[str] = Field(
|
|
349
363
|
None, description="Model that generated this episode (e.g., 'gpt-4o')"
|
|
@@ -351,9 +365,7 @@ class Episode(BaseModel):
|
|
|
351
365
|
environment: Optional[str] = Field(
|
|
352
366
|
None, description="Environment info (OS, browser, etc.)"
|
|
353
367
|
)
|
|
354
|
-
tags: Optional[list[str]] = Field(
|
|
355
|
-
None, description="Tags for categorization"
|
|
356
|
-
)
|
|
368
|
+
tags: Optional[list[str]] = Field(None, description="Tags for categorization")
|
|
357
369
|
|
|
358
370
|
# Extension point for benchmark-specific data
|
|
359
371
|
metadata: Optional[dict[str, Any]] = Field(
|
|
@@ -389,6 +401,7 @@ class Episode(BaseModel):
|
|
|
389
401
|
# Utility Functions
|
|
390
402
|
# ============================================================================
|
|
391
403
|
|
|
404
|
+
|
|
392
405
|
def validate_episode(data: dict[str, Any]) -> tuple[bool, Optional[str]]:
|
|
393
406
|
"""Validate episode data against schema.
|
|
394
407
|
|