openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -115
  8. openadapt_ml/benchmarks/agent.py +265 -421
  9. openadapt_ml/benchmarks/azure.py +28 -19
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1722 -4847
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +22 -5
  14. openadapt_ml/benchmarks/vm_monitor.py +530 -29
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
  16. openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
  17. openadapt_ml/cloud/azure_inference.py +3 -5
  18. openadapt_ml/cloud/lambda_labs.py +722 -307
  19. openadapt_ml/cloud/local.py +2038 -487
  20. openadapt_ml/cloud/ssh_tunnel.py +68 -26
  21. openadapt_ml/datasets/next_action.py +40 -30
  22. openadapt_ml/evals/grounding.py +8 -3
  23. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  24. openadapt_ml/evals/trajectory_matching.py +41 -26
  25. openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
  26. openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
  27. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  28. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  29. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  30. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  31. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  32. openadapt_ml/experiments/waa_demo/runner.py +29 -14
  33. openadapt_ml/export/parquet.py +36 -24
  34. openadapt_ml/grounding/detector.py +18 -14
  35. openadapt_ml/ingest/__init__.py +8 -6
  36. openadapt_ml/ingest/capture.py +25 -22
  37. openadapt_ml/ingest/loader.py +7 -4
  38. openadapt_ml/ingest/synthetic.py +189 -100
  39. openadapt_ml/models/api_adapter.py +14 -4
  40. openadapt_ml/models/base_adapter.py +10 -2
  41. openadapt_ml/models/providers/__init__.py +288 -0
  42. openadapt_ml/models/providers/anthropic.py +266 -0
  43. openadapt_ml/models/providers/base.py +299 -0
  44. openadapt_ml/models/providers/google.py +376 -0
  45. openadapt_ml/models/providers/openai.py +342 -0
  46. openadapt_ml/models/qwen_vl.py +46 -19
  47. openadapt_ml/perception/__init__.py +35 -0
  48. openadapt_ml/perception/integration.py +399 -0
  49. openadapt_ml/retrieval/demo_retriever.py +50 -24
  50. openadapt_ml/retrieval/embeddings.py +9 -8
  51. openadapt_ml/retrieval/retriever.py +3 -1
  52. openadapt_ml/runtime/__init__.py +50 -0
  53. openadapt_ml/runtime/policy.py +18 -5
  54. openadapt_ml/runtime/safety_gate.py +471 -0
  55. openadapt_ml/schema/__init__.py +9 -0
  56. openadapt_ml/schema/converters.py +74 -27
  57. openadapt_ml/schema/episode.py +31 -18
  58. openadapt_ml/scripts/capture_screenshots.py +530 -0
  59. openadapt_ml/scripts/compare.py +85 -54
  60. openadapt_ml/scripts/demo_policy.py +4 -1
  61. openadapt_ml/scripts/eval_policy.py +15 -9
  62. openadapt_ml/scripts/make_gif.py +1 -1
  63. openadapt_ml/scripts/prepare_synthetic.py +3 -1
  64. openadapt_ml/scripts/train.py +21 -9
  65. openadapt_ml/segmentation/README.md +920 -0
  66. openadapt_ml/segmentation/__init__.py +97 -0
  67. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  68. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  69. openadapt_ml/segmentation/annotator.py +610 -0
  70. openadapt_ml/segmentation/cache.py +290 -0
  71. openadapt_ml/segmentation/cli.py +674 -0
  72. openadapt_ml/segmentation/deduplicator.py +656 -0
  73. openadapt_ml/segmentation/frame_describer.py +788 -0
  74. openadapt_ml/segmentation/pipeline.py +340 -0
  75. openadapt_ml/segmentation/schemas.py +622 -0
  76. openadapt_ml/segmentation/segment_extractor.py +634 -0
  77. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  78. openadapt_ml/training/benchmark_viewer.py +52 -41
  79. openadapt_ml/training/shared_ui.py +7 -7
  80. openadapt_ml/training/stub_provider.py +57 -35
  81. openadapt_ml/training/trainer.py +143 -86
  82. openadapt_ml/training/trl_trainer.py +70 -21
  83. openadapt_ml/training/viewer.py +323 -108
  84. openadapt_ml/training/viewer_components.py +180 -0
  85. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
  86. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  87. openadapt_ml/benchmarks/base.py +0 -366
  88. openadapt_ml/benchmarks/data_collection.py +0 -432
  89. openadapt_ml/benchmarks/live_tracker.py +0 -180
  90. openadapt_ml/benchmarks/runner.py +0 -418
  91. openadapt_ml/benchmarks/waa.py +0 -761
  92. openadapt_ml/benchmarks/waa_live.py +0 -619
  93. openadapt_ml-0.2.0.dist-info/RECORD +0 -86
  94. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  95. {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -30,6 +30,7 @@ from openadapt_ml.schema.episode import (
30
30
  # WAA (Windows Agent Arena) Converter
31
31
  # ============================================================================
32
32
 
33
+
33
34
  def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
34
35
  """Parse WAA action string into ActionType and parameters.
35
36
 
@@ -104,19 +105,25 @@ def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
104
105
  if func_name == "click":
105
106
  params = {}
106
107
  if len(cleaned_args) >= 2:
107
- params["coordinates"] = Coordinates(x=int(cleaned_args[0]), y=int(cleaned_args[1]))
108
+ params["coordinates"] = Coordinates(
109
+ x=int(cleaned_args[0]), y=int(cleaned_args[1])
110
+ )
108
111
  return ActionType.CLICK, params
109
112
 
110
113
  elif func_name == "doubleclick":
111
114
  params = {}
112
115
  if len(cleaned_args) >= 2:
113
- params["coordinates"] = Coordinates(x=int(cleaned_args[0]), y=int(cleaned_args[1]))
116
+ params["coordinates"] = Coordinates(
117
+ x=int(cleaned_args[0]), y=int(cleaned_args[1])
118
+ )
114
119
  return ActionType.DOUBLE_CLICK, params
115
120
 
116
121
  elif func_name == "rightclick":
117
122
  params = {}
118
123
  if len(cleaned_args) >= 2:
119
- params["coordinates"] = Coordinates(x=int(cleaned_args[0]), y=int(cleaned_args[1]))
124
+ params["coordinates"] = Coordinates(
125
+ x=int(cleaned_args[0]), y=int(cleaned_args[1])
126
+ )
120
127
  return ActionType.RIGHT_CLICK, params
121
128
 
122
129
  elif func_name in ("write", "typewrite"):
@@ -144,7 +151,9 @@ def _parse_waa_action(action_str: str) -> tuple[ActionType, dict[str, Any]]:
144
151
  elif func_name == "moveto":
145
152
  params = {}
146
153
  if len(cleaned_args) >= 2:
147
- params["coordinates"] = Coordinates(x=int(cleaned_args[0]), y=int(cleaned_args[1]))
154
+ params["coordinates"] = Coordinates(
155
+ x=int(cleaned_args[0]), y=int(cleaned_args[1])
156
+ )
148
157
  return ActionType.HOVER, params
149
158
 
150
159
  elif func_name == "drag" or func_name == "dragto":
@@ -229,7 +238,20 @@ def from_waa_trajectory(
229
238
  metadata={
230
239
  "domain": task_info.get("domain"),
231
240
  "difficulty": task_info.get("difficulty"),
232
- **{k: v for k, v in task_info.items() if k not in ["id", "task_id", "instruction", "goal", "success", "domain", "difficulty"]},
241
+ **{
242
+ k: v
243
+ for k, v in task_info.items()
244
+ if k
245
+ not in [
246
+ "id",
247
+ "task_id",
248
+ "instruction",
249
+ "goal",
250
+ "success",
251
+ "domain",
252
+ "difficulty",
253
+ ]
254
+ },
233
255
  },
234
256
  )
235
257
 
@@ -296,12 +318,16 @@ def _action_to_pyautogui(action: Action) -> str:
296
318
 
297
319
  if action.type == ActionType.DOUBLE_CLICK:
298
320
  if action.coordinates:
299
- return f"pyautogui.doubleClick({action.coordinates.x}, {action.coordinates.y})"
321
+ return (
322
+ f"pyautogui.doubleClick({action.coordinates.x}, {action.coordinates.y})"
323
+ )
300
324
  return "pyautogui.doubleClick()"
301
325
 
302
326
  if action.type == ActionType.RIGHT_CLICK:
303
327
  if action.coordinates:
304
- return f"pyautogui.rightClick({action.coordinates.x}, {action.coordinates.y})"
328
+ return (
329
+ f"pyautogui.rightClick({action.coordinates.x}, {action.coordinates.y})"
330
+ )
305
331
  return "pyautogui.rightClick()"
306
332
 
307
333
  if action.type == ActionType.TYPE:
@@ -342,6 +368,7 @@ def _action_to_pyautogui(action: Action) -> str:
342
368
  # Internal Format Converter (openadapt_ml.schemas.sessions)
343
369
  # ============================================================================
344
370
 
371
+
345
372
  def from_internal_episode(
346
373
  internal_episode: Any,
347
374
  episode_id: Optional[str] = None,
@@ -395,7 +422,9 @@ def from_internal_episode(
395
422
  key=step.action.key,
396
423
  modifiers=step.action.modifiers,
397
424
  scroll_direction=step.action.scroll_direction,
398
- scroll_amount=int(step.action.scroll_amount) if step.action.scroll_amount else None,
425
+ scroll_amount=int(step.action.scroll_amount)
426
+ if step.action.scroll_amount
427
+ else None,
399
428
  normalized_end=(step.action.end_x, step.action.end_y)
400
429
  if step.action.end_x is not None and step.action.end_y is not None
401
430
  else None,
@@ -403,17 +432,21 @@ def from_internal_episode(
403
432
  element_id=step.action.target_node_id,
404
433
  role=step.action.target_role,
405
434
  name=step.action.target_name,
406
- ) if step.action.target_node_id else None,
435
+ )
436
+ if step.action.target_node_id
437
+ else None,
407
438
  raw=step.action.raw,
408
439
  )
409
440
 
410
- steps.append(Step(
411
- step_index=i,
412
- observation=obs,
413
- action=action,
414
- reasoning=step.thought,
415
- timestamp=step.t,
416
- ))
441
+ steps.append(
442
+ Step(
443
+ step_index=i,
444
+ observation=obs,
445
+ action=action,
446
+ reasoning=step.thought,
447
+ timestamp=step.t,
448
+ )
449
+ )
417
450
 
418
451
  return Episode(
419
452
  episode_id=episode_id or internal_episode.id,
@@ -423,7 +456,9 @@ def from_internal_episode(
423
456
  metadata={
424
457
  "workflow_id": internal_episode.workflow_id,
425
458
  "summary": internal_episode.summary,
426
- } if internal_episode.workflow_id or internal_episode.summary else None,
459
+ }
460
+ if internal_episode.workflow_id or internal_episode.summary
461
+ else None,
427
462
  )
428
463
 
429
464
 
@@ -468,11 +503,21 @@ def to_internal_episode(episode: Episode) -> dict:
468
503
  "modifiers": step.action.modifiers,
469
504
  "scroll_direction": step.action.scroll_direction,
470
505
  "scroll_amount": step.action.scroll_amount,
471
- "end_x": step.action.normalized_end[0] if step.action.normalized_end else None,
472
- "end_y": step.action.normalized_end[1] if step.action.normalized_end else None,
473
- "target_node_id": step.action.element.element_id if step.action.element else None,
474
- "target_role": step.action.element.role if step.action.element else None,
475
- "target_name": step.action.element.name if step.action.element else None,
506
+ "end_x": step.action.normalized_end[0]
507
+ if step.action.normalized_end
508
+ else None,
509
+ "end_y": step.action.normalized_end[1]
510
+ if step.action.normalized_end
511
+ else None,
512
+ "target_node_id": step.action.element.element_id
513
+ if step.action.element
514
+ else None,
515
+ "target_role": step.action.element.role
516
+ if step.action.element
517
+ else None,
518
+ "target_name": step.action.element.name
519
+ if step.action.element
520
+ else None,
476
521
  "raw": step.action.raw,
477
522
  },
478
523
  "thought": step.reasoning,
@@ -484,7 +529,9 @@ def to_internal_episode(episode: Episode) -> dict:
484
529
  "goal": episode.instruction,
485
530
  "steps": steps,
486
531
  "success": episode.success,
487
- "workflow_id": episode.metadata.get("workflow_id") if episode.metadata else None,
532
+ "workflow_id": episode.metadata.get("workflow_id")
533
+ if episode.metadata
534
+ else None,
488
535
  "summary": episode.metadata.get("summary") if episode.metadata else None,
489
536
  }
490
537
 
@@ -519,7 +566,9 @@ def load_waa_result(result_dir: Union[str, Path]) -> Episode:
519
566
  trajectory = data
520
567
  elif isinstance(data, dict):
521
568
  trajectory = data.get("steps", data.get("trajectory", []))
522
- task_info = {k: v for k, v in data.items() if k not in ["steps", "trajectory"]}
569
+ task_info = {
570
+ k: v for k, v in data.items() if k not in ["steps", "trajectory"]
571
+ }
523
572
 
524
573
  # Try to read result
525
574
  result_file = result_dir / "result.txt"
@@ -536,6 +585,4 @@ def load_waa_result(result_dir: Union[str, Path]) -> Episode:
536
585
  if task_id and "task_id" not in task_info:
537
586
  task_info["task_id"] = task_id
538
587
 
539
- return from_waa_trajectory(
540
- trajectory, task_info, episode_id=f"waa_{task_id}"
541
- )
588
+ return from_waa_trajectory(trajectory, task_info, episode_id=f"waa_{task_id}")
@@ -154,7 +154,9 @@ class BoundingBox(BaseModel):
154
154
  class UIElement(BaseModel):
155
155
  """UI element information from accessibility tree or DOM."""
156
156
 
157
- role: Optional[str] = Field(None, description="Element role (button, textbox, etc.)")
157
+ role: Optional[str] = Field(
158
+ None, description="Element role (button, textbox, etc.)"
159
+ )
158
160
  name: Optional[str] = Field(None, description="Element accessible name")
159
161
  value: Optional[str] = Field(None, description="Element value (for inputs)")
160
162
  bounds: Optional[BoundingBox] = Field(None, description="Element bounding box")
@@ -199,9 +201,15 @@ class Action(BaseModel):
199
201
  # Additional parameters
200
202
  url: Optional[str] = Field(None, description="URL for goto action")
201
203
  app_name: Optional[str] = Field(None, description="Application name for open/close")
202
- duration: Optional[float] = Field(None, description="Duration in seconds (for wait)")
203
- monitor_id: Optional[int] = Field(None, description="Monitor ID for select_monitor action")
204
- window_title: Optional[str] = Field(None, description="Window title for window_focus action")
204
+ duration: Optional[float] = Field(
205
+ None, description="Duration in seconds (for wait)"
206
+ )
207
+ monitor_id: Optional[int] = Field(
208
+ None, description="Monitor ID for select_monitor action"
209
+ )
210
+ window_title: Optional[str] = Field(
211
+ None, description="Window title for window_focus action"
212
+ )
205
213
 
206
214
  # Normalized coordinates (0.0-1.0) - alternative to pixel coordinates
207
215
  # Useful for resolution-independent recordings
@@ -223,7 +231,11 @@ class Action(BaseModel):
223
231
  @model_validator(mode="after")
224
232
  def validate_action_params(self) -> "Action":
225
233
  """Validate that required parameters are present for action type."""
226
- if self.type in {ActionType.CLICK, ActionType.DOUBLE_CLICK, ActionType.RIGHT_CLICK}:
234
+ if self.type in {
235
+ ActionType.CLICK,
236
+ ActionType.DOUBLE_CLICK,
237
+ ActionType.RIGHT_CLICK,
238
+ }:
227
239
  if self.coordinates is None and self.element is None:
228
240
  # Allow missing coordinates - can be inferred from context
229
241
  pass
@@ -259,7 +271,9 @@ class Observation(BaseModel):
259
271
 
260
272
  # Window/screen info
261
273
  window_title: Optional[str] = Field(None, description="Active window title")
262
- app_name: Optional[str] = Field(None, description="Application name (e.g., 'Chrome', 'System Settings')")
274
+ app_name: Optional[str] = Field(
275
+ None, description="Application name (e.g., 'Chrome', 'System Settings')"
276
+ )
263
277
  url: Optional[str] = Field(None, description="Current URL (for web apps)")
264
278
  screen_size: Optional[tuple[int, int]] = Field(
265
279
  None, description="Screen dimensions (width, height)"
@@ -293,7 +307,9 @@ class Step(BaseModel):
293
307
 
294
308
  # Outcome
295
309
  reward: Optional[float] = Field(None, description="Reward signal (if available)")
296
- done: Optional[bool] = Field(None, description="Whether episode ended after this step")
310
+ done: Optional[bool] = Field(
311
+ None, description="Whether episode ended after this step"
312
+ )
297
313
 
298
314
  # Timing
299
315
  timestamp: Optional[float] = Field(None, description="Unix timestamp of action")
@@ -311,8 +327,7 @@ class Episode(BaseModel):
311
327
 
312
328
  # Schema metadata
313
329
  schema_version: str = Field(
314
- default=SCHEMA_VERSION,
315
- description="Schema version for compatibility checking"
330
+ default=SCHEMA_VERSION, description="Schema version for compatibility checking"
316
331
  )
317
332
 
318
333
  # Episode identification
@@ -329,21 +344,20 @@ class Episode(BaseModel):
329
344
  steps: list[Step] = Field(..., description="Sequence of steps in the episode")
330
345
 
331
346
  # Outcome
332
- success: Optional[bool] = Field(None, description="Whether task was completed successfully")
347
+ success: Optional[bool] = Field(
348
+ None, description="Whether task was completed successfully"
349
+ )
333
350
  final_reward: Optional[float] = Field(None, description="Final reward/score")
334
351
 
335
352
  # Provenance
336
353
  source: Optional[BenchmarkSource] = Field(
337
354
  None, description="Source benchmark/dataset"
338
355
  )
339
- source_file: Optional[str] = Field(
340
- None, description="Original source file path"
341
- )
356
+ source_file: Optional[str] = Field(None, description="Original source file path")
342
357
 
343
358
  # Metadata
344
359
  created_at: Optional[datetime] = Field(
345
- default_factory=datetime.utcnow,
346
- description="When episode was created/recorded"
360
+ default_factory=datetime.utcnow, description="When episode was created/recorded"
347
361
  )
348
362
  agent_model: Optional[str] = Field(
349
363
  None, description="Model that generated this episode (e.g., 'gpt-4o')"
@@ -351,9 +365,7 @@ class Episode(BaseModel):
351
365
  environment: Optional[str] = Field(
352
366
  None, description="Environment info (OS, browser, etc.)"
353
367
  )
354
- tags: Optional[list[str]] = Field(
355
- None, description="Tags for categorization"
356
- )
368
+ tags: Optional[list[str]] = Field(None, description="Tags for categorization")
357
369
 
358
370
  # Extension point for benchmark-specific data
359
371
  metadata: Optional[dict[str, Any]] = Field(
@@ -389,6 +401,7 @@ class Episode(BaseModel):
389
401
  # Utility Functions
390
402
  # ============================================================================
391
403
 
404
+
392
405
  def validate_episode(data: dict[str, Any]) -> tuple[bool, Optional[str]]:
393
406
  """Validate episode data against schema.
394
407