openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. openadapt_ml/baselines/__init__.py +121 -0
  2. openadapt_ml/baselines/adapter.py +185 -0
  3. openadapt_ml/baselines/cli.py +314 -0
  4. openadapt_ml/baselines/config.py +448 -0
  5. openadapt_ml/baselines/parser.py +922 -0
  6. openadapt_ml/baselines/prompts.py +787 -0
  7. openadapt_ml/benchmarks/__init__.py +13 -107
  8. openadapt_ml/benchmarks/agent.py +297 -374
  9. openadapt_ml/benchmarks/azure.py +62 -24
  10. openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
  11. openadapt_ml/benchmarks/cli.py +1874 -751
  12. openadapt_ml/benchmarks/trace_export.py +631 -0
  13. openadapt_ml/benchmarks/viewer.py +1236 -0
  14. openadapt_ml/benchmarks/vm_monitor.py +1111 -0
  15. openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
  16. openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
  17. openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
  18. openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
  19. openadapt_ml/cloud/azure_inference.py +3 -5
  20. openadapt_ml/cloud/lambda_labs.py +722 -307
  21. openadapt_ml/cloud/local.py +3194 -89
  22. openadapt_ml/cloud/ssh_tunnel.py +595 -0
  23. openadapt_ml/datasets/next_action.py +125 -96
  24. openadapt_ml/evals/grounding.py +32 -9
  25. openadapt_ml/evals/plot_eval_metrics.py +15 -13
  26. openadapt_ml/evals/trajectory_matching.py +120 -57
  27. openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
  28. openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
  29. openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
  30. openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
  31. openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
  32. openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
  33. openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
  34. openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
  35. openadapt_ml/experiments/representation_shootout/config.py +390 -0
  36. openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
  37. openadapt_ml/experiments/representation_shootout/runner.py +687 -0
  38. openadapt_ml/experiments/waa_demo/__init__.py +10 -0
  39. openadapt_ml/experiments/waa_demo/demos.py +357 -0
  40. openadapt_ml/experiments/waa_demo/runner.py +732 -0
  41. openadapt_ml/experiments/waa_demo/tasks.py +151 -0
  42. openadapt_ml/export/__init__.py +9 -0
  43. openadapt_ml/export/__main__.py +6 -0
  44. openadapt_ml/export/cli.py +89 -0
  45. openadapt_ml/export/parquet.py +277 -0
  46. openadapt_ml/grounding/detector.py +18 -14
  47. openadapt_ml/ingest/__init__.py +11 -10
  48. openadapt_ml/ingest/capture.py +97 -86
  49. openadapt_ml/ingest/loader.py +120 -69
  50. openadapt_ml/ingest/synthetic.py +344 -193
  51. openadapt_ml/models/api_adapter.py +14 -4
  52. openadapt_ml/models/base_adapter.py +10 -2
  53. openadapt_ml/models/providers/__init__.py +288 -0
  54. openadapt_ml/models/providers/anthropic.py +266 -0
  55. openadapt_ml/models/providers/base.py +299 -0
  56. openadapt_ml/models/providers/google.py +376 -0
  57. openadapt_ml/models/providers/openai.py +342 -0
  58. openadapt_ml/models/qwen_vl.py +46 -19
  59. openadapt_ml/perception/__init__.py +35 -0
  60. openadapt_ml/perception/integration.py +399 -0
  61. openadapt_ml/retrieval/README.md +226 -0
  62. openadapt_ml/retrieval/USAGE.md +391 -0
  63. openadapt_ml/retrieval/__init__.py +91 -0
  64. openadapt_ml/retrieval/demo_retriever.py +843 -0
  65. openadapt_ml/retrieval/embeddings.py +630 -0
  66. openadapt_ml/retrieval/index.py +194 -0
  67. openadapt_ml/retrieval/retriever.py +162 -0
  68. openadapt_ml/runtime/__init__.py +50 -0
  69. openadapt_ml/runtime/policy.py +27 -14
  70. openadapt_ml/runtime/safety_gate.py +471 -0
  71. openadapt_ml/schema/__init__.py +113 -0
  72. openadapt_ml/schema/converters.py +588 -0
  73. openadapt_ml/schema/episode.py +470 -0
  74. openadapt_ml/scripts/capture_screenshots.py +530 -0
  75. openadapt_ml/scripts/compare.py +102 -61
  76. openadapt_ml/scripts/demo_policy.py +4 -1
  77. openadapt_ml/scripts/eval_policy.py +19 -14
  78. openadapt_ml/scripts/make_gif.py +1 -1
  79. openadapt_ml/scripts/prepare_synthetic.py +16 -17
  80. openadapt_ml/scripts/train.py +98 -75
  81. openadapt_ml/segmentation/README.md +920 -0
  82. openadapt_ml/segmentation/__init__.py +97 -0
  83. openadapt_ml/segmentation/adapters/__init__.py +5 -0
  84. openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
  85. openadapt_ml/segmentation/annotator.py +610 -0
  86. openadapt_ml/segmentation/cache.py +290 -0
  87. openadapt_ml/segmentation/cli.py +674 -0
  88. openadapt_ml/segmentation/deduplicator.py +656 -0
  89. openadapt_ml/segmentation/frame_describer.py +788 -0
  90. openadapt_ml/segmentation/pipeline.py +340 -0
  91. openadapt_ml/segmentation/schemas.py +622 -0
  92. openadapt_ml/segmentation/segment_extractor.py +634 -0
  93. openadapt_ml/training/azure_ops_viewer.py +1097 -0
  94. openadapt_ml/training/benchmark_viewer.py +3255 -19
  95. openadapt_ml/training/shared_ui.py +7 -7
  96. openadapt_ml/training/stub_provider.py +57 -35
  97. openadapt_ml/training/trainer.py +255 -441
  98. openadapt_ml/training/trl_trainer.py +403 -0
  99. openadapt_ml/training/viewer.py +323 -108
  100. openadapt_ml/training/viewer_components.py +180 -0
  101. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
  102. openadapt_ml-0.2.1.dist-info/RECORD +116 -0
  103. openadapt_ml/benchmarks/base.py +0 -366
  104. openadapt_ml/benchmarks/data_collection.py +0 -432
  105. openadapt_ml/benchmarks/runner.py +0 -381
  106. openadapt_ml/benchmarks/waa.py +0 -704
  107. openadapt_ml/schemas/__init__.py +0 -53
  108. openadapt_ml/schemas/sessions.py +0 -122
  109. openadapt_ml/schemas/validation.py +0 -252
  110. openadapt_ml-0.1.0.dist-info/RECORD +0 -55
  111. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
  112. {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -30,16 +30,13 @@ from __future__ import annotations
30
30
 
31
31
  import json
32
32
  import logging
33
- import os
34
- import tempfile
35
33
  import time
36
34
  from concurrent.futures import ThreadPoolExecutor, as_completed
37
35
  from dataclasses import dataclass, field
38
36
  from pathlib import Path
39
- from typing import Any, Callable
37
+ from typing import Callable
40
38
 
41
- from openadapt_ml.benchmarks.agent import BenchmarkAgent
42
- from openadapt_ml.benchmarks.base import BenchmarkResult, BenchmarkTask
39
+ from openadapt_evals import BenchmarkAgent, BenchmarkResult, BenchmarkTask
43
40
 
44
41
  logger = logging.getLogger(__name__)
45
42
 
@@ -233,7 +230,9 @@ class AzureMLClient:
233
230
  resource_group_name=self.config.resource_group,
234
231
  workspace_name=self.config.workspace_name,
235
232
  )
236
- logger.info(f"Connected to Azure ML workspace: {self.config.workspace_name}")
233
+ logger.info(
234
+ f"Connected to Azure ML workspace: {self.config.workspace_name}"
235
+ )
237
236
  return self._client
238
237
 
239
238
  def _get_credential(self):
@@ -241,11 +240,13 @@ class AzureMLClient:
241
240
  from openadapt_ml.config import settings
242
241
 
243
242
  # Use service principal if credentials are configured
244
- if all([
245
- settings.azure_client_id,
246
- settings.azure_client_secret,
247
- settings.azure_tenant_id,
248
- ]):
243
+ if all(
244
+ [
245
+ settings.azure_client_id,
246
+ settings.azure_client_secret,
247
+ settings.azure_tenant_id,
248
+ ]
249
+ ):
249
250
  logger.info("Using service principal authentication")
250
251
  return self._ClientSecretCredential(
251
252
  tenant_id=settings.azure_tenant_id,
@@ -301,7 +302,10 @@ class AzureMLClient:
301
302
  f"/providers/Microsoft.ManagedIdentity"
302
303
  f"/userAssignedIdentities/{self.config.managed_identity_name}"
303
304
  )
304
- compute.identity = {"type": "UserAssigned", "user_assigned_identities": [identity_id]}
305
+ compute.identity = {
306
+ "type": "UserAssigned",
307
+ "user_assigned_identities": [identity_id],
308
+ }
305
309
 
306
310
  print(f" Creating VM: {name}...", end="", flush=True)
307
311
  self.client.compute.begin_create_or_update(compute).result()
@@ -355,6 +359,7 @@ class AzureMLClient:
355
359
  command: str,
356
360
  environment_variables: dict[str, str] | None = None,
357
361
  display_name: str | None = None,
362
+ timeout_hours: float = 4.0,
358
363
  ) -> str:
359
364
  """Submit a job to a compute instance.
360
365
 
@@ -363,6 +368,8 @@ class AzureMLClient:
363
368
  command: Command to run.
364
369
  environment_variables: Environment variables.
365
370
  display_name: Job display name.
371
+ timeout_hours: Maximum job duration in hours (default: 4). The job
372
+ will be automatically canceled after this duration.
366
373
 
367
374
  Returns:
368
375
  Job name/ID.
@@ -376,16 +383,28 @@ class AzureMLClient:
376
383
  name="waa-agent-env",
377
384
  )
378
385
 
386
+ import time
387
+ import uuid
388
+
389
+ timestamp = int(time.time())
390
+ unique_id = str(uuid.uuid4())[:8]
391
+ job_name = f"waa-{compute_name}-{timestamp}-{unique_id}"
392
+
393
+ # Convert hours to seconds for Azure ML timeout
394
+ timeout_seconds = int(timeout_hours * 3600)
395
+
379
396
  job = ml_command(
380
397
  command=command,
381
398
  environment=env,
382
399
  compute=compute_name,
400
+ name=job_name, # Unique job name for Azure ML
383
401
  display_name=display_name or f"waa-job-{compute_name}",
384
402
  environment_variables=environment_variables or {},
403
+ limits={"timeout": timeout_seconds},
385
404
  )
386
405
 
387
406
  submitted = self.client.jobs.create_or_update(job)
388
- logger.info(f"Job submitted: {submitted.name}")
407
+ logger.info(f"Job submitted: {submitted.name} (timeout: {timeout_hours}h)")
389
408
  return submitted.name
390
409
 
391
410
  def wait_for_job(self, job_name: str, timeout_seconds: int = 3600) -> dict:
@@ -458,6 +477,7 @@ class AzureWAAOrchestrator:
458
477
  max_steps_per_task: int = 15,
459
478
  on_worker_complete: Callable[[WorkerState], None] | None = None,
460
479
  cleanup_on_complete: bool = True,
480
+ timeout_hours: float = 4.0,
461
481
  ) -> list[BenchmarkResult]:
462
482
  """Run evaluation across multiple Azure VMs.
463
483
 
@@ -468,12 +488,14 @@ class AzureWAAOrchestrator:
468
488
  max_steps_per_task: Maximum steps per task.
469
489
  on_worker_complete: Callback when a worker finishes.
470
490
  cleanup_on_complete: Whether to delete VMs after completion.
491
+ timeout_hours: Maximum job duration in hours (default: 4). Jobs are
492
+ auto-canceled after this duration to prevent runaway costs.
471
493
 
472
494
  Returns:
473
495
  List of BenchmarkResult for all tasks.
474
496
  """
475
497
  # Load tasks
476
- from openadapt_ml.benchmarks.waa import WAAAdapter
498
+ from openadapt_evals import WAAMockAdapter as WAAAdapter
477
499
 
478
500
  adapter = WAAAdapter(waa_repo_path=self.waa_repo_path)
479
501
  if task_ids:
@@ -513,17 +535,21 @@ class AzureWAAOrchestrator:
513
535
 
514
536
  try:
515
537
  # Provision VMs in parallel
516
- print(f"[2/4] Provisioning {num_workers} Azure VM(s)... (this takes 3-5 minutes)")
538
+ print(
539
+ f"[2/4] Provisioning {num_workers} Azure VM(s)... (this takes 3-5 minutes)"
540
+ )
517
541
  self._provision_workers(workers)
518
- print(f" VM(s) ready")
542
+ print(" VM(s) ready")
519
543
 
520
544
  # Submit jobs to workers
521
- print(f"[3/4] Submitting evaluation jobs...")
522
- self._submit_worker_jobs(workers, task_batches, agent, max_steps_per_task)
523
- print(f" Jobs submitted")
545
+ print("[3/4] Submitting evaluation jobs...")
546
+ self._submit_worker_jobs(
547
+ workers, task_batches, agent, max_steps_per_task, timeout_hours
548
+ )
549
+ print(" Jobs submitted")
524
550
 
525
551
  # Wait for completion and collect results
526
- print(f"[4/4] Waiting for workers to complete...")
552
+ print("[4/4] Waiting for workers to complete...")
527
553
  results = self._wait_and_collect_results(workers, on_worker_complete)
528
554
 
529
555
  self._current_run.status = "completed"
@@ -577,8 +603,17 @@ class AzureWAAOrchestrator:
577
603
  task_batches: list[list[BenchmarkTask]],
578
604
  agent: BenchmarkAgent,
579
605
  max_steps: int,
606
+ timeout_hours: float = 4.0,
580
607
  ) -> None:
581
- """Submit evaluation jobs to workers."""
608
+ """Submit evaluation jobs to workers.
609
+
610
+ Args:
611
+ workers: List of worker states.
612
+ task_batches: Task batches for each worker.
613
+ agent: Agent to run.
614
+ max_steps: Maximum steps per task.
615
+ timeout_hours: Maximum job duration in hours.
616
+ """
582
617
  for worker, tasks in zip(workers, task_batches):
583
618
  if worker.status == "failed":
584
619
  continue
@@ -591,7 +626,7 @@ class AzureWAAOrchestrator:
591
626
  # Build command
592
627
  command = self._build_worker_command(task_ids_json, max_steps, agent)
593
628
 
594
- # Submit job
629
+ # Submit job with timeout
595
630
  self.ml_client.submit_job(
596
631
  compute_name=worker.compute_name,
597
632
  command=command,
@@ -600,6 +635,7 @@ class AzureWAAOrchestrator:
600
635
  "WAA_MAX_STEPS": str(max_steps),
601
636
  },
602
637
  display_name=f"waa-worker-{worker.worker_id}",
638
+ timeout_hours=timeout_hours,
603
639
  )
604
640
  worker.status = "running"
605
641
  worker.start_time = time.time()
@@ -625,9 +661,11 @@ class AzureWAAOrchestrator:
625
661
  # TODO: Serialize agent config and pass to remote worker
626
662
  # For now, workers use a default agent configuration
627
663
  _ = agent # Reserved for agent serialization
664
+ # WAA Docker image has client at /client (see Dockerfile-WinArena)
665
+ # The run.py script is at /client/run.py (not a module, so use python run.py)
628
666
  return f"""
629
- cd /workspace/WindowsAgentArena && \
630
- python -m client.run \
667
+ cd /client && \
668
+ python run.py \
631
669
  --task_ids '{task_ids_json}' \
632
670
  --max_steps {max_steps} \
633
671
  --output_dir /outputs