openadapt-ml 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -107
- openadapt_ml/benchmarks/agent.py +297 -374
- openadapt_ml/benchmarks/azure.py +62 -24
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1874 -751
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +1236 -0
- openadapt_ml/benchmarks/vm_monitor.py +1111 -0
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +216 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +540 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +3194 -89
- openadapt_ml/cloud/ssh_tunnel.py +595 -0
- openadapt_ml/datasets/next_action.py +125 -96
- openadapt_ml/evals/grounding.py +32 -9
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +120 -57
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +236 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +541 -0
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +732 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +277 -0
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +11 -10
- openadapt_ml/ingest/capture.py +97 -86
- openadapt_ml/ingest/loader.py +120 -69
- openadapt_ml/ingest/synthetic.py +344 -193
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +843 -0
- openadapt_ml/retrieval/embeddings.py +630 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +162 -0
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +27 -14
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +113 -0
- openadapt_ml/schema/converters.py +588 -0
- openadapt_ml/schema/episode.py +470 -0
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +102 -61
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +19 -14
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +16 -17
- openadapt_ml/scripts/train.py +98 -75
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +3255 -19
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +255 -441
- openadapt_ml/training/trl_trainer.py +403 -0
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +312 -69
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/runner.py +0 -381
- openadapt_ml/benchmarks/waa.py +0 -704
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/benchmarks/azure.py
CHANGED
|
@@ -30,16 +30,13 @@ from __future__ import annotations
|
|
|
30
30
|
|
|
31
31
|
import json
|
|
32
32
|
import logging
|
|
33
|
-
import os
|
|
34
|
-
import tempfile
|
|
35
33
|
import time
|
|
36
34
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
37
35
|
from dataclasses import dataclass, field
|
|
38
36
|
from pathlib import Path
|
|
39
|
-
from typing import
|
|
37
|
+
from typing import Callable
|
|
40
38
|
|
|
41
|
-
from
|
|
42
|
-
from openadapt_ml.benchmarks.base import BenchmarkResult, BenchmarkTask
|
|
39
|
+
from openadapt_evals import BenchmarkAgent, BenchmarkResult, BenchmarkTask
|
|
43
40
|
|
|
44
41
|
logger = logging.getLogger(__name__)
|
|
45
42
|
|
|
@@ -233,7 +230,9 @@ class AzureMLClient:
|
|
|
233
230
|
resource_group_name=self.config.resource_group,
|
|
234
231
|
workspace_name=self.config.workspace_name,
|
|
235
232
|
)
|
|
236
|
-
logger.info(
|
|
233
|
+
logger.info(
|
|
234
|
+
f"Connected to Azure ML workspace: {self.config.workspace_name}"
|
|
235
|
+
)
|
|
237
236
|
return self._client
|
|
238
237
|
|
|
239
238
|
def _get_credential(self):
|
|
@@ -241,11 +240,13 @@ class AzureMLClient:
|
|
|
241
240
|
from openadapt_ml.config import settings
|
|
242
241
|
|
|
243
242
|
# Use service principal if credentials are configured
|
|
244
|
-
if all(
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
243
|
+
if all(
|
|
244
|
+
[
|
|
245
|
+
settings.azure_client_id,
|
|
246
|
+
settings.azure_client_secret,
|
|
247
|
+
settings.azure_tenant_id,
|
|
248
|
+
]
|
|
249
|
+
):
|
|
249
250
|
logger.info("Using service principal authentication")
|
|
250
251
|
return self._ClientSecretCredential(
|
|
251
252
|
tenant_id=settings.azure_tenant_id,
|
|
@@ -301,7 +302,10 @@ class AzureMLClient:
|
|
|
301
302
|
f"/providers/Microsoft.ManagedIdentity"
|
|
302
303
|
f"/userAssignedIdentities/{self.config.managed_identity_name}"
|
|
303
304
|
)
|
|
304
|
-
compute.identity = {
|
|
305
|
+
compute.identity = {
|
|
306
|
+
"type": "UserAssigned",
|
|
307
|
+
"user_assigned_identities": [identity_id],
|
|
308
|
+
}
|
|
305
309
|
|
|
306
310
|
print(f" Creating VM: {name}...", end="", flush=True)
|
|
307
311
|
self.client.compute.begin_create_or_update(compute).result()
|
|
@@ -355,6 +359,7 @@ class AzureMLClient:
|
|
|
355
359
|
command: str,
|
|
356
360
|
environment_variables: dict[str, str] | None = None,
|
|
357
361
|
display_name: str | None = None,
|
|
362
|
+
timeout_hours: float = 4.0,
|
|
358
363
|
) -> str:
|
|
359
364
|
"""Submit a job to a compute instance.
|
|
360
365
|
|
|
@@ -363,6 +368,8 @@ class AzureMLClient:
|
|
|
363
368
|
command: Command to run.
|
|
364
369
|
environment_variables: Environment variables.
|
|
365
370
|
display_name: Job display name.
|
|
371
|
+
timeout_hours: Maximum job duration in hours (default: 4). The job
|
|
372
|
+
will be automatically canceled after this duration.
|
|
366
373
|
|
|
367
374
|
Returns:
|
|
368
375
|
Job name/ID.
|
|
@@ -376,16 +383,28 @@ class AzureMLClient:
|
|
|
376
383
|
name="waa-agent-env",
|
|
377
384
|
)
|
|
378
385
|
|
|
386
|
+
import time
|
|
387
|
+
import uuid
|
|
388
|
+
|
|
389
|
+
timestamp = int(time.time())
|
|
390
|
+
unique_id = str(uuid.uuid4())[:8]
|
|
391
|
+
job_name = f"waa-{compute_name}-{timestamp}-{unique_id}"
|
|
392
|
+
|
|
393
|
+
# Convert hours to seconds for Azure ML timeout
|
|
394
|
+
timeout_seconds = int(timeout_hours * 3600)
|
|
395
|
+
|
|
379
396
|
job = ml_command(
|
|
380
397
|
command=command,
|
|
381
398
|
environment=env,
|
|
382
399
|
compute=compute_name,
|
|
400
|
+
name=job_name, # Unique job name for Azure ML
|
|
383
401
|
display_name=display_name or f"waa-job-{compute_name}",
|
|
384
402
|
environment_variables=environment_variables or {},
|
|
403
|
+
limits={"timeout": timeout_seconds},
|
|
385
404
|
)
|
|
386
405
|
|
|
387
406
|
submitted = self.client.jobs.create_or_update(job)
|
|
388
|
-
logger.info(f"Job submitted: {submitted.name}")
|
|
407
|
+
logger.info(f"Job submitted: {submitted.name} (timeout: {timeout_hours}h)")
|
|
389
408
|
return submitted.name
|
|
390
409
|
|
|
391
410
|
def wait_for_job(self, job_name: str, timeout_seconds: int = 3600) -> dict:
|
|
@@ -458,6 +477,7 @@ class AzureWAAOrchestrator:
|
|
|
458
477
|
max_steps_per_task: int = 15,
|
|
459
478
|
on_worker_complete: Callable[[WorkerState], None] | None = None,
|
|
460
479
|
cleanup_on_complete: bool = True,
|
|
480
|
+
timeout_hours: float = 4.0,
|
|
461
481
|
) -> list[BenchmarkResult]:
|
|
462
482
|
"""Run evaluation across multiple Azure VMs.
|
|
463
483
|
|
|
@@ -468,12 +488,14 @@ class AzureWAAOrchestrator:
|
|
|
468
488
|
max_steps_per_task: Maximum steps per task.
|
|
469
489
|
on_worker_complete: Callback when a worker finishes.
|
|
470
490
|
cleanup_on_complete: Whether to delete VMs after completion.
|
|
491
|
+
timeout_hours: Maximum job duration in hours (default: 4). Jobs are
|
|
492
|
+
auto-canceled after this duration to prevent runaway costs.
|
|
471
493
|
|
|
472
494
|
Returns:
|
|
473
495
|
List of BenchmarkResult for all tasks.
|
|
474
496
|
"""
|
|
475
497
|
# Load tasks
|
|
476
|
-
from
|
|
498
|
+
from openadapt_evals import WAAMockAdapter as WAAAdapter
|
|
477
499
|
|
|
478
500
|
adapter = WAAAdapter(waa_repo_path=self.waa_repo_path)
|
|
479
501
|
if task_ids:
|
|
@@ -513,17 +535,21 @@ class AzureWAAOrchestrator:
|
|
|
513
535
|
|
|
514
536
|
try:
|
|
515
537
|
# Provision VMs in parallel
|
|
516
|
-
print(
|
|
538
|
+
print(
|
|
539
|
+
f"[2/4] Provisioning {num_workers} Azure VM(s)... (this takes 3-5 minutes)"
|
|
540
|
+
)
|
|
517
541
|
self._provision_workers(workers)
|
|
518
|
-
print(
|
|
542
|
+
print(" VM(s) ready")
|
|
519
543
|
|
|
520
544
|
# Submit jobs to workers
|
|
521
|
-
print(
|
|
522
|
-
self._submit_worker_jobs(
|
|
523
|
-
|
|
545
|
+
print("[3/4] Submitting evaluation jobs...")
|
|
546
|
+
self._submit_worker_jobs(
|
|
547
|
+
workers, task_batches, agent, max_steps_per_task, timeout_hours
|
|
548
|
+
)
|
|
549
|
+
print(" Jobs submitted")
|
|
524
550
|
|
|
525
551
|
# Wait for completion and collect results
|
|
526
|
-
print(
|
|
552
|
+
print("[4/4] Waiting for workers to complete...")
|
|
527
553
|
results = self._wait_and_collect_results(workers, on_worker_complete)
|
|
528
554
|
|
|
529
555
|
self._current_run.status = "completed"
|
|
@@ -577,8 +603,17 @@ class AzureWAAOrchestrator:
|
|
|
577
603
|
task_batches: list[list[BenchmarkTask]],
|
|
578
604
|
agent: BenchmarkAgent,
|
|
579
605
|
max_steps: int,
|
|
606
|
+
timeout_hours: float = 4.0,
|
|
580
607
|
) -> None:
|
|
581
|
-
"""Submit evaluation jobs to workers.
|
|
608
|
+
"""Submit evaluation jobs to workers.
|
|
609
|
+
|
|
610
|
+
Args:
|
|
611
|
+
workers: List of worker states.
|
|
612
|
+
task_batches: Task batches for each worker.
|
|
613
|
+
agent: Agent to run.
|
|
614
|
+
max_steps: Maximum steps per task.
|
|
615
|
+
timeout_hours: Maximum job duration in hours.
|
|
616
|
+
"""
|
|
582
617
|
for worker, tasks in zip(workers, task_batches):
|
|
583
618
|
if worker.status == "failed":
|
|
584
619
|
continue
|
|
@@ -591,7 +626,7 @@ class AzureWAAOrchestrator:
|
|
|
591
626
|
# Build command
|
|
592
627
|
command = self._build_worker_command(task_ids_json, max_steps, agent)
|
|
593
628
|
|
|
594
|
-
# Submit job
|
|
629
|
+
# Submit job with timeout
|
|
595
630
|
self.ml_client.submit_job(
|
|
596
631
|
compute_name=worker.compute_name,
|
|
597
632
|
command=command,
|
|
@@ -600,6 +635,7 @@ class AzureWAAOrchestrator:
|
|
|
600
635
|
"WAA_MAX_STEPS": str(max_steps),
|
|
601
636
|
},
|
|
602
637
|
display_name=f"waa-worker-{worker.worker_id}",
|
|
638
|
+
timeout_hours=timeout_hours,
|
|
603
639
|
)
|
|
604
640
|
worker.status = "running"
|
|
605
641
|
worker.start_time = time.time()
|
|
@@ -625,9 +661,11 @@ class AzureWAAOrchestrator:
|
|
|
625
661
|
# TODO: Serialize agent config and pass to remote worker
|
|
626
662
|
# For now, workers use a default agent configuration
|
|
627
663
|
_ = agent # Reserved for agent serialization
|
|
664
|
+
# WAA Docker image has client at /client (see Dockerfile-WinArena)
|
|
665
|
+
# The run.py script is at /client/run.py (not a module, so use python run.py)
|
|
628
666
|
return f"""
|
|
629
|
-
cd /
|
|
630
|
-
python
|
|
667
|
+
cd /client && \
|
|
668
|
+
python run.py \
|
|
631
669
|
--task_ids '{task_ids_json}' \
|
|
632
670
|
--max_steps {max_steps} \
|
|
633
671
|
--output_dir /outputs
|