openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
This module provides reusable classes for monitoring Windows VMs running WAA.
|
|
4
4
|
Can be used by the viewer, CLI, or as a standalone tool.
|
|
5
5
|
|
|
6
|
+
Enhanced with Azure ML job tracking, cost estimation, and activity detection.
|
|
7
|
+
|
|
6
8
|
Usage:
|
|
7
9
|
# Monitor a single VM
|
|
8
10
|
from openadapt_ml.benchmarks.vm_monitor import VMMonitor, VMConfig
|
|
@@ -21,6 +23,14 @@ Usage:
|
|
|
21
23
|
|
|
22
24
|
# Or run continuous monitoring
|
|
23
25
|
monitor.run_monitor(callback=lambda s: print(s))
|
|
26
|
+
|
|
27
|
+
# Fetch Azure ML jobs
|
|
28
|
+
jobs = fetch_azure_ml_jobs(days=7)
|
|
29
|
+
print(f"Found {len(jobs)} jobs in last 7 days")
|
|
30
|
+
|
|
31
|
+
# Calculate VM costs
|
|
32
|
+
costs = calculate_vm_costs(vm_size="Standard_D4ds_v5", hours=2.5)
|
|
33
|
+
print(f"Estimated cost: ${costs.cost_usd:.2f}")
|
|
24
34
|
"""
|
|
25
35
|
|
|
26
36
|
from __future__ import annotations
|
|
@@ -29,12 +39,15 @@ import json
|
|
|
29
39
|
import subprocess
|
|
30
40
|
import time
|
|
31
41
|
from dataclasses import dataclass, field, asdict
|
|
32
|
-
from datetime import datetime
|
|
42
|
+
from datetime import datetime, timedelta
|
|
33
43
|
from pathlib import Path
|
|
34
44
|
from typing import Callable
|
|
35
45
|
import urllib.request
|
|
36
46
|
import urllib.error
|
|
37
47
|
import socket
|
|
48
|
+
import logging
|
|
49
|
+
|
|
50
|
+
logger = logging.getLogger(__name__)
|
|
38
51
|
|
|
39
52
|
|
|
40
53
|
@dataclass
|
|
@@ -105,9 +118,10 @@ class VMMonitor:
|
|
|
105
118
|
self.timeout = timeout
|
|
106
119
|
|
|
107
120
|
def check_vnc(self) -> bool:
|
|
108
|
-
"""Check if VNC port is reachable."""
|
|
121
|
+
"""Check if VNC port is reachable via SSH tunnel (localhost)."""
|
|
109
122
|
try:
|
|
110
|
-
|
|
123
|
+
# VNC is only accessible via SSH tunnel at localhost, not the public IP
|
|
124
|
+
url = f"http://localhost:{self.config.vnc_port}/"
|
|
111
125
|
req = urllib.request.Request(url, method="HEAD")
|
|
112
126
|
with urllib.request.urlopen(req, timeout=self.timeout):
|
|
113
127
|
return True
|
|
@@ -120,9 +134,12 @@ class VMMonitor:
|
|
|
120
134
|
result = subprocess.run(
|
|
121
135
|
[
|
|
122
136
|
"ssh",
|
|
123
|
-
"-o",
|
|
124
|
-
"
|
|
125
|
-
"-o",
|
|
137
|
+
"-o",
|
|
138
|
+
"StrictHostKeyChecking=no",
|
|
139
|
+
"-o",
|
|
140
|
+
f"ConnectTimeout={self.timeout}",
|
|
141
|
+
"-o",
|
|
142
|
+
"BatchMode=yes",
|
|
126
143
|
f"{self.config.ssh_user}@{self.config.ssh_host}",
|
|
127
144
|
"echo ok",
|
|
128
145
|
],
|
|
@@ -145,9 +162,12 @@ class VMMonitor:
|
|
|
145
162
|
result = subprocess.run(
|
|
146
163
|
[
|
|
147
164
|
"ssh",
|
|
148
|
-
"-o",
|
|
149
|
-
"
|
|
150
|
-
"-o",
|
|
165
|
+
"-o",
|
|
166
|
+
"StrictHostKeyChecking=no",
|
|
167
|
+
"-o",
|
|
168
|
+
f"ConnectTimeout={self.timeout}",
|
|
169
|
+
"-o",
|
|
170
|
+
"BatchMode=yes",
|
|
151
171
|
f"{self.config.ssh_user}@{self.config.ssh_host}",
|
|
152
172
|
cmd,
|
|
153
173
|
],
|
|
@@ -173,9 +193,12 @@ class VMMonitor:
|
|
|
173
193
|
result = subprocess.run(
|
|
174
194
|
[
|
|
175
195
|
"ssh",
|
|
176
|
-
"-o",
|
|
177
|
-
"
|
|
178
|
-
"-o",
|
|
196
|
+
"-o",
|
|
197
|
+
"StrictHostKeyChecking=no",
|
|
198
|
+
"-o",
|
|
199
|
+
f"ConnectTimeout={self.timeout}",
|
|
200
|
+
"-o",
|
|
201
|
+
"BatchMode=yes",
|
|
179
202
|
f"{self.config.ssh_user}@{self.config.ssh_host}",
|
|
180
203
|
cmd,
|
|
181
204
|
],
|
|
@@ -191,9 +214,12 @@ class VMMonitor:
|
|
|
191
214
|
log_result = subprocess.run(
|
|
192
215
|
[
|
|
193
216
|
"ssh",
|
|
194
|
-
"-o",
|
|
195
|
-
"
|
|
196
|
-
"-o",
|
|
217
|
+
"-o",
|
|
218
|
+
"StrictHostKeyChecking=no",
|
|
219
|
+
"-o",
|
|
220
|
+
f"ConnectTimeout={self.timeout}",
|
|
221
|
+
"-o",
|
|
222
|
+
"BatchMode=yes",
|
|
197
223
|
f"{self.config.ssh_user}@{self.config.ssh_host}",
|
|
198
224
|
log_cmd,
|
|
199
225
|
],
|
|
@@ -220,9 +246,12 @@ class VMMonitor:
|
|
|
220
246
|
result = subprocess.run(
|
|
221
247
|
[
|
|
222
248
|
"ssh",
|
|
223
|
-
"-o",
|
|
224
|
-
"
|
|
225
|
-
"-o",
|
|
249
|
+
"-o",
|
|
250
|
+
"StrictHostKeyChecking=no",
|
|
251
|
+
"-o",
|
|
252
|
+
f"ConnectTimeout={self.timeout}",
|
|
253
|
+
"-o",
|
|
254
|
+
"BatchMode=yes",
|
|
226
255
|
f"{self.config.ssh_user}@{self.config.ssh_host}",
|
|
227
256
|
cmd,
|
|
228
257
|
],
|
|
@@ -233,7 +262,7 @@ class VMMonitor:
|
|
|
233
262
|
if result.returncode == 0 and result.stdout.strip():
|
|
234
263
|
try:
|
|
235
264
|
bytes_size = int(result.stdout.strip())
|
|
236
|
-
return round(bytes_size / (1024
|
|
265
|
+
return round(bytes_size / (1024**3), 2)
|
|
237
266
|
except ValueError:
|
|
238
267
|
continue
|
|
239
268
|
return None
|
|
@@ -257,7 +286,9 @@ class VMMonitor:
|
|
|
257
286
|
|
|
258
287
|
if status.ssh_reachable:
|
|
259
288
|
# Check container
|
|
260
|
-
status.container_running, status.container_logs =
|
|
289
|
+
status.container_running, status.container_logs = (
|
|
290
|
+
self.get_container_status()
|
|
291
|
+
)
|
|
261
292
|
|
|
262
293
|
# Check WAA probe
|
|
263
294
|
status.waa_ready, status.waa_probe_response = self.check_waa_probe()
|
|
@@ -412,7 +443,9 @@ class VMPoolRegistry:
|
|
|
412
443
|
resource_group=resource_group,
|
|
413
444
|
location=location,
|
|
414
445
|
vm_size=vm_size,
|
|
415
|
-
workers=[
|
|
446
|
+
workers=[
|
|
447
|
+
PoolWorker(name=name, ip=ip, status="ready") for name, ip in workers
|
|
448
|
+
],
|
|
416
449
|
)
|
|
417
450
|
self.save()
|
|
418
451
|
return self._pool
|
|
@@ -468,7 +501,9 @@ class VMPoolRegistry:
|
|
|
468
501
|
class VMRegistry:
|
|
469
502
|
"""Manage a registry of VMs and their status."""
|
|
470
503
|
|
|
471
|
-
def __init__(
|
|
504
|
+
def __init__(
|
|
505
|
+
self, registry_file: str | Path = "benchmark_results/vm_registry.json"
|
|
506
|
+
):
|
|
472
507
|
"""Initialize registry.
|
|
473
508
|
|
|
474
509
|
Args:
|
|
@@ -546,17 +581,23 @@ def main():
|
|
|
546
581
|
parser.add_argument("--host", help="SSH host")
|
|
547
582
|
parser.add_argument("--user", default="azureuser", help="SSH user")
|
|
548
583
|
parser.add_argument("--container", default="winarena", help="Docker container name")
|
|
549
|
-
parser.add_argument(
|
|
584
|
+
parser.add_argument(
|
|
585
|
+
"--interval", type=int, default=30, help="Check interval in seconds"
|
|
586
|
+
)
|
|
550
587
|
parser.add_argument("--output", help="Output file for status updates (JSON lines)")
|
|
551
588
|
parser.add_argument("--list", action="store_true", help="List all registered VMs")
|
|
552
|
-
parser.add_argument(
|
|
589
|
+
parser.add_argument(
|
|
590
|
+
"--check-all", action="store_true", help="Check all registered VMs"
|
|
591
|
+
)
|
|
553
592
|
|
|
554
593
|
args = parser.parse_args()
|
|
555
594
|
|
|
556
595
|
if args.list:
|
|
557
596
|
registry = VMRegistry()
|
|
558
597
|
for vm in registry.list():
|
|
559
|
-
print(
|
|
598
|
+
print(
|
|
599
|
+
f" {vm.name}: {vm.ssh_user}@{vm.ssh_host} (container: {vm.docker_container})"
|
|
600
|
+
)
|
|
560
601
|
return
|
|
561
602
|
|
|
562
603
|
if args.check_all:
|
|
@@ -586,12 +627,14 @@ def main():
|
|
|
586
627
|
ts = datetime.now().strftime("%H:%M:%S")
|
|
587
628
|
waa_str = "READY!" if status.waa_ready else "not ready"
|
|
588
629
|
disk_str = f"{status.disk_usage_gb}GB" if status.disk_usage_gb else "?"
|
|
589
|
-
print(
|
|
590
|
-
|
|
591
|
-
|
|
630
|
+
print(
|
|
631
|
+
f"[{ts}] SSH: {'✓' if status.ssh_reachable else '✗'} | "
|
|
632
|
+
f"VNC: {'✓' if status.vnc_reachable else '✗'} | "
|
|
633
|
+
f"WAA: {waa_str} | Disk: {disk_str}"
|
|
634
|
+
)
|
|
592
635
|
if status.container_logs:
|
|
593
636
|
# Show last log line
|
|
594
|
-
last_line = status.container_logs.split(
|
|
637
|
+
last_line = status.container_logs.split("\n")[-1][:80]
|
|
595
638
|
print(f" Log: {last_line}")
|
|
596
639
|
|
|
597
640
|
print(f"Monitoring {args.host}... (Ctrl+C to stop)")
|
|
@@ -606,5 +649,463 @@ def main():
|
|
|
606
649
|
print("\nMonitoring stopped.")
|
|
607
650
|
|
|
608
651
|
|
|
652
|
+
# ============================================================================
|
|
653
|
+
# Azure ML Job Tracking
|
|
654
|
+
# ============================================================================
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
@dataclass
class AzureMLJob:
    """A single Azure ML job record.

    Only ``job_id``, ``display_name``, ``status`` and ``created_at`` are
    required; every other field defaults to ``None`` when the information
    is not available.
    """

    # Job identifier.
    job_id: str
    # Human-readable job name.
    display_name: str
    # Job state: running, completed, failed, canceled
    status: str
    # Creation timestamp, kept as the raw string.
    created_at: str
    # Name of the compute the job ran on, if known.
    compute_target: str | None = None
    # Run time in minutes, if known.
    duration_minutes: float | None = None
    # Cost in USD, if known.
    cost_usd: float | None = None
    # Link to the job in the Azure ML web UI, if known.
    azure_dashboard_url: str | None = None
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def fetch_azure_ml_jobs(
    resource_group: str = "openadapt-agents",
    workspace_name: str = "openadapt-ml",
    days: int = 7,
    max_results: int = 20,
) -> list[AzureMLJob]:
    """Fetch recent Azure ML jobs via the Azure CLI (``az ml job list``).

    Jobs older than ``days`` are dropped, the remainder is sorted by
    creation time (newest first) and only then truncated to
    ``max_results``. Jobs whose creation time is missing or cannot be
    parsed are kept and placed after all dated jobs.

    Args:
        resource_group: Azure resource group name.
        workspace_name: Azure ML workspace name.
        days: Number of days to look back.
        max_results: Maximum number of jobs to return.

    Returns:
        List of AzureMLJob objects, sorted by creation time (newest first).
        An empty list is returned when the CLI call fails or any other
        error occurs (the error is logged).
    """
    # Local import: the module only imports ``datetime`` and ``timedelta``.
    from datetime import timezone

    try:
        result = subprocess.run(
            [
                "az",
                "ml",
                "job",
                "list",
                "--resource-group",
                resource_group,
                "--workspace-name",
                workspace_name,
                "--query",
                "[].{name:name,display_name:display_name,status:status,created_at:creation_context.created_at,compute:compute}",
                "-o",
                "json",
            ],
            capture_output=True,
            text=True,
            timeout=30,
        )

        if result.returncode != 0:
            logger.error("Azure CLI error: %s", result.stderr)
            return []

        # ``null`` output decodes to None; treat it like an empty listing.
        jobs_raw = json.loads(result.stdout) or []

        # Compare in UTC. Using naive local time and stamping the job's
        # tzinfo onto it would skew the cutoff by the local UTC offset.
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
        undated = datetime.min.replace(tzinfo=timezone.utc)

        # The workspace id is identical for every job, so resolve the
        # subscription (a subprocess call) once instead of once per job.
        subscription_id = get_azure_subscription_id()
        wsid = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.MachineLearningServices/workspaces/{workspace_name}"

        dated_jobs: list[tuple[datetime, AzureMLJob]] = []
        for job in jobs_raw:
            created_at = job.get("created_at") or ""

            job_date = None
            if created_at:
                try:
                    # Parse ISO format: 2026-01-17T10:30:00Z
                    job_date = datetime.fromisoformat(
                        created_at.replace("Z", "+00:00")
                    )
                    if job_date.tzinfo is None:
                        # presumably UTC when no offset is given - confirm
                        job_date = job_date.replace(tzinfo=timezone.utc)
                except (ValueError, AttributeError):
                    # If date parsing fails, include the job
                    job_date = None

            if job_date is not None and job_date < cutoff_date:
                continue

            # Duration is not derivable from the fields queried above.
            duration_minutes = None
            # ``status`` may be JSON null, so don't rely on the .get default.
            status = (job.get("status") or "unknown").lower()

            dashboard_url = (
                f"https://ml.azure.com/runs/{job.get('name', '')}?wsid={wsid}"
            )

            dated_jobs.append(
                (
                    job_date if job_date is not None else undated,
                    AzureMLJob(
                        job_id=job.get("name", "unknown"),
                        display_name=job.get("display_name") or "",
                        status=status,
                        created_at=created_at,
                        compute_target=job.get("compute", None),
                        duration_minutes=duration_minutes,
                        azure_dashboard_url=dashboard_url,
                    ),
                )
            )

        # Newest first; the sort is stable so CLI order breaks ties.
        dated_jobs.sort(key=lambda pair: pair[0], reverse=True)
        return [job for _, job in dated_jobs[:max_results]]

    except Exception as e:
        # Deliberately broad: this is a best-effort data source and callers
        # expect an empty list rather than an exception.
        logger.error("Error fetching Azure ML jobs: %s", e)
        return []
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def get_azure_subscription_id() -> str:
    """Get the current Azure subscription ID.

    Runs ``az account show --query id -o tsv`` with a 10 second timeout.

    Returns:
        The subscription ID printed by the CLI, or the literal string
        ``"unknown"`` when the CLI cannot be run, times out, exits with a
        non-zero status, or prints nothing.
    """
    try:
        result = subprocess.run(
            ["az", "account", "show", "--query", "id", "-o", "tsv"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # Best effort: az is missing / not executable (OSError), timed out
        # (SubprocessError) or produced undecodable output (ValueError).
        return "unknown"

    subscription_id = result.stdout.strip()
    # An empty ID would produce malformed resource paths downstream, so
    # treat it the same as a failed lookup.
    if result.returncode == 0 and subscription_id:
        return subscription_id
    return "unknown"
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
# ============================================================================
|
|
781
|
+
# Cost Tracking
|
|
782
|
+
# ============================================================================
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
@dataclass
class VMCostEstimate:
    """Estimated costs for VM usage."""

    # Azure VM size the estimate was computed for.
    vm_size: str
    # Hourly rate actually used for the calculation.
    hourly_rate_usd: float
    # Number of hours the cost was computed over.
    hours_elapsed: float
    # Cost so far: hourly_rate_usd * hours_elapsed.
    cost_usd: float
    # Projected cost for one hour (same value as hourly_rate_usd).
    cost_per_hour_usd: float
    # Projected cost for 24 hours.
    cost_per_day_usd: float
    # Projected cost for 7 days.
    cost_per_week_usd: float


# Azure VM pricing (US East, as of Jan 2025)
VM_PRICING = {
    "Standard_D2_v3": 0.096,
    "Standard_D4_v3": 0.192,
    "Standard_D8_v3": 0.384,
    "Standard_D4s_v3": 0.192,
    "Standard_D8s_v3": 0.384,
    "Standard_D4ds_v5": 0.192,
    "Standard_D8ds_v5": 0.384,
    "Standard_D16ds_v5": 0.768,
    "Standard_D32ds_v5": 1.536,
}

# Hourly rate assumed for VM sizes that are not listed in VM_PRICING.
_DEFAULT_HOURLY_RATE_USD = 0.20


def calculate_vm_costs(
    vm_size: str, hours: float, hourly_rate_override: float | None = None
) -> VMCostEstimate:
    """Calculate VM cost estimates.

    Args:
        vm_size: Azure VM size (e.g., "Standard_D4ds_v5").
        hours: Number of hours the VM has been running.
        hourly_rate_override: Override default hourly rate (for custom pricing).
            Any value other than ``None`` is honoured, including ``0.0``.

    Returns:
        VMCostEstimate with cost breakdown. Sizes missing from
        ``VM_PRICING`` are priced at 0.20 USD/hour.
    """
    # Compare against None explicitly: a plain ``or`` would discard a
    # legitimate override of 0.0 (e.g. credit-covered usage).
    if hourly_rate_override is not None:
        hourly_rate = hourly_rate_override
    else:
        hourly_rate = VM_PRICING.get(vm_size, _DEFAULT_HOURLY_RATE_USD)

    return VMCostEstimate(
        vm_size=vm_size,
        hourly_rate_usd=hourly_rate,
        hours_elapsed=hours,
        cost_usd=hourly_rate * hours,
        cost_per_hour_usd=hourly_rate,
        cost_per_day_usd=hourly_rate * 24,
        cost_per_week_usd=hourly_rate * 24 * 7,
    )
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _parse_azure_timestamp(raw: str) -> datetime:
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    Raises:
        ValueError: If the string cannot be parsed.
    """
    # Local import: the module only imports ``datetime`` and ``timedelta``.
    from datetime import timezone

    text = raw.strip().replace("Z", "+00:00")
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        # Before Python 3.11, fromisoformat() only accepts 3 or 6 fractional
        # digits; retry with the fraction normalised to exactly 6 digits.
        head, dot, tail = text.partition(".")
        if not dot:
            raise
        digit_count = len(tail) - len(tail.lstrip("0123456789"))
        fraction = tail[:digit_count][:6].ljust(6, "0")
        parsed = datetime.fromisoformat(f"{head}.{fraction}{tail[digit_count:]}")

    if parsed.tzinfo is None:
        # presumably UTC when no offset is present - confirm against CLI output
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def get_vm_uptime_hours(
    resource_group: str, vm_name: str, check_actual_state: bool = True
) -> float:
    """Get VM uptime in hours.

    Queries the VM power state with ``az vm show -d`` and then looks in the
    activity log for the first matching "Start Virtual Machine" /
    "Create or Update Virtual Machine" event.

    Args:
        resource_group: Azure resource group.
        vm_name: VM name.
        check_actual_state: If True, check if VM is actually running.

    Returns:
        Hours since the start event (never negative). ``0.0`` if the power
        state query fails, the VM is not running (when
        ``check_actual_state`` is True) or an unexpected error occurs.
        ``1.0`` as a fallback when no start event can be determined.
    """
    try:
        # Get the VM power state
        state_result = subprocess.run(
            [
                "az",
                "vm",
                "show",
                "-d",
                "-g",
                resource_group,
                "-n",
                vm_name,
                "--query",
                "{powerState:powerState}",
                "-o",
                "json",
            ],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if state_result.returncode != 0:
            return 0.0

        info = json.loads(state_result.stdout) or {}
        # powerState may be JSON null; normalise to an empty string.
        power_state = info.get("powerState") or ""

        # Check if VM is running
        if check_actual_state and "running" not in power_state.lower():
            return 0.0

        # Try to get activity logs for last start time
        # NOTE(review): the activity-log query may only cover a limited
        # default time window (see the CLI's --offset option) - confirm.
        log_result = subprocess.run(
            [
                "az",
                "monitor",
                "activity-log",
                "list",
                "--resource-group",
                resource_group,
                "--resource-id",
                f"/subscriptions/{get_azure_subscription_id()}/resourceGroups/{resource_group}/providers/Microsoft.Compute/virtualMachines/{vm_name}",
                "--query",
                "[?operationName.localizedValue=='Start Virtual Machine' || operationName.localizedValue=='Create or Update Virtual Machine'].eventTimestamp | [0]",
                "-o",
                "tsv",
            ],
            capture_output=True,
            text=True,
            timeout=15,
        )

        start_time_str = log_result.stdout.strip()
        if log_result.returncode == 0 and start_time_str:
            try:
                start_time = _parse_azure_timestamp(start_time_str)
            except ValueError:
                # An unreadable timestamp means "can't determine", which is
                # the documented fallback case, not "VM not running".
                logger.debug("Unparseable VM start time: %r", start_time_str)
            else:
                elapsed = datetime.now(start_time.tzinfo) - start_time
                # Clamp: clock skew must not yield negative uptime.
                return max(0.0, elapsed.total_seconds() / 3600)

        # Fallback: assume started 1 hour ago if we can't determine
        return 1.0

    except Exception as e:
        # Best effort: any CLI/JSON failure is reported as zero uptime.
        logger.debug(f"Error getting VM uptime: {e}")
        return 0.0
|
|
917
|
+
|
|
918
|
+
|
|
919
|
+
# ============================================================================
|
|
920
|
+
# VM Activity Detection
|
|
921
|
+
# ============================================================================
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
@dataclass
class VMActivity:
    """Snapshot of what a VM is currently doing."""

    # True when the VM is considered busy.
    is_active: bool
    # One of: idle, benchmark_running, training, setup, unknown
    activity_type: str
    # Human-readable explanation of the detected state.
    description: str
    # Progress payload, populated only if a benchmark is running.
    benchmark_progress: dict | None = None
    # Timestamp of the most recent action, if known.
    last_action_time: str | None = None
|
|
933
|
+
|
|
934
|
+
|
|
935
|
+
def _run_activity_ssh(
    ssh_user: str, ip: str, remote_cmd: str
) -> subprocess.CompletedProcess:
    """Run ``remote_cmd`` on ``ssh_user@ip`` over ssh and capture its output."""
    return subprocess.run(
        [
            "ssh",
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "ConnectTimeout=5",
            # Never fall back to an interactive password prompt; without
            # this ssh can sit waiting for input until the timeout fires.
            "-o",
            "BatchMode=yes",
            f"{ssh_user}@{ip}",
            remote_cmd,
        ],
        capture_output=True,
        text=True,
        timeout=10,
    )


def detect_vm_activity(
    ip: str,
    ssh_user: str = "azureuser",
    docker_container: str = "winarena",
    internal_ip: str = "localhost",  # WAA server bound to localhost via Docker port forward
) -> VMActivity:
    """Detect what the VM is currently doing.

    Performs up to three ssh round trips: a ``docker ps`` check for the
    container, a ``curl`` against the WAA ``/probe`` endpoint on port 5000,
    and a ``pgrep`` inside the container for a matching python process.

    Args:
        ip: VM IP address.
        ssh_user: SSH username.
        docker_container: Docker container name.
        internal_ip: Internal IP for WAA server.

    Returns:
        VMActivity with current activity information:

        * ``idle`` / "Container not running" when the ``docker ps`` call
          fails or prints nothing (this includes ssh failures).
        * ``benchmark_running`` or ``idle`` when the probe returns JSON,
          depending on whether the ``pgrep`` check finds a process; the
          decoded probe payload is stored in ``benchmark_progress``.
        * ``setup`` when the probe answers with non-JSON or not at all.
        * ``unknown`` when any exception (e.g. a timeout) is raised.
    """
    try:
        # Check if container is running
        container_check = _run_activity_ssh(
            ssh_user, ip, f"docker ps -q -f name={docker_container}"
        )
        if container_check.returncode != 0 or not container_check.stdout.strip():
            return VMActivity(
                is_active=False,
                activity_type="idle",
                description="Container not running",
            )

        # Check WAA probe for benchmark status
        probe = _run_activity_ssh(
            ssh_user,
            ip,
            f"curl -s --connect-timeout 3 http://{internal_ip}:5000/probe",
        )
        probe_response = probe.stdout.strip()

        if probe.returncode == 0 and probe_response:
            try:
                probe_data = json.loads(probe_response)
            except json.JSONDecodeError:
                # Got response but not JSON - maybe setup phase
                return VMActivity(
                    is_active=True,
                    activity_type="setup",
                    description="WAA starting up",
                )

            # WAA is ready and responsive - check if benchmark is actually running
            # by looking for python processes (Navi agent or our client)
            python_check = _run_activity_ssh(
                ssh_user,
                ip,
                f"docker exec {docker_container} pgrep -f 'python.*run' 2>/dev/null | head -1",
            )
            is_running = bool(python_check.stdout.strip())

            return VMActivity(
                is_active=is_running,
                activity_type="benchmark_running" if is_running else "idle",
                description="WAA benchmark running"
                if is_running
                else "WAA ready - idle",
                benchmark_progress=probe_data,
            )

        # Container running but WAA not ready
        return VMActivity(
            is_active=True,
            activity_type="setup",
            description="Windows VM booting or WAA initializing",
        )

    except Exception as e:
        # Best effort: report the failure instead of raising to the caller.
        logger.debug(f"Error detecting VM activity: {e}")
        return VMActivity(
            is_active=False,
            activity_type="unknown",
            description=f"Error checking activity: {str(e)[:100]}",
        )
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
# ============================================================================
|
|
1047
|
+
# Evaluation History
|
|
1048
|
+
# ============================================================================
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
@dataclass
class EvaluationRun:
    """Historical evaluation run."""

    # Name of the run directory.
    run_id: str
    # Start timestamp as stored in summary.json ("unknown" if absent).
    started_at: str
    # Completion timestamp, or None if absent.
    completed_at: str | None
    # Number of tasks in the run (0 if absent).
    num_tasks: int
    # Success rate as stored in summary.json, or None if absent.
    success_rate: float | None
    # Agent type label ("unknown" if absent).
    agent_type: str
    # running, completed, failed
    status: str


def get_evaluation_history(
    results_dir: Path | str = "benchmark_results", max_runs: int = 10
) -> list[EvaluationRun]:
    """Get history of evaluation runs from results directory.

    Every sub-directory of ``results_dir`` containing a readable
    ``summary.json`` whose top-level value is a JSON object yields one
    run. Sub-directories without a usable summary are skipped silently.

    Args:
        results_dir: Path to benchmark results directory.
        max_runs: Maximum number of runs to return.

    Returns:
        List of EvaluationRun objects ordered by directory name, descending.
        That is newest-first only if run directories are named so that they
        sort chronologically (presumably timestamped - confirm). Empty when
        ``results_dir`` is not a directory or ``max_runs`` is not positive.
    """
    results_path = Path(results_dir)
    # is_dir() also covers a missing path and a path that is a plain file.
    if max_runs <= 0 or not results_path.is_dir():
        return []

    runs: list[EvaluationRun] = []

    # Look for run directories
    for item in sorted(results_path.iterdir(), reverse=True):
        summary_file = item / "summary.json"
        if not item.is_dir() or not summary_file.exists():
            continue

        try:
            summary = json.loads(summary_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable, undecodable or malformed summary: skip this run.
            # (json.JSONDecodeError and UnicodeDecodeError are ValueErrors.)
            continue

        if not isinstance(summary, dict):
            # Valid JSON, but not the object layout read below.
            continue

        runs.append(
            EvaluationRun(
                run_id=item.name,
                started_at=summary.get("started_at", "unknown"),
                completed_at=summary.get("completed_at", None),
                num_tasks=summary.get("num_tasks", 0),
                success_rate=summary.get("success_rate", None),
                agent_type=summary.get("agent_type", "unknown"),
                status=summary.get("status", "completed"),
            )
        )

        if len(runs) >= max_runs:
            break

    return runs
|
|
1108
|
+
|
|
1109
|
+
|
|
609
1110
|
if __name__ == "__main__":
|
|
610
1111
|
main()
|