openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
"""Benchmark viewer HTML generation.
|
|
2
2
|
|
|
3
|
+
.. deprecated::
|
|
4
|
+
This module is deprecated. Use ``openadapt_viewer`` instead::
|
|
5
|
+
|
|
6
|
+
from openadapt_viewer import generate_benchmark_viewer
|
|
7
|
+
|
|
8
|
+
The openadapt-viewer package is the canonical location for viewer code.
|
|
9
|
+
|
|
3
10
|
This module generates a standalone HTML viewer for benchmark results,
|
|
4
11
|
showing task list with pass/fail status, step-by-step replay of
|
|
5
12
|
benchmark executions, screenshots, actions, and reasoning at each step.
|
|
@@ -34,6 +41,7 @@ from __future__ import annotations
|
|
|
34
41
|
import base64
|
|
35
42
|
import json
|
|
36
43
|
import logging
|
|
44
|
+
import warnings
|
|
37
45
|
from pathlib import Path
|
|
38
46
|
from typing import Any
|
|
39
47
|
|
|
@@ -42,6 +50,13 @@ from openadapt_ml.training.shared_ui import (
|
|
|
42
50
|
generate_shared_header_html as _generate_shared_header_html,
|
|
43
51
|
)
|
|
44
52
|
|
|
53
|
+
warnings.warn(
|
|
54
|
+
"openadapt_ml.benchmarks.viewer is deprecated. "
|
|
55
|
+
"Use openadapt_viewer instead: from openadapt_viewer import generate_benchmark_viewer",
|
|
56
|
+
DeprecationWarning,
|
|
57
|
+
stacklevel=2,
|
|
58
|
+
)
|
|
59
|
+
|
|
45
60
|
logger = logging.getLogger(__name__)
|
|
46
61
|
|
|
47
62
|
|
|
@@ -133,7 +148,9 @@ def load_task_results(benchmark_dir: Path) -> list[dict[str, Any]]:
|
|
|
133
148
|
screenshots_dir = task_dir / "screenshots"
|
|
134
149
|
if screenshots_dir.exists():
|
|
135
150
|
screenshot_paths = sorted(screenshots_dir.glob("*.png"))
|
|
136
|
-
task_data["screenshots"] = [
|
|
151
|
+
task_data["screenshots"] = [
|
|
152
|
+
str(p.relative_to(benchmark_dir)) for p in screenshot_paths
|
|
153
|
+
]
|
|
137
154
|
else:
|
|
138
155
|
task_data["screenshots"] = []
|
|
139
156
|
|
|
@@ -294,7 +311,7 @@ def _generate_benchmark_viewer_html(
|
|
|
294
311
|
num_success = sum(1 for t in tasks if t.get("execution", {}).get("success", False))
|
|
295
312
|
success_rate = (num_success / num_tasks * 100) if num_tasks > 0 else 0
|
|
296
313
|
|
|
297
|
-
html = f
|
|
314
|
+
html = f"""<!DOCTYPE html>
|
|
298
315
|
<html lang="en">
|
|
299
316
|
<head>
|
|
300
317
|
<meta charset="UTF-8">
|
|
@@ -785,7 +802,7 @@ def _generate_benchmark_viewer_html(
|
|
|
785
802
|
<div class="stat-label">Failed</div>
|
|
786
803
|
</div>
|
|
787
804
|
<div class="stat-card">
|
|
788
|
-
<div class="stat-value {
|
|
805
|
+
<div class="stat-value {"success" if success_rate >= 50 else "error"}">{success_rate:.1f}%</div>
|
|
789
806
|
<div class="stat-label">Success Rate</div>
|
|
790
807
|
</div>
|
|
791
808
|
</div>
|
|
@@ -838,7 +855,7 @@ def _generate_benchmark_viewer_html(
|
|
|
838
855
|
const summary = {summary_json};
|
|
839
856
|
const domainStats = {domain_stats_json};
|
|
840
857
|
const tasks = {tasks_json};
|
|
841
|
-
const embedScreenshots = {
|
|
858
|
+
const embedScreenshots = {"true" if embed_screenshots else "false"};
|
|
842
859
|
|
|
843
860
|
let currentTaskIndex = -1;
|
|
844
861
|
let currentStepIndex = 0;
|
|
@@ -1214,6 +1231,6 @@ def _generate_benchmark_viewer_html(
|
|
|
1214
1231
|
</script>
|
|
1215
1232
|
</body>
|
|
1216
1233
|
</html>
|
|
1217
|
-
|
|
1234
|
+
"""
|
|
1218
1235
|
|
|
1219
1236
|
return html
|