openadapt-ml 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.2.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
|
-
from openadapt_ml.schema import Action,
|
|
9
|
+
from openadapt_ml.schema import Action, Episode, Step
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def format_action(action: "Action") -> str:
|
|
@@ -19,7 +19,7 @@ def format_action(action: "Action") -> str:
|
|
|
19
19
|
String representation like "CLICK(0.5, 0.3)" or "TYPE('hello')".
|
|
20
20
|
"""
|
|
21
21
|
# Get action type value (handle both enum and string)
|
|
22
|
-
action_type = action.type.value if hasattr(action.type,
|
|
22
|
+
action_type = action.type.value if hasattr(action.type, "value") else action.type
|
|
23
23
|
|
|
24
24
|
if action_type == "click":
|
|
25
25
|
if action.normalized_coordinates is not None:
|
|
@@ -53,7 +53,10 @@ def format_action(action: "Action") -> str:
|
|
|
53
53
|
return f"SCROLL({direction})"
|
|
54
54
|
|
|
55
55
|
elif action_type == "drag":
|
|
56
|
-
if
|
|
56
|
+
if (
|
|
57
|
+
action.normalized_coordinates is not None
|
|
58
|
+
and action.normalized_end is not None
|
|
59
|
+
):
|
|
57
60
|
x, y = action.normalized_coordinates
|
|
58
61
|
end_x, end_y = action.normalized_end
|
|
59
62
|
return f"DRAG({x:.3f}, {y:.3f}, {end_x:.3f}, {end_y:.3f})"
|
|
@@ -112,7 +115,11 @@ def format_episode_as_demo(
|
|
|
112
115
|
lines.append(format_step(step, i))
|
|
113
116
|
|
|
114
117
|
# Optionally include screenshot reference
|
|
115
|
-
if
|
|
118
|
+
if (
|
|
119
|
+
include_screenshots
|
|
120
|
+
and step.observation
|
|
121
|
+
and step.observation.screenshot_path
|
|
122
|
+
):
|
|
116
123
|
lines.append(f" [Screenshot: {step.observation.screenshot_path}]")
|
|
117
124
|
|
|
118
125
|
lines.append("")
|
|
@@ -167,9 +174,12 @@ def format_episode_verbose(
|
|
|
167
174
|
if next_step.observation and next_step.observation.window_title:
|
|
168
175
|
if (
|
|
169
176
|
not step.observation
|
|
170
|
-
or next_step.observation.window_title
|
|
177
|
+
or next_step.observation.window_title
|
|
178
|
+
!= step.observation.window_title
|
|
171
179
|
):
|
|
172
|
-
lines.append(
|
|
180
|
+
lines.append(
|
|
181
|
+
f" [Result: Window changed to {next_step.observation.window_title}]"
|
|
182
|
+
)
|
|
173
183
|
|
|
174
184
|
lines.append("")
|
|
175
185
|
|
|
@@ -8,14 +8,12 @@ from __future__ import annotations
|
|
|
8
8
|
import argparse
|
|
9
9
|
import base64
|
|
10
10
|
import json
|
|
11
|
-
import sys
|
|
12
11
|
from dataclasses import dataclass, field
|
|
13
12
|
from datetime import datetime
|
|
14
13
|
from pathlib import Path
|
|
15
14
|
from typing import Any
|
|
16
15
|
|
|
17
16
|
from openadapt_ml.experiments.demo_prompt.format_demo import (
|
|
18
|
-
format_episode_as_demo,
|
|
19
17
|
format_episode_verbose,
|
|
20
18
|
generate_length_matched_control,
|
|
21
19
|
get_demo_screenshot_paths,
|
|
@@ -138,14 +136,16 @@ class DemoPromptExperiment:
|
|
|
138
136
|
if Path(path).exists():
|
|
139
137
|
with open(path, "rb") as f:
|
|
140
138
|
image_b64 = base64.b64encode(f.read()).decode("utf-8")
|
|
141
|
-
content.append(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
"
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
139
|
+
content.append(
|
|
140
|
+
{
|
|
141
|
+
"type": "image",
|
|
142
|
+
"source": {
|
|
143
|
+
"type": "base64",
|
|
144
|
+
"media_type": "image/png",
|
|
145
|
+
"data": image_b64,
|
|
146
|
+
},
|
|
147
|
+
}
|
|
148
|
+
)
|
|
149
149
|
|
|
150
150
|
# Add text
|
|
151
151
|
content.append({"type": "text", "text": user_content})
|
|
@@ -158,7 +158,11 @@ class DemoPromptExperiment:
|
|
|
158
158
|
)
|
|
159
159
|
|
|
160
160
|
parts = getattr(response, "content", [])
|
|
161
|
-
texts = [
|
|
161
|
+
texts = [
|
|
162
|
+
getattr(p, "text", "")
|
|
163
|
+
for p in parts
|
|
164
|
+
if getattr(p, "type", "") == "text"
|
|
165
|
+
]
|
|
162
166
|
return "\n".join([t for t in texts if t]).strip()
|
|
163
167
|
|
|
164
168
|
elif self.provider == "openai":
|
|
@@ -170,10 +174,14 @@ class DemoPromptExperiment:
|
|
|
170
174
|
if Path(path).exists():
|
|
171
175
|
with open(path, "rb") as f:
|
|
172
176
|
image_b64 = base64.b64encode(f.read()).decode("utf-8")
|
|
173
|
-
user_content_parts.append(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
+
user_content_parts.append(
|
|
178
|
+
{
|
|
179
|
+
"type": "image_url",
|
|
180
|
+
"image_url": {
|
|
181
|
+
"url": f"data:image/png;base64,{image_b64}"
|
|
182
|
+
},
|
|
183
|
+
}
|
|
184
|
+
)
|
|
177
185
|
|
|
178
186
|
# Add text
|
|
179
187
|
user_content_parts.append({"type": "text", "text": user_content})
|
|
@@ -446,7 +454,9 @@ def run_experiment(
|
|
|
446
454
|
output_path = Path(output_dir)
|
|
447
455
|
output_path.mkdir(parents=True, exist_ok=True)
|
|
448
456
|
|
|
449
|
-
results_file =
|
|
457
|
+
results_file = (
|
|
458
|
+
output_path / f"results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
|
|
459
|
+
)
|
|
450
460
|
with open(results_file, "w") as f:
|
|
451
461
|
json.dump(
|
|
452
462
|
{
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Representation Shootout Experiment.
|
|
2
|
+
|
|
3
|
+
Compares three approaches for GUI action prediction under distribution drift:
|
|
4
|
+
|
|
5
|
+
- Condition A: Raw Coordinates - Direct coordinate regression
|
|
6
|
+
- Condition B: Coordinates + Visual Cues - Enhanced with markers and zoom
|
|
7
|
+
- Condition C: Marks (Element IDs) - Element classification using SoM
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
# Run full experiment
|
|
11
|
+
python -m openadapt_ml.experiments.representation_shootout.runner run
|
|
12
|
+
|
|
13
|
+
# Run specific condition
|
|
14
|
+
python -m openadapt_ml.experiments.representation_shootout.runner run --condition marks
|
|
15
|
+
|
|
16
|
+
# Evaluate under specific drift
|
|
17
|
+
python -m openadapt_ml.experiments.representation_shootout.runner eval --drift resolution
|
|
18
|
+
|
|
19
|
+
See docs/experiments/representation_shootout_design.md for full documentation.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from openadapt_ml.experiments.representation_shootout.config import (
|
|
23
|
+
ConditionConfig,
|
|
24
|
+
ConditionName,
|
|
25
|
+
DriftConfig,
|
|
26
|
+
DriftType,
|
|
27
|
+
ExperimentConfig,
|
|
28
|
+
MetricName,
|
|
29
|
+
)
|
|
30
|
+
from openadapt_ml.experiments.representation_shootout.conditions import (
|
|
31
|
+
ConditionBase,
|
|
32
|
+
CoordsCuesCondition,
|
|
33
|
+
MarksCondition,
|
|
34
|
+
RawCoordsCondition,
|
|
35
|
+
create_condition,
|
|
36
|
+
)
|
|
37
|
+
from openadapt_ml.experiments.representation_shootout.evaluator import (
|
|
38
|
+
DriftEvaluator,
|
|
39
|
+
EvaluationResult,
|
|
40
|
+
compute_metrics,
|
|
41
|
+
make_recommendation,
|
|
42
|
+
)
|
|
43
|
+
from openadapt_ml.experiments.representation_shootout.runner import (
|
|
44
|
+
ExperimentRunner,
|
|
45
|
+
run_experiment,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
__all__ = [
|
|
49
|
+
# Config
|
|
50
|
+
"ExperimentConfig",
|
|
51
|
+
"ConditionConfig",
|
|
52
|
+
"ConditionName",
|
|
53
|
+
"DriftConfig",
|
|
54
|
+
"DriftType",
|
|
55
|
+
"MetricName",
|
|
56
|
+
# Conditions
|
|
57
|
+
"ConditionBase",
|
|
58
|
+
"RawCoordsCondition",
|
|
59
|
+
"CoordsCuesCondition",
|
|
60
|
+
"MarksCondition",
|
|
61
|
+
"create_condition",
|
|
62
|
+
# Evaluator
|
|
63
|
+
"DriftEvaluator",
|
|
64
|
+
"EvaluationResult",
|
|
65
|
+
"compute_metrics",
|
|
66
|
+
"make_recommendation",
|
|
67
|
+
# Runner
|
|
68
|
+
"ExperimentRunner",
|
|
69
|
+
"run_experiment",
|
|
70
|
+
]
|