openadapt-ml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/baselines/__init__.py +121 -0
- openadapt_ml/baselines/adapter.py +185 -0
- openadapt_ml/baselines/cli.py +314 -0
- openadapt_ml/baselines/config.py +448 -0
- openadapt_ml/baselines/parser.py +922 -0
- openadapt_ml/baselines/prompts.py +787 -0
- openadapt_ml/benchmarks/__init__.py +13 -115
- openadapt_ml/benchmarks/agent.py +265 -421
- openadapt_ml/benchmarks/azure.py +28 -19
- openadapt_ml/benchmarks/azure_ops_tracker.py +521 -0
- openadapt_ml/benchmarks/cli.py +1722 -4847
- openadapt_ml/benchmarks/trace_export.py +631 -0
- openadapt_ml/benchmarks/viewer.py +22 -5
- openadapt_ml/benchmarks/vm_monitor.py +530 -29
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +47 -53
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +21 -20
- openadapt_ml/cloud/azure_inference.py +3 -5
- openadapt_ml/cloud/lambda_labs.py +722 -307
- openadapt_ml/cloud/local.py +2038 -487
- openadapt_ml/cloud/ssh_tunnel.py +68 -26
- openadapt_ml/datasets/next_action.py +40 -30
- openadapt_ml/evals/grounding.py +8 -3
- openadapt_ml/evals/plot_eval_metrics.py +15 -13
- openadapt_ml/evals/trajectory_matching.py +41 -26
- openadapt_ml/experiments/demo_prompt/format_demo.py +16 -6
- openadapt_ml/experiments/demo_prompt/run_experiment.py +26 -16
- openadapt_ml/experiments/representation_shootout/__init__.py +70 -0
- openadapt_ml/experiments/representation_shootout/conditions.py +708 -0
- openadapt_ml/experiments/representation_shootout/config.py +390 -0
- openadapt_ml/experiments/representation_shootout/evaluator.py +659 -0
- openadapt_ml/experiments/representation_shootout/runner.py +687 -0
- openadapt_ml/experiments/waa_demo/runner.py +29 -14
- openadapt_ml/export/parquet.py +36 -24
- openadapt_ml/grounding/detector.py +18 -14
- openadapt_ml/ingest/__init__.py +8 -6
- openadapt_ml/ingest/capture.py +25 -22
- openadapt_ml/ingest/loader.py +7 -4
- openadapt_ml/ingest/synthetic.py +189 -100
- openadapt_ml/models/api_adapter.py +14 -4
- openadapt_ml/models/base_adapter.py +10 -2
- openadapt_ml/models/providers/__init__.py +288 -0
- openadapt_ml/models/providers/anthropic.py +266 -0
- openadapt_ml/models/providers/base.py +299 -0
- openadapt_ml/models/providers/google.py +376 -0
- openadapt_ml/models/providers/openai.py +342 -0
- openadapt_ml/models/qwen_vl.py +46 -19
- openadapt_ml/perception/__init__.py +35 -0
- openadapt_ml/perception/integration.py +399 -0
- openadapt_ml/retrieval/demo_retriever.py +50 -24
- openadapt_ml/retrieval/embeddings.py +9 -8
- openadapt_ml/retrieval/retriever.py +3 -1
- openadapt_ml/runtime/__init__.py +50 -0
- openadapt_ml/runtime/policy.py +18 -5
- openadapt_ml/runtime/safety_gate.py +471 -0
- openadapt_ml/schema/__init__.py +9 -0
- openadapt_ml/schema/converters.py +74 -27
- openadapt_ml/schema/episode.py +31 -18
- openadapt_ml/scripts/capture_screenshots.py +530 -0
- openadapt_ml/scripts/compare.py +85 -54
- openadapt_ml/scripts/demo_policy.py +4 -1
- openadapt_ml/scripts/eval_policy.py +15 -9
- openadapt_ml/scripts/make_gif.py +1 -1
- openadapt_ml/scripts/prepare_synthetic.py +3 -1
- openadapt_ml/scripts/train.py +21 -9
- openadapt_ml/segmentation/README.md +920 -0
- openadapt_ml/segmentation/__init__.py +97 -0
- openadapt_ml/segmentation/adapters/__init__.py +5 -0
- openadapt_ml/segmentation/adapters/capture_adapter.py +420 -0
- openadapt_ml/segmentation/annotator.py +610 -0
- openadapt_ml/segmentation/cache.py +290 -0
- openadapt_ml/segmentation/cli.py +674 -0
- openadapt_ml/segmentation/deduplicator.py +656 -0
- openadapt_ml/segmentation/frame_describer.py +788 -0
- openadapt_ml/segmentation/pipeline.py +340 -0
- openadapt_ml/segmentation/schemas.py +622 -0
- openadapt_ml/segmentation/segment_extractor.py +634 -0
- openadapt_ml/training/azure_ops_viewer.py +1097 -0
- openadapt_ml/training/benchmark_viewer.py +52 -41
- openadapt_ml/training/shared_ui.py +7 -7
- openadapt_ml/training/stub_provider.py +57 -35
- openadapt_ml/training/trainer.py +143 -86
- openadapt_ml/training/trl_trainer.py +70 -21
- openadapt_ml/training/viewer.py +323 -108
- openadapt_ml/training/viewer_components.py +180 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/METADATA +215 -14
- openadapt_ml-0.2.1.dist-info/RECORD +116 -0
- openadapt_ml/benchmarks/base.py +0 -366
- openadapt_ml/benchmarks/data_collection.py +0 -432
- openadapt_ml/benchmarks/live_tracker.py +0 -180
- openadapt_ml/benchmarks/runner.py +0 -418
- openadapt_ml/benchmarks/waa.py +0 -761
- openadapt_ml/benchmarks/waa_live.py +0 -619
- openadapt_ml-0.2.0.dist-info/RECORD +0 -86
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.2.0.dist-info → openadapt_ml-0.2.1.dist-info}/licenses/LICENSE +0 -0
openadapt_ml/scripts/compare.py
CHANGED
@@ -17,9 +17,12 @@ from pathlib import Path
 from typing import Any
 
 from openadapt_ml.ingest.capture import capture_to_episode
-from openadapt_ml.schema import Episode,
+from openadapt_ml.schema import Episode, ActionType
 from openadapt_ml.datasets.next_action import SYSTEM_PROMPT, format_action
-from openadapt_ml.training.trainer import
+from openadapt_ml.training.trainer import (
+    _get_shared_header_css,
+    _generate_shared_header_html,
+)
 
 
 def load_model(checkpoint_path: str | None, config_path: str | None = None):
@@ -50,6 +53,7 @@ def load_model(checkpoint_path: str | None, config_path: str | None = None):
     except Exception as e:
         print(f"Warning: Could not load model: {e}")
         import traceback
+
         traceback.print_exc()
         return None
 
@@ -79,7 +83,9 @@ def predict_action(
             history_text += f" {i}. {action_text}\n"
         history_text += f"\nThis is step {step_index + 1} of {total_steps}. "
     else:
-        history_text =
+        history_text = (
+            f"This is step 1 of {total_steps} (no actions completed yet). "
+        )
 
     # Match training prompt format exactly
     user_content = (
@@ -87,7 +93,7 @@ def predict_action(
         f"{history_text}"
         "Look at the screenshot and determine the NEXT action.\n\n"
         "Thought: [what element to interact with and why]\n"
-
+        'Action: [CLICK(x=..., y=...) or TYPE(text="...") or WAIT() or DONE()]'
     )
 
     # Build sample in the format expected by the adapter
@@ -107,14 +113,20 @@ def predict_action(
 
     # Try to extract coordinates from output
     # Match patterns like: CLICK(x=0.42, y=0.31) or click at (0.42, 0.31)
-    click_match = re.search(
+    click_match = re.search(
+        r"CLICK\s*\(\s*x\s*=\s*([\d.]+)\s*,\s*y\s*=\s*([\d.]+)\s*\)",
+        result,
+        re.IGNORECASE,
+    )
     if not click_match:
-        click_match = re.search(
+        click_match = re.search(
+            r"click.*?\(\s*([\d.]+)\s*,\s*([\d.]+)\s*\)", result, re.IGNORECASE
+        )
     if not click_match:
         # Try to find any two decimal numbers
-        nums = re.findall(r
+        nums = re.findall(r"(0\.\d+)", result)
         if len(nums) >= 2:
-            click_match = type(
+            click_match = type("Match", (), {"group": lambda s, i: nums[i - 1]})()
 
     if click_match:
         action["x"] = float(click_match.group(1))
@@ -124,6 +136,7 @@ def predict_action(
         return action
     except Exception as e:
         import traceback
+
         traceback.print_exc()
         return {"type": "error", "error": str(e)}
 
@@ -145,7 +158,11 @@ def generate_comparison_data(
         action_x, action_y = None, None
         if step.action.normalized_coordinates:
             action_x, action_y = step.action.normalized_coordinates
-        action_type_str =
+        action_type_str = (
+            step.action.type.value
+            if isinstance(step.action.type, ActionType)
+            else step.action.type
+        )
         step_data = {
             "index": i,
             "time": step.step_index,
@@ -204,7 +221,7 @@ def generate_comparison_html(
     comparison_json = json.dumps(comparison_data)
 
     # Add comparison panel above screenshot in main content
-    comparison_panel =
+    comparison_panel = """
     <div class="comparison-panel" id="comparison-panel">
       <div class="comparison-header">
         <h2>Action Comparison</h2>
@@ -223,9 +240,9 @@ def generate_comparison_html(
         <div class="match-indicator" id="match-indicator"></div>
       </div>
     </div>
-
+    """
 
-    comparison_styles =
+    comparison_styles = """
     <style>
     /* Navigation bar */
     .nav-bar {
@@ -432,9 +449,9 @@ def generate_comparison_html(
         border-color: var(--accent);
     }
     </style>
-
+    """
 
-    comparison_script = f
+    comparison_script = f"""
    <script>
    // Consolidated viewer script - all variables and functions in one scope
    // Export to window for cross-script access (for checkpoint dropdown script)
@@ -714,32 +731,33 @@ def generate_comparison_html(
        // Note: Nav is now injected via shared header HTML, no need for discoverDashboards()
     }}, 100);
     </script>
-
+    """
 
     # Insert into HTML
     # Add shared header CSS and comparison styles before </head>
-    shared_header_css = f
-    html = base_html.replace(
+    shared_header_css = f"<style>{_get_shared_header_css()}</style>"
+    html = base_html.replace(
+        "</head>", shared_header_css + comparison_styles + "</head>"
+    )
 
     # Add shared header HTML after container div
     shared_header_html = _generate_shared_header_html("viewer")
     html = html.replace(
-        '<div class="container">',
-        '<div class="container">\n' + shared_header_html
+        '<div class="container">', '<div class="container">\n' + shared_header_html
     )
 
     # Add comparison panel as full-width row BEFORE the main-content/sidebar flex row
     # Insert right BEFORE <div class="main-content"> as a sibling
     html = html.replace(
         '<div class="main-content">',
-        comparison_panel + '\n    <div class="main-content">'
+        comparison_panel + '\n    <div class="main-content">',
     )
 
     # Add script before </body>
-    html = html.replace(
+    html = html.replace("</body>", comparison_script + "</body>")
 
     # Write output
-    output_path.write_text(html, encoding=
+    output_path.write_text(html, encoding="utf-8")
     print(f"Generated comparison viewer: {output_path}")
 
     except ImportError:
@@ -752,20 +770,24 @@ def main():
         description="Compare human actions vs model predictions on a capture."
     )
     parser.add_argument(
-        "--capture",
+        "--capture",
+        "-c",
         required=True,
         help="Path to openadapt-capture recording directory",
     )
     parser.add_argument(
-        "--checkpoint",
+        "--checkpoint",
+        "-m",
         help="Path to trained model checkpoint (optional)",
     )
     parser.add_argument(
-        "--output",
+        "--output",
+        "-o",
         help="Output HTML path (default: capture_dir/comparison.html)",
     )
     parser.add_argument(
-        "--goal",
+        "--goal",
+        "-g",
         help="Task goal/description (auto-detected from capture if not provided)",
     )
     parser.add_argument(
@@ -797,7 +819,7 @@ def main():
     matches = sum(1 for d in comparison_data if d.get("match") is True)
     total = sum(1 for d in comparison_data if d.get("match") is not None)
     if total > 0:
-        print(f"Match rate: {matches}/{total} ({100*matches/total:.1f}%)")
+        print(f"Match rate: {matches}/{total} ({100 * matches / total:.1f}%)")
 
     # Generate HTML
     output_path = Path(args.output) if args.output else capture_path / "comparison.html"
@@ -806,6 +828,7 @@ def main():
     # Open in browser
     if args.open:
         import webbrowser
+
         webbrowser.open(f"file://{output_path.absolute()}")
 
     return 0
@@ -842,11 +865,13 @@ def generate_unified_viewer(
     capture_id = capture_path.name if capture_path else "unknown"
 
     if available_captures is None:
-        available_captures = [
-
-
-
-
+        available_captures = [
+            {
+                "id": capture_id,
+                "name": episode.instruction or "Untitled",
+                "steps": len(episode.steps),
+            }
+        ]
 
     # Prepare base capture data (human actions only, no predictions)
     base_data = []
@@ -855,18 +880,24 @@ def generate_unified_viewer(
         action_x, action_y = None, None
         if step.action.normalized_coordinates:
             action_x, action_y = step.action.normalized_coordinates
-        action_type_str =
-
-
-
-
-
-
-        "
-        "
-        "
-
-
+        action_type_str = (
+            step.action.type.value
+            if isinstance(step.action.type, ActionType)
+            else step.action.type
+        )
+        base_data.append(
+            {
+                "index": i,
+                "time": step.step_index,
+                "image_path": step.observation.screenshot_path,
+                "human_action": {
+                    "type": action_type_str,
+                    "x": action_x,
+                    "y": action_y,
+                    "text": step.action.text,
+                },
+            }
+        )
 
     # JSON encode all data
     base_data_json = json.dumps(base_data)
@@ -875,7 +906,7 @@ def generate_unified_viewer(
     current_capture_json = json.dumps(capture_id)
 
     # Unified viewer styles and controls
-    unified_styles =
+    unified_styles = """
     <style>
     /* Navigation bar */
     .nav-bar {
@@ -1129,10 +1160,10 @@ def generate_unified_viewer(
         border-color: var(--accent);
     }
     </style>
-
+    """
 
     # Comparison panel HTML
-    comparison_panel =
+    comparison_panel = """
     <div class="viewer-controls" id="viewer-controls">
       <div class="control-group">
         <span class="control-label">Training Example:</span>
@@ -1162,10 +1193,10 @@ def generate_unified_viewer(
         <div class="match-indicator" id="match-indicator"></div>
      </div>
    </div>
-
+    """
 
     # Unified viewer script
-    unified_script = f
+    unified_script = f"""
    <script>
    // Consolidated unified viewer script - all variables in one scope
    // Data
@@ -1477,18 +1508,18 @@ def generate_unified_viewer(
        updateComparison(currentIndex);
     }}, 100);
     </script>
-
+    """
 
     # Inject into HTML
-    html = base_html.replace(
+    html = base_html.replace("</head>", unified_styles + "</head>")
     html = html.replace(
         '<div class="main-content">',
-        comparison_panel + '\n    <div class="main-content">'
+        comparison_panel + '\n    <div class="main-content">',
     )
-    html = html.replace(
+    html = html.replace("</body>", unified_script + "</body>")
 
     # Write output
-    output_path.write_text(html, encoding=
+    output_path.write_text(html, encoding="utf-8")
     print(f"Generated unified viewer: {output_path}")
 
     except ImportError:

openadapt_ml/scripts/demo_policy.py
CHANGED
@@ -20,7 +20,9 @@ def main() -> None:
     args = parser.parse_args()
 
     # Use synthetic data to build one SFT-style sample
-    sessions = generate_synthetic_sessions(
+    sessions = generate_synthetic_sessions(
+        num_sessions=1, seed=99, output_dir="synthetic/demo"
+    )
     episodes = [ep for sess in sessions for ep in sess.episodes]
     samples = build_next_action_sft_samples(episodes)
 
@@ -58,5 +60,6 @@ def main() -> None:
     print("State:", state)
     print("Raw output:", raw_text)
 
+
 if __name__ == "__main__":
     main()

openadapt_ml/scripts/eval_policy.py
CHANGED
@@ -3,11 +3,11 @@ from __future__ import annotations
 import argparse
 import json
 from pathlib import Path
-from typing import Any, Dict,
+from typing import Any, Dict, Optional
 
 import yaml
 
-from openadapt_ml.datasets.next_action import build_next_action_sft_samples
+from openadapt_ml.datasets.next_action import build_next_action_sft_samples
 from openadapt_ml.evals.trajectory_matching import evaluate_policy_on_episodes
 from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
 from openadapt_ml.models.dummy_adapter import DummyAdapter
@@ -199,7 +199,9 @@ def main(
             "mean_episode_step_score": metrics.mean_episode_step_score,
             "weak_episode_success_rate": metrics.weak_episode_success_rate,
             "state_success_rate": metrics.state_success_rate,
-            "element_accuracy": metrics.element_accuracy
+            "element_accuracy": metrics.element_accuracy
+            if hasattr(metrics, "element_accuracy")
+            else None,
         },
     }
     out_path = Path(output_json)
@@ -210,8 +212,12 @@ def main(
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Evaluate a policy on synthetic episodes."
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to YAML config file."
+    )
     parser.add_argument(
         "--backend",
         type=str,
@@ -248,19 +254,19 @@ if __name__ == "__main__":
         choices=["coord", "som"],
         default="coord",
         help="DSL mode: 'coord' for coordinate-based (CLICK(x=..., y=...)), "
-
+        "'som' for Set-of-Marks index-based (CLICK([1])). Default: coord.",
     )
     parser.add_argument(
         "--overfit",
         action="store_true",
         help="Evaluate on training data to check memorization/overfitting. "
-
+        "If not set, generates fresh data to test generalization.",
     )
     parser.add_argument(
         "--no-jitter",
         action="store_true",
         help="Disable jitter for deterministic UI layouts. "
-
+        "Useful for testing memorization of fixed layouts.",
     )
     parser.add_argument(
         "--scenario",
@@ -268,7 +274,7 @@ if __name__ == "__main__":
         choices=["login", "registration"],
         default=None,
         help="Scenario type: 'login' (6 steps, 3 elements) or 'registration' (12 steps, 6 elements). "
-
+        "Overrides config if provided.",
     )
     args = parser.parse_args()
 
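
The element_accuracy guard added to eval_policy.py above keeps the metrics dict JSON-serializable when the metrics object predates that field. A minimal standalone sketch of the same pattern; the SimpleNamespace here is a stand-in for illustration, not the project's actual metrics class:

    from types import SimpleNamespace

    # Stand-in metrics result that happens to lack element_accuracy.
    metrics = SimpleNamespace(state_success_rate=0.75)

    summary = {
        "state_success_rate": metrics.state_success_rate,
        # Same guard as in the diff: fall back to None when the attribute is absent.
        "element_accuracy": metrics.element_accuracy
        if hasattr(metrics, "element_accuracy")
        else None,
    }
    print(summary)  # {'state_success_rate': 0.75, 'element_accuracy': None}

An equivalent one-liner is getattr(metrics, "element_accuracy", None); the diff keeps the explicit conditional, which mirrors the surrounding dict entries.
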
openadapt_ml/scripts/make_gif.py
CHANGED
@@ -8,7 +8,9 @@ from openadapt_ml.ingest.synthetic import generate_synthetic_episodes
 
 def main() -> None:
     output_dir = Path("synthetic") / "debug"
-    episodes = generate_synthetic_episodes(
+    episodes = generate_synthetic_episodes(
+        num_episodes=2, seed=42, output_dir=output_dir
+    )
 
     print(f"Generated {len(episodes)} episodes into {output_dir.resolve()}")
 
openadapt_ml/scripts/train.py
CHANGED
@@ -126,6 +126,7 @@ def main(
     # Disable Unsloth if requested
     if not use_unsloth:
         import os
+
         os.environ["OPENADAPT_DISABLE_UNSLOTH"] = "1"
 
     base_path = Path(capture_path).parent if capture_path else None
@@ -142,6 +143,7 @@ def main(
     # Open dashboard in browser if requested
     if open_dashboard:
         import webbrowser
+
         dashboard_path = Path(output_dir) / "dashboard.html"
         if dashboard_path.exists():
             webbrowser.open(f"file://{dashboard_path.absolute()}")
@@ -153,22 +155,32 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Train Qwen-VL adapter on synthetic data or openadapt-capture recordings."
     )
-    parser.add_argument(
-
-
-    parser.add_argument(
-
+    parser.add_argument(
+        "--config", type=str, required=True, help="Path to YAML config file."
+    )
+    parser.add_argument(
+        "--capture", type=str, help="Path to openadapt-capture recording directory."
+    )
+    parser.add_argument(
+        "--goal",
+        type=str,
+        help="Task goal/description (overrides recording's task description).",
+    )
+    parser.add_argument(
+        "--output-dir", type=str, help="Output directory for logs and dashboard."
+    )
+    parser.add_argument(
+        "--open", action="store_true", help="Open training dashboard in browser."
+    )
 
     parser.add_argument(
         "--use-unsloth",
         action="store_true",
         default=True,
-        help="Enable Unsloth optimizations (default)."
+        help="Enable Unsloth optimizations (default).",
     )
     parser.add_argument(
-        "--no-unsloth",
-        action="store_true",
-        help="Disable Unsloth optimizations."
+        "--no-unsloth", action="store_true", help="Disable Unsloth optimizations."
     )
     args = parser.parse_args()
 
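
train.py's CLI now spells out its arguments one per add_argument call, and main() disables Unsloth by exporting OPENADAPT_DISABLE_UNSLOTH=1 when use_unsloth is false. A hedged sketch of how the paired flags can be resolved; only the two flags and the environment variable appear in the diff, and the combination rule below is an assumption for illustration:

    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--use-unsloth", action="store_true", default=True,
                        help="Enable Unsloth optimizations (default).")
    parser.add_argument("--no-unsloth", action="store_true",
                        help="Disable Unsloth optimizations.")
    args = parser.parse_args(["--no-unsloth"])

    # Assumed resolution rule: an explicit --no-unsloth wins over the default-on flag.
    use_unsloth = args.use_unsloth and not args.no_unsloth
    if not use_unsloth:
        # Same environment toggle main() sets in the diff.
        os.environ["OPENADAPT_DISABLE_UNSLOTH"] = "1"
    print(use_unsloth, os.environ.get("OPENADAPT_DISABLE_UNSLOTH"))  # False 1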