openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Demo formatting utilities for few-shot prompting."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from openadapt_ml.schema import Action, ActionType, Episode, Step
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def format_action(action: "Action") -> str:
    """Format an Action as a compact string for inclusion in a prompt.

    Args:
        action: Action to format. ``action.type`` may be an enum-like object
            (anything exposing ``.value``) or a plain string.

    Returns:
        String representation such as ``CLICK(0.500, 0.300)``,
        ``TYPE("hello")``, ``KEY(ctrl+s)``, ``SCROLL(down)`` or
        ``DRAG(0.100, 0.200, 0.300, 0.400)``. Any other action type is
        rendered as its upper-cased name followed by ``()``.
    """
    # Get action type value (handle both enum and string)
    action_type = action.type.value if hasattr(action.type, "value") else action.type

    if action_type in ("click", "double_click"):
        # Both pointer actions share one layout; only the label differs.
        label = action_type.upper()
        if action.normalized_coordinates is not None:
            x, y = action.normalized_coordinates
            return f"{label}({x:.3f}, {y:.3f})"
        return f"{label}()"

    if action_type == "type":
        text = action.text or ""
        # Truncate BEFORE escaping: cutting an already-escaped string could
        # split a `\"` pair and leave a dangling backslash in the output.
        if len(text) > 50:
            text = text[:47] + "..."
        text = text.replace('"', '\\"')
        return f'TYPE("{text}")'

    if action_type == "key":
        key = action.key or "unknown"
        if action.modifiers:
            mods = "+".join(action.modifiers)
            return f"KEY({mods}+{key})"
        return f"KEY({key})"

    if action_type == "scroll":
        # A missing direction is rendered as "down".
        direction = action.scroll_direction or "down"
        return f"SCROLL({direction})"

    if action_type == "drag":
        # Both endpoints are required; otherwise emit the bare form.
        if action.normalized_coordinates is not None and action.normalized_end is not None:
            x, y = action.normalized_coordinates
            end_x, end_y = action.normalized_end
            return f"DRAG({x:.3f}, {y:.3f}, {end_x:.3f}, {end_y:.3f})"
        return "DRAG()"

    # Fallback for action types without dedicated formatting.
    return f"{action_type.upper()}()"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def format_step(step: "Step", step_num: int) -> str:
    """Render one demonstration step as a short multi-line string.

    Args:
        step: Step to render; its ``observation`` and ``action`` may be empty.
        step_num: Number shown in the ``Step N:`` header line.

    Returns:
        The header line, followed by a window line when the observation has a
        window title and an action line when the step has an action, joined
        with newlines.
    """
    observation = step.observation
    rendered = [f"Step {step_num}:"]

    # Window context is only emitted when a title is actually present.
    if observation and observation.window_title:
        rendered.append(f" Window: {observation.window_title}")

    # The action line is skipped entirely for steps without an action.
    if step.action:
        rendered.append(f" Action: {format_action(step.action)}")

    return "\n".join(rendered)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def format_episode_as_demo(
    episode: "Episode",
    max_steps: int = 10,
    include_screenshots: bool = False,
) -> str:
    """Render an Episode as a few-shot demonstration block.

    Args:
        episode: Episode supplying the instruction and the steps.
        max_steps: Upper bound on how many leading steps are rendered.
        include_screenshots: When true, add a screenshot reference line after
            each step whose observation carries a screenshot path.

    Returns:
        Demo text: a header with the task, one blank-line-separated entry per
        step, and a closing ``---`` separator.
    """
    header = ["DEMONSTRATION:", f"Task: {episode.instruction}", ""]

    entries = []
    for number, step in enumerate(episode.steps[:max_steps], start=1):
        entries.append(format_step(step, number))

        # Screenshot references are opt-in and need a non-empty path.
        if include_screenshots:
            observation = step.observation
            if observation and observation.screenshot_path:
                entries.append(f" [Screenshot: {observation.screenshot_path}]")

        # Blank line between consecutive steps.
        entries.append("")

    return "\n".join([*header, *entries, "---"])
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def format_episode_verbose(
    episode: "Episode",
    max_steps: int = 10,
) -> str:
    """Render an Episode as a demonstration with extra per-step context.

    Each step may contribute up to three bracketed lines:
    - the screen (window title of the step's observation),
    - the action taken,
    - the result, reported only when the following step's window title
      differs from this step's (or this step has no observation).

    Args:
        episode: Episode to render.
        max_steps: Upper bound on how many leading steps are rendered.

    Returns:
        Verbose demo text ending with a ``---`` separator.
    """
    out = [
        "DEMONSTRATION:",
        f"Goal: {episode.instruction}",
        "",
        "The following shows the step-by-step procedure:",
        "",
    ]

    for position, current in enumerate(episode.steps[:max_steps], start=1):
        out.append(f"Step {position}:")

        # Screen summary
        current_obs = current.observation
        if current_obs and current_obs.window_title:
            out.append(f" [Screen: {current_obs.window_title}]")

        # Action taken
        if current.action:
            out.append(f" [Action: {format_action(current.action)}]")

        # Result: `position` is 1-based, so it is also the 0-based index of
        # the step that follows `current` in the full (untruncated) list.
        if position < len(episode.steps):
            upcoming_obs = episode.steps[position].observation
            new_title = upcoming_obs.window_title if upcoming_obs else None
            if new_title and (not current_obs or new_title != current_obs.window_title):
                out.append(f" [Result: Window changed to {new_title}]")

        out.append("")

    out.append("---")
    return "\n".join(out)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def get_demo_screenshot_paths(
    episode: "Episode",
    max_steps: int = 10,
) -> list[str]:
    """Collect existing screenshot paths from an episode's leading steps.

    Args:
        episode: Episode whose step observations are inspected.
        max_steps: Upper bound on how many leading steps are considered.

    Returns:
        Screenshot paths, in step order, for steps that have an observation
        with a non-empty screenshot path that exists on disk.
    """
    # Steps lacking an observation or a screenshot path are dropped first.
    candidates = (
        step.observation.screenshot_path
        for step in episode.steps[:max_steps]
        if step.observation and step.observation.screenshot_path
    )
    # Only paths present on the filesystem are returned.
    return [candidate for candidate in candidates if Path(candidate).exists()]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def generate_length_matched_control(demo: str) -> str:
    """Build filler text exactly as long as ``demo`` with no trajectory info.

    Serves as a control condition for prompt-length effects. Length is
    matched in characters, which only approximates the token count.

    Args:
        demo: Demo string whose character length should be matched.

    Returns:
        Generic placeholder text repeated and cut to ``len(demo)`` characters.
    """
    filler = (
        "This is placeholder text that serves as a control condition. "
        "It contains no relevant information about the task or demonstration. "
        "The purpose is to match the token count of the demonstration prompt. "
    )

    wanted = len(demo)
    # One extra repetition guarantees enough characters before slicing.
    repeats = wanted // len(filler) + 1
    return (filler * repeats)[:wanted]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "20251231_002125",
|
|
3
|
+
"provider": "anthropic",
|
|
4
|
+
"screenshot": "/Users/abrichr/oa/src/openadapt-capture/turn-off-nightshift/screenshots/capture_31807990_step_0.png",
|
|
5
|
+
"demo": "DEMONSTRATION:\nGoal: Turn off Night Shift in macOS System Settings\n\nThe following shows the step-by-step procedure:\n\nStep 1:\n [Screen: Desktop with Terminal window visible]\n [Action: CLICK(0.01, 0.01) - Click Apple menu icon in top-left]\n [Result: Apple menu dropdown opened]\n\nStep 2:\n [Screen: Apple menu visible with options]\n [Action: CLICK on \"System Settings...\" menu item]\n [Result: System Settings application opened]\n\nStep 3:\n [Screen: System Settings window with sidebar]\n [Action: CLICK on \"Displays\" in the sidebar]\n [Result: Displays panel shown in main area]\n\nStep 4:\n [Screen: Displays panel showing display settings]\n [Action: CLICK on \"Night Shift...\" button]\n [Result: Night Shift popup/sheet appeared]\n\nStep 5:\n [Screen: Night Shift popup with Schedule dropdown]\n [Action: CLICK on Schedule dropdown, select \"Off\"]\n [Result: Night Shift schedule set to Off, Night Shift disabled]\n\n---",
|
|
6
|
+
"test_cases": [
|
|
7
|
+
{
|
|
8
|
+
"test_case": {
|
|
9
|
+
"name": "near_toggle",
|
|
10
|
+
"task": "Turn ON Night Shift in macOS System Settings",
|
|
11
|
+
"similarity": "near",
|
|
12
|
+
"notes": "Same procedure, just toggle to opposite state"
|
|
13
|
+
},
|
|
14
|
+
"results": {
|
|
15
|
+
"zero_shot": {
|
|
16
|
+
"action": "CLICK(20, 8)",
|
|
17
|
+
"response": "I need to turn ON Night Shift in macOS. Based on the terminal window showing an attempt to control Night Shift, I should access System Settings to properly enable it. Let me click on the Apple menu.\n\nACTION: CLICK(20, 8)",
|
|
18
|
+
"error": null
|
|
19
|
+
},
|
|
20
|
+
"with_demo": {
|
|
21
|
+
"action": "CLICK(20, 8)",
|
|
22
|
+
"response": "I need to turn ON Night Shift in macOS System Settings. Let me start by clicking on the Apple menu icon to access System Settings.\n\nACTION: CLICK(20, 8)",
|
|
23
|
+
"error": null
|
|
24
|
+
},
|
|
25
|
+
"control": {
|
|
26
|
+
"action": "CLICK(1243, 8)",
|
|
27
|
+
"response": "I need to analyze the screenshot to understand how to turn on Night Shift in macOS System Settings.\n\nThe screenshot shows a Terminal window with some command output. To access Night Shift settings in macOS, I need to open System Settings. I can see various system icons in the menu bar at the top of the screen.\n\nACTION: CLICK(1243, 8)",
|
|
28
|
+
"error": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"test_case": {
|
|
34
|
+
"name": "medium_same_panel",
|
|
35
|
+
"task": "Adjust Night Shift color temperature to warmer setting",
|
|
36
|
+
"similarity": "medium",
|
|
37
|
+
"notes": "Same navigation path, different final action"
|
|
38
|
+
},
|
|
39
|
+
"results": {
|
|
40
|
+
"zero_shot": {
|
|
41
|
+
"action": "CLICK(1218, 8)",
|
|
42
|
+
"response": "The task requires adjusting the Night Shift color temperature to a warmer setting. Looking at the screenshot, I can see a macOS Terminal window. The system menu bar at the top right contains various control icons. I need to find the Night Shift or display settings controls.\n\nACTION: CLICK(1218, 8)",
|
|
43
|
+
"error": null
|
|
44
|
+
},
|
|
45
|
+
"with_demo": {
|
|
46
|
+
"action": "CLICK(19, 8)",
|
|
47
|
+
"response": "I need to adjust the Night Shift color temperature to make it warmer. First, I should access the System Settings to find the Night Shift controls.\n\nACTION: CLICK(19, 8)",
|
|
48
|
+
"error": null
|
|
49
|
+
},
|
|
50
|
+
"control": {
|
|
51
|
+
"action": "CLICK(1114, 8)",
|
|
52
|
+
"response": "I need to analyze the screenshot and the goal to determine the next action.\n\nThe screenshot shows a Terminal window on macOS. The goal is to \"Adjust Night Shift color temperature to warmer setting.\"\n\nLooking at the top menu bar, I can see various system icons. To adjust Night Shift settings, I would need to access System Preferences or use the Control Center. I can see what appears to be system control icons in the top-right corner of the screen.\n\nThe most direct way to adjust Night Shift would be to click on the Control Center icon or the display settings in the menu bar.\n\nACTION: CLICK(1114, 8)",
|
|
53
|
+
"error": null
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"test_case": {
|
|
59
|
+
"name": "far_different_setting",
|
|
60
|
+
"task": "Turn on True Tone display in macOS System Settings",
|
|
61
|
+
"similarity": "far",
|
|
62
|
+
"notes": "Same app but different panel (True Tone is in Displays)"
|
|
63
|
+
},
|
|
64
|
+
"results": {
|
|
65
|
+
"zero_shot": {
|
|
66
|
+
"action": "CLICK(1217, 8)",
|
|
67
|
+
"response": "I need to access System Settings to turn on True Tone display. Let me analyze the screen to find how to access it.\n\nACTION: CLICK(1217, 8)",
|
|
68
|
+
"error": null
|
|
69
|
+
},
|
|
70
|
+
"with_demo": {
|
|
71
|
+
"action": "CLICK(20, 8)",
|
|
72
|
+
"response": "I need to turn on True Tone display in macOS System Settings. Let me start by clicking on the Apple menu in the top-left corner.\n\nACTION: CLICK(20, 8)",
|
|
73
|
+
"error": null
|
|
74
|
+
},
|
|
75
|
+
"control": {
|
|
76
|
+
"action": "CLICK(20, 8)",
|
|
77
|
+
"response": "I need to open System Settings to enable True Tone display. I'll click on the Apple menu in the top-left corner to access System Settings.\n\nACTION: CLICK(20, 8)",
|
|
78
|
+
"error": null
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
]
|
|
83
|
+
}
|