openadapt-types 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ environment: release
12
+ permissions:
13
+ id-token: write
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: astral-sh/setup-uv@v4
17
+ - run: uv build
18
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ name: test
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: astral-sh/setup-uv@v4
15
+ - run: uv sync --extra dev
16
+ - run: uv run pytest
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .venv/
5
+ dist/
6
+ build/
7
+ *.egg-info/
8
+ .pytest_cache/
9
+ .env
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 OpenAdapt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: openadapt-types
3
+ Version: 0.1.0
4
+ Summary: Canonical Pydantic schemas for computer-use agents: ComputerState, Action, ActionResult, UINode
5
+ Project-URL: Homepage, https://github.com/OpenAdaptAI/openadapt-types
6
+ Project-URL: Repository, https://github.com/OpenAdaptAI/openadapt-types
7
+ Project-URL: Issues, https://github.com/OpenAdaptAI/openadapt-types/issues
8
+ Author-email: OpenAdapt <admin@openadapt.ai>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,computer-use,gui-automation,pydantic,schemas
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: pydantic>=2.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest-cov; extra == 'dev'
26
+ Requires-Dist: pytest>=7.0; extra == 'dev'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # openadapt-types
30
+
31
+ Canonical Pydantic schemas for computer-use agents.
32
+
33
+ ```
34
+ pip install openadapt-types
35
+ ```
36
+
37
+ ## What's in the box
38
+
39
+ | Schema | Purpose |
40
+ |--------|---------|
41
+ | `ComputerState` | Screen state: screenshot + UI element graph + window context |
42
+ | `UINode` | Single UI element with role, bbox, hierarchy, platform anchors |
43
+ | `Action` | Agent action with typed action space + flexible targeting |
44
+ | `ActionTarget` | Where to act: `node_id` > `description` > `(x, y)` coordinates |
45
+ | `ActionResult` | Execution outcome with error taxonomy + state delta |
46
+ | `Episode` / `Step` | Complete task trajectory (observation → action → result) |
47
+ | `FailureRecord` | Classified failure for dataset pipelines |
48
+
49
+ ## Quick start
50
+
51
+ ```python
52
+ from openadapt_types import (
53
+ Action, ActionTarget, ActionType,
54
+ ComputerState, UINode, BoundingBox,
55
+ )
56
+
57
+ # Describe what's on screen
58
+ state = ComputerState(
59
+ viewport=(1920, 1080),
60
+ nodes=[
61
+ UINode(node_id="n0", role="window", name="My App", children_ids=["n1"]),
62
+ UINode(node_id="n1", role="button", name="Submit", parent_id="n0",
63
+ bbox=BoundingBox(x=500, y=400, width=100, height=40)),
64
+ ],
65
+ )
66
+
67
+ # Agent decides what to do
68
+ action = Action(
69
+ type=ActionType.CLICK,
70
+ target=ActionTarget(node_id="n1"),
71
+ reasoning="Click Submit to proceed",
72
+ )
73
+
74
+ # Render element tree for LLM prompts
75
+ print(state.to_text_tree())
76
+ # [n0] window: My App
77
+ # [n1] button: Submit
78
+ ```
79
+
80
+ ## Action targeting
81
+
82
+ `ActionTarget` supports three grounding strategies (in priority order):
83
+
84
+ ```python
85
+ # 1. Element-based (preferred — most robust)
86
+ ActionTarget(node_id="n1")
87
+
88
+ # 2. Description-based (resolved by grounding module)
89
+ ActionTarget(description="the blue submit button")
90
+
91
+ # 3. Coordinate-based (fallback)
92
+ ActionTarget(x=550, y=420)
93
+ ActionTarget(x=0.29, y=0.39, is_normalized=True)
94
+ ```
95
+
96
+ Agents SHOULD produce `node_id` or `description`. The runtime resolves to coordinates.
97
+
98
+ ## Compatibility with existing schemas
99
+
100
+ Converters for three existing OpenAdapt schema formats:
101
+
102
+ ```python
103
+ from openadapt_types._compat import (
104
+ from_benchmark_observation, # openadapt-evals BenchmarkObservation
105
+ from_benchmark_action, # openadapt-evals BenchmarkAction
106
+ from_ml_observation, # openadapt-ml Observation
107
+ from_ml_action, # openadapt-ml Action
108
+ from_omnimcp_screen_state, # omnimcp ScreenState
109
+ from_omnimcp_action_decision, # omnimcp ActionDecision
110
+ )
111
+
112
+ # Convert existing data
113
+ state = from_benchmark_observation(obs.__dict__)
114
+ action = from_benchmark_action(act.__dict__)
115
+ ```
116
+
117
+ ## JSON Schema
118
+
119
+ Export for language-agnostic tooling:
120
+
121
+ ```python
122
+ import json
123
+ from openadapt_types import ComputerState, Action, Episode
124
+
125
+ # Get JSON Schema
126
+ schema = ComputerState.model_json_schema()
127
+ print(json.dumps(schema, indent=2))
128
+ ```
129
+
130
+ ## Design principles
131
+
132
+ - **Pydantic v2** — runtime validation, JSON Schema export, fast serialization
133
+ - **Pixels + structure** — always capture both visual and semantic UI state
134
+ - **Node graph** — full element tree, not just focused element
135
+ - **Platform-agnostic** — same schema for Windows, macOS, Linux, web
136
+ - **Extension-friendly** — `raw`, `attributes`, `metadata` fields everywhere
137
+ - **Backward compatible** — `_compat` converters for gradual migration
138
+
139
+ ## Dependencies
140
+
141
+ Just `pydantic>=2.0`. No ML libraries, no heavy deps.
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,117 @@
1
+ # openadapt-types
2
+
3
+ Canonical Pydantic schemas for computer-use agents.
4
+
5
+ ```
6
+ pip install openadapt-types
7
+ ```
8
+
9
+ ## What's in the box
10
+
11
+ | Schema | Purpose |
12
+ |--------|---------|
13
+ | `ComputerState` | Screen state: screenshot + UI element graph + window context |
14
+ | `UINode` | Single UI element with role, bbox, hierarchy, platform anchors |
15
+ | `Action` | Agent action with typed action space + flexible targeting |
16
+ | `ActionTarget` | Where to act: `node_id` > `description` > `(x, y)` coordinates |
17
+ | `ActionResult` | Execution outcome with error taxonomy + state delta |
18
+ | `Episode` / `Step` | Complete task trajectory (observation → action → result) |
19
+ | `FailureRecord` | Classified failure for dataset pipelines |
20
+
21
+ ## Quick start
22
+
23
+ ```python
24
+ from openadapt_types import (
25
+ Action, ActionTarget, ActionType,
26
+ ComputerState, UINode, BoundingBox,
27
+ )
28
+
29
+ # Describe what's on screen
30
+ state = ComputerState(
31
+ viewport=(1920, 1080),
32
+ nodes=[
33
+ UINode(node_id="n0", role="window", name="My App", children_ids=["n1"]),
34
+ UINode(node_id="n1", role="button", name="Submit", parent_id="n0",
35
+ bbox=BoundingBox(x=500, y=400, width=100, height=40)),
36
+ ],
37
+ )
38
+
39
+ # Agent decides what to do
40
+ action = Action(
41
+ type=ActionType.CLICK,
42
+ target=ActionTarget(node_id="n1"),
43
+ reasoning="Click Submit to proceed",
44
+ )
45
+
46
+ # Render element tree for LLM prompts
47
+ print(state.to_text_tree())
48
+ # [n0] window: My App
49
+ # [n1] button: Submit
50
+ ```
51
+
52
+ ## Action targeting
53
+
54
+ `ActionTarget` supports three grounding strategies (in priority order):
55
+
56
+ ```python
57
+ # 1. Element-based (preferred — most robust)
58
+ ActionTarget(node_id="n1")
59
+
60
+ # 2. Description-based (resolved by grounding module)
61
+ ActionTarget(description="the blue submit button")
62
+
63
+ # 3. Coordinate-based (fallback)
64
+ ActionTarget(x=550, y=420)
65
+ ActionTarget(x=0.29, y=0.39, is_normalized=True)
66
+ ```
67
+
68
+ Agents SHOULD produce `node_id` or `description`. The runtime resolves to coordinates.
69
+
70
+ ## Compatibility with existing schemas
71
+
72
+ Converters for three existing OpenAdapt schema formats:
73
+
74
+ ```python
75
+ from openadapt_types._compat import (
76
+ from_benchmark_observation, # openadapt-evals BenchmarkObservation
77
+ from_benchmark_action, # openadapt-evals BenchmarkAction
78
+ from_ml_observation, # openadapt-ml Observation
79
+ from_ml_action, # openadapt-ml Action
80
+ from_omnimcp_screen_state, # omnimcp ScreenState
81
+ from_omnimcp_action_decision, # omnimcp ActionDecision
82
+ )
83
+
84
+ # Convert existing data
85
+ state = from_benchmark_observation(obs.__dict__)
86
+ action = from_benchmark_action(act.__dict__)
87
+ ```
88
+
89
+ ## JSON Schema
90
+
91
+ Export for language-agnostic tooling:
92
+
93
+ ```python
94
+ import json
95
+ from openadapt_types import ComputerState, Action, Episode
96
+
97
+ # Get JSON Schema
98
+ schema = ComputerState.model_json_schema()
99
+ print(json.dumps(schema, indent=2))
100
+ ```
101
+
102
+ ## Design principles
103
+
104
+ - **Pydantic v2** — runtime validation, JSON Schema export, fast serialization
105
+ - **Pixels + structure** — always capture both visual and semantic UI state
106
+ - **Node graph** — full element tree, not just focused element
107
+ - **Platform-agnostic** — same schema for Windows, macOS, Linux, web
108
+ - **Extension-friendly** — `raw`, `attributes`, `metadata` fields everywhere
109
+ - **Backward compatible** — `_compat` converters for gradual migration
110
+
111
+ ## Dependencies
112
+
113
+ Just `pydantic>=2.0`. No ML libraries, no heavy deps.
114
+
115
+ ## License
116
+
117
+ MIT
@@ -0,0 +1,59 @@
1
+ """openadapt-types: Canonical Pydantic schemas for computer-use agents.
2
+
3
+ This package provides the shared type definitions used across the OpenAdapt
4
+ ecosystem and designed for adoption by any computer-use agent project.
5
+
6
+ Quick start::
7
+
8
+ from openadapt_types import ComputerState, Action, ActionType, UINode
9
+
10
+ state = ComputerState(
11
+ viewport=(1920, 1080),
12
+ nodes=[
13
+ UINode(node_id="n0", role="button", name="Submit"),
14
+ ],
15
+ )
16
+
17
+ action = Action(
18
+ type=ActionType.CLICK,
19
+ target=ActionTarget(node_id="n0"),
20
+ )
21
+ """
22
+
23
+ from openadapt_types.action import (
24
+ Action,
25
+ ActionResult,
26
+ ActionTarget,
27
+ ActionType,
28
+ )
29
+ from openadapt_types.computer_state import (
30
+ BoundingBox,
31
+ ComputerState,
32
+ ElementRole,
33
+ ProcessInfo,
34
+ UINode,
35
+ )
36
+ from openadapt_types.episode import Episode, Step
37
+ from openadapt_types.failure import FailureCategory, FailureRecord
38
+
39
+ __version__ = "0.1.0"
40
+
41
+ __all__ = [
42
+ # computer_state
43
+ "BoundingBox",
44
+ "ComputerState",
45
+ "ElementRole",
46
+ "ProcessInfo",
47
+ "UINode",
48
+ # action
49
+ "Action",
50
+ "ActionResult",
51
+ "ActionTarget",
52
+ "ActionType",
53
+ # episode
54
+ "Episode",
55
+ "Step",
56
+ # failure
57
+ "FailureCategory",
58
+ "FailureRecord",
59
+ ]
@@ -0,0 +1,262 @@
1
+ """Backward-compatibility converters for existing schemas.
2
+
3
+ These functions convert from the three existing schema formats to
4
+ the canonical ``openadapt-types`` models. They allow gradual migration:
5
+ old code keeps working, new code uses ``openadapt-types`` directly.
6
+
7
+ Usage::
8
+
9
+ from openadapt_types._compat import (
10
+ from_benchmark_observation,
11
+ from_benchmark_action,
12
+ from_ml_observation,
13
+ from_ml_action,
14
+ from_omnimcp_screen_state,
15
+ from_omnimcp_action_decision,
16
+ )
17
+
18
+ These converters work with plain dicts (``.__dict__`` or ``asdict()``),
19
+ not imported types, to avoid circular dependencies.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from typing import Any, Optional
25
+
26
+ from .action import Action, ActionTarget, ActionType
27
+ from .computer_state import BoundingBox, ComputerState, UINode
28
+
29
+
30
+ # =====================================================================
31
+ # From openadapt_evals.adapters.base (dataclass dicts)
32
+ # =====================================================================
33
+
34
+
35
def from_benchmark_observation(obs: dict[str, Any]) -> ComputerState:
    """Convert a ``BenchmarkObservation.__dict__`` to :class:`ComputerState`.

    Maps:
        - ``screenshot`` → ``screenshot_png``
        - ``screenshot_path`` → ``screenshot_path``
        - ``viewport`` → ``viewport``
        - ``accessibility_tree`` → ``accessibility_tree_raw``
        - ``dom_html`` → ``dom_html``
        - ``url``, ``window_title``, ``app_name`` → context fields
        - ``focused_element`` → ``focused_node_id`` (if dict with node_id)
        - ``raw_observation`` → ``raw``
    """
    # Only a dict-shaped focused_element carries a usable node id; anything
    # else (None, a bare string, ...) maps to "no focused node".
    focused = obs.get("focused_element")
    focused_id: Optional[str] = None
    if isinstance(focused, dict):
        focused_id = focused.get("node_id")

    return ComputerState(
        screenshot_png=obs.get("screenshot"),
        screenshot_path=obs.get("screenshot_path"),
        viewport=obs.get("viewport"),
        accessibility_tree_raw=obs.get("accessibility_tree"),
        dom_html=obs.get("dom_html"),
        url=obs.get("url"),
        # Bug fix: the docstring promised the window_title/app_name context
        # mapping but the original implementation silently dropped both.
        # NOTE(review): assumes ComputerState declares these context fields
        # (the package README advertises "window context") — confirm.
        window_title=obs.get("window_title"),
        app_name=obs.get("app_name"),
        focused_node_id=focused_id,
        raw=obs.get("raw_observation"),
    )
63
+
64
+
65
def from_benchmark_action(act: dict[str, Any]) -> Action:
    """Convert a ``BenchmarkAction.__dict__`` to :class:`Action`.

    Field mapping:
        - ``type`` → ``ActionType`` (unknown names fall back to ``DONE``)
        - ``x``, ``y``, ``target_node_id`` → ``ActionTarget``
        - ``text``, ``key``, ``modifiers`` → keyboard parameters
        - ``scroll_direction``, ``scroll_amount`` → scroll parameters
        - ``end_x``, ``end_y`` → ``drag_end``
        - ``answer`` → ``answer``
        - ``raw_action`` → ``raw``
    """
    try:
        kind = ActionType(act.get("type", "done"))
    except ValueError:
        # Unrecognized action names degrade gracefully instead of raising.
        kind = ActionType.DONE

    # A target exists as soon as any locating information is present.
    px, py = act.get("x"), act.get("y")
    node = act.get("target_node_id")
    target = (
        ActionTarget(node_id=node, x=px, y=py)
        if any(v is not None for v in (px, py, node))
        else None
    )

    # Drag destination, if either end coordinate was recorded.
    ex, ey = act.get("end_x"), act.get("end_y")
    drag_end = ActionTarget(x=ex, y=ey) if (ex is not None or ey is not None) else None

    amount = act.get("scroll_amount")
    return Action(
        type=kind,
        target=target,
        text=act.get("text"),
        key=act.get("key"),
        modifiers=act.get("modifiers"),
        scroll_direction=act.get("scroll_direction"),
        scroll_amount=None if amount is None else int(amount),
        drag_end=drag_end,
        answer=act.get("answer"),
        raw=act.get("raw_action"),
    )
111
+
112
+
113
+ # =====================================================================
114
+ # From openadapt_ml.schema.episode (Pydantic dicts)
115
+ # =====================================================================
116
+
117
+
118
def from_ml_observation(obs: dict[str, Any]) -> ComputerState:
    """Convert an ``openadapt_ml.schema.episode.Observation.model_dump()``
    to :class:`ComputerState`.

    A pure field-by-field rename: every field is an optional passthrough,
    so absent keys simply become ``None`` on the canonical model.
    """
    # destination field -> source key in the openadapt-ml dump
    field_map = {
        "screenshot_path": "screenshot_path",
        "screenshot_base64": "screenshot_base64",
        "viewport": "screen_size",
        "accessibility_tree_raw": "a11y_tree",
        "dom_html": "dom",
        "url": "url",
        "timestamp": "timestamp",
        "raw": "raw",
    }
    return ComputerState(**{dst: obs.get(src) for dst, src in field_map.items()})
131
+
132
+
133
def from_ml_action(act: dict[str, Any]) -> Action:
    """Convert an ``openadapt_ml.schema.episode.Action.model_dump()``
    to :class:`Action`.
    """
    try:
        kind = ActionType(act.get("type", "done"))
    except ValueError:
        # Unknown action names degrade gracefully instead of raising.
        kind = ActionType.DONE

    def _point(abs_coords, norm_coords):
        # Build a target from absolute ({"x": .., "y": ..} dict) or
        # normalized ((x, y) pair) coordinates; absolute wins when both
        # are truthy, matching the original elif ordering.
        if abs_coords:
            return ActionTarget(x=float(abs_coords["x"]), y=float(abs_coords["y"]))
        if norm_coords:
            return ActionTarget(x=norm_coords[0], y=norm_coords[1], is_normalized=True)
        return None

    # Main target: coordinates first, then element id as a fallback.
    target = _point(act.get("coordinates"), act.get("normalized_coordinates"))
    if target is None:
        element = act.get("element")
        if element and element.get("element_id"):
            target = ActionTarget(node_id=element["element_id"])

    # Drag destination uses the same coordinate precedence.
    drag_end = _point(act.get("end_coordinates"), act.get("normalized_end"))

    return Action(
        type=kind,
        target=target,
        text=act.get("text"),
        key=act.get("key"),
        modifiers=act.get("modifiers"),
        scroll_direction=act.get("scroll_direction"),
        scroll_amount=act.get("scroll_amount"),
        drag_end=drag_end,
        url=act.get("url"),
        app_name=act.get("app_name"),
        duration_seconds=act.get("duration"),
        raw=act.get("raw"),
    )
178
+
179
+
180
+ # =====================================================================
181
+ # From omnimcp.types (dataclass dicts)
182
+ # =====================================================================
183
+
184
+
185
def from_omnimcp_screen_state(state: dict[str, Any]) -> ComputerState:
    """Convert an ``omnimcp.types.ScreenState`` (as dict) to :class:`ComputerState`.

    Maps:
        - ``elements`` → ``nodes`` (UIElement → UINode)
        - ``dimensions`` → ``viewport``
        - ``timestamp`` → ``timestamp``
    """
    # Hoisted out of the loop (it is loop-invariant). Using ``or (1, 1)``
    # also guards an explicit ``dimensions: None`` entry, which the original
    # ``state.get("dimensions", (1, 1))`` would pass through and then crash
    # on when unpacking.
    vw, vh = state.get("dimensions") or (1, 1)

    nodes: list[UINode] = []
    for elem in state.get("elements", []):
        bbox = None
        bounds = elem.get("bounds")
        if bounds and len(bounds) == 4:
            # omnimcp bounds are normalized (x, y, w, h); scale to pixels.
            bbox = BoundingBox(
                x=int(bounds[0] * vw),
                y=int(bounds[1] * vh),
                width=int(bounds[2] * vw),
                height=int(bounds[3] * vh),
            )
        nodes.append(UINode(
            node_id=str(elem.get("id", "")),
            role=elem.get("type", "unknown"),
            text=elem.get("content"),
            bbox=bbox,
            confidence=elem.get("confidence", 1.0),
            attributes=elem.get("attributes", {}),
        ))

    return ComputerState(
        nodes=nodes,
        viewport=state.get("dimensions"),
        timestamp=state.get("timestamp"),
        source="omnimcp",
    )
222
+
223
+
224
def from_omnimcp_action_decision(decision: dict[str, Any]) -> Action:
    """Convert an ``omnimcp.types.ActionDecision`` (as dict) to :class:`Action`.

    Maps:
        - ``action_type`` → ``ActionType`` (via explicit vocabulary map)
        - ``target_element_id`` → ``ActionTarget.node_id``
        - ``parameters`` → keyboard/scroll/wait params
        - ``analysis_reasoning`` → ``reasoning``
    """
    # omnimcp uses its own action vocabulary; anything unmapped degrades
    # to DONE rather than raising.
    type_map = {
        "click": ActionType.CLICK,
        "type": ActionType.TYPE,
        "scroll": ActionType.SCROLL,
        "press_key": ActionType.KEY,
        "wait": ActionType.WAIT,
        "finish": ActionType.DONE,
        "launch_app": ActionType.OPEN_APP,
    }
    action_type = type_map.get(decision.get("action_type", "done"), ActionType.DONE)

    elem_id = decision.get("target_element_id")
    target = ActionTarget(node_id=str(elem_id)) if elem_id is not None else None

    # Bug fix: ``or {}`` also covers an explicit ``parameters: None`` entry,
    # which the original ``.get("parameters", {})`` would pass through and
    # then crash on at ``params.get(...)``.
    params = decision.get("parameters") or {}

    return Action(
        type=action_type,
        target=target,
        text=params.get("text_to_type"),
        key=params.get("key_info"),
        scroll_direction=params.get("scroll_direction"),
        scroll_amount=params.get("scroll_steps"),
        app_name=params.get("app_name"),
        duration_seconds=params.get("wait_duration_s"),
        reasoning=decision.get("analysis_reasoning"),
    )