oagi-core 0.14.1__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oagi/__init__.py CHANGED
@@ -38,6 +38,10 @@ from oagi.types.models import (
38
38
  # Format: name -> (module_path, package_to_check, extra_name)
39
39
  # package_to_check is None if no optional dependency is required
40
40
  _LAZY_IMPORTS_DATA: dict[str, tuple[str, str | None, str | None]] = {
41
+ # Action converters (no optional dependencies)
42
+ "OagiActionConverter": ("oagi.converters.oagi", None, None),
43
+ "ConverterConfig": ("oagi.converters.base", None, None),
44
+ "BaseActionConverter": ("oagi.converters.base", None, None),
41
45
  # Desktop handlers (require pyautogui/PIL)
42
46
  "AsyncPyautoguiActionHandler": (
43
47
  "oagi.handler.async_pyautogui_action_handler",
@@ -88,6 +92,8 @@ if TYPE_CHECKING:
88
92
  from oagi.agent.default import AsyncDefaultAgent
89
93
  from oagi.agent.observer.agent_observer import AsyncAgentObserver
90
94
  from oagi.agent.tasker import TaskerAgent
95
+ from oagi.converters.base import BaseActionConverter, ConverterConfig
96
+ from oagi.converters.oagi import OagiActionConverter
91
97
  from oagi.handler.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
92
98
  from oagi.handler.async_screenshot_maker import AsyncScreenshotMaker
93
99
  from oagi.handler.async_ydotool_action_handler import AsyncYdotoolActionHandler
@@ -174,4 +180,8 @@ __all__ = [
174
180
  "YdotoolConfig",
175
181
  # Lazy imports - Screen manager
176
182
  "ScreenManager",
183
+ # Lazy imports - Action converters
184
+ "OagiActionConverter",
185
+ "ConverterConfig",
186
+ "BaseActionConverter",
177
187
  ]
@@ -0,0 +1,56 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ """Action converters for VLM support.
9
+
10
+ This module provides the base class and OAGI implementation for action converters.
11
+ Third parties can inherit from BaseActionConverter to create custom converters.
12
+
13
+ Example usage:
14
+ from oagi.converters import OagiActionConverter, ConverterConfig
15
+
16
+ # Configure for 1920x1080 sandbox
17
+ config = ConverterConfig(sandbox_width=1920, sandbox_height=1080)
18
+ converter = OagiActionConverter(config=config)
19
+
20
+ # Convert OAGI actions to pyautogui strings
21
+ result = converter(actions) # list[str]
22
+
23
+ # Convert to runtime API steps
24
+ for cmd in result:
25
+ step = converter.action_string_to_step(cmd)
26
+ # Execute step via runtime API...
27
+
28
+ Creating custom converters:
29
+ from oagi.converters import BaseActionConverter, ConverterConfig
30
+
31
+ class MyActionConverter(BaseActionConverter[MyAction]):
32
+ @property
33
+ def coord_width(self) -> int:
34
+ return 1000 # Your model's coordinate width
35
+
36
+ @property
37
+ def coord_height(self) -> int:
38
+ return 1000 # Your model's coordinate height
39
+
40
+ def _convert_single_action(self, action: MyAction) -> list[str]:
41
+ # Convert action to pyautogui command strings
42
+ ...
43
+
44
+ def serialize_actions(self, actions: list[MyAction]) -> list[dict]:
45
+ # Serialize actions for trajectory logging
46
+ ...
47
+ """
48
+
49
+ from .base import BaseActionConverter, ConverterConfig
50
+ from .oagi import OagiActionConverter
51
+
52
+ __all__ = [
53
+ "BaseActionConverter",
54
+ "ConverterConfig",
55
+ "OagiActionConverter",
56
+ ]
@@ -0,0 +1,292 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ """Base class for action converters.
9
+
10
+ This module provides the abstract base class for converting model-specific
11
+ actions to pyautogui command strings for remote execution.
12
+ """
13
+
14
+ import re
15
+ from abc import ABC, abstractmethod
16
+ from dataclasses import dataclass
17
+ from typing import Any, Generic, TypeVar
18
+
19
+ from ..handler.capslock_manager import CapsLockManager
20
+ from ..handler.utils import (
21
+ CoordinateScaler,
22
+ normalize_key,
23
+ parse_hotkey,
24
+ validate_keys,
25
+ )
26
+
27
+ T = TypeVar("T")
28
+
29
+
30
@dataclass
class ConverterConfig:
    """Tunable settings shared by action converters.

    The option names follow PyautoguiConfig so both can be configured
    consistently.
    """

    # Sandbox (target) screen size that model coordinates are scaled to.
    sandbox_width: int = 1920
    sandbox_height: int = 1080
    # Value emitted as the ``duration`` of generated drag commands.
    drag_duration: float = 0.5
    # Magnitude emitted for each generated scroll command.
    scroll_amount: int = 2
    # Fallback wait length used when a wait action carries no argument.
    wait_duration: float = 1.0
    # Value emitted as the ``interval`` of generated hotkey commands.
    hotkey_interval: float = 0.1
    # Mode handed to CapsLockManager.
    capslock_mode: str = "session"
    # True: raise ValueError when coordinates are outside the valid range.
    # False (default): clamp coordinates to the valid range (original behavior).
    strict_coordinate_validation: bool = False
47
+
48
+
49
class BaseActionConverter(ABC, Generic[T]):
    """Abstract base class for action converters.

    Subclasses must implement:
    - coord_width/coord_height properties for input coordinate space
    - _convert_single_action() for model-specific conversion logic
    - serialize_actions() for trajectory logging

    Provides common functionality:
    - Coordinate scaling via CoordinateScaler
    - Key normalization via shared utils
    - __call__ interface returning list of action strings
    - action_string_to_step() for runtime API format
    """

    # Matches "WAIT(<seconds>)" for any non-negative float literal, including
    # the forms Python itself produces when formatting a float ("1.0", "1e-05").
    # Without the exponent form such strings would fall through to the
    # shell-command default in action_string_to_step().
    _WAIT_PATTERN = re.compile(
        r"^WAIT\(\s*(?P<sec>(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?)\s*\)$",
        re.IGNORECASE,
    )

    def __init__(
        self,
        *,
        config: ConverterConfig | None = None,
        logger: Any | None = None,
    ):
        """Initialize the converter.

        Args:
            config: Converter configuration. Uses defaults if not provided.
            logger: Optional logger instance for debug/error logging.
        """
        self.config = config or ConverterConfig()
        self.logger = logger

        # Scale from the subclass-defined input space to the sandbox size.
        self._coord_scaler = CoordinateScaler(
            source_width=self.coord_width,
            source_height=self.coord_height,
            target_width=self.config.sandbox_width,
            target_height=self.config.sandbox_height,
        )

        # Initialize caps lock manager
        self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)

        # Track last cursor position (for actions without explicit coordinates)
        self._last_x: int | None = None
        self._last_y: int | None = None

    @property
    @abstractmethod
    def coord_width(self) -> int:
        """Input coordinate space width (e.g., 1024 for XGA, 1000 for OAGI)."""
        ...

    @property
    @abstractmethod
    def coord_height(self) -> int:
        """Input coordinate space height (e.g., 768 for XGA, 1000 for OAGI)."""
        ...

    @property
    def scale_x(self) -> float:
        """X scaling factor from input to sandbox coordinates."""
        return self._coord_scaler.scale_x

    @property
    def scale_y(self) -> float:
        """Y scaling factor from input to sandbox coordinates."""
        return self._coord_scaler.scale_y

    def scale_coordinate(self, x: int | float, y: int | float) -> tuple[int, int]:
        """Scale coordinates from model space to sandbox space.

        Args:
            x: X coordinate in model space
            y: Y coordinate in model space

        Returns:
            Tuple of (scaled_x, scaled_y) in sandbox space
        """
        return self._coord_scaler.scale(x, y)

    def normalize_key(self, key: str) -> str:
        """Normalize a key name to pyautogui format.

        Args:
            key: Key name to normalize

        Returns:
            Normalized key name
        """
        return normalize_key(key)

    def parse_hotkey(self, hotkey_str: str, *, validate: bool = True) -> list[str]:
        """Parse a hotkey string into a list of normalized key names.

        Args:
            hotkey_str: Hotkey string (e.g., "ctrl+c")
            validate: If True, validate keys against PYAUTOGUI_VALID_KEYS

        Returns:
            List of normalized key names
        """
        return parse_hotkey(hotkey_str, validate=validate)

    def validate_keys(self, keys: list[str]) -> None:
        """Validate that all keys are recognized by pyautogui.

        Args:
            keys: List of key names to validate

        Raises:
            ValueError: If any key is invalid
        """
        validate_keys(keys)

    def _get_last_or_center(self) -> tuple[int, int]:
        """Get last cursor position or screen center as fallback.

        Returns:
            Tuple of (x, y) coordinates
        """
        if self._last_x is not None and self._last_y is not None:
            return self._last_x, self._last_y
        return self.config.sandbox_width // 2, self.config.sandbox_height // 2

    def _log_error(self, message: str) -> None:
        """Log an error message if logger is available."""
        if self.logger:
            self.logger.error(message)

    def _log_info(self, message: str) -> None:
        """Log an info message if logger is available."""
        if self.logger:
            self.logger.info(message)

    def _log_debug(self, message: str) -> None:
        """Log a debug message if logger is available."""
        if self.logger:
            self.logger.debug(message)

    def __call__(self, actions: list[T]) -> list[str]:
        """Convert actions to list of pyautogui command strings.

        Each action is converted independently; a failure is logged and
        recorded without aborting the remaining actions.

        Args:
            actions: List of model-specific action objects

        Returns:
            List of pyautogui command strings (empty for empty input)

        Raises:
            RuntimeError: If no action produced a command and at least one
                conversion failed
        """
        converted: list[str] = []
        failed: list[tuple[str, str]] = []
        skipped: list[str] = []

        if not actions:
            return converted

        for action in actions:
            try:
                action_strings = self._convert_single_action(action)
            except Exception as e:
                action_repr = repr(action)
                self._log_error(f"Failed to convert action: {action_repr}, error: {e}")
                failed.append((action_repr, str(e)))
                continue

            if not action_strings:
                # No-op action (e.g., screenshot, cursor_position)
                action_type = getattr(action, "action_type", repr(action))
                skipped.append(str(action_type))
                continue

            converted.extend(action_strings)

        if skipped:
            self._log_debug(f"Skipped no-op actions: {skipped}")

        # `actions` is known to be non-empty here (early return above).
        if failed and not converted:
            raise RuntimeError(
                f"All action conversions failed ({len(failed)}/{len(actions)}): {failed}"
            )

        return converted

    @abstractmethod
    def _convert_single_action(self, action: T) -> list[str]:
        """Convert a single action to pyautogui command string(s).

        Args:
            action: Model-specific action object

        Returns:
            List of pyautogui command strings (may be empty for no-op actions)

        Raises:
            ValueError: If action format is invalid
        """
        ...

    @abstractmethod
    def serialize_actions(self, actions: list[T]) -> list[dict[str, Any]]:
        """Serialize actions for trajectory logging.

        Args:
            actions: List of model-specific action objects

        Returns:
            List of serialized action dictionaries
        """
        ...

    def action_string_to_step(self, action: str) -> dict[str, Any]:
        """Convert an action string into a step for runtime/do API.

        Recognized forms, checked in order:
        - "DONE" / "FAIL" (case-insensitive): a zero-second sleep step
        - "WAIT(<seconds>)" (case-insensitive): a sleep step of that length
        - any string containing "pyautogui": a pyautogui code step
        - anything else: a shell "execute" step

        Args:
            action: Action string (e.g., "pyautogui.click(x=100, y=200)")

        Returns:
            Step dict for runtime API
        """
        action_str = str(action).strip()

        # Special markers carry no executable work.
        if action_str.upper() in ("DONE", "FAIL"):
            return {"type": "sleep", "parameters": {"seconds": 0}}

        # WAIT(seconds)
        wait_match = self._WAIT_PATTERN.match(action_str)
        if wait_match:
            seconds = float(wait_match.group("sec"))
            return {"type": "sleep", "parameters": {"seconds": seconds}}

        # pyautogui code path
        if "pyautogui" in action_str.lower():
            return {
                "type": "pyautogui",
                "parameters": {"code": action_str},
            }

        # Default: shell command
        return {"type": "execute", "parameters": {"command": action_str, "shell": True}}
@@ -0,0 +1,198 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) OpenAGI Foundation
3
+ # All rights reserved.
4
+ #
5
+ # This file is part of the official API project.
6
+ # Licensed under the MIT License.
7
+ # -----------------------------------------------------------------------------
8
+ """OAGI action converter.
9
+
10
+ This module provides the OagiActionConverter for converting OAGI actions
11
+ to pyautogui command strings for remote execution.
12
+ """
13
+
14
+ from typing import Any
15
+
16
+ from ..handler.utils import (
17
+ parse_click_coords,
18
+ parse_drag_coords,
19
+ parse_scroll_coords,
20
+ )
21
+ from ..types import Action, ActionType
22
+ from .base import BaseActionConverter
23
+
24
+ # OAGI uses normalized 0-1000 coordinate space
25
+ OAGI_COORD_SIZE = 1000
26
+
27
+
28
class OagiActionConverter(BaseActionConverter[Action]):
    """Convert OAGI actions to pyautogui command strings.

    This converter handles:
    1. Coordinate scaling from 0-1000 space to sandbox dimensions (1920x1080)
    2. Action format conversion from OAGI Action format to pyautogui strings
    3. Key name normalization for hotkey combinations

    The output can be converted to runtime API steps via action_string_to_step().
    """

    @property
    def coord_width(self) -> int:
        """Width of the OAGI input coordinate space."""
        return OAGI_COORD_SIZE

    @property
    def coord_height(self) -> int:
        """Height of the OAGI input coordinate space."""
        return OAGI_COORD_SIZE

    def __call__(self, actions: list[Action]) -> list[str]:
        """Convert OAGI actions to list of pyautogui command strings.

        Extends base implementation to handle action count and finish detection.

        Args:
            actions: OAGI actions to convert

        Returns:
            List of command strings (empty for empty input)

        Raises:
            ValueError: If more than one finish()/fail() action is present
            RuntimeError: If no action produced a command and at least one
                conversion failed
        """
        converted: list[str] = []
        failed: list[tuple[str, str]] = []
        has_terminal = False

        if not actions:
            return converted

        for action in actions:
            # Check for duplicate finish()/fail() during iteration.
            # Raised outside the try block so it is not recorded as a
            # per-action conversion failure.
            if action.type in (ActionType.FINISH, ActionType.FAIL):
                if has_terminal:
                    raise ValueError(
                        "Duplicate finish()/fail() detected. "
                        "Only one finish() or fail() is allowed per action sequence."
                    )
                has_terminal = True

            try:
                converted.extend(self._convert_action(action))
            except Exception as e:
                action_repr = f"{action.type.value}({action.argument})"
                self._log_error(f"Failed to convert action: {action_repr}, error: {e}")
                failed.append((action_repr, str(e)))

        if failed and not converted:
            raise RuntimeError(
                f"All action conversions failed ({len(failed)}/{len(actions)}): {failed}"
            )
        return converted

    def _convert_action(self, action: Action) -> list[str]:
        """Convert action to list of pyautogui command strings.

        Handles action.count for repeat support. The single-action conversion
        is performed once per repetition rather than converting once and
        duplicating the result, so that stateful conversions (the session-mode
        caps-lock toggle) are applied as many times as the action repeats.
        """
        repeat = int(action.count or 1)

        # Always convert at least once so that an invalid action is still
        # reported even when the count is not positive.
        commands = list(self._convert_single_action(action))
        if repeat <= 0:
            return []

        for _ in range(repeat - 1):
            commands.extend(self._convert_single_action(action))
        return commands

    def _convert_single_action(self, action: Action) -> list[str]:
        """Convert a single OAGI action to pyautogui command string(s).

        Args:
            action: OAGI action to convert

        Returns:
            Command strings for the action; empty for actions that emit no
            command (session-mode caps lock toggle, call_user)

        Raises:
            ValueError: If the hotkey is empty, the wait duration is not a
                finite non-negative number, or the action type is unknown
        """
        action_type = action.type.value
        # NOTE(review): this also strips parentheses at the ends of text for
        # type actions — confirm against the argument format.
        argument = (action.argument or "").strip("()")

        drag_duration = self.config.drag_duration
        scroll_amount = self.config.scroll_amount
        wait_duration = self.config.wait_duration
        hotkey_interval = self.config.hotkey_interval
        strict = self.config.strict_coordinate_validation

        if action_type == ActionType.CLICK.value:
            x, y = parse_click_coords(argument, self._coord_scaler, strict=strict)
            return [f"pyautogui.click(x={x}, y={y})"]

        if action_type == ActionType.LEFT_DOUBLE.value:
            x, y = parse_click_coords(argument, self._coord_scaler, strict=strict)
            return [f"pyautogui.doubleClick(x={x}, y={y})"]

        if action_type == ActionType.LEFT_TRIPLE.value:
            x, y = parse_click_coords(argument, self._coord_scaler, strict=strict)
            return [f"pyautogui.tripleClick(x={x}, y={y})"]

        if action_type == ActionType.RIGHT_SINGLE.value:
            x, y = parse_click_coords(argument, self._coord_scaler, strict=strict)
            return [f"pyautogui.rightClick(x={x}, y={y})"]

        if action_type == ActionType.DRAG.value:
            sx, sy, ex, ey = parse_drag_coords(
                argument, self._coord_scaler, strict=strict
            )
            return [
                f"pyautogui.moveTo({sx}, {sy})",
                f"pyautogui.dragTo({ex}, {ey}, duration={drag_duration})",
            ]

        if action_type == ActionType.HOTKEY.value:
            keys = self.parse_hotkey(argument, validate=True)
            valid_keys = [k for k in keys if k]
            if not valid_keys:
                raise ValueError(
                    f"Invalid hotkey format: '{argument}'. "
                    "Expected key names like 'ctrl+c', 'alt+tab'"
                )
            # A lone caps lock press is either forwarded to the system or
            # tracked by the caps lock manager, depending on its mode.
            if valid_keys == ["capslock"]:
                if self.caps_manager.should_use_system_capslock():
                    return [f"pyautogui.hotkey('capslock', interval={hotkey_interval})"]
                self.caps_manager.toggle()
                return []  # No pyautogui command for session mode
            keys_str = ", ".join(repr(k) for k in valid_keys)
            return [f"pyautogui.hotkey({keys_str}, interval={hotkey_interval})"]

        if action_type == ActionType.TYPE.value:
            text = argument.strip("\"'")
            text = self.caps_manager.transform_text(text)
            return [f"pyautogui.typewrite({text!r})"]

        if action_type == ActionType.SCROLL.value:
            x, y, direction = parse_scroll_coords(
                argument, self._coord_scaler, strict=strict
            )
            amount = scroll_amount if direction == "up" else -scroll_amount
            return [f"pyautogui.moveTo({x}, {y})", f"pyautogui.scroll({amount})"]

        if action_type == ActionType.WAIT.value:
            try:
                seconds = float(argument) if argument else wait_duration
            except ValueError as exc:
                raise ValueError(
                    f"Invalid wait duration: '{argument}'. "
                    "Expected numeric value in seconds."
                ) from exc
            # float() also accepts "nan", "inf" and negative numbers; none of
            # them is a usable duration. NaN fails both comparisons.
            if not (0 <= seconds < float("inf")):
                raise ValueError(
                    f"Invalid wait duration: '{argument}'. "
                    "Expected numeric value in seconds."
                )
            return [f"WAIT({seconds})"]

        if action_type == ActionType.FINISH.value:
            self._log_info("Task completion action -> DONE")
            return ["DONE"]

        if action_type == ActionType.FAIL.value:
            self._log_info("Task infeasible action -> FAIL")
            return ["FAIL"]

        if action_type == ActionType.CALL_USER.value:
            self._log_info("User intervention requested")
            return []

        raise ValueError(
            f"Unknown action type: '{action_type}'. "
            "Supported: click, left_double, left_triple, right_single, drag, "
            "hotkey, type, scroll, wait, finish, fail, call_user"
        )

    def serialize_actions(self, actions: list[Action]) -> list[dict[str, Any]]:
        """Serialize OAGI actions for trajectory logging.

        Args:
            actions: OAGI actions to serialize; None or empty yields []

        Returns:
            One dict per action with its type value, argument and count
        """
        return [
            {
                "type": action.type.value,
                "argument": action.argument,
                "count": action.count,
            }
            for action in (actions or [])
        ]
@@ -17,6 +17,7 @@ from ..constants import DEFAULT_STEP_DELAY
17
17
  from ..exceptions import check_optional_dependency
18
18
  from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
19
19
  from .capslock_manager import CapsLockManager
20
+ from .utils import CoordinateScaler, normalize_key, parse_hotkey
20
21
 
21
22
  check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
22
23
  import pyautogui # noqa: E402
@@ -92,6 +93,15 @@ class PyautoguiActionHandler:
92
93
  self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
93
94
  # The origin position of coordinates (the top-left corner of the target screen)
94
95
  self.origin_x, self.origin_y = 0, 0
96
+ # Initialize coordinate scaler (OAGI uses 0-1000 normalized coordinates)
97
+ self._coord_scaler = CoordinateScaler(
98
+ source_width=1000,
99
+ source_height=1000,
100
+ target_width=self.screen_width,
101
+ target_height=self.screen_height,
102
+ origin_x=self.origin_x,
103
+ origin_y=self.origin_y,
104
+ )
95
105
 
96
106
  def reset(self):
97
107
  """Reset handler state.
@@ -109,6 +119,9 @@ class PyautoguiActionHandler:
109
119
  """
110
120
  self.screen_width, self.screen_height = screen.width, screen.height
111
121
  self.origin_x, self.origin_y = screen.x, screen.y
122
+ # Update coordinate scaler
123
+ self._coord_scaler.set_target_size(screen.width, screen.height)
124
+ self._coord_scaler.set_origin(screen.x, screen.y)
112
125
 
113
126
  def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
114
127
  """Convert coordinates from 0-1000 range to actual screen coordinates.
@@ -116,26 +129,7 @@ class PyautoguiActionHandler:
116
129
  Also handles corner coordinates to prevent PyAutoGUI fail-safe trigger.
117
130
  Corner coordinates (0,0), (0,max), (max,0), (max,max) are offset by 1 pixel.
118
131
  """
119
- screen_x = int(x * self.screen_width / 1000)
120
- screen_y = int(y * self.screen_height / 1000)
121
-
122
- # Prevent fail-safe by adjusting corner coordinates
123
- # Check if coordinates are at screen corners (with small tolerance)
124
- if screen_x < 1:
125
- screen_x = 1
126
- elif screen_x > self.screen_width - 1:
127
- screen_x = self.screen_width - 1
128
-
129
- if screen_y < 1:
130
- screen_y = 1
131
- elif screen_y > self.screen_height - 1:
132
- screen_y = self.screen_height - 1
133
-
134
- # Add origin offset to convert relative to top-left corner
135
- screen_x += self.origin_x
136
- screen_y += self.origin_y
137
-
138
- return screen_x, screen_y
132
+ return self._coord_scaler.scale(x, y, prevent_failsafe=True)
139
133
 
140
134
  def _parse_coords(self, args_str: str) -> tuple[int, int]:
141
135
  """Extract x, y coordinates from argument string."""
@@ -163,28 +157,15 @@ class PyautoguiActionHandler:
163
157
 
164
158
  def _normalize_key(self, key: str) -> str:
165
159
  """Normalize key names for consistency."""
166
- key = key.strip().lower()
167
- # Normalize caps lock variations
168
- hotkey_variations_mapping = {
169
- "capslock": ["caps_lock", "caps", "capslock"],
170
- "pgup": ["page_up", "pageup"],
171
- "pgdn": ["page_down", "pagedown"],
172
- }
173
- for normalized, variations in hotkey_variations_mapping.items():
174
- if key in variations:
175
- return normalized
176
- # Remap ctrl to command on macOS if enabled
177
- if self.config.macos_ctrl_to_cmd and sys.platform == "darwin" and key == "ctrl":
178
- return "command"
179
- return key
160
+ return normalize_key(key, macos_ctrl_to_cmd=self.config.macos_ctrl_to_cmd)
180
161
 
181
162
  def _parse_hotkey(self, args_str: str) -> list[str]:
182
163
  """Parse hotkey string into list of keys."""
183
- # Remove parentheses if present
184
- args_str = args_str.strip("()")
185
- # Split by '+' to get individual keys
186
- keys = [self._normalize_key(key) for key in args_str.split("+")]
187
- return keys
164
+ return parse_hotkey(
165
+ args_str,
166
+ macos_ctrl_to_cmd=self.config.macos_ctrl_to_cmd,
167
+ validate=False, # Don't validate, let pyautogui handle invalid keys
168
+ )
188
169
 
189
170
  def _move_and_wait(self, x: int, y: int) -> None:
190
171
  """Move cursor to position and wait before clicking."""
@@ -269,8 +250,8 @@ class PyautoguiActionHandler:
269
250
  )
270
251
  pyautogui.scroll(scroll_amount)
271
252
 
272
- case ActionType.FINISH:
273
- # Task completion - reset handler state
253
+ case ActionType.FINISH | ActionType.FAIL:
254
+ # Task completion or infeasible - reset handler state
274
255
  self.reset()
275
256
 
276
257
  case ActionType.WAIT: