oagi-core 0.14.1__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +10 -0
- oagi/converters/__init__.py +56 -0
- oagi/converters/base.py +292 -0
- oagi/converters/oagi.py +198 -0
- oagi/handler/pyautogui_action_handler.py +22 -41
- oagi/handler/utils.py +587 -0
- oagi/handler/ydotool_action_handler.py +22 -43
- oagi/server/socketio_server.py +1 -1
- oagi/types/models/action.py +1 -0
- oagi/utils/output_parser.py +2 -1
- oagi/utils/prompt_builder.py +1 -0
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/METADATA +1 -1
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/RECORD +16 -13
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/licenses/LICENSE +0 -0
oagi/__init__.py
CHANGED
|
@@ -38,6 +38,10 @@ from oagi.types.models import (
|
|
|
38
38
|
# Format: name -> (module_path, package_to_check, extra_name)
|
|
39
39
|
# package_to_check is None if no optional dependency is required
|
|
40
40
|
_LAZY_IMPORTS_DATA: dict[str, tuple[str, str | None, str | None]] = {
|
|
41
|
+
# Action converters (no optional dependencies)
|
|
42
|
+
"OagiActionConverter": ("oagi.converters.oagi", None, None),
|
|
43
|
+
"ConverterConfig": ("oagi.converters.base", None, None),
|
|
44
|
+
"BaseActionConverter": ("oagi.converters.base", None, None),
|
|
41
45
|
# Desktop handlers (require pyautogui/PIL)
|
|
42
46
|
"AsyncPyautoguiActionHandler": (
|
|
43
47
|
"oagi.handler.async_pyautogui_action_handler",
|
|
@@ -88,6 +92,8 @@ if TYPE_CHECKING:
|
|
|
88
92
|
from oagi.agent.default import AsyncDefaultAgent
|
|
89
93
|
from oagi.agent.observer.agent_observer import AsyncAgentObserver
|
|
90
94
|
from oagi.agent.tasker import TaskerAgent
|
|
95
|
+
from oagi.converters.base import BaseActionConverter, ConverterConfig
|
|
96
|
+
from oagi.converters.oagi import OagiActionConverter
|
|
91
97
|
from oagi.handler.async_pyautogui_action_handler import AsyncPyautoguiActionHandler
|
|
92
98
|
from oagi.handler.async_screenshot_maker import AsyncScreenshotMaker
|
|
93
99
|
from oagi.handler.async_ydotool_action_handler import AsyncYdotoolActionHandler
|
|
@@ -174,4 +180,8 @@ __all__ = [
|
|
|
174
180
|
"YdotoolConfig",
|
|
175
181
|
# Lazy imports - Screen manager
|
|
176
182
|
"ScreenManager",
|
|
183
|
+
# Lazy imports - Action converters
|
|
184
|
+
"OagiActionConverter",
|
|
185
|
+
"ConverterConfig",
|
|
186
|
+
"BaseActionConverter",
|
|
177
187
|
]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
"""Action converters for VLM support.
|
|
9
|
+
|
|
10
|
+
This module provides the base class and OAGI implementation for action converters.
|
|
11
|
+
Third parties can inherit from BaseActionConverter to create custom converters.
|
|
12
|
+
|
|
13
|
+
Example usage:
|
|
14
|
+
from oagi.converters import OagiActionConverter, ConverterConfig
|
|
15
|
+
|
|
16
|
+
# Configure for 1920x1080 sandbox
|
|
17
|
+
config = ConverterConfig(sandbox_width=1920, sandbox_height=1080)
|
|
18
|
+
converter = OagiActionConverter(config=config)
|
|
19
|
+
|
|
20
|
+
# Convert OAGI actions to pyautogui strings
|
|
21
|
+
result = converter(actions) # list[str]
|
|
22
|
+
|
|
23
|
+
# Convert to runtime API steps
|
|
24
|
+
for cmd in result:
|
|
25
|
+
step = converter.action_string_to_step(cmd)
|
|
26
|
+
# Execute step via runtime API...
|
|
27
|
+
|
|
28
|
+
Creating custom converters:
|
|
29
|
+
from oagi.converters import BaseActionConverter, ConverterConfig
|
|
30
|
+
|
|
31
|
+
class MyActionConverter(BaseActionConverter[MyAction]):
|
|
32
|
+
@property
|
|
33
|
+
def coord_width(self) -> int:
|
|
34
|
+
return 1000 # Your model's coordinate width
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def coord_height(self) -> int:
|
|
38
|
+
return 1000 # Your model's coordinate height
|
|
39
|
+
|
|
40
|
+
def _convert_single_action(self, action: MyAction) -> list[str]:
|
|
41
|
+
# Convert action to pyautogui command strings
|
|
42
|
+
...
|
|
43
|
+
|
|
44
|
+
def serialize_actions(self, actions: list[MyAction]) -> list[dict]:
|
|
45
|
+
# Serialize actions for trajectory logging
|
|
46
|
+
...
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
from .base import BaseActionConverter, ConverterConfig
|
|
50
|
+
from .oagi import OagiActionConverter
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
"BaseActionConverter",
|
|
54
|
+
"ConverterConfig",
|
|
55
|
+
"OagiActionConverter",
|
|
56
|
+
]
|
oagi/converters/base.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
"""Base class for action converters.
|
|
9
|
+
|
|
10
|
+
This module provides the abstract base class for converting model-specific
|
|
11
|
+
actions to pyautogui command strings for remote execution.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Any, Generic, TypeVar
|
|
18
|
+
|
|
19
|
+
from ..handler.capslock_manager import CapsLockManager
|
|
20
|
+
from ..handler.utils import (
|
|
21
|
+
CoordinateScaler,
|
|
22
|
+
normalize_key,
|
|
23
|
+
parse_hotkey,
|
|
24
|
+
validate_keys,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
T = TypeVar("T")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
|
|
31
|
+
class ConverterConfig:
|
|
32
|
+
"""Configuration for action converters.
|
|
33
|
+
|
|
34
|
+
Matches the configuration options in PyautoguiConfig for consistency.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
sandbox_width: int = 1920
|
|
38
|
+
sandbox_height: int = 1080
|
|
39
|
+
drag_duration: float = 0.5
|
|
40
|
+
scroll_amount: int = 2
|
|
41
|
+
wait_duration: float = 1.0
|
|
42
|
+
hotkey_interval: float = 0.1
|
|
43
|
+
capslock_mode: str = "session"
|
|
44
|
+
strict_coordinate_validation: bool = False
|
|
45
|
+
"""If True, raise ValueError when coordinates are outside valid range.
|
|
46
|
+
If False (default), clamp coordinates to valid range (original behavior)."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BaseActionConverter(ABC, Generic[T]):
|
|
50
|
+
"""Abstract base class for action converters.
|
|
51
|
+
|
|
52
|
+
Subclasses must implement:
|
|
53
|
+
- coord_width/coord_height properties for input coordinate space
|
|
54
|
+
- _convert_single_action() for model-specific conversion logic
|
|
55
|
+
- serialize_actions() for trajectory logging
|
|
56
|
+
|
|
57
|
+
Provides common functionality:
|
|
58
|
+
- Coordinate scaling via CoordinateScaler
|
|
59
|
+
- Key normalization via shared utils
|
|
60
|
+
- __call__ interface returning list of action strings
|
|
61
|
+
- action_string_to_step() for runtime API format
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def __init__(
|
|
65
|
+
self,
|
|
66
|
+
*,
|
|
67
|
+
config: ConverterConfig | None = None,
|
|
68
|
+
logger: Any | None = None,
|
|
69
|
+
):
|
|
70
|
+
"""Initialize the converter.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
config: Converter configuration. Uses defaults if not provided.
|
|
74
|
+
logger: Optional logger instance for debug/error logging.
|
|
75
|
+
"""
|
|
76
|
+
self.config = config or ConverterConfig()
|
|
77
|
+
self.logger = logger
|
|
78
|
+
|
|
79
|
+
# Initialize coordinate scaler
|
|
80
|
+
self._coord_scaler = CoordinateScaler(
|
|
81
|
+
source_width=self.coord_width,
|
|
82
|
+
source_height=self.coord_height,
|
|
83
|
+
target_width=self.config.sandbox_width,
|
|
84
|
+
target_height=self.config.sandbox_height,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Initialize caps lock manager
|
|
88
|
+
self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
|
|
89
|
+
|
|
90
|
+
# Track last cursor position (for actions without explicit coordinates)
|
|
91
|
+
self._last_x: int | None = None
|
|
92
|
+
self._last_y: int | None = None
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
@abstractmethod
|
|
96
|
+
def coord_width(self) -> int:
|
|
97
|
+
"""Input coordinate space width (e.g., 1024 for XGA, 1000 for OAGI)."""
|
|
98
|
+
...
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
@abstractmethod
|
|
102
|
+
def coord_height(self) -> int:
|
|
103
|
+
"""Input coordinate space height (e.g., 768 for XGA, 1000 for OAGI)."""
|
|
104
|
+
...
|
|
105
|
+
|
|
106
|
+
@property
|
|
107
|
+
def scale_x(self) -> float:
|
|
108
|
+
"""X scaling factor from input to sandbox coordinates."""
|
|
109
|
+
return self._coord_scaler.scale_x
|
|
110
|
+
|
|
111
|
+
@property
|
|
112
|
+
def scale_y(self) -> float:
|
|
113
|
+
"""Y scaling factor from input to sandbox coordinates."""
|
|
114
|
+
return self._coord_scaler.scale_y
|
|
115
|
+
|
|
116
|
+
def scale_coordinate(self, x: int | float, y: int | float) -> tuple[int, int]:
|
|
117
|
+
"""Scale coordinates from model space to sandbox space.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
x: X coordinate in model space
|
|
121
|
+
y: Y coordinate in model space
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
Tuple of (scaled_x, scaled_y) in sandbox space
|
|
125
|
+
"""
|
|
126
|
+
return self._coord_scaler.scale(x, y)
|
|
127
|
+
|
|
128
|
+
def normalize_key(self, key: str) -> str:
|
|
129
|
+
"""Normalize a key name to pyautogui format.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
key: Key name to normalize
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
Normalized key name
|
|
136
|
+
"""
|
|
137
|
+
return normalize_key(key)
|
|
138
|
+
|
|
139
|
+
def parse_hotkey(self, hotkey_str: str, *, validate: bool = True) -> list[str]:
|
|
140
|
+
"""Parse a hotkey string into a list of normalized key names.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
hotkey_str: Hotkey string (e.g., "ctrl+c")
|
|
144
|
+
validate: If True, validate keys against PYAUTOGUI_VALID_KEYS
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
List of normalized key names
|
|
148
|
+
"""
|
|
149
|
+
return parse_hotkey(hotkey_str, validate=validate)
|
|
150
|
+
|
|
151
|
+
def validate_keys(self, keys: list[str]) -> None:
|
|
152
|
+
"""Validate that all keys are recognized by pyautogui.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
keys: List of key names to validate
|
|
156
|
+
|
|
157
|
+
Raises:
|
|
158
|
+
ValueError: If any key is invalid
|
|
159
|
+
"""
|
|
160
|
+
validate_keys(keys)
|
|
161
|
+
|
|
162
|
+
def _get_last_or_center(self) -> tuple[int, int]:
|
|
163
|
+
"""Get last cursor position or screen center as fallback.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
Tuple of (x, y) coordinates
|
|
167
|
+
"""
|
|
168
|
+
if self._last_x is not None and self._last_y is not None:
|
|
169
|
+
return self._last_x, self._last_y
|
|
170
|
+
return self.config.sandbox_width // 2, self.config.sandbox_height // 2
|
|
171
|
+
|
|
172
|
+
def _log_error(self, message: str) -> None:
|
|
173
|
+
"""Log an error message if logger is available."""
|
|
174
|
+
if self.logger:
|
|
175
|
+
self.logger.error(message)
|
|
176
|
+
|
|
177
|
+
def _log_info(self, message: str) -> None:
|
|
178
|
+
"""Log an info message if logger is available."""
|
|
179
|
+
if self.logger:
|
|
180
|
+
self.logger.info(message)
|
|
181
|
+
|
|
182
|
+
def _log_debug(self, message: str) -> None:
|
|
183
|
+
"""Log a debug message if logger is available."""
|
|
184
|
+
if self.logger:
|
|
185
|
+
self.logger.debug(message)
|
|
186
|
+
|
|
187
|
+
def __call__(self, actions: list[T]) -> list[str]:
|
|
188
|
+
"""Convert actions to list of pyautogui command strings.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
actions: List of model-specific action objects
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
List of pyautogui command strings
|
|
195
|
+
|
|
196
|
+
Raises:
|
|
197
|
+
RuntimeError: If all action conversions failed
|
|
198
|
+
"""
|
|
199
|
+
converted: list[str] = []
|
|
200
|
+
failed: list[tuple[str, str]] = []
|
|
201
|
+
skipped: list[str] = []
|
|
202
|
+
|
|
203
|
+
if not actions:
|
|
204
|
+
return converted
|
|
205
|
+
|
|
206
|
+
for action in actions:
|
|
207
|
+
try:
|
|
208
|
+
action_strings = self._convert_single_action(action)
|
|
209
|
+
|
|
210
|
+
if not action_strings:
|
|
211
|
+
# No-op action (e.g., screenshot, cursor_position)
|
|
212
|
+
action_type = getattr(action, "action_type", repr(action))
|
|
213
|
+
skipped.append(str(action_type))
|
|
214
|
+
continue
|
|
215
|
+
|
|
216
|
+
converted.extend(action_strings)
|
|
217
|
+
|
|
218
|
+
except Exception as e:
|
|
219
|
+
action_repr = repr(action)
|
|
220
|
+
self._log_error(f"Failed to convert action: {action_repr}, error: {e}")
|
|
221
|
+
failed.append((action_repr, str(e)))
|
|
222
|
+
|
|
223
|
+
if skipped:
|
|
224
|
+
self._log_debug(f"Skipped no-op actions: {skipped}")
|
|
225
|
+
|
|
226
|
+
if not converted and actions and failed:
|
|
227
|
+
raise RuntimeError(
|
|
228
|
+
f"All action conversions failed ({len(failed)}/{len(actions)}): {failed}"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
return converted
|
|
232
|
+
|
|
233
|
+
@abstractmethod
|
|
234
|
+
def _convert_single_action(self, action: T) -> list[str]:
|
|
235
|
+
"""Convert a single action to pyautogui command string(s).
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
action: Model-specific action object
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
List of pyautogui command strings (may be empty for no-op actions)
|
|
242
|
+
|
|
243
|
+
Raises:
|
|
244
|
+
ValueError: If action format is invalid
|
|
245
|
+
"""
|
|
246
|
+
...
|
|
247
|
+
|
|
248
|
+
@abstractmethod
|
|
249
|
+
def serialize_actions(self, actions: list[T]) -> list[dict[str, Any]]:
|
|
250
|
+
"""Serialize actions for trajectory logging.
|
|
251
|
+
|
|
252
|
+
Args:
|
|
253
|
+
actions: List of model-specific action objects
|
|
254
|
+
|
|
255
|
+
Returns:
|
|
256
|
+
List of serialized action dictionaries
|
|
257
|
+
"""
|
|
258
|
+
...
|
|
259
|
+
|
|
260
|
+
def action_string_to_step(self, action: str) -> dict[str, Any]:
|
|
261
|
+
"""Convert an action string into a step for runtime/do API.
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
action: Action string (e.g., "pyautogui.click(x=100, y=200)")
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
Step dict for runtime API
|
|
268
|
+
"""
|
|
269
|
+
action_str = str(action).strip()
|
|
270
|
+
|
|
271
|
+
# Special markers
|
|
272
|
+
upper = action_str.upper()
|
|
273
|
+
if upper in ["DONE", "FAIL"]:
|
|
274
|
+
return {"type": "sleep", "parameters": {"seconds": 0}}
|
|
275
|
+
|
|
276
|
+
# WAIT(seconds)
|
|
277
|
+
wait_match = re.match(
|
|
278
|
+
r"^WAIT\((?P<sec>[0-9]*\.?[0-9]+)\)$", action_str, re.IGNORECASE
|
|
279
|
+
)
|
|
280
|
+
if wait_match:
|
|
281
|
+
seconds = float(wait_match.group("sec"))
|
|
282
|
+
return {"type": "sleep", "parameters": {"seconds": seconds}}
|
|
283
|
+
|
|
284
|
+
# pyautogui code path
|
|
285
|
+
if "pyautogui" in action_str.lower():
|
|
286
|
+
return {
|
|
287
|
+
"type": "pyautogui",
|
|
288
|
+
"parameters": {"code": action_str},
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
# Default: shell command
|
|
292
|
+
return {"type": "execute", "parameters": {"command": action_str, "shell": True}}
|
oagi/converters/oagi.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) OpenAGI Foundation
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is part of the official API project.
|
|
6
|
+
# Licensed under the MIT License.
|
|
7
|
+
# -----------------------------------------------------------------------------
|
|
8
|
+
"""OAGI action converter.
|
|
9
|
+
|
|
10
|
+
This module provides the OagiActionConverter for converting OAGI actions
|
|
11
|
+
to pyautogui command strings for remote execution.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from ..handler.utils import (
|
|
17
|
+
parse_click_coords,
|
|
18
|
+
parse_drag_coords,
|
|
19
|
+
parse_scroll_coords,
|
|
20
|
+
)
|
|
21
|
+
from ..types import Action, ActionType
|
|
22
|
+
from .base import BaseActionConverter
|
|
23
|
+
|
|
24
|
+
# OAGI uses normalized 0-1000 coordinate space
OAGI_COORD_SIZE = 1000


class OagiActionConverter(BaseActionConverter[Action]):
    """Convert OAGI actions to pyautogui command strings.

    This converter handles:
    1. Coordinate scaling from 0-1000 space to sandbox dimensions
       (1920x1080 by default, see ConverterConfig)
    2. Action format conversion from OAGI Action format to pyautogui strings
    3. Key name normalization for hotkey combinations

    The output can be converted to runtime API steps via action_string_to_step().
    """

    @property
    def coord_width(self) -> int:
        """Width of the OAGI input coordinate space."""
        return OAGI_COORD_SIZE

    @property
    def coord_height(self) -> int:
        """Height of the OAGI input coordinate space."""
        return OAGI_COORD_SIZE

    def __call__(self, actions: list[Action]) -> list[str]:
        """Convert OAGI actions to list of pyautogui command strings.

        Extends base implementation to handle action count and finish detection.

        Args:
            actions: OAGI actions to convert.

        Returns:
            List of pyautogui command strings / markers (WAIT, DONE, FAIL).

        Raises:
            ValueError: If more than one finish()/fail() action is present.
            RuntimeError: If nothing was converted and at least one action failed.
        """
        converted: list[str] = []
        failed: list[tuple[str, str]] = []
        has_terminal = False

        if not actions:
            return converted

        for action in actions:
            # Duplicate finish()/fail() is rejected outright (raised outside
            # the try below, so it is not downgraded to a logged failure).
            if action.type in (ActionType.FINISH, ActionType.FAIL):
                if has_terminal:
                    raise ValueError(
                        "Duplicate finish()/fail() detected. "
                        "Only one finish() or fail() is allowed per action sequence."
                    )
                has_terminal = True

            try:
                converted.extend(self._convert_action(action))
            except Exception as e:
                action_repr = f"{action.type.value}({action.argument})"
                self._log_error(f"Failed to convert action: {action_repr}, error: {e}")
                failed.append((action_repr, str(e)))

        if not converted and failed:
            raise RuntimeError(
                f"All action conversions failed ({len(failed)}/{len(actions)}): {failed}"
            )
        return converted

    def _convert_action(self, action: Action) -> list[str]:
        """Convert action to list of pyautogui command strings.

        Handles action.count for repeat support.
        """
        count = action.count or 1
        # NOTE(review): the action is converted once and the resulting strings
        # are repeated, so side effects of conversion (e.g. the session-mode
        # caps lock toggle) happen once regardless of count - confirm intended.
        single_actions = self._convert_single_action(action)

        # Repeat the actions count times
        return single_actions * int(count)

    def _convert_single_action(self, action: Action) -> list[str]:
        """Convert a single OAGI action to pyautogui command string(s).

        Args:
            action: OAGI action to convert.

        Returns:
            Command strings for the action; empty for actions that emit no
            command (call_user, session-mode caps lock toggle).

        Raises:
            ValueError: If the hotkey is empty, the wait duration is not a
                finite non-negative number, or the action type is unknown.
                Coordinate/hotkey parsing helpers may raise as well.
        """
        action_type = action.type.value
        argument = (action.argument or "").strip("()")

        drag_duration = self.config.drag_duration
        scroll_amount = self.config.scroll_amount
        wait_duration = self.config.wait_duration
        hotkey_interval = self.config.hotkey_interval
        strict = self.config.strict_coordinate_validation

        # The four click variants differ only in the pyautogui function name.
        click_functions = {
            ActionType.CLICK.value: "click",
            ActionType.LEFT_DOUBLE.value: "doubleClick",
            ActionType.LEFT_TRIPLE.value: "tripleClick",
            ActionType.RIGHT_SINGLE.value: "rightClick",
        }
        if action_type in click_functions:
            x, y = parse_click_coords(argument, self._coord_scaler, strict=strict)
            return [f"pyautogui.{click_functions[action_type]}(x={x}, y={y})"]

        if action_type == ActionType.DRAG.value:
            sx, sy, ex, ey = parse_drag_coords(
                argument, self._coord_scaler, strict=strict
            )
            return [
                f"pyautogui.moveTo({sx}, {sy})",
                f"pyautogui.dragTo({ex}, {ey}, duration={drag_duration})",
            ]

        if action_type == ActionType.HOTKEY.value:
            keys = self.parse_hotkey(argument, validate=True)
            valid_keys = [k for k in keys if k]
            if not valid_keys:
                raise ValueError(
                    f"Invalid hotkey format: '{argument}'. "
                    "Expected key names like 'ctrl+c', 'alt+tab'"
                )
            # Check if this is a caps lock key press
            if valid_keys == ["capslock"]:
                if self.caps_manager.should_use_system_capslock():
                    return [f"pyautogui.hotkey('capslock', interval={hotkey_interval})"]
                self.caps_manager.toggle()
                return []  # No pyautogui command for session mode
            keys_str = ", ".join(repr(k) for k in valid_keys)
            return [f"pyautogui.hotkey({keys_str}, interval={hotkey_interval})"]

        if action_type == ActionType.TYPE.value:
            text = argument.strip("\"'")
            text = self.caps_manager.transform_text(text)
            return [f"pyautogui.typewrite({text!r})"]

        if action_type == ActionType.SCROLL.value:
            x, y, direction = parse_scroll_coords(
                argument, self._coord_scaler, strict=strict
            )
            amount = scroll_amount if direction == "up" else -scroll_amount
            return [f"pyautogui.moveTo({x}, {y})", f"pyautogui.scroll({amount})"]

        if action_type == ActionType.WAIT.value:
            try:
                seconds = float(argument) if argument else wait_duration
            except ValueError:
                seconds = None
            # float() also accepts "nan", "inf" and negative numbers; none of
            # them is a usable sleep duration (NaN fails both comparisons).
            if seconds is None or not (0 <= seconds < float("inf")):
                raise ValueError(
                    f"Invalid wait duration: '{argument}'. "
                    "Expected numeric value in seconds."
                )
            # Avoid exponent notation (e.g. "1e-05") so the marker stays in
            # plain-decimal WAIT(<seconds>) form.
            seconds_text = str(seconds)
            if "e" in seconds_text.lower():
                seconds_text = format(seconds, "f")
            return [f"WAIT({seconds_text})"]

        if action_type == ActionType.FINISH.value:
            self._log_info("Task completion action -> DONE")
            return ["DONE"]

        if action_type == ActionType.FAIL.value:
            self._log_info("Task infeasible action -> FAIL")
            return ["FAIL"]

        if action_type == ActionType.CALL_USER.value:
            self._log_info("User intervention requested")
            return []

        raise ValueError(
            f"Unknown action type: '{action_type}'. "
            "Supported: click, left_double, left_triple, right_single, drag, "
            "hotkey, type, scroll, wait, finish, fail, call_user"
        )

    def serialize_actions(self, actions: list[Action]) -> list[dict[str, Any]]:
        """Serialize OAGI actions for trajectory logging.

        Args:
            actions: OAGI actions to serialize; None or empty yields [].

        Returns:
            One dict per action with its type value, argument and count.
        """
        return [
            {
                "type": action.type.value,
                "argument": action.argument,
                "count": action.count,
            }
            for action in (actions or [])
        ]
|
|
@@ -17,6 +17,7 @@ from ..constants import DEFAULT_STEP_DELAY
|
|
|
17
17
|
from ..exceptions import check_optional_dependency
|
|
18
18
|
from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
|
|
19
19
|
from .capslock_manager import CapsLockManager
|
|
20
|
+
from .utils import CoordinateScaler, normalize_key, parse_hotkey
|
|
20
21
|
|
|
21
22
|
check_optional_dependency("pyautogui", "PyautoguiActionHandler", "desktop")
|
|
22
23
|
import pyautogui # noqa: E402
|
|
@@ -92,6 +93,15 @@ class PyautoguiActionHandler:
|
|
|
92
93
|
self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
|
|
93
94
|
# The origin position of coordinates (the top-left corner of the target screen)
|
|
94
95
|
self.origin_x, self.origin_y = 0, 0
|
|
96
|
+
# Initialize coordinate scaler (OAGI uses 0-1000 normalized coordinates)
|
|
97
|
+
self._coord_scaler = CoordinateScaler(
|
|
98
|
+
source_width=1000,
|
|
99
|
+
source_height=1000,
|
|
100
|
+
target_width=self.screen_width,
|
|
101
|
+
target_height=self.screen_height,
|
|
102
|
+
origin_x=self.origin_x,
|
|
103
|
+
origin_y=self.origin_y,
|
|
104
|
+
)
|
|
95
105
|
|
|
96
106
|
def reset(self):
|
|
97
107
|
"""Reset handler state.
|
|
@@ -109,6 +119,9 @@ class PyautoguiActionHandler:
|
|
|
109
119
|
"""
|
|
110
120
|
self.screen_width, self.screen_height = screen.width, screen.height
|
|
111
121
|
self.origin_x, self.origin_y = screen.x, screen.y
|
|
122
|
+
# Update coordinate scaler
|
|
123
|
+
self._coord_scaler.set_target_size(screen.width, screen.height)
|
|
124
|
+
self._coord_scaler.set_origin(screen.x, screen.y)
|
|
112
125
|
|
|
113
126
|
def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
|
|
114
127
|
"""Convert coordinates from 0-1000 range to actual screen coordinates.
|
|
@@ -116,26 +129,7 @@ class PyautoguiActionHandler:
|
|
|
116
129
|
Also handles corner coordinates to prevent PyAutoGUI fail-safe trigger.
|
|
117
130
|
Corner coordinates (0,0), (0,max), (max,0), (max,max) are offset by 1 pixel.
|
|
118
131
|
"""
|
|
119
|
-
|
|
120
|
-
screen_y = int(y * self.screen_height / 1000)
|
|
121
|
-
|
|
122
|
-
# Prevent fail-safe by adjusting corner coordinates
|
|
123
|
-
# Check if coordinates are at screen corners (with small tolerance)
|
|
124
|
-
if screen_x < 1:
|
|
125
|
-
screen_x = 1
|
|
126
|
-
elif screen_x > self.screen_width - 1:
|
|
127
|
-
screen_x = self.screen_width - 1
|
|
128
|
-
|
|
129
|
-
if screen_y < 1:
|
|
130
|
-
screen_y = 1
|
|
131
|
-
elif screen_y > self.screen_height - 1:
|
|
132
|
-
screen_y = self.screen_height - 1
|
|
133
|
-
|
|
134
|
-
# Add origin offset to convert relative to top-left corner
|
|
135
|
-
screen_x += self.origin_x
|
|
136
|
-
screen_y += self.origin_y
|
|
137
|
-
|
|
138
|
-
return screen_x, screen_y
|
|
132
|
+
return self._coord_scaler.scale(x, y, prevent_failsafe=True)
|
|
139
133
|
|
|
140
134
|
def _parse_coords(self, args_str: str) -> tuple[int, int]:
|
|
141
135
|
"""Extract x, y coordinates from argument string."""
|
|
@@ -163,28 +157,15 @@ class PyautoguiActionHandler:
|
|
|
163
157
|
|
|
164
158
|
def _normalize_key(self, key: str) -> str:
|
|
165
159
|
"""Normalize key names for consistency."""
|
|
166
|
-
key =
|
|
167
|
-
# Normalize caps lock variations
|
|
168
|
-
hotkey_variations_mapping = {
|
|
169
|
-
"capslock": ["caps_lock", "caps", "capslock"],
|
|
170
|
-
"pgup": ["page_up", "pageup"],
|
|
171
|
-
"pgdn": ["page_down", "pagedown"],
|
|
172
|
-
}
|
|
173
|
-
for normalized, variations in hotkey_variations_mapping.items():
|
|
174
|
-
if key in variations:
|
|
175
|
-
return normalized
|
|
176
|
-
# Remap ctrl to command on macOS if enabled
|
|
177
|
-
if self.config.macos_ctrl_to_cmd and sys.platform == "darwin" and key == "ctrl":
|
|
178
|
-
return "command"
|
|
179
|
-
return key
|
|
160
|
+
return normalize_key(key, macos_ctrl_to_cmd=self.config.macos_ctrl_to_cmd)
|
|
180
161
|
|
|
181
162
|
def _parse_hotkey(self, args_str: str) -> list[str]:
|
|
182
163
|
"""Parse hotkey string into list of keys."""
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
164
|
+
return parse_hotkey(
|
|
165
|
+
args_str,
|
|
166
|
+
macos_ctrl_to_cmd=self.config.macos_ctrl_to_cmd,
|
|
167
|
+
validate=False, # Don't validate, let pyautogui handle invalid keys
|
|
168
|
+
)
|
|
188
169
|
|
|
189
170
|
def _move_and_wait(self, x: int, y: int) -> None:
|
|
190
171
|
"""Move cursor to position and wait before clicking."""
|
|
@@ -269,8 +250,8 @@ class PyautoguiActionHandler:
|
|
|
269
250
|
)
|
|
270
251
|
pyautogui.scroll(scroll_amount)
|
|
271
252
|
|
|
272
|
-
case ActionType.FINISH:
|
|
273
|
-
# Task completion - reset handler state
|
|
253
|
+
case ActionType.FINISH | ActionType.FAIL:
|
|
254
|
+
# Task completion or infeasible - reset handler state
|
|
274
255
|
self.reset()
|
|
275
256
|
|
|
276
257
|
case ActionType.WAIT:
|