oagi-core 0.14.1__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oagi/__init__.py +10 -0
- oagi/converters/__init__.py +56 -0
- oagi/converters/base.py +292 -0
- oagi/converters/oagi.py +198 -0
- oagi/handler/pyautogui_action_handler.py +22 -41
- oagi/handler/utils.py +587 -0
- oagi/handler/ydotool_action_handler.py +22 -43
- oagi/server/socketio_server.py +1 -1
- oagi/types/models/action.py +1 -0
- oagi/utils/output_parser.py +2 -1
- oagi/utils/prompt_builder.py +1 -0
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/METADATA +1 -1
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/RECORD +16 -13
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/WHEEL +0 -0
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/entry_points.txt +0 -0
- {oagi_core-0.14.1.dist-info → oagi_core-0.15.0.dist-info}/licenses/LICENSE +0 -0
oagi/handler/utils.py
CHANGED
|
@@ -5,6 +5,593 @@
|
|
|
5
5
|
# This file is part of the official API project.
|
|
6
6
|
# Licensed under the MIT License.
|
|
7
7
|
# -----------------------------------------------------------------------------
|
|
8
|
+
"""Shared utilities for action handling and conversion.
|
|
9
|
+
|
|
10
|
+
This module provides common functionality used by both PyautoguiActionHandler
|
|
11
|
+
(for local execution) and action converters (for remote execution).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
# =============================================================================
|
|
17
|
+
# Key Normalization Mapping
|
|
18
|
+
# =============================================================================
|
|
19
|
+
|
|
20
|
+
# Minimal key mapping - only normalizes common variations to pyautogui names
|
|
21
|
+
# Matches original PyautoguiActionHandler.hotkey_variations_mapping behavior exactly:
|
|
22
|
+
# "capslock": ["caps_lock", "caps", "capslock"] -> capslock
|
|
23
|
+
# "pgup": ["page_up", "pageup"] -> pgup
|
|
24
|
+
# "pgdn": ["page_down", "pagedown"] -> pgdn
|
|
25
|
+
KEY_MAP: dict[str, str] = {
    # Each alias maps to the short pyautogui name it is normalized to.
    # Canonical names themselves are absent: lookups fall back to the input.
    alias: canonical
    for canonical, aliases in (
        ("capslock", ("caps_lock", "caps")),
        ("pgup", ("page_up", "pageup")),
        ("pgdn", ("page_down", "pagedown")),
    )
    for alias in aliases
}
|
|
36
|
+
|
|
37
|
+
# Valid pyautogui key names
|
|
38
|
+
PYAUTOGUI_VALID_KEYS: frozenset[str] = frozenset(
    [
        # Alphabet keys
        *"abcdefghijklmnopqrstuvwxyz",
        # Number keys
        *"0123456789",
        # Function keys: f1 through f24
        *(f"f{index}" for index in range(1, 25)),
        # Navigation keys
        "up", "down", "left", "right", "home", "end",
        "pageup", "pagedown", "pgup", "pgdn",
        # Editing keys
        "backspace", "delete", "del", "insert",
        "enter", "return", "tab", "space",
        # Modifier keys (with left/right variants)
        "shift", "shiftleft", "shiftright",
        "ctrl", "ctrlleft", "ctrlright",
        "alt", "altleft", "altright",
        "option", "optionleft", "optionright",
        "command",
        "win", "winleft", "winright",
        "fn",
        # Lock keys
        "capslock", "numlock", "scrolllock",
        # Special keys
        "esc", "escape", "pause",
        "printscreen", "prtsc", "prtscr", "prntscrn", "print",
        "apps", "clear", "sleep",
        # Symbols (one key per character)
        *"!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?`~",
        # Numpad keys: num0 through num9, then the operators
        *(f"num{digit}" for digit in range(10)),
        "divide", "multiply", "subtract", "add", "decimal",
        # Media keys
        "volumeup", "volumedown", "volumemute",
        "playpause", "stop", "nexttrack", "prevtrack",
        # Browser keys
        "browserback", "browserforward", "browserrefresh", "browserstop",
        "browsersearch", "browserfavorites", "browserhome",
        # Application launch keys
        "launchapp1", "launchapp2", "launchmail", "launchmediaselect",
    ]
)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# =============================================================================
|
|
232
|
+
# Coordinate Scaling
|
|
233
|
+
# =============================================================================
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class CoordinateScaler:
|
|
237
|
+
"""Handles coordinate scaling between different coordinate systems.
|
|
238
|
+
|
|
239
|
+
This class provides reusable coordinate transformation logic used by both
|
|
240
|
+
PyautoguiActionHandler (local execution) and action converters (remote execution).
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
source_width: Width of the source coordinate space (e.g., 1000 for OAGI)
|
|
244
|
+
source_height: Height of the source coordinate space
|
|
245
|
+
target_width: Width of the target coordinate space (e.g., screen width)
|
|
246
|
+
target_height: Height of the target coordinate space
|
|
247
|
+
origin_x: X offset of the target coordinate origin (for multi-monitor)
|
|
248
|
+
origin_y: Y offset of the target coordinate origin (for multi-monitor)
|
|
249
|
+
"""
|
|
250
|
+
|
|
251
|
+
def __init__(
|
|
252
|
+
self,
|
|
253
|
+
source_width: int,
|
|
254
|
+
source_height: int,
|
|
255
|
+
target_width: int,
|
|
256
|
+
target_height: int,
|
|
257
|
+
origin_x: int = 0,
|
|
258
|
+
origin_y: int = 0,
|
|
259
|
+
):
|
|
260
|
+
self.source_width = source_width
|
|
261
|
+
self.source_height = source_height
|
|
262
|
+
self.target_width = target_width
|
|
263
|
+
self.target_height = target_height
|
|
264
|
+
self.origin_x = origin_x
|
|
265
|
+
self.origin_y = origin_y
|
|
266
|
+
self.scale_x = target_width / source_width
|
|
267
|
+
self.scale_y = target_height / source_height
|
|
268
|
+
|
|
269
|
+
def scale(
|
|
270
|
+
self,
|
|
271
|
+
x: int | float,
|
|
272
|
+
y: int | float,
|
|
273
|
+
*,
|
|
274
|
+
clamp: bool = True,
|
|
275
|
+
prevent_failsafe: bool = False,
|
|
276
|
+
strict: bool = False,
|
|
277
|
+
) -> tuple[int, int]:
|
|
278
|
+
"""Scale coordinates from source to target space.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
x: X coordinate in source space
|
|
282
|
+
y: Y coordinate in source space
|
|
283
|
+
clamp: If True, clamp to valid target range
|
|
284
|
+
prevent_failsafe: If True, offset corner coordinates by 1 pixel
|
|
285
|
+
(prevents PyAutoGUI fail-safe trigger)
|
|
286
|
+
strict: If True, raise ValueError when coordinates are outside
|
|
287
|
+
valid source range [0, source_width] x [0, source_height]
|
|
288
|
+
|
|
289
|
+
Returns:
|
|
290
|
+
Tuple of (target_x, target_y) in target coordinate space
|
|
291
|
+
|
|
292
|
+
Raises:
|
|
293
|
+
ValueError: If strict=True and coordinates are outside valid range
|
|
294
|
+
"""
|
|
295
|
+
# Strict validation: check if coordinates are in valid source range
|
|
296
|
+
if strict:
|
|
297
|
+
if x < 0 or x > self.source_width:
|
|
298
|
+
raise ValueError(
|
|
299
|
+
f"x coordinate {x} out of valid range [0, {self.source_width}]. "
|
|
300
|
+
f"Coordinates must be normalized between 0 and {self.source_width}."
|
|
301
|
+
)
|
|
302
|
+
if y < 0 or y > self.source_height:
|
|
303
|
+
raise ValueError(
|
|
304
|
+
f"y coordinate {y} out of valid range [0, {self.source_height}]. "
|
|
305
|
+
f"Coordinates must be normalized between 0 and {self.source_height}."
|
|
306
|
+
)
|
|
307
|
+
|
|
308
|
+
scaled_x = round(x * self.scale_x)
|
|
309
|
+
scaled_y = round(y * self.scale_y)
|
|
310
|
+
|
|
311
|
+
if clamp:
|
|
312
|
+
# Clamp to valid range
|
|
313
|
+
scaled_x = max(0, min(scaled_x, self.target_width - 1))
|
|
314
|
+
scaled_y = max(0, min(scaled_y, self.target_height - 1))
|
|
315
|
+
|
|
316
|
+
if prevent_failsafe:
|
|
317
|
+
# Prevent PyAutoGUI fail-safe by adjusting corner coordinates
|
|
318
|
+
if scaled_x < 1:
|
|
319
|
+
scaled_x = 1
|
|
320
|
+
elif scaled_x > self.target_width - 2:
|
|
321
|
+
scaled_x = self.target_width - 2
|
|
322
|
+
if scaled_y < 1:
|
|
323
|
+
scaled_y = 1
|
|
324
|
+
elif scaled_y > self.target_height - 2:
|
|
325
|
+
scaled_y = self.target_height - 2
|
|
326
|
+
|
|
327
|
+
# Add origin offset (for multi-monitor support)
|
|
328
|
+
return scaled_x + self.origin_x, scaled_y + self.origin_y
|
|
329
|
+
|
|
330
|
+
def set_origin(self, origin_x: int, origin_y: int) -> None:
|
|
331
|
+
"""Update the origin offset."""
|
|
332
|
+
self.origin_x = origin_x
|
|
333
|
+
self.origin_y = origin_y
|
|
334
|
+
|
|
335
|
+
def set_target_size(self, width: int, height: int) -> None:
|
|
336
|
+
"""Update the target size and recalculate scale factors."""
|
|
337
|
+
self.target_width = width
|
|
338
|
+
self.target_height = height
|
|
339
|
+
self.scale_x = width / self.source_width
|
|
340
|
+
self.scale_y = height / self.source_height
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
# =============================================================================
|
|
344
|
+
# Key Normalization Functions
|
|
345
|
+
# =============================================================================
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def normalize_key(key: str, *, macos_ctrl_to_cmd: bool = False) -> str:
    """Normalize a key name to pyautogui format.

    The key is stripped of surrounding whitespace and lower-cased, then looked
    up in KEY_MAP; names without an entry there are returned as cleaned.

    Args:
        key: Key name to normalize (e.g., "Ctrl", " page_down ", "caps")
        macos_ctrl_to_cmd: If True and on macOS, remap 'ctrl' to 'command'

    Returns:
        Normalized key name (e.g., "ctrl", "pgdn", "capslock")
    """
    cleaned = key.strip().lower()
    canonical = KEY_MAP.get(cleaned, cleaned)

    # Only the plain "ctrl" key is remapped, and only when running on macOS.
    remap_to_command = (
        macos_ctrl_to_cmd and sys.platform == "darwin" and canonical == "ctrl"
    )
    return "command" if remap_to_command else canonical
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def parse_hotkey(
    hotkey_str: str,
    *,
    macos_ctrl_to_cmd: bool = False,
    validate: bool = True,
) -> list[str]:
    """Parse a hotkey string into a list of normalized key names.

    Keys are separated by '+' when the string contains one, otherwise by ','.
    Each piece is passed through normalize_key() and empty pieces are dropped.

    Args:
        hotkey_str: Hotkey string (e.g., "ctrl+c", "alt, tab", " (Shift+Enter) ")
        macos_ctrl_to_cmd: If True and on macOS, remap 'ctrl' to 'command'
        validate: If True, validate keys against PYAUTOGUI_VALID_KEYS

    Returns:
        List of normalized key names (e.g., ["ctrl", "c"])

    Raises:
        ValueError: If validate=True and any key is invalid
    """
    # Trim whitespace before the parentheses: str.strip("()") only removes
    # characters at the very ends, so padding around "(ctrl+c)" would otherwise
    # leave the parentheses glued to the first and last key.
    hotkey_str = hotkey_str.strip().strip("()")

    # Split by '+' or ',' to get individual keys
    separator = "+" if "+" in hotkey_str else ","
    normalized = [
        normalize_key(part, macos_ctrl_to_cmd=macos_ctrl_to_cmd)
        for part in hotkey_str.split(separator)
    ]

    # Filter empty strings (e.g. from a trailing separator)
    keys = [k for k in normalized if k]

    if validate:
        validate_keys(keys)

    return keys
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def validate_keys(keys: list[str]) -> None:
    """Validate that all keys are recognized by pyautogui.

    Args:
        keys: List of normalized key names

    Raises:
        ValueError: If any key is invalid, with helpful suggestions
    """
    invalid_keys = [k for k in keys if k and k not in PYAUTOGUI_VALID_KEYS]
    if not invalid_keys:
        return

    suggestions = []
    for invalid_key in invalid_keys:
        if invalid_key in ("ret",):
            suggestions.append(f"'{invalid_key}' -> use 'enter' or 'return'")
        elif invalid_key.startswith("num") and len(invalid_key) > 3:
            suggestions.append(
                f"'{invalid_key}' -> numpad keys use format 'num0'-'num9'"
            )
        else:
            suggestions.append(f"'{invalid_key}' is not a valid key name")

    error_msg = "Invalid key name(s) in hotkey: " + ", ".join(suggestions)
    # Sort BEFORE slicing: a frozenset iterates in hash order, which changes
    # between interpreter runs for strings, so slicing first made the message
    # non-reproducible. Single-character keys are left out of the sample so it
    # shows named keys rather than a run of punctuation and digits.
    named_keys = sorted(k for k in PYAUTOGUI_VALID_KEYS if len(k) > 1)
    valid_sample = ", ".join(named_keys[:30])
    error_msg += f"\n\nValid keys include: {valid_sample}... (and more)"
    raise ValueError(error_msg)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# =============================================================================
|
|
441
|
+
# Coordinate Parsing Functions
|
|
442
|
+
# =============================================================================
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def parse_click_coords(
    argument: str,
    scaler: CoordinateScaler,
    *,
    prevent_failsafe: bool = False,
    strict: bool = False,
) -> tuple[int, int]:
    """Parse click coordinates from argument string.

    Only the first two comma-separated values are used; any further values
    are ignored.

    Args:
        argument: Coordinate string in format "x, y"
        scaler: CoordinateScaler instance for coordinate transformation
        prevent_failsafe: If True, offset corner coordinates
        strict: If True, raise ValueError for out-of-range coordinates

    Returns:
        Tuple of (x, y) in target coordinate space

    Raises:
        ValueError: If coordinate format is invalid (including empty input and
            non-finite values such as "inf") or (strict=True) out of range
    """
    # Guard before calling str methods so an empty/None argument reaches the
    # format error below instead of failing on .lower().
    lowered = argument.lower() if argument else ""

    # Check for common format errors
    if " and " in lowered or " then " in lowered:
        raise ValueError(
            f"Invalid click format: '{argument}'. "
            "Cannot combine multiple actions with 'and' or 'then'."
        )

    parts = argument.split(",") if argument else []
    if len(parts) < 2:
        raise ValueError(
            f"Invalid click coordinate format: '{argument}'. "
            "Expected 'x, y' (comma-separated numeric values)"
        )

    try:
        x = float(parts[0].strip())
        y = float(parts[1].strip())
        return scaler.scale(x, y, prevent_failsafe=prevent_failsafe, strict=strict)
    except (ValueError, IndexError, OverflowError) as e:
        # OverflowError: float() accepts "inf"/"1e999", but converting an
        # infinite value to an integer pixel position overflows.
        raise ValueError(
            f"Failed to parse click coords '{argument}': {e}. "
            "Coordinates must be comma-separated numeric values."
        ) from e
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def parse_drag_coords(
    argument: str,
    scaler: CoordinateScaler,
    *,
    prevent_failsafe: bool = False,
    strict: bool = False,
) -> tuple[int, int, int, int]:
    """Parse drag coordinates from argument string.

    Args:
        argument: Coordinate string in format "x1, y1, x2, y2"
        scaler: CoordinateScaler instance for coordinate transformation
        prevent_failsafe: If True, offset corner coordinates
        strict: If True, raise ValueError for out-of-range coordinates

    Returns:
        Tuple of (x1, y1, x2, y2) in target coordinate space

    Raises:
        ValueError: If coordinate format is invalid (including empty input and
            non-finite values such as "inf") or (strict=True) out of range
    """
    # Guard before calling str methods so an empty/None argument reaches the
    # format error below instead of failing on .lower().
    lowered = argument.lower() if argument else ""

    # Check for common format errors
    if " and " in lowered or " then " in lowered:
        raise ValueError(
            f"Invalid drag format: '{argument}'. "
            "Cannot combine multiple actions with 'and' or 'then'."
        )

    parts = argument.split(",") if argument else []
    if len(parts) != 4:
        raise ValueError(
            f"Invalid drag coordinate format: '{argument}'. "
            "Expected 'x1, y1, x2, y2' (4 comma-separated numeric values)"
        )

    try:
        sx, sy, ex, ey = (float(part.strip()) for part in parts)
        # Start and end points are scaled independently with the same options.
        x1, y1 = scaler.scale(sx, sy, prevent_failsafe=prevent_failsafe, strict=strict)
        x2, y2 = scaler.scale(ex, ey, prevent_failsafe=prevent_failsafe, strict=strict)
        return x1, y1, x2, y2
    except (ValueError, IndexError, OverflowError) as e:
        # OverflowError: float() accepts "inf"/"1e999", but converting an
        # infinite value to an integer pixel position overflows.
        raise ValueError(
            f"Failed to parse drag coords '{argument}': {e}. "
            "Coordinates must be comma-separated numeric values."
        ) from e
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def parse_scroll_coords(
    argument: str,
    scaler: CoordinateScaler,
    *,
    prevent_failsafe: bool = False,
    strict: bool = False,
) -> tuple[int, int, str]:
    """Parse scroll coordinates and direction from argument string.

    Args:
        argument: Scroll string in format "x, y, direction"
        scaler: CoordinateScaler instance for coordinate transformation
        prevent_failsafe: If True, offset corner coordinates
        strict: If True, raise ValueError for out-of-range coordinates

    Returns:
        Tuple of (x, y, direction) where direction is 'up' or 'down'

    Raises:
        ValueError: If format is invalid (including empty input, an unknown
            direction, or non-finite values such as "inf") or (strict=True)
            coordinates out of range
    """
    # Same empty/None guard as the click and drag parsers.
    parts = [p.strip() for p in argument.split(",")] if argument else []
    if len(parts) != 3:
        raise ValueError(
            f"Invalid scroll format: '{argument}'. "
            "Expected 'x, y, direction' (e.g., '500, 300, up')"
        )

    try:
        x = float(parts[0])
        y = float(parts[1])
        direction = parts[2].lower()

        if direction in ("up", "down"):
            scaled_x, scaled_y = scaler.scale(
                x, y, prevent_failsafe=prevent_failsafe, strict=strict
            )
            return scaled_x, scaled_y, direction
    except (ValueError, IndexError, OverflowError) as e:
        # OverflowError: float() accepts "inf"/"1e999", but converting an
        # infinite value to an integer pixel position overflows.
        raise ValueError(
            f"Failed to parse scroll coords '{argument}': {e}. "
            "Format: 'x, y, direction'"
        ) from e

    # Reached only when the numbers parsed but the direction is unknown.
    # Raising here, outside the try, keeps this message from being re-wrapped
    # without having to inspect exception text.
    raise ValueError(f"Invalid scroll direction: '{direction}'. Use 'up' or 'down'.")
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
# =============================================================================
|
|
593
|
+
# Handler Utility Functions
|
|
594
|
+
# =============================================================================
|
|
8
595
|
|
|
9
596
|
|
|
10
597
|
def reset_handler(handler) -> None:
|
|
@@ -15,6 +15,7 @@ from oagi.handler.screen_manager import Screen
|
|
|
15
15
|
from ..constants import DEFAULT_STEP_DELAY
|
|
16
16
|
from ..types import Action, ActionType, parse_coords, parse_drag_coords, parse_scroll
|
|
17
17
|
from .capslock_manager import CapsLockManager
|
|
18
|
+
from .utils import CoordinateScaler, normalize_key, parse_hotkey
|
|
18
19
|
from .wayland_support import Ydotool, get_screen_size
|
|
19
20
|
|
|
20
21
|
|
|
@@ -73,6 +74,13 @@ class YdotoolActionHandler(Ydotool):
|
|
|
73
74
|
self.caps_manager = CapsLockManager(mode=self.config.capslock_mode)
|
|
74
75
|
# The origin position of coordinates (the top-left corner of the screen)
|
|
75
76
|
self.origin_x, self.origin_y = 0, 0
|
|
77
|
+
# Initialize coordinate scaler
|
|
78
|
+
self._coord_scaler = CoordinateScaler(
|
|
79
|
+
source_width=1000,
|
|
80
|
+
source_height=1000,
|
|
81
|
+
target_width=self.screen_width,
|
|
82
|
+
target_height=self.screen_height,
|
|
83
|
+
)
|
|
76
84
|
|
|
77
85
|
def reset(self):
|
|
78
86
|
"""Reset handler state.
|
|
@@ -90,6 +98,12 @@ class YdotoolActionHandler(Ydotool):
|
|
|
90
98
|
"""
|
|
91
99
|
self.screen_width, self.screen_height = screen.width, screen.height
|
|
92
100
|
self.origin_x, self.origin_y = screen.x, screen.y
|
|
101
|
+
self._coord_scaler = CoordinateScaler(
|
|
102
|
+
source_width=1000,
|
|
103
|
+
source_height=1000,
|
|
104
|
+
target_width=self.screen_width,
|
|
105
|
+
target_height=self.screen_height,
|
|
106
|
+
)
|
|
93
107
|
|
|
94
108
|
def _execute_action(self, action: Action) -> bool:
|
|
95
109
|
"""
|
|
@@ -150,8 +164,8 @@ class YdotoolActionHandler(Ydotool):
|
|
|
150
164
|
text = self.caps_manager.transform_text(text)
|
|
151
165
|
self._run_ydotool(["type", text], count=count)
|
|
152
166
|
|
|
153
|
-
case ActionType.FINISH:
|
|
154
|
-
# Task completion - reset handler state
|
|
167
|
+
case ActionType.FINISH | ActionType.FAIL:
|
|
168
|
+
# Task completion or infeasible - reset handler state
|
|
155
169
|
self.reset()
|
|
156
170
|
|
|
157
171
|
case ActionType.WAIT:
|
|
@@ -168,45 +182,14 @@ class YdotoolActionHandler(Ydotool):
|
|
|
168
182
|
return finished
|
|
169
183
|
|
|
170
184
|
def _denormalize_coords(self, x: float, y: float) -> tuple[int, int]:
|
|
171
|
-
"""Convert coordinates from 0-1000 range to actual screen coordinates.
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
"""
|
|
176
|
-
screen_x = int(x * self.screen_width / 1000)
|
|
177
|
-
screen_y = int(y * self.screen_height / 1000)
|
|
178
|
-
|
|
179
|
-
# Prevent fail-safe by adjusting corner coordinates
|
|
180
|
-
# Check if coordinates are at screen corners (with small tolerance)
|
|
181
|
-
if screen_x < 1:
|
|
182
|
-
screen_x = 1
|
|
183
|
-
elif screen_x > self.screen_width - 1:
|
|
184
|
-
screen_x = self.screen_width - 1
|
|
185
|
-
|
|
186
|
-
if screen_y < 1:
|
|
187
|
-
screen_y = 1
|
|
188
|
-
elif screen_y > self.screen_height - 1:
|
|
189
|
-
screen_y = self.screen_height - 1
|
|
190
|
-
|
|
191
|
-
# Add origin offset to convert relative to top-left corner
|
|
192
|
-
screen_x += self.origin_x
|
|
193
|
-
screen_y += self.origin_y
|
|
194
|
-
|
|
195
|
-
return screen_x, screen_y
|
|
185
|
+
"""Convert coordinates from 0-1000 range to actual screen coordinates."""
|
|
186
|
+
screen_x, screen_y = self._coord_scaler.scale(x, y, prevent_failsafe=True)
|
|
187
|
+
# Add origin offset for multi-screen support
|
|
188
|
+
return screen_x + self.origin_x, screen_y + self.origin_y
|
|
196
189
|
|
|
197
190
|
def _normalize_key(self, key: str) -> str:
|
|
198
191
|
"""Normalize key names for consistency."""
|
|
199
|
-
|
|
200
|
-
# Normalize caps lock variations
|
|
201
|
-
hotkey_variations_mapping = {
|
|
202
|
-
"capslock": ["caps_lock", "caps", "capslock"],
|
|
203
|
-
"pgup": ["page_up", "pageup"],
|
|
204
|
-
"pgdn": ["page_down", "pagedown"],
|
|
205
|
-
}
|
|
206
|
-
for normalized, variations in hotkey_variations_mapping.items():
|
|
207
|
-
if key in variations:
|
|
208
|
-
return normalized
|
|
209
|
-
return key
|
|
192
|
+
return normalize_key(key)
|
|
210
193
|
|
|
211
194
|
def _parse_coords(self, args_str: str) -> tuple[int, int]:
|
|
212
195
|
"""Extract x, y coordinates from argument string."""
|
|
@@ -234,11 +217,7 @@ class YdotoolActionHandler(Ydotool):
|
|
|
234
217
|
|
|
235
218
|
def _parse_hotkey(self, args_str: str) -> list[str]:
|
|
236
219
|
"""Parse hotkey string into list of keys."""
|
|
237
|
-
|
|
238
|
-
args_str = args_str.strip("()")
|
|
239
|
-
# Split by '+' to get individual keys
|
|
240
|
-
keys = [self._normalize_key(key) for key in args_str.split("+")]
|
|
241
|
-
return keys
|
|
220
|
+
return parse_hotkey(args_str.strip("()"), validate=False)
|
|
242
221
|
|
|
243
222
|
def __call__(self, actions: list[Action]) -> None:
|
|
244
223
|
"""Execute the provided list of actions."""
|
oagi/server/socketio_server.py
CHANGED
|
@@ -364,7 +364,7 @@ class SessionNamespace(socketio.AsyncNamespace):
|
|
|
364
364
|
timeout=self.config.socketio_timeout,
|
|
365
365
|
)
|
|
366
366
|
|
|
367
|
-
case ActionType.FINISH:
|
|
367
|
+
case ActionType.FINISH | ActionType.FAIL:
|
|
368
368
|
return await self.call(
|
|
369
369
|
"finish",
|
|
370
370
|
FinishEventData(**common).model_dump(),
|