oagi-core 0.14.0__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oagi/handler/utils.py CHANGED
@@ -5,6 +5,593 @@
5
5
  # This file is part of the official API project.
6
6
  # Licensed under the MIT License.
7
7
  # -----------------------------------------------------------------------------
8
+ """Shared utilities for action handling and conversion.
9
+
10
+ This module provides common functionality used by both PyautoguiActionHandler
11
+ (for local execution) and action converters (for remote execution).
12
+ """
13
+
14
+ import sys
15
+
16
+ # =============================================================================
17
+ # Key Normalization Mapping
18
+ # =============================================================================
19
+
20
+ # Minimal key mapping - only normalizes common variations to pyautogui names
21
+ # Matches original PyautoguiActionHandler.hotkey_variations_mapping behavior exactly:
22
+ # "capslock": ["caps_lock", "caps", "capslock"] -> capslock
23
+ # "pgup": ["page_up", "pageup"] -> pgup
24
+ # "pgdn": ["page_down", "pagedown"] -> pgdn
25
# Alias -> canonical pyautogui key name.
# Each family of spellings is declared once as (canonical, aliases); names that
# are already canonical are intentionally absent from the mapping.
KEY_MAP: dict[str, str] = {
    alias: canonical
    for canonical, aliases in (
        # Caps lock variations -> capslock
        ("capslock", ("caps_lock", "caps")),
        # Page up variations -> pgup (short form, matching original)
        ("pgup", ("page_up", "pageup")),
        # Page down variations -> pgdn (short form, matching original)
        ("pgdn", ("page_down", "pagedown")),
    )
    for alias in aliases
}
36
+
37
+ # Valid pyautogui key names
38
# Key names accepted as valid, grouped by category. Regular runs (letters,
# digits, f1-f24, num0-num9, punctuation) are expanded from compact
# definitions; irregular names are spelled out explicitly.
PYAUTOGUI_VALID_KEYS: frozenset[str] = frozenset(
    [
        # Alphabet keys
        *"abcdefghijklmnopqrstuvwxyz",
        # Number keys
        *"0123456789",
        # Function keys (f1 through f24)
        *(f"f{index}" for index in range(1, 25)),
        # Navigation keys
        "up", "down", "left", "right",
        "home", "end",
        "pageup", "pagedown", "pgup", "pgdn",
        # Editing keys
        "backspace", "delete", "del", "insert",
        "enter", "return", "tab", "space",
        # Modifier keys (with left/right variants)
        "shift", "shiftleft", "shiftright",
        "ctrl", "ctrlleft", "ctrlright",
        "alt", "altleft", "altright",
        "option", "optionleft", "optionright",
        "command",
        "win", "winleft", "winright",
        "fn",
        # Lock keys
        "capslock", "numlock", "scrolllock",
        # Special keys
        "esc", "escape", "pause",
        "printscreen", "prtsc", "prtscr", "prntscrn", "print",
        "apps", "clear", "sleep",
        # Symbols (one key name per character)
        *"!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?`~",
        # Numpad keys
        *(f"num{digit}" for digit in range(10)),
        "divide", "multiply", "subtract", "add", "decimal",
        # Media keys
        "volumeup", "volumedown", "volumemute",
        "playpause", "stop", "nexttrack", "prevtrack",
        # Browser keys
        "browserback", "browserforward", "browserrefresh", "browserstop",
        "browsersearch", "browserfavorites", "browserhome",
        # Application launch keys
        "launchapp1", "launchapp2", "launchmail", "launchmediaselect",
    ]
)
229
+
230
+
231
+ # =============================================================================
232
+ # Coordinate Scaling
233
+ # =============================================================================
234
+
235
+
236
+ class CoordinateScaler:
237
+ """Handles coordinate scaling between different coordinate systems.
238
+
239
+ This class provides reusable coordinate transformation logic used by both
240
+ PyautoguiActionHandler (local execution) and action converters (remote execution).
241
+
242
+ Args:
243
+ source_width: Width of the source coordinate space (e.g., 1000 for OAGI)
244
+ source_height: Height of the source coordinate space
245
+ target_width: Width of the target coordinate space (e.g., screen width)
246
+ target_height: Height of the target coordinate space
247
+ origin_x: X offset of the target coordinate origin (for multi-monitor)
248
+ origin_y: Y offset of the target coordinate origin (for multi-monitor)
249
+ """
250
+
251
+ def __init__(
252
+ self,
253
+ source_width: int,
254
+ source_height: int,
255
+ target_width: int,
256
+ target_height: int,
257
+ origin_x: int = 0,
258
+ origin_y: int = 0,
259
+ ):
260
+ self.source_width = source_width
261
+ self.source_height = source_height
262
+ self.target_width = target_width
263
+ self.target_height = target_height
264
+ self.origin_x = origin_x
265
+ self.origin_y = origin_y
266
+ self.scale_x = target_width / source_width
267
+ self.scale_y = target_height / source_height
268
+
269
+ def scale(
270
+ self,
271
+ x: int | float,
272
+ y: int | float,
273
+ *,
274
+ clamp: bool = True,
275
+ prevent_failsafe: bool = False,
276
+ strict: bool = False,
277
+ ) -> tuple[int, int]:
278
+ """Scale coordinates from source to target space.
279
+
280
+ Args:
281
+ x: X coordinate in source space
282
+ y: Y coordinate in source space
283
+ clamp: If True, clamp to valid target range
284
+ prevent_failsafe: If True, offset corner coordinates by 1 pixel
285
+ (prevents PyAutoGUI fail-safe trigger)
286
+ strict: If True, raise ValueError when coordinates are outside
287
+ valid source range [0, source_width] x [0, source_height]
288
+
289
+ Returns:
290
+ Tuple of (target_x, target_y) in target coordinate space
291
+
292
+ Raises:
293
+ ValueError: If strict=True and coordinates are outside valid range
294
+ """
295
+ # Strict validation: check if coordinates are in valid source range
296
+ if strict:
297
+ if x < 0 or x > self.source_width:
298
+ raise ValueError(
299
+ f"x coordinate {x} out of valid range [0, {self.source_width}]. "
300
+ f"Coordinates must be normalized between 0 and {self.source_width}."
301
+ )
302
+ if y < 0 or y > self.source_height:
303
+ raise ValueError(
304
+ f"y coordinate {y} out of valid range [0, {self.source_height}]. "
305
+ f"Coordinates must be normalized between 0 and {self.source_height}."
306
+ )
307
+
308
+ scaled_x = round(x * self.scale_x)
309
+ scaled_y = round(y * self.scale_y)
310
+
311
+ if clamp:
312
+ # Clamp to valid range
313
+ scaled_x = max(0, min(scaled_x, self.target_width - 1))
314
+ scaled_y = max(0, min(scaled_y, self.target_height - 1))
315
+
316
+ if prevent_failsafe:
317
+ # Prevent PyAutoGUI fail-safe by adjusting corner coordinates
318
+ if scaled_x < 1:
319
+ scaled_x = 1
320
+ elif scaled_x > self.target_width - 2:
321
+ scaled_x = self.target_width - 2
322
+ if scaled_y < 1:
323
+ scaled_y = 1
324
+ elif scaled_y > self.target_height - 2:
325
+ scaled_y = self.target_height - 2
326
+
327
+ # Add origin offset (for multi-monitor support)
328
+ return scaled_x + self.origin_x, scaled_y + self.origin_y
329
+
330
+ def set_origin(self, origin_x: int, origin_y: int) -> None:
331
+ """Update the origin offset."""
332
+ self.origin_x = origin_x
333
+ self.origin_y = origin_y
334
+
335
+ def set_target_size(self, width: int, height: int) -> None:
336
+ """Update the target size and recalculate scale factors."""
337
+ self.target_width = width
338
+ self.target_height = height
339
+ self.scale_x = width / self.source_width
340
+ self.scale_y = height / self.source_height
341
+
342
+
343
+ # =============================================================================
344
+ # Key Normalization Functions
345
+ # =============================================================================
346
+
347
+
348
def normalize_key(key: str, *, macos_ctrl_to_cmd: bool = False) -> str:
    """Normalize a key name to pyautogui format.

    The key is stripped of surrounding whitespace and lower-cased, then
    looked up in KEY_MAP. Names without a KEY_MAP entry are returned in their
    stripped, lower-cased form; no validation is performed here.

    Args:
        key: Key name to normalize (e.g., "Ctrl", " page_down ", "caps")
        macos_ctrl_to_cmd: If True and on macOS, remap 'ctrl' to 'command'

    Returns:
        Normalized key name (e.g., "ctrl", "pgdn", "capslock")
    """
    cleaned = key.strip().lower()
    normalized = KEY_MAP.get(cleaned, cleaned)

    # Remap ctrl to command on macOS if enabled
    if normalized == "ctrl" and macos_ctrl_to_cmd and sys.platform == "darwin":
        return "command"

    return normalized
366
+
367
+
368
def parse_hotkey(
    hotkey_str: str,
    *,
    macos_ctrl_to_cmd: bool = False,
    validate: bool = True,
) -> list[str]:
    """Split a hotkey string into normalized key names.

    Leading and trailing parentheses are removed first. If the remaining
    text contains a '+', it is split on '+'; otherwise it is split on ','.
    Each piece is passed through normalize_key and empty results are dropped.

    Args:
        hotkey_str: Hotkey string (e.g., "ctrl+c", "alt, tab", "Shift+Enter")
        macos_ctrl_to_cmd: If True and on macOS, remap 'ctrl' to 'command'
        validate: If True, validate keys against PYAUTOGUI_VALID_KEYS

    Returns:
        List of normalized key names (e.g., ["ctrl", "c"])

    Raises:
        ValueError: If validate=True and any key is invalid
    """
    body = hotkey_str.strip("()")

    # '+' takes precedence; commas only act as separators when no '+' exists.
    separator = "+" if "+" in body else ","

    keys = []
    for token in body.split(separator):
        name = normalize_key(token, macos_ctrl_to_cmd=macos_ctrl_to_cmd)
        if name:
            keys.append(name)

    if validate:
        validate_keys(keys)

    return keys
409
+
410
+
411
def validate_keys(keys: list[str]) -> None:
    """Validate that all keys are recognized by pyautogui.

    Empty strings are ignored. Every other key must be a member of
    PYAUTOGUI_VALID_KEYS.

    Args:
        keys: List of normalized key names

    Raises:
        ValueError: If any key is invalid, with helpful suggestions and a
            sample of valid key names
    """
    invalid_keys = [k for k in keys if k and k not in PYAUTOGUI_VALID_KEYS]
    if not invalid_keys:
        return

    suggestions = []
    for invalid_key in invalid_keys:
        if invalid_key in ("ret",):
            suggestions.append(f"'{invalid_key}' -> use 'enter' or 'return'")
        elif invalid_key.startswith("num") and len(invalid_key) > 3:
            suggestions.append(
                f"'{invalid_key}' -> numpad keys use format 'num0'-'num9'"
            )
        else:
            suggestions.append(f"'{invalid_key}' is not a valid key name")

    error_msg = "Invalid key name(s) in hotkey: " + ", ".join(suggestions)

    # Sort BEFORE slicing: frozenset iteration order depends on string hash
    # randomization, so slicing first yields a different sample on every run.
    # Only multi-character names are sampled because single characters
    # (letters, digits, punctuation) sort first and make an unhelpful hint.
    named_keys = sorted(k for k in PYAUTOGUI_VALID_KEYS if len(k) > 1)
    valid_sample = ", ".join(named_keys[:30])
    error_msg += f"\n\nValid keys include: {valid_sample}... (and more)"
    raise ValueError(error_msg)
438
+
439
+
440
+ # =============================================================================
441
+ # Coordinate Parsing Functions
442
+ # =============================================================================
443
+
444
+
445
def parse_click_coords(
    argument: str,
    scaler: CoordinateScaler,
    *,
    prevent_failsafe: bool = False,
    strict: bool = False,
) -> tuple[int, int]:
    """Parse click coordinates from argument string.

    Only the first two comma-separated values are used; any further values
    are ignored. A missing (None or empty) argument is reported as an invalid
    format rather than failing with an AttributeError.

    Args:
        argument: Coordinate string in format "x, y"
        scaler: CoordinateScaler instance for coordinate transformation
        prevent_failsafe: If True, offset corner coordinates
        strict: If True, raise ValueError for out-of-range coordinates

    Returns:
        Tuple of (x, y) in target coordinate space

    Raises:
        ValueError: If coordinate format is invalid, a value is not a finite
            number, or (strict=True) out of range
    """
    text = argument or ""
    lowered = text.lower()

    # Check for common format errors
    if " and " in lowered or " then " in lowered:
        raise ValueError(
            f"Invalid click format: '{argument}'. "
            "Cannot combine multiple actions with 'and' or 'then'."
        )

    parts = text.split(",")
    if len(parts) < 2:
        raise ValueError(
            f"Invalid click coordinate format: '{argument}'. "
            "Expected 'x, y' (comma-separated numeric values)"
        )

    try:
        x = float(parts[0].strip())
        y = float(parts[1].strip())
        return scaler.scale(x, y, prevent_failsafe=prevent_failsafe, strict=strict)
    except (ValueError, IndexError, OverflowError) as e:
        # OverflowError: float() accepts "inf", but rounding an infinite
        # value to an integer pixel overflows; keep the ValueError contract.
        raise ValueError(
            f"Failed to parse click coords '{argument}': {e}. "
            "Coordinates must be comma-separated numeric values."
        ) from e
489
+
490
+
491
def parse_drag_coords(
    argument: str,
    scaler: CoordinateScaler,
    *,
    prevent_failsafe: bool = False,
    strict: bool = False,
) -> tuple[int, int, int, int]:
    """Parse drag coordinates from argument string.

    Exactly four comma-separated values are required. A missing (None or
    empty) argument is reported as an invalid format rather than failing with
    an AttributeError.

    Args:
        argument: Coordinate string in format "x1, y1, x2, y2"
        scaler: CoordinateScaler instance for coordinate transformation
        prevent_failsafe: If True, offset corner coordinates
        strict: If True, raise ValueError for out-of-range coordinates

    Returns:
        Tuple of (x1, y1, x2, y2) in target coordinate space

    Raises:
        ValueError: If coordinate format is invalid, a value is not a finite
            number, or (strict=True) out of range
    """
    text = argument or ""
    lowered = text.lower()

    # Check for common format errors
    if " and " in lowered or " then " in lowered:
        raise ValueError(
            f"Invalid drag format: '{argument}'. "
            "Cannot combine multiple actions with 'and' or 'then'."
        )

    parts = text.split(",")
    if len(parts) != 4:
        raise ValueError(
            f"Invalid drag coordinate format: '{argument}'. "
            "Expected 'x1, y1, x2, y2' (4 comma-separated numeric values)"
        )

    try:
        sx, sy, ex, ey = (float(part.strip()) for part in parts)
        x1, y1 = scaler.scale(sx, sy, prevent_failsafe=prevent_failsafe, strict=strict)
        x2, y2 = scaler.scale(ex, ey, prevent_failsafe=prevent_failsafe, strict=strict)
    except (ValueError, IndexError, OverflowError) as e:
        # OverflowError: float() accepts "inf", but rounding an infinite
        # value to an integer pixel overflows; keep the ValueError contract.
        raise ValueError(
            f"Failed to parse drag coords '{argument}': {e}. "
            "Coordinates must be comma-separated numeric values."
        ) from e

    return x1, y1, x2, y2
539
+
540
+
541
def parse_scroll_coords(
    argument: str,
    scaler: CoordinateScaler,
    *,
    prevent_failsafe: bool = False,
    strict: bool = False,
) -> tuple[int, int, str]:
    """Parse scroll coordinates and direction from argument string.

    Validation order: field count, numeric x/y, direction, then scaling.
    An invalid direction is reported with its own "Invalid scroll direction"
    message; every other failure is wrapped in a "Failed to parse scroll
    coords" message.

    Args:
        argument: Scroll string in format "x, y, direction"
        scaler: CoordinateScaler instance for coordinate transformation
        prevent_failsafe: If True, offset corner coordinates
        strict: If True, raise ValueError for out-of-range coordinates

    Returns:
        Tuple of (x, y, direction) where direction is 'up' or 'down'

    Raises:
        ValueError: If format is invalid, a value is not a finite number, or
            (strict=True) coordinates out of range
    """

    def parse_failure(cause: Exception) -> ValueError:
        # Single place for the wrapped-error message used by both try blocks.
        return ValueError(
            f"Failed to parse scroll coords '{argument}': {cause}. "
            "Format: 'x, y, direction'"
        )

    parts = [p.strip() for p in (argument or "").split(",")]
    if len(parts) != 3:
        raise ValueError(
            f"Invalid scroll format: '{argument}'. "
            "Expected 'x, y, direction' (e.g., '500, 300, up')"
        )
    raw_x, raw_y, raw_direction = parts

    try:
        x = float(raw_x)
        y = float(raw_y)
    except ValueError as e:
        raise parse_failure(e) from e

    # Checked outside any try block so the direction error never has to be
    # told apart from parse errors by inspecting the exception text.
    direction = raw_direction.lower()
    if direction not in ("up", "down"):
        raise ValueError(
            f"Invalid scroll direction: '{direction}'. Use 'up' or 'down'."
        )

    try:
        scaled_x, scaled_y = scaler.scale(
            x, y, prevent_failsafe=prevent_failsafe, strict=strict
        )
    except (ValueError, IndexError, OverflowError) as e:
        # OverflowError: float() accepts "inf", but rounding an infinite
        # value to an integer pixel overflows; keep the ValueError contract.
        raise parse_failure(e) from e

    return scaled_x, scaled_y, direction
590
+
591
+
592
+ # =============================================================================
593
+ # Handler Utility Functions
594
+ # =============================================================================
8
595
 
9
596
 
10
597
  def reset_handler(handler) -> None:
@@ -19,3 +606,17 @@ def reset_handler(handler) -> None:
19
606
  """
20
607
  if hasattr(handler, "reset"):
21
608
  handler.reset()
609
+
610
+
611
def configure_handler_delay(handler, step_delay: float) -> None:
    """Copy the agent's step_delay onto the handler's post_batch_delay.

    Duck-typed: the value is written only when the handler exposes a
    ``config`` attribute that itself has a ``post_batch_delay`` attribute.
    Handlers that do not match are left untouched and no error is raised.

    Args:
        handler: The action handler to configure
        step_delay: The delay in seconds to set
    """
    config = getattr(handler, "config", None)
    if config is None or not hasattr(config, "post_batch_delay"):
        return
    config.post_batch_delay = step_delay