computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cup/actions/_macos.py ADDED
@@ -0,0 +1,1090 @@
1
+ """macOS action handler — AXUIElement + Quartz CGEvent action execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import difflib
6
+ import os
7
+ import re
8
+ import subprocess
9
+ import time
10
+ from typing import Any
11
+
12
+ from cup.actions._handler import ActionHandler
13
+ from cup.actions._keys import parse_combo
14
+ from cup.actions.executor import ActionResult
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Quartz CGEvent keyboard constants
18
+ # ---------------------------------------------------------------------------
19
+
20
# macOS virtual keycodes (CGKeyCode), keyed by the lowercase key name that
# appears in a combo string.  Several names alias the same physical key
# ("enter"/"return", "-"/"minus", "="/"equal"/"plus").
_VK_MAP: dict[str, int] = {
    # Editing and navigation keys
    "enter": 0x24,
    "return": 0x24,
    "tab": 0x30,
    "escape": 0x35,
    "space": 0x31,
    "backspace": 0x33,
    "delete": 0x75,
    "up": 0x7E,
    "down": 0x7D,
    "left": 0x7B,
    "right": 0x7C,
    "home": 0x73,
    "end": 0x77,
    "pageup": 0x74,
    "pagedown": 0x79,
    # Function keys f1..f12 (keycodes are not contiguous)
    **dict(
        zip(
            (f"f{number}" for number in range(1, 13)),
            (0x7A, 0x78, 0x63, 0x76, 0x60, 0x61, 0x62, 0x64, 0x65, 0x6D, 0x67, 0x6F),
        )
    ),
    # Letters a..z (lowercase)
    **dict(
        zip(
            "abcdefghijklmnopqrstuvwxyz",
            (
                0x00, 0x0B, 0x08, 0x02, 0x0E, 0x03, 0x05, 0x04, 0x22,
                0x26, 0x28, 0x25, 0x2E, 0x2D, 0x1F, 0x23, 0x0C, 0x0F,
                0x01, 0x11, 0x20, 0x09, 0x0D, 0x07, 0x10, 0x06,
            ),
        )
    ),
    # Digits 0..9 (top row)
    **dict(
        zip(
            "0123456789",
            (0x1D, 0x12, 0x13, 0x14, 0x15, 0x17, 0x16, 0x1A, 0x1C, 0x19),
        )
    ),
    # Punctuation / symbols
    "-": 0x1B,
    "=": 0x18,
    "[": 0x21,
    "]": 0x1E,
    "\\": 0x2A,
    ";": 0x29,
    "'": 0x27,
    ",": 0x2B,
    ".": 0x2F,
    "/": 0x2C,
    "`": 0x32,
    # Spelled-out aliases for the minus and equals keys
    "minus": 0x1B,
    "equal": 0x18,
    "plus": 0x18,
}

# Modifier flag bits passed to CGEventSetFlags
_kCGEventFlagMaskShift = 0x00020000  # 1 << 17
_kCGEventFlagMaskControl = 0x00040000  # 1 << 18
_kCGEventFlagMaskAlternate = 0x00080000  # 1 << 19
_kCGEventFlagMaskCommand = 0x00100000  # 1 << 20

# Combo-string modifier name -> CGEvent flag bit
_MOD_FLAGS: dict[str, int] = dict(
    meta=_kCGEventFlagMaskCommand,
    ctrl=_kCGEventFlagMaskControl,
    alt=_kCGEventFlagMaskAlternate,
    shift=_kCGEventFlagMaskShift,
)
116
+
117
+
118
def _send_key_combo(combo_str: str) -> None:
    """Send a keyboard combination via Quartz CGEvents.

    The combo string is split by ``parse_combo`` into modifier names and
    key names.  Modifiers are applied as event flags on every key event;
    the keys are pressed in order and released in reverse order.  A combo
    made only of modifiers (e.g. ``"meta"``) presses the modifier keys
    themselves.

    Args:
        combo_str: Combination such as ``"meta+a"`` or ``"escape"``.

    Raises:
        RuntimeError: If any named key has no known keycode, or the combo
            resolves to no keys at all.
    """
    from Quartz import (
        CGEventCreateKeyboardEvent,
        CGEventPost,
        CGEventSetFlags,
        kCGHIDEventTap,
    )

    mod_names, key_names = parse_combo(combo_str)

    # Build the modifier flags mask; unknown modifier names contribute nothing.
    flags = 0
    for m in mod_names:
        flags |= _MOD_FLAGS.get(m, 0)

    # Resolve main keycodes, remembering any key that cannot be mapped.
    main_keys: list[int] = []
    unresolved: list[str] = []
    for k in key_names:
        code = _VK_MAP.get(k)
        if code is None and len(k) == 1:
            # Single characters are matched case-insensitively.
            code = _VK_MAP.get(k.lower())
        if code is None:
            unresolved.append(k)
        else:
            main_keys.append(code)

    # Sending a different combo than the one requested (e.g. a bare Command
    # press for "meta+<unknown>") is worse than failing loudly.
    if unresolved:
        raise RuntimeError(
            f"Could not resolve key(s) {unresolved!r} from combo: {combo_str!r}"
        )

    # Only modifiers were specified (e.g. "cmd"): press them as ordinary keys.
    if not main_keys and mod_names:
        modifier_keycodes: dict[str, int] = {
            "meta": 0x37,  # kVK_Command
            "ctrl": 0x3B,  # kVK_Control
            "alt": 0x3A,  # kVK_Option
            "shift": 0x38,  # kVK_Shift
        }
        main_keys = [modifier_keycodes[m] for m in mod_names if m in modifier_keycodes]
        flags = 0  # No modifier flags when pressing a modifier alone

    if not main_keys:
        raise RuntimeError(f"Could not resolve any key codes from combo: {combo_str!r}")

    # Key down
    for vk in main_keys:
        event = CGEventCreateKeyboardEvent(None, vk, True)
        if flags:
            CGEventSetFlags(event, flags)
        CGEventPost(kCGHIDEventTap, event)

    time.sleep(0.01)

    # Key up, in reverse order of the presses
    for vk in reversed(main_keys):
        event = CGEventCreateKeyboardEvent(None, vk, False)
        if flags:
            CGEventSetFlags(event, flags)
        CGEventPost(kCGHIDEventTap, event)

    time.sleep(0.01)
176
+
177
+
178
def _type_string(text: str) -> None:
    """Type a string using CGEvents with Unicode support.

    Each character is attached to a synthetic key-down/key-up pair with
    CGEventKeyboardSetUnicodeString, so the result does not depend on the
    active keyboard layout.

    Args:
        text: The text to type; an empty string posts no events.
    """
    from Quartz import (
        CGEventCreateKeyboardEvent,
        CGEventKeyboardSetUnicodeString,
        CGEventPost,
        kCGHIDEventTap,
    )

    # One character per event pair for maximum compatibility.
    for char in text:
        # The length argument counts UTF-16 code units, not Python code
        # points: characters outside the BMP (e.g. emoji) need 2 units,
        # otherwise only a lone surrogate would be delivered.
        unit_count = len(char.encode("utf-16-le", "surrogatepass")) // 2

        # Key down with Unicode char
        event_down = CGEventCreateKeyboardEvent(None, 0, True)
        CGEventKeyboardSetUnicodeString(event_down, unit_count, char)
        CGEventPost(kCGHIDEventTap, event_down)

        # Key up
        event_up = CGEventCreateKeyboardEvent(None, 0, False)
        CGEventKeyboardSetUnicodeString(event_up, unit_count, char)
        CGEventPost(kCGHIDEventTap, event_up)

        time.sleep(0.01)
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # Quartz CGEvent mouse helpers
209
+ # ---------------------------------------------------------------------------
210
+
211
+
212
def _get_element_bounds(element) -> tuple[int, int, int, int] | None:
    """Return ``(x, y, width, height)`` for an AXUIElement, or None.

    None is returned when either the position or the size attribute cannot
    be read, or when the AX value cannot be unpacked.
    """
    from ApplicationServices import (
        AXUIElementCopyAttributeValue,
        AXValueGetValue,
        kAXErrorSuccess,
        kAXPositionAttribute,
        kAXSizeAttribute,
        kAXValueCGPointType,
        kAXValueCGSizeType,
    )

    # Fetch position first, then size; bail out at the first failure.
    refs = []
    for attribute in (kAXPositionAttribute, kAXSizeAttribute):
        status, ref = AXUIElementCopyAttributeValue(element, attribute, None)
        if status != kAXErrorSuccess or ref is None:
            return None
        refs.append(ref)
    position_ref, extent_ref = refs

    # Unwrap the opaque AXValue objects into a CGPoint and a CGSize.
    _, origin = AXValueGetValue(position_ref, kAXValueCGPointType, None)
    _, extent = AXValueGetValue(extent_ref, kAXValueCGSizeType, None)
    if origin is None or extent is None:
        return None

    return (int(origin.x), int(origin.y), int(extent.width), int(extent.height))
239
+
240
+
241
def _get_element_center(element) -> tuple[float, float] | None:
    """Return the midpoint of an element's bounds, or None without bounds."""
    rect = _get_element_bounds(element)
    if rect is None:
        return None
    left, top, width, height = rect
    mid_x = left + width / 2.0
    mid_y = top + height / 2.0
    return mid_x, mid_y
248
+
249
+
250
def _get_element_center_or_parent(element) -> tuple[float, float] | None:
    """Get center point of an element, walking up parents if needed.

    Some elements (e.g., offscreen web content nodes in Safari) don't
    report valid bounds. This function walks up the AXParent chain (at
    most 20 levels) to find the nearest ancestor with bounds, then falls
    back to the center of the element's containing window (AXWindow).

    Returns:
        ``(x, y)`` of the first usable center, or None when neither the
        element, its ancestors, nor its window report bounds.
    """
    from ApplicationServices import AXUIElementCopyAttributeValue, kAXErrorSuccess

    current = element
    for _ in range(20):  # guard against cyclic or very deep parent chains
        center = _get_element_center(current)
        if center is not None:
            return center
        # Walk up to parent
        err, parent = AXUIElementCopyAttributeValue(current, "AXParent", None)
        if err != kAXErrorSuccess or parent is None:
            break
        current = parent

    # Last resort: the window that contains the original element.  This
    # covers chains that were cut short or are deeper than the guard above.
    err, window = AXUIElementCopyAttributeValue(element, "AXWindow", None)
    if err == kAXErrorSuccess and window is not None:
        return _get_element_center(window)

    return None
272
+
273
+
274
def _send_mouse_click(
    x: float,
    y: float,
    *,
    button: str = "left",
    count: int = 1,
) -> None:
    """Post ``count`` mouse clicks at a screen position via Quartz CGEvents.

    ``button`` selects the right button when it equals ``"right"``; any
    other value means the left button.  The cursor is moved to the target
    first, and each down/up pair carries its ordinal as the click state.
    """
    from Quartz import (
        CGEventCreateMouseEvent,
        CGEventPost,
        CGEventSetIntegerValueField,
        CGPointMake,
        kCGEventLeftMouseDown,
        kCGEventLeftMouseUp,
        kCGEventMouseMoved,
        kCGEventRightMouseDown,
        kCGEventRightMouseUp,
        kCGHIDEventTap,
        kCGMouseButtonLeft,
        kCGMouseButtonRight,
    )
    from Quartz import kCGMouseEventClickState

    target = CGPointMake(x, y)

    use_right = button == "right"
    press_type = kCGEventRightMouseDown if use_right else kCGEventLeftMouseDown
    release_type = kCGEventRightMouseUp if use_right else kCGEventLeftMouseUp
    which_button = kCGMouseButtonRight if use_right else kCGMouseButtonLeft

    # Park the cursor on the target before clicking.
    hover = CGEventCreateMouseEvent(None, kCGEventMouseMoved, target, kCGMouseButtonLeft)
    CGEventPost(kCGHIDEventTap, hover)
    time.sleep(0.02)

    for nth in range(1, count + 1):
        press = CGEventCreateMouseEvent(None, press_type, target, which_button)
        CGEventSetIntegerValueField(press, kCGMouseEventClickState, nth)
        CGEventPost(kCGHIDEventTap, press)

        time.sleep(0.01)

        release = CGEventCreateMouseEvent(None, release_type, target, which_button)
        CGEventSetIntegerValueField(release, kCGMouseEventClickState, nth)
        CGEventPost(kCGHIDEventTap, release)

        # Short gap between consecutive clicks, none after the last one.
        if nth < count:
            time.sleep(0.02)

    time.sleep(0.01)
331
+
332
+
333
def _send_mouse_long_press(x: float, y: float, duration: float = 0.8) -> None:
    """Send a long press (mouse down, hold, mouse up) at screen coordinates.

    Args:
        x: Screen x coordinate of the press.
        y: Screen y coordinate of the press.
        duration: Seconds to hold the left button down.

    The button release is always posted once the press has been posted,
    even if the hold is interrupted (e.g. ``time.sleep`` rejecting a
    negative duration), so the left button is never left held.
    """
    from Quartz import (
        CGEventCreateMouseEvent,
        CGEventPost,
        CGPointMake,
        kCGEventLeftMouseDown,
        kCGEventLeftMouseUp,
        kCGEventMouseMoved,
        kCGHIDEventTap,
        kCGMouseButtonLeft,
    )

    point = CGPointMake(x, y)

    # Move cursor
    move = CGEventCreateMouseEvent(None, kCGEventMouseMoved, point, kCGMouseButtonLeft)
    CGEventPost(kCGHIDEventTap, move)
    time.sleep(0.02)

    # Press down
    down = CGEventCreateMouseEvent(None, kCGEventLeftMouseDown, point, kCGMouseButtonLeft)
    CGEventPost(kCGHIDEventTap, down)

    try:
        # Hold
        time.sleep(duration)
    finally:
        # Release — in `finally` so a failed hold cannot leave the button stuck
        up = CGEventCreateMouseEvent(None, kCGEventLeftMouseUp, point, kCGMouseButtonLeft)
        CGEventPost(kCGHIDEventTap, up)

    time.sleep(0.01)
364
+
365
+
366
def _send_scroll(x: float, y: float, direction: str, amount: int = 5) -> None:
    """Send scroll event at screen coordinates via Quartz CGEvents.

    Uses pixel-based scrolling (kCGScrollEventUnitPixel) for reliable
    scrolling across all apps. Line-based scrolling (kCGScrollEventUnitLine)
    is unreliable in apps like Safari where line units may be interpreted
    as tiny or zero-pixel movements.

    Args:
        x: Screen x coordinate the scroll event is located at.
        y: Screen y coordinate the scroll event is located at.
        direction: One of ``"up"``, ``"down"``, ``"left"``, ``"right"``.
        amount: Number of "lines" to scroll; each is converted to 80 pixels.

    Raises:
        ValueError: If ``direction`` is not one of the four supported values.
    """
    # Convert line amount to pixels (~80px per line is a reasonable default)
    pixel_amount = amount * 80

    # (vertical, horizontal) wheel deltas per direction
    deltas = {
        "up": (pixel_amount, 0),
        "down": (-pixel_amount, 0),
        "left": (0, pixel_amount),
        "right": (0, -pixel_amount),
    }
    # Reject unknown directions up front instead of posting a zero-delta
    # event that would be reported to the caller as a successful scroll.
    if direction not in deltas:
        raise ValueError(f"Unsupported scroll direction: {direction!r}")
    dy, dx = deltas[direction]

    from Quartz import (
        CGEventCreateScrollWheelEvent,
        CGEventPost,
        CGEventSetLocation,
        CGPointMake,
        kCGHIDEventTap,
        kCGScrollEventUnitPixel,
    )

    point = CGPointMake(x, y)

    # Two wheels: wheel 1 is vertical (dy), wheel 2 is horizontal (dx)
    event = CGEventCreateScrollWheelEvent(None, kCGScrollEventUnitPixel, 2, dy, dx)
    CGEventSetLocation(event, point)
    CGEventPost(kCGHIDEventTap, event)
    time.sleep(0.02)
403
+
404
+
405
+ # ---------------------------------------------------------------------------
406
+ # AXUIElement action helpers
407
+ # ---------------------------------------------------------------------------
408
+
409
+
410
def _ax_perform_action(element, action_name: str) -> bool:
    """Run the named AX action on ``element``; True only if AX reports success.

    Any exception raised by the accessibility call is treated as failure.
    """
    from ApplicationServices import AXUIElementPerformAction, kAXErrorSuccess

    try:
        status = AXUIElementPerformAction(element, action_name)
    except Exception:
        return False
    return status == kAXErrorSuccess
419
+
420
+
421
def _ax_has_action(element, action_name: str) -> bool:
    """Report whether ``action_name`` is among the element's AX action names.

    Returns False when the names cannot be read, the list is empty, or the
    accessibility call raises.
    """
    from ApplicationServices import AXUIElementCopyActionNames, kAXErrorSuccess

    try:
        status, names = AXUIElementCopyActionNames(element, None)
        if status != kAXErrorSuccess or not names:
            return False
        return action_name in names
    except Exception:
        return False
432
+
433
+
434
def _ax_get_attr(element, attr: str, default=None):
    """Read one AX attribute, returning ``default`` on any failure.

    ``default`` is returned when the call raises, reports an error, or
    yields None.
    """
    from ApplicationServices import AXUIElementCopyAttributeValue, kAXErrorSuccess

    try:
        status, result = AXUIElementCopyAttributeValue(element, attr, None)
    except Exception:
        return default
    if status != kAXErrorSuccess or result is None:
        return default
    return result
445
+
446
+
447
def _ax_set_attr(element, attr: str, value) -> bool:
    """Write ``value`` to an AX attribute; True only if AX reports success.

    Any exception raised by the accessibility call is treated as failure.
    """
    from ApplicationServices import AXUIElementSetAttributeValue, kAXErrorSuccess

    try:
        status = AXUIElementSetAttributeValue(element, attr, value)
    except Exception:
        return False
    return status == kAXErrorSuccess
456
+
457
+
458
def _ax_is_settable(element, attr: str) -> bool:
    """Report whether the AX attribute can be written on ``element``.

    Returns False when the query reports an error or raises.
    """
    from ApplicationServices import AXUIElementIsAttributeSettable, kAXErrorSuccess

    try:
        status, writable = AXUIElementIsAttributeSettable(element, attr, None)
        return bool(writable) if status == kAXErrorSuccess else False
    except Exception:
        return False
469
+
470
+
471
+ # ---------------------------------------------------------------------------
472
+ # App launching helpers
473
+ # ---------------------------------------------------------------------------
474
+
475
+
476
def _discover_apps() -> dict[str, str]:
    """Map lowercase app names to the path of their ``.app`` bundle.

    Scans the standard application folders first, then adds any bundle
    reported by ``mdfind`` whose name has not been seen yet.  Failures of
    either source are ignored, so the result may be empty.
    """
    found: dict[str, str] = {}

    # Standard application folders; a later folder overrides an earlier one
    # when both contain an app with the same name.
    search_roots = (
        "/Applications",
        "/Applications/Utilities",
        "/System/Applications",
        "/System/Applications/Utilities",
        os.path.expanduser("~/Applications"),
    )
    for root in search_roots:
        if not os.path.isdir(root):
            continue
        try:
            entries = os.listdir(root)
        except OSError:
            continue
        for entry in entries:
            if not entry.endswith(".app"):
                continue
            bundle_name = entry[:-4]  # drop the ".app" suffix
            found[bundle_name.lower()] = os.path.join(root, entry)

    # Ask mdfind for application bundles elsewhere on disk (Homebrew casks,
    # etc.); folder results above take precedence over these.
    try:
        completed = subprocess.run(
            ["mdfind", "kMDItemContentType == 'com.apple.application-bundle'"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return found

    if completed.returncode == 0:
        for raw_line in completed.stdout.strip().split("\n"):
            bundle_path = raw_line.strip()
            if bundle_path.endswith(".app"):
                key = os.path.basename(bundle_path)[:-4].lower()
                found.setdefault(key, bundle_path)

    return found
520
+
521
+
522
def _fuzzy_match(
    query: str,
    candidates: list[str],
    cutoff: float = 0.5,
) -> str | None:
    """Find the best fuzzy match for query among candidates.

    The query is lowercased and stripped; candidates are compared as given
    (callers pass lowercase names).  Matching is tried in this order:

    1. Exact match.
    2. Candidates containing the query, preferring those where it appears
       as a whole word, then the shortest one.
    3. Candidates contained in the query, preferring the longest (most
       specific) one.
    4. Highest ``difflib.SequenceMatcher`` ratio, if at least ``cutoff``.

    Args:
        query: User-supplied name to look up.
        candidates: Names to choose from.
        cutoff: Minimum similarity ratio accepted in step 4.

    Returns:
        The matching candidate, or None when the query is blank or nothing
        matches well enough.
    """
    query_lower = query.lower().strip()

    # A blank query is a substring of every candidate; refuse it instead of
    # returning an arbitrary entry.
    if not query_lower:
        return None

    # Exact match
    if query_lower in candidates:
        return query_lower

    # Substring match, e.g. "code" should match "visual studio code" rather
    # than "xcode", and "chrome" should match "google chrome".
    substring_matches = [c for c in candidates if query_lower in c]
    if substring_matches:
        # Prefer candidates where the query sits on word boundaries.
        boundary = re.compile(
            r"(?:^|[\s\-_])" + re.escape(query_lower) + r"(?:$|[\s\-_])"
        )
        word_boundary = [c for c in substring_matches if boundary.search(c)]
        return min(word_boundary or substring_matches, key=len)

    # Reverse substring (e.g. query "google chrome app" contains candidate
    # "google chrome").  Empty candidates are skipped because "" is contained
    # in everything; the longest hit is the most specific one.
    contained = [c for c in candidates if c and c in query_lower]
    if contained:
        return max(contained, key=len)

    # Fuzzy match via SequenceMatcher; the first candidate wins ties.
    best_match = None
    best_score = 0.0
    for c in candidates:
        score = difflib.SequenceMatcher(None, query_lower, c).ratio()
        if score > best_score:
            best_score = score
            best_match = c

    if best_match and best_score >= cutoff:
        return best_match
    return None
566
+
567
+
568
+ # ---------------------------------------------------------------------------
569
+ # MacosActionHandler
570
+ # ---------------------------------------------------------------------------
571
+
572
+
573
class MacosActionHandler(ActionHandler):
    """Execute CUP actions on macOS via AXUIElement API + Quartz CGEvents.

    Element actions prefer native accessibility actions/attributes and fall
    back to synthetic mouse or keyboard events.  Every public method
    returns an ``ActionResult`` rather than raising.
    """

    @staticmethod
    def _failure(error: str) -> ActionResult:
        """Build the failed ActionResult shape used throughout this class."""
        return ActionResult(success=False, message="", error=error)

    def action(
        self,
        native_ref: Any,
        action: str,
        params: dict[str, Any],
    ) -> ActionResult:
        """Dispatch a named action to the matching handler.

        Args:
            native_ref: The AXUIElement the action targets.
            action: Action name, e.g. ``"click"`` or ``"scroll"``.
            params: Action parameters; ``"value"`` is read for type/setvalue
                (default ``""``) and ``"direction"`` for scroll (default
                ``"down"``).

        Returns:
            The handler's result, or a failure for an unknown action name.
        """
        element = native_ref

        # Actions that need nothing beyond the element itself.
        element_only = {
            "click": self._click,
            "toggle": self._toggle,
            "expand": self._expand,
            "collapse": self._collapse,
            "select": self._select,
            "increment": self._increment,
            "decrement": self._decrement,
            "rightclick": self._rightclick,
            "doubleclick": self._doubleclick,
            "focus": self._focus,
            "dismiss": self._dismiss,
            "longpress": self._longpress,
        }
        handler = element_only.get(action)
        if handler is not None:
            return handler(element)

        if action == "type":
            return self._type(element, params.get("value", ""))
        if action == "setvalue":
            return self._setvalue(element, params.get("value", ""))
        if action == "scroll":
            return self._scroll(element, params.get("direction", "down"))

        return self._failure(f"Action '{action}' not implemented for macOS")

    def press(self, combo: str) -> ActionResult:
        """Send a keyboard combination such as ``"meta+a"``."""
        try:
            _send_key_combo(combo)
            return ActionResult(success=True, message=f"Pressed {combo}")
        except Exception as exc:
            return self._failure(f"Failed to press keys '{combo}': {exc}")

    # -- individual actions ------------------------------------------------

    def _click(self, element) -> ActionResult:
        """Click via AXPress, then AXConfirm, then a mouse click at the center."""
        # Try AXPress first (native accessibility action)
        if _ax_perform_action(element, "AXPress"):
            return ActionResult(success=True, message="Clicked")

        # Try AXConfirm
        if _ax_perform_action(element, "AXConfirm"):
            return ActionResult(success=True, message="Clicked (confirm)")

        # Fallback: mouse click at element center
        center = _get_element_center(element)
        if center:
            try:
                _send_mouse_click(center[0], center[1])
                return ActionResult(success=True, message="Clicked (mouse fallback)")
            except Exception as exc:
                return self._failure(f"Mouse click failed: {exc}")

        return self._failure("Element does not support click and has no bounds")

    def _toggle(self, element) -> ActionResult:
        """Toggle a checkbox/switch; AXPress performs the toggle on macOS."""
        if _ax_perform_action(element, "AXPress"):
            return ActionResult(success=True, message="Toggled")
        return self._failure("Element does not support toggle")

    def _type(self, element, text: str) -> ActionResult:
        """Type text into an element.

        Strategy:
        1. Try setting AXValue directly (most reliable, works for most text fields)
        2. Fall back to CGEvent keyboard typing (for elements that don't support AXValue)
        """
        try:
            # Focus the element first
            _ax_perform_action(element, "AXRaise")
            _ax_set_attr(element, "AXFocused", True)
            time.sleep(0.05)

            # Strategy 1: Set AXValue directly (preferred — bypasses keyboard entirely)
            if _ax_is_settable(element, "AXValue"):
                if _ax_set_attr(element, "AXValue", text):
                    return ActionResult(success=True, message=f"Typed: {text}")

            # Strategy 2: Click to ensure focus, select all, then type via CGEvent
            center = _get_element_center(element)
            if center:
                _send_mouse_click(center[0], center[1])
                time.sleep(0.05)

            _send_key_combo("meta+a")
            time.sleep(0.05)
            _type_string(text)
            return ActionResult(success=True, message=f"Typed: {text}")
        except Exception as exc:
            return self._failure(f"Failed to type: {exc}")

    def _setvalue(self, element, text: str) -> ActionResult:
        """Set value programmatically via AXValue, else fall back to typing."""
        if _ax_is_settable(element, "AXValue"):
            if _ax_set_attr(element, "AXValue", text):
                return ActionResult(success=True, message=f"Set value to: {text}")
            return self._failure("AXValue attribute set failed")

        # Fallback: try typing
        return self._type(element, text)

    def _expand(self, element) -> ActionResult:
        """Expand an element unless AXExpanded already reports it expanded."""
        expanded = _ax_get_attr(element, "AXExpanded")
        if expanded is not None and bool(expanded):
            return ActionResult(success=True, message="Already expanded")

        # Try AXPress (works for disclosure triangles, combo boxes)
        if _ax_perform_action(element, "AXPress"):
            return ActionResult(success=True, message="Expanded")

        # Try setting AXExpanded directly
        if _ax_set_attr(element, "AXExpanded", True):
            return ActionResult(success=True, message="Expanded")

        return self._failure("Element does not support expand")

    def _collapse(self, element) -> ActionResult:
        """Collapse an element unless AXExpanded already reports it collapsed."""
        expanded = _ax_get_attr(element, "AXExpanded")
        if expanded is not None and not bool(expanded):
            return ActionResult(success=True, message="Already collapsed")

        # Try AXPress
        if _ax_perform_action(element, "AXPress"):
            return ActionResult(success=True, message="Collapsed")

        # Try setting AXExpanded directly
        if _ax_set_attr(element, "AXExpanded", False):
            return ActionResult(success=True, message="Collapsed")

        return self._failure("Element does not support collapse")

    def _select(self, element) -> ActionResult:
        """Select via AXPick, AXPress, the AXSelected attribute, then a click."""
        # AXPick is the selection action; AXPress works for tabs, list
        # items and menu items.
        for ax_action in ("AXPick", "AXPress"):
            if _ax_perform_action(element, ax_action):
                return ActionResult(success=True, message="Selected")

        # Try setting AXSelected
        if _ax_set_attr(element, "AXSelected", True):
            return ActionResult(success=True, message="Selected")

        # Fallback: click
        return self._click(element)

    def _scroll(self, element, direction: str) -> ActionResult:
        """Scroll at the element's center, or at the nearest ancestor's."""
        # Some elements (e.g., offscreen nodes in Safari) have no bounds, so
        # the helper falls back to the nearest ancestor with valid bounds.
        # It checks the element itself first, so one lookup is enough.
        center = _get_element_center_or_parent(element)
        if center:
            try:
                _send_scroll(center[0], center[1], direction)
                return ActionResult(success=True, message=f"Scrolled {direction}")
            except Exception as exc:
                return self._failure(f"Scroll failed: {exc}")

        return self._failure("Element has no bounds for scroll target")

    def _increment(self, element) -> ActionResult:
        """Perform AXIncrement on the element."""
        if _ax_perform_action(element, "AXIncrement"):
            return ActionResult(success=True, message="Incremented")
        return self._failure("Element does not support increment")

    def _decrement(self, element) -> ActionResult:
        """Perform AXDecrement on the element."""
        if _ax_perform_action(element, "AXDecrement"):
            return ActionResult(success=True, message="Decremented")
        return self._failure("Element does not support decrement")

    def _rightclick(self, element) -> ActionResult:
        """Open the context menu via AXShowMenu, else right-click the center."""
        # Try AXShowMenu (native context menu action)
        if _ax_perform_action(element, "AXShowMenu"):
            return ActionResult(success=True, message="Right-clicked (context menu)")

        # Fallback: mouse right-click at element center
        center = _get_element_center(element)
        if center:
            try:
                _send_mouse_click(center[0], center[1], button="right")
                return ActionResult(success=True, message="Right-clicked")
            except Exception as exc:
                return self._failure(f"Right-click failed: {exc}")

        return self._failure("Element has no bounds for right-click")

    def _doubleclick(self, element) -> ActionResult:
        """Double-click at the element's center."""
        center = _get_element_center(element)
        if center:
            try:
                _send_mouse_click(center[0], center[1], count=2)
                return ActionResult(success=True, message="Double-clicked")
            except Exception as exc:
                return self._failure(f"Double-click failed: {exc}")

        return self._failure("Element has no bounds for double-click")

    def _focus(self, element) -> ActionResult:
        """Focus an element via AXRaise plus the AXFocused attribute.

        Succeeds when AXFocused could be set, or failing that, when the
        AXRaise action itself succeeded.
        """
        # Try AXRaise first (brings window/element to front) and remember
        # whether it actually worked.
        raised = _ax_perform_action(element, "AXRaise")

        # Set AXFocused
        if _ax_set_attr(element, "AXFocused", True):
            return ActionResult(success=True, message="Focused")

        # AXRaise succeeded even if AXFocused didn't apply.  Merely
        # supporting the action is not enough to claim success.
        if raised:
            return ActionResult(success=True, message="Focused (raised)")

        return self._failure("Failed to focus element")

    def _dismiss(self, element) -> ActionResult:
        """Dismiss via AXCancel, else focus the element and press Escape."""
        # Try AXCancel (native dismiss action for dialogs/sheets)
        if _ax_perform_action(element, "AXCancel"):
            return ActionResult(success=True, message="Dismissed")

        # Fallback: send Escape key
        try:
            _ax_perform_action(element, "AXRaise")
            _ax_set_attr(element, "AXFocused", True)
            time.sleep(0.05)
            _send_key_combo("escape")
            return ActionResult(success=True, message="Dismissed (Escape)")
        except Exception as exc:
            return self._failure(f"Failed to dismiss: {exc}")

    def _longpress(self, element) -> ActionResult:
        """Press and hold the left button at the element's center."""
        center = _get_element_center(element)
        if center:
            try:
                _send_mouse_long_press(center[0], center[1])
                return ActionResult(success=True, message="Long-pressed")
            except Exception as exc:
                return self._failure(f"Long-press failed: {exc}")

        return self._failure("Element has no bounds for long-press")

    # -- open_app ----------------------------------------------------------

    def open_app(self, name: str) -> ActionResult:
        """Launch a macOS application by name with fuzzy matching.

        Returns success once the launch request was accepted; the message
        says whether a window was detected within the wait timeout.
        """
        if not name or not name.strip():
            return self._failure("App name must not be empty")

        try:
            apps = _discover_apps()
            if not apps:
                return self._failure("Could not discover installed applications")

            match = _fuzzy_match(name, list(apps.keys()))
            if match is None:
                return self._failure(f"No installed app matching '{name}' found")

            app_path = apps[match]
            display_name = match.title()

            # Launch via NSWorkspace (preferred) or the `open` command
            launched = self._launch_via_nsworkspace(app_path) or self._launch_via_open(
                app_path
            )
            if not launched:
                return self._failure(f"Failed to launch '{display_name}'")

            # Wait for window to appear
            if self._wait_for_window(match):
                return ActionResult(
                    success=True,
                    message=f"{display_name} launched",
                )
            return ActionResult(
                success=True,
                message=f"{display_name} launch sent, but window not yet detected",
            )

        except Exception as exc:
            return self._failure(f"Failed to launch '{name}': {exc}")

    def _launch_via_nsworkspace(self, app_path: str) -> bool:
        """Launch app via NSWorkspace; False on any failure.

        ``app_path`` is treated as a bundle path when it is an existing
        ``.app`` directory, otherwise as a bundle identifier.
        """
        try:
            from AppKit import NSWorkspace

            workspace = NSWorkspace.sharedWorkspace()

            if app_path.endswith(".app") and os.path.isdir(app_path):
                return bool(workspace.launchApplication_(app_path))

            # Try as bundle identifier
            return bool(
                workspace.launchAppWithBundleIdentifier_options_additionalEventParamDescriptor_launchIdentifier_(
                    app_path,
                    0,
                    None,
                    None,
                )
            )
        except Exception:
            return False

    def _launch_via_open(self, app_path: str) -> bool:
        """Launch app via `open` command (fallback); True on exit status 0."""
        # `-a` takes an application path, `-b` a bundle identifier.
        is_bundle_path = app_path.endswith(".app") and os.path.isdir(app_path)
        selector = "-a" if is_bundle_path else "-b"
        try:
            result = subprocess.run(
                ["open", selector, app_path],
                capture_output=True,
                timeout=10,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
            return False

    def _wait_for_window(
        self,
        app_name: str,
        timeout: float = 8.0,
    ) -> bool:
        """Poll for a window matching the launched app.

        Uses both NSWorkspace (for activation policy filtering) and
        CGWindowListCopyWindowInfo (for fresh window-server data) to
        detect when the launched app's window appears.

        Args:
            app_name: Name matched (case-insensitively, as a substring)
                against running app names and window owner names.
            timeout: Seconds to keep polling, in 0.5 s steps.

        Returns:
            True as soon as a matching window is seen, False on timeout.
        """
        from AppKit import NSApplicationActivationPolicyRegular, NSWorkspace

        deadline = time.monotonic() + timeout
        pattern = re.compile(re.escape(app_name), re.IGNORECASE)

        while time.monotonic() < deadline:
            # Strategy 1: NSWorkspace (may be stale in long-running processes)
            workspace = NSWorkspace.sharedWorkspace()
            for app in workspace.runningApplications():
                if app.activationPolicy() != NSApplicationActivationPolicyRegular:
                    continue
                name = app.localizedName() or ""
                if not pattern.search(name.lower()):
                    continue
                pid = app.processIdentifier()
                try:
                    from ApplicationServices import (
                        AXUIElementCopyAttributeValue,
                        AXUIElementCreateApplication,
                        kAXErrorSuccess,
                        kAXWindowsAttribute,
                    )

                    app_ref = AXUIElementCreateApplication(pid)
                    err, windows = AXUIElementCopyAttributeValue(
                        app_ref,
                        kAXWindowsAttribute,
                        None,
                    )
                    if err == kAXErrorSuccess and windows and len(windows) > 0:
                        return True
                except Exception:
                    # Best-effort probe; fall through to the next strategy.
                    pass

            # Strategy 2: CGWindowListCopyWindowInfo (always fresh from window server)
            try:
                from Quartz import (
                    CGWindowListCopyWindowInfo,
                    kCGNullWindowID,
                    kCGWindowListOptionOnScreenOnly,
                )

                cg_windows = CGWindowListCopyWindowInfo(
                    kCGWindowListOptionOnScreenOnly,
                    kCGNullWindowID,
                )
                if cg_windows:
                    for w in cg_windows:
                        # Only windows whose layer is 0 are considered.
                        if w.get("kCGWindowLayer", -1) != 0:
                            continue
                        owner = w.get("kCGWindowOwnerName", "")
                        if owner and pattern.search(owner.lower()):
                            return True
            except Exception:
                # Best-effort probe; retry on the next poll.
                pass

            time.sleep(0.5)

        return False