computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cup/actions/_linux.py ADDED
@@ -0,0 +1,1008 @@
1
+ """Linux action handler — AT-SPI2 + XTest/xdotool action execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ctypes
6
+ import ctypes.util
7
+ import difflib
8
+ import os
9
+ import re
10
+ import subprocess
11
+ import time
12
+ from typing import Any
13
+
14
+ from cup.actions._handler import ActionHandler
15
+ from cup.actions._keys import parse_combo
16
+ from cup.actions.executor import ActionResult
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # X11 keysym mapping (XK_* constants)
20
+ # ---------------------------------------------------------------------------
21
+
22
# Named (non-character) keys -> X11 keysym values.  Single printable
# characters are not listed here; callers derive their keysym from the
# character's codepoint instead.
_XK_MAP: dict[str, int] = {
    # Editing / whitespace keys
    "enter": 0xFF0D,
    "return": 0xFF0D,
    "tab": 0xFF09,
    "escape": 0xFF1B,
    "space": 0x0020,
    "backspace": 0xFF08,
    "delete": 0xFFFF,
    # Cursor and navigation keys
    "up": 0xFF52,
    "down": 0xFF54,
    "left": 0xFF51,
    "right": 0xFF53,
    "home": 0xFF50,
    "end": 0xFF57,
    "pageup": 0xFF55,
    "pagedown": 0xFF56,
    "insert": 0xFF63,
    # Function keys: f1..f12 occupy the contiguous range 0xFFBE..0xFFC9.
    **{f"f{index}": 0xFFBD + index for index in range(1, 13)},
}
52
+
53
# Modifier names -> keysym of the left-hand variant of each modifier key.
_XK_MODIFIERS: dict[str, int] = dict(
    ctrl=0xFFE3,  # XK_Control_L
    alt=0xFFE9,  # XK_Alt_L
    shift=0xFFE1,  # XK_Shift_L
    meta=0xFFEB,  # XK_Super_L
)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # XTest keyboard/mouse input via ctypes
63
+ # ---------------------------------------------------------------------------
64
+
65
+
66
class _XTest:
    """Thin ctypes wrapper around Xlib + XTest for input simulation.

    The libraries and the display connection are opened lazily on first use
    (see ``_ensure_open``), so constructing an instance never touches X11.
    """

    def __init__(self):
        self._xlib = None
        self._xtst = None
        self._display = None

    def _ensure_open(self):
        """Load libX11/libXtst and open the display, once.

        Raises:
            RuntimeError: if either library is missing or the display
                cannot be opened.
        """
        # Key on the display handle: it is only assigned after *everything*
        # succeeded, so a failed attempt is retried instead of leaving the
        # wrapper half-initialised.
        if self._display is not None:
            return

        libx11_name = ctypes.util.find_library("X11")
        if not libx11_name:
            raise RuntimeError("libX11 not found. Install libx11-dev or xorg-x11-libs.")
        xlib = ctypes.cdll.LoadLibrary(libx11_name)

        libxtst_name = ctypes.util.find_library("Xtst")
        if not libxtst_name:
            raise RuntimeError("libXtst not found. Install libxtst-dev or xorg-x11-server-utils.")
        xtst = ctypes.cdll.LoadLibrary(libxtst_name)

        # Declare signatures before the first call.  Without an explicit
        # pointer restype ctypes assumes a C int and truncates the Display*
        # on 64-bit platforms; without argtypes a Python int argument is
        # likewise converted as a C int.
        xlib.XOpenDisplay.argtypes = [ctypes.c_char_p]
        xlib.XOpenDisplay.restype = ctypes.c_void_p

        xlib.XFlush.argtypes = [ctypes.c_void_p]
        xlib.XFlush.restype = ctypes.c_int

        xlib.XKeysymToKeycode.argtypes = [ctypes.c_void_p, ctypes.c_ulong]
        xlib.XKeysymToKeycode.restype = ctypes.c_ubyte

        xtst.XTestFakeKeyEvent.argtypes = [
            ctypes.c_void_p,
            ctypes.c_uint,
            ctypes.c_int,
            ctypes.c_ulong,
        ]
        xtst.XTestFakeKeyEvent.restype = ctypes.c_int

        xtst.XTestFakeButtonEvent.argtypes = [
            ctypes.c_void_p,
            ctypes.c_uint,
            ctypes.c_int,
            ctypes.c_ulong,
        ]
        xtst.XTestFakeButtonEvent.restype = ctypes.c_int

        xtst.XTestFakeMotionEvent.argtypes = [
            ctypes.c_void_p,
            ctypes.c_int,
            ctypes.c_int,
            ctypes.c_int,
            ctypes.c_ulong,
        ]
        xtst.XTestFakeMotionEvent.restype = ctypes.c_int

        display_name = os.environ.get("DISPLAY", ":0").encode()
        display = xlib.XOpenDisplay(display_name)
        if not display:  # a NULL pointer comes back as None
            raise RuntimeError(
                f"Cannot open X11 display '{display_name.decode()}'. "
                "Ensure DISPLAY is set and X server is running."
            )

        self._xlib = xlib
        self._xtst = xtst
        self._display = display

    def keysym_to_keycode(self, keysym: int) -> int:
        """Return the keycode for *keysym* as reported by XKeysymToKeycode."""
        self._ensure_open()
        return self._xlib.XKeysymToKeycode(self._display, keysym)

    def fake_key_event(self, keycode: int, is_press: bool, delay: int = 0):
        """Inject a key press (``is_press`` true) or release, then flush."""
        self._ensure_open()
        self._xtst.XTestFakeKeyEvent(self._display, keycode, int(is_press), delay)
        self._xlib.XFlush(self._display)

    def fake_button_event(self, button: int, is_press: bool, delay: int = 0):
        """Inject a pointer-button press or release, then flush."""
        self._ensure_open()
        self._xtst.XTestFakeButtonEvent(self._display, button, int(is_press), delay)
        self._xlib.XFlush(self._display)

    def fake_motion_event(self, x: int, y: int, delay: int = 0):
        """Move the pointer to ``(x, y)``, then flush."""
        self._ensure_open()
        # screen_number = -1 means current screen
        self._xtst.XTestFakeMotionEvent(self._display, -1, x, y, delay)
        self._xlib.XFlush(self._display)

    def flush(self):
        """Flush the output buffer; a no-op while no display is open."""
        if self._xlib is not None and self._display is not None:
            self._xlib.XFlush(self._display)
148
+
149
+
150
# Process-wide wrapper instance, created on first request.
_xtest: _XTest | None = None


def _get_xtest() -> _XTest:
    """Return the shared ``_XTest`` wrapper, constructing it on first use."""
    global _xtest
    instance = _xtest
    if instance is None:
        instance = _xtest = _XTest()
    return instance
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Input simulation helpers
163
+ # ---------------------------------------------------------------------------
164
+
165
+
166
def _send_key_combo(combo_str: str) -> None:
    """Send a keyboard combination via XTest fake key events.

    Modifiers are pressed first, then the main keys; everything is released
    in reverse order.

    Args:
        combo_str: Combination understood by ``parse_combo``.

    Raises:
        RuntimeError: if no key of the combination can be resolved to a
            keycode.
    """
    xt = _get_xtest()
    mod_names, key_names = parse_combo(combo_str)
    mod_names = list(mod_names)
    key_names = list(key_names)

    # Resolve modifier keycodes
    mod_keycodes: list[int] = []
    for mod in mod_names:
        keysym = _XK_MODIFIERS.get(mod)
        if keysym:
            kc = xt.keysym_to_keycode(keysym)
            if kc:
                mod_keycodes.append(kc)

    # Resolve main keycodes
    main_keycodes: list[int] = []
    for key in key_names:
        if key in _XK_MAP:
            keysym = _XK_MAP[key]
        elif len(key) == 1:
            # Single character: for ASCII, keysym == codepoint.
            keysym = ord(key)
        else:
            continue
        kc = xt.keysym_to_keycode(keysym)
        if kc:
            main_keycodes.append(kc)

    # A modifier-only combo (no main key was *requested*) taps the modifiers
    # themselves.  When a main key was requested but could not be resolved we
    # must not silently degrade to pressing just the modifiers.
    if mod_keycodes and not key_names:
        main_keycodes, mod_keycodes = mod_keycodes, []

    if not main_keycodes:
        raise RuntimeError(f"Could not resolve any key codes from combo: {combo_str!r}")

    pressed: list[int] = []
    try:
        for kc in mod_keycodes:
            xt.fake_key_event(kc, True)
            pressed.append(kc)
            time.sleep(0.01)

        for kc in main_keycodes:
            xt.fake_key_event(kc, True)
            pressed.append(kc)
            time.sleep(0.01)
    finally:
        # Release whatever actually went down (main keys first, then
        # modifiers) so a failure midway does not leave keys held.
        for kc in reversed(pressed):
            xt.fake_key_event(kc, False)

    xt.flush()
    time.sleep(0.01)
220
+
221
+
222
def _type_string(text: str) -> None:
    """Type a string, preferring xdotool for reliable Unicode input.

    ``xdotool type`` is tried first.  If xdotool cannot be run, or exits with
    an error, the text is typed through XTest fake key events instead.  The
    XTest path is best effort: it applies Shift only for ASCII uppercase
    letters and cannot type characters that have no keycode in the current
    keymap.

    Args:
        text: Text to type; an empty string is a no-op.

    Raises:
        RuntimeError: if xdotool times out (the text may be partially typed,
            so it is not retyped), or if the XTest fallback could not type
            some characters.
    """
    if not text:
        return

    try:
        subprocess.run(
            ["xdotool", "type", "--clearmodifiers", "--", text],
            check=True,
            capture_output=True,
            # Long strings legitimately take longer than a fixed 10 s.
            timeout=10 + 0.05 * len(text),
        )
        return
    except subprocess.TimeoutExpired as exc:
        # Part of the text has probably been typed already; falling back
        # here would enter it a second time.
        raise RuntimeError(
            "xdotool timed out while typing; text may be partially entered"
        ) from exc
    except (OSError, subprocess.SubprocessError):
        pass  # xdotool missing or failed: use XTest below

    xt = _get_xtest()
    control_keysyms = {"\n": 0xFF0D, "\t": 0xFF09}  # Return, Tab
    shift_kc = xt.keysym_to_keycode(0xFFE1)  # XK_Shift_L
    skipped: list[str] = []

    for char in text:
        keysym = control_keysyms.get(char)
        if keysym is None:
            codepoint = ord(char)
            # Latin-1 keysyms equal the codepoint; other Unicode characters
            # use the 0x01000000 + codepoint keysym range.
            keysym = codepoint if codepoint <= 0xFF else 0x01000000 | codepoint

        kc = xt.keysym_to_keycode(keysym)
        if not kc:
            skipped.append(char)
            continue

        # The keycode identifies the physical key; an uppercase letter is
        # only produced while Shift is held.
        use_shift = bool(shift_kc) and char.isascii() and char.isupper()
        if use_shift:
            xt.fake_key_event(shift_kc, True)
        xt.fake_key_event(kc, True)
        xt.fake_key_event(kc, False)
        if use_shift:
            xt.fake_key_event(shift_kc, False)
        xt.flush()
        time.sleep(0.01)

    if skipped:
        raise RuntimeError(
            f"No key mapping for character(s) {''.join(skipped)!r}; they were not typed"
        )
249
+
250
+
251
def _send_mouse_click(
    x: int,
    y: int,
    *,
    button: str = "left",
    count: int = 1,
) -> None:
    """Send mouse click(s) at screen coordinates via XTest.

    Args:
        x: Pointer X position.
        y: Pointer Y position.
        button: ``"left"``, ``"middle"`` or ``"right"``; any other value is
            treated as the left button.
        count: Number of press/release pairs to send (2 for a double click).
    """
    xt = _get_xtest()

    # X11 pointer buttons: left=1, middle=2, right=3
    btn_num = {"left": 1, "middle": 2, "right": 3}.get(button, 1)

    # Move to position
    xt.fake_motion_event(x, y)
    time.sleep(0.02)

    # Click(s)
    for _ in range(count):
        xt.fake_button_event(btn_num, True)
        time.sleep(0.01)
        xt.fake_button_event(btn_num, False)
        time.sleep(0.02)

    xt.flush()
    time.sleep(0.01)
277
+
278
+
279
def _send_mouse_long_press(x: int, y: int, duration: float = 0.8) -> None:
    """Hold the left mouse button at ``(x, y)`` for ``duration`` seconds."""
    left_button = 1
    pointer = _get_xtest()

    pointer.fake_motion_event(x, y)
    time.sleep(0.02)

    # Button down, wait `duration`; button up, short settle pause.
    for is_down, pause in ((True, duration), (False, 0.01)):
        pointer.fake_button_event(left_button, is_down)
        pointer.flush()
        time.sleep(pause)
293
+
294
+
295
def _send_scroll(x: int, y: int, direction: str, amount: int = 5) -> None:
    """Scroll at screen coordinates by sending XTest wheel-button clicks.

    X11 reports scrolling as buttons 4 (up), 5 (down), 6 (left), 7 (right).
    An unrecognised ``direction`` scrolls down.  ``amount`` is the number of
    wheel clicks sent.
    """
    pointer = _get_xtest()

    pointer.fake_motion_event(x, y)
    time.sleep(0.02)

    wheel_buttons = {"up": 4, "down": 5, "left": 6, "right": 7}
    wheel = wheel_buttons.get(direction, 5)

    for _tick in range(amount):
        for is_press in (True, False):
            pointer.fake_button_event(wheel, is_press)
        time.sleep(0.01)

    pointer.flush()
    time.sleep(0.02)
315
+
316
+
317
+ # ---------------------------------------------------------------------------
318
+ # AT-SPI2 action helpers
319
+ # ---------------------------------------------------------------------------
320
+
321
+
322
def _atspi_do_action(accessible, action_name: str) -> bool:
    """Invoke a named action on an AT-SPI2 accessible object.

    Searches the object's Action interface for an action whose name equals
    ``action_name`` (compared case-insensitively) and triggers the first
    match by index.

    Args:
        accessible: Object exposing ``get_action_iface()``.
        action_name: Name of the action to trigger.

    Returns:
        True if a matching action was found and ``do_action`` reported
        success; False otherwise, including when any call raises.
    """
    try:
        # Reported names are lower-cased below, so normalise the request the
        # same way; otherwise a mixed-case request could never match.
        wanted = action_name.strip().lower()
        action_iface = accessible.get_action_iface()
        if action_iface is None:
            return False
        for index in range(action_iface.get_n_actions()):
            name = (action_iface.get_action_name(index) or "").lower()
            if name == wanted:
                return bool(action_iface.do_action(index))
        return False
    except Exception:
        return False
340
+
341
+
342
def _atspi_get_bounds_xywh(accessible) -> tuple[int, int, int, int] | None:
    """Return the element's extents as ``(x, y, w, h)``, or None.

    Extents are requested with coordinate type 0 (ATSPI_COORD_TYPE_SCREEN).
    None is returned when there is no Component interface, when neither
    width nor height is positive, or when any call raises.
    """
    screen_coords = 0  # ATSPI_COORD_TYPE_SCREEN
    try:
        component = accessible.get_component_iface()
        if component is not None:
            extents = component.get_extents(screen_coords)
            is_degenerate = extents.width <= 0 and extents.height <= 0
            if not is_degenerate:
                return (extents.x, extents.y, extents.width, extents.height)
    except Exception:
        return None
    return None
355
+
356
+
357
def _get_element_center(accessible) -> tuple[int, int] | None:
    """Return the midpoint of the element's bounds, or None without bounds."""
    rect = _atspi_get_bounds_xywh(accessible)
    if rect is not None:
        left, top, width, height = rect
        return (left + width // 2, top + height // 2)
    return None
364
+
365
+
366
def _atspi_grab_focus(accessible) -> bool:
    """Ask the element's Component interface to take keyboard focus.

    Returns the result of ``grab_focus()``, or False when the element has no
    Component interface or a call raises.
    """
    try:
        component = accessible.get_component_iface()
        return False if component is None else component.grab_focus()
    except Exception:
        return False
375
+
376
+
377
def _atspi_get_value_iface(accessible):
    """Return the accessible's Value interface, or None if the lookup fails."""
    try:
        iface = accessible.get_value_iface()
    except Exception:
        iface = None
    return iface
383
+
384
+
385
def _atspi_get_text_iface(accessible):
    """Return the accessible's Text interface, or None if the lookup fails."""
    try:
        iface = accessible.get_text_iface()
    except Exception:
        iface = None
    return iface
391
+
392
+
393
def _atspi_get_editable_text_iface(accessible):
    """Return the accessible's EditableText interface, or None on failure."""
    try:
        iface = accessible.get_editable_text_iface()
    except Exception:
        iface = None
    return iface
399
+
400
+
401
def _atspi_get_selection_iface(accessible):
    """Return the accessible's Selection interface, or None on failure.

    Callers usually pass the *parent* of the element they want selected.
    """
    try:
        iface = accessible.get_selection_iface()
    except Exception:
        iface = None
    return iface
407
+
408
+
409
+ # ---------------------------------------------------------------------------
410
+ # App launching helpers
411
+ # ---------------------------------------------------------------------------
412
+
413
+
414
def _discover_desktop_apps() -> dict[str, str]:
    """Discover installed Linux apps from .desktop files.

    Searches ``<dir>/applications`` (recursively) under ``XDG_DATA_HOME``
    and then each ``XDG_DATA_DIRS`` entry.  When several files declare the
    same name, the first one found wins, so earlier directories take
    precedence.

    Returns:
        ``{lowercase_name: exec_command}`` for every entry that yields both
        a name and a command.
    """
    # An unset *or empty* variable falls back to the default.
    data_home = os.environ.get("XDG_DATA_HOME") or os.path.expanduser("~/.local/share")
    data_dirs = os.environ.get("XDG_DATA_DIRS") or "/usr/local/share:/usr/share"
    search_dirs = [data_home, *data_dirs.split(":")]

    apps: dict[str, str] = {}

    for data_dir in search_dirs:
        # Skip empty and relative entries: joining them with "applications"
        # would otherwise scan a directory relative to the current working
        # directory.
        if not os.path.isabs(data_dir):
            continue
        app_dir = os.path.join(data_dir, "applications")
        if not os.path.isdir(app_dir):
            continue
        try:
            for root, dirs, files in os.walk(app_dir):
                dirs.sort()  # deterministic traversal order
                for fname in sorted(files):
                    if not fname.endswith(".desktop"):
                        continue
                    name, exec_cmd = _parse_desktop_file(os.path.join(root, fname))
                    if name and exec_cmd:
                        apps.setdefault(name.lower(), exec_cmd)
        except OSError:
            continue

    return apps
445
+
446
+
447
def _parse_desktop_file(path: str) -> tuple[str, str]:
    """Parse a .desktop file and return ``(Name, Exec)`` or ``("", "")``.

    Only the ``[Desktop Entry]`` group is read.  The first non-empty
    unlocalized ``Name`` is used, and field codes such as ``%f`` or ``%U``
    are stripped from ``Exec``.

    Returns:
        ``(name, exec_command)``; either may be empty if the key is absent.
        ``("", "")`` is returned if the file cannot be read or the entry is
        marked ``NoDisplay=true`` or ``Hidden=true``.
    """
    name = ""
    exec_cmd = ""
    suppressed = False
    in_desktop_entry = False

    try:
        with open(path, encoding="utf-8", errors="ignore") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                if line.startswith("[") and line.endswith("]"):
                    if line == "[Desktop Entry]":
                        in_desktop_entry = True
                    elif in_desktop_entry:
                        break  # End of [Desktop Entry] section
                    continue
                if not in_desktop_entry:
                    continue

                # Split on the first "=" and ignore whitespace around it, so
                # both "Name=Foo" and "Name = Foo" are recognised.
                key, sep, value = line.partition("=")
                if not sep:
                    continue
                key = key.strip()
                value = value.strip()

                if key == "Name":
                    if not name:
                        name = value
                elif key == "Exec":
                    # Strip field codes (%f, %F, %u, %U, etc.)
                    exec_cmd = re.sub(r"\s+%[fFuUdDnNickvm]", "", value).strip()
                elif key in ("NoDisplay", "Hidden"):
                    # Hidden marks an entry as deleted; NoDisplay keeps it
                    # out of menus.  Neither should be offered for launch.
                    if value.lower() == "true":
                        suppressed = True
    except OSError:
        return "", ""

    if suppressed:
        return "", ""
    return name, exec_cmd
481
+
482
+
483
+ def _fuzzy_match(
484
+ query: str,
485
+ candidates: list[str],
486
+ cutoff: float = 0.5,
487
+ ) -> str | None:
488
+ """Find the best fuzzy match for query among candidates."""
489
+ query_lower = query.lower().strip()
490
+
491
+ # Exact match
492
+ if query_lower in candidates:
493
+ return query_lower
494
+
495
+ # Substring match — prefer shorter candidates (more specific)
496
+ substring_matches = [c for c in candidates if query_lower in c]
497
+ if substring_matches:
498
+ word_boundary = [
499
+ c
500
+ for c in substring_matches
501
+ if re.search(r"(?:^|[\s\-_])" + re.escape(query_lower) + r"(?:$|[\s\-_])", c)
502
+ ]
503
+ if word_boundary:
504
+ return min(word_boundary, key=len)
505
+ return min(substring_matches, key=len)
506
+
507
+ # Reverse substring
508
+ for c in candidates:
509
+ if c in query_lower:
510
+ return c
511
+
512
+ # Fuzzy match via SequenceMatcher
513
+ best_match = None
514
+ best_score = 0.0
515
+ for c in candidates:
516
+ score = difflib.SequenceMatcher(None, query_lower, c).ratio()
517
+ if score > best_score:
518
+ best_score = score
519
+ best_match = c
520
+
521
+ if best_match and best_score >= cutoff:
522
+ return best_match
523
+ return None
524
+
525
+
526
+ # ---------------------------------------------------------------------------
527
+ # LinuxActionHandler
528
+ # ---------------------------------------------------------------------------
529
+
530
+
531
class LinuxActionHandler(ActionHandler):
    """Execute CUP actions on Linux via AT-SPI2 + XTest/xdotool.

    Uses AT-SPI2 Action/Value/EditableText/Selection/Component interfaces
    for semantic actions, with XTest (libXtst) fallbacks for mouse/keyboard
    input simulation.

    Every public method reports its outcome as an ``ActionResult`` and does
    not let exceptions from the underlying toolkits escape.

    Requirements:
    - libX11 and libXtst (for XTest fake events)
    - gi.repository.Atspi (PyGObject — usually already present for tree capture)
    - xdotool (optional, for reliable Unicode typing)
    """

    # -- result helpers ----------------------------------------------------

    @staticmethod
    def _ok(message: str) -> ActionResult:
        """Build a successful result carrying *message*."""
        return ActionResult(success=True, message=message)

    @staticmethod
    def _fail(error: str) -> ActionResult:
        """Build a failed result carrying *error*."""
        return ActionResult(success=False, message="", error=error)

    # -- entry points ------------------------------------------------------

    def action(
        self,
        native_ref: Any,
        action: str,
        params: dict[str, Any],
    ) -> ActionResult:
        """Dispatch *action* on the AT-SPI2 element *native_ref*.

        Args:
            native_ref: The AT-SPI2 accessible to act on.
            action: Action name, e.g. ``"click"``, ``"type"``, ``"scroll"``.
            params: Action parameters: ``value`` for ``type``/``setvalue``,
                ``direction`` for ``scroll``.  ``None`` is treated as empty.

        Returns:
            The action's result; unknown actions yield a failed result.
        """
        element = native_ref
        params = params or {}

        # Actions that take a parameter
        if action == "type":
            return self._type(element, params.get("value", ""))
        if action == "setvalue":
            return self._setvalue(element, params.get("value", ""))
        if action == "scroll":
            return self._scroll(element, params.get("direction", "down"))

        # Actions that only need the element
        handlers = {
            "click": self._click,
            "toggle": self._toggle,
            "expand": self._expand,
            "collapse": self._collapse,
            "select": self._select,
            "increment": self._increment,
            "decrement": self._decrement,
            "rightclick": self._rightclick,
            "doubleclick": self._doubleclick,
            "focus": self._focus,
            "dismiss": self._dismiss,
            "longpress": self._longpress,
        }
        handler = handlers.get(action)
        if handler is None:
            return self._fail(f"Action '{action}' not implemented for Linux")
        return handler(element)

    def press(self, combo: str) -> ActionResult:
        """Send the key combination *combo* to the focused window."""
        try:
            _send_key_combo(combo)
            return self._ok(f"Pressed {combo}")
        except Exception as exc:
            return self._fail(f"Failed to press keys '{combo}': {exc}")

    # -- shared helpers ----------------------------------------------------

    def _pointer_action(
        self,
        element,
        send,
        *,
        success: str,
        failure_prefix: str,
        no_bounds: str,
    ) -> ActionResult:
        """Run ``send(x, y)`` at the centre of *element*.

        Fails with *no_bounds* when the element has no usable extents, and
        with ``"<failure_prefix>: <exc>"`` when ``send`` raises.
        """
        center = _get_element_center(element)
        if not center:
            return self._fail(no_bounds)
        try:
            send(center[0], center[1])
        except Exception as exc:
            return self._fail(f"{failure_prefix}: {exc}")
        return self._ok(success)

    @staticmethod
    def _replace_text(element, text: str) -> bool:
        """Replace the element's text through the EditableText interface.

        Returns True only if the interface exists and the insertion reported
        success.  A False return from ``insert_text`` counts as failure, so
        callers can fall back to keyboard input.
        """
        editable = _atspi_get_editable_text_iface(element)
        if editable is None:
            return False
        try:
            text_iface = _atspi_get_text_iface(element)
            if text_iface:
                char_count = text_iface.get_character_count()
                if char_count > 0:
                    editable.delete_text(0, char_count)
            # The length argument is given as the UTF-8 byte length of the text.
            return bool(editable.insert_text(0, text, len(text.encode("utf-8"))))
        except Exception:
            return False

    def _type_via_keyboard(self, element, text: str) -> ActionResult:
        """Focus the element, select its content and type *text* over it."""
        try:
            _atspi_grab_focus(element)
            time.sleep(0.05)

            # Click to ensure focus
            center = _get_element_center(element)
            if center:
                _send_mouse_click(center[0], center[1])
                time.sleep(0.05)

            # Select all then type
            _send_key_combo("ctrl+a")
            time.sleep(0.05)
            _type_string(text)

            return self._ok(f"Typed: {text}")
        except Exception as exc:
            return self._fail(f"Failed to type: {exc}")

    def _set_expanded(self, element, expand: bool) -> ActionResult:
        """Bring the element to the requested expanded/collapsed state."""
        if expand:
            done, already = "Expanded", "Already expanded"
            unsupported = "Element does not support expand"
        else:
            done, already = "Collapsed", "Already collapsed"
            unsupported = "Element does not support collapse"

        # Nothing to do if the element is already in the requested state.
        try:
            state_set = element.get_state_set()
            from gi.repository import Atspi

            if bool(state_set.contains(Atspi.StateType.EXPANDED)) == expand:
                return self._ok(already)
        except Exception:
            pass

        # "expand or contract" is used by GTK combo boxes; click/activate
        # works for disclosure triangles and tree items.
        for act_name in ("expand or contract", "click", "activate"):
            if _atspi_do_action(element, act_name):
                return self._ok(done)

        return self._fail(unsupported)

    def _step_value(self, element, increase: bool) -> ActionResult:
        """Increment or decrement the element by one step."""
        verb = "increment" if increase else "decrement"
        label = "Incremented" if increase else "Decremented"

        # Try AT-SPI Action interface
        if _atspi_do_action(element, verb):
            return self._ok(label)

        # Try Value interface
        value_iface = _atspi_get_value_iface(element)
        if value_iface is not None:
            try:
                current = value_iface.get_current_value()
                minimum_increment = value_iface.get_minimum_increment()
                step = minimum_increment if minimum_increment > 0 else 1.0
                if increase:
                    new_val = min(current + step, value_iface.get_maximum_value())
                else:
                    new_val = max(current - step, value_iface.get_minimum_value())
                value_iface.set_current_value(new_val)
                return self._ok(f"{label} to {new_val}")
            except Exception:
                pass

        return self._fail(f"Element does not support {verb}")

    # -- individual actions ------------------------------------------------

    def _click(self, element) -> ActionResult:
        """Click via an AT-SPI action, then focus+Enter, then the mouse."""
        # Try AT-SPI Action interface first (click, press, activate)
        for act_name in ("click", "press", "activate"):
            if _atspi_do_action(element, act_name):
                return self._ok("Clicked")

        # Fallback: focus + Enter
        if _atspi_grab_focus(element):
            time.sleep(0.05)
            try:
                _send_key_combo("enter")
                return self._ok("Clicked (focus+enter fallback)")
            except Exception:
                pass

        # Fallback: mouse click at element center
        return self._pointer_action(
            element,
            _send_mouse_click,
            success="Clicked (mouse fallback)",
            failure_prefix="Mouse click failed",
            no_bounds="Element does not support click and has no bounds",
        )

    def _toggle(self, element) -> ActionResult:
        """Toggle via the "toggle" action, or "click" as many widgets expose."""
        for act_name in ("toggle", "click"):
            if _atspi_do_action(element, act_name):
                return self._ok("Toggled")
        return self._fail("Element does not support toggle")

    def _type(self, element, text: str) -> ActionResult:
        """Type text into an element, replacing its current content.

        Strategy:
        1. Try EditableText interface (insert/set text directly)
        2. Fall back to focus + XTest/xdotool keyboard input
        """
        if self._replace_text(element, text):
            return self._ok(f"Typed: {text}")
        return self._type_via_keyboard(element, text)

    def _setvalue(self, element, text: str) -> ActionResult:
        """Set value programmatically via AT-SPI2 Value or EditableText interface.

        Falls back to keyboard typing when neither interface accepts it.
        """
        # Try Value interface (for sliders, spinbuttons)
        value_iface = _atspi_get_value_iface(element)
        if value_iface is not None:
            try:
                value_iface.set_current_value(float(text))
                return self._ok(f"Set value to: {text}")
            except Exception:
                pass  # non-numeric text or toolkit error: try text below

        # Try EditableText interface
        if self._replace_text(element, text):
            return self._ok(f"Set value to: {text}")

        # Fallback to type
        return self._type_via_keyboard(element, text)

    def _expand(self, element) -> ActionResult:
        """Expand the element unless it is already expanded."""
        return self._set_expanded(element, True)

    def _collapse(self, element) -> ActionResult:
        """Collapse the element unless it is already collapsed."""
        return self._set_expanded(element, False)

    def _select(self, element) -> ActionResult:
        """Select the element via its parent's Selection interface or a click."""
        # Try Selection interface on the parent (e.g., list selects child)
        try:
            parent = element.get_parent()
            if parent:
                sel_iface = _atspi_get_selection_iface(parent)
                if sel_iface is not None:
                    # Find this element's index among siblings
                    idx = element.get_index_in_parent()
                    if idx >= 0 and sel_iface.select_child(idx):
                        return self._ok("Selected")
        except Exception:
            pass

        # Try AT-SPI click action (works for tabs, menu items, list items)
        for act_name in ("click", "activate"):
            if _atspi_do_action(element, act_name):
                return self._ok("Selected")

        # Fallback: the generic click strategy
        return self._click(element)

    def _scroll(self, element, direction: str) -> ActionResult:
        """Scroll in *direction* with the pointer over the element's centre."""
        return self._pointer_action(
            element,
            lambda x, y: _send_scroll(x, y, direction),
            success=f"Scrolled {direction}",
            failure_prefix="Scroll failed",
            no_bounds="Element has no bounds for scroll target",
        )

    def _increment(self, element) -> ActionResult:
        """Increase the element's value by one step."""
        return self._step_value(element, True)

    def _decrement(self, element) -> ActionResult:
        """Decrease the element's value by one step."""
        return self._step_value(element, False)

    def _rightclick(self, element) -> ActionResult:
        """Right-click the centre of the element."""
        return self._pointer_action(
            element,
            lambda x, y: _send_mouse_click(x, y, button="right"),
            success="Right-clicked",
            failure_prefix="Right-click failed",
            no_bounds="Element has no bounds for right-click",
        )

    def _doubleclick(self, element) -> ActionResult:
        """Double-click the centre of the element."""
        return self._pointer_action(
            element,
            lambda x, y: _send_mouse_click(x, y, count=2),
            success="Double-clicked",
            failure_prefix="Double-click failed",
            no_bounds="Element has no bounds for double-click",
        )

    def _focus(self, element) -> ActionResult:
        """Move keyboard focus to the element."""
        if _atspi_grab_focus(element):
            return self._ok("Focused")
        return self._fail("Failed to focus element")

    def _dismiss(self, element) -> ActionResult:
        """Dismiss via a close/dismiss action, else focus the element and press Escape."""
        # Try AT-SPI close/dismiss action
        for act_name in ("close", "dismiss"):
            if _atspi_do_action(element, act_name):
                return self._ok("Dismissed")

        # Fallback: focus + Escape
        try:
            _atspi_grab_focus(element)
            time.sleep(0.05)
            _send_key_combo("escape")
            return self._ok("Dismissed (Escape)")
        except Exception as exc:
            return self._fail(f"Failed to dismiss: {exc}")

    def _longpress(self, element) -> ActionResult:
        """Press and hold the left button on the centre of the element."""
        return self._pointer_action(
            element,
            _send_mouse_long_press,
            success="Long-pressed",
            failure_prefix="Long-press failed",
            no_bounds="Element has no bounds for long-press",
        )

    # -- open_app ----------------------------------------------------------

    def open_app(self, name: str) -> ActionResult:
        """Launch a Linux application by name with fuzzy matching.

        Discovers installed apps from .desktop files in XDG data directories,
        fuzzy-matches the name, and launches the best match.

        Returns:
            Success once the process has been started (the message says
            whether a window was detected); failure if no app matches or
            the command cannot be started.
        """
        if not name or not name.strip():
            return self._fail("App name must not be empty")

        try:
            apps = _discover_desktop_apps()
            if not apps:
                return self._fail("Could not discover installed applications")

            match = _fuzzy_match(name, list(apps.keys()))
            if match is None:
                return self._fail(f"No installed app matching '{name}' found")

            exec_cmd = apps[match]
            display_name = match.title()

            try:
                import shlex

                # The command comes from a .desktop file, not from a shell
                # script: split it into argv and start it directly rather
                # than handing an arbitrary string to /bin/sh.
                argv = [arg.replace("%%", "%") for arg in shlex.split(exec_cmd)]
                if not argv:
                    raise ValueError("empty Exec command")
                subprocess.Popen(
                    argv,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
            except Exception as exc:
                return self._fail(f"Failed to launch '{display_name}': {exc}")

            # Wait for window to appear
            if self._wait_for_window(match):
                return self._ok(f"{display_name} launched")
            return self._ok(f"{display_name} launch sent, but window not yet detected")

        except Exception as exc:
            return self._fail(f"Failed to launch '{name}': {exc}")

    def _wait_for_window(
        self,
        app_name: str,
        timeout: float = 8.0,
    ) -> bool:
        """Poll AT-SPI2 desktop for a window matching the launched app.

        Checks every 0.5 s for up to *timeout* seconds.  Returns True as
        soon as an application whose name contains *app_name*
        (case-insensitive) has a child in the VISIBLE state, and False on
        timeout or if Atspi cannot be imported.
        """
        try:
            import gi

            gi.require_version("Atspi", "2.0")
            from gi.repository import Atspi
        except Exception:
            return False

        pattern = re.compile(re.escape(app_name), re.IGNORECASE)

        def has_visible_window(app) -> bool:
            # A single unreadable child must not abort the scan.
            for j in range(app.get_child_count()):
                try:
                    win = app.get_child_at_index(j)
                    if win is None:
                        continue
                    if win.get_state_set().contains(Atspi.StateType.VISIBLE):
                        return True
                except Exception:
                    continue
            return False

        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                desktop = Atspi.get_desktop(0)
                for i in range(desktop.get_child_count()):
                    try:
                        app = desktop.get_child_at_index(i)
                        if app is None:
                            continue
                        if pattern.search(app.get_name() or "") and has_visible_window(app):
                            return True
                    except Exception:
                        continue
            except Exception:
                pass
            time.sleep(0.5)

        return False