computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cup/platforms/linux.py ADDED
@@ -0,0 +1,1060 @@
1
+ """
2
+ Linux AT-SPI2 platform adapter for CUP.
3
+
4
+ Captures the accessibility tree via AT-SPI2 over D-Bus (using PyGObject /
5
+ GObject Introspection bindings) and maps it to the canonical CUP schema.
6
+
7
+ Key design choices:
8
+ 1. Uses gi.repository.Atspi — the standard Python binding for AT-SPI2
9
+ 2. Batch-reads core properties per node (role, name, description, states,
10
+ bounds, attributes, actions, value) in a single walk pass
11
+ 3. Xlib (via ctypes) for screen info and foreground window detection
12
+ 4. Parallel tree walking with ThreadPoolExecutor for multi-window captures
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import ctypes
18
+ import ctypes.util
19
+ import itertools
20
+ import os
21
+ import subprocess
22
+ from concurrent.futures import ThreadPoolExecutor
23
+ from typing import Any
24
+
25
+ from cup._base import PlatformAdapter
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # AT-SPI2 role -> CUP ARIA role mapping
29
+ # ---------------------------------------------------------------------------
30
+ # Based on W3C Core AAM (Accessibility API Mappings) spec which defines
31
+ # how ARIA roles map to ATK/AT-SPI roles. AT-SPI role names come from
32
+ # the Atspi.Role enum (e.g. Atspi.Role.PUSH_BUTTON).
33
+ #
34
+ # We map from the string name of the enum (e.g. "push-button") rather
35
+ # than numeric values, for readability and resilience across versions.
36
+
37
# Lookup table: normalized AT-SPI role name -> CUP role string.
# Keys are lowercase and dash-separated (e.g. "push-button"), which is the
# form _atspi_role_name() produces. Role names missing from this table are
# treated as "generic" by _build_cup_node().
CUP_ROLES: dict[str, str] = {
    # Core interactive
    "push-button": "button",
    "toggle-button": "button",
    "check-box": "checkbox",
    "radio-button": "radio",
    "combo-box": "combobox",
    "text": "textbox",
    "password-text": "textbox",
    "entry": "textbox",
    "spin-button": "spinbutton",
    "slider": "slider",
    "scroll-bar": "scrollbar",
    "progress-bar": "progressbar",
    "link": "link",
    "menu": "menu",
    "menu-bar": "menubar",
    "menu-item": "menuitem",
    "check-menu-item": "menuitemcheckbox",
    "radio-menu-item": "menuitemradio",
    "separator": "separator",
    # Containers / structure
    "frame": "window",
    "dialog": "dialog",
    "alert": "alert",
    "file-chooser": "dialog",
    "color-chooser": "dialog",
    "font-chooser": "dialog",
    "window": "window",
    "panel": "group",
    "filler": "generic",
    "grouping": "group",
    "split-pane": "group",
    "viewport": "group",
    "scroll-pane": "group",
    "layered-pane": "group",
    "glass-pane": "group",
    "internal-frame": "group",
    "desktop-frame": "group",
    "root-pane": "group",
    "option-pane": "group",
    # Tables / grids
    "table": "table",
    "table-cell": "cell",
    "table-row": "row",
    "table-column-header": "columnheader",
    "table-row-header": "rowheader",
    "tree-table": "treegrid",
    # Lists / trees
    "list": "list",
    "list-item": "listitem",
    "tree": "tree",
    "tree-item": "treeitem",
    # Tabs
    "page-tab-list": "tablist",
    "page-tab": "tab",
    # Text / display
    "label": "text",
    "static": "text",
    "caption": "text",
    "heading": "heading",
    "paragraph": "text",
    "section": "generic",
    "block-quote": "generic",
    "image": "img",
    "icon": "img",
    "animation": "img",
    "canvas": "img",
    "chart": "img",
    # Document / content
    "document-frame": "document",
    "document-web": "document",
    "document-text": "document",
    "document-email": "document",
    "document-spreadsheet": "document",
    "document-presentation": "document",
    "article": "article",
    "form": "form",
    # Toolbar / status
    "tool-bar": "toolbar",
    "tool-tip": "tooltip",
    "status-bar": "status",
    "info-bar": "status",
    "notification": "alert",
    # ARIA landmarks (exposed via AT-SPI when apps set ARIA roles)
    "landmark": "region",
    "log": "log",
    "marquee": "marquee",
    "math": "math",
    "timer": "timer",
    "definition": "definition",
    "note": "note",
    "figure": "figure",
    "footer": "contentinfo",
    "content-deletion": "generic",
    "content-insertion": "generic",
    "description-list": "list",
    "description-term": "term",
    "description-value": "definition",
    "comment": "note",
    # Navigation
    "page": "region",
    "redundant-object": "generic",
    "application": "application",
    "autocomplete": "combobox",
    "embedded": "generic",
    "editbar": "toolbar",
    # Catch-all: explicit entries for the roles that carry no semantics
    "unknown": "generic",
    "invalid": "generic",
    "extended": "generic",
}
+
150
# CUP roles that accept text input. _build_cup_node() adds the "type" and
# "setvalue" actions to a node whose role is listed here and whose AT-SPI
# state set contains "editable".
TEXT_INPUT_ROLES = {"textbox", "searchbox", "combobox", "document"}
152
+
153
# AT-SPI state names -> CUP state mappings.
# Keys are AT-SPI state nicks in dash-separated form; an empty string marks
# a state with no direct CUP equivalent (it is used to derive another state,
# as noted per entry).
# NOTE(review): nothing in the visible part of this file reads STATE_MAP;
# _build_cup_node() derives CUP states with explicit membership checks.
# Confirm whether another module consumes this table.
STATE_MAP: dict[str, str] = {
    "focused": "focused",
    "selected": "selected",
    "checked": "checked",
    "pressed": "pressed",
    "expanded": "expanded",
    "expandable": "",  # used to derive collapsed
    "sensitive": "",  # inverse -> disabled
    "enabled": "",  # inverse -> disabled
    "editable": "editable",
    "required": "required",
    "modal": "modal",
    "multi-selectable": "multiselectable",
    "busy": "busy",
    "read-only": "readonly",
    "visible": "",  # inverse -> hidden
    "showing": "",  # inverse -> offscreen
    "indeterminate": "mixed",
}
+
175
# AT-SPI action names -> CUP action mappings.
# Keys are lowercased AT-SPI action names (as returned by
# _atspi_get_actions()). _build_cup_node() passes action names that are not
# listed here through unchanged.
ACTION_MAP: dict[str, str] = {
    "click": "click",
    "press": "click",
    "activate": "click",
    "jump": "click",
    "toggle": "toggle",
    "expand or contract": "expand",
    "menu": "click",
}
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # X11 helpers via ctypes (for screen info and foreground window)
189
+ # ---------------------------------------------------------------------------
190
+
191
+
192
+ class _X11:
193
+ """Thin ctypes wrapper around libX11 for screen/window queries."""
194
+
195
+ def __init__(self):
196
+ self._lib = None
197
+ self._display = None
198
+
199
+ def _ensure_open(self):
200
+ if self._lib is not None:
201
+ return
202
+ libx11_name = ctypes.util.find_library("X11")
203
+ if not libx11_name:
204
+ raise RuntimeError("libX11 not found. Install libx11-dev or xorg-x11-libs.")
205
+ self._lib = ctypes.cdll.LoadLibrary(libx11_name)
206
+ display_name = os.environ.get("DISPLAY", ":0").encode()
207
+ self._display = self._lib.XOpenDisplay(display_name)
208
+ if not self._display:
209
+ raise RuntimeError(
210
+ f"Cannot open X11 display '{display_name.decode()}'. "
211
+ "Ensure DISPLAY is set and X server is running."
212
+ )
213
+
214
+ def get_screen_size(self) -> tuple[int, int]:
215
+ """Return (width, height) of the default screen."""
216
+ self._ensure_open()
217
+ screen = self._lib.XDefaultScreen(self._display)
218
+ w = self._lib.XDisplayWidth(self._display, screen)
219
+ h = self._lib.XDisplayHeight(self._display, screen)
220
+ return (w, h)
221
+
222
+ def get_foreground_xid(self) -> int | None:
223
+ """Return the X11 window ID of the currently focused window."""
224
+ self._ensure_open()
225
+ focus_window = ctypes.c_ulong()
226
+ revert_to = ctypes.c_int()
227
+ self._lib.XGetInputFocus(
228
+ self._display,
229
+ ctypes.byref(focus_window),
230
+ ctypes.byref(revert_to),
231
+ )
232
+ xid = focus_window.value
233
+ return xid if xid > 1 else None # 0=None, 1=PointerRoot
234
+
235
+ def close(self):
236
+ if self._lib and self._display:
237
+ self._lib.XCloseDisplay(self._display)
238
+ self._display = None
239
+
240
+
241
+ def _get_scale_factor() -> float:
242
+ """Detect display scale factor from common Linux mechanisms."""
243
+ # GDK_SCALE env var (set by GTK/GNOME)
244
+ gdk_scale = os.environ.get("GDK_SCALE")
245
+ if gdk_scale:
246
+ try:
247
+ return float(gdk_scale)
248
+ except ValueError:
249
+ pass
250
+
251
+ # Qt scale factor
252
+ qt_scale = os.environ.get("QT_SCALE_FACTOR")
253
+ if qt_scale:
254
+ try:
255
+ return float(qt_scale)
256
+ except ValueError:
257
+ pass
258
+
259
+ # gsettings (GNOME)
260
+ try:
261
+ result = subprocess.run(
262
+ ["gsettings", "get", "org.gnome.desktop.interface", "text-scaling-factor"],
263
+ capture_output=True,
264
+ text=True,
265
+ timeout=2,
266
+ )
267
+ if result.returncode == 0:
268
+ val = float(result.stdout.strip())
269
+ if val > 0:
270
+ return val
271
+ except Exception:
272
+ pass
273
+
274
+ return 1.0
275
+
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # AT-SPI2 helpers
279
+ # ---------------------------------------------------------------------------
280
+
281
+
282
def _init_atspi():
    """Load the AT-SPI2 GObject Introspection binding and return it.

    The import is done lazily so that this module can be imported on
    machines without PyGObject installed.

    Returns:
        The ``gi.repository.Atspi`` module, pinned to API version 2.0.
    """
    import gi

    # The version must be pinned before the namespace is first imported.
    gi.require_version("Atspi", "2.0")

    # Only the tree is read, so no event listeners are registered here.
    from gi.repository import Atspi as atspi_module

    return atspi_module
+
293
+
294
def _atspi_role_name(accessible) -> str:
    """Return the accessible's AT-SPI role as a lowercase, dashed string.

    The role name reported by AT-SPI2 uses spaces (e.g. "push button");
    spaces are turned into dashes so the result can be used as a key into
    CUP_ROLES. An empty role name, or any failure while reading it, yields
    "unknown".
    """
    try:
        label = accessible.get_role_name()
        if not label:
            return "unknown"
        return "-".join(label.lower().split(" "))
    except Exception:
        return "unknown"
+
306
+
307
def _atspi_get_states(accessible) -> set[str]:
    """Collect the names of the accessible's active states.

    Only the states reported by ``get_states()`` are visited (rather than
    probing every possible enum value). Each state's ``value_nick`` is used
    as its name, with underscores replaced by dashes, e.g.
    "multi-selectable".

    On any failure the states gathered so far are returned.
    """
    collected: set[str] = set()
    try:
        for state in accessible.get_state_set().get_states():
            collected.add(state.value_nick.replace("_", "-"))
    except Exception:
        # Best effort: keep whatever was read before the error.
        pass
    return collected
+
327
+
328
def _atspi_get_actions(accessible) -> list[str]:
    """Return the lowercased names exposed by the Action interface.

    Actions with an empty name are skipped. An accessible without an Action
    interface yields an empty list; on any failure the names gathered so far
    are returned.
    """
    names: list[str] = []
    try:
        iface = accessible.get_action_iface()
        count = 0 if iface is None else iface.get_n_actions()
        for index in range(count):
            label = iface.get_action_name(index)
            if label:
                names.append(label.lower())
    except Exception:
        # Best effort: keep whatever was read before the error.
        pass
    return names
+
343
+
344
def _atspi_get_attributes(accessible) -> dict[str, str]:
    """Return a copy of the object attributes (e.g. xml-roles, level).

    Missing/empty attributes and any failure while reading them both yield
    an empty dict.
    """
    try:
        raw = accessible.get_attributes()
        if not raw:
            return {}
        return dict(raw)
    except Exception:
        return {}
+
352
+
353
+ def _atspi_get_value(accessible) -> tuple[float | None, float | None, float | None]:
354
+ """Read Value interface: (current, min, max) or (None, None, None)."""
355
+ try:
356
+ value_iface = accessible.get_value_iface()
357
+ if value_iface is not None:
358
+ current = value_iface.get_current_value()
359
+ minimum = value_iface.get_minimum_value()
360
+ maximum = value_iface.get_maximum_value()
361
+ return (current, minimum, maximum)
362
+ except Exception:
363
+ pass
364
+ return (None, None, None)
365
+
366
+
367
def _atspi_get_text(accessible, max_chars: int = 10000) -> str:
    """Read the Text interface to get the current text content.

    Args:
        accessible: the AT-SPI accessible to read from.
        max_chars: upper bound on the number of characters fetched. Longer
            content is truncated to this many leading characters, so large
            documents still report a value while the transfer stays bounded.

    Returns:
        The text content (at most ``max_chars`` characters), or an empty
        string when the accessible has no Text interface, holds no text, or
        reading fails.
    """
    try:
        text_iface = accessible.get_text_iface()
        if text_iface is None:
            return ""
        char_count = text_iface.get_character_count()
        if char_count <= 0 or max_chars <= 0:
            return ""
        # Read only the leading slice instead of discarding long text.
        return text_iface.get_text(0, min(char_count, max_chars)) or ""
    except Exception:
        # Best effort: an unreadable Text interface is treated as no text.
        return ""
+
379
+
380
+ def _atspi_get_bounds(accessible) -> dict | None:
381
+ """Get the bounding rectangle in screen coordinates."""
382
+ try:
383
+ comp = accessible.get_component_iface()
384
+ if comp is not None:
385
+ # ATSPI_COORD_TYPE_SCREEN = 0
386
+ rect = comp.get_extents(0)
387
+ if rect.width > 0 or rect.height > 0:
388
+ return {
389
+ "x": rect.x,
390
+ "y": rect.y,
391
+ "w": rect.width,
392
+ "h": rect.height,
393
+ }
394
+ except Exception:
395
+ pass
396
+ return None
397
+
398
+
399
+ def _get_pid(accessible) -> int | None:
400
+ """Get the process ID of the application owning this accessible."""
401
+ try:
402
+ pid = accessible.get_process_id()
403
+ return pid if pid > 0 else None
404
+ except Exception:
405
+ return None
406
+
407
+
408
+ # ---------------------------------------------------------------------------
409
+ # CUP node builder
410
+ # ---------------------------------------------------------------------------
411
+
412
+
413
# ARIA roles (read from the "xml-roles" object attribute) that override the
# role derived from the AT-SPI role name. Each one maps to the CUP role of
# the same name. Built once at import time rather than on every node visit.
_ARIA_REFINEMENTS: frozenset = frozenset(
    {
        "heading",
        "dialog",
        "alert",
        "alertdialog",
        "searchbox",
        "navigation",
        "main",
        "search",
        "banner",
        "contentinfo",
        "complementary",
        "region",
        "form",
        "switch",
        "tabpanel",
        "log",
        "status",
        "timer",
        "marquee",
        "feed",
        "figure",
        "math",
        "note",
        "article",
        "directory",
    }
)

# Roles whose Value interface readings are exported (value / valueMin / ...).
_RANGE_ROLES = ("slider", "progressbar", "spinbutton", "scrollbar")

# Roles for which an "orientation" attribute is reported.
_ORIENTED_ROLES = ("scrollbar", "slider", "separator", "toolbar", "tablist")

# Roles whose Text interface content is preferred as the node value.
_TEXT_VALUE_ROLES = ("textbox", "searchbox", "combobox", "spinbutton", "document")


def _derive_cup_states(raw_states, bounds, screen_w: int, screen_h: int) -> list:
    """Translate a set of AT-SPI state names into the ordered CUP state list."""
    states: list = []

    # In ATK/AT-SPI2, STATE_SENSITIVE is the primary interactivity flag.
    # STATE_ENABLED is often set alongside it but not always, so a widget
    # counts as disabled only when it lacks the sensitive state.
    if "sensitive" not in raw_states:
        states.append("disabled")
    if "focused" in raw_states:
        states.append("focused")
    if "checked" in raw_states:
        states.append("checked")
    if "pressed" in raw_states:
        states.append("pressed")
    if "indeterminate" in raw_states:
        states.append("mixed")
    if "expanded" in raw_states:
        states.append("expanded")
    elif "expandable" in raw_states:
        states.append("collapsed")
    if "selected" in raw_states:
        states.append("selected")
    if "required" in raw_states:
        states.append("required")
    if "modal" in raw_states:
        states.append("modal")
    if "read-only" in raw_states:
        states.append("readonly")
    if "editable" in raw_states and "read-only" not in raw_states:
        states.append("editable")
    if "busy" in raw_states:
        states.append("busy")
    if "multi-selectable" in raw_states:
        states.append("multiselectable")

    # Offscreen: visible but not showing, or bounds entirely outside screen.
    is_offscreen = False
    if "showing" not in raw_states and "visible" in raw_states:
        is_offscreen = True
    elif bounds and screen_w > 0 and screen_h > 0:
        bx, by, bw, bh = bounds["x"], bounds["y"], bounds["w"], bounds["h"]
        if bx + bw <= 0 or by + bh <= 0 or bx >= screen_w or by >= screen_h:
            is_offscreen = True
    if is_offscreen:
        states.append("offscreen")

    return states


def _derive_cup_actions(raw_actions, raw_states, role: str) -> list:
    """Build the ordered, duplicate-free CUP action list for a node."""
    actions: list = []

    def add(action_name: str) -> None:
        # Every action, mapped or synthesized, goes through the same
        # duplicate check so nothing is listed twice.
        if action_name and action_name not in actions:
            actions.append(action_name)

    # Actions advertised by the Action interface; unmapped names pass through.
    for raw_act in raw_actions:
        add(ACTION_MAP.get(raw_act, raw_act))

    # Expand/collapse derived from state. Both are offered for every
    # expandable node, including when "expand" was already contributed by a
    # mapped action such as "expand or contract".
    if "expandable" in raw_states:
        add("expand")
        add("collapse")

    # Text input actions
    if role in TEXT_INPUT_ROLES and "editable" in raw_states:
        add("type")
        add("setvalue")

    # Selection action
    if "selectable" in raw_states:
        add("select")

    # Default focus action, only when nothing else applies
    if not actions and "focusable" in raw_states:
        actions.append("focus")

    return actions


def _build_cup_node(
    accessible,
    id_gen,
    stats: dict,
    depth: int,
    max_depth: int,
    screen_w: int,
    screen_h: int,
    refs: dict,
) -> dict | None:
    """Build a CUP node dict from an AT-SPI2 accessible object.

    Recursively walks children up to max_depth.

    Args:
        accessible: the AT-SPI accessible to convert.
        id_gen: iterator yielding integers; each node takes the next value
            and is given the id ``"e<N>"``.
        stats: mutated in place; its "nodes" counter, "max_depth" and
            per-role-name "roles" counts are updated for every visited node.
        depth: depth of ``accessible`` in the walk (0 for the root).
        max_depth: nodes deeper than this are not emitted, and children of a
            node at exactly this depth are not visited.
        screen_w: screen width used for offscreen detection (<= 0 disables
            the bounds-based check).
        screen_h: screen height used for offscreen detection.
        refs: mutated in place; maps each emitted node id to its accessible.

    Returns:
        The node dict, or None when ``depth`` exceeds ``max_depth``.
    """
    if depth > max_depth:
        return None

    stats["nodes"] += 1
    stats["max_depth"] = max(stats["max_depth"], depth)

    # ── Role ──
    role_name = _atspi_role_name(accessible)
    role = CUP_ROLES.get(role_name, "generic")

    # Track raw AT-SPI role names in stats (like Windows tracks ControlType names)
    stats["roles"][role_name] = stats["roles"].get(role_name, 0) + 1

    # ── Name / description ──
    try:
        name = accessible.get_name() or ""
    except Exception:
        name = ""
    try:
        description = accessible.get_description() or ""
    except Exception:
        description = ""

    # ── Object attributes (may refine role) ──
    obj_attrs = _atspi_get_attributes(accessible)

    # xml-roles can refine the CUP role for web content. The value is
    # treated as a whitespace-separated token list and the first recognized
    # token wins; a single-token value behaves as a plain lookup.
    for token in obj_attrs.get("xml-roles", "").lower().split():
        if token in _ARIA_REFINEMENTS:
            role = token
            break

    # Panel with a name -> region (like Windows Pane heuristic)
    if role == "group" and name:
        role = "region"

    # ── Bounds / states / actions ──
    bounds = _atspi_get_bounds(accessible)
    raw_states = _atspi_get_states(accessible)
    states = _derive_cup_states(raw_states, bounds, screen_w, screen_h)
    raw_actions = _atspi_get_actions(accessible)
    actions = _derive_cup_actions(raw_actions, raw_states, role)

    # ── Role refinement from actions ──
    # Some toolkit buttons report a non-button role via AT-SPI. If an
    # element mapped to "generic" has a name and a click action, it's almost
    # certainly a button. ("click" can only originate from the Action
    # interface; it is never synthesized.)
    if role == "generic" and name and "click" in actions:
        role = "button"

    # ── Value ──
    value_current, value_min, value_max = _atspi_get_value(accessible)
    is_range = role in _RANGE_ROLES

    # For text inputs, prefer Text interface content as the value
    text_content = ""
    if role in _TEXT_VALUE_ROLES:
        text_content = _atspi_get_text(accessible)

    value_str = ""
    if text_content:
        value_str = text_content[:200]
    elif value_current is not None and is_range:
        value_str = str(value_current)

    # ── Attributes ──
    attrs: dict = {}

    # Heading level
    if role == "heading":
        level_str = obj_attrs.get("level", "")
        if level_str:
            try:
                attrs["level"] = int(level_str)
            except ValueError:
                pass  # non-numeric level: omit the attribute

    # Range widget min/max/now
    if is_range:
        if value_min is not None:
            attrs["valueMin"] = value_min
        if value_max is not None:
            attrs["valueMax"] = value_max
        if value_current is not None:
            attrs["valueNow"] = value_current

    # Placeholder
    placeholder = obj_attrs.get("placeholder-text", "")
    if placeholder and role in ("textbox", "searchbox", "combobox"):
        attrs["placeholder"] = placeholder[:200]

    # Orientation
    if role in _ORIENTED_ROLES:
        if "horizontal" in raw_states:
            attrs["orientation"] = "horizontal"
        elif "vertical" in raw_states:
            attrs["orientation"] = "vertical"

    # URL for links
    if role == "link":
        link_url = obj_attrs.get("href", "")
        if link_url:
            attrs["url"] = link_url[:500]

    # ── Assemble CUP node ──
    node: dict = {
        "id": f"e{next(id_gen)}",
        "role": role,
        "name": name[:200],
    }

    if description:
        node["description"] = description[:200]
    if value_str:
        node["value"] = value_str
    if bounds:
        node["bounds"] = bounds
    if states:
        node["states"] = states
    if actions:
        node["actions"] = actions
    if attrs:
        node["attributes"] = attrs

    # ── Platform extension ──
    plat: dict = {"atspiRole": role_name}
    for key in ("id", "class", "toolkit"):
        if obj_attrs.get(key):
            plat[key] = obj_attrs[key]
    if raw_actions:
        plat["actions"] = raw_actions
    node["platform"] = {"linux": plat}

    refs[node["id"]] = accessible

    # ── Children ──
    if depth < max_depth:
        children: list = []
        try:
            n_children = accessible.get_child_count()
            for i in range(n_children):
                try:
                    child_acc = accessible.get_child_at_index(i)
                    if child_acc is None:
                        continue
                    child_node = _build_cup_node(
                        child_acc,
                        id_gen,
                        stats,
                        depth + 1,
                        max_depth,
                        screen_w,
                        screen_h,
                        refs,
                    )
                    if child_node is not None:
                        children.append(child_node)
                except Exception:
                    # A single broken child must not abort the whole walk.
                    continue
        except Exception:
            # Child count unavailable: emit the node without children.
            pass

        if children:
            node["children"] = children

    return node
+
714
+
715
+ # ---------------------------------------------------------------------------
716
+ # LinuxAdapter — PlatformAdapter implementation
717
+ # ---------------------------------------------------------------------------
718
+
719
+
720
class LinuxAdapter(PlatformAdapter):
    """CUP adapter for Linux via AT-SPI2 (D-Bus accessibility).

    Every public query method calls :meth:`initialize` first, so the adapter
    can be used without an explicit initialization step.
    """

    def __init__(self):
        self._atspi = None  # gi.repository.Atspi module once initialized
        self._x11: _X11 | None = None
        self._screen_w: int = 0
        self._screen_h: int = 0
        self._scale: float = 1.0

    @property
    def platform_name(self) -> str:
        return "linux"

    def initialize(self) -> None:
        """Load AT-SPI and detect screen size and scale (idempotent)."""
        if self._atspi is not None:
            return  # already initialized
        self._atspi = _init_atspi()

        # Screen info via X11
        try:
            self._x11 = _X11()
            self._screen_w, self._screen_h = self._x11.get_screen_size()
        except Exception:
            # Do not keep a wrapper whose display could not be opened.
            self._x11 = None
            # Fallback: try xdpyinfo or xrandr
            self._screen_w, self._screen_h = _fallback_screen_size()

        self._scale = _get_scale_factor()

    def get_screen_info(self) -> tuple[int, int, float]:
        """Return (screen width, screen height, scale factor)."""
        return self._screen_w, self._screen_h, self._scale

    def _iter_app_windows(self, only_apps=None):
        """Yield ``(app_name, pid, window)`` for each top-level window.

        Walks the children of the AT-SPI desktop (applications) and their
        children (windows). Applications and windows that cannot be read are
        skipped silently.

        Args:
            only_apps: optional collection of lowercase application names;
                when given, applications whose lowercased name is not in it
                are skipped before any further calls are made on them.
        """
        desktop = self._atspi.get_desktop(0)
        for i in range(desktop.get_child_count()):
            try:
                app = desktop.get_child_at_index(i)
                if app is None:
                    continue
                app_name = app.get_name() or ""
                if only_apps is not None and app_name.lower() not in only_apps:
                    continue
                pid = _get_pid(app)
                n_windows = app.get_child_count()
            except Exception:
                continue

            for j in range(n_windows):
                try:
                    win = app.get_child_at_index(j)
                except Exception:
                    continue
                if win is not None:
                    yield app_name, pid, win

    def get_foreground_window(self) -> dict[str, Any]:
        """Return the focused application's top-level window via AT-SPI2.

        Returns the first top-level window whose state set contains ACTIVE
        or FOCUSED. If there is none, falls back to the first VISIBLE
        window, and finally to the AT-SPI desktop object itself.
        """
        self.initialize()
        state_type = self._atspi.StateType
        best: dict[str, Any] | None = None

        for app_name, pid, win in self._iter_app_windows():
            try:
                state_set = win.get_state_set()
                is_active = state_set.contains(state_type.ACTIVE)
                is_focused = state_set.contains(state_type.FOCUSED)
                info = {
                    "handle": win,
                    "title": win.get_name() or app_name,
                    "pid": pid,
                    "bundle_id": None,
                }
                if is_active or is_focused:
                    return info
                # Track first visible window as fallback
                if best is None and state_set.contains(state_type.VISIBLE):
                    best = info
            except Exception:
                continue

        # Fallback to first visible window, or the desktop itself
        if best is not None:
            return best
        return {
            "handle": self._atspi.get_desktop(0),
            "title": "Desktop",
            "pid": None,
            "bundle_id": None,
        }

    def get_all_windows(self) -> list[dict[str, Any]]:
        """Return all visible top-level windows across all AT-SPI applications."""
        self.initialize()
        visible = self._atspi.StateType.VISIBLE
        windows: list[dict[str, Any]] = []

        for app_name, pid, win in self._iter_app_windows():
            try:
                if not win.get_state_set().contains(visible):
                    continue
                windows.append(
                    {
                        "handle": win,
                        "title": win.get_name() or app_name,
                        "pid": pid,
                        "bundle_id": None,
                    }
                )
            except Exception:
                continue

        return windows

    def get_window_list(self) -> list[dict[str, Any]]:
        """Return lightweight metadata for every visible top-level window.

        Each entry has "title", "pid", "bundle_id", "foreground" and
        "bounds"; no accessible handle is included. A window is marked as
        foreground when it is ACTIVE or when its pid and title both match
        the result of :meth:`get_foreground_window`.
        """
        self.initialize()
        state_type = self._atspi.StateType
        results: list[dict[str, Any]] = []

        # Find foreground PID/title for marking
        fg_info = self.get_foreground_window()
        fg_pid = fg_info.get("pid")
        fg_title = fg_info.get("title")

        for app_name, pid, win in self._iter_app_windows():
            try:
                state_set = win.get_state_set()
                if not state_set.contains(state_type.VISIBLE):
                    continue
                title = win.get_name() or app_name
                is_fg = state_set.contains(state_type.ACTIVE) or (
                    pid == fg_pid and title == fg_title
                )
                results.append(
                    {
                        "title": title,
                        "pid": pid,
                        "bundle_id": None,
                        "foreground": is_fg,
                        "bounds": _atspi_get_bounds(win),
                    }
                )
            except Exception:
                continue

        return results

    def get_desktop_window(self) -> dict[str, Any] | None:
        """Return the file manager's "desktop frame" window, if any.

        Only applications whose lowercased name is one of the known desktop
        file managers are inspected. Returns None when no such window exists.
        """
        self.initialize()
        desktop_apps = {"nautilus", "nemo", "caja", "pcmanfm", "pcmanfm-qt", "thunar"}

        for _app_name, pid, win in self._iter_app_windows(only_apps=desktop_apps):
            try:
                role = win.get_role_name() or ""
                if role == "desktop frame":
                    return {
                        "handle": win,
                        "title": "Desktop",
                        "pid": pid,
                        "bundle_id": None,
                    }
            except Exception:
                continue

        return None

    def capture_tree(
        self,
        windows: list[dict[str, Any]],
        *,
        max_depth: int = 999,
    ) -> tuple[list[dict], dict, dict[str, Any]]:
        """Capture the accessibility trees of the given windows.

        Args:
            windows: window descriptors; each must carry the accessible
                under the "handle" key.
            max_depth: maximum depth walked below each window root.

        Returns:
            ``(tree, stats, refs)``: the list of root nodes, the walk
            statistics ("nodes", "max_depth", "roles"), and a map from node
            id to the accessible it was built from.
        """
        self.initialize()
        refs: dict[str, Any] = {}

        if len(windows) > 1:
            # Multiple windows — parallel walk with merged stats
            return self._parallel_capture(windows, max_depth=max_depth, refs=refs)

        # Zero or one window — sequential walk
        id_gen = itertools.count()
        stats: dict = {"nodes": 0, "max_depth": 0, "roles": {}}
        tree: list[dict] = []
        for win in windows:
            node = _build_cup_node(
                win["handle"],
                id_gen,
                stats,
                0,
                max_depth,
                self._screen_w,
                self._screen_h,
                refs,
            )
            if node is not None:
                tree.append(node)
        return tree, stats, refs

    def _parallel_capture(
        self,
        windows: list[dict[str, Any]],
        *,
        max_depth: int = 999,
        refs: dict[str, Any],
    ) -> tuple[list[dict], dict, dict[str, Any]]:
        """Walk multiple window trees in parallel threads.

        All walks draw node ids from one shared counter and write into the
        shared ``refs`` dict; statistics are gathered per window and merged
        afterwards. Root nodes are returned in the order of ``windows``.
        """
        # Shared counter for globally unique IDs
        id_gen = itertools.count()
        # ThreadPoolExecutor rejects max_workers=0, so keep at least one.
        num_workers = max(1, min(len(windows), 8))

        def walk_one(win: dict[str, Any]) -> tuple[dict | None, dict]:
            local_stats: dict = {"nodes": 0, "max_depth": 0, "roles": {}}
            node = _build_cup_node(
                win["handle"],
                id_gen,
                local_stats,
                0,
                max_depth,
                self._screen_w,
                self._screen_h,
                refs,
            )
            return node, local_stats

        with ThreadPoolExecutor(max_workers=num_workers) as pool:
            # map() preserves input order, so results line up with windows.
            per_window_results = list(pool.map(walk_one, windows))

        # Merge results
        tree: list[dict] = []
        merged_stats: dict = {"nodes": 0, "max_depth": 0, "roles": {}}
        for node, st in per_window_results:
            if node is not None:
                tree.append(node)
            merged_stats["nodes"] += st.get("nodes", 0)
            merged_stats["max_depth"] = max(merged_stats["max_depth"], st.get("max_depth", 0))
            for role, count in st.get("roles", {}).items():
                merged_stats["roles"][role] = merged_stats["roles"].get(role, 0) + count

        return tree, merged_stats, refs
+
1013
+
1014
+ # ---------------------------------------------------------------------------
1015
+ # Fallback screen size detection
1016
+ # ---------------------------------------------------------------------------
1017
+
1018
+
1019
def _fallback_screen_size() -> tuple[int, int]:
    """Try xrandr / xdpyinfo as fallback for screen dimensions.

    Sources, in order:

    1. ``xrandr --query``: the ``current W x H`` size of the X screen, which
       spans all monitors; if that is absent, the geometry of the first
       output positioned at ``+0+0``.
    2. ``xdpyinfo``: the ``dimensions:`` line.
    3. A hard-coded default of 1920x1080.

    The whole-screen size is preferred over a single monitor's geometry so
    that multi-monitor setups are not reported as one monitor wide.

    Returns:
        ``(width, height)`` in pixels.
    """
    import re

    def run_and_capture(command: list) -> str:
        """Return the command's stdout, or "" if it is missing or fails."""
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=3,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return ""
        return result.stdout if result.returncode == 0 else ""

    # Try xrandr
    xrandr_output = run_and_capture(["xrandr", "--query"])
    if xrandr_output:
        for pattern in (
            r"current\s+(\d+)\s*x\s*(\d+)",  # whole X screen
            r"(\d+)x(\d+)\+0\+0",  # output placed at the origin
        ):
            match = re.search(pattern, xrandr_output)
            if match:
                return (int(match.group(1)), int(match.group(2)))

    # Try xdpyinfo
    xdpyinfo_output = run_and_capture(["xdpyinfo"])
    if xdpyinfo_output:
        match = re.search(r"dimensions:\s+(\d+)x(\d+)", xdpyinfo_output)
        if match:
            return (int(match.group(1)), int(match.group(2)))

    # Last resort default
    return (1920, 1080)