computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,935 @@
1
+ """
2
+ Windows UIA platform adapter for CUP.
3
+
4
+ Captures the accessibility tree via raw UIA COM interface and maps it to the
5
+ canonical CUP schema — roles, states, actions, and platform metadata.
6
+
7
+ Key optimisations:
8
+ 1. Direct UIA COM via comtypes — no wrapper overhead
9
+ 2. CacheRequest batches 29 properties (core + states + patterns + ARIA) in one call
10
+ 3. Win32 EnumWindows for instant HWND list (skips slow UIA root enumeration)
11
+ 4. ElementFromHandleBuildCache to get UIA elements from HWNDs
12
+ 5. FindAllBuildCache collapses entire subtree into ONE cross-process call
13
+ 6. TreeWalker with BuildCache for structured tree (one call per node, all props)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import ctypes
19
+ import ctypes.wintypes
20
+ import itertools
21
+ from typing import Any
22
+
23
+ import comtypes
24
+ import comtypes.client
25
+
26
+ from cup._base import PlatformAdapter
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # UIA COM property IDs
30
+ # ---------------------------------------------------------------------------
31
+
32
# Numeric values follow the UIA_*PropertyId constants in UIAutomationClient.h.

# Core
UIA_BoundingRectanglePropertyId = 30001
UIA_ControlTypePropertyId = 30003
UIA_NamePropertyId = 30005

# State / identification
UIA_HasKeyboardFocusPropertyId = 30008
UIA_IsEnabledPropertyId = 30010
UIA_AutomationIdPropertyId = 30011
UIA_ClassNamePropertyId = 30012
UIA_HelpTextPropertyId = 30013
UIA_NativeWindowHandlePropertyId = 30020
UIA_IsOffscreenPropertyId = 30022
UIA_OrientationPropertyId = 30023
UIA_IsRequiredForFormPropertyId = 30025

# Pattern availability
# NOTE: 30037 is IsSelectionPatternAvailable and 30042 is
# IsTransformPatternAvailable; the Scroll and ExpandCollapse availability
# properties are 30034 and 30028 respectively.
UIA_IsExpandCollapsePatternAvailablePropertyId = 30028
UIA_IsInvokePatternAvailablePropertyId = 30031
UIA_IsRangeValuePatternAvailablePropertyId = 30033
UIA_IsScrollPatternAvailablePropertyId = 30034
UIA_IsSelectionItemPatternAvailablePropertyId = 30036
UIA_IsTogglePatternAvailablePropertyId = 30041
UIA_IsValuePatternAvailablePropertyId = 30043

# Pattern state values
UIA_ValueValuePropertyId = 30045
UIA_ValueIsReadOnlyPropertyId = 30046
UIA_RangeValueValuePropertyId = 30047
UIA_RangeValueMinimumPropertyId = 30049
UIA_RangeValueMaximumPropertyId = 30050
UIA_ExpandCollapseExpandCollapseStatePropertyId = 30070
UIA_WindowIsModalPropertyId = 30077
UIA_SelectionItemIsSelectedPropertyId = 30079
UIA_ToggleToggleStatePropertyId = 30086

# ARIA (web content hosted in UIA)
UIA_AriaRolePropertyId = 30101
UIA_AriaPropertiesPropertyId = 30102

# Tree scope / element mode
TreeScope_Element = 1
TreeScope_Children = 2
TreeScope_Subtree = 7

AutomationElementMode_None = 0
AutomationElementMode_Full = 1

# All properties to cache in a single COM call (29 in total)
PROP_IDS = [
    # Core (3)
    UIA_NamePropertyId,
    UIA_ControlTypePropertyId,
    UIA_BoundingRectanglePropertyId,
    # State / identification (8)
    UIA_IsEnabledPropertyId,
    UIA_HasKeyboardFocusPropertyId,
    UIA_IsOffscreenPropertyId,
    UIA_AutomationIdPropertyId,
    UIA_ClassNamePropertyId,
    UIA_HelpTextPropertyId,
    UIA_OrientationPropertyId,
    UIA_IsRequiredForFormPropertyId,
    # Pattern availability (7)
    UIA_IsInvokePatternAvailablePropertyId,
    UIA_IsTogglePatternAvailablePropertyId,
    UIA_IsExpandCollapsePatternAvailablePropertyId,
    UIA_IsValuePatternAvailablePropertyId,
    UIA_IsSelectionItemPatternAvailablePropertyId,
    UIA_IsScrollPatternAvailablePropertyId,
    UIA_IsRangeValuePatternAvailablePropertyId,
    # Pattern state values (9)
    UIA_ToggleToggleStatePropertyId,
    UIA_ExpandCollapseExpandCollapseStatePropertyId,
    UIA_SelectionItemIsSelectedPropertyId,
    UIA_ValueIsReadOnlyPropertyId,
    UIA_ValueValuePropertyId,
    UIA_RangeValueValuePropertyId,
    UIA_RangeValueMinimumPropertyId,
    UIA_RangeValueMaximumPropertyId,
    UIA_WindowIsModalPropertyId,
    # ARIA (2)
    UIA_AriaRolePropertyId,
    UIA_AriaPropertiesPropertyId,
]
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # UIA ControlType display names (for benchmark stats)
121
+ # ---------------------------------------------------------------------------
122
+
123
# UIA control type IDs are contiguous starting at 50000 (Button), so the table
# is built by numbering the display names in ID order.
CONTROL_TYPES = dict(
    enumerate(
        (
            "Button",  # 50000
            "Calendar",
            "CheckBox",
            "ComboBox",
            "Edit",
            "Hyperlink",  # 50005
            "Image",
            "ListItem",
            "List",
            "Menu",
            "MenuBar",  # 50010
            "MenuItem",
            "ProgressBar",
            "RadioButton",
            "ScrollBar",
            "Slider",  # 50015
            "Spinner",
            "StatusBar",
            "Tab",
            "TabItem",
            "Text",  # 50020
            "ToolBar",
            "ToolTip",
            "Tree",
            "TreeItem",
            "Custom",  # 50025
            "Group",
            "Thumb",
            "DataGrid",
            "DataItem",
            "Document",  # 50030
            "SplitButton",
            "Window",
            "Pane",
            "Header",
            "HeaderItem",  # 50035
            "Table",
            "TitleBar",
            "Separator",
            "SemanticZoom",
            "AppBar",  # 50040
        ),
        start=50000,
    )
)
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # CUP role mapping: UIA ControlType ID -> canonical CUP role
170
+ # ---------------------------------------------------------------------------
171
+
172
# Canonical CUP role for each UIA control type, listed in control-type-ID order
# (50000 upward); the trailing comment names the UIA control type.
CUP_ROLES = dict(
    enumerate(
        (
            "button",  # 50000 Button
            "grid",  # 50001 Calendar
            "checkbox",  # 50002 CheckBox
            "combobox",  # 50003 ComboBox
            "textbox",  # 50004 Edit
            "link",  # 50005 Hyperlink
            "img",  # 50006 Image
            "listitem",  # 50007 ListItem
            "list",  # 50008 List
            "menu",  # 50009 Menu
            "menubar",  # 50010 MenuBar
            "menuitem",  # 50011 MenuItem
            "progressbar",  # 50012 ProgressBar
            "radio",  # 50013 RadioButton
            "scrollbar",  # 50014 ScrollBar
            "slider",  # 50015 Slider
            "spinbutton",  # 50016 Spinner
            "status",  # 50017 StatusBar
            "tablist",  # 50018 Tab (the container)
            "tab",  # 50019 TabItem
            "text",  # 50020 Text
            "toolbar",  # 50021 ToolBar
            "tooltip",  # 50022 ToolTip
            "tree",  # 50023 Tree
            "treeitem",  # 50024 TreeItem
            "generic",  # 50025 Custom
            "group",  # 50026 Group
            "generic",  # 50027 Thumb
            "grid",  # 50028 DataGrid
            "row",  # 50029 DataItem
            "document",  # 50030 Document
            "button",  # 50031 SplitButton
            "window",  # 50032 Window
            "generic",  # 50033 Pane — context-dependent, refined in build_cup_node
            "group",  # 50034 Header
            "columnheader",  # 50035 HeaderItem
            "table",  # 50036 Table
            "titlebar",  # 50037 TitleBar
            "separator",  # 50038 Separator
            "generic",  # 50039 SemanticZoom
            "toolbar",  # 50040 AppBar
        ),
        start=50000,
    )
)

# Roles that accept text input (these get the "type" action)
TEXT_INPUT_ROLES = {
    "textbox",
    "searchbox",
    "combobox",
    "document",
}
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # Win32: fast window enumeration via EnumWindows
222
+ # ---------------------------------------------------------------------------
223
+
224
# Handle to user32.dll, shared by the Win32 helper functions in this module.
user32 = ctypes.windll.user32
# Callback prototype handed to EnumWindows: returns BOOL, receives (HWND, LPARAM).
WNDENUMPROC = ctypes.WINFUNCTYPE(ctypes.wintypes.BOOL, ctypes.wintypes.HWND, ctypes.wintypes.LPARAM)
226
+
227
+
228
def _win32_enum_windows(*, visible_only: bool = True) -> list[tuple[int, str]]:
    """Enumerate top-level windows through Win32 EnumWindows.

    Returns a list of ``(hwnd, title)`` pairs. When *visible_only* is true,
    windows for which IsWindowVisible reports false are left out. A window
    whose title cannot be read (GetWindowTextW returns 0) gets ``""``.
    """
    max_chars = 512
    # One buffer is reused for every window; its value is read immediately
    # after each GetWindowTextW call.
    title_buf = ctypes.create_unicode_buffer(max_chars)
    found: list[tuple[int, str]] = []

    def _collect(hwnd, _lparam):
        if not visible_only or user32.IsWindowVisible(hwnd):
            copied = user32.GetWindowTextW(hwnd, title_buf, max_chars)
            found.append((hwnd, title_buf.value if copied > 0 else ""))
        return True  # always continue the enumeration

    # Keep the ctypes callback object referenced for the duration of the call.
    enum_proc = WNDENUMPROC(_collect)
    user32.EnumWindows(enum_proc, 0)
    return found
244
+
245
+
246
def _win32_foreground_window() -> tuple[int, str]:
    """Return ``(hwnd, title)`` for the window GetForegroundWindow reports."""
    max_chars = 512
    title_buf = ctypes.create_unicode_buffer(max_chars)
    fg_hwnd = user32.GetForegroundWindow()
    # The buffer starts out empty, so a failed read yields "" as the title.
    user32.GetWindowTextW(fg_hwnd, title_buf, max_chars)
    return fg_hwnd, title_buf.value
252
+
253
+
254
def _win32_screen_size() -> tuple[int, int]:
    """Return the primary monitor size as ``(width, height)`` in pixels."""
    # GetSystemMetrics indices: 0 = SM_CXSCREEN, 1 = SM_CYSCREEN.
    width = user32.GetSystemMetrics(0)
    height = user32.GetSystemMetrics(1)
    return width, height
257
+
258
+
259
def _win32_screen_scale() -> float:
    """Return the system display scale factor (e.g. 1.5 for 150% DPI).

    The factor is the system DPI divided by 96 (the 100% baseline).
    Returns 1.0 when the DPI cannot be queried — for example when
    ``GetDpiForSystem`` is not exported by the running Windows version —
    or when the reported DPI is not positive.
    """
    try:
        # GetDpiForSystem is exported by user32.dll, not shcore.dll; looking it
        # up on shcore raises AttributeError, which previously made this
        # function silently return 1.0 on every machine.
        dpi = ctypes.windll.user32.GetDpiForSystem()
    except (AttributeError, OSError):
        # AttributeError: export (or windll itself) unavailable.
        # OSError: the DLL could not be loaded.
        return 1.0
    if dpi <= 0:
        return 1.0
    return dpi / 96.0
266
+
267
+
268
def get_window_pid(hwnd: int) -> int:
    """Look up the ID of the process that owns window *hwnd*."""
    # GetWindowThreadProcessId writes the process ID through the out-parameter.
    process_id = ctypes.wintypes.DWORD()
    out_param = ctypes.byref(process_id)
    user32.GetWindowThreadProcessId(hwnd, out_param)
    return process_id.value
273
+
274
+
275
def _win32_get_window_rect(hwnd: int) -> dict[str, int] | None:
    """Return the window rectangle as ``{"x", "y", "w", "h"}``.

    The values come from Win32 GetWindowRect; ``None`` is returned when that
    call reports failure.
    """
    win_rect = ctypes.wintypes.RECT()
    if not user32.GetWindowRect(hwnd, ctypes.byref(win_rect)):
        return None
    left, top = win_rect.left, win_rect.top
    return {
        "x": left,
        "y": top,
        "w": win_rect.right - left,
        "h": win_rect.bottom - top,
    }
286
+
287
+
288
def _win32_find_desktop_hwnd() -> int | None:
    """Locate the top-level window that hosts the desktop icon view.

    The "Progman" window is tried first; if it has no "SHELLDLL_DefView"
    child, top-level windows are enumerated and the first one that has such a
    child is returned (the WorkerW case on Windows 10/11). Returns ``None``
    when no candidate is found.
    """
    view_class = "SHELLDLL_DefView"

    # Classic desktop host.
    progman = user32.FindWindowW("Progman", None)
    if progman and user32.FindWindowExW(progman, 0, view_class, None):
        return progman

    # Fallback: scan top-level windows for one that owns the shell view.
    matches: list[int | None] = []

    def _probe(hwnd, _lparam):
        if user32.FindWindowExW(hwnd, 0, view_class, None):
            matches.append(hwnd)
            return False  # stop enumerating
        return True

    # Keep the ctypes callback object referenced for the duration of the call.
    enum_proc = WNDENUMPROC(_probe)
    user32.EnumWindows(enum_proc, 0)
    return matches[0] if matches else None
310
+
311
+
312
+ # ---------------------------------------------------------------------------
313
+ # UIA COM bootstrap
314
+ # ---------------------------------------------------------------------------
315
+
316
+
317
def init_uia():
    """Create and return an in-process IUIAutomation COM object."""
    # GetModule makes the comtypes.gen.UIAutomationClient wrapper importable,
    # which is why the import below cannot live at module top level.
    comtypes.client.GetModule("UIAutomationCore.dll")
    from comtypes.gen.UIAutomationClient import CUIAutomation, IUIAutomation

    automation_clsid = CUIAutomation._reg_clsid_
    automation = comtypes.CoCreateInstance(
        automation_clsid,
        interface=IUIAutomation,
        clsctx=comtypes.CLSCTX_INPROC_SERVER,
    )
    return automation
327
+
328
+
329
def make_cache_request(
    uia, *, element_mode=AutomationElementMode_Full, tree_scope=TreeScope_Element
):
    """Build a UIA cache request covering every property in ``PROP_IDS``.

    *tree_scope* and *element_mode* are assigned to the request's
    ``TreeScope`` and ``AutomationElementMode`` attributes respectively.
    """
    request = uia.CreateCacheRequest()
    for prop_id in PROP_IDS:
        request.AddProperty(prop_id)
    request.TreeScope = tree_scope
    request.AutomationElementMode = element_mode
    return request
338
+
339
+
340
+ # ---------------------------------------------------------------------------
341
+ # Cached property helpers
342
+ # ---------------------------------------------------------------------------
343
+
344
+
345
def _cached_bool(el, pid, default=False):
    """Return cached property *pid* of *el* coerced to ``bool``.

    *default* is returned when the cached value is ``None`` or when reading
    or converting it raises.
    """
    try:
        raw = el.GetCachedPropertyValue(pid)
        result = default if raw is None else bool(raw)
    except Exception:
        result = default
    return result
354
+
355
+
356
def _cached_int(el, pid, default=0):
    """Return cached property *pid* of *el* coerced to ``int``.

    *default* is returned when the cached value is ``None`` or when reading
    or converting it raises.
    """
    try:
        raw = el.GetCachedPropertyValue(pid)
        result = default if raw is None else int(raw)
    except Exception:
        result = default
    return result
365
+
366
+
367
def _cached_float(el, pid, default=None):
    """Return cached property *pid* of *el* coerced to ``float``.

    *default* is returned when the cached value is ``None`` or when reading
    or converting it raises.
    """
    try:
        raw = el.GetCachedPropertyValue(pid)
        result = default if raw is None else float(raw)
    except Exception:
        result = default
    return result
376
+
377
+
378
def _cached_str(el, pid, default=""):
    """Return cached property *pid* of *el* as a ``str``.

    Any falsy cached value (``None``, ``""``, ``0``, ...) yields *default*,
    as does an exception while reading or converting.
    """
    try:
        raw = el.GetCachedPropertyValue(pid)
        if not raw:
            return default
        return str(raw)
    except Exception:
        return default
385
+
386
+
387
def is_valid_element(el) -> bool:
    """Report whether *el* is a usable UIA element.

    The probe reads ``CachedControlType``; any exception raised by that read
    (for instance on a NULL COM pointer) marks the element as invalid.
    """
    try:
        el.CachedControlType
    except Exception:
        return False
    return True
394
+
395
+
396
+ # ---------------------------------------------------------------------------
397
+ # CUP node builder
398
+ # ---------------------------------------------------------------------------
399
+
400
+
401
# UIA control type IDs that receive special handling in build_cup_node.
_CT_BUTTON = 50000
_CT_MENU_ITEM = 50011
_CT_PANE = 50033

# ARIA role -> CUP role, used to refine ambiguous UIA control types that host
# web content. Defined once at module level so it is not rebuilt per node.
_ARIA_ROLE_MAP = {
    "heading": "heading",
    "dialog": "dialog",
    "alert": "alert",
    "alertdialog": "alertdialog",
    "searchbox": "searchbox",
    "navigation": "navigation",
    "main": "main",
    "search": "search",
    "banner": "banner",
    "contentinfo": "contentinfo",
    "complementary": "complementary",
    "region": "region",
    "form": "form",
    "cell": "cell",
    "gridcell": "cell",
    "switch": "switch",
    "tab": "tab",
    "tabpanel": "tabpanel",
    "log": "log",
    "status": "status",
    "timer": "timer",
    "marquee": "marquee",
}

# CUP roles considered ambiguous enough for an ARIA role to override them.
_ARIA_REFINABLE_ROLES = ("generic", "group", "text", "region")

# Roles for which the UIA orientation is surfaced as an attribute.
_ORIENTED_ROLES = ("scrollbar", "slider", "separator", "toolbar", "tablist")

# Roles whose Value-pattern string is surfaced as the node's "value".
_VALUE_ROLES = (
    "textbox",
    "searchbox",
    "combobox",
    "spinbutton",
    "slider",
    "progressbar",
    "document",
)


def _read_cached_bounds(el) -> dict | None:
    """Return the cached bounding rectangle as {x, y, w, h}, or None."""
    # GetCachedPropertyValue is used because it returns an indexable
    # (x, y, w, h) float tuple; the dedicated CachedBoundingRectangle accessor
    # returns a ctypes RECT struct that doesn't support indexing.
    try:
        rect = el.GetCachedPropertyValue(UIA_BoundingRectanglePropertyId)
        if rect and len(rect) == 4:
            return {"x": int(rect[0]), "y": int(rect[1]), "w": int(rect[2]), "h": int(rect[3])}
    except Exception:
        pass
    return None


def _parse_aria_properties(raw: str) -> dict[str, str]:
    """Parse a UIA AriaProperties string ("key=value;key=value") into a dict."""
    props: dict[str, str] = {}
    for pair in raw.split(";"):
        if "=" in pair:
            key, value = pair.split("=", 1)
            props[key.strip()] = value.strip()
    return props


def _resolve_cup_role(ct, name, aria_role, has_toggle, has_sel_item) -> str:
    """Map a UIA control type (plus ARIA / pattern hints) to a CUP role."""
    role = CUP_ROLES.get(ct, "generic")
    if ct == _CT_PANE and name:  # Pane with name -> region
        role = "region"

    # Refine from ARIA (web content in UIA) — only override ambiguous roles.
    if aria_role and role in _ARIA_REFINABLE_ROLES:
        role = _ARIA_ROLE_MAP.get(aria_role, role)

    # MenuItem subrole refinement (no ARIA needed).
    if ct == _CT_MENU_ITEM:
        if has_toggle:
            role = "menuitemcheckbox"
        elif has_sel_item:
            role = "menuitemradio"
    return role


def build_cup_node(el, id_gen, stats) -> dict:
    """Build a CUP-formatted node dict from a cached UIA element.

    Reads the cached properties of *el* and maps them to canonical CUP fields:
    role, states, actions, value, attributes, description, and platform
    metadata. Property reads that fail fall back to neutral defaults.

    Args:
        el: UIA element whose properties were fetched with a cache request.
        id_gen: Iterator of integers; one value is consumed to form the node
            id (``"e<N>"``).
        stats: Mutable dict with ``"nodes"`` (int) and ``"roles"`` (dict)
            entries; the node count and the per-control-type tally are
            updated in place.

    Returns:
        The node dict. ``id``, ``role``, ``name`` and ``platform`` are always
        present; other fields are omitted when empty.
    """
    stats["nodes"] += 1

    # ── Core properties ──
    try:
        name = el.CachedName or ""
    except Exception:
        name = ""
    try:
        ct = el.CachedControlType
    except Exception:
        ct = 0
    bounds = _read_cached_bounds(el)

    # Stats tracking (uses UIA names for the benchmark report)
    ct_name = CONTROL_TYPES.get(ct, f"Unknown({ct})")
    stats["roles"][ct_name] = stats["roles"].get(ct_name, 0) + 1

    # ── State properties ──
    is_enabled = _cached_bool(el, UIA_IsEnabledPropertyId, True)
    has_focus = _cached_bool(el, UIA_HasKeyboardFocusPropertyId, False)
    is_offscreen = _cached_bool(el, UIA_IsOffscreenPropertyId, False)
    is_required = _cached_bool(el, UIA_IsRequiredForFormPropertyId, False)
    is_modal = _cached_bool(el, UIA_WindowIsModalPropertyId, False)

    # ── Pattern availability ──
    has_invoke = _cached_bool(el, UIA_IsInvokePatternAvailablePropertyId, False)
    has_toggle = _cached_bool(el, UIA_IsTogglePatternAvailablePropertyId, False)
    has_expand = _cached_bool(el, UIA_IsExpandCollapsePatternAvailablePropertyId, False)
    has_value = _cached_bool(el, UIA_IsValuePatternAvailablePropertyId, False)
    has_sel_item = _cached_bool(el, UIA_IsSelectionItemPatternAvailablePropertyId, False)
    has_scroll = _cached_bool(el, UIA_IsScrollPatternAvailablePropertyId, False)
    has_range = _cached_bool(el, UIA_IsRangeValuePatternAvailablePropertyId, False)

    # ── Pattern state values ──
    # ToggleState: 0 = off, 1 = on, 2 = indeterminate.
    toggle_state = _cached_int(el, UIA_ToggleToggleStatePropertyId, -1)
    # ExpandCollapseState: 0 = collapsed, 1 = expanded, 2 = partially expanded,
    # 3 = leaf node.
    expand_state = _cached_int(el, UIA_ExpandCollapseExpandCollapseStatePropertyId, -1)
    is_selected = _cached_bool(el, UIA_SelectionItemIsSelectedPropertyId, False)
    val_readonly = _cached_bool(el, UIA_ValueIsReadOnlyPropertyId, False) if has_value else False
    val_str = _cached_str(el, UIA_ValueValuePropertyId) if has_value else ""

    # ── Identification ──
    automation_id = _cached_str(el, UIA_AutomationIdPropertyId)
    class_name = _cached_str(el, UIA_ClassNamePropertyId)
    help_text = _cached_str(el, UIA_HelpTextPropertyId)

    # ── ARIA properties (web content hosted in UIA) ──
    aria_role = _cached_str(el, UIA_AriaRolePropertyId)
    aria_props = _parse_aria_properties(_cached_str(el, UIA_AriaPropertiesPropertyId))

    # ── Role (ARIA-mapped) ──
    role = _resolve_cup_role(ct, name, aria_role, has_toggle, has_sel_item)

    # ── States ──
    states = []
    if not is_enabled:
        states.append("disabled")
    if has_focus:
        states.append("focused")
    if is_offscreen:
        states.append("offscreen")
    if has_toggle:
        if toggle_state == 1:
            # Toggle on Button = pressed (toggle button), elsewhere = checked
            states.append("pressed" if ct == _CT_BUTTON else "checked")
        elif toggle_state == 2:
            states.append("mixed")
    if has_expand:
        if expand_state == 0:
            states.append("collapsed")
        elif expand_state in (1, 2):
            states.append("expanded")
    if is_selected:
        states.append("selected")
    if is_required:
        states.append("required")
    if is_modal:
        states.append("modal")
    if has_value and val_readonly:
        states.append("readonly")
    if has_value and not val_readonly and role in TEXT_INPUT_ROLES:
        states.append("editable")

    # ── Actions (derived from supported UIA patterns) ──
    actions = []
    if has_invoke:
        actions.append("click")
    if has_toggle:
        actions.append("toggle")
    if has_expand and expand_state != 3:  # 3 = LeafNode
        actions.append("expand")
        actions.append("collapse")
    if has_value and not val_readonly:
        actions.append("setvalue")
        if role in TEXT_INPUT_ROLES:
            actions.append("type")
    if has_sel_item:
        actions.append("select")
    if has_scroll:
        actions.append("scroll")
    if has_range:
        actions.append("increment")
        actions.append("decrement")
    # Nothing else applicable: an enabled element can at least take focus.
    if not actions and is_enabled:
        actions.append("focus")

    # ── Attributes ──
    attrs: dict = {}

    # Heading level from ARIA properties
    if role == "heading" and "level" in aria_props:
        try:
            attrs["level"] = int(aria_props["level"])
        except ValueError:
            pass

    # Range widget min/max/now
    if has_range:
        range_min = _cached_float(el, UIA_RangeValueMinimumPropertyId)
        range_max = _cached_float(el, UIA_RangeValueMaximumPropertyId)
        range_val = _cached_float(el, UIA_RangeValueValuePropertyId)
        if range_min is not None:
            attrs["valueMin"] = range_min
        if range_max is not None:
            attrs["valueMax"] = range_max
        if range_val is not None:
            attrs["valueNow"] = range_val

    # Orientation (OrientationType: 1 = horizontal, 2 = vertical)
    orientation = _cached_int(el, UIA_OrientationPropertyId, -1)
    if role in _ORIENTED_ROLES:
        if orientation == 1:
            attrs["orientation"] = "horizontal"
        elif orientation == 2:
            attrs["orientation"] = "vertical"

    # Placeholder from ARIA properties (web content)
    if role in ("textbox", "searchbox", "combobox") and "placeholder" in aria_props:
        attrs["placeholder"] = aria_props["placeholder"][:200]

    # URL for links from Value pattern string
    if role == "link" and val_str:
        attrs["url"] = val_str[:500]

    # ── Assemble CUP node ──
    node = {
        "id": f"e{next(id_gen)}",
        "role": role,
        "name": name[:200],
    }

    # Optional fields — omit when empty to keep payload compact
    if help_text:
        node["description"] = help_text[:200]
    if val_str and role in _VALUE_ROLES:
        node["value"] = val_str[:200]
    if bounds:
        node["bounds"] = bounds
    if states:
        node["states"] = states
    if actions:
        node["actions"] = actions
    if attrs:
        node["attributes"] = attrs

    # ── Platform extension (windows-specific raw data) ──
    patterns = [
        label
        for available, label in (
            (has_invoke, "Invoke"),
            (has_toggle, "Toggle"),
            (has_expand, "ExpandCollapse"),
            (has_value, "Value"),
            (has_sel_item, "SelectionItem"),
            (has_scroll, "Scroll"),
            (has_range, "RangeValue"),
        )
        if available
    ]

    pw = {"controlType": ct}
    if automation_id:
        pw["automationId"] = automation_id
    if class_name:
        pw["className"] = class_name
    if patterns:
        pw["patterns"] = patterns
    node["platform"] = {"windows": pw}

    return node
662
+
663
+
664
+ # ---------------------------------------------------------------------------
665
+ # Approach A: flat snapshot via FindAllBuildCache
666
+ # ---------------------------------------------------------------------------
667
+
668
+
669
def flat_snapshot(uia, root, cache_req, max_depth: int, id_gen, stats) -> list[dict]:
    """Breadth-first, depth-limited snapshot using FindAll(Children) per level.

    Args:
        uia: IUIAutomation object, used to create the match-everything condition.
        root: UIA element to start from (depth 0); its properties must already
            be cached.
        cache_req: Cache request passed to every FindAllBuildCache call.
        max_depth: Deepest level to descend to; 0 captures only *root*.
        id_gen: Integer iterator forwarded to build_cup_node.
        stats: Stats dict forwarded to build_cup_node; ``"max_depth"`` is
            raised to the deepest level that actually produced a node.

    Returns:
        A flat list of CUP nodes (no children nesting), root first, then each
        level in order.
    """
    true_cond = uia.CreateTrueCondition()
    all_nodes: list[dict] = [build_cup_node(root, id_gen, stats)]

    current_level = [root]

    for depth in range(1, max_depth + 1):
        next_level = []
        for parent in current_level:
            try:
                arr = parent.FindAllBuildCache(TreeScope_Children, true_cond, cache_req)
            except comtypes.COMError:
                # Best effort: skip parents whose children cannot be fetched.
                continue
            if arr is None:
                continue
            for i in range(arr.Length):
                el = arr.GetElement(i)
                all_nodes.append(build_cup_node(el, id_gen, stats))
                next_level.append(el)

        if not next_level:
            break

        # Record the depth only once the level is known to contain nodes, and
        # never lower a depth already recorded in a shared stats dict. Writing
        # it up front overstated the depth by one whenever the last level
        # turned out to be empty.
        stats["max_depth"] = max(stats.get("max_depth", 0), depth)
        current_level = next_level

    return all_nodes
702
+
703
+
704
+ # ---------------------------------------------------------------------------
705
+ # Approach B: structured tree via TreeWalker + BuildCache
706
+ # ---------------------------------------------------------------------------
707
+
708
+
709
def walk_tree(walker, element, cache_req, depth: int, max_depth: int, id_gen, stats) -> dict | None:
    """Recursively build a nested CUP tree with a UIA TreeWalker.

    Children are fetched one at a time through the walker's
    ``Get*ElementBuildCache`` calls. Returns ``None`` when *depth* exceeds
    *max_depth*; otherwise the node for *element*, with a ``"children"`` list
    when any child nodes were produced.
    """
    if depth > max_depth:
        return None

    node = build_cup_node(element, id_gen, stats)
    stats["max_depth"] = max(stats["max_depth"], depth)

    # At the depth limit the node is emitted without descending further.
    if depth >= max_depth:
        return node

    try:
        current = walker.GetFirstChildElementBuildCache(element, cache_req)
    except comtypes.COMError:
        current = None

    kids = []
    while current is not None and is_valid_element(current):
        sub_node = walk_tree(walker, current, cache_req, depth + 1, max_depth, id_gen, stats)
        if sub_node is not None:
            kids.append(sub_node)
        try:
            current = walker.GetNextSiblingElementBuildCache(current, cache_req)
        except comtypes.COMError:
            # Stop at the first sibling that cannot be fetched; keep what we have.
            break

    if kids:
        node["children"] = kids
    return node
736
+
737
+
738
+ # ---------------------------------------------------------------------------
739
+ # Approach C: pre-cached subtree via CacheRequest(TreeScope_Subtree)
740
+ # ---------------------------------------------------------------------------
741
+
742
+
743
def walk_cached_tree(element, depth: int, max_depth: int, id_gen, stats, refs) -> dict | None:
    """Build a nested CUP tree from a subtree that was cached up front.

    Children are read through ``GetCachedChildren`` rather than by walking
    with per-node first-child / next-sibling calls. Every visited element is
    stored in *refs* under its node id. Returns ``None`` when *depth* exceeds
    *max_depth*.
    """
    if depth > max_depth:
        return None

    node = build_cup_node(element, id_gen, stats)
    stats["max_depth"] = max(stats["max_depth"], depth)
    refs[node["id"]] = element

    # At the depth limit the node is emitted without descending further.
    if depth >= max_depth:
        return node

    kids = []
    try:
        cached = element.GetCachedChildren()
        child_count = cached.Length if cached is not None else 0
        for index in range(child_count):
            sub_node = walk_cached_tree(
                cached.GetElement(index), depth + 1, max_depth, id_gen, stats, refs
            )
            if sub_node is not None:
                kids.append(sub_node)
    except Exception:
        # Best effort (COM errors included): children collected before the
        # failure are kept.
        pass

    if kids:
        node["children"] = kids
    return node
774
+
775
+
776
+ # ---------------------------------------------------------------------------
777
+ # WindowsAdapter — PlatformAdapter implementation
778
+ # ---------------------------------------------------------------------------
779
+
780
+
781
class WindowsAdapter(PlatformAdapter):
    """PlatformAdapter implementation for Windows, backed by UIA COM."""

    # Chromium/Electron apps lazily initialise their accessibility tree.
    # The renderer won't expose web content to UIA until a11y is triggered.
    # We detect this by checking for a "Document" node (the web content
    # root) — browser chrome alone (toolbar, tabs) can produce 40+ nodes
    # but won't include a Document until the renderer initialises a11y.
    # Trees with fewer nodes than this are always treated as uninitialised.
    _SPARSE_TREE_THRESHOLD = 30

    def __init__(self):
        # Both are populated by initialize().
        self._uia = None
        self._subtree_cr = None

    @property
    def platform_name(self) -> str:
        """Platform identifier string for this adapter."""
        return "windows"

    def initialize(self) -> None:
        """Create the UIA object and subtree cache request (first call only)."""
        if self._uia is None:
            self._uia = init_uia()
            self._subtree_cr = make_cache_request(
                self._uia,
                element_mode=AutomationElementMode_Full,
                tree_scope=TreeScope_Subtree,
            )

    def get_screen_info(self) -> tuple[int, int, float]:
        """Return ``(width, height, scale)`` for the primary display."""
        width, height = _win32_screen_size()
        return width, height, _win32_screen_scale()

    def get_foreground_window(self) -> dict[str, Any]:
        """Describe the current foreground window (handle, title, pid)."""
        hwnd, title = _win32_foreground_window()
        return {
            "handle": hwnd,
            "title": title,
            "pid": get_window_pid(hwnd),
            "bundle_id": None,
        }

    def get_all_windows(self) -> list[dict[str, Any]]:
        """Describe every visible top-level window, titled or not."""
        return [
            {
                "handle": hwnd,
                "title": title,
                "pid": get_window_pid(hwnd),
                "bundle_id": None,
            }
            for hwnd, title in _win32_enum_windows(visible_only=True)
        ]

    def get_window_list(self) -> list[dict[str, Any]]:
        """List visible, titled windows with bounds and a foreground flag."""
        fg_hwnd = user32.GetForegroundWindow()
        return [
            {
                "title": title,
                "pid": get_window_pid(hwnd),
                "bundle_id": None,
                "foreground": hwnd == fg_hwnd,
                "bounds": _win32_get_window_rect(hwnd),
            }
            for hwnd, title in _win32_enum_windows(visible_only=True)
            if title  # untitled windows are left out
        ]

    def get_desktop_window(self) -> dict[str, Any] | None:
        """Describe the desktop host window, or ``None`` if it is not found."""
        desktop_hwnd = _win32_find_desktop_hwnd()
        if desktop_hwnd is None:
            return None
        return {
            "handle": desktop_hwnd,
            "title": "Desktop",
            "pid": get_window_pid(desktop_hwnd),
            "bundle_id": None,
        }

    def capture_tree(
        self,
        windows: list[dict[str, Any]],
        *,
        max_depth: int = 999,
    ) -> tuple[list[dict], dict, dict[str, Any]]:
        """Capture the UIA trees of *windows* as ``(tree, stats, refs)``.

        When exactly one window is requested and its tree looks
        uninitialised, the window is poked and the walk is repeated once; the
        second result is returned as-is.
        """
        self.initialize()
        captured = self._walk_windows(windows, max_depth=max_depth)

        if len(windows) == 1 and self._tree_needs_poke(captured[1]):
            self._poke_window(windows[0]["handle"])
            captured = self._walk_windows(windows, max_depth=max_depth)

        tree, stats, refs = captured
        return tree, stats, refs

    @staticmethod
    def _tree_needs_poke(stats: dict) -> bool:
        """Decide whether the captured tree looks uninitialised.

        Either heuristic triggers a retry:
          1. Fewer nodes than ``_SPARSE_TREE_THRESHOLD`` — the app returned
             almost nothing.
          2. Browser-chrome control types (ToolBar, TabItem) are present but
             no Document — Chromium/Electron rendered the shell but the web
             content a11y tree hasn't been built yet.
        """
        if stats["nodes"] < WindowsAdapter._SPARSE_TREE_THRESHOLD:
            return True

        role_counts = stats.get("roles", {})
        has_chrome = bool(role_counts.get("ToolBar") or role_counts.get("TabItem"))
        has_document = bool(role_counts.get("Document"))
        return has_chrome and not has_document

    def _walk_windows(
        self,
        windows: list[dict[str, Any]],
        *,
        max_depth: int = 999,
    ) -> tuple[list[dict], dict, dict[str, Any]]:
        """Walk the UIA tree for the given windows."""
        id_gen = itertools.count()
        stats: dict = {"nodes": 0, "max_depth": 0, "roles": {}}
        refs: dict[str, Any] = {}
        tree: list[dict] = []

        for window in windows:
            try:
                root_el = self._uia.ElementFromHandleBuildCache(
                    window["handle"], self._subtree_cr
                )
            except Exception:
                # Best effort: windows whose element cannot be obtained are skipped.
                continue
            root_node = walk_cached_tree(root_el, 0, max_depth, id_gen, stats, refs)
            if root_node:
                tree.append(root_node)

        return tree, stats, refs

    @staticmethod
    def _poke_window(hwnd: int) -> None:
        """Nudge a window to force Chromium to initialise its a11y tree.

        SetForegroundWindow triggers the renderer's accessibility mode.
        A short sleep gives Chromium time to build the tree before we retry.
        """
        import time

        user32.SetForegroundWindow(hwnd)
        time.sleep(0.3)