@bun-win32/uia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AI.md +130 -0
- package/README.md +79 -0
- package/agent.ts +83 -0
- package/automation.ts +51 -0
- package/cache.ts +67 -0
- package/com.ts +62 -0
- package/condition.ts +132 -0
- package/constants.ts +233 -0
- package/element.ts +512 -0
- package/index.ts +40 -0
- package/input.ts +149 -0
- package/msaa.ts +99 -0
- package/package.json +86 -0
- package/patterns.ts +234 -0
- package/png.ts +75 -0
- package/reads.ts +66 -0
- package/tree.ts +95 -0
- package/window.ts +107 -0
package/AI.md
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# AI Guide for @bun-win32/uia
|
|
2
|
+
|
|
3
|
+
Everything below is reachable from `@bun-win32/uia` (or its unscoped alias `bun-uia`) alone. **Bun + Windows only.** This file is the complete surface — an agent should not need to read the source. When it must, the "Where to look" section says exactly which one file to open.
|
|
4
|
+
|
|
5
|
+
## What it is
|
|
6
|
+
|
|
7
|
+
Playwright for Windows desktop apps. Query the live UI Automation (accessibility) tree by name / control-type / automationId, invoke controls, type, read values, wait for elements to appear, and serialize a window's tree to JSON for an LLM agent. Three engines behind one facade:
|
|
8
|
+
|
|
9
|
+
1. **IUIAutomation COM client** (the spine) — driven through a cast-free vtable invoker. Query + invoke + patterns + cache.
|
|
10
|
+
2. **Flat `uiautomationcore` C-API** — a VARIANT-free fast path (secondary; the COM path is the default).
|
|
11
|
+
3. **`oleacc` MSAA fallback** — for legacy / owner-draw windows that expose no useful UIA tree.
|
|
12
|
+
|
|
13
|
+
Escalation rule: stay on the `uia` facade. Drop to a lower engine (`msaaTree`, the raw `vcall`) only when you need something the facade lacks.
|
|
14
|
+
|
|
15
|
+
## Mental model (read this first)
|
|
16
|
+
|
|
17
|
+
- **UIA is cross-process.** Every property read and `find` marshals into the target app. `initialize()` once (it sets up a single-threaded COM apartment + makes the process DPI-aware); `uninitialize()` at the end, or `using app = uia.attach(...)`.
|
|
18
|
+
- **Selectors are hybrid.** Exact scalars (`controlType`, `name` string, `automationId`, `className`) compile to a **server-side** UIA condition — the target app filters in-process and only matches come back. Rich predicates (`name` RegExp, `nameContains`) are matched **client-side** on the (already-narrowed) results. Always scope a search to a window (`attach`), **never** `findAll` from the desktop root.
|
|
19
|
+
- **CacheRequest batches.** Naive walks pay N cross-process round-trips; `findAllCached` / `tree()` prefetch a whole subtree in one. The cache wins more the larger the tree.
|
|
20
|
+
- **Only the listed patterns are proven.** `invoke`, `value`/`setValue`, `text`, `toggle`, `expand`/`collapse`, `select`, `rangeValue`/`setRangeValue`, window `close`/`setVisualState` are each proven against a real control. `scrollIntoView` is implemented but unproven (see roadmap).
|
|
21
|
+
- **Synthetic input needs an unlocked, interactive desktop.** `type`, `sendKeys`, `click` go through `SendInput` — they are silently dropped on a locked session. UIA queries, `invoke`, `setValue`, and `screenshot` all work locked. Prefer `setValue` / `invoke` over `type` / `click` when a pattern exists.
|
|
22
|
+
- **Element pointers have apartment affinity.** Keep them on the creating thread; tolerate `UIA_E_ELEMENTNOTAVAILABLE (0x80040201)` if a window closes mid-walk. UIA **events** are out of scope in v1 — poll with `waitFor`.
|
|
23
|
+
|
|
24
|
+
## Capability → API
|
|
25
|
+
|
|
26
|
+
| I want to … | call |
|
|
27
|
+
| --- | --- |
|
|
28
|
+
| attach to an app | `uia.attach('Calculator')` · `uia.attach({ className })` · `uia.attach({ process: pid })` · `uia.attach(hWnd)` |
|
|
29
|
+
| spawn + wait for an app | `await uia.launch(['notepad.exe'], { className: 'Notepad' })` |
|
|
30
|
+
| find by name/type/automationId | `app.find({ controlType: ControlType.Button, name: 'Five' })` |
|
|
31
|
+
| find all matches | `app.findAll({ controlType: ControlType.Button })` |
|
|
32
|
+
| wait for a control (auto-retry) | `await app.waitFor(selector, { timeout: 5000 })` |
|
|
33
|
+
| click / press | `el.invoke()` (UIA) · `el.click()` (bbox + SendInput fallback) |
|
|
34
|
+
| type | `el.type('text')` (Unicode keystrokes) · `uia.sendKeys('Control+S')` |
|
|
35
|
+
| set / read a value | `el.setValue('text')` · `el.value` · `el.text()` (TextPattern) |
|
|
36
|
+
| toggle / expand / select / slider | `el.toggle()` · `el.expand()`/`el.collapse()` · `el.select()` · `el.setRangeValue(n)` |
|
|
37
|
+
| read state | `el.name` `el.controlType` `el.controlTypeName` `el.automationId` `el.className` `el.isEnabled` `el.boundingRectangle` |
|
|
38
|
+
| serialize the tree for an LLM | `uia.tree(app, { agentProfile: true })` |
|
|
39
|
+
| run a JSON action list (agent) | `uia.execute(app, [{ find: {...}, do: 'invoke' }, …])` |
|
|
40
|
+
| screenshot | `app.screenshot()` → PNG bytes |
|
|
41
|
+
| list / target windows | `uia.windows()` · `findWindow({ title })` · `windowForProcess(pid)` |
|
|
42
|
+
| fall back to MSAA | `uia.msaaTree(hWnd)` |
|
|
43
|
+
|
|
44
|
+
## Full API
|
|
45
|
+
|
|
46
|
+
### `uia` — the facade object
|
|
47
|
+
`attach(target)`, `launch(command, target, timeout?)`, `focused()`, `fromPoint(x, y)`, `root()`, `windows()`, `tree(element, options?)`, `execute(element, actions)`, `msaaTree(hWnd, maxDepth?)`, `click(x, y)`, `sendKeys(combo)`, `type(text)`, `initialize()`, `uninitialize()`.
|
|
48
|
+
|
|
49
|
+
### `class Element`
|
|
50
|
+
- Live properties (getters): `name`, `controlType`, `controlTypeName`, `automationId`, `className`, `isEnabled`, `boundingRectangle: Rect`, `nativeWindowHandle: bigint`, `value`, `toggleState`, `expandCollapseState`, `isSelected`, `rangeValue`.
|
|
51
|
+
- Tree: `find(selector, scope?)`, `findAll(selector, scope?)`, `findAllCached(selector, request, scope?)`, `children`, `parent`, `await waitFor(selector, { timeout?, interval? })`, `describeNoMatch(selector)`, `buildUpdatedCache(request)`, `cachedChildren`, `cached{Name,ControlType,AutomationId,ClassName,BoundingRectangle,IsEnabled}`.
|
|
52
|
+
- Patterns (throw if unsupported): `invoke()`, `setValue(text)`, `text()`, `toggle()`, `expand()`, `collapse()`, `select()`, `scrollIntoView()`, `setRangeValue(n)`, `close()`, `setVisualState(WindowVisualState)`.
|
|
53
|
+
- Input (need an unlocked session): `focus()`, `type(text)`, `click()`.
|
|
54
|
+
- Lifecycle: `release()`, `ptr: bigint`.
|
|
55
|
+
|
|
56
|
+
### `class Window extends Element`
|
|
57
|
+
Adds `hWnd: bigint`, `activate()`, `screenshot(): Uint8Array`, `dispose()` / `[Symbol.dispose]`.
|
|
58
|
+
|
|
59
|
+
### Selector & matching
|
|
60
|
+
`interface Selector { controlType?, name? (string | RegExp), nameContains?, automationId?, className? }`. `interface ElementProperties { name, controlType, automationId, className }` (what `matches` reads). `matches(props, selector)`, `selectorToString(selector)`, `formatNoMatch(selector, windowName, candidateNames)`.
|
|
61
|
+
|
|
62
|
+
### Root accessors (return a live `Element`/`Window`)
|
|
63
|
+
`fromHandle(hWnd)` (an `Element` for a window handle — `attach` wraps this in a `Window`), `focused()`, `fromPoint(x, y)`, `root()`.
|
|
64
|
+
|
|
65
|
+
### Constants / enums
|
|
66
|
+
`ControlType` (Button=50000 … AppBar=50040), `PatternId` (10000–10033), `PropertyId` (30000–30024), `TreeScope`, `PropertyConditionFlags`, `ToggleState`, `ExpandCollapseState`, `WindowVisualState`, `SLOT` (verified vtable slots).
|
|
67
|
+
|
|
68
|
+
### Cache
|
|
69
|
+
`createCacheRequest(properties?, scope?, mode?)`, `class CacheRequest { property, pattern, treeScope, elementMode, release }`, `DEFAULT_CACHE_PROPERTIES`, `AutomationElementMode`.
|
|
70
|
+
|
|
71
|
+
### Tree / agent
|
|
72
|
+
`serialize(element, options?: SerializeOptions): UiaNode`, `interface SerializeOptions { maxDepth?, agentProfile? }`, `interface UiaNode { role, name, automationId?, className?, bounds?, enabled?, children }`, `countNodes`, `estimateTokens`, `execute(element, actions): AgentActionResult[]`, `AGENT_TOOLS`, `groundingTree(element)`, `type AgentAction`.
|
|
73
|
+
|
|
74
|
+
### Windows / input / msaa / low-level
|
|
75
|
+
`findWindow`, `listWindows`, `windowForProcess`, `screenshot`, `type WindowInfo`; `sendKeys`, `clickAt`, `virtualKeyCode`, `INPUT_SIZE`, `packKeyboardInput`, `packMouseInput`; `msaaTree`, `accessibleFromWindow`, `type MsaaNode`; `vcall`, `comRelease`, `guid`, `hresult`, `getBstr`, `getLong`, `getRect`, `getHandle`, `decodeBstr`, `encodePNG`, `initialize`, `uninitialize`, `automation`, `type Rect`.
|
|
76
|
+
|
|
77
|
+
## Recipes
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
// Notepad round-trip (the 10-line wow) — needs an unlocked session for type()
|
|
81
|
+
import { ControlType, uia } from '@bun-win32/uia';
|
|
82
|
+
const app = await uia.launch(['notepad.exe'], { className: 'Notepad' });
|
|
83
|
+
const edit = await app.waitFor({ controlType: ControlType.Document });
|
|
84
|
+
edit.focus().type('hello from bun-uia');
|
|
85
|
+
console.log(edit.text());
|
|
86
|
+
```
|
|
87
|
+
```ts
|
|
88
|
+
// Calculator 5 + 3 = 8 (works on a locked session — invoke is UIA, not SendInput)
|
|
89
|
+
const calc = await uia.launch(['cmd', '/c', 'start', 'calc'], { title: 'Calculator' });
|
|
90
|
+
for (const name of ['Five', 'Plus', 'Three', 'Equals']) calc.find({ controlType: ControlType.Button, name })?.invoke();
|
|
91
|
+
console.log(calc.find({ automationId: 'CalculatorResults' })?.name); // → "Display is 8"
|
|
92
|
+
```
|
|
93
|
+
```ts
|
|
94
|
+
// Wait for a dialog, dismiss it
|
|
95
|
+
uia.sendKeys('Control+S');
|
|
96
|
+
const cancel = await app.waitFor({ name: 'Cancel', controlType: ControlType.Button }, { timeout: 4000 });
|
|
97
|
+
cancel.invoke();
|
|
98
|
+
```
|
|
99
|
+
```ts
|
|
100
|
+
// Tree → JSON for an LLM agent (ground-truth identity + bounds, no pixel-counting)
|
|
101
|
+
const grounding = uia.tree(app, { agentProfile: true }); // { role, name, bounds, children }
|
|
102
|
+
const results = uia.execute(app, [{ find: { name: 'Five' }, do: 'invoke' }, { find: { automationId: 'CalculatorResults' }, do: 'read' }]);
|
|
103
|
+
```
|
|
104
|
+
```ts
|
|
105
|
+
// Reliable text entry without keyboard focus battles (works locked): ValuePattern
|
|
106
|
+
edit.setValue('typed without keystrokes');
|
|
107
|
+
// Verify visually
|
|
108
|
+
await Bun.write('shot.png', app.screenshot());
|
|
109
|
+
```
|
|
110
|
+
```ts
|
|
111
|
+
// MSAA fallback for an app with no good UIA tree
|
|
112
|
+
const accessible = uia.msaaTree(hWnd, 6);
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Gotchas (the traps ledger)
|
|
116
|
+
|
|
117
|
+
- **Selectors are window-scoped, never run from the desktop root.** `attach` a window first; `find` defaults to `TreeScope_Descendants` of that window.
|
|
118
|
+
- **`ElementFromHandle` is vtable slot 6** (slot 7 is `ElementFromPoint`). The package has the verified slot table; if you hand-roll `vcall`, regenerate slots from `UIAutomationClient.h` and prove each — a wrong slot segfaults.
|
|
119
|
+
- **Server-side property conditions work** (the by-value VARIANT argument — 24 bytes on x64 — is passed via a hidden pointer in the x64 ABI). `nameContains` / RegExp still filter client-side.
|
|
120
|
+
- **`type` / `sendKeys` / `click` are dropped on a locked session.** Prefer `setValue` / `invoke`; they work locked. `screenshot` (PrintWindow) also works locked.
|
|
121
|
+
- **Process is DPI-aware** after `initialize()` so click coordinates match UIA bounds; `click()` uses `SetCursorPos` + physical pixels.
|
|
122
|
+
- **`SendInput` cbSize must be 40** (handled internally) — the x64 `INPUT` is 40 bytes.
|
|
123
|
+
- **BSTR names are bulk-copied before free**; never read after `SysFreeString`.
|
|
124
|
+
- **Only proven patterns ship.** Calling a pattern an element doesn't support throws a clear message (pointing at `.click()` / `.type()` where relevant). `scrollIntoView` is unproven (roadmap).
|
|
125
|
+
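- **Names differ on non-English locales and across app builds.** On an English build, Calculator's result element is `automationId: 'CalculatorResults'` and its buttons are named `Five`/`Plus`/… (full words); other locales or versions may expose different names. Configure selectors per app.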
|
|
126
|
+
- **Threading:** v1 is STA, fire-and-forget out-of-process driving. Events / self-UI automation need MTA — out of scope.
|
|
127
|
+
|
|
128
|
+
## Where to look (source)
|
|
129
|
+
|
|
130
|
+
`automation.ts` activation + the singleton · `com.ts` `vcall`/`guid` · `constants.ts` ids + verified slots · `element.ts` `Element`/`Window`/`attach`/`launch`/`waitFor` · `condition.ts` the typed selector · `patterns.ts` control patterns · `input.ts` `SendInput` · `cache.ts` CacheRequest · `tree.ts` agent grounding · `window.ts` targeting + screenshots · `msaa.ts` oleacc fallback · `agent.ts` the LLM tool adapter · `reads.ts` property readers.
|
package/README.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# @bun-win32/uia
|
|
2
|
+
|
|
3
|
+
**Playwright for Windows desktop apps.** Query the live UI Automation accessibility tree by name and role, invoke controls, type, wait for elements, and serialize a window to JSON for an LLM agent — from Bun, with **zero native dependencies**. No node-gyp, no prebuild matrix, no Appium server, no .NET.
|
|
4
|
+
|
|
5
|
+
> The unscoped alias [`bun-uia`](https://www.npmjs.com/package/bun-uia) re-exports this package — `bun add bun-uia` is the discoverable front door.
|
|
6
|
+
|
|
7
|
+
```ts
|
|
8
|
+
import { ControlType, uia } from '@bun-win32/uia';
|
|
9
|
+
|
|
10
|
+
const app = await uia.launch(['notepad.exe'], { className: 'Notepad' });
|
|
11
|
+
const edit = await app.waitFor({ controlType: ControlType.Document });
|
|
12
|
+
edit.focus().type('nothing native compiles, and it just works');
|
|
13
|
+
console.log(edit.text()); // → nothing native compiles, and it just works
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
// Drive Calculator to 5 + 3 = 8 by name — survives DPI/theme/layout shifts that break pixel scripts:
|
|
18
|
+
const calc = await uia.launch(['cmd', '/c', 'start', 'calc'], { title: 'Calculator' });
|
|
19
|
+
for (const name of ['Five', 'Plus', 'Three', 'Equals']) calc.find({ controlType: ControlType.Button, name })?.invoke();
|
|
20
|
+
console.log(calc.find({ automationId: 'CalculatorResults' })?.name); // → "Display is 8"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
`bun add @bun-win32/uia` is the entire install story.
|
|
24
|
+
|
|
25
|
+
## Why this exists
|
|
26
|
+
|
|
27
|
+
The Windows desktop-automation cluster on npm is a field of native-addon pain, paywalls, and abandoned daemons. Downloads verified against `api.npmjs.org` for the week of 2026-06-05→11.
|
|
28
|
+
|
|
29
|
+
| Tool | Weekly dl | Install / runtime | The catch |
|
|
30
|
+
| --- | --- | --- | --- |
|
|
31
|
+
| `@nut-tree-fork/nut-js` | 32,360 | libnut N-API addon (cmake-js) | Fork of a **paywalled** original — *"all of my packages around nut.js will cease to exist publicly on npm … only available through the private … registry, which requires an active subscription."* Pixel/image-match, **no a11y tree**. |
|
|
32
|
+
| `appium-windows-driver` | 30,749 | Appium server **+ a separate WinAppDriver.exe** | *"WinAppDriver server has not been maintained by Microsoft for years … Developer mode must be enabled."* Two daemons + a W3C HTTP hop per element read. |
|
|
33
|
+
| `@jitsi/robotjs` / `robotjs` | 15,333 / 11,375 | node-gyp / prebuild matrix | *"No prebuilt binaries found … node-gyp rebuild"* C++ compile fallback — the #1 documented install failure. Blind pixel + keystroke, **no element model**. |
|
|
34
|
+
| `uiohook-napi` (input hooks) | 21,965 | N-API addon | Healthy — but global `SetWindowsHookEx` hooks run on a foreign thread and can assert/segfault (node-addon-api #903). |
|
|
35
|
+
| `@bright-fish/node-ui-automation` | 33 | NAPI/COM native addon | The only real npm UIA wrapper — **dead since 2022**. |
|
|
36
|
+
| NodeRT `windows.ui.uiautomation` | 15 | NodeRT native addon | Dead 2022 **and wrong namespace** (projects WinRT, not the Win32 `IUIAutomation`). |
|
|
37
|
+
| FlaUI / pywinauto / AutoIt | n/a | .NET / Python / bespoke EXE | A foreign runtime to install and ship. |
|
|
38
|
+
|
|
39
|
+
**There is no zero-install, typed, in-process `IUIAutomation` client for Node or Bun.** @bun-win32/uia is a few kilobytes of TypeScript over `bun:ffi` — the runtime's own FFI, not a third-party N-API addon that rots against each Node minor (*"PLEASE ARCHIVE THIS REPO"* — node-ffi-napi #269). It **can't be paywalled** (no compiled binary to gate behind a subscription registry), has **no build step** (no node-gyp, no ABI matrix, no MSVC/Python), and talks to UIA **in-process** (no WinAppDriver.exe, no Appium daemon, no `127.0.0.1:4723` round-trip, no Developer Mode).
|
|
40
|
+
|
|
41
|
+
## What you can do
|
|
42
|
+
|
|
43
|
+
- **Find controls semantically** — by name, role, or automationId, not a fragile `(x, y)`. Exact scalars compile to a **server-side** UIA condition (the target app filters in-process); regex/substring filter client-side.
|
|
44
|
+
- **Act** — `invoke()`, `click()`, `setValue()`, `type()`, `toggle()`, `expand()`, `select()`, `setRangeValue()`, window `close()`/`setVisualState()`. Each pattern is proven against a real control.
|
|
45
|
+
- **`waitFor`** — Playwright-class auto-retry for flaky native UIs. No other Windows-desktop npm tool has it. Timeouts quote the selector, the window, and the nearest candidates.
|
|
46
|
+
- **Read & assert** — `value`, `text()`, `isEnabled`, `boundingRectangle`, `toggleState`. Read state back through the tree to assert — pixel tools can't.
|
|
47
|
+
- **Serialize the tree to JSON** for an LLM agent (`uia.tree`), with a token-svelte agent profile.
|
|
48
|
+
- **Screenshot** any window via PrintWindow (works even on a locked session).
|
|
49
|
+
- **MSAA fallback** (`uia.msaaTree`) for legacy / owner-draw windows.
|
|
50
|
+
- **Crash-safe input observation** via `GetAsyncKeyState` polling — no foreign-thread hook, no message-pump assert.
|
|
51
|
+
|
|
52
|
+
## For AI agents
|
|
53
|
+
|
|
54
|
+
Frontier computer-use agents ground actions in **screenshots**, and the literature calls that approach fragile and expensive. Microsoft **UFO2** (arXiv 2504.14603) fuses the **UI Automation tree first, vision second**, to fix *"fragile screenshot-based interaction"*; OmniParser exists because VLMs can't reliably locate clickable elements from a bitmap; and **OSWorld-Human** (arXiv 2506.16042) reports a11y-tree builds taking **3–26 seconds** and "thousands more tokens per step."
|
|
55
|
+
|
|
56
|
+
@bun-win32/uia is exactly that UIA-first substrate — served **fast and in-process**. `uia.tree(app, { agentProfile: true })` walks a window's subtree in **one cached round-trip** and emits ground-truth `{ role, name, automationId, bounds, children }` an agent acts on without pixel-counting. The measured build time below beats the OSWorld 3–26 s reference by **two-to-three orders of magnitude**. `uia.execute(app, actions)` runs a JSON action list; `AGENT_TOOLS` is a ready LLM tool schema. Honest limit: UIA can't see owner-draw/canvas/games, so this **complements** a vision agent rather than replacing screenshots.
|
|
57
|
+
|
|
58
|
+
## Benchmarks
|
|
59
|
+
|
|
60
|
+
Measured on Windows 11, Bun 1.4, by `bun run example/benchmark.ts` (run it to reproduce):
|
|
61
|
+
|
|
62
|
+
| operation | result |
|
|
63
|
+
| --- | --- |
|
|
64
|
+
| single property read (cross-process) | ~55 µs |
|
|
65
|
+
| naive subtree walk (65 nodes) | ~44 ms |
|
|
66
|
+
| **cached subtree walk** (one round-trip) | **~37 ms** (1.2× faster; the gap widens with tree size) |
|
|
67
|
+
| agent-grounding tree build | ~9 ms, ~2.7k tokens |
|
|
68
|
+
| **vs OSWorld a11y-tree build (3–26 s)** | **~345–2987× faster** |
|
|
69
|
+
|
|
70
|
+
## Requirements & honest scoping
|
|
71
|
+
|
|
72
|
+
- **Windows 10/11, Bun ≥ 1.1.** Windows-only and Bun-only — the owned trade-off (nut.js/robotjs/uiohook are genuinely cross-platform; this is not).
|
|
73
|
+
- **UIA-tree based.** Apps with no accessibility tree (games, canvas/WebGL, custom-draw) get MSAA + screenshots + coordinate `click()`, not vision matching — a complement to screenshot tools, not a replacement.
|
|
74
|
+
- **Synthetic input (`type`/`sendKeys`/`click`) needs an unlocked, interactive desktop.** UIA queries, `invoke`, `setValue`, and `screenshot` work on a locked session; prefer them.
|
|
75
|
+
- **Selectors are client-side for regex/substring** (exact scalars are server-side). **UIA events are roadmap** — poll with `waitFor`. `scrollIntoView` is implemented but not yet proven against a real list.
|
|
76
|
+
|
|
77
|
+
Read [`AI.md`](./AI.md) — it is the complete surface; an agent should not need the source.
|
|
78
|
+
|
|
79
|
+
MIT.
|
package/agent.ts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// Drop-in computer-use grounding for the Windows desktop: turn the UIA surface into an LLM tool schema
|
|
2
|
+
// plus a JSON-action executor. Hand an agent the tree() JSON (ground-truth element identity + bounds,
|
|
3
|
+
// not pixels) and these tools; it grounds actions on roles and names instead of counting pixels — the
|
|
4
|
+
// structured alternative the computer-use literature (UFO2, OSWorld) is converging on.
|
|
5
|
+
|
|
6
|
+
import type { Selector } from './condition';
|
|
7
|
+
import type { Element } from './element';
|
|
8
|
+
import { serialize, type UiaNode } from './tree';
|
|
9
|
+
|
|
10
|
+
/** One step of an agent action list: locate an element by selector, then perform `do` on it. */
export interface AgentAction {
  /** Selector handed to `find` on the target window to locate the element. */
  find: Selector;
  /** The operation to perform on the matched element. */
  do:
    | 'click'
    | 'invoke'
    | 'read'
    | 'setValue'
    | 'toggle'
    | 'type';
  /** Text payload for `type` / `setValue`; the executor substitutes `''` when it is omitted. */
  text?: string;
}
|
|
15
|
+
|
|
16
|
+
/** Outcome of a single `AgentAction` as reported by `execute`. */
export interface AgentActionResult {
  /** The action this result belongs to (the same object that was passed in). */
  action: AgentAction;
  /** `true` when the element was found and the operation completed without throwing. */
  ok: boolean;
  /** Text obtained by a successful `read`; left undefined for every other operation. */
  value?: string;
  /** Failure description: the no-match explanation, or the message of the thrown error. */
  error?: string;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Best available text for a `read` action: the element's value, else its TextPattern text, else
 * its name. `text()` throws on elements without the pattern, so that failure is absorbed here —
 * otherwise the name fallback could never be reached for such elements.
 */
function readElementText(element: Element): string | undefined {
  const current = element.value;
  if (current) return current;
  try {
    const text = element.text();
    if (text) return text;
  } catch {
    // No usable TextPattern on this element — fall through to the name.
  }
  return element.name;
}

/**
 * Execute a JSON action list against a window: each step finds an element by selector, then acts.
 *
 * Steps run in order and a failing step never aborts the list: a selector with no match, or an
 * operation that throws, is recorded as `{ ok: false, error }` and execution continues.
 *
 * @param window  The element each selector is resolved against (via `window.find`).
 * @param actions The steps to run. `text` defaults to `''` for `type` / `setValue`.
 * @returns One result per action, in input order. `value` is only populated by `read`.
 */
export function execute(window: Element, actions: readonly AgentAction[]): AgentActionResult[] {
  const results: AgentActionResult[] = [];
  for (const action of actions) {
    const element = window.find(action.find);
    if (element === null) {
      results.push({ action, ok: false, error: window.describeNoMatch(action.find) });
      continue;
    }
    try {
      let value: string | undefined;
      switch (action.do) {
        case 'invoke':
          element.invoke();
          break;
        case 'click':
          element.click();
          break;
        case 'type':
          element.type(action.text ?? '');
          break;
        case 'setValue':
          element.setValue(action.text ?? '');
          break;
        case 'toggle':
          element.toggle();
          break;
        default:
          // 'read' — and, as before, any unrecognized verb — reads the element's text.
          value = readElementText(element);
          break;
      }
      results.push({ action, ok: true, value });
    } catch (error) {
      // Anything can be thrown, not only Error instances; never report `undefined` as the reason.
      results.push({ action, ok: false, error: error instanceof Error ? error.message : String(error) });
    } finally {
      // The element found for this step is always released, whether or not the step succeeded.
      element.release();
    }
  }
  return results;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Build the agent-facing view of a window: the same output as `serialize`, with the agent
 * profile switched on.
 *
 * @param window The element to use as the root of the serialized tree.
 * @returns The root `UiaNode` produced by `serialize`.
 */
export function groundingTree(window: Element): UiaNode {
  const options = { agentProfile: true };
  return serialize(window, options);
}
|
|
54
|
+
|
|
55
|
+
/**
 * LLM tool definitions (Anthropic/OpenAI tool-use shape) for desktop grounding.
 *
 * - `find_and_act` mirrors `AgentAction`: a `find` selector, a `do` verb and optional `text`.
 *   The selector schema lists every exact-match / substring field a JSON payload can carry,
 *   including `className` (RegExp names cannot be expressed in JSON and are omitted).
 * - `read_tree` asks for the serialized accessibility tree of the target window.
 */
export const AGENT_TOOLS = [
  {
    name: 'find_and_act',
    description: 'Find a desktop control by name/control-type/automationId/className and act on it (invoke, click, type, setValue, toggle, or read its value).',
    input_schema: {
      type: 'object',
      properties: {
        find: {
          type: 'object',
          properties: {
            name: { type: 'string' },
            controlType: { type: 'number', description: 'a UIA control-type id, e.g. 50000 Button, 50004 Edit' },
            automationId: { type: 'string' },
            className: { type: 'string' },
            nameContains: { type: 'string' },
          },
        },
        do: { type: 'string', enum: ['invoke', 'click', 'type', 'setValue', 'toggle', 'read'] },
        text: { type: 'string', description: 'the text for type/setValue' },
      },
      required: ['find', 'do'],
    },
  },
  {
    name: 'read_tree',
    description: 'Serialize the target window accessibility tree to JSON (role, name, automationId, bounds) for grounding actions.',
    input_schema: { type: 'object', properties: { agentProfile: { type: 'boolean', description: 'prune to interactive/named controls' } } },
  },
] as const;
|
package/automation.ts
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// The UI Automation root: the COM apartment plus the in-process IUIAutomation client, created once.
|
|
2
|
+
|
|
3
|
+
import Combase from '@bun-win32/combase';
|
|
4
|
+
import User32 from '@bun-win32/user32';
|
|
5
|
+
|
|
6
|
+
import { comRelease, guid, hresult } from './com';
|
|
7
|
+
import { CLSCTX_INPROC_SERVER, CLSID_CUIAutomation, COINIT_APARTMENTTHREADED, IID_IUIAutomation, S_FALSE, S_OK } from './constants';
|
|
8
|
+
|
|
9
|
+
// Module-level singleton state: the IUIAutomation client pointer (0n while no client exists) and
// whether this module has already made its CoInitializeEx call.
let pAutomation = 0n;
let comInitialized = false;

/**
 * Enter a single-threaded COM apartment and create the in-process IUIAutomation client.
 *
 * Idempotent: once a client exists, later calls hand back the same pointer without touching COM.
 * Failures are thrown rather than terminating the process, so callers can catch and degrade.
 *
 * @returns The IUIAutomation client pointer.
 * @throws Error when CoInitializeEx fails, when CoCreateInstance fails, or when it yields a
 *   null client.
 */
export function initialize(): bigint {
  if (pAutomation !== 0n) return pAutomation;

  if (!comInitialized) {
    // Best-effort DPI awareness: physical-pixel coordinates keep SendInput clicks aligned with
    // the bounding rectangles UIA reports.
    User32.SetProcessDPIAware();
    const initHr = Combase.CoInitializeEx(null, COINIT_APARTMENTTHREADED);
    // Both S_OK and S_FALSE count as success here; anything else is fatal.
    const apartmentReady = initHr === S_OK || initHr === S_FALSE;
    if (!apartmentReady) throw new Error(`CoInitializeEx failed: ${hresult(initHr)}`);
    comInitialized = true;
  }

  // Keep the packed GUIDs and the out-slot in named locals for the duration of the call.
  const classId = guid(CLSID_CUIAutomation);
  const interfaceId = guid(IID_IUIAutomation);
  const clientSlot = Buffer.alloc(8);
  const createHr = Combase.CoCreateInstance(classId.ptr!, 0n, CLSCTX_INPROC_SERVER, interfaceId.ptr!, clientSlot.ptr!);
  if (createHr !== S_OK) throw new Error(`CoCreateInstance(CUIAutomation) failed: ${hresult(createHr)} — UI Automation is unavailable on this system.`);

  const client = clientSlot.readBigUInt64LE(0);
  if (client === 0n) throw new Error('CoCreateInstance(CUIAutomation) returned a null client.');
  pAutomation = client;
  return pAutomation;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Return the shared IUIAutomation client pointer, creating it through `initialize()` when no
 * client exists yet.
 */
export function automation(): bigint {
  if (pAutomation === 0n) return initialize();
  return pAutomation;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Tear down what `initialize()` set up: release the IUIAutomation client (if one exists), then
 * leave the COM apartment (if this module entered it). Calling it without a prior
 * `initialize()` does nothing.
 */
export function uninitialize(): void {
  const client = pAutomation;
  if (client !== 0n) {
    comRelease(client);
    pAutomation = 0n;
  }

  if (!comInitialized) return;
  Combase.CoUninitialize();
  comInitialized = false;
}
|
package/cache.ts
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// CacheRequest batching — the performance spine. Naive UIA reads N properties with N cross-process
|
|
2
|
+
// round-trips; IUIAutomationCacheRequest + FindAllBuildCache prefetches many properties for a whole
|
|
3
|
+
// subtree in ONE round-trip, then get_Cached* reads pay zero further round-trips.
|
|
4
|
+
|
|
5
|
+
import { FFIType } from 'bun:ffi';
|
|
6
|
+
|
|
7
|
+
import { automation } from './automation';
|
|
8
|
+
import { comRelease, vcall } from './com';
|
|
9
|
+
import { PropertyId, S_OK, SLOT, TreeScope } from './constants';
|
|
10
|
+
|
|
11
|
+
/**
 * The property ids a default cache request prefetches — the set tree() and the cached
 * find/walk paths read back.
 */
export const DEFAULT_CACHE_PROPERTIES: readonly number[] = [
  PropertyId.Name,
  PropertyId.ControlType,
  PropertyId.AutomationId,
  PropertyId.ClassName,
  PropertyId.BoundingRectangle,
  PropertyId.IsEnabled,
];
|
|
13
|
+
|
|
14
|
+
/** Whether elements returned through a cache request keep a live reference to the control. */
export enum AutomationElementMode {
  /** Cached data only: the returned elements cannot be acted on, but building the cache is cheaper. */
  None = 0,
  /** Full live reference (the default): the returned elements can be acted on. */
  Full = 1,
}
|
|
20
|
+
|
|
21
|
+
/**
 * Fluent wrapper around a cache-request COM pointer. Each configuration method issues one vtable
 * call with a single 32-bit integer argument and returns `this` so calls can be chained; the
 * HRESULT of those calls is not inspected.
 */
export class CacheRequest {
  /** @param ptr The underlying COM interface pointer; the holder is responsible for `release()`. */
  constructor(readonly ptr: bigint) {}

  /** Add a property (UIA_*PropertyId) to prefetch for every element the cache covers. */
  property(propertyId: number): this {
    return this.putInt32(SLOT.AddProperty, propertyId);
  }

  /** Add a pattern (UIA_*PatternId) to prefetch for every element the cache covers. */
  pattern(patternId: number): this {
    return this.putInt32(SLOT.AddPattern, patternId);
  }

  /** Choose the tree scope of the cache (which relatives are cached around each match). */
  treeScope(scope: number): this {
    return this.putInt32(SLOT.put_TreeScope, scope);
  }

  /** Choose whether returned elements keep a live reference (Full) or carry cached data only (None). */
  elementMode(mode: AutomationElementMode): this {
    return this.putInt32(SLOT.put_AutomationElementMode, mode);
  }

  /** Release the underlying COM pointer. */
  release(): void {
    comRelease(this.ptr);
  }

  /** Shared body of the setters: invoke `slot` on this request with one i32 argument. */
  private putInt32(slot: number, value: number): this {
    vcall(this.ptr, slot, [FFIType.i32], [value]);
    return this;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Build a CacheRequest (default: standard property set, subtree scope, Full mode). The caller
 * owns the result and must `release()` it.
 *
 * @param properties Property ids to prefetch; each is added with `CacheRequest.property`.
 * @param scope      Tree scope applied through `CacheRequest.treeScope`.
 * @param mode       Element mode applied through `CacheRequest.elementMode`.
 * @returns The configured request.
 * @throws Error when the CreateCacheRequest call does not return S_OK, or reports success but
 *   hands back a null pointer (the same guard `initialize()` applies to its client pointer).
 */
export function createCacheRequest(properties: readonly number[] = DEFAULT_CACHE_PROPERTIES, scope: number = TreeScope.TreeScope_Subtree, mode: AutomationElementMode = AutomationElementMode.Full): CacheRequest {
  const out = Buffer.alloc(8);
  if (vcall(automation(), SLOT.CreateCacheRequest, [FFIType.ptr], [out.ptr!]) !== S_OK) throw new Error('CreateCacheRequest failed');
  const requestPtr = out.readBigUInt64LE(0);
  // A null pointer wrapped in a CacheRequest would be dereferenced by the very next vtable call.
  if (requestPtr === 0n) throw new Error('CreateCacheRequest returned a null request.');
  const request = new CacheRequest(requestPtr);
  for (const propertyId of properties) request.property(propertyId);
  request.treeScope(scope);
  request.elementMode(mode);
  return request;
}
|
package/com.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// The cast-free COM vtable invoker, GUID packing, and IUnknown teardown primitive.
|
|
2
|
+
|
|
3
|
+
import { CFunction, FFIType, type Pointer, read } from 'bun:ffi';
|
|
4
|
+
|
|
5
|
+
import Combase from '@bun-win32/combase';
|
|
6
|
+
|
|
7
|
+
import { IUNKNOWN_RELEASE, S_OK } from './constants';
|
|
8
|
+
|
|
9
|
+
// Cache of FFI invokers, keyed by the resolved method pointer. The key is a plain number:
// user-mode addresses fit in 2^53, and number Map keys hash faster than bigint in JSC.
// A COM method has exactly one signature, so its method pointer identifies it uniquely. The
// vtable is still walked on every call because an object address can be reallocated to a
// different object, whereas the method pointer cannot lie. Every UIA method returns HRESULT,
// so the invokers are i32-returning only.
const invokers: Map<number, ReturnType<typeof CFunction>> = new Map();
|
|
14
|
+
|
|
15
|
+
/**
 * Invoke the COM method at vtable `slot` on interface pointer `thisPtr`, returning its HRESULT.
 *
 * `argTypes`/`args` EXCLUDE the implicit `this`: the invoker prepends `FFIType.u64` for it, so do
 * not add a leading u64 of your own (a spurious one segfaults multi-pointer calls). `argTypes`
 * must match the method's real signature. The invoker is cached per method pointer, so the
 * `argTypes` of the first call for a given method are the ones that get compiled.
 *
 * @param thisPtr Interface pointer. A null (0n) pointer is rejected with E_POINTER instead of
 *   being dereferenced.
 * @param slot Zero-based vtable index of the method.
 * @param argTypes FFI types of the explicit arguments.
 * @param args Explicit argument values, parallel to `argTypes`.
 * @returns The method's HRESULT as a signed 32-bit number.
 */
export function vcall(thisPtr: bigint, slot: number, argTypes: readonly FFIType[], args: readonly unknown[]): number {
  // Reading the vtable through a null interface pointer would fault the whole process; report
  // the failure as an ordinary HRESULT so callers' `!== S_OK` checks handle it.
  if (thisPtr === 0n) return 0x80004003 | 0; // E_POINTER as a signed 32-bit HRESULT
  const vtable = read.ptr(Number(thisPtr) as Pointer, 0);
  const method = read.ptr(Number(vtable) as Pointer, slot * 8);
  let invoke = invokers.get(method);
  if (invoke === undefined) {
    invoke = CFunction({ ptr: Number(method) as Pointer, args: [FFIType.u64, ...argTypes], returns: FFIType.i32 });
    invokers.set(method, invoke);
  }
  // Arity-specialized dispatch — spreading into a native CFunction costs ~16 ns/call (measured).
  switch (args.length) {
    case 0:
      return Number(invoke(thisPtr));
    case 1:
      return Number(invoke(thisPtr, args[0]));
    case 2:
      return Number(invoke(thisPtr, args[0], args[1]));
    case 3:
      return Number(invoke(thisPtr, args[0], args[1], args[2]));
    case 4:
      return Number(invoke(thisPtr, args[0], args[1], args[2], args[3]));
    default:
      return Number(invoke(thisPtr, ...args));
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Call IUnknown::Release (vtable slot `IUNKNOWN_RELEASE`) on a COM interface pointer.
 * A null handle (0n) is ignored.
 */
export function comRelease(thisPtr: bigint): void {
  if (thisPtr === 0n) return;
  vcall(thisPtr, IUNKNOWN_RELEASE, [], []);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Convert a `{...}` GUID string into a 16-byte CLSID/IID buffer using CLSIDFromString.
 *
 * @param text GUID in registry format, braces included.
 * @returns A 16-byte buffer holding the packed GUID.
 * @throws Error carrying the formatted HRESULT when CLSIDFromString does not return S_OK.
 */
export function guid(text: string): Buffer {
  const packed = Buffer.alloc(16);
  // CLSIDFromString expects a NUL-terminated UTF-16 string.
  const wideText = Buffer.from(`${text}\0`, 'utf16le');
  const hr = Combase.CLSIDFromString(wideText.ptr!, packed.ptr!);
  if (hr === S_OK) return packed;
  throw new Error(`CLSIDFromString(${text}) failed: ${hresult(hr)}`);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Render an HRESULT as `0xXXXXXXXX`: eight lowercase hex digits, zero-padded.
 * Negative (signed 32-bit) inputs are reinterpreted as unsigned first.
 */
export function hresult(hr: number): string {
  const unsigned = hr >>> 0;
  const digits = unsigned.toString(16);
  return '0x' + digits.padStart(8, '0');
}
|
package/condition.ts
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// The typed Selector: a hybrid of server-side UIA property conditions (one cross-process round-trip
|
|
2
|
+
// filters in the target app's provider, marshaling only matches) and a client-side matcher for the
|
|
3
|
+
// predicates UIA conditions cannot express (regex, substring). Server-side conditions work because
|
|
4
|
+
// the MS x64 ABI passes a VARIANT (24 bytes on x64) by hidden reference — modeled as a pointer to a VARIANT.
|
|
5
|
+
|
|
6
|
+
import { FFIType } from 'bun:ffi';
|
|
7
|
+
|
|
8
|
+
import Oleaut32 from '@bun-win32/oleaut32';
|
|
9
|
+
|
|
10
|
+
import { comRelease, vcall } from './com';
|
|
11
|
+
import { ControlType, PropertyId, S_OK, SLOT, VT_BSTR, VT_I4 } from './constants';
|
|
12
|
+
|
|
13
|
+
/** Criteria for locating an element. Every field that is present must match (logical AND). */
export interface Selector {
  /** Exact automation id. */
  automationId?: string;
  /** Exact class name. */
  className?: string;
  /** Exact control type id. */
  controlType?: ControlType | number;
  /** Exact string (server-side) or a regular expression (client-side). */
  name?: string | RegExp;
  /** Substring of the name (client-side). */
  nameContains?: string;
}
|
|
22
|
+
|
|
23
|
+
/**
 * The smallest set of already-read properties the client-side matcher needs.
 * `Element` satisfies it.
 */
export interface ElementProperties {
  /** Automation id of the element. */
  automationId: string;
  /** Class name of the element. */
  className: string;
  /** Numeric control type id of the element. */
  controlType: number;
  /** Name of the element. */
  name: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Format a selector as a human-readable `{ key: value, ... }` string for error messages.
 *
 * Fields appear in a fixed order (controlType, name, nameContains, automationId, className) and
 * only when set. Strings are JSON-quoted, a RegExp uses its own `toString()`, and a control type
 * is shown by its `ControlType` name when one exists, otherwise as the raw number.
 */
export function selectorToString(selector: Selector): string {
  const quote = (value: string): string => JSON.stringify(value);
  const { automationId, className, controlType, name, nameContains } = selector;
  const fields: string[] = [];

  if (controlType !== undefined) {
    fields.push(`controlType: ${ControlType[controlType] ?? controlType}`);
  }
  if (name !== undefined) {
    const shown = name instanceof RegExp ? name.toString() : quote(name);
    fields.push(`name: ${shown}`);
  }
  if (nameContains !== undefined) fields.push(`nameContains: ${quote(nameContains)}`);
  if (automationId !== undefined) fields.push(`automationId: ${quote(automationId)}`);
  if (className !== undefined) fields.push(`className: ${quote(className)}`);

  return `{ ${fields.join(', ')} }`;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Compose the actionable "no element matched … nearest: …" error message.
 *
 * @param selector The selector that found nothing.
 * @param windowName Name of the window that was searched; shown in double quotes.
 * @param candidateNames Names of nearby elements. Blank names are skipped and at most the first
 *   eight non-blank ones are listed, each JSON-quoted.
 */
export function formatNoMatch(selector: Selector, windowName: string, candidateNames: readonly string[]): string {
  const shown: string[] = [];
  for (const candidate of candidateNames) {
    if (shown.length === 8) break;
    if (candidate.trim().length > 0) shown.push(JSON.stringify(candidate));
  }
  const tail = shown.length === 0 ? '' : ` — nearest: ${shown.join(', ')}`;
  return `no element matched ${selectorToString(selector)} in "${windowName}"${tail}`;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Test an already-read element against a selector. All present fields AND together. Pure logic
 * apart from resetting `lastIndex` on a RegExp `name`.
 *
 * - `controlType`, `automationId`, `className`, and a string `name` require exact equality.
 * - A RegExp `name` is tested against the element name.
 * - `nameContains` requires the element name to include the substring.
 *
 * @returns `true` when every present selector field matches.
 */
export function matches(element: ElementProperties, selector: Selector): boolean {
  if (selector.controlType !== undefined && element.controlType !== selector.controlType) return false;
  if (selector.automationId !== undefined && element.automationId !== selector.automationId) return false;
  if (selector.className !== undefined && element.className !== selector.className) return false;
  if (selector.name !== undefined) {
    if (selector.name instanceof RegExp) {
      // A global/sticky RegExp resumes from `lastIndex` left by the previous `test`, so reusing one
      // selector across many elements would spuriously reject matches. Always start from 0.
      selector.name.lastIndex = 0;
      if (!selector.name.test(element.name)) return false;
    } else if (element.name !== selector.name) return false;
  }
  if (selector.nameContains !== undefined && !element.name.includes(selector.nameContains)) return false;
  return true;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Create a property condition that compares `propertyId` against a 32-bit integer (VT_I4).
 *
 * @param pAutomation Automation interface pointer the condition is created on.
 * @param propertyId Property id to compare.
 * @param value Integer the property must equal.
 * @returns The condition's interface pointer, or 0n when creation does not return S_OK.
 */
function propertyConditionInt(pAutomation: bigint, propertyId: number, value: number): bigint {
  // sizeof(VARIANT) is 24 on x64 (8-byte header + 16-byte union). The struct is handed over by
  // reference, so the buffer must cover all 24 bytes or the callee's copy reads past its end.
  const variant = Buffer.alloc(24);
  variant.writeUInt16LE(VT_I4, 0); // vt
  variant.writeInt32LE(value, 8); // lVal
  const out = Buffer.alloc(8); // receives the condition pointer
  if (vcall(pAutomation, SLOT.CreatePropertyCondition, [FFIType.i32, FFIType.ptr, FFIType.ptr], [propertyId, variant.ptr!, out.ptr!]) !== S_OK) return 0n;
  return out.readBigUInt64LE(0);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Create a property condition that compares `propertyId` against a string (VT_BSTR).
 *
 * The temporary BSTR is freed as soon as the call returns, whatever the outcome.
 *
 * @param pAutomation Automation interface pointer the condition is created on.
 * @param propertyId Property id to compare.
 * @param value String the property must equal.
 * @returns The condition's interface pointer, or 0n when creation does not return S_OK.
 */
function propertyConditionString(pAutomation: bigint, propertyId: number, value: string): bigint {
  const bstr = Oleaut32.SysAllocString(Buffer.from(`${value}\0`, 'utf16le').ptr!);
  // sizeof(VARIANT) is 24 on x64 (8-byte header + 16-byte union). The struct is handed over by
  // reference, so the buffer must cover all 24 bytes or the callee's copy reads past its end.
  const variant = Buffer.alloc(24);
  variant.writeUInt16LE(VT_BSTR, 0); // vt
  variant.writeBigUInt64LE(BigInt(bstr), 8); // bstrVal
  const out = Buffer.alloc(8); // receives the condition pointer
  const hr = vcall(pAutomation, SLOT.CreatePropertyCondition, [FFIType.i32, FFIType.ptr, FFIType.ptr], [propertyId, variant.ptr!, out.ptr!]);
  Oleaut32.SysFreeString(bstr); // CreatePropertyCondition copies the VARIANT (SysAllocStrings its own BSTR)
  if (hr !== S_OK) return 0n;
  return out.readBigUInt64LE(0);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Create the always-true condition via `CreateTrueCondition`.
 *
 * @returns The condition's interface pointer, or 0n when the call does not return S_OK.
 */
function trueCondition(pAutomation: bigint): bigint {
  const slot = Buffer.alloc(8); // receives the condition pointer
  const hr = vcall(pAutomation, SLOT.CreateTrueCondition, [FFIType.ptr], [slot.ptr!]);
  return hr === S_OK ? slot.readBigUInt64LE(0) : 0n;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Combine two conditions with `CreateAndCondition`. The inputs are not released here.
 *
 * @returns The combined condition's interface pointer, or 0n when the call does not return S_OK.
 */
function andCondition(pAutomation: bigint, first: bigint, second: bigint): bigint {
  const slot = Buffer.alloc(8); // receives the combined condition pointer
  const operandTypes = [FFIType.u64, FFIType.u64, FFIType.ptr];
  const hr = vcall(pAutomation, SLOT.CreateAndCondition, operandTypes, [first, second, slot.ptr!]);
  return hr === S_OK ? slot.readBigUInt64LE(0) : 0n;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Compile a selector into a server-side condition (the caller must `comRelease` it) plus whether a
 * client-side `matches` pass is still required.
 *
 * Exact scalars (controlType, string name, automationId, className) become a server-side AND of
 * property conditions; a RegExp name and `nameContains` always fall to the client. If any
 * server-side piece cannot be built or combined, that constraint is left to the client too:
 * `needsClientFilter` is forced to `true` rather than silently dropping the constraint.
 *
 * @param pAutomation Automation interface pointer the conditions are created on.
 * @param selector The selector to compile.
 * @returns `condition` — the compiled condition pointer (a true-condition when nothing could be
 *   expressed server-side; 0n only if even that could not be created) — and `needsClientFilter`.
 */
export function compileCondition(pAutomation: bigint, selector: Selector): { condition: bigint; needsClientFilter: boolean } {
  const parts: bigint[] = [];
  let needsClientFilter = selector.name instanceof RegExp || selector.nameContains !== undefined;

  // Collect a created condition. A failed creation (0n) means the constraint is not enforced
  // server-side, so the client-side matcher has to enforce it instead.
  const collect = (part: bigint): void => {
    if (part !== 0n) parts.push(part);
    else needsClientFilter = true;
  };

  if (selector.controlType !== undefined) collect(propertyConditionInt(pAutomation, PropertyId.ControlType, selector.controlType));
  if (typeof selector.name === 'string') collect(propertyConditionString(pAutomation, PropertyId.Name, selector.name));
  if (selector.automationId !== undefined) collect(propertyConditionString(pAutomation, PropertyId.AutomationId, selector.automationId));
  if (selector.className !== undefined) collect(propertyConditionString(pAutomation, PropertyId.ClassName, selector.className));

  if (parts.length === 0) return { condition: trueCondition(pAutomation), needsClientFilter: true };

  let condition = parts[0]!;
  for (let index = 1; index < parts.length; index += 1) {
    const part = parts[index]!;
    const combined = andCondition(pAutomation, condition, part);
    comRelease(part);
    if (combined === 0n) {
      // Could not AND this piece in: keep the condition built so far (still a valid, broader
      // filter) and let the client-side matcher enforce the rest.
      needsClientFilter = true;
      continue;
    }
    comRelease(condition);
    condition = combined;
  }
  return { condition, needsClientFilter };
}
|