@mseep/clawdcursor 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/CHANGELOG.md +2264 -0
  2. package/LICENSE +21 -0
  3. package/README.md +385 -0
  4. package/SECURITY.md +44 -0
  5. package/SKILL.md +503 -0
  6. package/dist/core/agent-loop/agent.d.ts +42 -0
  7. package/dist/core/agent-loop/agent.js +1023 -0
  8. package/dist/core/agent-loop/agent.js.map +1 -0
  9. package/dist/core/agent-loop/batch-tool.d.ts +25 -0
  10. package/dist/core/agent-loop/batch-tool.js +218 -0
  11. package/dist/core/agent-loop/batch-tool.js.map +1 -0
  12. package/dist/core/agent-loop/coord-scale.d.ts +72 -0
  13. package/dist/core/agent-loop/coord-scale.js +89 -0
  14. package/dist/core/agent-loop/coord-scale.js.map +1 -0
  15. package/dist/core/agent-loop/focus-guard.d.ts +24 -0
  16. package/dist/core/agent-loop/focus-guard.js +29 -0
  17. package/dist/core/agent-loop/focus-guard.js.map +1 -0
  18. package/dist/core/agent-loop/project-mcp.d.ts +97 -0
  19. package/dist/core/agent-loop/project-mcp.js +253 -0
  20. package/dist/core/agent-loop/project-mcp.js.map +1 -0
  21. package/dist/core/agent-loop/prompt.d.ts +45 -0
  22. package/dist/core/agent-loop/prompt.js +426 -0
  23. package/dist/core/agent-loop/prompt.js.map +1 -0
  24. package/dist/core/agent-loop/tool-meta.d.ts +93 -0
  25. package/dist/core/agent-loop/tool-meta.js +651 -0
  26. package/dist/core/agent-loop/tool-meta.js.map +1 -0
  27. package/dist/core/agent-loop/tools.d.ts +38 -0
  28. package/dist/core/agent-loop/tools.js +2134 -0
  29. package/dist/core/agent-loop/tools.js.map +1 -0
  30. package/dist/core/agent-loop/types.d.ts +170 -0
  31. package/dist/core/agent-loop/types.js +12 -0
  32. package/dist/core/agent-loop/types.js.map +1 -0
  33. package/dist/core/agent.d.ts +51 -0
  34. package/dist/core/agent.js +245 -0
  35. package/dist/core/agent.js.map +1 -0
  36. package/dist/core/app-categories.d.ts +67 -0
  37. package/dist/core/app-categories.js +108 -0
  38. package/dist/core/app-categories.js.map +1 -0
  39. package/dist/core/banner.d.ts +70 -0
  40. package/dist/core/banner.js +245 -0
  41. package/dist/core/banner.js.map +1 -0
  42. package/dist/core/classify/capability.d.ts +45 -0
  43. package/dist/core/classify/capability.js +78 -0
  44. package/dist/core/classify/capability.js.map +1 -0
  45. package/dist/core/decompose/llm-decomposer.d.ts +35 -0
  46. package/dist/core/decompose/llm-decomposer.js +156 -0
  47. package/dist/core/decompose/llm-decomposer.js.map +1 -0
  48. package/dist/core/decompose/parser.d.ts +27 -0
  49. package/dist/core/decompose/parser.js +101 -0
  50. package/dist/core/decompose/parser.js.map +1 -0
  51. package/dist/core/observability/correlation.d.ts +19 -0
  52. package/dist/core/observability/correlation.js +36 -0
  53. package/dist/core/observability/correlation.js.map +1 -0
  54. package/dist/core/observability/cost-meter.d.ts +51 -0
  55. package/dist/core/observability/cost-meter.js +134 -0
  56. package/dist/core/observability/cost-meter.js.map +1 -0
  57. package/dist/core/observability/logger.d.ts +61 -0
  58. package/dist/core/observability/logger.js +550 -0
  59. package/dist/core/observability/logger.js.map +1 -0
  60. package/dist/core/router/aliases.d.ts +50 -0
  61. package/dist/core/router/aliases.js +104 -0
  62. package/dist/core/router/aliases.js.map +1 -0
  63. package/dist/core/router/normalize.d.ts +41 -0
  64. package/dist/core/router/normalize.js +80 -0
  65. package/dist/core/router/normalize.js.map +1 -0
  66. package/dist/core/safety.d.ts +126 -0
  67. package/dist/core/safety.js +568 -0
  68. package/dist/core/safety.js.map +1 -0
  69. package/dist/core/sense/a11y-resolver.d.ts +73 -0
  70. package/dist/core/sense/a11y-resolver.js +76 -0
  71. package/dist/core/sense/a11y-resolver.js.map +1 -0
  72. package/dist/core/sense/fingerprint.d.ts +41 -0
  73. package/dist/core/sense/fingerprint.js +123 -0
  74. package/dist/core/sense/fingerprint.js.map +1 -0
  75. package/dist/core/sense/rank.d.ts +70 -0
  76. package/dist/core/sense/rank.js +192 -0
  77. package/dist/core/sense/rank.js.map +1 -0
  78. package/dist/core/sense/reactive-check.d.ts +40 -0
  79. package/dist/core/sense/reactive-check.js +48 -0
  80. package/dist/core/sense/reactive-check.js.map +1 -0
  81. package/dist/core/sense/snapshot.d.ts +19 -0
  82. package/dist/core/sense/snapshot.js +100 -0
  83. package/dist/core/sense/snapshot.js.map +1 -0
  84. package/dist/core/sense/types.d.ts +66 -0
  85. package/dist/core/sense/types.js +9 -0
  86. package/dist/core/sense/types.js.map +1 -0
  87. package/dist/core/sense/ui-map-anchors.d.ts +7 -0
  88. package/dist/core/sense/ui-map-anchors.js +24 -0
  89. package/dist/core/sense/ui-map-anchors.js.map +1 -0
  90. package/dist/core/sense/ui-map-elements.d.ts +5 -0
  91. package/dist/core/sense/ui-map-elements.js +33 -0
  92. package/dist/core/sense/ui-map-elements.js.map +1 -0
  93. package/dist/core/sense/ui-map-find.d.ts +56 -0
  94. package/dist/core/sense/ui-map-find.js +153 -0
  95. package/dist/core/sense/ui-map-find.js.map +1 -0
  96. package/dist/core/sense/ui-map-fuse.d.ts +4 -0
  97. package/dist/core/sense/ui-map-fuse.js +44 -0
  98. package/dist/core/sense/ui-map-fuse.js.map +1 -0
  99. package/dist/core/sense/ui-map-geom.d.ts +3 -0
  100. package/dist/core/sense/ui-map-geom.js +16 -0
  101. package/dist/core/sense/ui-map-geom.js.map +1 -0
  102. package/dist/core/sense/ui-map-holder.d.ts +58 -0
  103. package/dist/core/sense/ui-map-holder.js +87 -0
  104. package/dist/core/sense/ui-map-holder.js.map +1 -0
  105. package/dist/core/sense/ui-map-normalize.d.ts +19 -0
  106. package/dist/core/sense/ui-map-normalize.js +65 -0
  107. package/dist/core/sense/ui-map-normalize.js.map +1 -0
  108. package/dist/core/sense/ui-map-render.d.ts +4 -0
  109. package/dist/core/sense/ui-map-render.js +34 -0
  110. package/dist/core/sense/ui-map-render.js.map +1 -0
  111. package/dist/core/sense/ui-map-resolve.d.ts +41 -0
  112. package/dist/core/sense/ui-map-resolve.js +59 -0
  113. package/dist/core/sense/ui-map-resolve.js.map +1 -0
  114. package/dist/core/sense/ui-map-types.d.ts +66 -0
  115. package/dist/core/sense/ui-map-types.js +11 -0
  116. package/dist/core/sense/ui-map-types.js.map +1 -0
  117. package/dist/core/sense/ui-map.d.ts +29 -0
  118. package/dist/core/sense/ui-map.js +113 -0
  119. package/dist/core/sense/ui-map.js.map +1 -0
  120. package/dist/core/verify/assertions.d.ts +132 -0
  121. package/dist/core/verify/assertions.js +284 -0
  122. package/dist/core/verify/assertions.js.map +1 -0
  123. package/dist/index.d.ts +21 -0
  124. package/dist/index.js +24 -0
  125. package/dist/index.js.map +1 -0
  126. package/dist/llm/browser-config.d.ts +36 -0
  127. package/dist/llm/browser-config.js +83 -0
  128. package/dist/llm/browser-config.js.map +1 -0
  129. package/dist/llm/client.d.ts +268 -0
  130. package/dist/llm/client.js +1094 -0
  131. package/dist/llm/client.js.map +1 -0
  132. package/dist/llm/config.d.ts +79 -0
  133. package/dist/llm/config.js +375 -0
  134. package/dist/llm/config.js.map +1 -0
  135. package/dist/llm/credentials.d.ts +35 -0
  136. package/dist/llm/credentials.js +491 -0
  137. package/dist/llm/credentials.js.map +1 -0
  138. package/dist/llm/external-creds.d.ts +42 -0
  139. package/dist/llm/external-creds.js +169 -0
  140. package/dist/llm/external-creds.js.map +1 -0
  141. package/dist/llm/providers.d.ts +123 -0
  142. package/dist/llm/providers.js +717 -0
  143. package/dist/llm/providers.js.map +1 -0
  144. package/dist/paths.d.ts +31 -0
  145. package/dist/paths.js +147 -0
  146. package/dist/paths.js.map +1 -0
  147. package/dist/platform/accessibility.d.ts +139 -0
  148. package/dist/platform/accessibility.js +670 -0
  149. package/dist/platform/accessibility.js.map +1 -0
  150. package/dist/platform/cdp-driver.d.ts +318 -0
  151. package/dist/platform/cdp-driver.js +1179 -0
  152. package/dist/platform/cdp-driver.js.map +1 -0
  153. package/dist/platform/index.d.ts +11 -0
  154. package/dist/platform/index.js +69 -0
  155. package/dist/platform/index.js.map +1 -0
  156. package/dist/platform/keys.d.ts +17 -0
  157. package/dist/platform/keys.js +129 -0
  158. package/dist/platform/keys.js.map +1 -0
  159. package/dist/platform/launch-poll.d.ts +101 -0
  160. package/dist/platform/launch-poll.js +177 -0
  161. package/dist/platform/launch-poll.js.map +1 -0
  162. package/dist/platform/linux.d.ts +173 -0
  163. package/dist/platform/linux.js +1253 -0
  164. package/dist/platform/linux.js.map +1 -0
  165. package/dist/platform/macos.d.ts +136 -0
  166. package/dist/platform/macos.js +976 -0
  167. package/dist/platform/macos.js.map +1 -0
  168. package/dist/platform/native-desktop.d.ts +145 -0
  169. package/dist/platform/native-desktop.js +936 -0
  170. package/dist/platform/native-desktop.js.map +1 -0
  171. package/dist/platform/native-helper.d.ts +130 -0
  172. package/dist/platform/native-helper.js +592 -0
  173. package/dist/platform/native-helper.js.map +1 -0
  174. package/dist/platform/ocr-engine.d.ts +78 -0
  175. package/dist/platform/ocr-engine.js +363 -0
  176. package/dist/platform/ocr-engine.js.map +1 -0
  177. package/dist/platform/ps-runner.d.ts +28 -0
  178. package/dist/platform/ps-runner.js +228 -0
  179. package/dist/platform/ps-runner.js.map +1 -0
  180. package/dist/platform/types.d.ts +397 -0
  181. package/dist/platform/types.js +15 -0
  182. package/dist/platform/types.js.map +1 -0
  183. package/dist/platform/uri-handler.d.ts +75 -0
  184. package/dist/platform/uri-handler.js +273 -0
  185. package/dist/platform/uri-handler.js.map +1 -0
  186. package/dist/platform/wayland-backend.d.ts +53 -0
  187. package/dist/platform/wayland-backend.js +348 -0
  188. package/dist/platform/wayland-backend.js.map +1 -0
  189. package/dist/platform/windows.d.ts +232 -0
  190. package/dist/platform/windows.js +1210 -0
  191. package/dist/platform/windows.js.map +1 -0
  192. package/dist/postbuild.d.ts +10 -0
  193. package/dist/postbuild.js +98 -0
  194. package/dist/postbuild.js.map +1 -0
  195. package/dist/schema/snapshot.d.ts +33 -0
  196. package/dist/schema/snapshot.js +90 -0
  197. package/dist/schema/snapshot.js.map +1 -0
  198. package/dist/shortcuts.d.ts +30 -0
  199. package/dist/shortcuts.js +261 -0
  200. package/dist/shortcuts.js.map +1 -0
  201. package/dist/surface/cli.d.ts +7 -0
  202. package/dist/surface/cli.js +1556 -0
  203. package/dist/surface/cli.js.map +1 -0
  204. package/dist/surface/dashboard.d.ts +8 -0
  205. package/dist/surface/dashboard.js +1193 -0
  206. package/dist/surface/dashboard.js.map +1 -0
  207. package/dist/surface/doctor.d.ts +29 -0
  208. package/dist/surface/doctor.js +1514 -0
  209. package/dist/surface/doctor.js.map +1 -0
  210. package/dist/surface/format.d.ts +10 -0
  211. package/dist/surface/format.js +37 -0
  212. package/dist/surface/format.js.map +1 -0
  213. package/dist/surface/http-utility.d.ts +65 -0
  214. package/dist/surface/http-utility.js +336 -0
  215. package/dist/surface/http-utility.js.map +1 -0
  216. package/dist/surface/mcp-server.d.ts +91 -0
  217. package/dist/surface/mcp-server.js +280 -0
  218. package/dist/surface/mcp-server.js.map +1 -0
  219. package/dist/surface/onboarding.d.ts +15 -0
  220. package/dist/surface/onboarding.js +184 -0
  221. package/dist/surface/onboarding.js.map +1 -0
  222. package/dist/surface/pidfile.d.ts +79 -0
  223. package/dist/surface/pidfile.js +263 -0
  224. package/dist/surface/pidfile.js.map +1 -0
  225. package/dist/surface/readiness.d.ts +45 -0
  226. package/dist/surface/readiness.js +230 -0
  227. package/dist/surface/readiness.js.map +1 -0
  228. package/dist/surface/report.d.ts +68 -0
  229. package/dist/surface/report.js +341 -0
  230. package/dist/surface/report.js.map +1 -0
  231. package/dist/surface/skill-register.d.ts +14 -0
  232. package/dist/surface/skill-register.js +150 -0
  233. package/dist/surface/skill-register.js.map +1 -0
  234. package/dist/surface/version.d.ts +6 -0
  235. package/dist/surface/version.js +27 -0
  236. package/dist/surface/version.js.map +1 -0
  237. package/dist/tools/a11y.d.ts +8 -0
  238. package/dist/tools/a11y.js +545 -0
  239. package/dist/tools/a11y.js.map +1 -0
  240. package/dist/tools/a11y_depth.d.ts +19 -0
  241. package/dist/tools/a11y_depth.js +455 -0
  242. package/dist/tools/a11y_depth.js.map +1 -0
  243. package/dist/tools/agent.d.ts +15 -0
  244. package/dist/tools/agent.js +248 -0
  245. package/dist/tools/agent.js.map +1 -0
  246. package/dist/tools/batch.d.ts +46 -0
  247. package/dist/tools/batch.js +230 -0
  248. package/dist/tools/batch.js.map +1 -0
  249. package/dist/tools/cdp.d.ts +8 -0
  250. package/dist/tools/cdp.js +233 -0
  251. package/dist/tools/cdp.js.map +1 -0
  252. package/dist/tools/compact.d.ts +63 -0
  253. package/dist/tools/compact.js +418 -0
  254. package/dist/tools/compact.js.map +1 -0
  255. package/dist/tools/cost-class.d.ts +38 -0
  256. package/dist/tools/cost-class.js +117 -0
  257. package/dist/tools/cost-class.js.map +1 -0
  258. package/dist/tools/desktop.d.ts +9 -0
  259. package/dist/tools/desktop.js +346 -0
  260. package/dist/tools/desktop.js.map +1 -0
  261. package/dist/tools/electron_bridge.d.ts +41 -0
  262. package/dist/tools/electron_bridge.js +261 -0
  263. package/dist/tools/electron_bridge.js.map +1 -0
  264. package/dist/tools/extras.d.ts +22 -0
  265. package/dist/tools/extras.js +942 -0
  266. package/dist/tools/extras.js.map +1 -0
  267. package/dist/tools/favorites.d.ts +13 -0
  268. package/dist/tools/favorites.js +137 -0
  269. package/dist/tools/favorites.js.map +1 -0
  270. package/dist/tools/introspection.d.ts +13 -0
  271. package/dist/tools/introspection.js +55 -0
  272. package/dist/tools/introspection.js.map +1 -0
  273. package/dist/tools/ocr.d.ts +8 -0
  274. package/dist/tools/ocr.js +66 -0
  275. package/dist/tools/ocr.js.map +1 -0
  276. package/dist/tools/orchestration.d.ts +7 -0
  277. package/dist/tools/orchestration.js +377 -0
  278. package/dist/tools/orchestration.js.map +1 -0
  279. package/dist/tools/playbooks/extract-compose.d.ts +22 -0
  280. package/dist/tools/playbooks/extract-compose.js +85 -0
  281. package/dist/tools/playbooks/extract-compose.js.map +1 -0
  282. package/dist/tools/playbooks/find-replace.d.ts +11 -0
  283. package/dist/tools/playbooks/find-replace.js +56 -0
  284. package/dist/tools/playbooks/find-replace.js.map +1 -0
  285. package/dist/tools/playbooks/index.d.ts +63 -0
  286. package/dist/tools/playbooks/index.js +70 -0
  287. package/dist/tools/playbooks/index.js.map +1 -0
  288. package/dist/tools/playbooks/keys-blocklist.d.ts +24 -0
  289. package/dist/tools/playbooks/keys-blocklist.js +89 -0
  290. package/dist/tools/playbooks/keys-blocklist.js.map +1 -0
  291. package/dist/tools/registry.d.ts +40 -0
  292. package/dist/tools/registry.js +560 -0
  293. package/dist/tools/registry.js.map +1 -0
  294. package/dist/tools/safety-gate.d.ts +16 -0
  295. package/dist/tools/safety-gate.js +70 -0
  296. package/dist/tools/safety-gate.js.map +1 -0
  297. package/dist/tools/scheduler.d.ts +76 -0
  298. package/dist/tools/scheduler.js +413 -0
  299. package/dist/tools/scheduler.js.map +1 -0
  300. package/dist/tools/shortcuts.d.ts +13 -0
  301. package/dist/tools/shortcuts.js +205 -0
  302. package/dist/tools/shortcuts.js.map +1 -0
  303. package/dist/tools/smart.d.ts +15 -0
  304. package/dist/tools/smart.js +785 -0
  305. package/dist/tools/smart.js.map +1 -0
  306. package/dist/tools/types.d.ts +174 -0
  307. package/dist/tools/types.js +67 -0
  308. package/dist/tools/types.js.map +1 -0
  309. package/dist/tools/window-text.d.ts +15 -0
  310. package/dist/tools/window-text.js +39 -0
  311. package/dist/tools/window-text.js.map +1 -0
  312. package/dist/types.d.ts +122 -0
  313. package/dist/types.js +41 -0
  314. package/dist/types.js.map +1 -0
  315. package/native/Package.swift +38 -0
  316. package/native/README.md +113 -0
  317. package/native/Sources/ClawdCursorHelper/main.swift +602 -0
  318. package/native/Sources/ClawdCursorHost/main.swift +182 -0
  319. package/native/Sources/PermissionCheck/main.swift +53 -0
  320. package/native/Sources/ScreenshotHelper/main.swift +219 -0
  321. package/native/build.sh +139 -0
  322. package/native/entitlements.plist +12 -0
  323. package/package.json +115 -0
  324. package/scripts/banner.ps1 +112 -0
  325. package/scripts/coord-accuracy.ps1 +140 -0
  326. package/scripts/coord-uwp.ps1 +80 -0
  327. package/scripts/edge-glow.ps1 +180 -0
  328. package/scripts/find-element.ps1 +198 -0
  329. package/scripts/get-foreground-window.ps1 +71 -0
  330. package/scripts/get-screen-context.ps1 +183 -0
  331. package/scripts/get-windows.ps1 +66 -0
  332. package/scripts/install-panic-hotkey.ps1 +46 -0
  333. package/scripts/interact-element.ps1 +431 -0
  334. package/scripts/invoke-element.ps1 +314 -0
  335. package/scripts/linux/atspi-bridge.py +356 -0
  336. package/scripts/linux/ocr-recognize.py +154 -0
  337. package/scripts/mac/_window-picker.jxa +163 -0
  338. package/scripts/mac/find-element.jxa +0 -0
  339. package/scripts/mac/find-element.sh +161 -0
  340. package/scripts/mac/focus-window.jxa +284 -0
  341. package/scripts/mac/get-focused-element.jxa +102 -0
  342. package/scripts/mac/get-foreground-window.jxa +173 -0
  343. package/scripts/mac/get-screen-context.jxa +197 -0
  344. package/scripts/mac/get-ui-tree.sh +141 -0
  345. package/scripts/mac/get-windows.jxa +117 -0
  346. package/scripts/mac/interact-element.sh +235 -0
  347. package/scripts/mac/invoke-element.jxa +408 -0
  348. package/scripts/mac/ocr-recognize.swift +124 -0
  349. package/scripts/ocr-recognize.ps1 +102 -0
  350. package/scripts/postinstall-native.js +48 -0
  351. package/scripts/ps-bridge.ps1 +830 -0
  352. package/scripts/smoke-mcp.ps1 +119 -0
  353. package/scripts/sync-version.ts +178 -0
  354. package/scripts/verify-install.js +81 -0
@@ -0,0 +1,830 @@
1
+ # Persistent PowerShell UIA Bridge
2
+ # Reads newline-delimited JSON commands from stdin, writes results to stdout.
3
+ # Keeps UI Automation assemblies and Win32 types loaded between calls —
4
+ # eliminates 200-500ms PowerShell startup overhead on every a11y operation.
5
+
6
+ # Force UTF-8 on stdin/stdout so non-ASCII window titles, accessibility
7
+ # names, and clipboard contents survive the round-trip to Node. Without
8
+ # this, PowerShell uses the system code page (Windows-1252 in most
9
+ # locales) while Node decodes as UTF-8 — every non-ASCII char arrives as
10
+ # `?` or `�`. Also sets $OutputEncoding so PS-side `ConvertTo-Json`
11
+ # doesn't re-encode the output through the legacy console codepath.
12
# Put console output, console input and $OutputEncoding on UTF-8.
[Console]::OutputEncoding = [Text.Encoding]::UTF8
[Console]::InputEncoding  = [Text.Encoding]::UTF8
$OutputEncoding           = [Text.Encoding]::UTF8

# Load the UI Automation assemblies. If loading throws, emit one compact JSON
# line of the form {"error": "..."} on stdout, flush it, and exit with code 1.
try {
    foreach ($uiaAssembly in 'UIAutomationClient', 'UIAutomationTypes') {
        Add-Type -AssemblyName $uiaAssembly
    }
}
catch {
    $failure = @{ error = "Assembly load failed: $($_.Exception.Message)" }
    [Console]::Out.WriteLine(($failure | ConvertTo-Json -Compress))
    [Console]::Out.Flush()
    exit 1
}
24
+
25
# Compile the Win32 interop helper used by the window-focus commands.
# A literal here-string (@' ... '@) is used so that nothing inside the C#
# source is ever subject to PowerShell variable expansion.
try {
    Add-Type @'
using System;
using System.Runtime.InteropServices;

/// <summary>
/// P/Invoke declarations for the user32/kernel32 calls the bridge needs to
/// inspect and change the foreground window.
/// </summary>
public static class Win32UIA {
    /// <summary>
    /// Managed mirror of the Win32 POINT structure (two 32-bit integers).
    /// WindowFromPoint takes this structure BY VALUE. Declaring the import
    /// as two separate int parameters only matches the native call on
    /// 32-bit processes; on x64 an 8-byte struct is passed in a single
    /// register, so the y coordinate would be lost.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    private struct POINT {
        public int X;
        public int Y;
    }

    [DllImport("user32.dll")]
    public static extern IntPtr GetForegroundWindow();

    [DllImport("user32.dll", SetLastError = true)]
    public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId);

    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool BringWindowToTop(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);

    [DllImport("user32.dll")]
    public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach);

    [DllImport("user32.dll")]
    public static extern bool AllowSetForegroundWindow(int dwProcessId);

    [DllImport("kernel32.dll")]
    public static extern uint GetCurrentThreadId();

    [DllImport("user32.dll")]
    public static extern bool SetWindowPos(IntPtr hWnd, IntPtr hWndInsertAfter, int X, int Y, int cx, int cy, uint uFlags);

    // Raw import with the correct by-value POINT parameter.
    [DllImport("user32.dll", EntryPoint = "WindowFromPoint")]
    private static extern IntPtr WindowFromPointNative(POINT point);

    /// <summary>
    /// Returns the handle of the window that contains the given screen
    /// point, or IntPtr.Zero when there is none. Keeps the (x, y) call
    /// shape used by the script while marshalling a real POINT underneath.
    /// </summary>
    public static IntPtr WindowFromPoint(int x, int y) {
        POINT point = new POINT();
        point.X = x;
        point.Y = y;
        return WindowFromPointNative(point);
    }

    [DllImport("user32.dll")]
    public static extern IntPtr GetAncestor(IntPtr hWnd, uint gaFlags);

    // Additional constants for force-focus path:
    // HWND_TOPMOST = -1
    // HWND_NOTOPMOST = -2
    // SWP_NOSIZE = 0x0001
    // SWP_NOMOVE = 0x0002
    // SWP_SHOWWINDOW = 0x0040
    // SWP_NOACTIVATE = 0x0010
    // GA_ROOT = 2 (for GetAncestor)
}
'@
} catch { } # May already be defined in a long-running session
63
+
64
$ErrorActionPreference = 'Continue'

# Control type map: short control-type name -> the static ControlType member
# of the same name on System.Windows.Automation.ControlType.
$ctMap = @{}
@(
    'Button', 'CheckBox', 'ComboBox', 'Custom', 'DataGrid', 'DataItem',
    'Document', 'Edit', 'Group', 'Hyperlink', 'Image', 'List', 'ListItem',
    'Menu', 'MenuBar', 'MenuItem', 'Pane', 'RadioButton', 'ScrollBar',
    'Slider', 'Spinner', 'SplitButton', 'Tab', 'TabItem', 'Text', 'ToolBar',
    'Tree', 'TreeItem', 'Window'
) | ForEach-Object {
    # Each key is also the name of the static member it maps to.
    $ctMap[$_] = [System.Windows.Automation.ControlType]::$_
}
98
+
99
# ProgrammaticName values of the control types that ConvertTo-UINode treats as
# interactive, i.e. keeps in the tree even when the element has no name.
$interactiveTypes = @(
    'ControlType.Button'
    'ControlType.Edit'
    'ControlType.ComboBox'
    'ControlType.CheckBox'
    'ControlType.RadioButton'
    'ControlType.Hyperlink'
    'ControlType.MenuItem'
    'ControlType.Menu'
    'ControlType.Tab'
    'ControlType.TabItem'
    'ControlType.ListItem'
    'ControlType.TreeItem'
    'ControlType.Slider'
    'ControlType.Document'
    'ControlType.DataItem'
    'ControlType.Pane'
    'ControlType.Custom'
    'ControlType.ToolBar'
    'ControlType.Text'
    'ControlType.Group'
)
108
+
109
+ # ── UI tree builder ───────────────────────────────────────────────────────────
110
function ConvertTo-UINode {
    <#
    .SYNOPSIS
    Recursively converts a UI Automation element into a serialisable node.

    .DESCRIPTION
    Returns $null when the element is $null, when its properties cannot be
    read, or when it is an unnamed, non-interactive element below the root
    that has no convertible descendants (or sits at/after MaxDepth).
    An unnamed, non-interactive element that does have convertible children
    is not emitted itself; its converted children are returned in its place.
    Otherwise returns an ordered dictionary with the keys name, automationId,
    controlType, className, isEnabled, bounds, value and children.

    .PARAMETER Element
    The automation element to convert.

    .PARAMETER Depth
    Depth of this element relative to the element the walk started from.

    .PARAMETER MaxDepth
    Children are only enumerated while Depth is below this value.
    #>
    param(
        [System.Windows.Automation.AutomationElement]$Element,
        [int]$Depth = 0,
        [int]$MaxDepth = 8
    )

    if ($null -eq $Element) { return $null }

    # Reading .Current can throw (the catch below treats that as "no node").
    try { $info = $Element.Current } catch { return $null }

    $typeName      = $info.ControlType.ProgrammaticName
    $isNamed       = -not [string]::IsNullOrWhiteSpace($info.Name)
    $isInteractive = $typeName -in $interactiveTypes

    if ($Depth -gt 0 -and -not $isInteractive -and -not $isNamed) {
        # Anonymous wrapper below the root. Electron/WebView2 apps nest
        # Window > Pane > Pane > Pane > Button, so look through it and hand
        # its converted children to the caller instead of emitting it.
        if ($Depth -ge $MaxDepth) { return $null }

        $promoted = @()
        try {
            $descendants = $Element.FindAll(
                [System.Windows.Automation.TreeScope]::Children,
                [System.Windows.Automation.Condition]::TrueCondition)
            foreach ($child in $descendants) {
                $converted = ConvertTo-UINode -Element $child -Depth ($Depth + 1) -MaxDepth $MaxDepth
                if ($null -ne $converted) { $promoted += $converted }
            }
        } catch {}

        # An anonymous wrapper with nothing useful underneath is dropped.
        if ($promoted.Count -eq 0) { return $null }
        return $promoted
    }

    # Bounds: zeroed when the rectangle is infinite or starts more than 100 px
    # left of / above the origin, otherwise rounded to whole numbers.
    # NOTE(review): the -100 cut-off also zeroes elements at larger negative
    # coordinates, which may include monitors left of or above the primary
    # one - confirm this is intended.
    $rect = $info.BoundingRectangle
    $unusable = [double]::IsInfinity($rect.X) -or [double]::IsInfinity($rect.Y) -or
                ($rect.X -lt -100) -or ($rect.Y -lt -100)
    if ($unusable) {
        $bounds = @{ x = 0; y = 0; width = 0; height = 0 }
    } else {
        $bounds = @{
            x      = [Math]::Round($rect.X)
            y      = [Math]::Round($rect.Y)
            width  = [Math]::Round($rect.Width)
            height = [Math]::Round($rect.Height)
        }
    }

    # Field value: only for Edit/Document/ComboBox, never for password
    # fields, at most 2000 characters, and every read is best-effort.
    $nodeValue = $null
    $valueBearing = 'ControlType.Edit', 'ControlType.Document', 'ControlType.ComboBox'
    if ($typeName -in $valueBearing) {
        $isPassword = $false
        try { $isPassword = $info.IsPassword } catch { }

        if (-not $isPassword) {
            # First choice: ValuePattern.
            try {
                $valuePattern = $Element.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
                $fromValue = $valuePattern.Current.Value
                if ($fromValue -and $fromValue.Length -gt 0) { $nodeValue = $fromValue }
            } catch { }

            # Fallback: the first 2000 characters of the TextPattern document.
            if ($null -eq $nodeValue) {
                try {
                    $textPattern = $Element.GetCurrentPattern([System.Windows.Automation.TextPattern]::Pattern)
                    $fromText = $textPattern.DocumentRange.GetText(2000)
                    if ($fromText -and $fromText.Length -gt 0) { $nodeValue = $fromText }
                } catch { }
            }

            if ($nodeValue -and $nodeValue.Length -gt 2000) {
                $nodeValue = $nodeValue.Substring(0, 2000)
            }
        }
    }

    # Missing string properties are reported as empty strings.
    $name = ""
    if ($info.Name) { $name = $info.Name }
    $automationId = ""
    if ($info.AutomationId) { $automationId = $info.AutomationId }
    $className = ""
    if ($info.ClassName) { $className = $info.ClassName }

    $node = [ordered]@{
        name         = $name
        automationId = $automationId
        controlType  = $typeName
        className    = $className
        isEnabled    = $info.IsEnabled
        bounds       = $bounds
        value        = $nodeValue
        children     = @()
    }

    if ($Depth -lt $MaxDepth) {
        try {
            $descendants = $Element.FindAll(
                [System.Windows.Automation.TreeScope]::Children,
                [System.Windows.Automation.Condition]::TrueCondition)
            foreach ($child in $descendants) {
                $converted = ConvertTo-UINode -Element $child -Depth ($Depth + 1) -MaxDepth $MaxDepth
                # += appends a single node and splices in an array of
                # promoted nodes alike, so no type check is needed.
                if ($null -ne $converted) { $node.children += $converted }
            }
        } catch {}
    }

    return $node
}
197
+
198
+ # ── Command: get-screen-context ───────────────────────────────────────────────
199
function Cmd-GetScreenContext {
    <#
    .SYNOPSIS
    Lists the named top-level windows and, optionally, one window's UI tree.

    .DESCRIPTION
    Reads focusedProcessId (default 0) and maxDepth (default 8) from $cmd.
    Returns an ordered dictionary with:
      windows - one entry per named Window-type child of the UIA root, with
                handle, title, processName, processId, bounds, isMinimized
      uiTree  - the ConvertTo-UINode result for the first root child owned by
                focusedProcessId, or $null when no pid was given / none found
    #>
    param($cmd)

    $focusedPid = 0
    if ($cmd.focusedProcessId) { $focusedPid = [int]$cmd.focusedProcessId }
    $maxDepth = 8
    if ($cmd.maxDepth) { $maxDepth = [int]$cmd.maxDepth }

    $desktop    = [System.Windows.Automation.AutomationElement]::RootElement
    $childScope = [System.Windows.Automation.TreeScope]::Children

    $isWindow = New-Object System.Windows.Automation.PropertyCondition(
        [System.Windows.Automation.AutomationElement]::ControlTypeProperty,
        [System.Windows.Automation.ControlType]::Window
    )
    $topLevel = $desktop.FindAll($childScope, $isWindow)

    $windowList = @()
    foreach ($win in $topLevel) {
        # Any failure while describing one window just skips that window.
        try {
            $props = $win.Current
            if ([string]::IsNullOrWhiteSpace($props.Name)) { continue }

            $procName = "unknown"
            try {
                $procName = [System.Diagnostics.Process]::GetProcessById($props.ProcessId).ProcessName
            } catch {}

            $rect = $props.BoundingRectangle
            if ([double]::IsInfinity($rect.X)) {
                $bounds = @{ x=0;y=0;width=0;height=0 }
            } else {
                $bounds = @{
                    x      = [Math]::Round($rect.X)
                    y      = [Math]::Round($rect.Y)
                    width  = [Math]::Round($rect.Width)
                    height = [Math]::Round($rect.Height)
                }
            }

            # Stays $false when the window exposes no WindowPattern.
            $isMin = $false
            try {
                $windowPattern = $win.GetCurrentPattern([System.Windows.Automation.WindowPattern]::Pattern)
                $minimized = [System.Windows.Automation.WindowVisualState]::Minimized
                if ($windowPattern.Current.WindowVisualState -eq $minimized) { $isMin = $true }
            } catch {}

            $windowList += [ordered]@{
                handle      = $props.NativeWindowHandle
                title       = $props.Name
                processName = $procName
                processId   = $props.ProcessId
                bounds      = $bounds
                isMinimized = $isMin
            }
        } catch {}
    }

    $uiTree = $null
    if ($focusedPid -gt 0) {
        $ownedByPid = New-Object System.Windows.Automation.PropertyCondition(
            [System.Windows.Automation.AutomationElement]::ProcessIdProperty, $focusedPid
        )
        $targetWin = $desktop.FindFirst($childScope, $ownedByPid)
        if ($null -ne $targetWin) {
            $uiTree = ConvertTo-UINode -Element $targetWin -Depth 0 -MaxDepth $maxDepth
        }
    }

    return [ordered]@{ windows = $windowList; uiTree = $uiTree }
}
246
+
247
+ # ── Command: activate-at-point ────────────────────────────────────────────────
248
+ # Before a coordinate click, ensure the window at (x, y) is the foreground.
249
+ # This prevents clicks from landing on a background window when a dialog sits
250
+ # over another window and the foreground changed between screenshot and click.
251
+ #
252
+ # Uses the same AttachThreadInput + AllowSetForegroundWindow dance as
253
+ # Cmd-FocusWindow so that the Windows foreground lock is properly overcome.
254
function Cmd-ActivateAtPoint {
    <#
    .SYNOPSIS
    Tries to bring the top-level window under a screen point to the foreground.

    .DESCRIPTION
    Reads x and y from $cmd. Returns a "noop" result when no window is found
    at the point or when its top-level window is already the foreground
    window. Otherwise attempts activation and returns success, action,
    activated (whether the foreground window now equals the target),
    processId, processName and title of the target window.
    #>
    param($cmd)

    $px = [int]$cmd.x
    $py = [int]$cmd.y

    $hit = [Win32UIA]::WindowFromPoint($px, $py)
    if ($hit -eq [IntPtr]::Zero) {
        return @{ success=$true; action="noop"; reason="no-window-at-point" }
    }

    # GA_ROOT = 2: map a child control to its top-level window before
    # comparing with / promoting to the foreground.
    $target = [Win32UIA]::GetAncestor($hit, 2)
    if ($target -eq [IntPtr]::Zero) { $target = $hit }

    $foreground = [Win32UIA]::GetForegroundWindow()
    if ($target -eq $foreground) {
        return @{ success=$true; action="noop"; reason="already-foreground" }
    }

    # Attach this thread's input to the current foreground thread while we
    # switch, to get past the Windows foreground lock.
    $selfThread = [Win32UIA]::GetCurrentThreadId()
    $ignoredPid = 0
    $fgThread = 0
    if ($foreground -ne [IntPtr]::Zero) {
        $fgThread = [Win32UIA]::GetWindowThreadProcessId($foreground, [ref]$ignoredPid)
    }

    $attached = $false
    $canAttach = ($fgThread -ne 0) -and ($fgThread -ne $selfThread)
    if ($canAttach) {
        try {
            $null = [Win32UIA]::AttachThreadInput($selfThread, $fgThread, $true)
            $attached = $true
        } catch {}
    }

    try {
        $null = [Win32UIA]::AllowSetForegroundWindow(-1)
        $null = [Win32UIA]::BringWindowToTop($target)
        $null = [Win32UIA]::SetForegroundWindow($target)
    } catch {}
    finally {
        # Always undo the attachment made above.
        if ($attached) {
            try { $null = [Win32UIA]::AttachThreadInput($selfThread, $fgThread, $false) } catch {}
        }
    }

    # Short pause before re-reading the foreground window.
    Start-Sleep -Milliseconds 40
    $nowForeground = [Win32UIA]::GetForegroundWindow()

    # Describe the window we tried to promote so the caller can detect a
    # failed activation or an unintended target window.
    $targetPid = 0
    $null = [Win32UIA]::GetWindowThreadProcessId($target, [ref]$targetPid)

    $targetProcess = "unknown"
    try { $targetProcess = [System.Diagnostics.Process]::GetProcessById($targetPid).ProcessName } catch {}

    $targetTitle = ""
    try {
        $targetElement = [System.Windows.Automation.AutomationElement]::FromHandle($target)
        if ($targetElement) { $targetTitle = $targetElement.Current.Name }
    } catch {}

    return @{
        success = $true
        action = "activated"
        activated = ($nowForeground -eq $target)
        processId = $targetPid
        processName = $targetProcess
        title = $targetTitle
    }
}
310
+
311
+ # ── Command: get-foreground-window ────────────────────────────────────────────
312
function Cmd-GetForegroundWindow {
    # Describe the current foreground window.
    # Returns an ordered dictionary (handle, processId, processName, title,
    # success) or @{ error = ... } when there is no foreground window.
    $hwnd = [Win32UIA]::GetForegroundWindow()
    if ($hwnd -eq [IntPtr]::Zero) {
        return @{ error = "No foreground window" }
    }

    $ownerPid = 0
    [Win32UIA]::GetWindowThreadProcessId($hwnd, [ref]$ownerPid) | Out-Null

    # Both lookups are best-effort: keep the placeholder when they fail.
    $procName = "unknown"
    try {
        $procName = [System.Diagnostics.Process]::GetProcessById($ownerPid).ProcessName
    } catch {}

    $caption = ""
    try {
        $element = [System.Windows.Automation.AutomationElement]::FromHandle($hwnd)
        if ($element) {
            $caption = $element.Current.Name
        }
    } catch {}

    $info = [ordered]@{}
    $info['handle']      = [int]$hwnd
    $info['processId']   = $ownerPid
    $info['processName'] = $procName
    $info['title']       = $caption
    $info['success']     = $true
    return $info
}
326
+
327
+ # ── Command: focus-window ─────────────────────────────────────────────────────
328
function Cmd-FocusWindow {
    # Find a top-level window and force it to become the foreground window.
    #
    # Input ($cmd):
    #   title     - optional, case-insensitive substring of the window name.
    #   processId - optional owning process id.
    #   restore   - optional; when truthy a minimized window is first restored
    #               through the UIA WindowPattern.
    #   When both processId and title are given they are AND-matched first
    #   (disambiguates several windows sharing one pid), then the search falls
    #   back to pid only in case the caller passed a stale title.
    # Output: ordered dictionary { success, foreground, title, processId, handle }
    #   where success/foreground are true only if GetForegroundWindow reports
    #   the target afterwards, or @{ success=$false; error=... } when no window
    #   matched.
    param($cmd)
    $title   = if ($cmd.title)     { $cmd.title }          else { "" }
    $wpid    = if ($cmd.processId) { [int]$cmd.processId } else { 0 }
    $restore = if ($cmd.restore)   { $true }               else { $false }

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $winCond = New-Object System.Windows.Automation.PropertyCondition(
        [System.Windows.Automation.AutomationElement]::ControlTypeProperty,
        [System.Windows.Automation.ControlType]::Window
    )
    $allWins = $root.FindAll([System.Windows.Automation.TreeScope]::Children, $winCond)

    $target = $null
    $tl     = $title.ToLower()

    # Pass 1: pid AND title, only when both were supplied.
    if ($wpid -gt 0 -and $title -ne "") {
        foreach ($w in $allWins) {
            try {
                if ($w.Current.ProcessId -ne $wpid) { continue }
                if ($w.Current.Name -and $w.Current.Name.ToLower().Contains($tl)) { $target = $w; break }
            } catch {}
        }
    }
    # Pass 2: pid only (also the fallback for a failed pass 1), or title only
    # when no pid was supplied.
    if ($null -eq $target -and $wpid -gt 0) {
        foreach ($w in $allWins) {
            try { if ($w.Current.ProcessId -eq $wpid) { $target = $w; break } } catch {}
        }
    } elseif ($null -eq $target -and $title -ne "") {
        foreach ($w in $allWins) {
            try { if ($w.Current.Name -and $w.Current.Name.ToLower().Contains($tl)) { $target = $w; break } } catch {}
        }
    }

    if ($null -eq $target) { return @{ success=$false; error="Window not found: title='$title' pid=$wpid" } }

    if ($restore) {
        try {
            $wp = $target.GetCurrentPattern([System.Windows.Automation.WindowPattern]::Pattern)
            if ($wp.Current.WindowVisualState -eq [System.Windows.Automation.WindowVisualState]::Minimized) {
                $wp.SetWindowVisualState([System.Windows.Automation.WindowVisualState]::Normal)
                Start-Sleep -Milliseconds 120
            }
        } catch {}
    }

    # Force-focus path. UIA SetFocus alone only signals accessibility focus; it
    # does not change the global foreground window that later SendInput
    # keystrokes follow. So the full Win32 sequence always runs and the result
    # is always verified by reading GetForegroundWindow back.
    $hwnd = [IntPtr]$target.Current.NativeWindowHandle

    # Try UIA SetFocus too (cheap, helps with some custom apps); ignore result.
    try { $target.SetFocus() } catch {}

    # SW_RESTORE (9) restores a minimized window, but it also UN-MAXIMIZES a
    # maximized one. For a maximized window use SW_SHOW (5), which activates
    # the window at its current size and position. If the visual state cannot
    # be read, keep the previous behaviour (SW_RESTORE).
    $SW_SHOW    = 5
    $SW_RESTORE = 9
    $showCmd    = $SW_RESTORE
    try {
        $statePattern = $target.GetCurrentPattern([System.Windows.Automation.WindowPattern]::Pattern)
        if ($statePattern.Current.WindowVisualState -eq [System.Windows.Automation.WindowVisualState]::Maximized) {
            $showCmd = $SW_SHOW
        }
    } catch {}
    [Win32UIA]::ShowWindow($hwnd, $showCmd) | Out-Null
    Start-Sleep -Milliseconds 30

    # Topmost toggle pushes the window to the top of the z-order.
    # NOTE(review): the HWND_NOTOPMOST call presumably also clears always-on-top
    # for a window that was topmost before this command -- confirm acceptable.
    $HWND_TOPMOST    = [IntPtr]::new(-1)
    $HWND_NOTOPMOST  = [IntPtr]::new(-2)
    $SWP_NOMOVE_SIZE = 0x0003   # NOMOVE | NOSIZE
    [Win32UIA]::SetWindowPos($hwnd, $HWND_TOPMOST, 0, 0, 0, 0, $SWP_NOMOVE_SIZE) | Out-Null
    Start-Sleep -Milliseconds 10
    [Win32UIA]::SetWindowPos($hwnd, $HWND_NOTOPMOST, 0, 0, 0, 0, $SWP_NOMOVE_SIZE) | Out-Null

    # AttachThreadInput dance: share the input queue with the thread that
    # currently owns the foreground so SetForegroundWindow is honoured.
    $currentThread = [Win32UIA]::GetCurrentThreadId()
    $fg = [Win32UIA]::GetForegroundWindow()
    $pidTmp = 0
    $fgThread = 0
    if ($fg -ne [IntPtr]::Zero) {
        $fgThread = [Win32UIA]::GetWindowThreadProcessId($fg, [ref]$pidTmp)
    }
    $attached = $false
    if ($fgThread -ne 0 -and $fgThread -ne $currentThread) {
        try { [Win32UIA]::AttachThreadInput($currentThread, $fgThread, $true) | Out-Null; $attached = $true } catch {}
    }
    try {
        # ASFW_ANY = -1 (any process can SetForegroundWindow until next user input).
        [Win32UIA]::AllowSetForegroundWindow(-1) | Out-Null
        [Win32UIA]::BringWindowToTop($hwnd) | Out-Null
        [Win32UIA]::SetForegroundWindow($hwnd) | Out-Null
    } catch { }
    finally {
        # Detach even when one of the calls above threw.
        if ($attached) { try { [Win32UIA]::AttachThreadInput($currentThread, $fgThread, $false) | Out-Null } catch {} }
    }

    # Verify -- the only thing that matters.
    Start-Sleep -Milliseconds 60
    $foreground = ([Win32UIA]::GetForegroundWindow() -eq $hwnd)

    # If we still don't have foreground, try the Alt-tap synthesis trick.
    # Some Windows configurations require a key event to break the lock.
    if (-not $foreground) {
        try {
            # SendKeys lives in System.Windows.Forms. Load it here so the
            # fallback does not silently no-op (type lookup failure swallowed
            # by the catch) when nothing else has loaded the assembly yet.
            Add-Type -AssemblyName System.Windows.Forms -ErrorAction SilentlyContinue
            [System.Windows.Forms.SendKeys]::SendWait('%') | Out-Null
            Start-Sleep -Milliseconds 30
            [Win32UIA]::AllowSetForegroundWindow(-1) | Out-Null
            [Win32UIA]::SetForegroundWindow($hwnd) | Out-Null
            Start-Sleep -Milliseconds 50
            $foreground = ([Win32UIA]::GetForegroundWindow() -eq $hwnd)
        } catch {}
    }

    $c = $target.Current
    # success is an honest report: the window was found AND it actually became
    # the foreground window. Callers that rely on this for downstream input
    # must check `foreground`.
    return [ordered]@{
        success    = $foreground
        foreground = $foreground
        title      = $c.Name
        processId  = $c.ProcessId
        handle     = $c.NativeWindowHandle
    }
}
461
+
462
+ # ── Command: find-element (fuzzy name match) ──────────────────────────────────
463
function Cmd-FindElement {
    # Search the UI Automation tree for elements and describe each match.
    #
    # Input ($cmd):
    #   name         - optional fuzzy name filter (case-insensitive; either
    #                  string may contain the other; nameless elements skipped).
    #   automationId - optional exact AutomationId condition.
    #   controlType  - optional key into $ctMap; ignored when not in the map.
    #   processId    - optional; limits the search to the first top-level
    #                  element owned by that process.
    #   maxResults   - optional cap on the number of results (default 20).
    # Output: an array (possibly empty) of ordered dictionaries with name,
    #   automationId, controlType, className, processId, isEnabled, bounds.
    param($cmd)

    $name         = if ($cmd.name)         { $cmd.name }            else { "" }
    $automationId = if ($cmd.automationId) { $cmd.automationId }    else { "" }
    $controlType  = if ($cmd.controlType)  { $cmd.controlType }     else { "" }
    $wpid         = if ($cmd.processId)    { [int]$cmd.processId }  else { 0 }
    $maxResults   = if ($cmd.maxResults)   { [int]$cmd.maxResults } else { 20 }

    # Pick the subtree to search: the whole desktop, or one process' window.
    $desktop    = [System.Windows.Automation.AutomationElement]::RootElement
    $searchRoot = $desktop
    if ($wpid -gt 0) {
        $byPid = New-Object System.Windows.Automation.PropertyCondition(
            [System.Windows.Automation.AutomationElement]::ProcessIdProperty, $wpid
        )
        $searchRoot = $desktop.FindFirst([System.Windows.Automation.TreeScope]::Children, $byPid)
        if ($null -eq $searchRoot) {
            # The leading comma keeps the empty array from being unrolled.
            return ,(New-Object System.Object[] 0)
        }
    }

    # Exact-match conditions are evaluated by UIA itself.
    $conditions = @()
    if ($automationId -ne "") {
        $conditions += New-Object System.Windows.Automation.PropertyCondition(
            [System.Windows.Automation.AutomationElement]::AutomationIdProperty, $automationId
        )
    }
    if ($controlType -ne "" -and $ctMap.ContainsKey($controlType)) {
        $conditions += New-Object System.Windows.Automation.PropertyCondition(
            [System.Windows.Automation.AutomationElement]::ControlTypeProperty, $ctMap[$controlType]
        )
    }

    switch ($conditions.Count) {
        0       { $searchCond = [System.Windows.Automation.Condition]::TrueCondition }
        1       { $searchCond = $conditions[0] }
        default { $searchCond = New-Object System.Windows.Automation.AndCondition([System.Windows.Automation.Condition[]]$conditions) }
    }

    $elements  = $searchRoot.FindAll([System.Windows.Automation.TreeScope]::Descendants, $searchCond)
    $found     = New-Object System.Collections.ArrayList
    $nameLower = $name.ToLower()

    foreach ($el in $elements) {
        if ($found.Count -ge $maxResults) { break }
        try {
            $c = $el.Current

            if ($name -ne "") {
                # Fuzzy: strip keyboard shortcut suffix ("Save\tCtrl+S" -> "save"),
                # then accept when either string contains the other.
                $elName   = ($c.Name -replace '\t.*$', '').Trim().ToLower()
                $overlaps = $elName.Contains($nameLower) -or $nameLower.Contains($elName)
                if ($elName.Length -eq 0 -or -not $overlaps) { continue }
            }

            # An empty rectangle reports infinite coordinates; emit zeros instead.
            $rect = $c.BoundingRectangle
            if ([double]::IsInfinity($rect.X)) {
                $bounds = @{x=0;y=0;width=0;height=0}
            } else {
                $bounds = @{x=[int]$rect.X;y=[int]$rect.Y;width=[int]$rect.Width;height=[int]$rect.Height}
            }

            $entry = [ordered]@{
                name=$c.Name; automationId=$c.AutomationId; controlType=$c.ControlType.ProgrammaticName
                className=$c.ClassName; processId=$c.ProcessId; isEnabled=$c.IsEnabled; bounds=$bounds
            }
            [void]$found.Add($entry)
        } catch {}
    }

    return ,($found.ToArray())
}
522
+
523
+ # ── Resolve a matched element to the EDITABLE control it represents ───────────
524
+ # Name/label matching often lands on a static label (e.g. the Win11 Save dialog's
525
+ # "File name:" is a Text label, not the editable field) or on a ComboBox wrapping
526
+ # an Edit. Walk to the real editable target so set-value writes somewhere writable.
527
+ # App-agnostic: relies only on UIA control types and the LabeledBy relationship.
528
function Resolve-EditableTarget {
    # Map a matched element to the editable control it stands for.
    # Resolution order:
    #   1. the element itself when it is an Edit / Document (or a ComboBox,
    #      unwrapped to its inner Edit when one exists);
    #   2. the first Edit / Document / ComboBox descendant;
    #   3. an editable control under the element's parent whose LabeledBy name
    #      equals this element's name, else the first editable one found there;
    #   4. the element unchanged.
    # Returns $null for $null input.
    param($el)
    if ($null -eq $el) { return $null }

    $EDIT  = [System.Windows.Automation.ControlType]::Edit
    $DOC   = [System.Windows.Automation.ControlType]::Document
    $COMBO = [System.Windows.Automation.ControlType]::ComboBox
    $typeProp = [System.Windows.Automation.AutomationElement]::ControlTypeProperty
    $subtree  = [System.Windows.Automation.TreeScope]::Descendants

    $editCond  = New-Object System.Windows.Automation.PropertyCondition($typeProp, $EDIT)
    $docCond   = New-Object System.Windows.Automation.PropertyCondition($typeProp, $DOC)
    $comboCond = New-Object System.Windows.Automation.PropertyCondition($typeProp, $COMBO)
    $editable  = New-Object System.Windows.Automation.OrCondition($editCond, $docCond, $comboCond)

    # For a ComboBox: hand back the Edit nested inside it, or the ComboBox
    # itself when there is none (or the lookup fails).
    $innerEditOrSelf = {
        param($box)
        try {
            $inner = $box.FindFirst($subtree, $editCond)
            if ($inner) { return $inner }
        } catch {}
        return $box
    }

    $ct = $el.Current.ControlType

    # 1) Already editable.
    if ($ct -eq $EDIT -or $ct -eq $DOC) { return $el }
    if ($ct -eq $COMBO) { return (& $innerEditOrSelf $el) }

    # 2) Editable descendant (matched a group/pane wrapping the field).
    try {
        $d = $el.FindFirst($subtree, $editable)
        if ($d) {
            if ($d.Current.ControlType -eq $COMBO) { return (& $innerEditOrSelf $d) }
            return $d
        }
    } catch {}

    # 3) Matched a label: look under the parent for the control it labels.
    try {
        $parent = [System.Windows.Automation.TreeWalker]::ControlViewWalker.GetParent($el)
        if ($parent) {
            $cands    = $parent.FindAll($subtree, $editable)
            $fallback = $null
            for ($k = 0; $k -lt $cands.Count; $k++) {
                $cand = $cands.Item($k)
                # Remember the first candidate in case no LabeledBy link matches.
                if ($null -eq $fallback) { $fallback = $cand }
                try {
                    $lb = $cand.GetCurrentPropertyValue([System.Windows.Automation.AutomationElement]::LabeledByProperty)
                    if ($lb -and $lb.Current.Name -eq $el.Current.Name) {
                        if ($cand.Current.ControlType -eq $COMBO) { return (& $innerEditOrSelf $cand) }
                        return $cand
                    }
                } catch {}
            }
            if ($fallback) {
                if ($fallback.Current.ControlType -eq $COMBO) { return (& $innerEditOrSelf $fallback) }
                return $fallback
            }
        }
    } catch {}

    # 4) Nothing better found.
    return $el
}
570
+
571
+ # ── Command: invoke-element (fuzzy name match) ────────────────────────────────
572
function Cmd-InvokeElement {
    # Locate one element inside a process' window and perform an action on it.
    #
    # Input ($cmd):
    #   processId    - owning process; the first top-level element with this
    #                  pid is the search root.
    #   automationId - optional exact id, tried first.
    #   name         - optional fuzzy name (exact match after stripping a
    #                  "\tShortcut" suffix, then a contains match either way).
    #   controlType  - optional key into $ctMap narrowing the name search.
    #   action       - click | set-value | get-value | focus | expand |
    #                  collapse | toggle | select.
    #   value        - text for set-value. Any non-null JSON value is accepted
    #                  and converted to a string (so 0 / false are not treated
    #                  as "missing").
    # Output: hashtable with success plus action-specific fields, or
    #   success=$false and an error message.
    param($cmd)
    $name        = if ($cmd.name)         { $cmd.name }         else { "" }
    $automationId= if ($cmd.automationId) { $cmd.automationId } else { "" }
    $controlType = if ($cmd.controlType)  { $cmd.controlType }  else { "" }
    $wpid        = [int]$cmd.processId
    $action      = $cmd.action
    # Test against $null, not truthiness: `if ($cmd.value)` turned a numeric 0
    # or a boolean false into "" and set-value then refused it as missing.
    $value       = if ($null -ne $cmd.value) { [string]$cmd.value } else { "" }

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $pc = New-Object System.Windows.Automation.PropertyCondition(
        [System.Windows.Automation.AutomationElement]::ProcessIdProperty, $wpid
    )
    $window = $root.FindFirst([System.Windows.Automation.TreeScope]::Children, $pc)
    if ($null -eq $window) { return @{ success=$false; error="No window for pid $wpid" } }

    # Find element: prefer automationId (exact), then fuzzy name walk.
    $element = $null
    if ($automationId -ne "") {
        $aidCond = New-Object System.Windows.Automation.PropertyCondition(
            [System.Windows.Automation.AutomationElement]::AutomationIdProperty, $automationId
        )
        $element = $window.FindFirst([System.Windows.Automation.TreeScope]::Descendants, $aidCond)
    }

    if ($null -eq $element -and $name -ne "") {
        $nameLower = $name.ToLower()
        $ctCond = if ($controlType -ne "" -and $ctMap.ContainsKey($controlType)) {
            New-Object System.Windows.Automation.PropertyCondition(
                [System.Windows.Automation.AutomationElement]::ControlTypeProperty, $ctMap[$controlType]
            )
        } else { [System.Windows.Automation.Condition]::TrueCondition }

        $candidates = $window.FindAll([System.Windows.Automation.TreeScope]::Descendants, $ctCond)
        # First pass: exact match after stripping shortcut suffix.
        foreach ($el in $candidates) {
            try {
                $elName = ($el.Current.Name -replace '\t.*$', '').Trim().ToLower()
                if ($elName -eq $nameLower -and $elName.Length -gt 0) { $element = $el; break }
            } catch {}
        }
        # Second pass: contains match in either direction.
        if ($null -eq $element) {
            foreach ($el in $candidates) {
                try {
                    $elName = ($el.Current.Name -replace '\t.*$', '').Trim().ToLower()
                    if ($elName.Length -gt 0 -and ($elName.Contains($nameLower) -or $nameLower.Contains($elName))) {
                        $element = $el; break
                    }
                } catch {}
            }
        }
    }

    if ($null -eq $element) {
        return @{ success=$false; error="Element not found: name='$name' id='$automationId' ct='$controlType'" }
    }

    switch ($action) {
        "click" {
            # "click" is the generic ACTIVATE intent. The target may be a
            # Button (InvokePattern), a checkbox (TogglePattern) or a list /
            # combo item (SelectionItemPattern), so cascade through all three
            # in one bridge call; each step runs only if the previous one threw.
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
                $p.Invoke()
                return @{ success=$true; action="click"; method="InvokePattern" }
            } catch {}
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)
                $p.Toggle()
                return @{ success=$true; action="click"; method="TogglePattern" }
            } catch {}
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.SelectionItemPattern]::Pattern)
                $p.Select()
                return @{ success=$true; action="click"; method="SelectionItemPattern" }
            } catch {}

            # No pattern worked: report failure and, when the element has a
            # usable rectangle, its centre so the caller can coordinate-click.
            # An empty rectangle has infinite coordinates; casting those to
            # [int] throws, which used to replace this structured reply with a
            # bare conversion error.
            $miss = @{ success=$false; action="click"; error="No invoke/toggle/select pattern" }
            try {
                $rect = $element.Current.BoundingRectangle
                $usable = $true
                foreach ($n in @($rect.X, $rect.Y, $rect.Width, $rect.Height)) {
                    if ([double]::IsInfinity($n) -or [double]::IsNaN($n)) { $usable = $false }
                }
                if ($usable) {
                    $miss.clickPoint = @{x=[int]($rect.X+$rect.Width/2);y=[int]($rect.Y+$rect.Height/2)}
                }
            } catch {}
            return $miss
        }
        "set-value" {
            if ($value -eq "") { return @{ success=$false; error="value required for set-value" } }
            # The named element is often NOT the editable field itself (a
            # read-only label, or a ComboBox wrapping an Edit). Resolve to the
            # real editable target, set it via ValuePattern, VERIFY, then fall
            # back to keyboard entry. Verification catches silent no-ops.
            $readVal = { param($el) try { return $el.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern).Current.Value } catch { return $null } }
            $target = Resolve-EditableTarget $element

            # 1) Writable ValuePattern on the resolved target, verified.
            try {
                $vp = $target.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
                if (-not $vp.Current.IsReadOnly) {
                    $vp.SetValue($value)
                    if ((& $readVal $target) -eq $value) { return @{ success=$true; action="set-value"; value=$value; method="ValuePattern" } }
                }
            } catch { }

            # 2) Keyboard fallback: focus the resolved target, select-all, type.
            #    Last resort for controls with no usable (writable) ValuePattern.
            try {
                Add-Type -AssemblyName System.Windows.Forms -ErrorAction SilentlyContinue
                $target.SetFocus()
                Start-Sleep -Milliseconds 60
                # Brace-escape the characters SendKeys treats as special.
                $esc = [regex]::Replace($value, '([+^%~(){}\[\]])', '{$1}')
                [System.Windows.Forms.SendKeys]::SendWait("^a"); Start-Sleep -Milliseconds 30
                [System.Windows.Forms.SendKeys]::SendWait($esc); Start-Sleep -Milliseconds 60
                # A $null read-back means the value cannot be read at all; that
                # is reported as success because it cannot be disproved.
                $after = & $readVal $target
                if ($after -eq $value -or $null -eq $after) { return @{ success=$true; action="set-value"; value=$value; method="keyboard" } }
                return @{ success=$false; error="set-value did not stick (got '$after')" }
            } catch {
                return @{ success=$false; error="set-value failed (ValuePattern + keyboard): $($_.Exception.Message)" }
            }
        }
        "get-value" {
            # Some Document/RichEdit controls expose a ValuePattern whose
            # .Current.Value is always "" while the real text lives in the
            # TextPattern. Try ValuePattern, then TextPattern, then Name, and
            # keep the first NON-EMPTY result.
            $val = $null; $method = $null
            try {
                $vp = $element.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
                $v = $vp.Current.Value
                if ($null -ne $v -and $v.Length -gt 0) { $val = $v; $method = "ValuePattern" }
            } catch { }
            if ($null -eq $val) {
                try {
                    $tp = $element.GetCurrentPattern([System.Windows.Automation.TextPattern]::Pattern)
                    $t = $tp.DocumentRange.GetText(-1)
                    if ($null -ne $t -and $t.Length -gt 0) { $val = $t; $method = "TextPattern" }
                } catch { }
            }
            if ($null -eq $val) { $val = $element.Current.Name; $method = "Name" }
            return @{ success=$true; action="get-value"; value=$val; method=$method }
        }
        "focus" {
            try { $element.SetFocus(); return @{ success=$true; action="focus" } }
            catch { return @{ success=$false; error="SetFocus failed: $($_.Exception.Message)" } }
        }
        "expand" {
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.ExpandCollapsePattern]::Pattern)
                $p.Expand(); return @{ success=$true; action="expand" }
            } catch { return @{ success=$false; error="ExpandCollapsePattern not supported" } }
        }
        "collapse" {
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.ExpandCollapsePattern]::Pattern)
                $p.Collapse(); return @{ success=$true; action="collapse" }
            } catch { return @{ success=$false; error="ExpandCollapsePattern not supported" } }
        }
        "toggle" {
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.TogglePattern]::Pattern)
                $p.Toggle()
                # Report the state read back after the toggle.
                $state = $p.Current.ToggleState.ToString()
                return @{ success=$true; action="toggle"; data=@{ toggleState=$state } }
            } catch { return @{ success=$false; error="TogglePattern not supported" } }
        }
        "select" {
            try {
                $p = $element.GetCurrentPattern([System.Windows.Automation.SelectionItemPattern]::Pattern)
                $p.Select(); return @{ success=$true; action="select" }
            } catch { return @{ success=$false; error="SelectionItemPattern not supported" } }
        }
        default { return @{ success=$false; error="Unknown action: $action" } }
    }
}
755
+
756
+ # ── Command: get-focused-element ──────────────────────────────────────────────
757
function Cmd-GetFocusedElement {
    # Describe the element that currently has UI Automation focus.
    # Output: ordered dictionary { success, name, automationId, controlType,
    #   className, processId, isEnabled, bounds, value }, or
    #   @{ success=$false; error=... } when nothing is focused or UIA throws.
    try {
        $focused = [System.Windows.Automation.AutomationElement]::FocusedElement
        if ($null -eq $focused) { return @{ success=$false; error="No focused element" } }
        $cur  = $focused.Current
        $rect = $cur.BoundingRectangle
        # An empty rectangle reports infinite coordinates; emit zeros instead.
        $bounds = if ([double]::IsInfinity($rect.X) -or [double]::IsInfinity($rect.Y)) {
            @{ x=0; y=0; width=0; height=0 }
        } else {
            @{ x=[Math]::Round($rect.X); y=[Math]::Round($rect.Y); width=[Math]::Round($rect.Width); height=[Math]::Round($rect.Height) }
        }
        $typeName = if ($cur.ControlType) { $cur.ControlType.ProgrammaticName } else { "" }

        # Read the current value. Some Document/RichEdit controls expose a
        # ValuePattern that succeeds but always yields "", with the real text
        # in the TextPattern. Falling back only when ValuePattern THROWS
        # therefore reported a blank value for them, so also fall through when
        # the value is empty (same rule as the invoke-element get-value action).
        $value = ""
        try {
            $vp = $focused.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
            $v = $vp.Current.Value
            if ($null -ne $v -and $v.Length -gt 0) { $value = $v }
        } catch {}
        if ($value -eq "") {
            try {
                $tp = $focused.GetCurrentPattern([System.Windows.Automation.TextPattern]::Pattern)
                # Cap the read at 1000 characters to keep the reply small.
                $t = $tp.DocumentRange.GetText(1000)
                if ($null -ne $t -and $t.Length -gt 0) { $value = $t }
            } catch {}
        }

        return [ordered]@{
            success      = $true
            name         = if ($cur.Name)         { $cur.Name }         else { "" }
            automationId = if ($cur.AutomationId) { $cur.AutomationId } else { "" }
            controlType  = $typeName
            className    = if ($cur.ClassName)    { $cur.ClassName }    else { "" }
            processId    = $cur.ProcessId
            isEnabled    = $cur.IsEnabled
            bounds       = $bounds
            value        = $value
        }
    } catch {
        return @{ success=$false; error=$_.Exception.Message }
    }
}
795
+
796
+ # ── Main: signal ready, then read commands ────────────────────────────────────
797
# Handshake: tell the parent process the bridge is ready for commands.
[Console]::Out.WriteLine('{"ready":true}')
[Console]::Out.Flush()

# Protocol: one JSON command per stdin line, exactly one JSON reply per
# command on stdout. End of input or the literal line EXIT stops the loop.
while ($true) {
    $raw = [Console]::In.ReadLine()
    if ($null -eq $raw) { break }

    $line = $raw.Trim()
    if ($line -eq "EXIT") { break }
    if ($line -eq "") { continue }

    try {
        $cmd = $line | ConvertFrom-Json
        $result = switch ($cmd.cmd) {
            "get-screen-context"    { Cmd-GetScreenContext $cmd }
            "get-foreground-window" { Cmd-GetForegroundWindow }
            "focus-window"          { Cmd-FocusWindow $cmd }
            "find-element"          { Cmd-FindElement $cmd }
            "invoke-element"        { Cmd-InvokeElement $cmd }
            "get-focused-element"   { Cmd-GetFocusedElement }
            "activate-at-point"     { Cmd-ActivateAtPoint $cmd }
            "ping"                  { @{ pong=$true } }
            default                 { @{ error="Unknown command: $($cmd.cmd)" } }
        }
        # Serialize with -InputObject, never through the pipeline: piping an
        # EMPTY array feeds ConvertTo-Json zero objects, so nothing is written
        # and the caller waits until its timeout. -InputObject emits "[]" for
        # @() and keeps single-element arrays as arrays.
        $reply = ConvertTo-Json -InputObject $result -Depth 50 -Compress
        [Console]::Out.WriteLine($reply)
    } catch {
        # Any failure (bad JSON, command exception) still produces one reply.
        $failure = ConvertTo-Json -InputObject @{ error=$_.Exception.Message } -Compress
        [Console]::Out.WriteLine($failure)
    }
    [Console]::Out.Flush()
}