@mseep/clawdcursor 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/CHANGELOG.md +2264 -0
  2. package/LICENSE +21 -0
  3. package/README.md +385 -0
  4. package/SECURITY.md +44 -0
  5. package/SKILL.md +503 -0
  6. package/dist/core/agent-loop/agent.d.ts +42 -0
  7. package/dist/core/agent-loop/agent.js +1023 -0
  8. package/dist/core/agent-loop/agent.js.map +1 -0
  9. package/dist/core/agent-loop/batch-tool.d.ts +25 -0
  10. package/dist/core/agent-loop/batch-tool.js +218 -0
  11. package/dist/core/agent-loop/batch-tool.js.map +1 -0
  12. package/dist/core/agent-loop/coord-scale.d.ts +72 -0
  13. package/dist/core/agent-loop/coord-scale.js +89 -0
  14. package/dist/core/agent-loop/coord-scale.js.map +1 -0
  15. package/dist/core/agent-loop/focus-guard.d.ts +24 -0
  16. package/dist/core/agent-loop/focus-guard.js +29 -0
  17. package/dist/core/agent-loop/focus-guard.js.map +1 -0
  18. package/dist/core/agent-loop/project-mcp.d.ts +97 -0
  19. package/dist/core/agent-loop/project-mcp.js +253 -0
  20. package/dist/core/agent-loop/project-mcp.js.map +1 -0
  21. package/dist/core/agent-loop/prompt.d.ts +45 -0
  22. package/dist/core/agent-loop/prompt.js +426 -0
  23. package/dist/core/agent-loop/prompt.js.map +1 -0
  24. package/dist/core/agent-loop/tool-meta.d.ts +93 -0
  25. package/dist/core/agent-loop/tool-meta.js +651 -0
  26. package/dist/core/agent-loop/tool-meta.js.map +1 -0
  27. package/dist/core/agent-loop/tools.d.ts +38 -0
  28. package/dist/core/agent-loop/tools.js +2134 -0
  29. package/dist/core/agent-loop/tools.js.map +1 -0
  30. package/dist/core/agent-loop/types.d.ts +170 -0
  31. package/dist/core/agent-loop/types.js +12 -0
  32. package/dist/core/agent-loop/types.js.map +1 -0
  33. package/dist/core/agent.d.ts +51 -0
  34. package/dist/core/agent.js +245 -0
  35. package/dist/core/agent.js.map +1 -0
  36. package/dist/core/app-categories.d.ts +67 -0
  37. package/dist/core/app-categories.js +108 -0
  38. package/dist/core/app-categories.js.map +1 -0
  39. package/dist/core/banner.d.ts +70 -0
  40. package/dist/core/banner.js +245 -0
  41. package/dist/core/banner.js.map +1 -0
  42. package/dist/core/classify/capability.d.ts +45 -0
  43. package/dist/core/classify/capability.js +78 -0
  44. package/dist/core/classify/capability.js.map +1 -0
  45. package/dist/core/decompose/llm-decomposer.d.ts +35 -0
  46. package/dist/core/decompose/llm-decomposer.js +156 -0
  47. package/dist/core/decompose/llm-decomposer.js.map +1 -0
  48. package/dist/core/decompose/parser.d.ts +27 -0
  49. package/dist/core/decompose/parser.js +101 -0
  50. package/dist/core/decompose/parser.js.map +1 -0
  51. package/dist/core/observability/correlation.d.ts +19 -0
  52. package/dist/core/observability/correlation.js +36 -0
  53. package/dist/core/observability/correlation.js.map +1 -0
  54. package/dist/core/observability/cost-meter.d.ts +51 -0
  55. package/dist/core/observability/cost-meter.js +134 -0
  56. package/dist/core/observability/cost-meter.js.map +1 -0
  57. package/dist/core/observability/logger.d.ts +61 -0
  58. package/dist/core/observability/logger.js +550 -0
  59. package/dist/core/observability/logger.js.map +1 -0
  60. package/dist/core/router/aliases.d.ts +50 -0
  61. package/dist/core/router/aliases.js +104 -0
  62. package/dist/core/router/aliases.js.map +1 -0
  63. package/dist/core/router/normalize.d.ts +41 -0
  64. package/dist/core/router/normalize.js +80 -0
  65. package/dist/core/router/normalize.js.map +1 -0
  66. package/dist/core/safety.d.ts +126 -0
  67. package/dist/core/safety.js +568 -0
  68. package/dist/core/safety.js.map +1 -0
  69. package/dist/core/sense/a11y-resolver.d.ts +73 -0
  70. package/dist/core/sense/a11y-resolver.js +76 -0
  71. package/dist/core/sense/a11y-resolver.js.map +1 -0
  72. package/dist/core/sense/fingerprint.d.ts +41 -0
  73. package/dist/core/sense/fingerprint.js +123 -0
  74. package/dist/core/sense/fingerprint.js.map +1 -0
  75. package/dist/core/sense/rank.d.ts +70 -0
  76. package/dist/core/sense/rank.js +192 -0
  77. package/dist/core/sense/rank.js.map +1 -0
  78. package/dist/core/sense/reactive-check.d.ts +40 -0
  79. package/dist/core/sense/reactive-check.js +48 -0
  80. package/dist/core/sense/reactive-check.js.map +1 -0
  81. package/dist/core/sense/snapshot.d.ts +19 -0
  82. package/dist/core/sense/snapshot.js +100 -0
  83. package/dist/core/sense/snapshot.js.map +1 -0
  84. package/dist/core/sense/types.d.ts +66 -0
  85. package/dist/core/sense/types.js +9 -0
  86. package/dist/core/sense/types.js.map +1 -0
  87. package/dist/core/sense/ui-map-anchors.d.ts +7 -0
  88. package/dist/core/sense/ui-map-anchors.js +24 -0
  89. package/dist/core/sense/ui-map-anchors.js.map +1 -0
  90. package/dist/core/sense/ui-map-elements.d.ts +5 -0
  91. package/dist/core/sense/ui-map-elements.js +33 -0
  92. package/dist/core/sense/ui-map-elements.js.map +1 -0
  93. package/dist/core/sense/ui-map-find.d.ts +56 -0
  94. package/dist/core/sense/ui-map-find.js +153 -0
  95. package/dist/core/sense/ui-map-find.js.map +1 -0
  96. package/dist/core/sense/ui-map-fuse.d.ts +4 -0
  97. package/dist/core/sense/ui-map-fuse.js +44 -0
  98. package/dist/core/sense/ui-map-fuse.js.map +1 -0
  99. package/dist/core/sense/ui-map-geom.d.ts +3 -0
  100. package/dist/core/sense/ui-map-geom.js +16 -0
  101. package/dist/core/sense/ui-map-geom.js.map +1 -0
  102. package/dist/core/sense/ui-map-holder.d.ts +58 -0
  103. package/dist/core/sense/ui-map-holder.js +87 -0
  104. package/dist/core/sense/ui-map-holder.js.map +1 -0
  105. package/dist/core/sense/ui-map-normalize.d.ts +19 -0
  106. package/dist/core/sense/ui-map-normalize.js +65 -0
  107. package/dist/core/sense/ui-map-normalize.js.map +1 -0
  108. package/dist/core/sense/ui-map-render.d.ts +4 -0
  109. package/dist/core/sense/ui-map-render.js +34 -0
  110. package/dist/core/sense/ui-map-render.js.map +1 -0
  111. package/dist/core/sense/ui-map-resolve.d.ts +41 -0
  112. package/dist/core/sense/ui-map-resolve.js +59 -0
  113. package/dist/core/sense/ui-map-resolve.js.map +1 -0
  114. package/dist/core/sense/ui-map-types.d.ts +66 -0
  115. package/dist/core/sense/ui-map-types.js +11 -0
  116. package/dist/core/sense/ui-map-types.js.map +1 -0
  117. package/dist/core/sense/ui-map.d.ts +29 -0
  118. package/dist/core/sense/ui-map.js +113 -0
  119. package/dist/core/sense/ui-map.js.map +1 -0
  120. package/dist/core/verify/assertions.d.ts +132 -0
  121. package/dist/core/verify/assertions.js +284 -0
  122. package/dist/core/verify/assertions.js.map +1 -0
  123. package/dist/index.d.ts +21 -0
  124. package/dist/index.js +24 -0
  125. package/dist/index.js.map +1 -0
  126. package/dist/llm/browser-config.d.ts +36 -0
  127. package/dist/llm/browser-config.js +83 -0
  128. package/dist/llm/browser-config.js.map +1 -0
  129. package/dist/llm/client.d.ts +268 -0
  130. package/dist/llm/client.js +1094 -0
  131. package/dist/llm/client.js.map +1 -0
  132. package/dist/llm/config.d.ts +79 -0
  133. package/dist/llm/config.js +375 -0
  134. package/dist/llm/config.js.map +1 -0
  135. package/dist/llm/credentials.d.ts +35 -0
  136. package/dist/llm/credentials.js +491 -0
  137. package/dist/llm/credentials.js.map +1 -0
  138. package/dist/llm/external-creds.d.ts +42 -0
  139. package/dist/llm/external-creds.js +169 -0
  140. package/dist/llm/external-creds.js.map +1 -0
  141. package/dist/llm/providers.d.ts +123 -0
  142. package/dist/llm/providers.js +717 -0
  143. package/dist/llm/providers.js.map +1 -0
  144. package/dist/paths.d.ts +31 -0
  145. package/dist/paths.js +147 -0
  146. package/dist/paths.js.map +1 -0
  147. package/dist/platform/accessibility.d.ts +139 -0
  148. package/dist/platform/accessibility.js +670 -0
  149. package/dist/platform/accessibility.js.map +1 -0
  150. package/dist/platform/cdp-driver.d.ts +318 -0
  151. package/dist/platform/cdp-driver.js +1179 -0
  152. package/dist/platform/cdp-driver.js.map +1 -0
  153. package/dist/platform/index.d.ts +11 -0
  154. package/dist/platform/index.js +69 -0
  155. package/dist/platform/index.js.map +1 -0
  156. package/dist/platform/keys.d.ts +17 -0
  157. package/dist/platform/keys.js +129 -0
  158. package/dist/platform/keys.js.map +1 -0
  159. package/dist/platform/launch-poll.d.ts +101 -0
  160. package/dist/platform/launch-poll.js +177 -0
  161. package/dist/platform/launch-poll.js.map +1 -0
  162. package/dist/platform/linux.d.ts +173 -0
  163. package/dist/platform/linux.js +1253 -0
  164. package/dist/platform/linux.js.map +1 -0
  165. package/dist/platform/macos.d.ts +136 -0
  166. package/dist/platform/macos.js +976 -0
  167. package/dist/platform/macos.js.map +1 -0
  168. package/dist/platform/native-desktop.d.ts +145 -0
  169. package/dist/platform/native-desktop.js +936 -0
  170. package/dist/platform/native-desktop.js.map +1 -0
  171. package/dist/platform/native-helper.d.ts +130 -0
  172. package/dist/platform/native-helper.js +592 -0
  173. package/dist/platform/native-helper.js.map +1 -0
  174. package/dist/platform/ocr-engine.d.ts +78 -0
  175. package/dist/platform/ocr-engine.js +363 -0
  176. package/dist/platform/ocr-engine.js.map +1 -0
  177. package/dist/platform/ps-runner.d.ts +28 -0
  178. package/dist/platform/ps-runner.js +228 -0
  179. package/dist/platform/ps-runner.js.map +1 -0
  180. package/dist/platform/types.d.ts +397 -0
  181. package/dist/platform/types.js +15 -0
  182. package/dist/platform/types.js.map +1 -0
  183. package/dist/platform/uri-handler.d.ts +75 -0
  184. package/dist/platform/uri-handler.js +273 -0
  185. package/dist/platform/uri-handler.js.map +1 -0
  186. package/dist/platform/wayland-backend.d.ts +53 -0
  187. package/dist/platform/wayland-backend.js +348 -0
  188. package/dist/platform/wayland-backend.js.map +1 -0
  189. package/dist/platform/windows.d.ts +232 -0
  190. package/dist/platform/windows.js +1210 -0
  191. package/dist/platform/windows.js.map +1 -0
  192. package/dist/postbuild.d.ts +10 -0
  193. package/dist/postbuild.js +98 -0
  194. package/dist/postbuild.js.map +1 -0
  195. package/dist/schema/snapshot.d.ts +33 -0
  196. package/dist/schema/snapshot.js +90 -0
  197. package/dist/schema/snapshot.js.map +1 -0
  198. package/dist/shortcuts.d.ts +30 -0
  199. package/dist/shortcuts.js +261 -0
  200. package/dist/shortcuts.js.map +1 -0
  201. package/dist/surface/cli.d.ts +7 -0
  202. package/dist/surface/cli.js +1556 -0
  203. package/dist/surface/cli.js.map +1 -0
  204. package/dist/surface/dashboard.d.ts +8 -0
  205. package/dist/surface/dashboard.js +1193 -0
  206. package/dist/surface/dashboard.js.map +1 -0
  207. package/dist/surface/doctor.d.ts +29 -0
  208. package/dist/surface/doctor.js +1514 -0
  209. package/dist/surface/doctor.js.map +1 -0
  210. package/dist/surface/format.d.ts +10 -0
  211. package/dist/surface/format.js +37 -0
  212. package/dist/surface/format.js.map +1 -0
  213. package/dist/surface/http-utility.d.ts +65 -0
  214. package/dist/surface/http-utility.js +336 -0
  215. package/dist/surface/http-utility.js.map +1 -0
  216. package/dist/surface/mcp-server.d.ts +91 -0
  217. package/dist/surface/mcp-server.js +280 -0
  218. package/dist/surface/mcp-server.js.map +1 -0
  219. package/dist/surface/onboarding.d.ts +15 -0
  220. package/dist/surface/onboarding.js +184 -0
  221. package/dist/surface/onboarding.js.map +1 -0
  222. package/dist/surface/pidfile.d.ts +79 -0
  223. package/dist/surface/pidfile.js +263 -0
  224. package/dist/surface/pidfile.js.map +1 -0
  225. package/dist/surface/readiness.d.ts +45 -0
  226. package/dist/surface/readiness.js +230 -0
  227. package/dist/surface/readiness.js.map +1 -0
  228. package/dist/surface/report.d.ts +68 -0
  229. package/dist/surface/report.js +341 -0
  230. package/dist/surface/report.js.map +1 -0
  231. package/dist/surface/skill-register.d.ts +14 -0
  232. package/dist/surface/skill-register.js +150 -0
  233. package/dist/surface/skill-register.js.map +1 -0
  234. package/dist/surface/version.d.ts +6 -0
  235. package/dist/surface/version.js +27 -0
  236. package/dist/surface/version.js.map +1 -0
  237. package/dist/tools/a11y.d.ts +8 -0
  238. package/dist/tools/a11y.js +545 -0
  239. package/dist/tools/a11y.js.map +1 -0
  240. package/dist/tools/a11y_depth.d.ts +19 -0
  241. package/dist/tools/a11y_depth.js +455 -0
  242. package/dist/tools/a11y_depth.js.map +1 -0
  243. package/dist/tools/agent.d.ts +15 -0
  244. package/dist/tools/agent.js +248 -0
  245. package/dist/tools/agent.js.map +1 -0
  246. package/dist/tools/batch.d.ts +46 -0
  247. package/dist/tools/batch.js +230 -0
  248. package/dist/tools/batch.js.map +1 -0
  249. package/dist/tools/cdp.d.ts +8 -0
  250. package/dist/tools/cdp.js +233 -0
  251. package/dist/tools/cdp.js.map +1 -0
  252. package/dist/tools/compact.d.ts +63 -0
  253. package/dist/tools/compact.js +418 -0
  254. package/dist/tools/compact.js.map +1 -0
  255. package/dist/tools/cost-class.d.ts +38 -0
  256. package/dist/tools/cost-class.js +117 -0
  257. package/dist/tools/cost-class.js.map +1 -0
  258. package/dist/tools/desktop.d.ts +9 -0
  259. package/dist/tools/desktop.js +346 -0
  260. package/dist/tools/desktop.js.map +1 -0
  261. package/dist/tools/electron_bridge.d.ts +41 -0
  262. package/dist/tools/electron_bridge.js +261 -0
  263. package/dist/tools/electron_bridge.js.map +1 -0
  264. package/dist/tools/extras.d.ts +22 -0
  265. package/dist/tools/extras.js +942 -0
  266. package/dist/tools/extras.js.map +1 -0
  267. package/dist/tools/favorites.d.ts +13 -0
  268. package/dist/tools/favorites.js +137 -0
  269. package/dist/tools/favorites.js.map +1 -0
  270. package/dist/tools/introspection.d.ts +13 -0
  271. package/dist/tools/introspection.js +55 -0
  272. package/dist/tools/introspection.js.map +1 -0
  273. package/dist/tools/ocr.d.ts +8 -0
  274. package/dist/tools/ocr.js +66 -0
  275. package/dist/tools/ocr.js.map +1 -0
  276. package/dist/tools/orchestration.d.ts +7 -0
  277. package/dist/tools/orchestration.js +377 -0
  278. package/dist/tools/orchestration.js.map +1 -0
  279. package/dist/tools/playbooks/extract-compose.d.ts +22 -0
  280. package/dist/tools/playbooks/extract-compose.js +85 -0
  281. package/dist/tools/playbooks/extract-compose.js.map +1 -0
  282. package/dist/tools/playbooks/find-replace.d.ts +11 -0
  283. package/dist/tools/playbooks/find-replace.js +56 -0
  284. package/dist/tools/playbooks/find-replace.js.map +1 -0
  285. package/dist/tools/playbooks/index.d.ts +63 -0
  286. package/dist/tools/playbooks/index.js +70 -0
  287. package/dist/tools/playbooks/index.js.map +1 -0
  288. package/dist/tools/playbooks/keys-blocklist.d.ts +24 -0
  289. package/dist/tools/playbooks/keys-blocklist.js +89 -0
  290. package/dist/tools/playbooks/keys-blocklist.js.map +1 -0
  291. package/dist/tools/registry.d.ts +40 -0
  292. package/dist/tools/registry.js +560 -0
  293. package/dist/tools/registry.js.map +1 -0
  294. package/dist/tools/safety-gate.d.ts +16 -0
  295. package/dist/tools/safety-gate.js +70 -0
  296. package/dist/tools/safety-gate.js.map +1 -0
  297. package/dist/tools/scheduler.d.ts +76 -0
  298. package/dist/tools/scheduler.js +413 -0
  299. package/dist/tools/scheduler.js.map +1 -0
  300. package/dist/tools/shortcuts.d.ts +13 -0
  301. package/dist/tools/shortcuts.js +205 -0
  302. package/dist/tools/shortcuts.js.map +1 -0
  303. package/dist/tools/smart.d.ts +15 -0
  304. package/dist/tools/smart.js +785 -0
  305. package/dist/tools/smart.js.map +1 -0
  306. package/dist/tools/types.d.ts +174 -0
  307. package/dist/tools/types.js +67 -0
  308. package/dist/tools/types.js.map +1 -0
  309. package/dist/tools/window-text.d.ts +15 -0
  310. package/dist/tools/window-text.js +39 -0
  311. package/dist/tools/window-text.js.map +1 -0
  312. package/dist/types.d.ts +122 -0
  313. package/dist/types.js +41 -0
  314. package/dist/types.js.map +1 -0
  315. package/native/Package.swift +38 -0
  316. package/native/README.md +113 -0
  317. package/native/Sources/ClawdCursorHelper/main.swift +602 -0
  318. package/native/Sources/ClawdCursorHost/main.swift +182 -0
  319. package/native/Sources/PermissionCheck/main.swift +53 -0
  320. package/native/Sources/ScreenshotHelper/main.swift +219 -0
  321. package/native/build.sh +139 -0
  322. package/native/entitlements.plist +12 -0
  323. package/package.json +115 -0
  324. package/scripts/banner.ps1 +112 -0
  325. package/scripts/coord-accuracy.ps1 +140 -0
  326. package/scripts/coord-uwp.ps1 +80 -0
  327. package/scripts/edge-glow.ps1 +180 -0
  328. package/scripts/find-element.ps1 +198 -0
  329. package/scripts/get-foreground-window.ps1 +71 -0
  330. package/scripts/get-screen-context.ps1 +183 -0
  331. package/scripts/get-windows.ps1 +66 -0
  332. package/scripts/install-panic-hotkey.ps1 +46 -0
  333. package/scripts/interact-element.ps1 +431 -0
  334. package/scripts/invoke-element.ps1 +314 -0
  335. package/scripts/linux/atspi-bridge.py +356 -0
  336. package/scripts/linux/ocr-recognize.py +154 -0
  337. package/scripts/mac/_window-picker.jxa +163 -0
  338. package/scripts/mac/find-element.jxa +0 -0
  339. package/scripts/mac/find-element.sh +161 -0
  340. package/scripts/mac/focus-window.jxa +284 -0
  341. package/scripts/mac/get-focused-element.jxa +102 -0
  342. package/scripts/mac/get-foreground-window.jxa +173 -0
  343. package/scripts/mac/get-screen-context.jxa +197 -0
  344. package/scripts/mac/get-ui-tree.sh +141 -0
  345. package/scripts/mac/get-windows.jxa +117 -0
  346. package/scripts/mac/interact-element.sh +235 -0
  347. package/scripts/mac/invoke-element.jxa +408 -0
  348. package/scripts/mac/ocr-recognize.swift +124 -0
  349. package/scripts/ocr-recognize.ps1 +102 -0
  350. package/scripts/postinstall-native.js +48 -0
  351. package/scripts/ps-bridge.ps1 +830 -0
  352. package/scripts/smoke-mcp.ps1 +119 -0
  353. package/scripts/sync-version.ts +178 -0
  354. package/scripts/verify-install.js +81 -0
@@ -0,0 +1,785 @@
1
+ "use strict";
2
+ /**
3
+ * Smart tools — high-level name-based interaction for blind agents.
4
+ *
5
+ * These tools let MCP clients interact with the desktop WITHOUT needing
6
+ * screenshots or coordinate math.
7
+ *
8
+ * Perception order: OCR first (primary), a11y tree in parallel (supplement).
9
+ * If combined OCR+a11y can't handle it → CDP fallback → fail.
10
+ * Vision is never called from here — that's the caller's last resort.
11
+ *
12
+ * Key design: NO coordinate conversion needed by the caller.
13
+ * OCR coords and a11y coords both match nut-js mouseClick coords directly.
14
+ */
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.getSmartTools = getSmartTools;
17
+ const ocr_engine_1 = require("../platform/ocr-engine");
18
+ const browser_config_1 = require("../llm/browser-config");
19
+ // Shared OCR engine singleton — starts as null, is created lazily by getOcr(), and is reused afterwards to avoid re-initialization per call
20
+ let sharedOcr = null;
21
+ function getOcr() { // returns the module-wide OcrEngine instance, constructing it on first use
22
+ if (!sharedOcr) // nothing cached yet (still the initial null)
23
+ sharedOcr = new ocr_engine_1.OcrEngine();
24
+ return sharedOcr;
25
+ }
26
+ // ── Known apps with empty accessibility trees ──
27
+ // These apps expose no useful UIA nodes. For them smart_read skips the a11y getScreenContext call and smart_click skips the a11y invokeElement call, leaving OCR as the primary path.
28
+ const EMPTY_A11Y_APPS = new Set([ // entries are compared against the lowercased processName of the active window
29
+ 'windowsterminal', 'terminal', 'wt', 'alacritty', 'wezterm',
30
+ 'hyper', 'mintty', 'conhost',
31
+ ]);
32
+ function getSmartTools() {
33
+ return [
34
+ // ─── smart_read ──────────────────────────────────────────────────────
35
+ {
36
+ name: 'smart_read',
37
+ description: 'Read text from the screen with automatic fallback. ' +
38
+ 'OCR-first pipeline: runs OCR (primary) and a11y tree (supplement) in parallel. ' +
39
+ 'Returns structured text without needing a screenshot. Use this as your primary perception tool.',
40
+ parameters: {
41
+ scope: {
42
+ type: 'string',
43
+ description: 'Read scope: "focused" for focused element, "window" for active window, "screen" for full screen',
44
+ required: false,
45
+ enum: ['focused', 'window', 'screen'],
46
+ },
47
+ target: {
48
+ type: 'string',
49
+ description: 'Element name to read from specifically. Alias: "name".',
50
+ required: false,
51
+ },
52
+ name: {
53
+ type: 'string',
54
+ description: 'Alias for "target".',
55
+ required: false,
56
+ },
57
+ processId: {
58
+ type: 'number',
59
+ description: 'Limit to specific process',
60
+ required: false,
61
+ },
62
+ },
63
+ category: 'perception',
64
+ safetyTier: 0,
65
+ handler: async (params, ctx) => {
66
+ await ctx.ensureInitialized();
67
+ const scope = params.scope || 'window';
68
+ const target = (params.target ?? params.name);
69
+ const processId = params.processId;
70
+ // ── Focused element read (shortcut — no OCR needed) ──
71
+ if (scope === 'focused') {
72
+ try {
73
+ const el = await ctx.a11y.getFocusedElement();
74
+ if (el) {
75
+ return {
76
+ text: `[via UI Automation focused element]\n${JSON.stringify(el, null, 2)}`,
77
+ };
78
+ }
79
+ }
80
+ catch { /* fall through */ }
81
+ }
82
+ // ── Target-specific read (a11y search — precise) ──
83
+ if (target) {
84
+ try {
85
+ const elements = await ctx.a11y.findElement({ name: target, processId });
86
+ if (elements?.length) {
87
+ const lines = elements.slice(0, 10).map((el) => `[${el.controlType}] "${el.name}" id:${el.automationId} @${el.bounds.x},${el.bounds.y} ` +
88
+ `${el.bounds.width}x${el.bounds.height}` +
89
+ (el.value ? ` value="${el.value}"` : '') +
90
+ (el.isEnabled === false ? ' DISABLED' : ''));
91
+ return { text: `[via UI Automation search]\n${lines.join('\n')}` };
92
+ }
93
+ }
94
+ catch { /* fall through */ }
95
+ }
96
+ // ── OCR + a11y in parallel (OCR is primary, a11y supplements) ──
97
+ const activeWin = await ctx.a11y.getActiveWindow().catch(() => null);
98
+ const appName = activeWin?.processName?.toLowerCase() || '';
99
+ const hasA11y = !EMPTY_A11Y_APPS.has(appName);
100
+ // Launch both in parallel
101
+ const ocrPromise = (async () => {
102
+ try {
103
+ const engine = getOcr();
104
+ if (!engine.isAvailable())
105
+ return null;
106
+ const result = await engine.recognizeScreen();
107
+ if (result.elements.length === 0)
108
+ return null;
109
+ // Group by line for readability
110
+ const lines = new Map();
111
+ for (const el of result.elements) {
112
+ const lineEls = lines.get(el.line) ?? [];
113
+ lineEls.push(el);
114
+ lines.set(el.line, lineEls);
115
+ }
116
+ const ocrLines = [];
117
+ for (const [, lineEls] of [...lines.entries()].sort((a, b) => a[0] - b[0])) {
118
+ ocrLines.push(lineEls.sort((a, b) => a.x - b.x).map(el => el.text).join(' '));
119
+ }
120
+ return { text: ocrLines.join('\n'), count: result.elements.length, ms: result.durationMs };
121
+ }
122
+ catch {
123
+ return null;
124
+ }
125
+ })();
126
+ const a11yPromise = (async () => {
127
+ if (!hasA11y || scope === 'screen')
128
+ return null;
129
+ try {
130
+ const active = processId ?? activeWin?.processId;
131
+ const context = await ctx.a11y.getScreenContext(active);
132
+ if (context && context.length > 50)
133
+ return context;
134
+ }
135
+ catch { /* */ }
136
+ return null;
137
+ })();
138
+ const [ocrResult, a11yResult] = await Promise.all([ocrPromise, a11yPromise]);
139
+ // OCR succeeded — return OCR text, attach a11y tree if available
140
+ if (ocrResult) {
141
+ const a11ySuffix = a11yResult
142
+ ? `\n\n=== A11Y TREE (supplement) ===\n${a11yResult}`
143
+ : '';
144
+ return {
145
+ text: `[via OCR — ${ocrResult.count} lines, ${ocrResult.ms}ms]\n${ocrResult.text}${a11ySuffix}`,
146
+ };
147
+ }
148
+ // OCR failed but a11y succeeded — return a11y alone
149
+ if (a11yResult) {
150
+ return { text: `[via UI Automation active window]\n${a11yResult}` };
151
+ }
152
+ // ── CDP fallback (browser content) ──
153
+ try {
154
+ if (await ctx.cdp.isConnected()) {
155
+ const page = ctx.cdp.getPage();
156
+ if (page) {
157
+ const title = await page.title().catch(() => '');
158
+ const text = await page.evaluate(() => document.body?.innerText?.substring(0, 5000) || '').catch(() => '');
159
+ if (text) {
160
+ return { text: `[via CDP — "${title}"]\n${text}` };
161
+ }
162
+ }
163
+ }
164
+ }
165
+ catch { /* fall through */ }
166
+ return { text: '(could not read screen via any method)', isError: true };
167
+ },
168
+ },
169
+ // ─── smart_click ─────────────────────────────────────────────────────
170
+ {
171
+ name: 'smart_click',
172
+ description: 'Click a UI element by name with automatic fallback. ' +
173
+ 'OCR-first: scans screen text and clicks by coordinates. ' +
174
+ 'Also tries a11y invoke (in parallel) and CDP as fallbacks. ' +
175
+ 'No screenshot or coordinate math needed — just provide the element text.',
176
+ parameters: {
177
+ target: {
178
+ type: 'string',
179
+ description: 'Element name/text to click (e.g., "Send", "Submit", "New Email"). Alias: "name".',
180
+ required: false,
181
+ },
182
+ name: {
183
+ type: 'string',
184
+ description: 'Alias for "target" — the rest of the accessibility surface uses "name".',
185
+ required: false,
186
+ },
187
+ processId: {
188
+ type: 'number',
189
+ description: 'Limit search to a specific process',
190
+ required: false,
191
+ },
192
+ timeout: {
193
+ type: 'number',
194
+ description: 'Max time in ms (default 5000)',
195
+ required: false,
196
+ },
197
+ },
198
+ category: 'orchestration',
199
+ safetyTier: 1,
200
+ handler: async (params, ctx) => {
201
+ await ctx.ensureInitialized();
202
+ const target = (params.target ?? params.name);
203
+ if (!target || typeof target !== 'string' || target.trim() === '') {
204
+ return { text: 'smart_click: "target" (alias "name") is required — the element text to click, e.g. "Send".', isError: true };
205
+ }
206
+ const processId = params.processId;
207
+ const timeoutMs = params.timeout || 10000; // default 10s, was 5s
208
+ const attempted = [];
209
+ // Deadline-aware budget. Each fallback checks `remaining()` BEFORE
210
+ // starting an expensive operation. If <500ms remain we skip and
211
+ // record a `deadline_exceeded_before_*` entry so the outer wall-clock
212
+ // timeout never fires mid-fallback and discards diagnostic state.
213
+ // See issue #101: bare `Promise.race` swallowed the inner work's
214
+ // candidates[] / attempted[] arrays whenever it lost the race.
215
+ const startedAt = Date.now();
216
+ const deadline = startedAt + timeoutMs;
217
+ const remaining = () => Math.max(0, deadline - Date.now());
218
+ const MIN_BUDGET_MS = 500;
219
+ const candidates = [];
220
+ const buildFailure = (error, reason) => {
221
+ const payload = {
222
+ error,
223
+ reason,
224
+ target,
225
+ candidates,
226
+ tried: attempted,
227
+ elapsedMs: Date.now() - startedAt,
228
+ };
229
+ return { text: JSON.stringify(payload), isError: true };
230
+ };
231
+ // Detect active window and check traits
232
+ const activeWin = await ctx.a11y.getActiveWindow().catch(() => null);
233
+ const appName = activeWin?.processName?.toLowerCase() || '';
234
+ const isBrowser = (0, browser_config_1.getBrowserProcessNames)().includes(appName);
235
+ const emptyA11y = EMPTY_A11Y_APPS.has(appName);
236
+ const ocrPromise = (async () => {
237
+ try {
238
+ const engine = getOcr();
239
+ if (!engine.isAvailable())
240
+ return null;
241
+ const result = await engine.recognizeScreen();
242
+ // Pick best OCR candidate from a (possibly filtered) subset.
243
+ // Builds n-grams from adjacent same-line tokens so a multi-word
244
+ // target like "begin exam" matches the actual button (whose OCR
245
+ // output is two adjacent tokens "begin" + "exam") rather than the
246
+ // first stray occurrence of one of those words in unrelated
247
+ // instructional text. Returns null if no candidate scores at least 0.4.
248
+ const norm = (s) => s.toLowerCase().replace(/[^\w\s]/g, ' ').replace(/\s+/g, ' ').trim();
249
+ const targetNorm = norm(target);
250
+ const targetWords = targetNorm.split(' ').filter(Boolean);
251
+ const targetWordSet = new Set(targetWords);
252
+ const pickBest = (cands) => {
253
+ // Group by OCR line index; sort each line left-to-right
254
+ const lineMap = new Map();
255
+ for (const el of cands) {
256
+ if (!el.text)
257
+ continue;
258
+ const arr = lineMap.get(el.line) ?? [];
259
+ arr.push(el);
260
+ lineMap.set(el.line, arr);
261
+ }
262
+ let bestMatch = null;
263
+ let bestScore = 0;
264
+ const scorePhrase = (phrase, tokenCount) => {
265
+ if (!phrase)
266
+ return 0;
267
+ if (phrase === targetNorm)
268
+ return 1.0;
269
+ // A single-character token can never claim a multi-char target.
270
+ // OCR routinely emits stray 1-char fragments ("O", "x", "|"),
271
+ // and the substring rule below scores "O" vs "OK" at
272
+ // min(1,2)/max(1,2)*0.9 = 0.45 — above the 0.4 floor. Live
273
+ // regression 2026-06-06: smart_click("OK") on a discard-draft
274
+ // dialog clicked a stray "O" near the taskbar instead.
275
+ if (phrase.length === 1 && targetNorm.length > 1)
276
+ return 0;
277
+ let raw;
278
+ if (phrase.includes(targetNorm) || targetNorm.includes(phrase)) {
279
+ raw = Math.min(phrase.length, targetNorm.length) / Math.max(phrase.length, targetNorm.length) * 0.9;
280
+ }
281
+ else {
282
+ // Token-overlap fallback (handles transposed / partial matches)
283
+ const phraseWords = phrase.split(' ').filter(Boolean);
284
+ if (!phraseWords.length)
285
+ return 0;
286
+ const overlap = phraseWords.filter(w => targetWordSet.has(w)).length;
287
+ const cov = overlap / targetWords.length;
288
+ if (cov >= 1)
289
+ raw = 0.85;
290
+ else if (cov >= 0.5)
291
+ raw = 0.5 * cov;
292
+ else
293
+ return 0;
294
+ }
295
+ // Penalize a SINGLE token claiming to match a MULTI-WORD target
296
+ // (the exact failure mode that mis-clicked "begin" in instruction
297
+ // text instead of the actual "begin exam" button).
298
+ if (targetWords.length > 1 && tokenCount === 1 && raw < 0.95)
299
+ raw *= 0.55;
300
+ return raw;
301
+ };
302
+ const recordCandidate = (span, score) => {
303
+ if (score <= bestScore)
304
+ return;
305
+ bestScore = score;
306
+ // Synthesize a virtual element covering the full n-gram bounds
307
+ // so the click lands at its visual centroid, not the first token.
308
+ const minX = Math.min(...span.map(t => t.x));
309
+ const minY = Math.min(...span.map(t => t.y));
310
+ const maxX = Math.max(...span.map(t => t.x + t.width));
311
+ const maxY = Math.max(...span.map(t => t.y + t.height));
312
+ const head = span[0];
313
+ bestMatch = {
314
+ ...head,
315
+ text: span.map(t => t.text).join(' '),
316
+ x: minX,
317
+ y: minY,
318
+ width: maxX - minX,
319
+ height: maxY - minY,
320
+ confidence: span.reduce((a, t) => a + (t.confidence ?? 0), 0) / span.length,
321
+ };
322
+ };
323
+ const MAX_N = Math.min(8, targetWords.length + 2);
324
+ for (const lineToks of lineMap.values()) {
325
+ const sorted = [...lineToks].sort((a, b) => a.x - b.x);
326
+ for (let i = 0; i < sorted.length; i++) {
327
+ for (let n = 1; n <= MAX_N && i + n <= sorted.length; n++) {
328
+ const span = sorted.slice(i, i + n);
329
+ // Reject spans with a big horizontal gap (different visual chunks)
330
+ let contiguous = true;
331
+ for (let k = 1; k < span.length; k++) {
332
+ const gap = span[k].x - (span[k - 1].x + span[k - 1].width);
333
+ if (gap > Math.max(span[k - 1].height * 1.5, 30)) {
334
+ contiguous = false;
335
+ break;
336
+ }
337
+ }
338
+ if (!contiguous)
339
+ continue;
340
+ const phrase = norm(span.map(t => t.text).join(' '));
341
+ const score = scorePhrase(phrase, span.length);
342
+ if (score > bestScore)
343
+ recordCandidate(span, score);
344
+ }
345
+ }
346
+ }
347
+ return bestMatch && bestScore >= 0.4 ? { match: bestMatch, score: bestScore } : null;
348
+ };
349
+ // Prefer matches inside the focused window's bounds — full-screen OCR can
350
+ // see text in background windows (e.g. Outlook visible behind a "Pick an
351
+ // account" dialog showing the same email). Only widen to the full screen
352
+ // when the foreground window has no match. See issue #71.
353
+ const winBounds = activeWin?.bounds;
354
+ let pick = null;
355
+ let warning;
356
+ if (winBounds && winBounds.width > 0 && winBounds.height > 0) {
357
+ const inWindow = result.elements.filter(el => {
358
+ const cx = el.x + el.width / 2;
359
+ const cy = el.y + el.height / 2;
360
+ return cx >= winBounds.x && cx < winBounds.x + winBounds.width &&
361
+ cy >= winBounds.y && cy < winBounds.y + winBounds.height;
362
+ });
363
+ pick = pickBest(inWindow);
364
+ }
365
+ // Fall through to full-screen only if foreground produced nothing.
366
+ // Annotate the response so the caller has a signal that the click may
367
+ // have landed in a background window.
368
+ if (!pick) {
369
+ pick = pickBest(result.elements);
370
+ if (pick && winBounds && winBounds.width > 0) {
371
+ warning = 'matched outside focused window';
372
+ }
373
+ }
374
+ if (!pick)
375
+ return null;
376
+ const m = pick.match;
377
+ return {
378
+ x: m.x + Math.round(m.width / 2),
379
+ y: m.y + Math.round(m.height / 2),
380
+ text: m.text,
381
+ warning,
382
+ bounds: { x: m.x, y: m.y, w: m.width, h: m.height },
383
+ confidence: typeof m.confidence === 'number' ? m.confidence : null,
384
+ };
385
+ }
386
+ catch {
387
+ return null;
388
+ }
389
+ })();
390
+ const a11yPromise = (async () => {
391
+ if (emptyA11y)
392
+ return null;
393
+ try {
394
+ const result = await ctx.a11y.invokeElement({
395
+ name: target,
396
+ processId: processId || activeWin?.processId,
397
+ action: 'click',
398
+ });
399
+ if (result.success)
400
+ return { method: 'invoke' };
401
+ if (result.clickPoint)
402
+ return { method: 'bounds', clickPoint: result.clickPoint };
403
+ return null;
404
+ }
405
+ catch {
406
+ // Invocation threw (e.g. RPC error on UWP apps) — try to get bounds for coordinate fallback
407
+ try {
408
+ const elements = await ctx.a11y.findElement({
409
+ name: target,
410
+ processId: processId || activeWin?.processId,
411
+ });
412
+ if (elements?.length) {
413
+ const el = elements[0];
414
+ if (el.bounds?.width > 0) {
415
+ const cx = el.bounds.x + Math.floor(el.bounds.width / 2);
416
+ const cy = el.bounds.y + Math.floor(el.bounds.height / 2);
417
+ return {
418
+ method: 'bounds',
419
+ clickPoint: { x: cx, y: cy },
420
+ bounds: { x: el.bounds.x, y: el.bounds.y, w: el.bounds.width, h: el.bounds.height },
421
+ text: el.name,
422
+ };
423
+ }
424
+ }
425
+ }
426
+ catch { /* fall through */ }
427
+ return null;
428
+ }
429
+ })();
430
+ // Wait for OCR + a11y in parallel, but bounded by the overall deadline.
431
+ // If both subsystems hang we abandon them at the deadline rather than
432
+ // letting an outer race fire — but we keep the diagnostic state we've
433
+ // already collected.
434
+ const parallelResult = {
435
+ ocr: null,
436
+ a11y: null,
437
+ timedOut: false,
438
+ };
439
+ await new Promise((resolve) => {
440
+ let settled = 0;
441
+ const finish = () => { if (++settled >= 2)
442
+ resolve(); };
443
+ const timer = setTimeout(() => {
444
+ parallelResult.timedOut = true;
445
+ resolve();
446
+ }, remaining());
447
+ ocrPromise.then(r => { parallelResult.ocr = r; finish(); }, () => finish());
448
+ a11yPromise.then(r => { parallelResult.a11y = r; finish(); }, () => finish());
449
+ // Cancel the deadline timer once both settle so we don't keep the event loop alive
450
+ Promise.allSettled([ocrPromise, a11yPromise]).then(() => clearTimeout(timer));
451
+ });
452
+ const ocrMatch = parallelResult.ocr;
453
+ const a11yResult = parallelResult.a11y;
454
+ const parallelTimedOut = parallelResult.timedOut;
455
+ // a11y invoke succeeded — best outcome (OS-level click, most reliable)
456
+ if (a11yResult && a11yResult.method === 'invoke') {
457
+ ctx.a11y.invalidateCache();
458
+ return { text: `Clicked "${target}" via UI Automation (invoke_element)` };
459
+ }
460
+ // OCR found the element — coordinate click. OCR returns PHYSICAL
461
+ // pixels (it's running against `screen.grab()` output). On Windows
462
+ // with DPI scaling > 100%, nut-js mouseClick expects LOGICAL pixels,
463
+ // so a physical (1800, 900) on a 2x display would land at logical
464
+ // (1800, 900) — which is far past the actual element. Apply the
465
+ // physical→logical DPI correction. On 100% DPI dpiRatio === 1 and
466
+ // this is a no-op, so the fix is safe across every Windows config
467
+ // and on macOS / Linux (where dpiRatio always returns 1).
468
+ if (ocrMatch) {
469
+ const m = ocrMatch;
470
+ // Record the OCR hit as a candidate even on the happy path —
471
+ // if mouseClick throws below, the failure payload still shows
472
+ // "OCR did find the text, the click itself failed".
473
+ candidates.push({
474
+ source: 'ocr',
475
+ text: m.text,
476
+ bounds: m.bounds,
477
+ confidence: m.confidence,
478
+ });
479
+ const dpi = ctx.desktop.getDpiRatio?.() || 1;
480
+ const cx = Math.round(m.x / dpi);
481
+ const cy = Math.round(m.y / dpi);
482
+ try {
483
+ await ctx.desktop.mouseClick(cx, cy);
484
+ }
485
+ catch (err) {
486
+ attempted.push(`ocr_match_click_threw: ${err?.message?.substring(0, 80) || 'unknown'}`);
487
+ return buildFailure('no_clickable_target', 'ocr_match_click_threw');
488
+ }
489
+ ctx.a11y.invalidateCache();
490
+ const warningSuffix = m.warning ? ` [WARNING: ${m.warning} — verify with read_screen]` : '';
491
+ return { text: `Clicked "${target}" via OCR (matched "${m.text}" at ${cx},${cy}${dpi > 1 ? ` — DPI-corrected from physical ${m.x},${m.y}` : ''})${warningSuffix}` };
492
+ }
493
+ // a11y had bounds but couldn't invoke — coordinate fallback
494
+ if (a11yResult && a11yResult.method === 'bounds') {
495
+ const a = a11yResult;
496
+ if (a.bounds) {
497
+ candidates.push({
498
+ source: 'a11y',
499
+ text: a.text ?? target,
500
+ bounds: a.bounds,
501
+ confidence: null,
502
+ });
503
+ }
504
+ try {
505
+ await ctx.desktop.mouseClick(a.clickPoint.x, a.clickPoint.y);
506
+ }
507
+ catch (err) {
508
+ attempted.push(`a11y_bounds_click_threw: ${err?.message?.substring(0, 80) || 'unknown'}`);
509
+ return buildFailure('no_clickable_target', 'a11y_bounds_click_threw');
510
+ }
511
+ ctx.a11y.invalidateCache();
512
+ return { text: `Clicked "${target}" via a11y bounds (coordinate fallback at ${a.clickPoint.x},${a.clickPoint.y})` };
513
+ }
514
+ // Track what was attempted for diagnostics
515
+ if (parallelTimedOut) {
516
+ attempted.push('deadline_exceeded_during_ocr_a11y_parallel');
517
+ }
518
+ if (emptyA11y) {
519
+ attempted.push(`UIA(skipped): app "${appName}" has known traits: emptyAxTree`);
520
+ }
521
+ else {
522
+ attempted.push('UIA(invoke): element not found or not invocable');
523
+ }
524
+ attempted.push(ocrMatch === null ? 'ocr: no text match found' : 'ocr: unavailable');
525
+ // ── Step 2: CDP click (browser content) ──
526
+ // Deadline-gated so the outer wall-clock can't fire mid-evaluate.
527
+ const cdpBudget = remaining();
528
+ const cdpEligible = isBrowser || (cdpBudget >= MIN_BUDGET_MS &&
529
+ await ctx.cdp.isConnected().catch(() => false));
530
+ if (cdpBudget < MIN_BUDGET_MS) {
531
+ attempted.push('deadline_exceeded_before_cdp');
532
+ }
533
+ else if (cdpEligible) {
534
+ try {
535
+ const connected = await ctx.cdp.isConnected();
536
+ if (connected) {
537
+ const page = ctx.cdp.getPage();
538
+ if (page) {
539
+ const clicked = await page.evaluate((text) => {
540
+ const selectors = 'button, a, [role="button"], [role="link"], [role="menuitem"], input[type="submit"], input[type="button"], [onclick]';
541
+ const elements = document.querySelectorAll(selectors);
542
+ for (const el of elements) {
543
+ const htmlEl = el;
544
+ const elText = htmlEl.textContent?.trim() || htmlEl.getAttribute('aria-label') || htmlEl.getAttribute('title') || '';
545
+ if (elText.toLowerCase().includes(text.toLowerCase())) {
546
+ htmlEl.click();
547
+ return true;
548
+ }
549
+ }
550
+ return false;
551
+ }, target).catch(() => false);
552
+ if (clicked) {
553
+ ctx.a11y.invalidateCache();
554
+ return { text: `Clicked "${target}" via CDP (JS click)` };
555
+ }
556
+ attempted.push('CDP: no text match found');
557
+ }
558
+ }
559
+ else {
560
+ attempted.push('CDP: not connected');
561
+ }
562
+ }
563
+ catch (err) {
564
+ attempted.push(`CDP: ${err.message?.substring(0, 80)}`);
565
+ }
566
+ }
567
+ else {
568
+ attempted.push(`CDP(skipped): foreground app "${appName}" is not a browser`);
569
+ }
570
+ // All methods failed. Choose the error code based on whether the
571
+ // deadline ate any of our subsystems — if so the caller should retry
572
+ // with a longer timeout; if not, the target is genuinely unfindable.
573
+ const deadlineHit = parallelTimedOut || remaining() < MIN_BUDGET_MS;
574
+ if (deadlineHit) {
575
+ return buildFailure('deadline_exceeded', 'deadline_exceeded');
576
+ }
577
+ return buildFailure('no_clickable_target', 'all_fallbacks_failed');
578
+ },
579
+ },
580
+ // ─── smart_type ──────────────────────────────────────────────────────
581
+ {
582
+ name: 'smart_type',
583
+ description: 'Type text into a UI element. If target is specified, finds and focuses the element first. ' +
584
+ 'Uses clipboard paste for reliability (no dropped characters).',
585
+ parameters: {
586
+ text: {
587
+ type: 'string',
588
+ description: 'The text to type',
589
+ required: true,
590
+ },
591
+ target: {
592
+ type: 'string',
593
+ description: 'Element name to focus before typing (optional — types into currently focused element if omitted). Alias: "name".',
594
+ required: false,
595
+ },
596
+ name: {
597
+ type: 'string',
598
+ description: 'Alias for "target".',
599
+ required: false,
600
+ },
601
+ processId: {
602
+ type: 'number',
603
+ description: 'Limit search to a specific process',
604
+ required: false,
605
+ },
606
+ },
607
+ category: 'keyboard',
608
+ safetyTier: 1,
609
+ handler: async (params, ctx) => {
610
+ await ctx.ensureInitialized();
611
+ const text = params.text;
612
+ const target = (params.target ?? params.name);
613
+ const processId = params.processId;
614
+ // If target specified, find and focus it first
615
+ if (target) {
616
+ let focused = false;
617
+ // Try UIA focus
618
+ try {
619
+ const activeWin = await ctx.a11y.getActiveWindow().catch(() => null);
620
+ const appName = activeWin?.processName?.toLowerCase() || '';
621
+ if (!EMPTY_A11Y_APPS.has(appName)) {
622
+ const result = await ctx.a11y.invokeElement({
623
+ name: target,
624
+ processId: processId || activeWin?.processId,
625
+ action: 'focus',
626
+ });
627
+ if (result.success) {
628
+ focused = true;
629
+ }
630
+ else if (result.clickPoint) {
631
+ // Focus failed but we have bounds — click to focus
632
+ // a11y coords match nut-js mouse coords directly
633
+ await ctx.desktop.mouseClick(result.clickPoint.x, result.clickPoint.y);
634
+ await new Promise(r => setTimeout(r, 100));
635
+ focused = true;
636
+ }
637
+ }
638
+ }
639
+ catch { /* fall through */ }
640
+ // Try CDP focus (browser)
641
+ if (!focused) {
642
+ try {
643
+ if (await ctx.cdp.isConnected()) {
644
+ const page = ctx.cdp.getPage();
645
+ if (page) {
646
+ const found = await page.evaluate((label) => {
647
+ const inputs = document.querySelectorAll('input, textarea, [contenteditable]');
648
+ for (const el of inputs) {
649
+ const htmlEl = el;
650
+ const ariaLabel = htmlEl.getAttribute('aria-label') || '';
651
+ const placeholder = htmlEl.getAttribute('placeholder') || '';
652
+ const name = htmlEl.getAttribute('name') || '';
653
+ if ([ariaLabel, placeholder, name].some(a => a.toLowerCase().includes(label.toLowerCase()))) {
654
+ htmlEl.focus();
655
+ return true;
656
+ }
657
+ }
658
+ return false;
659
+ }, target).catch(() => false);
660
+ if (found)
661
+ focused = true;
662
+ }
663
+ }
664
+ }
665
+ catch { /* fall through */ }
666
+ }
667
+ if (!focused) {
668
+ return { text: `Could not find element "${target}" to focus before typing`, isError: true };
669
+ }
670
+ }
671
+ // Type via clipboard paste. `mod+v` resolves to Cmd+V on macOS and
672
+ // Ctrl+V everywhere else — the platform-portable key combo handles
673
+ // the OS difference without raw `process.platform` branching here.
674
+ await ctx.a11y.writeClipboard(text);
675
+ await new Promise(r => setTimeout(r, 50));
676
+ await ctx.desktop.keyPress('mod+v');
677
+ await new Promise(r => setTimeout(r, 100));
678
+ ctx.a11y.invalidateCache();
679
+ const active = await ctx.a11y.getActiveWindow().catch(() => null);
680
+ const activeInfo = active ? `[${active.processName}] "${active.title}"` : '(unknown)';
681
+ return { text: `Typed ${text.length} chars${target ? ` into "${target}"` : ''} in ${activeInfo}` };
682
+ },
683
+ },
684
+ // ─── invoke_element ──────────────────────────────────────────────────
685
+ {
686
+ name: 'invoke_element',
687
+ description: 'Invoke a UI Automation action on an element. More precise than smart_click — ' +
688
+ 'supports set-value, get-value, focus, expand, collapse in addition to click.',
689
+ parameters: {
690
+ name: {
691
+ type: 'string',
692
+ description: 'Element name to find',
693
+ required: false,
694
+ },
695
+ automationId: {
696
+ type: 'string',
697
+ description: 'Element automation ID (more precise than name)',
698
+ required: false,
699
+ },
700
+ controlType: {
701
+ type: 'string',
702
+ description: 'Filter by control type (e.g., "ControlType.Button")',
703
+ required: false,
704
+ },
705
+ processId: {
706
+ type: 'number',
707
+ description: 'Target process ID',
708
+ required: false,
709
+ },
710
+ action: {
711
+ type: 'string',
712
+ description: 'Action to perform',
713
+ required: false,
714
+ enum: ['click', 'set-value', 'get-value', 'focus', 'expand', 'collapse'],
715
+ },
716
+ value: {
717
+ type: 'string',
718
+ description: 'Value for set-value action',
719
+ required: false,
720
+ },
721
+ },
722
+ category: 'window',
723
+ compactGroup: 'accessibility',
724
+ safetyTier: 1,
725
+ handler: async (params, ctx) => {
726
+ await ctx.ensureInitialized();
727
+ if (!params.name && !params.automationId) {
728
+ return { text: 'Either "name" or "automationId" is required', isError: true };
729
+ }
730
+ try {
731
+ const result = await ctx.a11y.invokeElement({
732
+ name: params.name,
733
+ automationId: params.automationId,
734
+ controlType: params.controlType,
735
+ processId: params.processId,
736
+ action: params.action || 'click',
737
+ value: params.value,
738
+ });
739
+ if (result.success) {
740
+ ctx.a11y.invalidateCache();
741
+ const valueInfo = result.value ? ` → value: "${result.value}"` : '';
742
+ return { text: `Invoked "${params.name || params.automationId}" (${params.action || 'click'})${valueInfo}` };
743
+ }
744
+ // Coordinate fallback for click actions
745
+ // a11y coords match nut-js mouse coords directly
746
+ if (result.clickPoint && (params.action === 'click' || !params.action)) {
747
+ await ctx.desktop.mouseClick(result.clickPoint.x, result.clickPoint.y);
748
+ ctx.a11y.invalidateCache();
749
+ return { text: `Invoked "${params.name || params.automationId}" via coordinate fallback (${result.clickPoint.x},${result.clickPoint.y})` };
750
+ }
751
+ return {
752
+ text: `invoke_element failed: ${result.error || 'element not found or action not supported'}`,
753
+ isError: true,
754
+ };
755
+ }
756
+ catch (err) {
757
+ // On any invocation error (e.g. RPC_E_SERVERFAULT on UWP apps, AXError on macOS),
758
+ // try a coordinate fallback for click actions by finding the element's bounds.
759
+ if (params.action === 'click' || !params.action) {
760
+ try {
761
+ const elements = await ctx.a11y.findElement({
762
+ name: params.name,
763
+ automationId: params.automationId,
764
+ processId: params.processId,
765
+ });
766
+ if (elements?.length) {
767
+ const el = elements[0];
768
+ if (el.bounds?.width > 0) {
769
+ const cx = el.bounds.x + Math.floor(el.bounds.width / 2);
770
+ const cy = el.bounds.y + Math.floor(el.bounds.height / 2);
771
+ await ctx.desktop.mouseClick(cx, cy);
772
+ ctx.a11y.invalidateCache();
773
+ return { text: `Invoked "${params.name || params.automationId}" via coordinate fallback after error (${cx},${cy})` };
774
+ }
775
+ }
776
+ }
777
+ catch { /* give up */ }
778
+ }
779
+ return { text: `invoke_element error: ${err.message}`, isError: true };
780
+ }
781
+ },
782
+ },
783
+ ];
784
+ }
785
+ //# sourceMappingURL=smart.js.map