screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -10,12 +10,64 @@ class AccessibilityBridge {
10
10
 
11
11
  // MARK: - Element Tree
12
12
 
13
- func getElementTree(pid: pid_t, maxDepth: Int) throws -> [String: Any] {
13
+ /// Max total nodes to emit in a single tree build to prevent runaway traversal on heavy DOMs (Canva, Figma)
14
+ private static let treeBuildNodeBudget = 3000
15
+ /// Max siblings to include per parent during tree build
16
+ private static let treeBuildSiblingCap = 80
17
+
18
+ func getElementTree(pid: pid_t, maxDepth: Int, windowId: Int? = nil) throws -> [String: Any] {
14
19
  let appElement = AXUIElementCreateApplication(pid)
15
- return try buildTree(element: appElement, depth: 0, maxDepth: maxDepth)
20
+ var nodesEmitted = 0
21
+
22
+ // If windowId specified, scope to that window instead of full app tree
23
+ if let wid = windowId, wid > 0 {
24
+ if let windowsRef = getAttribute(appElement, kAXWindowsAttribute) as? [AXUIElement] {
25
+ // Match by CGWindowID via position/size comparison with CG window list
26
+ let cgWindows = CGWindowListCopyWindowInfo([.optionOnScreenOnly], kCGNullWindowID) as? [[String: Any]] ?? []
27
+ let appCGWindows = cgWindows.filter { ($0[kCGWindowOwnerPID as String] as? Int) == Int(pid) }
28
+
29
+ for win in windowsRef {
30
+ // Get AX window position and size
31
+ var axPos = CGPoint.zero
32
+ var axSize = CGSize.zero
33
+ if let posValue = getAttribute(win, kAXPositionAttribute) {
34
+ AXValueGetValue(posValue as! AXValue, .cgPoint, &axPos)
35
+ }
36
+ if let sizeValue = getAttribute(win, kAXSizeAttribute) {
37
+ AXValueGetValue(sizeValue as! AXValue, .cgSize, &axSize)
38
+ }
39
+
40
+ // Match against CG window with target windowId
41
+ for cgWin in appCGWindows {
42
+ guard let cgId = cgWin[kCGWindowNumber as String] as? Int, cgId == wid else { continue }
43
+ if let bounds = cgWin[kCGWindowBounds as String] as? [String: Any],
44
+ let cgX = bounds["X"] as? Double,
45
+ let cgY = bounds["Y"] as? Double {
46
+ // Match by position (within tolerance for rounding)
47
+ if abs(Double(axPos.x) - cgX) < 2 && abs(Double(axPos.y) - cgY) < 2 {
48
+ var tree = try buildTree(element: win, depth: 0, maxDepth: maxDepth, nodesEmitted: &nodesEmitted, isAppRoot: false)
49
+ tree["_nodeCount"] = nodesEmitted
50
+ return tree
51
+ }
52
+ }
53
+ }
54
+ }
55
+ }
56
+ // Fallback: if window not found by ID, use app root
57
+ }
58
+
59
+ var tree = try buildTree(element: appElement, depth: 0, maxDepth: maxDepth, nodesEmitted: &nodesEmitted, isAppRoot: true)
60
+ tree["_nodeCount"] = nodesEmitted
61
+ return tree
16
62
  }
17
63
 
18
- private func buildTree(element: AXUIElement, depth: Int, maxDepth: Int) throws -> [String: Any] {
64
+ private func buildTree(element: AXUIElement, depth: Int, maxDepth: Int, nodesEmitted: inout Int, isAppRoot: Bool = false) throws -> [String: Any] {
65
+ // Global node budget — stop adding nodes once exceeded
66
+ nodesEmitted += 1
67
+ if nodesEmitted > AccessibilityBridge.treeBuildNodeBudget {
68
+ return ["role": "BudgetExceeded", "_truncated": true]
69
+ }
70
+
19
71
  var node: [String: Any] = [:]
20
72
 
21
73
  node["role"] = getAttribute(element, kAXRoleAttribute) as? String ?? "Unknown"
@@ -52,13 +104,99 @@ class AccessibilityBridge {
52
104
  }
53
105
  }
54
106
 
107
+ // Children (if not at max depth and within global budget)
108
+ if depth < maxDepth && nodesEmitted < AccessibilityBridge.treeBuildNodeBudget {
109
+ if let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] {
110
+ var childNodes: [[String: Any]] = []
111
+ for (index, child) in children.enumerated() {
112
+ if index >= AccessibilityBridge.treeBuildSiblingCap { break }
113
+ if nodesEmitted >= AccessibilityBridge.treeBuildNodeBudget { break }
114
+ // Skip self-referential AXApplication children at the app root level.
115
+ // Some apps (Notes, Safari) list AXApplication elements as children
116
+ // of themselves, causing infinite recursion. Only check at depth 0
117
+ // (the app root) to avoid affecting legitimate nested elements.
118
+ if isAppRoot {
119
+ let childRole = getAttribute(child, kAXRoleAttribute) as? String ?? ""
120
+ if childRole == "AXApplication" { continue }
121
+ }
122
+ if let childNode = try? buildTree(element: child, depth: depth + 1, maxDepth: maxDepth, nodesEmitted: &nodesEmitted) {
123
+ childNodes.append(childNode)
124
+ }
125
+ }
126
+ if !childNodes.isEmpty {
127
+ node["children"] = childNodes
128
+ }
129
+ }
130
+ }
131
+
132
+ return node
133
+ }
134
+
135
+ // MARK: - Menu Bar Tree
136
+
137
+ func getMenuBarTree(pid: pid_t, maxDepth: Int) throws -> [String: Any] {
138
+ let appElement = AXUIElementCreateApplication(pid)
139
+ guard let menuBar = getAttribute(appElement, kAXMenuBarAttribute) as AnyObject? else {
140
+ throw BridgeError.notFound("Menu bar not found for pid \(pid)")
141
+ }
142
+ let menuBarElement = menuBar as! AXUIElement
143
+ return try buildMenuTree(element: menuBarElement, depth: 0, maxDepth: maxDepth, expandMenus: true)
144
+ }
145
+
146
+ private func buildMenuTree(element: AXUIElement, depth: Int, maxDepth: Int, expandMenus: Bool = false) throws -> [String: Any] {
147
+ var node: [String: Any] = [:]
148
+
149
+ let role = getAttribute(element, kAXRoleAttribute) as? String ?? "Unknown"
150
+ node["role"] = role
151
+ if let title = getAttribute(element, kAXTitleAttribute) as? String, !title.isEmpty {
152
+ node["title"] = title
153
+ }
154
+ if let desc = getAttribute(element, kAXDescriptionAttribute) as? String, !desc.isEmpty {
155
+ node["description"] = desc
156
+ }
157
+ if let enabled = getAttribute(element, kAXEnabledAttribute) as? Bool {
158
+ node["enabled"] = enabled
159
+ }
160
+
161
+ // Menu-specific attributes
162
+ if let cmdChar = getAttribute(element, "AXMenuItemCmdChar") as? String, !cmdChar.isEmpty {
163
+ node["AXMenuItemCmdChar"] = cmdChar
164
+ }
165
+ if let cmdMods = getAttribute(element, "AXMenuItemCmdModifiers") {
166
+ node["AXMenuItemCmdModifiers"] = cmdMods
167
+ }
168
+ if let markChar = getAttribute(element, "AXMenuItemMarkChar") as? String, !markChar.isEmpty {
169
+ node["AXMenuItemMarkChar"] = markChar
170
+ }
171
+
55
172
  // Children (if not at max depth)
56
173
  if depth < maxDepth {
57
- if let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] {
174
+ var children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement]
175
+
176
+ // macOS AXMenuBarItem has a child AXMenu, but the AXMenu's children
177
+ // (actual AXMenuItems) are empty until the menu is opened via AXPress.
178
+ // Always press AXMenuBarItem to populate its submenu items.
179
+ let shouldExpand = expandMenus && role == "AXMenuBarItem"
180
+ if shouldExpand {
181
+ AXUIElementPerformAction(element, kAXPressAction as CFString)
182
+ // Poll for the AXMenu child's children to appear (max 200ms)
183
+ let deadline = Date().addingTimeInterval(0.2)
184
+ while Date() < deadline {
185
+ // Re-read children — the AXMenu inside should now have items
186
+ children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement]
187
+ if let menu = children?.first {
188
+ let menuChildren = getAttribute(menu, kAXChildrenAttribute) as? [AXUIElement]
189
+ if menuChildren != nil && !menuChildren!.isEmpty { break }
190
+ }
191
+ Thread.sleep(forTimeInterval: 0.02)
192
+ }
193
+ }
194
+
195
+ if let children = children {
58
196
  var childNodes: [[String: Any]] = []
59
197
  for (index, child) in children.enumerated() {
60
198
  if index > 100 { break } // Safety limit
61
- if let childNode = try? buildTree(element: child, depth: depth + 1, maxDepth: maxDepth) {
199
+ if let childNode = try? buildMenuTree(element: child, depth: depth + 1, maxDepth: maxDepth, expandMenus: expandMenus) {
62
200
  childNodes.append(childNode)
63
201
  }
64
202
  }
@@ -66,6 +204,12 @@ class AccessibilityBridge {
66
204
  node["children"] = childNodes
67
205
  }
68
206
  }
207
+
208
+ // Close the menu we opened
209
+ if shouldExpand {
210
+ AXUIElementPerformAction(element, kAXCancelAction as CFString)
211
+ Thread.sleep(forTimeInterval: 0.03)
212
+ }
69
213
  }
70
214
 
71
215
  return node
@@ -74,32 +218,58 @@ class AccessibilityBridge {
74
218
  // MARK: - Find Element
75
219
 
76
220
  func findElement(pid: pid_t, role: String?, title: String?, value: String?,
77
- identifier: String?, exact: Bool) throws -> [String: Any] {
221
+ identifier: String?, exact: Bool, maxDepth: Int = 30) throws -> [String: Any] {
78
222
  let appElement = AXUIElementCreateApplication(pid)
223
+ var visited = 0
79
224
  guard let result = searchElement(
80
- element: appElement, path: [], role: role, title: title,
81
- value: value, identifier: identifier, exact: exact
225
+ element: appElement, path: [], depth: 0, maxDepth: maxDepth,
226
+ nodesVisited: &visited,
227
+ role: role, title: title,
228
+ value: value, identifier: identifier, exact: exact,
229
+ isAppRoot: true
82
230
  ) else {
83
231
  throw BridgeError.notFound("Element not found matching criteria")
84
232
  }
85
233
  return result
86
234
  }
87
235
 
88
- private func searchElement(element: AXUIElement, path: [Int], role: String?,
236
+ /// Max total nodes to visit in a single search to prevent runaway traversal on heavy DOMs
237
+ private static let searchNodeBudget = 2000
238
+ /// Max siblings to check per parent during search
239
+ private static let searchSiblingCap = 100
240
+
241
+ private func searchElement(element: AXUIElement, path: [Int], depth: Int, maxDepth: Int,
242
+ nodesVisited: inout Int,
243
+ role: String?,
89
244
  title: String?, value: String?, identifier: String?,
90
- exact: Bool) -> [String: Any]? {
245
+ exact: Bool, isAppRoot: Bool = false) -> [String: Any]? {
246
+ // Bail if we've exceeded the node budget or depth limit
247
+ nodesVisited += 1
248
+ if nodesVisited > AccessibilityBridge.searchNodeBudget { return nil }
249
+ if depth > maxDepth { return nil }
250
+
91
251
  // Check if this element matches
92
252
  let elementRole = getAttribute(element, kAXRoleAttribute) as? String ?? ""
253
+ let elementSubrole = getAttribute(element, kAXSubroleAttribute) as? String ?? ""
93
254
  let elementTitle = getAttribute(element, kAXTitleAttribute) as? String ?? ""
94
255
  let elementValue = getAttribute(element, kAXValueAttribute).flatMap { "\($0)" } ?? ""
95
256
  let elementId = getAttribute(element, kAXIdentifierAttribute) as? String ?? ""
257
+ let elementDesc = getAttribute(element, kAXDescriptionAttribute) as? String ?? ""
96
258
 
97
259
  var matches = true
98
260
  if let role = role {
99
- matches = matches && matchString(elementRole, role, exact: exact)
261
+ // Match role OR subrole allows searching by "AXCloseButton" subrole
262
+ let roleMatch = matchString(elementRole, role, exact: exact)
263
+ let subroleMatch = !elementSubrole.isEmpty && matchString(elementSubrole, role, exact: exact)
264
+ matches = matches && (roleMatch || subroleMatch)
100
265
  }
101
266
  if let title = title {
102
- matches = matches && matchString(elementTitle, title, exact: exact)
267
+ // Match title, description, OR subrole — many elements have no title but do
268
+ // have AXDescription or a meaningful subrole (e.g. AXCloseButton, AXMinimizeButton).
269
+ let titleMatch = matchString(elementTitle, title, exact: exact)
270
+ let descMatch = !elementDesc.isEmpty && matchString(elementDesc, title, exact: exact)
271
+ let subroleMatch = !elementSubrole.isEmpty && matchString(elementSubrole, title, exact: exact)
272
+ matches = matches && (titleMatch || descMatch || subroleMatch)
103
273
  }
104
274
  if let value = value {
105
275
  matches = matches && matchString(elementValue, value, exact: exact)
@@ -117,6 +287,8 @@ class AccessibilityBridge {
117
287
  ]
118
288
  if !elementValue.isEmpty { result["value"] = elementValue }
119
289
  if !elementId.isEmpty { result["identifier"] = elementId }
290
+ if !elementDesc.isEmpty { result["description"] = elementDesc }
291
+ if !elementSubrole.isEmpty { result["subrole"] = elementSubrole }
120
292
 
121
293
  // Get position for coordinates
122
294
  if let posValue = getAttribute(element, kAXPositionAttribute) {
@@ -137,16 +309,27 @@ class AccessibilityBridge {
137
309
  return result
138
310
  }
139
311
 
140
- // Search children
141
- if let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] {
142
- for (index, child) in children.enumerated() {
143
- var childPath = path
144
- childPath.append(index)
145
- if let found = searchElement(
146
- element: child, path: childPath, role: role, title: title,
147
- value: value, identifier: identifier, exact: exact
148
- ) {
149
- return found
312
+ // Search children (with breadth + depth + budget limits)
313
+ if depth < maxDepth {
314
+ if let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] {
315
+ for (index, child) in children.enumerated() {
316
+ if index >= AccessibilityBridge.searchSiblingCap { break }
317
+ if nodesVisited > AccessibilityBridge.searchNodeBudget { break }
318
+ // Skip self-referential AXApplication children at app root
319
+ if isAppRoot {
320
+ let childRole = getAttribute(child, kAXRoleAttribute) as? String ?? ""
321
+ if childRole == "AXApplication" { continue }
322
+ }
323
+ var childPath = path
324
+ childPath.append(index)
325
+ if let found = searchElement(
326
+ element: child, path: childPath, depth: depth + 1, maxDepth: maxDepth,
327
+ nodesVisited: &nodesVisited,
328
+ role: role, title: title,
329
+ value: value, identifier: identifier, exact: exact
330
+ ) {
331
+ return found
332
+ }
150
333
  }
151
334
  }
152
335
  }
@@ -156,8 +339,8 @@ class AccessibilityBridge {
156
339
 
157
340
  // MARK: - Actions
158
341
 
159
- func performAction(pid: pid_t, elementPath: [Int], action: String) throws {
160
- let element = try resolveElement(pid: pid, path: elementPath)
342
+ func performAction(pid: pid_t, elementPath: [Int], action: String, expectedTitle: String? = nil) throws {
343
+ let element = try resolveElement(pid: pid, path: elementPath, expectedTitle: expectedTitle)
161
344
  let result = AXUIElementPerformAction(element, action as CFString)
162
345
  if result != .success {
163
346
  throw BridgeError.general("AX action '\(action)' failed with code \(result.rawValue)")
@@ -173,8 +356,14 @@ class AccessibilityBridge {
173
356
  if focusResult != .success {
174
357
  throw BridgeError.general("Cannot focus element for value set, code \(focusResult.rawValue)")
175
358
  }
176
- // Use CG to type the value
177
- CoreGraphicsBridge().typeText(text: value)
359
+ // Use CG to type the value — PID-targeted to the correct process
360
+ CoreGraphicsBridge().typeText(text: value, targetPid: pid)
361
+ }
362
+ // Verify the value was actually set — some apps (Notes, etc.) silently ignore AXSetValue
363
+ usleep(50_000) // 50ms settle time
364
+ let readBack = getAttribute(element, kAXValueAttribute).flatMap { "\($0)" } ?? ""
365
+ if !readBack.contains(value.prefix(20)) && readBack != value {
366
+ throw BridgeError.general("Value set reported success but verification failed — element still shows \"\(String(readBack.prefix(60)))\" instead of \"\(String(value.prefix(60)))\". This element may not support programmatic value changes.")
178
367
  }
179
368
  }
180
369
 
@@ -205,18 +394,42 @@ class AccessibilityBridge {
205
394
  }
206
395
 
207
396
  var found = false
397
+ // Strip invisible Unicode direction marks (LTR U+200E, RTL U+200F, etc.)
398
+ // that apps like WhatsApp prepend to menu titles.
399
+ let cleanItem = menuItem.filter { !$0.unicodeScalars.allSatisfy { s in
400
+ (0x200B...0x200F).contains(s.value) || (0x2028...0x202F).contains(s.value) ||
401
+ (0xFEFF...0xFEFF).contains(s.value)
402
+ }}
208
403
  for child in children {
209
- let title = getAttribute(child, kAXTitleAttribute) as? String ?? ""
210
- if title == menuItem {
404
+ let rawTitle = getAttribute(child, kAXTitleAttribute) as? String ?? ""
405
+ let title = rawTitle.filter { !$0.unicodeScalars.allSatisfy { s in
406
+ (0x200B...0x200F).contains(s.value) || (0x2028...0x202F).contains(s.value) ||
407
+ (0xFEFF...0xFEFF).contains(s.value)
408
+ }}
409
+ if title == cleanItem || rawTitle == menuItem {
211
410
  // Press this menu item to open it (for submenus) or activate it
212
411
  AXUIElementPerformAction(child, kAXPressAction as CFString)
213
- // Small delay for menu to open
214
- Thread.sleep(forTimeInterval: 0.1)
215
412
 
216
- // If there are more items in the path, navigate into the submenu
217
- if let submenu = getAttribute(child, kAXChildrenAttribute) as? [AXUIElement],
218
- let firstChild = submenu.first {
219
- currentElement = firstChild
413
+ // Poll for children to appear (max 500ms, 50ms intervals)
414
+ // instead of a fixed 100ms sleep
415
+ let pollDeadline = Date().addingTimeInterval(0.5)
416
+ var submenuResolved = false
417
+ while Date() < pollDeadline {
418
+ if let submenu = getAttribute(child, kAXChildrenAttribute) as? [AXUIElement],
419
+ let firstChild = submenu.first {
420
+ currentElement = firstChild
421
+ submenuResolved = true
422
+ break
423
+ }
424
+ Thread.sleep(forTimeInterval: 0.05)
425
+ }
426
+
427
+ // If no submenu appeared after polling, still check once more
428
+ if !submenuResolved {
429
+ if let submenu = getAttribute(child, kAXChildrenAttribute) as? [AXUIElement],
430
+ let firstChild = submenu.first {
431
+ currentElement = firstChild
432
+ }
220
433
  }
221
434
 
222
435
  found = true
@@ -232,7 +445,7 @@ class AccessibilityBridge {
232
445
 
233
446
  // MARK: - Helpers
234
447
 
235
- private func resolveElement(pid: pid_t, path: [Int]) throws -> AXUIElement {
448
+ private func resolveElement(pid: pid_t, path: [Int], expectedTitle: String? = nil) throws -> AXUIElement {
236
449
  var current = AXUIElementCreateApplication(pid) as AXUIElement
237
450
  for index in path {
238
451
  guard let children = getAttribute(current, kAXChildrenAttribute) as? [AXUIElement] else {
@@ -243,10 +456,18 @@ class AccessibilityBridge {
243
456
  }
244
457
  current = children[index]
245
458
  }
459
+ // Verify the resolved element still matches the expected identity
460
+ if let expected = expectedTitle {
461
+ let actualTitle = getAttribute(current, kAXTitleAttribute) as? String ?? ""
462
+ let actualDesc = getAttribute(current, kAXDescriptionAttribute) as? String ?? ""
463
+ if actualTitle != expected && actualDesc != expected {
464
+ throw BridgeError.general("Element at path has changed: expected '\(expected)' but found '\(actualTitle.isEmpty ? actualDesc : actualTitle)'")
465
+ }
466
+ }
246
467
  return current
247
468
  }
248
469
 
249
- private func getAttribute(_ element: AXUIElement, _ attribute: String) -> AnyObject? {
470
+ func getAttribute(_ element: AXUIElement, _ attribute: String) -> AnyObject? {
250
471
  var value: AnyObject?
251
472
  let result = AXUIElementCopyAttributeValue(element, attribute as CFString, &value)
252
473
  return result == .success ? value : nil
@@ -3,6 +3,12 @@ import Foundation
3
3
 
4
4
  class AppManagement {
5
5
 
6
+ private let ax: AccessibilityBridge
7
+
8
/// Creates an app manager that performs its accessibility (AX) attribute
/// lookups through the supplied bridge.
///
/// - Parameter ax: The accessibility bridge stored for later AX queries.
init(ax: AccessibilityBridge) {
    // Keep the injected bridge; the methods of this class call `ax.getAttribute`.
    self.ax = ax
}
11
+
6
12
  func launchApp(bundleId: String) throws -> [String: Any] {
7
13
  let workspace = NSWorkspace.shared
8
14
 
@@ -51,7 +57,98 @@ class AppManagement {
51
57
  guard let app = NSRunningApplication.runningApplications(withBundleIdentifier: bundleId).first else {
52
58
  throw BridgeError.notFound("No running application with bundle ID '\(bundleId)'")
53
59
  }
54
- app.activate()
60
+
61
+ // Attempt 1: activateIgnoringOtherApps (strongest NSRunningApplication API)
62
+ app.activate(options: .activateIgnoringOtherApps)
63
+
64
+ // Poll up to 400ms for focus to switch
65
+ let pollEnd = Date().addingTimeInterval(0.4)
66
+ while Date() < pollEnd {
67
+ if NSWorkspace.shared.frontmostApplication?.bundleIdentifier == bundleId {
68
+ return
69
+ }
70
+ Thread.sleep(forTimeInterval: 0.05)
71
+ }
72
+
73
+ // Attempt 2: AppleScript activation via bundle ID — goes through Apple Events,
74
+ // often succeeds when NSRunningApplication.activate fails (e.g. VS Code holding focus).
75
+ // Uses "id bundleId" form to avoid issues with Unicode chars in app names (WhatsApp).
76
+ let script = NSAppleScript(source: "tell application id \"\(bundleId)\" to activate")
77
+ script?.executeAndReturnError(nil)
78
+
79
+ // Poll up to 400ms more
80
+ let asEnd = Date().addingTimeInterval(0.4)
81
+ while Date() < asEnd {
82
+ if NSWorkspace.shared.frontmostApplication?.bundleIdentifier == bundleId {
83
+ return
84
+ }
85
+ Thread.sleep(forTimeInterval: 0.05)
86
+ }
87
+
88
+ // Attempt 3: Raise the main window via AX API
89
+ let pid = app.processIdentifier
90
+ let appElement = AXUIElementCreateApplication(pid)
91
+ if let axWindows = self.ax.getAttribute(appElement, kAXWindowsAttribute) as? [AXUIElement],
92
+ let mainWindow = axWindows.first {
93
+ AXUIElementPerformAction(mainWindow, kAXRaiseAction as CFString)
94
+ // One more activate after raising — the combo often works when neither alone does
95
+ app.activate(options: .activateIgnoringOtherApps)
96
+ }
97
+
98
+ // Final poll — 300ms
99
+ let finalEnd = Date().addingTimeInterval(0.3)
100
+ while Date() < finalEnd {
101
+ if NSWorkspace.shared.frontmostApplication?.bundleIdentifier == bundleId {
102
+ return
103
+ }
104
+ Thread.sleep(forTimeInterval: 0.05)
105
+ }
106
+
107
+ // Focus never changed — report honestly
108
+ let actual = NSWorkspace.shared.frontmostApplication?.bundleIdentifier ?? "unknown"
109
+ throw BridgeError.general("Focus failed: \(actual) is frontmost instead of \(bundleId)")
110
+ }
111
+
112
/// Focuses a specific window identified by its CoreGraphics window ID.
///
/// The window is looked up in the CG window list to find its owning process,
/// that application is activated, and the matching accessibility (AX) window
/// is then raised. Raising is best-effort: if the app exposes no AX windows,
/// the CG bounds are unavailable, or no AX window sits at the CG window's
/// position, the function returns after activating the app without raising.
///
/// CG and AX windows share no public identifier, so they are joined on
/// geometry: position (within 2px) is required; size (within 5px) and an
/// identical non-empty title are used to pick between several windows that
/// share the same origin.
///
/// - Parameter windowId: The CG window number (`kCGWindowNumber`) to focus.
/// - Throws: `BridgeError.notFound` if the window list cannot be read, or no
///   window with `windowId` (and a readable owner PID) is present in it.
func focusWindow(windowId: Int) throws {
    // Enumerate with .optionAll (same options as listWindows()) so that every
    // window ID handed out by listWindows() — including minimized/off-screen
    // windows — can be resolved here instead of failing with "not found".
    guard let windowList = CGWindowListCopyWindowInfo([.optionAll, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] else {
        throw BridgeError.notFound("Cannot enumerate windows")
    }
    guard let target = windowList.first(where: { ($0[kCGWindowNumber as String] as? Int) == windowId }),
          let ownerPid = target[kCGWindowOwnerPID as String] as? Int else {
        throw BridgeError.notFound("Window \(windowId) not found")
    }

    // Activate the owning app first; raising a window of a background app
    // does not by itself bring that app to the front.
    if let app = NSRunningApplication(processIdentifier: pid_t(ownerPid)) {
        app.activate(options: .activateIgnoringOtherApps)
    }

    let appElement = AXUIElementCreateApplication(pid_t(ownerPid))
    guard let axWindows = self.ax.getAttribute(appElement, kAXWindowsAttribute) as? [AXUIElement] else {
        return // App activated, but can't raise specific window
    }

    // Without CG bounds there is nothing to match against. Falling back to
    // (0,0) would raise whichever window happens to sit at the origin.
    guard let bounds = target[kCGWindowBounds as String] as? [String: Any],
          let targetX = (bounds["X"] as? NSNumber)?.doubleValue,
          let targetY = (bounds["Y"] as? NSNumber)?.doubleValue else {
        return
    }
    let targetWidth = (bounds["Width"] as? NSNumber)?.doubleValue
    let targetHeight = (bounds["Height"] as? NSNumber)?.doubleValue
    let targetTitle = target[kCGWindowName as String] as? String ?? ""

    // Reads a CGPoint-valued AX attribute; nil when it is missing or not a CGPoint.
    func readPoint(_ element: AXUIElement, _ attribute: String) -> CGPoint? {
        guard let raw = self.ax.getAttribute(element, attribute),
              CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        var point = CGPoint.zero
        // The type-ID check above guarantees `raw` really is an AXValue.
        return AXValueGetValue(raw as! AXValue, .cgPoint, &point) ? point : nil
    }

    // Reads a CGSize-valued AX attribute; nil when it is missing or not a CGSize.
    func readSize(_ element: AXUIElement, _ attribute: String) -> CGSize? {
        guard let raw = self.ax.getAttribute(element, attribute),
              CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        var size = CGSize.zero
        return AXValueGetValue(raw as! AXValue, .cgSize, &size) ? size : nil
    }

    // Score every AX window located at the target position and keep the best.
    // Ties keep the earliest candidate, which matches the previous
    // first-position-match behaviour when no extra signal is available.
    var bestWindow: AXUIElement? = nil
    var bestScore = 0

    for axWin in axWindows {
        // Position match (within 2px) is the required baseline. A window whose
        // position cannot be read is skipped rather than treated as (0,0).
        guard let axPos = readPoint(axWin, kAXPositionAttribute),
              abs(Double(axPos.x) - targetX) < 2,
              abs(Double(axPos.y) - targetY) < 2 else {
            continue
        }
        var score = 10

        // Size match (within 5px)
        if let width = targetWidth, let height = targetHeight,
           let axSize = readSize(axWin, kAXSizeAttribute),
           abs(Double(axSize.width) - width) < 5,
           abs(Double(axSize.height) - height) < 5 {
            score += 5
        }

        // Title match — only counted when both titles are present, since the
        // CG title may be missing from the window dictionary.
        if !targetTitle.isEmpty,
           let axTitle = self.ax.getAttribute(axWin, kAXTitleAttribute) as? String,
           axTitle == targetTitle {
            score += 20
        }

        if score > bestScore {
            bestScore = score
            bestWindow = axWin
        }
    }

    if let match = bestWindow {
        AXUIElementPerformAction(match, kAXRaiseAction as CFString)
    }
}
56
153
 
57
154
  func listRunningApps() -> [[String: Any]] {
@@ -69,7 +166,9 @@ class AppManagement {
69
166
  }
70
167
 
71
168
  func listWindows() -> [[String: Any]] {
72
- guard let windowList = CGWindowListCopyWindowInfo([.optionOnScreenOnly, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] else {
169
+ // Use .optionAll to include minimized/off-screen windows (e.g. Safari minimized to dock).
170
+ // layer == 0 filter below already excludes desktop elements and system overlays.
171
+ guard let windowList = CGWindowListCopyWindowInfo([.optionAll, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] else {
73
172
  return []
74
173
  }
75
174
 
@@ -96,6 +195,9 @@ class AppManagement {
96
195
  let ownerName = window[kCGWindowOwnerName as String] as? String ?? ""
97
196
  let isOnScreen = window[kCGWindowIsOnscreen as String] as? Bool ?? true
98
197
 
198
+ // Skip tiny windows (under 10 units in both width and height): system placeholders, UI services
199
+ if rect.width < 10 && rect.height < 10 { return nil }
200
+
99
201
  // Look up bundle ID from PID
100
202
  let bundleId = NSRunningApplication(processIdentifier: pid_t(ownerPid))?.bundleIdentifier ?? ""
101
203
 
@@ -116,6 +218,114 @@ class AppManagement {
116
218
  }
117
219
  }
118
220
 
221
/// Lists windows with AX-derived metadata merged onto the CG window data.
///
/// Starts from `listWindows()` and, for every CG window that can be matched
/// to an accessibility (AX) window of the same process, adds three keys:
/// `"focused"`, `"isMain"` and `"subrole"`. Windows that cannot be matched
/// are returned unchanged, so callers must treat those keys as optional.
///
/// Matching is a multi-signal join, because CG and AX windows share no public
/// identifier: position (within 2px) is required; size (within 5px) and an
/// identical non-empty title add to the score, and the highest-scoring AX
/// window wins. AX windows whose position cannot be read are ignored, so they
/// can never be mistaken for a window located at (0,0).
///
/// - Returns: The dictionaries from `listWindows()`, enriched where possible.
func listWindowsWithAX() -> [[String: Any]] {
    var cgWindows = listWindows()

    // Snapshot of the AX attributes needed for matching and annotation.
    struct AXWindowInfo {
        let posX: Double
        let posY: Double
        /// nil when the AX size attribute could not be read.
        let size: CGSize?
        let title: String
        let focused: Bool
        let isMain: Bool
        let subrole: String
    }

    // Reads a CGPoint-valued AX attribute; nil when it is missing or not a CGPoint.
    func readPoint(_ element: AXUIElement, _ attribute: String) -> CGPoint? {
        guard let raw = self.ax.getAttribute(element, attribute),
              CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        var point = CGPoint.zero
        // The type-ID check above guarantees `raw` really is an AXValue.
        return AXValueGetValue(raw as! AXValue, .cgPoint, &point) ? point : nil
    }

    // Reads a CGSize-valued AX attribute; nil when it is missing or not a CGSize.
    func readSize(_ element: AXUIElement, _ attribute: String) -> CGSize? {
        guard let raw = self.ax.getAttribute(element, attribute),
              CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        var size = CGSize.zero
        return AXValueGetValue(raw as! AXValue, .cgSize, &size) ? size : nil
    }

    // Only query AX for processes that actually own a listed window.
    let pidSet = Set(cgWindows.compactMap { $0["pid"] as? Int })

    var axInfoByPid: [Int: [AXWindowInfo]] = [:]

    for pid in pidSet {
        let appElement = AXUIElementCreateApplication(pid_t(pid))
        guard let axWindows = self.ax.getAttribute(appElement, kAXWindowsAttribute) as? [AXUIElement] else {
            continue
        }

        var infos: [AXWindowInfo] = []
        for axWin in axWindows {
            // Position is the required matching baseline. Without it the
            // window cannot be joined reliably, so leave it out rather than
            // recording it at (0,0).
            guard let pos = readPoint(axWin, kAXPositionAttribute) else { continue }

            infos.append(AXWindowInfo(
                posX: Double(pos.x),
                posY: Double(pos.y),
                size: readSize(axWin, kAXSizeAttribute),
                title: self.ax.getAttribute(axWin, kAXTitleAttribute) as? String ?? "",
                focused: self.ax.getAttribute(axWin, kAXFocusedAttribute) as? Bool ?? false,
                isMain: self.ax.getAttribute(axWin, kAXMainAttribute) as? Bool ?? false,
                subrole: self.ax.getAttribute(axWin, kAXSubroleAttribute) as? String ?? ""
            ))
        }
        axInfoByPid[pid] = infos
    }

    // Merge AX info into CG windows using multi-signal matching
    for i in cgWindows.indices {
        guard let pid = cgWindows[i]["pid"] as? Int,
              let bounds = cgWindows[i]["bounds"] as? [String: Double],
              let bx = bounds["x"], let by = bounds["y"],
              let bw = bounds["width"], let bh = bounds["height"],
              let infos = axInfoByPid[pid] else {
            continue
        }

        let cgTitle = cgWindows[i]["title"] as? String ?? ""

        // Score each AX window — higher score = better match. Ties keep the
        // earliest candidate.
        var bestScore = 0
        var bestInfo: AXWindowInfo? = nil

        for info in infos {
            // Position match (within 2px) — required baseline
            guard abs(info.posX - bx) < 2, abs(info.posY - by) < 2 else { continue }
            var score = 10

            // Size match (within 5px)
            if let size = info.size,
               abs(Double(size.width) - bw) < 5,
               abs(Double(size.height) - bh) < 5 {
                score += 5
            }

            // Title match — strongest signal for disambiguation
            if !cgTitle.isEmpty && !info.title.isEmpty && cgTitle == info.title {
                score += 20
            }

            if score > bestScore {
                bestScore = score
                bestInfo = info
            }
        }

        if let info = bestInfo {
            cgWindows[i]["focused"] = info.focused
            cgWindows[i]["isMain"] = info.isMain
            cgWindows[i]["subrole"] = info.subrole
        }
    }

    return cgWindows
}
328
+
119
329
  func frontmostApp() -> [String: Any] {
120
330
  guard let app = NSWorkspace.shared.frontmostApplication else {
121
331
  return ["error": "No frontmost application"]