screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,537 @@
1
+ import CoreGraphics
2
+ import Foundation
3
+ import AppKit
4
+
5
+ class CoreGraphicsBridge {
6
+
7
+ // MARK: - PID-targeted Event Posting
8
+
9
/// Delivers `event` either to one process or to the global HID event stream.
///
/// - Parameters:
///   - event: The event to post.
///   - targetPid: When non-nil, the event is posted to that process via `postToPid`;
///     when nil, it is posted to the `.cghidEventTap` tap instead.
private func postEvent(_ event: CGEvent, targetPid: pid_t?) {
    guard let destination = targetPid else {
        // No target process requested: use the global HID stream.
        event.post(tap: .cghidEventTap)
        return
    }
    event.postToPid(destination)
}
19
+
20
+ // MARK: - Mouse Events
21
+
22
/// Sends `clickCount` press/release pairs for a mouse button at the given position.
///
/// - Parameters:
///   - x: X component of the `CGPoint` used as the event's cursor position.
///   - y: Y component of that point.
///   - button: Button name, resolved by `mouseButtonTypes(button:)` and `mouseButton(_:)`.
///   - clickCount: Number of press/release pairs to send. Values below 1 post nothing.
///   - modifiers: Modifier names, matched case-insensitively: "cmd"/"command"/"meta",
///     "shift", "alt"/"option", "ctrl"/"control". Unrecognised names are ignored.
///   - targetPid: Process that should receive a single click. Multi-clicks ignore it
///     and are always posted to the global HID tap.
func mouseClick(x: Double, y: Double, button: String, clickCount: Int, modifiers: [String] = [], targetPid: pid_t? = nil) {
    // `1...clickCount` traps at runtime when clickCount < 1, so treat that as "nothing to click".
    guard clickCount >= 1 else { return }

    let point = CGPoint(x: x, y: y)
    let (downType, upType) = mouseButtonTypes(button: button)
    let cgButton = mouseButton(button)

    var flags: CGEventFlags = []
    for mod in modifiers {
        switch mod.lowercased() {
        case "cmd", "command", "meta": flags.insert(.maskCommand)
        case "shift": flags.insert(.maskShift)
        case "alt", "option": flags.insert(.maskAlternate)
        case "ctrl", "control": flags.insert(.maskControl)
        default: break
        }
    }

    // Multi-click (double/triple) must use global HID posting — postToPid drops clickState.
    // postEvent posts to the global HID tap when it is given a nil pid.
    let destination: pid_t? = clickCount > 1 ? nil : targetPid

    for clickIndex in 1...clickCount {
        if let downEvent = CGEvent(mouseEventSource: nil, mouseType: downType, mouseCursorPosition: point, mouseButton: cgButton) {
            downEvent.setIntegerValueField(.mouseEventClickState, value: Int64(clickIndex))
            if !flags.isEmpty { downEvent.flags = flags }
            postEvent(downEvent, targetPid: destination)
        }
        usleep(10_000) // 10ms between down and up
        if let upEvent = CGEvent(mouseEventSource: nil, mouseType: upType, mouseCursorPosition: point, mouseButton: cgButton) {
            upEvent.setIntegerValueField(.mouseEventClickState, value: Int64(clickIndex))
            if !flags.isEmpty { upEvent.flags = flags }
            postEvent(upEvent, targetPid: destination)
        }
        if clickIndex < clickCount { usleep(30_000) } // 30ms between clicks (enough for triple-click)
    }
}
62
+
63
/// Posts a single `.mouseMoved` event positioned at (`x`, `y`).
///
/// - Parameters:
///   - x: X component of the event's cursor position.
///   - y: Y component of the event's cursor position.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func mouseMove(x: Double, y: Double, targetPid: pid_t? = nil) {
    guard let moveEvent = CGEvent(
        mouseEventSource: nil,
        mouseType: .mouseMoved,
        mouseCursorPosition: CGPoint(x: x, y: y),
        mouseButton: .left
    ) else {
        // Event creation failed; there is nothing to post.
        return
    }
    postEvent(moveEvent, targetPid: targetPid)
}
69
+
70
/// Performs a left-button drag from one point to another in ten interpolated steps.
///
/// Sequence: mouse-down at the source, a 100ms pause, ten `.leftMouseDragged` events
/// spaced 20ms apart along the straight line, then mouse-up at the destination.
///
/// - Parameters:
///   - fromX: X of the drag origin.
///   - fromY: Y of the drag origin.
///   - toX: X of the drag destination.
///   - toY: Y of the drag destination.
///   - modifiers: Modifier names (case-insensitive) applied to every event of the drag;
///     unrecognised names are ignored.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func mouseDrag(fromX: Double, fromY: Double, toX: Double, toY: Double, modifiers: [String] = [], targetPid: pid_t? = nil) {
    let heldFlags = modifiers.reduce(into: CGEventFlags()) { accumulated, name in
        switch name.lowercased() {
        case "cmd", "command", "meta": accumulated.insert(.maskCommand)
        case "shift": accumulated.insert(.maskShift)
        case "alt", "option": accumulated.insert(.maskAlternate)
        case "ctrl", "control": accumulated.insert(.maskControl)
        default: break
        }
    }

    // Creates one left-button event, applies the held modifiers and posts it.
    // A failed event creation is skipped silently.
    func emit(_ kind: CGEventType, at location: CGPoint) {
        guard let event = CGEvent(mouseEventSource: nil, mouseType: kind, mouseCursorPosition: location, mouseButton: .left) else {
            return
        }
        if !heldFlags.isEmpty { event.flags = heldFlags }
        postEvent(event, targetPid: targetPid)
    }

    // Mouse down at source
    emit(.leftMouseDown, at: CGPoint(x: fromX, y: fromY))
    usleep(100_000) // 100ms

    // Interpolate drag points
    let stepCount = 10
    for step in 1...stepCount {
        let fraction = Double(step) / Double(stepCount)
        let waypoint = CGPoint(
            x: fromX + (toX - fromX) * fraction,
            y: fromY + (toY - fromY) * fraction
        )
        emit(.leftMouseDragged, at: waypoint)
        usleep(20_000) // 20ms between steps
    }

    // Mouse up at destination
    emit(.leftMouseUp, at: CGPoint(x: toX, y: toY))
}
112
+
113
/// Presses the left mouse button at a position, holds it for a duration, then releases.
/// Used for accent character picker, long-press menus, etc.
///
/// - Parameters:
///   - x: X component of the press position.
///   - y: Y component of the press position.
///   - durationMs: Hold time in milliseconds. Negative values are treated as 0 and
///     values above `UInt32.max / 1000` are capped, because the hold is timed by
///     `usleep`, which takes a 32-bit microsecond count.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func mousePressAndHold(x: Double, y: Double, durationMs: Int, targetPid: pid_t? = nil) {
    let point = CGPoint(x: x, y: y)

    // `UInt32(durationMs) * 1000` traps for negative input and overflows above
    // ~4.29 million ms, so clamp before converting to microseconds.
    let maxHoldMs = Int(UInt32.max / 1000)
    let holdMicroseconds = UInt32(min(max(durationMs, 0), maxHoldMs)) * 1000

    if let downEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: point, mouseButton: .left) {
        postEvent(downEvent, targetPid: targetPid)
    }
    usleep(holdMicroseconds)
    if let upEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: point, mouseButton: .left) {
        postEvent(upEvent, targetPid: targetPid)
    }
}
126
+
127
/// Presses a key, holds it for a duration, then releases it.
/// Used for accent character picker (hold 'e' to get é, è, ê, etc.).
///
/// - Parameters:
///   - key: Key name; it is lower-cased and resolved with `keyCodeForString`.
///     Names that do not resolve cause the call to do nothing.
///   - durationMs: Hold time in milliseconds. Negative values are treated as 0 and
///     values above `UInt32.max / 1000` are capped, because the hold is timed by
///     `usleep`, which takes a 32-bit microsecond count.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func keyPressAndHold(key: String, durationMs: Int, targetPid: pid_t? = nil) {
    guard let code = keyCodeForString(key.lowercased()) else { return }
    let source = CoreGraphicsBridge.typingSource

    // `UInt32(durationMs) * 1000` traps for negative input and overflows above
    // ~4.29 million ms, so clamp before converting to microseconds.
    let maxHoldMs = Int(UInt32.max / 1000)
    let holdMicroseconds = UInt32(min(max(durationMs, 0), maxHoldMs)) * 1000

    if let downEvent = CGEvent(keyboardEventSource: source, virtualKey: code, keyDown: true) {
        postEvent(downEvent, targetPid: targetPid)
    }
    usleep(holdMicroseconds)
    if let upEvent = CGEvent(keyboardEventSource: source, virtualKey: code, keyDown: false) {
        postEvent(upEvent, targetPid: targetPid)
    }
}
141
+
142
/// Fast left-button flick: mouse-down, three quick drag steps, mouse-up.
///
/// Timing is 10ms after the press and 5ms after each of the three drag events.
/// The original author notes this is meant to trigger iOS swipe gestures.
///
/// - Parameters:
///   - fromX: X of the flick origin.
///   - fromY: Y of the flick origin.
///   - toX: X of the flick destination.
///   - toY: Y of the flick destination.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func mouseFlick(fromX: Double, fromY: Double, toX: Double, toY: Double, targetPid: pid_t? = nil) {
    // Creates and posts one left-button event; a failed creation is skipped.
    func emit(_ kind: CGEventType, at location: CGPoint) {
        if let event = CGEvent(mouseEventSource: nil, mouseType: kind, mouseCursorPosition: location, mouseButton: .left) {
            postEvent(event, targetPid: targetPid)
        }
    }

    let deltaX = toX - fromX
    let deltaY = toY - fromY

    emit(.leftMouseDown, at: CGPoint(x: fromX, y: fromY))
    usleep(10_000) // 10ms

    // Just 3 fast steps
    for step in stride(from: 1, through: 3, by: 1) {
        let fraction = Double(step) / 3.0
        emit(.leftMouseDragged, at: CGPoint(x: fromX + deltaX * fraction, y: fromY + deltaY * fraction))
        usleep(5_000) // 5ms
    }

    emit(.leftMouseUp, at: CGPoint(x: toX, y: toY))
}
166
+
167
/// Moves the pointer to a position and then posts a two-axis, line-based scroll-wheel event.
///
/// - Parameters:
///   - x: X of the position the pointer is moved to before scrolling.
///   - y: Y of that position.
///   - deltaX: Amount passed as the second wheel value (`wheel2`).
///   - deltaY: Amount passed as the first wheel value (`wheel1`).
///     Both deltas are clamped to the `Int32` range the event API accepts.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func scroll(x: Double, y: Double, deltaX: Int, deltaY: Int, targetPid: pid_t? = nil) {
    // Move mouse to position first
    mouseMove(x: x, y: y, targetPid: targetPid)
    usleep(50_000)

    // `Int32(_:)` traps on values outside the Int32 range; clamp instead of crashing.
    let vertical = Int32(clamping: deltaY)
    let horizontal = Int32(clamping: deltaX)

    if let scrollEvent = CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal, wheel3: 0) {
        postEvent(scrollEvent, targetPid: targetPid)
    }
}
176
+
177
+ // MARK: - Keyboard Events
178
+
179
/// Presses and releases one key while a set of modifier flags is applied.
///
/// Each entry of `keys` is lower-cased. Modifier names set the matching flag; every
/// other entry is resolved with `keyCodeForString`, and the result of the last such
/// lookup (including a failed one) is the key that is used. If no key resolves,
/// nothing is posted.
///
/// - Parameters:
///   - keys: Modifier names ("cmd"/"command"/"meta", "shift", "alt"/"option",
///     "ctrl"/"control", "fn") plus the key to press.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func keyCombo(keys: [String], targetPid: pid_t? = nil) {
    let modifierTable: [String: CGEventFlags] = [
        "cmd": .maskCommand, "command": .maskCommand, "meta": .maskCommand,
        "shift": .maskShift,
        "alt": .maskAlternate, "option": .maskAlternate,
        "ctrl": .maskControl, "control": .maskControl,
        "fn": .maskSecondaryFn,
    ]

    var heldFlags = CGEventFlags()
    var primaryKey: CGKeyCode?

    for name in keys.map({ $0.lowercased() }) {
        if let flag = modifierTable[name] {
            heldFlags.insert(flag)
        } else {
            primaryKey = keyCodeForString(name)
        }
    }

    guard let virtualKey = primaryKey else { return }

    if let press = CGEvent(keyboardEventSource: nil, virtualKey: virtualKey, keyDown: true) {
        press.flags = heldFlags
        postEvent(press, targetPid: targetPid)
    }
    usleep(50_000)
    if let release = CGEvent(keyboardEventSource: nil, virtualKey: virtualKey, keyDown: false) {
        release.flags = heldFlags
        postEvent(release, targetPid: targetPid)
    }
}
213
+
214
/// Shared event source for typing — associates events with the current login session
/// so Cocoa text views (NSTextView, etc.) accept them via the input method pipeline.
private static let typingSource: CGEventSource? = CGEventSource(stateID: .combinedSessionState)

/// Types `text` one character at a time as synthetic keyboard events.
///
/// Line breaks ("\n", "\r" and the combined "\r\n") are sent as a Return key press
/// and tabs as a Tab key press. Every other character is sent as a key event
/// carrying the character's UTF-16 units as its unicode string.
///
/// - Parameters:
///   - text: The text to type.
///   - targetPid: Forwarded to `postEvent`; nil means the global HID stream.
func typeText(text: String, targetPid: pid_t? = nil) {
    let source = CoreGraphicsBridge.typingSource

    // Presses and releases one virtual key using the pacing for control characters:
    // 30ms between down and up, 15ms afterwards.
    func tapKey(_ virtualKey: CGKeyCode) {
        if let down = CGEvent(keyboardEventSource: source, virtualKey: virtualKey, keyDown: true) {
            postEvent(down, targetPid: targetPid)
        }
        usleep(30_000)
        if let up = CGEvent(keyboardEventSource: source, virtualKey: virtualKey, keyDown: false) {
            postEvent(up, targetPid: targetPid)
        }
        usleep(15_000)
    }

    for char in text {
        // Handle control characters as real key presses.
        // "\r\n" is a single Character (one grapheme cluster) when iterating a String,
        // so it has to be matched explicitly or CRLF input never reaches this branch.
        if char == "\n" || char == "\r" || char == "\r\n" {
            tapKey(36) // Return
            continue
        }
        if char == "\t" {
            tapKey(48) // Tab
            continue
        }

        let chars = Array(String(char).utf16)
        // Use virtualKey 9 (unused on most layouts) for non-ASCII to prevent the
        // input method from resolving virtualKey 0 ('a') and overriding the unicode string.
        let isAscii = char.isASCII
        let vk: CGKeyCode = isAscii ? 0 : 9

        if let downEvent = CGEvent(keyboardEventSource: source, virtualKey: vk, keyDown: true) {
            downEvent.keyboardSetUnicodeString(stringLength: chars.count, unicodeString: chars)
            postEvent(downEvent, targetPid: targetPid)
        }
        // Non-ASCII needs slightly more time for the input method pipeline to process
        // but keep delays short to avoid bridge timeout on long strings (10s limit)
        usleep(isAscii ? 20_000 : 35_000)
        if let upEvent = CGEvent(keyboardEventSource: source, virtualKey: vk, keyDown: false) {
            upEvent.keyboardSetUnicodeString(stringLength: chars.count, unicodeString: chars)
            postEvent(upEvent, targetPid: targetPid)
        }
        usleep(isAscii ? 10_000 : 20_000)
    }
}
266
+
267
+ // MARK: - Screenshots
268
+
269
/// Consecutive CoreGraphics capture failures, keyed by window id.
/// Consulted before each window capture to decide whether to go straight to the CLI fallback.
private var cgWindowFailures: [Int: Int] = [:]

/// Number of consecutive failures at which a window is captured via the CLI only.
private static let CG_FAILURE_THRESHOLD: Int = 2
272
+
273
/// Runs `work` on a global `.userInitiated` queue and blocks the caller until it
/// finishes or the timeout elapses.
///
/// The work executes inside an `autoreleasepool`. On timeout the queued work is not
/// cancelled; this method simply stops waiting for it. The original author notes that
/// CGWindowListCreateImage can block indefinitely when screen recording permission
/// has not been granted, which is why the guard exists.
///
/// - Parameters:
///   - timeoutSec: Seconds to wait before giving up (default 10).
///   - work: The capture operation to run.
/// - Returns: The value produced by `work`.
/// - Throws: `BridgeError.permissionDenied` when the wait times out; otherwise whatever `work` threw.
private func timedCapture<T>(timeoutSec: Double = 10, _ work: @escaping () throws -> T) throws -> T {
    let finished = DispatchSemaphore(value: 0)
    var outcome: Result<T, Error>?

    DispatchQueue.global(qos: .userInitiated).async {
        autoreleasepool {
            outcome = Result { try work() }
        }
        finished.signal()
    }

    guard finished.wait(timeout: .now() + timeoutSec) == .success else {
        throw BridgeError.permissionDenied("Screen capture timed out — screen recording permission likely not granted. Grant access in System Settings → Privacy & Security → Screen Recording, then restart.")
    }

    // The semaphore is only signalled after `outcome` has been assigned,
    // so it is always populated once the wait succeeds.
    return try outcome!.get()
}
300
+
301
/// Captures the screen, or a region of it, to a temporary PNG file.
///
/// Tries the in-process `CGWindowListCreateImage` path first with a 5 second timeout.
/// If that path throws for any reason, falls back to the `screencapture` CLI.
///
/// - Parameter region: Optional rectangle with keys "x", "y", "width", "height"
///   (missing keys count as 0). Nil captures everything (`CGRect.infinite`).
/// - Returns: A dictionary with "path", "width" and "height".
/// - Throws: Whatever the CLI fallback throws if both paths fail.
func captureScreen(region: [String: Double]?) throws -> [String: Any] {
    let captureRect: CGRect = region.map { bounds in
        CGRect(
            x: bounds["x"] ?? 0,
            y: bounds["y"] ?? 0,
            width: bounds["width"] ?? 0,
            height: bounds["height"] ?? 0
        )
    } ?? CGRect.infinite

    do {
        // Fast, in-process attempt.
        return try timedCapture(timeoutSec: 5) {
            guard let snapshot = CGWindowListCreateImage(captureRect, .optionOnScreenOnly, kCGNullWindowID, .bestResolution) else {
                throw BridgeError.general("CGWindowListCreateImage returned nil")
            }
            let savedPath = try self.saveImage(snapshot)
            return ["path": savedPath, "width": snapshot.width, "height": snapshot.height]
        }
    } catch {
        // Fallback: use macOS screencapture CLI
        return try screencaptureCliFullscreen(region: region)
    }
}
328
+
329
/// Captures a single window to a temporary PNG file.
///
/// The CoreGraphics path is skipped in favour of `screencapture -l` when `safeCLI` is
/// true or when this window has already failed `CG_FAILURE_THRESHOLD` times in a row.
/// Otherwise CoreGraphics is tried with a 5 second timeout: success resets the
/// window's failure count, any error increments it and falls back to the CLI.
///
/// - Parameters:
///   - windowId: Identifier of the window to capture.
///   - safeCLI: When true, always use the CLI (the original author uses this for
///     browser windows that crash the CG API).
/// - Returns: A dictionary with "path", "width" and "height".
/// - Throws: Whatever the CLI capture throws.
func captureWindow(windowId: Int, safeCLI: Bool = false) throws -> [String: Any] {
    let priorFailures = cgWindowFailures[windowId, default: 0]
    if safeCLI || priorFailures >= CoreGraphicsBridge.CG_FAILURE_THRESHOLD {
        return try screencaptureCliWindow(windowId: windowId)
    }

    let captured: [String: Any]
    do {
        captured = try timedCapture(timeoutSec: 5) {
            guard let snapshot = CGWindowListCreateImage(
                .null, .optionIncludingWindow, CGWindowID(windowId), [.bestResolution, .boundsIgnoreFraming]
            ) else {
                throw BridgeError.general("CGWindowListCreateImage returned nil for window \(windowId)")
            }
            let savedPath = try self.saveImage(snapshot)
            return ["path": savedPath, "width": snapshot.width, "height": snapshot.height]
        }
    } catch {
        // Track CG failure so we prefer CLI next time
        cgWindowFailures[windowId] = priorFailures + 1
        // Fallback: use screencapture -l (runs in a subprocess)
        return try screencaptureCliWindow(windowId: windowId)
    }

    // CG API succeeded — reset failure counter
    cgWindowFailures[windowId] = 0
    return captured
}
361
+
362
/// Fallback screenshot taken by running `/usr/sbin/screencapture` in a subprocess.
///
/// - Parameter region: Optional rectangle with keys "x", "y", "width", "height"
///   (missing keys count as 0; values are truncated to integers). When present it is
///   passed to the tool as `-R x,y,w,h`.
/// - Returns: A dictionary with "path", "width" and "height" read back from the written file.
/// - Throws: Errors from launching the process, or `BridgeError.general` on a non-zero exit status.
private func screencaptureCliFullscreen(region: [String: Double]?) throws -> [String: Any] {
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("bridge_screenshot_\(UUID().uuidString).png")

    var arguments = ["-x"] // -x = no sound
    if let bounds = region {
        let originX = Int(bounds["x"] ?? 0)
        let originY = Int(bounds["y"] ?? 0)
        let width = Int(bounds["width"] ?? 0)
        let height = Int(bounds["height"] ?? 0)
        arguments += ["-R", "\(originX),\(originY),\(width),\(height)"]
    }
    arguments.append(outputURL.path)

    let tool = Process()
    tool.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture")
    tool.arguments = arguments
    try tool.run()
    tool.waitUntilExit()

    let status = tool.terminationStatus
    if status != 0 {
        throw BridgeError.general("screencapture failed with exit code \(status)")
    }

    return readImageDimensions(fileURL: outputURL)
}
390
+
391
/// Fallback window capture that runs `/usr/sbin/screencapture -x -l <windowId>` in a subprocess.
///
/// - Parameter windowId: Identifier of the window, passed to the tool's `-l` option.
/// - Returns: A dictionary with "path", "width" and "height" read back from the written file.
/// - Throws: Errors from launching the process, or `BridgeError.general` on a non-zero exit status.
private func screencaptureCliWindow(windowId: Int) throws -> [String: Any] {
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("bridge_screenshot_\(UUID().uuidString).png")

    let tool = Process()
    tool.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture")
    tool.arguments = ["-x", "-l", String(windowId), outputURL.path]
    try tool.run()
    tool.waitUntilExit()

    let status = tool.terminationStatus
    if status != 0 {
        throw BridgeError.general("screencapture -l failed with exit code \(status)")
    }

    return readImageDimensions(fileURL: outputURL)
}
410
+
411
/// Loads the image at `fileURL` and reports its pixel dimensions.
///
/// - Parameter fileURL: Location of the image file.
/// - Returns: A dictionary with "path", "width" and "height". Width and height are 0
///   when the file cannot be loaded or converted to a `CGImage`.
private func readImageDimensions(fileURL: URL) -> [String: Any] {
    var pixelWidth = 0
    var pixelHeight = 0

    if let decoded = NSImage(contentsOf: fileURL)?.cgImage(forProposedRect: nil, context: nil, hints: nil) {
        pixelWidth = decoded.width
        pixelHeight = decoded.height
    }

    return ["path": fileURL.path, "width": pixelWidth, "height": pixelHeight]
}
419
+
420
/// Captures a window and returns it as a base64-encoded PNG string held in memory.
///
/// The CoreGraphics path writes no temp file. It is skipped in favour of the
/// file-based fallback when `safeCLI` is true or when this window has already failed
/// `CG_FAILURE_THRESHOLD` times in a row. A CoreGraphics error increments the
/// window's failure count and falls back; success resets the count.
///
/// - Parameters:
///   - windowId: Identifier of the window to capture.
///   - safeCLI: When true, always use the file-based fallback.
/// - Returns: A dictionary with "base64", "width" and "height".
/// - Throws: Whatever the file-based fallback throws.
func captureWindowBuffer(windowId: Int, safeCLI: Bool = false) throws -> [String: Any] {
    let priorFailures = cgWindowFailures[windowId, default: 0]
    if safeCLI || priorFailures >= CoreGraphicsBridge.CG_FAILURE_THRESHOLD {
        return try captureWindowBufferViaFile(windowId: windowId)
    }

    // Encode CGImage → PNG Data in memory (no temp file), then base64 it.
    func pngBase64(from snapshot: CGImage) throws -> String {
        let buffer = NSMutableData()
        guard let destination = CGImageDestinationCreateWithData(buffer as CFMutableData, "public.png" as CFString, 1, nil) else {
            throw BridgeError.general("Failed to create in-memory image destination")
        }
        CGImageDestinationAddImage(destination, snapshot, nil)
        if !CGImageDestinationFinalize(destination) {
            throw BridgeError.general("Failed to encode PNG to memory buffer")
        }
        return (buffer as Data).base64EncodedString()
    }

    let captured: [String: Any]
    do {
        captured = try timedCapture(timeoutSec: 5) {
            guard let snapshot = CGWindowListCreateImage(
                .null, .optionIncludingWindow, CGWindowID(windowId), [.bestResolution, .boundsIgnoreFraming]
            ) else {
                throw BridgeError.general("CGWindowListCreateImage returned nil for window \(windowId)")
            }
            let encoded = try pngBase64(from: snapshot)
            return ["base64": encoded, "width": snapshot.width, "height": snapshot.height]
        }
    } catch {
        cgWindowFailures[windowId, default: 0] += 1
        return try captureWindowBufferViaFile(windowId: windowId)
    }

    cgWindowFailures[windowId] = 0
    return captured
}
463
+
464
/// Fallback for `captureWindowBuffer`: capture to a file via the `screencapture` CLI,
/// then read the file and base64-encode it.
///
/// This calls `screencaptureCliWindow` directly. Going through `captureWindow` would
/// try the CoreGraphics path again, which defeats `safeCLI` and repeats a capture
/// that has just failed or timed out.
///
/// - Parameter windowId: Identifier of the window to capture.
/// - Returns: A dictionary with "base64", "width" and "height".
/// - Throws: Errors from the CLI capture, `BridgeError.general` if it reports no path,
///   or the error from reading the captured file.
private func captureWindowBufferViaFile(windowId: Int) throws -> [String: Any] {
    let fileResult = try screencaptureCliWindow(windowId: windowId)
    guard let path = fileResult["path"] as? String else {
        throw BridgeError.general("captureWindow fallback returned no path")
    }

    let url = URL(fileURLWithPath: path)
    // Clean up the temp file on every exit path, including a failed read.
    defer { try? FileManager.default.removeItem(at: url) }

    let data = try Data(contentsOf: url)
    let width = fileResult["width"] as? Int ?? 0
    let height = fileResult["height"] as? Int ?? 0
    return ["base64": data.base64EncodedString(), "width": width, "height": height]
}
479
+
480
/// Writes `image` as a PNG into the temporary directory under a unique file name.
///
/// - Parameter image: The image to encode.
/// - Returns: The file system path of the written PNG.
/// - Throws: `BridgeError.general` when the image destination cannot be created or finalized.
private func saveImage(_ image: CGImage) throws -> String {
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("bridge_screenshot_\(UUID().uuidString).png")

    guard let destination = CGImageDestinationCreateWithURL(outputURL as CFURL, "public.png" as CFString, 1, nil) else {
        throw BridgeError.general("Failed to create image destination")
    }

    CGImageDestinationAddImage(destination, image, nil)
    if !CGImageDestinationFinalize(destination) {
        throw BridgeError.general("Failed to write screenshot")
    }

    return outputURL.path
}
495
+
496
+ // MARK: - Key Code Mapping
497
+
498
/// Maps a button name to its (mouse-down, mouse-up) event type pair.
///
/// - Parameter button: Button name, compared case-insensitively. "right" selects the
///   right button, "other" or "middle" the other-button events; anything else is left.
/// - Returns: The down and up `CGEventType` values for that button.
private func mouseButtonTypes(button: String) -> (CGEventType, CGEventType) {
    let name = button.lowercased()
    if name == "right" {
        return (.rightMouseDown, .rightMouseUp)
    }
    if name == "other" || name == "middle" {
        return (.otherMouseDown, .otherMouseUp)
    }
    return (.leftMouseDown, .leftMouseUp)
}
508
+
509
/// Maps a button name to the matching `CGMouseButton`.
///
/// - Parameter button: Button name, compared case-insensitively. "right" maps to
///   `.right`, "other" or "middle" to `.center`; anything else maps to `.left`.
/// - Returns: The `CGMouseButton` for that name.
private func mouseButton(_ button: String) -> CGMouseButton {
    let name = button.lowercased()
    if name == "right" {
        return .right
    }
    if name == "other" || name == "middle" {
        return .center
    }
    return .left
}
516
+
517
/// Lookup table from key names to virtual key codes.
/// Held in a static so it is built once instead of on every lookup.
private enum KeyCodeTable {
    static let codes: [String: CGKeyCode] = [
        "a": 0, "b": 11, "c": 8, "d": 2, "e": 14, "f": 3, "g": 5,
        "h": 4, "i": 34, "j": 38, "k": 40, "l": 37, "m": 46, "n": 45,
        "o": 31, "p": 35, "q": 12, "r": 15, "s": 1, "t": 17, "u": 32,
        "v": 9, "w": 13, "x": 7, "y": 16, "z": 6,
        "0": 29, "1": 18, "2": 19, "3": 20, "4": 21, "5": 23,
        "6": 22, "7": 26, "8": 28, "9": 25,
        "return": 36, "enter": 36, "tab": 48, "space": 49,
        "delete": 51, "backspace": 51, "escape": 53, "esc": 53,
        "up": 126, "down": 125, "left": 123, "right": 124,
        "f1": 122, "f2": 120, "f3": 99, "f4": 118, "f5": 96,
        "f6": 97, "f7": 98, "f8": 100, "f9": 101, "f10": 109,
        "f11": 103, "f12": 111,
        "home": 115, "end": 119, "pageup": 116, "pagedown": 121,
        "-": 27, "=": 24, "[": 33, "]": 30, "\\": 42,
        ";": 41, "'": 39, ",": 43, ".": 47, "/": 44, "`": 50,
    ]
}

/// Resolves a key name to its virtual key code.
///
/// - Parameter key: Key name exactly as it appears in the table (lower-case letters,
///   digits, punctuation, or names such as "return", "esc", "f5", "pageup").
///   The lookup is case-sensitive; callers in this class lower-case the name first.
/// - Returns: The matching `CGKeyCode`, or nil when the name is not in the table.
private func keyCodeForString(_ key: String) -> CGKeyCode? {
    return KeyCodeTable.codes[key]
}
537
+ }
@@ -0,0 +1,120 @@
1
+ import ApplicationServices
2
+ import Foundation
3
+
4
/// Bridges macOS Accessibility (AX) notifications into dictionary events.
///
/// At most one `AXObserver` is kept per process ID. Each observer's run-loop
/// source is attached to the main run loop, and every received notification is
/// converted into a `[String: Any]` payload and handed to `onEvent`.
class ObserverBridge {
    /// Active observers, keyed by the PID they were created for.
    private var observers: [pid_t: AXObserver] = [:]

    /// Receives one payload per notification; see `handleNotification` for the keys.
    var onEvent: (([String: Any]) -> Void)?

    /// Reused for every event timestamp; creating a formatter per event is wasteful.
    private let timestampFormatter = ISO8601DateFormatter()

    /// Notifications registered when `startObserving` is called without an explicit list.
    private let defaultNotifications: [String] = [
        kAXValueChangedNotification,
        kAXFocusedUIElementChangedNotification,
        kAXWindowCreatedNotification,
        kAXUIElementDestroyedNotification,
        kAXTitleChangedNotification,
        kAXMenuOpenedNotification,
        kAXSelectedTextChangedNotification,
        kAXLayoutChangedNotification,
    ]

    deinit {
        // The AX callback is handed an *unretained* pointer to this instance.
        // Detach every run-loop source before the instance goes away so that no
        // late notification can reach `observerCallback` with a dangling pointer.
        for pid in Array(observers.keys) {
            stopObserving(pid: pid)
        }
    }

    /// Starts observing accessibility notifications for a process.
    ///
    /// Any observer already registered for `pid` is stopped first.
    ///
    /// - Parameters:
    ///   - pid: Process ID of the application to observe.
    ///   - notifications: Notification names to register, or `nil` to use
    ///     `defaultNotifications`.
    /// - Throws: `BridgeError.general` when the `AXObserver` cannot be created.
    func startObserving(pid: pid_t, notifications: [String]?) throws {
        // Stop existing observer for this PID if any
        stopObserving(pid: pid)

        var observer: AXObserver?
        let result = AXObserverCreate(pid, observerCallback, &observer)
        guard result == .success, let obs = observer else {
            throw BridgeError.general("Failed to create AX observer for PID \(pid), code \(result.rawValue)")
        }

        let appElement = AXUIElementCreateApplication(pid)
        let notifs = notifications ?? defaultNotifications

        // Pass self pointer as refcon for callback. It is unretained, so the
        // bridge must outlive its observers (see `deinit`).
        let refcon = Unmanaged.passUnretained(self).toOpaque()
        for notif in notifs {
            // Registration is best-effort: the result of each call is not checked,
            // so a notification that fails to register is skipped silently.
            AXObserverAddNotification(obs, appElement, notif as CFString, refcon)
        }

        CFRunLoopAddSource(
            CFRunLoopGetMain(),
            AXObserverGetRunLoopSource(obs),
            .defaultMode
        )

        observers[pid] = obs
    }

    /// Stops observing a process.
    ///
    /// Removes the observer's run-loop source from the main run loop and drops
    /// the stored observer. Does nothing when `pid` is not being observed.
    ///
    /// - Parameter pid: Process ID passed to `startObserving`.
    func stopObserving(pid: pid_t) {
        guard let observer = observers[pid] else { return }
        CFRunLoopRemoveSource(
            CFRunLoopGetMain(),
            AXObserverGetRunLoopSource(observer),
            .defaultMode
        )
        observers.removeValue(forKey: pid)
    }

    /// Builds an event payload for a received notification and forwards it to `onEvent`.
    ///
    /// The payload always contains `type`, `notification`, `timestamp` and `pid`.
    /// `elementRole` and `elementLabel` are added when the element's role / title
    /// can be read as strings, and `newValue` is added for value-changed
    /// notifications when the element's value can be read.
    ///
    /// - Parameters:
    ///   - observer: The observer that delivered the notification (unused here).
    ///   - element: The UI element the notification refers to.
    ///   - notification: The raw AX notification name.
    func handleNotification(observer: AXObserver, element: AXUIElement, notification: String) {
        var event: [String: Any] = [
            "type": mapNotificationType(notification),
            "notification": notification,
            "timestamp": timestampFormatter.string(from: Date()),
        ]

        // Get PID. If the call fails, `pid` keeps its initial value of 0.
        var pid: pid_t = 0
        AXUIElementGetPid(element, &pid)
        event["pid"] = Int(pid)

        // Get element role
        var roleValue: AnyObject?
        if AXUIElementCopyAttributeValue(element, kAXRoleAttribute as CFString, &roleValue) == .success {
            event["elementRole"] = roleValue as? String
        }

        // Get element title
        var titleValue: AnyObject?
        if AXUIElementCopyAttributeValue(element, kAXTitleAttribute as CFString, &titleValue) == .success {
            event["elementLabel"] = titleValue as? String
        }

        // Get element value for value_changed
        if notification == kAXValueChangedNotification {
            var valObj: AnyObject?
            // Bind the value instead of force-unwrapping: a success result with a
            // nil value must not crash the bridge.
            if AXUIElementCopyAttributeValue(element, kAXValueAttribute as CFString, &valObj) == .success,
               let value = valObj {
                event["newValue"] = "\(value)"
            }
        }

        onEvent?(event)
    }

    /// Maps a raw AX notification name to the short event type used in payloads.
    ///
    /// Element-destroyed notifications are reported as `"window_closed"`. Names
    /// without a mapping (including the selected-text-changed notification) are
    /// returned unchanged.
    private func mapNotificationType(_ notification: String) -> String {
        switch notification {
        case kAXValueChangedNotification: return "value_changed"
        case kAXFocusedUIElementChangedNotification: return "focus_changed"
        case kAXWindowCreatedNotification: return "window_created"
        case kAXUIElementDestroyedNotification: return "window_closed"
        case kAXTitleChangedNotification: return "title_changed"
        case kAXMenuOpenedNotification: return "menu_opened"
        case kAXLayoutChangedNotification: return "layout_changed"
        default: return notification
        }
    }
}
105
+
106
/// C-compatible entry point handed to `AXObserverCreate`.
///
/// Recovers the `ObserverBridge` from the opaque `refcon` pointer (without
/// taking ownership of it) and forwards the notification to
/// `handleNotification`. Returns immediately when `refcon` is `nil`.
private func observerCallback(
    observer: AXObserver,
    element: AXUIElement,
    notification: CFString,
    refcon: UnsafeMutableRawPointer?
) {
    guard let context = refcon else { return }
    let notificationName = notification as String
    Unmanaged<ObserverBridge>
        .fromOpaque(context)
        .takeUnretainedValue()
        .handleNotification(
            observer: observer,
            element: element,
            notification: notificationName
        )
}