screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -4,40 +4,88 @@ import AppKit
4
4
 
5
5
  class CoreGraphicsBridge {
6
6
 
7
+ // MARK: - PID-targeted Event Posting
8
+
9
+ /// Post a CGEvent to a specific process (PID-targeted) or to the global HID stream.
10
+ /// When targetPid is provided, posts the event directly to that process
11
+ /// instead of broadcasting to the frontmost app via the global HID stream.
12
+ private func postEvent(_ event: CGEvent, targetPid: pid_t?) {
13
+ if let pid = targetPid {
14
+ event.postToPid(pid)
15
+ } else {
16
+ event.post(tap: .cghidEventTap)
17
+ }
18
+ }
19
+
7
20
  // MARK: - Mouse Events
8
21
 
9
- func mouseClick(x: Double, y: Double, button: String, clickCount: Int) {
22
+ func mouseClick(x: Double, y: Double, button: String, clickCount: Int, modifiers: [String] = [], targetPid: pid_t? = nil) {
10
23
  let point = CGPoint(x: x, y: y)
11
24
 
12
25
  let (downType, upType) = mouseButtonTypes(button: button)
26
+ var flags: CGEventFlags = []
27
+ for mod in modifiers {
28
+ switch mod.lowercased() {
29
+ case "cmd", "command", "meta": flags.insert(.maskCommand)
30
+ case "shift": flags.insert(.maskShift)
31
+ case "alt", "option": flags.insert(.maskAlternate)
32
+ case "ctrl", "control": flags.insert(.maskControl)
33
+ default: break
34
+ }
35
+ }
13
36
 
14
- for _ in 0..<clickCount {
37
+ // Multi-click (double/triple) must use global HID posting — postToPid drops clickState
38
+ let useGlobal = clickCount > 1
39
+ for i in 1...clickCount {
15
40
  if let downEvent = CGEvent(mouseEventSource: nil, mouseType: downType, mouseCursorPosition: point, mouseButton: mouseButton(button)) {
16
- downEvent.setIntegerValueField(.mouseEventClickState, value: Int64(clickCount))
17
- downEvent.post(tap: .cghidEventTap)
41
+ downEvent.setIntegerValueField(.mouseEventClickState, value: Int64(i))
42
+ if !flags.isEmpty { downEvent.flags = flags }
43
+ if useGlobal {
44
+ downEvent.post(tap: .cghidEventTap)
45
+ } else {
46
+ postEvent(downEvent, targetPid: targetPid)
47
+ }
18
48
  }
19
- usleep(50_000) // 50ms between down and up
49
+ usleep(10_000) // 10ms between down and up
20
50
  if let upEvent = CGEvent(mouseEventSource: nil, mouseType: upType, mouseCursorPosition: point, mouseButton: mouseButton(button)) {
21
- upEvent.setIntegerValueField(.mouseEventClickState, value: Int64(clickCount))
22
- upEvent.post(tap: .cghidEventTap)
51
+ upEvent.setIntegerValueField(.mouseEventClickState, value: Int64(i))
52
+ if !flags.isEmpty { upEvent.flags = flags }
53
+ if useGlobal {
54
+ upEvent.post(tap: .cghidEventTap)
55
+ } else {
56
+ postEvent(upEvent, targetPid: targetPid)
57
+ }
23
58
  }
59
+ if i < clickCount { usleep(30_000) } // 30ms between clicks (enough for triple-click)
24
60
  }
25
61
  }
26
62
 
27
- func mouseMove(x: Double, y: Double) {
63
+ func mouseMove(x: Double, y: Double, targetPid: pid_t? = nil) {
28
64
  let point = CGPoint(x: x, y: y)
29
65
  if let event = CGEvent(mouseEventSource: nil, mouseType: .mouseMoved, mouseCursorPosition: point, mouseButton: .left) {
30
- event.post(tap: .cghidEventTap)
66
+ postEvent(event, targetPid: targetPid)
31
67
  }
32
68
  }
33
69
 
34
- func mouseDrag(fromX: Double, fromY: Double, toX: Double, toY: Double) {
70
+ func mouseDrag(fromX: Double, fromY: Double, toX: Double, toY: Double, modifiers: [String] = [], targetPid: pid_t? = nil) {
35
71
  let from = CGPoint(x: fromX, y: fromY)
36
72
  let to = CGPoint(x: toX, y: toY)
37
73
 
74
+ var flags: CGEventFlags = []
75
+ for mod in modifiers {
76
+ switch mod.lowercased() {
77
+ case "cmd", "command", "meta": flags.insert(.maskCommand)
78
+ case "shift": flags.insert(.maskShift)
79
+ case "alt", "option": flags.insert(.maskAlternate)
80
+ case "ctrl", "control": flags.insert(.maskControl)
81
+ default: break
82
+ }
83
+ }
84
+
38
85
  // Mouse down at source
39
86
  if let downEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: from, mouseButton: .left) {
40
- downEvent.post(tap: .cghidEventTap)
87
+ if !flags.isEmpty { downEvent.flags = flags }
88
+ postEvent(downEvent, targetPid: targetPid)
41
89
  }
42
90
  usleep(100_000) // 100ms
43
91
 
@@ -49,24 +97,55 @@ class CoreGraphicsBridge {
49
97
  let y = fromY + (toY - fromY) * t
50
98
  let point = CGPoint(x: x, y: y)
51
99
  if let dragEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseDragged, mouseCursorPosition: point, mouseButton: .left) {
52
- dragEvent.post(tap: .cghidEventTap)
100
+ if !flags.isEmpty { dragEvent.flags = flags }
101
+ postEvent(dragEvent, targetPid: targetPid)
53
102
  }
54
103
  usleep(20_000) // 20ms between steps
55
104
  }
56
105
 
57
106
  // Mouse up at destination
58
107
  if let upEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: to, mouseButton: .left) {
59
- upEvent.post(tap: .cghidEventTap)
108
+ if !flags.isEmpty { upEvent.flags = flags }
109
+ postEvent(upEvent, targetPid: targetPid)
110
+ }
111
+ }
112
+
113
+ /// Press and hold at a position for a duration (milliseconds).
114
+ /// Used for accent character picker, long-press menus, etc.
115
+ func mousePressAndHold(x: Double, y: Double, durationMs: Int, targetPid: pid_t? = nil) {
116
+ let point = CGPoint(x: x, y: y)
117
+
118
+ if let downEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: point, mouseButton: .left) {
119
+ postEvent(downEvent, targetPid: targetPid)
120
+ }
121
+ usleep(UInt32(durationMs) * 1000)
122
+ if let upEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: point, mouseButton: .left) {
123
+ postEvent(upEvent, targetPid: targetPid)
124
+ }
125
+ }
126
+
127
+ /// Key press and hold for a duration (milliseconds).
128
+ /// Used for accent character picker (hold 'e' to get é, è, ê, etc.).
129
+ func keyPressAndHold(key: String, durationMs: Int, targetPid: pid_t? = nil) {
130
+ guard let code = keyCodeForString(key.lowercased()) else { return }
131
+ let source = CoreGraphicsBridge.typingSource
132
+
133
+ if let downEvent = CGEvent(keyboardEventSource: source, virtualKey: code, keyDown: true) {
134
+ postEvent(downEvent, targetPid: targetPid)
135
+ }
136
+ usleep(UInt32(durationMs) * 1000)
137
+ if let upEvent = CGEvent(keyboardEventSource: source, virtualKey: code, keyDown: false) {
138
+ postEvent(upEvent, targetPid: targetPid)
60
139
  }
61
140
  }
62
141
 
63
142
  /// Fast flick gesture — 3 steps, 5ms gaps. Triggers iOS swipe gestures.
64
- func mouseFlick(fromX: Double, fromY: Double, toX: Double, toY: Double) {
143
+ func mouseFlick(fromX: Double, fromY: Double, toX: Double, toY: Double, targetPid: pid_t? = nil) {
65
144
  let from = CGPoint(x: fromX, y: fromY)
66
145
  let to = CGPoint(x: toX, y: toY)
67
146
 
68
147
  if let downEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: from, mouseButton: .left) {
69
- downEvent.post(tap: .cghidEventTap)
148
+ postEvent(downEvent, targetPid: targetPid)
70
149
  }
71
150
  usleep(10_000) // 10ms
72
151
 
@@ -75,29 +154,29 @@ class CoreGraphicsBridge {
75
154
  let t = Double(i) / 3.0
76
155
  let point = CGPoint(x: fromX + (toX - fromX) * t, y: fromY + (toY - fromY) * t)
77
156
  if let dragEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseDragged, mouseCursorPosition: point, mouseButton: .left) {
78
- dragEvent.post(tap: .cghidEventTap)
157
+ postEvent(dragEvent, targetPid: targetPid)
79
158
  }
80
159
  usleep(5_000) // 5ms
81
160
  }
82
161
 
83
162
  if let upEvent = CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: to, mouseButton: .left) {
84
- upEvent.post(tap: .cghidEventTap)
163
+ postEvent(upEvent, targetPid: targetPid)
85
164
  }
86
165
  }
87
166
 
88
- func scroll(x: Double, y: Double, deltaX: Int, deltaY: Int) {
167
+ func scroll(x: Double, y: Double, deltaX: Int, deltaY: Int, targetPid: pid_t? = nil) {
89
168
  // Move mouse to position first
90
- mouseMove(x: x, y: y)
169
+ mouseMove(x: x, y: y, targetPid: targetPid)
91
170
  usleep(50_000)
92
171
 
93
172
  if let scrollEvent = CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: Int32(deltaY), wheel2: Int32(deltaX), wheel3: 0) {
94
- scrollEvent.post(tap: .cghidEventTap)
173
+ postEvent(scrollEvent, targetPid: targetPid)
95
174
  }
96
175
  }
97
176
 
98
177
  // MARK: - Keyboard Events
99
178
 
100
- func keyCombo(keys: [String]) {
179
+ func keyCombo(keys: [String], targetPid: pid_t? = nil) {
101
180
  var modifiers: CGEventFlags = []
102
181
  var keyCode: CGKeyCode?
103
182
 
@@ -123,63 +202,279 @@ class CoreGraphicsBridge {
123
202
 
124
203
  if let downEvent = CGEvent(keyboardEventSource: nil, virtualKey: code, keyDown: true) {
125
204
  downEvent.flags = modifiers
126
- downEvent.post(tap: .cghidEventTap)
205
+ postEvent(downEvent, targetPid: targetPid)
127
206
  }
128
207
  usleep(50_000)
129
208
  if let upEvent = CGEvent(keyboardEventSource: nil, virtualKey: code, keyDown: false) {
130
209
  upEvent.flags = modifiers
131
- upEvent.post(tap: .cghidEventTap)
210
+ postEvent(upEvent, targetPid: targetPid)
132
211
  }
133
212
  }
134
213
 
135
- func typeText(text: String) {
214
+ /// Shared event source for typing — associates events with the current login session
215
+ /// so Cocoa text views (NSTextView, etc.) accept them via the input method pipeline.
216
+ private static let typingSource: CGEventSource? = CGEventSource(stateID: .combinedSessionState)
217
+
218
+ func typeText(text: String, targetPid: pid_t? = nil) {
219
+ let source = CoreGraphicsBridge.typingSource
136
220
  for char in text {
221
+ // Handle control characters as real key presses
222
+ if char == "\n" || char == "\r" {
223
+ if let down = CGEvent(keyboardEventSource: source, virtualKey: 36, keyDown: true) { // Return
224
+ postEvent(down, targetPid: targetPid)
225
+ }
226
+ usleep(30_000)
227
+ if let up = CGEvent(keyboardEventSource: source, virtualKey: 36, keyDown: false) {
228
+ postEvent(up, targetPid: targetPid)
229
+ }
230
+ usleep(15_000)
231
+ continue
232
+ }
233
+ if char == "\t" {
234
+ if let down = CGEvent(keyboardEventSource: source, virtualKey: 48, keyDown: true) { // Tab
235
+ postEvent(down, targetPid: targetPid)
236
+ }
237
+ usleep(30_000)
238
+ if let up = CGEvent(keyboardEventSource: source, virtualKey: 48, keyDown: false) {
239
+ postEvent(up, targetPid: targetPid)
240
+ }
241
+ usleep(15_000)
242
+ continue
243
+ }
244
+
137
245
  let str = String(char)
138
- if let event = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: true) {
139
- let chars = Array(str.utf16)
140
- event.keyboardSetUnicodeString(stringLength: chars.count, unicodeString: chars)
141
- event.post(tap: .cghidEventTap)
246
+ let chars = Array(str.utf16)
247
+ // Use virtualKey 9 (unused on most layouts) for non-ASCII to prevent the
248
+ // input method from resolving virtualKey 0 ('a') and overriding the unicode string.
249
+ let isAscii = char.isASCII
250
+ let vk: CGKeyCode = isAscii ? 0 : 9
251
+
252
+ if let downEvent = CGEvent(keyboardEventSource: source, virtualKey: vk, keyDown: true) {
253
+ downEvent.keyboardSetUnicodeString(stringLength: chars.count, unicodeString: chars)
254
+ postEvent(downEvent, targetPid: targetPid)
142
255
  }
143
- usleep(20_000) // 20ms between characters
144
- if let event = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: false) {
145
- event.post(tap: .cghidEventTap)
256
+ // Non-ASCII needs slightly more time for the input method pipeline to process
257
+ // but keep delays short to avoid bridge timeout on long strings (10s limit)
258
+ usleep(isAscii ? 20_000 : 35_000)
259
+ if let upEvent = CGEvent(keyboardEventSource: source, virtualKey: vk, keyDown: false) {
260
+ upEvent.keyboardSetUnicodeString(stringLength: chars.count, unicodeString: chars)
261
+ postEvent(upEvent, targetPid: targetPid)
146
262
  }
147
- usleep(10_000)
263
+ usleep(isAscii ? 10_000 : 20_000)
148
264
  }
149
265
  }
150
266
 
151
267
  // MARK: - Screenshots
152
268
 
269
+ /// Track consecutive CG API failures per window to prefer CLI fallback
270
+ private var cgWindowFailures = [Int: Int]()
271
+ private static let CG_FAILURE_THRESHOLD = 2
272
+
273
+ /// Run a capture operation on a background thread with a timeout.
274
+ /// Uses autoreleasepool to prevent CGImage memory accumulation.
275
+ /// CGWindowListCreateImage can block indefinitely when screen recording
276
+ /// permission hasn't been granted, so we need a timeout guard.
277
+ private func timedCapture<T>(timeoutSec: Double = 10, _ work: @escaping () throws -> T) throws -> T {
278
+ let semaphore = DispatchSemaphore(value: 0)
279
+ var result: T?
280
+ var captureError: Error?
281
+
282
+ DispatchQueue.global(qos: .userInitiated).async {
283
+ autoreleasepool {
284
+ do {
285
+ result = try work()
286
+ } catch {
287
+ captureError = error
288
+ }
289
+ }
290
+ semaphore.signal()
291
+ }
292
+
293
+ let waitResult = semaphore.wait(timeout: .now() + timeoutSec)
294
+ if waitResult == .timedOut {
295
+ throw BridgeError.permissionDenied("Screen capture timed out — screen recording permission likely not granted. Grant access in System Settings → Privacy & Security → Screen Recording, then restart.")
296
+ }
297
+ if let err = captureError { throw err }
298
+ return result!
299
+ }
300
+
153
301
  func captureScreen(region: [String: Double]?) throws -> [String: Any] {
154
- let rect: CGRect
155
- if let region = region {
156
- rect = CGRect(
157
- x: region["x"] ?? 0,
158
- y: region["y"] ?? 0,
159
- width: region["width"] ?? 0,
160
- height: region["height"] ?? 0
161
- )
162
- } else {
163
- rect = CGRect.infinite
302
+ // Try CGWindowListCreateImage first (fast, in-process)
303
+ // Fall back to `screencapture` CLI (always has permission as a system binary)
304
+ do {
305
+ return try timedCapture(timeoutSec: 5) {
306
+ let rect: CGRect
307
+ if let region = region {
308
+ rect = CGRect(
309
+ x: region["x"] ?? 0,
310
+ y: region["y"] ?? 0,
311
+ width: region["width"] ?? 0,
312
+ height: region["height"] ?? 0
313
+ )
314
+ } else {
315
+ rect = CGRect.infinite
316
+ }
317
+ guard let image = CGWindowListCreateImage(rect, .optionOnScreenOnly, kCGNullWindowID, .bestResolution) else {
318
+ throw BridgeError.general("CGWindowListCreateImage returned nil")
319
+ }
320
+ let path = try self.saveImage(image)
321
+ return ["path": path, "width": image.width, "height": image.height]
322
+ }
323
+ } catch {
324
+ // Fallback: use macOS screencapture CLI
325
+ return try screencaptureCliFullscreen(region: region)
164
326
  }
327
+ }
165
328
 
166
- guard let image = CGWindowListCreateImage(rect, .optionOnScreenOnly, kCGNullWindowID, .bestResolution) else {
167
- throw BridgeError.general("Failed to capture screen")
329
+ func captureWindow(windowId: Int, safeCLI: Bool = false) throws -> [String: Any] {
330
+ // safeCLI=true: always use CLI (for browser windows that crash CG API)
331
+ if safeCLI {
332
+ return try screencaptureCliWindow(windowId: windowId)
168
333
  }
169
334
 
170
- let path = try saveImage(image)
171
- return ["path": path, "width": image.width, "height": image.height]
335
+ // If CG API has been crashing for this window, go straight to CLI fallback
336
+ let failures = cgWindowFailures[windowId] ?? 0
337
+ if failures >= CoreGraphicsBridge.CG_FAILURE_THRESHOLD {
338
+ return try screencaptureCliWindow(windowId: windowId)
339
+ }
340
+
341
+ do {
342
+ let result: [String: Any] = try timedCapture(timeoutSec: 5) {
343
+ guard let image = CGWindowListCreateImage(
344
+ .null, .optionIncludingWindow, CGWindowID(windowId), [.bestResolution, .boundsIgnoreFraming]
345
+ ) else {
346
+ throw BridgeError.general("CGWindowListCreateImage returned nil for window \(windowId)")
347
+ }
348
+ let path = try self.saveImage(image)
349
+ return ["path": path, "width": image.width, "height": image.height]
350
+ }
351
+ // CG API succeeded — reset failure counter
352
+ cgWindowFailures[windowId] = 0
353
+ return result
354
+ } catch {
355
+ // Track CG failure so we prefer CLI next time
356
+ cgWindowFailures[windowId] = failures + 1
357
+ // Fallback: use screencapture -l (runs in subprocess, crash-safe)
358
+ return try screencaptureCliWindow(windowId: windowId)
359
+ }
360
+ }
361
+
362
+ /// Fallback screenshot using macOS `screencapture` CLI (always has permission).
363
+ /// Runs in a subprocess — crash-safe even for GPU-heavy windows.
364
+ private func screencaptureCliFullscreen(region: [String: Double]?) throws -> [String: Any] {
365
+ let tempDir = FileManager.default.temporaryDirectory
366
+ let fileName = "bridge_screenshot_\(UUID().uuidString).png"
367
+ let fileURL = tempDir.appendingPathComponent(fileName)
368
+
369
+ var args = ["-x", fileURL.path] // -x = no sound
370
+ if let r = region {
371
+ let x = Int(r["x"] ?? 0)
372
+ let y = Int(r["y"] ?? 0)
373
+ let w = Int(r["width"] ?? 0)
374
+ let h = Int(r["height"] ?? 0)
375
+ args = ["-x", "-R", "\(x),\(y),\(w),\(h)", fileURL.path]
376
+ }
377
+
378
+ let process = Process()
379
+ process.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture")
380
+ process.arguments = args
381
+ try process.run()
382
+ process.waitUntilExit()
383
+
384
+ guard process.terminationStatus == 0 else {
385
+ throw BridgeError.general("screencapture failed with exit code \(process.terminationStatus)")
386
+ }
387
+
388
+ return readImageDimensions(fileURL: fileURL)
389
+ }
390
+
391
+ /// Fallback window capture using `screencapture -l <windowId>`.
392
+ /// Runs in a subprocess — crash-safe even for GPU-heavy windows.
393
+ private func screencaptureCliWindow(windowId: Int) throws -> [String: Any] {
394
+ let tempDir = FileManager.default.temporaryDirectory
395
+ let fileName = "bridge_screenshot_\(UUID().uuidString).png"
396
+ let fileURL = tempDir.appendingPathComponent(fileName)
397
+
398
+ let process = Process()
399
+ process.executableURL = URL(fileURLWithPath: "/usr/sbin/screencapture")
400
+ process.arguments = ["-x", "-l", String(windowId), fileURL.path]
401
+ try process.run()
402
+ process.waitUntilExit()
403
+
404
+ guard process.terminationStatus == 0 else {
405
+ throw BridgeError.general("screencapture -l failed with exit code \(process.terminationStatus)")
406
+ }
407
+
408
+ return readImageDimensions(fileURL: fileURL)
172
409
  }
173
410
 
174
- func captureWindow(windowId: Int) throws -> [String: Any] {
175
- guard let image = CGWindowListCreateImage(
176
- .null, .optionIncludingWindow, CGWindowID(windowId), .bestResolution
177
- ) else {
178
- throw BridgeError.general("Failed to capture window \(windowId)")
411
+ /// Read image dimensions from a file.
412
+ private func readImageDimensions(fileURL: URL) -> [String: Any] {
413
+ guard let image = NSImage(contentsOf: fileURL),
414
+ let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
415
+ return ["path": fileURL.path, "width": 0, "height": 0]
179
416
  }
417
+ return ["path": fileURL.path, "width": cgImage.width, "height": cgImage.height]
418
+ }
180
419
 
181
- let path = try saveImage(image)
182
- return ["path": path, "width": image.width, "height": image.height]
420
+ /// Capture a window and return the image as an in-memory base64 PNG string.
421
+ /// Avoids disk I/O useful for high-frequency perception (vision diffs).
422
+ /// Falls back to file-based capture if CG API fails.
423
+ func captureWindowBuffer(windowId: Int, safeCLI: Bool = false) throws -> [String: Any] {
424
+ // safeCLI=true: always use CLI (for browser windows that crash CG API)
425
+ if safeCLI {
426
+ return try captureWindowBufferViaFile(windowId: windowId)
427
+ }
428
+
429
+ // If CG API keeps failing, fall back to file-based capture + base64 encode
430
+ let failures = cgWindowFailures[windowId] ?? 0
431
+ if failures >= CoreGraphicsBridge.CG_FAILURE_THRESHOLD {
432
+ return try captureWindowBufferViaFile(windowId: windowId)
433
+ }
434
+
435
+ do {
436
+ let result: [String: Any] = try timedCapture(timeoutSec: 5) {
437
+ guard let image = CGWindowListCreateImage(
438
+ .null, .optionIncludingWindow, CGWindowID(windowId), [.bestResolution, .boundsIgnoreFraming]
439
+ ) else {
440
+ throw BridgeError.general("CGWindowListCreateImage returned nil for window \(windowId)")
441
+ }
442
+
443
+ // Encode CGImage → PNG Data in memory (no temp file)
444
+ let mutableData = NSMutableData()
445
+ guard let dest = CGImageDestinationCreateWithData(mutableData as CFMutableData, "public.png" as CFString, 1, nil) else {
446
+ throw BridgeError.general("Failed to create in-memory image destination")
447
+ }
448
+ CGImageDestinationAddImage(dest, image, nil)
449
+ guard CGImageDestinationFinalize(dest) else {
450
+ throw BridgeError.general("Failed to encode PNG to memory buffer")
451
+ }
452
+
453
+ let base64 = (mutableData as Data).base64EncodedString()
454
+ return ["base64": base64, "width": image.width, "height": image.height]
455
+ }
456
+ cgWindowFailures[windowId] = 0
457
+ return result
458
+ } catch {
459
+ cgWindowFailures[windowId] = (cgWindowFailures[windowId] ?? 0) + 1
460
+ return try captureWindowBufferViaFile(windowId: windowId)
461
+ }
462
+ }
463
+
464
+ /// Fallback for captureWindowBuffer: capture to file via CLI, then read+encode.
465
+ private func captureWindowBufferViaFile(windowId: Int) throws -> [String: Any] {
466
+ let fileResult = try captureWindow(windowId: windowId)
467
+ guard let path = fileResult["path"] as? String else {
468
+ throw BridgeError.general("captureWindow fallback returned no path")
469
+ }
470
+ let url = URL(fileURLWithPath: path)
471
+ let data = try Data(contentsOf: url)
472
+ let base64 = data.base64EncodedString()
473
+ let width = fileResult["width"] as? Int ?? 0
474
+ let height = fileResult["height"] as? Int ?? 0
475
+ // Clean up temp file
476
+ try? FileManager.default.removeItem(at: url)
477
+ return ["base64": base64, "width": width, "height": height]
183
478
  }
184
479
 
185
480
  private func saveImage(_ image: CGImage) throws -> String {
@@ -0,0 +1,136 @@
1
+ import Foundation
2
+ import ScreenCaptureKit
3
+ import CoreMedia
4
+ import AppKit
5
+
6
/// Continuous screen capture using SCStream.
/// Keeps the latest frame as a temp PNG file on disk.
/// Replaces one-shot CGWindowListCreateImage (~200ms) with pre-captured frames (~0ms read).
class StreamCapture: NSObject, SCStreamOutput {
    private var stream: SCStream?
    private var _running = false
    /// True while a `start` call sits between its "already running?" check and publishing
    /// the new stream, so overlapping `start` calls cannot create two streams.
    private var _starting = false
    /// Serial queue guarding every mutable property of this class.
    private let queue = DispatchQueue(label: "streamcapture.state")
    /// Serial queue for frame callbacks: frames are encoded one at a time and in order,
    /// so two callbacks never write the same temp file concurrently.
    private let sampleQueue = DispatchQueue(label: "streamcapture.frames", qos: .userInitiated)
    /// Reused for every frame instead of being re-created per callback.
    private let ciContext = CIContext()

    /// Path to the latest captured frame (PNG file)
    private var _latestFramePath: String?
    private var _latestWidth: Int = 0
    private var _latestHeight: Int = 0
    private var _latestFrameTime: Date?
    private var _frameCount: UInt64 = 0
    private var saveEveryN: Int = 1

    /// Start continuous capture for a specific window.
    ///
    /// Returns immediately when a capture is already running or being started.
    ///
    /// - Parameters:
    ///   - windowId: Identifier of the window to capture.
    ///   - fps: Requested frame rate; values below 1 are treated as 1.
    /// - Throws: `BridgeError.general` when the window is not among the shareable
    ///   on-screen windows; rethrows errors from ScreenCaptureKit.
    func start(windowId: Int, fps: Int = 30) async throws {
        // Check and reserve in one critical section so overlapping calls cannot both proceed.
        let shouldStart: Bool = queue.sync {
            if _running || _starting { return false }
            _starting = true
            return true
        }
        guard shouldStart else { return }
        defer { queue.sync { _starting = false } }

        let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: true)
        // `CGWindowID(exactly:)` avoids a trap on negative or oversized identifiers.
        guard let targetID = CGWindowID(exactly: windowId),
              let window = content.windows.first(where: { $0.windowID == targetID }) else {
            throw BridgeError.general("Window \(windowId) not found for stream capture")
        }

        // A zero or negative fps would yield an invalid frame interval.
        let effectiveFPS = max(1, fps)
        let everyN = max(1, effectiveFPS / 30)

        let filter = SCContentFilter(desktopIndependentWindow: window)
        let config = SCStreamConfiguration()
        // Output is sized at twice the window's point size; fixed defaults cover an empty frame.
        config.width = window.frame.width > 0 ? Int(window.frame.width) * 2 : 2880
        config.height = window.frame.height > 0 ? Int(window.frame.height) * 2 : 1800
        config.showsCursor = false
        config.capturesAudio = false
        config.minimumFrameInterval = CMTime(value: 1, timescale: CMTimeScale(clamping: effectiveFPS))
        config.queueDepth = 3

        let newStream = SCStream(filter: filter, configuration: config, delegate: nil)
        try newStream.addStreamOutput(self, type: .screen, sampleHandlerQueue: sampleQueue)
        try await newStream.startCapture()

        queue.sync {
            stream = newStream
            saveEveryN = everyN
            _frameCount = 0
            _running = true
        }
    }

    /// Stop the stream and clean up.
    ///
    /// Clears the published frame path and removes the frame file it pointed to.
    func stop() async {
        var s: SCStream?
        var pathToClean: String?

        queue.sync {
            s = stream
            _running = false
            stream = nil
            pathToClean = _latestFramePath
            _latestFramePath = nil
        }

        if let s = s {
            try? await s.stopCapture()
        }

        if let path = pathToClean {
            try? FileManager.default.removeItem(atPath: path)
        }
    }

    /// Whether a capture stream is currently running.
    var isRunning: Bool {
        queue.sync { _running }
    }

    /// Get info about the latest frame.
    ///
    /// - Returns: `nil` until a frame has been saved (and after `stop`); otherwise a
    ///   dictionary with `"path"`, `"width"`, `"height"`, `"ageMs"` (milliseconds since
    ///   the frame was saved) and `"frameCount"`.
    func getLatestInfo() -> [String: Any]? {
        queue.sync {
            guard let path = _latestFramePath, let time = _latestFrameTime else { return nil }
            return [
                "path": path,
                "width": _latestWidth,
                "height": _latestHeight,
                "ageMs": Int(Date().timeIntervalSince(time) * 1000),
                "frameCount": _frameCount,
            ]
        }
    }

    // MARK: - SCStreamOutput

    /// Receives a frame from the stream, encodes every `saveEveryN`-th one as PNG and
    /// publishes it as the latest frame. Frames that fail to encode or write are skipped.
    func stream(_ stream: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer, of type: SCStreamOutputType) {
        guard type == .screen else { return }

        let shouldSave: Bool = queue.sync {
            guard _running else { return false }
            _frameCount += 1
            return _frameCount % UInt64(saveEveryN) == 0
        }
        guard shouldSave else { return }

        guard let imageBuffer = sampleBuffer.imageBuffer else { return }
        let ciImage = CIImage(cvImageBuffer: imageBuffer)
        let width = CVPixelBufferGetWidth(imageBuffer)
        let height = CVPixelBufferGetHeight(imageBuffer)

        guard let cgImage = ciContext.createCGImage(ciImage, from: CGRect(x: 0, y: 0, width: width, height: height)) else { return }

        let bitmapRep = NSBitmapImageRep(cgImage: cgImage)
        guard let pngData = bitmapRep.representation(using: .png, properties: [:]) else { return }

        let tempDir = FileManager.default.temporaryDirectory
        let fileURL = tempDir.appendingPathComponent("stream_frame_latest.png")
        let tmpURL = tempDir.appendingPathComponent("stream_frame_tmp_\(ProcessInfo.processInfo.processIdentifier).png")

        do {
            // Write to a side file first, then swap it in, so readers never see a partial PNG.
            try pngData.write(to: tmpURL)
            _ = try FileManager.default.replaceItemAt(fileURL, withItemAt: tmpURL)

            // Re-check the running flag: `stop` may have cleaned up while this frame was encoding.
            let published: Bool = queue.sync {
                guard _running else { return false }
                _latestFramePath = fileURL.path
                _latestWidth = width
                _latestHeight = height
                _latestFrameTime = Date()
                return true
            }
            if !published {
                // Capture was stopped mid-frame; do not leave the file behind.
                try? FileManager.default.removeItem(at: fileURL)
            }
        } catch {
            // Skip frame on write failure, but do not leak the intermediate file.
            try? FileManager.default.removeItem(at: tmpURL)
        }
    }
}