screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,498 @@
1
+ import Foundation
2
+
3
+ /// JSON-RPC over stdio bridge for macOS native APIs.
4
+ /// Reads JSON requests from stdin (one per line), dispatches to the appropriate bridge,
5
+ /// and writes JSON responses to stdout (one per line).
6
+
7
+ // MARK: - Signal Handlers
8
+ // Catch fatal signals (SIGSEGV, SIGBUS, SIGABRT) that CGWindowListCreateImage
9
+ // can trigger on GPU-heavy windows. Write an error to stderr so the Node.js
10
+ // bridge client can detect the crash, then exit cleanly.
11
/// Installs process-wide handlers for fatal and graceful-shutdown signals.
///
/// - Fatal signals (`SIGSEGV`, `SIGBUS`, `SIGABRT`): a one-line marker is written
///   to stderr and the process exits with status `128 + signal`.
/// - Graceful signals (`SIGTERM`, `SIGINT`): a `bridge.shutdown` JSON-RPC
///   notification is written to stdout and the process exits with status 0.
///
/// The handlers only use compile-time constant messages, `write(2)` and
/// `_exit(2)`. Building a `String` (heap allocation) or calling `fflush`
/// (takes the stdio lock) inside a handler is not async-signal-safe and can
/// deadlock or crash again, which matters most for SIGSEGV/SIGBUS.
func installSignalHandlers() {
    // Fatal signals — crash reporting
    let fatalSignals: [Int32] = [SIGSEGV, SIGBUS, SIGABRT]
    for sig in fatalSignals {
        signal(sig) { signum in
            // The numbers spelled out below are the Darwin values of the three
            // signals, so the emitted text matches "Bridge fatal signal <n>".
            let message: StaticString
            switch signum {
            case SIGSEGV:
                message = "Bridge fatal signal 11 — restarting\n"
            case SIGBUS:
                message = "Bridge fatal signal 10 — restarting\n"
            case SIGABRT:
                message = "Bridge fatal signal 6 — restarting\n"
            default:
                // Not reachable with the signals registered above.
                message = "Bridge fatal signal — restarting\n"
            }
            message.withUTF8Buffer { bytes in
                _ = Darwin.write(STDERR_FILENO, bytes.baseAddress, bytes.count)
            }
            _exit(128 + signum)
        }
    }

    // Graceful shutdown signals — notify Node.js BridgeClient before exit
    let gracefulSignals: [Int32] = [SIGTERM, SIGINT]
    for sig in gracefulSignals {
        signal(sig) { signum in
            let notification: StaticString = signum == SIGTERM
                ? "{\"jsonrpc\":\"2.0\",\"method\":\"bridge.shutdown\",\"params\":{\"reason\":\"SIGTERM\"}}\n"
                : "{\"jsonrpc\":\"2.0\",\"method\":\"bridge.shutdown\",\"params\":{\"reason\":\"SIGINT\"}}\n"
            // write(2) goes straight to the descriptor, so no stdio flush is needed.
            notification.withUTF8Buffer { bytes in
                _ = Darwin.write(STDOUT_FILENO, bytes.baseAddress, bytes.count)
            }
            _exit(0)
        }
    }
}
installSignalHandlers()
40
+
41
/// One request decoded from a single stdin line.
struct JsonRpcRequest: Codable {
    /// Correlation id; echoed back in the response to this request.
    let id: Int

    /// Name of the bridge method to run (for example `"ping"`).
    let method: String

    /// Named arguments for `method`; `nil` when the request carries none.
    let params: [String: AnyCodable]?
}
46
+
47
/// One response written to stdout as a single JSON line.
struct JsonRpcResponse: Codable {
    /// Id of the request this response answers.
    let id: Int

    /// Value returned by the method; `nil` when the call failed.
    let result: AnyCodable?

    /// Failure details; `nil` when the call succeeded.
    let error: JsonRpcError?
}
52
+
53
/// Error payload carried inside a `JsonRpcResponse`.
struct JsonRpcError: Codable {
    /// Numeric error code (the main loop uses -32700 for parse errors and -1 for method failures).
    let code: Int

    /// Human-readable description of the failure.
    let message: String
}
57
+
58
+ /// Type-erased Codable wrapper for JSON values.
59
/// Type-erased Codable wrapper for JSON values.
///
/// Decoding produces `NSNull`, `Bool`, `Int`, `Double`, `String`, `[Any]` or
/// `[String: Any]` (tried in that order). Encoding accepts the same set plus
/// the fixed-width integer types, `Float`, `CGFloat` and nested `AnyCodable`
/// values; anything else is written as JSON `null`.
struct AnyCodable: Codable {
    /// The wrapped value, already unwrapped from any nested `AnyCodable`.
    let value: Any

    /// Wraps an arbitrary value for encoding.
    init(_ value: Any) {
        self.value = value
    }

    /// Decodes any JSON value.
    /// - Throws: `DecodingError.dataCorrupted` when the value matches none of
    ///   the supported JSON shapes.
    init(from decoder: Decoder) throws {
        let container = try decoder.singleValueContainer()
        // Order matters: Bool before Int, and Int before Double, so whole
        // numbers come back as Int.
        if container.decodeNil() {
            value = NSNull()
        } else if let bool = try? container.decode(Bool.self) {
            value = bool
        } else if let int = try? container.decode(Int.self) {
            value = int
        } else if let double = try? container.decode(Double.self) {
            value = double
        } else if let string = try? container.decode(String.self) {
            value = string
        } else if let array = try? container.decode([AnyCodable].self) {
            value = array.map { $0.value }
        } else if let dict = try? container.decode([String: AnyCodable].self) {
            value = dict.mapValues { $0.value }
        } else {
            throw DecodingError.dataCorruptedError(in: container, debugDescription: "Unsupported type")
        }
    }

    /// Encodes the wrapped value as JSON.
    ///
    /// The first seven cases keep their original order. The additional numeric
    /// and wrapper cases come after them, so they only affect values that were
    /// previously written as `null`.
    func encode(to encoder: Encoder) throws {
        var container = encoder.singleValueContainer()
        switch value {
        case is NSNull:
            try container.encodeNil()
        case let bool as Bool:
            try container.encode(bool)
        case let int as Int:
            try container.encode(int)
        case let double as Double:
            try container.encode(double)
        case let string as String:
            try container.encode(string)
        case let array as [Any]:
            try container.encode(array.map { AnyCodable($0) })
        case let dict as [String: Any]:
            try container.encode(dict.mapValues { AnyCodable($0) })

        // Already-wrapped values (e.g. a bridge handing back an AnyCodable).
        case let wrapped as AnyCodable:
            try container.encode(wrapped)

        // Fixed-width integers such as pid_t (Int32) or window ids (UInt32).
        case let number as Int8:
            try container.encode(number)
        case let number as Int16:
            try container.encode(number)
        case let number as Int32:
            try container.encode(number)
        case let number as Int64:
            try container.encode(number)
        case let number as UInt:
            try container.encode(number)
        case let number as UInt8:
            try container.encode(number)
        case let number as UInt16:
            try container.encode(number)
        case let number as UInt32:
            try container.encode(number)
        case let number as UInt64:
            try container.encode(number)

        // Other floating-point representations.
        case let number as Float:
            try container.encode(number)
        case let number as CGFloat:
            try container.encode(Double(number))

        default:
            // Unsupported payloads are still written as null.
            try container.encodeNil()
        }
    }
}
109
+
110
+ // MARK: - Helpers
111
+
112
/// Reads an optional, typed parameter from a decoded request.
///
/// JSON whole numbers decode as `Int` and fractional ones as `Double`, so a
/// plain cast is not enough for numeric parameters. After the direct cast
/// fails, the following coercions are attempted:
///
/// - `Double` from `Int`
/// - `Int` from `Double`, truncating toward zero; values that are not finite
///   or do not fit in `Int` yield `nil` instead of trapping
/// - `[Double]` and `[String: Double]` from collections mixing `Int` and
///   `Double` (e.g. a `region` of `{"x": 0, "y": 0, "width": 100.5, ...}`)
///
/// - Parameters:
///   - params: The request's parameter dictionary, if any.
///   - key: Name of the parameter to read.
/// - Returns: The value as `T`, or `nil` when the key is absent or the value
///   cannot be represented as `T`.
func param<T>(_ params: [String: AnyCodable]?, _ key: String) -> T? {
    guard let raw = params?[key]?.value else { return nil }
    if let typed = raw as? T { return typed }

    // Widens one decoded JSON number to Double.
    func asDouble(_ any: Any) -> Double? {
        if let double = any as? Double { return double }
        if let int = any as? Int { return Double(int) }
        return nil
    }

    if T.self == Double.self {
        guard let int = raw as? Int else { return nil }
        return Double(int) as? T
    }

    if T.self == Int.self {
        guard let double = raw as? Double else { return nil }
        // Int(exactly:) rejects NaN, infinities and out-of-range magnitudes,
        // all of which would trap in a plain Int(double) conversion.
        guard let int = Int(exactly: double.rounded(.towardZero)) else { return nil }
        return int as? T
    }

    if T.self == [Double].self, let items = raw as? [Any] {
        let widened = items.compactMap(asDouble)
        return widened.count == items.count ? widened as? T : nil
    }

    if T.self == [String: Double].self, let entries = raw as? [String: Any] {
        let widened = entries.compactMapValues(asDouble)
        return widened.count == entries.count ? widened as? T : nil
    }

    return nil
}
124
+
125
/// Reads a mandatory, typed parameter from a decoded request.
///
/// - Parameters:
///   - params: The request's parameter dictionary, if any.
///   - key: Name of the parameter to read.
/// - Returns: The parameter converted to `T` by `param(_:_:)`.
/// - Throws: `BridgeError.missingParam` when the key is absent or its value
///   cannot be read as `T`.
func requiredParam<T>(_ params: [String: AnyCodable]?, _ key: String) throws -> T {
    let found: T? = param(params, key)
    if let found = found {
        return found
    }
    throw BridgeError.missingParam(key)
}
131
+
132
/// Errors raised by the bridge while validating or executing a request.
enum BridgeError: LocalizedError {
    /// A required request parameter was absent (payload: parameter name).
    case missingParam(String)
    /// A requested entity could not be located (payload: what was looked up).
    case notFound(String)
    /// The operation was refused for lack of permission (payload: details).
    case permissionDenied(String)
    /// Any other failure (payload: the complete message).
    case general(String)

    /// Message surfaced through `localizedDescription`.
    var errorDescription: String? {
        let text: String
        switch self {
        case let .missingParam(name):
            text = "Missing required parameter: \(name)"
        case let .notFound(what):
            text = "Not found: \(what)"
        case let .permissionDenied(msg):
            text = "Permission denied: \(msg)"
        case let .general(msg):
            text = msg
        }
        return text
    }
}
147
+
148
+ // MARK: - Bridge Modules
149
+
150
// One shared instance of every native bridge; `dispatch` routes requests to these.

/// Accessibility (AX) queries and actions.
let accessibilityBridge = AccessibilityBridge()

/// Accessibility notification observer.
let observerBridge = ObserverBridge()

/// CoreGraphics input synthesis and screen/window capture.
let coreGraphicsBridge = CoreGraphicsBridge()

/// Vision requests (text search, OCR, element detection).
let visionBridge = VisionBridge()

/// App and window management; constructed with the shared accessibility bridge.
let appManagement = AppManagement(ax: accessibilityBridge)

/// Continuous window capture stream.
let streamCapture = StreamCapture()
156
+
157
+ // MARK: - Method Dispatch
158
+
159
/// Converts a JSON integer to `pid_t`, throwing instead of trapping when the
/// value does not fit.
private func checkedPid(_ raw: Int, key: String) throws -> pid_t {
    guard let pid = pid_t(exactly: raw) else {
        throw BridgeError.general("Parameter \(key) is not a valid process id: \(raw)")
    }
    return pid
}

/// Reads the mandatory `pid` parameter as a `pid_t`.
private func requiredPid(_ params: [String: AnyCodable]?) throws -> pid_t {
    let raw: Int = try requiredParam(params, "pid")
    return try checkedPid(raw, key: "pid")
}

/// Reads the optional `targetPid` parameter as a `pid_t`.
private func optionalTargetPid(_ params: [String: AnyCodable]?) throws -> pid_t? {
    guard let raw: Int = param(params, "targetPid") else { return nil }
    return try checkedPid(raw, key: "targetPid")
}

/// Routes one JSON-RPC method call to the matching bridge module.
///
/// - Parameters:
///   - method: Method name from the request (e.g. `"ax.findElement"`).
///   - params: Named arguments from the request, if any.
/// - Returns: The value to place in the response's `result` field.
/// - Throws: `BridgeError.missingParam` for absent or mistyped required
///   arguments, `BridgeError.general` for unknown methods, out-of-range
///   process ids and stream state errors, plus whatever the bridge throws.
func dispatch(method: String, params: [String: AnyCodable]?) throws -> Any {
    switch method {
    // Lifecycle
    case "ping":
        return [
            "pong": true,
            // Widened from Int32 to Int, a type the response encoder handles directly.
            "pid": Int(ProcessInfo.processInfo.processIdentifier),
            "accessible": accessibilityBridge.isAccessibilityTrusted(),
        ] as [String: Any]

    case "check_permissions":
        return ["trusted": accessibilityBridge.isAccessibilityTrusted()] as [String: Bool]

    // App Management
    case "app.launch":
        let bundleId: String = try requiredParam(params, "bundleId")
        return try appManagement.launchApp(bundleId: bundleId)

    case "app.focus":
        let bundleId: String = try requiredParam(params, "bundleId")
        try appManagement.focusApp(bundleId: bundleId)
        return ["ok": true]

    case "app.list":
        return appManagement.listRunningApps()

    case "app.windows":
        return appManagement.listWindows()

    case "app.frontmost":
        return appManagement.frontmostApp()

    // Window management (AX-enriched)
    case "window.list":
        return appManagement.listWindowsWithAX()

    case "window.focus":
        let windowId: Int = try requiredParam(params, "windowId")
        try appManagement.focusWindow(windowId: windowId)
        return ["ok": true]

    // Accessibility
    case "ax.findElement":
        let pid = try requiredPid(params)
        let role: String? = param(params, "role")
        let title: String? = param(params, "title")
        let value: String? = param(params, "value")
        let identifier: String? = param(params, "identifier")
        let exact: Bool = param(params, "exact") ?? true
        let maxDepth: Int = param(params, "maxDepth") ?? 30
        return try accessibilityBridge.findElement(
            pid: pid, role: role, title: title, value: value,
            identifier: identifier, exact: exact, maxDepth: maxDepth
        )

    case "ax.getElementTree":
        let pid = try requiredPid(params)
        let maxDepth: Int = param(params, "maxDepth") ?? 5
        let windowId: Int? = param(params, "windowId")
        return try accessibilityBridge.getElementTree(pid: pid, maxDepth: maxDepth, windowId: windowId)

    case "ax.getMenuBar":
        let pid = try requiredPid(params)
        let maxDepth: Int = param(params, "maxDepth") ?? 10
        return try accessibilityBridge.getMenuBarTree(pid: pid, maxDepth: maxDepth)

    case "ax.performAction":
        let pid = try requiredPid(params)
        let elementPath: [Int] = try requiredParam(params, "elementPath")
        let action: String = param(params, "action") ?? "AXPress"
        let expectedTitle: String? = param(params, "expectedTitle")
        try accessibilityBridge.performAction(
            pid: pid, elementPath: elementPath, action: action, expectedTitle: expectedTitle
        )
        return ["ok": true]

    case "ax.setElementValue":
        let pid = try requiredPid(params)
        let elementPath: [Int] = try requiredParam(params, "elementPath")
        let value: String = try requiredParam(params, "value")
        try accessibilityBridge.setElementValue(pid: pid, elementPath: elementPath, value: value)
        return ["ok": true]

    case "ax.getElementValue":
        let pid = try requiredPid(params)
        let elementPath: [Int] = try requiredParam(params, "elementPath")
        return try accessibilityBridge.getElementValue(pid: pid, elementPath: elementPath)

    case "ax.menuClick":
        let pid = try requiredPid(params)
        let menuPath: [String] = try requiredParam(params, "menuPath")
        try accessibilityBridge.menuClick(pid: pid, menuPath: menuPath)
        return ["ok": true]

    // Observer
    case "observer.start":
        let pid = try requiredPid(params)
        let notifications: [String]? = param(params, "notifications")
        try observerBridge.startObserving(pid: pid, notifications: notifications)
        return ["ok": true]

    case "observer.stop":
        let pid = try requiredPid(params)
        observerBridge.stopObserving(pid: pid)
        return ["ok": true]

    // CoreGraphics
    case "cg.mouseClick":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        let button: String = param(params, "button") ?? "left"
        let clickCount: Int = param(params, "clickCount") ?? 1
        let modifiers: [String] = param(params, "modifiers") ?? []
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.mouseClick(
            x: x, y: y, button: button, clickCount: clickCount,
            modifiers: modifiers, targetPid: targetPid
        )
        return ["ok": true]

    case "cg.mouseMove":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.mouseMove(x: x, y: y, targetPid: targetPid)
        return ["ok": true]

    case "cg.mouseDrag":
        let fromX: Double = try requiredParam(params, "fromX")
        let fromY: Double = try requiredParam(params, "fromY")
        let toX: Double = try requiredParam(params, "toX")
        let toY: Double = try requiredParam(params, "toY")
        let modifiers: [String] = param(params, "modifiers") ?? []
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.mouseDrag(
            fromX: fromX, fromY: fromY, toX: toX, toY: toY,
            modifiers: modifiers, targetPid: targetPid
        )
        return ["ok": true]

    case "cg.mousePressAndHold":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        let durationMs: Int = param(params, "durationMs") ?? 500
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.mousePressAndHold(x: x, y: y, durationMs: durationMs, targetPid: targetPid)
        return ["ok": true]

    case "cg.keyPressAndHold":
        let key: String = try requiredParam(params, "key")
        let durationMs: Int = param(params, "durationMs") ?? 500
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.keyPressAndHold(key: key, durationMs: durationMs, targetPid: targetPid)
        return ["ok": true]

    case "cg.mouseFlick":
        let fromX: Double = try requiredParam(params, "fromX")
        let fromY: Double = try requiredParam(params, "fromY")
        let toX: Double = try requiredParam(params, "toX")
        let toY: Double = try requiredParam(params, "toY")
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.mouseFlick(fromX: fromX, fromY: fromY, toX: toX, toY: toY, targetPid: targetPid)
        return ["ok": true]

    case "cg.keyCombo":
        let keys: [String] = try requiredParam(params, "keys")
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.keyCombo(keys: keys, targetPid: targetPid)
        return ["ok": true]

    case "cg.typeText":
        let text: String = try requiredParam(params, "text")
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.typeText(text: text, targetPid: targetPid)
        return ["ok": true]

    case "cg.captureScreen":
        let region: [String: Double]? = param(params, "region")
        return try coreGraphicsBridge.captureScreen(region: region)

    case "cg.captureWindow":
        let windowId: Int = try requiredParam(params, "windowId")
        let safeCLI: Bool = param(params, "safeCLI") ?? false
        return try coreGraphicsBridge.captureWindow(windowId: windowId, safeCLI: safeCLI)

    case "cg.captureWindowBuffer":
        let windowId: Int = try requiredParam(params, "windowId")
        let safeCLI: Bool = param(params, "safeCLI") ?? false
        return try coreGraphicsBridge.captureWindowBuffer(windowId: windowId, safeCLI: safeCLI)

    case "cg.scroll":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        let deltaX: Int = param(params, "deltaX") ?? 0
        let deltaY: Int = param(params, "deltaY") ?? 0
        let targetPid = try optionalTargetPid(params)
        coreGraphicsBridge.scroll(x: x, y: y, deltaX: deltaX, deltaY: deltaY, targetPid: targetPid)
        return ["ok": true]

    // Vision
    case "vision.findText":
        let imagePath: String = try requiredParam(params, "imagePath")
        let searchText: String? = param(params, "searchText")
        let mode: String = param(params, "mode") ?? "accurate"
        return try visionBridge.findText(imagePath: imagePath, searchText: searchText, mode: mode)

    case "vision.ocr":
        let imagePath: String = try requiredParam(params, "imagePath")
        let mode: String = param(params, "mode") ?? "accurate"
        return try visionBridge.ocr(imagePath: imagePath, mode: mode)

    case "vision.ocrRegion":
        let windowId: Int = try requiredParam(params, "windowId")
        let region: [String: Double] = try requiredParam(params, "region")
        let mode: String = param(params, "mode") ?? "accurate"
        return try visionBridge.ocrRegion(windowId: windowId, region: region, mode: mode)

    case "vision.detectElements":
        let imagePath: String = try requiredParam(params, "imagePath")
        let confidence: Double = param(params, "confidence") ?? 0.25
        let elements = try visionBridge.detectElements(imagePath: imagePath, confidence: confidence)
        return ["elements": elements, "count": elements.count] as [String: Any]

    // Stream capture — continuous SCStream for fast perception
    case "vision.startStream":
        let windowId: Int = try requiredParam(params, "windowId")
        let fps: Int = param(params, "fps") ?? 30
        // The async start is bridged into this synchronous function: the
        // calling thread blocks on the semaphore until the task finishes.
        let sem = DispatchSemaphore(value: 0)
        var streamError: Error?
        Task {
            do {
                try await streamCapture.start(windowId: windowId, fps: fps)
            } catch {
                streamError = error
            }
            sem.signal()
        }
        sem.wait()
        if let err = streamError { throw err }
        return ["ok": true, "fps": fps] as [String: Any]

    case "vision.stopStream":
        let sem = DispatchSemaphore(value: 0)
        Task {
            await streamCapture.stop()
            sem.signal()
        }
        sem.wait()
        return ["ok": true]

    case "vision.streamStatus":
        let running = streamCapture.isRunning
        if running, let info = streamCapture.getLatestInfo() {
            // Copy only the keys that are present; a missing key is omitted
            // from the status instead of crashing the bridge.
            var status: [String: Any] = ["running": true]
            for key in ["path", "width", "height", "ageMs", "frameCount"] {
                if let entry = info[key] {
                    status[key] = entry
                }
            }
            return status
        }
        return ["running": running]

    case "vision.latestFrame":
        guard streamCapture.isRunning else {
            throw BridgeError.general("Stream not running")
        }
        guard let info = streamCapture.getLatestInfo() else {
            throw BridgeError.general("No frame captured yet")
        }
        return info

    default:
        throw BridgeError.general("Unknown method: \(method)")
    }
}
417
+
418
+ // MARK: - Main Loop
419
+
420
/// Shared encoder for responses. Formatting options are left empty, so
/// pretty-printing is off.
let encoder: JSONEncoder = {
    let jsonEncoder = JSONEncoder()
    jsonEncoder.outputFormatting = []
    return jsonEncoder
}()

/// Shared decoder for incoming request lines.
let decoder = JSONDecoder()
424
+
425
/// Serializes writers so lines from different threads do not interleave.
let outputLock = NSLock()

/// Writes one JSON line to stdout (thread-safe).
///
/// The payload and its trailing newline are sent in a single write. With two
/// separate writes, the graceful-shutdown signal handler (which writes to
/// stdout without taking `outputLock`) could land between them and break the
/// one-message-per-line framing.
///
/// - Parameter data: The encoded JSON message, without a trailing newline.
func writeLine(_ data: Data) {
    var framed = data
    framed.append(0x0A) // "\n"

    outputLock.lock()
    defer { outputLock.unlock() }
    FileHandle.standardOutput.write(framed)
}
433
+
434
/// Encodes a response and writes it to stdout as one line.
///
/// If encoding fails (for example, `JSONEncoder` rejects NaN and infinite
/// doubles by default), an error response with the same id is sent instead,
/// so the caller is not left waiting for a reply that never arrives.
///
/// - Parameter response: The response to send.
func writeResponse(_ response: JsonRpcResponse) {
    do {
        writeLine(try encoder.encode(response))
    } catch {
        let fallback = JsonRpcResponse(
            id: response.id,
            result: nil,
            // -32603 is the JSON-RPC 2.0 "Internal error" code.
            error: JsonRpcError(code: -32603, message: "Failed to encode response: \(error.localizedDescription)")
        )
        if let data = try? encoder.encode(fallback) {
            writeLine(data)
        } else {
            // Last resort: report on stderr when even the fallback cannot be encoded.
            let note = "Bridge failed to encode response for id \(response.id)\n"
            FileHandle.standardError.write(Data(note.utf8))
        }
    }
}
439
+
440
/// Writes an unsolicited event to stdout, wrapped as `{"id": 0, "event": ...}`.
///
/// `JSONSerialization.data(withJSONObject:)` raises an Objective-C exception,
/// which `try?` cannot catch, when the object is not valid JSON. The payload
/// is therefore validated first, and an invalid event is dropped with a note
/// on stderr instead of taking the bridge down.
///
/// - Parameter event: The event payload to forward.
func writeEvent(_ event: [String: Any]) {
    let wrapped: [String: Any] = ["id": 0, "event": event]
    guard JSONSerialization.isValidJSONObject(wrapped) else {
        let note = "Bridge dropped event: payload is not JSON-serializable\n"
        FileHandle.standardError.write(Data(note.utf8))
        return
    }
    if let data = try? JSONSerialization.data(withJSONObject: wrapped) {
        writeLine(data)
    }
}
446
+
447
+ // Set up observer event forwarding
448
// Forward every observer event to stdout through writeEvent.
observerBridge.onEvent = { writeEvent($0) }
451
+
452
+ // Process stdin line by line
453
// Request loop: one JSON request per stdin line, one JSON response per stdout
// line. Ends when stdin reaches end-of-file.
while let line = readLine() {
    if line.isEmpty { continue }
    guard let payload = line.data(using: .utf8) else { continue }

    // Step 1: decode. On failure, answer with a parse error and move on.
    let request: JsonRpcRequest
    do {
        request = try decoder.decode(JsonRpcRequest.self, from: payload)
    } catch {
        // Malformed JSON — try to recover the id from the raw text so the
        // error can still be matched to a request.
        var recoveredId = 0
        if let idSpan = line.range(of: "\"id\"\\s*:\\s*(\\d+)", options: .regularExpression) {
            let idText = line[idSpan]
            if let digitSpan = idText.range(of: "\\d+$", options: .regularExpression) {
                recoveredId = Int(idText[digitSpan]) ?? 0
            }
        }

        if recoveredId == 0 {
            // No id could be extracted — log to stderr so Node.js BridgeClient can detect it
            let stderrMsg = "Bridge parse error (no id): \(error.localizedDescription)\n"
            stderrMsg.withCString { ptr in
                _ = Darwin.write(STDERR_FILENO, ptr, Int(strlen(ptr)))
            }
        }

        writeResponse(
            JsonRpcResponse(
                id: recoveredId,
                result: nil,
                error: JsonRpcError(code: -32700, message: "Parse error: \(error.localizedDescription)")
            )
        )
        continue
    }

    // Step 2: run the method and report either its result or its error.
    let response: JsonRpcResponse
    do {
        let outcome = try dispatch(method: request.method, params: request.params)
        response = JsonRpcResponse(id: request.id, result: AnyCodable(outcome), error: nil)
    } catch {
        response = JsonRpcResponse(
            id: request.id,
            result: nil,
            error: JsonRpcError(code: -1, message: error.localizedDescription)
        )
    }
    writeResponse(response)
}