screenhand 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/.claude/commands/automate.md +28 -0
  2. package/.claude/commands/debug-ui.md +19 -0
  3. package/.claude/commands/screenshot.md +15 -0
  4. package/.github/FUNDING.yml +1 -0
  5. package/.github/ISSUE_TEMPLATE/bug_report.md +27 -0
  6. package/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  7. package/.mcp.json +8 -0
  8. package/DESKTOP_MCP_GUIDE.md +92 -0
  9. package/LICENSE +661 -21
  10. package/README.md +97 -292
  11. package/SECURITY.md +44 -0
  12. package/docs/architecture.md +47 -0
  13. package/install-skills.sh +19 -0
  14. package/mcp-bridge.ts +271 -0
  15. package/mcp-desktop.ts +1221 -0
  16. package/native/macos-bridge/Package.swift +21 -0
  17. package/native/macos-bridge/Sources/AccessibilityBridge.swift +261 -0
  18. package/native/macos-bridge/Sources/AppManagement.swift +129 -0
  19. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +242 -0
  20. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  21. package/native/macos-bridge/Sources/VisionBridge.swift +80 -0
  22. package/native/macos-bridge/Sources/main.swift +345 -0
  23. package/native/windows-bridge/AppManagement.cs +234 -0
  24. package/native/windows-bridge/InputBridge.cs +436 -0
  25. package/native/windows-bridge/Program.cs +265 -0
  26. package/native/windows-bridge/ScreenCapture.cs +329 -0
  27. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  28. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  29. package/package.json +3 -14
  30. package/playbooks/devpost.json +186 -0
  31. package/playbooks/instagram.json +41 -0
  32. package/playbooks/instagram_v2.json +201 -0
  33. package/playbooks/x_v1.json +211 -0
  34. package/scripts/devpost-live-loop.mjs +421 -0
  35. package/src/config.ts +30 -0
  36. package/src/index.ts +92 -0
  37. package/src/logging/timeline-logger.ts +55 -0
  38. package/src/mcp/server.ts +449 -0
  39. package/src/memory/recall.ts +191 -0
  40. package/src/memory/research.ts +146 -0
  41. package/src/memory/seeds.ts +123 -0
  42. package/src/memory/session.ts +201 -0
  43. package/src/memory/store.ts +434 -0
  44. package/src/memory/types.ts +69 -0
  45. package/src/native/bridge-client.ts +239 -0
  46. package/src/native/macos-bridge-client.ts +22 -0
  47. package/src/runtime/accessibility-adapter.ts +487 -0
  48. package/src/runtime/app-adapter.ts +169 -0
  49. package/src/runtime/applescript-adapter.ts +376 -0
  50. package/src/runtime/ax-role-map.ts +102 -0
  51. package/src/runtime/browser-adapter.ts +129 -0
  52. package/src/runtime/cdp-chrome-adapter.ts +676 -0
  53. package/src/runtime/composite-adapter.ts +274 -0
  54. package/src/runtime/executor.ts +396 -0
  55. package/src/runtime/locator-cache.ts +33 -0
  56. package/src/runtime/planning-loop.ts +81 -0
  57. package/src/runtime/service.ts +448 -0
  58. package/src/runtime/session-manager.ts +50 -0
  59. package/src/runtime/state-observer.ts +136 -0
  60. package/src/runtime/vision-adapter.ts +297 -0
  61. package/src/types.ts +297 -0
  62. package/tests/bridge-client.test.ts +176 -0
  63. package/tests/browser-stealth.test.ts +210 -0
  64. package/tests/composite-adapter.test.ts +64 -0
  65. package/tests/mcp-server.test.ts +151 -0
  66. package/tests/memory-recall.test.ts +339 -0
  67. package/tests/memory-research.test.ts +159 -0
  68. package/tests/memory-seeds.test.ts +120 -0
  69. package/tests/memory-store.test.ts +392 -0
  70. package/tests/types.test.ts +92 -0
  71. package/tsconfig.check.json +17 -0
  72. package/tsconfig.json +19 -0
  73. package/vitest.config.ts +8 -0
  74. package/dist/config.js +0 -9
  75. package/dist/index.js +0 -55
  76. package/dist/logging/timeline-logger.js +0 -29
  77. package/dist/mcp/mcp-stdio-server.js +0 -284
  78. package/dist/mcp/server.js +0 -347
  79. package/dist/mcp-entry.js +0 -62
  80. package/dist/memory/recall.js +0 -160
  81. package/dist/memory/research.js +0 -98
  82. package/dist/memory/seeds.js +0 -89
  83. package/dist/memory/session.js +0 -161
  84. package/dist/memory/store.js +0 -391
  85. package/dist/memory/types.js +0 -4
  86. package/dist/native/bridge-client.js +0 -173
  87. package/dist/native/macos-bridge-client.js +0 -5
  88. package/dist/runtime/accessibility-adapter.js +0 -377
  89. package/dist/runtime/app-adapter.js +0 -48
  90. package/dist/runtime/applescript-adapter.js +0 -283
  91. package/dist/runtime/ax-role-map.js +0 -80
  92. package/dist/runtime/browser-adapter.js +0 -36
  93. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  94. package/dist/runtime/composite-adapter.js +0 -205
  95. package/dist/runtime/executor.js +0 -250
  96. package/dist/runtime/locator-cache.js +0 -12
  97. package/dist/runtime/planning-loop.js +0 -47
  98. package/dist/runtime/service.js +0 -372
  99. package/dist/runtime/session-manager.js +0 -28
  100. package/dist/runtime/state-observer.js +0 -105
  101. package/dist/runtime/vision-adapter.js +0 -208
  102. package/dist/test-mcp-protocol.js +0 -138
  103. package/dist/types.js +0 -1
@@ -0,0 +1,120 @@
1
+ import ApplicationServices
2
+ import Foundation
3
+
4
/// Subscribes to macOS Accessibility (AX) notifications for running processes and
/// forwards each one to `onEvent` as a JSON-friendly dictionary.
///
/// Observer run loop sources are attached to the main run loop in the default mode,
/// so callbacks are only delivered while the main run loop is being run.
class ObserverBridge {
    /// Active observers keyed by the process ID they watch.
    private var observers: [pid_t: AXObserver] = [:]

    /// Called once per received notification with the payload built by `handleNotification`.
    var onEvent: (([String: Any]) -> Void)?

    /// Shared timestamp formatter; creating one per notification is needlessly expensive.
    private let timestampFormatter = ISO8601DateFormatter()

    /// Notifications registered when the caller does not supply its own list.
    private let defaultNotifications: [String] = [
        kAXValueChangedNotification,
        kAXFocusedUIElementChangedNotification,
        kAXWindowCreatedNotification,
        kAXUIElementDestroyedNotification,
        kAXTitleChangedNotification,
        kAXMenuOpenedNotification,
        kAXSelectedTextChangedNotification,
        kAXLayoutChangedNotification,
    ]

    deinit {
        // The C callback receives an unretained pointer to `self`; detach every run loop
        // source before the instance goes away so no callback can fire with a dangling refcon.
        for pid in Array(observers.keys) {
            stopObserving(pid: pid)
        }
    }

    /// Starts observing `pid`, replacing any observer already registered for it.
    ///
    /// - Parameters:
    ///   - pid: Process ID of the application to observe.
    ///   - notifications: AX notification names to register; `nil` selects `defaultNotifications`.
    /// - Throws: `BridgeError.general` when the AX observer cannot be created.
    func startObserving(pid: pid_t, notifications: [String]?) throws {
        // Stop existing observer for this PID if any
        stopObserving(pid: pid)

        var observer: AXObserver?
        let result = AXObserverCreate(pid, observerCallback, &observer)
        guard result == .success, let obs = observer else {
            throw BridgeError.general("Failed to create AX observer for PID \(pid), code \(result.rawValue)")
        }

        let appElement = AXUIElementCreateApplication(pid)
        let notifs = notifications ?? defaultNotifications

        // Pass self pointer as refcon for the callback (unretained: see `deinit`).
        let refcon = Unmanaged.passUnretained(self).toOpaque()
        for notif in notifs {
            // Registration is best-effort: the result is deliberately not checked, so a
            // notification the target rejects does not abort the whole subscription.
            AXObserverAddNotification(obs, appElement, notif as CFString, refcon)
        }

        CFRunLoopAddSource(
            CFRunLoopGetMain(),
            AXObserverGetRunLoopSource(obs),
            .defaultMode
        )

        observers[pid] = obs
    }

    /// Stops observing `pid`. Does nothing when no observer is registered for it.
    func stopObserving(pid: pid_t) {
        guard let observer = observers[pid] else { return }
        CFRunLoopRemoveSource(
            CFRunLoopGetMain(),
            AXObserverGetRunLoopSource(observer),
            .defaultMode
        )
        observers.removeValue(forKey: pid)
    }

    /// Builds the event payload for one AX notification and hands it to `onEvent`.
    ///
    /// The payload always carries `type`, `notification`, `timestamp` and `pid`.
    /// `elementRole` / `elementLabel` are added when the element exposes them as strings,
    /// and `newValue` is added for value-changed notifications that report a value.
    func handleNotification(observer: AXObserver, element: AXUIElement, notification: String) {
        var event: [String: Any] = [
            "type": mapNotificationType(notification),
            "notification": notification,
            "timestamp": timestampFormatter.string(from: Date()),
        ]

        // Get PID (left at 0 when the lookup fails)
        var pid: pid_t = 0
        AXUIElementGetPid(element, &pid)
        event["pid"] = Int(pid)

        // Get element role
        var roleValue: AnyObject?
        if AXUIElementCopyAttributeValue(element, kAXRoleAttribute as CFString, &roleValue) == .success {
            event["elementRole"] = roleValue as? String
        }

        // Get element title
        var titleValue: AnyObject?
        if AXUIElementCopyAttributeValue(element, kAXTitleAttribute as CFString, &titleValue) == .success {
            event["elementLabel"] = titleValue as? String
        }

        // Get element value for value_changed
        if notification == kAXValueChangedNotification {
            var valObj: AnyObject?
            // Bind the value instead of force-unwrapping: a success code without a value
            // must not crash the bridge process.
            if AXUIElementCopyAttributeValue(element, kAXValueAttribute as CFString, &valObj) == .success,
               let newValue = valObj {
                event["newValue"] = "\(newValue)"
            }
        }

        onEvent?(event)
    }

    /// Maps an AX notification name to the short event type used in emitted payloads.
    /// Names without a mapping are returned unchanged.
    private func mapNotificationType(_ notification: String) -> String {
        switch notification {
        case kAXValueChangedNotification: return "value_changed"
        case kAXFocusedUIElementChangedNotification: return "focus_changed"
        case kAXWindowCreatedNotification: return "window_created"
        case kAXUIElementDestroyedNotification: return "window_closed"
        case kAXTitleChangedNotification: return "title_changed"
        case kAXMenuOpenedNotification: return "menu_opened"
        case kAXLayoutChangedNotification: return "layout_changed"
        default: return notification
        }
    }
}
105
+
106
/// C-compatible trampoline handed to `AXObserverCreate`.
///
/// `refcon` is the unretained `ObserverBridge` pointer supplied when the notification
/// was registered; the call is ignored when it is missing.
private func observerCallback(
    observer: AXObserver,
    element: AXUIElement,
    notification: CFString,
    refcon: UnsafeMutableRawPointer?
) {
    if let context = refcon {
        // Recover the bridge without changing its retain count.
        let owner = Unmanaged<ObserverBridge>.fromOpaque(context).takeUnretainedValue()
        let name = notification as String
        owner.handleNotification(observer: observer, element: element, notification: name)
    }
}
@@ -0,0 +1,80 @@
1
+ import Foundation
2
+ import Vision
3
+ import AppKit
4
+
5
/// Text recognition over image files using the Vision framework.
class VisionBridge {

    /// Runs OCR on the image and optionally keeps only the regions containing `searchText`.
    ///
    /// - Parameters:
    ///   - imagePath: File system path of the image to analyse.
    ///   - searchText: Case-insensitive substring to look for; `nil` returns every region.
    /// - Returns: One dictionary per recognized region (`text`, `confidence`, `bounds`).
    func findText(imagePath: String, searchText: String?) throws -> [[String: Any]] {
        let regions = try performOCR(imagePath: imagePath)

        if let needle = searchText?.lowercased() {
            return regions.filter { region in
                guard let recognized = region["text"] as? String else { return false }
                return recognized.lowercased().contains(needle)
            }
        }
        return regions
    }

    /// Runs OCR on the image and returns both the joined text and the individual regions.
    ///
    /// - Returns: `text` (region strings joined by newlines) and `regions` (see `findText`).
    func ocr(imagePath: String) throws -> [String: Any] {
        let regions = try performOCR(imagePath: imagePath)
        let lines = regions.compactMap { $0["text"] as? String }
        return [
            "text": lines.joined(separator: "\n"),
            "regions": regions,
        ]
    }

    /// Loads the image, performs accurate text recognition with language correction, and
    /// converts every observation into a dictionary with pixel-space bounds.
    ///
    /// - Throws: `BridgeError.general` when the image cannot be loaded, plus any error
    ///   raised by the Vision request handler.
    private func performOCR(imagePath: String) throws -> [[String: Any]] {
        let fileURL = URL(fileURLWithPath: imagePath)

        guard let nsImage = NSImage(contentsOf: fileURL),
              let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            throw BridgeError.general("Failed to load image at \(imagePath)")
        }

        let pixelWidth = CGFloat(cgImage.width)
        let pixelHeight = CGFloat(cgImage.height)

        let textRequest = VNRecognizeTextRequest()
        textRequest.recognitionLevel = .accurate
        textRequest.usesLanguageCorrection = true

        try VNImageRequestHandler(cgImage: cgImage, options: [:]).perform([textRequest])

        guard let observations = textRequest.results else {
            return []
        }

        return observations.compactMap { observation -> [String: Any]? in
            guard let best = observation.topCandidates(1).first else { return nil }

            // Vision reports normalized rectangles with a bottom-left origin; scale to
            // image pixels and flip the vertical axis so the origin is top-left.
            let box = observation.boundingBox
            let bounds: [String: Any] = [
                "x": Double(box.origin.x * pixelWidth),
                "y": Double((1 - box.origin.y - box.height) * pixelHeight),
                "width": Double(box.width * pixelWidth),
                "height": Double(box.height * pixelHeight),
            ]

            return [
                "text": best.string,
                "confidence": Double(best.confidence),
                "bounds": bounds,
            ]
        }
    }
}
@@ -0,0 +1,345 @@
1
+ import Foundation
2
+
3
+ /// JSON-RPC over stdio bridge for macOS native APIs.
4
+ /// Reads JSON requests from stdin (one per line), dispatches to the appropriate bridge,
5
+ /// and writes JSON responses to stdout (one per line).
6
+
7
/// One request line read from stdin.
struct JsonRpcRequest: Codable {
    /// Caller-chosen identifier, echoed back in the matching response.
    let id: Int
    /// Name of the bridge method to invoke (for example `"app.launch"`).
    let method: String
    /// Optional named arguments for the method.
    let params: [String: AnyCodable]?
}
12
+
13
/// One response line written to stdout.
struct JsonRpcResponse: Codable {
    /// Identifier of the request being answered.
    let id: Int
    /// Method result on success; `nil` when the call failed.
    let result: AnyCodable?
    /// Failure description; `nil` when the call succeeded.
    let error: JsonRpcError?
}
18
+
19
/// Error payload carried by a failed `JsonRpcResponse`.
struct JsonRpcError: Codable {
    /// Numeric error code.
    let code: Int
    /// Human-readable description of the failure.
    let message: String
}
23
+
24
/// Type-erased Codable wrapper for JSON values.
///
/// Decoding produces `NSNull`, `Bool`, `Int`, `Double`, `String`, `[Any]` or `[String: Any]`.
/// Encoding accepts the same shapes plus the other common fixed-width integer and
/// floating-point types, so numeric results such as an `Int32` process ID are written as
/// numbers instead of silently degrading to `null`.
struct AnyCodable: Codable {
    /// The wrapped, untyped JSON value.
    let value: Any

    init(_ value: Any) {
        self.value = value
    }

    /// Decodes a single JSON value, trying the scalar types before containers.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the value matches none of the supported types.
    init(from decoder: Decoder) throws {
        let container = try decoder.singleValueContainer()
        if container.decodeNil() {
            value = NSNull()
        } else if let bool = try? container.decode(Bool.self) {
            value = bool
        } else if let int = try? container.decode(Int.self) {
            value = int
        } else if let double = try? container.decode(Double.self) {
            value = double
        } else if let string = try? container.decode(String.self) {
            value = string
        } else if let array = try? container.decode([AnyCodable].self) {
            value = array.map { $0.value }
        } else if let dict = try? container.decode([String: AnyCodable].self) {
            value = dict.mapValues { $0.value }
        } else {
            throw DecodingError.dataCorruptedError(in: container, debugDescription: "Unsupported type")
        }
    }

    /// Encodes the wrapped value as JSON. Values of an unsupported type are written as `null`.
    func encode(to encoder: Encoder) throws {
        var container = encoder.singleValueContainer()
        switch value {
        case is NSNull:
            try container.encodeNil()
        case let bool as Bool:
            try container.encode(bool)
        case let int as Int:
            try container.encode(int)
        case let double as Double:
            try container.encode(double)
        // A value boxed in `Any` is not implicitly converted between numeric types, so
        // each additional width needs its own case to avoid falling through to `null`.
        case let int as Int32:
            try container.encode(int)
        case let int as Int64:
            try container.encode(int)
        case let int as Int16:
            try container.encode(int)
        case let int as Int8:
            try container.encode(int)
        case let uint as UInt:
            try container.encode(uint)
        case let uint as UInt32:
            try container.encode(uint)
        case let uint as UInt64:
            try container.encode(uint)
        case let uint as UInt16:
            try container.encode(uint)
        case let uint as UInt8:
            try container.encode(uint)
        case let float as Float:
            try container.encode(Double(float))
        case let cgFloat as CGFloat:
            try container.encode(Double(cgFloat))
        case let string as String:
            try container.encode(string)
        case let array as [Any]:
            try container.encode(array.map { AnyCodable($0) })
        case let dict as [String: Any]:
            try container.encode(dict.mapValues { AnyCodable($0) })
        default:
            try container.encodeNil()
        }
    }
}
75
+
76
+ // MARK: - Helpers
77
+
78
/// Converts a decoded JSON scalar to `Double`, accepting integers as well.
private func jsonDouble(_ raw: Any) -> Double? {
    if let double = raw as? Double { return double }
    if let int = raw as? Int { return Double(int) }
    return nil
}

/// Converts a decoded JSON scalar to `Int`, truncating fractional doubles.
/// Doubles outside the range of `Int` yield `nil` instead of trapping.
private func jsonInt(_ raw: Any) -> Int? {
    if let int = raw as? Int { return int }
    guard let double = raw as? Double,
          double.isFinite,
          double >= -0x1p63,
          double < 0x1p63 else { return nil }
    return Int(double)
}

/// Looks up `key` in `params` and returns it as `T`, or `nil` when the key is absent or
/// the value cannot be represented as `T`.
///
/// JSON does not distinguish integers from floating-point numbers, so the following
/// numeric coercions are applied when a direct cast fails:
/// - `Double` / `[Double]` / `[String: Double]` accept integer values;
/// - `Int` / `[Int]` accept floating-point values (truncated toward zero).
///
/// - Parameters:
///   - params: Decoded request parameters, if any.
///   - key: Parameter name.
func param<T>(_ params: [String: AnyCodable]?, _ key: String) -> T? {
    guard let raw = params?[key]?.value else { return nil }
    if let direct = raw as? T { return direct }

    // Scalar coercion.
    if T.self == Double.self {
        return jsonDouble(raw) as? T
    }
    if T.self == Int.self {
        return jsonInt(raw) as? T
    }

    // Container coercion: every element must convert, otherwise the parameter is rejected.
    if T.self == [Double].self, let items = raw as? [Any] {
        let converted = items.compactMap(jsonDouble)
        return converted.count == items.count ? converted as? T : nil
    }
    if T.self == [Int].self, let items = raw as? [Any] {
        let converted = items.compactMap(jsonInt)
        return converted.count == items.count ? converted as? T : nil
    }
    if T.self == [String: Double].self, let dict = raw as? [String: Any] {
        let converted = dict.compactMapValues(jsonDouble)
        return converted.count == dict.count ? converted as? T : nil
    }
    return nil
}
90
+
91
/// Same lookup as `param`, but a missing or unconvertible value is an error.
///
/// - Throws: `BridgeError.missingParam` carrying `key`.
func requiredParam<T>(_ params: [String: AnyCodable]?, _ key: String) throws -> T {
    let found: T? = param(params, key)
    if let present = found {
        return present
    }
    throw BridgeError.missingParam(key)
}
97
+
98
/// Errors raised by the bridge; `errorDescription` is the text sent back to the client.
enum BridgeError: LocalizedError {
    /// A required request parameter was absent or had the wrong type.
    case missingParam(String)
    /// The requested entity could not be located.
    case notFound(String)
    /// The operation was refused for lack of permission.
    case permissionDenied(String)
    /// Any other failure; the payload is used verbatim as the message.
    case general(String)

    var errorDescription: String? {
        let message: String
        switch self {
        case let .missingParam(name):
            message = "Missing required parameter: \(name)"
        case let .notFound(what):
            message = "Not found: \(what)"
        case let .permissionDenied(detail):
            message = "Permission denied: \(detail)"
        case let .general(text):
            message = text
        }
        return message
    }
}
113
+
114
+ // MARK: - Bridge Modules
115
+
116
// Process-wide bridge instances shared by every dispatched request.
let accessibilityBridge: AccessibilityBridge = .init()
let observerBridge: ObserverBridge = .init()
let coreGraphicsBridge: CoreGraphicsBridge = .init()
let visionBridge: VisionBridge = .init()
let appManagement: AppManagement = .init()
121
+
122
+ // MARK: - Method Dispatch
123
+
124
/// Routes one request to the bridge module that implements `method`.
///
/// - Parameters:
///   - method: Method name from the request (`"ping"`, `"app.*"`, `"ax.*"`, `"observer.*"`,
///     `"cg.*"`, `"vision.*"`).
///   - params: Decoded request parameters, if any.
/// - Returns: The method's result as a JSON-encodable value.
/// - Throws: `BridgeError.missingParam` for absent or mistyped required parameters,
///   `BridgeError.general` for an unknown method or an out-of-range pid, and any error
///   thrown by the bridge module itself.
func dispatch(method: String, params: [String: AnyCodable]?) throws -> Any {
    /// Reads the required `pid` parameter and narrows it to `pid_t`.
    /// `pid_t(_:)` traps on overflow, so a client-supplied out-of-range value is rejected
    /// with an error instead of crashing the bridge.
    func targetPid() throws -> pid_t {
        let raw: Int = try requiredParam(params, "pid")
        guard let pid = pid_t(exactly: raw) else {
            throw BridgeError.general("Invalid pid: \(raw)")
        }
        return pid
    }

    switch method {
    // Lifecycle
    case "ping":
        return [
            "pong": true,
            // Widen from Int32 so the value is encoded as a JSON number rather than null.
            "pid": Int(ProcessInfo.processInfo.processIdentifier),
            "accessible": accessibilityBridge.isAccessibilityTrusted(),
        ] as [String: Any]

    case "check_permissions":
        return ["trusted": accessibilityBridge.isAccessibilityTrusted()] as [String: Bool]

    // App Management
    case "app.launch":
        let bundleId: String = try requiredParam(params, "bundleId")
        return try appManagement.launchApp(bundleId: bundleId)

    case "app.focus":
        let bundleId: String = try requiredParam(params, "bundleId")
        try appManagement.focusApp(bundleId: bundleId)
        return ["ok": true]

    case "app.list":
        return appManagement.listRunningApps()

    case "app.windows":
        return appManagement.listWindows()

    case "app.frontmost":
        return appManagement.frontmostApp()

    // Accessibility
    case "ax.findElement":
        let pid = try targetPid()
        let role: String? = param(params, "role")
        let title: String? = param(params, "title")
        let value: String? = param(params, "value")
        let identifier: String? = param(params, "identifier")
        let exact: Bool = param(params, "exact") ?? true
        return try accessibilityBridge.findElement(
            pid: pid, role: role, title: title, value: value,
            identifier: identifier, exact: exact
        )

    case "ax.getElementTree":
        let pid = try targetPid()
        let maxDepth: Int = param(params, "maxDepth") ?? 5
        return try accessibilityBridge.getElementTree(pid: pid, maxDepth: maxDepth)

    case "ax.performAction":
        let pid = try targetPid()
        let elementPath: [Int] = try requiredParam(params, "elementPath")
        let action: String = param(params, "action") ?? "AXPress"
        try accessibilityBridge.performAction(pid: pid, elementPath: elementPath, action: action)
        return ["ok": true]

    case "ax.setElementValue":
        let pid = try targetPid()
        let elementPath: [Int] = try requiredParam(params, "elementPath")
        let value: String = try requiredParam(params, "value")
        try accessibilityBridge.setElementValue(pid: pid, elementPath: elementPath, value: value)
        return ["ok": true]

    case "ax.getElementValue":
        let pid = try targetPid()
        let elementPath: [Int] = try requiredParam(params, "elementPath")
        return try accessibilityBridge.getElementValue(pid: pid, elementPath: elementPath)

    case "ax.menuClick":
        let pid = try targetPid()
        let menuPath: [String] = try requiredParam(params, "menuPath")
        try accessibilityBridge.menuClick(pid: pid, menuPath: menuPath)
        return ["ok": true]

    // Observer
    case "observer.start":
        let pid = try targetPid()
        let notifications: [String]? = param(params, "notifications")
        try observerBridge.startObserving(pid: pid, notifications: notifications)
        return ["ok": true]

    case "observer.stop":
        let pid = try targetPid()
        observerBridge.stopObserving(pid: pid)
        return ["ok": true]

    // CoreGraphics
    case "cg.mouseClick":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        let button: String = param(params, "button") ?? "left"
        let clickCount: Int = param(params, "clickCount") ?? 1
        coreGraphicsBridge.mouseClick(x: x, y: y, button: button, clickCount: clickCount)
        return ["ok": true]

    case "cg.mouseMove":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        coreGraphicsBridge.mouseMove(x: x, y: y)
        return ["ok": true]

    case "cg.mouseDrag":
        let fromX: Double = try requiredParam(params, "fromX")
        let fromY: Double = try requiredParam(params, "fromY")
        let toX: Double = try requiredParam(params, "toX")
        let toY: Double = try requiredParam(params, "toY")
        coreGraphicsBridge.mouseDrag(fromX: fromX, fromY: fromY, toX: toX, toY: toY)
        return ["ok": true]

    case "cg.mouseFlick":
        let fxF: Double = try requiredParam(params, "fromX")
        let fyF: Double = try requiredParam(params, "fromY")
        let txF: Double = try requiredParam(params, "toX")
        let tyF: Double = try requiredParam(params, "toY")
        coreGraphicsBridge.mouseFlick(fromX: fxF, fromY: fyF, toX: txF, toY: tyF)
        return ["ok": true]

    case "cg.keyCombo":
        let keys: [String] = try requiredParam(params, "keys")
        coreGraphicsBridge.keyCombo(keys: keys)
        return ["ok": true]

    case "cg.typeText":
        let text: String = try requiredParam(params, "text")
        coreGraphicsBridge.typeText(text: text)
        return ["ok": true]

    case "cg.captureScreen":
        let region: [String: Double]? = param(params, "region")
        return try coreGraphicsBridge.captureScreen(region: region)

    case "cg.captureWindow":
        let windowId: Int = try requiredParam(params, "windowId")
        return try coreGraphicsBridge.captureWindow(windowId: windowId)

    case "cg.scroll":
        let x: Double = try requiredParam(params, "x")
        let y: Double = try requiredParam(params, "y")
        let deltaX: Int = param(params, "deltaX") ?? 0
        let deltaY: Int = param(params, "deltaY") ?? 0
        coreGraphicsBridge.scroll(x: x, y: y, deltaX: deltaX, deltaY: deltaY)
        return ["ok": true]

    // Vision
    case "vision.findText":
        let imagePath: String = try requiredParam(params, "imagePath")
        let searchText: String? = param(params, "searchText")
        return try visionBridge.findText(imagePath: imagePath, searchText: searchText)

    case "vision.ocr":
        let imagePath: String = try requiredParam(params, "imagePath")
        return try visionBridge.ocr(imagePath: imagePath)

    default:
        throw BridgeError.general("Unknown method: \(method)")
    }
}
278
+
279
+ // MARK: - Main Loop
280
+
281
// Shared JSON coders. Output formatting is left empty (compact), which keeps every
// encoded message on a single line.
let encoder: JSONEncoder = {
    let compact = JSONEncoder()
    compact.outputFormatting = []
    return compact
}()

let decoder: JSONDecoder = .init()
285
+
286
/// Serializes writers so that lines from responses and observer events never interleave.
let outputLock = NSLock()

/// Write a JSON line to stdout (thread-safe): `data` followed by a newline.
func writeLine(_ data: Data) {
    var line = data
    line.append(0x0A) // "\n"

    outputLock.lock()
    FileHandle.standardOutput.write(line)
    outputLock.unlock()
}
294
+
295
/// Encodes `response` and writes it to stdout as one line.
///
/// If the result cannot be encoded (for example because it contains a non-finite
/// number), an error response with the same `id` is written instead, so the caller is
/// never left waiting for a reply that will not come.
func writeResponse(_ response: JsonRpcResponse) {
    do {
        writeLine(try encoder.encode(response))
    } catch {
        let fallback = JsonRpcResponse(
            id: response.id,
            result: nil,
            error: JsonRpcError(
                code: -32603,
                message: "Failed to encode response: \(error.localizedDescription)"
            )
        )
        // The fallback holds only an Int and Strings; if even that fails there is
        // nothing further that can usefully be reported.
        if let data = try? encoder.encode(fallback) {
            writeLine(data)
        }
    }
}
300
+
301
/// Writes an unsolicited event to stdout as one line of the form `{"id": 0, "event": …}`.
///
/// Events whose payload cannot be represented as JSON are dropped.
func writeEvent(_ event: [String: Any]) {
    let wrapped: [String: Any] = ["id": 0, "event": event]
    // `JSONSerialization.data(withJSONObject:)` raises an Objective-C exception (which
    // `try?` cannot catch) for objects that are not valid JSON, so validate first.
    guard JSONSerialization.isValidJSONObject(wrapped) else { return }
    if let data = try? JSONSerialization.data(withJSONObject: wrapped) {
        writeLine(data)
    }
}
307
+
308
// Forward every observer notification to stdout as an event line.
observerBridge.onEvent = { payload in writeEvent(payload) }
312
+
313
/// Decodes one request line, dispatches it, and writes exactly one response line.
/// Lines that are not a valid request produce a parse error response with id 0.
func handleLine(_ line: String) {
    let data = Data(line.utf8)

    do {
        let request = try decoder.decode(JsonRpcRequest.self, from: data)
        do {
            let result = try dispatch(method: request.method, params: request.params)
            let response = JsonRpcResponse(
                id: request.id,
                result: AnyCodable(result),
                error: nil
            )
            writeResponse(response)
        } catch {
            let response = JsonRpcResponse(
                id: request.id,
                result: nil,
                error: JsonRpcError(code: -1, message: error.localizedDescription)
            )
            writeResponse(response)
        }
    } catch {
        // Malformed JSON — write error with id=0
        let response = JsonRpcResponse(
            id: 0,
            result: nil,
            error: JsonRpcError(code: -32700, message: "Parse error: \(error.localizedDescription)")
        )
        writeResponse(response)
    }
}

// Process stdin line by line.
//
// The blocking read happens on a background queue so the main thread is free to run the
// main run loop. The AX observers attach their run loop sources to the main run loop;
// if the main thread sat in `readLine()` those sources would never be serviced and no
// observer event would ever be delivered. Each line is handed back to the main queue,
// which keeps requests strictly sequential and in arrival order, as before.
DispatchQueue.global(qos: .userInitiated).async {
    while let line = readLine() {
        guard !line.isEmpty else { continue }
        DispatchQueue.main.async { handleLine(line) }
    }
    // stdin closed: exit once every request queued ahead of this block has been answered.
    DispatchQueue.main.async { exit(0) }
}

RunLoop.main.run()