screenhand 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103):
  1. package/.claude/commands/automate.md +28 -0
  2. package/.claude/commands/debug-ui.md +19 -0
  3. package/.claude/commands/screenshot.md +15 -0
  4. package/.github/FUNDING.yml +1 -0
  5. package/.github/ISSUE_TEMPLATE/bug_report.md +27 -0
  6. package/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  7. package/.mcp.json +8 -0
  8. package/DESKTOP_MCP_GUIDE.md +92 -0
  9. package/LICENSE +661 -21
  10. package/README.md +97 -292
  11. package/SECURITY.md +44 -0
  12. package/docs/architecture.md +47 -0
  13. package/install-skills.sh +19 -0
  14. package/mcp-bridge.ts +271 -0
  15. package/mcp-desktop.ts +1221 -0
  16. package/native/macos-bridge/Package.swift +21 -0
  17. package/native/macos-bridge/Sources/AccessibilityBridge.swift +261 -0
  18. package/native/macos-bridge/Sources/AppManagement.swift +129 -0
  19. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +242 -0
  20. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  21. package/native/macos-bridge/Sources/VisionBridge.swift +80 -0
  22. package/native/macos-bridge/Sources/main.swift +345 -0
  23. package/native/windows-bridge/AppManagement.cs +234 -0
  24. package/native/windows-bridge/InputBridge.cs +436 -0
  25. package/native/windows-bridge/Program.cs +265 -0
  26. package/native/windows-bridge/ScreenCapture.cs +329 -0
  27. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  28. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  29. package/package.json +3 -14
  30. package/playbooks/devpost.json +186 -0
  31. package/playbooks/instagram.json +41 -0
  32. package/playbooks/instagram_v2.json +201 -0
  33. package/playbooks/x_v1.json +211 -0
  34. package/scripts/devpost-live-loop.mjs +421 -0
  35. package/src/config.ts +30 -0
  36. package/src/index.ts +92 -0
  37. package/src/logging/timeline-logger.ts +55 -0
  38. package/src/mcp/server.ts +449 -0
  39. package/src/memory/recall.ts +191 -0
  40. package/src/memory/research.ts +146 -0
  41. package/src/memory/seeds.ts +123 -0
  42. package/src/memory/session.ts +201 -0
  43. package/src/memory/store.ts +434 -0
  44. package/src/memory/types.ts +69 -0
  45. package/src/native/bridge-client.ts +239 -0
  46. package/src/native/macos-bridge-client.ts +22 -0
  47. package/src/runtime/accessibility-adapter.ts +487 -0
  48. package/src/runtime/app-adapter.ts +169 -0
  49. package/src/runtime/applescript-adapter.ts +376 -0
  50. package/src/runtime/ax-role-map.ts +102 -0
  51. package/src/runtime/browser-adapter.ts +129 -0
  52. package/src/runtime/cdp-chrome-adapter.ts +676 -0
  53. package/src/runtime/composite-adapter.ts +274 -0
  54. package/src/runtime/executor.ts +396 -0
  55. package/src/runtime/locator-cache.ts +33 -0
  56. package/src/runtime/planning-loop.ts +81 -0
  57. package/src/runtime/service.ts +448 -0
  58. package/src/runtime/session-manager.ts +50 -0
  59. package/src/runtime/state-observer.ts +136 -0
  60. package/src/runtime/vision-adapter.ts +297 -0
  61. package/src/types.ts +297 -0
  62. package/tests/bridge-client.test.ts +176 -0
  63. package/tests/browser-stealth.test.ts +210 -0
  64. package/tests/composite-adapter.test.ts +64 -0
  65. package/tests/mcp-server.test.ts +151 -0
  66. package/tests/memory-recall.test.ts +339 -0
  67. package/tests/memory-research.test.ts +159 -0
  68. package/tests/memory-seeds.test.ts +120 -0
  69. package/tests/memory-store.test.ts +392 -0
  70. package/tests/types.test.ts +92 -0
  71. package/tsconfig.check.json +17 -0
  72. package/tsconfig.json +19 -0
  73. package/vitest.config.ts +8 -0
  74. package/dist/config.js +0 -9
  75. package/dist/index.js +0 -55
  76. package/dist/logging/timeline-logger.js +0 -29
  77. package/dist/mcp/mcp-stdio-server.js +0 -284
  78. package/dist/mcp/server.js +0 -347
  79. package/dist/mcp-entry.js +0 -62
  80. package/dist/memory/recall.js +0 -160
  81. package/dist/memory/research.js +0 -98
  82. package/dist/memory/seeds.js +0 -89
  83. package/dist/memory/session.js +0 -161
  84. package/dist/memory/store.js +0 -391
  85. package/dist/memory/types.js +0 -4
  86. package/dist/native/bridge-client.js +0 -173
  87. package/dist/native/macos-bridge-client.js +0 -5
  88. package/dist/runtime/accessibility-adapter.js +0 -377
  89. package/dist/runtime/app-adapter.js +0 -48
  90. package/dist/runtime/applescript-adapter.js +0 -283
  91. package/dist/runtime/ax-role-map.js +0 -80
  92. package/dist/runtime/browser-adapter.js +0 -36
  93. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  94. package/dist/runtime/composite-adapter.js +0 -205
  95. package/dist/runtime/executor.js +0 -250
  96. package/dist/runtime/locator-cache.js +0 -12
  97. package/dist/runtime/planning-loop.js +0 -47
  98. package/dist/runtime/service.js +0 -372
  99. package/dist/runtime/session-manager.js +0 -28
  100. package/dist/runtime/state-observer.js +0 -105
  101. package/dist/runtime/vision-adapter.js +0 -208
  102. package/dist/test-mcp-protocol.js +0 -138
  103. package/dist/types.js +0 -1
@@ -0,0 +1,21 @@
1
+ // swift-tools-version: 5.9
2
+ import PackageDescription
3
+
4
// System frameworks the bridge executable is linked against.
let bridgeFrameworks: [LinkerSetting] = [
    .linkedFramework("ApplicationServices"),
    .linkedFramework("CoreGraphics"),
    .linkedFramework("AppKit"),
    .linkedFramework("Vision"),
]

// The single executable target; its sources are looked up under `Sources/`.
let bridgeTarget: Target = .executableTarget(
    name: "macos-bridge",
    path: "Sources",
    linkerSettings: bridgeFrameworks
)

// Package manifest for the native macOS bridge (minimum deployment target: macOS 13).
let package = Package(
    name: "macos-bridge",
    platforms: [.macOS(.v13)],
    targets: [bridgeTarget]
)
@@ -0,0 +1,261 @@
1
+ import ApplicationServices
2
+ import AppKit
3
+ import Foundation
4
+
5
/// Wrapper around the macOS Accessibility (AX) C API for inspecting and driving
/// the UI of another process.
///
/// Elements are addressed by an "element path": a list of child indices that is
/// walked from the application's root AX element. `findElement` produces such
/// paths; `performAction`, `setElementValue` and `getElementValue` consume them.
class AccessibilityBridge {

    /// Maximum number of children serialized per node by `buildTree`
    /// (children at indices 0...100 are visited, the rest are dropped).
    private static let maxChildrenPerNode = 101

    /// Returns the result of `AXIsProcessTrusted()`, i.e. whether the current
    /// process is a trusted accessibility client.
    func isAccessibilityTrusted() -> Bool {
        return AXIsProcessTrusted()
    }

    // MARK: - Element Tree

    /// Serializes the accessibility tree of the application with the given `pid`.
    ///
    /// - Parameters:
    ///   - pid: Process identifier of the target application.
    ///   - maxDepth: Children are only expanded for nodes whose depth is below
    ///     this value (the application element is depth 0).
    /// - Returns: A nested dictionary describing the root element and its descendants.
    func getElementTree(pid: pid_t, maxDepth: Int) throws -> [String: Any] {
        let appElement = AXUIElementCreateApplication(pid)
        return try buildTree(element: appElement, depth: 0, maxDepth: maxDepth)
    }

    /// Recursively converts `element` into a dictionary.
    ///
    /// Always contains `"role"`; `"title"`, `"value"`, `"description"`,
    /// `"identifier"`, `"enabled"`, `"focused"`, `"position"`, `"size"` and
    /// `"children"` are added only when the corresponding attribute is readable
    /// (and, for the string attributes, non-empty).
    private func buildTree(element: AXUIElement, depth: Int, maxDepth: Int) throws -> [String: Any] {
        var node: [String: Any] = [:]

        node["role"] = getAttribute(element, kAXRoleAttribute) as? String ?? "Unknown"
        if let title = nonEmptyString(element, kAXTitleAttribute) {
            node["title"] = title
        }
        if let value = getAttribute(element, kAXValueAttribute) {
            node["value"] = "\(value)"
        }
        if let desc = nonEmptyString(element, kAXDescriptionAttribute) {
            node["description"] = desc
        }
        if let identifier = nonEmptyString(element, kAXIdentifierAttribute) {
            node["identifier"] = identifier
        }
        if let enabled = getAttribute(element, kAXEnabledAttribute) as? Bool {
            node["enabled"] = enabled
        }
        if let focused = getAttribute(element, kAXFocusedAttribute) as? Bool {
            node["focused"] = focused
        }

        // Position and size
        if let origin = getAttribute(element, kAXPositionAttribute).flatMap({ cgPoint(from: $0) }) {
            node["position"] = ["x": Double(origin.x), "y": Double(origin.y)]
        }
        if let extent = getAttribute(element, kAXSizeAttribute).flatMap({ cgSize(from: $0) }) {
            node["size"] = ["width": Double(extent.width), "height": Double(extent.height)]
        }

        // Children (if not at max depth)
        guard depth < maxDepth,
              let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] else {
            return node
        }
        let childNodes = try children.prefix(AccessibilityBridge.maxChildrenPerNode).map { child in
            try buildTree(element: child, depth: depth + 1, maxDepth: maxDepth)
        }
        if !childNodes.isEmpty {
            node["children"] = childNodes
        }

        return node
    }

    // MARK: - Find Element

    /// Finds the first element (depth-first, parents before children) that
    /// satisfies every non-nil criterion.
    ///
    /// - Parameters:
    ///   - pid: Process identifier of the target application.
    ///   - role: Expected AX role, or `nil` to ignore.
    ///   - title: Expected title, or `nil` to ignore.
    ///   - value: Expected stringified value, or `nil` to ignore.
    ///   - identifier: Expected AX identifier, or `nil` to ignore.
    ///   - exact: `true` for exact string equality, `false` for a
    ///     case-insensitive substring match.
    /// - Returns: A dictionary with `"role"`, `"title"`, `"elementPath"`,
    ///   `"handleId"` and, when available, `"value"`, `"identifier"`, `"bounds"`.
    /// - Throws: `BridgeError.notFound` when nothing matches (which is always
    ///   the case when every criterion is `nil`).
    func findElement(pid: pid_t, role: String?, title: String?, value: String?,
                     identifier: String?, exact: Bool) throws -> [String: Any] {
        // Without any criterion nothing can match, so skip the tree walk entirely.
        guard role != nil || title != nil || value != nil || identifier != nil else {
            throw BridgeError.notFound("Element not found matching criteria")
        }

        let appElement = AXUIElementCreateApplication(pid)
        guard let result = searchElement(
            element: appElement, path: [], role: role, title: title,
            value: value, identifier: identifier, exact: exact
        ) else {
            throw BridgeError.notFound("Element not found matching criteria")
        }
        return result
    }

    /// Depth-first search used by `findElement`. `path` is the list of child
    /// indices leading from the application element to `element`.
    private func searchElement(element: AXUIElement, path: [Int], role: String?,
                               title: String?, value: String?, identifier: String?,
                               exact: Bool) -> [String: Any]? {
        let elementRole = getAttribute(element, kAXRoleAttribute) as? String ?? ""
        let elementTitle = getAttribute(element, kAXTitleAttribute) as? String ?? ""
        let elementValue = getAttribute(element, kAXValueAttribute).map { "\($0)" } ?? ""
        let elementId = getAttribute(element, kAXIdentifierAttribute) as? String ?? ""

        // Pair each requested criterion with the attribute it is compared against.
        let criteria: [(wanted: String?, actual: String)] = [
            (role, elementRole),
            (title, elementTitle),
            (value, elementValue),
            (identifier, elementId),
        ]
        let hasCriteria = criteria.contains { $0.wanted != nil }
        let matches = hasCriteria && criteria.allSatisfy { criterion in
            guard let wanted = criterion.wanted else { return true }
            return matchString(criterion.actual, wanted, exact: exact)
        }

        if matches {
            var result: [String: Any] = [
                "role": elementRole,
                "title": elementTitle,
                "elementPath": path,
                "handleId": "ax_\(path.map { String($0) }.joined(separator: "_"))",
            ]
            if !elementValue.isEmpty { result["value"] = elementValue }
            if !elementId.isEmpty { result["identifier"] = elementId }

            // Bounds are only reported when both position and size can be decoded.
            if let origin = getAttribute(element, kAXPositionAttribute).flatMap({ cgPoint(from: $0) }),
               let extent = getAttribute(element, kAXSizeAttribute).flatMap({ cgSize(from: $0) }) {
                result["bounds"] = [
                    "x": Double(origin.x), "y": Double(origin.y),
                    "width": Double(extent.width), "height": Double(extent.height)
                ]
            }

            return result
        }

        // Search children
        guard let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] else {
            return nil
        }
        for (index, child) in children.enumerated() {
            if let found = searchElement(
                element: child, path: path + [index], role: role, title: title,
                value: value, identifier: identifier, exact: exact
            ) {
                return found
            }
        }

        return nil
    }

    // MARK: - Actions

    /// Performs the named AX action (e.g. `AXPress`) on the element at `elementPath`.
    ///
    /// - Throws: `BridgeError.notFound` if the path cannot be resolved,
    ///   `BridgeError.general` if the AX call does not return `.success`.
    func performAction(pid: pid_t, elementPath: [Int], action: String) throws {
        let element = try resolveElement(pid: pid, path: elementPath)
        let result = AXUIElementPerformAction(element, action as CFString)
        guard result == .success else {
            throw BridgeError.general("AX action '\(action)' failed with code \(result.rawValue)")
        }
    }

    /// Sets the value of the element at `elementPath`.
    ///
    /// First tries to write the AX value attribute directly. If that fails, the
    /// element is focused and `value` is typed through `CoreGraphicsBridge`.
    ///
    /// - Throws: `BridgeError.notFound` if the path cannot be resolved,
    ///   `BridgeError.general` if the element can neither be set nor focused.
    func setElementValue(pid: pid_t, elementPath: [Int], value: String) throws {
        let element = try resolveElement(pid: pid, path: elementPath)
        let setResult = AXUIElementSetAttributeValue(element, kAXValueAttribute as CFString, value as CFTypeRef)
        guard setResult != .success else { return }

        // Fallback: give the element keyboard focus, then type the text.
        let focusResult = AXUIElementSetAttributeValue(element, kAXFocusedAttribute as CFString, true as CFTypeRef)
        guard focusResult == .success else {
            throw BridgeError.general("Cannot focus element for value set, code \(focusResult.rawValue)")
        }
        // NOTE(review): typing presumably inserts at the current insertion point
        // rather than replacing existing content; confirm that is acceptable.
        CoreGraphicsBridge().typeText(text: value)
    }

    /// Reads the value of the element at `elementPath`.
    ///
    /// - Returns: `["value": <stringified value>]`, with an empty string when
    ///   the attribute cannot be read.
    func getElementValue(pid: pid_t, elementPath: [Int]) throws -> [String: Any] {
        let element = try resolveElement(pid: pid, path: elementPath)
        let value = getAttribute(element, kAXValueAttribute)
        return ["value": value.map { "\($0)" } ?? ""]
    }

    // MARK: - Menu Click

    /// Walks the application's menu bar along `menuPath` (e.g. `["File", "Save"]`),
    /// pressing each item in turn. Titles are compared for exact equality.
    ///
    /// - Throws: `BridgeError.missingParam` for an empty path;
    ///   `BridgeError.notFound` when the menu bar or an item is missing, or when
    ///   a non-final item exposes no submenu to descend into.
    func menuClick(pid: pid_t, menuPath: [String]) throws {
        guard !menuPath.isEmpty else {
            throw BridgeError.missingParam("menuPath must not be empty")
        }

        let appElement = AXUIElementCreateApplication(pid)
        guard let menuBar = getAttribute(appElement, kAXMenuBarAttribute),
              CFGetTypeID(menuBar) == AXUIElementGetTypeID() else {
            throw BridgeError.notFound("Menu bar not found")
        }
        // Force cast is safe here: the CF type ID was verified by the guard above.
        var currentElement = menuBar as! AXUIElement

        for (position, menuItem) in menuPath.enumerated() {
            guard let children = getAttribute(currentElement, kAXChildrenAttribute) as? [AXUIElement] else {
                throw BridgeError.notFound("No children found in menu for '\(menuItem)'")
            }
            guard let match = children.first(where: {
                (getAttribute($0, kAXTitleAttribute) as? String ?? "") == menuItem
            }) else {
                throw BridgeError.notFound("Menu item '\(menuItem)' not found")
            }

            // Press the item: opens its submenu, or activates it when it is a leaf.
            // The AXError is deliberately not treated as fatal.
            // NOTE(review): confirm whether failures here should surface to the caller.
            _ = AXUIElementPerformAction(match, kAXPressAction as CFString)
            // Small delay for the menu to open
            Thread.sleep(forTimeInterval: 0.1)

            if let submenu = (getAttribute(match, kAXChildrenAttribute) as? [AXUIElement])?.first {
                currentElement = submenu
            } else if position < menuPath.count - 1 {
                // More path components remain but there is nothing to descend into.
                // Continuing would search the same menu level again and could
                // press an unrelated sibling.
                throw BridgeError.notFound("Menu item '\(menuItem)' has no submenu")
            }
        }
    }

    // MARK: - Helpers

    /// Resolves an element path (child indices from the application element).
    ///
    /// - Throws: `BridgeError.notFound` when a node has no children or an index
    ///   is outside the valid range (including negative indices).
    private func resolveElement(pid: pid_t, path: [Int]) throws -> AXUIElement {
        var current = AXUIElementCreateApplication(pid) as AXUIElement
        for index in path {
            guard let children = getAttribute(current, kAXChildrenAttribute) as? [AXUIElement] else {
                throw BridgeError.notFound("No children at path index \(index)")
            }
            // `indices.contains` also rejects negative indices, which would trap on subscript.
            guard children.indices.contains(index) else {
                throw BridgeError.notFound("Index \(index) out of bounds (count: \(children.count))")
            }
            current = children[index]
        }
        return current
    }

    /// Copies a single AX attribute; returns `nil` unless the call reports `.success`.
    private func getAttribute(_ element: AXUIElement, _ attribute: String) -> AnyObject? {
        var value: AnyObject?
        let result = AXUIElementCopyAttributeValue(element, attribute as CFString, &value)
        return result == .success ? value : nil
    }

    /// Reads a string attribute, treating unreadable and empty values alike as `nil`.
    private func nonEmptyString(_ element: AXUIElement, _ attribute: String) -> String? {
        guard let text = getAttribute(element, attribute) as? String, !text.isEmpty else {
            return nil
        }
        return text
    }

    /// Decodes an attribute value as a `CGPoint`, or `nil` if it is not an
    /// `AXValue` holding a point.
    private func cgPoint(from raw: AnyObject) -> CGPoint? {
        guard CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        var point = CGPoint.zero
        // Force cast is safe here: the CF type ID was verified by the guard above.
        return AXValueGetValue(raw as! AXValue, .cgPoint, &point) ? point : nil
    }

    /// Decodes an attribute value as a `CGSize`, or `nil` if it is not an
    /// `AXValue` holding a size.
    private func cgSize(from raw: AnyObject) -> CGSize? {
        guard CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        var size = CGSize.zero
        // Force cast is safe here: the CF type ID was verified by the guard above.
        return AXValueGetValue(raw as! AXValue, .cgSize, &size) ? size : nil
    }

    /// Compares `haystack` with `needle`: exact equality when `exact` is true,
    /// otherwise a localized case-insensitive substring test.
    private func matchString(_ haystack: String, _ needle: String, exact: Bool) -> Bool {
        if exact {
            return haystack == needle
        }
        return haystack.localizedCaseInsensitiveContains(needle)
    }
}
@@ -0,0 +1,129 @@
1
+ import AppKit
2
+ import Foundation
3
+
4
/// Application- and window-level queries and commands built on `NSWorkspace`,
/// `NSRunningApplication` and the `CGWindowList` API.
class AppManagement {

    /// Launches and activates the application registered for `bundleId`, then
    /// waits up to 10 seconds for it to report that it finished launching.
    ///
    /// - Returns: `"bundleId"`, `"appName"`, `"pid"` and an empty `"windowTitle"`.
    /// - Throws: `BridgeError.notFound` when no application is registered for
    ///   the bundle ID; `BridgeError.general` when the launch fails or yields
    ///   no running application.
    func launchApp(bundleId: String) throws -> [String: Any] {
        let sharedWorkspace = NSWorkspace.shared

        guard let appURL = sharedWorkspace.urlForApplication(withBundleIdentifier: bundleId) else {
            throw BridgeError.notFound("Application with bundle ID '\(bundleId)' not found")
        }

        let openConfig = NSWorkspace.OpenConfiguration()
        openConfig.activates = true

        // openApplication reports back through a completion handler; block this
        // thread on a semaphore until the handler has stored its outcome.
        let finished = DispatchSemaphore(value: 0)
        var runningApp: NSRunningApplication?
        var failure: Error?

        sharedWorkspace.openApplication(at: appURL, configuration: openConfig) { app, error in
            runningApp = app
            failure = error
            finished.signal()
        }

        finished.wait()

        if let failure = failure {
            throw BridgeError.general("Failed to launch '\(bundleId)': \(failure.localizedDescription)")
        }

        guard let app = runningApp else {
            throw BridgeError.general("Launch returned nil for '\(bundleId)'")
        }

        // Poll every 100 ms until the app reports it finished launching or 10 s have passed.
        let deadline = Date().addingTimeInterval(10)
        while !(app.isFinishedLaunching || Date() >= deadline) {
            Thread.sleep(forTimeInterval: 0.1)
        }

        var info: [String: Any] = [:]
        info["bundleId"] = bundleId
        info["appName"] = app.localizedName ?? bundleId
        info["pid"] = Int(app.processIdentifier)
        info["windowTitle"] = ""
        return info
    }

    /// Activates the first running application with the given bundle ID.
    ///
    /// - Throws: `BridgeError.notFound` when no such application is running.
    func focusApp(bundleId: String) throws {
        let candidates = NSRunningApplication.runningApplications(withBundleIdentifier: bundleId)
        guard let app = candidates.first else {
            throw BridgeError.notFound("No running application with bundle ID '\(bundleId)'")
        }
        app.activate()
    }

    /// Lists running applications whose activation policy is `.regular`.
    ///
    /// - Returns: One dictionary per app with `"bundleId"`, `"name"`, `"pid"`
    ///   and `"isActive"`.
    func listRunningApps() -> [[String: Any]] {
        var apps: [[String: Any]] = []
        for app in NSWorkspace.shared.runningApplications where app.activationPolicy == .regular {
            apps.append([
                "bundleId": app.bundleIdentifier ?? "unknown",
                "name": app.localizedName ?? "Unknown",
                "pid": Int(app.processIdentifier),
                "isActive": app.isActive,
            ])
        }
        return apps
    }

    /// Lists on-screen windows at window layer 0, excluding desktop elements.
    ///
    /// - Returns: One dictionary per window with `"windowId"`, `"title"`,
    ///   `"bundleId"`, `"pid"`, `"appName"`, `"bounds"` and `"isOnScreen"`;
    ///   an empty array when the window list cannot be obtained.
    func listWindows() -> [[String: Any]] {
        let options: CGWindowListOption = [.optionOnScreenOnly, .excludeDesktopElements]
        guard let rawWindows = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
            return []
        }

        var windows: [[String: Any]] = []
        for info in rawWindows {
            // Only normal windows (layer 0) that carry an id, an owner pid and bounds.
            guard let layer = info[kCGWindowLayer as String] as? Int, layer == 0,
                  let windowId = info[kCGWindowNumber as String] as? Int,
                  let ownerPid = info[kCGWindowOwnerPID as String] as? Int,
                  let boundsRaw = info[kCGWindowBounds as String] else {
                continue
            }

            // The bounds entry is a dictionary of numbers keyed "X", "Y",
            // "Width", "Height"; anything missing or malformed becomes 0.
            let boundsDict = boundsRaw as? [String: Any]
            let component: (String) -> Double = { key in
                (boundsDict?[key] as? NSNumber)?.doubleValue ?? 0
            }
            let bounds: [String: Double] = [
                "x": component("X"),
                "y": component("Y"),
                "width": component("Width"),
                "height": component("Height"),
            ]

            // Look up the bundle ID from the owning process.
            let owner = NSRunningApplication(processIdentifier: pid_t(ownerPid))

            windows.append([
                "windowId": windowId,
                "title": info[kCGWindowName as String] as? String ?? "",
                "bundleId": owner?.bundleIdentifier ?? "",
                "pid": ownerPid,
                "appName": info[kCGWindowOwnerName as String] as? String ?? "",
                "bounds": bounds,
                "isOnScreen": info[kCGWindowIsOnscreen as String] as? Bool ?? true,
            ])
        }
        return windows
    }

    /// Describes the frontmost application.
    ///
    /// - Returns: `"bundleId"`, `"name"` and `"pid"`, or a dictionary with a
    ///   single `"error"` entry when there is no frontmost application.
    func frontmostApp() -> [String: Any] {
        if let app = NSWorkspace.shared.frontmostApplication {
            return [
                "bundleId": app.bundleIdentifier ?? "unknown",
                "name": app.localizedName ?? "Unknown",
                "pid": Int(app.processIdentifier),
            ]
        }
        return ["error": "No frontmost application"]
    }
}
@@ -0,0 +1,242 @@
1
+ import CoreGraphics
2
+ import Foundation
3
+ import AppKit
4
+
5
/// Synthesizes mouse and keyboard input and captures screenshots through
/// Core Graphics. All input events are posted to the `.cghidEventTap` tap.
class CoreGraphicsBridge {

    /// Virtual key codes for the key names accepted by `keyCombo`.
    /// Built once instead of on every lookup.
    private static let keyCodes: [String: CGKeyCode] = [
        "a": 0, "b": 11, "c": 8, "d": 2, "e": 14, "f": 3, "g": 5,
        "h": 4, "i": 34, "j": 38, "k": 40, "l": 37, "m": 46, "n": 45,
        "o": 31, "p": 35, "q": 12, "r": 15, "s": 1, "t": 17, "u": 32,
        "v": 9, "w": 13, "x": 7, "y": 16, "z": 6,
        "0": 29, "1": 18, "2": 19, "3": 20, "4": 21, "5": 23,
        "6": 22, "7": 26, "8": 28, "9": 25,
        "return": 36, "enter": 36, "tab": 48, "space": 49,
        "delete": 51, "backspace": 51, "escape": 53, "esc": 53,
        "up": 126, "down": 125, "left": 123, "right": 124,
        "f1": 122, "f2": 120, "f3": 99, "f4": 118, "f5": 96,
        "f6": 97, "f7": 98, "f8": 100, "f9": 101, "f10": 109,
        "f11": 103, "f12": 111,
        "home": 115, "end": 119, "pageup": 116, "pagedown": 121,
        "-": 27, "=": 24, "[": 33, "]": 30, "\\": 42,
        ";": 41, "'": 39, ",": 43, ".": 47, "/": 44, "`": 50,
    ]

    // MARK: - Mouse Events

    /// Clicks at `(x, y)` with the given button, `clickCount` times in a row.
    ///
    /// - Parameters:
    ///   - x: Horizontal position of the click.
    ///   - y: Vertical position of the click.
    ///   - button: `"right"`, `"other"`/`"middle"`, or anything else for left
    ///     (case-insensitive).
    ///   - clickCount: Number of consecutive clicks; values below 1 do nothing.
    func mouseClick(x: Double, y: Double, button: String, clickCount: Int) {
        // `1...clickCount` would trap for a non-positive count.
        guard clickCount > 0 else { return }

        let point = CGPoint(x: x, y: y)
        let (downType, upType) = mouseButtonTypes(button: button)
        let cgButton = mouseButton(button)

        for clickNumber in 1...clickCount {
            // The click state is the ordinal of this click within the sequence
            // (1 for the first, 2 for the second of a double-click, ...), not
            // the total count: stamping every click with the total makes each
            // press look like a completed multi-click.
            postMouseEvent(downType, at: point, button: cgButton, clickState: clickNumber)
            usleep(50_000) // 50ms between down and up
            postMouseEvent(upType, at: point, button: cgButton, clickState: clickNumber)
        }
    }

    /// Moves the pointer to `(x, y)`.
    func mouseMove(x: Double, y: Double) {
        postMouseEvent(.mouseMoved, at: CGPoint(x: x, y: y))
    }

    /// Drags with the left button from the source to the destination point:
    /// press, 100ms pause, 10 interpolated drag events 20ms apart, release.
    func mouseDrag(fromX: Double, fromY: Double, toX: Double, toY: Double) {
        performLeftDrag(fromX: fromX, fromY: fromY, toX: toX, toY: toY,
                        steps: 10, holdDelay: 100_000, stepDelay: 20_000)
    }

    /// Fast flick gesture — press, 10ms pause, 3 drag events 5ms apart, release.
    /// Triggers iOS swipe gestures.
    func mouseFlick(fromX: Double, fromY: Double, toX: Double, toY: Double) {
        performLeftDrag(fromX: fromX, fromY: fromY, toX: toX, toY: toY,
                        steps: 3, holdDelay: 10_000, stepDelay: 5_000)
    }

    /// Moves the pointer to `(x, y)`, waits 50ms, then posts a line-based
    /// scroll event.
    ///
    /// - Parameters:
    ///   - deltaX: Horizontal amount (second wheel), clamped to the `Int32` range.
    ///   - deltaY: Vertical amount (first wheel), clamped to the `Int32` range.
    func scroll(x: Double, y: Double, deltaX: Int, deltaY: Int) {
        // Move mouse to position first
        mouseMove(x: x, y: y)
        usleep(50_000)

        // Clamp instead of `Int32(_:)`, which traps when the value does not fit.
        let vertical = Int32(clamping: deltaY)
        let horizontal = Int32(clamping: deltaX)
        if let scrollEvent = CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2,
                                     wheel1: vertical, wheel2: horizontal, wheel3: 0) {
            scrollEvent.post(tap: .cghidEventTap)
        }
    }

    // MARK: - Keyboard Events

    /// Presses a key combination such as `["cmd", "shift", "s"]`.
    ///
    /// Modifier names (`cmd`/`command`/`meta`, `shift`, `alt`/`option`,
    /// `ctrl`/`control`, `fn`) are accumulated into event flags; any other
    /// entry is looked up as the key to press, and the last such entry wins.
    /// If that key name is unknown (or none was given) nothing is posted.
    func keyCombo(keys: [String]) {
        var modifiers: CGEventFlags = []
        var keyCode: CGKeyCode?

        for key in keys {
            let lower = key.lowercased()
            switch lower {
            case "cmd", "command", "meta":
                modifiers.insert(.maskCommand)
            case "shift":
                modifiers.insert(.maskShift)
            case "alt", "option":
                modifiers.insert(.maskAlternate)
            case "ctrl", "control":
                modifiers.insert(.maskControl)
            case "fn":
                modifiers.insert(.maskSecondaryFn)
            default:
                keyCode = keyCodeForString(lower)
            }
        }

        guard let code = keyCode else { return }

        postKeyEvent(virtualKey: code, keyDown: true, flags: modifiers)
        usleep(50_000)
        postKeyEvent(virtualKey: code, keyDown: false, flags: modifiers)
    }

    /// Types `text` one character at a time by attaching each character's
    /// UTF-16 units to a synthetic key event (20ms down→up, 10ms between
    /// characters).
    func typeText(text: String) {
        for char in text {
            let units = Array(String(char).utf16)
            postUnicodeKeyEvent(units, keyDown: true)
            usleep(20_000) // 20ms between down and up
            // The key-up carries the same payload as the key-down; without it
            // the release would be reported for virtual key 0 instead.
            postUnicodeKeyEvent(units, keyDown: false)
            usleep(10_000)
        }
    }

    // MARK: - Screenshots

    /// Captures the on-screen content and writes it to a temporary PNG.
    ///
    /// - Parameter region: Optional rectangle with `"x"`, `"y"`, `"width"`,
    ///   `"height"` (missing keys count as 0). `nil` requests `CGRect.infinite`.
    /// - Returns: `"path"` of the PNG plus the image's `"width"` and `"height"`.
    /// - Throws: `BridgeError.general` when capturing or writing fails.
    func captureScreen(region: [String: Double]?) throws -> [String: Any] {
        let rect: CGRect
        if let region = region {
            rect = CGRect(
                x: region["x"] ?? 0,
                y: region["y"] ?? 0,
                width: region["width"] ?? 0,
                height: region["height"] ?? 0
            )
        } else {
            rect = CGRect.infinite
        }

        guard let image = CGWindowListCreateImage(rect, .optionOnScreenOnly, kCGNullWindowID, .bestResolution) else {
            throw BridgeError.general("Failed to capture screen")
        }

        let path = try saveImage(image)
        return ["path": path, "width": image.width, "height": image.height]
    }

    /// Captures a single window and writes it to a temporary PNG.
    ///
    /// - Parameter windowId: Window number of the window to capture.
    /// - Returns: `"path"` of the PNG plus the image's `"width"` and `"height"`.
    /// - Throws: `BridgeError.general` when `windowId` is not a valid window id
    ///   or when capturing or writing fails.
    func captureWindow(windowId: Int) throws -> [String: Any] {
        // `CGWindowID(_:)` traps on negative or oversized values; reject them instead.
        guard let cgWindowId = CGWindowID(exactly: windowId),
              let image = CGWindowListCreateImage(
                  .null, .optionIncludingWindow, cgWindowId, .bestResolution
              ) else {
            throw BridgeError.general("Failed to capture window \(windowId)")
        }

        let path = try saveImage(image)
        return ["path": path, "width": image.width, "height": image.height]
    }

    /// Writes `image` as a PNG with a unique name into the temporary directory
    /// and returns the file path.
    private func saveImage(_ image: CGImage) throws -> String {
        let fileName = "bridge_screenshot_\(UUID().uuidString).png"
        let fileURL = FileManager.default.temporaryDirectory.appendingPathComponent(fileName)

        guard let dest = CGImageDestinationCreateWithURL(fileURL as CFURL, "public.png" as CFString, 1, nil) else {
            throw BridgeError.general("Failed to create image destination")
        }
        CGImageDestinationAddImage(dest, image, nil)
        guard CGImageDestinationFinalize(dest) else {
            throw BridgeError.general("Failed to write screenshot")
        }

        return fileURL.path
    }

    // MARK: - Event Helpers

    /// Creates and posts one mouse event; silently does nothing if the event
    /// cannot be created. `clickState`, when given, is stored in the event's
    /// click-state field.
    private func postMouseEvent(_ type: CGEventType, at point: CGPoint,
                                button: CGMouseButton = .left, clickState: Int? = nil) {
        guard let event = CGEvent(mouseEventSource: nil, mouseType: type,
                                  mouseCursorPosition: point, mouseButton: button) else {
            return
        }
        if let clickState = clickState {
            event.setIntegerValueField(.mouseEventClickState, value: Int64(clickState))
        }
        event.post(tap: .cghidEventTap)
    }

    /// Shared left-button drag: press at the source, wait `holdDelay`
    /// microseconds, post `steps` linearly interpolated drag events separated by
    /// `stepDelay` microseconds, then release at the destination.
    private func performLeftDrag(fromX: Double, fromY: Double, toX: Double, toY: Double,
                                 steps: Int, holdDelay: useconds_t, stepDelay: useconds_t) {
        postMouseEvent(.leftMouseDown, at: CGPoint(x: fromX, y: fromY))
        usleep(holdDelay)

        for step in 1...steps {
            let t = Double(step) / Double(steps)
            let waypoint = CGPoint(x: fromX + (toX - fromX) * t, y: fromY + (toY - fromY) * t)
            postMouseEvent(.leftMouseDragged, at: waypoint)
            usleep(stepDelay)
        }

        postMouseEvent(.leftMouseUp, at: CGPoint(x: toX, y: toY))
    }

    /// Creates and posts one key event for `virtualKey` with the given flags.
    private func postKeyEvent(virtualKey: CGKeyCode, keyDown: Bool, flags: CGEventFlags) {
        guard let event = CGEvent(keyboardEventSource: nil, virtualKey: virtualKey, keyDown: keyDown) else {
            return
        }
        event.flags = flags
        event.post(tap: .cghidEventTap)
    }

    /// Creates and posts one key event whose text payload is `units` (UTF-16).
    private func postUnicodeKeyEvent(_ units: [UniChar], keyDown: Bool) {
        guard let event = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: keyDown) else {
            return
        }
        event.keyboardSetUnicodeString(stringLength: units.count, unicodeString: units)
        event.post(tap: .cghidEventTap)
    }

    // MARK: - Button and Key Code Mapping

    /// Maps a button name to its (down, up) event types; unknown names map to
    /// the left button.
    private func mouseButtonTypes(button: String) -> (CGEventType, CGEventType) {
        switch button.lowercased() {
        case "right":
            return (.rightMouseDown, .rightMouseUp)
        case "other", "middle":
            return (.otherMouseDown, .otherMouseUp)
        default:
            return (.leftMouseDown, .leftMouseUp)
        }
    }

    /// Maps a button name to a `CGMouseButton`; unknown names map to `.left`.
    private func mouseButton(_ button: String) -> CGMouseButton {
        switch button.lowercased() {
        case "right": return .right
        case "other", "middle": return .center
        default: return .left
        }
    }

    /// Looks up the virtual key code for a lower-cased key name, or `nil` when
    /// the name is not in the table.
    private func keyCodeForString(_ key: String) -> CGKeyCode? {
        return CoreGraphicsBridge.keyCodes[key]
    }
}