screenhand 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/automate.md +28 -0
- package/.claude/commands/debug-ui.md +19 -0
- package/.claude/commands/screenshot.md +15 -0
- package/.github/FUNDING.yml +1 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +27 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- package/.mcp.json +8 -0
- package/DESKTOP_MCP_GUIDE.md +92 -0
- package/LICENSE +661 -21
- package/README.md +97 -292
- package/SECURITY.md +44 -0
- package/docs/architecture.md +47 -0
- package/install-skills.sh +19 -0
- package/mcp-bridge.ts +271 -0
- package/mcp-desktop.ts +1221 -0
- package/native/macos-bridge/Package.swift +21 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +261 -0
- package/native/macos-bridge/Sources/AppManagement.swift +129 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +242 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +80 -0
- package/native/macos-bridge/Sources/main.swift +345 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +265 -0
- package/native/windows-bridge/ScreenCapture.cs +329 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +3 -14
- package/playbooks/devpost.json +186 -0
- package/playbooks/instagram.json +41 -0
- package/playbooks/instagram_v2.json +201 -0
- package/playbooks/x_v1.json +211 -0
- package/scripts/devpost-live-loop.mjs +421 -0
- package/src/config.ts +30 -0
- package/src/index.ts +92 -0
- package/src/logging/timeline-logger.ts +55 -0
- package/src/mcp/server.ts +449 -0
- package/src/memory/recall.ts +191 -0
- package/src/memory/research.ts +146 -0
- package/src/memory/seeds.ts +123 -0
- package/src/memory/session.ts +201 -0
- package/src/memory/store.ts +434 -0
- package/src/memory/types.ts +69 -0
- package/src/native/bridge-client.ts +239 -0
- package/src/native/macos-bridge-client.ts +22 -0
- package/src/runtime/accessibility-adapter.ts +487 -0
- package/src/runtime/app-adapter.ts +169 -0
- package/src/runtime/applescript-adapter.ts +376 -0
- package/src/runtime/ax-role-map.ts +102 -0
- package/src/runtime/browser-adapter.ts +129 -0
- package/src/runtime/cdp-chrome-adapter.ts +676 -0
- package/src/runtime/composite-adapter.ts +274 -0
- package/src/runtime/executor.ts +396 -0
- package/src/runtime/locator-cache.ts +33 -0
- package/src/runtime/planning-loop.ts +81 -0
- package/src/runtime/service.ts +448 -0
- package/src/runtime/session-manager.ts +50 -0
- package/src/runtime/state-observer.ts +136 -0
- package/src/runtime/vision-adapter.ts +297 -0
- package/src/types.ts +297 -0
- package/tests/bridge-client.test.ts +176 -0
- package/tests/browser-stealth.test.ts +210 -0
- package/tests/composite-adapter.test.ts +64 -0
- package/tests/mcp-server.test.ts +151 -0
- package/tests/memory-recall.test.ts +339 -0
- package/tests/memory-research.test.ts +159 -0
- package/tests/memory-seeds.test.ts +120 -0
- package/tests/memory-store.test.ts +392 -0
- package/tests/types.test.ts +92 -0
- package/tsconfig.check.json +17 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +8 -0
- package/dist/config.js +0 -9
- package/dist/index.js +0 -55
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -284
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -62
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/native/bridge-client.js +0 -173
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// swift-tools-version: 5.9
|
|
2
|
+
import PackageDescription
|
|
3
|
+
|
|
4
|
+
// System frameworks the bridge executable is linked against.
let bridgeFrameworks: [LinkerSetting] = [
    .linkedFramework("ApplicationServices"),
    .linkedFramework("CoreGraphics"),
    .linkedFramework("AppKit"),
    .linkedFramework("Vision"),
]

// Single executable target built from the `Sources` directory, macOS 13 and later.
let package = Package(
    name: "macos-bridge",
    platforms: [.macOS(.v13)],
    targets: [
        .executableTarget(
            name: "macos-bridge",
            path: "Sources",
            linkerSettings: bridgeFrameworks
        )
    ]
)
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import ApplicationServices
|
|
2
|
+
import AppKit
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
class AccessibilityBridge {
|
|
6
|
+
|
|
7
|
+
/// Returns the result of `AXIsProcessTrusted()` for the current process.
func isAccessibilityTrusted() -> Bool {
    let trusted = AXIsProcessTrusted()
    return trusted
}
|
|
10
|
+
|
|
11
|
+
// MARK: - Element Tree
|
|
12
|
+
|
|
13
|
+
/// Builds a dictionary snapshot of the accessibility tree of the application `pid`.
///
/// - Parameters:
///   - pid: Process identifier of the target application.
///   - maxDepth: Forwarded to `buildTree`; children are visited while depth < maxDepth.
/// - Returns: The dictionary produced by `buildTree` for the application element.
func getElementTree(pid: pid_t, maxDepth: Int) throws -> [String: Any] {
    try buildTree(element: AXUIElementCreateApplication(pid), depth: 0, maxDepth: maxDepth)
}
|
|
17
|
+
|
|
18
|
+
/// Recursively converts `element` and its descendants into a JSON-friendly dictionary.
///
/// Every node has a "role" (falling back to "Unknown"). "title", "value", "description",
/// "identifier", "enabled", "focused", "position", "size" and "children" are added only
/// when the corresponding attribute could be read.
///
/// - Parameters:
///   - element: The accessibility element to describe.
///   - depth: Depth of `element` relative to the traversal root.
///   - maxDepth: Children are only visited while `depth < maxDepth`.
/// - Returns: The dictionary describing `element`.
private func buildTree(element: AXUIElement, depth: Int, maxDepth: Int) throws -> [String: Any] {
    // Casts to Core Foundation types are not checked by Swift, so verify the CF type ID
    // before treating an attribute value as an AXValue and passing it to AXValueGetValue.
    func axValue(_ attribute: String) -> AXValue? {
        guard let raw = getAttribute(element, attribute),
              CFGetTypeID(raw) == AXValueGetTypeID() else {
            return nil
        }
        return (raw as! AXValue)
    }

    var node: [String: Any] = [:]

    node["role"] = getAttribute(element, kAXRoleAttribute) as? String ?? "Unknown"
    if let title = getAttribute(element, kAXTitleAttribute) as? String, !title.isEmpty {
        node["title"] = title
    }
    if let value = getAttribute(element, kAXValueAttribute) {
        // Values may be strings, numbers or other objects; store their string rendering.
        node["value"] = "\(value)"
    }
    if let desc = getAttribute(element, kAXDescriptionAttribute) as? String, !desc.isEmpty {
        node["description"] = desc
    }
    if let identifier = getAttribute(element, kAXIdentifierAttribute) as? String, !identifier.isEmpty {
        node["identifier"] = identifier
    }
    if let enabled = getAttribute(element, kAXEnabledAttribute) as? Bool {
        node["enabled"] = enabled
    }
    if let focused = getAttribute(element, kAXFocusedAttribute) as? Bool {
        node["focused"] = focused
    }

    // Position and size
    if let posValue = axValue(kAXPositionAttribute) {
        var point = CGPoint.zero
        if AXValueGetValue(posValue, .cgPoint, &point) {
            node["position"] = ["x": Double(point.x), "y": Double(point.y)]
        }
    }
    if let sizeValue = axValue(kAXSizeAttribute) {
        var size = CGSize.zero
        if AXValueGetValue(sizeValue, .cgSize, &size) {
            node["size"] = ["width": Double(size.width), "height": Double(size.height)]
        }
    }

    // Children (if not at max depth)
    if depth < maxDepth,
       let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] {
        // Safety limit: only children at indices 0...100 are visited.
        let childNodes: [[String: Any]] = children.prefix(101).compactMap { child in
            try? buildTree(element: child, depth: depth + 1, maxDepth: maxDepth)
        }
        if !childNodes.isEmpty {
            node["children"] = childNodes
        }
    }

    return node
}
|
|
73
|
+
|
|
74
|
+
// MARK: - Find Element
|
|
75
|
+
|
|
76
|
+
/// Finds the first element of application `pid` that matches the given criteria.
///
/// The search is delegated to `searchElement`, starting at the application element
/// with an empty path.
///
/// - Throws: `BridgeError.notFound` when `searchElement` returns nil.
func findElement(pid: pid_t, role: String?, title: String?, value: String?,
                 identifier: String?, exact: Bool) throws -> [String: Any] {
    let root = AXUIElementCreateApplication(pid)
    let match = searchElement(
        element: root, path: [], role: role, title: title,
        value: value, identifier: identifier, exact: exact
    )
    if let match {
        return match
    }
    throw BridgeError.notFound("Element not found matching criteria")
}
|
|
87
|
+
|
|
88
|
+
/// Pre-order search for the first element matching every non-nil criterion.
///
/// An element only counts as a match when at least one criterion is supplied.
/// The result contains "role", "title", "elementPath" (child indices from the root),
/// "handleId" ("ax_" followed by the path joined with "_"), and, when available,
/// "value", "identifier" and "bounds".
///
/// - Parameters:
///   - element: Element to test and whose children are searched.
///   - path: Child indices leading from the search root to `element`.
///   - exact: Passed to `matchString` for every comparison.
/// - Returns: A description of the first match, or nil when nothing matches.
private func searchElement(element: AXUIElement, path: [Int], role: String?,
                           title: String?, value: String?, identifier: String?,
                           exact: Bool) -> [String: Any]? {
    // Casts to Core Foundation types are not checked by Swift, so verify the CF type ID
    // before treating an attribute value as an AXValue.
    func axValue(_ attribute: String) -> AXValue? {
        guard let raw = getAttribute(element, attribute),
              CFGetTypeID(raw) == AXValueGetTypeID() else {
            return nil
        }
        return (raw as! AXValue)
    }

    // Check if this element matches
    let elementRole = getAttribute(element, kAXRoleAttribute) as? String ?? ""
    let elementTitle = getAttribute(element, kAXTitleAttribute) as? String ?? ""
    let elementValue = getAttribute(element, kAXValueAttribute).map { "\($0)" } ?? ""
    let elementId = getAttribute(element, kAXIdentifierAttribute) as? String ?? ""

    var matches = true
    if let role = role {
        matches = matches && matchString(elementRole, role, exact: exact)
    }
    if let title = title {
        matches = matches && matchString(elementTitle, title, exact: exact)
    }
    if let value = value {
        matches = matches && matchString(elementValue, value, exact: exact)
    }
    if let identifier = identifier {
        matches = matches && matchString(elementId, identifier, exact: exact)
    }

    let hasCriteria = role != nil || title != nil || value != nil || identifier != nil
    if matches && hasCriteria {
        var result: [String: Any] = [
            "role": elementRole,
            "title": elementTitle,
            "elementPath": path,
            "handleId": "ax_\(path.map { String($0) }.joined(separator: "_"))",
        ]
        if !elementValue.isEmpty { result["value"] = elementValue }
        if !elementId.isEmpty { result["identifier"] = elementId }

        // Bounds are reported only when both position and size can be decoded.
        var point = CGPoint.zero
        var size = CGSize.zero
        if let posValue = axValue(kAXPositionAttribute),
           AXValueGetValue(posValue, .cgPoint, &point),
           let sizeValue = axValue(kAXSizeAttribute),
           AXValueGetValue(sizeValue, .cgSize, &size) {
            result["bounds"] = [
                "x": Double(point.x), "y": Double(point.y),
                "width": Double(size.width), "height": Double(size.height)
            ]
        }

        return result
    }

    // Search children
    if let children = getAttribute(element, kAXChildrenAttribute) as? [AXUIElement] {
        for (index, child) in children.enumerated() {
            if let found = searchElement(
                element: child, path: path + [index], role: role, title: title,
                value: value, identifier: identifier, exact: exact
            ) {
                return found
            }
        }
    }

    return nil
}
|
|
156
|
+
|
|
157
|
+
// MARK: - Actions
|
|
158
|
+
|
|
159
|
+
/// Performs the accessibility action named `action` on the element at `elementPath`.
///
/// - Throws: Whatever `resolveElement` throws, or `BridgeError.general` when
///   `AXUIElementPerformAction` does not return `.success`.
func performAction(pid: pid_t, elementPath: [Int], action: String) throws {
    let target = try resolveElement(pid: pid, path: elementPath)
    let status = AXUIElementPerformAction(target, action as CFString)
    guard status == .success else {
        throw BridgeError.general("AX action '\(action)' failed with code \(status.rawValue)")
    }
}
|
|
166
|
+
|
|
167
|
+
/// Sets the value of the element at `elementPath`.
///
/// First tries to write the value attribute directly. If that fails, focuses the
/// element and types the text through `CoreGraphicsBridge.typeText`.
///
/// - Throws: Whatever `resolveElement` throws, or `BridgeError.general` when the
///   element cannot be focused for the fallback.
func setElementValue(pid: pid_t, elementPath: [Int], value: String) throws {
    let target = try resolveElement(pid: pid, path: elementPath)

    let directStatus = AXUIElementSetAttributeValue(target, kAXValueAttribute as CFString, value as CFTypeRef)
    if directStatus == .success {
        return
    }

    // Fallback: give the element focus, then synthesize the keystrokes.
    let focusStatus = AXUIElementSetAttributeValue(target, kAXFocusedAttribute as CFString, true as CFTypeRef)
    guard focusStatus == .success else {
        throw BridgeError.general("Cannot focus element for value set, code \(focusStatus.rawValue)")
    }
    CoreGraphicsBridge().typeText(text: value)
}
|
|
180
|
+
|
|
181
|
+
/// Reads the value attribute of the element at `elementPath`.
///
/// - Returns: `["value": <string rendering>]`, with an empty string when the
///   attribute cannot be read.
/// - Throws: Whatever `resolveElement` throws.
func getElementValue(pid: pid_t, elementPath: [Int]) throws -> [String: Any] {
    let target = try resolveElement(pid: pid, path: elementPath)
    guard let raw = getAttribute(target, kAXValueAttribute) else {
        return ["value": ""]
    }
    return ["value": "\(raw)"]
}
|
|
186
|
+
|
|
187
|
+
// MARK: - Menu Click
|
|
188
|
+
|
|
189
|
+
/// Walks the application's menu bar along `menuPath`, pressing each item in turn.
///
/// Each path component is matched by exact title against the children of the current
/// menu level. The item is pressed, then after a short pause the first child of the
/// pressed item (its submenu, if any) becomes the level for the next component.
///
/// - Parameters:
///   - pid: Process identifier of the target application.
///   - menuPath: Menu titles from the menu bar down to the item to activate.
/// - Throws: `BridgeError.missingParam` for an empty path; `BridgeError.notFound` when
///   the menu bar, a menu level, a titled item, or a required submenu is missing.
func menuClick(pid: pid_t, menuPath: [String]) throws {
    guard !menuPath.isEmpty else {
        throw BridgeError.missingParam("menuPath must not be empty")
    }

    let appElement = AXUIElementCreateApplication(pid)
    // Casts to Core Foundation types are unchecked, so confirm the type ID first.
    guard let menuBar = getAttribute(appElement, kAXMenuBarAttribute),
          CFGetTypeID(menuBar) == AXUIElementGetTypeID() else {
        throw BridgeError.notFound("Menu bar not found")
    }
    var currentElement = menuBar as! AXUIElement

    for (position, menuItem) in menuPath.enumerated() {
        guard let children = getAttribute(currentElement, kAXChildrenAttribute) as? [AXUIElement] else {
            throw BridgeError.notFound("No children found in menu for '\(menuItem)'")
        }

        let match = children.first { child in
            (getAttribute(child, kAXTitleAttribute) as? String ?? "") == menuItem
        }
        guard let match else {
            throw BridgeError.notFound("Menu item '\(menuItem)' not found")
        }

        // Press this menu item to open it (for submenus) or activate it.
        // The result is deliberately ignored: the press is best-effort.
        _ = AXUIElementPerformAction(match, kAXPressAction as CFString)
        // Small delay for menu to open
        Thread.sleep(forTimeInterval: 0.1)

        let submenu = (getAttribute(match, kAXChildrenAttribute) as? [AXUIElement])?.first
        if let submenu {
            currentElement = submenu
        } else if position < menuPath.count - 1 {
            // Without a submenu the remaining components would be searched at the
            // current level again and could press an unrelated sibling item.
            throw BridgeError.notFound("Menu item '\(menuItem)' has no submenu")
        }
    }
}
|
|
232
|
+
|
|
233
|
+
// MARK: - Helpers
|
|
234
|
+
|
|
235
|
+
/// Resolves a path of child indices, starting at the application element of `pid`.
///
/// - Parameters:
///   - pid: Process identifier of the target application.
///   - path: Child index to follow at each level; an empty path yields the application element.
/// - Returns: The element reached after following every index.
/// - Throws: `BridgeError.notFound` when a level has no readable children or an
///   index is outside the children array (including negative indices).
private func resolveElement(pid: pid_t, path: [Int]) throws -> AXUIElement {
    var current: AXUIElement = AXUIElementCreateApplication(pid)
    for index in path {
        guard let children = getAttribute(current, kAXChildrenAttribute) as? [AXUIElement] else {
            throw BridgeError.notFound("No children at path index \(index)")
        }
        // `indices.contains` also rejects negative indices, which would otherwise trap.
        guard children.indices.contains(index) else {
            throw BridgeError.notFound("Index \(index) out of bounds (count: \(children.count))")
        }
        current = children[index]
    }
    return current
}
|
|
248
|
+
|
|
249
|
+
/// Copies the value of `attribute` from `element`.
///
/// - Returns: The copied value, or nil when `AXUIElementCopyAttributeValue`
///   reports anything other than `.success`.
private func getAttribute(_ element: AXUIElement, _ attribute: String) -> AnyObject? {
    var copied: AnyObject?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &copied) == .success else {
        return nil
    }
    return copied
}
|
|
254
|
+
|
|
255
|
+
/// Compares `haystack` with `needle`.
///
/// - Parameter exact: When true the strings must be equal; otherwise `haystack`
///   must contain `needle` using a localized, case-insensitive comparison.
private func matchString(_ haystack: String, _ needle: String, exact: Bool) -> Bool {
    exact ? haystack == needle : haystack.localizedCaseInsensitiveContains(needle)
}
|
|
261
|
+
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import AppKit
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
class AppManagement {
|
|
5
|
+
|
|
6
|
+
/// Launches and activates the application with the given bundle identifier.
///
/// Blocks until the workspace reports the launch result, then polls for up to
/// 10 seconds until the application has finished launching.
///
/// - Returns: "bundleId", "appName", "pid" and an empty "windowTitle".
/// - Throws: `BridgeError.notFound` when no application has this bundle ID;
///   `BridgeError.general` when the launch fails or yields no application.
func launchApp(bundleId: String) throws -> [String: Any] {
    let workspace = NSWorkspace.shared
    guard let appURL = workspace.urlForApplication(withBundleIdentifier: bundleId) else {
        throw BridgeError.notFound("Application with bundle ID '\(bundleId)' not found")
    }

    let configuration = NSWorkspace.OpenConfiguration()
    configuration.activates = true

    // The completion handler fills `outcome`; the semaphore turns the call synchronous.
    var outcome: (app: NSRunningApplication?, error: Error?) = (nil, nil)
    let finished = DispatchSemaphore(value: 0)
    workspace.openApplication(at: appURL, configuration: configuration) { app, error in
        outcome = (app, error)
        finished.signal()
    }
    finished.wait()

    if let error = outcome.error {
        throw BridgeError.general("Failed to launch '\(bundleId)': \(error.localizedDescription)")
    }
    guard let app = outcome.app else {
        throw BridgeError.general("Launch returned nil for '\(bundleId)'")
    }

    // Wait for the app to finish launching (up to 10 seconds)
    let deadline = Date().addingTimeInterval(10)
    while true {
        if app.isFinishedLaunching || Date() >= deadline { break }
        Thread.sleep(forTimeInterval: 0.1)
    }

    return [
        "bundleId": bundleId,
        "appName": app.localizedName ?? bundleId,
        "pid": Int(app.processIdentifier),
        "windowTitle": "",
    ]
}
|
|
49
|
+
|
|
50
|
+
/// Activates the first running application with the given bundle identifier.
///
/// - Throws: `BridgeError.notFound` when no such application is running.
func focusApp(bundleId: String) throws {
    let candidates = NSRunningApplication.runningApplications(withBundleIdentifier: bundleId)
    if let app = candidates.first {
        app.activate()
        return
    }
    throw BridgeError.notFound("No running application with bundle ID '\(bundleId)'")
}
|
|
56
|
+
|
|
57
|
+
/// Lists running applications whose activation policy is `.regular`.
///
/// - Returns: One dictionary per application with "bundleId", "name", "pid" and "isActive".
func listRunningApps() -> [[String: Any]] {
    var apps: [[String: Any]] = []
    for app in NSWorkspace.shared.runningApplications where app.activationPolicy == .regular {
        apps.append([
            "bundleId": app.bundleIdentifier ?? "unknown",
            "name": app.localizedName ?? "Unknown",
            "pid": Int(app.processIdentifier),
            "isActive": app.isActive,
        ])
    }
    return apps
}
|
|
70
|
+
|
|
71
|
+
/// Lists on-screen windows on layer 0, excluding desktop elements.
///
/// - Returns: One dictionary per window with "windowId", "title", "bundleId", "pid",
///   "appName", "bounds" (x/y/width/height) and "isOnScreen". Returns an empty array
///   when the window list cannot be read.
func listWindows() -> [[String: Any]] {
    let options: CGWindowListOption = [.optionOnScreenOnly, .excludeDesktopElements]
    guard let rawWindows = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] else {
        return []
    }

    var windows: [[String: Any]] = []
    for info in rawWindows {
        // Entries missing any required field, or not on layer 0, are skipped.
        guard let windowId = info[kCGWindowNumber as String] as? Int,
              let ownerPid = info[kCGWindowOwnerPID as String] as? Int,
              let boundsRaw = info[kCGWindowBounds as String],
              let layer = info[kCGWindowLayer as String] as? Int,
              layer == 0 else {
            continue
        }

        // Bounds arrive as a dictionary of numbers; anything unreadable becomes 0.
        let boundsDict = boundsRaw as? [String: Any]
        func component(_ key: String) -> Double {
            (boundsDict?[key] as? NSNumber)?.doubleValue ?? 0
        }

        // Look up bundle ID from PID
        let owner = NSRunningApplication(processIdentifier: pid_t(ownerPid))

        windows.append([
            "windowId": windowId,
            "title": info[kCGWindowName as String] as? String ?? "",
            "bundleId": owner?.bundleIdentifier ?? "",
            "pid": ownerPid,
            "appName": info[kCGWindowOwnerName as String] as? String ?? "",
            "bounds": [
                "x": component("X"),
                "y": component("Y"),
                "width": component("Width"),
                "height": component("Height"),
            ] as [String: Double],
            "isOnScreen": info[kCGWindowIsOnscreen as String] as? Bool ?? true,
        ])
    }
    return windows
}
|
|
118
|
+
|
|
119
|
+
/// Describes the frontmost application.
///
/// - Returns: "bundleId", "name" and "pid" of the frontmost application, or a
///   dictionary with an "error" entry when there is none.
func frontmostApp() -> [String: Any] {
    if let app = NSWorkspace.shared.frontmostApplication {
        return [
            "bundleId": app.bundleIdentifier ?? "unknown",
            "name": app.localizedName ?? "Unknown",
            "pid": Int(app.processIdentifier),
        ]
    }
    return ["error": "No frontmost application"]
}
|
|
129
|
+
}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import Foundation
|
|
3
|
+
import AppKit
|
|
4
|
+
|
|
5
|
+
class CoreGraphicsBridge {
|
|
6
|
+
|
|
7
|
+
// MARK: - Mouse Events
|
|
8
|
+
|
|
9
|
+
/// Posts `clickCount` mouse clicks at the given screen position.
///
/// Each click is a down event, a 50 ms pause, and an up event. The click-state
/// field of each pair is the ordinal of that click in the sequence (1 for the first,
/// 2 for the second, ...), so a double-click is sent as click 1 followed by click 2.
///
/// - Parameters:
///   - x: Horizontal position of the click.
///   - y: Vertical position of the click.
///   - button: "right", "other"/"middle", or anything else for the left button.
///   - clickCount: Number of clicks to post; values below 1 post nothing.
func mouseClick(x: Double, y: Double, button: String, clickCount: Int) {
    // A negative count would trap when forming the range below.
    guard clickCount > 0 else { return }

    let point = CGPoint(x: x, y: y)
    let (downType, upType) = mouseButtonTypes(button: button)
    let cgButton = mouseButton(button)

    for clickNumber in 1...clickCount {
        let clickState = Int64(clickNumber)
        if let downEvent = CGEvent(mouseEventSource: nil, mouseType: downType, mouseCursorPosition: point, mouseButton: cgButton) {
            downEvent.setIntegerValueField(.mouseEventClickState, value: clickState)
            downEvent.post(tap: .cghidEventTap)
        }
        usleep(50_000) // 50ms between down and up
        if let upEvent = CGEvent(mouseEventSource: nil, mouseType: upType, mouseCursorPosition: point, mouseButton: cgButton) {
            upEvent.setIntegerValueField(.mouseEventClickState, value: clickState)
            upEvent.post(tap: .cghidEventTap)
        }
    }
}
|
|
26
|
+
|
|
27
|
+
/// Posts a mouse-moved event at the given position.
func mouseMove(x: Double, y: Double) {
    let destination = CGPoint(x: x, y: y)
    guard let moveEvent = CGEvent(mouseEventSource: nil, mouseType: .mouseMoved, mouseCursorPosition: destination, mouseButton: .left) else {
        return
    }
    moveEvent.post(tap: .cghidEventTap)
}
|
|
33
|
+
|
|
34
|
+
/// Drags with the left button from one point to another.
///
/// Sequence: button down at the source, a 100 ms pause, ten evenly spaced drag events
/// 20 ms apart (the last one at the destination), then button up at the destination.
func mouseDrag(fromX: Double, fromY: Double, toX: Double, toY: Double) {
    // Posts a single left-button event of `type` at `location`, if it can be created.
    func post(_ type: CGEventType, at location: CGPoint) {
        CGEvent(mouseEventSource: nil, mouseType: type, mouseCursorPosition: location, mouseButton: .left)?
            .post(tap: .cghidEventTap)
    }

    // Mouse down at source
    post(.leftMouseDown, at: CGPoint(x: fromX, y: fromY))
    usleep(100_000) // 100ms

    // Interpolate drag points
    let steps = 10
    for step in 1...steps {
        let fraction = Double(step) / Double(steps)
        let waypoint = CGPoint(
            x: fromX + (toX - fromX) * fraction,
            y: fromY + (toY - fromY) * fraction
        )
        post(.leftMouseDragged, at: waypoint)
        usleep(20_000) // 20ms between steps
    }

    // Mouse up at destination
    post(.leftMouseUp, at: CGPoint(x: toX, y: toY))
}
|
|
62
|
+
|
|
63
|
+
/// Fast flick gesture: button down, 10 ms pause, three drag events 5 ms apart, button up.
/// Intended to trigger iOS swipe gestures.
func mouseFlick(fromX: Double, fromY: Double, toX: Double, toY: Double) {
    let start = CGPoint(x: fromX, y: fromY)
    let end = CGPoint(x: toX, y: toY)

    // The three intermediate positions, at 1/3, 2/3 and 3/3 of the way.
    let waypoints = (1...3).map { step -> CGPoint in
        let t = Double(step) / 3.0
        return CGPoint(x: fromX + (toX - fromX) * t, y: fromY + (toY - fromY) * t)
    }

    CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: start, mouseButton: .left)?
        .post(tap: .cghidEventTap)
    usleep(10_000) // 10ms

    for waypoint in waypoints {
        CGEvent(mouseEventSource: nil, mouseType: .leftMouseDragged, mouseCursorPosition: waypoint, mouseButton: .left)?
            .post(tap: .cghidEventTap)
        usleep(5_000) // 5ms
    }

    CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: end, mouseButton: .left)?
        .post(tap: .cghidEventTap)
}
|
|
87
|
+
|
|
88
|
+
/// Moves the pointer to the given position and posts a line-based scroll event.
///
/// - Parameters:
///   - x: Horizontal position to scroll at.
///   - y: Vertical position to scroll at.
///   - deltaX: Horizontal scroll amount in lines (sent as wheel 2).
///   - deltaY: Vertical scroll amount in lines (sent as wheel 1).
///
/// Deltas outside the `Int32` range are clamped rather than trapping.
func scroll(x: Double, y: Double, deltaX: Int, deltaY: Int) {
    // Move mouse to position first
    mouseMove(x: x, y: y)
    usleep(50_000)

    let vertical = Int32(clamping: deltaY)
    let horizontal = Int32(clamping: deltaX)
    if let scrollEvent = CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 2, wheel1: vertical, wheel2: horizontal, wheel3: 0) {
        scrollEvent.post(tap: .cghidEventTap)
    }
}
|
|
97
|
+
|
|
98
|
+
// MARK: - Keyboard Events
|
|
99
|
+
|
|
100
|
+
/// Presses a key combination such as `["cmd", "shift", "s"]`.
///
/// Names are compared case-insensitively. Modifier names are collected into event
/// flags; any other name is looked up with `keyCodeForString`, and the last such
/// lookup decides the key. Nothing is posted when that lookup yields no key code.
func keyCombo(keys: [String]) {
    let modifierFlags: [String: CGEventFlags] = [
        "cmd": .maskCommand, "command": .maskCommand, "meta": .maskCommand,
        "shift": .maskShift,
        "alt": .maskAlternate, "option": .maskAlternate,
        "ctrl": .maskControl, "control": .maskControl,
        "fn": .maskSecondaryFn,
    ]

    var flags: CGEventFlags = []
    var mainKey: CGKeyCode?
    for name in keys.map({ $0.lowercased() }) {
        if let flag = modifierFlags[name] {
            flags.insert(flag)
        } else {
            mainKey = keyCodeForString(name)
        }
    }

    guard let mainKey else { return }

    let press = CGEvent(keyboardEventSource: nil, virtualKey: mainKey, keyDown: true)
    press?.flags = flags
    press?.post(tap: .cghidEventTap)

    usleep(50_000)

    let release = CGEvent(keyboardEventSource: nil, virtualKey: mainKey, keyDown: false)
    release?.flags = flags
    release?.post(tap: .cghidEventTap)
}
|
|
134
|
+
|
|
135
|
+
/// Types `text` one character at a time using synthesized keyboard events.
///
/// Each character is sent as a key-down and a key-up event that both carry the
/// character's UTF-16 code units, so the key-up describes the same character as the
/// key-down instead of the placeholder virtual key 0. There is a 20 ms pause between
/// down and up and a 10 ms pause after each character.
func typeText(text: String) {
    for character in text {
        let utf16Units = Array(String(character).utf16)

        if let keyDown = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: true) {
            keyDown.keyboardSetUnicodeString(stringLength: utf16Units.count, unicodeString: utf16Units)
            keyDown.post(tap: .cghidEventTap)
        }
        usleep(20_000) // 20ms between characters
        if let keyUp = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: false) {
            keyUp.keyboardSetUnicodeString(stringLength: utf16Units.count, unicodeString: utf16Units)
            keyUp.post(tap: .cghidEventTap)
        }
        usleep(10_000)
    }
}
|
|
150
|
+
|
|
151
|
+
// MARK: - Screenshots
|
|
152
|
+
|
|
153
|
+
/// Captures the on-screen content and saves it as a PNG via `saveImage`.
///
/// - Parameter region: Optional "x", "y", "width", "height" values (missing keys
///   default to 0). When nil, `CGRect.infinite` is used as the capture rectangle.
/// - Returns: "path" of the saved file plus the image's "width" and "height".
/// - Throws: `BridgeError.general` when the capture fails, or whatever `saveImage` throws.
func captureScreen(region: [String: Double]?) throws -> [String: Any] {
    let captureRect = region.map { area in
        CGRect(
            x: area["x"] ?? 0,
            y: area["y"] ?? 0,
            width: area["width"] ?? 0,
            height: area["height"] ?? 0
        )
    } ?? CGRect.infinite

    guard let image = CGWindowListCreateImage(captureRect, .optionOnScreenOnly, kCGNullWindowID, .bestResolution) else {
        throw BridgeError.general("Failed to capture screen")
    }

    let savedPath = try saveImage(image)
    return ["path": savedPath, "width": image.width, "height": image.height]
}
|
|
173
|
+
|
|
174
|
+
/// Captures a single window and saves it as a PNG via `saveImage`.
///
/// - Parameter windowId: Window number of the window to capture.
/// - Returns: "path" of the saved file plus the image's "width" and "height".
/// - Throws: `BridgeError.general` when `windowId` is not representable as a
///   `CGWindowID` or the capture fails, or whatever `saveImage` throws.
func captureWindow(windowId: Int) throws -> [String: Any] {
    // `CGWindowID(windowId)` would trap for negative or oversized values.
    guard let cgWindowId = CGWindowID(exactly: windowId) else {
        throw BridgeError.general("Invalid window id \(windowId)")
    }

    guard let image = CGWindowListCreateImage(
        .null, .optionIncludingWindow, cgWindowId, .bestResolution
    ) else {
        throw BridgeError.general("Failed to capture window \(windowId)")
    }

    let path = try saveImage(image)
    return ["path": path, "width": image.width, "height": image.height]
}
|
|
184
|
+
|
|
185
|
+
/// Writes `image` as a PNG with a unique name into the temporary directory.
///
/// - Returns: The file system path of the written file.
/// - Throws: `BridgeError.general` when the image destination cannot be created
///   or finalizing the file fails.
private func saveImage(_ image: CGImage) throws -> String {
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("bridge_screenshot_\(UUID().uuidString).png")

    guard let destination = CGImageDestinationCreateWithURL(outputURL as CFURL, "public.png" as CFString, 1, nil) else {
        throw BridgeError.general("Failed to create image destination")
    }
    CGImageDestinationAddImage(destination, image, nil)
    if !CGImageDestinationFinalize(destination) {
        throw BridgeError.general("Failed to write screenshot")
    }

    return outputURL.path
}
|
|
200
|
+
|
|
201
|
+
// MARK: - Key Code Mapping
|
|
202
|
+
|
|
203
|
+
/// Maps a button name (case-insensitive) to its (down, up) event types.
/// "right" and "other"/"middle" are recognized; any other name maps to the left button.
private func mouseButtonTypes(button: String) -> (CGEventType, CGEventType) {
    let normalized = button.lowercased()
    if normalized == "right" {
        return (.rightMouseDown, .rightMouseUp)
    }
    if normalized == "other" || normalized == "middle" {
        return (.otherMouseDown, .otherMouseUp)
    }
    return (.leftMouseDown, .leftMouseUp)
}
|
|
213
|
+
|
|
214
|
+
/// Maps a button name (case-insensitive) to a `CGMouseButton`.
/// "right" maps to `.right`, "other"/"middle" to `.center`, anything else to `.left`.
private func mouseButton(_ button: String) -> CGMouseButton {
    let namedButtons: [String: CGMouseButton] = [
        "right": .right,
        "other": .center,
        "middle": .center,
    ]
    return namedButtons[button.lowercased()] ?? .left
}
|
|
221
|
+
|
|
222
|
+
/// Looks up the virtual key code for a key name.
///
/// The name must already be in the form used by the tables below (lowercase letters,
/// digits, names such as "return" or "pageup", "f1"..."f12", or a punctuation character).
///
/// - Returns: The key code, or nil when the name is not in any table.
private func keyCodeForString(_ key: String) -> CGKeyCode? {
    let letters: [String: CGKeyCode] = [
        "a": 0, "b": 11, "c": 8, "d": 2, "e": 14, "f": 3, "g": 5,
        "h": 4, "i": 34, "j": 38, "k": 40, "l": 37, "m": 46, "n": 45,
        "o": 31, "p": 35, "q": 12, "r": 15, "s": 1, "t": 17, "u": 32,
        "v": 9, "w": 13, "x": 7, "y": 16, "z": 6,
    ]
    let digits: [String: CGKeyCode] = [
        "0": 29, "1": 18, "2": 19, "3": 20, "4": 21,
        "5": 23, "6": 22, "7": 26, "8": 28, "9": 25,
    ]
    let namedKeys: [String: CGKeyCode] = [
        "return": 36, "enter": 36, "tab": 48, "space": 49,
        "delete": 51, "backspace": 51, "escape": 53, "esc": 53,
        "up": 126, "down": 125, "left": 123, "right": 124,
        "home": 115, "end": 119, "pageup": 116, "pagedown": 121,
    ]
    let functionKeys: [String: CGKeyCode] = [
        "f1": 122, "f2": 120, "f3": 99, "f4": 118, "f5": 96, "f6": 97,
        "f7": 98, "f8": 100, "f9": 101, "f10": 109, "f11": 103, "f12": 111,
    ]
    let punctuation: [String: CGKeyCode] = [
        "-": 27, "=": 24, "[": 33, "]": 30, "\\": 42,
        ";": 41, "'": 39, ",": 43, ".": 47, "/": 44, "`": 50,
    ]

    // The tables have disjoint keys, so the lookup order does not affect the result.
    for table in [letters, digits, namedKeys, functionKeys, punctuation] {
        if let code = table[key] {
            return code
        }
    }
    return nil
}
|
|
242
|
+
}
|