desktop-pilot-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/Package.swift +38 -0
- package/README.md +462 -0
- package/Sources/DesktopPilot/Core/AppRegistry.swift +102 -0
- package/Sources/DesktopPilot/Core/ElementStore.swift +59 -0
- package/Sources/DesktopPilot/Core/Router.swift +242 -0
- package/Sources/DesktopPilot/Core/Snapshot.swift +192 -0
- package/Sources/DesktopPilot/Layers/AccessibilityLayer.swift +190 -0
- package/Sources/DesktopPilot/Layers/AppleScriptLayer.swift +462 -0
- package/Sources/DesktopPilot/Layers/CGEventLayer.swift +318 -0
- package/Sources/DesktopPilot/Layers/LayerProtocol.swift +40 -0
- package/Sources/DesktopPilot/Layers/ScreenshotLayer.swift +122 -0
- package/Sources/DesktopPilot/MCP/Server.swift +536 -0
- package/Sources/DesktopPilot/MCP/Tools.swift +772 -0
- package/Sources/DesktopPilot/MCP/Types.swift +107 -0
- package/Sources/DesktopPilot/Platform/PlatformProtocol.swift +49 -0
- package/Sources/DesktopPilot/Platform/macOS/AXBridge.swift +232 -0
- package/Sources/DesktopPilot/Platform/macOS/Permissions.swift +34 -0
- package/Sources/DesktopPilot/Platform/macOS/SystemEvents.swift +323 -0
- package/Sources/DesktopPilot/main.swift +19 -0
- package/Sources/DesktopPilotCLI/main.swift +19 -0
- package/Tests/DesktopPilotTests/DesktopPilotTests.swift +290 -0
- package/bin/cli.js +61 -0
- package/package.json +52 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
// MARK: - UI Element Types
|
|
4
|
+
|
|
5
|
+
/// One node of an application's accessibility tree.
///
/// The type is recursive: `children` holds nested `PilotElement` values.
struct PilotElement: Codable, Sendable {
    /// Reference string used to address this element.
    let ref: String
    /// Accessibility role name (e.g. "AXButton").
    let role: String
    /// Element title, when one is available.
    let title: String?
    /// Element value rendered as a string, when one is available.
    let value: String?
    /// Accessibility description, when one is available.
    let description: String?
    /// Whether the element is enabled.
    let enabled: Bool
    /// Whether the element has keyboard focus.
    let focused: Bool
    /// Position and size of the element, when known.
    let bounds: ElementBounds?
    /// Nested child elements; `nil` when none were recorded.
    let children: [PilotElement]?
}
|
|
17
|
+
|
|
18
|
+
/// Rectangle describing where a UI element sits on screen.
///
/// All four components are stored as `Double`.
struct ElementBounds: Codable, Sendable {
    /// Origin of the rectangle.
    let x, y: Double
    /// Extent of the rectangle.
    let width, height: Double
}
|
|
25
|
+
|
|
26
|
+
// MARK: - App Snapshot
|
|
27
|
+
|
|
28
|
+
/// The UI tree of one application captured at a single moment.
struct AppSnapshot: Codable, Sendable {
    /// Application name.
    let app: String
    /// Bundle identifier, when the application has one.
    let bundleID: String?
    /// Process identifier of the application.
    let pid: Int32
    /// Capture time as a string (format is chosen by the producer).
    let timestamp: String
    /// Element count reported by the producer of the snapshot.
    let elementCount: Int
    /// Captured elements.
    let elements: [PilotElement]
}
|
|
37
|
+
|
|
38
|
+
// MARK: - Action Result
|
|
39
|
+
|
|
40
|
+
/// Outcome reported after an action was attempted on a UI element.
struct ActionResult: Codable, Sendable {
    /// `true` when the action succeeded.
    let success: Bool
    /// Human-readable detail about the outcome.
    let message: String
    /// Reference of the element involved, if any.
    let ref: String?
}
|
|
46
|
+
|
|
47
|
+
// MARK: - App Info
|
|
48
|
+
|
|
49
|
+
/// Short description of a running application.
struct AppInfo: Codable, Sendable {
    /// Application name.
    let name: String
    /// Bundle identifier, when the application has one.
    let bundleID: String?
    /// Process identifier.
    let pid: Int32
    /// Whether the application is considered scriptable by the producer.
    let isScriptable: Bool
    /// Number of windows reported for the application.
    let windowCount: Int
}
|
|
57
|
+
|
|
58
|
+
// MARK: - Tool Input Types
|
|
59
|
+
|
|
60
|
+
/// Decoded parameters for a snapshot request. Both fields are optional.
struct SnapshotInput: Codable, Sendable {
    /// Target application; how a missing value is resolved is up to the consumer.
    let app: String?
    /// Depth limit for the captured tree; the default is chosen by the consumer.
    let maxDepth: Int?
}
|
|
64
|
+
|
|
65
|
+
/// Decoded parameters for a click request.
struct ClickInput: Codable, Sendable {
    /// Reference of the element to click.
    let ref: String
}
|
|
68
|
+
|
|
69
|
+
/// Decoded parameters for a type-text request.
struct TypeInput: Codable, Sendable {
    /// Reference of the element that receives the text.
    let ref: String
    /// Text to enter.
    let text: String
}
|
|
73
|
+
|
|
74
|
+
/// Decoded parameters for a read request.
struct ReadInput: Codable, Sendable {
    /// Reference of the element to read.
    let ref: String
}
|
|
77
|
+
|
|
78
|
+
/// Decoded parameters for a find request. Every criterion is optional.
struct FindInput: Codable, Sendable {
    /// Role to look for.
    let role: String?
    /// Title to look for.
    let title: String?
    /// Value to look for.
    let value: String?
    /// Application to search in.
    let app: String?
}
|
|
84
|
+
|
|
85
|
+
/// Decoded parameters for a menu request.
struct MenuInput: Codable, Sendable {
    /// Menu path as a single string; how it is split is up to the consumer.
    let path: String
    /// Target application, if specified.
    let app: String?
}
|
|
89
|
+
|
|
90
|
+
/// Decoded parameters for a script request.
struct ScriptInput: Codable, Sendable {
    /// Application the script is aimed at.
    let app: String
    /// Script source text.
    let code: String
    /// Optional language selector; accepted values are defined by the consumer.
    let language: String?
}
|
|
95
|
+
|
|
96
|
+
/// Decoded parameters for a screenshot request.
struct ScreenshotInput: Codable, Sendable {
    /// Optional element reference; what is captured when absent is up to the consumer.
    let ref: String?
}
|
|
99
|
+
|
|
100
|
+
/// One step of a batch request: a tool name plus string-valued parameters.
struct BatchAction: Codable, Sendable {
    /// Name of the tool to invoke.
    let tool: String
    /// Parameters for the tool; keys and values are both strings.
    let params: [String: String]
}
|
|
104
|
+
|
|
105
|
+
/// Decoded parameters for a batch request.
struct BatchInput: Codable, Sendable {
    /// Actions contained in the batch, in the order they were supplied.
    let actions: [BatchAction]
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
// MARK: - Platform Errors
|
|
4
|
+
|
|
5
|
+
/// Failures that platform-level UI automation can report.
enum PlatformError: Error, Sendable {
    /// Required permissions have not been granted.
    case permissionDenied
    /// No element is known for the given reference.
    case elementNotFound(ref: String)
    /// An action was attempted and did not succeed.
    case actionFailed(action: String, reason: String)
    /// An attribute could not be read.
    case attributeReadFailed(attribute: String)
    /// No application matched the given name.
    case appNotFound(name: String)
    /// The given menu path could not be followed.
    case menuNavigationFailed(path: [String])
    /// The requested operation is not supported.
    case unsupported(description: String)
}
|
|
15
|
+
|
|
16
|
+
// MARK: - Platform Bridge Protocol
|
|
17
|
+
|
|
18
|
+
/// Abstraction over platform-specific UI automation.
///
/// The macOS implementation is built on AXUIElement; a future Windows
/// implementation would use UI Automation.
protocol PlatformBridge: Sendable {

    // MARK: Permissions

    /// Reports whether accessibility permissions are granted.
    func checkPermissions() -> Bool

    /// Requests accessibility permissions (shows the system dialog).
    /// - Returns: A `Bool` status supplied by the implementation.
    func requestPermissions() -> Bool

    // MARK: Applications

    /// Describes the frontmost application.
    /// - Returns: Name, optional bundle identifier and PID, or `nil` when the
    ///   implementation has nothing to report.
    func getFrontmostApp() -> (name: String, bundleID: String?, pid: Int32)?

    /// Lists running applications that have windows.
    /// - Returns: One tuple per application, including its window count.
    func listApps() -> [(name: String, bundleID: String?, pid: Int32, windowCount: Int)]

    /// Builds the accessibility tree of the application with the given PID.
    /// - Parameters:
    ///   - pid: Process identifier of the target application.
    ///   - maxDepth: Depth limit for the traversal.
    func getAccessibilityTree(pid: Int32, maxDepth: Int) -> [PilotElement]

    // MARK: Element operations

    /// Clicks the element identified by `ref`.
    /// - Throws: An error when the click cannot be performed.
    func clickElement(ref: String) throws

    /// Types `text` into the element identified by `ref`.
    /// - Throws: An error when the text cannot be entered.
    func typeIntoElement(ref: String, text: String) throws

    /// Reads the value of the element identified by `ref`.
    /// - Returns: The value as a string, or `nil` when there is none.
    /// - Throws: An error when the element cannot be read.
    func readElement(ref: String) throws -> String?

    // MARK: Menus

    /// Navigates the menu bar of the application with PID `appPID` along `path`.
    /// - Throws: An error when navigation fails.
    func navigateMenu(appPID: Int32, path: [String]) throws
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import ApplicationServices
|
|
2
|
+
import AppKit
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
// MARK: - Sendable Wrapper
|
|
6
|
+
|
|
7
|
+
/// Box around an `AXUIElement` that lets it cross concurrency boundaries.
///
/// The compiler cannot verify that `AXUIElement` is safe to share, so the
/// conformance is declared `@unchecked Sendable`. Thread-safety is asserted by
/// the author rather than checked. The wrapped reference is immutable.
public final class AXElementWrapper: @unchecked Sendable {
    /// The wrapped accessibility element.
    public let element: AXUIElement

    /// Wraps `element` without copying or otherwise altering it.
    public init(_ element: AXUIElement) {
        self.element = element
    }
}
|
|
17
|
+
|
|
18
|
+
// MARK: - AXBridge
|
|
19
|
+
|
|
20
|
+
/// Low-level wrapper around the macOS AXUIElement C API.
///
/// The class declares no stored properties, so an instance carries no mutable
/// state of its own; that is the basis for the `@unchecked Sendable`
/// conformance. Methods report failure through `nil`, `false`, or an empty
/// array rather than by throwing.
public final class AXBridge: @unchecked Sendable {

    public init() {}

    // MARK: - Element Creation

    /// Create the root accessibility element for the process with the given PID.
    func appElement(pid: pid_t) -> AXUIElement {
        return AXUIElementCreateApplication(pid)
    }

    // MARK: - Attribute Reading

    /// Read a single attribute from an element.
    /// - Returns: The attribute value, or `nil` when the AX call does not
    ///   report `.success`.
    func getAttribute(_ element: AXUIElement, _ attribute: String) -> CFTypeRef? {
        var value: CFTypeRef?
        let error = AXUIElementCopyAttributeValue(element, attribute as CFString, &value)
        guard error == .success else { return nil }
        return value
    }

    /// Read multiple attributes in one call (faster than individual reads).
    ///
    /// - Returns: An array with at least `attributes.count` entries, where
    ///   entry `i` belongs to `attributes[i]`. Entries are `nil` when the
    ///   attribute is missing, when the API returned `kCFNull`, or when it
    ///   returned an `AXValue` error placeholder.
    func getAttributes(_ element: AXUIElement, _ attributes: [String]) -> [CFTypeRef?] {
        let cfAttributes = attributes.map { $0 as CFString } as CFArray
        var values: CFArray?
        let error = AXUIElementCopyMultipleAttributeValues(
            element,
            cfAttributes,
            AXCopyMultipleAttributeOptions(rawValue: 0),
            &values
        )
        guard error == .success, let cfArray = values else {
            return Array(repeating: nil, count: attributes.count)
        }

        let count = CFArrayGetCount(cfArray)
        var result: [CFTypeRef?] = []
        result.reserveCapacity(max(count, attributes.count))
        for i in 0..<count {
            guard let raw = CFArrayGetValueAtIndex(cfArray, i) else {
                result.append(nil)
                continue
            }
            let ref = Unmanaged<CFTypeRef>.fromOpaque(raw).takeUnretainedValue()
            result.append(isPlaceholder(ref) ? nil : ref)
        }

        // Keep positions aligned with `attributes` even if the API hands back
        // fewer entries than were requested, matching the failure path above.
        if result.count < attributes.count {
            result.append(contentsOf: [CFTypeRef?](repeating: nil, count: attributes.count - result.count))
        }
        return result
    }

    // MARK: - Attribute Writing

    /// Set an attribute value on an element.
    /// - Returns: `true` when the AX call reports `.success`.
    func setAttribute(_ element: AXUIElement, _ attribute: String, _ value: CFTypeRef) -> Bool {
        let error = AXUIElementSetAttributeValue(element, attribute as CFString, value)
        return error == .success
    }

    // MARK: - Actions

    /// Perform a named action on an element (e.g. kAXPressAction).
    /// - Returns: `true` when the AX call reports `.success`.
    func performAction(_ element: AXUIElement, _ action: String) -> Bool {
        let error = AXUIElementPerformAction(element, action as CFString)
        return error == .success
    }

    // MARK: - Convenience Readers

    /// Get children of an element; empty when the attribute is unavailable
    /// or is not an array of elements.
    func getChildren(_ element: AXUIElement) -> [AXUIElement] {
        guard let value = getAttribute(element, kAXChildrenAttribute) else { return [] }
        return (value as? [AXUIElement]) ?? []
    }

    /// Get the role of an element (e.g. "AXButton").
    func getRole(_ element: AXUIElement) -> String? {
        return getAttribute(element, kAXRoleAttribute) as? String
    }

    /// Get the title of an element.
    func getTitle(_ element: AXUIElement) -> String? {
        return getAttribute(element, kAXTitleAttribute) as? String
    }

    /// Get the value of an element as a string.
    ///
    /// Strings are returned as-is, numbers via `stringValue`, and anything
    /// else via `String(describing:)`.
    func getValue(_ element: AXUIElement) -> String? {
        guard let value = getAttribute(element, kAXValueAttribute) else { return nil }
        if let str = value as? String { return str }
        if let num = value as? NSNumber { return num.stringValue }
        return String(describing: value)
    }

    /// Get the accessibility description of an element.
    func getDescription(_ element: AXUIElement) -> String? {
        return getAttribute(element, kAXDescriptionAttribute) as? String
    }

    /// Check if element is enabled. Defaults to `true` when the attribute is
    /// unavailable or not a boolean.
    func isEnabled(_ element: AXUIElement) -> Bool {
        return (getAttribute(element, kAXEnabledAttribute) as? Bool) ?? true
    }

    /// Check if element has keyboard focus. Defaults to `false` when the
    /// attribute is unavailable or not a boolean.
    func isFocused(_ element: AXUIElement) -> Bool {
        return (getAttribute(element, kAXFocusedAttribute) as? Bool) ?? false
    }

    /// Get the bounding rectangle of an element in screen coordinates.
    /// - Returns: `nil` when position or size is unavailable, is not an
    ///   `AXValue`, or cannot be unpacked as a point / size.
    func getBounds(_ element: AXUIElement) -> ElementBounds? {
        guard let posRaw = getAttribute(element, kAXPositionAttribute),
              let posValue = axValue(posRaw) else { return nil }
        var point = CGPoint.zero
        guard AXValueGetValue(posValue, .cgPoint, &point) else { return nil }

        guard let sizeRaw = getAttribute(element, kAXSizeAttribute),
              let sizeValue = axValue(sizeRaw) else { return nil }
        var size = CGSize.zero
        guard AXValueGetValue(sizeValue, .cgSize, &size) else { return nil }

        return ElementBounds(
            x: Double(point.x),
            y: Double(point.y),
            width: Double(size.width),
            height: Double(size.height)
        )
    }

    // MARK: - App-level Queries

    /// Get the currently focused UI element inside an app.
    /// - Returns: `nil` when the attribute is unavailable or is not an
    ///   `AXUIElement`.
    func getFocusedElement(_ appElement: AXUIElement) -> AXUIElement? {
        guard let value = getAttribute(appElement, kAXFocusedUIElementAttribute) else { return nil }
        return axElement(value)
    }

    /// Get all windows belonging to an app; empty when unavailable.
    func getWindows(_ appElement: AXUIElement) -> [AXUIElement] {
        guard let value = getAttribute(appElement, kAXWindowsAttribute) else { return [] }
        return (value as? [AXUIElement]) ?? []
    }

    /// Get the menu bar element for an app.
    /// - Returns: `nil` when the attribute is unavailable or is not an
    ///   `AXUIElement`.
    func getMenuBar(_ appElement: AXUIElement) -> AXUIElement? {
        guard let value = getAttribute(appElement, kAXMenuBarAttribute) else { return nil }
        return axElement(value)
    }

    // MARK: - Menu Navigation

    /// Walk a menu path like ["File", "Save As..."] and press the final item.
    ///
    /// Titles are compared for exact equality. For an intermediate segment
    /// the walk continues into the first child of the matching item; a
    /// matching item without children is skipped in favour of later siblings.
    /// - Returns: The result of pressing the final item, or `false` when the
    ///   path is empty, the app has no menu bar, or a segment cannot be matched.
    func navigateMenu(_ appElement: AXUIElement, path: [String]) -> Bool {
        // Reject an empty path before querying the target app.
        guard !path.isEmpty, let menuBar = getMenuBar(appElement) else { return false }

        var current: AXUIElement = menuBar
        let lastIndex = path.count - 1

        for (index, menuName) in path.enumerated() {
            var next: AXUIElement?

            for child in getChildren(current) where getTitle(child) == menuName {
                if index == lastIndex {
                    return performAction(child, kAXPressAction)
                }
                // Intermediate menu — drill into its submenu children
                if let submenu = getChildren(child).first {
                    next = submenu
                    break
                }
            }

            guard let submenu = next else { return false }
            current = submenu
        }

        return false
    }

    // MARK: - Permissions

    /// Check if the current process is trusted for accessibility.
    public func isAccessibilityEnabled() -> Bool {
        return AXIsProcessTrusted()
    }

    /// Prompt the user to grant accessibility access if not already trusted.
    /// - Returns: The trust status reported by the system call.
    public func promptForAccessibility() -> Bool {
        let key = "AXTrustedCheckOptionPrompt" as CFString
        let options = [key: true] as CFDictionary
        return AXIsProcessTrustedWithOptions(options)
    }

    // MARK: - Private Helpers

    /// Returns `raw` as an `AXValue` only when its CF type ID says it is one.
    private func axValue(_ raw: CFTypeRef) -> AXValue? {
        guard CFGetTypeID(raw) == AXValueGetTypeID() else { return nil }
        return (raw as! AXValue) // swiftlint:disable:this force_cast
    }

    /// Returns `raw` as an `AXUIElement` only when its CF type ID says it is one.
    private func axElement(_ raw: CFTypeRef) -> AXUIElement? {
        guard CFGetTypeID(raw) == AXUIElementGetTypeID() else { return nil }
        return (raw as! AXUIElement) // swiftlint:disable:this force_cast
    }

    /// Tells whether a multi-attribute result entry stands for "no value":
    /// either `kCFNull` or an `AXValue` that wraps an `AXError`.
    private func isPlaceholder(_ ref: CFTypeRef) -> Bool {
        if CFGetTypeID(ref) == CFNullGetTypeID() { return true }
        if let wrapped = axValue(ref) {
            return AXValueGetType(wrapped) == .axError
        }
        return false
    }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import ApplicationServices
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
// MARK: - Permission Manager
|
|
5
|
+
|
|
6
|
+
/// Namespace for checking and requesting accessibility permissions.
struct PermissionManager: Sendable {

    /// Reports whether the current process is trusted for accessibility.
    static func isGranted() -> Bool {
        AXIsProcessTrusted()
    }

    /// Checks the trust status with the system prompt option enabled.
    ///
    /// Returns `true` if already granted. Otherwise the system dialog is
    /// requested and the current (likely still `false`) state is returned;
    /// the user must toggle the setting in System Settings and restart the
    /// process.
    static func checkOrPrompt() -> Bool {
        let promptKey = "AXTrustedCheckOptionPrompt" as CFString
        let promptOptions = [promptKey: true] as CFDictionary
        return AXIsProcessTrustedWithOptions(promptOptions)
    }

    /// Writes a one-line, human-readable permission status to stderr.
    static func logPermissionStatus() {
        let statusLine: String
        if isGranted() {
            statusLine = "[DesktopPilot] Accessibility permissions: GRANTED"
        } else {
            statusLine = "[DesktopPilot] Accessibility permissions: NOT GRANTED — open System Settings > Privacy & Security > Accessibility"
        }
        // Nothing is written if the line cannot be encoded as UTF-8.
        guard let payload = (statusLine + "\n").data(using: .utf8) else { return }
        FileHandle.standardError.write(payload)
    }
}
|