desktop-pilot-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ import Foundation
2
+
3
+ // MARK: - UI Element Types
4
+
5
/// One node of an accessibility tree, optionally carrying its own subtree.
struct PilotElement: Codable, Sendable {
    /// Reference string used to address the element, followed by its role.
    let ref, role: String
    /// Optional title, value and description text of the element.
    let title, value, description: String?
    /// Whether the element is enabled, and whether it is focused.
    let enabled, focused: Bool
    /// Position and size of the element, when present.
    let bounds: ElementBounds?
    /// Nested child elements, when present.
    let children: [PilotElement]?
}
17
+
18
/// Rectangle describing where a UI element sits on screen and how large it is.
struct ElementBounds: Codable, Sendable {
    /// Origin (`x`, `y`) followed by the extent (`width`, `height`).
    let x, y, width, height: Double
}
25
+
26
+ // MARK: - App Snapshot
27
+
28
/// The UI tree of one application as captured at a single moment.
struct AppSnapshot: Codable, Sendable {
    /// Application name.
    let app: String
    /// Bundle identifier, when one is known.
    let bundleID: String?
    /// Process identifier of the application.
    let pid: Int32
    /// Capture time as a string (the exact format is chosen by the producer).
    let timestamp: String
    /// Element count reported alongside the tree.
    let elementCount: Int
    /// The captured elements.
    let elements: [PilotElement]
}
37
+
38
+ // MARK: - Action Result
39
+
40
/// Outcome reported after an action was attempted on a UI element.
struct ActionResult: Codable, Sendable {
    /// `true` when the action succeeded.
    let success: Bool
    /// Message accompanying the outcome.
    let message: String
    /// Reference of the element involved, when there is one.
    let ref: String?
}
46
+
47
+ // MARK: - App Info
48
+
49
/// Short description of a running application.
struct AppInfo: Codable, Sendable {
    /// Application name.
    let name: String
    /// Bundle identifier, when one is known.
    let bundleID: String?
    /// Process identifier.
    let pid: Int32
    /// Flag recording whether the application is scriptable.
    let isScriptable: Bool
    /// Number of windows reported for the application.
    let windowCount: Int
}
57
+
58
+ // MARK: - Tool Input Types
59
+
60
/// Tool input for a snapshot request; both fields may be omitted.
struct SnapshotInput: Codable, Sendable {
    /// Application to snapshot, if the caller specified one.
    let app: String?
    /// Depth limit for the tree, if the caller specified one.
    let maxDepth: Int?
}
64
+
65
/// Tool input for a click request.
struct ClickInput: Codable, Sendable {
    /// Reference of the element to click.
    let ref: String
}
68
+
69
/// Tool input for a type-text request.
struct TypeInput: Codable, Sendable {
    /// Reference of the target element, followed by the text to type.
    let ref, text: String
}
73
+
74
/// Tool input for a read request.
struct ReadInput: Codable, Sendable {
    /// Reference of the element to read.
    let ref: String
}
77
+
78
/// Tool input for a find request; every criterion may be omitted.
struct FindInput: Codable, Sendable {
    /// Optional role, title and value criteria, plus an optional application.
    let role, title, value, app: String?
}
84
+
85
/// Tool input for a menu request.
struct MenuInput: Codable, Sendable {
    /// Menu path, supplied as a single string.
    let path: String
    /// Application whose menu is addressed, if the caller specified one.
    let app: String?
}
89
+
90
/// Tool input for a script request.
struct ScriptInput: Codable, Sendable {
    /// Target application, followed by the script source.
    let app, code: String
    /// Script language, if the caller specified one.
    let language: String?
}
95
+
96
/// Tool input for a screenshot request.
struct ScreenshotInput: Codable, Sendable {
    /// Element reference, if the caller specified one.
    let ref: String?
}
99
+
100
/// A single entry of a batch: a tool name plus its string-valued parameters.
struct BatchAction: Codable, Sendable {
    /// Name of the tool this entry refers to.
    let tool: String
    /// Parameters for the tool, keyed by name; all values are strings.
    let params: [String: String]
}
104
+
105
/// Tool input for a batch request.
struct BatchInput: Codable, Sendable {
    /// The batch entries, in the order they were supplied.
    let actions: [BatchAction]
}
@@ -0,0 +1,49 @@
1
+ import Foundation
2
+
3
+ // MARK: - Platform Errors
4
+
5
/// Failures a platform operation can report.
enum PlatformError: Error, Sendable {
    /// Permission was denied.
    case permissionDenied

    /// No element was found for the given reference.
    case elementNotFound(ref: String)

    /// The named action failed; `reason` carries the explanation.
    case actionFailed(action: String, reason: String)

    /// The named attribute could not be read.
    case attributeReadFailed(attribute: String)

    /// No application was found under the given name.
    case appNotFound(name: String)

    /// Menu navigation failed for the given path components.
    case menuNavigationFailed(path: [String])

    /// The operation is not supported; `description` says what was requested.
    case unsupported(description: String)
}
15
+
16
+ // MARK: - Platform Bridge Protocol
17
+
18
/// Abstraction over platform-specific UI automation.
///
/// The macOS implementation is built on AXUIElement; a future Windows
/// implementation would be built on UI Automation.
protocol PlatformBridge: Sendable {

    // MARK: Permissions

    /// Reports whether accessibility permissions are granted.
    func checkPermissions() -> Bool

    /// Requests accessibility permissions (shows the system dialog).
    func requestPermissions() -> Bool

    // MARK: Applications

    /// Returns name, bundle identifier and PID of the frontmost application,
    /// or `nil` when none is available.
    func getFrontmostApp() -> (name: String, bundleID: String?, pid: Int32)?

    /// Lists running applications that have windows, with their window counts.
    func listApps() -> [(name: String, bundleID: String?, pid: Int32, windowCount: Int)]

    /// Returns the accessibility tree of the application with the given PID.
    /// - Parameters:
    ///   - pid: Process identifier of the application.
    ///   - maxDepth: Depth limit for the returned tree.
    func getAccessibilityTree(pid: Int32, maxDepth: Int) -> [PilotElement]

    // MARK: Element operations

    /// Clicks the element identified by `ref`.
    func clickElement(ref: String) throws

    /// Types `text` into the element identified by `ref`.
    func typeIntoElement(ref: String, text: String) throws

    /// Reads the value of the element identified by `ref`.
    func readElement(ref: String) throws -> String?

    /// Navigates the menu bar of the application with PID `appPID` along `path`.
    func navigateMenu(appPID: Int32, path: [String]) throws
}
@@ -0,0 +1,232 @@
1
+ import ApplicationServices
2
+ import AppKit
3
+ import Foundation
4
+
5
+ // MARK: - Sendable Wrapper
6
+
7
/// Box around an `AXUIElement` that lets it be passed across concurrency boundaries.
///
/// `@unchecked Sendable` is a promise the compiler does not verify. The original
/// author's rationale is that AXUIElement is a CFTypeRef that Apple's AX
/// implementation keeps thread-safe, which Swift 6 cannot see for itself.
public final class AXElementWrapper: @unchecked Sendable {
    /// The wrapped accessibility element; never reassigned after init.
    public let element: AXUIElement

    /// Wraps the given accessibility element.
    public init(_ wrapped: AXUIElement) {
        element = wrapped
    }
}
17
+
18
+ // MARK: - AXBridge
19
+
20
/// Low-level wrapper around the macOS AXUIElement C API.
///
/// The class declares no stored properties; every method works only on its
/// arguments and the Accessibility API. Attribute values whose Core Foundation
/// type is not the expected one are reported as `nil` instead of being
/// force-cast blindly.
public final class AXBridge: @unchecked Sendable {

    public init() {}

    // MARK: - Element Creation

    /// Create the root accessibility element for a running app.
    /// - Parameter pid: Process identifier of the target application.
    func appElement(pid: pid_t) -> AXUIElement {
        AXUIElementCreateApplication(pid)
    }

    // MARK: - Attribute Reading

    /// Read a single attribute from an element.
    /// - Returns: The attribute value, or `nil` when the AX call does not
    ///   report `.success`.
    func getAttribute(_ element: AXUIElement, _ attribute: String) -> CFTypeRef? {
        var value: CFTypeRef?
        let status = AXUIElementCopyAttributeValue(element, attribute as CFString, &value)
        guard status == .success else { return nil }
        return value
    }

    /// Read multiple attributes in one call (faster than individual reads).
    ///
    /// - Returns: One entry per value returned by the AX call. An entry is `nil`
    ///   when the slot is empty, holds `kCFNull`, or holds an `AXValue` of type
    ///   `.axError` (the per-attribute failure placeholder). When the call
    ///   itself fails, an all-`nil` array of `attributes.count` entries is returned.
    func getAttributes(_ element: AXUIElement, _ attributes: [String]) -> [CFTypeRef?] {
        let names = attributes.map { $0 as CFString } as CFArray
        var values: CFArray?
        let status = AXUIElementCopyMultipleAttributeValues(
            element,
            names,
            AXCopyMultipleAttributeOptions(rawValue: 0),
            &values
        )
        guard status == .success, let cfArray = values else {
            return Array(repeating: nil, count: attributes.count)
        }

        let count = CFArrayGetCount(cfArray)
        var result: [CFTypeRef?] = []
        result.reserveCapacity(count)
        for index in 0..<count {
            guard let raw = CFArrayGetValueAtIndex(cfArray, index) else {
                result.append(nil)
                continue
            }
            // The array owns its members, so take an unretained reference.
            let ref = Unmanaged<CFTypeRef>.fromOpaque(raw).takeUnretainedValue()
            result.append(isMissingValue(ref) ? nil : ref)
        }
        return result
    }

    // MARK: - Attribute Writing

    /// Set an attribute value on an element.
    /// - Returns: `true` when the AX call reports `.success`.
    func setAttribute(_ element: AXUIElement, _ attribute: String, _ value: CFTypeRef) -> Bool {
        AXUIElementSetAttributeValue(element, attribute as CFString, value) == .success
    }

    // MARK: - Actions

    /// Perform a named action on an element (e.g. kAXPressAction).
    /// - Returns: `true` when the AX call reports `.success`.
    func performAction(_ element: AXUIElement, _ action: String) -> Bool {
        AXUIElementPerformAction(element, action as CFString) == .success
    }

    // MARK: - Convenience Readers

    /// Get children of an element; empty when the attribute is missing or
    /// is not an array of elements.
    func getChildren(_ element: AXUIElement) -> [AXUIElement] {
        guard let value = getAttribute(element, kAXChildrenAttribute),
              let children = value as? [AXUIElement] else { return [] }
        return children
    }

    /// Get the role of an element (e.g. "AXButton").
    func getRole(_ element: AXUIElement) -> String? {
        getAttribute(element, kAXRoleAttribute) as? String
    }

    /// Get the title of an element.
    func getTitle(_ element: AXUIElement) -> String? {
        getAttribute(element, kAXTitleAttribute) as? String
    }

    /// Get the value of an element as a string.
    ///
    /// Strings are returned as-is, numbers via `stringValue`, and anything
    /// else through `String(describing:)`.
    func getValue(_ element: AXUIElement) -> String? {
        guard let value = getAttribute(element, kAXValueAttribute) else { return nil }
        if let text = value as? String { return text }
        if let number = value as? NSNumber { return number.stringValue }
        return String(describing: value)
    }

    /// Get the accessibility description of an element.
    func getDescription(_ element: AXUIElement) -> String? {
        getAttribute(element, kAXDescriptionAttribute) as? String
    }

    /// Check if element is enabled. Defaults to `true` when the attribute is
    /// missing or not a boolean.
    func isEnabled(_ element: AXUIElement) -> Bool {
        (getAttribute(element, kAXEnabledAttribute) as? Bool) ?? true
    }

    /// Check if element has keyboard focus. Defaults to `false` when the
    /// attribute is missing or not a boolean.
    func isFocused(_ element: AXUIElement) -> Bool {
        (getAttribute(element, kAXFocusedAttribute) as? Bool) ?? false
    }

    /// Get the bounding rectangle of an element in screen coordinates.
    /// - Returns: `nil` when position or size is missing, is not an `AXValue`,
    ///   or cannot be decoded as a point / size.
    func getBounds(_ element: AXUIElement) -> ElementBounds? {
        guard let positionRef = getAttribute(element, kAXPositionAttribute),
              let positionValue = axValue(from: positionRef) else { return nil }
        var point = CGPoint.zero
        guard AXValueGetValue(positionValue, .cgPoint, &point) else { return nil }

        guard let sizeRef = getAttribute(element, kAXSizeAttribute),
              let sizeValue = axValue(from: sizeRef) else { return nil }
        var size = CGSize.zero
        guard AXValueGetValue(sizeValue, .cgSize, &size) else { return nil }

        return ElementBounds(
            x: Double(point.x),
            y: Double(point.y),
            width: Double(size.width),
            height: Double(size.height)
        )
    }

    // MARK: - App-level Queries

    /// Get the currently focused UI element inside an app.
    /// - Returns: `nil` when the attribute is missing or is not an `AXUIElement`.
    func getFocusedElement(_ appElement: AXUIElement) -> AXUIElement? {
        guard let value = getAttribute(appElement, kAXFocusedUIElementAttribute) else { return nil }
        return uiElement(from: value)
    }

    /// Get all windows belonging to an app; empty when the attribute is
    /// missing or is not an array of elements.
    func getWindows(_ appElement: AXUIElement) -> [AXUIElement] {
        guard let value = getAttribute(appElement, kAXWindowsAttribute),
              let windows = value as? [AXUIElement] else { return [] }
        return windows
    }

    /// Get the menu bar element for an app.
    /// - Returns: `nil` when the attribute is missing or is not an `AXUIElement`.
    func getMenuBar(_ appElement: AXUIElement) -> AXUIElement? {
        guard let value = getAttribute(appElement, kAXMenuBarAttribute) else { return nil }
        return uiElement(from: value)
    }

    // MARK: - Menu Navigation

    /// Walk a menu path like ["File", "Save As..."] and press the final item.
    ///
    /// At each level the children of the current menu are scanned in order for
    /// a title equal to the path component. The last component is pressed; an
    /// intermediate component is entered through the first child of the first
    /// matching item that has any children.
    ///
    /// - Returns: The result of the press on the final item, or `false` when
    ///   the path is empty, the menu bar is unavailable, or a component
    ///   cannot be resolved.
    func navigateMenu(_ appElement: AXUIElement, path: [String]) -> Bool {
        guard !path.isEmpty, let menuBar = getMenuBar(appElement) else { return false }

        var current = menuBar
        for (index, menuName) in path.enumerated() {
            let isLast = index == path.count - 1
            var nextMenu: AXUIElement?

            for child in getChildren(current) where getTitle(child) == menuName {
                if isLast {
                    return performAction(child, kAXPressAction)
                }
                // Intermediate item: descend into its submenu; keep scanning
                // siblings when this match has no children.
                if let submenu = getChildren(child).first {
                    nextMenu = submenu
                    break
                }
            }

            guard let next = nextMenu else { return false }
            current = next
        }

        return false
    }

    // MARK: - Permissions

    /// Check if the current process is trusted for accessibility.
    public func isAccessibilityEnabled() -> Bool {
        AXIsProcessTrusted()
    }

    /// Prompt the user to grant accessibility access if not already trusted.
    /// - Returns: The trust state reported by `AXIsProcessTrustedWithOptions`.
    public func promptForAccessibility() -> Bool {
        let key = "AXTrustedCheckOptionPrompt" as CFString
        let options = [key: true] as CFDictionary
        return AXIsProcessTrustedWithOptions(options)
    }

    // MARK: - Type-checked Casts

    /// Returns `ref` as an `AXValue` only when its CF type ID says it is one.
    private func axValue(from ref: CFTypeRef) -> AXValue? {
        guard CFGetTypeID(ref) == AXValueGetTypeID() else { return nil }
        // Type ID verified above, so the forced cast cannot mistype the reference.
        return (ref as! AXValue) // swiftlint:disable:this force_cast
    }

    /// Returns `ref` as an `AXUIElement` only when its CF type ID says it is one.
    private func uiElement(from ref: CFTypeRef) -> AXUIElement? {
        guard CFGetTypeID(ref) == AXUIElementGetTypeID() else { return nil }
        // Type ID verified above, so the forced cast cannot mistype the reference.
        return (ref as! AXUIElement) // swiftlint:disable:this force_cast
    }

    /// Tells whether a multi-attribute result slot is a "no value" placeholder:
    /// either `kCFNull` or an `AXValue` wrapping an `AXError`.
    private func isMissingValue(_ ref: CFTypeRef) -> Bool {
        if CFGetTypeID(ref) == CFNullGetTypeID() { return true }
        if let boxed = axValue(from: ref), AXValueGetType(boxed) == .axError { return true }
        return false
    }
}
@@ -0,0 +1,34 @@
1
+ import ApplicationServices
2
+ import Foundation
3
+
4
+ // MARK: - Permission Manager
5
+
6
/// Static helpers for checking, requesting and logging accessibility permission.
struct PermissionManager: Sendable {

    /// Reports whether the current process is trusted for accessibility.
    static func isGranted() -> Bool {
        AXIsProcessTrusted()
    }

    /// Checks the trust state with the prompt option switched on.
    ///
    /// Returns `true` if access is already granted. Otherwise the system dialog
    /// is shown and the current (likely still `false`) state is returned — the
    /// user has to toggle the setting in System Settings and restart the process.
    static func checkOrPrompt() -> Bool {
        let promptOption = "AXTrustedCheckOptionPrompt" as CFString
        return AXIsProcessTrustedWithOptions([promptOption: true] as CFDictionary)
    }

    /// Writes a one-line, human-readable permission status to stderr.
    static func logPermissionStatus() {
        let line: String
        if isGranted() {
            line = "[DesktopPilot] Accessibility permissions: GRANTED"
        } else {
            line = "[DesktopPilot] Accessibility permissions: NOT GRANTED — open System Settings > Privacy & Security > Accessibility"
        }
        FileHandle.standardError.write(Data((line + "\n").utf8))
    }
}