desktop-pilot-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ import Foundation
2
+
3
+ // MARK: - System Events Error
4
+
5
/// Failures reported by the AppleScript / System Events layer.
///
/// Each case carries one string of detail that is appended to a fixed
/// prefix when the error is rendered through `errorDescription`.
enum SystemEventsError: Error, LocalizedError, Sendable {
    /// A script did not complete successfully; the payload is the message to report.
    case scriptFailed(String)
    /// An application could not be located; the payload is its name.
    case appNotFound(String)
    /// A UI element could not be located; the payload describes the element.
    case elementNotFound(String)

    /// Human-readable message for this error (`LocalizedError` conformance).
    var errorDescription: String? {
        let message: String
        switch self {
        case let .scriptFailed(detail):
            message = "AppleScript error: \(detail)"
        case let .appNotFound(appName):
            message = "App not found: \(appName)"
        case let .elementNotFound(element):
            message = "Element not found: \(element)"
        }
        return message
    }
}
22
+
23
+ // MARK: - System Events Helper
24
+
25
/// Helper for executing AppleScript/JXA and System Events commands.
///
/// System Events is the universal UI automation bridge -- it works
/// with ANY app, not just scriptable ones. This helper wraps common
/// operations: running scripts, clicking elements, typing text,
/// pressing keys, and inspecting UI trees.
///
/// Every operation shells out to a command-line tool (`osascript`, `sdef`
/// or `mdfind`) and blocks the calling thread until that tool exits.
/// Values placed inside a quoted AppleScript string are escaped; values
/// spliced in as raw script text are validated first where their grammar
/// is known (key codes and modifier lists).
struct SystemEventsHelper: Sendable {

    // MARK: - Script Execution

    /// Execute AppleScript code and return the result.
    ///
    /// - Parameter code: AppleScript source passed to `osascript -e`.
    /// - Returns: `.success` with trimmed standard output when `osascript`
    ///   exits with status 0; otherwise `.failure` with
    ///   `SystemEventsError.scriptFailed` (trimmed standard error) or the
    ///   error thrown while launching the process.
    func runAppleScript(_ code: String) -> Result<String, Error> {
        return runOsascript(arguments: ["-e", code])
    }

    /// Execute JXA (JavaScript for Automation) code.
    ///
    /// - Parameter code: JavaScript source passed to
    ///   `osascript -l JavaScript -e`.
    /// - Returns: Same contract as `runAppleScript(_:)`.
    func runJXA(_ code: String) -> Result<String, Error> {
        return runOsascript(arguments: ["-l", "JavaScript", "-e", code])
    }

    // MARK: - Scriptability Detection

    /// Check if an app has an AppleScript dictionary (is natively scriptable).
    ///
    /// Uses `sdef` to probe for a scripting definition file. Apps with an sdef
    /// support direct `tell application` commands beyond System Events.
    ///
    /// - Parameter appName: Display name used to locate the app bundle.
    /// - Returns: `true` only when the bundle was found and `sdef` exited
    ///   with status 0.
    func isScriptable(appName: String) -> Bool {
        guard let appPath = findAppPath(appName) else { return false }

        let process = Process()
        process.executableURL = URL(fileURLWithPath: "/usr/bin/sdef")
        process.arguments = [appPath]
        // Only the exit status matters. Discard the output instead of
        // attaching pipes nobody reads: a scripting definition can exceed
        // the pipe buffer, which would block `sdef` on write and hang
        // `waitUntilExit()` forever.
        process.standardOutput = FileHandle.nullDevice
        process.standardError = FileHandle.nullDevice

        do {
            try process.run()
            process.waitUntilExit()
            return process.terminationStatus == 0
        } catch {
            return false
        }
    }

    /// Find the filesystem path to an app by its display name.
    ///
    /// Uses Spotlight (`mdfind`) to locate the .app bundle.
    ///
    /// - Parameter appName: Display name to match exactly against
    ///   `kMDItemDisplayName`.
    /// - Returns: The first non-empty line printed by `mdfind`, or `nil` when
    ///   nothing was printed or the process could not be launched.
    func findAppPath(_ appName: String) -> String? {
        let process = Process()
        let pipe = Pipe()

        // Escape the name so a quote inside it cannot terminate the
        // single-quoted value and alter the query.
        let quotedName = escapeForSpotlightQuery(appName)

        process.executableURL = URL(fileURLWithPath: "/usr/bin/mdfind")
        process.arguments = [
            "kMDItemDisplayName == '\(quotedName)' && kMDItemContentType == 'com.apple.application-bundle'"
        ]
        process.standardOutput = pipe
        process.standardError = FileHandle.nullDevice

        do {
            try process.run()
            // Drain the pipe before waiting so the child can never block on
            // a full pipe buffer.
            let data = pipe.fileHandleForReading.readDataToEndOfFile()
            process.waitUntilExit()
            let output = trimmedText(from: data)
            return output.components(separatedBy: "\n").first(where: { !$0.isEmpty })
        } catch {
            return nil
        }
    }

    // MARK: - System Events UI Actions

    /// Click a UI element via System Events using its description/path.
    ///
    /// - Parameters:
    ///   - appName: The process name of the target application.
    ///   - elementDescription: AppleScript element reference, e.g.
    ///     `button "OK" of window 1`.
    /// - Returns: The result of running the generated script.
    ///
    /// - Important: `elementDescription` is an AppleScript object reference
    ///   and is therefore inserted into the script verbatim. It cannot be
    ///   escaped like a string literal, so callers must not forward
    ///   untrusted text here.
    func clickElement(
        appName: String,
        elementDescription: String
    ) -> Result<String, Error> {
        let script = """
        tell application "System Events"
        tell process "\(escapeForAppleScript(appName))"
        click \(elementDescription)
        end tell
        end tell
        """
        return runAppleScript(script)
    }

    /// Get the UI element tree for an app via System Events.
    ///
    /// Returns a text listing of every window and its contents
    /// (class, name, description) suitable for LLM consumption.
    ///
    /// - Parameter appName: The process name of the target application.
    func getUIElements(appName: String) -> Result<String, Error> {
        let escaped = escapeForAppleScript(appName)
        // Per-element `try` blocks keep one unreadable name or description
        // from aborting the whole listing.
        let script = """
        tell application "System Events"
        tell process "\(escaped)"
        set windowList to every window
        set resultText to ""
        repeat with w in windowList
        set resultText to resultText & "Window: " & (name of w) & linefeed
        try
        set uiElements to entire contents of w
        repeat with elem in uiElements
        set elemClass to class of elem as text
        set elemName to ""
        try
        set elemName to name of elem
        end try
        set elemDesc to ""
        try
        set elemDesc to description of elem
        end try
        set resultText to resultText & " " & elemClass & ": " & elemName & " (" & elemDesc & ")" & linefeed
        end repeat
        end try
        end repeat
        return resultText
        end tell
        end tell
        """
        return runAppleScript(script)
    }

    /// Type text into the focused field of an app via System Events.
    ///
    /// Uses `keystroke` which simulates actual key presses, making it
    /// more reliable than `AXSetValue` for some apps (e.g. Electron apps).
    ///
    /// - Parameters:
    ///   - appName: The process name of the target application.
    ///   - text: Text to type; escaped before being embedded in the script.
    func typeText(
        appName: String,
        text: String
    ) -> Result<String, Error> {
        let escaped = escapeForAppleScript(text)
        let script = """
        tell application "System Events"
        tell process "\(escapeForAppleScript(appName))"
        keystroke "\(escaped)"
        end tell
        end tell
        """
        return runAppleScript(script)
    }

    /// Press a keyboard shortcut via System Events.
    ///
    /// - Parameters:
    ///   - appName: The process name of the target application.
    ///   - key: The key code (numeric) or key character to press. A value
    ///     that parses as an integer is sent with `key code`; any other
    ///     single character is sent with `keystroke`.
    ///   - modifiers: Modifier names, e.g. `["command down", "shift down"]`.
    /// - Returns: The result of running the generated script, or
    ///   `.failure(SystemEventsError.scriptFailed)` without running anything
    ///   when `key` or `modifiers` cannot be embedded safely.
    func keyPress(
        appName: String,
        key: String,
        modifiers: [String]
    ) -> Result<String, Error> {
        guard let modifierStr = modifierClause(modifiers) else {
            return .failure(SystemEventsError.scriptFailed("Invalid modifier list: \(modifiers)"))
        }

        // `key` ends up as raw script text, so only forward it in a form
        // whose grammar is known: an integer literal or one escaped character.
        let command: String
        if let code = Int(key.trimmingCharacters(in: .whitespacesAndNewlines)) {
            command = "key code \(code)"
        } else if key.count == 1 {
            command = "keystroke \"\(escapeForAppleScript(key))\""
        } else {
            return .failure(SystemEventsError.scriptFailed(
                "Invalid key: expected a numeric key code or a single character"
            ))
        }

        let script = """
        tell application "System Events"
        tell process "\(escapeForAppleScript(appName))"
        \(command)\(modifierStr)
        end tell
        end tell
        """
        return runAppleScript(script)
    }

    /// Press a keystroke with modifiers (character-based, not key code).
    ///
    /// Use this for shortcuts like Cmd+C, Cmd+V where you know the character.
    ///
    /// - Parameters:
    ///   - appName: The process name of the target application.
    ///   - character: Character(s) to send; escaped before embedding.
    ///   - modifiers: Modifier names, e.g. `["command down"]`.
    /// - Returns: The result of running the generated script, or
    ///   `.failure(SystemEventsError.scriptFailed)` without running anything
    ///   when `modifiers` cannot be embedded safely.
    func keystrokeWithModifiers(
        appName: String,
        character: String,
        modifiers: [String]
    ) -> Result<String, Error> {
        guard let modifierStr = modifierClause(modifiers) else {
            return .failure(SystemEventsError.scriptFailed("Invalid modifier list: \(modifiers)"))
        }
        let script = """
        tell application "System Events"
        tell process "\(escapeForAppleScript(appName))"
        keystroke "\(escapeForAppleScript(character))"\(modifierStr)
        end tell
        end tell
        """
        return runAppleScript(script)
    }

    // MARK: - App Queries

    /// Check if an app process is currently running via System Events.
    ///
    /// - Returns: `true` only when the script succeeds and prints `true`;
    ///   any script failure is reported as `false`.
    func isRunning(appName: String) -> Bool {
        let script = """
        tell application "System Events"
        return (name of every process) contains "\(escapeForAppleScript(appName))"
        end tell
        """
        switch runAppleScript(script) {
        case .success(let result):
            return result.lowercased() == "true"
        case .failure:
            return false
        }
    }

    /// Get the frontmost status of an app via System Events.
    ///
    /// - Returns: `true` only when the script succeeds and prints `true`;
    ///   any script failure is reported as `false`.
    func isFrontmost(appName: String) -> Bool {
        let script = """
        tell application "System Events"
        tell process "\(escapeForAppleScript(appName))"
        return frontmost
        end tell
        end tell
        """
        switch runAppleScript(script) {
        case .success(let result):
            return result.lowercased() == "true"
        case .failure:
            return false
        }
    }

    /// Bring an app to the front via System Events.
    func activate(appName: String) -> Result<String, Error> {
        let script = """
        tell application "System Events"
        set frontmost of process "\(escapeForAppleScript(appName))" to true
        end tell
        """
        return runAppleScript(script)
    }

    // MARK: - Private Helpers

    /// Run `/usr/bin/osascript` with the given arguments and collect its output.
    ///
    /// Shared implementation behind `runAppleScript(_:)` and `runJXA(_:)`.
    private func runOsascript(arguments: [String]) -> Result<String, Error> {
        let process = Process()
        let stdout = Pipe()
        let stderr = Pipe()

        process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
        process.arguments = arguments
        process.standardOutput = stdout
        process.standardError = stderr

        do {
            try process.run()
        } catch {
            return .failure(error)
        }

        // Drain the pipes BEFORE waiting for exit. Waiting first deadlocks
        // as soon as the script prints more than the pipe buffer holds: the
        // child blocks on write while the parent blocks on exit. stdout is
        // read first because it carries the (potentially large) result;
        // stderr is expected to hold only a short diagnostic.
        let outData = stdout.fileHandleForReading.readDataToEndOfFile()
        let errData = stderr.fileHandleForReading.readDataToEndOfFile()
        process.waitUntilExit()

        guard process.terminationStatus == 0 else {
            return .failure(SystemEventsError.scriptFailed(trimmedText(from: errData)))
        }
        return .success(trimmedText(from: outData))
    }

    /// Decode process output as UTF-8 and trim surrounding whitespace;
    /// undecodable data yields an empty string.
    private func trimmedText(from data: Data) -> String {
        return String(data: data, encoding: .utf8)?
            .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    }

    /// Build the ` using {…}` clause for a modifier list.
    ///
    /// Modifiers are inserted into the script as bare words, so they cannot
    /// be escaped. Each one must consist only of ASCII letters and spaces,
    /// which rules out quotes, braces and line breaks.
    ///
    /// - Returns: An empty string for an empty list, the clause for a valid
    ///   list, or `nil` when any modifier is empty or contains other characters.
    private func modifierClause(_ modifiers: [String]) -> String? {
        guard !modifiers.isEmpty else { return "" }

        let allowed = CharacterSet(
            charactersIn: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "
        )
        let allSafe = modifiers.allSatisfy { modifier in
            !modifier.isEmpty && modifier.unicodeScalars.allSatisfy { allowed.contains($0) }
        }
        guard allSafe else { return nil }

        return " using {\(modifiers.joined(separator: ", "))}"
    }

    /// Escape special characters for safe embedding in AppleScript strings.
    private func escapeForAppleScript(_ input: String) -> String {
        return input
            .replacingOccurrences(of: "\\", with: "\\\\")
            .replacingOccurrences(of: "\"", with: "\\\"")
    }

    /// Escape backslashes and single quotes for embedding in a single-quoted
    /// Spotlight query value.
    private func escapeForSpotlightQuery(_ input: String) -> String {
        return input
            .replacingOccurrences(of: "\\", with: "\\\\")
            .replacingOccurrences(of: "'", with: "\\'")
    }
}
@@ -0,0 +1,19 @@
1
+ import Foundation
2
+
3
// Bridge and element store handed to the tool handler below.
let bridge = AXBridge()
let store = ElementStore()

// Startup check: when the bridge reports that accessibility is not enabled,
// log setup instructions and ask the bridge to prompt for it. The prompt's
// return value is intentionally ignored.
let accessibilityGranted = bridge.isAccessibilityEnabled()
if accessibilityGranted == false {
    let guidance = [
        "Accessibility permission not granted. ",
        "Go to System Settings > Privacy & Security > Accessibility ",
        "and add this application.",
    ].joined()
    Log.error(guidance)
    _ = bridge.promptForAccessibility()
}

// Wire the tool handler into the MCP server and run it.
let toolHandler = PilotToolHandler(bridge: bridge, store: store)
let server = MCPServer(toolHandler: toolHandler)

await server.run()
@@ -0,0 +1,19 @@
1
+ import DesktopPilot
2
+
3
// Bridge and element store handed to the tool handler below.
let bridge = AXBridge()
let store = ElementStore()

// Startup check: when the bridge reports that accessibility is not enabled,
// log setup instructions and ask the bridge to prompt for it. The prompt's
// return value is intentionally ignored.
let accessibilityGranted = bridge.isAccessibilityEnabled()
if accessibilityGranted == false {
    let guidance = [
        "Accessibility permission not granted. ",
        "Go to System Settings > Privacy & Security > Accessibility ",
        "and add this application.",
    ].joined()
    Log.error(guidance)
    _ = bridge.promptForAccessibility()
}

// Wire the tool handler into the MCP server and run it.
let toolHandler = PilotToolHandler(bridge: bridge, store: store)
let server = MCPServer(toolHandler: toolHandler)

await server.run()
@@ -0,0 +1,290 @@
1
+ import XCTest
2
+ @testable import DesktopPilot
3
+
4
/// Unit tests for the DesktopPilot module: Codable round trips of the model
/// types, `JSONValue` accessors, `ElementStore`, `Router`, `AppRegistry`,
/// the MCP result/content helpers, and `PilotToolHandler` dispatch.
///
/// Optional values are unwrapped with `XCTUnwrap` and collections are read
/// through `first`, so an unexpected `nil` or empty array fails the
/// individual test instead of crashing the whole test process.
final class DesktopPilotTests: XCTestCase {

    // MARK: - Types Tests

    /// A `PilotElement` survives a JSON encode/decode round trip.
    func testPilotElementEncoding() throws {
        let element = PilotElement(
            ref: "e1",
            role: "AXButton",
            title: "OK",
            value: nil,
            description: "Confirm button",
            enabled: true,
            focused: false,
            bounds: ElementBounds(x: 100, y: 200, width: 80, height: 30),
            children: nil
        )

        let encoder = JSONEncoder()
        let data = try encoder.encode(element)
        let decoded = try JSONDecoder().decode(PilotElement.self, from: data)

        XCTAssertEqual(decoded.ref, "e1")
        XCTAssertEqual(decoded.role, "AXButton")
        XCTAssertEqual(decoded.title, "OK")
        XCTAssertNil(decoded.value)
        XCTAssertTrue(decoded.enabled)
        XCTAssertFalse(decoded.focused)
        XCTAssertEqual(decoded.bounds?.x, 100)
    }

    /// An `AppSnapshot` survives a JSON encode/decode round trip.
    func testAppSnapshotEncoding() throws {
        let snapshot = AppSnapshot(
            app: "Finder",
            bundleID: "com.apple.finder",
            pid: 1234,
            timestamp: "2024-01-01T00:00:00Z",
            elementCount: 5,
            elements: []
        )

        let data = try JSONEncoder().encode(snapshot)
        let decoded = try JSONDecoder().decode(AppSnapshot.self, from: data)

        XCTAssertEqual(decoded.app, "Finder")
        XCTAssertEqual(decoded.bundleID, "com.apple.finder")
        XCTAssertEqual(decoded.pid, 1234)
        XCTAssertEqual(decoded.elementCount, 5)
    }

    /// An `AppInfo` survives a JSON encode/decode round trip.
    func testAppInfoEncoding() throws {
        let info = AppInfo(
            name: "Safari",
            bundleID: "com.apple.Safari",
            pid: 5678,
            isScriptable: true,
            windowCount: 3
        )

        let data = try JSONEncoder().encode(info)
        let decoded = try JSONDecoder().decode(AppInfo.self, from: data)

        XCTAssertEqual(decoded.name, "Safari")
        XCTAssertTrue(decoded.isScriptable)
        XCTAssertEqual(decoded.windowCount, 3)
    }

    // MARK: - JSON Value Tests

    /// Keyed accessors return the stored value, or `nil` for a missing key.
    func testJSONValueStringExtraction() {
        let json: JSONValue = .object([
            "name": .string("test"),
            "count": .number(42)
        ])

        XCTAssertEqual(json.stringValue(forKey: "name"), "test")
        XCTAssertNil(json.stringValue(forKey: "missing"))
        XCTAssertEqual(json.intValue(forKey: "count"), 42)
    }

    /// Every `JSONValue` case survives a JSON encode/decode round trip.
    func testJSONValueCodingRoundTrip() throws {
        let original: JSONValue = .object([
            "string": .string("hello"),
            "number": .number(3.14),
            "bool": .bool(true),
            "null": .null,
            "array": .array([.string("a"), .number(1)]),
            "nested": .object(["key": .string("value")])
        ])

        let data = try JSONEncoder().encode(original)
        let decoded = try JSONDecoder().decode(JSONValue.self, from: data)

        XCTAssertEqual(decoded, original)
    }

    // MARK: - Element Store Tests

    /// A fresh store is empty and resolves no refs.
    func testElementStoreBasics() async {
        let store = ElementStore()

        // Initially empty
        let count = await store.count()
        XCTAssertEqual(count, 0)

        // Resolve unknown ref returns nil
        let result = await store.resolve("e1")
        XCTAssertNil(result)
    }

    /// Resetting an empty store leaves it empty.
    func testElementStoreReset() async {
        let store = ElementStore()
        let count = await store.count()
        XCTAssertEqual(count, 0)

        await store.reset()
        let countAfter = await store.count()
        XCTAssertEqual(countAfter, 0)
    }

    // MARK: - Router Tests

    /// Known bundle IDs map to their category; unknown apps fall into
    /// one of the two generic categories.
    func testRouterCategorization() {
        let router = Router()

        // Known scriptable
        let finder = router.categorize(bundleID: "com.apple.finder", appName: "Finder")
        XCTAssertEqual(finder, .scriptable)

        // Known Electron
        let discord = router.categorize(bundleID: "com.hnc.Discord", appName: "Discord")
        XCTAssertEqual(discord, .electron)

        // Unknown app
        let unknown = router.categorize(bundleID: "com.example.unknown", appName: "SomeApp")
        XCTAssertTrue(
            unknown == .unknown || unknown == .nativeStandard,
            "unexpected category for unknown app: \(unknown)"
        )
    }

    /// The "snapshot" action is routed to accessibility.
    func testRouterSnapshotAlwaysAccessibility() {
        let router = Router()

        let method = router.bestMethodForAction(
            action: "snapshot",
            appName: "Finder",
            bundleID: "com.apple.finder"
        )
        XCTAssertEqual(method, .accessibility)
    }

    /// The "read" action is routed to accessibility.
    func testRouterReadAlwaysAccessibility() {
        let router = Router()

        let method = router.bestMethodForAction(
            action: "read",
            appName: "Safari",
            bundleID: "com.apple.Safari"
        )
        XCTAssertEqual(method, .accessibility)
    }

    /// The "type" action is routed to CGEvent.
    func testRouterTypingPrefersCGEvent() {
        let router = Router()

        let method = router.bestMethodForAction(
            action: "type",
            appName: "Notes",
            bundleID: "com.apple.Notes"
        )
        XCTAssertEqual(method, .cgevent)
    }

    /// Every interaction method is reported as available.
    func testRouterAllMethodsAvailable() {
        let router = Router()

        XCTAssertTrue(router.isAvailable(.accessibility))
        XCTAssertTrue(router.isAvailable(.applescript))
        XCTAssertTrue(router.isAvailable(.cgevent))
        XCTAssertTrue(router.isAvailable(.screenshot))
    }

    // MARK: - App Registry Tests

    /// The registry lists at least one app, each with a name and a positive pid.
    func testAppRegistryListApps() {
        let registry = AppRegistry()
        let apps = registry.listApps()

        // Should always find at least a few running apps
        XCTAssertFalse(apps.isEmpty)

        // Each app should have a name
        for app in apps {
            XCTAssertFalse(app.name.isEmpty)
            XCTAssertGreaterThan(app.pid, 0)
        }
    }

    /// The registry reports a frontmost app with a non-empty name.
    func testAppRegistryFrontmostApp() throws {
        let registry = AppRegistry()

        // Should always have a frontmost app. XCTUnwrap records a test
        // failure when there is none instead of trapping on a force-unwrap.
        let frontmost = try XCTUnwrap(registry.frontmostApp())
        XCTAssertFalse(frontmost.name.isEmpty)
    }

    // MARK: - MCP Protocol Tests

    /// `MCPToolResult.success` yields one non-error text item.
    func testMCPToolResultSuccess() {
        let result = MCPToolResult.success("hello")
        XCTAssertFalse(result.isError)
        XCTAssertEqual(result.content.count, 1)
        XCTAssertEqual(result.content.first?.text, "hello")
    }

    /// `MCPToolResult.error` is flagged as an error and carries the message.
    func testMCPToolResultError() {
        let result = MCPToolResult.error("something failed")
        XCTAssertTrue(result.isError)
        XCTAssertEqual(result.content.first?.text, "something failed")
    }

    /// Text content has type "text", the text, and no data.
    func testMCPContentText() {
        let content = MCPContent.text("test")
        XCTAssertEqual(content.type, "text")
        XCTAssertEqual(content.text, "test")
        XCTAssertNil(content.data)
    }

    /// Image content has type "image", the payload and MIME type, and no text.
    func testMCPContentImage() {
        let content = MCPContent.image(base64: "abc123", mimeType: "image/png")
        XCTAssertEqual(content.type, "image")
        XCTAssertNil(content.text)
        XCTAssertEqual(content.data, "abc123")
        XCTAssertEqual(content.mimeType, "image/png")
    }

    // MARK: - Tool Handler Tests

    /// The handler advertises exactly the ten expected tools.
    func testToolHandlerListsAllTools() {
        let bridge = AXBridge()
        let store = ElementStore()
        let handler = PilotToolHandler(bridge: bridge, store: store)

        let tools = handler.listTools()
        XCTAssertEqual(tools.count, 10)

        let names = tools.map { $0.name }
        let expectedNames = [
            "pilot_snapshot",
            "pilot_click",
            "pilot_type",
            "pilot_read",
            "pilot_find",
            "pilot_list_apps",
            "pilot_menu",
            "pilot_script",
            "pilot_screenshot",
            "pilot_batch"
        ]
        for expected in expectedNames {
            XCTAssertTrue(names.contains(expected), "missing tool: \(expected)")
        }
    }

    /// Calling an unregistered tool yields an error result.
    func testToolHandlerUnknownTool() async throws {
        let bridge = AXBridge()
        let store = ElementStore()
        let handler = PilotToolHandler(bridge: bridge, store: store)

        let result = try await handler.callTool(name: "nonexistent", arguments: nil)
        XCTAssertTrue(result.isError)
    }

    /// `pilot_click` without arguments reports a "Missing" error.
    func testToolHandlerClickMissingRef() async throws {
        let bridge = AXBridge()
        let store = ElementStore()
        let handler = PilotToolHandler(bridge: bridge, store: store)

        let result = try await handler.callTool(name: "pilot_click", arguments: nil)
        XCTAssertTrue(result.isError)

        let message = try XCTUnwrap(result.content.first?.text)
        XCTAssertTrue(message.contains("Missing"), "unexpected error text: \(message)")
    }

    /// `pilot_click` with a ref the store does not know reports "Unknown ref".
    func testToolHandlerClickUnknownRef() async throws {
        let bridge = AXBridge()
        let store = ElementStore()
        let handler = PilotToolHandler(bridge: bridge, store: store)

        let args: JSONValue = .object(["ref": .string("e999")])
        let result = try await handler.callTool(name: "pilot_click", arguments: args)
        XCTAssertTrue(result.isError)

        let message = try XCTUnwrap(result.content.first?.text)
        XCTAssertTrue(message.contains("Unknown ref"), "unexpected error text: \(message)")
    }
}