desktop-pilot-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,772 @@
1
+ import ApplicationServices
2
+ import Foundation
3
+
4
+ // MARK: - Tool Registry
5
+
6
+ /// Registers all Desktop Pilot tools and dispatches calls to real implementations.
7
+ public final class PilotToolHandler: ToolHandler, @unchecked Sendable {
8
+
9
+ private let bridge: AXBridge
10
+ private let store: ElementStore
11
+ private let registry: AppRegistry
12
+ private let snapshotBuilder: SnapshotBuilder
13
+ private let screenshotLayer: ScreenshotLayer
14
+
15
/// Creates a tool handler backed by the given accessibility bridge and element store.
///
/// The app registry, snapshot builder and screenshot layer are constructed here
/// rather than injected.
///
/// - Parameters:
///   - bridge: Accessibility bridge; also handed to the snapshot builder and screenshot layer.
///   - store: Element store used to resolve ref IDs; also handed to the screenshot layer.
public init(bridge: AXBridge, store: ElementStore) {
    registry = AppRegistry()
    snapshotBuilder = SnapshotBuilder(bridge: bridge)
    screenshotLayer = ScreenshotLayer(bridge: bridge, store: store)
    self.store = store
    self.bridge = bridge
}
22
+
23
/// Returns the definition of every tool this handler exposes, in a fixed order.
public func listTools() -> [ToolDefinition] {
    let definitions: [ToolDefinition] = [
        snapshotTool, clickTool, typeTool, readTool, findTool,
        listAppsTool, menuTool, scriptTool, screenshotTool, batchTool,
    ]
    return definitions
}
37
+
38
/// Dispatches a tool invocation to the matching handler.
///
/// - Parameters:
///   - name: Tool name, e.g. `"pilot_click"`.
///   - arguments: Raw tool arguments, forwarded unchanged to the handler.
/// - Returns: The handler's result, or an error result when `name` is not a known tool.
/// - Note: The signature is `throws`, but none of the handlers invoked here throw.
public func callTool(name: String, arguments: JSONValue?) async throws -> MCPToolResult {
    // Select the handler first, then invoke it in exactly one place.
    let handler: ((JSONValue?) async -> MCPToolResult)?
    switch name {
    case "pilot_snapshot": handler = handleSnapshot
    case "pilot_click": handler = handleClick
    case "pilot_type": handler = handleType
    case "pilot_read": handler = handleRead
    case "pilot_find": handler = handleFind
    case "pilot_list_apps": handler = handleListApps
    case "pilot_menu": handler = handleMenu
    case "pilot_script": handler = handleScript
    case "pilot_screenshot": handler = handleScreenshot
    case "pilot_batch": handler = handleBatch
    default: handler = nil
    }

    guard let handler else {
        return .error("Unknown tool: \(name)")
    }
    return await handler(arguments)
}
64
+
65
+ // MARK: - Tool Definitions
66
+
67
/// Definition of `pilot_snapshot`: optional `app` and `maxDepth` arguments, nothing required.
private var snapshotTool: ToolDefinition {
    let summary = """
        Get a structured snapshot of an app's UI element tree via the Accessibility API. \
        Use this as the FIRST step when interacting with any macOS app -- it returns every \
        visible element with a ref ID you can pass to other tools. Much faster and more \
        reliable than screenshots.
        """
    return ToolDefinition(
        name: "pilot_snapshot",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "app": .object([
                    "type": .string("string"),
                    "description": .string("App name or bundle ID. Omit for frontmost app."),
                ]),
                "maxDepth": .object([
                    "type": .string("integer"),
                    "description": .string("Maximum tree depth to traverse (default 10)."),
                ]),
            ]),
            "required": .array([]),
        ])
    )
}
96
+
97
/// Definition of `pilot_click`: a single required `ref` argument.
private var clickTool: ToolDefinition {
    let summary = """
        Click a UI element by its ref ID. Use after pilot_snapshot to interact with \
        buttons, checkboxes, menu items, and other clickable elements.
        """
    return ToolDefinition(
        name: "pilot_click",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "ref": .object([
                    "type": .string("string"),
                    "description": .string("Element reference ID from a snapshot (e.g., \"e1\")."),
                ]),
            ]),
            "required": .array([.string("ref")]),
        ])
    )
}
118
+
119
/// Definition of `pilot_type`: required `ref` and `text` arguments.
private var typeTool: ToolDefinition {
    let summary = """
        Type text into a UI element (text field, search box, etc.) by its ref ID. \
        Focuses the element first, then inserts the text.
        """
    return ToolDefinition(
        name: "pilot_type",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "ref": .object([
                    "type": .string("string"),
                    "description": .string("Element reference ID from a snapshot."),
                ]),
                "text": .object([
                    "type": .string("string"),
                    "description": .string("Text to type into the element."),
                ]),
            ]),
            "required": .array([.string("ref"), .string("text")]),
        ])
    )
}
146
+
147
/// Definition of `pilot_read`: a single required `ref` argument.
private var readTool: ToolDefinition {
    let summary = """
        Read the current value/content of a UI element by its ref ID. Use to get text \
        field contents, label text, checkbox state, or any element's value attribute.
        """
    return ToolDefinition(
        name: "pilot_read",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "ref": .object([
                    "type": .string("string"),
                    "description": .string("Element reference ID from a snapshot."),
                ]),
            ]),
            "required": .array([.string("ref")]),
        ])
    )
}
168
+
169
/// Definition of `pilot_find`: optional `role`, `title`, `value` and `app` filters.
///
/// The wording describes what the handler actually does: it searches a single app
/// (the named one, or the frontmost app when `app` is omitted), matches every filter
/// as a case-insensitive substring, and returns at most 50 elements.
private var findTool: ToolDefinition {
    let summary = """
        Search for UI elements matching criteria (role, title, value) within a single \
        app (the frontmost app unless `app` is given). Returns up to 50 matching \
        elements without their children -- more compact than a full snapshot when you \
        know what you're looking for.
        """
    return ToolDefinition(
        name: "pilot_find",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "role": .object([
                    "type": .string("string"),
                    "description": .string(
                        "AX role substring to match, case-insensitive "
                            + "(e.g., \"AXButton\", \"AXTextField\")."
                    ),
                ]),
                "title": .object([
                    "type": .string("string"),
                    "description": .string(
                        "Title/label substring to match (case-insensitive)."
                    ),
                ]),
                "value": .object([
                    "type": .string("string"),
                    "description": .string(
                        "Value substring to match (case-insensitive)."
                    ),
                ]),
                "app": .object([
                    "type": .string("string"),
                    "description": .string(
                        "App name or bundle ID to search. Omit for frontmost app."
                    ),
                ]),
            ]),
            "required": .array([]),
        ])
    )
}
208
+
209
/// Definition of `pilot_list_apps`: takes no arguments.
private var listAppsTool: ToolDefinition {
    let summary = """
        List all running applications with their names, bundle IDs, PIDs, and window \
        counts. Use to discover what apps are available before taking a snapshot.
        """
    return ToolDefinition(
        name: "pilot_list_apps",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([:]),
            "required": .array([]),
        ])
    )
}
223
+
224
/// Definition of `pilot_menu`: required `path`, optional `app`.
private var menuTool: ToolDefinition {
    let summary = """
        Activate a menu bar item by its path (e.g., \"File > Save As...\"). Works by \
        traversing the app's menu bar hierarchy.
        """
    return ToolDefinition(
        name: "pilot_menu",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "path": .object([
                    "type": .string("string"),
                    "description": .string("Menu path using \" > \" separator (e.g., \"File > Save As...\")."),
                ]),
                "app": .object([
                    "type": .string("string"),
                    "description": .string("App name or bundle ID. Omit for frontmost app."),
                ]),
            ]),
            "required": .array([.string("path")]),
        ])
    )
}
251
+
252
/// Definition of `pilot_script`: required `app` and `code`, optional `language`
/// restricted to `"applescript"` or `"jxa"`.
private var scriptTool: ToolDefinition {
    let summary = """
        Run an AppleScript or JXA script targeting a specific app. Use for operations \
        that are easier to express in script form than through individual UI actions.
        """
    return ToolDefinition(
        name: "pilot_script",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "app": .object([
                    "type": .string("string"),
                    "description": .string("Target app name."),
                ]),
                "code": .object([
                    "type": .string("string"),
                    "description": .string("AppleScript or JXA code to execute."),
                ]),
                "language": .object([
                    "type": .string("string"),
                    "description": .string("Script language: \"applescript\" (default) or \"jxa\"."),
                    "enum": .array([.string("applescript"), .string("jxa")]),
                ]),
            ]),
            "required": .array([.string("app"), .string("code")]),
        ])
    )
}
289
+
290
/// Definition of `pilot_screenshot`: optional `ref`; nothing required.
private var screenshotTool: ToolDefinition {
    let summary = """
        Capture a screenshot of a specific element or the full screen. Returns a \
        base64-encoded PNG. Use sparingly -- pilot_snapshot is usually better for \
        understanding UI state.
        """
    return ToolDefinition(
        name: "pilot_screenshot",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "ref": .object([
                    "type": .string("string"),
                    "description": .string("Element ref to screenshot. Omit for full screen."),
                ]),
            ]),
            "required": .array([]),
        ])
    )
}
312
+
313
/// Definition of `pilot_batch`: a required `actions` array of `{tool, params}` objects.
///
/// The `params` schema deliberately places no constraint on value types: the tools
/// a batch can invoke accept non-string arguments (for example the integer
/// `maxDepth` of `pilot_snapshot`). `params` is also not required per action,
/// because the batch handler forwards a missing `params` as "no arguments" and
/// several tools take none.
private var batchTool: ToolDefinition {
    let summary = """
        Execute multiple tool calls in sequence. Reduces round-trips when you need \
        to perform several actions in a row (e.g., click a field, type text, click \
        a button).
        """
    return ToolDefinition(
        name: "pilot_batch",
        description: summary,
        inputSchema: .object([
            "type": .string("object"),
            "properties": .object([
                "actions": .object([
                    "type": .string("array"),
                    "description": .string(
                        "Array of actions to execute in order."
                    ),
                    "items": .object([
                        "type": .string("object"),
                        "properties": .object([
                            "tool": .object([
                                "type": .string("string"),
                                "description": .string(
                                    "Tool name (e.g., \"pilot_click\")."
                                ),
                            ]),
                            "params": .object([
                                "type": .string("object"),
                                "description": .string(
                                    "Tool parameters as key-value pairs."
                                ),
                            ]),
                        ]),
                        "required": .array([
                            .string("tool"),
                        ]),
                    ]),
                ]),
            ]),
            "required": .array([.string("actions")]),
        ])
    )
}
359
+
360
+ // MARK: - Real Handlers
361
+
362
/// Resolves an app to its name and process ID.
///
/// - Parameter appName: Name passed to the registry lookup, or `nil` to use the
///   frontmost app.
/// - Returns: The app's name and pid, or `nil` when the lookup finds nothing. A
///   failed lookup by name does not fall back to the frontmost app.
private func resolveApp(_ appName: String?) -> (name: String, pid: Int32)? {
    guard let requested = appName else {
        return registry.frontmostApp().map { (name: $0.name, pid: $0.pid) }
    }
    return registry.findApp(name: requested).map { (name: $0.name, pid: $0.pid) }
}
374
+
375
/// Handles `pilot_snapshot`: builds a snapshot of the target app and returns it
/// as pretty-printed JSON with sorted keys.
///
/// - Parameter arguments: Optional `app` (string) and `maxDepth` (integer, default 10).
/// - Returns: The encoded snapshot, or an error result when accessibility is
///   disabled, the app cannot be resolved, or encoding fails.
private func handleSnapshot(_ arguments: JSONValue?) async -> MCPToolResult {
    if !bridge.isAccessibilityEnabled() {
        return .error(
            "Accessibility permission not granted. Go to System Settings > Privacy & Security > Accessibility and add this application."
        )
    }

    let requestedApp = arguments?.stringValue(forKey: "app")
    let depthLimit = arguments?.intValue(forKey: "maxDepth") ?? 10

    guard let target = resolveApp(requestedApp) else {
        return .error("Could not find app: \(requestedApp ?? "frontmost"). Is it running?")
    }

    let root = bridge.appElement(pid: target.pid)
    // Second registry lookup: resolveApp returns only name and pid, not the bundle ID.
    let bundleID = registry.findApp(pid: target.pid)?.bundleID

    let tree = await snapshotBuilder.buildSnapshot(
        appElement: root,
        appName: target.name,
        bundleID: bundleID,
        pid: target.pid,
        store: store,
        maxDepth: depthLimit
    )

    let encoder = JSONEncoder()
    encoder.outputFormatting = [.prettyPrinted, .sortedKeys]

    let encoded: Data
    do {
        encoded = try encoder.encode(tree)
    } catch {
        return .error("Failed to encode snapshot: \(error)")
    }
    return .success(String(data: encoded, encoding: .utf8) ?? "{}")
}
414
+
415
/// Handles `pilot_click`: performs the AX press action on the element behind `ref`.
///
/// - Parameter arguments: Must contain a string `ref`.
/// - Returns: A confirmation, or an error result when `ref` is missing, unknown
///   to the store, or the press action reports failure.
private func handleClick(_ arguments: JSONValue?) async -> MCPToolResult {
    guard let ref = arguments?.stringValue(forKey: "ref") else {
        return .error("Missing required parameter: ref")
    }
    guard let target = await store.resolve(ref) else {
        return .error("Unknown ref '\(ref)'. Take a new snapshot first.")
    }
    guard bridge.performAction(target.element, kAXPressAction) else {
        return .error("Failed to click element \(ref). It may not be clickable.")
    }
    return .success("Clicked element \(ref)")
}
431
+
432
/// Handles `pilot_type`: focuses the element behind `ref`, then sets its AX value
/// attribute to `text`.
///
/// - Parameter arguments: Must contain string `ref` and `text`.
/// - Returns: A confirmation echoing the text, or an error result when a parameter
///   is missing, the ref is unknown, or setting the value fails.
private func handleType(_ arguments: JSONValue?) async -> MCPToolResult {
    guard
        let ref = arguments?.stringValue(forKey: "ref"),
        let text = arguments?.stringValue(forKey: "text")
    else {
        return .error("Missing required parameters: ref, text")
    }

    guard let target = await store.resolve(ref) else {
        return .error("Unknown ref '\(ref)'. Take a new snapshot first.")
    }
    let element = target.element

    // Focusing is best-effort: its result is ignored on purpose.
    _ = bridge.setAttribute(element, kAXFocusedAttribute, true as CFTypeRef)

    // The outcome is decided solely by whether the value attribute could be set.
    let valueWasSet = bridge.setAttribute(element, kAXValueAttribute, text as CFTypeRef)
    return valueWasSet
        ? .success("Typed \"\(text)\" into element \(ref)")
        : .error("Failed to type into element \(ref). It may not be a text input field.")
}
465
+
466
/// Handles `pilot_read`: reports the role, title, value and description of the
/// element behind `ref`, one `key: value` line per attribute that is present.
///
/// - Parameter arguments: Must contain a string `ref`.
/// - Returns: The attribute lines joined by newlines, a notice when no attribute
///   is readable, or an error result when `ref` is missing or unknown.
private func handleRead(_ arguments: JSONValue?) async -> MCPToolResult {
    guard let ref = arguments?.stringValue(forKey: "ref") else {
        return .error("Missing required parameter: ref")
    }
    guard let target = await store.resolve(ref) else {
        return .error("Unknown ref '\(ref)'. Take a new snapshot first.")
    }
    let element = target.element

    let value = bridge.getValue(element)
    let title = bridge.getTitle(element)
    let role = bridge.getRole(element)
    let description = bridge.getDescription(element)

    // Output order is role, title, value, description; absent attributes are skipped.
    let lines: [String] = [
        role.map { "role: \($0)" },
        title.map { "title: \($0)" },
        value.map { "value: \($0)" },
        description.map { "description: \($0)" },
    ].compactMap { $0 }

    guard !lines.isEmpty else {
        return .success("Element \(ref) has no readable attributes.")
    }
    return .success(lines.joined(separator: "\n"))
}
491
+
492
/// Handles `pilot_find`: snapshots one app (depth 10) and returns up to 50
/// elements matching the given filters as pretty-printed JSON with sorted keys.
///
/// - Parameter arguments: Optional string filters `role`, `title`, `value`, and an
///   optional `app`; when `app` is absent the frontmost app is searched.
/// - Returns: The encoded matches, a notice when nothing matches, or an error
///   result when accessibility is disabled, the app cannot be resolved, or
///   encoding fails.
private func handleFind(_ arguments: JSONValue?) async -> MCPToolResult {
    if !bridge.isAccessibilityEnabled() {
        return .error("Accessibility permission not granted.")
    }

    let wantedRole = arguments?.stringValue(forKey: "role")
    let wantedTitle = arguments?.stringValue(forKey: "title")
    let wantedValue = arguments?.stringValue(forKey: "value")
    let requestedApp = arguments?.stringValue(forKey: "app")

    guard let target = resolveApp(requestedApp) else {
        return .error("Could not find app: \(requestedApp ?? "frontmost")")
    }

    let root = bridge.appElement(pid: target.pid)
    let bundleID = registry.findApp(pid: target.pid)?.bundleID

    // The search runs over a freshly built snapshot of the app.
    let tree = await snapshotBuilder.buildSnapshot(
        appElement: root,
        appName: target.name,
        bundleID: bundleID,
        pid: target.pid,
        store: store,
        maxDepth: 10
    )

    var hits: [PilotElement] = []
    flattenAndFilter(
        elements: tree.elements,
        role: wantedRole,
        title: wantedTitle,
        value: wantedValue,
        into: &hits,
        limit: 50
    )

    guard !hits.isEmpty else {
        return .success("No elements found matching the criteria.")
    }

    let encoder = JSONEncoder()
    encoder.outputFormatting = [.prettyPrinted, .sortedKeys]

    let encoded: Data
    do {
        encoded = try encoder.encode(hits)
    } catch {
        return .error("Failed to encode results: \(error)")
    }
    return .success(String(data: encoded, encoding: .utf8) ?? "[]")
}
545
+
546
/// Walks an element tree depth-first and appends copies of matching elements,
/// stripped of their children, to `matches`.
///
/// Each non-nil filter must be contained in the corresponding lowercased element
/// attribute; a missing title or value is treated as an empty string. Children of
/// an element are visited whether or not the element itself matched.
///
/// - Parameters:
///   - elements: Elements to scan at this level.
///   - role: Role filter, or `nil` to accept any role.
///   - title: Title filter, or `nil` to accept any title.
///   - value: Value filter, or `nil` to accept any value.
///   - matches: Accumulator the matches are appended to.
///   - limit: Scanning stops once `matches` holds this many elements.
private func flattenAndFilter(
    elements: [PilotElement],
    role: String?,
    title: String?,
    value: String?,
    into matches: inout [PilotElement],
    limit: Int
) {
    // The filters do not change during the walk, so lowercase them once up front.
    let roleNeedle = role?.lowercased()
    let titleNeedle = title?.lowercased()
    let valueNeedle = value?.lowercased()

    /// Whether `node` passes every filter that was supplied.
    func satisfies(_ node: PilotElement) -> Bool {
        if let roleNeedle, !node.role.lowercased().contains(roleNeedle) {
            return false
        }
        if let titleNeedle, !(node.title ?? "").lowercased().contains(titleNeedle) {
            return false
        }
        if let valueNeedle, !(node.value ?? "").lowercased().contains(valueNeedle) {
            return false
        }
        return true
    }

    /// Depth-first walk; returns from the current level as soon as the limit is reached.
    func collect(from nodes: [PilotElement], into found: inout [PilotElement]) {
        for node in nodes {
            if found.count >= limit { return }

            if satisfies(node) {
                // Copy without children to keep the output compact.
                found.append(PilotElement(
                    ref: node.ref,
                    role: node.role,
                    title: node.title,
                    value: node.value,
                    description: node.description,
                    enabled: node.enabled,
                    focused: node.focused,
                    bounds: node.bounds,
                    children: nil
                ))
            }

            if let nested = node.children {
                collect(from: nested, into: &found)
            }
        }
    }

    collect(from: elements, into: &matches)
}
608
+
609
/// Handles `pilot_list_apps`: returns the registry's app list as pretty-printed
/// JSON with sorted keys.
///
/// When accessibility is enabled, each entry is first passed through the
/// registry's window-count enrichment.
///
/// - Parameter arguments: Ignored.
/// - Returns: The encoded app list, or an error result when encoding fails.
private func handleListApps(_ arguments: JSONValue?) async -> MCPToolResult {
    let running = registry.listApps()
    let apps = bridge.isAccessibilityEnabled()
        ? running.map { registry.enrichWithWindowCount($0, bridge: bridge) }
        : running

    let encoder = JSONEncoder()
    encoder.outputFormatting = [.prettyPrinted, .sortedKeys]

    let encoded: Data
    do {
        encoded = try encoder.encode(apps)
    } catch {
        return .error("Failed to encode app list: \(error)")
    }
    return .success(String(data: encoded, encoding: .utf8) ?? "[]")
}
628
+
629
/// Handles `pilot_menu`: activates a menu item addressed by a `" > "`-separated path.
///
/// - Parameter arguments: Must contain a string `path`; optional `app` (frontmost
///   app when omitted).
/// - Returns: A confirmation, or an error result when accessibility is disabled,
///   `path` is missing or malformed, the app cannot be resolved, or menu
///   navigation reports failure.
private func handleMenu(_ arguments: JSONValue?) async -> MCPToolResult {
    // Same pre-check as the snapshot and find handlers.
    guard bridge.isAccessibilityEnabled() else {
        return .error("Accessibility permission not granted.")
    }

    guard let pathStr = arguments?.stringValue(forKey: "path") else {
        return .error("Missing required parameter: path")
    }

    let appName = arguments?.stringValue(forKey: "app")
    guard let app = resolveApp(appName) else {
        return .error("Could not find app: \(appName ?? "frontmost")")
    }

    let pathComponents = pathStr
        .components(separatedBy: " > ")
        .map { $0.trimmingCharacters(in: .whitespaces) }

    // `components(separatedBy:)` always yields at least one element, so an empty
    // array never occurs; what must be rejected are blank segments such as the
    // ones produced by "" or "File > ".
    guard !pathComponents.contains(where: \.isEmpty) else {
        return .error("Invalid menu path: \(pathStr)")
    }

    let appElement = bridge.appElement(pid: app.pid)
    guard bridge.navigateMenu(appElement, path: pathComponents) else {
        return .error(
            "Failed to navigate menu path: \(pathStr). "
                + "Check that the menu items exist in \(app.name)."
        )
    }
    return .success("Activated menu: \(pathStr)")
}
659
+
660
/// Handles `pilot_script`: runs `code` through `/usr/bin/osascript` and returns
/// its combined stdout/stderr.
///
/// - Parameter arguments: Must contain a string `code`. Optional `language`:
///   `"jxa"` selects JavaScript; any other value (or none) runs the code as
///   AppleScript. No `app` argument is read here.
/// - Returns: The trimmed output (or a generic confirmation when the output is
///   empty) on exit status 0; otherwise an error result carrying the exit status
///   and output. Also an error result when `code` is missing or the process
///   cannot be launched.
private func handleScript(_ arguments: JSONValue?) async -> MCPToolResult {
    guard let code = arguments?.stringValue(forKey: "code") else {
        return .error("Missing required parameter: code")
    }

    let language = arguments?.stringValue(forKey: "language") ?? "applescript"

    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript")
    process.arguments = language == "jxa"
        ? ["-l", "JavaScript", "-e", code]
        : ["-e", code]

    // stdout and stderr share one pipe so failures carry the script's diagnostics.
    let pipe = Pipe()
    process.standardOutput = pipe
    process.standardError = pipe

    do {
        try process.run()
    } catch {
        return .error("Failed to run script: \(error)")
    }

    // Drain the pipe BEFORE waiting for exit. A pipe has a bounded buffer; if the
    // script writes more than fits, it blocks until someone reads, and waiting
    // for exit first would then never return.
    let outputData = pipe.fileHandleForReading.readDataToEndOfFile()
    process.waitUntilExit()

    let output = String(data: outputData, encoding: .utf8)?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""

    guard process.terminationStatus == 0 else {
        return .error("Script failed (exit \(process.terminationStatus)): \(output)")
    }
    return .success(output.isEmpty ? "Script executed successfully." : output)
}
699
+
700
/// Handles `pilot_screenshot`: captures either the element behind `ref` or, when
/// no `ref` is given, the full screen.
///
/// - Parameter arguments: Optional string `ref`.
/// - Returns: A result holding one `image/png` item with base64 data, or an error
///   result when the ref is unknown, the element has no bounds, or the capture
///   returns nothing.
private func handleScreenshot(_ arguments: JSONValue?) async -> MCPToolResult {
    guard let ref = arguments?.stringValue(forKey: "ref") else {
        // No ref -- capture the full screen.
        guard let png = screenshotLayer.captureFullScreenBase64() else {
            return .error(
                "Failed to capture full screen screenshot. Screen recording permission may not be granted. Go to System Settings > Privacy & Security > Screen Recording and add this application."
            )
        }
        return MCPToolResult(
            content: [.image(base64: png, mimeType: "image/png")],
            isError: false
        )
    }

    // A ref was supplied -- capture just that element's bounds.
    guard let target = await store.resolve(ref) else {
        return .error("Unknown ref '\(ref)'. Take a new snapshot first.")
    }
    guard let frame = bridge.getBounds(target.element) else {
        return .error(
            "Could not determine bounds for element \(ref). The element may not have a visible frame."
        )
    }
    guard let png = screenshotLayer.captureElementBase64(bounds: frame) else {
        return .error(
            "Failed to capture screenshot of element \(ref). Screen recording permission may not be granted."
        )
    }
    return MCPToolResult(
        content: [.image(base64: png, mimeType: "image/png")],
        isError: false
    )
}
744
+
745
/// Handles `pilot_batch`: runs each action through `callTool` in order and
/// returns one summary line per action.
///
/// Every action is attempted even if an earlier one failed. If any action is
/// malformed, throws, or yields an error result, the batch as a whole is returned
/// as an error result (with the full per-action summary) so the caller can tell
/// that something went wrong.
///
/// - Parameter arguments: Must be an object whose `actions` entry is an array of
///   objects, each with a string `tool` and an optional `params` value that is
///   forwarded unchanged as that tool's arguments.
/// - Returns: The newline-joined per-action summary, as success when every action
///   succeeded and as an error otherwise; an error result when `actions` is missing.
private func handleBatch(_ arguments: JSONValue?) async -> MCPToolResult {
    guard case .object(let dict) = arguments,
          case .array(let actions) = dict["actions"] else {
        return .error("Missing required parameter: actions")
    }

    var results: [String] = []
    var anyFailed = false

    for (index, action) in actions.enumerated() {
        guard case .object(let actionDict) = action,
              case .string(let toolName) = actionDict["tool"] else {
            results.append("action[\(index)]: invalid format")
            anyFailed = true
            continue
        }
        do {
            let toolResult = try await callTool(
                name: toolName,
                arguments: actionDict["params"]
            )
            // `== true` reads correctly whether `isError` is `Bool` or `Bool?`.
            if toolResult.isError == true {
                anyFailed = true
            }
            // Only the first content item's text is summarised.
            let text = toolResult.content.first?.text ?? "(no output)"
            results.append("action[\(index)] \(toolName): \(text)")
        } catch {
            anyFailed = true
            results.append("action[\(index)] \(toolName): error - \(error)")
        }
    }

    let summary = results.joined(separator: "\n")
    return anyFailed ? .error(summary) : .success(summary)
}
772
+ }