desktop-pilot-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2026 VersoXBT
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/Package.swift ADDED
@@ -0,0 +1,38 @@
1
+ // swift-tools-version: 6.0
2
+
3
+ import PackageDescription
4
+
5
// Core library target: sources live in Sources/DesktopPilot and link against
// the ApplicationServices, AppKit and CoreGraphics system frameworks.
// NOTE(review): "main.swift" is excluded from this target's directory —
// confirm that file actually exists under Sources/DesktopPilot, since SwiftPM
// warns about exclude entries that match nothing.
let coreLibrary: Target = .target(
    name: "DesktopPilot",
    dependencies: [],
    path: "Sources/DesktopPilot",
    exclude: ["main.swift"],
    linkerSettings: [
        .linkedFramework("ApplicationServices"),
        .linkedFramework("AppKit"),
        .linkedFramework("CoreGraphics")
    ]
)

// Executable target built from Sources/DesktopPilotCLI; depends on the library.
let commandLineTool: Target = .executableTarget(
    name: "DesktopPilotCLI",
    dependencies: ["DesktopPilot"],
    path: "Sources/DesktopPilotCLI"
)

// Test target for the library, located in Tests/DesktopPilotTests.
let unitTests: Target = .testTarget(
    name: "DesktopPilotTests",
    dependencies: ["DesktopPilot"],
    path: "Tests/DesktopPilotTests"
)

// Package definition: one executable product and one library product,
// no external package dependencies, macOS 13 minimum.
let package = Package(
    name: "DesktopPilot",
    platforms: [.macOS(.v13)],
    products: [
        .executable(name: "desktop-pilot-mcp", targets: ["DesktopPilotCLI"]),
        .library(name: "DesktopPilot", targets: ["DesktopPilot"])
    ],
    dependencies: [],
    targets: [coreLibrary, commandLineTool, unitTests]
)
package/README.md ADDED
@@ -0,0 +1,462 @@
1
+ <p align="center">
2
+ <h1 align="center">Desktop Pilot MCP</h1>
3
+ <p align="center">Native macOS automation for Claude. 30-100x faster than screenshots.</p>
4
+ </p>
5
+
6
+ <p align="center">
7
+ <a href="#"><img alt="Version" src="https://img.shields.io/badge/version-1.0.0-blue.svg" /></a>
8
+ <a href="LICENSE"><img alt="License: MIT" src="https://img.shields.io/badge/license-MIT-green.svg" /></a>
9
+ <a href="#"><img alt="Platform" src="https://img.shields.io/badge/platform-macOS%2013%2B-lightgrey.svg" /></a>
10
+ <a href="#"><img alt="Swift 6" src="https://img.shields.io/badge/swift-6.0-orange.svg" /></a>
11
+ <a href="#"><img alt="Tests" src="https://img.shields.io/badge/tests-22%20passing-brightgreen.svg" /></a>
12
+ <a href="#"><img alt="Binary Size" src="https://img.shields.io/badge/binary-427KB-purple.svg" /></a>
13
+ <a href="#"><img alt="Dependencies" src="https://img.shields.io/badge/dependencies-0-blue.svg" /></a>
14
+ </p>
15
+
16
+ ---
17
+
18
+ Desktop Pilot is an MCP server that gives Claude direct access to any macOS application through the Accessibility API, AppleScript, and CGEvent -- no screenshots, no pixel coordinates, no vision model overhead. It reads the actual UI tree and acts on semantic element references, the same way Playwright works for browsers.
19
+
20
+ **One snapshot of Telegram takes 20ms and returns structured data. The same operation with screenshot-based computer-use takes ~3 seconds and returns pixels.**
21
+
22
+ ```
23
+ pilot_snapshot { "app": "Telegram" }
24
+
25
+ [e1] Window "Saved Messages"
26
+ [e2] MenuButton "Main menu"
27
+ [e3] Button "All chats (111 unread chats)"
28
+ [e7] Button "Code (4 unread chats)"
29
+ [e18] TextField "Write a message..."
30
+ [e20] Button "Record Voice Message"
31
+
32
+ pilot_click { "ref": "e18" } // focus the text field
33
+ pilot_type { "ref": "e18", "text": "Hello from Claude" }
34
+ pilot_click { "ref": "e20" } // send
35
+ ```
36
+
37
+ No coordinates. No screenshots. No guessing. Just refs.
38
+
39
+ ---
40
+
41
+ ## Benchmarks
42
+
43
+ Real measurements from testing against Telegram, Finder, and other macOS apps:
44
+
45
+ | Operation | computer-use (screenshots) | Desktop Pilot | Speedup |
46
+ |-----------|---------------------------|---------------|---------|
47
+ | Snapshot (read full UI tree) | ~3000ms | 20ms | **150x** |
48
+ | Snapshot (Finder, 45 elements) | ~3000ms | 78ms | **38x** |
49
+ | Click element | ~3000ms | ~50ms | **60x** |
50
+ | Read element value | ~3000ms | <1ms | **3000x** |
51
+ | Find buttons by role | ~3000ms | 4ms | **750x** |
52
+ | Type text | ~4000ms | ~20ms | **200x** |
53
+ | Full flow (click + type + send) | ~14s | ~450ms | **30x** |
54
+
55
+ Screenshot-based approaches (computer-use, etc.) pay the cost of a full screen capture, a vision model call, and coordinate calculation on every single operation. Desktop Pilot reads and acts on the live UI tree directly.
56
+
57
+ ---
58
+
59
+ ## How It Works
60
+
61
+ Desktop Pilot uses four interaction layers with a smart router that picks the fastest method for each app and action:
62
+
63
+ ```
64
+ +-------------------+
65
+ | Smart Router |
66
+ | (per-app + per- |
67
+ | action routing) |
68
+ +--------+----------+
69
+ |
70
+ +-------+-------+-------+--------+
71
+ | | | |
72
+ +------+--+ +--+------+ +-----+---+ +--+--------+
73
+ |AppleScript| | AX | | CGEvent | |Screenshot |
74
+ | Layer | | Layer | | Layer | | Layer |
75
+ +----------+ +--------+ +---------+ +-----------+
76
+ Priority: 20 Pri: 0 Pri: 40 Pri: 50
77
+ Scriptable Universal Raw input Fallback
78
+ apps only all apps injection (vision)
79
+ ```
80
+
81
+ **Layer 1 -- Accessibility API** (priority 0, universal)
82
+ Reads the structured UI tree of any macOS app. Every button, text field, menu item, and label is exposed as a node with a semantic ref ID. This is the primary layer for reading state, clicking, and finding elements.
83
+
84
+ **Layer 2 -- AppleScript / System Events** (priority 20, scriptable apps)
85
+ Deep scripting for apps with AppleScript dictionaries (Finder, Safari, Mail, Keynote, Music, etc.). The router detects scriptable apps via `sdef` and routes script-based operations here automatically.
86
+
87
+ **Layer 3 -- CGEvent** (priority 40, input injection)
88
+ Ultra-fast keyboard and mouse input at 1-5ms latency. Used for typing text (more reliable than AXSetValue for most apps), keyboard shortcuts, mouse clicks at coordinates, and drag operations.
89
+
90
+ **Layer 4 -- Screenshot** (priority 50, last resort)
91
+ Captures screen regions or specific element bounds as base64 PNG. Only used when Accessibility can't see the content -- game viewports, canvas elements, custom-rendered UI.
92
+
93
+ The **Smart Router** classifies each app (scriptable, Electron, native, unknown) and picks the optimal layer per action:
94
+
95
+ | Action | Scriptable apps | Electron apps | Native apps |
96
+ |--------|----------------|---------------|-------------|
97
+ | Snapshot / Read / Find | Accessibility | Accessibility | Accessibility |
98
+ | Click | Accessibility | Accessibility | Accessibility |
99
+ | Type | CGEvent | Accessibility | CGEvent |
100
+ | Script | AppleScript | Accessibility | Accessibility |
101
+ | Menu | Accessibility | Accessibility | Accessibility |
102
+
103
+ ---
104
+
105
+ ## Comparison
106
+
107
+ | Feature | Desktop Pilot | computer-use (built-in) | Playwright MCP | adamrdrew/macos-accessibility-mcp | steipete/macos-automator-mcp |
108
+ |---------|:------------:|:-----------------------:|:--------------:|:---------------------------------:|:----------------------------:|
109
+ | Speed | 20-100ms | 2-5s | 50-200ms | ~200ms | ~500ms |
110
+ | Native macOS apps | Yes | Yes | No | Yes | Yes |
111
+ | Web apps / browsers | Yes | Yes | Yes | No | No |
112
+ | Electron apps | Yes | Yes | Yes | Partial | No |
113
+ | Accessibility API | Yes | No | No | Yes | No |
114
+ | AppleScript integration | Yes | No | No | No | Yes |
115
+ | CGEvent (raw input) | Yes | No | No | No | No |
116
+ | Screenshot fallback | Yes | Yes (primary) | Yes | No | No |
117
+ | Smart layer routing | Yes | No | No | No | No |
118
+ | Semantic element refs | Yes | No | Yes | Basic | No |
119
+ | Batch operations | Yes | No | No | No | No |
120
+ | Menu bar navigation | Yes | No | No | No | Via script |
121
+ | Zero dependencies | Yes | N/A | Node.js | Node.js | Node.js |
122
+ | Binary size | 427KB | N/A | ~50MB+ | ~30MB+ | ~30MB+ |
123
+
124
+ ---
125
+
126
+ ## Installation
127
+
128
+ ### Requirements
129
+
130
+ - macOS 13.0+ (Ventura or later)
131
+ - Swift 6.0+ (included with Xcode 16+)
132
+ - Accessibility permission (one-time grant in System Settings)
133
+
134
+ ### Build from source
135
+
136
+ ```bash
137
+ git clone https://github.com/VersoXBT/desktop-pilot-mcp.git
138
+ cd desktop-pilot-mcp
139
+ swift build -c release
140
+ ```
141
+
142
+ The compiled binary is at `.build/release/desktop-pilot-mcp` (427KB, zero runtime dependencies).
143
+
144
+ ### Grant Accessibility Permission
145
+
146
+ On first launch, macOS will prompt for Accessibility access. If it doesn't:
147
+
148
+ 1. Open **System Settings > Privacy & Security > Accessibility**
149
+ 2. Add the `desktop-pilot-mcp` binary (or the terminal app running it)
150
+ 3. Restart the server
151
+
152
+ For screenshot functionality, also grant **Screen Recording** permission.
153
+
154
+ ---
155
+
156
+ ## Configuration
157
+
158
+ ### Claude Code
159
+
160
+ Add to `~/.claude.json`:
161
+
162
+ ```json
163
+ {
164
+ "mcpServers": {
165
+ "desktop-pilot": {
166
+ "command": "/absolute/path/to/desktop-pilot-mcp/.build/release/desktop-pilot-mcp",
167
+ "args": []
168
+ }
169
+ }
170
+ }
171
+ ```
172
+
173
+ ### Claude Desktop
174
+
175
+ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json`):
176
+
177
+ ```json
178
+ {
179
+ "mcpServers": {
180
+ "desktop-pilot": {
181
+ "command": "/absolute/path/to/desktop-pilot-mcp/.build/release/desktop-pilot-mcp",
182
+ "args": []
183
+ }
184
+ }
185
+ }
186
+ ```
187
+
188
+ Replace `/absolute/path/to` with the actual path where you cloned the repo.
189
+
190
+ ---
191
+
192
+ ## Tool Reference
193
+
194
+ Desktop Pilot exposes 10 tools through the MCP protocol. All tools use the `pilot_` prefix.
195
+
196
+ ### `pilot_snapshot`
197
+
198
+ Get a structured snapshot of an app's UI element tree. This is the starting point for any interaction -- it returns every visible element with a ref ID you can pass to other tools.
199
+
200
+ ```json
201
+ { "app": "Telegram" }
202
+ ```
203
+
204
+ | Parameter | Type | Required | Description |
205
+ |-----------|------|----------|-------------|
206
+ | `app` | string | No | App name or bundle ID. Omit for frontmost app. |
207
+ | `maxDepth` | integer | No | Maximum tree depth to traverse (default 10). |
208
+
209
+ Returns a tree of elements, each with a `ref` (e.g. `e1`, `e2`), role, title, value, enabled/focused state, and bounding rectangle.
210
+
211
+ ---
212
+
213
+ ### `pilot_click`
214
+
215
+ Click a UI element by its ref ID. Works with buttons, checkboxes, menu items, and any clickable element.
216
+
217
+ ```json
218
+ { "ref": "e5" }
219
+ ```
220
+
221
+ | Parameter | Type | Required | Description |
222
+ |-----------|------|----------|-------------|
223
+ | `ref` | string | Yes | Element reference ID from a snapshot. |
224
+
225
+ ---
226
+
227
+ ### `pilot_type`
228
+
229
+ Type text into a text field, search box, or any editable element. Focuses the element first, then inserts the text.
230
+
231
+ ```json
232
+ { "ref": "e18", "text": "Hello from Claude" }
233
+ ```
234
+
235
+ | Parameter | Type | Required | Description |
236
+ |-----------|------|----------|-------------|
237
+ | `ref` | string | Yes | Element reference ID from a snapshot. |
238
+ | `text` | string | Yes | Text to type into the element. |
239
+
240
+ ---
241
+
242
+ ### `pilot_read`
243
+
244
+ Read the current value, title, role, and description of a UI element. Use to check text field contents, checkbox state, or label text.
245
+
246
+ ```json
247
+ { "ref": "e3" }
248
+ ```
249
+
250
+ | Parameter | Type | Required | Description |
251
+ |-----------|------|----------|-------------|
252
+ | `ref` | string | Yes | Element reference ID from a snapshot. |
253
+
254
+ ---
255
+
256
+ ### `pilot_find`
257
+
258
+ Search for UI elements matching criteria across an app's UI tree. Faster than a full snapshot when you know what you're looking for.
259
+
260
+ ```json
261
+ { "role": "AXButton", "title": "Save", "app": "Finder" }
262
+ ```
263
+
264
+ | Parameter | Type | Required | Description |
265
+ |-----------|------|----------|-------------|
266
+ | `role` | string | No | AX role to match (e.g. `AXButton`, `AXTextField`). |
267
+ | `title` | string | No | Title/label substring, case-insensitive. |
268
+ | `value` | string | No | Value substring to match. |
269
+ | `app` | string | No | Limit search to this app. Omit for frontmost. |
270
+
271
+ ---
272
+
273
+ ### `pilot_menu`
274
+
275
+ Activate a menu bar item by path. Traverses the app's menu bar hierarchy directly.
276
+
277
+ ```json
278
+ { "path": "File > Save As...", "app": "TextEdit" }
279
+ ```
280
+
281
+ | Parameter | Type | Required | Description |
282
+ |-----------|------|----------|-------------|
283
+ | `path` | string | Yes | Menu path with ` > ` separator. |
284
+ | `app` | string | No | App name or bundle ID. Omit for frontmost. |
285
+
286
+ ---
287
+
288
+ ### `pilot_script`
289
+
290
+ Run AppleScript or JXA (JavaScript for Automation) code targeting a specific app.
291
+
292
+ ```json
293
+ {
294
+ "app": "Finder",
295
+ "code": "tell application \"Finder\" to get name of every window",
296
+ "language": "applescript"
297
+ }
298
+ ```
299
+
300
+ | Parameter | Type | Required | Description |
301
+ |-----------|------|----------|-------------|
302
+ | `app` | string | Yes | Target app name. |
303
+ | `code` | string | Yes | AppleScript or JXA code to execute. |
304
+ | `language` | string | No | `applescript` (default) or `jxa`. |
305
+
306
+ ---
307
+
308
+ ### `pilot_screenshot`
309
+
310
+ Capture a screenshot of a specific element or the full screen. Returns base64 PNG. Use sparingly -- `pilot_snapshot` is usually better for understanding UI state.
311
+
312
+ ```json
313
+ { "ref": "e1" }
314
+ ```
315
+
316
+ | Parameter | Type | Required | Description |
317
+ |-----------|------|----------|-------------|
318
+ | `ref` | string | No | Element ref to screenshot. Omit for full screen. |
319
+
320
+ ---
321
+
322
+ ### `pilot_batch`
323
+
324
+ Execute multiple tool calls in sequence within a single MCP round-trip. Use to reduce latency when performing multi-step actions.
325
+
326
+ ```json
327
+ {
328
+ "actions": [
329
+ { "tool": "pilot_click", "params": { "ref": "e18" } },
330
+ { "tool": "pilot_type", "params": { "ref": "e18", "text": "Hello" } },
331
+ { "tool": "pilot_click", "params": { "ref": "e20" } }
332
+ ]
333
+ }
334
+ ```
335
+
336
+ | Parameter | Type | Required | Description |
337
+ |-----------|------|----------|-------------|
338
+ | `actions` | array | Yes | Array of `{ tool, params }` objects to execute in order. |
339
+
340
+ ---
341
+
342
+ ### `pilot_list_apps`
343
+
344
+ List all running macOS applications with their names, bundle IDs, PIDs, and window counts. Use to discover available apps before taking a snapshot.
345
+
346
+ ```json
347
+ {}
348
+ ```
349
+
350
+ No parameters required.
351
+
352
+ ---
353
+
354
+ ## Architecture
355
+
356
+ ```
357
+ Sources/
358
+ DesktopPilot/
359
+ Core/
360
+ AppRegistry.swift # App discovery via NSWorkspace
361
+ ElementStore.swift # Actor-based ref-to-element mapping
362
+ Router.swift # Smart per-app, per-action routing
363
+ Snapshot.swift # Batch AX tree traversal
364
+ Layers/
365
+ LayerProtocol.swift # InteractionLayer protocol
366
+ AccessibilityLayer.swift # AXUIElement tree reading + actions
367
+ AppleScriptLayer.swift # System Events + sdef scripting
368
+ CGEventLayer.swift # Raw keyboard/mouse injection
369
+ ScreenshotLayer.swift # Screen capture fallback
370
+ MCP/
371
+ Server.swift # JSON-RPC 2.0 with Content-Length framing
372
+ Tools.swift # 10 tool definitions + dispatch
373
+ Types.swift # PilotElement, AppSnapshot, AppInfo
374
+ Platform/
375
+ PlatformProtocol.swift # Cross-platform bridge interface
376
+ macOS/
377
+ AXBridge.swift # Low-level AXUIElement C API wrapper
378
+ Permissions.swift # Accessibility permission management
379
+ SystemEvents.swift # AppleScript/JXA execution helper
380
+ DesktopPilotCLI/
381
+ main.swift # Entry point: permission check + server start
382
+ Tests/
383
+ DesktopPilotTests/
384
+ DesktopPilotTests.swift # 22 tests: types, router, registry, MCP, tools
385
+ ```
386
+
387
+ **Key design decisions:**
388
+
389
+ - **Zero dependencies.** The entire server is built on Apple frameworks only (ApplicationServices, AppKit, CoreGraphics). No SwiftNIO, no Vapor, no third-party JSON library. This keeps the binary at 427KB.
390
+ - **Actor-based element store.** Refs are ephemeral -- they reset on each snapshot. The `ElementStore` actor guarantees thread-safe access to the ref-to-AXUIElement mapping across concurrent tool calls.
391
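+ - **Content-Length framing.** The MCP server speaks JSON-RPC 2.0 over stdin/stdout using `Content-Length` header framing (the LSP-style framing). Note that the MCP specification's stdio transport defines newline-delimited messages, so verify that your client accepts `Content-Length` framing.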
392
+ - **Batch attribute reading.** Instead of N individual AXUIElementCopyAttributeValue calls per element, the snapshot builder uses `AXUIElementCopyMultipleAttributeValues` to read 6 attributes in a single call. This is why snapshots are fast.
393
+
394
+ ---
395
+
396
+ ## Supported Apps
397
+
398
+ Desktop Pilot works with any macOS application that exposes an accessibility tree (which is virtually all of them):
399
+
400
+ | Category | Examples | Primary Layer |
401
+ |----------|----------|---------------|
402
+ | Apple native | Finder, Safari, Mail, Notes, Calendar, Music | AppleScript + Accessibility |
403
+ | Productivity | Microsoft Office, Google Chrome, Firefox | Accessibility |
404
+ | Electron | VS Code, Discord, Slack, Spotify, Signal | Accessibility |
405
+ | Creative | Final Cut Pro, Logic Pro, Xcode | AppleScript + Accessibility |
406
+ | Communication | Telegram, iMessage, WhatsApp | Accessibility |
407
+ | System | System Settings, Activity Monitor, Terminal | Accessibility |
408
+
409
+ ---
410
+
411
+ ## Use Cases
412
+
413
+ - **Automate any macOS workflow** -- file management, app configuration, data entry across apps
414
+ - **Build AI agents** that operate native desktop applications Claude can't reach through web APIs
415
+ - **Test macOS apps** by driving the UI through structured element refs instead of fragile pixel coordinates
416
+ - **Cross-app orchestration** -- copy data from one app, process it, paste into another, all in a single Claude session
417
+ - **Accessibility auditing** -- inspect the full UI tree of any app to verify accessibility compliance
418
+
419
+ ---
420
+
421
+ ## Troubleshooting
422
+
423
+ **"Accessibility permission not granted"**
424
+ Open System Settings > Privacy & Security > Accessibility and add the binary or your terminal app. Restart the MCP server after granting.
425
+
426
+ **"Failed to capture screenshot"**
427
+ Grant Screen Recording permission in System Settings > Privacy & Security > Screen Recording. Required only for `pilot_screenshot`.
428
+
429
+ **Stale refs (`Unknown ref 'e5'`)**
430
+ Refs reset on every `pilot_snapshot` call. Always take a fresh snapshot before interacting with elements. If an app's UI has changed since the last snapshot, the old refs are invalid.
431
+
432
+ **Electron apps not responding to `pilot_type`**
433
+ Some Electron apps (VS Code, Discord) swallow raw key events. The router handles this by using Accessibility (AXSetValue) instead of CGEvent for Electron apps. If typing still fails, try `pilot_script` with a System Events keystroke.
434
+
435
+ **Empty snapshots**
436
+ The app may not have any open windows, or it may use a non-standard UI framework (games, OpenGL/Metal renderers). Use `pilot_screenshot` as a fallback for custom-rendered content.
437
+
438
+ ---
439
+
440
+ ## Development
441
+
442
+ ```bash
443
+ # Build debug
444
+ swift build
445
+
446
+ # Build release
447
+ swift build -c release
448
+
449
+ # Run tests
450
+ swift test
451
+
452
+ # Run the server directly
453
+ swift run desktop-pilot-mcp
454
+ ```
455
+
456
+ The project is split into a library target (`DesktopPilot`) and an executable target (`DesktopPilotCLI`) for testability. All core logic lives in the library; the CLI is a thin entry point.
457
+
458
+ ---
459
+
460
+ ## License
461
+
462
+ MIT
@@ -0,0 +1,102 @@
1
+ import AppKit
2
+ import Foundation
3
+
4
+ // MARK: - App Registry
5
+
6
/// Discovers and tracks running macOS applications.
///
/// Uses `NSWorkspace` / `NSRunningApplication` to enumerate GUI apps and
/// provides lookup by name, bundle ID, or PID. The discovery methods always
/// report `isScriptable: false` and `windowCount: 0`; call
/// `enrichWithWindowCount(_:bridge:)` to fill in the real window count
/// (which goes through `AXBridge`).
struct AppRegistry: Sendable {

    // MARK: - List All Apps

    /// List all running apps that have a GUI (activation policy `.regular`).
    ///
    /// This filters out background daemons, menu bar extras, and other
    /// non-user-facing processes. Apps without a localized name are skipped.
    ///
    /// - Returns: One `AppInfo` per matching app, in the order reported by
    ///   `NSWorkspace.shared.runningApplications`.
    func listApps() -> [AppInfo] {
        NSWorkspace.shared.runningApplications
            .filter { $0.activationPolicy == .regular }
            .compactMap { Self.makeInfo(from: $0) }
    }

    // MARK: - Frontmost App

    /// Get the currently frontmost (active) application.
    ///
    /// - Returns: The frontmost app, or `nil` if there is none or it has no
    ///   localized name.
    func frontmostApp() -> AppInfo? {
        NSWorkspace.shared.frontmostApplication.flatMap { Self.makeInfo(from: $0) }
    }

    // MARK: - Find by Name

    /// Find an app by name using case-insensitive matching.
    ///
    /// Matches against both the display name and the bundle identifier.
    /// An exact (case-insensitive) match always wins over a partial one, so
    /// a query such as "Code" resolves to an app named "Code" even when
    /// "Xcode" appears earlier in the running-apps list. If no exact match
    /// exists, the first app whose name or bundle ID contains the query is
    /// returned.
    ///
    /// - Parameter name: App name or bundle ID (or a fragment of either).
    ///   Surrounding whitespace is ignored.
    /// - Returns: The best match, or `nil` if nothing matches or the query is
    ///   empty after trimming.
    func findApp(name: String) -> AppInfo? {
        let query = name
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()
        // An empty query carries no information; never match an arbitrary app.
        guard !query.isEmpty else { return nil }

        let candidates = listApps()

        // Pass 1: exact name or bundle-ID match.
        let exact = candidates.first { info in
            info.name.lowercased() == query
                || info.bundleID?.lowercased() == query
        }
        if let exact {
            return exact
        }

        // Pass 2: substring match, first hit wins.
        return candidates.first { info in
            info.name.lowercased().contains(query)
                || (info.bundleID?.lowercased().contains(query) ?? false)
        }
    }

    // MARK: - Find by PID

    /// Find an app by its process identifier.
    ///
    /// Unlike `listApps()`, this does not filter on activation policy.
    ///
    /// - Parameter pid: Process identifier to look up.
    /// - Returns: The app's info, or `nil` if no running application has that
    ///   PID or it has no localized name.
    func findApp(pid: Int32) -> AppInfo? {
        NSRunningApplication(processIdentifier: pid).flatMap { Self.makeInfo(from: $0) }
    }

    // MARK: - Enrich with Window Count

    /// Return a copy of `info` whose `windowCount` is the number of windows
    /// that `bridge.getWindows` reports for the app's AX element.
    ///
    /// All other fields are copied through unchanged.
    ///
    /// NOTE(review): the count is always overwritten with `windows.count`;
    /// presumably `getWindows` returns an empty array when accessibility
    /// access is missing, which would yield 0 here — confirm against
    /// `AXBridge`.
    ///
    /// - Parameters:
    ///   - info: The app to enrich.
    ///   - bridge: Accessibility bridge used to read the app's windows.
    /// - Returns: A new `AppInfo` with the updated window count.
    func enrichWithWindowCount(_ info: AppInfo, bridge: AXBridge) -> AppInfo {
        let appElement = bridge.appElement(pid: info.pid)
        let windows = bridge.getWindows(appElement)
        return AppInfo(
            name: info.name,
            bundleID: info.bundleID,
            pid: info.pid,
            isScriptable: info.isScriptable,
            windowCount: windows.count
        )
    }

    // MARK: - Helpers

    /// Build the baseline `AppInfo` for a running application.
    ///
    /// Returns `nil` when the app has no localized name. `isScriptable` and
    /// `windowCount` are left at their placeholder values (`false` / `0`).
    private static func makeInfo(from app: NSRunningApplication) -> AppInfo? {
        guard let name = app.localizedName else { return nil }
        return AppInfo(
            name: name,
            bundleID: app.bundleIdentifier,
            pid: app.processIdentifier,
            isScriptable: false,
            windowCount: 0
        )
    }
}
@@ -0,0 +1,59 @@
1
+ import ApplicationServices
2
+ import Foundation
3
+
4
+ // MARK: - Element Store
5
+
6
+ /// Stores the mapping between opaque ref strings and AXUIElement objects.
7
+ ///
8
+ /// Refs are ephemeral -- they reset on each new snapshot. When Claude says
9
+ /// "click e5", the store resolves "e5" back to the actual AXUIElement.
10
+ ///
11
+ /// Uses `actor` isolation to guarantee thread safety. AXUIElement objects
12
+ /// are wrapped in `AXElementWrapper` (@unchecked Sendable) before crossing
13
+ /// the isolation boundary, since AXUIElement is thread-safe by Apple's
14
+ /// implementation but Swift 6 does not know that.
15
+ public actor ElementStore {
16
+ private var elements: [String: AXElementWrapper] = [:]
17
+ private var counter: Int = 0
18
+
19
+ public init() {}
20
+
21
+ // MARK: - Lifecycle
22
+
23
+ /// Reset the store before building a new snapshot.
24
+ /// All previous refs become invalid after this call.
25
+ public func reset() {
26
+ elements = [:]
27
+ counter = 0
28
+ }
29
+
30
+ // MARK: - Registration
31
+
32
+ /// Register an element wrapper and return its unique ref (e.g. "e1", "e2").
33
+ func register(_ wrapper: AXElementWrapper) -> String {
34
+ counter += 1
35
+ let ref = "e\(counter)"
36
+ elements[ref] = wrapper
37
+ return ref
38
+ }
39
+
40
+ // MARK: - Lookup
41
+
42
+ /// Resolve a ref string back to its AXElementWrapper.
43
+ /// Returns `nil` if the ref is unknown or the store has been reset.
44
+ public func resolve(_ ref: String) -> AXElementWrapper? {
45
+ return elements[ref]
46
+ }
47
+
48
+ /// Convenience alias used by AccessibilityLayer.
49
+ func resolveWrapped(_ ref: String) -> AXElementWrapper? {
50
+ return elements[ref]
51
+ }
52
+
53
+ // MARK: - Diagnostics
54
+
55
+ /// Current number of stored elements.
56
+ public func count() -> Int {
57
+ return elements.count
58
+ }
59
+ }