desktop-pilot-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/Package.swift +38 -0
- package/README.md +462 -0
- package/Sources/DesktopPilot/Core/AppRegistry.swift +102 -0
- package/Sources/DesktopPilot/Core/ElementStore.swift +59 -0
- package/Sources/DesktopPilot/Core/Router.swift +242 -0
- package/Sources/DesktopPilot/Core/Snapshot.swift +192 -0
- package/Sources/DesktopPilot/Layers/AccessibilityLayer.swift +190 -0
- package/Sources/DesktopPilot/Layers/AppleScriptLayer.swift +462 -0
- package/Sources/DesktopPilot/Layers/CGEventLayer.swift +318 -0
- package/Sources/DesktopPilot/Layers/LayerProtocol.swift +40 -0
- package/Sources/DesktopPilot/Layers/ScreenshotLayer.swift +122 -0
- package/Sources/DesktopPilot/MCP/Server.swift +536 -0
- package/Sources/DesktopPilot/MCP/Tools.swift +772 -0
- package/Sources/DesktopPilot/MCP/Types.swift +107 -0
- package/Sources/DesktopPilot/Platform/PlatformProtocol.swift +49 -0
- package/Sources/DesktopPilot/Platform/macOS/AXBridge.swift +232 -0
- package/Sources/DesktopPilot/Platform/macOS/Permissions.swift +34 -0
- package/Sources/DesktopPilot/Platform/macOS/SystemEvents.swift +323 -0
- package/Sources/DesktopPilot/main.swift +19 -0
- package/Sources/DesktopPilotCLI/main.swift +19 -0
- package/Tests/DesktopPilotTests/DesktopPilotTests.swift +290 -0
- package/bin/cli.js +61 -0
- package/package.json +52 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import AppKit
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
// MARK: - Interaction Method
|
|
5
|
+
|
|
6
|
+
/// Identifies which interaction layer should carry out an operation
/// against a macOS application.
enum InteractionMethod: Sendable, Equatable {
    /// Drive the app through the macOS Accessibility (AX) API.
    case accessibility

    /// Drive the app through AppleScript.
    case applescript

    /// Drive the app through CGEvent (e.g. keystroke injection).
    case cgevent

    /// Screenshot-based interaction.
    case screenshot
}
|
|
13
|
+
|
|
14
|
+
// MARK: - App Category
|
|
15
|
+
|
|
16
|
+
/// Coarse classification of an application, derived from its technology
/// stack and from whether it can be scripted.
enum AppCategory: Sendable, Equatable {
    /// The app ships an AppleScript dictionary (discoverable via `sdef`).
    case scriptable

    /// The app is Chromium-based (Electron): limited Accessibility support
    /// and no AppleScript.
    case electron

    /// A standard native macOS app with good Accessibility support.
    case nativeStandard

    /// The technology stack could not be determined.
    case unknown
}
|
|
27
|
+
|
|
28
|
+
// MARK: - Router
|
|
29
|
+
|
|
30
|
+
/// Routes operations to the best available interaction layer for a given
/// app and action.
///
/// The choice is driven by:
/// - The action being performed (snapshot, click, type, menu, script, find)
/// - The target app's category (scriptable, Electron, native, unknown)
/// - Known bundle-ID lists for scriptable and Electron apps
///
/// The router has no stored instance state; every decision is recomputed
/// from its arguments on each call.
final class Router: Sendable {

    // MARK: - Known Bundle IDs

    /// macOS apps that expose an AppleScript dictionary.
    ///
    /// Entries are lowercased at load time and lookups lowercase the incoming
    /// identifier, so an app reported as e.g. `com.apple.iCal` still matches
    /// the `com.apple.ical` entry.
    private static let scriptableBundleIDs: Set<String> = Set([
        "com.apple.finder",
        "com.apple.Safari",
        "com.apple.mail",
        "com.apple.Notes",
        "com.apple.iWork.Keynote",
        "com.apple.iWork.Numbers",
        "com.apple.iWork.Pages",
        "com.apple.dt.Xcode",
        "com.apple.iMovie",
        "com.apple.garageband",
        "com.apple.MobileSMS",
        "com.apple.ical",
        "com.apple.reminders",
        "com.apple.Music",
    ].map { $0.lowercased() })

    /// Electron / Chromium-based apps with limited AX and no AppleScript.
    ///
    /// Stored lowercased; see `scriptableBundleIDs`.
    private static let electronBundleIDs: Set<String> = Set([
        "com.hnc.Discord",
        "com.microsoft.VSCode",
        "com.tinyspeck.slackmacgap",
        "org.whispersystems.signal-desktop",
        "com.spotify.client",
    ].map { $0.lowercased() })

    // MARK: - App Categorization

    /// Classify an app by its bundle ID, with `sdef` detection as a fallback.
    ///
    /// The lookup order is:
    /// 1. Check the static Electron set (these must never use AppleScript).
    /// 2. Check the static scriptable set.
    /// 3. Attempt dynamic `sdef` detection for bundle IDs in neither set.
    /// 4. Fall back to `.nativeStandard` for `com.apple.` identifiers,
    ///    `.unknown` otherwise.
    ///
    /// Static-list and prefix matching ignore letter case.
    ///
    /// - Note: Step 3 launches `/usr/bin/sdef` and blocks until it exits, so
    ///   this call can be slow for apps that are in neither static list.
    ///
    /// - Parameters:
    ///   - bundleID: The bundle identifier, if known. `nil` yields `.unknown`.
    ///   - appName: The display name. Currently not consulted; kept so the
    ///     signature stays stable for callers.
    /// - Returns: The inferred `AppCategory`.
    func categorize(bundleID: String?, appName: String) -> AppCategory {
        guard let bundleID else {
            return .unknown
        }

        let key = bundleID.lowercased()

        if Self.electronBundleIDs.contains(key) {
            return .electron
        }

        if Self.scriptableBundleIDs.contains(key) {
            return .scriptable
        }

        // Pass the identifier exactly as given to the workspace lookup.
        if hasSdefDictionary(bundleID: bundleID) {
            return .scriptable
        }

        if key.hasPrefix("com.apple.") {
            return .nativeStandard
        }

        return .unknown
    }

    // MARK: - App-level Routing

    /// Determine the best general interaction method for an app.
    ///
    /// Scriptable apps are routed to AppleScript; every other category is
    /// routed to Accessibility.
    ///
    /// - Parameters:
    ///   - appName: The display name of the target application.
    ///   - bundleID: The bundle identifier, if known.
    /// - Returns: The recommended interaction method.
    func bestMethod(appName: String, bundleID: String?) -> InteractionMethod {
        switch categorize(bundleID: bundleID, appName: appName) {
        case .scriptable:
            return .applescript
        case .electron, .nativeStandard, .unknown:
            return .accessibility
        }
    }

    // MARK: - Action-level Routing

    /// Determine the best method for a specific operation on an app.
    ///
    /// Routing rules by action (matched case-insensitively):
    /// - `snapshot` / `read` / `find` -- Accessibility (only method that reads UI state)
    /// - `click`  -- Accessibility (AXPress is more precise than coordinate clicking)
    /// - `type`   -- CGEvent for most apps (more reliable), Accessibility for Electron
    /// - `menu`   -- Accessibility (menu bar traversal)
    /// - `script` -- AppleScript for scriptable apps, Accessibility otherwise
    /// - anything else -- same result as `bestMethod(appName:bundleID:)`
    ///
    /// The app is only categorized when the action's rule depends on the
    /// category, so Accessibility-only actions never trigger the `sdef` probe.
    ///
    /// - Parameters:
    ///   - action: The action being performed (e.g. "click", "type", "snapshot").
    ///   - appName: The display name of the target application.
    ///   - bundleID: The bundle identifier, if known.
    /// - Returns: The recommended interaction method.
    func bestMethodForAction(
        action: String,
        appName: String,
        bundleID: String?
    ) -> InteractionMethod {
        switch action.lowercased() {
        case "snapshot", "read", "find", "click", "menu":
            return .accessibility

        case "type":
            // Typing only distinguishes Electron from everything else, and
            // Electron membership comes solely from the static list, so the
            // full (potentially process-spawning) categorization is skipped.
            let isElectron = bundleID.map {
                Self.electronBundleIDs.contains($0.lowercased())
            } ?? false
            return routeTyping(category: isElectron ? .electron : .unknown)

        case "script":
            return routeScripting(
                category: categorize(bundleID: bundleID, appName: appName)
            )

        default:
            return bestMethod(appName: appName, bundleID: bundleID)
        }
    }

    // MARK: - Capability Check

    /// Check whether a given method is available on this system.
    ///
    /// Every method is currently reported as available; the argument is not
    /// inspected.
    ///
    /// - Parameter method: The interaction method to check.
    /// - Returns: `true` if the method can be used.
    func isAvailable(_ method: InteractionMethod) -> Bool {
        return true
    }

    // MARK: - Private Helpers

    /// Pick the best method for typing text into the given app category.
    ///
    /// CGEvent is generally more reliable for keystroke injection, but
    /// Electron apps sometimes swallow raw key events, so Accessibility
    /// (AXSetValue) is safer there.
    private func routeTyping(category: AppCategory) -> InteractionMethod {
        switch category {
        case .electron:
            return .accessibility
        case .scriptable, .nativeStandard, .unknown:
            return .cgevent
        }
    }

    /// Pick the best method for executing a script against the given app category.
    ///
    /// Only apps with an AppleScript dictionary benefit from `.applescript`;
    /// everything else falls back to Accessibility.
    private func routeScripting(category: AppCategory) -> InteractionMethod {
        switch category {
        case .scriptable:
            return .applescript
        case .electron, .nativeStandard, .unknown:
            return .accessibility
        }
    }

    /// Detect whether an app has an AppleScript dictionary via `sdef`.
    ///
    /// Resolves the app's bundle URL through `NSWorkspace`, runs
    /// `/usr/bin/sdef <path>` with output discarded, and waits for it to
    /// exit. A zero exit status is treated as "has a dictionary".
    ///
    /// - Parameter bundleID: The bundle identifier to probe.
    /// - Returns: `true` if `sdef` exits with status 0; `false` if the app
    ///   cannot be located, the process cannot be launched, or `sdef`
    ///   reports failure.
    private func hasSdefDictionary(bundleID: String) -> Bool {
        guard let url = NSWorkspace.shared.urlForApplication(
            withBundleIdentifier: bundleID
        ) else {
            return false
        }

        let process = Process()
        process.executableURL = URL(fileURLWithPath: "/usr/bin/sdef")
        process.arguments = [url.path]
        // Only the exit status matters; discard the dictionary XML and errors.
        process.standardOutput = FileHandle.nullDevice
        process.standardError = FileHandle.nullDevice

        do {
            try process.run()
            process.waitUntilExit()
            return process.terminationStatus == 0
        } catch {
            return false
        }
    }
}
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import ApplicationServices
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
// MARK: - Snapshot Builder
|
|
5
|
+
|
|
6
|
+
/// Builds a `PilotElement` tree from a running app's accessibility tree.
///
/// Walks the AX hierarchy starting from the app element, reads attributes
/// via `AXBridge`, and registers every retained element in the
/// `ElementStore` so it can be referenced later by its opaque ref.
struct SnapshotBuilder: Sendable {
    let bridge: AXBridge

    /// Attributes fetched in a single batch call per element for performance.
    ///
    /// `readBatchAttributes` reads results by position, so the order here
    /// must stay in sync with the indices used there.
    private static let batchAttributes: [String] = [
        kAXRoleAttribute,
        kAXTitleAttribute,
        kAXValueAttribute,
        kAXDescriptionAttribute,
        kAXEnabledAttribute,
        kAXFocusedAttribute,
    ]

    // MARK: - Public API

    /// Build a complete snapshot of an app's UI tree.
    ///
    /// The store is reset first, so refs handed out by an earlier snapshot
    /// are no longer valid afterwards. The app's windows are used as the
    /// top-level elements; if none of them yields an element, the app
    /// element's direct children are tried instead.
    ///
    /// - Parameters:
    ///   - appElement: The root AXUIElement for the app (from `AXBridge.appElement`).
    ///   - appName: Display name of the application.
    ///   - bundleID: Bundle identifier (e.g. "com.apple.Safari"), if available.
    ///   - pid: Process identifier.
    ///   - store: The `ElementStore` actor that will hold ref-to-element mappings.
    ///   - maxDepth: Maximum recursion depth (default 10). Elements at this
    ///     depth are included, but their children are not visited.
    /// - Returns: A populated `AppSnapshot` stamped with the current time in
    ///   ISO 8601 format.
    func buildSnapshot(
        appElement: AXUIElement,
        appName: String,
        bundleID: String?,
        pid: Int32,
        store: ElementStore,
        maxDepth: Int = 10
    ) async -> AppSnapshot {
        await store.reset()

        var topLevelElements = await buildTopLevel(
            from: bridge.getWindows(appElement),
            store: store,
            maxDepth: maxDepth
        )

        // If no windows produced anything, try direct children of the app element.
        if topLevelElements.isEmpty {
            topLevelElements = await buildTopLevel(
                from: bridge.getChildren(appElement),
                store: store,
                maxDepth: maxDepth
            )
        }

        let count = await store.count()
        let formatter = ISO8601DateFormatter()

        return AppSnapshot(
            app: appName,
            bundleID: bundleID,
            pid: pid,
            timestamp: formatter.string(from: Date()),
            elementCount: count,
            elements: topLevelElements
        )
    }

    // MARK: - Recursive Tree Building

    /// Build the subtree for each source element at depth 0, dropping the
    /// ones `buildElement` rejects.
    private func buildTopLevel(
        from sources: [AXUIElement],
        store: ElementStore,
        maxDepth: Int
    ) async -> [PilotElement] {
        var built: [PilotElement] = []
        for source in sources {
            if let element = await buildElement(
                from: source,
                store: store,
                depth: 0,
                maxDepth: maxDepth
            ) {
                built.append(element)
            }
        }
        return built
    }

    /// Build a single `PilotElement` from an `AXUIElement`, recursing into children.
    ///
    /// Returns `nil` if the element carries no useful information (no role,
    /// title, value, or description) or if its role is `"AXUnknown"`.
    /// Rejected elements are not registered in the store and their children
    /// are not visited.
    private func buildElement(
        from axElement: AXUIElement,
        store: ElementStore,
        depth: Int,
        maxDepth: Int
    ) async -> PilotElement? {
        let attrs = readBatchAttributes(axElement)
        let role = attrs.role

        // Skip elements with missing roles that carry no other info.
        if role == nil && attrs.title == nil && attrs.value == nil && attrs.description == nil {
            return nil
        }

        // Skip explicitly unknown roles.
        if role == "AXUnknown" {
            return nil
        }

        // Register the parent before its children so it receives its ref first.
        let wrapper = AXElementWrapper(axElement)
        let ref = await store.register(wrapper)
        let bounds = bridge.getBounds(axElement)

        // Recurse into children if within depth limit.
        var childElements: [PilotElement]?
        if depth < maxDepth {
            var built: [PilotElement] = []
            for child in bridge.getChildren(axElement) {
                if let childElement = await buildElement(
                    from: child,
                    store: store,
                    depth: depth + 1,
                    maxDepth: maxDepth
                ) {
                    built.append(childElement)
                }
            }
            // Represent "no usable children" as nil rather than an empty array.
            childElements = built.isEmpty ? nil : built
        }

        return PilotElement(
            ref: ref,
            // A role-less element that survived the checks above still has a
            // title, value, or description; label it as unknown.
            role: role ?? "AXUnknown",
            title: attrs.title,
            value: attrs.value,
            description: attrs.description,
            enabled: attrs.enabled,
            focused: attrs.focused,
            bounds: bounds,
            children: childElements
        )
    }

    // MARK: - Batch Attribute Reading

    /// Holds the parsed result of a batch attribute read.
    private struct BatchResult {
        let role: String?
        let title: String?
        let value: String?
        let description: String?
        let enabled: Bool
        let focused: Bool
    }

    /// Read all standard attributes in one batch call for performance.
    ///
    /// Results are matched to `batchAttributes` by position. A position
    /// missing from the returned array is treated the same as a `nil`
    /// entry, so a short or empty result yields defaults, not a trap.
    ///
    /// Defaults: `enabled` is `true` and `focused` is `false` when the
    /// attribute is absent or not a Boolean.
    private func readBatchAttributes(_ element: AXUIElement) -> BatchResult {
        let values = bridge.getAttributes(element, Self.batchAttributes)

        // Bounds-checked positional access.
        // NOTE(review): the bridge presumably returns one slot per requested
        // attribute; this guard covers the case where it does not.
        func attribute(at index: Int) -> Any? {
            values.indices.contains(index) ? values[index] : nil
        }

        // Value needs special handling: could be String, NSNumber, etc.
        let value: String? = {
            guard let raw = attribute(at: 2) else { return nil }
            if let str = raw as? String { return str }
            if let num = raw as? NSNumber { return num.stringValue }
            return String(describing: raw)
        }()

        return BatchResult(
            role: attribute(at: 0) as? String,
            title: attribute(at: 1) as? String,
            value: value,
            description: attribute(at: 3) as? String,
            enabled: (attribute(at: 4) as? Bool) ?? true,
            focused: (attribute(at: 5) as? Bool) ?? false
        )
    }
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import ApplicationServices
|
|
2
|
+
import AppKit
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
// MARK: - Layer-local Element Cache
|
|
6
|
+
|
|
7
|
+
/// Element cache for `AccessibilityLayer`, guarded by an `NSLock`.
///
/// The actor-based `ElementStore` in Core requires `await`; this cache is
/// lock-based instead so the synchronous `InteractionLayer` protocol methods
/// can use it without crossing isolation boundaries.
private final class LayerElementCache: @unchecked Sendable {

    private let lock = NSLock()
    private var wrappersByRef: [String: AXElementWrapper] = [:]
    private var lastIssued: Int = 0

    /// Run `body` while holding the lock and hand back its result.
    private func locked<T>(_ body: () -> T) -> T {
        lock.lock()
        defer { lock.unlock() }
        return body()
    }

    /// Store `element` and return the ref assigned to it.
    ///
    /// Refs are sequential: `e1`, `e2`, ... counted from the last `clear()`.
    func register(_ element: AXUIElement) -> String {
        locked {
            lastIssued += 1
            let ref = "e\(lastIssued)"
            wrappersByRef[ref] = AXElementWrapper(element)
            return ref
        }
    }

    /// Return the wrapper stored under `ref`, or `nil` if there is none.
    func lookup(_ ref: String) -> AXElementWrapper? {
        locked { wrappersByRef[ref] }
    }

    /// Drop every stored element and restart ref numbering
    /// (call before a fresh snapshot).
    func clear() {
        locked {
            wrappersByRef.removeAll()
            lastIssued = 0
        }
    }

    /// Number of elements currently stored.
    func count() -> Int {
        locked { wrappersByRef.count }
    }
}
|
|
49
|
+
|
|
50
|
+
// MARK: - Accessibility Layer
|
|
51
|
+
|
|
52
|
+
/// Interaction layer backed by the macOS Accessibility API.
///
/// Uses `AXBridge` to walk AXUIElement trees, produces `PilotElement`
/// snapshots, and performs actions (click, type, read) on elements
/// identified by the refs handed out during the latest snapshot.
final class AccessibilityLayer: @unchecked Sendable, InteractionLayer {

    let name: String = "Accessibility"
    let priority: Int = 0

    private let bridge: AXBridge
    private let cache = LayerElementCache()

    init(bridge: AXBridge = AXBridge()) {
        self.bridge = bridge
    }

    // MARK: - InteractionLayer Conformance

    /// Always returns `true`.
    ///
    /// Any app exposing an AX tree can be handled, so this answers
    /// optimistically; individual operations fail gracefully if the app
    /// doesn't cooperate.
    func canHandle(bundleID: String?, appName: String) -> Bool {
        true
    }

    /// Snapshot the UI tree of the process `pid`.
    ///
    /// Clears the ref cache, then builds a tree from the app's windows, or
    /// from the app element's direct children when it has no windows.
    ///
    /// - Parameters:
    ///   - pid: Process identifier of the target app.
    ///   - maxDepth: Elements at this depth are included but not descended into.
    /// - Returns: The top-level elements that could be built.
    /// - Throws: `PlatformError.permissionDenied` if accessibility is not
    ///   enabled; `LayerError.snapshotFailed` if the app exposes neither
    ///   windows nor children.
    func snapshot(pid: Int32, maxDepth: Int) throws -> [PilotElement] {
        if !bridge.isAccessibilityEnabled() {
            throw PlatformError.permissionDenied
        }

        // Refs from a previous snapshot become invalid from this point on.
        cache.clear()

        let application = bridge.appElement(pid: pid)
        var roots = bridge.getWindows(application)
        if roots.isEmpty {
            roots = bridge.getChildren(application)
        }

        guard !roots.isEmpty else {
            throw LayerError.snapshotFailed(
                pid: pid,
                reason: "App has no windows or accessible children"
            )
        }

        var tree: [PilotElement] = []
        for root in roots {
            if let node = buildElement(root, depth: 0, maxDepth: maxDepth) {
                tree.append(node)
            }
        }
        return tree
    }

    /// Perform the AXPress action on the element behind `ref`.
    ///
    /// - Throws: `PlatformError.elementNotFound` for an unknown ref;
    ///   `PlatformError.actionFailed` if the press is not performed.
    func click(ref: String) throws {
        let target = try resolveRef(ref)
        guard bridge.performAction(target.element, kAXPressAction) else {
            throw PlatformError.actionFailed(
                action: "press",
                reason: "AXPress action failed for ref '\(ref)'"
            )
        }
    }

    /// Focus the element behind `ref` and set its AXValue to `text`.
    ///
    /// - Throws: `PlatformError.elementNotFound` for an unknown ref;
    ///   `LayerError.typingFailed` if the value cannot be set.
    func typeText(ref: String, text: String) throws {
        let target = try resolveRef(ref).element

        // Best-effort focus: the result is deliberately ignored.
        _ = bridge.setAttribute(target, kAXFocusedAttribute, kCFBooleanTrue)

        // Write the text straight into the element's value.
        guard bridge.setAttribute(target, kAXValueAttribute, text as CFTypeRef) else {
            throw LayerError.typingFailed(
                ref: ref,
                reason: "Could not set AXValue on element -- it may not be an editable text field"
            )
        }
    }

    /// Read the current value of the element behind `ref`.
    ///
    /// - Throws: `PlatformError.elementNotFound` for an unknown ref.
    func readValue(ref: String) throws -> String? {
        try bridge.getValue(resolveRef(ref).element)
    }

    // MARK: - Public Helpers

    /// Non-throwing lookup of the wrapper stored under `ref`.
    func findElement(ref: String) -> AXElementWrapper? {
        cache.lookup(ref)
    }

    // MARK: - Private Helpers

    /// Map `ref` to its cached wrapper, throwing
    /// `PlatformError.elementNotFound` when the ref is unknown.
    private func resolveRef(_ ref: String) throws -> AXElementWrapper {
        if let found = cache.lookup(ref) {
            return found
        }
        throw PlatformError.elementNotFound(ref: ref)
    }

    /// Recursively convert an AXUIElement into a `PilotElement` subtree.
    ///
    /// Elements with no role, or with the `"AXUnknown"` role, are dropped
    /// together with their descendants. A parent is registered in the cache
    /// before any of its children.
    private func buildElement(
        _ element: AXUIElement,
        depth: Int,
        maxDepth: Int
    ) -> PilotElement? {
        guard let role = bridge.getRole(element), role != "AXUnknown" else {
            return nil
        }

        let ref = cache.register(element)
        let elementTitle = bridge.getTitle(element)
        let elementValue = bridge.getValue(element)
        let elementDescription = bridge.getDescription(element)
        let isEnabled = bridge.isEnabled(element)
        let isFocused = bridge.isFocused(element)
        let frame = bridge.getBounds(element)

        // Stays nil unless at least one child could be built.
        var subtree: [PilotElement]?
        if depth < maxDepth {
            var nodes: [PilotElement] = []
            for child in bridge.getChildren(element) {
                if let node = buildElement(child, depth: depth + 1, maxDepth: maxDepth) {
                    nodes.append(node)
                }
            }
            if !nodes.isEmpty {
                subtree = nodes
            }
        }

        return PilotElement(
            ref: ref,
            role: role,
            title: elementTitle,
            value: elementValue,
            description: elementDescription,
            enabled: isEnabled,
            focused: isFocused,
            bounds: frame,
            children: subtree
        )
    }
}
|