desktop-pilot-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/Package.swift +38 -0
- package/README.md +462 -0
- package/Sources/DesktopPilot/Core/AppRegistry.swift +102 -0
- package/Sources/DesktopPilot/Core/ElementStore.swift +59 -0
- package/Sources/DesktopPilot/Core/Router.swift +242 -0
- package/Sources/DesktopPilot/Core/Snapshot.swift +192 -0
- package/Sources/DesktopPilot/Layers/AccessibilityLayer.swift +190 -0
- package/Sources/DesktopPilot/Layers/AppleScriptLayer.swift +462 -0
- package/Sources/DesktopPilot/Layers/CGEventLayer.swift +318 -0
- package/Sources/DesktopPilot/Layers/LayerProtocol.swift +40 -0
- package/Sources/DesktopPilot/Layers/ScreenshotLayer.swift +122 -0
- package/Sources/DesktopPilot/MCP/Server.swift +536 -0
- package/Sources/DesktopPilot/MCP/Tools.swift +772 -0
- package/Sources/DesktopPilot/MCP/Types.swift +107 -0
- package/Sources/DesktopPilot/Platform/PlatformProtocol.swift +49 -0
- package/Sources/DesktopPilot/Platform/macOS/AXBridge.swift +232 -0
- package/Sources/DesktopPilot/Platform/macOS/Permissions.swift +34 -0
- package/Sources/DesktopPilot/Platform/macOS/SystemEvents.swift +323 -0
- package/Sources/DesktopPilot/main.swift +19 -0
- package/Sources/DesktopPilotCLI/main.swift +19 -0
- package/Tests/DesktopPilotTests/DesktopPilotTests.swift +290 -0
- package/bin/cli.js +61 -0
- package/package.json +52 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import ApplicationServices
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
// MARK: - Virtual Key Codes
|
|
6
|
+
|
|
7
|
+
/// Namespace for the macOS virtual key codes used when building CGEvent
/// keyboard events.
///
/// Values mirror the `kVK_*` constants from Events.h (HIToolbox) and are
/// written in hexadecimal, the same form that header uses.
enum VirtualKeyCode {
    // Editing / whitespace keys
    static let returnKey: CGKeyCode = 0x24
    static let tab: CGKeyCode = 0x30
    static let space: CGKeyCode = 0x31
    static let delete: CGKeyCode = 0x33
    static let escape: CGKeyCode = 0x35
    static let forwardDelete: CGKeyCode = 0x75

    // Arrow keys
    static let leftArrow: CGKeyCode = 0x7B
    static let rightArrow: CGKeyCode = 0x7C
    static let downArrow: CGKeyCode = 0x7D
    static let upArrow: CGKeyCode = 0x7E

    // Modifier keys (for reference; modifiers are typically applied via CGEventFlags)
    static let command: CGKeyCode = 0x37
    static let shift: CGKeyCode = 0x38
    static let option: CGKeyCode = 0x3A
    static let control: CGKeyCode = 0x3B
    static let capsLock: CGKeyCode = 0x39

    // Function keys (the codes are not contiguous)
    static let f1: CGKeyCode = 0x7A
    static let f2: CGKeyCode = 0x78
    static let f3: CGKeyCode = 0x63
    static let f4: CGKeyCode = 0x76
    static let f5: CGKeyCode = 0x60
    static let f6: CGKeyCode = 0x61
    static let f7: CGKeyCode = 0x62
    static let f8: CGKeyCode = 0x64
    static let f9: CGKeyCode = 0x65
    static let f10: CGKeyCode = 0x6D
    static let f11: CGKeyCode = 0x67
    static let f12: CGKeyCode = 0x6F

    // Navigation
    static let home: CGKeyCode = 0x73
    static let end: CGKeyCode = 0x77
    static let pageUp: CGKeyCode = 0x74
    static let pageDown: CGKeyCode = 0x79

    // Common letter keys (virtual key codes for Cmd+key combos)
    static let a: CGKeyCode = 0x00
    static let c: CGKeyCode = 0x08
    static let v: CGKeyCode = 0x09
    static let x: CGKeyCode = 0x07
    static let z: CGKeyCode = 0x06
    static let s: CGKeyCode = 0x01
    static let w: CGKeyCode = 0x0D
    static let q: CGKeyCode = 0x0C
    static let n: CGKeyCode = 0x2D
    static let t: CGKeyCode = 0x11
}
|
|
62
|
+
|
|
63
|
+
// MARK: - CGEventLayer
|
|
64
|
+
|
|
65
|
+
/// Input-injection layer built on Quartz `CGEvent`.
///
/// Synthesizes mouse, keyboard and scroll-wheel events and posts them to the
/// HID event tap. Used for keyboard input and coordinate-based clicking when
/// other methods fail.
///
/// The original notes cite roughly 1-5 ms latency per event, which would make
/// this the fastest way to simulate user input on macOS (figure not verified
/// here). Unlike the Accessibility layer, CGEvent cannot read UI state -- it
/// can only inject input events.
final class CGEventLayer: @unchecked Sendable {

    // Injected collaborators; not referenced by any method in this declaration.
    private let bridge: AXBridge
    private let store: ElementStore

    /// Stores the bridge and element store handed in by the caller.
    init(bridge: AXBridge, store: ElementStore) {
        self.bridge = bridge
        self.store = store
    }

    // MARK: - Mouse Events

    /// Creates one mouse event and posts it to the HID event tap.
    ///
    /// - Parameters:
    ///   - type: The mouse event type (down, up, moved, dragged, ...).
    ///   - point: Cursor position carried by the event.
    ///   - button: The button the event refers to.
    ///   - clickState: When non-nil, written to the event's
    ///     `.mouseEventClickState` field (1 = single click, 2 = double click).
    ///
    /// If the event cannot be created, nothing is posted.
    private func postMouseEvent(
        _ type: CGEventType,
        at point: CGPoint,
        button: CGMouseButton,
        clickState: Int64? = nil
    ) {
        guard let event = CGEvent(
            mouseEventSource: nil,
            mouseType: type,
            mouseCursorPosition: point,
            mouseButton: button
        ) else { return }

        if let clickState = clickState {
            event.setIntegerValueField(.mouseEventClickState, value: clickState)
        }
        event.post(tap: .cghidEventTap)
    }

    /// Left-click (press then release) at the given screen coordinates.
    func clickAt(x: Double, y: Double) {
        let point = CGPoint(x: x, y: y)
        postMouseEvent(.leftMouseDown, at: point, button: .left)
        postMouseEvent(.leftMouseUp, at: point, button: .left)
    }

    /// Left-click the center of an element's bounding rectangle.
    func clickElement(bounds: ElementBounds) {
        let centerX = bounds.x + bounds.width / 2.0
        let centerY = bounds.y + bounds.height / 2.0
        clickAt(x: centerX, y: centerY)
    }

    /// Double-click at the given screen coordinates.
    ///
    /// Posts two press/release pairs. The first pair carries click state 1 and
    /// the second click state 2, matching the sequence a physical double-click
    /// produces. Stamping both pairs with 2 makes the very first press already
    /// report a double-click, so the action can be triggered twice.
    func doubleClickAt(x: Double, y: Double) {
        let point = CGPoint(x: x, y: y)

        for clickState: Int64 in 1...2 {
            postMouseEvent(.leftMouseDown, at: point, button: .left, clickState: clickState)
            postMouseEvent(.leftMouseUp, at: point, button: .left, clickState: clickState)
        }
    }

    /// Right-click (press then release) at the given screen coordinates.
    func rightClickAt(x: Double, y: Double) {
        let point = CGPoint(x: x, y: y)
        postMouseEvent(.rightMouseDown, at: point, button: .right)
        postMouseEvent(.rightMouseUp, at: point, button: .right)
    }

    /// Move the mouse pointer to the given screen coordinates.
    func moveTo(x: Double, y: Double) {
        postMouseEvent(.mouseMoved, at: CGPoint(x: x, y: y), button: .left)
    }

    /// Drag with the left mouse button from `start` to `end`.
    ///
    /// Posts exactly three events: press at `start`, a single drag event at
    /// `end`, and release at `end`. No intermediate drag points are generated.
    func drag(from start: CGPoint, to end: CGPoint) {
        postMouseEvent(.leftMouseDown, at: start, button: .left)
        postMouseEvent(.leftMouseDragged, at: end, button: .left)
        postMouseEvent(.leftMouseUp, at: end, button: .left)
    }

    // MARK: - Keyboard Events

    /// Type a string using CGEvent keyboard events.
    ///
    /// Each Unicode scalar of `text` is sent as its own key-down/key-up pair
    /// with virtual key 0 and the scalar's UTF-16 encoding attached as the
    /// event's Unicode string. The events go to whatever currently has
    /// keyboard focus. An empty string posts nothing.
    ///
    /// - Parameter text: The text to type.
    func typeString(_ text: String) {
        let source = CGEventSource(stateID: .hidSystemState)

        for scalar in text.unicodeScalars {
            // A scalar above U+FFFF (emoji, supplementary planes) needs a
            // surrogate pair. Narrowing `scalar.value` to a single `UniChar`
            // traps on overflow, so send the full UTF-16 encoding instead.
            let utf16Units = Array(String(scalar).utf16)

            let keyDown = CGEvent(keyboardEventSource: source, virtualKey: 0, keyDown: true)
            let keyUp = CGEvent(keyboardEventSource: source, virtualKey: 0, keyDown: false)

            keyDown?.keyboardSetUnicodeString(stringLength: utf16Units.count, unicodeString: utf16Units)
            keyUp?.keyboardSetUnicodeString(stringLength: utf16Units.count, unicodeString: utf16Units)

            keyDown?.post(tap: .cghidEventTap)
            keyUp?.post(tap: .cghidEventTap)
        }
    }

    /// Press and release a virtual key with optional modifiers.
    ///
    /// - Parameters:
    ///   - virtualKey: Key code to press (see `VirtualKeyCode`).
    ///   - flags: Modifier flags set on both the key-down and key-up events.
    func pressKey(virtualKey: CGKeyCode, flags: CGEventFlags = []) {
        let source = CGEventSource(stateID: .hidSystemState)
        let keyDown = CGEvent(keyboardEventSource: source, virtualKey: virtualKey, keyDown: true)
        let keyUp = CGEvent(keyboardEventSource: source, virtualKey: virtualKey, keyDown: false)

        keyDown?.flags = flags
        keyUp?.flags = flags

        keyDown?.post(tap: .cghidEventTap)
        keyUp?.post(tap: .cghidEventTap)
    }

    /// Press Return/Enter.
    func pressReturn() { pressKey(virtualKey: VirtualKeyCode.returnKey) }

    /// Press Tab.
    func pressTab() { pressKey(virtualKey: VirtualKeyCode.tab) }

    /// Press Escape.
    func pressEscape() { pressKey(virtualKey: VirtualKeyCode.escape) }

    /// Press Delete/Backspace.
    func pressDelete() { pressKey(virtualKey: VirtualKeyCode.delete) }

    /// Cmd+A (Select All).
    func selectAll() { pressKey(virtualKey: VirtualKeyCode.a, flags: .maskCommand) }

    /// Cmd+C (Copy).
    func copy() { pressKey(virtualKey: VirtualKeyCode.c, flags: .maskCommand) }

    /// Cmd+V (Paste).
    func paste() { pressKey(virtualKey: VirtualKeyCode.v, flags: .maskCommand) }

    /// Cmd+X (Cut).
    func cut() { pressKey(virtualKey: VirtualKeyCode.x, flags: .maskCommand) }

    /// Cmd+Z (Undo).
    func undo() { pressKey(virtualKey: VirtualKeyCode.z, flags: .maskCommand) }

    /// Cmd+Shift+Z (Redo).
    func redo() { pressKey(virtualKey: VirtualKeyCode.z, flags: [.maskCommand, .maskShift]) }

    // MARK: - Scroll

    /// Scroll at the current mouse position.
    ///
    /// Posts one pixel-unit scroll-wheel event with two wheels.
    ///
    /// - Parameters:
    ///   - deltaY: Value for wheel 1 (vertical axis).
    ///   - deltaX: Value for wheel 2 (horizontal axis); defaults to 0.
    func scroll(deltaY: Int32, deltaX: Int32 = 0) {
        let event = CGEvent(
            scrollWheelEvent2Source: nil,
            units: .pixel,
            wheelCount: 2,
            wheel1: deltaY,
            wheel2: deltaX,
            wheel3: 0
        )
        event?.post(tap: .cghidEventTap)
    }
}
|
|
279
|
+
|
|
280
|
+
// MARK: - InteractionLayer Conformance
|
|
281
|
+
|
|
282
|
+
extension CGEventLayer: InteractionLayer {
    /// Layer name reported to callers and embedded in thrown errors.
    var name: String { "CGEvent" }

    /// Routing priority of this layer.
    var priority: Int { 40 }

    /// Always `true`: CGEvent input injection works with any app.
    func canHandle(bundleID: String?, appName: String) -> Bool { true }

    /// Always throws `LayerError.notSupported` -- CGEvent cannot read UI state.
    func snapshot(pid: Int32, maxDepth: Int) throws -> [PilotElement] {
        throw unsupported("CGEvent cannot read UI state. Use Accessibility layer for snapshots.")
    }

    /// Always throws `LayerError.notSupported`.
    ///
    /// A CGEvent click needs screen coordinates rather than an element ref;
    /// call `clickAt(x:y:)` or `clickElement(bounds:)` directly instead.
    func click(ref: String) throws {
        throw unsupported("Use clickAt(x:y:) directly with coordinates from element bounds.")
    }

    /// Types `text` via `typeString(_:)`.
    ///
    /// `ref` is ignored: CGEvent typing does not target a specific element,
    /// it goes to whatever is focused.
    func typeText(ref: String, text: String) throws {
        typeString(text)
    }

    /// Always throws `LayerError.notSupported` -- CGEvent cannot read UI state.
    func readValue(ref: String) throws -> String? {
        throw unsupported("CGEvent cannot read UI state. Use Accessibility layer for reading.")
    }

    /// Builds the `notSupported` error tagged with this layer's name.
    private func unsupported(_ reason: String) -> LayerError {
        LayerError.notSupported(layer: name, reason: reason)
    }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
// MARK: - Layer Errors
|
|
4
|
+
|
|
5
|
+
/// Failures raised by interaction layers.
enum LayerError: Error, Sendable {
    /// The named layer does not implement the requested operation;
    /// `reason` carries a human-readable explanation.
    case notSupported(layer: String, reason: String)

    /// The element identified by `ref` cannot be interacted with.
    case elementNotInteractable(ref: String)

    /// Typing into the element identified by `ref` failed.
    case typingFailed(ref: String, reason: String)

    /// Reading the element identified by `ref` failed.
    case readFailed(ref: String, reason: String)

    /// Capturing a snapshot for the process `pid` failed.
    case snapshotFailed(pid: Int32, reason: String)
}
|
|
13
|
+
|
|
14
|
+
// MARK: - Interaction Layer Protocol
|
|
15
|
+
|
|
16
|
+
/// Contract implemented by every interaction method (accessibility,
/// applescript, cgevent, screenshot).
///
/// The Router chooses, among the layers that can handle a given app, the one
/// with the most preferred priority.
protocol InteractionLayer: Sendable {

    /// Human-readable layer name, e.g. "Accessibility" or "AppleScript".
    var name: String { get }

    /// Ordering key used by the Router: a lower number means more preferred,
    /// and layers are tried in that order.
    var priority: Int { get }

    /// Reports whether this layer can handle the given app.
    func canHandle(bundleID: String?, appName: String) -> Bool

    /// Captures a UI snapshot tree for the app with process id `pid`.
    func snapshot(pid: Int32, maxDepth: Int) throws -> [PilotElement]

    /// Clicks the element identified by `ref`.
    func click(ref: String) throws

    /// Types `text` into the element identified by `ref`.
    func typeText(ref: String, text: String) throws

    /// Reads the current value of the element identified by `ref`.
    func readValue(ref: String) throws -> String?
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import AppKit
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
/// Fallback layer that captures screen pixels as PNG data.
///
/// Meant for custom-rendered content the Accessibility API can't see
/// (game viewports, canvas, etc.).
final class ScreenshotLayer: @unchecked Sendable {

    // Injected collaborators; not referenced by any method in this declaration.
    private let bridge: AXBridge
    private let store: ElementStore

    /// Stores the bridge and element store handed in by the caller.
    init(bridge: AXBridge, store: ElementStore) {
        self.store = store
        self.bridge = bridge
    }

    // MARK: - Full Screen Screenshot

    /// Captures the entire main display as PNG data.
    ///
    /// - Returns: PNG bytes, or `nil` if the display image could not be
    ///   created or encoded.
    func captureFullScreen() -> Data? {
        CGDisplayCreateImage(CGMainDisplayID()).flatMap { pngData(from: $0) }
    }

    // MARK: - Region Screenshot

    /// Captures a rectangular region of the main display as PNG data.
    ///
    /// The capture is always taken from the main display.
    ///
    /// - Returns: PNG bytes, or `nil` if the image could not be created or
    ///   encoded.
    func captureRegion(x: Double, y: Double, width: Double, height: Double) -> Data? {
        let region = CGRect(
            origin: CGPoint(x: x, y: y),
            size: CGSize(width: width, height: height)
        )
        return CGDisplayCreateImage(CGMainDisplayID(), rect: region).flatMap { pngData(from: $0) }
    }

    // MARK: - Element Screenshot

    /// Captures the screen region covered by an element's bounds.
    ///
    /// The bounds are supplied by the caller; this method does not resolve an
    /// element ref itself. It forwards to `captureRegion(x:y:width:height:)`.
    func captureElement(bounds: ElementBounds) -> Data? {
        captureRegion(x: bounds.x, y: bounds.y, width: bounds.width, height: bounds.height)
    }

    // MARK: - Window Screenshot

    /// Captures a single window, identified by its window ID, as PNG data.
    ///
    /// Uses a null rect with `.optionIncludingWindow`, and requests
    /// `.boundsIgnoreFraming` and `.bestResolution`.
    ///
    /// - Returns: PNG bytes, or `nil` if the window image could not be created
    ///   or encoded.
    func captureWindow(windowID: CGWindowID) -> Data? {
        guard let windowImage = CGWindowListCreateImage(
            .null,
            .optionIncludingWindow,
            windowID,
            [.boundsIgnoreFraming, .bestResolution]
        ) else {
            return nil
        }
        return pngData(from: windowImage)
    }

    // MARK: - PNG Encoding

    /// Encodes a `CGImage` as PNG via `NSBitmapImageRep`.
    private func pngData(from cgImage: CGImage) -> Data? {
        NSBitmapImageRep(cgImage: cgImage).representation(using: .png, properties: [:])
    }

    // MARK: - Base64 Encoding

    /// Full-screen capture as a base64 string (for MCP image responses).
    func captureFullScreenBase64() -> String? {
        captureFullScreen()?.base64EncodedString()
    }

    /// Region capture as a base64 string.
    func captureRegionBase64(x: Double, y: Double, width: Double, height: Double) -> String? {
        captureRegion(x: x, y: y, width: width, height: height)?.base64EncodedString()
    }

    /// Element-bounds capture as a base64 string.
    func captureElementBase64(bounds: ElementBounds) -> String? {
        captureElement(bounds: bounds)?.base64EncodedString()
    }
}
|
|
83
|
+
|
|
84
|
+
// MARK: - InteractionLayer Conformance
|
|
85
|
+
|
|
86
|
+
extension ScreenshotLayer: InteractionLayer {
    /// Layer name reported to callers and embedded in thrown errors.
    var name: String { "Screenshot" }

    /// Lowest priority -- last resort.
    var priority: Int { 50 }

    /// Always `true`: a screenshot can always be taken.
    func canHandle(bundleID: String?, appName: String) -> Bool { true }

    /// Always throws `LayerError.notSupported` -- screenshots can't provide
    /// structured UI trees.
    func snapshot(pid: Int32, maxDepth: Int) throws -> [PilotElement] {
        throw unsupported("Screenshot layer cannot provide structured snapshots. Use pilot_screenshot tool instead.")
    }

    /// Always throws `LayerError.notSupported`.
    func click(ref: String) throws {
        throw unsupported("Screenshot layer cannot click elements. Use coordinate-based clicking via CGEvent.")
    }

    /// Always throws `LayerError.notSupported`.
    func typeText(ref: String, text: String) throws {
        throw unsupported("Screenshot layer cannot type. Use CGEvent or Accessibility layer.")
    }

    /// Always throws `LayerError.notSupported`.
    func readValue(ref: String) throws -> String? {
        throw unsupported("Screenshot layer cannot read values. Use Accessibility layer.")
    }

    /// Builds the `notSupported` error tagged with this layer's name.
    private func unsupported(_ reason: String) -> LayerError {
        LayerError.notSupported(layer: name, reason: reason)
    }
}
|