agent-device 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/1~rslib-runtime.js +1 -0
- package/dist/src/916.js +3 -0
- package/dist/src/bin.js +58 -64
- package/dist/src/daemon.js +41 -40
- package/dist/src/index.d.ts +19 -0
- package/dist/src/index.js +3 -3
- package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +14 -3
- package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift +10 -0
- package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +1 -0
- package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Transport.swift +19 -7
- package/macos-helper/Sources/AgentDeviceMacOSHelper/SnapshotTraversal.swift +235 -34
- package/macos-helper/Sources/AgentDeviceMacOSHelper/main.swift +121 -2
- package/package.json +14 -14
- package/skills/agent-device/references/exploration.md +13 -0
- package/skills/agent-device/references/macos-desktop.md +3 -2
- package/skills/agent-device/references/verification.md +3 -1
- package/dist/src/36.js +0 -3
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import AppKit
|
|
2
2
|
import ApplicationServices
|
|
3
|
+
import CoreGraphics
|
|
3
4
|
import Foundation
|
|
4
5
|
|
|
5
6
|
/// Fixed bounds used by the snapshot traversal helpers in this file.
private enum SnapshotTraversalLimits {
    // NOTE(review): the three limits below are consumed outside the visible hunks;
    // by name they cap desktop apps, total nodes and tree depth — confirm at the call sites.
    static let maxDesktopApps: Int = 24
    static let maxNodes: Int = 1500
    static let maxDepth: Int = 12

    /// Largest window `y` origin accepted by the menu bar window fallback filter.
    static let maxMenuBarBandY: Double = 64.0
    /// Largest window height accepted by the menu bar window fallback filter.
    static let maxMenuBarBandHeight: Double = 64.0
    /// Largest window width for a candidate to count as a "narrow" menu bar extra.
    static let maxMenuBarExtraWidth: Double = 256.0
}
|
|
10
14
|
|
|
11
15
|
struct RectResponse: Encodable {
|
|
@@ -62,7 +66,17 @@ private struct SnapshotTraversalState {
|
|
|
62
66
|
var truncated = false
|
|
63
67
|
}
|
|
64
68
|
|
|
65
|
-
|
|
69
|
+
/// A window reported by CGWindowList that may stand in for an app's menu bar item
/// when the accessibility menu bar attributes yield nothing.
private struct MenuBarWindowFallbackCandidate {
    /// Value read from `kCGWindowNumber` (0 when the entry had none).
    let windowNumber: Int
    /// Window bounds as reported by `kCGWindowBounds`.
    let rect: RectResponse
    /// Value read from `kCGWindowLayer` (0 when the entry had none).
    let layer: Int

    /// Surface area of `rect`; candidates are ranked by this value, smallest first.
    var area: Double {
        let width = rect.width
        let height = rect.height
        return width * height
    }
}
|
|
78
|
+
|
|
79
|
+
func captureSnapshotResponse(surface: String, bundleId: String? = nil) throws -> SnapshotResponse {
|
|
66
80
|
let result: SnapshotBuildResult
|
|
67
81
|
switch surface {
|
|
68
82
|
case "frontmost-app":
|
|
@@ -70,7 +84,7 @@ func captureSnapshotResponse(surface: String) throws -> SnapshotResponse {
|
|
|
70
84
|
case "desktop":
|
|
71
85
|
result = snapshotDesktop()
|
|
72
86
|
case "menubar":
|
|
73
|
-
result = snapshotMenuBar()
|
|
87
|
+
result = try snapshotMenuBar(bundleId: bundleId)
|
|
74
88
|
default:
|
|
75
89
|
throw HelperError.invalidArgs("snapshot requires --surface <frontmost-app|desktop|menubar>")
|
|
76
90
|
}
|
|
@@ -200,8 +214,9 @@ private func appendApplicationSnapshot(
|
|
|
200
214
|
return true
|
|
201
215
|
}
|
|
202
216
|
|
|
203
|
-
private func snapshotMenuBar() -> SnapshotBuildResult {
|
|
217
|
+
private func snapshotMenuBar(bundleId: String?) throws -> SnapshotBuildResult {
|
|
204
218
|
var state = SnapshotTraversalState()
|
|
219
|
+
let screenRect = mainScreenRectResponse()
|
|
205
220
|
guard
|
|
206
221
|
let rootIndex = appendSyntheticSnapshotNode(
|
|
207
222
|
into: &state,
|
|
@@ -209,29 +224,54 @@ private func snapshotMenuBar() -> SnapshotBuildResult {
|
|
|
209
224
|
label: "Menu Bar",
|
|
210
225
|
depth: 0,
|
|
211
226
|
parentIndex: nil,
|
|
212
|
-
surface: "menubar"
|
|
227
|
+
surface: "menubar",
|
|
228
|
+
rect: screenRect
|
|
213
229
|
)
|
|
214
230
|
else {
|
|
215
231
|
return SnapshotBuildResult(nodes: state.nodes, truncated: true)
|
|
216
232
|
}
|
|
217
233
|
|
|
218
|
-
if let
|
|
219
|
-
let
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
234
|
+
if let bundleId {
|
|
235
|
+
let targetApp = try resolveTargetApplication(bundleId: bundleId, surface: nil)
|
|
236
|
+
let appendedExtras = appendMenuBarSnapshot(
|
|
237
|
+
targetApp,
|
|
238
|
+
attribute: kAXExtrasMenuBarAttribute as String,
|
|
239
|
+
depth: 1,
|
|
240
|
+
parentIndex: rootIndex,
|
|
241
|
+
surface: "menubar",
|
|
242
|
+
state: &state
|
|
243
|
+
)
|
|
244
|
+
if !appendedExtras {
|
|
245
|
+
let appendedMenuBar = appendMenuBarSnapshot(
|
|
246
|
+
targetApp,
|
|
247
|
+
attribute: kAXMenuBarAttribute as String,
|
|
223
248
|
depth: 1,
|
|
224
249
|
parentIndex: rootIndex,
|
|
225
|
-
|
|
226
|
-
surface: "menubar",
|
|
227
|
-
pid: Int32(frontmost.processIdentifier),
|
|
228
|
-
bundleId: frontmost.bundleIdentifier,
|
|
229
|
-
appName: frontmost.localizedName,
|
|
230
|
-
windowTitle: frontmost.localizedName
|
|
231
|
-
),
|
|
250
|
+
surface: "menubar",
|
|
232
251
|
state: &state
|
|
233
252
|
)
|
|
253
|
+
if !appendedMenuBar {
|
|
254
|
+
_ = appendMenuBarWindowFallback(
|
|
255
|
+
targetApp,
|
|
256
|
+
depth: 1,
|
|
257
|
+
parentIndex: rootIndex,
|
|
258
|
+
surface: "menubar",
|
|
259
|
+
state: &state
|
|
260
|
+
)
|
|
261
|
+
}
|
|
234
262
|
}
|
|
263
|
+
return SnapshotBuildResult(nodes: state.nodes, truncated: state.truncated)
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if let frontmost = NSWorkspace.shared.frontmostApplication {
|
|
267
|
+
_ = appendMenuBarSnapshot(
|
|
268
|
+
frontmost,
|
|
269
|
+
attribute: kAXMenuBarAttribute as String,
|
|
270
|
+
depth: 1,
|
|
271
|
+
parentIndex: rootIndex,
|
|
272
|
+
surface: "menubar",
|
|
273
|
+
state: &state
|
|
274
|
+
)
|
|
235
275
|
}
|
|
236
276
|
|
|
237
277
|
if !state.truncated,
|
|
@@ -239,20 +279,24 @@ private func snapshotMenuBar() -> SnapshotBuildResult {
|
|
|
239
279
|
withBundleIdentifier: "com.apple.systemuiserver"
|
|
240
280
|
).first
|
|
241
281
|
{
|
|
242
|
-
let
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
282
|
+
let appendedExtras = appendMenuBarSnapshot(
|
|
283
|
+
systemUiServer,
|
|
284
|
+
attribute: kAXExtrasMenuBarAttribute as String,
|
|
285
|
+
depth: 1,
|
|
286
|
+
parentIndex: rootIndex,
|
|
287
|
+
surface: "menubar",
|
|
288
|
+
state: &state,
|
|
289
|
+
windowTitle: "System Menu Extras"
|
|
290
|
+
)
|
|
291
|
+
if !appendedExtras {
|
|
292
|
+
_ = appendMenuBarSnapshot(
|
|
293
|
+
systemUiServer,
|
|
294
|
+
attribute: kAXMenuBarAttribute as String,
|
|
246
295
|
depth: 1,
|
|
247
296
|
parentIndex: rootIndex,
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
bundleId: systemUiServer.bundleIdentifier,
|
|
252
|
-
appName: systemUiServer.localizedName,
|
|
253
|
-
windowTitle: "System Menu Extras"
|
|
254
|
-
),
|
|
255
|
-
state: &state
|
|
297
|
+
surface: "menubar",
|
|
298
|
+
state: &state,
|
|
299
|
+
windowTitle: "System Menu Extras"
|
|
256
300
|
)
|
|
257
301
|
}
|
|
258
302
|
}
|
|
@@ -260,6 +304,67 @@ private func snapshotMenuBar() -> SnapshotBuildResult {
|
|
|
260
304
|
return SnapshotBuildResult(nodes: state.nodes, truncated: state.truncated)
|
|
261
305
|
}
|
|
262
306
|
|
|
307
|
+
/// Reads the accessibility element stored under `attribute` on `app` and appends
/// its snapshot subtree below `parentIndex`.
///
/// - Parameters:
///   - app: Application whose accessibility element is queried.
///   - attribute: Accessibility attribute name to read (callers pass the menu bar
///     or extras menu bar attribute).
///   - depth: Depth assigned to the appended root element.
///   - parentIndex: Index of the node the subtree is attached to.
///   - surface: Surface name recorded in the snapshot context.
///   - state: Traversal state that receives the new nodes.
///   - windowTitle: Optional window title for the context; falls back to the
///     app's localized name.
/// - Returns: `true` when `state.nodes` grew, `false` when the attribute could not
///   be read or no node was appended.
@discardableResult
private func appendMenuBarSnapshot(
    _ app: NSRunningApplication,
    attribute: String,
    depth: Int,
    parentIndex: Int,
    surface: String,
    state: inout SnapshotTraversalState,
    windowTitle: String? = nil
) -> Bool {
    let applicationElement = AXUIElementCreateApplication(app.processIdentifier)
    guard let menuBarElement = elementAttribute(applicationElement, attribute: attribute) else {
        return false
    }

    let context = SnapshotContext(
        surface: surface,
        pid: Int32(app.processIdentifier),
        bundleId: app.bundleIdentifier,
        appName: app.localizedName,
        windowTitle: windowTitle ?? app.localizedName
    )

    // Success is measured by node growth rather than by the helper's return value.
    let previousCount = state.nodes.count
    _ = appendElementSnapshot(
        menuBarElement,
        depth: depth,
        parentIndex: parentIndex,
        context: context,
        state: &state
    )
    return previousCount < state.nodes.count
}
|
|
338
|
+
|
|
339
|
+
/// Appends a synthetic `MenuBarItem` node for `app`, positioned from the best
/// matching CGWindowList window, for use when no accessibility menu bar was found.
///
/// - Parameters:
///   - app: Application that owns the candidate window.
///   - depth: Depth assigned to the synthetic node.
///   - parentIndex: Index of the node the synthetic node is attached to.
///   - surface: Surface name recorded on the node.
///   - state: Traversal state that receives the node.
/// - Returns: `true` when a node was appended; `false` when no candidate window
///   exists or the node could not be added.
@discardableResult
private func appendMenuBarWindowFallback(
    _ app: NSRunningApplication,
    depth: Int,
    parentIndex: Int,
    surface: String,
    state: inout SnapshotTraversalState
) -> Bool {
    guard let window = menuBarWindowFallbackCandidate(for: app) else {
        return false
    }

    let displayName = app.localizedName
    let insertedIndex = appendSyntheticSnapshotNode(
        into: &state,
        type: "MenuBarItem",
        label: displayName ?? app.bundleIdentifier ?? "Menu Bar Item",
        depth: depth,
        parentIndex: parentIndex,
        surface: surface,
        // The identifier encodes the CG window number so the node can be traced back to its window.
        identifier: "cgwindow:\(window.windowNumber)",
        pid: Int32(app.processIdentifier),
        bundleId: app.bundleIdentifier,
        appName: displayName,
        windowTitle: displayName,
        rect: window.rect,
        hittable: true
    )
    return insertedIndex != nil
}
|
|
367
|
+
|
|
263
368
|
@discardableResult
|
|
264
369
|
private func appendSyntheticSnapshotNode(
|
|
265
370
|
into state: inout SnapshotTraversalState,
|
|
@@ -272,7 +377,9 @@ private func appendSyntheticSnapshotNode(
|
|
|
272
377
|
pid: Int32? = nil,
|
|
273
378
|
bundleId: String? = nil,
|
|
274
379
|
appName: String? = nil,
|
|
275
|
-
windowTitle: String? = nil
|
|
380
|
+
windowTitle: String? = nil,
|
|
381
|
+
rect: RectResponse? = nil,
|
|
382
|
+
hittable: Bool = false
|
|
276
383
|
) -> Int? {
|
|
277
384
|
guard reserveSnapshotNodeCapacity(&state) else {
|
|
278
385
|
return nil
|
|
@@ -288,10 +395,10 @@ private func appendSyntheticSnapshotNode(
|
|
|
288
395
|
label: label,
|
|
289
396
|
value: nil,
|
|
290
397
|
identifier: identifier ?? "surface:\(surface):\(type.lowercased())",
|
|
291
|
-
rect:
|
|
398
|
+
rect: rect,
|
|
292
399
|
enabled: true,
|
|
293
400
|
selected: nil,
|
|
294
|
-
hittable:
|
|
401
|
+
hittable: hittable && rect != nil,
|
|
295
402
|
depth: depth,
|
|
296
403
|
parentIndex: parentIndex,
|
|
297
404
|
pid: pid,
|
|
@@ -304,6 +411,83 @@ private func appendSyntheticSnapshotNode(
|
|
|
304
411
|
return index
|
|
305
412
|
}
|
|
306
413
|
|
|
414
|
+
/// Returns the frame of `NSScreen.main` as a `RectResponse`.
///
/// - Returns: `nil` when there is no main screen or its frame has a
///   non-positive width or height.
private func mainScreenRectResponse() -> RectResponse? {
    guard let frame = NSScreen.main?.frame else {
        return nil
    }
    guard frame.width > 0, frame.height > 0 else {
        return nil
    }

    let origin = frame.origin
    return RectResponse(
        x: Double(origin.x),
        y: Double(origin.y),
        width: Double(frame.width),
        height: Double(frame.height)
    )
}
|
|
425
|
+
|
|
426
|
+
/// Picks the CGWindowList window owned by `app` that most plausibly represents
/// its menu bar extra.
///
/// Windows are kept only when they belong to the app's pid, have positive
/// bounds and a non-zero alpha. Of those, only windows inside the top band
/// (`maxMenuBarBandY` / `maxMenuBarBandHeight`) are considered; narrow ones
/// (`maxMenuBarExtraWidth`) are preferred when any exist.
///
/// - Parameter app: Application whose windows are inspected.
/// - Returns: The smallest-area candidate (ties broken by layer, then window
///   number), or `nil` when the window list is unavailable or nothing qualifies.
private func menuBarWindowFallbackCandidate(
    for app: NSRunningApplication
) -> MenuBarWindowFallbackCandidate? {
    guard
        let windowEntries = CGWindowListCopyWindowInfo([.optionAll], kCGNullWindowID)
            as? [[String: Any]]
    else {
        return nil
    }

    let targetPid = Int(app.processIdentifier)
    var ownedWindows: [MenuBarWindowFallbackCandidate] = []
    for entry in windowEntries {
        let ownerPid = (entry[kCGWindowOwnerPID as String] as? NSNumber)?.intValue
        guard ownerPid == targetPid else {
            continue
        }
        guard
            let boundsInfo = entry[kCGWindowBounds as String] as? NSDictionary,
            let bounds = CGRect(dictionaryRepresentation: boundsInfo),
            bounds.width > 0,
            bounds.height > 0
        else {
            continue
        }
        // A missing alpha entry is treated as fully opaque.
        let opacity = (entry[kCGWindowAlpha as String] as? NSNumber)?.doubleValue ?? 1
        guard opacity > 0 else {
            continue
        }

        ownedWindows.append(
            MenuBarWindowFallbackCandidate(
                windowNumber: (entry[kCGWindowNumber as String] as? NSNumber)?.intValue ?? 0,
                rect: RectResponse(
                    x: Double(bounds.origin.x),
                    y: Double(bounds.origin.y),
                    width: Double(bounds.width),
                    height: Double(bounds.height)
                ),
                layer: (entry[kCGWindowLayer as String] as? NSNumber)?.intValue ?? 0
            )
        )
    }

    // CGWindowList can surface multiple app-owned utility windows. Prefer the small
    // top-band window that matches typical menu bar extra geometry before ranking by area.
    let topBandWindows = ownedWindows.filter {
        $0.rect.y <= SnapshotTraversalLimits.maxMenuBarBandY
            && $0.rect.height <= SnapshotTraversalLimits.maxMenuBarBandHeight
    }
    let narrowWindows = topBandWindows.filter {
        $0.rect.width <= SnapshotTraversalLimits.maxMenuBarExtraWidth
    }
    let pool = narrowWindows.isEmpty ? topBandWindows : narrowWindows

    // Lexicographic order: area first, then layer, then window number.
    return pool.min { lhs, rhs in
        (lhs.area, lhs.layer, lhs.windowNumber) < (rhs.area, rhs.layer, rhs.windowNumber)
    }
}
|
|
490
|
+
|
|
307
491
|
@discardableResult
|
|
308
492
|
private func appendElementSnapshot(
|
|
309
493
|
_ element: AXUIElement,
|
|
@@ -361,7 +545,7 @@ private func appendElementSnapshot(
|
|
|
361
545
|
return index
|
|
362
546
|
}
|
|
363
547
|
|
|
364
|
-
for child in
|
|
548
|
+
for child in snapshotChildren(of: element, role: role) {
|
|
365
549
|
if state.truncated {
|
|
366
550
|
break
|
|
367
551
|
}
|
|
@@ -522,9 +706,9 @@ private func accessibilityAxValue(_ value: CFTypeRef?) -> AXValue? {
|
|
|
522
706
|
return (value as! AXValue)
|
|
523
707
|
}
|
|
524
708
|
|
|
525
|
-
func
|
|
709
|
+
private func elementArrayAttribute(_ element: AXUIElement, attribute: String) -> [AXUIElement] {
|
|
526
710
|
var value: CFTypeRef?
|
|
527
|
-
guard AXUIElementCopyAttributeValue(element,
|
|
711
|
+
guard AXUIElementCopyAttributeValue(element, attribute as CFString, &value) == .success,
|
|
528
712
|
let children = value as? [AXUIElement]
|
|
529
713
|
else {
|
|
530
714
|
return []
|
|
@@ -532,6 +716,23 @@ func children(of element: AXUIElement) -> [AXUIElement] {
|
|
|
532
716
|
return children
|
|
533
717
|
}
|
|
534
718
|
|
|
719
|
+
/// Returns the elements stored under `kAXChildrenAttribute` of `element`.
///
/// - Parameter element: Accessibility element to query.
/// - Returns: The child elements, or an empty array when the attribute cannot
///   be read as an element array.
func children(of element: AXUIElement) -> [AXUIElement] {
    elementArrayAttribute(element, attribute: kAXChildrenAttribute as String)
}
|
|
722
|
+
|
|
723
|
+
/// Resolves the children to traverse for `element` during a snapshot.
///
/// The regular children are used whenever there are any. Menu-related roles
/// (`AXMenuBar`, `AXMenuBarItem`, `AXMenu`) that report no children fall back to
/// `kAXVisibleChildrenAttribute`.
///
/// - Parameters:
///   - element: Accessibility element being traversed.
///   - role: The element's role string, if known.
/// - Returns: The children to descend into; empty when none are available.
private func snapshotChildren(of element: AXUIElement, role: String?) -> [AXUIElement] {
    let reportedChildren = children(of: element)
    guard reportedChildren.isEmpty else {
        return reportedChildren
    }

    let menuRoles: Set<String> = ["AXMenuBar", "AXMenuBarItem", "AXMenu"]
    guard let role, menuRoles.contains(role) else {
        return []
    }
    return elementArrayAttribute(element, attribute: kAXVisibleChildrenAttribute as String)
}
|
|
735
|
+
|
|
535
736
|
func windows(of appElement: AXUIElement) -> [AXUIElement] {
|
|
536
737
|
var value: CFTypeRef?
|
|
537
738
|
guard AXUIElementCopyAttributeValue(appElement, "AXWindows" as CFString, &value) == .success,
|
|
@@ -2,6 +2,9 @@ import AppKit
|
|
|
2
2
|
import ApplicationServices
|
|
3
3
|
import CoreGraphics
|
|
4
4
|
import Foundation
|
|
5
|
+
import ImageIO
|
|
6
|
+
import ScreenCaptureKit
|
|
7
|
+
import UniformTypeIdentifiers
|
|
5
8
|
|
|
6
9
|
enum HelperError: Error {
|
|
7
10
|
case invalidArgs(String)
|
|
@@ -57,6 +60,19 @@ struct ReadResponse: Encodable {
|
|
|
57
60
|
let text: String
|
|
58
61
|
}
|
|
59
62
|
|
|
63
|
+
/// Payload returned by the `press` command, echoing the request that was executed.
struct PressResponse: Encodable {
    /// Coordinates the press was posted at, exactly as parsed from `--x` / `--y`.
    let x, y: Double
    /// Value of `--bundle-id`, when one was supplied.
    let bundleId: String?
    /// Value of `--surface`, when one was supplied.
    let surface: String?
}
|
|
69
|
+
|
|
70
|
+
/// Payload returned by the `screenshot` command.
struct ScreenshotResponse: Encodable {
    /// Output path the PNG was written to (the trimmed `--out` value).
    let path: String
    /// Value of `--surface`, when one was supplied.
    let surface: String?
    /// Whether `--fullscreen` was present on the command line.
    let fullscreen: Bool
}
|
|
75
|
+
|
|
60
76
|
struct AgentDeviceMacOSHelper {
|
|
61
77
|
static func main() {
|
|
62
78
|
do {
|
|
@@ -100,6 +116,10 @@ struct AgentDeviceMacOSHelper {
|
|
|
100
116
|
return try handleSnapshot(arguments: Array(arguments.dropFirst()))
|
|
101
117
|
case "read":
|
|
102
118
|
return try handleRead(arguments: Array(arguments.dropFirst()))
|
|
119
|
+
case "press":
|
|
120
|
+
return try handlePress(arguments: Array(arguments.dropFirst()))
|
|
121
|
+
case "screenshot":
|
|
122
|
+
return try handleScreenshot(arguments: Array(arguments.dropFirst()))
|
|
103
123
|
default:
|
|
104
124
|
throw HelperError.invalidArgs("unknown command: \(command)")
|
|
105
125
|
}
|
|
@@ -315,11 +335,13 @@ struct AgentDeviceMacOSHelper {
|
|
|
315
335
|
throw HelperError.invalidArgs("snapshot requires --surface <frontmost-app|desktop|menubar>")
|
|
316
336
|
}
|
|
317
337
|
|
|
338
|
+
let bundleId = try optionValue(arguments: arguments, name: "--bundle-id").map(validatedBundleId)
|
|
339
|
+
|
|
318
340
|
switch surface {
|
|
319
341
|
case "frontmost-app":
|
|
320
|
-
return SuccessEnvelope(data: try captureSnapshotResponse(surface: surface))
|
|
342
|
+
return SuccessEnvelope(data: try captureSnapshotResponse(surface: surface, bundleId: bundleId))
|
|
321
343
|
case "desktop", "menubar":
|
|
322
|
-
return SuccessEnvelope(data: try captureSnapshotResponse(surface: surface))
|
|
344
|
+
return SuccessEnvelope(data: try captureSnapshotResponse(surface: surface, bundleId: bundleId))
|
|
323
345
|
default:
|
|
324
346
|
throw HelperError.invalidArgs("snapshot requires --surface <frontmost-app|desktop|menubar>")
|
|
325
347
|
}
|
|
@@ -339,6 +361,35 @@ struct AgentDeviceMacOSHelper {
|
|
|
339
361
|
let text = try readTextAtPosition(bundleId: bundleId, surface: surface, x: x, y: y)
|
|
340
362
|
return SuccessEnvelope(data: ReadResponse(text: text))
|
|
341
363
|
}
|
|
364
|
+
|
|
365
|
+
/// Handles the `press` command: parses `--x` / `--y` (required) plus the optional
/// `--bundle-id` and `--surface`, posts the press, and echoes the request back.
///
/// - Parameter arguments: Command-line arguments following the `press` verb.
/// - Returns: A `SuccessEnvelope` wrapping a `PressResponse`.
/// - Throws: `HelperError.invalidArgs` when either coordinate is missing, is not
///   a number, or is not finite; rethrows bundle id validation and press errors.
static func handlePress(arguments: [String]) throws -> any Encodable {
    guard let rawX = optionValue(arguments: arguments, name: "--x"),
          let rawY = optionValue(arguments: arguments, name: "--y"),
          let x = Double(rawX),
          let y = Double(rawY),
          // `Double("nan")` and `Double("inf")` parse successfully but are not
          // usable screen coordinates, so reject them as invalid arguments.
          x.isFinite,
          y.isFinite
    else {
        throw HelperError.invalidArgs("press requires --x <number> --y <number>")
    }

    let bundleId = try optionValue(arguments: arguments, name: "--bundle-id").map(validatedBundleId)
    let surface = optionValue(arguments: arguments, name: "--surface")
    try pressAtPosition(bundleId: bundleId, surface: surface, x: x, y: y)
    return SuccessEnvelope(data: PressResponse(x: x, y: y, bundleId: bundleId, surface: surface))
}
|
|
379
|
+
|
|
380
|
+
/// Handles the `screenshot` command: requires a non-blank `--out` path, reads the
/// optional `--surface` and `--fullscreen` flags, captures the image and reports
/// what was written.
///
/// - Parameter arguments: Command-line arguments following the `screenshot` verb.
/// - Returns: A `SuccessEnvelope` wrapping a `ScreenshotResponse`.
/// - Throws: `HelperError.invalidArgs` when `--out` is missing or blank; rethrows
///   capture errors.
static func handleScreenshot(arguments: [String]) throws -> any Encodable {
    let destinationPath = optionValue(arguments: arguments, name: "--out")?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    if destinationPath.isEmpty {
        throw HelperError.invalidArgs("screenshot requires --out <path>")
    }

    let requestedSurface = optionValue(arguments: arguments, name: "--surface")
    let wantsFullscreen = arguments.contains("--fullscreen")
    try captureSurfaceScreenshot(
        surface: requestedSurface,
        outPath: destinationPath,
        fullscreen: wantsFullscreen
    )

    let payload = ScreenshotResponse(
        path: destinationPath,
        surface: requestedSurface,
        fullscreen: wantsFullscreen
    )
    return SuccessEnvelope(data: payload)
}
|
|
342
393
|
}
|
|
343
394
|
|
|
344
395
|
private func optionValue(arguments: [String], name: String) -> String? {
|
|
@@ -395,6 +446,74 @@ private func readTextAtPosition(bundleId: String?, surface: String?, x: Double,
|
|
|
395
446
|
throw HelperError.commandFailed("read did not resolve text")
|
|
396
447
|
}
|
|
397
448
|
|
|
449
|
+
/// Posts a left-button press at (`x`, `y`) as three events: move, mouse down, mouse up.
///
/// - Parameters:
///   - bundleId: Accepted for interface symmetry; currently unused.
///   - surface: Accepted for interface symmetry; currently unused.
///   - x: Horizontal coordinate passed to the events.
///   - y: Vertical coordinate passed to the events.
/// - Throws: `HelperError.commandFailed` when any of the events cannot be created;
///   in that case nothing is posted.
private func pressAtPosition(bundleId: String?, surface: String?, x: Double, y: Double) throws {
    _ = (bundleId, surface)

    let location = CGPoint(x: x, y: y)
    let eventTypes: [CGEventType] = [.mouseMoved, .leftMouseDown, .leftMouseUp]
    let events = eventTypes.compactMap { eventType in
        CGEvent(
            mouseEventSource: nil,
            mouseType: eventType,
            mouseCursorPosition: location,
            mouseButton: .left
        )
    }
    // Every event must exist before anything is posted, so a partial press is never sent.
    guard events.count == eventTypes.count else {
        throw HelperError.commandFailed("press action failed", details: ["reason": "event_creation_failed"])
    }

    for event in events {
        event.post(tap: .cghidEventTap)
    }
}
|
|
463
|
+
|
|
464
|
+
/// Captures the main screen with ScreenCaptureKit and writes it to `outPath` as a PNG.
///
/// - Parameters:
///   - surface: Surface name; only used to annotate the permission error details.
///   - outPath: Destination file path. Missing parent directories are created.
///   - fullscreen: Accepted for interface symmetry; currently unused.
/// - Throws: `HelperError.commandFailed` when the OS is older than macOS 15.2, the
///   main screen bounds are unavailable, the capture fails (with a dedicated
///   message for the Screen Recording permission error), or the PNG cannot be
///   written. Directory creation errors from `FileManager` are rethrown as-is.
private func captureSurfaceScreenshot(surface: String?, outPath: String, fullscreen: Bool) throws {
    _ = fullscreen
    guard #available(macOS 15.2, *) else {
        throw HelperError.commandFailed(
            "screenshot on macOS desktop and menubar surfaces requires macOS 15.2 or newer"
        )
    }
    // NOTE(review): `NSScreen.main` is the screen with the key window; confirm this
    // frame matches the coordinate space expected by `captureImage(in:)` on
    // multi-display setups.
    guard let screenFrame = NSScreen.main?.frame, screenFrame.width > 0, screenFrame.height > 0 else {
        throw HelperError.commandFailed("screenshot could not resolve main screen bounds")
    }

    let rect = CGRect(origin: screenFrame.origin, size: screenFrame.size)

    // Bridge the completion-handler API into this synchronous command by blocking
    // until the handler has stored its result.
    let semaphore = DispatchSemaphore(value: 0)
    var capturedImage: CGImage?
    var capturedError: Error?
    SCScreenshotManager.captureImage(in: rect) { image, error in
        capturedImage = image
        capturedError = error
        semaphore.signal()
    }
    semaphore.wait()

    if let error = capturedError as NSError? {
        // This domain/code pair is reported as a missing Screen Recording permission.
        if error.domain == "com.apple.ScreenCaptureKit.SCStreamErrorDomain", error.code == -3801 {
            throw HelperError.commandFailed(
                "screenshot requires Screen Recording permission on macOS desktop and menubar surfaces",
                details: ["surface": surface ?? "", "permission": "screen-recording"]
            )
        }
        throw HelperError.commandFailed("screenshot failed", details: ["error": error.localizedDescription])
    }
    guard let capturedImage else {
        throw HelperError.commandFailed("screenshot failed")
    }

    let outputURL = URL(fileURLWithPath: outPath)
    // `URL.path` is already percent-decoded. Decoding it a second time would mangle
    // directories containing a literal "%" (or return nil and skip directory
    // creation), so the path is used as-is.
    let parent = outputURL.deletingLastPathComponent().path
    if !parent.isEmpty {
        try FileManager.default.createDirectory(atPath: parent, withIntermediateDirectories: true)
    }
    guard let destination = CGImageDestinationCreateWithURL(
        outputURL as CFURL,
        UTType.png.identifier as CFString,
        1,
        nil
    ) else {
        throw HelperError.commandFailed("screenshot could not create PNG destination")
    }
    CGImageDestinationAddImage(destination, capturedImage, nil)
    guard CGImageDestinationFinalize(destination) else {
        throw HelperError.commandFailed("screenshot could not write PNG file")
    }
}
|
|
516
|
+
|
|
398
517
|
private func readableText(for element: AXUIElement) -> String? {
|
|
399
518
|
return
|
|
400
519
|
stringAttribute(element, attribute: kAXValueAttribute as String)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-device",
|
|
3
|
-
"version": "0.11.
|
|
3
|
+
"version": "0.11.2",
|
|
4
4
|
"description": "Unified control plane for physical and virtual devices via an agent-driven CLI.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Callstack",
|
|
@@ -24,13 +24,18 @@
|
|
|
24
24
|
"clean:daemon": "rm -f ~/.agent-device/daemon.json && rm -f ~/.agent-device/daemon.lock",
|
|
25
25
|
"build:node": "pnpm build && pnpm clean:daemon",
|
|
26
26
|
"build:xcuitest": "pnpm build:xcuitest:ios && pnpm build:xcuitest:macos",
|
|
27
|
-
"build:xcuitest:ios": "
|
|
28
|
-
"build:xcuitest:macos": "
|
|
29
|
-
"build:xcuitest:tvos": "
|
|
27
|
+
"build:xcuitest:ios": "AGENT_DEVICE_XCUITEST_PLATFORM=ios AGENT_DEVICE_IOS_CLEAN_DERIVED=1 sh ./scripts/build-xcuitest-apple.sh",
|
|
28
|
+
"build:xcuitest:macos": "AGENT_DEVICE_XCUITEST_PLATFORM=macos sh ./scripts/build-xcuitest-apple.sh",
|
|
29
|
+
"build:xcuitest:tvos": "AGENT_DEVICE_XCUITEST_PLATFORM=tvos AGENT_DEVICE_IOS_CLEAN_DERIVED=1 sh ./scripts/build-xcuitest-apple.sh",
|
|
30
30
|
"build:macos-helper": "swift build -c release --package-path macos-helper",
|
|
31
31
|
"build:all": "pnpm build:node && pnpm build:xcuitest",
|
|
32
32
|
"ad": "node bin/agent-device.mjs",
|
|
33
|
-
"
|
|
33
|
+
"lint": "oxlint . --deny-warnings",
|
|
34
|
+
"format": "oxfmt --write src test skills package.json tsconfig.json .oxlintrc.json .oxfmtrc.json",
|
|
35
|
+
"check:quick": "pnpm lint && pnpm typecheck",
|
|
36
|
+
"check:tooling": "pnpm lint && pnpm typecheck && pnpm build",
|
|
37
|
+
"check:unit": "pnpm test:unit && pnpm test:smoke",
|
|
38
|
+
"check": "pnpm check:tooling && pnpm check:unit",
|
|
34
39
|
"prepack": "pnpm build:all",
|
|
35
40
|
"typecheck": "tsc -p tsconfig.json",
|
|
36
41
|
"test": "vitest run",
|
|
@@ -66,22 +71,17 @@
|
|
|
66
71
|
"ios",
|
|
67
72
|
"android"
|
|
68
73
|
],
|
|
69
|
-
"prettier": {
|
|
70
|
-
"singleQuote": true,
|
|
71
|
-
"semi": true,
|
|
72
|
-
"trailingComma": "all",
|
|
73
|
-
"printWidth": 100
|
|
74
|
-
},
|
|
75
74
|
"dependencies": {
|
|
76
75
|
"pngjs": "^7.0.0"
|
|
77
76
|
},
|
|
78
77
|
"devDependencies": {
|
|
79
78
|
"@microsoft/api-extractor": "^7.52.10",
|
|
80
|
-
"@rslib/core": "0.
|
|
79
|
+
"@rslib/core": "0.20.1",
|
|
81
80
|
"@types/node": "^22.0.0",
|
|
82
81
|
"@types/pngjs": "^6.0.5",
|
|
83
|
-
"
|
|
84
|
-
"
|
|
82
|
+
"oxfmt": "^0.42.0",
|
|
83
|
+
"oxlint": "^1.57.0",
|
|
84
|
+
"typescript": "^6.0.2",
|
|
85
85
|
"vitest": "^4.1.2"
|
|
86
86
|
}
|
|
87
87
|
}
|
|
@@ -112,6 +112,19 @@ agent-device press 'id="camera_row" || label="Camera" role=button'
|
|
|
112
112
|
agent-device is visible 'id="camera_settings_anchor"'
|
|
113
113
|
```
|
|
114
114
|
|
|
115
|
+
## Interaction fallbacks
|
|
116
|
+
|
|
117
|
+
When `press @ref` fails:
|
|
118
|
+
|
|
119
|
+
1. Re-snapshot if the UI may have changed.
|
|
120
|
+
2. Retry `press @ref` or a selector-based `press`.
|
|
121
|
+
3. If `screenshot --overlay-refs --json` returned a reliable `overlayRefs[].center`, use `agent-device press <x> <y>`.
|
|
122
|
+
4. Use an external vision-based tap tool only after semantic and coordinate targeting fail.
|
|
123
|
+
|
|
124
|
+
- Prefer `@ref` over coordinates.
|
|
125
|
+
- Do not guess coordinates from the image when structured `center` is available.
|
|
126
|
+
- `agent-device` does not provide a built-in vision-tap flag.
|
|
127
|
+
|
|
115
128
|
## Text entry rules
|
|
116
129
|
|
|
117
130
|
- Use `fill` to replace text in an editable field.
|
|
@@ -30,7 +30,8 @@ agent-device close
|
|
|
30
30
|
- `app`: default surface and the normal choice for `click`, `fill`, `press`, `scroll`, `screenshot`, and `record`.
|
|
31
31
|
- `frontmost-app`: inspect the currently focused app without naming it first.
|
|
32
32
|
- `desktop`: inspect visible desktop windows across apps.
|
|
33
|
-
- `menubar`: inspect the active app menu bar and system menu extras.
|
|
33
|
+
- `menubar`: inspect the active app menu bar and system menu extras. Use `open <app> --platform macos --surface menubar` when you need one menu bar app's extras, such as a status-item app.
|
|
34
|
+
- Menu bar apps can expose a sparse or empty default `app` tree. Prefer the `menubar` surface first when the app lives entirely in the top bar.
|
|
34
35
|
|
|
35
36
|
Use inspect-first surfaces to understand desktop-global UI, then switch back to `app` when you need to act in one app.
|
|
36
37
|
|
|
@@ -81,6 +82,6 @@ Troubleshooting:
|
|
|
81
82
|
|
|
82
83
|
- If visible content is missing from `snapshot -i`, re-snapshot after the UI settles.
|
|
83
84
|
- If `desktop` is too broad, retry with `frontmost-app`.
|
|
84
|
-
- If `menubar` is missing the expected menu, make the app frontmost first and retry.
|
|
85
|
+
- If `menubar` is missing the expected menu, retry with `open <app> --platform macos --surface menubar` for menu bar apps, or make the app frontmost first and retry the generic menubar surface.
|
|
85
86
|
- If the wrong menu opened, retry secondary-clicking the row or cell wrapper rather than the nested text node.
|
|
86
87
|
- If the app has multiple windows, make the correct window frontmost before relying on refs.
|
|
@@ -22,7 +22,7 @@ Do not use verification tools as the first exploration step. First get the app i
|
|
|
22
22
|
agent-device open Settings --platform ios
|
|
23
23
|
# after using exploration to reach the state you want to verify
|
|
24
24
|
agent-device snapshot
|
|
25
|
-
agent-device screenshot /tmp/settings-proof.png
|
|
25
|
+
agent-device screenshot /tmp/settings-proof.png --overlay-refs
|
|
26
26
|
agent-device close
|
|
27
27
|
```
|
|
28
28
|
|
|
@@ -45,6 +45,8 @@ agent-device diff snapshot -i
|
|
|
45
45
|
|
|
46
46
|
Use `screenshot` when the proof needs a rendered image instead of a structural tree.
|
|
47
47
|
|
|
48
|
+
- Add `--overlay-refs` when you want the saved PNG to show fresh `@eN` refs burned into the screenshot.
|
|
49
|
+
|
|
48
50
|
## Session recording
|
|
49
51
|
|
|
50
52
|
Use `record` for debugging, documentation, or shareable verification artifacts.
|