agent-device 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,9 @@
7
7
 
8
8
  import XCTest
9
9
  import Network
10
+ import AVFoundation
11
+ import CoreVideo
12
+ import UIKit
10
13
 
11
14
  final class RunnerTests: XCTestCase {
12
15
  private enum RunnerErrorDomain {
@@ -23,7 +26,6 @@ final class RunnerTests: XCTestCase {
23
26
 
24
27
  private static let springboardBundleId = "com.apple.springboard"
25
28
  private var listener: NWListener?
26
- private var port: UInt16 = 0
27
29
  private var doneExpectation: XCTestExpectation?
28
30
  private let app = XCUIApplication()
29
31
  private lazy var springboard = XCUIApplication(bundleIdentifier: Self.springboardBundleId)
@@ -33,11 +35,16 @@ final class RunnerTests: XCTestCase {
33
35
  private let maxSnapshotElements = 600
34
36
  private let fastSnapshotLimit = 300
35
37
  private let mainThreadExecutionTimeout: TimeInterval = 30
38
+ private let appExistenceTimeout: TimeInterval = 30
36
39
  private let retryCooldown: TimeInterval = 0.2
37
40
  private let postSnapshotInteractionDelay: TimeInterval = 0.2
38
41
  private let firstInteractionAfterActivateDelay: TimeInterval = 0.25
42
+ private let scrollInteractionIdleTimeoutDefault: TimeInterval = 1.0
43
+ private let minRecordingFps = 1
44
+ private let maxRecordingFps = 120
39
45
  private var needsPostSnapshotInteractionDelay = false
40
46
  private var needsFirstInteractionDelay = false
47
+ private var activeRecording: ScreenRecorder?
41
48
  private let interactiveTypes: Set<XCUIElement.ElementType> = [
42
49
  .button,
43
50
  .cell,
@@ -66,6 +73,262 @@ final class RunnerTests: XCTestCase {
66
73
  .switch,
67
74
  ]
68
75
 
76
/// Records the screen into an MP4 file by polling a frame-capture closure on a
/// timer and appending every captured frame to an `AVAssetWriter`.
///
/// Mutable writer state is guarded by `lock`; the polling timer fires on `queue`.
private final class ScreenRecorder {
  private let outputPath: String
  /// Requested frame rate. `nil` means uncapped: frames are polled every
  /// `uncappedFrameInterval` seconds.
  private let fps: Int32?
  private let uncappedFrameInterval: TimeInterval = 0.001
  /// Timescale used for presentation timestamps when no frame rate was requested.
  private var uncappedTimestampTimescale: Int32 {
    Int32(max(1, Int((1.0 / uncappedFrameInterval).rounded())))
  }
  /// Interval between two timer ticks.
  private var frameInterval: TimeInterval {
    guard let fps else { return uncappedFrameInterval }
    return 1.0 / Double(fps)
  }
  private let queue = DispatchQueue(label: "agent-device.runner.recorder")
  private let lock = NSLock()
  private var assetWriter: AVAssetWriter?
  private var writerInput: AVAssetWriterInput?
  private var pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor?
  private var timer: DispatchSourceTimer?
  private var recordingStartUptime: TimeInterval?
  private var lastTimestampValue: Int64 = -1
  private var isStopping = false
  private var startedSession = false
  /// First frame-append failure; surfaced to the caller by `stop()`.
  private var startError: Error?

  init(outputPath: String, fps: Int32?) {
    self.outputPath = outputPath
    self.fps = fps
  }

  /// Starts recording.
  ///
  /// Creates the output directory, removes any existing file at `outputPath`,
  /// waits up to two seconds for a first frame (which fixes the video
  /// dimensions), configures the writer and starts the polling timer.
  ///
  /// - Parameter captureFrame: Closure returning the current screen image, or
  ///   `nil` when no frame is available.
  /// - Throws: File-system errors, the writer's error, or an
  ///   `AgentDeviceRunner.Record` error (codes 1-3) when setup fails.
  func start(captureFrame: @escaping () -> UIImage?) throws {
    let url = URL(fileURLWithPath: outputPath)
    let directory = url.deletingLastPathComponent()
    try FileManager.default.createDirectory(
      at: directory,
      withIntermediateDirectories: true,
      attributes: nil
    )
    if FileManager.default.fileExists(atPath: outputPath) {
      try FileManager.default.removeItem(atPath: outputPath)
    }

    // Poll until a frame with a backing CGImage is available; its pixel size
    // becomes the size of the encoded video.
    var dimensions: CGSize = .zero
    var bootstrapImage: UIImage?
    let bootstrapDeadline = Date().addingTimeInterval(2.0)
    while Date() < bootstrapDeadline {
      if let image = captureFrame(), let cgImage = image.cgImage {
        bootstrapImage = image
        dimensions = CGSize(width: cgImage.width, height: cgImage.height)
        break
      }
      Thread.sleep(forTimeInterval: 0.05)
    }
    guard dimensions.width > 0, dimensions.height > 0 else {
      throw NSError(
        domain: "AgentDeviceRunner.Record",
        code: 1,
        userInfo: [NSLocalizedDescriptionKey: "failed to capture initial frame"]
      )
    }

    let writer = try AVAssetWriter(outputURL: url, fileType: .mp4)
    let outputSettings: [String: Any] = [
      AVVideoCodecKey: AVVideoCodecType.h264,
      AVVideoWidthKey: Int(dimensions.width),
      AVVideoHeightKey: Int(dimensions.height),
    ]
    let input = AVAssetWriterInput(mediaType: .video, outputSettings: outputSettings)
    input.expectsMediaDataInRealTime = true
    let attributes: [String: Any] = [
      kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32ARGB,
      kCVPixelBufferWidthKey as String: Int(dimensions.width),
      kCVPixelBufferHeightKey as String: Int(dimensions.height),
    ]
    let adaptor = AVAssetWriterInputPixelBufferAdaptor(
      assetWriterInput: input,
      sourcePixelBufferAttributes: attributes
    )
    guard writer.canAdd(input) else {
      throw NSError(
        domain: "AgentDeviceRunner.Record",
        code: 2,
        userInfo: [NSLocalizedDescriptionKey: "failed to add video input"]
      )
    }
    writer.add(input)
    guard writer.startWriting() else {
      throw writer.error ?? NSError(
        domain: "AgentDeviceRunner.Record",
        code: 3,
        userInfo: [NSLocalizedDescriptionKey: "failed to start writing"]
      )
    }

    lock.lock()
    assetWriter = writer
    writerInput = input
    pixelBufferAdaptor = adaptor
    recordingStartUptime = nil
    lastTimestampValue = -1
    isStopping = false
    startedSession = false
    startError = nil
    lock.unlock()

    if let firstImage = bootstrapImage {
      append(image: firstImage)
    }

    let timer = DispatchSource.makeTimerSource(queue: queue)
    timer.schedule(deadline: .now() + frameInterval, repeating: frameInterval)
    timer.setEventHandler { [weak self] in
      guard let self else { return }
      if self.shouldStop() { return }
      guard let image = captureFrame() else { return }
      self.append(image: image)
    }
    // `stop()` reads `timer` under the lock, so publish it under the lock too.
    lock.lock()
    self.timer = timer
    lock.unlock()
    timer.resume()
  }

  /// Stops recording and finalizes the file.
  ///
  /// Calling `stop()` again after it has already been entered is a no-op.
  /// Waits up to ten seconds for the writer to finish.
  ///
  /// - Throws: A timeout error (code 6), the first frame-append error, or the
  ///   writer's failure (code 4 when the writer gives no error).
  func stop() throws {
    var writer: AVAssetWriter?
    var input: AVAssetWriterInput?
    var appendError: Error?
    lock.lock()
    if isStopping {
      lock.unlock()
      return
    }
    isStopping = true
    let activeTimer = timer
    timer = nil
    writer = assetWriter
    input = writerInput
    appendError = startError
    lock.unlock()

    activeTimer?.cancel()
    input?.markAsFinished()
    guard let writer else { return }

    let semaphore = DispatchSemaphore(value: 0)
    writer.finishWriting {
      semaphore.signal()
    }
    var stopFailure: Error?
    let waitResult = semaphore.wait(timeout: .now() + 10)
    if waitResult == .timedOut {
      writer.cancelWriting()
      stopFailure = NSError(
        domain: "AgentDeviceRunner.Record",
        code: 6,
        userInfo: [NSLocalizedDescriptionKey: "recording finalization timed out"]
      )
    } else if let appendError {
      stopFailure = appendError
    } else if writer.status == .failed {
      stopFailure = writer.error ?? NSError(
        domain: "AgentDeviceRunner.Record",
        code: 4,
        userInfo: [NSLocalizedDescriptionKey: "failed to finalize recording"]
      )
    }

    lock.lock()
    assetWriter = nil
    writerInput = nil
    pixelBufferAdaptor = nil
    recordingStartUptime = nil
    lastTimestampValue = -1
    startedSession = false
    startError = nil
    lock.unlock()

    if let stopFailure {
      throw stopFailure
    }
  }

  /// Appends one frame, stamped with the time elapsed since the first
  /// appended frame. Frames are dropped while stopping, after an earlier
  /// append failure, or when the input is not ready for more data.
  private func append(image: UIImage) {
    guard let cgImage = image.cgImage else { return }
    lock.lock()
    defer { lock.unlock() }
    if isStopping { return }
    // Once an append has failed, stop feeding the writer; `stop()` reports it.
    if startError != nil { return }
    guard
      let writer = assetWriter,
      let input = writerInput,
      let adaptor = pixelBufferAdaptor
    else {
      return
    }
    if !startedSession {
      writer.startSession(atSourceTime: .zero)
      startedSession = true
    }
    guard input.isReadyForMoreMediaData else { return }
    guard let pixelBuffer = makePixelBuffer(from: cgImage) else { return }
    let nowUptime = ProcessInfo.processInfo.systemUptime
    if recordingStartUptime == nil {
      recordingStartUptime = nowUptime
    }
    let elapsed = max(0, nowUptime - (recordingStartUptime ?? nowUptime))
    let timescale = fps ?? uncappedTimestampTimescale
    var timestampValue = Int64((elapsed * Double(timescale)).rounded(.down))
    // Presentation timestamps must strictly increase from frame to frame.
    if timestampValue <= lastTimestampValue {
      timestampValue = lastTimestampValue + 1
    }
    let timestamp = CMTime(value: timestampValue, timescale: timescale)
    if !adaptor.append(pixelBuffer, withPresentationTime: timestamp) {
      startError = writer.error ?? NSError(
        domain: "AgentDeviceRunner.Record",
        code: 5,
        userInfo: [NSLocalizedDescriptionKey: "failed to append frame"]
      )
      return
    }
    lastTimestampValue = timestampValue
  }

  /// Returns whether `stop()` has been requested.
  private func shouldStop() -> Bool {
    lock.lock()
    defer { lock.unlock() }
    return isStopping
  }

  /// Renders `image` into a pixel buffer taken from the adaptor's pool.
  ///
  /// The drawing context is sized from the buffer, not from the image, so a
  /// frame whose size differs from the first frame cannot write outside the
  /// buffer. Such a frame is aspect-fitted on a black background.
  ///
  /// - Returns: The filled buffer, or `nil` when no buffer or context could
  ///   be created.
  private func makePixelBuffer(from image: CGImage) -> CVPixelBuffer? {
    guard let pool = pixelBufferAdaptor?.pixelBufferPool else { return nil }
    var pixelBuffer: CVPixelBuffer?
    let status = CVPixelBufferPoolCreatePixelBuffer(
      nil,
      pool,
      &pixelBuffer
    )
    guard status == kCVReturnSuccess, let pixelBuffer else { return nil }

    CVPixelBufferLockBaseAddress(pixelBuffer, [])
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, []) }

    let bufferWidth = CVPixelBufferGetWidth(pixelBuffer)
    let bufferHeight = CVPixelBufferGetHeight(pixelBuffer)
    guard
      image.width > 0, image.height > 0,
      bufferWidth > 0, bufferHeight > 0,
      let context = CGContext(
        data: CVPixelBufferGetBaseAddress(pixelBuffer),
        width: bufferWidth,
        height: bufferHeight,
        bitsPerComponent: 8,
        bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer),
        space: CGColorSpaceCreateDeviceRGB(),
        bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
      )
    else {
      return nil
    }

    let bufferRect = CGRect(x: 0, y: 0, width: bufferWidth, height: bufferHeight)
    if image.width == bufferWidth, image.height == bufferHeight {
      context.draw(image, in: bufferRect)
      return pixelBuffer
    }

    // Size mismatch: clear the recycled buffer, then aspect-fit the frame.
    context.setFillColor(red: 0, green: 0, blue: 0, alpha: 1)
    context.fill(bufferRect)
    let scale = min(
      Double(bufferWidth) / Double(image.width),
      Double(bufferHeight) / Double(image.height)
    )
    let fittedWidth = Double(image.width) * scale
    let fittedHeight = Double(image.height) * scale
    let fittedRect = CGRect(
      x: (Double(bufferWidth) - fittedWidth) / 2,
      y: (Double(bufferHeight) - fittedHeight) / 2,
      width: fittedWidth,
      height: fittedHeight
    )
    context.draw(image, in: fittedRect)
    return pixelBuffer
  }
}
331
+
69
332
  override func setUp() {
70
333
  continueAfterFailure = true
71
334
  }
@@ -88,7 +351,6 @@ final class RunnerTests: XCTestCase {
88
351
  case .ready:
89
352
  NSLog("AGENT_DEVICE_RUNNER_LISTENER_READY")
90
353
  if let listenerPort = self?.listener?.port {
91
- self?.port = listenerPort.rawValue
92
354
  NSLog("AGENT_DEVICE_RUNNER_PORT=%d", listenerPort.rawValue)
93
355
  } else {
94
356
  NSLog("AGENT_DEVICE_RUNNER_PORT_NOT_SET")
@@ -261,7 +523,11 @@ final class RunnerTests: XCTestCase {
261
523
  if let exceptionMessage {
262
524
  currentApp = nil
263
525
  currentBundleId = nil
264
- if !hasRetried, shouldRetryCommand(command.command) {
526
+ if !hasRetried, shouldRetryException(command, message: exceptionMessage) {
527
+ NSLog(
528
+ "AGENT_DEVICE_RUNNER_RETRY command=%@ reason=objc_exception",
529
+ command.command.rawValue
530
+ )
265
531
  hasRetried = true
266
532
  sleepFor(retryCooldown)
267
533
  continue
@@ -282,7 +548,11 @@ final class RunnerTests: XCTestCase {
282
548
  userInfo: [NSLocalizedDescriptionKey: "command returned no response"]
283
549
  )
284
550
  }
285
- if !hasRetried, shouldRetryCommand(command.command), shouldRetryResponse(response) {
551
+ if !hasRetried, shouldRetryCommand(command), shouldRetryResponse(response) {
552
+ NSLog(
553
+ "AGENT_DEVICE_RUNNER_RETRY command=%@ reason=response_unavailable",
554
+ command.command.rawValue
555
+ )
286
556
  hasRetried = true
287
557
  currentApp = nil
288
558
  currentBundleId = nil
@@ -294,61 +564,111 @@ final class RunnerTests: XCTestCase {
294
564
  }
295
565
 
296
566
  private func executeOnMain(command: Command) throws -> Response {
297
- if command.command == .shutdown {
298
- return Response(ok: true, data: DataPayload(message: "shutdown"))
299
- }
300
-
301
- let normalizedBundleId = command.appBundleId?
302
- .trimmingCharacters(in: .whitespacesAndNewlines)
303
- let requestedBundleId = (normalizedBundleId?.isEmpty == true) ? nil : normalizedBundleId
304
- if let bundleId = requestedBundleId {
305
- if currentBundleId != bundleId || currentApp == nil {
306
- _ = activateTarget(bundleId: bundleId, reason: "bundle_changed")
307
- }
308
- } else {
309
- // Do not reuse stale bundle targets when the caller does not explicitly request one.
310
- currentApp = nil
311
- currentBundleId = nil
312
- }
313
-
314
567
  var activeApp = currentApp ?? app
315
- if let bundleId = requestedBundleId, targetNeedsActivation(activeApp) {
316
- activeApp = activateTarget(bundleId: bundleId, reason: "stale_target")
317
- } else if requestedBundleId == nil, targetNeedsActivation(activeApp) {
318
- app.activate()
319
- activeApp = app
320
- }
321
-
322
- if !activeApp.waitForExistence(timeout: 5) {
568
+ if !isRunnerLifecycleCommand(command.command) {
569
+ let normalizedBundleId = command.appBundleId?
570
+ .trimmingCharacters(in: .whitespacesAndNewlines)
571
+ let requestedBundleId = (normalizedBundleId?.isEmpty == true) ? nil : normalizedBundleId
323
572
  if let bundleId = requestedBundleId {
324
- activeApp = activateTarget(bundleId: bundleId, reason: "missing_after_wait")
325
- guard activeApp.waitForExistence(timeout: 5) else {
326
- return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
573
+ if currentBundleId != bundleId || currentApp == nil {
574
+ _ = activateTarget(bundleId: bundleId, reason: "bundle_changed")
327
575
  }
328
576
  } else {
329
- return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
577
+ // Do not reuse stale bundle targets when the caller does not explicitly request one.
578
+ currentApp = nil
579
+ currentBundleId = nil
330
580
  }
331
- }
332
581
 
333
- if isInteractionCommand(command.command) {
334
- if let bundleId = requestedBundleId, activeApp.state != .runningForeground {
335
- activeApp = activateTarget(bundleId: bundleId, reason: "interaction_foreground_guard")
336
- } else if requestedBundleId == nil, activeApp.state != .runningForeground {
582
+ activeApp = currentApp ?? app
583
+ if let bundleId = requestedBundleId, targetNeedsActivation(activeApp) {
584
+ activeApp = activateTarget(bundleId: bundleId, reason: "stale_target")
585
+ } else if requestedBundleId == nil, targetNeedsActivation(activeApp) {
337
586
  app.activate()
338
587
  activeApp = app
339
588
  }
340
- if !activeApp.waitForExistence(timeout: 2) {
589
+
590
+ if !activeApp.waitForExistence(timeout: appExistenceTimeout) {
341
591
  if let bundleId = requestedBundleId {
342
- return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
592
+ activeApp = activateTarget(bundleId: bundleId, reason: "missing_after_wait")
593
+ guard activeApp.waitForExistence(timeout: appExistenceTimeout) else {
594
+ return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
595
+ }
596
+ } else {
597
+ return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
598
+ }
599
+ }
600
+
601
+ if isInteractionCommand(command.command) {
602
+ if let bundleId = requestedBundleId, activeApp.state != .runningForeground {
603
+ activeApp = activateTarget(bundleId: bundleId, reason: "interaction_foreground_guard")
604
+ } else if requestedBundleId == nil, activeApp.state != .runningForeground {
605
+ app.activate()
606
+ activeApp = app
607
+ }
608
+ if !activeApp.waitForExistence(timeout: 2) {
609
+ if let bundleId = requestedBundleId {
610
+ return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
611
+ }
612
+ return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
343
613
  }
344
- return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
614
+ applyInteractionStabilizationIfNeeded()
345
615
  }
346
- applyInteractionStabilizationIfNeeded()
347
616
  }
348
617
 
349
618
  switch command.command {
350
619
  case .shutdown:
620
+ stopRecordingIfNeeded()
351
621
  return Response(ok: true, data: DataPayload(message: "shutdown"))
622
+ case .recordStart:
623
+ guard
624
+ let requestedOutPath = command.outPath?.trimmingCharacters(in: .whitespacesAndNewlines),
625
+ !requestedOutPath.isEmpty
626
+ else {
627
+ return Response(ok: false, error: ErrorPayload(message: "recordStart requires outPath"))
628
+ }
629
+ let hasAppBundleId = !(command.appBundleId?
630
+ .trimmingCharacters(in: .whitespacesAndNewlines)
631
+ .isEmpty ?? true)
632
+ guard hasAppBundleId else {
633
+ return Response(ok: false, error: ErrorPayload(message: "recordStart requires appBundleId"))
634
+ }
635
+ if activeRecording != nil {
636
+ return Response(ok: false, error: ErrorPayload(message: "recording already in progress"))
637
+ }
638
+ if let requestedFps = command.fps, (requestedFps < minRecordingFps || requestedFps > maxRecordingFps) {
639
+ return Response(ok: false, error: ErrorPayload(message: "recordStart fps must be between \(minRecordingFps) and \(maxRecordingFps)"))
640
+ }
641
+ do {
642
+ let resolvedOutPath = resolveRecordingOutPath(requestedOutPath)
643
+ let fpsLabel = command.fps.map(String.init) ?? "max"
644
+ NSLog(
645
+ "AGENT_DEVICE_RUNNER_RECORD_START requestedOutPath=%@ resolvedOutPath=%@ fps=%@",
646
+ requestedOutPath,
647
+ resolvedOutPath,
648
+ fpsLabel
649
+ )
650
+ let recorder = ScreenRecorder(outputPath: resolvedOutPath, fps: command.fps.map { Int32($0) })
651
+ try recorder.start { [weak self] in
652
+ return self?.captureRunnerFrame()
653
+ }
654
+ activeRecording = recorder
655
+ return Response(ok: true, data: DataPayload(message: "recording started"))
656
+ } catch {
657
+ activeRecording = nil
658
+ return Response(ok: false, error: ErrorPayload(message: "failed to start recording: \(error.localizedDescription)"))
659
+ }
660
+ case .recordStop:
661
+ guard let recorder = activeRecording else {
662
+ return Response(ok: false, error: ErrorPayload(message: "no active recording"))
663
+ }
664
+ do {
665
+ try recorder.stop()
666
+ activeRecording = nil
667
+ return Response(ok: true, data: DataPayload(message: "recording stopped"))
668
+ } catch {
669
+ activeRecording = nil
670
+ return Response(ok: false, error: ErrorPayload(message: "failed to stop recording: \(error.localizedDescription)"))
671
+ }
352
672
  case .tap:
353
673
  if let text = command.text {
354
674
  if let element = findElement(app: activeApp, text: text) {
@@ -391,7 +711,9 @@ final class RunnerTests: XCTestCase {
391
711
  return Response(ok: false, error: ErrorPayload(message: "drag requires x, y, x2, and y2"))
392
712
  }
393
713
  let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
394
- dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
714
+ withTemporaryScrollIdleTimeoutIfSupported(activeApp) {
715
+ dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
716
+ }
395
717
  return Response(ok: true, data: DataPayload(message: "dragged"))
396
718
  case .dragSeries:
397
719
  guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else {
@@ -404,12 +726,14 @@ final class RunnerTests: XCTestCase {
404
726
  return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong"))
405
727
  }
406
728
  let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
407
- runSeries(count: count, pauseMs: pauseMs) { idx in
408
- let reverse = pattern == "ping-pong" && (idx % 2 == 1)
409
- if reverse {
410
- dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration)
411
- } else {
412
- dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
729
+ withTemporaryScrollIdleTimeoutIfSupported(activeApp) {
730
+ runSeries(count: count, pauseMs: pauseMs) { idx in
731
+ let reverse = pattern == "ping-pong" && (idx % 2 == 1)
732
+ if reverse {
733
+ dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration)
734
+ } else {
735
+ dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
736
+ }
413
737
  }
414
738
  }
415
739
  return Response(ok: true, data: DataPayload(message: "drag series"))
@@ -435,7 +759,9 @@ final class RunnerTests: XCTestCase {
435
759
  guard let direction = command.direction else {
436
760
  return Response(ok: false, error: ErrorPayload(message: "swipe requires direction"))
437
761
  }
438
- swipe(app: activeApp, direction: direction)
762
+ withTemporaryScrollIdleTimeoutIfSupported(activeApp) {
763
+ swipe(app: activeApp, direction: direction)
764
+ }
439
765
  return Response(ok: true, data: DataPayload(message: "swiped"))
440
766
  case .findText:
441
767
  guard let text = command.text else {
@@ -443,17 +769,6 @@ final class RunnerTests: XCTestCase {
443
769
  }
444
770
  let found = findElement(app: activeApp, text: text) != nil
445
771
  return Response(ok: true, data: DataPayload(found: found))
446
- case .listTappables:
447
- let elements = activeApp.descendants(matching: .any).allElementsBoundByIndex
448
- let labels = elements.compactMap { element -> String? in
449
- guard element.isHittable else { return nil }
450
- let label = element.label.trimmingCharacters(in: .whitespacesAndNewlines)
451
- if label.isEmpty { return nil }
452
- let identifier = element.identifier.trimmingCharacters(in: .whitespacesAndNewlines)
453
- return identifier.isEmpty ? label : "\(label) [\(identifier)]"
454
- }
455
- let unique = Array(Set(labels)).sorted()
456
- return Response(ok: true, data: DataPayload(items: unique))
457
772
  case .snapshot:
458
773
  let options = SnapshotOptions(
459
774
  interactiveOnly: command.interactiveOnly ?? false,
@@ -507,6 +822,37 @@ final class RunnerTests: XCTestCase {
507
822
  }
508
823
  }
509
824
 
825
/// Takes a screenshot of the main screen and returns it as an image.
/// The screenshot is always taken on the main thread: directly when already
/// there, otherwise via a synchronous hop to the main queue.
private func captureRunnerFrame() -> UIImage? {
  let takeScreenshot: () -> UIImage = {
    XCUIScreen.main.screenshot().image
  }
  guard !Thread.isMainThread else {
    return takeScreenshot()
  }
  return DispatchQueue.main.sync(execute: takeScreenshot)
}
838
+
839
/// Stops the active recording, if there is one, and clears it.
/// A failure to stop is logged rather than propagated.
private func stopRecordingIfNeeded() {
  guard let recorder = activeRecording else { return }
  // The recording is cleared whether or not stopping succeeded.
  defer { activeRecording = nil }
  do {
    try recorder.stop()
  } catch {
    NSLog("AGENT_DEVICE_RUNNER_RECORD_STOP_FAILED=%@", String(describing: error))
  }
}
848
+
849
/// Maps a caller-supplied output path to a file inside the temporary directory.
///
/// Only the last path component of `requestedOutPath` is kept. When that
/// component is not a usable file name (empty, `.`, `..` or `/`), a
/// timestamped fallback name is used instead, so the result never resolves to
/// the temporary directory itself or to its parent.
///
/// - Parameter requestedOutPath: Path requested by the caller.
/// - Returns: A path directly inside `NSTemporaryDirectory()`.
private func resolveRecordingOutPath(_ requestedOutPath: String) -> String {
  let fileName = URL(fileURLWithPath: requestedOutPath).lastPathComponent
  let fallbackName = "agent-device-recording-\(Int(Date().timeIntervalSince1970 * 1000)).mp4"
  // These components name a directory, not a file; the recorder removes
  // whatever already exists at the resolved path, so they must be rejected.
  let unusableNames: Set<String> = ["", ".", "..", "/"]
  let safeFileName = unusableNames.contains(fileName) ? fallbackName : fileName
  return (NSTemporaryDirectory() as NSString).appendingPathComponent(safeFileName)
}
855
+
510
856
  private func targetNeedsActivation(_ target: XCUIApplication) -> Bool {
511
857
  switch target.state {
512
858
  case .unknown, .notRunning, .runningBackground, .runningBackgroundSuspended:
@@ -532,10 +878,67 @@ final class RunnerTests: XCTestCase {
532
878
  return target
533
879
  }
534
880
 
535
- private func shouldRetryCommand(_ command: CommandType) -> Bool {
536
- switch command {
537
- case .tap, .longPress, .drag:
881
/// Runs `operation` with the target's `waitForIdleTimeout` temporarily set to
/// the scroll-interaction timeout.
///
/// When the target does not respond to `setWaitForIdleTimeout:`, the operation
/// runs unchanged. The previous value is restored afterwards only if it could
/// be read as a number.
private func withTemporaryScrollIdleTimeoutIfSupported(
  _ target: XCUIApplication,
  operation: () -> Void
) {
  let idleTimeoutKey = "waitForIdleTimeout"
  if !target.responds(to: NSSelectorFromString("setWaitForIdleTimeout:")) {
    operation()
    return
  }
  let savedTimeout = target.value(forKey: idleTimeoutKey) as? NSNumber
  target.setValue(resolveScrollInteractionIdleTimeout(), forKey: idleTimeoutKey)
  operation()
  if let savedTimeout {
    target.setValue(savedTimeout.doubleValue, forKey: idleTimeoutKey)
  }
}
899
+
900
/// Resolves the idle timeout applied around scroll-like interactions.
///
/// Reads `AGENT_DEVICE_IOS_INTERACTION_IDLE_TIMEOUT` from the environment.
/// The value is trimmed before parsing, so surrounding whitespace is accepted.
/// Missing, empty, non-numeric or negative values yield the default.
///
/// - Returns: The configured timeout capped at 30 seconds, or
///   `scrollInteractionIdleTimeoutDefault`.
private func resolveScrollInteractionIdleTimeout() -> TimeInterval {
  let raw = ProcessInfo.processInfo.environment["AGENT_DEVICE_IOS_INTERACTION_IDLE_TIMEOUT"] ?? ""
  let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
  // Parse the trimmed text: `Double(" 2 ")` is nil, which would silently
  // discard a value that only carries stray whitespace.
  guard !trimmed.isEmpty, let parsed = Double(trimmed), parsed >= 0 else {
    return scrollInteractionIdleTimeoutDefault
  }
  return min(parsed, 30)
}
912
+
913
/// Tells whether a command may be retried: only read-only commands, and only
/// while `AGENT_DEVICE_RUNNER_DISABLE_READONLY_RETRY` is not truthy.
private func shouldRetryCommand(_ command: Command) -> Bool {
  let retryDisabled = isEnvTruthy("AGENT_DEVICE_RUNNER_DISABLE_READONLY_RETRY")
  return !retryDisabled && isReadOnlyCommand(command)
}
919
+
920
/// Tells whether a command that raised an exception should be retried.
///
/// The command must be retryable, and the lowercased message must contain a
/// known transient marker. A bare "timed out" counts only for snapshots.
private func shouldRetryException(_ command: Command, message: String) -> Bool {
  guard shouldRetryCommand(command) else { return false }
  let lowered = message.lowercased()
  let transientMarkers = ["kaxerrorservernotfound", "main thread execution timed out"]
  if transientMarkers.contains(where: { lowered.contains($0) }) {
    return true
  }
  return lowered.contains("timed out") && command.command == .snapshot
}
934
+
935
+ private func isReadOnlyCommand(_ command: Command) -> Bool {
936
+ switch command.command {
937
+ case .findText, .snapshot:
538
938
  return true
939
+ case .alert:
940
+ let action = (command.action ?? "get").lowercased()
941
+ return action == "get"
539
942
  default:
540
943
  return false
541
944
  }
@@ -556,6 +959,15 @@ final class RunnerTests: XCTestCase {
556
959
  }
557
960
  }
558
961
 
962
/// Tells whether the command is a runner lifecycle command (`shutdown` or
/// `recordStop`).
private func isRunnerLifecycleCommand(_ command: CommandType) -> Bool {
  return command == .shutdown || command == .recordStop
}
970
+
559
971
  private func applyInteractionStabilizationIfNeeded() {
560
972
  if needsPostSnapshotInteractionDelay {
561
973
  sleepFor(postSnapshotInteractionDelay)
@@ -977,43 +1389,58 @@ final class RunnerTests: XCTestCase {
977
1389
  }
978
1390
 
979
1391
  let title = preferredSystemModalTitle(modal)
980
-
981
- var nodes: [SnapshotNode] = [
982
- makeSnapshotNode(
983
- element: modal,
984
- index: 0,
985
- type: "Alert",
986
- labelOverride: title,
987
- identifierOverride: modal.identifier,
988
- depth: 0,
989
- hittableOverride: true
990
- )
991
- ]
1392
+ guard let modalNode = safeMakeSnapshotNode(
1393
+ element: modal,
1394
+ index: 0,
1395
+ type: "Alert",
1396
+ labelOverride: title,
1397
+ identifierOverride: modal.identifier,
1398
+ depth: 0,
1399
+ hittableOverride: true
1400
+ ) else {
1401
+ return nil
1402
+ }
1403
+ var nodes: [SnapshotNode] = [modalNode]
992
1404
 
993
1405
  for action in actions {
994
- nodes.append(
995
- makeSnapshotNode(
996
- element: action,
997
- index: nodes.count,
998
- type: elementTypeName(action.elementType),
999
- depth: 1,
1000
- hittableOverride: true
1001
- )
1002
- )
1406
+ guard let actionNode = safeMakeSnapshotNode(
1407
+ element: action,
1408
+ index: nodes.count,
1409
+ type: elementTypeName(action.elementType),
1410
+ depth: 1,
1411
+ hittableOverride: true
1412
+ ) else {
1413
+ continue
1414
+ }
1415
+ nodes.append(actionNode)
1003
1416
  }
1004
1417
 
1005
1418
  return DataPayload(nodes: nodes, truncated: false)
1006
1419
  }
1007
1420
 
1008
1421
  private func firstBlockingSystemModal(in springboard: XCUIApplication) -> XCUIElement? {
1009
- for alert in springboard.alerts.allElementsBoundByIndex {
1010
- if isBlockingSystemModal(alert, in: springboard) {
1422
+ let disableSafeProbe = isEnvTruthy("AGENT_DEVICE_RUNNER_DISABLE_SAFE_MODAL_PROBE")
1423
+ let queryElements: (() -> [XCUIElement]) -> [XCUIElement] = { fetch in
1424
+ if disableSafeProbe {
1425
+ return fetch()
1426
+ }
1427
+ return self.safeElementsQuery(fetch)
1428
+ }
1429
+
1430
+ let alerts = queryElements {
1431
+ springboard.alerts.allElementsBoundByIndex
1432
+ }
1433
+ for alert in alerts {
1434
+ if safeIsBlockingSystemModal(alert, in: springboard) {
1011
1435
  return alert
1012
1436
  }
1013
1437
  }
1014
1438
 
1015
- for sheet in springboard.sheets.allElementsBoundByIndex {
1016
- if isBlockingSystemModal(sheet, in: springboard) {
1439
+ let sheets = queryElements {
1440
+ springboard.sheets.allElementsBoundByIndex
1441
+ }
1442
+ for sheet in sheets {
1443
+ if safeIsBlockingSystemModal(sheet, in: springboard) {
1017
1444
  return sheet
1018
1445
  }
1019
1446
  }
@@ -1021,6 +1448,36 @@ final class RunnerTests: XCTestCase {
1021
1448
  return nil
1022
1449
  }
1023
1450
 
1451
/// Runs an element query under the Objective-C exception catcher.
/// When an exception is caught it is logged and an empty array is returned.
private func safeElementsQuery(_ fetch: () -> [XCUIElement]) -> [XCUIElement] {
  var fetched: [XCUIElement] = []
  let failure = RunnerObjCExceptionCatcher.catchException({
    fetched = fetch()
  })
  guard let failure else {
    return fetched
  }
  NSLog(
    "AGENT_DEVICE_RUNNER_MODAL_QUERY_IGNORED_EXCEPTION=%@",
    failure
  )
  return []
}
1465
+
1466
/// Evaluates `isBlockingSystemModal` under the Objective-C exception catcher.
/// When an exception is caught it is logged and the element counts as
/// non-blocking.
private func safeIsBlockingSystemModal(_ element: XCUIElement, in springboard: XCUIApplication) -> Bool {
  var blocking = false
  let failure = RunnerObjCExceptionCatcher.catchException({
    blocking = isBlockingSystemModal(element, in: springboard)
  })
  guard let failure else {
    return blocking
  }
  NSLog(
    "AGENT_DEVICE_RUNNER_MODAL_CHECK_IGNORED_EXCEPTION=%@",
    failure
  )
  return false
}
1480
+
1024
1481
  private func isBlockingSystemModal(_ element: XCUIElement, in springboard: XCUIApplication) -> Bool {
1025
1482
  guard element.exists else { return false }
1026
1483
  let frame = element.frame
@@ -1038,18 +1495,36 @@ final class RunnerTests: XCTestCase {
1038
1495
  private func actionableElements(in element: XCUIElement) -> [XCUIElement] {
1039
1496
  var seen = Set<String>()
1040
1497
  var actions: [XCUIElement] = []
1041
- let descendants = element.descendants(matching: .any).allElementsBoundByIndex
1498
+ let descendants = safeElementsQuery {
1499
+ element.descendants(matching: .any).allElementsBoundByIndex
1500
+ }
1042
1501
  for candidate in descendants {
1043
- if !candidate.exists || !candidate.isHittable { continue }
1044
- if !actionableTypes.contains(candidate.elementType) { continue }
1502
+ if !safeIsActionableCandidate(candidate, seen: &seen) { continue }
1503
+ actions.append(candidate)
1504
+ }
1505
+ return actions
1506
+ }
1507
+
1508
/// Decides, under the Objective-C exception catcher, whether `candidate`
/// belongs in the actionable list.
///
/// It must exist, be hittable, have an actionable type and a non-empty frame,
/// and must not repeat a type/frame/label key already in `seen`. A caught
/// exception is logged and the candidate is rejected.
private func safeIsActionableCandidate(_ candidate: XCUIElement, seen: inout Set<String>) -> Bool {
  var accepted = false
  let failure = RunnerObjCExceptionCatcher.catchException({
    guard candidate.exists, candidate.isHittable else { return }
    guard actionableTypes.contains(candidate.elementType) else { return }
    let frame = candidate.frame
    guard !frame.isNull, !frame.isEmpty else { return }
    let key = "\(candidate.elementType.rawValue)-\(frame.origin.x)-\(frame.origin.y)-\(frame.size.width)-\(frame.size.height)-\(candidate.label)"
    // `insert` reports whether the key was new, which is the dedup result.
    accepted = seen.insert(key).inserted
  })
  guard let failure else {
    return accepted
  }
  NSLog(
    "AGENT_DEVICE_RUNNER_MODAL_ACTION_IGNORED_EXCEPTION=%@",
    failure
  )
  return false
}
1054
1529
 
1055
1530
  private func preferredSystemModalTitle(_ element: XCUIElement) -> String {
@@ -1088,6 +1563,37 @@ final class RunnerTests: XCTestCase {
1088
1563
  )
1089
1564
  }
1090
1565
 
1566
/// Builds a snapshot node via `makeSnapshotNode` under the Objective-C
/// exception catcher, forwarding every argument unchanged.
///
/// - Returns: The node, or `nil` when an exception was caught (it is logged).
private func safeMakeSnapshotNode(
  element: XCUIElement,
  index: Int,
  type: String,
  labelOverride: String? = nil,
  identifierOverride: String? = nil,
  depth: Int,
  hittableOverride: Bool? = nil
) -> SnapshotNode? {
  var built: SnapshotNode?
  let failure = RunnerObjCExceptionCatcher.catchException({
    built = makeSnapshotNode(
      element: element,
      index: index,
      type: type,
      labelOverride: labelOverride,
      identifierOverride: identifierOverride,
      depth: depth,
      hittableOverride: hittableOverride
    )
  })
  guard let failure else {
    return built
  }
  NSLog(
    "AGENT_DEVICE_RUNNER_MODAL_NODE_IGNORED_EXCEPTION=%@",
    failure
  )
  return nil
}
1596
+
1091
1597
  private func snapshotRect(from frame: CGRect) -> SnapshotRect {
1092
1598
  return SnapshotRect(
1093
1599
  x: Double(frame.origin.x),
@@ -1213,6 +1719,18 @@ private func resolveRunnerPort() -> UInt16 {
1213
1719
  return 0
1214
1720
  }
1215
1721
 
1722
/// Tells whether the named environment variable holds a truthy value.
/// After trimming whitespace and lowercasing, `1`, `true`, `yes` and `on` are
/// truthy; anything else, including an unset variable, is not.
private func isEnvTruthy(_ name: String) -> Bool {
  let truthyValues: Set<String> = ["1", "true", "yes", "on"]
  guard let value = ProcessInfo.processInfo.environment[name] else {
    return false
  }
  let normalized = value.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
  return truthyValues.contains(normalized)
}
1733
+
1216
1734
  enum CommandType: String, Codable {
1217
1735
  case tap
1218
1736
  case tapSeries
@@ -1222,13 +1740,14 @@ enum CommandType: String, Codable {
1222
1740
  case type
1223
1741
  case swipe
1224
1742
  case findText
1225
- case listTappables
1226
1743
  case snapshot
1227
1744
  case back
1228
1745
  case home
1229
1746
  case appSwitcher
1230
1747
  case alert
1231
1748
  case pinch
1749
+ case recordStart
1750
+ case recordStop
1232
1751
  case shutdown
1233
1752
  }
1234
1753
 
@@ -1257,6 +1776,8 @@ struct Command: Codable {
1257
1776
  let durationMs: Double?
1258
1777
  let direction: SwipeDirection?
1259
1778
  let scale: Double?
1779
+ let outPath: String?
1780
+ let fps: Int?
1260
1781
  let interactiveOnly: Bool?
1261
1782
  let compact: Bool?
1262
1783
  let depth: Int?