agent-device 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -401,6 +401,7 @@
401
401
  STRING_CATALOG_GENERATE_SYMBOLS = NO;
402
402
  SWIFT_APPROACHABLE_CONCURRENCY = YES;
403
403
  SWIFT_EMIT_LOC_STRINGS = NO;
404
+ SWIFT_OBJC_BRIDGING_HEADER = "AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h";
404
405
  SWIFT_UPCOMING_FEATURE_MEMBER_IMPORT_VISIBILITY = YES;
405
406
  SWIFT_VERSION = 5.0;
406
407
  TARGETED_DEVICE_FAMILY = "1,2";
@@ -422,6 +423,7 @@
422
423
  STRING_CATALOG_GENERATE_SYMBOLS = NO;
423
424
  SWIFT_APPROACHABLE_CONCURRENCY = YES;
424
425
  SWIFT_EMIT_LOC_STRINGS = NO;
426
+ SWIFT_OBJC_BRIDGING_HEADER = "AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h";
425
427
  SWIFT_UPCOMING_FEATURE_MEMBER_IMPORT_VISIBILITY = YES;
426
428
  SWIFT_VERSION = 5.0;
427
429
  TARGETED_DEVICE_FAMILY = "1,2";
@@ -0,0 +1,11 @@
1
+ #import <Foundation/Foundation.h>
2
+
3
+ NS_ASSUME_NONNULL_BEGIN
4
+
5
/// Bridges Objective-C exceptions into a value Swift code can inspect.
/// Swift cannot catch exceptions raised with `@throw`, so callers run the
/// risky work through this helper and examine the returned message.
@interface RunnerObjCExceptionCatcher : NSObject

/// Runs `tryBlock` synchronously.
///
/// @param tryBlock Work to execute; it is not retained past this call.
/// @return `nil` when the block finishes normally, otherwise a
///         "<name>: <reason>" description of the exception that was caught.
+ (nullable NSString *)catchException:(NS_NOESCAPE dispatch_block_t)tryBlock;

@end
10
+
11
+ NS_ASSUME_NONNULL_END
@@ -0,0 +1,16 @@
1
+ #import "RunnerObjCExceptionCatcher.h"
2
+
3
@implementation RunnerObjCExceptionCatcher

/// Runs `tryBlock` synchronously and converts anything it throws into a message.
///
/// @param tryBlock Work to execute inside an `@try` scope.
/// @return `nil` when the block completes without throwing; otherwise a
///         "<name>: <reason>" string describing what was thrown.
+ (NSString * _Nullable)catchException:(NS_NOESCAPE dispatch_block_t)tryBlock {
  @try {
    tryBlock();
    return nil;
  } @catch (NSException *exception) {
    NSString *name = exception.name ?: @"NSException";
    NSString *reason = exception.reason ?: @"Unhandled XCTest exception";
    return [NSString stringWithFormat:@"%@: %@", name, reason];
  } @catch (id thrown) {
    // Objective-C permits throwing any object, not just NSException. Without
    // this catch-all such a throw would unwind into Swift frames and terminate
    // the runner instead of being reported to the caller.
    NSString *name = NSStringFromClass([thrown class]) ?: @"NSException";
    NSString *reason = [thrown description] ?: @"Unhandled XCTest exception";
    return [NSString stringWithFormat:@"%@: %@", name, reason];
  }
}

@end
@@ -9,6 +9,18 @@ import XCTest
9
9
  import Network
10
10
 
11
11
  final class RunnerTests: XCTestCase {
12
/// `NSError` domains used for errors raised by the runner itself.
private enum RunnerErrorDomain {
  /// Domain for failures in the runner's own command plumbing.
  static let general: String = "AgentDeviceRunner"
  /// Domain for Objective-C exceptions caught while executing a command.
  static let exception: String = "AgentDeviceRunner.NSException"
}
16
+
17
/// `NSError` codes paired with the `RunnerErrorDomain` values.
///
/// The first three codes are used with the general domain. `objcException`
/// is used with the exception domain, so it reuses the value 1 without
/// clashing with `noResponseFromMainThread`.
private enum RunnerErrorCode {
  // General domain.
  static let noResponseFromMainThread: Int = 1
  static let commandReturnedNoResponse: Int = 2
  static let mainThreadExecutionTimedOut: Int = 3

  // Exception domain.
  static let objcException: Int = 1
}
23
+
12
24
  private static let springboardBundleId = "com.apple.springboard"
13
25
  private var listener: NWListener?
14
26
  private var port: UInt16 = 0
@@ -20,6 +32,12 @@ final class RunnerTests: XCTestCase {
20
32
  private let maxRequestBytes = 2 * 1024 * 1024
21
33
  private let maxSnapshotElements = 600
22
34
  private let fastSnapshotLimit = 300
35
+ private let mainThreadExecutionTimeout: TimeInterval = 30
36
+ private let retryCooldown: TimeInterval = 0.2
37
+ private let postSnapshotInteractionDelay: TimeInterval = 0.2
38
+ private let firstInteractionAfterActivateDelay: TimeInterval = 0.25
39
+ private var needsPostSnapshotInteractionDelay = false
40
+ private var needsFirstInteractionDelay = false
23
41
  private let interactiveTypes: Set<XCUIElement.ElementType> = [
24
42
  .button,
25
43
  .cell,
@@ -49,7 +67,7 @@ final class RunnerTests: XCTestCase {
49
67
  ]
50
68
 
51
69
  override func setUp() {
52
- continueAfterFailure = false
70
+ continueAfterFailure = true
53
71
  }
54
72
 
55
73
  @MainActor
@@ -192,47 +210,141 @@ final class RunnerTests: XCTestCase {
192
210
 
193
211
  private func execute(command: Command) throws -> Response {
194
212
  if Thread.isMainThread {
195
- return try executeOnMain(command: command)
213
+ return try executeOnMainSafely(command: command)
196
214
  }
197
215
  var result: Result<Response, Error>?
198
216
  let semaphore = DispatchSemaphore(value: 0)
199
217
  DispatchQueue.main.async {
200
218
  do {
201
- result = .success(try self.executeOnMain(command: command))
219
+ result = .success(try self.executeOnMainSafely(command: command))
202
220
  } catch {
203
221
  result = .failure(error)
204
222
  }
205
223
  semaphore.signal()
206
224
  }
207
- semaphore.wait()
225
+ let waitResult = semaphore.wait(timeout: .now() + mainThreadExecutionTimeout)
226
+ if waitResult == .timedOut {
227
+ // The main queue work may still be running; we stop waiting and report timeout.
228
+ throw NSError(
229
+ domain: RunnerErrorDomain.general,
230
+ code: RunnerErrorCode.mainThreadExecutionTimedOut,
231
+ userInfo: [NSLocalizedDescriptionKey: "main thread execution timed out"]
232
+ )
233
+ }
208
234
  switch result {
209
235
  case .success(let response):
210
236
  return response
211
237
  case .failure(let error):
212
238
  throw error
213
239
  case .none:
214
- throw NSError(domain: "AgentDeviceRunner", code: 1, userInfo: [NSLocalizedDescriptionKey: "no response from main thread"])
240
+ throw NSError(
241
+ domain: RunnerErrorDomain.general,
242
+ code: RunnerErrorCode.noResponseFromMainThread,
243
+ userInfo: [NSLocalizedDescriptionKey: "no response from main thread"]
244
+ )
245
+ }
246
+ }
247
+
248
/// Runs `executeOnMain(command:)` with Objective-C exceptions converted to
/// errors, retrying at most once for commands `shouldRetryCommand` accepts.
///
/// A retry happens when the first attempt raises an Objective-C exception, or
/// when it returns a response that `shouldRetryResponse` accepts. The cached
/// app target is cleared before a retry and whenever an exception is caught.
///
/// - Parameter command: The decoded command to execute.
/// - Returns: The response produced by `executeOnMain(command:)`.
/// - Throws: The Swift error thrown by `executeOnMain(command:)`, or an
///   `NSError` describing a caught exception or a missing response.
private func executeOnMainSafely(command: Command) throws -> Response {
  // One retry is allowed per call; this flips to false once it is spent.
  var retryAvailable = true
  while true {
    var outcome: Result<Response, Error>?
    let caughtMessage = RunnerObjCExceptionCatcher.catchException({
      outcome = Result { try self.executeOnMain(command: command) }
    })

    if let caughtMessage {
      // The cached target may be in a bad state after an exception; drop it.
      currentApp = nil
      currentBundleId = nil
      guard retryAvailable, shouldRetryCommand(command.command) else {
        throw NSError(
          domain: RunnerErrorDomain.exception,
          code: RunnerErrorCode.objcException,
          userInfo: [NSLocalizedDescriptionKey: caughtMessage]
        )
      }
      retryAvailable = false
      sleepFor(retryCooldown)
      continue
    }

    switch outcome {
    case .failure(let error):
      throw error
    case .none:
      throw NSError(
        domain: RunnerErrorDomain.general,
        code: RunnerErrorCode.commandReturnedNoResponse,
        userInfo: [NSLocalizedDescriptionKey: "command returned no response"]
      )
    case .success(let response):
      let wantsRetry = retryAvailable
        && shouldRetryCommand(command.command)
        && shouldRetryResponse(response)
      guard wantsRetry else {
        return response
      }
      retryAvailable = false
      currentApp = nil
      currentBundleId = nil
      sleepFor(retryCooldown)
    }
  }
}
217
295
 
218
296
  private func executeOnMain(command: Command) throws -> Response {
297
+ if command.command == .shutdown {
298
+ return Response(ok: true, data: DataPayload(message: "shutdown"))
299
+ }
300
+
219
301
  let normalizedBundleId = command.appBundleId?
220
302
  .trimmingCharacters(in: .whitespacesAndNewlines)
221
303
  let requestedBundleId = (normalizedBundleId?.isEmpty == true) ? nil : normalizedBundleId
222
- if let bundleId = requestedBundleId, currentBundleId != bundleId {
223
- let target = XCUIApplication(bundleIdentifier: bundleId)
224
- NSLog("AGENT_DEVICE_RUNNER_ACTIVATE bundle=%@ state=%d", bundleId, target.state.rawValue)
225
- // activate avoids terminating and relaunching the target app
226
- target.activate()
227
- currentApp = target
228
- currentBundleId = bundleId
229
- } else if requestedBundleId == nil {
304
+ if let bundleId = requestedBundleId {
305
+ if currentBundleId != bundleId || currentApp == nil {
306
+ _ = activateTarget(bundleId: bundleId, reason: "bundle_changed")
307
+ }
308
+ } else {
230
309
  // Do not reuse stale bundle targets when the caller does not explicitly request one.
231
310
  currentApp = nil
232
311
  currentBundleId = nil
233
312
  }
234
- let activeApp = currentApp ?? app
235
- _ = activeApp.waitForExistence(timeout: 5)
313
+
314
+ var activeApp = currentApp ?? app
315
+ if let bundleId = requestedBundleId, targetNeedsActivation(activeApp) {
316
+ activeApp = activateTarget(bundleId: bundleId, reason: "stale_target")
317
+ } else if requestedBundleId == nil, targetNeedsActivation(activeApp) {
318
+ app.activate()
319
+ activeApp = app
320
+ }
321
+
322
+ if !activeApp.waitForExistence(timeout: 5) {
323
+ if let bundleId = requestedBundleId {
324
+ activeApp = activateTarget(bundleId: bundleId, reason: "missing_after_wait")
325
+ guard activeApp.waitForExistence(timeout: 5) else {
326
+ return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
327
+ }
328
+ } else {
329
+ return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
330
+ }
331
+ }
332
+
333
+ if isInteractionCommand(command.command) {
334
+ if let bundleId = requestedBundleId, activeApp.state != .runningForeground {
335
+ activeApp = activateTarget(bundleId: bundleId, reason: "interaction_foreground_guard")
336
+ } else if requestedBundleId == nil, activeApp.state != .runningForeground {
337
+ app.activate()
338
+ activeApp = app
339
+ }
340
+ if !activeApp.waitForExistence(timeout: 2) {
341
+ if let bundleId = requestedBundleId {
342
+ return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
343
+ }
344
+ return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
345
+ }
346
+ applyInteractionStabilizationIfNeeded()
347
+ }
236
348
 
237
349
  switch command.command {
238
350
  case .shutdown:
@@ -250,6 +362,23 @@ final class RunnerTests: XCTestCase {
250
362
  return Response(ok: true, data: DataPayload(message: "tapped"))
251
363
  }
252
364
  return Response(ok: false, error: ErrorPayload(message: "tap requires text or x/y"))
365
+ case .tapSeries:
366
+ guard let x = command.x, let y = command.y else {
367
+ return Response(ok: false, error: ErrorPayload(message: "tapSeries requires x and y"))
368
+ }
369
+ let count = max(Int(command.count ?? 1), 1)
370
+ let intervalMs = max(command.intervalMs ?? 0, 0)
371
+ let doubleTap = command.doubleTap ?? false
372
+ if doubleTap {
373
+ runSeries(count: count, pauseMs: intervalMs) { _ in
374
+ doubleTapAt(app: activeApp, x: x, y: y)
375
+ }
376
+ return Response(ok: true, data: DataPayload(message: "tap series"))
377
+ }
378
+ runSeries(count: count, pauseMs: intervalMs) { _ in
379
+ tapAt(app: activeApp, x: x, y: y)
380
+ }
381
+ return Response(ok: true, data: DataPayload(message: "tap series"))
253
382
  case .longPress:
254
383
  guard let x = command.x, let y = command.y else {
255
384
  return Response(ok: false, error: ErrorPayload(message: "longPress requires x and y"))
@@ -264,6 +393,26 @@ final class RunnerTests: XCTestCase {
264
393
  let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
265
394
  dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
266
395
  return Response(ok: true, data: DataPayload(message: "dragged"))
396
+ case .dragSeries:
397
+ guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else {
398
+ return Response(ok: false, error: ErrorPayload(message: "dragSeries requires x, y, x2, and y2"))
399
+ }
400
+ let count = max(Int(command.count ?? 1), 1)
401
+ let pauseMs = max(command.pauseMs ?? 0, 0)
402
+ let pattern = command.pattern ?? "one-way"
403
+ if pattern != "one-way" && pattern != "ping-pong" {
404
+ return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong"))
405
+ }
406
+ let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
407
+ runSeries(count: count, pauseMs: pauseMs) { idx in
408
+ let reverse = pattern == "ping-pong" && (idx % 2 == 1)
409
+ if reverse {
410
+ dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration)
411
+ } else {
412
+ dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
413
+ }
414
+ }
415
+ return Response(ok: true, data: DataPayload(message: "drag series"))
267
416
  case .type:
268
417
  guard let text = command.text else {
269
418
  return Response(ok: false, error: ErrorPayload(message: "type requires text"))
@@ -314,8 +463,10 @@ final class RunnerTests: XCTestCase {
314
463
  raw: command.raw ?? false,
315
464
  )
316
465
  if options.raw {
466
+ needsPostSnapshotInteractionDelay = true
317
467
  return Response(ok: true, data: snapshotRaw(app: activeApp, options: options))
318
468
  }
469
+ needsPostSnapshotInteractionDelay = true
319
470
  return Response(ok: true, data: snapshotFast(app: activeApp, options: options))
320
471
  case .back:
321
472
  if tapNavigationBack(app: activeApp) {
@@ -356,6 +507,71 @@ final class RunnerTests: XCTestCase {
356
507
  }
357
508
  }
358
509
 
510
/// Reports whether `target` is in a state that requires activation.
///
/// - Parameter target: The application whose `state` is inspected.
/// - Returns: `true` for the unknown, not-running and both background states;
///   `false` for every other state.
private func targetNeedsActivation(_ target: XCUIApplication) -> Bool {
  let statesNeedingActivation: [XCUIApplication.State] = [
    .unknown,
    .notRunning,
    .runningBackground,
    .runningBackgroundSuspended,
  ]
  return statesNeedingActivation.contains(target.state)
}
518
+
519
/// Activates the app identified by `bundleId` and records it as the current target.
///
/// Logs the app's state as observed before activation together with `reason`,
/// stores the app and bundle id on the runner, and flags that the next
/// interaction should be preceded by the first-interaction delay.
///
/// - Parameters:
///   - bundleId: Bundle identifier of the app to activate.
///   - reason: Short tag written to the log explaining why activation happened.
/// - Returns: The application handle that was activated.
private func activateTarget(bundleId: String, reason: String) -> XCUIApplication {
  let application = XCUIApplication(bundleIdentifier: bundleId)
  // Read the state before activating so the log reflects what prompted the call.
  let stateBeforeActivation = application.state.rawValue
  NSLog(
    "AGENT_DEVICE_RUNNER_ACTIVATE bundle=%@ state=%d reason=%@",
    bundleId,
    stateBeforeActivation,
    reason
  )
  // activate avoids terminating and relaunching the target app
  application.activate()
  currentBundleId = bundleId
  currentApp = application
  needsFirstInteractionDelay = true
  return application
}
534
+
535
/// Reports whether `command` is one of the gestures eligible for a single retry.
///
/// - Parameter command: The command type being executed.
/// - Returns: `true` for `.tap`, `.longPress` and `.drag`; `false` otherwise.
private func shouldRetryCommand(_ command: CommandType) -> Bool {
  let retryableCommands: [CommandType] = [.tap, .longPress, .drag]
  return retryableCommands.contains(command)
}
543
+
544
/// Reports whether a failed response indicates that the app was unavailable.
///
/// - Parameter response: The response returned by a command.
/// - Returns: `true` only when the response is not ok and its error message
///   contains "is not available", compared case-insensitively.
private func shouldRetryResponse(_ response: Response) -> Bool {
  if response.ok == false, let errorText = response.error?.message {
    return errorText.lowercased().contains("is not available")
  }
  return false
}
549
+
550
/// Reports whether `command` drives the UI of the target app.
///
/// `executeOnMain(command:)` uses this to decide whether to re-activate a
/// target that is not in the foreground and to apply the interaction
/// stabilization delays before running the command. The series variants
/// perform the same taps and drags as their single-shot counterparts, so they
/// are treated as interactions too.
///
/// - Parameter command: The command type being executed.
/// - Returns: `true` for gesture and input commands; `false` otherwise.
private func isInteractionCommand(_ command: CommandType) -> Bool {
  switch command {
  case .tap, .tapSeries, .longPress, .drag, .dragSeries,
       .type, .swipe, .back, .appSwitcher, .pinch:
    return true
  default:
    return false
  }
}
558
+
559
/// Applies any pending stabilization pauses and clears their flags.
///
/// The post-snapshot pause is handled first, then the first-interaction pause
/// set by target activation. Each pause runs at most once per time its flag
/// was raised.
private func applyInteractionStabilizationIfNeeded() {
  let snapshotPausePending = needsPostSnapshotInteractionDelay
  if snapshotPausePending {
    sleepFor(postSnapshotInteractionDelay)
    needsPostSnapshotInteractionDelay = false
  }

  let activationPausePending = needsFirstInteractionDelay
  if activationPausePending {
    sleepFor(firstInteractionAfterActivateDelay)
    needsFirstInteractionDelay = false
  }
}
569
+
570
/// Blocks the calling thread for `delay` seconds.
///
/// Non-positive, NaN and infinite delays are ignored. The pause uses
/// `Thread.sleep(forTimeInterval:)`, which takes the interval as a `Double`,
/// so long delays cannot overflow the 32-bit microsecond count that `usleep`
/// requires.
///
/// - Parameter delay: Time to sleep, in seconds.
private func sleepFor(_ delay: TimeInterval) {
  guard delay.isFinite, delay > 0 else { return }
  Thread.sleep(forTimeInterval: delay)
}
574
+
359
575
  private func tapNavigationBack(app: XCUIApplication) -> Bool {
360
576
  let buttons = app.navigationBars.buttons.allElementsBoundByIndex
361
577
  if let back = buttons.first(where: { $0.isHittable }) {
@@ -443,6 +659,12 @@ final class RunnerTests: XCTestCase {
443
659
  coordinate.tap()
444
660
  }
445
661
 
662
/// Double-taps at a point offset by (`x`, `y`) from the app's top-left origin.
///
/// - Parameters:
///   - app: Application whose coordinate space is used.
///   - x: Horizontal offset from the origin.
///   - y: Vertical offset from the origin.
private func doubleTapAt(app: XCUIApplication, x: Double, y: Double) {
  let offsetFromOrigin = CGVector(dx: x, dy: y)
  app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0))
    .withOffset(offsetFromOrigin)
    .doubleTap()
}
667
+
446
668
  private func longPressAt(app: XCUIApplication, x: Double, y: Double, duration: TimeInterval) {
447
669
  let origin = app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0))
448
670
  let coordinate = origin.withOffset(CGVector(dx: x, dy: y))
@@ -463,6 +685,17 @@ final class RunnerTests: XCTestCase {
463
685
  start.press(forDuration: holdDuration, thenDragTo: end)
464
686
  }
465
687
 
688
/// Invokes `operation` repeatedly, pausing between consecutive invocations.
///
/// - Parameters:
///   - count: Number of invocations; values below 1 are treated as 1.
///   - pauseMs: Pause between invocations in milliseconds; non-positive
///     values mean no pause. No pause follows the final invocation.
///   - operation: Called with the zero-based index of each invocation.
private func runSeries(count: Int, pauseMs: Double, operation: (Int) -> Void) {
  let iterations = max(count, 1)
  let pauseMilliseconds = max(pauseMs, 0)
  var index = 0
  while index < iterations {
    operation(index)
    index += 1
    // Only pause when another invocation is still to come.
    if index < iterations, pauseMilliseconds > 0 {
      Thread.sleep(forTimeInterval: pauseMilliseconds / 1000.0)
    }
  }
}
698
+
466
699
  private func swipe(app: XCUIApplication, direction: SwipeDirection) {
467
700
  let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app
468
701
  let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.2))
@@ -982,8 +1215,10 @@ private func resolveRunnerPort() -> UInt16 {
982
1215
 
983
1216
  enum CommandType: String, Codable {
984
1217
  case tap
1218
+ case tapSeries
985
1219
  case longPress
986
1220
  case drag
1221
+ case dragSeries
987
1222
  case type
988
1223
  case swipe
989
1224
  case findText
@@ -1012,6 +1247,11 @@ struct Command: Codable {
1012
1247
  let action: String?
1013
1248
  let x: Double?
1014
1249
  let y: Double?
1250
+ let count: Double?
1251
+ let intervalMs: Double?
1252
+ let doubleTap: Bool?
1253
+ let pauseMs: Double?
1254
+ let pattern: String?
1015
1255
  let x2: Double?
1016
1256
  let y2: Double?
1017
1257
  let durationMs: Double?
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-device",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "description": "Unified control plane for physical and virtual devices via an agent-driven CLI.",
5
5
  "license": "MIT",
6
6
  "author": "Callstack",
@@ -12,7 +12,7 @@ For agent-driven exploration: use refs. For deterministic replay scripts: use se
12
12
  ```bash
13
13
  agent-device open Settings --platform ios
14
14
  agent-device snapshot -i
15
- agent-device click @e3
15
+ agent-device press @e3
16
16
  agent-device wait text "Camera"
17
17
  agent-device alert wait 10000
18
18
  agent-device fill @e5 "test"
@@ -29,7 +29,7 @@ npx -y agent-device
29
29
 
30
30
  1. Open app or deep link: `open [app|url] [url]` (`open` handles target selection + boot/activation in the normal flow)
31
31
  2. Snapshot: `snapshot` to get refs from accessibility tree
32
- 3. Interact using refs (`click @ref`, `fill @ref "text"`)
32
+ 3. Interact using refs (`press @ref`, `fill @ref "text"`; `click` is an alias of `press`)
33
33
  4. Re-snapshot after navigation/UI changes
34
34
  5. Close session when done
35
35
 
@@ -109,13 +109,15 @@ agent-device appstate
109
109
  ### Interactions (use @refs from snapshot)
110
110
 
111
111
  ```bash
112
- agent-device click @e1
112
+ agent-device press @e1 # Canonical tap command (`click` is an alias)
113
113
  agent-device focus @e2
114
114
  agent-device fill @e2 "text" # Clear then type (Android: verifies value and retries once on mismatch)
115
115
  agent-device type "text" # Type into focused field without clearing
116
116
  agent-device press 300 500 # Tap by coordinates
117
117
  agent-device press 300 500 --count 12 --interval-ms 45
118
118
  agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2
119
+ agent-device press @e1 --count 5 # Repeat taps on the same target
120
+ agent-device press @e1 --count 5 --double-tap # Use double-tap gesture per iteration
119
121
  agent-device swipe 540 1500 540 500 120
120
122
  agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong
121
123
  agent-device long-press 300 500 800 # Long press (where supported)
@@ -155,6 +157,50 @@ agent-device replay -u ./session.ad # Update selector drift and rewrite .ad sc
155
157
  `--save-script` path is a file path; parent directories are created automatically.
156
158
  For ambiguous bare values, use `--save-script=workflow.ad` or `./workflow.ad`.
157
159
 
160
+ ### Fast batching (JSON steps)
161
+
162
+ Use `batch` when an agent already has a known short sequence and wants fewer orchestration round trips.
163
+
164
+ ```bash
165
+ agent-device batch \
166
+ --session sim \
167
+ --platform ios \
168
+ --udid 00008150-001849640CF8401C \
169
+ --steps-file /tmp/batch-steps.json \
170
+ --json
171
+ ```
172
+
173
+ Inline JSON works for small payloads:
174
+
175
+ ```bash
176
+ agent-device batch --steps '[{"command":"open","positionals":["settings"]},{"command":"wait","positionals":["100"]}]'
177
+ ```
178
+
179
+ Step format:
180
+
181
+ ```json
182
+ [
183
+ { "command": "open", "positionals": ["settings"], "flags": {} },
184
+ { "command": "wait", "positionals": ["label=\"Privacy & Security\"", "3000"], "flags": {} },
185
+ { "command": "click", "positionals": ["label=\"Privacy & Security\""], "flags": {} },
186
+ { "command": "get", "positionals": ["text", "label=\"Tracking\""], "flags": {} }
187
+ ]
188
+ ```
189
+
190
+ Batch best practices:
191
+
192
+ - Batch one screen-local flow at a time.
193
+ - Add sync guards (`wait`, `is exists`) after mutating steps (`open`, `click`, `fill`, `swipe`).
194
+ - Treat prior refs/snapshot assumptions as stale after UI mutations.
195
+ - Prefer `--steps-file` over inline JSON.
196
+ - Keep batches moderate (about 5-20 steps).
197
+ - Use failure context (`step`, `partialResults`) to replan from the failed step.
198
+
199
+ Stale accessibility tree note:
200
+
201
+ - Rapid mutations can outrun accessibility tree updates.
202
+ - Mitigate with explicit waits and phase splitting (navigate, verify/extract, cleanup).
203
+
158
204
  ### Trace logs (XCTest)
159
205
 
160
206
  ```bash
@@ -178,7 +224,10 @@ agent-device apps --platform android --user-installed
178
224
 
179
225
  ## Best practices
180
226
 
181
- - `press` supports gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`.
227
+ - `press` is the canonical tap command; `click` is an alias with the same behavior.
228
+ - `press` (and `click`) accepts `x y`, `@ref`, and selector targets.
229
+ - `press`/`click` support gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`, `--double-tap`.
230
+ - `--double-tap` cannot be combined with `--hold-ms` or `--jitter-px`.
182
231
  - `swipe` supports coordinate + timing controls and repeat patterns: `swipe x1 y1 x2 y2 [durationMs] --count --pause-ms --pattern`.
183
232
  - `swipe` timing is platform-safe: Android uses requested duration; iOS uses normalized safe timing to avoid long-press side effects.
184
233
  - Pinch (`pinch <scale> [x y]`) is iOS simulator-only; scale > 1 zooms in, < 1 zooms out.
@@ -208,3 +257,4 @@ agent-device apps --platform android --user-installed
208
257
  - [references/permissions.md](references/permissions.md)
209
258
  - [references/video-recording.md](references/video-recording.md)
210
259
  - [references/coordinate-system.md](references/coordinate-system.md)
260
+ - [references/batching.md](references/batching.md)
@@ -0,0 +1,79 @@
1
+ # Batching
2
+
3
+ ## When to use batch
4
+
5
+ - The agent already knows a short sequence of commands.
6
+ - Steps belong to one logical screen flow.
7
+ - You want one result object with per-step timing and failure context.
8
+
9
+ ## When not to use batch
10
+
11
+ - Flows are unrelated and should be retried independently.
12
+ - The workflow is highly dynamic and requires replanning after each step.
13
+ - You need human approvals between steps.
14
+
15
+ ## CLI patterns
16
+
17
+ From file:
18
+
19
+ ```bash
20
+ agent-device batch --session sim --platform ios --steps-file /tmp/batch-steps.json --json
21
+ ```
22
+
23
+ Inline (small payloads only):
24
+
25
+ ```bash
26
+ agent-device batch --steps '[{"command":"open","positionals":["settings"]}]'
27
+ ```
28
+
29
+ ## Step payload contract
30
+
31
+ ```json
32
+ [
33
+ { "command": "open", "positionals": ["settings"], "flags": {} },
34
+ { "command": "wait", "positionals": ["label=\"Privacy & Security\"", "3000"], "flags": {} },
35
+ { "command": "click", "positionals": ["label=\"Privacy & Security\""], "flags": {} },
36
+ { "command": "get", "positionals": ["text", "label=\"Tracking\""], "flags": {} }
37
+ ]
38
+ ```
39
+
40
+ Rules:
41
+
42
+ - `positionals` optional, defaults to `[]`.
43
+ - `flags` optional, defaults to `{}`.
44
+ - nested `batch` and `replay` are rejected.
45
+ - stop-on-first-error is the supported mode (`--on-error stop`).
46
+
47
+ ## Response handling
48
+
49
+ Success includes:
50
+
51
+ - `total`, `executed`, `totalDurationMs`
52
+ - `results[]` entries with `step`, `command`, `durationMs`, and optional `data`
53
+
54
+ Failure includes:
55
+
56
+ - `details.step`
57
+ - `details.command`
58
+ - `details.executed`
59
+ - `details.partialResults`
60
+
61
+ Use these fields to replan from the first failing step.
62
+
63
+ ## Common error categories and agent actions
64
+
65
+ - `INVALID_ARGS`: payload/step shape issue; fix payload and retry.
66
+ - `SESSION_NOT_FOUND`: open or select the correct session, then retry.
67
+ - `UNSUPPORTED_OPERATION`: switch command/target to supported operation.
68
+ - `AMBIGUOUS_MATCH`: refine selector/locator, then retry failed step.
69
+ - `COMMAND_FAILED`: add sync guard (`wait`, `is exists`) and retry from failed step.
70
+
71
+ ## Reliability guardrails
72
+
73
+ - Add sync guards after mutating steps.
74
+ - Assume snapshot/ref drift after navigation.
75
+ - Keep batch size moderate (about 5-20 steps).
76
+ - Split long workflows into phases:
77
+ 1. navigate
78
+ 2. verify/extract
79
+ 3. cleanup
@@ -22,6 +22,10 @@ If daemon startup fails with stale metadata hints, clean stale files and retry:
22
22
  - `~/.agent-device/daemon.json`
23
23
  - `~/.agent-device/daemon.lock`
24
24
 
25
+ ## iOS: "Allow Paste" dialog
26
+
27
+ iOS 16+ shows an "Allow Paste" prompt when an app reads the system pasteboard. Under XCUITest (which `agent-device` uses), this prompt is suppressed by the testing runtime. To set clipboard content directly on the simulator instead, pipe it into `xcrun simctl pbcopy booted`, which reads from standard input, for example `echo "some text" | xcrun simctl pbcopy booted`.
28
+
25
29
  ## Simulator troubleshooting
26
30
 
27
31
  - If snapshots return 0 nodes, restart Simulator and re-open the app.
@@ -3,6 +3,7 @@
3
3
  ## Purpose
4
4
 
5
5
  Refs are useful for discovery/debugging. For deterministic scripts, use selectors.
6
+ For tap interactions, `press` is canonical; `click` is an equivalent alias.
6
7
 
7
8
  ## Snapshot
8
9
 
@@ -24,14 +25,14 @@ App: com.apple.Preferences
24
25
  ## Using refs (discovery/debug)
25
26
 
26
27
  ```bash
27
- agent-device click @e2
28
+ agent-device press @e2
28
29
  agent-device fill @e5 "test"
29
30
  ```
30
31
 
31
32
  ## Using selectors (deterministic)
32
33
 
33
34
  ```bash
34
- agent-device click 'id="camera_row" || label="Camera" role=button'
35
+ agent-device press 'id="camera_row" || label="Camera" role=button'
35
36
  agent-device fill 'id="search_input" editable=true' "test"
36
37
  agent-device is visible 'id="camera_settings_anchor"'
37
38
  ```
package/dist/src/797.js DELETED
@@ -1 +0,0 @@
1
- import e,{promises as t}from"node:fs";import r from"node:path";import{fileURLToPath as n,pathToFileURL as o}from"node:url";import{spawn as i,spawnSync as u}from"node:child_process";class s extends Error{code;details;cause;constructor(e,t,r,n){super(t),this.code=e,this.details=r,this.cause=n}}function d(e){return e instanceof s?e:e instanceof Error?new s("UNKNOWN",e.message,void 0,e):new s("UNKNOWN","Unknown error",{err:e})}function a(){try{let t=l();return JSON.parse(e.readFileSync(r.join(t,"package.json"),"utf8")).version??"0.0.0"}catch{return"0.0.0"}}function l(){let t=r.dirname(n(import.meta.url)),o=t;for(let t=0;t<6;t+=1){let t=r.join(o,"package.json");if(e.existsSync(t))return o;o=r.dirname(o)}return t}async function c(e,t,r={}){return new Promise((n,o)=>{let u=i(e,t,{cwd:r.cwd,env:r.env,stdio:["pipe","pipe","pipe"]}),d="",a=r.binaryStdout?Buffer.alloc(0):void 0,l="",c=!1,f=h(r.timeoutMs),m=f?setTimeout(()=>{c=!0,u.kill("SIGKILL")},f):null;r.binaryStdout||u.stdout.setEncoding("utf8"),u.stderr.setEncoding("utf8"),void 0!==r.stdin&&u.stdin.write(r.stdin),u.stdin.end(),u.stdout.on("data",e=>{r.binaryStdout?a=Buffer.concat([a??Buffer.alloc(0),Buffer.isBuffer(e)?e:Buffer.from(e)]):d+=e}),u.stderr.on("data",e=>{l+=e}),u.on("error",r=>{(m&&clearTimeout(m),"ENOENT"===r.code)?o(new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},r)):o(new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},r))}),u.on("close",i=>{m&&clearTimeout(m);let u=i??1;c&&f?o(new s("COMMAND_FAILED",`${e} timed out after ${f}ms`,{cmd:e,args:t,stdout:d,stderr:l,exitCode:u,timeoutMs:f})):0===u||r.allowFailure?n({stdout:d,stderr:l,exitCode:u,stdoutBuffer:a}):o(new s("COMMAND_FAILED",`${e} exited with code ${u}`,{cmd:e,args:t,stdout:d,stderr:l,exitCode:u}))})})}async function f(e){try{var t;let{shell:r,args:n}=(t=e,"win32"===process.platform?{shell:"cmd.exe",args:["/c","where",t]}:{shell:"bash",args:["-lc",`command -v ${t}`]}),o=await c(r,n,{allowFailure:!0});return 
0===o.exitCode&&o.stdout.trim().length>0}catch{return!1}}function m(e,t,r={}){let n=u(e,t,{cwd:r.cwd,env:r.env,stdio:["pipe","pipe","pipe"],encoding:r.binaryStdout?void 0:"utf8",input:r.stdin,timeout:h(r.timeoutMs)});if(n.error){let o=n.error.code;if("ETIMEDOUT"===o)throw new s("COMMAND_FAILED",`${e} timed out after ${h(r.timeoutMs)}ms`,{cmd:e,args:t,timeoutMs:h(r.timeoutMs)},n.error);if("ENOENT"===o)throw new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},n.error);throw new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},n.error)}let o=r.binaryStdout?Buffer.isBuffer(n.stdout)?n.stdout:Buffer.from(n.stdout??""):void 0,i=r.binaryStdout?"":"string"==typeof n.stdout?n.stdout:(n.stdout??"").toString(),d="string"==typeof n.stderr?n.stderr:(n.stderr??"").toString(),a=n.status??1;if(0!==a&&!r.allowFailure)throw new s("COMMAND_FAILED",`${e} exited with code ${a}`,{cmd:e,args:t,stdout:i,stderr:d,exitCode:a});return{stdout:i,stderr:d,exitCode:a,stdoutBuffer:o}}function w(e,t,r={}){i(e,t,{cwd:r.cwd,env:r.env,stdio:"ignore",detached:!0}).unref()}async function p(e,t,r={}){return new Promise((n,o)=>{let u=i(e,t,{cwd:r.cwd,env:r.env,stdio:["pipe","pipe","pipe"]}),d="",a="",l=r.binaryStdout?Buffer.alloc(0):void 0;r.binaryStdout||u.stdout.setEncoding("utf8"),u.stderr.setEncoding("utf8"),void 0!==r.stdin&&u.stdin.write(r.stdin),u.stdin.end(),u.stdout.on("data",e=>{if(r.binaryStdout){l=Buffer.concat([l??Buffer.alloc(0),Buffer.isBuffer(e)?e:Buffer.from(e)]);return}let t=String(e);d+=t,r.onStdoutChunk?.(t)}),u.stderr.on("data",e=>{let t=String(e);a+=t,r.onStderrChunk?.(t)}),u.on("error",r=>{"ENOENT"===r.code?o(new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},r)):o(new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},r))}),u.on("close",i=>{let u=i??1;0===u||r.allowFailure?n({stdout:d,stderr:a,exitCode:u,stdoutBuffer:l}):o(new s("COMMAND_FAILED",`${e} exited with code ${u}`,{cmd:e,args:t,stdout:d,stderr:a,exitCode:u}))})})}function M(e,t,r={}){let 
n=i(e,t,{cwd:r.cwd,env:r.env,stdio:["ignore","pipe","pipe"]}),o="",u="";n.stdout.setEncoding("utf8"),n.stderr.setEncoding("utf8"),n.stdout.on("data",e=>{o+=e}),n.stderr.on("data",e=>{u+=e});let d=new Promise((i,d)=>{n.on("error",r=>{"ENOENT"===r.code?d(new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},r)):d(new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},r))}),n.on("close",n=>{let a=n??1;0===a||r.allowFailure?i({stdout:o,stderr:u,exitCode:a}):d(new s("COMMAND_FAILED",`${e} exited with code ${a}`,{cmd:e,args:t,stdout:o,stderr:u,exitCode:a}))})});return{child:n,wait:d}}function h(e){if(!Number.isFinite(e))return;let t=Math.floor(e);if(!(t<=0))return t}let E=[/(^|[\/\s"'=])dist\/src\/daemon\.js($|[\s"'])/,/(^|[\/\s"'=])src\/daemon\.ts($|[\s"'])/];function S(e){if(!Number.isInteger(e)||e<=0)return!1;try{return process.kill(e,0),!0}catch(e){return"EPERM"===e.code}}function N(e){if(!Number.isInteger(e)||e<=0)return null;try{let t=m("ps",["-p",String(e),"-o","lstart="],{allowFailure:!0,timeoutMs:1e3});if(0!==t.exitCode)return null;let r=t.stdout.trim();return r.length>0?r:null}catch{return null}}function g(e,t){let r;if(!S(e))return!1;if(t){let r=N(e);if(!r||r!==t)return!1}let n=function(e){if(!Number.isInteger(e)||e<=0)return null;try{let t=m("ps",["-p",String(e),"-o","command="],{allowFailure:!0,timeoutMs:1e3});if(0!==t.exitCode)return null;let r=t.stdout.trim();return r.length>0?r:null}catch{return null}}(e);return!!n&&!!(r=n.toLowerCase().replaceAll("\\","/")).includes("agent-device")&&E.some(e=>e.test(r))}function A(e,t){try{return process.kill(e,t),!0}catch(t){let e=t.code;if("ESRCH"===e||"EPERM"===e)return!1;throw t}}async function I(e,t){if(!S(e))return!0;let r=Date.now();for(;Date.now()-r<t;)if(await new Promise(e=>setTimeout(e,50)),!S(e))return!0;return!S(e)}async function D(e,t){!g(e,t.expectedStartTime)||!A(e,"SIGTERM")||await I(e,t.termTimeoutMs)||A(e,"SIGKILL")&&await I(e,t.killTimeoutMs)}export{default as 
node_net}from"node:net";export{default as node_os}from"node:os";export{s as AppError,d as asAppError,n as fileURLToPath,l as findProjectRoot,g as isAgentDeviceDaemonProcess,S as isProcessAlive,e as node_fs,r as node_path,o as pathToFileURL,t as promises,N as readProcessStartTime,a as readVersion,c as runCmd,M as runCmdBackground,w as runCmdDetached,p as runCmdStreaming,D as stopProcessForTakeover,f as whichCmd};