npm - agent-device - Versions diffs - 0.5.0 → 0.5.2 - Mend

agent-device 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/ios-runner/AgentDeviceRunner/AgentDeviceRunner.xcodeproj/project.pbxproj CHANGED Viewed

@@ -401,6 +401,7 @@
 				STRING_CATALOG_GENERATE_SYMBOLS = NO;
 				SWIFT_APPROACHABLE_CONCURRENCY = YES;
 				SWIFT_EMIT_LOC_STRINGS = NO;
+				SWIFT_OBJC_BRIDGING_HEADER = "AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h";
 				SWIFT_UPCOMING_FEATURE_MEMBER_IMPORT_VISIBILITY = YES;
 				SWIFT_VERSION = 5.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
@@ -422,6 +423,7 @@
 				STRING_CATALOG_GENERATE_SYMBOLS = NO;
 				SWIFT_APPROACHABLE_CONCURRENCY = YES;
 				SWIFT_EMIT_LOC_STRINGS = NO;
+				SWIFT_OBJC_BRIDGING_HEADER = "AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h";
 				SWIFT_UPCOMING_FEATURE_MEMBER_IMPORT_VISIBILITY = YES;
 				SWIFT_VERSION = 5.0;
 				TARGETED_DEVICE_FAMILY = "1,2";

package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h ADDED Viewed

	@@ -0,0 +1 @@
1	+ #import "RunnerObjCExceptionCatcher.h"

package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerObjCExceptionCatcher.h ADDED Viewed

@@ -0,0 +1,11 @@
+#import <Foundation/Foundation.h>
+NS_ASSUME_NONNULL_BEGIN
+/// Helper exposed to the Swift UI tests through the bridging header so they can
+/// contain Objective-C exceptions raised while a block runs.
+@interface RunnerObjCExceptionCatcher : NSObject
+/// Runs `tryBlock` synchronously.
+/// Returns nil when the block completes without raising, otherwise a
+/// "<name>: <reason>" description of the NSException that was caught.
++ (NSString * _Nullable)catchException:(NS_NOESCAPE dispatch_block_t)tryBlock;
+@end
+NS_ASSUME_NONNULL_END

package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerObjCExceptionCatcher.m ADDED Viewed

@@ -0,0 +1,16 @@
+#import "RunnerObjCExceptionCatcher.h"
+@implementation RunnerObjCExceptionCatcher
+// Invokes tryBlock inside @try and converts a raised NSException into a string.
+// Returns nil when the block finishes without raising.
++ (NSString * _Nullable)catchException:(NS_NOESCAPE dispatch_block_t)tryBlock {
+  @try {
+    tryBlock();
+    return nil;
+  } @catch (NSException *exception) {
+    // Only NSException instances are caught here. Fall back to generic text
+    // when the exception carries no name or reason.
+    NSString *name = exception.name ?: @"NSException";
+    NSString *reason = exception.reason ?: @"Unhandled XCTest exception";
+    return [NSString stringWithFormat:@"%@: %@", name, reason];
+  }
+}
+@end

package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift CHANGED Viewed

@@ -9,6 +9,18 @@ import XCTest
 import Network
 final class RunnerTests: XCTestCase {
+  /// NSError domains used for errors thrown by the runner.
+  private enum RunnerErrorDomain {
+    static let general = "AgentDeviceRunner"
+    static let exception = "AgentDeviceRunner.NSException"
+  }
+  /// NSError codes; a code is only meaningful together with the domain it is thrown under.
+  private enum RunnerErrorCode {
+    // Codes thrown under RunnerErrorDomain.general.
+    static let noResponseFromMainThread = 1
+    static let commandReturnedNoResponse = 2
+    static let mainThreadExecutionTimedOut = 3
+    // Thrown under RunnerErrorDomain.exception, so reusing the value 1 does not
+    // collide with noResponseFromMainThread.
+    static let objcException = 1
+  }
   private static let springboardBundleId = "com.apple.springboard"
   private var listener: NWListener?
   private var port: UInt16 = 0
@@ -20,6 +32,12 @@ final class RunnerTests: XCTestCase {
   private let maxRequestBytes = 2 * 1024 * 1024
   private let maxSnapshotElements = 600
   private let fastSnapshotLimit = 300
+  private let mainThreadExecutionTimeout: TimeInterval = 30
+  private let retryCooldown: TimeInterval = 0.2
+  private let postSnapshotInteractionDelay: TimeInterval = 0.2
+  private let firstInteractionAfterActivateDelay: TimeInterval = 0.25
+  private var needsPostSnapshotInteractionDelay = false
+  private var needsFirstInteractionDelay = false
   private let interactiveTypes: Set<XCUIElement.ElementType> = [
     .button,
     .cell,
@@ -49,7 +67,7 @@ final class RunnerTests: XCTestCase {
   ]
   override func setUp() {
-    continueAfterFailure = false
+    continueAfterFailure = true
   }
   @MainActor
@@ -192,47 +210,141 @@ final class RunnerTests: XCTestCase {
   private func execute(command: Command) throws -> Response {
     if Thread.isMainThread {
-      return try executeOnMain(command: command)
+      return try executeOnMainSafely(command: command)
     }
     var result: Result<Response, Error>?
     let semaphore = DispatchSemaphore(value: 0)
     DispatchQueue.main.async {
       do {
-        result = .success(try self.executeOnMain(command: command))
+        result = .success(try self.executeOnMainSafely(command: command))
       } catch {
         result = .failure(error)
       }
       semaphore.signal()
     }
-    semaphore.wait()
+    let waitResult = semaphore.wait(timeout: .now() + mainThreadExecutionTimeout)
+    if waitResult == .timedOut {
+      // The main queue work may still be running; we stop waiting and report timeout.
+      throw NSError(
+        domain: RunnerErrorDomain.general,
+        code: RunnerErrorCode.mainThreadExecutionTimedOut,
+        userInfo: [NSLocalizedDescriptionKey: "main thread execution timed out"]
+      )
+    }
     switch result {
     case .success(let response):
       return response
     case .failure(let error):
       throw error
     case .none:
-      throw NSError(domain: "AgentDeviceRunner", code: 1, userInfo: [NSLocalizedDescriptionKey: "no response from main thread"])
+      throw NSError(
+        domain: RunnerErrorDomain.general,
+        code: RunnerErrorCode.noResponseFromMainThread,
+        userInfo: [NSLocalizedDescriptionKey: "no response from main thread"]
+      )
+    }
+  }
+  /// Runs `executeOnMain` with Objective-C exceptions contained and at most one retry.
+  ///
+  /// - Returns: The response produced by `executeOnMain`.
+  /// - Throws: The Swift error thrown by `executeOnMain`, or an `NSError` when an
+  ///   Objective-C exception was caught or no response was produced.
+  private func executeOnMainSafely(command: Command) throws -> Response {
+    // Shared by the exception path and the failed-response path, so a command is
+    // retried at most once in total.
+    var hasRetried = false
+    while true {
+      var response: Response?
+      var swiftError: Error?
+      // The block passed to the catcher cannot throw, so Swift errors are captured
+      // into swiftError and rethrown after the block returns.
+      let exceptionMessage = RunnerObjCExceptionCatcher.catchException({
+        do {
+          response = try self.executeOnMain(command: command)
+        } catch {
+          swiftError = error
+        }
+      })
+      if let exceptionMessage {
+        // Drop the cached target so the next attempt resolves the app again.
+        currentApp = nil
+        currentBundleId = nil
+        if !hasRetried, shouldRetryCommand(command.command) {
+          hasRetried = true
+          sleepFor(retryCooldown)
+          continue
+        }
+        throw NSError(
+          domain: RunnerErrorDomain.exception,
+          code: RunnerErrorCode.objcException,
+          userInfo: [NSLocalizedDescriptionKey: exceptionMessage]
+        )
+      }
+      // Swift errors are propagated as-is and never retried.
+      if let swiftError {
+        throw swiftError
+      }
+      guard let response else {
+        throw NSError(
+          domain: RunnerErrorDomain.general,
+          code: RunnerErrorCode.commandReturnedNoResponse,
+          userInfo: [NSLocalizedDescriptionKey: "command returned no response"]
+        )
+      }
+      // A failed response that shouldRetryResponse accepts gets one retry with a fresh target.
+      if !hasRetried, shouldRetryCommand(command.command), shouldRetryResponse(response) {
+        hasRetried = true
+        currentApp = nil
+        currentBundleId = nil
+        sleepFor(retryCooldown)
+        continue
+      }
+      return response
     }
   }
   private func executeOnMain(command: Command) throws -> Response {
+    if command.command == .shutdown {
+      return Response(ok: true, data: DataPayload(message: "shutdown"))
+    }
     let normalizedBundleId = command.appBundleId?
       .trimmingCharacters(in: .whitespacesAndNewlines)
     let requestedBundleId = (normalizedBundleId?.isEmpty == true) ? nil : normalizedBundleId
-    if let bundleId = requestedBundleId, currentBundleId != bundleId {
-      let target = XCUIApplication(bundleIdentifier: bundleId)
-      NSLog("AGENT_DEVICE_RUNNER_ACTIVATE bundle=%@ state=%d", bundleId, target.state.rawValue)
-      // activate avoids terminating and relaunching the target app
-      target.activate()
-      currentApp = target
-      currentBundleId = bundleId
-    } else if requestedBundleId == nil {
+    if let bundleId = requestedBundleId {
+      if currentBundleId != bundleId || currentApp == nil {
+        _ = activateTarget(bundleId: bundleId, reason: "bundle_changed")
+      }
+    } else {
       // Do not reuse stale bundle targets when the caller does not explicitly request one.
       currentApp = nil
       currentBundleId = nil
     }
-    let activeApp = currentApp ?? app
-    _ = activeApp.waitForExistence(timeout: 5)
+    var activeApp = currentApp ?? app
+    if let bundleId = requestedBundleId, targetNeedsActivation(activeApp) {
+      activeApp = activateTarget(bundleId: bundleId, reason: "stale_target")
+    } else if requestedBundleId == nil, targetNeedsActivation(activeApp) {
+      app.activate()
+      activeApp = app
+    }
+    if !activeApp.waitForExistence(timeout: 5) {
+      if let bundleId = requestedBundleId {
+        activeApp = activateTarget(bundleId: bundleId, reason: "missing_after_wait")
+        guard activeApp.waitForExistence(timeout: 5) else {
+          return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
+        }
+      } else {
+        return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
+      }
+    }
+    if isInteractionCommand(command.command) {
+      if let bundleId = requestedBundleId, activeApp.state != .runningForeground {
+        activeApp = activateTarget(bundleId: bundleId, reason: "interaction_foreground_guard")
+      } else if requestedBundleId == nil, activeApp.state != .runningForeground {
+        app.activate()
+        activeApp = app
+      }
+      if !activeApp.waitForExistence(timeout: 2) {
+        if let bundleId = requestedBundleId {
+          return Response(ok: false, error: ErrorPayload(message: "app '\(bundleId)' is not available"))
+        }
+        return Response(ok: false, error: ErrorPayload(message: "runner app is not available"))
+      }
+      applyInteractionStabilizationIfNeeded()
+    }
     switch command.command {
     case .shutdown:
@@ -250,6 +362,23 @@ final class RunnerTests: XCTestCase {
         return Response(ok: true, data: DataPayload(message: "tapped"))
       }
       return Response(ok: false, error: ErrorPayload(message: "tap requires text or x/y"))
+    case .tapSeries:
+      // Repeats a tap (or double tap) at one coordinate, pausing intervalMs between repetitions.
+      guard let x = command.x, let y = command.y else {
+        return Response(ok: false, error: ErrorPayload(message: "tapSeries requires x and y"))
+      }
+      // Int(_:) traps on NaN, infinite, or out-of-range doubles; reject such counts
+      // with an error response instead of crashing the runner. Truncation toward
+      // zero matches what Int(_:) does for representable values.
+      guard let requestedCount = Int(exactly: (command.count ?? 1).rounded(.towardZero)) else {
+        return Response(ok: false, error: ErrorPayload(message: "tapSeries count is out of range"))
+      }
+      let count = max(requestedCount, 1)
+      let intervalMs = max(command.intervalMs ?? 0, 0)
+      let doubleTap = command.doubleTap ?? false
+      runSeries(count: count, pauseMs: intervalMs) { _ in
+        if doubleTap {
+          doubleTapAt(app: activeApp, x: x, y: y)
+        } else {
+          tapAt(app: activeApp, x: x, y: y)
+        }
+      }
+      return Response(ok: true, data: DataPayload(message: "tap series"))
     case .longPress:
       guard let x = command.x, let y = command.y else {
         return Response(ok: false, error: ErrorPayload(message: "longPress requires x and y"))
@@ -264,6 +393,26 @@ final class RunnerTests: XCTestCase {
       let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
       dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
       return Response(ok: true, data: DataPayload(message: "dragged"))
+    case .dragSeries:
+      // Repeats a drag between two points; "ping-pong" reverses direction on every odd iteration.
+      guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else {
+        return Response(ok: false, error: ErrorPayload(message: "dragSeries requires x, y, x2, and y2"))
+      }
+      // Int(_:) traps on NaN, infinite, or out-of-range doubles; reject such counts
+      // with an error response instead of crashing the runner. Truncation toward
+      // zero matches what Int(_:) does for representable values.
+      guard let requestedCount = Int(exactly: (command.count ?? 1).rounded(.towardZero)) else {
+        return Response(ok: false, error: ErrorPayload(message: "dragSeries count is out of range"))
+      }
+      let count = max(requestedCount, 1)
+      let pauseMs = max(command.pauseMs ?? 0, 0)
+      let pattern = command.pattern ?? "one-way"
+      if pattern != "one-way" && pattern != "ping-pong" {
+        return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong"))
+      }
+      // Hold time is given in milliseconds and clamped to 0.016...10 seconds.
+      let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0)
+      runSeries(count: count, pauseMs: pauseMs) { idx in
+        let reverse = pattern == "ping-pong" && (idx % 2 == 1)
+        if reverse {
+          dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration)
+        } else {
+          dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration)
+        }
+      }
+      return Response(ok: true, data: DataPayload(message: "drag series"))
     case .type:
       guard let text = command.text else {
         return Response(ok: false, error: ErrorPayload(message: "type requires text"))
@@ -314,8 +463,10 @@ final class RunnerTests: XCTestCase {
         raw: command.raw ?? false,
       )
       if options.raw {
+        needsPostSnapshotInteractionDelay = true
         return Response(ok: true, data: snapshotRaw(app: activeApp, options: options))
       }
+      needsPostSnapshotInteractionDelay = true
       return Response(ok: true, data: snapshotFast(app: activeApp, options: options))
     case .back:
       if tapNavigationBack(app: activeApp) {
@@ -356,6 +507,71 @@ final class RunnerTests: XCTestCase {
     }
   }
+  /// Reports whether `target` is in a known non-foreground state and therefore
+  /// has to be activated before it can be driven.
+  private func targetNeedsActivation(_ target: XCUIApplication) -> Bool {
+    switch target.state {
+    case .unknown, .notRunning, .runningBackground, .runningBackgroundSuspended:
+      return true
+    case .runningForeground:
+      return false
+    @unknown default:
+      // States added by future SDKs keep the previous fallback (no activation),
+      // while the compiler can now flag them as unhandled.
+      return false
+    }
+  }
+  /// Activates the app identified by `bundleId` and records it as the current target.
+  ///
+  /// - Parameters:
+  ///   - bundleId: Bundle identifier of the app to activate.
+  ///   - reason: Free-form tag that is only written to the log line.
+  /// - Returns: The newly created `XCUIApplication` handle.
+  private func activateTarget(bundleId: String, reason: String) -> XCUIApplication {
+    let target = XCUIApplication(bundleIdentifier: bundleId)
+    // The state is logged before activate() is called, i.e. it is the pre-activation state.
+    NSLog(
+      "AGENT_DEVICE_RUNNER_ACTIVATE bundle=%@ state=%d reason=%@",
+      bundleId,
+      target.state.rawValue,
+      reason
+    )
+    // activate avoids terminating and relaunching the target app
+    target.activate()
+    currentApp = target
+    currentBundleId = bundleId
+    // Makes applyInteractionStabilizationIfNeeded pause before the next interaction command.
+    needsFirstInteractionDelay = true
+    return target
+  }
+  /// Reports whether `command` may be re-run once by `executeOnMainSafely`.
+  /// Only single tap, long-press and drag commands qualify.
+  // NOTE(review): tapSeries/dragSeries are not listed; presumably so a partially
+  // executed series is not replayed from the start — confirm.
+  private func shouldRetryCommand(_ command: CommandType) -> Bool {
+    switch command {
+    case .tap, .longPress, .drag:
+      return true
+    default:
+      return false
+    }
+  }
+  /// Reports whether a failed response indicates an unavailable app, i.e. its
+  /// error message contains "is not available" (case-insensitive).
+  private func shouldRetryResponse(_ response: Response) -> Bool {
+    guard response.ok == false,
+          let failureText = response.error?.message.lowercased()
+    else {
+      return false
+    }
+    return failureText.contains("is not available")
+  }
+  /// Reports whether `command` drives the UI and therefore needs the foreground
+  /// guard and the stabilization delay applied by `executeOnMain`.
+  private func isInteractionCommand(_ command: CommandType) -> Bool {
+    switch command {
+    // tapSeries and dragSeries perform the same gestures as tap and drag, so they
+    // get the same foreground and stabilization handling.
+    case .tap, .tapSeries, .longPress, .drag, .dragSeries, .type, .swipe, .back, .appSwitcher, .pinch:
+      return true
+    default:
+      return false
+    }
+  }
+  /// Sleeps for each pending stabilization delay and clears its flag.
+  /// When both flags are set, both delays are applied one after the other.
+  private func applyInteractionStabilizationIfNeeded() {
+    // Set by the snapshot command.
+    if needsPostSnapshotInteractionDelay {
+      sleepFor(postSnapshotInteractionDelay)
+      needsPostSnapshotInteractionDelay = false
+    }
+    // Set by activateTarget.
+    if needsFirstInteractionDelay {
+      sleepFor(firstInteractionAfterActivateDelay)
+      needsFirstInteractionDelay = false
+    }
+  }
+  /// Blocks the calling thread for `delay` seconds; non-positive (or NaN) delays return immediately.
+  private func sleepFor(_ delay: TimeInterval) {
+    guard delay > 0 else { return }
+    // Thread.sleep takes the interval directly, which avoids the trapping
+    // Double -> useconds_t conversion that usleep needed for large or infinite delays.
+    Thread.sleep(forTimeInterval: delay)
+  }
   private func tapNavigationBack(app: XCUIApplication) -> Bool {
     let buttons = app.navigationBars.buttons.allElementsBoundByIndex
     if let back = buttons.first(where: { $0.isHittable }) {
@@ -443,6 +659,12 @@ final class RunnerTests: XCTestCase {
     coordinate.tap()
   }
+  /// Double-taps the point offset by (`x`, `y`) from the top-left corner of `app`.
+  private func doubleTapAt(app: XCUIApplication, x: Double, y: Double) {
+    let offset = CGVector(dx: x, dy: y)
+    app.coordinate(withNormalizedOffset: .zero)
+      .withOffset(offset)
+      .doubleTap()
+  }
   private func longPressAt(app: XCUIApplication, x: Double, y: Double, duration: TimeInterval) {
     let origin = app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0))
     let coordinate = origin.withOffset(CGVector(dx: x, dy: y))
@@ -463,6 +685,17 @@ final class RunnerTests: XCTestCase {
     start.press(forDuration: holdDuration, thenDragTo: end)
   }
+  /// Calls `operation` with indices 0..<count (at least once), sleeping
+  /// `pauseMs` milliseconds between consecutive calls when the pause is positive.
+  private func runSeries(count: Int, pauseMs: Double, operation: (Int) -> Void) {
+    let iterations = max(count, 1)
+    let pauseMillis = max(pauseMs, 0)
+    var idx = 0
+    while idx < iterations {
+      // The pause goes before every call except the first, i.e. only between calls.
+      if idx > 0, pauseMillis > 0 {
+        Thread.sleep(forTimeInterval: pauseMillis / 1000.0)
+      }
+      operation(idx)
+      idx += 1
+    }
+  }
   private func swipe(app: XCUIApplication, direction: SwipeDirection) {
     let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app
     let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.2))
@@ -982,8 +1215,10 @@ private func resolveRunnerPort() -> UInt16 {
 enum CommandType: String, Codable {
   case tap
+  case tapSeries
   case longPress
   case drag
+  case dragSeries
   case type
   case swipe
   case findText
@@ -1012,6 +1247,11 @@ struct Command: Codable {
   let action: String?
   let x: Double?
   let y: Double?
+  let count: Double?
+  let intervalMs: Double?
+  let doubleTap: Bool?
+  let pauseMs: Double?
+  let pattern: String?
   let x2: Double?
   let y2: Double?
   let durationMs: Double?

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-device",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "description": "Unified control plane for physical and virtual devices via an agent-driven CLI.",
   "license": "MIT",
   "author": "Callstack",

package/skills/agent-device/SKILL.md CHANGED Viewed

@@ -12,7 +12,7 @@ For agent-driven exploration: use refs. For deterministic replay scripts: use se
 ```bash
 agent-device open Settings --platform ios
 agent-device snapshot -i
-agent-device click @e3
+agent-device press @e3
 agent-device wait text "Camera"
 agent-device alert wait 10000
 agent-device fill @e5 "test"
@@ -29,7 +29,7 @@ npx -y agent-device
 1. Open app or deep link: `open [app|url] [url]` (`open` handles target selection + boot/activation in the normal flow)
 2. Snapshot: `snapshot` to get refs from accessibility tree
-3. Interact using refs (`click @ref`, `fill @ref "text"`)
+3. Interact using refs (`press @ref`, `fill @ref "text"`; `click` is an alias of `press`)
 4. Re-snapshot after navigation/UI changes
 5. Close session when done
@@ -109,13 +109,15 @@ agent-device appstate
 ### Interactions (use @refs from snapshot)
 ```bash
-agent-device click @e1
+agent-device press @e1                # Canonical tap command (`click` is an alias)
 agent-device focus @e2
 agent-device fill @e2 "text"           # Clear then type (Android: verifies value and retries once on mismatch)
 agent-device type "text"               # Type into focused field without clearing
 agent-device press 300 500             # Tap by coordinates
 agent-device press 300 500 --count 12 --interval-ms 45
 agent-device press 300 500 --count 6 --hold-ms 120 --interval-ms 30 --jitter-px 2
+agent-device press @e1 --count 5             # Repeat taps on the same target
+agent-device press @e1 --count 5 --double-tap # Use double-tap gesture per iteration
 agent-device swipe 540 1500 540 500 120
 agent-device swipe 540 1500 540 500 120 --count 8 --pause-ms 30 --pattern ping-pong
 agent-device long-press 300 500 800    # Long press (where supported)
@@ -155,6 +157,50 @@ agent-device replay -u ./session.ad   # Update selector drift and rewrite .ad sc
 `--save-script` path is a file path; parent directories are created automatically.
 For ambiguous bare values, use `--save-script=workflow.ad` or `./workflow.ad`.
+### Fast batching (JSON steps)
+Use `batch` when an agent already has a known short sequence and wants fewer orchestration round trips.
+```bash
+agent-device batch \
+  --session sim \
+  --platform ios \
+  --udid 00008150-001849640CF8401C \
+  --steps-file /tmp/batch-steps.json \
+  --json
+```
+Inline JSON works for small payloads:
+```bash
+agent-device batch --steps '[{"command":"open","positionals":["settings"]},{"command":"wait","positionals":["100"]}]'
+```
+Step format:
+```json
+[
+  { "command": "open", "positionals": ["settings"], "flags": {} },
+  { "command": "wait", "positionals": ["label=\"Privacy & Security\"", "3000"], "flags": {} },
+  { "command": "click", "positionals": ["label=\"Privacy & Security\""], "flags": {} },
+  { "command": "get", "positionals": ["text", "label=\"Tracking\""], "flags": {} }
+]
+```
+Batch best practices:
+- Batch one screen-local flow at a time.
+- Add sync guards (`wait`, `is exists`) after mutating steps (`open`, `click`, `fill`, `swipe`).
+- Treat prior refs/snapshot assumptions as stale after UI mutations.
+- Prefer `--steps-file` over inline JSON.
+- Keep batches moderate (about 5-20 steps).
+- Use failure context (`step`, `partialResults`) to replan from the failed step.
+Stale accessibility tree note:
+- Rapid mutations can outrun accessibility tree updates.
+- Mitigate with explicit waits and phase splitting (navigate, verify/extract, cleanup).
 ### Trace logs (XCTest)
 ```bash
@@ -178,7 +224,10 @@ agent-device apps --platform android --user-installed
 ## Best practices
-- `press` supports gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`.
+- `press` is the canonical tap command; `click` is an alias with the same behavior.
+- `press` (and `click`) accepts `x y`, `@ref`, and selector targets.
+- `press`/`click` support gesture series controls: `--count`, `--interval-ms`, `--hold-ms`, `--jitter-px`, `--double-tap`.
+- `--double-tap` cannot be combined with `--hold-ms` or `--jitter-px`.
 - `swipe` supports coordinate + timing controls and repeat patterns: `swipe x1 y1 x2 y2 [durationMs] --count --pause-ms --pattern`.
 - `swipe` timing is platform-safe: Android uses requested duration; iOS uses normalized safe timing to avoid long-press side effects.
 - Pinch (`pinch <scale> [x y]`) is iOS simulator-only; scale > 1 zooms in, < 1 zooms out.
@@ -208,3 +257,4 @@ agent-device apps --platform android --user-installed
 - [references/permissions.md](references/permissions.md)
 - [references/video-recording.md](references/video-recording.md)
 - [references/coordinate-system.md](references/coordinate-system.md)
+- [references/batching.md](references/batching.md)

package/skills/agent-device/references/batching.md ADDED Viewed

@@ -0,0 +1,79 @@
+# Batching
+## When to use batch
+- The agent already knows a short sequence of commands.
+- Steps belong to one logical screen flow.
+- You want one result object with per-step timing and failure context.
+## When not to use batch
+- Flows are unrelated and should be retried independently.
+- The workflow is highly dynamic and requires replanning after each step.
+- You need human approvals between steps.
+## CLI patterns
+From file:
+```bash
+agent-device batch --session sim --platform ios --steps-file /tmp/batch-steps.json --json
+```
+Inline (small payloads only):
+```bash
+agent-device batch --steps '[{"command":"open","positionals":["settings"]}]'
+```
+## Step payload contract
+```json
+[
+  { "command": "open", "positionals": ["settings"], "flags": {} },
+  { "command": "wait", "positionals": ["label=\"Privacy & Security\"", "3000"], "flags": {} },
+  { "command": "click", "positionals": ["label=\"Privacy & Security\""], "flags": {} },
+  { "command": "get", "positionals": ["text", "label=\"Tracking\""], "flags": {} }
+]
+```
+Rules:
+- `positionals` optional, defaults to `[]`.
+- `flags` optional, defaults to `{}`.
+- nested `batch` and `replay` are rejected.
+- stop-on-first-error is the supported mode (`--on-error stop`).
+## Response handling
+Success includes:
+- `total`, `executed`, `totalDurationMs`
+- `results[]` entries with `step`, `command`, `durationMs`, and optional `data`
+Failure includes:
+- `details.step`
+- `details.command`
+- `details.executed`
+- `details.partialResults`
+Use these fields to replan from the first failing step.
+## Common error categories and agent actions
+- `INVALID_ARGS`: payload/step shape issue; fix payload and retry.
+- `SESSION_NOT_FOUND`: open or select the correct session, then retry.
+- `UNSUPPORTED_OPERATION`: switch command/target to supported operation.
+- `AMBIGUOUS_MATCH`: refine selector/locator, then retry failed step.
+- `COMMAND_FAILED`: add sync guard (`wait`, `is exists`) and retry from failed step.
+## Reliability guardrails
+- Add sync guards after mutating steps.
+- Assume snapshot/ref drift after navigation.
+- Keep batch size moderate (about 5-20 steps).
+- Split long workflows into phases:
+  1. navigate
+  2. verify/extract
+  3. cleanup

package/skills/agent-device/references/permissions.md CHANGED Viewed

@@ -22,6 +22,10 @@ If daemon startup fails with stale metadata hints, clean stale files and retry:
 - `~/.agent-device/daemon.json`
 - `~/.agent-device/daemon.lock`
+## iOS: "Allow Paste" dialog
+iOS 16+ shows an "Allow Paste" prompt when an app reads the system pasteboard. Under XCUITest (which `agent-device` uses), this prompt is suppressed by the testing runtime. Use `xcrun simctl pbcopy booted` to set clipboard content directly on the simulator instead; the command reads the content from standard input (for example, `echo "text" | xcrun simctl pbcopy booted`).
 ## Simulator troubleshooting
 - If snapshots return 0 nodes, restart Simulator and re-open the app.

package/skills/agent-device/references/snapshot-refs.md CHANGED Viewed

@@ -3,6 +3,7 @@
 ## Purpose
 Refs are useful for discovery/debugging. For deterministic scripts, use selectors.
+For tap interactions, `press` is canonical; `click` is an equivalent alias.
 ## Snapshot
@@ -24,14 +25,14 @@ App: com.apple.Preferences
 ## Using refs (discovery/debug)
 ```bash
-agent-device click @e2
+agent-device press @e2
 agent-device fill @e5 "test"
 ```
 ## Using selectors (deterministic)
 ```bash
-agent-device click 'id="camera_row" || label="Camera" role=button'
+agent-device press 'id="camera_row" || label="Camera" role=button'
 agent-device fill 'id="search_input" editable=true' "test"
 agent-device is visible 'id="camera_settings_anchor"'
 ```

package/dist/src/797.js DELETED Viewed

@@ -1 +0,0 @@

- import e,{promises as t}from"node:fs";import r from"node:path";import{fileURLToPath as n,pathToFileURL as o}from"node:url";import{spawn as i,spawnSync as u}from"node:child_process";class s extends Error{code;details;cause;constructor(e,t,r,n){super(t),this.code=e,this.details=r,this.cause=n}}function d(e){return e instanceof s?e:e instanceof Error?new s("UNKNOWN",e.message,void 0,e):new s("UNKNOWN","Unknown error",{err:e})}function a(){try{let t=l();return JSON.parse(e.readFileSync(r.join(t,"package.json"),"utf8")).version??"0.0.0"}catch{return"0.0.0"}}function l(){let t=r.dirname(n(import.meta.url)),o=t;for(let t=0;t<6;t+=1){let t=r.join(o,"package.json");if(e.existsSync(t))return o;o=r.dirname(o)}return t}async function c(e,t,r={}){return new Promise((n,o)=>{let u=i(e,t,{cwd:r.cwd,env:r.env,stdio:["pipe","pipe","pipe"]}),d="",a=r.binaryStdout?Buffer.alloc(0):void 0,l="",c=!1,f=h(r.timeoutMs),m=f?setTimeout(()=>{c=!0,u.kill("SIGKILL")},f):null;r.binaryStdout||u.stdout.setEncoding("utf8"),u.stderr.setEncoding("utf8"),void 0!==r.stdin&&u.stdin.write(r.stdin),u.stdin.end(),u.stdout.on("data",e=>{r.binaryStdout?a=Buffer.concat([a??Buffer.alloc(0),Buffer.isBuffer(e)?e:Buffer.from(e)]):d+=e}),u.stderr.on("data",e=>{l+=e}),u.on("error",r=>{(m&&clearTimeout(m),"ENOENT"===r.code)?o(new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},r)):o(new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},r))}),u.on("close",i=>{m&&clearTimeout(m);let u=i??1;c&&f?o(new s("COMMAND_FAILED",`${e} timed out after ${f}ms`,{cmd:e,args:t,stdout:d,stderr:l,exitCode:u,timeoutMs:f})):0===u||r.allowFailure?n({stdout:d,stderr:l,exitCode:u,stdoutBuffer:a}):o(new s("COMMAND_FAILED",`${e} exited with code ${u}`,{cmd:e,args:t,stdout:d,stderr:l,exitCode:u}))})})}async function f(e){try{var t;let{shell:r,args:n}=(t=e,"win32"===process.platform?{shell:"cmd.exe",args:["/c","where",t]}:{shell:"bash",args:["-lc",`command -v ${t}`]}),o=await c(r,n,{allowFailure:!0});return 
0===o.exitCode&&o.stdout.trim().length>0}catch{return!1}}function m(e,t,r={}){let n=u(e,t,{cwd:r.cwd,env:r.env,stdio:["pipe","pipe","pipe"],encoding:r.binaryStdout?void 0:"utf8",input:r.stdin,timeout:h(r.timeoutMs)});if(n.error){let o=n.error.code;if("ETIMEDOUT"===o)throw new s("COMMAND_FAILED",`${e} timed out after ${h(r.timeoutMs)}ms`,{cmd:e,args:t,timeoutMs:h(r.timeoutMs)},n.error);if("ENOENT"===o)throw new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},n.error);throw new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},n.error)}let o=r.binaryStdout?Buffer.isBuffer(n.stdout)?n.stdout:Buffer.from(n.stdout??""):void 0,i=r.binaryStdout?"":"string"==typeof n.stdout?n.stdout:(n.stdout??"").toString(),d="string"==typeof n.stderr?n.stderr:(n.stderr??"").toString(),a=n.status??1;if(0!==a&&!r.allowFailure)throw new s("COMMAND_FAILED",`${e} exited with code ${a}`,{cmd:e,args:t,stdout:i,stderr:d,exitCode:a});return{stdout:i,stderr:d,exitCode:a,stdoutBuffer:o}}function w(e,t,r={}){i(e,t,{cwd:r.cwd,env:r.env,stdio:"ignore",detached:!0}).unref()}async function p(e,t,r={}){return new Promise((n,o)=>{let u=i(e,t,{cwd:r.cwd,env:r.env,stdio:["pipe","pipe","pipe"]}),d="",a="",l=r.binaryStdout?Buffer.alloc(0):void 0;r.binaryStdout||u.stdout.setEncoding("utf8"),u.stderr.setEncoding("utf8"),void 0!==r.stdin&&u.stdin.write(r.stdin),u.stdin.end(),u.stdout.on("data",e=>{if(r.binaryStdout){l=Buffer.concat([l??Buffer.alloc(0),Buffer.isBuffer(e)?e:Buffer.from(e)]);return}let t=String(e);d+=t,r.onStdoutChunk?.(t)}),u.stderr.on("data",e=>{let t=String(e);a+=t,r.onStderrChunk?.(t)}),u.on("error",r=>{"ENOENT"===r.code?o(new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},r)):o(new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},r))}),u.on("close",i=>{let u=i??1;0===u||r.allowFailure?n({stdout:d,stderr:a,exitCode:u,stdoutBuffer:l}):o(new s("COMMAND_FAILED",`${e} exited with code ${u}`,{cmd:e,args:t,stdout:d,stderr:a,exitCode:u}))})})}function M(e,t,r={}){let 
n=i(e,t,{cwd:r.cwd,env:r.env,stdio:["ignore","pipe","pipe"]}),o="",u="";n.stdout.setEncoding("utf8"),n.stderr.setEncoding("utf8"),n.stdout.on("data",e=>{o+=e}),n.stderr.on("data",e=>{u+=e});let d=new Promise((i,d)=>{n.on("error",r=>{"ENOENT"===r.code?d(new s("TOOL_MISSING",`${e} not found in PATH`,{cmd:e},r)):d(new s("COMMAND_FAILED",`Failed to run ${e}`,{cmd:e,args:t},r))}),n.on("close",n=>{let a=n??1;0===a||r.allowFailure?i({stdout:o,stderr:u,exitCode:a}):d(new s("COMMAND_FAILED",`${e} exited with code ${a}`,{cmd:e,args:t,stdout:o,stderr:u,exitCode:a}))})});return{child:n,wait:d}}function h(e){if(!Number.isFinite(e))return;let t=Math.floor(e);if(!(t<=0))return t}let E=[/(^|[\/\s"'=])dist\/src\/daemon\.js($|[\s"'])/,/(^|[\/\s"'=])src\/daemon\.ts($|[\s"'])/];function S(e){if(!Number.isInteger(e)||e<=0)return!1;try{return process.kill(e,0),!0}catch(e){return"EPERM"===e.code}}function N(e){if(!Number.isInteger(e)||e<=0)return null;try{let t=m("ps",["-p",String(e),"-o","lstart="],{allowFailure:!0,timeoutMs:1e3});if(0!==t.exitCode)return null;let r=t.stdout.trim();return r.length>0?r:null}catch{return null}}function g(e,t){let r;if(!S(e))return!1;if(t){let r=N(e);if(!r||r!==t)return!1}let n=function(e){if(!Number.isInteger(e)||e<=0)return null;try{let t=m("ps",["-p",String(e),"-o","command="],{allowFailure:!0,timeoutMs:1e3});if(0!==t.exitCode)return null;let r=t.stdout.trim();return r.length>0?r:null}catch{return null}}(e);return!!n&&!!(r=n.toLowerCase().replaceAll("\\","/")).includes("agent-device")&&E.some(e=>e.test(r))}function A(e,t){try{return process.kill(e,t),!0}catch(t){let e=t.code;if("ESRCH"===e||"EPERM"===e)return!1;throw t}}async function I(e,t){if(!S(e))return!0;let r=Date.now();for(;Date.now()-r<t;)if(await new Promise(e=>setTimeout(e,50)),!S(e))return!0;return!S(e)}async function D(e,t){!g(e,t.expectedStartTime)||!A(e,"SIGTERM")||await I(e,t.termTimeoutMs)||A(e,"SIGKILL")&&await I(e,t.killTimeoutMs)}export{default as 
node_net}from"node:net";export{default as node_os}from"node:os";export{s as AppError,d as asAppError,n as fileURLToPath,l as findProjectRoot,g as isAgentDeviceDaemonProcess,S as isProcessAlive,e as node_fs,r as node_path,o as pathToFileURL,t as promises,N as readProcessStartTime,a as readVersion,c as runCmd,M as runCmdBackground,w as runCmdDetached,p as runCmdStreaming,D as stopProcessForTakeover,f as whichCmd};