agent-device 0.10.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +4 -607
  2. package/dist/src/331.js +3 -3
  3. package/dist/src/425.js +1 -0
  4. package/dist/src/bin.js +28 -28
  5. package/dist/src/core/dispatch.d.ts +2 -0
  6. package/dist/src/core/session-surface.d.ts +3 -0
  7. package/dist/src/core/settings-contract.d.ts +2 -1
  8. package/dist/src/daemon/android-system-dialog.d.ts +11 -0
  9. package/dist/src/daemon/app-log-ios.d.ts +2 -1
  10. package/dist/src/daemon/app-log-process.d.ts +1 -1
  11. package/dist/src/daemon/app-log.d.ts +1 -1
  12. package/dist/src/daemon/context.d.ts +2 -0
  13. package/dist/src/daemon/handlers/interaction-common.d.ts +30 -1
  14. package/dist/src/daemon/handlers/interaction-read.d.ts +14 -0
  15. package/dist/src/daemon/handlers/interaction-touch.d.ts +45 -0
  16. package/dist/src/daemon/handlers/interaction.d.ts +2 -0
  17. package/dist/src/daemon/handlers/record-trace-android.d.ts +18 -0
  18. package/dist/src/daemon/handlers/record-trace-ios.d.ts +52 -0
  19. package/dist/src/daemon/handlers/record-trace-recording.d.ts +32 -0
  20. package/dist/src/daemon/handlers/record-trace.d.ts +2 -7
  21. package/dist/src/daemon/handlers/snapshot-capture.d.ts +11 -4
  22. package/dist/src/daemon/record-trace-errors.d.ts +6 -0
  23. package/dist/src/daemon/recording-gestures.d.ts +3 -0
  24. package/dist/src/daemon/recording-telemetry.d.ts +20 -0
  25. package/dist/src/daemon/recording-timing.d.ts +24 -0
  26. package/dist/src/daemon/request-router.d.ts +6 -0
  27. package/dist/src/daemon/script-utils.d.ts +1 -0
  28. package/dist/src/daemon/snapshot-processing.d.ts +1 -0
  29. package/dist/src/daemon/touch-reference-frame.d.ts +7 -0
  30. package/dist/src/daemon/types.d.ts +65 -11
  31. package/dist/src/daemon.js +62 -36
  32. package/dist/src/platforms/android/index.d.ts +1 -1
  33. package/dist/src/platforms/android/input-actions.d.ts +5 -0
  34. package/dist/src/platforms/android/settings.d.ts +1 -1
  35. package/dist/src/platforms/ios/apps.d.ts +1 -1
  36. package/dist/src/platforms/ios/macos-helper.d.ts +69 -0
  37. package/dist/src/platforms/ios/runner-client.d.ts +2 -2
  38. package/dist/src/platforms/ios/runner-session.d.ts +5 -0
  39. package/dist/src/platforms/ios/runner-xctestrun.d.ts +3 -1
  40. package/dist/src/recording/overlay.d.ts +10 -0
  41. package/dist/src/utils/command-schema.d.ts +2 -0
  42. package/dist/src/utils/interactors.d.ts +8 -8
  43. package/dist/src/utils/snapshot-lines.d.ts +5 -2
  44. package/dist/src/utils/snapshot.d.ts +8 -1
  45. package/dist/src/utils/text-surface.d.ts +19 -0
  46. package/dist/src/utils/video.d.ts +9 -0
  47. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +196 -51
  48. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +133 -0
  49. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift +1 -1
  50. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +33 -1
  51. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift +4 -6
  52. package/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +1 -0
  53. package/ios-runner/AgentDeviceRunner/RecordingScripts/recording-overlay.swift +571 -0
  54. package/ios-runner/AgentDeviceRunner/RecordingScripts/recording-trim.swift +140 -0
  55. package/macos-helper/Package.swift +18 -0
  56. package/macos-helper/Sources/AgentDeviceMacOSHelper/SnapshotTraversal.swift +543 -0
  57. package/macos-helper/Sources/AgentDeviceMacOSHelper/main.swift +545 -0
  58. package/package.json +4 -1
  59. package/skills/agent-device/SKILL.md +25 -334
  60. package/skills/agent-device/references/bootstrap-install.md +167 -0
  61. package/skills/agent-device/references/coordinate-system.md +24 -4
  62. package/skills/agent-device/references/debugging.md +115 -0
  63. package/skills/agent-device/references/exploration.md +193 -0
  64. package/skills/agent-device/references/macos-desktop.md +55 -57
  65. package/skills/agent-device/references/remote-tenancy.md +56 -47
  66. package/skills/agent-device/references/verification.md +103 -0
  67. package/dist/src/274.js +0 -1
  68. package/dist/src/daemon/handlers/interaction-fill.d.ts +0 -3
  69. package/dist/src/daemon/handlers/interaction-press.d.ts +0 -3
  70. package/skills/agent-device/references/batching.md +0 -79
  71. package/skills/agent-device/references/logs-and-debug.md +0 -113
  72. package/skills/agent-device/references/perf-metrics.md +0 -53
  73. package/skills/agent-device/references/permissions.md +0 -70
  74. package/skills/agent-device/references/session-management.md +0 -101
  75. package/skills/agent-device/references/snapshot-refs.md +0 -102
  76. package/skills/agent-device/references/video-recording.md +0 -41
@@ -1,6 +1,27 @@
1
1
  import XCTest
2
2
 
3
3
  extension RunnerTests {
4
/// A touch position together with the dimensions of the frame it is reported against.
struct TouchVisualizationFrame {
  /// Horizontal and vertical position of the touch.
  let x, y: Double
  /// Width and height of the reference frame.
  let referenceWidth, referenceHeight: Double
}
10
+
11
/// The two end points of a drag together with the dimensions of the frame they are reported against.
struct DragVisualizationFrame {
  /// Position where the drag begins.
  let x, y: Double
  /// Position where the drag ends.
  let x2, y2: Double
  /// Width and height of the reference frame.
  let referenceWidth, referenceHeight: Double
}
19
+
20
/// Dimensions of the frame that gesture coordinates are reported against.
struct GestureReferenceFrame {
  /// Width and height of the reference frame.
  let referenceWidth, referenceHeight: Double
}
24
+
4
25
  // MARK: - Navigation Gestures
5
26
 
6
27
  func tapNavigationBack(app: XCUIApplication) -> Bool {
@@ -99,6 +120,40 @@ extension RunnerTests {
99
120
  return element.exists ? element : nil
100
121
  }
101
122
 
123
/// Returns readable text for the element under a point.
///
/// Every existing descendant of `app` whose non-empty frame contains the point is a
/// candidate. Candidates are ordered by ascending frame area, then by top edge, left
/// edge and element-type raw value. Text-input candidates (see
/// `prefersExpandedTextRead`) are consulted first; if none yields text, all candidates
/// are tried in order.
///
/// - Parameters:
///   - app: Application whose descendants are searched.
///   - x: Horizontal coordinate of the point, compared directly against element frames.
///   - y: Vertical coordinate of the point, compared directly against element frames.
/// - Returns: The first non-empty text found, or `nil` when no candidate yields any.
func readTextAt(app: XCUIApplication, x: Double, y: Double) -> String? {
  // Attributes captured once per element. The original comparator re-read `frame`
  // and `elementType` from the live element on every comparison; capturing them up
  // front keeps the ordering consistent for the whole sort and avoids repeating the
  // attribute queries O(n log n) times.
  struct Candidate {
    let element: XCUIElement
    let frame: CGRect
    let area: CGFloat
    let elementType: XCUIElement.ElementType
  }

  let point = CGPoint(x: x, y: y)
  let candidates = app.descendants(matching: .any).allElementsBoundByIndex
    .compactMap { element -> Candidate? in
      guard element.exists else { return nil }
      let frame = element.frame
      guard !frame.isEmpty, frame.contains(point) else { return nil }
      return Candidate(
        element: element,
        frame: frame,
        // Clamp so degenerate frames still compare sensibly.
        area: max(1, frame.width * frame.height),
        elementType: element.elementType
      )
    }
    .sorted { left, right in
      if left.area != right.area {
        return left.area < right.area
      }
      if left.frame.minY != right.frame.minY {
        return left.frame.minY < right.frame.minY
      }
      if left.frame.minX != right.frame.minX {
        return left.frame.minX < right.frame.minX
      }
      return left.elementType.rawValue < right.elementType.rawValue
    }

  // First pass: text-input elements only.
  for candidate in candidates where prefersExpandedTextRead(candidate.element) {
    if let text = readableText(for: candidate.element) {
      return text
    }
  }
  // Second pass: any candidate, smallest first.
  for candidate in candidates {
    if let text = readableText(for: candidate.element) {
      return text
    }
  }
  return nil
}
156
+
102
157
  func clearTextInput(_ element: XCUIElement) {
103
158
  moveCaretToEnd(element: element)
104
159
  let count = estimatedDeleteCount(for: element)
@@ -141,6 +196,32 @@ extension RunnerTests {
141
196
  return max(24, min(120, base))
142
197
  }
143
198
 
199
/// Chooses the first non-empty text among an element's value, label and identifier.
///
/// All three strings are trimmed of surrounding whitespace and newlines. For text
/// fields, secure text fields, search fields and text views the value is preferred
/// over the label; for every other element type the label is preferred over the
/// value. The identifier is always the last resort.
///
/// - Parameter element: Element to read from.
/// - Returns: The chosen text, or `nil` when all three are empty after trimming.
private func readableText(for element: XCUIElement) -> String? {
  let trimmed: (String) -> String = { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
  let label = trimmed(element.label)
  let identifier = trimmed(element.identifier)
  let valueText = trimmed(String(describing: element.value ?? ""))

  let byPriority: [String]
  switch element.elementType {
  case .textField, .secureTextField, .searchField, .textView:
    byPriority = [valueText, label, identifier]
  default:
    byPriority = [label, valueText, identifier]
  }
  return byPriority.first { !$0.isEmpty }
}
215
+
216
/// Reports whether the element is a text-input type: a text field, secure text field,
/// search field or text view.
///
/// - Parameter element: Element whose type is inspected.
/// - Returns: `true` for the four text-input types, `false` for anything else.
private func prefersExpandedTextRead(_ element: XCUIElement) -> Bool {
  let textInputTypes: [XCUIElement.ElementType] = [
    .textField, .secureTextField, .searchField, .textView,
  ]
  return textInputTypes.contains(element.elementType)
}
224
+
144
225
  func findScopeElement(app: XCUIApplication, scope: String) -> XCUIElement? {
145
226
  let predicate = NSPredicate(
146
227
  format: "label CONTAINS[c] %@ OR identifier CONTAINS[c] %@",
@@ -209,6 +290,58 @@ extension RunnerTests {
209
290
  start.press(forDuration: holdDuration, thenDragTo: end)
210
291
  }
211
292
 
293
/// Offsets a touch point by the application's origin and attaches the reference frame size.
///
/// The origin is the top-left of `app.frame`; when that frame is empty, the top-left of
/// the resolved reference frame is used instead.
///
/// - Parameters:
///   - app: Application the touch belongs to.
///   - x: Horizontal touch coordinate, added to the origin's x.
///   - y: Vertical touch coordinate, added to the origin's y.
/// - Returns: The offset point plus the reference frame's width and height.
func resolvedTouchVisualizationFrame(app: XCUIApplication, x: Double, y: Double) -> TouchVisualizationFrame {
  let appFrame = app.frame
  let referenceFrame = resolvedTouchReferenceFrame(app: app, appFrame: appFrame)
  // Fall back to the reference frame's origin only when the app frame is unusable.
  let anchor = appFrame.isEmpty ? referenceFrame : appFrame
  let resolvedX = Double(anchor.minX) + x
  let resolvedY = Double(anchor.minY) + y
  return TouchVisualizationFrame(
    x: resolvedX,
    y: resolvedY,
    referenceWidth: Double(referenceFrame.width),
    referenceHeight: Double(referenceFrame.height)
  )
}
305
+
306
/// Offsets both end points of a drag by the application's origin and attaches the
/// reference frame size.
///
/// The origin and reference size are resolved a single time and applied to both end
/// points. Resolving each end point separately would sample the application and window
/// frames twice, which doubles the UI queries and lets the two points be offset by
/// different origins if a frame changes between the samples.
///
/// - Parameters:
///   - app: Application the drag belongs to.
///   - x: Horizontal coordinate of the drag start.
///   - y: Vertical coordinate of the drag start.
///   - x2: Horizontal coordinate of the drag end.
///   - y2: Vertical coordinate of the drag end.
/// - Returns: Both offset points plus the reference frame's width and height.
func resolvedDragVisualizationFrame(
  app: XCUIApplication,
  x: Double,
  y: Double,
  x2: Double,
  y2: Double
) -> DragVisualizationFrame {
  // A zero touch resolves to exactly the origin, so adding the raw coordinates
  // afterwards yields the same values as resolving each point individually.
  let origin = resolvedTouchVisualizationFrame(app: app, x: 0, y: 0)
  return DragVisualizationFrame(
    x: origin.x + x,
    y: origin.y + y,
    x2: origin.x + x2,
    y2: origin.y + y2,
    referenceWidth: origin.referenceWidth,
    referenceHeight: origin.referenceHeight
  )
}
324
+
325
/// Picks the frame that touch coordinates are measured against.
///
/// Preference order: the first matching window's frame (when the window exists and its
/// frame is non-empty), then `appFrame` (when non-empty), then a zero rectangle.
///
/// - Parameters:
///   - app: Application whose first window is inspected.
///   - appFrame: The application's frame, supplied by the caller.
/// - Returns: The chosen reference frame; `.zero` when nothing usable is available.
private func resolvedTouchReferenceFrame(app: XCUIApplication, appFrame: CGRect) -> CGRect {
  let window = app.windows.firstMatch
  // Confirm the window exists before asking for its frame, so no attribute is
  // read from an element that could not be resolved.
  if window.exists {
    let windowFrame = window.frame
    if !windowFrame.isEmpty {
      return windowFrame
    }
  }
  return appFrame.isEmpty ? .zero : appFrame
}
336
+
337
/// Reports the width and height of the frame that gesture coordinates are measured against.
///
/// - Parameter app: Application whose reference frame is resolved.
/// - Returns: The resolved reference frame's dimensions.
func resolvedGestureReferenceFrame(app: XCUIApplication) -> GestureReferenceFrame {
  let reference = resolvedTouchReferenceFrame(app: app, appFrame: app.frame)
  let width = Double(reference.width)
  let height = Double(reference.height)
  return GestureReferenceFrame(referenceWidth: width, referenceHeight: height)
}
344
+
212
345
  func runSeries(count: Int, pauseMs: Double, operation: (Int) -> Void) {
213
346
  let total = max(count, 1)
214
347
  let pause = max(pauseMs, 0)
@@ -152,7 +152,7 @@ extension RunnerTests {
152
152
 
153
153
  func isReadOnlyCommand(_ command: Command) -> Bool {
154
154
  switch command.command {
155
- case .findText, .snapshot, .screenshot:
155
+ case .findText, .readText, .snapshot, .screenshot:
156
156
  return true
157
157
  case .alert:
158
158
  let action = (command.action ?? "get").lowercased()
@@ -10,6 +10,7 @@ enum CommandType: String, Codable {
10
10
  case type
11
11
  case swipe
12
12
  case findText
13
+ case readText
13
14
  case snapshot
14
15
  case screenshot
15
16
  case back
@@ -19,6 +20,7 @@ enum CommandType: String, Codable {
19
20
  case pinch
20
21
  case recordStart
21
22
  case recordStop
23
+ case uptime
22
24
  case shutdown
23
25
  }
24
26
 
@@ -71,23 +73,53 @@ struct Response: Codable {
71
73
 
72
74
  struct DataPayload: Codable {
73
75
  let message: String?
76
+ let text: String?
74
77
  let found: Bool?
75
78
  let items: [String]?
76
79
  let nodes: [SnapshotNode]?
77
80
  let truncated: Bool?
81
+ let gestureStartUptimeMs: Double?
82
+ let gestureEndUptimeMs: Double?
83
+ let x: Double?
84
+ let y: Double?
85
+ let x2: Double?
86
+ let y2: Double?
87
+ let referenceWidth: Double?
88
+ let referenceHeight: Double?
89
+ let currentUptimeMs: Double?
78
90
 
79
91
  init(
80
92
  message: String? = nil,
93
+ text: String? = nil,
81
94
  found: Bool? = nil,
82
95
  items: [String]? = nil,
83
96
  nodes: [SnapshotNode]? = nil,
84
- truncated: Bool? = nil
97
+ truncated: Bool? = nil,
98
+ gestureStartUptimeMs: Double? = nil,
99
+ gestureEndUptimeMs: Double? = nil,
100
+ x: Double? = nil,
101
+ y: Double? = nil,
102
+ x2: Double? = nil,
103
+ y2: Double? = nil,
104
+ referenceWidth: Double? = nil,
105
+ referenceHeight: Double? = nil,
106
+ currentUptimeMs: Double? = nil
85
107
  ) {
86
108
  self.message = message
109
+ self.text = text
87
110
  self.found = found
88
111
  self.items = items
89
112
  self.nodes = nodes
90
113
  self.truncated = truncated
114
+ self.gestureStartUptimeMs = gestureStartUptimeMs
115
+ self.gestureEndUptimeMs = gestureEndUptimeMs
116
+ self.x = x
117
+ self.y = y
118
+ self.x2 = x2
119
+ self.y2 = y2
120
+ self.referenceWidth = referenceWidth
121
+ self.referenceHeight = referenceHeight
122
+ self.currentUptimeMs = currentUptimeMs
91
123
  }
92
124
  }
93
125
 
@@ -7,13 +7,11 @@ extension RunnerTests {
7
7
  final class ScreenRecorder {
8
8
  private let outputPath: String
9
9
  private let fps: Int32?
10
- private let uncappedFrameInterval: TimeInterval = 0.001
11
- private var uncappedTimestampTimescale: Int32 {
12
- Int32(max(1, Int((1.0 / uncappedFrameInterval).rounded())))
10
+ private var effectiveFps: Int32 {
11
+ max(1, fps ?? RunnerTests.defaultRecordingFps)
13
12
  }
14
13
  private var frameInterval: TimeInterval {
15
- guard let fps else { return uncappedFrameInterval }
16
- return 1.0 / Double(fps)
14
+ 1.0 / Double(effectiveFps)
17
15
  }
18
16
  private let queue = DispatchQueue(label: "agent-device.runner.recorder")
19
17
  private let lock = NSLock()
@@ -206,7 +204,7 @@ extension RunnerTests {
206
204
  recordingStartUptime = nowUptime
207
205
  }
208
206
  let elapsed = max(0, nowUptime - (recordingStartUptime ?? nowUptime))
209
- let timescale = fps ?? uncappedTimestampTimescale
207
+ let timescale = effectiveFps
210
208
  var timestampValue = Int64((elapsed * Double(timescale)).rounded(.down))
211
209
  if timestampValue <= lastTimestampValue {
212
210
  timestampValue = lastTimestampValue + 1
@@ -29,6 +29,7 @@ final class RunnerTests: XCTestCase {
29
29
  }
30
30
 
31
31
  static let springboardBundleId = "com.apple.springboard"
32
+ static let defaultRecordingFps: Int32 = 15
32
33
  var listener: NWListener?
33
34
  var doneExpectation: XCTestExpectation?
34
35
  let app = XCUIApplication()