Version not found. Please check the version and try again.

@astur-mobile/ios 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1872 @@
1
+ import CoreGraphics
2
+ import Foundation
3
+ import UIKit
4
+ import XCTest
5
+
6
/// A single request handed to `AsturAgent.dispatch(_:)`.
struct AsturCommand {
    /// Identifier that `dispatch` forwards to the result it builds.
    let id: String
    /// Command name that `dispatch` switches on, e.g. `"agent.ping"`.
    let method: String
    /// Untyped arguments; each command reads the keys it needs.
    let params: [String: Any]
}
11
+
12
/// Outcome of one dispatched `AsturCommand`.
struct AsturCommandResult {
    /// Identifier of the command this result belongs to.
    let id: String
    /// Success flag for the command.
    let ok: Bool
    /// Optional untyped payload.
    let result: Any?
    /// Optional structured failure description.
    let error: AsturCommandError?
}
18
+
19
/// Structured failure description attached to an `AsturCommandResult`.
struct AsturCommandError {
    /// Machine-readable error code, e.g. `"UNKNOWN_COMMAND"`.
    let code: String
    /// Human-readable explanation.
    let message: String
    /// Optional untyped diagnostic payload.
    let details: Any?
}
24
+
25
/// Describes how to locate an element: a strategy (`"id"`, `"accessibility"`,
/// `"text"`, `"role"`, `"type"`, ...), the value to look for, and whether the
/// comparison is exact.
private struct AsturSelector {
    let strategy: String
    let value: String
    let exact: Bool
    /// Optional extra name constraint; only `String` values are compared by
    /// the agent's name matching.
    let name: Any?

    /// Serializes the selector for diagnostics. `"name"` is emitted only when set.
    func toDictionary() -> [String: Any] {
        var dictionary = [String: Any]()
        dictionary["strategy"] = strategy
        dictionary["value"] = value
        dictionary["exact"] = exact

        guard let name else {
            return dictionary
        }

        dictionary["name"] = name
        return dictionary
    }
}
43
+
44
/// Polling parameters for element waits.
private struct AsturWaitOptions {
    /// Overall wait budget in milliseconds.
    let timeoutMs: Int
    /// Delay between polls in milliseconds.
    let intervalMs: Int
    /// Target state; the agent checks for `"attached"`, `"visible"` and `"hidden"`.
    let state: String
}
49
+
50
/// Which pre-action checks `ensureActionable` must enforce; a `true` flag
/// means the corresponding check is required.
private struct AsturActionability {
    let visible, enabled, stable, hittable: Bool
}
56
+
57
/// Options shared by the element-level actions (tap, fill, drag, ...).
private struct AsturElementActionOptions {
    /// How long and how often to poll for the element before acting.
    let wait: AsturWaitOptions
    /// Checks that must pass before the action is performed.
    let actionability: AsturActionability
    /// Keyboard handling after a fill; `"auto"` dismisses the keyboard.
    let keyboard: String?
    /// Forced gap between the taps of a double tap, in milliseconds.
    let intervalMs: Int?
    /// Press duration for long-press and drag, in milliseconds.
    let durationMs: Int?
    /// Whether fill clears existing text first (callers default this to `true`).
    let clear: Bool?
    /// `"paste"` or `"type"`; any other value lets fill pick by text length.
    let textInputMode: String?
}
66
+
67
/// Integer point used for gesture coordinates.
private struct AsturPoint {
    let x, y: Int

    /// Serializes the point as `["x": x, "y": y]`.
    func toDictionary() -> [String: Int] {
        var dictionary = [String: Int]()
        dictionary["x"] = x
        dictionary["y"] = y
        return dictionary
    }
}
78
+
79
/// Destination of an element drag. `resolveTargetCoordinate` prefers `point`
/// and falls back to `selector`; it throws when both are `nil`.
private struct AsturElementDragTarget {
    /// Absolute destination point, if given.
    let point: AsturPoint?
    /// Element whose center is the destination, if given.
    let selector: AsturSelector?
}
83
+
84
/// Coordinate-based swipe from `start` to `end`.
private struct AsturSwipeGesture {
    let start, end: AsturPoint
    /// Gesture duration in milliseconds.
    let durationMs: Int
}
89
+
90
/// Coordinate-based drag from `start` to `end`.
private struct AsturDragGesture {
    let start, end: AsturPoint
    /// Press-and-drag duration in milliseconds.
    let durationMs: Int
}
95
+
96
/// Integer rectangle describing an element's frame.
private struct AsturBounds {
    let x, y, width, height: Int

    /// Serializes the rectangle with the keys `x`, `y`, `width` and `height`.
    func toDictionary() -> [String: Int] {
        let pairs = [("x", x), ("y", y), ("width", width), ("height", height)]
        return Dictionary(uniqueKeysWithValues: pairs)
    }
}
111
+
112
/// Serializable description of one UI element and (optionally) its subtree.
private struct AsturElementSnapshot {
    let id: String?
    let text: String?
    let label: String?
    let value: String?
    let type: String
    let enabled: Bool
    let visible: Bool
    let selected: Bool?
    let focused: Bool?
    let bounds: AsturBounds
    let children: [AsturElementSnapshot]

    /// Serializes the snapshot recursively. Required fields and the constant
    /// `"platform": "ios"` are always present; optional fields are emitted
    /// only when non-nil.
    func toDictionary() -> [String: Any] {
        var dictionary: [String: Any] = [
            "platform": "ios",
            "type": type,
            "enabled": enabled,
            "visible": visible,
            "bounds": bounds.toDictionary(),
            "children": children.map { child in child.toDictionary() }
        ]

        let optionalFields: [(key: String, value: Any?)] = [
            ("id", id),
            ("text", text),
            ("label", label),
            ("value", value),
            ("selected", selected),
            ("focused", focused)
        ]
        for field in optionalFields {
            if let present = field.value {
                dictionary[field.key] = present
            }
        }

        return dictionary
    }
}
157
+
158
+ final class AsturAgent {
159
+ private let app: XCUIApplication
160
+
161
/// Creates an agent that drives the application with the given bundle identifier.
init(bundleIdentifier: String) {
    app = XCUIApplication(bundleIdentifier: bundleIdentifier)
}
164
+
165
/// Brings the app to the foreground: launches it when it is not running,
/// activates it otherwise, and does nothing when it is already foreground.
/// Afterwards polls every 50 ms, for at most 5 seconds, until the app reports
/// `.runningForeground`.
func launchIfNeeded() {
    switch app.state {
    case .runningForeground:
        return
    case .notRunning:
        app.launch()
    default:
        app.activate()
    }

    // Return as soon as the foreground state is reached rather than sleeping
    // for a fixed period.
    let giveUpAt = Date().addingTimeInterval(5)
    while app.state != .runningForeground && Date() < giveUpAt {
        usleep(50_000)
    }
}
183
+
184
/// Terminates the app unless it is already not running, then polls every
/// 50 ms, for at most 3 seconds, until it reports `.notRunning`.
func terminateIfRunning() {
    guard app.state != .notRunning else {
        return
    }

    app.terminate()

    let giveUpAt = Date().addingTimeInterval(3)
    while app.state != .notRunning && Date() < giveUpAt {
        usleep(50_000)
    }
}
195
+
196
/// Rotates the device to the named orientation.
///
/// - Parameter value: One of `"portrait"`, `"portrait-upside-down"`,
///   `"landscape"`, `"landscape-left"` or `"landscape-right"`.
///   `"landscape"` is treated as landscape-left.
/// - Throws: `AsturAgentFailure` with code `UNSUPPORTED_ORIENTATION` for any
///   other value.
private func setOrientation(_ value: String) throws {
    let orientationsByName: [String: UIDeviceOrientation] = [
        "portrait": .portrait,
        "portrait-upside-down": .portraitUpsideDown,
        "landscape": .landscapeLeft,
        "landscape-left": .landscapeLeft,
        "landscape-right": .landscapeRight
    ]

    guard let orientation = orientationsByName[value] else {
        throw AsturAgentFailure(
            code: "UNSUPPORTED_ORIENTATION",
            message: "Unsupported iOS orientation: \(value)"
        )
    }

    XCUIDevice.shared.orientation = orientation

    // Short pause (150 ms) so the rotation animation can settle.
    usleep(150_000)
}
217
+
218
/// Returns a base64-encoded PNG screenshot. The first window is captured when
/// it exists and has a non-empty frame; otherwise the whole app is captured.
private func inspectorScreenshotBase64() -> String {
    let firstWindow = app.windows.firstMatch
    let canUseWindow = firstWindow.exists && !firstWindow.frame.isEmpty
    let capture = canUseWindow ? firstWindow.screenshot() : app.screenshot()
    return capture.pngRepresentation.base64EncodedString()
}
226
+
227
/// Executes one command and converts the outcome into an `AsturCommandResult`.
///
/// The method name selects the handler. Unknown names produce an
/// `UNKNOWN_COMMAND` error, an `AsturAgentFailure` thrown by a handler is
/// reported with its own code/message/details, and any other thrown error is
/// reported as `INTERNAL_ERROR` with the error description under `"cause"`.
///
/// - Parameter command: The command to run.
/// - Returns: The success or error result carrying `command.id`.
func dispatch(_ command: AsturCommand) -> AsturCommandResult {
    let id = command.id
    let params = command.params

    do {
        switch command.method {
        // MARK: Agent and app lifecycle
        case "agent.ping":
            return ok(id, agentInfo())

        case "app.launch":
            launchIfNeeded()
            return ok(id)

        case "app.terminate":
            terminateIfRunning()
            return ok(id)

        // MARK: Device
        case "device.screenshot":
            launchIfNeeded()
            return ok(id, [
                "base64": inspectorScreenshotBase64()
            ])

        case "device.viewport":
            launchIfNeeded()
            return ok(id, bounds(app.frame).toDictionary())

        case "device.setOrientation":
            let orientation = try params.requiredString("orientation")
            try setOrientation(orientation)
            return ok(id)

        // MARK: Tree and element lookup
        case "tree.get":
            launchIfNeeded()
            return ok(id, getTree().toDictionary())

        case "element.find":
            let selector = try parseSelectorFromParams(params)
            return ok(id, findElement(selector)?.toDictionary())

        case "element.findAll":
            let selector = try parseSelectorFromParams(params)
            return ok(id, findElements(selector).map { $0.toDictionary() })

        case "element.findMany":
            let selectors = try parseSelectorsFromParams(params)
            return ok(id, findManyElements(selectors).map { $0.toDictionary() })

        case "element.wait":
            let selector = try parseSelectorFromParams(params)
            let options = try parseWaitOptions(params.mapValue("options"))
            return ok(id, try waitForElement(selector, options: options)?.toDictionary())

        // MARK: Element actions
        case "element.tap":
            let selector = try parseSelectorFromParams(params)
            let options = try parseElementActionOptions(params.mapValue("options"))
            try resolveElement(selector, options: options).tap()
            return ok(id)

        case "element.doubleTap":
            let selector = try parseSelectorFromParams(params)
            let options = try parseElementActionOptions(params.mapValue("options"))
            try doubleTap(resolveElement(selector, options: options), intervalMs: options.intervalMs)
            return ok(id)

        case "element.longPress":
            let selector = try parseSelectorFromParams(params)
            let options = try parseElementActionOptions(params.mapValue("options"))
            let duration = Double(options.durationMs ?? defaultLongPressMs) / 1_000
            try resolveElement(selector, options: options).press(forDuration: duration)
            return ok(id)

        case "element.fill":
            let selector = try parseSelectorFromParams(params)
            let value = try params.requiredString("value")
            let options = try parseElementActionOptions(params.mapValue("options"))
            // Existing text is cleared unless the caller opts out.
            try fillElement(selector, value: value, options: options, clear: options.clear ?? true)
            return ok(id)

        case "element.drag":
            let selector = try parseSelectorFromParams(params)
            let target = try parseElementDragTarget(params)
            let options = try parseElementActionOptions(params.mapValue("options"))
            try dragElement(selector, target: target, options: options)
            return ok(id)

        // MARK: Coordinate gestures
        case "gesture.tap":
            try coordinate(try parsePointFromParams(params, "target")).tap()
            return ok(id)

        case "gesture.doubleTap":
            let intervalMs = try params.mapValue("options")?.intValue("intervalMs")
            try doubleTap(coordinate(try parsePointFromParams(params, "target")), intervalMs: intervalMs)
            return ok(id)

        case "gesture.longPress":
            let target = try parsePointFromParams(params, "target")
            let options = params.mapValue("options")
            let duration = Double(try options?.intValue("durationMs") ?? defaultLongPressMs) / 1_000
            try coordinate(target).press(forDuration: duration)
            return ok(id)

        case "gesture.swipe":
            try drag(try parseSwipeGesture(params))
            return ok(id)

        case "gesture.drag":
            try drag(try parseDragGesture(params))
            return ok(id)

        // MARK: Keyboard
        case "keyboard.state":
            return ok(id, keyboardState())

        case "keyboard.dismiss":
            dismissKeyboard()
            return ok(id)

        default:
            return error(id, "UNKNOWN_COMMAND", "Unknown Astur agent command: \(command.method)")
        }
    } catch let failure as AsturAgentFailure {
        return error(id, failure.code, failure.message, failure.details)
    } catch {
        // `error` is the caught value in this scope, so the result helper
        // has to be addressed through `self`.
        return self.error(
            id,
            "INTERNAL_ERROR",
            "Unhandled iOS XCUITest agent failure.",
            ["cause": String(describing: error)]
        )
    }
}
355
+
356
/// Builds the payload returned by `agent.ping`: agent id, platform, version,
/// protocol version and the supported capabilities.
private func agentInfo() -> [String: Any] {
    // NOTE(review): the version reported here is hard-coded; confirm it is
    // meant to differ from the published package version.
    let info: [String: Any] = [
        "id": "astur-ios-xctest",
        "platform": "ios",
        "version": "0.1.0-alpha.0",
        "protocolVersion": 1,
        "capabilities": supportedCapabilities
    ]
    return info
}
365
+
366
/// Returns the element tree for the current screen.
///
/// A full-hierarchy snapshot is tried first. If building it throws, the tree
/// is assembled from the bounded typed-candidate collection instead, with the
/// app itself as the root node.
private func getTree() -> AsturElementSnapshot {
    let viewport = app.frame

    // Preferred path: a single `snapshot()` call yields the whole descendant tree.
    if let fullTree = try? buildSnapshotTree(viewport: viewport) {
        return fullTree
    }

    // Fallback path: collect candidates and order them with `treeSnapshotPrecedes`.
    let orderedCandidates = treeCandidateSnapshots(viewport: viewport)
        .sorted(by: treeSnapshotPrecedes)
    let rootIdentifier = app.identifier.nonEmpty
    let rootLabel = app.label.nonEmpty

    return AsturElementSnapshot(
        id: rootIdentifier,
        text: rootLabel,
        label: rootLabel,
        value: nil,
        type: elementTypeName(app.elementType),
        enabled: true,
        visible: !viewport.isEmpty,
        selected: nil,
        focused: nil,
        bounds: bounds(viewport),
        children: orderedCandidates
    )
}
398
+
399
/// Takes a snapshot of the app and converts it into an `AsturElementSnapshot`
/// tree, converting at most `maxFullTreeNodes` descendants.
///
/// - Throws: Whatever `app.snapshot()` throws.
private func buildSnapshotTree(viewport: CGRect) throws -> AsturElementSnapshot {
    let rootSnapshot = try app.snapshot()
    var remainingNodes = maxFullTreeNodes
    return convertSnapshot(rootSnapshot, viewport: viewport, budget: &remainingNodes)
}
404
+
405
/// Recursively converts an XCTest snapshot node into an `AsturElementSnapshot`.
///
/// - Parameters:
///   - snapshot: The node to convert.
///   - viewport: A node counts as visible when its frame is non-empty and
///     either `viewport` is empty or the frame intersects it.
///   - budget: Remaining number of child nodes that may still be converted;
///     decremented once per converted child across the whole recursion.
private func convertSnapshot(
    _ snapshot: XCUIElementSnapshot,
    viewport: CGRect,
    budget: inout Int
) -> AsturElementSnapshot {
    let nodeFrame = snapshot.frame
    let nodeLabel = snapshot.label.nonEmpty
    let nodeValue = stringValue(snapshot.value)?.nonEmpty
    let nodePlaceholder = snapshot.placeholderValue?.nonEmpty
    let isOnScreen = !nodeFrame.isEmpty && (viewport.isEmpty || nodeFrame.intersects(viewport))

    let rawChildren = snapshot.children
    var convertedChildren: [AsturElementSnapshot] = []
    convertedChildren.reserveCapacity(rawChildren.count)

    // Stop descending as soon as the shared node budget is used up.
    var remaining = rawChildren.makeIterator()
    while budget > 0, let child = remaining.next() {
        budget -= 1
        convertedChildren.append(convertSnapshot(child, viewport: viewport, budget: &budget))
    }

    return AsturElementSnapshot(
        id: snapshot.identifier.nonEmpty,
        // Display text falls back from label to value to placeholder.
        text: nodeLabel ?? nodeValue ?? nodePlaceholder,
        label: nodeLabel,
        value: nodeValue,
        type: elementTypeName(snapshot.elementType),
        enabled: snapshot.isEnabled,
        visible: isOnScreen,
        selected: snapshot.isSelected,
        focused: nil,
        bounds: bounds(nodeFrame),
        children: convertedChildren
    )
}
440
+
441
/// Returns a childless snapshot of the first element matching `selector`,
/// or `nil` when nothing matches.
private func findElement(_ selector: AsturSelector) -> AsturElementSnapshot? {
    findElementObject(selector).map { found in
        snapshot(found, includeChildren: false)
    }
}
448
+
449
/// Returns childless snapshots of every element `findElementObjects` yields
/// for `selector`.
private func findElements(_ selector: AsturSelector) -> [AsturElementSnapshot] {
    let found = findElementObjects(selector)
    return found.map { element in
        snapshot(element, includeChildren: false)
    }
}
452
+
453
/// Returns childless snapshots of elements that satisfy at least one of
/// `selectors`, capped at `maxFindManyResults`. An empty selector list
/// yields an empty result.
private func findManyElements(_ selectors: [AsturSelector]) -> [AsturElementSnapshot] {
    if selectors.isEmpty {
        return []
    }

    // Use the single combined predicate when possible, else a broader candidate query.
    let query = directManyQuery(selectors) ?? findManyCandidateQuery(selectors)
    let candidates = boundedElements(query, limit: maxFindManyResults)
    let accepted = candidates.filter { candidate in
        selectors.contains(where: { selector in matches(candidate, selector: selector) })
    }

    return accepted
        .prefix(maxFindManyResults)
        .map { snapshot($0, includeChildren: false) }
}
465
+
466
/// Waits for `selector` to reach the state requested in `options`.
///
/// - Returns: `nil` after a successful `"hidden"` wait, `nil` when the
///   element never reached the requested state, otherwise a childless
///   snapshot of the element.
/// - Throws: Whatever `waitForHidden` throws for the `"hidden"` state.
private func waitForElement(_ selector: AsturSelector, options: AsturWaitOptions) throws -> AsturElementSnapshot? {
    guard options.state != "hidden" else {
        try waitForHidden(selector, options: options)
        return nil
    }

    return waitForElementObject(selector, options: options).map { found in
        snapshot(found, includeChildren: false)
    }
}
478
+
479
/// Focuses the element matching `selector` and enters `value` into it.
///
/// - Parameters:
///   - selector: Element to fill.
///   - value: Text to enter.
///   - options: Wait/actionability settings plus `textInputMode` and `keyboard`.
///   - clear: When `true`, existing text is removed before entering `value`.
/// - Throws: Whatever `resolveElement` throws.
private func fillElement(
    _ selector: AsturSelector,
    value: String,
    options: AsturElementActionOptions,
    clear: Bool
) throws {
    let field = try resolveElement(selector, options: options)
    field.tap()

    if clear {
        clearText(field)
    }

    // "paste" and "type" are honored as given; otherwise text longer than
    // four characters is pasted and shorter text is typed.
    let shouldPaste: Bool
    switch options.textInputMode {
    case "paste":
        shouldPaste = true
    case "type":
        shouldPaste = false
    default:
        shouldPaste = value.count > 4
    }

    if shouldPaste {
        pasteText(value, into: field)
    } else {
        field.typeText(value)
    }

    guard options.keyboard == "auto" else {
        return
    }
    dismissKeyboard()
}
504
+
505
/// Presses the center of the element matching `selector` and drags it to `target`.
///
/// - Parameters:
///   - selector: Element to drag.
///   - target: Destination point or element.
///   - options: Wait/actionability settings; `durationMs` is the press
///     duration and falls back to `defaultDragMs`.
/// - Throws: Whatever `resolveElement` or `resolveTargetCoordinate` throws.
private func dragElement(
    _ selector: AsturSelector,
    target: AsturElementDragTarget,
    options: AsturElementActionOptions
) throws {
    let source = try resolveElement(selector, options: options)
    let start = source.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.5))
    let destination = try resolveTargetCoordinate(target, wait: options.wait)
    // Floor the duration at 10 ms, as the coordinate-based `drag` overloads
    // do, so a zero or negative `durationMs` is never handed to XCTest.
    let duration = max(0.01, Double(options.durationMs ?? defaultDragMs) / 1_000)
    start.press(forDuration: duration, thenDragTo: destination)
}
516
+
517
/// Performs a swipe gesture.
///
/// A predominantly vertical swipe (more than 120 points of vertical travel
/// and more than twice the horizontal travel) is executed with the app-level
/// `swipeUp()` / `swipeDown()`; in that case the gesture's coordinates and
/// duration are not used. Every other swipe is a press-and-drag between the
/// two points with a duration of at least 10 ms.
private func drag(_ gesture: AsturSwipeGesture) throws {
    let horizontalTravel = gesture.end.x - gesture.start.x
    let verticalTravel = gesture.end.y - gesture.start.y
    let isMostlyVertical = abs(verticalTravel) > 120 && abs(verticalTravel) > abs(horizontalTravel) * 2

    guard !isMostlyVertical else {
        if verticalTravel < 0 {
            app.swipeUp()
        } else {
            app.swipeDown()
        }
        return
    }

    let from = try coordinate(gesture.start)
    let to = try coordinate(gesture.end)
    let seconds = max(0.01, Double(gesture.durationMs) / 1_000)
    from.press(forDuration: seconds, thenDragTo: to)
}
535
+
536
/// Performs a press-and-drag between the gesture's two points with a
/// duration of at least 10 ms.
private func drag(_ gesture: AsturDragGesture) throws {
    let from = try coordinate(gesture.start)
    let to = try coordinate(gesture.end)
    let seconds = max(0.01, Double(gesture.durationMs) / 1_000)
    from.press(forDuration: seconds, thenDragTo: to)
}
542
+
543
/// Double-taps `element`.
///
/// Without an explicit interval, XCTest's native `doubleTap()` is used so both
/// touches land inside the OS double-tap recognition window. Two separate
/// `tap()` calls are bracketed by app-idle waits and usually miss that window,
/// so the manual path is taken only when the caller forces an interval.
///
/// - Parameters:
///   - element: Element to tap.
///   - intervalMs: Forced gap between the two taps in milliseconds. Values
///     below 30 are raised to 30; very large values are capped so the
///     microsecond count still fits the `UInt32` that `usleep` takes.
private func doubleTap(_ element: XCUIElement, intervalMs: Int?) throws {
    guard let intervalMs else {
        element.doubleTap()
        return
    }

    // `UInt32(_:)` traps on out-of-range input, so bound the value first.
    let longestGapMs = Int(UInt32.max / 1_000)
    let gapMs = min(max(30, intervalMs), longestGapMs)

    element.tap()
    usleep(UInt32(gapMs * 1_000))
    element.tap()
}
558
+
559
/// Double-taps at `coordinate`.
///
/// Uses XCTest's native `doubleTap()` unless the caller forces an interval,
/// in which case two single taps are issued with that gap in between.
///
/// - Parameters:
///   - coordinate: Location to tap.
///   - intervalMs: Forced gap between the two taps in milliseconds. Values
///     below 30 are raised to 30; very large values are capped so the
///     microsecond count still fits the `UInt32` that `usleep` takes.
private func doubleTap(_ coordinate: XCUICoordinate, intervalMs: Int?) {
    guard let intervalMs else {
        coordinate.doubleTap()
        return
    }

    // `UInt32(_:)` traps on out-of-range input, so bound the value first.
    let longestGapMs = Int(UInt32.max / 1_000)
    let gapMs = min(max(30, intervalMs), longestGapMs)

    coordinate.tap()
    usleep(UInt32(gapMs * 1_000))
    coordinate.tap()
}
569
+
570
/// Reports whether a keyboard is present; when it is, its bounds are included.
private func keyboardState() -> [String: Any] {
    let firstKeyboard = app.keyboards.firstMatch

    if firstKeyboard.exists {
        return [
            "visible": true,
            "bounds": bounds(firstKeyboard.frame).toDictionary()
        ]
    }

    return ["visible": false]
}
581
+
582
/// Dismisses the keyboard if one is present.
///
/// Taps the first existing keyboard button titled "Done", "Return", "Go",
/// "Search" or "Next" (checked in that order). When none exists, taps near
/// the top of the app instead.
private func dismissKeyboard() {
    let firstKeyboard = app.keyboards.firstMatch
    if !firstKeyboard.exists {
        return
    }

    let actionTitles = ["Done", "Return", "Go", "Search", "Next"]
    // `lazy` keeps the lookups sequential: later buttons are not queried
    // once an earlier one is found.
    let actionButton = actionTitles.lazy
        .map { firstKeyboard.buttons[$0] }
        .first(where: { $0.exists })

    if let actionButton {
        actionButton.tap()
        return
    }

    app.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.05)).tap()
}
598
+
599
/// Waits for the element matching `selector` and verifies it is actionable.
///
/// - Returns: The resolved element.
/// - Throws: `AsturAgentFailure` with code `ELEMENT_NOT_FOUND` when the wait
///   yields nothing, or whatever `ensureActionable` throws.
private func resolveElement(_ selector: AsturSelector, options: AsturElementActionOptions) throws -> XCUIElement {
    let waitOptions = options.wait

    if let resolved = waitForElementObject(selector, options: waitOptions) {
        try ensureActionable(resolved, selector: selector, requirements: options.actionability)
        return resolved
    }

    let details = locatorFailureDetails(
        selector,
        state: waitOptions.state,
        timeoutMs: waitOptions.timeoutMs
    )
    throw AsturAgentFailure(
        code: "ELEMENT_NOT_FOUND",
        message: "Could not resolve element before action.",
        details: details
    )
}
615
+
616
/// Verifies the requested actionability checks in the order visible,
/// enabled, stable, hittable, throwing on the first one that fails.
///
/// - Throws: `AsturAgentFailure` with code `ELEMENT_NOT_VISIBLE`,
///   `ELEMENT_DISABLED`, `ELEMENT_UNSTABLE` or `ELEMENT_NOT_HITTABLE`.
private func ensureActionable(
    _ element: XCUIElement,
    selector: AsturSelector,
    requirements: AsturActionability
) throws {
    // Builds the failure for one check; diagnostics are gathered only here,
    // i.e. only when a check has actually failed.
    func failure(_ code: String, _ message: String, failed: String, stable: Bool? = nil) -> AsturAgentFailure {
        AsturAgentFailure(
            code: code,
            message: message,
            details: actionabilityFailureDetails(element, selector: selector, failed: failed, stable: stable)
        )
    }

    if requirements.visible && !isVisible(element) {
        throw failure("ELEMENT_NOT_VISIBLE", "Element is not visible.", failed: "visible")
    }

    if requirements.enabled && !element.isEnabled {
        throw failure("ELEMENT_DISABLED", "Element is disabled.", failed: "enabled")
    }

    // Stability is measured only when required; `nil` means "not measured".
    let stable: Bool? = requirements.stable ? isStable(element) : nil
    if stable == false {
        throw failure("ELEMENT_UNSTABLE", "Element bounds are not stable.", failed: "stable", stable: stable)
    }

    if requirements.hittable && !element.isHittable {
        throw failure("ELEMENT_NOT_HITTABLE", "Element is not hittable.", failed: "hittable", stable: stable)
    }
}
654
+
655
/// Builds the diagnostic payload for a failed element lookup or wait.
///
/// - Parameters:
///   - selector: Selector that failed to resolve.
///   - state: Wait state that was requested.
///   - timeoutMs: Timeout that was applied, in milliseconds.
/// - Returns: The selector, state and timeout plus a `"diagnostics"` entry.
///   `"matchingCandidates"` is the number of elements `findElementObjects`
///   currently returns for the selector; `"candidates"` holds snapshots of
///   at most the first five of them.
private func locatorFailureDetails(
    _ selector: AsturSelector,
    state: String,
    timeoutMs: Int
) -> [String: Any] {
    let matched = findElementObjects(selector)
    // Only the first five matches are snapshotted to keep the payload small;
    // the count below still reflects every match that was found.
    let candidates = matched
        .prefix(5)
        .map { snapshot($0, includeChildren: false).toDictionary() }
    let diagnostics: [String: Any] = [
        "selector": selector.toDictionary(),
        "state": state,
        "timeoutMs": timeoutMs,
        "matchingCandidates": matched.count,
        "candidates": candidates
    ]

    return [
        "selector": selector.toDictionary(),
        "state": state,
        "timeout": timeoutMs,
        "diagnostics": diagnostics
    ]
}
678
+
679
/// Builds the diagnostic payload for a failed actionability check.
///
/// - Parameters:
///   - element: Element that failed the check.
///   - selector: Selector that resolved to `element`.
///   - failed: Name of the failed check.
///   - stable: Measured stability, included only when non-nil.
private func actionabilityFailureDetails(
    _ element: XCUIElement,
    selector: AsturSelector,
    failed: String,
    stable: Bool? = nil
) -> [String: Any] {
    let candidate = snapshot(element, includeChildren: false).toDictionary()

    var checks: [String: Any] = [
        "failed": failed,
        "visible": isVisible(element),
        "enabled": element.isEnabled,
        "hittable": element.isHittable
    ]
    if let stable {
        checks["stable"] = stable
    }

    // The candidate and check results appear both at the top level and
    // inside "diagnostics".
    var details: [String: Any] = [
        "selector": selector.toDictionary(),
        "candidate": candidate,
        "actionability": checks
    ]
    details["diagnostics"] = [
        "selector": selector.toDictionary(),
        "matchingCandidates": 1,
        "candidate": candidate,
        "actionability": checks
    ] as [String: Any]

    return details
}
711
+
712
/// Polls until an element matching `selector` reaches the requested state.
///
/// The element is checked once immediately, then after every poll interval
/// until the timeout elapses.
///
/// - Parameters:
///   - selector: Element to look for.
///   - options: `state` may be `"attached"` (element found) or `"visible"`
///     (element found and visible); any other state never matches, so the
///     call returns `nil` after the timeout. `intervalMs` below 30 is raised
///     to 30 and very large values are capped so the microsecond count fits
///     the `UInt32` that `usleep` takes. Negative `timeoutMs` counts as 0.
/// - Returns: The matching element, or `nil` on timeout.
private func waitForElementObject(_ selector: AsturSelector, options: AsturWaitOptions) -> XCUIElement? {
    // One lookup-and-check pass, shared by the immediate attempt and the loop.
    func elementInRequestedState() -> XCUIElement? {
        guard let element = findElementObject(selector) else {
            return nil
        }

        switch options.state {
        case "attached":
            return element
        case "visible":
            return isVisible(element) ? element : nil
        default:
            return nil
        }
    }

    // First attempt with zero delay: the element may already be present.
    if let element = elementInRequestedState() {
        return element
    }

    let deadline = Date().addingTimeInterval(Double(max(0, options.timeoutMs)) / 1_000)
    // `UInt32(_:)` traps on out-of-range input, so bound the interval first.
    let intervalMs = min(max(30, options.intervalMs), Int(UInt32.max / 1_000))
    let interval = UInt32(intervalMs * 1_000)

    while Date() < deadline {
        usleep(interval)

        if let element = elementInRequestedState() {
            return element
        }
    }

    return nil
}
741
+
742
/// Polls until no visible element matches `selector`.
///
/// The element counts as hidden when it cannot be found or is found but not
/// visible. The check runs once immediately, then after every poll interval
/// until the timeout elapses.
///
/// - Parameters:
///   - selector: Element expected to disappear.
///   - options: `timeoutMs` (negative counts as 0) and `intervalMs` (raised to
///     at least 30 and capped so the microsecond count fits the `UInt32` that
///     `usleep` takes).
/// - Throws: `AsturAgentFailure` with code `ELEMENT_NOT_HIDDEN` on timeout.
private func waitForHidden(_ selector: AsturSelector, options: AsturWaitOptions) throws {
    // Hidden means: not found, or found but not visible.
    func isHidden() -> Bool {
        guard let element = findElementObject(selector) else {
            return true
        }
        return !isVisible(element)
    }

    // Immediate check before entering the loop.
    if isHidden() {
        return
    }

    let deadline = Date().addingTimeInterval(Double(max(0, options.timeoutMs)) / 1_000)
    // `UInt32(_:)` traps on out-of-range input, so bound the interval first.
    let intervalMs = min(max(30, options.intervalMs), Int(UInt32.max / 1_000))
    let interval = UInt32(intervalMs * 1_000)

    while Date() < deadline {
        usleep(interval)

        if isHidden() {
            return
        }
    }

    throw AsturAgentFailure(
        code: "ELEMENT_NOT_HIDDEN",
        message: "Timed out waiting for element to be hidden.",
        details: locatorFailureDetails(selector, state: "hidden", timeoutMs: options.timeoutMs)
    )
}
767
+
768
/// Returns the first element matching `selector`, or `nil`.
///
/// Lookup order:
/// 1. The first match of the selector's direct query, if it satisfies the
///    selector's name constraint.
/// 2. If that first match exists but has a different name, the other results
///    of the direct query (bounded by `maxFindAllResults`). The direct query
///    for role/type selectors filters by element type only, so the named
///    element is frequently not the first one of its type.
/// 3. A matching element inside an alert.
/// 4. Unless the selector is direct-query-only, a bounded scan of all
///    descendants of the app.
private func findElementObject(_ selector: AsturSelector) -> XCUIElement? {
    if let query = directQuery(selector) {
        let first = query.firstMatch
        if first.exists {
            if matchesName(first, expected: selector.name, exact: selector.exact) {
                return first
            }

            // The query has results but the first one carries the wrong
            // name; keep looking through the remaining results instead of
            // giving up.
            let named = boundedElements(query, limit: maxFindAllResults).first(where: { candidate in
                matches(candidate, selector: selector)
                    && matchesName(candidate, expected: selector.name, exact: selector.exact)
            })
            if let named {
                return named
            }
        }

        if let alertElement = findAlertElement(selector) {
            return alertElement
        }

        if usesDirectQueryOnly(selector) {
            return nil
        }
    }

    return boundedElements(app.descendants(matching: .any), limit: maxFindAllResults)
        .first { matches($0, selector: selector) }
}
787
+
788
/// Looks for an exact match inside alerts of the app and, failing that, of
/// SpringBoard (`com.apple.springboard`).
///
/// Only exact selectors using the `id`, `accessibility` or `text` strategy
/// are supported; everything else yields `nil`.
private func findAlertElement(_ selector: AsturSelector) -> XCUIElement? {
    guard selector.exact else {
        return nil
    }

    let wanted = selector.value
    let predicate: NSPredicate
    switch selector.strategy.lowercased() {
    case "id":
        predicate = NSPredicate(format: "identifier == %@", wanted)
    case "accessibility", "text":
        predicate = NSPredicate(
            format: "identifier == %@ OR label == %@ OR value == %@",
            wanted,
            wanted,
            wanted
        )
    default:
        return nil
    }

    let alertHosts = [app, XCUIApplication(bundleIdentifier: "com.apple.springboard")]
    // Evaluated lazily so the second host is only queried when the first has no match.
    return alertHosts.lazy
        .map { host in host.alerts.descendants(matching: .any).matching(predicate).firstMatch }
        .first(where: { $0.exists })
}
821
+
822
/// Returns up to `maxFindAllResults` elements matching `selector`, drawn from
/// the selector's direct query when one exists and from all descendants of
/// the app otherwise.
private func findElementObjects(_ selector: AsturSelector) -> [XCUIElement] {
    let source = directQuery(selector) ?? app.descendants(matching: .any)
    let accepted = boundedElements(source, limit: maxFindAllResults)
        .filter { candidate in matches(candidate, selector: selector) }
    return Array(accepted.prefix(maxFindAllResults))
}
835
+
836
/// Tells `findElementObject` whether to skip the full descendant scan when
/// the direct query found nothing.
///
/// True for exact `id` / `accessibility` / `text` selectors, and for `role` /
/// `type` selectors whose name is absent or a `String`.
private func usesDirectQueryOnly(_ selector: AsturSelector) -> Bool {
    let strategy = selector.strategy.lowercased()

    if ["id", "accessibility", "text"].contains(strategy) {
        return selector.exact
    }

    if strategy == "role" || strategy == "type" {
        guard let name = selector.name else {
            return true
        }
        return name is String
    }

    return false
}
846
+
847
/// Builds the XCTest query that serves `selector` directly, or returns `nil`
/// when the strategy has no direct query (`xpath`, `coordinates`, unknown
/// strategies, or a role/type value that maps to no element type).
///
/// Exact selectors compare with `==`; non-exact ones use a case- and
/// diacritic-insensitive `CONTAINS`. `text` selectors search static texts
/// only; `role` / `type` selectors filter by element type alone.
private func directQuery(_ selector: AsturSelector) -> XCUIElementQuery? {
    let needle = selector.value

    // All descendants of the app, narrowed by `predicate`.
    func anyDescendant(where predicate: NSPredicate) -> XCUIElementQuery {
        app.descendants(matching: .any).matching(predicate)
    }

    switch selector.strategy.lowercased() {
    case "id":
        guard selector.exact else {
            return anyDescendant(where: NSPredicate(format: "identifier CONTAINS[cd] %@", needle))
        }
        return app.descendants(matching: .any).matching(identifier: needle)

    case "accessibility":
        let predicate = selector.exact
            ? NSPredicate(format: "identifier == %@ OR label == %@", needle, needle)
            : NSPredicate(format: "identifier CONTAINS[cd] %@ OR label CONTAINS[cd] %@", needle, needle)
        return anyDescendant(where: predicate)

    case "text":
        let predicate = selector.exact
            ? NSPredicate(format: "label == %@ OR value == %@", needle, needle)
            : NSPredicate(format: "label CONTAINS[cd] %@ OR value CONTAINS[cd] %@", needle, needle)
        return app.staticTexts.matching(predicate)

    case "role":
        return elementTypeForRole(needle).map { app.descendants(matching: $0) }

    case "type":
        return elementTypeFromString(needle).map { app.descendants(matching: $0) }

    case "xpath", "coordinates":
        return nil

    default:
        return nil
    }
}
899
+
900
/// Picks the narrowest candidate query covering every selector: static texts
/// when all selectors use the `text` strategy, buttons when all are `role`
/// selectors mapping to `.button`, and all descendants otherwise.
private func findManyCandidateQuery(_ selectors: [AsturSelector]) -> XCUIElementQuery {
    let allText = selectors.allSatisfy { selector in
        selector.strategy.lowercased() == "text"
    }
    guard !allText else {
        return app.staticTexts
    }

    let allButtonRoles = selectors.allSatisfy { selector in
        selector.strategy.lowercased() == "role" && elementTypeForRole(selector.value) == .button
    }
    guard !allButtonRoles else {
        return app.buttons
    }

    return app.descendants(matching: .any)
}
911
+
912
/// Builds one static-text query covering every selector, or returns `nil`
/// when that is not possible.
///
/// A combined query exists only for a non-empty list in which every selector
/// is an exact `text` selector without a name constraint.
private func directManyQuery(_ selectors: [AsturSelector]) -> XCUIElementQuery? {
    if selectors.isEmpty {
        return nil
    }

    for selector in selectors {
        let isPlainExactText = selector.strategy.lowercased() == "text"
            && selector.exact
            && selector.name == nil
        if !isPlainExactText {
            return nil
        }
    }

    let wantedTexts = selectors.map { $0.value }
    let predicate = NSPredicate(format: "label IN %@ OR value IN %@", wantedTexts, wantedTexts)
    return app.staticTexts.matching(predicate)
}
929
+
930
/// Tests whether an existing element satisfies `selector`.
///
/// - `id`: identifier.
/// - `accessibility`: identifier or label.
/// - `text`: label, value or placeholder.
/// - `role`: element type for the role, plus the selector's name constraint.
/// - `type`: full or short element type name; the value `"any"` (trimmed,
///   case-insensitive) matches every element.
/// - Any other strategy, or a non-existent element: no match.
private func matches(_ element: XCUIElement, selector: AsturSelector) -> Bool {
    guard element.exists else {
        return false
    }

    let wanted = selector.value
    let exact = selector.exact

    switch selector.strategy.lowercased() {
    case "id":
        return match(element.identifier, wanted, exact: exact)

    case "accessibility":
        let fields = [
            element.identifier,
            element.label
        ]
        return fields.contains { field in match(field, wanted, exact: exact) }

    case "text":
        let fields = [
            element.label,
            stringValue(element.value),
            element.placeholderValue
        ]
        return fields.contains { field in match(field, wanted, exact: exact) }

    case "role":
        return hasRole(element, role: wanted)
            && matchesName(element, expected: selector.name, exact: exact)

    case "type":
        let normalized = wanted.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        if normalized == "any" {
            return true
        }
        return match(elementTypeName(element.elementType), wanted, exact: exact)
            || match(shortElementTypeName(element.elementType), wanted, exact: exact)

    default:
        return false
    }
}
967
+
968
/// Checks the selector's optional name constraint against the element's
/// identifier, label, value and placeholder.
///
/// - Returns: `true` when `expected` is not a `String` (including `nil`),
///   otherwise whether any of the four fields matches it.
private func matchesName(_ element: XCUIElement, expected: Any?, exact: Bool) -> Bool {
    guard let wantedName = expected as? String else {
        return true
    }

    let nameFields = [
        element.identifier,
        element.label,
        stringValue(element.value),
        element.placeholderValue
    ]
    return nameFields.contains { field in match(field, wantedName, exact: exact) }
}
980
+
981
/// Whether the element's type equals the element type mapped from `role`.
/// Roles that map to no element type never match.
private func hasRole(_ element: XCUIElement, role: String) -> Bool {
    if let expectedType = elementTypeForRole(role) {
        return element.elementType == expectedType
    }
    return false
}
988
+
989
/// Resolves a drag target to a coordinate.
///
/// An explicit point wins. Otherwise the target selector is waited for with
/// state `"visible"` (using the timeout and interval of `wait`) and the
/// center of the resolved element is returned.
///
/// - Throws: `AsturAgentFailure` with code `INVALID_PARAMS` when the target
///   has neither point nor selector, or `ELEMENT_NOT_FOUND` when the
///   selector does not resolve.
private func resolveTargetCoordinate(_ target: AsturElementDragTarget, wait: AsturWaitOptions) throws -> XCUICoordinate {
    if let explicitPoint = target.point {
        return try coordinate(explicitPoint)
    }

    guard let targetSelector = target.selector else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "Drag target must include either point or selector.")
    }

    let visibleWait = AsturWaitOptions(
        timeoutMs: wait.timeoutMs,
        intervalMs: wait.intervalMs,
        state: "visible"
    )

    if let targetElement = waitForElementObject(targetSelector, options: visibleWait) {
        return targetElement.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.5))
    }

    throw AsturAgentFailure(
        code: "ELEMENT_NOT_FOUND",
        message: "Drag target selector did not resolve.",
        details: ["targetSelector": targetSelector.toDictionary()]
    )
}
1012
+
1013
/// Converts a point into a coordinate offset from the app's origin, after
/// clamping the point with `clamp(_:)`.
private func coordinate(_ point: AsturPoint) throws -> XCUICoordinate {
    let safePoint = clamp(point)
    let origin = app.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0))
    return origin.withOffset(CGVector(dx: safePoint.x, dy: safePoint.y))
}
1018
+
1019
/// Clamps a point so that each component lies between 0 and one less than
/// the app frame's (rounded-down) far edge; the upper limit is never below 1.
// NOTE(review): the limits come from `frame.maxX` / `frame.maxY`, not the
// frame's width/height — confirm this is intended when the app's origin is
// not (0, 0).
private func clamp(_ point: AsturPoint) -> AsturPoint {
    let appFrame = app.frame

    func upperLimit(for farEdge: CGFloat) -> Int {
        max(1, Int(farEdge.rounded(.down)) - 1)
    }

    func limited(_ component: Int, to upper: Int) -> Int {
        min(max(0, component), upper)
    }

    let limitX = upperLimit(for: appFrame.maxX)
    let limitY = upperLimit(for: appFrame.maxY)

    return AsturPoint(
        x: limited(point.x, to: limitX),
        y: limited(point.y, to: limitY)
    )
}
1029
+
1030
/// Empties a text input using up to three escalating attempts.
///
/// 1. Tap near the right edge, "Select All", then delete once.
/// 2. If text remains, send a burst of delete keys sized from the current
///    value/label length (between 32 and 120 keys).
/// 3. If text still remains, "Select All" and delete once more.
private func clearText(_ element: XCUIElement) {
    let deleteKey = XCUIKeyboardKey.delete.rawValue

    element.coordinate(withNormalizedOffset: CGVector(dx: 0.96, dy: 0.5)).tap()
    selectAllText(in: element)
    element.typeText(deleteKey)

    guard !isTextInputEmpty(element) else {
        return
    }

    let valueLength = stringValue(element.value)?.count ?? 0
    let remainingLength = max(valueLength, element.label.count)
    let burstSize = min(120, max(32, remainingLength + 24))
    element.typeText(String(repeating: deleteKey, count: burstSize))

    guard !isTextInputEmpty(element) else {
        return
    }

    selectAllText(in: element)
    element.typeText(deleteKey)
}
1048
+
1049
/// Inserts `value` through the pasteboard, falling back to typing.
///
/// The text is placed on the general pasteboard, the element is long-pressed for
/// 0.5 s, and the "Paste" menu item is tapped if it appears within 0.8 s. When the
/// menu item never appears, the text is typed with `typeText` instead.
private func pasteText(_ value: String, into element: XCUIElement) {
    UIPasteboard.general.string = value
    element.press(forDuration: 0.5)

    let pasteItem = app.menuItems["Paste"].firstMatch
    guard pasteItem.waitForExistence(timeout: 0.8) else {
        // No edit menu: type the text instead.
        element.typeText(value)
        return
    }

    pasteItem.tap()
    usleep(80_000) // short pause after tapping Paste
}
1063
+
1064
/// Long-presses the element for 0.5 s and taps the "Select All" menu item if it
/// appears within 0.6 s. Does nothing further when the item never shows up.
private func selectAllText(in element: XCUIElement) {
    element.press(forDuration: 0.5)

    let selectAllItem = app.menuItems["Select All"].firstMatch
    guard selectAllItem.waitForExistence(timeout: 0.6) else {
        return
    }

    selectAllItem.tap()
    usleep(60_000) // short pause after tapping Select All
}
1072
+
1073
/// Reports whether a text input currently holds no user text.
///
/// The input counts as empty when its whitespace-trimmed value is missing, is the
/// empty string, or equals the whitespace-trimmed placeholder.
private func isTextInputEmpty(_ element: XCUIElement) -> Bool {
    let trimmedValue = stringValue(element.value)?.trimmingCharacters(in: .whitespacesAndNewlines)
    let trimmedPlaceholder = element.placeholderValue?.trimmingCharacters(in: .whitespacesAndNewlines)

    guard let currentText = trimmedValue, !currentText.isEmpty else {
        return true
    }
    return currentText == trimmedPlaceholder
}
1079
+
1080
/// An element is visible when it exists, has a non-empty frame, and that frame
/// intersects the application frame.
private func isVisible(_ element: XCUIElement) -> Bool {
    if !element.exists {
        return false
    }

    let elementFrame = element.frame
    if elementFrame.isEmpty {
        return false
    }
    return elementFrame.intersects(app.frame)
}
1088
+
1089
/// Samples the element frame twice, 30 ms apart, and reports whether it stayed the same.
private func isStable(_ element: XCUIElement) -> Bool {
    let frameBefore = element.frame
    usleep(30_000)
    let frameAfter = element.frame
    return frameBefore.equalTo(frameAfter)
}
1094
+
1095
/// Collects the leading elements of `query`, by index, until `limit` elements were
/// gathered or an index yields an element that does not exist.
///
/// - Returns: An empty array when `limit` is not positive.
private func boundedElements(_ query: XCUIElementQuery, limit: Int) -> [XCUIElement] {
    guard limit > 0 else {
        return []
    }

    var found: [XCUIElement] = []
    found.reserveCapacity(limit)

    var index = 0
    while index < limit {
        let candidate = query.element(boundBy: index)
        guard candidate.exists else {
            break
        }
        found.append(candidate)
        index += 1
    }

    return found
}
1114
+
1115
/// Tree-snapshot wrapper around `boundedElements(_:limit:)`; yields nothing for a
/// non-positive `limit`.
private func boundedTreeElements(_ query: XCUIElementQuery, limit: Int) -> [XCUIElement] {
    limit > 0 ? boundedElements(query, limit: limit) : []
}
1122
+
1123
/// Scans up to `scanLimit` indices of `query`, collecting existing elements.
///
/// Scanning stops early once `collectLimit` elements were collected or after
/// `maxTreeSnapshotConsecutiveMisses` non-existing indices in a row.
private func scanTreeElements(_ query: XCUIElementQuery, scanLimit: Int, collectLimit: Int) -> [XCUIElement] {
    if scanLimit <= 0 || collectLimit <= 0 {
        return []
    }

    var collected: [XCUIElement] = []
    collected.reserveCapacity(collectLimit)
    var consecutiveMisses = 0

    for index in 0..<scanLimit {
        let candidate = query.element(boundBy: index)

        guard candidate.exists else {
            consecutiveMisses += 1
            if consecutiveMisses >= maxTreeSnapshotConsecutiveMisses {
                break
            }
            continue
        }

        collected.append(candidate)
        consecutiveMisses = 0
        if collected.count >= collectLimit {
            break
        }
    }

    return collected
}
1150
+
1151
/// Gathers candidate elements for a tree snapshot from a fixed list of queries,
/// each capped by its own limit and by `maxTreeSnapshotChildren`.
private func treeCandidateElements() -> [XCUIElement] {
    let identifiedOthers = app.otherElements.matching(NSPredicate(format: "identifier != ''"))
    let cappedQueries: [(query: XCUIElementQuery, cap: Int)] = [
        (identifiedOthers, maxTreeSnapshotOtherPerQuery),
        (app.staticTexts, maxTreeSnapshotTextPerQuery),
        (app.buttons, maxTreeSnapshotControlsPerQuery),
        (app.textFields, maxTreeSnapshotControlsPerQuery),
        (app.secureTextFields, maxTreeSnapshotControlsPerQuery),
        (app.textViews, maxTreeSnapshotTextViewPerQuery),
        (app.switches, maxTreeSnapshotControlsPerQuery),
        (app.sliders, maxTreeSnapshotControlsPerQuery)
    ]

    var candidates: [XCUIElement] = []
    for entry in cappedQueries {
        candidates += boundedTreeElements(entry.query, limit: min(entry.cap, maxTreeSnapshotChildren))
    }
    return candidates
}
1167
+
1168
/// Builds the flat list of useful tree snapshots for the current screen.
///
/// Identified `other` elements are scanned first (tolerating gaps), then each typed
/// query contributes up to its own cap while the total stays within
/// `maxTreeSnapshotChildren`. The result is de-duplicated before returning.
private func treeCandidateSnapshots(viewport: CGRect) -> [AsturElementSnapshot] {
    var collected: [AsturElementSnapshot] = []
    collected.reserveCapacity(maxTreeSnapshotChildren)

    let identifiedOthers = app.otherElements.matching(NSPredicate(format: "identifier != ''"))
    scanTreeSnapshots(
        identifiedOthers,
        scanLimit: maxTreeSnapshotOtherScan,
        collectLimit: maxTreeSnapshotOtherPerQuery,
        viewport: viewport,
        into: &collected
    )

    let typedQueries: [(query: XCUIElementQuery, cap: Int)] = [
        (app.staticTexts, maxTreeSnapshotTextPerQuery),
        (app.buttons, maxTreeSnapshotControlsPerQuery),
        (app.textFields, maxTreeSnapshotControlsPerQuery),
        (app.secureTextFields, maxTreeSnapshotControlsPerQuery),
        (app.textViews, maxTreeSnapshotTextViewPerQuery),
        (app.switches, maxTreeSnapshotControlsPerQuery),
        (app.sliders, maxTreeSnapshotControlsPerQuery)
    ]

    for entry in typedQueries {
        let remaining = maxTreeSnapshotChildren - collected.count
        guard remaining > 0 else {
            break
        }

        let elements = boundedTreeElements(entry.query, limit: min(entry.cap, remaining))
        collected += elements.compactMap { element -> AsturElementSnapshot? in
            let candidate = treeSnapshot(element, viewport: viewport)
            return isUsefulTreeSnapshot(candidate, viewport: viewport) ? candidate : nil
        }
    }

    return deduplicateTreeSnapshots(collected)
}
1205
+
1206
/// Scans `query` by index and appends useful snapshots to `snapshots`.
///
/// The scan ends when `scanLimit` indices were visited, `collectLimit` snapshots were
/// added by this call, `snapshots` reached `maxTreeSnapshotChildren`, or
/// `maxTreeSnapshotConsecutiveMisses` non-existing indices occurred in a row.
private func scanTreeSnapshots(
    _ query: XCUIElementQuery,
    scanLimit: Int,
    collectLimit: Int,
    viewport: CGRect,
    into snapshots: inout [AsturElementSnapshot]
) {
    guard scanLimit > 0, collectLimit > 0 else {
        return
    }

    var added = 0
    var consecutiveMisses = 0
    var index = 0

    while index < scanLimit, snapshots.count < maxTreeSnapshotChildren, added < collectLimit {
        let candidate = query.element(boundBy: index)
        index += 1

        if candidate.exists {
            consecutiveMisses = 0
            let candidateSnapshot = treeSnapshot(candidate, viewport: viewport)
            if isUsefulTreeSnapshot(candidateSnapshot, viewport: viewport) {
                snapshots.append(candidateSnapshot)
                added += 1
            }
        } else {
            consecutiveMisses += 1
            if consecutiveMisses >= maxTreeSnapshotConsecutiveMisses {
                break
            }
        }
    }
}
1242
+
1243
/// Removes snapshots that repeat an earlier one, keeping first occurrences in order.
///
/// Two snapshots are duplicates when their type, id, label, text, value and bounds
/// all agree (a missing string is treated like an empty one).
///
/// The identity key is the list of fields itself rather than a "|"-joined string:
/// joining free-form text with a separator lets different snapshots collide
/// (e.g. id "a|b" + label "c" versus id "a" + label "b|c"), which would silently
/// drop a distinct element.
private func deduplicateTreeSnapshots(_ snapshots: [AsturElementSnapshot]) -> [AsturElementSnapshot] {
    var seen = Set<[String]>()
    seen.reserveCapacity(snapshots.count)

    return snapshots.filter { snapshot in
        let identity: [String] = [
            snapshot.type,
            snapshot.id ?? "",
            snapshot.label ?? "",
            snapshot.text ?? "",
            snapshot.value ?? "",
            String(snapshot.bounds.x),
            String(snapshot.bounds.y),
            String(snapshot.bounds.width),
            String(snapshot.bounds.height)
        ]
        return seen.insert(identity).inserted
    }
}
1268
+
1269
/// Decides whether a snapshot is worth including in the tree payload.
///
/// Rejected: invisible or zero-sized snapshots; unnamed snapshots whose type is not
/// in `isUsefulUnnamedTreeType`; and `XCUIElementTypeOther` snapshots that are
/// unnamed or cover (within about one unit) the whole viewport.
private func isUsefulTreeSnapshot(_ snapshot: AsturElementSnapshot, viewport: CGRect) -> Bool {
    let area = snapshot.bounds
    guard snapshot.visible, area.width > 0, area.height > 0 else {
        return false
    }

    let isNamed = snapshot.id != nil || snapshot.label != nil || snapshot.text != nil || snapshot.value != nil
    guard isNamed || isUsefulUnnamedTreeType(snapshot.type) else {
        return false
    }

    guard snapshot.type == "XCUIElementTypeOther" else {
        return true
    }

    let fillsViewport = area.x <= 1
        && area.y <= 1
        && area.width >= max(1, Int(viewport.width.rounded()) - 2)
        && area.height >= max(1, Int(viewport.height.rounded()) - 2)
    return isNamed && !fillsViewport
}
1289
+
1290
/// Element type names that are kept in the tree even without any id, label, text or value.
private func isUsefulUnnamedTreeType(_ type: String) -> Bool {
    switch type {
    case "XCUIElementTypeButton",
         "XCUIElementTypeTextField",
         "XCUIElementTypeSecureTextField",
         "XCUIElementTypeTextView",
         "XCUIElementTypeSwitch",
         "XCUIElementTypeSlider":
        return true
    default:
        return false
    }
}
1298
+
1299
/// Ordering predicate for tree snapshots: top-to-bottom, then left-to-right, then by type name.
///
/// Positions that differ by 4 units or less on an axis are treated as equal on that axis.
/// NOTE(review): because of that tolerance the relation is not transitive
/// (y = 0, 3, 6 compare as equal, equal, less), so it is not a strict weak ordering.
private func treeSnapshotPrecedes(_ left: AsturElementSnapshot, _ right: AsturElementSnapshot) -> Bool {
    let verticalGap = left.bounds.y - right.bounds.y
    if abs(verticalGap) > 4 {
        return verticalGap < 0
    }

    let horizontalGap = left.bounds.x - right.bounds.x
    if abs(horizontalGap) > 4 {
        return horizontalGap < 0
    }

    return left.type < right.type
}
1308
+
1309
/// Captures a full snapshot of `element`.
///
/// When `includeChildren` is set, up to `maxSnapshotChildren` descendants (of any
/// type) are captured as childless snapshots. `focused` is always reported as `nil`.
private func snapshot(_ element: XCUIElement, includeChildren: Bool) -> AsturElementSnapshot {
    var childSnapshots: [AsturElementSnapshot] = []
    if includeChildren {
        let descendants = boundedElements(element.descendants(matching: .any), limit: maxSnapshotChildren)
        childSnapshots = descendants.map { descendant in
            snapshot(descendant, includeChildren: false)
        }
    }

    return AsturElementSnapshot(
        id: element.identifier.nonEmpty,
        text: bestText(element),
        label: element.label.nonEmpty,
        value: snapshotValue(element),
        type: elementTypeName(element.elementType),
        enabled: element.isEnabled,
        visible: isVisible(element),
        selected: element.isSelected,
        focused: nil,
        bounds: bounds(element.frame),
        children: childSnapshots
    )
}
1330
+
1331
/// Captures a lightweight snapshot of `element` for the tree payload.
///
/// Visibility is derived from the frame and the supplied viewport (an empty viewport
/// counts every non-empty frame as visible). `enabled` is always `true`, and
/// `selected`/`focused` are always `nil`. Children come from `treeChildElements`,
/// filtered by `isUsefulTreeSnapshot`, de-duplicated and sorted.
private func treeSnapshot(_ element: XCUIElement, viewport: CGRect) -> AsturElementSnapshot {
    let kind = element.elementType
    let elementFrame = element.frame
    let elementLabel = element.label.nonEmpty
    let elementValue = lightweightSnapshotValue(element, type: kind)
    let isOnScreen = !elementFrame.isEmpty && (viewport.isEmpty || elementFrame.intersects(viewport))

    let childElements = treeChildElements(element, type: kind, frame: elementFrame, viewport: viewport)
    var usefulChildren: [AsturElementSnapshot] = []
    for leaf in childElements.map({ treeLeafSnapshot($0, viewport: viewport) })
    where isUsefulTreeSnapshot(leaf, viewport: viewport) {
        usefulChildren.append(leaf)
    }
    let orderedChildren = deduplicateTreeSnapshots(usefulChildren).sorted(by: treeSnapshotPrecedes)

    return AsturElementSnapshot(
        id: element.identifier.nonEmpty,
        text: elementLabel ?? elementValue,
        label: elementLabel,
        value: elementValue,
        type: elementTypeName(kind),
        enabled: true,
        visible: isOnScreen,
        selected: nil,
        focused: nil,
        bounds: bounds(elementFrame),
        children: orderedChildren
    )
}
1355
+
1356
/// Captures a lightweight, childless snapshot of `element`.
///
/// Uses the same field rules as `treeSnapshot`: `enabled` is always `true`,
/// `selected`/`focused` are `nil`, and visibility is computed from frame and viewport.
private func treeLeafSnapshot(_ element: XCUIElement, viewport: CGRect) -> AsturElementSnapshot {
    let kind = element.elementType
    let elementFrame = element.frame
    let elementLabel = element.label.nonEmpty
    let elementValue = lightweightSnapshotValue(element, type: kind)

    let isOnScreen: Bool
    if elementFrame.isEmpty {
        isOnScreen = false
    } else {
        isOnScreen = viewport.isEmpty || elementFrame.intersects(viewport)
    }

    return AsturElementSnapshot(
        id: element.identifier.nonEmpty,
        text: elementLabel ?? elementValue,
        label: elementLabel,
        value: elementValue,
        type: elementTypeName(kind),
        enabled: true,
        visible: isOnScreen,
        selected: nil,
        focused: nil,
        bounds: bounds(elementFrame),
        children: []
    )
}
1377
+
1378
/// Child elements to nest under a tree snapshot.
///
/// Currently always returns no children; none of the parameters are consulted.
private func treeChildElements(
    _ element: XCUIElement,
    type: XCUIElement.ElementType,
    frame: CGRect,
    viewport: CGRect
) -> [XCUIElement] {
    let noChildren: [XCUIElement] = []
    return noChildren
}
1386
+
1387
/// Reports whether an element qualifies for child expansion.
///
/// Only `.other` elements with an id and a non-empty frame intersecting the viewport
/// qualify, and only when their area is above 900 and below 75% of the viewport area.
private func shouldExpandTreeElement(
    type: XCUIElement.ElementType,
    frame: CGRect,
    viewport: CGRect,
    id: String?
) -> Bool {
    if type != .other || id == nil {
        return false
    }
    if frame.isEmpty || !frame.intersects(viewport) {
        return false
    }

    let viewportArea = max(1, viewport.width * viewport.height)
    let upperBound = viewportArea * 0.75
    let area = frame.width * frame.height
    return area > 900 && area < upperBound
}
1401
+
1402
/// Reads the element value only for types where the tree reports one
/// (text field, secure text field, text view, slider, switch); `nil` otherwise
/// or when the value is empty.
private func lightweightSnapshotValue(_ element: XCUIElement, type: XCUIElement.ElementType) -> String? {
    let valueBearingTypes: [XCUIElement.ElementType] = [
        .textField, .secureTextField, .textView, .slider, .switch
    ]
    guard valueBearingTypes.contains(type) else {
        return nil
    }
    return stringValue(element.value)?.nonEmpty
}
1410
+
1411
/// First non-empty text among the element's label, value and placeholder, in that order.
private func bestText(_ element: XCUIElement) -> String? {
    if let label = element.label.nonEmpty {
        return label
    }
    if let value = stringValue(element.value)?.nonEmpty {
        return value
    }
    return element.placeholderValue?.nonEmpty
}
1416
+
1417
/// All non-empty texts of the element, ordered label, value, placeholder.
private func textValues(_ element: XCUIElement) -> [String] {
    let label = element.label.nonEmpty
    let value = stringValue(element.value)?.nonEmpty
    let placeholder = element.placeholderValue?.nonEmpty

    var texts: [String] = []
    for candidate in [label, value, placeholder] {
        if let candidate {
            texts.append(candidate)
        }
    }
    return texts
}
1424
+
1425
/// Value reported in a full snapshot.
///
/// A non-empty element value wins. Text inputs (text field, secure text field,
/// text view) without one fall back to `bestText`; every other type yields `nil`.
private func snapshotValue(_ element: XCUIElement) -> String? {
    if let explicitValue = stringValue(element.value)?.nonEmpty {
        return explicitValue
    }

    let textInputTypes: [XCUIElement.ElementType] = [.textField, .secureTextField, .textView]
    return textInputTypes.contains(element.elementType) ? bestText(element) : nil
}
1437
+
1438
/// Converts a frame into integer bounds (rounded to nearest; width/height never negative).
///
/// `Int(_:)` traps on non-finite or out-of-range floating-point input, and a frame can
/// carry such values (`CGRect.null` has an infinite origin). Each component is
/// therefore converted with `Int(exactly:)`, falling back to 0 when it cannot be
/// represented.
private func bounds(_ frame: CGRect) -> AsturBounds {
    // Rounded component as Int, or 0 for NaN / infinity / out-of-range values.
    func wholeUnits(_ component: CGFloat) -> Int {
        Int(exactly: component.rounded()) ?? 0
    }

    return AsturBounds(
        x: wholeUnits(frame.minX),
        y: wholeUnits(frame.minY),
        width: max(0, wholeUnits(frame.width)),
        height: max(0, wholeUnits(frame.height))
    )
}
1446
+
1447
/// Parses the object stored under `"selector"` in `params`.
///
/// - Throws: Whatever `parseSelector` throws, including when the key is missing
///   or is not an object.
private func parseSelectorFromParams(_ params: [String: Any]) throws -> AsturSelector {
    let rawSelector = params.mapValue("selector")
    return try parseSelector(rawSelector)
}
1450
+
1451
/// Parses the array of selector objects stored under `"selectors"`.
///
/// - Throws: `INVALID_PARAMS` when the key is missing or is not an array of objects,
///   or any error raised while parsing an individual selector.
private func parseSelectorsFromParams(_ params: [String: Any]) throws -> [AsturSelector] {
    guard let rawSelectors = params["selectors"] as? [[String: Any]] else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "selectors is required and must be an array.")
    }

    var selectors: [AsturSelector] = []
    selectors.reserveCapacity(rawSelectors.count)
    for rawSelector in rawSelectors {
        selectors.append(try parseSelector(rawSelector))
    }
    return selectors
}
1458
+
1459
/// Builds a selector from its dictionary form.
///
/// `strategy` and `value` are required strings, `exact` defaults to `true`, and
/// `name` is passed through untouched.
///
/// - Throws: `INVALID_PARAMS` when `raw` is `nil` or a field has the wrong type.
private func parseSelector(_ raw: [String: Any]?) throws -> AsturSelector {
    guard let fields = raw else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "selector is required and must be an object.")
    }

    let strategy = try fields.requiredString("strategy")
    let value = try fields.requiredString("value")
    let exact = try fields.boolValue("exact") ?? true

    return AsturSelector(strategy: strategy, value: value, exact: exact, name: fields["name"])
}
1471
+
1472
/// Parses wait options from an optional dictionary.
///
/// - `state`: defaults to "attached"; must be "attached", "visible" or "hidden".
/// - `timeout`: defaults to `defaultWaitTimeoutMs`; negative values become 0.
/// - `interval`: defaults to `defaultWaitIntervalMs`; values below 30 become 30.
///
/// - Throws: `INVALID_PARAMS` for an unknown state or a wrongly typed field.
private func parseWaitOptions(_ raw: [String: Any]?) throws -> AsturWaitOptions {
    let requestedState = try raw?.stringValue("state")
    let state = requestedState ?? "attached"

    let allowedStates = ["attached", "visible", "hidden"]
    guard allowedStates.contains(state) else {
        throw AsturAgentFailure(
            code: "INVALID_PARAMS",
            message: "options.state must be attached, visible, or hidden.",
            details: ["state": state]
        )
    }

    let timeout = try raw?.intValue("timeout") ?? defaultWaitTimeoutMs
    let interval = try raw?.intValue("interval") ?? defaultWaitIntervalMs

    return AsturWaitOptions(
        timeoutMs: max(0, timeout),
        intervalMs: max(30, interval),
        state: state
    )
}
1489
+
1490
/// Parses the options shared by element actions.
///
/// Wait options are parsed from the same dictionary; an "attached" wait state is
/// upgraded to "visible". Actionability flags come from the nested `actionability`
/// object and default to visible = true, enabled = true, stable = false,
/// hittable = false. The remaining fields are optional pass-throughs.
private func parseElementActionOptions(_ raw: [String: Any]?) throws -> AsturElementActionOptions {
    let parsedWait = try parseWaitOptions(raw)
    let effectiveWait = parsedWait.state == "attached"
        ? AsturWaitOptions(timeoutMs: parsedWait.timeoutMs, intervalMs: parsedWait.intervalMs, state: "visible")
        : parsedWait

    let flags = raw?.mapValue("actionability")
    let requireVisible = try flags?.boolValue("visible") ?? true
    let requireEnabled = try flags?.boolValue("enabled") ?? true
    let requireStable = try flags?.boolValue("stable") ?? false
    let requireHittable = try flags?.boolValue("hittable") ?? false

    let keyboard = try raw?.stringValue("keyboard")
    let intervalMs = try raw?.intValue("intervalMs")
    let durationMs = try raw?.intValue("durationMs")
    let clear = try raw?.boolValue("clear")
    let textInputMode = try raw?.stringValue("textInputMode")

    return AsturElementActionOptions(
        wait: effectiveWait,
        actionability: AsturActionability(
            visible: requireVisible,
            enabled: requireEnabled,
            stable: requireStable,
            hittable: requireHittable
        ),
        keyboard: keyboard,
        intervalMs: intervalMs,
        durationMs: durationMs,
        clear: clear,
        textInputMode: textInputMode
    )
}
1514
+
1515
/// Parses the `target` object of an element drag.
///
/// The target may carry a `selector` object and/or `x`/`y` coordinates. If either
/// coordinate key is present, both must be numeric.
///
/// - Throws: `INVALID_PARAMS` when `target` is missing, holds neither form, or a
///   field is malformed.
private func parseElementDragTarget(_ params: [String: Any]) throws -> AsturElementDragTarget {
    guard let target = params.mapValue("target") else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "target is required for element.drag.")
    }

    var selector: AsturSelector?
    if let rawSelector = target.mapValue("selector") {
        selector = try parseSelector(rawSelector)
    }

    var point: AsturPoint?
    let mentionsCoordinate = target["x"] != nil || target["y"] != nil
    if mentionsCoordinate {
        point = AsturPoint(x: try target.requiredInt("x"), y: try target.requiredInt("y"))
    }

    guard selector != nil || point != nil else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "target must be either { x, y } or { selector }.")
    }

    return AsturElementDragTarget(point: point, selector: selector)
}
1531
+
1532
/// Parses the `gesture` object of a swipe: `start`, `end` and an optional
/// `durationMs` (default `defaultSwipeMs`, minimum 50).
///
/// - Throws: `INVALID_PARAMS` when `gesture` or one of its points is missing or malformed.
private func parseSwipeGesture(_ params: [String: Any]) throws -> AsturSwipeGesture {
    guard let gesture = params.mapValue("gesture") else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "gesture is required and must be an object.")
    }

    let start = try parsePointFromMap(gesture, "start")
    let end = try parsePointFromMap(gesture, "end")
    let requestedDuration = try gesture.intValue("durationMs") ?? defaultSwipeMs

    return AsturSwipeGesture(start: start, end: end, durationMs: max(50, requestedDuration))
}
1543
+
1544
/// Parses the `gesture` object of a drag: `start`, `end` and an optional
/// `durationMs` (default `defaultDragMs`, minimum 50).
///
/// - Throws: `INVALID_PARAMS` when `gesture` or one of its points is missing or malformed.
private func parseDragGesture(_ params: [String: Any]) throws -> AsturDragGesture {
    guard let gesture = params.mapValue("gesture") else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "gesture is required and must be an object.")
    }

    let start = try parsePointFromMap(gesture, "start")
    let end = try parsePointFromMap(gesture, "end")
    let requestedDuration = try gesture.intValue("durationMs") ?? defaultDragMs

    return AsturDragGesture(start: start, end: end, durationMs: max(50, requestedDuration))
}
1555
+
1556
/// Convenience alias for `parsePointFromMap` applied to command parameters.
private func parsePointFromParams(_ params: [String: Any], _ key: String) throws -> AsturPoint {
    let point = try parsePointFromMap(params, key)
    return point
}
1559
+
1560
/// Reads the object stored under `key` and converts its numeric `x` and `y` into a point.
///
/// - Throws: `INVALID_PARAMS` when the object is missing or a coordinate is missing
///   or not numeric.
private func parsePointFromMap(_ raw: [String: Any], _ key: String) throws -> AsturPoint {
    guard let rawPoint = raw.mapValue(key) else {
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "\(key) is required and must contain x and y.")
    }

    let x = try rawPoint.requiredInt("x")
    let y = try rawPoint.requiredInt("y")
    return AsturPoint(x: x, y: y)
}
1570
+
1571
/// Builds a successful command result carrying an optional payload.
private func ok(_ id: String, _ result: Any? = nil) -> AsturCommandResult {
    return AsturCommandResult(
        id: id,
        ok: true,
        result: result,
        error: nil
    )
}
1574
+
1575
/// Builds a failed command result from an error code, message and optional details.
private func error(_ id: String, _ code: String, _ message: String, _ details: Any? = nil) -> AsturCommandResult {
    let failure = AsturCommandError(code: code, message: message, details: details)
    return AsturCommandResult(id: id, ok: false, result: nil, error: failure)
}
1583
+ }
1584
+
1585
// MARK: - Timing defaults (milliseconds)

/// Default wait timeout when the caller supplies none.
private let defaultWaitTimeoutMs: Int = 10_000
/// Default polling interval for waits (previously 250).
private let defaultWaitIntervalMs: Int = 100
/// Default long-press duration (previously 800).
private let defaultLongPressMs: Int = 500
/// Default swipe duration (previously 300).
private let defaultSwipeMs: Int = 200
/// Default drag duration (previously 700).
private let defaultDragMs: Int = 400

// MARK: - Tree snapshot limits

/// Cap for the full-hierarchy snapshot tree payload.
private let maxFullTreeNodes: Int = 1_200
/// Overall cap on snapshots collected for one tree.
private let maxTreeSnapshotChildren: Int = 48
/// Number of indices scanned when looking for identified `other` elements.
private let maxTreeSnapshotOtherScan: Int = 16
/// Per-query cap for identified `other` elements.
private let maxTreeSnapshotOtherPerQuery: Int = 6
/// Per-query cap for static texts.
private let maxTreeSnapshotTextPerQuery: Int = 12
private let maxTreeSnapshotTextPerContainer: Int = 4
/// Per-query cap for buttons, text fields, secure text fields, switches and sliders.
private let maxTreeSnapshotControlsPerQuery: Int = 8
/// Per-query cap for text views.
private let maxTreeSnapshotTextViewPerQuery: Int = 6
/// Consecutive non-existing indices after which a scan stops.
private let maxTreeSnapshotConsecutiveMisses: Int = 2

// MARK: - Element query limits

/// Cap on descendants captured by a full element snapshot.
private let maxSnapshotChildren: Int = 2_000
private let maxFindAllResults: Int = 500
private let maxFindManyResults: Int = 500
1602
+
1603
/// Command method names, grouped by domain.
private let supportedCapabilities: [String] = [
    // agent / app
    "agent.ping",
    "app.launch", "app.terminate",
    // device
    "device.screenshot", "device.viewport", "device.setOrientation",
    // tree
    "tree.get",
    // element queries
    "element.find", "element.findAll", "element.findMany", "element.wait",
    // element actions
    "element.tap", "element.doubleTap", "element.longPress", "element.fill", "element.drag",
    // raw gestures
    "gesture.tap", "gesture.doubleTap", "gesture.longPress", "gesture.swipe", "gesture.drag",
    // keyboard
    "keyboard.state", "keyboard.dismiss"
]
1628
+
1629
/// Maps an accessibility-style role name to an XCUITest element type.
///
/// The role is trimmed and lower-cased before lookup; unknown roles yield `nil`.
private func elementTypeForRole(_ role: String) -> XCUIElement.ElementType? {
    let typesByRole: [String: XCUIElement.ElementType] = [
        "button": .button,
        "checkbox": .checkBox,
        "image": .image,
        "img": .image,
        "link": .link,
        "menuitem": .menuItem,
        "radio": .radioButton,
        "slider": .slider,
        "switch": .switch,
        "tab": .tabBar,
        "text": .staticText,
        "textbox": .textField
    ]

    let normalizedRole = role.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    return typesByRole[normalizedRole]
}
1657
+
1658
/// Parses an element type from its textual name.
///
/// Any "XCUIElementType" occurrence is stripped, then the remainder is trimmed and
/// lower-cased. Both compact ("textfield") and spaced ("text field") spellings are
/// accepted, plus a few aliases ("text", "textbox"). Unknown names yield `nil`.
private func elementTypeFromString(_ value: String) -> XCUIElement.ElementType? {
    let normalizedName = value
        .replacingOccurrences(of: "XCUIElementType", with: "")
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .lowercased()

    let aliasTable: [(type: XCUIElement.ElementType, names: [String])] = [
        (.any, ["any"]),
        (.application, ["application"]),
        (.button, ["button"]),
        (.cell, ["cell"]),
        (.checkBox, ["checkbox", "check box"]),
        (.collectionView, ["collectionview", "collection view"]),
        (.image, ["image"]),
        (.link, ["link"]),
        (.menuItem, ["menuitem", "menu item"]),
        (.navigationBar, ["navigationbar", "navigation bar"]),
        (.other, ["other"]),
        (.pageIndicator, ["pageindicator", "page indicator"]),
        (.radioButton, ["radiobutton", "radio button"]),
        (.scrollView, ["scrollview", "scroll view"]),
        (.secureTextField, ["securetextfield", "secure text field"]),
        (.slider, ["slider"]),
        (.staticText, ["statictext", "static text", "text"]),
        (.switch, ["switch"]),
        (.tabBar, ["tabbar", "tab bar"]),
        (.table, ["table"]),
        (.textField, ["textfield", "text field", "textbox"]),
        (.textView, ["textview", "text view"]),
        (.webView, ["webview", "web view"])
    ]

    return aliasTable.first { $0.names.contains(normalizedName) }?.type
}
1713
+
1714
/// Full type name: the short name prefixed with "XCUIElementType".
private func elementTypeName(_ type: XCUIElement.ElementType) -> String {
    let shortName = shortElementTypeName(type)
    return "XCUIElementType" + shortName
}
1717
+
1718
/// Short, prefix-free name for an element type (e.g. "Button").
///
/// Types without an entry in the table fall back to `String(describing:)`.
private func shortElementTypeName(_ type: XCUIElement.ElementType) -> String {
    let knownNames: [(type: XCUIElement.ElementType, name: String)] = [
        (.any, "Any"),
        (.application, "Application"),
        (.button, "Button"),
        (.cell, "Cell"),
        (.checkBox, "CheckBox"),
        (.collectionView, "CollectionView"),
        (.image, "Image"),
        (.link, "Link"),
        (.menuItem, "MenuItem"),
        (.navigationBar, "NavigationBar"),
        (.other, "Other"),
        (.pageIndicator, "PageIndicator"),
        (.radioButton, "RadioButton"),
        (.scrollView, "ScrollView"),
        (.secureTextField, "SecureTextField"),
        (.slider, "Slider"),
        (.staticText, "StaticText"),
        (.switch, "Switch"),
        (.tabBar, "TabBar"),
        (.table, "Table"),
        (.textField, "TextField"),
        (.textView, "TextView"),
        (.webView, "WebView")
    ]

    if let known = knownNames.first(where: { $0.type == type }) {
        return known.name
    }
    return String(describing: type)
}
1770
+
1771
/// Compares an optional actual string against an expected one.
///
/// - Parameters:
///   - actual: Candidate text; `nil` or empty never matches.
///   - expected: Text to look for.
///   - exact: `true` for equality, `false` for a localized, case-insensitive
///     substring search.
private func match(_ actual: String?, _ expected: String, exact: Bool) -> Bool {
    guard let candidate = actual, !candidate.isEmpty else {
        return false
    }

    if exact {
        return candidate == expected
    }
    return candidate.localizedCaseInsensitiveContains(expected)
}
1778
+
1779
/// Converts an arbitrary optional value to a string.
///
/// `nil` stays `nil`, strings are returned unchanged, and anything else is rendered
/// with `String(describing:)`.
private func stringValue(_ value: Any?) -> String? {
    switch value {
    case .none:
        return nil
    case .some(let text as String):
        return text
    case .some(let other):
        return String(describing: other)
    }
}
1790
+
1791
/// Error thrown by the agent's parsing and resolution helpers.
private struct AsturAgentFailure: Error {
    /// Machine-readable error code such as "INVALID_PARAMS".
    let code: String
    /// Human-readable description of the failure.
    let message: String
    /// Optional extra context attached to the failure.
    let details: Any?

    /// Creates a failure; `details` defaults to `nil`.
    init(
        code: String,
        message: String,
        details: Any? = nil
    ) {
        self.details = details
        self.message = message
        self.code = code
    }
}
1802
+
1803
private extension String {
    /// The string itself, or `nil` when it is empty.
    var nonEmpty: String? {
        guard !isEmpty else {
            return nil
        }
        return self
    }
}
1808
+
1809
/// Typed accessors for loosely typed command parameter dictionaries.
///
/// The optional accessors treat a missing key and `NSNull` alike (both yield `nil`)
/// and throw `INVALID_PARAMS` when a value is present with the wrong type.
private extension Dictionary where Key == String, Value == Any {
    /// Nested object stored under `key`, or `nil` when absent or not an object.
    func mapValue(_ key: String) -> [String: Any]? {
        guard let nested = self[key] as? [String: Any] else {
            return nil
        }
        return nested
    }

    /// String stored under `key`.
    /// - Throws: `INVALID_PARAMS` when the value is missing or not a string.
    func requiredString(_ key: String) throws -> String {
        if let text = self[key] as? String {
            return text
        }
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "\(key) is required and must be a string.")
    }

    /// Optional string stored under `key`.
    /// - Throws: `INVALID_PARAMS` when a non-null, non-string value is present.
    func stringValue(_ key: String) throws -> String? {
        guard let raw = self[key] else {
            return nil
        }

        switch raw {
        case is NSNull:
            return nil
        case let text as String:
            return text
        default:
            throw AsturAgentFailure(code: "INVALID_PARAMS", message: "\(key) must be a string.")
        }
    }

    /// Integer stored under `key`.
    /// - Throws: `INVALID_PARAMS` when the value is missing, null, or not numeric.
    func requiredInt(_ key: String) throws -> Int {
        if let number = try intValue(key) {
            return number
        }
        throw AsturAgentFailure(code: "INVALID_PARAMS", message: "\(key) is required and must be numeric.")
    }

    /// Optional integer stored under `key`; `NSNumber` values are converted with `intValue`.
    /// - Throws: `INVALID_PARAMS` when a non-null, non-numeric value is present.
    func intValue(_ key: String) throws -> Int? {
        guard let raw = self[key] else {
            return nil
        }

        switch raw {
        case is NSNull:
            return nil
        case let integer as Int:
            return integer
        case let number as NSNumber:
            return number.intValue
        default:
            throw AsturAgentFailure(code: "INVALID_PARAMS", message: "\(key) must be numeric.")
        }
    }

    /// Optional boolean stored under `key`; `NSNumber` values are converted with `boolValue`.
    /// - Throws: `INVALID_PARAMS` when a non-null value is neither a boolean nor a number.
    func boolValue(_ key: String) throws -> Bool? {
        guard let raw = self[key] else {
            return nil
        }

        switch raw {
        case is NSNull:
            return nil
        case let flag as Bool:
            return flag
        case let number as NSNumber:
            return number.boolValue
        default:
            throw AsturAgentFailure(code: "INVALID_PARAMS", message: "\(key) must be a boolean.")
        }
    }
}