mcp-baepsae 5.1.0 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. package/README-KR.md +98 -33
  2. package/README.md +101 -35
  3. package/bundled/baepsae-native +0 -0
  4. package/dist/backend.d.ts +26 -0
  5. package/dist/backend.d.ts.map +1 -0
  6. package/dist/backend.js +79 -0
  7. package/dist/backend.js.map +1 -0
  8. package/dist/index.js +3 -1
  9. package/dist/index.js.map +1 -1
  10. package/dist/tool-manifest.d.ts +12 -0
  11. package/dist/tool-manifest.d.ts.map +1 -0
  12. package/dist/tool-manifest.js +79 -0
  13. package/dist/tool-manifest.js.map +1 -0
  14. package/dist/tools/info.d.ts.map +1 -1
  15. package/dist/tools/info.js +104 -5
  16. package/dist/tools/info.js.map +1 -1
  17. package/dist/tools/input.js +7 -6
  18. package/dist/tools/input.js.map +1 -1
  19. package/dist/tools/media.d.ts.map +1 -1
  20. package/dist/tools/media.js +137 -11
  21. package/dist/tools/media.js.map +1 -1
  22. package/dist/tools/simulator.js +7 -7
  23. package/dist/tools/simulator.js.map +1 -1
  24. package/dist/tools/system.d.ts.map +1 -1
  25. package/dist/tools/system.js +2 -2
  26. package/dist/tools/system.js.map +1 -1
  27. package/dist/tools/ui.d.ts.map +1 -1
  28. package/dist/tools/ui.js +126 -8
  29. package/dist/tools/ui.js.map +1 -1
  30. package/dist/tools/workflow.d.ts +3 -0
  31. package/dist/tools/workflow.d.ts.map +1 -0
  32. package/dist/tools/workflow.js +434 -0
  33. package/dist/tools/workflow.js.map +1 -0
  34. package/dist/types.d.ts +15 -0
  35. package/dist/types.d.ts.map +1 -1
  36. package/dist/utils.d.ts +19 -3
  37. package/dist/utils.d.ts.map +1 -1
  38. package/dist/utils.js +110 -5
  39. package/dist/utils.js.map +1 -1
  40. package/dist/version.d.ts +1 -1
  41. package/dist/version.js +1 -1
  42. package/native/Sources/Commands/InputCommands.swift +53 -33
  43. package/native/Sources/Commands/SystemCommands.swift +86 -0
  44. package/native/Sources/Commands/UICommands.swift +254 -35
  45. package/native/Sources/Commands/WindowCommands.swift +11 -4
  46. package/native/Sources/IndigoHID/IndigoHIDClient.swift +222 -0
  47. package/native/Sources/IndigoHID/IndigoHIDCoordinates.swift +74 -0
  48. package/native/Sources/IndigoHID/IndigoHIDEvents.swift +63 -0
  49. package/native/Sources/IndigoHID/IndigoHIDLoader.swift +102 -0
  50. package/native/Sources/IndigoHID/IndigoHIDTypes.swift +41 -0
  51. package/native/Sources/Types.swift +26 -0
  52. package/native/Sources/Utils.swift +653 -13
  53. package/native/Sources/Version.swift +1 -1
  54. package/native/Sources/main.swift +55 -8
  55. package/native/Tests/BaepsaeNativeTests/BinaryInvocationTests.swift +54 -6
  56. package/package.json +12 -3
  57. package/scripts/dump-tabbar-actions.mjs +312 -0
  58. package/scripts/generate-tool-manifest.mjs +75 -0
  59. package/scripts/research-coordinate-calibration.mjs +276 -0
  60. package/scripts/research-input-channels.mjs +327 -0
  61. package/scripts/research-tap-tab-grid.mjs +271 -0
  62. package/scripts/verify-media-capture.mjs +99 -0
@@ -15,6 +15,7 @@ let supportedCommands: Set<String> = [
15
15
  "describe-ui",
16
16
  "search-ui",
17
17
  "tap",
18
+ "tap-tab",
18
19
  "type",
19
20
  "swipe",
20
21
  "button",
@@ -25,6 +26,7 @@ let supportedCommands: Set<String> = [
25
26
  "gesture",
26
27
  "stream-video",
27
28
  "list-apps",
29
+ "doctor",
28
30
  "list-windows",
29
31
  "activate-app",
30
32
  "screenshot-app",
@@ -52,6 +54,11 @@ func parse(arguments: [String]) throws -> ParsedOptions {
52
54
  var flags: Set<String> = []
53
55
  var positionals: [String] = []
54
56
 
57
/// Reports whether `value` looks like a negative decimal literal such as `-12` or `-3.5`.
///
/// The argument parser uses this so that a token like `-40` following an option is taken
/// as that option's value instead of being treated as another flag.
func looksLikeNegativeNumber(_ value: String) -> Bool {
  let negativeNumberRegex = #"^-\d+(\.\d+)?$"#
  guard value.range(of: negativeNumberRegex, options: .regularExpression) != nil else {
    return false
  }
  return true
}
61
+
55
62
  var index = 1
56
63
  while index < arguments.count {
57
64
  let item = arguments[index]
@@ -60,14 +67,14 @@ func parse(arguments: [String]) throws -> ParsedOptions {
60
67
  let key = String(item[..<separator])
61
68
  let value = String(item[item.index(after: separator)...])
62
69
  options[key] = value
63
- } else if index + 1 < arguments.count, !arguments[index + 1].hasPrefix("-") {
70
+ } else if index + 1 < arguments.count, (!arguments[index + 1].hasPrefix("-") || looksLikeNegativeNumber(arguments[index + 1])) {
64
71
  options[item] = arguments[index + 1]
65
72
  index += 1
66
73
  } else {
67
74
  flags.insert(item)
68
75
  }
69
76
  } else if item.hasPrefix("-") {
70
- if index + 1 < arguments.count, !arguments[index + 1].hasPrefix("-") {
77
+ if index + 1 < arguments.count, (!arguments[index + 1].hasPrefix("-") || looksLikeNegativeNumber(arguments[index + 1])) {
71
78
  options[item] = arguments[index + 1]
72
79
  index += 1
73
80
  } else {
@@ -132,10 +139,27 @@ func readFileText(_ path: String) throws -> String {
132
139
  // MARK: - Process Execution
133
140
 
134
141
  @discardableResult
135
- func runProcess(_ command: String, _ arguments: [String]) throws -> Int32 {
142
+ func runProcess(_ command: String, _ arguments: [String], stdinText: String? = nil) throws -> Int32 {
136
143
  let process = Process()
137
144
  process.executableURL = URL(fileURLWithPath: command)
138
145
  process.arguments = arguments
146
+ if let stdinText {
147
+ let pipe = Pipe()
148
+ process.standardInput = pipe
149
+ process.standardOutput = FileHandle.standardOutput
150
+ process.standardError = FileHandle.standardError
151
+ do {
152
+ try process.run()
153
+ } catch {
154
+ throw NativeError.commandFailed("Failed to launch process: \(command) \(arguments.joined(separator: " "))")
155
+ }
156
+ if let data = stdinText.data(using: .utf8) {
157
+ pipe.fileHandleForWriting.write(data)
158
+ }
159
+ pipe.fileHandleForWriting.closeFile()
160
+ process.waitUntilExit()
161
+ return process.terminationStatus
162
+ }
139
163
  process.standardInput = FileHandle.standardInput
140
164
  process.standardOutput = FileHandle.standardOutput
141
165
  process.standardError = FileHandle.standardError
@@ -235,6 +259,11 @@ func requireSimulatorUdid(_ target: TargetApp) throws -> String {
235
259
  return udid
236
260
  }
237
261
 
262
/// Returns the UDID carried by a `.simulator` target, or `nil` for any other kind of target.
func simulatorUdid(from target: TargetApp) -> String? {
  if case .simulator(let udid) = target {
    return udid
  }
  return nil
}
266
+
238
267
  // MARK: - Accessibility Helpers
239
268
 
240
269
  func ensureAccessibilityTrusted() throws {
@@ -276,6 +305,24 @@ func simulatorAccessibilityRootElement() throws -> UIElement {
276
305
  return appElement
277
306
  }
278
307
 
308
/// Runs `command` with `arguments` and returns its standard output as trimmed UTF-8 text.
///
/// Standard error is discarded.
///
/// - Returns: The captured output with surrounding whitespace and newlines removed, or `nil`
///   when the process cannot be launched, exits with a non-zero status, or emits bytes that
///   are not valid UTF-8.
func shellCaptureCommand(_ command: String, _ arguments: [String]) -> String? {
  let process = Process()
  process.executableURL = URL(fileURLWithPath: command)
  process.arguments = arguments
  let pipe = Pipe()
  process.standardOutput = pipe
  process.standardError = FileHandle.nullDevice

  do {
    try process.run()
  } catch {
    return nil
  }

  // Drain stdout *before* waiting for exit. If the child writes more than the pipe buffer
  // can hold it blocks on write; waiting first would then deadlock both processes.
  let data = pipe.fileHandleForReading.readDataToEndOfFile()
  process.waitUntilExit()

  guard process.terminationStatus == 0 else { return nil }
  return String(data: data, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines)
}
325
+
279
326
  func CopyAttributeValue(_ element: UIElement, _ attribute: CFString) -> CFTypeRef? {
280
327
  var value: CFTypeRef?
281
328
  let status = AXUIElementCopyAttributeValue(element, attribute, &value)
@@ -410,6 +457,24 @@ func IdentifierAttribute(_ element: UIElement) -> String? {
410
457
  return StringAttribute(element, "AXIdentifier" as CFString)
411
458
  }
412
459
 
460
/// Activates `element`, preferring the accessibility press action over a synthetic click.
///
/// When the element advertises `AXPress`, that action is performed. Otherwise a click is
/// sent to the centre of the element's frame.
///
/// - Throws: `NativeError.commandFailed` when `AXPress` is advertised but does not succeed,
///   or when the element offers neither `AXPress` nor a frame to click.
func performPrimaryAction(on element: UIElement) throws {
  let supportsPress = ActionNames(element).contains(kAXPressAction as String)

  guard supportsPress else {
    // No press action available: fall back to clicking the middle of the element.
    guard let bounds = FrameAttribute(element) else {
      throw NativeError.commandFailed("Matched accessibility element has no AXPress action or frame for fallback click.")
    }
    sendClick(at: CGPoint(x: bounds.midX, y: bounds.midY))
    return
  }

  let result = AXUIElementPerformAction(element, kAXPressAction as CFString)
  guard result == .success else {
    throw NativeError.commandFailed("Matched accessibility element but AXPress failed with status \(result.rawValue).")
  }
}
477
+
413
478
  // MARK: - Text Helpers
414
479
 
415
480
  func normalizeText(_ value: String) -> String {
@@ -516,6 +581,98 @@ func describeAccessibilityElement(_ element: UIElement, includeEmpty: Bool = tru
516
581
  return parts.joined(separator: " ")
517
582
  }
518
583
 
584
/// Collects the descendants of `tabBar` that look like tappable tab items, ordered by the
/// horizontal centre of their frames.
///
/// An element qualifies when it has a frame and is an `AXButton`, `AXRadioButton` or
/// `AXCheckBox`, or advertises the press action. Direct children count as depth 1 and the
/// walk does not descend below `maxDepth`. Each element is reported at most once.
func actionableTabBarItems(in tabBar: UIElement, maxDepth: Int = 2) -> [UIElement] {
  let tabItemRoles = ["AXButton", "AXRadioButton", "AXCheckBox"]
  var pending: [(element: UIElement, depth: Int)] = Children(tabBar).map { ($0, 1) }.reversed()
  var found: [UIElement] = []

  while let node = pending.popLast() {
    let role = StringAttribute(node.element, kAXRoleAttribute as CFString) ?? ""
    let actionNames = ActionNames(node.element)
    let frame = FrameAttribute(node.element)
    let looksLikeTabItem = tabItemRoles.contains(role) || actionNames.contains(kAXPressAction as String)

    if looksLikeTabItem, frame != nil {
      let alreadyRecorded = found.contains { elementsAreEqual($0, node.element) }
      if !alreadyRecorded {
        found.append(node.element)
      }
    }

    guard node.depth < maxDepth else { continue }
    // Push children reversed so they are popped in their original order.
    pending.append(contentsOf: Children(node.element).reversed().map { (element: $0, depth: node.depth + 1) })
  }

  return found.sorted { lhs, rhs in
    let lhsMidX = FrameAttribute(lhs)?.midX ?? 0
    let rhsMidX = FrameAttribute(rhs)?.midX ?? 0
    return lhsMidX < rhsMidX
  }
}
615
+
616
/// Finds a row of button-like direct children of `contentRoot` that can stand in for tab items.
///
/// A direct child is a candidate when it has a frame, is not fully contained in the excluded
/// element's frame, has a vertical centre below `origin.y + 0.55 * height` of the content
/// frame (numerically), is narrower than 80% of the content frame, and is either an
/// `AXButton` or pressable. Candidates are bucketed into rows whose running mean vertical
/// centre lies within 24 points; the row with the most elements wins, ties going to the row
/// with the smaller mean vertical centre.
///
/// - Returns: The winning row ordered by horizontal centre, or an empty array unless that row
///   holds exactly `expectedCount` elements.
func semanticProxyTabButtons(in contentRoot: UIElement, excluding excludedElement: UIElement? = nil, expectedCount: Int) -> [UIElement] {
  struct Row {
    var centerY: CGFloat
    var members: [UIElement]
  }

  guard expectedCount > 0, let contentFrame = FrameAttribute(contentRoot) else { return [] }
  let excludedFrame = excludedElement.flatMap(FrameAttribute)
  let maxCenterY = contentFrame.origin.y + contentFrame.height * 0.55
  let maxWidth = contentFrame.width * 0.8

  var candidates: [UIElement] = []
  for child in Children(contentRoot) {
    let role = StringAttribute(child, kAXRoleAttribute as CFString) ?? ""
    let actionNames = ActionNames(child)
    guard let frame = FrameAttribute(child) else { continue }
    if let excludedFrame, excludedFrame.contains(frame) { continue }
    guard frame.midY < maxCenterY, frame.width < maxWidth else { continue }
    if role == "AXButton" || actionNames.contains(kAXPressAction as String) {
      candidates.append(child)
    }
  }
  if candidates.isEmpty { return [] }

  // Group candidates into rows, visiting them in order of vertical centre.
  let rowTolerance: CGFloat = 24
  let byCenterY = candidates.sorted {
    (FrameAttribute($0)?.midY ?? 0) < (FrameAttribute($1)?.midY ?? 0)
  }
  var rows: [Row] = []
  for element in byCenterY {
    let centerY = FrameAttribute(element)?.midY ?? 0
    guard let index = rows.firstIndex(where: { abs($0.centerY - centerY) <= rowTolerance }) else {
      rows.append(Row(centerY: centerY, members: [element]))
      continue
    }
    rows[index].members.append(element)
    // Keep the row's centre as the running mean of its members.
    let memberCount = CGFloat(rows[index].members.count)
    rows[index].centerY = ((rows[index].centerY * (memberCount - 1)) + centerY) / memberCount
  }

  let ranked = rows.sorted { lhs, rhs in
    if lhs.members.count == rhs.members.count {
      return lhs.centerY < rhs.centerY
    }
    return lhs.members.count > rhs.members.count
  }
  guard let winner = ranked.first, winner.members.count == expectedCount else { return [] }

  return winner.members.sorted {
    (FrameAttribute($0)?.midX ?? 0) < (FrameAttribute($1)?.midX ?? 0)
  }
}
675
+
519
676
  func describeAccessibilityTree(from root: UIElement, options: DescribeOptions = DescribeOptions()) -> [String] {
520
677
  var lines: [String] = []
521
678
  var stack: [(element: UIElement, depth: Int)] = [(root, 0)]
@@ -590,6 +747,20 @@ func describeAccessibilityTree(from root: UIElement, options: DescribeOptions =
590
747
  lines.append("\(prefix)- [hidden: no accessible content]")
591
748
  }
592
749
  emitted += 1
750
+ // Add hint for tab bar elements with unlabeled children
751
+ let attrs2 = copyMultipleAttributes(element, [kAXRoleAttribute as String])
752
+ if let roleRef = attrs2[kAXRoleAttribute as String], let role = stringFromCFTypeRef(roleRef),
753
+ role == "AXTabGroup" || role == "AXRadioGroup" {
754
+ let tabChildren = Children(element)
755
+ let unlabeledCount = tabChildren.filter { child in
756
+ let childTexts = getElementTextValues(child)
757
+ return childTexts.isEmpty
758
+ }.count
759
+ if unlabeledCount > 0 && unlabeledCount == tabChildren.count {
760
+ let hint = "\(prefix) [Tab bar with \(tabChildren.count) unlabeled items - use tap_tab with index 0..\(tabChildren.count - 1)]"
761
+ lines.append(hint)
762
+ }
763
+ }
593
764
  }
594
765
  }
595
766
 
@@ -701,6 +872,21 @@ func findAccessibilityElement(
701
872
  return nil
702
873
  }
703
874
 
875
/// Searches several accessibility roots in order and returns the first element that the
/// single-root `findAccessibilityElement` reports for the given selector.
///
/// Roots after the first one that yields a match are not searched.
func findAccessibilityElement(
  in roots: [UIElement],
  identifier: String?,
  label: String?,
  maxDepth: Int = Int.max,
  maxNodes: Int = Int.max
) -> UIElement? {
  for candidateRoot in roots {
    guard let hit = findAccessibilityElement(
      in: candidateRoot,
      identifier: identifier,
      label: label,
      maxDepth: maxDepth,
      maxNodes: maxNodes
    ) else { continue }
    return hit
  }
  return nil
}
889
+
704
890
  func searchAccessibilityElements(in root: UIElement, query: String, options: SearchOptions = SearchOptions()) -> [String] {
705
891
  var results: [String] = []
706
892
  var stack: [(element: UIElement, depth: Int)] = [(root, 0)]
@@ -789,6 +975,14 @@ func searchAccessibilityElements(in root: UIElement, query: String, options: Sea
789
975
  return results
790
976
  }
791
977
 
978
/// Runs the single-root `searchAccessibilityElements` on every root and concatenates the
/// result lines in root order.
func searchAccessibilityElements(in roots: [UIElement], query: String, options: SearchOptions = SearchOptions()) -> [String] {
  roots.flatMap { (root: UIElement) -> [String] in
    searchAccessibilityElements(in: root, query: query, options: options)
  }
}
985
+
792
986
  func findElementBySubrole(from root: UIElement, subrole: String) -> UIElement? {
793
987
  var stack: [UIElement] = [root]
794
988
  var visited = 0
@@ -808,16 +1002,328 @@ func findElementBySubrole(from root: UIElement, subrole: String) -> UIElement? {
808
1002
  return nil
809
1003
  }
810
1004
 
811
- func simulatorContentRootElement(from appRoot: UIElement) -> UIElement? {
1005
/// Looks up the device name of the simulator identified by `udid`.
///
/// Runs `xcrun simctl getenv <udid> SIMULATOR_DEVICE_NAME` and returns whatever
/// `shellCaptureCommand` yields. A `nil` or empty `udid` returns `nil` without running it.
func simulatorDeviceName(for udid: String?) -> String? {
  guard let identifier = udid else { return nil }
  if identifier.isEmpty { return nil }
  let simctlArguments = ["simctl", "getenv", identifier, "SIMULATOR_DEVICE_NAME"]
  return shellCaptureCommand("/usr/bin/xcrun", simctlArguments)
}
1009
+
1010
/// Returns the first available text among the element's title, description and value
/// attributes (tried in that order), or `nil` when none is present.
func simulatorWindowTitle(_ element: UIElement) -> String? {
  let titleAttributes = [kAXTitleAttribute, kAXDescriptionAttribute, kAXValueAttribute]
  for attribute in titleAttributes {
    if let text = StringAttribute(element, attribute as CFString) {
      return text
    }
  }
  return nil
}
1015
+
1016
/// Picks the Simulator window that most likely belongs to the requested device.
///
/// Only direct children of `appRoot` with role `AXWindow` are considered. When a device name
/// can be resolved for `udid`, windows whose normalized title contains it are preferred; if
/// none match, all windows stay in play. Among the remaining windows the one with the largest
/// frame area is returned (the first one wins ties; a missing frame counts as area 0).
///
/// - Returns: The chosen window, or `nil` when `appRoot` has no `AXWindow` children.
func simulatorWindowElement(from appRoot: UIElement, udid: String? = nil) -> UIElement? {
  var windows: [UIElement] = []
  for child in Children(appRoot) {
    let attributes = copyMultipleAttributes(child, [kAXRoleAttribute as String])
    guard let roleRef = attributes[kAXRoleAttribute as String],
          let role = stringFromCFTypeRef(roleRef),
          role == "AXWindow" else { continue }
    windows.append(child)
  }
  if windows.isEmpty { return nil }

  var pool = windows
  if let wantedName = simulatorDeviceName(for: udid).map(normalizeText) {
    let titled = windows.filter { window in
      guard let title = simulatorWindowTitle(window) else { return false }
      return normalizeText(title).contains(wantedName)
    }
    if !titled.isEmpty {
      pool = titled
    }
  }

  var largest: (window: UIElement, area: CGFloat)?
  for window in pool {
    let area = FrameAttribute(window).map { $0.width * $0.height } ?? 0
    // Replace the current pick only for a strictly larger area.
    if let current = largest, area <= current.area { continue }
    largest = (window, area)
  }
  return largest?.window
}
1050
+
1051
/// Locates the element with subrole `iOSContentGroup`, i.e. the simulated device's content.
///
/// The search is first scoped to the window chosen by `simulatorWindowElement` for `udid`;
/// if no window is found, or it contains no such group, the whole `appRoot` is searched.
func simulatorContentRootElement(from appRoot: UIElement, udid: String? = nil) -> UIElement? {
  let contentSubrole = "iOSContentGroup"
  guard let window = simulatorWindowElement(from: appRoot, udid: udid),
        let scopedContentRoot = findElementBySubrole(from: window, subrole: contentSubrole) else {
    return findElementBySubrole(from: appRoot, subrole: contentSubrole)
  }
  return scopedContentRoot
}
814
1058
 
1059
/// An accessibility container found outside the simulator's app content, paired with the
/// human-readable label used when reporting it (for example "tab bar" or "toolbar").
struct SimulatorAuxiliaryContainerCandidate {
  /// The container element itself.
  let element: UIElement
  /// Short description of the kind of container.
  let label: String
}
1063
+
1064
/// Compares two accessibility elements with `CFEqual`.
func elementsAreEqual(_ lhs: UIElement, _ rhs: UIElement) -> Bool {
  return CFEqual(lhs, rhs)
}
1067
+
1068
/// Walks the accessibility tree depth-first from `root` and gathers elements accepted by
/// `predicate`.
///
/// - Parameters:
///   - root: Element the walk starts at (depth 0).
///   - predicate: Called with each visited element and its depth; return `true` to collect it.
///   - maxVisited: Upper bound on the number of elements inspected.
///   - maxMatches: The walk stops as soon as this many elements have been collected.
/// - Returns: Matching elements in visiting order.
func collectElements(
  in root: UIElement,
  matching predicate: (UIElement, Int) -> Bool,
  maxVisited: Int = 500,
  maxMatches: Int = 8
) -> [UIElement] {
  var pending: [(element: UIElement, depth: Int)] = [(root, 0)]
  var visitedCount = 0
  var hits: [UIElement] = []

  while visitedCount < maxVisited, hits.count < maxMatches, let node = pending.popLast() {
    visitedCount += 1

    if predicate(node.element, node.depth) {
      hits.append(node.element)
    }

    // Push children reversed so they are popped in their original order.
    let childDepth = node.depth + 1
    pending.append(contentsOf: Children(node.element).reversed().map { (element: $0, depth: childDepth) })
  }

  return hits
}
1095
+
1096
/// Collects up to `maxMatches` elements under `root` whose AXRole equals `role`, using
/// `collectElements` with its default visit budget.
func collectElementsByRole(in root: UIElement, role: String, maxMatches: Int = 4) -> [UIElement] {
  let roleKey = kAXRoleAttribute as String
  let hasRequestedRole: (UIElement, Int) -> Bool = { element, _ in
    guard let ref = copyMultipleAttributes(element, [roleKey])[roleKey],
          let elementRole = stringFromCFTypeRef(ref) else {
      return false
    }
    return elementRole == role
  }
  return collectElements(in: root, matching: hasRequestedRole, maxMatches: maxMatches)
}
1109
+
1110
/// Collects `AXGroup` elements that look like full-width bars near the top or bottom edge.
///
/// A group qualifies when it is wider than 60% of the main screen, its `origin.y` lies in the
/// first or last 20% of the main screen's height, it is not fully contained in
/// `contentRootFrame` (when given), and it has at least two children.
///
/// - Returns: Up to `maxMatches` groups, or an empty array when there is no main screen.
func collectWideAuxiliaryGroups(in root: UIElement, contentRootFrame: CGRect? = nil, maxMatches: Int = 4) -> [UIElement] {
  guard let screenFrame = NSScreen.main?.frame else { return [] }
  let minimumWidth = screenFrame.width * 0.6
  let topBandLimit = screenFrame.height * 0.20
  let bottomBandStart = screenFrame.height - screenFrame.height * 0.20

  func isWideEdgeGroup(_ element: UIElement, _ depth: Int) -> Bool {
    let attributes = copyMultipleAttributes(element, [kAXRoleAttribute as String, "AXFrame"])
    guard let roleRef = attributes[kAXRoleAttribute as String],
          let role = stringFromCFTypeRef(roleRef),
          role == "AXGroup",
          let frameRef = attributes["AXFrame"],
          let frame = frameFromCFTypeRef(frameRef) else {
      return false
    }
    let isWide = frame.width > minimumWidth
    let hugsEdge = frame.origin.y < topBandLimit || frame.origin.y > bottomBandStart
    guard isWide, hugsEdge else { return false }
    if let contentRootFrame, contentRootFrame.contains(frame) {
      return false
    }
    return Children(element).count >= 2
  }

  return collectElements(in: root, matching: isWideEdgeGroup, maxMatches: maxMatches)
}
1141
+
1142
/// Gathers containers in the Simulator window that sit outside the app content.
///
/// The search is scoped to the window chosen for `udid` (falling back to `appRoot`). It first
/// collects elements by role (tab group, radio group, segmented control, toolbar), then
/// wide edge-hugging groups. An element is dropped when it is the content root itself, when
/// its frame is fully contained in the content root's frame, or when it was already recorded.
func simulatorAuxiliaryContainerCandidates(from appRoot: UIElement, excluding contentRoot: UIElement? = nil, udid: String? = nil) -> [SimulatorAuxiliaryContainerCandidate] {
  let searchRoot = simulatorWindowElement(from: appRoot, udid: udid) ?? appRoot
  let contentFrame = contentRoot.flatMap(FrameAttribute)
  var accepted: [SimulatorAuxiliaryContainerCandidate] = []

  func isAcceptable(_ element: UIElement) -> Bool {
    if let contentRoot, elementsAreEqual(element, contentRoot) {
      return false
    }
    if let frame = FrameAttribute(element), let contentFrame, contentFrame.contains(frame) {
      return false
    }
    return !accepted.contains { elementsAreEqual($0.element, element) }
  }

  // Ordered on purpose: earlier roles claim an element before later ones can.
  let containerRoles: KeyValuePairs<String, String> = [
    "AXTabGroup": "tab bar",
    "AXRadioGroup": "radio group",
    "AXSegmentedControl": "segmented control",
    "AXToolbar": "toolbar",
  ]
  for (role, label) in containerRoles {
    for element in collectElementsByRole(in: searchRoot, role: role) where isAcceptable(element) {
      accepted.append(SimulatorAuxiliaryContainerCandidate(element: element, label: label))
    }
  }

  for element in collectWideAuxiliaryGroups(in: searchRoot, contentRootFrame: contentFrame) where isAcceptable(element) {
    accepted.append(SimulatorAuxiliaryContainerCandidate(element: element, label: "auxiliary group"))
  }

  return accepted
}
1179
+
1180
/// Returns just the labels of the containers found by `simulatorAuxiliaryContainerCandidates`,
/// in the same order (duplicates are not removed here).
func simulatorAuxiliaryContainerLabels(from appRoot: UIElement, excluding contentRoot: UIElement? = nil, udid: String? = nil) -> [String] {
  let candidates = simulatorAuxiliaryContainerCandidates(from: appRoot, excluding: contentRoot, udid: udid)
  return candidates.map { $0.label }
}
1183
+
1184
/// Builds the "[Hint] …" line listing auxiliary containers found outside the app content.
///
/// Labels that are blank after trimming are ignored, and labels are de-duplicated by their
/// trimmed form while keeping the first occurrence's original text and order.
///
/// - Returns: The hint sentence, or `nil` when no usable label remains.
func formatSimulatorAuxiliaryContainerHint(_ labels: [String]) -> String? {
  var seenTrimmed: Set<String> = []
  var distinct: [String] = []
  for label in labels {
    let trimmed = label.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmed.isEmpty { continue }
    if seenTrimmed.insert(trimmed).inserted {
      distinct.append(label)
    }
  }

  if distinct.isEmpty { return nil }
  return "[Hint] Simulator auxiliary containers outside iOSContentGroup: \(distinct.joined(separator: ", ")). Use --all to inspect Simulator chrome UI."
}
1197
+
1198
/// Composes the error text shown when a selector matched nothing in the simulator.
///
/// When `auxiliaryLabels` produce a hint, the message says both app content and auxiliary
/// containers were searched and appends the hint; otherwise it suggests retrying with `--all`.
func simulatorSelectorNotFoundMessage(selectorText: String, auxiliaryLabels: [String]) -> String {
  guard let hint = formatSimulatorAuxiliaryContainerHint(auxiliaryLabels) else {
    return "No accessibility element matched \(selectorText) in simulator app content. Try --all to include Simulator chrome UI."
  }
  return "No accessibility element matched \(selectorText) in simulator app content or auxiliary containers. \(hint)"
}
1204
+
1205
/// Locates the element most likely to be a tab bar beneath `root`.
///
/// Four passes are tried in order and the first hit wins:
/// 1. any element with role `AXTabGroup`;
/// 2. any element with role `AXRadioGroup`;
/// 3. inside the simulator content root, an `AXGroup` whose text contains "tab bar", or one
///    that is wide, near the bottom of the content frame and of plausible bar height;
/// 4. any `AXGroup` wider than 60% of the main screen whose `origin.y` is in the bottom 15%.
///
/// - Parameters:
///   - root: Element the searches start from.
///   - simulatorUdid: Optional UDID used to pick the right Simulator window for pass 3.
/// - Returns: The matching element, or `nil` when no pass finds one (or, for pass 4, when
///   there is no main screen).
func findTabBarElement(in root: UIElement, simulatorUdid: String? = nil) -> UIElement? {
  let roleKey = kAXRoleAttribute as String

  // Passes 1 and 2: explicit container roles. Each role gets its own complete walk so that
  // an AXTabGroup anywhere in the tree is preferred over an AXRadioGroup.
  for containerRole in ["AXTabGroup", "AXRadioGroup"] {
    let match = firstTabBarCandidate(from: root, visitLimit: 500) { element in
      guard let ref = copyMultipleAttributes(element, [roleKey])[roleKey],
            let role = stringFromCFTypeRef(ref) else {
        return false
      }
      return role == containerRole
    }
    if let match {
      return match
    }
  }

  // Pass 3: Simulator-specific heuristic — look for a wide bottom group inside
  // iOSContentGroup. SwiftUI TabView on Simulator frequently exposes the tab bar as
  // AXGroup text="Tab Bar" rather than AXTabGroup.
  if let contentRoot = simulatorContentRootElement(from: root, udid: simulatorUdid),
     let contentFrame = FrameAttribute(contentRoot) {
    let bottomThresholdY = contentFrame.origin.y + contentFrame.height * 0.65
    let textKeys = [
      "AXLabel",
      kAXTitleAttribute as String,
      kAXDescriptionAttribute as String,
      kAXValueAttribute as String,
    ]

    let match = firstTabBarCandidate(from: contentRoot, visitLimit: 800) { element in
      let attrs = copyMultipleAttributes(element, [roleKey, "AXFrame"] + textKeys)
      guard let ref = attrs[roleKey],
            let role = stringFromCFTypeRef(ref),
            role == "AXGroup",
            let frameRef = attrs["AXFrame"],
            let frame = frameFromCFTypeRef(frameRef) else {
        return false
      }

      let hasExplicitTabBarText = textKeys
        .compactMap { attrs[$0] }
        .compactMap(stringFromCFTypeRef)
        .contains { normalizeText($0).contains("tab bar") }

      let isWide = frame.width >= contentFrame.width * 0.60
      let isNearBottom = frame.origin.y >= bottomThresholdY && frame.maxY <= contentFrame.maxY + 8
      let plausibleBarHeight = frame.height >= 32 && frame.height <= 140

      return hasExplicitTabBarText || (isWide && isNearBottom && plausibleBarHeight)
    }
    if let match {
      return match
    }
  }

  // Pass 4: generic heuristic — wide AXGroup in the bottom 15% of the screen.
  // AXFrame uses screen coordinates with origin at top-left, so a tab bar near the bottom
  // of the screen has a high y value.
  guard let screenFrame = NSScreen.main?.frame else { return nil }
  let minimumWidth = screenFrame.width * 0.6
  let minimumY = screenFrame.height - screenFrame.height * 0.15

  return firstTabBarCandidate(from: root, visitLimit: 500) { element in
    let attrs = copyMultipleAttributes(element, [roleKey, "AXFrame"])
    guard let ref = attrs[roleKey],
          let role = stringFromCFTypeRef(ref),
          role == "AXGroup",
          let frameRef = attrs["AXFrame"],
          let frame = frameFromCFTypeRef(frameRef) else {
      return false
    }
    return frame.width > minimumWidth && frame.origin.y > minimumY
  }
}

/// Depth-first search used by `findTabBarElement`: returns the first element accepted by
/// `isMatch`, visiting children in their original order.
///
/// The walk gives up once more than `visitLimit` elements have been inspected, i.e. it looks
/// at no more than `visitLimit + 1` elements.
private func firstTabBarCandidate(from root: UIElement, visitLimit: Int, where isMatch: (UIElement) -> Bool) -> UIElement? {
  var stack: [UIElement] = [root]
  var visited = 0
  while let current = stack.popLast() {
    if visited > visitLimit { break }
    visited += 1

    if isMatch(current) {
      return current
    }
    for child in Children(current).reversed() {
      stack.append(child)
    }
  }
  return nil
}
1320
+
815
1321
  // MARK: - Window / Coordinate Helpers
816
1322
 
817
1323
  func windowBounds(for target: TargetApp) -> CGRect? {
818
1324
  switch target {
819
- case .simulator:
820
- return simulatorWindowBounds()
1325
+ case .simulator(let udid):
1326
+ return simulatorWindowBounds(udid: udid)
821
1327
  case .macApp(let pid, _, _):
822
1328
  guard let windowInfo = CGWindowListCopyWindowInfo([.optionOnScreenOnly, .excludeDesktopElements], kCGNullWindowID)
823
1329
  as? [[String: Any]] else {
@@ -849,7 +1355,7 @@ func windowBounds(for target: TargetApp) -> CGRect? {
849
1355
  }
850
1356
  }
851
1357
 
852
- func simulatorWindowBounds() -> CGRect? {
1358
+ func simulatorWindowBounds(udid: String? = nil) -> CGRect? {
853
1359
  guard let windowInfo = CGWindowListCopyWindowInfo([.optionOnScreenOnly, .excludeDesktopElements], kCGNullWindowID)
854
1360
  as? [[String: Any]] else {
855
1361
  return nil
@@ -861,9 +1367,21 @@ func simulatorWindowBounds() -> CGRect? {
861
1367
  return owner == "Simulator" && (layer ?? 0) == 0
862
1368
  }
863
1369
 
1370
+ let normalizedDeviceName = simulatorDeviceName(for: udid).map(normalizeText)
1371
+ let preferredWindows: [[String: Any]]
1372
+ if let normalizedDeviceName {
1373
+ let matched = windows.filter { info in
1374
+ let title = (info[kCGWindowName as String] as? String) ?? ""
1375
+ return normalizeText(title).contains(normalizedDeviceName)
1376
+ }
1377
+ preferredWindows = matched.isEmpty ? windows : matched
1378
+ } else {
1379
+ preferredWindows = windows
1380
+ }
1381
+
864
1382
  var best: CGRect?
865
1383
  var bestArea: CGFloat = 0
866
- for info in windows {
1384
+ for info in preferredWindows {
867
1385
  guard let boundsDict = info[kCGWindowBounds as String] as? [String: Any],
868
1386
  let x = boundsDict["X"] as? CGFloat,
869
1387
  let y = boundsDict["Y"] as? CGFloat,
@@ -883,8 +1401,8 @@ func simulatorWindowBounds() -> CGRect? {
883
1401
 
884
1402
  func pointInWindow(x: Double, y: Double, for target: TargetApp) throws -> CGPoint {
885
1403
  switch target {
886
- case .simulator:
887
- return try pointInSimulatorWindow(x: x, y: y)
1404
+ case .simulator(let udid):
1405
+ return try pointInSimulatorWindow(x: x, y: y, udid: udid)
888
1406
  case .macApp:
889
1407
  guard let bounds = windowBounds(for: target) else {
890
1408
  throw NativeError.commandFailed("Application window not found. Ensure the app is running and visible.")
@@ -895,15 +1413,67 @@ func pointInWindow(x: Double, y: Double, for target: TargetApp) throws -> CGPoin
895
1413
  }
896
1414
  }
897
1415
 
898
- func pointInSimulatorWindow(x: Double, y: Double) throws -> CGPoint {
899
- guard let bounds = simulatorWindowBounds() else {
1416
/// Converts a point relative to the Simulator window's origin into absolute coordinates by
/// adding the origin of the window bounds returned by `simulatorWindowBounds`.
///
/// - Throws: `NativeError.commandFailed` when no Simulator window bounds are available.
func pointInSimulatorWindow(x: Double, y: Double, udid: String? = nil) throws -> CGPoint {
  guard let windowFrame = simulatorWindowBounds(udid: udid) else {
    throw NativeError.commandFailed("Simulator window not found. Ensure Simulator is running and visible.")
  }
  let origin = windowFrame.origin
  return CGPoint(x: origin.x + CGFloat(x), y: origin.y + CGFloat(y))
}
906
1424
 
1425
+ // MARK: - Simulator Content Bounds
1426
+
1427
/// Returns the frame of the simulator's content group, falling back to the whole Simulator
/// window's bounds when the accessibility root, the content group, or its frame is
/// unavailable.
func simulatorContentBounds(udid: String? = nil) -> CGRect? {
  if let appRoot = try? simulatorAccessibilityRootElement(),
     let contentGroup = simulatorContentRootElement(from: appRoot, udid: udid),
     let contentFrame = FrameAttribute(contentGroup) {
    return contentFrame
  }
  return simulatorWindowBounds(udid: udid)
}
1437
+
1438
/// Converts a point relative to the simulator content area's origin into absolute
/// coordinates by adding the origin of `simulatorContentBounds`.
///
/// - Throws: `NativeError.commandFailed` when no content bounds are available.
func pointInSimulatorContent(x: Double, y: Double, udid: String? = nil) throws -> CGPoint {
  guard let contentFrame = simulatorContentBounds(udid: udid) else {
    throw NativeError.commandFailed("Simulator content area not found. Ensure Simulator is running and visible.")
  }
  let origin = contentFrame.origin
  return CGPoint(x: origin.x + CGFloat(x), y: origin.y + CGFloat(y))
}
1446
+
1447
/// Resolves caller-supplied input coordinates to an absolute point for `target`.
///
/// Simulator targets are resolved against the content area; every other target is resolved
/// against its window via `pointInWindow`.
func pointForInput(x: Double, y: Double, for target: TargetApp) throws -> CGPoint {
  if case .simulator(let udid) = target {
    return try pointInSimulatorContent(x: x, y: y, udid: udid)
  }
  return try pointInWindow(x: x, y: y, for: target)
}
1455
+
1456
/// Chooses the point a simulator scroll gesture is anchored at.
///
/// When both `x` and `y` are supplied they are returned unchanged. Otherwise the result is
/// half the width and half the height of the simulator content bounds.
///
/// - Throws: `NativeError.commandFailed` when a default is needed but no content bounds exist.
func simulatorScrollAnchorPoint(x: Double?, y: Double?, udid: String? = nil) throws -> CGPoint {
  guard let x, let y else {
    guard let contentFrame = simulatorContentBounds(udid: udid) else {
      throw NativeError.commandFailed("Simulator content area not found. Ensure Simulator is running and visible.")
    }
    return CGPoint(x: contentFrame.width * 0.5, y: contentFrame.height * 0.5)
  }
  return CGPoint(x: x, y: y)
}
1465
+
1466
/// Maps scroll deltas to a signed distance per axis.
///
/// Each non-zero delta is multiplied by 18 in magnitude, clamped to the range 90...320, and
/// given the delta's sign; a zero delta stays zero.
func simulatorScrollDistance(deltaX: Double, deltaY: Double) -> CGSize {
  let scaled: (Double) -> CGFloat = { delta in
    if delta == 0 { return 0 }
    let clamped = min(max(abs(delta) * 18.0, 90.0), 320.0)
    return delta < 0 ? CGFloat(clamped) * -1 : CGFloat(clamped) * 1
  }
  return CGSize(width: scaled(deltaX), height: scaled(deltaY))
}
1476
+
907
1477
  // MARK: - App Activation
908
1478
 
909
1479
  func activateTarget(_ target: TargetApp) throws {
@@ -1000,6 +1570,43 @@ func sendSwipe(from start: CGPoint, to end: CGPoint, duration: Double?) {
1000
1570
  postMouseEvent(type: .leftMouseUp, point: end)
1001
1571
  }
1002
1572
 
1573
/// Posts a press-hold-drag-release mouse sequence from `start` to `end`.
///
/// Sequence: mouse down at `start`, optional hold, a 4-point warmup drag toward `end`, then
/// 18 evenly spaced drag events, a short settle pause, and mouse up at `end`.
///
/// - Parameters:
///   - holdDuration: Seconds to keep the button down before moving; skipped when not positive.
///   - moveDuration: Total seconds spread over the 18 drag steps. When `nil`, fixed pauses
///     (0.05 s after the warmup, 0.02 s per step) are used.
func sendDrag(from start: CGPoint, to end: CGPoint, holdDuration: Double, moveDuration: Double?) {
  postMouseEvent(type: .leftMouseDown, point: start)
  if holdDuration > 0 {
    Thread.sleep(forTimeInterval: holdDuration)
  }

  // iOS drag & drop is often sensitive to the exact event sequence.
  // Use a slightly larger warmup move so SwiftUI DragGesture reliably
  // leaves the initial long-press state and enters an active drag.
  let nudge: CGFloat = 4.0
  let nudgeX = end.x >= start.x ? nudge : -nudge
  let nudgeY = end.y >= start.y ? nudge : -nudge
  postMouseEvent(type: .leftMouseDragged, point: CGPoint(x: start.x + nudgeX, y: start.y + nudgeY))
  let warmupPause = moveDuration.map { min(max($0 / 24.0, 0.02), 0.06) } ?? 0.05
  Thread.sleep(forTimeInterval: warmupPause)

  let stepCount = 18
  let stepPause = moveDuration.map { $0 / Double(stepCount) } ?? 0.02
  for step in 1...stepCount {
    let fraction = CGFloat(step) / CGFloat(stepCount)
    let waypoint = CGPoint(
      x: start.x + (end.x - start.x) * fraction,
      y: start.y + (end.y - start.y) * fraction
    )
    postMouseEvent(type: .leftMouseDragged, point: waypoint)
    Thread.sleep(forTimeInterval: stepPause)
  }

  Thread.sleep(forTimeInterval: 0.08)
  postMouseEvent(type: .leftMouseUp, point: end)
}
1609
+
1003
1610
  // MARK: - Keyboard Events
1004
1611
 
1005
1612
  func sendKeyPress(keyCode: Int, duration: Double?) {
@@ -1046,6 +1653,39 @@ func sendText(_ text: String) {
1046
1653
  }
1047
1654
  }
1048
1655
 
1656
+ // MARK: - Input Backend
1657
+
1658
/// The mechanism used to deliver input events to a target.
enum InputBackend {
  /// Deliver input through CGEvent.
  case cgevent
  /// Deliver input through the given IndigoHID client.
  case indigoHID(IndigoHIDClient)
}
1662
+
1663
/// Chooses the input backend for `target`.
///
/// - macOS apps always use CGEvent.
/// - Simulators use IndigoHID when a client can be created for the UDID, otherwise CGEvent.
/// - The `BAEPSAE_INPUT_BACKEND` environment variable (`indigo` | `cgevent` | `auto`,
///   case-insensitive) overrides this: `cgevent` skips IndigoHID entirely, and `indigo`
///   prints a warning to stderr when IndigoHID turns out to be unavailable.
func resolveInputBackend(for target: TargetApp) -> InputBackend {
  let requestedBackend = ProcessInfo.processInfo.environment["BAEPSAE_INPUT_BACKEND"]?.lowercased()

  guard case .simulator(let udid) = target else {
    return .cgevent
  }
  if requestedBackend == "cgevent" {
    return .cgevent
  }

  guard let client = IndigoHIDClient(udid: udid) else {
    if requestedBackend == "indigo" {
      fputs("Warning: IndigoHID requested but not available, falling back to CGEvent\n", stderr)
    }
    return .cgevent
  }
  return .indigoHID(client)
}
1688
+
1049
1689
  // MARK: - Misc Helpers
1050
1690
 
1051
1691
  func defaultOutputPath(prefix: String, ext: String) -> String {