@ishlabs/cli 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,15 @@
1
1
  /**
2
- * IOSDevice — drives a local iOS simulator via `xcrun simctl` + `idb`,
2
+ * IOSDevice — drives a local iOS simulator via `xcrun simctl` (lifecycle +
3
+ * screenshot) and WebDriverAgent/XCUITest (UI + a11y; see xcuitest.ts),
3
4
  * implementing the SimulationDevice surface the loop expects. Mirrors
4
5
  * AndroidDevice; the one substantive difference is the coordinate space.
5
6
  *
6
7
  * Two resolution paths, mirroring the browser:
7
- * - ELEMENT (preferred): observe() reads the `idb ui describe-all` a11y tree,
8
- * serializes it to the `[id] role "label"` string the backend DOMLocator
9
- * reasons over, and keeps a local `shortId → bounds` map (bounds in POINTS).
10
- * The backend returns a `node_id`; executeAction() looks the bounds up and
11
- * taps the element's CENTER.
8
+ * - ELEMENT (preferred): observe() reads WDA's `/source` a11y tree, serializes
9
+ * it to the `[id] role "label"` string the backend DOMLocator reasons over,
10
+ * and keeps a local `shortId → bounds` map (bounds in POINTS). The backend
11
+ * returns a `node_id`; executeAction() looks the bounds up and taps the
12
+ * element's CENTER.
12
13
  * - VISION (fallback): when the tree is empty/sparse, observe() returns an
13
14
  * empty tree so the backend takes its vision branch and returns NORMALIZED
14
15
  * 0-1000 coordinates. Also taken per-action whenever node_id is absent.
@@ -16,7 +17,7 @@
16
17
  * COORDINATE SPACE — two spaces, the key difference from Android (where
17
18
  * screencap and tap share one pixel space):
18
19
  * `simctl io booted screenshot` is in PIXELS (e.g. 1179x2556 @3x), but
19
- * `idb ui tap/swipe` AND the `describe-all` a11y frames are POINTS (393x852).
20
+ * WDA taps/swipes AND the `/source` a11y frames are POINTS (393x852).
20
21
  * The invariant in BOTH paths: TAP in points, RECORD in pixels, because the
21
22
  * loop re-normalizes the recorded coord against dimensions() (PIXELS).
22
23
  * - VISION: tap pt = round(n/1000 * pointSize); record px = round(n/1000 * pixelSize).
@@ -30,10 +31,12 @@
30
31
  * backend never converts coords with screen_width/height.
31
32
  */
32
33
  import { resolveTextValue } from "./actions.js";
33
- import { requireOneBootedSimulator, describeScreen, describeAll, screenshotPng, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, } from "./simctl.js";
34
+ import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, } from "./simctl.js";
35
+ // iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
36
+ import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
34
37
  import { isLocalPath } from "../upload.js";
35
38
  import { deNormalizePoint, deNormalizeDrag, pointToPixel } from "./coordinates.js";
36
- import { parseIdbDescribeAll, serializeNativeTree, boundsCenter } from "./native-a11y.js";
39
+ import { parseXcuiHierarchy, serializeNativeTree, boundsCenter } from "./native-a11y.js";
37
40
  // Let animations/transitions settle before the next observation so the
38
41
  // screenshot the LLM reasons over reflects the action's result.
39
42
  const POST_GESTURE_SETTLE_MS = 500;
@@ -61,6 +64,8 @@ export class IOSDevice {
61
64
  appPath;
62
65
  /** udid of the single booted simulator we drive. */
63
66
  udid = "";
67
+ /** Set once the WebDriverAgent runner is up, so the startup note logs once. */
68
+ wdaStarted = false;
64
69
  /** POINT size — what WDA taps/swipes consume (de-normalization basis for TAPS). */
65
70
  pointWidth = 0;
66
71
  pointHeight = 0;
@@ -96,6 +101,14 @@ export class IOSDevice {
96
101
  this.bundleId = await this.resolveBundleId(target);
97
102
  }
98
103
  const bundleId = this.bundleId;
104
+ // Bring up the WebDriverAgent runner (install + simctl-launch the prebuilt
105
+ // xctrunner, open a session). Idempotent and reused across participants, so
106
+ // the ~30-60s first-launch cost is paid once per run.
107
+ if (!this.wdaStarted) {
108
+ this.log("Starting the iOS automation runner (WebDriverAgent); first launch can take ~30-60s...");
109
+ }
110
+ await ensureWda(this.udid);
111
+ this.wdaStarted = true;
99
112
  // Prime screen geometry (points) before the first de-normalization.
100
113
  await this.refreshScreen();
101
114
  // Per-participant reset: terminate then relaunch from a clean state.
@@ -171,7 +184,7 @@ export class IOSDevice {
171
184
  };
172
185
  }
173
186
  /**
174
- * Read + serialize the idb describe-all a11y tree (bounds in POINTS). Any
187
+ * Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
175
188
  * failure (retries exhausted on a trivial tree, parse error) degrades to an
176
189
  * empty tree so the backend falls back to vision — a missing tree must never
177
190
  * abort the observation.
@@ -179,7 +192,7 @@ export class IOSDevice {
179
192
  async dumpTree() {
180
193
  try {
181
194
  const json = await describeAll(this.udid);
182
- const nodes = parseIdbDescribeAll(json);
195
+ const nodes = parseXcuiHierarchy(json);
183
196
  const tree = serializeNativeTree(nodes);
184
197
  this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
185
198
  return tree;
@@ -206,7 +219,7 @@ export class IOSDevice {
206
219
  // separately in points (see toPoints()).
207
220
  return { width: this.pixelWidth, height: this.pixelHeight };
208
221
  }
209
- /** Normalized 0-1000 → POINT space (idb ui tap/swipe take points). */
222
+ /** Normalized 0-1000 → POINT space (WDA taps/swipes take points). */
210
223
  toPoints(c) {
211
224
  return deNormalizePoint(c, this.pointWidth, this.pointHeight);
212
225
  }
@@ -227,7 +240,7 @@ export class IOSDevice {
227
240
  const bounds = this.lastNodeMap.get(action.node_id);
228
241
  if (!bounds)
229
242
  return { pt: null, px: null, stale: true };
230
- const pt = boundsCenter(bounds); // POINTS — idb taps directly
243
+ const pt = boundsCenter(bounds); // POINTS — WDA taps directly
231
244
  const px = pointToPixel(pt, this.pointWidth, this.pointHeight, this.pixelWidth, this.pixelHeight);
232
245
  return { pt, px, stale: false };
233
246
  }
@@ -237,7 +250,7 @@ export class IOSDevice {
237
250
  }
238
251
  async executeAction(action) {
239
252
  try {
240
- // pt drives the idb TAP (points); px is what we RECORD (pixels). ELEMENT
253
+ // pt drives the WDA TAP (points); px is what we RECORD (pixels). ELEMENT
241
254
  // path: pt = bounds-center, px = that center scaled to pixels. VISION
242
255
  // path: both derive from the same normalized coord. Either way the tap
243
256
  // lands right and the recorded px round-trips against dimensions().
@@ -279,6 +292,11 @@ export class IOSDevice {
279
292
  await this.navigateBack();
280
293
  break;
281
294
  }
295
+ case "open_system_panel": {
296
+ // Element-less, like navigate_back: best-effort top-edge pull-down.
297
+ await this.openSystemPanel(action.panel === "quick_settings" ? "quick_settings" : "notifications");
298
+ break;
299
+ }
282
300
  case "drag": {
283
301
  // A drag GRABS an element and RELEASES it elsewhere ("click the
284
302
  // element, move, let go") — distinct from a swipe (element-less
@@ -364,7 +382,7 @@ export class IOSDevice {
364
382
  await settle(250);
365
383
  }
366
384
  const text = resolveTextValue(action, this.contextValues);
367
- // idb ui text appends to the focused field; for click_type (replace) there
385
+ // WDA text input appends to the focused field; for click_type (replace) there
368
386
  // is no "clear" helper wired here, so we rely on the field being empty after focus. The
369
387
  // vision agent typically taps an empty field, so this matches Android's
370
388
  // common path; a true select-all clear isn't wired on this driver.
@@ -470,10 +488,10 @@ export class IOSDevice {
470
488
  * do drive the system gesture) when no back button is visible.
471
489
  */
472
490
  async navigateBack() {
473
- const nodes = parseIdbDescribeAll(await describeAll(this.udid));
491
+ const nodes = parseXcuiHierarchy(await describeAll(this.udid));
474
492
  const back = this.findBackButton(nodes);
475
493
  if (back) {
476
- const c = boundsCenter(back.bounds); // POINTS — idb taps directly
494
+ const c = boundsCenter(back.bounds); // POINTS — WDA taps directly
477
495
  await uiTap(this.udid, c.x, c.y);
478
496
  return;
479
497
  }
@@ -483,6 +501,39 @@ export class IOSDevice {
483
501
  const midY = Math.round(this.pointHeight / 2);
484
502
  await uiSwipe(this.udid, 1, midY, Math.round(this.pointWidth * 0.5), midY, 300);
485
503
  }
504
+ /**
505
+ * Best-effort open of an iOS system panel by swiping down from the top edge.
506
+ * iOS has no `cmd statusbar` equivalent, so on a Face-ID layout:
507
+ * - notifications → Notification Center: swipe down from the top-CENTER.
508
+ * - quick_settings → Control Center: swipe down from the top-RIGHT corner.
509
+ * Coordinates are POINTS (WDA consumes points; see toPoints()/the swipe()
510
+ * helper). This is FLAKY on the simulator — the synthetic touch frequently
511
+ * doesn't trigger the system edge gesture (the same limitation navigateBack's
512
+ * edge-swipe hits). We compare a before/after screenshot and log LOUDLY when
513
+ * the screen didn't change, rather than silently reporting success, so a
514
+ * no-op is visible in the run. The executeAction caller still returns
515
+ * success:true (the gesture was attempted); the loud log is the signal.
516
+ */
517
+ async openSystemPanel(panel) {
518
+ const before = await screenshotPng();
519
+ const w = this.pointWidth;
520
+ const h = this.pointHeight;
521
+ // Start ON the top edge and travel a third of the screen down. Control
522
+ // Center lives under the top-right (battery/status) corner on Face-ID
523
+ // devices; Notification Center under the top-center notch area.
524
+ const startX = panel === "quick_settings" ? Math.round(w * 0.92) : Math.round(w * 0.5);
525
+ const startY = 1;
526
+ const endY = Math.round(h * 0.35);
527
+ await uiSwipe(this.udid, startX, startY, startX, endY, 350);
528
+ await settle();
529
+ // Loudly surface a no-op: the simulator's synthetic touch often can't drive
530
+ // the system edge gesture. An identical screenshot means the panel didn't open.
531
+ const after = await screenshotPng();
532
+ if (before.equals(after)) {
533
+ this.log(`open_system_panel (${panel}): top-edge swipe produced no visible change — ` +
534
+ `the simulator's synthetic touch likely didn't trigger the system gesture (flaky on the simulator).`);
535
+ }
536
+ }
486
537
  /**
487
538
  * The nav-bar back button: the leading (leftmost) actionable button in the
488
539
  * top nav-bar band. iOS HIG guarantees "back" is the leading nav item in a
@@ -526,7 +577,7 @@ export class IOSDevice {
526
577
  // the agent can adapt.
527
578
  const hint = {
528
579
  pinch_zoom: "no multi-touch on the native driver; the agent should zoom via double_tap",
529
- rotate_device: "idb exposes no clean rotate; leave orientation as-is",
580
+ rotate_device: "rotation is not wired on the native driver; leave orientation as-is",
530
581
  keyboard_shortcut: "no hardware keyboard on the native driver; use on-screen taps/text_input",
531
582
  switch_tab: "tabs are a browser concept; the native app has a single window",
532
583
  close_tab: "tabs are a browser concept; the native app has a single window",
@@ -540,7 +591,9 @@ export class IOSDevice {
540
591
  return "";
541
592
  }
542
593
  async close() {
543
- // Leave the app installed/running; the simulator is shared and the next run
544
- // resets via launchOrReset. Nothing to tear down (no IME state on iOS).
594
+ // Tear down the WebDriverAgent session (the runner is left installed on the
595
+ // shared simulator for the next run). The app resets via launchOrReset; no
596
+ // IME state to restore on iOS.
597
+ await closeWda(this.udid);
545
598
  }
546
599
  }
@@ -49,6 +49,7 @@ export function flattenAction(raw, nodeId = null, nodeDescription = null) {
49
49
  key: a.key ?? null,
50
50
  tab_id: a.tab_id ?? null,
51
51
  orientation: a.orientation ?? null,
52
+ panel: a.panel ?? null,
52
53
  scale: a.scale ?? null,
53
54
  // Native path: ResolvedAction.coordinates (top level of the resolved_actions
54
55
  // entry) is the single {x,y} execution point. Fall back to the nested action
@@ -431,6 +432,7 @@ async function runSingleSimulation(client, participantId, participantName, opts,
431
432
  ...(action.key && { key: action.key }),
432
433
  ...(action.tab_id && { tab_id: action.tab_id }),
433
434
  ...(action.orientation && { orientation: action.orientation }),
435
+ ...(action.panel && { panel: action.panel }),
434
436
  // The recorded `coordinates` is the drag START; persist the END
435
437
  // (normalized 0-1000) too so the journey captures the full path.
436
438
  ...(action.drag && { drag_end: { x: action.drag.endX, y: action.drag.endY } }),
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * COORDINATE SPACE — carried, not converted, by this module:
14
14
  * - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
15
- * - iOS `idb ui describe-all` frames are POINTS (`space: "points"`).
15
+ * - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
16
16
  * The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
17
17
  * IOSDevice taps points), so the `space` tag tells the caller which dimension a
18
18
  * node's bounds-center belongs to. This module never mixes the two.
@@ -65,15 +65,29 @@ export interface NativeTree {
65
65
  */
66
66
  export declare function parseUiautomatorXml(xml: string): NativeNode[];
67
67
  /**
68
- * Parse `idb ui describe-all` JSON (a FLAT array of elements, each with a `frame`
69
- * in POINTS) into NativeNodes in array order. iOS is already a flat,
70
- * properly-labeled list no ancestor walk needed — so `clickable` is derived
71
- * from the element's role/type and whether it carries a usable label.
68
+ * Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
69
+ * FLAT, depth-first `NativeNode[]` (POINTS) that the former idb parser produced,
70
+ * so `serializeNativeTree` consumes it unchanged. WDA's `type` matches idb's iOS
71
+ * types (Button/StaticText/SearchField/Cell/Image/Application…), so
72
+ * `normalizeRole`/`IOS_ACTIONABLE_TYPES`/`frameToBounds` all apply as-is.
73
+ *
74
+ * KEY: WDA's `/source` is the FULL XCUIElement tree — every container and leaf —
75
+ * NOT idb's clean accessibility-elements list. iOS settings rows surface as an
76
+ * accessible `Button` ("General", isAccessible=1) that ALSO contains a duplicate
77
+ * inner `StaticText` ("General", isAccessible=0) and is wrapped in a `Cell`
78
+ * (isAccessible=0). Emitting all three yields "General General" + empty
79
+ * listitems. So we emit ONLY `isAccessible && isVisible` nodes — exactly the
80
+ * VoiceOver-exposed set idb returned: the labeled Button is both the label and
81
+ * the tap target; the duplicate StaticText and the wrapping Cell are pruned. A
82
+ * sparse a11y tree degrades to the loop's vision fallback, so strict filtering
83
+ * never strands the run.
84
+ *
85
+ * Accepts either the raw tree or the W3C `{ value: <tree> }` envelope WDA returns.
72
86
  */
73
- export declare function parseIdbDescribeAll(json: string): NativeNode[];
87
+ export declare function parseXcuiHierarchy(json: string): NativeNode[];
74
88
  /**
75
89
  * Serialize a flat NativeNode list (from `parseUiautomatorXml` /
76
- * `parseIdbDescribeAll`) into the `[id] role "label"` string the DOMLocator
90
+ * `parseXcuiHierarchy`) into the `[id] role "label"` string the DOMLocator
77
91
  * reasons over, plus a `shortId → bounds` map for local tap resolution.
78
92
  *
79
93
  * Emission rules (kept tight, like the DOM serializer):
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * COORDINATE SPACE — carried, not converted, by this module:
14
14
  * - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
15
- * - iOS `idb ui describe-all` frames are POINTS (`space: "points"`).
15
+ * - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
16
16
  * The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
17
17
  * IOSDevice taps points), so the `space` tag tells the caller which dimension a
18
18
  * node's bounds-center belongs to. This module never mixes the two.
@@ -50,7 +50,7 @@ const ROLE_NORMALIZATION = {
50
50
  ScrollView: "generic",
51
51
  RecyclerView: "list",
52
52
  ListView: "list",
53
- // iOS (idb `type`, AX-prefixed `role` handled by stripAxPrefix below).
53
+ // iOS (WDA / XCUITest `type`, AX-prefixed `role` handled by stripAxPrefix below).
54
54
  StaticText: "text",
55
55
  TextField: "textbox",
56
56
  SecureTextField: "textbox",
@@ -181,6 +181,9 @@ function buildAndroidTree(xml) {
181
181
  function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
182
182
  return { role, text, contentDesc, resourceId, clickable, bounds, children: [] };
183
183
  }
184
+ // ---------------------------------------------------------------------------
185
+ // iOS — shared helpers for the WebDriverAgent (XCUITest) /source parser below
186
+ // ---------------------------------------------------------------------------
184
187
  /** iOS roles/types that are directly actionable (the device taps their center). */
185
188
  const IOS_ACTIONABLE_TYPES = new Set([
186
189
  "Button",
@@ -195,50 +198,7 @@ const IOS_ACTIONABLE_TYPES = new Set([
195
198
  "MenuItem",
196
199
  "Tab",
197
200
  ]);
198
- /**
199
- * Parse `idb ui describe-all` JSON (a FLAT array of elements, each with a `frame`
200
- * in POINTS) into NativeNodes in array order. iOS is already a flat,
201
- * properly-labeled list — no ancestor walk needed — so `clickable` is derived
202
- * from the element's role/type and whether it carries a usable label.
203
- */
204
- export function parseIdbDescribeAll(json) {
205
- let parsed;
206
- try {
207
- parsed = JSON.parse(json);
208
- }
209
- catch {
210
- return [];
211
- }
212
- if (!Array.isArray(parsed))
213
- return [];
214
- const out = [];
215
- for (const raw of parsed) {
216
- const bounds = idbFrameToBounds(raw.frame);
217
- if (!bounds)
218
- continue; // malformed / zero-area frame → no tappable center
219
- // Label: prefer the spoken AXLabel; fall back to AXValue (search fields
220
- // expose their placeholder as AXValue, e.g. "Search"). AXValue is only a
221
- // STRING fallback — switches/sliders/steppers report it as a number/boolean
222
- // (a Switch is 1/0), and `.trim()` on those would throw and lose the whole
223
- // tree to a silent vision fallback. An unlabeled toggle then emits as a bare
224
- // `[id] switch` (still tappable via its frame center).
225
- const label = (raw.AXLabel ?? (typeof raw.AXValue === "string" ? raw.AXValue : "")).trim();
226
- const rawType = raw.type ?? (raw.role ? stripAxPrefix(raw.role) : "");
227
- const typeKey = stripAxPrefix(rawType);
228
- const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && raw.enabled !== false;
229
- out.push({
230
- role: normalizeRole(rawType),
231
- label,
232
- bounds,
233
- clickable: actionable,
234
- hasOwnLabel: label.length > 0,
235
- resourceId: raw.AXUniqueId ?? undefined,
236
- space: "points",
237
- });
238
- }
239
- return out;
240
- }
241
- function idbFrameToBounds(frame) {
201
+ function frameToBounds(frame) {
242
202
  if (!frame)
243
203
  return null;
244
204
  const { x, y, width, height } = frame;
@@ -254,6 +214,81 @@ function idbFrameToBounds(frame) {
254
214
  }
255
215
  return { x, y, width, height };
256
216
  }
217
+ /** WDA's "1"/"0" (or real boolean) → boolean. */
218
+ function wdaTruthy(v) {
219
+ return v === true || v === "1";
220
+ }
221
+ /**
222
+ * Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
223
+ * FLAT, depth-first `NativeNode[]` (POINTS) that the former idb parser produced,
224
+ * so `serializeNativeTree` consumes it unchanged. WDA's `type` matches idb's iOS
225
+ * types (Button/StaticText/SearchField/Cell/Image/Application…), so
226
+ * `normalizeRole`/`IOS_ACTIONABLE_TYPES`/`frameToBounds` all apply as-is.
227
+ *
228
+ * KEY: WDA's `/source` is the FULL XCUIElement tree — every container and leaf —
229
+ * NOT idb's clean accessibility-elements list. iOS settings rows surface as an
230
+ * accessible `Button` ("General", isAccessible=1) that ALSO contains a duplicate
231
+ * inner `StaticText` ("General", isAccessible=0) and is wrapped in a `Cell`
232
+ * (isAccessible=0). Emitting all three yields "General General" + empty
233
+ * listitems. So we emit ONLY `isAccessible && isVisible` nodes — exactly the
234
+ * VoiceOver-exposed set idb returned: the labeled Button is both the label and
235
+ * the tap target; the duplicate StaticText and the wrapping Cell are pruned. A
236
+ * sparse a11y tree degrades to the loop's vision fallback, so strict filtering
237
+ * never strands the run.
238
+ *
239
+ * Accepts either the raw tree or the W3C `{ value: <tree> }` envelope WDA returns.
240
+ */
241
+ export function parseXcuiHierarchy(json) {
242
+ let parsed;
243
+ try {
244
+ parsed = JSON.parse(json);
245
+ }
246
+ catch {
247
+ return [];
248
+ }
249
+ // WDA returns the tree under a W3C `{ value: <tree>, sessionId }` envelope, but
250
+ // a raw tree NODE also has its own `value` field (the element's value) — so we
251
+ // can't unwrap on `"value" in parsed` alone. The actual tree root is the one
252
+ // carrying a node-shaped `type`; only unwrap `value` when the top level is NOT
253
+ // itself a node.
254
+ const obj = parsed;
255
+ const root = obj && typeof obj === "object" && !("type" in obj) && "value" in obj
256
+ ? obj.value
257
+ : obj;
258
+ if (!root || typeof root !== "object")
259
+ return [];
260
+ const out = [];
261
+ const visit = (n) => {
262
+ const bounds = frameToBounds(n.rect ?? undefined);
263
+ if (bounds && wdaTruthy(n.isAccessible) && wdaTruthy(n.isVisible)) {
264
+ // Prefer the spoken label; fall back to a STRING value (search fields
265
+ // expose their placeholder as `value`). Non-string values (a Switch's 1/0)
266
+ // are ignored for the label, exactly like the idb path.
267
+ const label = (n.label ?? (typeof n.value === "string" ? n.value : "")).trim();
268
+ const rawType = n.type ?? "";
269
+ const typeKey = stripAxPrefix(rawType);
270
+ // `isEnabled` absent ⇒ assume enabled (WDA omits it on always-enabled types).
271
+ const enabled = n.isEnabled == null ? true : wdaTruthy(n.isEnabled);
272
+ const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && enabled;
273
+ out.push({
274
+ role: normalizeRole(rawType),
275
+ label,
276
+ bounds,
277
+ clickable: actionable,
278
+ hasOwnLabel: label.length > 0,
279
+ resourceId: (n.name || n.rawIdentifier) ?? undefined,
280
+ space: "points",
281
+ });
282
+ }
283
+ // Recurse into ALL children — an accessible element can nest inside a
284
+ // non-accessible container (the Cell wrapping the Button), so we must not
285
+ // prune the walk by accessibility, only the emission.
286
+ for (const c of n.children ?? [])
287
+ visit(c);
288
+ };
289
+ visit(root);
290
+ return out;
291
+ }
257
292
  // ---------------------------------------------------------------------------
258
293
  // Serialization — flat NativeNode list → `[id] role "label"` + nodeMap
259
294
  // ---------------------------------------------------------------------------
@@ -271,7 +306,7 @@ function normalizeLabel(label) {
271
306
  }
272
307
  /**
273
308
  * Serialize a flat NativeNode list (from `parseUiautomatorXml` /
274
- * `parseIdbDescribeAll`) into the `[id] role "label"` string the DOMLocator
309
+ * `parseXcuiHierarchy`) into the `[id] role "label"` string the DOMLocator
275
310
  * reasons over, plus a `shortId → bounds` map for local tap resolution.
276
311
  *
277
312
  * Emission rules (kept tight, like the DOM serializer):
@@ -1,38 +1,34 @@
1
1
  /**
2
- * Thin async wrappers over `xcrun simctl` + `idb` for the native-iOS sim path.
2
+ * Thin async wrappers over `xcrun simctl` for the native-iOS sim path: simulator
3
+ * LIFECYCLE (boot detection, install, terminate, launch) and the SCREENSHOT.
3
4
  *
4
- * Two tools, two jobs:
5
- * - `xcrun simctl` drives the simulator LIFECYCLE (boot detection, install,
6
- * terminate, launch) and the SCREENSHOT.
7
- * - `idb` drives UI INPUT (tap/swipe/text/key) and reports the screen
8
- * geometry (pixels, points, and the scale between them).
5
+ * UI interaction + the accessibility tree live in `xcuitest.ts` (WebDriverAgent),
6
+ * NOT here — iOS no longer depends on idb.
9
7
  *
10
8
  * COORDINATE SPACES (the key difference from Android, where screencap and tap
11
9
  * share one pixel space):
12
10
  * - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
13
- * - `idb ui tap/swipe` take POINTS (e.g. 393x852) — pixels / scale.
14
- * The native sim TAPS in points (de-normalize 0-1000 against the POINT size)
15
- * but RECORDS in PIXELS: dimensions() returns the pixel size so the loop's
16
- * round-trip is exact. Recording in points would drift — the point grid (393)
17
- * is coarser than the 0-1000 normalized grid, so it double-rounds. See
18
- * IOSDevice for the full derivation.
11
+ * - WebDriverAgent's taps/swipes + a11y frames are POINTS (e.g. 393x852).
12
+ * The native sim TAPS in points (de-normalize 0-1000 against the POINT size) but
13
+ * RECORDS in PIXELS: dimensions() returns the pixel size so the loop's round-trip
14
+ * is exact. Recording in points would drift — the point grid (393) is coarser
15
+ * than the 0-1000 normalized grid, so it double-rounds. See IOSDevice.
19
16
  */
20
17
  export declare class IosError extends Error {
21
18
  constructor(message: string);
22
19
  }
23
20
  /** Run `xcrun simctl <args>` and return trimmed stdout. */
24
21
  export declare function simctl(args: string[], timeoutMs?: number): Promise<string>;
25
- /** Run `idb <args>` and return trimmed stdout. */
26
- export declare function idb(args: string[], timeoutMs?: number): Promise<string>;
27
22
  /**
28
23
  * Assert exactly one simulator is Booted and return its udid. We pin every
29
- * subsequent idb/simctl call (and the screenshot) to "booted", so multiple
24
+ * subsequent simctl/WDA call (and the screenshot) to "booted", so multiple
30
25
  * booted simulators are ambiguous and rejected.
31
26
  */
32
27
  export declare function requireOneBootedSimulator(): Promise<string>;
33
28
  /**
34
- * Screen geometry from `idb describe --json`: PIXEL size, POINT size, and the
35
- * scale (`density`) between them. Points drive idb ui tap/swipe; pixels are the
29
+ * Screen geometry: PIXEL size, POINT size, and the scale (`density`) between
30
+ * them. Produced by the XCUITest driver's `describeScreen` (xcuitest.ts) and
31
+ * consumed by IOSDevice — points drive WDA taps/swipes; pixels are the
36
32
  * screenshot's resolution.
37
33
  */
38
34
  export interface IosScreen {
@@ -42,38 +38,12 @@ export interface IosScreen {
42
38
  pointHeight: number;
43
39
  density: number;
44
40
  }
45
- export declare function describeScreen(udid: string): Promise<IosScreen>;
46
41
  /**
47
42
  * Capture the booted simulator's screen as PNG bytes via
48
43
  * `simctl io booted screenshot`. simctl writes to a file path (no reliable
49
44
  * stdout in current Xcode), so we round-trip through a temp file.
50
45
  */
51
46
  export declare function screenshotPng(): Promise<Buffer>;
52
- export declare function uiTap(udid: string, x: number, y: number): Promise<void>;
53
- export declare function uiLongPress(udid: string, x: number, y: number, durationMs?: number): Promise<void>;
54
- export declare function uiSwipe(udid: string, x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;
55
- /**
56
- * Type text into the focused field. Unlike Android's `adb shell input text`,
57
- * `idb ui text` handles spaces/unicode/quotes correctly, so no helper IME is
58
- * needed.
59
- */
60
- export declare function uiText(udid: string, text: string): Promise<void>;
61
- /**
62
- * Press a hardware key by HID usage code. `idb ui key 40` is Return/Enter
63
- * (used to submit a text field).
64
- */
65
- export declare function uiKey(udid: string, keycode: number): Promise<void>;
66
- /** HID usage code for Return/Enter. */
67
- export declare const HID_KEY_RETURN = 40;
68
- /**
69
- * Capture the current accessibility tree as `idb ui describe-all` JSON (a flat
70
- * array of elements, each with a POINT frame) and return it. Mirrors the
71
- * oracle's `ios_describe`: right after a tap the tree can be mid-transition and
72
- * come back empty/partial, so we retry until we get an array with more than just
73
- * the root application node. Throws IosError if every attempt yields a trivial
74
- * tree so the caller can degrade to the vision path.
75
- */
76
- export declare function describeAll(udid: string): Promise<string>;
77
47
  export declare function terminateApp(udid: string, bundleId: string): Promise<void>;
78
48
  export declare function launchApp(udid: string, bundleId: string): Promise<void>;
79
49
  export declare function installApp(udid: string, appPath: string): Promise<void>;