@ishlabs/cli 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +26 -0
- package/dist/commands/doctor.js +334 -0
- package/dist/index.js +2 -0
- package/dist/lib/local-sim/actions.js +2 -0
- package/dist/lib/local-sim/adb.d.ts +10 -0
- package/dist/lib/local-sim/adb.js +16 -2
- package/dist/lib/local-sim/android.js +6 -1
- package/dist/lib/local-sim/coordinates.d.ts +4 -4
- package/dist/lib/local-sim/coordinates.js +4 -4
- package/dist/lib/local-sim/device.d.ts +2 -2
- package/dist/lib/local-sim/device.js +1 -1
- package/dist/lib/local-sim/ios.d.ts +26 -9
- package/dist/lib/local-sim/ios.js +73 -20
- package/dist/lib/local-sim/loop.js +2 -0
- package/dist/lib/local-sim/native-a11y.d.ts +21 -7
- package/dist/lib/local-sim/native-a11y.js +82 -47
- package/dist/lib/local-sim/simctl.d.ts +13 -43
- package/dist/lib/local-sim/simctl.js +12 -141
- package/dist/lib/local-sim/types.d.ts +2 -0
- package/dist/lib/local-sim/xcuitest.d.ts +60 -0
- package/dist/lib/local-sim/xcuitest.js +303 -0
- package/dist/lib/paths.d.ts +8 -0
- package/dist/lib/paths.js +12 -0
- package/package.json +1 -1
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* IOSDevice — drives a local iOS simulator via `xcrun simctl` +
|
|
2
|
+
* IOSDevice — drives a local iOS simulator via `xcrun simctl` (lifecycle +
|
|
3
|
+
* screenshot) and WebDriverAgent/XCUITest (UI + a11y; see xcuitest.ts),
|
|
3
4
|
* implementing the SimulationDevice surface the loop expects. Mirrors
|
|
4
5
|
* AndroidDevice; the one substantive difference is the coordinate space.
|
|
5
6
|
*
|
|
6
7
|
* Two resolution paths, mirroring the browser:
|
|
7
|
-
* - ELEMENT (preferred): observe() reads
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
8
|
+
* - ELEMENT (preferred): observe() reads WDA's `/source` a11y tree, serializes
|
|
9
|
+
* it to the `[id] role "label"` string the backend DOMLocator reasons over,
|
|
10
|
+
* and keeps a local `shortId → bounds` map (bounds in POINTS). The backend
|
|
11
|
+
* returns a `node_id`; executeAction() looks the bounds up and taps the
|
|
12
|
+
* element's CENTER.
|
|
12
13
|
* - VISION (fallback): when the tree is empty/sparse, observe() returns an
|
|
13
14
|
* empty tree so the backend takes its vision branch and returns NORMALIZED
|
|
14
15
|
* 0-1000 coordinates. Also taken per-action whenever node_id is absent.
|
|
@@ -16,7 +17,7 @@
|
|
|
16
17
|
* COORDINATE SPACE — two spaces, the key difference from Android (where
|
|
17
18
|
* screencap and tap share one pixel space):
|
|
18
19
|
* `simctl io booted screenshot` is in PIXELS (e.g. 1179x2556 @3x), but
|
|
19
|
-
*
|
|
20
|
+
* WDA taps/swipes AND the `/source` a11y frames are POINTS (393x852).
|
|
20
21
|
* The invariant in BOTH paths: TAP in points, RECORD in pixels, because the
|
|
21
22
|
* loop re-normalizes the recorded coord against dimensions() (PIXELS).
|
|
22
23
|
* - VISION: tap pt = round(n/1000 * pointSize); record px = round(n/1000 * pixelSize).
|
|
@@ -30,10 +31,12 @@
|
|
|
30
31
|
* backend never converts coords with screen_width/height.
|
|
31
32
|
*/
|
|
32
33
|
import { resolveTextValue } from "./actions.js";
|
|
33
|
-
import { requireOneBootedSimulator,
|
|
34
|
+
import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, } from "./simctl.js";
|
|
35
|
+
// iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
|
|
36
|
+
import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
|
|
34
37
|
import { isLocalPath } from "../upload.js";
|
|
35
38
|
import { deNormalizePoint, deNormalizeDrag, pointToPixel } from "./coordinates.js";
|
|
36
|
-
import {
|
|
39
|
+
import { parseXcuiHierarchy, serializeNativeTree, boundsCenter } from "./native-a11y.js";
|
|
37
40
|
// Let animations/transitions settle before the next observation so the
|
|
38
41
|
// screenshot the LLM reasons over reflects the action's result.
|
|
39
42
|
const POST_GESTURE_SETTLE_MS = 500;
|
|
@@ -61,6 +64,8 @@ export class IOSDevice {
|
|
|
61
64
|
appPath;
|
|
62
65
|
/** udid of the single booted simulator we drive. */
|
|
63
66
|
udid = "";
|
|
67
|
+
/** Set once the WebDriverAgent runner is up, so the startup note logs once. */
|
|
68
|
+
wdaStarted = false;
|
|
64
69
|
/** POINT size — what WDA taps/swipes consume (de-normalization basis for TAPS). */
|
|
65
70
|
pointWidth = 0;
|
|
66
71
|
pointHeight = 0;
|
|
@@ -96,6 +101,14 @@ export class IOSDevice {
|
|
|
96
101
|
this.bundleId = await this.resolveBundleId(target);
|
|
97
102
|
}
|
|
98
103
|
const bundleId = this.bundleId;
|
|
104
|
+
// Bring up the WebDriverAgent runner (install + simctl-launch the prebuilt
|
|
105
|
+
// xctrunner, open a session). Idempotent and reused across participants, so
|
|
106
|
+
// the ~30-60s first-launch cost is paid once per run.
|
|
107
|
+
if (!this.wdaStarted) {
|
|
108
|
+
this.log("Starting the iOS automation runner (WebDriverAgent); first launch can take ~30-60s...");
|
|
109
|
+
}
|
|
110
|
+
await ensureWda(this.udid);
|
|
111
|
+
this.wdaStarted = true;
|
|
99
112
|
// Prime screen geometry (points) before the first de-normalization.
|
|
100
113
|
await this.refreshScreen();
|
|
101
114
|
// Per-participant reset: terminate then relaunch from a clean state.
|
|
@@ -171,7 +184,7 @@ export class IOSDevice {
|
|
|
171
184
|
};
|
|
172
185
|
}
|
|
173
186
|
/**
|
|
174
|
-
* Read + serialize
|
|
187
|
+
* Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
|
|
175
188
|
* failure (retries exhausted on a trivial tree, parse error) degrades to an
|
|
176
189
|
* empty tree so the backend falls back to vision — a missing tree must never
|
|
177
190
|
* abort the observation.
|
|
@@ -179,7 +192,7 @@ export class IOSDevice {
|
|
|
179
192
|
async dumpTree() {
|
|
180
193
|
try {
|
|
181
194
|
const json = await describeAll(this.udid);
|
|
182
|
-
const nodes =
|
|
195
|
+
const nodes = parseXcuiHierarchy(json);
|
|
183
196
|
const tree = serializeNativeTree(nodes);
|
|
184
197
|
this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
|
|
185
198
|
return tree;
|
|
@@ -206,7 +219,7 @@ export class IOSDevice {
|
|
|
206
219
|
// separately in points (see toPoints()).
|
|
207
220
|
return { width: this.pixelWidth, height: this.pixelHeight };
|
|
208
221
|
}
|
|
209
|
-
/** Normalized 0-1000 → POINT space (
|
|
222
|
+
/** Normalized 0-1000 → POINT space (WDA taps/swipes take points). */
|
|
210
223
|
toPoints(c) {
|
|
211
224
|
return deNormalizePoint(c, this.pointWidth, this.pointHeight);
|
|
212
225
|
}
|
|
@@ -227,7 +240,7 @@ export class IOSDevice {
|
|
|
227
240
|
const bounds = this.lastNodeMap.get(action.node_id);
|
|
228
241
|
if (!bounds)
|
|
229
242
|
return { pt: null, px: null, stale: true };
|
|
230
|
-
const pt = boundsCenter(bounds); // POINTS —
|
|
243
|
+
const pt = boundsCenter(bounds); // POINTS — WDA taps directly
|
|
231
244
|
const px = pointToPixel(pt, this.pointWidth, this.pointHeight, this.pixelWidth, this.pixelHeight);
|
|
232
245
|
return { pt, px, stale: false };
|
|
233
246
|
}
|
|
@@ -237,7 +250,7 @@ export class IOSDevice {
|
|
|
237
250
|
}
|
|
238
251
|
async executeAction(action) {
|
|
239
252
|
try {
|
|
240
|
-
// pt drives the
|
|
253
|
+
// pt drives the WDA TAP (points); px is what we RECORD (pixels). ELEMENT
|
|
241
254
|
// path: pt = bounds-center, px = that center scaled to pixels. VISION
|
|
242
255
|
// path: both derive from the same normalized coord. Either way the tap
|
|
243
256
|
// lands right and the recorded px round-trips against dimensions().
|
|
@@ -279,6 +292,11 @@ export class IOSDevice {
|
|
|
279
292
|
await this.navigateBack();
|
|
280
293
|
break;
|
|
281
294
|
}
|
|
295
|
+
case "open_system_panel": {
|
|
296
|
+
// Element-less, like navigate_back: best-effort top-edge pull-down.
|
|
297
|
+
await this.openSystemPanel(action.panel === "quick_settings" ? "quick_settings" : "notifications");
|
|
298
|
+
break;
|
|
299
|
+
}
|
|
282
300
|
case "drag": {
|
|
283
301
|
// A drag GRABS an element and RELEASES it elsewhere ("click the
|
|
284
302
|
// element, move, let go") — distinct from a swipe (element-less
|
|
@@ -364,7 +382,7 @@ export class IOSDevice {
|
|
|
364
382
|
await settle(250);
|
|
365
383
|
}
|
|
366
384
|
const text = resolveTextValue(action, this.contextValues);
|
|
367
|
-
//
|
|
385
|
+
// WDA text input appends to the focused field; for click_type (replace) there
|
|
368
386
|
// is no idb "clear", so we rely on the field being empty after focus. The
|
|
369
387
|
// vision agent typically taps an empty field, so this matches Android's
|
|
370
388
|
// common path; a true select-all clear isn't exposed by idb.
|
|
@@ -470,10 +488,10 @@ export class IOSDevice {
|
|
|
470
488
|
* do drive the system gesture) when no back button is visible.
|
|
471
489
|
*/
|
|
472
490
|
async navigateBack() {
|
|
473
|
-
const nodes =
|
|
491
|
+
const nodes = parseXcuiHierarchy(await describeAll(this.udid));
|
|
474
492
|
const back = this.findBackButton(nodes);
|
|
475
493
|
if (back) {
|
|
476
|
-
const c = boundsCenter(back.bounds); // POINTS —
|
|
494
|
+
const c = boundsCenter(back.bounds); // POINTS — WDA taps directly
|
|
477
495
|
await uiTap(this.udid, c.x, c.y);
|
|
478
496
|
return;
|
|
479
497
|
}
|
|
@@ -483,6 +501,39 @@ export class IOSDevice {
|
|
|
483
501
|
const midY = Math.round(this.pointHeight / 2);
|
|
484
502
|
await uiSwipe(this.udid, 1, midY, Math.round(this.pointWidth * 0.5), midY, 300);
|
|
485
503
|
}
|
|
504
|
+
/**
|
|
505
|
+
* Best-effort open of an iOS system panel by swiping down from the top edge.
|
|
506
|
+
* iOS has no `cmd statusbar` equivalent, so on a Face-ID layout:
|
|
507
|
+
* - notifications → Notification Center: swipe down from the top-CENTER.
|
|
508
|
+
* - quick_settings → Control Center: swipe down from the top-RIGHT corner.
|
|
509
|
+
* Coordinates are POINTS (WDA consumes points; see toPoints()/the swipe()
|
|
510
|
+
* helper). This is FLAKY on the simulator — WDA's synthetic touch frequently
|
|
511
|
+
* doesn't trigger the system edge gesture (the same limitation navigateBack's
|
|
512
|
+
* edge-swipe hits). We compare a before/after screenshot and log LOUDLY when
|
|
513
|
+
* the screen didn't change, rather than silently reporting success, so a
|
|
514
|
+
* no-op is visible in the run. The executeAction caller still returns
|
|
515
|
+
* success:true (the gesture was attempted); the loud log is the signal.
|
|
516
|
+
*/
|
|
517
|
+
async openSystemPanel(panel) {
|
|
518
|
+
const before = await screenshotPng();
|
|
519
|
+
const w = this.pointWidth;
|
|
520
|
+
const h = this.pointHeight;
|
|
521
|
+
// Start ON the top edge and travel a third of the screen down. Control
|
|
522
|
+
// Center lives under the top-right (battery/status) corner on Face-ID
|
|
523
|
+
// devices; Notification Center under the top-center notch area.
|
|
524
|
+
const startX = panel === "quick_settings" ? Math.round(w * 0.92) : Math.round(w * 0.5);
|
|
525
|
+
const startY = 1;
|
|
526
|
+
const endY = Math.round(h * 0.35);
|
|
527
|
+
await uiSwipe(this.udid, startX, startY, startX, endY, 350);
|
|
528
|
+
await settle();
|
|
529
|
+
// Loudly surface a no-op: the simulator's synthetic touch often can't drive
|
|
530
|
+
// the system edge gesture. An identical screenshot means the panel didn't open.
|
|
531
|
+
const after = await screenshotPng();
|
|
532
|
+
if (before.equals(after)) {
|
|
533
|
+
this.log(`open_system_panel (${panel}): top-edge swipe produced no visible change — ` +
|
|
534
|
+
`the simulator's synthetic touch likely didn't trigger the system gesture (flaky on the simulator).`);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
486
537
|
/**
|
|
487
538
|
* The nav-bar back button: the leading (leftmost) actionable button in the
|
|
488
539
|
* top nav-bar band. iOS HIG guarantees "back" is the leading nav item in a
|
|
@@ -526,7 +577,7 @@ export class IOSDevice {
|
|
|
526
577
|
// the agent can adapt.
|
|
527
578
|
const hint = {
|
|
528
579
|
pinch_zoom: "no multi-touch on the native driver; the agent should zoom via double_tap",
|
|
529
|
-
rotate_device: "
|
|
580
|
+
rotate_device: "rotation is not wired on the native driver; leave orientation as-is",
|
|
530
581
|
keyboard_shortcut: "no hardware keyboard on the native driver; use on-screen taps/text_input",
|
|
531
582
|
switch_tab: "tabs are a browser concept; the native app has a single window",
|
|
532
583
|
close_tab: "tabs are a browser concept; the native app has a single window",
|
|
@@ -540,7 +591,9 @@ export class IOSDevice {
|
|
|
540
591
|
return "";
|
|
541
592
|
}
|
|
542
593
|
async close() {
|
|
543
|
-
//
|
|
544
|
-
//
|
|
594
|
+
// Tear down the WebDriverAgent session (the runner is left installed on the
|
|
595
|
+
// shared simulator for the next run). The app resets via launchOrReset; no
|
|
596
|
+
// IME state to restore on iOS.
|
|
597
|
+
await closeWda(this.udid);
|
|
545
598
|
}
|
|
546
599
|
}
|
|
@@ -49,6 +49,7 @@ export function flattenAction(raw, nodeId = null, nodeDescription = null) {
|
|
|
49
49
|
key: a.key ?? null,
|
|
50
50
|
tab_id: a.tab_id ?? null,
|
|
51
51
|
orientation: a.orientation ?? null,
|
|
52
|
+
panel: a.panel ?? null,
|
|
52
53
|
scale: a.scale ?? null,
|
|
53
54
|
// Native path: ResolvedAction.coordinates (top level of the resolved_actions
|
|
54
55
|
// entry) is the single {x,y} execution point. Fall back to the nested action
|
|
@@ -431,6 +432,7 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
431
432
|
...(action.key && { key: action.key }),
|
|
432
433
|
...(action.tab_id && { tab_id: action.tab_id }),
|
|
433
434
|
...(action.orientation && { orientation: action.orientation }),
|
|
435
|
+
...(action.panel && { panel: action.panel }),
|
|
434
436
|
// The recorded `coordinates` is the drag START; persist the END
|
|
435
437
|
// (normalized 0-1000) too so the journey captures the full path.
|
|
436
438
|
...(action.drag && { drag_end: { x: action.drag.endX, y: action.drag.endY } }),
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*
|
|
13
13
|
* COORDINATE SPACE — carried, not converted, by this module:
|
|
14
14
|
* - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
|
|
15
|
-
* - iOS
|
|
15
|
+
* - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
|
|
16
16
|
* The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
|
|
17
17
|
* IOSDevice taps points), so the `space` tag tells the caller which dimension a
|
|
18
18
|
* node's bounds-center belongs to. This module never mixes the two.
|
|
@@ -65,15 +65,29 @@ export interface NativeTree {
|
|
|
65
65
|
*/
|
|
66
66
|
export declare function parseUiautomatorXml(xml: string): NativeNode[];
|
|
67
67
|
/**
|
|
68
|
-
* Parse `
|
|
69
|
-
*
|
|
70
|
-
*
|
|
71
|
-
*
|
|
68
|
+
* Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
|
|
69
|
+
* FLAT, depth-first `NativeNode[]` (POINTS) that the former idb parser (`parseIdbDescribeAll`) produced,
|
|
70
|
+
* so `serializeNativeTree` consumes it unchanged. WDA's `type` matches idb's iOS
|
|
71
|
+
* types (Button/StaticText/SearchField/Cell/Image/Application…), so
|
|
72
|
+
* `normalizeRole`/`IOS_ACTIONABLE_TYPES`/`frameToBounds` all apply as-is.
|
|
73
|
+
*
|
|
74
|
+
* KEY: WDA's `/source` is the FULL XCUIElement tree — every container and leaf —
|
|
75
|
+
* NOT idb's clean accessibility-elements list. iOS settings rows surface as an
|
|
76
|
+
* accessible `Button` ("General", isAccessible=1) that ALSO contains a duplicate
|
|
77
|
+
* inner `StaticText` ("General", isAccessible=0) and is wrapped in a `Cell`
|
|
78
|
+
* (isAccessible=0). Emitting all three yields "General General" + empty
|
|
79
|
+
* listitems. So we emit ONLY `isAccessible && isVisible` nodes — exactly the
|
|
80
|
+
* VoiceOver-exposed set idb returned: the labeled Button is both the label and
|
|
81
|
+
* the tap target; the duplicate StaticText and the wrapping Cell are pruned. A
|
|
82
|
+
* sparse a11y tree degrades to the loop's vision fallback, so strict filtering
|
|
83
|
+
* never strands the run.
|
|
84
|
+
*
|
|
85
|
+
* Accepts either the raw tree or the W3C `{ value: <tree> }` envelope WDA returns.
|
|
72
86
|
*/
|
|
73
|
-
export declare function
|
|
87
|
+
export declare function parseXcuiHierarchy(json: string): NativeNode[];
|
|
74
88
|
/**
|
|
75
89
|
* Serialize a flat NativeNode list (from `parseUiautomatorXml` /
|
|
76
|
-
* `
|
|
90
|
+
* `parseXcuiHierarchy`) into the `[id] role "label"` string the DOMLocator
|
|
77
91
|
* reasons over, plus a `shortId → bounds` map for local tap resolution.
|
|
78
92
|
*
|
|
79
93
|
* Emission rules (kept tight, like the DOM serializer):
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*
|
|
13
13
|
* COORDINATE SPACE — carried, not converted, by this module:
|
|
14
14
|
* - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
|
|
15
|
-
* - iOS
|
|
15
|
+
* - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
|
|
16
16
|
* The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
|
|
17
17
|
* IOSDevice taps points), so the `space` tag tells the caller which dimension a
|
|
18
18
|
* node's bounds-center belongs to. This module never mixes the two.
|
|
@@ -50,7 +50,7 @@ const ROLE_NORMALIZATION = {
|
|
|
50
50
|
ScrollView: "generic",
|
|
51
51
|
RecyclerView: "list",
|
|
52
52
|
ListView: "list",
|
|
53
|
-
// iOS (
|
|
53
|
+
// iOS (WDA / XCUITest `type`, AX-prefixed `role` handled by stripAxPrefix below).
|
|
54
54
|
StaticText: "text",
|
|
55
55
|
TextField: "textbox",
|
|
56
56
|
SecureTextField: "textbox",
|
|
@@ -181,6 +181,9 @@ function buildAndroidTree(xml) {
|
|
|
181
181
|
function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
|
|
182
182
|
return { role, text, contentDesc, resourceId, clickable, bounds, children: [] };
|
|
183
183
|
}
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
// iOS — shared helpers for the WebDriverAgent (XCUITest) /source parser below
|
|
186
|
+
// ---------------------------------------------------------------------------
|
|
184
187
|
/** iOS roles/types that are directly actionable (the device taps their center). */
|
|
185
188
|
const IOS_ACTIONABLE_TYPES = new Set([
|
|
186
189
|
"Button",
|
|
@@ -195,50 +198,7 @@ const IOS_ACTIONABLE_TYPES = new Set([
|
|
|
195
198
|
"MenuItem",
|
|
196
199
|
"Tab",
|
|
197
200
|
]);
|
|
198
|
-
|
|
199
|
-
* Parse `idb ui describe-all` JSON (a FLAT array of elements, each with a `frame`
|
|
200
|
-
* in POINTS) into NativeNodes in array order. iOS is already a flat,
|
|
201
|
-
* properly-labeled list — no ancestor walk needed — so `clickable` is derived
|
|
202
|
-
* from the element's role/type and whether it carries a usable label.
|
|
203
|
-
*/
|
|
204
|
-
export function parseIdbDescribeAll(json) {
|
|
205
|
-
let parsed;
|
|
206
|
-
try {
|
|
207
|
-
parsed = JSON.parse(json);
|
|
208
|
-
}
|
|
209
|
-
catch {
|
|
210
|
-
return [];
|
|
211
|
-
}
|
|
212
|
-
if (!Array.isArray(parsed))
|
|
213
|
-
return [];
|
|
214
|
-
const out = [];
|
|
215
|
-
for (const raw of parsed) {
|
|
216
|
-
const bounds = idbFrameToBounds(raw.frame);
|
|
217
|
-
if (!bounds)
|
|
218
|
-
continue; // malformed / zero-area frame → no tappable center
|
|
219
|
-
// Label: prefer the spoken AXLabel; fall back to AXValue (search fields
|
|
220
|
-
// expose their placeholder as AXValue, e.g. "Search"). AXValue is only a
|
|
221
|
-
// STRING fallback — switches/sliders/steppers report it as a number/boolean
|
|
222
|
-
// (a Switch is 1/0), and `.trim()` on those would throw and lose the whole
|
|
223
|
-
// tree to a silent vision fallback. An unlabeled toggle then emits as a bare
|
|
224
|
-
// `[id] switch` (still tappable via its frame center).
|
|
225
|
-
const label = (raw.AXLabel ?? (typeof raw.AXValue === "string" ? raw.AXValue : "")).trim();
|
|
226
|
-
const rawType = raw.type ?? (raw.role ? stripAxPrefix(raw.role) : "");
|
|
227
|
-
const typeKey = stripAxPrefix(rawType);
|
|
228
|
-
const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && raw.enabled !== false;
|
|
229
|
-
out.push({
|
|
230
|
-
role: normalizeRole(rawType),
|
|
231
|
-
label,
|
|
232
|
-
bounds,
|
|
233
|
-
clickable: actionable,
|
|
234
|
-
hasOwnLabel: label.length > 0,
|
|
235
|
-
resourceId: raw.AXUniqueId ?? undefined,
|
|
236
|
-
space: "points",
|
|
237
|
-
});
|
|
238
|
-
}
|
|
239
|
-
return out;
|
|
240
|
-
}
|
|
241
|
-
function idbFrameToBounds(frame) {
|
|
201
|
+
function frameToBounds(frame) {
|
|
242
202
|
if (!frame)
|
|
243
203
|
return null;
|
|
244
204
|
const { x, y, width, height } = frame;
|
|
@@ -254,6 +214,81 @@ function idbFrameToBounds(frame) {
|
|
|
254
214
|
}
|
|
255
215
|
return { x, y, width, height };
|
|
256
216
|
}
|
|
217
|
+
/** WDA's "1"/"0" (or real boolean) → boolean. */
|
|
218
|
+
function wdaTruthy(v) {
|
|
219
|
+
return v === true || v === "1";
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
|
|
223
|
+
* FLAT, depth-first `NativeNode[]` (POINTS) that the former idb parser (`parseIdbDescribeAll`) produced,
|
|
224
|
+
* so `serializeNativeTree` consumes it unchanged. WDA's `type` matches idb's iOS
|
|
225
|
+
* types (Button/StaticText/SearchField/Cell/Image/Application…), so
|
|
226
|
+
* `normalizeRole`/`IOS_ACTIONABLE_TYPES`/`frameToBounds` all apply as-is.
|
|
227
|
+
*
|
|
228
|
+
* KEY: WDA's `/source` is the FULL XCUIElement tree — every container and leaf —
|
|
229
|
+
* NOT idb's clean accessibility-elements list. iOS settings rows surface as an
|
|
230
|
+
* accessible `Button` ("General", isAccessible=1) that ALSO contains a duplicate
|
|
231
|
+
* inner `StaticText` ("General", isAccessible=0) and is wrapped in a `Cell`
|
|
232
|
+
* (isAccessible=0). Emitting all three yields "General General" + empty
|
|
233
|
+
* listitems. So we emit ONLY `isAccessible && isVisible` nodes — exactly the
|
|
234
|
+
* VoiceOver-exposed set idb returned: the labeled Button is both the label and
|
|
235
|
+
* the tap target; the duplicate StaticText and the wrapping Cell are pruned. A
|
|
236
|
+
* sparse a11y tree degrades to the loop's vision fallback, so strict filtering
|
|
237
|
+
* never strands the run.
|
|
238
|
+
*
|
|
239
|
+
* Accepts either the raw tree or the W3C `{ value: <tree> }` envelope WDA returns.
|
|
240
|
+
*/
|
|
241
|
+
export function parseXcuiHierarchy(json) {
|
|
242
|
+
let parsed;
|
|
243
|
+
try {
|
|
244
|
+
parsed = JSON.parse(json);
|
|
245
|
+
}
|
|
246
|
+
catch {
|
|
247
|
+
return [];
|
|
248
|
+
}
|
|
249
|
+
// WDA returns the tree under a W3C `{ value: <tree>, sessionId }` envelope, but
|
|
250
|
+
// a raw tree NODE also has its own `value` field (the element's value) — so we
|
|
251
|
+
// can't unwrap on `"value" in parsed` alone. The actual tree root is the one
|
|
252
|
+
// carrying a node-shaped `type`; only unwrap `value` when the top level is NOT
|
|
253
|
+
// itself a node.
|
|
254
|
+
const obj = parsed;
|
|
255
|
+
const root = obj && typeof obj === "object" && !("type" in obj) && "value" in obj
|
|
256
|
+
? obj.value
|
|
257
|
+
: obj;
|
|
258
|
+
if (!root || typeof root !== "object")
|
|
259
|
+
return [];
|
|
260
|
+
const out = [];
|
|
261
|
+
const visit = (n) => {
|
|
262
|
+
const bounds = frameToBounds(n.rect ?? undefined);
|
|
263
|
+
if (bounds && wdaTruthy(n.isAccessible) && wdaTruthy(n.isVisible)) {
|
|
264
|
+
// Prefer the spoken label; fall back to a STRING value (search fields
|
|
265
|
+
// expose their placeholder as `value`). Non-string values (a Switch's 1/0)
|
|
266
|
+
// are ignored for the label, exactly like the idb path.
|
|
267
|
+
const label = (n.label ?? (typeof n.value === "string" ? n.value : "")).trim();
|
|
268
|
+
const rawType = n.type ?? "";
|
|
269
|
+
const typeKey = stripAxPrefix(rawType);
|
|
270
|
+
// `isEnabled` absent ⇒ assume enabled (WDA omits it on always-enabled types).
|
|
271
|
+
const enabled = n.isEnabled == null ? true : wdaTruthy(n.isEnabled);
|
|
272
|
+
const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && enabled;
|
|
273
|
+
out.push({
|
|
274
|
+
role: normalizeRole(rawType),
|
|
275
|
+
label,
|
|
276
|
+
bounds,
|
|
277
|
+
clickable: actionable,
|
|
278
|
+
hasOwnLabel: label.length > 0,
|
|
279
|
+
resourceId: (n.name || n.rawIdentifier) ?? undefined,
|
|
280
|
+
space: "points",
|
|
281
|
+
});
|
|
282
|
+
}
|
|
283
|
+
// Recurse into ALL children — an accessible element can nest inside a
|
|
284
|
+
// non-accessible container (the Cell wrapping the Button), so we must not
|
|
285
|
+
// prune the walk by accessibility, only the emission.
|
|
286
|
+
for (const c of n.children ?? [])
|
|
287
|
+
visit(c);
|
|
288
|
+
};
|
|
289
|
+
visit(root);
|
|
290
|
+
return out;
|
|
291
|
+
}
|
|
257
292
|
// ---------------------------------------------------------------------------
|
|
258
293
|
// Serialization — flat NativeNode list → `[id] role "label"` + nodeMap
|
|
259
294
|
// ---------------------------------------------------------------------------
|
|
@@ -271,7 +306,7 @@ function normalizeLabel(label) {
|
|
|
271
306
|
}
|
|
272
307
|
/**
|
|
273
308
|
* Serialize a flat NativeNode list (from `parseUiautomatorXml` /
|
|
274
|
-
* `
|
|
309
|
+
* `parseXcuiHierarchy`) into the `[id] role "label"` string the DOMLocator
|
|
275
310
|
* reasons over, plus a `shortId → bounds` map for local tap resolution.
|
|
276
311
|
*
|
|
277
312
|
* Emission rules (kept tight, like the DOM serializer):
|
|
@@ -1,38 +1,34 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Thin async wrappers over `xcrun simctl`
|
|
2
|
+
* Thin async wrappers over `xcrun simctl` for the native-iOS sim path: simulator
|
|
3
|
+
* LIFECYCLE (boot detection, install, terminate, launch) and the SCREENSHOT.
|
|
3
4
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* terminate, launch) and the SCREENSHOT.
|
|
7
|
-
* - `idb` drives UI INPUT (tap/swipe/text/key) and reports the screen
|
|
8
|
-
* geometry (pixels, points, and the scale between them).
|
|
5
|
+
* UI interaction + the accessibility tree live in `xcuitest.ts` (WebDriverAgent),
|
|
6
|
+
* NOT here — iOS no longer depends on idb.
|
|
9
7
|
*
|
|
10
8
|
* COORDINATE SPACES (the key difference from Android, where screencap and tap
|
|
11
9
|
* share one pixel space):
|
|
12
10
|
* - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
|
|
13
|
-
* -
|
|
14
|
-
* The native sim TAPS in points (de-normalize 0-1000 against the POINT size)
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
* IOSDevice for the full derivation.
|
|
11
|
+
* - WebDriverAgent's taps/swipes + a11y frames are POINTS (e.g. 393x852).
|
|
12
|
+
* The native sim TAPS in points (de-normalize 0-1000 against the POINT size) but
|
|
13
|
+
* RECORDS in PIXELS: dimensions() returns the pixel size so the loop's round-trip
|
|
14
|
+
* is exact. Recording in points would drift — the point grid (393) is coarser
|
|
15
|
+
* than the 0-1000 normalized grid, so it double-rounds. See IOSDevice.
|
|
19
16
|
*/
|
|
20
17
|
export declare class IosError extends Error {
|
|
21
18
|
constructor(message: string);
|
|
22
19
|
}
|
|
23
20
|
/** Run `xcrun simctl <args>` and return trimmed stdout. */
|
|
24
21
|
export declare function simctl(args: string[], timeoutMs?: number): Promise<string>;
|
|
25
|
-
/** Run `idb <args>` and return trimmed stdout. */
|
|
26
|
-
export declare function idb(args: string[], timeoutMs?: number): Promise<string>;
|
|
27
22
|
/**
|
|
28
23
|
* Assert exactly one simulator is Booted and return its udid. We pin every
|
|
29
|
-
* subsequent
|
|
24
|
+
* subsequent simctl/WDA call (and the screenshot) to "booted", so multiple
|
|
30
25
|
* booted simulators are ambiguous and rejected.
|
|
31
26
|
*/
|
|
32
27
|
export declare function requireOneBootedSimulator(): Promise<string>;
|
|
33
28
|
/**
|
|
34
|
-
* Screen geometry
|
|
35
|
-
*
|
|
29
|
+
* Screen geometry: PIXEL size, POINT size, and the scale (`density`) between
|
|
30
|
+
* them. Produced by the XCUITest driver's `describeScreen` (xcuitest.ts) and
|
|
31
|
+
* consumed by IOSDevice — points drive WDA taps/swipes; pixels are the
|
|
36
32
|
* screenshot's resolution.
|
|
37
33
|
*/
|
|
38
34
|
export interface IosScreen {
|
|
@@ -42,38 +38,12 @@ export interface IosScreen {
|
|
|
42
38
|
pointHeight: number;
|
|
43
39
|
density: number;
|
|
44
40
|
}
|
|
45
|
-
export declare function describeScreen(udid: string): Promise<IosScreen>;
|
|
46
41
|
/**
|
|
47
42
|
* Capture the booted simulator's screen as PNG bytes via
|
|
48
43
|
* `simctl io booted screenshot`. simctl writes to a file path (no reliable
|
|
49
44
|
* stdout in current Xcode), so we round-trip through a temp file.
|
|
50
45
|
*/
|
|
51
46
|
export declare function screenshotPng(): Promise<Buffer>;
|
|
52
|
-
export declare function uiTap(udid: string, x: number, y: number): Promise<void>;
|
|
53
|
-
export declare function uiLongPress(udid: string, x: number, y: number, durationMs?: number): Promise<void>;
|
|
54
|
-
export declare function uiSwipe(udid: string, x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;
|
|
55
|
-
/**
|
|
56
|
-
* Type text into the focused field. Unlike Android's `adb shell input text`,
|
|
57
|
-
* `idb ui text` handles spaces/unicode/quotes correctly, so no helper IME is
|
|
58
|
-
* needed.
|
|
59
|
-
*/
|
|
60
|
-
export declare function uiText(udid: string, text: string): Promise<void>;
|
|
61
|
-
/**
|
|
62
|
-
* Press a hardware key by HID usage code. `idb ui key 40` is Return/Enter
|
|
63
|
-
* (used to submit a text field).
|
|
64
|
-
*/
|
|
65
|
-
export declare function uiKey(udid: string, keycode: number): Promise<void>;
|
|
66
|
-
/** HID usage code for Return/Enter. */
|
|
67
|
-
export declare const HID_KEY_RETURN = 40;
|
|
68
|
-
/**
|
|
69
|
-
* Capture the current accessibility tree as `idb ui describe-all` JSON (a flat
|
|
70
|
-
* array of elements, each with a POINT frame) and return it. Mirrors the
|
|
71
|
-
* oracle's `ios_describe`: right after a tap the tree can be mid-transition and
|
|
72
|
-
* come back empty/partial, so we retry until we get an array with more than just
|
|
73
|
-
* the root application node. Throws IosError if every attempt yields a trivial
|
|
74
|
-
* tree so the caller can degrade to the vision path.
|
|
75
|
-
*/
|
|
76
|
-
export declare function describeAll(udid: string): Promise<string>;
|
|
77
47
|
export declare function terminateApp(udid: string, bundleId: string): Promise<void>;
|
|
78
48
|
export declare function launchApp(udid: string, bundleId: string): Promise<void>;
|
|
79
49
|
export declare function installApp(udid: string, appPath: string): Promise<void>;
|