@ishlabs/cli 0.24.1 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/commands/ask.js +3 -3
  2. package/dist/commands/doctor.d.ts +26 -0
  3. package/dist/commands/doctor.js +334 -0
  4. package/dist/commands/iteration.js +1 -1
  5. package/dist/commands/study-analyze.js +1 -1
  6. package/dist/commands/study-run.js +80 -12
  7. package/dist/commands/study.js +11 -7
  8. package/dist/index.js +2 -0
  9. package/dist/lib/alias-store.js +1 -1
  10. package/dist/lib/api-client.d.ts +2 -0
  11. package/dist/lib/docs.js +57 -42
  12. package/dist/lib/local-sim/actions.d.ts +10 -2
  13. package/dist/lib/local-sim/actions.js +18 -11
  14. package/dist/lib/local-sim/adb.d.ts +113 -0
  15. package/dist/lib/local-sim/adb.js +366 -0
  16. package/dist/lib/local-sim/android.d.ts +111 -0
  17. package/dist/lib/local-sim/android.js +504 -0
  18. package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
  19. package/dist/lib/local-sim/apk-manifest.js +210 -0
  20. package/dist/lib/local-sim/browser.d.ts +22 -0
  21. package/dist/lib/local-sim/browser.js +65 -0
  22. package/dist/lib/local-sim/coordinates.d.ts +69 -0
  23. package/dist/lib/local-sim/coordinates.js +59 -0
  24. package/dist/lib/local-sim/device.d.ts +143 -0
  25. package/dist/lib/local-sim/device.js +152 -0
  26. package/dist/lib/local-sim/ios.d.ts +185 -0
  27. package/dist/lib/local-sim/ios.js +599 -0
  28. package/dist/lib/local-sim/loop.d.ts +14 -2
  29. package/dist/lib/local-sim/loop.js +168 -73
  30. package/dist/lib/local-sim/native-a11y.d.ts +111 -0
  31. package/dist/lib/local-sim/native-a11y.js +419 -0
  32. package/dist/lib/local-sim/simctl.d.ts +55 -0
  33. package/dist/lib/local-sim/simctl.js +144 -0
  34. package/dist/lib/local-sim/types.d.ts +39 -2
  35. package/dist/lib/local-sim/upload.d.ts +1 -1
  36. package/dist/lib/local-sim/upload.js +9 -6
  37. package/dist/lib/local-sim/xcuitest.d.ts +60 -0
  38. package/dist/lib/local-sim/xcuitest.js +303 -0
  39. package/dist/lib/output.js +58 -12
  40. package/dist/lib/paths.d.ts +8 -0
  41. package/dist/lib/paths.js +12 -0
  42. package/dist/lib/skill-content.js +10 -9
  43. package/package.json +2 -1
@@ -0,0 +1,419 @@
1
+ /**
2
+ * Pure parser/serializer for native (Android/iOS) accessibility trees — the
3
+ * native counterpart of the browser's DOM-locator tree. It turns a raw device
4
+ * a11y dump into the SAME `[id] role "name"` string the backend's DOMLocator
5
+ * reasons over, plus a local `shortId → bounds` map the device taps the CENTER
6
+ * of. No bounds ship to the backend; like the browser path, the CLI keeps the
7
+ * map and resolves the LLM's returned short id locally.
8
+ *
9
+ * FCIS: this module is pure (string in, structs out) — no `adb`/`idb` I/O — so
10
+ * it's unit-testable without a device, exactly like `coordinates.ts`. The I/O
11
+ * lives in `adb.ts`/`simctl.ts`; the parse/serialize math lives here.
12
+ *
13
+ * COORDINATE SPACE — carried, not converted, by this module:
14
+ * - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
15
+ * - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
16
+ * The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
17
+ * IOSDevice taps points), so the `space` tag tells the caller which dimension a
18
+ * node's bounds-center belongs to. This module never mixes the two.
19
+ *
20
+ * ANCESTOR-VS-LEAF (the hard part): on Android the visible label
21
+ * ("Network & internet") sits on a `clickable=false` TextView nested inside the
22
+ * clickable PARENT row. Tapping the leaf's center misses the row's hit logic and
23
+ * lands "slightly off"; the click target is the row. So the serializer walks to
24
+ * the nearest clickable ANCESTOR, aggregates its descendants' text/content-desc
25
+ * into ONE label, and emits the CLICKABLE node WITH THE ROW'S BOUNDS — never the
26
+ * leaf. iOS Buttons are already labeled + actionable, so they emit directly.
27
+ */
28
/**
 * Native role → ARIA synonym, so the DOMLocator prompt sees the same role
 * vocabulary for native screens as it does for the web. Roles missing from the
 * table are expected to pass through lower-cased by the lookup helper (better a
 * literal native role than a dropped node).
 *
 * The table is built on a null prototype: role names come from the device
 * dump, and a plain object literal would answer lookups such as "toString" or
 * "constructor" with inherited Object.prototype members instead of `undefined`.
 * It is frozen because it is a lookup constant, never mutated at runtime.
 */
const ROLE_NORMALIZATION = Object.freeze(Object.assign(Object.create(null), {
    // Android (android.widget.* / android.view.*, matched on the leaf class name).
    Button: "button",
    ImageButton: "button",
    EditText: "textbox",
    AutoCompleteTextView: "textbox",
    TextView: "text",
    CheckBox: "checkbox",
    RadioButton: "radio",
    Switch: "switch",
    ToggleButton: "switch",
    ImageView: "image",
    ViewGroup: "generic",
    LinearLayout: "generic",
    FrameLayout: "generic",
    RelativeLayout: "generic",
    ScrollView: "generic",
    RecyclerView: "list",
    ListView: "list",
    // iOS (WDA / XCUITest `type`; an "AX" prefix is stripped before lookup).
    StaticText: "text",
    TextField: "textbox",
    SecureTextField: "textbox",
    SearchField: "searchbox",
    Cell: "listitem",
    Heading: "heading",
    Image: "image",
    Group: "generic",
    Link: "link",
    Application: "application",
}));
/** Roles that are pure layout containers — only kept if they're a tappable row. */
const DECORATIVE_GENERIC_ROLES = new Set(["generic", "application"]);
67
/**
 * Translate a raw native role/class name into the role vocabulary used in the
 * serialized tree: strip a leading "AX", look the remainder up in
 * ROLE_NORMALIZATION, and fall back to the lower-cased remainder when the
 * table has no entry.
 */
function normalizeRole(rawRole) {
    const bareRole = stripAxPrefix(rawRole);
    const mapped = ROLE_NORMALIZATION[bareRole];
    if (mapped == null) {
        return bareRole.toLowerCase();
    }
    return mapped;
}
71
/**
 * Drop a leading "AX" from a role name ("AXButton" → "Button",
 * "AXStaticText" → "StaticText"); any other string is returned unchanged.
 */
function stripAxPrefix(role) {
    if (!role.startsWith("AX")) {
        return role;
    }
    return role.slice("AX".length);
}
75
// Matches the uiautomator `[x1,y1][x2,y2]` bounds form; coordinates may be negative.
const ANDROID_BOUNDS_RE = /^\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]$/;
/**
 * Convert a `bounds="[x1,y1][x2,y2]"` attribute value into
 * `{ x, y, width, height }`.
 *
 * Returns null when the text does not match the bounds form, or when the rect
 * has no positive area: a collapsed rect such as `[0,0][0,0]` has no tappable
 * center, so it is treated as malformed rather than tapped at (0,0).
 */
function parseAndroidBounds(raw) {
    const match = ANDROID_BOUNDS_RE.exec(raw.trim());
    if (match === null) {
        return null;
    }
    const [left, top, right, bottom] = match.slice(1, 5).map(Number);
    const width = right - left;
    const height = bottom - top;
    if (width > 0 && height > 0) {
        return { x: left, y: top, width, height };
    }
    return null;
}
93
/**
 * Return the part of a dotted class name after its final dot
 * ("android.widget.TextView" → "TextView"); a name without dots is returned whole.
 */
function androidLeafClass(cls) {
    const lastDot = cls.lastIndexOf(".");
    if (lastDot < 0) {
        return cls;
    }
    return cls.slice(lastDot + 1);
}
98
/**
 * Read one double-quoted attribute out of a raw `<node ...>` tag string.
 *
 * The attribute must be preceded by whitespace, so asking for `text` cannot
 * match the tail of a longer attribute name. The captured value is passed
 * through `unescapeXml`; a missing attribute yields "".
 */
function attr(tag, name) {
    const pattern = new RegExp(`\\s${name}="([^"]*)"`);
    const found = tag.match(pattern);
    if (found === null) {
        return "";
    }
    return unescapeXml(found[1]);
}
104
// The five predefined XML entities, keyed by the name between `&` and `;`.
const XML_NAMED_ENTITIES = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
/**
 * Decode XML entity references in an attribute value.
 *
 * Decoding happens in ONE pass over the string. Chained `.replace` calls that
 * handle `&amp;` first turn an escaped literal such as `&amp;lt;` into `&lt;`
 * and then into `<`, corrupting any label that really contains the text
 * "&lt;". A single pass never re-scans its own output.
 *
 * Besides the five named entities, decimal (`&#10;`) and hexadecimal
 * (`&#x41;`) character references are decoded. References that are unknown or
 * name a code point beyond U+10FFFF are left in the text untouched.
 *
 * @param {string} s Raw attribute value.
 * @returns {string} The value with entity references decoded.
 */
function unescapeXml(s) {
    return s.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, ref) => {
        if (ref[0] !== "#") {
            return XML_NAMED_ENTITIES[ref];
        }
        const codePoint = ref[1] === "x"
            ? Number.parseInt(ref.slice(2), 16)
            : Number.parseInt(ref.slice(1), 10);
        // String.fromCodePoint throws a RangeError above U+10FFFF; keep such text as-is.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });
}
112
/**
 * Parse a uiautomator XML dump into a flat, depth-first (document order) list
 * of nodes for `serializeNativeTree`.
 *
 * The nesting is first rebuilt by `buildAndroidTree`; this function then walks
 * that tree in pre-order and emits one record per node that has usable bounds.
 * Nodes whose bounds failed to parse (malformed or zero-area) have no tappable
 * center and are skipped, but their children are still visited.
 *
 * Each emitted record carries the normalized role, the node's own label (text,
 * else content-desc), its bounds, the clickable flag, an optional resource id
 * and `space: "px"`. Which records are finally shown, and how labels are
 * aggregated, is the serializer's decision.
 */
export function parseUiautomatorXml(xml) {
    const flattened = [];
    // Explicit pre-order traversal: the stack is seeded (and refilled) in
    // reverse so nodes pop off in document order.
    const pending = [...buildAndroidTree(xml).children].reverse();
    while (pending.length > 0) {
        const raw = pending.pop();
        if (raw.bounds) {
            const label = raw.text || raw.contentDesc;
            flattened.push({
                role: normalizeRole(androidLeafClass(raw.role)),
                label,
                bounds: raw.bounds,
                clickable: raw.clickable,
                hasOwnLabel: label.length > 0,
                resourceId: raw.resourceId || undefined,
                space: "px",
            });
        }
        for (let i = raw.children.length - 1; i >= 0; i -= 1) {
            pending.push(raw.children[i]);
        }
    }
    return flattened;
}
147
/**
 * Rebuild the uiautomator node tree from the flat tag stream.
 *
 * The dump contains container tags (`<node ...>` … `</node>`) and self-closing
 * leaves (`<node .../>`). Tags are tokenized with a regex and nested with an
 * explicit stack of currently-open ancestors, so every node's `children` are
 * its true descendants. Returns a synthetic root whose children are the
 * top-level nodes.
 */
function buildAndroidTree(xml) {
    const root = makeRawAndroidNode("", "", "", "", false, null);
    const openAncestors = [root];
    // Attribute values are consumed as whole quoted runs (`"[^"]*"`), so a
    // literal `>` inside a value (text="Home > Settings") cannot end the tag
    // early and drop the node.
    const tagPattern = /<node\b(?:[^>"]|"[^"]*")*>|<\/node>/g;
    for (const [tag] of xml.matchAll(tagPattern)) {
        if (tag === "</node>") {
            // Never pop the synthetic root, even on unbalanced input.
            if (openAncestors.length > 1) {
                openAncestors.pop();
            }
            continue;
        }
        const node = makeRawAndroidNode(
            attr(tag, "class"),
            attr(tag, "text"),
            attr(tag, "content-desc"),
            attr(tag, "resource-id"),
            attr(tag, "clickable") === "true",
            parseAndroidBounds(attr(tag, "bounds")),
        );
        openAncestors[openAncestors.length - 1].children.push(node);
        // The tag pattern swallows the trailing slash, so self-closing is
        // detected from the matched text itself.
        const isContainer = !tag.endsWith("/>");
        if (isContainer) {
            openAncestors.push(node);
        }
    }
    return root;
}
181
/**
 * Build one raw tree node from already-extracted attribute values. Every node
 * gets its own fresh, empty `children` array.
 */
function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
    const node = { role, text, contentDesc, resourceId, clickable, bounds };
    node.children = [];
    return node;
}
184
+ // ---------------------------------------------------------------------------
185
+ // iOS — shared helpers for the WebDriverAgent (XCUITest) /source parser below
186
+ // ---------------------------------------------------------------------------
187
/**
 * iOS element types (after any "AX" prefix is stripped) that count as directly
 * actionable — the device taps the center of such an element.
 */
const IOS_ACTIONABLE_TYPES = new Set([
    // Activation
    "Button",
    "Link",
    // Text entry
    "TextField",
    "SecureTextField",
    "SearchField",
    // Rows
    "Cell",
    // Toggles / selection
    "Switch",
    "Checkbox",
    "RadioButton",
    // Menus / navigation
    "MenuItem",
    "Tab",
]);
201
/**
 * Validate an element frame/rect and convert it to `{ x, y, width, height }`.
 *
 * Returns null when the frame is missing, when any of the four fields is not a
 * finite number, or when the rect has no positive area. Width and height are
 * held to the same finiteness rule as x/y: a NaN or Infinity size (for example
 * from a JSON number such as `1e999`) slips past a bare `<= 0` comparison and
 * would yield a rect with no usable center.
 *
 * @param {{x?: unknown, y?: unknown, width?: unknown, height?: unknown} | null | undefined} frame
 * @returns {{x: number, y: number, width: number, height: number} | null}
 */
function frameToBounds(frame) {
    if (!frame) {
        return null;
    }
    const { x, y, width, height } = frame;
    // Number.isFinite is false for every non-number, so it also covers the type check.
    const allFinite = [x, y, width, height].every((value) => Number.isFinite(value));
    if (!allFinite || width <= 0 || height <= 0) {
        return null;
    }
    return { x, y, width, height };
}
217
/**
 * Interpret a WDA flag: only the boolean `true` and the string "1" count as
 * true; everything else (including the number 1) is false.
 */
function wdaTruthy(v) {
    if (v === true) {
        return true;
    }
    return v === "1";
}
221
/**
 * Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into a
 * FLAT, depth-first list of nodes (bounds in POINTS, `space: "points"`) that
 * `serializeNativeTree` consumes.
 *
 * WDA's `/source` is the full element tree, containers and leaves alike. A
 * settings row surfaces as an accessible `Button` that also contains a
 * duplicate inner `StaticText` and is wrapped in a non-accessible `Cell`;
 * emitting all three would yield doubled labels and empty list items. So only
 * nodes that are accessible AND visible AND have a valid rect are emitted. The
 * walk itself still descends into every child, because an accessible element
 * can sit inside a non-accessible container.
 *
 * Accepts either the raw tree or the `{ value: <tree> }` envelope.
 *
 * Malformed input never throws: unparseable JSON, a non-object root,
 * non-object children, a non-array `children` field, and non-string
 * `label`/`type` values are all tolerated (skipped or treated as empty).
 *
 * @param {string} json Response body of WDA's `/source?format=json`.
 * @returns {Array<object>} Flat node list in document order; [] on bad input.
 */
export function parseXcuiHierarchy(json) {
    let parsed;
    try {
        parsed = JSON.parse(json);
    }
    catch {
        return [];
    }
    const isObject = (v) => v !== null && typeof v === "object";
    // A tree NODE also has its own `value` field (the element's value), so the
    // envelope cannot be detected by `"value" in parsed` alone. Only unwrap
    // when the top level is not itself node-shaped (has no `type`).
    const root = isObject(parsed) && !("type" in parsed) && "value" in parsed
        ? parsed.value
        : parsed;
    if (!isObject(root)) {
        return [];
    }
    const out = [];
    const visit = (n) => {
        // A null/primitive entry in `children` is skipped instead of crashing the walk.
        if (!isObject(n)) {
            return;
        }
        const bounds = frameToBounds(n.rect ?? undefined);
        if (bounds && wdaTruthy(n.isAccessible) && wdaTruthy(n.isVisible)) {
            // Prefer the spoken label; fall back to a STRING value (search
            // fields expose their placeholder as `value`). Non-string values
            // (a Switch's 1/0) are ignored for the label. An empty-string
            // label is kept as-is and does NOT fall back to the value.
            let rawLabel = "";
            if (typeof n.label === "string") {
                rawLabel = n.label;
            }
            else if (typeof n.value === "string") {
                rawLabel = n.value;
            }
            const label = rawLabel.trim();
            const rawType = typeof n.type === "string" ? n.type : "";
            // `isEnabled` absent ⇒ assume enabled.
            const enabled = n.isEnabled == null ? true : wdaTruthy(n.isEnabled);
            const actionable = IOS_ACTIONABLE_TYPES.has(stripAxPrefix(rawType)) && enabled;
            out.push({
                role: normalizeRole(rawType),
                label,
                bounds,
                clickable: actionable,
                hasOwnLabel: label.length > 0,
                // Empty identifiers collapse to undefined, matching the Android parser.
                resourceId: n.name || n.rawIdentifier || undefined,
                space: "points",
            });
        }
        const children = Array.isArray(n.children) ? n.children : [];
        for (const child of children) {
            visit(child);
        }
    };
    visit(root);
    return out;
}
292
+ // ---------------------------------------------------------------------------
293
+ // Serialization — flat NativeNode list → `[id] role "label"` + nodeMap
294
+ // ---------------------------------------------------------------------------
295
// Upper bound on the length of a serialized label, ellipsis included.
const NODE_LABEL_MAX_LENGTH = 100;
/**
 * Short-id prefix for a coordinate space, so ids in a mixed log are
 * unambiguous: "A" for "px" (Android), "I" for anything else (iOS points).
 */
function shortIdPrefix(space) {
    if (space === "px") {
        return "A";
    }
    return "I";
}
300
/**
 * Shorten `text` to at most `max` UTF-16 code units, replacing the removed
 * tail with a single "…".
 *
 * Text that already fits is returned unchanged. The cut never lands between
 * the two halves of a surrogate pair (an emoji or other astral character):
 * when the last kept unit would be a lone high surrogate it is dropped too, so
 * the result is always well-formed. A `max` of 0 or less yields just "…"
 * instead of slicing from the end of the string.
 *
 * @param {string} text
 * @param {number} max Maximum length of the result, ellipsis included.
 * @returns {string}
 */
function truncate(text, max) {
    if (text.length <= max) {
        return text;
    }
    let end = Math.max(0, max - 1);
    if (end > 0) {
        const lastKept = text.charCodeAt(end - 1);
        const isHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
        if (isHighSurrogate) {
            end -= 1;
        }
    }
    return text.slice(0, end) + "…";
}
303
/**
 * Collapse every run of whitespace to one space and strip leading/trailing
 * whitespace, so aggregated multi-line labels read on a single line.
 */
function normalizeLabel(label) {
    const words = label.split(/\s+/).filter((word) => word.length > 0);
    return words.join(" ");
}
307
/**
 * Serialize a flat node list (from `parseUiautomatorXml` / `parseXcuiHierarchy`)
 * into the `[id] role "label"` text the locator reasons over, plus a
 * `shortId → bounds` map used to resolve a returned id to a tap rect locally.
 *
 * Rules:
 *  - ANCESTOR-VS-LEAF: a label-bearing node whose center lies inside a
 *    clickable node donates its label to the smallest such clickable and is
 *    not emitted itself; the clickable is emitted with ITS OWN bounds.
 *  - A label-bearing node with no clickable host is emitted directly.
 *  - Pure decoration is skipped (see `shouldEmit`).
 *
 * Ancestry is recovered from bounds containment, not from tree structure, and
 * ids are numbered in input (document) order. The id prefix is chosen from
 * the first node's `space` ("px" when the list is empty).
 *
 * @returns {{ simplified: string, nodeMap: Map<string, object> }}
 */
export function serializeNativeTree(nodes) {
    const prefix = shortIdPrefix(nodes[0]?.space ?? "px");
    // Pass 1 — move each label onto its hosting clickable.
    const tapTargets = nodes.filter((candidate) => candidate.clickable);
    const absorbedLabels = new Map();
    const absorbedNodes = new Set();
    for (const node of nodes) {
        if (!node.hasOwnLabel) {
            continue;
        }
        const host = smallestContainingClickable(node, tapTargets);
        // No host, or the node is its own host: the label stays where it is.
        if (!host || host === node) {
            continue;
        }
        if (!absorbedLabels.has(host)) {
            absorbedLabels.set(host, []);
        }
        absorbedLabels.get(host).push(node.label);
        absorbedNodes.add(node);
    }
    // Pass 2 — emit survivors in document order.
    const lines = [];
    const nodeMap = new Map();
    for (const node of nodes) {
        if (absorbedNodes.has(node)) {
            continue; // its label now lives on the hosting row
        }
        // Own label first, then whatever the node absorbed.
        const parts = [node.label, ...(absorbedLabels.get(node) ?? [])].filter(Boolean);
        const combined = normalizeLabel(parts.join(" "));
        if (!shouldEmit(node, combined)) {
            continue;
        }
        // One map entry per emitted line, so the map size doubles as the counter.
        const shortId = `${prefix}${nodeMap.size + 1}`;
        const label = truncate(combined, NODE_LABEL_MAX_LENGTH);
        lines.push(label ? `[${shortId}] ${node.role} "${label}"` : `[${shortId}] ${node.role}`);
        nodeMap.set(shortId, node.bounds);
    }
    return { simplified: lines.join("\n"), nodeMap };
}
372
/**
 * Decide whether a node appears in the simplified tree.
 *
 *  - Clickable nodes are always kept.
 *  - Otherwise the node must carry a (possibly aggregated) label AND must not
 *    be a bare generic/application container: labeled non-clickable nodes are
 *    real on-screen text (a heading, a value), unlabeled ones are decoration.
 */
function shouldEmit(node, combinedLabel) {
    if (node.clickable) {
        return true;
    }
    return Boolean(combinedLabel) && !DECORATIVE_GENERIC_ROLES.has(node.role);
}
387
/**
 * Find the clickable node that should host `node`'s label.
 *
 *  - If `node` is itself one of `clickables`, it hosts its own label and is
 *    returned immediately. Without this short-circuit, a smaller clickable
 *    covering the node's center (say, a button centered in a clickable row)
 *    would take over the row's label, and the row would then be treated as an
 *    absorbed descendant and never emitted.
 *  - Otherwise, return the smallest-area clickable whose rect contains the
 *    CENTER of `node`'s bounds — "smallest" picks the immediate row rather
 *    than a large container wrapping everything. On equal area the earlier
 *    candidate wins.
 *  - Returns null when no clickable contains the center.
 *
 * @param {{bounds: {x: number, y: number, width: number, height: number}}} node
 * @param {Array<{bounds: {x: number, y: number, width: number, height: number}}>} clickables
 */
function smallestContainingClickable(node, clickables) {
    const cx = node.bounds.x + node.bounds.width / 2;
    const cy = node.bounds.y + node.bounds.height / 2;
    let best = null;
    let bestArea = Infinity;
    for (const candidate of clickables) {
        if (candidate === node) {
            return node; // a clickable node hosts its own label
        }
        if (!containsPoint(candidate.bounds, cx, cy)) {
            continue;
        }
        const area = candidate.bounds.width * candidate.bounds.height;
        if (area < bestArea) {
            best = candidate;
            bestArea = area;
        }
    }
    return best;
}
/** True when point (x, y) lies inside rect `b`; all four edges are inclusive. */
function containsPoint(b, x, y) {
    return x >= b.x && x <= b.x + b.width && y >= b.y && y <= b.y + b.height;
}
416
/**
 * Center of a bounds rect, each coordinate rounded to the nearest integer —
 * the point the device taps.
 */
export function boundsCenter(b) {
    const { x, y, width, height } = b;
    const centerX = Math.round(x + width / 2);
    const centerY = Math.round(y + height / 2);
    return { x: centerX, y: centerY };
}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Thin async wrappers over `xcrun simctl` for the native-iOS sim path: simulator
3
+ * LIFECYCLE (boot detection, install, terminate, launch) and the SCREENSHOT.
4
+ *
5
+ * UI interaction + the accessibility tree live in `xcuitest.ts` (WebDriverAgent),
6
+ * NOT here — iOS no longer depends on idb.
7
+ *
8
+ * COORDINATE SPACES (the key difference from Android, where screencap and tap
9
+ * share one pixel space):
10
+ * - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
11
+ * - WebDriverAgent's taps/swipes + a11y frames are POINTS (e.g. 393x852).
12
+ * The native sim TAPS in points (de-normalize 0-1000 against the POINT size) but
13
+ * RECORDS in PIXELS: dimensions() returns the pixel size so the loop's round-trip
14
+ * is exact. Recording in points would drift — the point grid (393) is coarser
15
+ * than the 0-1000 normalized grid, so it double-rounds. See IOSDevice.
16
+ */
17
+ /**
+ * Error type thrown by the simctl wrappers in this module. The implementation
+ * sets `name` to "IosError" on every instance.
+ */
+ export declare class IosError extends Error {
18
+ constructor(message: string);
19
+ }
20
+ /**
+ * Run `xcrun simctl <args>` and return trimmed stdout.
+ *
+ * @param args Arguments placed after `simctl` on the command line.
+ * @param timeoutMs Timeout in milliseconds; the implementation defaults it to 30 000.
+ * @returns The command's stdout with surrounding whitespace trimmed.
+ * @throws IosError wrapping the underlying message when the command invocation rejects.
+ */
21
+ export declare function simctl(args: string[], timeoutMs?: number): Promise<string>;
22
+ /**
23
+ * Assert exactly one simulator is Booted and return its udid. We pin every
24
+ * subsequent simctl/WDA call (and the screenshot) to "booted", so multiple
25
+ * booted simulators are ambiguous and rejected.
+ *
+ * @returns The udid of the single booted simulator.
+ * @throws IosError when simctl cannot be run, when its JSON output cannot be
+ * parsed, or when the number of booted simulators is not exactly one.
26
+ */
27
+ export declare function requireOneBootedSimulator(): Promise<string>;
28
+ /**
29
+ * Screen geometry: PIXEL size, POINT size, and the scale (`density`) between
30
+ * them. Produced by the XCUITest driver's `describeScreen` (xcuitest.ts) and
31
+ * consumed by IOSDevice — points drive WDA taps/swipes; pixels are the
32
+ * screenshot's resolution.
33
+ */
34
+ export interface IosScreen {
35
+ /** Screen width in pixels (the screenshot's resolution). */
+ pixelWidth: number;
36
+ /** Screen height in pixels (the screenshot's resolution). */
+ pixelHeight: number;
37
+ /** Screen width in points (the space WDA taps/swipes use). */
+ pointWidth: number;
38
+ /** Screen height in points (the space WDA taps/swipes use). */
+ pointHeight: number;
39
+ /** Scale between the point size and the pixel size. */
+ density: number;
40
+ }
41
+ /**
42
+ * Capture the booted simulator's screen as PNG bytes via
43
+ * `simctl io booted screenshot`. simctl writes to a file path (no reliable
44
+ * stdout in current Xcode), so we round-trip through a temp file.
+ *
+ * @returns The contents of the PNG file simctl wrote. The temporary directory
+ * is removed afterwards on a best-effort basis, whether or not the capture succeeded.
45
+ */
46
+ export declare function screenshotPng(): Promise<Buffer>;
47
+ /**
+ * Run `simctl terminate <udid> <bundleId>`. Best-effort: any failure (such as
+ * the app not running) is swallowed, so the returned promise does not reject.
+ */
+ export declare function terminateApp(udid: string, bundleId: string): Promise<void>;
48
+ /**
+ * Run `simctl launch <udid> <bundleId>`. Rejects with the wrapper's IosError
+ * when simctl fails (for example, when the bundle id is not installed).
+ */
+ export declare function launchApp(udid: string, bundleId: string): Promise<void>;
49
+ /**
+ * Run `simctl install <udid> <appPath>`; the implementation allows this call
+ * a longer (180 s) timeout than the wrapper's default.
+ */
+ export declare function installApp(udid: string, appPath: string): Promise<void>;
50
+ /**
+ * Report whether `bundleId` appears, as a double-quoted string, in the output
+ * of `simctl listapps <udid>`. This is a substring check, not a plist parse.
+ */
+ export declare function isAppInstalled(udid: string, bundleId: string): Promise<boolean>;
51
+ /**
52
+ * Read CFBundleIdentifier from a local `.app`'s Info.plist via `plutil`. Lets us
53
+ * terminate+launch a just-installed app without diffing the app list.
+ *
+ * @param appPath Path to the `.app` directory; `Info.plist` is looked up directly inside it.
+ * @returns The trimmed identifier, or null when Info.plist does not exist,
+ * `plutil` fails, or its output is empty. Never rejects.
54
+ */
55
+ export declare function bundleIdFromApp(appPath: string): Promise<string | null>;
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Thin async wrappers over `xcrun simctl` for the native-iOS sim path: simulator
3
+ * LIFECYCLE (boot detection, install, terminate, launch) and the SCREENSHOT.
4
+ *
5
+ * UI interaction + the accessibility tree live in `xcuitest.ts` (WebDriverAgent),
6
+ * NOT here — iOS no longer depends on idb.
7
+ *
8
+ * COORDINATE SPACES (the key difference from Android, where screencap and tap
9
+ * share one pixel space):
10
+ * - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
11
+ * - WebDriverAgent's taps/swipes + a11y frames are POINTS (e.g. 393x852).
12
+ * The native sim TAPS in points (de-normalize 0-1000 against the POINT size) but
13
+ * RECORDS in PIXELS: dimensions() returns the pixel size so the loop's round-trip
14
+ * is exact. Recording in points would drift — the point grid (393) is coarser
15
+ * than the 0-1000 normalized grid, so it double-rounds. See IOSDevice.
16
+ */
17
+ import { execFile } from "node:child_process";
18
+ import { existsSync } from "node:fs";
19
+ import { mkdtemp, readFile, rm } from "node:fs/promises";
20
+ import { tmpdir } from "node:os";
21
+ import { join } from "node:path";
22
+ import { promisify } from "node:util";
23
// Promise-returning form of child_process.execFile.
const execFileAsync = promisify(execFile);
// Absolute paths of the system tools this module shells out to.
const XCRUN = "/usr/bin/xcrun";
const PLUTIL = "/usr/bin/plutil";
// Timeouts, in milliseconds.
const DEFAULT_TIMEOUT_MS = 30 * 1000;
const SCREENSHOT_TIMEOUT_MS = 30 * 1000;
28
/**
 * Error type for failures in the simctl wrappers. Each instance gets its own
 * `name` of "IosError", so it is distinguishable in logs and by `err.name`.
 */
export class IosError extends Error {
    constructor(message) {
        super(message);
        const errorName = "IosError";
        this.name = errorName;
    }
}
34
/**
 * Run `xcrun simctl <args>` and return trimmed stdout.
 *
 * @param {string[]} args Arguments placed after `simctl`.
 * @param {number} [timeoutMs] Timeout in milliseconds (defaults to DEFAULT_TIMEOUT_MS).
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 * @throws {IosError} When the command invocation rejects. The message embeds
 *   the argument list and the underlying message; the original error object is
 *   attached as `cause`, so details that live on it rather than in its message
 *   stay available to callers and debuggers.
 */
export async function simctl(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    try {
        const { stdout } = await execFileAsync(XCRUN, ["simctl", ...args], {
            timeout: timeoutMs,
            maxBuffer: 4 * 1024 * 1024,
        });
        return stdout.trim();
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const wrapped = new IosError(`xcrun simctl ${args.join(" ")} failed: ${msg}`);
        // IosError's constructor takes only a message, so attach the cause
        // directly, as a non-enumerable property like the built-in `cause` option.
        Object.defineProperty(wrapped, "cause", {
            value: err,
            writable: true,
            configurable: true,
            enumerable: false,
        });
        throw wrapped;
    }
}
48
+ // --- Device state ---
49
/**
 * Assert that exactly one simulator is Booted and return its udid.
 *
 * Lists devices with `simctl list devices booted -j`, flattens the per-runtime
 * device lists and keeps entries whose `state` is "Booted". Zero or several
 * booted simulators are rejected: later calls target "booted", which is only
 * unambiguous with a single device.
 *
 * @returns {Promise<string>} udid of the single booted simulator.
 * @throws {IosError} If simctl cannot be run, its output cannot be parsed, or
 *   the number of booted simulators is not exactly one.
 */
export async function requireOneBootedSimulator() {
    let listing;
    try {
        listing = await simctl(["list", "devices", "booted", "-j"]);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new IosError(`Could not run xcrun simctl. Run \`ish check ios\` to check your setup. ${reason}`);
    }
    const booted = [];
    try {
        const { devices } = JSON.parse(listing);
        for (const device of Object.values(devices).flat()) {
            if (device.state === "Booted") {
                booted.push({ udid: device.udid, name: device.name });
            }
        }
    }
    catch {
        throw new IosError("Could not parse `simctl list devices booted -j` output.");
    }
    if (booted.length === 0) {
        throw new IosError("No iOS simulator booted. Open Simulator.app, or run `ish check ios` to check your setup.");
    }
    if (booted.length > 1) {
        const names = booted.map((d) => d.name).join(", ");
        throw new IosError(`Expected exactly one booted simulator, found ${booted.length} (${names}). Shut down the extras (the sim drives a single device).`);
    }
    const [only] = booted;
    return only.udid;
}
83
+ // --- Screenshot (PIXELS) ---
84
/**
 * Capture the booted simulator's screen as PNG bytes.
 *
 * `simctl io booted screenshot` writes to a file path, so the image is written
 * into a fresh temporary directory, read back, and the directory is removed
 * afterwards. Cleanup runs on success and failure alike, and a cleanup failure
 * is ignored.
 *
 * @returns {Promise<Buffer>} Contents of the PNG file.
 */
export async function screenshotPng() {
    const scratchDir = await mkdtemp(join(tmpdir(), "ish-ios-shot-"));
    try {
        const target = join(scratchDir, "shot.png");
        await simctl(["io", "booted", "screenshot", target], SCREENSHOT_TIMEOUT_MS);
        return await readFile(target);
    }
    finally {
        try {
            await rm(scratchDir, { recursive: true, force: true });
        }
        catch {
            // best-effort cleanup; a leftover temp dir must not mask the result
        }
    }
}
100
+ // --- App lifecycle (simctl) ---
101
/**
 * Best-effort `simctl terminate <udid> <bundleId>`.
 *
 * Terminating an app that isn't running exits non-zero ("found nothing to
 * terminate"), which is fine for a reset — so every failure is deliberately
 * swallowed and the returned promise always resolves.
 */
export async function terminateApp(udid, bundleId) {
    await simctl(["terminate", udid, bundleId]).catch(() => {
        // not running — nothing to stop
    });
}
111
/**
 * Run `simctl launch <udid> <bundleId>`.
 *
 * No error handling here on purpose: simctl exits non-zero when the bundle id
 * isn't installed, and the wrapper's throw is already a loud failure.
 */
export async function launchApp(udid, bundleId) {
    const launchArgs = ["launch", udid, bundleId];
    await simctl(launchArgs);
}
116
/**
 * Run `simctl install <udid> <appPath>` with a 180-second timeout.
 * Simulator builds aren't code-signed; `simctl install` just stages the .app.
 */
export async function installApp(udid, appPath) {
    const installTimeoutMs = 180_000;
    const installArgs = ["install", udid, appPath];
    await simctl(installArgs, installTimeoutMs);
}
120
/**
 * Report whether `bundleId` shows up in `simctl listapps <udid>`.
 *
 * The listing is not parsed: presence is confirmed by a substring search for
 * the double-quoted bundle id (bare, or as a `CFBundleIdentifier = "…"` entry).
 */
export async function isAppInstalled(udid, bundleId) {
    const listing = await simctl(["listapps", udid], 60_000);
    const needles = [`"${bundleId}"`, `CFBundleIdentifier = "${bundleId}"`];
    return needles.some((needle) => listing.includes(needle));
}
126
/**
 * Read CFBundleIdentifier from a local `.app`'s Info.plist via `plutil`, so a
 * just-installed app can be terminated and launched without diffing the app
 * list.
 *
 * @param {string} appPath Path to the `.app` directory.
 * @returns {Promise<string | null>} The trimmed identifier, or null when
 *   Info.plist is missing, `plutil` fails, or its output is empty. Never rejects.
 */
export async function bundleIdFromApp(appPath) {
    const infoPlist = join(appPath, "Info.plist");
    if (!existsSync(infoPlist)) {
        return null;
    }
    try {
        const plutilArgs = ["-extract", "CFBundleIdentifier", "raw", "-o", "-", infoPlist];
        const result = await execFileAsync(PLUTIL, plutilArgs, { timeout: 10_000 });
        const bundleId = result.stdout.trim();
        return bundleId === "" ? null : bundleId;
    }
    catch {
        return null;
    }
}