@ishlabs/cli 0.24.1 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ask.js +3 -3
- package/dist/commands/doctor.d.ts +26 -0
- package/dist/commands/doctor.js +334 -0
- package/dist/commands/iteration.js +1 -1
- package/dist/commands/study-analyze.js +1 -1
- package/dist/commands/study-run.js +80 -12
- package/dist/commands/study.js +11 -7
- package/dist/index.js +2 -0
- package/dist/lib/alias-store.js +1 -1
- package/dist/lib/api-client.d.ts +2 -0
- package/dist/lib/docs.js +57 -42
- package/dist/lib/local-sim/actions.d.ts +10 -2
- package/dist/lib/local-sim/actions.js +18 -11
- package/dist/lib/local-sim/adb.d.ts +113 -0
- package/dist/lib/local-sim/adb.js +366 -0
- package/dist/lib/local-sim/android.d.ts +111 -0
- package/dist/lib/local-sim/android.js +504 -0
- package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
- package/dist/lib/local-sim/apk-manifest.js +210 -0
- package/dist/lib/local-sim/browser.d.ts +22 -0
- package/dist/lib/local-sim/browser.js +65 -0
- package/dist/lib/local-sim/coordinates.d.ts +69 -0
- package/dist/lib/local-sim/coordinates.js +59 -0
- package/dist/lib/local-sim/device.d.ts +143 -0
- package/dist/lib/local-sim/device.js +152 -0
- package/dist/lib/local-sim/ios.d.ts +185 -0
- package/dist/lib/local-sim/ios.js +599 -0
- package/dist/lib/local-sim/loop.d.ts +14 -2
- package/dist/lib/local-sim/loop.js +168 -73
- package/dist/lib/local-sim/native-a11y.d.ts +111 -0
- package/dist/lib/local-sim/native-a11y.js +419 -0
- package/dist/lib/local-sim/simctl.d.ts +55 -0
- package/dist/lib/local-sim/simctl.js +144 -0
- package/dist/lib/local-sim/types.d.ts +39 -2
- package/dist/lib/local-sim/upload.d.ts +1 -1
- package/dist/lib/local-sim/upload.js +9 -6
- package/dist/lib/local-sim/xcuitest.d.ts +60 -0
- package/dist/lib/local-sim/xcuitest.js +303 -0
- package/dist/lib/output.js +58 -12
- package/dist/lib/paths.d.ts +8 -0
- package/dist/lib/paths.js +12 -0
- package/dist/lib/skill-content.js +10 -9
- package/package.json +2 -1
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure parser/serializer for native (Android/iOS) accessibility trees — the
|
|
3
|
+
* native counterpart of the browser's DOM-locator tree. It turns a raw device
|
|
4
|
+
* a11y dump into the SAME `[id] role "name"` string the backend's DOMLocator
|
|
5
|
+
* reasons over, plus a local `shortId → bounds` map the device taps the CENTER
|
|
6
|
+
* of. No bounds ship to the backend; like the browser path, the CLI keeps the
|
|
7
|
+
* map and resolves the LLM's returned short id locally.
|
|
8
|
+
*
|
|
9
|
+
* FCIS: this module is pure (string in, structs out) — no `adb`/`idb` I/O — so
|
|
10
|
+
* it's unit-testable without a device, exactly like `coordinates.ts`. The I/O
|
|
11
|
+
* lives in `adb.ts`/`simctl.ts`; the parse/serialize math lives here.
|
|
12
|
+
*
|
|
13
|
+
* COORDINATE SPACE — carried, not converted, by this module:
|
|
14
|
+
* - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
|
|
15
|
+
* - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
|
|
16
|
+
* The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
|
|
17
|
+
* IOSDevice taps points), so the `space` tag tells the caller which dimension a
|
|
18
|
+
* node's bounds-center belongs to. This module never mixes the two.
|
|
19
|
+
*
|
|
20
|
+
* ANCESTOR-VS-LEAF (the hard part): on Android the visible label
|
|
21
|
+
* ("Network & internet") sits on a `clickable=false` TextView nested inside the
|
|
22
|
+
* clickable PARENT row. Tapping the leaf's center misses the row's hit logic and
|
|
23
|
+
* lands "slightly off"; the click target is the row. So the serializer walks to
|
|
24
|
+
* the nearest clickable ANCESTOR, aggregates its descendants' text/content-desc
|
|
25
|
+
* into ONE label, and emits the CLICKABLE node WITH THE ROW'S BOUNDS — never the
|
|
26
|
+
* leaf. iOS Buttons are already labeled + actionable, so they emit directly.
|
|
27
|
+
*/
|
|
28
|
+
/**
 * Lookup from a native widget/element type to the ARIA-style role string used
 * in the simplified tree, so native screens share the web vocabulary.
 *
 * The table is declared grouped by TARGET role and then inverted, so the
 * resulting object is keyed by native type: the Android leaf class name
 * (android.widget.* / android.view.*) or the iOS element type with any `AX`
 * prefix already stripped. Types that are not listed are handled by
 * `normalizeRole`, which falls back to the lower-cased native name.
 */
const ROLE_NORMALIZATION = Object.fromEntries([
    ["button", ["Button", "ImageButton"]],
    ["textbox", ["EditText", "AutoCompleteTextView", "TextField", "SecureTextField"]],
    ["text", ["TextView", "StaticText"]],
    ["checkbox", ["CheckBox"]],
    ["radio", ["RadioButton"]],
    ["switch", ["Switch", "ToggleButton"]],
    ["image", ["ImageView", "Image"]],
    ["generic", ["ViewGroup", "LinearLayout", "FrameLayout", "RelativeLayout", "ScrollView", "Group"]],
    ["list", ["RecyclerView", "ListView"]],
    ["searchbox", ["SearchField"]],
    ["listitem", ["Cell"]],
    ["heading", ["Heading"]],
    ["link", ["Link"]],
    ["application", ["Application"]],
].flatMap(([ariaRole, nativeTypes]) => nativeTypes.map((nativeType) => [nativeType, ariaRole])));
|
|
65
|
+
/**
 * Normalized roles treated as pure layout containers. `shouldEmit` drops a
 * NON-clickable node carrying one of these roles even when it has a label;
 * clickable nodes are kept regardless of role.
 */
const DECORATIVE_GENERIC_ROLES = new Set(["generic", "application"]);
|
|
67
|
+
/**
 * Map a raw native role/type to the role string emitted in the simplified tree.
 *
 * @param {string} rawRole Native type, e.g. "TextView", "Button" or "AXButton".
 * @returns {string} The `ROLE_NORMALIZATION` entry for the type (after any
 *   "AX" prefix is stripped), or the stripped type lower-cased when the table
 *   has no entry for it.
 */
function normalizeRole(rawRole) {
    const key = stripAxPrefix(rawRole);
    // Only OWN entries count. A bare `ROLE_NORMALIZATION[key]` also resolves
    // names inherited from Object.prototype ("constructor", "toString", …) and
    // would hand back a function instead of a role string.
    if (Object.prototype.hasOwnProperty.call(ROLE_NORMALIZATION, key)) {
        return ROLE_NORMALIZATION[key];
    }
    return key.toLowerCase();
}
|
|
71
|
+
/**
 * Drop a leading "AX" from a role name ("AXButton" becomes "Button"); a role
 * without that prefix is returned unchanged. The match is case-sensitive.
 */
function stripAxPrefix(role) {
    return role.replace(/^AX/, "");
}
|
|
75
|
+
/** Matches a uiautomator bounds value: `[x1,y1][x2,y2]` with integer corners. */
const ANDROID_BOUNDS_RE = /^\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]$/;
/**
 * Convert a uiautomator `bounds` attribute value into an origin + size rect.
 *
 * @param {string} raw Attribute value such as "[0,84][1080,210]"; surrounding
 *   whitespace is ignored.
 * @returns {{x: number, y: number, width: number, height: number} | null}
 *   The rect, or null when the text does not match the expected shape or the
 *   rect has no positive area (so a collapsed node is never tapped).
 */
function parseAndroidBounds(raw) {
    const match = raw.trim().match(ANDROID_BOUNDS_RE);
    if (match === null) {
        return null;
    }
    const [left, top, right, bottom] = match.slice(1).map(Number);
    const width = right - left;
    const height = bottom - top;
    if (width <= 0 || height <= 0) {
        return null;
    }
    return { x: left, y: top, width, height };
}
|
|
93
|
+
/**
 * Return the part of a dotted class name after its final dot
 * ("android.widget.TextView" gives "TextView"); a name without dots is
 * returned whole.
 */
function androidLeafClass(cls) {
    // lastIndexOf yields -1 when there is no dot, so the slice starts at 0.
    return cls.slice(cls.lastIndexOf(".") + 1);
}
|
|
98
|
+
/**
 * Read one double-quoted attribute from a single tag's text.
 *
 * The attribute name must be preceded by whitespace, so looking up `text`
 * does not match the tail of a longer name such as `hint-text`. The captured
 * value is passed through `unescapeXml`.
 *
 * @param {string} tag Text of one tag.
 * @param {string} name Attribute name, inserted into the pattern verbatim.
 * @returns {string} The unescaped value, or "" when the attribute is absent.
 */
function attr(tag, name) {
    const pattern = new RegExp(`\\s${name}="([^"]*)"`);
    const found = tag.match(pattern);
    if (found === null) {
        return "";
    }
    return unescapeXml(found[1]);
}
|
|
104
|
+
/** Decoded character for each of the five predefined XML entity names. */
const XML_NAMED_ENTITIES = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
/**
 * Decode XML references in an attribute value: the five predefined named
 * entities (amp, lt, gt, quot, apos) plus decimal and hexadecimal numeric
 * character references.
 *
 * Decoding is a SINGLE pass over the input, so text produced by one
 * replacement is never scanned again. An escaped ampersand followed by "lt;"
 * therefore decodes to a literal ampersand plus "lt;" rather than to "<".
 * Unknown entity names and out-of-range code points are left untouched.
 *
 * @param {string} s Raw attribute value.
 * @returns {string} The value with recognized references decoded.
 */
function unescapeXml(s) {
    return s.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g, (reference, decimal, hex, named) => {
        if (named !== undefined) {
            return XML_NAMED_ENTITIES[named];
        }
        const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
        // String.fromCodePoint throws above U+10FFFF; keep such text verbatim.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
    });
}
|
|
112
|
+
/**
 * Flatten a uiautomator XML dump into a list of nodes in depth-first document
 * order (a parent always precedes its descendants).
 *
 * The nested tree comes from `buildAndroidTree`; its synthetic root is not
 * emitted. A node without usable bounds is skipped, but its children are still
 * visited. Every emitted node carries `space: "px"`.
 *
 * @param {string} xml Raw dump text.
 * @returns {Array<object>} One entry per node that has bounds, with `role`,
 *   `label` (the node's text, else its content description), `bounds`,
 *   `clickable`, `hasOwnLabel`, `resourceId` (undefined when empty) and `space`.
 */
export function parseUiautomatorXml(xml) {
    const flattened = [];
    // Explicit stack instead of recursion; entries are pushed in reverse so
    // that pop() yields siblings in document order.
    const pending = [...buildAndroidTree(xml).children].reverse();
    while (pending.length > 0) {
        const raw = pending.pop();
        for (let i = raw.children.length - 1; i >= 0; i -= 1) {
            pending.push(raw.children[i]);
        }
        if (!raw.bounds) {
            continue; // no tappable center, so nothing to emit for this node
        }
        const label = raw.text || raw.contentDesc;
        flattened.push({
            role: normalizeRole(androidLeafClass(raw.role)),
            label,
            bounds: raw.bounds,
            clickable: raw.clickable,
            hasOwnLabel: label.length > 0,
            resourceId: raw.resourceId || undefined,
            space: "px",
        });
    }
    return flattened;
}
|
|
147
|
+
/**
 * Rebuild the nested node tree from the tag stream of a uiautomator dump.
 *
 * Only `<node ...>`, `<node .../>` and `</node>` tokens are considered. A
 * stack of currently open nodes gives each new node its parent: an opening tag
 * is pushed, a self-closing tag is attached without being pushed, and a
 * closing tag pops (never below the synthetic root).
 *
 * @param {string} xml Raw dump text.
 * @returns {object} A synthetic root whose `children` are the top-level nodes.
 */
function buildAndroidTree(xml) {
    const root = makeRawAndroidNode("", "", "", "", false, null);
    const openNodes = [root];
    // Quoted attribute values are consumed whole (`"[^"]*"`), so a literal `>`
    // inside a value (e.g. text="Home > Settings") cannot end the tag early.
    const tagPattern = /<node\b(?:[^>"]|"[^"]*")*>|<\/node>/g;
    for (const [tag] of xml.matchAll(tagPattern)) {
        if (tag === "</node>") {
            if (openNodes.length > 1) {
                openNodes.pop();
            }
            continue;
        }
        const read = (name) => attr(tag, name);
        const node = makeRawAndroidNode(read("class"), read("text"), read("content-desc"), read("resource-id"), read("clickable") === "true", parseAndroidBounds(read("bounds")));
        openNodes[openNodes.length - 1].children.push(node);
        // The pattern swallows the trailing slash, so self-closing is detected
        // from the matched text itself.
        if (!tag.endsWith("/>")) {
            openNodes.push(node);
        }
    }
    return root;
}
|
|
181
|
+
/**
 * Create one raw tree node from already-extracted attribute values. Every
 * node starts with its own empty `children` array.
 */
function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
    const children = [];
    return { role, text, contentDesc, resourceId, clickable, bounds, children };
}
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
// iOS — shared helpers for the WebDriverAgent (XCUITest) /source parser below
|
|
186
|
+
// ---------------------------------------------------------------------------
|
|
187
|
+
/**
 * iOS element types treated as directly actionable. `parseXcuiHierarchy` marks
 * a node clickable only when its type (after any "AX" prefix is stripped) is a
 * member of this set AND the element is enabled. Membership is case-sensitive.
 *
 * NOTE(review): "Checkbox" here differs in case from the "CheckBox" key in
 * ROLE_NORMALIZATION — confirm which spelling the iOS source actually emits.
 */
const IOS_ACTIONABLE_TYPES = new Set([
    "Button",
    "Link",
    "TextField",
    "SecureTextField",
    "SearchField",
    "Cell",
    "Switch",
    "Checkbox",
    "RadioButton",
    "MenuItem",
    "Tab",
]);
|
|
201
|
+
/**
 * Validate an element frame and copy it into a plain bounds rect.
 *
 * @param {{x?: unknown, y?: unknown, width?: unknown, height?: unknown} | null | undefined} frame
 *   Frame object as found on a hierarchy node.
 * @returns {{x: number, y: number, width: number, height: number} | null}
 *   A fresh rect, or null when the frame is missing, any of the four fields is
 *   not a finite number, or the rect has no positive area.
 */
function frameToBounds(frame) {
    if (!frame) {
        return null;
    }
    const { x, y, width, height } = frame;
    // Number.isFinite is false for non-numbers, NaN and ±Infinity. It has to
    // cover width/height too: NaN fails a `<= 0` test and Infinity is > 0, so a
    // size check alone would let both through as "valid" bounds.
    const allFinite = [x, y, width, height].every((value) => Number.isFinite(value));
    if (!allFinite || width <= 0 || height <= 0) {
        return null;
    }
    return { x, y, width, height };
}
|
|
217
|
+
/**
 * Interpret a flag that may arrive as a real boolean or as the string "1".
 * Only `true` and "1" count as true; everything else (including the number 1
 * and the string "true") is false.
 */
function wdaTruthy(v) {
    switch (v) {
        case true:
        case "1":
            return true;
        default:
            return false;
    }
}
|
|
221
|
+
/**
 * Parse a JSON accessibility hierarchy (a NESTED element tree) into the flat,
 * depth-first node list that `serializeNativeTree` consumes. Every emitted
 * node carries `space: "points"`.
 *
 * Input shape: either the tree itself or an envelope object holding the tree
 * under `value`. Because a tree node can have its own `value` field, the
 * envelope is unwrapped only when the top-level object has NO `type` key.
 *
 * Emission: a node is emitted only when it has a usable `rect` and both
 * `isAccessible` and `isVisible` are set. The walk still descends into every
 * child, since an emitted element can sit inside a container that is skipped.
 *
 * Per node: the label is `label`, else a STRING `value`, trimmed; `clickable`
 * requires an actionable type and an enabled element (a missing `isEnabled`
 * counts as enabled); `resourceId` is `name`, else `rawIdentifier`.
 *
 * @param {string} json Hierarchy as JSON text.
 * @returns {Array<object>} Flat node list; empty when the text is not valid
 *   JSON or does not contain an object tree.
 */
export function parseXcuiHierarchy(json) {
    let payload;
    try {
        payload = JSON.parse(json);
    }
    catch {
        return [];
    }
    const isEnvelope = Boolean(payload) && typeof payload === "object" && !("type" in payload) && "value" in payload;
    const tree = isEnvelope ? payload.value : payload;
    if (!tree || typeof tree !== "object") {
        return [];
    }
    const flattened = [];
    // Build the emitted record for one element that passed the filter.
    const toNativeNode = (element, bounds) => {
        const stringValue = typeof element.value === "string" ? element.value : "";
        const label = (element.label ?? stringValue).trim();
        const rawType = element.type ?? "";
        const enabled = element.isEnabled == null || wdaTruthy(element.isEnabled);
        return {
            role: normalizeRole(rawType),
            label,
            bounds,
            clickable: IOS_ACTIONABLE_TYPES.has(stripAxPrefix(rawType)) && enabled,
            hasOwnLabel: label.length > 0,
            resourceId: (element.name || element.rawIdentifier) ?? undefined,
            space: "points",
        };
    };
    const walk = (element) => {
        const bounds = frameToBounds(element.rect ?? undefined);
        if (bounds && wdaTruthy(element.isAccessible) && wdaTruthy(element.isVisible)) {
            flattened.push(toNativeNode(element, bounds));
        }
        // Filtering applies to emission only; the descent is never pruned.
        for (const child of element.children ?? []) {
            walk(child);
        }
    };
    walk(tree);
    return flattened;
}
|
|
292
|
+
// ---------------------------------------------------------------------------
|
|
293
|
+
// Serialization — flat NativeNode list → `[id] role "label"` + nodeMap
|
|
294
|
+
// ---------------------------------------------------------------------------
|
|
295
|
+
const NODE_LABEL_MAX_LENGTH = 100;
|
|
296
|
+
/**
 * Letter that starts every short id for a coordinate space: "A" for "px",
 * "I" for anything else.
 */
function shortIdPrefix(space) {
    if (space === "px") {
        return "A";
    }
    return "I";
}
|
|
300
|
+
/**
 * Shorten text to at most `max` UTF-16 code units, ending with "…" when
 * anything was cut.
 *
 * @param {string} text Text to shorten.
 * @param {number} max Maximum length of the result, ellipsis included.
 * @returns {string} `text` unchanged when it already fits; otherwise a prefix
 *   followed by "…". The prefix never ends in the first half of a surrogate
 *   pair, so the result may be one unit shorter than `max`.
 */
function truncate(text, max) {
    if (text.length <= max) {
        return text;
    }
    // Clamp so a non-positive `max` cannot turn into a negative slice index
    // (which would count from the END of the string).
    let end = Math.max(0, max - 1);
    if (end > 0) {
        const lastKept = text.charCodeAt(end - 1);
        // A high surrogate in the last kept position would be separated from
        // its partner and leave an unpaired code unit; drop it instead.
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            end -= 1;
        }
    }
    return text.slice(0, end) + "…";
}
|
|
303
|
+
/**
 * Put a label on one line: every run of whitespace becomes a single space and
 * leading/trailing whitespace is removed.
 */
function normalizeLabel(label) {
    // Splitting on whitespace runs leaves empty strings at the edges when the
    // label starts or ends with whitespace; filter them out before joining.
    return label.split(/\s+/).filter(Boolean).join(" ");
}
|
|
307
|
+
/**
 * Turn a flat node list (document order) into the text tree
 * `[id] role "label"` (one node per line) plus a `shortId → bounds` map used
 * to resolve a chosen id back to a tappable rect.
 *
 * Two passes:
 *  1. Absorption: every label-bearing node is assigned to the clickable node
 *     returned by `smallestContainingClickable`. When that host is a DIFFERENT
 *     node, the label is added to the host's bucket and the label node is
 *     marked as absorbed.
 *  2. Emission, in input order: absorbed nodes are skipped; for the rest the
 *     own label comes first, then absorbed labels, whitespace-normalized and
 *     truncated. `shouldEmit` decides whether the node appears. Ids are the
 *     space prefix plus a 1-based counter over EMITTED nodes, and the map
 *     stores the emitted node's own bounds.
 *
 * The id prefix is taken from the first node's `space` ("px" when the list is
 * empty).
 *
 * @param {Array<object>} nodes Flat node list from one of the parsers.
 * @returns {{simplified: string, nodeMap: Map<string, object>}}
 */
export function serializeNativeTree(nodes) {
    const idPrefix = shortIdPrefix(nodes[0]?.space ?? "px");
    const tappable = nodes.filter((candidate) => candidate.clickable);
    // Pass 1: hand each label to its hosting clickable node.
    const absorbedByHost = new Map();
    const absorbedNodes = new Set();
    nodes.forEach((labelNode) => {
        if (!labelNode.hasOwnLabel) {
            return;
        }
        const host = smallestContainingClickable(labelNode, tappable);
        if (!host || host === labelNode) {
            return; // no host, or the node keeps its own label
        }
        if (!absorbedByHost.has(host)) {
            absorbedByHost.set(host, []);
        }
        absorbedByHost.get(host).push(labelNode.label);
        absorbedNodes.add(labelNode);
    });
    // Pass 2: emit survivors in document order.
    const simplifiedLines = [];
    const nodeMap = new Map();
    for (const current of nodes) {
        if (absorbedNodes.has(current)) {
            continue; // its text now lives on the host
        }
        const labelParts = [current.label].concat(absorbedByHost.get(current) ?? []);
        const fullLabel = normalizeLabel(labelParts.filter(Boolean).join(" "));
        if (!shouldEmit(current, fullLabel)) {
            continue;
        }
        // Every emitted node adds exactly one map entry, so size + 1 is the
        // 1-based sequence number of this node.
        const shortId = `${idPrefix}${nodeMap.size + 1}`;
        const shownLabel = truncate(fullLabel, NODE_LABEL_MAX_LENGTH);
        const head = `[${shortId}] ${current.role}`;
        simplifiedLines.push(shownLabel ? `${head} "${shownLabel}"` : head);
        nodeMap.set(shortId, current.bounds);
    }
    return { simplified: simplifiedLines.join("\n"), nodeMap };
}
|
|
372
|
+
/**
 * Decide whether a node appears in the simplified tree.
 *
 *  - A clickable node always appears.
 *  - A non-clickable node appears only when it has a (combined) label AND its
 *    role is not one of the decorative container roles.
 *
 * @param {object} node Node with `clickable` and `role`.
 * @param {string} combinedLabel The node's final label text.
 * @returns {boolean}
 */
function shouldEmit(node, combinedLabel) {
    if (node.clickable) {
        return true;
    }
    return Boolean(combinedLabel) && !DECORATIVE_GENERIC_ROLES.has(node.role);
}
|
|
387
|
+
/**
 * Find the clickable node that should host `node`'s label.
 *
 *  - If `node` is itself in `clickables`, it is its own host, regardless of
 *    its position in the list or of any other clickable rect around its
 *    center. A clickable node must never have its label moved onto another
 *    node, because the serializer would then drop it as an absorbed
 *    descendant.
 *  - Otherwise the host is the smallest-area clickable whose rect contains the
 *    CENTER of `node`'s bounds, which picks the immediate row rather than a
 *    large enclosing container. On equal areas the earlier entry wins.
 *
 * @param {object} node Node with `bounds`.
 * @param {Array<object>} clickables Candidate hosts, each with `bounds`.
 * @returns {object | null} The hosting node, or null when nothing contains
 *   the center.
 */
function smallestContainingClickable(node, clickables) {
    const centerX = node.bounds.x + node.bounds.width / 2;
    const centerY = node.bounds.y + node.bounds.height / 2;
    let best = null;
    let bestArea = Infinity;
    for (const candidate of clickables) {
        if (candidate === node) {
            return node; // a clickable node always hosts its own label
        }
        if (!containsPoint(candidate.bounds, centerX, centerY)) {
            continue;
        }
        const area = candidate.bounds.width * candidate.bounds.height;
        if (area < bestArea) {
            best = candidate;
            bestArea = area;
        }
    }
    return best;
}
|
|
413
|
+
/**
 * Test whether the point (x, y) lies inside rect `b`. All four edges are
 * inclusive.
 */
function containsPoint(b, x, y) {
    const withinSpan = (value, start, extent) => value >= start && value <= start + extent;
    return withinSpan(x, b.x, b.width) && withinSpan(y, b.y, b.height);
}
|
|
416
|
+
/**
 * Center point of a rect, each coordinate rounded with `Math.round`.
 *
 * @param {{x: number, y: number, width: number, height: number}} b Rect.
 * @returns {{x: number, y: number}} Rounded center.
 */
export function boundsCenter(b) {
    const midpoint = (start, extent) => Math.round(start + extent / 2);
    return { x: midpoint(b.x, b.width), y: midpoint(b.y, b.height) };
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async wrappers over `xcrun simctl` for the native-iOS sim path: simulator
|
|
3
|
+
* LIFECYCLE (boot detection, install, terminate, launch) and the SCREENSHOT.
|
|
4
|
+
*
|
|
5
|
+
* UI interaction + the accessibility tree live in `xcuitest.ts` (WebDriverAgent),
|
|
6
|
+
* NOT here — iOS no longer depends on idb.
|
|
7
|
+
*
|
|
8
|
+
* COORDINATE SPACES (the key difference from Android, where screencap and tap
|
|
9
|
+
* share one pixel space):
|
|
10
|
+
* - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
|
|
11
|
+
* - WebDriverAgent's taps/swipes + a11y frames are POINTS (e.g. 393x852).
|
|
12
|
+
* The native sim TAPS in points (de-normalize 0-1000 against the POINT size) but
|
|
13
|
+
* RECORDS in PIXELS: dimensions() returns the pixel size so the loop's round-trip
|
|
14
|
+
* is exact. Recording in points would drift — the point grid (393) is coarser
|
|
15
|
+
* than the 0-1000 normalized grid, so it double-rounds. See IOSDevice.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Error type thrown by the simctl wrappers in this module: a failed
 * `xcrun simctl` invocation, an unparseable device listing, or zero/multiple
 * booted simulators.
 */
export declare class IosError extends Error {
    /** @param message Human-readable description of the failure. */
    constructor(message: string);
}
|
|
20
|
+
/**
 * Run `xcrun simctl <args>` and return trimmed stdout.
 *
 * @param args Arguments placed after `simctl` on the command line.
 * @param timeoutMs Process timeout in milliseconds; the implementation
 *   defaults it to 30 000 when omitted.
 * @returns The command's stdout with surrounding whitespace removed.
 * @throws IosError wrapping any failure of the underlying process invocation;
 *   the message names the simctl arguments that were used.
 */
export declare function simctl(args: string[], timeoutMs?: number): Promise<string>;
|
|
22
|
+
/**
 * Assert exactly one simulator is Booted and return its udid. We pin every
 * subsequent simctl/WDA call (and the screenshot) to "booted", so multiple
 * booted simulators are ambiguous and rejected.
 *
 * @returns The udid of the single booted simulator.
 * @throws IosError when simctl cannot be run, its JSON listing cannot be
 *   parsed, no simulator is booted, or more than one is booted (the message
 *   lists their names).
 */
export declare function requireOneBootedSimulator(): Promise<string>;
|
|
28
|
+
/**
 * Screen geometry: PIXEL size, POINT size, and the scale (`density`) between
 * them. Produced by the XCUITest driver's `describeScreen` (xcuitest.ts) and
 * consumed by IOSDevice — points drive WDA taps/swipes; pixels are the
 * screenshot's resolution.
 */
export interface IosScreen {
    /** Screen width in pixels (the screenshot's resolution). */
    pixelWidth: number;
    /** Screen height in pixels (the screenshot's resolution). */
    pixelHeight: number;
    /** Screen width in points (the space taps/swipes and a11y frames use). */
    pointWidth: number;
    /** Screen height in points (the space taps/swipes and a11y frames use). */
    pointHeight: number;
    /**
     * Scale between the pixel and point sizes.
     * NOTE(review): presumably pixels per point (e.g. 3 on an @3x screen) —
     * confirm against `describeScreen`.
     */
    density: number;
}
|
|
41
|
+
/**
 * Capture the booted simulator's screen as PNG bytes via
 * `simctl io booted screenshot`. simctl writes to a file path (no reliable
 * stdout in current Xcode), so we round-trip through a temp file.
 *
 * @returns The PNG file contents. The temporary directory used for the
 *   capture is removed before the promise settles.
 * @throws IosError when the simctl screenshot command fails.
 */
export declare function screenshotPng(): Promise<Buffer>;
|
|
47
|
+
/**
 * Stop an app on the given simulator via `simctl terminate`. Best-effort: a
 * failure (for example, the app is not running) is swallowed.
 */
export declare function terminateApp(udid: string, bundleId: string): Promise<void>;
/**
 * Start an app on the given simulator via `simctl launch`.
 * @throws IosError when simctl fails.
 */
export declare function launchApp(udid: string, bundleId: string): Promise<void>;
/**
 * Install a local `.app` on the given simulator via `simctl install` (run
 * with a 180-second timeout).
 * @throws IosError when simctl fails.
 */
export declare function installApp(udid: string, appPath: string): Promise<void>;
/**
 * Report whether the quoted bundle id appears in the `simctl listapps` output
 * for the given simulator (a substring check on the listing text).
 * @throws IosError when simctl fails.
 */
export declare function isAppInstalled(udid: string, bundleId: string): Promise<boolean>;
|
|
51
|
+
/**
 * Read CFBundleIdentifier from a local `.app`'s Info.plist via `plutil`. Lets us
 * terminate+launch a just-installed app without diffing the app list.
 *
 * @param appPath Path of the `.app` directory.
 * @returns The bundle id, or null when `Info.plist` is missing, `plutil`
 *   fails, or the extracted value is empty. Never rejects for those cases.
 */
export declare function bundleIdFromApp(appPath: string): Promise<string | null>;
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async wrappers over `xcrun simctl` for the native-iOS sim path: simulator
|
|
3
|
+
* LIFECYCLE (boot detection, install, terminate, launch) and the SCREENSHOT.
|
|
4
|
+
*
|
|
5
|
+
* UI interaction + the accessibility tree live in `xcuitest.ts` (WebDriverAgent),
|
|
6
|
+
* NOT here — iOS no longer depends on idb.
|
|
7
|
+
*
|
|
8
|
+
* COORDINATE SPACES (the key difference from Android, where screencap and tap
|
|
9
|
+
* share one pixel space):
|
|
10
|
+
* - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
|
|
11
|
+
* - WebDriverAgent's taps/swipes + a11y frames are POINTS (e.g. 393x852).
|
|
12
|
+
* The native sim TAPS in points (de-normalize 0-1000 against the POINT size) but
|
|
13
|
+
* RECORDS in PIXELS: dimensions() returns the pixel size so the loop's round-trip
|
|
14
|
+
* is exact. Recording in points would drift — the point grid (393) is coarser
|
|
15
|
+
* than the 0-1000 normalized grid, so it double-rounds. See IOSDevice.
|
|
16
|
+
*/
|
|
17
|
+
import { execFile } from "node:child_process";
|
|
18
|
+
import { existsSync } from "node:fs";
|
|
19
|
+
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
|
20
|
+
import { tmpdir } from "node:os";
|
|
21
|
+
import { join } from "node:path";
|
|
22
|
+
import { promisify } from "node:util";
|
|
23
|
+
/** Promise-returning form of `child_process.execFile`. */
const execFileAsync = promisify(execFile);
/** Absolute path of the `xcrun` launcher used for every simctl invocation. */
const XCRUN = "/usr/bin/xcrun";
/** Absolute path of `plutil`, used by `bundleIdFromApp` to read Info.plist. */
const PLUTIL = "/usr/bin/plutil";
/** Timeout (ms) for a simctl invocation when the caller passes none. */
const DEFAULT_TIMEOUT_MS = 30_000;
/** Timeout (ms) applied to the screenshot capture in `screenshotPng`. */
const SCREENSHOT_TIMEOUT_MS = 30_000;
|
|
28
|
+
/**
 * Error type thrown by the simctl wrappers in this module. `name` is always
 * "IosError".
 */
export class IosError extends Error {
    /**
     * @param {string} message Human-readable description of the failure.
     * @param {{cause?: unknown}} [options] Standard Error options, forwarded
     *   to the Error constructor so a caller can attach the underlying error
     *   as `cause`. Optional; existing one-argument calls behave as before.
     */
    constructor(message, options) {
        super(message, options);
        this.name = "IosError";
    }
}
|
|
34
|
+
/**
 * Run `xcrun simctl <args>` and return trimmed stdout.
 *
 * @param {string[]} args Arguments placed after `simctl`.
 * @param {number} [timeoutMs] Process timeout in milliseconds (defaults to
 *   `DEFAULT_TIMEOUT_MS`). Output is capped at 4 MiB via `maxBuffer`.
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 * @throws {IosError} On any failure of the invocation. The message keeps the
 *   previous format, and the original error is attached as `cause` so its
 *   extra fields (exit code, signal, stderr) stay reachable.
 */
export async function simctl(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    try {
        const { stdout } = await execFileAsync(XCRUN, ["simctl", ...args], {
            timeout: timeoutMs,
            maxBuffer: 4 * 1024 * 1024,
        });
        return stdout.trim();
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const wrapped = new IosError(`xcrun simctl ${args.join(" ")} failed: ${msg}`);
        // Attached after construction so this works with IosError's
        // one-argument constructor; non-enumerable like a native `cause`.
        Object.defineProperty(wrapped, "cause", {
            value: err,
            writable: true,
            configurable: true,
            enumerable: false,
        });
        throw wrapped;
    }
}
|
|
48
|
+
// --- Device state ---
|
|
49
|
+
/**
 * Resolve the udid of the one booted simulator, or fail loudly.
 *
 * Runs `simctl list devices booted -j`, flattens the per-runtime device lists
 * and keeps the entries whose state is "Booted".
 *
 * @returns {Promise<string>} udid of the single booted simulator.
 * @throws {IosError} When simctl cannot be run, its output cannot be parsed,
 *   no simulator is booted, or several are (their names are listed).
 */
export async function requireOneBootedSimulator() {
    let listing;
    try {
        listing = await simctl(["list", "devices", "booted", "-j"]);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        throw new IosError(`Could not run xcrun simctl. Run \`ish check ios\` to check your setup. ${msg}`);
    }
    const bootedDevices = [];
    try {
        const { devices } = JSON.parse(listing);
        for (const device of Object.values(devices).flat()) {
            if (device.state !== "Booted") {
                continue;
            }
            bootedDevices.push({ udid: device.udid, name: device.name });
        }
    }
    catch {
        throw new IosError("Could not parse `simctl list devices booted -j` output.");
    }
    if (bootedDevices.length === 1) {
        return bootedDevices[0].udid;
    }
    if (bootedDevices.length === 0) {
        throw new IosError("No iOS simulator booted. Open Simulator.app, or run `ish check ios` to check your setup.");
    }
    const names = bootedDevices.map((device) => device.name).join(", ");
    throw new IosError(`Expected exactly one booted simulator, found ${bootedDevices.length} (${names}). ` +
        "Shut down the extras (the sim drives a single device).");
}
|
|
83
|
+
// --- Screenshot (PIXELS) ---
|
|
84
|
+
/**
 * Take a screenshot of the booted simulator and return the PNG bytes.
 *
 * `simctl io booted screenshot` writes to a file path, so the capture goes
 * through a freshly created temporary directory. That directory is removed
 * whether or not the capture succeeded; a failure to remove it is ignored.
 *
 * @returns {Promise<Buffer>} Contents of the captured PNG file.
 */
export async function screenshotPng() {
    const scratchDir = await mkdtemp(join(tmpdir(), "ish-ios-shot-"));
    try {
        const pngPath = join(scratchDir, "shot.png");
        await simctl(["io", "booted", "screenshot", pngPath], SCREENSHOT_TIMEOUT_MS);
        const pngBytes = await readFile(pngPath);
        return pngBytes;
    }
    finally {
        try {
            await rm(scratchDir, { recursive: true, force: true });
        }
        catch {
            // best-effort cleanup; a leftover temp dir must not mask the result
        }
    }
}
|
|
100
|
+
// --- App lifecycle (simctl) ---
|
|
101
|
+
/**
 * Stop an app on a simulator via `simctl terminate`.
 *
 * Deliberately best-effort: this is used as a reset step, and terminating an
 * app that is not running makes simctl fail, so every failure is ignored.
 *
 * @param {string} udid Simulator identifier.
 * @param {string} bundleId Bundle id of the app to stop.
 * @returns {Promise<void>}
 */
export async function terminateApp(udid, bundleId) {
    const terminateArgs = ["terminate", udid, bundleId];
    try {
        await simctl(terminateArgs);
    }
    catch {
        // nothing was running, so there is nothing to stop
    }
}
|
|
111
|
+
/**
 * Start an app on a simulator via `simctl launch`.
 *
 * No error handling here on purpose: a failure from the `simctl` wrapper
 * (for example an unknown bundle id) propagates to the caller as-is.
 *
 * @param {string} udid Simulator identifier.
 * @param {string} bundleId Bundle id of the app to start.
 * @returns {Promise<void>}
 */
export async function launchApp(udid, bundleId) {
    const launchArgs = ["launch", udid, bundleId];
    await simctl(launchArgs);
}
|
|
116
|
+
/**
 * Install a local `.app` on a simulator via `simctl install`, with a
 * 180-second timeout instead of the wrapper's default. Failures from the
 * `simctl` wrapper propagate to the caller.
 *
 * @param {string} udid Simulator identifier.
 * @param {string} appPath Path of the `.app` to install.
 * @returns {Promise<void>}
 */
export async function installApp(udid, appPath) {
    const installTimeoutMs = 180_000;
    await simctl(["install", udid, appPath], installTimeoutMs);
}
|
|
120
|
+
/**
 * Check whether an app is present on a simulator.
 *
 * Runs `simctl listapps` (60-second timeout) and looks for the bundle id in
 * the listing text. The id must appear wrapped in double quotes, so an id
 * that is only a prefix of an installed one does not match.
 *
 * @param {string} udid Simulator identifier.
 * @param {string} bundleId Bundle id to look for.
 * @returns {Promise<boolean>} True when the quoted id occurs in the listing.
 */
export async function isAppInstalled(udid, bundleId) {
    const listing = await simctl(["listapps", udid], 60_000);
    const needles = [`"${bundleId}"`, `CFBundleIdentifier = "${bundleId}"`];
    return needles.some((needle) => listing.includes(needle));
}
|
|
126
|
+
/**
 * Read the bundle identifier of a local `.app` from its `Info.plist`.
 *
 * Uses `plutil -extract CFBundleIdentifier raw` with a 10-second timeout.
 * Never throws for the expected failure modes: a missing plist, a failing
 * `plutil` run and an empty value all yield null.
 *
 * @param {string} appPath Path of the `.app` directory.
 * @returns {Promise<string | null>} The trimmed identifier, or null.
 */
export async function bundleIdFromApp(appPath) {
    const infoPlistPath = join(appPath, "Info.plist");
    if (existsSync(infoPlistPath)) {
        try {
            const plutilArgs = ["-extract", "CFBundleIdentifier", "raw", "-o", "-", infoPlistPath];
            const result = await execFileAsync(PLUTIL, plutilArgs, { timeout: 10_000 });
            const bundleId = result.stdout.trim();
            if (bundleId) {
                return bundleId;
            }
        }
        catch {
            // plutil failed — reported to the caller as "no bundle id"
        }
    }
    return null;
}
|