@ishlabs/cli 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ask.js +3 -3
- package/dist/commands/iteration.js +1 -1
- package/dist/commands/study-analyze.js +1 -1
- package/dist/commands/study-run.js +83 -15
- package/dist/commands/study.js +11 -7
- package/dist/lib/alias-store.js +1 -1
- package/dist/lib/api-client.d.ts +2 -0
- package/dist/lib/billing.d.ts +30 -16
- package/dist/lib/billing.js +77 -27
- package/dist/lib/docs.js +57 -42
- package/dist/lib/local-sim/actions.d.ts +10 -2
- package/dist/lib/local-sim/actions.js +16 -11
- package/dist/lib/local-sim/adb.d.ts +103 -0
- package/dist/lib/local-sim/adb.js +352 -0
- package/dist/lib/local-sim/android.d.ts +111 -0
- package/dist/lib/local-sim/android.js +499 -0
- package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
- package/dist/lib/local-sim/apk-manifest.js +210 -0
- package/dist/lib/local-sim/browser.d.ts +22 -0
- package/dist/lib/local-sim/browser.js +65 -0
- package/dist/lib/local-sim/coordinates.d.ts +69 -0
- package/dist/lib/local-sim/coordinates.js +59 -0
- package/dist/lib/local-sim/device.d.ts +143 -0
- package/dist/lib/local-sim/device.js +152 -0
- package/dist/lib/local-sim/ios.d.ts +168 -0
- package/dist/lib/local-sim/ios.js +546 -0
- package/dist/lib/local-sim/loop.d.ts +14 -2
- package/dist/lib/local-sim/loop.js +166 -73
- package/dist/lib/local-sim/native-a11y.d.ts +97 -0
- package/dist/lib/local-sim/native-a11y.js +384 -0
- package/dist/lib/local-sim/simctl.d.ts +85 -0
- package/dist/lib/local-sim/simctl.js +273 -0
- package/dist/lib/local-sim/types.d.ts +37 -2
- package/dist/lib/local-sim/upload.d.ts +1 -1
- package/dist/lib/local-sim/upload.js +9 -6
- package/dist/lib/modality.d.ts +10 -1
- package/dist/lib/modality.js +21 -0
- package/dist/lib/output.js +58 -12
- package/dist/lib/skill-content.js +10 -9
- package/package.json +2 -1
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure parser/serializer for native (Android/iOS) accessibility trees — the
|
|
3
|
+
* native counterpart of the browser's DOM-locator tree. It turns a raw device
|
|
4
|
+
* a11y dump into the SAME `[id] role "name"` string the backend's DOMLocator
|
|
5
|
+
* reasons over, plus a local `shortId → bounds` map the device taps the CENTER
|
|
6
|
+
* of. No bounds ship to the backend; like the browser path, the CLI keeps the
|
|
7
|
+
* map and resolves the LLM's returned short id locally.
|
|
8
|
+
*
|
|
9
|
+
* FCIS: this module is pure (string in, structs out) — no `adb`/`idb` I/O — so
|
|
10
|
+
* it's unit-testable without a device, exactly like `coordinates.ts`. The I/O
|
|
11
|
+
* lives in `adb.ts`/`simctl.ts`; the parse/serialize math lives here.
|
|
12
|
+
*
|
|
13
|
+
* COORDINATE SPACE — carried, not converted, by this module:
|
|
14
|
+
* - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
|
|
15
|
+
* - iOS `idb ui describe-all` frames are POINTS (`space: "points"`).
|
|
16
|
+
* The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
|
|
17
|
+
* IOSDevice taps points), so the `space` tag tells the caller which dimension a
|
|
18
|
+
* node's bounds-center belongs to. This module never mixes the two.
|
|
19
|
+
*
|
|
20
|
+
* ANCESTOR-VS-LEAF (the hard part): on Android the visible label
|
|
21
|
+
* ("Network & internet") sits on a `clickable=false` TextView nested inside the
|
|
22
|
+
* clickable PARENT row. Tapping the leaf's center misses the row's hit logic and
|
|
23
|
+
* lands "slightly off"; the click target is the row. So the serializer walks to
|
|
24
|
+
* the nearest clickable ANCESTOR, aggregates its descendants' text/content-desc
|
|
25
|
+
* into ONE label, and emits the CLICKABLE node WITH THE ROW'S BOUNDS — never the
|
|
26
|
+
* leaf. iOS Buttons are already labeled + actionable, so they emit directly.
|
|
27
|
+
*/
|
|
28
|
+
/**
 * Native role → ARIA synonym so the DOMLocator prompt sees the same vocabulary
 * it does for the web (Option A from the Phase-0 spike). One small lookup so the
 * map is trivial to tweak once the spike finalizes it; unknown roles pass
 * through lower-cased (better a literal native role than a dropped node).
 */
const ROLE_NORMALIZATION = {
    // Android (android.widget.* / android.view.*, matched on the leaf class name).
    Button: "button",
    ImageButton: "button",
    EditText: "textbox",
    AutoCompleteTextView: "textbox",
    TextView: "text",
    CheckBox: "checkbox",
    RadioButton: "radio",
    Switch: "switch",
    ToggleButton: "switch",
    ImageView: "image",
    ViewGroup: "generic",
    LinearLayout: "generic",
    FrameLayout: "generic",
    RelativeLayout: "generic",
    ScrollView: "generic",
    RecyclerView: "list",
    ListView: "list",
    // iOS (idb `type`, AX-prefixed `role` handled by stripAxPrefix below).
    StaticText: "text",
    TextField: "textbox",
    SecureTextField: "textbox",
    SearchField: "searchbox",
    Cell: "listitem",
    Heading: "heading",
    Image: "image",
    Group: "generic",
    Link: "link",
    Application: "application",
};
/** Roles that are pure layout containers — only kept if they're a tappable row. */
const DECORATIVE_GENERIC_ROLES = new Set(["generic", "application"]);
/**
 * Map a raw native role (Android leaf class name or iOS type/role) to the
 * ARIA-style role used in the serialized tree.
 *
 * @param rawRole Native role string; an "AX" prefix is stripped before lookup.
 * @returns The mapped synonym, or the stripped role lower-cased when unmapped.
 */
function normalizeRole(rawRole) {
    const key = stripAxPrefix(rawRole);
    // Own-property check: a bare `ROLE_NORMALIZATION[key]` also resolves
    // inherited keys ("constructor", "toString", ...) to Object.prototype
    // members, which would return a function instead of a role string.
    return Object.prototype.hasOwnProperty.call(ROLE_NORMALIZATION, key)
        ? ROLE_NORMALIZATION[key]
        : key.toLowerCase();
}
/** "AXButton" → "Button", "AXStaticText" → "StaticText"; non-AX passes through. */
function stripAxPrefix(role) {
    return role.startsWith("AX") ? role.slice(2) : role;
}
|
|
75
|
+
const ANDROID_BOUNDS_RE = /^\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]$/;
/**
 * Turn a uiautomator `bounds` attribute value ("[x1,y1][x2,y2]") into an
 * `{ x, y, width, height }` rect.
 *
 * @param raw The attribute value; surrounding whitespace is ignored.
 * @returns The rect, or null when the text doesn't match the expected shape or
 *   the rect has no positive area.
 */
function parseAndroidBounds(raw) {
    const match = ANDROID_BOUNDS_RE.exec(raw.trim());
    if (match === null) {
        return null;
    }
    const [left, top, right, bottom] = [match[1], match[2], match[3], match[4]].map((digits) => Number(digits));
    const width = right - left;
    const height = bottom - top;
    // A collapsed or inverted rect (e.g. [0,0][0,0]) has no center worth
    // tapping, so it is reported the same way as unparseable input.
    const hasArea = width > 0 && height > 0;
    return hasArea ? { x: left, y: top, width, height } : null;
}
|
|
93
|
+
/**
 * Reduce a fully-qualified class name to its final dotted segment
 * ("android.widget.TextView" → "TextView"). A name without dots is returned
 * unchanged.
 */
function androidLeafClass(cls) {
    const lastDot = cls.lastIndexOf(".");
    if (lastDot < 0) {
        return cls;
    }
    return cls.substring(lastDot + 1);
}
|
|
98
|
+
/**
 * Read one attribute value out of a raw `<node ...>` tag string.
 *
 * @param tag  The full text of a single tag.
 * @param name Attribute name to look up (must be preceded by whitespace in the tag).
 * @returns The XML-unescaped value, or "" when the attribute is absent.
 */
function attr(tag, name) {
    // Values are double-quoted and carry XML entity escapes, so the value is
    // everything up to the next `"`, unescaped afterwards.
    const pattern = new RegExp(`\\s${name}="([^"]*)"`);
    const found = pattern.exec(tag);
    if (found === null) {
        return "";
    }
    return unescapeXml(found[1]);
}
|
|
104
|
+
function unescapeXml(s) {
|
|
105
|
+
return s
|
|
106
|
+
.replace(/&/g, "&")
|
|
107
|
+
.replace(/</g, "<")
|
|
108
|
+
.replace(/>/g, ">")
|
|
109
|
+
.replace(/"/g, '"')
|
|
110
|
+
.replace(/'/g, "'");
|
|
111
|
+
}
|
|
112
|
+
/**
 * Flatten a uiautomator XML dump into a list of nodes in document
 * (depth-first, pre-order) order.
 *
 * The nesting is first rebuilt by `buildAndroidTree`; every node that carries
 * usable bounds is then recorded with its raw fields. Which nodes are finally
 * emitted, and how labels are aggregated, is left to `serializeNativeTree`.
 *
 * @param xml The raw dump text.
 * @returns Flat node records tagged `space: "px"`.
 */
export function parseUiautomatorXml(xml) {
    const flat = [];
    function collect(rawNode) {
        const { bounds } = rawNode;
        // Nodes without bounds (malformed or zero-area) have no tappable center
        // and are skipped — but their children are still visited.
        if (bounds) {
            const label = rawNode.text || rawNode.contentDesc;
            flat.push({
                role: normalizeRole(androidLeafClass(rawNode.role)),
                label,
                bounds,
                clickable: rawNode.clickable,
                hasOwnLabel: label.length > 0,
                resourceId: rawNode.resourceId || undefined,
                space: "px",
            });
        }
        for (const child of rawNode.children) {
            collect(child);
        }
    }
    // The synthetic root itself is never emitted; start from its children.
    for (const top of buildAndroidTree(xml).children) {
        collect(top);
    }
    return flat;
}
|
|
147
|
+
/**
 * Reconstruct the node hierarchy of a uiautomator dump from its tag stream.
 *
 * The dump contains `<node ...>...</node>` containers and `<node .../>`
 * self-closing leaves. Tags are scanned left to right while a stack of
 * currently-open nodes is maintained, so every node ends up under its real
 * parent.
 *
 * @param xml The raw dump text.
 * @returns A synthetic root node whose `children` are the top-level nodes.
 */
function buildAndroidTree(xml) {
    const root = makeRawAndroidNode("", "", "", "", false, null);
    const openNodes = [root];
    // Matches each <node ...> / <node .../> opening tag or a </node> closer.
    // Quoted attribute values are consumed whole (`"[^"]*"`), so a literal `>`
    // inside a value (e.g. text="Home > Settings") cannot end the tag early.
    const tagRe = /<node\b(?:[^>"]|"[^"]*")*>|<\/node>/g;
    for (const [tag] of xml.matchAll(tagRe)) {
        if (tag === "</node>") {
            // Never pop the synthetic root, even on unbalanced input.
            if (openNodes.length > 1) {
                openNodes.pop();
            }
            continue;
        }
        const parent = openNodes[openNodes.length - 1];
        const node = makeRawAndroidNode(attr(tag, "class"), attr(tag, "text"), attr(tag, "content-desc"), attr(tag, "resource-id"), attr(tag, "clickable") === "true", parseAndroidBounds(attr(tag, "bounds")));
        parent.children.push(node);
        // Self-closing is detected from the tag text itself: the attribute run
        // in the regex also swallows the trailing slash.
        if (!tag.endsWith("/>")) {
            openNodes.push(node);
        }
    }
    return root;
}
|
|
181
|
+
/**
 * Build one raw tree node from already-extracted attribute values. Each node
 * gets its own fresh, empty `children` array.
 */
function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
    const children = [];
    return { role, text, contentDesc, resourceId, clickable, bounds, children };
}
|
|
184
|
+
/** iOS roles/types that are directly actionable (the device taps their center). */
const IOS_ACTIONABLE_TYPES = new Set([
    "Button",
    "Link",
    "TextField",
    "SecureTextField",
    "SearchField",
    "Cell",
    "Switch",
    "Checkbox",
    // NOTE(review): the AX role is spelled "AXCheckBox" (capital B), which
    // strips to "CheckBox"; accepted alongside the original spelling. Confirm
    // against real `idb ui describe-all` output.
    "CheckBox",
    "RadioButton",
    "MenuItem",
    "Tab",
]);
/**
 * Parse `idb ui describe-all` JSON (a FLAT array of elements, each with a `frame`
 * in POINTS) into NativeNodes in array order. iOS is already a flat,
 * properly-labeled list — no ancestor walk needed — so `clickable` is derived
 * from the element's role/type and its `enabled` flag.
 *
 * This parser never throws on odd input: a single malformed element is
 * skipped or degraded rather than losing the whole tree.
 *
 * @param json Raw JSON text.
 * @returns One node per element with a usable frame, tagged `space: "points"`;
 *   an empty array when the text isn't valid JSON or isn't an array.
 */
export function parseIdbDescribeAll(json) {
    let parsed;
    try {
        parsed = JSON.parse(json);
    }
    catch {
        return [];
    }
    if (!Array.isArray(parsed))
        return [];
    // Only strings can be labels; anything else (null, numbers, booleans)
    // counts as "no label" instead of blowing up on `.trim()`.
    const trimmedString = (value) => (typeof value === "string" ? value.trim() : "");
    const out = [];
    for (const raw of parsed) {
        // A null / primitive array entry has no fields to read.
        if (raw === null || typeof raw !== "object")
            continue;
        const bounds = idbFrameToBounds(raw.frame);
        if (!bounds)
            continue; // malformed / zero-area frame → no tappable center
        // Label: prefer the spoken AXLabel; fall back to AXValue (search fields
        // expose their placeholder as AXValue, e.g. "Search") when AXLabel is
        // missing, blank, or not a string. AXValue is only a STRING fallback —
        // switches/sliders/steppers report it as a number/boolean (a Switch is
        // 1/0). An unlabeled toggle then emits as a bare `[id] switch` (still
        // tappable via its frame center).
        const label = trimmedString(raw.AXLabel) || trimmedString(raw.AXValue);
        // Prefer `type`; otherwise derive it from the AX-prefixed `role`.
        // Non-string values are treated as absent.
        let rawType = "";
        if (typeof raw.type === "string") {
            rawType = raw.type;
        }
        else if (typeof raw.role === "string" && raw.role) {
            rawType = stripAxPrefix(raw.role);
        }
        const typeKey = stripAxPrefix(rawType);
        const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && raw.enabled !== false;
        out.push({
            role: normalizeRole(rawType),
            label,
            bounds,
            clickable: actionable,
            hasOwnLabel: label.length > 0,
            resourceId: raw.AXUniqueId ?? undefined,
            space: "points",
        });
    }
    return out;
}
|
|
241
|
+
/**
 * Validate an idb `frame` object and copy it into a plain bounds rect.
 *
 * @param frame Expected to look like `{ x, y, width, height }`; may be
 *   missing or malformed.
 * @returns `{ x, y, width, height }`, or null when the frame is absent, any
 *   field is not a finite number, or the rect has no positive area.
 */
function idbFrameToBounds(frame) {
    if (!frame)
        return null;
    const { x, y, width, height } = frame;
    // Number.isFinite is false for non-numbers, NaN and ±Infinity, so this
    // covers the size fields too (JSON such as 1e999 parses to Infinity).
    const allFinite = [x, y, width, height].every((value) => Number.isFinite(value));
    if (!allFinite || width <= 0 || height <= 0) {
        return null;
    }
    return { x, y, width, height };
}
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
// Serialization — flat NativeNode list → `[id] role "label"` + nodeMap
|
|
259
|
+
// ---------------------------------------------------------------------------
|
|
260
|
+
const NODE_LABEL_MAX_LENGTH = 100;
/**
 * Letter that starts every short id for a coordinate space: "A" for "px",
 * "I" for anything else, so ids from different spaces can't be confused in a
 * mixed log.
 */
function shortIdPrefix(space) {
    if (space === "px") {
        return "A";
    }
    return "I";
}
|
|
265
|
+
/**
 * Shorten `text` to at most `max` UTF-16 code units, replacing the removed
 * tail with a single "…".
 *
 * @param text The string to shorten.
 * @param max  Maximum length of the result, ellipsis included.
 * @returns `text` unchanged when it already fits; otherwise a prefix plus "…".
 */
function truncate(text, max) {
    if (text.length <= max)
        return text;
    // Reserve one unit for the ellipsis; clamp so a tiny `max` can't turn into
    // a negative (count-from-the-end) slice index.
    let keep = Math.max(0, max - 1);
    // Don't cut between the halves of a surrogate pair: a dangling high
    // surrogate is not valid text, so drop it as well.
    if (keep > 0) {
        const lastUnit = text.charCodeAt(keep - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            keep -= 1;
        }
    }
    return text.slice(0, keep) + "…";
}
|
|
268
|
+
/**
 * Squash every run of whitespace to a single space and strip the ends, so an
 * aggregated multi-line label reads as one line.
 */
function normalizeLabel(label) {
    const words = label.split(/\s+/).filter((word) => word.length > 0);
    return words.join(" ");
}
|
|
272
|
+
/**
 * Turn a flat node list (as produced by `parseUiautomatorXml` or
 * `parseIdbDescribeAll`) into the `[id] role "label"` text the DOMLocator
 * reasons over, together with a `shortId → bounds` map used to resolve taps
 * locally.
 *
 * How nodes are emitted:
 *   - ANCESTOR-VS-LEAF: each labeled node is attributed to the smallest
 *     clickable node whose rect contains the label's center. That clickable
 *     node is emitted with ITS OWN bounds and the combined text; the absorbed
 *     leaves are not emitted separately. A labeled node with no clickable host
 *     is emitted on its own so visible text isn't lost.
 *   - Decoration is dropped: see `shouldEmit`.
 *
 * Ancestry is inferred purely from bounds containment, not from tree links.
 * Ids are numbered in input order, prefixed per coordinate space (taken from
 * the first node; "px" for an empty list).
 *
 * @param nodes Flat node list in document order.
 * @returns `{ simplified, nodeMap }` — newline-joined lines and a Map from
 *   short id to the emitted node's bounds.
 */
export function serializeNativeTree(nodes) {
    const idPrefix = shortIdPrefix(nodes[0]?.space ?? "px");
    const clickableNodes = nodes.filter((candidate) => candidate.clickable);
    // Pass 1 — attribute every labeled node to its clickable host, if any.
    const absorbedByHost = new Map();
    const absorbedLeaves = new Set();
    nodes.forEach((leaf) => {
        if (!leaf.hasOwnLabel) {
            return;
        }
        const host = smallestContainingClickable(leaf, clickableNodes);
        // No host, or the node is its own host: it keeps its label.
        if (!host || host === leaf) {
            return;
        }
        if (!absorbedByHost.has(host)) {
            absorbedByHost.set(host, []);
        }
        absorbedByHost.get(host).push(leaf.label);
        absorbedLeaves.add(leaf);
    });
    // Pass 2 — emit in input order so ids follow on-screen reading order.
    const lines = [];
    const nodeMap = new Map();
    for (const node of nodes) {
        if (absorbedLeaves.has(node)) {
            continue; // its text now lives on the host row
        }
        // Own label first, then whatever was absorbed from contained leaves.
        const pieces = [node.label].concat(absorbedByHost.get(node) ?? []);
        const combined = normalizeLabel(pieces.filter(Boolean).join(" "));
        if (!shouldEmit(node, combined)) {
            continue;
        }
        const shortId = `${idPrefix}${lines.length + 1}`;
        const shown = truncate(combined, NODE_LABEL_MAX_LENGTH);
        const labelPart = shown ? ` "${shown}"` : "";
        lines.push(`[${shortId}] ${node.role}${labelPart}`);
        nodeMap.set(shortId, node.bounds);
    }
    return { simplified: lines.join("\n"), nodeMap };
}
|
|
337
|
+
/**
 * Filter for the simplified tree.
 *
 * A clickable node is always kept. A non-clickable node is kept only when it
 * has (possibly aggregated) text AND its role is not one of the bare layout
 * roles in `DECORATIVE_GENERIC_ROLES`.
 *
 * @param node          The candidate node.
 * @param combinedLabel Its final label after aggregation ("" when none).
 * @returns true when the node should appear in the output.
 */
function shouldEmit(node, combinedLabel) {
    if (node.clickable) {
        return true;
    }
    // Labeled, non-clickable content is real on-screen text (heading, value)
    // unless it is merely a generic container.
    const hasText = Boolean(combinedLabel);
    return hasText && !DECORATIVE_GENERIC_ROLES.has(node.role);
}
|
|
352
|
+
/**
 * Pick the clickable node that should host `node`'s label.
 *
 * A node that is itself in `clickables` always hosts its own label, whatever
 * else overlaps it. Otherwise the host is the smallest-area clickable whose
 * rect contains the CENTER of `node`'s bounds — "smallest" picks the immediate
 * row rather than a giant scroll container wrapping everything. Ties keep the
 * earliest candidate.
 *
 * @param node       The label-bearing node.
 * @param clickables All clickable nodes on screen.
 * @returns The host node, or null when nothing clickable contains the center.
 */
function smallestContainingClickable(node, clickables) {
    // Decide this up front. Treating the node as just another candidate inside
    // the loop let a smaller clickable that sits over its center (e.g. an icon
    // button inside a clickable row) steal the row's own label, which then
    // dropped the row from the output entirely.
    if (clickables.includes(node)) {
        return node;
    }
    const cx = node.bounds.x + node.bounds.width / 2;
    const cy = node.bounds.y + node.bounds.height / 2;
    let best = null;
    let bestArea = Infinity;
    for (const c of clickables) {
        if (!containsPoint(c.bounds, cx, cy))
            continue;
        const area = c.bounds.width * c.bounds.height;
        if (area < bestArea) {
            best = c;
            bestArea = area;
        }
    }
    return best;
}
/** True when point (x, y) lies inside rect `b`, edges included. */
function containsPoint(b, x, y) {
    return x >= b.x && x <= b.x + b.width && y >= b.y && y <= b.y + b.height;
}
|
|
381
|
+
/**
 * Midpoint of a bounds rect, each coordinate rounded to the nearest integer —
 * this is the point the device taps.
 */
export function boundsCenter(b) {
    const midX = b.x + b.width / 2;
    const midY = b.y + b.height / 2;
    return { x: Math.round(midX), y: Math.round(midY) };
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async wrappers over `xcrun simctl` + `idb` for the native-iOS sim path.
|
|
3
|
+
*
|
|
4
|
+
* Two tools, two jobs:
|
|
5
|
+
* - `xcrun simctl` drives the simulator LIFECYCLE (boot detection, install,
|
|
6
|
+
* terminate, launch) and the SCREENSHOT.
|
|
7
|
+
* - `idb` drives UI INPUT (tap/swipe/text/key) and reports the screen
|
|
8
|
+
* geometry (pixels, points, and the scale between them).
|
|
9
|
+
*
|
|
10
|
+
* COORDINATE SPACES (the key difference from Android, where screencap and tap
|
|
11
|
+
* share one pixel space):
|
|
12
|
+
* - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
|
|
13
|
+
* - `idb ui tap/swipe` take POINTS (e.g. 393x852) — pixels / scale.
|
|
14
|
+
* The native sim TAPS in points (de-normalize 0-1000 against the POINT size)
|
|
15
|
+
* but RECORDS in PIXELS: dimensions() returns the pixel size so the loop's
|
|
16
|
+
* round-trip is exact. Recording in points would drift — the point grid (393)
|
|
17
|
+
* is coarser than the 0-1000 normalized grid, so it double-rounds. See
|
|
18
|
+
* IOSDevice for the full derivation.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Error type exported by this module for iOS simulator failures.
 * `describeAll` is documented to throw it; NOTE(review): which other wrappers
 * throw it is not visible from these declarations — confirm in simctl.js.
 */
export declare class IosError extends Error {
    /** @param message Human-readable description of the failure. */
    constructor(message: string);
}
/**
 * Run `xcrun simctl <args>` and return trimmed stdout.
 *
 * @param args Arguments placed after `xcrun simctl`.
 * @param timeoutMs Optional timeout — presumably milliseconds, judging by the
 *   name; the default is not visible here.
 */
export declare function simctl(args: string[], timeoutMs?: number): Promise<string>;
/**
 * Run `idb <args>` and return trimmed stdout.
 *
 * @param args Arguments placed after `idb`.
 * @param timeoutMs Optional timeout — presumably milliseconds, judging by the
 *   name; the default is not visible here.
 */
export declare function idb(args: string[], timeoutMs?: number): Promise<string>;
/**
 * Assert exactly one simulator is Booted and return its udid. We pin every
 * subsequent idb/simctl call (and the screenshot) to "booted", so multiple
 * booted simulators are ambiguous and rejected.
 *
 * @returns The udid of the single booted simulator.
 */
export declare function requireOneBootedSimulator(): Promise<string>;
|
|
33
|
+
/**
 * Screen geometry from `idb describe --json`: PIXEL size, POINT size, and the
 * scale (`density`) between them. Points drive idb ui tap/swipe; pixels are the
 * screenshot's resolution.
 */
export interface IosScreen {
    /** Screen width in pixels (screenshot resolution). */
    pixelWidth: number;
    /** Screen height in pixels (screenshot resolution). */
    pixelHeight: number;
    /** Screen width in points (the space idb ui tap/swipe use). */
    pointWidth: number;
    /** Screen height in points (the space idb ui tap/swipe use). */
    pointHeight: number;
    /** Scale between points and pixels (e.g. 3 for an @3x screen). */
    density: number;
}
/**
 * Resolve the screen geometry for a simulator.
 *
 * @param udid Simulator identifier (see `requireOneBootedSimulator`).
 * @returns The pixel size, point size and density described by `IosScreen`.
 */
export declare function describeScreen(udid: string): Promise<IosScreen>;
/**
 * Capture the booted simulator's screen as PNG bytes via
 * `simctl io booted screenshot`. simctl writes to a file path (no reliable
 * stdout in current Xcode), so we round-trip through a temp file.
 *
 * @returns The PNG file contents; per the file header the image is in PIXELS.
 */
export declare function screenshotPng(): Promise<Buffer>;
|
|
52
|
+
/**
 * Tap at (x, y) on the given simulator. NOTE(review): per the file header
 * `idb ui tap` takes POINTS, so x/y are presumably points — confirm in simctl.js.
 */
export declare function uiTap(udid: string, x: number, y: number): Promise<void>;
/**
 * Long-press at (x, y). Coordinates are presumably POINTS, like `uiTap`.
 * `durationMs` is optional; its default is not visible from this declaration.
 */
export declare function uiLongPress(udid: string, x: number, y: number, durationMs?: number): Promise<void>;
/**
 * Swipe from (x1, y1) to (x2, y2). NOTE(review): per the file header
 * `idb ui swipe` takes POINTS — confirm. `durationMs` is optional; its default
 * is not visible from this declaration.
 */
export declare function uiSwipe(udid: string, x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;
/**
 * Type text into the focused field. Unlike Android's `adb shell input text`,
 * `idb ui text` handles spaces/unicode/quotes correctly, so no helper IME is
 * needed.
 */
export declare function uiText(udid: string, text: string): Promise<void>;
/**
 * Press a hardware key by HID usage code. `idb ui key 40` is Return/Enter
 * (used to submit a text field).
 *
 * @param keycode HID usage code, e.g. `HID_KEY_RETURN`.
 */
export declare function uiKey(udid: string, keycode: number): Promise<void>;
/** HID usage code for Return/Enter. */
export declare const HID_KEY_RETURN = 40;
/**
 * Capture the current accessibility tree as `idb ui describe-all` JSON (a flat
 * array of elements, each with a POINT frame) and return it. Mirrors the
 * oracle's `ios_describe`: right after a tap the tree can be mid-transition and
 * come back empty/partial, so we retry until we get an array with more than just
 * the root application node. Throws IosError if every attempt yields a trivial
 * tree so the caller can degrade to the vision path.
 *
 * @returns The raw JSON text of the accessibility tree.
 */
export declare function describeAll(udid: string): Promise<string>;
/** Terminate the app with the given bundle id (simulator lifecycle is driven via `xcrun simctl` per the file header). */
export declare function terminateApp(udid: string, bundleId: string): Promise<void>;
/** Launch the app with the given bundle id on the simulator. */
export declare function launchApp(udid: string, bundleId: string): Promise<void>;
/** Install the app at `appPath` onto the simulator. NOTE(review): presumably a local `.app` bundle, as with `bundleIdFromApp` — confirm. */
export declare function installApp(udid: string, appPath: string): Promise<void>;
/** Report whether an app with the given bundle id is installed on the simulator. */
export declare function isAppInstalled(udid: string, bundleId: string): Promise<boolean>;
/**
 * Read CFBundleIdentifier from a local `.app`'s Info.plist via `plutil`. Lets us
 * terminate+launch a just-installed app without diffing the app list.
 *
 * @returns The bundle identifier, or null — NOTE(review): presumably when it
 *   cannot be read; the exact conditions are not visible here.
 */
export declare function bundleIdFromApp(appPath: string): Promise<string | null>;
|