@ishlabs/cli 0.26.1 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. package/README.md +4 -0
  2. package/dist/commands/doctor.js +21 -11
  3. package/dist/commands/iteration.js +13 -4
  4. package/dist/commands/study-run.js +12 -12
  5. package/dist/commands/study-screenshots.js +15 -12
  6. package/dist/commands/study.js +22 -3
  7. package/dist/lib/api-client.d.ts +1 -0
  8. package/dist/lib/docs.js +139 -7
  9. package/dist/lib/local-sim/adb.d.ts +35 -2
  10. package/dist/lib/local-sim/adb.js +107 -14
  11. package/dist/lib/local-sim/android.d.ts +5 -3
  12. package/dist/lib/local-sim/android.js +29 -11
  13. package/dist/lib/local-sim/device-pool.d.ts +85 -0
  14. package/dist/lib/local-sim/device-pool.js +316 -0
  15. package/dist/lib/local-sim/device.d.ts +29 -0
  16. package/dist/lib/local-sim/device.js +19 -1
  17. package/dist/lib/local-sim/emulator.d.ts +50 -0
  18. package/dist/lib/local-sim/emulator.js +189 -0
  19. package/dist/lib/local-sim/install.js +23 -3
  20. package/dist/lib/local-sim/ios.d.ts +31 -5
  21. package/dist/lib/local-sim/ios.js +80 -21
  22. package/dist/lib/local-sim/loop.js +199 -9
  23. package/dist/lib/local-sim/native-a11y.d.ts +24 -0
  24. package/dist/lib/local-sim/native-a11y.js +76 -14
  25. package/dist/lib/local-sim/screen-signature.d.ts +77 -0
  26. package/dist/lib/local-sim/screen-signature.js +170 -0
  27. package/dist/lib/local-sim/simctl-provision.d.ts +49 -0
  28. package/dist/lib/local-sim/simctl-provision.js +89 -0
  29. package/dist/lib/local-sim/simctl.d.ts +6 -4
  30. package/dist/lib/local-sim/simctl.js +18 -5
  31. package/dist/lib/local-sim/xcuitest.d.ts +22 -1
  32. package/dist/lib/local-sim/xcuitest.js +38 -6
  33. package/dist/lib/modality.js +7 -2
  34. package/dist/lib/paths.d.ts +1 -0
  35. package/dist/lib/paths.js +3 -0
  36. package/dist/lib/skill-content.js +5 -2
  37. package/dist/lib/upload.d.ts +27 -0
  38. package/dist/lib/upload.js +108 -11
  39. package/package.json +2 -2
@@ -5,10 +5,15 @@
5
5
  * against a SimulationDevice (a Playwright browser today; a native Android
6
6
  * emulator next). The loop is device-agnostic — see device.ts.
7
7
  */
8
+ import { appendFileSync } from "node:fs";
8
9
  import { launchSharedBrowser, FULL_PAGE_HEIGHT_CAP_PX_MOBILE, FULL_PAGE_HEIGHT_CAP_PX_DESKTOP, } from "./browser.js";
9
10
  import { uploadScreenshot } from "./upload.js";
10
11
  import { detectNoVisibleChange, describeAction, classifyStepKind } from "./actions.js";
11
12
  import { createDevice } from "./device.js";
13
+ import { nativeStateResetWarning } from "./ios.js";
14
+ import { provisionDevicePool, maxConcurrentDevices, totalMemBytes, PER_DEVICE_MB, } from "./device-pool.js";
15
+ import { listOnlineSerials } from "./adb.js";
16
+ import { listAvds } from "./emulator.js";
12
17
  import pkg from "../../../package.json" with { type: "json" };
13
18
  import { enableDebug, isDebugEnabled, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
14
19
  /**
@@ -19,6 +24,58 @@ import { enableDebug, isDebugEnabled, debugRawResponse, debugNormalizedActions,
19
24
  function isNativePlatform(platform) {
20
25
  return platform === "android" || platform === "ios";
21
26
  }
27
+ /**
28
+ * Build ONE corpus-dump JSON line capturing everything needed to replay any
29
+ * screen-signature algorithm offline against this observation. Pure (input →
30
+ * string); the caller owns the I/O and the env gating. `app` is the coarse
31
+ * package (android) / bundle id (ios). Each node is projected down to the exact
32
+ * fields `computeScreenSignature` reads, so the line is a faithful replay basis.
33
+ */
34
+ function buildCorpusDumpLine(input) {
35
+ const { coarse } = input;
36
+ return (JSON.stringify({
37
+ ts: input.ts,
38
+ app: (coarse.platform === "android" ? coarse.package : coarse.bundleId) ?? "",
39
+ platform: coarse.platform,
40
+ location: input.location,
41
+ coarse: {
42
+ platform: coarse.platform,
43
+ package: coarse.package ?? null,
44
+ activity: coarse.activity ?? null,
45
+ bundleId: coarse.bundleId ?? null,
46
+ },
47
+ nodes: input.nodes.map((n) => ({
48
+ role: n.role,
49
+ label: n.label,
50
+ resourceId: n.resourceId ?? null,
51
+ scrollable: n.scrollable,
52
+ insideScrollable: n.insideScrollable,
53
+ })),
54
+ signature: input.signature
55
+ ? {
56
+ value: input.signature.value,
57
+ usable: input.signature.usable,
58
+ tokenCount: input.signature.tokenCount,
59
+ }
60
+ : null,
61
+ frame_version_id: input.frameVersionId ?? null,
62
+ action_kind: input.actionKind,
63
+ }) + "\n");
64
+ }
65
+ /**
66
+ * Append one corpus-dump line to `path`. Best-effort: a dump failure (bad path,
67
+ * full disk) is swallowed so the instrumentation can NEVER abort a live sim.
68
+ * Gated entirely by the caller on ISH_DUMP_CORPUS + native source.
69
+ */
70
+ function appendCorpusDumpLine(path, input, log) {
71
+ try {
72
+ appendFileSync(path, buildCorpusDumpLine(input));
73
+ }
74
+ catch (err) {
75
+ const msg = err instanceof Error ? err.message : String(err);
76
+ log(` Warning: corpus dump append failed — ${msg}`);
77
+ }
78
+ }
22
79
  /**
23
80
  * Convert a raw action (from either resolved_actions or output.action.actions)
24
81
  * into the flat LocalStepAction shape used by the executor. Exported for unit
@@ -161,13 +218,59 @@ export async function runLocalSimulations(client, opts) {
161
218
  log("\nCancelling after current step...");
162
219
  };
163
220
  process.on("SIGINT", onSigint);
164
- // Native runs share ONE physical device (emulator / simulator), so they
165
- // can't run in parallel — force sequential regardless of --parallel.
166
221
  const isNativeRun = isNativePlatform(opts.platform);
167
- if (isNativeRun && (opts.parallel ?? 1) > 1) {
168
- log("Native (android/ios) runs drive a single device running sequentially.");
222
+ const requested = opts.parallel ?? opts.participantIds.length;
223
+ // Native (iOS + Android) can drive a POOL of devices concurrently, auto-sized
224
+ // to the host's RAM (and, for Android, the number of AVDs) so a small machine
225
+ // just runs fewer in parallel + queues the rest — never errors. Browser uses
226
+ // the requested parallelism directly.
227
+ const NATIVE_PARALLEL_MAX = 5;
228
+ const nativeParallel = isNativeRun && requested > 1 && opts.participantIds.length > 1;
229
+ let concurrency;
230
+ if (nativeParallel) {
231
+ const cap = Math.min(requested, opts.participantIds.length, NATIVE_PARALLEL_MAX);
232
+ if (opts.platform === "android") {
233
+ // Bound by AVDs we can launch + emulators already online.
234
+ let slots = cap;
235
+ try {
236
+ const [avds, online] = await Promise.all([listAvds(), listOnlineSerials()]);
237
+ slots = avds.length + online.length;
238
+ }
239
+ catch {
240
+ /* fall back to cap if the toolchain query fails */
241
+ }
242
+ concurrency = maxConcurrentDevices({
243
+ totalMemBytes: totalMemBytes(),
244
+ perDeviceMb: PER_DEVICE_MB.android,
245
+ requested: cap,
246
+ deviceCount: Math.max(1, slots),
247
+ });
248
+ }
249
+ else {
250
+ concurrency = maxConcurrentDevices({
251
+ totalMemBytes: totalMemBytes(),
252
+ perDeviceMb: PER_DEVICE_MB.ios,
253
+ requested: cap,
254
+ });
255
+ }
256
+ }
257
+ else {
258
+ if (isNativeRun && requested > 1) {
259
+ log("Native parallel needs --parallel >1 and >1 participant; running sequentially.");
260
+ }
261
+ concurrency = isNativeRun ? 1 : requested;
262
+ }
263
+ // iOS: a bundle-id / system-app target can't be reinstalled, so its data
264
+ // isn't cleared between participants (a local .app IS reinstalled per
265
+ // participant — see ios.ts resolveBundleId). Warn once up front when that
266
+ // could skew a multi-participant run. (--app .app ⇒ reinstallable; a bundle id
267
+ // from --app or the iteration's app_artifact ⇒ not.)
268
+ if (opts.platform === "ios") {
269
+ const reinstallable = !!opts.appPath?.toLowerCase().endsWith(".app");
270
+ const warning = nativeStateResetWarning(reinstallable, opts.participantIds.length);
271
+ if (warning)
272
+ log(warning);
169
273
  }
170
- const concurrency = isNativeRun ? 1 : (opts.parallel ?? opts.participantIds.length);
171
274
  // Native runs stamp the app build onto the iteration once — every
172
275
  // participant in a run drives the same installed build, so dedupe to a
173
276
  // single best-effort POST after the first device resolves its app.
@@ -178,6 +281,15 @@ export async function runLocalSimulations(client, opts) {
178
281
  appBuildReported = true;
179
282
  void reportObservedApp(client, opts.iterationId, platform, build, log);
180
283
  };
284
+ // With a device pool, N workers would each read the app build (a redundant
285
+ // simctl listapps / dumpsys per device). Let only the first worker do it.
286
+ let appBuildClaimed = false;
287
+ const claimAppBuild = () => {
288
+ if (appBuildClaimed)
289
+ return false;
290
+ appBuildClaimed = true;
291
+ return true;
292
+ };
181
293
  try {
182
294
  if (concurrency <= 1 || opts.participantIds.length <= 1) {
183
295
  // Sequential execution — each participant owns its own browser
@@ -197,6 +309,47 @@ export async function runLocalSimulations(client, opts) {
197
309
  }
198
310
  }
199
311
  }
312
+ else if (nativeParallel) {
313
+ // Native device pool — N simulators/emulators, one participant per device.
314
+ const deviceWord = opts.platform === "ios" ? "simulator" : "emulator";
315
+ // nativeParallel ⇒ opts.platform is "ios" | "android" (isNativePlatform true).
316
+ const pool = await provisionDevicePool({ platform: opts.platform, size: concurrency, log });
317
+ try {
318
+ const poolN = pool.devices.length;
319
+ log(`\nRunning ${opts.participantIds.length} ${opts.platform} simulations across a pool of ` +
320
+ `${poolN} ${deviceWord}${poolN === 1 ? "" : "s"}` +
321
+ (poolN < Math.min(requested, opts.participantIds.length, NATIVE_PARALLEL_MAX)
322
+ ? " (auto-sized to fit this machine)"
323
+ : "") +
324
+ "...");
325
+ // Launch all participants; each awaits a free device, so actual
326
+ // concurrency is bounded by the pool size. A finished worker releases
327
+ // its device to the next in line.
328
+ const runOne = async (participantId) => {
329
+ if (cancelled)
330
+ return;
331
+ const participantName = opts.participantNames.get(participantId) ?? participantId;
332
+ const participantLog = (msg) => log(`[${participantName}] ${msg}`);
333
+ const device = await pool.claim();
334
+ try {
335
+ participantLog(`Starting on ${deviceWord} ${device.id}`);
336
+ await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild, undefined, device, claimAppBuild);
337
+ log(`Completed: ${participantName}`);
338
+ }
339
+ catch (err) {
340
+ const msg = err instanceof Error ? err.message : String(err);
341
+ log(`Failed: ${participantName} — ${msg}`);
342
+ }
343
+ finally {
344
+ pool.release(device);
345
+ }
346
+ };
347
+ await Promise.allSettled(opts.participantIds.map(runOne));
348
+ }
349
+ finally {
350
+ await pool.teardown();
351
+ }
352
+ }
200
353
  else {
201
354
  // Parallel execution — shared browser, one tab per participant
202
355
  log(`\nRunning ${opts.participantIds.length} simulations in parallel (concurrency: ${concurrency})...`);
@@ -242,7 +395,7 @@ export async function runLocalSimulations(client, opts) {
242
395
  process.off("SIGINT", onSigint);
243
396
  }
244
397
  }
245
- async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, onAppBuild, sharedBrowser) {
398
+ async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, onAppBuild, sharedBrowser, pooledDevice, claimAppBuild) {
246
399
  // Step 1: Initialize session
247
400
  const initResponse = await client.localSimInit({
248
401
  participant_id: participantId,
@@ -300,6 +453,8 @@ async function runSingleSimulation(client, participantId, participantName, opts,
300
453
  contextValues: session.context_values,
301
454
  sharedBrowser,
302
455
  appPath: opts.appPath,
456
+ deviceId: pooledDevice?.id,
457
+ wdaPort: pooledDevice?.wdaPort,
303
458
  log,
304
459
  });
305
460
  const history = [];
@@ -314,9 +469,10 @@ async function runSingleSimulation(client, participantId, participantName, opts,
314
469
  // Step 3: Launch / navigate the target to its starting point.
315
470
  await device.launchOrReset(launchTarget);
316
471
  // Step 3b: Capture the installed app's build (native only). Best-effort —
317
- // the dedupe in runLocalSimulations keeps this to one POST per run, and a
318
- // failed read or report never disturbs the simulation.
319
- if (onAppBuild) {
472
+ // the dedupe in runLocalSimulations keeps this to one POST per run. With a
473
+ // device pool, only the worker that wins claimAppBuild() reads it (one
474
+ // simctl/dumpsys read total, not one per device).
475
+ if (onAppBuild && (!claimAppBuild || claimAppBuild())) {
320
476
  try {
321
477
  const observed = await device.appBuild?.();
322
478
  if (observed)
@@ -349,6 +505,11 @@ async function runSingleSimulation(client, participantId, participantName, opts,
349
505
  // TODO(perf): backend can downscale before the vision LLM; full-res sent for now.
350
506
  const obs = await device.observe();
351
507
  const currentScreenshot = obs.screenshot;
508
+ // Corpus dump (ISH_DUMP_CORPUS): the action_kind of the step that LED to
509
+ // THIS observation is the inbound lastStepKind (carried from the prior
510
+ // step; reassigned below AFTER the match-frame call). At step 0 nothing
511
+ // preceded this screen, so report it as "initial".
512
+ const inboundActionKind = step === 0 ? "initial" : lastStepKind;
352
513
  // Capture JPEG of observation for upload and recording (pre-action)
353
514
  const obsJpeg = await device.captureScreenshotJpeg();
354
515
  const obsBase64 = obsJpeg.toString("base64");
@@ -580,6 +741,15 @@ async function runSingleSimulation(client, participantId, participantName, opts,
580
741
  // updated AFTER this call for the next iteration.
581
742
  ...(isNative && lastFrameVersionId ? { previous_frame_version_id: lastFrameVersionId } : {}),
582
743
  same_screen_continuation: isNative && (lastStepKind === "scroll" || lastStepKind === "keyboard"),
744
+ // Phase 2: scroll-invariant structural screen signature as an
745
+ // entry/cross-run anchor. Sent ONLY when usable (>= 2 stable chrome
746
+ // ids) — a sparse/empty id-set hashes to a colliding value that
747
+ // would silently over-merge distinct screens, so we omit it and let
748
+ // the backend fall back to Phase-1 continuity. Computed in the
749
+ // device's observe() from this step's parsed a11y tree.
750
+ ...(isNative && obs.screenSignature?.usable
751
+ ? { native_screen_signature: obs.screenSignature.value }
752
+ : {}),
583
753
  });
584
754
  frameVersionId = matchResult.frame_version_id;
585
755
  }
@@ -587,6 +757,26 @@ async function runSingleSimulation(client, participantId, participantName, opts,
587
757
  const msg = err instanceof Error ? err.message : String(err);
588
758
  log(` Warning: frame matching failed — ${msg}`);
589
759
  }
760
+ // Corpus dump (ISH_DUMP_CORPUS, native only): one JSON line per
761
+ // observation with everything needed to replay any screen-signature
762
+ // algorithm offline — the LLM screen label (ground truth), the coarse
763
+ // inputs, the exact parsed NativeNode[], the current algorithm's
764
+ // signature, the backend frame id, and the inbound action_kind. Fully
765
+ // gated and best-effort: zero overhead/behavior change when unset, and a
766
+ // dump failure never aborts the sim. Requires the native observe()'s
767
+ // optional nativeNodes/coarseInputs (browser leaves them undefined).
768
+ const corpusDumpPath = process.env.ISH_DUMP_CORPUS;
769
+ if (corpusDumpPath && isNative && obs.nativeNodes && obs.coarseInputs) {
770
+ appendCorpusDumpLine(corpusDumpPath, {
771
+ ts: step,
772
+ location: stepResponse.current_location,
773
+ coarse: obs.coarseInputs,
774
+ nodes: obs.nativeNodes,
775
+ signature: obs.screenSignature,
776
+ frameVersionId,
777
+ actionKind: inboundActionKind,
778
+ }, log);
779
+ }
590
780
  // Carry THIS step's logical-screen classification + matched frame
591
781
  // forward for the NEXT iteration's match-frame call (consumed above as
592
782
  // last*). Classify after the call so ordering is consume-then-update.
@@ -45,6 +45,23 @@ export interface NativeNode {
45
45
  /** True for nodes whose own text/desc is a label (used to aggregate onto rows). */
46
46
  hasOwnLabel: boolean;
47
47
  resourceId?: string;
48
+ /**
49
+ * True for a scroll container (Android `scrollable="true"`; iOS
50
+ * ScrollView/Table/CollectionView). The screen-signature uses it to keep the
51
+ * container's OWN id as durable chrome — see screen-signature.ts.
52
+ */
53
+ scrollable: boolean;
54
+ /**
55
+ * True iff this node has a scrollable ANCESTOR — i.e. it is scroll CONTENT that
56
+ * shifts under a scroll. Computed STRUCTURALLY during parsing (tree ancestry),
57
+ * not geometrically: an overlay/FAB that merely sits inside a list's rect is
58
+ * NOT marked (it isn't a tree descendant), and on iOS the descendants of a
59
+ * pruned (isAccessible=0) scroll container still inherit the flag. The
60
+ * screen-signature excludes these from the stable token set so a scroll never
61
+ * changes the signature — see screen-signature.ts. A scroll container itself
62
+ * has `scrollable=true` but `insideScrollable=false` (unless nested).
63
+ */
64
+ insideScrollable: boolean;
48
65
  space: CoordinateSpace;
49
66
  }
50
67
  export interface NativeTree {
@@ -64,6 +81,13 @@ export interface NativeTree {
64
81
  * raw fields; the serializer decides which to emit and how to aggregate.
65
82
  */
66
83
  export declare function parseUiautomatorXml(xml: string): NativeNode[];
84
+ /**
85
+ * The foreground app's package name from a uiautomator dump's `package="..."`
86
+ * attribute. uiautomator stamps every node with the owning package; the first
87
+ * one is the foreground app. Used as a coarse-token input for the screen
88
+ * signature (see screen-signature.ts). Returns "" when absent (best-effort).
89
+ */
90
+ export declare function androidPackage(xml: string): string;
67
91
  /**
68
92
  * Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
69
93
  * FLAT, depth-first `NativeNode[]` (POINTS) that `parseXcuiHierarchy` produces,
@@ -122,7 +122,11 @@ function unescapeXml(s) {
122
122
  export function parseUiautomatorXml(xml) {
123
123
  const root = buildAndroidTree(xml);
124
124
  const out = [];
125
- const visit = (n) => {
125
+ // `parentScrollable` is true iff any ANCESTOR (not this node) had
126
+ // scrollable=true — i.e. this node is scroll CONTENT. Threaded down the
127
+ // descent so the screen-signature can exclude content structurally (a scroll
128
+ // moves these; chrome outside any scrollable keeps the signature stable).
129
+ const visit = (n, parentScrollable) => {
126
130
  // Drop nodes with no usable bounds (malformed/zero-area) — they have no
127
131
  // tappable center and would corrupt the nodeMap.
128
132
  if (n.bounds) {
@@ -134,14 +138,20 @@ export function parseUiautomatorXml(xml) {
134
138
  clickable: n.clickable,
135
139
  hasOwnLabel: label.length > 0,
136
140
  resourceId: n.resourceId || undefined,
141
+ scrollable: n.scrollable,
142
+ insideScrollable: parentScrollable,
137
143
  space: "px",
138
144
  });
139
145
  }
146
+ // A node inside a scrollable makes ALL its descendants scroll content; the
147
+ // container's own flag stays false (it's durable chrome) but its children
148
+ // inherit true.
149
+ const childScrollable = parentScrollable || n.scrollable;
140
150
  for (const c of n.children)
141
- visit(c);
151
+ visit(c, childScrollable);
142
152
  };
143
153
  for (const c of root.children)
144
- visit(c);
154
+ visit(c, false);
145
155
  return out;
146
156
  }
147
157
  /**
@@ -151,7 +161,7 @@ export function parseUiautomatorXml(xml) {
151
161
  * are its true descendants — required for ancestor-vs-leaf aggregation.
152
162
  */
153
163
  function buildAndroidTree(xml) {
154
- const root = makeRawAndroidNode("", "", "", "", false, null);
164
+ const root = makeRawAndroidNode("", "", "", "", false, false, null);
155
165
  const stack = [root];
156
166
  // Match every <node ...> / <node .../> open tag and standalone </node> close.
157
167
  // Attribute values are consumed as atomic quoted runs (`"[^"]*"`) so a literal
@@ -171,19 +181,34 @@ function buildAndroidTree(xml) {
171
181
  // the greedy run above swallows the trailing slash, so a `(\/?)` capture
172
182
  // can't see it.
173
183
  const selfClosing = tag.endsWith("/>");
174
- const node = makeRawAndroidNode(attr(tag, "class"), attr(tag, "text"), attr(tag, "content-desc"), attr(tag, "resource-id"), attr(tag, "clickable") === "true", parseAndroidBounds(attr(tag, "bounds")));
184
+ const node = makeRawAndroidNode(attr(tag, "class"), attr(tag, "text"), attr(tag, "content-desc"), attr(tag, "resource-id"), attr(tag, "clickable") === "true", attr(tag, "scrollable") === "true", parseAndroidBounds(attr(tag, "bounds")));
175
185
  stack[stack.length - 1].children.push(node);
176
186
  if (!selfClosing)
177
187
  stack.push(node);
178
188
  }
179
189
  return root;
180
190
  }
181
- function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
182
- return { role, text, contentDesc, resourceId, clickable, bounds, children: [] };
191
+ function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, scrollable, bounds) {
192
+ return { role, text, contentDesc, resourceId, clickable, scrollable, bounds, children: [] };
193
+ }
194
+ /**
195
+ * The foreground app's package name from a uiautomator dump's `package="..."`
196
+ * attribute. uiautomator stamps every node with the owning package; the first
197
+ * one is the foreground app. Used as a coarse-token input for the screen
198
+ * signature (see screen-signature.ts). Returns "" when absent (best-effort).
199
+ */
200
+ export function androidPackage(xml) {
201
+ const m = /<node\b[^>]*?\spackage="([^"]*)"/.exec(xml);
202
+ return m ? unescapeXml(m[1]) : "";
183
203
  }
184
204
  // ---------------------------------------------------------------------------
185
205
  // iOS — shared helpers for the WebDriverAgent (XCUITest) /source parser below
186
206
  // ---------------------------------------------------------------------------
207
+ /** iOS container types whose CONTENT scrolls. A node of one of these types (or
208
+ * any descendant of one) is marked `insideScrollable` so the screen signature
209
+ * excludes scroll content structurally while keeping the container's own id
210
+ * (see screen-signature.ts). */
211
+ const IOS_SCROLLABLE_TYPES = new Set(["ScrollView", "Table", "CollectionView"]);
187
212
  /** iOS roles/types that are directly actionable (the device taps their center). */
188
213
  const IOS_ACTIONABLE_TYPES = new Set([
189
214
  "Button",
@@ -258,15 +283,47 @@ export function parseXcuiHierarchy(json) {
258
283
  if (!root || typeof root !== "object")
259
284
  return [];
260
285
  const out = [];
261
- const visit = (n) => {
286
+ // `parentScrollable` is true iff this node OR any ANCESTOR is a scroll
287
+ // container. CRITICAL (the M1 fix): WDA's scroll CONTAINER is isAccessible=0
288
+ // and therefore NOT emitted, but its descendants are scroll content all the
289
+ // same — so the flag is threaded down the recursion regardless of whether the
290
+ // container node itself is emitted. The screen-signature excludes these
291
+ // structurally, so a scroll never changes the iOS signature.
292
+ const visit = (n, parentScrollable) => {
293
+ const rawType = n.type ?? "";
294
+ const typeKey = stripAxPrefix(rawType);
295
+ const isScroll = IOS_SCROLLABLE_TYPES.has(typeKey);
262
296
  const bounds = frameToBounds(n.rect ?? undefined);
297
+ // iOS NAVIGATION-BAR TITLE recovery. The bar carries the screen title in its
298
+ // `name`, but WDA marks the bar isAccessible=0 (so it's pruned) AND the large
299
+ // title StaticText scrolls WITH the content (insideScrollable). The title is
300
+ // then lost from the signature, silently OVER-MERGING distinct pushed screens
301
+ // (proven live: iOS Settings General/Accessibility/Privacy all reduced to the
302
+ // back button's parent label {tx:settings} → one frame). Emit the bar's name
303
+ // as a stable chrome node — it sits ABOVE the scroll (insideScrollable=false)
304
+ // and is scroll-invariant (constant as the large title collapses). Emitted
305
+ // first so `iosNavTitle` (find role==="navigationbar") sees the titled bar.
306
+ if (bounds && typeKey === "NavigationBar" && wdaTruthy(n.isVisible)) {
307
+ const navName = (n.name ?? "").trim();
308
+ if (navName) {
309
+ out.push({
310
+ role: normalizeRole(rawType),
311
+ label: navName,
312
+ bounds,
313
+ clickable: false,
314
+ hasOwnLabel: true,
315
+ resourceId: undefined,
316
+ scrollable: false,
317
+ insideScrollable: false,
318
+ space: "points",
319
+ });
320
+ }
321
+ }
263
322
  if (bounds && wdaTruthy(n.isAccessible) && wdaTruthy(n.isVisible)) {
264
323
  // Prefer the spoken label; fall back to a STRING value (search fields
265
324
  // expose their placeholder as `value`). Non-string values (a Switch's 1/0)
266
325
  // are ignored for the label, exactly like the idb path.
267
326
  const label = (n.label ?? (typeof n.value === "string" ? n.value : "")).trim();
268
- const rawType = n.type ?? "";
269
- const typeKey = stripAxPrefix(rawType);
270
327
  // `isEnabled` absent ⇒ assume enabled (WDA omits it on always-enabled types).
271
328
  const enabled = n.isEnabled == null ? true : wdaTruthy(n.isEnabled);
272
329
  const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && enabled;
@@ -277,16 +334,21 @@ export function parseXcuiHierarchy(json) {
277
334
  clickable: actionable,
278
335
  hasOwnLabel: label.length > 0,
279
336
  resourceId: (n.name || n.rawIdentifier) ?? undefined,
337
+ scrollable: isScroll,
338
+ insideScrollable: parentScrollable,
280
339
  space: "points",
281
340
  });
282
341
  }
283
342
  // Recurse into ALL children — an accessible element can nest inside a
284
- // non-accessible container (the Cell wrapping the Button), so we must not
285
- // prune the walk by accessibility, only the emission.
343
+ // non-accessible container (the Cell wrapping the Button, or the pruned
344
+ // scroll container), so we must not prune the walk by accessibility, only
345
+ // the emission. The scroll flag propagates onto descendants even though the
346
+ // container itself wasn't emitted.
347
+ const childScrollable = parentScrollable || isScroll;
286
348
  for (const c of n.children ?? [])
287
- visit(c);
349
+ visit(c, childScrollable);
288
350
  };
289
- visit(root);
351
+ visit(root, false);
290
352
  return out;
291
353
  }
292
354
  // ---------------------------------------------------------------------------
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Native "screen signature" v2 — a SCROLL-INVARIANT structural identity for a
3
+ * logical native screen, derived from the accessibility tree, sent to the
4
+ * backend as an entry/cross-run frame anchor (Phase 2 of native frame
5
+ * continuity; Phase 1 reuses the prior frame on pure scroll/keyboard steps).
6
+ *
7
+ * FCIS: this module is PURE (NativeNode[] + coarse inputs in, signature out) —
8
+ * no device access. The device gathers the coarse inputs (foreground activity /
9
+ * bundle id) and the parsed tree; this turns them into `{value, usable}`.
10
+ *
11
+ * The signature has two parts:
12
+ * coarse — a cheap, almost-always-available anchor (android `package|activity`,
13
+ * ios `bundleId|navTitle`).
14
+ * tokens — the persistent CHROME tokens that are NOT scroll content. Each
15
+ * chrome node contributes its resource-id (`id:…`) AND its label
16
+ * (`tx:…`) when present. This is what makes the signature
17
+ * scroll-invariant AND lets two same-activity screens be told apart.
18
+ *
19
+ * WHY v2 (two verified gaps in the id-only v1):
20
+ * 1. LABELS close the shared-chrome OVER-MERGE. A single-Activity app — Jetpack
21
+ * Compose (exposes NO resource-ids beyond the framework `android:id/content`)
22
+ * or a View app with a fixed toolbar+container shared across fragments —
23
+ * gives two DISTINCT screens the SAME id-set → identical signature → SILENT
24
+ * over-merge (the cardinal failure). But those screens DO differ in chrome
25
+ * LABELS (a home screen vs a settings sub-screen show different toolbar /
26
+ * button text). Including labels makes distinct screens produce distinct
27
+ * signatures, and makes Compose usable at all (label-only tokens).
28
+ * 2. STRUCTURAL scroll-exclusion replaces v1's geometric `contains()`. v1
29
+ * excluded scroll content by bounds-containment, which (a) mis-flagged an
30
+ * overlay/FAB sitting inside a list's rect as content (→ could over-merge),
31
+ * and (b) on iOS the scroll CONTAINER is isAccessible=0 and pruned from the
32
+ * NativeNode[], so geometric exclusion never fired (scroll changed the
33
+ * signature → over-split, feature inert). v2 excludes by TREE STRUCTURE: a
34
+ * node is content iff `insideScrollable` (it has a scrollable ANCESTOR),
35
+ * computed during parsing — see native-a11y.ts. The scroll container's OWN
36
+ * tokens are kept (it's durable chrome; `insideScrollable` is about
37
+ * descendants).
38
+ *
39
+ * The remaining failure mode after v2 is SAFE: dynamic chrome labels (a live
40
+ * clock, an unread badge) cause OVER-SPLIT (a new frame), never over-merge — the
41
+ * backend just mints a fresh frame, which is the conservative direction.
42
+ *
43
+ * USABLE GUARD (load-bearing, unchanged in spirit): `usable` is true only with
44
+ * >= MIN_STABLE_TOKENS tokens. A signature derived from an empty/sparse token
45
+ * set must NEVER be sent — sha1("") (and any near-empty set) collides across
46
+ * distinct screens and would silently over-merge them. When unusable the caller
47
+ * omits the field entirely and the backend falls back to Phase-1 continuity.
48
+ * This is the SAFE default: Flutter (no a11y tree) and the sparsest screens
49
+ * degrade here; id-rich Android and label-rich Compose are the validated wins.
50
+ */
51
+ import type { NativeNode } from "./native-a11y.js";
52
+ /** Minimum stable-chrome tokens for a signature to be usable (sent to the backend). */
53
+ export declare const MIN_STABLE_TOKENS = 2;
54
+ /** Coarse-token inputs gathered from the device (cheap, almost-always-available). */
55
+ export interface CoarseInputs {
56
+ platform: "android" | "ios";
57
+ /** Android: foreground app package (uiautomator `package` attr). */
58
+ package?: string;
59
+ /** Android: foreground activity (`pkg/activity` from dumpsys). */
60
+ activity?: string;
61
+ /** iOS: active app bundle id (WDA /wda/activeAppInfo). navTitle is derived here. */
62
+ bundleId?: string;
63
+ }
64
+ export interface ScreenSignature {
65
+ /** `platform|coarse|sha1(tokens)` — the value sent as native_screen_signature. */
66
+ value: string;
67
+ /** True only with >= MIN_STABLE_TOKENS tokens; the caller omits the field when false. */
68
+ usable: boolean;
69
+ /** Number of stable chrome tokens — the guard's basis. */
70
+ tokenCount: number;
71
+ }
72
+ /**
73
+ * Compute the screen signature from this step's parsed tree + coarse inputs.
74
+ * `value` is `platform|coarse|sha1(tokens)`; `usable` gates whether it's safe to
75
+ * send (>= MIN_STABLE_TOKENS distinct stable chrome tokens).
76
+ */
77
+ export declare function computeScreenSignature(nodes: NativeNode[], coarse: CoarseInputs): ScreenSignature;