@ishlabs/cli 0.26.1 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +4 -0
  2. package/dist/commands/doctor.js +21 -11
  3. package/dist/commands/iteration.js +13 -4
  4. package/dist/commands/study-run.js +12 -12
  5. package/dist/commands/study-screenshots.js +15 -12
  6. package/dist/commands/study.js +22 -3
  7. package/dist/lib/api-client.d.ts +1 -0
  8. package/dist/lib/docs.js +139 -7
  9. package/dist/lib/local-sim/adb.d.ts +35 -2
  10. package/dist/lib/local-sim/adb.js +107 -14
  11. package/dist/lib/local-sim/android.d.ts +5 -3
  12. package/dist/lib/local-sim/android.js +29 -11
  13. package/dist/lib/local-sim/device-pool.d.ts +85 -0
  14. package/dist/lib/local-sim/device-pool.js +316 -0
  15. package/dist/lib/local-sim/device.d.ts +29 -0
  16. package/dist/lib/local-sim/device.js +19 -1
  17. package/dist/lib/local-sim/emulator.d.ts +50 -0
  18. package/dist/lib/local-sim/emulator.js +189 -0
  19. package/dist/lib/local-sim/install.js +23 -3
  20. package/dist/lib/local-sim/ios.d.ts +31 -5
  21. package/dist/lib/local-sim/ios.js +80 -21
  22. package/dist/lib/local-sim/loop.js +199 -9
  23. package/dist/lib/local-sim/native-a11y.d.ts +24 -0
  24. package/dist/lib/local-sim/native-a11y.js +76 -14
  25. package/dist/lib/local-sim/screen-signature.d.ts +77 -0
  26. package/dist/lib/local-sim/screen-signature.js +170 -0
  27. package/dist/lib/local-sim/simctl-provision.d.ts +49 -0
  28. package/dist/lib/local-sim/simctl-provision.js +89 -0
  29. package/dist/lib/local-sim/simctl.d.ts +6 -4
  30. package/dist/lib/local-sim/simctl.js +18 -5
  31. package/dist/lib/local-sim/xcuitest.d.ts +22 -1
  32. package/dist/lib/local-sim/xcuitest.js +38 -6
  33. package/dist/lib/modality.js +7 -2
  34. package/dist/lib/paths.d.ts +1 -0
  35. package/dist/lib/paths.js +3 -0
  36. package/dist/lib/skill-content.js +5 -2
  37. package/dist/lib/upload.d.ts +27 -0
  38. package/dist/lib/upload.js +108 -11
  39. package/package.json +2 -2
@@ -14,6 +14,8 @@
14
14
  import type { Browser } from "playwright-core";
15
15
  import type { LocalStepAction, LocalSimBrowserOptions, LocalTabInfo, ContextValue } from "./types.js";
16
16
  import type { BrowserSession } from "./browser.js";
17
+ import type { ScreenSignature, CoarseInputs } from "./screen-signature.js";
18
+ import type { NativeNode } from "./native-a11y.js";
17
19
  /**
18
20
  * One observation of the target's current state.
19
21
  *
@@ -39,6 +41,29 @@ export interface DeviceObservation {
39
41
  documentHeight: number;
40
42
  /** Open-tab snapshot (browser-only; empty for native). */
41
43
  tabs: LocalTabInfo[];
44
+ /**
45
+ * Native only: the scroll-invariant structural "screen signature" computed
46
+ * from this observation's a11y tree (see screen-signature.ts). The loop sends
47
+ * `value` as the match-frame anchor ONLY when `usable` is true; browser
48
+ * targets omit it. Undefined when the platform doesn't compute one.
49
+ */
50
+ screenSignature?: ScreenSignature;
51
+ /**
52
+ * Native only, corpus-dump only: the PARSED a11y nodes that
53
+ * `computeScreenSignature` consumed for this observation (the exact array, so
54
+ * any signature algorithm can be replayed offline against it). Populated by the
55
+ * android/ios `observe()`; the browser leaves it undefined. Only surfaced for
56
+ * the `ISH_DUMP_CORPUS` instrumentation in loop.ts — nothing in the live path
57
+ * reads it.
58
+ */
59
+ nativeNodes?: NativeNode[];
60
+ /**
61
+ * Native only, corpus-dump only: the `CoarseInputs` (platform / package /
62
+ * activity / bundleId) fed into `computeScreenSignature` for this observation.
63
+ * Populated by the android/ios `observe()`; the browser leaves it undefined.
64
+ * Same instrumentation-only purpose as `nativeNodes`.
65
+ */
66
+ coarseInputs?: CoarseInputs;
42
67
  }
43
68
  /**
44
69
  * Result of executing one action against the target.
@@ -158,5 +183,9 @@ export declare function createDevice(platform: string, opts: {
158
183
  sharedBrowser?: Browser;
159
184
  /** Native: local .apk/.app path to install or a package/bundle id to launch. */
160
185
  appPath?: string;
186
+ /** Native pool: the device to drive — iOS simulator udid or Android adb serial. */
187
+ deviceId?: string;
188
+ /** iOS pool: the per-device WDA port. */
189
+ wdaPort?: number;
161
190
  log?: (msg: string) => void;
162
191
  }): Promise<SimulationDevice>;
@@ -132,17 +132,35 @@ export async function createDevice(platform, opts) {
132
132
  }
133
133
  case "android": {
134
134
  const { AndroidDevice } = await import("./android.js");
135
- return new AndroidDevice({
135
+ const dev = new AndroidDevice({
136
136
  appPath: opts.appPath,
137
137
  contextValues: opts.contextValues,
138
138
  log: opts.log,
139
139
  });
140
+ if (!opts.deviceId)
141
+ return dev; // single-device path — unchanged
142
+ // Parallel pool: pin every adb call this device makes to its serial.
143
+ // A Proxy runs each method inside withAdbSerial so AsyncLocalStorage
144
+ // carries the serial through all the device's internal adb calls — no
145
+ // per-method threading, and concurrent devices stay isolated.
146
+ const { withAdbSerial } = await import("./adb.js");
147
+ const serial = opts.deviceId;
148
+ return new Proxy(dev, {
149
+ get(target, prop, receiver) {
150
+ const val = Reflect.get(target, prop, receiver);
151
+ return typeof val === "function"
152
+ ? (...args) => withAdbSerial(serial, () => val.apply(target, args))
153
+ : val;
154
+ },
155
+ });
140
156
  }
141
157
  case "ios": {
142
158
  const { IOSDevice } = await import("./ios.js");
143
159
  return new IOSDevice({
144
160
  appPath: opts.appPath,
145
161
  contextValues: opts.contextValues,
162
+ udid: opts.deviceId,
163
+ wdaPort: opts.wdaPort,
146
164
  log: opts.log,
147
165
  });
148
166
  }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Android emulator (AVD) provisioning for parallel runs: list AVDs, launch
3
+ * tuned/headless emulators on distinct ports, wait for boot, kill them.
4
+ *
5
+ * The pool launches these and tears them down; the per-device adb routing is
6
+ * handled by `withAdbSerial` (see adb.ts), so this module only deals with the
7
+ * emulator PROCESS lifecycle (mirrors how connect.ts spawns/kills cloudflared).
8
+ */
9
+ import { type ChildProcess } from "node:child_process";
10
+ export declare class EmulatorError extends Error {
11
+ constructor(message: string);
12
+ }
13
+ /**
14
+ * Rewrite the `path=` / `path.rel=` lines of an AVD's pointer `.ini` to point at
15
+ * the clone's dir. Pure (tested).
16
+ */
17
+ export declare function rewriteAvdPointerIni(text: string, newAvdDirAbs: string): string;
18
+ /**
19
+ * Clone an existing AVD by file-copy — NO avdmanager (and therefore no JDK
20
+ * dependency, which `avdmanager` needs and many machines lack). Copies the
21
+ * `.avd` dir minus its running-state, rewrites the pointer `.ini` paths and the
22
+ * `AvdId` / displayname. Turns "you need N AVDs" into "you need ONE".
23
+ */
24
+ export declare function cloneAvd(source: string, newName: string): void;
25
+ /** Delete a (cloned) AVD's files. Best-effort, no avdmanager needed. */
26
+ export declare function deleteAvd(name: string): void;
27
+ /** AVD names available on this machine (`emulator -list-avds`). */
28
+ export declare function listAvds(): Promise<string[]>;
29
+ export interface SpawnedEmulator {
30
+ child: ChildProcess;
31
+ /** adb serial for this instance, e.g. "emulator-5554". */
32
+ serial: string;
33
+ port: number;
34
+ }
35
+ /**
36
+ * Launch an AVD as a tuned, lightweight, (by default) headless emulator on a
37
+ * specific console port. The serial is deterministically `emulator-<port>`.
38
+ * The flags keep it small so many fit on a normal machine: no window, software
39
+ * GPU, capped RAM, no boot animation / audio / snapshot writeback.
40
+ */
41
+ export declare function spawnEmulator(avd: string, port: number, opts?: {
42
+ headless?: boolean;
43
+ memMb?: number;
44
+ }): SpawnedEmulator;
45
+ /** Console ports for N emulators: 5554, 5556, 5558, … (adb wants even ports). */
46
+ export declare function emulatorPorts(count: number, base?: number): number[];
47
+ /** Wait until `serial` is online AND `sys.boot_completed` is 1. */
48
+ export declare function waitForBoot(serial: string, timeoutMs?: number): Promise<void>;
49
+ /** Gracefully stop an emulator (`adb -s <serial> emu kill`). Best-effort. */
50
+ export declare function emuKill(serial: string): Promise<void>;
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Android emulator (AVD) provisioning for parallel runs: list AVDs, launch
3
+ * tuned/headless emulators on distinct ports, wait for boot, kill them.
4
+ *
5
+ * The pool launches these and tears them down; the per-device adb routing is
6
+ * handled by `withAdbSerial` (see adb.ts), so this module only deals with the
7
+ * emulator PROCESS lifecycle (mirrors how connect.ts spawns/kills cloudflared).
8
+ */
9
+ import { spawn, execFile, execFileSync } from "node:child_process";
10
+ import { existsSync, readFileSync, writeFileSync, cpSync, rmSync } from "node:fs";
11
+ import { join, basename } from "node:path";
12
+ import { homedir } from "node:os";
13
+ import { promisify } from "node:util";
14
+ import { withAdbSerial, adb, adbShell } from "./adb.js";
15
+ const execFileAsync = promisify(execFile);
16
/**
 * Candidate Android SDK roots, in lookup order: the `ANDROID_HOME` and
 * `ANDROID_SDK_ROOT` environment variables first, then the per-OS default
 * install locations (macOS, Linux, and — when `LOCALAPPDATA` is set — Windows).
 *
 * Unset/empty entries are dropped; the directories are NOT checked for
 * existence here.
 *
 * @returns the non-empty candidate paths, most specific first
 */
function sdkRoots() {
    const home = homedir();
    const localAppData = process.env.LOCALAPPDATA;
    return [
        process.env.ANDROID_HOME,
        process.env.ANDROID_SDK_ROOT,
        join(home, "Library", "Android", "sdk"),
        join(home, "Android", "Sdk"),
        // Windows default install location; only present when LOCALAPPDATA is
        // set, so the list is unchanged on macOS/Linux.
        localAppData ? join(localAppData, "Android", "Sdk") : undefined,
    ].filter(Boolean);
}
25
/**
 * Error type raised by the emulator provisioning helpers in this module, so
 * callers can tell provisioning failures apart from other errors via
 * `instanceof` or the `name` field.
 */
export class EmulatorError extends Error {
    /** Overrides the inherited "Error" label on every instance. */
    name = "EmulatorError";
    constructor(message) {
        super(message);
    }
}
31
/**
 * Resolve the `emulator` binary: ISH_EMULATOR → SDK → PATH.
 *
 * Lookup order:
 *  1. `ISH_EMULATOR`, when set and the file exists;
 *  2. `<sdkRoot>/emulator/<binary>` for each candidate SDK root, where
 *     `<binary>` is `emulator.exe` on Windows and `emulator` elsewhere;
 *  3. the bare command `emulator`, when `where`/`which` can find it on PATH.
 *
 * @returns the path (or bare command name) to run, or `null` when none is found
 */
function findEmulator() {
    const fromEnv = process.env.ISH_EMULATOR;
    if (fromEnv && existsSync(fromEnv))
        return fromEnv;
    const isWindows = process.platform === "win32";
    // The SDK ships the launcher as `emulator.exe` on Windows; probing for the
    // extension-less name there would never match.
    const binaryName = isWindows ? "emulator.exe" : "emulator";
    for (const home of sdkRoots()) {
        const p = join(home, "emulator", binaryName);
        if (existsSync(p))
            return p;
    }
    try {
        // Exit status is the only signal we need; a non-zero exit throws.
        execFileSync(isWindows ? "where" : "which", ["emulator"], { stdio: "ignore" });
        return "emulator";
    }
    catch {
        return null;
    }
}
49
/**
 * The AVD home dir (where `<name>.avd/` + `<name>.ini` live).
 *
 * Follows the Android tools' documented defaults: `ANDROID_AVD_HOME` wins;
 * otherwise it is `avd/` under `ANDROID_EMULATOR_HOME`, which defaults to
 * `ANDROID_USER_HOME`, which defaults to `~/.android`. With none of the
 * variables set this is `~/.android/avd`.
 *
 * @returns the directory path (not checked for existence)
 */
function avdHome() {
    const { ANDROID_AVD_HOME, ANDROID_EMULATOR_HOME, ANDROID_USER_HOME } = process.env;
    if (ANDROID_AVD_HOME)
        return ANDROID_AVD_HOME;
    const emulatorHome = ANDROID_EMULATOR_HOME || ANDROID_USER_HOME || join(homedir(), ".android");
    return join(emulatorHome, "avd");
}
53
/**
 * Entry names (matched by basename) that are left out when an AVD directory is
 * copied to make a clone. They hold running-state — locks, snapshots and the
 * live userdata overlay — which the emulator regenerates on boot; skipping
 * them also avoids a needless multi-GB copy.
 */
const TRANSIENT_AVD_ENTRIES = new Set([
    // Snapshot state.
    "snapshots",
    "snapshot.trace",
    "read-snapshot.txt",
    // Lock files.
    "snapshot.lock.lock",
    "multiinstance.lock",
    "hardware-qemu.ini.lock",
    // Per-boot generated config and disk overlays.
    "hardware-qemu.ini",
    "userdata-qemu.img",
    "userdata-qemu.img.qcow2",
    "cache.img.qcow2",
    "encryptionkey.img.qcow2",
    // Miscellaneous scratch entries.
    "tmpAdbCmds",
    "version_num.cache",
]);
73
/**
 * Rewrite the `path=` / `path.rel=` lines of an AVD's pointer `.ini` to point at
 * the clone's dir. Pure (tested).
 *
 * Only the first `path=` and first `path.rel=` line are rewritten; every other
 * line is returned unchanged. `path.rel` becomes `avd/<basename of the dir>`.
 *
 * @param text         contents of the source AVD's pointer `.ini`
 * @param newAvdDirAbs absolute path of the clone's `.avd` directory
 * @returns the rewritten `.ini` text
 */
export function rewriteAvdPointerIni(text, newAvdDirAbs) {
    const rel = `avd/${basename(newAvdDirAbs)}`;
    // Function replacers insert the value verbatim. A plain replacement string
    // would expand `$&`, `$'`, `` $` `` and `$$` if they occur in the path.
    return text
        .replace(/^path=.*$/m, () => `path=${newAvdDirAbs}`)
        .replace(/^path\.rel=.*$/m, () => `path.rel=${rel}`);
}
83
/**
 * Clone an existing AVD by file-copy — NO avdmanager (and therefore no JDK
 * dependency, which `avdmanager` needs and many machines lack). Copies the
 * `.avd` dir minus its running-state, rewrites the pointer `.ini` paths and the
 * `AvdId` / displayname. Turns "you need N AVDs" into "you need ONE".
 *
 * The pointer `.ini` and `config.ini` are only rewritten when they exist; a
 * missing one is skipped silently.
 *
 * @param source  name of the AVD to copy; `<avdHome>/<source>.avd` must exist
 * @param newName name of the clone to create under the same AVD home
 * @throws EmulatorError when the source `.avd` directory is not found
 */
export function cloneAvd(source, newName) {
    const home = avdHome();
    const srcDir = join(home, `${source}.avd`);
    const srcIni = join(home, `${source}.ini`);
    const dstDir = join(home, `${newName}.avd`);
    const dstIni = join(home, `${newName}.ini`);
    if (!existsSync(srcDir)) {
        throw new EmulatorError(`Cannot clone AVD "${source}" — ${srcDir} not found.`);
    }
    // Copy the data dir, skipping running-state/lock files (also avoids a needless
    // multi-GB copy of the live userdata overlay).
    cpSync(srcDir, dstDir, {
        recursive: true,
        filter: (src) => !TRANSIENT_AVD_ENTRIES.has(basename(src)),
    });
    // Pointer .ini (paths) and config.ini (AvdId / displayname) → the new name.
    if (existsSync(srcIni)) {
        writeFileSync(dstIni, rewriteAvdPointerIni(readFileSync(srcIni, "utf8"), dstDir));
    }
    const cfgPath = join(dstDir, "config.ini");
    if (existsSync(cfgPath)) {
        // Function replacers write `newName` verbatim; a plain replacement
        // string would expand `$&` / `$$`-style patterns inside the name.
        const cfg = readFileSync(cfgPath, "utf8")
            .replace(/^AvdId=.*$/m, () => `AvdId=${newName}`)
            .replace(/^avd\.ini\.displayname=.*$/m, () => `avd.ini.displayname=${newName}`);
        writeFileSync(cfgPath, cfg);
    }
}
116
/**
 * Remove an AVD's on-disk files: first the `<name>.avd` directory
 * (recursively), then the `<name>.ini` pointer. Both removals use `force`, so
 * a path that does not exist is not an error. No avdmanager needed.
 *
 * @param name AVD name (typically a clone made by `cloneAvd`)
 */
export function deleteAvd(name) {
    const root = avdHome();
    const removals = [
        [join(root, `${name}.avd`), { recursive: true, force: true }],
        [join(root, `${name}.ini`), { force: true }],
    ];
    for (const [target, options] of removals) {
        rmSync(target, options);
    }
}
122
/**
 * AVD names available on this machine (`emulator -list-avds`).
 *
 * Best-effort: resolves to `[]` when the emulator binary cannot be found or
 * the command fails / exceeds its 15s timeout.
 *
 * @returns one trimmed, non-empty AVD name per output line
 */
export async function listAvds() {
    const bin = findEmulator();
    if (!bin)
        return [];
    // Some emulator builds print log lines such as
    // "INFO    | Storing crashdata in: ..." on stdout before the names. Drop
    // anything shaped like "<LEVEL> |" so it is not mistaken for an AVD.
    const logLine = /^(?:INFO|WARNING|ERROR|DEBUG|VERBOSE|FATAL)\s*\|/;
    try {
        const { stdout } = await execFileAsync(bin, ["-list-avds"], { timeout: 15_000 });
        return stdout
            .split("\n")
            .map((s) => s.trim())
            .filter((line) => line !== "" && !logLine.test(line));
    }
    catch {
        return [];
    }
}
135
/**
 * Start the given AVD as a detached emulator process bound to a specific
 * console port. Headless unless `opts.headless === false`; memory defaults to
 * 1536 MB. The flags passed disable snapshot saving, the boot animation and
 * audio, and select the `swiftshader_indirect` GPU mode.
 *
 * @param avd  name of the AVD to boot
 * @param port console port; the returned serial is `emulator-<port>`
 * @param opts optional `headless` / `memMb` overrides
 * @returns the child process handle plus the instance's adb serial and port
 * @throws EmulatorError when no emulator binary can be located
 */
export function spawnEmulator(avd, port, opts = {}) {
    const emulatorBin = findEmulator();
    if (!emulatorBin) {
        throw new EmulatorError("emulator binary not found — set ISH_EMULATOR or install the Android SDK 'emulator' package.");
    }
    const { headless, memMb } = opts;
    const windowFlags = headless !== false ? ["-no-window"] : [];
    const argv = [
        "-avd", avd,
        "-port", String(port),
        "-no-snapshot-save",
        "-no-boot-anim",
        "-no-audio",
        "-gpu", "swiftshader_indirect",
        "-memory", String(memMb ?? 1536),
        ...windowFlags,
    ];
    // Detached + unref'd: the emulator keeps running independently of this
    // process; the returned handle is what callers use to kill it.
    const child = spawn(emulatorBin, argv, { detached: true, stdio: "ignore" });
    child.unref();
    const serial = `emulator-${port}`;
    return { child, serial, port };
}
162
/**
 * Console ports for `count` emulators, spaced two apart starting at `base`:
 * 5554, 5556, 5558, … by default (adb wants even ports).
 *
 * @param count how many ports to produce
 * @param base  first port in the sequence
 * @returns the ports in ascending order
 */
export function emulatorPorts(count, base = 5554) {
    const slots = Array.from({ length: count });
    return slots.map((_unused, index) => base + 2 * index);
}
166
/** Promise that resolves (with no value) after roughly `ms` milliseconds. */
const delay = (ms) => {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
};
167
/**
 * Poll an emulator until `adb wait-for-device` succeeds and
 * `getprop sys.boot_completed` reads "1". Each probe runs with adb pinned to
 * `serial`; a failed probe counts as "not ready yet" and is retried after a
 * 2-second pause.
 *
 * @param serial    adb serial of the emulator, e.g. "emulator-5554"
 * @param timeoutMs overall budget before giving up (default 180s)
 * @throws EmulatorError when the budget elapses without a completed boot
 */
export async function waitForBoot(serial, timeoutMs = 180_000) {
    const giveUpAt = Date.now() + timeoutMs;
    const readBootFlag = async () => {
        await adb(["wait-for-device"], 10_000);
        const prop = await adbShell(["getprop", "sys.boot_completed"], 10_000);
        return prop.trim();
    };
    while (Date.now() < giveUpAt) {
        let flag;
        try {
            flag = await withAdbSerial(serial, readBootFlag);
        }
        catch {
            /* not online yet */
            flag = undefined;
        }
        if (flag === "1") {
            return;
        }
        await delay(2000);
    }
    throw new EmulatorError(`emulator ${serial} did not finish booting within ${timeoutMs / 1000}s`);
}
186
/**
 * Ask an emulator to shut down via `adb emu kill`, with adb pinned to
 * `serial`. Best-effort: a rejected adb call is swallowed, so this resolves
 * whether or not the emulator was reachable.
 *
 * @param serial adb serial of the emulator to stop
 */
export async function emuKill(serial) {
    const killRequest = withAdbSerial(serial, () => adb(["emu", "kill"], 15_000));
    await killRequest.catch(() => { });
}
@@ -5,9 +5,6 @@
5
5
  */
6
6
  import { existsSync } from "node:fs";
7
7
  import { chromium } from "playwright-core";
8
- // Deep-import the bundled registry so this works in both the npm-install path
9
- // and the standalone bun binary (which has no `npx` to spawn).
10
- import { registry } from "playwright-core/lib/server/registry/index";
11
8
  // playwright-core's userAgent module does `require("../../../package.json")`
12
9
  // at runtime to read its version. bun's --compile bundler is unreliable about
13
10
  // embedding that JSON, which causes install to crash in the standalone binary
@@ -17,6 +14,13 @@ import { registry } from "playwright-core/lib/server/registry/index";
17
14
  // Keep this string in sync with the playwright-core dep in package.json. It
18
15
  // only feeds the User-Agent string sent to download CDN, so a slight mismatch
19
16
  // is harmless.
17
+ //
18
+ // package.json pins playwright-core EXACTLY (no ^) because this module's
19
+ // registry deep import and scripts/patch-playwright-core.mjs both depend on
20
+ // playwright-core internals that change between minor versions — 1.60.0
21
+ // removed `./lib/server/registry/index` from the exports map, which broke
22
+ // every fresh `npm install @ishlabs/cli` while CI (lockfile-pinned to 1.59.1)
23
+ // stayed green. Bump the pin and this constant together, deliberately.
20
24
  const PLAYWRIGHT_CORE_VERSION = "1.59.1";
21
25
  if (!process.env.PW_VERSION_OVERRIDE) {
22
26
  process.env.PW_VERSION_OVERRIDE = PLAYWRIGHT_CORE_VERSION;
@@ -34,6 +38,22 @@ export async function installBrowser(quiet = false) {
34
38
  const log = (msg) => { if (!quiet)
35
39
  console.error(msg); };
36
40
  log("Installing Chromium for local simulations (~120 MB)...");
41
+ // Deep-import the bundled registry so this works in both the npm-install
42
+ // path and the standalone bun binary (which has no `npx` to spawn). The
43
+ // import is lazy — `playwright-core/lib/server/registry/index` is not a
44
+ // semver-stable subpath (1.60.0 dropped it from the exports map), so a
45
+ // top-level import would make this whole module unloadable and take
46
+ // `isBrowserInstalled()` / doctor down with it.
47
+ let registry;
48
+ try {
49
+ ({ registry } = await import("playwright-core/lib/server/registry/index"));
50
+ }
51
+ catch (err) {
52
+ const detail = err instanceof Error ? err.message : String(err);
53
+ throw new Error(`Failed to load the Playwright browser installer (${detail}). ` +
54
+ `The installed playwright-core version is incompatible with this CLI — ` +
55
+ `expected ${PLAYWRIGHT_CORE_VERSION}. Reinstall the CLI to fix.`);
56
+ }
37
57
  try {
38
58
  const executables = registry.resolveBrowsers(["chromium"], {});
39
59
  await registry.install(executables, { force: false });
@@ -37,16 +37,41 @@ export interface IosDeviceOptions {
37
37
  bundleId?: string;
38
38
  /** Local .app path to install before the run, or a bundle id to launch. */
39
39
  appPath?: string;
40
+ /** Simulator udid to drive. When set (pooled/parallel run), skip booted-sim discovery. */
41
+ udid?: string;
42
+ /** WDA port for THIS device. When set, the runner binds it instead of DEFAULT_PORT (concurrent pool). */
43
+ wdaPort?: number;
40
44
  contextValues: ContextValue[];
41
45
  log?: (msg: string) => void;
42
46
  }
47
+ /**
48
+ * The run-up-front caveat for native state reset, or null when none is needed.
49
+ *
50
+ * A terminate+relaunch does NOT clear app data, so state one participant creates
51
+ * (a reminder, a saved record) can leak into the next. When we hold an
52
+ * installable `.app` the runner uninstall+reinstalls per participant (clean), so
53
+ * no warning is needed. A bare bundle-id / system app (e.g.
54
+ * `com.apple.reminders`) can't be reinstalled — so for a multi-participant run
55
+ * against one, warn that state may persist. A single-participant run has nothing
56
+ * to leak into, so it stays quiet.
57
+ *
58
+ * Lives here (not on the per-participant IOSDevice, which is recreated each
59
+ * participant) because the decision needs the run-scoped cohort size — the loop
60
+ * owns that. Pure + exported so it can be unit-tested without a simulator.
61
+ *
62
+ * @param reinstallable true when the target is a local `.app`/`.apk` we reinstall
63
+ * @param participantCount number of participants in this run
64
+ */
65
+ export declare function nativeStateResetWarning(reinstallable: boolean, participantCount: number): string | null;
43
66
  export declare class IOSDevice implements SimulationDevice {
44
67
  private readonly contextValues;
45
68
  private readonly log;
46
69
  private bundleId;
47
70
  private readonly appPath;
48
- /** udid of the single booted simulator we drive. */
71
+ /** udid of the simulator we drive. Set from opts (pooled) or discovered (single-device). */
49
72
  private udid;
73
+ /** WDA port for this device when pooled; undefined → DEFAULT_PORT (single-device). */
74
+ private readonly wdaPort;
50
75
  /** Set once the WebDriverAgent runner is up, so the startup note logs once. */
51
76
  private wdaStarted;
52
77
  /** POINT size — what idb ui tap/swipe consume (de-normalization basis for TAPS). */
@@ -86,10 +111,11 @@ export declare class IOSDevice implements SimulationDevice {
86
111
  private refreshScreen;
87
112
  observe(): Promise<DeviceObservation>;
88
113
  /**
89
- * Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
90
- * failure (retries exhausted on a trivial tree, parse error) degrades to an
91
- * empty tree so the backend falls back to vision — a missing tree must never
92
- * abort the observation.
114
+ * Read + serialize WDA's /source a11y tree (bounds in POINTS). Returns the
115
+ * serialized tree, the node map and the FLAT parsed nodes (for the screen
116
+ * signature). Any failure (retries exhausted on a trivial tree, parse error)
117
+ * degrades to an empty tree so the backend falls back to vision — a missing
118
+ * tree must never abort the observation.
93
119
  */
94
120
  private dumpTree;
95
121
  captureScreenshot(): Promise<string>;
@@ -31,12 +31,13 @@
31
31
  * backend never converts coords with screen_width/height.
32
32
  */
33
33
  import { resolveTextValue } from "./actions.js";
34
- import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, appBuildFromSimulator, } from "./simctl.js";
34
+ import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, uninstallApp, isAppInstalled, bundleIdFromApp, appBuildFromSimulator, } from "./simctl.js";
35
35
  // iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
36
- import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
36
+ import { ensureWda, closeWda, describeScreen, describeAll, activeBundleId, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
37
37
  import { isLocalPath } from "../upload.js";
38
38
  import { deNormalizePoint, deNormalizeDrag, pointToPixel } from "./coordinates.js";
39
39
  import { parseXcuiHierarchy, serializeNativeTree, boundsCenter } from "./native-a11y.js";
40
+ import { computeScreenSignature } from "./screen-signature.js";
40
41
  // Let animations/transitions settle before the next observation so the
41
42
  // screenshot the LLM reasons over reflects the action's result.
42
43
  const POST_GESTURE_SETTLE_MS = 500;
@@ -57,13 +58,42 @@ const NON_BACK_LEADING_LABELS = new Set([
57
58
  async function settle(ms = POST_GESTURE_SETTLE_MS) {
58
59
  await new Promise((r) => setTimeout(r, ms));
59
60
  }
61
/**
 * Decide whether a run should print an up-front caveat about app state
 * leaking between participants, and produce that caveat.
 *
 * Terminating and relaunching an app does not clear its data. A local,
 * installable build is uninstalled and reinstalled for each participant, so it
 * starts clean and needs no warning. A bare bundle id / system app (e.g.
 * `com.apple.reminders`) cannot be reinstalled, so with more than one
 * participant its state may carry over. A run with a single participant has
 * nobody to leak into and stays quiet.
 *
 * This is a pure module-level function rather than an IOSDevice method
 * because the decision depends on the run-wide participant count, which the
 * loop owns; that also keeps it unit-testable without a simulator.
 *
 * @param reinstallable true when the target is a local `.app`/`.apk` we reinstall
 * @param participantCount number of participants in this run
 * @returns the warning text, or null when no warning is needed
 */
export function nativeStateResetWarning(reinstallable, participantCount) {
    if (reinstallable) {
        return null;
    }
    if (participantCount <= 1) {
        return null;
    }
    const sentences = [
        "Note: app data is NOT reset between participants — the target is an installed ",
        "bundle id (e.g. a system app), which can't be reinstalled, so state an earlier ",
        "participant creates may persist into the next and skew results. Pass ",
        "--app <path-to.app> to enable a clean reinstall per participant, or run one ",
        "participant per study for a guaranteed clean start.",
    ];
    return sentences.join("");
}
60
88
  export class IOSDevice {
61
89
  contextValues;
62
90
  log;
63
91
  bundleId;
64
92
  appPath;
65
- /** udid of the single booted simulator we drive. */
93
+ /** udid of the simulator we drive. Set from opts (pooled) or discovered (single-device). */
66
94
  udid = "";
95
+ /** WDA port for this device when pooled; undefined → DEFAULT_PORT (single-device). */
96
+ wdaPort;
67
97
  /** Set once the WebDriverAgent runner is up, so the startup note logs once. */
68
98
  wdaStarted = false;
69
99
  /** POINT size — what idb ui tap/swipe consume (de-normalization basis for TAPS). */
@@ -90,9 +120,13 @@ export class IOSDevice {
90
120
  this.log = opts.log ?? (() => { });
91
121
  this.bundleId = opts.bundleId ?? null;
92
122
  this.appPath = opts.appPath;
123
+ this.udid = opts.udid ?? "";
124
+ this.wdaPort = opts.wdaPort;
93
125
  }
94
126
  async launchOrReset(target) {
95
- this.udid = await requireOneBootedSimulator();
127
+ // Pooled/parallel runs pin a udid via opts; the single-device path discovers
128
+ // the one booted simulator (and still rejects >1, preserving today's UX).
129
+ this.udid = this.udid || (await requireOneBootedSimulator());
96
130
  // First call: install the .app (if --app is a local path) and resolve the
97
131
  // bundle id to terminate/relaunch on. `target` is the iteration's platform
98
132
  // target (a bundle id) when no --app is supplied. Throws (rather than
@@ -107,11 +141,15 @@ export class IOSDevice {
107
141
  if (!this.wdaStarted) {
108
142
  this.log("Starting the iOS automation runner (WebDriverAgent); first launch can take ~30-60s...");
109
143
  }
110
- await ensureWda(this.udid);
144
+ await ensureWda(this.udid, { port: this.wdaPort });
111
145
  this.wdaStarted = true;
112
146
  // Prime screen geometry (points) before the first de-normalization.
113
147
  await this.refreshScreen();
114
- // Per-participant reset: terminate then relaunch from a clean state.
148
+ // Per-participant reset: terminate then relaunch. For a local .app target,
149
+ // resolveBundleId (above) already uninstall+reinstalled this fresh device's
150
+ // app, so each participant starts from clean data. A bundle-id / system-app
151
+ // target can't be reinstalled — runLocalSimulations warns once up front that
152
+ // its state may persist between participants (see nativeStateResetWarning).
115
153
  await terminateApp(this.udid, bundleId);
116
154
  await launchApp(this.udid, bundleId);
117
155
  await settle(1500); // cold start needs longer than a gesture settle
@@ -155,7 +193,10 @@ export class IOSDevice {
155
193
  throw new Error(`Could not read CFBundleIdentifier from "${appSpec}/Info.plist". ` +
156
194
  `Pass --app <bundle.id> explicitly if the .app layout is unusual.`);
157
195
  }
158
- this.log(`Installing ${appSpec} (${id})...`);
196
+ // Uninstall first so a build left over from a prior run doesn't carry its
197
+ // data into participant 0 — installApp alone preserves the data container.
198
+ this.log(`Installing a clean build of ${appSpec} (${id})...`);
199
+ await uninstallApp(this.udid, id);
159
200
  await installApp(this.udid, appSpec);
160
201
  return id;
161
202
  }
@@ -176,14 +217,26 @@ export class IOSDevice {
176
217
  }
177
218
  async observe() {
178
219
  // Refresh geometry each step (orientation can change), then capture the
179
- // pixel screenshot and the a11y tree in parallel (independent reads). The
180
- // dump is wrapped so a failure degrades to the vision path (empty tree).
220
+ // pixel screenshot, the a11y tree, and the active bundle id in parallel
221
+ // (independent reads). The dump is wrapped so a failure degrades to the
222
+ // vision path (empty tree); the bundle-id read is best-effort ("" on
223
+ // failure → the navTitle-only coarse token).
181
224
  await this.refreshScreen();
182
- const [png, tree] = await Promise.all([
183
- screenshotPng(),
225
+ const [png, tree, bundleId] = await Promise.all([
226
+ screenshotPng(this.udid),
184
227
  this.dumpTree(),
228
+ activeBundleId(this.udid),
185
229
  ]);
186
230
  this.lastNodeMap = tree.nodeMap;
231
+ // Scroll-invariant screen signature from this dump's parsed nodes + coarse
232
+ // inputs (active bundle id; navTitle is derived from the nodes). iOS is
233
+ // best-effort — sparse SwiftUI trees are usually unusable and fall back to
234
+ // Phase-1 continuity (sent only when usable; see loop.ts).
235
+ const coarseInputs = {
236
+ platform: "ios",
237
+ bundleId,
238
+ };
239
+ const screenSignature = computeScreenSignature(tree.nodes, coarseInputs);
187
240
  return {
188
241
  screenshot: png.toString("base64"),
189
242
  // Element path when describe-all produced a tree; "" → backend vision.
@@ -196,13 +249,19 @@ export class IOSDevice {
196
249
  // Native has no scrollable document; the screen IS the page.
197
250
  documentHeight: this.pixelHeight,
198
251
  tabs: [],
252
+ screenSignature,
253
+ // Corpus-dump only (ISH_DUMP_CORPUS): the exact parsed nodes + coarse
254
+ // inputs the signature consumed, so any algorithm can be replayed offline.
255
+ nativeNodes: tree.nodes,
256
+ coarseInputs,
199
257
  };
200
258
  }
201
259
  /**
202
- * Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
203
- * failure (retries exhausted on a trivial tree, parse error) degrades to an
204
- * empty tree so the backend falls back to vision — a missing tree must never
205
- * abort the observation.
260
+ * Read + serialize WDA's /source a11y tree (bounds in POINTS). Returns the
261
+ * serialized tree, the node map and the FLAT parsed nodes (for the screen
262
+ * signature). Any failure (retries exhausted on a trivial tree, parse error)
263
+ * degrades to an empty tree so the backend falls back to vision — a missing
264
+ * tree must never abort the observation.
206
265
  */
207
266
  async dumpTree() {
208
267
  try {
@@ -210,23 +269,23 @@ export class IOSDevice {
210
269
  const nodes = parseXcuiHierarchy(json);
211
270
  const tree = serializeNativeTree(nodes);
212
271
  this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
213
- return tree;
272
+ return { ...tree, nodes };
214
273
  }
215
274
  catch (err) {
216
275
  const msg = err instanceof Error ? err.message : String(err);
217
276
  this.log(`a11y describe-all failed, falling back to vision: ${msg}`);
218
- return { simplified: "", nodeMap: new Map() };
277
+ return { simplified: "", nodeMap: new Map(), nodes: [] };
219
278
  }
220
279
  }
221
280
  async captureScreenshot() {
222
- const png = await screenshotPng();
281
+ const png = await screenshotPng(this.udid);
223
282
  return png.toString("base64");
224
283
  }
225
284
  async captureScreenshotJpeg() {
226
285
  // simctl screenshot only emits PNG. We return the PNG bytes; the upload/
227
286
  // record path treats them as opaque image bytes (PDQ frame-matching works
228
287
  // on PNG). The loop labels native uploads image/png.
229
- return screenshotPng();
288
+ return screenshotPng(this.udid);
230
289
  }
231
290
  dimensions() {
232
291
  // PIXELS — the space the loop re-normalizes the recorded coord against.
@@ -530,7 +589,7 @@ export class IOSDevice {
530
589
  * success:true (the gesture was attempted); the loud log is the signal.
531
590
  */
532
591
  async openSystemPanel(panel) {
533
- const before = await screenshotPng();
592
+ const before = await screenshotPng(this.udid);
534
593
  const w = this.pointWidth;
535
594
  const h = this.pointHeight;
536
595
  // Start ON the top edge and travel a third of the screen down. Control
@@ -543,7 +602,7 @@ export class IOSDevice {
543
602
  await settle();
544
603
  // Loudly surface a no-op: the simulator's synthetic touch often can't drive
545
604
  // the system edge gesture. An identical screenshot means the panel didn't open.
546
- const after = await screenshotPng();
605
+ const after = await screenshotPng(this.udid);
547
606
  if (before.equals(after)) {
548
607
  this.log(`open_system_panel (${panel}): top-edge swipe produced no visible change — ` +
549
608
  `the simulator's synthetic touch likely didn't trigger the system gesture (flaky on the simulator).`);