@ishlabs/cli 0.26.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/commands/doctor.d.ts +16 -0
  2. package/dist/commands/doctor.js +34 -9
  3. package/dist/commands/iteration.js +23 -5
  4. package/dist/commands/study-participant.js +1 -1
  5. package/dist/commands/study-run.js +26 -1
  6. package/dist/commands/study-screenshots.js +38 -5
  7. package/dist/lib/api-client.d.ts +4 -0
  8. package/dist/lib/api-client.js +6 -1
  9. package/dist/lib/docs.js +15 -3
  10. package/dist/lib/local-sim/actions.d.ts +18 -0
  11. package/dist/lib/local-sim/actions.js +30 -0
  12. package/dist/lib/local-sim/adb.d.ts +39 -0
  13. package/dist/lib/local-sim/adb.js +152 -17
  14. package/dist/lib/local-sim/android.d.ts +12 -4
  15. package/dist/lib/local-sim/android.js +44 -11
  16. package/dist/lib/local-sim/device.d.ts +44 -0
  17. package/dist/lib/local-sim/ios.d.ts +12 -5
  18. package/dist/lib/local-sim/ios.js +45 -11
  19. package/dist/lib/local-sim/loop.js +220 -26
  20. package/dist/lib/local-sim/native-a11y.d.ts +24 -0
  21. package/dist/lib/local-sim/native-a11y.js +76 -14
  22. package/dist/lib/local-sim/screen-signature.d.ts +77 -0
  23. package/dist/lib/local-sim/screen-signature.js +166 -0
  24. package/dist/lib/local-sim/simctl.d.ts +15 -0
  25. package/dist/lib/local-sim/simctl.js +41 -1
  26. package/dist/lib/local-sim/types.d.ts +11 -2
  27. package/dist/lib/local-sim/xcuitest.d.ts +7 -0
  28. package/dist/lib/local-sim/xcuitest.js +16 -0
  29. package/dist/lib/modality.js +7 -2
  30. package/dist/lib/paths.d.ts +6 -0
  31. package/dist/lib/paths.js +9 -0
  32. package/dist/lib/report-readiness.d.ts +44 -0
  33. package/dist/lib/report-readiness.js +74 -0
  34. package/dist/lib/skill-content.js +2 -0
  35. package/package.json +1 -1
@@ -10,30 +10,90 @@
10
10
  * backend's 0-1000 coordinates against the screencap pixel size and taps
11
11
  * directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
12
12
  */
13
- import { execFile } from "node:child_process";
14
- import { existsSync } from "node:fs";
13
+ import { execFile, execFileSync } from "node:child_process";
14
+ import { existsSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
15
+ import { join } from "node:path";
15
16
  import { promisify } from "node:util";
17
+ import { binDir, adbBin } from "../paths.js";
16
18
  const execFileAsync = promisify(execFile);
17
- // adb ships with Homebrew's android-platform-tools and inside the SDK. Prefer
18
- // an explicit absolute path so we never depend on the caller's PATH (mirrors
19
- // scripts/mobile-e2e/lib.sh). Override with ISH_ADB / ADB.
20
- function resolveAdb() {
19
+ // Resolve adb without depending on the caller's PATH: ISH_ADB/ADB override → the
20
+ // Android SDK → Homebrew → our own download cache → PATH. If none is found,
21
+ // ensureAdb() fetches Google's standalone platform-tools (a small zip) into
22
+ // ~/.ish/bin, mirroring how cloudflared / the iOS WebDriverAgent runner are
23
+ // fetched. Override the binary with ISH_ADB / ADB.
24
+ function findAdb() {
21
25
  const fromEnv = process.env.ISH_ADB || process.env.ADB;
22
26
  if (fromEnv && existsSync(fromEnv))
23
27
  return fromEnv;
24
- const homebrew = "/opt/homebrew/bin/adb";
25
- if (existsSync(homebrew))
26
- return homebrew;
27
28
  const sdkHome = process.env.ANDROID_HOME || process.env.ANDROID_SDK_ROOT;
28
29
  if (sdkHome) {
29
- const sdkAdb = `${sdkHome}/platform-tools/adb`;
30
+ const sdkAdb = join(sdkHome, "platform-tools", "adb");
30
31
  if (existsSync(sdkAdb))
31
32
  return sdkAdb;
32
33
  }
33
- // Last resort: rely on PATH and surface a clear error if it's missing.
34
- return "adb";
34
+ const homebrew = "/opt/homebrew/bin/adb";
35
+ if (existsSync(homebrew))
36
+ return homebrew;
37
+ if (existsSync(adbBin()))
38
+ return adbBin(); // our downloaded cache
39
+ // PATH fallback — only if `adb` actually resolves there.
40
+ try {
41
+ execFileSync(process.platform === "win32" ? "where" : "which", ["adb"], { stdio: "ignore" });
42
+ return "adb";
43
+ }
44
+ catch {
45
+ return null;
46
+ }
47
+ }
48
+ let cachedAdb = null;
49
+ /** Resolve adb, downloading Google's platform-tools on first use if not found. */
50
+ export async function ensureAdb() {
51
+ if (cachedAdb)
52
+ return cachedAdb;
53
+ cachedAdb = findAdb() ?? (await downloadAdb());
54
+ return cachedAdb;
55
+ }
56
+ const PLATFORM_TOOLS_OS = {
57
+ darwin: "darwin",
58
+ linux: "linux",
59
+ win32: "windows",
60
+ };
61
+ /** Fetch + unpack Google's standalone platform-tools into ~/.ish/bin. */
62
+ async function downloadAdb() {
63
+ const os = PLATFORM_TOOLS_OS[process.platform];
64
+ if (!os) {
65
+ throw new AdbError(`no prebuilt adb for ${process.platform}; install Android platform-tools and set ISH_ADB`);
66
+ }
67
+ const url = `https://dl.google.com/android/repository/platform-tools-latest-${os}.zip`;
68
+ const dir = binDir();
69
+ console.error("Fetching adb (Android platform-tools) from Google...");
70
+ mkdirSync(dir, { recursive: true });
71
+ const zipPath = join(dir, "platform-tools.zip");
72
+ let resp;
73
+ try {
74
+ resp = await fetch(url, { signal: AbortSignal.timeout(120_000) });
75
+ }
76
+ catch (e) {
77
+ throw new AdbError(`failed to download platform-tools from ${url}: ${e instanceof Error ? e.message : String(e)}`);
78
+ }
79
+ if (!resp.ok)
80
+ throw new AdbError(`failed to download platform-tools: HTTP ${resp.status} from ${url}`);
81
+ writeFileSync(zipPath, Buffer.from(await resp.arrayBuffer()));
82
+ try {
83
+ // The zip carries a top-level `platform-tools/` dir; extract into binDir().
84
+ const [cmd, args] = process.platform === "win32"
85
+ ? ["tar", ["-xf", zipPath, "-C", dir]]
86
+ : ["unzip", ["-o", "-q", zipPath, "-d", dir]];
87
+ await execFileAsync(cmd, args, { timeout: 120_000 });
88
+ }
89
+ catch (e) {
90
+ throw new AdbError(`failed to unpack platform-tools: ${e instanceof Error ? e.message : String(e)}`);
91
+ }
92
+ rmSync(zipPath, { force: true });
93
+ if (!existsSync(adbBin()))
94
+ throw new AdbError(`platform-tools unpacked but adb is missing at ${adbBin()}`);
95
+ return adbBin();
35
96
  }
36
- const ADB = resolveAdb();
37
97
  const DEFAULT_TIMEOUT_MS = 30_000;
38
98
  // screencap on a cold emulator frame can be slow; give it generous headroom.
39
99
  const SCREENCAP_TIMEOUT_MS = 30_000;
@@ -47,8 +107,9 @@ export class AdbError extends Error {
47
107
  }
48
108
  /** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
49
109
  export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
110
+ const bin = await ensureAdb();
50
111
  try {
51
- const { stdout } = await execFileAsync(ADB, args, {
112
+ const { stdout } = await execFileAsync(bin, args, {
52
113
  timeout: timeoutMs,
53
114
  maxBuffer: 4 * 1024 * 1024,
54
115
  });
@@ -63,14 +124,75 @@ export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
63
124
  export async function adbShell(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
64
125
  return adb(["shell", ...args], timeoutMs);
65
126
  }
127
+ /**
128
+ * Pull versionName / versionCode out of `dumpsys package <pkg>` text. The
129
+ * relevant lines read `versionCode=42 minSdk=24 targetSdk=34` and
130
+ * `versionName=1.2.3`; `\d+` stops the build before the trailing tokens and
131
+ * `\S+` takes the version up to the next space. Returns null when neither is
132
+ * present (wrong/empty package).
133
+ */
134
+ export function parseDumpsysAppBuild(out) {
135
+ const version = out.match(/versionName=(\S+)/)?.[1] ?? null;
136
+ const build = out.match(/versionCode=(\d+)/)?.[1] ?? null;
137
+ if (!version && !build)
138
+ return null;
139
+ return { version, build };
140
+ }
141
+ /**
142
+ * Read an installed package's versionName / versionCode from
143
+ * `dumpsys package <pkg>`. Best-effort: returns null on any failure (the run
144
+ * never depends on it). Covers both freshly-installed apks and pre-installed
145
+ * packages — by call time the package name is already resolved.
146
+ */
147
+ export async function appBuildFromDevice(pkg) {
148
+ try {
149
+ return parseDumpsysAppBuild(await adbShell(["dumpsys", "package", pkg], 30_000));
150
+ }
151
+ catch {
152
+ return null;
153
+ }
154
+ }
155
+ /**
156
+ * Pull `"pkg/activity"` out of `dumpsys activity activities`. The foreground
157
+ * activity surfaces as `topResumedActivity=ActivityRecord{... u0 pkg/activity
158
+ * t123}` (older builds: `mResumedActivity=...`); we take the `pkg/activity`
159
+ * token from whichever line is present. The activity may be a short `.Name`
160
+ * (relative to the package) — kept as-is, exactly what dumpsys reports. Returns
161
+ * "" when neither line is present.
162
+ */
163
+ export function parseTopActivity(out) {
164
+ const m = /topResumedActivity=ActivityRecord\{[^}]*\s(\S+\/\S+)/.exec(out) ??
165
+ /mResumedActivity:\s*ActivityRecord\{[^}]*\s(\S+\/\S+)/.exec(out) ??
166
+ /mResumedActivity=ActivityRecord\{[^}]*\s(\S+\/\S+)/.exec(out);
167
+ if (!m)
168
+ return "";
169
+ // No trailing task id can end up glued to the token: the capture group
170
+ // `\S+/\S+` stops at the first whitespace, so it is exactly `pkg/activity`.
171
+ return m[1];
172
+ }
173
+ /**
174
+ * The foreground `"pkg/activity"` from `dumpsys activity activities`, a coarse
175
+ * input for the screen signature. Best-effort: returns "" on any failure (the
176
+ * signature degrades to its package-only coarse token, and the run never
177
+ * depends on this read).
178
+ */
179
+ export async function currentActivity() {
180
+ try {
181
+ return parseTopActivity(await adbShell(["dumpsys", "activity", "activities"], 15_000));
182
+ }
183
+ catch {
184
+ return "";
185
+ }
186
+ }
66
187
  /**
67
188
  * Capture the current screen as raw PNG bytes via `adb exec-out screencap -p`.
68
189
  * `exec-out` (not `shell`) avoids the CRLF translation that corrupts binary
69
190
  * output. Returns the PNG buffer at full device resolution.
70
191
  */
71
192
  export async function screencapPng() {
193
+ const bin = await ensureAdb();
72
194
  try {
73
- const { stdout } = await execFileAsync(ADB, ["exec-out", "screencap", "-p"], {
195
+ const { stdout } = await execFileAsync(bin, ["exec-out", "screencap", "-p"], {
74
196
  timeout: SCREENCAP_TIMEOUT_MS,
75
197
  maxBuffer: SCREENCAP_MAX_BUFFER,
76
198
  encoding: "buffer",
@@ -90,7 +212,7 @@ export async function requireOneDevice() {
90
212
  }
91
213
  catch (err) {
92
214
  const msg = err instanceof Error ? err.message : String(err);
93
- throw new AdbError(`Could not run adb (looked for "${ADB}"). Run \`ish check android\` to check your setup. ${msg}`);
215
+ throw new AdbError(`Could not run adb (looked for "${findAdb() ?? "adb"}"). Run \`ish check android\` to check your setup. ${msg}`);
94
216
  }
95
217
  // Output: "List of devices attached\n<serial>\tdevice\n..."
96
218
  const online = out
@@ -101,8 +223,21 @@ export async function requireOneDevice() {
101
223
  if (online.length === 0) {
102
224
  throw new AdbError("No Android device/emulator online. Run `ish check android` to check your setup and how to boot one.");
103
225
  }
226
+ // Honor ANDROID_SERIAL (the standard adb convention): when it names an online
227
+ // device, pin to it instead of failing on "more than one device". The adb
228
+ // wrapper inherits process.env, so every subsequent `adb` call already targets
229
+ // that serial — this lets multiple emulators run in parallel, each driven by a
230
+ // CLI invocation with its own ANDROID_SERIAL.
231
+ const pinned = process.env.ANDROID_SERIAL?.trim();
232
+ if (pinned) {
233
+ if (online.some((l) => l.startsWith(`${pinned}\t`)))
234
+ return;
235
+ throw new AdbError(`ANDROID_SERIAL=${pinned} is set but that device is not online. ` +
236
+ `Online: ${online.map((l) => l.split("\t")[0]).join(", ") || "none"}.`);
237
+ }
104
238
  if (online.length > 1) {
105
- throw new AdbError(`Expected exactly one Android device, found ${online.length}. Stop the extras (the sim drives a single device).`);
239
+ throw new AdbError(`Expected exactly one Android device, found ${online.length}. ` +
240
+ `Stop the extras, or set ANDROID_SERIAL=<serial> to pin one (parallel runs).`);
106
241
  }
107
242
  }
108
243
  // --- Input gestures (all in screencap pixel space) ---
@@ -19,7 +19,7 @@
19
19
  * - Vision path: px = round(x / 1000 * screencapWidth); same for y.
20
20
  */
21
21
  import type { LocalStepAction, ContextValue } from "./types.js";
22
- import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
22
+ import type { SimulationDevice, DeviceObservation, DeviceActionResult, AppBuild } from "./device.js";
23
23
  export interface AndroidDeviceOptions {
24
24
  /** App package name to force-stop/relaunch between participants. May be derived from --app. */
25
25
  appPackage?: string;
@@ -47,6 +47,12 @@ export declare class AndroidDevice implements SimulationDevice {
47
47
  private adbKeyboardActive;
48
48
  constructor(opts: AndroidDeviceOptions);
49
49
  launchOrReset(target: string): Promise<void>;
50
+ /**
51
+ * The installed app's version/build, read off the device after
52
+ * launchOrReset has resolved the package. Best-effort — null until the
53
+ * package is known, or if dumpsys can't report it.
54
+ */
55
+ appBuild(): Promise<AppBuild | null>;
50
56
  /**
51
57
  * Resolve which package to drive, returning a non-null package name or
52
58
  * throwing. For a local .apk we read the package straight from its binary
@@ -60,9 +66,11 @@ export declare class AndroidDevice implements SimulationDevice {
60
66
  private refreshDimensions;
61
67
  observe(): Promise<DeviceObservation>;
62
68
  /**
63
- * Dump + serialize the uiautomator a11y tree. Any failure (dump retries
64
- * exhausted, parse error) degrades to an empty tree so the backend falls back
65
- * to the vision path — a missing tree must never abort the observation.
69
+ * Dump + serialize the uiautomator a11y tree. Returns the serialized tree, the
70
+ * node map, the FLAT parsed nodes (for the screen signature) and the
71
+ * foreground package read off the dump. Any failure (dump retries exhausted,
72
+ * parse error) degrades to an empty tree so the backend falls back to the
73
+ * vision path — a missing tree must never abort the observation.
66
74
  */
67
75
  private dumpTree;
68
76
  captureScreenshot(): Promise<string>;
@@ -19,10 +19,11 @@
19
19
  * - Vision path: px = round(x / 1000 * screencapWidth); same for y.
20
20
  */
21
21
  import { resolveTextValue } from "./actions.js";
22
- import { requireOneDevice, screencapPng, pngDimensions, dumpUiautomatorXml, inputTap, inputSwipe, inputDrag, inputLongPress, setUserRotation, forceStop, launchApp, installApk, isPackageInstalled, listPackages, isAdbKeyboardInstalled, enableAdbKeyboard, setIme, resetIme, currentIme, adbKeyboardType, adbKeyboardClear, pressKeyEvent, statusbarExpand, ADB_KEYBOARD_PKG, } from "./adb.js";
22
+ import { requireOneDevice, screencapPng, pngDimensions, dumpUiautomatorXml, inputTap, inputSwipe, inputDrag, inputLongPress, setUserRotation, forceStop, launchApp, installApk, isPackageInstalled, listPackages, isAdbKeyboardInstalled, enableAdbKeyboard, setIme, resetIme, currentIme, adbKeyboardType, adbKeyboardClear, pressKeyEvent, statusbarExpand, appBuildFromDevice, currentActivity, ADB_KEYBOARD_PKG, } from "./adb.js";
23
23
  import { isLocalPath } from "../upload.js";
24
24
  import { deNormalizePoint, deNormalizeDrag } from "./coordinates.js";
25
- import { parseUiautomatorXml, serializeNativeTree, boundsCenter } from "./native-a11y.js";
25
+ import { parseUiautomatorXml, serializeNativeTree, boundsCenter, androidPackage, } from "./native-a11y.js";
26
+ import { computeScreenSignature } from "./screen-signature.js";
26
27
  import { packageNameFromApk } from "./apk-manifest.js";
27
28
  // Let animations/IME transitions settle before the next observation so the
28
29
  // screenshot the LLM reasons over reflects the action's result.
@@ -74,6 +75,21 @@ export class AndroidDevice {
74
75
  // Prime screencap dimensions for the first de-normalization.
75
76
  await this.refreshDimensions();
76
77
  }
78
+ /**
79
+ * The installed app's version/build, read off the device after
80
+ * launchOrReset has resolved the package. Best-effort — null until the
81
+ * package is known, or if dumpsys can't report it.
82
+ */
83
+ async appBuild() {
84
+ if (!this.appPackage)
85
+ return null;
86
+ const meta = await appBuildFromDevice(this.appPackage);
87
+ return {
88
+ package: this.appPackage,
89
+ version: meta?.version ?? null,
90
+ build: meta?.build ?? null,
91
+ };
92
+ }
77
93
  /**
78
94
  * Resolve which package to drive, returning a non-null package name or
79
95
  * throwing. For a local .apk we read the package straight from its binary
@@ -160,14 +176,24 @@ export class AndroidDevice {
160
176
  return png;
161
177
  }
162
178
  async observe() {
163
- // Screencap and the a11y dump are independent reads — run them in parallel.
164
- // The dump is wrapped so a failure degrades to the vision path (empty tree)
165
- // rather than aborting the observation.
166
- const [png, tree] = await Promise.all([
179
+ // Screencap, the a11y dump, and the foreground-activity read are independent
180
+ // — run them in parallel. The dump is wrapped so a failure degrades to the
181
+ // vision path (empty tree) rather than aborting the observation; the
182
+ // activity read is best-effort ("" on failure → package-only coarse token).
183
+ const [png, tree, activity] = await Promise.all([
167
184
  this.refreshDimensions(),
168
185
  this.dumpTree(),
186
+ currentActivity(),
169
187
  ]);
170
188
  this.lastNodeMap = tree.nodeMap;
189
+ // Scroll-invariant screen signature from this dump's parsed nodes + coarse
190
+ // inputs (foreground package/activity). Sent only when usable (see loop.ts).
191
+ const coarseInputs = {
192
+ platform: "android",
193
+ package: tree.package,
194
+ activity,
195
+ };
196
+ const screenSignature = computeScreenSignature(tree.nodes, coarseInputs);
171
197
  return {
172
198
  screenshot: png.toString("base64"),
173
199
  // Element path when the dump produced a tree; "" → backend vision branch.
@@ -178,12 +204,19 @@ export class AndroidDevice {
178
204
  // Native has no scrollable document; the screen IS the page.
179
205
  documentHeight: this.screenHeight,
180
206
  tabs: [],
207
+ screenSignature,
208
+ // Corpus-dump only (ISH_DUMP_CORPUS): the exact parsed nodes + coarse
209
+ // inputs the signature consumed, so any algorithm can be replayed offline.
210
+ nativeNodes: tree.nodes,
211
+ coarseInputs,
181
212
  };
182
213
  }
183
214
  /**
184
- * Dump + serialize the uiautomator a11y tree. Any failure (dump retries
185
- * exhausted, parse error) degrades to an empty tree so the backend falls back
186
- * to the vision path — a missing tree must never abort the observation.
215
+ * Dump + serialize the uiautomator a11y tree. Returns the serialized tree, the
216
+ * node map, the FLAT parsed nodes (for the screen signature) and the
217
+ * foreground package read off the dump. Any failure (dump retries exhausted,
218
+ * parse error) degrades to an empty tree so the backend falls back to the
219
+ * vision path — a missing tree must never abort the observation.
187
220
  */
188
221
  async dumpTree() {
189
222
  try {
@@ -191,12 +224,12 @@ export class AndroidDevice {
191
224
  const nodes = parseUiautomatorXml(xml);
192
225
  const tree = serializeNativeTree(nodes);
193
226
  this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
194
- return tree;
227
+ return { ...tree, nodes, package: androidPackage(xml) };
195
228
  }
196
229
  catch (err) {
197
230
  const msg = err instanceof Error ? err.message : String(err);
198
231
  this.log(`a11y dump failed, falling back to vision: ${msg}`);
199
- return { simplified: "", nodeMap: new Map() };
232
+ return { simplified: "", nodeMap: new Map(), nodes: [], package: "" };
200
233
  }
201
234
  }
202
235
  async captureScreenshot() {
@@ -14,6 +14,8 @@
14
14
  import type { Browser } from "playwright-core";
15
15
  import type { LocalStepAction, LocalSimBrowserOptions, LocalTabInfo, ContextValue } from "./types.js";
16
16
  import type { BrowserSession } from "./browser.js";
17
+ import type { ScreenSignature, CoarseInputs } from "./screen-signature.js";
18
+ import type { NativeNode } from "./native-a11y.js";
17
19
  /**
18
20
  * One observation of the target's current state.
19
21
  *
@@ -39,6 +41,29 @@ export interface DeviceObservation {
39
41
  documentHeight: number;
40
42
  /** Open-tab snapshot (browser-only; empty for native). */
41
43
  tabs: LocalTabInfo[];
44
+ /**
45
+ * Native only: the scroll-invariant structural "screen signature" computed
46
+ * from this observation's a11y tree (see screen-signature.ts). The loop sends
47
+ * `value` as the match-frame anchor ONLY when `usable` is true; browser
48
+ * targets omit it. Undefined when the platform doesn't compute one.
49
+ */
50
+ screenSignature?: ScreenSignature;
51
+ /**
52
+ * Native only, corpus-dump only: the PARSED a11y nodes that
53
+ * `computeScreenSignature` consumed for this observation (the exact array, so
54
+ * any signature algorithm can be replayed offline against it). Populated by the
55
+ * android/ios `observe()`; the browser leaves it undefined. Only surfaced for
56
+ * the `ISH_DUMP_CORPUS` instrumentation in loop.ts — nothing in the live path
57
+ * reads it.
58
+ */
59
+ nativeNodes?: NativeNode[];
60
+ /**
61
+ * Native only, corpus-dump only: the `CoarseInputs` (platform / package /
62
+ * activity / bundleId) fed into `computeScreenSignature` for this observation.
63
+ * Populated by the android/ios `observe()`; the browser leaves it undefined.
64
+ * Same instrumentation-only purpose as `nativeNodes`.
65
+ */
66
+ coarseInputs?: CoarseInputs;
42
67
  }
43
68
  /**
44
69
  * Result of executing one action against the target.
@@ -56,6 +81,19 @@ export interface DeviceActionResult {
56
81
  } | null;
57
82
  openedNewTab: boolean;
58
83
  }
84
+ /**
85
+ * The version/build of the installed native app being driven, read off the
86
+ * device after `launchOrReset`. Lets the web app show which build an iteration
87
+ * last ran against. `package` is the resolved bundle id (iOS) / package name
88
+ * (Android); `version` is the marketing version (CFBundleShortVersionString /
89
+ * versionName) and `build` the build number (CFBundleVersion / versionCode),
90
+ * either of which may be null when the device doesn't report it.
91
+ */
92
+ export interface AppBuild {
93
+ package: string;
94
+ version: string | null;
95
+ build: string | null;
96
+ }
59
97
  /**
60
98
  * A drivable simulation target. Implementations own their own lifecycle and
61
99
  * (for the browser) tab bookkeeping.
@@ -90,6 +128,12 @@ export interface SimulationDevice {
90
128
  executeAction(action: LocalStepAction): Promise<DeviceActionResult>;
91
129
  /** Current location string for recording (URL for browser; "" for native). */
92
130
  currentUrl(): string;
131
+ /**
132
+ * Native only: the version/build of the installed app being driven, read
133
+ * off the device after `launchOrReset`. Browser omits it. Best-effort — a
134
+ * failed read resolves to null and never disturbs the run.
135
+ */
136
+ appBuild?(): Promise<AppBuild | null>;
93
137
  /** Tear down. For shared-browser tabs this closes just the tab. */
94
138
  close(): Promise<void>;
95
139
  }
@@ -31,7 +31,7 @@
31
31
  * backend never converts coords with screen_width/height.
32
32
  */
33
33
  import type { LocalStepAction, ContextValue } from "./types.js";
34
- import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
34
+ import type { SimulationDevice, DeviceObservation, DeviceActionResult, AppBuild } from "./device.js";
35
35
  export interface IosDeviceOptions {
36
36
  /** Bundle id to terminate/relaunch between participants. Derived from --app when a .app is given. */
37
37
  bundleId?: string;
@@ -70,6 +70,12 @@ export declare class IOSDevice implements SimulationDevice {
70
70
  private lastNodeMap;
71
71
  constructor(opts: IosDeviceOptions);
72
72
  launchOrReset(target: string): Promise<void>;
73
+ /**
74
+ * The installed app's version/build, read off the simulator after
75
+ * launchOrReset has resolved the bundle id. Best-effort — null until the
76
+ * bundle id is known, or if simctl/plutil can't report it.
77
+ */
78
+ appBuild(): Promise<AppBuild | null>;
73
79
  /**
74
80
  * Resolve the bundle id to drive, returning a non-null id or throwing.
75
81
  * Installs a local `.app` first and reads its CFBundleIdentifier from
@@ -80,10 +86,11 @@ export declare class IOSDevice implements SimulationDevice {
80
86
  private refreshScreen;
81
87
  observe(): Promise<DeviceObservation>;
82
88
  /**
83
- * Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
84
- * failure (retries exhausted on a trivial tree, parse error) degrades to an
85
- * empty tree so the backend falls back to vision — a missing tree must never
86
- * abort the observation.
89
+ * Read + serialize WDA's /source a11y tree (bounds in POINTS). Returns the
90
+ * serialized tree, the node map and the FLAT parsed nodes (for the screen
91
+ * signature). Any failure (retries exhausted on a trivial tree, parse error)
92
+ * degrades to an empty tree so the backend falls back to vision — a missing
93
+ * tree must never abort the observation.
87
94
  */
88
95
  private dumpTree;
89
96
  captureScreenshot(): Promise<string>;
@@ -31,12 +31,13 @@
31
31
  * backend never converts coords with screen_width/height.
32
32
  */
33
33
  import { resolveTextValue } from "./actions.js";
34
- import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, } from "./simctl.js";
34
+ import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, appBuildFromSimulator, } from "./simctl.js";
35
35
  // iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
36
- import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
36
+ import { ensureWda, closeWda, describeScreen, describeAll, activeBundleId, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
37
37
  import { isLocalPath } from "../upload.js";
38
38
  import { deNormalizePoint, deNormalizeDrag, pointToPixel } from "./coordinates.js";
39
39
  import { parseXcuiHierarchy, serializeNativeTree, boundsCenter } from "./native-a11y.js";
40
+ import { computeScreenSignature } from "./screen-signature.js";
40
41
  // Let animations/transitions settle before the next observation so the
41
42
  // screenshot the LLM reasons over reflects the action's result.
42
43
  const POST_GESTURE_SETTLE_MS = 500;
@@ -116,6 +117,21 @@ export class IOSDevice {
116
117
  await launchApp(this.udid, bundleId);
117
118
  await settle(1500); // cold start needs longer than a gesture settle
118
119
  }
120
+ /**
121
+ * The installed app's version/build, read off the simulator after
122
+ * launchOrReset has resolved the bundle id. Best-effort — null until the
123
+ * bundle id is known, or if simctl/plutil can't report it.
124
+ */
125
+ async appBuild() {
126
+ if (!this.bundleId || !this.udid)
127
+ return null;
128
+ const meta = await appBuildFromSimulator(this.udid, this.bundleId);
129
+ return {
130
+ package: this.bundleId,
131
+ version: meta?.version ?? null,
132
+ build: meta?.build ?? null,
133
+ };
134
+ }
119
135
  /**
120
136
  * Resolve the bundle id to drive, returning a non-null id or throwing.
121
137
  * Installs a local `.app` first and reads its CFBundleIdentifier from
@@ -161,14 +177,26 @@ export class IOSDevice {
161
177
  }
162
178
  async observe() {
163
179
  // Refresh geometry each step (orientation can change), then capture the
164
- // pixel screenshot and the a11y tree in parallel (independent reads). The
165
- // dump is wrapped so a failure degrades to the vision path (empty tree).
180
+ // pixel screenshot, the a11y tree, and the active bundle id in parallel
181
+ // (independent reads). The dump is wrapped so a failure degrades to the
182
+ // vision path (empty tree); the bundle-id read is best-effort ("" on
183
+ // failure → the navTitle-only coarse token).
166
184
  await this.refreshScreen();
167
- const [png, tree] = await Promise.all([
185
+ const [png, tree, bundleId] = await Promise.all([
168
186
  screenshotPng(),
169
187
  this.dumpTree(),
188
+ activeBundleId(this.udid),
170
189
  ]);
171
190
  this.lastNodeMap = tree.nodeMap;
191
+ // Scroll-invariant screen signature from this dump's parsed nodes + coarse
192
+ // inputs (active bundle id; navTitle is derived from the nodes). iOS is
193
+ // best-effort — sparse SwiftUI trees are usually unusable and fall back to
194
+ // Phase-1 continuity (sent only when usable; see loop.ts).
195
+ const coarseInputs = {
196
+ platform: "ios",
197
+ bundleId,
198
+ };
199
+ const screenSignature = computeScreenSignature(tree.nodes, coarseInputs);
172
200
  return {
173
201
  screenshot: png.toString("base64"),
174
202
  // Element path when describe-all produced a tree; "" → backend vision.
@@ -181,13 +209,19 @@ export class IOSDevice {
181
209
  // Native has no scrollable document; the screen IS the page.
182
210
  documentHeight: this.pixelHeight,
183
211
  tabs: [],
212
+ screenSignature,
213
+ // Corpus-dump only (ISH_DUMP_CORPUS): the exact parsed nodes + coarse
214
+ // inputs the signature consumed, so any algorithm can be replayed offline.
215
+ nativeNodes: tree.nodes,
216
+ coarseInputs,
184
217
  };
185
218
  }
186
219
  /**
187
- * Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
188
- * failure (retries exhausted on a trivial tree, parse error) degrades to an
189
- * empty tree so the backend falls back to vision — a missing tree must never
190
- * abort the observation.
220
+ * Read + serialize WDA's /source a11y tree (bounds in POINTS). Returns the
221
+ * serialized tree, the node map and the FLAT parsed nodes (for the screen
222
+ * signature). Any failure (retries exhausted on a trivial tree, parse error)
223
+ * degrades to an empty tree so the backend falls back to vision — a missing
224
+ * tree must never abort the observation.
191
225
  */
192
226
  async dumpTree() {
193
227
  try {
@@ -195,12 +229,12 @@ export class IOSDevice {
195
229
  const nodes = parseXcuiHierarchy(json);
196
230
  const tree = serializeNativeTree(nodes);
197
231
  this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
198
- return tree;
232
+ return { ...tree, nodes };
199
233
  }
200
234
  catch (err) {
201
235
  const msg = err instanceof Error ? err.message : String(err);
202
236
  this.log(`a11y describe-all failed, falling back to vision: ${msg}`);
203
- return { simplified: "", nodeMap: new Map() };
237
+ return { simplified: "", nodeMap: new Map(), nodes: [] };
204
238
  }
205
239
  }
206
240
  async captureScreenshot() {