@ishlabs/cli 0.26.0 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,30 +10,90 @@
10
10
  * backend's 0-1000 coordinates against the screencap pixel size and taps
11
11
  * directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
12
12
  */
13
- import { execFile } from "node:child_process";
14
- import { existsSync } from "node:fs";
13
+ import { execFile, execFileSync } from "node:child_process";
14
+ import { existsSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
15
+ import { join } from "node:path";
15
16
  import { promisify } from "node:util";
17
+ import { binDir, adbBin } from "../paths.js";
16
18
  const execFileAsync = promisify(execFile);
17
- // adb ships with Homebrew's android-platform-tools and inside the SDK. Prefer
18
- // an explicit absolute path so we never depend on the caller's PATH (mirrors
19
- // scripts/mobile-e2e/lib.sh). Override with ISH_ADB / ADB.
20
- function resolveAdb() {
19
+ // Resolve adb without depending on the caller's PATH: ISH_ADB/ADB override → the
20
+ // Android SDK → Homebrew → our own download cache → PATH. If none is found,
21
+ // ensureAdb() fetches Google's standalone platform-tools (a small zip) into
22
+ // ~/.ish/bin, mirroring how cloudflared / the iOS WebDriverAgent runner are
23
+ // fetched. Override the binary with ISH_ADB / ADB.
24
+ function findAdb() {
21
25
  const fromEnv = process.env.ISH_ADB || process.env.ADB;
22
26
  if (fromEnv && existsSync(fromEnv))
23
27
  return fromEnv;
24
- const homebrew = "/opt/homebrew/bin/adb";
25
- if (existsSync(homebrew))
26
- return homebrew;
27
28
  const sdkHome = process.env.ANDROID_HOME || process.env.ANDROID_SDK_ROOT;
28
29
  if (sdkHome) {
29
- const sdkAdb = `${sdkHome}/platform-tools/adb`;
30
+ const sdkAdb = join(sdkHome, "platform-tools", "adb");
30
31
  if (existsSync(sdkAdb))
31
32
  return sdkAdb;
32
33
  }
33
- // Last resort: rely on PATH and surface a clear error if it's missing.
34
- return "adb";
34
+ const homebrew = "/opt/homebrew/bin/adb";
35
+ if (existsSync(homebrew))
36
+ return homebrew;
37
+ if (existsSync(adbBin()))
38
+ return adbBin(); // our downloaded cache
39
+ // PATH fallback — only if `adb` actually resolves there.
40
+ try {
41
+ execFileSync(process.platform === "win32" ? "where" : "which", ["adb"], { stdio: "ignore" });
42
+ return "adb";
43
+ }
44
+ catch {
45
+ return null;
46
+ }
47
+ }
48
+ let cachedAdb = null;
49
+ /** Resolve adb, downloading Google's platform-tools on first use if not found. */
50
+ export async function ensureAdb() {
51
+ if (cachedAdb)
52
+ return cachedAdb;
53
+ cachedAdb = findAdb() ?? (await downloadAdb());
54
+ return cachedAdb;
55
+ }
56
+ const PLATFORM_TOOLS_OS = {
57
+ darwin: "darwin",
58
+ linux: "linux",
59
+ win32: "windows",
60
+ };
61
+ /** Fetch + unpack Google's standalone platform-tools into ~/.ish/bin. */
62
+ async function downloadAdb() {
63
+ const os = PLATFORM_TOOLS_OS[process.platform];
64
+ if (!os) {
65
+ throw new AdbError(`no prebuilt adb for ${process.platform}; install Android platform-tools and set ISH_ADB`);
66
+ }
67
+ const url = `https://dl.google.com/android/repository/platform-tools-latest-${os}.zip`;
68
+ const dir = binDir();
69
+ console.error("Fetching adb (Android platform-tools) from Google...");
70
+ mkdirSync(dir, { recursive: true });
71
+ const zipPath = join(dir, "platform-tools.zip");
72
+ let resp;
73
+ try {
74
+ resp = await fetch(url, { signal: AbortSignal.timeout(120_000) });
75
+ }
76
+ catch (e) {
77
+ throw new AdbError(`failed to download platform-tools from ${url}: ${e instanceof Error ? e.message : String(e)}`);
78
+ }
79
+ if (!resp.ok)
80
+ throw new AdbError(`failed to download platform-tools: HTTP ${resp.status} from ${url}`);
81
+ writeFileSync(zipPath, Buffer.from(await resp.arrayBuffer()));
82
+ try {
83
+ // The zip carries a top-level `platform-tools/` dir; extract into binDir().
84
+ const [cmd, args] = process.platform === "win32"
85
+ ? ["tar", ["-xf", zipPath, "-C", dir]]
86
+ : ["unzip", ["-o", "-q", zipPath, "-d", dir]];
87
+ await execFileAsync(cmd, args, { timeout: 120_000 });
88
+ }
89
+ catch (e) {
90
+ throw new AdbError(`failed to unpack platform-tools: ${e instanceof Error ? e.message : String(e)}`);
91
+ }
92
+ rmSync(zipPath, { force: true });
93
+ if (!existsSync(adbBin()))
94
+ throw new AdbError(`platform-tools unpacked but adb is missing at ${adbBin()}`);
95
+ return adbBin();
35
96
  }
36
- const ADB = resolveAdb();
37
97
  const DEFAULT_TIMEOUT_MS = 30_000;
38
98
  // screencap on a cold emulator frame can be slow; give it generous headroom.
39
99
  const SCREENCAP_TIMEOUT_MS = 30_000;
@@ -47,8 +107,9 @@ export class AdbError extends Error {
47
107
  }
48
108
  /** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
49
109
  export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
110
+ const bin = await ensureAdb();
50
111
  try {
51
- const { stdout } = await execFileAsync(ADB, args, {
112
+ const { stdout } = await execFileAsync(bin, args, {
52
113
  timeout: timeoutMs,
53
114
  maxBuffer: 4 * 1024 * 1024,
54
115
  });
@@ -63,14 +124,43 @@ export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
63
124
  export async function adbShell(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
64
125
  return adb(["shell", ...args], timeoutMs);
65
126
  }
127
+ /**
128
+ * Pull versionName / versionCode out of `dumpsys package <pkg>` text. The
129
+ * relevant lines read `versionCode=42 minSdk=24 targetSdk=34` and
130
+ * `versionName=1.2.3`; `\d+` stops the build before the trailing tokens and
131
+ * `\S+` takes the version up to the next space. Returns null when neither is
132
+ * present (wrong/empty package).
133
+ */
134
+ export function parseDumpsysAppBuild(out) {
135
+ const version = out.match(/versionName=(\S+)/)?.[1] ?? null;
136
+ const build = out.match(/versionCode=(\d+)/)?.[1] ?? null;
137
+ if (!version && !build)
138
+ return null;
139
+ return { version, build };
140
+ }
141
+ /**
142
+ * Read an installed package's versionName / versionCode from
143
+ * `dumpsys package <pkg>`. Best-effort: returns null on any failure (the run
144
+ * never depends on it). Covers both freshly-installed apks and pre-installed
145
+ * packages — by call time the package name is already resolved.
146
+ */
147
+ export async function appBuildFromDevice(pkg) {
148
+ try {
149
+ return parseDumpsysAppBuild(await adbShell(["dumpsys", "package", pkg], 30_000));
150
+ }
151
+ catch {
152
+ return null;
153
+ }
154
+ }
66
155
  /**
67
156
  * Capture the current screen as raw PNG bytes via `adb exec-out screencap -p`.
68
157
  * `exec-out` (not `shell`) avoids the CRLF translation that corrupts binary
69
158
  * output. Returns the PNG buffer at full device resolution.
70
159
  */
71
160
  export async function screencapPng() {
161
+ const bin = await ensureAdb();
72
162
  try {
73
- const { stdout } = await execFileAsync(ADB, ["exec-out", "screencap", "-p"], {
163
+ const { stdout } = await execFileAsync(bin, ["exec-out", "screencap", "-p"], {
74
164
  timeout: SCREENCAP_TIMEOUT_MS,
75
165
  maxBuffer: SCREENCAP_MAX_BUFFER,
76
166
  encoding: "buffer",
@@ -90,7 +180,7 @@ export async function requireOneDevice() {
90
180
  }
91
181
  catch (err) {
92
182
  const msg = err instanceof Error ? err.message : String(err);
93
- throw new AdbError(`Could not run adb (looked for "${ADB}"). Run \`ish check android\` to check your setup. ${msg}`);
183
+ throw new AdbError(`Could not run adb (looked for "${findAdb() ?? "adb"}"). Run \`ish check android\` to check your setup. ${msg}`);
94
184
  }
95
185
  // Output: "List of devices attached\n<serial>\tdevice\n..."
96
186
  const online = out
@@ -19,7 +19,7 @@
19
19
  * - Vision path: px = round(x / 1000 * screencapWidth); same for y.
20
20
  */
21
21
  import type { LocalStepAction, ContextValue } from "./types.js";
22
- import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
22
+ import type { SimulationDevice, DeviceObservation, DeviceActionResult, AppBuild } from "./device.js";
23
23
  export interface AndroidDeviceOptions {
24
24
  /** App package name to force-stop/relaunch between participants. May be derived from --app. */
25
25
  appPackage?: string;
@@ -47,6 +47,12 @@ export declare class AndroidDevice implements SimulationDevice {
47
47
  private adbKeyboardActive;
48
48
  constructor(opts: AndroidDeviceOptions);
49
49
  launchOrReset(target: string): Promise<void>;
50
+ /**
51
+ * The installed app's version/build, read off the device after
52
+ * launchOrReset has resolved the package. Best-effort — null until the
53
+ * package is known, or if dumpsys can't report it.
54
+ */
55
+ appBuild(): Promise<AppBuild | null>;
50
56
  /**
51
57
  * Resolve which package to drive, returning a non-null package name or
52
58
  * throwing. For a local .apk we read the package straight from its binary
@@ -19,7 +19,7 @@
19
19
  * - Vision path: px = round(x / 1000 * screencapWidth); same for y.
20
20
  */
21
21
  import { resolveTextValue } from "./actions.js";
22
- import { requireOneDevice, screencapPng, pngDimensions, dumpUiautomatorXml, inputTap, inputSwipe, inputDrag, inputLongPress, setUserRotation, forceStop, launchApp, installApk, isPackageInstalled, listPackages, isAdbKeyboardInstalled, enableAdbKeyboard, setIme, resetIme, currentIme, adbKeyboardType, adbKeyboardClear, pressKeyEvent, statusbarExpand, ADB_KEYBOARD_PKG, } from "./adb.js";
22
+ import { requireOneDevice, screencapPng, pngDimensions, dumpUiautomatorXml, inputTap, inputSwipe, inputDrag, inputLongPress, setUserRotation, forceStop, launchApp, installApk, isPackageInstalled, listPackages, isAdbKeyboardInstalled, enableAdbKeyboard, setIme, resetIme, currentIme, adbKeyboardType, adbKeyboardClear, pressKeyEvent, statusbarExpand, appBuildFromDevice, ADB_KEYBOARD_PKG, } from "./adb.js";
23
23
  import { isLocalPath } from "../upload.js";
24
24
  import { deNormalizePoint, deNormalizeDrag } from "./coordinates.js";
25
25
  import { parseUiautomatorXml, serializeNativeTree, boundsCenter } from "./native-a11y.js";
@@ -74,6 +74,21 @@ export class AndroidDevice {
74
74
  // Prime screencap dimensions for the first de-normalization.
75
75
  await this.refreshDimensions();
76
76
  }
77
+ /**
78
+ * The installed app's version/build, read off the device after
79
+ * launchOrReset has resolved the package. Best-effort — null until the
80
+ * package is known, or if dumpsys can't report it.
81
+ */
82
+ async appBuild() {
83
+ if (!this.appPackage)
84
+ return null;
85
+ const meta = await appBuildFromDevice(this.appPackage);
86
+ return {
87
+ package: this.appPackage,
88
+ version: meta?.version ?? null,
89
+ build: meta?.build ?? null,
90
+ };
91
+ }
77
92
  /**
78
93
  * Resolve which package to drive, returning a non-null package name or
79
94
  * throwing. For a local .apk we read the package straight from its binary
@@ -56,6 +56,19 @@ export interface DeviceActionResult {
56
56
  } | null;
57
57
  openedNewTab: boolean;
58
58
  }
59
+ /**
60
+ * The version/build of the installed native app being driven, read off the
61
+ * device after `launchOrReset`. Lets the web app show which build an iteration
62
+ * last ran against. `package` is the resolved bundle id (iOS) / package name
63
+ * (Android); `version` is the marketing version (CFBundleShortVersionString /
64
+ * versionName) and `build` the build number (CFBundleVersion / versionCode),
65
+ * either of which may be null when the device doesn't report it.
66
+ */
67
+ export interface AppBuild {
68
+ package: string;
69
+ version: string | null;
70
+ build: string | null;
71
+ }
59
72
  /**
60
73
  * A drivable simulation target. Implementations own their own lifecycle and
61
74
  * (for the browser) tab bookkeeping.
@@ -90,6 +103,12 @@ export interface SimulationDevice {
90
103
  executeAction(action: LocalStepAction): Promise<DeviceActionResult>;
91
104
  /** Current location string for recording (URL for browser; "" for native). */
92
105
  currentUrl(): string;
106
+ /**
107
+ * Native only: the version/build of the installed app being driven, read
108
+ * off the device after `launchOrReset`. Browser omits it. Best-effort — a
109
+ * failed read resolves to null and never disturbs the run.
110
+ */
111
+ appBuild?(): Promise<AppBuild | null>;
93
112
  /** Tear down. For shared-browser tabs this closes just the tab. */
94
113
  close(): Promise<void>;
95
114
  }
@@ -31,7 +31,7 @@
31
31
  * backend never converts coords with screen_width/height.
32
32
  */
33
33
  import type { LocalStepAction, ContextValue } from "./types.js";
34
- import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
34
+ import type { SimulationDevice, DeviceObservation, DeviceActionResult, AppBuild } from "./device.js";
35
35
  export interface IosDeviceOptions {
36
36
  /** Bundle id to terminate/relaunch between participants. Derived from --app when a .app is given. */
37
37
  bundleId?: string;
@@ -70,6 +70,12 @@ export declare class IOSDevice implements SimulationDevice {
70
70
  private lastNodeMap;
71
71
  constructor(opts: IosDeviceOptions);
72
72
  launchOrReset(target: string): Promise<void>;
73
+ /**
74
+ * The installed app's version/build, read off the simulator after
75
+ * launchOrReset has resolved the bundle id. Best-effort — null until the
76
+ * bundle id is known, or if simctl/plutil can't report it.
77
+ */
78
+ appBuild(): Promise<AppBuild | null>;
73
79
  /**
74
80
  * Resolve the bundle id to drive, returning a non-null id or throwing.
75
81
  * Installs a local `.app` first and reads its CFBundleIdentifier from
@@ -31,7 +31,7 @@
31
31
  * backend never converts coords with screen_width/height.
32
32
  */
33
33
  import { resolveTextValue } from "./actions.js";
34
- import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, } from "./simctl.js";
34
+ import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, appBuildFromSimulator, } from "./simctl.js";
35
35
  // iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
36
36
  import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
37
37
  import { isLocalPath } from "../upload.js";
@@ -116,6 +116,21 @@ export class IOSDevice {
116
116
  await launchApp(this.udid, bundleId);
117
117
  await settle(1500); // cold start needs longer than a gesture settle
118
118
  }
119
+ /**
120
+ * The installed app's version/build, read off the simulator after
121
+ * launchOrReset has resolved the bundle id. Best-effort — null until the
122
+ * bundle id is known, or if simctl/plutil can't report it.
123
+ */
124
+ async appBuild() {
125
+ if (!this.bundleId || !this.udid)
126
+ return null;
127
+ const meta = await appBuildFromSimulator(this.udid, this.bundleId);
128
+ return {
129
+ package: this.bundleId,
130
+ version: meta?.version ?? null,
131
+ build: meta?.build ?? null,
132
+ };
133
+ }
119
134
  /**
120
135
  * Resolve the bundle id to drive, returning a non-null id or throwing.
121
136
  * Installs a local `.app` first and reads its CFBundleIdentifier from
@@ -7,8 +7,9 @@
7
7
  */
8
8
  import { launchSharedBrowser, FULL_PAGE_HEIGHT_CAP_PX_MOBILE, FULL_PAGE_HEIGHT_CAP_PX_DESKTOP, } from "./browser.js";
9
9
  import { uploadScreenshot } from "./upload.js";
10
- import { detectNoVisibleChange, describeAction } from "./actions.js";
10
+ import { detectNoVisibleChange, describeAction, classifyStepKind } from "./actions.js";
11
11
  import { createDevice } from "./device.js";
12
+ import pkg from "../../../package.json" with { type: "json" };
12
13
  import { enableDebug, isDebugEnabled, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
13
14
  /**
14
15
  * Native (mobile) platforms drive a single physical device via screenshot →
@@ -113,6 +114,34 @@ const SENTIMENT_ICONS = {
113
114
  Positive: "+", Negative: "-", Neutral: "~",
114
115
  Frustrated: "!", Confused: "?", Delighted: "*",
115
116
  };
117
+ const CLI_VERSION = pkg.version;
118
+ /**
119
+ * Stamp the app build this run drove onto the iteration, so the web app's
120
+ * run-settings card can show which build the iteration is on. Best-effort:
121
+ * a native run never depends on this landing, so failures are warned, not
122
+ * thrown. Only native platforms carry a build.
123
+ */
124
+ async function reportObservedApp(client, iterationId, platform, build, log) {
125
+ if (platform !== "ios" && platform !== "android")
126
+ return;
127
+ try {
128
+ await client.post(`/iterations/${iterationId}/observed-app`, {
129
+ platform,
130
+ package: build.package,
131
+ version: build.version,
132
+ build: build.build,
133
+ cli_version: CLI_VERSION,
134
+ });
135
+ const label = [build.version, build.build ? `(${build.build})` : null]
136
+ .filter(Boolean)
137
+ .join(" ");
138
+ log(`Recorded app build${label ? `: ${label}` : ""}`);
139
+ }
140
+ catch (err) {
141
+ const msg = err instanceof Error ? err.message : String(err);
142
+ console.warn(`Could not record app build for the iteration: ${msg}`);
143
+ }
144
+ }
116
145
  /**
117
146
  * Run local simulations — parallel when multiple participants, sequential by default.
118
147
  * Use --parallel <n> to control concurrency (default: number of participants).
@@ -139,6 +168,16 @@ export async function runLocalSimulations(client, opts) {
139
168
  log("Native (android/ios) runs drive a single device — running sequentially.");
140
169
  }
141
170
  const concurrency = isNativeRun ? 1 : (opts.parallel ?? opts.participantIds.length);
171
+ // Native runs stamp the app build onto the iteration once — every
172
+ // participant in a run drives the same installed build, so dedupe to a
173
+ // single best-effort POST after the first device resolves its app.
174
+ let appBuildReported = false;
175
+ const reportAppBuild = (build, platform) => {
176
+ if (appBuildReported)
177
+ return;
178
+ appBuildReported = true;
179
+ void reportObservedApp(client, opts.iterationId, platform, build, log);
180
+ };
142
181
  try {
143
182
  if (concurrency <= 1 || opts.participantIds.length <= 1) {
144
183
  // Sequential execution — each participant owns its own browser
@@ -149,7 +188,7 @@ export async function runLocalSimulations(client, opts) {
149
188
  log(`\nStarting local simulation for ${participantName}...`);
150
189
  try {
151
190
  const participantLog = (msg) => log(`[${participantName}] ${msg}`);
152
- await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled);
191
+ await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild);
153
192
  log(`Completed: ${participantName}`);
154
193
  }
155
194
  catch (err) {
@@ -183,7 +222,7 @@ export async function runLocalSimulations(client, opts) {
183
222
  const participantLog = (msg) => log(`[${participantName}] ${msg}`);
184
223
  participantLog("Starting...");
185
224
  try {
186
- await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, sharedBrowser);
225
+ await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild, sharedBrowser);
187
226
  participantLog("Completed");
188
227
  }
189
228
  catch (err) {
@@ -203,7 +242,7 @@ export async function runLocalSimulations(client, opts) {
203
242
  process.off("SIGINT", onSigint);
204
243
  }
205
244
  }
206
- async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, sharedBrowser) {
245
+ async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, onAppBuild, sharedBrowser) {
207
246
  // Step 1: Initialize session
208
247
  const initResponse = await client.localSimInit({
209
248
  participant_id: participantId,
@@ -274,6 +313,19 @@ async function runSingleSimulation(client, participantId, participantName, opts,
274
313
  try {
275
314
  // Step 3: Launch / navigate the target to its starting point.
276
315
  await device.launchOrReset(launchTarget);
316
+ // Step 3b: Capture the installed app's build (native only). Best-effort —
317
+ // the dedupe in runLocalSimulations keeps this to one POST per run, and a
318
+ // failed read or report never disturbs the simulation.
319
+ if (onAppBuild) {
320
+ try {
321
+ const observed = await device.appBuild?.();
322
+ if (observed)
323
+ onAppBuild(observed, platform);
324
+ }
325
+ catch {
326
+ // ignore — build capture is non-essential
327
+ }
328
+ }
277
329
  // Step 4: Run assignment loop
278
330
  for (let assignmentIdx = 0; assignmentIdx < session.assignments.length; assignmentIdx++) {
279
331
  const assignment = session.assignments[assignmentIdx];
@@ -284,6 +336,12 @@ async function runSingleSimulation(client, participantId, participantName, opts,
284
336
  // status when the loop ends because the agent terminated (completed vs
285
337
  // abandoned). Stays "in_progress" if the loop hits max_steps.
286
338
  let lastAssignmentStatus = "in_progress";
339
+ // Frame continuity (native): carry the PREVIOUS step's logical-screen
340
+ // classification + matched frame forward, so this step's match-frame call
341
+ // can tell the backend to reuse the frame when the screen didn't change
342
+ // (pure scroll / non-submitting keyboard). Reset per assignment.
343
+ let lastStepKind = "none";
344
+ let lastFrameVersionId;
287
345
  while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
288
346
  // OBSERVE — the device refreshes its own active surface (popup /
289
347
  // switch_tab for browser) before capturing. (The browser device emits
@@ -395,12 +453,16 @@ async function runSingleSimulation(client, participantId, participantName, opts,
395
453
  const actionDescs = [];
396
454
  const elementNames = [];
397
455
  const actionDebugEntries = [];
456
+ // Per-action success (index-aligned with stepResponse.actions), used to
457
+ // classify this step's logical-screen kind for frame continuity.
458
+ const perActionSuccess = [];
398
459
  const preActionScreenshot = await device.captureScreenshot();
399
460
  for (let i = 0; i < stepResponse.actions.length; i++) {
400
461
  if (isCancelled())
401
462
  break;
402
463
  const action = stepResponse.actions[i];
403
464
  const result = await device.executeAction(action);
465
+ perActionSuccess[i] = result.success;
404
466
  const desc = describeAction(action);
405
467
  debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
406
468
  const openedNewTab = result.openedNewTab;
@@ -415,29 +477,44 @@ async function runSingleSimulation(client, participantId, participantName, opts,
415
477
  const actionType = action.type || "unknown";
416
478
  const INTERNAL_ACTIONS = new Set(["think"]);
417
479
  if (!INTERNAL_ACTIONS.has(actionType)) {
480
+ // Pack `data` to match the hosted sim's map_action_to_db so native
481
+ // rows render identically. value_type lets the FE flag var/secret;
482
+ // drag's full path goes under data.coordinates (0-1000), not a
483
+ // bespoke drag_end. Secret `value` stays masked (it's the variable
484
+ // key, not the resolved secret — masking is strictly safer than the
485
+ // web path, and value_type now drives the FE lock glyph).
486
+ const actionData = {
487
+ ...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
488
+ ...(action.value_type && { value_type: action.value_type }),
489
+ ...(action.mode && { mode: action.mode }),
490
+ ...(action.submit && { submit: action.submit }),
491
+ ...(action.direction && { direction: action.direction }),
492
+ ...(action.amount && { amount: action.amount }),
493
+ ...(action.count && action.count > 1 && { count: action.count }),
494
+ ...(action.duration_ms && { duration_ms: action.duration_ms }),
495
+ ...(action.modifiers?.length && { modifiers: action.modifiers }),
496
+ ...(action.key && { key: action.key }),
497
+ ...(action.tab_id && { tab_id: action.tab_id }),
498
+ ...(action.orientation && { orientation: action.orientation }),
499
+ ...(action.panel && { panel: action.panel }),
500
+ ...(action.drag && {
501
+ coordinates: {
502
+ startX: action.drag.startX,
503
+ startY: action.drag.startY,
504
+ endX: action.drag.endX,
505
+ endY: action.drag.endY,
506
+ },
507
+ }),
508
+ ...(openedNewTab && { opened_new_tab: true }),
509
+ };
418
510
  actionDatas.push({
419
511
  action_type: actionType,
420
512
  element_label: action.element_name ?? null,
421
513
  element_type: action.element_type ?? null,
422
- coordinates: normalizedCoords,
423
- data: {
424
- ...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
425
- ...(action.mode && { mode: action.mode }),
426
- ...(action.submit && { submit: action.submit }),
427
- ...(action.direction && { direction: action.direction }),
428
- ...(action.amount && { amount: action.amount }),
429
- ...(action.count && action.count > 1 && { count: action.count }),
430
- ...(action.duration_ms && { duration_ms: action.duration_ms }),
431
- ...(action.modifiers?.length && { modifiers: action.modifiers }),
432
- ...(action.key && { key: action.key }),
433
- ...(action.tab_id && { tab_id: action.tab_id }),
434
- ...(action.orientation && { orientation: action.orientation }),
435
- ...(action.panel && { panel: action.panel }),
436
- // The recorded `coordinates` is the drag START; persist the END
437
- // (normalized 0-1000) too so the journey captures the full path.
438
- ...(action.drag && { drag_end: { x: action.drag.endX, y: action.drag.endY } }),
439
- ...(openedNewTab && { opened_new_tab: true }),
440
- },
514
+ // Drag's path lives in data.coordinates; the hosted sim leaves the
515
+ // top-level coordinates null for a drag.
516
+ coordinates: action.drag ? null : normalizedCoords,
517
+ data: Object.keys(actionData).length ? actionData : null,
441
518
  order: i,
442
519
  });
443
520
  }
@@ -494,6 +571,15 @@ async function runSingleSimulation(client, participantId, participantName, opts,
494
571
  // Native: drive FrameSourceType.ANDROID/IOS directly; browser falls
495
572
  // back to screen_format server-side.
496
573
  platform,
574
+ // Frame continuity: these describe the transition INTO this
575
+ // observation, produced by the PREVIOUS step's action. When that
576
+ // step was a pure scroll / non-submitting keyboard on a native
577
+ // device, the logical screen didn't change — tell the backend to
578
+ // reuse the previous frame instead of minting a new one off the
579
+ // shifted pixels. Carried from lastStepKind / lastFrameVersionId,
580
+ // updated AFTER this call for the next iteration.
581
+ ...(isNative && lastFrameVersionId ? { previous_frame_version_id: lastFrameVersionId } : {}),
582
+ same_screen_continuation: isNative && (lastStepKind === "scroll" || lastStepKind === "keyboard"),
497
583
  });
498
584
  frameVersionId = matchResult.frame_version_id;
499
585
  }
@@ -501,6 +587,11 @@ async function runSingleSimulation(client, participantId, participantName, opts,
501
587
  const msg = err instanceof Error ? err.message : String(err);
502
588
  log(` Warning: frame matching failed — ${msg}`);
503
589
  }
590
+ // Carry THIS step's logical-screen classification + matched frame
591
+ // forward for the NEXT iteration's match-frame call (consumed above as
592
+ // last*). Classify after the call so ordering is consume-then-update.
593
+ lastStepKind = classifyStepKind(stepResponse.actions, perActionSuccess);
594
+ lastFrameVersionId = frameVersionId;
504
595
  // Debug-only: capture post-action screenshot to show result
505
596
  let postActionBase64;
506
597
  if (isDebugEnabled()) {
@@ -520,7 +611,7 @@ async function runSingleSimulation(client, participantId, participantName, opts,
520
611
  forwards.push({ type: "LOOP_DETECTED", content: "A repetitive action cycle was detected. Try a different approach." });
521
612
  }
522
613
  // Record interaction (1-indexed step for backend)
523
- interactions.push({
614
+ const interaction = {
524
615
  step: step + 1,
525
616
  assignment_id: assignment.id,
526
617
  ...(screenshotUrl ? { screenshot_url: screenshotUrl } : { screenshot_base64: obsBase64 }),
@@ -544,7 +635,24 @@ async function runSingleSimulation(client, participantId, participantName, opts,
544
635
  // Server reduces this to Interaction.tab when N >= 2; omit on
545
636
  // single-tab steps to keep the payload (and DB column) null.
546
637
  ...(tabsSnapshot.length >= 2 ? { tabs: tabsSnapshot } : {}),
547
- });
638
+ };
639
+ // Keep the in-memory array for the debug HTML report.
640
+ interactions.push(interaction);
641
+ // Stream this interaction live so the backend persists + commits it
642
+ // immediately and fires INTERACTION_CREATED in realtime. A streaming
643
+ // failure must never abort the run — log and continue (the run-end
644
+ // finalize call still records the terminal state).
645
+ try {
646
+ await client.localSimRecordInteraction({
647
+ participant_id: session.participant_id,
648
+ product_id: session.product_id,
649
+ interaction,
650
+ });
651
+ }
652
+ catch (err) {
653
+ const msg = err instanceof Error ? err.message : String(err);
654
+ log(` Warning: failed to stream interaction ${step + 1} — ${msg}`);
655
+ }
548
656
  // Update history for next step
549
657
  history.push({
550
658
  comment: stepResponse.comment,
@@ -635,7 +743,6 @@ async function runSingleSimulation(client, participantId, participantName, opts,
635
743
  await client.localSimRecord({
636
744
  participant_id: session.participant_id,
637
745
  product_id: session.product_id,
638
- interactions,
639
746
  final_status: finalStatus,
640
747
  assignment_statuses: assignmentStatuses,
641
748
  });
@@ -53,3 +53,18 @@ export declare function isAppInstalled(udid: string, bundleId: string): Promise<
53
53
  * terminate+launch a just-installed app without diffing the app list.
54
54
  */
55
55
  export declare function bundleIdFromApp(appPath: string): Promise<string | null>;
56
+ /**
57
+ * Read the installed app's marketing version + build number from the booted
58
+ * simulator. `simctl listapps` emits a (NeXTSTEP) plist of every installed
59
+ * bundle; we round-trip it through `plutil -convert json` and index by bundle
60
+ * id. JSON-not-keypath because a bundle id's dots (`com.apple.Preferences`)
61
+ * collide with plutil's `-extract` keypath separator.
62
+ *
63
+ * Best-effort: returns null on any failure (the run never depends on it). Works
64
+ * for both CLI-installed `.app`s and pre-installed system/app-store bundles —
65
+ * by call time the bundle id is already resolved.
66
+ */
67
+ export declare function appBuildFromSimulator(udid: string, bundleId: string): Promise<{
68
+ version: string | null;
69
+ build: string | null;
70
+ } | null>;
@@ -16,7 +16,7 @@
16
16
  */
17
17
  import { execFile } from "node:child_process";
18
18
  import { existsSync } from "node:fs";
19
- import { mkdtemp, readFile, rm } from "node:fs/promises";
19
+ import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
20
20
  import { tmpdir } from "node:os";
21
21
  import { join } from "node:path";
22
22
  import { promisify } from "node:util";
@@ -142,3 +142,43 @@ export async function bundleIdFromApp(appPath) {
142
142
  return null;
143
143
  }
144
144
  }
145
+ /**
146
+ * Read the installed app's marketing version + build number from the booted
147
+ * simulator. `simctl listapps` emits a (NeXTSTEP) plist of every installed
148
+ * bundle; we round-trip it through `plutil -convert json` and index by bundle
149
+ * id. JSON-not-keypath because a bundle id's dots (`com.apple.Preferences`)
150
+ * collide with plutil's `-extract` keypath separator.
151
+ *
152
+ * Best-effort: returns null on any failure (the run never depends on it). Works
153
+ * for both CLI-installed `.app`s and pre-installed system/app-store bundles —
154
+ * by call time the bundle id is already resolved.
155
+ */
156
+ export async function appBuildFromSimulator(udid, bundleId) {
157
+ const dir = await mkdtemp(join(tmpdir(), "ish-ios-apps-"));
158
+ const path = join(dir, "apps.plist");
159
+ try {
160
+ const { stdout } = await execFileAsync(XCRUN, ["simctl", "listapps", udid], {
161
+ timeout: 60_000,
162
+ maxBuffer: 16 * 1024 * 1024,
163
+ });
164
+ await writeFile(path, stdout);
165
+ const { stdout: json } = await execFileAsync(PLUTIL, ["-convert", "json", "-o", "-", path], {
166
+ timeout: 10_000,
167
+ maxBuffer: 16 * 1024 * 1024,
168
+ });
169
+ const apps = JSON.parse(json);
170
+ const app = apps[bundleId];
171
+ if (!app)
172
+ return null;
173
+ return {
174
+ version: app.CFBundleShortVersionString ?? null,
175
+ build: app.CFBundleVersion ?? null,
176
+ };
177
+ }
178
+ catch {
179
+ return null;
180
+ }
181
+ finally {
182
+ await rm(dir, { recursive: true, force: true }).catch(() => { });
183
+ }
184
+ }