@ishlabs/cli 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +16 -0
- package/dist/commands/doctor.js +34 -9
- package/dist/commands/iteration.js +23 -5
- package/dist/commands/study-participant.js +1 -1
- package/dist/commands/study-run.js +26 -1
- package/dist/commands/study-screenshots.js +38 -5
- package/dist/lib/api-client.d.ts +4 -0
- package/dist/lib/api-client.js +6 -1
- package/dist/lib/docs.js +15 -3
- package/dist/lib/local-sim/actions.d.ts +18 -0
- package/dist/lib/local-sim/actions.js +30 -0
- package/dist/lib/local-sim/adb.d.ts +39 -0
- package/dist/lib/local-sim/adb.js +152 -17
- package/dist/lib/local-sim/android.d.ts +12 -4
- package/dist/lib/local-sim/android.js +44 -11
- package/dist/lib/local-sim/device.d.ts +44 -0
- package/dist/lib/local-sim/ios.d.ts +12 -5
- package/dist/lib/local-sim/ios.js +45 -11
- package/dist/lib/local-sim/loop.js +220 -26
- package/dist/lib/local-sim/native-a11y.d.ts +24 -0
- package/dist/lib/local-sim/native-a11y.js +76 -14
- package/dist/lib/local-sim/screen-signature.d.ts +77 -0
- package/dist/lib/local-sim/screen-signature.js +166 -0
- package/dist/lib/local-sim/simctl.d.ts +15 -0
- package/dist/lib/local-sim/simctl.js +41 -1
- package/dist/lib/local-sim/types.d.ts +11 -2
- package/dist/lib/local-sim/xcuitest.d.ts +7 -0
- package/dist/lib/local-sim/xcuitest.js +16 -0
- package/dist/lib/modality.js +7 -2
- package/dist/lib/paths.d.ts +6 -0
- package/dist/lib/paths.js +9 -0
- package/dist/lib/report-readiness.d.ts +44 -0
- package/dist/lib/report-readiness.js +74 -0
- package/dist/lib/skill-content.js +2 -0
- package/package.json +1 -1
|
@@ -10,30 +10,90 @@
|
|
|
10
10
|
* backend's 0-1000 coordinates against the screencap pixel size and taps
|
|
11
11
|
* directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
|
|
12
12
|
*/
|
|
13
|
-
import { execFile } from "node:child_process";
|
|
14
|
-
import { existsSync } from "node:fs";
|
|
13
|
+
import { execFile, execFileSync } from "node:child_process";
|
|
14
|
+
import { existsSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
|
|
15
|
+
import { join } from "node:path";
|
|
15
16
|
import { promisify } from "node:util";
|
|
17
|
+
import { binDir, adbBin } from "../paths.js";
|
|
16
18
|
const execFileAsync = promisify(execFile);
|
|
17
|
-
// adb
|
|
18
|
-
//
|
|
19
|
-
//
|
|
20
|
-
|
|
19
|
+
// Resolve adb without depending on the caller's PATH. Lookup order:
// ISH_ADB / ADB override → the Android SDK (ANDROID_HOME / ANDROID_SDK_ROOT) →
// Homebrew → our own download cache → PATH. If none is found, ensureAdb()
// fetches Google's standalone platform-tools (a small zip) into ~/.ish/bin,
// mirroring how cloudflared / the iOS WebDriverAgent runner are fetched.
/**
 * Locate an adb binary that already exists on this machine.
 *
 * @returns {string | null} A path to adb (or the bare command name `"adb"` when
 *   it only resolves through PATH), or null when no candidate was found.
 */
function findAdb() {
    // 1. Explicit override. Ignored when it names a path that does not exist.
    const fromEnv = process.env.ISH_ADB || process.env.ADB;
    if (fromEnv && existsSync(fromEnv)) {
        return fromEnv;
    }
    // 2. Android SDK. On Windows the SDK ships `adb.exe`, so probing only for
    //    `adb` would never find it; try the platform-specific name first.
    const sdkHome = process.env.ANDROID_HOME || process.env.ANDROID_SDK_ROOT;
    if (sdkHome) {
        const binaryNames = process.platform === "win32" ? ["adb.exe", "adb"] : ["adb"];
        const sdkAdb = binaryNames
            .map((name) => join(sdkHome, "platform-tools", name))
            .find((candidate) => existsSync(candidate));
        if (sdkAdb) {
            return sdkAdb;
        }
    }
    // 3. Homebrew's fixed install location.
    const homebrew = "/opt/homebrew/bin/adb";
    if (existsSync(homebrew)) {
        return homebrew;
    }
    // 4. Our downloaded cache.
    const cached = adbBin();
    if (existsSync(cached)) {
        return cached;
    }
    // 5. PATH fallback — only if `adb` actually resolves there.
    try {
        execFileSync(process.platform === "win32" ? "where" : "which", ["adb"], { stdio: "ignore" });
        return "adb";
    }
    catch {
        return null;
    }
}
|
|
48
|
+
// Resolved adb location, memoized for the lifetime of the process.
let cachedAdb = null;
// In-flight resolution shared by concurrent callers, so parallel first calls do
// not each start their own lookup / platform-tools download.
let pendingAdb = null;
/**
 * Resolve adb, downloading Google's platform-tools on first use if not found.
 *
 * The result is cached after the first successful resolution. A failed
 * resolution is not cached: the next call retries from scratch.
 *
 * @returns {Promise<string>} Path (or command name) of the adb binary to run.
 * @throws Whatever downloadAdb() throws when no local adb exists and the
 *   download or unpack fails.
 */
export async function ensureAdb() {
    if (cachedAdb) {
        return cachedAdb;
    }
    pendingAdb ??= (async () => findAdb() ?? (await downloadAdb()))()
        .then((bin) => {
            cachedAdb = bin;
            return bin;
        })
        .finally(() => {
            // Clear the slot on success and failure alike; success is served
            // from cachedAdb, failure must allow a retry.
            pendingAdb = null;
        });
    return pendingAdb;
}
|
|
56
|
+
/** Maps `process.platform` to the OS token used in Google's platform-tools zip names. */
const PLATFORM_TOOLS_OS = {
    darwin: "darwin",
    linux: "linux",
    win32: "windows",
};
/**
 * Fetch + unpack Google's standalone platform-tools into binDir().
 *
 * The zip is written to `<binDir>/platform-tools.zip`, extracted in place, and
 * always removed afterwards — including when extraction fails, so a broken
 * archive is never left behind.
 *
 * @returns {Promise<string>} The path reported by adbBin() once it exists.
 * @throws {AdbError} When the platform has no prebuilt zip, the download fails
 *   (network error, timeout, non-2xx status, body read failure), the archive
 *   cannot be unpacked, or adb is missing after unpacking.
 */
async function downloadAdb() {
    const os = PLATFORM_TOOLS_OS[process.platform];
    if (!os) {
        throw new AdbError(`no prebuilt adb for ${process.platform}; install Android platform-tools and set ISH_ADB`);
    }
    const url = `https://dl.google.com/android/repository/platform-tools-latest-${os}.zip`;
    const dir = binDir();
    console.error("Fetching adb (Android platform-tools) from Google...");
    mkdirSync(dir, { recursive: true });
    const zipPath = join(dir, "platform-tools.zip");
    let resp;
    try {
        resp = await fetch(url, { signal: AbortSignal.timeout(120_000) });
    }
    catch (e) {
        throw new AdbError(`failed to download platform-tools from ${url}: ${e instanceof Error ? e.message : String(e)}`);
    }
    if (!resp.ok)
        throw new AdbError(`failed to download platform-tools: HTTP ${resp.status} from ${url}`);
    // Reading the body can still fail (connection drop, timeout firing
    // mid-transfer); report that as a download failure too.
    let archive;
    try {
        archive = Buffer.from(await resp.arrayBuffer());
    }
    catch (e) {
        throw new AdbError(`failed to download platform-tools from ${url}: ${e instanceof Error ? e.message : String(e)}`);
    }
    try {
        writeFileSync(zipPath, archive);
        try {
            // The zip carries a top-level `platform-tools/` dir; extract into binDir().
            const [cmd, args] = process.platform === "win32"
                ? ["tar", ["-xf", zipPath, "-C", dir]]
                : ["unzip", ["-o", "-q", zipPath, "-d", dir]];
            await execFileAsync(cmd, args, { timeout: 120_000 });
        }
        catch (e) {
            throw new AdbError(`failed to unpack platform-tools: ${e instanceof Error ? e.message : String(e)}`);
        }
    }
    finally {
        // Remove the archive whether or not extraction succeeded.
        rmSync(zipPath, { force: true });
    }
    if (!existsSync(adbBin()))
        throw new AdbError(`platform-tools unpacked but adb is missing at ${adbBin()}`);
    return adbBin();
}
|
|
36
|
-
const ADB = resolveAdb();
|
|
37
97
|
const DEFAULT_TIMEOUT_MS = 30_000;
|
|
38
98
|
// screencap on a cold emulator frame can be slow; give it generous headroom.
|
|
39
99
|
const SCREENCAP_TIMEOUT_MS = 30_000;
|
|
@@ -47,8 +107,9 @@ export class AdbError extends Error {
|
|
|
47
107
|
}
|
|
48
108
|
/** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
|
|
49
109
|
export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
110
|
+
const bin = await ensureAdb();
|
|
50
111
|
try {
|
|
51
|
-
const { stdout } = await execFileAsync(
|
|
112
|
+
const { stdout } = await execFileAsync(bin, args, {
|
|
52
113
|
timeout: timeoutMs,
|
|
53
114
|
maxBuffer: 4 * 1024 * 1024,
|
|
54
115
|
});
|
|
@@ -63,14 +124,75 @@ export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
|
63
124
|
export async function adbShell(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
64
125
|
return adb(["shell", ...args], timeoutMs);
|
|
65
126
|
}
|
|
127
|
+
/**
 * Pull versionName / versionCode out of `dumpsys package <pkg>` text. The
 * relevant lines read `versionCode=42 minSdk=24 targetSdk=34` and
 * `versionName=1.2.3`; `\d+` stops the build before the trailing tokens and
 * `\S+` takes the version up to the next whitespace.
 *
 * @param {string} out Raw dumpsys output.
 * @returns {{ version: string | null, build: string | null } | null} The first
 *   versionName / versionCode found (either may be null on its own), or null
 *   when neither is present (wrong/empty package) or `out` is not a string.
 */
export function parseDumpsysAppBuild(out) {
    // Best-effort parser: unusable input is "nothing found", not a TypeError.
    if (typeof out !== "string") {
        return null;
    }
    const version = /versionName=(\S+)/.exec(out)?.[1] ?? null;
    const build = /versionCode=(\d+)/.exec(out)?.[1] ?? null;
    if (version === null && build === null) {
        return null;
    }
    return { version, build };
}
|
|
141
|
+
/**
 * Look up an installed package's versionName / versionCode by running
 * `dumpsys package <pkg>` on the device and parsing the output.
 *
 * Best-effort by design: any failure (adb error, timeout, parse problem) is
 * swallowed and reported as null, so callers never need to guard this call.
 *
 * @param {string} pkg Package name to query.
 * @returns {Promise<{ version: string | null, build: string | null } | null>}
 */
export async function appBuildFromDevice(pkg) {
    let dumpsysText;
    try {
        dumpsysText = await adbShell(["dumpsys", "package", pkg], 30_000);
        return parseDumpsysAppBuild(dumpsysText);
    }
    catch {
        // Deliberately silent: build metadata is optional.
        return null;
    }
}
|
|
155
|
+
/**
 * Pull `"pkg/activity"` out of `dumpsys activity activities`. The foreground
 * activity surfaces as `topResumedActivity=ActivityRecord{... u0 pkg/activity
 * t123}` (older builds: `mResumedActivity: ...` / `mResumedActivity=...`); the
 * `pkg/activity` token is taken from the first of those forms that is present,
 * in that order of preference. The activity may be a short `.Name` (relative
 * to the package) — kept as-is, exactly what dumpsys reports.
 *
 * @param {string} out Raw dumpsys output.
 * @returns {string} The `pkg/activity` token, or "" when none of the lines is
 *   present or `out` is not a string.
 */
export function parseTopActivity(out) {
    if (typeof out !== "string") {
        return "";
    }
    // The component is matched with `[^\s}]+` rather than `\S+` so that a record
    // whose component is its last token (`... u0 pkg/.Main}`) does not drag the
    // closing brace into the result.
    const patterns = [
        /topResumedActivity=ActivityRecord\{[^}]*\s([^\s}]+\/[^\s}]+)/,
        /mResumedActivity:\s*ActivityRecord\{[^}]*\s([^\s}]+\/[^\s}]+)/,
        /mResumedActivity=ActivityRecord\{[^}]*\s([^\s}]+\/[^\s}]+)/,
    ];
    for (const pattern of patterns) {
        const found = pattern.exec(out);
        if (found) {
            return found[1];
        }
    }
    return "";
}
|
|
173
|
+
/**
 * Report the foreground `"pkg/activity"` by running
 * `dumpsys activity activities` and parsing the result. It is a coarse input
 * for the screen signature.
 *
 * Best-effort by design: any failure resolves to "" (the signature then
 * degrades to its package-only coarse token) and never interrupts the run.
 *
 * @returns {Promise<string>} The `pkg/activity` token, or "" when unavailable.
 */
export async function currentActivity() {
    let activitiesDump;
    try {
        activitiesDump = await adbShell(["dumpsys", "activity", "activities"], 15_000);
        return parseTopActivity(activitiesDump);
    }
    catch {
        // Deliberately silent: the activity read is optional.
        return "";
    }
}
|
|
66
187
|
/**
|
|
67
188
|
* Capture the current screen as raw PNG bytes via `adb exec-out screencap -p`.
|
|
68
189
|
* `exec-out` (not `shell`) avoids the CRLF translation that corrupts binary
|
|
69
190
|
* output. Returns the PNG buffer at full device resolution.
|
|
70
191
|
*/
|
|
71
192
|
export async function screencapPng() {
|
|
193
|
+
const bin = await ensureAdb();
|
|
72
194
|
try {
|
|
73
|
-
const { stdout } = await execFileAsync(
|
|
195
|
+
const { stdout } = await execFileAsync(bin, ["exec-out", "screencap", "-p"], {
|
|
74
196
|
timeout: SCREENCAP_TIMEOUT_MS,
|
|
75
197
|
maxBuffer: SCREENCAP_MAX_BUFFER,
|
|
76
198
|
encoding: "buffer",
|
|
@@ -90,7 +212,7 @@ export async function requireOneDevice() {
|
|
|
90
212
|
}
|
|
91
213
|
catch (err) {
|
|
92
214
|
const msg = err instanceof Error ? err.message : String(err);
|
|
93
|
-
throw new AdbError(`Could not run adb (looked for "${
|
|
215
|
+
throw new AdbError(`Could not run adb (looked for "${findAdb() ?? "adb"}"). Run \`ish check android\` to check your setup. ${msg}`);
|
|
94
216
|
}
|
|
95
217
|
// Output: "List of devices attached\n<serial>\tdevice\n..."
|
|
96
218
|
const online = out
|
|
@@ -101,8 +223,21 @@ export async function requireOneDevice() {
|
|
|
101
223
|
if (online.length === 0) {
|
|
102
224
|
throw new AdbError("No Android device/emulator online. Run `ish check android` to check your setup and how to boot one.");
|
|
103
225
|
}
|
|
226
|
+
// Honor ANDROID_SERIAL (the standard adb convention): when it names an online
|
|
227
|
+
// device, pin to it instead of failing on "more than one device". The adb
|
|
228
|
+
// wrapper inherits process.env, so every subsequent `adb` call already targets
|
|
229
|
+
// that serial — this lets multiple emulators run in parallel, each driven by a
|
|
230
|
+
// CLI invocation with its own ANDROID_SERIAL.
|
|
231
|
+
const pinned = process.env.ANDROID_SERIAL?.trim();
|
|
232
|
+
if (pinned) {
|
|
233
|
+
if (online.some((l) => l.startsWith(`${pinned}\t`)))
|
|
234
|
+
return;
|
|
235
|
+
throw new AdbError(`ANDROID_SERIAL=${pinned} is set but that device is not online. ` +
|
|
236
|
+
`Online: ${online.map((l) => l.split("\t")[0]).join(", ") || "none"}.`);
|
|
237
|
+
}
|
|
104
238
|
if (online.length > 1) {
|
|
105
|
-
throw new AdbError(`Expected exactly one Android device, found ${online.length}.
|
|
239
|
+
throw new AdbError(`Expected exactly one Android device, found ${online.length}. ` +
|
|
240
|
+
`Stop the extras, or set ANDROID_SERIAL=<serial> to pin one (parallel runs).`);
|
|
106
241
|
}
|
|
107
242
|
}
|
|
108
243
|
// --- Input gestures (all in screencap pixel space) ---
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
* - Vision path: px = round(x / 1000 * screencapWidth); same for y.
|
|
20
20
|
*/
|
|
21
21
|
import type { LocalStepAction, ContextValue } from "./types.js";
|
|
22
|
-
import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
|
|
22
|
+
import type { SimulationDevice, DeviceObservation, DeviceActionResult, AppBuild } from "./device.js";
|
|
23
23
|
export interface AndroidDeviceOptions {
|
|
24
24
|
/** App package name to force-stop/relaunch between participants. May be derived from --app. */
|
|
25
25
|
appPackage?: string;
|
|
@@ -47,6 +47,12 @@ export declare class AndroidDevice implements SimulationDevice {
|
|
|
47
47
|
private adbKeyboardActive;
|
|
48
48
|
constructor(opts: AndroidDeviceOptions);
|
|
49
49
|
launchOrReset(target: string): Promise<void>;
|
|
50
|
+
/**
|
|
51
|
+
* The installed app's version/build, read off the device after
|
|
52
|
+
* launchOrReset has resolved the package. Best-effort — null until the
|
|
53
|
+
* package is known, or if dumpsys can't report it.
|
|
54
|
+
*/
|
|
55
|
+
appBuild(): Promise<AppBuild | null>;
|
|
50
56
|
/**
|
|
51
57
|
* Resolve which package to drive, returning a non-null package name or
|
|
52
58
|
* throwing. For a local .apk we read the package straight from its binary
|
|
@@ -60,9 +66,11 @@ export declare class AndroidDevice implements SimulationDevice {
|
|
|
60
66
|
private refreshDimensions;
|
|
61
67
|
observe(): Promise<DeviceObservation>;
|
|
62
68
|
/**
|
|
63
|
-
* Dump + serialize the uiautomator a11y tree.
|
|
64
|
-
*
|
|
65
|
-
*
|
|
69
|
+
* Dump + serialize the uiautomator a11y tree. Returns the serialized tree, the
|
|
70
|
+
* node map, the FLAT parsed nodes (for the screen signature) and the
|
|
71
|
+
* foreground package read off the dump. Any failure (dump retries exhausted,
|
|
72
|
+
* parse error) degrades to an empty tree so the backend falls back to the
|
|
73
|
+
* vision path — a missing tree must never abort the observation.
|
|
66
74
|
*/
|
|
67
75
|
private dumpTree;
|
|
68
76
|
captureScreenshot(): Promise<string>;
|
|
@@ -19,10 +19,11 @@
|
|
|
19
19
|
* - Vision path: px = round(x / 1000 * screencapWidth); same for y.
|
|
20
20
|
*/
|
|
21
21
|
import { resolveTextValue } from "./actions.js";
|
|
22
|
-
import { requireOneDevice, screencapPng, pngDimensions, dumpUiautomatorXml, inputTap, inputSwipe, inputDrag, inputLongPress, setUserRotation, forceStop, launchApp, installApk, isPackageInstalled, listPackages, isAdbKeyboardInstalled, enableAdbKeyboard, setIme, resetIme, currentIme, adbKeyboardType, adbKeyboardClear, pressKeyEvent, statusbarExpand, ADB_KEYBOARD_PKG, } from "./adb.js";
|
|
22
|
+
import { requireOneDevice, screencapPng, pngDimensions, dumpUiautomatorXml, inputTap, inputSwipe, inputDrag, inputLongPress, setUserRotation, forceStop, launchApp, installApk, isPackageInstalled, listPackages, isAdbKeyboardInstalled, enableAdbKeyboard, setIme, resetIme, currentIme, adbKeyboardType, adbKeyboardClear, pressKeyEvent, statusbarExpand, appBuildFromDevice, currentActivity, ADB_KEYBOARD_PKG, } from "./adb.js";
|
|
23
23
|
import { isLocalPath } from "../upload.js";
|
|
24
24
|
import { deNormalizePoint, deNormalizeDrag } from "./coordinates.js";
|
|
25
|
-
import { parseUiautomatorXml, serializeNativeTree, boundsCenter } from "./native-a11y.js";
|
|
25
|
+
import { parseUiautomatorXml, serializeNativeTree, boundsCenter, androidPackage, } from "./native-a11y.js";
|
|
26
|
+
import { computeScreenSignature } from "./screen-signature.js";
|
|
26
27
|
import { packageNameFromApk } from "./apk-manifest.js";
|
|
27
28
|
// Let animations/IME transitions settle before the next observation so the
|
|
28
29
|
// screenshot the LLM reasons over reflects the action's result.
|
|
@@ -74,6 +75,21 @@ export class AndroidDevice {
|
|
|
74
75
|
// Prime screencap dimensions for the first de-normalization.
|
|
75
76
|
await this.refreshDimensions();
|
|
76
77
|
}
|
|
78
|
+
/**
|
|
79
|
+
* The installed app's version/build, read off the device after
|
|
80
|
+
* launchOrReset has resolved the package. Best-effort — null until the
|
|
81
|
+
* package is known, or if dumpsys can't report it.
|
|
82
|
+
*/
|
|
83
|
+
async appBuild() {
|
|
84
|
+
if (!this.appPackage)
|
|
85
|
+
return null;
|
|
86
|
+
const meta = await appBuildFromDevice(this.appPackage);
|
|
87
|
+
return {
|
|
88
|
+
package: this.appPackage,
|
|
89
|
+
version: meta?.version ?? null,
|
|
90
|
+
build: meta?.build ?? null,
|
|
91
|
+
};
|
|
92
|
+
}
|
|
77
93
|
/**
|
|
78
94
|
* Resolve which package to drive, returning a non-null package name or
|
|
79
95
|
* throwing. For a local .apk we read the package straight from its binary
|
|
@@ -160,14 +176,24 @@ export class AndroidDevice {
|
|
|
160
176
|
return png;
|
|
161
177
|
}
|
|
162
178
|
async observe() {
|
|
163
|
-
// Screencap
|
|
164
|
-
// The dump is wrapped so a failure degrades to the
|
|
165
|
-
// rather than aborting the observation
|
|
166
|
-
|
|
179
|
+
// Screencap, the a11y dump, and the foreground-activity read are independent
|
|
180
|
+
// — run them in parallel. The dump is wrapped so a failure degrades to the
|
|
181
|
+
// vision path (empty tree) rather than aborting the observation; the
|
|
182
|
+
// activity read is best-effort ("" on failure → package-only coarse token).
|
|
183
|
+
const [png, tree, activity] = await Promise.all([
|
|
167
184
|
this.refreshDimensions(),
|
|
168
185
|
this.dumpTree(),
|
|
186
|
+
currentActivity(),
|
|
169
187
|
]);
|
|
170
188
|
this.lastNodeMap = tree.nodeMap;
|
|
189
|
+
// Scroll-invariant screen signature from this dump's parsed nodes + coarse
|
|
190
|
+
// inputs (foreground package/activity). Sent only when usable (see loop.ts).
|
|
191
|
+
const coarseInputs = {
|
|
192
|
+
platform: "android",
|
|
193
|
+
package: tree.package,
|
|
194
|
+
activity,
|
|
195
|
+
};
|
|
196
|
+
const screenSignature = computeScreenSignature(tree.nodes, coarseInputs);
|
|
171
197
|
return {
|
|
172
198
|
screenshot: png.toString("base64"),
|
|
173
199
|
// Element path when the dump produced a tree; "" → backend vision branch.
|
|
@@ -178,12 +204,19 @@ export class AndroidDevice {
|
|
|
178
204
|
// Native has no scrollable document; the screen IS the page.
|
|
179
205
|
documentHeight: this.screenHeight,
|
|
180
206
|
tabs: [],
|
|
207
|
+
screenSignature,
|
|
208
|
+
// Corpus-dump only (ISH_DUMP_CORPUS): the exact parsed nodes + coarse
|
|
209
|
+
// inputs the signature consumed, so any algorithm can be replayed offline.
|
|
210
|
+
nativeNodes: tree.nodes,
|
|
211
|
+
coarseInputs,
|
|
181
212
|
};
|
|
182
213
|
}
|
|
183
214
|
/**
|
|
184
|
-
* Dump + serialize the uiautomator a11y tree.
|
|
185
|
-
*
|
|
186
|
-
*
|
|
215
|
+
* Dump + serialize the uiautomator a11y tree. Returns the serialized tree, the
|
|
216
|
+
* node map, the FLAT parsed nodes (for the screen signature) and the
|
|
217
|
+
* foreground package read off the dump. Any failure (dump retries exhausted,
|
|
218
|
+
* parse error) degrades to an empty tree so the backend falls back to the
|
|
219
|
+
* vision path — a missing tree must never abort the observation.
|
|
187
220
|
*/
|
|
188
221
|
async dumpTree() {
|
|
189
222
|
try {
|
|
@@ -191,12 +224,12 @@ export class AndroidDevice {
|
|
|
191
224
|
const nodes = parseUiautomatorXml(xml);
|
|
192
225
|
const tree = serializeNativeTree(nodes);
|
|
193
226
|
this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
|
|
194
|
-
return tree;
|
|
227
|
+
return { ...tree, nodes, package: androidPackage(xml) };
|
|
195
228
|
}
|
|
196
229
|
catch (err) {
|
|
197
230
|
const msg = err instanceof Error ? err.message : String(err);
|
|
198
231
|
this.log(`a11y dump failed, falling back to vision: ${msg}`);
|
|
199
|
-
return { simplified: "", nodeMap: new Map() };
|
|
232
|
+
return { simplified: "", nodeMap: new Map(), nodes: [], package: "" };
|
|
200
233
|
}
|
|
201
234
|
}
|
|
202
235
|
async captureScreenshot() {
|
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
import type { Browser } from "playwright-core";
|
|
15
15
|
import type { LocalStepAction, LocalSimBrowserOptions, LocalTabInfo, ContextValue } from "./types.js";
|
|
16
16
|
import type { BrowserSession } from "./browser.js";
|
|
17
|
+
import type { ScreenSignature, CoarseInputs } from "./screen-signature.js";
|
|
18
|
+
import type { NativeNode } from "./native-a11y.js";
|
|
17
19
|
/**
|
|
18
20
|
* One observation of the target's current state.
|
|
19
21
|
*
|
|
@@ -39,6 +41,29 @@ export interface DeviceObservation {
|
|
|
39
41
|
documentHeight: number;
|
|
40
42
|
/** Open-tab snapshot (browser-only; empty for native). */
|
|
41
43
|
tabs: LocalTabInfo[];
|
|
44
|
+
/**
|
|
45
|
+
* Native only: the scroll-invariant structural "screen signature" computed
|
|
46
|
+
* from this observation's a11y tree (see screen-signature.ts). The loop sends
|
|
47
|
+
* `value` as the match-frame anchor ONLY when `usable` is true; browser
|
|
48
|
+
* targets omit it. Undefined when the platform doesn't compute one.
|
|
49
|
+
*/
|
|
50
|
+
screenSignature?: ScreenSignature;
|
|
51
|
+
/**
|
|
52
|
+
* Native only, corpus-dump only: the PARSED a11y nodes that
|
|
53
|
+
* `computeScreenSignature` consumed for this observation (the exact array, so
|
|
54
|
+
* any signature algorithm can be replayed offline against it). Populated by the
|
|
55
|
+
* android/ios `observe()`; the browser leaves it undefined. Only surfaced for
|
|
56
|
+
* the `ISH_DUMP_CORPUS` instrumentation in loop.ts — nothing in the live path
|
|
57
|
+
* reads it.
|
|
58
|
+
*/
|
|
59
|
+
nativeNodes?: NativeNode[];
|
|
60
|
+
/**
|
|
61
|
+
* Native only, corpus-dump only: the `CoarseInputs` (platform / package /
|
|
62
|
+
* activity / bundleId) fed into `computeScreenSignature` for this observation.
|
|
63
|
+
* Populated by the android/ios `observe()`; the browser leaves it undefined.
|
|
64
|
+
* Same instrumentation-only purpose as `nativeNodes`.
|
|
65
|
+
*/
|
|
66
|
+
coarseInputs?: CoarseInputs;
|
|
42
67
|
}
|
|
43
68
|
/**
|
|
44
69
|
* Result of executing one action against the target.
|
|
@@ -56,6 +81,19 @@ export interface DeviceActionResult {
|
|
|
56
81
|
} | null;
|
|
57
82
|
openedNewTab: boolean;
|
|
58
83
|
}
|
|
84
|
+
/**
 * Identity of the installed native app a run is driving, read off the device
 * after `launchOrReset`, so the web app can show which build an iteration last
 * ran against.
 */
export interface AppBuild {
/** Resolved bundle id (iOS) / package name (Android). */
package: string;
/** Marketing version (CFBundleShortVersionString / versionName); null when the device doesn't report it. */
version: string | null;
/** Build number (CFBundleVersion / versionCode); null when the device doesn't report it. */
build: string | null;
}
|
|
59
97
|
/**
|
|
60
98
|
* A drivable simulation target. Implementations own their own lifecycle and
|
|
61
99
|
* (for the browser) tab bookkeeping.
|
|
@@ -90,6 +128,12 @@ export interface SimulationDevice {
|
|
|
90
128
|
executeAction(action: LocalStepAction): Promise<DeviceActionResult>;
|
|
91
129
|
/** Current location string for recording (URL for browser; "" for native). */
|
|
92
130
|
currentUrl(): string;
|
|
131
|
+
/**
|
|
132
|
+
* Native only: the version/build of the installed app being driven, read
|
|
133
|
+
* off the device after `launchOrReset`. Browser omits it. Best-effort — a
|
|
134
|
+
* failed read resolves to null and never disturbs the run.
|
|
135
|
+
*/
|
|
136
|
+
appBuild?(): Promise<AppBuild | null>;
|
|
93
137
|
/** Tear down. For shared-browser tabs this closes just the tab. */
|
|
94
138
|
close(): Promise<void>;
|
|
95
139
|
}
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
* backend never converts coords with screen_width/height.
|
|
32
32
|
*/
|
|
33
33
|
import type { LocalStepAction, ContextValue } from "./types.js";
|
|
34
|
-
import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
|
|
34
|
+
import type { SimulationDevice, DeviceObservation, DeviceActionResult, AppBuild } from "./device.js";
|
|
35
35
|
export interface IosDeviceOptions {
|
|
36
36
|
/** Bundle id to terminate/relaunch between participants. Derived from --app when a .app is given. */
|
|
37
37
|
bundleId?: string;
|
|
@@ -70,6 +70,12 @@ export declare class IOSDevice implements SimulationDevice {
|
|
|
70
70
|
private lastNodeMap;
|
|
71
71
|
constructor(opts: IosDeviceOptions);
|
|
72
72
|
launchOrReset(target: string): Promise<void>;
|
|
73
|
+
/**
|
|
74
|
+
* The installed app's version/build, read off the simulator after
|
|
75
|
+
* launchOrReset has resolved the bundle id. Best-effort — null until the
|
|
76
|
+
* bundle id is known, or if simctl/plutil can't report it.
|
|
77
|
+
*/
|
|
78
|
+
appBuild(): Promise<AppBuild | null>;
|
|
73
79
|
/**
|
|
74
80
|
* Resolve the bundle id to drive, returning a non-null id or throwing.
|
|
75
81
|
* Installs a local `.app` first and reads its CFBundleIdentifier from
|
|
@@ -80,10 +86,11 @@ export declare class IOSDevice implements SimulationDevice {
|
|
|
80
86
|
private refreshScreen;
|
|
81
87
|
observe(): Promise<DeviceObservation>;
|
|
82
88
|
/**
|
|
83
|
-
* Read + serialize WDA's /source a11y tree (bounds in POINTS).
|
|
84
|
-
*
|
|
85
|
-
*
|
|
86
|
-
*
|
|
89
|
+
* Read + serialize WDA's /source a11y tree (bounds in POINTS). Returns the
|
|
90
|
+
* serialized tree, the node map and the FLAT parsed nodes (for the screen
|
|
91
|
+
* signature). Any failure (retries exhausted on a trivial tree, parse error)
|
|
92
|
+
* degrades to an empty tree so the backend falls back to vision — a missing
|
|
93
|
+
* tree must never abort the observation.
|
|
87
94
|
*/
|
|
88
95
|
private dumpTree;
|
|
89
96
|
captureScreenshot(): Promise<string>;
|
|
@@ -31,12 +31,13 @@
|
|
|
31
31
|
* backend never converts coords with screen_width/height.
|
|
32
32
|
*/
|
|
33
33
|
import { resolveTextValue } from "./actions.js";
|
|
34
|
-
import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, } from "./simctl.js";
|
|
34
|
+
import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, appBuildFromSimulator, } from "./simctl.js";
|
|
35
35
|
// iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
|
|
36
|
-
import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
|
|
36
|
+
import { ensureWda, closeWda, describeScreen, describeAll, activeBundleId, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
|
|
37
37
|
import { isLocalPath } from "../upload.js";
|
|
38
38
|
import { deNormalizePoint, deNormalizeDrag, pointToPixel } from "./coordinates.js";
|
|
39
39
|
import { parseXcuiHierarchy, serializeNativeTree, boundsCenter } from "./native-a11y.js";
|
|
40
|
+
import { computeScreenSignature } from "./screen-signature.js";
|
|
40
41
|
// Let animations/transitions settle before the next observation so the
|
|
41
42
|
// screenshot the LLM reasons over reflects the action's result.
|
|
42
43
|
const POST_GESTURE_SETTLE_MS = 500;
|
|
@@ -116,6 +117,21 @@ export class IOSDevice {
|
|
|
116
117
|
await launchApp(this.udid, bundleId);
|
|
117
118
|
await settle(1500); // cold start needs longer than a gesture settle
|
|
118
119
|
}
|
|
120
|
+
/**
|
|
121
|
+
* The installed app's version/build, read off the simulator after
|
|
122
|
+
* launchOrReset has resolved the bundle id. Best-effort — null until the
|
|
123
|
+
* bundle id is known, or if simctl/plutil can't report it.
|
|
124
|
+
*/
|
|
125
|
+
async appBuild() {
|
|
126
|
+
if (!this.bundleId || !this.udid)
|
|
127
|
+
return null;
|
|
128
|
+
const meta = await appBuildFromSimulator(this.udid, this.bundleId);
|
|
129
|
+
return {
|
|
130
|
+
package: this.bundleId,
|
|
131
|
+
version: meta?.version ?? null,
|
|
132
|
+
build: meta?.build ?? null,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
119
135
|
/**
|
|
120
136
|
* Resolve the bundle id to drive, returning a non-null id or throwing.
|
|
121
137
|
* Installs a local `.app` first and reads its CFBundleIdentifier from
|
|
@@ -161,14 +177,26 @@ export class IOSDevice {
|
|
|
161
177
|
}
|
|
162
178
|
async observe() {
|
|
163
179
|
// Refresh geometry each step (orientation can change), then capture the
|
|
164
|
-
// pixel screenshot
|
|
165
|
-
// dump is wrapped so a failure degrades to the
|
|
180
|
+
// pixel screenshot, the a11y tree, and the active bundle id in parallel
|
|
181
|
+
// (independent reads). The dump is wrapped so a failure degrades to the
|
|
182
|
+
// vision path (empty tree); the bundle-id read is best-effort ("" on
|
|
183
|
+
// failure → the navTitle-only coarse token).
|
|
166
184
|
await this.refreshScreen();
|
|
167
|
-
const [png, tree] = await Promise.all([
|
|
185
|
+
const [png, tree, bundleId] = await Promise.all([
|
|
168
186
|
screenshotPng(),
|
|
169
187
|
this.dumpTree(),
|
|
188
|
+
activeBundleId(this.udid),
|
|
170
189
|
]);
|
|
171
190
|
this.lastNodeMap = tree.nodeMap;
|
|
191
|
+
// Scroll-invariant screen signature from this dump's parsed nodes + coarse
|
|
192
|
+
// inputs (active bundle id; navTitle is derived from the nodes). iOS is
|
|
193
|
+
// best-effort — sparse SwiftUI trees are usually unusable and fall back to
|
|
194
|
+
// Phase-1 continuity (sent only when usable; see loop.ts).
|
|
195
|
+
const coarseInputs = {
|
|
196
|
+
platform: "ios",
|
|
197
|
+
bundleId,
|
|
198
|
+
};
|
|
199
|
+
const screenSignature = computeScreenSignature(tree.nodes, coarseInputs);
|
|
172
200
|
return {
|
|
173
201
|
screenshot: png.toString("base64"),
|
|
174
202
|
// Element path when describe-all produced a tree; "" → backend vision.
|
|
@@ -181,13 +209,19 @@ export class IOSDevice {
|
|
|
181
209
|
// Native has no scrollable document; the screen IS the page.
|
|
182
210
|
documentHeight: this.pixelHeight,
|
|
183
211
|
tabs: [],
|
|
212
|
+
screenSignature,
|
|
213
|
+
// Corpus-dump only (ISH_DUMP_CORPUS): the exact parsed nodes + coarse
|
|
214
|
+
// inputs the signature consumed, so any algorithm can be replayed offline.
|
|
215
|
+
nativeNodes: tree.nodes,
|
|
216
|
+
coarseInputs,
|
|
184
217
|
};
|
|
185
218
|
}
|
|
186
219
|
/**
|
|
187
|
-
* Read + serialize WDA's /source a11y tree (bounds in POINTS).
|
|
188
|
-
*
|
|
189
|
-
*
|
|
190
|
-
*
|
|
220
|
+
* Read + serialize WDA's /source a11y tree (bounds in POINTS). Returns the
|
|
221
|
+
* serialized tree, the node map and the FLAT parsed nodes (for the screen
|
|
222
|
+
* signature). Any failure (retries exhausted on a trivial tree, parse error)
|
|
223
|
+
* degrades to an empty tree so the backend falls back to vision — a missing
|
|
224
|
+
* tree must never abort the observation.
|
|
191
225
|
*/
|
|
192
226
|
async dumpTree() {
|
|
193
227
|
try {
|
|
@@ -195,12 +229,12 @@ export class IOSDevice {
|
|
|
195
229
|
const nodes = parseXcuiHierarchy(json);
|
|
196
230
|
const tree = serializeNativeTree(nodes);
|
|
197
231
|
this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
|
|
198
|
-
return tree;
|
|
232
|
+
return { ...tree, nodes };
|
|
199
233
|
}
|
|
200
234
|
catch (err) {
|
|
201
235
|
const msg = err instanceof Error ? err.message : String(err);
|
|
202
236
|
this.log(`a11y describe-all failed, falling back to vision: ${msg}`);
|
|
203
|
-
return { simplified: "", nodeMap: new Map() };
|
|
237
|
+
return { simplified: "", nodeMap: new Map(), nodes: [] };
|
|
204
238
|
}
|
|
205
239
|
}
|
|
206
240
|
async captureScreenshot() {
|